Fix the word wrapping in formatting to handle escape sequences properly (#17316)

This commit is contained in:
Dongbo Wang 2022-05-16 16:10:55 -07:00 committed by GitHub
parent 41c7cd79da
commit c40066d5f2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 270 additions and 96 deletions

View File

@ -162,6 +162,7 @@
<Value>op</Value> <Value>op</Value>
<Value>my</Value> <Value>my</Value>
<Value>sb</Value> <Value>sb</Value>
<Value>vt</Value>
</CollectionProperty> </CollectionProperty>
</AnalyzerSettings> </AnalyzerSettings>
</Analyzer> </Analyzer>

View File

@ -6,8 +6,10 @@ using System.Collections.Generic;
using System.Collections.ObjectModel; using System.Collections.ObjectModel;
using System.Collections.Specialized; using System.Collections.Specialized;
using System.Globalization; using System.Globalization;
using System.Management.Automation;
using System.Management.Automation.Internal; using System.Management.Automation.Internal;
using System.Text; using System.Text;
using System.Text.RegularExpressions;
namespace Microsoft.PowerShell.Commands.Internal.Format namespace Microsoft.PowerShell.Commands.Internal.Format
{ {
@ -146,7 +148,7 @@ namespace Microsoft.PowerShell.Commands.Internal.Format
int indentationAbsoluteValue = (firstLineIndentation > 0) ? firstLineIndentation : -firstLineIndentation; int indentationAbsoluteValue = (firstLineIndentation > 0) ? firstLineIndentation : -firstLineIndentation;
if (indentationAbsoluteValue >= usefulWidth) if (indentationAbsoluteValue >= usefulWidth)
{ {
// valu too big, we reset it to zero // value too big, we reset it to zero
firstLineIndentation = 0; firstLineIndentation = 0;
} }
@ -353,27 +355,58 @@ namespace Microsoft.PowerShell.Commands.Internal.Format
private static IEnumerable<GetWordsResult> GetWords(string s) private static IEnumerable<GetWordsResult> GetWords(string s)
{ {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
GetWordsResult result = new GetWordsResult(); StringBuilder vtSeqs = null;
Dictionary<int, int> vtRanges = null;
var valueStrDec = new ValueStringDecorated(s);
if (valueStrDec.IsDecorated)
{
vtSeqs = new StringBuilder();
vtRanges = valueStrDec.EscapeSequenceRanges;
}
bool wordHasVtSeqs = false;
for (int i = 0; i < s.Length; i++) for (int i = 0; i < s.Length; i++)
{ {
// Soft hyphen = \u00AD - Should break, and add a hyphen if needed. If not needed for a break, hyphen should be absent if (vtRanges?.TryGetValue(i, out int len) == true)
{
var vtSpan = s.AsSpan(i, len);
sb.Append(vtSpan);
vtSeqs.Append(vtSpan);
wordHasVtSeqs = true;
i += len - 1;
continue;
}
string delimiter = null;
if (s[i] == ' ' || s[i] == '\t' || s[i] == s_softHyphen) if (s[i] == ' ' || s[i] == '\t' || s[i] == s_softHyphen)
{ {
result.Word = sb.ToString(); // Soft hyphen = \u00AD - Should break, and add a hyphen if needed.
sb.Clear(); // If not needed for a break, hyphen should be absent.
result.Delim = new string(s[i], 1); delimiter = new string(s[i], 1);
yield return result;
} }
// Non-breaking space = \u00A0 - ideally shouldn't wrap
// Hard hyphen = \u2011 - Should not break
else if (s[i] == s_hardHyphen || s[i] == s_nonBreakingSpace) else if (s[i] == s_hardHyphen || s[i] == s_nonBreakingSpace)
{ {
result.Word = sb.ToString(); // Non-breaking space = \u00A0 - ideally shouldn't wrap.
sb.Clear(); // Hard hyphen = \u2011 - Should not break.
result.Delim = string.Empty; delimiter = string.Empty;
}
if (delimiter is not null)
{
if (wordHasVtSeqs && !sb.EndsWith(PSStyle.Instance.Reset))
{
sb.Append(PSStyle.Instance.Reset);
}
var result = new GetWordsResult()
{
Word = sb.ToString(),
Delim = delimiter
};
sb.Clear().Append(vtSeqs);
yield return result; yield return result;
} }
else else
@ -382,10 +415,23 @@ namespace Microsoft.PowerShell.Commands.Internal.Format
} }
} }
result.Word = sb.ToString(); if (wordHasVtSeqs)
result.Delim = string.Empty; {
if (sb.Length == vtSeqs.Length)
{
// This indicates 'sb' contains nothing but VT sequences, which may happen when the string ends with a word delimiter.
// For a word that contains VT sequence only, it's the same as an empty string to the formatting system,
// because nothing will actually be rendered.
// So, we use an empty string in this case to avoid unneeded string allocations.
sb.Clear();
}
else if (!sb.EndsWith(PSStyle.Instance.Reset))
{
sb.Append(PSStyle.Instance.Reset);
}
}
yield return result; yield return new GetWordsResult() { Word = sb.ToString(), Delim = string.Empty };
} }
internal static StringCollection GenerateLines(DisplayCells displayCells, string val, int firstLineLen, int followingLinesLen) internal static StringCollection GenerateLines(DisplayCells displayCells, string val, int firstLineLen, int followingLinesLen)
@ -412,9 +458,9 @@ namespace Microsoft.PowerShell.Commands.Internal.Format
} }
// break string on newlines and process each line separately // break string on newlines and process each line separately
string[] lines = SplitLines(val); List<string> lines = SplitLines(val);
for (int k = 0; k < lines.Length; k++) for (int k = 0; k < lines.Count; k++)
{ {
string currentLine = lines[k]; string currentLine = lines[k];
@ -530,9 +576,9 @@ namespace Microsoft.PowerShell.Commands.Internal.Format
} }
// break string on newlines and process each line separately // break string on newlines and process each line separately
string[] lines = SplitLines(val); List<string> lines = SplitLines(val);
for (int k = 0; k < lines.Length; k++) for (int k = 0; k < lines.Count; k++)
{ {
if (lines[k] == null || displayCells.Length(lines[k]) <= firstLineLen) if (lines[k] == null || displayCells.Length(lines[k]) <= firstLineLen)
{ {
@ -545,28 +591,34 @@ namespace Microsoft.PowerShell.Commands.Internal.Format
int lineWidth = firstLineLen; int lineWidth = firstLineLen;
bool firstLine = true; bool firstLine = true;
StringBuilder singleLine = new StringBuilder(); StringBuilder singleLine = new StringBuilder();
string resetStr = PSStyle.Instance.Reset;
foreach (GetWordsResult word in GetWords(lines[k])) foreach (GetWordsResult word in GetWords(lines[k]))
{ {
string wordToAdd = word.Word; string wordToAdd = word.Word;
string suffix = null;
// Handle soft hyphen // Handle soft hyphen
if (word.Delim == s_softHyphen.ToString()) if (word.Delim.Length == 1 && word.Delim[0] == s_softHyphen)
{ {
int wordWidthWithHyphen = displayCells.Length(wordToAdd) + displayCells.Length(s_softHyphen); int wordWidthWithHyphen = displayCells.Length(wordToAdd) + displayCells.Length(s_softHyphen);
// Add hyphen only if necessary // Add hyphen only if necessary
if (wordWidthWithHyphen == spacesLeft) if (wordWidthWithHyphen == spacesLeft)
{ {
wordToAdd += "-"; suffix = "-";
} }
} }
else else if (!string.IsNullOrEmpty(word.Delim))
{ {
if (!string.IsNullOrEmpty(word.Delim)) suffix = word.Delim;
{ }
wordToAdd += word.Delim;
} if (suffix is not null)
{
wordToAdd = wordToAdd.EndsWith(resetStr)
? wordToAdd.Insert(wordToAdd.Length - resetStr.Length, suffix)
: wordToAdd + suffix;
} }
int wordWidth = displayCells.Length(wordToAdd); int wordWidth = displayCells.Length(wordToAdd);
@ -591,15 +643,35 @@ namespace Microsoft.PowerShell.Commands.Internal.Format
// Word is wider than a single line // Word is wider than a single line
if (wordWidth > lineWidth) if (wordWidth > lineWidth)
{ {
foreach (char c in wordToAdd) Dictionary<int, int> vtRanges = null;
{ StringBuilder vtSeqs = null;
char charToAdd = c;
int charWidth = displayCells.Length(c);
// corner case: we have a two cell character and the current var valueStrDec = new ValueStringDecorated(wordToAdd);
// display length is one. if (valueStrDec.IsDecorated)
// add a single cell arbitrary character instead of the original {
// one and keep going vtSeqs = new StringBuilder();
vtRanges = valueStrDec.EscapeSequenceRanges;
}
bool hasEscSeqs = false;
for (int i = 0; i < wordToAdd.Length; i++)
{
if (vtRanges?.TryGetValue(i, out int len) == true)
{
var vtSpan = wordToAdd.AsSpan(i, len);
singleLine.Append(vtSpan);
vtSeqs.Append(vtSpan);
hasEscSeqs = true;
i += len - 1;
continue;
}
char charToAdd = wordToAdd[i];
int charWidth = displayCells.Length(charToAdd);
// Corner case: we have a two cell character and the current display length is one.
// Add a single cell arbitrary character instead of the original one and keep going.
if (charWidth > lineWidth) if (charWidth > lineWidth)
{ {
charToAdd = '?'; charToAdd = '?';
@ -608,9 +680,13 @@ namespace Microsoft.PowerShell.Commands.Internal.Format
if (charWidth > spacesLeft) if (charWidth > spacesLeft)
{ {
if (hasEscSeqs && !singleLine.EndsWith(resetStr))
{
singleLine.Append(resetStr);
}
retVal.Add(singleLine.ToString()); retVal.Add(singleLine.ToString());
singleLine.Clear(); singleLine.Clear().Append(vtSeqs).Append(charToAdd);
singleLine.Append(charToAdd);
if (firstLine) if (firstLine)
{ {
@ -632,8 +708,7 @@ namespace Microsoft.PowerShell.Commands.Internal.Format
if (wordWidth > spacesLeft) if (wordWidth > spacesLeft)
{ {
retVal.Add(singleLine.ToString()); retVal.Add(singleLine.ToString());
singleLine.Clear(); singleLine.Clear().Append(wordToAdd);
singleLine.Append(wordToAdd);
if (firstLine) if (firstLine)
{ {
@ -663,49 +738,77 @@ namespace Microsoft.PowerShell.Commands.Internal.Format
/// </summary> /// </summary>
/// <param name="s">String to split.</param> /// <param name="s">String to split.</param>
/// <returns>String array with the values.</returns> /// <returns>List of strings with the values.</returns>
internal static string[] SplitLines(string s) internal static List<string> SplitLines(string s)
{ {
if (string.IsNullOrEmpty(s)) if (string.IsNullOrEmpty(s) || !s.Contains('\n'))
return new string[1] { s }; {
return new List<string>(capacity: 1) { s?.Replace("\r", string.Empty) };
}
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
List<string> list = new List<string>();
foreach (char c in s) StringBuilder vtSeqs = null;
Dictionary<int, int> vtRanges = null;
var valueStrDec = new ValueStringDecorated(s);
if (valueStrDec.IsDecorated)
{ {
if (c != '\r') vtSeqs = new StringBuilder();
vtRanges = valueStrDec.EscapeSequenceRanges;
}
bool hasVtSeqs = false;
for (int i = 0; i < s.Length; i++)
{
if (vtRanges?.TryGetValue(i, out int len) == true)
{
var vtSpan = s.AsSpan(i, len);
sb.Append(vtSpan);
vtSeqs.Append(vtSpan);
hasVtSeqs = true;
i += len - 1;
continue;
}
char c = s[i];
if (c == '\n')
{
if (hasVtSeqs && !sb.EndsWith(PSStyle.Instance.Reset))
{
sb.Append(PSStyle.Instance.Reset);
}
list.Add(sb.ToString());
sb.Clear().Append(vtSeqs);
}
else if (c != '\r')
{
sb.Append(c); sb.Append(c);
}
} }
return sb.ToString().Split(s_newLineChar); if (hasVtSeqs)
}
#if false
internal static string StripNewLines (string s)
{
if (string.IsNullOrEmpty (s))
return s;
string[] lines = SplitLines (s);
if (lines.Length == 0)
return null;
if (lines.Length == 1)
return lines[0];
StringBuilder sb = new StringBuilder ();
for (int k = 0; k < lines.Length; k++)
{ {
if (k == 0) if (sb.Length == vtSeqs.Length)
sb.Append (lines[k]); {
else // This indicates 'sb' only contains all VT sequences, which may happen when the string ends with '\n'.
sb.Append (" " + lines[k]); // For a sub-string that contains VT sequence only, it's the same as an empty string to the formatting
// system, because nothing will actually be rendered.
// So, we use an empty string in this case to avoid unneeded string allocations.
sb.Clear();
}
else if (!sb.EndsWith(PSStyle.Instance.Reset))
{
sb.Append(PSStyle.Instance.Reset);
}
} }
return sb.ToString (); list.Add(sb.ToString());
return list;
} }
#endif
internal static string TruncateAtNewLine(string s) internal static string TruncateAtNewLine(string s)
{ {
if (string.IsNullOrEmpty(s)) if (string.IsNullOrEmpty(s))

View File

@ -1,6 +1,7 @@
// Copyright (c) Microsoft Corporation. // Copyright (c) Microsoft Corporation.
// Licensed under the MIT License. // Licensed under the MIT License.
using System.Collections.Generic;
using System.Globalization; using System.Globalization;
using System.IO; using System.IO;
using System.Management.Automation; using System.Management.Automation;
@ -379,10 +380,10 @@ namespace Microsoft.PowerShell.Commands.Internal.Format
} }
// check for line breaks // check for line breaks
string[] lines = StringManipulationHelper.SplitLines(val); List<string> lines = StringManipulationHelper.SplitLines(val);
// process the substrings as separate lines // process the substrings as separate lines
for (int k = 0; k < lines.Length; k++) for (int k = 0; k < lines.Count; k++)
{ {
// compute the display length of the string // compute the display length of the string
int displayLength = _displayCells.Length(lines[k]); int displayLength = _displayCells.Length(lines[k]);

View File

@ -2,6 +2,7 @@
// Licensed under the MIT License. // Licensed under the MIT License.
using System; using System;
using System.Collections.Generic;
using System.Collections.Specialized; using System.Collections.Specialized;
using System.Diagnostics; using System.Diagnostics;
using System.Management.Automation; using System.Management.Automation;
@ -180,12 +181,12 @@ namespace Microsoft.PowerShell.Commands.Internal.Format
propertyValue = string.Empty; propertyValue = string.Empty;
// make sure we honor embedded newlines // make sure we honor embedded newlines
string[] lines = StringManipulationHelper.SplitLines(propertyValue); List<string> lines = StringManipulationHelper.SplitLines(propertyValue);
// padding to use in the lines after the first // padding to use in the lines after the first
string padding = null; string padding = null;
for (int i = 0; i < lines.Length; i++) for (int i = 0; i < lines.Count; i++)
{ {
string prependString = null; string prependString = null;
@ -212,7 +213,7 @@ namespace Microsoft.PowerShell.Commands.Internal.Format
/// <param name="lo">LineOutput to write to.</param> /// <param name="lo">LineOutput to write to.</param>
private void WriteSingleLineHelper(string prependString, string line, LineOutput lo) private void WriteSingleLineHelper(string prependString, string line, LineOutput lo)
{ {
if (line == null) if (line is null)
{ {
line = string.Empty; line = string.Empty;
} }
@ -223,8 +224,8 @@ namespace Microsoft.PowerShell.Commands.Internal.Format
// split the lines // split the lines
StringCollection sc = StringManipulationHelper.GenerateLines(lo.DisplayCells, line, fieldCellCount, fieldCellCount); StringCollection sc = StringManipulationHelper.GenerateLines(lo.DisplayCells, line, fieldCellCount, fieldCellCount);
// padding to use in the lines after the first // The padding to use in the lines after the first.
string padding = StringUtil.Padding(_propertyLabelsDisplayLength); string headPadding = null;
// display the string collection // display the string collection
for (int k = 0; k < sc.Count; k++) for (int k = 0; k < sc.Count; k++)
@ -234,17 +235,26 @@ namespace Microsoft.PowerShell.Commands.Internal.Format
if (k == 0) if (k == 0)
{ {
_cachedBuilder if (string.IsNullOrWhiteSpace(prependString))
.Append(PSStyle.Instance.Formatting.FormatAccent) {
.Append(prependString) // Sometimes 'prependString' is just padding white spaces.
.Append(PSStyle.Instance.Reset) // We don't need to add formatting escape sequences in such a case.
.Append(str); _cachedBuilder.Append(prependString).Append(str);
}
else
{
_cachedBuilder
.Append(PSStyle.Instance.Formatting.FormatAccent)
.Append(prependString)
.Append(PSStyle.Instance.Reset)
.Append(str);
}
} }
else else
{ {
_cachedBuilder // Lazily calculate the padding to use for the subsequent lines as it's quite often that only the first line exists.
.Append(padding) headPadding ??= StringUtil.Padding(_propertyLabelsDisplayLength);
.Append(str); _cachedBuilder.Append(headPadding).Append(str);
} }
if (str.Contains(ValueStringDecorated.ESC) && !str.EndsWith(PSStyle.Instance.Reset)) if (str.Contains(ValueStringDecorated.ESC) && !str.EndsWith(PSStyle.Instance.Reset))

View File

@ -3,6 +3,7 @@
#nullable enable #nullable enable
using System.Collections.Generic;
using System.Text.RegularExpressions; using System.Text.RegularExpressions;
namespace System.Management.Automation.Internal namespace System.Management.Automation.Internal
@ -87,6 +88,7 @@ namespace System.Management.Automation.Internal
private readonly bool _isDecorated; private readonly bool _isDecorated;
private readonly string _text; private readonly string _text;
private string? _plaintextcontent; private string? _plaintextcontent;
private Dictionary<int, int>? _vtRanges;
private string PlainText private string PlainText
{ {
@ -110,6 +112,30 @@ namespace System.Management.Automation.Internal
// replace regex with .NET 6 API once available // replace regex with .NET 6 API once available
internal static readonly Regex AnsiRegex = new Regex($"{GraphicsRegex}|{CsiRegex}", RegexOptions.Compiled); internal static readonly Regex AnsiRegex = new Regex($"{GraphicsRegex}|{CsiRegex}", RegexOptions.Compiled);
/// <summary>
/// Gets the position of every escape sequence matched by <see cref="AnsiRegex"/> in the text.
/// </summary>
/// <value>
/// A dictionary keyed by the starting index of each escape sequence, whose value is the
/// length of that sequence. The result is whatever <c>_vtRanges</c> holds when the text
/// is not decorated.
/// </value>
internal Dictionary<int, int>? EscapeSequenceRanges
{
    get
    {
        // Skip the scan when the text is not decorated or the ranges were already computed.
        if (!_isDecorated || _vtRanges is not null)
        {
            return _vtRanges;
        }

        // Store the dictionary in the backing field first, then fill it one match at a time.
        Dictionary<int, int> ranges = new Dictionary<int, int>();
        _vtRanges = ranges;

        foreach (Match sequence in AnsiRegex.Matches(_text))
        {
            ranges.Add(sequence.Index, sequence.Length);
        }

        return _vtRanges;
    }
}
/// <summary> /// <summary>
/// Initializes a new instance of the <see cref="ValueStringDecorated"/> struct. /// Initializes a new instance of the <see cref="ValueStringDecorated"/> struct.
/// </summary> /// </summary>
@ -118,7 +144,8 @@ namespace System.Management.Automation.Internal
{ {
_text = text; _text = text;
_isDecorated = text.Contains(ESC); _isDecorated = text.Contains(ESC);
_plaintextcontent = null; _plaintextcontent = _isDecorated ? null : text;
_vtRanges = null;
} }
/// <summary> /// <summary>

View File

@ -157,22 +157,16 @@ namespace System.Management.Automation.Internal
if (valueStrDec.IsDecorated) if (valueStrDec.IsDecorated)
{ {
// Handle strings with VT sequences. // Handle strings with VT sequences.
var sb = new StringBuilder(capacity: str.Length);
bool copyStarted = startOffset == 0; bool copyStarted = startOffset == 0;
bool hasEscSeqs = false; bool hasEscSeqs = false;
bool firstNonEscChar = true; bool firstNonEscChar = true;
StringBuilder sb = new(capacity: str.Length);
// Find all escape sequences in the string, and keep track of their starting indexes and length. Dictionary<int, int> vtRanges = valueStrDec.EscapeSequenceRanges;
var ansiRanges = new Dictionary<int, int>();
foreach (Match match in ValueStringDecorated.AnsiRegex.Matches(str))
{
ansiRanges.Add(match.Index, match.Length);
}
for (int i = 0, offset = 0; i < str.Length; i++) for (int i = 0, offset = 0; i < str.Length; i++)
{ {
// Keep all leading ANSI escape sequences. // Keep all leading ANSI escape sequences.
if (ansiRanges.TryGetValue(i, out int len)) if (vtRanges.TryGetValue(i, out int len))
{ {
hasEscSeqs = true; hasEscSeqs = true;
sb.Append(str.AsSpan(i, len)); sb.Append(str.AsSpan(i, len));

View File

@ -25,7 +25,7 @@
}, },
"namingRules" : { "namingRules" : {
"allowCommonHungarianPrefixes" : true, "allowCommonHungarianPrefixes" : true,
"allowedHungarianPrefixes" : [ "n", "r", "l", "i", "io", "is", "fs", "lp", "dw", "h", "rs", "ps", "op", "sb", "my" ] "allowedHungarianPrefixes" : [ "n", "r", "l", "i", "io", "is", "fs", "lp", "dw", "h", "rs", "ps", "op", "sb", "my", "vt" ]
}, },
"maintainabilityRules" : { "maintainabilityRules" : {
"topLevelTypes" : [ "topLevelTypes" : [

View File

@ -342,4 +342,42 @@ Billy Bob… Senior DevOps … 13
$text = Get-Content $outFile -Raw $text = Get-Content $outFile -Raw
$text.Trim().Replace("`r", "") | Should -BeExactly $expected.Replace("`r", "") $text.Trim().Replace("`r", "") | Should -BeExactly $expected.Replace("`r", "")
} }
# Pipes an object whose single property value starts with `e[33m through
# Format-List | Out-String -Width 35 and compares the text written to $outFile with $expected.
# In $expected the value is spread over three lines, and each value fragment starts with
# `e[33m and ends with `e[0m.
# The comparison uses -BeExactly after trimming the ends and removing "`r", so whitespace
# inside the here-string is significant.
# NOTE(review): confirm the leading whitespace of the continuation lines in the here-string is intact.
It "Word wrapping for string with escape sequences" {
$expected = @"
`e[32;1mLongDescription : `e[0m`e[33mPowerShell `e[0m
`e[33mscripting `e[0m
`e[33mlanguage`e[0m
"@
$obj = [pscustomobject] @{ LongDescription = "`e[33mPowerShell scripting language" }
$obj | Format-List | Out-String -Width 35 | Out-File $outFile
$text = Get-Content $outFile -Raw
$text.Trim().Replace("`r", "") | Should -BeExactly $expected.Replace("`r", "")
}
# Pipes an object whose property value starts with `e[33m and contains an embedded "`n"
# through Format-List (no explicit width) and compares the text written to $outFile with $expected.
# In $expected the value appears as two lines, split where the "`n" is; each line starts with
# `e[33m and ends with `e[0m.
# The comparison uses -BeExactly after trimming the ends and removing "`r", so whitespace
# inside the here-string is significant.
# NOTE(review): confirm the leading whitespace of the second line in the here-string is intact.
It "Splitting multi-line string with escape sequences" {
$expected = @"
`e[32;1mb : `e[0m`e[33mPowerShell is a task automation and configuration management program from Microsoft,`e[0m
`e[33mconsisting of a command-line shell and the associated scripting language`e[0m
"@
$obj = [pscustomobject] @{ b = "`e[33mPowerShell is a task automation and configuration management program from Microsoft,`nconsisting of a command-line shell and the associated scripting language" }
$obj | Format-List | Out-File $outFile
$text = Get-Content $outFile -Raw
$text.Trim().Replace("`r", "") | Should -BeExactly $expected.Replace("`r", "")
}
# Pipes an object whose property value is one long path with no spaces, prefixed with `e[33m,
# through Format-List | Out-String -Width 40 and compares the text written to $outFile with $expected.
# In $expected the path is cut mid-word into three pieces; each piece starts with `e[33m
# and ends with `e[0m.
# The comparison uses -BeExactly after trimming the ends and removing "`r", so whitespace
# inside the here-string is significant.
# NOTE(review): confirm the leading whitespace of the continuation lines in the here-string is intact.
It "Wrapping long word with escape sequences" {
$expected = @"
`e[32;1mb : `e[0m`e[33mC:\repos\PowerShell\src\powershell-w`e[0m
`e[33min-core\bin\Debug\net7.0\win7-x64\pu`e[0m
`e[33mblish\pwsh.exe`e[0m
"@
$obj = [pscustomobject] @{ b = "`e[33mC:\repos\PowerShell\src\powershell-win-core\bin\Debug\net7.0\win7-x64\publish\pwsh.exe" }
$obj | Format-List | Out-String -Width 40 | Out-File $outFile
$text = Get-Content $outFile -Raw
$text.Trim().Replace("`r", "") | Should -BeExactly $expected.Replace("`r", "")
}
} }