Fix the word wrapping in formatting to handle escape sequences properly (#17316)

This commit is contained in:
Dongbo Wang 2022-05-16 16:10:55 -07:00 committed by GitHub
parent 41c7cd79da
commit c40066d5f2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 270 additions and 96 deletions

View File

@ -162,6 +162,7 @@
<Value>op</Value>
<Value>my</Value>
<Value>sb</Value>
<Value>vt</Value>
</CollectionProperty>
</AnalyzerSettings>
</Analyzer>

View File

@ -6,8 +6,10 @@ using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Collections.Specialized;
using System.Globalization;
using System.Management.Automation;
using System.Management.Automation.Internal;
using System.Text;
using System.Text.RegularExpressions;
namespace Microsoft.PowerShell.Commands.Internal.Format
{
@ -146,7 +148,7 @@ namespace Microsoft.PowerShell.Commands.Internal.Format
int indentationAbsoluteValue = (firstLineIndentation > 0) ? firstLineIndentation : -firstLineIndentation;
if (indentationAbsoluteValue >= usefulWidth)
{
// valu too big, we reset it to zero
// value too big, we reset it to zero
firstLineIndentation = 0;
}
@ -353,27 +355,58 @@ namespace Microsoft.PowerShell.Commands.Internal.Format
private static IEnumerable<GetWordsResult> GetWords(string s)
{
StringBuilder sb = new StringBuilder();
GetWordsResult result = new GetWordsResult();
StringBuilder vtSeqs = null;
Dictionary<int, int> vtRanges = null;
var valueStrDec = new ValueStringDecorated(s);
if (valueStrDec.IsDecorated)
{
vtSeqs = new StringBuilder();
vtRanges = valueStrDec.EscapeSequenceRanges;
}
bool wordHasVtSeqs = false;
for (int i = 0; i < s.Length; i++)
{
// Soft hyphen = \u00AD - Should break, and add a hyphen if needed. If not needed for a break, hyphen should be absent
if (vtRanges?.TryGetValue(i, out int len) == true)
{
var vtSpan = s.AsSpan(i, len);
sb.Append(vtSpan);
vtSeqs.Append(vtSpan);
wordHasVtSeqs = true;
i += len - 1;
continue;
}
string delimiter = null;
if (s[i] == ' ' || s[i] == '\t' || s[i] == s_softHyphen)
{
result.Word = sb.ToString();
sb.Clear();
result.Delim = new string(s[i], 1);
yield return result;
// Soft hyphen = \u00AD - Should break, and add a hyphen if needed.
// If not needed for a break, hyphen should be absent.
delimiter = new string(s[i], 1);
}
// Non-breaking space = \u00A0 - ideally shouldn't wrap
// Hard hyphen = \u2011 - Should not break
else if (s[i] == s_hardHyphen || s[i] == s_nonBreakingSpace)
{
result.Word = sb.ToString();
sb.Clear();
result.Delim = string.Empty;
// Non-breaking space = \u00A0 - ideally shouldn't wrap.
// Hard hyphen = \u2011 - Should not break.
delimiter = string.Empty;
}
if (delimiter is not null)
{
if (wordHasVtSeqs && !sb.EndsWith(PSStyle.Instance.Reset))
{
sb.Append(PSStyle.Instance.Reset);
}
var result = new GetWordsResult()
{
Word = sb.ToString(),
Delim = delimiter
};
sb.Clear().Append(vtSeqs);
yield return result;
}
else
@ -382,10 +415,23 @@ namespace Microsoft.PowerShell.Commands.Internal.Format
}
}
result.Word = sb.ToString();
result.Delim = string.Empty;
if (wordHasVtSeqs)
{
if (sb.Length == vtSeqs.Length)
{
// This indicates 'sb' only contains all VT sequences, which may happen when the string ends with a word delimiter.
// For a word that contains VT sequence only, it's the same as an empty string to the formatting system,
// because nothing will actually be rendered.
// So, we use an empty string in this case to avoid unneeded string allocations.
sb.Clear();
}
else if (!sb.EndsWith(PSStyle.Instance.Reset))
{
sb.Append(PSStyle.Instance.Reset);
}
}
yield return result;
yield return new GetWordsResult() { Word = sb.ToString(), Delim = string.Empty };
}
internal static StringCollection GenerateLines(DisplayCells displayCells, string val, int firstLineLen, int followingLinesLen)
@ -412,9 +458,9 @@ namespace Microsoft.PowerShell.Commands.Internal.Format
}
// break string on newlines and process each line separately
string[] lines = SplitLines(val);
List<string> lines = SplitLines(val);
for (int k = 0; k < lines.Length; k++)
for (int k = 0; k < lines.Count; k++)
{
string currentLine = lines[k];
@ -530,9 +576,9 @@ namespace Microsoft.PowerShell.Commands.Internal.Format
}
// break string on newlines and process each line separately
string[] lines = SplitLines(val);
List<string> lines = SplitLines(val);
for (int k = 0; k < lines.Length; k++)
for (int k = 0; k < lines.Count; k++)
{
if (lines[k] == null || displayCells.Length(lines[k]) <= firstLineLen)
{
@ -545,28 +591,34 @@ namespace Microsoft.PowerShell.Commands.Internal.Format
int lineWidth = firstLineLen;
bool firstLine = true;
StringBuilder singleLine = new StringBuilder();
string resetStr = PSStyle.Instance.Reset;
foreach (GetWordsResult word in GetWords(lines[k]))
{
string wordToAdd = word.Word;
string suffix = null;
// Handle soft hyphen
if (word.Delim == s_softHyphen.ToString())
if (word.Delim.Length == 1 && word.Delim[0] == s_softHyphen)
{
int wordWidthWithHyphen = displayCells.Length(wordToAdd) + displayCells.Length(s_softHyphen);
// Add hyphen only if necessary
if (wordWidthWithHyphen == spacesLeft)
{
wordToAdd += "-";
suffix = "-";
}
}
else
else if (!string.IsNullOrEmpty(word.Delim))
{
if (!string.IsNullOrEmpty(word.Delim))
{
wordToAdd += word.Delim;
}
suffix = word.Delim;
}
if (suffix is not null)
{
wordToAdd = wordToAdd.EndsWith(resetStr)
? wordToAdd.Insert(wordToAdd.Length - resetStr.Length, suffix)
: wordToAdd + suffix;
}
int wordWidth = displayCells.Length(wordToAdd);
@ -591,15 +643,35 @@ namespace Microsoft.PowerShell.Commands.Internal.Format
// Word is wider than a single line
if (wordWidth > lineWidth)
{
foreach (char c in wordToAdd)
{
char charToAdd = c;
int charWidth = displayCells.Length(c);
Dictionary<int, int> vtRanges = null;
StringBuilder vtSeqs = null;
// corner case: we have a two cell character and the current
// display length is one.
// add a single cell arbitrary character instead of the original
// one and keep going
var valueStrDec = new ValueStringDecorated(wordToAdd);
if (valueStrDec.IsDecorated)
{
vtSeqs = new StringBuilder();
vtRanges = valueStrDec.EscapeSequenceRanges;
}
bool hasEscSeqs = false;
for (int i = 0; i < wordToAdd.Length; i++)
{
if (vtRanges?.TryGetValue(i, out int len) == true)
{
var vtSpan = wordToAdd.AsSpan(i, len);
singleLine.Append(vtSpan);
vtSeqs.Append(vtSpan);
hasEscSeqs = true;
i += len - 1;
continue;
}
char charToAdd = wordToAdd[i];
int charWidth = displayCells.Length(charToAdd);
// Corner case: we have a two cell character and the current display length is one.
// Add a single cell arbitrary character instead of the original one and keep going.
if (charWidth > lineWidth)
{
charToAdd = '?';
@ -608,9 +680,13 @@ namespace Microsoft.PowerShell.Commands.Internal.Format
if (charWidth > spacesLeft)
{
if (hasEscSeqs && !singleLine.EndsWith(resetStr))
{
singleLine.Append(resetStr);
}
retVal.Add(singleLine.ToString());
singleLine.Clear();
singleLine.Append(charToAdd);
singleLine.Clear().Append(vtSeqs).Append(charToAdd);
if (firstLine)
{
@ -632,8 +708,7 @@ namespace Microsoft.PowerShell.Commands.Internal.Format
if (wordWidth > spacesLeft)
{
retVal.Add(singleLine.ToString());
singleLine.Clear();
singleLine.Append(wordToAdd);
singleLine.Clear().Append(wordToAdd);
if (firstLine)
{
@ -663,49 +738,77 @@ namespace Microsoft.PowerShell.Commands.Internal.Format
/// </summary>
/// <param name="s">String to split.</param>
/// <returns>String array with the values.</returns>
internal static string[] SplitLines(string s)
internal static List<string> SplitLines(string s)
{
if (string.IsNullOrEmpty(s))
return new string[1] { s };
if (string.IsNullOrEmpty(s) || !s.Contains('\n'))
{
return new List<string>(capacity: 1) { s?.Replace("\r", string.Empty) };
}
StringBuilder sb = new StringBuilder();
List<string> list = new List<string>();
foreach (char c in s)
StringBuilder vtSeqs = null;
Dictionary<int, int> vtRanges = null;
var valueStrDec = new ValueStringDecorated(s);
if (valueStrDec.IsDecorated)
{
if (c != '\r')
vtSeqs = new StringBuilder();
vtRanges = valueStrDec.EscapeSequenceRanges;
}
bool hasVtSeqs = false;
for (int i = 0; i < s.Length; i++)
{
if (vtRanges?.TryGetValue(i, out int len) == true)
{
var vtSpan = s.AsSpan(i, len);
sb.Append(vtSpan);
vtSeqs.Append(vtSpan);
hasVtSeqs = true;
i += len - 1;
continue;
}
char c = s[i];
if (c == '\n')
{
if (hasVtSeqs && !sb.EndsWith(PSStyle.Instance.Reset))
{
sb.Append(PSStyle.Instance.Reset);
}
list.Add(sb.ToString());
sb.Clear().Append(vtSeqs);
}
else if (c != '\r')
{
sb.Append(c);
}
}
return sb.ToString().Split(s_newLineChar);
}
#if false
internal static string StripNewLines (string s)
{
if (string.IsNullOrEmpty (s))
return s;
string[] lines = SplitLines (s);
if (lines.Length == 0)
return null;
if (lines.Length == 1)
return lines[0];
StringBuilder sb = new StringBuilder ();
for (int k = 0; k < lines.Length; k++)
if (hasVtSeqs)
{
if (k == 0)
sb.Append (lines[k]);
else
sb.Append (" " + lines[k]);
if (sb.Length == vtSeqs.Length)
{
// This indicates 'sb' only contains all VT sequences, which may happen when the string ends with '\n'.
// For a sub-string that contains VT sequence only, it's the same as an empty string to the formatting
// system, because nothing will actually be rendered.
// So, we use an empty string in this case to avoid unneeded string allocations.
sb.Clear();
}
else if (!sb.EndsWith(PSStyle.Instance.Reset))
{
sb.Append(PSStyle.Instance.Reset);
}
}
return sb.ToString ();
list.Add(sb.ToString());
return list;
}
#endif
internal static string TruncateAtNewLine(string s)
{
if (string.IsNullOrEmpty(s))

View File

@ -1,6 +1,7 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Management.Automation;
@ -379,10 +380,10 @@ namespace Microsoft.PowerShell.Commands.Internal.Format
}
// check for line breaks
string[] lines = StringManipulationHelper.SplitLines(val);
List<string> lines = StringManipulationHelper.SplitLines(val);
// process the substrings as separate lines
for (int k = 0; k < lines.Length; k++)
for (int k = 0; k < lines.Count; k++)
{
// compute the display length of the string
int displayLength = _displayCells.Length(lines[k]);

View File

@ -2,6 +2,7 @@
// Licensed under the MIT License.
using System;
using System.Collections.Generic;
using System.Collections.Specialized;
using System.Diagnostics;
using System.Management.Automation;
@ -180,12 +181,12 @@ namespace Microsoft.PowerShell.Commands.Internal.Format
propertyValue = string.Empty;
// make sure we honor embedded newlines
string[] lines = StringManipulationHelper.SplitLines(propertyValue);
List<string> lines = StringManipulationHelper.SplitLines(propertyValue);
// padding to use in the lines after the first
string padding = null;
for (int i = 0; i < lines.Length; i++)
for (int i = 0; i < lines.Count; i++)
{
string prependString = null;
@ -212,7 +213,7 @@ namespace Microsoft.PowerShell.Commands.Internal.Format
/// <param name="lo">LineOuput to write to.</param>
private void WriteSingleLineHelper(string prependString, string line, LineOutput lo)
{
if (line == null)
if (line is null)
{
line = string.Empty;
}
@ -223,8 +224,8 @@ namespace Microsoft.PowerShell.Commands.Internal.Format
// split the lines
StringCollection sc = StringManipulationHelper.GenerateLines(lo.DisplayCells, line, fieldCellCount, fieldCellCount);
// padding to use in the lines after the first
string padding = StringUtil.Padding(_propertyLabelsDisplayLength);
// The padding to use in the lines after the first.
string headPadding = null;
// display the string collection
for (int k = 0; k < sc.Count; k++)
@ -234,17 +235,26 @@ namespace Microsoft.PowerShell.Commands.Internal.Format
if (k == 0)
{
_cachedBuilder
.Append(PSStyle.Instance.Formatting.FormatAccent)
.Append(prependString)
.Append(PSStyle.Instance.Reset)
.Append(str);
if (string.IsNullOrWhiteSpace(prependString))
{
// Sometimes 'prependString' is just padding white spaces.
// We don't need to add formatting escape sequences in such a case.
_cachedBuilder.Append(prependString).Append(str);
}
else
{
_cachedBuilder
.Append(PSStyle.Instance.Formatting.FormatAccent)
.Append(prependString)
.Append(PSStyle.Instance.Reset)
.Append(str);
}
}
else
{
_cachedBuilder
.Append(padding)
.Append(str);
// Lazily calculate the padding to use for the subsequent lines as it's quite often that only the first line exists.
headPadding ??= StringUtil.Padding(_propertyLabelsDisplayLength);
_cachedBuilder.Append(headPadding).Append(str);
}
if (str.Contains(ValueStringDecorated.ESC) && !str.EndsWith(PSStyle.Instance.Reset))

View File

@ -3,6 +3,7 @@
#nullable enable
using System.Collections.Generic;
using System.Text.RegularExpressions;
namespace System.Management.Automation.Internal
@ -87,6 +88,7 @@ namespace System.Management.Automation.Internal
private readonly bool _isDecorated;
private readonly string _text;
private string? _plaintextcontent;
private Dictionary<int, int>? _vtRanges;
private string PlainText
{
@ -110,6 +112,30 @@ namespace System.Management.Automation.Internal
// replace regex with .NET 6 API once available
internal static readonly Regex AnsiRegex = new Regex($"{GraphicsRegex}|{CsiRegex}", RegexOptions.Compiled);
/// <summary>
/// Get the ranges of all escape sequences in the text.
/// </summary>
/// <returns>
/// A dictionary with the key being the starting index of an escape sequence,
/// and the value being the length of the escape sequence.
/// </returns>
internal Dictionary<int, int>? EscapeSequenceRanges
{
get
{
if (_isDecorated && _vtRanges is null)
{
_vtRanges = new Dictionary<int, int>();
foreach (Match match in AnsiRegex.Matches(_text))
{
_vtRanges.Add(match.Index, match.Length);
}
}
return _vtRanges;
}
}
/// <summary>
/// Initializes a new instance of the <see cref="ValueStringDecorated"/> struct.
/// </summary>
@ -118,7 +144,8 @@ namespace System.Management.Automation.Internal
{
_text = text;
_isDecorated = text.Contains(ESC);
_plaintextcontent = null;
_plaintextcontent = _isDecorated ? null : text;
_vtRanges = null;
}
/// <summary>

View File

@ -157,22 +157,16 @@ namespace System.Management.Automation.Internal
if (valueStrDec.IsDecorated)
{
// Handle strings with VT sequences.
var sb = new StringBuilder(capacity: str.Length);
bool copyStarted = startOffset == 0;
bool hasEscSeqs = false;
bool firstNonEscChar = true;
// Find all escape sequences in the string, and keep track of their starting indexes and length.
var ansiRanges = new Dictionary<int, int>();
foreach (Match match in ValueStringDecorated.AnsiRegex.Matches(str))
{
ansiRanges.Add(match.Index, match.Length);
}
StringBuilder sb = new(capacity: str.Length);
Dictionary<int, int> vtRanges = valueStrDec.EscapeSequenceRanges;
for (int i = 0, offset = 0; i < str.Length; i++)
{
// Keep all leading ANSI escape sequences.
if (ansiRanges.TryGetValue(i, out int len))
if (vtRanges.TryGetValue(i, out int len))
{
hasEscSeqs = true;
sb.Append(str.AsSpan(i, len));

View File

@ -25,7 +25,7 @@
},
"namingRules" : {
"allowCommonHungarianPrefixes" : true,
"allowedHungarianPrefixes" : [ "n", "r", "l", "i", "io", "is", "fs", "lp", "dw", "h", "rs", "ps", "op", "sb", "my" ]
"allowedHungarianPrefixes" : [ "n", "r", "l", "i", "io", "is", "fs", "lp", "dw", "h", "rs", "ps", "op", "sb", "my", "vt" ]
},
"maintainabilityRules" : {
"topLevelTypes" : [

View File

@ -342,4 +342,42 @@ Billy Bob… Senior DevOps … 13
$text = Get-Content $outFile -Raw
$text.Trim().Replace("`r", "") | Should -BeExactly $expected.Replace("`r", "")
}
It "Word wrapping for string with escape sequences" {
$expected = @"
`e[32;1mLongDescription : `e[0m`e[33mPowerShell `e[0m
`e[33mscripting `e[0m
`e[33mlanguage`e[0m
"@
$obj = [pscustomobject] @{ LongDescription = "`e[33mPowerShell scripting language" }
$obj | Format-List | Out-String -Width 35 | Out-File $outFile
$text = Get-Content $outFile -Raw
$text.Trim().Replace("`r", "") | Should -BeExactly $expected.Replace("`r", "")
}
It "Splitting multi-line string with escape sequences" {
$expected = @"
`e[32;1mb : `e[0m`e[33mPowerShell is a task automation and configuration management program from Microsoft,`e[0m
`e[33mconsisting of a command-line shell and the associated scripting language`e[0m
"@
$obj = [pscustomobject] @{ b = "`e[33mPowerShell is a task automation and configuration management program from Microsoft,`nconsisting of a command-line shell and the associated scripting language" }
$obj | Format-List | Out-File $outFile
$text = Get-Content $outFile -Raw
$text.Trim().Replace("`r", "") | Should -BeExactly $expected.Replace("`r", "")
}
It "Wrapping long word with escape sequences" {
$expected = @"
`e[32;1mb : `e[0m`e[33mC:\repos\PowerShell\src\powershell-w`e[0m
`e[33min-core\bin\Debug\net7.0\win7-x64\pu`e[0m
`e[33mblish\pwsh.exe`e[0m
"@
$obj = [pscustomobject] @{ b = "`e[33mC:\repos\PowerShell\src\powershell-win-core\bin\Debug\net7.0\win7-x64\publish\pwsh.exe" }
$obj | Format-List | Out-String -Width 40 | Out-File $outFile
$text = Get-Content $outFile -Raw
$text.Trim().Replace("`r", "") | Should -BeExactly $expected.Replace("`r", "")
}
}