From c40066d5f204a7addc73a7182f279a4bf46302ed Mon Sep 17 00:00:00 2001 From: Dongbo Wang Date: Mon, 16 May 2022 16:10:55 -0700 Subject: [PATCH] Fix the word wrapping in formatting to handle escape sequences properly (#17316) --- Settings.StyleCop | 1 + .../FormatAndOutput/common/ComplexWriter.cs | 243 +++++++++++++----- .../FormatAndOutput/common/ILineOutput.cs | 5 +- .../FormatAndOutput/common/ListWriter.cs | 36 ++- .../FormatAndOutput/common/StringDecorated.cs | 29 ++- .../utils/StringUtil.cs | 12 +- stylecop.json | 2 +- .../engine/Formatting/PSStyle.Tests.ps1 | 38 +++ 8 files changed, 270 insertions(+), 96 deletions(-) diff --git a/Settings.StyleCop b/Settings.StyleCop index 7fa179ee02..e10c02bdd1 100644 --- a/Settings.StyleCop +++ b/Settings.StyleCop @@ -162,6 +162,7 @@ op my sb + vt diff --git a/src/System.Management.Automation/FormatAndOutput/common/ComplexWriter.cs b/src/System.Management.Automation/FormatAndOutput/common/ComplexWriter.cs index 3adb5a799c..0982a8e198 100644 --- a/src/System.Management.Automation/FormatAndOutput/common/ComplexWriter.cs +++ b/src/System.Management.Automation/FormatAndOutput/common/ComplexWriter.cs @@ -6,8 +6,10 @@ using System.Collections.Generic; using System.Collections.ObjectModel; using System.Collections.Specialized; using System.Globalization; +using System.Management.Automation; using System.Management.Automation.Internal; using System.Text; +using System.Text.RegularExpressions; namespace Microsoft.PowerShell.Commands.Internal.Format { @@ -146,7 +148,7 @@ namespace Microsoft.PowerShell.Commands.Internal.Format int indentationAbsoluteValue = (firstLineIndentation > 0) ? firstLineIndentation : -firstLineIndentation; if (indentationAbsoluteValue >= usefulWidth) { - // valu too big, we reset it to zero + // value too big, we reset it to zero firstLineIndentation = 0; } @@ -353,27 +355,58 @@ namespace Microsoft.PowerShell.Commands.Internal.Format private static IEnumerable GetWords(string s) { StringBuilder sb = new StringBuilder(); - GetWordsResult result = new GetWordsResult(); + StringBuilder vtSeqs = null; + Dictionary vtRanges = null; + var valueStrDec = new ValueStringDecorated(s); + if (valueStrDec.IsDecorated) + { + vtSeqs = new StringBuilder(); + vtRanges = valueStrDec.EscapeSequenceRanges; + } + + bool wordHasVtSeqs = false; for (int i = 0; i < s.Length; i++) { - // Soft hyphen = \u00AD - Should break, and add a hyphen if needed. If not needed for a break, hyphen should be absent + if (vtRanges?.TryGetValue(i, out int len) == true) + { + var vtSpan = s.AsSpan(i, len); + sb.Append(vtSpan); + vtSeqs.Append(vtSpan); + + wordHasVtSeqs = true; + i += len - 1; + continue; + } + + string delimiter = null; if (s[i] == ' ' || s[i] == '\t' || s[i] == s_softHyphen) { - result.Word = sb.ToString(); - sb.Clear(); - result.Delim = new string(s[i], 1); - - yield return result; + // Soft hyphen = \u00AD - Should break, and add a hyphen if needed. + // If not needed for a break, hyphen should be absent. + delimiter = new string(s[i], 1); } - // Non-breaking space = \u00A0 - ideally shouldn't wrap - // Hard hyphen = \u2011 - Should not break else if (s[i] == s_hardHyphen || s[i] == s_nonBreakingSpace) { - result.Word = sb.ToString(); - sb.Clear(); - result.Delim = string.Empty; + // Non-breaking space = \u00A0 - ideally shouldn't wrap. + // Hard hyphen = \u2011 - Should not break. + delimiter = string.Empty; + } + if (delimiter is not null) + { + if (wordHasVtSeqs && !sb.EndsWith(PSStyle.Instance.Reset)) + { + sb.Append(PSStyle.Instance.Reset); + } + + var result = new GetWordsResult() + { + Word = sb.ToString(), + Delim = delimiter + }; + + sb.Clear().Append(vtSeqs); yield return result; } else @@ -382,10 +415,23 @@ namespace Microsoft.PowerShell.Commands.Internal.Format } } - result.Word = sb.ToString(); - result.Delim = string.Empty; + if (wordHasVtSeqs) + { + if (sb.Length == vtSeqs.Length) + { + // This indicates 'sb' only contains all VT sequences, which may happen when the string ends with a word delimiter. + // For a word that contains VT sequence only, it's the same as an empty string to the formatting system, + // because nothing will actually be rendered. + // So, we use an empty string in this case to avoid unneeded string allocations. + sb.Clear(); + } + else if (!sb.EndsWith(PSStyle.Instance.Reset)) + { + sb.Append(PSStyle.Instance.Reset); + } + } - yield return result; + yield return new GetWordsResult() { Word = sb.ToString(), Delim = string.Empty }; } internal static StringCollection GenerateLines(DisplayCells displayCells, string val, int firstLineLen, int followingLinesLen) @@ -412,9 +458,9 @@ namespace Microsoft.PowerShell.Commands.Internal.Format } // break string on newlines and process each line separately - string[] lines = SplitLines(val); + List lines = SplitLines(val); - for (int k = 0; k < lines.Length; k++) + for (int k = 0; k < lines.Count; k++) { string currentLine = lines[k]; @@ -530,9 +576,9 @@ namespace Microsoft.PowerShell.Commands.Internal.Format } // break string on newlines and process each line separately - string[] lines = SplitLines(val); + List lines = SplitLines(val); - for (int k = 0; k < lines.Length; k++) + for (int k = 0; k < lines.Count; k++) { if (lines[k] == null || displayCells.Length(lines[k]) <= firstLineLen) { @@ -545,28 +591,34 @@ namespace Microsoft.PowerShell.Commands.Internal.Format int lineWidth = firstLineLen; bool firstLine = true; StringBuilder singleLine = new StringBuilder(); + string resetStr = PSStyle.Instance.Reset; foreach (GetWordsResult word in GetWords(lines[k])) { string wordToAdd = word.Word; + string suffix = null; // Handle soft hyphen - if (word.Delim == s_softHyphen.ToString()) + if (word.Delim.Length == 1 && word.Delim[0] == s_softHyphen) { int wordWidthWithHyphen = displayCells.Length(wordToAdd) + displayCells.Length(s_softHyphen); // Add hyphen only if necessary if (wordWidthWithHyphen == spacesLeft) { - wordToAdd += "-"; + suffix = "-"; } } - else + else if (!string.IsNullOrEmpty(word.Delim)) { - if (!string.IsNullOrEmpty(word.Delim)) - { - wordToAdd += word.Delim; - } + suffix = word.Delim; + } + + if (suffix is not null) + { + wordToAdd = wordToAdd.EndsWith(resetStr) + ? wordToAdd.Insert(wordToAdd.Length - resetStr.Length, suffix) + : wordToAdd + suffix; } int wordWidth = displayCells.Length(wordToAdd); @@ -591,15 +643,35 @@ namespace Microsoft.PowerShell.Commands.Internal.Format // Word is wider than a single line if (wordWidth > lineWidth) { - foreach (char c in wordToAdd) - { - char charToAdd = c; - int charWidth = displayCells.Length(c); + Dictionary vtRanges = null; + StringBuilder vtSeqs = null; - // corner case: we have a two cell character and the current - // display length is one. - // add a single cell arbitrary character instead of the original - // one and keep going + var valueStrDec = new ValueStringDecorated(wordToAdd); + if (valueStrDec.IsDecorated) + { + vtSeqs = new StringBuilder(); + vtRanges = valueStrDec.EscapeSequenceRanges; + } + + bool hasEscSeqs = false; + for (int i = 0; i < wordToAdd.Length; i++) + { + if (vtRanges?.TryGetValue(i, out int len) == true) + { + var vtSpan = wordToAdd.AsSpan(i, len); + singleLine.Append(vtSpan); + vtSeqs.Append(vtSpan); + + hasEscSeqs = true; + i += len - 1; + continue; + } + + char charToAdd = wordToAdd[i]; + int charWidth = displayCells.Length(charToAdd); + + // Corner case: we have a two cell character and the current display length is one. + // Add a single cell arbitrary character instead of the original one and keep going. if (charWidth > lineWidth) { charToAdd = '?'; @@ -608,9 +680,13 @@ namespace Microsoft.PowerShell.Commands.Internal.Format if (charWidth > spacesLeft) { + if (hasEscSeqs && !singleLine.EndsWith(resetStr)) + { + singleLine.Append(resetStr); + } + retVal.Add(singleLine.ToString()); - singleLine.Clear(); - singleLine.Append(charToAdd); + singleLine.Clear().Append(vtSeqs).Append(charToAdd); if (firstLine) { @@ -632,8 +708,7 @@ namespace Microsoft.PowerShell.Commands.Internal.Format if (wordWidth > spacesLeft) { retVal.Add(singleLine.ToString()); - singleLine.Clear(); - singleLine.Append(wordToAdd); + singleLine.Clear().Append(wordToAdd); if (firstLine) { @@ -663,49 +738,77 @@ namespace Microsoft.PowerShell.Commands.Internal.Format /// /// String to split. /// String array with the values. - internal static string[] SplitLines(string s) + internal static List SplitLines(string s) { - if (string.IsNullOrEmpty(s)) - return new string[1] { s }; + if (string.IsNullOrEmpty(s) || !s.Contains('\n')) + { + return new List(capacity: 1) { s?.Replace("\r", string.Empty) }; + } StringBuilder sb = new StringBuilder(); + List list = new List(); - foreach (char c in s) + StringBuilder vtSeqs = null; + Dictionary vtRanges = null; + + var valueStrDec = new ValueStringDecorated(s); + if (valueStrDec.IsDecorated) { - if (c != '\r') + vtSeqs = new StringBuilder(); + vtRanges = valueStrDec.EscapeSequenceRanges; + } + + bool hasVtSeqs = false; + for (int i = 0; i < s.Length; i++) + { + if (vtRanges?.TryGetValue(i, out int len) == true) + { + var vtSpan = s.AsSpan(i, len); + sb.Append(vtSpan); + vtSeqs.Append(vtSpan); + + hasVtSeqs = true; + i += len - 1; + continue; + } + + char c = s[i]; + if (c == '\n') + { + if (hasVtSeqs && !sb.EndsWith(PSStyle.Instance.Reset)) + { + sb.Append(PSStyle.Instance.Reset); + } + + list.Add(sb.ToString()); + sb.Clear().Append(vtSeqs); + } + else if (c != '\r') + { sb.Append(c); + } } - return sb.ToString().Split(s_newLineChar); - } - -#if false - internal static string StripNewLines (string s) - { - if (string.IsNullOrEmpty (s)) - return s; - - string[] lines = SplitLines (s); - - if (lines.Length == 0) - return null; - - if (lines.Length == 1) - return lines[0]; - - StringBuilder sb = new StringBuilder (); - - for (int k = 0; k < lines.Length; k++) + if (hasVtSeqs) { - if (k == 0) - sb.Append (lines[k]); - else - sb.Append (" " + lines[k]); + if (sb.Length == vtSeqs.Length) + { + // This indicates 'sb' only contains all VT sequences, which may happen when the string ends with '\n'. + // For a sub-string that contains VT sequence only, it's the same as an empty string to the formatting + // system, because nothing will actually be rendered. + // So, we use an empty string in this case to avoid unneeded string allocations. + sb.Clear(); + } + else if (!sb.EndsWith(PSStyle.Instance.Reset)) + { + sb.Append(PSStyle.Instance.Reset); + } } - return sb.ToString (); + list.Add(sb.ToString()); + return list; } -#endif + internal static string TruncateAtNewLine(string s) { if (string.IsNullOrEmpty(s)) diff --git a/src/System.Management.Automation/FormatAndOutput/common/ILineOutput.cs b/src/System.Management.Automation/FormatAndOutput/common/ILineOutput.cs index ddd69ae0ee..a1f373a5dd 100644 --- a/src/System.Management.Automation/FormatAndOutput/common/ILineOutput.cs +++ b/src/System.Management.Automation/FormatAndOutput/common/ILineOutput.cs @@ -1,6 +1,7 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. +using System.Collections.Generic; using System.Globalization; using System.IO; using System.Management.Automation; @@ -379,10 +380,10 @@ namespace Microsoft.PowerShell.Commands.Internal.Format } // check for line breaks - string[] lines = StringManipulationHelper.SplitLines(val); + List lines = StringManipulationHelper.SplitLines(val); // process the substrings as separate lines - for (int k = 0; k < lines.Length; k++) + for (int k = 0; k < lines.Count; k++) { // compute the display length of the string int displayLength = _displayCells.Length(lines[k]); diff --git a/src/System.Management.Automation/FormatAndOutput/common/ListWriter.cs b/src/System.Management.Automation/FormatAndOutput/common/ListWriter.cs index 16df0c5728..786a25c2d7 100644 --- a/src/System.Management.Automation/FormatAndOutput/common/ListWriter.cs +++ b/src/System.Management.Automation/FormatAndOutput/common/ListWriter.cs @@ -2,6 +2,7 @@ // Licensed under the MIT License. using System; +using System.Collections.Generic; using System.Collections.Specialized; using System.Diagnostics; using System.Management.Automation; @@ -180,12 +181,12 @@ namespace Microsoft.PowerShell.Commands.Internal.Format propertyValue = string.Empty; // make sure we honor embedded newlines - string[] lines = StringManipulationHelper.SplitLines(propertyValue); + List lines = StringManipulationHelper.SplitLines(propertyValue); // padding to use in the lines after the first string padding = null; - for (int i = 0; i < lines.Length; i++) + for (int i = 0; i < lines.Count; i++) { string prependString = null; @@ -212,7 +213,7 @@ namespace Microsoft.PowerShell.Commands.Internal.Format /// LineOuput to write to. private void WriteSingleLineHelper(string prependString, string line, LineOutput lo) { - if (line == null) + if (line is null) { line = string.Empty; } @@ -223,8 +224,8 @@ namespace Microsoft.PowerShell.Commands.Internal.Format // split the lines StringCollection sc = StringManipulationHelper.GenerateLines(lo.DisplayCells, line, fieldCellCount, fieldCellCount); - // padding to use in the lines after the first - string padding = StringUtil.Padding(_propertyLabelsDisplayLength); + // The padding to use in the lines after the first. + string headPadding = null; // display the string collection for (int k = 0; k < sc.Count; k++) @@ -234,17 +235,26 @@ namespace Microsoft.PowerShell.Commands.Internal.Format if (k == 0) { - _cachedBuilder - .Append(PSStyle.Instance.Formatting.FormatAccent) - .Append(prependString) - .Append(PSStyle.Instance.Reset) - .Append(str); + if (string.IsNullOrWhiteSpace(prependString)) + { + // Sometimes 'prependString' is just padding white spaces. + // We don't need to add formatting escape sequences in such a case. + _cachedBuilder.Append(prependString).Append(str); + } + else + { + _cachedBuilder + .Append(PSStyle.Instance.Formatting.FormatAccent) + .Append(prependString) + .Append(PSStyle.Instance.Reset) + .Append(str); + } } else { - _cachedBuilder - .Append(padding) - .Append(str); + // Lazily calculate the padding to use for the subsequent lines as it's quite often that only the first line exists. + headPadding ??= StringUtil.Padding(_propertyLabelsDisplayLength); + _cachedBuilder.Append(headPadding).Append(str); } if (str.Contains(ValueStringDecorated.ESC) && !str.EndsWith(PSStyle.Instance.Reset)) diff --git a/src/System.Management.Automation/FormatAndOutput/common/StringDecorated.cs b/src/System.Management.Automation/FormatAndOutput/common/StringDecorated.cs index 9b82cf3d7a..c2497bdff5 100644 --- a/src/System.Management.Automation/FormatAndOutput/common/StringDecorated.cs +++ b/src/System.Management.Automation/FormatAndOutput/common/StringDecorated.cs @@ -3,6 +3,7 @@ #nullable enable +using System.Collections.Generic; using System.Text.RegularExpressions; namespace System.Management.Automation.Internal @@ -87,6 +88,7 @@ namespace System.Management.Automation.Internal private readonly bool _isDecorated; private readonly string _text; private string? _plaintextcontent; + private Dictionary? _vtRanges; private string PlainText { @@ -110,6 +112,30 @@ namespace System.Management.Automation.Internal // replace regex with .NET 6 API once available internal static readonly Regex AnsiRegex = new Regex($"{GraphicsRegex}|{CsiRegex}", RegexOptions.Compiled); + /// + /// Get the ranges of all escape sequences in the text. + /// + /// + /// A dictionary with the key being the starting index of an escape sequence, + /// and the value being the length of the escape sequence. + /// + internal Dictionary? EscapeSequenceRanges + { + get + { + if (_isDecorated && _vtRanges is null) + { + _vtRanges = new Dictionary(); + foreach (Match match in AnsiRegex.Matches(_text)) + { + _vtRanges.Add(match.Index, match.Length); + } + } + + return _vtRanges; + } + } + /// /// Initializes a new instance of the struct. /// @@ -118,7 +144,8 @@ namespace System.Management.Automation.Internal { _text = text; _isDecorated = text.Contains(ESC); - _plaintextcontent = null; + _plaintextcontent = _isDecorated ? null : text; + _vtRanges = null; } /// diff --git a/src/System.Management.Automation/utils/StringUtil.cs b/src/System.Management.Automation/utils/StringUtil.cs index ed08f425ea..6bdedd03a3 100644 --- a/src/System.Management.Automation/utils/StringUtil.cs +++ b/src/System.Management.Automation/utils/StringUtil.cs @@ -157,22 +157,16 @@ namespace System.Management.Automation.Internal if (valueStrDec.IsDecorated) { // Handle strings with VT sequences. - var sb = new StringBuilder(capacity: str.Length); bool copyStarted = startOffset == 0; bool hasEscSeqs = false; bool firstNonEscChar = true; - - // Find all escape sequences in the string, and keep track of their starting indexes and length. - var ansiRanges = new Dictionary(); - foreach (Match match in ValueStringDecorated.AnsiRegex.Matches(str)) - { - ansiRanges.Add(match.Index, match.Length); - } + StringBuilder sb = new(capacity: str.Length); + Dictionary vtRanges = valueStrDec.EscapeSequenceRanges; for (int i = 0, offset = 0; i < str.Length; i++) { // Keep all leading ANSI escape sequences. - if (ansiRanges.TryGetValue(i, out int len)) + if (vtRanges.TryGetValue(i, out int len)) { hasEscSeqs = true; sb.Append(str.AsSpan(i, len)); diff --git a/stylecop.json b/stylecop.json index 77517f2095..9653436690 100644 --- a/stylecop.json +++ b/stylecop.json @@ -25,7 +25,7 @@ }, "namingRules" : { "allowCommonHungarianPrefixes" : true, - "allowedHungarianPrefixes" : [ "n", "r", "l", "i", "io", "is", "fs", "lp", "dw", "h", "rs", "ps", "op", "sb", "my" ] + "allowedHungarianPrefixes" : [ "n", "r", "l", "i", "io", "is", "fs", "lp", "dw", "h", "rs", "ps", "op", "sb", "my", "vt" ] }, "maintainabilityRules" : { "topLevelTypes" : [ diff --git a/test/powershell/engine/Formatting/PSStyle.Tests.ps1 b/test/powershell/engine/Formatting/PSStyle.Tests.ps1 index 24343537e8..aa54a63dc0 100644 --- a/test/powershell/engine/Formatting/PSStyle.Tests.ps1 +++ b/test/powershell/engine/Formatting/PSStyle.Tests.ps1 @@ -342,4 +342,42 @@ Billy Bob… Senior DevOps … 13 $text = Get-Content $outFile -Raw $text.Trim().Replace("`r", "") | Should -BeExactly $expected.Replace("`r", "") } + + It "Word wrapping for string with escape sequences" { + $expected = @" +`e[32;1mLongDescription : `e[0m`e[33mPowerShell `e[0m + `e[33mscripting `e[0m + `e[33mlanguage`e[0m +"@ + $obj = [pscustomobject] @{ LongDescription = "`e[33mPowerShell scripting language" } + $obj | Format-List | Out-String -Width 35 | Out-File $outFile + + $text = Get-Content $outFile -Raw + $text.Trim().Replace("`r", "") | Should -BeExactly $expected.Replace("`r", "") + } + + It "Splitting multi-line string with escape sequences" { + $expected = @" +`e[32;1mb : `e[0m`e[33mPowerShell is a task automation and configuration management program from Microsoft,`e[0m + `e[33mconsisting of a command-line shell and the associated scripting language`e[0m +"@ + $obj = [pscustomobject] @{ b = "`e[33mPowerShell is a task automation and configuration management program from Microsoft,`nconsisting of a command-line shell and the associated scripting language" } + $obj | Format-List | Out-File $outFile + + $text = Get-Content $outFile -Raw + $text.Trim().Replace("`r", "") | Should -BeExactly $expected.Replace("`r", "") + } + + It "Wrapping long word with escape sequences" { + $expected = @" +`e[32;1mb : `e[0m`e[33mC:\repos\PowerShell\src\powershell-w`e[0m + `e[33min-core\bin\Debug\net7.0\win7-x64\pu`e[0m + `e[33mblish\pwsh.exe`e[0m +"@ + $obj = [pscustomobject] @{ b = "`e[33mC:\repos\PowerShell\src\powershell-win-core\bin\Debug\net7.0\win7-x64\publish\pwsh.exe" } + $obj | Format-List | Out-String -Width 40 | Out-File $outFile + + $text = Get-Content $outFile -Raw + $text.Trim().Replace("`r", "") | Should -BeExactly $expected.Replace("`r", "") + } }