sed: fix handling of escaped delimiters in s/// search pattern, closes 14541

function                                             old     new   delta
copy_parsing_escapes                                  67      96     +29
parse_regex_delim                                    109     111      +2
get_address                                          213     215      +2
add_cmd                                             1176    1178      +2
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 4/0 up/down: 35/0)               Total: 35 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
Denys Vlasenko 2022-01-23 18:48:49 +01:00
parent 33a9f34df5
commit e998c7c032
2 changed files with 21 additions and 8 deletions

View File

@@ -246,7 +246,6 @@ static void cleanup_outname(void)
} }
/* strcpy, replacing "\from" with 'to'. If to is NUL, replacing "\any" with 'any' */ /* strcpy, replacing "\from" with 'to'. If to is NUL, replacing "\any" with 'any' */
static unsigned parse_escapes(char *dest, const char *string, int len, char from, char to) static unsigned parse_escapes(char *dest, const char *string, int len, char from, char to)
{ {
char *d = dest; char *d = dest;
@@ -276,7 +275,7 @@ static unsigned parse_escapes(char *dest, const char *string, int len, char from
return d - dest; return d - dest;
} }
static char *copy_parsing_escapes(const char *string, int len) static char *copy_parsing_escapes(const char *string, int len, char delim)
{ {
const char *s; const char *s;
char *dest = xmalloc(len + 1); char *dest = xmalloc(len + 1);
@@ -287,10 +286,15 @@ static char *copy_parsing_escapes(const char *string, int len)
len = parse_escapes(dest, string, len, s[1], s[0]); len = parse_escapes(dest, string, len, s[1], s[0]);
string = dest; string = dest;
} }
if (delim) {
/* we additionally unescape any instances of escaped delimiter.
* For example, in 's+9\++X+' the pattern is "9+", not "9\+".
*/
len = parse_escapes(dest, string, len, delim, delim);
}
return dest; return dest;
} }
/* /*
* index_of_next_unescaped_regexp_delim - walks left to right through a string * index_of_next_unescaped_regexp_delim - walks left to right through a string
* beginning at a specified index and returns the index of the next regular * beginning at a specified index and returns the index of the next regular
@@ -347,12 +351,11 @@ static int parse_regex_delim(const char *cmdstr, char **match, char **replace)
/* save the match string */ /* save the match string */
idx = index_of_next_unescaped_regexp_delim(delimiter, cmdstr_ptr); idx = index_of_next_unescaped_regexp_delim(delimiter, cmdstr_ptr);
*match = copy_parsing_escapes(cmdstr_ptr, idx); *match = copy_parsing_escapes(cmdstr_ptr, idx, delimiter);
/* save the replacement string */ /* save the replacement string */
cmdstr_ptr += idx + 1; cmdstr_ptr += idx + 1;
idx = index_of_next_unescaped_regexp_delim(- (int)delimiter, cmdstr_ptr); idx = index_of_next_unescaped_regexp_delim(- (int)delimiter, cmdstr_ptr);
*replace = copy_parsing_escapes(cmdstr_ptr, idx); *replace = copy_parsing_escapes(cmdstr_ptr, idx, 0);
return ((cmdstr_ptr - cmdstr) + idx); return ((cmdstr_ptr - cmdstr) + idx);
} }
@@ -380,7 +383,7 @@ static int get_address(const char *my_str, int *linenum, regex_t ** regex)
delimiter = *++pos; delimiter = *++pos;
next = index_of_next_unescaped_regexp_delim(delimiter, ++pos); next = index_of_next_unescaped_regexp_delim(delimiter, ++pos);
if (next != 0) { if (next != 0) {
temp = copy_parsing_escapes(pos, next); temp = copy_parsing_escapes(pos, next, 0);
G.previous_regex_ptr = *regex = xzalloc(sizeof(regex_t)); G.previous_regex_ptr = *regex = xzalloc(sizeof(regex_t));
xregcomp(*regex, temp, G.regex_type); xregcomp(*regex, temp, G.regex_type);
free(temp); free(temp);
@@ -575,7 +578,7 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
cmdstr++; cmdstr++;
} }
len = strlen(cmdstr); len = strlen(cmdstr);
sed_cmd->string = copy_parsing_escapes(cmdstr, len); sed_cmd->string = copy_parsing_escapes(cmdstr, len, 0);
cmdstr += len; cmdstr += len;
/* "\anychar" -> "anychar" */ /* "\anychar" -> "anychar" */
parse_escapes(sed_cmd->string, sed_cmd->string, -1, '\0', '\0'); parse_escapes(sed_cmd->string, sed_cmd->string, -1, '\0', '\0');

View File

@@ -324,6 +324,16 @@ testing "sed zero chars match/replace logic must not falsely trigger here 2" \
"sed 's/ *$/_/g'" \ "sed 's/ *$/_/g'" \
"qwerty_\n" "" "qwerty\n" "qwerty_\n" "" "qwerty\n"
# the pattern here is interpreted as "9+", not as "9\+"
testing "sed special char as s/// delimiter, in pattern" \
"sed 's+9\++X+'" \
"X8=17\n" "" "9+8=17\n"
# but in replacement string, "\&" remains "\&", not interpreted as "&"
testing "sed special char as s/// delimiter, in replacement" \
"sed 's&9&X\&&'" \
"X&+8=17\n" "" "9+8=17\n"
testing "sed /\$_in_regex/ should not match newlines, only end-of-line" \ testing "sed /\$_in_regex/ should not match newlines, only end-of-line" \
"sed ': testcont; /\\\\$/{ =; N; b testcont }'" \ "sed ': testcont; /\\\\$/{ =; N; b testcont }'" \
"\ "\