From 69623c0db0a540f26ee537bae09446d3dcdf1f80 Mon Sep 17 00:00:00 2001 From: DJ Delorie Date: Wed, 1 Sep 2021 13:17:34 -0400 Subject: [PATCH] posix: remove some iso-8859-encoded characters With the increasing adoption of UTF-8, modern editors may (will?) replace iso-8859-encoded characters in the range 0x80..0xff with their UTF-8 equivalent, as will mailers and other tools. This breaks our testsuite and corrupts patches. So, this patch starts replacing these problematic characters with \OCTal sequences instead (adding support for those in tst-fnmatch.c) or with plain ASCII characters (PTESTS). Reviewed-by: Carlos O'Donell --- posix/PTESTS | 548 ++++++++++++++++++++-------------------- posix/PTESTS2C.sed | 4 +- posix/bug-regex1.c | 4 +- posix/tst-fnmatch.c | 13 + posix/tst-fnmatch.input | 124 ++++----- 5 files changed, 357 insertions(+), 336 deletions(-) diff --git a/posix/PTESTS b/posix/PTESTS index 02b357cf2e..65e5084915 100644 --- a/posix/PTESTS +++ b/posix/PTESTS @@ -1,341 +1,347 @@ +# Future self: the delimiter is an ASCII vertical bar, which is also a +# REGEX special character, but hadn't already been used. Nearly every +# other plain ASCII character had been used by a test. Characters +# outside the plain ASCII range have a risk of being mangled by modern +# editors. So, avoid using | in a test, or if needed, select a new +# delimiter. 
# 2.8.2 Regular Expression General Requirement -2¦4¦bb*¦abbbc¦ -2¦2¦bb*¦ababbbc¦ -7¦9¦A#*::¦A:A#:qA::qA#::qA##::q¦ -1¦5¦A#*::¦A##::A#::qA::qA#:q¦ +2|4|bb*|abbbc| +2|2|bb*|ababbbc| +7|9|A#*::|A:A#:qA::qA#::qA##::q| +1|5|A#*::|A##::A#::qA::qA#:q| # 2.8.3.1.2 BRE Special Characters # GA108 -2¦2¦\.¦a.c¦ -2¦2¦\[¦a[c¦ -2¦2¦\\¦a\c¦ -2¦2¦\*¦a*c¦ -2¦2¦\^¦a^c¦ -2¦2¦\$¦a$c¦ -7¦11¦X\*Y\*8¦Y*8X*8X*Y*8¦ +2|2|\.|a.c| +2|2|\[|a[c| +2|2|\\|a\c| +2|2|\*|a*c| +2|2|\^|a^c| +2|2|\$|a$c| +7|11|X\*Y\*8|Y*8X*8X*Y*8| # GA109 -2¦2¦[.]¦a.c¦ -2¦2¦[[]¦a[c¦ --1¦-1¦[[]¦ac¦ -2¦2¦[\]¦a\c¦ -1¦1¦[\a]¦abc¦ -2¦2¦[\.]¦a\.c¦ -2¦2¦[\.]¦a.\c¦ -2¦2¦[*]¦a*c¦ -2¦2¦[$]¦a$c¦ -2¦2¦[X*Y8]¦7*8YX¦ +2|2|[.]|a.c| +2|2|[[]|a[c| +-1|-1|[[]|ac| +2|2|[\]|a\c| +1|1|[\a]|abc| +2|2|[\.]|a\.c| +2|2|[\.]|a.\c| +2|2|[*]|a*c| +2|2|[$]|a$c| +2|2|[X*Y8]|7*8YX| # GA110 -2¦2¦*¦a*c¦ -3¦4¦*a¦*b*a*c¦ -1¦5¦**9=¦***9=9¦ +2|2|*|a*c| +3|4|*a|*b*a*c| +1|5|**9=|***9=9| # GA111 -1¦1¦^*¦*bc¦ --1¦-1¦^*¦a*c¦ --1¦-1¦^*¦^*ab¦ -1¦5¦^**9=¦***9=¦ --1¦-1¦^*5<*9¦5<9*5<*9¦ +1|1|^*|*bc| +-1|-1|^*|a*c| +-1|-1|^*|^*ab| +1|5|^**9=|***9=| +-1|-1|^*5<*9|5<9*5<*9| # GA112 -2¦3¦\(*b\)¦a*b¦ --1¦-1¦\(*b\)¦ac¦ -1¦6¦A\(**9\)=¦A***9=79¦ +2|3|\(*b\)|a*b| +-1|-1|\(*b\)|ac| +1|6|A\(**9\)=|A***9=79| # GA113(1) -1¦3¦\(^*ab\)¦*ab¦ --1¦-1¦\(^*ab\)¦^*ab¦ --1¦-1¦\(^*b\)¦a*b¦ --1¦-1¦\(^*b\)¦^*b¦ +1|3|\(^*ab\)|*ab| +-1|-1|\(^*ab\)|^*ab| +-1|-1|\(^*b\)|a*b| +-1|-1|\(^*b\)|^*b| ### GA113(2) GNU regex implements GA113(1) -##-1¦-1¦\(^*ab\)¦*ab¦ -##-1¦-1¦\(^*ab\)¦^*ab¦ -##1¦1¦\(^*b\)¦b¦ -##1¦3¦\(^*b\)¦^^b¦ +##-1|-1|\(^*ab\)|*ab| +##-1|-1|\(^*ab\)|^*ab| +##1|1|\(^*b\)|b| +##1|3|\(^*b\)|^^b| # GA114 -1¦3¦a^b¦a^b¦ -1¦3¦a\^b¦a^b¦ -1¦1¦^^¦^bc¦ -2¦2¦\^¦a^c¦ -1¦1¦[c^b]¦^abc¦ -1¦1¦[\^ab]¦^ab¦ -2¦2¦[\^ab]¦c\d¦ --1¦-1¦[^^]¦^¦ -1¦3¦\(a^b\)¦a^b¦ -1¦3¦\(a\^b\)¦a^b¦ -2¦2¦\(\^\)¦a^b¦ +1|3|a^b|a^b| +1|3|a\^b|a^b| +1|1|^^|^bc| +2|2|\^|a^c| +1|1|[c^b]|^abc| +1|1|[\^ab]|^ab| +2|2|[\^ab]|c\d| +-1|-1|[^^]|^| +1|3|\(a^b\)|a^b| +1|3|\(a\^b\)|a^b| +2|2|\(\^\)|a^b| # GA115 -3¦3¦$$¦ab$¦ --1¦-1¦$$¦$ab¦ 
-2¦3¦$c¦a$c¦ -2¦2¦[$]¦a$c¦ -1¦2¦\$a¦$a¦ -3¦3¦\$$¦ab$¦ -2¦6¦A\([34]$[34]\)B¦XA4$3BY¦ +3|3|$$|ab$| +-1|-1|$$|$ab| +2|3|$c|a$c| +2|2|[$]|a$c| +1|2|\$a|$a| +3|3|\$$|ab$| +2|6|A\([34]$[34]\)B|XA4$3BY| # 2.8.3.1.3 Periods in BREs # GA116 -1¦1¦.¦abc¦ --1¦-1¦.ab¦abc¦ -1¦3¦ab.¦abc¦ -1¦3¦a.b¦a,b¦ --1¦-1¦.......¦PqRs6¦ -1¦7¦.......¦PqRs6T8¦ +1|1|.|abc| +-1|-1|.ab|abc| +1|3|ab.|abc| +1|3|a.b|a,b| +-1|-1|.......|PqRs6| +1|7|.......|PqRs6T8| # 2.8.3.2 RE Bracket Expression # GA118 -2¦2¦[abc]¦xbyz¦ --1¦-1¦[abc]¦xyz¦ -2¦2¦[abc]¦xbay¦ +2|2|[abc]|xbyz| +-1|-1|[abc]|xyz| +2|2|[abc]|xbay| # GA119 -2¦2¦[^a]¦abc¦ -4¦4¦[^]cd]¦cd]ef¦ -2¦2¦[^abc]¦axyz¦ --1¦-1¦[^abc]¦abc¦ -3¦3¦[^[.a.]b]¦abc¦ -3¦3¦[^[=a=]b]¦abc¦ -2¦2¦[^-ac]¦abcde-¦ -2¦2¦[^ac-]¦abcde-¦ -3¦3¦[^a-b]¦abcde¦ -3¦3¦[^a-bd-e]¦dec¦ -2¦2¦[^---]¦-ab¦ -16¦16¦[^a-zA-Z0-9]¦pqrstVWXYZ23579#¦ +2|2|[^a]|abc| +4|4|[^]cd]|cd]ef| +2|2|[^abc]|axyz| +-1|-1|[^abc]|abc| +3|3|[^[.a.]b]|abc| +3|3|[^[=a=]b]|abc| +2|2|[^-ac]|abcde-| +2|2|[^ac-]|abcde-| +3|3|[^a-b]|abcde| +3|3|[^a-bd-e]|dec| +2|2|[^---]|-ab| +16|16|[^a-zA-Z0-9]|pqrstVWXYZ23579#| # GA120(1) -3¦3¦[]a]¦cd]ef¦ -1¦1¦[]-a]¦a_b¦ -3¦3¦[][.-.]-0]¦ab0-]¦ -1¦1¦[]^a-z]¦string¦ +3|3|[]a]|cd]ef| +1|1|[]-a]|a_b| +3|3|[][.-.]-0]|ab0-]| +1|1|[]^a-z]|string| # GA120(2) -4¦4¦[^]cd]¦cd]ef¦ -0¦0¦[^]]*¦]]]]]]]]X¦ -0¦0¦[^]]*¦]]]]]]]]¦ -9¦9¦[^]]\{1,\}¦]]]]]]]]X¦ --1¦-1¦[^]]\{1,\}¦]]]]]]]]¦ +4|4|[^]cd]|cd]ef| +0|0|[^]]*|]]]]]]]]X| +0|0|[^]]*|]]]]]]]]| +9|9|[^]]\{1,\}|]]]]]]]]X| +-1|-1|[^]]\{1,\}|]]]]]]]]| # GA120(3) -3¦3¦[c[.].]d]¦ab]cd¦ -2¦8¦[a-z]*[[.].]][A-Z]*¦Abcd]DEFg¦ +3|3|[c[.].]d]|ab]cd| +2|8|[a-z]*[[.].]][A-Z]*|Abcd]DEFg| # GA121 -2¦2¦[[.a.]b]¦Abc¦ -1¦1¦[[.a.]b]¦aBc¦ --1¦-1¦[[.a.]b]¦ABc¦ -3¦3¦[^[.a.]b]¦abc¦ -3¦3¦[][.-.]-0]¦ab0-]¦ -3¦3¦[A-[.].]c]¦ab]!¦ +2|2|[[.a.]b]|Abc| +1|1|[[.a.]b]|aBc| +-1|-1|[[.a.]b]|ABc| +3|3|[^[.a.]b]|abc| +3|3|[][.-.]-0]|ab0-]| +3|3|[A-[.].]c]|ab]!| # GA122 --2¦-2¦[[.ch.]]¦abc¦ --2¦-2¦[[.ab.][.CD.][.EF.]]¦yZabCDEFQ9¦ +-2|-2|[[.ch.]]|abc| +-2|-2|[[.ab.][.CD.][.EF.]]|yZabCDEFQ9| # 
GA125 -2¦2¦[[=a=]b]¦Abc¦ -1¦1¦[[=a=]b]¦aBc¦ --1¦-1¦[[=a=]b]¦ABc¦ -3¦3¦[^[=a=]b]¦abc¦ +2|2|[[=a=]b]|Abc| +1|1|[[=a=]b]|aBc| +-1|-1|[[=a=]b]|ABc| +3|3|[^[=a=]b]|abc| # GA126 #W the expected result for [[:alnum:]]* is 2-7 which is wrong -0¦0¦[[:alnum:]]*¦ aB28gH¦ -2¦7¦[[:alnum:]][[:alnum:]]*¦ aB28gH¦ +0|0|[[:alnum:]]*| aB28gH| +2|7|[[:alnum:]][[:alnum:]]*| aB28gH| #W the expected result for [^[:alnum:]]* is 2-5 which is wrong -0¦0¦[^[:alnum:]]*¦2 ,a¦ -2¦5¦[^[:alnum:]][^[:alnum:]]*¦2 ,a¦ +0|0|[^[:alnum:]]*|2 ,a| +2|5|[^[:alnum:]][^[:alnum:]]*|2 ,a| #W the expected result for [[:alpha:]]* is 2-5 which is wrong -0¦0¦[[:alpha:]]*¦ aBgH2¦ -2¦5¦[[:alpha:]][[:alpha:]]*¦ aBgH2¦ -1¦6¦[^[:alpha:]]*¦2 8,a¦ -1¦2¦[[:blank:]]*¦ ¦ -1¦8¦[^[:blank:]]*¦aB28gH, ¦ -1¦2¦[[:cntrl:]]*¦  ¦ -1¦8¦[^[:cntrl:]]*¦aB2 8gh,¦ +0|0|[[:alpha:]]*| aBgH2| +2|5|[[:alpha:]][[:alpha:]]*| aBgH2| +1|6|[^[:alpha:]]*|2 8,a| +1|2|[[:blank:]]*| | +1|8|[^[:blank:]]*|aB28gH, | +1|2|[[:cntrl:]]*|  | +1|8|[^[:cntrl:]]*|aB2 8gh,| #W the expected result for [[:digit:]]* is 2-3 which is wrong -0¦0¦[[:digit:]]*¦a28¦ -2¦3¦[[:digit:]][[:digit:]]*¦a28¦ -1¦8¦[^[:digit:]]*¦aB gH,¦ -1¦7¦[[:graph:]]*¦aB28gH, ¦ -1¦3¦[^[:graph:]]*¦ ,¦ -1¦2¦[[:lower:]]*¦agB¦ -1¦8¦[^[:lower:]]*¦B2 8H,a¦ -1¦8¦[[:print:]]*¦aB2 8gH, ¦ -1¦2¦[^[:print:]]*¦  ¦ +0|0|[[:digit:]]*|a28| +2|3|[[:digit:]][[:digit:]]*|a28| +1|8|[^[:digit:]]*|aB gH,| +1|7|[[:graph:]]*|aB28gH, | +1|3|[^[:graph:]]*| ,| +1|2|[[:lower:]]*|agB| +1|8|[^[:lower:]]*|B2 8H,a| +1|8|[[:print:]]*|aB2 8gH, | +1|2|[^[:print:]]*|  | #W the expected result for [[:punct:]]* is 2-2 which is wrong -0¦0¦[[:punct:]]*¦a,2¦ -2¦3¦[[:punct:]][[:punct:]]*¦a,,2¦ -1¦9¦[^[:punct:]]*¦aB2 8gH¦ -1¦3¦[[:space:]]*¦ ¦ +0|0|[[:punct:]]*|a,2| +2|3|[[:punct:]][[:punct:]]*|a,,2| +1|9|[^[:punct:]]*|aB2 8gH| +1|3|[[:space:]]*| | #W the expected result for [^[:space:]]* is 2-9 which is wrong -0¦0¦[^[:space:]]*¦ aB28gH, ¦ -2¦9¦[^[:space:]][^[:space:]]*¦ aB28gH, ¦ +0|0|[^[:space:]]*| aB28gH, | 
+2|9|[^[:space:]][^[:space:]]*| aB28gH, | #W the expected result for [[:upper:]]* is 2-3 which is wrong -0¦0¦[[:upper:]]*¦aBH2¦ -2¦3¦[[:upper:]][[:upper:]]*¦aBH2¦ -1¦8¦[^[:upper:]]*¦a2 8g,B¦ +0|0|[[:upper:]]*|aBH2| +2|3|[[:upper:]][[:upper:]]*|aBH2| +1|8|[^[:upper:]]*|a2 8g,B| #W the expected result for [[:xdigit:]]* is 2-5 which is wrong -0¦0¦[[:xdigit:]]*¦gaB28h¦ -2¦5¦[[:xdigit:]][[:xdigit:]]*¦gaB28h¦ +0|0|[[:xdigit:]]*|gaB28h| +2|5|[[:xdigit:]][[:xdigit:]]*|gaB28h| #W the expected result for [^[:xdigit:]]* is 2-7 which is wrong -2¦7¦[^[:xdigit:]][^[:xdigit:]]*¦a gH,2¦ +2|7|[^[:xdigit:]][^[:xdigit:]]*|a gH,2| # GA127 --2¦-2¦[b-a]¦abc¦ -1¦1¦[a-c]¦bbccde¦ -2¦2¦[a-b]¦-bc¦ -3¦3¦[a-z0-9]¦AB0¦ -3¦3¦[^a-b]¦abcde¦ -3¦3¦[^a-bd-e]¦dec¦ -1¦1¦[]-a]¦a_b¦ -2¦2¦[+--]¦a,b¦ -2¦2¦[--/]¦a.b¦ -2¦2¦[^---]¦-ab¦ -3¦3¦[][.-.]-0]¦ab0-]¦ -3¦3¦[A-[.].]c]¦ab]!¦ -2¦6¦bc[d-w]xy¦abchxyz¦ +-2|-2|[b-a]|abc| +1|1|[a-c]|bbccde| +2|2|[a-b]|-bc| +3|3|[a-z0-9]|AB0| +3|3|[^a-b]|abcde| +3|3|[^a-bd-e]|dec| +1|1|[]-a]|a_b| +2|2|[+--]|a,b| +2|2|[--/]|a.b| +2|2|[^---]|-ab| +3|3|[][.-.]-0]|ab0-]| +3|3|[A-[.].]c]|ab]!| +2|6|bc[d-w]xy|abchxyz| # GA129 -1¦1¦[a-cd-f]¦dbccde¦ --1¦-1¦[a-ce-f]¦dBCCdE¦ -2¦4¦b[n-zA-M]Y¦absY9Z¦ -2¦4¦b[n-zA-M]Y¦abGY9Z¦ +1|1|[a-cd-f]|dbccde| +-1|-1|[a-ce-f]|dBCCdE| +2|4|b[n-zA-M]Y|absY9Z| +2|4|b[n-zA-M]Y|abGY9Z| # GA130 -3¦3¦[-xy]¦ac-¦ -2¦4¦c[-xy]D¦ac-D+¦ -2¦2¦[--/]¦a.b¦ -2¦4¦c[--/]D¦ac.D+b¦ -2¦2¦[^-ac]¦abcde-¦ -1¦3¦a[^-ac]c¦abcde-¦ -3¦3¦[xy-]¦zc-¦ -2¦4¦c[xy-]7¦zc-786¦ -2¦2¦[^ac-]¦abcde-¦ -2¦4¦a[^ac-]c¦5abcde-¦ -2¦2¦[+--]¦a,b¦ -2¦4¦a[+--]B¦Xa,By¦ -2¦2¦[^---]¦-ab¦ -4¦6¦X[^---]Y¦X-YXaYXbY¦ +3|3|[-xy]|ac-| +2|4|c[-xy]D|ac-D+| +2|2|[--/]|a.b| +2|4|c[--/]D|ac.D+b| +2|2|[^-ac]|abcde-| +1|3|a[^-ac]c|abcde-| +3|3|[xy-]|zc-| +2|4|c[xy-]7|zc-786| +2|2|[^ac-]|abcde-| +2|4|a[^ac-]c|5abcde-| +2|2|[+--]|a,b| +2|4|a[+--]B|Xa,By| +2|2|[^---]|-ab| +4|6|X[^---]Y|X-YXaYXbY| # 2.8.3.3 BREs Matching Multiple Characters # GA131 -3¦4¦cd¦abcdeabcde¦ -1¦2¦ag*b¦abcde¦ --1¦-1¦[a-c][e-f]¦abcdef¦ 
-3¦4¦[a-c][e-f]¦acbedf¦ -4¦8¦abc*XYZ¦890abXYZ#*¦ -4¦9¦abc*XYZ¦890abcXYZ#*¦ -4¦15¦abc*XYZ¦890abcccccccXYZ#*¦ --1¦-1¦abc*XYZ¦890abc*XYZ#*¦ +3|4|cd|abcdeabcde| +1|2|ag*b|abcde| +-1|-1|[a-c][e-f]|abcdef| +3|4|[a-c][e-f]|acbedf| +4|8|abc*XYZ|890abXYZ#*| +4|9|abc*XYZ|890abcXYZ#*| +4|15|abc*XYZ|890abcccccccXYZ#*| +-1|-1|abc*XYZ|890abc*XYZ#*| # GA132 -2¦4¦\(*bc\)¦a*bc¦ -1¦2¦\(ab\)¦abcde¦ -1¦10¦\(a\(b\(c\(d\(e\(f\(g\)h\(i\(j\)\)\)\)\)\)\)\)¦abcdefghijk¦ -3¦8¦43\(2\(6\)*0\)AB¦654320ABCD¦ -3¦9¦43\(2\(7\)*0\)AB¦6543270ABCD¦ -3¦12¦43\(2\(7\)*0\)AB¦6543277770ABCD¦ +2|4|\(*bc\)|a*bc| +1|2|\(ab\)|abcde| +1|10|\(a\(b\(c\(d\(e\(f\(g\)h\(i\(j\)\)\)\)\)\)\)\)|abcdefghijk| +3|8|43\(2\(6\)*0\)AB|654320ABCD| +3|9|43\(2\(7\)*0\)AB|6543270ABCD| +3|12|43\(2\(7\)*0\)AB|6543277770ABCD| # GA133 -1¦10¦\(a\(b\(c\(d\(e\(f\(g\)h\(i\(j\)\)\)\)\)\)\)\)¦abcdefghijk¦ --1¦-1¦\(a\(b\(c\(d\(e\(f\(g\)h\(i\(k\)\)\)\)\)\)\)\)¦abcdefghijk¦ +1|10|\(a\(b\(c\(d\(e\(f\(g\)h\(i\(j\)\)\)\)\)\)\)\)|abcdefghijk| +-1|-1|\(a\(b\(c\(d\(e\(f\(g\)h\(i\(k\)\)\)\)\)\)\)\)|abcdefghijk| # GA134 -2¦4¦\(bb*\)¦abbbc¦ -2¦2¦\(bb*\)¦ababbbc¦ -1¦6¦a\(.*b\)¦ababbbc¦ -1¦2¦a\(b*\)¦ababbbc¦ -1¦20¦a\(.*b\)c¦axcaxbbbcsxbbbbbbbbc¦ +2|4|\(bb*\)|abbbc| +2|2|\(bb*\)|ababbbc| +1|6|a\(.*b\)|ababbbc| +1|2|a\(b*\)|ababbbc| +1|20|a\(.*b\)c|axcaxbbbcsxbbbbbbbbc| # GA135 -1¦7¦\(a\(b\(c\(d\(e\)\)\)\)\)\4¦abcdededede¦ +1|7|\(a\(b\(c\(d\(e\)\)\)\)\)\4|abcdededede| #W POSIX does not really specify whether a\(b\)*c\1 matches acb. #W back references are supposed to expand to the last match, but what #W if there never was a match as in this case? --1¦-1¦a\(b\)*c\1¦acb¦ -1¦11¦\(a\(b\(c\(d\(e\(f\(g\)h\(i\(j\)\)\)\)\)\)\)\)\9¦abcdefghijjk¦ +-1|-1|a\(b\)*c\1|acb| +1|11|\(a\(b\(c\(d\(e\(f\(g\)h\(i\(j\)\)\)\)\)\)\)\)\9|abcdefghijjk| # GA136 #W These two tests have the same problem as the test in GA135. No match #W of a subexpression, why should the back reference be usable? 
#W 1 2 a\(b\)*c\1 acb -#W 4 7 a\(b\(c\(d\(f\)*\)\)\)\4¦xYzabcdePQRST --1¦-1¦a\(b\)*c\1¦acb¦ --1¦-1¦a\(b\(c\(d\(f\)*\)\)\)\4¦xYzabcdePQRST¦ +#W 4 7 a\(b\(c\(d\(f\)*\)\)\)\4|xYzabcdePQRST +-1|-1|a\(b\)*c\1|acb| +-1|-1|a\(b\(c\(d\(f\)*\)\)\)\4|xYzabcdePQRST| # GA137 --2¦-2¦\(a\(b\)\)\3¦foo¦ --2¦-2¦\(a\(b\)\)\(a\(b\)\)\5¦foo¦ +-2|-2|\(a\(b\)\)\3|foo| +-2|-2|\(a\(b\)\)\(a\(b\)\)\5|foo| # GA138 -1¦2¦ag*b¦abcde¦ -1¦10¦a.*b¦abababvbabc¦ -2¦5¦b*c¦abbbcdeabbbbbbcde¦ -2¦5¦bbb*c¦abbbcdeabbbbbbcde¦ -1¦5¦a\(b\)*c\1¦abbcbbb¦ --1¦-1¦a\(b\)*c\1¦abbdbd¦ -0¦0¦\([a-c]*\)\1¦abcacdef¦ -1¦6¦\([a-c]*\)\1¦abcabcabcd¦ -1¦2¦a^*b¦ab¦ -1¦5¦a^*b¦a^^^b¦ +1|2|ag*b|abcde| +1|10|a.*b|abababvbabc| +2|5|b*c|abbbcdeabbbbbbcde| +2|5|bbb*c|abbbcdeabbbbbbcde| +1|5|a\(b\)*c\1|abbcbbb| +-1|-1|a\(b\)*c\1|abbdbd| +0|0|\([a-c]*\)\1|abcacdef| +1|6|\([a-c]*\)\1|abcabcabcd| +1|2|a^*b|ab| +1|5|a^*b|a^^^b| # GA139 -1¦2¦a\{2\}¦aaaa¦ -1¦7¦\([a-c]*\)\{0,\}¦aabcaab¦ -1¦2¦\(a\)\1\{1,2\}¦aabc¦ -1¦3¦\(a\)\1\{1,2\}¦aaaabc¦ +1|2|a\{2\}|aaaa| +1|7|\([a-c]*\)\{0,\}|aabcaab| +1|2|\(a\)\1\{1,2\}|aabc| +1|3|\(a\)\1\{1,2\}|aaaabc| #W the expression \(\(a\)\1\)\{1,2\} is ill-formed, using \2 -1¦4¦\(\(a\)\2\)\{1,2\}¦aaaabc¦ +1|4|\(\(a\)\2\)\{1,2\}|aaaabc| # GA140 -1¦2¦a\{2\}¦aaaa¦ --1¦-1¦a\{2\}¦abcd¦ -0¦0¦a\{0\}¦aaaa¦ -1¦64¦a\{64\}¦aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa¦ +1|2|a\{2\}|aaaa| +-1|-1|a\{2\}|abcd| +0|0|a\{0\}|aaaa| +1|64|a\{64\}|aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa| # GA141 -1¦7¦\([a-c]*\)\{0,\}¦aabcaab¦ +1|7|\([a-c]*\)\{0,\}|aabcaab| #W the expected result for \([a-c]*\)\{2,\} is failure which isn't correct -1¦3¦\([a-c]*\)\{2,\}¦abcdefg¦ -1¦3¦\([a-c]*\)\{1,\}¦abcdefg¦ --1¦-1¦a\{64,\}¦aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa¦ +1|3|\([a-c]*\)\{2,\}|abcdefg| +1|3|\([a-c]*\)\{1,\}|abcdefg| +-1|-1|a\{64,\}|aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa| # GA142 -1¦3¦a\{2,3\}¦aaaa¦ --1¦-1¦a\{2,3\}¦abcd¦ -0¦0¦\([a-c]*\)\{0,0\}¦foo¦ 
-1¦63¦a\{1,63\}¦aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa¦ +1|3|a\{2,3\}|aaaa| +-1|-1|a\{2,3\}|abcd| +0|0|\([a-c]*\)\{0,0\}|foo| +1|63|a\{1,63\}|aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa| # 2.8.3.4 BRE Precedence # GA143 #W There are numerous bugs in the original version. -2¦19¦\^\[[[.].]]\\(\\1\\)\*\\{1,2\\}\$¦a^[]\(\1\)*\{1,2\}$b¦ -1¦6¦[[=*=]][[=\=]][[=]=]][[===]][[...]][[:punct:]]¦*\]=.;¦ -1¦6¦[$\(*\)^]*¦$\()*^¦ -1¦1¦[\1]¦1¦ -1¦1¦[\{1,2\}]¦{¦ +2|19|\^\[[[.].]]\\(\\1\\)\*\\{1,2\\}\$|a^[]\(\1\)*\{1,2\}$b| +1|6|[[=*=]][[=\=]][[=]=]][[===]][[...]][[:punct:]]|*\]=.;| +1|6|[$\(*\)^]*|$\()*^| +1|1|[\1]|1| +1|1|[\{1,2\}]|{| #W the expected result for \(*\)*\1* is 2-2 which isn't correct -0¦0¦\(*\)*\1*¦a*b*11¦ -2¦3¦\(*\)*\1*b¦a*b*11¦ +0|0|\(*\)*\1*|a*b*11| +2|3|\(*\)*\1*b|a*b*11| #W the expected result for \(a\(b\{1,2\}\)\{1,2\}\) is 1-5 which isn't correct -1¦3¦\(a\(b\{1,2\}\)\{1,2\}\)¦abbab¦ -1¦5¦\(a\(b\{1,2\}\)\)\{1,2\}¦abbab¦ -1¦1¦^\(^\(^a$\)$\)$¦a¦ -1¦2¦\(a\)\1$¦aa¦ -1¦3¦ab*¦abb¦ -1¦4¦ab\{2,4\}¦abbbc¦ +1|3|\(a\(b\{1,2\}\)\{1,2\}\)|abbab| +1|5|\(a\(b\{1,2\}\)\)\{1,2\}|abbab| +1|1|^\(^\(^a$\)$\)$|a| +1|2|\(a\)\1$|aa| +1|3|ab*|abb| +1|4|ab\{2,4\}|abbbc| # 2.8.3.5 BRE Expression Anchoring # GA144 -1¦1¦^a¦abc¦ --1¦-1¦^b¦abc¦ --1¦-1¦^[a-zA-Z]¦99Nine¦ -1¦4¦^[a-zA-Z]*¦Nine99¦ +1|1|^a|abc| +-1|-1|^b|abc| +-1|-1|^[a-zA-Z]|99Nine| +1|4|^[a-zA-Z]*|Nine99| # GA145(1) -1¦2¦\(^a\)\1¦aabc¦ --1¦-1¦\(^a\)\1¦^a^abc¦ -1¦2¦\(^^a\)¦^a¦ -1¦1¦\(^^\)¦^^¦ -1¦3¦\(^abc\)¦abcdef¦ --1¦-1¦\(^def\)¦abcdef¦ +1|2|\(^a\)\1|aabc| +-1|-1|\(^a\)\1|^a^abc| +1|2|\(^^a\)|^a| +1|1|\(^^\)|^^| +1|3|\(^abc\)|abcdef| +-1|-1|\(^def\)|abcdef| ### GA145(2) GNU regex implements GA145(1) -##-1¦-1¦\(^a\)\1¦aabc¦ -##1¦4¦\(^a\)\1¦^a^abc¦ -##-1¦-1¦\(^^a\)¦^a¦ -##1¦2¦\(^^\)¦^^¦ +##-1|-1|\(^a\)\1|aabc| +##1|4|\(^a\)\1|^a^abc| +##-1|-1|\(^^a\)|^a| +##1|2|\(^^\)|^^| # GA146 -3¦3¦a$¦cba¦ --1¦-1¦a$¦abc¦ -5¦7¦[a-z]*$¦99ZZxyz¦ +3|3|a$|cba| +-1|-1|a$|abc| +5|7|[a-z]*$|99ZZxyz| #W the 
expected result for [a-z]*$ is failure which isn't correct -10¦9¦[a-z]*$¦99ZZxyz99¦ -3¦3¦$$¦ab$¦ --1¦-1¦$$¦$ab¦ -3¦3¦\$$¦ab$¦ +10|9|[a-z]*$|99ZZxyz99| +3|3|$$|ab$| +-1|-1|$$|$ab| +3|3|\$$|ab$| # GA147(1) --1¦-1¦\(a$\)\1¦bcaa¦ --1¦-1¦\(a$\)\1¦ba$¦ --1¦-1¦\(ab$\)¦ab$¦ -1¦2¦\(ab$\)¦ab¦ -4¦6¦\(def$\)¦abcdef¦ --1¦-1¦\(abc$\)¦abcdef¦ +-1|-1|\(a$\)\1|bcaa| +-1|-1|\(a$\)\1|ba$| +-1|-1|\(ab$\)|ab$| +1|2|\(ab$\)|ab| +4|6|\(def$\)|abcdef| +-1|-1|\(abc$\)|abcdef| ### GA147(2) GNU regex implements GA147(1) -##-1¦-1¦\(a$\)\1¦bcaa¦ -##2¦5¦\(a$\)\1¦ba$a$¦ -##-1¦-1¦\(ab$\)¦ab¦ -##1¦3¦\(ab$\)¦ab$¦ +##-1|-1|\(a$\)\1|bcaa| +##2|5|\(a$\)\1|ba$a$| +##-1|-1|\(ab$\)|ab| +##1|3|\(ab$\)|ab$| # GA148 -0¦0¦^$¦¦ -1¦3¦^abc$¦abc¦ --1¦-1¦^xyz$¦^xyz^¦ --1¦-1¦^234$¦^234$¦ -1¦9¦^[a-zA-Z0-9]*$¦2aA3bB9zZ¦ --1¦-1¦^[a-z0-9]*$¦2aA3b#B9zZ¦ +0|0|^$|| +1|3|^abc$|abc| +-1|-1|^xyz$|^xyz^| +-1|-1|^234$|^234$| +1|9|^[a-zA-Z0-9]*$|2aA3bB9zZ| +-1|-1|^[a-z0-9]*$|2aA3b#B9zZ| diff --git a/posix/PTESTS2C.sed b/posix/PTESTS2C.sed index b6850a3754..500b4ec21c 100644 --- a/posix/PTESTS2C.sed +++ b/posix/PTESTS2C.sed @@ -1,6 +1,8 @@ +# Future self: the vertical bar is being used here as a delimiter in +# the input file, not in the usual alternate-choice regex meaning. 
/^##/d s/^# \(.*\)/ { 0, 0, "\1", NULL, },/ s/^#W \(.*\)/ { 0, 0, NULL, "\1" },/ -s/\([^¦]*\)¦\([^¦]*\)¦\([^¦]*\)¦\([^¦]*\)¦\(.*\)/ { \1, \2, "\3", "\4", \5 },/ +s/\([^|]*\)|\([^|]*\)|\([^|]*\)|\([^|]*\)|\(.*\)/ { \1, \2, "\3", "\4", \5 },/ s/\\/\\\\/g s/ /\\r/g diff --git a/posix/bug-regex1.c b/posix/bug-regex1.c index 38eb543951..4432a90b81 100644 --- a/posix/bug-regex1.c +++ b/posix/bug-regex1.c @@ -23,7 +23,7 @@ main (void) puts ("in C locale"); setlocale (LC_ALL, "C"); - s = re_compile_pattern ("[anù]*n", 7, &regex); + s = re_compile_pattern ("[an\371]*n", 7, &regex); if (s != NULL) { puts ("re_compile_pattern return non-NULL value"); @@ -43,7 +43,7 @@ main (void) puts ("in de_DE.ISO-8859-1 locale"); setlocale (LC_ALL, "de_DE.ISO-8859-1"); - s = re_compile_pattern ("[anù]*n", 7, &regex); + s = re_compile_pattern ("[an\371]*n", 7, &regex); if (s != NULL) { puts ("re_compile_pattern return non-NULL value"); diff --git a/posix/tst-fnmatch.c b/posix/tst-fnmatch.c index 7e1b28832a..670a3c3c44 100644 --- a/posix/tst-fnmatch.c +++ b/posix/tst-fnmatch.c @@ -193,6 +193,19 @@ next_input (char **line, int first, int last) *wp++ = '\t'; else if (*cp == 'n') *wp++ = '\n'; + else if (*cp >= '0' && *cp <= '7') + { + int ndigits = 0; + int cval = 0; + while (ndigits < 3 && *cp >= '0' && *cp <= '7') + { + cval *= 8; + cval += (*cp++) - '0'; + ndigits ++; + } + *wp++ = cval; + --cp; + } else *wp++ = *cp; diff --git a/posix/tst-fnmatch.input b/posix/tst-fnmatch.input index 67aac5aada..9d071683dd 100644 --- a/posix/tst-fnmatch.input +++ b/posix/tst-fnmatch.input @@ -477,90 +477,90 @@ C "-" "[Z-\\]]" NOMATCH # handling of ranges and the recognition of character (vs bytes). 
de_DE.ISO-8859-1 "a" "[a-z]" 0 de_DE.ISO-8859-1 "z" "[a-z]" 0 -de_DE.ISO-8859-1 "ä" "[a-z]" 0 -de_DE.ISO-8859-1 "ö" "[a-z]" 0 -de_DE.ISO-8859-1 "ü" "[a-z]" 0 +de_DE.ISO-8859-1 "\344" "[a-z]" 0 +de_DE.ISO-8859-1 "\366" "[a-z]" 0 +de_DE.ISO-8859-1 "\374" "[a-z]" 0 de_DE.ISO-8859-1 "A" "[a-z]" NOMATCH de_DE.ISO-8859-1 "Z" "[a-z]" NOMATCH -de_DE.ISO-8859-1 "Ä" "[a-z]" NOMATCH -de_DE.ISO-8859-1 "Ö" "[a-z]" NOMATCH -de_DE.ISO-8859-1 "Ü" "[a-z]" NOMATCH +de_DE.ISO-8859-1 "\304" "[a-z]" NOMATCH +de_DE.ISO-8859-1 "\326" "[a-z]" NOMATCH +de_DE.ISO-8859-1 "\334" "[a-z]" NOMATCH de_DE.ISO-8859-1 "a" "[A-Z]" NOMATCH de_DE.ISO-8859-1 "z" "[A-Z]" NOMATCH -de_DE.ISO-8859-1 "ä" "[A-Z]" NOMATCH -de_DE.ISO-8859-1 "ö" "[A-Z]" NOMATCH -de_DE.ISO-8859-1 "ü" "[A-Z]" NOMATCH +de_DE.ISO-8859-1 "\344" "[A-Z]" NOMATCH +de_DE.ISO-8859-1 "\366" "[A-Z]" NOMATCH +de_DE.ISO-8859-1 "\374" "[A-Z]" NOMATCH de_DE.ISO-8859-1 "A" "[A-Z]" 0 de_DE.ISO-8859-1 "Z" "[A-Z]" 0 -de_DE.ISO-8859-1 "Ä" "[A-Z]" 0 -de_DE.ISO-8859-1 "Ö" "[A-Z]" 0 -de_DE.ISO-8859-1 "Ü" "[A-Z]" 0 +de_DE.ISO-8859-1 "\304" "[A-Z]" 0 +de_DE.ISO-8859-1 "\326" "[A-Z]" 0 +de_DE.ISO-8859-1 "\334" "[A-Z]" 0 de_DE.ISO-8859-1 "a" "[[:lower:]]" 0 de_DE.ISO-8859-1 "z" "[[:lower:]]" 0 -de_DE.ISO-8859-1 "ä" "[[:lower:]]" 0 -de_DE.ISO-8859-1 "ö" "[[:lower:]]" 0 -de_DE.ISO-8859-1 "ü" "[[:lower:]]" 0 +de_DE.ISO-8859-1 "\344" "[[:lower:]]" 0 +de_DE.ISO-8859-1 "\366" "[[:lower:]]" 0 +de_DE.ISO-8859-1 "\374" "[[:lower:]]" 0 de_DE.ISO-8859-1 "A" "[[:lower:]]" NOMATCH de_DE.ISO-8859-1 "Z" "[[:lower:]]" NOMATCH -de_DE.ISO-8859-1 "Ä" "[[:lower:]]" NOMATCH -de_DE.ISO-8859-1 "Ö" "[[:lower:]]" NOMATCH -de_DE.ISO-8859-1 "Ü" "[[:lower:]]" NOMATCH +de_DE.ISO-8859-1 "\304" "[[:lower:]]" NOMATCH +de_DE.ISO-8859-1 "\326" "[[:lower:]]" NOMATCH +de_DE.ISO-8859-1 "\334" "[[:lower:]]" NOMATCH de_DE.ISO-8859-1 "a" "[[:upper:]]" NOMATCH de_DE.ISO-8859-1 "z" "[[:upper:]]" NOMATCH -de_DE.ISO-8859-1 "ä" "[[:upper:]]" NOMATCH -de_DE.ISO-8859-1 "ö" "[[:upper:]]" NOMATCH 
-de_DE.ISO-8859-1 "ü" "[[:upper:]]" NOMATCH +de_DE.ISO-8859-1 "\344" "[[:upper:]]" NOMATCH +de_DE.ISO-8859-1 "\366" "[[:upper:]]" NOMATCH +de_DE.ISO-8859-1 "\374" "[[:upper:]]" NOMATCH de_DE.ISO-8859-1 "A" "[[:upper:]]" 0 de_DE.ISO-8859-1 "Z" "[[:upper:]]" 0 -de_DE.ISO-8859-1 "Ä" "[[:upper:]]" 0 -de_DE.ISO-8859-1 "Ö" "[[:upper:]]" 0 -de_DE.ISO-8859-1 "Ü" "[[:upper:]]" 0 +de_DE.ISO-8859-1 "\304" "[[:upper:]]" 0 +de_DE.ISO-8859-1 "\326" "[[:upper:]]" 0 +de_DE.ISO-8859-1 "\334" "[[:upper:]]" 0 de_DE.ISO-8859-1 "a" "[[:alpha:]]" 0 de_DE.ISO-8859-1 "z" "[[:alpha:]]" 0 -de_DE.ISO-8859-1 "ä" "[[:alpha:]]" 0 -de_DE.ISO-8859-1 "ö" "[[:alpha:]]" 0 -de_DE.ISO-8859-1 "ü" "[[:alpha:]]" 0 +de_DE.ISO-8859-1 "\344" "[[:alpha:]]" 0 +de_DE.ISO-8859-1 "\366" "[[:alpha:]]" 0 +de_DE.ISO-8859-1 "\374" "[[:alpha:]]" 0 de_DE.ISO-8859-1 "A" "[[:alpha:]]" 0 de_DE.ISO-8859-1 "Z" "[[:alpha:]]" 0 -de_DE.ISO-8859-1 "Ä" "[[:alpha:]]" 0 -de_DE.ISO-8859-1 "Ö" "[[:alpha:]]" 0 -de_DE.ISO-8859-1 "Ü" "[[:alpha:]]" 0 +de_DE.ISO-8859-1 "\304" "[[:alpha:]]" 0 +de_DE.ISO-8859-1 "\326" "[[:alpha:]]" 0 +de_DE.ISO-8859-1 "\334" "[[:alpha:]]" 0 de_DE.ISO-8859-1 "a" "[[=a=]b]" 0 -de_DE.ISO-8859-1 "â" "[[=a=]b]" 0 -de_DE.ISO-8859-1 "à" "[[=a=]b]" 0 -de_DE.ISO-8859-1 "á" "[[=a=]b]" 0 -de_DE.ISO-8859-1 "ä" "[[=a=]b]" 0 +de_DE.ISO-8859-1 "\342" "[[=a=]b]" 0 +de_DE.ISO-8859-1 "\340" "[[=a=]b]" 0 +de_DE.ISO-8859-1 "\341" "[[=a=]b]" 0 +de_DE.ISO-8859-1 "\344" "[[=a=]b]" 0 de_DE.ISO-8859-1 "b" "[[=a=]b]" 0 de_DE.ISO-8859-1 "c" "[[=a=]b]" NOMATCH -de_DE.ISO-8859-1 "a" "[[=â=]b]" 0 -de_DE.ISO-8859-1 "â" "[[=â=]b]" 0 -de_DE.ISO-8859-1 "à" "[[=â=]b]" 0 -de_DE.ISO-8859-1 "á" "[[=â=]b]" 0 -de_DE.ISO-8859-1 "ä" "[[=â=]b]" 0 -de_DE.ISO-8859-1 "b" "[[=â=]b]" 0 -de_DE.ISO-8859-1 "c" "[[=â=]b]" NOMATCH -de_DE.ISO-8859-1 "a" "[[=à=]b]" 0 -de_DE.ISO-8859-1 "â" "[[=à=]b]" 0 -de_DE.ISO-8859-1 "à" "[[=à=]b]" 0 -de_DE.ISO-8859-1 "á" "[[=à=]b]" 0 -de_DE.ISO-8859-1 "ä" "[[=à=]b]" 0 -de_DE.ISO-8859-1 "b" "[[=à=]b]" 0 -de_DE.ISO-8859-1 
"c" "[[=à=]b]" NOMATCH -de_DE.ISO-8859-1 "a" "[[=á=]b]" 0 -de_DE.ISO-8859-1 "â" "[[=á=]b]" 0 -de_DE.ISO-8859-1 "à" "[[=á=]b]" 0 -de_DE.ISO-8859-1 "á" "[[=á=]b]" 0 -de_DE.ISO-8859-1 "ä" "[[=á=]b]" 0 -de_DE.ISO-8859-1 "b" "[[=á=]b]" 0 -de_DE.ISO-8859-1 "c" "[[=á=]b]" NOMATCH -de_DE.ISO-8859-1 "a" "[[=ä=]b]" 0 -de_DE.ISO-8859-1 "â" "[[=ä=]b]" 0 -de_DE.ISO-8859-1 "à" "[[=ä=]b]" 0 -de_DE.ISO-8859-1 "á" "[[=ä=]b]" 0 -de_DE.ISO-8859-1 "ä" "[[=ä=]b]" 0 -de_DE.ISO-8859-1 "b" "[[=ä=]b]" 0 -de_DE.ISO-8859-1 "c" "[[=ä=]b]" NOMATCH +de_DE.ISO-8859-1 "a" "[[=\342=]b]" 0 +de_DE.ISO-8859-1 "\342" "[[=\342=]b]" 0 +de_DE.ISO-8859-1 "\340" "[[=\342=]b]" 0 +de_DE.ISO-8859-1 "\341" "[[=\342=]b]" 0 +de_DE.ISO-8859-1 "\344" "[[=\342=]b]" 0 +de_DE.ISO-8859-1 "b" "[[=\342=]b]" 0 +de_DE.ISO-8859-1 "c" "[[=\342=]b]" NOMATCH +de_DE.ISO-8859-1 "a" "[[=\340=]b]" 0 +de_DE.ISO-8859-1 "\342" "[[=\340=]b]" 0 +de_DE.ISO-8859-1 "\340" "[[=\340=]b]" 0 +de_DE.ISO-8859-1 "\341" "[[=\340=]b]" 0 +de_DE.ISO-8859-1 "\344" "[[=\340=]b]" 0 +de_DE.ISO-8859-1 "b" "[[=\340=]b]" 0 +de_DE.ISO-8859-1 "c" "[[=\340=]b]" NOMATCH +de_DE.ISO-8859-1 "a" "[[=\341=]b]" 0 +de_DE.ISO-8859-1 "\342" "[[=\341=]b]" 0 +de_DE.ISO-8859-1 "\340" "[[=\341=]b]" 0 +de_DE.ISO-8859-1 "\341" "[[=\341=]b]" 0 +de_DE.ISO-8859-1 "\344" "[[=\341=]b]" 0 +de_DE.ISO-8859-1 "b" "[[=\341=]b]" 0 +de_DE.ISO-8859-1 "c" "[[=\341=]b]" NOMATCH +de_DE.ISO-8859-1 "a" "[[=\344=]b]" 0 +de_DE.ISO-8859-1 "\342" "[[=\344=]b]" 0 +de_DE.ISO-8859-1 "\340" "[[=\344=]b]" 0 +de_DE.ISO-8859-1 "\341" "[[=\344=]b]" 0 +de_DE.ISO-8859-1 "\344" "[[=\344=]b]" 0 +de_DE.ISO-8859-1 "b" "[[=\344=]b]" 0 +de_DE.ISO-8859-1 "c" "[[=\344=]b]" NOMATCH de_DE.ISO-8859-1 "aa" "[[.a.]]a" 0 de_DE.ISO-8859-1 "ba" "[[.a.]]a" NOMATCH