diff --git a/ChangeLog b/ChangeLog
index 2ef08f0ed1..2ee6a12704 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2018-08-10 Paul Eggert
+
+	regex: Gnulib unibyte RRI uses bytes not chars
+	Adjust the non-glibc code to agree with what Gawk needs for
+	rational range interpretation (RRI) for regular expression ranges.
+	In unibyte locales, Gawk wants ranges to use the underlying byte
+	rather than the character code point. This change does not affect
+	glibc proper.
+	* posix/regcomp.c (parse_byte) [!LIBC && RE_ENABLE_I18N]:
+	In unibyte locales, use the byte value rather than
+	running it through btowc.
+
 2018-08-10 Joseph Myers
 
 	* sysdeps/generic/math-tests-snan.h: New file.
diff --git a/posix/regcomp.c b/posix/regcomp.c
index 3b0a3c6b6a..e81652f229 100644
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -2684,15 +2684,14 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
 
 # ifdef RE_ENABLE_I18N
 /* Convert the byte B to the corresponding wide character. In a
-   unibyte locale, treat B as itself if it is an encoding error.
-   In a multibyte locale, return WEOF if B is an encoding error. */
+   unibyte locale, treat B as itself. In a multibyte locale, return
+   WEOF if B is an encoding error. */
 static wint_t
 parse_byte (unsigned char b, re_charset_t *mbcset)
 {
-  wint_t wc = __btowc (b);
-  return wc == WEOF && !mbcset ? b : wc;
+  return mbcset == NULL ? b : __btowc (b);
 }
-#endif
+# endif
 
   /* Local function for parse_bracket_exp only used in case of NOT _LIBC.
      Build the range expression which starts from START_ELEM, and ends