mirror of
https://sourceware.org/git/glibc.git
synced 2025-01-21 07:13:35 +08:00
(collate_output): Update.
* locale/programs/ld-collate.c (collate_output): Emit correct information for collation elements. Don't write over end of array idx. * posix/regex.c: Handle also collation elements at end of range. * posix/PTESTS: Fix a few typos.
This commit is contained in:
parent
1c5d461740
commit
ac8295d23b
@ -1,5 +1,12 @@
|
||||
1999-12-31 Ulrich Drepper <drepper@cygnus.com>
|
||||
|
||||
* locale/programs/ld-collate.c (collate_output): Emit correct
|
||||
information for collation elements.
|
||||
Don't write over end of array idx.
|
||||
* posix/regex.c: Handle also collation elements at end of range.
|
||||
|
||||
* posix/PTESTS: Fix a few typos.
|
||||
|
||||
* posix/bits/posix2_lim.h: Remove _POSIX2_EQUIV_CLASS_MAX. I have
|
||||
no idea where this came from.
|
||||
* sysdeps/posix/sysconf.c: Remove _POSIX2_EQUIV_CLASS_MAX
|
||||
|
@ -91,8 +91,6 @@ struct element_t
|
||||
unsigned int used_in_level;
|
||||
|
||||
struct element_list_t *weights;
|
||||
/* Index in the `weight' table in the output file for the character. */
|
||||
int32_t weights_idx;
|
||||
|
||||
/* Nonzero if this is a real character definition. */
|
||||
int is_character;
|
||||
@ -301,7 +299,6 @@ new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
|
||||
|
||||
/* Will be allocated later. */
|
||||
newp->weights = NULL;
|
||||
newp->weights_idx = 0;
|
||||
|
||||
newp->file = NULL;
|
||||
newp->line = 0;
|
||||
@ -1809,9 +1806,6 @@ output_weight (struct obstack *pool, struct locale_collate_t *collate,
|
||||
obstack_grow (pool, buf, len);
|
||||
}
|
||||
|
||||
/* Remember the index. */
|
||||
elem->weights_idx = retval;
|
||||
|
||||
return retval | ((elem->section->ruleidx & 0x7f) << 24);
|
||||
}
|
||||
|
||||
@ -1899,11 +1893,26 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
|
||||
/* If we have no LC_COLLATE data emit only the number of rules as zero. */
|
||||
if (collate == NULL)
|
||||
{
|
||||
int32_t dummy = 0;
|
||||
|
||||
while (cnt < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
|
||||
{
|
||||
iov[2 + cnt].iov_base = (char *) "";
|
||||
iov[2 + cnt].iov_len = 0;
|
||||
idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
|
||||
/* The words have to be handled specially. */
|
||||
if (cnt == _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)
|
||||
|| cnt == _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)
|
||||
|| cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
|
||||
{
|
||||
iov[2 + cnt].iov_base = &dummy;
|
||||
iov[2 + cnt].iov_len = sizeof (int32_t);
|
||||
}
|
||||
else
|
||||
{
|
||||
iov[2 + cnt].iov_base = (char *) "";
|
||||
iov[2 + cnt].iov_len = 0;
|
||||
}
|
||||
|
||||
if (cnt + 1 < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
|
||||
idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
|
||||
++cnt;
|
||||
}
|
||||
|
||||
@ -2453,23 +2462,20 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
|
||||
elem_table[idx * 2] = hash;
|
||||
elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);
|
||||
|
||||
/* Now add the index into the weights table. We know the
|
||||
address is always 32bit aligned. */
|
||||
if (sizeof (int) == sizeof (int32_t))
|
||||
obstack_int_grow (&extrapool, runp->weights_idx);
|
||||
else
|
||||
obstack_grow (&extrapool, &runp->weights_idx,
|
||||
sizeof (int32_t));
|
||||
|
||||
/* The string itself including length. */
|
||||
obstack_1grow (&extrapool, namelen);
|
||||
obstack_grow (&extrapool, runp->name, namelen);
|
||||
|
||||
/* And the multibyte representation. */
|
||||
obstack_1grow (&extrapool, runp->nmbs);
|
||||
obstack_grow (&extrapool, runp->mbs, runp->nmbs);
|
||||
|
||||
/* And align again to 32 bits. */
|
||||
if ((1 + namelen) % sizeof (int32_t) != 0)
|
||||
if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0)
|
||||
obstack_grow (&extrapool, "\0\0",
|
||||
(sizeof (int32_t)
|
||||
- (1 + namelen) % sizeof (int32_t)));
|
||||
- ((1 + namelen + 1 + runp->nmbs)
|
||||
% sizeof (int32_t))));
|
||||
}
|
||||
}
|
||||
|
||||
@ -2492,7 +2498,6 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
|
||||
assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_EXTRAMB));
|
||||
iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
|
||||
iov[2 + cnt].iov_base = obstack_finish (&extrapool);
|
||||
idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
|
||||
++cnt;
|
||||
|
||||
|
||||
|
12
posix/PTESTS
12
posix/PTESTS
@ -115,7 +115,7 @@
|
||||
3¦3¦[][.-.]-0]¦ab0-]¦
|
||||
3¦3¦[A-[.].]c]¦ab]!¦
|
||||
# GA122
|
||||
-2¦-2¦[[.ch]]¦abc¦
|
||||
-2¦-2¦[[.ch.]]¦abc¦
|
||||
-2¦-2¦[[.ab.][.CD.][.EF.]]¦yZabCDEFQ9¦
|
||||
# GA125
|
||||
2¦2¦[[=a=]b]¦Abc¦
|
||||
@ -163,12 +163,12 @@
|
||||
2¦6¦bc[d-w]xy¦abchxyz¦
|
||||
# GA129
|
||||
1¦1¦[a-cd-f]¦dbccde¦
|
||||
-1¦-1¦[a-ce-f¦dBCCdE¦
|
||||
-1¦-1¦[a-ce-f]¦dBCCdE¦
|
||||
2¦4¦b[n-zA-M]Y¦absY9Z¦
|
||||
2¦4¦b[n-zA-M]Y¦abGY9Z¦
|
||||
# GA130
|
||||
3¦3¦[-xy]¦ac-¦
|
||||
2¦4¦[c[-xy]D¦ac-D+¦
|
||||
2¦4¦c[-xy]D¦ac-D+¦
|
||||
2¦2¦[--/]¦a.b¦
|
||||
2¦4¦c[--/]D¦ac.D+b¦
|
||||
2¦2¦[^-ac]¦abcde-¦
|
||||
@ -189,7 +189,7 @@
|
||||
3¦4¦[a-c][e-f]¦acbedf¦
|
||||
4¦8¦abc*XYZ¦890abXYZ#*¦
|
||||
4¦9¦abc*XYZ¦890abcXYZ#*¦
|
||||
4¦15¦abc*XYZ¦890abccccccccXYZ#*¦
|
||||
4¦15¦abc*XYZ¦890abcccccccXYZ#*¦
|
||||
-1¦-1¦abc*XYZ¦890abc*XYZ#*¦
|
||||
# GA132
|
||||
2¦4¦\(*bc\)¦a*bc¦
|
||||
@ -267,7 +267,7 @@
|
||||
1¦1¦^a¦abc¦
|
||||
-1¦-1¦^b¦abc¦
|
||||
-1¦-1¦^[a-zA-Z]¦99Nine¦
|
||||
1¦4¦^[a-zA-Z]¦Nine99¦
|
||||
1¦4¦^[a-zA-Z]*¦Nine99¦
|
||||
# GA145(1)
|
||||
1¦2¦\(^a\)\1¦aabc¦
|
||||
-1¦-1¦\(^a\)\1¦^a^abc¦
|
||||
@ -284,7 +284,7 @@
|
||||
3¦3¦a$¦cba¦
|
||||
-1¦-1¦a$¦abc¦
|
||||
5¦7¦[a-z]*$¦99ZZxyz¦
|
||||
-1¦-1¦[a-z]*$¦99ZZxyz99¦
|
||||
9¦9¦[a-z]*$¦99ZZxyz99¦
|
||||
3¦3¦$$¦ab$¦
|
||||
-1¦-1¦$$¦$ab¦
|
||||
3¦3¦\$$¦ab$¦
|
||||
|
@ -110,7 +110,7 @@
|
||||
{ 3, 3, "[][.-.]-0]", "ab0-]", },
|
||||
{ 3, 3, "[A-[.].]c]", "ab]!", },
|
||||
{ 0, 0, "GA122", NULL, },
|
||||
{ -2, -2, "[[.ch]]", "abc", },
|
||||
{ -2, -2, "[[.ch.]]", "abc", },
|
||||
{ -2, -2, "[[.ab.][.CD.][.EF.]]", "yZabCDEFQ9", },
|
||||
{ 0, 0, "GA125", NULL, },
|
||||
{ 2, 2, "[[=a=]b]", "Abc", },
|
||||
@ -158,12 +158,12 @@
|
||||
{ 2, 6, "bc[d-w]xy", "abchxyz", },
|
||||
{ 0, 0, "GA129", NULL, },
|
||||
{ 1, 1, "[a-cd-f]", "dbccde", },
|
||||
{ -1, -1, "[a-ce-f", "dBCCdE", },
|
||||
{ -1, -1, "[a-ce-f]", "dBCCdE", },
|
||||
{ 2, 4, "b[n-zA-M]Y", "absY9Z", },
|
||||
{ 2, 4, "b[n-zA-M]Y", "abGY9Z", },
|
||||
{ 0, 0, "GA130", NULL, },
|
||||
{ 3, 3, "[-xy]", "ac-", },
|
||||
{ 2, 4, "[c[-xy]D", "ac-D+", },
|
||||
{ 2, 4, "c[-xy]D", "ac-D+", },
|
||||
{ 2, 2, "[--/]", "a.b", },
|
||||
{ 2, 4, "c[--/]D", "ac.D+b", },
|
||||
{ 2, 2, "[^-ac]", "abcde-", },
|
||||
@ -184,7 +184,7 @@
|
||||
{ 3, 4, "[a-c][e-f]", "acbedf", },
|
||||
{ 4, 8, "abc*XYZ", "890abXYZ#*", },
|
||||
{ 4, 9, "abc*XYZ", "890abcXYZ#*", },
|
||||
{ 4, 15, "abc*XYZ", "890abccccccccXYZ#*", },
|
||||
{ 4, 15, "abc*XYZ", "890abcccccccXYZ#*", },
|
||||
{ -1, -1, "abc*XYZ", "890abc*XYZ#*", },
|
||||
{ 0, 0, "GA132", NULL, },
|
||||
{ 2, 4, "\\(*bc\\)", "a*bc", },
|
||||
@ -262,7 +262,7 @@
|
||||
{ 1, 1, "^a", "abc", },
|
||||
{ -1, -1, "^b", "abc", },
|
||||
{ -1, -1, "^[a-zA-Z]", "99Nine", },
|
||||
{ 1, 4, "^[a-zA-Z]", "Nine99", },
|
||||
{ 1, 4, "^[a-zA-Z]*", "Nine99", },
|
||||
{ 0, 0, "GA145(1)", NULL, },
|
||||
{ 1, 2, "\\(^a\\)\\1", "aabc", },
|
||||
{ -1, -1, "\\(^a\\)\\1", "^a^abc", },
|
||||
@ -274,7 +274,7 @@
|
||||
{ 3, 3, "a$", "cba", },
|
||||
{ -1, -1, "a$", "abc", },
|
||||
{ 5, 7, "[a-z]*$", "99ZZxyz", },
|
||||
{ -1, -1, "[a-z]*$", "99ZZxyz99", },
|
||||
{ 9, 9, "[a-z]*$", "99ZZxyz99", },
|
||||
{ 3, 3, "$$", "ab$", },
|
||||
{ -1, -1, "$$", "$ab", },
|
||||
{ 3, 3, "\\$$", "ab$", },
|
||||
|
@ -1570,7 +1570,8 @@ static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p,
|
||||
reg_syntax_t syntax));
|
||||
static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend,
|
||||
reg_syntax_t syntax));
|
||||
static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr,
|
||||
static reg_errcode_t compile_range _RE_ARGS ((unsigned int range_start,
|
||||
const char **p_ptr,
|
||||
const char *pend,
|
||||
char *translate,
|
||||
reg_syntax_t syntax,
|
||||
@ -2174,6 +2175,7 @@ regex_compile (pattern, size, syntax, bufp)
|
||||
case '[':
|
||||
{
|
||||
boolean had_char_class = false;
|
||||
unsigned int range_start = 0xffffffff;
|
||||
|
||||
if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
|
||||
|
||||
@ -2217,6 +2219,7 @@ regex_compile (pattern, size, syntax, bufp)
|
||||
|
||||
PATFETCH (c1);
|
||||
SET_LIST_BIT (c1);
|
||||
range_start = c1;
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -2241,8 +2244,10 @@ regex_compile (pattern, size, syntax, bufp)
|
||||
&& *p != ']')
|
||||
{
|
||||
reg_errcode_t ret
|
||||
= compile_range (&p, pend, translate, syntax, b);
|
||||
= compile_range (range_start, &p, pend, translate,
|
||||
syntax, b);
|
||||
if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
|
||||
range_start = 0xffffffff;
|
||||
}
|
||||
|
||||
else if (p[0] == '-' && p[1] != ']')
|
||||
@ -2252,8 +2257,9 @@ regex_compile (pattern, size, syntax, bufp)
|
||||
/* Move past the `-'. */
|
||||
PATFETCH (c1);
|
||||
|
||||
ret = compile_range (&p, pend, translate, syntax, b);
|
||||
ret = compile_range (c, &p, pend, translate, syntax, b);
|
||||
if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
|
||||
range_start = 0xffffffff;
|
||||
}
|
||||
|
||||
/* See if we're at the beginning of a possible character
|
||||
@ -2376,6 +2382,7 @@ regex_compile (pattern, size, syntax, bufp)
|
||||
PATUNFETCH;
|
||||
SET_LIST_BIT ('[');
|
||||
SET_LIST_BIT (':');
|
||||
range_start = ':';
|
||||
had_char_class = false;
|
||||
}
|
||||
}
|
||||
@ -2503,6 +2510,16 @@ regex_compile (pattern, size, syntax, bufp)
|
||||
#endif
|
||||
had_char_class = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
c1++;
|
||||
while (c1--)
|
||||
PATUNFETCH;
|
||||
SET_LIST_BIT ('[');
|
||||
SET_LIST_BIT ('=');
|
||||
range_start = '=';
|
||||
had_char_class = false;
|
||||
}
|
||||
}
|
||||
else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.')
|
||||
{
|
||||
@ -2553,6 +2570,7 @@ regex_compile (pattern, size, syntax, bufp)
|
||||
|
||||
/* Set the bit for the character. */
|
||||
SET_LIST_BIT (str[0]);
|
||||
range_start = ((const unsigned char *) str)[0];
|
||||
}
|
||||
#ifdef _LIBC
|
||||
else
|
||||
@ -2561,9 +2579,7 @@ regex_compile (pattern, size, syntax, bufp)
|
||||
those known to the collate implementation.
|
||||
First find out whether the bytes in `str' are
|
||||
actually from exactly one character. */
|
||||
const unsigned char *weights;
|
||||
int32_t table_size;
|
||||
const int32_t *table;
|
||||
const int32_t *symb_table;
|
||||
const unsigned char *extra;
|
||||
int32_t idx;
|
||||
@ -2574,10 +2590,6 @@ regex_compile (pattern, size, syntax, bufp)
|
||||
int32_t hash;
|
||||
int ch;
|
||||
|
||||
table = (const int32_t *)
|
||||
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
|
||||
weights = (const unsigned char *)
|
||||
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
|
||||
table_size =
|
||||
_NL_CURRENT_WORD (LC_COLLATE,
|
||||
_NL_COLLATE_SYMB_HASH_SIZEMB);
|
||||
@ -2598,17 +2610,15 @@ regex_compile (pattern, size, syntax, bufp)
|
||||
{
|
||||
/* First compare the hashing value. */
|
||||
if (symb_table[2 * elem] == hash
|
||||
&& (c1 == extra[symb_table[2 * elem + 1]
|
||||
+ sizeof (int32_t)])
|
||||
&& c1 == extra[symb_table[2 * elem + 1]]
|
||||
&& memcmp (str,
|
||||
&extra[symb_table[2 * elem + 1]
|
||||
+ sizeof (int32_t) + 1],
|
||||
+ 1],
|
||||
c1) == 0)
|
||||
{
|
||||
/* Yep, this is the entry. */
|
||||
idx = *((int32_t *)
|
||||
(extra
|
||||
+ symb_table[2 * elem + 1]));
|
||||
idx = symb_table[2 * elem + 1];
|
||||
idx += 1 + extra[idx];
|
||||
break;
|
||||
}
|
||||
|
||||
@ -2624,40 +2634,21 @@ regex_compile (pattern, size, syntax, bufp)
|
||||
class. */
|
||||
PATFETCH (c);
|
||||
|
||||
/* Now we have to go through the whole table
|
||||
and find all characters which have the same
|
||||
first level weight.
|
||||
/* Now add the multibyte character(s) we found
|
||||
to the accepted list.
|
||||
|
||||
XXX Note that this is not entirely correct.
|
||||
We would have to match multibyte sequences
|
||||
but this is not possible with the current
|
||||
implementation. */
|
||||
for (ch = 1; ch < 256; ++ch)
|
||||
/* XXX This test would have to be changed if we
|
||||
would allow matching multibyte sequences. */
|
||||
if (table[ch] > 0)
|
||||
{
|
||||
int32_t idx2 = table[ch];
|
||||
size_t len = weights[idx2];
|
||||
|
||||
/* Test whether the lengths match. */
|
||||
if (weights[idx] == len)
|
||||
{
|
||||
/* They do. Now compare the bytes of
|
||||
the weight. */
|
||||
size_t cnt = 0;
|
||||
|
||||
while (cnt < len
|
||||
&& (weights[idx + 1 + cnt]
|
||||
== weights[idx2 + 1 + cnt]))
|
||||
++len;
|
||||
|
||||
if (cnt == len)
|
||||
/* They match. Mark the character as
|
||||
acceptable. */
|
||||
SET_LIST_BIT (ch);
|
||||
}
|
||||
}
|
||||
implementation. Also, we have to match
|
||||
collating symbols, which expand to more than
|
||||
one byte, as a whole and not allow the
|
||||
individual bytes. */
|
||||
c1 = extra[idx++];
|
||||
if (c1 == 1)
|
||||
range_start = extra[idx];
|
||||
while (c1-- > 0)
|
||||
SET_LIST_BIT (extra[idx++]);
|
||||
}
|
||||
#endif
|
||||
had_char_class = false;
|
||||
@ -2668,7 +2659,8 @@ regex_compile (pattern, size, syntax, bufp)
|
||||
while (c1--)
|
||||
PATUNFETCH;
|
||||
SET_LIST_BIT ('[');
|
||||
SET_LIST_BIT ('=');
|
||||
SET_LIST_BIT ('.');
|
||||
range_start = '.';
|
||||
had_char_class = false;
|
||||
}
|
||||
}
|
||||
@ -2676,6 +2668,7 @@ regex_compile (pattern, size, syntax, bufp)
|
||||
{
|
||||
had_char_class = false;
|
||||
SET_LIST_BIT (c);
|
||||
range_start = c;
|
||||
}
|
||||
}
|
||||
|
||||
@ -3425,7 +3418,8 @@ group_in_compile_stack (compile_stack, regnum)
|
||||
`regex_compile' itself. */
|
||||
|
||||
static reg_errcode_t
|
||||
compile_range (p_ptr, pend, translate, syntax, b)
|
||||
compile_range (range_start, p_ptr, pend, translate, syntax, b)
|
||||
unsigned int range_start;
|
||||
const char **p_ptr, *pend;
|
||||
RE_TRANSLATE_TYPE translate;
|
||||
reg_syntax_t syntax;
|
||||
@ -3434,7 +3428,7 @@ compile_range (p_ptr, pend, translate, syntax, b)
|
||||
unsigned this_char;
|
||||
|
||||
const char *p = *p_ptr;
|
||||
unsigned int range_start, range_end;
|
||||
unsigned int range_end;
|
||||
|
||||
if (p == pend)
|
||||
return REG_ERANGE;
|
||||
@ -3447,7 +3441,6 @@ compile_range (p_ptr, pend, translate, syntax, b)
|
||||
We also want to fetch the endpoints without translating them; the
|
||||
appropriate translation is done in the bit-setting loop below. */
|
||||
/* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */
|
||||
range_start = ((const unsigned char *) p)[-2];
|
||||
range_end = ((const unsigned char *) p)[0];
|
||||
|
||||
/* Have to increment the pointer into the pattern string, so the
|
||||
|
Loading…
Reference in New Issue
Block a user