2021-01-03 03:32:25 +08:00
|
|
|
/* Copyright (C) 1995-2021 Free Software Foundation, Inc.
|
1997-02-15 12:31:36 +08:00
|
|
|
This file is part of the GNU C Library.
|
|
|
|
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
2001-07-06 12:58:11 +08:00
|
|
|
modify it under the terms of the GNU Lesser General Public
|
|
|
|
License as published by the Free Software Foundation; either
|
|
|
|
version 2.1 of the License, or (at your option) any later version.
|
1997-02-15 12:31:36 +08:00
|
|
|
|
|
|
|
The GNU C Library is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
2001-07-06 12:58:11 +08:00
|
|
|
Lesser General Public License for more details.
|
1997-02-15 12:31:36 +08:00
|
|
|
|
2001-07-06 12:58:11 +08:00
|
|
|
You should have received a copy of the GNU Lesser General Public
|
2012-02-10 07:18:22 +08:00
|
|
|
License along with the GNU C Library; if not, see
|
Prefer https to http for gnu.org and fsf.org URLs
Also, change sources.redhat.com to sourceware.org.
This patch was automatically generated by running the following shell
script, which uses GNU sed, and which avoids modifying files imported
from upstream:
sed -ri '
s,(http|ftp)(://(.*\.)?(gnu|fsf|sourceware)\.org($|[^.]|\.[^a-z])),https\2,g
s,(http|ftp)(://(.*\.)?)sources\.redhat\.com($|[^.]|\.[^a-z]),https\2sourceware.org\4,g
' \
$(find $(git ls-files) -prune -type f \
! -name '*.po' \
! -name 'ChangeLog*' \
! -path COPYING ! -path COPYING.LIB \
! -path manual/fdl-1.3.texi ! -path manual/lgpl-2.1.texi \
! -path manual/texinfo.tex ! -path scripts/config.guess \
! -path scripts/config.sub ! -path scripts/install-sh \
! -path scripts/mkinstalldirs ! -path scripts/move-if-change \
! -path INSTALL ! -path locale/programs/charmap-kw.h \
! -path po/libc.pot ! -path sysdeps/gnu/errlist.c \
! '(' -name configure \
-execdir test -f configure.ac -o -f configure.in ';' ')' \
! '(' -name preconfigure \
-execdir test -f preconfigure.ac ';' ')' \
-print)
and then by running 'make dist-prepare' to regenerate files built
from the altered files, and then executing the following to cleanup:
chmod a+x sysdeps/unix/sysv/linux/riscv/configure
# Omit irrelevant whitespace and comment-only changes,
# perhaps from a slightly-different Autoconf version.
git checkout -f \
sysdeps/csky/configure \
sysdeps/hppa/configure \
sysdeps/riscv/configure \
sysdeps/unix/sysv/linux/csky/configure
# Omit changes that caused a pre-commit check to fail like this:
# remote: *** error: sysdeps/powerpc/powerpc64/ppc-mcount.S: trailing lines
git checkout -f \
sysdeps/powerpc/powerpc64/ppc-mcount.S \
sysdeps/unix/sysv/linux/s390/s390-64/syscall.S
# Omit change that caused a pre-commit check to fail like this:
# remote: *** error: sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S: last line does not end in newline
git checkout -f sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S
2019-09-07 13:40:42 +08:00
|
|
|
<https://www.gnu.org/licenses/>. */
|
1997-02-15 12:31:36 +08:00
|
|
|
|
2004-03-15 05:12:06 +08:00
|
|
|
#include <assert.h>
|
|
|
|
#include <langinfo.h>
|
|
|
|
#include <locale.h>
|
|
|
|
#include <stddef.h>
|
|
|
|
#include <stdint.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <sys/param.h>
|
2002-08-06 16:40:20 +08:00
|
|
|
|
2004-03-15 05:12:06 +08:00
|
|
|
/* Default configuration, used unless the including file has already
   defined STRING_TYPE: operate on plain `char' strings.  */
#ifndef STRING_TYPE
/* Element types of the strings, as given and as unsigned.  */
# define STRING_TYPE char
# define USTRING_TYPE unsigned char
/* Name of the function defined by this file.  */
# define STRXFRM __strxfrm_l
/* String primitives matching STRING_TYPE.  */
# define STRLEN strlen
# define STPNCPY __stpncpy
/* Header that is included below through `#include WEIGHT_H'.  */
# define WEIGHT_H "../locale/weight.h"
/* Suffix pasted onto the _NL_COLLATE_* item names.  */
# define SUFFIX MB
/* Wrapper for character literals of STRING_TYPE.  */
# define L(arg) arg
#endif

/* Token pasting with one level of indirection, so that macro arguments
   (such as SUFFIX) are expanded before they are pasted.  */
#define CONCAT1(a,b) a##b
#define CONCAT(a,b) CONCAT1(a,b)

/* Maximum string size that is calculated with cached indices.  Right now
   this is an arbitrary value open to optimizations.  SMALL_STR_SIZE * 4 has
   to be lower than __MAX_ALLOCA_CUTOFF.  Keep localedata/xfrm-test.c in
   sync.  */
#define SMALL_STR_SIZE 4095
|
|
|
|
|
2004-03-15 05:12:06 +08:00
|
|
|
#include "../locale/localeinfo.h"
|
2014-09-12 07:02:17 +08:00
|
|
|
#include WEIGHT_H
|
2004-03-15 05:12:06 +08:00
|
|
|
|
2015-01-13 14:03:56 +08:00
|
|
|
/* Group locale data for shorter parameter lists. */
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
uint_fast32_t nrules;
|
|
|
|
unsigned char *rulesets;
|
|
|
|
USTRING_TYPE *weights;
|
|
|
|
int32_t *table;
|
|
|
|
USTRING_TYPE *extra;
|
|
|
|
int32_t *indirect;
|
|
|
|
} locale_data_t;
|
2004-03-15 05:12:06 +08:00
|
|
|
|
|
|
|
#ifndef WIDE_CHAR_VERSION
|
|
|
|
|
|
|
|
/* We need UTF-8 encoding of numbers.

   Store VAL in BUF using the classic UTF-8 scheme with sequences of up to
   six bytes and return the number of bytes written (1 to 6).  No
   terminator is stored, so BUF needs room for six bytes.  Values below
   0x80 (the test is signed, so this includes negative values) are stored
   as a single byte.  */
static int
utf8_encode (char *buf, int val)
{
  if (val < 0x80)
    {
      buf[0] = (char) val;
      return 1;
    }

  /* Determine the sequence length: a sequence of STEP bytes carries
     5 * STEP + 1 payload bits.  Anything that does not fit in five bytes
     gets six.  */
  int step;
  for (step = 2; step < 6; ++step)
    if ((val & (~(uint32_t) 0 << (5 * step + 1))) == 0)
      break;
  const int nbytes = step;

  /* The lead byte has its top NBYTES bits set.  Shift an unsigned constant
     instead of `~0xff': right-shifting a negative value has an
     implementation-defined result.  */
  buf[0] = (char) (unsigned char) (0xff00u >> nbytes);

  /* Fill in the continuation bytes from the last one to the second, six
     payload bits each.  */
  for (int i = nbytes - 1; i > 0; --i)
    {
      buf[i] = (char) (0x80 | (val & 0x3f));
      val >>= 6;
    }

  /* Whatever is left of VAL belongs into the lead byte.  */
  buf[0] |= val;

  return nbytes;
}
|
|
|
|
#endif
|
|
|
|
|
2015-01-13 14:03:56 +08:00
|
|
|
/* Find next weight and rule index. Inlined since called for every char. */
|
|
|
|
static __always_inline size_t
|
|
|
|
find_idx (const USTRING_TYPE **us, int32_t *weight_idx,
|
|
|
|
unsigned char *rule_idx, const locale_data_t *l_data, const int pass)
|
|
|
|
{
|
|
|
|
int32_t tmp = findidx (l_data->table, l_data->indirect, l_data->extra, us,
|
|
|
|
-1);
|
|
|
|
*rule_idx = tmp >> 24;
|
|
|
|
int32_t idx = tmp & 0xffffff;
|
|
|
|
size_t len = l_data->weights[idx++];
|
|
|
|
|
|
|
|
/* Skip over indices of previous levels. */
|
|
|
|
for (int i = 0; i < pass; i++)
|
|
|
|
{
|
|
|
|
idx += len;
|
|
|
|
len = l_data->weights[idx++];
|
|
|
|
}
|
2004-03-15 05:12:06 +08:00
|
|
|
|
2015-01-13 14:03:56 +08:00
|
|
|
*weight_idx = idx;
|
|
|
|
return len;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
find_position (const USTRING_TYPE *us, const locale_data_t *l_data,
|
|
|
|
const int pass)
|
2004-03-15 05:12:06 +08:00
|
|
|
{
|
2015-01-13 14:03:56 +08:00
|
|
|
int32_t weight_idx;
|
|
|
|
unsigned char rule_idx;
|
|
|
|
const USTRING_TYPE *usrc = us;
|
|
|
|
|
|
|
|
find_idx (&usrc, &weight_idx, &rule_idx, l_data, pass);
|
|
|
|
return l_data->rulesets[rule_idx * l_data->nrules + pass] & sort_position;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Do the transformation. */
|
|
|
|
static size_t
|
|
|
|
do_xfrm (const USTRING_TYPE *usrc, STRING_TYPE *dest, size_t n,
|
|
|
|
const locale_data_t *l_data)
|
|
|
|
{
|
|
|
|
int32_t weight_idx;
|
|
|
|
unsigned char rule_idx;
|
2004-03-15 05:12:06 +08:00
|
|
|
uint_fast32_t pass;
|
2015-01-13 14:03:56 +08:00
|
|
|
size_t needed = 0;
|
2006-11-10 04:20:23 +08:00
|
|
|
size_t last_needed;
|
2004-03-15 05:12:06 +08:00
|
|
|
|
2015-01-13 14:03:56 +08:00
|
|
|
/* Now the passes over the weights. */
|
|
|
|
for (pass = 0; pass < l_data->nrules; ++pass)
|
2004-03-15 05:12:06 +08:00
|
|
|
{
|
2015-01-13 14:03:56 +08:00
|
|
|
size_t backw_len = 0;
|
|
|
|
last_needed = needed;
|
|
|
|
const USTRING_TYPE *cur = usrc;
|
|
|
|
const USTRING_TYPE *backw_start = NULL;
|
2004-03-15 05:12:06 +08:00
|
|
|
|
2015-01-13 14:03:56 +08:00
|
|
|
/* We assume that if a rule has defined `position' in one section
|
|
|
|
this is true for all of them. */
|
|
|
|
int position = find_position (cur, l_data, pass);
|
2004-03-15 05:12:06 +08:00
|
|
|
|
2015-01-13 14:03:56 +08:00
|
|
|
if (position == 0)
|
|
|
|
{
|
|
|
|
while (*cur != L('\0'))
|
|
|
|
{
|
|
|
|
const USTRING_TYPE *pos = cur;
|
|
|
|
size_t len = find_idx (&cur, &weight_idx, &rule_idx, l_data,
|
|
|
|
pass);
|
|
|
|
int rule = l_data->rulesets[rule_idx * l_data->nrules + pass];
|
2004-03-15 05:12:06 +08:00
|
|
|
|
2015-01-13 14:03:56 +08:00
|
|
|
if ((rule & sort_forward) != 0)
|
|
|
|
{
|
|
|
|
/* Handle the pushed backward sequence. */
|
|
|
|
if (backw_start != NULL)
|
|
|
|
{
|
|
|
|
for (size_t i = backw_len; i > 0; )
|
|
|
|
{
|
|
|
|
int32_t weight_idx;
|
|
|
|
unsigned char rule_idx;
|
|
|
|
size_t len = find_idx (&backw_start, &weight_idx,
|
|
|
|
&rule_idx, l_data, pass);
|
|
|
|
if (needed + i < n)
|
|
|
|
for (size_t j = len; j > 0; j--)
|
|
|
|
dest[needed + i - j] =
|
|
|
|
l_data->weights[weight_idx++];
|
|
|
|
|
|
|
|
i -= len;
|
|
|
|
}
|
2004-03-15 05:12:06 +08:00
|
|
|
|
2015-01-13 14:03:56 +08:00
|
|
|
needed += backw_len;
|
|
|
|
backw_start = NULL;
|
|
|
|
backw_len = 0;
|
|
|
|
}
|
2004-03-15 05:12:06 +08:00
|
|
|
|
2015-01-13 14:03:56 +08:00
|
|
|
/* Now handle the forward element. */
|
|
|
|
if (needed + len < n)
|
|
|
|
while (len-- > 0)
|
|
|
|
dest[needed++] = l_data->weights[weight_idx++];
|
|
|
|
else
|
|
|
|
/* No more characters fit into the buffer. */
|
|
|
|
needed += len;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* Remember start of the backward sequence & track length. */
|
|
|
|
if (backw_start == NULL)
|
|
|
|
backw_start = pos;
|
|
|
|
backw_len += len;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Handle the pushed backward sequence. */
|
|
|
|
if (backw_start != NULL)
|
|
|
|
{
|
|
|
|
for (size_t i = backw_len; i > 0; )
|
|
|
|
{
|
|
|
|
size_t len = find_idx (&backw_start, &weight_idx, &rule_idx,
|
|
|
|
l_data, pass);
|
|
|
|
if (needed + i < n)
|
|
|
|
for (size_t j = len; j > 0; j--)
|
|
|
|
dest[needed + i - j] =
|
|
|
|
l_data->weights[weight_idx++];
|
|
|
|
|
|
|
|
i -= len;
|
|
|
|
}
|
|
|
|
|
|
|
|
needed += backw_len;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
int val = 1;
|
|
|
|
#ifndef WIDE_CHAR_VERSION
|
|
|
|
char buf[7];
|
|
|
|
size_t buflen;
|
|
|
|
#endif
|
|
|
|
size_t i;
|
|
|
|
|
|
|
|
while (*cur != L('\0'))
|
|
|
|
{
|
|
|
|
const USTRING_TYPE *pos = cur;
|
|
|
|
size_t len = find_idx (&cur, &weight_idx, &rule_idx, l_data,
|
|
|
|
pass);
|
|
|
|
int rule = l_data->rulesets[rule_idx * l_data->nrules + pass];
|
|
|
|
|
|
|
|
if ((rule & sort_forward) != 0)
|
|
|
|
{
|
|
|
|
/* Handle the pushed backward sequence. */
|
|
|
|
if (backw_start != NULL)
|
|
|
|
{
|
|
|
|
for (size_t p = backw_len; p > 0; p--)
|
|
|
|
{
|
|
|
|
size_t len;
|
|
|
|
int32_t weight_idx;
|
|
|
|
unsigned char rule_idx;
|
|
|
|
const USTRING_TYPE *backw_cur = backw_start;
|
|
|
|
|
|
|
|
/* To prevent a warning init the used vars. */
|
|
|
|
len = find_idx (&backw_cur, &weight_idx,
|
|
|
|
&rule_idx, l_data, pass);
|
|
|
|
|
|
|
|
for (i = 1; i < p; i++)
|
|
|
|
len = find_idx (&backw_cur, &weight_idx,
|
|
|
|
&rule_idx, l_data, pass);
|
|
|
|
|
|
|
|
if (len != 0)
|
|
|
|
{
|
|
|
|
#ifdef WIDE_CHAR_VERSION
|
|
|
|
if (needed + 1 + len < n)
|
|
|
|
{
|
|
|
|
dest[needed] = val;
|
|
|
|
for (i = 0; i < len; ++i)
|
|
|
|
dest[needed + 1 + i] =
|
|
|
|
l_data->weights[weight_idx + i];
|
|
|
|
}
|
|
|
|
needed += 1 + len;
|
|
|
|
#else
|
|
|
|
buflen = utf8_encode (buf, val);
|
|
|
|
if (needed + buflen + len < n)
|
|
|
|
{
|
|
|
|
for (i = 0; i < buflen; ++i)
|
|
|
|
dest[needed + i] = buf[i];
|
|
|
|
for (i = 0; i < len; ++i)
|
|
|
|
dest[needed + buflen + i] =
|
|
|
|
l_data->weights[weight_idx + i];
|
|
|
|
}
|
|
|
|
needed += buflen + len;
|
|
|
|
#endif
|
|
|
|
val = 1;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
++val;
|
|
|
|
}
|
|
|
|
|
|
|
|
backw_start = NULL;
|
|
|
|
backw_len = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Now handle the forward element. */
|
|
|
|
if (len != 0)
|
|
|
|
{
|
|
|
|
#ifdef WIDE_CHAR_VERSION
|
|
|
|
if (needed + 1 + len < n)
|
|
|
|
{
|
|
|
|
dest[needed] = val;
|
|
|
|
for (i = 0; i < len; ++i)
|
|
|
|
dest[needed + 1 + i] =
|
|
|
|
l_data->weights[weight_idx + i];
|
|
|
|
}
|
|
|
|
needed += 1 + len;
|
|
|
|
#else
|
|
|
|
buflen = utf8_encode (buf, val);
|
|
|
|
if (needed + buflen + len < n)
|
|
|
|
{
|
|
|
|
for (i = 0; i < buflen; ++i)
|
|
|
|
dest[needed + i] = buf[i];
|
|
|
|
for (i = 0; i < len; ++i)
|
|
|
|
dest[needed + buflen + i] =
|
|
|
|
l_data->weights[weight_idx + i];
|
|
|
|
}
|
|
|
|
needed += buflen + len;
|
|
|
|
#endif
|
|
|
|
val = 1;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
++val;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* Remember start of the backward sequence & track length. */
|
|
|
|
if (backw_start == NULL)
|
|
|
|
backw_start = pos;
|
|
|
|
backw_len++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Handle the pushed backward sequence. */
|
|
|
|
if (backw_start != NULL)
|
|
|
|
{
|
|
|
|
for (size_t p = backw_len; p > 0; p--)
|
|
|
|
{
|
|
|
|
size_t len;
|
|
|
|
int32_t weight_idx;
|
|
|
|
unsigned char rule_idx;
|
|
|
|
const USTRING_TYPE *backw_cur = backw_start;
|
|
|
|
|
|
|
|
/* To prevent a warning init the used vars. */
|
|
|
|
len = find_idx (&backw_cur, &weight_idx,
|
|
|
|
&rule_idx, l_data, pass);
|
|
|
|
|
|
|
|
for (i = 1; i < p; i++)
|
|
|
|
len = find_idx (&backw_cur, &weight_idx,
|
|
|
|
&rule_idx, l_data, pass);
|
|
|
|
|
|
|
|
if (len != 0)
|
|
|
|
{
|
|
|
|
#ifdef WIDE_CHAR_VERSION
|
|
|
|
if (needed + 1 + len < n)
|
|
|
|
{
|
|
|
|
dest[needed] = val;
|
|
|
|
for (i = 0; i < len; ++i)
|
|
|
|
dest[needed + 1 + i] =
|
|
|
|
l_data->weights[weight_idx + i];
|
|
|
|
}
|
|
|
|
needed += 1 + len;
|
|
|
|
#else
|
|
|
|
buflen = utf8_encode (buf, val);
|
|
|
|
if (needed + buflen + len < n)
|
|
|
|
{
|
|
|
|
for (i = 0; i < buflen; ++i)
|
|
|
|
dest[needed + i] = buf[i];
|
|
|
|
for (i = 0; i < len; ++i)
|
|
|
|
dest[needed + buflen + i] =
|
|
|
|
l_data->weights[weight_idx + i];
|
|
|
|
}
|
|
|
|
needed += buflen + len;
|
|
|
|
#endif
|
|
|
|
val = 1;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
++val;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Finally store the byte to separate the passes or terminate
|
|
|
|
the string. */
|
|
|
|
if (needed < n)
|
|
|
|
dest[needed] = pass + 1 < l_data->nrules ? L('\1') : L('\0');
|
|
|
|
++needed;
|
2004-03-15 05:12:06 +08:00
|
|
|
}
|
|
|
|
|
2015-01-13 14:03:56 +08:00
|
|
|
/* This is a little optimization: many collation specifications have
|
|
|
|
a `position' rule at the end and if no non-ignored character
|
|
|
|
is found the last \1 byte is immediately followed by a \0 byte
|
|
|
|
signalling this. We can avoid the \1 byte(s). */
|
|
|
|
if (needed > 2 && needed == last_needed + 1)
|
2004-03-15 05:12:06 +08:00
|
|
|
{
|
2015-01-13 14:03:56 +08:00
|
|
|
/* Remove the \1 byte. */
|
|
|
|
if (--needed <= n)
|
|
|
|
dest[needed - 1] = L('\0');
|
2004-03-15 05:12:06 +08:00
|
|
|
}
|
|
|
|
|
2015-01-13 14:03:56 +08:00
|
|
|
/* Return the number of bytes/words we need, but don't count the NUL
|
|
|
|
byte/word at the end. */
|
|
|
|
return needed - 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Do the transformation using weight-index and rule cache. */
|
|
|
|
static size_t
|
|
|
|
do_xfrm_cached (STRING_TYPE *dest, size_t n, const locale_data_t *l_data,
|
|
|
|
size_t idxmax, int32_t *idxarr, const unsigned char *rulearr)
|
|
|
|
{
|
|
|
|
uint_fast32_t nrules = l_data->nrules;
|
|
|
|
unsigned char *rulesets = l_data->rulesets;
|
|
|
|
USTRING_TYPE *weights = l_data->weights;
|
|
|
|
uint_fast32_t pass;
|
|
|
|
size_t needed = 0;
|
|
|
|
size_t last_needed;
|
|
|
|
size_t idxcnt;
|
2004-03-15 05:12:06 +08:00
|
|
|
|
2015-01-13 14:03:56 +08:00
|
|
|
/* Now the passes over the weights. */
|
2004-03-15 05:12:06 +08:00
|
|
|
for (pass = 0; pass < nrules; ++pass)
|
|
|
|
{
|
|
|
|
size_t backw_stop = ~0ul;
|
|
|
|
int rule = rulesets[rulearr[0] * nrules + pass];
|
|
|
|
/* We assume that if a rule has defined `position' in one section
|
|
|
|
this is true for all of them. */
|
|
|
|
int position = rule & sort_position;
|
|
|
|
|
2006-11-10 04:20:23 +08:00
|
|
|
last_needed = needed;
|
2004-03-15 05:12:06 +08:00
|
|
|
if (position == 0)
|
|
|
|
{
|
|
|
|
for (idxcnt = 0; idxcnt < idxmax; ++idxcnt)
|
|
|
|
{
|
|
|
|
if ((rule & sort_forward) != 0)
|
|
|
|
{
|
|
|
|
size_t len;
|
|
|
|
|
|
|
|
if (backw_stop != ~0ul)
|
|
|
|
{
|
|
|
|
/* Handle the pushed elements now. */
|
|
|
|
size_t backw;
|
|
|
|
|
2005-10-16 04:51:49 +08:00
|
|
|
for (backw = idxcnt; backw > backw_stop; )
|
2004-03-15 05:12:06 +08:00
|
|
|
{
|
2005-10-16 04:51:49 +08:00
|
|
|
--backw;
|
2004-03-15 05:12:06 +08:00
|
|
|
len = weights[idxarr[backw]++];
|
|
|
|
|
|
|
|
if (needed + len < n)
|
|
|
|
while (len-- > 0)
|
|
|
|
dest[needed++] = weights[idxarr[backw]++];
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* No more characters fit into the buffer. */
|
|
|
|
needed += len;
|
|
|
|
idxarr[backw] += len;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
backw_stop = ~0ul;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Now handle the forward element. */
|
|
|
|
len = weights[idxarr[idxcnt]++];
|
|
|
|
if (needed + len < n)
|
|
|
|
while (len-- > 0)
|
|
|
|
dest[needed++] = weights[idxarr[idxcnt]++];
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* No more characters fit into the buffer. */
|
|
|
|
needed += len;
|
|
|
|
idxarr[idxcnt] += len;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* Remember where the backwards series started. */
|
|
|
|
if (backw_stop == ~0ul)
|
|
|
|
backw_stop = idxcnt;
|
|
|
|
}
|
|
|
|
|
|
|
|
rule = rulesets[rulearr[idxcnt + 1] * nrules + pass];
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (backw_stop != ~0ul)
|
|
|
|
{
|
|
|
|
/* Handle the pushed elements now. */
|
|
|
|
size_t backw;
|
|
|
|
|
|
|
|
backw = idxcnt;
|
|
|
|
while (backw > backw_stop)
|
|
|
|
{
|
|
|
|
size_t len = weights[idxarr[--backw]++];
|
|
|
|
|
|
|
|
if (needed + len < n)
|
|
|
|
while (len-- > 0)
|
|
|
|
dest[needed++] = weights[idxarr[backw]++];
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* No more characters fit into the buffer. */
|
|
|
|
needed += len;
|
|
|
|
idxarr[backw] += len;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
int val = 1;
|
|
|
|
#ifndef WIDE_CHAR_VERSION
|
|
|
|
char buf[7];
|
|
|
|
size_t buflen;
|
|
|
|
#endif
|
|
|
|
size_t i;
|
|
|
|
|
|
|
|
for (idxcnt = 0; idxcnt < idxmax; ++idxcnt)
|
|
|
|
{
|
|
|
|
if ((rule & sort_forward) != 0)
|
|
|
|
{
|
|
|
|
size_t len;
|
|
|
|
|
|
|
|
if (backw_stop != ~0ul)
|
|
|
|
{
|
|
|
|
/* Handle the pushed elements now. */
|
|
|
|
size_t backw;
|
|
|
|
|
2005-10-16 04:51:49 +08:00
|
|
|
for (backw = idxcnt; backw > backw_stop; )
|
2004-03-15 05:12:06 +08:00
|
|
|
{
|
2005-10-16 04:51:49 +08:00
|
|
|
--backw;
|
2004-03-15 05:12:06 +08:00
|
|
|
len = weights[idxarr[backw]++];
|
|
|
|
if (len != 0)
|
|
|
|
{
|
|
|
|
#ifdef WIDE_CHAR_VERSION
|
|
|
|
if (needed + 1 + len < n)
|
|
|
|
{
|
|
|
|
dest[needed] = val;
|
|
|
|
for (i = 0; i < len; ++i)
|
|
|
|
dest[needed + 1 + i] =
|
|
|
|
weights[idxarr[backw] + i];
|
|
|
|
}
|
|
|
|
needed += 1 + len;
|
|
|
|
#else
|
|
|
|
buflen = utf8_encode (buf, val);
|
|
|
|
if (needed + buflen + len < n)
|
|
|
|
{
|
|
|
|
for (i = 0; i < buflen; ++i)
|
|
|
|
dest[needed + i] = buf[i];
|
|
|
|
for (i = 0; i < len; ++i)
|
|
|
|
dest[needed + buflen + i] =
|
|
|
|
weights[idxarr[backw] + i];
|
|
|
|
}
|
|
|
|
needed += buflen + len;
|
|
|
|
#endif
|
|
|
|
idxarr[backw] += len;
|
|
|
|
val = 1;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
++val;
|
|
|
|
}
|
|
|
|
|
|
|
|
backw_stop = ~0ul;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Now handle the forward element. */
|
|
|
|
len = weights[idxarr[idxcnt]++];
|
|
|
|
if (len != 0)
|
|
|
|
{
|
|
|
|
#ifdef WIDE_CHAR_VERSION
|
|
|
|
if (needed + 1+ len < n)
|
|
|
|
{
|
|
|
|
dest[needed] = val;
|
|
|
|
for (i = 0; i < len; ++i)
|
|
|
|
dest[needed + 1 + i] =
|
|
|
|
weights[idxarr[idxcnt] + i];
|
|
|
|
}
|
|
|
|
needed += 1 + len;
|
|
|
|
#else
|
|
|
|
buflen = utf8_encode (buf, val);
|
|
|
|
if (needed + buflen + len < n)
|
|
|
|
{
|
|
|
|
for (i = 0; i < buflen; ++i)
|
|
|
|
dest[needed + i] = buf[i];
|
|
|
|
for (i = 0; i < len; ++i)
|
|
|
|
dest[needed + buflen + i] =
|
|
|
|
weights[idxarr[idxcnt] + i];
|
|
|
|
}
|
|
|
|
needed += buflen + len;
|
|
|
|
#endif
|
|
|
|
idxarr[idxcnt] += len;
|
|
|
|
val = 1;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
/* Note that we don't have to increment `idxarr[idxcnt]'
|
|
|
|
since the length is zero. */
|
|
|
|
++val;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* Remember where the backwards series started. */
|
|
|
|
if (backw_stop == ~0ul)
|
|
|
|
backw_stop = idxcnt;
|
|
|
|
}
|
|
|
|
|
|
|
|
rule = rulesets[rulearr[idxcnt + 1] * nrules + pass];
|
|
|
|
}
|
|
|
|
|
|
|
|
if (backw_stop != ~0ul)
|
|
|
|
{
|
|
|
|
/* Handle the pushed elements now. */
|
|
|
|
size_t backw;
|
|
|
|
|
|
|
|
backw = idxmax - 1;
|
|
|
|
while (backw > backw_stop)
|
|
|
|
{
|
|
|
|
size_t len = weights[idxarr[--backw]++];
|
|
|
|
if (len != 0)
|
|
|
|
{
|
|
|
|
#ifdef WIDE_CHAR_VERSION
|
|
|
|
if (needed + 1 + len < n)
|
|
|
|
{
|
|
|
|
dest[needed] = val;
|
|
|
|
for (i = 0; i < len; ++i)
|
|
|
|
dest[needed + 1 + i] =
|
|
|
|
weights[idxarr[backw] + i];
|
|
|
|
}
|
|
|
|
needed += 1 + len;
|
|
|
|
#else
|
|
|
|
buflen = utf8_encode (buf, val);
|
|
|
|
if (needed + buflen + len < n)
|
|
|
|
{
|
|
|
|
for (i = 0; i < buflen; ++i)
|
|
|
|
dest[needed + i] = buf[i];
|
|
|
|
for (i = 0; i < len; ++i)
|
|
|
|
dest[needed + buflen + i] =
|
|
|
|
weights[idxarr[backw] + i];
|
|
|
|
}
|
|
|
|
needed += buflen + len;
|
|
|
|
#endif
|
|
|
|
idxarr[backw] += len;
|
|
|
|
val = 1;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
++val;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Finally store the byte to separate the passes or terminate
|
|
|
|
the string. */
|
|
|
|
if (needed < n)
|
|
|
|
dest[needed] = pass + 1 < nrules ? L('\1') : L('\0');
|
|
|
|
++needed;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* This is a little optimization: many collation specifications have
|
|
|
|
a `position' rule at the end and if no non-ignored character
|
|
|
|
is found the last \1 byte is immediately followed by a \0 byte
|
|
|
|
signalling this. We can avoid the \1 byte(s). */
|
2006-11-10 04:20:23 +08:00
|
|
|
if (needed > 2 && needed == last_needed + 1)
|
2004-03-15 05:12:06 +08:00
|
|
|
{
|
|
|
|
/* Remove the \1 byte. */
|
2006-11-10 23:20:59 +08:00
|
|
|
if (--needed <= n)
|
2006-11-10 04:20:23 +08:00
|
|
|
dest[needed - 1] = L('\0');
|
2004-03-15 05:12:06 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Return the number of bytes/words we need, but don't count the NUL
|
|
|
|
byte/word at the end. */
|
|
|
|
return needed - 1;
|
|
|
|
}
|
2015-01-13 14:03:56 +08:00
|
|
|
|
|
|
|
size_t
|
Use locale_t, not __locale_t, throughout glibc
<locale.h> is specified to define locale_t in POSIX.1-2008, and so are
all of the headers that define functions that take locale_t arguments.
Under _GNU_SOURCE, the additional headers that define such functions
have also always defined locale_t. Therefore, there is no need to use
__locale_t in public function prototypes, nor in any internal code.
* ctype/ctype-c99_l.c, ctype/ctype.h, ctype/ctype_l.c
* include/monetary.h, include/stdlib.h, include/time.h
* include/wchar.h, locale/duplocale.c, locale/freelocale.c
* locale/global-locale.c, locale/langinfo.h, locale/locale.h
* locale/localeinfo.h, locale/newlocale.c
* locale/nl_langinfo_l.c, locale/uselocale.c
* localedata/bug-usesetlocale.c, localedata/tst-xlocale2.c
* stdio-common/vfscanf.c, stdlib/monetary.h, stdlib/stdlib.h
* stdlib/strfmon_l.c, stdlib/strtod_l.c, stdlib/strtof_l.c
* stdlib/strtol.c, stdlib/strtol_l.c, stdlib/strtold_l.c
* stdlib/strtoll_l.c, stdlib/strtoul_l.c, stdlib/strtoull_l.c
* string/strcasecmp.c, string/strcoll_l.c, string/string.h
* string/strings.h, string/strncase.c, string/strxfrm_l.c
* sysdeps/ieee754/float128/strtof128_l.c
* sysdeps/ieee754/float128/wcstof128.c
* sysdeps/ieee754/float128/wcstof128_l.c
* sysdeps/ieee754/ldbl-128ibm/strtold_l.c
* sysdeps/ieee754/ldbl-64-128/strtold_l.c
* sysdeps/ieee754/ldbl-opt/nldbl-compat.c
* sysdeps/ieee754/ldbl-opt/nldbl-strfmon_l.c
* sysdeps/ieee754/ldbl-opt/nldbl-strtold_l.c
* sysdeps/ieee754/ldbl-opt/nldbl-wcstold_l.c
* sysdeps/powerpc/powerpc32/power7/strcasecmp.S
* sysdeps/powerpc/powerpc64/power7/strcasecmp.S
* sysdeps/x86_64/strcasecmp_l-nonascii.c
* sysdeps/x86_64/strncase_l-nonascii.c, time/strftime_l.c
* time/strptime_l.c, time/time.h, wcsmbs/mbsrtowcs_l.c
* wcsmbs/wchar.h, wcsmbs/wcscasecmp.c, wcsmbs/wcsncase.c
* wcsmbs/wcstod.c, wcsmbs/wcstod_l.c, wcsmbs/wcstof.c
* wcsmbs/wcstof_l.c, wcsmbs/wcstol_l.c, wcsmbs/wcstold.c
* wcsmbs/wcstold_l.c, wcsmbs/wcstoll_l.c, wcsmbs/wcstoul_l.c
* wcsmbs/wcstoull_l.c, wctype/iswctype_l.c
* wctype/towctrans_l.c, wctype/wcfuncs_l.c
* wctype/wctrans_l.c, wctype/wctype.h, wctype/wctype_l.c:
Change all uses of __locale_t to locale_t.
2017-06-20 21:26:43 +08:00
|
|
|
STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, locale_t l)
|
2015-01-13 14:03:56 +08:00
|
|
|
{
|
|
|
|
locale_data_t l_data;
|
|
|
|
struct __locale_data *current = l->__locales[LC_COLLATE];
|
|
|
|
l_data.nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word;
|
|
|
|
|
|
|
|
/* Handle byte comparison case. */
|
|
|
|
if (l_data.nrules == 0)
|
|
|
|
{
|
|
|
|
size_t srclen = STRLEN (src);
|
|
|
|
|
|
|
|
if (n != 0)
|
|
|
|
STPNCPY (dest, src, MIN (srclen + 1, n));
|
|
|
|
|
|
|
|
return srclen;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Handle an empty string, code hereafter relies on strlen (src) > 0. */
|
|
|
|
if (*src == L('\0'))
|
|
|
|
{
|
|
|
|
if (n != 0)
|
|
|
|
*dest = L('\0');
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Get the locale data. */
|
|
|
|
l_data.rulesets = (unsigned char *)
|
|
|
|
current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string;
|
|
|
|
l_data.table = (int32_t *)
|
|
|
|
current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string;
|
|
|
|
l_data.weights = (USTRING_TYPE *)
|
|
|
|
current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string;
|
|
|
|
l_data.extra = (USTRING_TYPE *)
|
|
|
|
current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string;
|
|
|
|
l_data.indirect = (int32_t *)
|
|
|
|
current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string;
|
|
|
|
|
|
|
|
assert (((uintptr_t) l_data.table) % __alignof__ (l_data.table[0]) == 0);
|
|
|
|
assert (((uintptr_t) l_data.weights) % __alignof__ (l_data.weights[0]) == 0);
|
|
|
|
assert (((uintptr_t) l_data.extra) % __alignof__ (l_data.extra[0]) == 0);
|
|
|
|
assert (((uintptr_t) l_data.indirect) % __alignof__ (l_data.indirect[0]) == 0);
|
|
|
|
|
|
|
|
/* We need the elements of the string as unsigned values since they
|
2020-12-11 10:00:00 +08:00
|
|
|
are used as indices. */
|
2015-01-13 14:03:56 +08:00
|
|
|
const USTRING_TYPE *usrc = (const USTRING_TYPE *) src;
|
|
|
|
|
|
|
|
/* Allocate cache for small strings on the stack and fill it with weight and
|
|
|
|
rule indices. If the cache size is not sufficient, continue with the
|
|
|
|
uncached xfrm version. */
|
|
|
|
size_t idxmax = 0;
|
|
|
|
const USTRING_TYPE *cur = usrc;
|
|
|
|
int32_t *idxarr = alloca (SMALL_STR_SIZE * sizeof (int32_t));
|
|
|
|
unsigned char *rulearr = alloca (SMALL_STR_SIZE + 1);
|
|
|
|
|
|
|
|
do
|
|
|
|
{
|
|
|
|
int32_t tmp = findidx (l_data.table, l_data.indirect, l_data.extra, &cur,
|
|
|
|
-1);
|
|
|
|
rulearr[idxmax] = tmp >> 24;
|
|
|
|
idxarr[idxmax] = tmp & 0xffffff;
|
|
|
|
|
|
|
|
++idxmax;
|
|
|
|
}
|
|
|
|
while (*cur != L('\0') && idxmax < SMALL_STR_SIZE);
|
|
|
|
|
|
|
|
/* This element is only read, the value never used but to determine
|
|
|
|
another value which then is ignored. */
|
|
|
|
rulearr[idxmax] = '\0';
|
|
|
|
|
|
|
|
/* Do the transformation. */
|
|
|
|
if (*cur == L('\0'))
|
|
|
|
return do_xfrm_cached (dest, n, &l_data, idxmax, idxarr, rulearr);
|
|
|
|
else
|
|
|
|
return do_xfrm (usrc, dest, n, &l_data);
|
|
|
|
}
|
2004-03-15 05:12:06 +08:00
|
|
|
libc_hidden_def (STRXFRM)
|
|
|
|
|
|
|
|
#ifndef WIDE_CHAR_VERSION
|
2002-08-06 16:40:20 +08:00
|
|
|
weak_alias (__strxfrm_l, strxfrm_l)
|
2004-03-15 05:12:06 +08:00
|
|
|
#endif
|