Don't autodetect the locale of numbers and

months, as this conflicts with POSIX.2 and is tricky to boot. (FLOATING_COMMA, NLS_STRNCMP, NLS_MAX_GROUPS, NLS_ONE_CHARACTER_STRING): Remove macros no longer used. (nls_grouping, nls_fraction_found, nls_month_found, nls_monthtab, nls_months_collide, nls_keyhead, us_monthtab): Remove variables no longer used. (struct nls_keyfield): Remove types no longer used. (strncoll_s2_readonly, nls_set_fraction, look_for_fraction, nls_month_is_either_locale, nls_numeric_format): Remove functions no longer used. (monthtab): Now has the role that us_monthtab had, but it's const only if ENABLE_NLS is not defined. (C_DECIMAL_POINT): Renamed from FLOATING_POINT. All uses changed. (MONTHS_PER_YEAR): Renamed from NLS_NUM_MONTHS. All uses changed. (struct_month_cmp): Renamed from nls_sort_month_comp. All uses changed. Use strcmp, not strcoll, since the user doesn't care about collating here. (inittables): Read locale data into monthtab, rather than modifying a separate month table and futzing with indirection. Do not worry about colliding months, since we no longer autodetect month locale. (fraccompare): Don't set no-longer-used variable nls_fraction_found. (getmonth): Use strncmp to compare months, since user doesn't care about collating here. Fix bug where code incorrectly assumed that strlen (monthtab[lo].name) == strlen (monthtab[ix].name). (keycompare, main): Don't autodetect month locale. (compare): Don't use NLS_MEMCMP in code that can't be executed if need_locale is false, as NLS_MEMCMP is equivalent to memcmp in that case. (sort, insertkey, main): Don't autodetect numeric locale.
2025-01-11 02:23:23 +08:00 · 1999-05-16 16:18:46 +00:00 · 1999-05-16 16:18:46 +00:00 · 108ff5810f
commit 108ff5810f
parent 4b11c1f734
1 changed files with 36 additions and 411 deletions
--- a/src/sort.c
+++ b/src/sort.c
@ -70,58 +70,33 @@ char *xstrdup ();
   status code greater than 1.  */
 #define SORT_FAILURE 2

-#define FLOATING_POINT  '.'
-#define FLOATING_COMMA  ','
+#define C_DECIMAL_POINT '.'
 #define NEGATION_SIGN   '-'
 #define NUMERIC_ZERO    '0'

 #ifdef ENABLE_NLS
 # define NLS_MEMCMP(S1, S2, Len) strncoll (S1, S2, Len)
-# define NLS_STRNCMP(S1, S2, Len) strncoll_s2_readonly (S1, S2, Len)
 #else
 # define NLS_MEMCMP(S1, S2, Len) memcmp (S1, S2, Len)
-# define NLS_STRNCMP(S1, S2, Len) strncmp (S1, S2, Len)
 #endif

 #ifdef ENABLE_NLS

 static char decimal_point;
 static int th_sep; /* if CHAR_MAX + 1, then there is no thousands separator */
-static char *nls_grouping;

 /* This is "C" locale, need another? */
 static int need_locale = 0;

-/* Should we look for decimal point? */
-static int nls_fraction_found = 1;
-
-/* Look for month notations in text? */
-static int nls_month_found = 1;
-
 # define IS_THOUSANDS_SEP(x) ((x) == th_sep)

 #else

-# define decimal_point FLOATING_POINT
+# define decimal_point C_DECIMAL_POINT
 # define IS_THOUSANDS_SEP(x) 0

 #endif

-/* If native language support is requested, make a 1-1 map to the
-   locale character map, otherwise ensure normal behavior.  */
-#ifdef ENABLE_NLS
-
-/* 12 months in a year */
-# define NLS_NUM_MONTHS 12
-
-/* Maximum number of elements, to allocate per allocation unit  */
-# define NLS_MAX_GROUPS 8
-
-/* A string with one character, to enforce char collation */
-# define NLS_ONE_CHARACTER_STRING " "
-
-#endif
-
 /* The kind of blanks for '-b' to skip in various options. */
 enum blanktype { bl_start, bl_end, bl_both };

@ -193,12 +168,21 @@ static int nonprinting[UCHAR_LIM];
 /* Table of non-dictionary characters (not letters, digits, or blanks). */
 static int nondictionary[UCHAR_LIM];

-/* Translation table folding lower case to upper. */
+/* Translation table folding lower case to upper.
+   FIXME: This doesn't work with multibyte character sets.  */
 static char fold_toupper[UCHAR_LIM];

-/* Table mapping 3-letter month names to integers.
+#define MONTHS_PER_YEAR 12
+
+#ifndef ENABLE_NLS
+# define NLS_CONST const
+#else
+# define NLS_CONST /* empty */
+#endif
+
+/* Table mapping month names to integers.
   Alphabetic order allows binary search. */
-static const struct month us_monthtab[] =
+static NLS_CONST struct month monthtab[] =
 {
  {"APR", 4},
  {"AUG", 8},
@ -214,26 +198,6 @@ static const struct month us_monthtab[] =
  {"SEP", 9}
 };

-#ifdef ENABLE_NLS
-
-/* Locale may have a different idea of month names   */
-static struct month nls_monthtab[NLS_NUM_MONTHS];
-static int nls_months_collide[NLS_NUM_MONTHS + 1];
-
-/* Numeric keys, to search for numeric format */
-struct nls_keyfield
-{
-  struct keyfield *key;
-  struct nls_keyfield *next;
-};
-
-static struct nls_keyfield *nls_keyhead = NULL;
-
-#endif
-
-/* Which month table to use in the program, default C */
-static const struct month *monthtab = us_monthtab;
-
 /* During the merge phase, the number of files to merge at once. */
 #define NMERGE 16

@ -502,10 +466,10 @@ zaptemp (const char *name)
 /* Initialize the character class tables. */

 static int
-nls_sort_month_comp (const void *m1, const void *m2)
+struct_month_cmp (const void *m1, const void *m2)
 {
-  return strcoll (((const struct month *) m1)->name,
-		  ((const struct month *) m2)->name);
+  return strcmp (((const struct month *) m1)->name,
+		 ((const struct month *) m2)->name);
 }

 /* Do collation on strings S1 and S2, but for at most L characters.
@ -535,35 +499,6 @@ strncoll (char *s1, char *s2, int len)
  return diff;
 }

-/* Do collation on strings S1 and S2, but for at most L characters.
-   Use the fact, that we KNOW that S2 is the shorter string and has
-   length LEN.  */
-static int
-strncoll_s2_readonly (char *s1, const char *s2, int len)
-{
-  register int diff;
-
-  assert (len == strlen (s2));
-  assert (len <= strlen (s1));
-
-  if (need_locale)
-    {
-      /* Emulate a strncoll function, by forcing strcoll to compare
-	 only the first LEN characters in each string. */
-      register unsigned char n1 = s1[len];
-
-      s1[len] = 0;
-      diff = strcoll (s1, s2);
-      s1[len] = n1;
-    }
-  else
-    {
-      diff = memcmp (s1, s2, len);
-    }
-
-  return diff;
-}
-
 #endif /* NLS */

 static void
@ -586,45 +521,27 @@ inittables (void)
    }

 #if defined ENABLE_NLS && HAVE_NL_LANGINFO
-  /* If We're not in the "C" locale, read in different names for months. */
+  /* If we're not in the "C" locale, read different names for months.  */
  if (need_locale)
    {
-      nls_months_collide[0] = 1;	/* if an error, look again       */
-      for (i = 0; i < NLS_NUM_MONTHS; i++)
+      for (i = 0; i < MONTHS_PER_YEAR; i++)
 	{
 	  char *s;
 	  size_t s_len;
-	  int j;
+	  size_t j;
+	  char *name;

-	  s = (char *) nl_langinfo (ABMON_1 + us_monthtab[i].val - 1);
+	  s = (char *) nl_langinfo (ABMON_1 + i);
 	  s_len = strlen (s);
-	  nls_monthtab[i].name = (char *) xmalloc (s_len + 1);
-	  nls_monthtab[i].val = us_monthtab[i].val;
+	  monthtab[i].name = name = (char *) xmalloc (s_len + 1);
+	  monthtab[i].val = i + 1;

-	  /* Be careful: abreviated month names
-	     may be longer than the usual 3 characters.  */
 	  for (j = 0; j < s_len; j++)
-	    nls_monthtab[i].name[j] = fold_toupper[UCHAR (s[j])];
-	  nls_monthtab[i].name[j] = '\0';
-
-	  nls_months_collide[nls_monthtab[i].val] = 0;
-	  for (j = 0; j < NLS_NUM_MONTHS; ++j)
-	    {
-	      if (STREQ (nls_monthtab[i].name, us_monthtab[i].name))
-		{
-		  /* There are indeed some month names in English which
-		     collide with the NLS name.  */
-		  nls_months_collide[nls_monthtab[i].val] = 1;
-		  break;
-		}
-	    }
+	    name[j] = fold_toupper[UCHAR (s[j])];
+	  name[j] = '\0';
 	}
-      /* Now quicksort the month table (should be sorted already!).
-         However, another locale doesn't rule out the possibility
-         of a different order of month names. */
-      qsort ((void *) nls_monthtab, NLS_NUM_MONTHS,
-	     sizeof (struct month), nls_sort_month_comp);
-      monthtab = nls_monthtab;
+      qsort ((void *) monthtab, MONTHS_PER_YEAR,
+	     sizeof (struct month), struct_month_cmp);
    }
 #endif /* NLS */
 }
@ -948,10 +865,6 @@ findlines (struct buffer *buf, struct lines *lines)
 static int
 fraccompare (register const char *a, register const char *b)
 {
-#ifdef ENABLE_NLS
-  nls_fraction_found = 1;
-#endif
-
  if (*a == decimal_point && *b == decimal_point)
    {
      while (*++a == *++b)
@ -986,160 +899,6 @@ fraccompare (register const char *a, register const char *b)
   machine numbers.  Comparatively slow for short strings, but asymptotically
   hideously fast. */

-/* The code here, is like the above... continuous reoccurrance of the
-   same code... improved 15-JAN-1997 in connection with native languages
-   support */
-
-#ifdef ENABLE_NLS
-
-/* Decide the kind of fraction the program will use */
-static void
-nls_set_fraction (char ch)
-{
-  if (!nls_fraction_found && ch != decimal_point)
-    {
-      if (ch == FLOATING_POINT)
-	{				/* US style */
-	  decimal_point = FLOATING_POINT;
-	  th_sep = FLOATING_COMMA;
-	}
-      else if (ch == FLOATING_COMMA)
-	{				/* EU style */
-	  decimal_point = FLOATING_COMMA;
-	  th_sep = FLOATING_POINT;
-	}
-      else if (ch != decimal_point)
-	{				/* Alien    */
-	  decimal_point = ch;
-	  th_sep = CHAR_MAX + 1;
-	}
-    }
-  nls_fraction_found = 1;
-}
-
-/* Look for a fraction
-   It isn't as simple as it looks... however, consider a number:
-      1.234,00
-      1,234.00
-   It's easy to tell which is a decimal point, and which isn't.  We use
-   the grouping information to find out how many digits are grouped together
-   for thousand separator.
-
-   The idea here, is to use the grouping information... but not to
-   spend time with verifying the groups... not too much time, anyway.
-   so, a number represented to us as:
-      1.234.567,89
-   will be taken and separated into different groups, separated by a
-   separator character (Decimal point or thousands separator).
-      {1,234,567}
-   these are the groups of digits that lead to a separator character,
-   and with the trailing group is added:
-      {1,234,567,89}
-   resulting in 4 groups of numbers.  If the resulting number of groups,
-   are none, or just 1... this is not enough to decide anything about
-   the decimal point.  We need at least two for that.  With two groups
-   we have at least one separator.  That separator can be a decimal
-   point, or a thousands separator... if it is a thousands separator
-   the number of digits in the last group, will comply with the first
-   rule in the grouping rule for numeric values. i.e.
-      |{89}| = grouping[0]
-   if so, and there are only two groups of numbers, the value cannot
-   be determined.  If there are three or more numbers, the separator
-   separating the groups is checked.  If these are the same, the
-   character is determined to be a thousands separator.  If they are
-   not the same, the last separator is determined to be a decimal
-   point.  If checking the grouping rules, we find out that there
-   are no grouping rules defined, either the grouping rules is NULL
-   or the first grouping number is 0, then the locale format is used.
-
-   We try to take an advantage of a special situation.  If the trailing
-   group, the one that normally should be the fractional part, turns
-   out to have the same length as the thousands separator rule says,
-   making a doubt on that it may be a decimal point, we look for the
-   group before that, i.e. with a two group form:
-     {1234,567}
-   where the grouping rule is 3;3... we take a look at group 1, and find
-   out that |{1234}| > larger of the two first grouping rules, then
-   the separator has to be a decimal point...
-   */
-
-static void
-look_for_fraction (const char *s, const char *e)
-{
-  register const char *p;
-  register unsigned short n = 0;
-  static unsigned short max_groups = 0;
-  static unsigned short *groups = NULL;
-
-  if (groups == NULL)
-    {
-      max_groups = NLS_MAX_GROUPS;
-      groups = (unsigned short *) xmalloc (sizeof (*groups) * max_groups);
-    }
-
-  /* skip blanks and signs */
-  while (blanks[UCHAR (*s)] || *s == NEGATION_SIGN)
-    s++;
-  /* groups = {}, n = 0 */
-  for (p = s; p < e; p++)
-    {
-      /* groups[n]={number of digits leading to separator n}
-         n = number of separators so far */
-      if (*p == decimal_point || *p == th_sep || *p == FLOATING_POINT)
-	{
-	  if (++n >= max_groups)
-	    {
-	      /* BIG Number... enlarge table */
-	      max_groups += NLS_MAX_GROUPS;
-	      groups = (unsigned short *) xrealloc ((char *) groups,
-						    (sizeof (*groups)
-						     * max_groups));
-	    }
-	  groups[n] = (unsigned short) (p - s);
-	  s = p + 1;
-	}
-      else if (!ISDIGIT (*p))
-	break;
-      /* mem[s..p]=digits only */
-    }
-  /* n = number of separators in s..e */
-  groups[++n] = (short) (p - s);
-  /* n = groups in the number */
-  if (n <= 1)
-    return;			/* Only one group of numbers... not enough */
-  p = nls_grouping;
-  /* p = address of group rules
-     s = address of next character after separator */
-  s = s - 1;			/* s = address of last separator */
-  if (p && *p)
-    {
-      /* a legal trailing group, iff groups[n] == first rule */
-      if (groups[n] != (short) *p)
-	nls_set_fraction (*s);
-      else
-	{
-	  if (n == 2)
-	    {			/* Only two groups */
-	      if (groups[n - 1] > max (p[0], p[1]))
-		nls_set_fraction (*s);
-	      return;
-	    }
-	  /* if the separators are the same, it's a thousands */
-	  if (*s != *(s - groups[n]))
-	    nls_set_fraction (*s);
-	  /* s[0] = thousands separator */
-	  else if (*s == th_sep)
-	    nls_fraction_found = 1;
-	}
-    }
-  else
-    {
-      /* no grouping allowed here, last separator IS decimal point */
-      nls_set_fraction (*s);
-    }
-}
-#endif
-
 static int
 numcompare (register const char *a, register const char *b)
 {
@ -1303,7 +1062,7 @@ static int
 getmonth (const char *s, int len)
 {
  char *month;
-  register int i, lo = 0, hi = 12, result;
+  register int i, lo = 0, hi = MONTHS_PER_YEAR, result;

  while (len > 0 && blanks[UCHAR (*s)])
    {
@ -1325,38 +1084,19 @@ getmonth (const char *s, int len)
    {
      int ix = (lo + hi) / 2;

-      len = strlen (monthtab[ix].name);
-      if (NLS_STRNCMP (month, monthtab[ix].name, len) < 0)
+      if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0)
 	hi = ix;
      else
 	lo = ix;
    }
  while (hi - lo > 1);

-  result = (!strncmp (month, monthtab[lo].name, len) ? monthtab[lo].val : 0);
+  result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name))
+	    ? monthtab[lo].val : 0);

  return result;
 }

-#ifdef ENABLE_NLS
-/* Look for the month in locale table, and if that fails try with
-   us month name table                                              */
-static int
-nls_month_is_either_locale (const char *s, int len)
-{
-  int ind;
-
-  monthtab = nls_monthtab;
-  ind = getmonth (s, len);
-  if (ind == 0)
-    {
-      monthtab = us_monthtab;
-      ind = getmonth (s, len);
-    }
-  return ind;
-}
-#endif
-
 /* Compare two lines A and B trying every key in sequence until there
   are no more keys or a difference is found. */

@ -1460,32 +1200,7 @@ keycompare (const struct line *a, const struct line *b)
 	}
      else if (key->month)
 	{
-#ifdef ENABLE_NLS
-
-	  /* if we haven't decided which locale to go with, we get the
-	     month name from either.  If either month name is fully
-	     solved and the month name doesn't collide with the other
-	     locale... then use that table from there forward */
-	  if (!nls_month_found)
-	    {
-	      int x;
-
-	      x = nls_month_is_either_locale (texta, lena);
-	      nls_month_found = !nls_months_collide[x];
-	      if (nls_month_found)
-		{
-		  diff = x - getmonth (textb, lenb);
-		}
-	      else
-		{
-		  diff = nls_month_is_either_locale (textb, lenb);
-		  nls_month_found = !nls_months_collide[diff];
-		  diff = x - diff;
-		}
-	    }
-	  else
-#endif
-	    diff = getmonth (texta, lena) - getmonth (textb, lenb);
+	  diff = getmonth (texta, lena) - getmonth (textb, lenb);
 	  if (diff)
 	    return key->reverse ? -diff : diff;
 	  continue;
@ -1658,7 +1373,7 @@ compare (register const struct line *a, register const struct line *b)
      diff = UCHAR (*ap) - UCHAR (*bp);
      if (diff == 0)
 	{
-	  diff = NLS_MEMCMP (ap, bp, mini);
+	  diff = memcmp (ap, bp, mini);
 	  if (diff == 0)
 	    diff = tmpa - tmpb;
 	}
@ -1935,65 +1650,6 @@ mergefps (FILE **fps, register int nfps, FILE *ofp)
    }
 }

-#ifdef ENABLE_NLS
-
-/* Find the numeric format that this file represents to us for sorting. */
-static void
-nls_numeric_format (const struct line *line, int nlines)
-{
-  struct nls_keyfield *n_key = nls_keyhead;
-
-  /* line = first line, nlines = number of lines,
-     nls_fraction_found = false                           */
-  for (; !nls_fraction_found && nlines > 0; line++, nlines--)
-    {
-      int iter;
-      for (iter = 0; !nls_fraction_found; iter++)
-	{
-	  char *text;
-	  char *lim;
-	  struct keyfield *key = n_key->key;
-
-	  /* text = {}, lim = {}, key = first key */
-	  if (iter || line->keybeg == NULL)
-	    {
-	      /* Succeding keys, where the key field is
-                 specified                              */
-	      if (key->eword >= 0) /* key->eword = length of key */
-		lim = limfield (line, key);
-	      else
-		lim = line->text + line->length;
-	      /* lim = end of key field */
-
-	      if (key->sword >= 0) /* key->sword = start of key */
-		text = begfield (line, key);
-	      else
-		text = line->text;
-	      /* text = start of field */
-	    }
-	  else
-	    {
-	      /* First key is always the whole line */
-	      text = line->keybeg;
-	      lim = line->keylim;
-	    }
-	  /* text = start of text to sort
-             lim  = end of text to sort    */
-
-	  look_for_fraction (text, lim);
-
-	  /* nls_fraction_found = decimal_point found? */
-
-	  if ((n_key = n_key->next) == nls_keyhead)
-	    break;  /* No more keys for this line */
-	}
-    }
-  nls_fraction_found = 1;
-  /* decide on current decimal_point known */
-}
-
-#endif
-
 /* Sort the array LINES with NLINES members, using TEMP for temporary space. */

 static void
@ -2132,12 +1788,6 @@ sort (char **files, int nfiles, FILE *ofp)
 	      tmp = (struct line *)
 		xrealloc ((char *) tmp, ntmp * sizeof (struct line));
 	    }
-#ifdef ENABLE_NLS
-	  if (nls_keyhead)
-	    nls_keyhead = nls_keyhead->next;
-	  if (!nls_fraction_found && nls_keyhead)
-	    nls_numeric_format (lines.lines, lines.used);
-#endif
 	  sortlines (lines.lines, lines.used, tmp);
 	  if (feof (fp) && !nfiles && !n_temp_files && !buf.left)
 	    {
@ -2187,23 +1837,6 @@ insertkey (struct keyfield *key)
    k = k->next;
  k->next = key;
  key->next = NULL;
-#ifdef ENABLE_NLS
-  if (key->numeric || key->general_numeric)
-    {
-      struct nls_keyfield *nk;
-
-      nk = (struct nls_keyfield *) xmalloc (sizeof (struct nls_keyfield));
-      nk->key = key;
-      if (nls_keyhead)
-	{
-	  nk->next = nls_keyhead->next;
-	  nls_keyhead->next = nk;
-	}
-      else
-	nk->next = nk;
-      nls_keyhead = nk;
-    }
-#endif
 }

 static void
@ -2347,26 +1980,18 @@ main (int argc, char **argv)
    struct lconv *lconvp = localeconv ();

    /* If the locale doesn't define a decimal point, or if the decimal
-       point is multibyte, use the US notation.  We don't support
+       point is multibyte, use the C decimal point.  We don't support
       multibyte decimal points yet.  */
    decimal_point = *lconvp->decimal_point;
    if (! decimal_point || lconvp->decimal_point[1])
-      decimal_point = FLOATING_POINT;
-    else
-      nls_fraction_found = 0;  /* Figure out which decimal point to use  */
+      decimal_point = C_DECIMAL_POINT;

    /* We don't support multibyte thousands separators yet.  */
    th_sep = *lconvp->thousands_sep;
    if (! th_sep || lconvp->thousands_sep[1])
      th_sep = CHAR_MAX + 1;
-
-    nls_grouping  =  (char *) (lconvp->grouping);
  }

-  nls_month_found = 0;  /* Figure out which month notation to use */
-
-  monthtab = nls_monthtab;
-
 #endif /* NLS */

  bindtextdomain (PACKAGE, LOCALEDIR);