Update.

2000-04-06 Ulrich Drepper <drepper@redhat.com> * locale/programs/charmap.c (charmap_new_char): Add parameter step. Support ..(2).. ellipsis. (parse_charmap): Recognize ..(2).. etc and pass step down. Correctly generate names for UCS4 characters. * locale/programs/ld-ctype.c (struct translit_ignore_t): Add step. (ctype_finish): We know the wide character value for <SP>, don't search. (charclass_symbolic_ellipsis): Handle ..(2).. ellipsis. (charclass_ucs4_ellipsis): Likewise. (read_translit_ignore_entry): Store ellipsis step. (ctype_read): Recognize ..(2).. etc and pass step down. * locale/programs/linereader.c (lr_token): When seeing comment character ignore only rest of line in sources but stop at escaped newline. Recognize ..(2).. and ....(2)..... * locale/programs/locfile-token.h (enum token_t): Add tok_ellipsis2_2 and tok_ellipsis4_2.
2000-04-07 02:38:44 +00:00 · 2000-04-07 02:38:44 +00:00 · a0dc52061f
parent 372f94ba46
commit a0dc52061f
7 changed files with 1242 additions and 33 deletions
--- a/23
+++ b/23
@ -1,3 +1,26 @@
 2000-04-06  Ulrich Drepper  <drepper@redhat.com>
 	* locale/programs/charmap.c (charmap_new_char): Add parameter step.
 	Support ..(2).. ellipsis.
 	(parse_charmap): Recognize ..(2).. etc and pass step down.
 	Correctly generate names for UCS4 characters.
 	* locale/programs/ld-ctype.c (struct translit_ignore_t): Add step.
 	(ctype_finish): We know the wide character value for <SP>,
 	don't search.
 	(charclass_symbolic_ellipsis): Handle ..(2).. ellipsis.
 	(charclass_ucs4_ellipsis): Likewise.
 	(read_translit_ignore_entry): Store ellipsis step.
 	(ctype_read): Recognize ..(2).. etc and pass step down.
 	* locale/programs/linereader.c (lr_token): When seeing comment
 	character ignore only rest of line in sources but stop at escaped
 	newline.
 	Recognize ..(2).. and ....(2).....
 	* locale/programs/locfile-token.h (enum token_t): Add tok_ellipsis2_2
 	and tok_ellipsis4_2.
 2000-04-06  Andreas Jaeger  <aj@suse.de>
 	* sysdeps/unix/sysv/linux/alpha/Dist: init-first.h has been
--- a/locale/programs/charmap.c
+++ b/locale/programs/charmap.c
@ -53,7 +53,7 @@ static void new_width (struct linereader *cmfile, struct charmap_t *result,
 		       unsigned long int width);
 static void charmap_new_char (struct linereader *lr, struct charmap_t *cm,
 			      int nbytes, char *bytes, const char *from,
-			      const char *to, int decimal_ellipsis);
+			      const char *to, int decimal_ellipsis, int step);
 struct charmap_t *
@ -225,6 +225,7 @@ parse_charmap (struct linereader *cmfile)
  char *from_name = NULL;
  char *to_name = NULL;
  enum token_t ellipsis = 0;
  int step = 1;
  /* We don't want symbolic names in string to be translated.  */
  cmfile->translate_strings = 0;
@ -461,7 +462,7 @@ character sets with locking states are not supported"));
 						now->val.str.lenmb);
 	  else
 	    {
-	      obstack_printf (&result->mem_pool, "<%08X>",
+	      obstack_printf (&result->mem_pool, "U%08X",
 			      cmfile->token.val.ucs4);
 	      obstack_1grow (&result->mem_pool, '\0');
 	      from_name = (char *) obstack_finish (&result->mem_pool);
@ -475,9 +476,20 @@ character sets with locking states are not supported"));
 	  /* We have two possibilities: We can see an ellipsis or an
 	     encoding value.  */
 	  if (nowtok == tok_ellipsis3 || nowtok == tok_ellipsis4
-	      || nowtok == tok_ellipsis2)
+	      || nowtok == tok_ellipsis2 || nowtok == tok_ellipsis4_2
 	      || nowtok == tok_ellipsis2_2)
 	    {
 	      ellipsis = nowtok;
 	      if (nowtok == tok_ellipsis4_2)
 		{
 		  step = 2;
 		  nowtok = tok_ellipsis4;
 		}
 	      else if (nowtok == tok_ellipsis2_2)
 		{
 		  step = 2;
 		  nowtok = tok_ellipsis2;
 		}
 	      state = 4;
 	      continue;
 	    }
@ -502,13 +514,15 @@ character sets with locking states are not supported"));
 	  else
 	    charmap_new_char (cmfile, result, now->val.charcode.nbytes,
 			      now->val.charcode.bytes, from_name, to_name,
-			      ellipsis != tok_ellipsis2);
+			      ellipsis != tok_ellipsis2, step);
 	  /* Ignore trailing comment silently.  */
 	  lr_ignore_rest (cmfile, 0);
 	  from_name = NULL;
 	  to_name = NULL;
 	  ellipsis = tok_none;
 	  step = 1;
 	  state = 2;
 	  continue;
@ -531,7 +545,7 @@ character sets with locking states are not supported"));
 					      cmfile->token.val.str.lenmb);
 	  else
 	    {
-	      obstack_printf (&result->mem_pool, "<%08X>",
+	      obstack_printf (&result->mem_pool, "U%08X",
 			      cmfile->token.val.ucs4);
 	      obstack_1grow (&result->mem_pool, '\0');
 	      to_name = (char *) obstack_finish (&result->mem_pool);
@ -814,7 +828,7 @@ charmap_find_value (const struct charmap_t *cm, const char *name, size_t len)
 static void
 charmap_new_char (struct linereader *lr, struct charmap_t *cm,
 		  int nbytes, char *bytes, const char *from, const char *to,
-		  int decimal_ellipsis)
+		  int decimal_ellipsis, int step)
 {
  hash_table *ht = &cm->char_table;
  hash_table *bt = &cm->byte_table;
@ -833,7 +847,7 @@ charmap_new_char (struct linereader *lr, struct charmap_t *cm,
      newp = (struct charseq *) obstack_alloc (ob, sizeof (*newp) + nbytes);
      newp->nbytes = nbytes;
      memcpy (newp->bytes, bytes, nbytes);
-      newp->name = obstack_copy (ob, from, len1 + 1);
+      newp->name = from;
      newp->ucs4 = UNINITIALIZED_CHAR_VALUE;
      if ((from[0] == 'U' || from[0] == 'P') && (len1 == 5 || len1 == 9))
@ -852,7 +866,7 @@ charmap_new_char (struct linereader *lr, struct charmap_t *cm,
 	  char *endp;
 	  errno = 0;
-	  newp->ucs4 = strtoul (from, &endp, 16);
+	  newp->ucs4 = strtoul (from + 1, &endp, 16);
 	  if (endp - from != len1
 	      || (newp->ucs4 == ULONG_MAX && errno == ERANGE)
 	      || newp->ucs4 >= 0x80000000)
@ -916,7 +930,7 @@ hexadecimal range format should use only capital characters"));
      return;
    }
-  for (cnt = from_nr; cnt <= to_nr; ++cnt)
+  for (cnt = from_nr; cnt <= to_nr; cnt += step)
    {
      char *name_end;
      obstack_printf (ob, decimal_ellipsis ? "%.*s%0*d" : "%.*s%0*X",
--- a/locale/programs/ld-ctype.c
+++ b/locale/programs/ld-ctype.c
@ -97,6 +97,7 @@ struct translit_ignore_t
 {
  uint32_t from;
  uint32_t to;
  uint32_t step;
  const char *fname;
  size_t lineno;
@ -504,13 +505,8 @@ character '%s' in class `%s' must not be in class `%s'"),
    }
  /* ... and now test <SP> as a special case.  */
-  space_value = repertoire_find_value (ctype->repertoire, "SP", 2);
+  space_value = 32;
-  if (space_value == ILLEGAL_CHAR_VALUE)
+  if (((cnt = BITPOS (tok_space),
    {
      if (!be_quiet)
 	error (0, 0, _("character <SP> not defined in character map"));
    }
  else if (((cnt = BITPOS (tok_space),
 	(ELEM (ctype, class_collection, , space_value)
 	 & BITw (tok_space)) == 0)
       || (cnt = BITPOS (tok_blank),
@ -1236,7 +1232,8 @@ get_character (struct token *now, struct charmap_t *charmap,
 }
-/* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>'.  */
+/* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
   the ..(2).. and ....(2).... counterparts.  */
 static void
 charclass_symbolic_ellipsis (struct linereader *ldfile,
 			     struct locale_ctype_t *ctype,
@ -1246,7 +1243,7 @@ charclass_symbolic_ellipsis (struct linereader *ldfile,
 			     const char *last_str,
 			     unsigned long int class256_bit,
 			     unsigned long int class_bit, int base,
-			     int ignore_content, int handle_digits)
+			     int ignore_content, int handle_digits, int step)
 {
  const char *nowstr = now->val.str.startmb;
  char tmp[now->val.str.lenmb + 1];
@ -1288,7 +1285,7 @@ charclass_symbolic_ellipsis (struct linereader *ldfile,
  if (!ignore_content)
    {
      now->val.str.startmb = tmp;
-      while (++from <= to)
+      while ((from += step) <= to)
 	{
 	  struct charseq *seq;
 	  uint32_t wch;
@ -1346,7 +1343,7 @@ charclass_symbolic_ellipsis (struct linereader *ldfile,
 }
-/* Ellipsis like in `<U1234>..<U2345>'.  */
+/* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'.  */
 static void
 charclass_ucs4_ellipsis (struct linereader *ldfile,
 			 struct locale_ctype_t *ctype,
@ -1355,7 +1352,7 @@ charclass_ucs4_ellipsis (struct linereader *ldfile,
 			 struct token *now, uint32_t last_wch,
 			 unsigned long int class256_bit,
 			 unsigned long int class_bit, int ignore_content,
-			 int handle_digits)
+			 int handle_digits, int step)
 {
  if (last_wch > now->val.ucs4)
    {
@ -1367,7 +1364,7 @@ to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
    }
  if (!ignore_content)
-    while (++last_wch <= now->val.ucs4)
+    while ((last_wch += step) <= now->val.ucs4)
      {
 	/* We have to find out whether there is a byte sequence corresponding
 	   to this UCS4 value.  */
@ -1376,6 +1373,11 @@ to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
 	snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
 	seq = charmap_find_value (charmap, utmp, 9);
 	if (seq == NULL)
 	  {
 	    snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
 	    seq = charmap_find_value (charmap, utmp, 5);
 	  }
 	if (seq == NULL)
 	  /* Try looking in the repertoire map.  */
@ -1779,6 +1781,7 @@ read_translit_ignore_entry (struct linereader *ldfile,
 	    obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
 	  newp->from = from;
 	  newp->to = from;
 	  newp->step = 1;
 	  newp->next = ctype->translit_ignore;
 	  ctype->translit_ignore = newp;
@ -1788,11 +1791,12 @@ read_translit_ignore_entry (struct linereader *ldfile,
 	 line.  */
      now = lr_token (ldfile, charmap, repertoire);
-      if (now->tok == tok_ellipsis2)
+      if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
 	{
 	  /* XXX Should we bother implementing `....'?  `...' certainly
 	     will not be implemented.  */
 	  uint32_t to;
 	  int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
 	  now = lr_token (ldfile, charmap, repertoire);
@ -1823,7 +1827,10 @@ read_translit_ignore_entry (struct linereader *ldfile,
 	    {
 	      /* Make sure the `to'-value is larger.  */
 	      if (to >= from)
 		{
 		  newp->to = to;
 		  newp->step = step;
 		}
 	      else
 		lr_error (ldfile, _("\
 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
@ -1866,6 +1873,7 @@ ctype_read (struct linereader *ldfile, struct localedef_t *result,
  uint32_t last_wch = 0;
  enum token_t last_token;
  enum token_t ellipsis_token;
  int step;
  char last_charcode[16];
  size_t last_charcode_len = 0;
  const char *last_str = NULL;
@ -2040,6 +2048,7 @@ ctype_read (struct linereader *ldfile, struct localedef_t *result,
 	  ctype->class_done |= class_bit;
 	  last_token = tok_none;
 	  ellipsis_token = tok_none;
 	  step = 1;
 	  now = lr_token (ldfile, charmap, NULL);
 	  while (now->tok != tok_eol && now->tok != tok_eof)
 	    {
@ -2140,7 +2149,7 @@ the absolute ellipsis `...' must not be used"));
 						    == tok_ellipsis4
 						    ? 10 : 16),
 						   ignore_content,
-						   handle_digits);
+						   handle_digits, step);
 		    }
 		  else if (last_token == tok_ucs4)
 		    {
@ -2151,7 +2160,8 @@ with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
 		      charclass_ucs4_ellipsis (ldfile, ctype, charmap,
 					       repertoire, now, last_wch,
 					       class256_bit, class_bit,
-					       ignore_content, handle_digits);
+					       ignore_content, handle_digits,
 					       step);
 		    }
 		  else
 		    {
@ -2180,9 +2190,21 @@ with character code range values one must use the absolute ellipsis `...'"));
 		break;
 	      if (last_token != tok_none
-		  && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4)
+		  && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
 		{
 		  if (now->tok == tok_ellipsis2_2)
 		    {
 		      now->tok = tok_ellipsis2;
 		      step = 2;
 		    }
 		  else if (now->tok == tok_ellipsis4_2)
 		    {
 		      now->tok = tok_ellipsis4;
 		      step = 2;
 		    }
 		  ellipsis_token = now->tok;
 		  now = lr_token (ldfile, charmap, NULL);
 		  continue;
 		}
@ -2194,6 +2216,7 @@ with character code range values one must use the absolute ellipsis `...'"));
 	      now = lr_token (ldfile, charmap, NULL);
 	      ellipsis_token = tok_none;
 	      step = 1;
 	    }
 	  break;
--- a/locale/programs/linereader.c
+++ b/locale/programs/linereader.c
@ -185,6 +185,16 @@ lr_token (struct linereader *lr, const struct charmap_t *charmap,
      if (ch != lr->comment_char)
 	break;
      /* Is there a newline at the end of the buffer?  */
      if (lr->buf[lr->bufact - 1] != '\n')
 	{
 	  /* No.  Some people want this to mean that only the line in
 	     the file, not the logical, concatenated line, is ignored.
 	     Let's try this.  */
 	  lr->idx = lr->bufact;
 	  continue;
 	}
      /* Ignore rest of line.  */
      lr_ignore_rest (lr, 0);
      lr->token.tok = tok_eol;
@ -198,6 +208,14 @@ lr_token (struct linereader *lr, const struct charmap_t *charmap,
  /* Match ellipsis.  */
  if (ch == '.')
    {
      if (strncmp (&lr->buf[lr->idx], "...(2)....", 10) == 0)
 	{
 	  int cnt;
 	  for (cnt = 0; cnt < 10; ++cnt)
 	    lr_getc (lr);
 	  lr->token.tok = tok_ellipsis4_2;
 	  return &lr->token;
 	}
      if (strncmp (&lr->buf[lr->idx], "...", 3) == 0)
 	{
 	  lr_getc (lr);
@ -213,6 +231,14 @@ lr_token (struct linereader *lr, const struct charmap_t *charmap,
 	  lr->token.tok = tok_ellipsis3;
 	  return &lr->token;
 	}
      if (strncmp (&lr->buf[lr->idx], ".(2)..", 6) == 0)
 	{
 	  int cnt;
 	  for (cnt = 0; cnt < 6; ++cnt)
 	    lr_getc (lr);
 	  lr->token.tok = tok_ellipsis2_2;
 	  return &lr->token;
 	}
      if (lr->buf[lr->idx] == '.')
 	{
 	  lr_getc (lr);
--- a/locale/programs/locfile-token.h
+++ b/locale/programs/locfile-token.h
@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
@ -31,6 +31,8 @@ enum token_t
  tok_ellipsis2,
  tok_ellipsis3,
  tok_ellipsis4,
  tok_ellipsis2_2,
  tok_ellipsis4_2,
  tok_semicolon,
  tok_comma,
  tok_open_brace,
--- a/localedata/ChangeLog
+++ b/localedata/ChangeLog
@ -1,3 +1,7 @@
 2000-04-06  Ulrich Drepper  <drepper@redhat.com>
 	* locales/i18n: New file.
 2000-03-27  Ulrich Drepper  <drepper@redhat.com>
 	* tst-fmon.data: Update test after last strfmon change.
--- a/localedata/locales/i18n
+++ b/localedata/locales/i18n