locale/programs/ld-ctype.c

   1 /* Copyright (C) 1995-2017 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
   4
   5    This program is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published
   7    by the Free Software Foundation; version 2 of the License, or
   8    (at your option) any later version.
   9
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, see <http://www.gnu.org/licenses/>.  */
  17
  18 #ifdef HAVE_CONFIG_H
  19 # include <config.h>
  20 #endif
  21
  22 #include <alloca.h>
  23 #include <byteswap.h>
  24 #include <endian.h>
  25 #include <errno.h>
  26 #include <limits.h>
  27 #include <obstack.h>
  28 #include <stdlib.h>
  29 #include <string.h>
  30 #include <wchar.h>
  31 #include <wctype.h>
  32 #include <stdint.h>
  33 #include <sys/uio.h>
  34
  35 #include "localedef.h"
  36 #include "charmap.h"
  37 #include "localeinfo.h"
  38 #include "langinfo.h"
  39 #include "linereader.h"
  40 #include "locfile-token.h"
  41 #include "locfile.h"
  42
  43 #include <assert.h>
  44
  45
  46 /* The bit used for representing a special class.  */
  47 #define BITPOS(class) ((class) - tok_upper)
  48 #define BIT(class) (_ISbit (BITPOS (class)))
  49 #define BITw(class) (_ISwbit (BITPOS (class)))
  50
  51 #define ELEM(ctype, collection, idx, value)                                   \
  52   *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,     \
  53              &ctype->collection##_act idx, value)
  54
  55
/* To be compatible with former implementations we for now restrict
   the number of bits for character classes to 16.  When compatibility
   is not necessary anymore increase the number to 32.

   char_class_t is the restricted 16-bit form, char_class32_t the full
   32-bit form.  Both are plain macros, not typedefs.  */
#define char_class_t uint16_t
#define char_class32_t uint32_t
  61
  62
/* Type to describe a transliteration action.  We have a possibly
   multiple character from-string and a set of multiple character
   to-strings.  All are 32bit values since this is what is used in
   the gconv functions.

   struct translit_to_t is one element of the set of to-strings.  */
struct translit_to_t
{
  /* The to-string, stored as 32-bit values.  */
  uint32_t *str;

  /* Link to a further to-string entry.  Presumably the next
     alternative for the same from-string -- TODO confirm against the
     code that builds the list.  */
  struct translit_to_t *next;
};
  73
/* One transliteration entry: a from-string together with the list of
   to-strings it may be replaced with.  */
struct translit_t
{
  /* The from-string, stored as 32-bit values.  */
  uint32_t *from;

  /* File name and line number; presumably the place in the locale
     source where the entry was defined -- TODO confirm.  */
  const char *fname;
  size_t lineno;

  /* Head of the list of to-strings.  */
  struct translit_to_t *to;

  /* Link to a further transliteration entry.  */
  struct translit_t *next;
};
  85
/* One element of the translit_ignore list kept in struct
   locale_ctype_t.  */
struct translit_ignore_t
{
  /* NOTE(review): these look like the bounds of a character range and
     the increment used to walk it -- confirm with the parser code.  */
  uint32_t from;
  uint32_t to;
  uint32_t step;

  /* File name and line number; presumably the place in the locale
     source where the entry was defined -- TODO confirm.  */
  const char *fname;
  size_t lineno;

  /* Link to a further list element.  */
  struct translit_ignore_t *next;
};
  97
  98
/* Type to describe a transliteration include statement.  */
struct translit_include_t
{
  /* Names recorded for the include statement; going by the member
     names these are the locale and the repertoire to take the
     definitions from -- TODO confirm.  */
  const char *copy_locale;
  const char *copy_repertoire;

  /* Link to a further include statement.  */
  struct translit_include_t *next;
};
 107
/* Provide some dummy pointer for empty string.  The array holds a
   single zero element and is not const-qualified, so it can be
   assigned to plain `uint32_t *' variables.  */
static uint32_t no_str[] = { 0 };
 110
 111
/* The following three inclusions of "3level.h" each instantiate one
   table type.  Every inclusion is configured by the macros defined
   directly before it: TABLE gives the name (this file then uses
   `struct idx_table', `idx_table_init' and so on), ELEMENT the type of
   the stored values and DEFAULT the default value.  See 3level.h for
   the details of the generated code.  */

/* Sparse table of uint32_t.  */
#define TABLE idx_table
#define ELEMENT uint32_t
#define DEFAULT ((uint32_t) ~0)
/* NOTE(review): NO_ADD_LOCALE is interpreted by 3level.h; presumably
   it suppresses the function writing the table to a locale file.  */
#define NO_ADD_LOCALE
#include "3level.h"

/* Sparse table of uint8_t, used for the character widths (see the
   `width' member of struct locale_ctype_t).  */
#define TABLE wcwidth_table
#define ELEMENT uint8_t
#define DEFAULT 0xff
#include "3level.h"

/* Sparse table of int32_t for the character mappings.  While 3level.h
   is processed the name wctrans_table_add is redirected to
   wctrans_table_add_internal, so that this file can provide its own
   wctrans_table_add wrapper after the #undef.  */
#define TABLE wctrans_table
#define ELEMENT int32_t
#define DEFAULT 0
#define wctrans_table_add wctrans_table_add_internal
#include "3level.h"
#undef wctrans_table_add
 130 /* The wctrans_table must actually store the difference between the
 131    desired result and the argument.  */
 132 static inline void
 133 wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
 134 {
 135   wctrans_table_add_internal (t, wc, mapped_wc - wc);
 136 }
 137
/* Construction of sparse 3-level tables.
   See wchar-lookup.h for their structure and the meaning of p and q.

   struct wctype_table is the working form of such a table while it is
   being built.  */

struct wctype_table
{
  /* Parameters.  */
  unsigned int p;
  unsigned int q;
  /* Working representation.  For each of the three levels there is a
     uint32_t array plus two counters; going by the names these are the
     allocated and the used number of entries -- TODO confirm against
     the code that fills the table.  */
  size_t level1_alloc;
  size_t level1_size;
  uint32_t *level1;
  size_t level2_alloc;
  size_t level2_size;
  uint32_t *level2;
  size_t level3_alloc;
  size_t level3_size;
  uint32_t *level3;
  /* NOTE(review): presumably the size of the final, compacted table;
     it is not set anywhere in the visible part of the file.  */
  size_t result_size;
};
 158
/* Forward declaration; the definition is further down in this file.
   Judging by name and signature it adds the table T to the locale
   file FILE -- confirm at the definition.  */
static void add_locale_wctype_table (struct locale_file *file,
                                     struct wctype_table *t);
 161
/* The real definition of the struct for the LC_CTYPE locale.

   Several members come in triples NAME, NAME_max and NAME_act: a
   growable array, its allocated number of elements and the number of
   elements in use.  The ELEM macro depends on this naming scheme.  */
struct locale_ctype_t
{
  /* The wide character values known so far.  Entry I of this array and
     entry I of class_collection describe the same character.
     ctype_startup presets the first 256 entries to the values 0..255.  */
  uint32_t *charnames;
  size_t charnames_max;
  size_t charnames_act;
  /* An index lookup table, to speedup find_idx.  */
  struct idx_table charnames_idx;

  /* The repertoire in use; ctype_finish sets it from repertoire_read
     when a repertoire name is known.  */
  struct repertoire_t *repertoire;

  /* We will allow up to 8 * sizeof (uint32_t) character classes.  */
#define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
  /* Number and names of the defined character classes; presumably
     maintained by ctype_class_new -- TODO confirm.  */
  size_t nr_charclass;
  const char *classnames[MAX_NR_CHARCLASS];
  /* Initialized to ILLEGAL_CHAR_VALUE by ctype_startup.  */
  uint32_t last_class_char;
  /* Class bit masks (_ISbit form) for the 256 single byte values.  */
  uint32_t class256_collection[256];
  /* Class bit masks (_ISwbit form), indexed like charnames.  */
  uint32_t *class_collection;
  size_t class_collection_max;
  size_t class_collection_act;
  uint32_t class_done;
  uint32_t class_offset;

  /* The input digits as multibyte sequences (mbdigits) and as wide
     characters (wcdigits).  ctype_finish only keeps complete groups
     of ten and installs defaults if none are given.  */
  struct charseq **mbdigits;
  size_t mbdigits_act;
  size_t mbdigits_max;
  uint32_t *wcdigits;
  size_t wcdigits_act;
  size_t wcdigits_max;

  /* The ten output digits, again in multibyte and wide character form.
     ctype_finish replaces missing ones with a question mark.  */
  struct charseq *mboutdigits[10];
  uint32_t wcoutdigits[10];
  size_t outdigits_act;

  /* If the following number ever turns out to be too small simply
     increase it.  But I doubt it will.  --drepper@gnu */
#define MAX_NR_CHARMAP 16
  /* The character mappings.  ctype_startup creates "toupper" first and
     "tolower" second, and treats index 0 and 1 of map_collection and
     map256_collection accordingly.  */
  const char *mapnames[MAX_NR_CHARMAP];
  uint32_t *map_collection[MAX_NR_CHARMAP];
  /* toupper ([0]) and tolower ([1]) for the 256 single byte values.  */
  uint32_t map256_collection[2][256];
  size_t map_collection_max[MAX_NR_CHARMAP];
  size_t map_collection_act[MAX_NR_CHARMAP];
  size_t map_collection_nr;
  /* Initialized to MAX_NR_CHARMAP by ctype_startup.  */
  size_t last_map_idx;
  int tomap_done[MAX_NR_CHARMAP];
  uint32_t map_offset;

  /* Transliteration information.  */
  struct translit_include_t *translit_include;
  struct translit_t *translit;
  struct translit_ignore_t *translit_ignore;
  uint32_t ntranslit_ignore;

  uint32_t *default_missing;
  const char *default_missing_file;
  size_t default_missing_lineno;

  /* Set to 1 by ctype_startup if enc_not_ascii_compatible is set.  */
  uint32_t to_nonascii;
  /* Set to 1 by ctype_finish if the single byte case mappings do not
     follow the ASCII rules.  */
  uint32_t nonascii_case;

  /* The arrays for the binary representation.  */
  char_class_t *ctype_b;
  char_class32_t *ctype32_b;
  uint32_t **map_b;
  uint32_t **map32_b;
  uint32_t **class_b;
  struct wctype_table *class_3level;
  struct wctrans_table *map_3level;
  uint32_t *class_name_ptr;
  uint32_t *map_name_ptr;
  struct wcwidth_table width;
  uint32_t mb_cur_max;
  /* Name of the character set, taken from the charmap by ctype_finish
     ("//UNKNOWN//" if the charmap does not specify one).  */
  const char *codeset_name;
  uint32_t *translit_from_idx;
  uint32_t *translit_from_tbl;
  uint32_t *translit_to_idx;
  uint32_t *translit_to_tbl;
  uint32_t translit_idx_size;
  size_t translit_from_tbl_size;
  size_t translit_to_tbl_size;

  /* Obstack initialized by ctype_startup.  */
  struct obstack mempool;
};
 245
 246
/* Marker for an empty slot.  This has the value 0xFFFFFFFF, regardless
   whether 'int' is 16 bit, 32 bit, or 64 bit.  */
#define EMPTY ((uint32_t) ~0)


/* Allocation hooks picked up by the obstack macros (obstack_init and
   friends): chunks are obtained with xmalloc and released with free.  */
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free
 254
 255
/* Prototypes for local functions.  */

/* Create the LC_CTYPE data of LOCALE with its initial content, or let
   LOCALE share the data of COPY_LOCALE.  */
static void ctype_startup (struct linereader *lr, struct localedef_t *locale,
                           const struct charmap_t *charmap,
                           struct localedef_t *copy_locale,
                           int ignore_content);
/* Register the character class NAME.  ctype_startup calls this once
   for each predefined class; the order of those calls determines the
   bit positions.  */
static void ctype_class_new (struct linereader *lr,
                             struct locale_ctype_t *ctype, const char *name);
/* Register the character mapping NAME.  ctype_startup calls this for
   "toupper" and "tolower".  */
static void ctype_map_new (struct linereader *lr,
                           struct locale_ctype_t *ctype,
                           const char *name, const struct charmap_t *charmap);
/* Return a pointer to the element for character IDX in *TABLE (see
   the ELEM macro).  Callers may pass NULL for TABLE, MAX and ACT when
   they only want the side effect of getting IDX entered into the name
   array.  */
static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table,
                           size_t *max, size_t *act, uint32_t idx);
/* Set default value for classes not specified; called by
   ctype_finish.  */
static void set_class_defaults (struct locale_ctype_t *ctype,
                                const struct charmap_t *charmap,
                                struct repertoire_t *repertoire);
/* NOTE(review): not called in the visible part of the file;
   presumably fills the arrays for the binary representation in
   struct locale_ctype_t -- confirm at the definition.  */
static void allocate_arrays (struct locale_ctype_t *ctype,
                             const struct charmap_t *charmap,
                             struct repertoire_t *repertoire);
 274
 275
 276 static const char *longnames[] =
 277 {
 278   "zero", "one", "two", "three", "four",
 279   "five", "six", "seven", "eight", "nine"
 280 };
 281 static const char *uninames[] =
 282 {
 283   "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
 284   "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
 285 };
 286 static const unsigned char digits[] = "0123456789";
 287
 288
 289 static void
 290 ctype_startup (struct linereader *lr, struct localedef_t *locale,
 291                const struct charmap_t *charmap,
 292                struct localedef_t *copy_locale, int ignore_content)
 293 {
 294   unsigned int cnt;
 295   struct locale_ctype_t *ctype;
 296
 297   if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL)
 298     {
 299       if (copy_locale == NULL)
 300         {
 301           /* Allocate the needed room.  */
 302           locale->categories[LC_CTYPE].ctype = ctype =
 303             (struct locale_ctype_t *) xcalloc (1,
 304                                                sizeof (struct locale_ctype_t));
 305
 306           /* We have seen no names yet.  */
 307           ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
 308           ctype->charnames = (uint32_t *) xmalloc (ctype->charnames_max
 309                                                    * sizeof (uint32_t));
 310           for (cnt = 0; cnt < 256; ++cnt)
 311             ctype->charnames[cnt] = cnt;
 312           ctype->charnames_act = 256;
 313           idx_table_init (&ctype->charnames_idx);
 314
 315           /* Fill character class information.  */
 316           ctype->last_class_char = ILLEGAL_CHAR_VALUE;
 317           /* The order of the following instructions determines the bit
 318              positions!  */
 319           ctype_class_new (lr, ctype, "upper");
 320           ctype_class_new (lr, ctype, "lower");
 321           ctype_class_new (lr, ctype, "alpha");
 322           ctype_class_new (lr, ctype, "digit");
 323           ctype_class_new (lr, ctype, "xdigit");
 324           ctype_class_new (lr, ctype, "space");
 325           ctype_class_new (lr, ctype, "print");
 326           ctype_class_new (lr, ctype, "graph");
 327           ctype_class_new (lr, ctype, "blank");
 328           ctype_class_new (lr, ctype, "cntrl");
 329           ctype_class_new (lr, ctype, "punct");
 330           ctype_class_new (lr, ctype, "alnum");
 331
 332           ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
 333           ctype->class_collection
 334             = (uint32_t *) xcalloc (sizeof (unsigned long int),
 335                                     ctype->class_collection_max);
 336           ctype->class_collection_act = 256;
 337
 338           /* Fill character map information.  */
 339           ctype->last_map_idx = MAX_NR_CHARMAP;
 340           ctype_map_new (lr, ctype, "toupper", charmap);
 341           ctype_map_new (lr, ctype, "tolower", charmap);
 342
 343           /* Fill first 256 entries in `toXXX' arrays.  */
 344           for (cnt = 0; cnt < 256; ++cnt)
 345             {
 346               ctype->map_collection[0][cnt] = cnt;
 347               ctype->map_collection[1][cnt] = cnt;
 348
 349               ctype->map256_collection[0][cnt] = cnt;
 350               ctype->map256_collection[1][cnt] = cnt;
 351             }
 352
 353           if (enc_not_ascii_compatible)
 354             ctype->to_nonascii = 1;
 355
 356           obstack_init (&ctype->mempool);
 357         }
 358       else
 359         ctype = locale->categories[LC_CTYPE].ctype =
 360           copy_locale->categories[LC_CTYPE].ctype;
 361     }
 362 }
 363
 364
 365 void
 366 ctype_finish (struct localedef_t *locale, const struct charmap_t *charmap)
 367 {
 368   /* See POSIX.2, table 2-6 for the meaning of the following table.  */
 369 #define NCLASS 12
 370   static const struct
 371   {
 372     const char *name;
 373     const char allow[NCLASS];
 374   }
 375   valid_table[NCLASS] =
 376   {
 377     /* The order is important.  See token.h for more information.
 378        M = Always, D = Default, - = Permitted, X = Mutually exclusive  */
 379     { "upper",  "--MX-XDDXXX-" },
 380     { "lower",  "--MX-XDDXXX-" },
 381     { "alpha",  "---X-XDDXXX-" },
 382     { "digit",  "XXX--XDDXXX-" },
 383     { "xdigit", "-----XDDXXX-" },
 384     { "space",  "XXXXX------X" },
 385     { "print",  "---------X--" },
 386     { "graph",  "---------X--" },
 387     { "blank",  "XXXXXM-----X" },
 388     { "cntrl",  "XXXXX-XX--XX" },
 389     { "punct",  "XXXXX-DD-X-X" },
 390     { "alnum",  "-----XDDXXX-" }
 391   };
 392   size_t cnt;
 393   int cls1, cls2;
 394   uint32_t space_value;
 395   struct charseq *space_seq;
 396   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 397   int warned;
 398   const void *key;
 399   size_t len;
 400   void *vdata;
 401   void *curs;
 402
 403   /* Now resolve copying and also handle completely missing definitions.  */
 404   if (ctype == NULL)
 405     {
 406       const char *repertoire_name;
 407
 408       /* First see whether we were supposed to copy.  If yes, find the
 409          actual definition.  */
 410       if (locale->copy_name[LC_CTYPE] != NULL)
 411         {
 412           /* Find the copying locale.  This has to happen transitively since
 413              the locale we are copying from might also copying another one.  */
 414           struct localedef_t *from = locale;
 415
 416           do
 417             from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
 418                                 from->repertoire_name, charmap);
 419           while (from->categories[LC_CTYPE].ctype == NULL
 420                  && from->copy_name[LC_CTYPE] != NULL);
 421
 422           ctype = locale->categories[LC_CTYPE].ctype
 423             = from->categories[LC_CTYPE].ctype;
 424         }
 425
 426       /* If there is still no definition issue an warning and create an
 427          empty one.  */
 428       if (ctype == NULL)
 429         {
 430           if (! be_quiet)
 431             WITH_CUR_LOCALE (error (0, 0, _("\
 432 No definition for %s category found"), "LC_CTYPE"));
 433           ctype_startup (NULL, locale, charmap, NULL, 0);
 434           ctype = locale->categories[LC_CTYPE].ctype;
 435         }
 436
 437       /* Get the repertoire we have to use.  */
 438       repertoire_name = locale->repertoire_name ?: repertoire_global;
 439       if (repertoire_name != NULL)
 440         ctype->repertoire = repertoire_read (repertoire_name);
 441     }
 442
 443   /* We need the name of the currently used 8-bit character set to
 444      make correct conversion between this 8-bit representation and the
 445      ISO 10646 character set used internally for wide characters.  */
 446   ctype->codeset_name = charmap->code_set_name;
 447   if (ctype->codeset_name == NULL)
 448     {
 449       if (! be_quiet)
 450         WITH_CUR_LOCALE (error (0, 0, _("\
 451 No character set name specified in charmap")));
 452       ctype->codeset_name = "//UNKNOWN//";
 453     }
 454
 455   /* Set default value for classes not specified.  */
 456   set_class_defaults (ctype, charmap, ctype->repertoire);
 457
 458   /* Check according to table.  */
 459   for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
 460     {
 461       uint32_t tmp = ctype->class_collection[cnt];
 462
 463       if (tmp != 0)
 464         {
 465           for (cls1 = 0; cls1 < NCLASS; ++cls1)
 466             if ((tmp & _ISwbit (cls1)) != 0)
 467               for (cls2 = 0; cls2 < NCLASS; ++cls2)
 468                 if (valid_table[cls1].allow[cls2] != '-')
 469                   {
 470                     int eq = (tmp & _ISwbit (cls2)) != 0;
 471                     switch (valid_table[cls1].allow[cls2])
 472                       {
 473                       case 'M':
 474                         if (!eq)
 475                           {
 476                             uint32_t value = ctype->charnames[cnt];
 477
 478                             if (!be_quiet)
 479                               WITH_CUR_LOCALE (error (0, 0, _("\
 480 character L'\\u%0*x' in class `%s' must be in class `%s'"),
 481                                                       value > 0xffff ? 8 : 4,
 482                                                       value,
 483                                                       valid_table[cls1].name,
 484                                                       valid_table[cls2].name));
 485                           }
 486                         break;
 487
 488                       case 'X':
 489                         if (eq)
 490                           {
 491                             uint32_t value = ctype->charnames[cnt];
 492
 493                             if (!be_quiet)
 494                               WITH_CUR_LOCALE (error (0, 0, _("\
 495 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
 496                                                       value > 0xffff ? 8 : 4,
 497                                                       value,
 498                                                       valid_table[cls1].name,
 499                                                       valid_table[cls2].name));
 500                           }
 501                         break;
 502
 503                       case 'D':
 504                         ctype->class_collection[cnt] |= _ISwbit (cls2);
 505                         break;
 506
 507                       default:
 508                         WITH_CUR_LOCALE (error (5, 0, _("\
 509 internal error in %s, line %u"), __FUNCTION__, __LINE__));
 510                       }
 511                   }
 512         }
 513     }
 514
 515   for (cnt = 0; cnt < 256; ++cnt)
 516     {
 517       uint32_t tmp = ctype->class256_collection[cnt];
 518
 519       if (tmp != 0)
 520         {
 521           for (cls1 = 0; cls1 < NCLASS; ++cls1)
 522             if ((tmp & _ISbit (cls1)) != 0)
 523               for (cls2 = 0; cls2 < NCLASS; ++cls2)
 524                 if (valid_table[cls1].allow[cls2] != '-')
 525                   {
 526                     int eq = (tmp & _ISbit (cls2)) != 0;
 527                     switch (valid_table[cls1].allow[cls2])
 528                       {
 529                       case 'M':
 530                         if (!eq)
 531                           {
 532                             char buf[17];
 533
 534                             snprintf (buf, sizeof buf, "\\%Zo", cnt);
 535
 536                             if (!be_quiet)
 537                               WITH_CUR_LOCALE (error (0, 0, _("\
 538 character '%s' in class `%s' must be in class `%s'"),
 539                                                       buf,
 540                                                       valid_table[cls1].name,
 541                                                       valid_table[cls2].name));
 542                           }
 543                         break;
 544
 545                       case 'X':
 546                         if (eq)
 547                           {
 548                             char buf[17];
 549
 550                             snprintf (buf, sizeof buf, "\\%Zo", cnt);
 551
 552                             if (!be_quiet)
 553                               WITH_CUR_LOCALE (error (0, 0, _("\
 554 character '%s' in class `%s' must not be in class `%s'"),
 555                                                       buf,
 556                                                       valid_table[cls1].name,
 557                                                       valid_table[cls2].name));
 558                           }
 559                         break;
 560
 561                       case 'D':
 562                         ctype->class256_collection[cnt] |= _ISbit (cls2);
 563                         break;
 564
 565                       default:
 566                         WITH_CUR_LOCALE (error (5, 0, _("\
 567 internal error in %s, line %u"), __FUNCTION__, __LINE__));
 568                       }
 569                   }
 570         }
 571     }
 572
 573   /* ... and now test <SP> as a special case.  */
 574   space_value = 32;
 575   if (((cnt = BITPOS (tok_space),
 576         (ELEM (ctype, class_collection, , space_value)
 577          & BITw (tok_space)) == 0)
 578        || (cnt = BITPOS (tok_blank),
 579            (ELEM (ctype, class_collection, , space_value)
 580             & BITw (tok_blank)) == 0)))
 581     {
 582       if (!be_quiet)
 583         WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
 584                                 valid_table[cnt].name));
 585     }
 586   else if (((cnt = BITPOS (tok_punct),
 587              (ELEM (ctype, class_collection, , space_value)
 588               & BITw (tok_punct)) != 0)
 589             || (cnt = BITPOS (tok_graph),
 590                 (ELEM (ctype, class_collection, , space_value)
 591                  & BITw (tok_graph))
 592                 != 0)))
 593     {
 594       if (!be_quiet)
 595         WITH_CUR_LOCALE (error (0, 0, _("\
 596 <SP> character must not be in class `%s'"),
 597                                 valid_table[cnt].name));
 598     }
 599   else
 600     ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
 601
 602   space_seq = charmap_find_value (charmap, "SP", 2);
 603   if (space_seq == NULL)
 604     space_seq = charmap_find_value (charmap, "space", 5);
 605   if (space_seq == NULL)
 606     space_seq = charmap_find_value (charmap, "U00000020", 9);
 607   if (space_seq == NULL || space_seq->nbytes != 1)
 608     {
 609       if (!be_quiet)
 610         WITH_CUR_LOCALE (error (0, 0, _("\
 611 character <SP> not defined in character map")));
 612     }
 613   else if (((cnt = BITPOS (tok_space),
 614              (ctype->class256_collection[space_seq->bytes[0]]
 615               & BIT (tok_space)) == 0)
 616             || (cnt = BITPOS (tok_blank),
 617                 (ctype->class256_collection[space_seq->bytes[0]]
 618                  & BIT (tok_blank)) == 0)))
 619     {
 620       if (!be_quiet)
 621         WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
 622                                 valid_table[cnt].name));
 623     }
 624   else if (((cnt = BITPOS (tok_punct),
 625              (ctype->class256_collection[space_seq->bytes[0]]
 626               & BIT (tok_punct)) != 0)
 627             || (cnt = BITPOS (tok_graph),
 628                 (ctype->class256_collection[space_seq->bytes[0]]
 629                  & BIT (tok_graph)) != 0)))
 630     {
 631       if (!be_quiet)
 632         WITH_CUR_LOCALE (error (0, 0, _("\
 633 <SP> character must not be in class `%s'"),
 634                                 valid_table[cnt].name));
 635     }
 636   else
 637     ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
 638
 639   /* Check whether all single-byte characters make to their upper/lowercase
 640      equivalent according to the ASCII rules.  */
 641   for (cnt = 'A'; cnt <= 'Z'; ++cnt)
 642     {
 643       uint32_t uppval = ctype->map256_collection[0][cnt];
 644       uint32_t lowval = ctype->map256_collection[1][cnt];
 645       uint32_t lowuppval = ctype->map256_collection[0][lowval];
 646       uint32_t lowlowval = ctype->map256_collection[1][lowval];
 647
 648       if (uppval != cnt
 649           || lowval != cnt + 0x20
 650           || lowuppval != cnt
 651           || lowlowval != cnt + 0x20)
 652         ctype->nonascii_case = 1;
 653     }
 654   for (cnt = 0; cnt < 256; ++cnt)
 655     if (cnt < 'A' || (cnt > 'Z' && cnt < 'a') || cnt > 'z')
 656       if (ctype->map256_collection[0][cnt] != cnt
 657           || ctype->map256_collection[1][cnt] != cnt)
 658         ctype->nonascii_case = 1;
 659
 660   /* Now that the tests are done make sure the name array contains all
 661      characters which are handled in the WIDTH section of the
 662      character set definition file.  */
 663   if (charmap->width_rules != NULL)
 664     for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
 665       {
 666         unsigned char bytes[charmap->mb_cur_max];
 667         int nbytes = charmap->width_rules[cnt].from->nbytes;
 668
 669         /* We have the range of character for which the width is
 670            specified described using byte sequences of the multibyte
 671            charset.  We have to convert this to UCS4 now.  And we
 672            cannot simply convert the beginning and the end of the
 673            sequence, we have to iterate over the byte sequence and
 674            convert it for every single character.  */
 675         memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
 676
 677         while (nbytes < charmap->width_rules[cnt].to->nbytes
 678                || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
 679                           nbytes) <= 0)
 680           {
 681             /* Find the UCS value for `bytes'.  */
 682             int inner;
 683             uint32_t wch;
 684             struct charseq *seq
 685               = charmap_find_symbol (charmap, (char *) bytes, nbytes);
 686
 687             if (seq == NULL)
 688               wch = ILLEGAL_CHAR_VALUE;
 689             else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
 690               wch = seq->ucs4;
 691             else
 692               wch = repertoire_find_value (ctype->repertoire, seq->name,
 693                                            strlen (seq->name));
 694
 695             if (wch != ILLEGAL_CHAR_VALUE)
 696               /* We are only interested in the side-effects of the
 697                  `find_idx' call.  It will add appropriate entries in
 698                  the name array if this is necessary.  */
 699               (void) find_idx (ctype, NULL, NULL, NULL, wch);
 700
 701             /* "Increment" the bytes sequence.  */
 702             inner = nbytes - 1;
 703             while (inner >= 0 && bytes[inner] == 0xff)
 704               --inner;
 705
 706             if (inner < 0)
 707               {
 708                 /* We have to extend the byte sequence.  */
 709                 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
 710                   break;
 711
 712                 bytes[0] = 1;
 713                 memset (&bytes[1], 0, nbytes);
 714                 ++nbytes;
 715               }
 716             else
 717               {
 718                 ++bytes[inner];
 719                 while (++inner < nbytes)
 720                   bytes[inner] = 0;
 721               }
 722           }
 723       }
 724
 725   /* Now set all the other characters of the character set to the
 726      default width.  */
 727   curs = NULL;
 728   while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
 729     {
 730       struct charseq *data = (struct charseq *) vdata;
 731
 732       if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
 733         data->ucs4 = repertoire_find_value (ctype->repertoire,
 734                                             data->name, len);
 735
 736       if (data->ucs4 != ILLEGAL_CHAR_VALUE)
 737         (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
 738     }
 739
 740   /* There must be a multiple of 10 digits.  */
 741   if (ctype->mbdigits_act % 10 != 0)
 742     {
 743       assert (ctype->mbdigits_act == ctype->wcdigits_act);
 744       ctype->wcdigits_act -= ctype->mbdigits_act % 10;
 745       ctype->mbdigits_act -= ctype->mbdigits_act % 10;
 746       WITH_CUR_LOCALE (error (0, 0, _("\
 747 `digit' category has not entries in groups of ten")));
 748     }
 749
 750   /* Check the input digits.  There must be a multiple of ten available.
 751      In each group it could be that one or the other character is missing.
 752      In this case the whole group must be removed.  */
 753   cnt = 0;
 754   while (cnt < ctype->mbdigits_act)
 755     {
 756       size_t inner;
 757       for (inner = 0; inner < 10; ++inner)
 758         if (ctype->mbdigits[cnt + inner] == NULL)
 759           break;
 760
 761       if (inner == 10)
 762         cnt += 10;
 763       else
 764         {
 765           /* Remove the group.  */
 766           memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
 767                    ((ctype->wcdigits_act - cnt - 10)
 768                     * sizeof (ctype->mbdigits[0])));
 769           ctype->mbdigits_act -= 10;
 770         }
 771     }
 772
 773   /* If no input digits are given use the default.  */
 774   if (ctype->mbdigits_act == 0)
 775     {
 776       if (ctype->mbdigits_max == 0)
 777         {
 778           ctype->mbdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
 779                                            10 * sizeof (struct charseq *));
 780           ctype->mbdigits_max = 10;
 781         }
 782
 783       for (cnt = 0; cnt < 10; ++cnt)
 784         {
 785           ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
 786                                                       (char *) digits + cnt, 1);
 787           if (ctype->mbdigits[cnt] == NULL)
 788             {
 789               ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
 790                                                           longnames[cnt],
 791                                                           strlen (longnames[cnt]));
 792               if (ctype->mbdigits[cnt] == NULL)
 793                 {
 794                   /* Hum, this ain't good.  */
 795                   WITH_CUR_LOCALE (error (0, 0, _("\
 796 no input digits defined and none of the standard names in the charmap")));
 797
 798                   ctype->mbdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
 799                                                         sizeof (struct charseq) + 1);
 800
 801                   /* This is better than nothing.  */
 802                   ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
 803                   ctype->mbdigits[cnt]->nbytes = 1;
 804                 }
 805             }
 806         }
 807
 808       ctype->mbdigits_act = 10;
 809     }
 810
 811   /* Check the wide character input digits.  There must be a multiple
 812      of ten available.  In each group it could be that one or the other
 813      character is missing.  In this case the whole group must be
 814      removed.  */
 815   cnt = 0;
 816   while (cnt < ctype->wcdigits_act)
 817     {
 818       size_t inner;
 819       for (inner = 0; inner < 10; ++inner)
 820         if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
 821           break;
 822
 823       if (inner == 10)
 824         cnt += 10;
 825       else
 826         {
 827           /* Remove the group.  */
 828           memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
 829                    ((ctype->wcdigits_act - cnt - 10)
 830                     * sizeof (ctype->wcdigits[0])));
 831           ctype->wcdigits_act -= 10;
 832         }
 833     }
 834
 835   /* If no input digits are given use the default.  */
 836   if (ctype->wcdigits_act == 0)
 837     {
 838       if (ctype->wcdigits_max == 0)
 839         {
 840           ctype->wcdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
 841                                            10 * sizeof (uint32_t));
 842           ctype->wcdigits_max = 10;
 843         }
 844
 845       for (cnt = 0; cnt < 10; ++cnt)
 846         ctype->wcdigits[cnt] = L'0' + cnt;
 847
 848       ctype->mbdigits_act = 10;
 849     }
 850
 851   /* Check the outdigits.  */
 852   warned = 0;
 853   for (cnt = 0; cnt < 10; ++cnt)
 854     if (ctype->mboutdigits[cnt] == NULL)
 855       {
 856         static struct charseq replace[2];
 857
 858         if (!warned)
 859           {
 860             WITH_CUR_LOCALE (error (0, 0, _("\
 861 not all characters used in `outdigit' are available in the charmap")));
 862             warned = 1;
 863           }
 864
 865         replace[0].nbytes = 1;
 866         replace[0].bytes[0] = '?';
 867         replace[0].bytes[1] = '\0';
 868         ctype->mboutdigits[cnt] = &replace[0];
 869       }
 870
 871   warned = 0;
 872   for (cnt = 0; cnt < 10; ++cnt)
 873     if (ctype->wcoutdigits[cnt] == 0)
 874       {
 875         if (!warned)
 876           {
 877             WITH_CUR_LOCALE (error (0, 0, _("\
 878 not all characters used in `outdigit' are available in the repertoire")));
 879             warned = 1;
 880           }
 881
 882         ctype->wcoutdigits[cnt] = L'?';
 883       }
 884
 885   /* Sort the entries in the translit_ignore list.  */
 886   if (ctype->translit_ignore != NULL)
 887     {
 888       struct translit_ignore_t *firstp = ctype->translit_ignore;
 889       struct translit_ignore_t *runp;
 890
 891       ctype->ntranslit_ignore = 1;
 892
 893       for (runp = firstp->next; runp != NULL; runp = runp->next)
 894         {
 895           struct translit_ignore_t *lastp = NULL;
 896           struct translit_ignore_t *cmpp;
 897
 898           ++ctype->ntranslit_ignore;
 899
 900           for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next)
 901             if (runp->from < cmpp->from)
 902               break;
 903
 904           runp->next = lastp;
 905           if (lastp == NULL)
 906             firstp = runp;
 907         }
 908
 909       ctype->translit_ignore = firstp;
 910     }
 911 }
 912
 913
 914 void
 915 ctype_output (struct localedef_t *locale, const struct charmap_t *charmap,
 916               const char *output_path)
 917 {
 918   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 919   const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)
 920                          + ctype->nr_charclass + ctype->map_collection_nr);
 921   struct locale_file file;
 922   uint32_t default_missing_len;
 923   size_t elem, cnt;
 924
 925   /* Now prepare the output: Find the sizes of the table we can use.  */
 926   allocate_arrays (ctype, charmap, ctype->repertoire);
 927
 928   default_missing_len = (ctype->default_missing
 929                          ? wcslen ((wchar_t *) ctype->default_missing)
 930                          : 0);
 931
 932   init_locale_data (&file, nelems);
 933   for (elem = 0; elem < nelems; ++elem)
 934     {
 935       if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))
 936         switch (elem)
 937           {
 938 #define CTYPE_EMPTY(name) \
 939           case name:                                                          \
 940             add_locale_empty (&file);                                         \
 941             break
 942
 943           CTYPE_EMPTY(_NL_CTYPE_GAP1);
 944           CTYPE_EMPTY(_NL_CTYPE_GAP2);
 945           CTYPE_EMPTY(_NL_CTYPE_GAP3);
 946           CTYPE_EMPTY(_NL_CTYPE_GAP4);
 947           CTYPE_EMPTY(_NL_CTYPE_GAP5);
 948           CTYPE_EMPTY(_NL_CTYPE_GAP6);
 949
 950 #define CTYPE_RAW_DATA(name, base, size)                                      \
 951           case _NL_ITEM_INDEX (name):                                         \
 952             add_locale_raw_data (&file, base, size);                          \
 953             break
 954
 955           CTYPE_RAW_DATA (_NL_CTYPE_CLASS,
 956                           ctype->ctype_b,
 957                           (256 + 128) * sizeof (char_class_t));
 958
 959 #define CTYPE_UINT32_ARRAY(name, base, n_elems)                               \
 960           case _NL_ITEM_INDEX (name):                                         \
 961             add_locale_uint32_array (&file, base, n_elems);                   \
 962             break
 963
 964           CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER, ctype->map_b[0], 256 + 128);
 965           CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER, ctype->map_b[1], 256 + 128);
 966           CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER32, ctype->map32_b[0], 256);
 967           CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER32, ctype->map32_b[1], 256);
 968           CTYPE_RAW_DATA (_NL_CTYPE_CLASS32,
 969                           ctype->ctype32_b,
 970                           256 * sizeof (char_class32_t));
 971
 972 #define CTYPE_UINT32(name, value)                                             \
 973           case _NL_ITEM_INDEX (name):                                         \
 974             add_locale_uint32 (&file, value);                                 \
 975             break
 976
 977           CTYPE_UINT32 (_NL_CTYPE_CLASS_OFFSET, ctype->class_offset);
 978           CTYPE_UINT32 (_NL_CTYPE_MAP_OFFSET, ctype->map_offset);
 979           CTYPE_UINT32 (_NL_CTYPE_TRANSLIT_TAB_SIZE, ctype->translit_idx_size);
 980
 981           CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_IDX,
 982                               ctype->translit_from_idx,
 983                               ctype->translit_idx_size);
 984
 985           CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_TBL,
 986                               ctype->translit_from_tbl,
 987                               ctype->translit_from_tbl_size
 988                               / sizeof (uint32_t));
 989
 990           CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_IDX,
 991                               ctype->translit_to_idx,
 992                               ctype->translit_idx_size);
 993
 994           CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_TBL,
 995                               ctype->translit_to_tbl,
 996                               ctype->translit_to_tbl_size / sizeof (uint32_t));
 997
 998           case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
 999             /* The class name array.  */
1000             start_locale_structure (&file);
1001             for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1002               add_locale_string (&file, ctype->classnames[cnt]);
1003             add_locale_char (&file, 0);
1004             align_locale_data (&file, LOCFILE_ALIGN);
1005             end_locale_structure (&file);
1006             break;
1007
1008           case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
1009             /* The class name array.  */
1010             start_locale_structure (&file);
1011             for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1012               add_locale_string (&file, ctype->mapnames[cnt]);
1013             add_locale_char (&file, 0);
1014             align_locale_data (&file, LOCFILE_ALIGN);
1015             end_locale_structure (&file);
1016             break;
1017
1018           case _NL_ITEM_INDEX (_NL_CTYPE_WIDTH):
1019             add_locale_wcwidth_table (&file, &ctype->width);
1020             break;
1021
1022           CTYPE_UINT32 (_NL_CTYPE_MB_CUR_MAX, ctype->mb_cur_max);
1023
1024           case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
1025             add_locale_string (&file, ctype->codeset_name);
1026             break;
1027
1028           CTYPE_UINT32 (_NL_CTYPE_MAP_TO_NONASCII, ctype->to_nonascii);
1029
1030           CTYPE_UINT32 (_NL_CTYPE_NONASCII_CASE, ctype->nonascii_case);
1031
1032           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
1033             add_locale_uint32 (&file, ctype->mbdigits_act / 10);
1034             break;
1035
1036           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
1037             add_locale_uint32 (&file, ctype->wcdigits_act / 10);
1038             break;
1039
1040           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
1041             start_locale_structure (&file);
1042             for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1043                  cnt < ctype->mbdigits_act; cnt += 10)
1044               {
1045                 add_locale_raw_data (&file, ctype->mbdigits[cnt]->bytes,
1046                                      ctype->mbdigits[cnt]->nbytes);
1047                 add_locale_char (&file, 0);
1048               }
1049             end_locale_structure (&file);
1050             break;
1051
1052           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
1053             start_locale_structure (&file);
1054             cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
1055             add_locale_raw_data (&file, ctype->mboutdigits[cnt]->bytes,
1056                                  ctype->mboutdigits[cnt]->nbytes);
1057             add_locale_char (&file, 0);
1058             end_locale_structure (&file);
1059             break;
1060
1061           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
1062             start_locale_structure (&file);
1063             for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
1064                  cnt < ctype->wcdigits_act; cnt += 10)
1065               add_locale_uint32 (&file, ctype->wcdigits[cnt]);
1066             end_locale_structure (&file);
1067             break;
1068
1069           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
1070             cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
1071             add_locale_uint32 (&file, ctype->wcoutdigits[cnt]);
1072             break;
1073
1074           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
1075             add_locale_uint32 (&file, default_missing_len);
1076             break;
1077
1078           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
1079             add_locale_uint32_array (&file, ctype->default_missing,
1080                                      default_missing_len);
1081             break;
1082
1083           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
1084             add_locale_uint32 (&file, ctype->ntranslit_ignore);
1085             break;
1086
1087           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
1088             start_locale_structure (&file);
1089             {
1090               struct translit_ignore_t *runp;
1091               for (runp = ctype->translit_ignore; runp != NULL;
1092                    runp = runp->next)
1093                 {
1094                   add_locale_uint32 (&file, runp->from);
1095                   add_locale_uint32 (&file, runp->to);
1096                   add_locale_uint32 (&file, runp->step);
1097                 }
1098             }
1099             end_locale_structure (&file);
1100             break;
1101
1102           default:
1103             assert (! "unknown CTYPE element");
1104           }
1105       else
1106         {
1107           /* Handle extra maps.  */
1108           size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
1109           if (nr < ctype->nr_charclass)
1110             {
1111               start_locale_prelude (&file);
1112               add_locale_uint32_array (&file, ctype->class_b[nr], 256 / 32);
1113               end_locale_prelude (&file);
1114               add_locale_wctype_table (&file, &ctype->class_3level[nr]);
1115             }
1116           else
1117             {
1118               nr -= ctype->nr_charclass;
1119               assert (nr < ctype->map_collection_nr);
1120               add_locale_wctrans_table (&file, &ctype->map_3level[nr]);
1121             }
1122         }
1123     }
1124
1125   write_locale_data (output_path, LC_CTYPE, "LC_CTYPE", &file);
1126 }
1127
1128
1129 /* Local functions.  */
1130 static void
1131 ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
1132                  const char *name)
1133 {
1134   size_t cnt;
1135
1136   for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1137     if (strcmp (ctype->classnames[cnt], name) == 0)
1138       break;
1139
1140   if (cnt < ctype->nr_charclass)
1141     {
1142       lr_error (lr, _("character class `%s' already defined"), name);
1143       return;
1144     }
1145
1146   if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1147     /* Exit code 2 is prescribed in P1003.2b.  */
1148     WITH_CUR_LOCALE (error (2, 0, _("\
1149 implementation limit: no more than %Zd character classes allowed"),
1150                             MAX_NR_CHARCLASS));
1151
1152   ctype->classnames[ctype->nr_charclass++] = name;
1153 }
1154
1155
1156 static void
1157 ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1158                const char *name, const struct charmap_t *charmap)
1159 {
1160   size_t max_chars = 0;
1161   size_t cnt;
1162
1163   for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1164     {
1165       if (strcmp (ctype->mapnames[cnt], name) == 0)
1166         break;
1167
1168       if (max_chars < ctype->map_collection_max[cnt])
1169         max_chars = ctype->map_collection_max[cnt];
1170     }
1171
1172   if (cnt < ctype->map_collection_nr)
1173     {
1174       lr_error (lr, _("character map `%s' already defined"), name);
1175       return;
1176     }
1177
1178   if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1179     /* Exit code 2 is prescribed in P1003.2b.  */
1180     WITH_CUR_LOCALE (error (2, 0, _("\
1181 implementation limit: no more than %d character maps allowed"),
1182                             MAX_NR_CHARMAP));
1183
1184   ctype->mapnames[cnt] = name;
1185
1186   if (max_chars == 0)
1187     ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1188   else
1189     ctype->map_collection_max[cnt] = max_chars;
1190
1191   ctype->map_collection[cnt] = (uint32_t *)
1192     xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
1193   ctype->map_collection_act[cnt] = 256;
1194
1195   ++ctype->map_collection_nr;
1196 }
1197
1198
1199 /* We have to be prepared that TABLE, MAX, and ACT can be NULL.  This
1200    is possible if we only want to extend the name array.  */
1201 static uint32_t *
1202 find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1203           size_t *act, uint32_t idx)
1204 {
1205   size_t cnt;
1206
1207   if (idx < 256)
1208     return table == NULL ? NULL : &(*table)[idx];
1209
1210   /* Use the charnames_idx lookup table instead of the slow search loop.  */
1211 #if 1
1212   cnt = idx_table_get (&ctype->charnames_idx, idx);
1213   if (cnt == EMPTY)
1214     /* Not found.  */
1215     cnt = ctype->charnames_act;
1216 #else
1217   for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1218     if (ctype->charnames[cnt] == idx)
1219       break;
1220 #endif
1221
1222   /* We have to distinguish two cases: the name is found or not.  */
1223   if (cnt == ctype->charnames_act)
1224     {
1225       /* Extend the name array.  */
1226       if (ctype->charnames_act == ctype->charnames_max)
1227         {
1228           ctype->charnames_max *= 2;
1229           ctype->charnames = (uint32_t *)
1230             xrealloc (ctype->charnames,
1231                       sizeof (uint32_t) * ctype->charnames_max);
1232         }
1233       ctype->charnames[ctype->charnames_act++] = idx;
1234       idx_table_add (&ctype->charnames_idx, idx, cnt);
1235     }
1236
1237   if (table == NULL)
1238     /* We have done everything we are asked to do.  */
1239     return NULL;
1240
1241   if (max == NULL)
1242     /* The caller does not want to extend the table.  */
1243     return (cnt >= *act ? NULL : &(*table)[cnt]);
1244
1245   if (cnt >= *act)
1246     {
1247       if (cnt >= *max)
1248         {
1249           size_t old_max = *max;
1250           do
1251             *max *= 2;
1252           while (*max <= cnt);
1253
1254           *table =
1255             (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
1256           memset (&(*table)[old_max], '\0',
1257                   (*max - old_max) * sizeof (uint32_t));
1258         }
1259
1260       *act = cnt + 1;
1261     }
1262
1263   return &(*table)[cnt];
1264 }
1265
1266
1267 static int
1268 get_character (struct token *now, const struct charmap_t *charmap,
1269                struct repertoire_t *repertoire,
1270                struct charseq **seqp, uint32_t *wchp)
1271 {
1272   if (now->tok == tok_bsymbol)
1273     {
1274       /* This will hopefully be the normal case.  */
1275       *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1276                                      now->val.str.lenmb);
1277       *seqp = charmap_find_value (charmap, now->val.str.startmb,
1278                                   now->val.str.lenmb);
1279     }
1280   else if (now->tok == tok_ucs4)
1281     {
1282       char utmp[10];
1283
1284       snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1285       *seqp = charmap_find_value (charmap, utmp, 9);
1286
1287       if (*seqp == NULL)
1288         *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
1289
1290       if (*seqp == NULL)
1291         {
1292           /* Compute the value in the charmap from the UCS value.  */
1293           const char *symbol = repertoire_find_symbol (repertoire,
1294                                                        now->val.ucs4);
1295
1296           if (symbol == NULL)
1297             *seqp = NULL;
1298           else
1299             *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
1300
1301           if (*seqp == NULL)
1302             {
1303               if (repertoire != NULL)
1304                 {
1305                   /* Insert a negative entry.  */
1306                   static const struct charseq negative
1307                     = { .ucs4 = ILLEGAL_CHAR_VALUE };
1308                   uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1309                                                   sizeof (uint32_t));
1310                   *newp = now->val.ucs4;
1311
1312                   insert_entry (&repertoire->seq_table, newp,
1313                                 sizeof (uint32_t), (void *) &negative);
1314                 }
1315             }
1316           else
1317             (*seqp)->ucs4 = now->val.ucs4;
1318         }
1319       else if ((*seqp)->ucs4 != now->val.ucs4)
1320         *seqp = NULL;
1321
1322       *wchp = now->val.ucs4;
1323     }
1324   else if (now->tok == tok_charcode)
1325     {
1326       /* We must map from the byte code to UCS4.  */
1327       *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1328                                    now->val.str.lenmb);
1329
1330       if (*seqp == NULL)
1331         *wchp = ILLEGAL_CHAR_VALUE;
1332       else
1333         {
1334           if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1335             (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1336                                                    strlen ((*seqp)->name));
1337           *wchp = (*seqp)->ucs4;
1338         }
1339     }
1340   else
1341     return 1;
1342
1343   return 0;
1344 }
1345
1346
1347 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1348    the .(2). counterparts.  */
1349 static void
1350 charclass_symbolic_ellipsis (struct linereader *ldfile,
1351                              struct locale_ctype_t *ctype,
1352                              const struct charmap_t *charmap,
1353                              struct repertoire_t *repertoire,
1354                              struct token *now,
1355                              const char *last_str,
1356                              unsigned long int class256_bit,
1357                              unsigned long int class_bit, int base,
1358                              int ignore_content, int handle_digits, int step)
1359 {
1360   const char *nowstr = now->val.str.startmb;
1361   char tmp[now->val.str.lenmb + 1];
1362   const char *cp;
1363   char *endp;
1364   unsigned long int from;
1365   unsigned long int to;
1366
1367   /* We have to compute the ellipsis values using the symbolic names.  */
1368   assert (last_str != NULL);
1369
1370   if (strlen (last_str) != now->val.str.lenmb)
1371     {
1372     invalid_range:
1373       lr_error (ldfile,
1374                 _("`%s' and `%.*s' are not valid names for symbolic range"),
1375                 last_str, (int) now->val.str.lenmb, nowstr);
1376       return;
1377     }
1378
1379   if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1380     /* Nothing to do, the names are the same.  */
1381     return;
1382
1383   for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1384     ;
1385
1386   errno = 0;
1387   from = strtoul (cp, &endp, base);
1388   if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1389     goto invalid_range;
1390
1391   to = strtoul (nowstr + (cp - last_str), &endp, base);
1392   if ((to == UINT_MAX && errno == ERANGE)
1393       || (endp - nowstr) != now->val.str.lenmb || from >= to)
1394     goto invalid_range;
1395
1396   /* OK, we have a range FROM - TO.  Now we can create the symbolic names.  */
1397   if (!ignore_content)
1398     {
1399       now->val.str.startmb = tmp;
1400       while ((from += step) <= to)
1401         {
1402           struct charseq *seq;
1403           uint32_t wch;
1404
1405           sprintf (tmp, (base == 10 ? "%.*s%0*ld" : "%.*s%0*lX"),
1406                    (int) (cp - last_str), last_str,
1407                    (int) (now->val.str.lenmb - (cp - last_str)),
1408                    from);
1409
1410           get_character (now, charmap, repertoire, &seq, &wch);
1411
1412           if (seq != NULL && seq->nbytes == 1)
1413             /* Yep, we can store information about this byte sequence.  */
1414             ctype->class256_collection[seq->bytes[0]] |= class256_bit;
1415
1416           if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1417             /* We have the UCS4 position.  */
1418             *find_idx (ctype, &ctype->class_collection,
1419                        &ctype->class_collection_max,
1420                        &ctype->class_collection_act, wch) |= class_bit;
1421
1422           if (handle_digits == 1)
1423             {
1424               /* We must store the digit values.  */
1425               if (ctype->mbdigits_act == ctype->mbdigits_max)
1426                 {
1427                   ctype->mbdigits_max *= 2;
1428                   ctype->mbdigits = xrealloc (ctype->mbdigits,
1429                                               (ctype->mbdigits_max
1430                                                * sizeof (char *)));
1431                   ctype->wcdigits_max *= 2;
1432                   ctype->wcdigits = xrealloc (ctype->wcdigits,
1433                                               (ctype->wcdigits_max
1434                                                * sizeof (uint32_t)));
1435                 }
1436
1437               ctype->mbdigits[ctype->mbdigits_act++] = seq;
1438               ctype->wcdigits[ctype->wcdigits_act++] = wch;
1439             }
1440           else if (handle_digits == 2)
1441             {
1442               /* We must store the digit values.  */
1443               if (ctype->outdigits_act >= 10)
1444                 {
1445                   lr_error (ldfile, _("\
1446 %s: field `%s' does not contain exactly ten entries"),
1447                             "LC_CTYPE", "outdigit");
1448                   return;
1449                 }
1450
1451               ctype->mboutdigits[ctype->outdigits_act] = seq;
1452               ctype->wcoutdigits[ctype->outdigits_act] = wch;
1453               ++ctype->outdigits_act;
1454             }
1455         }
1456     }
1457 }
1458
1459
1460 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'.  */
1461 static void
1462 charclass_ucs4_ellipsis (struct linereader *ldfile,
1463                          struct locale_ctype_t *ctype,
1464                          const struct charmap_t *charmap,
1465                          struct repertoire_t *repertoire,
1466                          struct token *now, uint32_t last_wch,
1467                          unsigned long int class256_bit,
1468                          unsigned long int class_bit, int ignore_content,
1469                          int handle_digits, int step)
1470 {
1471   if (last_wch > now->val.ucs4)
1472     {
1473       lr_error (ldfile, _("\
1474 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1475                 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1476                 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
1477       return;
1478     }
1479
1480   if (!ignore_content)
1481     while ((last_wch += step) <= now->val.ucs4)
1482       {
1483         /* We have to find out whether there is a byte sequence corresponding
1484            to this UCS4 value.  */
1485         struct charseq *seq;
1486         char utmp[10];
1487
1488         snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1489         seq = charmap_find_value (charmap, utmp, 9);
1490         if (seq == NULL)
1491           {
1492             snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1493             seq = charmap_find_value (charmap, utmp, 5);
1494           }
1495
1496         if (seq == NULL)
1497           /* Try looking in the repertoire map.  */
1498           seq = repertoire_find_seq (repertoire, last_wch);
1499
1500         /* If this is the first time we look for this sequence create a new
1501            entry.  */
1502         if (seq == NULL)
1503           {
1504             static const struct charseq negative
1505               = { .ucs4 = ILLEGAL_CHAR_VALUE };
1506
1507             /* Find the symbolic name for this UCS4 value.  */
1508             if (repertoire != NULL)
1509               {
1510                 const char *symbol = repertoire_find_symbol (repertoire,
1511                                                              last_wch);
1512                 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1513                                                 sizeof (uint32_t));
1514                 *newp = last_wch;
1515
1516                 if (symbol != NULL)
1517                   /* We have a name, now search the multibyte value.  */
1518                   seq = charmap_find_value (charmap, symbol, strlen (symbol));
1519
1520                 if (seq == NULL)
1521                   /* We have to create a fake entry.  */
1522                   seq = (struct charseq *) &negative;
1523                 else
1524                   seq->ucs4 = last_wch;
1525
1526                 insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1527                               seq);
1528               }
1529             else
1530               /* We have to create a fake entry.  */
1531               seq = (struct charseq *) &negative;
1532           }
1533
1534         /* We have a name, now search the multibyte value.  */
1535         if (seq->ucs4 == last_wch && seq->nbytes == 1)
1536           /* Yep, we can store information about this byte sequence.  */
1537           ctype->class256_collection[(size_t) seq->bytes[0]]
1538             |= class256_bit;
1539
1540         /* And of course we have the UCS4 position.  */
1541         if (class_bit != 0)
1542           *find_idx (ctype, &ctype->class_collection,
1543                      &ctype->class_collection_max,
1544                      &ctype->class_collection_act, last_wch) |= class_bit;
1545
1546         if (handle_digits == 1)
1547           {
1548             /* We must store the digit values.  */
1549             if (ctype->mbdigits_act == ctype->mbdigits_max)
1550               {
1551                 ctype->mbdigits_max *= 2;
1552                 ctype->mbdigits = xrealloc (ctype->mbdigits,
1553                                             (ctype->mbdigits_max
1554                                              * sizeof (char *)));
1555                 ctype->wcdigits_max *= 2;
1556                 ctype->wcdigits = xrealloc (ctype->wcdigits,
1557                                             (ctype->wcdigits_max
1558                                              * sizeof (uint32_t)));
1559               }
1560
1561             ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1562                                                       ? seq : NULL);
1563             ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1564           }
1565         else if (handle_digits == 2)
1566           {
1567             /* We must store the digit values.  */
1568             if (ctype->outdigits_act >= 10)
1569               {
1570                 lr_error (ldfile, _("\
1571 %s: field `%s' does not contain exactly ten entries"),
1572                           "LC_CTYPE", "outdigit");
1573                 return;
1574               }
1575
1576             ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1577                                                         ? seq : NULL);
1578             ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1579             ++ctype->outdigits_act;
1580           }
1581       }
1582 }
1583
1584
1585 /* Ellipsis as in `/xea/x12.../xea/x34'.  */
1586 static void
1587 charclass_charcode_ellipsis (struct linereader *ldfile,
1588                              struct locale_ctype_t *ctype,
1589                              const struct charmap_t *charmap,
1590                              struct repertoire_t *repertoire,
1591                              struct token *now, char *last_charcode,
1592                              uint32_t last_charcode_len,
1593                              unsigned long int class256_bit,
1594                              unsigned long int class_bit, int ignore_content,
1595                              int handle_digits)
1596 {
1597   /* First check whether the to-value is larger.  */
1598   if (now->val.charcode.nbytes != last_charcode_len)
1599     {
1600       lr_error (ldfile, _("\
1601 start and end character sequence of range must have the same length"));
1602       return;
1603     }
1604
1605   if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
1606     {
1607       lr_error (ldfile, _("\
1608 to-value character sequence is smaller than from-value sequence"));
1609       return;
1610     }
1611
1612   if (!ignore_content)
1613     {
1614       do
1615         {
1616           /* Increment the byte sequence value.  */
1617           struct charseq *seq;
1618           uint32_t wch;
1619           int i;
1620
1621           for (i = last_charcode_len - 1; i >= 0; --i)
1622             if (++last_charcode[i] != 0)
1623               break;
1624
1625           if (last_charcode_len == 1)
1626             /* Of course we have the charcode value.  */
1627             ctype->class256_collection[(size_t) last_charcode[0]]
1628               |= class256_bit;
1629
1630           /* Find the symbolic name.  */
1631           seq = charmap_find_symbol (charmap, last_charcode,
1632                                      last_charcode_len);
1633           if (seq != NULL)
1634             {
1635               if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1636                 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1637                                                    strlen (seq->name));
1638               wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
1639
1640               if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1641                 *find_idx (ctype, &ctype->class_collection,
1642                            &ctype->class_collection_max,
1643                            &ctype->class_collection_act, wch) |= class_bit;
1644             }
1645           else
1646             wch = ILLEGAL_CHAR_VALUE;
1647
1648           if (handle_digits == 1)
1649             {
1650               /* We must store the digit values.  */
1651               if (ctype->mbdigits_act == ctype->mbdigits_max)
1652                 {
1653                   ctype->mbdigits_max *= 2;
1654                   ctype->mbdigits = xrealloc (ctype->mbdigits,
1655                                               (ctype->mbdigits_max
1656                                                * sizeof (char *)));
1657                   ctype->wcdigits_max *= 2;
1658                   ctype->wcdigits = xrealloc (ctype->wcdigits,
1659                                               (ctype->wcdigits_max
1660                                                * sizeof (uint32_t)));
1661                 }
1662
1663               seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1664               memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1665               seq->nbytes = last_charcode_len;
1666
1667               ctype->mbdigits[ctype->mbdigits_act++] = seq;
1668               ctype->wcdigits[ctype->wcdigits_act++] = wch;
1669             }
1670           else if (handle_digits == 2)
1671             {
1672               struct charseq *seq;
1673               /* We must store the digit values.  */
1674               if (ctype->outdigits_act >= 10)
1675                 {
1676                   lr_error (ldfile, _("\
1677 %s: field `%s' does not contain exactly ten entries"),
1678                             "LC_CTYPE", "outdigit");
1679                   return;
1680                 }
1681
1682               seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1683               memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1684               seq->nbytes = last_charcode_len;
1685
1686               ctype->mboutdigits[ctype->outdigits_act] = seq;
1687               ctype->wcoutdigits[ctype->outdigits_act] = wch;
1688               ++ctype->outdigits_act;
1689             }
1690         }
1691       while (memcmp (last_charcode, now->val.charcode.bytes,
1692                      last_charcode_len) != 0);
1693     }
1694 }
1695
1696
1697 static uint32_t *
1698 find_translit2 (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
1699                 uint32_t wch)
1700 {
1701   struct translit_t *trunp = ctype->translit;
1702   struct translit_ignore_t *tirunp = ctype->translit_ignore;
1703
1704   while (trunp != NULL)
1705     {
1706       /* XXX We simplify things here.  The transliterations we look
1707          for are only allowed to have one character.  */
1708       if (trunp->from[0] == wch && trunp->from[1] == 0)
1709         {
1710           /* Found it.  Now look for a transliteration which can be
1711              represented with the character set.  */
1712           struct translit_to_t *torunp = trunp->to;
1713
1714           while (torunp != NULL)
1715             {
1716               int i;
1717
1718               for (i = 0; torunp->str[i] != 0; ++i)
1719                 {
1720                   char utmp[10];
1721
1722                   snprintf (utmp, sizeof (utmp), "U%08X", torunp->str[i]);
1723                   if (charmap_find_value (charmap, utmp, 9) == NULL)
1724                     /* This character cannot be represented.  */
1725                     break;
1726                 }
1727
1728               if (torunp->str[i] == 0)
1729                 return torunp->str;
1730
1731               torunp = torunp->next;
1732             }
1733
1734           break;
1735         }
1736
1737       trunp = trunp->next;
1738     }
1739
1740   /* Check for ignored chars.  */
1741   while (tirunp != NULL)
1742     {
1743       if (tirunp->from <= wch && tirunp->to >= wch)
1744         {
1745           uint32_t wi;
1746
1747           for (wi = tirunp->from; wi <= wch; wi += tirunp->step)
1748             if (wi == wch)
1749               return no_str;
1750         }
1751     }
1752
1753   /* Nothing found.  */
1754   return NULL;
1755 }
1756
1757
1758 uint32_t *
1759 find_translit (struct localedef_t *locale, const struct charmap_t *charmap,
1760                uint32_t wch)
1761 {
1762   struct locale_ctype_t *ctype;
1763   uint32_t *result = NULL;
1764
1765   assert (locale != NULL);
1766   ctype = locale->categories[LC_CTYPE].ctype;
1767
1768   if (ctype == NULL)
1769     return NULL;
1770
1771   if (ctype->translit != NULL)
1772     result = find_translit2 (ctype, charmap, wch);
1773
1774   if (result == NULL)
1775     {
1776       struct translit_include_t *irunp = ctype->translit_include;
1777
1778       while (irunp != NULL && result == NULL)
1779         {
1780           result = find_translit (find_locale (CTYPE_LOCALE,
1781                                                irunp->copy_locale,
1782                                                irunp->copy_repertoire,
1783                                                charmap),
1784                                   charmap, wch);
1785           irunp = irunp->next;
1786         }
1787     }
1788
1789   return result;
1790 }
1791
1792
1793 /* Read one transliteration entry.  */
1794 static uint32_t *
1795 read_widestring (struct linereader *ldfile, struct token *now,
1796                  const struct charmap_t *charmap,
1797                  struct repertoire_t *repertoire)
1798 {
1799   uint32_t *wstr;
1800
1801   if (now->tok == tok_default_missing)
1802     /* The special name "" will denote this case.  */
1803     wstr = no_str;
1804   else if (now->tok == tok_bsymbol)
1805     {
1806       /* Get the value from the repertoire.  */
1807       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1808       wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1809                                        now->val.str.lenmb);
1810       if (wstr[0] == ILLEGAL_CHAR_VALUE)
1811         {
1812           /* We cannot proceed, we don't know the UCS4 value.  */
1813           free (wstr);
1814           return NULL;
1815         }
1816
1817       wstr[1] = 0;
1818     }
1819   else if (now->tok == tok_ucs4)
1820     {
1821       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1822       wstr[0] = now->val.ucs4;
1823       wstr[1] = 0;
1824     }
1825   else if (now->tok == tok_charcode)
1826     {
1827       /* Argh, we have to convert to the symbol name first and then to the
1828          UCS4 value.  */
1829       struct charseq *seq = charmap_find_symbol (charmap,
1830                                                  now->val.str.startmb,
1831                                                  now->val.str.lenmb);
1832       if (seq == NULL)
1833         /* Cannot find the UCS4 value.  */
1834         return NULL;
1835
1836       if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1837         seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1838                                            strlen (seq->name));
1839       if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1840         /* We cannot proceed, we don't know the UCS4 value.  */
1841         return NULL;
1842
1843       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1844       wstr[0] = seq->ucs4;
1845       wstr[1] = 0;
1846     }
1847   else if (now->tok == tok_string)
1848     {
1849       wstr = now->val.str.startwc;
1850       if (wstr == NULL || wstr[0] == 0)
1851         return NULL;
1852     }
1853   else
1854     {
1855       if (now->tok != tok_eol && now->tok != tok_eof)
1856         lr_ignore_rest (ldfile, 0);
1857       SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1858       return (uint32_t *) -1l;
1859     }
1860
1861   return wstr;
1862 }
1863
1864
1865 static void
1866 read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1867                      struct token *now, const struct charmap_t *charmap,
1868                      struct repertoire_t *repertoire)
1869 {
1870   uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1871   struct translit_t *result;
1872   struct translit_to_t **top;
1873   struct obstack *ob = &ctype->mempool;
1874   int first;
1875   int ignore;
1876
1877   if (from_wstr == NULL)
1878     /* There is no valid from string.  */
1879     return;
1880
1881   result = (struct translit_t *) obstack_alloc (ob,
1882                                                 sizeof (struct translit_t));
1883   result->from = from_wstr;
1884   result->fname = ldfile->fname;
1885   result->lineno = ldfile->lineno;
1886   result->next = NULL;
1887   result->to = NULL;
1888   top = &result->to;
1889   first = 1;
1890   ignore = 0;
1891
1892   while (1)
1893     {
1894       uint32_t *to_wstr;
1895
1896       /* Next we have one or more transliterations.  They are
1897          separated by semicolons.  */
1898       now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
1899
1900       if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
1901         {
1902           /* One string read.  */
1903           const uint32_t zero = 0;
1904
1905           if (!ignore)
1906             {
1907               obstack_grow (ob, &zero, 4);
1908               to_wstr = obstack_finish (ob);
1909
1910               *top = obstack_alloc (ob, sizeof (struct translit_to_t));
1911               (*top)->str = to_wstr;
1912               (*top)->next = NULL;
1913             }
1914
1915           if (now->tok == tok_eol)
1916             {
1917               result->next = ctype->translit;
1918               ctype->translit = result;
1919               return;
1920             }
1921
1922           if (!ignore)
1923             top = &(*top)->next;
1924           ignore = 0;
1925         }
1926       else
1927         {
1928           to_wstr = read_widestring (ldfile, now, charmap, repertoire);
1929           if (to_wstr == (uint32_t *) -1l)
1930             {
1931               /* An error occurred.  */
1932               obstack_free (ob, result);
1933               return;
1934             }
1935
1936           if (to_wstr == NULL)
1937             ignore = 1;
1938           else
1939             /* This value is usable.  */
1940             obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
1941
1942           first = 0;
1943         }
1944     }
1945 }
1946
1947
1948 static void
1949 read_translit_ignore_entry (struct linereader *ldfile,
1950                             struct locale_ctype_t *ctype,
1951                             const struct charmap_t *charmap,
1952                             struct repertoire_t *repertoire)
1953 {
1954   /* We expect a semicolon-separated list of characters we ignore.  We are
1955      only interested in the wide character definitions.  These must be
1956      single characters, possibly defining a range when an ellipsis is used.  */
1957   while (1)
1958     {
1959       struct token *now = lr_token (ldfile, charmap, NULL, repertoire,
1960                                     verbose);
1961       struct translit_ignore_t *newp;
1962       uint32_t from;
1963
1964       if (now->tok == tok_eol || now->tok == tok_eof)
1965         {
1966           lr_error (ldfile,
1967                     _("premature end of `translit_ignore' definition"));
1968           return;
1969         }
1970
1971       if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
1972         {
1973           lr_error (ldfile, _("syntax error"));
1974           lr_ignore_rest (ldfile, 0);
1975           return;
1976         }
1977
1978       if (now->tok == tok_ucs4)
1979         from = now->val.ucs4;
1980       else
1981         /* Try to get the value.  */
1982         from = repertoire_find_value (repertoire, now->val.str.startmb,
1983                                       now->val.str.lenmb);
1984
1985       if (from == ILLEGAL_CHAR_VALUE)
1986         {
1987           lr_error (ldfile, "invalid character name");
1988           newp = NULL;
1989         }
1990       else
1991         {
1992           newp = (struct translit_ignore_t *)
1993             obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
1994           newp->from = from;
1995           newp->to = from;
1996           newp->step = 1;
1997
1998           newp->next = ctype->translit_ignore;
1999           ctype->translit_ignore = newp;
2000         }
2001
2002       /* Now we expect either a semicolon, an ellipsis, or the end of the
2003          line.  */
2004       now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2005
2006       if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
2007         {
2008           /* XXX Should we bother implementing `....'?  `...' certainly
2009              will not be implemented.  */
2010           uint32_t to;
2011           int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
2012
2013           now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2014
2015           if (now->tok == tok_eol || now->tok == tok_eof)
2016             {
2017               lr_error (ldfile,
2018                         _("premature end of `translit_ignore' definition"));
2019               return;
2020             }
2021
2022           if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2023             {
2024               lr_error (ldfile, _("syntax error"));
2025               lr_ignore_rest (ldfile, 0);
2026               return;
2027             }
2028
2029           if (now->tok == tok_ucs4)
2030             to = now->val.ucs4;
2031           else
2032             /* Try to get the value.  */
2033             to = repertoire_find_value (repertoire, now->val.str.startmb,
2034                                         now->val.str.lenmb);
2035
2036           if (to == ILLEGAL_CHAR_VALUE)
2037             lr_error (ldfile, "invalid character name");
2038           else
2039             {
2040               /* Make sure the `to'-value is larger.  */
2041               if (to >= from)
2042                 {
2043                   newp->to = to;
2044                   newp->step = step;
2045                 }
2046               else
2047                 lr_error (ldfile, _("\
2048 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2049                           (to | from) < 65536 ? 4 : 8, to,
2050                           (to | from) < 65536 ? 4 : 8, from);
2051             }
2052
2053           /* And the next token.  */
2054           now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2055         }
2056
2057       if (now->tok == tok_eol || now->tok == tok_eof)
2058         /* We are done.  */
2059         return;
2060
2061       if (now->tok == tok_semicolon)
2062         /* Next round.  */
2063         continue;
2064
2065       /* If we come here something is wrong.  */
2066       lr_error (ldfile, _("syntax error"));
2067       lr_ignore_rest (ldfile, 0);
2068       return;
2069     }
2070 }
2071
2072
2073 /* The parser for the LC_CTYPE section of the locale definition.  */
2074 void
2075 ctype_read (struct linereader *ldfile, struct localedef_t *result,
2076             const struct charmap_t *charmap, const char *repertoire_name,
2077             int ignore_content)
2078 {
2079   struct repertoire_t *repertoire = NULL;
2080   struct locale_ctype_t *ctype;
2081   struct token *now;
2082   enum token_t nowtok;
2083   size_t cnt;
2084   uint32_t last_wch = 0;
2085   enum token_t last_token;
2086   enum token_t ellipsis_token;
2087   int step;
2088   char last_charcode[16];
2089   size_t last_charcode_len = 0;
2090   const char *last_str = NULL;
2091   int mapidx;
2092   struct localedef_t *copy_locale = NULL;
2093
2094   /* Get the repertoire we have to use.  */
2095   if (repertoire_name != NULL)
2096     repertoire = repertoire_read (repertoire_name);
2097
2098   /* The rest of the line containing `LC_CTYPE' must be free.  */
2099   lr_ignore_rest (ldfile, 1);
2100
2101
2102   do
2103     {
2104       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2105       nowtok = now->tok;
2106     }
2107   while (nowtok == tok_eol);
2108
2109   /* If we see `copy' now we are almost done.  */
2110   if (nowtok == tok_copy)
2111     {
2112       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2113       if (now->tok != tok_string)
2114         {
2115           SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2116
2117         skip_category:
2118           do
2119             now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2120           while (now->tok != tok_eof && now->tok != tok_end);
2121
2122           if (now->tok != tok_eof
2123               || (now = lr_token (ldfile, charmap, NULL, NULL, verbose),
2124                   now->tok == tok_eof))
2125             lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2126           else if (now->tok != tok_lc_ctype)
2127             {
2128               lr_error (ldfile, _("\
2129 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2130               lr_ignore_rest (ldfile, 0);
2131             }
2132           else
2133             lr_ignore_rest (ldfile, 1);
2134
2135           return;
2136         }
2137
2138       if (! ignore_content)
2139         {
2140           /* Get the locale definition.  */
2141           copy_locale = load_locale (LC_CTYPE, now->val.str.startmb,
2142                                      repertoire_name, charmap, NULL);
2143           if ((copy_locale->avail & CTYPE_LOCALE) == 0)
2144             {
2145               /* Not yet loaded.  So do it now.  */
2146               if (locfile_read (copy_locale, charmap) != 0)
2147                 goto skip_category;
2148             }
2149
2150           if (copy_locale->categories[LC_CTYPE].ctype == NULL)
2151             return;
2152         }
2153
2154       lr_ignore_rest (ldfile, 1);
2155
2156       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2157       nowtok = now->tok;
2158     }
2159
2160   /* Prepare the data structures.  */
2161   ctype_startup (ldfile, result, charmap, copy_locale, ignore_content);
2162   ctype = result->categories[LC_CTYPE].ctype;
2163
2164   /* Remember the repertoire we use.  */
2165   if (!ignore_content)
2166     ctype->repertoire = repertoire;
2167
2168   while (1)
2169     {
2170       unsigned long int class_bit = 0;
2171       unsigned long int class256_bit = 0;
2172       int handle_digits = 0;
2173
2174       /* Of course we don't proceed beyond the end of file.  */
2175       if (nowtok == tok_eof)
2176         break;
2177
2178       /* Ingore empty lines.  */
2179       if (nowtok == tok_eol)
2180         {
2181           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2182           nowtok = now->tok;
2183           continue;
2184         }
2185
2186       switch (nowtok)
2187         {
2188         case tok_charclass:
2189           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2190           while (now->tok == tok_ident || now->tok == tok_string)
2191             {
2192               ctype_class_new (ldfile, ctype, now->val.str.startmb);
2193               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2194               if (now->tok != tok_semicolon)
2195                 break;
2196               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2197             }
2198           if (now->tok != tok_eol)
2199             SYNTAX_ERROR (_("\
2200 %s: syntax error in definition of new character class"), "LC_CTYPE");
2201           break;
2202
2203         case tok_charconv:
2204           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2205           while (now->tok == tok_ident || now->tok == tok_string)
2206             {
2207               ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2208               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2209               if (now->tok != tok_semicolon)
2210                 break;
2211               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2212             }
2213           if (now->tok != tok_eol)
2214             SYNTAX_ERROR (_("\
2215 %s: syntax error in definition of new character map"), "LC_CTYPE");
2216           break;
2217
2218         case tok_class:
2219           /* Ignore the rest of the line if we don't need the input of
2220              this line.  */
2221           if (ignore_content)
2222             {
2223               lr_ignore_rest (ldfile, 0);
2224               break;
2225             }
2226
2227           /* We simply forget the `class' keyword and use the following
2228              operand to determine the bit.  */
2229           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2230           if (now->tok == tok_ident || now->tok == tok_string)
2231             {
2232               /* Must can be one of the predefined class names.  */
2233               for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2234                 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
2235                   break;
2236               if (cnt >= ctype->nr_charclass)
2237                 {
2238                   /* OK, it's a new class.  */
2239                   ctype_class_new (ldfile, ctype, now->val.str.startmb);
2240
2241                   class_bit = _ISwbit (ctype->nr_charclass - 1);
2242                 }
2243               else
2244                 {
2245                   class_bit = _ISwbit (cnt);
2246
2247                   free (now->val.str.startmb);
2248                 }
2249             }
2250           else if (now->tok == tok_digit)
2251             goto handle_tok_digit;
2252           else if (now->tok < tok_upper || now->tok > tok_blank)
2253             goto err_label;
2254           else
2255             {
2256               class_bit = BITw (now->tok);
2257               class256_bit = BIT (now->tok);
2258             }
2259
2260           /* The next character must be a semicolon.  */
2261           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2262           if (now->tok != tok_semicolon)
2263             goto err_label;
2264           goto read_charclass;
2265
2266         case tok_upper:
2267         case tok_lower:
2268         case tok_alpha:
2269         case tok_alnum:
2270         case tok_space:
2271         case tok_cntrl:
2272         case tok_punct:
2273         case tok_graph:
2274         case tok_print:
2275         case tok_xdigit:
2276         case tok_blank:
2277           /* Ignore the rest of the line if we don't need the input of
2278              this line.  */
2279           if (ignore_content)
2280             {
2281               lr_ignore_rest (ldfile, 0);
2282               break;
2283             }
2284
2285           class_bit = BITw (now->tok);
2286           class256_bit = BIT (now->tok);
2287           handle_digits = 0;
2288         read_charclass:
2289           ctype->class_done |= class_bit;
2290           last_token = tok_none;
2291           ellipsis_token = tok_none;
2292           step = 1;
2293           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2294           while (now->tok != tok_eol && now->tok != tok_eof)
2295             {
2296               uint32_t wch;
2297               struct charseq *seq;
2298
2299               if (ellipsis_token == tok_none)
2300                 {
2301                   if (get_character (now, charmap, repertoire, &seq, &wch))
2302                     goto err_label;
2303
2304                   if (!ignore_content && seq != NULL && seq->nbytes == 1)
2305                     /* Yep, we can store information about this byte
2306                        sequence.  */
2307                     ctype->class256_collection[seq->bytes[0]] |= class256_bit;
2308
2309                   if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2310                       && class_bit != 0)
2311                     /* We have the UCS4 position.  */
2312                     *find_idx (ctype, &ctype->class_collection,
2313                                &ctype->class_collection_max,
2314                                &ctype->class_collection_act, wch) |= class_bit;
2315
2316                   last_token = now->tok;
2317                   /* Terminate the string.  */
2318                   if (last_token == tok_bsymbol)
2319                     {
2320                       now->val.str.startmb[now->val.str.lenmb] = '\0';
2321                       last_str = now->val.str.startmb;
2322                     }
2323                   else
2324                     last_str = NULL;
2325                   last_wch = wch;
2326                   memcpy (last_charcode, now->val.charcode.bytes, 16);
2327                   last_charcode_len = now->val.charcode.nbytes;
2328
2329                   if (!ignore_content && handle_digits == 1)
2330                     {
2331                       /* We must store the digit values.  */
2332                       if (ctype->mbdigits_act == ctype->mbdigits_max)
2333                         {
2334                           ctype->mbdigits_max += 10;
2335                           ctype->mbdigits = xrealloc (ctype->mbdigits,
2336                                                       (ctype->mbdigits_max
2337                                                        * sizeof (char *)));
2338                           ctype->wcdigits_max += 10;
2339                           ctype->wcdigits = xrealloc (ctype->wcdigits,
2340                                                       (ctype->wcdigits_max
2341                                                        * sizeof (uint32_t)));
2342                         }
2343
2344                       ctype->mbdigits[ctype->mbdigits_act++] = seq;
2345                       ctype->wcdigits[ctype->wcdigits_act++] = wch;
2346                     }
2347                   else if (!ignore_content && handle_digits == 2)
2348                     {
2349                       /* We must store the digit values.  */
2350                       if (ctype->outdigits_act >= 10)
2351                         {
2352                           lr_error (ldfile, _("\
2353 %s: field `%s' does not contain exactly ten entries"),
2354                             "LC_CTYPE", "outdigit");
2355                           lr_ignore_rest (ldfile, 0);
2356                           break;
2357                         }
2358
2359                       ctype->mboutdigits[ctype->outdigits_act] = seq;
2360                       ctype->wcoutdigits[ctype->outdigits_act] = wch;
2361                       ++ctype->outdigits_act;
2362                     }
2363                 }
2364               else
2365                 {
2366                   /* Now it gets complicated.  We have to resolve the
2367                      ellipsis problem.  First we must distinguish between
2368                      the different kind of ellipsis and this must match the
2369                      tokens we have seen.  */
2370                   assert (last_token != tok_none);
2371
2372                   if (last_token != now->tok)
2373                     {
2374                       lr_error (ldfile, _("\
2375 ellipsis range must be marked by two operands of same type"));
2376                       lr_ignore_rest (ldfile, 0);
2377                       break;
2378                     }
2379
2380                   if (last_token == tok_bsymbol)
2381                     {
2382                       if (ellipsis_token == tok_ellipsis3)
2383                         lr_error (ldfile, _("with symbolic name range values \
2384 the absolute ellipsis `...' must not be used"));
2385
2386                       charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2387                                                    repertoire, now, last_str,
2388                                                    class256_bit, class_bit,
2389                                                    (ellipsis_token
2390                                                     == tok_ellipsis4
2391                                                     ? 10 : 16),
2392                                                    ignore_content,
2393                                                    handle_digits, step);
2394                     }
2395                   else if (last_token == tok_ucs4)
2396                     {
2397                       if (ellipsis_token != tok_ellipsis2)
2398                         lr_error (ldfile, _("\
2399 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2400
2401                       charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2402                                                repertoire, now, last_wch,
2403                                                class256_bit, class_bit,
2404                                                ignore_content, handle_digits,
2405                                                step);
2406                     }
2407                   else
2408                     {
2409                       assert (last_token == tok_charcode);
2410
2411                       if (ellipsis_token != tok_ellipsis3)
2412                         lr_error (ldfile, _("\
2413 with character code range values one must use the absolute ellipsis `...'"));
2414
2415                       charclass_charcode_ellipsis (ldfile, ctype, charmap,
2416                                                    repertoire, now,
2417                                                    last_charcode,
2418                                                    last_charcode_len,
2419                                                    class256_bit, class_bit,
2420                                                    ignore_content,
2421                                                    handle_digits);
2422                     }
2423
2424                   /* Now we have used the last value.  */
2425                   last_token = tok_none;
2426                 }
2427
2428               /* Next we expect a semicolon or the end of the line.  */
2429               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2430               if (now->tok == tok_eol || now->tok == tok_eof)
2431                 break;
2432
2433               if (last_token != tok_none
2434                   && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
2435                 {
2436                   if (now->tok == tok_ellipsis2_2)
2437                     {
2438                       now->tok = tok_ellipsis2;
2439                       step = 2;
2440                     }
2441                   else if (now->tok == tok_ellipsis4_2)
2442                     {
2443                       now->tok = tok_ellipsis4;
2444                       step = 2;
2445                     }
2446
2447                   ellipsis_token = now->tok;
2448
2449                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2450                   continue;
2451                 }
2452
2453               if (now->tok != tok_semicolon)
2454                 goto err_label;
2455
2456               /* And get the next character.  */
2457               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2458
2459               ellipsis_token = tok_none;
2460               step = 1;
2461             }
2462           break;
2463
2464         case tok_digit:
2465           /* Ignore the rest of the line if we don't need the input of
2466              this line.  */
2467           if (ignore_content)
2468             {
2469               lr_ignore_rest (ldfile, 0);
2470               break;
2471             }
2472
2473         handle_tok_digit:
2474           class_bit = _ISwdigit;
2475           class256_bit = _ISdigit;
2476           handle_digits = 1;
2477           goto read_charclass;
2478
2479         case tok_outdigit:
2480           /* Ignore the rest of the line if we don't need the input of
2481              this line.  */
2482           if (ignore_content)
2483             {
2484               lr_ignore_rest (ldfile, 0);
2485               break;
2486             }
2487
2488           if (ctype->outdigits_act != 0)
2489             lr_error (ldfile, _("\
2490 %s: field `%s' declared more than once"),
2491                       "LC_CTYPE", "outdigit");
2492           class_bit = 0;
2493           class256_bit = 0;
2494           handle_digits = 2;
2495           goto read_charclass;
2496
2497         case tok_toupper:
2498           /* Ignore the rest of the line if we don't need the input of
2499              this line.  */
2500           if (ignore_content)
2501             {
2502               lr_ignore_rest (ldfile, 0);
2503               break;
2504             }
2505
2506           mapidx = 0;
2507           goto read_mapping;
2508
2509         case tok_tolower:
2510           /* Ignore the rest of the line if we don't need the input of
2511              this line.  */
2512           if (ignore_content)
2513             {
2514               lr_ignore_rest (ldfile, 0);
2515               break;
2516             }
2517
2518           mapidx = 1;
2519           goto read_mapping;
2520
2521         case tok_map:
2522           /* Ignore the rest of the line if we don't need the input of
2523              this line.  */
2524           if (ignore_content)
2525             {
2526               lr_ignore_rest (ldfile, 0);
2527               break;
2528             }
2529
2530           /* We simply forget the `map' keyword and use the following
2531              operand to determine the mapping.  */
2532           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2533           if (now->tok == tok_ident || now->tok == tok_string)
2534             {
2535               size_t cnt;
2536
2537               for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2538                 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2539                   break;
2540
2541               if (cnt < ctype->map_collection_nr)
2542                 free (now->val.str.startmb);
2543               else
2544                 /* OK, it's a new map.  */
2545                 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2546
2547               mapidx = cnt;
2548             }
2549           else if (now->tok < tok_toupper || now->tok > tok_tolower)
2550             goto err_label;
2551           else
2552             mapidx = now->tok - tok_toupper;
2553
2554           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2555           /* This better should be a semicolon.  */
2556           if (now->tok != tok_semicolon)
2557             goto err_label;
2558
2559         read_mapping:
2560           /* Test whether this mapping was already defined.  */
2561           if (ctype->tomap_done[mapidx])
2562             {
2563               lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2564                         ctype->mapnames[mapidx]);
2565               lr_ignore_rest (ldfile, 0);
2566               break;
2567             }
2568           ctype->tomap_done[mapidx] = 1;
2569
2570           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2571           while (now->tok != tok_eol && now->tok != tok_eof)
2572             {
2573               struct charseq *from_seq;
2574               uint32_t from_wch;
2575               struct charseq *to_seq;
2576               uint32_t to_wch;
2577
2578               /* Every pair starts with an opening brace.  */
2579               if (now->tok != tok_open_brace)
2580                 goto err_label;
2581
2582               /* Next comes the from-value.  */
2583               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2584               if (get_character (now, charmap, repertoire, &from_seq,
2585                                  &from_wch) != 0)
2586                 goto err_label;
2587
2588               /* The next is a comma.  */
2589               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2590               if (now->tok != tok_comma)
2591                 goto err_label;
2592
2593               /* And the other value.  */
2594               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2595               if (get_character (now, charmap, repertoire, &to_seq,
2596                                  &to_wch) != 0)
2597                 goto err_label;
2598
2599               /* And the last thing is the closing brace.  */
2600               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2601               if (now->tok != tok_close_brace)
2602                 goto err_label;
2603
2604               if (!ignore_content)
2605                 {
2606                   /* Check whether the mapping converts from an ASCII value
2607                      to a non-ASCII value.  */
2608                   if (from_seq != NULL && from_seq->nbytes == 1
2609                       && isascii (from_seq->bytes[0])
2610                       && to_seq != NULL && (to_seq->nbytes != 1
2611                                             || !isascii (to_seq->bytes[0])))
2612                     ctype->to_nonascii = 1;
2613
2614                   if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2615                       && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2616                     /* We can use this value.  */
2617                     ctype->map256_collection[mapidx][from_seq->bytes[0]]
2618                       = to_seq->bytes[0];
2619
2620                   if (from_wch != ILLEGAL_CHAR_VALUE
2621                       && to_wch != ILLEGAL_CHAR_VALUE)
2622                     /* Both correct values.  */
2623                     *find_idx (ctype, &ctype->map_collection[mapidx],
2624                                &ctype->map_collection_max[mapidx],
2625                                &ctype->map_collection_act[mapidx],
2626                                from_wch) = to_wch;
2627                 }
2628
2629               /* Now comes a semicolon or the end of the line/file.  */
2630               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2631               if (now->tok == tok_semicolon)
2632                 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2633             }
2634           break;
2635
2636         case tok_translit_start:
2637           /* Ignore the entire translit section with its peculiar syntax
2638              if we don't need the input.  */
2639           if (ignore_content)
2640             {
2641               do
2642                 {
2643                   lr_ignore_rest (ldfile, 0);
2644                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2645                 }
2646               while (now->tok != tok_translit_end && now->tok != tok_eof);
2647
2648               if (now->tok == tok_eof)
2649                 lr_error (ldfile, _(\
2650 "%s: `translit_start' section does not end with `translit_end'"),
2651                           "LC_CTYPE");
2652
2653               break;
2654             }
2655
2656           /* The rest of the line better should be empty.  */
2657           lr_ignore_rest (ldfile, 1);
2658
2659           /* We count here the number of allocated entries in the `translit'
2660              array.  */
2661           cnt = 0;
2662
2663           ldfile->translate_strings = 1;
2664           ldfile->return_widestr = 1;
2665
2666           /* We proceed until we see the `translit_end' token.  */
2667           while (now = lr_token (ldfile, charmap, NULL, repertoire, verbose),
2668                  now->tok != tok_translit_end && now->tok != tok_eof)
2669             {
2670               if (now->tok == tok_eol)
2671                 /* Ignore empty lines.  */
2672                 continue;
2673
2674               if (now->tok == tok_include)
2675                 {
2676                   /* We have to include locale.  */
2677                   const char *locale_name;
2678                   const char *repertoire_name;
2679                   struct translit_include_t *include_stmt, **include_ptr;
2680
2681                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2682                   /* This should be a string or an identifier.  In any
2683                      case something to name a locale.  */
2684                   if (now->tok != tok_string && now->tok != tok_ident)
2685                     {
2686                     translit_syntax:
2687                       lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2688                       lr_ignore_rest (ldfile, 0);
2689                       continue;
2690                     }
2691                   locale_name = now->val.str.startmb;
2692
2693                   /* Next should be a semicolon.  */
2694                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2695                   if (now->tok != tok_semicolon)
2696                     goto translit_syntax;
2697
2698                   /* Now the repertoire name.  */
2699                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2700                   if ((now->tok != tok_string && now->tok != tok_ident)
2701                       || now->val.str.startmb == NULL)
2702                     goto translit_syntax;
2703                   repertoire_name = now->val.str.startmb;
2704                   if (repertoire_name[0] == '\0')
2705                     /* Ignore the empty string.  */
2706                     repertoire_name = NULL;
2707
2708                   /* Save the include statement for later processing.  */
2709                   include_stmt = (struct translit_include_t *)
2710                     xmalloc (sizeof (struct translit_include_t));
2711                   include_stmt->copy_locale = locale_name;
2712                   include_stmt->copy_repertoire = repertoire_name;
2713                   include_stmt->next = NULL;
2714
2715                   include_ptr = &ctype->translit_include;
2716                   while (*include_ptr != NULL)
2717                     include_ptr = &(*include_ptr)->next;
2718                   *include_ptr = include_stmt;
2719
2720                   /* The rest of the line must be empty.  */
2721                   lr_ignore_rest (ldfile, 1);
2722
2723                   /* Make sure the locale is read.  */
2724                   add_to_readlist (LC_CTYPE, locale_name, repertoire_name,
2725                                    1, NULL);
2726                   continue;
2727                 }
2728               else if (now->tok == tok_default_missing)
2729                 {
2730                   uint32_t *wstr;
2731
2732                   while (1)
2733                     {
2734                       /* We expect a single character or string as the
2735                          argument.  */
2736                       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2737                       wstr = read_widestring (ldfile, now, charmap,
2738                                               repertoire);
2739
2740                       if (wstr != NULL)
2741                         {
2742                           if (ctype->default_missing != NULL)
2743                             {
2744                               lr_error (ldfile, _("\
2745 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2746                               WITH_CUR_LOCALE (error_at_line (0, 0,
2747                                                               ctype->default_missing_file,
2748                                                               ctype->default_missing_lineno,
2749                                                               _("\
2750 previous definition was here")));
2751                             }
2752                           else
2753                             {
2754                               ctype->default_missing = wstr;
2755                               ctype->default_missing_file = ldfile->fname;
2756                               ctype->default_missing_lineno = ldfile->lineno;
2757                             }
2758                           /* We can have more entries, ignore them.  */
2759                           lr_ignore_rest (ldfile, 0);
2760                           break;
2761                         }
2762                       else if (wstr == (uint32_t *) -1l)
2763                         /* This was a syntax error.  */
2764                         break;
2765
2766                       /* Maybe there is another replacement we can use.  */
2767                       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2768                       if (now->tok == tok_eol || now->tok == tok_eof)
2769                         {
2770                           /* Nothing found.  We tell the user.  */
2771                           lr_error (ldfile, _("\
2772 %s: no representable `default_missing' definition found"), "LC_CTYPE");
2773                           break;
2774                         }
2775                       if (now->tok != tok_semicolon)
2776                         goto translit_syntax;
2777                     }
2778
2779                   continue;
2780                 }
2781               else if (now->tok == tok_translit_ignore)
2782                 {
2783                   read_translit_ignore_entry (ldfile, ctype, charmap,
2784                                               repertoire);
2785                   continue;
2786                 }
2787
2788               read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2789             }
2790           ldfile->return_widestr = 0;
2791
2792           if (now->tok == tok_eof)
2793             lr_error (ldfile, _(\
2794 "%s: `translit_start' section does not end with `translit_end'"),
2795                       "LC_CTYPE");
2796
2797           break;
2798
2799         case tok_ident:
2800           /* Ignore the rest of the line if we don't need the input of
2801              this line.  */
2802           if (ignore_content)
2803             {
2804               lr_ignore_rest (ldfile, 0);
2805               break;
2806             }
2807
2808           /* This could mean one of several things.  First test whether
2809              it's a character class name.  */
2810           for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2811             if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2812               break;
2813           if (cnt < ctype->nr_charclass)
2814             {
2815               class_bit = _ISwbit (cnt);
2816               class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2817               free (now->val.str.startmb);
2818               goto read_charclass;
2819             }
2820           for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2821             if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2822               break;
2823           if (cnt < ctype->map_collection_nr)
2824             {
2825               mapidx = cnt;
2826               free (now->val.str.startmb);
2827               goto read_mapping;
2828             }
2829           break;
2830
2831         case tok_end:
2832           /* Next we assume `LC_CTYPE'.  */
2833           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2834           if (now->tok == tok_eof)
2835             break;
2836           if (now->tok == tok_eol)
2837             lr_error (ldfile, _("%s: incomplete `END' line"),
2838                       "LC_CTYPE");
2839           else if (now->tok != tok_lc_ctype)
2840             lr_error (ldfile, _("\
2841 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2842           lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2843           return;
2844
2845         default:
2846         err_label:
2847           if (now->tok != tok_eof)
2848             SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2849         }
2850
2851       /* Prepare for the next round.  */
2852       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2853       nowtok = now->tok;
2854     }
2855
2856   /* When we come here we reached the end of the file.  */
2857   lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2858 }
2859
2860
2861 /* Subroutine of set_class_defaults, below.  For each value in FROM..TO
2862    (inclusive) set class bit BITPOS in CTYPE's class_collection and, when
2863    CHARMAP maps the character to a single byte, in class256_collection.  */
2864 static void
2865 set_one_default (struct locale_ctype_t *ctype,
2866                  const struct charmap_t *charmap,
2867                  int bitpos, int from, int to)
2868 {
2869   /* Masks for class256_collection and for class_collection.  */
2870   const int mask8 = _ISbit (bitpos);
2871   const int maskw = _ISwbit (bitpos);
2872   /* One-character name used for the lookup and in the diagnostics.  */
2873   char name[2] = "?";
2874
2875   for (int code = from; code <= to; ++code)
2876     {
2877       struct charseq *found;
2878
2879       name[0] = (char) code;
2880       found = charmap_find_value (charmap, name, 1);
2881       if (found == NULL)
2882         {
2883           /* Retry under the name `U' followed by eight hex digits.  */
2884           char ucs_name[10];
2885           sprintf (ucs_name, "U%08X", code);
2886           found = charmap_find_value (charmap, ucs_name, 9);
2887         }
2888
2889       if (found != NULL && found->nbytes == 1)
2890         ctype->class256_collection[found->bytes[0]] |= mask8;
2891       else if (found != NULL)
2892         WITH_CUR_LOCALE (error (0, 0, _("\
2893 %s: character `%s' in charmap not representable with one byte"),
2894                                 "LC_CTYPE", name));
2895       else if (!be_quiet)
2896         WITH_CUR_LOCALE (error (0, 0, _("\
2897 %s: character `%s' not defined while needed as default value"),
2898                                 "LC_CTYPE", name));
2899
2900       /* No need to search here, the ASCII value is also the Unicode
2901          value; this bit is set even when the lookup above failed.  */
2902       ELEM (ctype, class_collection, , code) |= maskw;
2903     }
2904 }
2905
2906 static void
2907 set_class_defaults (struct locale_ctype_t *ctype,
2908                     const struct charmap_t *charmap,
2909                     struct repertoire_t *repertoire)
2910 {
2911 #define set_default(bitpos, from, to) \
2912   set_one_default (ctype, charmap, bitpos, from, to)
2913
2914   /* This function defines the default values for the classes and conversions
2915      according to POSIX.2 2.5.2.1.
2916      It may seem that the order of these if-blocks is arbitrary but it is NOT.
2917      Don't move them unless you know what you are doing!  */
2918
2919   /* Set default values if keyword was not present.  */
2920   if ((ctype->class_done & BITw (tok_upper)) == 0)
2921     /* "If this keyword [upper] is not specified, the uppercase letters
2922         `A' through `Z', ..., shall automatically belong to this class,
2923         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
2924     set_default (BITPOS (tok_upper), 'A', 'Z');
2925
2926   if ((ctype->class_done & BITw (tok_lower)) == 0)
2927     /* "If this keyword [lower] is not specified, the lowercase letters
2928         `a' through `z', ..., shall automatically belong to this class,
2929         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
2930     set_default (BITPOS (tok_lower), 'a', 'z');
2931
2932   if ((ctype->class_done & BITw (tok_alpha)) == 0)
2933     {
2934       /* Table 2-6 in P1003.2 says that characters in class `upper' or
2935          class `lower' *must* be in class `alpha'.  */
2936       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
2937       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
2938
2939       for (size_t cnt = 0; cnt < 256; ++cnt)
2940         if ((ctype->class256_collection[cnt] & mask) != 0)
2941           ctype->class256_collection[cnt] |= BIT (tok_alpha);
2942
2943       for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2944         if ((ctype->class_collection[cnt] & maskw) != 0)
2945           ctype->class_collection[cnt] |= BITw (tok_alpha);
2946     }
2947
2948   if ((ctype->class_done & BITw (tok_digit)) == 0)
2949     /* "If this keyword [digit] is not specified, the digits `0' through
2950         `9', ..., shall automatically belong to this class, with
2951         implementation-defined character values."  [P1003.2, 2.5.2.1]  */
2952     set_default (BITPOS (tok_digit), '0', '9');
2953
2954   /* "Only characters specified for the `alpha' and `digit' keyword
2955      shall be specified.  Characters specified for the keyword `alpha'
2956      and `digit' are automatically included in this class [alnum]."  */
2957   {
2958     unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
2959     unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
2960
2961     for (size_t cnt = 0; cnt < 256; ++cnt)
2962       if ((ctype->class256_collection[cnt] & mask) != 0)
2963         ctype->class256_collection[cnt] |= BIT (tok_alnum);
2964
2965     for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2966       if ((ctype->class_collection[cnt] & maskw) != 0)
2967         ctype->class_collection[cnt] |= BITw (tok_alnum);
2968   }
2969
2970   if ((ctype->class_done & BITw (tok_space)) == 0)
2971     /* "If this keyword [space] is not specified, the characters <space>,
2972         <form-feed>, <newline>, <carriage-return>, <tab>, and
2973         <vertical-tab>, ..., shall automatically belong to this class,
2974         with implementation-defined character values."  [P1003.2, 2.5.2.1]  */
2975     {
2976       struct charseq *seq;
2977
2978       seq = charmap_find_value (charmap, "space", 5);
2979       if (seq == NULL)
2980         seq = charmap_find_value (charmap, "SP", 2);
2981       if (seq == NULL)
2982         seq = charmap_find_value (charmap, "U00000020", 9);
2983       if (seq == NULL)
2984         {
2985           if (!be_quiet)
2986             WITH_CUR_LOCALE (error (0, 0, _("\
2987 %s: character `%s' not defined while needed as default value"),
2988                                     "LC_CTYPE", "<space>"));
2989         }
2990       else if (seq->nbytes != 1)
2991         WITH_CUR_LOCALE (error (0, 0, _("\
2992 %s: character `%s' in charmap not representable with one byte"),
2993                                 "LC_CTYPE", "<space>"));
2994       else
2995         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2996
2997       /* No need to search.  */
2998       ELEM (ctype, class_collection, , L' ') |= BITw (tok_space);
2999
3000       seq = charmap_find_value (charmap, "form-feed", 9);
3001       if (seq == NULL)
3002         seq = charmap_find_value (charmap, "U0000000C", 9);
3003       if (seq == NULL)
3004         {
3005           if (!be_quiet)
3006             WITH_CUR_LOCALE (error (0, 0, _("\
3007 %s: character `%s' not defined while needed as default value"),
3008                                     "LC_CTYPE", "<form-feed>"));
3009         }
3010       else if (seq->nbytes != 1)
3011         WITH_CUR_LOCALE (error (0, 0, _("\
3012 %s: character `%s' in charmap not representable with one byte"),
3013                                 "LC_CTYPE", "<form-feed>"));
3014       else
3015         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3016
3017       /* No need to search.  */
3018       ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space);
3019
3020
3021       seq = charmap_find_value (charmap, "newline", 7);
3022       if (seq == NULL)
3023         seq = charmap_find_value (charmap, "U0000000A", 9);
3024       if (seq == NULL)
3025         {
3026           if (!be_quiet)
3027             WITH_CUR_LOCALE (error (0, 0, _("\
3028 %s: character `%s' not defined while needed as default value"),
3029                                     "LC_CTYPE", "<newline>"));
3030         }
3031       else if (seq->nbytes != 1)
3032         WITH_CUR_LOCALE (error (0, 0, _("\
3033 %s: character `%s' in charmap not representable with one byte"),
3034                                 "LC_CTYPE", "<newline>"));
3035       else
3036         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3037
3038       /* No need to search.  */
3039       ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space);
3040
3041
3042       seq = charmap_find_value (charmap, "carriage-return", 15);
3043       if (seq == NULL)
3044         seq = charmap_find_value (charmap, "U0000000D", 9);
3045       if (seq == NULL)
3046         {
3047           if (!be_quiet)
3048             WITH_CUR_LOCALE (error (0, 0, _("\
3049 %s: character `%s' not defined while needed as default value"),
3050                                     "LC_CTYPE", "<carriage-return>"));
3051         }
3052       else if (seq->nbytes != 1)
3053         WITH_CUR_LOCALE (error (0, 0, _("\
3054 %s: character `%s' in charmap not representable with one byte"),
3055                                 "LC_CTYPE", "<carriage-return>"));
3056       else
3057         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3058
3059       /* No need to search.  */
3060       ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space);
3061
3062
3063       seq = charmap_find_value (charmap, "tab", 3);
3064       if (seq == NULL)
3065         seq = charmap_find_value (charmap, "U00000009", 9);
3066       if (seq == NULL)
3067         {
3068           if (!be_quiet)
3069             WITH_CUR_LOCALE (error (0, 0, _("\
3070 %s: character `%s' not defined while needed as default value"),
3071                                     "LC_CTYPE", "<tab>"));
3072         }
3073       else if (seq->nbytes != 1)
3074         WITH_CUR_LOCALE (error (0, 0, _("\
3075 %s: character `%s' in charmap not representable with one byte"),
3076                                 "LC_CTYPE", "<tab>"));
3077       else
3078         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3079
3080       /* No need to search.  */
3081       ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space);
3082
3083
3084       seq = charmap_find_value (charmap, "vertical-tab", 12);
3085       if (seq == NULL)
3086         seq = charmap_find_value (charmap, "U0000000B", 9);
3087       if (seq == NULL)
3088         {
3089           if (!be_quiet)
3090             WITH_CUR_LOCALE (error (0, 0, _("\
3091 %s: character `%s' not defined while needed as default value"),
3092                                     "LC_CTYPE", "<vertical-tab>"));
3093         }
3094       else if (seq->nbytes != 1)
3095         WITH_CUR_LOCALE (error (0, 0, _("\
3096 %s: character `%s' in charmap not representable with one byte"),
3097                                 "LC_CTYPE", "<vertical-tab>"));
3098       else
3099         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3100
3101       /* No need to search.  */
3102       ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space);
3103     }
3104
3105   if ((ctype->class_done & BITw (tok_xdigit)) == 0)
3106     /* "If this keyword is not specified, the digits `0' to `9', the
3107         uppercase letters `A' through `F', and the lowercase letters `a'
3108         through `f', ..., shall automatically belong to this class, with
3109         implementation defined character values."  [P1003.2, 2.5.2.1]  */
3110     {
3111       set_default (BITPOS (tok_xdigit), '0', '9');
3112       set_default (BITPOS (tok_xdigit), 'A', 'F');
3113       set_default (BITPOS (tok_xdigit), 'a', 'f');
3114     }
3115
3116   if ((ctype->class_done & BITw (tok_blank)) == 0)
3117     /* "If this keyword [blank] is unspecified, the characters <space> and
3118        <tab> shall belong to this character class."  [P1003.2, 2.5.2.1]  */
3119    {
3120       struct charseq *seq;
3121
3122       seq = charmap_find_value (charmap, "space", 5);
3123       if (seq == NULL)
3124         seq = charmap_find_value (charmap, "SP", 2);
3125       if (seq == NULL)
3126         seq = charmap_find_value (charmap, "U00000020", 9);
3127       if (seq == NULL)
3128         {
3129           if (!be_quiet)
3130             WITH_CUR_LOCALE (error (0, 0, _("\
3131 %s: character `%s' not defined while needed as default value"),
3132                                     "LC_CTYPE", "<space>"));
3133         }
3134       else if (seq->nbytes != 1)
3135         WITH_CUR_LOCALE (error (0, 0, _("\
3136 %s: character `%s' in charmap not representable with one byte"),
3137                                 "LC_CTYPE", "<space>"));
3138       else
3139         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3140
3141       /* No need to search.  */
3142       ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank);
3143
3144
3145       seq = charmap_find_value (charmap, "tab", 3);
3146       if (seq == NULL)
3147         seq = charmap_find_value (charmap, "U00000009", 9);
3148       if (seq == NULL)
3149         {
3150           if (!be_quiet)
3151             WITH_CUR_LOCALE (error (0, 0, _("\
3152 %s: character `%s' not defined while needed as default value"),
3153                                     "LC_CTYPE", "<tab>"));
3154         }
3155       else if (seq->nbytes != 1)
3156         WITH_CUR_LOCALE (error (0, 0, _("\
3157 %s: character `%s' in charmap not representable with one byte"),
3158                                 "LC_CTYPE", "<tab>"));
3159       else
3160         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3161
3162       /* No need to search.  */
3163       ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank);
3164     }
3165
3166   if ((ctype->class_done & BITw (tok_graph)) == 0)
3167     /* "If this keyword [graph] is not specified, characters specified for
3168         the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3169         shall belong to this character class."  [P1003.2, 2.5.2.1]  */
3170     {
3171       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3172         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3173       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3174         BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3175         BITw (tok_punct);
3176
3177       for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3178         if ((ctype->class_collection[cnt] & maskw) != 0)
3179           ctype->class_collection[cnt] |= BITw (tok_graph);
3180
3181       for (size_t cnt = 0; cnt < 256; ++cnt)
3182         if ((ctype->class256_collection[cnt] & mask) != 0)
3183           ctype->class256_collection[cnt] |= BIT (tok_graph);
3184     }
3185
3186   if ((ctype->class_done & BITw (tok_print)) == 0)
3187     /* "If this keyword [print] is not provided, characters specified for
3188         the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3189         and the <space> character shall belong to this character class."
3190         [P1003.2, 2.5.2.1]  */
3191     {
3192       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3193         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3194       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3195         BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3196         BITw (tok_punct);
3197       struct charseq *seq;
3198
3199       for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3200         if ((ctype->class_collection[cnt] & maskw) != 0)
3201           ctype->class_collection[cnt] |= BITw (tok_print);
3202
3203       for (size_t cnt = 0; cnt < 256; ++cnt)
3204         if ((ctype->class256_collection[cnt] & mask) != 0)
3205           ctype->class256_collection[cnt] |= BIT (tok_print);
3206
3207
3208       seq = charmap_find_value (charmap, "space", 5);
3209       if (seq == NULL)
3210         seq = charmap_find_value (charmap, "SP", 2);
3211       if (seq == NULL)
3212         seq = charmap_find_value (charmap, "U00000020", 9);
3213       if (seq == NULL)
3214         {
3215           if (!be_quiet)
3216             WITH_CUR_LOCALE (error (0, 0, _("\
3217 %s: character `%s' not defined while needed as default value"),
3218                                     "LC_CTYPE", "<space>"));
3219         }
3220       else if (seq->nbytes != 1)
3221         WITH_CUR_LOCALE (error (0, 0, _("\
3222 %s: character `%s' in charmap not representable with one byte"),
3223                                 "LC_CTYPE", "<space>"));
3224       else
3225         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
3226
3227       /* No need to search.  */
3228       ELEM (ctype, class_collection, , L' ') |= BITw (tok_print);
3229     }
3230
3231   if (ctype->tomap_done[0] == 0)
3232     /* "If this keyword [toupper] is not specified, the lowercase letters
3233         `a' through `z', and their corresponding uppercase letters `A' to
3234         `Z', ..., shall automatically be included, with implementation-
3235         defined character values."  [P1003.2, 2.5.2.1]  */
3236     {
3237       char tmp[4];
3238       int ch;
3239
3240       strcpy (tmp, "<?>");
3241
3242       for (ch = 'a'; ch <= 'z'; ++ch)
3243         {
3244           struct charseq *seq_from, *seq_to;
3245
3246           tmp[1] = (char) ch;
3247
3248           seq_from = charmap_find_value (charmap, &tmp[1], 1);
3249           if (seq_from == NULL)
3250             {
3251               char buf[10];
3252               sprintf (buf, "U%08X", ch);
3253               seq_from = charmap_find_value (charmap, buf, 9);
3254             }
3255           if (seq_from == NULL)
3256             {
3257               if (!be_quiet)
3258                 WITH_CUR_LOCALE (error (0, 0, _("\
3259 %s: character `%s' not defined while needed as default value"),
3260                                         "LC_CTYPE", tmp));
3261             }
3262           else if (seq_from->nbytes != 1)
3263             {
3264               if (!be_quiet)
3265                 WITH_CUR_LOCALE (error (0, 0, _("\
3266 %s: character `%s' needed as default value not representable with one byte"),
3267                                         "LC_CTYPE", tmp));
3268             }
3269           else
3270             {
3271               /* This conversion is implementation defined.  */
3272               tmp[1] = (char) (ch + ('A' - 'a'));
3273               seq_to = charmap_find_value (charmap, &tmp[1], 1);
3274               if (seq_to == NULL)
3275                 {
3276                   char buf[10];
3277                   sprintf (buf, "U%08X", ch + ('A' - 'a'));
3278                   seq_to = charmap_find_value (charmap, buf, 9);
3279                 }
3280               if (seq_to == NULL)
3281                 {
3282                   if (!be_quiet)
3283                     WITH_CUR_LOCALE (error (0, 0, _("\
3284 %s: character `%s' not defined while needed as default value"),
3285                                             "LC_CTYPE", tmp));
3286                 }
3287               else if (seq_to->nbytes != 1)
3288                 {
3289                   if (!be_quiet)
3290                     WITH_CUR_LOCALE (error (0, 0, _("\
3291 %s: character `%s' needed as default value not representable with one byte"),
3292                                             "LC_CTYPE", tmp));
3293                 }
3294               else
3295                 /* The index [0] is determined by the order of the
3296                    `ctype_map_newP' calls in `ctype_startup'.  */
3297                 ctype->map256_collection[0][seq_from->bytes[0]]
3298                   = seq_to->bytes[0];
3299             }
3300
3301           /* No need to search.  */
3302           ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
3303         }
3304     }
3305
3306   if (ctype->tomap_done[1] == 0)
3307     /* "If this keyword [tolower] is not specified, the mapping shall be
3308        the reverse mapping of the one specified to `toupper'."  [P1003.2]  */
3309     {
3310       for (size_t cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
3311         if (ctype->map_collection[0][cnt] != 0)
3312           ELEM (ctype, map_collection, [1],
3313                 ctype->map_collection[0][cnt])
3314             = ctype->charnames[cnt];
3315
3316       for (size_t cnt = 0; cnt < 256; ++cnt)
3317         if (ctype->map256_collection[0][cnt] != 0)
3318           ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
3319     }
3320
3321   if (ctype->outdigits_act != 10)
3322     {
3323       if (ctype->outdigits_act != 0)
3324         WITH_CUR_LOCALE (error (0, 0, _("\
3325 %s: field `%s' does not contain exactly ten entries"),
3326                                 "LC_CTYPE", "outdigit"));
3327
3328       for (size_t cnt = ctype->outdigits_act; cnt < 10; ++cnt)
3329         {
3330           ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3331                                                          (char *) digits + cnt,
3332                                                          1);
3333
3334           if (ctype->mboutdigits[cnt] == NULL)
3335             ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3336                                                            longnames[cnt],
3337                                                            strlen (longnames[cnt]));
3338
3339           if (ctype->mboutdigits[cnt] == NULL)
3340             ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3341                                                            uninames[cnt], 9);
3342
3343           if (ctype->mboutdigits[cnt] == NULL)
3344             {
3345               /* Provide a replacement.  */
3346               WITH_CUR_LOCALE (error (0, 0, _("\
3347 no output digits defined and none of the standard names in the charmap")));
3348
3349               ctype->mboutdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
3350                                                        sizeof (struct charseq)
3351                                                        + 1);
3352
3353               /* This is better than nothing.  */
3354               ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
3355               ctype->mboutdigits[cnt]->nbytes = 1;
3356             }
3357
3358           ctype->wcoutdigits[cnt] = L'0' + cnt;
3359         }
3360
3361       ctype->outdigits_act = 10;
3362     }
3363
3364 #undef set_default
3365 }
3366
3367
3368 /* Initialize.  Assumes t->p and t->q have already been set.  */
3369 static inline void
3370 wctype_table_init (struct wctype_table *t)
3371 {
3372   t->level1 = NULL;
3373   t->level1_alloc = t->level1_size = 0;
3374   t->level2 = NULL;
3375   t->level2_alloc = t->level2_size = 0;
3376   t->level3 = NULL;
3377   t->level3_alloc = t->level3_size = 0;
3378 }
3379
3380 /* Retrieve an entry.  */
3381 static inline int
3382 wctype_table_get (struct wctype_table *t, uint32_t wc)
3383 {
3384   uint32_t index1 = wc >> (t->q + t->p + 5);
3385   if (index1 < t->level1_size)
3386     {
3387       uint32_t lookup1 = t->level1[index1];
3388       if (lookup1 != EMPTY)
3389         {
3390           uint32_t index2 = ((wc >> (t->p + 5)) & ((1 << t->q) - 1))
3391                             + (lookup1 << t->q);
3392           uint32_t lookup2 = t->level2[index2];
3393           if (lookup2 != EMPTY)
3394             {
3395               uint32_t index3 = ((wc >> 5) & ((1 << t->p) - 1))
3396                                 + (lookup2 << t->p);
3397               uint32_t lookup3 = t->level3[index3];
3398               uint32_t index4 = wc & 0x1f;
3399
3400               return (lookup3 >> index4) & 1;
3401             }
3402         }
3403     }
3404   return 0;
3405 }
3406
3407 /* Add one entry.  */
3408 static void
3409 wctype_table_add (struct wctype_table *t, uint32_t wc)
3410 {
3411   uint32_t index1 = wc >> (t->q + t->p + 5);
3412   uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1);
3413   uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1);
3414   uint32_t index4 = wc & 0x1f;
3415   size_t i, i1, i2;
3416
3417   if (index1 >= t->level1_size)
3418     {
3419       if (index1 >= t->level1_alloc)
3420         {
3421           size_t alloc = 2 * t->level1_alloc;
3422           if (alloc <= index1)
3423             alloc = index1 + 1;
3424           t->level1 = (uint32_t *) xrealloc ((char *) t->level1,
3425                                              alloc * sizeof (uint32_t));
3426           t->level1_alloc = alloc;
3427         }
3428       while (index1 >= t->level1_size)
3429         t->level1[t->level1_size++] = EMPTY;
3430     }
3431
3432   if (t->level1[index1] == EMPTY)
3433     {
3434       if (t->level2_size == t->level2_alloc)
3435         {
3436           size_t alloc = 2 * t->level2_alloc + 1;
3437           t->level2 = (uint32_t *) xrealloc ((char *) t->level2,
3438                                              (alloc << t->q) * sizeof (uint32_t));
3439           t->level2_alloc = alloc;
3440         }
3441       i1 = t->level2_size << t->q;
3442       i2 = (t->level2_size + 1) << t->q;
3443       for (i = i1; i < i2; i++)
3444         t->level2[i] = EMPTY;
3445       t->level1[index1] = t->level2_size++;
3446     }
3447
3448   index2 += t->level1[index1] << t->q;
3449
3450   if (t->level2[index2] == EMPTY)
3451     {
3452       if (t->level3_size == t->level3_alloc)
3453         {
3454           size_t alloc = 2 * t->level3_alloc + 1;
3455           t->level3 = (uint32_t *) xrealloc ((char *) t->level3,
3456                                              (alloc << t->p) * sizeof (uint32_t));
3457           t->level3_alloc = alloc;
3458         }
3459       i1 = t->level3_size << t->p;
3460       i2 = (t->level3_size + 1) << t->p;
3461       for (i = i1; i < i2; i++)
3462         t->level3[i] = 0;
3463       t->level2[index2] = t->level3_size++;
3464     }
3465
3466   index3 += t->level2[index2] << t->p;
3467
3468   t->level3[index3] |= (uint32_t)1 << index4;
3469 }
3470
3471 /* Finalize and shrink.  */
3472 static void
3473 add_locale_wctype_table (struct locale_file *file, struct wctype_table *t)
3474 {
3475   size_t i, j, k;
3476   uint32_t reorder3[t->level3_size];
3477   uint32_t reorder2[t->level2_size];
3478   uint32_t level2_offset, level3_offset;
3479
3480   /* Uniquify level3 blocks.  */
3481   k = 0;
3482   for (j = 0; j < t->level3_size; j++)
3483     {
3484       for (i = 0; i < k; i++)
3485         if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3486                     (1 << t->p) * sizeof (uint32_t)) == 0)
3487           break;
3488       /* Relocate block j to block i.  */
3489       reorder3[j] = i;
3490       if (i == k)
3491         {
3492           if (i != j)
3493             memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3494                     (1 << t->p) * sizeof (uint32_t));
3495           k++;
3496         }
3497     }
3498   t->level3_size = k;
3499
3500   for (i = 0; i < (t->level2_size << t->q); i++)
3501     if (t->level2[i] != EMPTY)
3502       t->level2[i] = reorder3[t->level2[i]];
3503
3504   /* Uniquify level2 blocks.  */
3505   k = 0;
3506   for (j = 0; j < t->level2_size; j++)
3507     {
3508       for (i = 0; i < k; i++)
3509         if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3510                     (1 << t->q) * sizeof (uint32_t)) == 0)
3511           break;
3512       /* Relocate block j to block i.  */
3513       reorder2[j] = i;
3514       if (i == k)
3515         {
3516           if (i != j)
3517             memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3518                     (1 << t->q) * sizeof (uint32_t));
3519           k++;
3520         }
3521     }
3522   t->level2_size = k;
3523
3524   for (i = 0; i < t->level1_size; i++)
3525     if (t->level1[i] != EMPTY)
3526       t->level1[i] = reorder2[t->level1[i]];
3527
3528   t->result_size =
3529     5 * sizeof (uint32_t)
3530     + t->level1_size * sizeof (uint32_t)
3531     + (t->level2_size << t->q) * sizeof (uint32_t)
3532     + (t->level3_size << t->p) * sizeof (uint32_t);
3533
3534   level2_offset =
3535     5 * sizeof (uint32_t)
3536     + t->level1_size * sizeof (uint32_t);
3537   level3_offset =
3538     5 * sizeof (uint32_t)
3539     + t->level1_size * sizeof (uint32_t)
3540     + (t->level2_size << t->q) * sizeof (uint32_t);
3541
3542   start_locale_structure (file);
3543   add_locale_uint32 (file, t->q + t->p + 5);
3544   add_locale_uint32 (file, t->level1_size);
3545   add_locale_uint32 (file, t->p + 5);
3546   add_locale_uint32 (file, (1 << t->q) - 1);
3547   add_locale_uint32 (file, (1 << t->p) - 1);
3548
3549   for (i = 0; i < t->level1_size; i++)
3550     add_locale_uint32
3551       (file,
3552        t->level1[i] == EMPTY
3553        ? 0
3554        : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3555
3556   for (i = 0; i < (t->level2_size << t->q); i++)
3557     add_locale_uint32
3558       (file,
3559        t->level2[i] == EMPTY
3560        ? 0
3561        : (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
3562
3563   add_locale_uint32_array (file, t->level3, t->level3_size << t->p);
3564   end_locale_structure (file);
3565
3566   if (t->level1_alloc > 0)
3567     free (t->level1);
3568   if (t->level2_alloc > 0)
3569     free (t->level2);
3570   if (t->level3_alloc > 0)
3571     free (t->level3);
3572 }
3573
3574 /* Flattens the included transliterations into a translit list.
3575    Inserts them in the list at `cursor', and returns the new cursor.  */
3576 static struct translit_t **
3577 translit_flatten (struct locale_ctype_t *ctype,
3578                   const struct charmap_t *charmap,
3579                   struct translit_t **cursor)
3580 {
3581   while (ctype->translit_include != NULL)
3582     {
3583       const char *copy_locale = ctype->translit_include->copy_locale;
3584       const char *copy_repertoire = ctype->translit_include->copy_repertoire;
3585       struct localedef_t *other;
3586
3587       /* Unchain the include statement.  During the depth-first traversal
3588          we don't want to visit any locale more than once.  */
3589       ctype->translit_include = ctype->translit_include->next;
3590
3591       other = find_locale (LC_CTYPE, copy_locale, copy_repertoire, charmap);
3592
3593       if (other == NULL || other->categories[LC_CTYPE].ctype == NULL)
3594         {
3595           WITH_CUR_LOCALE (error (0, 0, _("\
3596 %s: transliteration data from locale `%s' not available"),
3597                                   "LC_CTYPE", copy_locale));
3598         }
3599       else
3600         {
3601           struct locale_ctype_t *other_ctype =
3602             other->categories[LC_CTYPE].ctype;
3603
3604           cursor = translit_flatten (other_ctype, charmap, cursor);
3605           assert (other_ctype->translit_include == NULL);
3606
3607           if (other_ctype->translit != NULL)
3608             {
3609               /* Insert the other_ctype->translit list at *cursor.  */
3610               struct translit_t *endp = other_ctype->translit;
3611               while (endp->next != NULL)
3612                 endp = endp->next;
3613
3614               endp->next = *cursor;
3615               *cursor = other_ctype->translit;
3616
3617               /* Avoid any risk of circular lists.  */
3618               other_ctype->translit = NULL;
3619
3620               cursor = &endp->next;
3621             }
3622
3623           if (ctype->default_missing == NULL)
3624             ctype->default_missing = other_ctype->default_missing;
3625         }
3626     }
3627
3628   return cursor;
3629 }
3630
3631 static void
3632 allocate_arrays (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
3633                  struct repertoire_t *repertoire)
3634 {
3635   size_t idx, nr;
3636   const void *key;
3637   size_t len;
3638   void *vdata;
3639   void *curs;
3640
3641   /* You wonder about this amount of memory?  This is only because some
3642      users do not manage to address the array with unsigned values or
3643      data types with range >= 256.  '\200' would result in the array
3644      index -128.  To help these poor people we duplicate the entries for
3645      128 up to 255 below the entry for \0.  */
3646   ctype->ctype_b = (char_class_t *) xcalloc (256 + 128, sizeof (char_class_t));
3647   ctype->ctype32_b = (char_class32_t *) xcalloc (256, sizeof (char_class32_t));
3648   ctype->class_b = (uint32_t **)
3649     xmalloc (ctype->nr_charclass * sizeof (uint32_t *));
3650   ctype->class_3level = (struct wctype_table *)
3651     xmalloc (ctype->nr_charclass * sizeof (struct wctype_table));
3652
3653   /* This is the array accessed using the multibyte string elements.  */
3654   for (idx = 0; idx < 256; ++idx)
3655     ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
3656
3657   /* Mirror first 127 entries.  We must take care that entry -1 is not
3658      mirrored because EOF == -1.  */
3659   for (idx = 0; idx < 127; ++idx)
3660     ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
3661
3662   /* The 32 bit array contains all characters < 0x100.  */
3663   for (idx = 0; idx < ctype->class_collection_act; ++idx)
3664     if (ctype->charnames[idx] < 0x100)
3665       ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
3666
3667   for (nr = 0; nr < ctype->nr_charclass; nr++)
3668     {
3669       ctype->class_b[nr] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
3670
3671       /* We only set CLASS_B for the bits in the ISO C classes, not
3672          the user defined classes.  The number should not change but
3673          who knows.  */
3674 #define LAST_ISO_C_BIT 11
3675       if (nr <= LAST_ISO_C_BIT)
3676         for (idx = 0; idx < 256; ++idx)
3677           if (ctype->class256_collection[idx] & _ISbit (nr))
3678             ctype->class_b[nr][idx >> 5] |= (uint32_t) 1 << (idx & 0x1f);
3679     }
3680
3681   for (nr = 0; nr < ctype->nr_charclass; nr++)
3682     {
3683       struct wctype_table *t;
3684
3685       t = &ctype->class_3level[nr];
3686       t->p = 4; /* or: 5 */
3687       t->q = 7; /* or: 6 */
3688       wctype_table_init (t);
3689
3690       for (idx = 0; idx < ctype->class_collection_act; ++idx)
3691         if (ctype->class_collection[idx] & _ISwbit (nr))
3692           wctype_table_add (t, ctype->charnames[idx]);
3693
3694       if (verbose)
3695         WITH_CUR_LOCALE (fprintf (stderr, _("\
3696 %s: table for class \"%s\": %lu bytes\n"),
3697                                  "LC_CTYPE", ctype->classnames[nr],
3698                                  (unsigned long int) t->result_size));
3699     }
3700
3701   /* Room for table of mappings.  */
3702   ctype->map_b = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3703   ctype->map32_b = (uint32_t **) xmalloc (ctype->map_collection_nr
3704                                           * sizeof (uint32_t *));
3705   ctype->map_3level = (struct wctrans_table *)
3706     xmalloc (ctype->map_collection_nr * sizeof (struct wctrans_table));
3707
3708   /* Fill in all mappings.  */
3709   for (idx = 0; idx < 2; ++idx)
3710     {
3711       unsigned int idx2;
3712
3713       /* Allocate table.  */
3714       ctype->map_b[idx] = (uint32_t *)
3715         xmalloc ((256 + 128) * sizeof (uint32_t));
3716
3717       /* Copy values from collection.  */
3718       for (idx2 = 0; idx2 < 256; ++idx2)
3719         ctype->map_b[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
3720
3721       /* Mirror first 127 entries.  We must take care not to map entry
3722          -1 because EOF == -1.  */
3723       for (idx2 = 0; idx2 < 127; ++idx2)
3724         ctype->map_b[idx][idx2] = ctype->map_b[idx][256 + idx2];
3725
3726       /* EOF must map to EOF.  */
3727       ctype->map_b[idx][127] = EOF;
3728     }
3729
3730   for (idx = 0; idx < ctype->map_collection_nr; ++idx)
3731     {
3732       unsigned int idx2;
3733
3734       /* Allocate table.  */
3735       ctype->map32_b[idx] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
3736
3737       /* Copy values from collection.  Default is identity mapping.  */
3738       for (idx2 = 0; idx2 < 256; ++idx2)
3739         ctype->map32_b[idx][idx2] =
3740           (ctype->map_collection[idx][idx2] != 0
3741            ? ctype->map_collection[idx][idx2]
3742            : idx2);
3743     }
3744
3745   for (nr = 0; nr < ctype->map_collection_nr; nr++)
3746     {
3747       struct wctrans_table *t;
3748
3749       t = &ctype->map_3level[nr];
3750       t->p = 7;
3751       t->q = 9;
3752       wctrans_table_init (t);
3753
3754       for (idx = 0; idx < ctype->map_collection_act[nr]; ++idx)
3755         if (ctype->map_collection[nr][idx] != 0)
3756           wctrans_table_add (t, ctype->charnames[idx],
3757                              ctype->map_collection[nr][idx]);
3758
3759       if (verbose)
3760         WITH_CUR_LOCALE (fprintf (stderr, _("\
3761 %s: table for map \"%s\": %lu bytes\n"),
3762                                  "LC_CTYPE", ctype->mapnames[nr],
3763                                  (unsigned long int) t->result_size));
3764     }
3765
3766   /* Extra array for class and map names.  */
3767   ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3768                                                 * sizeof (uint32_t));
3769   ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3770                                               * sizeof (uint32_t));
3771
3772   ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
3773   ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
3774
3775   /* Array for width information.  Because the expected widths are very
3776      small (never larger than 2) we use only one single byte.  This
3777      saves space.
3778      We put only printable characters in the table.  wcwidth is specified
3779      to return -1 for non-printable characters.  Doing the check here
3780      saves a run-time check.
3781      But we put L'\0' in the table.  This again saves a run-time check.  */
3782   {
3783     struct wcwidth_table *t;
3784
3785     t = &ctype->width;
3786     t->p = 7;
3787     t->q = 9;
3788     wcwidth_table_init (t);
3789
3790     /* First set all the printable characters of the character set to
3791        the default width.  */
3792     curs = NULL;
3793     while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
3794       {
3795         struct charseq *data = (struct charseq *) vdata;
3796
3797         if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
3798           data->ucs4 = repertoire_find_value (ctype->repertoire,
3799                                               data->name, len);
3800
3801         if (data->ucs4 != ILLEGAL_CHAR_VALUE)
3802           {
3803             uint32_t *class_bits =
3804               find_idx (ctype, &ctype->class_collection, NULL,
3805                         &ctype->class_collection_act, data->ucs4);
3806
3807             if (class_bits != NULL && (*class_bits & BITw (tok_print)))
3808               wcwidth_table_add (t, data->ucs4, charmap->width_default);
3809           }
3810       }
3811
3812     /* Now add the explicitly specified widths.  */
3813     if (charmap->width_rules != NULL)
3814       for (size_t cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
3815         {
3816           unsigned char bytes[charmap->mb_cur_max];
3817           int nbytes = charmap->width_rules[cnt].from->nbytes;
3818
3819           /* We have the range of character for which the width is
3820              specified described using byte sequences of the multibyte
3821              charset.  We have to convert this to UCS4 now.  And we
3822              cannot simply convert the beginning and the end of the
3823              sequence, we have to iterate over the byte sequence and
3824              convert it for every single character.  */
3825           memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
3826
3827           while (nbytes < charmap->width_rules[cnt].to->nbytes
3828                  || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
3829                             nbytes) <= 0)
3830             {
3831               /* Find the UCS value for `bytes'.  */
3832               int inner;
3833               uint32_t wch;
3834               struct charseq *seq =
3835                 charmap_find_symbol (charmap, (char *) bytes, nbytes);
3836
3837               if (seq == NULL)
3838                 wch = ILLEGAL_CHAR_VALUE;
3839               else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
3840                 wch = seq->ucs4;
3841               else
3842                 wch = repertoire_find_value (ctype->repertoire, seq->name,
3843                                              strlen (seq->name));
3844
3845               if (wch != ILLEGAL_CHAR_VALUE)
3846                 {
3847                   /* Store the value.  */
3848                   uint32_t *class_bits =
3849                     find_idx (ctype, &ctype->class_collection, NULL,
3850                               &ctype->class_collection_act, wch);
3851
3852                   if (class_bits != NULL && (*class_bits & BITw (tok_print)))
3853                     wcwidth_table_add (t, wch,
3854                                        charmap->width_rules[cnt].width);
3855                 }
3856
3857               /* "Increment" the bytes sequence.  */
3858               inner = nbytes - 1;
3859               while (inner >= 0 && bytes[inner] == 0xff)
3860                 --inner;
3861
3862               if (inner < 0)
3863                 {
3864                   /* We have to extend the byte sequence.  */
3865                   if (nbytes >= charmap->width_rules[cnt].to->nbytes)
3866                     break;
3867
3868                   bytes[0] = 1;
3869                   memset (&bytes[1], 0, nbytes);
3870                   ++nbytes;
3871                 }
3872               else
3873                 {
3874                   ++bytes[inner];
3875                   while (++inner < nbytes)
3876                     bytes[inner] = 0;
3877                 }
3878             }
3879         }
3880
3881     /* Set the width of L'\0' to 0.  */
3882     wcwidth_table_add (t, 0, 0);
3883
3884     if (verbose)
3885       WITH_CUR_LOCALE (fprintf (stderr, _("%s: table for width: %lu bytes\n"),
3886                                "LC_CTYPE", (unsigned long int) t->result_size));
3887   }
3888
3889   /* Set MB_CUR_MAX.  */
3890   ctype->mb_cur_max = charmap->mb_cur_max;
3891
3892   /* Now determine the table for the transliteration information.
3893
3894      XXX It is not yet clear to me whether it is worth implementing a
3895      complicated algorithm which uses a hash table to locate the entries.
3896      For now I'll use a simple array which can be searching using binary
3897      search.  */
3898   if (ctype->translit_include != NULL)
3899     /* Traverse the locales mentioned in the `include' statements in a
3900        depth-first way and fold in their transliteration information.  */
3901     translit_flatten (ctype, charmap, &ctype->translit);
3902
3903   if (ctype->translit != NULL)
3904     {
3905       /* First count how many entries we have.  This is the upper limit
3906          since some entries from the included files might be overwritten.  */
3907       size_t number = 0;
3908       struct translit_t *runp = ctype->translit;
3909       struct translit_t **sorted;
3910       size_t from_len, to_len;
3911
3912       while (runp != NULL)
3913         {
3914           ++number;
3915           runp = runp->next;
3916         }
3917
3918       /* Next we allocate an array large enough and fill in the values.  */
3919       sorted = (struct translit_t **) alloca (number
3920                                               * sizeof (struct translit_t **));
3921       runp = ctype->translit;
3922       number = 0;
3923       do
3924         {
3925           /* Search for the place where to insert this string.
3926              XXX Better use a real sorting algorithm later.  */
3927           size_t idx = 0;
3928           int replace = 0;
3929
3930           while (idx < number)
3931             {
3932               int res = wcscmp ((const wchar_t *) sorted[idx]->from,
3933                                 (const wchar_t *) runp->from);
3934               if (res == 0)
3935                 {
3936                   replace = 1;
3937                   break;
3938                 }
3939               if (res > 0)
3940                 break;
3941               ++idx;
3942             }
3943
3944           if (replace)
3945             sorted[idx] = runp;
3946           else
3947             {
3948               memmove (&sorted[idx + 1], &sorted[idx],
3949                        (number - idx) * sizeof (struct translit_t *));
3950               sorted[idx] = runp;
3951               ++number;
3952             }
3953
3954           runp = runp->next;
3955         }
3956       while (runp != NULL);
3957
3958       /* The next step is putting all the possible transliteration
3959          strings in one memory block so that we can write it out.
3960          We need several different blocks:
3961          - index to the from-string array
3962          - from-string array
3963          - index to the to-string array
3964          - to-string array.
3965       */
3966       from_len = to_len = 0;
3967       for (size_t cnt = 0; cnt < number; ++cnt)
3968         {
3969           struct translit_to_t *srunp;
3970           from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
3971           srunp = sorted[cnt]->to;
3972           while (srunp != NULL)
3973             {
3974               to_len += wcslen ((const wchar_t *) srunp->str) + 1;
3975               srunp = srunp->next;
3976             }
3977           /* Plus one for the extra NUL character marking the end of
3978              the list for the current entry.  */
3979           ++to_len;
3980         }
3981
3982       /* We can allocate the arrays for the results.  */
3983       ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
3984       ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
3985       ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
3986       ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
3987
3988       from_len = 0;
3989       to_len = 0;
3990       for (size_t cnt = 0; cnt < number; ++cnt)
3991         {
3992           size_t len;
3993           struct translit_to_t *srunp;
3994
3995           ctype->translit_from_idx[cnt] = from_len;
3996           ctype->translit_to_idx[cnt] = to_len;
3997
3998           len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
3999           wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
4000                    (const wchar_t *) sorted[cnt]->from, len);
4001           from_len += len;
4002
4003           ctype->translit_to_idx[cnt] = to_len;
4004           srunp = sorted[cnt]->to;
4005           while (srunp != NULL)
4006             {
4007               len = wcslen ((const wchar_t *) srunp->str) + 1;
4008               wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
4009                        (const wchar_t *) srunp->str, len);
4010               to_len += len;
4011               srunp = srunp->next;
4012             }
4013           ctype->translit_to_tbl[to_len++] = L'\0';
4014         }
4015
4016       /* Store the information about the length.  */
4017       ctype->translit_idx_size = number;
4018       ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
4019       ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
4020     }
4021   else
4022     {
4023       ctype->translit_from_idx = no_str;
4024       ctype->translit_from_tbl = no_str;
4025       ctype->translit_to_tbl = no_str;
4026       ctype->translit_idx_size = 0;
4027       ctype->translit_from_tbl_size = 0;
4028       ctype->translit_to_tbl_size = 0;
4029     }
4030 }