locale/programs/ld-ctype.c

   1 /* Copyright (C) 1995-1999, 2000, 2001 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, write to the Free
  17    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  18    02111-1307 USA.  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 # include <config.h>
  22 #endif
  23
  24 #include <alloca.h>
  25 #include <byteswap.h>
  26 #include <endian.h>
  27 #include <errno.h>
  28 #include <limits.h>
  29 #include <obstack.h>
  30 #include <stdlib.h>
  31 #include <string.h>
  32 #include <wchar.h>
  33 #include <wctype.h>
  34 #include <sys/uio.h>
  35
  36 #include "charmap.h"
  37 #include "localeinfo.h"
  38 #include "langinfo.h"
  39 #include "linereader.h"
  40 #include "locfile-token.h"
  41 #include "locfile.h"
  42 #include "localedef.h"
  43
  44 #include <assert.h>
  45
  46
  47 #ifdef PREDEFINED_CLASSES
  48 /* These are the extra bits not in wctype.h since these are not preallocated
  49    classes.  The masks are unsigned: `1 << 31' would shift into the sign
        bit of a 32-bit int, which is undefined behavior.  */
  50 # define _ISwspecial1   (1U << 29)
  51 # define _ISwspecial2   (1U << 30)
  52 # define _ISwspecial3   (1U << 31)
  53 #endif
  54
  55
  56 /* The bit used for representing a special class.  BITPOS is the position
        of the class token relative to tok_upper; BIT and BITw turn that
        position into a mask with _ISbit resp. _ISwbit.  */
  57 #define BITPOS(class) ((class) - tok_upper)
  58 #define BIT(class) (_ISbit (BITPOS (class)))
  59 #define BITw(class) (_ISwbit (BITPOS (class)))
  60 /* ELEM expands to `*find_idx (...)', i.e. an lvalue for the entry of
        VALUE in CTYPE->COLLECTION.  IDX is pasted verbatim behind the
        member names COLLECTION, COLLECTION_max and COLLECTION_act, so it
        is either empty or a suffix such as a subscript.  The expansion is
        not parenthesized and CTYPE is used several times.  */
  61 #define ELEM(ctype, collection, idx, value)                                   \
  62   *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,     \
  63              &ctype->collection##_act idx, value)
  64
  65
  66 /* To be compatible with former implementations we for now restrict
  67    the number of bits for character classes to 16.  When compatibility
  68    is not necessary anymore increase the number to 32.  */
  69 #define char_class_t uint16_t     /* Element type of ctype_b.  */
  70 #define char_class32_t uint32_t   /* Element type of ctype32_b.  */
  71
  72
  73 /* Type to describe a transliteration action.  We have a possibly
  74    multiple character from-string and a set of multiple character
  75    to-strings.  All are 32bit values since this is what is used in
  76    the gconv functions.  */
  77 struct translit_to_t
  78 {
  79   uint32_t *str;                /* One to-string of 32bit values.  */
  80
  81   struct translit_to_t *next;   /* Next to-string of the same set.  */
  82 };
  83
  84 struct translit_t
  85 {
  86   uint32_t *from;               /* The from-string of 32bit values.  */
  87
  88   const char *fname;            /* NOTE(review): presumably the file ...  */
  89   size_t lineno;                /* ... and line of the definition; confirm.  */
  90
  91   struct translit_to_t *to;     /* List of to-strings.  */
  92
  93   struct translit_t *next;      /* Next entry in the list.  */
  94 };
  95
  96 struct translit_ignore_t
  97 {
  98   uint32_t from;   /* Key by which ctype_finish orders the list.  */
  99   uint32_t to;     /* NOTE(review): presumably the end of a range; confirm.  */
 100   uint32_t step;   /* NOTE(review): presumably the stride in it; confirm.  */
 101
 102   const char *fname;   /* NOTE(review): presumably the file ...  */
 103   size_t lineno;       /* ... and line of the definition; confirm.  */
 104
 105   struct translit_ignore_t *next;   /* Next entry in the list.  */
 106 };
 107
 108
 109 /* Type to describe a transliteration include statement.  */
 110 struct translit_include_t
 111 {
 112   const char *copy_locale;       /* NOTE(review): presumably the name of the
                                        locale to include; confirm.  */
 113   const char *copy_repertoire;   /* NOTE(review): presumably the repertoire
                                        used for it; confirm.  */
 114
 115   struct translit_include_t *next;   /* Next entry in the list.  */
 116 };
 117
 118
 119 /* Sparse table of uint32_t.  */
 120 #define TABLE idx_table
 121 #define ELEMENT uint32_t
 122 #define DEFAULT ((uint32_t) ~0)
 123 #define NO_FINALIZE
 124 #include "3level.h"
 125
 126
 127 /* The real definition of the struct for the LC_CTYPE locale.  */
 128 struct locale_ctype_t
 129 {
 130   uint32_t *charnames;          /* Wide character value of each slot.  */
 131   size_t charnames_max;         /* Number of slots allocated.  */
 132   size_t charnames_act;         /* Number of slots in use.  */
 133   /* An index lookup table, to speedup find_idx.  */
 134   struct idx_table charnames_idx;
 135
 136   struct repertoire_t *repertoire;
 137
 138   /* We will allow up to 8 * sizeof (uint32_t) character classes.  */
 139 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
 140   size_t nr_charclass;
 141   const char *classnames[MAX_NR_CHARCLASS];
 142   uint32_t last_class_char;
 143   uint32_t class256_collection[256];   /* _ISbit masks, indexed by byte.  */
 144   uint32_t *class_collection;   /* _ISwbit masks, parallel to charnames.  */
 145   size_t class_collection_max;
 146   size_t class_collection_act;
 147   uint32_t class_done;
 148   uint32_t class_offset;
 149
 150   struct charseq **mbdigits;    /* Input digits, multibyte form; ctype_finish
                                       keeps them in groups of ten.  */
 151   size_t mbdigits_act;
 152   size_t mbdigits_max;
 153   uint32_t *wcdigits;           /* Input digits, wide character form; also
                                       kept in groups of ten.  */
 154   size_t wcdigits_act;
 155   size_t wcdigits_max;
 156
 157   struct charseq *mboutdigits[10];   /* `outdigit', multibyte form.  */
 158   uint32_t wcoutdigits[10];          /* `outdigit', wide character form.  */
 159   size_t outdigits_act;
 160
 161   /* If the following number ever turns out to be too small simply
 162      increase it.  But I doubt it will.  --drepper@gnu */
 163 #define MAX_NR_CHARMAP 16
 164   const char *mapnames[MAX_NR_CHARMAP];
 165   uint32_t *map_collection[MAX_NR_CHARMAP];
 166   uint32_t map256_collection[2][256];   /* Byte-indexed tables for the
                                              first two maps.  */
 167   size_t map_collection_max[MAX_NR_CHARMAP];
 168   size_t map_collection_act[MAX_NR_CHARMAP];
 169   size_t map_collection_nr;
 170   size_t last_map_idx;
 171   int tomap_done[MAX_NR_CHARMAP];
 172   uint32_t map_offset;
 173
 174   /* Transliteration information.  */
 175   struct translit_include_t *translit_include;
 176   struct translit_t *translit;
 177   struct translit_ignore_t *translit_ignore;   /* Ordered by ctype_finish.  */
 178   uint32_t ntranslit_ignore;    /* Length of the translit_ignore list.  */
 179
 180   uint32_t *default_missing;
 181   const char *default_missing_file;
 182   size_t default_missing_lineno;
 183
 184   /* The arrays for the binary representation.  */
 185   char_class_t *ctype_b;
 186   char_class32_t *ctype32_b;
 187   uint32_t **map_b;
 188   uint32_t **map32_b;
 189   uint32_t **class_b;
 190   struct iovec *class_3level;
 191   struct iovec *map_3level;
 192   uint32_t *class_name_ptr;
 193   uint32_t *map_name_ptr;
 194   struct iovec width;
 195   uint32_t mb_cur_max;
 196   const char *codeset_name;     /* Taken from the charmap in ctype_finish.  */
 197   uint32_t *translit_from_idx;
 198   uint32_t *translit_from_tbl;
 199   uint32_t *translit_to_idx;
 200   uint32_t *translit_to_tbl;
 201   uint32_t translit_idx_size;
 202   size_t translit_from_tbl_size;
 203   size_t translit_to_tbl_size;
 204
 205   struct obstack mempool;       /* Initialized by ctype_startup.  */
 206 };
 207
 208
 209 /* Marker for an empty slot.  This has the value 0xFFFFFFFF, regardless
 210    whether 'int' is 16 bit, 32 bit, or 64 bit.  */
 211 #define EMPTY ((uint32_t) ~0)
 212
 213
 214 #define obstack_chunk_alloc xmalloc   /* Chunk allocator for obstack_init.  */
 215 #define obstack_chunk_free free       /* Matching chunk release function.  */
 216
 217
 218 /* Prototypes for local functions.  */
 219 static void ctype_startup (struct linereader *lr, struct localedef_t *locale,
 220                            const struct charmap_t *charmap,
 221                            struct localedef_t *copy_locale,
 222                            int ignore_content);
 223 static void ctype_class_new (struct linereader *lr,
 224                              struct locale_ctype_t *ctype, const char *name);
 225 static void ctype_map_new (struct linereader *lr,
 226                            struct locale_ctype_t *ctype,
 227                            const char *name, const struct charmap_t *charmap);
 228 static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table,
 229                            size_t *max, size_t *act, unsigned int idx);
 230 static void set_class_defaults (struct locale_ctype_t *ctype,
 231                                 const struct charmap_t *charmap,
 232                                 struct repertoire_t *repertoire);
 233 static void allocate_arrays (struct locale_ctype_t *ctype,
 234                              const struct charmap_t *charmap,
 235                              struct repertoire_t *repertoire);
 236
 237 /* The ten decimal digits as long symbol names, as U-notation symbol
        names and as plain byte values.  ctype_finish falls back to
        `digits' and `longnames' when a locale defines no input digits.  */
 238 static const char *longnames[] =
 239 {
 240   "zero", "one", "two", "three", "four",
 241   "five", "six", "seven", "eight", "nine"
 242 };
 243 static const char *uninames[] =
 244 {
 245   "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
 246   "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
 247 };
 248 static const unsigned char digits[] = "0123456789";
 249
 250 /* Create the LC_CTYPE data of LOCALE.  Nothing happens when
        IGNORE_CONTENT is set or the category already has data.  Without
        COPY_LOCALE a zeroed structure is allocated and seeded: the first
        256 name slots map to themselves, the standard classes and maps are
        registered (their order fixes the bit positions) and the toupper
        and tolower tables start out as the identity.  With COPY_LOCALE
        the structure of that locale is shared, not duplicated.  LR is
        only handed on to ctype_class_new and ctype_map_new.  */
 251 static void
 252 ctype_startup (struct linereader *lr, struct localedef_t *locale,
 253                const struct charmap_t *charmap,
 254                struct localedef_t *copy_locale, int ignore_content)
 255 {
 256   unsigned int cnt;
 257   struct locale_ctype_t *ctype;
 258
 259   if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL)
 260     {
 261       if (copy_locale == NULL)
 262         {
 263           /* Allocate the needed room.  */
 264           locale->categories[LC_CTYPE].ctype = ctype =
 265             (struct locale_ctype_t *) xcalloc (1,
 266                                                sizeof (struct locale_ctype_t));
 267
 268           /* We have seen no names yet.  The array holds uint32_t
                 elements, so size it with that type.  */
 269           ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
 270           ctype->charnames =
 271             (uint32_t *) xmalloc (ctype->charnames_max
 272                                   * sizeof (uint32_t));
 273           for (cnt = 0; cnt < 256; ++cnt)
 274             ctype->charnames[cnt] = cnt;
 275           ctype->charnames_act = 256;
 276           idx_table_init (&ctype->charnames_idx);
 277
 278           /* Fill character class information.  */
 279           ctype->last_class_char = ILLEGAL_CHAR_VALUE;
 280           /* The order of the following instructions determines the bit
 281              positions!  */
 282           ctype_class_new (lr, ctype, "upper");
 283           ctype_class_new (lr, ctype, "lower");
 284           ctype_class_new (lr, ctype, "alpha");
 285           ctype_class_new (lr, ctype, "digit");
 286           ctype_class_new (lr, ctype, "xdigit");
 287           ctype_class_new (lr, ctype, "space");
 288           ctype_class_new (lr, ctype, "print");
 289           ctype_class_new (lr, ctype, "graph");
 290           ctype_class_new (lr, ctype, "blank");
 291           ctype_class_new (lr, ctype, "cntrl");
 292           ctype_class_new (lr, ctype, "punct");
 293           ctype_class_new (lr, ctype, "alnum");
 294 #ifdef PREDEFINED_CLASSES
 295           /* The following are extensions from ISO 14652.  */
 296           ctype_class_new (lr, ctype, "left_to_right");
 297           ctype_class_new (lr, ctype, "right_to_left");
 298           ctype_class_new (lr, ctype, "num_terminator");
 299           ctype_class_new (lr, ctype, "num_separator");
 300           ctype_class_new (lr, ctype, "segment_separator");
 301           ctype_class_new (lr, ctype, "block_separator");
 302           ctype_class_new (lr, ctype, "direction_control");
 303           ctype_class_new (lr, ctype, "sym_swap_layout");
 304           ctype_class_new (lr, ctype, "char_shape_selector");
 305           ctype_class_new (lr, ctype, "num_shape_selector");
 306           ctype_class_new (lr, ctype, "non_spacing");
 307           ctype_class_new (lr, ctype, "non_spacing_level3");
 308           ctype_class_new (lr, ctype, "normal_connect");
 309           ctype_class_new (lr, ctype, "r_connect");
 310           ctype_class_new (lr, ctype, "no_connect");
 311           ctype_class_new (lr, ctype, "no_connect-space");
 312           ctype_class_new (lr, ctype, "vowel_connect");
 313 #endif
 314
 315           ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
 316           ctype->class_collection
 317             = (uint32_t *) xcalloc (ctype->class_collection_max,
 318                                     sizeof (uint32_t));
 319           ctype->class_collection_act = 256;
 320
 321           /* Fill character map information.  */
 322           ctype->last_map_idx = MAX_NR_CHARMAP;
 323           ctype_map_new (lr, ctype, "toupper", charmap);
 324           ctype_map_new (lr, ctype, "tolower", charmap);
 325 #ifdef PREDEFINED_CLASSES
 326           ctype_map_new (lr, ctype, "tosymmetric", charmap);
 327 #endif
 328
 329           /* Fill first 256 entries in `toXXX' arrays.
                 NOTE(review): this relies on ctype_map_new having allocated
                 map_collection[n] with at least 256 entries -- confirm.  */
 330           for (cnt = 0; cnt < 256; ++cnt)
 331             {
 332               ctype->map_collection[0][cnt] = cnt;
 333               ctype->map_collection[1][cnt] = cnt;
 334 #ifdef PREDEFINED_CLASSES
 335               ctype->map_collection[2][cnt] = cnt;
 336 #endif
 337               ctype->map256_collection[0][cnt] = cnt;
 338               ctype->map256_collection[1][cnt] = cnt;
 339             }
 340
 341           obstack_init (&ctype->mempool);
 342         }
 343       else
 344         ctype = locale->categories[LC_CTYPE].ctype =
 345           copy_locale->categories[LC_CTYPE].ctype;
 346     }
 347 }
 348
 349 /* Complete and check the LC_CTYPE data of LOCALE after parsing.  The
        function resolves `copy' references (creating an empty definition
        if none is found), applies the class defaults, checks the class
        combinations against the POSIX table, checks <SP>, registers every
        character of CHARMAP in the name array, normalizes the input digit
        groups, substitutes `?' for missing outdigits and finally orders
        and counts the translit_ignore list.  Problems are reported with
        error (); most reports are suppressed when be_quiet is set.  */
 350 void
 351 ctype_finish (struct localedef_t *locale, const struct charmap_t *charmap)
 352 {
 353   /* See POSIX.2, table 2-6 for the meaning of the following table.  */
 354 #define NCLASS 12
 355   static const struct
 356   {
 357     const char *name;
 358     const char allow[NCLASS];
 359   }
 360   valid_table[NCLASS] =
 361   {
 362     /* The order is important.  See token.h for more information.
 363        M = Always, D = Default, - = Permitted, X = Mutually exclusive  */
 364     { "upper",  "--MX-XDDXXX-" },
 365     { "lower",  "--MX-XDDXXX-" },
 366     { "alpha",  "---X-XDDXXX-" },
 367     { "digit",  "XXX--XDDXXX-" },
 368     { "xdigit", "-----XDDXXX-" },
 369     { "space",  "XXXXX------X" },
 370     { "print",  "---------X--" },
 371     { "graph",  "---------X--" },
 372     { "blank",  "XXXXXM-----X" },
 373     { "cntrl",  "XXXXX-XX--XX" },
 374     { "punct",  "XXXXX-DD-X-X" },
 375     { "alnum",  "-----XDDXXX-" }
 376   };
 377   size_t cnt;
 378   int cls1, cls2;
 379   uint32_t space_value;
 380   struct charseq *space_seq;
 381   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 382   int warned;
 383   const void *key;
 384   size_t len;
 385   void *vdata;
 386   void *curs;
 387
 388   /* Now resolve copying and also handle completely missing definitions.  */
 389   if (ctype == NULL)
 390     {
 391       const char *repertoire_name;
 392
 393       /* First see whether we were supposed to copy.  If yes, find the
 394          actual definition.  */
 395       if (locale->copy_name[LC_CTYPE] != NULL)
 396         {
 397           /* Find the copying locale.  This has to happen transitively since
 398              the locale we are copying from might itself be copying another
                 one.  NOTE(review): the result of find_locale is used
                 unchecked -- confirm it cannot be NULL.  */
 399           struct localedef_t *from = locale;
 400
 401           do
 402             from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
 403                                 from->repertoire_name, charmap);
 404           while (from->categories[LC_CTYPE].ctype == NULL
 405                  && from->copy_name[LC_CTYPE] != NULL);
 406
 407           ctype = locale->categories[LC_CTYPE].ctype
 408             = from->categories[LC_CTYPE].ctype;
 409         }
 410
 411       /* If there is still no definition issue a warning and create an
 412          empty one.  */
 413       if (ctype == NULL)
 414         {
 415           if (! be_quiet)
 416             error (0, 0, _("No definition for %s category found"), "LC_CTYPE");
 417           ctype_startup (NULL, locale, charmap, NULL, 0);
 418           ctype = locale->categories[LC_CTYPE].ctype;
 419         }
 420
 421       /* Get the repertoire we have to use.  */
 422       repertoire_name = locale->repertoire_name ?: repertoire_global;
 423       if (repertoire_name != NULL)
 424         ctype->repertoire = repertoire_read (repertoire_name);
 425     }
 426
 427   /* We need the name of the currently used 8-bit character set to
 428      make correct conversion between this 8-bit representation and the
 429      ISO 10646 character set used internally for wide characters.  */
 430   ctype->codeset_name = charmap->code_set_name;
 431   if (ctype->codeset_name == NULL)
 432     {
 433       if (! be_quiet)
 434         error (0, 0, _("No character set name specified in charmap"));
 435       ctype->codeset_name = "//UNKNOWN//";
 436     }
 437
 438   /* Set default value for classes not specified.  */
 439   set_class_defaults (ctype, charmap, ctype->repertoire);
 440
 441   /* Check according to table.  This pass handles the wide character
         class masks; the snapshot TMP is tested while `D' entries are
         added to the stored mask.  */
 442   for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
 443     {
 444       uint32_t tmp = ctype->class_collection[cnt];
 445
 446       if (tmp != 0)
 447         {
 448           for (cls1 = 0; cls1 < NCLASS; ++cls1)
 449             if ((tmp & _ISwbit (cls1)) != 0)
 450               for (cls2 = 0; cls2 < NCLASS; ++cls2)
 451                 if (valid_table[cls1].allow[cls2] != '-')
 452                   {
 453                     int eq = (tmp & _ISwbit (cls2)) != 0;
 454                     switch (valid_table[cls1].allow[cls2])
 455                       {
 456                       case 'M':
 457                         if (!eq)
 458                           {
 459                             uint32_t value = ctype->charnames[cnt];
 460
 461                             if (!be_quiet)
 462                               error (0, 0, _("\
 463 character L'\\u%0*x' in class `%s' must be in class `%s'"),
 464                                      value > 0xffff ? 8 : 4, value,
 465                                      valid_table[cls1].name,
 466                                      valid_table[cls2].name);
 467                           }
 468                         break;
 469
 470                       case 'X':
 471                         if (eq)
 472                           {
 473                             uint32_t value = ctype->charnames[cnt];
 474
 475                             if (!be_quiet)
 476                               error (0, 0, _("\
 477 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
 478                                      value > 0xffff ? 8 : 4, value,
 479                                      valid_table[cls1].name,
 480                                      valid_table[cls2].name);
 481                           }
 482                         break;
 483
 484                       case 'D':
 485                         ctype->class_collection[cnt] |= _ISwbit (cls2);
 486                         break;
 487
 488                       default:
 489                         error (5, 0, _("internal error in %s, line %u"),
 490                                __FUNCTION__, __LINE__);
 491                       }
 492                   }
 493         }
 494     }
 495
 496   /* The same check for the byte-indexed class masks.  */
     for (cnt = 0; cnt < 256; ++cnt)
 497     {
 498       uint32_t tmp = ctype->class256_collection[cnt];
 499
 500       if (tmp != 0)
 501         {
 502           for (cls1 = 0; cls1 < NCLASS; ++cls1)
 503             if ((tmp & _ISbit (cls1)) != 0)
 504               for (cls2 = 0; cls2 < NCLASS; ++cls2)
 505                 if (valid_table[cls1].allow[cls2] != '-')
 506                   {
 507                     int eq = (tmp & _ISbit (cls2)) != 0;
 508                     switch (valid_table[cls1].allow[cls2])
 509                       {
 510                       case 'M':
 511                         if (!eq)
 512                           {
 513                             char buf[17];
 514
 515                             snprintf (buf, sizeof buf, "\\%zo", cnt);
 516
 517                             if (!be_quiet)
 518                               error (0, 0, _("\
 519 character '%s' in class `%s' must be in class `%s'"),
 520                                      buf, valid_table[cls1].name,
 521                                      valid_table[cls2].name);
 522                           }
 523                         break;
 524
 525                       case 'X':
 526                         if (eq)
 527                           {
 528                             char buf[17];
 529
 530                             snprintf (buf, sizeof buf, "\\%zo", cnt);
 531
 532                             if (!be_quiet)
 533                               error (0, 0, _("\
 534 character '%s' in class `%s' must not be in class `%s'"),
 535                                      buf, valid_table[cls1].name,
 536                                      valid_table[cls2].name);
 537                           }
 538                         break;
 539
 540                       case 'D':
 541                         ctype->class256_collection[cnt] |= _ISbit (cls2);
 542                         break;
 543
 544                       default:
 545                         error (5, 0, _("internal error in %s, line %u"),
 546                                __FUNCTION__, __LINE__);
 547                       }
 548                   }
 549         }
 550     }
 551
 552   /* ... and now test <SP> as a special case.  The comma expressions
         store the index of the class being tested in CNT so that the
         message can name the class which failed.  */
 553   space_value = 32;
 554   if (((cnt = BITPOS (tok_space),
 555         (ELEM (ctype, class_collection, , space_value)
 556          & BITw (tok_space)) == 0)
 557        || (cnt = BITPOS (tok_blank),
 558            (ELEM (ctype, class_collection, , space_value)
 559             & BITw (tok_blank)) == 0)))
 560     {
 561       if (!be_quiet)
 562         error (0, 0, _("<SP> character not in class `%s'"),
 563                valid_table[cnt].name);
 564     }
 565   else if (((cnt = BITPOS (tok_punct),
 566              (ELEM (ctype, class_collection, , space_value)
 567               & BITw (tok_punct)) != 0)
 568             || (cnt = BITPOS (tok_graph),
 569                 (ELEM (ctype, class_collection, , space_value)
 570                  & BITw (tok_graph))
 571                 != 0)))
 572     {
 573       if (!be_quiet)
 574         error (0, 0, _("<SP> character must not be in class `%s'"),
 575                valid_table[cnt].name);
 576     }
 577   else
 578     ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
 579
 580   space_seq = charmap_find_value (charmap, "SP", 2);
 581   if (space_seq == NULL)
 582     space_seq = charmap_find_value (charmap, "space", 5);
 583   if (space_seq == NULL)
 584     space_seq = charmap_find_value (charmap, "U00000020", 9);
 585   if (space_seq == NULL || space_seq->nbytes != 1)
 586     {
 587       if (!be_quiet)
 588         error (0, 0, _("character <SP> not defined in character map"));
 589     }
 590   else if (((cnt = BITPOS (tok_space),
 591              (ctype->class256_collection[space_seq->bytes[0]]
 592               & BIT (tok_space)) == 0)
 593             || (cnt = BITPOS (tok_blank),
 594                 (ctype->class256_collection[space_seq->bytes[0]]
 595                  & BIT (tok_blank)) == 0)))
 596     {
 597       if (!be_quiet)
 598         error (0, 0, _("<SP> character not in class `%s'"),
 599                valid_table[cnt].name);
 600     }
 601   else if (((cnt = BITPOS (tok_punct),
 602              (ctype->class256_collection[space_seq->bytes[0]]
 603               & BIT (tok_punct)) != 0)
 604             || (cnt = BITPOS (tok_graph),
 605                 (ctype->class256_collection[space_seq->bytes[0]]
 606                  & BIT (tok_graph)) != 0)))
 607     {
 608       if (!be_quiet)
 609         error (0, 0, _("<SP> character must not be in class `%s'"),
 610                valid_table[cnt].name);
 611     }
 612   else
 613     ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
 614
 615   /* Now that the tests are done make sure the name array contains all
 616      characters which are handled in the WIDTH section of the
 617      character set definition file.  */
 618   if (charmap->width_rules != NULL)
 619     for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
 620       {
 621         unsigned char bytes[charmap->mb_cur_max];
 622         int nbytes = charmap->width_rules[cnt].from->nbytes;
 623
 624         /* We have the range of character for which the width is
 625            specified described using byte sequences of the multibyte
 626            charset.  We have to convert this to UCS4 now.  And we
 627            cannot simply convert the beginning and the end of the
 628            sequence, we have to iterate over the byte sequence and
 629            convert it for every single character.  */
 630         memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
 631
 632         while (nbytes < charmap->width_rules[cnt].to->nbytes
 633                || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
 634                           nbytes) <= 0)
 635           {
 636             /* Find the UCS value for `bytes'.  */
 637             int inner;
 638             uint32_t wch;
 639             struct charseq *seq = charmap_find_symbol (charmap, bytes, nbytes);
 640
 641             if (seq == NULL)
 642               wch = ILLEGAL_CHAR_VALUE;
 643             else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
 644               wch = seq->ucs4;
 645             else
 646               wch = repertoire_find_value (ctype->repertoire, seq->name,
 647                                            strlen (seq->name));
 648
 649             if (wch != ILLEGAL_CHAR_VALUE)
 650               /* We are only interested in the side-effects of the
 651                  `find_idx' call.  It will add appropriate entries in
 652                  the name array if this is necessary.  */
 653               (void) find_idx (ctype, NULL, NULL, NULL, wch);
 654
 655             /* "Increment" the bytes sequence.  */
 656             inner = nbytes - 1;
 657             while (inner >= 0 && bytes[inner] == 0xff)
 658               --inner;
 659
 660             if (inner < 0)
 661               {
 662                 /* We have to extend the byte sequence.  */
 663                 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
 664                   break;
 665
 666                 bytes[0] = 1;
 667                 memset (&bytes[1], 0, nbytes);
 668                 ++nbytes;
 669               }
 670             else
 671               {
 672                 ++bytes[inner];
 673                 while (++inner < nbytes)
 674                   bytes[inner] = 0;
 675               }
 676           }
 677       }
 678
 679   /* Now set all the other characters of the character set to the
 680      default width.  */
 681   curs = NULL;
 682   while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
 683     {
 684       struct charseq *data = (struct charseq *) vdata;
 685
 686       if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
 687         data->ucs4 = repertoire_find_value (ctype->repertoire,
 688                                             data->name, len);
 689
 690       if (data->ucs4 != ILLEGAL_CHAR_VALUE)
 691         (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
 692     }
 693
 694   /* There must be a multiple of 10 digits.  */
 695   if (ctype->mbdigits_act % 10 != 0)
 696     {
 697       assert (ctype->mbdigits_act == ctype->wcdigits_act);
 698       ctype->wcdigits_act -= ctype->mbdigits_act % 10;
 699       ctype->mbdigits_act -= ctype->mbdigits_act % 10;
 700       error (0, 0, _("`digit' category has not entries in groups of ten"));
 701     }
 702
 703   /* Check the input digits.  There must be a multiple of ten available.
 704      In each group it could be that one or the other character is missing.
 705      In this case the whole group must be removed.  */
 706   cnt = 0;
 707   while (cnt < ctype->mbdigits_act)
 708     {
 709       size_t inner;
 710       for (inner = 0; inner < 10; ++inner)
 711         if (ctype->mbdigits[cnt + inner] == NULL)
 712           break;
 713
 714       if (inner == 10)
 715         cnt += 10;
 716       else
 717         {
 718           /* Remove the group.  The number of elements behind it is
                 computed from mbdigits_act, the counter this loop
                 maintains; wcdigits_act is not reduced here and would
                 make the copy read beyond the used part of the array.  */
 719           memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
 720                    ((ctype->mbdigits_act - cnt - 10)
 721                     * sizeof (ctype->mbdigits[0])));
 722           ctype->mbdigits_act -= 10;
 723         }
 724     }
 725
 726   /* If no input digits are given use the default.  */
 727   if (ctype->mbdigits_act == 0)
 728     {
 729       if (ctype->mbdigits_max == 0)
 730         {
 731           ctype->mbdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
 732                                            10 * sizeof (struct charseq *));
 733           ctype->mbdigits_max = 10;
 734         }
 735
 736       for (cnt = 0; cnt < 10; ++cnt)
 737         {
 738           ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
 739                                                       digits + cnt, 1);
 740           if (ctype->mbdigits[cnt] == NULL)
 741             {
                   /* NOTE(review): a symbol name is passed to
                      charmap_find_symbol here, while the other calls in
                      this function pass byte sequences to it and names
                      to charmap_find_value -- confirm this is intended.  */
 742               ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
 743                                                           longnames[cnt],
 744                                                           strlen (longnames[cnt]));
 745               if (ctype->mbdigits[cnt] == NULL)
 746                 {
 747                   /* Hum, this ain't good.  */
 748                   error (0, 0, _("\
 749 no input digits defined and none of the standard names in the charmap"));
 750
 751                   ctype->mbdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
 752                                                         sizeof (struct charseq) + 1);
 753
 754                   /* This is better than nothing.  */
 755                   ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
 756                   ctype->mbdigits[cnt]->nbytes = 1;
 757                 }
 758             }
 759         }
 760
 761       ctype->mbdigits_act = 10;
 762     }
 763
 764   /* Check the wide character input digits.  There must be a multiple
 765      of ten available.  In each group it could be that one or the other
 766      character is missing.  In this case the whole group must be
 767      removed.  */
 768   cnt = 0;
 769   while (cnt < ctype->wcdigits_act)
 770     {
 771       size_t inner;
 772       for (inner = 0; inner < 10; ++inner)
 773         if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
 774           break;
 775
 776       if (inner == 10)
 777         cnt += 10;
 778       else
 779         {
 780           /* Remove the group.  */
 781           memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
 782                    ((ctype->wcdigits_act - cnt - 10)
 783                     * sizeof (ctype->wcdigits[0])));
 784           ctype->wcdigits_act -= 10;
 785         }
 786     }
 787
 788   /* If no input digits are given use the default.  */
 789   if (ctype->wcdigits_act == 0)
 790     {
 791       if (ctype->wcdigits_max == 0)
 792         {
 793           ctype->wcdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
 794                                            10 * sizeof (uint32_t));
 795           ctype->wcdigits_max = 10;
 796         }
 797
 798       for (cnt = 0; cnt < 10; ++cnt)
 799         ctype->wcdigits[cnt] = L'0' + cnt;
 800       /* The wide digits have their own counter; the multibyte count
               set up above must stay untouched.  */
 801       ctype->wcdigits_act = 10;
 802     }
 803
 804   /* Check the outdigits.  */
 805   warned = 0;
 806   for (cnt = 0; cnt < 10; ++cnt)
 807     if (ctype->mboutdigits[cnt] == NULL)
 808       {
 809         static struct charseq replace[2];
 810
 811         if (!warned)
 812           {
 813             error (0, 0, _("\
 814 not all characters used in `outdigit' are available in the charmap"));
 815             warned = 1;
 816           }
 817
 818         replace[0].nbytes = 1;
 819         replace[0].bytes[0] = '?';
 820         replace[0].bytes[1] = '\0';
 821         ctype->mboutdigits[cnt] = &replace[0];
 822       }
 823
 824   warned = 0;
 825   for (cnt = 0; cnt < 10; ++cnt)
 826     if (ctype->wcoutdigits[cnt] == 0)
 827       {
 828         if (!warned)
 829           {
 830             error (0, 0, _("\
 831 not all characters used in `outdigit' are available in the repertoire"));
 832             warned = 1;
 833           }
 834
 835         ctype->wcoutdigits[cnt] = L'?';
 836       }
 837
 838   /* Sort the entries in the translit_ignore list by ascending `from'
          and count them.  Every node is unlinked from the input list and
          spliced into the sorted list behind all entries which do not
          compare greater, so entries with equal keys keep their order
          and the list can never become cyclic.  */
 839   if (ctype->translit_ignore != NULL)
 840     {
 841       struct translit_ignore_t *sortedp = NULL;
 842       struct translit_ignore_t *runp = ctype->translit_ignore;
 843
 844       ctype->ntranslit_ignore = 0;
 845
 846       while (runp != NULL)
 847         {
 848           struct translit_ignore_t *nextp = runp->next;
 849           struct translit_ignore_t **linkp = &sortedp;
 850
 851           ++ctype->ntranslit_ignore;
 852
 853           while (*linkp != NULL && (*linkp)->from <= runp->from)
 854             linkp = &(*linkp)->next;
 855
 856           /* Splice RUNP in front of *LINKP.  */
 857           runp->next = *linkp;
 858           *linkp = runp;
 859           runp = nextp;
 860         }
 861
 862       ctype->translit_ignore = sortedp;
 863     }
 864 }
 865
 866
 867 void
 868 ctype_output (struct localedef_t *locale, const struct charmap_t *charmap,
 869               const char *output_path)
 870 {
 871   static const char nulbytes[4] = { 0, 0, 0, 0 };
 872   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 873   const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)
 874                          + ctype->nr_charclass + ctype->map_collection_nr);
 875   struct iovec iov[2 + nelems + 2 * ctype->nr_charclass
 876                   + ctype->map_collection_nr + 4];
 877   struct locale_file data;
 878   uint32_t idx[nelems + 1];
 879   uint32_t default_missing_len;
 880   size_t elem, cnt, offset, total;
 881   char *cp;
 882
 883   /* Now prepare the output: Find the sizes of the table we can use.  */
 884   allocate_arrays (ctype, charmap, ctype->repertoire);
 885
 886   data.magic = LIMAGIC (LC_CTYPE);
 887   data.n = nelems;
 888   iov[0].iov_base = (void *) &data;
 889   iov[0].iov_len = sizeof (data);
 890
 891   iov[1].iov_base = (void *) idx;
 892   iov[1].iov_len = nelems * sizeof (uint32_t);
 893
 894   idx[0] = iov[0].iov_len + iov[1].iov_len;
 895   offset = 0;
 896
 897   for (elem = 0; elem < nelems; ++elem)
 898     {
 899       if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))
 900         switch (elem)
 901           {
 902 #define CTYPE_EMPTY(name) \
 903           case name:                                                          \
 904             iov[2 + elem + offset].iov_base = NULL;                           \
 905             iov[2 + elem + offset].iov_len = 0;                               \
 906             idx[elem + 1] = idx[elem];                                        \
 907             break
 908
 909           CTYPE_EMPTY(_NL_CTYPE_GAP1);
 910           CTYPE_EMPTY(_NL_CTYPE_GAP2);
 911           CTYPE_EMPTY(_NL_CTYPE_GAP3);
 912           CTYPE_EMPTY(_NL_CTYPE_GAP4);
 913           CTYPE_EMPTY(_NL_CTYPE_GAP5);
 914           CTYPE_EMPTY(_NL_CTYPE_GAP6);
 915
 916 #define CTYPE_DATA(name, base, len)                                           \
 917           case _NL_ITEM_INDEX (name):                                         \
 918             iov[2 + elem + offset].iov_base = (base);                         \
 919             iov[2 + elem + offset].iov_len = (len);                           \
 920             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;       \
 921             break
 922
 923           CTYPE_DATA (_NL_CTYPE_CLASS,
 924                       ctype->ctype_b,
 925                       (256 + 128) * sizeof (char_class_t));
 926
 927           CTYPE_DATA (_NL_CTYPE_TOUPPER,
 928                       ctype->map_b[0],
 929                       (256 + 128) * sizeof (uint32_t));
 930           CTYPE_DATA (_NL_CTYPE_TOLOWER,
 931                       ctype->map_b[1],
 932                       (256 + 128) * sizeof (uint32_t));
 933
 934           CTYPE_DATA (_NL_CTYPE_TOUPPER32,
 935                       ctype->map32_b[0],
 936                       256 * sizeof (uint32_t));
 937           CTYPE_DATA (_NL_CTYPE_TOLOWER32,
 938                       ctype->map32_b[1],
 939                       256 * sizeof (uint32_t));
 940
 941           CTYPE_DATA (_NL_CTYPE_CLASS32,
 942                       ctype->ctype32_b,
 943                       256 * sizeof (char_class32_t));
 944
 945           CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET,
 946                       &ctype->class_offset, sizeof (uint32_t));
 947
 948           CTYPE_DATA (_NL_CTYPE_MAP_OFFSET,
 949                       &ctype->map_offset, sizeof (uint32_t));
 950
 951           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE,
 952                       &ctype->translit_idx_size, sizeof (uint32_t));
 953
 954           CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX,
 955                       ctype->translit_from_idx,
 956                       ctype->translit_idx_size * sizeof (uint32_t));
 957
 958           CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL,
 959                       ctype->translit_from_tbl,
 960                       ctype->translit_from_tbl_size);
 961
 962           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX,
 963                       ctype->translit_to_idx,
 964                       ctype->translit_idx_size * sizeof (uint32_t));
 965
 966           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL,
 967                       ctype->translit_to_tbl, ctype->translit_to_tbl_size);
 968
 969           case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
 970             /* The class name array.  */
 971             total = 0;
 972             for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
 973               {
 974                 iov[2 + elem + offset].iov_base
 975                   = (void *) ctype->classnames[cnt];
 976                 iov[2 + elem + offset].iov_len
 977                   = strlen (ctype->classnames[cnt]) + 1;
 978                 total += iov[2 + elem + offset].iov_len;
 979               }
 980             iov[2 + elem + offset].iov_base = (void *) nulbytes;
 981             iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
 982             total += 1 + (4 - ((total + 1) % 4));
 983
 984             idx[elem + 1] = idx[elem] + total;
 985             break;
 986
 987           case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
 988             /* The class name array.  */
 989             total = 0;
 990             for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
 991               {
 992                 iov[2 + elem + offset].iov_base
 993                   = (void *) ctype->mapnames[cnt];
 994                 iov[2 + elem + offset].iov_len
 995                   = strlen (ctype->mapnames[cnt]) + 1;
 996                 total += iov[2 + elem + offset].iov_len;
 997               }
 998             iov[2 + elem + offset].iov_base = (void *) nulbytes;
 999             iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
1000             total += 1 + (4 - ((total + 1) % 4));
1001
1002             idx[elem + 1] = idx[elem] + total;
1003             break;
1004
1005           CTYPE_DATA (_NL_CTYPE_WIDTH,
1006                       ctype->width.iov_base,
1007                       ctype->width.iov_len);
1008
1009           CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
1010                       &ctype->mb_cur_max, sizeof (uint32_t));
1011
1012           case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
1013             total = strlen (ctype->codeset_name) + 1;
1014             if (total % 4 == 0)
1015               iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
1016             else
1017               {
1018                 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
1019                 memset (mempcpy (iov[2 + elem + offset].iov_base,
1020                                  ctype->codeset_name, total),
1021                         '\0', 4 - (total & 3));
1022                 total = (total + 3) & ~3;
1023               }
1024             iov[2 + elem + offset].iov_len = total;
1025             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1026             break;
1027
1028           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
1029             iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1030             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1031             *(uint32_t *) iov[2 + elem + offset].iov_base =
1032               ctype->mbdigits_act / 10;
1033             idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1034             break;
1035
1036           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
1037             /* Align entries.  */
1038             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1039             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1040             idx[elem] += iov[2 + elem + offset].iov_len;
1041             ++offset;
1042
1043             iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1044             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1045             *(uint32_t *) iov[2 + elem + offset].iov_base =
1046               ctype->wcdigits_act / 10;
1047             idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1048             break;
1049
1050           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
1051             /* Compute the length of all possible characters.  For INDIGITS
1052                there might be more than one.  We simply concatenate all of
1053                them with a NUL byte following.  The NUL byte wouldn't be
1054                necessary but it makes it easier for the user.  */
1055             total = 0;
1056
1057             for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1058                  cnt < ctype->mbdigits_act; cnt += 10)
1059               total += ctype->mbdigits[cnt]->nbytes + 1;
1060             iov[2 + elem + offset].iov_base = (char *) alloca (total);
1061             iov[2 + elem + offset].iov_len = total;
1062
1063             cp = iov[2 + elem + offset].iov_base;
1064             for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1065                  cnt < ctype->mbdigits_act; cnt += 10)
1066               {
1067                 cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes,
1068                               ctype->mbdigits[cnt]->nbytes);
1069                 *cp++ = '\0';
1070               }
1071             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1072             break;
1073
1074           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
1075             /* Compute the length of all possible characters.  For INDIGITS
1076                there might be more than one.  We simply concatenate all of
1077                them with a NUL byte following.  The NUL byte wouldn't be
1078                necessary but it makes it easier for the user.  */
1079             cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
1080             total = ctype->mboutdigits[cnt]->nbytes + 1;
1081             iov[2 + elem + offset].iov_base = (char *) alloca (total);
1082             iov[2 + elem + offset].iov_len = total;
1083
1084             *(char *) mempcpy (iov[2 + elem + offset].iov_base,
1085                                ctype->mboutdigits[cnt]->bytes,
1086                                ctype->mboutdigits[cnt]->nbytes) = '\0';
1087             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1088             break;
1089
1090           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
1091             total = ctype->wcdigits_act / 10;
1092
1093             iov[2 + elem + offset].iov_base =
1094               (uint32_t *) alloca (total * sizeof (uint32_t));
1095             iov[2 + elem + offset].iov_len = total * sizeof (uint32_t);
1096
1097             for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
1098                  cnt < ctype->wcdigits_act; cnt += 10)
1099               ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10]
1100                 = ctype->wcdigits[cnt];
1101             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1102             break;
1103
1104           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC):
1105             /* Align entries.  */
1106             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1107             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1108             idx[elem] += iov[2 + elem + offset].iov_len;
1109             ++offset;
1110             /* FALLTRHOUGH */
1111
1112           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT1_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
1113             cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
1114             iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt];
1115             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1116             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1117             break;
1118
1119           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
1120             /* Align entries.  */
1121             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1122             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1123             idx[elem] += iov[2 + elem + offset].iov_len;
1124             ++offset;
1125
1126             default_missing_len = (ctype->default_missing
1127                                    ? wcslen ((wchar_t *)ctype->default_missing)
1128                                    : 0);
1129             iov[2 + elem + offset].iov_base = &default_missing_len;
1130             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1131             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1132             break;
1133
1134           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
1135             iov[2 + elem + offset].iov_base =
1136               ctype->default_missing ?: (uint32_t *) L"";
1137             iov[2 + elem + offset].iov_len =
1138               wcslen (iov[2 + elem + offset].iov_base);
1139             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1140             break;
1141
1142           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
1143             /* Align entries.  */
1144             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1145             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1146             idx[elem] += iov[2 + elem + offset].iov_len;
1147             ++offset;
1148
1149             iov[2 + elem + offset].iov_base = &ctype->ntranslit_ignore;
1150             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1151             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1152             break;
1153
1154           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
1155             {
1156               uint32_t *ranges = (uint32_t *) alloca (ctype->ntranslit_ignore
1157                                                       * 3 * sizeof (uint32_t));
1158               struct translit_ignore_t *runp;
1159
1160               iov[2 + elem + offset].iov_base = ranges;
1161               iov[2 + elem + offset].iov_len = (ctype->ntranslit_ignore
1162                                                 * 3 * sizeof (uint32_t));
1163
1164               for (runp = ctype->translit_ignore; runp != NULL;
1165                    runp = runp->next)
1166                 {
1167                   *ranges++ = runp->from;
1168                   *ranges++ = runp->to;
1169                   *ranges++ = runp->step;
1170                 }
1171             }
1172             /* Remove the following line in case a new entry is added
1173                after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN.  */
1174             if (elem < nelems)
1175               idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1176             break;
1177
1178           default:
1179             assert (! "unknown CTYPE element");
1180           }
1181       else
1182         {
1183           /* Handle extra maps.  */
1184           size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
1185           if (nr < ctype->nr_charclass)
1186             {
1187               iov[2 + elem + offset].iov_base = ctype->class_b[nr];
1188               iov[2 + elem + offset].iov_len = 256 / 32 * sizeof (uint32_t);
1189               idx[elem] += iov[2 + elem + offset].iov_len;
1190               ++offset;
1191
1192               iov[2 + elem + offset] = ctype->class_3level[nr];
1193             }
1194           else
1195             {
1196               nr -= ctype->nr_charclass;
1197               assert (nr < ctype->map_collection_nr);
1198               iov[2 + elem + offset] = ctype->map_3level[nr];
1199             }
1200           idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1201         }
1202     }
1203
1204   assert (2 + elem + offset == (nelems + 2 * ctype->nr_charclass
1205                                 + ctype->map_collection_nr + 4 + 2));
1206
1207   write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
1208 }
1209
1210
1211 /* Local functions.  */
1212 static void
1213 ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
1214                  const char *name)
1215 {
1216   size_t cnt;
1217
1218   for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1219     if (strcmp (ctype->classnames[cnt], name) == 0)
1220       break;
1221
1222   if (cnt < ctype->nr_charclass)
1223     {
1224       lr_error (lr, _("character class `%s' already defined"), name);
1225       return;
1226     }
1227
1228   if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1229     /* Exit code 2 is prescribed in P1003.2b.  */
1230     error (2, 0, _("\
1231 implementation limit: no more than %Zd character classes allowed"),
1232            MAX_NR_CHARCLASS);
1233
1234   ctype->classnames[ctype->nr_charclass++] = name;
1235 }
1236
1237
1238 static void
1239 ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1240                const char *name, const struct charmap_t *charmap)
1241 {
1242   size_t max_chars = 0;
1243   size_t cnt;
1244
1245   for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1246     {
1247       if (strcmp (ctype->mapnames[cnt], name) == 0)
1248         break;
1249
1250       if (max_chars < ctype->map_collection_max[cnt])
1251         max_chars = ctype->map_collection_max[cnt];
1252     }
1253
1254   if (cnt < ctype->map_collection_nr)
1255     {
1256       lr_error (lr, _("character map `%s' already defined"), name);
1257       return;
1258     }
1259
1260   if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1261     /* Exit code 2 is prescribed in P1003.2b.  */
1262     error (2, 0, _("\
1263 implementation limit: no more than %d character maps allowed"),
1264            MAX_NR_CHARMAP);
1265
1266   ctype->mapnames[cnt] = name;
1267
1268   if (max_chars == 0)
1269     ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1270   else
1271     ctype->map_collection_max[cnt] = max_chars;
1272
1273   ctype->map_collection[cnt] = (uint32_t *)
1274     xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
1275   ctype->map_collection_act[cnt] = 256;
1276
1277   ++ctype->map_collection_nr;
1278 }
1279
1280
1281 /* We have to be prepared that TABLE, MAX, and ACT can be NULL.  This
1282    is possible if we only want to extend the name array.  */
1283 static uint32_t *
1284 find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1285           size_t *act, uint32_t idx)
1286 {
1287   size_t cnt;
1288
1289   if (idx < 256)
1290     return table == NULL ? NULL : &(*table)[idx];
1291
1292   /* Use the charnames_idx lookup table instead of the slow search loop.  */
1293 #if 1
1294   cnt = idx_table_get (&ctype->charnames_idx, idx);
1295   if (cnt == EMPTY)
1296     /* Not found.  */
1297     cnt = ctype->charnames_act;
1298 #else
1299   for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1300     if (ctype->charnames[cnt] == idx)
1301       break;
1302 #endif
1303
1304   /* We have to distinguish two cases: the name is found or not.  */
1305   if (cnt == ctype->charnames_act)
1306     {
1307       /* Extend the name array.  */
1308       if (ctype->charnames_act == ctype->charnames_max)
1309         {
1310           ctype->charnames_max *= 2;
1311           ctype->charnames = (uint32_t *)
1312             xrealloc (ctype->charnames,
1313                       sizeof (uint32_t) * ctype->charnames_max);
1314         }
1315       ctype->charnames[ctype->charnames_act++] = idx;
1316       idx_table_add (&ctype->charnames_idx, idx, cnt);
1317     }
1318
1319   if (table == NULL)
1320     /* We have done everything we are asked to do.  */
1321     return NULL;
1322
1323   if (max == NULL)
1324     /* The caller does not want to extend the table.  */
1325     return (cnt >= *act ? NULL : &(*table)[cnt]);
1326
1327   if (cnt >= *act)
1328     {
1329       if (cnt >= *max)
1330         {
1331           size_t old_max = *max;
1332           do
1333             *max *= 2;
1334           while (*max <= cnt);
1335
1336           *table =
1337             (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
1338           memset (&(*table)[old_max], '\0',
1339                   (*max - old_max) * sizeof (uint32_t));
1340         }
1341
1342       *act = cnt + 1;
1343     }
1344
1345   return &(*table)[cnt];
1346 }
1347
1348
1349 static int
1350 get_character (struct token *now, const struct charmap_t *charmap,
1351                struct repertoire_t *repertoire,
1352                struct charseq **seqp, uint32_t *wchp)
1353 {
1354   if (now->tok == tok_bsymbol)
1355     {
1356       /* This will hopefully be the normal case.  */
1357       *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1358                                      now->val.str.lenmb);
1359       *seqp = charmap_find_value (charmap, now->val.str.startmb,
1360                                   now->val.str.lenmb);
1361     }
1362   else if (now->tok == tok_ucs4)
1363     {
1364       char utmp[10];
1365
1366       snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1367       *seqp = charmap_find_value (charmap, utmp, 9);
1368
1369       if (*seqp == NULL)
1370         *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
1371
1372       if (*seqp == NULL)
1373         {
1374           /* Compute the value in the charmap from the UCS value.  */
1375           const char *symbol = repertoire_find_symbol (repertoire,
1376                                                        now->val.ucs4);
1377
1378           if (symbol == NULL)
1379             *seqp = NULL;
1380           else
1381             *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
1382
1383           if (*seqp == NULL)
1384             {
1385               if (repertoire != NULL)
1386                 {
1387                   /* Insert a negative entry.  */
1388                   static const struct charseq negative
1389                     = { .ucs4 = ILLEGAL_CHAR_VALUE };
1390                   uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1391                                                   sizeof (uint32_t));
1392                   *newp = now->val.ucs4;
1393
1394                   insert_entry (&repertoire->seq_table, newp,
1395                                 sizeof (uint32_t), (void *) &negative);
1396                 }
1397             }
1398           else
1399             (*seqp)->ucs4 = now->val.ucs4;
1400         }
1401       else if ((*seqp)->ucs4 != now->val.ucs4)
1402         *seqp = NULL;
1403
1404       *wchp = now->val.ucs4;
1405     }
1406   else if (now->tok == tok_charcode)
1407     {
1408       /* We must map from the byte code to UCS4.  */
1409       *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1410                                    now->val.str.lenmb);
1411
1412       if (*seqp == NULL)
1413         *wchp = ILLEGAL_CHAR_VALUE;
1414       else
1415         {
1416           if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1417             (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1418                                                    strlen ((*seqp)->name));
1419           *wchp = (*seqp)->ucs4;
1420         }
1421     }
1422   else
1423     return 1;
1424
1425   return 0;
1426 }
1427
1428
1429 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1430    the .(2). counterparts.  */
1431 static void
1432 charclass_symbolic_ellipsis (struct linereader *ldfile,
1433                              struct locale_ctype_t *ctype,
1434                              const struct charmap_t *charmap,
1435                              struct repertoire_t *repertoire,
1436                              struct token *now,
1437                              const char *last_str,
1438                              unsigned long int class256_bit,
1439                              unsigned long int class_bit, int base,
1440                              int ignore_content, int handle_digits, int step)
1441 {
1442   const char *nowstr = now->val.str.startmb;
1443   char tmp[now->val.str.lenmb + 1];
1444   const char *cp;
1445   char *endp;
1446   unsigned long int from;
1447   unsigned long int to;
1448
1449   /* We have to compute the ellipsis values using the symbolic names.  */
1450   assert (last_str != NULL);
1451
1452   if (strlen (last_str) != now->val.str.lenmb)
1453     {
1454     invalid_range:
1455       lr_error (ldfile,
1456                 _("`%s' and `%.*s' are no valid names for symbolic range"),
1457                 last_str, (int) now->val.str.lenmb, nowstr);
1458       return;
1459     }
1460
1461   if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1462     /* Nothing to do, the names are the same.  */
1463     return;
1464
1465   for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1466     ;
1467
1468   errno = 0;
1469   from = strtoul (cp, &endp, base);
1470   if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1471     goto invalid_range;
1472
1473   to = strtoul (nowstr + (cp - last_str), &endp, base);
1474   if ((to == UINT_MAX && errno == ERANGE)
1475       || (endp - nowstr) != now->val.str.lenmb || from >= to)
1476     goto invalid_range;
1477
1478   /* OK, we have a range FROM - TO.  Now we can create the symbolic names.  */
1479   if (!ignore_content)
1480     {
1481       now->val.str.startmb = tmp;
1482       while ((from += step) <= to)
1483         {
1484           struct charseq *seq;
1485           uint32_t wch;
1486
1487           sprintf (tmp, (base == 10 ? "%.*s%0*ld" : "%.*s%0*lX"),
1488                    (int) (cp - last_str), last_str,
1489                    (int) (now->val.str.lenmb - (cp - last_str)),
1490                    from);
1491
1492           get_character (now, charmap, repertoire, &seq, &wch);
1493
1494           if (seq != NULL && seq->nbytes == 1)
1495             /* Yep, we can store information about this byte sequence.  */
1496             ctype->class256_collection[seq->bytes[0]] |= class256_bit;
1497
1498           if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1499             /* We have the UCS4 position.  */
1500             *find_idx (ctype, &ctype->class_collection,
1501                        &ctype->class_collection_max,
1502                        &ctype->class_collection_act, wch) |= class_bit;
1503
1504           if (handle_digits == 1)
1505             {
1506               /* We must store the digit values.  */
1507               if (ctype->mbdigits_act == ctype->mbdigits_max)
1508                 {
1509                   ctype->mbdigits_max *= 2;
1510                   ctype->mbdigits = xrealloc (ctype->mbdigits,
1511                                               (ctype->mbdigits_max
1512                                                * sizeof (char *)));
1513                   ctype->wcdigits_max *= 2;
1514                   ctype->wcdigits = xrealloc (ctype->wcdigits,
1515                                               (ctype->wcdigits_max
1516                                                * sizeof (uint32_t)));
1517                 }
1518
1519               ctype->mbdigits[ctype->mbdigits_act++] = seq;
1520               ctype->wcdigits[ctype->wcdigits_act++] = wch;
1521             }
1522           else if (handle_digits == 2)
1523             {
1524               /* We must store the digit values.  */
1525               if (ctype->outdigits_act >= 10)
1526                 {
1527                   lr_error (ldfile, _("\
1528 %s: field `%s' does not contain exactly ten entries"),
1529                             "LC_CTYPE", "outdigit");
1530                   return;
1531                 }
1532
1533               ctype->mboutdigits[ctype->outdigits_act] = seq;
1534               ctype->wcoutdigits[ctype->outdigits_act] = wch;
1535               ++ctype->outdigits_act;
1536             }
1537         }
1538     }
1539 }
1540
1541
1542 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'.  */
1543 static void
1544 charclass_ucs4_ellipsis (struct linereader *ldfile,
1545                          struct locale_ctype_t *ctype,
1546                          const struct charmap_t *charmap,
1547                          struct repertoire_t *repertoire,
1548                          struct token *now, uint32_t last_wch,
1549                          unsigned long int class256_bit,
1550                          unsigned long int class_bit, int ignore_content,
1551                          int handle_digits, int step)
1552 {
1553   if (last_wch > now->val.ucs4)
1554     {
1555       lr_error (ldfile, _("\
1556 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1557                 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1558                 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
1559       return;
1560     }
1561
1562   if (!ignore_content)
1563     while ((last_wch += step) <= now->val.ucs4)
1564       {
1565         /* We have to find out whether there is a byte sequence corresponding
1566            to this UCS4 value.  */
1567         struct charseq *seq;
1568         char utmp[10];
1569
1570         snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1571         seq = charmap_find_value (charmap, utmp, 9);
1572         if (seq == NULL)
1573           {
1574             snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1575             seq = charmap_find_value (charmap, utmp, 5);
1576           }
1577
1578         if (seq == NULL)
1579           /* Try looking in the repertoire map.  */
1580           seq = repertoire_find_seq (repertoire, last_wch);
1581
1582         /* If this is the first time we look for this sequence create a new
1583            entry.  */
1584         if (seq == NULL)
1585           {
1586             static const struct charseq negative
1587               = { .ucs4 = ILLEGAL_CHAR_VALUE };
1588
1589             /* Find the symbolic name for this UCS4 value.  */
1590             if (repertoire != NULL)
1591               {
1592                 const char *symbol = repertoire_find_symbol (repertoire,
1593                                                              last_wch);
1594                 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1595                                                 sizeof (uint32_t));
1596                 *newp = last_wch;
1597
1598                 if (symbol != NULL)
1599                   /* We have a name, now search the multibyte value.  */
1600                   seq = charmap_find_value (charmap, symbol, strlen (symbol));
1601
1602                 if (seq == NULL)
1603                   /* We have to create a fake entry.  */
1604                   seq = (struct charseq *) &negative;
1605                 else
1606                   seq->ucs4 = last_wch;
1607
1608                 insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1609                               seq);
1610               }
1611             else
1612               /* We have to create a fake entry.  */
1613               seq = (struct charseq *) &negative;
1614           }
1615
1616         /* We have a name, now search the multibyte value.  */
1617         if (seq->ucs4 == last_wch && seq->nbytes == 1)
1618           /* Yep, we can store information about this byte sequence.  */
1619           ctype->class256_collection[(size_t) seq->bytes[0]]
1620             |= class256_bit;
1621
1622         /* And of course we have the UCS4 position.  */
1623         if (class_bit != 0)
1624           *find_idx (ctype, &ctype->class_collection,
1625                      &ctype->class_collection_max,
1626                      &ctype->class_collection_act, last_wch) |= class_bit;
1627
1628         if (handle_digits == 1)
1629           {
1630             /* We must store the digit values.  */
1631             if (ctype->mbdigits_act == ctype->mbdigits_max)
1632               {
1633                 ctype->mbdigits_max *= 2;
1634                 ctype->mbdigits = xrealloc (ctype->mbdigits,
1635                                             (ctype->mbdigits_max
1636                                              * sizeof (char *)));
1637                 ctype->wcdigits_max *= 2;
1638                 ctype->wcdigits = xrealloc (ctype->wcdigits,
1639                                             (ctype->wcdigits_max
1640                                              * sizeof (uint32_t)));
1641               }
1642
1643             ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1644                                                       ? seq : NULL);
1645             ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1646           }
1647         else if (handle_digits == 2)
1648           {
1649             /* We must store the digit values.  */
1650             if (ctype->outdigits_act >= 10)
1651               {
1652                 lr_error (ldfile, _("\
1653 %s: field `%s' does not contain exactly ten entries"),
1654                           "LC_CTYPE", "outdigit");
1655                 return;
1656               }
1657
1658             ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1659                                                         ? seq : NULL);
1660             ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1661             ++ctype->outdigits_act;
1662           }
1663       }
1664 }
1665
1666
1667 /* Ellipsis as in `/xea/x12.../xea/x34'.  */
1668 static void
1669 charclass_charcode_ellipsis (struct linereader *ldfile,
1670                              struct locale_ctype_t *ctype,
1671                              const struct charmap_t *charmap,
1672                              struct repertoire_t *repertoire,
1673                              struct token *now, char *last_charcode,
1674                              uint32_t last_charcode_len,
1675                              unsigned long int class256_bit,
1676                              unsigned long int class_bit, int ignore_content,
1677                              int handle_digits)
1678 {
1679   /* First check whether the to-value is larger.  */
1680   if (now->val.charcode.nbytes != last_charcode_len)
1681     {
1682       lr_error (ldfile, _("\
1683 start and end character sequence of range must have the same length"));
1684       return;
1685     }
1686
1687   if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
1688     {
1689       lr_error (ldfile, _("\
1690 to-value character sequence is smaller than from-value sequence"));
1691       return;
1692     }
1693
1694   if (!ignore_content)
1695     {
1696       do
1697         {
1698           /* Increment the byte sequence value.  */
1699           struct charseq *seq;
1700           uint32_t wch;
1701           int i;
1702
1703           for (i = last_charcode_len - 1; i >= 0; --i)
1704             if (++last_charcode[i] != 0)
1705               break;
1706
1707           if (last_charcode_len == 1)
1708             /* Of course we have the charcode value.  */
1709             ctype->class256_collection[(size_t) last_charcode[0]]
1710               |= class256_bit;
1711
1712           /* Find the symbolic name.  */
1713           seq = charmap_find_symbol (charmap, last_charcode,
1714                                      last_charcode_len);
1715           if (seq != NULL)
1716             {
1717               if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1718                 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1719                                                    strlen (seq->name));
1720               wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
1721
1722               if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1723                 *find_idx (ctype, &ctype->class_collection,
1724                            &ctype->class_collection_max,
1725                            &ctype->class_collection_act, wch) |= class_bit;
1726             }
1727           else
1728             wch = ILLEGAL_CHAR_VALUE;
1729
1730           if (handle_digits == 1)
1731             {
1732               /* We must store the digit values.  */
1733               if (ctype->mbdigits_act == ctype->mbdigits_max)
1734                 {
1735                   ctype->mbdigits_max *= 2;
1736                   ctype->mbdigits = xrealloc (ctype->mbdigits,
1737                                               (ctype->mbdigits_max
1738                                                * sizeof (char *)));
1739                   ctype->wcdigits_max *= 2;
1740                   ctype->wcdigits = xrealloc (ctype->wcdigits,
1741                                               (ctype->wcdigits_max
1742                                                * sizeof (uint32_t)));
1743                 }
1744
1745               seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1746               memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1747               seq->nbytes = last_charcode_len;
1748
1749               ctype->mbdigits[ctype->mbdigits_act++] = seq;
1750               ctype->wcdigits[ctype->wcdigits_act++] = wch;
1751             }
1752           else if (handle_digits == 2)
1753             {
1754               struct charseq *seq;
1755               /* We must store the digit values.  */
1756               if (ctype->outdigits_act >= 10)
1757                 {
1758                   lr_error (ldfile, _("\
1759 %s: field `%s' does not contain exactly ten entries"),
1760                             "LC_CTYPE", "outdigit");
1761                   return;
1762                 }
1763
1764               seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1765               memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1766               seq->nbytes = last_charcode_len;
1767
1768               ctype->mboutdigits[ctype->outdigits_act] = seq;
1769               ctype->wcoutdigits[ctype->outdigits_act] = wch;
1770               ++ctype->outdigits_act;
1771             }
1772         }
1773       while (memcmp (last_charcode, now->val.charcode.bytes,
1774                      last_charcode_len) != 0);
1775     }
1776 }
1777
1778
1779 static uint32_t *
1780 find_translit2 (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
1781                 uint32_t wch)
1782 {
1783   struct translit_t *trunp = ctype->translit;
1784   struct translit_ignore_t *tirunp = ctype->translit_ignore;
1785
1786   while (trunp != NULL)
1787     {
1788       /* XXX We simplify things here.  The transliterations we look
1789          for are only allowed to have one character.  */
1790       if (trunp->from[0] == wch && trunp->from[1] == 0)
1791         {
1792           /* Found it.  Now look for a transliteration which can be
1793              represented with the character set.  */
1794           struct translit_to_t *torunp = trunp->to;
1795
1796           while (torunp != NULL)
1797             {
1798               int i;
1799
1800               for (i = 0; torunp->str[i] != 0; ++i)
1801                 {
1802                   char utmp[10];
1803
1804                   snprintf (utmp, sizeof (utmp), "U%08X", torunp->str[i]);
1805                   if (charmap_find_value (charmap, utmp, 9) == NULL)
1806                     /* This character cannot be represented.  */
1807                     break;
1808                 }
1809
1810               if (torunp->str[i] == 0)
1811                 return torunp->str;
1812
1813               torunp = torunp->next;
1814             }
1815
1816           break;
1817         }
1818
1819       trunp = trunp->next;
1820     }
1821
1822   /* Check for ignored chars.  */
1823   while (tirunp != NULL)
1824     {
1825       if (tirunp->from <= wch && tirunp->to >= wch)
1826         {
1827           uint32_t wi;
1828
1829           for (wi = tirunp->from; wi <= wch; wi += tirunp->step)
1830             if (wi == wch)
1831               return (uint32_t []) { 0 };
1832         }
1833     }
1834
1835   /* Nothing found.  */
1836   return NULL;
1837 }
1838
1839
1840 uint32_t *
1841 find_translit (struct localedef_t *locale, const struct charmap_t *charmap,
1842                uint32_t wch)
1843 {
1844   struct locale_ctype_t *ctype;
1845   uint32_t *result = NULL;
1846
1847   assert (locale != NULL);
1848   ctype = locale->categories[LC_CTYPE].ctype;
1849
1850   if (ctype->translit != NULL)
1851     result = find_translit2 (ctype, charmap, wch);
1852
1853   if (result == NULL)
1854     {
1855       struct translit_include_t *irunp = ctype->translit_include;
1856
1857       while (irunp != NULL && result == NULL)
1858         {
1859           result = find_translit (find_locale (CTYPE_LOCALE,
1860                                                irunp->copy_locale,
1861                                                irunp->copy_repertoire,
1862                                                charmap),
1863                                   charmap, wch);
1864           irunp = irunp->next;
1865         }
1866     }
1867
1868   return result;
1869 }
1870
1871
1872 /* Read one transliteration entry.  */
1873 static uint32_t *
1874 read_widestring (struct linereader *ldfile, struct token *now,
1875                  const struct charmap_t *charmap,
1876                  struct repertoire_t *repertoire)
1877 {
1878   uint32_t *wstr;
1879
1880   if (now->tok == tok_default_missing)
1881     /* The special name "" will denote this case.  */
1882     wstr = ((uint32_t *) { 0 });
1883   else if (now->tok == tok_bsymbol)
1884     {
1885       /* Get the value from the repertoire.  */
1886       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1887       wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1888                                        now->val.str.lenmb);
1889       if (wstr[0] == ILLEGAL_CHAR_VALUE)
1890         {
1891           /* We cannot proceed, we don't know the UCS4 value.  */
1892           free (wstr);
1893           return NULL;
1894         }
1895
1896       wstr[1] = 0;
1897     }
1898   else if (now->tok == tok_ucs4)
1899     {
1900       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1901       wstr[0] = now->val.ucs4;
1902       wstr[1] = 0;
1903     }
1904   else if (now->tok == tok_charcode)
1905     {
1906       /* Argh, we have to convert to the symbol name first and then to the
1907          UCS4 value.  */
1908       struct charseq *seq = charmap_find_symbol (charmap,
1909                                                  now->val.str.startmb,
1910                                                  now->val.str.lenmb);
1911       if (seq == NULL)
1912         /* Cannot find the UCS4 value.  */
1913         return NULL;
1914
1915       if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1916         seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1917                                            strlen (seq->name));
1918       if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1919         /* We cannot proceed, we don't know the UCS4 value.  */
1920         return NULL;
1921
1922       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1923       wstr[0] = seq->ucs4;
1924       wstr[1] = 0;
1925     }
1926   else if (now->tok == tok_string)
1927     {
1928       wstr = now->val.str.startwc;
1929       if (wstr == NULL || wstr[0] == 0)
1930         return NULL;
1931     }
1932   else
1933     {
1934       if (now->tok != tok_eol && now->tok != tok_eof)
1935         lr_ignore_rest (ldfile, 0);
1936       SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1937       return (uint32_t *) -1l;
1938     }
1939
1940   return wstr;
1941 }
1942
1943
1944 static void
1945 read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1946                      struct token *now, const struct charmap_t *charmap,
1947                      struct repertoire_t *repertoire)
1948 {
1949   uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1950   struct translit_t *result;
1951   struct translit_to_t **top;
1952   struct obstack *ob = &ctype->mempool;
1953   int first;
1954   int ignore;
1955
1956   if (from_wstr == NULL)
1957     /* There is no valid from string.  */
1958     return;
1959
1960   result = (struct translit_t *) obstack_alloc (ob,
1961                                                 sizeof (struct translit_t));
1962   result->from = from_wstr;
1963   result->fname = ldfile->fname;
1964   result->lineno = ldfile->lineno;
1965   result->next = NULL;
1966   result->to = NULL;
1967   top = &result->to;
1968   first = 1;
1969   ignore = 0;
1970
1971   while (1)
1972     {
1973       uint32_t *to_wstr;
1974
1975       /* Next we have one or more transliterations.  They are
1976          separated by semicolons.  */
1977       now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
1978
1979       if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
1980         {
1981           /* One string read.  */
1982           const uint32_t zero = 0;
1983
1984           if (!ignore)
1985             {
1986               obstack_grow (ob, &zero, 4);
1987               to_wstr = obstack_finish (ob);
1988
1989               *top = obstack_alloc (ob, sizeof (struct translit_to_t));
1990               (*top)->str = to_wstr;
1991               (*top)->next = NULL;
1992             }
1993
1994           if (now->tok == tok_eol)
1995             {
1996               result->next = ctype->translit;
1997               ctype->translit = result;
1998               return;
1999             }
2000
2001           if (!ignore)
2002             top = &(*top)->next;
2003           ignore = 0;
2004         }
2005       else
2006         {
2007           to_wstr = read_widestring (ldfile, now, charmap, repertoire);
2008           if (to_wstr == (uint32_t *) -1l)
2009             {
2010               /* An error occurred.  */
2011               obstack_free (ob, result);
2012               return;
2013             }
2014
2015           if (to_wstr == NULL)
2016             ignore = 1;
2017           else
2018             /* This value is usable.  */
2019             obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
2020
2021           first = 0;
2022         }
2023     }
2024 }
2025
2026
2027 static void
2028 read_translit_ignore_entry (struct linereader *ldfile,
2029                             struct locale_ctype_t *ctype,
2030                             const struct charmap_t *charmap,
2031                             struct repertoire_t *repertoire)
2032 {
2033   /* We expect a semicolon-separated list of characters we ignore.  We are
2034      only interested in the wide character definitions.  These must be
2035      single characters, possibly defining a range when an ellipsis is used.  */
2036   while (1)
2037     {
2038       struct token *now = lr_token (ldfile, charmap, NULL, repertoire,
2039                                     verbose);
2040       struct translit_ignore_t *newp;
2041       uint32_t from;
2042
2043       if (now->tok == tok_eol || now->tok == tok_eof)
2044         {
2045           lr_error (ldfile,
2046                     _("premature end of `translit_ignore' definition"));
2047           return;
2048         }
2049
2050       if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2051         {
2052           lr_error (ldfile, _("syntax error"));
2053           lr_ignore_rest (ldfile, 0);
2054           return;
2055         }
2056
2057       if (now->tok == tok_ucs4)
2058         from = now->val.ucs4;
2059       else
2060         /* Try to get the value.  */
2061         from = repertoire_find_value (repertoire, now->val.str.startmb,
2062                                       now->val.str.lenmb);
2063
2064       if (from == ILLEGAL_CHAR_VALUE)
2065         {
2066           lr_error (ldfile, "invalid character name");
2067           newp = NULL;
2068         }
2069       else
2070         {
2071           newp = (struct translit_ignore_t *)
2072             obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
2073           newp->from = from;
2074           newp->to = from;
2075           newp->step = 1;
2076
2077           newp->next = ctype->translit_ignore;
2078           ctype->translit_ignore = newp;
2079         }
2080
2081       /* Now we expect either a semicolon, an ellipsis, or the end of the
2082          line.  */
2083       now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2084
2085       if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
2086         {
2087           /* XXX Should we bother implementing `....'?  `...' certainly
2088              will not be implemented.  */
2089           uint32_t to;
2090           int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
2091
2092           now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2093
2094           if (now->tok == tok_eol || now->tok == tok_eof)
2095             {
2096               lr_error (ldfile,
2097                         _("premature end of `translit_ignore' definition"));
2098               return;
2099             }
2100
2101           if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2102             {
2103               lr_error (ldfile, _("syntax error"));
2104               lr_ignore_rest (ldfile, 0);
2105               return;
2106             }
2107
2108           if (now->tok == tok_ucs4)
2109             to = now->val.ucs4;
2110           else
2111             /* Try to get the value.  */
2112             to = repertoire_find_value (repertoire, now->val.str.startmb,
2113                                         now->val.str.lenmb);
2114
2115           if (to == ILLEGAL_CHAR_VALUE)
2116             lr_error (ldfile, "invalid character name");
2117           else
2118             {
2119               /* Make sure the `to'-value is larger.  */
2120               if (to >= from)
2121                 {
2122                   newp->to = to;
2123                   newp->step = step;
2124                 }
2125               else
2126                 lr_error (ldfile, _("\
2127 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2128                           (to | from) < 65536 ? 4 : 8, to,
2129                           (to | from) < 65536 ? 4 : 8, from);
2130             }
2131
2132           /* And the next token.  */
2133           now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2134         }
2135
2136       if (now->tok == tok_eol || now->tok == tok_eof)
2137         /* We are done.  */
2138         return;
2139
2140       if (now->tok == tok_semicolon)
2141         /* Next round.  */
2142         continue;
2143
2144       /* If we come here something is wrong.  */
2145       lr_error (ldfile, _("syntax error"));
2146       lr_ignore_rest (ldfile, 0);
2147       return;
2148     }
2149 }
2150
2151
2152 /* The parser for the LC_CTYPE section of the locale definition.  */
2153 void
2154 ctype_read (struct linereader *ldfile, struct localedef_t *result,
2155             const struct charmap_t *charmap, const char *repertoire_name,
2156             int ignore_content)
2157 {
2158   struct repertoire_t *repertoire = NULL;
2159   struct locale_ctype_t *ctype;
2160   struct token *now;
2161   enum token_t nowtok;
2162   size_t cnt;
2163   struct charseq *last_seq;
2164   uint32_t last_wch = 0;
2165   enum token_t last_token;
2166   enum token_t ellipsis_token;
2167   int step;
2168   char last_charcode[16];
2169   size_t last_charcode_len = 0;
2170   const char *last_str = NULL;
2171   int mapidx;
2172   struct localedef_t *copy_locale = NULL;
2173
2174   /* Get the repertoire we have to use.  */
2175   if (repertoire_name != NULL)
2176     repertoire = repertoire_read (repertoire_name);
2177
2178   /* The rest of the line containing `LC_CTYPE' must be free.  */
2179   lr_ignore_rest (ldfile, 1);
2180
2181
2182   do
2183     {
2184       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2185       nowtok = now->tok;
2186     }
2187   while (nowtok == tok_eol);
2188
2189   /* If we see `copy' now we are almost done.  */
2190   if (nowtok == tok_copy)
2191     {
2192       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2193       if (now->tok != tok_string)
2194         {
2195           SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2196
2197         skip_category:
2198           do
2199             now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2200           while (now->tok != tok_eof && now->tok != tok_end);
2201
2202           if (now->tok != tok_eof
2203               || (now = lr_token (ldfile, charmap, NULL, NULL, verbose),
2204                   now->tok == tok_eof))
2205             lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2206           else if (now->tok != tok_lc_ctype)
2207             {
2208               lr_error (ldfile, _("\
2209 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2210               lr_ignore_rest (ldfile, 0);
2211             }
2212           else
2213             lr_ignore_rest (ldfile, 1);
2214
2215           return;
2216         }
2217
2218       if (! ignore_content)
2219         {
2220           /* Get the locale definition.  */
2221           copy_locale = load_locale (LC_CTYPE, now->val.str.startmb,
2222                                      repertoire_name, charmap, NULL);
2223           if ((copy_locale->avail & CTYPE_LOCALE) == 0)
2224             {
2225               /* Not yet loaded.  So do it now.  */
2226               if (locfile_read (copy_locale, charmap) != 0)
2227                 goto skip_category;
2228             }
2229         }
2230
2231       lr_ignore_rest (ldfile, 1);
2232
2233       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2234       nowtok = now->tok;
2235     }
2236
2237   /* Prepare the data structures.  */
2238   ctype_startup (ldfile, result, charmap, copy_locale, ignore_content);
2239   ctype = result->categories[LC_CTYPE].ctype;
2240
2241   /* Remember the repertoire we use.  */
2242   if (!ignore_content)
2243     ctype->repertoire = repertoire;
2244
2245   while (1)
2246     {
2247       unsigned long int class_bit = 0;
2248       unsigned long int class256_bit = 0;
2249       int handle_digits = 0;
2250
2251       /* Of course we don't proceed beyond the end of file.  */
2252       if (nowtok == tok_eof)
2253         break;
2254
2255       /* Ingore empty lines.  */
2256       if (nowtok == tok_eol)
2257         {
2258           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2259           nowtok = now->tok;
2260           continue;
2261         }
2262
2263       switch (nowtok)
2264         {
2265         case tok_charclass:
2266           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2267           while (now->tok == tok_ident || now->tok == tok_string)
2268             {
2269               ctype_class_new (ldfile, ctype, now->val.str.startmb);
2270               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2271               if (now->tok != tok_semicolon)
2272                 break;
2273               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2274             }
2275           if (now->tok != tok_eol)
2276             SYNTAX_ERROR (_("\
2277 %s: syntax error in definition of new character class"), "LC_CTYPE");
2278           break;
2279
2280         case tok_charconv:
2281           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2282           while (now->tok == tok_ident || now->tok == tok_string)
2283             {
2284               ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2285               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2286               if (now->tok != tok_semicolon)
2287                 break;
2288               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2289             }
2290           if (now->tok != tok_eol)
2291             SYNTAX_ERROR (_("\
2292 %s: syntax error in definition of new character map"), "LC_CTYPE");
2293           break;
2294
2295         case tok_class:
2296           /* Ignore the rest of the line if we don't need the input of
2297              this line.  */
2298           if (ignore_content)
2299             {
2300               lr_ignore_rest (ldfile, 0);
2301               break;
2302             }
2303
2304           /* We simply forget the `class' keyword and use the following
2305              operand to determine the bit.  */
2306           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2307           if (now->tok == tok_ident || now->tok == tok_string)
2308             {
2309               /* Must can be one of the predefined class names.  */
2310               for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2311                 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
2312                   break;
2313               if (cnt >= ctype->nr_charclass)
2314                 {
2315 #ifdef PREDEFINED_CLASSES
2316                   if (now->val.str.lenmb == 8
2317                       && memcmp ("special1", now->val.str.startmb, 8) == 0)
2318                     class_bit = _ISwspecial1;
2319                   else if (now->val.str.lenmb == 8
2320                       && memcmp ("special2", now->val.str.startmb, 8) == 0)
2321                     class_bit = _ISwspecial2;
2322                   else if (now->val.str.lenmb == 8
2323                       && memcmp ("special3", now->val.str.startmb, 8) == 0)
2324                     class_bit = _ISwspecial3;
2325                   else
2326 #endif
2327                     {
2328                       /* OK, it's a new class.  */
2329                       ctype_class_new (ldfile, ctype, now->val.str.startmb);
2330
2331                       class_bit = _ISwbit (ctype->nr_charclass - 1);
2332                     }
2333                 }
2334               else
2335                 {
2336                   class_bit = _ISwbit (cnt);
2337
2338                   free (now->val.str.startmb);
2339                 }
2340             }
2341           else if (now->tok == tok_digit)
2342             goto handle_tok_digit;
2343           else if (now->tok < tok_upper || now->tok > tok_blank)
2344             goto err_label;
2345           else
2346             {
2347               class_bit = BITw (now->tok);
2348               class256_bit = BIT (now->tok);
2349             }
2350
2351           /* The next character must be a semicolon.  */
2352           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2353           if (now->tok != tok_semicolon)
2354             goto err_label;
2355           goto read_charclass;
2356
2357         case tok_upper:
2358         case tok_lower:
2359         case tok_alpha:
2360         case tok_alnum:
2361         case tok_space:
2362         case tok_cntrl:
2363         case tok_punct:
2364         case tok_graph:
2365         case tok_print:
2366         case tok_xdigit:
2367         case tok_blank:
2368           /* Ignore the rest of the line if we don't need the input of
2369              this line.  */
2370           if (ignore_content)
2371             {
2372               lr_ignore_rest (ldfile, 0);
2373               break;
2374             }
2375
2376           class_bit = BITw (now->tok);
2377           class256_bit = BIT (now->tok);
2378           handle_digits = 0;
2379         read_charclass:
2380           ctype->class_done |= class_bit;
2381           last_token = tok_none;
2382           ellipsis_token = tok_none;
2383           step = 1;
2384           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2385           while (now->tok != tok_eol && now->tok != tok_eof)
2386             {
2387               uint32_t wch;
2388               struct charseq *seq;
2389
2390               if (ellipsis_token == tok_none)
2391                 {
2392                   if (get_character (now, charmap, repertoire, &seq, &wch))
2393                     goto err_label;
2394
2395                   if (!ignore_content && seq != NULL && seq->nbytes == 1)
2396                     /* Yep, we can store information about this byte
2397                        sequence.  */
2398                     ctype->class256_collection[seq->bytes[0]] |= class256_bit;
2399
2400                   if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2401                       && class_bit != 0)
2402                     /* We have the UCS4 position.  */
2403                     *find_idx (ctype, &ctype->class_collection,
2404                                &ctype->class_collection_max,
2405                                &ctype->class_collection_act, wch) |= class_bit;
2406
2407                   last_token = now->tok;
2408                   /* Terminate the string.  */
2409                   if (last_token == tok_bsymbol)
2410                     {
2411                       now->val.str.startmb[now->val.str.lenmb] = '\0';
2412                       last_str = now->val.str.startmb;
2413                     }
2414                   else
2415                     last_str = NULL;
2416                   last_seq = seq;
2417                   last_wch = wch;
2418                   memcpy (last_charcode, now->val.charcode.bytes, 16);
2419                   last_charcode_len = now->val.charcode.nbytes;
2420
2421                   if (!ignore_content && handle_digits == 1)
2422                     {
2423                       /* We must store the digit values.  */
2424                       if (ctype->mbdigits_act == ctype->mbdigits_max)
2425                         {
2426                           ctype->mbdigits_max += 10;
2427                           ctype->mbdigits = xrealloc (ctype->mbdigits,
2428                                                       (ctype->mbdigits_max
2429                                                        * sizeof (char *)));
2430                           ctype->wcdigits_max += 10;
2431                           ctype->wcdigits = xrealloc (ctype->wcdigits,
2432                                                       (ctype->wcdigits_max
2433                                                        * sizeof (uint32_t)));
2434                         }
2435
2436                       ctype->mbdigits[ctype->mbdigits_act++] = seq;
2437                       ctype->wcdigits[ctype->wcdigits_act++] = wch;
2438                     }
2439                   else if (!ignore_content && handle_digits == 2)
2440                     {
2441                       /* We must store the digit values.  */
2442                       if (ctype->outdigits_act >= 10)
2443                         {
2444                           lr_error (ldfile, _("\
2445 %s: field `%s' does not contain exactly ten entries"),
2446                             "LC_CTYPE", "outdigit");
2447                           lr_ignore_rest (ldfile, 0);
2448                           break;
2449                         }
2450
2451                       ctype->mboutdigits[ctype->outdigits_act] = seq;
2452                       ctype->wcoutdigits[ctype->outdigits_act] = wch;
2453                       ++ctype->outdigits_act;
2454                     }
2455                 }
2456               else
2457                 {
2458                   /* Now it gets complicated.  We have to resolve the
2459                      ellipsis problem.  First we must distinguish between
2460                      the different kind of ellipsis and this must match the
2461                      tokens we have seen.  */
2462                   assert (last_token != tok_none);
2463
2464                   if (last_token != now->tok)
2465                     {
2466                       lr_error (ldfile, _("\
2467 ellipsis range must be marked by two operands of same type"));
2468                       lr_ignore_rest (ldfile, 0);
2469                       break;
2470                     }
2471
2472                   if (last_token == tok_bsymbol)
2473                     {
2474                       if (ellipsis_token == tok_ellipsis3)
2475                         lr_error (ldfile, _("with symbolic name range values \
2476 the absolute ellipsis `...' must not be used"));
2477
2478                       charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2479                                                    repertoire, now, last_str,
2480                                                    class256_bit, class_bit,
2481                                                    (ellipsis_token
2482                                                     == tok_ellipsis4
2483                                                     ? 10 : 16),
2484                                                    ignore_content,
2485                                                    handle_digits, step);
2486                     }
2487                   else if (last_token == tok_ucs4)
2488                     {
2489                       if (ellipsis_token != tok_ellipsis2)
2490                         lr_error (ldfile, _("\
2491 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2492
2493                       charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2494                                                repertoire, now, last_wch,
2495                                                class256_bit, class_bit,
2496                                                ignore_content, handle_digits,
2497                                                step);
2498                     }
2499                   else
2500                     {
2501                       assert (last_token == tok_charcode);
2502
2503                       if (ellipsis_token != tok_ellipsis3)
2504                         lr_error (ldfile, _("\
2505 with character code range values one must use the absolute ellipsis `...'"));
2506
2507                       charclass_charcode_ellipsis (ldfile, ctype, charmap,
2508                                                    repertoire, now,
2509                                                    last_charcode,
2510                                                    last_charcode_len,
2511                                                    class256_bit, class_bit,
2512                                                    ignore_content,
2513                                                    handle_digits);
2514                     }
2515
2516                   /* Now we have used the last value.  */
2517                   last_token = tok_none;
2518                 }
2519
2520               /* Next we expect a semicolon or the end of the line.  */
2521               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2522               if (now->tok == tok_eol || now->tok == tok_eof)
2523                 break;
2524
2525               if (last_token != tok_none
2526                   && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
2527                 {
2528                   if (now->tok == tok_ellipsis2_2)
2529                     {
2530                       now->tok = tok_ellipsis2;
2531                       step = 2;
2532                     }
2533                   else if (now->tok == tok_ellipsis4_2)
2534                     {
2535                       now->tok = tok_ellipsis4;
2536                       step = 2;
2537                     }
2538
2539                   ellipsis_token = now->tok;
2540
2541                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2542                   continue;
2543                 }
2544
2545               if (now->tok != tok_semicolon)
2546                 goto err_label;
2547
2548               /* And get the next character.  */
2549               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2550
2551               ellipsis_token = tok_none;
2552               step = 1;
2553             }
2554           break;
2555
2556         case tok_digit:
2557           /* Ignore the rest of the line if we don't need the input of
2558              this line.  */
2559           if (ignore_content)
2560             {
2561               lr_ignore_rest (ldfile, 0);
2562               break;
2563             }
2564
2565         handle_tok_digit:
2566           class_bit = _ISwdigit;
2567           class256_bit = _ISdigit;
2568           handle_digits = 1;
2569           goto read_charclass;
2570
2571         case tok_outdigit:
2572           /* Ignore the rest of the line if we don't need the input of
2573              this line.  */
2574           if (ignore_content)
2575             {
2576               lr_ignore_rest (ldfile, 0);
2577               break;
2578             }
2579
2580           if (ctype->outdigits_act != 0)
2581             lr_error (ldfile, _("\
2582 %s: field `%s' declared more than once"),
2583                       "LC_CTYPE", "outdigit");
2584           class_bit = 0;
2585           class256_bit = 0;
2586           handle_digits = 2;
2587           goto read_charclass;
2588
2589         case tok_toupper:
2590           /* Ignore the rest of the line if we don't need the input of
2591              this line.  */
2592           if (ignore_content)
2593             {
2594               lr_ignore_rest (ldfile, 0);
2595               break;
2596             }
2597
2598           mapidx = 0;
2599           goto read_mapping;
2600
2601         case tok_tolower:
2602           /* Ignore the rest of the line if we don't need the input of
2603              this line.  */
2604           if (ignore_content)
2605             {
2606               lr_ignore_rest (ldfile, 0);
2607               break;
2608             }
2609
2610           mapidx = 1;
2611           goto read_mapping;
2612
2613         case tok_map:
2614           /* Ignore the rest of the line if we don't need the input of
2615              this line.  */
2616           if (ignore_content)
2617             {
2618               lr_ignore_rest (ldfile, 0);
2619               break;
2620             }
2621
2622           /* We simply forget the `map' keyword and use the following
2623              operand to determine the mapping.  */
2624           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2625           if (now->tok == tok_ident || now->tok == tok_string)
2626             {
2627               size_t cnt;
2628
2629               for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2630                 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2631                   break;
2632
2633               if (cnt < ctype->map_collection_nr)
2634                 free (now->val.str.startmb);
2635               else
2636                 /* OK, it's a new map.  */
2637                 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2638
2639               mapidx = cnt;
2640             }
2641           else if (now->tok < tok_toupper || now->tok > tok_tolower)
2642             goto err_label;
2643           else
2644             mapidx = now->tok - tok_toupper;
2645
2646           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2647           /* This better should be a semicolon.  */
2648           if (now->tok != tok_semicolon)
2649             goto err_label;
2650
2651         read_mapping:
2652           /* Test whether this mapping was already defined.  */
2653           if (ctype->tomap_done[mapidx])
2654             {
2655               lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2656                         ctype->mapnames[mapidx]);
2657               lr_ignore_rest (ldfile, 0);
2658               break;
2659             }
2660           ctype->tomap_done[mapidx] = 1;
2661
2662           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2663           while (now->tok != tok_eol && now->tok != tok_eof)
2664             {
2665               struct charseq *from_seq;
2666               uint32_t from_wch;
2667               struct charseq *to_seq;
2668               uint32_t to_wch;
2669
2670               /* Every pair starts with an opening brace.  */
2671               if (now->tok != tok_open_brace)
2672                 goto err_label;
2673
2674               /* Next comes the from-value.  */
2675               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2676               if (get_character (now, charmap, repertoire, &from_seq,
2677                                  &from_wch) != 0)
2678                 goto err_label;
2679
2680               /* The next is a comma.  */
2681               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2682               if (now->tok != tok_comma)
2683                 goto err_label;
2684
2685               /* And the other value.  */
2686               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2687               if (get_character (now, charmap, repertoire, &to_seq,
2688                                  &to_wch) != 0)
2689                 goto err_label;
2690
2691               /* And the last thing is the closing brace.  */
2692               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2693               if (now->tok != tok_close_brace)
2694                 goto err_label;
2695
2696               if (!ignore_content)
2697                 {
2698                   if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2699                       && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2700                     /* We can use this value.  */
2701                     ctype->map256_collection[mapidx][from_seq->bytes[0]]
2702                       = to_seq->bytes[0];
2703
2704                   if (from_wch != ILLEGAL_CHAR_VALUE
2705                       && to_wch != ILLEGAL_CHAR_VALUE)
2706                     /* Both correct values.  */
2707                     *find_idx (ctype, &ctype->map_collection[mapidx],
2708                                &ctype->map_collection_max[mapidx],
2709                                &ctype->map_collection_act[mapidx],
2710                                from_wch) = to_wch;
2711                 }
2712
2713               /* Now comes a semicolon or the end of the line/file.  */
2714               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2715               if (now->tok == tok_semicolon)
2716                 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2717             }
2718           break;
2719
2720         case tok_translit_start:
2721           /* Ignore the entire translit section with its peculiar syntax
2722              if we don't need the input.  */
2723           if (ignore_content)
2724             {
2725               do
2726                 {
2727                   lr_ignore_rest (ldfile, 0);
2728                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2729                 }
2730               while (now->tok != tok_translit_end && now->tok != tok_eof);
2731
2732               if (now->tok == tok_eof)
2733                 lr_error (ldfile, _(\
2734 "%s: `translit_start' section does not end with `translit_end'"),
2735                           "LC_CTYPE");
2736
2737               break;
2738             }
2739
2740           /* The rest of the line better should be empty.  */
2741           lr_ignore_rest (ldfile, 1);
2742
2743           /* We count here the number of allocated entries in the `translit'
2744              array.  */
2745           cnt = 0;
2746
2747           ldfile->translate_strings = 1;
2748           ldfile->return_widestr = 1;
2749
2750           /* We proceed until we see the `translit_end' token.  */
2751           while (now = lr_token (ldfile, charmap, NULL, repertoire, verbose),
2752                  now->tok != tok_translit_end && now->tok != tok_eof)
2753             {
2754               if (now->tok == tok_eol)
2755                 /* Ignore empty lines.  */
2756                 continue;
2757
2758               if (now->tok == tok_include)
2759                 {
2760                   /* We have to include locale.  */
2761                   const char *locale_name;
2762                   const char *repertoire_name;
2763                   struct translit_include_t *include_stmt, **include_ptr;
2764
2765                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2766                   /* This should be a string or an identifier.  In any
2767                      case something to name a locale.  */
2768                   if (now->tok != tok_string && now->tok != tok_ident)
2769                     {
2770                     translit_syntax:
2771                       lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2772                       lr_ignore_rest (ldfile, 0);
2773                       continue;
2774                     }
2775                   locale_name = now->val.str.startmb;
2776
2777                   /* Next should be a semicolon.  */
2778                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2779                   if (now->tok != tok_semicolon)
2780                     goto translit_syntax;
2781
2782                   /* Now the repertoire name.  */
2783                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2784                   if ((now->tok != tok_string && now->tok != tok_ident)
2785                       || now->val.str.startmb == NULL)
2786                     goto translit_syntax;
2787                   repertoire_name = now->val.str.startmb;
2788
2789                   /* Save the include statement for later processing.  */
2790                   include_stmt = (struct translit_include_t *)
2791                     xmalloc (sizeof (struct translit_include_t));
2792                   include_stmt->copy_locale = locale_name;
2793                   include_stmt->copy_repertoire = repertoire_name;
2794                   include_stmt->next = NULL;
2795
2796                   include_ptr = &ctype->translit_include;
2797                   while (*include_ptr != NULL)
2798                     include_ptr = &(*include_ptr)->next;
2799                   *include_ptr = include_stmt;
2800
2801                   /* The rest of the line must be empty.  */
2802                   lr_ignore_rest (ldfile, 1);
2803
2804                   /* Make sure the locale is read.  */
2805                   add_to_readlist (LC_CTYPE, locale_name, repertoire_name,
2806                                    1, NULL);
2807                   continue;
2808                 }
2809               else if (now->tok == tok_default_missing)
2810                 {
2811                   uint32_t *wstr;
2812
2813                   while (1)
2814                     {
2815                       /* We expect a single character or string as the
2816                          argument.  */
2817                       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2818                       wstr = read_widestring (ldfile, now, charmap,
2819                                               repertoire);
2820
2821                       if (wstr != NULL)
2822                         {
2823                           if (ctype->default_missing != NULL)
2824                             {
2825                               lr_error (ldfile, _("\
2826 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2827                               error_at_line (0, 0, ctype->default_missing_file,
2828                                              ctype->default_missing_lineno,
2829                                              _("\
2830 previous definition was here"));
2831                             }
2832                           else
2833                             {
2834                               ctype->default_missing = wstr;
2835                               ctype->default_missing_file = ldfile->fname;
2836                               ctype->default_missing_lineno = ldfile->lineno;
2837                             }
2838                           /* We can have more entries, ignore them.  */
2839                           lr_ignore_rest (ldfile, 0);
2840                           break;
2841                         }
2842                       else if (wstr == (uint32_t *) -1l)
2843                         /* This was a syntax error.  */
2844                         break;
2845
2846                       /* Maybe there is another replacement we can use.  */
2847                       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2848                       if (now->tok == tok_eol || now->tok == tok_eof)
2849                         {
2850                           /* Nothing found.  We tell the user.  */
2851                           lr_error (ldfile, _("\
2852 %s: no representable `default_missing' definition found"), "LC_CTYPE");
2853                           break;
2854                         }
2855                       if (now->tok != tok_semicolon)
2856                         goto translit_syntax;
2857                     }
2858
2859                   continue;
2860                 }
2861               else if (now->tok == tok_translit_ignore)
2862                 {
2863                   read_translit_ignore_entry (ldfile, ctype, charmap,
2864                                               repertoire);
2865                   continue;
2866                 }
2867
2868               read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2869             }
2870           ldfile->return_widestr = 0;
2871
2872           if (now->tok == tok_eof)
2873             lr_error (ldfile, _(\
2874 "%s: `translit_start' section does not end with `translit_end'"),
2875                       "LC_CTYPE");
2876
2877           break;
2878
2879         case tok_ident:
2880           /* Ignore the rest of the line if we don't need the input of
2881              this line.  */
2882           if (ignore_content)
2883             {
2884               lr_ignore_rest (ldfile, 0);
2885               break;
2886             }
2887
2888           /* This could mean one of several things.  First test whether
2889              it's a character class name.  */
2890           for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2891             if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2892               break;
2893           if (cnt < ctype->nr_charclass)
2894             {
2895               class_bit = _ISwbit (cnt);
2896               class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2897               free (now->val.str.startmb);
2898               goto read_charclass;
2899             }
2900           for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2901             if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2902               break;
2903           if (cnt < ctype->map_collection_nr)
2904             {
2905               mapidx = cnt;
2906               free (now->val.str.startmb);
2907               goto read_mapping;
2908             }
2909 #ifdef PREDEFINED_CLASSES
2910           if (strcmp (now->val.str.startmb, "special1") == 0)
2911             {
2912               class_bit = _ISwspecial1;
2913               free (now->val.str.startmb);
2914               goto read_charclass;
2915             }
2916           if (strcmp (now->val.str.startmb, "special2") == 0)
2917             {
2918               class_bit = _ISwspecial2;
2919               free (now->val.str.startmb);
2920               goto read_charclass;
2921             }
2922           if (strcmp (now->val.str.startmb, "special3") == 0)
2923             {
2924               class_bit = _ISwspecial3;
2925               free (now->val.str.startmb);
2926               goto read_charclass;
2927             }
2928           if (strcmp (now->val.str.startmb, "tosymmetric") == 0)
2929             {
2930               mapidx = 2;
2931               goto read_mapping;
2932             }
2933 #endif
2934           break;
2935
2936         case tok_end:
2937           /* Next we assume `LC_CTYPE'.  */
2938           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2939           if (now->tok == tok_eof)
2940             break;
2941           if (now->tok == tok_eol)
2942             lr_error (ldfile, _("%s: incomplete `END' line"),
2943                       "LC_CTYPE");
2944           else if (now->tok != tok_lc_ctype)
2945             lr_error (ldfile, _("\
2946 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2947           lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2948           return;
2949
2950         default:
2951         err_label:
2952           if (now->tok != tok_eof)
2953             SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2954         }
2955
2956       /* Prepare for the next round.  */
2957       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2958       nowtok = now->tok;
2959     }
2960
2961   /* When we come here we reached the end of the file.  */
2962   lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2963 }
2964
2965
2966 static void
2967 set_class_defaults (struct locale_ctype_t *ctype,
2968                     const struct charmap_t *charmap,
2969                     struct repertoire_t *repertoire)
2970 {
2971   size_t cnt;
2972
2973   /* This function defines the default values for the classes and conversions
2974      according to POSIX.2 2.5.2.1.
2975      It may seem that the order of these if-blocks is arbitrary but it is NOT.
2976      Don't move them unless you know what you are doing!  */
2977
2978   auto void set_default (int bitpos, int from, int to);  /* Prototype of the nested helper defined next.  */
2979   /* Helper: give every character code FROM..TO (inclusive) the class at bit position BITPOS.  */
2980   void set_default (int bitpos, int from, int to)
2981     {
2982       char tmp[2];  /* One-character name used for the charmap lookup and in messages.  */
2983       int ch;
2984       int bit = _ISbit (bitpos);  /* Value ORed into ctype->class256_collection.  */
2985       int bitw = _ISwbit (bitpos);  /* Value ORed into the class_collection entry.  */
2986       /* Make TMP a NUL-terminated one-character string; only tmp[0] changes below.  */
2987       strcpy (tmp, "?");
2988
2989       for (ch = from; ch <= to; ++ch)
2990         {
2991           struct charseq *seq;
2992           tmp[0] = ch;
2993
2994           seq = charmap_find_value (charmap, tmp, 1);  /* First try the character itself as the name.  */
2995           if (seq == NULL)
2996             {
2997               char buf[10];  /* 'U' + eight hex digits + NUL.  */
2998               sprintf (buf, "U%08X", ch);
2999               seq = charmap_find_value (charmap, buf, 9);  /* Then try the name UXXXXXXXX.  */
3000             }
3001           if (seq == NULL)
3002             {
3003               if (!be_quiet)  /* Only this "not defined" message is suppressed by be_quiet.  */
3004                 error (0, 0, _("\
3005 %s: character `%s' not defined in charmap while needed as default value"),
3006                        "LC_CTYPE", tmp);
3007             }
3008           else if (seq->nbytes != 1)
3009             error (0, 0, _("\
3010 %s: character `%s' in charmap not representable with one byte"),
3011                    "LC_CTYPE", tmp);
3012           else
3013             ctype->class256_collection[seq->bytes[0]] |= bit;  /* Indexed by the single encoded byte.  */
3014
3015           /* No charmap lookup is needed here: the ASCII value is also the
3016              Unicode value.  This runs even when the lookup above failed.  */
3017           ELEM (ctype, class_collection, , ch) |= bitw;
3018         }
3019     }
3020
3021   /* Set default values if keyword was not present.  */
3022   if ((ctype->class_done & BITw (tok_upper)) == 0)
3023     /* "If this keyword [upper] is not specified, the uppercase letters
3024         `A' through `Z', ..., shall automatically belong to this class,
3025         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
3026     set_default (BITPOS (tok_upper), 'A', 'Z');
3027
3028   if ((ctype->class_done & BITw (tok_lower)) == 0)
3029     /* "If this keyword [lower] is not specified, the lowercase letters
3030         `a' through `z', ..., shall automatically belong to this class,
3031         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
3032     set_default (BITPOS (tok_lower), 'a', 'z');
3033
3034   if ((ctype->class_done & BITw (tok_alpha)) == 0)
3035     {
3036       /* Table 2-6 in P1003.2 says that characters in class `upper' or
3037          class `lower' *must* be in class `alpha'.  */
3038       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
3039       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
3040
3041       for (cnt = 0; cnt < 256; ++cnt)
3042         if ((ctype->class256_collection[cnt] & mask) != 0)
3043           ctype->class256_collection[cnt] |= BIT (tok_alpha);
3044
3045       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3046         if ((ctype->class_collection[cnt] & maskw) != 0)
3047           ctype->class_collection[cnt] |= BITw (tok_alpha);
3048     }
3049
3050   if ((ctype->class_done & BITw (tok_digit)) == 0)
3051     /* "If this keyword [digit] is not specified, the digits `0' through
3052         `9', ..., shall automatically belong to this class, with
3053         implementation-defined character values."  [P1003.2, 2.5.2.1]  */
3054     set_default (BITPOS (tok_digit), '0', '9');
3055
3056   /* Class `alnum': "Only characters specified for the `alpha' and `digit'
3057      keywords shall be specified.  Characters specified for the keywords
3058      `alpha' and `digit' are automatically included in this class."  */
3059   {
3060     unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
3061     unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
3062
3063     for (cnt = 0; cnt < 256; ++cnt)
3064       if ((ctype->class256_collection[cnt] & mask) != 0)
3065         ctype->class256_collection[cnt] |= BIT (tok_alnum);
3066
3067     for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3068       if ((ctype->class_collection[cnt] & maskw) != 0)
3069         ctype->class_collection[cnt] |= BITw (tok_alnum);
3070   }
3071
3072   if ((ctype->class_done & BITw (tok_space)) == 0)
3073     /* "If this keyword [space] is not specified, the characters <space>,
3074         <form-feed>, <newline>, <carriage-return>, <tab>, and
3075         <vertical-tab>, ..., shall automatically belong to this class,
3076         with implementation-defined character values."  [P1003.2, 2.5.2.1]  */
3077     {
3078       struct charseq *seq;
3079
3080       seq = charmap_find_value (charmap, "space", 5);
3081       if (seq == NULL)
3082         seq = charmap_find_value (charmap, "SP", 2);
3083       if (seq == NULL)
3084         seq = charmap_find_value (charmap, "U00000020", 9);
3085       if (seq == NULL)
3086         {
3087           if (!be_quiet)
3088             error (0, 0, _("\
3089 %s: character `%s' not defined while needed as default value"),
3090                    "LC_CTYPE", "<space>");
3091         }
3092       else if (seq->nbytes != 1)
3093         error (0, 0, _("\
3094 %s: character `%s' in charmap not representable with one byte"),
3095                "LC_CTYPE", "<space>");
3096       else
3097         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3098
3099       /* No need to search.  */
3100       ELEM (ctype, class_collection, , L' ') |= BITw (tok_space);
3101
3102       seq = charmap_find_value (charmap, "form-feed", 9);
3103       if (seq == NULL)
3104         seq = charmap_find_value (charmap, "U0000000C", 9);
3105       if (seq == NULL)
3106         {
3107           if (!be_quiet)
3108             error (0, 0, _("\
3109 %s: character `%s' not defined while needed as default value"),
3110                    "LC_CTYPE", "<form-feed>");
3111         }
3112       else if (seq->nbytes != 1)
3113         error (0, 0, _("\
3114 %s: character `%s' in charmap not representable with one byte"),
3115                "LC_CTYPE", "<form-feed>");
3116       else
3117         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3118
3119       /* No need to search.  */
3120       ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space);
3121
3122
3123       seq = charmap_find_value (charmap, "newline", 7);
3124       if (seq == NULL)
3125         seq = charmap_find_value (charmap, "U0000000A", 9);
3126       if (seq == NULL)
3127         {
3128           if (!be_quiet)
3129             error (0, 0, _("\
3130 character `%s' not defined while needed as default value"),
3131                    "<newline>");
3132         }
3133       else if (seq->nbytes != 1)
3134         error (0, 0, _("\
3135 %s: character `%s' in charmap not representable with one byte"),
3136                "LC_CTYPE", "<newline>");
3137       else
3138         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3139
3140       /* No need to search.  */
3141       ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space);
3142
3143
3144       seq = charmap_find_value (charmap, "carriage-return", 15);
3145       if (seq == NULL)
3146         seq = charmap_find_value (charmap, "U0000000D", 9);
3147       if (seq == NULL)
3148         {
3149           if (!be_quiet)
3150             error (0, 0, _("\
3151 %s: character `%s' not defined while needed as default value"),
3152                    "LC_CTYPE", "<carriage-return>");
3153         }
3154       else if (seq->nbytes != 1)
3155         error (0, 0, _("\
3156 %s: character `%s' in charmap not representable with one byte"),
3157                "LC_CTYPE", "<carriage-return>");
3158       else
3159         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3160
3161       /* No need to search.  */
3162       ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space);
3163
3164
3165       seq = charmap_find_value (charmap, "tab", 3);
3166       if (seq == NULL)
3167         seq = charmap_find_value (charmap, "U00000009", 9);
3168       if (seq == NULL)
3169         {
3170           if (!be_quiet)
3171             error (0, 0, _("\
3172 %s: character `%s' not defined while needed as default value"),
3173                    "LC_CTYPE", "<tab>");
3174         }
3175       else if (seq->nbytes != 1)
3176         error (0, 0, _("\
3177 %s: character `%s' in charmap not representable with one byte"),
3178                "LC_CTYPE", "<tab>");
3179       else
3180         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3181
3182       /* No need to search.  */
3183       ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space);
3184
3185
3186       seq = charmap_find_value (charmap, "vertical-tab", 12);
3187       if (seq == NULL)
3188         seq = charmap_find_value (charmap, "U0000000B", 9);
3189       if (seq == NULL)
3190         {
3191           if (!be_quiet)
3192             error (0, 0, _("\
3193 %s: character `%s' not defined while needed as default value"),
3194                    "LC_CTYPE", "<vertical-tab>");
3195         }
3196       else if (seq->nbytes != 1)
3197         error (0, 0, _("\
3198 %s: character `%s' in charmap not representable with one byte"),
3199                "LC_CTYPE", "<vertical-tab>");
3200       else
3201         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3202
3203       /* No need to search.  */
3204       ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space);
3205     }
3206
3207   if ((ctype->class_done & BITw (tok_xdigit)) == 0)
3208     /* "If this keyword is not specified, the digits `0' to `9', the
3209         uppercase letters `A' through `F', and the lowercase letters `a'
        through `f', ..., shall automatically belong to this class, with
3211         implementation defined character values."  [P1003.2, 2.5.2.1]  */
3212     {
3213       set_default (BITPOS (tok_xdigit), '0', '9');
3214       set_default (BITPOS (tok_xdigit), 'A', 'F');
3215       set_default (BITPOS (tok_xdigit), 'a', 'f');
3216     }
3217
3218   if ((ctype->class_done & BITw (tok_blank)) == 0)
3219     /* "If this keyword [blank] is unspecified, the characters <space> and
3220        <tab> shall belong to this character class."  [P1003.2, 2.5.2.1]  */
3221    {
3222       struct charseq *seq;
3223
3224       seq = charmap_find_value (charmap, "space", 5);
3225       if (seq == NULL)
3226         seq = charmap_find_value (charmap, "SP", 2);
3227       if (seq == NULL)
3228         seq = charmap_find_value (charmap, "U00000020", 9);
3229       if (seq == NULL)
3230         {
3231           if (!be_quiet)
3232             error (0, 0, _("\
3233 %s: character `%s' not defined while needed as default value"),
3234                    "LC_CTYPE", "<space>");
3235         }
3236       else if (seq->nbytes != 1)
3237         error (0, 0, _("\
3238 %s: character `%s' in charmap not representable with one byte"),
3239                "LC_CTYPE", "<space>");
3240       else
3241         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3242
3243       /* No need to search.  */
3244       ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank);
3245
3246
3247       seq = charmap_find_value (charmap, "tab", 3);
3248       if (seq == NULL)
3249         seq = charmap_find_value (charmap, "U00000009", 9);
3250       if (seq == NULL)
3251         {
3252           if (!be_quiet)
3253             error (0, 0, _("\
3254 %s: character `%s' not defined while needed as default value"),
3255                    "LC_CTYPE", "<tab>");
3256         }
3257       else if (seq->nbytes != 1)
3258         error (0, 0, _("\
3259 %s: character `%s' in charmap not representable with one byte"),
3260                "LC_CTYPE", "<tab>");
3261       else
3262         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3263
3264       /* No need to search.  */
3265       ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank);
3266     }
3267
3268   if ((ctype->class_done & BITw (tok_graph)) == 0)
3269     /* "If this keyword [graph] is not specified, characters specified for
3270         the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3271         shall belong to this character class."  [P1003.2, 2.5.2.1]  */
3272     {
3273       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3274         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3275       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3276         BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3277         BITw (tok_punct);
3278       size_t cnt;
3279
3280       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3281         if ((ctype->class_collection[cnt] & maskw) != 0)
3282           ctype->class_collection[cnt] |= BITw (tok_graph);
3283
3284       for (cnt = 0; cnt < 256; ++cnt)
3285         if ((ctype->class256_collection[cnt] & mask) != 0)
3286           ctype->class256_collection[cnt] |= BIT (tok_graph);
3287     }
3288
3289   if ((ctype->class_done & BITw (tok_print)) == 0)
3290     /* "If this keyword [print] is not provided, characters specified for
3291         the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3292         and the <space> character shall belong to this character class."
3293         [P1003.2, 2.5.2.1]  */
3294     {
3295       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3296         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3297       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3298         BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3299         BITw (tok_punct);
3300       size_t cnt;
3301       struct charseq *seq;
3302
3303       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3304         if ((ctype->class_collection[cnt] & maskw) != 0)
3305           ctype->class_collection[cnt] |= BITw (tok_print);
3306
3307       for (cnt = 0; cnt < 256; ++cnt)
3308         if ((ctype->class256_collection[cnt] & mask) != 0)
3309           ctype->class256_collection[cnt] |= BIT (tok_print);
3310
3311
3312       seq = charmap_find_value (charmap, "space", 5);
3313       if (seq == NULL)
3314         seq = charmap_find_value (charmap, "SP", 2);
3315       if (seq == NULL)
3316         seq = charmap_find_value (charmap, "U00000020", 9);
3317       if (seq == NULL)
3318         {
3319           if (!be_quiet)
3320             error (0, 0, _("\
3321 %s: character `%s' not defined while needed as default value"),
3322                    "LC_CTYPE", "<space>");
3323         }
3324       else if (seq->nbytes != 1)
3325         error (0, 0, _("\
3326 %s: character `%s' in charmap not representable with one byte"),
3327                "LC_CTYPE", "<space>");
3328       else
3329         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
3330
3331       /* No need to search.  */
3332       ELEM (ctype, class_collection, , L' ') |= BITw (tok_print);
3333     }
3334
3335   if (ctype->tomap_done[0] == 0)
3336     /* "If this keyword [toupper] is not specified, the lowercase letters
3337         `a' through `z', and their corresponding uppercase letters `A' to
3338         `Z', ..., shall automatically be included, with implementation-
3339         defined character values."  [P1003.2, 2.5.2.1]  */
3340     {
3341       char tmp[4];
3342       int ch;
3343
3344       strcpy (tmp, "<?>");
3345
3346       for (ch = 'a'; ch <= 'z'; ++ch)
3347         {
3348           struct charseq *seq_from, *seq_to;
3349
3350           tmp[1] = (char) ch;
3351
3352           seq_from = charmap_find_value (charmap, &tmp[1], 1);
3353           if (seq_from == NULL)
3354             {
3355               char buf[10];
3356               sprintf (buf, "U%08X", ch);
3357               seq_from = charmap_find_value (charmap, buf, 9);
3358             }
3359           if (seq_from == NULL)
3360             {
3361               if (!be_quiet)
3362                 error (0, 0, _("\
3363 %s: character `%s' not defined while needed as default value"),
3364                        "LC_CTYPE", tmp);
3365             }
3366           else if (seq_from->nbytes != 1)
3367             {
3368               if (!be_quiet)
3369                 error (0, 0, _("\
3370 %s: character `%s' needed as default value not representable with one byte"),
3371                        "LC_CTYPE", tmp);
3372             }
3373           else
3374             {
3375               /* This conversion is implementation defined.  */
3376               tmp[1] = (char) (ch + ('A' - 'a'));
3377               seq_to = charmap_find_value (charmap, &tmp[1], 1);
3378               if (seq_to == NULL)
3379                 {
3380                   char buf[10];
3381                   sprintf (buf, "U%08X", ch + ('A' - 'a'));
3382                   seq_to = charmap_find_value (charmap, buf, 9);
3383                 }
3384               if (seq_to == NULL)
3385                 {
3386                   if (!be_quiet)
3387                     error (0, 0, _("\
3388 %s: character `%s' not defined while needed as default value"),
3389                            "LC_CTYPE", tmp);
3390                 }
3391               else if (seq_to->nbytes != 1)
3392                 {
3393                   if (!be_quiet)
3394                     error (0, 0, _("\
3395 %s: character `%s' needed as default value not representable with one byte"),
3396                            "LC_CTYPE", tmp);
3397                 }
3398               else
3399                 /* The index [0] is determined by the order of the
3400                    `ctype_map_newP' calls in `ctype_startup'.  */
3401                 ctype->map256_collection[0][seq_from->bytes[0]]
3402                   = seq_to->bytes[0];
3403             }
3404
3405           /* No need to search.  */
3406           ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
3407         }
3408     }
3409
3410   if (ctype->tomap_done[1] == 0)
3411     /* "If this keyword [tolower] is not specified, the mapping shall be
3412        the reverse mapping of the one specified to `toupper'."  [P1003.2]  */
3413     {
3414       for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
3415         if (ctype->map_collection[0][cnt] != 0)
3416           ELEM (ctype, map_collection, [1],
3417                 ctype->map_collection[0][cnt])
3418             = ctype->charnames[cnt];
3419
3420       for (cnt = 0; cnt < 256; ++cnt)
3421         if (ctype->map256_collection[0][cnt] != 0)
3422           ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
3423     }
3424
3425   if (ctype->outdigits_act != 10)
3426     {
3427       if (ctype->outdigits_act != 0)
3428         error (0,0, _("%s: field `%s' does not contain exactly ten entries"),
3429                "LC_CTYPE", "outdigit");
3430
3431       for (cnt = ctype->outdigits_act; cnt < 10; ++cnt)
3432         {
3433           ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3434                                                          digits + cnt, 1);
3435
3436           if (ctype->mboutdigits[cnt] == NULL)
3437             ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3438                                                            longnames[cnt],
3439                                                            strlen (longnames[cnt]));
3440
3441           if (ctype->mboutdigits[cnt] == NULL)
3442             ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3443                                                            uninames[cnt], 9);
3444
3445           if (ctype->mboutdigits[cnt] == NULL)
3446             {
3447               /* Provide a replacement.  */
3448               error (0, 0, _("\
3449 no output digits defined and none of the standard names in the charmap"));
3450
3451               ctype->mboutdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
3452                                                        sizeof (struct charseq)
3453                                                        + 1);
3454
3455               /* This is better than nothing.  */
3456               ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
3457               ctype->mboutdigits[cnt]->nbytes = 1;
3458             }
3459
3460           ctype->wcoutdigits[cnt] = L'0' + cnt;
3461         }
3462
3463       ctype->outdigits_act = 10;
3464     }
3465 }
3466
3467
3468 /* Construction of sparse 3-level tables.
3469    See wchar-lookup.h for their structure and the meaning of p and q.  */
3470
struct wctype_table
{
  /* Shape of the table: a level3 block holds 1 << p words and a level2
     block holds 1 << q entries.  Every level3 word is a bitmap covering
     32 consecutive characters.  */
  unsigned int p, q;

  /* Growable arrays used while characters are being added.  For level1
     the counters are in entries, for level2 and level3 in blocks.
     `alloc' is the allocated capacity, `size' the part in use.  */
  size_t level1_alloc, level1_size;
  uint32_t *level1;
  size_t level2_alloc, level2_size;
  uint32_t *level2;
  size_t level3_alloc, level3_size;
  uint32_t *level3;

  /* Serialized form produced by wctype_table_finalize: RESULT points to
     RESULT_SIZE bytes.  */
  size_t result_size;
  char *result;
};
3490
3491 /* Initialize.  Assumes t->p and t->q have already been set.  */
3492 static inline void
3493 wctype_table_init (struct wctype_table *t)
3494 {
3495   t->level1 = NULL;
3496   t->level1_alloc = t->level1_size = 0;
3497   t->level2 = NULL;
3498   t->level2_alloc = t->level2_size = 0;
3499   t->level3 = NULL;
3500   t->level3_alloc = t->level3_size = 0;
3501 }
3502
3503 /* Retrieve an entry.  */
3504 static inline int
3505 wctype_table_get (struct wctype_table *t, uint32_t wc)
3506 {
3507   uint32_t index1 = wc >> (t->q + t->p + 5);
3508   if (index1 < t->level1_size)
3509     {
3510       uint32_t lookup1 = t->level1[index1];
3511       if (lookup1 != EMPTY)
3512         {
3513           uint32_t index2 = ((wc >> (t->p + 5)) & ((1 << t->q) - 1))
3514                             + (lookup1 << t->q);
3515           uint32_t lookup2 = t->level2[index2];
3516           if (lookup2 != EMPTY)
3517             {
3518               uint32_t index3 = ((wc >> 5) & ((1 << t->p) - 1))
3519                                 + (lookup2 << t->p);
3520               uint32_t lookup3 = t->level3[index3];
3521               uint32_t index4 = wc & 0x1f;
3522
3523               return (lookup3 >> index4) & 1;
3524             }
3525         }
3526     }
3527   return 0;
3528 }
3529
3530 /* Add one entry.  */
3531 static void
3532 wctype_table_add (struct wctype_table *t, uint32_t wc)
3533 {
3534   uint32_t index1 = wc >> (t->q + t->p + 5);
3535   uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1);
3536   uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1);
3537   uint32_t index4 = wc & 0x1f;
3538   size_t i, i1, i2;
3539
3540   if (index1 >= t->level1_size)
3541     {
3542       if (index1 >= t->level1_alloc)
3543         {
3544           size_t alloc = 2 * t->level1_alloc;
3545           if (alloc <= index1)
3546             alloc = index1 + 1;
3547           t->level1 = (uint32_t *) xrealloc ((char *) t->level1,
3548                                              alloc * sizeof (uint32_t));
3549           t->level1_alloc = alloc;
3550         }
3551       while (index1 >= t->level1_size)
3552         t->level1[t->level1_size++] = EMPTY;
3553     }
3554
3555   if (t->level1[index1] == EMPTY)
3556     {
3557       if (t->level2_size == t->level2_alloc)
3558         {
3559           size_t alloc = 2 * t->level2_alloc + 1;
3560           t->level2 = (uint32_t *) xrealloc ((char *) t->level2,
3561                                              (alloc << t->q) * sizeof (uint32_t));
3562           t->level2_alloc = alloc;
3563         }
3564       i1 = t->level2_size << t->q;
3565       i2 = (t->level2_size + 1) << t->q;
3566       for (i = i1; i < i2; i++)
3567         t->level2[i] = EMPTY;
3568       t->level1[index1] = t->level2_size++;
3569     }
3570
3571   index2 += t->level1[index1] << t->q;
3572
3573   if (t->level2[index2] == EMPTY)
3574     {
3575       if (t->level3_size == t->level3_alloc)
3576         {
3577           size_t alloc = 2 * t->level3_alloc + 1;
3578           t->level3 = (uint32_t *) xrealloc ((char *) t->level3,
3579                                              (alloc << t->p) * sizeof (uint32_t));
3580           t->level3_alloc = alloc;
3581         }
3582       i1 = t->level3_size << t->p;
3583       i2 = (t->level3_size + 1) << t->p;
3584       for (i = i1; i < i2; i++)
3585         t->level3[i] = 0;
3586       t->level2[index2] = t->level3_size++;
3587     }
3588
3589   index3 += t->level2[index2] << t->p;
3590
3591   t->level3[index3] |= (uint32_t)1 << index4;
3592 }
3593
3594 /* Finalize and shrink.  */
3595 static void
3596 wctype_table_finalize (struct wctype_table *t)
3597 {
3598   size_t i, j, k;
3599   uint32_t reorder3[t->level3_size];
3600   uint32_t reorder2[t->level2_size];
3601   uint32_t level1_offset, level2_offset, level3_offset;
3602
3603   /* Uniquify level3 blocks.  */
3604   k = 0;
3605   for (j = 0; j < t->level3_size; j++)
3606     {
3607       for (i = 0; i < k; i++)
3608         if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3609                     (1 << t->p) * sizeof (uint32_t)) == 0)
3610           break;
3611       /* Relocate block j to block i.  */
3612       reorder3[j] = i;
3613       if (i == k)
3614         {
3615           if (i != j)
3616             memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3617                     (1 << t->p) * sizeof (uint32_t));
3618           k++;
3619         }
3620     }
3621   t->level3_size = k;
3622
3623   for (i = 0; i < (t->level2_size << t->q); i++)
3624     if (t->level2[i] != EMPTY)
3625       t->level2[i] = reorder3[t->level2[i]];
3626
3627   /* Uniquify level2 blocks.  */
3628   k = 0;
3629   for (j = 0; j < t->level2_size; j++)
3630     {
3631       for (i = 0; i < k; i++)
3632         if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3633                     (1 << t->q) * sizeof (uint32_t)) == 0)
3634           break;
3635       /* Relocate block j to block i.  */
3636       reorder2[j] = i;
3637       if (i == k)
3638         {
3639           if (i != j)
3640             memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3641                     (1 << t->q) * sizeof (uint32_t));
3642           k++;
3643         }
3644     }
3645   t->level2_size = k;
3646
3647   for (i = 0; i < t->level1_size; i++)
3648     if (t->level1[i] != EMPTY)
3649       t->level1[i] = reorder2[t->level1[i]];
3650
3651   /* Create and fill the resulting compressed representation.  */
3652   t->result_size =
3653     5 * sizeof (uint32_t)
3654     + t->level1_size * sizeof (uint32_t)
3655     + (t->level2_size << t->q) * sizeof (uint32_t)
3656     + (t->level3_size << t->p) * sizeof (uint32_t);
3657   t->result = (char *) xmalloc (t->result_size);
3658
3659   level1_offset =
3660     5 * sizeof (uint32_t);
3661   level2_offset =
3662     5 * sizeof (uint32_t)
3663     + t->level1_size * sizeof (uint32_t);
3664   level3_offset =
3665     5 * sizeof (uint32_t)
3666     + t->level1_size * sizeof (uint32_t)
3667     + (t->level2_size << t->q) * sizeof (uint32_t);
3668
3669   ((uint32_t *) t->result)[0] = t->q + t->p + 5;
3670   ((uint32_t *) t->result)[1] = t->level1_size;
3671   ((uint32_t *) t->result)[2] = t->p + 5;
3672   ((uint32_t *) t->result)[3] = (1 << t->q) - 1;
3673   ((uint32_t *) t->result)[4] = (1 << t->p) - 1;
3674
3675   for (i = 0; i < t->level1_size; i++)
3676     ((uint32_t *) (t->result + level1_offset))[i] =
3677       (t->level1[i] == EMPTY
3678        ? 0
3679        : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3680
3681   for (i = 0; i < (t->level2_size << t->q); i++)
3682     ((uint32_t *) (t->result + level2_offset))[i] =
3683       (t->level2[i] == EMPTY
3684        ? 0
3685        : (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
3686
3687   for (i = 0; i < (t->level3_size << t->p); i++)
3688     ((uint32_t *) (t->result + level3_offset))[i] = t->level3[i];
3689
3690   if (t->level1_alloc > 0)
3691     free (t->level1);
3692   if (t->level2_alloc > 0)
3693     free (t->level2);
3694   if (t->level3_alloc > 0)
3695     free (t->level3);
3696 }
3697
/* Instantiate the generic table code in 3level.h with the name
   `wcwidth_table' and uint8_t as the element type.  The width table
   built further down in this file uses `struct wcwidth_table' together
   with wcwidth_table_init, wcwidth_table_add and wcwidth_table_finalize;
   presumably all of them are generated by this inclusion.
   NOTE(review): DEFAULT (0xff) looks like the value stored for characters
   that were never added -- confirm in 3level.h.  The header presumably
   also #undefs TABLE, ELEMENT and DEFAULT again, since they are defined
   anew for the next instantiation.  */
#define TABLE wcwidth_table
#define ELEMENT uint8_t
#define DEFAULT 0xff
#include "3level.h"
3702
/* Second instantiation of 3level.h, this time named `wctrans_table' with
   int32_t elements and 0 as DEFAULT.  While the header is being included
   the name wctrans_table_add is redirected to wctrans_table_add_internal,
   so whatever the header defines under that name ends up as
   wctrans_table_add_internal; the #undef afterwards frees the name
   wctrans_table_add for the wrapper defined right after this.  */
#define TABLE wctrans_table
#define ELEMENT int32_t
#define DEFAULT 0
#define wctrans_table_add wctrans_table_add_internal
#include "3level.h"
#undef wctrans_table_add
/* Record that WC maps to MAPPED_WC.  What goes into the table is not
   MAPPED_WC itself but its distance from WC.  */
static inline void
wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
{
  const uint32_t delta = mapped_wc - wc;

  wctrans_table_add_internal (t, wc, delta);
}
3716
3717
3718 /* Flattens the included transliterations into a translit list.
3719    Inserts them in the list at `cursor', and returns the new cursor.  */
3720 static struct translit_t **
3721 translit_flatten (struct locale_ctype_t *ctype,
3722                   const struct charmap_t *charmap,
3723                   struct translit_t **cursor)
3724 {
3725   while (ctype->translit_include != NULL)
3726     {
3727       const char *copy_locale = ctype->translit_include->copy_locale;
3728       const char *copy_repertoire = ctype->translit_include->copy_repertoire;
3729       struct localedef_t *other;
3730
3731       /* Unchain the include statement.  During the depth-first traversal
3732          we don't want to visit any locale more than once.  */
3733       ctype->translit_include = ctype->translit_include->next;
3734
3735       other = find_locale (LC_CTYPE, copy_locale, copy_repertoire, charmap);
3736
3737       if (other == NULL)
3738         {
3739           error (0, 0, _("\
3740 %s: transliteration data from locale `%s' not available"),
3741                  "LC_CTYPE", copy_locale);
3742         }
3743       else
3744         {
3745           struct locale_ctype_t *other_ctype =
3746             other->categories[LC_CTYPE].ctype;
3747
3748           cursor = translit_flatten (other_ctype, charmap, cursor);
3749           assert (other_ctype->translit_include == NULL);
3750
3751           if (other_ctype->translit != NULL)
3752             {
3753               /* Insert the other_ctype->translit list at *cursor.  */
3754               struct translit_t *endp = other_ctype->translit;
3755               while (endp->next != NULL)
3756                 endp = endp->next;
3757
3758               endp->next = *cursor;
3759               *cursor = other_ctype->translit;
3760
3761               /* Avoid any risk of circular lists.  */
3762               other_ctype->translit = NULL;
3763
3764               cursor = &endp->next;
3765             }
3766
3767           if (ctype->default_missing == NULL)
3768             ctype->default_missing = other_ctype->default_missing;
3769         }
3770     }
3771
3772   return cursor;
3773 }
3774
3775 static void
3776 allocate_arrays (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
3777                  struct repertoire_t *repertoire)
3778 {
3779   size_t idx, nr;
3780   const void *key;
3781   size_t len;
3782   void *vdata;
3783   void *curs;
3784
3785   /* You wonder about this amount of memory?  This is only because some
3786      users do not manage to address the array with unsigned values or
3787      data types with range >= 256.  '\200' would result in the array
3788      index -128.  To help these poor people we duplicate the entries for
3789      128 up to 255 below the entry for \0.  */
3790   ctype->ctype_b = (char_class_t *) xcalloc (256 + 128, sizeof (char_class_t));
3791   ctype->ctype32_b = (char_class32_t *) xcalloc (256, sizeof (char_class32_t));
3792   ctype->class_b = (uint32_t **)
3793     xmalloc (ctype->nr_charclass * sizeof (uint32_t *));
3794   ctype->class_3level = (struct iovec *)
3795     xmalloc (ctype->nr_charclass * sizeof (struct iovec));
3796
3797   /* This is the array accessed using the multibyte string elements.  */
3798   for (idx = 0; idx < 256; ++idx)
3799     ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
3800
3801   /* Mirror first 127 entries.  We must take care that entry -1 is not
3802      mirrored because EOF == -1.  */
3803   for (idx = 0; idx < 127; ++idx)
3804     ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
3805
3806   /* The 32 bit array contains all characters < 0x100.  */
3807   for (idx = 0; idx < ctype->class_collection_act; ++idx)
3808     if (ctype->charnames[idx] < 0x100)
3809       ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
3810
3811   for (nr = 0; nr < ctype->nr_charclass; nr++)
3812     {
3813       ctype->class_b[nr] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
3814
3815       for (idx = 0; idx < 256; ++idx)
3816         if (ctype->class256_collection[idx] & _ISbit (nr))
3817           ctype->class_b[nr][idx >> 5] |= (uint32_t)1 << (idx & 0x1f);
3818     }
3819
3820   for (nr = 0; nr < ctype->nr_charclass; nr++)
3821     {
3822       struct wctype_table t;
3823
3824       t.p = 4; /* or: 5 */
3825       t.q = 7; /* or: 6 */
3826       wctype_table_init (&t);
3827
3828       for (idx = 0; idx < ctype->class_collection_act; ++idx)
3829         if (ctype->class_collection[idx] & _ISwbit (nr))
3830           wctype_table_add (&t, ctype->charnames[idx]);
3831
3832       wctype_table_finalize (&t);
3833
3834       if (verbose)
3835         fprintf (stderr, _("%s: table for class \"%s\": %lu bytes\n"),
3836                  "LC_CTYPE", ctype->classnames[nr],
3837                  (unsigned long int) t.result_size);
3838
3839       ctype->class_3level[nr].iov_base = t.result;
3840       ctype->class_3level[nr].iov_len = t.result_size;
3841     }
3842
3843   /* Room for table of mappings.  */
3844   ctype->map_b = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3845   ctype->map32_b = (uint32_t **) xmalloc (ctype->map_collection_nr
3846                                           * sizeof (uint32_t *));
3847   ctype->map_3level = (struct iovec *)
3848     xmalloc (ctype->map_collection_nr * sizeof (struct iovec));
3849
3850   /* Fill in all mappings.  */
3851   for (idx = 0; idx < 2; ++idx)
3852     {
3853       unsigned int idx2;
3854
3855       /* Allocate table.  */
3856       ctype->map_b[idx] = (uint32_t *)
3857         xmalloc ((256 + 128) * sizeof (uint32_t));
3858
3859       /* Copy values from collection.  */
3860       for (idx2 = 0; idx2 < 256; ++idx2)
3861         ctype->map_b[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
3862
3863       /* Mirror first 127 entries.  We must take care not to map entry
3864          -1 because EOF == -1.  */
3865       for (idx2 = 0; idx2 < 127; ++idx2)
3866         ctype->map_b[idx][idx2] = ctype->map_b[idx][256 + idx2];
3867
3868       /* EOF must map to EOF.  */
3869       ctype->map_b[idx][127] = EOF;
3870     }
3871
3872   for (idx = 0; idx < ctype->map_collection_nr; ++idx)
3873     {
3874       unsigned int idx2;
3875
3876       /* Allocate table.  */
3877       ctype->map32_b[idx] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
3878
3879       /* Copy values from collection.  Default is identity mapping.  */
3880       for (idx2 = 0; idx2 < 256; ++idx2)
3881         ctype->map32_b[idx][idx2] =
3882           (ctype->map_collection[idx][idx2] != 0
3883            ? ctype->map_collection[idx][idx2]
3884            : idx2);
3885     }
3886
3887   for (nr = 0; nr < ctype->map_collection_nr; nr++)
3888     {
3889       struct wctrans_table t;
3890
3891       t.p = 7;
3892       t.q = 9;
3893       wctrans_table_init (&t);
3894
3895       for (idx = 0; idx < ctype->map_collection_act[nr]; ++idx)
3896         if (ctype->map_collection[nr][idx] != 0)
3897           wctrans_table_add (&t, ctype->charnames[idx],
3898                              ctype->map_collection[nr][idx]);
3899
3900       wctrans_table_finalize (&t);
3901
3902       if (verbose)
3903         fprintf (stderr, _("%s: table for map \"%s\": %lu bytes\n"),
3904                  "LC_CTYPE", ctype->mapnames[nr],
3905                  (unsigned long int) t.result_size);
3906
3907       ctype->map_3level[nr].iov_base = t.result;
3908       ctype->map_3level[nr].iov_len = t.result_size;
3909     }
3910
3911   /* Extra array for class and map names.  */
3912   ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3913                                                 * sizeof (uint32_t));
3914   ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3915                                               * sizeof (uint32_t));
3916
3917   ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
3918   ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
3919
3920   /* Array for width information.  Because the expected widths are very
3921      small (never larger than 2) we use only one single byte.  This
3922      saves space.
3923      We put only printable characters in the table.  wcwidth is specified
3924      to return -1 for non-printable characters.  Doing the check here
3925      saves a run-time check.
3926      But we put L'\0' in the table.  This again saves a run-time check.  */
3927   {
3928     struct wcwidth_table t;
3929
3930     t.p = 7;
3931     t.q = 9;
3932     wcwidth_table_init (&t);
3933
3934     /* First set all the printable characters of the character set to
3935        the default width.  */
3936     curs = NULL;
3937     while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
3938       {
3939         struct charseq *data = (struct charseq *) vdata;
3940
3941         if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
3942           data->ucs4 = repertoire_find_value (ctype->repertoire,
3943                                               data->name, len);
3944
3945         if (data->ucs4 != ILLEGAL_CHAR_VALUE)
3946           {
3947             uint32_t *class_bits =
3948               find_idx (ctype, &ctype->class_collection, NULL,
3949                         &ctype->class_collection_act, data->ucs4);
3950
3951             if (class_bits != NULL && (*class_bits & BITw (tok_print)))
3952               wcwidth_table_add (&t, data->ucs4, charmap->width_default);
3953           }
3954       }
3955
3956     /* Now add the explicitly specified widths.  */
3957     if (charmap->width_rules != NULL)
3958       {
3959         size_t cnt;
3960
3961         for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
3962           {
3963             unsigned char bytes[charmap->mb_cur_max];
3964             int nbytes = charmap->width_rules[cnt].from->nbytes;
3965
3966             /* We have the range of character for which the width is
3967                specified described using byte sequences of the multibyte
3968                charset.  We have to convert this to UCS4 now.  And we
3969                cannot simply convert the beginning and the end of the
3970                sequence, we have to iterate over the byte sequence and
3971                convert it for every single character.  */
3972             memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
3973
3974             while (nbytes < charmap->width_rules[cnt].to->nbytes
3975                    || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
3976                               nbytes) <= 0)
3977               {
3978                 /* Find the UCS value for `bytes'.  */
3979                 int inner;
3980                 uint32_t wch;
3981                 struct charseq *seq =
3982                   charmap_find_symbol (charmap, bytes, nbytes);
3983
3984                 if (seq == NULL)
3985                   wch = ILLEGAL_CHAR_VALUE;
3986                 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
3987                   wch = seq->ucs4;
3988                 else
3989                   wch = repertoire_find_value (ctype->repertoire, seq->name,
3990                                                strlen (seq->name));
3991
3992                 if (wch != ILLEGAL_CHAR_VALUE)
3993                   {
3994                     /* Store the value.  */
3995                     uint32_t *class_bits =
3996                       find_idx (ctype, &ctype->class_collection, NULL,
3997                                 &ctype->class_collection_act, wch);
3998
3999                     if (class_bits != NULL && (*class_bits & BITw (tok_print)))
4000                       wcwidth_table_add (&t, wch,
4001                                          charmap->width_rules[cnt].width);
4002                   }
4003
4004                 /* "Increment" the bytes sequence.  */
4005                 inner = nbytes - 1;
4006                 while (inner >= 0 && bytes[inner] == 0xff)
4007                   --inner;
4008
4009                 if (inner < 0)
4010                   {
4011                     /* We have to extend the byte sequence.  */
4012                     if (nbytes >= charmap->width_rules[cnt].to->nbytes)
4013                       break;
4014
4015                     bytes[0] = 1;
4016                     memset (&bytes[1], 0, nbytes);
4017                     ++nbytes;
4018                   }
4019                 else
4020                   {
4021                     ++bytes[inner];
4022                     while (++inner < nbytes)
4023                       bytes[inner] = 0;
4024                   }
4025               }
4026           }
4027       }
4028
4029     /* Set the width of L'\0' to 0.  */
4030     wcwidth_table_add (&t, 0, 0);
4031
4032     wcwidth_table_finalize (&t);
4033
4034     if (verbose)
4035       fprintf (stderr, _("%s: table for width: %lu bytes\n"),
4036                "LC_CTYPE", (unsigned long int) t.result_size);
4037
4038     ctype->width.iov_base = t.result;
4039     ctype->width.iov_len = t.result_size;
4040   }
4041
4042   /* Set MB_CUR_MAX.  */
4043   ctype->mb_cur_max = charmap->mb_cur_max;
4044
4045   /* Now determine the table for the transliteration information.
4046
4047      XXX It is not yet clear to me whether it is worth implementing a
4048      complicated algorithm which uses a hash table to locate the entries.
4049      For now I'll use a simple array which can be searched using binary
4050      search.  */
4051   if (ctype->translit_include != NULL)
4052     /* Traverse the locales mentioned in the `include' statements in a
4053        depth-first way and fold in their transliteration information.  */
4054     translit_flatten (ctype, charmap, &ctype->translit);
4055
4056   if (ctype->translit != NULL)
4057     {
4058       /* First count how many entries we have.  This is the upper limit
4059          since some entries from the included files might be overwritten.  */
4060       size_t number = 0;
4061       size_t cnt;
4062       struct translit_t *runp = ctype->translit;
4063       struct translit_t **sorted;
4064       size_t from_len, to_len;
4065
4066       while (runp != NULL)
4067         {
4068           ++number;
4069           runp = runp->next;
4070         }
4071
4072       /* Next we allocate an array large enough and fill in the values.  */
4073       sorted = (struct translit_t **) alloca (number
4074                                               * sizeof (struct translit_t **));
4075       runp = ctype->translit;
4076       number = 0;
4077       do
4078         {
4079           /* Search for the place where to insert this string.
4080              XXX Better use a real sorting algorithm later.  */
4081           size_t idx = 0;
4082           int replace = 0;
4083
4084           while (idx < number)
4085             {
4086               int res = wcscmp ((const wchar_t *) sorted[idx]->from,
4087                                 (const wchar_t *) runp->from);
4088               if (res == 0)
4089                 {
4090                   replace = 1;
4091                   break;
4092                 }
4093               if (res > 0)
4094                 break;
4095               ++idx;
4096             }
4097
4098           if (replace)
4099             sorted[idx] = runp;
4100           else
4101             {
4102               memmove (&sorted[idx + 1], &sorted[idx],
4103                        (number - idx) * sizeof (struct translit_t *));
4104               sorted[idx] = runp;
4105               ++number;
4106             }
4107
4108           runp = runp->next;
4109         }
4110       while (runp != NULL);
4111
4112       /* The next step is putting all the possible transliteration
4113          strings in one memory block so that we can write it out.
4114          We need several different blocks:
4115          - index to the from-string array
4116          - from-string array
4117          - index to the to-string array
4118          - to-string array.
4119       */
4120       from_len = to_len = 0;
4121       for (cnt = 0; cnt < number; ++cnt)
4122         {
4123           struct translit_to_t *srunp;
4124           from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4125           srunp = sorted[cnt]->to;
4126           while (srunp != NULL)
4127             {
4128               to_len += wcslen ((const wchar_t *) srunp->str) + 1;
4129               srunp = srunp->next;
4130             }
4131           /* Plus one for the extra NUL character marking the end of
4132              the list for the current entry.  */
4133           ++to_len;
4134         }
4135
4136       /* We can allocate the arrays for the results.  */
4137       ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
4138       ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
4139       ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
4140       ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
4141
4142       from_len = 0;
4143       to_len = 0;
4144       for (cnt = 0; cnt < number; ++cnt)
4145         {
4146           size_t len;
4147           struct translit_to_t *srunp;
4148
4149           ctype->translit_from_idx[cnt] = from_len;
4150           ctype->translit_to_idx[cnt] = to_len;
4151
4152           len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4153           wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
4154                    (const wchar_t *) sorted[cnt]->from, len);
4155           from_len += len;
4156
4157           ctype->translit_to_idx[cnt] = to_len;
4158           srunp = sorted[cnt]->to;
4159           while (srunp != NULL)
4160             {
4161               len = wcslen ((const wchar_t *) srunp->str) + 1;
4162               wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
4163                        (const wchar_t *) srunp->str, len);
4164               to_len += len;
4165               srunp = srunp->next;
4166             }
4167           ctype->translit_to_tbl[to_len++] = L'\0';
4168         }
4169
4170       /* Store the information about the length.  */
4171       ctype->translit_idx_size = number;
4172       ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
4173       ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
4174     }
4175   else
4176     {
4177       /* Provide some dummy pointers since we have nothing to write out.  */
4178       static uint32_t no_str = { 0 };
4179
4180       ctype->translit_from_idx = &no_str;
4181       ctype->translit_from_tbl = &no_str;
4182       ctype->translit_to_tbl = &no_str;
4183       ctype->translit_idx_size = 0;
4184       ctype->translit_from_tbl_size = 0;
4185       ctype->translit_to_tbl_size = 0;
4186     }
4187 }