locale/programs/ld-ctype.c

   1 /* Copyright (C) 1995-1999, 2000, 2001, 2002 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, write to the Free
  17    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  18    02111-1307 USA.  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 # include <config.h>
  22 #endif
  23
  24 #include <alloca.h>
  25 #include <byteswap.h>
  26 #include <endian.h>
  27 #include <errno.h>
  28 #include <limits.h>
  29 #include <obstack.h>
  30 #include <stdlib.h>
  31 #include <string.h>
  32 #include <wchar.h>
  33 #include <wctype.h>
  34 #include <sys/uio.h>
  35
  36 #include "localedef.h"
  37 #include "charmap.h"
  38 #include "localeinfo.h"
  39 #include "langinfo.h"
  40 #include "linereader.h"
  41 #include "locfile-token.h"
  42 #include "locfile.h"
  43
  44 #include <assert.h>
  45
  46
  47 #ifdef PREDEFINED_CLASSES
  48 /* These are the extra bits not in wctype.h since these are not preallocated
  49    classes.  */
  50 # define _ISwspecial1   (1 << 29)
  51 # define _ISwspecial2   (1 << 30)
  52 # define _ISwspecial3   (1 << 31)
  53 #endif
  54
  55
  56 /* The bit used for representing a special class.  */
  57 #define BITPOS(class) ((class) - tok_upper)
  58 #define BIT(class) (_ISbit (BITPOS (class)))
  59 #define BITw(class) (_ISwbit (BITPOS (class)))
  60
  61 #define ELEM(ctype, collection, idx, value)                                   \
  62   *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,     \
  63              &ctype->collection##_act idx, value)
  64
  65
  66 /* To be compatible with former implementations we for now restrict
  67    the number of bits for character classes to 16.  When compatibility
  68    is not necessary anymore increase the number to 32.  */
  69 #define char_class_t uint16_t
  70 #define char_class32_t uint32_t
  71
  72
  73 /* Type to describe a transliteration action.  We have a possibly
  74    multiple character from-string and a set of multiple character
  75    to-strings.  All are 32bit values since this is what is used in
  76    the gconv functions.  */
  77 struct translit_to_t
  78 {
  79   uint32_t *str;
  80
  81   struct translit_to_t *next;
  82 };
  83
  84 struct translit_t
  85 {
  86   uint32_t *from;
  87
  88   const char *fname;
  89   size_t lineno;
  90
  91   struct translit_to_t *to;
  92
  93   struct translit_t *next;
  94 };
  95
  96 struct translit_ignore_t
  97 {
  98   uint32_t from;
  99   uint32_t to;
 100   uint32_t step;
 101
 102   const char *fname;
 103   size_t lineno;
 104
 105   struct translit_ignore_t *next;
 106 };
 107
 108
 109 /* Type to describe a transliteration include statement.  */
 110 struct translit_include_t
 111 {
 112   const char *copy_locale;
 113   const char *copy_repertoire;
 114
 115   struct translit_include_t *next;
 116 };
 117
 118
/* Sparse table of uint32_t.  The macros below parameterize "3level.h",
   so they have to be defined before the header is included.  This file
   relies on the result providing `struct idx_table' and idx_table_init,
   both of which are used further down.
   NOTE(review): the exact meaning of TABLE, ELEMENT, DEFAULT and
   NO_FINALIZE is defined by 3level.h -- presumably table name, element
   type, value of unset entries, and suppression of the finalize step;
   confirm against that header.  */
#define TABLE idx_table
#define ELEMENT uint32_t
#define DEFAULT ((uint32_t) ~0)
#define NO_FINALIZE
#include "3level.h"
 125
 126
 127 /* The real definition of the struct for the LC_CTYPE locale.  */
 128 struct locale_ctype_t
 129 {
 130   uint32_t *charnames;
 131   size_t charnames_max;
 132   size_t charnames_act;
 133   /* An index lookup table, to speedup find_idx.  */
 134   struct idx_table charnames_idx;
 135
 136   struct repertoire_t *repertoire;
 137
 138   /* We will allow up to 8 * sizeof (uint32_t) character classes.  */
 139 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
 140   size_t nr_charclass;
 141   const char *classnames[MAX_NR_CHARCLASS];
 142   uint32_t last_class_char;
 143   uint32_t class256_collection[256];
 144   uint32_t *class_collection;
 145   size_t class_collection_max;
 146   size_t class_collection_act;
 147   uint32_t class_done;
 148   uint32_t class_offset;
 149
 150   struct charseq **mbdigits;
 151   size_t mbdigits_act;
 152   size_t mbdigits_max;
 153   uint32_t *wcdigits;
 154   size_t wcdigits_act;
 155   size_t wcdigits_max;
 156
 157   struct charseq *mboutdigits[10];
 158   uint32_t wcoutdigits[10];
 159   size_t outdigits_act;
 160
 161   /* If the following number ever turns out to be too small simply
 162      increase it.  But I doubt it will.  --drepper@gnu */
 163 #define MAX_NR_CHARMAP 16
 164   const char *mapnames[MAX_NR_CHARMAP];
 165   uint32_t *map_collection[MAX_NR_CHARMAP];
 166   uint32_t map256_collection[2][256];
 167   size_t map_collection_max[MAX_NR_CHARMAP];
 168   size_t map_collection_act[MAX_NR_CHARMAP];
 169   size_t map_collection_nr;
 170   size_t last_map_idx;
 171   int tomap_done[MAX_NR_CHARMAP];
 172   uint32_t map_offset;
 173
 174   /* Transliteration information.  */
 175   struct translit_include_t *translit_include;
 176   struct translit_t *translit;
 177   struct translit_ignore_t *translit_ignore;
 178   uint32_t ntranslit_ignore;
 179
 180   uint32_t *default_missing;
 181   const char *default_missing_file;
 182   size_t default_missing_lineno;
 183
 184   /* The arrays for the binary representation.  */
 185   char_class_t *ctype_b;
 186   char_class32_t *ctype32_b;
 187   uint32_t **map_b;
 188   uint32_t **map32_b;
 189   uint32_t **class_b;
 190   struct iovec *class_3level;
 191   struct iovec *map_3level;
 192   uint32_t *class_name_ptr;
 193   uint32_t *map_name_ptr;
 194   struct iovec width;
 195   uint32_t mb_cur_max;
 196   const char *codeset_name;
 197   uint32_t *translit_from_idx;
 198   uint32_t *translit_from_tbl;
 199   uint32_t *translit_to_idx;
 200   uint32_t *translit_to_tbl;
 201   uint32_t translit_idx_size;
 202   size_t translit_from_tbl_size;
 203   size_t translit_to_tbl_size;
 204
 205   struct obstack mempool;
 206 };
 207
 208
 209 /* Marker for an empty slot.  This has the value 0xFFFFFFFF, regardless
 210    whether 'int' is 16 bit, 32 bit, or 64 bit.  */
 211 #define EMPTY ((uint32_t) ~0)
 212
 213
 214 #define obstack_chunk_alloc xmalloc
 215 #define obstack_chunk_free free
 216
 217
 218 /* Prototypes for local functions.  */
 219 static void ctype_startup (struct linereader *lr, struct localedef_t *locale,
 220                            const struct charmap_t *charmap,
 221                            struct localedef_t *copy_locale,
 222                            int ignore_content);
 223 static void ctype_class_new (struct linereader *lr,
 224                              struct locale_ctype_t *ctype, const char *name);
 225 static void ctype_map_new (struct linereader *lr,
 226                            struct locale_ctype_t *ctype,
 227                            const char *name, const struct charmap_t *charmap);
 228 static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table,
 229                            size_t *max, size_t *act, unsigned int idx);
 230 static void set_class_defaults (struct locale_ctype_t *ctype,
 231                                 const struct charmap_t *charmap,
 232                                 struct repertoire_t *repertoire);
 233 static void allocate_arrays (struct locale_ctype_t *ctype,
 234                              const struct charmap_t *charmap,
 235                              struct repertoire_t *repertoire);
 236
 237
 238 static const char *longnames[] =
 239 {
 240   "zero", "one", "two", "three", "four",
 241   "five", "six", "seven", "eight", "nine"
 242 };
 243 static const char *uninames[] =
 244 {
 245   "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
 246   "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
 247 };
 248 static const unsigned char digits[] = "0123456789";
 249
 250
 251 static void
 252 ctype_startup (struct linereader *lr, struct localedef_t *locale,
 253                const struct charmap_t *charmap,
 254                struct localedef_t *copy_locale, int ignore_content)
 255 {
 256   unsigned int cnt;
 257   struct locale_ctype_t *ctype;
 258
 259   if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL)
 260     {
 261       if (copy_locale == NULL)
 262         {
 263           /* Allocate the needed room.  */
 264           locale->categories[LC_CTYPE].ctype = ctype =
 265             (struct locale_ctype_t *) xcalloc (1,
 266                                                sizeof (struct locale_ctype_t));
 267
 268           /* We have seen no names yet.  */
 269           ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
 270           ctype->charnames =
 271             (unsigned int *) xmalloc (ctype->charnames_max
 272                                       * sizeof (unsigned int));
 273           for (cnt = 0; cnt < 256; ++cnt)
 274             ctype->charnames[cnt] = cnt;
 275           ctype->charnames_act = 256;
 276           idx_table_init (&ctype->charnames_idx);
 277
 278           /* Fill character class information.  */
 279           ctype->last_class_char = ILLEGAL_CHAR_VALUE;
 280           /* The order of the following instructions determines the bit
 281              positions!  */
 282           ctype_class_new (lr, ctype, "upper");
 283           ctype_class_new (lr, ctype, "lower");
 284           ctype_class_new (lr, ctype, "alpha");
 285           ctype_class_new (lr, ctype, "digit");
 286           ctype_class_new (lr, ctype, "xdigit");
 287           ctype_class_new (lr, ctype, "space");
 288           ctype_class_new (lr, ctype, "print");
 289           ctype_class_new (lr, ctype, "graph");
 290           ctype_class_new (lr, ctype, "blank");
 291           ctype_class_new (lr, ctype, "cntrl");
 292           ctype_class_new (lr, ctype, "punct");
 293           ctype_class_new (lr, ctype, "alnum");
 294 #ifdef PREDEFINED_CLASSES
 295           /* The following are extensions from ISO 14652.  */
 296           ctype_class_new (lr, ctype, "left_to_right");
 297           ctype_class_new (lr, ctype, "right_to_left");
 298           ctype_class_new (lr, ctype, "num_terminator");
 299           ctype_class_new (lr, ctype, "num_separator");
 300           ctype_class_new (lr, ctype, "segment_separator");
 301           ctype_class_new (lr, ctype, "block_separator");
 302           ctype_class_new (lr, ctype, "direction_control");
 303           ctype_class_new (lr, ctype, "sym_swap_layout");
 304           ctype_class_new (lr, ctype, "char_shape_selector");
 305           ctype_class_new (lr, ctype, "num_shape_selector");
 306           ctype_class_new (lr, ctype, "non_spacing");
 307           ctype_class_new (lr, ctype, "non_spacing_level3");
 308           ctype_class_new (lr, ctype, "normal_connect");
 309           ctype_class_new (lr, ctype, "r_connect");
 310           ctype_class_new (lr, ctype, "no_connect");
 311           ctype_class_new (lr, ctype, "no_connect-space");
 312           ctype_class_new (lr, ctype, "vowel_connect");
 313 #endif
 314
 315           ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
 316           ctype->class_collection
 317             = (uint32_t *) xcalloc (sizeof (unsigned long int),
 318                                     ctype->class_collection_max);
 319           ctype->class_collection_act = 256;
 320
 321           /* Fill character map information.  */
 322           ctype->last_map_idx = MAX_NR_CHARMAP;
 323           ctype_map_new (lr, ctype, "toupper", charmap);
 324           ctype_map_new (lr, ctype, "tolower", charmap);
 325 #ifdef PREDEFINED_CLASSES
 326           ctype_map_new (lr, ctype, "tosymmetric", charmap);
 327 #endif
 328
 329           /* Fill first 256 entries in `toXXX' arrays.  */
 330           for (cnt = 0; cnt < 256; ++cnt)
 331             {
 332               ctype->map_collection[0][cnt] = cnt;
 333               ctype->map_collection[1][cnt] = cnt;
 334 #ifdef PREDEFINED_CLASSES
 335               ctype->map_collection[2][cnt] = cnt;
 336 #endif
 337               ctype->map256_collection[0][cnt] = cnt;
 338               ctype->map256_collection[1][cnt] = cnt;
 339             }
 340
 341           obstack_init (&ctype->mempool);
 342         }
 343       else
 344         ctype = locale->categories[LC_CTYPE].ctype =
 345           copy_locale->categories[LC_CTYPE].ctype;
 346     }
 347 }
 348
 349
 350 void
 351 ctype_finish (struct localedef_t *locale, const struct charmap_t *charmap)
 352 {
 353   /* See POSIX.2, table 2-6 for the meaning of the following table.  */
 354 #define NCLASS 12
 355   static const struct
 356   {
 357     const char *name;
 358     const char allow[NCLASS];
 359   }
 360   valid_table[NCLASS] =
 361   {
 362     /* The order is important.  See token.h for more information.
 363        M = Always, D = Default, - = Permitted, X = Mutually exclusive  */
 364     { "upper",  "--MX-XDDXXX-" },
 365     { "lower",  "--MX-XDDXXX-" },
 366     { "alpha",  "---X-XDDXXX-" },
 367     { "digit",  "XXX--XDDXXX-" },
 368     { "xdigit", "-----XDDXXX-" },
 369     { "space",  "XXXXX------X" },
 370     { "print",  "---------X--" },
 371     { "graph",  "---------X--" },
 372     { "blank",  "XXXXXM-----X" },
 373     { "cntrl",  "XXXXX-XX--XX" },
 374     { "punct",  "XXXXX-DD-X-X" },
 375     { "alnum",  "-----XDDXXX-" }
 376   };
 377   size_t cnt;
 378   int cls1, cls2;
 379   uint32_t space_value;
 380   struct charseq *space_seq;
 381   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 382   int warned;
 383   const void *key;
 384   size_t len;
 385   void *vdata;
 386   void *curs;
 387
 388   /* Now resolve copying and also handle completely missing definitions.  */
 389   if (ctype == NULL)
 390     {
 391       const char *repertoire_name;
 392
 393       /* First see whether we were supposed to copy.  If yes, find the
 394          actual definition.  */
 395       if (locale->copy_name[LC_CTYPE] != NULL)
 396         {
 397           /* Find the copying locale.  This has to happen transitively since
 398              the locale we are copying from might also copying another one.  */
 399           struct localedef_t *from = locale;
 400
 401           do
 402             from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
 403                                 from->repertoire_name, charmap);
 404           while (from->categories[LC_CTYPE].ctype == NULL
 405                  && from->copy_name[LC_CTYPE] != NULL);
 406
 407           ctype = locale->categories[LC_CTYPE].ctype
 408             = from->categories[LC_CTYPE].ctype;
 409         }
 410
 411       /* If there is still no definition issue an warning and create an
 412          empty one.  */
 413       if (ctype == NULL)
 414         {
 415           if (! be_quiet)
 416             WITH_CUR_LOCALE (error (0, 0, _("\
 417 No definition for %s category found"), "LC_CTYPE"));
 418           ctype_startup (NULL, locale, charmap, NULL, 0);
 419           ctype = locale->categories[LC_CTYPE].ctype;
 420         }
 421
 422       /* Get the repertoire we have to use.  */
 423       repertoire_name = locale->repertoire_name ?: repertoire_global;
 424       if (repertoire_name != NULL)
 425         ctype->repertoire = repertoire_read (repertoire_name);
 426     }
 427
 428   /* We need the name of the currently used 8-bit character set to
 429      make correct conversion between this 8-bit representation and the
 430      ISO 10646 character set used internally for wide characters.  */
 431   ctype->codeset_name = charmap->code_set_name;
 432   if (ctype->codeset_name == NULL)
 433     {
 434       if (! be_quiet)
 435         WITH_CUR_LOCALE (error (0, 0, _("\
 436 No character set name specified in charmap")));
 437       ctype->codeset_name = "//UNKNOWN//";
 438     }
 439
 440   /* Set default value for classes not specified.  */
 441   set_class_defaults (ctype, charmap, ctype->repertoire);
 442
 443   /* Check according to table.  */
 444   for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
 445     {
 446       uint32_t tmp = ctype->class_collection[cnt];
 447
 448       if (tmp != 0)
 449         {
 450           for (cls1 = 0; cls1 < NCLASS; ++cls1)
 451             if ((tmp & _ISwbit (cls1)) != 0)
 452               for (cls2 = 0; cls2 < NCLASS; ++cls2)
 453                 if (valid_table[cls1].allow[cls2] != '-')
 454                   {
 455                     int eq = (tmp & _ISwbit (cls2)) != 0;
 456                     switch (valid_table[cls1].allow[cls2])
 457                       {
 458                       case 'M':
 459                         if (!eq)
 460                           {
 461                             uint32_t value = ctype->charnames[cnt];
 462
 463                             if (!be_quiet)
 464                               WITH_CUR_LOCALE (error (0, 0, _("\
 465 character L'\\u%0*x' in class `%s' must be in class `%s'"),
 466                                                       value > 0xffff ? 8 : 4,
 467                                                       value,
 468                                                       valid_table[cls1].name,
 469                                                       valid_table[cls2].name));
 470                           }
 471                         break;
 472
 473                       case 'X':
 474                         if (eq)
 475                           {
 476                             uint32_t value = ctype->charnames[cnt];
 477
 478                             if (!be_quiet)
 479                               WITH_CUR_LOCALE (error (0, 0, _("\
 480 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
 481                                                       value > 0xffff ? 8 : 4,
 482                                                       value,
 483                                                       valid_table[cls1].name,
 484                                                       valid_table[cls2].name));
 485                           }
 486                         break;
 487
 488                       case 'D':
 489                         ctype->class_collection[cnt] |= _ISwbit (cls2);
 490                         break;
 491
 492                       default:
 493                         WITH_CUR_LOCALE (error (5, 0, _("\
 494 internal error in %s, line %u"), __FUNCTION__, __LINE__));
 495                       }
 496                   }
 497         }
 498     }
 499
 500   for (cnt = 0; cnt < 256; ++cnt)
 501     {
 502       uint32_t tmp = ctype->class256_collection[cnt];
 503
 504       if (tmp != 0)
 505         {
 506           for (cls1 = 0; cls1 < NCLASS; ++cls1)
 507             if ((tmp & _ISbit (cls1)) != 0)
 508               for (cls2 = 0; cls2 < NCLASS; ++cls2)
 509                 if (valid_table[cls1].allow[cls2] != '-')
 510                   {
 511                     int eq = (tmp & _ISbit (cls2)) != 0;
 512                     switch (valid_table[cls1].allow[cls2])
 513                       {
 514                       case 'M':
 515                         if (!eq)
 516                           {
 517                             char buf[17];
 518
 519                             snprintf (buf, sizeof buf, "\\%Zo", cnt);
 520
 521                             if (!be_quiet)
 522                               WITH_CUR_LOCALE (error (0, 0, _("\
 523 character '%s' in class `%s' must be in class `%s'"),
 524                                                       buf,
 525                                                       valid_table[cls1].name,
 526                                                       valid_table[cls2].name));
 527                           }
 528                         break;
 529
 530                       case 'X':
 531                         if (eq)
 532                           {
 533                             char buf[17];
 534
 535                             snprintf (buf, sizeof buf, "\\%Zo", cnt);
 536
 537                             if (!be_quiet)
 538                               WITH_CUR_LOCALE (error (0, 0, _("\
 539 character '%s' in class `%s' must not be in class `%s'"),
 540                                                       buf,
 541                                                       valid_table[cls1].name,
 542                                                       valid_table[cls2].name));
 543                           }
 544                         break;
 545
 546                       case 'D':
 547                         ctype->class256_collection[cnt] |= _ISbit (cls2);
 548                         break;
 549
 550                       default:
 551                         WITH_CUR_LOCALE (error (5, 0, _("\
 552 internal error in %s, line %u"), __FUNCTION__, __LINE__));
 553                       }
 554                   }
 555         }
 556     }
 557
 558   /* ... and now test <SP> as a special case.  */
 559   space_value = 32;
 560   if (((cnt = BITPOS (tok_space),
 561         (ELEM (ctype, class_collection, , space_value)
 562          & BITw (tok_space)) == 0)
 563        || (cnt = BITPOS (tok_blank),
 564            (ELEM (ctype, class_collection, , space_value)
 565             & BITw (tok_blank)) == 0)))
 566     {
 567       if (!be_quiet)
 568         WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
 569                                 valid_table[cnt].name));
 570     }
 571   else if (((cnt = BITPOS (tok_punct),
 572              (ELEM (ctype, class_collection, , space_value)
 573               & BITw (tok_punct)) != 0)
 574             || (cnt = BITPOS (tok_graph),
 575                 (ELEM (ctype, class_collection, , space_value)
 576                  & BITw (tok_graph))
 577                 != 0)))
 578     {
 579       if (!be_quiet)
 580         WITH_CUR_LOCALE (error (0, 0, _("\
 581 <SP> character must not be in class `%s'"),
 582                                 valid_table[cnt].name));
 583     }
 584   else
 585     ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
 586
 587   space_seq = charmap_find_value (charmap, "SP", 2);
 588   if (space_seq == NULL)
 589     space_seq = charmap_find_value (charmap, "space", 5);
 590   if (space_seq == NULL)
 591     space_seq = charmap_find_value (charmap, "U00000020", 9);
 592   if (space_seq == NULL || space_seq->nbytes != 1)
 593     {
 594       if (!be_quiet)
 595         WITH_CUR_LOCALE (error (0, 0, _("\
 596 character <SP> not defined in character map")));
 597     }
 598   else if (((cnt = BITPOS (tok_space),
 599              (ctype->class256_collection[space_seq->bytes[0]]
 600               & BIT (tok_space)) == 0)
 601             || (cnt = BITPOS (tok_blank),
 602                 (ctype->class256_collection[space_seq->bytes[0]]
 603                  & BIT (tok_blank)) == 0)))
 604     {
 605       if (!be_quiet)
 606         WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
 607                                 valid_table[cnt].name));
 608     }
 609   else if (((cnt = BITPOS (tok_punct),
 610              (ctype->class256_collection[space_seq->bytes[0]]
 611               & BIT (tok_punct)) != 0)
 612             || (cnt = BITPOS (tok_graph),
 613                 (ctype->class256_collection[space_seq->bytes[0]]
 614                  & BIT (tok_graph)) != 0)))
 615     {
 616       if (!be_quiet)
 617         WITH_CUR_LOCALE (error (0, 0, _("\
 618 <SP> character must not be in class `%s'"),
 619                                 valid_table[cnt].name));
 620     }
 621   else
 622     ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
 623
 624   /* Now that the tests are done make sure the name array contains all
 625      characters which are handled in the WIDTH section of the
 626      character set definition file.  */
 627   if (charmap->width_rules != NULL)
 628     for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
 629       {
 630         unsigned char bytes[charmap->mb_cur_max];
 631         int nbytes = charmap->width_rules[cnt].from->nbytes;
 632
 633         /* We have the range of character for which the width is
 634            specified described using byte sequences of the multibyte
 635            charset.  We have to convert this to UCS4 now.  And we
 636            cannot simply convert the beginning and the end of the
 637            sequence, we have to iterate over the byte sequence and
 638            convert it for every single character.  */
 639         memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
 640
 641         while (nbytes < charmap->width_rules[cnt].to->nbytes
 642                || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
 643                           nbytes) <= 0)
 644           {
 645             /* Find the UCS value for `bytes'.  */
 646             int inner;
 647             uint32_t wch;
 648             struct charseq *seq = charmap_find_symbol (charmap, bytes, nbytes);
 649
 650             if (seq == NULL)
 651               wch = ILLEGAL_CHAR_VALUE;
 652             else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
 653               wch = seq->ucs4;
 654             else
 655               wch = repertoire_find_value (ctype->repertoire, seq->name,
 656                                            strlen (seq->name));
 657
 658             if (wch != ILLEGAL_CHAR_VALUE)
 659               /* We are only interested in the side-effects of the
 660                  `find_idx' call.  It will add appropriate entries in
 661                  the name array if this is necessary.  */
 662               (void) find_idx (ctype, NULL, NULL, NULL, wch);
 663
 664             /* "Increment" the bytes sequence.  */
 665             inner = nbytes - 1;
 666             while (inner >= 0 && bytes[inner] == 0xff)
 667               --inner;
 668
 669             if (inner < 0)
 670               {
 671                 /* We have to extend the byte sequence.  */
 672                 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
 673                   break;
 674
 675                 bytes[0] = 1;
 676                 memset (&bytes[1], 0, nbytes);
 677                 ++nbytes;
 678               }
 679             else
 680               {
 681                 ++bytes[inner];
 682                 while (++inner < nbytes)
 683                   bytes[inner] = 0;
 684               }
 685           }
 686       }
 687
 688   /* Now set all the other characters of the character set to the
 689      default width.  */
 690   curs = NULL;
 691   while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
 692     {
 693       struct charseq *data = (struct charseq *) vdata;
 694
 695       if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
 696         data->ucs4 = repertoire_find_value (ctype->repertoire,
 697                                             data->name, len);
 698
 699       if (data->ucs4 != ILLEGAL_CHAR_VALUE)
 700         (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
 701     }
 702
 703   /* There must be a multiple of 10 digits.  */
 704   if (ctype->mbdigits_act % 10 != 0)
 705     {
 706       assert (ctype->mbdigits_act == ctype->wcdigits_act);
 707       ctype->wcdigits_act -= ctype->mbdigits_act % 10;
 708       ctype->mbdigits_act -= ctype->mbdigits_act % 10;
 709       WITH_CUR_LOCALE (error (0, 0, _("\
 710 `digit' category has not entries in groups of ten")));
 711     }
 712
 713   /* Check the input digits.  There must be a multiple of ten available.
 714      In each group it could be that one or the other character is missing.
 715      In this case the whole group must be removed.  */
 716   cnt = 0;
 717   while (cnt < ctype->mbdigits_act)
 718     {
 719       size_t inner;
 720       for (inner = 0; inner < 10; ++inner)
 721         if (ctype->mbdigits[cnt + inner] == NULL)
 722           break;
 723
 724       if (inner == 10)
 725         cnt += 10;
 726       else
 727         {
 728           /* Remove the group.  */
 729           memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
 730                    ((ctype->wcdigits_act - cnt - 10)
 731                     * sizeof (ctype->mbdigits[0])));
 732           ctype->mbdigits_act -= 10;
 733         }
 734     }
 735
 736   /* If no input digits are given use the default.  */
 737   if (ctype->mbdigits_act == 0)
 738     {
 739       if (ctype->mbdigits_max == 0)
 740         {
 741           ctype->mbdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
 742                                            10 * sizeof (struct charseq *));
 743           ctype->mbdigits_max = 10;
 744         }
 745
 746       for (cnt = 0; cnt < 10; ++cnt)
 747         {
 748           ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
 749                                                       digits + cnt, 1);
 750           if (ctype->mbdigits[cnt] == NULL)
 751             {
 752               ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
 753                                                           longnames[cnt],
 754                                                           strlen (longnames[cnt]));
 755               if (ctype->mbdigits[cnt] == NULL)
 756                 {
 757                   /* Hum, this ain't good.  */
 758                   WITH_CUR_LOCALE (error (0, 0, _("\
 759 no input digits defined and none of the standard names in the charmap")));
 760
 761                   ctype->mbdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
 762                                                         sizeof (struct charseq) + 1);
 763
 764                   /* This is better than nothing.  */
 765                   ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
 766                   ctype->mbdigits[cnt]->nbytes = 1;
 767                 }
 768             }
 769         }
 770
 771       ctype->mbdigits_act = 10;
 772     }
 773
 774   /* Check the wide character input digits.  There must be a multiple
 775      of ten available.  In each group it could be that one or the other
 776      character is missing.  In this case the whole group must be
 777      removed.  */
 778   cnt = 0;
 779   while (cnt < ctype->wcdigits_act)
 780     {
 781       size_t inner;
 782       for (inner = 0; inner < 10; ++inner)
 783         if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
 784           break;
 785
 786       if (inner == 10)
 787         cnt += 10;
 788       else
 789         {
 790           /* Remove the group.  */
 791           memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
 792                    ((ctype->wcdigits_act - cnt - 10)
 793                     * sizeof (ctype->wcdigits[0])));
 794           ctype->wcdigits_act -= 10;
 795         }
 796     }
 797
 798   /* If no input digits are given use the default.  */
 799   if (ctype->wcdigits_act == 0)
 800     {
 801       if (ctype->wcdigits_max == 0)
 802         {
 803           ctype->wcdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
 804                                            10 * sizeof (uint32_t));
 805           ctype->wcdigits_max = 10;
 806         }
 807
 808       for (cnt = 0; cnt < 10; ++cnt)
 809         ctype->wcdigits[cnt] = L'0' + cnt;
 810
 811       ctype->mbdigits_act = 10;
 812     }
 813
 814   /* Check the outdigits.  */
 815   warned = 0;
 816   for (cnt = 0; cnt < 10; ++cnt)
 817     if (ctype->mboutdigits[cnt] == NULL)
 818       {
 819         static struct charseq replace[2];
 820
 821         if (!warned)
 822           {
 823             WITH_CUR_LOCALE (error (0, 0, _("\
 824 not all characters used in `outdigit' are available in the charmap")));
 825             warned = 1;
 826           }
 827
 828         replace[0].nbytes = 1;
 829         replace[0].bytes[0] = '?';
 830         replace[0].bytes[1] = '\0';
 831         ctype->mboutdigits[cnt] = &replace[0];
 832       }
 833
 834   warned = 0;
 835   for (cnt = 0; cnt < 10; ++cnt)
 836     if (ctype->wcoutdigits[cnt] == 0)
 837       {
 838         if (!warned)
 839           {
 840             WITH_CUR_LOCALE (error (0, 0, _("\
 841 not all characters used in `outdigit' are available in the repertoire")));
 842             warned = 1;
 843           }
 844
 845         ctype->wcoutdigits[cnt] = L'?';
 846       }
 847
 848   /* Sort the entries in the translit_ignore list.  */
 849   if (ctype->translit_ignore != NULL)
 850     {
 851       struct translit_ignore_t *firstp = ctype->translit_ignore;
 852       struct translit_ignore_t *runp;
 853
 854       ctype->ntranslit_ignore = 1;
 855
 856       for (runp = firstp->next; runp != NULL; runp = runp->next)
 857         {
 858           struct translit_ignore_t *lastp = NULL;
 859           struct translit_ignore_t *cmpp;
 860
 861           ++ctype->ntranslit_ignore;
 862
 863           for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next)
 864             if (runp->from < cmpp->from)
 865               break;
 866
 867           runp->next = lastp;
 868           if (lastp == NULL)
 869             firstp = runp;
 870         }
 871
 872       ctype->translit_ignore = firstp;
 873     }
 874 }
 875
 876
 877 void
 878 ctype_output (struct localedef_t *locale, const struct charmap_t *charmap,
 879               const char *output_path)
 880 {
 881   static const char nulbytes[4] = { 0, 0, 0, 0 };
 882   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 883   const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)
 884                          + ctype->nr_charclass + ctype->map_collection_nr);
 885   struct iovec iov[2 + nelems + 2 * ctype->nr_charclass
 886                   + ctype->map_collection_nr + 4];
 887   struct locale_file data;
 888   uint32_t idx[nelems + 1];
 889   uint32_t default_missing_len;
 890   size_t elem, cnt, offset, total;
 891   char *cp;
 892
 893   /* Now prepare the output: Find the sizes of the table we can use.  */
 894   allocate_arrays (ctype, charmap, ctype->repertoire);
 895
 896   data.magic = LIMAGIC (LC_CTYPE);
 897   data.n = nelems;
 898   iov[0].iov_base = (void *) &data;
 899   iov[0].iov_len = sizeof (data);
 900
 901   iov[1].iov_base = (void *) idx;
 902   iov[1].iov_len = nelems * sizeof (uint32_t);
 903
 904   idx[0] = iov[0].iov_len + iov[1].iov_len;
 905   offset = 0;
 906
 907   for (elem = 0; elem < nelems; ++elem)
 908     {
 909       if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))
 910         switch (elem)
 911           {
 912 #define CTYPE_EMPTY(name) \
 913           case name:                                                          \
 914             iov[2 + elem + offset].iov_base = NULL;                           \
 915             iov[2 + elem + offset].iov_len = 0;                               \
 916             idx[elem + 1] = idx[elem];                                        \
 917             break
 918
 919           CTYPE_EMPTY(_NL_CTYPE_GAP1);
 920           CTYPE_EMPTY(_NL_CTYPE_GAP2);
 921           CTYPE_EMPTY(_NL_CTYPE_GAP3);
 922           CTYPE_EMPTY(_NL_CTYPE_GAP4);
 923           CTYPE_EMPTY(_NL_CTYPE_GAP5);
 924           CTYPE_EMPTY(_NL_CTYPE_GAP6);
 925
 926 #define CTYPE_DATA(name, base, len)                                           \
 927           case _NL_ITEM_INDEX (name):                                         \
 928             iov[2 + elem + offset].iov_base = (base);                         \
 929             iov[2 + elem + offset].iov_len = (len);                           \
 930             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;       \
 931             break
 932
 933           CTYPE_DATA (_NL_CTYPE_CLASS,
 934                       ctype->ctype_b,
 935                       (256 + 128) * sizeof (char_class_t));
 936
 937           CTYPE_DATA (_NL_CTYPE_TOUPPER,
 938                       ctype->map_b[0],
 939                       (256 + 128) * sizeof (uint32_t));
 940           CTYPE_DATA (_NL_CTYPE_TOLOWER,
 941                       ctype->map_b[1],
 942                       (256 + 128) * sizeof (uint32_t));
 943
 944           CTYPE_DATA (_NL_CTYPE_TOUPPER32,
 945                       ctype->map32_b[0],
 946                       256 * sizeof (uint32_t));
 947           CTYPE_DATA (_NL_CTYPE_TOLOWER32,
 948                       ctype->map32_b[1],
 949                       256 * sizeof (uint32_t));
 950
 951           CTYPE_DATA (_NL_CTYPE_CLASS32,
 952                       ctype->ctype32_b,
 953                       256 * sizeof (char_class32_t));
 954
 955           CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET,
 956                       &ctype->class_offset, sizeof (uint32_t));
 957
 958           CTYPE_DATA (_NL_CTYPE_MAP_OFFSET,
 959                       &ctype->map_offset, sizeof (uint32_t));
 960
 961           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE,
 962                       &ctype->translit_idx_size, sizeof (uint32_t));
 963
 964           CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX,
 965                       ctype->translit_from_idx,
 966                       ctype->translit_idx_size * sizeof (uint32_t));
 967
 968           CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL,
 969                       ctype->translit_from_tbl,
 970                       ctype->translit_from_tbl_size);
 971
 972           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX,
 973                       ctype->translit_to_idx,
 974                       ctype->translit_idx_size * sizeof (uint32_t));
 975
 976           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL,
 977                       ctype->translit_to_tbl, ctype->translit_to_tbl_size);
 978
 979           case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
 980             /* The class name array.  */
 981             total = 0;
 982             for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
 983               {
 984                 iov[2 + elem + offset].iov_base
 985                   = (void *) ctype->classnames[cnt];
 986                 iov[2 + elem + offset].iov_len
 987                   = strlen (ctype->classnames[cnt]) + 1;
 988                 total += iov[2 + elem + offset].iov_len;
 989               }
 990             iov[2 + elem + offset].iov_base = (void *) nulbytes;
 991             iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
 992             total += 1 + (4 - ((total + 1) % 4));
 993
 994             idx[elem + 1] = idx[elem] + total;
 995             break;
 996
 997           case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
 998             /* The class name array.  */
 999             total = 0;
1000             for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
1001               {
1002                 iov[2 + elem + offset].iov_base
1003                   = (void *) ctype->mapnames[cnt];
1004                 iov[2 + elem + offset].iov_len
1005                   = strlen (ctype->mapnames[cnt]) + 1;
1006                 total += iov[2 + elem + offset].iov_len;
1007               }
1008             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1009             iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
1010             total += 1 + (4 - ((total + 1) % 4));
1011
1012             idx[elem + 1] = idx[elem] + total;
1013             break;
1014
1015           CTYPE_DATA (_NL_CTYPE_WIDTH,
1016                       ctype->width.iov_base,
1017                       ctype->width.iov_len);
1018
1019           CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
1020                       &ctype->mb_cur_max, sizeof (uint32_t));
1021
1022           case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
1023             total = strlen (ctype->codeset_name) + 1;
1024             if (total % 4 == 0)
1025               iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
1026             else
1027               {
1028                 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
1029                 memset (mempcpy (iov[2 + elem + offset].iov_base,
1030                                  ctype->codeset_name, total),
1031                         '\0', 4 - (total & 3));
1032                 total = (total + 3) & ~3;
1033               }
1034             iov[2 + elem + offset].iov_len = total;
1035             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1036             break;
1037
1038           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
1039             iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1040             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1041             *(uint32_t *) iov[2 + elem + offset].iov_base =
1042               ctype->mbdigits_act / 10;
1043             idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1044             break;
1045
1046           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
1047             /* Align entries.  */
1048             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1049             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1050             idx[elem] += iov[2 + elem + offset].iov_len;
1051             ++offset;
1052
1053             iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1054             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1055             *(uint32_t *) iov[2 + elem + offset].iov_base =
1056               ctype->wcdigits_act / 10;
1057             idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1058             break;
1059
1060           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
1061             /* Compute the length of all possible characters.  For INDIGITS
1062                there might be more than one.  We simply concatenate all of
1063                them with a NUL byte following.  The NUL byte wouldn't be
1064                necessary but it makes it easier for the user.  */
1065             total = 0;
1066
1067             for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1068                  cnt < ctype->mbdigits_act; cnt += 10)
1069               total += ctype->mbdigits[cnt]->nbytes + 1;
1070             iov[2 + elem + offset].iov_base = (char *) alloca (total);
1071             iov[2 + elem + offset].iov_len = total;
1072
1073             cp = iov[2 + elem + offset].iov_base;
1074             for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1075                  cnt < ctype->mbdigits_act; cnt += 10)
1076               {
1077                 cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes,
1078                               ctype->mbdigits[cnt]->nbytes);
1079                 *cp++ = '\0';
1080               }
1081             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1082             break;
1083
1084           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
1085             /* Compute the length of all possible characters.  For INDIGITS
1086                there might be more than one.  We simply concatenate all of
1087                them with a NUL byte following.  The NUL byte wouldn't be
1088                necessary but it makes it easier for the user.  */
1089             cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
1090             total = ctype->mboutdigits[cnt]->nbytes + 1;
1091             iov[2 + elem + offset].iov_base = (char *) alloca (total);
1092             iov[2 + elem + offset].iov_len = total;
1093
1094             *(char *) mempcpy (iov[2 + elem + offset].iov_base,
1095                                ctype->mboutdigits[cnt]->bytes,
1096                                ctype->mboutdigits[cnt]->nbytes) = '\0';
1097             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1098             break;
1099
1100           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
1101             total = ctype->wcdigits_act / 10;
1102
1103             iov[2 + elem + offset].iov_base =
1104               (uint32_t *) alloca (total * sizeof (uint32_t));
1105             iov[2 + elem + offset].iov_len = total * sizeof (uint32_t);
1106
1107             for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
1108                  cnt < ctype->wcdigits_act; cnt += 10)
1109               ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10]
1110                 = ctype->wcdigits[cnt];
1111             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1112             break;
1113
1114           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC):
1115             /* Align entries.  */
1116             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1117             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1118             idx[elem] += iov[2 + elem + offset].iov_len;
1119             ++offset;
1120             /* FALLTRHOUGH */
1121
1122           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT1_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
1123             cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
1124             iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt];
1125             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1126             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1127             break;
1128
1129           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
1130             /* Align entries.  */
1131             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1132             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1133             idx[elem] += iov[2 + elem + offset].iov_len;
1134             ++offset;
1135
1136             default_missing_len = (ctype->default_missing
1137                                    ? wcslen ((wchar_t *)ctype->default_missing)
1138                                    : 0);
1139             iov[2 + elem + offset].iov_base = &default_missing_len;
1140             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1141             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1142             break;
1143
1144           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
1145             iov[2 + elem + offset].iov_base =
1146               ctype->default_missing ?: (uint32_t *) L"";
1147             iov[2 + elem + offset].iov_len =
1148               wcslen (iov[2 + elem + offset].iov_base);
1149             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1150             break;
1151
1152           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
1153             /* Align entries.  */
1154             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1155             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1156             idx[elem] += iov[2 + elem + offset].iov_len;
1157             ++offset;
1158
1159             iov[2 + elem + offset].iov_base = &ctype->ntranslit_ignore;
1160             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1161             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1162             break;
1163
1164           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
1165             {
1166               uint32_t *ranges = (uint32_t *) alloca (ctype->ntranslit_ignore
1167                                                       * 3 * sizeof (uint32_t));
1168               struct translit_ignore_t *runp;
1169
1170               iov[2 + elem + offset].iov_base = ranges;
1171               iov[2 + elem + offset].iov_len = (ctype->ntranslit_ignore
1172                                                 * 3 * sizeof (uint32_t));
1173
1174               for (runp = ctype->translit_ignore; runp != NULL;
1175                    runp = runp->next)
1176                 {
1177                   *ranges++ = runp->from;
1178                   *ranges++ = runp->to;
1179                   *ranges++ = runp->step;
1180                 }
1181             }
1182             /* Remove the following line in case a new entry is added
1183                after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN.  */
1184             if (elem < nelems)
1185               idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1186             break;
1187
1188           default:
1189             assert (! "unknown CTYPE element");
1190           }
1191       else
1192         {
1193           /* Handle extra maps.  */
1194           size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
1195           if (nr < ctype->nr_charclass)
1196             {
1197               iov[2 + elem + offset].iov_base = ctype->class_b[nr];
1198               iov[2 + elem + offset].iov_len = 256 / 32 * sizeof (uint32_t);
1199               idx[elem] += iov[2 + elem + offset].iov_len;
1200               ++offset;
1201
1202               iov[2 + elem + offset] = ctype->class_3level[nr];
1203             }
1204           else
1205             {
1206               nr -= ctype->nr_charclass;
1207               assert (nr < ctype->map_collection_nr);
1208               iov[2 + elem + offset] = ctype->map_3level[nr];
1209             }
1210           idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1211         }
1212     }
1213
1214   assert (2 + elem + offset == (nelems + 2 * ctype->nr_charclass
1215                                 + ctype->map_collection_nr + 4 + 2));
1216
1217   write_locale_data (output_path, LC_CTYPE, "LC_CTYPE", 2 + elem + offset,
1218                      iov);
1219 }
1220
1221
1222 /* Local functions.  */
1223 static void
1224 ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
1225                  const char *name)
1226 {
1227   size_t cnt;
1228
1229   for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1230     if (strcmp (ctype->classnames[cnt], name) == 0)
1231       break;
1232
1233   if (cnt < ctype->nr_charclass)
1234     {
1235       lr_error (lr, _("character class `%s' already defined"), name);
1236       return;
1237     }
1238
1239   if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1240     /* Exit code 2 is prescribed in P1003.2b.  */
1241     WITH_CUR_LOCALE (error (2, 0, _("\
1242 implementation limit: no more than %Zd character classes allowed"),
1243                             MAX_NR_CHARCLASS));
1244
1245   ctype->classnames[ctype->nr_charclass++] = name;
1246 }
1247
1248
1249 static void
1250 ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1251                const char *name, const struct charmap_t *charmap)
1252 {
1253   size_t max_chars = 0;
1254   size_t cnt;
1255
1256   for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1257     {
1258       if (strcmp (ctype->mapnames[cnt], name) == 0)
1259         break;
1260
1261       if (max_chars < ctype->map_collection_max[cnt])
1262         max_chars = ctype->map_collection_max[cnt];
1263     }
1264
1265   if (cnt < ctype->map_collection_nr)
1266     {
1267       lr_error (lr, _("character map `%s' already defined"), name);
1268       return;
1269     }
1270
1271   if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1272     /* Exit code 2 is prescribed in P1003.2b.  */
1273     WITH_CUR_LOCALE (error (2, 0, _("\
1274 implementation limit: no more than %d character maps allowed"),
1275                             MAX_NR_CHARMAP));
1276
1277   ctype->mapnames[cnt] = name;
1278
1279   if (max_chars == 0)
1280     ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1281   else
1282     ctype->map_collection_max[cnt] = max_chars;
1283
1284   ctype->map_collection[cnt] = (uint32_t *)
1285     xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
1286   ctype->map_collection_act[cnt] = 256;
1287
1288   ++ctype->map_collection_nr;
1289 }
1290
1291
1292 /* We have to be prepared that TABLE, MAX, and ACT can be NULL.  This
1293    is possible if we only want to extend the name array.  */
1294 static uint32_t *
1295 find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1296           size_t *act, uint32_t idx)
1297 {
1298   size_t cnt;
1299
1300   if (idx < 256)
1301     return table == NULL ? NULL : &(*table)[idx];
1302
1303   /* Use the charnames_idx lookup table instead of the slow search loop.  */
1304 #if 1
1305   cnt = idx_table_get (&ctype->charnames_idx, idx);
1306   if (cnt == EMPTY)
1307     /* Not found.  */
1308     cnt = ctype->charnames_act;
1309 #else
1310   for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1311     if (ctype->charnames[cnt] == idx)
1312       break;
1313 #endif
1314
1315   /* We have to distinguish two cases: the name is found or not.  */
1316   if (cnt == ctype->charnames_act)
1317     {
1318       /* Extend the name array.  */
1319       if (ctype->charnames_act == ctype->charnames_max)
1320         {
1321           ctype->charnames_max *= 2;
1322           ctype->charnames = (uint32_t *)
1323             xrealloc (ctype->charnames,
1324                       sizeof (uint32_t) * ctype->charnames_max);
1325         }
1326       ctype->charnames[ctype->charnames_act++] = idx;
1327       idx_table_add (&ctype->charnames_idx, idx, cnt);
1328     }
1329
1330   if (table == NULL)
1331     /* We have done everything we are asked to do.  */
1332     return NULL;
1333
1334   if (max == NULL)
1335     /* The caller does not want to extend the table.  */
1336     return (cnt >= *act ? NULL : &(*table)[cnt]);
1337
1338   if (cnt >= *act)
1339     {
1340       if (cnt >= *max)
1341         {
1342           size_t old_max = *max;
1343           do
1344             *max *= 2;
1345           while (*max <= cnt);
1346
1347           *table =
1348             (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
1349           memset (&(*table)[old_max], '\0',
1350                   (*max - old_max) * sizeof (uint32_t));
1351         }
1352
1353       *act = cnt + 1;
1354     }
1355
1356   return &(*table)[cnt];
1357 }
1358
1359
1360 static int
1361 get_character (struct token *now, const struct charmap_t *charmap,
1362                struct repertoire_t *repertoire,
1363                struct charseq **seqp, uint32_t *wchp)
1364 {
1365   if (now->tok == tok_bsymbol)
1366     {
1367       /* This will hopefully be the normal case.  */
1368       *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1369                                      now->val.str.lenmb);
1370       *seqp = charmap_find_value (charmap, now->val.str.startmb,
1371                                   now->val.str.lenmb);
1372     }
1373   else if (now->tok == tok_ucs4)
1374     {
1375       char utmp[10];
1376
1377       snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1378       *seqp = charmap_find_value (charmap, utmp, 9);
1379
1380       if (*seqp == NULL)
1381         *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
1382
1383       if (*seqp == NULL)
1384         {
1385           /* Compute the value in the charmap from the UCS value.  */
1386           const char *symbol = repertoire_find_symbol (repertoire,
1387                                                        now->val.ucs4);
1388
1389           if (symbol == NULL)
1390             *seqp = NULL;
1391           else
1392             *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
1393
1394           if (*seqp == NULL)
1395             {
1396               if (repertoire != NULL)
1397                 {
1398                   /* Insert a negative entry.  */
1399                   static const struct charseq negative
1400                     = { .ucs4 = ILLEGAL_CHAR_VALUE };
1401                   uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1402                                                   sizeof (uint32_t));
1403                   *newp = now->val.ucs4;
1404
1405                   insert_entry (&repertoire->seq_table, newp,
1406                                 sizeof (uint32_t), (void *) &negative);
1407                 }
1408             }
1409           else
1410             (*seqp)->ucs4 = now->val.ucs4;
1411         }
1412       else if ((*seqp)->ucs4 != now->val.ucs4)
1413         *seqp = NULL;
1414
1415       *wchp = now->val.ucs4;
1416     }
1417   else if (now->tok == tok_charcode)
1418     {
1419       /* We must map from the byte code to UCS4.  */
1420       *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1421                                    now->val.str.lenmb);
1422
1423       if (*seqp == NULL)
1424         *wchp = ILLEGAL_CHAR_VALUE;
1425       else
1426         {
1427           if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1428             (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1429                                                    strlen ((*seqp)->name));
1430           *wchp = (*seqp)->ucs4;
1431         }
1432     }
1433   else
1434     return 1;
1435
1436   return 0;
1437 }
1438
1439
1440 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1441    the .(2). counterparts.  */
1442 static void
1443 charclass_symbolic_ellipsis (struct linereader *ldfile,
1444                              struct locale_ctype_t *ctype,
1445                              const struct charmap_t *charmap,
1446                              struct repertoire_t *repertoire,
1447                              struct token *now,
1448                              const char *last_str,
1449                              unsigned long int class256_bit,
1450                              unsigned long int class_bit, int base,
1451                              int ignore_content, int handle_digits, int step)
1452 {
1453   const char *nowstr = now->val.str.startmb;
1454   char tmp[now->val.str.lenmb + 1];
1455   const char *cp;
1456   char *endp;
1457   unsigned long int from;
1458   unsigned long int to;
1459
1460   /* We have to compute the ellipsis values using the symbolic names.  */
1461   assert (last_str != NULL);
1462
1463   if (strlen (last_str) != now->val.str.lenmb)
1464     {
1465     invalid_range:
1466       lr_error (ldfile,
1467                 _("`%s' and `%.*s' are no valid names for symbolic range"),
1468                 last_str, (int) now->val.str.lenmb, nowstr);
1469       return;
1470     }
1471
1472   if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1473     /* Nothing to do, the names are the same.  */
1474     return;
1475
1476   for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1477     ;
1478
1479   errno = 0;
1480   from = strtoul (cp, &endp, base);
1481   if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1482     goto invalid_range;
1483
1484   to = strtoul (nowstr + (cp - last_str), &endp, base);
1485   if ((to == UINT_MAX && errno == ERANGE)
1486       || (endp - nowstr) != now->val.str.lenmb || from >= to)
1487     goto invalid_range;
1488
1489   /* OK, we have a range FROM - TO.  Now we can create the symbolic names.  */
1490   if (!ignore_content)
1491     {
1492       now->val.str.startmb = tmp;
1493       while ((from += step) <= to)
1494         {
1495           struct charseq *seq;
1496           uint32_t wch;
1497
1498           sprintf (tmp, (base == 10 ? "%.*s%0*ld" : "%.*s%0*lX"),
1499                    (int) (cp - last_str), last_str,
1500                    (int) (now->val.str.lenmb - (cp - last_str)),
1501                    from);
1502
1503           get_character (now, charmap, repertoire, &seq, &wch);
1504
1505           if (seq != NULL && seq->nbytes == 1)
1506             /* Yep, we can store information about this byte sequence.  */
1507             ctype->class256_collection[seq->bytes[0]] |= class256_bit;
1508
1509           if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1510             /* We have the UCS4 position.  */
1511             *find_idx (ctype, &ctype->class_collection,
1512                        &ctype->class_collection_max,
1513                        &ctype->class_collection_act, wch) |= class_bit;
1514
1515           if (handle_digits == 1)
1516             {
1517               /* We must store the digit values.  */
1518               if (ctype->mbdigits_act == ctype->mbdigits_max)
1519                 {
1520                   ctype->mbdigits_max *= 2;
1521                   ctype->mbdigits = xrealloc (ctype->mbdigits,
1522                                               (ctype->mbdigits_max
1523                                                * sizeof (char *)));
1524                   ctype->wcdigits_max *= 2;
1525                   ctype->wcdigits = xrealloc (ctype->wcdigits,
1526                                               (ctype->wcdigits_max
1527                                                * sizeof (uint32_t)));
1528                 }
1529
1530               ctype->mbdigits[ctype->mbdigits_act++] = seq;
1531               ctype->wcdigits[ctype->wcdigits_act++] = wch;
1532             }
1533           else if (handle_digits == 2)
1534             {
1535               /* We must store the digit values.  */
1536               if (ctype->outdigits_act >= 10)
1537                 {
1538                   lr_error (ldfile, _("\
1539 %s: field `%s' does not contain exactly ten entries"),
1540                             "LC_CTYPE", "outdigit");
1541                   return;
1542                 }
1543
1544               ctype->mboutdigits[ctype->outdigits_act] = seq;
1545               ctype->wcoutdigits[ctype->outdigits_act] = wch;
1546               ++ctype->outdigits_act;
1547             }
1548         }
1549     }
1550 }
1551
1552
1553 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'.  */
1554 static void
1555 charclass_ucs4_ellipsis (struct linereader *ldfile,
1556                          struct locale_ctype_t *ctype,
1557                          const struct charmap_t *charmap,
1558                          struct repertoire_t *repertoire,
1559                          struct token *now, uint32_t last_wch,
1560                          unsigned long int class256_bit,
1561                          unsigned long int class_bit, int ignore_content,
1562                          int handle_digits, int step)
1563 {
1564   if (last_wch > now->val.ucs4)
1565     {
1566       lr_error (ldfile, _("\
1567 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1568                 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1569                 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
1570       return;
1571     }
1572
1573   if (!ignore_content)
1574     while ((last_wch += step) <= now->val.ucs4)
1575       {
1576         /* We have to find out whether there is a byte sequence corresponding
1577            to this UCS4 value.  */
1578         struct charseq *seq;
1579         char utmp[10];
1580
1581         snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1582         seq = charmap_find_value (charmap, utmp, 9);
1583         if (seq == NULL)
1584           {
1585             snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1586             seq = charmap_find_value (charmap, utmp, 5);
1587           }
1588
1589         if (seq == NULL)
1590           /* Try looking in the repertoire map.  */
1591           seq = repertoire_find_seq (repertoire, last_wch);
1592
1593         /* If this is the first time we look for this sequence create a new
1594            entry.  */
1595         if (seq == NULL)
1596           {
1597             static const struct charseq negative
1598               = { .ucs4 = ILLEGAL_CHAR_VALUE };
1599
1600             /* Find the symbolic name for this UCS4 value.  */
1601             if (repertoire != NULL)
1602               {
1603                 const char *symbol = repertoire_find_symbol (repertoire,
1604                                                              last_wch);
1605                 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1606                                                 sizeof (uint32_t));
1607                 *newp = last_wch;
1608
1609                 if (symbol != NULL)
1610                   /* We have a name, now search the multibyte value.  */
1611                   seq = charmap_find_value (charmap, symbol, strlen (symbol));
1612
1613                 if (seq == NULL)
1614                   /* We have to create a fake entry.  */
1615                   seq = (struct charseq *) &negative;
1616                 else
1617                   seq->ucs4 = last_wch;
1618
1619                 insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1620                               seq);
1621               }
1622             else
1623               /* We have to create a fake entry.  */
1624               seq = (struct charseq *) &negative;
1625           }
1626
1627         /* We have a name, now search the multibyte value.  */
1628         if (seq->ucs4 == last_wch && seq->nbytes == 1)
1629           /* Yep, we can store information about this byte sequence.  */
1630           ctype->class256_collection[(size_t) seq->bytes[0]]
1631             |= class256_bit;
1632
1633         /* And of course we have the UCS4 position.  */
1634         if (class_bit != 0)
1635           *find_idx (ctype, &ctype->class_collection,
1636                      &ctype->class_collection_max,
1637                      &ctype->class_collection_act, last_wch) |= class_bit;
1638
1639         if (handle_digits == 1)
1640           {
1641             /* We must store the digit values.  */
1642             if (ctype->mbdigits_act == ctype->mbdigits_max)
1643               {
1644                 ctype->mbdigits_max *= 2;
1645                 ctype->mbdigits = xrealloc (ctype->mbdigits,
1646                                             (ctype->mbdigits_max
1647                                              * sizeof (char *)));
1648                 ctype->wcdigits_max *= 2;
1649                 ctype->wcdigits = xrealloc (ctype->wcdigits,
1650                                             (ctype->wcdigits_max
1651                                              * sizeof (uint32_t)));
1652               }
1653
1654             ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1655                                                       ? seq : NULL);
1656             ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1657           }
1658         else if (handle_digits == 2)
1659           {
1660             /* We must store the digit values.  */
1661             if (ctype->outdigits_act >= 10)
1662               {
1663                 lr_error (ldfile, _("\
1664 %s: field `%s' does not contain exactly ten entries"),
1665                           "LC_CTYPE", "outdigit");
1666                 return;
1667               }
1668
1669             ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1670                                                         ? seq : NULL);
1671             ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1672             ++ctype->outdigits_act;
1673           }
1674       }
1675 }
1676
1677
1678 /* Ellipsis as in `/xea/x12.../xea/x34'.  */
1679 static void
1680 charclass_charcode_ellipsis (struct linereader *ldfile,
1681                              struct locale_ctype_t *ctype,
1682                              const struct charmap_t *charmap,
1683                              struct repertoire_t *repertoire,
1684                              struct token *now, char *last_charcode,
1685                              uint32_t last_charcode_len,
1686                              unsigned long int class256_bit,
1687                              unsigned long int class_bit, int ignore_content,
1688                              int handle_digits)
1689 {
1690   /* First check whether the to-value is larger.  */
1691   if (now->val.charcode.nbytes != last_charcode_len)
1692     {
1693       lr_error (ldfile, _("\
1694 start and end character sequence of range must have the same length"));
1695       return;
1696     }
1697
1698   if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
1699     {
1700       lr_error (ldfile, _("\
1701 to-value character sequence is smaller than from-value sequence"));
1702       return;
1703     }
1704
1705   if (!ignore_content)
1706     {
1707       do
1708         {
1709           /* Increment the byte sequence value.  */
1710           struct charseq *seq;
1711           uint32_t wch;
1712           int i;
1713
1714           for (i = last_charcode_len - 1; i >= 0; --i)
1715             if (++last_charcode[i] != 0)
1716               break;
1717
1718           if (last_charcode_len == 1)
1719             /* Of course we have the charcode value.  */
1720             ctype->class256_collection[(size_t) last_charcode[0]]
1721               |= class256_bit;
1722
1723           /* Find the symbolic name.  */
1724           seq = charmap_find_symbol (charmap, last_charcode,
1725                                      last_charcode_len);
1726           if (seq != NULL)
1727             {
1728               if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1729                 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1730                                                    strlen (seq->name));
1731               wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
1732
1733               if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1734                 *find_idx (ctype, &ctype->class_collection,
1735                            &ctype->class_collection_max,
1736                            &ctype->class_collection_act, wch) |= class_bit;
1737             }
1738           else
1739             wch = ILLEGAL_CHAR_VALUE;
1740
1741           if (handle_digits == 1)
1742             {
1743               /* We must store the digit values.  */
1744               if (ctype->mbdigits_act == ctype->mbdigits_max)
1745                 {
1746                   ctype->mbdigits_max *= 2;
1747                   ctype->mbdigits = xrealloc (ctype->mbdigits,
1748                                               (ctype->mbdigits_max
1749                                                * sizeof (char *)));
1750                   ctype->wcdigits_max *= 2;
1751                   ctype->wcdigits = xrealloc (ctype->wcdigits,
1752                                               (ctype->wcdigits_max
1753                                                * sizeof (uint32_t)));
1754                 }
1755
1756               seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1757               memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1758               seq->nbytes = last_charcode_len;
1759
1760               ctype->mbdigits[ctype->mbdigits_act++] = seq;
1761               ctype->wcdigits[ctype->wcdigits_act++] = wch;
1762             }
1763           else if (handle_digits == 2)
1764             {
1765               struct charseq *seq;
1766               /* We must store the digit values.  */
1767               if (ctype->outdigits_act >= 10)
1768                 {
1769                   lr_error (ldfile, _("\
1770 %s: field `%s' does not contain exactly ten entries"),
1771                             "LC_CTYPE", "outdigit");
1772                   return;
1773                 }
1774
1775               seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1776               memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1777               seq->nbytes = last_charcode_len;
1778
1779               ctype->mboutdigits[ctype->outdigits_act] = seq;
1780               ctype->wcoutdigits[ctype->outdigits_act] = wch;
1781               ++ctype->outdigits_act;
1782             }
1783         }
1784       while (memcmp (last_charcode, now->val.charcode.bytes,
1785                      last_charcode_len) != 0);
1786     }
1787 }
1788
1789
1790 static uint32_t *
1791 find_translit2 (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
1792                 uint32_t wch)
1793 {
1794   struct translit_t *trunp = ctype->translit;
1795   struct translit_ignore_t *tirunp = ctype->translit_ignore;
1796
1797   while (trunp != NULL)
1798     {
1799       /* XXX We simplify things here.  The transliterations we look
1800          for are only allowed to have one character.  */
1801       if (trunp->from[0] == wch && trunp->from[1] == 0)
1802         {
1803           /* Found it.  Now look for a transliteration which can be
1804              represented with the character set.  */
1805           struct translit_to_t *torunp = trunp->to;
1806
1807           while (torunp != NULL)
1808             {
1809               int i;
1810
1811               for (i = 0; torunp->str[i] != 0; ++i)
1812                 {
1813                   char utmp[10];
1814
1815                   snprintf (utmp, sizeof (utmp), "U%08X", torunp->str[i]);
1816                   if (charmap_find_value (charmap, utmp, 9) == NULL)
1817                     /* This character cannot be represented.  */
1818                     break;
1819                 }
1820
1821               if (torunp->str[i] == 0)
1822                 return torunp->str;
1823
1824               torunp = torunp->next;
1825             }
1826
1827           break;
1828         }
1829
1830       trunp = trunp->next;
1831     }
1832
1833   /* Check for ignored chars.  */
1834   while (tirunp != NULL)
1835     {
1836       if (tirunp->from <= wch && tirunp->to >= wch)
1837         {
1838           uint32_t wi;
1839
1840           for (wi = tirunp->from; wi <= wch; wi += tirunp->step)
1841             if (wi == wch)
1842               return (uint32_t []) { 0 };
1843         }
1844     }
1845
1846   /* Nothing found.  */
1847   return NULL;
1848 }
1849
1850
1851 uint32_t *
1852 find_translit (struct localedef_t *locale, const struct charmap_t *charmap,
1853                uint32_t wch)
1854 {
1855   struct locale_ctype_t *ctype;
1856   uint32_t *result = NULL;
1857
1858   assert (locale != NULL);
1859   ctype = locale->categories[LC_CTYPE].ctype;
1860
1861   if (ctype->translit != NULL)
1862     result = find_translit2 (ctype, charmap, wch);
1863
1864   if (result == NULL)
1865     {
1866       struct translit_include_t *irunp = ctype->translit_include;
1867
1868       while (irunp != NULL && result == NULL)
1869         {
1870           result = find_translit (find_locale (CTYPE_LOCALE,
1871                                                irunp->copy_locale,
1872                                                irunp->copy_repertoire,
1873                                                charmap),
1874                                   charmap, wch);
1875           irunp = irunp->next;
1876         }
1877     }
1878
1879   return result;
1880 }
1881
1882
1883 /* Read one transliteration entry.  */
1884 static uint32_t *
1885 read_widestring (struct linereader *ldfile, struct token *now,
1886                  const struct charmap_t *charmap,
1887                  struct repertoire_t *repertoire)
1888 {
1889   uint32_t *wstr;
1890
1891   if (now->tok == tok_default_missing)
1892     /* The special name "" will denote this case.  */
1893     wstr = ((uint32_t *) { 0 });
1894   else if (now->tok == tok_bsymbol)
1895     {
1896       /* Get the value from the repertoire.  */
1897       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1898       wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1899                                        now->val.str.lenmb);
1900       if (wstr[0] == ILLEGAL_CHAR_VALUE)
1901         {
1902           /* We cannot proceed, we don't know the UCS4 value.  */
1903           free (wstr);
1904           return NULL;
1905         }
1906
1907       wstr[1] = 0;
1908     }
1909   else if (now->tok == tok_ucs4)
1910     {
1911       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1912       wstr[0] = now->val.ucs4;
1913       wstr[1] = 0;
1914     }
1915   else if (now->tok == tok_charcode)
1916     {
1917       /* Argh, we have to convert to the symbol name first and then to the
1918          UCS4 value.  */
1919       struct charseq *seq = charmap_find_symbol (charmap,
1920                                                  now->val.str.startmb,
1921                                                  now->val.str.lenmb);
1922       if (seq == NULL)
1923         /* Cannot find the UCS4 value.  */
1924         return NULL;
1925
1926       if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1927         seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1928                                            strlen (seq->name));
1929       if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1930         /* We cannot proceed, we don't know the UCS4 value.  */
1931         return NULL;
1932
1933       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1934       wstr[0] = seq->ucs4;
1935       wstr[1] = 0;
1936     }
1937   else if (now->tok == tok_string)
1938     {
1939       wstr = now->val.str.startwc;
1940       if (wstr == NULL || wstr[0] == 0)
1941         return NULL;
1942     }
1943   else
1944     {
1945       if (now->tok != tok_eol && now->tok != tok_eof)
1946         lr_ignore_rest (ldfile, 0);
1947       SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1948       return (uint32_t *) -1l;
1949     }
1950
1951   return wstr;
1952 }
1953
1954
1955 static void
1956 read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1957                      struct token *now, const struct charmap_t *charmap,
1958                      struct repertoire_t *repertoire)
1959 {
1960   uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1961   struct translit_t *result;
1962   struct translit_to_t **top;
1963   struct obstack *ob = &ctype->mempool;
1964   int first;
1965   int ignore;
1966
1967   if (from_wstr == NULL)
1968     /* There is no valid from string.  */
1969     return;
1970
1971   result = (struct translit_t *) obstack_alloc (ob,
1972                                                 sizeof (struct translit_t));
1973   result->from = from_wstr;
1974   result->fname = ldfile->fname;
1975   result->lineno = ldfile->lineno;
1976   result->next = NULL;
1977   result->to = NULL;
1978   top = &result->to;
1979   first = 1;
1980   ignore = 0;
1981
1982   while (1)
1983     {
1984       uint32_t *to_wstr;
1985
1986       /* Next we have one or more transliterations.  They are
1987          separated by semicolons.  */
1988       now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
1989
1990       if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
1991         {
1992           /* One string read.  */
1993           const uint32_t zero = 0;
1994
1995           if (!ignore)
1996             {
1997               obstack_grow (ob, &zero, 4);
1998               to_wstr = obstack_finish (ob);
1999
2000               *top = obstack_alloc (ob, sizeof (struct translit_to_t));
2001               (*top)->str = to_wstr;
2002               (*top)->next = NULL;
2003             }
2004
2005           if (now->tok == tok_eol)
2006             {
2007               result->next = ctype->translit;
2008               ctype->translit = result;
2009               return;
2010             }
2011
2012           if (!ignore)
2013             top = &(*top)->next;
2014           ignore = 0;
2015         }
2016       else
2017         {
2018           to_wstr = read_widestring (ldfile, now, charmap, repertoire);
2019           if (to_wstr == (uint32_t *) -1l)
2020             {
2021               /* An error occurred.  */
2022               obstack_free (ob, result);
2023               return;
2024             }
2025
2026           if (to_wstr == NULL)
2027             ignore = 1;
2028           else
2029             /* This value is usable.  */
2030             obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
2031
2032           first = 0;
2033         }
2034     }
2035 }
2036
2037
2038 static void
2039 read_translit_ignore_entry (struct linereader *ldfile,
2040                             struct locale_ctype_t *ctype,
2041                             const struct charmap_t *charmap,
2042                             struct repertoire_t *repertoire)
2043 {
2044   /* We expect a semicolon-separated list of characters we ignore.  We are
2045      only interested in the wide character definitions.  These must be
2046      single characters, possibly defining a range when an ellipsis is used.  */
2047   while (1)
2048     {
2049       struct token *now = lr_token (ldfile, charmap, NULL, repertoire,
2050                                     verbose);
2051       struct translit_ignore_t *newp;
2052       uint32_t from;
2053
2054       if (now->tok == tok_eol || now->tok == tok_eof)
2055         {
2056           lr_error (ldfile,
2057                     _("premature end of `translit_ignore' definition"));
2058           return;
2059         }
2060
2061       if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2062         {
2063           lr_error (ldfile, _("syntax error"));
2064           lr_ignore_rest (ldfile, 0);
2065           return;
2066         }
2067
2068       if (now->tok == tok_ucs4)
2069         from = now->val.ucs4;
2070       else
2071         /* Try to get the value.  */
2072         from = repertoire_find_value (repertoire, now->val.str.startmb,
2073                                       now->val.str.lenmb);
2074
2075       if (from == ILLEGAL_CHAR_VALUE)
2076         {
2077           lr_error (ldfile, "invalid character name");
2078           newp = NULL;
2079         }
2080       else
2081         {
2082           newp = (struct translit_ignore_t *)
2083             obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
2084           newp->from = from;
2085           newp->to = from;
2086           newp->step = 1;
2087
2088           newp->next = ctype->translit_ignore;
2089           ctype->translit_ignore = newp;
2090         }
2091
2092       /* Now we expect either a semicolon, an ellipsis, or the end of the
2093          line.  */
2094       now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2095
2096       if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
2097         {
2098           /* XXX Should we bother implementing `....'?  `...' certainly
2099              will not be implemented.  */
2100           uint32_t to;
2101           int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
2102
2103           now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2104
2105           if (now->tok == tok_eol || now->tok == tok_eof)
2106             {
2107               lr_error (ldfile,
2108                         _("premature end of `translit_ignore' definition"));
2109               return;
2110             }
2111
2112           if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2113             {
2114               lr_error (ldfile, _("syntax error"));
2115               lr_ignore_rest (ldfile, 0);
2116               return;
2117             }
2118
2119           if (now->tok == tok_ucs4)
2120             to = now->val.ucs4;
2121           else
2122             /* Try to get the value.  */
2123             to = repertoire_find_value (repertoire, now->val.str.startmb,
2124                                         now->val.str.lenmb);
2125
2126           if (to == ILLEGAL_CHAR_VALUE)
2127             lr_error (ldfile, "invalid character name");
2128           else
2129             {
2130               /* Make sure the `to'-value is larger.  */
2131               if (to >= from)
2132                 {
2133                   newp->to = to;
2134                   newp->step = step;
2135                 }
2136               else
2137                 lr_error (ldfile, _("\
2138 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2139                           (to | from) < 65536 ? 4 : 8, to,
2140                           (to | from) < 65536 ? 4 : 8, from);
2141             }
2142
2143           /* And the next token.  */
2144           now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2145         }
2146
2147       if (now->tok == tok_eol || now->tok == tok_eof)
2148         /* We are done.  */
2149         return;
2150
2151       if (now->tok == tok_semicolon)
2152         /* Next round.  */
2153         continue;
2154
2155       /* If we come here something is wrong.  */
2156       lr_error (ldfile, _("syntax error"));
2157       lr_ignore_rest (ldfile, 0);
2158       return;
2159     }
2160 }
2161
2162
2163 /* The parser for the LC_CTYPE section of the locale definition.  */
2164 void
2165 ctype_read (struct linereader *ldfile, struct localedef_t *result,
2166             const struct charmap_t *charmap, const char *repertoire_name,
2167             int ignore_content)
2168 {
2169   struct repertoire_t *repertoire = NULL;
2170   struct locale_ctype_t *ctype;
2171   struct token *now;
2172   enum token_t nowtok;
2173   size_t cnt;
2174   struct charseq *last_seq;
2175   uint32_t last_wch = 0;
2176   enum token_t last_token;
2177   enum token_t ellipsis_token;
2178   int step;
2179   char last_charcode[16];
2180   size_t last_charcode_len = 0;
2181   const char *last_str = NULL;
2182   int mapidx;
2183   struct localedef_t *copy_locale = NULL;
2184
2185   /* Get the repertoire we have to use.  */
2186   if (repertoire_name != NULL)
2187     repertoire = repertoire_read (repertoire_name);
2188
2189   /* The rest of the line containing `LC_CTYPE' must be free.  */
2190   lr_ignore_rest (ldfile, 1);
2191
2192
2193   do
2194     {
2195       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2196       nowtok = now->tok;
2197     }
2198   while (nowtok == tok_eol);
2199
2200   /* If we see `copy' now we are almost done.  */
2201   if (nowtok == tok_copy)
2202     {
2203       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2204       if (now->tok != tok_string)
2205         {
2206           SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2207
2208         skip_category:
2209           do
2210             now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2211           while (now->tok != tok_eof && now->tok != tok_end);
2212
2213           if (now->tok != tok_eof
2214               || (now = lr_token (ldfile, charmap, NULL, NULL, verbose),
2215                   now->tok == tok_eof))
2216             lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2217           else if (now->tok != tok_lc_ctype)
2218             {
2219               lr_error (ldfile, _("\
2220 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2221               lr_ignore_rest (ldfile, 0);
2222             }
2223           else
2224             lr_ignore_rest (ldfile, 1);
2225
2226           return;
2227         }
2228
2229       if (! ignore_content)
2230         {
2231           /* Get the locale definition.  */
2232           copy_locale = load_locale (LC_CTYPE, now->val.str.startmb,
2233                                      repertoire_name, charmap, NULL);
2234           if ((copy_locale->avail & CTYPE_LOCALE) == 0)
2235             {
2236               /* Not yet loaded.  So do it now.  */
2237               if (locfile_read (copy_locale, charmap) != 0)
2238                 goto skip_category;
2239             }
2240         }
2241
2242       lr_ignore_rest (ldfile, 1);
2243
2244       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2245       nowtok = now->tok;
2246     }
2247
2248   /* Prepare the data structures.  */
2249   ctype_startup (ldfile, result, charmap, copy_locale, ignore_content);
2250   ctype = result->categories[LC_CTYPE].ctype;
2251
2252   /* Remember the repertoire we use.  */
2253   if (!ignore_content)
2254     ctype->repertoire = repertoire;
2255
2256   while (1)
2257     {
2258       unsigned long int class_bit = 0;
2259       unsigned long int class256_bit = 0;
2260       int handle_digits = 0;
2261
2262       /* Of course we don't proceed beyond the end of file.  */
2263       if (nowtok == tok_eof)
2264         break;
2265
2266       /* Ignore empty lines.  */
2267       if (nowtok == tok_eol)
2268         {
2269           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2270           nowtok = now->tok;
2271           continue;
2272         }
2273
2274       switch (nowtok)
2275         {
2276         case tok_charclass:
2277           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2278           while (now->tok == tok_ident || now->tok == tok_string)
2279             {
2280               ctype_class_new (ldfile, ctype, now->val.str.startmb);
2281               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2282               if (now->tok != tok_semicolon)
2283                 break;
2284               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2285             }
2286           if (now->tok != tok_eol)
2287             SYNTAX_ERROR (_("\
2288 %s: syntax error in definition of new character class"), "LC_CTYPE");
2289           break;
2290
2291         case tok_charconv:
2292           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2293           while (now->tok == tok_ident || now->tok == tok_string)
2294             {
2295               ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2296               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2297               if (now->tok != tok_semicolon)
2298                 break;
2299               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2300             }
2301           if (now->tok != tok_eol)
2302             SYNTAX_ERROR (_("\
2303 %s: syntax error in definition of new character map"), "LC_CTYPE");
2304           break;
2305
2306         case tok_class:
2307           /* Ignore the rest of the line if we don't need the input of
2308              this line.  */
2309           if (ignore_content)
2310             {
2311               lr_ignore_rest (ldfile, 0);
2312               break;
2313             }
2314
2315           /* We simply forget the `class' keyword and use the following
2316              operand to determine the bit.  */
2317           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2318           if (now->tok == tok_ident || now->tok == tok_string)
2319             {
2320               /* Must can be one of the predefined class names.  */
2321               for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2322                 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
2323                   break;
2324               if (cnt >= ctype->nr_charclass)
2325                 {
2326 #ifdef PREDEFINED_CLASSES
2327                   if (now->val.str.lenmb == 8
2328                       && memcmp ("special1", now->val.str.startmb, 8) == 0)
2329                     class_bit = _ISwspecial1;
2330                   else if (now->val.str.lenmb == 8
2331                       && memcmp ("special2", now->val.str.startmb, 8) == 0)
2332                     class_bit = _ISwspecial2;
2333                   else if (now->val.str.lenmb == 8
2334                       && memcmp ("special3", now->val.str.startmb, 8) == 0)
2335                     class_bit = _ISwspecial3;
2336                   else
2337 #endif
2338                     {
2339                       /* OK, it's a new class.  */
2340                       ctype_class_new (ldfile, ctype, now->val.str.startmb);
2341
2342                       class_bit = _ISwbit (ctype->nr_charclass - 1);
2343                     }
2344                 }
2345               else
2346                 {
2347                   class_bit = _ISwbit (cnt);
2348
2349                   free (now->val.str.startmb);
2350                 }
2351             }
2352           else if (now->tok == tok_digit)
2353             goto handle_tok_digit;
2354           else if (now->tok < tok_upper || now->tok > tok_blank)
2355             goto err_label;
2356           else
2357             {
2358               class_bit = BITw (now->tok);
2359               class256_bit = BIT (now->tok);
2360             }
2361
2362           /* The next character must be a semicolon.  */
2363           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2364           if (now->tok != tok_semicolon)
2365             goto err_label;
2366           goto read_charclass;
2367
2368         case tok_upper:
2369         case tok_lower:
2370         case tok_alpha:
2371         case tok_alnum:
2372         case tok_space:
2373         case tok_cntrl:
2374         case tok_punct:
2375         case tok_graph:
2376         case tok_print:
2377         case tok_xdigit:
2378         case tok_blank:
2379           /* Ignore the rest of the line if we don't need the input of
2380              this line.  */
2381           if (ignore_content)
2382             {
2383               lr_ignore_rest (ldfile, 0);
2384               break;
2385             }
2386
2387           class_bit = BITw (now->tok);
2388           class256_bit = BIT (now->tok);
2389           handle_digits = 0;
2390         read_charclass:
2391           ctype->class_done |= class_bit;
2392           last_token = tok_none;
2393           ellipsis_token = tok_none;
2394           step = 1;
2395           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2396           while (now->tok != tok_eol && now->tok != tok_eof)
2397             {
2398               uint32_t wch;
2399               struct charseq *seq;
2400
2401               if (ellipsis_token == tok_none)
2402                 {
2403                   if (get_character (now, charmap, repertoire, &seq, &wch))
2404                     goto err_label;
2405
2406                   if (!ignore_content && seq != NULL && seq->nbytes == 1)
2407                     /* Yep, we can store information about this byte
2408                        sequence.  */
2409                     ctype->class256_collection[seq->bytes[0]] |= class256_bit;
2410
2411                   if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2412                       && class_bit != 0)
2413                     /* We have the UCS4 position.  */
2414                     *find_idx (ctype, &ctype->class_collection,
2415                                &ctype->class_collection_max,
2416                                &ctype->class_collection_act, wch) |= class_bit;
2417
2418                   last_token = now->tok;
2419                   /* Terminate the string.  */
2420                   if (last_token == tok_bsymbol)
2421                     {
2422                       now->val.str.startmb[now->val.str.lenmb] = '\0';
2423                       last_str = now->val.str.startmb;
2424                     }
2425                   else
2426                     last_str = NULL;
2427                   last_seq = seq;
2428                   last_wch = wch;
2429                   memcpy (last_charcode, now->val.charcode.bytes, 16);
2430                   last_charcode_len = now->val.charcode.nbytes;
2431
2432                   if (!ignore_content && handle_digits == 1)
2433                     {
2434                       /* We must store the digit values.  */
2435                       if (ctype->mbdigits_act == ctype->mbdigits_max)
2436                         {
2437                           ctype->mbdigits_max += 10;
2438                           ctype->mbdigits = xrealloc (ctype->mbdigits,
2439                                                       (ctype->mbdigits_max
2440                                                        * sizeof (char *)));
2441                           ctype->wcdigits_max += 10;
2442                           ctype->wcdigits = xrealloc (ctype->wcdigits,
2443                                                       (ctype->wcdigits_max
2444                                                        * sizeof (uint32_t)));
2445                         }
2446
2447                       ctype->mbdigits[ctype->mbdigits_act++] = seq;
2448                       ctype->wcdigits[ctype->wcdigits_act++] = wch;
2449                     }
2450                   else if (!ignore_content && handle_digits == 2)
2451                     {
2452                       /* We must store the digit values.  */
2453                       if (ctype->outdigits_act >= 10)
2454                         {
2455                           lr_error (ldfile, _("\
2456 %s: field `%s' does not contain exactly ten entries"),
2457                             "LC_CTYPE", "outdigit");
2458                           lr_ignore_rest (ldfile, 0);
2459                           break;
2460                         }
2461
2462                       ctype->mboutdigits[ctype->outdigits_act] = seq;
2463                       ctype->wcoutdigits[ctype->outdigits_act] = wch;
2464                       ++ctype->outdigits_act;
2465                     }
2466                 }
2467               else
2468                 {
2469                   /* Now it gets complicated.  We have to resolve the
2470                      ellipsis problem.  First we must distinguish between
2471                      the different kinds of ellipsis, and this must match the
2472                      tokens we have seen.  */
2473                   assert (last_token != tok_none);
2474
2475                   if (last_token != now->tok)
2476                     {
2477                       lr_error (ldfile, _("\
2478 ellipsis range must be marked by two operands of same type"));
2479                       lr_ignore_rest (ldfile, 0);
2480                       break;
2481                     }
2482
2483                   if (last_token == tok_bsymbol)
2484                     {
2485                       if (ellipsis_token == tok_ellipsis3)
2486                         lr_error (ldfile, _("with symbolic name range values \
2487 the absolute ellipsis `...' must not be used"));
2488
2489                       charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2490                                                    repertoire, now, last_str,
2491                                                    class256_bit, class_bit,
2492                                                    (ellipsis_token
2493                                                     == tok_ellipsis4
2494                                                     ? 10 : 16),
2495                                                    ignore_content,
2496                                                    handle_digits, step);
2497                     }
2498                   else if (last_token == tok_ucs4)
2499                     {
2500                       if (ellipsis_token != tok_ellipsis2)
2501                         lr_error (ldfile, _("\
2502 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2503
2504                       charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2505                                                repertoire, now, last_wch,
2506                                                class256_bit, class_bit,
2507                                                ignore_content, handle_digits,
2508                                                step);
2509                     }
2510                   else
2511                     {
2512                       assert (last_token == tok_charcode);
2513
2514                       if (ellipsis_token != tok_ellipsis3)
2515                         lr_error (ldfile, _("\
2516 with character code range values one must use the absolute ellipsis `...'"));
2517
2518                       charclass_charcode_ellipsis (ldfile, ctype, charmap,
2519                                                    repertoire, now,
2520                                                    last_charcode,
2521                                                    last_charcode_len,
2522                                                    class256_bit, class_bit,
2523                                                    ignore_content,
2524                                                    handle_digits);
2525                     }
2526
2527                   /* Now we have used the last value.  */
2528                   last_token = tok_none;
2529                 }
2530
2531               /* Next we expect a semicolon or the end of the line.  */
2532               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2533               if (now->tok == tok_eol || now->tok == tok_eof)
2534                 break;
2535
2536               if (last_token != tok_none
2537                   && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
2538                 {
2539                   if (now->tok == tok_ellipsis2_2)
2540                     {
2541                       now->tok = tok_ellipsis2;
2542                       step = 2;
2543                     }
2544                   else if (now->tok == tok_ellipsis4_2)
2545                     {
2546                       now->tok = tok_ellipsis4;
2547                       step = 2;
2548                     }
2549
2550                   ellipsis_token = now->tok;
2551
2552                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2553                   continue;
2554                 }
2555
2556               if (now->tok != tok_semicolon)
2557                 goto err_label;
2558
2559               /* And get the next character.  */
2560               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2561
2562               ellipsis_token = tok_none;
2563               step = 1;
2564             }
2565           break;
2566
2567         case tok_digit:
2568           /* Ignore the rest of the line if we don't need the input of
2569              this line.  */
2570           if (ignore_content)
2571             {
2572               lr_ignore_rest (ldfile, 0);
2573               break;
2574             }
2575
2576         handle_tok_digit:
2577           class_bit = _ISwdigit;
2578           class256_bit = _ISdigit;
2579           handle_digits = 1;
2580           goto read_charclass;
2581
2582         case tok_outdigit:
2583           /* Ignore the rest of the line if we don't need the input of
2584              this line.  */
2585           if (ignore_content)
2586             {
2587               lr_ignore_rest (ldfile, 0);
2588               break;
2589             }
2590
2591           if (ctype->outdigits_act != 0)
2592             lr_error (ldfile, _("\
2593 %s: field `%s' declared more than once"),
2594                       "LC_CTYPE", "outdigit");
2595           class_bit = 0;
2596           class256_bit = 0;
2597           handle_digits = 2;
2598           goto read_charclass;
2599
2600         case tok_toupper:
2601           /* Ignore the rest of the line if we don't need the input of
2602              this line.  */
2603           if (ignore_content)
2604             {
2605               lr_ignore_rest (ldfile, 0);
2606               break;
2607             }
2608
2609           mapidx = 0;
2610           goto read_mapping;
2611
2612         case tok_tolower:
2613           /* Ignore the rest of the line if we don't need the input of
2614              this line.  */
2615           if (ignore_content)
2616             {
2617               lr_ignore_rest (ldfile, 0);
2618               break;
2619             }
2620
2621           mapidx = 1;
2622           goto read_mapping;
2623
2624         case tok_map:
2625           /* Ignore the rest of the line if we don't need the input of
2626              this line.  */
2627           if (ignore_content)
2628             {
2629               lr_ignore_rest (ldfile, 0);
2630               break;
2631             }
2632
2633           /* We simply forget the `map' keyword and use the following
2634              operand to determine the mapping.  */
2635           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2636           if (now->tok == tok_ident || now->tok == tok_string)
2637             {
2638               size_t cnt;
2639
2640               for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2641                 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2642                   break;
2643
2644               if (cnt < ctype->map_collection_nr)
2645                 free (now->val.str.startmb);
2646               else
2647                 /* OK, it's a new map.  */
2648                 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2649
2650               mapidx = cnt;
2651             }
2652           else if (now->tok < tok_toupper || now->tok > tok_tolower)
2653             goto err_label;
2654           else
2655             mapidx = now->tok - tok_toupper;
2656
2657           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2658           /* This better should be a semicolon.  */
2659           if (now->tok != tok_semicolon)
2660             goto err_label;
2661
2662         read_mapping:
2663           /* Test whether this mapping was already defined.  */
2664           if (ctype->tomap_done[mapidx])
2665             {
2666               lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2667                         ctype->mapnames[mapidx]);
2668               lr_ignore_rest (ldfile, 0);
2669               break;
2670             }
2671           ctype->tomap_done[mapidx] = 1;
2672
2673           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2674           while (now->tok != tok_eol && now->tok != tok_eof)
2675             {
2676               struct charseq *from_seq;
2677               uint32_t from_wch;
2678               struct charseq *to_seq;
2679               uint32_t to_wch;
2680
2681               /* Every pair starts with an opening brace.  */
2682               if (now->tok != tok_open_brace)
2683                 goto err_label;
2684
2685               /* Next comes the from-value.  */
2686               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2687               if (get_character (now, charmap, repertoire, &from_seq,
2688                                  &from_wch) != 0)
2689                 goto err_label;
2690
2691               /* The next is a comma.  */
2692               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2693               if (now->tok != tok_comma)
2694                 goto err_label;
2695
2696               /* And the other value.  */
2697               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2698               if (get_character (now, charmap, repertoire, &to_seq,
2699                                  &to_wch) != 0)
2700                 goto err_label;
2701
2702               /* And the last thing is the closing brace.  */
2703               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2704               if (now->tok != tok_close_brace)
2705                 goto err_label;
2706
2707               if (!ignore_content)
2708                 {
2709                   if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2710                       && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2711                     /* We can use this value.  */
2712                     ctype->map256_collection[mapidx][from_seq->bytes[0]]
2713                       = to_seq->bytes[0];
2714
2715                   if (from_wch != ILLEGAL_CHAR_VALUE
2716                       && to_wch != ILLEGAL_CHAR_VALUE)
2717                     /* Both correct values.  */
2718                     *find_idx (ctype, &ctype->map_collection[mapidx],
2719                                &ctype->map_collection_max[mapidx],
2720                                &ctype->map_collection_act[mapidx],
2721                                from_wch) = to_wch;
2722                 }
2723
2724               /* Now comes a semicolon or the end of the line/file.  */
2725               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2726               if (now->tok == tok_semicolon)
2727                 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2728             }
2729           break;
2730
2731         case tok_translit_start:
2732           /* Ignore the entire translit section with its peculiar syntax
2733              if we don't need the input.  */
2734           if (ignore_content)
2735             {
2736               do
2737                 {
2738                   lr_ignore_rest (ldfile, 0);
2739                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2740                 }
2741               while (now->tok != tok_translit_end && now->tok != tok_eof);
2742
2743               if (now->tok == tok_eof)
2744                 lr_error (ldfile, _(\
2745 "%s: `translit_start' section does not end with `translit_end'"),
2746                           "LC_CTYPE");
2747
2748               break;
2749             }
2750
2751           /* The rest of the line better should be empty.  */
2752           lr_ignore_rest (ldfile, 1);
2753
2754           /* We count here the number of allocated entries in the `translit'
2755              array.  */
2756           cnt = 0;
2757
2758           ldfile->translate_strings = 1;
2759           ldfile->return_widestr = 1;
2760
2761           /* We proceed until we see the `translit_end' token.  */
2762           while (now = lr_token (ldfile, charmap, NULL, repertoire, verbose),
2763                  now->tok != tok_translit_end && now->tok != tok_eof)
2764             {
2765               if (now->tok == tok_eol)
2766                 /* Ignore empty lines.  */
2767                 continue;
2768
2769               if (now->tok == tok_include)
2770                 {
2771                   /* We have to include locale.  */
2772                   const char *locale_name;
2773                   const char *repertoire_name;
2774                   struct translit_include_t *include_stmt, **include_ptr;
2775
2776                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2777                   /* This should be a string or an identifier.  In any
2778                      case something to name a locale.  */
2779                   if (now->tok != tok_string && now->tok != tok_ident)
2780                     {
2781                     translit_syntax:
2782                       lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2783                       lr_ignore_rest (ldfile, 0);
2784                       continue;
2785                     }
2786                   locale_name = now->val.str.startmb;
2787
2788                   /* Next should be a semicolon.  */
2789                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2790                   if (now->tok != tok_semicolon)
2791                     goto translit_syntax;
2792
2793                   /* Now the repertoire name.  */
2794                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2795                   if ((now->tok != tok_string && now->tok != tok_ident)
2796                       || now->val.str.startmb == NULL)
2797                     goto translit_syntax;
2798                   repertoire_name = now->val.str.startmb;
2799                   if (repertoire_name[0] == '\0')
2800                     /* Ignore the empty string.  */
2801                     repertoire_name = NULL;
2802
2803                   /* Save the include statement for later processing.  */
2804                   include_stmt = (struct translit_include_t *)
2805                     xmalloc (sizeof (struct translit_include_t));
2806                   include_stmt->copy_locale = locale_name;
2807                   include_stmt->copy_repertoire = repertoire_name;
2808                   include_stmt->next = NULL;
2809
2810                   include_ptr = &ctype->translit_include;
2811                   while (*include_ptr != NULL)
2812                     include_ptr = &(*include_ptr)->next;
2813                   *include_ptr = include_stmt;
2814
2815                   /* The rest of the line must be empty.  */
2816                   lr_ignore_rest (ldfile, 1);
2817
2818                   /* Make sure the locale is read.  */
2819                   add_to_readlist (LC_CTYPE, locale_name, repertoire_name,
2820                                    1, NULL);
2821                   continue;
2822                 }
2823               else if (now->tok == tok_default_missing)
2824                 {
2825                   uint32_t *wstr;
2826
2827                   while (1)
2828                     {
2829                       /* We expect a single character or string as the
2830                          argument.  */
2831                       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2832                       wstr = read_widestring (ldfile, now, charmap,
2833                                               repertoire);
2834
2835                       if (wstr != NULL)
2836                         {
2837                           if (ctype->default_missing != NULL)
2838                             {
2839                               lr_error (ldfile, _("\
2840 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2841                               WITH_CUR_LOCALE (error_at_line (0, 0,
2842                                                               ctype->default_missing_file,
2843                                                               ctype->default_missing_lineno,
2844                                                               _("\
2845 previous definition was here")));
2846                             }
2847                           else
2848                             {
2849                               ctype->default_missing = wstr;
2850                               ctype->default_missing_file = ldfile->fname;
2851                               ctype->default_missing_lineno = ldfile->lineno;
2852                             }
2853                           /* We can have more entries, ignore them.  */
2854                           lr_ignore_rest (ldfile, 0);
2855                           break;
2856                         }
2857                       else if (wstr == (uint32_t *) -1l)
2858                         /* This was a syntax error.  */
2859                         break;
2860
2861                       /* Maybe there is another replacement we can use.  */
2862                       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2863                       if (now->tok == tok_eol || now->tok == tok_eof)
2864                         {
2865                           /* Nothing found.  We tell the user.  */
2866                           lr_error (ldfile, _("\
2867 %s: no representable `default_missing' definition found"), "LC_CTYPE");
2868                           break;
2869                         }
2870                       if (now->tok != tok_semicolon)
2871                         goto translit_syntax;
2872                     }
2873
2874                   continue;
2875                 }
2876               else if (now->tok == tok_translit_ignore)
2877                 {
2878                   read_translit_ignore_entry (ldfile, ctype, charmap,
2879                                               repertoire);
2880                   continue;
2881                 }
2882
2883               read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2884             }
2885           ldfile->return_widestr = 0;
2886
2887           if (now->tok == tok_eof)
2888             lr_error (ldfile, _(\
2889 "%s: `translit_start' section does not end with `translit_end'"),
2890                       "LC_CTYPE");
2891
2892           break;
2893
2894         case tok_ident:
2895           /* Ignore the rest of the line if we don't need the input of
2896              this line.  */
2897           if (ignore_content)
2898             {
2899               lr_ignore_rest (ldfile, 0);
2900               break;
2901             }
2902
2903           /* This could mean one of several things.  First test whether
2904              it's a character class name.  */
2905           for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2906             if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2907               break;
2908           if (cnt < ctype->nr_charclass)
2909             {
2910               class_bit = _ISwbit (cnt);
2911               class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2912               free (now->val.str.startmb);
2913               goto read_charclass;
2914             }
2915           for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2916             if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2917               break;
2918           if (cnt < ctype->map_collection_nr)
2919             {
2920               mapidx = cnt;
2921               free (now->val.str.startmb);
2922               goto read_mapping;
2923             }
2924 #ifdef PREDEFINED_CLASSES
2925           if (strcmp (now->val.str.startmb, "special1") == 0)
2926             {
2927               class_bit = _ISwspecial1;
2928               free (now->val.str.startmb);
2929               goto read_charclass;
2930             }
2931           if (strcmp (now->val.str.startmb, "special2") == 0)
2932             {
2933               class_bit = _ISwspecial2;
2934               free (now->val.str.startmb);
2935               goto read_charclass;
2936             }
2937           if (strcmp (now->val.str.startmb, "special3") == 0)
2938             {
2939               class_bit = _ISwspecial3;
2940               free (now->val.str.startmb);
2941               goto read_charclass;
2942             }
2943           if (strcmp (now->val.str.startmb, "tosymmetric") == 0)
2944             {
2945               mapidx = 2;
2946               goto read_mapping;
2947             }
2948 #endif
2949           break;
2950
2951         case tok_end:
2952           /* Next we assume `LC_CTYPE'.  */
2953           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2954           if (now->tok == tok_eof)
2955             break;
2956           if (now->tok == tok_eol)
2957             lr_error (ldfile, _("%s: incomplete `END' line"),
2958                       "LC_CTYPE");
2959           else if (now->tok != tok_lc_ctype)
2960             lr_error (ldfile, _("\
2961 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2962           lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2963           return;
2964
2965         default:
2966         err_label:
2967           if (now->tok != tok_eof)
2968             SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2969         }
2970
2971       /* Prepare for the next round.  */
2972       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2973       nowtok = now->tok;
2974     }
2975
2976   /* When we come here we reached the end of the file.  */
2977   lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2978 }
2979
2980
2981 static void
2982 set_class_defaults (struct locale_ctype_t *ctype,
2983                     const struct charmap_t *charmap,
2984                     struct repertoire_t *repertoire)
2985 {
2986   size_t cnt;
2987
2988   /* This function defines the default values for the classes and conversions
2989      according to POSIX.2 2.5.2.1.
2990      It may seem that the order of these if-blocks is arbitrary but it is NOT.
2991      Don't move them unless you know what you are doing!  */
2992
2993   auto void set_default (int bitpos, int from, int to);
2994
2995   void set_default (int bitpos, int from, int to)
2996     {
2997       /* Add each character code in FROM..TO (inclusive) to the class with
2998          bit position BITPOS: in class256_collection if the charmap gives
2999          it a one-byte encoding, and unconditionally in class_collection.  */
3000       const int mask8 = _ISbit (bitpos);
3001       const int maskw = _ISwbit (bitpos);
3002       char name[2] = "?";
3003       int code;
3004
3005       for (code = from; code <= to; ++code)
3006         {
3007           struct charseq *found;
3008
3009           name[0] = code;
3010           found = charmap_find_value (charmap, name, 1);
3011           if (found == NULL)
3012             {
3013               /* Retry under the name `U' plus eight hex digits.  */
3014               char ucs[10];
3015               sprintf (ucs, "U%08X", code);
3016               found = charmap_find_value (charmap, ucs, 9);
3017             }
3018           if (found == NULL && !be_quiet)
3019             WITH_CUR_LOCALE (error (0, 0, _("\
3020 %s: character `%s' not defined in charmap while needed as default value"),
3021                                     "LC_CTYPE", name));
3022
3023           if (found != NULL && found->nbytes != 1)
3024             WITH_CUR_LOCALE (error (0, 0, _("\
3025 %s: character `%s' in charmap not representable with one byte"),
3026                                     "LC_CTYPE", name));
3027           else if (found != NULL)
3028             ctype->class256_collection[found->bytes[0]] |= mask8;
3029
3030           /* CODE doubles as the index: the ASCII value is also the
3031              Unicode value.  */
3032           ELEM (ctype, class_collection, , code) |= maskw;
3033         }
3034     }
3035
3036   /* Set default values if keyword was not present.  */
3037   if ((ctype->class_done & BITw (tok_upper)) == 0)
3038     /* "If this keyword [upper] is not specified, the uppercase letters
3039         `A' through `Z', ..., shall automatically belong to this class,
3040         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
3041     set_default (BITPOS (tok_upper), 'A', 'Z');
3042
3043   if ((ctype->class_done & BITw (tok_lower)) == 0)
3044     /* "If this keyword [lower] is not specified, the lowercase letters
3045         `a' through `z', ..., shall automatically belong to this class,
3046         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
3047     set_default (BITPOS (tok_lower), 'a', 'z');
3048
3049   if ((ctype->class_done & BITw (tok_alpha)) == 0)
3050     {
3051       /* Table 2-6 in P1003.2 says that characters in class `upper' or
3052          class `lower' *must* be in class `alpha'.  */
3053       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
3054       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
3055
3056       for (cnt = 0; cnt < 256; ++cnt)
3057         if ((ctype->class256_collection[cnt] & mask) != 0)
3058           ctype->class256_collection[cnt] |= BIT (tok_alpha);
3059
3060       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3061         if ((ctype->class_collection[cnt] & maskw) != 0)
3062           ctype->class_collection[cnt] |= BITw (tok_alpha);
3063     }
3064
3065   if ((ctype->class_done & BITw (tok_digit)) == 0)
3066     /* "If this keyword [digit] is not specified, the digits `0' through
3067         `9', ..., shall automatically belong to this class, with
3068         implementation-defined character values."  [P1003.2, 2.5.2.1]  */
3069     set_default (BITPOS (tok_digit), '0', '9');
3070
3071   /* "Only characters specified for the `alpha' and `digit' keywords
3072      shall be specified.  Characters specified for the keywords `alpha'
3073      and `digit' are automatically included in this class [alnum]."  */
3074   {
3075     unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
3076     unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
3077
3078     for (cnt = 0; cnt < 256; ++cnt)
3079       if ((ctype->class256_collection[cnt] & mask) != 0)
3080         ctype->class256_collection[cnt] |= BIT (tok_alnum);
3081
3082     for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3083       if ((ctype->class_collection[cnt] & maskw) != 0)
3084         ctype->class_collection[cnt] |= BITw (tok_alnum);
3085   }
3086
3087   if ((ctype->class_done & BITw (tok_space)) == 0)
3088     /* "If this keyword [space] is not specified, the characters <space>,
3089         <form-feed>, <newline>, <carriage-return>, <tab>, and
3090         <vertical-tab>, ..., shall automatically belong to this class,
3091         with implementation-defined character values."  [P1003.2, 2.5.2.1]  */
3092     {
3093       struct charseq *seq;
3094
3095       seq = charmap_find_value (charmap, "space", 5);
3096       if (seq == NULL)
3097         seq = charmap_find_value (charmap, "SP", 2);
3098       if (seq == NULL)
3099         seq = charmap_find_value (charmap, "U00000020", 9);
3100       if (seq == NULL)
3101         {
3102           if (!be_quiet)
3103             WITH_CUR_LOCALE (error (0, 0, _("\
3104 %s: character `%s' not defined while needed as default value"),
3105                                     "LC_CTYPE", "<space>"));
3106         }
3107       else if (seq->nbytes != 1)
3108         WITH_CUR_LOCALE (error (0, 0, _("\
3109 %s: character `%s' in charmap not representable with one byte"),
3110                                 "LC_CTYPE", "<space>"));
3111       else
3112         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3113
3114       /* No need to search.  */
3115       ELEM (ctype, class_collection, , L' ') |= BITw (tok_space);
3116
3117       seq = charmap_find_value (charmap, "form-feed", 9);
3118       if (seq == NULL)
3119         seq = charmap_find_value (charmap, "U0000000C", 9);
3120       if (seq == NULL)
3121         {
3122           if (!be_quiet)
3123             WITH_CUR_LOCALE (error (0, 0, _("\
3124 %s: character `%s' not defined while needed as default value"),
3125                                     "LC_CTYPE", "<form-feed>"));
3126         }
3127       else if (seq->nbytes != 1)
3128         WITH_CUR_LOCALE (error (0, 0, _("\
3129 %s: character `%s' in charmap not representable with one byte"),
3130                                 "LC_CTYPE", "<form-feed>"));
3131       else
3132         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3133
3134       /* No need to search.  */
3135       ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space);
3136
3137
3138       seq = charmap_find_value (charmap, "newline", 7);
3139       if (seq == NULL)
3140         seq = charmap_find_value (charmap, "U0000000A", 9);
3141       if (seq == NULL)
3142         {
3143           if (!be_quiet)
3144             WITH_CUR_LOCALE (error (0, 0, _("\
3145 character `%s' not defined while needed as default value"),
3146                                     "<newline>"));
3147         }
3148       else if (seq->nbytes != 1)
3149         WITH_CUR_LOCALE (error (0, 0, _("\
3150 %s: character `%s' in charmap not representable with one byte"),
3151                                 "LC_CTYPE", "<newline>"));
3152       else
3153         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3154
3155       /* No need to search.  */
3156       ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space);
3157
3158
3159       seq = charmap_find_value (charmap, "carriage-return", 15);
3160       if (seq == NULL)
3161         seq = charmap_find_value (charmap, "U0000000D", 9);
3162       if (seq == NULL)
3163         {
3164           if (!be_quiet)
3165             WITH_CUR_LOCALE (error (0, 0, _("\
3166 %s: character `%s' not defined while needed as default value"),
3167                                     "LC_CTYPE", "<carriage-return>"));
3168         }
3169       else if (seq->nbytes != 1)
3170         WITH_CUR_LOCALE (error (0, 0, _("\
3171 %s: character `%s' in charmap not representable with one byte"),
3172                                 "LC_CTYPE", "<carriage-return>"));
3173       else
3174         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3175
3176       /* No need to search.  */
3177       ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space);
3178
3179
3180       seq = charmap_find_value (charmap, "tab", 3);
3181       if (seq == NULL)
3182         seq = charmap_find_value (charmap, "U00000009", 9);
3183       if (seq == NULL)
3184         {
3185           if (!be_quiet)
3186             WITH_CUR_LOCALE (error (0, 0, _("\
3187 %s: character `%s' not defined while needed as default value"),
3188                                     "LC_CTYPE", "<tab>"));
3189         }
3190       else if (seq->nbytes != 1)
3191         WITH_CUR_LOCALE (error (0, 0, _("\
3192 %s: character `%s' in charmap not representable with one byte"),
3193                                 "LC_CTYPE", "<tab>"));
3194       else
3195         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3196
3197       /* No need to search.  */
3198       ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space);
3199
3200
3201       seq = charmap_find_value (charmap, "vertical-tab", 12);
3202       if (seq == NULL)
3203         seq = charmap_find_value (charmap, "U0000000B", 9);
3204       if (seq == NULL)
3205         {
3206           if (!be_quiet)
3207             WITH_CUR_LOCALE (error (0, 0, _("\
3208 %s: character `%s' not defined while needed as default value"),
3209                                     "LC_CTYPE", "<vertical-tab>"));
3210         }
3211       else if (seq->nbytes != 1)
3212         WITH_CUR_LOCALE (error (0, 0, _("\
3213 %s: character `%s' in charmap not representable with one byte"),
3214                                 "LC_CTYPE", "<vertical-tab>"));
3215       else
3216         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3217
3218       /* No need to search.  */
3219       ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space);
3220     }
3221
3222   if ((ctype->class_done & BITw (tok_xdigit)) == 0)
3223     /* "If this keyword is not specified, the digits `0' to `9', the
3224         uppercase letters `A' through `F', and the lowercase letters `a'
        through `f', ..., shall automatically belong to this class, with
3226         implementation defined character values."  [P1003.2, 2.5.2.1]  */
3227     {
3228       set_default (BITPOS (tok_xdigit), '0', '9');
3229       set_default (BITPOS (tok_xdigit), 'A', 'F');
3230       set_default (BITPOS (tok_xdigit), 'a', 'f');
3231     }
3232
3233   if ((ctype->class_done & BITw (tok_blank)) == 0)
3234     /* "If this keyword [blank] is unspecified, the characters <space> and
3235        <tab> shall belong to this character class."  [P1003.2, 2.5.2.1]  */
3236    {
3237       struct charseq *seq;
3238
3239       seq = charmap_find_value (charmap, "space", 5);
3240       if (seq == NULL)
3241         seq = charmap_find_value (charmap, "SP", 2);
3242       if (seq == NULL)
3243         seq = charmap_find_value (charmap, "U00000020", 9);
3244       if (seq == NULL)
3245         {
3246           if (!be_quiet)
3247             WITH_CUR_LOCALE (error (0, 0, _("\
3248 %s: character `%s' not defined while needed as default value"),
3249                                     "LC_CTYPE", "<space>"));
3250         }
3251       else if (seq->nbytes != 1)
3252         WITH_CUR_LOCALE (error (0, 0, _("\
3253 %s: character `%s' in charmap not representable with one byte"),
3254                                 "LC_CTYPE", "<space>"));
3255       else
3256         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3257
3258       /* No need to search.  */
3259       ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank);
3260
3261
3262       seq = charmap_find_value (charmap, "tab", 3);
3263       if (seq == NULL)
3264         seq = charmap_find_value (charmap, "U00000009", 9);
3265       if (seq == NULL)
3266         {
3267           if (!be_quiet)
3268             WITH_CUR_LOCALE (error (0, 0, _("\
3269 %s: character `%s' not defined while needed as default value"),
3270                                     "LC_CTYPE", "<tab>"));
3271         }
3272       else if (seq->nbytes != 1)
3273         WITH_CUR_LOCALE (error (0, 0, _("\
3274 %s: character `%s' in charmap not representable with one byte"),
3275                                 "LC_CTYPE", "<tab>"));
3276       else
3277         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3278
3279       /* No need to search.  */
3280       ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank);
3281     }
3282
3283   if ((ctype->class_done & BITw (tok_graph)) == 0)
3284     /* "If this keyword [graph] is not specified, characters specified for
3285         the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3286         shall belong to this character class."  [P1003.2, 2.5.2.1]  */
3287     {
3288       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3289         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3290       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3291         BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3292         BITw (tok_punct);
3293       size_t cnt;
3294
3295       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3296         if ((ctype->class_collection[cnt] & maskw) != 0)
3297           ctype->class_collection[cnt] |= BITw (tok_graph);
3298
3299       for (cnt = 0; cnt < 256; ++cnt)
3300         if ((ctype->class256_collection[cnt] & mask) != 0)
3301           ctype->class256_collection[cnt] |= BIT (tok_graph);
3302     }
3303
3304   if ((ctype->class_done & BITw (tok_print)) == 0)
3305     /* "If this keyword [print] is not provided, characters specified for
3306         the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3307         and the <space> character shall belong to this character class."
3308         [P1003.2, 2.5.2.1]  */
3309     {
3310       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3311         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3312       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3313         BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3314         BITw (tok_punct);
3315       size_t cnt;
3316       struct charseq *seq;
3317
3318       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3319         if ((ctype->class_collection[cnt] & maskw) != 0)
3320           ctype->class_collection[cnt] |= BITw (tok_print);
3321
3322       for (cnt = 0; cnt < 256; ++cnt)
3323         if ((ctype->class256_collection[cnt] & mask) != 0)
3324           ctype->class256_collection[cnt] |= BIT (tok_print);
3325
3326
3327       seq = charmap_find_value (charmap, "space", 5);
3328       if (seq == NULL)
3329         seq = charmap_find_value (charmap, "SP", 2);
3330       if (seq == NULL)
3331         seq = charmap_find_value (charmap, "U00000020", 9);
3332       if (seq == NULL)
3333         {
3334           if (!be_quiet)
3335             WITH_CUR_LOCALE (error (0, 0, _("\
3336 %s: character `%s' not defined while needed as default value"),
3337                                     "LC_CTYPE", "<space>"));
3338         }
3339       else if (seq->nbytes != 1)
3340         WITH_CUR_LOCALE (error (0, 0, _("\
3341 %s: character `%s' in charmap not representable with one byte"),
3342                                 "LC_CTYPE", "<space>"));
3343       else
3344         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
3345
3346       /* No need to search.  */
3347       ELEM (ctype, class_collection, , L' ') |= BITw (tok_print);
3348     }
3349
3350   if (ctype->tomap_done[0] == 0)
3351     /* "If this keyword [toupper] is not specified, the lowercase letters
3352         `a' through `z', and their corresponding uppercase letters `A' to
3353         `Z', ..., shall automatically be included, with implementation-
3354         defined character values."  [P1003.2, 2.5.2.1]  */
3355     {
3356       char tmp[4];
3357       int ch;
3358
3359       strcpy (tmp, "<?>");
3360
3361       for (ch = 'a'; ch <= 'z'; ++ch)
3362         {
3363           struct charseq *seq_from, *seq_to;
3364
3365           tmp[1] = (char) ch;
3366
3367           seq_from = charmap_find_value (charmap, &tmp[1], 1);
3368           if (seq_from == NULL)
3369             {
3370               char buf[10];
3371               sprintf (buf, "U%08X", ch);
3372               seq_from = charmap_find_value (charmap, buf, 9);
3373             }
3374           if (seq_from == NULL)
3375             {
3376               if (!be_quiet)
3377                 WITH_CUR_LOCALE (error (0, 0, _("\
3378 %s: character `%s' not defined while needed as default value"),
3379                                         "LC_CTYPE", tmp));
3380             }
3381           else if (seq_from->nbytes != 1)
3382             {
3383               if (!be_quiet)
3384                 WITH_CUR_LOCALE (error (0, 0, _("\
3385 %s: character `%s' needed as default value not representable with one byte"),
3386                                         "LC_CTYPE", tmp));
3387             }
3388           else
3389             {
3390               /* This conversion is implementation defined.  */
3391               tmp[1] = (char) (ch + ('A' - 'a'));
3392               seq_to = charmap_find_value (charmap, &tmp[1], 1);
3393               if (seq_to == NULL)
3394                 {
3395                   char buf[10];
3396                   sprintf (buf, "U%08X", ch + ('A' - 'a'));
3397                   seq_to = charmap_find_value (charmap, buf, 9);
3398                 }
3399               if (seq_to == NULL)
3400                 {
3401                   if (!be_quiet)
3402                     WITH_CUR_LOCALE (error (0, 0, _("\
3403 %s: character `%s' not defined while needed as default value"),
3404                                             "LC_CTYPE", tmp));
3405                 }
3406               else if (seq_to->nbytes != 1)
3407                 {
3408                   if (!be_quiet)
3409                     WITH_CUR_LOCALE (error (0, 0, _("\
3410 %s: character `%s' needed as default value not representable with one byte"),
3411                                             "LC_CTYPE", tmp));
3412                 }
3413               else
3414                 /* The index [0] is determined by the order of the
3415                    `ctype_map_newP' calls in `ctype_startup'.  */
3416                 ctype->map256_collection[0][seq_from->bytes[0]]
3417                   = seq_to->bytes[0];
3418             }
3419
3420           /* No need to search.  */
3421           ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
3422         }
3423     }
3424
3425   if (ctype->tomap_done[1] == 0)
3426     /* "If this keyword [tolower] is not specified, the mapping shall be
3427        the reverse mapping of the one specified to `toupper'."  [P1003.2]  */
3428     {
3429       for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
3430         if (ctype->map_collection[0][cnt] != 0)
3431           ELEM (ctype, map_collection, [1],
3432                 ctype->map_collection[0][cnt])
3433             = ctype->charnames[cnt];
3434
3435       for (cnt = 0; cnt < 256; ++cnt)
3436         if (ctype->map256_collection[0][cnt] != 0)
3437           ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
3438     }
3439
3440   if (ctype->outdigits_act != 10)
3441     {
3442       if (ctype->outdigits_act != 0)
3443         WITH_CUR_LOCALE (error (0, 0, _("\
3444 %s: field `%s' does not contain exactly ten entries"),
3445                                 "LC_CTYPE", "outdigit"));
3446
3447       for (cnt = ctype->outdigits_act; cnt < 10; ++cnt)
3448         {
3449           ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3450                                                          digits + cnt, 1);
3451
3452           if (ctype->mboutdigits[cnt] == NULL)
3453             ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3454                                                            longnames[cnt],
3455                                                            strlen (longnames[cnt]));
3456
3457           if (ctype->mboutdigits[cnt] == NULL)
3458             ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3459                                                            uninames[cnt], 9);
3460
3461           if (ctype->mboutdigits[cnt] == NULL)
3462             {
3463               /* Provide a replacement.  */
3464               WITH_CUR_LOCALE (error (0, 0, _("\
3465 no output digits defined and none of the standard names in the charmap")));
3466
3467               ctype->mboutdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
3468                                                        sizeof (struct charseq)
3469                                                        + 1);
3470
3471               /* This is better than nothing.  */
3472               ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
3473               ctype->mboutdigits[cnt]->nbytes = 1;
3474             }
3475
3476           ctype->wcoutdigits[cnt] = L'0' + cnt;
3477         }
3478
3479       ctype->outdigits_act = 10;
3480     }
3481 }
3482
3483
3484 /* Construction of sparse 3-level tables.
3485    See wchar-lookup.h for their structure and the meaning of p and q.  */
3486
/* Bit table indexed by wide character, built incrementally with
   wctype_table_add and turned into its final form by
   wctype_table_finalize.  */
struct wctype_table
{
  /* Parameters.  */
  /* A level3 block consists of 1 << p 32-bit words.  */
  unsigned int p;
  /* A level2 block consists of 1 << q entries.  */
  unsigned int q;
  /* Working representation.  */
  /* Number of entries level1 has room for.  */
  size_t level1_alloc;
  /* Number of level1 entries in use.  */
  size_t level1_size;
  /* Each entry is the number of a level2 block, or EMPTY.  */
  uint32_t *level1;
  /* Number of level2 blocks there is room for.  */
  size_t level2_alloc;
  /* Number of level2 blocks in use.  */
  size_t level2_size;
  /* Each entry is the number of a level3 block, or EMPTY.  */
  uint32_t *level2;
  /* Number of level3 blocks there is room for.  */
  size_t level3_alloc;
  /* Number of level3 blocks in use.  */
  size_t level3_size;
  /* Bit words; one bit per character, 32 characters per word.  */
  uint32_t *level3;
  /* Compressed representation.  */
  /* Size of RESULT in bytes; set by wctype_table_finalize.  */
  size_t result_size;
  /* Buffer allocated and filled by wctype_table_finalize.  */
  char *result;
};
3506
3507 /* Initialize.  Assumes t->p and t->q have already been set.  */
3508 static inline void
3509 wctype_table_init (struct wctype_table *t)
3510 {
3511   t->level1 = NULL;
3512   t->level1_alloc = t->level1_size = 0;
3513   t->level2 = NULL;
3514   t->level2_alloc = t->level2_size = 0;
3515   t->level3 = NULL;
3516   t->level3_alloc = t->level3_size = 0;
3517 }
3518
3519 /* Retrieve an entry.  */
3520 static inline int
3521 wctype_table_get (struct wctype_table *t, uint32_t wc)
3522 {
3523   uint32_t index1 = wc >> (t->q + t->p + 5);
3524   if (index1 < t->level1_size)
3525     {
3526       uint32_t lookup1 = t->level1[index1];
3527       if (lookup1 != EMPTY)
3528         {
3529           uint32_t index2 = ((wc >> (t->p + 5)) & ((1 << t->q) - 1))
3530                             + (lookup1 << t->q);
3531           uint32_t lookup2 = t->level2[index2];
3532           if (lookup2 != EMPTY)
3533             {
3534               uint32_t index3 = ((wc >> 5) & ((1 << t->p) - 1))
3535                                 + (lookup2 << t->p);
3536               uint32_t lookup3 = t->level3[index3];
3537               uint32_t index4 = wc & 0x1f;
3538
3539               return (lookup3 >> index4) & 1;
3540             }
3541         }
3542     }
3543   return 0;
3544 }
3545
3546 /* Add one entry.  */
3547 static void
3548 wctype_table_add (struct wctype_table *t, uint32_t wc)
3549 {
3550   uint32_t index1 = wc >> (t->q + t->p + 5);
3551   uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1);
3552   uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1);
3553   uint32_t index4 = wc & 0x1f;
3554   size_t i, i1, i2;
3555
3556   if (index1 >= t->level1_size)
3557     {
3558       if (index1 >= t->level1_alloc)
3559         {
3560           size_t alloc = 2 * t->level1_alloc;
3561           if (alloc <= index1)
3562             alloc = index1 + 1;
3563           t->level1 = (uint32_t *) xrealloc ((char *) t->level1,
3564                                              alloc * sizeof (uint32_t));
3565           t->level1_alloc = alloc;
3566         }
3567       while (index1 >= t->level1_size)
3568         t->level1[t->level1_size++] = EMPTY;
3569     }
3570
3571   if (t->level1[index1] == EMPTY)
3572     {
3573       if (t->level2_size == t->level2_alloc)
3574         {
3575           size_t alloc = 2 * t->level2_alloc + 1;
3576           t->level2 = (uint32_t *) xrealloc ((char *) t->level2,
3577                                              (alloc << t->q) * sizeof (uint32_t));
3578           t->level2_alloc = alloc;
3579         }
3580       i1 = t->level2_size << t->q;
3581       i2 = (t->level2_size + 1) << t->q;
3582       for (i = i1; i < i2; i++)
3583         t->level2[i] = EMPTY;
3584       t->level1[index1] = t->level2_size++;
3585     }
3586
3587   index2 += t->level1[index1] << t->q;
3588
3589   if (t->level2[index2] == EMPTY)
3590     {
3591       if (t->level3_size == t->level3_alloc)
3592         {
3593           size_t alloc = 2 * t->level3_alloc + 1;
3594           t->level3 = (uint32_t *) xrealloc ((char *) t->level3,
3595                                              (alloc << t->p) * sizeof (uint32_t));
3596           t->level3_alloc = alloc;
3597         }
3598       i1 = t->level3_size << t->p;
3599       i2 = (t->level3_size + 1) << t->p;
3600       for (i = i1; i < i2; i++)
3601         t->level3[i] = 0;
3602       t->level2[index2] = t->level3_size++;
3603     }
3604
3605   index3 += t->level2[index2] << t->p;
3606
3607   t->level3[index3] |= (uint32_t)1 << index4;
3608 }
3609
3610 /* Finalize and shrink.  */
3611 static void
3612 wctype_table_finalize (struct wctype_table *t)
3613 {
3614   size_t i, j, k;
3615   uint32_t reorder3[t->level3_size];
3616   uint32_t reorder2[t->level2_size];
3617   uint32_t level1_offset, level2_offset, level3_offset;
3618
3619   /* Uniquify level3 blocks.  */
3620   k = 0;
3621   for (j = 0; j < t->level3_size; j++)
3622     {
3623       for (i = 0; i < k; i++)
3624         if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3625                     (1 << t->p) * sizeof (uint32_t)) == 0)
3626           break;
3627       /* Relocate block j to block i.  */
3628       reorder3[j] = i;
3629       if (i == k)
3630         {
3631           if (i != j)
3632             memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3633                     (1 << t->p) * sizeof (uint32_t));
3634           k++;
3635         }
3636     }
3637   t->level3_size = k;
3638
3639   for (i = 0; i < (t->level2_size << t->q); i++)
3640     if (t->level2[i] != EMPTY)
3641       t->level2[i] = reorder3[t->level2[i]];
3642
3643   /* Uniquify level2 blocks.  */
3644   k = 0;
3645   for (j = 0; j < t->level2_size; j++)
3646     {
3647       for (i = 0; i < k; i++)
3648         if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3649                     (1 << t->q) * sizeof (uint32_t)) == 0)
3650           break;
3651       /* Relocate block j to block i.  */
3652       reorder2[j] = i;
3653       if (i == k)
3654         {
3655           if (i != j)
3656             memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3657                     (1 << t->q) * sizeof (uint32_t));
3658           k++;
3659         }
3660     }
3661   t->level2_size = k;
3662
3663   for (i = 0; i < t->level1_size; i++)
3664     if (t->level1[i] != EMPTY)
3665       t->level1[i] = reorder2[t->level1[i]];
3666
3667   /* Create and fill the resulting compressed representation.  */
3668   t->result_size =
3669     5 * sizeof (uint32_t)
3670     + t->level1_size * sizeof (uint32_t)
3671     + (t->level2_size << t->q) * sizeof (uint32_t)
3672     + (t->level3_size << t->p) * sizeof (uint32_t);
3673   t->result = (char *) xmalloc (t->result_size);
3674
3675   level1_offset =
3676     5 * sizeof (uint32_t);
3677   level2_offset =
3678     5 * sizeof (uint32_t)
3679     + t->level1_size * sizeof (uint32_t);
3680   level3_offset =
3681     5 * sizeof (uint32_t)
3682     + t->level1_size * sizeof (uint32_t)
3683     + (t->level2_size << t->q) * sizeof (uint32_t);
3684
3685   ((uint32_t *) t->result)[0] = t->q + t->p + 5;
3686   ((uint32_t *) t->result)[1] = t->level1_size;
3687   ((uint32_t *) t->result)[2] = t->p + 5;
3688   ((uint32_t *) t->result)[3] = (1 << t->q) - 1;
3689   ((uint32_t *) t->result)[4] = (1 << t->p) - 1;
3690
3691   for (i = 0; i < t->level1_size; i++)
3692     ((uint32_t *) (t->result + level1_offset))[i] =
3693       (t->level1[i] == EMPTY
3694        ? 0
3695        : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3696
3697   for (i = 0; i < (t->level2_size << t->q); i++)
3698     ((uint32_t *) (t->result + level2_offset))[i] =
3699       (t->level2[i] == EMPTY
3700        ? 0
3701        : (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
3702
3703   for (i = 0; i < (t->level3_size << t->p); i++)
3704     ((uint32_t *) (t->result + level3_offset))[i] = t->level3[i];
3705
3706   if (t->level1_alloc > 0)
3707     free (t->level1);
3708   if (t->level2_alloc > 0)
3709     free (t->level2);
3710   if (t->level3_alloc > 0)
3711     free (t->level3);
3712 }
3713
/* Include "3level.h" with TABLE, ELEMENT and DEFAULT set up for a table
   named wcwidth_table whose elements are uint8_t and whose default is
   0xff.  Code later in this file uses struct wcwidth_table together
   with wcwidth_table_init and wcwidth_table_add.  */
#define TABLE wcwidth_table
#define ELEMENT uint8_t
#define DEFAULT 0xff
#include "3level.h"

/* Second inclusion, for a table named wctrans_table with int32_t
   elements and default 0.  While the header is being processed
   wctrans_table_add is defined as wctrans_table_add_internal, which
   leaves the name wctrans_table_add free for the wrapper defined right
   after this.
   NOTE(review): TABLE, ELEMENT and DEFAULT are redefined here without
   an #undef, so 3level.h presumably undefines them itself -- confirm
   against that header.  */
#define TABLE wctrans_table
#define ELEMENT int32_t
#define DEFAULT 0
#define wctrans_table_add wctrans_table_add_internal
#include "3level.h"
#undef wctrans_table_add
/* Record in T that WC maps to MAPPED_WC.  What gets stored is not
   MAPPED_WC itself but its distance from WC, i.e. MAPPED_WC - WC
   computed in unsigned arithmetic.  */
static inline void
wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
{
  uint32_t delta = mapped_wc - wc;

  wctrans_table_add_internal (t, wc, delta);
}
3732
3733
3734 /* Flattens the included transliterations into a translit list.
3735    Inserts them in the list at `cursor', and returns the new cursor.  */
3736 static struct translit_t **
3737 translit_flatten (struct locale_ctype_t *ctype,
3738                   const struct charmap_t *charmap,
3739                   struct translit_t **cursor)
3740 {
3741   while (ctype->translit_include != NULL)
3742     {
3743       const char *copy_locale = ctype->translit_include->copy_locale;
3744       const char *copy_repertoire = ctype->translit_include->copy_repertoire;
3745       struct localedef_t *other;
3746
3747       /* Unchain the include statement.  During the depth-first traversal
3748          we don't want to visit any locale more than once.  */
3749       ctype->translit_include = ctype->translit_include->next;
3750
3751       other = find_locale (LC_CTYPE, copy_locale, copy_repertoire, charmap);
3752
3753       if (other == NULL)
3754         {
3755           WITH_CUR_LOCALE (error (0, 0, _("\
3756 %s: transliteration data from locale `%s' not available"),
3757                                   "LC_CTYPE", copy_locale));
3758         }
3759       else
3760         {
3761           struct locale_ctype_t *other_ctype =
3762             other->categories[LC_CTYPE].ctype;
3763
3764           cursor = translit_flatten (other_ctype, charmap, cursor);
3765           assert (other_ctype->translit_include == NULL);
3766
3767           if (other_ctype->translit != NULL)
3768             {
3769               /* Insert the other_ctype->translit list at *cursor.  */
3770               struct translit_t *endp = other_ctype->translit;
3771               while (endp->next != NULL)
3772                 endp = endp->next;
3773
3774               endp->next = *cursor;
3775               *cursor = other_ctype->translit;
3776
3777               /* Avoid any risk of circular lists.  */
3778               other_ctype->translit = NULL;
3779
3780               cursor = &endp->next;
3781             }
3782
3783           if (ctype->default_missing == NULL)
3784             ctype->default_missing = other_ctype->default_missing;
3785         }
3786     }
3787
3788   return cursor;
3789 }
3790
3791 static void
3792 allocate_arrays (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
3793                  struct repertoire_t *repertoire)
3794 {
3795   size_t idx, nr;
3796   const void *key;
3797   size_t len;
3798   void *vdata;
3799   void *curs;
3800
3801   /* You wonder about this amount of memory?  This is only because some
3802      users do not manage to address the array with unsigned values or
3803      data types with range >= 256.  '\200' would result in the array
3804      index -128.  To help these poor people we duplicate the entries for
3805      128 up to 255 below the entry for \0.  */
3806   ctype->ctype_b = (char_class_t *) xcalloc (256 + 128, sizeof (char_class_t));
3807   ctype->ctype32_b = (char_class32_t *) xcalloc (256, sizeof (char_class32_t));
3808   ctype->class_b = (uint32_t **)
3809     xmalloc (ctype->nr_charclass * sizeof (uint32_t *));
3810   ctype->class_3level = (struct iovec *)
3811     xmalloc (ctype->nr_charclass * sizeof (struct iovec));
3812
3813   /* This is the array accessed using the multibyte string elements.  */
3814   for (idx = 0; idx < 256; ++idx)
3815     ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
3816
3817   /* Mirror first 127 entries.  We must take care that entry -1 is not
3818      mirrored because EOF == -1.  */
3819   for (idx = 0; idx < 127; ++idx)
3820     ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
3821
3822   /* The 32 bit array contains all characters < 0x100.  */
3823   for (idx = 0; idx < ctype->class_collection_act; ++idx)
3824     if (ctype->charnames[idx] < 0x100)
3825       ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
3826
3827   for (nr = 0; nr < ctype->nr_charclass; nr++)
3828     {
3829       ctype->class_b[nr] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
3830
3831       for (idx = 0; idx < 256; ++idx)
3832         if (ctype->class256_collection[idx] & _ISbit (nr))
3833           ctype->class_b[nr][idx >> 5] |= (uint32_t)1 << (idx & 0x1f);
3834     }
3835
3836   for (nr = 0; nr < ctype->nr_charclass; nr++)
3837     {
3838       struct wctype_table t;
3839
3840       t.p = 4; /* or: 5 */
3841       t.q = 7; /* or: 6 */
3842       wctype_table_init (&t);
3843
3844       for (idx = 0; idx < ctype->class_collection_act; ++idx)
3845         if (ctype->class_collection[idx] & _ISwbit (nr))
3846           wctype_table_add (&t, ctype->charnames[idx]);
3847
3848       wctype_table_finalize (&t);
3849
3850       if (verbose)
3851         WITH_CUR_LOCALE (fprintf (stderr, _("\
3852 %s: table for class \"%s\": %lu bytes\n"),
3853                                  "LC_CTYPE", ctype->classnames[nr],
3854                                  (unsigned long int) t.result_size));
3855
3856       ctype->class_3level[nr].iov_base = t.result;
3857       ctype->class_3level[nr].iov_len = t.result_size;
3858     }
3859
3860   /* Room for table of mappings.  */
3861   ctype->map_b = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3862   ctype->map32_b = (uint32_t **) xmalloc (ctype->map_collection_nr
3863                                           * sizeof (uint32_t *));
3864   ctype->map_3level = (struct iovec *)
3865     xmalloc (ctype->map_collection_nr * sizeof (struct iovec));
3866
3867   /* Fill in all mappings.  */
3868   for (idx = 0; idx < 2; ++idx)
3869     {
3870       unsigned int idx2;
3871
3872       /* Allocate table.  */
3873       ctype->map_b[idx] = (uint32_t *)
3874         xmalloc ((256 + 128) * sizeof (uint32_t));
3875
3876       /* Copy values from collection.  */
3877       for (idx2 = 0; idx2 < 256; ++idx2)
3878         ctype->map_b[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
3879
3880       /* Mirror first 127 entries.  We must take care not to map entry
3881          -1 because EOF == -1.  */
3882       for (idx2 = 0; idx2 < 127; ++idx2)
3883         ctype->map_b[idx][idx2] = ctype->map_b[idx][256 + idx2];
3884
3885       /* EOF must map to EOF.  */
3886       ctype->map_b[idx][127] = EOF;
3887     }
3888
3889   for (idx = 0; idx < ctype->map_collection_nr; ++idx)
3890     {
3891       unsigned int idx2;
3892
3893       /* Allocate table.  */
3894       ctype->map32_b[idx] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
3895
3896       /* Copy values from collection.  Default is identity mapping.  */
3897       for (idx2 = 0; idx2 < 256; ++idx2)
3898         ctype->map32_b[idx][idx2] =
3899           (ctype->map_collection[idx][idx2] != 0
3900            ? ctype->map_collection[idx][idx2]
3901            : idx2);
3902     }
3903
3904   for (nr = 0; nr < ctype->map_collection_nr; nr++)
3905     {
3906       struct wctrans_table t;
3907
3908       t.p = 7;
3909       t.q = 9;
3910       wctrans_table_init (&t);
3911
3912       for (idx = 0; idx < ctype->map_collection_act[nr]; ++idx)
3913         if (ctype->map_collection[nr][idx] != 0)
3914           wctrans_table_add (&t, ctype->charnames[idx],
3915                              ctype->map_collection[nr][idx]);
3916
3917       wctrans_table_finalize (&t);
3918
3919       if (verbose)
3920         WITH_CUR_LOCALE (fprintf (stderr, _("\
3921 %s: table for map \"%s\": %lu bytes\n"),
3922                                  "LC_CTYPE", ctype->mapnames[nr],
3923                                  (unsigned long int) t.result_size));
3924
3925       ctype->map_3level[nr].iov_base = t.result;
3926       ctype->map_3level[nr].iov_len = t.result_size;
3927     }
3928
3929   /* Extra array for class and map names.  */
3930   ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3931                                                 * sizeof (uint32_t));
3932   ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3933                                               * sizeof (uint32_t));
3934
3935   ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
3936   ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
3937
3938   /* Array for width information.  Because the expected widths are very
3939      small (never larger than 2) we use only one single byte.  This
3940      saves space.
3941      We put only printable characters in the table.  wcwidth is specified
3942      to return -1 for non-printable characters.  Doing the check here
3943      saves a run-time check.
3944      But we put L'\0' in the table.  This again saves a run-time check.  */
3945   {
3946     struct wcwidth_table t;
3947
3948     t.p = 7;
3949     t.q = 9;
3950     wcwidth_table_init (&t);
3951
3952     /* First set all the printable characters of the character set to
3953        the default width.  */
3954     curs = NULL;
3955     while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
3956       {
3957         struct charseq *data = (struct charseq *) vdata;
3958
3959         if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
3960           data->ucs4 = repertoire_find_value (ctype->repertoire,
3961                                               data->name, len);
3962
3963         if (data->ucs4 != ILLEGAL_CHAR_VALUE)
3964           {
3965             uint32_t *class_bits =
3966               find_idx (ctype, &ctype->class_collection, NULL,
3967                         &ctype->class_collection_act, data->ucs4);
3968
3969             if (class_bits != NULL && (*class_bits & BITw (tok_print)))
3970               wcwidth_table_add (&t, data->ucs4, charmap->width_default);
3971           }
3972       }
3973
3974     /* Now add the explicitly specified widths.  */
3975     if (charmap->width_rules != NULL)
3976       {
3977         size_t cnt;
3978
3979         for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
3980           {
3981             unsigned char bytes[charmap->mb_cur_max];
3982             int nbytes = charmap->width_rules[cnt].from->nbytes;
3983
3984             /* We have the range of character for which the width is
3985                specified described using byte sequences of the multibyte
3986                charset.  We have to convert this to UCS4 now.  And we
3987                cannot simply convert the beginning and the end of the
3988                sequence, we have to iterate over the byte sequence and
3989                convert it for every single character.  */
3990             memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
3991
3992             while (nbytes < charmap->width_rules[cnt].to->nbytes
3993                    || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
3994                               nbytes) <= 0)
3995               {
3996                 /* Find the UCS value for `bytes'.  */
3997                 int inner;
3998                 uint32_t wch;
3999                 struct charseq *seq =
4000                   charmap_find_symbol (charmap, bytes, nbytes);
4001
4002                 if (seq == NULL)
4003                   wch = ILLEGAL_CHAR_VALUE;
4004                 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
4005                   wch = seq->ucs4;
4006                 else
4007                   wch = repertoire_find_value (ctype->repertoire, seq->name,
4008                                                strlen (seq->name));
4009
4010                 if (wch != ILLEGAL_CHAR_VALUE)
4011                   {
4012                     /* Store the value.  */
4013                     uint32_t *class_bits =
4014                       find_idx (ctype, &ctype->class_collection, NULL,
4015                                 &ctype->class_collection_act, wch);
4016
4017                     if (class_bits != NULL && (*class_bits & BITw (tok_print)))
4018                       wcwidth_table_add (&t, wch,
4019                                          charmap->width_rules[cnt].width);
4020                   }
4021
4022                 /* "Increment" the bytes sequence.  */
4023                 inner = nbytes - 1;
4024                 while (inner >= 0 && bytes[inner] == 0xff)
4025                   --inner;
4026
4027                 if (inner < 0)
4028                   {
4029                     /* We have to extend the byte sequence.  */
4030                     if (nbytes >= charmap->width_rules[cnt].to->nbytes)
4031                       break;
4032
4033                     bytes[0] = 1;
4034                     memset (&bytes[1], 0, nbytes);
4035                     ++nbytes;
4036                   }
4037                 else
4038                   {
4039                     ++bytes[inner];
4040                     while (++inner < nbytes)
4041                       bytes[inner] = 0;
4042                   }
4043               }
4044           }
4045       }
4046
4047     /* Set the width of L'\0' to 0.  */
4048     wcwidth_table_add (&t, 0, 0);
4049
4050     wcwidth_table_finalize (&t);
4051
4052     if (verbose)
4053       WITH_CUR_LOCALE (fprintf (stderr, _("%s: table for width: %lu bytes\n"),
4054                                "LC_CTYPE", (unsigned long int) t.result_size));
4055
4056     ctype->width.iov_base = t.result;
4057     ctype->width.iov_len = t.result_size;
4058   }
4059
4060   /* Set MB_CUR_MAX.  */
4061   ctype->mb_cur_max = charmap->mb_cur_max;
4062
4063   /* Now determine the table for the transliteration information.
4064
4065      XXX It is not yet clear to me whether it is worth implementing a
4066      complicated algorithm which uses a hash table to locate the entries.
4067      For now I'll use a simple array which can be searched using binary
4068      search.  */
4069   if (ctype->translit_include != NULL)
4070     /* Traverse the locales mentioned in the `include' statements in a
4071        depth-first way and fold in their transliteration information.  */
4072     translit_flatten (ctype, charmap, &ctype->translit);
4073
4074   if (ctype->translit != NULL)
4075     {
4076       /* First count how many entries we have.  This is the upper limit
4077          since some entries from the included files might be overwritten.  */
4078       size_t number = 0;
4079       size_t cnt;
4080       struct translit_t *runp = ctype->translit;
4081       struct translit_t **sorted;
4082       size_t from_len, to_len;
4083
4084       while (runp != NULL)
4085         {
4086           ++number;
4087           runp = runp->next;
4088         }
4089
4090       /* Next we allocate an array large enough and fill in the values.  */
4091       sorted = (struct translit_t **) alloca (number
4092                                               * sizeof (struct translit_t **));
4093       runp = ctype->translit;
4094       number = 0;
4095       do
4096         {
4097           /* Search for the place where to insert this string.
4098              XXX Better use a real sorting algorithm later.  */
4099           size_t idx = 0;
4100           int replace = 0;
4101
4102           while (idx < number)
4103             {
4104               int res = wcscmp ((const wchar_t *) sorted[idx]->from,
4105                                 (const wchar_t *) runp->from);
4106               if (res == 0)
4107                 {
4108                   replace = 1;
4109                   break;
4110                 }
4111               if (res > 0)
4112                 break;
4113               ++idx;
4114             }
4115
4116           if (replace)
4117             sorted[idx] = runp;
4118           else
4119             {
4120               memmove (&sorted[idx + 1], &sorted[idx],
4121                        (number - idx) * sizeof (struct translit_t *));
4122               sorted[idx] = runp;
4123               ++number;
4124             }
4125
4126           runp = runp->next;
4127         }
4128       while (runp != NULL);
4129
4130       /* The next step is putting all the possible transliteration
4131          strings in one memory block so that we can write it out.
4132          We need several different blocks:
4133          - index to the from-string array
4134          - from-string array
4135          - index to the to-string array
4136          - to-string array.
4137       */
4138       from_len = to_len = 0;
4139       for (cnt = 0; cnt < number; ++cnt)
4140         {
4141           struct translit_to_t *srunp;
4142           from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4143           srunp = sorted[cnt]->to;
4144           while (srunp != NULL)
4145             {
4146               to_len += wcslen ((const wchar_t *) srunp->str) + 1;
4147               srunp = srunp->next;
4148             }
4149           /* Plus one for the extra NUL character marking the end of
4150              the list for the current entry.  */
4151           ++to_len;
4152         }
4153
4154       /* We can allocate the arrays for the results.  */
4155       ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
4156       ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
4157       ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
4158       ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
4159
4160       from_len = 0;
4161       to_len = 0;
4162       for (cnt = 0; cnt < number; ++cnt)
4163         {
4164           size_t len;
4165           struct translit_to_t *srunp;
4166
4167           ctype->translit_from_idx[cnt] = from_len;
4168           ctype->translit_to_idx[cnt] = to_len;
4169
4170           len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4171           wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
4172                    (const wchar_t *) sorted[cnt]->from, len);
4173           from_len += len;
4174
4175           ctype->translit_to_idx[cnt] = to_len;
4176           srunp = sorted[cnt]->to;
4177           while (srunp != NULL)
4178             {
4179               len = wcslen ((const wchar_t *) srunp->str) + 1;
4180               wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
4181                        (const wchar_t *) srunp->str, len);
4182               to_len += len;
4183               srunp = srunp->next;
4184             }
4185           ctype->translit_to_tbl[to_len++] = L'\0';
4186         }
4187
4188       /* Store the information about the length.  */
4189       ctype->translit_idx_size = number;
4190       ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
4191       ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
4192     }
4193   else
4194     {
4195       /* Provide some dummy pointers since we have nothing to write out.  */
4196       static uint32_t no_str = { 0 };
4197
4198       ctype->translit_from_idx = &no_str;
4199       ctype->translit_from_tbl = &no_str;
4200       ctype->translit_to_tbl = &no_str;
4201       ctype->translit_idx_size = 0;
4202       ctype->translit_from_tbl_size = 0;
4203       ctype->translit_to_tbl_size = 0;
4204     }
4205 }