locale/programs/ld-ctype.c

   1 /* Copyright (C) 1995-2006, 2007 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
   4
   5    This program is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License version 2 as
   7    published by the Free Software Foundation.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program; if not, write to the Free Software Foundation,
  16    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  17
  18 #ifdef HAVE_CONFIG_H
  19 # include <config.h>
  20 #endif
  21
  22 #include <alloca.h>
  23 #include <byteswap.h>
  24 #include <endian.h>
  25 #include <errno.h>
  26 #include <limits.h>
  27 #include <obstack.h>
  28 #include <stdlib.h>
  29 #include <string.h>
  30 #include <wchar.h>
  31 #include <wctype.h>
  32 #include <sys/uio.h>
  33
  34 #include "localedef.h"
  35 #include "charmap.h"
  36 #include "localeinfo.h"
  37 #include "langinfo.h"
  38 #include "linereader.h"
  39 #include "locfile-token.h"
  40 #include "locfile.h"
  41
  42 #include <assert.h>
  43
  44
  45 #ifdef PREDEFINED_CLASSES
  46 /* These are the extra bits not in wctype.h since these are not preallocated
  47    classes.  */
  48 # define _ISwspecial1   (1 << 29)
  49 # define _ISwspecial2   (1 << 30)
  50 # define _ISwspecial3   (1 << 31)
  51 #endif
  52
  53
  54 /* The bit used for representing a special class.  */
  55 #define BITPOS(class) ((class) - tok_upper)
  56 #define BIT(class) (_ISbit (BITPOS (class)))
  57 #define BITw(class) (_ISwbit (BITPOS (class)))
  58
  59 #define ELEM(ctype, collection, idx, value)                                   \
  60   *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,     \
  61              &ctype->collection##_act idx, value)
  62
  63
  64 /* To be compatible with former implementations we for now restrict
  65    the number of bits for character classes to 16.  When compatibility
  66    is not necessary anymore increase the number to 32.  */
  67 #define char_class_t uint16_t
  68 #define char_class32_t uint32_t
  69
  70
  71 /* Type to describe a transliteration action.  We have a possibly
  72    multiple character from-string and a set of multiple character
  73    to-strings.  All are 32bit values since this is what is used in
  74    the gconv functions.  */
  75 struct translit_to_t
  76 {
  77   uint32_t *str;
  78
  79   struct translit_to_t *next;
  80 };
  81
  82 struct translit_t
  83 {
  84   uint32_t *from;
  85
  86   const char *fname;
  87   size_t lineno;
  88
  89   struct translit_to_t *to;
  90
  91   struct translit_t *next;
  92 };
  93
  94 struct translit_ignore_t
  95 {
  96   uint32_t from;
  97   uint32_t to;
  98   uint32_t step;
  99
 100   const char *fname;
 101   size_t lineno;
 102
 103   struct translit_ignore_t *next;
 104 };
 105
 106
 107 /* Type to describe a transliteration include statement.  */
 108 struct translit_include_t
 109 {
 110   const char *copy_locale;
 111   const char *copy_repertoire;
 112
 113   struct translit_include_t *next;
 114 };
 115
 116
 117 /* Sparse table of uint32_t.  */
 118 #define TABLE idx_table
 119 #define ELEMENT uint32_t
 120 #define DEFAULT ((uint32_t) ~0)
 121 #define NO_FINALIZE
 122 #include "3level.h"
 123
 124
 125 /* The real definition of the struct for the LC_CTYPE locale.  */
 126 struct locale_ctype_t
 127 {
 128   uint32_t *charnames;
 129   size_t charnames_max;
 130   size_t charnames_act;
 131   /* An index lookup table, to speedup find_idx.  */
 132   struct idx_table charnames_idx;
 133
 134   struct repertoire_t *repertoire;
 135
 136   /* We will allow up to 8 * sizeof (uint32_t) character classes.  */
 137 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
 138   size_t nr_charclass;
 139   const char *classnames[MAX_NR_CHARCLASS];
 140   uint32_t last_class_char;
 141   uint32_t class256_collection[256];
 142   uint32_t *class_collection;
 143   size_t class_collection_max;
 144   size_t class_collection_act;
 145   uint32_t class_done;
 146   uint32_t class_offset;
 147
 148   struct charseq **mbdigits;
 149   size_t mbdigits_act;
 150   size_t mbdigits_max;
 151   uint32_t *wcdigits;
 152   size_t wcdigits_act;
 153   size_t wcdigits_max;
 154
 155   struct charseq *mboutdigits[10];
 156   uint32_t wcoutdigits[10];
 157   size_t outdigits_act;
 158
 159   /* If the following number ever turns out to be too small simply
 160      increase it.  But I doubt it will.  --drepper@gnu */
 161 #define MAX_NR_CHARMAP 16
 162   const char *mapnames[MAX_NR_CHARMAP];
 163   uint32_t *map_collection[MAX_NR_CHARMAP];
 164   uint32_t map256_collection[2][256];
 165   size_t map_collection_max[MAX_NR_CHARMAP];
 166   size_t map_collection_act[MAX_NR_CHARMAP];
 167   size_t map_collection_nr;
 168   size_t last_map_idx;
 169   int tomap_done[MAX_NR_CHARMAP];
 170   uint32_t map_offset;
 171
 172   /* Transliteration information.  */
 173   struct translit_include_t *translit_include;
 174   struct translit_t *translit;
 175   struct translit_ignore_t *translit_ignore;
 176   uint32_t ntranslit_ignore;
 177
 178   uint32_t *default_missing;
 179   const char *default_missing_file;
 180   size_t default_missing_lineno;
 181
 182   uint32_t to_nonascii;
 183
 184   /* The arrays for the binary representation.  */
 185   char_class_t *ctype_b;
 186   char_class32_t *ctype32_b;
 187   uint32_t **map_b;
 188   uint32_t **map32_b;
 189   uint32_t **class_b;
 190   struct iovec *class_3level;
 191   struct iovec *map_3level;
 192   uint32_t *class_name_ptr;
 193   uint32_t *map_name_ptr;
 194   struct iovec width;
 195   uint32_t mb_cur_max;
 196   const char *codeset_name;
 197   uint32_t *translit_from_idx;
 198   uint32_t *translit_from_tbl;
 199   uint32_t *translit_to_idx;
 200   uint32_t *translit_to_tbl;
 201   uint32_t translit_idx_size;
 202   size_t translit_from_tbl_size;
 203   size_t translit_to_tbl_size;
 204
 205   struct obstack mempool;
 206 };
 207
 208
 209 /* Marker for an empty slot.  This has the value 0xFFFFFFFF, regardless
 210    whether 'int' is 16 bit, 32 bit, or 64 bit.  */
 211 #define EMPTY ((uint32_t) ~0)
 212
 213
 214 #define obstack_chunk_alloc xmalloc
 215 #define obstack_chunk_free free
 216
 217
 218 /* Prototypes for local functions.  */
 219 static void ctype_startup (struct linereader *lr, struct localedef_t *locale,
 220                            const struct charmap_t *charmap,
 221                            struct localedef_t *copy_locale,
 222                            int ignore_content);
 223 static void ctype_class_new (struct linereader *lr,
 224                              struct locale_ctype_t *ctype, const char *name);
 225 static void ctype_map_new (struct linereader *lr,
 226                            struct locale_ctype_t *ctype,
 227                            const char *name, const struct charmap_t *charmap);
 228 static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table,
 229                            size_t *max, size_t *act, unsigned int idx);
 230 static void set_class_defaults (struct locale_ctype_t *ctype,
 231                                 const struct charmap_t *charmap,
 232                                 struct repertoire_t *repertoire);
 233 static void allocate_arrays (struct locale_ctype_t *ctype,
 234                              const struct charmap_t *charmap,
 235                              struct repertoire_t *repertoire);
 236
 237
 238 static const char *longnames[] =
 239 {
 240   "zero", "one", "two", "three", "four",
 241   "five", "six", "seven", "eight", "nine"
 242 };
 243 static const char *uninames[] =
 244 {
 245   "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
 246   "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
 247 };
 248 static const unsigned char digits[] = "0123456789";
 249
 250
 251 static void
 252 ctype_startup (struct linereader *lr, struct localedef_t *locale,
 253                const struct charmap_t *charmap,
 254                struct localedef_t *copy_locale, int ignore_content)
 255 {
 256   unsigned int cnt;
 257   struct locale_ctype_t *ctype;
 258
 259   if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL)
 260     {
 261       if (copy_locale == NULL)
 262         {
 263           /* Allocate the needed room.  */
 264           locale->categories[LC_CTYPE].ctype = ctype =
 265             (struct locale_ctype_t *) xcalloc (1,
 266                                                sizeof (struct locale_ctype_t));
 267
 268           /* We have seen no names yet.  */
 269           ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
 270           ctype->charnames =
 271             (unsigned int *) xmalloc (ctype->charnames_max
 272                                       * sizeof (unsigned int));
 273           for (cnt = 0; cnt < 256; ++cnt)
 274             ctype->charnames[cnt] = cnt;
 275           ctype->charnames_act = 256;
 276           idx_table_init (&ctype->charnames_idx);
 277
 278           /* Fill character class information.  */
 279           ctype->last_class_char = ILLEGAL_CHAR_VALUE;
 280           /* The order of the following instructions determines the bit
 281              positions!  */
 282           ctype_class_new (lr, ctype, "upper");
 283           ctype_class_new (lr, ctype, "lower");
 284           ctype_class_new (lr, ctype, "alpha");
 285           ctype_class_new (lr, ctype, "digit");
 286           ctype_class_new (lr, ctype, "xdigit");
 287           ctype_class_new (lr, ctype, "space");
 288           ctype_class_new (lr, ctype, "print");
 289           ctype_class_new (lr, ctype, "graph");
 290           ctype_class_new (lr, ctype, "blank");
 291           ctype_class_new (lr, ctype, "cntrl");
 292           ctype_class_new (lr, ctype, "punct");
 293           ctype_class_new (lr, ctype, "alnum");
 294 #ifdef PREDEFINED_CLASSES
 295           /* The following are extensions from ISO 14652.  */
 296           ctype_class_new (lr, ctype, "left_to_right");
 297           ctype_class_new (lr, ctype, "right_to_left");
 298           ctype_class_new (lr, ctype, "num_terminator");
 299           ctype_class_new (lr, ctype, "num_separator");
 300           ctype_class_new (lr, ctype, "segment_separator");
 301           ctype_class_new (lr, ctype, "block_separator");
 302           ctype_class_new (lr, ctype, "direction_control");
 303           ctype_class_new (lr, ctype, "sym_swap_layout");
 304           ctype_class_new (lr, ctype, "char_shape_selector");
 305           ctype_class_new (lr, ctype, "num_shape_selector");
 306           ctype_class_new (lr, ctype, "non_spacing");
 307           ctype_class_new (lr, ctype, "non_spacing_level3");
 308           ctype_class_new (lr, ctype, "normal_connect");
 309           ctype_class_new (lr, ctype, "r_connect");
 310           ctype_class_new (lr, ctype, "no_connect");
 311           ctype_class_new (lr, ctype, "no_connect-space");
 312           ctype_class_new (lr, ctype, "vowel_connect");
 313 #endif
 314
 315           ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
 316           ctype->class_collection
 317             = (uint32_t *) xcalloc (sizeof (unsigned long int),
 318                                     ctype->class_collection_max);
 319           ctype->class_collection_act = 256;
 320
 321           /* Fill character map information.  */
 322           ctype->last_map_idx = MAX_NR_CHARMAP;
 323           ctype_map_new (lr, ctype, "toupper", charmap);
 324           ctype_map_new (lr, ctype, "tolower", charmap);
 325 #ifdef PREDEFINED_CLASSES
 326           ctype_map_new (lr, ctype, "tosymmetric", charmap);
 327 #endif
 328
 329           /* Fill first 256 entries in `toXXX' arrays.  */
 330           for (cnt = 0; cnt < 256; ++cnt)
 331             {
 332               ctype->map_collection[0][cnt] = cnt;
 333               ctype->map_collection[1][cnt] = cnt;
 334 #ifdef PREDEFINED_CLASSES
 335               ctype->map_collection[2][cnt] = cnt;
 336 #endif
 337               ctype->map256_collection[0][cnt] = cnt;
 338               ctype->map256_collection[1][cnt] = cnt;
 339             }
 340
 341           if (enc_not_ascii_compatible)
 342             ctype->to_nonascii = 1;
 343
 344           obstack_init (&ctype->mempool);
 345         }
 346       else
 347         ctype = locale->categories[LC_CTYPE].ctype =
 348           copy_locale->categories[LC_CTYPE].ctype;
 349     }
 350 }
 351
 352
 353 void
 354 ctype_finish (struct localedef_t *locale, const struct charmap_t *charmap)
 355 {
 356   /* See POSIX.2, table 2-6 for the meaning of the following table.  */
 357 #define NCLASS 12
 358   static const struct
 359   {
 360     const char *name;
 361     const char allow[NCLASS];
 362   }
 363   valid_table[NCLASS] =
 364   {
 365     /* The order is important.  See token.h for more information.
 366        M = Always, D = Default, - = Permitted, X = Mutually exclusive  */
 367     { "upper",  "--MX-XDDXXX-" },
 368     { "lower",  "--MX-XDDXXX-" },
 369     { "alpha",  "---X-XDDXXX-" },
 370     { "digit",  "XXX--XDDXXX-" },
 371     { "xdigit", "-----XDDXXX-" },
 372     { "space",  "XXXXX------X" },
 373     { "print",  "---------X--" },
 374     { "graph",  "---------X--" },
 375     { "blank",  "XXXXXM-----X" },
 376     { "cntrl",  "XXXXX-XX--XX" },
 377     { "punct",  "XXXXX-DD-X-X" },
 378     { "alnum",  "-----XDDXXX-" }
 379   };
 380   size_t cnt;
 381   int cls1, cls2;
 382   uint32_t space_value;
 383   struct charseq *space_seq;
 384   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 385   int warned;
 386   const void *key;
 387   size_t len;
 388   void *vdata;
 389   void *curs;
 390
 391   /* Now resolve copying and also handle completely missing definitions.  */
 392   if (ctype == NULL)
 393     {
 394       const char *repertoire_name;
 395
 396       /* First see whether we were supposed to copy.  If yes, find the
 397          actual definition.  */
 398       if (locale->copy_name[LC_CTYPE] != NULL)
 399         {
 400           /* Find the copying locale.  This has to happen transitively since
 401              the locale we are copying from might also copying another one.  */
 402           struct localedef_t *from = locale;
 403
 404           do
 405             from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
 406                                 from->repertoire_name, charmap);
 407           while (from->categories[LC_CTYPE].ctype == NULL
 408                  && from->copy_name[LC_CTYPE] != NULL);
 409
 410           ctype = locale->categories[LC_CTYPE].ctype
 411             = from->categories[LC_CTYPE].ctype;
 412         }
 413
 414       /* If there is still no definition issue an warning and create an
 415          empty one.  */
 416       if (ctype == NULL)
 417         {
 418           if (! be_quiet)
 419             WITH_CUR_LOCALE (error (0, 0, _("\
 420 No definition for %s category found"), "LC_CTYPE"));
 421           ctype_startup (NULL, locale, charmap, NULL, 0);
 422           ctype = locale->categories[LC_CTYPE].ctype;
 423         }
 424
 425       /* Get the repertoire we have to use.  */
 426       repertoire_name = locale->repertoire_name ?: repertoire_global;
 427       if (repertoire_name != NULL)
 428         ctype->repertoire = repertoire_read (repertoire_name);
 429     }
 430
 431   /* We need the name of the currently used 8-bit character set to
 432      make correct conversion between this 8-bit representation and the
 433      ISO 10646 character set used internally for wide characters.  */
 434   ctype->codeset_name = charmap->code_set_name;
 435   if (ctype->codeset_name == NULL)
 436     {
 437       if (! be_quiet)
 438         WITH_CUR_LOCALE (error (0, 0, _("\
 439 No character set name specified in charmap")));
 440       ctype->codeset_name = "//UNKNOWN//";
 441     }
 442
 443   /* Set default value for classes not specified.  */
 444   set_class_defaults (ctype, charmap, ctype->repertoire);
 445
 446   /* Check according to table.  */
 447   for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
 448     {
 449       uint32_t tmp = ctype->class_collection[cnt];
 450
 451       if (tmp != 0)
 452         {
 453           for (cls1 = 0; cls1 < NCLASS; ++cls1)
 454             if ((tmp & _ISwbit (cls1)) != 0)
 455               for (cls2 = 0; cls2 < NCLASS; ++cls2)
 456                 if (valid_table[cls1].allow[cls2] != '-')
 457                   {
 458                     int eq = (tmp & _ISwbit (cls2)) != 0;
 459                     switch (valid_table[cls1].allow[cls2])
 460                       {
 461                       case 'M':
 462                         if (!eq)
 463                           {
 464                             uint32_t value = ctype->charnames[cnt];
 465
 466                             if (!be_quiet)
 467                               WITH_CUR_LOCALE (error (0, 0, _("\
 468 character L'\\u%0*x' in class `%s' must be in class `%s'"),
 469                                                       value > 0xffff ? 8 : 4,
 470                                                       value,
 471                                                       valid_table[cls1].name,
 472                                                       valid_table[cls2].name));
 473                           }
 474                         break;
 475
 476                       case 'X':
 477                         if (eq)
 478                           {
 479                             uint32_t value = ctype->charnames[cnt];
 480
 481                             if (!be_quiet)
 482                               WITH_CUR_LOCALE (error (0, 0, _("\
 483 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
 484                                                       value > 0xffff ? 8 : 4,
 485                                                       value,
 486                                                       valid_table[cls1].name,
 487                                                       valid_table[cls2].name));
 488                           }
 489                         break;
 490
 491                       case 'D':
 492                         ctype->class_collection[cnt] |= _ISwbit (cls2);
 493                         break;
 494
 495                       default:
 496                         WITH_CUR_LOCALE (error (5, 0, _("\
 497 internal error in %s, line %u"), __FUNCTION__, __LINE__));
 498                       }
 499                   }
 500         }
 501     }
 502
 503   for (cnt = 0; cnt < 256; ++cnt)
 504     {
 505       uint32_t tmp = ctype->class256_collection[cnt];
 506
 507       if (tmp != 0)
 508         {
 509           for (cls1 = 0; cls1 < NCLASS; ++cls1)
 510             if ((tmp & _ISbit (cls1)) != 0)
 511               for (cls2 = 0; cls2 < NCLASS; ++cls2)
 512                 if (valid_table[cls1].allow[cls2] != '-')
 513                   {
 514                     int eq = (tmp & _ISbit (cls2)) != 0;
 515                     switch (valid_table[cls1].allow[cls2])
 516                       {
 517                       case 'M':
 518                         if (!eq)
 519                           {
 520                             char buf[17];
 521
 522                             snprintf (buf, sizeof buf, "\\%Zo", cnt);
 523
 524                             if (!be_quiet)
 525                               WITH_CUR_LOCALE (error (0, 0, _("\
 526 character '%s' in class `%s' must be in class `%s'"),
 527                                                       buf,
 528                                                       valid_table[cls1].name,
 529                                                       valid_table[cls2].name));
 530                           }
 531                         break;
 532
 533                       case 'X':
 534                         if (eq)
 535                           {
 536                             char buf[17];
 537
 538                             snprintf (buf, sizeof buf, "\\%Zo", cnt);
 539
 540                             if (!be_quiet)
 541                               WITH_CUR_LOCALE (error (0, 0, _("\
 542 character '%s' in class `%s' must not be in class `%s'"),
 543                                                       buf,
 544                                                       valid_table[cls1].name,
 545                                                       valid_table[cls2].name));
 546                           }
 547                         break;
 548
 549                       case 'D':
 550                         ctype->class256_collection[cnt] |= _ISbit (cls2);
 551                         break;
 552
 553                       default:
 554                         WITH_CUR_LOCALE (error (5, 0, _("\
 555 internal error in %s, line %u"), __FUNCTION__, __LINE__));
 556                       }
 557                   }
 558         }
 559     }
 560
 561   /* ... and now test <SP> as a special case.  */
 562   space_value = 32;
 563   if (((cnt = BITPOS (tok_space),
 564         (ELEM (ctype, class_collection, , space_value)
 565          & BITw (tok_space)) == 0)
 566        || (cnt = BITPOS (tok_blank),
 567            (ELEM (ctype, class_collection, , space_value)
 568             & BITw (tok_blank)) == 0)))
 569     {
 570       if (!be_quiet)
 571         WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
 572                                 valid_table[cnt].name));
 573     }
 574   else if (((cnt = BITPOS (tok_punct),
 575              (ELEM (ctype, class_collection, , space_value)
 576               & BITw (tok_punct)) != 0)
 577             || (cnt = BITPOS (tok_graph),
 578                 (ELEM (ctype, class_collection, , space_value)
 579                  & BITw (tok_graph))
 580                 != 0)))
 581     {
 582       if (!be_quiet)
 583         WITH_CUR_LOCALE (error (0, 0, _("\
 584 <SP> character must not be in class `%s'"),
 585                                 valid_table[cnt].name));
 586     }
 587   else
 588     ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
 589
 590   space_seq = charmap_find_value (charmap, "SP", 2);
 591   if (space_seq == NULL)
 592     space_seq = charmap_find_value (charmap, "space", 5);
 593   if (space_seq == NULL)
 594     space_seq = charmap_find_value (charmap, "U00000020", 9);
 595   if (space_seq == NULL || space_seq->nbytes != 1)
 596     {
 597       if (!be_quiet)
 598         WITH_CUR_LOCALE (error (0, 0, _("\
 599 character <SP> not defined in character map")));
 600     }
 601   else if (((cnt = BITPOS (tok_space),
 602              (ctype->class256_collection[space_seq->bytes[0]]
 603               & BIT (tok_space)) == 0)
 604             || (cnt = BITPOS (tok_blank),
 605                 (ctype->class256_collection[space_seq->bytes[0]]
 606                  & BIT (tok_blank)) == 0)))
 607     {
 608       if (!be_quiet)
 609         WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
 610                                 valid_table[cnt].name));
 611     }
 612   else if (((cnt = BITPOS (tok_punct),
 613              (ctype->class256_collection[space_seq->bytes[0]]
 614               & BIT (tok_punct)) != 0)
 615             || (cnt = BITPOS (tok_graph),
 616                 (ctype->class256_collection[space_seq->bytes[0]]
 617                  & BIT (tok_graph)) != 0)))
 618     {
 619       if (!be_quiet)
 620         WITH_CUR_LOCALE (error (0, 0, _("\
 621 <SP> character must not be in class `%s'"),
 622                                 valid_table[cnt].name));
 623     }
 624   else
 625     ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
 626
 627   /* Now that the tests are done make sure the name array contains all
 628      characters which are handled in the WIDTH section of the
 629      character set definition file.  */
 630   if (charmap->width_rules != NULL)
 631     for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
 632       {
 633         unsigned char bytes[charmap->mb_cur_max];
 634         int nbytes = charmap->width_rules[cnt].from->nbytes;
 635
 636         /* We have the range of character for which the width is
 637            specified described using byte sequences of the multibyte
 638            charset.  We have to convert this to UCS4 now.  And we
 639            cannot simply convert the beginning and the end of the
 640            sequence, we have to iterate over the byte sequence and
 641            convert it for every single character.  */
 642         memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
 643
 644         while (nbytes < charmap->width_rules[cnt].to->nbytes
 645                || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
 646                           nbytes) <= 0)
 647           {
 648             /* Find the UCS value for `bytes'.  */
 649             int inner;
 650             uint32_t wch;
 651             struct charseq *seq = charmap_find_symbol (charmap, bytes, nbytes);
 652
 653             if (seq == NULL)
 654               wch = ILLEGAL_CHAR_VALUE;
 655             else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
 656               wch = seq->ucs4;
 657             else
 658               wch = repertoire_find_value (ctype->repertoire, seq->name,
 659                                            strlen (seq->name));
 660
 661             if (wch != ILLEGAL_CHAR_VALUE)
 662               /* We are only interested in the side-effects of the
 663                  `find_idx' call.  It will add appropriate entries in
 664                  the name array if this is necessary.  */
 665               (void) find_idx (ctype, NULL, NULL, NULL, wch);
 666
 667             /* "Increment" the bytes sequence.  */
 668             inner = nbytes - 1;
 669             while (inner >= 0 && bytes[inner] == 0xff)
 670               --inner;
 671
 672             if (inner < 0)
 673               {
 674                 /* We have to extend the byte sequence.  */
 675                 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
 676                   break;
 677
 678                 bytes[0] = 1;
 679                 memset (&bytes[1], 0, nbytes);
 680                 ++nbytes;
 681               }
 682             else
 683               {
 684                 ++bytes[inner];
 685                 while (++inner < nbytes)
 686                   bytes[inner] = 0;
 687               }
 688           }
 689       }
 690
 691   /* Now set all the other characters of the character set to the
 692      default width.  */
 693   curs = NULL;
 694   while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
 695     {
 696       struct charseq *data = (struct charseq *) vdata;
 697
 698       if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
 699         data->ucs4 = repertoire_find_value (ctype->repertoire,
 700                                             data->name, len);
 701
 702       if (data->ucs4 != ILLEGAL_CHAR_VALUE)
 703         (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
 704     }
 705
 706   /* There must be a multiple of 10 digits.  */
 707   if (ctype->mbdigits_act % 10 != 0)
 708     {
 709       assert (ctype->mbdigits_act == ctype->wcdigits_act);
 710       ctype->wcdigits_act -= ctype->mbdigits_act % 10;
 711       ctype->mbdigits_act -= ctype->mbdigits_act % 10;
 712       WITH_CUR_LOCALE (error (0, 0, _("\
 713 `digit' category has not entries in groups of ten")));
 714     }
 715
 716   /* Check the input digits.  There must be a multiple of ten available.
 717      In each group it could be that one or the other character is missing.
 718      In this case the whole group must be removed.  */
 719   cnt = 0;
 720   while (cnt < ctype->mbdigits_act)
 721     {
 722       size_t inner;
 723       for (inner = 0; inner < 10; ++inner)
 724         if (ctype->mbdigits[cnt + inner] == NULL)
 725           break;
 726
 727       if (inner == 10)
 728         cnt += 10;
 729       else
 730         {
 731           /* Remove the group.  */
 732           memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
 733                    ((ctype->wcdigits_act - cnt - 10)
 734                     * sizeof (ctype->mbdigits[0])));
 735           ctype->mbdigits_act -= 10;
 736         }
 737     }
 738
 739   /* If no input digits are given use the default.  */
 740   if (ctype->mbdigits_act == 0)
 741     {
 742       if (ctype->mbdigits_max == 0)
 743         {
 744           ctype->mbdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
 745                                            10 * sizeof (struct charseq *));
 746           ctype->mbdigits_max = 10;
 747         }
 748
 749       for (cnt = 0; cnt < 10; ++cnt)
 750         {
 751           ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
 752                                                       digits + cnt, 1);
 753           if (ctype->mbdigits[cnt] == NULL)
 754             {
 755               ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
 756                                                           longnames[cnt],
 757                                                           strlen (longnames[cnt]));
 758               if (ctype->mbdigits[cnt] == NULL)
 759                 {
 760                   /* Hum, this ain't good.  */
 761                   WITH_CUR_LOCALE (error (0, 0, _("\
 762 no input digits defined and none of the standard names in the charmap")));
 763
 764                   ctype->mbdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
 765                                                         sizeof (struct charseq) + 1);
 766
 767                   /* This is better than nothing.  */
 768                   ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
 769                   ctype->mbdigits[cnt]->nbytes = 1;
 770                 }
 771             }
 772         }
 773
 774       ctype->mbdigits_act = 10;
 775     }
 776
 777   /* Check the wide character input digits.  There must be a multiple
 778      of ten available.  In each group it could be that one or the other
 779      character is missing.  In this case the whole group must be
 780      removed.  */
 781   cnt = 0;
 782   while (cnt < ctype->wcdigits_act)
 783     {
 784       size_t inner;
 785       for (inner = 0; inner < 10; ++inner)
 786         if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
 787           break;
 788
 789       if (inner == 10)
 790         cnt += 10;
 791       else
 792         {
 793           /* Remove the group.  */
 794           memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
 795                    ((ctype->wcdigits_act - cnt - 10)
 796                     * sizeof (ctype->wcdigits[0])));
 797           ctype->wcdigits_act -= 10;
 798         }
 799     }
 800
 801   /* If no input digits are given use the default.  */
 802   if (ctype->wcdigits_act == 0)
 803     {
 804       if (ctype->wcdigits_max == 0)
 805         {
 806           ctype->wcdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
 807                                            10 * sizeof (uint32_t));
 808           ctype->wcdigits_max = 10;
 809         }
 810
 811       for (cnt = 0; cnt < 10; ++cnt)
 812         ctype->wcdigits[cnt] = L'0' + cnt;
 813
 814       ctype->mbdigits_act = 10;
 815     }
 816
 817   /* Check the outdigits.  */
 818   warned = 0;
 819   for (cnt = 0; cnt < 10; ++cnt)
 820     if (ctype->mboutdigits[cnt] == NULL)
 821       {
 822         static struct charseq replace[2];
 823
 824         if (!warned)
 825           {
 826             WITH_CUR_LOCALE (error (0, 0, _("\
 827 not all characters used in `outdigit' are available in the charmap")));
 828             warned = 1;
 829           }
 830
 831         replace[0].nbytes = 1;
 832         replace[0].bytes[0] = '?';
 833         replace[0].bytes[1] = '\0';
 834         ctype->mboutdigits[cnt] = &replace[0];
 835       }
 836
 837   warned = 0;
 838   for (cnt = 0; cnt < 10; ++cnt)
 839     if (ctype->wcoutdigits[cnt] == 0)
 840       {
 841         if (!warned)
 842           {
 843             WITH_CUR_LOCALE (error (0, 0, _("\
 844 not all characters used in `outdigit' are available in the repertoire")));
 845             warned = 1;
 846           }
 847
 848         ctype->wcoutdigits[cnt] = L'?';
 849       }
 850
 851   /* Sort the entries in the translit_ignore list.  */
 852   if (ctype->translit_ignore != NULL)
 853     {
 854       struct translit_ignore_t *firstp = ctype->translit_ignore;
 855       struct translit_ignore_t *runp;
 856
 857       ctype->ntranslit_ignore = 1;
 858
 859       for (runp = firstp->next; runp != NULL; runp = runp->next)
 860         {
 861           struct translit_ignore_t *lastp = NULL;
 862           struct translit_ignore_t *cmpp;
 863
 864           ++ctype->ntranslit_ignore;
 865
 866           for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next)
 867             if (runp->from < cmpp->from)
 868               break;
 869
 870           runp->next = lastp;
 871           if (lastp == NULL)
 872             firstp = runp;
 873         }
 874
 875       ctype->translit_ignore = firstp;
 876     }
 877 }
 878
 879
     /* Write the LC_CTYPE category of LOCALE to OUTPUT_PATH.

        The data is assembled as an iovec array which is finally handed to
        write_locale_data.  iov[0] is the `struct locale_file' header, iov[1]
        the index table IDX, and the entries after that hold the elements
        themselves.  idx[ELEM] is the running byte offset of element ELEM,
        starting with the combined size of header and index table.  Some
        elements need more than one iovec entry (the name lists, alignment
        padding, the per-class tables); OFFSET counts those additional
        entries, so the entry currently being filled is always
        iov[2 + ELEM + OFFSET].  */
 880 void
 881 ctype_output (struct localedef_t *locale, const struct charmap_t *charmap,
 882               const char *output_path)
 883 {
 884   static const char nulbytes[4] = { 0, 0, 0, 0 };
 885   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 886   const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)
 887                          + ctype->nr_charclass + ctype->map_collection_nr);
 888   struct iovec *iov = alloca (sizeof *iov
 889                               * (2 + nelems + 2 * ctype->nr_charclass
 890                                  + ctype->map_collection_nr + 4));
 891   struct locale_file data;
 892   uint32_t *idx = alloca (sizeof *idx * (nelems + 1));
 893   uint32_t default_missing_len;
 894   size_t elem, cnt, offset, total;
 895   char *cp;
 896
 897   /* Now prepare the output: Find the sizes of the table we can use.  */
 898   allocate_arrays (ctype, charmap, ctype->repertoire);
 899
       /* iov[0] carries the file header and iov[1] the index table; the
          first element therefore starts right behind both of them.  */
 900   data.magic = LIMAGIC (LC_CTYPE);
 901   data.n = nelems;
 902   iov[0].iov_base = (void *) &data;
 903   iov[0].iov_len = sizeof (data);
 904
 905   iov[1].iov_base = (void *) idx;
 906   iov[1].iov_len = nelems * sizeof (uint32_t);
 907
 908   idx[0] = iov[0].iov_len + iov[1].iov_len;
 909   offset = 0;
 910
 911   for (elem = 0; elem < nelems; ++elem)
 912     {
 913       if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))
 914         switch (elem)
 915           {
 916 #define CTYPE_EMPTY(name) \
 917           case name:                                                          \
 918             iov[2 + elem + offset].iov_base = NULL;                           \
 919             iov[2 + elem + offset].iov_len = 0;                               \
 920             idx[elem + 1] = idx[elem];                                        \
 921             break
 922
 923           CTYPE_EMPTY(_NL_CTYPE_GAP1);
 924           CTYPE_EMPTY(_NL_CTYPE_GAP2);
 925           CTYPE_EMPTY(_NL_CTYPE_GAP3);
 926           CTYPE_EMPTY(_NL_CTYPE_GAP4);
 927           CTYPE_EMPTY(_NL_CTYPE_GAP5);
 928           CTYPE_EMPTY(_NL_CTYPE_GAP6);
 929
 930 #define CTYPE_DATA(name, base, len)                                           \
 931           case _NL_ITEM_INDEX (name):                                         \
 932             iov[2 + elem + offset].iov_base = (base);                         \
 933             iov[2 + elem + offset].iov_len = (len);                           \
 934             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;       \
 935             break
 936
 937           CTYPE_DATA (_NL_CTYPE_CLASS,
 938                       ctype->ctype_b,
 939                       (256 + 128) * sizeof (char_class_t));
 940
 941           CTYPE_DATA (_NL_CTYPE_TOUPPER,
 942                       ctype->map_b[0],
 943                       (256 + 128) * sizeof (uint32_t));
 944           CTYPE_DATA (_NL_CTYPE_TOLOWER,
 945                       ctype->map_b[1],
 946                       (256 + 128) * sizeof (uint32_t));
 947
 948           CTYPE_DATA (_NL_CTYPE_TOUPPER32,
 949                       ctype->map32_b[0],
 950                       256 * sizeof (uint32_t));
 951           CTYPE_DATA (_NL_CTYPE_TOLOWER32,
 952                       ctype->map32_b[1],
 953                       256 * sizeof (uint32_t));
 954
 955           CTYPE_DATA (_NL_CTYPE_CLASS32,
 956                       ctype->ctype32_b,
 957                       256 * sizeof (char_class32_t));
 958
 959           CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET,
 960                       &ctype->class_offset, sizeof (uint32_t));
 961
 962           CTYPE_DATA (_NL_CTYPE_MAP_OFFSET,
 963                       &ctype->map_offset, sizeof (uint32_t));
 964
 965           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE,
 966                       &ctype->translit_idx_size, sizeof (uint32_t));
 967
 968           CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX,
 969                       ctype->translit_from_idx,
 970                       ctype->translit_idx_size * sizeof (uint32_t));
 971
 972           CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL,
 973                       ctype->translit_from_tbl,
 974                       ctype->translit_from_tbl_size);
 975
 976           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX,
 977                       ctype->translit_to_idx,
 978                       ctype->translit_idx_size * sizeof (uint32_t));
 979
 980           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL,
 981                       ctype->translit_to_tbl, ctype->translit_to_tbl_size);
 982
 983           case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
 984             /* The class name array.  */
 985             total = 0;
 986             for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
 987               {
 988                 iov[2 + elem + offset].iov_base
 989                   = (void *) ctype->classnames[cnt];
 990                 iov[2 + elem + offset].iov_len
 991                   = strlen (ctype->classnames[cnt]) + 1;
 992                 total += iov[2 + elem + offset].iov_len;
 993               }
                 /* Terminate the list: 1 to 4 NUL bytes, chosen so that the
                    total length becomes a multiple of four.  */
 994             iov[2 + elem + offset].iov_base = (void *) nulbytes;
 995             iov[2 + elem + offset].iov_len = 4 - (total % 4);
 996             total += 4 - (total % 4);
 997
 998             idx[elem + 1] = idx[elem] + total;
 999             break;
1000
1001           case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
1002             /* The map name array.  */
1003             total = 0;
1004             for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
1005               {
1006                 iov[2 + elem + offset].iov_base
1007                   = (void *) ctype->mapnames[cnt];
1008                 iov[2 + elem + offset].iov_len
1009                   = strlen (ctype->mapnames[cnt]) + 1;
1010                 total += iov[2 + elem + offset].iov_len;
1011               }
                 /* Same 1 to 4 byte NUL padding as for the class names.  */
1012             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1013             iov[2 + elem + offset].iov_len = 4 - (total % 4);
1014             total += 4 - (total % 4);
1015
1016             idx[elem + 1] = idx[elem] + total;
1017             break;
1018
1019           CTYPE_DATA (_NL_CTYPE_WIDTH,
1020                       ctype->width.iov_base,
1021                       ctype->width.iov_len);
1022
1023           CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
1024                       &ctype->mb_cur_max, sizeof (uint32_t));
1025
1026           case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
                 /* The name including its NUL byte is written as is when its
                    length is a multiple of four; otherwise a NUL-padded copy
                    rounded up to the next multiple of four is used.  */
1027             total = strlen (ctype->codeset_name) + 1;
1028             if (total % 4 == 0)
1029               iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
1030             else
1031               {
1032                 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
1033                 memset (mempcpy (iov[2 + elem + offset].iov_base,
1034                                  ctype->codeset_name, total),
1035                         '\0', 4 - (total & 3));
1036                 total = (total + 3) & ~3;
1037               }
1038             iov[2 + elem + offset].iov_len = total;
1039             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1040             break;
1041
1042
1043           CTYPE_DATA (_NL_CTYPE_MAP_TO_NONASCII,
1044                       &ctype->to_nonascii, sizeof (uint32_t));
1045
1046           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
                 /* Number of complete groups of ten multibyte input digits.  */
1047             iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1048             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1049             *(uint32_t *) iov[2 + elem + offset].iov_base =
1050               ctype->mbdigits_act / 10;
1051             idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1052             break;
1053
1054           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
1055             /* Align entries.  The padding (0 to 3 NUL bytes) occupies an
                    iovec entry of its own and is skipped by advancing
                    idx[elem], so this element starts at a multiple of four.  */
1056             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1057             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1058             idx[elem] += iov[2 + elem + offset].iov_len;
1059             ++offset;
1060
1061             iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1062             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1063             *(uint32_t *) iov[2 + elem + offset].iov_base =
1064               ctype->wcdigits_act / 10;
1065             idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1066             break;
1067
1068           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
1069             /* Compute the length of all possible characters.  For INDIGITS
1070                there might be more than one.  We simply concatenate all of
1071                them with a NUL byte following.  The NUL byte wouldn't be
1072                necessary but it makes it easier for the user.  */
1073             total = 0;
1074
                 /* Digit N of every group lives at index N, N + 10, N + 20, ...
                    First pass: add up the sizes.  */
1075             for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1076                  cnt < ctype->mbdigits_act; cnt += 10)
1077               total += ctype->mbdigits[cnt]->nbytes + 1;
1078             iov[2 + elem + offset].iov_base = (char *) alloca (total);
1079             iov[2 + elem + offset].iov_len = total;
1080
                 /* Second pass: copy the byte sequences.  */
1081             cp = iov[2 + elem + offset].iov_base;
1082             for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1083                  cnt < ctype->mbdigits_act; cnt += 10)
1084               {
1085                 cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes,
1086                               ctype->mbdigits[cnt]->nbytes);
1087                 *cp++ = '\0';
1088               }
1089             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1090             break;
1091
1092           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
1093             /* Every output digit has exactly one multibyte sequence
1094                (unlike INDIGITS, where there might be more than one).
1095                We emit it with a NUL byte following.  The NUL byte wouldn't
1096                be necessary but it makes it easier for the user.  */
1097             cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
1098             total = ctype->mboutdigits[cnt]->nbytes + 1;
1099             iov[2 + elem + offset].iov_base = (char *) alloca (total);
1100             iov[2 + elem + offset].iov_len = total;
1101
1102             *(char *) mempcpy (iov[2 + elem + offset].iov_base,
1103                                ctype->mboutdigits[cnt]->bytes,
1104                                ctype->mboutdigits[cnt]->nbytes) = '\0';
1105             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1106             break;
1107
1108           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
                 /* One uint32_t per group of ten: slot CNT / 10 receives this
                    digit's wide character from group CNT / 10.  */
1109             total = ctype->wcdigits_act / 10;
1110
1111             iov[2 + elem + offset].iov_base =
1112               (uint32_t *) alloca (total * sizeof (uint32_t));
1113             iov[2 + elem + offset].iov_len = total * sizeof (uint32_t);
1114
1115             for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
1116                  cnt < ctype->wcdigits_act; cnt += 10)
1117               ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10]
1118                 = ctype->wcdigits[cnt];
1119             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1120             break;
1121
1122           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC):
1123             /* Align entries.  */
1124             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1125             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1126             idx[elem] += iov[2 + elem + offset].iov_len;
1127             ++offset;
1128             /* FALLTHROUGH */
1129
1130           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT1_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
1131             cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
1132             iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt];
1133             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1134             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1135             break;
1136
1137           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
1138             /* Align entries.  */
1139             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1140             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1141             idx[elem] += iov[2 + elem + offset].iov_len;
1142             ++offset;
1143
                 /* NOTE(review): the uint32_t string is measured with wcslen,
                    which presumably relies on wchar_t being 32 bits wide.  */
1144             default_missing_len = (ctype->default_missing
1145                                    ? wcslen ((wchar_t *)ctype->default_missing)
1146                                    : 0);
1147             iov[2 + elem + offset].iov_base = &default_missing_len;
1148             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1149             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1150             break;
1151
1152           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
                 /* The string is written without its terminating NUL; an
                    unset default yields an element of length zero.  */
1153             iov[2 + elem + offset].iov_base =
1154               ctype->default_missing ?: (uint32_t *) L"";
1155             iov[2 + elem + offset].iov_len =
1156               wcslen (iov[2 + elem + offset].iov_base) * sizeof (uint32_t);
1157             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1158             break;
1159
1160           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
1161             /* Align entries.  */
1162             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1163             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1164             idx[elem] += iov[2 + elem + offset].iov_len;
1165             ++offset;
1166
1167             iov[2 + elem + offset].iov_base = &ctype->ntranslit_ignore;
1168             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1169             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1170             break;
1171
1172           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
1173             {
                   /* Flatten the ignore list into (from, to, step) triples.  */
1174               uint32_t *ranges = (uint32_t *) alloca (ctype->ntranslit_ignore
1175                                                       * 3 * sizeof (uint32_t));
1176               struct translit_ignore_t *runp;
1177
1178               iov[2 + elem + offset].iov_base = ranges;
1179               iov[2 + elem + offset].iov_len = (ctype->ntranslit_ignore
1180                                                 * 3 * sizeof (uint32_t));
1181
1182               for (runp = ctype->translit_ignore; runp != NULL;
1183                    runp = runp->next)
1184                 {
1185                   *ranges++ = runp->from;
1186                   *ranges++ = runp->to;
1187                   *ranges++ = runp->step;
1188                 }
1189             }
1190             /* Remove the following line in case a new entry is added
1191                after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN.
                    NOTE(review): ELEM < NELEMS always holds inside this loop,
                    so the test below never suppresses the store.  */
1192             if (elem < nelems)
1193               idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1194             break;
1195
1196           default:
1197             assert (! "unknown CTYPE element");
1198           }
1199       else
1200         {
1201           /* Handle extra maps.  */
1202           size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
1203           if (nr < ctype->nr_charclass)
1204             {
                   /* A class contributes two iovec entries: the 256-bit
                      table class_b[nr], whose length is added to idx[elem]
                      so that the index entry refers to what follows it,
                      and then the class_3level[nr] table.  */
1205               iov[2 + elem + offset].iov_base = ctype->class_b[nr];
1206               iov[2 + elem + offset].iov_len = 256 / 32 * sizeof (uint32_t);
1207               idx[elem] += iov[2 + elem + offset].iov_len;
1208               ++offset;
1209
1210               iov[2 + elem + offset] = ctype->class_3level[nr];
1211             }
1212           else
1213             {
                   /* The elements behind the classes are the map tables.  */
1214               nr -= ctype->nr_charclass;
1215               assert (nr < ctype->map_collection_nr);
1216               iov[2 + elem + offset] = ctype->map_3level[nr];
1217             }
1218           idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1219         }
1220     }
1221
       /* All iovec entries reserved by the alloca above must be in use.  */
1222   assert (2 + elem + offset == (nelems + 2 * ctype->nr_charclass
1223                                 + ctype->map_collection_nr + 4 + 2));
1224
1225   write_locale_data (output_path, LC_CTYPE, "LC_CTYPE", 2 + elem + offset,
1226                      iov);
1227 }
1228
1229
1230 /* Local functions.  */
1231 static void
1232 ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
1233                  const char *name)
1234 {
1235   size_t cnt;
1236
1237   for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1238     if (strcmp (ctype->classnames[cnt], name) == 0)
1239       break;
1240
1241   if (cnt < ctype->nr_charclass)
1242     {
1243       lr_error (lr, _("character class `%s' already defined"), name);
1244       return;
1245     }
1246
1247   if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1248     /* Exit code 2 is prescribed in P1003.2b.  */
1249     WITH_CUR_LOCALE (error (2, 0, _("\
1250 implementation limit: no more than %Zd character classes allowed"),
1251                             MAX_NR_CHARCLASS));
1252
1253   ctype->classnames[ctype->nr_charclass++] = name;
1254 }
1255
1256
1257 static void
1258 ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1259                const char *name, const struct charmap_t *charmap)
1260 {
1261   size_t max_chars = 0;
1262   size_t cnt;
1263
1264   for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1265     {
1266       if (strcmp (ctype->mapnames[cnt], name) == 0)
1267         break;
1268
1269       if (max_chars < ctype->map_collection_max[cnt])
1270         max_chars = ctype->map_collection_max[cnt];
1271     }
1272
1273   if (cnt < ctype->map_collection_nr)
1274     {
1275       lr_error (lr, _("character map `%s' already defined"), name);
1276       return;
1277     }
1278
1279   if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1280     /* Exit code 2 is prescribed in P1003.2b.  */
1281     WITH_CUR_LOCALE (error (2, 0, _("\
1282 implementation limit: no more than %d character maps allowed"),
1283                             MAX_NR_CHARMAP));
1284
1285   ctype->mapnames[cnt] = name;
1286
1287   if (max_chars == 0)
1288     ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1289   else
1290     ctype->map_collection_max[cnt] = max_chars;
1291
1292   ctype->map_collection[cnt] = (uint32_t *)
1293     xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
1294   ctype->map_collection_act[cnt] = 256;
1295
1296   ++ctype->map_collection_nr;
1297 }
1298
1299
1300 /* We have to be prepared that TABLE, MAX, and ACT can be NULL.  This
1301    is possible if we only want to extend the name array.  */
1302 static uint32_t *
1303 find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1304           size_t *act, uint32_t idx)
1305 {
1306   size_t cnt;
1307
1308   if (idx < 256)
1309     return table == NULL ? NULL : &(*table)[idx];
1310
1311   /* Use the charnames_idx lookup table instead of the slow search loop.  */
1312 #if 1
1313   cnt = idx_table_get (&ctype->charnames_idx, idx);
1314   if (cnt == EMPTY)
1315     /* Not found.  */
1316     cnt = ctype->charnames_act;
1317 #else
1318   for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1319     if (ctype->charnames[cnt] == idx)
1320       break;
1321 #endif
1322
1323   /* We have to distinguish two cases: the name is found or not.  */
1324   if (cnt == ctype->charnames_act)
1325     {
1326       /* Extend the name array.  */
1327       if (ctype->charnames_act == ctype->charnames_max)
1328         {
1329           ctype->charnames_max *= 2;
1330           ctype->charnames = (uint32_t *)
1331             xrealloc (ctype->charnames,
1332                       sizeof (uint32_t) * ctype->charnames_max);
1333         }
1334       ctype->charnames[ctype->charnames_act++] = idx;
1335       idx_table_add (&ctype->charnames_idx, idx, cnt);
1336     }
1337
1338   if (table == NULL)
1339     /* We have done everything we are asked to do.  */
1340     return NULL;
1341
1342   if (max == NULL)
1343     /* The caller does not want to extend the table.  */
1344     return (cnt >= *act ? NULL : &(*table)[cnt]);
1345
1346   if (cnt >= *act)
1347     {
1348       if (cnt >= *max)
1349         {
1350           size_t old_max = *max;
1351           do
1352             *max *= 2;
1353           while (*max <= cnt);
1354
1355           *table =
1356             (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
1357           memset (&(*table)[old_max], '\0',
1358                   (*max - old_max) * sizeof (uint32_t));
1359         }
1360
1361       *act = cnt + 1;
1362     }
1363
1364   return &(*table)[cnt];
1365 }
1366
1367
1368 static int
1369 get_character (struct token *now, const struct charmap_t *charmap,
1370                struct repertoire_t *repertoire,
1371                struct charseq **seqp, uint32_t *wchp)
1372 {
1373   if (now->tok == tok_bsymbol)
1374     {
1375       /* This will hopefully be the normal case.  */
1376       *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1377                                      now->val.str.lenmb);
1378       *seqp = charmap_find_value (charmap, now->val.str.startmb,
1379                                   now->val.str.lenmb);
1380     }
1381   else if (now->tok == tok_ucs4)
1382     {
1383       char utmp[10];
1384
1385       snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1386       *seqp = charmap_find_value (charmap, utmp, 9);
1387
1388       if (*seqp == NULL)
1389         *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
1390
1391       if (*seqp == NULL)
1392         {
1393           /* Compute the value in the charmap from the UCS value.  */
1394           const char *symbol = repertoire_find_symbol (repertoire,
1395                                                        now->val.ucs4);
1396
1397           if (symbol == NULL)
1398             *seqp = NULL;
1399           else
1400             *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
1401
1402           if (*seqp == NULL)
1403             {
1404               if (repertoire != NULL)
1405                 {
1406                   /* Insert a negative entry.  */
1407                   static const struct charseq negative
1408                     = { .ucs4 = ILLEGAL_CHAR_VALUE };
1409                   uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1410                                                   sizeof (uint32_t));
1411                   *newp = now->val.ucs4;
1412
1413                   insert_entry (&repertoire->seq_table, newp,
1414                                 sizeof (uint32_t), (void *) &negative);
1415                 }
1416             }
1417           else
1418             (*seqp)->ucs4 = now->val.ucs4;
1419         }
1420       else if ((*seqp)->ucs4 != now->val.ucs4)
1421         *seqp = NULL;
1422
1423       *wchp = now->val.ucs4;
1424     }
1425   else if (now->tok == tok_charcode)
1426     {
1427       /* We must map from the byte code to UCS4.  */
1428       *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1429                                    now->val.str.lenmb);
1430
1431       if (*seqp == NULL)
1432         *wchp = ILLEGAL_CHAR_VALUE;
1433       else
1434         {
1435           if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1436             (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1437                                                    strlen ((*seqp)->name));
1438           *wchp = (*seqp)->ucs4;
1439         }
1440     }
1441   else
1442     return 1;
1443
1444   return 0;
1445 }
1446
1447
1448 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1449    the .(2). counterparts.  */
1450 static void
1451 charclass_symbolic_ellipsis (struct linereader *ldfile,
1452                              struct locale_ctype_t *ctype,
1453                              const struct charmap_t *charmap,
1454                              struct repertoire_t *repertoire,
1455                              struct token *now,
1456                              const char *last_str,
1457                              unsigned long int class256_bit,
1458                              unsigned long int class_bit, int base,
1459                              int ignore_content, int handle_digits, int step)
1460 {
1461   const char *nowstr = now->val.str.startmb;
1462   char tmp[now->val.str.lenmb + 1];
1463   const char *cp;
1464   char *endp;
1465   unsigned long int from;
1466   unsigned long int to;
1467
1468   /* We have to compute the ellipsis values using the symbolic names.  */
1469   assert (last_str != NULL);
1470
1471   if (strlen (last_str) != now->val.str.lenmb)
1472     {
1473     invalid_range:
1474       lr_error (ldfile,
1475                 _("`%s' and `%.*s' are not valid names for symbolic range"),
1476                 last_str, (int) now->val.str.lenmb, nowstr);
1477       return;
1478     }
1479
1480   if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1481     /* Nothing to do, the names are the same.  */
1482     return;
1483
1484   for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1485     ;
1486
1487   errno = 0;
1488   from = strtoul (cp, &endp, base);
1489   if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1490     goto invalid_range;
1491
1492   to = strtoul (nowstr + (cp - last_str), &endp, base);
1493   if ((to == UINT_MAX && errno == ERANGE)
1494       || (endp - nowstr) != now->val.str.lenmb || from >= to)
1495     goto invalid_range;
1496
1497   /* OK, we have a range FROM - TO.  Now we can create the symbolic names.  */
1498   if (!ignore_content)
1499     {
1500       now->val.str.startmb = tmp;
1501       while ((from += step) <= to)
1502         {
1503           struct charseq *seq;
1504           uint32_t wch;
1505
1506           sprintf (tmp, (base == 10 ? "%.*s%0*ld" : "%.*s%0*lX"),
1507                    (int) (cp - last_str), last_str,
1508                    (int) (now->val.str.lenmb - (cp - last_str)),
1509                    from);
1510
1511           get_character (now, charmap, repertoire, &seq, &wch);
1512
1513           if (seq != NULL && seq->nbytes == 1)
1514             /* Yep, we can store information about this byte sequence.  */
1515             ctype->class256_collection[seq->bytes[0]] |= class256_bit;
1516
1517           if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1518             /* We have the UCS4 position.  */
1519             *find_idx (ctype, &ctype->class_collection,
1520                        &ctype->class_collection_max,
1521                        &ctype->class_collection_act, wch) |= class_bit;
1522
1523           if (handle_digits == 1)
1524             {
1525               /* We must store the digit values.  */
1526               if (ctype->mbdigits_act == ctype->mbdigits_max)
1527                 {
1528                   ctype->mbdigits_max *= 2;
1529                   ctype->mbdigits = xrealloc (ctype->mbdigits,
1530                                               (ctype->mbdigits_max
1531                                                * sizeof (char *)));
1532                   ctype->wcdigits_max *= 2;
1533                   ctype->wcdigits = xrealloc (ctype->wcdigits,
1534                                               (ctype->wcdigits_max
1535                                                * sizeof (uint32_t)));
1536                 }
1537
1538               ctype->mbdigits[ctype->mbdigits_act++] = seq;
1539               ctype->wcdigits[ctype->wcdigits_act++] = wch;
1540             }
1541           else if (handle_digits == 2)
1542             {
1543               /* We must store the digit values.  */
1544               if (ctype->outdigits_act >= 10)
1545                 {
1546                   lr_error (ldfile, _("\
1547 %s: field `%s' does not contain exactly ten entries"),
1548                             "LC_CTYPE", "outdigit");
1549                   return;
1550                 }
1551
1552               ctype->mboutdigits[ctype->outdigits_act] = seq;
1553               ctype->wcoutdigits[ctype->outdigits_act] = wch;
1554               ++ctype->outdigits_act;
1555             }
1556         }
1557     }
1558 }
1559
1560
1561 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'.  */
1562 static void
1563 charclass_ucs4_ellipsis (struct linereader *ldfile,
1564                          struct locale_ctype_t *ctype,
1565                          const struct charmap_t *charmap,
1566                          struct repertoire_t *repertoire,
1567                          struct token *now, uint32_t last_wch,
1568                          unsigned long int class256_bit,
1569                          unsigned long int class_bit, int ignore_content,
1570                          int handle_digits, int step)
1571 {
1572   if (last_wch > now->val.ucs4)
1573     {
1574       lr_error (ldfile, _("\
1575 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1576                 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1577                 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
1578       return;
1579     }
1580
1581   if (!ignore_content)
1582     while ((last_wch += step) <= now->val.ucs4)
1583       {
1584         /* We have to find out whether there is a byte sequence corresponding
1585            to this UCS4 value.  */
1586         struct charseq *seq;
1587         char utmp[10];
1588
1589         snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1590         seq = charmap_find_value (charmap, utmp, 9);
1591         if (seq == NULL)
1592           {
1593             snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1594             seq = charmap_find_value (charmap, utmp, 5);
1595           }
1596
1597         if (seq == NULL)
1598           /* Try looking in the repertoire map.  */
1599           seq = repertoire_find_seq (repertoire, last_wch);
1600
1601         /* If this is the first time we look for this sequence create a new
1602            entry.  */
1603         if (seq == NULL)
1604           {
1605             static const struct charseq negative
1606               = { .ucs4 = ILLEGAL_CHAR_VALUE };
1607
1608             /* Find the symbolic name for this UCS4 value.  */
1609             if (repertoire != NULL)
1610               {
1611                 const char *symbol = repertoire_find_symbol (repertoire,
1612                                                              last_wch);
1613                 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1614                                                 sizeof (uint32_t));
1615                 *newp = last_wch;
1616
1617                 if (symbol != NULL)
1618                   /* We have a name, now search the multibyte value.  */
1619                   seq = charmap_find_value (charmap, symbol, strlen (symbol));
1620
1621                 if (seq == NULL)
1622                   /* We have to create a fake entry.  */
1623                   seq = (struct charseq *) &negative;
1624                 else
1625                   seq->ucs4 = last_wch;
1626
1627                 insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1628                               seq);
1629               }
1630             else
1631               /* We have to create a fake entry.  */
1632               seq = (struct charseq *) &negative;
1633           }
1634
1635         /* We have a name, now search the multibyte value.  */
1636         if (seq->ucs4 == last_wch && seq->nbytes == 1)
1637           /* Yep, we can store information about this byte sequence.  */
1638           ctype->class256_collection[(size_t) seq->bytes[0]]
1639             |= class256_bit;
1640
1641         /* And of course we have the UCS4 position.  */
1642         if (class_bit != 0)
1643           *find_idx (ctype, &ctype->class_collection,
1644                      &ctype->class_collection_max,
1645                      &ctype->class_collection_act, last_wch) |= class_bit;
1646
1647         if (handle_digits == 1)
1648           {
1649             /* We must store the digit values.  */
1650             if (ctype->mbdigits_act == ctype->mbdigits_max)
1651               {
1652                 ctype->mbdigits_max *= 2;
1653                 ctype->mbdigits = xrealloc (ctype->mbdigits,
1654                                             (ctype->mbdigits_max
1655                                              * sizeof (char *)));
1656                 ctype->wcdigits_max *= 2;
1657                 ctype->wcdigits = xrealloc (ctype->wcdigits,
1658                                             (ctype->wcdigits_max
1659                                              * sizeof (uint32_t)));
1660               }
1661
1662             ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1663                                                       ? seq : NULL);
1664             ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1665           }
1666         else if (handle_digits == 2)
1667           {
1668             /* We must store the digit values.  */
1669             if (ctype->outdigits_act >= 10)
1670               {
1671                 lr_error (ldfile, _("\
1672 %s: field `%s' does not contain exactly ten entries"),
1673                           "LC_CTYPE", "outdigit");
1674                 return;
1675               }
1676
1677             ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1678                                                         ? seq : NULL);
1679             ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1680             ++ctype->outdigits_act;
1681           }
1682       }
1683 }
1684
1685
1686 /* Ellipsis as in `/xea/x12.../xea/x34'.  */
1687 static void
1688 charclass_charcode_ellipsis (struct linereader *ldfile,
1689                              struct locale_ctype_t *ctype,
1690                              const struct charmap_t *charmap,
1691                              struct repertoire_t *repertoire,
1692                              struct token *now, char *last_charcode,
1693                              uint32_t last_charcode_len,
1694                              unsigned long int class256_bit,
1695                              unsigned long int class_bit, int ignore_content,
1696                              int handle_digits)
1697 {
1698   /* First check whether the to-value is larger.  */
1699   if (now->val.charcode.nbytes != last_charcode_len)
1700     {
1701       lr_error (ldfile, _("\
1702 start and end character sequence of range must have the same length"));
1703       return;
1704     }
1705
1706   if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
1707     {
1708       lr_error (ldfile, _("\
1709 to-value character sequence is smaller than from-value sequence"));
1710       return;
1711     }
1712
1713   if (!ignore_content)
1714     {
1715       do
1716         {
1717           /* Increment the byte sequence value.  */
1718           struct charseq *seq;
1719           uint32_t wch;
1720           int i;
1721
1722           for (i = last_charcode_len - 1; i >= 0; --i)
1723             if (++last_charcode[i] != 0)
1724               break;
1725
1726           if (last_charcode_len == 1)
1727             /* Of course we have the charcode value.  */
1728             ctype->class256_collection[(size_t) last_charcode[0]]
1729               |= class256_bit;
1730
1731           /* Find the symbolic name.  */
1732           seq = charmap_find_symbol (charmap, last_charcode,
1733                                      last_charcode_len);
1734           if (seq != NULL)
1735             {
1736               if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1737                 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1738                                                    strlen (seq->name));
1739               wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
1740
1741               if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1742                 *find_idx (ctype, &ctype->class_collection,
1743                            &ctype->class_collection_max,
1744                            &ctype->class_collection_act, wch) |= class_bit;
1745             }
1746           else
1747             wch = ILLEGAL_CHAR_VALUE;
1748
1749           if (handle_digits == 1)
1750             {
1751               /* We must store the digit values.  */
1752               if (ctype->mbdigits_act == ctype->mbdigits_max)
1753                 {
1754                   ctype->mbdigits_max *= 2;
1755                   ctype->mbdigits = xrealloc (ctype->mbdigits,
1756                                               (ctype->mbdigits_max
1757                                                * sizeof (char *)));
1758                   ctype->wcdigits_max *= 2;
1759                   ctype->wcdigits = xrealloc (ctype->wcdigits,
1760                                               (ctype->wcdigits_max
1761                                                * sizeof (uint32_t)));
1762                 }
1763
1764               seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1765               memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1766               seq->nbytes = last_charcode_len;
1767
1768               ctype->mbdigits[ctype->mbdigits_act++] = seq;
1769               ctype->wcdigits[ctype->wcdigits_act++] = wch;
1770             }
1771           else if (handle_digits == 2)
1772             {
1773               struct charseq *seq;
1774               /* We must store the digit values.  */
1775               if (ctype->outdigits_act >= 10)
1776                 {
1777                   lr_error (ldfile, _("\
1778 %s: field `%s' does not contain exactly ten entries"),
1779                             "LC_CTYPE", "outdigit");
1780                   return;
1781                 }
1782
1783               seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1784               memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1785               seq->nbytes = last_charcode_len;
1786
1787               ctype->mboutdigits[ctype->outdigits_act] = seq;
1788               ctype->wcoutdigits[ctype->outdigits_act] = wch;
1789               ++ctype->outdigits_act;
1790             }
1791         }
1792       while (memcmp (last_charcode, now->val.charcode.bytes,
1793                      last_charcode_len) != 0);
1794     }
1795 }
1796
1797
1798 static uint32_t *
1799 find_translit2 (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
1800                 uint32_t wch)
1801 {
1802   struct translit_t *trunp = ctype->translit;
1803   struct translit_ignore_t *tirunp = ctype->translit_ignore;
1804
1805   while (trunp != NULL)
1806     {
1807       /* XXX We simplify things here.  The transliterations we look
1808          for are only allowed to have one character.  */
1809       if (trunp->from[0] == wch && trunp->from[1] == 0)
1810         {
1811           /* Found it.  Now look for a transliteration which can be
1812              represented with the character set.  */
1813           struct translit_to_t *torunp = trunp->to;
1814
1815           while (torunp != NULL)
1816             {
1817               int i;
1818
1819               for (i = 0; torunp->str[i] != 0; ++i)
1820                 {
1821                   char utmp[10];
1822
1823                   snprintf (utmp, sizeof (utmp), "U%08X", torunp->str[i]);
1824                   if (charmap_find_value (charmap, utmp, 9) == NULL)
1825                     /* This character cannot be represented.  */
1826                     break;
1827                 }
1828
1829               if (torunp->str[i] == 0)
1830                 return torunp->str;
1831
1832               torunp = torunp->next;
1833             }
1834
1835           break;
1836         }
1837
1838       trunp = trunp->next;
1839     }
1840
1841   /* Check for ignored chars.  */
1842   while (tirunp != NULL)
1843     {
1844       if (tirunp->from <= wch && tirunp->to >= wch)
1845         {
1846           uint32_t wi;
1847
1848           for (wi = tirunp->from; wi <= wch; wi += tirunp->step)
1849             if (wi == wch)
1850               return (uint32_t []) { 0 };
1851         }
1852     }
1853
1854   /* Nothing found.  */
1855   return NULL;
1856 }
1857
1858
1859 uint32_t *
1860 find_translit (struct localedef_t *locale, const struct charmap_t *charmap,
1861                uint32_t wch)
1862 {
1863   struct locale_ctype_t *ctype;
1864   uint32_t *result = NULL;
1865
1866   assert (locale != NULL);
1867   ctype = locale->categories[LC_CTYPE].ctype;
1868
1869   if (ctype == NULL)
1870     return NULL;
1871
1872   if (ctype->translit != NULL)
1873     result = find_translit2 (ctype, charmap, wch);
1874
1875   if (result == NULL)
1876     {
1877       struct translit_include_t *irunp = ctype->translit_include;
1878
1879       while (irunp != NULL && result == NULL)
1880         {
1881           result = find_translit (find_locale (CTYPE_LOCALE,
1882                                                irunp->copy_locale,
1883                                                irunp->copy_repertoire,
1884                                                charmap),
1885                                   charmap, wch);
1886           irunp = irunp->next;
1887         }
1888     }
1889
1890   return result;
1891 }
1892
1893
1894 /* Read one transliteration entry.  */
1895 static uint32_t *
1896 read_widestring (struct linereader *ldfile, struct token *now,
1897                  const struct charmap_t *charmap,
1898                  struct repertoire_t *repertoire)
1899 {
1900   uint32_t *wstr;
1901
1902   if (now->tok == tok_default_missing)
1903     /* The special name "" will denote this case.  */
1904     wstr = ((uint32_t *) { 0 });
1905   else if (now->tok == tok_bsymbol)
1906     {
1907       /* Get the value from the repertoire.  */
1908       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1909       wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1910                                        now->val.str.lenmb);
1911       if (wstr[0] == ILLEGAL_CHAR_VALUE)
1912         {
1913           /* We cannot proceed, we don't know the UCS4 value.  */
1914           free (wstr);
1915           return NULL;
1916         }
1917
1918       wstr[1] = 0;
1919     }
1920   else if (now->tok == tok_ucs4)
1921     {
1922       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1923       wstr[0] = now->val.ucs4;
1924       wstr[1] = 0;
1925     }
1926   else if (now->tok == tok_charcode)
1927     {
1928       /* Argh, we have to convert to the symbol name first and then to the
1929          UCS4 value.  */
1930       struct charseq *seq = charmap_find_symbol (charmap,
1931                                                  now->val.str.startmb,
1932                                                  now->val.str.lenmb);
1933       if (seq == NULL)
1934         /* Cannot find the UCS4 value.  */
1935         return NULL;
1936
1937       if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1938         seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1939                                            strlen (seq->name));
1940       if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1941         /* We cannot proceed, we don't know the UCS4 value.  */
1942         return NULL;
1943
1944       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1945       wstr[0] = seq->ucs4;
1946       wstr[1] = 0;
1947     }
1948   else if (now->tok == tok_string)
1949     {
1950       wstr = now->val.str.startwc;
1951       if (wstr == NULL || wstr[0] == 0)
1952         return NULL;
1953     }
1954   else
1955     {
1956       if (now->tok != tok_eol && now->tok != tok_eof)
1957         lr_ignore_rest (ldfile, 0);
1958       SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1959       return (uint32_t *) -1l;
1960     }
1961
1962   return wstr;
1963 }
1964
1965
1966 static void
1967 read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1968                      struct token *now, const struct charmap_t *charmap,
1969                      struct repertoire_t *repertoire)
1970 {
1971   uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1972   struct translit_t *result;
1973   struct translit_to_t **top;
1974   struct obstack *ob = &ctype->mempool;
1975   int first;
1976   int ignore;
1977
1978   if (from_wstr == NULL)
1979     /* There is no valid from string.  */
1980     return;
1981
1982   result = (struct translit_t *) obstack_alloc (ob,
1983                                                 sizeof (struct translit_t));
1984   result->from = from_wstr;
1985   result->fname = ldfile->fname;
1986   result->lineno = ldfile->lineno;
1987   result->next = NULL;
1988   result->to = NULL;
1989   top = &result->to;
1990   first = 1;
1991   ignore = 0;
1992
1993   while (1)
1994     {
1995       uint32_t *to_wstr;
1996
1997       /* Next we have one or more transliterations.  They are
1998          separated by semicolons.  */
1999       now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2000
2001       if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
2002         {
2003           /* One string read.  */
2004           const uint32_t zero = 0;
2005
2006           if (!ignore)
2007             {
2008               obstack_grow (ob, &zero, 4);
2009               to_wstr = obstack_finish (ob);
2010
2011               *top = obstack_alloc (ob, sizeof (struct translit_to_t));
2012               (*top)->str = to_wstr;
2013               (*top)->next = NULL;
2014             }
2015
2016           if (now->tok == tok_eol)
2017             {
2018               result->next = ctype->translit;
2019               ctype->translit = result;
2020               return;
2021             }
2022
2023           if (!ignore)
2024             top = &(*top)->next;
2025           ignore = 0;
2026         }
2027       else
2028         {
2029           to_wstr = read_widestring (ldfile, now, charmap, repertoire);
2030           if (to_wstr == (uint32_t *) -1l)
2031             {
2032               /* An error occurred.  */
2033               obstack_free (ob, result);
2034               return;
2035             }
2036
2037           if (to_wstr == NULL)
2038             ignore = 1;
2039           else
2040             /* This value is usable.  */
2041             obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
2042
2043           first = 0;
2044         }
2045     }
2046 }
2047
2048
2049 static void
2050 read_translit_ignore_entry (struct linereader *ldfile,
2051                             struct locale_ctype_t *ctype,
2052                             const struct charmap_t *charmap,
2053                             struct repertoire_t *repertoire)
2054 {
2055   /* We expect a semicolon-separated list of characters we ignore.  We are
2056      only interested in the wide character definitions.  These must be
2057      single characters, possibly defining a range when an ellipsis is used.  */
2058   while (1)
2059     {
2060       struct token *now = lr_token (ldfile, charmap, NULL, repertoire,
2061                                     verbose);
2062       struct translit_ignore_t *newp;
2063       uint32_t from;
2064
2065       if (now->tok == tok_eol || now->tok == tok_eof)
2066         {
2067           lr_error (ldfile,
2068                     _("premature end of `translit_ignore' definition"));
2069           return;
2070         }
2071
2072       if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2073         {
2074           lr_error (ldfile, _("syntax error"));
2075           lr_ignore_rest (ldfile, 0);
2076           return;
2077         }
2078
2079       if (now->tok == tok_ucs4)
2080         from = now->val.ucs4;
2081       else
2082         /* Try to get the value.  */
2083         from = repertoire_find_value (repertoire, now->val.str.startmb,
2084                                       now->val.str.lenmb);
2085
2086       if (from == ILLEGAL_CHAR_VALUE)
2087         {
2088           lr_error (ldfile, "invalid character name");
2089           newp = NULL;
2090         }
2091       else
2092         {
2093           newp = (struct translit_ignore_t *)
2094             obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
2095           newp->from = from;
2096           newp->to = from;
2097           newp->step = 1;
2098
2099           newp->next = ctype->translit_ignore;
2100           ctype->translit_ignore = newp;
2101         }
2102
2103       /* Now we expect either a semicolon, an ellipsis, or the end of the
2104          line.  */
2105       now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2106
2107       if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
2108         {
2109           /* XXX Should we bother implementing `....'?  `...' certainly
2110              will not be implemented.  */
2111           uint32_t to;
2112           int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
2113
2114           now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2115
2116           if (now->tok == tok_eol || now->tok == tok_eof)
2117             {
2118               lr_error (ldfile,
2119                         _("premature end of `translit_ignore' definition"));
2120               return;
2121             }
2122
2123           if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2124             {
2125               lr_error (ldfile, _("syntax error"));
2126               lr_ignore_rest (ldfile, 0);
2127               return;
2128             }
2129
2130           if (now->tok == tok_ucs4)
2131             to = now->val.ucs4;
2132           else
2133             /* Try to get the value.  */
2134             to = repertoire_find_value (repertoire, now->val.str.startmb,
2135                                         now->val.str.lenmb);
2136
2137           if (to == ILLEGAL_CHAR_VALUE)
2138             lr_error (ldfile, "invalid character name");
2139           else
2140             {
2141               /* Make sure the `to'-value is larger.  */
2142               if (to >= from)
2143                 {
2144                   newp->to = to;
2145                   newp->step = step;
2146                 }
2147               else
2148                 lr_error (ldfile, _("\
2149 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2150                           (to | from) < 65536 ? 4 : 8, to,
2151                           (to | from) < 65536 ? 4 : 8, from);
2152             }
2153
2154           /* And the next token.  */
2155           now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2156         }
2157
2158       if (now->tok == tok_eol || now->tok == tok_eof)
2159         /* We are done.  */
2160         return;
2161
2162       if (now->tok == tok_semicolon)
2163         /* Next round.  */
2164         continue;
2165
2166       /* If we come here something is wrong.  */
2167       lr_error (ldfile, _("syntax error"));
2168       lr_ignore_rest (ldfile, 0);
2169       return;
2170     }
2171 }
2172
2173
2174 /* The parser for the LC_CTYPE section of the locale definition.  */
2175 void
2176 ctype_read (struct linereader *ldfile, struct localedef_t *result,
2177             const struct charmap_t *charmap, const char *repertoire_name,
2178             int ignore_content)
2179 {
2180   struct repertoire_t *repertoire = NULL;
2181   struct locale_ctype_t *ctype;
2182   struct token *now;
2183   enum token_t nowtok;
2184   size_t cnt;
2185   struct charseq *last_seq;
2186   uint32_t last_wch = 0;
2187   enum token_t last_token;
2188   enum token_t ellipsis_token;
2189   int step;
2190   char last_charcode[16];
2191   size_t last_charcode_len = 0;
2192   const char *last_str = NULL;
2193   int mapidx;
2194   struct localedef_t *copy_locale = NULL;
2195
2196   /* Get the repertoire we have to use.  */
2197   if (repertoire_name != NULL)
2198     repertoire = repertoire_read (repertoire_name);
2199
2200   /* The rest of the line containing `LC_CTYPE' must be free.  */
2201   lr_ignore_rest (ldfile, 1);
2202
2203
2204   do
2205     {
2206       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2207       nowtok = now->tok;
2208     }
2209   while (nowtok == tok_eol);
2210
2211   /* If we see `copy' now we are almost done.  */
2212   if (nowtok == tok_copy)
2213     {
2214       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2215       if (now->tok != tok_string)
2216         {
2217           SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2218
2219         skip_category:
2220           do
2221             now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2222           while (now->tok != tok_eof && now->tok != tok_end);
2223
2224           if (now->tok != tok_eof
2225               || (now = lr_token (ldfile, charmap, NULL, NULL, verbose),
2226                   now->tok == tok_eof))
2227             lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2228           else if (now->tok != tok_lc_ctype)
2229             {
2230               lr_error (ldfile, _("\
2231 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2232               lr_ignore_rest (ldfile, 0);
2233             }
2234           else
2235             lr_ignore_rest (ldfile, 1);
2236
2237           return;
2238         }
2239
2240       if (! ignore_content)
2241         {
2242           /* Get the locale definition.  */
2243           copy_locale = load_locale (LC_CTYPE, now->val.str.startmb,
2244                                      repertoire_name, charmap, NULL);
2245           if ((copy_locale->avail & CTYPE_LOCALE) == 0)
2246             {
2247               /* Not yet loaded.  So do it now.  */
2248               if (locfile_read (copy_locale, charmap) != 0)
2249                 goto skip_category;
2250             }
2251
2252           if (copy_locale->categories[LC_CTYPE].ctype == NULL)
2253             return;
2254         }
2255
2256       lr_ignore_rest (ldfile, 1);
2257
2258       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2259       nowtok = now->tok;
2260     }
2261
2262   /* Prepare the data structures.  */
2263   ctype_startup (ldfile, result, charmap, copy_locale, ignore_content);
2264   ctype = result->categories[LC_CTYPE].ctype;
2265
2266   /* Remember the repertoire we use.  */
2267   if (!ignore_content)
2268     ctype->repertoire = repertoire;
2269
2270   while (1)
2271     {
2272       unsigned long int class_bit = 0;
2273       unsigned long int class256_bit = 0;
2274       int handle_digits = 0;
2275
2276       /* Of course we don't proceed beyond the end of file.  */
2277       if (nowtok == tok_eof)
2278         break;
2279
2280       /* Ingore empty lines.  */
2281       if (nowtok == tok_eol)
2282         {
2283           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2284           nowtok = now->tok;
2285           continue;
2286         }
2287
2288       switch (nowtok)
2289         {
2290         case tok_charclass:
2291           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2292           while (now->tok == tok_ident || now->tok == tok_string)
2293             {
2294               ctype_class_new (ldfile, ctype, now->val.str.startmb);
2295               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2296               if (now->tok != tok_semicolon)
2297                 break;
2298               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2299             }
2300           if (now->tok != tok_eol)
2301             SYNTAX_ERROR (_("\
2302 %s: syntax error in definition of new character class"), "LC_CTYPE");
2303           break;
2304
2305         case tok_charconv:
2306           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2307           while (now->tok == tok_ident || now->tok == tok_string)
2308             {
2309               ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2310               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2311               if (now->tok != tok_semicolon)
2312                 break;
2313               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2314             }
2315           if (now->tok != tok_eol)
2316             SYNTAX_ERROR (_("\
2317 %s: syntax error in definition of new character map"), "LC_CTYPE");
2318           break;
2319
2320         case tok_class:
2321           /* Ignore the rest of the line if we don't need the input of
2322              this line.  */
2323           if (ignore_content)
2324             {
2325               lr_ignore_rest (ldfile, 0);
2326               break;
2327             }
2328
2329           /* We simply forget the `class' keyword and use the following
2330              operand to determine the bit.  */
2331           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2332           if (now->tok == tok_ident || now->tok == tok_string)
2333             {
2334               /* Must can be one of the predefined class names.  */
2335               for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2336                 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
2337                   break;
2338               if (cnt >= ctype->nr_charclass)
2339                 {
2340 #ifdef PREDEFINED_CLASSES
2341                   if (now->val.str.lenmb == 8
2342                       && memcmp ("special1", now->val.str.startmb, 8) == 0)
2343                     class_bit = _ISwspecial1;
2344                   else if (now->val.str.lenmb == 8
2345                       && memcmp ("special2", now->val.str.startmb, 8) == 0)
2346                     class_bit = _ISwspecial2;
2347                   else if (now->val.str.lenmb == 8
2348                       && memcmp ("special3", now->val.str.startmb, 8) == 0)
2349                     class_bit = _ISwspecial3;
2350                   else
2351 #endif
2352                     {
2353                       /* OK, it's a new class.  */
2354                       ctype_class_new (ldfile, ctype, now->val.str.startmb);
2355
2356                       class_bit = _ISwbit (ctype->nr_charclass - 1);
2357                     }
2358                 }
2359               else
2360                 {
2361                   class_bit = _ISwbit (cnt);
2362
2363                   free (now->val.str.startmb);
2364                 }
2365             }
2366           else if (now->tok == tok_digit)
2367             goto handle_tok_digit;
2368           else if (now->tok < tok_upper || now->tok > tok_blank)
2369             goto err_label;
2370           else
2371             {
2372               class_bit = BITw (now->tok);
2373               class256_bit = BIT (now->tok);
2374             }
2375
2376           /* The next character must be a semicolon.  */
2377           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2378           if (now->tok != tok_semicolon)
2379             goto err_label;
2380           goto read_charclass;
2381
2382         case tok_upper:
2383         case tok_lower:
2384         case tok_alpha:
2385         case tok_alnum:
2386         case tok_space:
2387         case tok_cntrl:
2388         case tok_punct:
2389         case tok_graph:
2390         case tok_print:
2391         case tok_xdigit:
2392         case tok_blank:
2393           /* Ignore the rest of the line if we don't need the input of
2394              this line.  */
2395           if (ignore_content)
2396             {
2397               lr_ignore_rest (ldfile, 0);
2398               break;
2399             }
2400
2401           class_bit = BITw (now->tok);
2402           class256_bit = BIT (now->tok);
2403           handle_digits = 0;
2404         read_charclass:
2405           ctype->class_done |= class_bit;
2406           last_token = tok_none;
2407           ellipsis_token = tok_none;
2408           step = 1;
2409           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2410           while (now->tok != tok_eol && now->tok != tok_eof)
2411             {
2412               uint32_t wch;
2413               struct charseq *seq;
2414
2415               if (ellipsis_token == tok_none)
2416                 {
2417                   if (get_character (now, charmap, repertoire, &seq, &wch))
2418                     goto err_label;
2419
2420                   if (!ignore_content && seq != NULL && seq->nbytes == 1)
2421                     /* Yep, we can store information about this byte
2422                        sequence.  */
2423                     ctype->class256_collection[seq->bytes[0]] |= class256_bit;
2424
2425                   if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2426                       && class_bit != 0)
2427                     /* We have the UCS4 position.  */
2428                     *find_idx (ctype, &ctype->class_collection,
2429                                &ctype->class_collection_max,
2430                                &ctype->class_collection_act, wch) |= class_bit;
2431
2432                   last_token = now->tok;
2433                   /* Terminate the string.  */
2434                   if (last_token == tok_bsymbol)
2435                     {
2436                       now->val.str.startmb[now->val.str.lenmb] = '\0';
2437                       last_str = now->val.str.startmb;
2438                     }
2439                   else
2440                     last_str = NULL;
2441                   last_seq = seq;
2442                   last_wch = wch;
2443                   memcpy (last_charcode, now->val.charcode.bytes, 16);
2444                   last_charcode_len = now->val.charcode.nbytes;
2445
2446                   if (!ignore_content && handle_digits == 1)
2447                     {
2448                       /* We must store the digit values.  */
2449                       if (ctype->mbdigits_act == ctype->mbdigits_max)
2450                         {
2451                           ctype->mbdigits_max += 10;
2452                           ctype->mbdigits = xrealloc (ctype->mbdigits,
2453                                                       (ctype->mbdigits_max
2454                                                        * sizeof (char *)));
2455                           ctype->wcdigits_max += 10;
2456                           ctype->wcdigits = xrealloc (ctype->wcdigits,
2457                                                       (ctype->wcdigits_max
2458                                                        * sizeof (uint32_t)));
2459                         }
2460
2461                       ctype->mbdigits[ctype->mbdigits_act++] = seq;
2462                       ctype->wcdigits[ctype->wcdigits_act++] = wch;
2463                     }
2464                   else if (!ignore_content && handle_digits == 2)
2465                     {
2466                       /* We must store the digit values.  */
2467                       if (ctype->outdigits_act >= 10)
2468                         {
2469                           lr_error (ldfile, _("\
2470 %s: field `%s' does not contain exactly ten entries"),
2471                             "LC_CTYPE", "outdigit");
2472                           lr_ignore_rest (ldfile, 0);
2473                           break;
2474                         }
2475
2476                       ctype->mboutdigits[ctype->outdigits_act] = seq;
2477                       ctype->wcoutdigits[ctype->outdigits_act] = wch;
2478                       ++ctype->outdigits_act;
2479                     }
2480                 }
2481               else
2482                 {
2483                   /* Now it gets complicated.  We have to resolve the
2484                      ellipsis problem.  First we must distinguish between
2485                      the different kind of ellipsis and this must match the
2486                      tokens we have seen.  */
2487                   assert (last_token != tok_none);
2488
2489                   if (last_token != now->tok)
2490                     {
2491                       lr_error (ldfile, _("\
2492 ellipsis range must be marked by two operands of same type"));
2493                       lr_ignore_rest (ldfile, 0);
2494                       break;
2495                     }
2496
2497                   if (last_token == tok_bsymbol)
2498                     {
2499                       if (ellipsis_token == tok_ellipsis3)
2500                         lr_error (ldfile, _("with symbolic name range values \
2501 the absolute ellipsis `...' must not be used"));
2502
2503                       charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2504                                                    repertoire, now, last_str,
2505                                                    class256_bit, class_bit,
2506                                                    (ellipsis_token
2507                                                     == tok_ellipsis4
2508                                                     ? 10 : 16),
2509                                                    ignore_content,
2510                                                    handle_digits, step);
2511                     }
2512                   else if (last_token == tok_ucs4)
2513                     {
2514                       if (ellipsis_token != tok_ellipsis2)
2515                         lr_error (ldfile, _("\
2516 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2517
2518                       charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2519                                                repertoire, now, last_wch,
2520                                                class256_bit, class_bit,
2521                                                ignore_content, handle_digits,
2522                                                step);
2523                     }
2524                   else
2525                     {
2526                       assert (last_token == tok_charcode);
2527
2528                       if (ellipsis_token != tok_ellipsis3)
2529                         lr_error (ldfile, _("\
2530 with character code range values one must use the absolute ellipsis `...'"));
2531
2532                       charclass_charcode_ellipsis (ldfile, ctype, charmap,
2533                                                    repertoire, now,
2534                                                    last_charcode,
2535                                                    last_charcode_len,
2536                                                    class256_bit, class_bit,
2537                                                    ignore_content,
2538                                                    handle_digits);
2539                     }
2540
2541                   /* Now we have used the last value.  */
2542                   last_token = tok_none;
2543                 }
2544
2545               /* Next we expect a semicolon or the end of the line.  */
2546               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2547               if (now->tok == tok_eol || now->tok == tok_eof)
2548                 break;
2549
2550               if (last_token != tok_none
2551                   && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
2552                 {
2553                   if (now->tok == tok_ellipsis2_2)
2554                     {
2555                       now->tok = tok_ellipsis2;
2556                       step = 2;
2557                     }
2558                   else if (now->tok == tok_ellipsis4_2)
2559                     {
2560                       now->tok = tok_ellipsis4;
2561                       step = 2;
2562                     }
2563
2564                   ellipsis_token = now->tok;
2565
2566                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2567                   continue;
2568                 }
2569
2570               if (now->tok != tok_semicolon)
2571                 goto err_label;
2572
2573               /* And get the next character.  */
2574               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2575
2576               ellipsis_token = tok_none;
2577               step = 1;
2578             }
2579           break;
2580
2581         case tok_digit:
2582           /* Ignore the rest of the line if we don't need the input of
2583              this line.  */
2584           if (ignore_content)
2585             {
2586               lr_ignore_rest (ldfile, 0);
2587               break;
2588             }
2589
2590         handle_tok_digit:
2591           class_bit = _ISwdigit;
2592           class256_bit = _ISdigit;
2593           handle_digits = 1;
2594           goto read_charclass;
2595
2596         case tok_outdigit:
2597           /* Ignore the rest of the line if we don't need the input of
2598              this line.  */
2599           if (ignore_content)
2600             {
2601               lr_ignore_rest (ldfile, 0);
2602               break;
2603             }
2604
2605           if (ctype->outdigits_act != 0)
2606             lr_error (ldfile, _("\
2607 %s: field `%s' declared more than once"),
2608                       "LC_CTYPE", "outdigit");
2609           class_bit = 0;
2610           class256_bit = 0;
2611           handle_digits = 2;
2612           goto read_charclass;
2613
2614         case tok_toupper:
2615           /* Ignore the rest of the line if we don't need the input of
2616              this line.  */
2617           if (ignore_content)
2618             {
2619               lr_ignore_rest (ldfile, 0);
2620               break;
2621             }
2622
2623           mapidx = 0;
2624           goto read_mapping;
2625
2626         case tok_tolower:
2627           /* Ignore the rest of the line if we don't need the input of
2628              this line.  */
2629           if (ignore_content)
2630             {
2631               lr_ignore_rest (ldfile, 0);
2632               break;
2633             }
2634
2635           mapidx = 1;
2636           goto read_mapping;
2637
2638         case tok_map:
2639           /* Ignore the rest of the line if we don't need the input of
2640              this line.  */
2641           if (ignore_content)
2642             {
2643               lr_ignore_rest (ldfile, 0);
2644               break;
2645             }
2646
2647           /* We simply forget the `map' keyword and use the following
2648              operand to determine the mapping.  */
2649           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2650           if (now->tok == tok_ident || now->tok == tok_string)
2651             {
2652               size_t cnt;
2653
2654               for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2655                 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2656                   break;
2657
2658               if (cnt < ctype->map_collection_nr)
2659                 free (now->val.str.startmb);
2660               else
2661                 /* OK, it's a new map.  */
2662                 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2663
2664               mapidx = cnt;
2665             }
2666           else if (now->tok < tok_toupper || now->tok > tok_tolower)
2667             goto err_label;
2668           else
2669             mapidx = now->tok - tok_toupper;
2670
2671           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2672           /* This better should be a semicolon.  */
2673           if (now->tok != tok_semicolon)
2674             goto err_label;
2675
2676         read_mapping:
2677           /* Test whether this mapping was already defined.  */
2678           if (ctype->tomap_done[mapidx])
2679             {
2680               lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2681                         ctype->mapnames[mapidx]);
2682               lr_ignore_rest (ldfile, 0);
2683               break;
2684             }
2685           ctype->tomap_done[mapidx] = 1;
2686
2687           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2688           while (now->tok != tok_eol && now->tok != tok_eof)
2689             {
2690               struct charseq *from_seq;
2691               uint32_t from_wch;
2692               struct charseq *to_seq;
2693               uint32_t to_wch;
2694
2695               /* Every pair starts with an opening brace.  */
2696               if (now->tok != tok_open_brace)
2697                 goto err_label;
2698
2699               /* Next comes the from-value.  */
2700               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2701               if (get_character (now, charmap, repertoire, &from_seq,
2702                                  &from_wch) != 0)
2703                 goto err_label;
2704
2705               /* The next is a comma.  */
2706               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2707               if (now->tok != tok_comma)
2708                 goto err_label;
2709
2710               /* And the other value.  */
2711               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2712               if (get_character (now, charmap, repertoire, &to_seq,
2713                                  &to_wch) != 0)
2714                 goto err_label;
2715
2716               /* And the last thing is the closing brace.  */
2717               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2718               if (now->tok != tok_close_brace)
2719                 goto err_label;
2720
2721               if (!ignore_content)
2722                 {
2723                   /* Check whether the mapping converts from an ASCII value
2724                      to a non-ASCII value.  */
2725                   if (from_seq != NULL && from_seq->nbytes == 1
2726                       && isascii (from_seq->bytes[0])
2727                       && to_seq != NULL && (to_seq->nbytes != 1
2728                                             || !isascii (to_seq->bytes[0])))
2729                     ctype->to_nonascii = 1;
2730
2731                   if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2732                       && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2733                     /* We can use this value.  */
2734                     ctype->map256_collection[mapidx][from_seq->bytes[0]]
2735                       = to_seq->bytes[0];
2736
2737                   if (from_wch != ILLEGAL_CHAR_VALUE
2738                       && to_wch != ILLEGAL_CHAR_VALUE)
2739                     /* Both correct values.  */
2740                     *find_idx (ctype, &ctype->map_collection[mapidx],
2741                                &ctype->map_collection_max[mapidx],
2742                                &ctype->map_collection_act[mapidx],
2743                                from_wch) = to_wch;
2744                 }
2745
2746               /* Now comes a semicolon or the end of the line/file.  */
2747               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2748               if (now->tok == tok_semicolon)
2749                 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2750             }
2751           break;
2752
2753         case tok_translit_start:
2754           /* Ignore the entire translit section with its peculiar syntax
2755              if we don't need the input.  */
2756           if (ignore_content)
2757             {
2758               do
2759                 {
2760                   lr_ignore_rest (ldfile, 0);
2761                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2762                 }
2763               while (now->tok != tok_translit_end && now->tok != tok_eof);
2764
2765               if (now->tok == tok_eof)
2766                 lr_error (ldfile, _(\
2767 "%s: `translit_start' section does not end with `translit_end'"),
2768                           "LC_CTYPE");
2769
2770               break;
2771             }
2772
2773           /* The rest of the line better should be empty.  */
2774           lr_ignore_rest (ldfile, 1);
2775
2776           /* We count here the number of allocated entries in the `translit'
2777              array.  */
2778           cnt = 0;
2779
2780           ldfile->translate_strings = 1;
2781           ldfile->return_widestr = 1;
2782
2783           /* We proceed until we see the `translit_end' token.  */
2784           while (now = lr_token (ldfile, charmap, NULL, repertoire, verbose),
2785                  now->tok != tok_translit_end && now->tok != tok_eof)
2786             {
2787               if (now->tok == tok_eol)
2788                 /* Ignore empty lines.  */
2789                 continue;
2790
2791               if (now->tok == tok_include)
2792                 {
2793                   /* We have to include locale.  */
2794                   const char *locale_name;
2795                   const char *repertoire_name;
2796                   struct translit_include_t *include_stmt, **include_ptr;
2797
2798                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2799                   /* This should be a string or an identifier.  In any
2800                      case something to name a locale.  */
2801                   if (now->tok != tok_string && now->tok != tok_ident)
2802                     {
2803                     translit_syntax:
2804                       lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2805                       lr_ignore_rest (ldfile, 0);
2806                       continue;
2807                     }
2808                   locale_name = now->val.str.startmb;
2809
2810                   /* Next should be a semicolon.  */
2811                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2812                   if (now->tok != tok_semicolon)
2813                     goto translit_syntax;
2814
2815                   /* Now the repertoire name.  */
2816                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2817                   if ((now->tok != tok_string && now->tok != tok_ident)
2818                       || now->val.str.startmb == NULL)
2819                     goto translit_syntax;
2820                   repertoire_name = now->val.str.startmb;
2821                   if (repertoire_name[0] == '\0')
2822                     /* Ignore the empty string.  */
2823                     repertoire_name = NULL;
2824
2825                   /* Save the include statement for later processing.  */
2826                   include_stmt = (struct translit_include_t *)
2827                     xmalloc (sizeof (struct translit_include_t));
2828                   include_stmt->copy_locale = locale_name;
2829                   include_stmt->copy_repertoire = repertoire_name;
2830                   include_stmt->next = NULL;
2831
2832                   include_ptr = &ctype->translit_include;
2833                   while (*include_ptr != NULL)
2834                     include_ptr = &(*include_ptr)->next;
2835                   *include_ptr = include_stmt;
2836
2837                   /* The rest of the line must be empty.  */
2838                   lr_ignore_rest (ldfile, 1);
2839
2840                   /* Make sure the locale is read.  */
2841                   add_to_readlist (LC_CTYPE, locale_name, repertoire_name,
2842                                    1, NULL);
2843                   continue;
2844                 }
2845               else if (now->tok == tok_default_missing)
2846                 {
2847                   uint32_t *wstr;
2848
2849                   while (1)
2850                     {
2851                       /* We expect a single character or string as the
2852                          argument.  */
2853                       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2854                       wstr = read_widestring (ldfile, now, charmap,
2855                                               repertoire);
2856
2857                       if (wstr != NULL)
2858                         {
2859                           if (ctype->default_missing != NULL)
2860                             {
2861                               lr_error (ldfile, _("\
2862 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2863                               WITH_CUR_LOCALE (error_at_line (0, 0,
2864                                                               ctype->default_missing_file,
2865                                                               ctype->default_missing_lineno,
2866                                                               _("\
2867 previous definition was here")));
2868                             }
2869                           else
2870                             {
2871                               ctype->default_missing = wstr;
2872                               ctype->default_missing_file = ldfile->fname;
2873                               ctype->default_missing_lineno = ldfile->lineno;
2874                             }
2875                           /* We can have more entries, ignore them.  */
2876                           lr_ignore_rest (ldfile, 0);
2877                           break;
2878                         }
2879                       else if (wstr == (uint32_t *) -1l)
2880                         /* This was a syntax error.  */
2881                         break;
2882
2883                       /* Maybe there is another replacement we can use.  */
2884                       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2885                       if (now->tok == tok_eol || now->tok == tok_eof)
2886                         {
2887                           /* Nothing found.  We tell the user.  */
2888                           lr_error (ldfile, _("\
2889 %s: no representable `default_missing' definition found"), "LC_CTYPE");
2890                           break;
2891                         }
2892                       if (now->tok != tok_semicolon)
2893                         goto translit_syntax;
2894                     }
2895
2896                   continue;
2897                 }
2898               else if (now->tok == tok_translit_ignore)
2899                 {
2900                   read_translit_ignore_entry (ldfile, ctype, charmap,
2901                                               repertoire);
2902                   continue;
2903                 }
2904
2905               read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2906             }
2907           ldfile->return_widestr = 0;
2908
2909           if (now->tok == tok_eof)
2910             lr_error (ldfile, _(\
2911 "%s: `translit_start' section does not end with `translit_end'"),
2912                       "LC_CTYPE");
2913
2914           break;
2915
2916         case tok_ident:
2917           /* Ignore the rest of the line if we don't need the input of
2918              this line.  */
2919           if (ignore_content)
2920             {
2921               lr_ignore_rest (ldfile, 0);
2922               break;
2923             }
2924
2925           /* This could mean one of several things.  First test whether
2926              it's a character class name.  */
2927           for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2928             if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2929               break;
2930           if (cnt < ctype->nr_charclass)
2931             {
2932               class_bit = _ISwbit (cnt);
2933               class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2934               free (now->val.str.startmb);
2935               goto read_charclass;
2936             }
2937           for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2938             if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2939               break;
2940           if (cnt < ctype->map_collection_nr)
2941             {
2942               mapidx = cnt;
2943               free (now->val.str.startmb);
2944               goto read_mapping;
2945             }
2946 #ifdef PREDEFINED_CLASSES
2947           if (strcmp (now->val.str.startmb, "special1") == 0)
2948             {
2949               class_bit = _ISwspecial1;
2950               free (now->val.str.startmb);
2951               goto read_charclass;
2952             }
2953           if (strcmp (now->val.str.startmb, "special2") == 0)
2954             {
2955               class_bit = _ISwspecial2;
2956               free (now->val.str.startmb);
2957               goto read_charclass;
2958             }
2959           if (strcmp (now->val.str.startmb, "special3") == 0)
2960             {
2961               class_bit = _ISwspecial3;
2962               free (now->val.str.startmb);
2963               goto read_charclass;
2964             }
2965           if (strcmp (now->val.str.startmb, "tosymmetric") == 0)
2966             {
2967               mapidx = 2;
2968               goto read_mapping;
2969             }
2970 #endif
2971           break;
2972
2973         case tok_end:
2974           /* Next we assume `LC_CTYPE'.  */
2975           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2976           if (now->tok == tok_eof)
2977             break;
2978           if (now->tok == tok_eol)
2979             lr_error (ldfile, _("%s: incomplete `END' line"),
2980                       "LC_CTYPE");
2981           else if (now->tok != tok_lc_ctype)
2982             lr_error (ldfile, _("\
2983 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2984           lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2985           return;
2986
2987         default:
2988         err_label:
2989           if (now->tok != tok_eof)
2990             SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2991         }
2992
2993       /* Prepare for the next round.  */
2994       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2995       nowtok = now->tok;
2996     }
2997
2998   /* When we come here we reached the end of the file.  */
2999   lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
3000 }
3001
3002
3003 static void
3004 set_class_defaults (struct locale_ctype_t *ctype,
3005                     const struct charmap_t *charmap,
3006                     struct repertoire_t *repertoire)
3007 {
3008   size_t cnt;
3009
3010   /* This function defines the default values for the classes and conversions
3011      according to POSIX.2 2.5.2.1.
3012      It may seem that the order of these if-blocks is arbitrary but it is NOT.
3013      Don't move them unless you know what you do!  */
3014
3015   auto void set_default (int bitpos, int from, int to);
3016
3017   void set_default (int bitpos, int from, int to)
3018     {
           /* Nested helper of set_class_defaults; it uses that function's
              CTYPE and CHARMAP.  For every character value CH in the
              inclusive range FROM..TO:
                - look CH up in the charmap, first under the one-character
                  name TMP, then under "U" followed by CH as eight hex digits;
                - if a one-byte sequence is found, set _ISbit (BITPOS) for
                  that byte in ctype->class256_collection, otherwise report
                  the problem with error ();
                - in every case set _ISwbit (BITPOS) in the class_collection
                  element selected by ELEM for CH.  */
3019       char tmp[2];
3020       int ch;
3021       int bit = _ISbit (bitpos);
3022       int bitw = _ISwbit (bitpos);
3023       /* TMP holds the one-character symbol name used for the lookup.  The
              strcpy supplies the terminating NUL; tmp[0] is set per character.  */
3024       strcpy (tmp, "?");
3025
3026       for (ch = from; ch <= to; ++ch)
3027         {
3028           struct charseq *seq;
3029           tmp[0] = ch;
3030
3031           seq = charmap_find_value (charmap, tmp, 1);
3032           if (seq == NULL)
3033             {
                   /* 'U', eight hex digits and the NUL fill BUF exactly.  */
3034               char buf[10];
3035               sprintf (buf, "U%08X", ch);
3036               seq = charmap_find_value (charmap, buf, 9);
3037             }
3038           if (seq == NULL)
3039             {
3040               if (!be_quiet)
3041                 WITH_CUR_LOCALE (error (0, 0, _("\
3042 %s: character `%s' not defined in charmap while needed as default value"),
3043                                         "LC_CTYPE", tmp));
3044             }
               /* NOTE(review): unlike the message above, this one is printed
                  even when be_quiet is set -- confirm that this is intended.  */
3045           else if (seq->nbytes != 1)
3046             WITH_CUR_LOCALE (error (0, 0, _("\
3047 %s: character `%s' in charmap not representable with one byte"),
3048                                     "LC_CTYPE", tmp));
3049           else
3050             ctype->class256_collection[seq->bytes[0]] |= bit;
3051
3052           /* No need to search here, the ASCII value is also the Unicode
3053              value.  */
               /* NOTE(review): ELEM is defined outside this excerpt; presumably it
                  yields the class_collection slot for CH -- confirm.  */
3054           ELEM (ctype, class_collection, , ch) |= bitw;
3055         }
3056     }
3057
3058   /* Set default values if keyword was not present.  */
3059   if ((ctype->class_done & BITw (tok_upper)) == 0)
3060     /* "If this keyword [upper] is not specified, the uppercase letters
3061         `A' through `Z', ..., shall automatically belong to this class,
3062         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
3063     set_default (BITPOS (tok_upper), 'A', 'Z');
3064
3065   if ((ctype->class_done & BITw (tok_lower)) == 0)
3066     /* "If this keyword [lower] is not specified, the lowercase letters
3067         `a' through `z', ..., shall automatically belong to this class,
3068         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
3069     set_default (BITPOS (tok_lower), 'a', 'z');
3070
3071   if ((ctype->class_done & BITw (tok_alpha)) == 0)
3072     {
3073       /* Table 2-6 in P1003.2 says that characters in class `upper' or
3074          class `lower' *must* be in class `alpha'.  */
3075       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
3076       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
3077
3078       for (cnt = 0; cnt < 256; ++cnt)
3079         if ((ctype->class256_collection[cnt] & mask) != 0)
3080           ctype->class256_collection[cnt] |= BIT (tok_alpha);
3081
3082       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3083         if ((ctype->class_collection[cnt] & maskw) != 0)
3084           ctype->class_collection[cnt] |= BITw (tok_alpha);
3085     }
3086
3087   if ((ctype->class_done & BITw (tok_digit)) == 0)
3088     /* "If this keyword [digit] is not specified, the digits `0' through
3089         `9', ..., shall automatically belong to this class, with
3090         implementation-defined character values."  [P1003.2, 2.5.2.1]  */
3091     set_default (BITPOS (tok_digit), '0', '9');
3092
3093   /* "Only characters specified for the `alpha' and `digit' keyword
3094      shall be specified.  Characters specified for the keyword `alpha'
3095      and `digit' are automatically included in this class [alnum]."  */
3096   {
3097     unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
3098     unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
3099
3100     for (cnt = 0; cnt < 256; ++cnt)
3101       if ((ctype->class256_collection[cnt] & mask) != 0)
3102         ctype->class256_collection[cnt] |= BIT (tok_alnum);
3103
3104     for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3105       if ((ctype->class_collection[cnt] & maskw) != 0)
3106         ctype->class_collection[cnt] |= BITw (tok_alnum);
3107   }
3108
3109   if ((ctype->class_done & BITw (tok_space)) == 0)
3110     /* "If this keyword [space] is not specified, the characters <space>,
3111         <form-feed>, <newline>, <carriage-return>, <tab>, and
3112         <vertical-tab>, ..., shall automatically belong to this class,
3113         with implementation-defined character values."  [P1003.2, 2.5.2.1]  */
3114     {
3115       struct charseq *seq;
3116
3117       seq = charmap_find_value (charmap, "space", 5);
3118       if (seq == NULL)
3119         seq = charmap_find_value (charmap, "SP", 2);
3120       if (seq == NULL)
3121         seq = charmap_find_value (charmap, "U00000020", 9);
3122       if (seq == NULL)
3123         {
3124           if (!be_quiet)
3125             WITH_CUR_LOCALE (error (0, 0, _("\
3126 %s: character `%s' not defined while needed as default value"),
3127                                     "LC_CTYPE", "<space>"));
3128         }
3129       else if (seq->nbytes != 1)
3130         WITH_CUR_LOCALE (error (0, 0, _("\
3131 %s: character `%s' in charmap not representable with one byte"),
3132                                 "LC_CTYPE", "<space>"));
3133       else
3134         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3135
3136       /* No need to search.  */
3137       ELEM (ctype, class_collection, , L' ') |= BITw (tok_space);
3138
3139       seq = charmap_find_value (charmap, "form-feed", 9);
3140       if (seq == NULL)
3141         seq = charmap_find_value (charmap, "U0000000C", 9);
3142       if (seq == NULL)
3143         {
3144           if (!be_quiet)
3145             WITH_CUR_LOCALE (error (0, 0, _("\
3146 %s: character `%s' not defined while needed as default value"),
3147                                     "LC_CTYPE", "<form-feed>"));
3148         }
3149       else if (seq->nbytes != 1)
3150         WITH_CUR_LOCALE (error (0, 0, _("\
3151 %s: character `%s' in charmap not representable with one byte"),
3152                                 "LC_CTYPE", "<form-feed>"));
3153       else
3154         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3155
3156       /* No need to search.  */
3157       ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space);
3158
3159
3160       seq = charmap_find_value (charmap, "newline", 7);
3161       if (seq == NULL)
3162         seq = charmap_find_value (charmap, "U0000000A", 9);
3163       if (seq == NULL)
3164         {
3165           if (!be_quiet)
3166             WITH_CUR_LOCALE (error (0, 0, _("\
3167 character `%s' not defined while needed as default value"),
3168                                     "<newline>"));
3169         }
3170       else if (seq->nbytes != 1)
3171         WITH_CUR_LOCALE (error (0, 0, _("\
3172 %s: character `%s' in charmap not representable with one byte"),
3173                                 "LC_CTYPE", "<newline>"));
3174       else
3175         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3176
3177       /* No need to search.  */
3178       ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space);
3179
3180
3181       seq = charmap_find_value (charmap, "carriage-return", 15);
3182       if (seq == NULL)
3183         seq = charmap_find_value (charmap, "U0000000D", 9);
3184       if (seq == NULL)
3185         {
3186           if (!be_quiet)
3187             WITH_CUR_LOCALE (error (0, 0, _("\
3188 %s: character `%s' not defined while needed as default value"),
3189                                     "LC_CTYPE", "<carriage-return>"));
3190         }
3191       else if (seq->nbytes != 1)
3192         WITH_CUR_LOCALE (error (0, 0, _("\
3193 %s: character `%s' in charmap not representable with one byte"),
3194                                 "LC_CTYPE", "<carriage-return>"));
3195       else
3196         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3197
3198       /* No need to search.  */
3199       ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space);
3200
3201
3202       seq = charmap_find_value (charmap, "tab", 3);
3203       if (seq == NULL)
3204         seq = charmap_find_value (charmap, "U00000009", 9);
3205       if (seq == NULL)
3206         {
3207           if (!be_quiet)
3208             WITH_CUR_LOCALE (error (0, 0, _("\
3209 %s: character `%s' not defined while needed as default value"),
3210                                     "LC_CTYPE", "<tab>"));
3211         }
3212       else if (seq->nbytes != 1)
3213         WITH_CUR_LOCALE (error (0, 0, _("\
3214 %s: character `%s' in charmap not representable with one byte"),
3215                                 "LC_CTYPE", "<tab>"));
3216       else
3217         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3218
3219       /* No need to search.  */
3220       ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space);
3221
3222
3223       seq = charmap_find_value (charmap, "vertical-tab", 12);
3224       if (seq == NULL)
3225         seq = charmap_find_value (charmap, "U0000000B", 9);
3226       if (seq == NULL)
3227         {
3228           if (!be_quiet)
3229             WITH_CUR_LOCALE (error (0, 0, _("\
3230 %s: character `%s' not defined while needed as default value"),
3231                                     "LC_CTYPE", "<vertical-tab>"));
3232         }
3233       else if (seq->nbytes != 1)
3234         WITH_CUR_LOCALE (error (0, 0, _("\
3235 %s: character `%s' in charmap not representable with one byte"),
3236                                 "LC_CTYPE", "<vertical-tab>"));
3237       else
3238         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3239
3240       /* No need to search.  */
3241       ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space);
3242     }
3243
3244   if ((ctype->class_done & BITw (tok_xdigit)) == 0)
3245     /* "If this keyword is not specified, the digits `0' to `9', the
3246         uppercase letters `A' through `F', and the lowercase letters `a'
3247         through `f', ..., shell automatically belong to this class, with
3248         implementation defined character values."  [P1003.2, 2.5.2.1]  */
3249     {
3250       set_default (BITPOS (tok_xdigit), '0', '9');
3251       set_default (BITPOS (tok_xdigit), 'A', 'F');
3252       set_default (BITPOS (tok_xdigit), 'a', 'f');
3253     }
3254
3255   if ((ctype->class_done & BITw (tok_blank)) == 0)
3256     /* "If this keyword [blank] is unspecified, the characters <space> and
3257        <tab> shall belong to this character class."  [P1003.2, 2.5.2.1]  */
3258    {
3259       struct charseq *seq;
3260
3261       seq = charmap_find_value (charmap, "space", 5);
3262       if (seq == NULL)
3263         seq = charmap_find_value (charmap, "SP", 2);
3264       if (seq == NULL)
3265         seq = charmap_find_value (charmap, "U00000020", 9);
3266       if (seq == NULL)
3267         {
3268           if (!be_quiet)
3269             WITH_CUR_LOCALE (error (0, 0, _("\
3270 %s: character `%s' not defined while needed as default value"),
3271                                     "LC_CTYPE", "<space>"));
3272         }
3273       else if (seq->nbytes != 1)
3274         WITH_CUR_LOCALE (error (0, 0, _("\
3275 %s: character `%s' in charmap not representable with one byte"),
3276                                 "LC_CTYPE", "<space>"));
3277       else
3278         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3279
3280       /* No need to search.  */
3281       ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank);
3282
3283
3284       seq = charmap_find_value (charmap, "tab", 3);
3285       if (seq == NULL)
3286         seq = charmap_find_value (charmap, "U00000009", 9);
3287       if (seq == NULL)
3288         {
3289           if (!be_quiet)
3290             WITH_CUR_LOCALE (error (0, 0, _("\
3291 %s: character `%s' not defined while needed as default value"),
3292                                     "LC_CTYPE", "<tab>"));
3293         }
3294       else if (seq->nbytes != 1)
3295         WITH_CUR_LOCALE (error (0, 0, _("\
3296 %s: character `%s' in charmap not representable with one byte"),
3297                                 "LC_CTYPE", "<tab>"));
3298       else
3299         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3300
3301       /* No need to search.  */
3302       ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank);
3303     }
3304
3305   if ((ctype->class_done & BITw (tok_graph)) == 0)
3306     /* "If this keyword [graph] is not specified, characters specified for
3307         the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3308         shall belong to this character class."  [P1003.2, 2.5.2.1]  */
3309     {
3310       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3311         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3312       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3313         BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3314         BITw (tok_punct);
3315       size_t cnt;
3316
3317       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3318         if ((ctype->class_collection[cnt] & maskw) != 0)
3319           ctype->class_collection[cnt] |= BITw (tok_graph);
3320
3321       for (cnt = 0; cnt < 256; ++cnt)
3322         if ((ctype->class256_collection[cnt] & mask) != 0)
3323           ctype->class256_collection[cnt] |= BIT (tok_graph);
3324     }
3325
3326   if ((ctype->class_done & BITw (tok_print)) == 0)
3327     /* "If this keyword [print] is not provided, characters specified for
3328         the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3329         and the <space> character shall belong to this character class."
3330         [P1003.2, 2.5.2.1]  */
3331     {
3332       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3333         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3334       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3335         BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3336         BITw (tok_punct);
3337       size_t cnt;
3338       struct charseq *seq;
3339
3340       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3341         if ((ctype->class_collection[cnt] & maskw) != 0)
3342           ctype->class_collection[cnt] |= BITw (tok_print);
3343
3344       for (cnt = 0; cnt < 256; ++cnt)
3345         if ((ctype->class256_collection[cnt] & mask) != 0)
3346           ctype->class256_collection[cnt] |= BIT (tok_print);
3347
3348
3349       seq = charmap_find_value (charmap, "space", 5);
3350       if (seq == NULL)
3351         seq = charmap_find_value (charmap, "SP", 2);
3352       if (seq == NULL)
3353         seq = charmap_find_value (charmap, "U00000020", 9);
3354       if (seq == NULL)
3355         {
3356           if (!be_quiet)
3357             WITH_CUR_LOCALE (error (0, 0, _("\
3358 %s: character `%s' not defined while needed as default value"),
3359                                     "LC_CTYPE", "<space>"));
3360         }
3361       else if (seq->nbytes != 1)
3362         WITH_CUR_LOCALE (error (0, 0, _("\
3363 %s: character `%s' in charmap not representable with one byte"),
3364                                 "LC_CTYPE", "<space>"));
3365       else
3366         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
3367
3368       /* No need to search.  */
3369       ELEM (ctype, class_collection, , L' ') |= BITw (tok_print);
3370     }
3371
3372   if (ctype->tomap_done[0] == 0)
3373     /* "If this keyword [toupper] is not specified, the lowercase letters
3374         `a' through `z', and their corresponding uppercase letters `A' to
3375         `Z', ..., shall automatically be included, with implementation-
3376         defined character values."  [P1003.2, 2.5.2.1]  */
3377     {
3378       char tmp[4];
3379       int ch;
3380
3381       strcpy (tmp, "<?>");
3382
3383       for (ch = 'a'; ch <= 'z'; ++ch)
3384         {
3385           struct charseq *seq_from, *seq_to;
3386
3387           tmp[1] = (char) ch;
3388
3389           seq_from = charmap_find_value (charmap, &tmp[1], 1);
3390           if (seq_from == NULL)
3391             {
3392               char buf[10];
3393               sprintf (buf, "U%08X", ch);
3394               seq_from = charmap_find_value (charmap, buf, 9);
3395             }
3396           if (seq_from == NULL)
3397             {
3398               if (!be_quiet)
3399                 WITH_CUR_LOCALE (error (0, 0, _("\
3400 %s: character `%s' not defined while needed as default value"),
3401                                         "LC_CTYPE", tmp));
3402             }
3403           else if (seq_from->nbytes != 1)
3404             {
3405               if (!be_quiet)
3406                 WITH_CUR_LOCALE (error (0, 0, _("\
3407 %s: character `%s' needed as default value not representable with one byte"),
3408                                         "LC_CTYPE", tmp));
3409             }
3410           else
3411             {
3412               /* This conversion is implementation defined.  */
3413               tmp[1] = (char) (ch + ('A' - 'a'));
3414               seq_to = charmap_find_value (charmap, &tmp[1], 1);
3415               if (seq_to == NULL)
3416                 {
3417                   char buf[10];
3418                   sprintf (buf, "U%08X", ch + ('A' - 'a'));
3419                   seq_to = charmap_find_value (charmap, buf, 9);
3420                 }
3421               if (seq_to == NULL)
3422                 {
3423                   if (!be_quiet)
3424                     WITH_CUR_LOCALE (error (0, 0, _("\
3425 %s: character `%s' not defined while needed as default value"),
3426                                             "LC_CTYPE", tmp));
3427                 }
3428               else if (seq_to->nbytes != 1)
3429                 {
3430                   if (!be_quiet)
3431                     WITH_CUR_LOCALE (error (0, 0, _("\
3432 %s: character `%s' needed as default value not representable with one byte"),
3433                                             "LC_CTYPE", tmp));
3434                 }
3435               else
3436                 /* The index [0] is determined by the order of the
3437                    `ctype_map_newP' calls in `ctype_startup'.  */
3438                 ctype->map256_collection[0][seq_from->bytes[0]]
3439                   = seq_to->bytes[0];
3440             }
3441
3442           /* No need to search.  */
3443           ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
3444         }
3445     }
3446
3447   if (ctype->tomap_done[1] == 0)
3448     /* "If this keyword [tolower] is not specified, the mapping shall be
3449        the reverse mapping of the one specified to `toupper'."  [P1003.2]  */
3450     {
3451       for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
3452         if (ctype->map_collection[0][cnt] != 0)
3453           ELEM (ctype, map_collection, [1],
3454                 ctype->map_collection[0][cnt])
3455             = ctype->charnames[cnt];
3456
3457       for (cnt = 0; cnt < 256; ++cnt)
3458         if (ctype->map256_collection[0][cnt] != 0)
3459           ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
3460     }
3461
3462   if (ctype->outdigits_act != 10)
3463     {
3464       if (ctype->outdigits_act != 0)
3465         WITH_CUR_LOCALE (error (0, 0, _("\
3466 %s: field `%s' does not contain exactly ten entries"),
3467                                 "LC_CTYPE", "outdigit"));
3468
3469       for (cnt = ctype->outdigits_act; cnt < 10; ++cnt)
3470         {
3471           ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3472                                                          digits + cnt, 1);
3473
3474           if (ctype->mboutdigits[cnt] == NULL)
3475             ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3476                                                            longnames[cnt],
3477                                                            strlen (longnames[cnt]));
3478
3479           if (ctype->mboutdigits[cnt] == NULL)
3480             ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3481                                                            uninames[cnt], 9);
3482
3483           if (ctype->mboutdigits[cnt] == NULL)
3484             {
3485               /* Provide a replacement.  */
3486               WITH_CUR_LOCALE (error (0, 0, _("\
3487 no output digits defined and none of the standard names in the charmap")));
3488
3489               ctype->mboutdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
3490                                                        sizeof (struct charseq)
3491                                                        + 1);
3492
3493               /* This is better than nothing.  */
3494               ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
3495               ctype->mboutdigits[cnt]->nbytes = 1;
3496             }
3497
3498           ctype->wcoutdigits[cnt] = L'0' + cnt;
3499         }
3500
3501       ctype->outdigits_act = 10;
3502     }
3503 }
3504
3505
3506 /* Construction of sparse 3-level tables.
3507    See wchar-lookup.h for their structure and the meaning of p and q.  */
3508
struct wctype_table
{
  /* Shape of the table; the user sets both before calling
     wctype_table_init.  A level 2 block has 1 << q entries and a
     level 3 block has 1 << p words of 32 bits each.  */
  unsigned int p;
  unsigned int q;

  /* Working representation, built up by wctype_table_add.  */

  /* Level 1 is indexed by wc >> (q + p + 5).  An entry is either EMPTY
     or the number of a level 2 block.  The counters are in entries.  */
  size_t level1_alloc;
  size_t level1_size;
  uint32_t *level1;

  /* Level 2 is a sequence of blocks.  An entry is either EMPTY or the
     number of a level 3 block.  The counters are in blocks.  */
  size_t level2_alloc;
  size_t level2_size;
  uint32_t *level2;

  /* Level 3 is a sequence of blocks of bit words; bit (wc & 0x1f) of
     the selected word is set for members.  The counters are in blocks.  */
  size_t level3_alloc;
  size_t level3_size;
  uint32_t *level3;

  /* Compressed representation, produced by wctype_table_finalize:
     RESULT points to RESULT_SIZE bytes.  */
  size_t result_size;
  char *result;
};
3528
3529 /* Initialize.  Assumes t->p and t->q have already been set.  */
3530 static inline void
3531 wctype_table_init (struct wctype_table *t)
3532 {
3533   t->level1 = NULL;
3534   t->level1_alloc = t->level1_size = 0;
3535   t->level2 = NULL;
3536   t->level2_alloc = t->level2_size = 0;
3537   t->level3 = NULL;
3538   t->level3_alloc = t->level3_size = 0;
3539 }
3540
3541 /* Retrieve an entry.  */
3542 static inline int
3543 wctype_table_get (struct wctype_table *t, uint32_t wc)
3544 {
3545   uint32_t index1 = wc >> (t->q + t->p + 5);
3546   if (index1 < t->level1_size)
3547     {
3548       uint32_t lookup1 = t->level1[index1];
3549       if (lookup1 != EMPTY)
3550         {
3551           uint32_t index2 = ((wc >> (t->p + 5)) & ((1 << t->q) - 1))
3552                             + (lookup1 << t->q);
3553           uint32_t lookup2 = t->level2[index2];
3554           if (lookup2 != EMPTY)
3555             {
3556               uint32_t index3 = ((wc >> 5) & ((1 << t->p) - 1))
3557                                 + (lookup2 << t->p);
3558               uint32_t lookup3 = t->level3[index3];
3559               uint32_t index4 = wc & 0x1f;
3560
3561               return (lookup3 >> index4) & 1;
3562             }
3563         }
3564     }
3565   return 0;
3566 }
3567
3568 /* Add one entry.  */
3569 static void
3570 wctype_table_add (struct wctype_table *t, uint32_t wc)
3571 {
3572   uint32_t index1 = wc >> (t->q + t->p + 5);
3573   uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1);
3574   uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1);
3575   uint32_t index4 = wc & 0x1f;
3576   size_t i, i1, i2;
3577
3578   if (index1 >= t->level1_size)
3579     {
3580       if (index1 >= t->level1_alloc)
3581         {
3582           size_t alloc = 2 * t->level1_alloc;
3583           if (alloc <= index1)
3584             alloc = index1 + 1;
3585           t->level1 = (uint32_t *) xrealloc ((char *) t->level1,
3586                                              alloc * sizeof (uint32_t));
3587           t->level1_alloc = alloc;
3588         }
3589       while (index1 >= t->level1_size)
3590         t->level1[t->level1_size++] = EMPTY;
3591     }
3592
3593   if (t->level1[index1] == EMPTY)
3594     {
3595       if (t->level2_size == t->level2_alloc)
3596         {
3597           size_t alloc = 2 * t->level2_alloc + 1;
3598           t->level2 = (uint32_t *) xrealloc ((char *) t->level2,
3599                                              (alloc << t->q) * sizeof (uint32_t));
3600           t->level2_alloc = alloc;
3601         }
3602       i1 = t->level2_size << t->q;
3603       i2 = (t->level2_size + 1) << t->q;
3604       for (i = i1; i < i2; i++)
3605         t->level2[i] = EMPTY;
3606       t->level1[index1] = t->level2_size++;
3607     }
3608
3609   index2 += t->level1[index1] << t->q;
3610
3611   if (t->level2[index2] == EMPTY)
3612     {
3613       if (t->level3_size == t->level3_alloc)
3614         {
3615           size_t alloc = 2 * t->level3_alloc + 1;
3616           t->level3 = (uint32_t *) xrealloc ((char *) t->level3,
3617                                              (alloc << t->p) * sizeof (uint32_t));
3618           t->level3_alloc = alloc;
3619         }
3620       i1 = t->level3_size << t->p;
3621       i2 = (t->level3_size + 1) << t->p;
3622       for (i = i1; i < i2; i++)
3623         t->level3[i] = 0;
3624       t->level2[index2] = t->level3_size++;
3625     }
3626
3627   index3 += t->level2[index2] << t->p;
3628
3629   t->level3[index3] |= (uint32_t)1 << index4;
3630 }
3631
3632 /* Finalize and shrink.  */
3633 static void
3634 wctype_table_finalize (struct wctype_table *t)
3635 {
3636   size_t i, j, k;
3637   uint32_t reorder3[t->level3_size];
3638   uint32_t reorder2[t->level2_size];
3639   uint32_t level1_offset, level2_offset, level3_offset;
3640
3641   /* Uniquify level3 blocks.  */
3642   k = 0;
3643   for (j = 0; j < t->level3_size; j++)
3644     {
3645       for (i = 0; i < k; i++)
3646         if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3647                     (1 << t->p) * sizeof (uint32_t)) == 0)
3648           break;
3649       /* Relocate block j to block i.  */
3650       reorder3[j] = i;
3651       if (i == k)
3652         {
3653           if (i != j)
3654             memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3655                     (1 << t->p) * sizeof (uint32_t));
3656           k++;
3657         }
3658     }
3659   t->level3_size = k;
3660
3661   for (i = 0; i < (t->level2_size << t->q); i++)
3662     if (t->level2[i] != EMPTY)
3663       t->level2[i] = reorder3[t->level2[i]];
3664
3665   /* Uniquify level2 blocks.  */
3666   k = 0;
3667   for (j = 0; j < t->level2_size; j++)
3668     {
3669       for (i = 0; i < k; i++)
3670         if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3671                     (1 << t->q) * sizeof (uint32_t)) == 0)
3672           break;
3673       /* Relocate block j to block i.  */
3674       reorder2[j] = i;
3675       if (i == k)
3676         {
3677           if (i != j)
3678             memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3679                     (1 << t->q) * sizeof (uint32_t));
3680           k++;
3681         }
3682     }
3683   t->level2_size = k;
3684
3685   for (i = 0; i < t->level1_size; i++)
3686     if (t->level1[i] != EMPTY)
3687       t->level1[i] = reorder2[t->level1[i]];
3688
3689   /* Create and fill the resulting compressed representation.  */
3690   t->result_size =
3691     5 * sizeof (uint32_t)
3692     + t->level1_size * sizeof (uint32_t)
3693     + (t->level2_size << t->q) * sizeof (uint32_t)
3694     + (t->level3_size << t->p) * sizeof (uint32_t);
3695   t->result = (char *) xmalloc (t->result_size);
3696
3697   level1_offset =
3698     5 * sizeof (uint32_t);
3699   level2_offset =
3700     5 * sizeof (uint32_t)
3701     + t->level1_size * sizeof (uint32_t);
3702   level3_offset =
3703     5 * sizeof (uint32_t)
3704     + t->level1_size * sizeof (uint32_t)
3705     + (t->level2_size << t->q) * sizeof (uint32_t);
3706
3707   ((uint32_t *) t->result)[0] = t->q + t->p + 5;
3708   ((uint32_t *) t->result)[1] = t->level1_size;
3709   ((uint32_t *) t->result)[2] = t->p + 5;
3710   ((uint32_t *) t->result)[3] = (1 << t->q) - 1;
3711   ((uint32_t *) t->result)[4] = (1 << t->p) - 1;
3712
3713   for (i = 0; i < t->level1_size; i++)
3714     ((uint32_t *) (t->result + level1_offset))[i] =
3715       (t->level1[i] == EMPTY
3716        ? 0
3717        : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3718
3719   for (i = 0; i < (t->level2_size << t->q); i++)
3720     ((uint32_t *) (t->result + level2_offset))[i] =
3721       (t->level2[i] == EMPTY
3722        ? 0
3723        : (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
3724
3725   for (i = 0; i < (t->level3_size << t->p); i++)
3726     ((uint32_t *) (t->result + level3_offset))[i] = t->level3[i];
3727
3728   if (t->level1_alloc > 0)
3729     free (t->level1);
3730   if (t->level2_alloc > 0)
3731     free (t->level2);
3732   if (t->level3_alloc > 0)
3733     free (t->level3);
3734 }
3735
3736 #define TABLE wcwidth_table
3737 #define ELEMENT uint8_t
3738 #define DEFAULT 0xff
3739 #include "3level.h"
3740
3741 #define TABLE wctrans_table
3742 #define ELEMENT int32_t
3743 #define DEFAULT 0
3744 #define wctrans_table_add wctrans_table_add_internal
3745 #include "3level.h"
3746 #undef wctrans_table_add
3747 /* The wctrans_table must actually store the difference between the
3748    desired result and the argument.  */
3749 static inline void
3750 wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
3751 {
3752   wctrans_table_add_internal (t, wc, mapped_wc - wc);
3753 }
3754
3755
3756 /* Flattens the included transliterations into a translit list.
3757    Inserts them in the list at `cursor', and returns the new cursor.  */
3758 static struct translit_t **
3759 translit_flatten (struct locale_ctype_t *ctype,
3760                   const struct charmap_t *charmap,
3761                   struct translit_t **cursor)
3762 {
3763   while (ctype->translit_include != NULL)
3764     {
3765       const char *copy_locale = ctype->translit_include->copy_locale;
3766       const char *copy_repertoire = ctype->translit_include->copy_repertoire;
3767       struct localedef_t *other;
3768
3769       /* Unchain the include statement.  During the depth-first traversal
3770          we don't want to visit any locale more than once.  */
3771       ctype->translit_include = ctype->translit_include->next;
3772
3773       other = find_locale (LC_CTYPE, copy_locale, copy_repertoire, charmap);
3774
3775       if (other == NULL || other->categories[LC_CTYPE].ctype == NULL)
3776         {
3777           WITH_CUR_LOCALE (error (0, 0, _("\
3778 %s: transliteration data from locale `%s' not available"),
3779                                   "LC_CTYPE", copy_locale));
3780         }
3781       else
3782         {
3783           struct locale_ctype_t *other_ctype =
3784             other->categories[LC_CTYPE].ctype;
3785
3786           cursor = translit_flatten (other_ctype, charmap, cursor);
3787           assert (other_ctype->translit_include == NULL);
3788
3789           if (other_ctype->translit != NULL)
3790             {
3791               /* Insert the other_ctype->translit list at *cursor.  */
3792               struct translit_t *endp = other_ctype->translit;
3793               while (endp->next != NULL)
3794                 endp = endp->next;
3795
3796               endp->next = *cursor;
3797               *cursor = other_ctype->translit;
3798
3799               /* Avoid any risk of circular lists.  */
3800               other_ctype->translit = NULL;
3801
3802               cursor = &endp->next;
3803             }
3804
3805           if (ctype->default_missing == NULL)
3806             ctype->default_missing = other_ctype->default_missing;
3807         }
3808     }
3809
3810   return cursor;
3811 }
3812
3813 static void
3814 allocate_arrays (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
3815                  struct repertoire_t *repertoire)
3816 {
3817   size_t idx, nr;
3818   const void *key;
3819   size_t len;
3820   void *vdata;
3821   void *curs;
3822
3823   /* You wonder about this amount of memory?  This is only because some
3824      users do not manage to address the array with unsigned values or
3825      data types with range >= 256.  '\200' would result in the array
3826      index -128.  To help these poor people we duplicate the entries for
3827      128 up to 255 below the entry for \0.  */
3828   ctype->ctype_b = (char_class_t *) xcalloc (256 + 128, sizeof (char_class_t));
3829   ctype->ctype32_b = (char_class32_t *) xcalloc (256, sizeof (char_class32_t));
3830   ctype->class_b = (uint32_t **)
3831     xmalloc (ctype->nr_charclass * sizeof (uint32_t *));
3832   ctype->class_3level = (struct iovec *)
3833     xmalloc (ctype->nr_charclass * sizeof (struct iovec));
3834
3835   /* This is the array accessed using the multibyte string elements.  */
3836   for (idx = 0; idx < 256; ++idx)
3837     ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
3838
3839   /* Mirror first 127 entries.  We must take care that entry -1 is not
3840      mirrored because EOF == -1.  */
3841   for (idx = 0; idx < 127; ++idx)
3842     ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
3843
3844   /* The 32 bit array contains all characters < 0x100.  */
3845   for (idx = 0; idx < ctype->class_collection_act; ++idx)
3846     if (ctype->charnames[idx] < 0x100)
3847       ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
3848
3849   for (nr = 0; nr < ctype->nr_charclass; nr++)
3850     {
3851       ctype->class_b[nr] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
3852
3853       /* We only set CLASS_B for the bits in the ISO C classes, not
3854          the user defined classes.  The number should not change but
3855          who knows.  */
3856 #define LAST_ISO_C_BIT 11
3857       if (nr <= LAST_ISO_C_BIT)
3858         for (idx = 0; idx < 256; ++idx)
3859           if (ctype->class256_collection[idx] & _ISbit (nr))
3860             ctype->class_b[nr][idx >> 5] |= (uint32_t) 1 << (idx & 0x1f);
3861     }
3862
3863   for (nr = 0; nr < ctype->nr_charclass; nr++)
3864     {
3865       struct wctype_table t;
3866
3867       t.p = 4; /* or: 5 */
3868       t.q = 7; /* or: 6 */
3869       wctype_table_init (&t);
3870
3871       for (idx = 0; idx < ctype->class_collection_act; ++idx)
3872         if (ctype->class_collection[idx] & _ISwbit (nr))
3873           wctype_table_add (&t, ctype->charnames[idx]);
3874
3875       wctype_table_finalize (&t);
3876
3877       if (verbose)
3878         WITH_CUR_LOCALE (fprintf (stderr, _("\
3879 %s: table for class \"%s\": %lu bytes\n"),
3880                                  "LC_CTYPE", ctype->classnames[nr],
3881                                  (unsigned long int) t.result_size));
3882
3883       ctype->class_3level[nr].iov_base = t.result;
3884       ctype->class_3level[nr].iov_len = t.result_size;
3885     }
3886
3887   /* Room for table of mappings.  */
3888   ctype->map_b = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3889   ctype->map32_b = (uint32_t **) xmalloc (ctype->map_collection_nr
3890                                           * sizeof (uint32_t *));
3891   ctype->map_3level = (struct iovec *)
3892     xmalloc (ctype->map_collection_nr * sizeof (struct iovec));
3893
3894   /* Fill in all mappings.  */
3895   for (idx = 0; idx < 2; ++idx)
3896     {
3897       unsigned int idx2;
3898
3899       /* Allocate table.  */
3900       ctype->map_b[idx] = (uint32_t *)
3901         xmalloc ((256 + 128) * sizeof (uint32_t));
3902
3903       /* Copy values from collection.  */
3904       for (idx2 = 0; idx2 < 256; ++idx2)
3905         ctype->map_b[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
3906
3907       /* Mirror first 127 entries.  We must take care not to map entry
3908          -1 because EOF == -1.  */
3909       for (idx2 = 0; idx2 < 127; ++idx2)
3910         ctype->map_b[idx][idx2] = ctype->map_b[idx][256 + idx2];
3911
3912       /* EOF must map to EOF.  */
3913       ctype->map_b[idx][127] = EOF;
3914     }
3915
3916   for (idx = 0; idx < ctype->map_collection_nr; ++idx)
3917     {
3918       unsigned int idx2;
3919
3920       /* Allocate table.  */
3921       ctype->map32_b[idx] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
3922
3923       /* Copy values from collection.  Default is identity mapping.  */
3924       for (idx2 = 0; idx2 < 256; ++idx2)
3925         ctype->map32_b[idx][idx2] =
3926           (ctype->map_collection[idx][idx2] != 0
3927            ? ctype->map_collection[idx][idx2]
3928            : idx2);
3929     }
3930
3931   for (nr = 0; nr < ctype->map_collection_nr; nr++)
3932     {
3933       struct wctrans_table t;
3934
3935       t.p = 7;
3936       t.q = 9;
3937       wctrans_table_init (&t);
3938
3939       for (idx = 0; idx < ctype->map_collection_act[nr]; ++idx)
3940         if (ctype->map_collection[nr][idx] != 0)
3941           wctrans_table_add (&t, ctype->charnames[idx],
3942                              ctype->map_collection[nr][idx]);
3943
3944       wctrans_table_finalize (&t);
3945
3946       if (verbose)
3947         WITH_CUR_LOCALE (fprintf (stderr, _("\
3948 %s: table for map \"%s\": %lu bytes\n"),
3949                                  "LC_CTYPE", ctype->mapnames[nr],
3950                                  (unsigned long int) t.result_size));
3951
3952       ctype->map_3level[nr].iov_base = t.result;
3953       ctype->map_3level[nr].iov_len = t.result_size;
3954     }
3955
3956   /* Extra array for class and map names.  */
3957   ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3958                                                 * sizeof (uint32_t));
3959   ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3960                                               * sizeof (uint32_t));
3961
3962   ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
3963   ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
3964
3965   /* Array for width information.  Because the expected widths are very
3966      small (never larger than 2) we use only one single byte.  This
3967      saves space.
3968      We put only printable characters in the table.  wcwidth is specified
3969      to return -1 for non-printable characters.  Doing the check here
3970      saves a run-time check.
3971      But we put L'\0' in the table.  This again saves a run-time check.  */
3972   {
3973     struct wcwidth_table t;
3974
3975     t.p = 7;
3976     t.q = 9;
3977     wcwidth_table_init (&t);
3978
3979     /* First set all the printable characters of the character set to
3980        the default width.  */
3981     curs = NULL;
3982     while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
3983       {
3984         struct charseq *data = (struct charseq *) vdata;
3985
3986         if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
3987           data->ucs4 = repertoire_find_value (ctype->repertoire,
3988                                               data->name, len);
3989
3990         if (data->ucs4 != ILLEGAL_CHAR_VALUE)
3991           {
3992             uint32_t *class_bits =
3993               find_idx (ctype, &ctype->class_collection, NULL,
3994                         &ctype->class_collection_act, data->ucs4);
3995
3996             if (class_bits != NULL && (*class_bits & BITw (tok_print)))
3997               wcwidth_table_add (&t, data->ucs4, charmap->width_default);
3998           }
3999       }
4000
4001     /* Now add the explicitly specified widths.  */
4002     if (charmap->width_rules != NULL)
4003       {
4004         size_t cnt;
4005
4006         for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
4007           {
4008             unsigned char bytes[charmap->mb_cur_max];
4009             int nbytes = charmap->width_rules[cnt].from->nbytes;
4010
4011             /* We have the range of characters for which the width is
4012                specified described using byte sequences of the multibyte
4013                charset.  We have to convert this to UCS4 now.  And we
4014                cannot simply convert the beginning and the end of the
4015                sequence, we have to iterate over the byte sequence and
4016                convert it for every single character.  */
4017             memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
4018
4019             while (nbytes < charmap->width_rules[cnt].to->nbytes
4020                    || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
4021                               nbytes) <= 0)
4022               {
4023                 /* Find the UCS value for `bytes'.  */
4024                 int inner;
4025                 uint32_t wch;
4026                 struct charseq *seq =
4027                   charmap_find_symbol (charmap, bytes, nbytes);
4028
4029                 if (seq == NULL)
4030                   wch = ILLEGAL_CHAR_VALUE;
4031                 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
4032                   wch = seq->ucs4;
4033                 else
4034                   wch = repertoire_find_value (ctype->repertoire, seq->name,
4035                                                strlen (seq->name));
4036
4037                 if (wch != ILLEGAL_CHAR_VALUE)
4038                   {
4039                     /* Store the value.  */
4040                     uint32_t *class_bits =
4041                       find_idx (ctype, &ctype->class_collection, NULL,
4042                                 &ctype->class_collection_act, wch);
4043
4044                     if (class_bits != NULL && (*class_bits & BITw (tok_print)))
4045                       wcwidth_table_add (&t, wch,
4046                                          charmap->width_rules[cnt].width);
4047                   }
4048
4049                 /* "Increment" the bytes sequence.  */
4050                 inner = nbytes - 1;
4051                 while (inner >= 0 && bytes[inner] == 0xff)
4052                   --inner;
4053
4054                 if (inner < 0)
4055                   {
4056                     /* We have to extend the byte sequence.  */
4057                     if (nbytes >= charmap->width_rules[cnt].to->nbytes)
4058                       break;
4059
4060                     bytes[0] = 1;
4061                     memset (&bytes[1], 0, nbytes);
4062                     ++nbytes;
4063                   }
4064                 else
4065                   {
4066                     ++bytes[inner];
4067                     while (++inner < nbytes)
4068                       bytes[inner] = 0;
4069                   }
4070               }
4071           }
4072       }
4073
4074     /* Set the width of L'\0' to 0.  */
4075     wcwidth_table_add (&t, 0, 0);
4076
4077     wcwidth_table_finalize (&t);
4078
4079     if (verbose)
4080       WITH_CUR_LOCALE (fprintf (stderr, _("%s: table for width: %lu bytes\n"),
4081                                "LC_CTYPE", (unsigned long int) t.result_size));
4082
4083     ctype->width.iov_base = t.result;
4084     ctype->width.iov_len = t.result_size;
4085   }
4086
4087   /* Set MB_CUR_MAX.  */
4088   ctype->mb_cur_max = charmap->mb_cur_max;
4089
4090   /* Now determine the table for the transliteration information.
4091
4092      XXX It is not yet clear to me whether it is worth implementing a
4093      complicated algorithm which uses a hash table to locate the entries.
4094      For now I'll use a simple array which can be searched using binary
4095      search.  */
4096   if (ctype->translit_include != NULL)
4097     /* Traverse the locales mentioned in the `include' statements in a
4098        depth-first way and fold in their transliteration information.  */
4099     translit_flatten (ctype, charmap, &ctype->translit);
4100
4101   if (ctype->translit != NULL)
4102     {
4103       /* First count how many entries we have.  This is the upper limit
4104          since some entries from the included files might be overwritten.  */
4105       size_t number = 0;
4106       size_t cnt;
4107       struct translit_t *runp = ctype->translit;
4108       struct translit_t **sorted;
4109       size_t from_len, to_len;
4110
4111       while (runp != NULL)
4112         {
4113           ++number;
4114           runp = runp->next;
4115         }
4116
4117       /* Next we allocate an array large enough and fill in the values.  */
4118       sorted = (struct translit_t **) alloca (number
4119                                               * sizeof (struct translit_t **));
4120       runp = ctype->translit;
4121       number = 0;
4122       do
4123         {
4124           /* Search for the place where to insert this string.
4125              XXX Better use a real sorting algorithm later.  */
4126           size_t idx = 0;
4127           int replace = 0;
4128
4129           while (idx < number)
4130             {
4131               int res = wcscmp ((const wchar_t *) sorted[idx]->from,
4132                                 (const wchar_t *) runp->from);
4133               if (res == 0)
4134                 {
4135                   replace = 1;
4136                   break;
4137                 }
4138               if (res > 0)
4139                 break;
4140               ++idx;
4141             }
4142
4143           if (replace)
4144             sorted[idx] = runp;
4145           else
4146             {
4147               memmove (&sorted[idx + 1], &sorted[idx],
4148                        (number - idx) * sizeof (struct translit_t *));
4149               sorted[idx] = runp;
4150               ++number;
4151             }
4152
4153           runp = runp->next;
4154         }
4155       while (runp != NULL);
4156
4157       /* The next step is putting all the possible transliteration
4158          strings in one memory block so that we can write it out.
4159          We need several different blocks:
4160          - index to the from-string array
4161          - from-string array
4162          - index to the to-string array
4163          - to-string array.
4164       */
4165       from_len = to_len = 0;
4166       for (cnt = 0; cnt < number; ++cnt)
4167         {
4168           struct translit_to_t *srunp;
4169           from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4170           srunp = sorted[cnt]->to;
4171           while (srunp != NULL)
4172             {
4173               to_len += wcslen ((const wchar_t *) srunp->str) + 1;
4174               srunp = srunp->next;
4175             }
4176           /* Plus one for the extra NUL character marking the end of
4177              the list for the current entry.  */
4178           ++to_len;
4179         }
4180
4181       /* We can allocate the arrays for the results.  */
4182       ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
4183       ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
4184       ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
4185       ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
4186
4187       from_len = 0;
4188       to_len = 0;
4189       for (cnt = 0; cnt < number; ++cnt)
4190         {
4191           size_t len;
4192           struct translit_to_t *srunp;
4193
4194           ctype->translit_from_idx[cnt] = from_len;
4195           ctype->translit_to_idx[cnt] = to_len;
4196
4197           len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4198           wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
4199                    (const wchar_t *) sorted[cnt]->from, len);
4200           from_len += len;
4201
4202           ctype->translit_to_idx[cnt] = to_len;
4203           srunp = sorted[cnt]->to;
4204           while (srunp != NULL)
4205             {
4206               len = wcslen ((const wchar_t *) srunp->str) + 1;
4207               wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
4208                        (const wchar_t *) srunp->str, len);
4209               to_len += len;
4210               srunp = srunp->next;
4211             }
4212           ctype->translit_to_tbl[to_len++] = L'\0';
4213         }
4214
4215       /* Store the information about the length.  */
4216       ctype->translit_idx_size = number;
4217       ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
4218       ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
4219     }
4220   else
4221     {
4222       /* Provide some dummy pointers since we have nothing to write out.  */
4223       static uint32_t no_str = { 0 };
4224
4225       ctype->translit_from_idx = &no_str;
4226       ctype->translit_from_tbl = &no_str;
4227       ctype->translit_to_tbl = &no_str;
4228       ctype->translit_idx_size = 0;
4229       ctype->translit_from_tbl_size = 0;
4230       ctype->translit_to_tbl_size = 0;
4231     }
4232 }