locale/programs/ld-ctype.c

   1 /* Copyright (C) 1995-2005, 2006 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
   4
   5    This program is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License version 2 as
   7    published by the Free Software Foundation.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program; if not, write to the Free Software Foundation,
  16    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  17
  18 #ifdef HAVE_CONFIG_H
  19 # include <config.h>
  20 #endif
  21
  22 #include <alloca.h>
  23 #include <byteswap.h>
  24 #include <endian.h>
  25 #include <errno.h>
  26 #include <limits.h>
  27 #include <obstack.h>
  28 #include <stdlib.h>
  29 #include <string.h>
  30 #include <wchar.h>
  31 #include <wctype.h>
  32 #include <sys/uio.h>
  33
  34 #include "localedef.h"
  35 #include "charmap.h"
  36 #include "localeinfo.h"
  37 #include "langinfo.h"
  38 #include "linereader.h"
  39 #include "locfile-token.h"
  40 #include "locfile.h"
  41
  42 #include <assert.h>
  43
  44
  45 #ifdef PREDEFINED_CLASSES
  46 /* These are the extra bits not in wctype.h since these are not preallocated
  47    classes.  */
  48 # define _ISwspecial1   (1 << 29)
  49 # define _ISwspecial2   (1 << 30)
  50 # define _ISwspecial3   (1 << 31)
  51 #endif
  52
  53
  54 /* The bit used for representing a special class.
        BITPOS turns a class token into its zero-based position counted
        from tok_upper; BIT and BITw hand that position to _ISbit and
        _ISwbit respectively (8-bit table mask vs. wide table mask, as used
        with class256_collection and class_collection in ctype_finish).  */
  55 #define BITPOS(class) ((class) - tok_upper)
  56 #define BIT(class) (_ISbit (BITPOS (class)))
  57 #define BITw(class) (_ISwbit (BITPOS (class)))
  58
/* ELEM dereferences the pointer returned by find_idx, so the expansion can
   be used as an lvalue (ctype_finish does `ELEM (...) |= ...').  COLLECTION
   names the member family: the members COLLECTION, COLLECTION_max and
   COLLECTION_act are passed by address.  IDX is pasted textually after each
   member name; it is empty for class_collection and presumably an array
   subscript for the per-map arrays -- confirm at the call sites.  The CTYPE
   argument is not parenthesized, so pass a plain identifier.  */
  59 #define ELEM(ctype, collection, idx, value)                                   \
  60   *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,     \
  61              &ctype->collection##_act idx, value)
  62
  63
  64 /* To be compatible with former implementations we for now restrict
  65    the number of bits for character classes to 16.  When compatibility
  66    is not necessary anymore increase the number to 32.
        char_class_t is the 16-bit element type (used for ctype_b in
        struct locale_ctype_t), char_class32_t the 32-bit one (used for
        ctype32_b).  Both are macros, not typedefs.  */
  67 #define char_class_t uint16_t
  68 #define char_class32_t uint32_t
  69
  70
  71 /* Type to describe a transliteration action.  We have a possibly
  72    multiple character from-string and a set of multiple character
  73    to-strings.  All are 32bit values since this is what is used in
  74    the gconv functions.
        struct translit_to_t is one node of the set of to-strings; the
        nodes are chained through `next'.  */
  75 struct translit_to_t
  76 {
  77   uint32_t *str;                /* One to-string of 32-bit values.  */
  78
  79   struct translit_to_t *next;   /* Next alternative; presumably NULL ends the list.  */
  80 };
  81
/* One transliteration rule: a from-string together with the list of its
   to-strings.  Rules are chained through `next'.  */
  82 struct translit_t
  83 {
  84   uint32_t *from;               /* From-string of 32-bit values.  */
  85
  86   const char *fname;            /* Presumably the file the rule was read from -- confirm.  */
  87   size_t lineno;                /* Presumably the line of the rule in FNAME -- confirm.  */
  88
  89   struct translit_to_t *to;     /* List of to-strings for FROM.  */
  90
  91   struct translit_t *next;      /* Next rule in the list.  */
  92 };
  93
/* One entry of the translit_ignore list.  ctype_finish orders the list by
   the `from' member and counts its entries in ntranslit_ignore.  */
  94 struct translit_ignore_t
  95 {
  96   uint32_t from;                /* Sort key used by ctype_finish; presumably first value of a range.  */
  97   uint32_t to;                  /* Presumably last value of the range -- confirm against the parser.  */
  98   uint32_t step;                /* Presumably the stride between FROM and TO -- confirm.  */
  99
 100   const char *fname;            /* Presumably the defining file -- confirm.  */
 101   size_t lineno;                /* Presumably the defining line -- confirm.  */
 102
 103   struct translit_ignore_t *next;   /* Next entry in the list.  */
 104 };
 105
 106
 107 /* Type to describe a transliteration include statement.  Entries are
        chained through `next'; the list head is the translit_include member
        of struct locale_ctype_t.  */
 108 struct translit_include_t
 109 {
 110   const char *copy_locale;      /* Presumably the name of the locale to include from -- confirm.  */
 111   const char *copy_repertoire;  /* Presumably the repertoire name to use with it -- confirm.  */
 112
 113   struct translit_include_t *next;   /* Next include statement.  */
 114 };
 115
 116
 117 /* Sparse table of uint32_t.  The macros below parameterize the generic
        code in "3level.h"; this instantiation provides `struct idx_table'
        and idx_table_init, both used for the charnames_idx member of
        struct locale_ctype_t.  */
 118 #define TABLE idx_table            /* Name prefix of the generated type and functions.  */
 119 #define ELEMENT uint32_t           /* Type of the stored values.  */
 120 #define DEFAULT ((uint32_t) ~0)    /* All bits set; presumably the value of absent entries -- confirm in 3level.h.  */
 121 #define NO_FINALIZE                /* Presumably suppresses the finalize routine -- confirm in 3level.h.  */
 122 #include "3level.h"
 123
 124
 125 /* The real definition of the struct for the LC_CTYPE locale.
        One object is created (zero-filled) by ctype_startup and completed
        by ctype_finish.  */
 126 struct locale_ctype_t
 127 {
 128   uint32_t *charnames;          /* Character value per table slot; slots 0..255 are preset to their own index.  */
 129   size_t charnames_max;         /* Number of slots allocated for charnames.  */
 130   size_t charnames_act;         /* Number of slots in use (256 right after ctype_startup).  */
 131   /* An index lookup table, to speedup find_idx.  */
 132   struct idx_table charnames_idx;
 133
 134   struct repertoire_t *repertoire;   /* Set by ctype_finish from repertoire_read when a repertoire name is known.  */
 135
 136   /* We will allow up to 8 * sizeof (uint32_t) character classes.  */
 137 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
 138   size_t nr_charclass;          /* Presumably the number of classnames in use; maintained outside this chunk.  */
 139   const char *classnames[MAX_NR_CHARCLASS];
 140   uint32_t last_class_char;     /* Initialized to ILLEGAL_CHAR_VALUE by ctype_startup.  */
 141   uint32_t class256_collection[256];   /* Class bits (BIT masks) indexed by byte value.  */
 142   uint32_t *class_collection;   /* Class bits (BITw masks); slot N describes charnames[N].  */
 143   size_t class_collection_max;  /* Slots allocated for class_collection.  */
 144   size_t class_collection_act;  /* Slots in use (256 right after ctype_startup).  */
 145   uint32_t class_done;
 146   uint32_t class_offset;
 147
 148   struct charseq **mbdigits;    /* Input digits as charmap sequences, kept in groups of ten.  */
 149   size_t mbdigits_act;
 150   size_t mbdigits_max;
 151   uint32_t *wcdigits;           /* Input digits as wide character values, kept in groups of ten.  */
 152   size_t wcdigits_act;
 153   size_t wcdigits_max;
 154
 155   struct charseq *mboutdigits[10];   /* Output digits; ctype_finish substitutes '?' for missing ones.  */
 156   uint32_t wcoutdigits[10];          /* Wide output digits; a zero entry is replaced by L'?'.  */
 157   size_t outdigits_act;
 158
 159   /* If the following number ever turns out to be too small simply
 160      increase it.  But I doubt it will.  --drepper@gnu */
 161 #define MAX_NR_CHARMAP 16
 162   const char *mapnames[MAX_NR_CHARMAP];
 163   uint32_t *map_collection[MAX_NR_CHARMAP];   /* Per-map tables; ctype_startup presets the first 256 slots of maps 0 and 1 to identity.  */
 164   uint32_t map256_collection[2][256];         /* Byte-indexed tables for maps 0 and 1, preset to identity.  */
 165   size_t map_collection_max[MAX_NR_CHARMAP];
 166   size_t map_collection_act[MAX_NR_CHARMAP];
 167   size_t map_collection_nr;
 168   size_t last_map_idx;          /* Initialized to MAX_NR_CHARMAP by ctype_startup.  */
 169   int tomap_done[MAX_NR_CHARMAP];
 170   uint32_t map_offset;
 171
 172   /* Transliteration information.  */
 173   struct translit_include_t *translit_include;
 174   struct translit_t *translit;
 175   struct translit_ignore_t *translit_ignore;   /* Ordered by `from' in ctype_finish.  */
 176   uint32_t ntranslit_ignore;                   /* Number of translit_ignore entries, counted in ctype_finish.  */
 177
 178   uint32_t *default_missing;
 179   const char *default_missing_file;
 180   size_t default_missing_lineno;
 181
 182   uint32_t to_nonascii;         /* Set to 1 by ctype_startup when enc_not_ascii_compatible is set.  */
 183
 184   /* The arrays for the binary representation.  */
 185   char_class_t *ctype_b;
 186   char_class32_t *ctype32_b;
 187   uint32_t **map_b;
 188   uint32_t **map32_b;
 189   uint32_t **class_b;
 190   struct iovec *class_3level;
 191   struct iovec *map_3level;
 192   uint32_t *class_name_ptr;
 193   uint32_t *map_name_ptr;
 194   struct iovec width;
 195   uint32_t mb_cur_max;
 196   const char *codeset_name;     /* charmap->code_set_name, or "//UNKNOWN//" when the charmap has none.  */
 197   uint32_t *translit_from_idx;
 198   uint32_t *translit_from_tbl;
 199   uint32_t *translit_to_idx;
 200   uint32_t *translit_to_tbl;
 201   uint32_t translit_idx_size;
 202   size_t translit_from_tbl_size;
 203   size_t translit_to_tbl_size;
 204
 205   struct obstack mempool;       /* Initialized with obstack_init in ctype_startup.  */
 206 };
 207
 208
 209 /* Marker for an empty slot.  This has the value 0xFFFFFFFF, regardless
 210    whether 'int' is 16 bit, 32 bit, or 64 bit.  The cast to uint32_t
        after the complement is what fixes the width.  */
 211 #define EMPTY ((uint32_t) ~0)
 212
 213
/* Allocation hooks expected by <obstack.h>: chunks are obtained with
   xmalloc and released with free.  */
 214 #define obstack_chunk_alloc xmalloc
 215 #define obstack_chunk_free free
 216
 217
 218 /* Prototypes for local functions.
        ctype_startup      - create the LC_CTYPE data of a locale or share
                             that of COPY_LOCALE.
        ctype_class_new    - register a character class by name; the order
                             of the calls determines the bit positions.
        ctype_map_new      - register a character map by name.
        find_idx           - return a pointer to the table slot for IDX;
                             when called with NULL table arguments it only
                             makes sure IDX has an entry in the name array.
        set_class_defaults - set default values for classes not specified.
        allocate_arrays    - not called in the part of the file shown here;
                             presumably builds the arrays for the binary
                             representation -- confirm.  */
 219 static void ctype_startup (struct linereader *lr, struct localedef_t *locale,
 220                            const struct charmap_t *charmap,
 221                            struct localedef_t *copy_locale,
 222                            int ignore_content);
 223 static void ctype_class_new (struct linereader *lr,
 224                              struct locale_ctype_t *ctype, const char *name);
 225 static void ctype_map_new (struct linereader *lr,
 226                            struct locale_ctype_t *ctype,
 227                            const char *name, const struct charmap_t *charmap);
 228 static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table,
 229                            size_t *max, size_t *act, unsigned int idx);
 230 static void set_class_defaults (struct locale_ctype_t *ctype,
 231                                 const struct charmap_t *charmap,
 232                                 struct repertoire_t *repertoire);
 233 static void allocate_arrays (struct locale_ctype_t *ctype,
 234                              const struct charmap_t *charmap,
 235                              struct repertoire_t *repertoire);
 236
 237
/* Tables describing the ten decimal digits, all indexed by digit value.
   longnames: symbolic names, used by ctype_finish as a fallback when
   looking up the default input digits in the charmap.  */
 238 static const char *longnames[] =
 239 {
 240   "zero", "one", "two", "three", "four",
 241   "five", "six", "seven", "eight", "nine"
 242 };
/* uninames: names of the form U000000XX for the code points 0x30..0x39;
   not referenced in the part of the file shown here.  */
 243 static const char *uninames[] =
 244 {
 245   "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
 246   "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
 247 };
/* digits: the digit characters themselves; ctype_finish looks up each one
   as a one-byte sequence in the charmap.  */
 248 static const unsigned char digits[] = "0123456789";
 249
 250
 251 static void
 252 ctype_startup (struct linereader *lr, struct localedef_t *locale,
 253                const struct charmap_t *charmap,
 254                struct localedef_t *copy_locale, int ignore_content)
 255 {
 256   unsigned int cnt;
 257   struct locale_ctype_t *ctype;
 258
 259   if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL)
 260     {
 261       if (copy_locale == NULL)
 262         {
 263           /* Allocate the needed room.  */
 264           locale->categories[LC_CTYPE].ctype = ctype =
 265             (struct locale_ctype_t *) xcalloc (1,
 266                                                sizeof (struct locale_ctype_t));
 267
 268           /* We have seen no names yet.  */
 269           ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
 270           ctype->charnames =
 271             (unsigned int *) xmalloc (ctype->charnames_max
 272                                       * sizeof (unsigned int));
 273           for (cnt = 0; cnt < 256; ++cnt)
 274             ctype->charnames[cnt] = cnt;
 275           ctype->charnames_act = 256;
 276           idx_table_init (&ctype->charnames_idx);
 277
 278           /* Fill character class information.  */
 279           ctype->last_class_char = ILLEGAL_CHAR_VALUE;
 280           /* The order of the following instructions determines the bit
 281              positions!  */
 282           ctype_class_new (lr, ctype, "upper");
 283           ctype_class_new (lr, ctype, "lower");
 284           ctype_class_new (lr, ctype, "alpha");
 285           ctype_class_new (lr, ctype, "digit");
 286           ctype_class_new (lr, ctype, "xdigit");
 287           ctype_class_new (lr, ctype, "space");
 288           ctype_class_new (lr, ctype, "print");
 289           ctype_class_new (lr, ctype, "graph");
 290           ctype_class_new (lr, ctype, "blank");
 291           ctype_class_new (lr, ctype, "cntrl");
 292           ctype_class_new (lr, ctype, "punct");
 293           ctype_class_new (lr, ctype, "alnum");
 294 #ifdef PREDEFINED_CLASSES
 295           /* The following are extensions from ISO 14652.  */
 296           ctype_class_new (lr, ctype, "left_to_right");
 297           ctype_class_new (lr, ctype, "right_to_left");
 298           ctype_class_new (lr, ctype, "num_terminator");
 299           ctype_class_new (lr, ctype, "num_separator");
 300           ctype_class_new (lr, ctype, "segment_separator");
 301           ctype_class_new (lr, ctype, "block_separator");
 302           ctype_class_new (lr, ctype, "direction_control");
 303           ctype_class_new (lr, ctype, "sym_swap_layout");
 304           ctype_class_new (lr, ctype, "char_shape_selector");
 305           ctype_class_new (lr, ctype, "num_shape_selector");
 306           ctype_class_new (lr, ctype, "non_spacing");
 307           ctype_class_new (lr, ctype, "non_spacing_level3");
 308           ctype_class_new (lr, ctype, "normal_connect");
 309           ctype_class_new (lr, ctype, "r_connect");
 310           ctype_class_new (lr, ctype, "no_connect");
 311           ctype_class_new (lr, ctype, "no_connect-space");
 312           ctype_class_new (lr, ctype, "vowel_connect");
 313 #endif
 314
 315           ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
 316           ctype->class_collection
 317             = (uint32_t *) xcalloc (sizeof (unsigned long int),
 318                                     ctype->class_collection_max);
 319           ctype->class_collection_act = 256;
 320
 321           /* Fill character map information.  */
 322           ctype->last_map_idx = MAX_NR_CHARMAP;
 323           ctype_map_new (lr, ctype, "toupper", charmap);
 324           ctype_map_new (lr, ctype, "tolower", charmap);
 325 #ifdef PREDEFINED_CLASSES
 326           ctype_map_new (lr, ctype, "tosymmetric", charmap);
 327 #endif
 328
 329           /* Fill first 256 entries in `toXXX' arrays.  */
 330           for (cnt = 0; cnt < 256; ++cnt)
 331             {
 332               ctype->map_collection[0][cnt] = cnt;
 333               ctype->map_collection[1][cnt] = cnt;
 334 #ifdef PREDEFINED_CLASSES
 335               ctype->map_collection[2][cnt] = cnt;
 336 #endif
 337               ctype->map256_collection[0][cnt] = cnt;
 338               ctype->map256_collection[1][cnt] = cnt;
 339             }
 340
 341           if (enc_not_ascii_compatible)
 342             ctype->to_nonascii = 1;
 343
 344           obstack_init (&ctype->mempool);
 345         }
 346       else
 347         ctype = locale->categories[LC_CTYPE].ctype =
 348           copy_locale->categories[LC_CTYPE].ctype;
 349     }
 350 }
 351
 352
 353 void
 354 ctype_finish (struct localedef_t *locale, const struct charmap_t *charmap)
 355 {
 356   /* See POSIX.2, table 2-6 for the meaning of the following table.  */
 357 #define NCLASS 12
 358   static const struct
 359   {
 360     const char *name;
 361     const char allow[NCLASS];
 362   }
 363   valid_table[NCLASS] =
 364   {
 365     /* The order is important.  See token.h for more information.
 366        M = Always, D = Default, - = Permitted, X = Mutually exclusive  */
 367     { "upper",  "--MX-XDDXXX-" },
 368     { "lower",  "--MX-XDDXXX-" },
 369     { "alpha",  "---X-XDDXXX-" },
 370     { "digit",  "XXX--XDDXXX-" },
 371     { "xdigit", "-----XDDXXX-" },
 372     { "space",  "XXXXX------X" },
 373     { "print",  "---------X--" },
 374     { "graph",  "---------X--" },
 375     { "blank",  "XXXXXM-----X" },
 376     { "cntrl",  "XXXXX-XX--XX" },
 377     { "punct",  "XXXXX-DD-X-X" },
 378     { "alnum",  "-----XDDXXX-" }
 379   };
 380   size_t cnt;
 381   int cls1, cls2;
 382   uint32_t space_value;
 383   struct charseq *space_seq;
 384   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 385   int warned;
 386   const void *key;
 387   size_t len;
 388   void *vdata;
 389   void *curs;
 390
 391   /* Now resolve copying and also handle completely missing definitions.  */
 392   if (ctype == NULL)
 393     {
 394       const char *repertoire_name;
 395
 396       /* First see whether we were supposed to copy.  If yes, find the
 397          actual definition.  */
 398       if (locale->copy_name[LC_CTYPE] != NULL)
 399         {
 400           /* Find the copying locale.  This has to happen transitively since
 401              the locale we are copying from might also copying another one.  */
 402           struct localedef_t *from = locale;
 403
 404           do
 405             from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
 406                                 from->repertoire_name, charmap);
 407           while (from->categories[LC_CTYPE].ctype == NULL
 408                  && from->copy_name[LC_CTYPE] != NULL);
 409
 410           ctype = locale->categories[LC_CTYPE].ctype
 411             = from->categories[LC_CTYPE].ctype;
 412         }
 413
 414       /* If there is still no definition issue an warning and create an
 415          empty one.  */
 416       if (ctype == NULL)
 417         {
 418           if (! be_quiet)
 419             WITH_CUR_LOCALE (error (0, 0, _("\
 420 No definition for %s category found"), "LC_CTYPE"));
 421           ctype_startup (NULL, locale, charmap, NULL, 0);
 422           ctype = locale->categories[LC_CTYPE].ctype;
 423         }
 424
 425       /* Get the repertoire we have to use.  */
 426       repertoire_name = locale->repertoire_name ?: repertoire_global;
 427       if (repertoire_name != NULL)
 428         ctype->repertoire = repertoire_read (repertoire_name);
 429     }
 430
 431   /* We need the name of the currently used 8-bit character set to
 432      make correct conversion between this 8-bit representation and the
 433      ISO 10646 character set used internally for wide characters.  */
 434   ctype->codeset_name = charmap->code_set_name;
 435   if (ctype->codeset_name == NULL)
 436     {
 437       if (! be_quiet)
 438         WITH_CUR_LOCALE (error (0, 0, _("\
 439 No character set name specified in charmap")));
 440       ctype->codeset_name = "//UNKNOWN//";
 441     }
 442
 443   /* Set default value for classes not specified.  */
 444   set_class_defaults (ctype, charmap, ctype->repertoire);
 445
 446   /* Check according to table.  */
 447   for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
 448     {
 449       uint32_t tmp = ctype->class_collection[cnt];
 450
 451       if (tmp != 0)
 452         {
 453           for (cls1 = 0; cls1 < NCLASS; ++cls1)
 454             if ((tmp & _ISwbit (cls1)) != 0)
 455               for (cls2 = 0; cls2 < NCLASS; ++cls2)
 456                 if (valid_table[cls1].allow[cls2] != '-')
 457                   {
 458                     int eq = (tmp & _ISwbit (cls2)) != 0;
 459                     switch (valid_table[cls1].allow[cls2])
 460                       {
 461                       case 'M':
 462                         if (!eq)
 463                           {
 464                             uint32_t value = ctype->charnames[cnt];
 465
 466                             if (!be_quiet)
 467                               WITH_CUR_LOCALE (error (0, 0, _("\
 468 character L'\\u%0*x' in class `%s' must be in class `%s'"),
 469                                                       value > 0xffff ? 8 : 4,
 470                                                       value,
 471                                                       valid_table[cls1].name,
 472                                                       valid_table[cls2].name));
 473                           }
 474                         break;
 475
 476                       case 'X':
 477                         if (eq)
 478                           {
 479                             uint32_t value = ctype->charnames[cnt];
 480
 481                             if (!be_quiet)
 482                               WITH_CUR_LOCALE (error (0, 0, _("\
 483 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
 484                                                       value > 0xffff ? 8 : 4,
 485                                                       value,
 486                                                       valid_table[cls1].name,
 487                                                       valid_table[cls2].name));
 488                           }
 489                         break;
 490
 491                       case 'D':
 492                         ctype->class_collection[cnt] |= _ISwbit (cls2);
 493                         break;
 494
 495                       default:
 496                         WITH_CUR_LOCALE (error (5, 0, _("\
 497 internal error in %s, line %u"), __FUNCTION__, __LINE__));
 498                       }
 499                   }
 500         }
 501     }
 502
 503   for (cnt = 0; cnt < 256; ++cnt)
 504     {
 505       uint32_t tmp = ctype->class256_collection[cnt];
 506
 507       if (tmp != 0)
 508         {
 509           for (cls1 = 0; cls1 < NCLASS; ++cls1)
 510             if ((tmp & _ISbit (cls1)) != 0)
 511               for (cls2 = 0; cls2 < NCLASS; ++cls2)
 512                 if (valid_table[cls1].allow[cls2] != '-')
 513                   {
 514                     int eq = (tmp & _ISbit (cls2)) != 0;
 515                     switch (valid_table[cls1].allow[cls2])
 516                       {
 517                       case 'M':
 518                         if (!eq)
 519                           {
 520                             char buf[17];
 521
 522                             snprintf (buf, sizeof buf, "\\%Zo", cnt);
 523
 524                             if (!be_quiet)
 525                               WITH_CUR_LOCALE (error (0, 0, _("\
 526 character '%s' in class `%s' must be in class `%s'"),
 527                                                       buf,
 528                                                       valid_table[cls1].name,
 529                                                       valid_table[cls2].name));
 530                           }
 531                         break;
 532
 533                       case 'X':
 534                         if (eq)
 535                           {
 536                             char buf[17];
 537
 538                             snprintf (buf, sizeof buf, "\\%Zo", cnt);
 539
 540                             if (!be_quiet)
 541                               WITH_CUR_LOCALE (error (0, 0, _("\
 542 character '%s' in class `%s' must not be in class `%s'"),
 543                                                       buf,
 544                                                       valid_table[cls1].name,
 545                                                       valid_table[cls2].name));
 546                           }
 547                         break;
 548
 549                       case 'D':
 550                         ctype->class256_collection[cnt] |= _ISbit (cls2);
 551                         break;
 552
 553                       default:
 554                         WITH_CUR_LOCALE (error (5, 0, _("\
 555 internal error in %s, line %u"), __FUNCTION__, __LINE__));
 556                       }
 557                   }
 558         }
 559     }
 560
 561   /* ... and now test <SP> as a special case.  */
 562   space_value = 32;
 563   if (((cnt = BITPOS (tok_space),
 564         (ELEM (ctype, class_collection, , space_value)
 565          & BITw (tok_space)) == 0)
 566        || (cnt = BITPOS (tok_blank),
 567            (ELEM (ctype, class_collection, , space_value)
 568             & BITw (tok_blank)) == 0)))
 569     {
 570       if (!be_quiet)
 571         WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
 572                                 valid_table[cnt].name));
 573     }
 574   else if (((cnt = BITPOS (tok_punct),
 575              (ELEM (ctype, class_collection, , space_value)
 576               & BITw (tok_punct)) != 0)
 577             || (cnt = BITPOS (tok_graph),
 578                 (ELEM (ctype, class_collection, , space_value)
 579                  & BITw (tok_graph))
 580                 != 0)))
 581     {
 582       if (!be_quiet)
 583         WITH_CUR_LOCALE (error (0, 0, _("\
 584 <SP> character must not be in class `%s'"),
 585                                 valid_table[cnt].name));
 586     }
 587   else
 588     ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
 589
 590   space_seq = charmap_find_value (charmap, "SP", 2);
 591   if (space_seq == NULL)
 592     space_seq = charmap_find_value (charmap, "space", 5);
 593   if (space_seq == NULL)
 594     space_seq = charmap_find_value (charmap, "U00000020", 9);
 595   if (space_seq == NULL || space_seq->nbytes != 1)
 596     {
 597       if (!be_quiet)
 598         WITH_CUR_LOCALE (error (0, 0, _("\
 599 character <SP> not defined in character map")));
 600     }
 601   else if (((cnt = BITPOS (tok_space),
 602              (ctype->class256_collection[space_seq->bytes[0]]
 603               & BIT (tok_space)) == 0)
 604             || (cnt = BITPOS (tok_blank),
 605                 (ctype->class256_collection[space_seq->bytes[0]]
 606                  & BIT (tok_blank)) == 0)))
 607     {
 608       if (!be_quiet)
 609         WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
 610                                 valid_table[cnt].name));
 611     }
 612   else if (((cnt = BITPOS (tok_punct),
 613              (ctype->class256_collection[space_seq->bytes[0]]
 614               & BIT (tok_punct)) != 0)
 615             || (cnt = BITPOS (tok_graph),
 616                 (ctype->class256_collection[space_seq->bytes[0]]
 617                  & BIT (tok_graph)) != 0)))
 618     {
 619       if (!be_quiet)
 620         WITH_CUR_LOCALE (error (0, 0, _("\
 621 <SP> character must not be in class `%s'"),
 622                                 valid_table[cnt].name));
 623     }
 624   else
 625     ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
 626
 627   /* Now that the tests are done make sure the name array contains all
 628      characters which are handled in the WIDTH section of the
 629      character set definition file.  */
 630   if (charmap->width_rules != NULL)
 631     for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
 632       {
 633         unsigned char bytes[charmap->mb_cur_max];
 634         int nbytes = charmap->width_rules[cnt].from->nbytes;
 635
 636         /* We have the range of character for which the width is
 637            specified described using byte sequences of the multibyte
 638            charset.  We have to convert this to UCS4 now.  And we
 639            cannot simply convert the beginning and the end of the
 640            sequence, we have to iterate over the byte sequence and
 641            convert it for every single character.  */
 642         memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
 643
 644         while (nbytes < charmap->width_rules[cnt].to->nbytes
 645                || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
 646                           nbytes) <= 0)
 647           {
 648             /* Find the UCS value for `bytes'.  */
 649             int inner;
 650             uint32_t wch;
 651             struct charseq *seq = charmap_find_symbol (charmap, bytes, nbytes);
 652
 653             if (seq == NULL)
 654               wch = ILLEGAL_CHAR_VALUE;
 655             else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
 656               wch = seq->ucs4;
 657             else
 658               wch = repertoire_find_value (ctype->repertoire, seq->name,
 659                                            strlen (seq->name));
 660
 661             if (wch != ILLEGAL_CHAR_VALUE)
 662               /* We are only interested in the side-effects of the
 663                  `find_idx' call.  It will add appropriate entries in
 664                  the name array if this is necessary.  */
 665               (void) find_idx (ctype, NULL, NULL, NULL, wch);
 666
 667             /* "Increment" the bytes sequence.  */
 668             inner = nbytes - 1;
 669             while (inner >= 0 && bytes[inner] == 0xff)
 670               --inner;
 671
 672             if (inner < 0)
 673               {
 674                 /* We have to extend the byte sequence.  */
 675                 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
 676                   break;
 677
 678                 bytes[0] = 1;
 679                 memset (&bytes[1], 0, nbytes);
 680                 ++nbytes;
 681               }
 682             else
 683               {
 684                 ++bytes[inner];
 685                 while (++inner < nbytes)
 686                   bytes[inner] = 0;
 687               }
 688           }
 689       }
 690
 691   /* Now set all the other characters of the character set to the
 692      default width.  */
 693   curs = NULL;
 694   while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
 695     {
 696       struct charseq *data = (struct charseq *) vdata;
 697
 698       if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
 699         data->ucs4 = repertoire_find_value (ctype->repertoire,
 700                                             data->name, len);
 701
 702       if (data->ucs4 != ILLEGAL_CHAR_VALUE)
 703         (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
 704     }
 705
 706   /* There must be a multiple of 10 digits.  */
 707   if (ctype->mbdigits_act % 10 != 0)
 708     {
 709       assert (ctype->mbdigits_act == ctype->wcdigits_act);
 710       ctype->wcdigits_act -= ctype->mbdigits_act % 10;
 711       ctype->mbdigits_act -= ctype->mbdigits_act % 10;
 712       WITH_CUR_LOCALE (error (0, 0, _("\
 713 `digit' category has not entries in groups of ten")));
 714     }
 715
 716   /* Check the input digits.  There must be a multiple of ten available.
 717      In each group it could be that one or the other character is missing.
 718      In this case the whole group must be removed.  */
 719   cnt = 0;
 720   while (cnt < ctype->mbdigits_act)
 721     {
 722       size_t inner;
 723       for (inner = 0; inner < 10; ++inner)
 724         if (ctype->mbdigits[cnt + inner] == NULL)
 725           break;
 726
 727       if (inner == 10)
 728         cnt += 10;
 729       else
 730         {
 731           /* Remove the group.  */
 732           memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
 733                    ((ctype->wcdigits_act - cnt - 10)
 734                     * sizeof (ctype->mbdigits[0])));
 735           ctype->mbdigits_act -= 10;
 736         }
 737     }
 738
 739   /* If no input digits are given use the default.  */
 740   if (ctype->mbdigits_act == 0)
 741     {
 742       if (ctype->mbdigits_max == 0)
 743         {
 744           ctype->mbdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
 745                                            10 * sizeof (struct charseq *));
 746           ctype->mbdigits_max = 10;
 747         }
 748
 749       for (cnt = 0; cnt < 10; ++cnt)
 750         {
 751           ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
 752                                                       digits + cnt, 1);
 753           if (ctype->mbdigits[cnt] == NULL)
 754             {
 755               ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
 756                                                           longnames[cnt],
 757                                                           strlen (longnames[cnt]));
 758               if (ctype->mbdigits[cnt] == NULL)
 759                 {
 760                   /* Hum, this ain't good.  */
 761                   WITH_CUR_LOCALE (error (0, 0, _("\
 762 no input digits defined and none of the standard names in the charmap")));
 763
 764                   ctype->mbdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
 765                                                         sizeof (struct charseq) + 1);
 766
 767                   /* This is better than nothing.  */
 768                   ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
 769                   ctype->mbdigits[cnt]->nbytes = 1;
 770                 }
 771             }
 772         }
 773
 774       ctype->mbdigits_act = 10;
 775     }
 776
 777   /* Check the wide character input digits.  There must be a multiple
 778      of ten available.  In each group it could be that one or the other
 779      character is missing.  In this case the whole group must be
 780      removed.  */
 781   cnt = 0;
 782   while (cnt < ctype->wcdigits_act)
 783     {
 784       size_t inner;
 785       for (inner = 0; inner < 10; ++inner)
 786         if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
 787           break;
 788
 789       if (inner == 10)
 790         cnt += 10;
 791       else
 792         {
 793           /* Remove the group.  */
 794           memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
 795                    ((ctype->wcdigits_act - cnt - 10)
 796                     * sizeof (ctype->wcdigits[0])));
 797           ctype->wcdigits_act -= 10;
 798         }
 799     }
 800
 801   /* If no input digits are given use the default.  */
 802   if (ctype->wcdigits_act == 0)
 803     {
 804       if (ctype->wcdigits_max == 0)
 805         {
 806           ctype->wcdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
 807                                            10 * sizeof (uint32_t));
 808           ctype->wcdigits_max = 10;
 809         }
 810
 811       for (cnt = 0; cnt < 10; ++cnt)
 812         ctype->wcdigits[cnt] = L'0' + cnt;
 813
 814       ctype->mbdigits_act = 10;
 815     }
 816
 817   /* Check the outdigits.  */
 818   warned = 0;
 819   for (cnt = 0; cnt < 10; ++cnt)
 820     if (ctype->mboutdigits[cnt] == NULL)
 821       {
 822         static struct charseq replace[2];
 823
 824         if (!warned)
 825           {
 826             WITH_CUR_LOCALE (error (0, 0, _("\
 827 not all characters used in `outdigit' are available in the charmap")));
 828             warned = 1;
 829           }
 830
 831         replace[0].nbytes = 1;
 832         replace[0].bytes[0] = '?';
 833         replace[0].bytes[1] = '\0';
 834         ctype->mboutdigits[cnt] = &replace[0];
 835       }
 836
 837   warned = 0;
 838   for (cnt = 0; cnt < 10; ++cnt)
 839     if (ctype->wcoutdigits[cnt] == 0)
 840       {
 841         if (!warned)
 842           {
 843             WITH_CUR_LOCALE (error (0, 0, _("\
 844 not all characters used in `outdigit' are available in the repertoire")));
 845             warned = 1;
 846           }
 847
 848         ctype->wcoutdigits[cnt] = L'?';
 849       }
 850
 851   /* Sort the entries in the translit_ignore list.  */
 852   if (ctype->translit_ignore != NULL)
 853     {
 854       struct translit_ignore_t *firstp = ctype->translit_ignore;
 855       struct translit_ignore_t *runp;
 856
 857       ctype->ntranslit_ignore = 1;
 858
 859       for (runp = firstp->next; runp != NULL; runp = runp->next)
 860         {
 861           struct translit_ignore_t *lastp = NULL;
 862           struct translit_ignore_t *cmpp;
 863
 864           ++ctype->ntranslit_ignore;
 865
 866           for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next)
 867             if (runp->from < cmpp->from)
 868               break;
 869
 870           runp->next = lastp;
 871           if (lastp == NULL)
 872             firstp = runp;
 873         }
 874
 875       ctype->translit_ignore = firstp;
 876     }
 877 }
 878
 879
/* Write the LC_CTYPE category of LOCALE to OUTPUT_PATH.

   The file image is described by an array of iovec entries which is
   finally handed to write_locale_data: iov[0] is the `struct locale_file'
   header (magic number and element count), iov[1] is the index table IDX
   and the remaining entries carry the data of the NELEMS elements in
   order.

   IDX[elem] is the running byte position at which element ELEM starts;
   idx[0] is the combined size of the header and the index table.  IDX
   is allocated with one spare slot so that idx[elem + 1] can be stored
   for the last element as well; only the first NELEMS entries are
   written out.

   OFFSET counts the iovec entries used in addition to the one entry per
   element (individual class and map names, alignment padding, and the
   class_b tables of the extra classes), so the data of the element
   currently handled always goes to iov[2 + elem + offset].  */
void
ctype_output (struct localedef_t *locale, const struct charmap_t *charmap,
              const char *output_path)
{
  /* Source of NUL bytes for padding; never more than four are used.  */
  static const char nulbytes[4] = { 0, 0, 0, 0 };
  struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
  const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)
                         + ctype->nr_charclass + ctype->map_collection_nr);
  /* Two entries for header and index, one per element, plus the extra
     entries accounted for by OFFSET: NR_CHARCLASS class names,
     MAP_COLLECTION_NR map names, NR_CHARCLASS class_b tables and four
     alignment paddings.  The assert at the end verifies this count.  */
  struct iovec *iov = alloca (sizeof *iov
                              * (2 + nelems + 2 * ctype->nr_charclass
                                 + ctype->map_collection_nr + 4));
  struct locale_file data;
  uint32_t *idx = alloca (sizeof *idx * (nelems + 1));
  uint32_t default_missing_len;
  size_t elem, cnt, offset, total;
  char *cp;

  /* Now prepare the output: Find the sizes of the table we can use.  */
  allocate_arrays (ctype, charmap, ctype->repertoire);

  data.magic = LIMAGIC (LC_CTYPE);
  data.n = nelems;
  iov[0].iov_base = (void *) &data;
  iov[0].iov_len = sizeof (data);

  iov[1].iov_base = (void *) idx;
  iov[1].iov_len = nelems * sizeof (uint32_t);

  idx[0] = iov[0].iov_len + iov[1].iov_len;
  offset = 0;

  for (elem = 0; elem < nelems; ++elem)
    {
      if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))
        switch (elem)
          {
#define CTYPE_EMPTY(name) \
          case name:                                                          \
            iov[2 + elem + offset].iov_base = NULL;                           \
            iov[2 + elem + offset].iov_len = 0;                               \
            idx[elem + 1] = idx[elem];                                        \
            break

          /* Placeholder elements which contribute no data.  */
          CTYPE_EMPTY(_NL_CTYPE_GAP1);
          CTYPE_EMPTY(_NL_CTYPE_GAP2);
          CTYPE_EMPTY(_NL_CTYPE_GAP3);
          CTYPE_EMPTY(_NL_CTYPE_GAP4);
          CTYPE_EMPTY(_NL_CTYPE_GAP5);
          CTYPE_EMPTY(_NL_CTYPE_GAP6);

#define CTYPE_DATA(name, base, len)                                           \
          case _NL_ITEM_INDEX (name):                                         \
            iov[2 + elem + offset].iov_base = (base);                         \
            iov[2 + elem + offset].iov_len = (len);                           \
            idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;       \
            break

          /* Elements which consist of a single buffer of known size.  */
          CTYPE_DATA (_NL_CTYPE_CLASS,
                      ctype->ctype_b,
                      (256 + 128) * sizeof (char_class_t));

          CTYPE_DATA (_NL_CTYPE_TOUPPER,
                      ctype->map_b[0],
                      (256 + 128) * sizeof (uint32_t));
          CTYPE_DATA (_NL_CTYPE_TOLOWER,
                      ctype->map_b[1],
                      (256 + 128) * sizeof (uint32_t));

          CTYPE_DATA (_NL_CTYPE_TOUPPER32,
                      ctype->map32_b[0],
                      256 * sizeof (uint32_t));
          CTYPE_DATA (_NL_CTYPE_TOLOWER32,
                      ctype->map32_b[1],
                      256 * sizeof (uint32_t));

          CTYPE_DATA (_NL_CTYPE_CLASS32,
                      ctype->ctype32_b,
                      256 * sizeof (char_class32_t));

          CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET,
                      &ctype->class_offset, sizeof (uint32_t));

          CTYPE_DATA (_NL_CTYPE_MAP_OFFSET,
                      &ctype->map_offset, sizeof (uint32_t));

          CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE,
                      &ctype->translit_idx_size, sizeof (uint32_t));

          CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX,
                      ctype->translit_from_idx,
                      ctype->translit_idx_size * sizeof (uint32_t));

          CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL,
                      ctype->translit_from_tbl,
                      ctype->translit_from_tbl_size);

          CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX,
                      ctype->translit_to_idx,
                      ctype->translit_idx_size * sizeof (uint32_t));

          CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL,
                      ctype->translit_to_tbl, ctype->translit_to_tbl_size);

          case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
            /* The class name array.  Every name gets an iovec entry of
               its own (hence the ++offset per name); the entry belonging
               to the element itself holds the trailing padding.  */
            total = 0;
            for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
              {
                iov[2 + elem + offset].iov_base
                  = (void *) ctype->classnames[cnt];
                iov[2 + elem + offset].iov_len
                  = strlen (ctype->classnames[cnt]) + 1;
                total += iov[2 + elem + offset].iov_len;
              }
            /* Between one and four NUL bytes are appended so that TOTAL
               becomes a multiple of four.  */
            iov[2 + elem + offset].iov_base = (void *) nulbytes;
            iov[2 + elem + offset].iov_len = 4 - (total % 4);
            total += 4 - (total % 4);

            idx[elem + 1] = idx[elem] + total;
            break;

          case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
            /* The map name array.  Laid out exactly like the class name
               array above.  */
            total = 0;
            for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
              {
                iov[2 + elem + offset].iov_base
                  = (void *) ctype->mapnames[cnt];
                iov[2 + elem + offset].iov_len
                  = strlen (ctype->mapnames[cnt]) + 1;
                total += iov[2 + elem + offset].iov_len;
              }
            iov[2 + elem + offset].iov_base = (void *) nulbytes;
            iov[2 + elem + offset].iov_len = 4 - (total % 4);
            total += 4 - (total % 4);

            idx[elem + 1] = idx[elem] + total;
            break;

          CTYPE_DATA (_NL_CTYPE_WIDTH,
                      ctype->width.iov_base,
                      ctype->width.iov_len);

          CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
                      &ctype->mb_cur_max, sizeof (uint32_t));

          case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
            /* The codeset name including its terminating NUL.  If the
               length is not a multiple of four a NUL-padded copy is
               written instead of the original string.  */
            total = strlen (ctype->codeset_name) + 1;
            if (total % 4 == 0)
              iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
            else
              {
                iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
                memset (mempcpy (iov[2 + elem + offset].iov_base,
                                 ctype->codeset_name, total),
                        '\0', 4 - (total & 3));
                total = (total + 3) & ~3;
              }
            iov[2 + elem + offset].iov_len = total;
            idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
            break;


          CTYPE_DATA (_NL_CTYPE_MAP_TO_NONASCII,
                      &ctype->to_nonascii, sizeof (uint32_t));

          case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
            /* Number of groups of ten multibyte input digits.  */
            iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
            iov[2 + elem + offset].iov_len = sizeof (uint32_t);
            *(uint32_t *) iov[2 + elem + offset].iov_base =
              ctype->mbdigits_act / 10;
            idx[elem + 1] = idx[elem] + sizeof (uint32_t);
            break;

          case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
            /* Align entries.  The padding occupies an extra iovec entry
               and the start position of this element is moved behind
               it.  */
            iov[2 + elem + offset].iov_base = (void *) nulbytes;
            iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
            idx[elem] += iov[2 + elem + offset].iov_len;
            ++offset;

            /* Number of groups of ten wide character input digits.  */
            iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
            iov[2 + elem + offset].iov_len = sizeof (uint32_t);
            *(uint32_t *) iov[2 + elem + offset].iov_base =
              ctype->wcdigits_act / 10;
            idx[elem + 1] = idx[elem] + sizeof (uint32_t);
            break;

          case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
            /* Compute the length of all possible characters.  For INDIGITS
               there might be more than one.  We simply concatenate all of
               them with a NUL byte following.  The NUL byte wouldn't be
               necessary but it makes it easier for the user.  */
            total = 0;

            /* The representations of one digit value are found at every
               tenth position of the MBDIGITS array.  */
            for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
                 cnt < ctype->mbdigits_act; cnt += 10)
              total += ctype->mbdigits[cnt]->nbytes + 1;
            iov[2 + elem + offset].iov_base = (char *) alloca (total);
            iov[2 + elem + offset].iov_len = total;

            cp = iov[2 + elem + offset].iov_base;
            for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
                 cnt < ctype->mbdigits_act; cnt += 10)
              {
                cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes,
                              ctype->mbdigits[cnt]->nbytes);
                *cp++ = '\0';
              }
            idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
            break;

          case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
            /* Unlike INDIGITS there is exactly one byte sequence for each
               OUTDIGIT.  It is copied with a NUL byte following.  The NUL
               byte wouldn't be necessary but it makes it easier for the
               user.  */
            cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
            total = ctype->mboutdigits[cnt]->nbytes + 1;
            iov[2 + elem + offset].iov_base = (char *) alloca (total);
            iov[2 + elem + offset].iov_len = total;

            *(char *) mempcpy (iov[2 + elem + offset].iov_base,
                               ctype->mboutdigits[cnt]->bytes,
                               ctype->mboutdigits[cnt]->nbytes) = '\0';
            idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
            break;

          case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
            /* One wide character per group of ten input digits.  */
            total = ctype->wcdigits_act / 10;

            iov[2 + elem + offset].iov_base =
              (uint32_t *) alloca (total * sizeof (uint32_t));
            iov[2 + elem + offset].iov_len = total * sizeof (uint32_t);

            for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
                 cnt < ctype->wcdigits_act; cnt += 10)
              ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10]
                = ctype->wcdigits[cnt];
            idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
            break;

          case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC):
            /* Align entries.  */
            iov[2 + elem + offset].iov_base = (void *) nulbytes;
            iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
            idx[elem] += iov[2 + elem + offset].iov_len;
            ++offset;
            /* FALLTHROUGH */

          case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT1_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
            cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
            iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt];
            iov[2 + elem + offset].iov_len = sizeof (uint32_t);
            idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
            break;

          case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
            /* Align entries.  */
            iov[2 + elem + offset].iov_base = (void *) nulbytes;
            iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
            idx[elem] += iov[2 + elem + offset].iov_len;
            ++offset;

            /* Length in wide characters, zero if no default is set.  */
            default_missing_len = (ctype->default_missing
                                   ? wcslen ((wchar_t *)ctype->default_missing)
                                   : 0);
            iov[2 + elem + offset].iov_base = &default_missing_len;
            iov[2 + elem + offset].iov_len = sizeof (uint32_t);
            idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
            break;

          case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
            /* The string itself, written without terminating NUL; an
               empty string is used if no default is set.  */
            iov[2 + elem + offset].iov_base =
              ctype->default_missing ?: (uint32_t *) L"";
            iov[2 + elem + offset].iov_len =
              wcslen (iov[2 + elem + offset].iov_base) * sizeof (uint32_t);
            idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
            break;

          case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
            /* Align entries.  */
            iov[2 + elem + offset].iov_base = (void *) nulbytes;
            iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
            idx[elem] += iov[2 + elem + offset].iov_len;
            ++offset;

            iov[2 + elem + offset].iov_base = &ctype->ntranslit_ignore;
            iov[2 + elem + offset].iov_len = sizeof (uint32_t);
            idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
            break;

          case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
            {
              /* Flatten the linked list into an array of (from, to, step)
                 triples.  */
              uint32_t *ranges = (uint32_t *) alloca (ctype->ntranslit_ignore
                                                      * 3 * sizeof (uint32_t));
              struct translit_ignore_t *runp;

              iov[2 + elem + offset].iov_base = ranges;
              iov[2 + elem + offset].iov_len = (ctype->ntranslit_ignore
                                                * 3 * sizeof (uint32_t));

              for (runp = ctype->translit_ignore; runp != NULL;
                   runp = runp->next)
                {
                  *ranges++ = runp->from;
                  *ranges++ = runp->to;
                  *ranges++ = runp->step;
                }
            }
            /* Remove the following line in case a new entry is added
               after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN.  */
            if (elem < nelems)
              idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
            break;

          default:
            assert (! "unknown CTYPE element");
          }
      else
        {
          /* Handle extra maps.  The elements starting at
             _NL_CTYPE_EXTRA_MAP_1 are first the NR_CHARCLASS class tables
             and then the MAP_COLLECTION_NR mapping tables.  */
          size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
          if (nr < ctype->nr_charclass)
            {
              /* The class_b table is written in front of the three-level
                 table; the index entry is moved past it so that it refers
                 to the class_3level data.  */
              iov[2 + elem + offset].iov_base = ctype->class_b[nr];
              iov[2 + elem + offset].iov_len = 256 / 32 * sizeof (uint32_t);
              idx[elem] += iov[2 + elem + offset].iov_len;
              ++offset;

              iov[2 + elem + offset] = ctype->class_3level[nr];
            }
          else
            {
              nr -= ctype->nr_charclass;
              assert (nr < ctype->map_collection_nr);
              iov[2 + elem + offset] = ctype->map_3level[nr];
            }
          idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
        }
    }

  /* Every allocated iovec entry must have been filled in.  */
  assert (2 + elem + offset == (nelems + 2 * ctype->nr_charclass
                                + ctype->map_collection_nr + 4 + 2));

  write_locale_data (output_path, LC_CTYPE, "LC_CTYPE", 2 + elem + offset,
                     iov);
}
1228
1229
1230 /* Local functions.  */
1231 static void
1232 ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
1233                  const char *name)
1234 {
1235   size_t cnt;
1236
1237   for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1238     if (strcmp (ctype->classnames[cnt], name) == 0)
1239       break;
1240
1241   if (cnt < ctype->nr_charclass)
1242     {
1243       lr_error (lr, _("character class `%s' already defined"), name);
1244       return;
1245     }
1246
1247   if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1248     /* Exit code 2 is prescribed in P1003.2b.  */
1249     WITH_CUR_LOCALE (error (2, 0, _("\
1250 implementation limit: no more than %Zd character classes allowed"),
1251                             MAX_NR_CHARCLASS));
1252
1253   ctype->classnames[ctype->nr_charclass++] = name;
1254 }
1255
1256
1257 static void
1258 ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1259                const char *name, const struct charmap_t *charmap)
1260 {
1261   size_t max_chars = 0;
1262   size_t cnt;
1263
1264   for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1265     {
1266       if (strcmp (ctype->mapnames[cnt], name) == 0)
1267         break;
1268
1269       if (max_chars < ctype->map_collection_max[cnt])
1270         max_chars = ctype->map_collection_max[cnt];
1271     }
1272
1273   if (cnt < ctype->map_collection_nr)
1274     {
1275       lr_error (lr, _("character map `%s' already defined"), name);
1276       return;
1277     }
1278
1279   if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1280     /* Exit code 2 is prescribed in P1003.2b.  */
1281     WITH_CUR_LOCALE (error (2, 0, _("\
1282 implementation limit: no more than %d character maps allowed"),
1283                             MAX_NR_CHARMAP));
1284
1285   ctype->mapnames[cnt] = name;
1286
1287   if (max_chars == 0)
1288     ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1289   else
1290     ctype->map_collection_max[cnt] = max_chars;
1291
1292   ctype->map_collection[cnt] = (uint32_t *)
1293     xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
1294   ctype->map_collection_act[cnt] = 256;
1295
1296   ++ctype->map_collection_nr;
1297 }
1298
1299
1300 /* We have to be prepared that TABLE, MAX, and ACT can be NULL.  This
1301    is possible if we only want to extend the name array.  */
1302 static uint32_t *
1303 find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1304           size_t *act, uint32_t idx)
1305 {
1306   size_t cnt;
1307
1308   if (idx < 256)
1309     return table == NULL ? NULL : &(*table)[idx];
1310
1311   /* Use the charnames_idx lookup table instead of the slow search loop.  */
1312 #if 1
1313   cnt = idx_table_get (&ctype->charnames_idx, idx);
1314   if (cnt == EMPTY)
1315     /* Not found.  */
1316     cnt = ctype->charnames_act;
1317 #else
1318   for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1319     if (ctype->charnames[cnt] == idx)
1320       break;
1321 #endif
1322
1323   /* We have to distinguish two cases: the name is found or not.  */
1324   if (cnt == ctype->charnames_act)
1325     {
1326       /* Extend the name array.  */
1327       if (ctype->charnames_act == ctype->charnames_max)
1328         {
1329           ctype->charnames_max *= 2;
1330           ctype->charnames = (uint32_t *)
1331             xrealloc (ctype->charnames,
1332                       sizeof (uint32_t) * ctype->charnames_max);
1333         }
1334       ctype->charnames[ctype->charnames_act++] = idx;
1335       idx_table_add (&ctype->charnames_idx, idx, cnt);
1336     }
1337
1338   if (table == NULL)
1339     /* We have done everything we are asked to do.  */
1340     return NULL;
1341
1342   if (max == NULL)
1343     /* The caller does not want to extend the table.  */
1344     return (cnt >= *act ? NULL : &(*table)[cnt]);
1345
1346   if (cnt >= *act)
1347     {
1348       if (cnt >= *max)
1349         {
1350           size_t old_max = *max;
1351           do
1352             *max *= 2;
1353           while (*max <= cnt);
1354
1355           *table =
1356             (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
1357           memset (&(*table)[old_max], '\0',
1358                   (*max - old_max) * sizeof (uint32_t));
1359         }
1360
1361       *act = cnt + 1;
1362     }
1363
1364   return &(*table)[cnt];
1365 }
1366
1367
1368 static int
1369 get_character (struct token *now, const struct charmap_t *charmap,
1370                struct repertoire_t *repertoire,
1371                struct charseq **seqp, uint32_t *wchp)
1372 {
1373   if (now->tok == tok_bsymbol)
1374     {
1375       /* This will hopefully be the normal case.  */
1376       *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1377                                      now->val.str.lenmb);
1378       *seqp = charmap_find_value (charmap, now->val.str.startmb,
1379                                   now->val.str.lenmb);
1380     }
1381   else if (now->tok == tok_ucs4)
1382     {
1383       char utmp[10];
1384
1385       snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1386       *seqp = charmap_find_value (charmap, utmp, 9);
1387
1388       if (*seqp == NULL)
1389         *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
1390
1391       if (*seqp == NULL)
1392         {
1393           /* Compute the value in the charmap from the UCS value.  */
1394           const char *symbol = repertoire_find_symbol (repertoire,
1395                                                        now->val.ucs4);
1396
1397           if (symbol == NULL)
1398             *seqp = NULL;
1399           else
1400             *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
1401
1402           if (*seqp == NULL)
1403             {
1404               if (repertoire != NULL)
1405                 {
1406                   /* Insert a negative entry.  */
1407                   static const struct charseq negative
1408                     = { .ucs4 = ILLEGAL_CHAR_VALUE };
1409                   uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1410                                                   sizeof (uint32_t));
1411                   *newp = now->val.ucs4;
1412
1413                   insert_entry (&repertoire->seq_table, newp,
1414                                 sizeof (uint32_t), (void *) &negative);
1415                 }
1416             }
1417           else
1418             (*seqp)->ucs4 = now->val.ucs4;
1419         }
1420       else if ((*seqp)->ucs4 != now->val.ucs4)
1421         *seqp = NULL;
1422
1423       *wchp = now->val.ucs4;
1424     }
1425   else if (now->tok == tok_charcode)
1426     {
1427       /* We must map from the byte code to UCS4.  */
1428       *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1429                                    now->val.str.lenmb);
1430
1431       if (*seqp == NULL)
1432         *wchp = ILLEGAL_CHAR_VALUE;
1433       else
1434         {
1435           if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1436             (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1437                                                    strlen ((*seqp)->name));
1438           *wchp = (*seqp)->ucs4;
1439         }
1440     }
1441   else
1442     return 1;
1443
1444   return 0;
1445 }
1446
1447
1448 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1449    the .(2). counterparts.  */
1450 static void
1451 charclass_symbolic_ellipsis (struct linereader *ldfile,
1452                              struct locale_ctype_t *ctype,
1453                              const struct charmap_t *charmap,
1454                              struct repertoire_t *repertoire,
1455                              struct token *now,
1456                              const char *last_str,
1457                              unsigned long int class256_bit,
1458                              unsigned long int class_bit, int base,
1459                              int ignore_content, int handle_digits, int step)
1460 {
1461   const char *nowstr = now->val.str.startmb;
1462   char tmp[now->val.str.lenmb + 1];
1463   const char *cp;
1464   char *endp;
1465   unsigned long int from;
1466   unsigned long int to;
1467
1468   /* We have to compute the ellipsis values using the symbolic names.  */
1469   assert (last_str != NULL);
1470
1471   if (strlen (last_str) != now->val.str.lenmb)
1472     {
1473     invalid_range:
1474       lr_error (ldfile,
1475                 _("`%s' and `%.*s' are not valid names for symbolic range"),
1476                 last_str, (int) now->val.str.lenmb, nowstr);
1477       return;
1478     }
1479
1480   if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1481     /* Nothing to do, the names are the same.  */
1482     return;
1483
1484   for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1485     ;
1486
1487   errno = 0;
1488   from = strtoul (cp, &endp, base);
1489   if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1490     goto invalid_range;
1491
1492   to = strtoul (nowstr + (cp - last_str), &endp, base);
1493   if ((to == UINT_MAX && errno == ERANGE)
1494       || (endp - nowstr) != now->val.str.lenmb || from >= to)
1495     goto invalid_range;
1496
1497   /* OK, we have a range FROM - TO.  Now we can create the symbolic names.  */
1498   if (!ignore_content)
1499     {
1500       now->val.str.startmb = tmp;
1501       while ((from += step) <= to)
1502         {
1503           struct charseq *seq;
1504           uint32_t wch;
1505
1506           sprintf (tmp, (base == 10 ? "%.*s%0*ld" : "%.*s%0*lX"),
1507                    (int) (cp - last_str), last_str,
1508                    (int) (now->val.str.lenmb - (cp - last_str)),
1509                    from);
1510
1511           get_character (now, charmap, repertoire, &seq, &wch);
1512
1513           if (seq != NULL && seq->nbytes == 1)
1514             /* Yep, we can store information about this byte sequence.  */
1515             ctype->class256_collection[seq->bytes[0]] |= class256_bit;
1516
1517           if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1518             /* We have the UCS4 position.  */
1519             *find_idx (ctype, &ctype->class_collection,
1520                        &ctype->class_collection_max,
1521                        &ctype->class_collection_act, wch) |= class_bit;
1522
1523           if (handle_digits == 1)
1524             {
1525               /* We must store the digit values.  */
1526               if (ctype->mbdigits_act == ctype->mbdigits_max)
1527                 {
1528                   ctype->mbdigits_max *= 2;
1529                   ctype->mbdigits = xrealloc (ctype->mbdigits,
1530                                               (ctype->mbdigits_max
1531                                                * sizeof (char *)));
1532                   ctype->wcdigits_max *= 2;
1533                   ctype->wcdigits = xrealloc (ctype->wcdigits,
1534                                               (ctype->wcdigits_max
1535                                                * sizeof (uint32_t)));
1536                 }
1537
1538               ctype->mbdigits[ctype->mbdigits_act++] = seq;
1539               ctype->wcdigits[ctype->wcdigits_act++] = wch;
1540             }
1541           else if (handle_digits == 2)
1542             {
1543               /* We must store the digit values.  */
1544               if (ctype->outdigits_act >= 10)
1545                 {
1546                   lr_error (ldfile, _("\
1547 %s: field `%s' does not contain exactly ten entries"),
1548                             "LC_CTYPE", "outdigit");
1549                   return;
1550                 }
1551
1552               ctype->mboutdigits[ctype->outdigits_act] = seq;
1553               ctype->wcoutdigits[ctype->outdigits_act] = wch;
1554               ++ctype->outdigits_act;
1555             }
1556         }
1557     }
1558 }
1559
1560
1561 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'.  */
1562 static void
1563 charclass_ucs4_ellipsis (struct linereader *ldfile,
1564                          struct locale_ctype_t *ctype,
1565                          const struct charmap_t *charmap,
1566                          struct repertoire_t *repertoire,
1567                          struct token *now, uint32_t last_wch,
1568                          unsigned long int class256_bit,
1569                          unsigned long int class_bit, int ignore_content,
1570                          int handle_digits, int step)
1571 {
1572   if (last_wch > now->val.ucs4)
1573     {
1574       lr_error (ldfile, _("\
1575 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1576                 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1577                 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
1578       return;
1579     }
1580
1581   if (!ignore_content)
1582     while ((last_wch += step) <= now->val.ucs4)
1583       {
1584         /* We have to find out whether there is a byte sequence corresponding
1585            to this UCS4 value.  */
1586         struct charseq *seq;
1587         char utmp[10];
1588
1589         snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1590         seq = charmap_find_value (charmap, utmp, 9);
1591         if (seq == NULL)
1592           {
1593             snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1594             seq = charmap_find_value (charmap, utmp, 5);
1595           }
1596
1597         if (seq == NULL)
1598           /* Try looking in the repertoire map.  */
1599           seq = repertoire_find_seq (repertoire, last_wch);
1600
1601         /* If this is the first time we look for this sequence create a new
1602            entry.  */
1603         if (seq == NULL)
1604           {
1605             static const struct charseq negative
1606               = { .ucs4 = ILLEGAL_CHAR_VALUE };
1607
1608             /* Find the symbolic name for this UCS4 value.  */
1609             if (repertoire != NULL)
1610               {
1611                 const char *symbol = repertoire_find_symbol (repertoire,
1612                                                              last_wch);
1613                 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1614                                                 sizeof (uint32_t));
1615                 *newp = last_wch;
1616
1617                 if (symbol != NULL)
1618                   /* We have a name, now search the multibyte value.  */
1619                   seq = charmap_find_value (charmap, symbol, strlen (symbol));
1620
1621                 if (seq == NULL)
1622                   /* We have to create a fake entry.  */
1623                   seq = (struct charseq *) &negative;
1624                 else
1625                   seq->ucs4 = last_wch;
1626
1627                 insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1628                               seq);
1629               }
1630             else
1631               /* We have to create a fake entry.  */
1632               seq = (struct charseq *) &negative;
1633           }
1634
1635         /* We have a name, now search the multibyte value.  */
1636         if (seq->ucs4 == last_wch && seq->nbytes == 1)
1637           /* Yep, we can store information about this byte sequence.  */
1638           ctype->class256_collection[(size_t) seq->bytes[0]]
1639             |= class256_bit;
1640
1641         /* And of course we have the UCS4 position.  */
1642         if (class_bit != 0)
1643           *find_idx (ctype, &ctype->class_collection,
1644                      &ctype->class_collection_max,
1645                      &ctype->class_collection_act, last_wch) |= class_bit;
1646
1647         if (handle_digits == 1)
1648           {
1649             /* We must store the digit values.  */
1650             if (ctype->mbdigits_act == ctype->mbdigits_max)
1651               {
1652                 ctype->mbdigits_max *= 2;
1653                 ctype->mbdigits = xrealloc (ctype->mbdigits,
1654                                             (ctype->mbdigits_max
1655                                              * sizeof (char *)));
1656                 ctype->wcdigits_max *= 2;
1657                 ctype->wcdigits = xrealloc (ctype->wcdigits,
1658                                             (ctype->wcdigits_max
1659                                              * sizeof (uint32_t)));
1660               }
1661
1662             ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1663                                                       ? seq : NULL);
1664             ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1665           }
1666         else if (handle_digits == 2)
1667           {
1668             /* We must store the digit values.  */
1669             if (ctype->outdigits_act >= 10)
1670               {
1671                 lr_error (ldfile, _("\
1672 %s: field `%s' does not contain exactly ten entries"),
1673                           "LC_CTYPE", "outdigit");
1674                 return;
1675               }
1676
1677             ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1678                                                         ? seq : NULL);
1679             ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1680             ++ctype->outdigits_act;
1681           }
1682       }
1683 }
1684
1685
1686 /* Ellipsis as in `/xea/x12.../xea/x34'.  */
1687 static void
1688 charclass_charcode_ellipsis (struct linereader *ldfile,
1689                              struct locale_ctype_t *ctype,
1690                              const struct charmap_t *charmap,
1691                              struct repertoire_t *repertoire,
1692                              struct token *now, char *last_charcode,
1693                              uint32_t last_charcode_len,
1694                              unsigned long int class256_bit,
1695                              unsigned long int class_bit, int ignore_content,
1696                              int handle_digits)
1697 {
1698   /* First check whether the to-value is larger.  */
1699   if (now->val.charcode.nbytes != last_charcode_len)
1700     {
1701       lr_error (ldfile, _("\
1702 start and end character sequence of range must have the same length"));
1703       return;
1704     }
1705
1706   if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
1707     {
1708       lr_error (ldfile, _("\
1709 to-value character sequence is smaller than from-value sequence"));
1710       return;
1711     }
1712
1713   if (!ignore_content)
1714     {
1715       do
1716         {
1717           /* Increment the byte sequence value.  */
1718           struct charseq *seq;
1719           uint32_t wch;
1720           int i;
1721
1722           for (i = last_charcode_len - 1; i >= 0; --i)
1723             if (++last_charcode[i] != 0)
1724               break;
1725
1726           if (last_charcode_len == 1)
1727             /* Of course we have the charcode value.  */
1728             ctype->class256_collection[(size_t) last_charcode[0]]
1729               |= class256_bit;
1730
1731           /* Find the symbolic name.  */
1732           seq = charmap_find_symbol (charmap, last_charcode,
1733                                      last_charcode_len);
1734           if (seq != NULL)
1735             {
1736               if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1737                 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1738                                                    strlen (seq->name));
1739               wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
1740
1741               if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1742                 *find_idx (ctype, &ctype->class_collection,
1743                            &ctype->class_collection_max,
1744                            &ctype->class_collection_act, wch) |= class_bit;
1745             }
1746           else
1747             wch = ILLEGAL_CHAR_VALUE;
1748
1749           if (handle_digits == 1)
1750             {
1751               /* We must store the digit values.  */
1752               if (ctype->mbdigits_act == ctype->mbdigits_max)
1753                 {
1754                   ctype->mbdigits_max *= 2;
1755                   ctype->mbdigits = xrealloc (ctype->mbdigits,
1756                                               (ctype->mbdigits_max
1757                                                * sizeof (char *)));
1758                   ctype->wcdigits_max *= 2;
1759                   ctype->wcdigits = xrealloc (ctype->wcdigits,
1760                                               (ctype->wcdigits_max
1761                                                * sizeof (uint32_t)));
1762                 }
1763
1764               seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1765               memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1766               seq->nbytes = last_charcode_len;
1767
1768               ctype->mbdigits[ctype->mbdigits_act++] = seq;
1769               ctype->wcdigits[ctype->wcdigits_act++] = wch;
1770             }
1771           else if (handle_digits == 2)
1772             {
1773               struct charseq *seq;
1774               /* We must store the digit values.  */
1775               if (ctype->outdigits_act >= 10)
1776                 {
1777                   lr_error (ldfile, _("\
1778 %s: field `%s' does not contain exactly ten entries"),
1779                             "LC_CTYPE", "outdigit");
1780                   return;
1781                 }
1782
1783               seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1784               memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1785               seq->nbytes = last_charcode_len;
1786
1787               ctype->mboutdigits[ctype->outdigits_act] = seq;
1788               ctype->wcoutdigits[ctype->outdigits_act] = wch;
1789               ++ctype->outdigits_act;
1790             }
1791         }
1792       while (memcmp (last_charcode, now->val.charcode.bytes,
1793                      last_charcode_len) != 0);
1794     }
1795 }
1796
1797
1798 static uint32_t *
1799 find_translit2 (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
1800                 uint32_t wch)
1801 {
1802   struct translit_t *trunp = ctype->translit;
1803   struct translit_ignore_t *tirunp = ctype->translit_ignore;
1804
1805   while (trunp != NULL)
1806     {
1807       /* XXX We simplify things here.  The transliterations we look
1808          for are only allowed to have one character.  */
1809       if (trunp->from[0] == wch && trunp->from[1] == 0)
1810         {
1811           /* Found it.  Now look for a transliteration which can be
1812              represented with the character set.  */
1813           struct translit_to_t *torunp = trunp->to;
1814
1815           while (torunp != NULL)
1816             {
1817               int i;
1818
1819               for (i = 0; torunp->str[i] != 0; ++i)
1820                 {
1821                   char utmp[10];
1822
1823                   snprintf (utmp, sizeof (utmp), "U%08X", torunp->str[i]);
1824                   if (charmap_find_value (charmap, utmp, 9) == NULL)
1825                     /* This character cannot be represented.  */
1826                     break;
1827                 }
1828
1829               if (torunp->str[i] == 0)
1830                 return torunp->str;
1831
1832               torunp = torunp->next;
1833             }
1834
1835           break;
1836         }
1837
1838       trunp = trunp->next;
1839     }
1840
1841   /* Check for ignored chars.  */
1842   while (tirunp != NULL)
1843     {
1844       if (tirunp->from <= wch && tirunp->to >= wch)
1845         {
1846           uint32_t wi;
1847
1848           for (wi = tirunp->from; wi <= wch; wi += tirunp->step)
1849             if (wi == wch)
1850               return (uint32_t []) { 0 };
1851         }
1852     }
1853
1854   /* Nothing found.  */
1855   return NULL;
1856 }
1857
1858
1859 uint32_t *
1860 find_translit (struct localedef_t *locale, const struct charmap_t *charmap,
1861                uint32_t wch)
1862 {
1863   struct locale_ctype_t *ctype;
1864   uint32_t *result = NULL;
1865
1866   assert (locale != NULL);
1867   ctype = locale->categories[LC_CTYPE].ctype;
1868
1869   if (ctype->translit != NULL)
1870     result = find_translit2 (ctype, charmap, wch);
1871
1872   if (result == NULL)
1873     {
1874       struct translit_include_t *irunp = ctype->translit_include;
1875
1876       while (irunp != NULL && result == NULL)
1877         {
1878           result = find_translit (find_locale (CTYPE_LOCALE,
1879                                                irunp->copy_locale,
1880                                                irunp->copy_repertoire,
1881                                                charmap),
1882                                   charmap, wch);
1883           irunp = irunp->next;
1884         }
1885     }
1886
1887   return result;
1888 }
1889
1890
1891 /* Read one transliteration entry.  */
1892 static uint32_t *
1893 read_widestring (struct linereader *ldfile, struct token *now,
1894                  const struct charmap_t *charmap,
1895                  struct repertoire_t *repertoire)
1896 {
1897   uint32_t *wstr;
1898
1899   if (now->tok == tok_default_missing)
1900     /* The special name "" will denote this case.  */
1901     wstr = ((uint32_t *) { 0 });
1902   else if (now->tok == tok_bsymbol)
1903     {
1904       /* Get the value from the repertoire.  */
1905       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1906       wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1907                                        now->val.str.lenmb);
1908       if (wstr[0] == ILLEGAL_CHAR_VALUE)
1909         {
1910           /* We cannot proceed, we don't know the UCS4 value.  */
1911           free (wstr);
1912           return NULL;
1913         }
1914
1915       wstr[1] = 0;
1916     }
1917   else if (now->tok == tok_ucs4)
1918     {
1919       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1920       wstr[0] = now->val.ucs4;
1921       wstr[1] = 0;
1922     }
1923   else if (now->tok == tok_charcode)
1924     {
1925       /* Argh, we have to convert to the symbol name first and then to the
1926          UCS4 value.  */
1927       struct charseq *seq = charmap_find_symbol (charmap,
1928                                                  now->val.str.startmb,
1929                                                  now->val.str.lenmb);
1930       if (seq == NULL)
1931         /* Cannot find the UCS4 value.  */
1932         return NULL;
1933
1934       if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1935         seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1936                                            strlen (seq->name));
1937       if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1938         /* We cannot proceed, we don't know the UCS4 value.  */
1939         return NULL;
1940
1941       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1942       wstr[0] = seq->ucs4;
1943       wstr[1] = 0;
1944     }
1945   else if (now->tok == tok_string)
1946     {
1947       wstr = now->val.str.startwc;
1948       if (wstr == NULL || wstr[0] == 0)
1949         return NULL;
1950     }
1951   else
1952     {
1953       if (now->tok != tok_eol && now->tok != tok_eof)
1954         lr_ignore_rest (ldfile, 0);
1955       SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1956       return (uint32_t *) -1l;
1957     }
1958
1959   return wstr;
1960 }
1961
1962
1963 static void
1964 read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1965                      struct token *now, const struct charmap_t *charmap,
1966                      struct repertoire_t *repertoire)
1967 {
1968   uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1969   struct translit_t *result;
1970   struct translit_to_t **top;
1971   struct obstack *ob = &ctype->mempool;
1972   int first;
1973   int ignore;
1974
1975   if (from_wstr == NULL)
1976     /* There is no valid from string.  */
1977     return;
1978
1979   result = (struct translit_t *) obstack_alloc (ob,
1980                                                 sizeof (struct translit_t));
1981   result->from = from_wstr;
1982   result->fname = ldfile->fname;
1983   result->lineno = ldfile->lineno;
1984   result->next = NULL;
1985   result->to = NULL;
1986   top = &result->to;
1987   first = 1;
1988   ignore = 0;
1989
1990   while (1)
1991     {
1992       uint32_t *to_wstr;
1993
1994       /* Next we have one or more transliterations.  They are
1995          separated by semicolons.  */
1996       now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
1997
1998       if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
1999         {
2000           /* One string read.  */
2001           const uint32_t zero = 0;
2002
2003           if (!ignore)
2004             {
2005               obstack_grow (ob, &zero, 4);
2006               to_wstr = obstack_finish (ob);
2007
2008               *top = obstack_alloc (ob, sizeof (struct translit_to_t));
2009               (*top)->str = to_wstr;
2010               (*top)->next = NULL;
2011             }
2012
2013           if (now->tok == tok_eol)
2014             {
2015               result->next = ctype->translit;
2016               ctype->translit = result;
2017               return;
2018             }
2019
2020           if (!ignore)
2021             top = &(*top)->next;
2022           ignore = 0;
2023         }
2024       else
2025         {
2026           to_wstr = read_widestring (ldfile, now, charmap, repertoire);
2027           if (to_wstr == (uint32_t *) -1l)
2028             {
2029               /* An error occurred.  */
2030               obstack_free (ob, result);
2031               return;
2032             }
2033
2034           if (to_wstr == NULL)
2035             ignore = 1;
2036           else
2037             /* This value is usable.  */
2038             obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
2039
2040           first = 0;
2041         }
2042     }
2043 }
2044
2045
2046 static void
2047 read_translit_ignore_entry (struct linereader *ldfile,
2048                             struct locale_ctype_t *ctype,
2049                             const struct charmap_t *charmap,
2050                             struct repertoire_t *repertoire)
2051 {
2052   /* We expect a semicolon-separated list of characters we ignore.  We are
2053      only interested in the wide character definitions.  These must be
2054      single characters, possibly defining a range when an ellipsis is used.  */
2055   while (1)
2056     {
2057       struct token *now = lr_token (ldfile, charmap, NULL, repertoire,
2058                                     verbose);
2059       struct translit_ignore_t *newp;
2060       uint32_t from;
2061
2062       if (now->tok == tok_eol || now->tok == tok_eof)
2063         {
2064           lr_error (ldfile,
2065                     _("premature end of `translit_ignore' definition"));
2066           return;
2067         }
2068
2069       if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2070         {
2071           lr_error (ldfile, _("syntax error"));
2072           lr_ignore_rest (ldfile, 0);
2073           return;
2074         }
2075
2076       if (now->tok == tok_ucs4)
2077         from = now->val.ucs4;
2078       else
2079         /* Try to get the value.  */
2080         from = repertoire_find_value (repertoire, now->val.str.startmb,
2081                                       now->val.str.lenmb);
2082
2083       if (from == ILLEGAL_CHAR_VALUE)
2084         {
2085           lr_error (ldfile, "invalid character name");
2086           newp = NULL;
2087         }
2088       else
2089         {
2090           newp = (struct translit_ignore_t *)
2091             obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
2092           newp->from = from;
2093           newp->to = from;
2094           newp->step = 1;
2095
2096           newp->next = ctype->translit_ignore;
2097           ctype->translit_ignore = newp;
2098         }
2099
2100       /* Now we expect either a semicolon, an ellipsis, or the end of the
2101          line.  */
2102       now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2103
2104       if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
2105         {
2106           /* XXX Should we bother implementing `....'?  `...' certainly
2107              will not be implemented.  */
2108           uint32_t to;
2109           int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
2110
2111           now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2112
2113           if (now->tok == tok_eol || now->tok == tok_eof)
2114             {
2115               lr_error (ldfile,
2116                         _("premature end of `translit_ignore' definition"));
2117               return;
2118             }
2119
2120           if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2121             {
2122               lr_error (ldfile, _("syntax error"));
2123               lr_ignore_rest (ldfile, 0);
2124               return;
2125             }
2126
2127           if (now->tok == tok_ucs4)
2128             to = now->val.ucs4;
2129           else
2130             /* Try to get the value.  */
2131             to = repertoire_find_value (repertoire, now->val.str.startmb,
2132                                         now->val.str.lenmb);
2133
2134           if (to == ILLEGAL_CHAR_VALUE)
2135             lr_error (ldfile, "invalid character name");
2136           else
2137             {
2138               /* Make sure the `to'-value is larger.  */
2139               if (to >= from)
2140                 {
2141                   newp->to = to;
2142                   newp->step = step;
2143                 }
2144               else
2145                 lr_error (ldfile, _("\
2146 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2147                           (to | from) < 65536 ? 4 : 8, to,
2148                           (to | from) < 65536 ? 4 : 8, from);
2149             }
2150
2151           /* And the next token.  */
2152           now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2153         }
2154
2155       if (now->tok == tok_eol || now->tok == tok_eof)
2156         /* We are done.  */
2157         return;
2158
2159       if (now->tok == tok_semicolon)
2160         /* Next round.  */
2161         continue;
2162
2163       /* If we come here something is wrong.  */
2164       lr_error (ldfile, _("syntax error"));
2165       lr_ignore_rest (ldfile, 0);
2166       return;
2167     }
2168 }
2169
2170
2171 /* The parser for the LC_CTYPE section of the locale definition.  */
2172 void
2173 ctype_read (struct linereader *ldfile, struct localedef_t *result,
2174             const struct charmap_t *charmap, const char *repertoire_name,
2175             int ignore_content)
2176 {
2177   struct repertoire_t *repertoire = NULL;
2178   struct locale_ctype_t *ctype;
2179   struct token *now;
2180   enum token_t nowtok;
2181   size_t cnt;
2182   struct charseq *last_seq;
2183   uint32_t last_wch = 0;
2184   enum token_t last_token;
2185   enum token_t ellipsis_token;
2186   int step;
2187   char last_charcode[16];
2188   size_t last_charcode_len = 0;
2189   const char *last_str = NULL;
2190   int mapidx;
2191   struct localedef_t *copy_locale = NULL;
2192
2193   /* Get the repertoire we have to use.  */
2194   if (repertoire_name != NULL)
2195     repertoire = repertoire_read (repertoire_name);
2196
2197   /* The rest of the line containing `LC_CTYPE' must be free.  */
2198   lr_ignore_rest (ldfile, 1);
2199
2200
2201   do
2202     {
2203       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2204       nowtok = now->tok;
2205     }
2206   while (nowtok == tok_eol);
2207
2208   /* If we see `copy' now we are almost done.  */
2209   if (nowtok == tok_copy)
2210     {
2211       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2212       if (now->tok != tok_string)
2213         {
2214           SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2215
2216         skip_category:
2217           do
2218             now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2219           while (now->tok != tok_eof && now->tok != tok_end);
2220
2221           if (now->tok != tok_eof
2222               || (now = lr_token (ldfile, charmap, NULL, NULL, verbose),
2223                   now->tok == tok_eof))
2224             lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2225           else if (now->tok != tok_lc_ctype)
2226             {
2227               lr_error (ldfile, _("\
2228 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2229               lr_ignore_rest (ldfile, 0);
2230             }
2231           else
2232             lr_ignore_rest (ldfile, 1);
2233
2234           return;
2235         }
2236
2237       if (! ignore_content)
2238         {
2239           /* Get the locale definition.  */
2240           copy_locale = load_locale (LC_CTYPE, now->val.str.startmb,
2241                                      repertoire_name, charmap, NULL);
2242           if ((copy_locale->avail & CTYPE_LOCALE) == 0)
2243             {
2244               /* Not yet loaded.  So do it now.  */
2245               if (locfile_read (copy_locale, charmap) != 0)
2246                 goto skip_category;
2247             }
2248
2249           if (copy_locale->categories[LC_CTYPE].ctype == NULL)
2250             return;
2251         }
2252
2253       lr_ignore_rest (ldfile, 1);
2254
2255       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2256       nowtok = now->tok;
2257     }
2258
2259   /* Prepare the data structures.  */
2260   ctype_startup (ldfile, result, charmap, copy_locale, ignore_content);
2261   ctype = result->categories[LC_CTYPE].ctype;
2262
2263   /* Remember the repertoire we use.  */
2264   if (!ignore_content)
2265     ctype->repertoire = repertoire;
2266
2267   while (1)
2268     {
2269       unsigned long int class_bit = 0;
2270       unsigned long int class256_bit = 0;
2271       int handle_digits = 0;
2272
2273       /* Of course we don't proceed beyond the end of file.  */
2274       if (nowtok == tok_eof)
2275         break;
2276
2277       /* Ingore empty lines.  */
2278       if (nowtok == tok_eol)
2279         {
2280           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2281           nowtok = now->tok;
2282           continue;
2283         }
2284
2285       switch (nowtok)
2286         {
2287         case tok_charclass:
2288           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2289           while (now->tok == tok_ident || now->tok == tok_string)
2290             {
2291               ctype_class_new (ldfile, ctype, now->val.str.startmb);
2292               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2293               if (now->tok != tok_semicolon)
2294                 break;
2295               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2296             }
2297           if (now->tok != tok_eol)
2298             SYNTAX_ERROR (_("\
2299 %s: syntax error in definition of new character class"), "LC_CTYPE");
2300           break;
2301
2302         case tok_charconv:
2303           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2304           while (now->tok == tok_ident || now->tok == tok_string)
2305             {
2306               ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2307               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2308               if (now->tok != tok_semicolon)
2309                 break;
2310               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2311             }
2312           if (now->tok != tok_eol)
2313             SYNTAX_ERROR (_("\
2314 %s: syntax error in definition of new character map"), "LC_CTYPE");
2315           break;
2316
2317         case tok_class:
2318           /* Ignore the rest of the line if we don't need the input of
2319              this line.  */
2320           if (ignore_content)
2321             {
2322               lr_ignore_rest (ldfile, 0);
2323               break;
2324             }
2325
2326           /* We simply forget the `class' keyword and use the following
2327              operand to determine the bit.  */
2328           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2329           if (now->tok == tok_ident || now->tok == tok_string)
2330             {
2331               /* Must can be one of the predefined class names.  */
2332               for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2333                 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
2334                   break;
2335               if (cnt >= ctype->nr_charclass)
2336                 {
2337 #ifdef PREDEFINED_CLASSES
2338                   if (now->val.str.lenmb == 8
2339                       && memcmp ("special1", now->val.str.startmb, 8) == 0)
2340                     class_bit = _ISwspecial1;
2341                   else if (now->val.str.lenmb == 8
2342                       && memcmp ("special2", now->val.str.startmb, 8) == 0)
2343                     class_bit = _ISwspecial2;
2344                   else if (now->val.str.lenmb == 8
2345                       && memcmp ("special3", now->val.str.startmb, 8) == 0)
2346                     class_bit = _ISwspecial3;
2347                   else
2348 #endif
2349                     {
2350                       /* OK, it's a new class.  */
2351                       ctype_class_new (ldfile, ctype, now->val.str.startmb);
2352
2353                       class_bit = _ISwbit (ctype->nr_charclass - 1);
2354                     }
2355                 }
2356               else
2357                 {
2358                   class_bit = _ISwbit (cnt);
2359
2360                   free (now->val.str.startmb);
2361                 }
2362             }
2363           else if (now->tok == tok_digit)
2364             goto handle_tok_digit;
2365           else if (now->tok < tok_upper || now->tok > tok_blank)
2366             goto err_label;
2367           else
2368             {
2369               class_bit = BITw (now->tok);
2370               class256_bit = BIT (now->tok);
2371             }
2372
2373           /* The next character must be a semicolon.  */
2374           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2375           if (now->tok != tok_semicolon)
2376             goto err_label;
2377           goto read_charclass;
2378
2379         case tok_upper:
2380         case tok_lower:
2381         case tok_alpha:
2382         case tok_alnum:
2383         case tok_space:
2384         case tok_cntrl:
2385         case tok_punct:
2386         case tok_graph:
2387         case tok_print:
2388         case tok_xdigit:
2389         case tok_blank:
2390           /* Ignore the rest of the line if we don't need the input of
2391              this line.  */
2392           if (ignore_content)
2393             {
2394               lr_ignore_rest (ldfile, 0);
2395               break;
2396             }
2397
2398           class_bit = BITw (now->tok);
2399           class256_bit = BIT (now->tok);
2400           handle_digits = 0;
2401         read_charclass:
2402           ctype->class_done |= class_bit;
2403           last_token = tok_none;
2404           ellipsis_token = tok_none;
2405           step = 1;
2406           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2407           while (now->tok != tok_eol && now->tok != tok_eof)
2408             {
2409               uint32_t wch;
2410               struct charseq *seq;
2411
2412               if (ellipsis_token == tok_none)
2413                 {
2414                   if (get_character (now, charmap, repertoire, &seq, &wch))
2415                     goto err_label;
2416
2417                   if (!ignore_content && seq != NULL && seq->nbytes == 1)
2418                     /* Yep, we can store information about this byte
2419                        sequence.  */
2420                     ctype->class256_collection[seq->bytes[0]] |= class256_bit;
2421
2422                   if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2423                       && class_bit != 0)
2424                     /* We have the UCS4 position.  */
2425                     *find_idx (ctype, &ctype->class_collection,
2426                                &ctype->class_collection_max,
2427                                &ctype->class_collection_act, wch) |= class_bit;
2428
2429                   last_token = now->tok;
2430                   /* Terminate the string.  */
2431                   if (last_token == tok_bsymbol)
2432                     {
2433                       now->val.str.startmb[now->val.str.lenmb] = '\0';
2434                       last_str = now->val.str.startmb;
2435                     }
2436                   else
2437                     last_str = NULL;
2438                   last_seq = seq;
2439                   last_wch = wch;
2440                   memcpy (last_charcode, now->val.charcode.bytes, 16);
2441                   last_charcode_len = now->val.charcode.nbytes;
2442
2443                   if (!ignore_content && handle_digits == 1)
2444                     {
2445                       /* We must store the digit values.  */
2446                       if (ctype->mbdigits_act == ctype->mbdigits_max)
2447                         {
2448                           ctype->mbdigits_max += 10;
2449                           ctype->mbdigits = xrealloc (ctype->mbdigits,
2450                                                       (ctype->mbdigits_max
2451                                                        * sizeof (char *)));
2452                           ctype->wcdigits_max += 10;
2453                           ctype->wcdigits = xrealloc (ctype->wcdigits,
2454                                                       (ctype->wcdigits_max
2455                                                        * sizeof (uint32_t)));
2456                         }
2457
2458                       ctype->mbdigits[ctype->mbdigits_act++] = seq;
2459                       ctype->wcdigits[ctype->wcdigits_act++] = wch;
2460                     }
2461                   else if (!ignore_content && handle_digits == 2)
2462                     {
2463                       /* We must store the digit values.  */
2464                       if (ctype->outdigits_act >= 10)
2465                         {
2466                           lr_error (ldfile, _("\
2467 %s: field `%s' does not contain exactly ten entries"),
2468                             "LC_CTYPE", "outdigit");
2469                           lr_ignore_rest (ldfile, 0);
2470                           break;
2471                         }
2472
2473                       ctype->mboutdigits[ctype->outdigits_act] = seq;
2474                       ctype->wcoutdigits[ctype->outdigits_act] = wch;
2475                       ++ctype->outdigits_act;
2476                     }
2477                 }
2478               else
2479                 {
2480                   /* Now it gets complicated.  We have to resolve the
2481                      ellipsis problem.  First we must distinguish between
2482                      the different kind of ellipsis and this must match the
2483                      tokens we have seen.  */
2484                   assert (last_token != tok_none);
2485
2486                   if (last_token != now->tok)
2487                     {
2488                       lr_error (ldfile, _("\
2489 ellipsis range must be marked by two operands of same type"));
2490                       lr_ignore_rest (ldfile, 0);
2491                       break;
2492                     }
2493
2494                   if (last_token == tok_bsymbol)
2495                     {
2496                       if (ellipsis_token == tok_ellipsis3)
2497                         lr_error (ldfile, _("with symbolic name range values \
2498 the absolute ellipsis `...' must not be used"));
2499
2500                       charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2501                                                    repertoire, now, last_str,
2502                                                    class256_bit, class_bit,
2503                                                    (ellipsis_token
2504                                                     == tok_ellipsis4
2505                                                     ? 10 : 16),
2506                                                    ignore_content,
2507                                                    handle_digits, step);
2508                     }
2509                   else if (last_token == tok_ucs4)
2510                     {
2511                       if (ellipsis_token != tok_ellipsis2)
2512                         lr_error (ldfile, _("\
2513 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2514
2515                       charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2516                                                repertoire, now, last_wch,
2517                                                class256_bit, class_bit,
2518                                                ignore_content, handle_digits,
2519                                                step);
2520                     }
2521                   else
2522                     {
2523                       assert (last_token == tok_charcode);
2524
2525                       if (ellipsis_token != tok_ellipsis3)
2526                         lr_error (ldfile, _("\
2527 with character code range values one must use the absolute ellipsis `...'"));
2528
2529                       charclass_charcode_ellipsis (ldfile, ctype, charmap,
2530                                                    repertoire, now,
2531                                                    last_charcode,
2532                                                    last_charcode_len,
2533                                                    class256_bit, class_bit,
2534                                                    ignore_content,
2535                                                    handle_digits);
2536                     }
2537
2538                   /* Now we have used the last value.  */
2539                   last_token = tok_none;
2540                 }
2541
2542               /* Next we expect a semicolon or the end of the line.  */
2543               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2544               if (now->tok == tok_eol || now->tok == tok_eof)
2545                 break;
2546
2547               if (last_token != tok_none
2548                   && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
2549                 {
2550                   if (now->tok == tok_ellipsis2_2)
2551                     {
2552                       now->tok = tok_ellipsis2;
2553                       step = 2;
2554                     }
2555                   else if (now->tok == tok_ellipsis4_2)
2556                     {
2557                       now->tok = tok_ellipsis4;
2558                       step = 2;
2559                     }
2560
2561                   ellipsis_token = now->tok;
2562
2563                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2564                   continue;
2565                 }
2566
2567               if (now->tok != tok_semicolon)
2568                 goto err_label;
2569
2570               /* And get the next character.  */
2571               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2572
2573               ellipsis_token = tok_none;
2574               step = 1;
2575             }
2576           break;
2577
2578         case tok_digit:
2579           /* Ignore the rest of the line if we don't need the input of
2580              this line.  */
2581           if (ignore_content)
2582             {
2583               lr_ignore_rest (ldfile, 0);
2584               break;
2585             }
2586
2587         handle_tok_digit:
2588           class_bit = _ISwdigit;
2589           class256_bit = _ISdigit;
2590           handle_digits = 1;
2591           goto read_charclass;
2592
2593         case tok_outdigit:
2594           /* Ignore the rest of the line if we don't need the input of
2595              this line.  */
2596           if (ignore_content)
2597             {
2598               lr_ignore_rest (ldfile, 0);
2599               break;
2600             }
2601
2602           if (ctype->outdigits_act != 0)
2603             lr_error (ldfile, _("\
2604 %s: field `%s' declared more than once"),
2605                       "LC_CTYPE", "outdigit");
2606           class_bit = 0;
2607           class256_bit = 0;
2608           handle_digits = 2;
2609           goto read_charclass;
2610
2611         case tok_toupper:
2612           /* Ignore the rest of the line if we don't need the input of
2613              this line.  */
2614           if (ignore_content)
2615             {
2616               lr_ignore_rest (ldfile, 0);
2617               break;
2618             }
2619
2620           mapidx = 0;
2621           goto read_mapping;
2622
2623         case tok_tolower:
2624           /* Ignore the rest of the line if we don't need the input of
2625              this line.  */
2626           if (ignore_content)
2627             {
2628               lr_ignore_rest (ldfile, 0);
2629               break;
2630             }
2631
2632           mapidx = 1;
2633           goto read_mapping;
2634
2635         case tok_map:
2636           /* Ignore the rest of the line if we don't need the input of
2637              this line.  */
2638           if (ignore_content)
2639             {
2640               lr_ignore_rest (ldfile, 0);
2641               break;
2642             }
2643
2644           /* We simply forget the `map' keyword and use the following
2645              operand to determine the mapping.  */
2646           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2647           if (now->tok == tok_ident || now->tok == tok_string)
2648             {
2649               size_t cnt;
2650
2651               for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2652                 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2653                   break;
2654
2655               if (cnt < ctype->map_collection_nr)
2656                 free (now->val.str.startmb);
2657               else
2658                 /* OK, it's a new map.  */
2659                 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2660
2661               mapidx = cnt;
2662             }
2663           else if (now->tok < tok_toupper || now->tok > tok_tolower)
2664             goto err_label;
2665           else
2666             mapidx = now->tok - tok_toupper;
2667
2668           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2669           /* This better should be a semicolon.  */
2670           if (now->tok != tok_semicolon)
2671             goto err_label;
2672
2673         read_mapping:
2674           /* Test whether this mapping was already defined.  */
2675           if (ctype->tomap_done[mapidx])
2676             {
2677               lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2678                         ctype->mapnames[mapidx]);
2679               lr_ignore_rest (ldfile, 0);
2680               break;
2681             }
2682           ctype->tomap_done[mapidx] = 1;
2683
2684           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2685           while (now->tok != tok_eol && now->tok != tok_eof)
2686             {
2687               struct charseq *from_seq;
2688               uint32_t from_wch;
2689               struct charseq *to_seq;
2690               uint32_t to_wch;
2691
2692               /* Every pair starts with an opening brace.  */
2693               if (now->tok != tok_open_brace)
2694                 goto err_label;
2695
2696               /* Next comes the from-value.  */
2697               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2698               if (get_character (now, charmap, repertoire, &from_seq,
2699                                  &from_wch) != 0)
2700                 goto err_label;
2701
2702               /* The next is a comma.  */
2703               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2704               if (now->tok != tok_comma)
2705                 goto err_label;
2706
2707               /* And the other value.  */
2708               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2709               if (get_character (now, charmap, repertoire, &to_seq,
2710                                  &to_wch) != 0)
2711                 goto err_label;
2712
2713               /* And the last thing is the closing brace.  */
2714               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2715               if (now->tok != tok_close_brace)
2716                 goto err_label;
2717
2718               if (!ignore_content)
2719                 {
2720                   /* Check whether the mapping converts from an ASCII value
2721                      to a non-ASCII value.  */
2722                   if (from_seq != NULL && from_seq->nbytes == 1
2723                       && isascii (from_seq->bytes[0])
2724                       && to_seq != NULL && (to_seq->nbytes != 1
2725                                             || !isascii (to_seq->bytes[0])))
2726                     ctype->to_nonascii = 1;
2727
2728                   if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2729                       && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2730                     /* We can use this value.  */
2731                     ctype->map256_collection[mapidx][from_seq->bytes[0]]
2732                       = to_seq->bytes[0];
2733
2734                   if (from_wch != ILLEGAL_CHAR_VALUE
2735                       && to_wch != ILLEGAL_CHAR_VALUE)
2736                     /* Both correct values.  */
2737                     *find_idx (ctype, &ctype->map_collection[mapidx],
2738                                &ctype->map_collection_max[mapidx],
2739                                &ctype->map_collection_act[mapidx],
2740                                from_wch) = to_wch;
2741                 }
2742
2743               /* Now comes a semicolon or the end of the line/file.  */
2744               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2745               if (now->tok == tok_semicolon)
2746                 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2747             }
2748           break;
2749
2750         case tok_translit_start:
2751           /* Ignore the entire translit section with its peculiar syntax
2752              if we don't need the input.  */
2753           if (ignore_content)
2754             {
2755               do
2756                 {
2757                   lr_ignore_rest (ldfile, 0);
2758                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2759                 }
2760               while (now->tok != tok_translit_end && now->tok != tok_eof);
2761
2762               if (now->tok == tok_eof)
2763                 lr_error (ldfile, _(\
2764 "%s: `translit_start' section does not end with `translit_end'"),
2765                           "LC_CTYPE");
2766
2767               break;
2768             }
2769
2770           /* The rest of the line better should be empty.  */
2771           lr_ignore_rest (ldfile, 1);
2772
2773           /* We count here the number of allocated entries in the `translit'
2774              array.  */
2775           cnt = 0;
2776
2777           ldfile->translate_strings = 1;
2778           ldfile->return_widestr = 1;
2779
2780           /* We proceed until we see the `translit_end' token.  */
2781           while (now = lr_token (ldfile, charmap, NULL, repertoire, verbose),
2782                  now->tok != tok_translit_end && now->tok != tok_eof)
2783             {
2784               if (now->tok == tok_eol)
2785                 /* Ignore empty lines.  */
2786                 continue;
2787
2788               if (now->tok == tok_include)
2789                 {
2790                   /* We have to include locale.  */
2791                   const char *locale_name;
2792                   const char *repertoire_name;
2793                   struct translit_include_t *include_stmt, **include_ptr;
2794
2795                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2796                   /* This should be a string or an identifier.  In any
2797                      case something to name a locale.  */
2798                   if (now->tok != tok_string && now->tok != tok_ident)
2799                     {
2800                     translit_syntax:
2801                       lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2802                       lr_ignore_rest (ldfile, 0);
2803                       continue;
2804                     }
2805                   locale_name = now->val.str.startmb;
2806
2807                   /* Next should be a semicolon.  */
2808                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2809                   if (now->tok != tok_semicolon)
2810                     goto translit_syntax;
2811
2812                   /* Now the repertoire name.  */
2813                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2814                   if ((now->tok != tok_string && now->tok != tok_ident)
2815                       || now->val.str.startmb == NULL)
2816                     goto translit_syntax;
2817                   repertoire_name = now->val.str.startmb;
2818                   if (repertoire_name[0] == '\0')
2819                     /* Ignore the empty string.  */
2820                     repertoire_name = NULL;
2821
2822                   /* Save the include statement for later processing.  */
2823                   include_stmt = (struct translit_include_t *)
2824                     xmalloc (sizeof (struct translit_include_t));
2825                   include_stmt->copy_locale = locale_name;
2826                   include_stmt->copy_repertoire = repertoire_name;
2827                   include_stmt->next = NULL;
2828
2829                   include_ptr = &ctype->translit_include;
2830                   while (*include_ptr != NULL)
2831                     include_ptr = &(*include_ptr)->next;
2832                   *include_ptr = include_stmt;
2833
2834                   /* The rest of the line must be empty.  */
2835                   lr_ignore_rest (ldfile, 1);
2836
2837                   /* Make sure the locale is read.  */
2838                   add_to_readlist (LC_CTYPE, locale_name, repertoire_name,
2839                                    1, NULL);
2840                   continue;
2841                 }
2842               else if (now->tok == tok_default_missing)
2843                 {
2844                   uint32_t *wstr;
2845
2846                   while (1)
2847                     {
2848                       /* We expect a single character or string as the
2849                          argument.  */
2850                       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2851                       wstr = read_widestring (ldfile, now, charmap,
2852                                               repertoire);
2853
2854                       if (wstr != NULL)
2855                         {
2856                           if (ctype->default_missing != NULL)
2857                             {
2858                               lr_error (ldfile, _("\
2859 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2860                               WITH_CUR_LOCALE (error_at_line (0, 0,
2861                                                               ctype->default_missing_file,
2862                                                               ctype->default_missing_lineno,
2863                                                               _("\
2864 previous definition was here")));
2865                             }
2866                           else
2867                             {
2868                               ctype->default_missing = wstr;
2869                               ctype->default_missing_file = ldfile->fname;
2870                               ctype->default_missing_lineno = ldfile->lineno;
2871                             }
2872                           /* We can have more entries, ignore them.  */
2873                           lr_ignore_rest (ldfile, 0);
2874                           break;
2875                         }
2876                       else if (wstr == (uint32_t *) -1l)
2877                         /* This was a syntax error.  */
2878                         break;
2879
2880                       /* Maybe there is another replacement we can use.  */
2881                       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2882                       if (now->tok == tok_eol || now->tok == tok_eof)
2883                         {
2884                           /* Nothing found.  We tell the user.  */
2885                           lr_error (ldfile, _("\
2886 %s: no representable `default_missing' definition found"), "LC_CTYPE");
2887                           break;
2888                         }
2889                       if (now->tok != tok_semicolon)
2890                         goto translit_syntax;
2891                     }
2892
2893                   continue;
2894                 }
2895               else if (now->tok == tok_translit_ignore)
2896                 {
2897                   read_translit_ignore_entry (ldfile, ctype, charmap,
2898                                               repertoire);
2899                   continue;
2900                 }
2901
2902               read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2903             }
2904           ldfile->return_widestr = 0;
2905
2906           if (now->tok == tok_eof)
2907             lr_error (ldfile, _(\
2908 "%s: `translit_start' section does not end with `translit_end'"),
2909                       "LC_CTYPE");
2910
2911           break;
2912
2913         case tok_ident:
2914           /* Ignore the rest of the line if we don't need the input of
2915              this line.  */
2916           if (ignore_content)
2917             {
2918               lr_ignore_rest (ldfile, 0);
2919               break;
2920             }
2921
2922           /* This could mean one of several things.  First test whether
2923              it's a character class name.  */
2924           for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2925             if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2926               break;
2927           if (cnt < ctype->nr_charclass)
2928             {
2929               class_bit = _ISwbit (cnt);
2930               class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2931               free (now->val.str.startmb);
2932               goto read_charclass;
2933             }
2934           for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2935             if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2936               break;
2937           if (cnt < ctype->map_collection_nr)
2938             {
2939               mapidx = cnt;
2940               free (now->val.str.startmb);
2941               goto read_mapping;
2942             }
2943 #ifdef PREDEFINED_CLASSES
2944           if (strcmp (now->val.str.startmb, "special1") == 0)
2945             {
2946               class_bit = _ISwspecial1;
2947               free (now->val.str.startmb);
2948               goto read_charclass;
2949             }
2950           if (strcmp (now->val.str.startmb, "special2") == 0)
2951             {
2952               class_bit = _ISwspecial2;
2953               free (now->val.str.startmb);
2954               goto read_charclass;
2955             }
2956           if (strcmp (now->val.str.startmb, "special3") == 0)
2957             {
2958               class_bit = _ISwspecial3;
2959               free (now->val.str.startmb);
2960               goto read_charclass;
2961             }
2962           if (strcmp (now->val.str.startmb, "tosymmetric") == 0)
2963             {
2964               mapidx = 2;
2965               goto read_mapping;
2966             }
2967 #endif
2968           break;
2969
2970         case tok_end:
2971           /* Next we assume `LC_CTYPE'.  */
2972           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2973           if (now->tok == tok_eof)
2974             break;
2975           if (now->tok == tok_eol)
2976             lr_error (ldfile, _("%s: incomplete `END' line"),
2977                       "LC_CTYPE");
2978           else if (now->tok != tok_lc_ctype)
2979             lr_error (ldfile, _("\
2980 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2981           lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2982           return;
2983
2984         default:
2985         err_label:
2986           if (now->tok != tok_eof)
2987             SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2988         }
2989
2990       /* Prepare for the next round.  */
2991       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2992       nowtok = now->tok;
2993     }
2994
2995   /* When we come here we reached the end of the file.  */
2996   lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2997 }
2998
2999
3000 static void
3001 set_class_defaults (struct locale_ctype_t *ctype,
3002                     const struct charmap_t *charmap,
3003                     struct repertoire_t *repertoire)
3004 {
3005   size_t cnt;
3006
3007   /* This function defines the default values for the classes and conversions
3008      according to POSIX.2 2.5.2.1.
3009      It may seem that the order of these if-blocks is arbitrary but it is NOT.
3010      Don't move them unless you know what you are doing!  */
3011
3012   auto void set_default (int bitpos, int from, int to);
3013   /* Nested helper: add class BITPOS to every character code FROM..TO (inclusive).  */
3014   void set_default (int bitpos, int from, int to)
3015     {
3016       char tmp[2];  /* One-character, NUL-terminated name used for the charmap lookup.  */
3017       int ch;
3018       int bit = _ISbit (bitpos);  /* ORed into class256_collection below.  */
3019       int bitw = _ISwbit (bitpos);  /* ORed into class_collection below.  */
3020       /* Define string.  The "?" only supplies the terminating NUL; byte 0 is replaced in the loop.  */
3021       strcpy (tmp, "?");
3022
3023       for (ch = from; ch <= to; ++ch)
3024         {
3025           struct charseq *seq;
3026           tmp[0] = ch;
3027           /* Try the one-character name first, then the nine-character "U%08X" name.  */
3028           seq = charmap_find_value (charmap, tmp, 1);
3029           if (seq == NULL)
3030             {
3031               char buf[10];  /* 'U' + 8 hex digits + NUL.  */
3032               sprintf (buf, "U%08X", ch);
3033               seq = charmap_find_value (charmap, buf, 9);
3034             }
3035           if (seq == NULL)  /* Not in the charmap under either name.  */
3036             {
3037               if (!be_quiet)
3038                 WITH_CUR_LOCALE (error (0, 0, _("\
3039 %s: character `%s' not defined in charmap while needed as default value"),
3040                                         "LC_CTYPE", tmp));
3041             }
3042           else if (seq->nbytes != 1)  /* Reported regardless of be_quiet.  */
3043             WITH_CUR_LOCALE (error (0, 0, _("\
3044 %s: character `%s' in charmap not representable with one byte"),
3045                                     "LC_CTYPE", tmp));
3046           else  /* One-byte encoding: flag that byte in class256_collection.  */
3047             ctype->class256_collection[seq->bytes[0]] |= bit;
3048
3049           /* No need to search here, the ASCII value is also the Unicode
3050              value.  This is done even if the charmap lookup above failed.  */
3051           ELEM (ctype, class_collection, , ch) |= bitw;
3052         }
3053     }
3054
3055   /* Set default values if keyword was not present.  */
3056   if ((ctype->class_done & BITw (tok_upper)) == 0)
3057     /* "If this keyword [upper] is not specified, the uppercase letters
3058         `A' through `Z', ..., shall automatically belong to this class,
3059         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
3060     set_default (BITPOS (tok_upper), 'A', 'Z');
3061
3062   if ((ctype->class_done & BITw (tok_lower)) == 0)
3063     /* "If this keyword [lower] is not specified, the lowercase letters
3064         `a' through `z', ..., shall automatically belong to this class,
3065         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
3066     set_default (BITPOS (tok_lower), 'a', 'z');
3067
3068   if ((ctype->class_done & BITw (tok_alpha)) == 0)
3069     {
3070       /* Table 2-6 in P1003.2 says that characters in class `upper' or
3071          class `lower' *must* be in class `alpha'.  */
3072       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
3073       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
3074
3075       for (cnt = 0; cnt < 256; ++cnt)
3076         if ((ctype->class256_collection[cnt] & mask) != 0)
3077           ctype->class256_collection[cnt] |= BIT (tok_alpha);
3078
3079       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3080         if ((ctype->class_collection[cnt] & maskw) != 0)
3081           ctype->class_collection[cnt] |= BITw (tok_alpha);
3082     }
3083
3084   if ((ctype->class_done & BITw (tok_digit)) == 0)
3085     /* "If this keyword [digit] is not specified, the digits `0' through
3086         `9', ..., shall automatically belong to this class, with
3087         implementation-defined character values."  [P1003.2, 2.5.2.1]  */
3088     set_default (BITPOS (tok_digit), '0', '9');
3089
3090   /* Class `alnum': "Only characters specified for the `alpha' and `digit'
3091      keyword shall be specified.  Characters specified for the keyword `alpha'
3092      and `digit' are automatically included in this class."  */
3093   {
3094     unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
3095     unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
3096
3097     for (cnt = 0; cnt < 256; ++cnt)
3098       if ((ctype->class256_collection[cnt] & mask) != 0)
3099         ctype->class256_collection[cnt] |= BIT (tok_alnum);
3100
3101     for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3102       if ((ctype->class_collection[cnt] & maskw) != 0)
3103         ctype->class_collection[cnt] |= BITw (tok_alnum);
3104   }
3105
3106   if ((ctype->class_done & BITw (tok_space)) == 0)
3107     /* "If this keyword [space] is not specified, the characters <space>,
3108         <form-feed>, <newline>, <carriage-return>, <tab>, and
3109         <vertical-tab>, ..., shall automatically belong to this class,
3110         with implementation-defined character values."  [P1003.2, 2.5.2.1]  */
3111     {
3112       struct charseq *seq;
3113
3114       seq = charmap_find_value (charmap, "space", 5);
3115       if (seq == NULL)
3116         seq = charmap_find_value (charmap, "SP", 2);
3117       if (seq == NULL)
3118         seq = charmap_find_value (charmap, "U00000020", 9);
3119       if (seq == NULL)
3120         {
3121           if (!be_quiet)
3122             WITH_CUR_LOCALE (error (0, 0, _("\
3123 %s: character `%s' not defined while needed as default value"),
3124                                     "LC_CTYPE", "<space>"));
3125         }
3126       else if (seq->nbytes != 1)
3127         WITH_CUR_LOCALE (error (0, 0, _("\
3128 %s: character `%s' in charmap not representable with one byte"),
3129                                 "LC_CTYPE", "<space>"));
3130       else
3131         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3132
3133       /* No need to search.  */
3134       ELEM (ctype, class_collection, , L' ') |= BITw (tok_space);
3135
3136       seq = charmap_find_value (charmap, "form-feed", 9);
3137       if (seq == NULL)
3138         seq = charmap_find_value (charmap, "U0000000C", 9);
3139       if (seq == NULL)
3140         {
3141           if (!be_quiet)
3142             WITH_CUR_LOCALE (error (0, 0, _("\
3143 %s: character `%s' not defined while needed as default value"),
3144                                     "LC_CTYPE", "<form-feed>"));
3145         }
3146       else if (seq->nbytes != 1)
3147         WITH_CUR_LOCALE (error (0, 0, _("\
3148 %s: character `%s' in charmap not representable with one byte"),
3149                                 "LC_CTYPE", "<form-feed>"));
3150       else
3151         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3152
3153       /* No need to search.  */
3154       ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space);
3155
3156
3157       seq = charmap_find_value (charmap, "newline", 7);
3158       if (seq == NULL)
3159         seq = charmap_find_value (charmap, "U0000000A", 9);
3160       if (seq == NULL)
3161         {
3162           if (!be_quiet)
3163             WITH_CUR_LOCALE (error (0, 0, _("\
3164 character `%s' not defined while needed as default value"),
3165                                     "<newline>"));
3166         }
3167       else if (seq->nbytes != 1)
3168         WITH_CUR_LOCALE (error (0, 0, _("\
3169 %s: character `%s' in charmap not representable with one byte"),
3170                                 "LC_CTYPE", "<newline>"));
3171       else
3172         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3173
3174       /* No need to search.  */
3175       ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space);
3176
3177
3178       seq = charmap_find_value (charmap, "carriage-return", 15);
3179       if (seq == NULL)
3180         seq = charmap_find_value (charmap, "U0000000D", 9);
3181       if (seq == NULL)
3182         {
3183           if (!be_quiet)
3184             WITH_CUR_LOCALE (error (0, 0, _("\
3185 %s: character `%s' not defined while needed as default value"),
3186                                     "LC_CTYPE", "<carriage-return>"));
3187         }
3188       else if (seq->nbytes != 1)
3189         WITH_CUR_LOCALE (error (0, 0, _("\
3190 %s: character `%s' in charmap not representable with one byte"),
3191                                 "LC_CTYPE", "<carriage-return>"));
3192       else
3193         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3194
3195       /* No need to search.  */
3196       ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space);
3197
3198
3199       seq = charmap_find_value (charmap, "tab", 3);
3200       if (seq == NULL)
3201         seq = charmap_find_value (charmap, "U00000009", 9);
3202       if (seq == NULL)
3203         {
3204           if (!be_quiet)
3205             WITH_CUR_LOCALE (error (0, 0, _("\
3206 %s: character `%s' not defined while needed as default value"),
3207                                     "LC_CTYPE", "<tab>"));
3208         }
3209       else if (seq->nbytes != 1)
3210         WITH_CUR_LOCALE (error (0, 0, _("\
3211 %s: character `%s' in charmap not representable with one byte"),
3212                                 "LC_CTYPE", "<tab>"));
3213       else
3214         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3215
3216       /* No need to search.  */
3217       ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space);
3218
3219
3220       seq = charmap_find_value (charmap, "vertical-tab", 12);
3221       if (seq == NULL)
3222         seq = charmap_find_value (charmap, "U0000000B", 9);
3223       if (seq == NULL)
3224         {
3225           if (!be_quiet)
3226             WITH_CUR_LOCALE (error (0, 0, _("\
3227 %s: character `%s' not defined while needed as default value"),
3228                                     "LC_CTYPE", "<vertical-tab>"));
3229         }
3230       else if (seq->nbytes != 1)
3231         WITH_CUR_LOCALE (error (0, 0, _("\
3232 %s: character `%s' in charmap not representable with one byte"),
3233                                 "LC_CTYPE", "<vertical-tab>"));
3234       else
3235         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3236
3237       /* No need to search.  */
3238       ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space);
3239     }
3240
3241   if ((ctype->class_done & BITw (tok_xdigit)) == 0)
3242     /* "If this keyword is not specified, the digits `0' to `9', the
3243         uppercase letters `A' through `F', and the lowercase letters `a'
3244         through `f', ..., shell automatically belong to this class, with
3245         implementation defined character values."  [P1003.2, 2.5.2.1]  */
3246     {
3247       set_default (BITPOS (tok_xdigit), '0', '9');
3248       set_default (BITPOS (tok_xdigit), 'A', 'F');
3249       set_default (BITPOS (tok_xdigit), 'a', 'f');
3250     }
3251
3252   if ((ctype->class_done & BITw (tok_blank)) == 0)
3253     /* "If this keyword [blank] is unspecified, the characters <space> and
3254        <tab> shall belong to this character class."  [P1003.2, 2.5.2.1]  */
3255    {
3256       struct charseq *seq;
3257
3258       seq = charmap_find_value (charmap, "space", 5);
3259       if (seq == NULL)
3260         seq = charmap_find_value (charmap, "SP", 2);
3261       if (seq == NULL)
3262         seq = charmap_find_value (charmap, "U00000020", 9);
3263       if (seq == NULL)
3264         {
3265           if (!be_quiet)
3266             WITH_CUR_LOCALE (error (0, 0, _("\
3267 %s: character `%s' not defined while needed as default value"),
3268                                     "LC_CTYPE", "<space>"));
3269         }
3270       else if (seq->nbytes != 1)
3271         WITH_CUR_LOCALE (error (0, 0, _("\
3272 %s: character `%s' in charmap not representable with one byte"),
3273                                 "LC_CTYPE", "<space>"));
3274       else
3275         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3276
3277       /* No need to search.  */
3278       ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank);
3279
3280
3281       seq = charmap_find_value (charmap, "tab", 3);
3282       if (seq == NULL)
3283         seq = charmap_find_value (charmap, "U00000009", 9);
3284       if (seq == NULL)
3285         {
3286           if (!be_quiet)
3287             WITH_CUR_LOCALE (error (0, 0, _("\
3288 %s: character `%s' not defined while needed as default value"),
3289                                     "LC_CTYPE", "<tab>"));
3290         }
3291       else if (seq->nbytes != 1)
3292         WITH_CUR_LOCALE (error (0, 0, _("\
3293 %s: character `%s' in charmap not representable with one byte"),
3294                                 "LC_CTYPE", "<tab>"));
3295       else
3296         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3297
3298       /* No need to search.  */
3299       ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank);
3300     }
3301
3302   if ((ctype->class_done & BITw (tok_graph)) == 0)
3303     /* "If this keyword [graph] is not specified, characters specified for
3304         the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3305         shall belong to this character class."  [P1003.2, 2.5.2.1]  */
3306     {
3307       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3308         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3309       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3310         BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3311         BITw (tok_punct);
3312       size_t cnt;
3313
3314       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3315         if ((ctype->class_collection[cnt] & maskw) != 0)
3316           ctype->class_collection[cnt] |= BITw (tok_graph);
3317
3318       for (cnt = 0; cnt < 256; ++cnt)
3319         if ((ctype->class256_collection[cnt] & mask) != 0)
3320           ctype->class256_collection[cnt] |= BIT (tok_graph);
3321     }
3322
3323   if ((ctype->class_done & BITw (tok_print)) == 0)
3324     /* "If this keyword [print] is not provided, characters specified for
3325         the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3326         and the <space> character shall belong to this character class."
3327         [P1003.2, 2.5.2.1]  */
3328     {
3329       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3330         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3331       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3332         BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3333         BITw (tok_punct);
3334       size_t cnt;
3335       struct charseq *seq;
3336
3337       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3338         if ((ctype->class_collection[cnt] & maskw) != 0)
3339           ctype->class_collection[cnt] |= BITw (tok_print);
3340
3341       for (cnt = 0; cnt < 256; ++cnt)
3342         if ((ctype->class256_collection[cnt] & mask) != 0)
3343           ctype->class256_collection[cnt] |= BIT (tok_print);
3344
3345
3346       seq = charmap_find_value (charmap, "space", 5);
3347       if (seq == NULL)
3348         seq = charmap_find_value (charmap, "SP", 2);
3349       if (seq == NULL)
3350         seq = charmap_find_value (charmap, "U00000020", 9);
3351       if (seq == NULL)
3352         {
3353           if (!be_quiet)
3354             WITH_CUR_LOCALE (error (0, 0, _("\
3355 %s: character `%s' not defined while needed as default value"),
3356                                     "LC_CTYPE", "<space>"));
3357         }
3358       else if (seq->nbytes != 1)
3359         WITH_CUR_LOCALE (error (0, 0, _("\
3360 %s: character `%s' in charmap not representable with one byte"),
3361                                 "LC_CTYPE", "<space>"));
3362       else
3363         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
3364
3365       /* No need to search.  */
3366       ELEM (ctype, class_collection, , L' ') |= BITw (tok_print);
3367     }
3368
3369   if (ctype->tomap_done[0] == 0)
3370     /* "If this keyword [toupper] is not specified, the lowercase letters
3371         `a' through `z', and their corresponding uppercase letters `A' to
3372         `Z', ..., shall automatically be included, with implementation-
3373         defined character values."  [P1003.2, 2.5.2.1]  */
3374     {
3375       char tmp[4];
3376       int ch;
3377
3378       strcpy (tmp, "<?>");
3379
3380       for (ch = 'a'; ch <= 'z'; ++ch)
3381         {
3382           struct charseq *seq_from, *seq_to;
3383
3384           tmp[1] = (char) ch;
3385
3386           seq_from = charmap_find_value (charmap, &tmp[1], 1);
3387           if (seq_from == NULL)
3388             {
3389               char buf[10];
3390               sprintf (buf, "U%08X", ch);
3391               seq_from = charmap_find_value (charmap, buf, 9);
3392             }
3393           if (seq_from == NULL)
3394             {
3395               if (!be_quiet)
3396                 WITH_CUR_LOCALE (error (0, 0, _("\
3397 %s: character `%s' not defined while needed as default value"),
3398                                         "LC_CTYPE", tmp));
3399             }
3400           else if (seq_from->nbytes != 1)
3401             {
3402               if (!be_quiet)
3403                 WITH_CUR_LOCALE (error (0, 0, _("\
3404 %s: character `%s' needed as default value not representable with one byte"),
3405                                         "LC_CTYPE", tmp));
3406             }
3407           else
3408             {
3409               /* This conversion is implementation defined.  */
3410               tmp[1] = (char) (ch + ('A' - 'a'));
3411               seq_to = charmap_find_value (charmap, &tmp[1], 1);
3412               if (seq_to == NULL)
3413                 {
3414                   char buf[10];
3415                   sprintf (buf, "U%08X", ch + ('A' - 'a'));
3416                   seq_to = charmap_find_value (charmap, buf, 9);
3417                 }
3418               if (seq_to == NULL)
3419                 {
3420                   if (!be_quiet)
3421                     WITH_CUR_LOCALE (error (0, 0, _("\
3422 %s: character `%s' not defined while needed as default value"),
3423                                             "LC_CTYPE", tmp));
3424                 }
3425               else if (seq_to->nbytes != 1)
3426                 {
3427                   if (!be_quiet)
3428                     WITH_CUR_LOCALE (error (0, 0, _("\
3429 %s: character `%s' needed as default value not representable with one byte"),
3430                                             "LC_CTYPE", tmp));
3431                 }
3432               else
3433                 /* The index [0] is determined by the order of the
3434                    `ctype_map_newP' calls in `ctype_startup'.  */
3435                 ctype->map256_collection[0][seq_from->bytes[0]]
3436                   = seq_to->bytes[0];
3437             }
3438
3439           /* No need to search.  */
3440           ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
3441         }
3442     }
3443
3444   if (ctype->tomap_done[1] == 0)
3445     /* "If this keyword [tolower] is not specified, the mapping shall be
3446        the reverse mapping of the one specified to `toupper'."  [P1003.2]  */
3447     {
3448       for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
3449         if (ctype->map_collection[0][cnt] != 0)
3450           ELEM (ctype, map_collection, [1],
3451                 ctype->map_collection[0][cnt])
3452             = ctype->charnames[cnt];
3453
3454       for (cnt = 0; cnt < 256; ++cnt)
3455         if (ctype->map256_collection[0][cnt] != 0)
3456           ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
3457     }
3458
3459   if (ctype->outdigits_act != 10)
3460     {
3461       if (ctype->outdigits_act != 0)
3462         WITH_CUR_LOCALE (error (0, 0, _("\
3463 %s: field `%s' does not contain exactly ten entries"),
3464                                 "LC_CTYPE", "outdigit"));
3465
3466       for (cnt = ctype->outdigits_act; cnt < 10; ++cnt)
3467         {
3468           ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3469                                                          digits + cnt, 1);
3470
3471           if (ctype->mboutdigits[cnt] == NULL)
3472             ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3473                                                            longnames[cnt],
3474                                                            strlen (longnames[cnt]));
3475
3476           if (ctype->mboutdigits[cnt] == NULL)
3477             ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3478                                                            uninames[cnt], 9);
3479
3480           if (ctype->mboutdigits[cnt] == NULL)
3481             {
3482               /* Provide a replacement.  */
3483               WITH_CUR_LOCALE (error (0, 0, _("\
3484 no output digits defined and none of the standard names in the charmap")));
3485
3486               ctype->mboutdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
3487                                                        sizeof (struct charseq)
3488                                                        + 1);
3489
3490               /* This is better than nothing.  */
3491               ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
3492               ctype->mboutdigits[cnt]->nbytes = 1;
3493             }
3494
3495           ctype->wcoutdigits[cnt] = L'0' + cnt;
3496         }
3497
3498       ctype->outdigits_act = 10;
3499     }
3500 }
3501
3502
3503 /* Construction of sparse 3-level tables.
3504    See wchar-lookup.h for their structure and the meaning of p and q.  */
3505
struct wctype_table
{
  /* Bit widths, set by the caller before initialization.  A character
     code is split as: top bits -> level 1 slot, next Q bits -> slot in
     a level 2 block, next P bits -> word in a level 3 block, low 5 bits
     -> bit inside that 32-bit word.  */
  unsigned int p;
  unsigned int q;

  /* Level 1: one entry per slot, naming a level 2 block (or EMPTY).
     `level1_alloc' is the capacity, `level1_size' the entries in use.  */
  size_t level1_alloc;
  size_t level1_size;
  uint32_t *level1;

  /* Level 2: blocks of (1 << q) entries, each naming a level 3 block
     (or EMPTY).  Capacity and size are counted in blocks.  */
  size_t level2_alloc;
  size_t level2_size;
  uint32_t *level2;

  /* Level 3: blocks of (1 << p) 32-bit words holding the membership
     bits.  Capacity and size are counted in blocks.  */
  size_t level3_alloc;
  size_t level3_size;
  uint32_t *level3;

  /* Flat, compressed image of the table and its length in bytes;
     filled in when the table is finalized.  */
  size_t result_size;
  char *result;
};
3525
3526 /* Initialize.  Assumes t->p and t->q have already been set.  */
3527 static inline void
3528 wctype_table_init (struct wctype_table *t)
3529 {
3530   t->level1 = NULL;
3531   t->level1_alloc = t->level1_size = 0;
3532   t->level2 = NULL;
3533   t->level2_alloc = t->level2_size = 0;
3534   t->level3 = NULL;
3535   t->level3_alloc = t->level3_size = 0;
3536 }
3537
3538 /* Retrieve an entry.  */
3539 static inline int
3540 wctype_table_get (struct wctype_table *t, uint32_t wc)
3541 {
3542   uint32_t index1 = wc >> (t->q + t->p + 5);
3543   if (index1 < t->level1_size)
3544     {
3545       uint32_t lookup1 = t->level1[index1];
3546       if (lookup1 != EMPTY)
3547         {
3548           uint32_t index2 = ((wc >> (t->p + 5)) & ((1 << t->q) - 1))
3549                             + (lookup1 << t->q);
3550           uint32_t lookup2 = t->level2[index2];
3551           if (lookup2 != EMPTY)
3552             {
3553               uint32_t index3 = ((wc >> 5) & ((1 << t->p) - 1))
3554                                 + (lookup2 << t->p);
3555               uint32_t lookup3 = t->level3[index3];
3556               uint32_t index4 = wc & 0x1f;
3557
3558               return (lookup3 >> index4) & 1;
3559             }
3560         }
3561     }
3562   return 0;
3563 }
3564
3565 /* Add one entry.  */
3566 static void
3567 wctype_table_add (struct wctype_table *t, uint32_t wc)
3568 {
3569   uint32_t index1 = wc >> (t->q + t->p + 5);
3570   uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1);
3571   uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1);
3572   uint32_t index4 = wc & 0x1f;
3573   size_t i, i1, i2;
3574
3575   if (index1 >= t->level1_size)
3576     {
3577       if (index1 >= t->level1_alloc)
3578         {
3579           size_t alloc = 2 * t->level1_alloc;
3580           if (alloc <= index1)
3581             alloc = index1 + 1;
3582           t->level1 = (uint32_t *) xrealloc ((char *) t->level1,
3583                                              alloc * sizeof (uint32_t));
3584           t->level1_alloc = alloc;
3585         }
3586       while (index1 >= t->level1_size)
3587         t->level1[t->level1_size++] = EMPTY;
3588     }
3589
3590   if (t->level1[index1] == EMPTY)
3591     {
3592       if (t->level2_size == t->level2_alloc)
3593         {
3594           size_t alloc = 2 * t->level2_alloc + 1;
3595           t->level2 = (uint32_t *) xrealloc ((char *) t->level2,
3596                                              (alloc << t->q) * sizeof (uint32_t));
3597           t->level2_alloc = alloc;
3598         }
3599       i1 = t->level2_size << t->q;
3600       i2 = (t->level2_size + 1) << t->q;
3601       for (i = i1; i < i2; i++)
3602         t->level2[i] = EMPTY;
3603       t->level1[index1] = t->level2_size++;
3604     }
3605
3606   index2 += t->level1[index1] << t->q;
3607
3608   if (t->level2[index2] == EMPTY)
3609     {
3610       if (t->level3_size == t->level3_alloc)
3611         {
3612           size_t alloc = 2 * t->level3_alloc + 1;
3613           t->level3 = (uint32_t *) xrealloc ((char *) t->level3,
3614                                              (alloc << t->p) * sizeof (uint32_t));
3615           t->level3_alloc = alloc;
3616         }
3617       i1 = t->level3_size << t->p;
3618       i2 = (t->level3_size + 1) << t->p;
3619       for (i = i1; i < i2; i++)
3620         t->level3[i] = 0;
3621       t->level2[index2] = t->level3_size++;
3622     }
3623
3624   index3 += t->level2[index2] << t->p;
3625
3626   t->level3[index3] |= (uint32_t)1 << index4;
3627 }
3628
3629 /* Finalize and shrink.  */
3630 static void
3631 wctype_table_finalize (struct wctype_table *t)
3632 {
3633   size_t i, j, k;
3634   uint32_t reorder3[t->level3_size];
3635   uint32_t reorder2[t->level2_size];
3636   uint32_t level1_offset, level2_offset, level3_offset;
3637
3638   /* Uniquify level3 blocks.  */
3639   k = 0;
3640   for (j = 0; j < t->level3_size; j++)
3641     {
3642       for (i = 0; i < k; i++)
3643         if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3644                     (1 << t->p) * sizeof (uint32_t)) == 0)
3645           break;
3646       /* Relocate block j to block i.  */
3647       reorder3[j] = i;
3648       if (i == k)
3649         {
3650           if (i != j)
3651             memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3652                     (1 << t->p) * sizeof (uint32_t));
3653           k++;
3654         }
3655     }
3656   t->level3_size = k;
3657
3658   for (i = 0; i < (t->level2_size << t->q); i++)
3659     if (t->level2[i] != EMPTY)
3660       t->level2[i] = reorder3[t->level2[i]];
3661
3662   /* Uniquify level2 blocks.  */
3663   k = 0;
3664   for (j = 0; j < t->level2_size; j++)
3665     {
3666       for (i = 0; i < k; i++)
3667         if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3668                     (1 << t->q) * sizeof (uint32_t)) == 0)
3669           break;
3670       /* Relocate block j to block i.  */
3671       reorder2[j] = i;
3672       if (i == k)
3673         {
3674           if (i != j)
3675             memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3676                     (1 << t->q) * sizeof (uint32_t));
3677           k++;
3678         }
3679     }
3680   t->level2_size = k;
3681
3682   for (i = 0; i < t->level1_size; i++)
3683     if (t->level1[i] != EMPTY)
3684       t->level1[i] = reorder2[t->level1[i]];
3685
3686   /* Create and fill the resulting compressed representation.  */
3687   t->result_size =
3688     5 * sizeof (uint32_t)
3689     + t->level1_size * sizeof (uint32_t)
3690     + (t->level2_size << t->q) * sizeof (uint32_t)
3691     + (t->level3_size << t->p) * sizeof (uint32_t);
3692   t->result = (char *) xmalloc (t->result_size);
3693
3694   level1_offset =
3695     5 * sizeof (uint32_t);
3696   level2_offset =
3697     5 * sizeof (uint32_t)
3698     + t->level1_size * sizeof (uint32_t);
3699   level3_offset =
3700     5 * sizeof (uint32_t)
3701     + t->level1_size * sizeof (uint32_t)
3702     + (t->level2_size << t->q) * sizeof (uint32_t);
3703
3704   ((uint32_t *) t->result)[0] = t->q + t->p + 5;
3705   ((uint32_t *) t->result)[1] = t->level1_size;
3706   ((uint32_t *) t->result)[2] = t->p + 5;
3707   ((uint32_t *) t->result)[3] = (1 << t->q) - 1;
3708   ((uint32_t *) t->result)[4] = (1 << t->p) - 1;
3709
3710   for (i = 0; i < t->level1_size; i++)
3711     ((uint32_t *) (t->result + level1_offset))[i] =
3712       (t->level1[i] == EMPTY
3713        ? 0
3714        : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3715
3716   for (i = 0; i < (t->level2_size << t->q); i++)
3717     ((uint32_t *) (t->result + level2_offset))[i] =
3718       (t->level2[i] == EMPTY
3719        ? 0
3720        : (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
3721
3722   for (i = 0; i < (t->level3_size << t->p); i++)
3723     ((uint32_t *) (t->result + level3_offset))[i] = t->level3[i];
3724
3725   if (t->level1_alloc > 0)
3726     free (t->level1);
3727   if (t->level2_alloc > 0)
3728     free (t->level2);
3729   if (t->level3_alloc > 0)
3730     free (t->level3);
3731 }
3732
/* Instantiate the generic table code in "3level.h" with uint8_t
   elements under the name `wcwidth_table'.
   NOTE(review): this presumably yields `struct wcwidth_table' and the
   wcwidth_table_* functions used further down in this file, with
   DEFAULT being the value of entries never set -- confirm against
   3level.h.  */
#define TABLE wcwidth_table
#define ELEMENT uint8_t
#define DEFAULT 0xff
#include "3level.h"

/* Second instantiation, with int32_t elements, under the name
   `wctrans_table'.  While the header is being included the identifier
   `wctrans_table_add' is redirected to `wctrans_table_add_internal',
   so the name `wctrans_table_add' remains free for the wrapper that
   follows.
   NOTE(review): TABLE, ELEMENT and DEFAULT are redefined here without
   an #undef, so 3level.h presumably undefines them itself -- confirm.  */
#define TABLE wctrans_table
#define ELEMENT int32_t
#define DEFAULT 0
#define wctrans_table_add wctrans_table_add_internal
#include "3level.h"
#undef wctrans_table_add
/* Record that WC maps to MAPPED_WC.  The table does not hold the
   mapped character itself but its distance from the argument, so the
   value handed on is MAPPED_WC - WC, computed in unsigned 32-bit
   arithmetic.  */
static inline void
wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
{
  uint32_t delta = mapped_wc - wc;

  wctrans_table_add_internal (t, wc, delta);
}
3751
3752
3753 /* Flattens the included transliterations into a translit list.
3754    Inserts them in the list at `cursor', and returns the new cursor.  */
3755 static struct translit_t **
3756 translit_flatten (struct locale_ctype_t *ctype,
3757                   const struct charmap_t *charmap,
3758                   struct translit_t **cursor)
3759 {
3760   while (ctype->translit_include != NULL)
3761     {
3762       const char *copy_locale = ctype->translit_include->copy_locale;
3763       const char *copy_repertoire = ctype->translit_include->copy_repertoire;
3764       struct localedef_t *other;
3765
3766       /* Unchain the include statement.  During the depth-first traversal
3767          we don't want to visit any locale more than once.  */
3768       ctype->translit_include = ctype->translit_include->next;
3769
3770       other = find_locale (LC_CTYPE, copy_locale, copy_repertoire, charmap);
3771
3772       if (other == NULL || other->categories[LC_CTYPE].ctype == NULL)
3773         {
3774           WITH_CUR_LOCALE (error (0, 0, _("\
3775 %s: transliteration data from locale `%s' not available"),
3776                                   "LC_CTYPE", copy_locale));
3777         }
3778       else
3779         {
3780           struct locale_ctype_t *other_ctype =
3781             other->categories[LC_CTYPE].ctype;
3782
3783           cursor = translit_flatten (other_ctype, charmap, cursor);
3784           assert (other_ctype->translit_include == NULL);
3785
3786           if (other_ctype->translit != NULL)
3787             {
3788               /* Insert the other_ctype->translit list at *cursor.  */
3789               struct translit_t *endp = other_ctype->translit;
3790               while (endp->next != NULL)
3791                 endp = endp->next;
3792
3793               endp->next = *cursor;
3794               *cursor = other_ctype->translit;
3795
3796               /* Avoid any risk of circular lists.  */
3797               other_ctype->translit = NULL;
3798
3799               cursor = &endp->next;
3800             }
3801
3802           if (ctype->default_missing == NULL)
3803             ctype->default_missing = other_ctype->default_missing;
3804         }
3805     }
3806
3807   return cursor;
3808 }
3809
3810 static void
3811 allocate_arrays (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
3812                  struct repertoire_t *repertoire)
3813 {
3814   size_t idx, nr;
3815   const void *key;
3816   size_t len;
3817   void *vdata;
3818   void *curs;
3819
3820   /* You wonder about this amount of memory?  This is only because some
3821      users do not manage to address the array with unsigned values or
3822      data types with range >= 256.  '\200' would result in the array
3823      index -128.  To help these poor people we duplicate the entries for
3824      128 up to 255 below the entry for \0.  */
3825   ctype->ctype_b = (char_class_t *) xcalloc (256 + 128, sizeof (char_class_t));
3826   ctype->ctype32_b = (char_class32_t *) xcalloc (256, sizeof (char_class32_t));
3827   ctype->class_b = (uint32_t **)
3828     xmalloc (ctype->nr_charclass * sizeof (uint32_t *));
3829   ctype->class_3level = (struct iovec *)
3830     xmalloc (ctype->nr_charclass * sizeof (struct iovec));
3831
3832   /* This is the array accessed using the multibyte string elements.  */
3833   for (idx = 0; idx < 256; ++idx)
3834     ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
3835
3836   /* Mirror first 127 entries.  We must take care that entry -1 is not
3837      mirrored because EOF == -1.  */
3838   for (idx = 0; idx < 127; ++idx)
3839     ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
3840
3841   /* The 32 bit array contains all characters < 0x100.  */
3842   for (idx = 0; idx < ctype->class_collection_act; ++idx)
3843     if (ctype->charnames[idx] < 0x100)
3844       ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
3845
3846   for (nr = 0; nr < ctype->nr_charclass; nr++)
3847     {
3848       ctype->class_b[nr] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
3849
3850       /* We only set CLASS_B for the bits in the ISO C classes, not
3851          the user defined classes.  The number should not change but
3852          who knows.  */
3853 #define LAST_ISO_C_BIT 11
3854       if (nr <= LAST_ISO_C_BIT)
3855         for (idx = 0; idx < 256; ++idx)
3856           if (ctype->class256_collection[idx] & _ISbit (nr))
3857             ctype->class_b[nr][idx >> 5] |= (uint32_t) 1 << (idx & 0x1f);
3858     }
3859
3860   for (nr = 0; nr < ctype->nr_charclass; nr++)
3861     {
3862       struct wctype_table t;
3863
3864       t.p = 4; /* or: 5 */
3865       t.q = 7; /* or: 6 */
3866       wctype_table_init (&t);
3867
3868       for (idx = 0; idx < ctype->class_collection_act; ++idx)
3869         if (ctype->class_collection[idx] & _ISwbit (nr))
3870           wctype_table_add (&t, ctype->charnames[idx]);
3871
3872       wctype_table_finalize (&t);
3873
3874       if (verbose)
3875         WITH_CUR_LOCALE (fprintf (stderr, _("\
3876 %s: table for class \"%s\": %lu bytes\n"),
3877                                  "LC_CTYPE", ctype->classnames[nr],
3878                                  (unsigned long int) t.result_size));
3879
3880       ctype->class_3level[nr].iov_base = t.result;
3881       ctype->class_3level[nr].iov_len = t.result_size;
3882     }
3883
3884   /* Room for table of mappings.  */
3885   ctype->map_b = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3886   ctype->map32_b = (uint32_t **) xmalloc (ctype->map_collection_nr
3887                                           * sizeof (uint32_t *));
3888   ctype->map_3level = (struct iovec *)
3889     xmalloc (ctype->map_collection_nr * sizeof (struct iovec));
3890
3891   /* Fill in all mappings.  */
3892   for (idx = 0; idx < 2; ++idx)
3893     {
3894       unsigned int idx2;
3895
3896       /* Allocate table.  */
3897       ctype->map_b[idx] = (uint32_t *)
3898         xmalloc ((256 + 128) * sizeof (uint32_t));
3899
3900       /* Copy values from collection.  */
3901       for (idx2 = 0; idx2 < 256; ++idx2)
3902         ctype->map_b[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
3903
3904       /* Mirror first 127 entries.  We must take care not to map entry
3905          -1 because EOF == -1.  */
3906       for (idx2 = 0; idx2 < 127; ++idx2)
3907         ctype->map_b[idx][idx2] = ctype->map_b[idx][256 + idx2];
3908
3909       /* EOF must map to EOF.  */
3910       ctype->map_b[idx][127] = EOF;
3911     }
3912
3913   for (idx = 0; idx < ctype->map_collection_nr; ++idx)
3914     {
3915       unsigned int idx2;
3916
3917       /* Allocate table.  */
3918       ctype->map32_b[idx] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
3919
3920       /* Copy values from collection.  Default is identity mapping.  */
3921       for (idx2 = 0; idx2 < 256; ++idx2)
3922         ctype->map32_b[idx][idx2] =
3923           (ctype->map_collection[idx][idx2] != 0
3924            ? ctype->map_collection[idx][idx2]
3925            : idx2);
3926     }
3927
3928   for (nr = 0; nr < ctype->map_collection_nr; nr++)
3929     {
3930       struct wctrans_table t;
3931
3932       t.p = 7;
3933       t.q = 9;
3934       wctrans_table_init (&t);
3935
3936       for (idx = 0; idx < ctype->map_collection_act[nr]; ++idx)
3937         if (ctype->map_collection[nr][idx] != 0)
3938           wctrans_table_add (&t, ctype->charnames[idx],
3939                              ctype->map_collection[nr][idx]);
3940
3941       wctrans_table_finalize (&t);
3942
3943       if (verbose)
3944         WITH_CUR_LOCALE (fprintf (stderr, _("\
3945 %s: table for map \"%s\": %lu bytes\n"),
3946                                  "LC_CTYPE", ctype->mapnames[nr],
3947                                  (unsigned long int) t.result_size));
3948
3949       ctype->map_3level[nr].iov_base = t.result;
3950       ctype->map_3level[nr].iov_len = t.result_size;
3951     }
3952
3953   /* Extra array for class and map names.  */
3954   ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3955                                                 * sizeof (uint32_t));
3956   ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3957                                               * sizeof (uint32_t));
3958
3959   ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
3960   ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
3961
3962   /* Array for width information.  Because the expected widths are very
3963      small (never larger than 2) we use only one single byte.  This
3964      saves space.
3965      We put only printable characters in the table.  wcwidth is specified
3966      to return -1 for non-printable characters.  Doing the check here
3967      saves a run-time check.
3968      But we put L'\0' in the table.  This again saves a run-time check.  */
3969   {
3970     struct wcwidth_table t;
3971
3972     t.p = 7;
3973     t.q = 9;
3974     wcwidth_table_init (&t);
3975
3976     /* First set all the printable characters of the character set to
3977        the default width.  */
3978     curs = NULL;
3979     while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
3980       {
3981         struct charseq *data = (struct charseq *) vdata;
3982
3983         if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
3984           data->ucs4 = repertoire_find_value (ctype->repertoire,
3985                                               data->name, len);
3986
3987         if (data->ucs4 != ILLEGAL_CHAR_VALUE)
3988           {
3989             uint32_t *class_bits =
3990               find_idx (ctype, &ctype->class_collection, NULL,
3991                         &ctype->class_collection_act, data->ucs4);
3992
3993             if (class_bits != NULL && (*class_bits & BITw (tok_print)))
3994               wcwidth_table_add (&t, data->ucs4, charmap->width_default);
3995           }
3996       }
3997
3998     /* Now add the explicitly specified widths.  */
3999     if (charmap->width_rules != NULL)
4000       {
4001         size_t cnt;
4002
4003         for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
4004           {
4005             unsigned char bytes[charmap->mb_cur_max];
4006             int nbytes = charmap->width_rules[cnt].from->nbytes;
4007
4008             /* We have the range of characters for which the width is
4009                specified described using byte sequences of the multibyte
4010                charset.  We have to convert this to UCS4 now.  And we
4011                cannot simply convert the beginning and the end of the
4012                sequence, we have to iterate over the byte sequence and
4013                convert it for every single character.  */
4014             memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
4015
4016             while (nbytes < charmap->width_rules[cnt].to->nbytes
4017                    || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
4018                               nbytes) <= 0)
4019               {
4020                 /* Find the UCS value for `bytes'.  */
4021                 int inner;
4022                 uint32_t wch;
4023                 struct charseq *seq =
4024                   charmap_find_symbol (charmap, bytes, nbytes);
4025
4026                 if (seq == NULL)
4027                   wch = ILLEGAL_CHAR_VALUE;
4028                 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
4029                   wch = seq->ucs4;
4030                 else
4031                   wch = repertoire_find_value (ctype->repertoire, seq->name,
4032                                                strlen (seq->name));
4033
4034                 if (wch != ILLEGAL_CHAR_VALUE)
4035                   {
4036                     /* Store the value.  */
4037                     uint32_t *class_bits =
4038                       find_idx (ctype, &ctype->class_collection, NULL,
4039                                 &ctype->class_collection_act, wch);
4040
4041                     if (class_bits != NULL && (*class_bits & BITw (tok_print)))
4042                       wcwidth_table_add (&t, wch,
4043                                          charmap->width_rules[cnt].width);
4044                   }
4045
4046                 /* "Increment" the bytes sequence.  */
4047                 inner = nbytes - 1;
4048                 while (inner >= 0 && bytes[inner] == 0xff)
4049                   --inner;
4050
4051                 if (inner < 0)
4052                   {
4053                     /* We have to extend the byte sequence.  */
4054                     if (nbytes >= charmap->width_rules[cnt].to->nbytes)
4055                       break;
4056
4057                     bytes[0] = 1;
4058                     memset (&bytes[1], 0, nbytes);
4059                     ++nbytes;
4060                   }
4061                 else
4062                   {
4063                     ++bytes[inner];
4064                     while (++inner < nbytes)
4065                       bytes[inner] = 0;
4066                   }
4067               }
4068           }
4069       }
4070
4071     /* Set the width of L'\0' to 0.  */
4072     wcwidth_table_add (&t, 0, 0);
4073
4074     wcwidth_table_finalize (&t);
4075
4076     if (verbose)
4077       WITH_CUR_LOCALE (fprintf (stderr, _("%s: table for width: %lu bytes\n"),
4078                                "LC_CTYPE", (unsigned long int) t.result_size));
4079
4080     ctype->width.iov_base = t.result;
4081     ctype->width.iov_len = t.result_size;
4082   }
4083
4084   /* Set MB_CUR_MAX.  */
4085   ctype->mb_cur_max = charmap->mb_cur_max;
4086
4087   /* Now determine the table for the transliteration information.
4088
4089      XXX It is not yet clear to me whether it is worth implementing a
4090      complicated algorithm which uses a hash table to locate the entries.
4091      For now I'll use a simple array which can be searched using binary
4092      search.  */
4093   if (ctype->translit_include != NULL)
4094     /* Traverse the locales mentioned in the `include' statements in a
4095        depth-first way and fold in their transliteration information.  */
4096     translit_flatten (ctype, charmap, &ctype->translit);
4097
4098   if (ctype->translit != NULL)
4099     {
4100       /* First count how many entries we have.  This is the upper limit
4101          since some entries from the included files might be overwritten.  */
4102       size_t number = 0;
4103       size_t cnt;
4104       struct translit_t *runp = ctype->translit;
4105       struct translit_t **sorted;
4106       size_t from_len, to_len;
4107
4108       while (runp != NULL)
4109         {
4110           ++number;
4111           runp = runp->next;
4112         }
4113
4114       /* Next we allocate an array large enough and fill in the values.  */
4115       sorted = (struct translit_t **) alloca (number
4116                                               * sizeof (struct translit_t **));
4117       runp = ctype->translit;
4118       number = 0;
4119       do
4120         {
4121           /* Search for the place where to insert this string.
4122              XXX Better use a real sorting algorithm later.  */
4123           size_t idx = 0;
4124           int replace = 0;
4125
4126           while (idx < number)
4127             {
4128               int res = wcscmp ((const wchar_t *) sorted[idx]->from,
4129                                 (const wchar_t *) runp->from);
4130               if (res == 0)
4131                 {
4132                   replace = 1;
4133                   break;
4134                 }
4135               if (res > 0)
4136                 break;
4137               ++idx;
4138             }
4139
4140           if (replace)
4141             sorted[idx] = runp;
4142           else
4143             {
4144               memmove (&sorted[idx + 1], &sorted[idx],
4145                        (number - idx) * sizeof (struct translit_t *));
4146               sorted[idx] = runp;
4147               ++number;
4148             }
4149
4150           runp = runp->next;
4151         }
4152       while (runp != NULL);
4153
4154       /* The next step is putting all the possible transliteration
4155          strings in one memory block so that we can write it out.
4156          We need several different blocks:
4157          - index to the from-string array
4158          - from-string array
4159          - index to the to-string array
4160          - to-string array.
4161       */
4162       from_len = to_len = 0;
4163       for (cnt = 0; cnt < number; ++cnt)
4164         {
4165           struct translit_to_t *srunp;
4166           from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4167           srunp = sorted[cnt]->to;
4168           while (srunp != NULL)
4169             {
4170               to_len += wcslen ((const wchar_t *) srunp->str) + 1;
4171               srunp = srunp->next;
4172             }
4173           /* Plus one for the extra NUL character marking the end of
4174              the list for the current entry.  */
4175           ++to_len;
4176         }
4177
4178       /* We can allocate the arrays for the results.  */
4179       ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
4180       ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
4181       ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
4182       ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
4183
4184       from_len = 0;
4185       to_len = 0;
4186       for (cnt = 0; cnt < number; ++cnt)
4187         {
4188           size_t len;
4189           struct translit_to_t *srunp;
4190
4191           ctype->translit_from_idx[cnt] = from_len;
4192           ctype->translit_to_idx[cnt] = to_len;
4193
4194           len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4195           wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
4196                    (const wchar_t *) sorted[cnt]->from, len);
4197           from_len += len;
4198
4199           ctype->translit_to_idx[cnt] = to_len;
4200           srunp = sorted[cnt]->to;
4201           while (srunp != NULL)
4202             {
4203               len = wcslen ((const wchar_t *) srunp->str) + 1;
4204               wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
4205                        (const wchar_t *) srunp->str, len);
4206               to_len += len;
4207               srunp = srunp->next;
4208             }
4209           ctype->translit_to_tbl[to_len++] = L'\0';
4210         }
4211
4212       /* Store the information about the length.  */
4213       ctype->translit_idx_size = number;
4214       ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
4215       ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
4216     }
4217   else
4218     {
4219       /* Provide some dummy pointers since we have nothing to write out.  */
4220       static uint32_t no_str = { 0 };
4221
4222       ctype->translit_from_idx = &no_str;
4223       ctype->translit_from_tbl = &no_str;
4224       ctype->translit_to_tbl = &no_str;
4225       ctype->translit_idx_size = 0;
4226       ctype->translit_from_tbl_size = 0;
4227       ctype->translit_to_tbl_size = 0;
4228     }
4229 }