locale/programs/ld-ctype.c

   1 /* Copyright (C) 1995-2006, 2007, 2009 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
   4
   5    This program is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published
   7    by the Free Software Foundation; version 2 of the License, or
   8    (at your option) any later version.
   9
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, write to the Free Software Foundation,
  17    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  18
  19 #ifdef HAVE_CONFIG_H
  20 # include <config.h>
  21 #endif
  22
  23 #include <alloca.h>
  24 #include <byteswap.h>
  25 #include <endian.h>
  26 #include <errno.h>
  27 #include <limits.h>
  28 #include <obstack.h>
  29 #include <stdlib.h>
  30 #include <string.h>
  31 #include <wchar.h>
  32 #include <wctype.h>
  33 #include <sys/uio.h>
  34
  35 #include "localedef.h"
  36 #include "charmap.h"
  37 #include "localeinfo.h"
  38 #include "langinfo.h"
  39 #include "linereader.h"
  40 #include "locfile-token.h"
  41 #include "locfile.h"
  42
  43 #include <assert.h>
  44
  45
  46 #ifdef PREDEFINED_CLASSES
  47 /* These are the extra bits not in wctype.h since these are not preallocated
  48    classes.  */
  49 # define _ISwspecial1   (1 << 29)
  50 # define _ISwspecial2   (1 << 30)
  51 # define _ISwspecial3   (1 << 31)
  52 #endif
  53
  54
  55 /* The bit used for representing a special class.  */
     /* BITPOS turns a class token into its bit index, counted from
        tok_upper.  BIT and BITw wrap that index in _ISbit and _ISwbit
        respectively; in this file BIT masks are applied to
        class256_collection and BITw masks to class_collection.  */
  56 #define BITPOS(class) ((class) - tok_upper)
  57 #define BIT(class) (_ISbit (BITPOS (class)))
  58 #define BITw(class) (_ISwbit (BITPOS (class)))
  59
     /* ELEM expands to the dereferenced result of find_idx, so it can be
        read and assigned to.  COLLECTION names a member of *CTYPE; the
        members COLLECTION_max and COLLECTION_act are formed by token
        pasting.  IDX is appended to all three and is either empty or an
        array subscript.  VALUE is the character to look up.  Note that
        CTYPE is expanded several times and is not parenthesized, so pass
        a plain identifier.  */
  60 #define ELEM(ctype, collection, idx, value)                                   \
  61   *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,     \
  62              &ctype->collection##_act idx, value)
  63
  64
  65 /* To be compatible with former implementations we for now restrict
  66    the number of bits for character classes to 16.  When compatibility
  67    is not necessary anymore increase the number to 32.  */
  68 #define char_class_t uint16_t
     /* 32-bit counterpart.  struct locale_ctype_t uses it as the element
        type of ctype32_b, next to the char_class_t array ctype_b.  */
  69 #define char_class32_t uint32_t
  70
  71
  72 /* Type to describe a transliteration action.  We have a possibly
  73    multiple character from-string and a set of multiple character
  74    to-strings.  All are 32bit values since this is what is used in
  75    the gconv functions.  */
     /* One element of the set of to-strings; the elements form a singly
        linked list.  */
  76 struct translit_to_t
  77 {
       /* The to-string as 32-bit values (presumably zero-terminated;
          verify against the code that builds it).  */
  78   uint32_t *str;
  79
       /* Link to the next to-string of the same set.  */
  80   struct translit_to_t *next;
  81 };
  82
     /* One transliteration rule: a from-string together with its list of
        to-strings.  Rules are chained through NEXT.  */
  83 struct translit_t
  84 {
       /* The from-string as 32-bit values.  */
  85   uint32_t *from;
  86
       /* Presumably the file and line where the rule was defined, kept
          for diagnostics -- TODO confirm against the parser.  */
  87   const char *fname;
  88   size_t lineno;
  89
       /* Head of the list of to-strings.  */
  90   struct translit_to_t *to;
  91
       /* Link to the next rule.  */
  92   struct translit_t *next;
  93 };
  94
     /* One entry of the list of characters to ignore in transliteration
        (struct locale_ctype_t keeps the list in translit_ignore).  */
  95 struct translit_ignore_t
  96 {
       /* Presumably the range FROM..TO, visited in increments of STEP --
          TODO confirm against the code that fills these in.  */
  97   uint32_t from;
  98   uint32_t to;
  99   uint32_t step;
 100
       /* Presumably the file and line of the definition, kept for
          diagnostics -- TODO confirm.  */
 101   const char *fname;
 102   size_t lineno;
 103
       /* Link to the next entry.  */
 104   struct translit_ignore_t *next;
 105 };
 106
 107
 108 /* Type to describe a transliteration include statement.  */
 109 struct translit_include_t
 110 {
       /* Names given in the include statement: presumably the locale to
          take transliteration data from and the repertoire map to use
          with it -- TODO confirm against the parser.  */
 111   const char *copy_locale;
 112   const char *copy_repertoire;
 113
       /* Link to the next include statement.  */
 114   struct translit_include_t *next;
 115 };
 116
 117
 118 /* Sparse table of uint32_t.  */
     /* The macros below parameterize "3level.h" and must be defined before
        it is included.  This file afterwards uses `struct idx_table' and
        idx_table_init, so the header presumably derives its type and
        function names from TABLE.  ELEMENT is the stored type and DEFAULT
        presumably the value reported for absent entries (all bits set);
        the exact effect of NO_FINALIZE is defined by the header -- see
        3level.h.  */
 119 #define TABLE idx_table
 120 #define ELEMENT uint32_t
 121 #define DEFAULT ((uint32_t) ~0)
 122 #define NO_FINALIZE
 123 #include "3level.h"
 124
 125
 126 /* The real definition of the struct for the LC_CTYPE locale.  */
 127 struct locale_ctype_t
 128 {
       /* Table of the character values known so far.  ctype_startup enters
          the values 0..255 at the same indices.  charnames_max is the
          allocated number of elements, charnames_act the number in use.  */
 129   uint32_t *charnames;
 130   size_t charnames_max;
 131   size_t charnames_act;
 132   /* An index lookup table, to speedup find_idx.  */
 133   struct idx_table charnames_idx;
 134
       /* Repertoire used to find UCS4 values for charmap names;
          ctype_finish loads it with repertoire_read when a repertoire
          name is available.  */
 135   struct repertoire_t *repertoire;
 136
 137   /* We will allow up to 8 * sizeof (uint32_t) character classes.  */
 138 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
 139   size_t nr_charclass;
 140   const char *classnames[MAX_NR_CHARCLASS];
 141   uint32_t last_class_char;
       /* Class bits per character.  class256_collection is indexed by the
          byte value and tested with BIT masks.  class_collection is
          tested with BITw masks; ctype_finish reads class_collection[i]
          as the bits of the character charnames[i].  */
 142   uint32_t class256_collection[256];
 143   uint32_t *class_collection;
 144   size_t class_collection_max;
 145   size_t class_collection_act;
 146   uint32_t class_done;
 147   uint32_t class_offset;
 148
       /* Input digits in multibyte and in wide character form.
          ctype_finish cuts both down to complete groups of ten and
          installs defaults if none are left.  */
 149   struct charseq **mbdigits;
 150   size_t mbdigits_act;
 151   size_t mbdigits_max;
 152   uint32_t *wcdigits;
 153   size_t wcdigits_act;
 154   size_t wcdigits_max;
 155
       /* Output digits, one slot per decimal digit.  ctype_finish warns
          about empty slots and puts a '?' sequence into empty multibyte
          slots.  */
 156   struct charseq *mboutdigits[10];
 157   uint32_t wcoutdigits[10];
 158   size_t outdigits_act;
 159
 160   /* If the following number ever turns out to be too small simply
 161      increase it.  But I doubt it will.  --drepper@gnu */
 162 #define MAX_NR_CHARMAP 16
       /* Character maps.  ctype_startup registers "toupper" first and
          "tolower" second and fills the first 256 entries of their
          map_collection arrays and of map256_collection with the
          identity.  ctype_finish reads map256_collection[0] as the
          single-byte uppercase and map256_collection[1] as the
          single-byte lowercase mapping.  */
 163   const char *mapnames[MAX_NR_CHARMAP];
 164   uint32_t *map_collection[MAX_NR_CHARMAP];
 165   uint32_t map256_collection[2][256];
 166   size_t map_collection_max[MAX_NR_CHARMAP];
 167   size_t map_collection_act[MAX_NR_CHARMAP];
 168   size_t map_collection_nr;
 169   size_t last_map_idx;
 170   int tomap_done[MAX_NR_CHARMAP];
 171   uint32_t map_offset;
 172
 173   /* Transliteration information.  */
 174   struct translit_include_t *translit_include;
 175   struct translit_t *translit;
 176   struct translit_ignore_t *translit_ignore;
 177   uint32_t ntranslit_ignore;
 178
 179   uint32_t *default_missing;
 180   const char *default_missing_file;
 181   size_t default_missing_lineno;
 182
       /* to_nonascii is set to 1 by ctype_startup when
          enc_not_ascii_compatible is set.  nonascii_case is set to 1 by
          ctype_finish when the single-byte case mappings differ from the
          ASCII rules.  */
 183   uint32_t to_nonascii;
 184   uint32_t nonascii_case;
 185
 186   /* The arrays for the binary representation.  */
 187   char_class_t *ctype_b;
 188   char_class32_t *ctype32_b;
 189   uint32_t **map_b;
 190   uint32_t **map32_b;
 191   uint32_t **class_b;
 192   struct iovec *class_3level;
 193   struct iovec *map_3level;
 194   uint32_t *class_name_ptr;
 195   uint32_t *map_name_ptr;
 196   struct iovec width;
 197   uint32_t mb_cur_max;
       /* Taken from the charmap's code_set_name by ctype_finish, or
          "//UNKNOWN//" when the charmap does not name a character set.  */
 198   const char *codeset_name;
 199   uint32_t *translit_from_idx;
 200   uint32_t *translit_from_tbl;
 201   uint32_t *translit_to_idx;
 202   uint32_t *translit_to_tbl;
 203   uint32_t translit_idx_size;
 204   size_t translit_from_tbl_size;
 205   size_t translit_to_tbl_size;
 206
       /* Obstack set up by ctype_startup with obstack_init.  */
 207   struct obstack mempool;
 208 };
 209
 210
 211 /* Marker for an empty slot.  This has the value 0xFFFFFFFF, regardless
 212    whether 'int' is 16 bit, 32 bit, or 64 bit.  */
 213 #define EMPTY ((uint32_t) ~0)
 214
 215
     /* Allocation hooks for the obstack macros of <obstack.h> (used by
        obstack_init): chunks are obtained with xmalloc and released with
        free.  */
 216 #define obstack_chunk_alloc xmalloc
 217 #define obstack_chunk_free free
 218
 219
 220 /* Prototypes for local functions.  */
     /* Create the LC_CTYPE data of LOCALE if it does not exist yet.  */
 221 static void ctype_startup (struct linereader *lr, struct localedef_t *locale,
 222                            const struct charmap_t *charmap,
 223                            struct localedef_t *copy_locale,
 224                            int ignore_content);
     /* Register the character class NAME; ctype_startup calls this once
        per predefined class, and the call order fixes the bit positions.  */
 225 static void ctype_class_new (struct linereader *lr,
 226                              struct locale_ctype_t *ctype, const char *name);
     /* Register the character map NAME; ctype_startup calls this for
        "toupper" and "tolower".  */
 227 static void ctype_map_new (struct linereader *lr,
 228                            struct locale_ctype_t *ctype,
 229                            const char *name, const struct charmap_t *charmap);
     /* Look up character IDX and return a pointer to its slot in *TABLE
        (see the ELEM macro).  ctype_finish also calls it with TABLE, MAX
        and ACT all NULL, purely to have IDX entered in the name array.  */
 230 static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table,
 231                            size_t *max, size_t *act, unsigned int idx);
     /* Called by ctype_finish to set default values for classes that were
        not specified.  */
 232 static void set_class_defaults (struct locale_ctype_t *ctype,
 233                                 const struct charmap_t *charmap,
 234                                 struct repertoire_t *repertoire);
     /* Presumably builds the binary representation arrays of
        struct locale_ctype_t -- TODO confirm at the definition.  */
 235 static void allocate_arrays (struct locale_ctype_t *ctype,
 236                              const struct charmap_t *charmap,
 237                              struct repertoire_t *repertoire);
 238
 239
 240 static const char *longnames[] =
 241 {
 242   "zero", "one", "two", "three", "four",
 243   "five", "six", "seven", "eight", "nine"
 244 };
 245 static const char *uninames[] =
 246 {
 247   "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
 248   "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
 249 };
 250 static const unsigned char digits[] = "0123456789";
 251
 252
 253 static void
 254 ctype_startup (struct linereader *lr, struct localedef_t *locale,
 255                const struct charmap_t *charmap,
 256                struct localedef_t *copy_locale, int ignore_content)
 257 {
 258   unsigned int cnt;
 259   struct locale_ctype_t *ctype;
 260
 261   if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL)
 262     {
 263       if (copy_locale == NULL)
 264         {
 265           /* Allocate the needed room.  */
 266           locale->categories[LC_CTYPE].ctype = ctype =
 267             (struct locale_ctype_t *) xcalloc (1,
 268                                                sizeof (struct locale_ctype_t));
 269
 270           /* We have seen no names yet.  */
 271           ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
 272           ctype->charnames =
 273             (unsigned int *) xmalloc (ctype->charnames_max
 274                                       * sizeof (unsigned int));
 275           for (cnt = 0; cnt < 256; ++cnt)
 276             ctype->charnames[cnt] = cnt;
 277           ctype->charnames_act = 256;
 278           idx_table_init (&ctype->charnames_idx);
 279
 280           /* Fill character class information.  */
 281           ctype->last_class_char = ILLEGAL_CHAR_VALUE;
 282           /* The order of the following instructions determines the bit
 283              positions!  */
 284           ctype_class_new (lr, ctype, "upper");
 285           ctype_class_new (lr, ctype, "lower");
 286           ctype_class_new (lr, ctype, "alpha");
 287           ctype_class_new (lr, ctype, "digit");
 288           ctype_class_new (lr, ctype, "xdigit");
 289           ctype_class_new (lr, ctype, "space");
 290           ctype_class_new (lr, ctype, "print");
 291           ctype_class_new (lr, ctype, "graph");
 292           ctype_class_new (lr, ctype, "blank");
 293           ctype_class_new (lr, ctype, "cntrl");
 294           ctype_class_new (lr, ctype, "punct");
 295           ctype_class_new (lr, ctype, "alnum");
 296 #ifdef PREDEFINED_CLASSES
 297           /* The following are extensions from ISO 14652.  */
 298           ctype_class_new (lr, ctype, "left_to_right");
 299           ctype_class_new (lr, ctype, "right_to_left");
 300           ctype_class_new (lr, ctype, "num_terminator");
 301           ctype_class_new (lr, ctype, "num_separator");
 302           ctype_class_new (lr, ctype, "segment_separator");
 303           ctype_class_new (lr, ctype, "block_separator");
 304           ctype_class_new (lr, ctype, "direction_control");
 305           ctype_class_new (lr, ctype, "sym_swap_layout");
 306           ctype_class_new (lr, ctype, "char_shape_selector");
 307           ctype_class_new (lr, ctype, "num_shape_selector");
 308           ctype_class_new (lr, ctype, "non_spacing");
 309           ctype_class_new (lr, ctype, "non_spacing_level3");
 310           ctype_class_new (lr, ctype, "normal_connect");
 311           ctype_class_new (lr, ctype, "r_connect");
 312           ctype_class_new (lr, ctype, "no_connect");
 313           ctype_class_new (lr, ctype, "no_connect-space");
 314           ctype_class_new (lr, ctype, "vowel_connect");
 315 #endif
 316
 317           ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
 318           ctype->class_collection
 319             = (uint32_t *) xcalloc (sizeof (unsigned long int),
 320                                     ctype->class_collection_max);
 321           ctype->class_collection_act = 256;
 322
 323           /* Fill character map information.  */
 324           ctype->last_map_idx = MAX_NR_CHARMAP;
 325           ctype_map_new (lr, ctype, "toupper", charmap);
 326           ctype_map_new (lr, ctype, "tolower", charmap);
 327 #ifdef PREDEFINED_CLASSES
 328           ctype_map_new (lr, ctype, "tosymmetric", charmap);
 329 #endif
 330
 331           /* Fill first 256 entries in `toXXX' arrays.  */
 332           for (cnt = 0; cnt < 256; ++cnt)
 333             {
 334               ctype->map_collection[0][cnt] = cnt;
 335               ctype->map_collection[1][cnt] = cnt;
 336 #ifdef PREDEFINED_CLASSES
 337               ctype->map_collection[2][cnt] = cnt;
 338 #endif
 339               ctype->map256_collection[0][cnt] = cnt;
 340               ctype->map256_collection[1][cnt] = cnt;
 341             }
 342
 343           if (enc_not_ascii_compatible)
 344             ctype->to_nonascii = 1;
 345
 346           obstack_init (&ctype->mempool);
 347         }
 348       else
 349         ctype = locale->categories[LC_CTYPE].ctype =
 350           copy_locale->categories[LC_CTYPE].ctype;
 351     }
 352 }
 353
 354
 355 void
 356 ctype_finish (struct localedef_t *locale, const struct charmap_t *charmap)
 357 {
 358   /* See POSIX.2, table 2-6 for the meaning of the following table.  */
 359 #define NCLASS 12
 360   static const struct
 361   {
 362     const char *name;
 363     const char allow[NCLASS];
 364   }
 365   valid_table[NCLASS] =
 366   {
 367     /* The order is important.  See token.h for more information.
 368        M = Always, D = Default, - = Permitted, X = Mutually exclusive  */
 369     { "upper",  "--MX-XDDXXX-" },
 370     { "lower",  "--MX-XDDXXX-" },
 371     { "alpha",  "---X-XDDXXX-" },
 372     { "digit",  "XXX--XDDXXX-" },
 373     { "xdigit", "-----XDDXXX-" },
 374     { "space",  "XXXXX------X" },
 375     { "print",  "---------X--" },
 376     { "graph",  "---------X--" },
 377     { "blank",  "XXXXXM-----X" },
 378     { "cntrl",  "XXXXX-XX--XX" },
 379     { "punct",  "XXXXX-DD-X-X" },
 380     { "alnum",  "-----XDDXXX-" }
 381   };
 382   size_t cnt;
 383   int cls1, cls2;
 384   uint32_t space_value;
 385   struct charseq *space_seq;
 386   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 387   int warned;
 388   const void *key;
 389   size_t len;
 390   void *vdata;
 391   void *curs;
 392
 393   /* Now resolve copying and also handle completely missing definitions.  */
 394   if (ctype == NULL)
 395     {
 396       const char *repertoire_name;
 397
 398       /* First see whether we were supposed to copy.  If yes, find the
 399          actual definition.  */
 400       if (locale->copy_name[LC_CTYPE] != NULL)
 401         {
 402           /* Find the copying locale.  This has to happen transitively since
 403              the locale we are copying from might also copying another one.  */
 404           struct localedef_t *from = locale;
 405
 406           do
 407             from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
 408                                 from->repertoire_name, charmap);
 409           while (from->categories[LC_CTYPE].ctype == NULL
 410                  && from->copy_name[LC_CTYPE] != NULL);
 411
 412           ctype = locale->categories[LC_CTYPE].ctype
 413             = from->categories[LC_CTYPE].ctype;
 414         }
 415
 416       /* If there is still no definition issue an warning and create an
 417          empty one.  */
 418       if (ctype == NULL)
 419         {
 420           if (! be_quiet)
 421             WITH_CUR_LOCALE (error (0, 0, _("\
 422 No definition for %s category found"), "LC_CTYPE"));
 423           ctype_startup (NULL, locale, charmap, NULL, 0);
 424           ctype = locale->categories[LC_CTYPE].ctype;
 425         }
 426
 427       /* Get the repertoire we have to use.  */
 428       repertoire_name = locale->repertoire_name ?: repertoire_global;
 429       if (repertoire_name != NULL)
 430         ctype->repertoire = repertoire_read (repertoire_name);
 431     }
 432
 433   /* We need the name of the currently used 8-bit character set to
 434      make correct conversion between this 8-bit representation and the
 435      ISO 10646 character set used internally for wide characters.  */
 436   ctype->codeset_name = charmap->code_set_name;
 437   if (ctype->codeset_name == NULL)
 438     {
 439       if (! be_quiet)
 440         WITH_CUR_LOCALE (error (0, 0, _("\
 441 No character set name specified in charmap")));
 442       ctype->codeset_name = "//UNKNOWN//";
 443     }
 444
 445   /* Set default value for classes not specified.  */
 446   set_class_defaults (ctype, charmap, ctype->repertoire);
 447
 448   /* Check according to table.  */
 449   for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
 450     {
 451       uint32_t tmp = ctype->class_collection[cnt];
 452
 453       if (tmp != 0)
 454         {
 455           for (cls1 = 0; cls1 < NCLASS; ++cls1)
 456             if ((tmp & _ISwbit (cls1)) != 0)
 457               for (cls2 = 0; cls2 < NCLASS; ++cls2)
 458                 if (valid_table[cls1].allow[cls2] != '-')
 459                   {
 460                     int eq = (tmp & _ISwbit (cls2)) != 0;
 461                     switch (valid_table[cls1].allow[cls2])
 462                       {
 463                       case 'M':
 464                         if (!eq)
 465                           {
 466                             uint32_t value = ctype->charnames[cnt];
 467
 468                             if (!be_quiet)
 469                               WITH_CUR_LOCALE (error (0, 0, _("\
 470 character L'\\u%0*x' in class `%s' must be in class `%s'"),
 471                                                       value > 0xffff ? 8 : 4,
 472                                                       value,
 473                                                       valid_table[cls1].name,
 474                                                       valid_table[cls2].name));
 475                           }
 476                         break;
 477
 478                       case 'X':
 479                         if (eq)
 480                           {
 481                             uint32_t value = ctype->charnames[cnt];
 482
 483                             if (!be_quiet)
 484                               WITH_CUR_LOCALE (error (0, 0, _("\
 485 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
 486                                                       value > 0xffff ? 8 : 4,
 487                                                       value,
 488                                                       valid_table[cls1].name,
 489                                                       valid_table[cls2].name));
 490                           }
 491                         break;
 492
 493                       case 'D':
 494                         ctype->class_collection[cnt] |= _ISwbit (cls2);
 495                         break;
 496
 497                       default:
 498                         WITH_CUR_LOCALE (error (5, 0, _("\
 499 internal error in %s, line %u"), __FUNCTION__, __LINE__));
 500                       }
 501                   }
 502         }
 503     }
 504
 505   for (cnt = 0; cnt < 256; ++cnt)
 506     {
 507       uint32_t tmp = ctype->class256_collection[cnt];
 508
 509       if (tmp != 0)
 510         {
 511           for (cls1 = 0; cls1 < NCLASS; ++cls1)
 512             if ((tmp & _ISbit (cls1)) != 0)
 513               for (cls2 = 0; cls2 < NCLASS; ++cls2)
 514                 if (valid_table[cls1].allow[cls2] != '-')
 515                   {
 516                     int eq = (tmp & _ISbit (cls2)) != 0;
 517                     switch (valid_table[cls1].allow[cls2])
 518                       {
 519                       case 'M':
 520                         if (!eq)
 521                           {
 522                             char buf[17];
 523
 524                             snprintf (buf, sizeof buf, "\\%Zo", cnt);
 525
 526                             if (!be_quiet)
 527                               WITH_CUR_LOCALE (error (0, 0, _("\
 528 character '%s' in class `%s' must be in class `%s'"),
 529                                                       buf,
 530                                                       valid_table[cls1].name,
 531                                                       valid_table[cls2].name));
 532                           }
 533                         break;
 534
 535                       case 'X':
 536                         if (eq)
 537                           {
 538                             char buf[17];
 539
 540                             snprintf (buf, sizeof buf, "\\%Zo", cnt);
 541
 542                             if (!be_quiet)
 543                               WITH_CUR_LOCALE (error (0, 0, _("\
 544 character '%s' in class `%s' must not be in class `%s'"),
 545                                                       buf,
 546                                                       valid_table[cls1].name,
 547                                                       valid_table[cls2].name));
 548                           }
 549                         break;
 550
 551                       case 'D':
 552                         ctype->class256_collection[cnt] |= _ISbit (cls2);
 553                         break;
 554
 555                       default:
 556                         WITH_CUR_LOCALE (error (5, 0, _("\
 557 internal error in %s, line %u"), __FUNCTION__, __LINE__));
 558                       }
 559                   }
 560         }
 561     }
 562
 563   /* ... and now test <SP> as a special case.  */
 564   space_value = 32;
 565   if (((cnt = BITPOS (tok_space),
 566         (ELEM (ctype, class_collection, , space_value)
 567          & BITw (tok_space)) == 0)
 568        || (cnt = BITPOS (tok_blank),
 569            (ELEM (ctype, class_collection, , space_value)
 570             & BITw (tok_blank)) == 0)))
 571     {
 572       if (!be_quiet)
 573         WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
 574                                 valid_table[cnt].name));
 575     }
 576   else if (((cnt = BITPOS (tok_punct),
 577              (ELEM (ctype, class_collection, , space_value)
 578               & BITw (tok_punct)) != 0)
 579             || (cnt = BITPOS (tok_graph),
 580                 (ELEM (ctype, class_collection, , space_value)
 581                  & BITw (tok_graph))
 582                 != 0)))
 583     {
 584       if (!be_quiet)
 585         WITH_CUR_LOCALE (error (0, 0, _("\
 586 <SP> character must not be in class `%s'"),
 587                                 valid_table[cnt].name));
 588     }
 589   else
 590     ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
 591
 592   space_seq = charmap_find_value (charmap, "SP", 2);
 593   if (space_seq == NULL)
 594     space_seq = charmap_find_value (charmap, "space", 5);
 595   if (space_seq == NULL)
 596     space_seq = charmap_find_value (charmap, "U00000020", 9);
 597   if (space_seq == NULL || space_seq->nbytes != 1)
 598     {
 599       if (!be_quiet)
 600         WITH_CUR_LOCALE (error (0, 0, _("\
 601 character <SP> not defined in character map")));
 602     }
 603   else if (((cnt = BITPOS (tok_space),
 604              (ctype->class256_collection[space_seq->bytes[0]]
 605               & BIT (tok_space)) == 0)
 606             || (cnt = BITPOS (tok_blank),
 607                 (ctype->class256_collection[space_seq->bytes[0]]
 608                  & BIT (tok_blank)) == 0)))
 609     {
 610       if (!be_quiet)
 611         WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
 612                                 valid_table[cnt].name));
 613     }
 614   else if (((cnt = BITPOS (tok_punct),
 615              (ctype->class256_collection[space_seq->bytes[0]]
 616               & BIT (tok_punct)) != 0)
 617             || (cnt = BITPOS (tok_graph),
 618                 (ctype->class256_collection[space_seq->bytes[0]]
 619                  & BIT (tok_graph)) != 0)))
 620     {
 621       if (!be_quiet)
 622         WITH_CUR_LOCALE (error (0, 0, _("\
 623 <SP> character must not be in class `%s'"),
 624                                 valid_table[cnt].name));
 625     }
 626   else
 627     ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
 628
 629   /* Check whether all single-byte characters make to their upper/lowercase
 630      equivalent according to the ASCII rules.  */
 631   for (cnt = 'A'; cnt <= 'Z'; ++cnt)
 632     {
 633       uint32_t uppval = ctype->map256_collection[0][cnt];
 634       uint32_t lowval = ctype->map256_collection[1][cnt];
 635       uint32_t lowuppval = ctype->map256_collection[0][lowval];
 636       uint32_t lowlowval = ctype->map256_collection[1][lowval];
 637
 638       if (uppval != cnt
 639           || lowval != cnt + 0x20
 640           || lowuppval != cnt
 641           || lowlowval != cnt + 0x20)
 642         ctype->nonascii_case = 1;
 643     }
 644   for (cnt = 0; cnt < 256; ++cnt)
 645     if (cnt < 'A' || (cnt > 'Z' && cnt < 'a') || cnt > 'z')
 646       if (ctype->map256_collection[0][cnt] != cnt
 647           || ctype->map256_collection[1][cnt] != cnt)
 648         ctype->nonascii_case = 1;
 649
 650   /* Now that the tests are done make sure the name array contains all
 651      characters which are handled in the WIDTH section of the
 652      character set definition file.  */
 653   if (charmap->width_rules != NULL)
 654     for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
 655       {
 656         unsigned char bytes[charmap->mb_cur_max];
 657         int nbytes = charmap->width_rules[cnt].from->nbytes;
 658
 659         /* We have the range of character for which the width is
 660            specified described using byte sequences of the multibyte
 661            charset.  We have to convert this to UCS4 now.  And we
 662            cannot simply convert the beginning and the end of the
 663            sequence, we have to iterate over the byte sequence and
 664            convert it for every single character.  */
 665         memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
 666
 667         while (nbytes < charmap->width_rules[cnt].to->nbytes
 668                || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
 669                           nbytes) <= 0)
 670           {
 671             /* Find the UCS value for `bytes'.  */
 672             int inner;
 673             uint32_t wch;
 674             struct charseq *seq
 675               = charmap_find_symbol (charmap, (char *) bytes, nbytes);
 676
 677             if (seq == NULL)
 678               wch = ILLEGAL_CHAR_VALUE;
 679             else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
 680               wch = seq->ucs4;
 681             else
 682               wch = repertoire_find_value (ctype->repertoire, seq->name,
 683                                            strlen (seq->name));
 684
 685             if (wch != ILLEGAL_CHAR_VALUE)
 686               /* We are only interested in the side-effects of the
 687                  `find_idx' call.  It will add appropriate entries in
 688                  the name array if this is necessary.  */
 689               (void) find_idx (ctype, NULL, NULL, NULL, wch);
 690
 691             /* "Increment" the bytes sequence.  */
 692             inner = nbytes - 1;
 693             while (inner >= 0 && bytes[inner] == 0xff)
 694               --inner;
 695
 696             if (inner < 0)
 697               {
 698                 /* We have to extend the byte sequence.  */
 699                 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
 700                   break;
 701
 702                 bytes[0] = 1;
 703                 memset (&bytes[1], 0, nbytes);
 704                 ++nbytes;
 705               }
 706             else
 707               {
 708                 ++bytes[inner];
 709                 while (++inner < nbytes)
 710                   bytes[inner] = 0;
 711               }
 712           }
 713       }
 714
 715   /* Now set all the other characters of the character set to the
 716      default width.  */
 717   curs = NULL;
 718   while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
 719     {
 720       struct charseq *data = (struct charseq *) vdata;
 721
 722       if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
 723         data->ucs4 = repertoire_find_value (ctype->repertoire,
 724                                             data->name, len);
 725
 726       if (data->ucs4 != ILLEGAL_CHAR_VALUE)
 727         (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
 728     }
 729
 730   /* There must be a multiple of 10 digits.  */
 731   if (ctype->mbdigits_act % 10 != 0)
 732     {
 733       assert (ctype->mbdigits_act == ctype->wcdigits_act);
 734       ctype->wcdigits_act -= ctype->mbdigits_act % 10;
 735       ctype->mbdigits_act -= ctype->mbdigits_act % 10;
 736       WITH_CUR_LOCALE (error (0, 0, _("\
 737 `digit' category has not entries in groups of ten")));
 738     }
 739
 740   /* Check the input digits.  There must be a multiple of ten available.
 741      In each group it could be that one or the other character is missing.
 742      In this case the whole group must be removed.  */
 743   cnt = 0;
 744   while (cnt < ctype->mbdigits_act)
 745     {
 746       size_t inner;
 747       for (inner = 0; inner < 10; ++inner)
 748         if (ctype->mbdigits[cnt + inner] == NULL)
 749           break;
 750
 751       if (inner == 10)
 752         cnt += 10;
 753       else
 754         {
 755           /* Remove the group.  */
 756           memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
 757                    ((ctype->wcdigits_act - cnt - 10)
 758                     * sizeof (ctype->mbdigits[0])));
 759           ctype->mbdigits_act -= 10;
 760         }
 761     }
 762
 763   /* If no input digits are given use the default.  */
 764   if (ctype->mbdigits_act == 0)
 765     {
 766       if (ctype->mbdigits_max == 0)
 767         {
 768           ctype->mbdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
 769                                            10 * sizeof (struct charseq *));
 770           ctype->mbdigits_max = 10;
 771         }
 772
 773       for (cnt = 0; cnt < 10; ++cnt)
 774         {
 775           ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
 776                                                       (char *) digits + cnt, 1);
 777           if (ctype->mbdigits[cnt] == NULL)
 778             {
 779               ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
 780                                                           longnames[cnt],
 781                                                           strlen (longnames[cnt]));
 782               if (ctype->mbdigits[cnt] == NULL)
 783                 {
 784                   /* Hum, this ain't good.  */
 785                   WITH_CUR_LOCALE (error (0, 0, _("\
 786 no input digits defined and none of the standard names in the charmap")));
 787
 788                   ctype->mbdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
 789                                                         sizeof (struct charseq) + 1);
 790
 791                   /* This is better than nothing.  */
 792                   ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
 793                   ctype->mbdigits[cnt]->nbytes = 1;
 794                 }
 795             }
 796         }
 797
 798       ctype->mbdigits_act = 10;
 799     }
 800
 801   /* Check the wide character input digits.  There must be a multiple
 802      of ten available.  In each group it could be that one or the other
 803      character is missing.  In this case the whole group must be
 804      removed.  */
 805   cnt = 0;
 806   while (cnt < ctype->wcdigits_act)
 807     {
 808       size_t inner;
 809       for (inner = 0; inner < 10; ++inner)
 810         if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
 811           break;
 812
 813       if (inner == 10)
 814         cnt += 10;
 815       else
 816         {
 817           /* Remove the group.  */
 818           memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
 819                    ((ctype->wcdigits_act - cnt - 10)
 820                     * sizeof (ctype->wcdigits[0])));
 821           ctype->wcdigits_act -= 10;
 822         }
 823     }
 824
 825   /* If no input digits are given use the default.  */
 826   if (ctype->wcdigits_act == 0)
 827     {
 828       if (ctype->wcdigits_max == 0)
 829         {
 830           ctype->wcdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
 831                                            10 * sizeof (uint32_t));
 832           ctype->wcdigits_max = 10;
 833         }
 834
 835       for (cnt = 0; cnt < 10; ++cnt)
 836         ctype->wcdigits[cnt] = L'0' + cnt;
 837
 838       ctype->mbdigits_act = 10;
 839     }
 840
 841   /* Check the outdigits.  */
 842   warned = 0;
 843   for (cnt = 0; cnt < 10; ++cnt)
 844     if (ctype->mboutdigits[cnt] == NULL)
 845       {
 846         static struct charseq replace[2];
 847
 848         if (!warned)
 849           {
 850             WITH_CUR_LOCALE (error (0, 0, _("\
 851 not all characters used in `outdigit' are available in the charmap")));
 852             warned = 1;
 853           }
 854
 855         replace[0].nbytes = 1;
 856         replace[0].bytes[0] = '?';
 857         replace[0].bytes[1] = '\0';
 858         ctype->mboutdigits[cnt] = &replace[0];
 859       }
 860
 861   warned = 0;
 862   for (cnt = 0; cnt < 10; ++cnt)
 863     if (ctype->wcoutdigits[cnt] == 0)
 864       {
 865         if (!warned)
 866           {
 867             WITH_CUR_LOCALE (error (0, 0, _("\
 868 not all characters used in `outdigit' are available in the repertoire")));
 869             warned = 1;
 870           }
 871
 872         ctype->wcoutdigits[cnt] = L'?';
 873       }
 874
 875   /* Sort the entries in the translit_ignore list.  */
 876   if (ctype->translit_ignore != NULL)
 877     {
 878       struct translit_ignore_t *firstp = ctype->translit_ignore;
 879       struct translit_ignore_t *runp;
 880
 881       ctype->ntranslit_ignore = 1;
 882
 883       for (runp = firstp->next; runp != NULL; runp = runp->next)
 884         {
 885           struct translit_ignore_t *lastp = NULL;
 886           struct translit_ignore_t *cmpp;
 887
 888           ++ctype->ntranslit_ignore;
 889
 890           for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next)
 891             if (runp->from < cmpp->from)
 892               break;
 893
 894           runp->next = lastp;
 895           if (lastp == NULL)
 896             firstp = runp;
 897         }
 898
 899       ctype->translit_ignore = firstp;
 900     }
 901 }
 902
 903
 /* Emit the LC_CTYPE category of LOCALE through write_locale_data.

    The output is described by an iovec array: entry 0 is the
    struct locale_file header, entry 1 the table IDX of 32-bit byte
    offsets (one per element), and the remaining entries carry the data
    of the NELEMS elements in index order.  OUTPUT_PATH is passed on to
    write_locale_data unchanged; CHARMAP is only handed on to
    allocate_arrays.  */
 904 void
 905 ctype_output (struct localedef_t *locale, const struct charmap_t *charmap,
 906               const char *output_path)
 907 {
 908   static const char nulbytes[4] = { 0, 0, 0, 0 };
 909   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 910   const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)
 911                          + ctype->nr_charclass + ctype->map_collection_nr);
       /* Besides the header, the offset table and one entry per element,
          room is needed for every class name, every map name, one class_b
          table per class and four alignment paddings.  The assert in
          front of the final write checks exactly this count.  */
 912   struct iovec *iov = alloca (sizeof *iov
 913                               * (2 + nelems + 2 * ctype->nr_charclass
 914                                  + ctype->map_collection_nr + 4));
 915   struct locale_file data;
       /* idx[elem] is the byte offset at which element ELEM starts.  One
          slot more than NELEMS is allocated because every element also
          stores the offset following it in idx[elem + 1]; only the first
          NELEMS slots are written to the file.  */
 916   uint32_t *idx = alloca (sizeof *idx * (nelems + 1));
 917   uint32_t default_missing_len;
 918   size_t elem, cnt, offset, total;
 919   char *cp;
 920
 921   /* Now prepare the output: Find the sizes of the table we can use.  */
 922   allocate_arrays (ctype, charmap, ctype->repertoire);
 923
 924   data.magic = LIMAGIC (LC_CTYPE);
 925   data.n = nelems;
 926   iov[0].iov_base = (void *) &data;
 927   iov[0].iov_len = sizeof (data);
 928
 929   iov[1].iov_base = (void *) idx;
 930   iov[1].iov_len = nelems * sizeof (uint32_t);
 931
       /* The first element starts right behind the header and the offset
          table.  OFFSET counts the additional iovec entries used so far,
          so the entry currently being filled is iov[2 + elem + offset].  */
 932   idx[0] = iov[0].iov_len + iov[1].iov_len;
 933   offset = 0;
 934
 935   for (elem = 0; elem < nelems; ++elem)
 936     {
 937       if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))
 938         switch (elem)
 939           {
           /* An element without data: a zero-length entry, so the next
              element starts at the same offset.  */
 940 #define CTYPE_EMPTY(name) \
 941           case name:                                                          \
 942             iov[2 + elem + offset].iov_base = NULL;                           \
 943             iov[2 + elem + offset].iov_len = 0;                               \
 944             idx[elem + 1] = idx[elem];                                        \
 945             break
 946
 947           CTYPE_EMPTY(_NL_CTYPE_GAP1);
 948           CTYPE_EMPTY(_NL_CTYPE_GAP2);
 949           CTYPE_EMPTY(_NL_CTYPE_GAP3);
 950           CTYPE_EMPTY(_NL_CTYPE_GAP4);
 951           CTYPE_EMPTY(_NL_CTYPE_GAP5);
 952           CTYPE_EMPTY(_NL_CTYPE_GAP6);
 953
           /* An element made of one contiguous buffer BASE of LEN bytes.  */
 954 #define CTYPE_DATA(name, base, len)                                           \
 955           case _NL_ITEM_INDEX (name):                                         \
 956             iov[2 + elem + offset].iov_base = (base);                         \
 957             iov[2 + elem + offset].iov_len = (len);                           \
 958             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;       \
 959             break
 960
 961           CTYPE_DATA (_NL_CTYPE_CLASS,
 962                       ctype->ctype_b,
 963                       (256 + 128) * sizeof (char_class_t));
 964
 965           CTYPE_DATA (_NL_CTYPE_TOUPPER,
 966                       ctype->map_b[0],
 967                       (256 + 128) * sizeof (uint32_t));
 968           CTYPE_DATA (_NL_CTYPE_TOLOWER,
 969                       ctype->map_b[1],
 970                       (256 + 128) * sizeof (uint32_t));
 971
 972           CTYPE_DATA (_NL_CTYPE_TOUPPER32,
 973                       ctype->map32_b[0],
 974                       256 * sizeof (uint32_t));
 975           CTYPE_DATA (_NL_CTYPE_TOLOWER32,
 976                       ctype->map32_b[1],
 977                       256 * sizeof (uint32_t));
 978
 979           CTYPE_DATA (_NL_CTYPE_CLASS32,
 980                       ctype->ctype32_b,
 981                       256 * sizeof (char_class32_t));
 982
 983           CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET,
 984                       &ctype->class_offset, sizeof (uint32_t));
 985
 986           CTYPE_DATA (_NL_CTYPE_MAP_OFFSET,
 987                       &ctype->map_offset, sizeof (uint32_t));
 988
 989           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE,
 990                       &ctype->translit_idx_size, sizeof (uint32_t));
 991
 992           CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX,
 993                       ctype->translit_from_idx,
 994                       ctype->translit_idx_size * sizeof (uint32_t));
 995
 996           CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL,
 997                       ctype->translit_from_tbl,
 998                       ctype->translit_from_tbl_size);
 999
1000           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX,
1001                       ctype->translit_to_idx,
1002                       ctype->translit_idx_size * sizeof (uint32_t));
1003
1004           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL,
1005                       ctype->translit_to_tbl, ctype->translit_to_tbl_size);
1006
1007           case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
1008             /* The class name array.  */
1009             total = 0;
                 /* Every name, including its NUL, gets an iovec entry of
                    its own; OFFSET grows by one for each of them.  */
1010             for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
1011               {
1012                 iov[2 + elem + offset].iov_base
1013                   = (void *) ctype->classnames[cnt];
1014                 iov[2 + elem + offset].iov_len
1015                   = strlen (ctype->classnames[cnt]) + 1;
1016                 total += iov[2 + elem + offset].iov_len;
1017               }
                 /* Finish the list with 1 to 4 NUL bytes so that the
                    element length becomes a multiple of 4.  */
1018             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1019             iov[2 + elem + offset].iov_len = 4 - (total % 4);
1020             total += 4 - (total % 4);
1021
1022             idx[elem + 1] = idx[elem] + total;
1023             break;
1024
1025           case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
1026             /* The map name array.  */
1027             total = 0;
1028             for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
1029               {
1030                 iov[2 + elem + offset].iov_base
1031                   = (void *) ctype->mapnames[cnt];
1032                 iov[2 + elem + offset].iov_len
1033                   = strlen (ctype->mapnames[cnt]) + 1;
1034                 total += iov[2 + elem + offset].iov_len;
1035               }
                 /* Same 1 to 4 byte NUL padding as for the class names.  */
1036             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1037             iov[2 + elem + offset].iov_len = 4 - (total % 4);
1038             total += 4 - (total % 4);
1039
1040             idx[elem + 1] = idx[elem] + total;
1041             break;
1042
1043           CTYPE_DATA (_NL_CTYPE_WIDTH,
1044                       ctype->width.iov_base,
1045                       ctype->width.iov_len);
1046
1047           CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
1048                       &ctype->mb_cur_max, sizeof (uint32_t));
1049
1050           case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
                 /* The name and its NUL are written as is when their size
                    is a multiple of 4; otherwise a NUL padded copy rounded
                    up to the next multiple of 4 is written.  */
1051             total = strlen (ctype->codeset_name) + 1;
1052             if (total % 4 == 0)
1053               iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
1054             else
1055               {
1056                 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
1057                 memset (mempcpy (iov[2 + elem + offset].iov_base,
1058                                  ctype->codeset_name, total),
1059                         '\0', 4 - (total & 3));
1060                 total = (total + 3) & ~3;
1061               }
1062             iov[2 + elem + offset].iov_len = total;
1063             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1064             break;
1065
1066
1067           CTYPE_DATA (_NL_CTYPE_MAP_TO_NONASCII,
1068                       &ctype->to_nonascii, sizeof (uint32_t));
1069
1070           CTYPE_DATA (_NL_CTYPE_NONASCII_CASE,
1071                       &ctype->nonascii_case, sizeof (uint32_t));
1072
1073           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
                 /* The value written is the number of groups of ten
                    multibyte input digits.  */
1074             iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1075             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1076             *(uint32_t *) iov[2 + elem + offset].iov_base =
1077               ctype->mbdigits_act / 10;
1078             idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1079             break;
1080
1081           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
1082             /* Align entries.  */
                 /* An extra entry of 0 to 3 NUL bytes brings the offset to
                    a multiple of 4; idx[elem] is moved behind the padding
                    so that it refers to the aligned data.  */
1083             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1084             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1085             idx[elem] += iov[2 + elem + offset].iov_len;
1086             ++offset;
1087
1088             iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1089             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1090             *(uint32_t *) iov[2 + elem + offset].iov_base =
1091               ctype->wcdigits_act / 10;
1092             idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1093             break;
1094
1095           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
1096             /* Compute the length of all possible characters.  For INDIGITS
1097                there might be more than one.  We simply concatenate all of
1098                them with a NUL byte following.  The NUL byte wouldn't be
1099                necessary but it makes it easier for the user.  */
1100             total = 0;
1101
                 /* The element for digit N uses entries N, N + 10, N + 20, ...
                    of mbdigits, i.e. digit N of every group.  */
1102             for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1103                  cnt < ctype->mbdigits_act; cnt += 10)
1104               total += ctype->mbdigits[cnt]->nbytes + 1;
1105             iov[2 + elem + offset].iov_base = (char *) alloca (total);
1106             iov[2 + elem + offset].iov_len = total;
1107
1108             cp = iov[2 + elem + offset].iov_base;
1109             for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1110                  cnt < ctype->mbdigits_act; cnt += 10)
1111               {
1112                 cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes,
1113                               ctype->mbdigits[cnt]->nbytes);
1114                 *cp++ = '\0';
1115               }
1116             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1117             break;
1118
1119           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
1120             /* There is exactly one multibyte sequence per output digit.
1121                It is copied with a NUL byte following.  The NUL byte
1122                wouldn't be necessary but it makes it easier for the
1123                user.  */
1124             cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
1125             total = ctype->mboutdigits[cnt]->nbytes + 1;
1126             iov[2 + elem + offset].iov_base = (char *) alloca (total);
1127             iov[2 + elem + offset].iov_len = total;
1128
1129             *(char *) mempcpy (iov[2 + elem + offset].iov_base,
1130                                ctype->mboutdigits[cnt]->bytes,
1131                                ctype->mboutdigits[cnt]->nbytes) = '\0';
1132             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1133             break;
1134
1135           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
                 /* One uint32_t per group of ten: the element for digit N
                    stores wcdigits[N], wcdigits[N + 10], ... at positions
                    0, 1, ... of the buffer.  */
1136             total = ctype->wcdigits_act / 10;
1137
1138             iov[2 + elem + offset].iov_base =
1139               (uint32_t *) alloca (total * sizeof (uint32_t));
1140             iov[2 + elem + offset].iov_len = total * sizeof (uint32_t);
1141
1142             for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
1143                  cnt < ctype->wcdigits_act; cnt += 10)
1144               ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10]
1145                 = ctype->wcdigits[cnt];
1146             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1147             break;
1148
1149           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC):
1150             /* Align entries.  */
1151             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1152             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1153             idx[elem] += iov[2 + elem + offset].iov_len;
1154             ++offset;
1155             /* FALLTHROUGH */
1156
1157           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT1_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
1158             cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
1159             iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt];
1160             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1161             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1162             break;
1163
1164           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
1165             /* Align entries.  */
1166             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1167             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1168             idx[elem] += iov[2 + elem + offset].iov_len;
1169             ++offset;
1170
                 /* The length is counted in characters, not bytes; it is 0
                    when no default_missing string is set.  */
1171             default_missing_len = (ctype->default_missing
1172                                    ? wcslen ((wchar_t *)ctype->default_missing)
1173                                    : 0);
1174             iov[2 + elem + offset].iov_base = &default_missing_len;
1175             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1176             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1177             break;
1178
1179           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
                 /* The string is written without a terminating NUL; an
                    unset default_missing results in a zero-length entry.  */
1180             iov[2 + elem + offset].iov_base =
1181               ctype->default_missing ?: (uint32_t *) L"";
1182             iov[2 + elem + offset].iov_len =
1183               wcslen (iov[2 + elem + offset].iov_base) * sizeof (uint32_t);
1184             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1185             break;
1186
1187           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
1188             /* Align entries.  */
1189             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1190             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1191             idx[elem] += iov[2 + elem + offset].iov_len;
1192             ++offset;
1193
1194             iov[2 + elem + offset].iov_base = &ctype->ntranslit_ignore;
1195             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1196             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1197             break;
1198
1199           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
1200             {
                   /* Flatten the linked list into FROM, TO, STEP triples.
                      The buffer is sized from ntranslit_ignore while the
                      loop follows the list itself.  */
1201               uint32_t *ranges = (uint32_t *) alloca (ctype->ntranslit_ignore
1202                                                       * 3 * sizeof (uint32_t));
1203               struct translit_ignore_t *runp;
1204
1205               iov[2 + elem + offset].iov_base = ranges;
1206               iov[2 + elem + offset].iov_len = (ctype->ntranslit_ignore
1207                                                 * 3 * sizeof (uint32_t));
1208
1209               for (runp = ctype->translit_ignore; runp != NULL;
1210                    runp = runp->next)
1211                 {
1212                   *ranges++ = runp->from;
1213                   *ranges++ = runp->to;
1214                   *ranges++ = runp->step;
1215                 }
1216             }
1217             /* Remove the following line in case a new entry is added
1218                after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN.  */
1219             if (elem < nelems)
1220               idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1221             break;
1222
1223           default:
1224             assert (! "unknown CTYPE element");
1225           }
1226       else
1227         {
1228           /* Handle extra maps.  */
               /* The first nr_charclass extra elements are the character
                  classes, the remaining ones the character maps.  */
1229           size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
1230           if (nr < ctype->nr_charclass)
1231             {
                   /* A class writes its class_b table first.  idx[elem] is
                      moved behind it, so the recorded offset is the one of
                      the class_3level data that follows.  */
1232               iov[2 + elem + offset].iov_base = ctype->class_b[nr];
1233               iov[2 + elem + offset].iov_len = 256 / 32 * sizeof (uint32_t);
1234               idx[elem] += iov[2 + elem + offset].iov_len;
1235               ++offset;
1236
1237               iov[2 + elem + offset] = ctype->class_3level[nr];
1238             }
1239           else
1240             {
1241               nr -= ctype->nr_charclass;
1242               assert (nr < ctype->map_collection_nr);
1243               iov[2 + elem + offset] = ctype->map_3level[nr];
1244             }
1245           idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1246         }
1247     }
1248
       /* Exactly the number of iovec entries allocated at the top must
          have been filled in.  */
1249   assert (2 + elem + offset == (nelems + 2 * ctype->nr_charclass
1250                                 + ctype->map_collection_nr + 4 + 2));
1251
1252   write_locale_data (output_path, LC_CTYPE, "LC_CTYPE", 2 + elem + offset,
1253                      iov);
1254 }
1255
1256
1257 /* Local functions.  */
1258 static void
1259 ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
1260                  const char *name)
1261 {
1262   size_t cnt;
1263
1264   for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1265     if (strcmp (ctype->classnames[cnt], name) == 0)
1266       break;
1267
1268   if (cnt < ctype->nr_charclass)
1269     {
1270       lr_error (lr, _("character class `%s' already defined"), name);
1271       return;
1272     }
1273
1274   if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1275     /* Exit code 2 is prescribed in P1003.2b.  */
1276     WITH_CUR_LOCALE (error (2, 0, _("\
1277 implementation limit: no more than %Zd character classes allowed"),
1278                             MAX_NR_CHARCLASS));
1279
1280   ctype->classnames[ctype->nr_charclass++] = name;
1281 }
1282
1283
1284 static void
1285 ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1286                const char *name, const struct charmap_t *charmap)
1287 {
1288   size_t max_chars = 0;
1289   size_t cnt;
1290
1291   for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1292     {
1293       if (strcmp (ctype->mapnames[cnt], name) == 0)
1294         break;
1295
1296       if (max_chars < ctype->map_collection_max[cnt])
1297         max_chars = ctype->map_collection_max[cnt];
1298     }
1299
1300   if (cnt < ctype->map_collection_nr)
1301     {
1302       lr_error (lr, _("character map `%s' already defined"), name);
1303       return;
1304     }
1305
1306   if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1307     /* Exit code 2 is prescribed in P1003.2b.  */
1308     WITH_CUR_LOCALE (error (2, 0, _("\
1309 implementation limit: no more than %d character maps allowed"),
1310                             MAX_NR_CHARMAP));
1311
1312   ctype->mapnames[cnt] = name;
1313
1314   if (max_chars == 0)
1315     ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1316   else
1317     ctype->map_collection_max[cnt] = max_chars;
1318
1319   ctype->map_collection[cnt] = (uint32_t *)
1320     xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
1321   ctype->map_collection_act[cnt] = 256;
1322
1323   ++ctype->map_collection_nr;
1324 }
1325
1326
1327 /* We have to be prepared that TABLE, MAX, and ACT can be NULL.  This
1328    is possible if we only want to extend the name array.  */
1329 static uint32_t *
1330 find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1331           size_t *act, uint32_t idx)
1332 {
1333   size_t cnt;
1334
1335   if (idx < 256)
1336     return table == NULL ? NULL : &(*table)[idx];
1337
1338   /* Use the charnames_idx lookup table instead of the slow search loop.  */
1339 #if 1
1340   cnt = idx_table_get (&ctype->charnames_idx, idx);
1341   if (cnt == EMPTY)
1342     /* Not found.  */
1343     cnt = ctype->charnames_act;
1344 #else
1345   for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1346     if (ctype->charnames[cnt] == idx)
1347       break;
1348 #endif
1349
1350   /* We have to distinguish two cases: the name is found or not.  */
1351   if (cnt == ctype->charnames_act)
1352     {
1353       /* Extend the name array.  */
1354       if (ctype->charnames_act == ctype->charnames_max)
1355         {
1356           ctype->charnames_max *= 2;
1357           ctype->charnames = (uint32_t *)
1358             xrealloc (ctype->charnames,
1359                       sizeof (uint32_t) * ctype->charnames_max);
1360         }
1361       ctype->charnames[ctype->charnames_act++] = idx;
1362       idx_table_add (&ctype->charnames_idx, idx, cnt);
1363     }
1364
1365   if (table == NULL)
1366     /* We have done everything we are asked to do.  */
1367     return NULL;
1368
1369   if (max == NULL)
1370     /* The caller does not want to extend the table.  */
1371     return (cnt >= *act ? NULL : &(*table)[cnt]);
1372
1373   if (cnt >= *act)
1374     {
1375       if (cnt >= *max)
1376         {
1377           size_t old_max = *max;
1378           do
1379             *max *= 2;
1380           while (*max <= cnt);
1381
1382           *table =
1383             (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
1384           memset (&(*table)[old_max], '\0',
1385                   (*max - old_max) * sizeof (uint32_t));
1386         }
1387
1388       *act = cnt + 1;
1389     }
1390
1391   return &(*table)[cnt];
1392 }
1393
1394
1395 static int
1396 get_character (struct token *now, const struct charmap_t *charmap,
1397                struct repertoire_t *repertoire,
1398                struct charseq **seqp, uint32_t *wchp)
1399 {
1400   if (now->tok == tok_bsymbol)
1401     {
1402       /* This will hopefully be the normal case.  */
1403       *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1404                                      now->val.str.lenmb);
1405       *seqp = charmap_find_value (charmap, now->val.str.startmb,
1406                                   now->val.str.lenmb);
1407     }
1408   else if (now->tok == tok_ucs4)
1409     {
1410       char utmp[10];
1411
1412       snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1413       *seqp = charmap_find_value (charmap, utmp, 9);
1414
1415       if (*seqp == NULL)
1416         *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
1417
1418       if (*seqp == NULL)
1419         {
1420           /* Compute the value in the charmap from the UCS value.  */
1421           const char *symbol = repertoire_find_symbol (repertoire,
1422                                                        now->val.ucs4);
1423
1424           if (symbol == NULL)
1425             *seqp = NULL;
1426           else
1427             *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
1428
1429           if (*seqp == NULL)
1430             {
1431               if (repertoire != NULL)
1432                 {
1433                   /* Insert a negative entry.  */
1434                   static const struct charseq negative
1435                     = { .ucs4 = ILLEGAL_CHAR_VALUE };
1436                   uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1437                                                   sizeof (uint32_t));
1438                   *newp = now->val.ucs4;
1439
1440                   insert_entry (&repertoire->seq_table, newp,
1441                                 sizeof (uint32_t), (void *) &negative);
1442                 }
1443             }
1444           else
1445             (*seqp)->ucs4 = now->val.ucs4;
1446         }
1447       else if ((*seqp)->ucs4 != now->val.ucs4)
1448         *seqp = NULL;
1449
1450       *wchp = now->val.ucs4;
1451     }
1452   else if (now->tok == tok_charcode)
1453     {
1454       /* We must map from the byte code to UCS4.  */
1455       *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1456                                    now->val.str.lenmb);
1457
1458       if (*seqp == NULL)
1459         *wchp = ILLEGAL_CHAR_VALUE;
1460       else
1461         {
1462           if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1463             (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1464                                                    strlen ((*seqp)->name));
1465           *wchp = (*seqp)->ucs4;
1466         }
1467     }
1468   else
1469     return 1;
1470
1471   return 0;
1472 }
1473
1474
1475 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1476    the .(2). counterparts.  */
1477 static void
1478 charclass_symbolic_ellipsis (struct linereader *ldfile,
1479                              struct locale_ctype_t *ctype,
1480                              const struct charmap_t *charmap,
1481                              struct repertoire_t *repertoire,
1482                              struct token *now,
1483                              const char *last_str,
1484                              unsigned long int class256_bit,
1485                              unsigned long int class_bit, int base,
1486                              int ignore_content, int handle_digits, int step)
1487 {
1488   const char *nowstr = now->val.str.startmb;
1489   char tmp[now->val.str.lenmb + 1];
1490   const char *cp;
1491   char *endp;
1492   unsigned long int from;
1493   unsigned long int to;
1494
1495   /* We have to compute the ellipsis values using the symbolic names.  */
1496   assert (last_str != NULL);
1497
1498   if (strlen (last_str) != now->val.str.lenmb)
1499     {
1500     invalid_range:
1501       lr_error (ldfile,
1502                 _("`%s' and `%.*s' are not valid names for symbolic range"),
1503                 last_str, (int) now->val.str.lenmb, nowstr);
1504       return;
1505     }
1506
1507   if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1508     /* Nothing to do, the names are the same.  */
1509     return;
1510
1511   for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1512     ;
1513
1514   errno = 0;
1515   from = strtoul (cp, &endp, base);
1516   if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1517     goto invalid_range;
1518
1519   to = strtoul (nowstr + (cp - last_str), &endp, base);
1520   if ((to == UINT_MAX && errno == ERANGE)
1521       || (endp - nowstr) != now->val.str.lenmb || from >= to)
1522     goto invalid_range;
1523
1524   /* OK, we have a range FROM - TO.  Now we can create the symbolic names.  */
1525   if (!ignore_content)
1526     {
1527       now->val.str.startmb = tmp;
1528       while ((from += step) <= to)
1529         {
1530           struct charseq *seq;
1531           uint32_t wch;
1532
1533           sprintf (tmp, (base == 10 ? "%.*s%0*ld" : "%.*s%0*lX"),
1534                    (int) (cp - last_str), last_str,
1535                    (int) (now->val.str.lenmb - (cp - last_str)),
1536                    from);
1537
1538           get_character (now, charmap, repertoire, &seq, &wch);
1539
1540           if (seq != NULL && seq->nbytes == 1)
1541             /* Yep, we can store information about this byte sequence.  */
1542             ctype->class256_collection[seq->bytes[0]] |= class256_bit;
1543
1544           if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1545             /* We have the UCS4 position.  */
1546             *find_idx (ctype, &ctype->class_collection,
1547                        &ctype->class_collection_max,
1548                        &ctype->class_collection_act, wch) |= class_bit;
1549
1550           if (handle_digits == 1)
1551             {
1552               /* We must store the digit values.  */
1553               if (ctype->mbdigits_act == ctype->mbdigits_max)
1554                 {
1555                   ctype->mbdigits_max *= 2;
1556                   ctype->mbdigits = xrealloc (ctype->mbdigits,
1557                                               (ctype->mbdigits_max
1558                                                * sizeof (char *)));
1559                   ctype->wcdigits_max *= 2;
1560                   ctype->wcdigits = xrealloc (ctype->wcdigits,
1561                                               (ctype->wcdigits_max
1562                                                * sizeof (uint32_t)));
1563                 }
1564
1565               ctype->mbdigits[ctype->mbdigits_act++] = seq;
1566               ctype->wcdigits[ctype->wcdigits_act++] = wch;
1567             }
1568           else if (handle_digits == 2)
1569             {
1570               /* We must store the digit values.  */
1571               if (ctype->outdigits_act >= 10)
1572                 {
1573                   lr_error (ldfile, _("\
1574 %s: field `%s' does not contain exactly ten entries"),
1575                             "LC_CTYPE", "outdigit");
1576                   return;
1577                 }
1578
1579               ctype->mboutdigits[ctype->outdigits_act] = seq;
1580               ctype->wcoutdigits[ctype->outdigits_act] = wch;
1581               ++ctype->outdigits_act;
1582             }
1583         }
1584     }
1585 }
1586
1587
1588 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'.  */
1589 static void
1590 charclass_ucs4_ellipsis (struct linereader *ldfile,
1591                          struct locale_ctype_t *ctype,
1592                          const struct charmap_t *charmap,
1593                          struct repertoire_t *repertoire,
1594                          struct token *now, uint32_t last_wch,
1595                          unsigned long int class256_bit,
1596                          unsigned long int class_bit, int ignore_content,
1597                          int handle_digits, int step)
1598 {
1599   if (last_wch > now->val.ucs4)
1600     {
1601       lr_error (ldfile, _("\
1602 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1603                 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1604                 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
1605       return;
1606     }
1607
1608   if (!ignore_content)
1609     while ((last_wch += step) <= now->val.ucs4)
1610       {
1611         /* We have to find out whether there is a byte sequence corresponding
1612            to this UCS4 value.  */
1613         struct charseq *seq;
1614         char utmp[10];
1615
1616         snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1617         seq = charmap_find_value (charmap, utmp, 9);
1618         if (seq == NULL)
1619           {
1620             snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1621             seq = charmap_find_value (charmap, utmp, 5);
1622           }
1623
1624         if (seq == NULL)
1625           /* Try looking in the repertoire map.  */
1626           seq = repertoire_find_seq (repertoire, last_wch);
1627
1628         /* If this is the first time we look for this sequence create a new
1629            entry.  */
1630         if (seq == NULL)
1631           {
1632             static const struct charseq negative
1633               = { .ucs4 = ILLEGAL_CHAR_VALUE };
1634
1635             /* Find the symbolic name for this UCS4 value.  */
1636             if (repertoire != NULL)
1637               {
1638                 const char *symbol = repertoire_find_symbol (repertoire,
1639                                                              last_wch);
1640                 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1641                                                 sizeof (uint32_t));
1642                 *newp = last_wch;
1643
1644                 if (symbol != NULL)
1645                   /* We have a name, now search the multibyte value.  */
1646                   seq = charmap_find_value (charmap, symbol, strlen (symbol));
1647
1648                 if (seq == NULL)
1649                   /* We have to create a fake entry.  */
1650                   seq = (struct charseq *) &negative;
1651                 else
1652                   seq->ucs4 = last_wch;
1653
1654                 insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1655                               seq);
1656               }
1657             else
1658               /* We have to create a fake entry.  */
1659               seq = (struct charseq *) &negative;
1660           }
1661
1662         /* We have a name, now search the multibyte value.  */
1663         if (seq->ucs4 == last_wch && seq->nbytes == 1)
1664           /* Yep, we can store information about this byte sequence.  */
1665           ctype->class256_collection[(size_t) seq->bytes[0]]
1666             |= class256_bit;
1667
1668         /* And of course we have the UCS4 position.  */
1669         if (class_bit != 0)
1670           *find_idx (ctype, &ctype->class_collection,
1671                      &ctype->class_collection_max,
1672                      &ctype->class_collection_act, last_wch) |= class_bit;
1673
1674         if (handle_digits == 1)
1675           {
1676             /* We must store the digit values.  */
1677             if (ctype->mbdigits_act == ctype->mbdigits_max)
1678               {
1679                 ctype->mbdigits_max *= 2;
1680                 ctype->mbdigits = xrealloc (ctype->mbdigits,
1681                                             (ctype->mbdigits_max
1682                                              * sizeof (char *)));
1683                 ctype->wcdigits_max *= 2;
1684                 ctype->wcdigits = xrealloc (ctype->wcdigits,
1685                                             (ctype->wcdigits_max
1686                                              * sizeof (uint32_t)));
1687               }
1688
1689             ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1690                                                       ? seq : NULL);
1691             ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1692           }
1693         else if (handle_digits == 2)
1694           {
1695             /* We must store the digit values.  */
1696             if (ctype->outdigits_act >= 10)
1697               {
1698                 lr_error (ldfile, _("\
1699 %s: field `%s' does not contain exactly ten entries"),
1700                           "LC_CTYPE", "outdigit");
1701                 return;
1702               }
1703
1704             ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1705                                                         ? seq : NULL);
1706             ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1707             ++ctype->outdigits_act;
1708           }
1709       }
1710 }
1711
1712
1713 /* Ellipsis as in `/xea/x12.../xea/x34'.  */
1714 static void
1715 charclass_charcode_ellipsis (struct linereader *ldfile,
1716                              struct locale_ctype_t *ctype,
1717                              const struct charmap_t *charmap,
1718                              struct repertoire_t *repertoire,
1719                              struct token *now, char *last_charcode,
1720                              uint32_t last_charcode_len,
1721                              unsigned long int class256_bit,
1722                              unsigned long int class_bit, int ignore_content,
1723                              int handle_digits)
1724 {
1725   /* First check whether the to-value is larger.  */
1726   if (now->val.charcode.nbytes != last_charcode_len)
1727     {
1728       lr_error (ldfile, _("\
1729 start and end character sequence of range must have the same length"));
1730       return;
1731     }
1732
1733   if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
1734     {
1735       lr_error (ldfile, _("\
1736 to-value character sequence is smaller than from-value sequence"));
1737       return;
1738     }
1739
1740   if (!ignore_content)
1741     {
1742       do
1743         {
1744           /* Increment the byte sequence value.  */
1745           struct charseq *seq;
1746           uint32_t wch;
1747           int i;
1748
1749           for (i = last_charcode_len - 1; i >= 0; --i)
1750             if (++last_charcode[i] != 0)
1751               break;
1752
1753           if (last_charcode_len == 1)
1754             /* Of course we have the charcode value.  */
1755             ctype->class256_collection[(size_t) last_charcode[0]]
1756               |= class256_bit;
1757
1758           /* Find the symbolic name.  */
1759           seq = charmap_find_symbol (charmap, last_charcode,
1760                                      last_charcode_len);
1761           if (seq != NULL)
1762             {
1763               if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1764                 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1765                                                    strlen (seq->name));
1766               wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
1767
1768               if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1769                 *find_idx (ctype, &ctype->class_collection,
1770                            &ctype->class_collection_max,
1771                            &ctype->class_collection_act, wch) |= class_bit;
1772             }
1773           else
1774             wch = ILLEGAL_CHAR_VALUE;
1775
1776           if (handle_digits == 1)
1777             {
1778               /* We must store the digit values.  */
1779               if (ctype->mbdigits_act == ctype->mbdigits_max)
1780                 {
1781                   ctype->mbdigits_max *= 2;
1782                   ctype->mbdigits = xrealloc (ctype->mbdigits,
1783                                               (ctype->mbdigits_max
1784                                                * sizeof (char *)));
1785                   ctype->wcdigits_max *= 2;
1786                   ctype->wcdigits = xrealloc (ctype->wcdigits,
1787                                               (ctype->wcdigits_max
1788                                                * sizeof (uint32_t)));
1789                 }
1790
1791               seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1792               memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1793               seq->nbytes = last_charcode_len;
1794
1795               ctype->mbdigits[ctype->mbdigits_act++] = seq;
1796               ctype->wcdigits[ctype->wcdigits_act++] = wch;
1797             }
1798           else if (handle_digits == 2)
1799             {
1800               struct charseq *seq;
1801               /* We must store the digit values.  */
1802               if (ctype->outdigits_act >= 10)
1803                 {
1804                   lr_error (ldfile, _("\
1805 %s: field `%s' does not contain exactly ten entries"),
1806                             "LC_CTYPE", "outdigit");
1807                   return;
1808                 }
1809
1810               seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1811               memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1812               seq->nbytes = last_charcode_len;
1813
1814               ctype->mboutdigits[ctype->outdigits_act] = seq;
1815               ctype->wcoutdigits[ctype->outdigits_act] = wch;
1816               ++ctype->outdigits_act;
1817             }
1818         }
1819       while (memcmp (last_charcode, now->val.charcode.bytes,
1820                      last_charcode_len) != 0);
1821     }
1822 }
1823
1824
1825 static uint32_t *
1826 find_translit2 (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
1827                 uint32_t wch)
1828 {
1829   struct translit_t *trunp = ctype->translit;
1830   struct translit_ignore_t *tirunp = ctype->translit_ignore;
1831
1832   while (trunp != NULL)
1833     {
1834       /* XXX We simplify things here.  The transliterations we look
1835          for are only allowed to have one character.  */
1836       if (trunp->from[0] == wch && trunp->from[1] == 0)
1837         {
1838           /* Found it.  Now look for a transliteration which can be
1839              represented with the character set.  */
1840           struct translit_to_t *torunp = trunp->to;
1841
1842           while (torunp != NULL)
1843             {
1844               int i;
1845
1846               for (i = 0; torunp->str[i] != 0; ++i)
1847                 {
1848                   char utmp[10];
1849
1850                   snprintf (utmp, sizeof (utmp), "U%08X", torunp->str[i]);
1851                   if (charmap_find_value (charmap, utmp, 9) == NULL)
1852                     /* This character cannot be represented.  */
1853                     break;
1854                 }
1855
1856               if (torunp->str[i] == 0)
1857                 return torunp->str;
1858
1859               torunp = torunp->next;
1860             }
1861
1862           break;
1863         }
1864
1865       trunp = trunp->next;
1866     }
1867
1868   /* Check for ignored chars.  */
1869   while (tirunp != NULL)
1870     {
1871       if (tirunp->from <= wch && tirunp->to >= wch)
1872         {
1873           uint32_t wi;
1874
1875           for (wi = tirunp->from; wi <= wch; wi += tirunp->step)
1876             if (wi == wch)
1877               return (uint32_t []) { 0 };
1878         }
1879     }
1880
1881   /* Nothing found.  */
1882   return NULL;
1883 }
1884
1885
1886 uint32_t *
1887 find_translit (struct localedef_t *locale, const struct charmap_t *charmap,
1888                uint32_t wch)
1889 {
1890   struct locale_ctype_t *ctype;
1891   uint32_t *result = NULL;
1892
1893   assert (locale != NULL);
1894   ctype = locale->categories[LC_CTYPE].ctype;
1895
1896   if (ctype == NULL)
1897     return NULL;
1898
1899   if (ctype->translit != NULL)
1900     result = find_translit2 (ctype, charmap, wch);
1901
1902   if (result == NULL)
1903     {
1904       struct translit_include_t *irunp = ctype->translit_include;
1905
1906       while (irunp != NULL && result == NULL)
1907         {
1908           result = find_translit (find_locale (CTYPE_LOCALE,
1909                                                irunp->copy_locale,
1910                                                irunp->copy_repertoire,
1911                                                charmap),
1912                                   charmap, wch);
1913           irunp = irunp->next;
1914         }
1915     }
1916
1917   return result;
1918 }
1919
1920
1921 /* Read one transliteration entry.  */
1922 static uint32_t *
1923 read_widestring (struct linereader *ldfile, struct token *now,
1924                  const struct charmap_t *charmap,
1925                  struct repertoire_t *repertoire)
1926 {
1927   uint32_t *wstr;
1928
1929   if (now->tok == tok_default_missing)
1930     /* The special name "" will denote this case.  */
1931     wstr = ((uint32_t *) { 0 });
1932   else if (now->tok == tok_bsymbol)
1933     {
1934       /* Get the value from the repertoire.  */
1935       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1936       wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1937                                        now->val.str.lenmb);
1938       if (wstr[0] == ILLEGAL_CHAR_VALUE)
1939         {
1940           /* We cannot proceed, we don't know the UCS4 value.  */
1941           free (wstr);
1942           return NULL;
1943         }
1944
1945       wstr[1] = 0;
1946     }
1947   else if (now->tok == tok_ucs4)
1948     {
1949       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1950       wstr[0] = now->val.ucs4;
1951       wstr[1] = 0;
1952     }
1953   else if (now->tok == tok_charcode)
1954     {
1955       /* Argh, we have to convert to the symbol name first and then to the
1956          UCS4 value.  */
1957       struct charseq *seq = charmap_find_symbol (charmap,
1958                                                  now->val.str.startmb,
1959                                                  now->val.str.lenmb);
1960       if (seq == NULL)
1961         /* Cannot find the UCS4 value.  */
1962         return NULL;
1963
1964       if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1965         seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1966                                            strlen (seq->name));
1967       if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1968         /* We cannot proceed, we don't know the UCS4 value.  */
1969         return NULL;
1970
1971       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1972       wstr[0] = seq->ucs4;
1973       wstr[1] = 0;
1974     }
1975   else if (now->tok == tok_string)
1976     {
1977       wstr = now->val.str.startwc;
1978       if (wstr == NULL || wstr[0] == 0)
1979         return NULL;
1980     }
1981   else
1982     {
1983       if (now->tok != tok_eol && now->tok != tok_eof)
1984         lr_ignore_rest (ldfile, 0);
1985       SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1986       return (uint32_t *) -1l;
1987     }
1988
1989   return wstr;
1990 }
1991
1992
1993 static void
1994 read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1995                      struct token *now, const struct charmap_t *charmap,
1996                      struct repertoire_t *repertoire)
1997 {
1998   uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1999   struct translit_t *result;
2000   struct translit_to_t **top;
2001   struct obstack *ob = &ctype->mempool;
2002   int first;
2003   int ignore;
2004
2005   if (from_wstr == NULL)
2006     /* There is no valid from string.  */
2007     return;
2008
2009   result = (struct translit_t *) obstack_alloc (ob,
2010                                                 sizeof (struct translit_t));
2011   result->from = from_wstr;
2012   result->fname = ldfile->fname;
2013   result->lineno = ldfile->lineno;
2014   result->next = NULL;
2015   result->to = NULL;
2016   top = &result->to;
2017   first = 1;
2018   ignore = 0;
2019
2020   while (1)
2021     {
2022       uint32_t *to_wstr;
2023
2024       /* Next we have one or more transliterations.  They are
2025          separated by semicolons.  */
2026       now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2027
2028       if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
2029         {
2030           /* One string read.  */
2031           const uint32_t zero = 0;
2032
2033           if (!ignore)
2034             {
2035               obstack_grow (ob, &zero, 4);
2036               to_wstr = obstack_finish (ob);
2037
2038               *top = obstack_alloc (ob, sizeof (struct translit_to_t));
2039               (*top)->str = to_wstr;
2040               (*top)->next = NULL;
2041             }
2042
2043           if (now->tok == tok_eol)
2044             {
2045               result->next = ctype->translit;
2046               ctype->translit = result;
2047               return;
2048             }
2049
2050           if (!ignore)
2051             top = &(*top)->next;
2052           ignore = 0;
2053         }
2054       else
2055         {
2056           to_wstr = read_widestring (ldfile, now, charmap, repertoire);
2057           if (to_wstr == (uint32_t *) -1l)
2058             {
2059               /* An error occurred.  */
2060               obstack_free (ob, result);
2061               return;
2062             }
2063
2064           if (to_wstr == NULL)
2065             ignore = 1;
2066           else
2067             /* This value is usable.  */
2068             obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
2069
2070           first = 0;
2071         }
2072     }
2073 }
2074
2075
2076 static void
2077 read_translit_ignore_entry (struct linereader *ldfile,
2078                             struct locale_ctype_t *ctype,
2079                             const struct charmap_t *charmap,
2080                             struct repertoire_t *repertoire)
2081 {
2082   /* We expect a semicolon-separated list of characters we ignore.  We are
2083      only interested in the wide character definitions.  These must be
2084      single characters, possibly defining a range when an ellipsis is used.  */
2085   while (1)
2086     {
2087       struct token *now = lr_token (ldfile, charmap, NULL, repertoire,
2088                                     verbose);
2089       struct translit_ignore_t *newp;
2090       uint32_t from;
2091
2092       if (now->tok == tok_eol || now->tok == tok_eof)
2093         {
2094           lr_error (ldfile,
2095                     _("premature end of `translit_ignore' definition"));
2096           return;
2097         }
2098
2099       if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2100         {
2101           lr_error (ldfile, _("syntax error"));
2102           lr_ignore_rest (ldfile, 0);
2103           return;
2104         }
2105
2106       if (now->tok == tok_ucs4)
2107         from = now->val.ucs4;
2108       else
2109         /* Try to get the value.  */
2110         from = repertoire_find_value (repertoire, now->val.str.startmb,
2111                                       now->val.str.lenmb);
2112
2113       if (from == ILLEGAL_CHAR_VALUE)
2114         {
2115           lr_error (ldfile, "invalid character name");
2116           newp = NULL;
2117         }
2118       else
2119         {
2120           newp = (struct translit_ignore_t *)
2121             obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
2122           newp->from = from;
2123           newp->to = from;
2124           newp->step = 1;
2125
2126           newp->next = ctype->translit_ignore;
2127           ctype->translit_ignore = newp;
2128         }
2129
2130       /* Now we expect either a semicolon, an ellipsis, or the end of the
2131          line.  */
2132       now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2133
2134       if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
2135         {
2136           /* XXX Should we bother implementing `....'?  `...' certainly
2137              will not be implemented.  */
2138           uint32_t to;
2139           int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
2140
2141           now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2142
2143           if (now->tok == tok_eol || now->tok == tok_eof)
2144             {
2145               lr_error (ldfile,
2146                         _("premature end of `translit_ignore' definition"));
2147               return;
2148             }
2149
2150           if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2151             {
2152               lr_error (ldfile, _("syntax error"));
2153               lr_ignore_rest (ldfile, 0);
2154               return;
2155             }
2156
2157           if (now->tok == tok_ucs4)
2158             to = now->val.ucs4;
2159           else
2160             /* Try to get the value.  */
2161             to = repertoire_find_value (repertoire, now->val.str.startmb,
2162                                         now->val.str.lenmb);
2163
2164           if (to == ILLEGAL_CHAR_VALUE)
2165             lr_error (ldfile, "invalid character name");
2166           else
2167             {
2168               /* Make sure the `to'-value is larger.  */
2169               if (to >= from)
2170                 {
2171                   newp->to = to;
2172                   newp->step = step;
2173                 }
2174               else
2175                 lr_error (ldfile, _("\
2176 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2177                           (to | from) < 65536 ? 4 : 8, to,
2178                           (to | from) < 65536 ? 4 : 8, from);
2179             }
2180
2181           /* And the next token.  */
2182           now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2183         }
2184
2185       if (now->tok == tok_eol || now->tok == tok_eof)
2186         /* We are done.  */
2187         return;
2188
2189       if (now->tok == tok_semicolon)
2190         /* Next round.  */
2191         continue;
2192
2193       /* If we come here something is wrong.  */
2194       lr_error (ldfile, _("syntax error"));
2195       lr_ignore_rest (ldfile, 0);
2196       return;
2197     }
2198 }
2199
2200
2201 /* The parser for the LC_CTYPE section of the locale definition.  */
2202 void
2203 ctype_read (struct linereader *ldfile, struct localedef_t *result,
2204             const struct charmap_t *charmap, const char *repertoire_name,
2205             int ignore_content)
2206 {
2207   struct repertoire_t *repertoire = NULL;
2208   struct locale_ctype_t *ctype;
2209   struct token *now;
2210   enum token_t nowtok;
2211   size_t cnt;
2212   struct charseq *last_seq;
2213   uint32_t last_wch = 0;
2214   enum token_t last_token;
2215   enum token_t ellipsis_token;
2216   int step;
2217   char last_charcode[16];
2218   size_t last_charcode_len = 0;
2219   const char *last_str = NULL;
2220   int mapidx;
2221   struct localedef_t *copy_locale = NULL;
2222
2223   /* Get the repertoire we have to use.  */
2224   if (repertoire_name != NULL)
2225     repertoire = repertoire_read (repertoire_name);
2226
2227   /* The rest of the line containing `LC_CTYPE' must be free.  */
2228   lr_ignore_rest (ldfile, 1);
2229
2230
2231   do
2232     {
2233       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2234       nowtok = now->tok;
2235     }
2236   while (nowtok == tok_eol);
2237
2238   /* If we see `copy' now we are almost done.  */
2239   if (nowtok == tok_copy)
2240     {
2241       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2242       if (now->tok != tok_string)
2243         {
2244           SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2245
2246         skip_category:
2247           do
2248             now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2249           while (now->tok != tok_eof && now->tok != tok_end);
2250
2251           if (now->tok != tok_eof
2252               || (now = lr_token (ldfile, charmap, NULL, NULL, verbose),
2253                   now->tok == tok_eof))
2254             lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2255           else if (now->tok != tok_lc_ctype)
2256             {
2257               lr_error (ldfile, _("\
2258 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2259               lr_ignore_rest (ldfile, 0);
2260             }
2261           else
2262             lr_ignore_rest (ldfile, 1);
2263
2264           return;
2265         }
2266
2267       if (! ignore_content)
2268         {
2269           /* Get the locale definition.  */
2270           copy_locale = load_locale (LC_CTYPE, now->val.str.startmb,
2271                                      repertoire_name, charmap, NULL);
2272           if ((copy_locale->avail & CTYPE_LOCALE) == 0)
2273             {
2274               /* Not yet loaded.  So do it now.  */
2275               if (locfile_read (copy_locale, charmap) != 0)
2276                 goto skip_category;
2277             }
2278
2279           if (copy_locale->categories[LC_CTYPE].ctype == NULL)
2280             return;
2281         }
2282
2283       lr_ignore_rest (ldfile, 1);
2284
2285       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2286       nowtok = now->tok;
2287     }
2288
2289   /* Prepare the data structures.  */
2290   ctype_startup (ldfile, result, charmap, copy_locale, ignore_content);
2291   ctype = result->categories[LC_CTYPE].ctype;
2292
2293   /* Remember the repertoire we use.  */
2294   if (!ignore_content)
2295     ctype->repertoire = repertoire;
2296
2297   while (1)
2298     {
2299       unsigned long int class_bit = 0;
2300       unsigned long int class256_bit = 0;
2301       int handle_digits = 0;
2302
2303       /* Of course we don't proceed beyond the end of file.  */
2304       if (nowtok == tok_eof)
2305         break;
2306
2307       /* Ingore empty lines.  */
2308       if (nowtok == tok_eol)
2309         {
2310           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2311           nowtok = now->tok;
2312           continue;
2313         }
2314
2315       switch (nowtok)
2316         {
2317         case tok_charclass:
2318           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2319           while (now->tok == tok_ident || now->tok == tok_string)
2320             {
2321               ctype_class_new (ldfile, ctype, now->val.str.startmb);
2322               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2323               if (now->tok != tok_semicolon)
2324                 break;
2325               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2326             }
2327           if (now->tok != tok_eol)
2328             SYNTAX_ERROR (_("\
2329 %s: syntax error in definition of new character class"), "LC_CTYPE");
2330           break;
2331
2332         case tok_charconv:
2333           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2334           while (now->tok == tok_ident || now->tok == tok_string)
2335             {
2336               ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2337               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2338               if (now->tok != tok_semicolon)
2339                 break;
2340               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2341             }
2342           if (now->tok != tok_eol)
2343             SYNTAX_ERROR (_("\
2344 %s: syntax error in definition of new character map"), "LC_CTYPE");
2345           break;
2346
2347         case tok_class:
2348           /* Ignore the rest of the line if we don't need the input of
2349              this line.  */
2350           if (ignore_content)
2351             {
2352               lr_ignore_rest (ldfile, 0);
2353               break;
2354             }
2355
2356           /* We simply forget the `class' keyword and use the following
2357              operand to determine the bit.  */
2358           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2359           if (now->tok == tok_ident || now->tok == tok_string)
2360             {
2361               /* Must can be one of the predefined class names.  */
2362               for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2363                 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
2364                   break;
2365               if (cnt >= ctype->nr_charclass)
2366                 {
2367 #ifdef PREDEFINED_CLASSES
2368                   if (now->val.str.lenmb == 8
2369                       && memcmp ("special1", now->val.str.startmb, 8) == 0)
2370                     class_bit = _ISwspecial1;
2371                   else if (now->val.str.lenmb == 8
2372                       && memcmp ("special2", now->val.str.startmb, 8) == 0)
2373                     class_bit = _ISwspecial2;
2374                   else if (now->val.str.lenmb == 8
2375                       && memcmp ("special3", now->val.str.startmb, 8) == 0)
2376                     class_bit = _ISwspecial3;
2377                   else
2378 #endif
2379                     {
2380                       /* OK, it's a new class.  */
2381                       ctype_class_new (ldfile, ctype, now->val.str.startmb);
2382
2383                       class_bit = _ISwbit (ctype->nr_charclass - 1);
2384                     }
2385                 }
2386               else
2387                 {
2388                   class_bit = _ISwbit (cnt);
2389
2390                   free (now->val.str.startmb);
2391                 }
2392             }
2393           else if (now->tok == tok_digit)
2394             goto handle_tok_digit;
2395           else if (now->tok < tok_upper || now->tok > tok_blank)
2396             goto err_label;
2397           else
2398             {
2399               class_bit = BITw (now->tok);
2400               class256_bit = BIT (now->tok);
2401             }
2402
2403           /* The next character must be a semicolon.  */
2404           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2405           if (now->tok != tok_semicolon)
2406             goto err_label;
2407           goto read_charclass;
2408
2409         case tok_upper:
2410         case tok_lower:
2411         case tok_alpha:
2412         case tok_alnum:
2413         case tok_space:
2414         case tok_cntrl:
2415         case tok_punct:
2416         case tok_graph:
2417         case tok_print:
2418         case tok_xdigit:
2419         case tok_blank:
2420           /* Ignore the rest of the line if we don't need the input of
2421              this line.  */
2422           if (ignore_content)
2423             {
2424               lr_ignore_rest (ldfile, 0);
2425               break;
2426             }
2427
2428           class_bit = BITw (now->tok);
2429           class256_bit = BIT (now->tok);
2430           handle_digits = 0;
2431         read_charclass:
2432           ctype->class_done |= class_bit;
2433           last_token = tok_none;
2434           ellipsis_token = tok_none;
2435           step = 1;
2436           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2437           while (now->tok != tok_eol && now->tok != tok_eof)
2438             {
2439               uint32_t wch;
2440               struct charseq *seq;
2441
2442               if (ellipsis_token == tok_none)
2443                 {
2444                   if (get_character (now, charmap, repertoire, &seq, &wch))
2445                     goto err_label;
2446
2447                   if (!ignore_content && seq != NULL && seq->nbytes == 1)
2448                     /* Yep, we can store information about this byte
2449                        sequence.  */
2450                     ctype->class256_collection[seq->bytes[0]] |= class256_bit;
2451
2452                   if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2453                       && class_bit != 0)
2454                     /* We have the UCS4 position.  */
2455                     *find_idx (ctype, &ctype->class_collection,
2456                                &ctype->class_collection_max,
2457                                &ctype->class_collection_act, wch) |= class_bit;
2458
2459                   last_token = now->tok;
2460                   /* Terminate the string.  */
2461                   if (last_token == tok_bsymbol)
2462                     {
2463                       now->val.str.startmb[now->val.str.lenmb] = '\0';
2464                       last_str = now->val.str.startmb;
2465                     }
2466                   else
2467                     last_str = NULL;
2468                   last_seq = seq;
2469                   last_wch = wch;
2470                   memcpy (last_charcode, now->val.charcode.bytes, 16);
2471                   last_charcode_len = now->val.charcode.nbytes;
2472
2473                   if (!ignore_content && handle_digits == 1)
2474                     {
2475                       /* We must store the digit values.  */
2476                       if (ctype->mbdigits_act == ctype->mbdigits_max)
2477                         {
2478                           ctype->mbdigits_max += 10;
2479                           ctype->mbdigits = xrealloc (ctype->mbdigits,
2480                                                       (ctype->mbdigits_max
2481                                                        * sizeof (char *)));
2482                           ctype->wcdigits_max += 10;
2483                           ctype->wcdigits = xrealloc (ctype->wcdigits,
2484                                                       (ctype->wcdigits_max
2485                                                        * sizeof (uint32_t)));
2486                         }
2487
2488                       ctype->mbdigits[ctype->mbdigits_act++] = seq;
2489                       ctype->wcdigits[ctype->wcdigits_act++] = wch;
2490                     }
2491                   else if (!ignore_content && handle_digits == 2)
2492                     {
2493                       /* We must store the digit values.  */
2494                       if (ctype->outdigits_act >= 10)
2495                         {
2496                           lr_error (ldfile, _("\
2497 %s: field `%s' does not contain exactly ten entries"),
2498                             "LC_CTYPE", "outdigit");
2499                           lr_ignore_rest (ldfile, 0);
2500                           break;
2501                         }
2502
2503                       ctype->mboutdigits[ctype->outdigits_act] = seq;
2504                       ctype->wcoutdigits[ctype->outdigits_act] = wch;
2505                       ++ctype->outdigits_act;
2506                     }
2507                 }
2508               else
2509                 {
2510                   /* Now it gets complicated.  We have to resolve the
2511                      ellipsis problem.  First we must distinguish between
2512                      the different kind of ellipsis and this must match the
2513                      tokens we have seen.  */
2514                   assert (last_token != tok_none);
2515
2516                   if (last_token != now->tok)
2517                     {
2518                       lr_error (ldfile, _("\
2519 ellipsis range must be marked by two operands of same type"));
2520                       lr_ignore_rest (ldfile, 0);
2521                       break;
2522                     }
2523
2524                   if (last_token == tok_bsymbol)
2525                     {
2526                       if (ellipsis_token == tok_ellipsis3)
2527                         lr_error (ldfile, _("with symbolic name range values \
2528 the absolute ellipsis `...' must not be used"));
2529
2530                       charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2531                                                    repertoire, now, last_str,
2532                                                    class256_bit, class_bit,
2533                                                    (ellipsis_token
2534                                                     == tok_ellipsis4
2535                                                     ? 10 : 16),
2536                                                    ignore_content,
2537                                                    handle_digits, step);
2538                     }
2539                   else if (last_token == tok_ucs4)
2540                     {
2541                       if (ellipsis_token != tok_ellipsis2)
2542                         lr_error (ldfile, _("\
2543 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2544
2545                       charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2546                                                repertoire, now, last_wch,
2547                                                class256_bit, class_bit,
2548                                                ignore_content, handle_digits,
2549                                                step);
2550                     }
2551                   else
2552                     {
2553                       assert (last_token == tok_charcode);
2554
2555                       if (ellipsis_token != tok_ellipsis3)
2556                         lr_error (ldfile, _("\
2557 with character code range values one must use the absolute ellipsis `...'"));
2558
2559                       charclass_charcode_ellipsis (ldfile, ctype, charmap,
2560                                                    repertoire, now,
2561                                                    last_charcode,
2562                                                    last_charcode_len,
2563                                                    class256_bit, class_bit,
2564                                                    ignore_content,
2565                                                    handle_digits);
2566                     }
2567
2568                   /* Now we have used the last value.  */
2569                   last_token = tok_none;
2570                 }
2571
2572               /* Next we expect a semicolon or the end of the line.  */
2573               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2574               if (now->tok == tok_eol || now->tok == tok_eof)
2575                 break;
2576
2577               if (last_token != tok_none
2578                   && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
2579                 {
2580                   if (now->tok == tok_ellipsis2_2)
2581                     {
2582                       now->tok = tok_ellipsis2;
2583                       step = 2;
2584                     }
2585                   else if (now->tok == tok_ellipsis4_2)
2586                     {
2587                       now->tok = tok_ellipsis4;
2588                       step = 2;
2589                     }
2590
2591                   ellipsis_token = now->tok;
2592
2593                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2594                   continue;
2595                 }
2596
2597               if (now->tok != tok_semicolon)
2598                 goto err_label;
2599
2600               /* And get the next character.  */
2601               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2602
2603               ellipsis_token = tok_none;
2604               step = 1;
2605             }
2606           break;
2607
2608         case tok_digit:
2609           /* Ignore the rest of the line if we don't need the input of
2610              this line.  */
2611           if (ignore_content)
2612             {
2613               lr_ignore_rest (ldfile, 0);
2614               break;
2615             }
2616
2617         handle_tok_digit:
2618           class_bit = _ISwdigit;
2619           class256_bit = _ISdigit;
2620           handle_digits = 1;
2621           goto read_charclass;
2622
2623         case tok_outdigit:
2624           /* Ignore the rest of the line if we don't need the input of
2625              this line.  */
2626           if (ignore_content)
2627             {
2628               lr_ignore_rest (ldfile, 0);
2629               break;
2630             }
2631
2632           if (ctype->outdigits_act != 0)
2633             lr_error (ldfile, _("\
2634 %s: field `%s' declared more than once"),
2635                       "LC_CTYPE", "outdigit");
2636           class_bit = 0;
2637           class256_bit = 0;
2638           handle_digits = 2;
2639           goto read_charclass;
2640
2641         case tok_toupper:
2642           /* Ignore the rest of the line if we don't need the input of
2643              this line.  */
2644           if (ignore_content)
2645             {
2646               lr_ignore_rest (ldfile, 0);
2647               break;
2648             }
2649
2650           mapidx = 0;
2651           goto read_mapping;
2652
2653         case tok_tolower:
2654           /* Ignore the rest of the line if we don't need the input of
2655              this line.  */
2656           if (ignore_content)
2657             {
2658               lr_ignore_rest (ldfile, 0);
2659               break;
2660             }
2661
2662           mapidx = 1;
2663           goto read_mapping;
2664
2665         case tok_map:
2666           /* Ignore the rest of the line if we don't need the input of
2667              this line.  */
2668           if (ignore_content)
2669             {
2670               lr_ignore_rest (ldfile, 0);
2671               break;
2672             }
2673
2674           /* We simply forget the `map' keyword and use the following
2675              operand to determine the mapping.  */
2676           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2677           if (now->tok == tok_ident || now->tok == tok_string)
2678             {
2679               size_t cnt;
2680
2681               for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2682                 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2683                   break;
2684
2685               if (cnt < ctype->map_collection_nr)
2686                 free (now->val.str.startmb);
2687               else
2688                 /* OK, it's a new map.  */
2689                 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2690
2691               mapidx = cnt;
2692             }
2693           else if (now->tok < tok_toupper || now->tok > tok_tolower)
2694             goto err_label;
2695           else
2696             mapidx = now->tok - tok_toupper;
2697
2698           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2699           /* This better should be a semicolon.  */
2700           if (now->tok != tok_semicolon)
2701             goto err_label;
2702
2703         read_mapping:
2704           /* Test whether this mapping was already defined.  */
2705           if (ctype->tomap_done[mapidx])
2706             {
2707               lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2708                         ctype->mapnames[mapidx]);
2709               lr_ignore_rest (ldfile, 0);
2710               break;
2711             }
2712           ctype->tomap_done[mapidx] = 1;
2713
2714           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2715           while (now->tok != tok_eol && now->tok != tok_eof)
2716             {
2717               struct charseq *from_seq;
2718               uint32_t from_wch;
2719               struct charseq *to_seq;
2720               uint32_t to_wch;
2721
2722               /* Every pair starts with an opening brace.  */
2723               if (now->tok != tok_open_brace)
2724                 goto err_label;
2725
2726               /* Next comes the from-value.  */
2727               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2728               if (get_character (now, charmap, repertoire, &from_seq,
2729                                  &from_wch) != 0)
2730                 goto err_label;
2731
2732               /* The next is a comma.  */
2733               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2734               if (now->tok != tok_comma)
2735                 goto err_label;
2736
2737               /* And the other value.  */
2738               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2739               if (get_character (now, charmap, repertoire, &to_seq,
2740                                  &to_wch) != 0)
2741                 goto err_label;
2742
2743               /* And the last thing is the closing brace.  */
2744               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2745               if (now->tok != tok_close_brace)
2746                 goto err_label;
2747
2748               if (!ignore_content)
2749                 {
2750                   /* Check whether the mapping converts from an ASCII value
2751                      to a non-ASCII value.  */
2752                   if (from_seq != NULL && from_seq->nbytes == 1
2753                       && isascii (from_seq->bytes[0])
2754                       && to_seq != NULL && (to_seq->nbytes != 1
2755                                             || !isascii (to_seq->bytes[0])))
2756                     ctype->to_nonascii = 1;
2757
2758                   if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2759                       && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2760                     /* We can use this value.  */
2761                     ctype->map256_collection[mapidx][from_seq->bytes[0]]
2762                       = to_seq->bytes[0];
2763
2764                   if (from_wch != ILLEGAL_CHAR_VALUE
2765                       && to_wch != ILLEGAL_CHAR_VALUE)
2766                     /* Both correct values.  */
2767                     *find_idx (ctype, &ctype->map_collection[mapidx],
2768                                &ctype->map_collection_max[mapidx],
2769                                &ctype->map_collection_act[mapidx],
2770                                from_wch) = to_wch;
2771                 }
2772
2773               /* Now comes a semicolon or the end of the line/file.  */
2774               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2775               if (now->tok == tok_semicolon)
2776                 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2777             }
2778           break;
2779
2780         case tok_translit_start:
2781           /* Ignore the entire translit section with its peculiar syntax
2782              if we don't need the input.  */
2783           if (ignore_content)
2784             {
2785               do
2786                 {
2787                   lr_ignore_rest (ldfile, 0);
2788                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2789                 }
2790               while (now->tok != tok_translit_end && now->tok != tok_eof);
2791
2792               if (now->tok == tok_eof)
2793                 lr_error (ldfile, _(\
2794 "%s: `translit_start' section does not end with `translit_end'"),
2795                           "LC_CTYPE");
2796
2797               break;
2798             }
2799
2800           /* The rest of the line better should be empty.  */
2801           lr_ignore_rest (ldfile, 1);
2802
2803           /* We count here the number of allocated entries in the `translit'
2804              array.  */
2805           cnt = 0;
2806
2807           ldfile->translate_strings = 1;
2808           ldfile->return_widestr = 1;
2809
2810           /* We proceed until we see the `translit_end' token.  */
2811           while (now = lr_token (ldfile, charmap, NULL, repertoire, verbose),
2812                  now->tok != tok_translit_end && now->tok != tok_eof)
2813             {
2814               if (now->tok == tok_eol)
2815                 /* Ignore empty lines.  */
2816                 continue;
2817
2818               if (now->tok == tok_include)
2819                 {
2820                   /* We have to include locale.  */
2821                   const char *locale_name;
2822                   const char *repertoire_name;
2823                   struct translit_include_t *include_stmt, **include_ptr;
2824
2825                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2826                   /* This should be a string or an identifier.  In any
2827                      case something to name a locale.  */
2828                   if (now->tok != tok_string && now->tok != tok_ident)
2829                     {
2830                     translit_syntax:
2831                       lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2832                       lr_ignore_rest (ldfile, 0);
2833                       continue;
2834                     }
2835                   locale_name = now->val.str.startmb;
2836
2837                   /* Next should be a semicolon.  */
2838                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2839                   if (now->tok != tok_semicolon)
2840                     goto translit_syntax;
2841
2842                   /* Now the repertoire name.  */
2843                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2844                   if ((now->tok != tok_string && now->tok != tok_ident)
2845                       || now->val.str.startmb == NULL)
2846                     goto translit_syntax;
2847                   repertoire_name = now->val.str.startmb;
2848                   if (repertoire_name[0] == '\0')
2849                     /* Ignore the empty string.  */
2850                     repertoire_name = NULL;
2851
2852                   /* Save the include statement for later processing.  */
2853                   include_stmt = (struct translit_include_t *)
2854                     xmalloc (sizeof (struct translit_include_t));
2855                   include_stmt->copy_locale = locale_name;
2856                   include_stmt->copy_repertoire = repertoire_name;
2857                   include_stmt->next = NULL;
2858
2859                   include_ptr = &ctype->translit_include;
2860                   while (*include_ptr != NULL)
2861                     include_ptr = &(*include_ptr)->next;
2862                   *include_ptr = include_stmt;
2863
2864                   /* The rest of the line must be empty.  */
2865                   lr_ignore_rest (ldfile, 1);
2866
2867                   /* Make sure the locale is read.  */
2868                   add_to_readlist (LC_CTYPE, locale_name, repertoire_name,
2869                                    1, NULL);
2870                   continue;
2871                 }
2872               else if (now->tok == tok_default_missing)
2873                 {
2874                   uint32_t *wstr;
2875
2876                   while (1)
2877                     {
2878                       /* We expect a single character or string as the
2879                          argument.  */
2880                       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2881                       wstr = read_widestring (ldfile, now, charmap,
2882                                               repertoire);
2883
2884                       if (wstr != NULL)
2885                         {
2886                           if (ctype->default_missing != NULL)
2887                             {
2888                               lr_error (ldfile, _("\
2889 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2890                               WITH_CUR_LOCALE (error_at_line (0, 0,
2891                                                               ctype->default_missing_file,
2892                                                               ctype->default_missing_lineno,
2893                                                               _("\
2894 previous definition was here")));
2895                             }
2896                           else
2897                             {
2898                               ctype->default_missing = wstr;
2899                               ctype->default_missing_file = ldfile->fname;
2900                               ctype->default_missing_lineno = ldfile->lineno;
2901                             }
2902                           /* We can have more entries, ignore them.  */
2903                           lr_ignore_rest (ldfile, 0);
2904                           break;
2905                         }
2906                       else if (wstr == (uint32_t *) -1l)
2907                         /* This was a syntax error.  */
2908                         break;
2909
2910                       /* Maybe there is another replacement we can use.  */
2911                       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2912                       if (now->tok == tok_eol || now->tok == tok_eof)
2913                         {
2914                           /* Nothing found.  We tell the user.  */
2915                           lr_error (ldfile, _("\
2916 %s: no representable `default_missing' definition found"), "LC_CTYPE");
2917                           break;
2918                         }
2919                       if (now->tok != tok_semicolon)
2920                         goto translit_syntax;
2921                     }
2922
2923                   continue;
2924                 }
2925               else if (now->tok == tok_translit_ignore)
2926                 {
2927                   read_translit_ignore_entry (ldfile, ctype, charmap,
2928                                               repertoire);
2929                   continue;
2930                 }
2931
2932               read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2933             }
2934           ldfile->return_widestr = 0;
2935
2936           if (now->tok == tok_eof)
2937             lr_error (ldfile, _(\
2938 "%s: `translit_start' section does not end with `translit_end'"),
2939                       "LC_CTYPE");
2940
2941           break;
2942
2943         case tok_ident:
2944           /* Ignore the rest of the line if we don't need the input of
2945              this line.  */
2946           if (ignore_content)
2947             {
2948               lr_ignore_rest (ldfile, 0);
2949               break;
2950             }
2951
2952           /* This could mean one of several things.  First test whether
2953              it's a character class name.  */
2954           for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2955             if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2956               break;
2957           if (cnt < ctype->nr_charclass)
2958             {
2959               class_bit = _ISwbit (cnt);
2960               class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2961               free (now->val.str.startmb);
2962               goto read_charclass;
2963             }
2964           for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2965             if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2966               break;
2967           if (cnt < ctype->map_collection_nr)
2968             {
2969               mapidx = cnt;
2970               free (now->val.str.startmb);
2971               goto read_mapping;
2972             }
2973 #ifdef PREDEFINED_CLASSES
2974           if (strcmp (now->val.str.startmb, "special1") == 0)
2975             {
2976               class_bit = _ISwspecial1;
2977               free (now->val.str.startmb);
2978               goto read_charclass;
2979             }
2980           if (strcmp (now->val.str.startmb, "special2") == 0)
2981             {
2982               class_bit = _ISwspecial2;
2983               free (now->val.str.startmb);
2984               goto read_charclass;
2985             }
2986           if (strcmp (now->val.str.startmb, "special3") == 0)
2987             {
2988               class_bit = _ISwspecial3;
2989               free (now->val.str.startmb);
2990               goto read_charclass;
2991             }
2992           if (strcmp (now->val.str.startmb, "tosymmetric") == 0)
2993             {
2994               mapidx = 2;
2995               goto read_mapping;
2996             }
2997 #endif
2998           break;
2999
3000         case tok_end:
3001           /* Next we assume `LC_CTYPE'.  */
3002           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
3003           if (now->tok == tok_eof)
3004             break;
3005           if (now->tok == tok_eol)
3006             lr_error (ldfile, _("%s: incomplete `END' line"),
3007                       "LC_CTYPE");
3008           else if (now->tok != tok_lc_ctype)
3009             lr_error (ldfile, _("\
3010 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
3011           lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
3012           return;
3013
3014         default:
3015         err_label:
3016           if (now->tok != tok_eof)
3017             SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
3018         }
3019
3020       /* Prepare for the next round.  */
3021       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
3022       nowtok = now->tok;
3023     }
3024
3025   /* When we come here we reached the end of the file.  */
3026   lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
3027 }
3028
3029
3030 static void
3031 set_class_defaults (struct locale_ctype_t *ctype,
3032                     const struct charmap_t *charmap,
3033                     struct repertoire_t *repertoire)
3034 {
3035   size_t cnt;
3036
3037   /* This function defines the default values for the classes and conversions
3038      according to POSIX.2 2.5.2.1.
3039      It may seem that the order of these if-blocks is arbitrary but it is NOT.
3040      Don't move them unless you know what you do!  */
3041
3042   auto void set_default (int bitpos, int from, int to);
3043
3044   void set_default (int bitpos, int from, int to)
3045     {
           /* Nested helper of set_class_defaults: put every character code
              in the inclusive range FROM..TO into the class whose bit
              position is BITPOS.  CTYPE and CHARMAP are the parameters of
              the enclosing function.

              For each code the charmap is asked first for the
              one-character name consisting of that code and, if that
              fails, for the name produced by the format "U%08X".  When
              the sequence found is exactly one byte long, _ISbit (BITPOS)
              is or-ed into the class256_collection entry of that byte;
              otherwise a diagnostic is printed.  Independent of the
              lookup result, _ISwbit (BITPOS) is or-ed into the
              class_collection element that ELEM selects for the code.  */
3046       char tmp[2];
3047       int ch;
3048       int bit = _ISbit (bitpos);
3049       int bitw = _ISwbit (bitpos);
3050       /* Make TMP a NUL-terminated one-character string; tmp[0] is replaced below.  */
3051       strcpy (tmp, "?");
3052
3053       for (ch = from; ch <= to; ++ch)
3054         {
3055           struct charseq *seq;
3056           tmp[0] = ch;
3057
               /* First try the name that is just the character itself.  */
3058           seq = charmap_find_value (charmap, tmp, 1);
3059           if (seq == NULL)
3060             {
                   /* Second try: `U' followed by eight hex digits.  For the
                      character codes passed in here these nine characters
                      plus the terminating NUL exactly fill BUF.  */
3061               char buf[10];
3062               sprintf (buf, "U%08X", ch);
3063               seq = charmap_find_value (charmap, buf, 9);
3064             }
3065           if (seq == NULL)
3066             {
                   /* Neither name is known to the charmap.  This message is
                      the only one here that be_quiet suppresses.  */
3067               if (!be_quiet)
3068                 WITH_CUR_LOCALE (error (0, 0, _("\
3069 %s: character `%s' not defined while needed as default value"),
3070                                         "LC_CTYPE", tmp));
3071             }
               /* A multi-byte sequence cannot be recorded in the 256-entry
                  table; this is reported even if be_quiet is set.  */
3072           else if (seq->nbytes != 1)
3073             WITH_CUR_LOCALE (error (0, 0, _("\
3074 %s: character `%s' in charmap not representable with one byte"),
3075                                     "LC_CTYPE", tmp));
3076           else
3077             ctype->class256_collection[seq->bytes[0]] |= bit;
3078
3079           /* No need to search here, the ASCII value is also the Unicode
3080              value.  */
3081           ELEM (ctype, class_collection, , ch) |= bitw;
3082         }
3083     }
3084
3085   /* Set default values if keyword was not present.  */
3086   if ((ctype->class_done & BITw (tok_upper)) == 0)
3087     /* "If this keyword [upper] is not specified, the uppercase letters
3088         `A' through `Z', ..., shall automatically belong to this class,
3089         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
3090     set_default (BITPOS (tok_upper), 'A', 'Z');
3091
3092   if ((ctype->class_done & BITw (tok_lower)) == 0)
3093     /* "If this keyword [lower] is not specified, the lowercase letters
3094         `a' through `z', ..., shall automatically belong to this class,
3095         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
3096     set_default (BITPOS (tok_lower), 'a', 'z');
3097
3098   if ((ctype->class_done & BITw (tok_alpha)) == 0)
3099     {
3100       /* Table 2-6 in P1003.2 says that characters in class `upper' or
3101          class `lower' *must* be in class `alpha'.  */
3102       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
3103       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
3104
3105       for (cnt = 0; cnt < 256; ++cnt)
3106         if ((ctype->class256_collection[cnt] & mask) != 0)
3107           ctype->class256_collection[cnt] |= BIT (tok_alpha);
3108
3109       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3110         if ((ctype->class_collection[cnt] & maskw) != 0)
3111           ctype->class_collection[cnt] |= BITw (tok_alpha);
3112     }
3113
3114   if ((ctype->class_done & BITw (tok_digit)) == 0)
3115     /* "If this keyword [digit] is not specified, the digits `0' through
3116         `9', ..., shall automatically belong to this class, with
3117         implementation-defined character values."  [P1003.2, 2.5.2.1]  */
3118     set_default (BITPOS (tok_digit), '0', '9');
3119
3120   /* "Only characters specified for the `alpha' and `digit' keyword
3121      shall be specified.  Characters specified for the keyword `alpha'
3122      and `digit' are automatically included in this class.  */
3123   {
3124     unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
3125     unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
3126
3127     for (cnt = 0; cnt < 256; ++cnt)
3128       if ((ctype->class256_collection[cnt] & mask) != 0)
3129         ctype->class256_collection[cnt] |= BIT (tok_alnum);
3130
3131     for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3132       if ((ctype->class_collection[cnt] & maskw) != 0)
3133         ctype->class_collection[cnt] |= BITw (tok_alnum);
3134   }
3135
3136   if ((ctype->class_done & BITw (tok_space)) == 0)
3137     /* "If this keyword [space] is not specified, the characters <space>,
3138         <form-feed>, <newline>, <carriage-return>, <tab>, and
3139         <vertical-tab>, ..., shall automatically belong to this class,
3140         with implementation-defined character values."  [P1003.2, 2.5.2.1]  */
3141     {
3142       struct charseq *seq;
3143
3144       seq = charmap_find_value (charmap, "space", 5);
3145       if (seq == NULL)
3146         seq = charmap_find_value (charmap, "SP", 2);
3147       if (seq == NULL)
3148         seq = charmap_find_value (charmap, "U00000020", 9);
3149       if (seq == NULL)
3150         {
3151           if (!be_quiet)
3152             WITH_CUR_LOCALE (error (0, 0, _("\
3153 %s: character `%s' not defined while needed as default value"),
3154                                     "LC_CTYPE", "<space>"));
3155         }
3156       else if (seq->nbytes != 1)
3157         WITH_CUR_LOCALE (error (0, 0, _("\
3158 %s: character `%s' in charmap not representable with one byte"),
3159                                 "LC_CTYPE", "<space>"));
3160       else
3161         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3162
3163       /* No need to search.  */
3164       ELEM (ctype, class_collection, , L' ') |= BITw (tok_space);
3165
3166       seq = charmap_find_value (charmap, "form-feed", 9);
3167       if (seq == NULL)
3168         seq = charmap_find_value (charmap, "U0000000C", 9);
3169       if (seq == NULL)
3170         {
3171           if (!be_quiet)
3172             WITH_CUR_LOCALE (error (0, 0, _("\
3173 %s: character `%s' not defined while needed as default value"),
3174                                     "LC_CTYPE", "<form-feed>"));
3175         }
3176       else if (seq->nbytes != 1)
3177         WITH_CUR_LOCALE (error (0, 0, _("\
3178 %s: character `%s' in charmap not representable with one byte"),
3179                                 "LC_CTYPE", "<form-feed>"));
3180       else
3181         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3182
3183       /* No need to search.  */
3184       ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space);
3185
3186
3187       seq = charmap_find_value (charmap, "newline", 7);
3188       if (seq == NULL)
3189         seq = charmap_find_value (charmap, "U0000000A", 9);
3190       if (seq == NULL)
3191         {
3192           if (!be_quiet)
3193             WITH_CUR_LOCALE (error (0, 0, _("\
3194 %s: character `%s' not defined while needed as default value"),
3195                                     "LC_CTYPE", "<newline>"));
3196         }
3197       else if (seq->nbytes != 1)
3198         WITH_CUR_LOCALE (error (0, 0, _("\
3199 %s: character `%s' in charmap not representable with one byte"),
3200                                 "LC_CTYPE", "<newline>"));
3201       else
3202         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3203
3204       /* No need to search.  */
3205       ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space);
3206
3207
3208       seq = charmap_find_value (charmap, "carriage-return", 15);
3209       if (seq == NULL)
3210         seq = charmap_find_value (charmap, "U0000000D", 9);
3211       if (seq == NULL)
3212         {
3213           if (!be_quiet)
3214             WITH_CUR_LOCALE (error (0, 0, _("\
3215 %s: character `%s' not defined while needed as default value"),
3216                                     "LC_CTYPE", "<carriage-return>"));
3217         }
3218       else if (seq->nbytes != 1)
3219         WITH_CUR_LOCALE (error (0, 0, _("\
3220 %s: character `%s' in charmap not representable with one byte"),
3221                                 "LC_CTYPE", "<carriage-return>"));
3222       else
3223         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3224
3225       /* No need to search.  */
3226       ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space);
3227
3228
3229       seq = charmap_find_value (charmap, "tab", 3);
3230       if (seq == NULL)
3231         seq = charmap_find_value (charmap, "U00000009", 9);
3232       if (seq == NULL)
3233         {
3234           if (!be_quiet)
3235             WITH_CUR_LOCALE (error (0, 0, _("\
3236 %s: character `%s' not defined while needed as default value"),
3237                                     "LC_CTYPE", "<tab>"));
3238         }
3239       else if (seq->nbytes != 1)
3240         WITH_CUR_LOCALE (error (0, 0, _("\
3241 %s: character `%s' in charmap not representable with one byte"),
3242                                 "LC_CTYPE", "<tab>"));
3243       else
3244         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3245
3246       /* No need to search.  */
3247       ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space);
3248
3249
3250       seq = charmap_find_value (charmap, "vertical-tab", 12);
3251       if (seq == NULL)
3252         seq = charmap_find_value (charmap, "U0000000B", 9);
3253       if (seq == NULL)
3254         {
3255           if (!be_quiet)
3256             WITH_CUR_LOCALE (error (0, 0, _("\
3257 %s: character `%s' not defined while needed as default value"),
3258                                     "LC_CTYPE", "<vertical-tab>"));
3259         }
3260       else if (seq->nbytes != 1)
3261         WITH_CUR_LOCALE (error (0, 0, _("\
3262 %s: character `%s' in charmap not representable with one byte"),
3263                                 "LC_CTYPE", "<vertical-tab>"));
3264       else
3265         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3266
3267       /* No need to search.  */
3268       ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space);
3269     }
3270
3271   if ((ctype->class_done & BITw (tok_xdigit)) == 0)
3272     /* "If this keyword is not specified, the digits `0' to `9', the
3273         uppercase letters `A' through `F', and the lowercase letters `a'
        through `f', ..., shall automatically belong to this class, with
3275         implementation defined character values."  [P1003.2, 2.5.2.1]  */
3276     {
3277       set_default (BITPOS (tok_xdigit), '0', '9');
3278       set_default (BITPOS (tok_xdigit), 'A', 'F');
3279       set_default (BITPOS (tok_xdigit), 'a', 'f');
3280     }
3281
3282   if ((ctype->class_done & BITw (tok_blank)) == 0)
3283     /* "If this keyword [blank] is unspecified, the characters <space> and
3284        <tab> shall belong to this character class."  [P1003.2, 2.5.2.1]  */
3285    {
3286       struct charseq *seq;
3287
3288       seq = charmap_find_value (charmap, "space", 5);
3289       if (seq == NULL)
3290         seq = charmap_find_value (charmap, "SP", 2);
3291       if (seq == NULL)
3292         seq = charmap_find_value (charmap, "U00000020", 9);
3293       if (seq == NULL)
3294         {
3295           if (!be_quiet)
3296             WITH_CUR_LOCALE (error (0, 0, _("\
3297 %s: character `%s' not defined while needed as default value"),
3298                                     "LC_CTYPE", "<space>"));
3299         }
3300       else if (seq->nbytes != 1)
3301         WITH_CUR_LOCALE (error (0, 0, _("\
3302 %s: character `%s' in charmap not representable with one byte"),
3303                                 "LC_CTYPE", "<space>"));
3304       else
3305         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3306
3307       /* No need to search.  */
3308       ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank);
3309
3310
3311       seq = charmap_find_value (charmap, "tab", 3);
3312       if (seq == NULL)
3313         seq = charmap_find_value (charmap, "U00000009", 9);
3314       if (seq == NULL)
3315         {
3316           if (!be_quiet)
3317             WITH_CUR_LOCALE (error (0, 0, _("\
3318 %s: character `%s' not defined while needed as default value"),
3319                                     "LC_CTYPE", "<tab>"));
3320         }
3321       else if (seq->nbytes != 1)
3322         WITH_CUR_LOCALE (error (0, 0, _("\
3323 %s: character `%s' in charmap not representable with one byte"),
3324                                 "LC_CTYPE", "<tab>"));
3325       else
3326         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3327
3328       /* No need to search.  */
3329       ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank);
3330     }
3331
3332   if ((ctype->class_done & BITw (tok_graph)) == 0)
3333     /* "If this keyword [graph] is not specified, characters specified for
3334         the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3335         shall belong to this character class."  [P1003.2, 2.5.2.1]  */
3336     {
3337       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3338         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3339       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3340         BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3341         BITw (tok_punct);
3342       size_t cnt;
3343
3344       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3345         if ((ctype->class_collection[cnt] & maskw) != 0)
3346           ctype->class_collection[cnt] |= BITw (tok_graph);
3347
3348       for (cnt = 0; cnt < 256; ++cnt)
3349         if ((ctype->class256_collection[cnt] & mask) != 0)
3350           ctype->class256_collection[cnt] |= BIT (tok_graph);
3351     }
3352
3353   if ((ctype->class_done & BITw (tok_print)) == 0)
3354     /* "If this keyword [print] is not provided, characters specified for
3355         the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3356         and the <space> character shall belong to this character class."
3357         [P1003.2, 2.5.2.1]  */
3358     {
3359       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3360         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3361       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3362         BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3363         BITw (tok_punct);
3364       size_t cnt;
3365       struct charseq *seq;
3366
3367       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3368         if ((ctype->class_collection[cnt] & maskw) != 0)
3369           ctype->class_collection[cnt] |= BITw (tok_print);
3370
3371       for (cnt = 0; cnt < 256; ++cnt)
3372         if ((ctype->class256_collection[cnt] & mask) != 0)
3373           ctype->class256_collection[cnt] |= BIT (tok_print);
3374
3375
3376       seq = charmap_find_value (charmap, "space", 5);
3377       if (seq == NULL)
3378         seq = charmap_find_value (charmap, "SP", 2);
3379       if (seq == NULL)
3380         seq = charmap_find_value (charmap, "U00000020", 9);
3381       if (seq == NULL)
3382         {
3383           if (!be_quiet)
3384             WITH_CUR_LOCALE (error (0, 0, _("\
3385 %s: character `%s' not defined while needed as default value"),
3386                                     "LC_CTYPE", "<space>"));
3387         }
3388       else if (seq->nbytes != 1)
3389         WITH_CUR_LOCALE (error (0, 0, _("\
3390 %s: character `%s' in charmap not representable with one byte"),
3391                                 "LC_CTYPE", "<space>"));
3392       else
3393         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
3394
3395       /* No need to search.  */
3396       ELEM (ctype, class_collection, , L' ') |= BITw (tok_print);
3397     }
3398
3399   if (ctype->tomap_done[0] == 0)
3400     /* "If this keyword [toupper] is not specified, the lowercase letters
3401         `a' through `z', and their corresponding uppercase letters `A' to
3402         `Z', ..., shall automatically be included, with implementation-
3403         defined character values."  [P1003.2, 2.5.2.1]  */
3404     {
3405       char tmp[4];
3406       int ch;
3407
3408       strcpy (tmp, "<?>");
3409
3410       for (ch = 'a'; ch <= 'z'; ++ch)
3411         {
3412           struct charseq *seq_from, *seq_to;
3413
3414           tmp[1] = (char) ch;
3415
3416           seq_from = charmap_find_value (charmap, &tmp[1], 1);
3417           if (seq_from == NULL)
3418             {
3419               char buf[10];
3420               sprintf (buf, "U%08X", ch);
3421               seq_from = charmap_find_value (charmap, buf, 9);
3422             }
3423           if (seq_from == NULL)
3424             {
3425               if (!be_quiet)
3426                 WITH_CUR_LOCALE (error (0, 0, _("\
3427 %s: character `%s' not defined while needed as default value"),
3428                                         "LC_CTYPE", tmp));
3429             }
3430           else if (seq_from->nbytes != 1)
3431             {
3432               if (!be_quiet)
3433                 WITH_CUR_LOCALE (error (0, 0, _("\
3434 %s: character `%s' needed as default value not representable with one byte"),
3435                                         "LC_CTYPE", tmp));
3436             }
3437           else
3438             {
3439               /* This conversion is implementation defined.  */
3440               tmp[1] = (char) (ch + ('A' - 'a'));
3441               seq_to = charmap_find_value (charmap, &tmp[1], 1);
3442               if (seq_to == NULL)
3443                 {
3444                   char buf[10];
3445                   sprintf (buf, "U%08X", ch + ('A' - 'a'));
3446                   seq_to = charmap_find_value (charmap, buf, 9);
3447                 }
3448               if (seq_to == NULL)
3449                 {
3450                   if (!be_quiet)
3451                     WITH_CUR_LOCALE (error (0, 0, _("\
3452 %s: character `%s' not defined while needed as default value"),
3453                                             "LC_CTYPE", tmp));
3454                 }
3455               else if (seq_to->nbytes != 1)
3456                 {
3457                   if (!be_quiet)
3458                     WITH_CUR_LOCALE (error (0, 0, _("\
3459 %s: character `%s' needed as default value not representable with one byte"),
3460                                             "LC_CTYPE", tmp));
3461                 }
3462               else
3463                 /* The index [0] is determined by the order of the
3464                    `ctype_map_newP' calls in `ctype_startup'.  */
3465                 ctype->map256_collection[0][seq_from->bytes[0]]
3466                   = seq_to->bytes[0];
3467             }
3468
3469           /* No need to search.  */
3470           ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
3471         }
3472     }
3473
3474   if (ctype->tomap_done[1] == 0)
3475     /* "If this keyword [tolower] is not specified, the mapping shall be
3476        the reverse mapping of the one specified to `toupper'."  [P1003.2]  */
3477     {
3478       for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
3479         if (ctype->map_collection[0][cnt] != 0)
3480           ELEM (ctype, map_collection, [1],
3481                 ctype->map_collection[0][cnt])
3482             = ctype->charnames[cnt];
3483
3484       for (cnt = 0; cnt < 256; ++cnt)
3485         if (ctype->map256_collection[0][cnt] != 0)
3486           ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
3487     }
3488
3489   if (ctype->outdigits_act != 10)
3490     {
3491       if (ctype->outdigits_act != 0)
3492         WITH_CUR_LOCALE (error (0, 0, _("\
3493 %s: field `%s' does not contain exactly ten entries"),
3494                                 "LC_CTYPE", "outdigit"));
3495
3496       for (cnt = ctype->outdigits_act; cnt < 10; ++cnt)
3497         {
3498           ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3499                                                          (char *) digits + cnt,
3500                                                          1);
3501
3502           if (ctype->mboutdigits[cnt] == NULL)
3503             ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3504                                                            longnames[cnt],
3505                                                            strlen (longnames[cnt]));
3506
3507           if (ctype->mboutdigits[cnt] == NULL)
3508             ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3509                                                            uninames[cnt], 9);
3510
3511           if (ctype->mboutdigits[cnt] == NULL)
3512             {
3513               /* Provide a replacement.  */
3514               WITH_CUR_LOCALE (error (0, 0, _("\
3515 no output digits defined and none of the standard names in the charmap")));
3516
3517               ctype->mboutdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
3518                                                        sizeof (struct charseq)
3519                                                        + 1);
3520
3521               /* This is better than nothing.  */
3522               ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
3523               ctype->mboutdigits[cnt]->nbytes = 1;
3524             }
3525
3526           ctype->wcoutdigits[cnt] = L'0' + cnt;
3527         }
3528
3529       ctype->outdigits_act = 10;
3530     }
3531 }
3532
3533
3534 /* Construction of sparse 3-level tables.
3535    See wchar-lookup.h for their structure and the meaning of p and q.  */
3536
/* One bit per wide character, stored as a sparse three-level table.
   A character WC is decomposed as
     level-1 index:  WC >> (q + p + 5)
     level-2 index:  the next q bits
     level-3 index:  the next p bits
     bit number:     the low 5 bits (one bit inside a 32-bit word).  */
struct wctype_table
{
  /* Parameters; the user sets them before calling wctype_table_init.  */
  unsigned int p;
  unsigned int q;

  /* Working representation.
     level1 holds one entry per level-1 index: EMPTY or the number of
     a level-2 block.  level1_alloc and level1_size count entries.  */
  size_t level1_alloc, level1_size;
  uint32_t *level1;

  /* level2 is a sequence of blocks of (1 << q) entries, each entry
     being EMPTY or the number of a level-3 block.  level2_alloc and
     level2_size count blocks, not entries.  */
  size_t level2_alloc, level2_size;
  uint32_t *level2;

  /* level3 is a sequence of blocks of (1 << p) 32-bit words holding
     the actual bits.  level3_alloc and level3_size count blocks.  */
  size_t level3_alloc, level3_size;
  uint32_t *level3;

  /* Compressed representation, produced by wctype_table_finalize:
     RESULT points to RESULT_SIZE bytes.  */
  size_t result_size;
  char *result;
};
3556
3557 /* Initialize.  Assumes t->p and t->q have already been set.  */
3558 static inline void
3559 wctype_table_init (struct wctype_table *t)
3560 {
3561   t->level1 = NULL;
3562   t->level1_alloc = t->level1_size = 0;
3563   t->level2 = NULL;
3564   t->level2_alloc = t->level2_size = 0;
3565   t->level3 = NULL;
3566   t->level3_alloc = t->level3_size = 0;
3567 }
3568
3569 /* Retrieve an entry.  */
3570 static inline int
3571 wctype_table_get (struct wctype_table *t, uint32_t wc)
3572 {
3573   uint32_t index1 = wc >> (t->q + t->p + 5);
3574   if (index1 < t->level1_size)
3575     {
3576       uint32_t lookup1 = t->level1[index1];
3577       if (lookup1 != EMPTY)
3578         {
3579           uint32_t index2 = ((wc >> (t->p + 5)) & ((1 << t->q) - 1))
3580                             + (lookup1 << t->q);
3581           uint32_t lookup2 = t->level2[index2];
3582           if (lookup2 != EMPTY)
3583             {
3584               uint32_t index3 = ((wc >> 5) & ((1 << t->p) - 1))
3585                                 + (lookup2 << t->p);
3586               uint32_t lookup3 = t->level3[index3];
3587               uint32_t index4 = wc & 0x1f;
3588
3589               return (lookup3 >> index4) & 1;
3590             }
3591         }
3592     }
3593   return 0;
3594 }
3595
3596 /* Add one entry.  */
3597 static void
3598 wctype_table_add (struct wctype_table *t, uint32_t wc)
3599 {
3600   uint32_t index1 = wc >> (t->q + t->p + 5);
3601   uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1);
3602   uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1);
3603   uint32_t index4 = wc & 0x1f;
3604   size_t i, i1, i2;
3605
3606   if (index1 >= t->level1_size)
3607     {
3608       if (index1 >= t->level1_alloc)
3609         {
3610           size_t alloc = 2 * t->level1_alloc;
3611           if (alloc <= index1)
3612             alloc = index1 + 1;
3613           t->level1 = (uint32_t *) xrealloc ((char *) t->level1,
3614                                              alloc * sizeof (uint32_t));
3615           t->level1_alloc = alloc;
3616         }
3617       while (index1 >= t->level1_size)
3618         t->level1[t->level1_size++] = EMPTY;
3619     }
3620
3621   if (t->level1[index1] == EMPTY)
3622     {
3623       if (t->level2_size == t->level2_alloc)
3624         {
3625           size_t alloc = 2 * t->level2_alloc + 1;
3626           t->level2 = (uint32_t *) xrealloc ((char *) t->level2,
3627                                              (alloc << t->q) * sizeof (uint32_t));
3628           t->level2_alloc = alloc;
3629         }
3630       i1 = t->level2_size << t->q;
3631       i2 = (t->level2_size + 1) << t->q;
3632       for (i = i1; i < i2; i++)
3633         t->level2[i] = EMPTY;
3634       t->level1[index1] = t->level2_size++;
3635     }
3636
3637   index2 += t->level1[index1] << t->q;
3638
3639   if (t->level2[index2] == EMPTY)
3640     {
3641       if (t->level3_size == t->level3_alloc)
3642         {
3643           size_t alloc = 2 * t->level3_alloc + 1;
3644           t->level3 = (uint32_t *) xrealloc ((char *) t->level3,
3645                                              (alloc << t->p) * sizeof (uint32_t));
3646           t->level3_alloc = alloc;
3647         }
3648       i1 = t->level3_size << t->p;
3649       i2 = (t->level3_size + 1) << t->p;
3650       for (i = i1; i < i2; i++)
3651         t->level3[i] = 0;
3652       t->level2[index2] = t->level3_size++;
3653     }
3654
3655   index3 += t->level2[index2] << t->p;
3656
3657   t->level3[index3] |= (uint32_t)1 << index4;
3658 }
3659
3660 /* Finalize and shrink.  */
3661 static void
3662 wctype_table_finalize (struct wctype_table *t)
3663 {
3664   size_t i, j, k;
3665   uint32_t reorder3[t->level3_size];
3666   uint32_t reorder2[t->level2_size];
3667   uint32_t level1_offset, level2_offset, level3_offset;
3668
3669   /* Uniquify level3 blocks.  */
3670   k = 0;
3671   for (j = 0; j < t->level3_size; j++)
3672     {
3673       for (i = 0; i < k; i++)
3674         if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3675                     (1 << t->p) * sizeof (uint32_t)) == 0)
3676           break;
3677       /* Relocate block j to block i.  */
3678       reorder3[j] = i;
3679       if (i == k)
3680         {
3681           if (i != j)
3682             memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3683                     (1 << t->p) * sizeof (uint32_t));
3684           k++;
3685         }
3686     }
3687   t->level3_size = k;
3688
3689   for (i = 0; i < (t->level2_size << t->q); i++)
3690     if (t->level2[i] != EMPTY)
3691       t->level2[i] = reorder3[t->level2[i]];
3692
3693   /* Uniquify level2 blocks.  */
3694   k = 0;
3695   for (j = 0; j < t->level2_size; j++)
3696     {
3697       for (i = 0; i < k; i++)
3698         if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3699                     (1 << t->q) * sizeof (uint32_t)) == 0)
3700           break;
3701       /* Relocate block j to block i.  */
3702       reorder2[j] = i;
3703       if (i == k)
3704         {
3705           if (i != j)
3706             memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3707                     (1 << t->q) * sizeof (uint32_t));
3708           k++;
3709         }
3710     }
3711   t->level2_size = k;
3712
3713   for (i = 0; i < t->level1_size; i++)
3714     if (t->level1[i] != EMPTY)
3715       t->level1[i] = reorder2[t->level1[i]];
3716
3717   /* Create and fill the resulting compressed representation.  */
3718   t->result_size =
3719     5 * sizeof (uint32_t)
3720     + t->level1_size * sizeof (uint32_t)
3721     + (t->level2_size << t->q) * sizeof (uint32_t)
3722     + (t->level3_size << t->p) * sizeof (uint32_t);
3723   t->result = (char *) xmalloc (t->result_size);
3724
3725   level1_offset =
3726     5 * sizeof (uint32_t);
3727   level2_offset =
3728     5 * sizeof (uint32_t)
3729     + t->level1_size * sizeof (uint32_t);
3730   level3_offset =
3731     5 * sizeof (uint32_t)
3732     + t->level1_size * sizeof (uint32_t)
3733     + (t->level2_size << t->q) * sizeof (uint32_t);
3734
3735   ((uint32_t *) t->result)[0] = t->q + t->p + 5;
3736   ((uint32_t *) t->result)[1] = t->level1_size;
3737   ((uint32_t *) t->result)[2] = t->p + 5;
3738   ((uint32_t *) t->result)[3] = (1 << t->q) - 1;
3739   ((uint32_t *) t->result)[4] = (1 << t->p) - 1;
3740
3741   for (i = 0; i < t->level1_size; i++)
3742     ((uint32_t *) (t->result + level1_offset))[i] =
3743       (t->level1[i] == EMPTY
3744        ? 0
3745        : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3746
3747   for (i = 0; i < (t->level2_size << t->q); i++)
3748     ((uint32_t *) (t->result + level2_offset))[i] =
3749       (t->level2[i] == EMPTY
3750        ? 0
3751        : (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
3752
3753   for (i = 0; i < (t->level3_size << t->p); i++)
3754     ((uint32_t *) (t->result + level3_offset))[i] = t->level3[i];
3755
3756   if (t->level1_alloc > 0)
3757     free (t->level1);
3758   if (t->level2_alloc > 0)
3759     free (t->level2);
3760   if (t->level3_alloc > 0)
3761     free (t->level3);
3762 }
3763
3764 #define TABLE wcwidth_table
3765 #define ELEMENT uint8_t
3766 #define DEFAULT 0xff
3767 #include "3level.h"
3768
3769 #define TABLE wctrans_table
3770 #define ELEMENT int32_t
3771 #define DEFAULT 0
3772 #define wctrans_table_add wctrans_table_add_internal
3773 #include "3level.h"
3774 #undef wctrans_table_add
3775 /* The wctrans_table must actually store the difference between the
3776    desired result and the argument.  */
3777 static inline void
3778 wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
3779 {
3780   wctrans_table_add_internal (t, wc, mapped_wc - wc);
3781 }
3782
3783
3784 /* Flattens the included transliterations into a translit list.
3785    Inserts them in the list at `cursor', and returns the new cursor.  */
3786 static struct translit_t **
3787 translit_flatten (struct locale_ctype_t *ctype,
3788                   const struct charmap_t *charmap,
3789                   struct translit_t **cursor)
3790 {
3791   while (ctype->translit_include != NULL)
3792     {
3793       const char *copy_locale = ctype->translit_include->copy_locale;
3794       const char *copy_repertoire = ctype->translit_include->copy_repertoire;
3795       struct localedef_t *other;
3796
3797       /* Unchain the include statement.  During the depth-first traversal
3798          we don't want to visit any locale more than once.  */
3799       ctype->translit_include = ctype->translit_include->next;
3800
3801       other = find_locale (LC_CTYPE, copy_locale, copy_repertoire, charmap);
3802
3803       if (other == NULL || other->categories[LC_CTYPE].ctype == NULL)
3804         {
3805           WITH_CUR_LOCALE (error (0, 0, _("\
3806 %s: transliteration data from locale `%s' not available"),
3807                                   "LC_CTYPE", copy_locale));
3808         }
3809       else
3810         {
3811           struct locale_ctype_t *other_ctype =
3812             other->categories[LC_CTYPE].ctype;
3813
3814           cursor = translit_flatten (other_ctype, charmap, cursor);
3815           assert (other_ctype->translit_include == NULL);
3816
3817           if (other_ctype->translit != NULL)
3818             {
3819               /* Insert the other_ctype->translit list at *cursor.  */
3820               struct translit_t *endp = other_ctype->translit;
3821               while (endp->next != NULL)
3822                 endp = endp->next;
3823
3824               endp->next = *cursor;
3825               *cursor = other_ctype->translit;
3826
3827               /* Avoid any risk of circular lists.  */
3828               other_ctype->translit = NULL;
3829
3830               cursor = &endp->next;
3831             }
3832
3833           if (ctype->default_missing == NULL)
3834             ctype->default_missing = other_ctype->default_missing;
3835         }
3836     }
3837
3838   return cursor;
3839 }
3840
3841 static void
3842 allocate_arrays (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
3843                  struct repertoire_t *repertoire)
3844 {
3845   size_t idx, nr;
3846   const void *key;
3847   size_t len;
3848   void *vdata;
3849   void *curs;
3850
3851   /* You wonder about this amount of memory?  This is only because some
3852      users do not manage to address the array with unsigned values or
3853      data types with range >= 256.  '\200' would result in the array
3854      index -128.  To help these poor people we duplicate the entries for
3855      128 up to 255 below the entry for \0.  */
3856   ctype->ctype_b = (char_class_t *) xcalloc (256 + 128, sizeof (char_class_t));
3857   ctype->ctype32_b = (char_class32_t *) xcalloc (256, sizeof (char_class32_t));
3858   ctype->class_b = (uint32_t **)
3859     xmalloc (ctype->nr_charclass * sizeof (uint32_t *));
3860   ctype->class_3level = (struct iovec *)
3861     xmalloc (ctype->nr_charclass * sizeof (struct iovec));
3862
3863   /* This is the array accessed using the multibyte string elements.  */
3864   for (idx = 0; idx < 256; ++idx)
3865     ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
3866
3867   /* Mirror first 127 entries.  We must take care that entry -1 is not
3868      mirrored because EOF == -1.  */
3869   for (idx = 0; idx < 127; ++idx)
3870     ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
3871
3872   /* The 32 bit array contains all characters < 0x100.  */
3873   for (idx = 0; idx < ctype->class_collection_act; ++idx)
3874     if (ctype->charnames[idx] < 0x100)
3875       ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
3876
3877   for (nr = 0; nr < ctype->nr_charclass; nr++)
3878     {
3879       ctype->class_b[nr] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
3880
3881       /* We only set CLASS_B for the bits in the ISO C classes, not
3882          the user defined classes.  The number should not change but
3883          who knows.  */
3884 #define LAST_ISO_C_BIT 11
3885       if (nr <= LAST_ISO_C_BIT)
3886         for (idx = 0; idx < 256; ++idx)
3887           if (ctype->class256_collection[idx] & _ISbit (nr))
3888             ctype->class_b[nr][idx >> 5] |= (uint32_t) 1 << (idx & 0x1f);
3889     }
3890
3891   for (nr = 0; nr < ctype->nr_charclass; nr++)
3892     {
3893       struct wctype_table t;
3894
3895       t.p = 4; /* or: 5 */
3896       t.q = 7; /* or: 6 */
3897       wctype_table_init (&t);
3898
3899       for (idx = 0; idx < ctype->class_collection_act; ++idx)
3900         if (ctype->class_collection[idx] & _ISwbit (nr))
3901           wctype_table_add (&t, ctype->charnames[idx]);
3902
3903       wctype_table_finalize (&t);
3904
3905       if (verbose)
3906         WITH_CUR_LOCALE (fprintf (stderr, _("\
3907 %s: table for class \"%s\": %lu bytes\n"),
3908                                  "LC_CTYPE", ctype->classnames[nr],
3909                                  (unsigned long int) t.result_size));
3910
3911       ctype->class_3level[nr].iov_base = t.result;
3912       ctype->class_3level[nr].iov_len = t.result_size;
3913     }
3914
3915   /* Room for table of mappings.  */
3916   ctype->map_b = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3917   ctype->map32_b = (uint32_t **) xmalloc (ctype->map_collection_nr
3918                                           * sizeof (uint32_t *));
3919   ctype->map_3level = (struct iovec *)
3920     xmalloc (ctype->map_collection_nr * sizeof (struct iovec));
3921
3922   /* Fill in all mappings.  */
3923   for (idx = 0; idx < 2; ++idx)
3924     {
3925       unsigned int idx2;
3926
3927       /* Allocate table.  */
3928       ctype->map_b[idx] = (uint32_t *)
3929         xmalloc ((256 + 128) * sizeof (uint32_t));
3930
3931       /* Copy values from collection.  */
3932       for (idx2 = 0; idx2 < 256; ++idx2)
3933         ctype->map_b[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
3934
3935       /* Mirror first 127 entries.  We must take care not to map entry
3936          -1 because EOF == -1.  */
3937       for (idx2 = 0; idx2 < 127; ++idx2)
3938         ctype->map_b[idx][idx2] = ctype->map_b[idx][256 + idx2];
3939
3940       /* EOF must map to EOF.  */
3941       ctype->map_b[idx][127] = EOF;
3942     }
3943
3944   for (idx = 0; idx < ctype->map_collection_nr; ++idx)
3945     {
3946       unsigned int idx2;
3947
3948       /* Allocate table.  */
3949       ctype->map32_b[idx] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
3950
3951       /* Copy values from collection.  Default is identity mapping.  */
3952       for (idx2 = 0; idx2 < 256; ++idx2)
3953         ctype->map32_b[idx][idx2] =
3954           (ctype->map_collection[idx][idx2] != 0
3955            ? ctype->map_collection[idx][idx2]
3956            : idx2);
3957     }
3958
3959   for (nr = 0; nr < ctype->map_collection_nr; nr++)
3960     {
3961       struct wctrans_table t;
3962
3963       t.p = 7;
3964       t.q = 9;
3965       wctrans_table_init (&t);
3966
3967       for (idx = 0; idx < ctype->map_collection_act[nr]; ++idx)
3968         if (ctype->map_collection[nr][idx] != 0)
3969           wctrans_table_add (&t, ctype->charnames[idx],
3970                              ctype->map_collection[nr][idx]);
3971
3972       wctrans_table_finalize (&t);
3973
3974       if (verbose)
3975         WITH_CUR_LOCALE (fprintf (stderr, _("\
3976 %s: table for map \"%s\": %lu bytes\n"),
3977                                  "LC_CTYPE", ctype->mapnames[nr],
3978                                  (unsigned long int) t.result_size));
3979
3980       ctype->map_3level[nr].iov_base = t.result;
3981       ctype->map_3level[nr].iov_len = t.result_size;
3982     }
3983
3984   /* Extra array for class and map names.  */
3985   ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3986                                                 * sizeof (uint32_t));
3987   ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3988                                               * sizeof (uint32_t));
3989
3990   ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
3991   ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
3992
3993   /* Array for width information.  Because the expected widths are very
3994      small (never larger than 2) we use only one single byte.  This
3995      saves space.
3996      We put only printable characters in the table.  wcwidth is specified
3997      to return -1 for non-printable characters.  Doing the check here
3998      saves a run-time check.
3999      But we put L'\0' in the table.  This again saves a run-time check.  */
4000   {
4001     struct wcwidth_table t;
4002
4003     t.p = 7;
4004     t.q = 9;
4005     wcwidth_table_init (&t);
4006
4007     /* First set all the printable characters of the character set to
4008        the default width.  */
4009     curs = NULL;
4010     while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
4011       {
4012         struct charseq *data = (struct charseq *) vdata;
4013
4014         if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
4015           data->ucs4 = repertoire_find_value (ctype->repertoire,
4016                                               data->name, len);
4017
4018         if (data->ucs4 != ILLEGAL_CHAR_VALUE)
4019           {
4020             uint32_t *class_bits =
4021               find_idx (ctype, &ctype->class_collection, NULL,
4022                         &ctype->class_collection_act, data->ucs4);
4023
4024             if (class_bits != NULL && (*class_bits & BITw (tok_print)))
4025               wcwidth_table_add (&t, data->ucs4, charmap->width_default);
4026           }
4027       }
4028
4029     /* Now add the explicitly specified widths.  */
4030     if (charmap->width_rules != NULL)
4031       {
4032         size_t cnt;
4033
4034         for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
4035           {
4036             unsigned char bytes[charmap->mb_cur_max];
4037             int nbytes = charmap->width_rules[cnt].from->nbytes;
4038
4039             /* We have the range of characters for which the width is
4040                specified described using byte sequences of the multibyte
4041                charset.  We have to convert this to UCS4 now.  And we
4042                cannot simply convert the beginning and the end of the
4043                sequence, we have to iterate over the byte sequence and
4044                convert it for every single character.  */
4045             memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
4046
4047             while (nbytes < charmap->width_rules[cnt].to->nbytes
4048                    || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
4049                               nbytes) <= 0)
4050               {
4051                 /* Find the UCS value for `bytes'.  */
4052                 int inner;
4053                 uint32_t wch;
4054                 struct charseq *seq =
4055                   charmap_find_symbol (charmap, (char *) bytes, nbytes);
4056
4057                 if (seq == NULL)
4058                   wch = ILLEGAL_CHAR_VALUE;
4059                 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
4060                   wch = seq->ucs4;
4061                 else
4062                   wch = repertoire_find_value (ctype->repertoire, seq->name,
4063                                                strlen (seq->name));
4064
4065                 if (wch != ILLEGAL_CHAR_VALUE)
4066                   {
4067                     /* Store the value.  */
4068                     uint32_t *class_bits =
4069                       find_idx (ctype, &ctype->class_collection, NULL,
4070                                 &ctype->class_collection_act, wch);
4071
4072                     if (class_bits != NULL && (*class_bits & BITw (tok_print)))
4073                       wcwidth_table_add (&t, wch,
4074                                          charmap->width_rules[cnt].width);
4075                   }
4076
4077                 /* "Increment" the byte sequence.  */
4078                 inner = nbytes - 1;
4079                 while (inner >= 0 && bytes[inner] == 0xff)
4080                   --inner;
4081
4082                 if (inner < 0)
4083                   {
4084                     /* We have to extend the byte sequence.  */
4085                     if (nbytes >= charmap->width_rules[cnt].to->nbytes)
4086                       break;
4087
4088                     bytes[0] = 1;
4089                     memset (&bytes[1], 0, nbytes);
4090                     ++nbytes;
4091                   }
4092                 else
4093                   {
4094                     ++bytes[inner];
4095                     while (++inner < nbytes)
4096                       bytes[inner] = 0;
4097                   }
4098               }
4099           }
4100       }
4101
4102     /* Set the width of L'\0' to 0.  */
4103     wcwidth_table_add (&t, 0, 0);
4104
4105     wcwidth_table_finalize (&t);
4106
4107     if (verbose)
4108       WITH_CUR_LOCALE (fprintf (stderr, _("%s: table for width: %lu bytes\n"),
4109                                "LC_CTYPE", (unsigned long int) t.result_size));
4110
4111     ctype->width.iov_base = t.result;
4112     ctype->width.iov_len = t.result_size;
4113   }
4114
4115   /* Set MB_CUR_MAX.  */
4116   ctype->mb_cur_max = charmap->mb_cur_max;
4117
4118   /* Now determine the table for the transliteration information.
4119
4120      XXX It is not yet clear to me whether it is worth implementing a
4121      complicated algorithm which uses a hash table to locate the entries.
4122      For now I'll use a simple array which can be searched using binary
4123      search.  */
4124   if (ctype->translit_include != NULL)
4125     /* Traverse the locales mentioned in the `include' statements in a
4126        depth-first way and fold in their transliteration information.  */
4127     translit_flatten (ctype, charmap, &ctype->translit);
4128
4129   if (ctype->translit != NULL)
4130     {
4131       /* First count how many entries we have.  This is the upper limit
4132          since some entries from the included files might be overwritten.  */
4133       size_t number = 0;
4134       size_t cnt;
4135       struct translit_t *runp = ctype->translit;
4136       struct translit_t **sorted;
4137       size_t from_len, to_len;
4138
4139       while (runp != NULL)
4140         {
4141           ++number;
4142           runp = runp->next;
4143         }
4144
4145       /* Next we allocate an array large enough and fill in the values.  */
4146       sorted = (struct translit_t **) alloca (number
4147                                               * sizeof (struct translit_t **));
4148       runp = ctype->translit;
4149       number = 0;
4150       do
4151         {
4152           /* Search for the place where to insert this string.
4153              XXX Better use a real sorting algorithm later.  */
4154           size_t idx = 0;
4155           int replace = 0;
4156
4157           while (idx < number)
4158             {
4159               int res = wcscmp ((const wchar_t *) sorted[idx]->from,
4160                                 (const wchar_t *) runp->from);
4161               if (res == 0)
4162                 {
4163                   replace = 1;
4164                   break;
4165                 }
4166               if (res > 0)
4167                 break;
4168               ++idx;
4169             }
4170
4171           if (replace)
4172             sorted[idx] = runp;
4173           else
4174             {
4175               memmove (&sorted[idx + 1], &sorted[idx],
4176                        (number - idx) * sizeof (struct translit_t *));
4177               sorted[idx] = runp;
4178               ++number;
4179             }
4180
4181           runp = runp->next;
4182         }
4183       while (runp != NULL);
4184
4185       /* The next step is putting all the possible transliteration
4186          strings in one memory block so that we can write it out.
4187          We need several different blocks:
4188          - index to the from-string array
4189          - from-string array
4190          - index to the to-string array
4191          - to-string array.
4192       */
4193       from_len = to_len = 0;
4194       for (cnt = 0; cnt < number; ++cnt)
4195         {
4196           struct translit_to_t *srunp;
4197           from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4198           srunp = sorted[cnt]->to;
4199           while (srunp != NULL)
4200             {
4201               to_len += wcslen ((const wchar_t *) srunp->str) + 1;
4202               srunp = srunp->next;
4203             }
4204           /* Plus one for the extra NUL character marking the end of
4205              the list for the current entry.  */
4206           ++to_len;
4207         }
4208
4209       /* We can allocate the arrays for the results.  */
4210       ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
4211       ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
4212       ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
4213       ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
4214
4215       from_len = 0;
4216       to_len = 0;
4217       for (cnt = 0; cnt < number; ++cnt)
4218         {
4219           size_t len;
4220           struct translit_to_t *srunp;
4221
4222           ctype->translit_from_idx[cnt] = from_len;
4223           ctype->translit_to_idx[cnt] = to_len;
4224
4225           len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4226           wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
4227                    (const wchar_t *) sorted[cnt]->from, len);
4228           from_len += len;
4229
4230           ctype->translit_to_idx[cnt] = to_len;
4231           srunp = sorted[cnt]->to;
4232           while (srunp != NULL)
4233             {
4234               len = wcslen ((const wchar_t *) srunp->str) + 1;
4235               wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
4236                        (const wchar_t *) srunp->str, len);
4237               to_len += len;
4238               srunp = srunp->next;
4239             }
4240           ctype->translit_to_tbl[to_len++] = L'\0';
4241         }
4242
4243       /* Store the information about the length.  */
4244       ctype->translit_idx_size = number;
4245       ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
4246       ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
4247     }
4248   else
4249     {
4250       /* Provide some dummy pointers since we have nothing to write out.  */
4251       static uint32_t no_str = { 0 };
4252
4253       ctype->translit_from_idx = &no_str;
4254       ctype->translit_from_tbl = &no_str;
4255       ctype->translit_to_tbl = &no_str;
4256       ctype->translit_idx_size = 0;
4257       ctype->translit_from_tbl_size = 0;
4258       ctype->translit_to_tbl_size = 0;
4259     }
4260 }