locale/programs/ld-ctype.c

   1 /* Copyright (C) 1995-2006,2007,2009,2011 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
   4
   5    This program is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published
   7    by the Free Software Foundation; version 2 of the License, or
   8    (at your option) any later version.
   9
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, see <http://www.gnu.org/licenses/>.  */
  17
  18 #ifdef HAVE_CONFIG_H
  19 # include <config.h>
  20 #endif
  21
  22 #include <alloca.h>
  23 #include <byteswap.h>
  24 #include <endian.h>
  25 #include <errno.h>
  26 #include <limits.h>
  27 #include <obstack.h>
  28 #include <stdlib.h>
  29 #include <string.h>
  30 #include <wchar.h>
  31 #include <wctype.h>
  32 #include <sys/uio.h>
  33
  34 #include "localedef.h"
  35 #include "charmap.h"
  36 #include "localeinfo.h"
  37 #include "langinfo.h"
  38 #include "linereader.h"
  39 #include "locfile-token.h"
  40 #include "locfile.h"
  41
  42 #include <assert.h>
  43
  44
  45 #ifdef PREDEFINED_CLASSES
  46 /* Extra class bits that wctype.h does not preallocate.  The constants are
  47    unsigned: shifting a signed 1 into bit 31 overflows a 32-bit int.  */
  48 # define _ISwspecial1   (1U << 29)
  49 # define _ISwspecial2   (1U << 30)
  50 # define _ISwspecial3   (1U << 31)
  51 #endif
  52
  53
  54 /* Bit index of a class token relative to tok_upper, and the masks for it.  */
  55 #define BITPOS(class) ((class) - tok_upper)
  56 #define BIT(class) (_ISbit (BITPOS (class)))     /* Mask made by _ISbit.  */
  57 #define BITw(class) (_ISwbit (BITPOS (class)))   /* Mask made by _ISwbit.  */
  58 /* Dereference find_idx's result for VALUE; IDX is pasted after each member.  */
  59 #define ELEM(ctype, collection, idx, value)                                   \
  60   *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,     \
  61              &ctype->collection##_act idx, value)
  62
  63
  64 /* For compatibility with former implementations the number of bits
  65    for character classes is restricted to 16 for now.  When that
  66    compatibility is no longer needed, increase the number to 32.  */
  67 #define char_class_t uint16_t     /* The restricted 16-bit class type.  */
  68 #define char_class32_t uint32_t   /* The 32-bit class type.  */
  69
  70
  71 /* Types describing a transliteration action: one from-string, possibly
  72    of several characters, and a set of to-strings of the same kind.
  73    All characters are 32-bit values since this is what the gconv
  74    functions use.  */
  75 struct translit_to_t
  76 {
  77   uint32_t *str;                /* One to-string.  */
  78
  79   struct translit_to_t *next;   /* Next to-string in the list.  */
  80 };
  81
  82 struct translit_t               /* One action: FROM and its to-strings.  */
  83 {
  84   uint32_t *from;               /* The from-string.  */
  85
  86   const char *fname;            /* Presumably the defining file -- confirm.  */
  87   size_t lineno;                /* Presumably the line in FNAME -- confirm.  */
  88
  89   struct translit_to_t *to;     /* Head of the list of to-strings.  */
  90
  91   struct translit_t *next;      /* Next action in the list.  */
  92 };
  93
  94 struct translit_ignore_t        /* One entry of the translit_ignore list.  */
  95 {
  96   uint32_t from;                /* NOTE(review): FROM, TO and STEP look like  */
  97   uint32_t to;                  /* a stepped character range -- confirm in    */
  98   uint32_t step;                /* the code that fills this structure.        */
  99
 100   const char *fname;            /* Presumably the defining file -- confirm.  */
 101   size_t lineno;                /* Presumably the line in FNAME -- confirm.  */
 102
 103   struct translit_ignore_t *next;       /* Next entry in the list.  */
 104 };
 105
 106
 107 /* Type to describe a transliteration include statement.  */
 108 struct translit_include_t
 109 {
 110   const char *copy_locale;      /* Presumably the included locale -- confirm.  */
 111   const char *copy_repertoire;  /* Presumably its repertoire map -- confirm.  */
 112
 113   struct translit_include_t *next;      /* Next include in the list.  */
 114 };
 115
 116
 117 /* Sparse table of uint32_t, instantiated from the 3level.h template.  */
 118 #define TABLE idx_table         /* Prefix of struct idx_table, idx_table_init.  */
 119 #define ELEMENT uint32_t
 120 #define DEFAULT ((uint32_t) ~0) /* All bits set, the same value as EMPTY.  */
 121 #define NO_FINALIZE             /* NOTE(review): meaning is in 3level.h.  */
 122 #include "3level.h"
 123
 124
 125 /* All LC_CTYPE data gathered for one locale, plus its binary output arrays.  */
 126 struct locale_ctype_t
 127 {
 128   uint32_t *charnames;          /* Character value held in each table slot.  */
 129   size_t charnames_max;         /* Allocated slots; 256 or 512 at startup.  */
 130   size_t charnames_act;         /* Set to 256 by ctype_startup.  */
 131   /* An index lookup table, to speed up find_idx.  */
 132   struct idx_table charnames_idx;
 133
 134   struct repertoire_t *repertoire;      /* ctype_finish may set this.  */
 135
 136   /* We will allow up to 8 * sizeof (uint32_t) character classes.  */
 137 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
 138   size_t nr_charclass;
 139   const char *classnames[MAX_NR_CHARCLASS];
 140   uint32_t last_class_char;     /* Starts out as ILLEGAL_CHAR_VALUE.  */
 141   uint32_t class256_collection[256];    /* BIT masks per byte value.  */
 142   uint32_t *class_collection;   /* BITw masks per slot, see charnames.  */
 143   size_t class_collection_max;  /* Allocated slots; 256 or 512 at startup.  */
 144   size_t class_collection_act;  /* Slots checked by ctype_finish.  */
 145   uint32_t class_done;
 146   uint32_t class_offset;
 147
 148   struct charseq **mbdigits;    /* Input digits as charmap sequences.  */
 149   size_t mbdigits_act;          /* Kept a multiple of ten by ctype_finish.  */
 150   size_t mbdigits_max;
 151   uint32_t *wcdigits;           /* Input digits as wide characters.  */
 152   size_t wcdigits_act;
 153   size_t wcdigits_max;
 154
 155   struct charseq *mboutdigits[10];      /* `outdigit' charmap sequences.  */
 156   uint32_t wcoutdigits[10];     /* `outdigit' wide characters.  */
 157   size_t outdigits_act;
 158
 159   /* If the following number ever turns out to be too small simply
 160      increase it.  But I doubt it will.  --drepper@gnu */
 161 #define MAX_NR_CHARMAP 16
 162   const char *mapnames[MAX_NR_CHARMAP];
 163   uint32_t *map_collection[MAX_NR_CHARMAP];
 164   uint32_t map256_collection[2][256];   /* Byte maps: [0] upper, [1] lower.  */
 165   size_t map_collection_max[MAX_NR_CHARMAP];
 166   size_t map_collection_act[MAX_NR_CHARMAP];
 167   size_t map_collection_nr;
 168   size_t last_map_idx;          /* Starts out as MAX_NR_CHARMAP.  */
 169   int tomap_done[MAX_NR_CHARMAP];
 170   uint32_t map_offset;
 171
 172   /* Transliteration information.  */
 173   struct translit_include_t *translit_include;
 174   struct translit_t *translit;
 175   struct translit_ignore_t *translit_ignore;
 176   uint32_t ntranslit_ignore;
 177
 178   uint32_t *default_missing;
 179   const char *default_missing_file;
 180   size_t default_missing_lineno;
 181
 182   uint32_t to_nonascii;         /* 1 if enc_not_ascii_compatible was set.  */
 183   uint32_t nonascii_case;       /* 1 if byte case maps differ from ASCII.  */
 184
 185   /* The arrays for the binary representation.  */
 186   char_class_t *ctype_b;
 187   char_class32_t *ctype32_b;
 188   uint32_t **map_b;
 189   uint32_t **map32_b;
 190   uint32_t **class_b;
 191   struct iovec *class_3level;
 192   struct iovec *map_3level;
 193   uint32_t *class_name_ptr;
 194   uint32_t *map_name_ptr;
 195   struct iovec width;
 196   uint32_t mb_cur_max;
 197   const char *codeset_name;     /* The charmap's name, or "//UNKNOWN//".  */
 198   uint32_t *translit_from_idx;
 199   uint32_t *translit_from_tbl;
 200   uint32_t *translit_to_idx;
 201   uint32_t *translit_to_tbl;
 202   uint32_t translit_idx_size;
 203   size_t translit_from_tbl_size;
 204   size_t translit_to_tbl_size;
 205
 206   struct obstack mempool;       /* Initialized by ctype_startup.  */
 207 };
 208
 209
 210 /* Marker for an empty slot.  This has the value 0xFFFFFFFF, regardless
 211    of whether 'int' is 16 bit, 32 bit, or 64 bit.  */
 212 #define EMPTY ((uint32_t) ~0)
 213
 214
 215 #define obstack_chunk_alloc xmalloc   /* Chunk allocator for obstack.h.  */
 216 #define obstack_chunk_free free       /* Chunk release for obstack.h.  */
 217
 218
 219 /* Prototypes for the static helper functions of this file.  */
 220 static void ctype_startup (struct linereader *lr, struct localedef_t *locale,
 221                            const struct charmap_t *charmap,
 222                            struct localedef_t *copy_locale,
 223                            int ignore_content);
 224 static void ctype_class_new (struct linereader *lr,
 225                              struct locale_ctype_t *ctype, const char *name);
 226 static void ctype_map_new (struct linereader *lr,
 227                            struct locale_ctype_t *ctype,
 228                            const char *name, const struct charmap_t *charmap);
 229 static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table,
 230                            size_t *max, size_t *act, unsigned int idx);
 231 static void set_class_defaults (struct locale_ctype_t *ctype,
 232                                 const struct charmap_t *charmap,
 233                                 struct repertoire_t *repertoire);
 234 static void allocate_arrays (struct locale_ctype_t *ctype,
 235                              const struct charmap_t *charmap,
 236                              struct repertoire_t *repertoire);
 237
 238
 239 static const char *longnames[] =      /* Charmap names tried for the digits.  */
 240 {
 241   "zero", "one", "two", "three", "four",
 242   "five", "six", "seven", "eight", "nine"
 243 };
 244 static const char *uninames[] =       /* U-notation names of '0' to '9'.  */
 245 {
 246   "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
 247   "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
 248 };
 249 static const unsigned char digits[] = "0123456789";   /* The ten digits.  */
 250 /* Set up LOCALE's LC_CTYPE data unless it exists or IGNORE_CONTENT is set:
 251    share COPY_LOCALE's data if given, else build defaults sized by CHARMAP.  */
 252 static void
 253 ctype_startup (struct linereader *lr, struct localedef_t *locale,
 254                const struct charmap_t *charmap,
 255                struct localedef_t *copy_locale, int ignore_content)
 256 {
 257   unsigned int cnt;
 258   struct locale_ctype_t *ctype;
 259   /* LR is only passed on to ctype_class_new and ctype_map_new.  */
 260   if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL)
 261     {
 262       if (copy_locale == NULL)
 263         {
 264           /* Allocate the needed room.  */
 265           locale->categories[LC_CTYPE].ctype = ctype =
 266             (struct locale_ctype_t *) xcalloc (1,
 267                                                sizeof (struct locale_ctype_t));
 268
 269           /* No names seen yet: slots 0..255 hold their own index.  */
 270           ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
 271           ctype->charnames =
 272             (uint32_t *) xmalloc (ctype->charnames_max
 273                                   * sizeof (uint32_t));
 274           for (cnt = 0; cnt < 256; ++cnt)
 275             ctype->charnames[cnt] = cnt;
 276           ctype->charnames_act = 256;
 277           idx_table_init (&ctype->charnames_idx);
 278
 279           /* Fill character class information.  */
 280           ctype->last_class_char = ILLEGAL_CHAR_VALUE;
 281           /* The order of the following instructions determines the bit
 282              positions!  */
 283           ctype_class_new (lr, ctype, "upper");
 284           ctype_class_new (lr, ctype, "lower");
 285           ctype_class_new (lr, ctype, "alpha");
 286           ctype_class_new (lr, ctype, "digit");
 287           ctype_class_new (lr, ctype, "xdigit");
 288           ctype_class_new (lr, ctype, "space");
 289           ctype_class_new (lr, ctype, "print");
 290           ctype_class_new (lr, ctype, "graph");
 291           ctype_class_new (lr, ctype, "blank");
 292           ctype_class_new (lr, ctype, "cntrl");
 293           ctype_class_new (lr, ctype, "punct");
 294           ctype_class_new (lr, ctype, "alnum");
 295 #ifdef PREDEFINED_CLASSES
 296           /* The following are extensions from ISO 14652.  */
 297           ctype_class_new (lr, ctype, "left_to_right");
 298           ctype_class_new (lr, ctype, "right_to_left");
 299           ctype_class_new (lr, ctype, "num_terminator");
 300           ctype_class_new (lr, ctype, "num_separator");
 301           ctype_class_new (lr, ctype, "segment_separator");
 302           ctype_class_new (lr, ctype, "block_separator");
 303           ctype_class_new (lr, ctype, "direction_control");
 304           ctype_class_new (lr, ctype, "sym_swap_layout");
 305           ctype_class_new (lr, ctype, "char_shape_selector");
 306           ctype_class_new (lr, ctype, "num_shape_selector");
 307           ctype_class_new (lr, ctype, "non_spacing");
 308           ctype_class_new (lr, ctype, "non_spacing_level3");
 309           ctype_class_new (lr, ctype, "normal_connect");
 310           ctype_class_new (lr, ctype, "r_connect");
 311           ctype_class_new (lr, ctype, "no_connect");
 312           ctype_class_new (lr, ctype, "no_connect-space");
 313           ctype_class_new (lr, ctype, "vowel_connect");
 314 #endif
 315           /* One uint32_t of class bits per slot.  */
 316           ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
 317           ctype->class_collection
 318             = (uint32_t *) xcalloc (ctype->class_collection_max,
 319                                     sizeof (uint32_t));
 320           ctype->class_collection_act = 256;
 321
 322           /* Fill character map information.  */
 323           ctype->last_map_idx = MAX_NR_CHARMAP;
 324           ctype_map_new (lr, ctype, "toupper", charmap);
 325           ctype_map_new (lr, ctype, "tolower", charmap);
 326 #ifdef PREDEFINED_CLASSES
 327           ctype_map_new (lr, ctype, "tosymmetric", charmap);
 328 #endif
 329
 330           /* Fill first 256 entries in `toXXX' arrays with the identity.  */
 331           for (cnt = 0; cnt < 256; ++cnt)
 332             {
 333               ctype->map_collection[0][cnt] = cnt;
 334               ctype->map_collection[1][cnt] = cnt;
 335 #ifdef PREDEFINED_CLASSES
 336               ctype->map_collection[2][cnt] = cnt;
 337 #endif
 338               ctype->map256_collection[0][cnt] = cnt;
 339               ctype->map256_collection[1][cnt] = cnt;
 340             }
 341
 342           if (enc_not_ascii_compatible)
 343             ctype->to_nonascii = 1;
 344
 345           obstack_init (&ctype->mempool);
 346         }
 347       else
 348         locale->categories[LC_CTYPE].ctype
 349           = copy_locale->categories[LC_CTYPE].ctype;
 350     }
 351 }
 352
 353
 354 void
 355 ctype_finish (struct localedef_t *locale, const struct charmap_t *charmap)
 356 {
 357   /* See POSIX.2, table 2-6 for the meaning of the following table.  */
 358 #define NCLASS 12
 359   static const struct
 360   {
 361     const char *name;
 362     const char allow[NCLASS];
 363   }
 364   valid_table[NCLASS] =
 365   {
 366     /* The order is important.  See token.h for more information.
 367        M = Always, D = Default, - = Permitted, X = Mutually exclusive  */
 368     { "upper",  "--MX-XDDXXX-" },
 369     { "lower",  "--MX-XDDXXX-" },
 370     { "alpha",  "---X-XDDXXX-" },
 371     { "digit",  "XXX--XDDXXX-" },
 372     { "xdigit", "-----XDDXXX-" },
 373     { "space",  "XXXXX------X" },
 374     { "print",  "---------X--" },
 375     { "graph",  "---------X--" },
 376     { "blank",  "XXXXXM-----X" },
 377     { "cntrl",  "XXXXX-XX--XX" },
 378     { "punct",  "XXXXX-DD-X-X" },
 379     { "alnum",  "-----XDDXXX-" }
 380   };
 381   size_t cnt;
 382   int cls1, cls2;
 383   uint32_t space_value;
 384   struct charseq *space_seq;
 385   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 386   int warned;
 387   const void *key;
 388   size_t len;
 389   void *vdata;
 390   void *curs;
 391
 392   /* Now resolve copying and also handle completely missing definitions.  */
 393   if (ctype == NULL)
 394     {
 395       const char *repertoire_name;
 396
 397       /* First see whether we were supposed to copy.  If yes, find the
 398          actual definition.  */
 399       if (locale->copy_name[LC_CTYPE] != NULL)
 400         {
 401           /* Find the locale to copy from.  This has to happen transitively
 402              since that locale might itself be copying from another one.  */
 403           struct localedef_t *from = locale;
 404
 405           do
 406             from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
 407                                 from->repertoire_name, charmap);
 408           while (from->categories[LC_CTYPE].ctype == NULL
 409                  && from->copy_name[LC_CTYPE] != NULL);
 410
 411           ctype = locale->categories[LC_CTYPE].ctype
 412             = from->categories[LC_CTYPE].ctype;
 413         }
 414
 415       /* If there is still no definition, issue a warning and create an
 416          empty one.  */
 417       if (ctype == NULL)
 418         {
 419           if (! be_quiet)
 420             WITH_CUR_LOCALE (error (0, 0, _("\
 421 No definition for %s category found"), "LC_CTYPE"));
 422           ctype_startup (NULL, locale, charmap, NULL, 0);
 423           ctype = locale->categories[LC_CTYPE].ctype;
 424         }
 425
 426       /* Get the repertoire we have to use.  */
 427       repertoire_name = locale->repertoire_name ?: repertoire_global;
 428       if (repertoire_name != NULL)
 429         ctype->repertoire = repertoire_read (repertoire_name);
 430     }
 431
 432   /* We need the name of the currently used 8-bit character set to
 433      make correct conversion between this 8-bit representation and the
 434      ISO 10646 character set used internally for wide characters.  */
 435   ctype->codeset_name = charmap->code_set_name;
 436   if (ctype->codeset_name == NULL)
 437     {
 438       if (! be_quiet)
 439         WITH_CUR_LOCALE (error (0, 0, _("\
 440 No character set name specified in charmap")));
 441       ctype->codeset_name = "//UNKNOWN//";
 442     }
 443
 444   /* Set default value for classes not specified.  */
 445   set_class_defaults (ctype, charmap, ctype->repertoire);
 446
 447   /* Check according to table.  */
 448   for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
 449     {
 450       uint32_t tmp = ctype->class_collection[cnt];
 451
 452       if (tmp != 0)
 453         {
 454           for (cls1 = 0; cls1 < NCLASS; ++cls1)
 455             if ((tmp & _ISwbit (cls1)) != 0)
 456               for (cls2 = 0; cls2 < NCLASS; ++cls2)
 457                 if (valid_table[cls1].allow[cls2] != '-')
 458                   {
 459                     int eq = (tmp & _ISwbit (cls2)) != 0;
 460                     switch (valid_table[cls1].allow[cls2])
 461                       {
 462                       case 'M':
 463                         if (!eq)
 464                           {
 465                             uint32_t value = ctype->charnames[cnt];
 466
 467                             if (!be_quiet)
 468                               WITH_CUR_LOCALE (error (0, 0, _("\
 469 character L'\\u%0*x' in class `%s' must be in class `%s'"),
 470                                                       value > 0xffff ? 8 : 4,
 471                                                       value,
 472                                                       valid_table[cls1].name,
 473                                                       valid_table[cls2].name));
 474                           }
 475                         break;
 476
 477                       case 'X':
 478                         if (eq)
 479                           {
 480                             uint32_t value = ctype->charnames[cnt];
 481
 482                             if (!be_quiet)
 483                               WITH_CUR_LOCALE (error (0, 0, _("\
 484 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
 485                                                       value > 0xffff ? 8 : 4,
 486                                                       value,
 487                                                       valid_table[cls1].name,
 488                                                       valid_table[cls2].name));
 489                           }
 490                         break;
 491
 492                       case 'D':
 493                         ctype->class_collection[cnt] |= _ISwbit (cls2);
 494                         break;
 495
 496                       default:
 497                         WITH_CUR_LOCALE (error (5, 0, _("\
 498 internal error in %s, line %u"), __FUNCTION__, __LINE__));
 499                       }
 500                   }
 501         }
 502     }
 503
 504   for (cnt = 0; cnt < 256; ++cnt)
 505     {
 506       uint32_t tmp = ctype->class256_collection[cnt];
 507
 508       if (tmp != 0)
 509         {
 510           for (cls1 = 0; cls1 < NCLASS; ++cls1)
 511             if ((tmp & _ISbit (cls1)) != 0)
 512               for (cls2 = 0; cls2 < NCLASS; ++cls2)
 513                 if (valid_table[cls1].allow[cls2] != '-')
 514                   {
 515                     int eq = (tmp & _ISbit (cls2)) != 0;
 516                     switch (valid_table[cls1].allow[cls2])
 517                       {
 518                       case 'M':
 519                         if (!eq)
 520                           {
 521                             char buf[17];
 522
 523                             snprintf (buf, sizeof buf, "\\%Zo", cnt);
 524
 525                             if (!be_quiet)
 526                               WITH_CUR_LOCALE (error (0, 0, _("\
 527 character '%s' in class `%s' must be in class `%s'"),
 528                                                       buf,
 529                                                       valid_table[cls1].name,
 530                                                       valid_table[cls2].name));
 531                           }
 532                         break;
 533
 534                       case 'X':
 535                         if (eq)
 536                           {
 537                             char buf[17];
 538
 539                             snprintf (buf, sizeof buf, "\\%Zo", cnt);
 540
 541                             if (!be_quiet)
 542                               WITH_CUR_LOCALE (error (0, 0, _("\
 543 character '%s' in class `%s' must not be in class `%s'"),
 544                                                       buf,
 545                                                       valid_table[cls1].name,
 546                                                       valid_table[cls2].name));
 547                           }
 548                         break;
 549
 550                       case 'D':
 551                         ctype->class256_collection[cnt] |= _ISbit (cls2);
 552                         break;
 553
 554                       default:
 555                         WITH_CUR_LOCALE (error (5, 0, _("\
 556 internal error in %s, line %u"), __FUNCTION__, __LINE__));
 557                       }
 558                   }
 559         }
 560     }
 561
 562   /* ... and now test <SP> as a special case.  */
 563   space_value = 32;
 564   if (((cnt = BITPOS (tok_space),
 565         (ELEM (ctype, class_collection, , space_value)
 566          & BITw (tok_space)) == 0)
 567        || (cnt = BITPOS (tok_blank),
 568            (ELEM (ctype, class_collection, , space_value)
 569             & BITw (tok_blank)) == 0)))
 570     {
 571       if (!be_quiet)
 572         WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
 573                                 valid_table[cnt].name));
 574     }
 575   else if (((cnt = BITPOS (tok_punct),
 576              (ELEM (ctype, class_collection, , space_value)
 577               & BITw (tok_punct)) != 0)
 578             || (cnt = BITPOS (tok_graph),
 579                 (ELEM (ctype, class_collection, , space_value)
 580                  & BITw (tok_graph))
 581                 != 0)))
 582     {
 583       if (!be_quiet)
 584         WITH_CUR_LOCALE (error (0, 0, _("\
 585 <SP> character must not be in class `%s'"),
 586                                 valid_table[cnt].name));
 587     }
 588   else
 589     ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
 590
 591   space_seq = charmap_find_value (charmap, "SP", 2);
 592   if (space_seq == NULL)
 593     space_seq = charmap_find_value (charmap, "space", 5);
 594   if (space_seq == NULL)
 595     space_seq = charmap_find_value (charmap, "U00000020", 9);
 596   if (space_seq == NULL || space_seq->nbytes != 1)
 597     {
 598       if (!be_quiet)
 599         WITH_CUR_LOCALE (error (0, 0, _("\
 600 character <SP> not defined in character map")));
 601     }
 602   else if (((cnt = BITPOS (tok_space),
 603              (ctype->class256_collection[space_seq->bytes[0]]
 604               & BIT (tok_space)) == 0)
 605             || (cnt = BITPOS (tok_blank),
 606                 (ctype->class256_collection[space_seq->bytes[0]]
 607                  & BIT (tok_blank)) == 0)))
 608     {
 609       if (!be_quiet)
 610         WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
 611                                 valid_table[cnt].name));
 612     }
 613   else if (((cnt = BITPOS (tok_punct),
 614              (ctype->class256_collection[space_seq->bytes[0]]
 615               & BIT (tok_punct)) != 0)
 616             || (cnt = BITPOS (tok_graph),
 617                 (ctype->class256_collection[space_seq->bytes[0]]
 618                  & BIT (tok_graph)) != 0)))
 619     {
 620       if (!be_quiet)
 621         WITH_CUR_LOCALE (error (0, 0, _("\
 622 <SP> character must not be in class `%s'"),
 623                                 valid_table[cnt].name));
 624     }
 625   else
 626     ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
 627
 628   /* Check whether all single-byte characters map to their upper/lowercase
 629      equivalent according to the ASCII rules.  */
 630   for (cnt = 'A'; cnt <= 'Z'; ++cnt)
 631     {
 632       uint32_t uppval = ctype->map256_collection[0][cnt];
 633       uint32_t lowval = ctype->map256_collection[1][cnt];
 634       uint32_t lowuppval = ctype->map256_collection[0][lowval];
 635       uint32_t lowlowval = ctype->map256_collection[1][lowval];
 636
 637       if (uppval != cnt
 638           || lowval != cnt + 0x20
 639           || lowuppval != cnt
 640           || lowlowval != cnt + 0x20)
 641         ctype->nonascii_case = 1;
 642     }
 643   for (cnt = 0; cnt < 256; ++cnt)
 644     if (cnt < 'A' || (cnt > 'Z' && cnt < 'a') || cnt > 'z')
 645       if (ctype->map256_collection[0][cnt] != cnt
 646           || ctype->map256_collection[1][cnt] != cnt)
 647         ctype->nonascii_case = 1;
 648
 649   /* Now that the tests are done make sure the name array contains all
 650      characters which are handled in the WIDTH section of the
 651      character set definition file.  */
 652   if (charmap->width_rules != NULL)
 653     for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
 654       {
 655         unsigned char bytes[charmap->mb_cur_max];
 656         int nbytes = charmap->width_rules[cnt].from->nbytes;
 657
 658         /* We have the range of character for which the width is
 659            specified described using byte sequences of the multibyte
 660            charset.  We have to convert this to UCS4 now.  And we
 661            cannot simply convert the beginning and the end of the
 662            sequence, we have to iterate over the byte sequence and
 663            convert it for every single character.  */
 664         memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
 665
 666         while (nbytes < charmap->width_rules[cnt].to->nbytes
 667                || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
 668                           nbytes) <= 0)
 669           {
 670             /* Find the UCS value for `bytes'.  */
 671             int inner;
 672             uint32_t wch;
 673             struct charseq *seq
 674               = charmap_find_symbol (charmap, (char *) bytes, nbytes);
 675
 676             if (seq == NULL)
 677               wch = ILLEGAL_CHAR_VALUE;
 678             else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
 679               wch = seq->ucs4;
 680             else
 681               wch = repertoire_find_value (ctype->repertoire, seq->name,
 682                                            strlen (seq->name));
 683
 684             if (wch != ILLEGAL_CHAR_VALUE)
 685               /* We are only interested in the side-effects of the
 686                  `find_idx' call.  It will add appropriate entries in
 687                  the name array if this is necessary.  */
 688               (void) find_idx (ctype, NULL, NULL, NULL, wch);
 689
 690             /* "Increment" the bytes sequence.  */
 691             inner = nbytes - 1;
 692             while (inner >= 0 && bytes[inner] == 0xff)
 693               --inner;
 694
 695             if (inner < 0)
 696               {
 697                 /* We have to extend the byte sequence.  */
 698                 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
 699                   break;
 700
 701                 bytes[0] = 1;
 702                 memset (&bytes[1], 0, nbytes);
 703                 ++nbytes;
 704               }
 705             else
 706               {
 707                 ++bytes[inner];
 708                 while (++inner < nbytes)
 709                   bytes[inner] = 0;
 710               }
 711           }
 712       }
 713
 714   /* Now set all the other characters of the character set to the
 715      default width.  */
 716   curs = NULL;
 717   while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
 718     {
 719       struct charseq *data = (struct charseq *) vdata;
 720
 721       if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
 722         data->ucs4 = repertoire_find_value (ctype->repertoire,
 723                                             data->name, len);
 724
 725       if (data->ucs4 != ILLEGAL_CHAR_VALUE)
 726         (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
 727     }
 728
 729   /* There must be a multiple of 10 digits.  */
 730   if (ctype->mbdigits_act % 10 != 0)
 731     {
 732       assert (ctype->mbdigits_act == ctype->wcdigits_act);
 733       ctype->wcdigits_act -= ctype->mbdigits_act % 10;
 734       ctype->mbdigits_act -= ctype->mbdigits_act % 10;
 735       WITH_CUR_LOCALE (error (0, 0, _("\
 736 `digit' category has not entries in groups of ten")));
 737     }
 738
 739   /* Check the input digits.  There must be a multiple of ten available.
 740      In each group it could be that one or the other character is missing.
 741      In this case the whole group must be removed.  */
 742   cnt = 0;
 743   while (cnt < ctype->mbdigits_act)
 744     {
 745       size_t inner;
 746       for (inner = 0; inner < 10; ++inner)
 747         if (ctype->mbdigits[cnt + inner] == NULL)
 748           break;
 749
 750       if (inner == 10)
 751         cnt += 10;
 752       else
 753         {
 754           /* Remove the group.  */
 755           memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
 756                    ((ctype->wcdigits_act - cnt - 10)
 757                     * sizeof (ctype->mbdigits[0])));
 758           ctype->mbdigits_act -= 10;
 759         }
 760     }
 761
 762   /* If no input digits are given use the default.  */
 763   if (ctype->mbdigits_act == 0)
 764     {
 765       if (ctype->mbdigits_max == 0)
 766         {
 767           ctype->mbdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
 768                                            10 * sizeof (struct charseq *));
 769           ctype->mbdigits_max = 10;
 770         }
 771
 772       for (cnt = 0; cnt < 10; ++cnt)
 773         {
 774           ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
 775                                                       (char *) digits + cnt, 1);
 776           if (ctype->mbdigits[cnt] == NULL)
 777             {
 778               ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
 779                                                           longnames[cnt],
 780                                                           strlen (longnames[cnt]));
 781               if (ctype->mbdigits[cnt] == NULL)
 782                 {
 783                   /* Hum, this ain't good.  */
 784                   WITH_CUR_LOCALE (error (0, 0, _("\
 785 no input digits defined and none of the standard names in the charmap")));
 786
 787                   ctype->mbdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
 788                                                         sizeof (struct charseq) + 1);
 789
 790                   /* This is better than nothing.  */
 791                   ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
 792                   ctype->mbdigits[cnt]->nbytes = 1;
 793                 }
 794             }
 795         }
 796
 797       ctype->mbdigits_act = 10;
 798     }
 799
 800   /* Check the wide character input digits.  There must be a multiple
 801      of ten available.  In each group it could be that one or the other
 802      character is missing.  In this case the whole group must be
 803      removed.  */
 804   cnt = 0;
 805   while (cnt < ctype->wcdigits_act)
 806     {
 807       size_t inner;
 808       for (inner = 0; inner < 10; ++inner)
 809         if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
 810           break;
 811
 812       if (inner == 10)
 813         cnt += 10;
 814       else
 815         {
 816           /* Remove the group.  */
 817           memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
 818                    ((ctype->wcdigits_act - cnt - 10)
 819                     * sizeof (ctype->wcdigits[0])));
 820           ctype->wcdigits_act -= 10;
 821         }
 822     }
 823
 824   /* If no input digits are given use the default.  */
 825   if (ctype->wcdigits_act == 0)
 826     {
 827       if (ctype->wcdigits_max == 0)
 828         {
 829           ctype->wcdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
 830                                            10 * sizeof (uint32_t));
 831           ctype->wcdigits_max = 10;
 832         }
 833
 834       for (cnt = 0; cnt < 10; ++cnt)
 835         ctype->wcdigits[cnt] = L'0' + cnt;
 836
 837       ctype->mbdigits_act = 10;
 838     }
 839
 840   /* Check the outdigits.  */
 841   warned = 0;
 842   for (cnt = 0; cnt < 10; ++cnt)
 843     if (ctype->mboutdigits[cnt] == NULL)
 844       {
 845         static struct charseq replace[2];
 846
 847         if (!warned)
 848           {
 849             WITH_CUR_LOCALE (error (0, 0, _("\
 850 not all characters used in `outdigit' are available in the charmap")));
 851             warned = 1;
 852           }
 853
 854         replace[0].nbytes = 1;
 855         replace[0].bytes[0] = '?';
 856         replace[0].bytes[1] = '\0';
 857         ctype->mboutdigits[cnt] = &replace[0];
 858       }
 859
 860   warned = 0;
 861   for (cnt = 0; cnt < 10; ++cnt)
 862     if (ctype->wcoutdigits[cnt] == 0)
 863       {
 864         if (!warned)
 865           {
 866             WITH_CUR_LOCALE (error (0, 0, _("\
 867 not all characters used in `outdigit' are available in the repertoire")));
 868             warned = 1;
 869           }
 870
 871         ctype->wcoutdigits[cnt] = L'?';
 872       }
 873
 874   /* Sort the entries in the translit_ignore list.  */
 875   if (ctype->translit_ignore != NULL)
 876     {
 877       struct translit_ignore_t *firstp = ctype->translit_ignore;
 878       struct translit_ignore_t *runp;
 879
 880       ctype->ntranslit_ignore = 1;
 881
 882       for (runp = firstp->next; runp != NULL; runp = runp->next)
 883         {
 884           struct translit_ignore_t *lastp = NULL;
 885           struct translit_ignore_t *cmpp;
 886
 887           ++ctype->ntranslit_ignore;
 888
 889           for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next)
 890             if (runp->from < cmpp->from)
 891               break;
 892
 893           runp->next = lastp;
 894           if (lastp == NULL)
 895             firstp = runp;
 896         }
 897
 898       ctype->translit_ignore = firstp;
 899     }
 900 }
 901
 902
 903 void
 904 ctype_output (struct localedef_t *locale, const struct charmap_t *charmap,
 905               const char *output_path)
 906 {
 907   static const char nulbytes[4] = { 0, 0, 0, 0 };
 908   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 909   const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)
 910                          + ctype->nr_charclass + ctype->map_collection_nr);
 911   struct iovec *iov = alloca (sizeof *iov
 912                               * (2 + nelems + 2 * ctype->nr_charclass
 913                                  + ctype->map_collection_nr + 4));
 914   struct locale_file data;
 915   uint32_t *idx = alloca (sizeof *idx * (nelems + 1));
 916   uint32_t default_missing_len;
 917   size_t elem, cnt, offset, total;
 918   char *cp;
 919
 920   /* Now prepare the output: Find the sizes of the table we can use.  */
 921   allocate_arrays (ctype, charmap, ctype->repertoire);
 922
 923   data.magic = LIMAGIC (LC_CTYPE);
 924   data.n = nelems;
 925   iov[0].iov_base = (void *) &data;
 926   iov[0].iov_len = sizeof (data);
 927
 928   iov[1].iov_base = (void *) idx;
 929   iov[1].iov_len = nelems * sizeof (uint32_t);
 930
 931   idx[0] = iov[0].iov_len + iov[1].iov_len;
 932   offset = 0;
 933
 934   for (elem = 0; elem < nelems; ++elem)
 935     {
 936       if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))
 937         switch (elem)
 938           {
 939 #define CTYPE_EMPTY(name) \
 940           case name:                                                          \
 941             iov[2 + elem + offset].iov_base = NULL;                           \
 942             iov[2 + elem + offset].iov_len = 0;                               \
 943             idx[elem + 1] = idx[elem];                                        \
 944             break
 945
 946           CTYPE_EMPTY(_NL_CTYPE_GAP1);
 947           CTYPE_EMPTY(_NL_CTYPE_GAP2);
 948           CTYPE_EMPTY(_NL_CTYPE_GAP3);
 949           CTYPE_EMPTY(_NL_CTYPE_GAP4);
 950           CTYPE_EMPTY(_NL_CTYPE_GAP5);
 951           CTYPE_EMPTY(_NL_CTYPE_GAP6);
 952
 953 #define CTYPE_DATA(name, base, len)                                           \
 954           case _NL_ITEM_INDEX (name):                                         \
 955             iov[2 + elem + offset].iov_base = (base);                         \
 956             iov[2 + elem + offset].iov_len = (len);                           \
 957             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;       \
 958             break
 959
 960           CTYPE_DATA (_NL_CTYPE_CLASS,
 961                       ctype->ctype_b,
 962                       (256 + 128) * sizeof (char_class_t));
 963
 964           CTYPE_DATA (_NL_CTYPE_TOUPPER,
 965                       ctype->map_b[0],
 966                       (256 + 128) * sizeof (uint32_t));
 967           CTYPE_DATA (_NL_CTYPE_TOLOWER,
 968                       ctype->map_b[1],
 969                       (256 + 128) * sizeof (uint32_t));
 970
 971           CTYPE_DATA (_NL_CTYPE_TOUPPER32,
 972                       ctype->map32_b[0],
 973                       256 * sizeof (uint32_t));
 974           CTYPE_DATA (_NL_CTYPE_TOLOWER32,
 975                       ctype->map32_b[1],
 976                       256 * sizeof (uint32_t));
 977
 978           CTYPE_DATA (_NL_CTYPE_CLASS32,
 979                       ctype->ctype32_b,
 980                       256 * sizeof (char_class32_t));
 981
 982           CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET,
 983                       &ctype->class_offset, sizeof (uint32_t));
 984
 985           CTYPE_DATA (_NL_CTYPE_MAP_OFFSET,
 986                       &ctype->map_offset, sizeof (uint32_t));
 987
 988           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE,
 989                       &ctype->translit_idx_size, sizeof (uint32_t));
 990
 991           CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX,
 992                       ctype->translit_from_idx,
 993                       ctype->translit_idx_size * sizeof (uint32_t));
 994
 995           CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL,
 996                       ctype->translit_from_tbl,
 997                       ctype->translit_from_tbl_size);
 998
 999           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX,
1000                       ctype->translit_to_idx,
1001                       ctype->translit_idx_size * sizeof (uint32_t));
1002
1003           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL,
1004                       ctype->translit_to_tbl, ctype->translit_to_tbl_size);
1005
1006           case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
1007             /* The class name array.  */
1008             total = 0;
1009             for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
1010               {
1011                 iov[2 + elem + offset].iov_base
1012                   = (void *) ctype->classnames[cnt];
1013                 iov[2 + elem + offset].iov_len
1014                   = strlen (ctype->classnames[cnt]) + 1;
1015                 total += iov[2 + elem + offset].iov_len;
1016               }
1017             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1018             iov[2 + elem + offset].iov_len = 4 - (total % 4);
1019             total += 4 - (total % 4);
1020
1021             idx[elem + 1] = idx[elem] + total;
1022             break;
1023
1024           case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
1025             /* The class name array.  */
1026             total = 0;
1027             for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
1028               {
1029                 iov[2 + elem + offset].iov_base
1030                   = (void *) ctype->mapnames[cnt];
1031                 iov[2 + elem + offset].iov_len
1032                   = strlen (ctype->mapnames[cnt]) + 1;
1033                 total += iov[2 + elem + offset].iov_len;
1034               }
1035             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1036             iov[2 + elem + offset].iov_len = 4 - (total % 4);
1037             total += 4 - (total % 4);
1038
1039             idx[elem + 1] = idx[elem] + total;
1040             break;
1041
1042           CTYPE_DATA (_NL_CTYPE_WIDTH,
1043                       ctype->width.iov_base,
1044                       ctype->width.iov_len);
1045
1046           CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
1047                       &ctype->mb_cur_max, sizeof (uint32_t));
1048
1049           case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
1050             total = strlen (ctype->codeset_name) + 1;
1051             if (total % 4 == 0)
1052               iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
1053             else
1054               {
1055                 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
1056                 memset (mempcpy (iov[2 + elem + offset].iov_base,
1057                                  ctype->codeset_name, total),
1058                         '\0', 4 - (total & 3));
1059                 total = (total + 3) & ~3;
1060               }
1061             iov[2 + elem + offset].iov_len = total;
1062             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1063             break;
1064
1065
1066           CTYPE_DATA (_NL_CTYPE_MAP_TO_NONASCII,
1067                       &ctype->to_nonascii, sizeof (uint32_t));
1068
1069           CTYPE_DATA (_NL_CTYPE_NONASCII_CASE,
1070                       &ctype->nonascii_case, sizeof (uint32_t));
1071
1072           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
1073             iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1074             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1075             *(uint32_t *) iov[2 + elem + offset].iov_base =
1076               ctype->mbdigits_act / 10;
1077             idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1078             break;
1079
1080           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
1081             /* Align entries.  */
1082             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1083             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1084             idx[elem] += iov[2 + elem + offset].iov_len;
1085             ++offset;
1086
1087             iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1088             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1089             *(uint32_t *) iov[2 + elem + offset].iov_base =
1090               ctype->wcdigits_act / 10;
1091             idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1092             break;
1093
1094           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
1095             /* Compute the length of all possible characters.  For INDIGITS
1096                there might be more than one.  We simply concatenate all of
1097                them with a NUL byte following.  The NUL byte wouldn't be
1098                necessary but it makes it easier for the user.  */
1099             total = 0;
1100
1101             for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1102                  cnt < ctype->mbdigits_act; cnt += 10)
1103               total += ctype->mbdigits[cnt]->nbytes + 1;
1104             iov[2 + elem + offset].iov_base = (char *) alloca (total);
1105             iov[2 + elem + offset].iov_len = total;
1106
1107             cp = iov[2 + elem + offset].iov_base;
1108             for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1109                  cnt < ctype->mbdigits_act; cnt += 10)
1110               {
1111                 cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes,
1112                               ctype->mbdigits[cnt]->nbytes);
1113                 *cp++ = '\0';
1114               }
1115             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1116             break;
1117
1118           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
1119             /* Compute the length of all possible characters.  For INDIGITS
1120                there might be more than one.  We simply concatenate all of
1121                them with a NUL byte following.  The NUL byte wouldn't be
1122                necessary but it makes it easier for the user.  */
1123             cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
1124             total = ctype->mboutdigits[cnt]->nbytes + 1;
1125             iov[2 + elem + offset].iov_base = (char *) alloca (total);
1126             iov[2 + elem + offset].iov_len = total;
1127
1128             *(char *) mempcpy (iov[2 + elem + offset].iov_base,
1129                                ctype->mboutdigits[cnt]->bytes,
1130                                ctype->mboutdigits[cnt]->nbytes) = '\0';
1131             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1132             break;
1133
1134           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
1135             total = ctype->wcdigits_act / 10;
1136
1137             iov[2 + elem + offset].iov_base =
1138               (uint32_t *) alloca (total * sizeof (uint32_t));
1139             iov[2 + elem + offset].iov_len = total * sizeof (uint32_t);
1140
1141             for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
1142                  cnt < ctype->wcdigits_act; cnt += 10)
1143               ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10]
1144                 = ctype->wcdigits[cnt];
1145             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1146             break;
1147
1148           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC):
1149             /* Align entries.  */
1150             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1151             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1152             idx[elem] += iov[2 + elem + offset].iov_len;
1153             ++offset;
1154             /* FALLTRHOUGH */
1155
1156           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT1_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
1157             cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
1158             iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt];
1159             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1160             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1161             break;
1162
1163           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
1164             /* Align entries.  */
1165             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1166             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1167             idx[elem] += iov[2 + elem + offset].iov_len;
1168             ++offset;
1169
1170             default_missing_len = (ctype->default_missing
1171                                    ? wcslen ((wchar_t *)ctype->default_missing)
1172                                    : 0);
1173             iov[2 + elem + offset].iov_base = &default_missing_len;
1174             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1175             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1176             break;
1177
1178           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
1179             iov[2 + elem + offset].iov_base =
1180               ctype->default_missing ?: (uint32_t *) L"";
1181             iov[2 + elem + offset].iov_len =
1182               wcslen (iov[2 + elem + offset].iov_base) * sizeof (uint32_t);
1183             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1184             break;
1185
1186           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
1187             /* Align entries.  */
1188             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1189             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1190             idx[elem] += iov[2 + elem + offset].iov_len;
1191             ++offset;
1192
1193             iov[2 + elem + offset].iov_base = &ctype->ntranslit_ignore;
1194             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1195             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1196             break;
1197
1198           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
1199             {
1200               uint32_t *ranges = (uint32_t *) alloca (ctype->ntranslit_ignore
1201                                                       * 3 * sizeof (uint32_t));
1202               struct translit_ignore_t *runp;
1203
1204               iov[2 + elem + offset].iov_base = ranges;
1205               iov[2 + elem + offset].iov_len = (ctype->ntranslit_ignore
1206                                                 * 3 * sizeof (uint32_t));
1207
1208               for (runp = ctype->translit_ignore; runp != NULL;
1209                    runp = runp->next)
1210                 {
1211                   *ranges++ = runp->from;
1212                   *ranges++ = runp->to;
1213                   *ranges++ = runp->step;
1214                 }
1215             }
1216             /* Remove the following line in case a new entry is added
1217                after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN.  */
1218             if (elem < nelems)
1219               idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1220             break;
1221
1222           default:
1223             assert (! "unknown CTYPE element");
1224           }
1225       else
1226         {
1227           /* Handle extra maps.  */
1228           size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
1229           if (nr < ctype->nr_charclass)
1230             {
1231               iov[2 + elem + offset].iov_base = ctype->class_b[nr];
1232               iov[2 + elem + offset].iov_len = 256 / 32 * sizeof (uint32_t);
1233               idx[elem] += iov[2 + elem + offset].iov_len;
1234               ++offset;
1235
1236               iov[2 + elem + offset] = ctype->class_3level[nr];
1237             }
1238           else
1239             {
1240               nr -= ctype->nr_charclass;
1241               assert (nr < ctype->map_collection_nr);
1242               iov[2 + elem + offset] = ctype->map_3level[nr];
1243             }
1244           idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1245         }
1246     }
1247
1248   assert (2 + elem + offset == (nelems + 2 * ctype->nr_charclass
1249                                 + ctype->map_collection_nr + 4 + 2));
1250
1251   write_locale_data (output_path, LC_CTYPE, "LC_CTYPE", 2 + elem + offset,
1252                      iov);
1253 }
1254
1255
1256 /* Local functions.  */
1257 static void
1258 ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
1259                  const char *name)
1260 {
1261   size_t cnt;
1262
1263   for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1264     if (strcmp (ctype->classnames[cnt], name) == 0)
1265       break;
1266
1267   if (cnt < ctype->nr_charclass)
1268     {
1269       lr_error (lr, _("character class `%s' already defined"), name);
1270       return;
1271     }
1272
1273   if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1274     /* Exit code 2 is prescribed in P1003.2b.  */
1275     WITH_CUR_LOCALE (error (2, 0, _("\
1276 implementation limit: no more than %Zd character classes allowed"),
1277                             MAX_NR_CHARCLASS));
1278
1279   ctype->classnames[ctype->nr_charclass++] = name;
1280 }
1281
1282
1283 static void
1284 ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1285                const char *name, const struct charmap_t *charmap)
1286 {
1287   size_t max_chars = 0;
1288   size_t cnt;
1289
1290   for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1291     {
1292       if (strcmp (ctype->mapnames[cnt], name) == 0)
1293         break;
1294
1295       if (max_chars < ctype->map_collection_max[cnt])
1296         max_chars = ctype->map_collection_max[cnt];
1297     }
1298
1299   if (cnt < ctype->map_collection_nr)
1300     {
1301       lr_error (lr, _("character map `%s' already defined"), name);
1302       return;
1303     }
1304
1305   if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1306     /* Exit code 2 is prescribed in P1003.2b.  */
1307     WITH_CUR_LOCALE (error (2, 0, _("\
1308 implementation limit: no more than %d character maps allowed"),
1309                             MAX_NR_CHARMAP));
1310
1311   ctype->mapnames[cnt] = name;
1312
1313   if (max_chars == 0)
1314     ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1315   else
1316     ctype->map_collection_max[cnt] = max_chars;
1317
1318   ctype->map_collection[cnt] = (uint32_t *)
1319     xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
1320   ctype->map_collection_act[cnt] = 256;
1321
1322   ++ctype->map_collection_nr;
1323 }
1324
1325
1326 /* We have to be prepared that TABLE, MAX, and ACT can be NULL.  This
1327    is possible if we only want to extend the name array.  */
1328 static uint32_t *
1329 find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1330           size_t *act, uint32_t idx)
1331 {
1332   size_t cnt;
1333
1334   if (idx < 256)
1335     return table == NULL ? NULL : &(*table)[idx];
1336
1337   /* Use the charnames_idx lookup table instead of the slow search loop.  */
1338 #if 1
1339   cnt = idx_table_get (&ctype->charnames_idx, idx);
1340   if (cnt == EMPTY)
1341     /* Not found.  */
1342     cnt = ctype->charnames_act;
1343 #else
1344   for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1345     if (ctype->charnames[cnt] == idx)
1346       break;
1347 #endif
1348
1349   /* We have to distinguish two cases: the name is found or not.  */
1350   if (cnt == ctype->charnames_act)
1351     {
1352       /* Extend the name array.  */
1353       if (ctype->charnames_act == ctype->charnames_max)
1354         {
1355           ctype->charnames_max *= 2;
1356           ctype->charnames = (uint32_t *)
1357             xrealloc (ctype->charnames,
1358                       sizeof (uint32_t) * ctype->charnames_max);
1359         }
1360       ctype->charnames[ctype->charnames_act++] = idx;
1361       idx_table_add (&ctype->charnames_idx, idx, cnt);
1362     }
1363
1364   if (table == NULL)
1365     /* We have done everything we are asked to do.  */
1366     return NULL;
1367
1368   if (max == NULL)
1369     /* The caller does not want to extend the table.  */
1370     return (cnt >= *act ? NULL : &(*table)[cnt]);
1371
1372   if (cnt >= *act)
1373     {
1374       if (cnt >= *max)
1375         {
1376           size_t old_max = *max;
1377           do
1378             *max *= 2;
1379           while (*max <= cnt);
1380
1381           *table =
1382             (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
1383           memset (&(*table)[old_max], '\0',
1384                   (*max - old_max) * sizeof (uint32_t));
1385         }
1386
1387       *act = cnt + 1;
1388     }
1389
1390   return &(*table)[cnt];
1391 }
1392
1393
1394 static int
1395 get_character (struct token *now, const struct charmap_t *charmap,
1396                struct repertoire_t *repertoire,
1397                struct charseq **seqp, uint32_t *wchp)
1398 {
1399   if (now->tok == tok_bsymbol)
1400     {
1401       /* This will hopefully be the normal case.  */
1402       *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1403                                      now->val.str.lenmb);
1404       *seqp = charmap_find_value (charmap, now->val.str.startmb,
1405                                   now->val.str.lenmb);
1406     }
1407   else if (now->tok == tok_ucs4)
1408     {
1409       char utmp[10];
1410
1411       snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1412       *seqp = charmap_find_value (charmap, utmp, 9);
1413
1414       if (*seqp == NULL)
1415         *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
1416
1417       if (*seqp == NULL)
1418         {
1419           /* Compute the value in the charmap from the UCS value.  */
1420           const char *symbol = repertoire_find_symbol (repertoire,
1421                                                        now->val.ucs4);
1422
1423           if (symbol == NULL)
1424             *seqp = NULL;
1425           else
1426             *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
1427
1428           if (*seqp == NULL)
1429             {
1430               if (repertoire != NULL)
1431                 {
1432                   /* Insert a negative entry.  */
1433                   static const struct charseq negative
1434                     = { .ucs4 = ILLEGAL_CHAR_VALUE };
1435                   uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1436                                                   sizeof (uint32_t));
1437                   *newp = now->val.ucs4;
1438
1439                   insert_entry (&repertoire->seq_table, newp,
1440                                 sizeof (uint32_t), (void *) &negative);
1441                 }
1442             }
1443           else
1444             (*seqp)->ucs4 = now->val.ucs4;
1445         }
1446       else if ((*seqp)->ucs4 != now->val.ucs4)
1447         *seqp = NULL;
1448
1449       *wchp = now->val.ucs4;
1450     }
1451   else if (now->tok == tok_charcode)
1452     {
1453       /* We must map from the byte code to UCS4.  */
1454       *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1455                                    now->val.str.lenmb);
1456
1457       if (*seqp == NULL)
1458         *wchp = ILLEGAL_CHAR_VALUE;
1459       else
1460         {
1461           if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1462             (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1463                                                    strlen ((*seqp)->name));
1464           *wchp = (*seqp)->ucs4;
1465         }
1466     }
1467   else
1468     return 1;
1469
1470   return 0;
1471 }
1472
1473
1474 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1475    the .(2). counterparts.  */
1476 static void
1477 charclass_symbolic_ellipsis (struct linereader *ldfile,
1478                              struct locale_ctype_t *ctype,
1479                              const struct charmap_t *charmap,
1480                              struct repertoire_t *repertoire,
1481                              struct token *now,
1482                              const char *last_str,
1483                              unsigned long int class256_bit,
1484                              unsigned long int class_bit, int base,
1485                              int ignore_content, int handle_digits, int step)
1486 {
1487   const char *nowstr = now->val.str.startmb;
1488   char tmp[now->val.str.lenmb + 1];
1489   const char *cp;
1490   char *endp;
1491   unsigned long int from;
1492   unsigned long int to;
1493
1494   /* We have to compute the ellipsis values using the symbolic names.  */
1495   assert (last_str != NULL);
1496
1497   if (strlen (last_str) != now->val.str.lenmb)
1498     {
1499     invalid_range:
1500       lr_error (ldfile,
1501                 _("`%s' and `%.*s' are not valid names for symbolic range"),
1502                 last_str, (int) now->val.str.lenmb, nowstr);
1503       return;
1504     }
1505
1506   if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1507     /* Nothing to do, the names are the same.  */
1508     return;
1509
1510   for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1511     ;
1512
1513   errno = 0;
1514   from = strtoul (cp, &endp, base);
1515   if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1516     goto invalid_range;
1517
1518   to = strtoul (nowstr + (cp - last_str), &endp, base);
1519   if ((to == UINT_MAX && errno == ERANGE)
1520       || (endp - nowstr) != now->val.str.lenmb || from >= to)
1521     goto invalid_range;
1522
1523   /* OK, we have a range FROM - TO.  Now we can create the symbolic names.  */
1524   if (!ignore_content)
1525     {
1526       now->val.str.startmb = tmp;
1527       while ((from += step) <= to)
1528         {
1529           struct charseq *seq;
1530           uint32_t wch;
1531
1532           sprintf (tmp, (base == 10 ? "%.*s%0*ld" : "%.*s%0*lX"),
1533                    (int) (cp - last_str), last_str,
1534                    (int) (now->val.str.lenmb - (cp - last_str)),
1535                    from);
1536
1537           get_character (now, charmap, repertoire, &seq, &wch);
1538
1539           if (seq != NULL && seq->nbytes == 1)
1540             /* Yep, we can store information about this byte sequence.  */
1541             ctype->class256_collection[seq->bytes[0]] |= class256_bit;
1542
1543           if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1544             /* We have the UCS4 position.  */
1545             *find_idx (ctype, &ctype->class_collection,
1546                        &ctype->class_collection_max,
1547                        &ctype->class_collection_act, wch) |= class_bit;
1548
1549           if (handle_digits == 1)
1550             {
1551               /* We must store the digit values.  */
1552               if (ctype->mbdigits_act == ctype->mbdigits_max)
1553                 {
1554                   ctype->mbdigits_max *= 2;
1555                   ctype->mbdigits = xrealloc (ctype->mbdigits,
1556                                               (ctype->mbdigits_max
1557                                                * sizeof (char *)));
1558                   ctype->wcdigits_max *= 2;
1559                   ctype->wcdigits = xrealloc (ctype->wcdigits,
1560                                               (ctype->wcdigits_max
1561                                                * sizeof (uint32_t)));
1562                 }
1563
1564               ctype->mbdigits[ctype->mbdigits_act++] = seq;
1565               ctype->wcdigits[ctype->wcdigits_act++] = wch;
1566             }
1567           else if (handle_digits == 2)
1568             {
1569               /* We must store the digit values.  */
1570               if (ctype->outdigits_act >= 10)
1571                 {
1572                   lr_error (ldfile, _("\
1573 %s: field `%s' does not contain exactly ten entries"),
1574                             "LC_CTYPE", "outdigit");
1575                   return;
1576                 }
1577
1578               ctype->mboutdigits[ctype->outdigits_act] = seq;
1579               ctype->wcoutdigits[ctype->outdigits_act] = wch;
1580               ++ctype->outdigits_act;
1581             }
1582         }
1583     }
1584 }
1585
1586
1587 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'.  */
1588 static void
1589 charclass_ucs4_ellipsis (struct linereader *ldfile,
1590                          struct locale_ctype_t *ctype,
1591                          const struct charmap_t *charmap,
1592                          struct repertoire_t *repertoire,
1593                          struct token *now, uint32_t last_wch,
1594                          unsigned long int class256_bit,
1595                          unsigned long int class_bit, int ignore_content,
1596                          int handle_digits, int step)
1597 {
1598   if (last_wch > now->val.ucs4)
1599     {
1600       lr_error (ldfile, _("\
1601 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1602                 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1603                 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
1604       return;
1605     }
1606
1607   if (!ignore_content)
1608     while ((last_wch += step) <= now->val.ucs4)
1609       {
1610         /* We have to find out whether there is a byte sequence corresponding
1611            to this UCS4 value.  */
1612         struct charseq *seq;
1613         char utmp[10];
1614
1615         snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1616         seq = charmap_find_value (charmap, utmp, 9);
1617         if (seq == NULL)
1618           {
1619             snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1620             seq = charmap_find_value (charmap, utmp, 5);
1621           }
1622
1623         if (seq == NULL)
1624           /* Try looking in the repertoire map.  */
1625           seq = repertoire_find_seq (repertoire, last_wch);
1626
1627         /* If this is the first time we look for this sequence create a new
1628            entry.  */
1629         if (seq == NULL)
1630           {
1631             static const struct charseq negative
1632               = { .ucs4 = ILLEGAL_CHAR_VALUE };
1633
1634             /* Find the symbolic name for this UCS4 value.  */
1635             if (repertoire != NULL)
1636               {
1637                 const char *symbol = repertoire_find_symbol (repertoire,
1638                                                              last_wch);
1639                 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1640                                                 sizeof (uint32_t));
1641                 *newp = last_wch;
1642
1643                 if (symbol != NULL)
1644                   /* We have a name, now search the multibyte value.  */
1645                   seq = charmap_find_value (charmap, symbol, strlen (symbol));
1646
1647                 if (seq == NULL)
1648                   /* We have to create a fake entry.  */
1649                   seq = (struct charseq *) &negative;
1650                 else
1651                   seq->ucs4 = last_wch;
1652
1653                 insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1654                               seq);
1655               }
1656             else
1657               /* We have to create a fake entry.  */
1658               seq = (struct charseq *) &negative;
1659           }
1660
1661         /* We have a name, now search the multibyte value.  */
1662         if (seq->ucs4 == last_wch && seq->nbytes == 1)
1663           /* Yep, we can store information about this byte sequence.  */
1664           ctype->class256_collection[(size_t) seq->bytes[0]]
1665             |= class256_bit;
1666
1667         /* And of course we have the UCS4 position.  */
1668         if (class_bit != 0)
1669           *find_idx (ctype, &ctype->class_collection,
1670                      &ctype->class_collection_max,
1671                      &ctype->class_collection_act, last_wch) |= class_bit;
1672
1673         if (handle_digits == 1)
1674           {
1675             /* We must store the digit values.  */
1676             if (ctype->mbdigits_act == ctype->mbdigits_max)
1677               {
1678                 ctype->mbdigits_max *= 2;
1679                 ctype->mbdigits = xrealloc (ctype->mbdigits,
1680                                             (ctype->mbdigits_max
1681                                              * sizeof (char *)));
1682                 ctype->wcdigits_max *= 2;
1683                 ctype->wcdigits = xrealloc (ctype->wcdigits,
1684                                             (ctype->wcdigits_max
1685                                              * sizeof (uint32_t)));
1686               }
1687
1688             ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1689                                                       ? seq : NULL);
1690             ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1691           }
1692         else if (handle_digits == 2)
1693           {
1694             /* We must store the digit values.  */
1695             if (ctype->outdigits_act >= 10)
1696               {
1697                 lr_error (ldfile, _("\
1698 %s: field `%s' does not contain exactly ten entries"),
1699                           "LC_CTYPE", "outdigit");
1700                 return;
1701               }
1702
1703             ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1704                                                         ? seq : NULL);
1705             ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1706             ++ctype->outdigits_act;
1707           }
1708       }
1709 }
1710
1711
1712 /* Ellipsis as in `/xea/x12.../xea/x34'.  */
1713 static void
1714 charclass_charcode_ellipsis (struct linereader *ldfile,
1715                              struct locale_ctype_t *ctype,
1716                              const struct charmap_t *charmap,
1717                              struct repertoire_t *repertoire,
1718                              struct token *now, char *last_charcode,
1719                              uint32_t last_charcode_len,
1720                              unsigned long int class256_bit,
1721                              unsigned long int class_bit, int ignore_content,
1722                              int handle_digits)
1723 {
1724   /* First check whether the to-value is larger.  */
1725   if (now->val.charcode.nbytes != last_charcode_len)
1726     {
1727       lr_error (ldfile, _("\
1728 start and end character sequence of range must have the same length"));
1729       return;
1730     }
1731
1732   if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
1733     {
1734       lr_error (ldfile, _("\
1735 to-value character sequence is smaller than from-value sequence"));
1736       return;
1737     }
1738
1739   if (!ignore_content)
1740     {
1741       do
1742         {
1743           /* Increment the byte sequence value.  */
1744           struct charseq *seq;
1745           uint32_t wch;
1746           int i;
1747
1748           for (i = last_charcode_len - 1; i >= 0; --i)
1749             if (++last_charcode[i] != 0)
1750               break;
1751
1752           if (last_charcode_len == 1)
1753             /* Of course we have the charcode value.  */
1754             ctype->class256_collection[(size_t) last_charcode[0]]
1755               |= class256_bit;
1756
1757           /* Find the symbolic name.  */
1758           seq = charmap_find_symbol (charmap, last_charcode,
1759                                      last_charcode_len);
1760           if (seq != NULL)
1761             {
1762               if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1763                 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1764                                                    strlen (seq->name));
1765               wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
1766
1767               if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1768                 *find_idx (ctype, &ctype->class_collection,
1769                            &ctype->class_collection_max,
1770                            &ctype->class_collection_act, wch) |= class_bit;
1771             }
1772           else
1773             wch = ILLEGAL_CHAR_VALUE;
1774
1775           if (handle_digits == 1)
1776             {
1777               /* We must store the digit values.  */
1778               if (ctype->mbdigits_act == ctype->mbdigits_max)
1779                 {
1780                   ctype->mbdigits_max *= 2;
1781                   ctype->mbdigits = xrealloc (ctype->mbdigits,
1782                                               (ctype->mbdigits_max
1783                                                * sizeof (char *)));
1784                   ctype->wcdigits_max *= 2;
1785                   ctype->wcdigits = xrealloc (ctype->wcdigits,
1786                                               (ctype->wcdigits_max
1787                                                * sizeof (uint32_t)));
1788                 }
1789
1790               seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1791               memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1792               seq->nbytes = last_charcode_len;
1793
1794               ctype->mbdigits[ctype->mbdigits_act++] = seq;
1795               ctype->wcdigits[ctype->wcdigits_act++] = wch;
1796             }
1797           else if (handle_digits == 2)
1798             {
1799               struct charseq *seq;
1800               /* We must store the digit values.  */
1801               if (ctype->outdigits_act >= 10)
1802                 {
1803                   lr_error (ldfile, _("\
1804 %s: field `%s' does not contain exactly ten entries"),
1805                             "LC_CTYPE", "outdigit");
1806                   return;
1807                 }
1808
1809               seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1810               memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1811               seq->nbytes = last_charcode_len;
1812
1813               ctype->mboutdigits[ctype->outdigits_act] = seq;
1814               ctype->wcoutdigits[ctype->outdigits_act] = wch;
1815               ++ctype->outdigits_act;
1816             }
1817         }
1818       while (memcmp (last_charcode, now->val.charcode.bytes,
1819                      last_charcode_len) != 0);
1820     }
1821 }
1822
1823
1824 static uint32_t *
1825 find_translit2 (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
1826                 uint32_t wch)
1827 {
1828   struct translit_t *trunp = ctype->translit;
1829   struct translit_ignore_t *tirunp = ctype->translit_ignore;
1830
1831   while (trunp != NULL)
1832     {
1833       /* XXX We simplify things here.  The transliterations we look
1834          for are only allowed to have one character.  */
1835       if (trunp->from[0] == wch && trunp->from[1] == 0)
1836         {
1837           /* Found it.  Now look for a transliteration which can be
1838              represented with the character set.  */
1839           struct translit_to_t *torunp = trunp->to;
1840
1841           while (torunp != NULL)
1842             {
1843               int i;
1844
1845               for (i = 0; torunp->str[i] != 0; ++i)
1846                 {
1847                   char utmp[10];
1848
1849                   snprintf (utmp, sizeof (utmp), "U%08X", torunp->str[i]);
1850                   if (charmap_find_value (charmap, utmp, 9) == NULL)
1851                     /* This character cannot be represented.  */
1852                     break;
1853                 }
1854
1855               if (torunp->str[i] == 0)
1856                 return torunp->str;
1857
1858               torunp = torunp->next;
1859             }
1860
1861           break;
1862         }
1863
1864       trunp = trunp->next;
1865     }
1866
1867   /* Check for ignored chars.  */
1868   while (tirunp != NULL)
1869     {
1870       if (tirunp->from <= wch && tirunp->to >= wch)
1871         {
1872           uint32_t wi;
1873
1874           for (wi = tirunp->from; wi <= wch; wi += tirunp->step)
1875             if (wi == wch)
1876               return (uint32_t []) { 0 };
1877         }
1878     }
1879
1880   /* Nothing found.  */
1881   return NULL;
1882 }
1883
1884
1885 uint32_t *
1886 find_translit (struct localedef_t *locale, const struct charmap_t *charmap,
1887                uint32_t wch)
1888 {
1889   struct locale_ctype_t *ctype;
1890   uint32_t *result = NULL;
1891
1892   assert (locale != NULL);
1893   ctype = locale->categories[LC_CTYPE].ctype;
1894
1895   if (ctype == NULL)
1896     return NULL;
1897
1898   if (ctype->translit != NULL)
1899     result = find_translit2 (ctype, charmap, wch);
1900
1901   if (result == NULL)
1902     {
1903       struct translit_include_t *irunp = ctype->translit_include;
1904
1905       while (irunp != NULL && result == NULL)
1906         {
1907           result = find_translit (find_locale (CTYPE_LOCALE,
1908                                                irunp->copy_locale,
1909                                                irunp->copy_repertoire,
1910                                                charmap),
1911                                   charmap, wch);
1912           irunp = irunp->next;
1913         }
1914     }
1915
1916   return result;
1917 }
1918
1919
1920 /* Read one transliteration entry.  */
1921 static uint32_t *
1922 read_widestring (struct linereader *ldfile, struct token *now,
1923                  const struct charmap_t *charmap,
1924                  struct repertoire_t *repertoire)
1925 {
1926   uint32_t *wstr;
1927
1928   if (now->tok == tok_default_missing)
1929     /* The special name "" will denote this case.  */
1930     wstr = ((uint32_t *) { 0 });
1931   else if (now->tok == tok_bsymbol)
1932     {
1933       /* Get the value from the repertoire.  */
1934       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1935       wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1936                                        now->val.str.lenmb);
1937       if (wstr[0] == ILLEGAL_CHAR_VALUE)
1938         {
1939           /* We cannot proceed, we don't know the UCS4 value.  */
1940           free (wstr);
1941           return NULL;
1942         }
1943
1944       wstr[1] = 0;
1945     }
1946   else if (now->tok == tok_ucs4)
1947     {
1948       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1949       wstr[0] = now->val.ucs4;
1950       wstr[1] = 0;
1951     }
1952   else if (now->tok == tok_charcode)
1953     {
1954       /* Argh, we have to convert to the symbol name first and then to the
1955          UCS4 value.  */
1956       struct charseq *seq = charmap_find_symbol (charmap,
1957                                                  now->val.str.startmb,
1958                                                  now->val.str.lenmb);
1959       if (seq == NULL)
1960         /* Cannot find the UCS4 value.  */
1961         return NULL;
1962
1963       if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1964         seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1965                                            strlen (seq->name));
1966       if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1967         /* We cannot proceed, we don't know the UCS4 value.  */
1968         return NULL;
1969
1970       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1971       wstr[0] = seq->ucs4;
1972       wstr[1] = 0;
1973     }
1974   else if (now->tok == tok_string)
1975     {
1976       wstr = now->val.str.startwc;
1977       if (wstr == NULL || wstr[0] == 0)
1978         return NULL;
1979     }
1980   else
1981     {
1982       if (now->tok != tok_eol && now->tok != tok_eof)
1983         lr_ignore_rest (ldfile, 0);
1984       SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1985       return (uint32_t *) -1l;
1986     }
1987
1988   return wstr;
1989 }
1990
1991
1992 static void
1993 read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1994                      struct token *now, const struct charmap_t *charmap,
1995                      struct repertoire_t *repertoire)
1996 {
1997   uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1998   struct translit_t *result;
1999   struct translit_to_t **top;
2000   struct obstack *ob = &ctype->mempool;
2001   int first;
2002   int ignore;
2003
2004   if (from_wstr == NULL)
2005     /* There is no valid from string.  */
2006     return;
2007
2008   result = (struct translit_t *) obstack_alloc (ob,
2009                                                 sizeof (struct translit_t));
2010   result->from = from_wstr;
2011   result->fname = ldfile->fname;
2012   result->lineno = ldfile->lineno;
2013   result->next = NULL;
2014   result->to = NULL;
2015   top = &result->to;
2016   first = 1;
2017   ignore = 0;
2018
2019   while (1)
2020     {
2021       uint32_t *to_wstr;
2022
2023       /* Next we have one or more transliterations.  They are
2024          separated by semicolons.  */
2025       now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2026
2027       if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
2028         {
2029           /* One string read.  */
2030           const uint32_t zero = 0;
2031
2032           if (!ignore)
2033             {
2034               obstack_grow (ob, &zero, 4);
2035               to_wstr = obstack_finish (ob);
2036
2037               *top = obstack_alloc (ob, sizeof (struct translit_to_t));
2038               (*top)->str = to_wstr;
2039               (*top)->next = NULL;
2040             }
2041
2042           if (now->tok == tok_eol)
2043             {
2044               result->next = ctype->translit;
2045               ctype->translit = result;
2046               return;
2047             }
2048
2049           if (!ignore)
2050             top = &(*top)->next;
2051           ignore = 0;
2052         }
2053       else
2054         {
2055           to_wstr = read_widestring (ldfile, now, charmap, repertoire);
2056           if (to_wstr == (uint32_t *) -1l)
2057             {
2058               /* An error occurred.  */
2059               obstack_free (ob, result);
2060               return;
2061             }
2062
2063           if (to_wstr == NULL)
2064             ignore = 1;
2065           else
2066             /* This value is usable.  */
2067             obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
2068
2069           first = 0;
2070         }
2071     }
2072 }
2073
2074
2075 static void
2076 read_translit_ignore_entry (struct linereader *ldfile,
2077                             struct locale_ctype_t *ctype,
2078                             const struct charmap_t *charmap,
2079                             struct repertoire_t *repertoire)
2080 {
2081   /* We expect a semicolon-separated list of characters we ignore.  We are
2082      only interested in the wide character definitions.  These must be
2083      single characters, possibly defining a range when an ellipsis is used.  */
2084   while (1)
2085     {
2086       struct token *now = lr_token (ldfile, charmap, NULL, repertoire,
2087                                     verbose);
2088       struct translit_ignore_t *newp;
2089       uint32_t from;
2090
2091       if (now->tok == tok_eol || now->tok == tok_eof)
2092         {
2093           lr_error (ldfile,
2094                     _("premature end of `translit_ignore' definition"));
2095           return;
2096         }
2097
2098       if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2099         {
2100           lr_error (ldfile, _("syntax error"));
2101           lr_ignore_rest (ldfile, 0);
2102           return;
2103         }
2104
2105       if (now->tok == tok_ucs4)
2106         from = now->val.ucs4;
2107       else
2108         /* Try to get the value.  */
2109         from = repertoire_find_value (repertoire, now->val.str.startmb,
2110                                       now->val.str.lenmb);
2111
2112       if (from == ILLEGAL_CHAR_VALUE)
2113         {
2114           lr_error (ldfile, "invalid character name");
2115           newp = NULL;
2116         }
2117       else
2118         {
2119           newp = (struct translit_ignore_t *)
2120             obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
2121           newp->from = from;
2122           newp->to = from;
2123           newp->step = 1;
2124
2125           newp->next = ctype->translit_ignore;
2126           ctype->translit_ignore = newp;
2127         }
2128
2129       /* Now we expect either a semicolon, an ellipsis, or the end of the
2130          line.  */
2131       now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2132
2133       if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
2134         {
2135           /* XXX Should we bother implementing `....'?  `...' certainly
2136              will not be implemented.  */
2137           uint32_t to;
2138           int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
2139
2140           now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2141
2142           if (now->tok == tok_eol || now->tok == tok_eof)
2143             {
2144               lr_error (ldfile,
2145                         _("premature end of `translit_ignore' definition"));
2146               return;
2147             }
2148
2149           if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2150             {
2151               lr_error (ldfile, _("syntax error"));
2152               lr_ignore_rest (ldfile, 0);
2153               return;
2154             }
2155
2156           if (now->tok == tok_ucs4)
2157             to = now->val.ucs4;
2158           else
2159             /* Try to get the value.  */
2160             to = repertoire_find_value (repertoire, now->val.str.startmb,
2161                                         now->val.str.lenmb);
2162
2163           if (to == ILLEGAL_CHAR_VALUE)
2164             lr_error (ldfile, "invalid character name");
2165           else
2166             {
2167               /* Make sure the `to'-value is larger.  */
2168               if (to >= from)
2169                 {
2170                   newp->to = to;
2171                   newp->step = step;
2172                 }
2173               else
2174                 lr_error (ldfile, _("\
2175 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2176                           (to | from) < 65536 ? 4 : 8, to,
2177                           (to | from) < 65536 ? 4 : 8, from);
2178             }
2179
2180           /* And the next token.  */
2181           now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2182         }
2183
2184       if (now->tok == tok_eol || now->tok == tok_eof)
2185         /* We are done.  */
2186         return;
2187
2188       if (now->tok == tok_semicolon)
2189         /* Next round.  */
2190         continue;
2191
2192       /* If we come here something is wrong.  */
2193       lr_error (ldfile, _("syntax error"));
2194       lr_ignore_rest (ldfile, 0);
2195       return;
2196     }
2197 }
2198
2199
2200 /* The parser for the LC_CTYPE section of the locale definition.  */
2201 void
2202 ctype_read (struct linereader *ldfile, struct localedef_t *result,
2203             const struct charmap_t *charmap, const char *repertoire_name,
2204             int ignore_content)
2205 {
2206   struct repertoire_t *repertoire = NULL;
2207   struct locale_ctype_t *ctype;
2208   struct token *now;
2209   enum token_t nowtok;
2210   size_t cnt;
2211   uint32_t last_wch = 0;
2212   enum token_t last_token;
2213   enum token_t ellipsis_token;
2214   int step;
2215   char last_charcode[16];
2216   size_t last_charcode_len = 0;
2217   const char *last_str = NULL;
2218   int mapidx;
2219   struct localedef_t *copy_locale = NULL;
2220
2221   /* Get the repertoire we have to use.  */
2222   if (repertoire_name != NULL)
2223     repertoire = repertoire_read (repertoire_name);
2224
2225   /* The rest of the line containing `LC_CTYPE' must be free.  */
2226   lr_ignore_rest (ldfile, 1);
2227
2228
2229   do
2230     {
2231       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2232       nowtok = now->tok;
2233     }
2234   while (nowtok == tok_eol);
2235
2236   /* If we see `copy' now we are almost done.  */
2237   if (nowtok == tok_copy)
2238     {
2239       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2240       if (now->tok != tok_string)
2241         {
2242           SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2243
2244         skip_category:
2245           do
2246             now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2247           while (now->tok != tok_eof && now->tok != tok_end);
2248
2249           if (now->tok != tok_eof
2250               || (now = lr_token (ldfile, charmap, NULL, NULL, verbose),
2251                   now->tok == tok_eof))
2252             lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2253           else if (now->tok != tok_lc_ctype)
2254             {
2255               lr_error (ldfile, _("\
2256 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2257               lr_ignore_rest (ldfile, 0);
2258             }
2259           else
2260             lr_ignore_rest (ldfile, 1);
2261
2262           return;
2263         }
2264
2265       if (! ignore_content)
2266         {
2267           /* Get the locale definition.  */
2268           copy_locale = load_locale (LC_CTYPE, now->val.str.startmb,
2269                                      repertoire_name, charmap, NULL);
2270           if ((copy_locale->avail & CTYPE_LOCALE) == 0)
2271             {
2272               /* Not yet loaded.  So do it now.  */
2273               if (locfile_read (copy_locale, charmap) != 0)
2274                 goto skip_category;
2275             }
2276
2277           if (copy_locale->categories[LC_CTYPE].ctype == NULL)
2278             return;
2279         }
2280
2281       lr_ignore_rest (ldfile, 1);
2282
2283       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2284       nowtok = now->tok;
2285     }
2286
2287   /* Prepare the data structures.  */
2288   ctype_startup (ldfile, result, charmap, copy_locale, ignore_content);
2289   ctype = result->categories[LC_CTYPE].ctype;
2290
2291   /* Remember the repertoire we use.  */
2292   if (!ignore_content)
2293     ctype->repertoire = repertoire;
2294
2295   while (1)
2296     {
2297       unsigned long int class_bit = 0;
2298       unsigned long int class256_bit = 0;
2299       int handle_digits = 0;
2300
2301       /* Of course we don't proceed beyond the end of file.  */
2302       if (nowtok == tok_eof)
2303         break;
2304
2305       /* Ingore empty lines.  */
2306       if (nowtok == tok_eol)
2307         {
2308           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2309           nowtok = now->tok;
2310           continue;
2311         }
2312
2313       switch (nowtok)
2314         {
2315         case tok_charclass:
2316           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2317           while (now->tok == tok_ident || now->tok == tok_string)
2318             {
2319               ctype_class_new (ldfile, ctype, now->val.str.startmb);
2320               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2321               if (now->tok != tok_semicolon)
2322                 break;
2323               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2324             }
2325           if (now->tok != tok_eol)
2326             SYNTAX_ERROR (_("\
2327 %s: syntax error in definition of new character class"), "LC_CTYPE");
2328           break;
2329
2330         case tok_charconv:
2331           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2332           while (now->tok == tok_ident || now->tok == tok_string)
2333             {
2334               ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2335               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2336               if (now->tok != tok_semicolon)
2337                 break;
2338               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2339             }
2340           if (now->tok != tok_eol)
2341             SYNTAX_ERROR (_("\
2342 %s: syntax error in definition of new character map"), "LC_CTYPE");
2343           break;
2344
2345         case tok_class:
2346           /* Ignore the rest of the line if we don't need the input of
2347              this line.  */
2348           if (ignore_content)
2349             {
2350               lr_ignore_rest (ldfile, 0);
2351               break;
2352             }
2353
2354           /* We simply forget the `class' keyword and use the following
2355              operand to determine the bit.  */
2356           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2357           if (now->tok == tok_ident || now->tok == tok_string)
2358             {
2359               /* Must can be one of the predefined class names.  */
2360               for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2361                 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
2362                   break;
2363               if (cnt >= ctype->nr_charclass)
2364                 {
2365 #ifdef PREDEFINED_CLASSES
2366                   if (now->val.str.lenmb == 8
2367                       && memcmp ("special1", now->val.str.startmb, 8) == 0)
2368                     class_bit = _ISwspecial1;
2369                   else if (now->val.str.lenmb == 8
2370                       && memcmp ("special2", now->val.str.startmb, 8) == 0)
2371                     class_bit = _ISwspecial2;
2372                   else if (now->val.str.lenmb == 8
2373                       && memcmp ("special3", now->val.str.startmb, 8) == 0)
2374                     class_bit = _ISwspecial3;
2375                   else
2376 #endif
2377                     {
2378                       /* OK, it's a new class.  */
2379                       ctype_class_new (ldfile, ctype, now->val.str.startmb);
2380
2381                       class_bit = _ISwbit (ctype->nr_charclass - 1);
2382                     }
2383                 }
2384               else
2385                 {
2386                   class_bit = _ISwbit (cnt);
2387
2388                   free (now->val.str.startmb);
2389                 }
2390             }
2391           else if (now->tok == tok_digit)
2392             goto handle_tok_digit;
2393           else if (now->tok < tok_upper || now->tok > tok_blank)
2394             goto err_label;
2395           else
2396             {
2397               class_bit = BITw (now->tok);
2398               class256_bit = BIT (now->tok);
2399             }
2400
2401           /* The next character must be a semicolon.  */
2402           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2403           if (now->tok != tok_semicolon)
2404             goto err_label;
2405           goto read_charclass;
2406
2407         case tok_upper:
2408         case tok_lower:
2409         case tok_alpha:
2410         case tok_alnum:
2411         case tok_space:
2412         case tok_cntrl:
2413         case tok_punct:
2414         case tok_graph:
2415         case tok_print:
2416         case tok_xdigit:
2417         case tok_blank:
2418           /* Ignore the rest of the line if we don't need the input of
2419              this line.  */
2420           if (ignore_content)
2421             {
2422               lr_ignore_rest (ldfile, 0);
2423               break;
2424             }
2425
2426           class_bit = BITw (now->tok);
2427           class256_bit = BIT (now->tok);
2428           handle_digits = 0;
2429         read_charclass:
2430           ctype->class_done |= class_bit;
2431           last_token = tok_none;
2432           ellipsis_token = tok_none;
2433           step = 1;
2434           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2435           while (now->tok != tok_eol && now->tok != tok_eof)
2436             {
2437               uint32_t wch;
2438               struct charseq *seq;
2439
2440               if (ellipsis_token == tok_none)
2441                 {
2442                   if (get_character (now, charmap, repertoire, &seq, &wch))
2443                     goto err_label;
2444
2445                   if (!ignore_content && seq != NULL && seq->nbytes == 1)
2446                     /* Yep, we can store information about this byte
2447                        sequence.  */
2448                     ctype->class256_collection[seq->bytes[0]] |= class256_bit;
2449
2450                   if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2451                       && class_bit != 0)
2452                     /* We have the UCS4 position.  */
2453                     *find_idx (ctype, &ctype->class_collection,
2454                                &ctype->class_collection_max,
2455                                &ctype->class_collection_act, wch) |= class_bit;
2456
2457                   last_token = now->tok;
2458                   /* Terminate the string.  */
2459                   if (last_token == tok_bsymbol)
2460                     {
2461                       now->val.str.startmb[now->val.str.lenmb] = '\0';
2462                       last_str = now->val.str.startmb;
2463                     }
2464                   else
2465                     last_str = NULL;
2466                   last_wch = wch;
2467                   memcpy (last_charcode, now->val.charcode.bytes, 16);
2468                   last_charcode_len = now->val.charcode.nbytes;
2469
2470                   if (!ignore_content && handle_digits == 1)
2471                     {
2472                       /* We must store the digit values.  */
2473                       if (ctype->mbdigits_act == ctype->mbdigits_max)
2474                         {
2475                           ctype->mbdigits_max += 10;
2476                           ctype->mbdigits = xrealloc (ctype->mbdigits,
2477                                                       (ctype->mbdigits_max
2478                                                        * sizeof (char *)));
2479                           ctype->wcdigits_max += 10;
2480                           ctype->wcdigits = xrealloc (ctype->wcdigits,
2481                                                       (ctype->wcdigits_max
2482                                                        * sizeof (uint32_t)));
2483                         }
2484
2485                       ctype->mbdigits[ctype->mbdigits_act++] = seq;
2486                       ctype->wcdigits[ctype->wcdigits_act++] = wch;
2487                     }
2488                   else if (!ignore_content && handle_digits == 2)
2489                     {
2490                       /* We must store the digit values.  */
2491                       if (ctype->outdigits_act >= 10)
2492                         {
2493                           lr_error (ldfile, _("\
2494 %s: field `%s' does not contain exactly ten entries"),
2495                             "LC_CTYPE", "outdigit");
2496                           lr_ignore_rest (ldfile, 0);
2497                           break;
2498                         }
2499
2500                       ctype->mboutdigits[ctype->outdigits_act] = seq;
2501                       ctype->wcoutdigits[ctype->outdigits_act] = wch;
2502                       ++ctype->outdigits_act;
2503                     }
2504                 }
2505               else
2506                 {
2507                   /* Now it gets complicated.  We have to resolve the
2508                      ellipsis problem.  First we must distinguish between
2509                      the different kind of ellipsis and this must match the
2510                      tokens we have seen.  */
2511                   assert (last_token != tok_none);
2512
2513                   if (last_token != now->tok)
2514                     {
2515                       lr_error (ldfile, _("\
2516 ellipsis range must be marked by two operands of same type"));
2517                       lr_ignore_rest (ldfile, 0);
2518                       break;
2519                     }
2520
2521                   if (last_token == tok_bsymbol)
2522                     {
2523                       if (ellipsis_token == tok_ellipsis3)
2524                         lr_error (ldfile, _("with symbolic name range values \
2525 the absolute ellipsis `...' must not be used"));
2526
2527                       charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2528                                                    repertoire, now, last_str,
2529                                                    class256_bit, class_bit,
2530                                                    (ellipsis_token
2531                                                     == tok_ellipsis4
2532                                                     ? 10 : 16),
2533                                                    ignore_content,
2534                                                    handle_digits, step);
2535                     }
2536                   else if (last_token == tok_ucs4)
2537                     {
2538                       if (ellipsis_token != tok_ellipsis2)
2539                         lr_error (ldfile, _("\
2540 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2541
2542                       charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2543                                                repertoire, now, last_wch,
2544                                                class256_bit, class_bit,
2545                                                ignore_content, handle_digits,
2546                                                step);
2547                     }
2548                   else
2549                     {
2550                       assert (last_token == tok_charcode);
2551
2552                       if (ellipsis_token != tok_ellipsis3)
2553                         lr_error (ldfile, _("\
2554 with character code range values one must use the absolute ellipsis `...'"));
2555
2556                       charclass_charcode_ellipsis (ldfile, ctype, charmap,
2557                                                    repertoire, now,
2558                                                    last_charcode,
2559                                                    last_charcode_len,
2560                                                    class256_bit, class_bit,
2561                                                    ignore_content,
2562                                                    handle_digits);
2563                     }
2564
2565                   /* Now we have used the last value.  */
2566                   last_token = tok_none;
2567                 }
2568
2569               /* Next we expect a semicolon or the end of the line.  */
2570               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2571               if (now->tok == tok_eol || now->tok == tok_eof)
2572                 break;
2573
2574               if (last_token != tok_none
2575                   && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
2576                 {
2577                   if (now->tok == tok_ellipsis2_2)
2578                     {
2579                       now->tok = tok_ellipsis2;
2580                       step = 2;
2581                     }
2582                   else if (now->tok == tok_ellipsis4_2)
2583                     {
2584                       now->tok = tok_ellipsis4;
2585                       step = 2;
2586                     }
2587
2588                   ellipsis_token = now->tok;
2589
2590                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2591                   continue;
2592                 }
2593
2594               if (now->tok != tok_semicolon)
2595                 goto err_label;
2596
2597               /* And get the next character.  */
2598               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2599
2600               ellipsis_token = tok_none;
2601               step = 1;
2602             }
2603           break;
2604
2605         case tok_digit:
2606           /* Ignore the rest of the line if we don't need the input of
2607              this line.  */
2608           if (ignore_content)
2609             {
2610               lr_ignore_rest (ldfile, 0);
2611               break;
2612             }
2613
2614         handle_tok_digit:
2615           class_bit = _ISwdigit;
2616           class256_bit = _ISdigit;
2617           handle_digits = 1;
2618           goto read_charclass;
2619
2620         case tok_outdigit:
2621           /* Ignore the rest of the line if we don't need the input of
2622              this line.  */
2623           if (ignore_content)
2624             {
2625               lr_ignore_rest (ldfile, 0);
2626               break;
2627             }
2628
2629           if (ctype->outdigits_act != 0)
2630             lr_error (ldfile, _("\
2631 %s: field `%s' declared more than once"),
2632                       "LC_CTYPE", "outdigit");
2633           class_bit = 0;
2634           class256_bit = 0;
2635           handle_digits = 2;
2636           goto read_charclass;
2637
2638         case tok_toupper:
2639           /* Ignore the rest of the line if we don't need the input of
2640              this line.  */
2641           if (ignore_content)
2642             {
2643               lr_ignore_rest (ldfile, 0);
2644               break;
2645             }
2646
2647           mapidx = 0;
2648           goto read_mapping;
2649
2650         case tok_tolower:
2651           /* Ignore the rest of the line if we don't need the input of
2652              this line.  */
2653           if (ignore_content)
2654             {
2655               lr_ignore_rest (ldfile, 0);
2656               break;
2657             }
2658
2659           mapidx = 1;
2660           goto read_mapping;
2661
2662         case tok_map:
2663           /* Ignore the rest of the line if we don't need the input of
2664              this line.  */
2665           if (ignore_content)
2666             {
2667               lr_ignore_rest (ldfile, 0);
2668               break;
2669             }
2670
2671           /* We simply forget the `map' keyword and use the following
2672              operand to determine the mapping.  */
2673           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2674           if (now->tok == tok_ident || now->tok == tok_string)
2675             {
2676               size_t cnt;
2677
2678               for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2679                 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2680                   break;
2681
2682               if (cnt < ctype->map_collection_nr)
2683                 free (now->val.str.startmb);
2684               else
2685                 /* OK, it's a new map.  */
2686                 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2687
2688               mapidx = cnt;
2689             }
2690           else if (now->tok < tok_toupper || now->tok > tok_tolower)
2691             goto err_label;
2692           else
2693             mapidx = now->tok - tok_toupper;
2694
2695           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2696           /* This had better be a semicolon.  */
2697           if (now->tok != tok_semicolon)
2698             goto err_label;
2699
2700         read_mapping:
2701           /* Test whether this mapping was already defined.  */
2702           if (ctype->tomap_done[mapidx])
2703             {
2704               lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2705                         ctype->mapnames[mapidx]);
2706               lr_ignore_rest (ldfile, 0);
2707               break;
2708             }
2709           ctype->tomap_done[mapidx] = 1;
2710
2711           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2712           while (now->tok != tok_eol && now->tok != tok_eof)
2713             {
2714               struct charseq *from_seq;
2715               uint32_t from_wch;
2716               struct charseq *to_seq;
2717               uint32_t to_wch;
2718
2719               /* Every pair starts with an opening brace.  */
2720               if (now->tok != tok_open_brace)
2721                 goto err_label;
2722
2723               /* Next comes the from-value.  */
2724               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2725               if (get_character (now, charmap, repertoire, &from_seq,
2726                                  &from_wch) != 0)
2727                 goto err_label;
2728
2729               /* The next is a comma.  */
2730               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2731               if (now->tok != tok_comma)
2732                 goto err_label;
2733
2734               /* And the other value.  */
2735               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2736               if (get_character (now, charmap, repertoire, &to_seq,
2737                                  &to_wch) != 0)
2738                 goto err_label;
2739
2740               /* And the last thing is the closing brace.  */
2741               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2742               if (now->tok != tok_close_brace)
2743                 goto err_label;
2744
2745               if (!ignore_content)
2746                 {
2747                   /* Check whether the mapping converts from an ASCII value
2748                      to a non-ASCII value.  */
2749                   if (from_seq != NULL && from_seq->nbytes == 1
2750                       && isascii (from_seq->bytes[0])
2751                       && to_seq != NULL && (to_seq->nbytes != 1
2752                                             || !isascii (to_seq->bytes[0])))
2753                     ctype->to_nonascii = 1;
2754
2755                   if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2756                       && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2757                     /* We can use this value.  */
2758                     ctype->map256_collection[mapidx][from_seq->bytes[0]]
2759                       = to_seq->bytes[0];
2760
2761                   if (from_wch != ILLEGAL_CHAR_VALUE
2762                       && to_wch != ILLEGAL_CHAR_VALUE)
2763                     /* Both correct values.  */
2764                     *find_idx (ctype, &ctype->map_collection[mapidx],
2765                                &ctype->map_collection_max[mapidx],
2766                                &ctype->map_collection_act[mapidx],
2767                                from_wch) = to_wch;
2768                 }
2769
2770               /* Now comes a semicolon or the end of the line/file.  */
2771               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2772               if (now->tok == tok_semicolon)
2773                 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2774             }
2775           break;
2776
2777         case tok_translit_start:
2778           /* Ignore the entire translit section with its peculiar syntax
2779              if we don't need the input.  */
2780           if (ignore_content)
2781             {
2782               do
2783                 {
2784                   lr_ignore_rest (ldfile, 0);
2785                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2786                 }
2787               while (now->tok != tok_translit_end && now->tok != tok_eof);
2788
2789               if (now->tok == tok_eof)
2790                 lr_error (ldfile, _(\
2791 "%s: `translit_start' section does not end with `translit_end'"),
2792                           "LC_CTYPE");
2793
2794               break;
2795             }
2796
2797           /* The rest of the line had better be empty.  */
2798           lr_ignore_rest (ldfile, 1);
2799
2800           /* We count here the number of allocated entries in the `translit'
2801              array.  */
2802           cnt = 0;
2803
2804           ldfile->translate_strings = 1;
2805           ldfile->return_widestr = 1;
2806
2807           /* We proceed until we see the `translit_end' token.  */
2808           while (now = lr_token (ldfile, charmap, NULL, repertoire, verbose),
2809                  now->tok != tok_translit_end && now->tok != tok_eof)
2810             {
2811               if (now->tok == tok_eol)
2812                 /* Ignore empty lines.  */
2813                 continue;
2814
2815               if (now->tok == tok_include)
2816                 {
2817                   /* We have to include locale.  */
2818                   const char *locale_name;
2819                   const char *repertoire_name;
2820                   struct translit_include_t *include_stmt, **include_ptr;
2821
2822                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2823                   /* This should be a string or an identifier.  In any
2824                      case something to name a locale.  */
2825                   if (now->tok != tok_string && now->tok != tok_ident)
2826                     {
2827                     translit_syntax:
2828                       lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2829                       lr_ignore_rest (ldfile, 0);
2830                       continue;
2831                     }
2832                   locale_name = now->val.str.startmb;
2833
2834                   /* Next should be a semicolon.  */
2835                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2836                   if (now->tok != tok_semicolon)
2837                     goto translit_syntax;
2838
2839                   /* Now the repertoire name.  */
2840                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2841                   if ((now->tok != tok_string && now->tok != tok_ident)
2842                       || now->val.str.startmb == NULL)
2843                     goto translit_syntax;
2844                   repertoire_name = now->val.str.startmb;
2845                   if (repertoire_name[0] == '\0')
2846                     /* Ignore the empty string.  */
2847                     repertoire_name = NULL;
2848
2849                   /* Save the include statement for later processing.  */
2850                   include_stmt = (struct translit_include_t *)
2851                     xmalloc (sizeof (struct translit_include_t));
2852                   include_stmt->copy_locale = locale_name;
2853                   include_stmt->copy_repertoire = repertoire_name;
2854                   include_stmt->next = NULL;
2855
2856                   include_ptr = &ctype->translit_include;
2857                   while (*include_ptr != NULL)
2858                     include_ptr = &(*include_ptr)->next;
2859                   *include_ptr = include_stmt;
2860
2861                   /* The rest of the line must be empty.  */
2862                   lr_ignore_rest (ldfile, 1);
2863
2864                   /* Make sure the locale is read.  */
2865                   add_to_readlist (LC_CTYPE, locale_name, repertoire_name,
2866                                    1, NULL);
2867                   continue;
2868                 }
2869               else if (now->tok == tok_default_missing)
2870                 {
2871                   uint32_t *wstr;
2872
2873                   while (1)
2874                     {
2875                       /* We expect a single character or string as the
2876                          argument.  */
2877                       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2878                       wstr = read_widestring (ldfile, now, charmap,
2879                                               repertoire);
2880
2881                       if (wstr != NULL)
2882                         {
2883                           if (ctype->default_missing != NULL)
2884                             {
2885                               lr_error (ldfile, _("\
2886 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2887                               WITH_CUR_LOCALE (error_at_line (0, 0,
2888                                                               ctype->default_missing_file,
2889                                                               ctype->default_missing_lineno,
2890                                                               _("\
2891 previous definition was here")));
2892                             }
2893                           else
2894                             {
2895                               ctype->default_missing = wstr;
2896                               ctype->default_missing_file = ldfile->fname;
2897                               ctype->default_missing_lineno = ldfile->lineno;
2898                             }
2899                           /* We can have more entries, ignore them.  */
2900                           lr_ignore_rest (ldfile, 0);
2901                           break;
2902                         }
2903                       else if (wstr == (uint32_t *) -1l)
2904                         /* This was a syntax error.  */
2905                         break;
2906
2907                       /* Maybe there is another replacement we can use.  */
2908                       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2909                       if (now->tok == tok_eol || now->tok == tok_eof)
2910                         {
2911                           /* Nothing found.  We tell the user.  */
2912                           lr_error (ldfile, _("\
2913 %s: no representable `default_missing' definition found"), "LC_CTYPE");
2914                           break;
2915                         }
2916                       if (now->tok != tok_semicolon)
2917                         goto translit_syntax;
2918                     }
2919
2920                   continue;
2921                 }
2922               else if (now->tok == tok_translit_ignore)
2923                 {
2924                   read_translit_ignore_entry (ldfile, ctype, charmap,
2925                                               repertoire);
2926                   continue;
2927                 }
2928
2929               read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2930             }
2931           ldfile->return_widestr = 0;
2932
2933           if (now->tok == tok_eof)
2934             lr_error (ldfile, _(\
2935 "%s: `translit_start' section does not end with `translit_end'"),
2936                       "LC_CTYPE");
2937
2938           break;
2939
2940         case tok_ident:
2941           /* Ignore the rest of the line if we don't need the input of
2942              this line.  */
2943           if (ignore_content)
2944             {
2945               lr_ignore_rest (ldfile, 0);
2946               break;
2947             }
2948
2949           /* This could mean one of several things.  First test whether
2950              it's a character class name.  */
2951           for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2952             if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2953               break;
2954           if (cnt < ctype->nr_charclass)
2955             {
2956               class_bit = _ISwbit (cnt);
2957               class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2958               free (now->val.str.startmb);
2959               goto read_charclass;
2960             }
2961           for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2962             if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2963               break;
2964           if (cnt < ctype->map_collection_nr)
2965             {
2966               mapidx = cnt;
2967               free (now->val.str.startmb);
2968               goto read_mapping;
2969             }
2970 #ifdef PREDEFINED_CLASSES
2971           if (strcmp (now->val.str.startmb, "special1") == 0)
2972             {
2973               class_bit = _ISwspecial1;
2974               free (now->val.str.startmb);
2975               goto read_charclass;
2976             }
2977           if (strcmp (now->val.str.startmb, "special2") == 0)
2978             {
2979               class_bit = _ISwspecial2;
2980               free (now->val.str.startmb);
2981               goto read_charclass;
2982             }
2983           if (strcmp (now->val.str.startmb, "special3") == 0)
2984             {
2985               class_bit = _ISwspecial3;
2986               free (now->val.str.startmb);
2987               goto read_charclass;
2988             }
2989           if (strcmp (now->val.str.startmb, "tosymmetric") == 0)
2990             {
2991               mapidx = 2;
2992               goto read_mapping;
2993             }
2994 #endif
2995           break;
2996
2997         case tok_end:
2998           /* Next we assume `LC_CTYPE'.  */
2999           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
3000           if (now->tok == tok_eof)
3001             break;
3002           if (now->tok == tok_eol)
3003             lr_error (ldfile, _("%s: incomplete `END' line"),
3004                       "LC_CTYPE");
3005           else if (now->tok != tok_lc_ctype)
3006             lr_error (ldfile, _("\
3007 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
3008           lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
3009           return;
3010
3011         default:
3012         err_label:
3013           if (now->tok != tok_eof)
3014             SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
3015         }
3016
3017       /* Prepare for the next round.  */
3018       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
3019       nowtok = now->tok;
3020     }
3021
3022   /* When we come here we reached the end of the file.  */
3023   lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
3024 }
3025
3026
3027 static void
3028 set_class_defaults (struct locale_ctype_t *ctype,
3029                     const struct charmap_t *charmap,
3030                     struct repertoire_t *repertoire)
3031 {
3032   size_t cnt;
3033
3034   /* This function defines the default values for the classes and conversions
3035      according to POSIX.2 2.5.2.1.
3036      It may seem that the order of these if-blocks is arbitrary but it is NOT.
3037      Don't move them unless you know what you do!  */
3038
3039   auto void set_default (int bitpos, int from, int to);
3040   /* Set the class bit BITPOS for every character code FROM..TO.  */
3041   void set_default (int bitpos, int from, int to)
3042     {
3043       char tmp[2];  /* Single-character string plus NUL.  */
3044       int ch;
3045       int bit = _ISbit (bitpos);  /* ORed into class256_collection.  */
3046       int bitw = _ISwbit (bitpos);  /* ORed into class_collection.  */
3047       /* Fill TMP so that tmp[1] holds the terminating NUL.  */
3048       strcpy (tmp, "?");
3049
3050       for (ch = from; ch <= to; ++ch)
3051         {
3052           struct charseq *seq;
3053           tmp[0] = ch;
3054           /* Look up TMP in the charmap; fall back to the U%08X form.  */
3055           seq = charmap_find_value (charmap, tmp, 1);
3056           if (seq == NULL)
3057             {
3058               char buf[10];  /* 'U', eight hex digits, NUL.  */
3059               sprintf (buf, "U%08X", ch);
3060               seq = charmap_find_value (charmap, buf, 9);
3061             }
3062           if (seq == NULL)
3063             {
3064               if (!be_quiet)
3065                 WITH_CUR_LOCALE (error (0, 0, _("\
3066 %s: character `%s' not defined while needed as default value"),
3067                                         "LC_CTYPE", tmp));
3068             }
3069           else if (seq->nbytes != 1)  /* Not gated by be_quiet.  */
3070             WITH_CUR_LOCALE (error (0, 0, _("\
3071 %s: character `%s' in charmap not representable with one byte"),
3072                                     "LC_CTYPE", tmp));
3073           else
3074             ctype->class256_collection[seq->bytes[0]] |= bit;
3075
3076           /* No need to search here, the ASCII value is also the Unicode
3077              value.  This runs whether or not the charmap lookup succeeded.  */
3078           ELEM (ctype, class_collection, , ch) |= bitw;
3079         }
3080     }
3081
3082   /* Set default values if keyword was not present.  */
3083   if ((ctype->class_done & BITw (tok_upper)) == 0)
3084     /* "If this keyword [upper] is not specified, the uppercase letters
3085         `A' through `Z', ..., shall automatically belong to this class,
3086         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
3087     set_default (BITPOS (tok_upper), 'A', 'Z');
3088
3089   if ((ctype->class_done & BITw (tok_lower)) == 0)
3090     /* "If this keyword [lower] is not specified, the lowercase letters
3091         `a' through `z', ..., shall automatically belong to this class,
3092         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
3093     set_default (BITPOS (tok_lower), 'a', 'z');
3094
3095   if ((ctype->class_done & BITw (tok_alpha)) == 0)
3096     {
3097       /* Table 2-6 in P1003.2 says that characters in class `upper' or
3098          class `lower' *must* be in class `alpha'.  */
3099       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
3100       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
3101
3102       for (cnt = 0; cnt < 256; ++cnt)
3103         if ((ctype->class256_collection[cnt] & mask) != 0)
3104           ctype->class256_collection[cnt] |= BIT (tok_alpha);
3105
3106       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3107         if ((ctype->class_collection[cnt] & maskw) != 0)
3108           ctype->class_collection[cnt] |= BITw (tok_alpha);
3109     }
3110
3111   if ((ctype->class_done & BITw (tok_digit)) == 0)
3112     /* "If this keyword [digit] is not specified, the digits `0' through
3113         `9', ..., shall automatically belong to this class, with
3114         implementation-defined character values."  [P1003.2, 2.5.2.1]  */
3115     set_default (BITPOS (tok_digit), '0', '9');
3116
3117   /* "Only characters specified for the `alpha' and `digit' keywords
3118      shall be specified.  Characters specified for the keywords `alpha'
3119      and `digit' are automatically included in this class [alnum]."  */
3120   {
3121     unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
3122     unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
3123
3124     for (cnt = 0; cnt < 256; ++cnt)
3125       if ((ctype->class256_collection[cnt] & mask) != 0)
3126         ctype->class256_collection[cnt] |= BIT (tok_alnum);
3127
3128     for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3129       if ((ctype->class_collection[cnt] & maskw) != 0)
3130         ctype->class_collection[cnt] |= BITw (tok_alnum);
3131   }
3132
3133   if ((ctype->class_done & BITw (tok_space)) == 0)
3134     /* "If this keyword [space] is not specified, the characters <space>,
3135         <form-feed>, <newline>, <carriage-return>, <tab>, and
3136         <vertical-tab>, ..., shall automatically belong to this class,
3137         with implementation-defined character values."  [P1003.2, 2.5.2.1]  */
3138     {
3139       struct charseq *seq;
3140
3141       seq = charmap_find_value (charmap, "space", 5);
3142       if (seq == NULL)
3143         seq = charmap_find_value (charmap, "SP", 2);
3144       if (seq == NULL)
3145         seq = charmap_find_value (charmap, "U00000020", 9);
3146       if (seq == NULL)
3147         {
3148           if (!be_quiet)
3149             WITH_CUR_LOCALE (error (0, 0, _("\
3150 %s: character `%s' not defined while needed as default value"),
3151                                     "LC_CTYPE", "<space>"));
3152         }
3153       else if (seq->nbytes != 1)
3154         WITH_CUR_LOCALE (error (0, 0, _("\
3155 %s: character `%s' in charmap not representable with one byte"),
3156                                 "LC_CTYPE", "<space>"));
3157       else
3158         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3159
3160       /* No need to search.  */
3161       ELEM (ctype, class_collection, , L' ') |= BITw (tok_space);
3162
3163       seq = charmap_find_value (charmap, "form-feed", 9);
3164       if (seq == NULL)
3165         seq = charmap_find_value (charmap, "U0000000C", 9);
3166       if (seq == NULL)
3167         {
3168           if (!be_quiet)
3169             WITH_CUR_LOCALE (error (0, 0, _("\
3170 %s: character `%s' not defined while needed as default value"),
3171                                     "LC_CTYPE", "<form-feed>"));
3172         }
3173       else if (seq->nbytes != 1)
3174         WITH_CUR_LOCALE (error (0, 0, _("\
3175 %s: character `%s' in charmap not representable with one byte"),
3176                                 "LC_CTYPE", "<form-feed>"));
3177       else
3178         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3179
3180       /* No need to search.  */
3181       ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space);
3182
3183
3184       seq = charmap_find_value (charmap, "newline", 7);
3185       if (seq == NULL)
3186         seq = charmap_find_value (charmap, "U0000000A", 9);
3187       if (seq == NULL)
3188         {
3189           if (!be_quiet)
3190             WITH_CUR_LOCALE (error (0, 0, _("\
3191 %s: character `%s' not defined while needed as default value"),
3192                                     "LC_CTYPE", "<newline>"));
3193         }
3194       else if (seq->nbytes != 1)
3195         WITH_CUR_LOCALE (error (0, 0, _("\
3196 %s: character `%s' in charmap not representable with one byte"),
3197                                 "LC_CTYPE", "<newline>"));
3198       else
3199         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3200
3201       /* No need to search.  */
3202       ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space);
3203
3204
3205       seq = charmap_find_value (charmap, "carriage-return", 15);
3206       if (seq == NULL)
3207         seq = charmap_find_value (charmap, "U0000000D", 9);
3208       if (seq == NULL)
3209         {
3210           if (!be_quiet)
3211             WITH_CUR_LOCALE (error (0, 0, _("\
3212 %s: character `%s' not defined while needed as default value"),
3213                                     "LC_CTYPE", "<carriage-return>"));
3214         }
3215       else if (seq->nbytes != 1)
3216         WITH_CUR_LOCALE (error (0, 0, _("\
3217 %s: character `%s' in charmap not representable with one byte"),
3218                                 "LC_CTYPE", "<carriage-return>"));
3219       else
3220         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3221
3222       /* No need to search.  */
3223       ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space);
3224
3225
3226       seq = charmap_find_value (charmap, "tab", 3);
3227       if (seq == NULL)
3228         seq = charmap_find_value (charmap, "U00000009", 9);
3229       if (seq == NULL)
3230         {
3231           if (!be_quiet)
3232             WITH_CUR_LOCALE (error (0, 0, _("\
3233 %s: character `%s' not defined while needed as default value"),
3234                                     "LC_CTYPE", "<tab>"));
3235         }
3236       else if (seq->nbytes != 1)
3237         WITH_CUR_LOCALE (error (0, 0, _("\
3238 %s: character `%s' in charmap not representable with one byte"),
3239                                 "LC_CTYPE", "<tab>"));
3240       else
3241         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3242
3243       /* No need to search.  */
3244       ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space);
3245
3246
3247       seq = charmap_find_value (charmap, "vertical-tab", 12);
3248       if (seq == NULL)
3249         seq = charmap_find_value (charmap, "U0000000B", 9);
3250       if (seq == NULL)
3251         {
3252           if (!be_quiet)
3253             WITH_CUR_LOCALE (error (0, 0, _("\
3254 %s: character `%s' not defined while needed as default value"),
3255                                     "LC_CTYPE", "<vertical-tab>"));
3256         }
3257       else if (seq->nbytes != 1)
3258         WITH_CUR_LOCALE (error (0, 0, _("\
3259 %s: character `%s' in charmap not representable with one byte"),
3260                                 "LC_CTYPE", "<vertical-tab>"));
3261       else
3262         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3263
3264       /* No need to search.  */
3265       ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space);
3266     }
3267
3268   if ((ctype->class_done & BITw (tok_xdigit)) == 0)
3269     /* "If this keyword is not specified, the digits `0' to `9', the
3270         uppercase letters `A' through `F', and the lowercase letters `a'
3271         through `f', ..., shall automatically belong to this class, with
3272         implementation defined character values."  [P1003.2, 2.5.2.1]  */
3273     {
3274       set_default (BITPOS (tok_xdigit), '0', '9');
3275       set_default (BITPOS (tok_xdigit), 'A', 'F');
3276       set_default (BITPOS (tok_xdigit), 'a', 'f');
3277     }
3278
3279   if ((ctype->class_done & BITw (tok_blank)) == 0)
3280     /* "If this keyword [blank] is unspecified, the characters <space> and
3281        <tab> shall belong to this character class."  [P1003.2, 2.5.2.1]  */
3282    {
3283       struct charseq *seq;
3284
3285       seq = charmap_find_value (charmap, "space", 5);
3286       if (seq == NULL)
3287         seq = charmap_find_value (charmap, "SP", 2);
3288       if (seq == NULL)
3289         seq = charmap_find_value (charmap, "U00000020", 9);
3290       if (seq == NULL)
3291         {
3292           if (!be_quiet)
3293             WITH_CUR_LOCALE (error (0, 0, _("\
3294 %s: character `%s' not defined while needed as default value"),
3295                                     "LC_CTYPE", "<space>"));
3296         }
3297       else if (seq->nbytes != 1)
3298         WITH_CUR_LOCALE (error (0, 0, _("\
3299 %s: character `%s' in charmap not representable with one byte"),
3300                                 "LC_CTYPE", "<space>"));
3301       else
3302         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3303
3304       /* No need to search.  */
3305       ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank);
3306
3307
3308       seq = charmap_find_value (charmap, "tab", 3);
3309       if (seq == NULL)
3310         seq = charmap_find_value (charmap, "U00000009", 9);
3311       if (seq == NULL)
3312         {
3313           if (!be_quiet)
3314             WITH_CUR_LOCALE (error (0, 0, _("\
3315 %s: character `%s' not defined while needed as default value"),
3316                                     "LC_CTYPE", "<tab>"));
3317         }
3318       else if (seq->nbytes != 1)
3319         WITH_CUR_LOCALE (error (0, 0, _("\
3320 %s: character `%s' in charmap not representable with one byte"),
3321                                 "LC_CTYPE", "<tab>"));
3322       else
3323         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3324
3325       /* No need to search.  */
3326       ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank);
3327     }
3328
3329   if ((ctype->class_done & BITw (tok_graph)) == 0)
3330     /* "If this keyword [graph] is not specified, characters specified for
3331         the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3332         shall belong to this character class."  [P1003.2, 2.5.2.1]  */
3333     {
3334       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3335         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3336       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3337         BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3338         BITw (tok_punct);
3339       size_t cnt;
3340
3341       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3342         if ((ctype->class_collection[cnt] & maskw) != 0)
3343           ctype->class_collection[cnt] |= BITw (tok_graph);
3344
3345       for (cnt = 0; cnt < 256; ++cnt)
3346         if ((ctype->class256_collection[cnt] & mask) != 0)
3347           ctype->class256_collection[cnt] |= BIT (tok_graph);
3348     }
3349
3350   if ((ctype->class_done & BITw (tok_print)) == 0)
3351     /* "If this keyword [print] is not provided, characters specified for
3352         the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3353         and the <space> character shall belong to this character class."
3354         [P1003.2, 2.5.2.1]  */
3355     {
3356       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3357         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3358       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3359         BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3360         BITw (tok_punct);
3361       size_t cnt;
3362       struct charseq *seq;
3363
3364       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3365         if ((ctype->class_collection[cnt] & maskw) != 0)
3366           ctype->class_collection[cnt] |= BITw (tok_print);
3367
3368       for (cnt = 0; cnt < 256; ++cnt)
3369         if ((ctype->class256_collection[cnt] & mask) != 0)
3370           ctype->class256_collection[cnt] |= BIT (tok_print);
3371
3372
3373       seq = charmap_find_value (charmap, "space", 5);
3374       if (seq == NULL)
3375         seq = charmap_find_value (charmap, "SP", 2);
3376       if (seq == NULL)
3377         seq = charmap_find_value (charmap, "U00000020", 9);
3378       if (seq == NULL)
3379         {
3380           if (!be_quiet)
3381             WITH_CUR_LOCALE (error (0, 0, _("\
3382 %s: character `%s' not defined while needed as default value"),
3383                                     "LC_CTYPE", "<space>"));
3384         }
3385       else if (seq->nbytes != 1)
3386         WITH_CUR_LOCALE (error (0, 0, _("\
3387 %s: character `%s' in charmap not representable with one byte"),
3388                                 "LC_CTYPE", "<space>"));
3389       else
3390         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
3391
3392       /* No need to search.  */
3393       ELEM (ctype, class_collection, , L' ') |= BITw (tok_print);
3394     }
3395
3396   if (ctype->tomap_done[0] == 0)
3397     /* "If this keyword [toupper] is not specified, the lowercase letters
3398         `a' through `z', and their corresponding uppercase letters `A' to
3399         `Z', ..., shall automatically be included, with implementation-
3400         defined character values."  [P1003.2, 2.5.2.1]  */
3401     {
3402       char tmp[4];
3403       int ch;
3404
3405       strcpy (tmp, "<?>");
3406
3407       for (ch = 'a'; ch <= 'z'; ++ch)
3408         {
3409           struct charseq *seq_from, *seq_to;
3410
3411           tmp[1] = (char) ch;
3412
3413           seq_from = charmap_find_value (charmap, &tmp[1], 1);
3414           if (seq_from == NULL)
3415             {
3416               char buf[10];
3417               sprintf (buf, "U%08X", ch);
3418               seq_from = charmap_find_value (charmap, buf, 9);
3419             }
3420           if (seq_from == NULL)
3421             {
3422               if (!be_quiet)
3423                 WITH_CUR_LOCALE (error (0, 0, _("\
3424 %s: character `%s' not defined while needed as default value"),
3425                                         "LC_CTYPE", tmp));
3426             }
3427           else if (seq_from->nbytes != 1)
3428             {
3429               if (!be_quiet)
3430                 WITH_CUR_LOCALE (error (0, 0, _("\
3431 %s: character `%s' needed as default value not representable with one byte"),
3432                                         "LC_CTYPE", tmp));
3433             }
3434           else
3435             {
3436               /* This conversion is implementation defined.  */
3437               tmp[1] = (char) (ch + ('A' - 'a'));
3438               seq_to = charmap_find_value (charmap, &tmp[1], 1);
3439               if (seq_to == NULL)
3440                 {
3441                   char buf[10];
3442                   sprintf (buf, "U%08X", ch + ('A' - 'a'));
3443                   seq_to = charmap_find_value (charmap, buf, 9);
3444                 }
3445               if (seq_to == NULL)
3446                 {
3447                   if (!be_quiet)
3448                     WITH_CUR_LOCALE (error (0, 0, _("\
3449 %s: character `%s' not defined while needed as default value"),
3450                                             "LC_CTYPE", tmp));
3451                 }
3452               else if (seq_to->nbytes != 1)
3453                 {
3454                   if (!be_quiet)
3455                     WITH_CUR_LOCALE (error (0, 0, _("\
3456 %s: character `%s' needed as default value not representable with one byte"),
3457                                             "LC_CTYPE", tmp));
3458                 }
3459               else
3460                 /* The index [0] is determined by the order of the
3461                    `ctype_map_newP' calls in `ctype_startup'.  */
3462                 ctype->map256_collection[0][seq_from->bytes[0]]
3463                   = seq_to->bytes[0];
3464             }
3465
3466           /* No need to search.  */
3467           ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
3468         }
3469     }
3470
3471   if (ctype->tomap_done[1] == 0)
3472     /* "If this keyword [tolower] is not specified, the mapping shall be
3473        the reverse mapping of the one specified to `toupper'."  [P1003.2]  */
3474     {
3475       for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
3476         if (ctype->map_collection[0][cnt] != 0)
3477           ELEM (ctype, map_collection, [1],
3478                 ctype->map_collection[0][cnt])
3479             = ctype->charnames[cnt];
3480
3481       for (cnt = 0; cnt < 256; ++cnt)
3482         if (ctype->map256_collection[0][cnt] != 0)
3483           ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
3484     }
3485
3486   if (ctype->outdigits_act != 10)
3487     {
3488       if (ctype->outdigits_act != 0)
3489         WITH_CUR_LOCALE (error (0, 0, _("\
3490 %s: field `%s' does not contain exactly ten entries"),
3491                                 "LC_CTYPE", "outdigit"));
3492
3493       for (cnt = ctype->outdigits_act; cnt < 10; ++cnt)
3494         {
3495           ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3496                                                          (char *) digits + cnt,
3497                                                          1);
3498
3499           if (ctype->mboutdigits[cnt] == NULL)
3500             ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3501                                                            longnames[cnt],
3502                                                            strlen (longnames[cnt]));
3503
3504           if (ctype->mboutdigits[cnt] == NULL)
3505             ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3506                                                            uninames[cnt], 9);
3507
3508           if (ctype->mboutdigits[cnt] == NULL)
3509             {
3510               /* Provide a replacement.  */
3511               WITH_CUR_LOCALE (error (0, 0, _("\
3512 no output digits defined and none of the standard names in the charmap")));
3513
3514               ctype->mboutdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
3515                                                        sizeof (struct charseq)
3516                                                        + 1);
3517
3518               /* This is better than nothing.  */
3519               ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
3520               ctype->mboutdigits[cnt]->nbytes = 1;
3521             }
3522
3523           ctype->wcoutdigits[cnt] = L'0' + cnt;
3524         }
3525
3526       ctype->outdigits_act = 10;
3527     }
3528 }
3529
3530
3531 /* Construction of sparse 3-level tables.
3532    See wchar-lookup.h for their structure and the meaning of p and q.  */
3533
/* Working state for building one sparse 3-level bit table, together
   with the serialized form produced by wctype_table_finalize.  */
struct wctype_table
{
  /* Parameters, to be stored by the caller before wctype_table_init
     is called: a level-3 block consists of 1 << p words and a level-2
     block of 1 << q entries.  */
  unsigned int p, q;

  /* Working representation.  For every level `alloc' is the capacity
     of the array and `size' the part of it that is in use.  Level 1 is
     counted in single entries, levels 2 and 3 in whole blocks.  */
  size_t level1_alloc, level1_size;
  uint32_t *level1;

  size_t level2_alloc, level2_size;
  uint32_t *level2;

  size_t level3_alloc, level3_size;
  uint32_t *level3;

  /* Compressed representation: `result' points to `result_size' bytes
     allocated by wctype_table_finalize.  */
  size_t result_size;
  char *result;
};
3553
3554 /* Initialize.  Assumes t->p and t->q have already been set.  */
3555 static inline void
3556 wctype_table_init (struct wctype_table *t)
3557 {
3558   t->level1 = NULL;
3559   t->level1_alloc = t->level1_size = 0;
3560   t->level2 = NULL;
3561   t->level2_alloc = t->level2_size = 0;
3562   t->level3 = NULL;
3563   t->level3_alloc = t->level3_size = 0;
3564 }
3565
3566 /* Retrieve an entry.  */
3567 static inline int
3568 wctype_table_get (struct wctype_table *t, uint32_t wc)
3569 {
3570   uint32_t index1 = wc >> (t->q + t->p + 5);
3571   if (index1 < t->level1_size)
3572     {
3573       uint32_t lookup1 = t->level1[index1];
3574       if (lookup1 != EMPTY)
3575         {
3576           uint32_t index2 = ((wc >> (t->p + 5)) & ((1 << t->q) - 1))
3577                             + (lookup1 << t->q);
3578           uint32_t lookup2 = t->level2[index2];
3579           if (lookup2 != EMPTY)
3580             {
3581               uint32_t index3 = ((wc >> 5) & ((1 << t->p) - 1))
3582                                 + (lookup2 << t->p);
3583               uint32_t lookup3 = t->level3[index3];
3584               uint32_t index4 = wc & 0x1f;
3585
3586               return (lookup3 >> index4) & 1;
3587             }
3588         }
3589     }
3590   return 0;
3591 }
3592
3593 /* Add one entry.  */
3594 static void
3595 wctype_table_add (struct wctype_table *t, uint32_t wc)
3596 {
3597   uint32_t index1 = wc >> (t->q + t->p + 5);
3598   uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1);
3599   uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1);
3600   uint32_t index4 = wc & 0x1f;
3601   size_t i, i1, i2;
3602
3603   if (index1 >= t->level1_size)
3604     {
3605       if (index1 >= t->level1_alloc)
3606         {
3607           size_t alloc = 2 * t->level1_alloc;
3608           if (alloc <= index1)
3609             alloc = index1 + 1;
3610           t->level1 = (uint32_t *) xrealloc ((char *) t->level1,
3611                                              alloc * sizeof (uint32_t));
3612           t->level1_alloc = alloc;
3613         }
3614       while (index1 >= t->level1_size)
3615         t->level1[t->level1_size++] = EMPTY;
3616     }
3617
3618   if (t->level1[index1] == EMPTY)
3619     {
3620       if (t->level2_size == t->level2_alloc)
3621         {
3622           size_t alloc = 2 * t->level2_alloc + 1;
3623           t->level2 = (uint32_t *) xrealloc ((char *) t->level2,
3624                                              (alloc << t->q) * sizeof (uint32_t));
3625           t->level2_alloc = alloc;
3626         }
3627       i1 = t->level2_size << t->q;
3628       i2 = (t->level2_size + 1) << t->q;
3629       for (i = i1; i < i2; i++)
3630         t->level2[i] = EMPTY;
3631       t->level1[index1] = t->level2_size++;
3632     }
3633
3634   index2 += t->level1[index1] << t->q;
3635
3636   if (t->level2[index2] == EMPTY)
3637     {
3638       if (t->level3_size == t->level3_alloc)
3639         {
3640           size_t alloc = 2 * t->level3_alloc + 1;
3641           t->level3 = (uint32_t *) xrealloc ((char *) t->level3,
3642                                              (alloc << t->p) * sizeof (uint32_t));
3643           t->level3_alloc = alloc;
3644         }
3645       i1 = t->level3_size << t->p;
3646       i2 = (t->level3_size + 1) << t->p;
3647       for (i = i1; i < i2; i++)
3648         t->level3[i] = 0;
3649       t->level2[index2] = t->level3_size++;
3650     }
3651
3652   index3 += t->level2[index2] << t->p;
3653
3654   t->level3[index3] |= (uint32_t)1 << index4;
3655 }
3656
3657 /* Finalize and shrink.  */
3658 static void
3659 wctype_table_finalize (struct wctype_table *t)
3660 {
3661   size_t i, j, k;
3662   uint32_t reorder3[t->level3_size];
3663   uint32_t reorder2[t->level2_size];
3664   uint32_t level1_offset, level2_offset, level3_offset;
3665
3666   /* Uniquify level3 blocks.  */
3667   k = 0;
3668   for (j = 0; j < t->level3_size; j++)
3669     {
3670       for (i = 0; i < k; i++)
3671         if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3672                     (1 << t->p) * sizeof (uint32_t)) == 0)
3673           break;
3674       /* Relocate block j to block i.  */
3675       reorder3[j] = i;
3676       if (i == k)
3677         {
3678           if (i != j)
3679             memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3680                     (1 << t->p) * sizeof (uint32_t));
3681           k++;
3682         }
3683     }
3684   t->level3_size = k;
3685
3686   for (i = 0; i < (t->level2_size << t->q); i++)
3687     if (t->level2[i] != EMPTY)
3688       t->level2[i] = reorder3[t->level2[i]];
3689
3690   /* Uniquify level2 blocks.  */
3691   k = 0;
3692   for (j = 0; j < t->level2_size; j++)
3693     {
3694       for (i = 0; i < k; i++)
3695         if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3696                     (1 << t->q) * sizeof (uint32_t)) == 0)
3697           break;
3698       /* Relocate block j to block i.  */
3699       reorder2[j] = i;
3700       if (i == k)
3701         {
3702           if (i != j)
3703             memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3704                     (1 << t->q) * sizeof (uint32_t));
3705           k++;
3706         }
3707     }
3708   t->level2_size = k;
3709
3710   for (i = 0; i < t->level1_size; i++)
3711     if (t->level1[i] != EMPTY)
3712       t->level1[i] = reorder2[t->level1[i]];
3713
3714   /* Create and fill the resulting compressed representation.  */
3715   t->result_size =
3716     5 * sizeof (uint32_t)
3717     + t->level1_size * sizeof (uint32_t)
3718     + (t->level2_size << t->q) * sizeof (uint32_t)
3719     + (t->level3_size << t->p) * sizeof (uint32_t);
3720   t->result = (char *) xmalloc (t->result_size);
3721
3722   level1_offset =
3723     5 * sizeof (uint32_t);
3724   level2_offset =
3725     5 * sizeof (uint32_t)
3726     + t->level1_size * sizeof (uint32_t);
3727   level3_offset =
3728     5 * sizeof (uint32_t)
3729     + t->level1_size * sizeof (uint32_t)
3730     + (t->level2_size << t->q) * sizeof (uint32_t);
3731
3732   ((uint32_t *) t->result)[0] = t->q + t->p + 5;
3733   ((uint32_t *) t->result)[1] = t->level1_size;
3734   ((uint32_t *) t->result)[2] = t->p + 5;
3735   ((uint32_t *) t->result)[3] = (1 << t->q) - 1;
3736   ((uint32_t *) t->result)[4] = (1 << t->p) - 1;
3737
3738   for (i = 0; i < t->level1_size; i++)
3739     ((uint32_t *) (t->result + level1_offset))[i] =
3740       (t->level1[i] == EMPTY
3741        ? 0
3742        : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3743
3744   for (i = 0; i < (t->level2_size << t->q); i++)
3745     ((uint32_t *) (t->result + level2_offset))[i] =
3746       (t->level2[i] == EMPTY
3747        ? 0
3748        : (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
3749
3750   for (i = 0; i < (t->level3_size << t->p); i++)
3751     ((uint32_t *) (t->result + level3_offset))[i] = t->level3[i];
3752
3753   if (t->level1_alloc > 0)
3754     free (t->level1);
3755   if (t->level2_alloc > 0)
3756     free (t->level2);
3757   if (t->level3_alloc > 0)
3758     free (t->level3);
3759 }
3760
3761 #define TABLE wcwidth_table
3762 #define ELEMENT uint8_t
3763 #define DEFAULT 0xff
3764 #include "3level.h"
3765
3766 #define TABLE wctrans_table
3767 #define ELEMENT int32_t
3768 #define DEFAULT 0
3769 #define wctrans_table_add wctrans_table_add_internal
3770 #include "3level.h"
3771 #undef wctrans_table_add
3772 /* The wctrans_table must actually store the difference between the
3773    desired result and the argument.  */
3774 static inline void
3775 wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
3776 {
3777   wctrans_table_add_internal (t, wc, mapped_wc - wc);
3778 }
3779
3780
3781 /* Flattens the included transliterations into a translit list.
3782    Inserts them in the list at `cursor', and returns the new cursor.  */
3783 static struct translit_t **
3784 translit_flatten (struct locale_ctype_t *ctype,
3785                   const struct charmap_t *charmap,
3786                   struct translit_t **cursor)
3787 {
3788   while (ctype->translit_include != NULL)
3789     {
3790       const char *copy_locale = ctype->translit_include->copy_locale;
3791       const char *copy_repertoire = ctype->translit_include->copy_repertoire;
3792       struct localedef_t *other;
3793
3794       /* Unchain the include statement.  During the depth-first traversal
3795          we don't want to visit any locale more than once.  */
3796       ctype->translit_include = ctype->translit_include->next;
3797
3798       other = find_locale (LC_CTYPE, copy_locale, copy_repertoire, charmap);
3799
3800       if (other == NULL || other->categories[LC_CTYPE].ctype == NULL)
3801         {
3802           WITH_CUR_LOCALE (error (0, 0, _("\
3803 %s: transliteration data from locale `%s' not available"),
3804                                   "LC_CTYPE", copy_locale));
3805         }
3806       else
3807         {
3808           struct locale_ctype_t *other_ctype =
3809             other->categories[LC_CTYPE].ctype;
3810
3811           cursor = translit_flatten (other_ctype, charmap, cursor);
3812           assert (other_ctype->translit_include == NULL);
3813
3814           if (other_ctype->translit != NULL)
3815             {
3816               /* Insert the other_ctype->translit list at *cursor.  */
3817               struct translit_t *endp = other_ctype->translit;
3818               while (endp->next != NULL)
3819                 endp = endp->next;
3820
3821               endp->next = *cursor;
3822               *cursor = other_ctype->translit;
3823
3824               /* Avoid any risk of circular lists.  */
3825               other_ctype->translit = NULL;
3826
3827               cursor = &endp->next;
3828             }
3829
3830           if (ctype->default_missing == NULL)
3831             ctype->default_missing = other_ctype->default_missing;
3832         }
3833     }
3834
3835   return cursor;
3836 }
3837
3838 static void
3839 allocate_arrays (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
3840                  struct repertoire_t *repertoire)
3841 {
3842   size_t idx, nr;
3843   const void *key;
3844   size_t len;
3845   void *vdata;
3846   void *curs;
3847
3848   /* You wonder about this amount of memory?  This is only because some
3849      users do not manage to address the array with unsigned values or
3850      data types with range >= 256.  '\200' would result in the array
3851      index -128.  To help these poor people we duplicate the entries for
3852      128 up to 255 below the entry for \0.  */
3853   ctype->ctype_b = (char_class_t *) xcalloc (256 + 128, sizeof (char_class_t));
3854   ctype->ctype32_b = (char_class32_t *) xcalloc (256, sizeof (char_class32_t));
3855   ctype->class_b = (uint32_t **)
3856     xmalloc (ctype->nr_charclass * sizeof (uint32_t *));
3857   ctype->class_3level = (struct iovec *)
3858     xmalloc (ctype->nr_charclass * sizeof (struct iovec));
3859
3860   /* This is the array accessed using the multibyte string elements.  */
3861   for (idx = 0; idx < 256; ++idx)
3862     ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
3863
3864   /* Mirror first 127 entries.  We must take care that entry -1 is not
3865      mirrored because EOF == -1.  */
3866   for (idx = 0; idx < 127; ++idx)
3867     ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
3868
3869   /* The 32 bit array contains all characters < 0x100.  */
3870   for (idx = 0; idx < ctype->class_collection_act; ++idx)
3871     if (ctype->charnames[idx] < 0x100)
3872       ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
3873
3874   for (nr = 0; nr < ctype->nr_charclass; nr++)
3875     {
3876       ctype->class_b[nr] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
3877
3878       /* We only set CLASS_B for the bits in the ISO C classes, not
3879          the user defined classes.  The number should not change but
3880          who knows.  */
3881 #define LAST_ISO_C_BIT 11
3882       if (nr <= LAST_ISO_C_BIT)
3883         for (idx = 0; idx < 256; ++idx)
3884           if (ctype->class256_collection[idx] & _ISbit (nr))
3885             ctype->class_b[nr][idx >> 5] |= (uint32_t) 1 << (idx & 0x1f);
3886     }
3887
3888   for (nr = 0; nr < ctype->nr_charclass; nr++)
3889     {
3890       struct wctype_table t;
3891
3892       t.p = 4; /* or: 5 */
3893       t.q = 7; /* or: 6 */
3894       wctype_table_init (&t);
3895
3896       for (idx = 0; idx < ctype->class_collection_act; ++idx)
3897         if (ctype->class_collection[idx] & _ISwbit (nr))
3898           wctype_table_add (&t, ctype->charnames[idx]);
3899
3900       wctype_table_finalize (&t);
3901
3902       if (verbose)
3903         WITH_CUR_LOCALE (fprintf (stderr, _("\
3904 %s: table for class \"%s\": %lu bytes\n"),
3905                                  "LC_CTYPE", ctype->classnames[nr],
3906                                  (unsigned long int) t.result_size));
3907
3908       ctype->class_3level[nr].iov_base = t.result;
3909       ctype->class_3level[nr].iov_len = t.result_size;
3910     }
3911
3912   /* Room for table of mappings.  */
3913   ctype->map_b = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3914   ctype->map32_b = (uint32_t **) xmalloc (ctype->map_collection_nr
3915                                           * sizeof (uint32_t *));
3916   ctype->map_3level = (struct iovec *)
3917     xmalloc (ctype->map_collection_nr * sizeof (struct iovec));
3918
3919   /* Fill in all mappings.  */
3920   for (idx = 0; idx < 2; ++idx)
3921     {
3922       unsigned int idx2;
3923
3924       /* Allocate table.  */
3925       ctype->map_b[idx] = (uint32_t *)
3926         xmalloc ((256 + 128) * sizeof (uint32_t));
3927
3928       /* Copy values from collection.  */
3929       for (idx2 = 0; idx2 < 256; ++idx2)
3930         ctype->map_b[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
3931
3932       /* Mirror first 127 entries.  We must take care not to map entry
3933          -1 because EOF == -1.  */
3934       for (idx2 = 0; idx2 < 127; ++idx2)
3935         ctype->map_b[idx][idx2] = ctype->map_b[idx][256 + idx2];
3936
3937       /* EOF must map to EOF.  */
3938       ctype->map_b[idx][127] = EOF;
3939     }
3940
3941   for (idx = 0; idx < ctype->map_collection_nr; ++idx)
3942     {
3943       unsigned int idx2;
3944
3945       /* Allocate table.  */
3946       ctype->map32_b[idx] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
3947
3948       /* Copy values from collection.  Default is identity mapping.  */
3949       for (idx2 = 0; idx2 < 256; ++idx2)
3950         ctype->map32_b[idx][idx2] =
3951           (ctype->map_collection[idx][idx2] != 0
3952            ? ctype->map_collection[idx][idx2]
3953            : idx2);
3954     }
3955
3956   for (nr = 0; nr < ctype->map_collection_nr; nr++)
3957     {
3958       struct wctrans_table t;
3959
3960       t.p = 7;
3961       t.q = 9;
3962       wctrans_table_init (&t);
3963
3964       for (idx = 0; idx < ctype->map_collection_act[nr]; ++idx)
3965         if (ctype->map_collection[nr][idx] != 0)
3966           wctrans_table_add (&t, ctype->charnames[idx],
3967                              ctype->map_collection[nr][idx]);
3968
3969       wctrans_table_finalize (&t);
3970
3971       if (verbose)
3972         WITH_CUR_LOCALE (fprintf (stderr, _("\
3973 %s: table for map \"%s\": %lu bytes\n"),
3974                                  "LC_CTYPE", ctype->mapnames[nr],
3975                                  (unsigned long int) t.result_size));
3976
3977       ctype->map_3level[nr].iov_base = t.result;
3978       ctype->map_3level[nr].iov_len = t.result_size;
3979     }
3980
3981   /* Extra array for class and map names.  */
3982   ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3983                                                 * sizeof (uint32_t));
3984   ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3985                                               * sizeof (uint32_t));
3986
3987   ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
3988   ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
3989
3990   /* Array for width information.  Because the expected widths are very
3991      small (never larger than 2) we use only one single byte.  This
3992      saves space.
3993      We put only printable characters in the table.  wcwidth is specified
3994      to return -1 for non-printable characters.  Doing the check here
3995      saves a run-time check.
3996      But we put L'\0' in the table.  This again saves a run-time check.  */
3997   {
3998     struct wcwidth_table t;
3999
4000     t.p = 7;
4001     t.q = 9;
4002     wcwidth_table_init (&t);
4003
4004     /* First set all the printable characters of the character set to
4005        the default width.  */
4006     curs = NULL;
4007     while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
4008       {
4009         struct charseq *data = (struct charseq *) vdata;
4010
4011         if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
4012           data->ucs4 = repertoire_find_value (ctype->repertoire,
4013                                               data->name, len);
4014
4015         if (data->ucs4 != ILLEGAL_CHAR_VALUE)
4016           {
4017             uint32_t *class_bits =
4018               find_idx (ctype, &ctype->class_collection, NULL,
4019                         &ctype->class_collection_act, data->ucs4);
4020
4021             if (class_bits != NULL && (*class_bits & BITw (tok_print)))
4022               wcwidth_table_add (&t, data->ucs4, charmap->width_default);
4023           }
4024       }
4025
4026     /* Now add the explicitly specified widths.  */
4027     if (charmap->width_rules != NULL)
4028       {
4029         size_t cnt;
4030
4031         for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
4032           {
4033             unsigned char bytes[charmap->mb_cur_max];
4034             int nbytes = charmap->width_rules[cnt].from->nbytes;
4035
4036             /* We have the range of characters for which the width is
4037                specified described using byte sequences of the multibyte
4038                charset.  We have to convert this to UCS4 now.  And we
4039                cannot simply convert the beginning and the end of the
4040                sequence, we have to iterate over the byte sequence and
4041                convert it for every single character.  */
4042             memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
4043
4044             while (nbytes < charmap->width_rules[cnt].to->nbytes
4045                    || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
4046                               nbytes) <= 0)
4047               {
4048                 /* Find the UCS value for `bytes'.  */
4049                 int inner;
4050                 uint32_t wch;
4051                 struct charseq *seq =
4052                   charmap_find_symbol (charmap, (char *) bytes, nbytes);
4053
4054                 if (seq == NULL)
4055                   wch = ILLEGAL_CHAR_VALUE;
4056                 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
4057                   wch = seq->ucs4;
4058                 else
4059                   wch = repertoire_find_value (ctype->repertoire, seq->name,
4060                                                strlen (seq->name));
4061
4062                 if (wch != ILLEGAL_CHAR_VALUE)
4063                   {
4064                     /* Store the value.  */
4065                     uint32_t *class_bits =
4066                       find_idx (ctype, &ctype->class_collection, NULL,
4067                                 &ctype->class_collection_act, wch);
4068
4069                     if (class_bits != NULL && (*class_bits & BITw (tok_print)))
4070                       wcwidth_table_add (&t, wch,
4071                                          charmap->width_rules[cnt].width);
4072                   }
4073
4074                 /* "Increment" the bytes sequence.  */
4075                 inner = nbytes - 1;
4076                 while (inner >= 0 && bytes[inner] == 0xff)
4077                   --inner;
4078
4079                 if (inner < 0)
4080                   {
4081                     /* We have to extend the byte sequence.  */
4082                     if (nbytes >= charmap->width_rules[cnt].to->nbytes)
4083                       break;
4084
4085                     bytes[0] = 1;
4086                     memset (&bytes[1], 0, nbytes);
4087                     ++nbytes;
4088                   }
4089                 else
4090                   {
4091                     ++bytes[inner];
4092                     while (++inner < nbytes)
4093                       bytes[inner] = 0;
4094                   }
4095               }
4096           }
4097       }
4098
4099     /* Set the width of L'\0' to 0.  */
4100     wcwidth_table_add (&t, 0, 0);
4101
4102     wcwidth_table_finalize (&t);
4103
4104     if (verbose)
4105       WITH_CUR_LOCALE (fprintf (stderr, _("%s: table for width: %lu bytes\n"),
4106                                "LC_CTYPE", (unsigned long int) t.result_size));
4107
4108     ctype->width.iov_base = t.result;
4109     ctype->width.iov_len = t.result_size;
4110   }
4111
4112   /* Set MB_CUR_MAX.  */
4113   ctype->mb_cur_max = charmap->mb_cur_max;
4114
4115   /* Now determine the table for the transliteration information.
4116
4117      XXX It is not yet clear to me whether it is worth implementing a
4118      complicated algorithm which uses a hash table to locate the entries.
4119      For now I'll use a simple array which can be searched using binary
4120      search.  */
4121   if (ctype->translit_include != NULL)
4122     /* Traverse the locales mentioned in the `include' statements in a
4123        depth-first way and fold in their transliteration information.  */
4124     translit_flatten (ctype, charmap, &ctype->translit);
4125
4126   if (ctype->translit != NULL)
4127     {
4128       /* First count how many entries we have.  This is the upper limit
4129          since some entries from the included files might be overwritten.  */
4130       size_t number = 0;
4131       size_t cnt;
4132       struct translit_t *runp = ctype->translit;
4133       struct translit_t **sorted;
4134       size_t from_len, to_len;
4135
4136       while (runp != NULL)
4137         {
4138           ++number;
4139           runp = runp->next;
4140         }
4141
4142       /* Next we allocate an array large enough and fill in the values.  */
4143       sorted = (struct translit_t **) alloca (number
4144                                               * sizeof (struct translit_t **));
4145       runp = ctype->translit;
4146       number = 0;
4147       do
4148         {
4149           /* Search for the place where to insert this string.
4150              XXX Better use a real sorting algorithm later.  */
4151           size_t idx = 0;
4152           int replace = 0;
4153
4154           while (idx < number)
4155             {
4156               int res = wcscmp ((const wchar_t *) sorted[idx]->from,
4157                                 (const wchar_t *) runp->from);
4158               if (res == 0)
4159                 {
4160                   replace = 1;
4161                   break;
4162                 }
4163               if (res > 0)
4164                 break;
4165               ++idx;
4166             }
4167
4168           if (replace)
4169             sorted[idx] = runp;
4170           else
4171             {
4172               memmove (&sorted[idx + 1], &sorted[idx],
4173                        (number - idx) * sizeof (struct translit_t *));
4174               sorted[idx] = runp;
4175               ++number;
4176             }
4177
4178           runp = runp->next;
4179         }
4180       while (runp != NULL);
4181
4182       /* The next step is putting all the possible transliteration
4183          strings in one memory block so that we can write it out.
4184          We need several different blocks:
4185          - index to the from-string array
4186          - from-string array
4187          - index to the to-string array
4188          - to-string array.
4189       */
4190       from_len = to_len = 0;
4191       for (cnt = 0; cnt < number; ++cnt)
4192         {
4193           struct translit_to_t *srunp;
4194           from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4195           srunp = sorted[cnt]->to;
4196           while (srunp != NULL)
4197             {
4198               to_len += wcslen ((const wchar_t *) srunp->str) + 1;
4199               srunp = srunp->next;
4200             }
4201           /* Plus one for the extra NUL character marking the end of
4202              the list for the current entry.  */
4203           ++to_len;
4204         }
4205
4206       /* We can allocate the arrays for the results.  */
4207       ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
4208       ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
4209       ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
4210       ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
4211
4212       from_len = 0;
4213       to_len = 0;
4214       for (cnt = 0; cnt < number; ++cnt)
4215         {
4216           size_t len;
4217           struct translit_to_t *srunp;
4218
4219           ctype->translit_from_idx[cnt] = from_len;
4220           ctype->translit_to_idx[cnt] = to_len;
4221
4222           len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4223           wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
4224                    (const wchar_t *) sorted[cnt]->from, len);
4225           from_len += len;
4226
4227           ctype->translit_to_idx[cnt] = to_len;
4228           srunp = sorted[cnt]->to;
4229           while (srunp != NULL)
4230             {
4231               len = wcslen ((const wchar_t *) srunp->str) + 1;
4232               wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
4233                        (const wchar_t *) srunp->str, len);
4234               to_len += len;
4235               srunp = srunp->next;
4236             }
4237           ctype->translit_to_tbl[to_len++] = L'\0';
4238         }
4239
4240       /* Store the information about the length.  */
4241       ctype->translit_idx_size = number;
4242       ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
4243       ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
4244     }
4245   else
4246     {
4247       /* Provide some dummy pointers since we have nothing to write out.  */
4248       static uint32_t no_str = { 0 };
4249
4250       ctype->translit_from_idx = &no_str;
4251       ctype->translit_from_tbl = &no_str;
4252       ctype->translit_to_tbl = &no_str;
4253       ctype->translit_idx_size = 0;
4254       ctype->translit_from_tbl_size = 0;
4255       ctype->translit_to_tbl_size = 0;
4256     }
4257 }