locale/programs/ld-ctype.c

   1 /* Copyright (C) 1995-2018 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
   4
   5    This program is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published
   7    by the Free Software Foundation; version 2 of the License, or
   8    (at your option) any later version.
   9
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, see <http://www.gnu.org/licenses/>.  */
  17
  18 #ifdef HAVE_CONFIG_H
  19 # include <config.h>
  20 #endif
  21
  22 #include <alloca.h>
  23 #include <byteswap.h>
  24 #include <endian.h>
  25 #include <errno.h>
  26 #include <limits.h>
  27 #include <obstack.h>
  28 #include <stdlib.h>
  29 #include <string.h>
  30 #include <wchar.h>
  31 #include <wctype.h>
  32 #include <stdint.h>
  33 #include <sys/uio.h>
  34
  35 #include "localedef.h"
  36 #include "charmap.h"
  37 #include "localeinfo.h"
  38 #include "langinfo.h"
  39 #include "linereader.h"
  40 #include "locfile-token.h"
  41 #include "locfile.h"
  42
  43 #include <assert.h>
  44
  45
  46 /* The bit used for representing a special class.  */
  47 #define BITPOS(class) ((class) - tok_upper)
  48 #define BIT(class) (_ISbit (BITPOS (class)))
  49 #define BITw(class) (_ISwbit (BITPOS (class)))
  50
  51 #define ELEM(ctype, collection, idx, value)                                   \
  52   *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,     \
  53              &ctype->collection##_act idx, value)
  54
  55
/* To be compatible with former implementations we for now restrict
   the number of bits for character classes to 16.  When compatibility
   is not necessary anymore increase the number to 32.

   char_class_t is the element type of the `ctype_b' array and
   char_class32_t the element type of the `ctype32_b' array in
   struct locale_ctype_t.  */
#define char_class_t uint16_t
#define char_class32_t uint32_t
  61
  62
/* Type to describe a transliteration action.  We have a possibly
   multiple character from-string and a set of multiple character
   to-strings.  All are 32bit values since this is what is used in
   the gconv functions.  */

/* One to-string of a transliteration entry; the to-strings of an entry
   form a singly linked list.  */
struct translit_to_t
{
  /* The replacement string as 32bit values.  */
  uint32_t *str;

  /* Next to-string of the same entry, if any.  */
  struct translit_to_t *next;
};
  73
/* One transliteration entry: a from-string and its list of to-strings.
   The entries themselves are chained through NEXT.  */
struct translit_t
{
  /* The string to be replaced, as 32bit values.  */
  uint32_t *from;

  /* NOTE(review): presumably the file name and line number where the
     entry was defined; confirm against the code filling them in.  */
  const char *fname;
  size_t lineno;

  /* Head of the list of to-strings.  */
  struct translit_to_t *to;

  /* Next transliteration entry.  */
  struct translit_t *next;
};
  85
  86 struct translit_ignore_t
  87 {
  88   uint32_t from;
  89   uint32_t to;
  90   uint32_t step;
  91
  92   const char *fname;
  93   size_t lineno;
  94
  95   struct translit_ignore_t *next;
  96 };
  97
  98
  99 /* Type to describe a transliteration include statement.  */
 100 struct translit_include_t
 101 {
 102   const char *copy_locale;
 103   const char *copy_repertoire;
 104
 105   struct translit_include_t *next;
 106 };
 107
/* Provide some dummy pointer for empty string.  The array holds just
   the terminating zero element.  */
static uint32_t no_str[] = { 0 };
 110
 111
/* Instantiations of the generic table code in "3level.h".  Each
   inclusion is parameterized through the TABLE, ELEMENT and DEFAULT
   macros defined immediately before it.  */

/* Sparse table of uint32_t.  */
#define TABLE idx_table
#define ELEMENT uint32_t
#define DEFAULT ((uint32_t) ~0)
#define NO_ADD_LOCALE
#include "3level.h"

/* Table of uint8_t with default value 0xff.  */
#define TABLE wcwidth_table
#define ELEMENT uint8_t
#define DEFAULT 0xff
#include "3level.h"

/* Table of int32_t with default value 0.  While the header is read
   the name wctrans_table_add is redirected to
   wctrans_table_add_internal, so that this file can supply its own
   wctrans_table_add after the #undef.  */
#define TABLE wctrans_table
#define ELEMENT int32_t
#define DEFAULT 0
#define wctrans_table_add wctrans_table_add_internal
#include "3level.h"
#undef wctrans_table_add
 130 /* The wctrans_table must actually store the difference between the
 131    desired result and the argument.  */
 132 static inline void
 133 wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
 134 {
 135   wctrans_table_add_internal (t, wc, mapped_wc - wc);
 136 }
 137
 138 /* Construction of sparse 3-level tables.
 139    See wchar-lookup.h for their structure and the meaning of p and q.  */
 140
 141 struct wctype_table
 142 {
 143   /* Parameters.  */
 144   unsigned int p;
 145   unsigned int q;
 146   /* Working representation.  */
 147   size_t level1_alloc;
 148   size_t level1_size;
 149   uint32_t *level1;
 150   size_t level2_alloc;
 151   size_t level2_size;
 152   uint32_t *level2;
 153   size_t level3_alloc;
 154   size_t level3_size;
 155   uint32_t *level3;
 156   size_t result_size;
 157 };
 158
/* Forward declaration; the definition is not part of this section of
   the file.  */
static void add_locale_wctype_table (struct locale_file *file,
                                     struct wctype_table *t);
 161
 162 /* The real definition of the struct for the LC_CTYPE locale.  */
 163 struct locale_ctype_t
 164 {
 165   uint32_t *charnames;
 166   size_t charnames_max;
 167   size_t charnames_act;
 168   /* An index lookup table, to speedup find_idx.  */
 169   struct idx_table charnames_idx;
 170
 171   struct repertoire_t *repertoire;
 172
 173   /* We will allow up to 8 * sizeof (uint32_t) character classes.  */
 174 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
 175   size_t nr_charclass;
 176   const char *classnames[MAX_NR_CHARCLASS];
 177   uint32_t last_class_char;
 178   uint32_t class256_collection[256];
 179   uint32_t *class_collection;
 180   size_t class_collection_max;
 181   size_t class_collection_act;
 182   uint32_t class_done;
 183   uint32_t class_offset;
 184
 185   struct charseq **mbdigits;
 186   size_t mbdigits_act;
 187   size_t mbdigits_max;
 188   uint32_t *wcdigits;
 189   size_t wcdigits_act;
 190   size_t wcdigits_max;
 191
 192   struct charseq *mboutdigits[10];
 193   uint32_t wcoutdigits[10];
 194   size_t outdigits_act;
 195
 196   /* If the following number ever turns out to be too small simply
 197      increase it.  But I doubt it will.  --drepper@gnu */
 198 #define MAX_NR_CHARMAP 16
 199   const char *mapnames[MAX_NR_CHARMAP];
 200   uint32_t *map_collection[MAX_NR_CHARMAP];
 201   uint32_t map256_collection[2][256];
 202   size_t map_collection_max[MAX_NR_CHARMAP];
 203   size_t map_collection_act[MAX_NR_CHARMAP];
 204   size_t map_collection_nr;
 205   size_t last_map_idx;
 206   int tomap_done[MAX_NR_CHARMAP];
 207   uint32_t map_offset;
 208
 209   /* Transliteration information.  */
 210   struct translit_include_t *translit_include;
 211   struct translit_t *translit;
 212   struct translit_ignore_t *translit_ignore;
 213   uint32_t ntranslit_ignore;
 214
 215   uint32_t *default_missing;
 216   const char *default_missing_file;
 217   size_t default_missing_lineno;
 218
 219   uint32_t to_nonascii;
 220   uint32_t nonascii_case;
 221
 222   /* The arrays for the binary representation.  */
 223   char_class_t *ctype_b;
 224   char_class32_t *ctype32_b;
 225   uint32_t **map_b;
 226   uint32_t **map32_b;
 227   uint32_t **class_b;
 228   struct wctype_table *class_3level;
 229   struct wctrans_table *map_3level;
 230   uint32_t *class_name_ptr;
 231   uint32_t *map_name_ptr;
 232   struct wcwidth_table width;
 233   uint32_t mb_cur_max;
 234   const char *codeset_name;
 235   uint32_t *translit_from_idx;
 236   uint32_t *translit_from_tbl;
 237   uint32_t *translit_to_idx;
 238   uint32_t *translit_to_tbl;
 239   uint32_t translit_idx_size;
 240   size_t translit_from_tbl_size;
 241   size_t translit_to_tbl_size;
 242
 243   struct obstack mempool;
 244 };
 245
 246
/* Marker for an empty slot.  This has the value 0xFFFFFFFF, regardless
   whether 'int' is 16 bit, 32 bit, or 64 bit, because the complement
   is converted to uint32_t.  */
#define EMPTY ((uint32_t) ~0)
 250
 251
/* Allocation and deallocation functions used by the obstack macros
   from <obstack.h>.  */
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free
 254
 255
/* Prototypes for local functions.  */

/* Set up the LC_CTYPE data of LOCALE before a definition is read.  */
static void ctype_startup (struct linereader *lr, struct localedef_t *locale,
                           const struct charmap_t *charmap,
                           struct localedef_t *copy_locale,
                           int ignore_content);
/* Called by ctype_startup once for every standard class name.  */
static void ctype_class_new (struct linereader *lr,
                             struct locale_ctype_t *ctype, const char *name);
/* Called by ctype_startup for "toupper" and "tolower".  */
static void ctype_map_new (struct linereader *lr,
                           struct locale_ctype_t *ctype,
                           const char *name, const struct charmap_t *charmap);
/* Look up IDX; ctype_finish also calls this with NULL for TABLE, MAX
   and ACT purely for the side effect of registering the character in
   the name array.  */
static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table,
                           size_t *max, size_t *act, uint32_t idx);
/* Called by ctype_finish to set default values for classes not
   specified.  */
static void set_class_defaults (struct locale_ctype_t *ctype,
                                const struct charmap_t *charmap,
                                struct repertoire_t *repertoire);
static void allocate_arrays (struct locale_ctype_t *ctype,
                             const struct charmap_t *charmap,
                             struct repertoire_t *repertoire);
 274
 275
/* Names and characters for the ten decimal digits, indexed by the
   digit value.  */

/* Spelled-out digit names; ctype_finish tries them as charmap symbol
   names when the plain digit character is not found.  */
static const char *longnames[] =
{
  "zero", "one", "two", "three", "four",
  "five", "six", "seven", "eight", "nine"
};
/* Digit names of the form U followed by eight hexadecimal digits.  */
static const char *uninames[] =
{
  "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
  "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
};
/* The ASCII digit characters themselves.  */
static const unsigned char digits[] = "0123456789";
 287
 288
 289 static void
 290 ctype_startup (struct linereader *lr, struct localedef_t *locale,
 291                const struct charmap_t *charmap,
 292                struct localedef_t *copy_locale, int ignore_content)
 293 {
 294   unsigned int cnt;
 295   struct locale_ctype_t *ctype;
 296
 297   if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL)
 298     {
 299       if (copy_locale == NULL)
 300         {
 301           /* Allocate the needed room.  */
 302           locale->categories[LC_CTYPE].ctype = ctype =
 303             (struct locale_ctype_t *) xcalloc (1,
 304                                                sizeof (struct locale_ctype_t));
 305
 306           /* We have seen no names yet.  */
 307           ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
 308           ctype->charnames = (uint32_t *) xmalloc (ctype->charnames_max
 309                                                    * sizeof (uint32_t));
 310           for (cnt = 0; cnt < 256; ++cnt)
 311             ctype->charnames[cnt] = cnt;
 312           ctype->charnames_act = 256;
 313           idx_table_init (&ctype->charnames_idx);
 314
 315           /* Fill character class information.  */
 316           ctype->last_class_char = ILLEGAL_CHAR_VALUE;
 317           /* The order of the following instructions determines the bit
 318              positions!  */
 319           ctype_class_new (lr, ctype, "upper");
 320           ctype_class_new (lr, ctype, "lower");
 321           ctype_class_new (lr, ctype, "alpha");
 322           ctype_class_new (lr, ctype, "digit");
 323           ctype_class_new (lr, ctype, "xdigit");
 324           ctype_class_new (lr, ctype, "space");
 325           ctype_class_new (lr, ctype, "print");
 326           ctype_class_new (lr, ctype, "graph");
 327           ctype_class_new (lr, ctype, "blank");
 328           ctype_class_new (lr, ctype, "cntrl");
 329           ctype_class_new (lr, ctype, "punct");
 330           ctype_class_new (lr, ctype, "alnum");
 331
 332           ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
 333           ctype->class_collection
 334             = (uint32_t *) xcalloc (sizeof (unsigned long int),
 335                                     ctype->class_collection_max);
 336           ctype->class_collection_act = 256;
 337
 338           /* Fill character map information.  */
 339           ctype->last_map_idx = MAX_NR_CHARMAP;
 340           ctype_map_new (lr, ctype, "toupper", charmap);
 341           ctype_map_new (lr, ctype, "tolower", charmap);
 342
 343           /* Fill first 256 entries in `toXXX' arrays.  */
 344           for (cnt = 0; cnt < 256; ++cnt)
 345             {
 346               ctype->map_collection[0][cnt] = cnt;
 347               ctype->map_collection[1][cnt] = cnt;
 348
 349               ctype->map256_collection[0][cnt] = cnt;
 350               ctype->map256_collection[1][cnt] = cnt;
 351             }
 352
 353           if (enc_not_ascii_compatible)
 354             ctype->to_nonascii = 1;
 355
 356           obstack_init (&ctype->mempool);
 357         }
 358       else
 359         ctype = locale->categories[LC_CTYPE].ctype =
 360           copy_locale->categories[LC_CTYPE].ctype;
 361     }
 362 }
 363
 364
 365 void
 366 ctype_finish (struct localedef_t *locale, const struct charmap_t *charmap)
 367 {
 368   /* See POSIX.2, table 2-6 for the meaning of the following table.  */
 369 #define NCLASS 12
 370   static const struct
 371   {
 372     const char *name;
 373     const char allow[NCLASS];
 374   }
 375   valid_table[NCLASS] =
 376   {
 377     /* The order is important.  See token.h for more information.
 378        M = Always, D = Default, - = Permitted, X = Mutually exclusive  */
 379     { "upper",  "--MX-XDDXXX-" },
 380     { "lower",  "--MX-XDDXXX-" },
 381     { "alpha",  "---X-XDDXXX-" },
 382     { "digit",  "XXX--XDDXXX-" },
 383     { "xdigit", "-----XDDXXX-" },
 384     { "space",  "XXXXX------X" },
 385     { "print",  "---------X--" },
 386     { "graph",  "---------X--" },
 387     { "blank",  "XXXXXM-----X" },
 388     { "cntrl",  "XXXXX-XX--XX" },
 389     { "punct",  "XXXXX-DD-X-X" },
 390     { "alnum",  "-----XDDXXX-" }
 391   };
 392   size_t cnt;
 393   int cls1, cls2;
 394   uint32_t space_value;
 395   struct charseq *space_seq;
 396   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 397   int warned;
 398   const void *key;
 399   size_t len;
 400   void *vdata;
 401   void *curs;
 402
 403   /* Now resolve copying and also handle completely missing definitions.  */
 404   if (ctype == NULL)
 405     {
 406       const char *repertoire_name;
 407
 408       /* First see whether we were supposed to copy.  If yes, find the
 409          actual definition.  */
 410       if (locale->copy_name[LC_CTYPE] != NULL)
 411         {
 412           /* Find the copying locale.  This has to happen transitively since
 413              the locale we are copying from might also copying another one.  */
 414           struct localedef_t *from = locale;
 415
 416           do
 417             from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
 418                                 from->repertoire_name, charmap);
 419           while (from->categories[LC_CTYPE].ctype == NULL
 420                  && from->copy_name[LC_CTYPE] != NULL);
 421
 422           ctype = locale->categories[LC_CTYPE].ctype
 423             = from->categories[LC_CTYPE].ctype;
 424         }
 425
 426       /* If there is still no definition issue an warning and create an
 427          empty one.  */
 428       if (ctype == NULL)
 429         {
 430           record_warning (_("\
 431 No definition for %s category found"), "LC_CTYPE");
 432           ctype_startup (NULL, locale, charmap, NULL, 0);
 433           ctype = locale->categories[LC_CTYPE].ctype;
 434         }
 435
 436       /* Get the repertoire we have to use.  */
 437       repertoire_name = locale->repertoire_name ?: repertoire_global;
 438       if (repertoire_name != NULL)
 439         ctype->repertoire = repertoire_read (repertoire_name);
 440     }
 441
 442   /* We need the name of the currently used 8-bit character set to
 443      make correct conversion between this 8-bit representation and the
 444      ISO 10646 character set used internally for wide characters.  */
 445   ctype->codeset_name = charmap->code_set_name;
 446   if (ctype->codeset_name == NULL)
 447     {
 448       record_error (0, 0, _("\
 449 No character set name specified in charmap"));
 450       ctype->codeset_name = "//UNKNOWN//";
 451     }
 452
 453   /* Set default value for classes not specified.  */
 454   set_class_defaults (ctype, charmap, ctype->repertoire);
 455
 456   /* Check according to table.  */
 457   for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
 458     {
 459       uint32_t tmp = ctype->class_collection[cnt];
 460
 461       if (tmp != 0)
 462         {
 463           for (cls1 = 0; cls1 < NCLASS; ++cls1)
 464             if ((tmp & _ISwbit (cls1)) != 0)
 465               for (cls2 = 0; cls2 < NCLASS; ++cls2)
 466                 if (valid_table[cls1].allow[cls2] != '-')
 467                   {
 468                     int eq = (tmp & _ISwbit (cls2)) != 0;
 469                     switch (valid_table[cls1].allow[cls2])
 470                       {
 471                       case 'M':
 472                         if (!eq)
 473                           {
 474                             uint32_t value = ctype->charnames[cnt];
 475
 476                             record_error (0, 0, _("\
 477 character L'\\u%0*x' in class `%s' must be in class `%s'"),
 478                                           value > 0xffff ? 8 : 4,
 479                                           value,
 480                                           valid_table[cls1].name,
 481                                           valid_table[cls2].name);
 482                           }
 483                         break;
 484
 485                       case 'X':
 486                         if (eq)
 487                           {
 488                             uint32_t value = ctype->charnames[cnt];
 489
 490                             record_error (0, 0, _("\
 491 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
 492                                           value > 0xffff ? 8 : 4,
 493                                           value,
 494                                           valid_table[cls1].name,
 495                                           valid_table[cls2].name);
 496                           }
 497                         break;
 498
 499                       case 'D':
 500                         ctype->class_collection[cnt] |= _ISwbit (cls2);
 501                         break;
 502
 503                       default:
 504                         record_error (5, 0, _("\
 505 internal error in %s, line %u"), __FUNCTION__, __LINE__);
 506                       }
 507                   }
 508         }
 509     }
 510
 511   for (cnt = 0; cnt < 256; ++cnt)
 512     {
 513       uint32_t tmp = ctype->class256_collection[cnt];
 514
 515       if (tmp != 0)
 516         {
 517           for (cls1 = 0; cls1 < NCLASS; ++cls1)
 518             if ((tmp & _ISbit (cls1)) != 0)
 519               for (cls2 = 0; cls2 < NCLASS; ++cls2)
 520                 if (valid_table[cls1].allow[cls2] != '-')
 521                   {
 522                     int eq = (tmp & _ISbit (cls2)) != 0;
 523                     switch (valid_table[cls1].allow[cls2])
 524                       {
 525                       case 'M':
 526                         if (!eq)
 527                           {
 528                             char buf[17];
 529
 530                             snprintf (buf, sizeof buf, "\\%Zo", cnt);
 531
 532                             record_error (0, 0, _("\
 533 character '%s' in class `%s' must be in class `%s'"),
 534                                           buf,
 535                                           valid_table[cls1].name,
 536                                           valid_table[cls2].name);
 537                           }
 538                         break;
 539
 540                       case 'X':
 541                         if (eq)
 542                           {
 543                             char buf[17];
 544
 545                             snprintf (buf, sizeof buf, "\\%Zo", cnt);
 546
 547                             record_error (0, 0, _("\
 548 character '%s' in class `%s' must not be in class `%s'"),
 549                                           buf,
 550                                           valid_table[cls1].name,
 551                                           valid_table[cls2].name);
 552                           }
 553                         break;
 554
 555                       case 'D':
 556                         ctype->class256_collection[cnt] |= _ISbit (cls2);
 557                         break;
 558
 559                       default:
 560                         record_error (5, 0, _("\
 561 internal error in %s, line %u"), __FUNCTION__, __LINE__);
 562                       }
 563                   }
 564         }
 565     }
 566
 567   /* ... and now test <SP> as a special case.  */
 568   space_value = 32;
 569   if (((cnt = BITPOS (tok_space),
 570         (ELEM (ctype, class_collection, , space_value)
 571          & BITw (tok_space)) == 0)
 572        || (cnt = BITPOS (tok_blank),
 573            (ELEM (ctype, class_collection, , space_value)
 574             & BITw (tok_blank)) == 0)))
 575     {
 576       record_error (0, 0, _("<SP> character not in class `%s'"),
 577                     valid_table[cnt].name);
 578     }
 579   else if (((cnt = BITPOS (tok_punct),
 580              (ELEM (ctype, class_collection, , space_value)
 581               & BITw (tok_punct)) != 0)
 582             || (cnt = BITPOS (tok_graph),
 583                 (ELEM (ctype, class_collection, , space_value)
 584                  & BITw (tok_graph))
 585                 != 0)))
 586     {
 587       record_error (0, 0, _("\
 588 <SP> character must not be in class `%s'"),
 589                                 valid_table[cnt].name);
 590     }
 591   else
 592     ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
 593
 594   space_seq = charmap_find_value (charmap, "SP", 2);
 595   if (space_seq == NULL)
 596     space_seq = charmap_find_value (charmap, "space", 5);
 597   if (space_seq == NULL)
 598     space_seq = charmap_find_value (charmap, "U00000020", 9);
 599   if (space_seq == NULL || space_seq->nbytes != 1)
 600     {
 601       record_error (0, 0, _("\
 602 character <SP> not defined in character map"));
 603     }
 604   else if (((cnt = BITPOS (tok_space),
 605              (ctype->class256_collection[space_seq->bytes[0]]
 606               & BIT (tok_space)) == 0)
 607             || (cnt = BITPOS (tok_blank),
 608                 (ctype->class256_collection[space_seq->bytes[0]]
 609                  & BIT (tok_blank)) == 0)))
 610     {
 611        record_error (0, 0, _("<SP> character not in class `%s'"),
 612                      valid_table[cnt].name);
 613     }
 614   else if (((cnt = BITPOS (tok_punct),
 615              (ctype->class256_collection[space_seq->bytes[0]]
 616               & BIT (tok_punct)) != 0)
 617             || (cnt = BITPOS (tok_graph),
 618                 (ctype->class256_collection[space_seq->bytes[0]]
 619                  & BIT (tok_graph)) != 0)))
 620     {
 621       record_error (0, 0, _("\
 622 <SP> character must not be in class `%s'"),
 623                     valid_table[cnt].name);
 624     }
 625   else
 626     ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
 627
 628   /* Check whether all single-byte characters make to their upper/lowercase
 629      equivalent according to the ASCII rules.  */
 630   for (cnt = 'A'; cnt <= 'Z'; ++cnt)
 631     {
 632       uint32_t uppval = ctype->map256_collection[0][cnt];
 633       uint32_t lowval = ctype->map256_collection[1][cnt];
 634       uint32_t lowuppval = ctype->map256_collection[0][lowval];
 635       uint32_t lowlowval = ctype->map256_collection[1][lowval];
 636
 637       if (uppval != cnt
 638           || lowval != cnt + 0x20
 639           || lowuppval != cnt
 640           || lowlowval != cnt + 0x20)
 641         ctype->nonascii_case = 1;
 642     }
 643   for (cnt = 0; cnt < 256; ++cnt)
 644     if (cnt < 'A' || (cnt > 'Z' && cnt < 'a') || cnt > 'z')
 645       if (ctype->map256_collection[0][cnt] != cnt
 646           || ctype->map256_collection[1][cnt] != cnt)
 647         ctype->nonascii_case = 1;
 648
 649   /* Now that the tests are done make sure the name array contains all
 650      characters which are handled in the WIDTH section of the
 651      character set definition file.  */
 652   if (charmap->width_rules != NULL)
 653     for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
 654       {
 655         unsigned char bytes[charmap->mb_cur_max];
 656         int nbytes = charmap->width_rules[cnt].from->nbytes;
 657
 658         /* We have the range of character for which the width is
 659            specified described using byte sequences of the multibyte
 660            charset.  We have to convert this to UCS4 now.  And we
 661            cannot simply convert the beginning and the end of the
 662            sequence, we have to iterate over the byte sequence and
 663            convert it for every single character.  */
 664         memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
 665
 666         while (nbytes < charmap->width_rules[cnt].to->nbytes
 667                || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
 668                           nbytes) <= 0)
 669           {
 670             /* Find the UCS value for `bytes'.  */
 671             int inner;
 672             uint32_t wch;
 673             struct charseq *seq
 674               = charmap_find_symbol (charmap, (char *) bytes, nbytes);
 675
 676             if (seq == NULL)
 677               wch = ILLEGAL_CHAR_VALUE;
 678             else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
 679               wch = seq->ucs4;
 680             else
 681               wch = repertoire_find_value (ctype->repertoire, seq->name,
 682                                            strlen (seq->name));
 683
 684             if (wch != ILLEGAL_CHAR_VALUE)
 685               /* We are only interested in the side-effects of the
 686                  `find_idx' call.  It will add appropriate entries in
 687                  the name array if this is necessary.  */
 688               (void) find_idx (ctype, NULL, NULL, NULL, wch);
 689
 690             /* "Increment" the bytes sequence.  */
 691             inner = nbytes - 1;
 692             while (inner >= 0 && bytes[inner] == 0xff)
 693               --inner;
 694
 695             if (inner < 0)
 696               {
 697                 /* We have to extend the byte sequence.  */
 698                 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
 699                   break;
 700
 701                 bytes[0] = 1;
 702                 memset (&bytes[1], 0, nbytes);
 703                 ++nbytes;
 704               }
 705             else
 706               {
 707                 ++bytes[inner];
 708                 while (++inner < nbytes)
 709                   bytes[inner] = 0;
 710               }
 711           }
 712       }
 713
 714   /* Now set all the other characters of the character set to the
 715      default width.  */
 716   curs = NULL;
 717   while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
 718     {
 719       struct charseq *data = (struct charseq *) vdata;
 720
 721       if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
 722         data->ucs4 = repertoire_find_value (ctype->repertoire,
 723                                             data->name, len);
 724
 725       if (data->ucs4 != ILLEGAL_CHAR_VALUE)
 726         (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
 727     }
 728
 729   /* There must be a multiple of 10 digits.  */
 730   if (ctype->mbdigits_act % 10 != 0)
 731     {
 732       assert (ctype->mbdigits_act == ctype->wcdigits_act);
 733       ctype->wcdigits_act -= ctype->mbdigits_act % 10;
 734       ctype->mbdigits_act -= ctype->mbdigits_act % 10;
 735       record_error (0, 0, _("\
 736 `digit' category has not entries in groups of ten"));
 737     }
 738
 739   /* Check the input digits.  There must be a multiple of ten available.
 740      In each group it could be that one or the other character is missing.
 741      In this case the whole group must be removed.  */
 742   cnt = 0;
 743   while (cnt < ctype->mbdigits_act)
 744     {
 745       size_t inner;
 746       for (inner = 0; inner < 10; ++inner)
 747         if (ctype->mbdigits[cnt + inner] == NULL)
 748           break;
 749
 750       if (inner == 10)
 751         cnt += 10;
 752       else
 753         {
 754           /* Remove the group.  */
 755           memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
 756                    ((ctype->wcdigits_act - cnt - 10)
 757                     * sizeof (ctype->mbdigits[0])));
 758           ctype->mbdigits_act -= 10;
 759         }
 760     }
 761
 762   /* If no input digits are given use the default.  */
 763   if (ctype->mbdigits_act == 0)
 764     {
 765       if (ctype->mbdigits_max == 0)
 766         {
 767           ctype->mbdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
 768                                            10 * sizeof (struct charseq *));
 769           ctype->mbdigits_max = 10;
 770         }
 771
 772       for (cnt = 0; cnt < 10; ++cnt)
 773         {
 774           ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
 775                                                       (char *) digits + cnt, 1);
 776           if (ctype->mbdigits[cnt] == NULL)
 777             {
 778               ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
 779                                                           longnames[cnt],
 780                                                           strlen (longnames[cnt]));
 781               if (ctype->mbdigits[cnt] == NULL)
 782                 {
 783                   /* Hum, this ain't good.  */
 784                   record_error (0, 0, _("\
 785 no input digits defined and none of the standard names in the charmap"));
 786
 787                   ctype->mbdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
 788                                                         sizeof (struct charseq) + 1);
 789
 790                   /* This is better than nothing.  */
 791                   ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
 792                   ctype->mbdigits[cnt]->nbytes = 1;
 793                 }
 794             }
 795         }
 796
 797       ctype->mbdigits_act = 10;
 798     }
 799
 800   /* Check the wide character input digits.  There must be a multiple
 801      of ten available.  In each group it could be that one or the other
 802      character is missing.  In this case the whole group must be
 803      removed.  */
 804   cnt = 0;
 805   while (cnt < ctype->wcdigits_act)
 806     {
 807       size_t inner;
 808       for (inner = 0; inner < 10; ++inner)
 809         if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
 810           break;
 811
 812       if (inner == 10)
 813         cnt += 10;
 814       else
 815         {
 816           /* Remove the group.  */
 817           memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
 818                    ((ctype->wcdigits_act - cnt - 10)
 819                     * sizeof (ctype->wcdigits[0])));
 820           ctype->wcdigits_act -= 10;
 821         }
 822     }
 823
 824   /* If no input digits are given use the default.  */
 825   if (ctype->wcdigits_act == 0)
 826     {
 827       if (ctype->wcdigits_max == 0)
 828         {
 829           ctype->wcdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
 830                                            10 * sizeof (uint32_t));
 831           ctype->wcdigits_max = 10;
 832         }
 833
 834       for (cnt = 0; cnt < 10; ++cnt)
 835         ctype->wcdigits[cnt] = L'0' + cnt;
 836
 837       ctype->mbdigits_act = 10;
 838     }
 839
 840   /* Check the outdigits.  */
 841   warned = 0;
 842   for (cnt = 0; cnt < 10; ++cnt)
 843     if (ctype->mboutdigits[cnt] == NULL)
 844       {
 845         static struct charseq replace[2];
 846
 847         if (!warned)
 848           {
 849             record_error (0, 0, _("\
 850 not all characters used in `outdigit' are available in the charmap"));
 851             warned = 1;
 852           }
 853
 854         replace[0].nbytes = 1;
 855         replace[0].bytes[0] = '?';
 856         replace[0].bytes[1] = '\0';
 857         ctype->mboutdigits[cnt] = &replace[0];
 858       }
 859
 860   warned = 0;
 861   for (cnt = 0; cnt < 10; ++cnt)
 862     if (ctype->wcoutdigits[cnt] == 0)
 863       {
 864         if (!warned)
 865           {
 866             record_error (0, 0, _("\
 867 not all characters used in `outdigit' are available in the repertoire"));
 868             warned = 1;
 869           }
 870
 871         ctype->wcoutdigits[cnt] = L'?';
 872       }
 873
 874   /* Sort the entries in the translit_ignore list.  */
 875   if (ctype->translit_ignore != NULL)
 876     {
 877       struct translit_ignore_t *firstp = ctype->translit_ignore;
 878       struct translit_ignore_t *runp;
 879
 880       ctype->ntranslit_ignore = 1;
 881
 882       for (runp = firstp->next; runp != NULL; runp = runp->next)
 883         {
 884           struct translit_ignore_t *lastp = NULL;
 885           struct translit_ignore_t *cmpp;
 886
 887           ++ctype->ntranslit_ignore;
 888
 889           for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next)
 890             if (runp->from < cmpp->from)
 891               break;
 892
 893           runp->next = lastp;
 894           if (lastp == NULL)
 895             firstp = runp;
 896         }
 897
 898       ctype->translit_ignore = firstp;
 899     }
 900 }
 901
 902
 903 void
 904 ctype_output (struct localedef_t *locale, const struct charmap_t *charmap,
 905               const char *output_path)
 906 {
 907   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 908   const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)
 909                          + ctype->nr_charclass + ctype->map_collection_nr);
 910   struct locale_file file;
 911   uint32_t default_missing_len;
 912   size_t elem, cnt;
 913
 914   /* Now prepare the output: Find the sizes of the table we can use.  */
 915   allocate_arrays (ctype, charmap, ctype->repertoire);
 916
 917   default_missing_len = (ctype->default_missing
 918                          ? wcslen ((wchar_t *) ctype->default_missing)
 919                          : 0);
 920
 921   init_locale_data (&file, nelems);
 922   for (elem = 0; elem < nelems; ++elem)
 923     {
 924       if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))
 925         switch (elem)
 926           {
 927 #define CTYPE_EMPTY(name) \
 928           case name:                                                          \
 929             add_locale_empty (&file);                                         \
 930             break
 931
 932           CTYPE_EMPTY(_NL_CTYPE_GAP1);
 933           CTYPE_EMPTY(_NL_CTYPE_GAP2);
 934           CTYPE_EMPTY(_NL_CTYPE_GAP3);
 935           CTYPE_EMPTY(_NL_CTYPE_GAP4);
 936           CTYPE_EMPTY(_NL_CTYPE_GAP5);
 937           CTYPE_EMPTY(_NL_CTYPE_GAP6);
 938
 939 #define CTYPE_RAW_DATA(name, base, size)                                      \
 940           case _NL_ITEM_INDEX (name):                                         \
 941             add_locale_raw_data (&file, base, size);                          \
 942             break
 943
 944           CTYPE_RAW_DATA (_NL_CTYPE_CLASS,
 945                           ctype->ctype_b,
 946                           (256 + 128) * sizeof (char_class_t));
 947
 948 #define CTYPE_UINT32_ARRAY(name, base, n_elems)                               \
 949           case _NL_ITEM_INDEX (name):                                         \
 950             add_locale_uint32_array (&file, base, n_elems);                   \
 951             break
 952
 953           CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER, ctype->map_b[0], 256 + 128);
 954           CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER, ctype->map_b[1], 256 + 128);
 955           CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER32, ctype->map32_b[0], 256);
 956           CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER32, ctype->map32_b[1], 256);
 957           CTYPE_RAW_DATA (_NL_CTYPE_CLASS32,
 958                           ctype->ctype32_b,
 959                           256 * sizeof (char_class32_t));
 960
 961 #define CTYPE_UINT32(name, value)                                             \
 962           case _NL_ITEM_INDEX (name):                                         \
 963             add_locale_uint32 (&file, value);                                 \
 964             break
 965
 966           CTYPE_UINT32 (_NL_CTYPE_CLASS_OFFSET, ctype->class_offset);
 967           CTYPE_UINT32 (_NL_CTYPE_MAP_OFFSET, ctype->map_offset);
 968           CTYPE_UINT32 (_NL_CTYPE_TRANSLIT_TAB_SIZE, ctype->translit_idx_size);
 969
 970           CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_IDX,
 971                               ctype->translit_from_idx,
 972                               ctype->translit_idx_size);
 973
 974           CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_TBL,
 975                               ctype->translit_from_tbl,
 976                               ctype->translit_from_tbl_size
 977                               / sizeof (uint32_t));
 978
 979           CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_IDX,
 980                               ctype->translit_to_idx,
 981                               ctype->translit_idx_size);
 982
 983           CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_TBL,
 984                               ctype->translit_to_tbl,
 985                               ctype->translit_to_tbl_size / sizeof (uint32_t));
 986
 987           case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
 988             /* The class name array.  */
 989             start_locale_structure (&file);
 990             for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
 991               add_locale_string (&file, ctype->classnames[cnt]);
 992             add_locale_char (&file, 0);
 993             align_locale_data (&file, LOCFILE_ALIGN);
 994             end_locale_structure (&file);
 995             break;
 996
 997           case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
 998             /* The class name array.  */
 999             start_locale_structure (&file);
1000             for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1001               add_locale_string (&file, ctype->mapnames[cnt]);
1002             add_locale_char (&file, 0);
1003             align_locale_data (&file, LOCFILE_ALIGN);
1004             end_locale_structure (&file);
1005             break;
1006
1007           case _NL_ITEM_INDEX (_NL_CTYPE_WIDTH):
1008             add_locale_wcwidth_table (&file, &ctype->width);
1009             break;
1010
1011           CTYPE_UINT32 (_NL_CTYPE_MB_CUR_MAX, ctype->mb_cur_max);
1012
1013           case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
1014             add_locale_string (&file, ctype->codeset_name);
1015             break;
1016
1017           CTYPE_UINT32 (_NL_CTYPE_MAP_TO_NONASCII, ctype->to_nonascii);
1018
1019           CTYPE_UINT32 (_NL_CTYPE_NONASCII_CASE, ctype->nonascii_case);
1020
1021           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
1022             add_locale_uint32 (&file, ctype->mbdigits_act / 10);
1023             break;
1024
1025           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
1026             add_locale_uint32 (&file, ctype->wcdigits_act / 10);
1027             break;
1028
1029           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
1030             start_locale_structure (&file);
1031             for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1032                  cnt < ctype->mbdigits_act; cnt += 10)
1033               {
1034                 add_locale_raw_data (&file, ctype->mbdigits[cnt]->bytes,
1035                                      ctype->mbdigits[cnt]->nbytes);
1036                 add_locale_char (&file, 0);
1037               }
1038             end_locale_structure (&file);
1039             break;
1040
1041           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
1042             start_locale_structure (&file);
1043             cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
1044             add_locale_raw_data (&file, ctype->mboutdigits[cnt]->bytes,
1045                                  ctype->mboutdigits[cnt]->nbytes);
1046             add_locale_char (&file, 0);
1047             end_locale_structure (&file);
1048             break;
1049
1050           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
1051             start_locale_structure (&file);
1052             for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
1053                  cnt < ctype->wcdigits_act; cnt += 10)
1054               add_locale_uint32 (&file, ctype->wcdigits[cnt]);
1055             end_locale_structure (&file);
1056             break;
1057
1058           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
1059             cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
1060             add_locale_uint32 (&file, ctype->wcoutdigits[cnt]);
1061             break;
1062
1063           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
1064             add_locale_uint32 (&file, default_missing_len);
1065             break;
1066
1067           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
1068             add_locale_uint32_array (&file, ctype->default_missing,
1069                                      default_missing_len);
1070             break;
1071
1072           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
1073             add_locale_uint32 (&file, ctype->ntranslit_ignore);
1074             break;
1075
1076           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
1077             start_locale_structure (&file);
1078             {
1079               struct translit_ignore_t *runp;
1080               for (runp = ctype->translit_ignore; runp != NULL;
1081                    runp = runp->next)
1082                 {
1083                   add_locale_uint32 (&file, runp->from);
1084                   add_locale_uint32 (&file, runp->to);
1085                   add_locale_uint32 (&file, runp->step);
1086                 }
1087             }
1088             end_locale_structure (&file);
1089             break;
1090
1091           default:
1092             assert (! "unknown CTYPE element");
1093           }
1094       else
1095         {
1096           /* Handle extra maps.  */
1097           size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
1098           if (nr < ctype->nr_charclass)
1099             {
1100               start_locale_prelude (&file);
1101               add_locale_uint32_array (&file, ctype->class_b[nr], 256 / 32);
1102               end_locale_prelude (&file);
1103               add_locale_wctype_table (&file, &ctype->class_3level[nr]);
1104             }
1105           else
1106             {
1107               nr -= ctype->nr_charclass;
1108               assert (nr < ctype->map_collection_nr);
1109               add_locale_wctrans_table (&file, &ctype->map_3level[nr]);
1110             }
1111         }
1112     }
1113
1114   write_locale_data (output_path, LC_CTYPE, "LC_CTYPE", &file);
1115 }
1116
1117
1118 /* Local functions.  */
1119 static void
1120 ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
1121                  const char *name)
1122 {
1123   size_t cnt;
1124
1125   for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1126     if (strcmp (ctype->classnames[cnt], name) == 0)
1127       break;
1128
1129   if (cnt < ctype->nr_charclass)
1130     {
1131       lr_error (lr, _("character class `%s' already defined"), name);
1132       return;
1133     }
1134
1135   if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1136     /* Exit code 2 is prescribed in P1003.2b.  */
1137     record_error (2, 0, _("\
1138 implementation limit: no more than %Zd character classes allowed"),
1139                   MAX_NR_CHARCLASS);
1140
1141   ctype->classnames[ctype->nr_charclass++] = name;
1142 }
1143
1144
1145 static void
1146 ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1147                const char *name, const struct charmap_t *charmap)
1148 {
1149   size_t max_chars = 0;
1150   size_t cnt;
1151
1152   for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1153     {
1154       if (strcmp (ctype->mapnames[cnt], name) == 0)
1155         break;
1156
1157       if (max_chars < ctype->map_collection_max[cnt])
1158         max_chars = ctype->map_collection_max[cnt];
1159     }
1160
1161   if (cnt < ctype->map_collection_nr)
1162     {
1163       lr_error (lr, _("character map `%s' already defined"), name);
1164       return;
1165     }
1166
1167   if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1168     /* Exit code 2 is prescribed in P1003.2b.  */
1169     record_error (2, 0, _("\
1170 implementation limit: no more than %d character maps allowed"),
1171                   MAX_NR_CHARMAP);
1172
1173   ctype->mapnames[cnt] = name;
1174
1175   if (max_chars == 0)
1176     ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1177   else
1178     ctype->map_collection_max[cnt] = max_chars;
1179
1180   ctype->map_collection[cnt] = (uint32_t *)
1181     xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
1182   ctype->map_collection_act[cnt] = 256;
1183
1184   ++ctype->map_collection_nr;
1185 }
1186
1187
1188 /* We have to be prepared that TABLE, MAX, and ACT can be NULL.  This
1189    is possible if we only want to extend the name array.  */
1190 static uint32_t *
1191 find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1192           size_t *act, uint32_t idx)
1193 {
1194   size_t cnt;
1195
1196   if (idx < 256)
1197     return table == NULL ? NULL : &(*table)[idx];
1198
1199   /* Use the charnames_idx lookup table instead of the slow search loop.  */
1200 #if 1
1201   cnt = idx_table_get (&ctype->charnames_idx, idx);
1202   if (cnt == EMPTY)
1203     /* Not found.  */
1204     cnt = ctype->charnames_act;
1205 #else
1206   for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1207     if (ctype->charnames[cnt] == idx)
1208       break;
1209 #endif
1210
1211   /* We have to distinguish two cases: the name is found or not.  */
1212   if (cnt == ctype->charnames_act)
1213     {
1214       /* Extend the name array.  */
1215       if (ctype->charnames_act == ctype->charnames_max)
1216         {
1217           ctype->charnames_max *= 2;
1218           ctype->charnames = (uint32_t *)
1219             xrealloc (ctype->charnames,
1220                       sizeof (uint32_t) * ctype->charnames_max);
1221         }
1222       ctype->charnames[ctype->charnames_act++] = idx;
1223       idx_table_add (&ctype->charnames_idx, idx, cnt);
1224     }
1225
1226   if (table == NULL)
1227     /* We have done everything we are asked to do.  */
1228     return NULL;
1229
1230   if (max == NULL)
1231     /* The caller does not want to extend the table.  */
1232     return (cnt >= *act ? NULL : &(*table)[cnt]);
1233
1234   if (cnt >= *act)
1235     {
1236       if (cnt >= *max)
1237         {
1238           size_t old_max = *max;
1239           do
1240             *max *= 2;
1241           while (*max <= cnt);
1242
1243           *table =
1244             (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
1245           memset (&(*table)[old_max], '\0',
1246                   (*max - old_max) * sizeof (uint32_t));
1247         }
1248
1249       *act = cnt + 1;
1250     }
1251
1252   return &(*table)[cnt];
1253 }
1254
1255
1256 static int
1257 get_character (struct token *now, const struct charmap_t *charmap,
1258                struct repertoire_t *repertoire,
1259                struct charseq **seqp, uint32_t *wchp)
1260 {
1261   if (now->tok == tok_bsymbol)
1262     {
1263       /* This will hopefully be the normal case.  */
1264       *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1265                                      now->val.str.lenmb);
1266       *seqp = charmap_find_value (charmap, now->val.str.startmb,
1267                                   now->val.str.lenmb);
1268     }
1269   else if (now->tok == tok_ucs4)
1270     {
1271       char utmp[10];
1272
1273       snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1274       *seqp = charmap_find_value (charmap, utmp, 9);
1275
1276       if (*seqp == NULL)
1277         *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
1278
1279       if (*seqp == NULL)
1280         {
1281           /* Compute the value in the charmap from the UCS value.  */
1282           const char *symbol = repertoire_find_symbol (repertoire,
1283                                                        now->val.ucs4);
1284
1285           if (symbol == NULL)
1286             *seqp = NULL;
1287           else
1288             *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
1289
1290           if (*seqp == NULL)
1291             {
1292               if (repertoire != NULL)
1293                 {
1294                   /* Insert a negative entry.  */
1295                   static const struct charseq negative
1296                     = { .ucs4 = ILLEGAL_CHAR_VALUE };
1297                   uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1298                                                   sizeof (uint32_t));
1299                   *newp = now->val.ucs4;
1300
1301                   insert_entry (&repertoire->seq_table, newp,
1302                                 sizeof (uint32_t), (void *) &negative);
1303                 }
1304             }
1305           else
1306             (*seqp)->ucs4 = now->val.ucs4;
1307         }
1308       else if ((*seqp)->ucs4 != now->val.ucs4)
1309         *seqp = NULL;
1310
1311       *wchp = now->val.ucs4;
1312     }
1313   else if (now->tok == tok_charcode)
1314     {
1315       /* We must map from the byte code to UCS4.  */
1316       *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1317                                    now->val.str.lenmb);
1318
1319       if (*seqp == NULL)
1320         *wchp = ILLEGAL_CHAR_VALUE;
1321       else
1322         {
1323           if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1324             (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1325                                                    strlen ((*seqp)->name));
1326           *wchp = (*seqp)->ucs4;
1327         }
1328     }
1329   else
1330     return 1;
1331
1332   return 0;
1333 }
1334
1335
1336 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1337    the .(2). counterparts.  */
1338 static void
1339 charclass_symbolic_ellipsis (struct linereader *ldfile,
1340                              struct locale_ctype_t *ctype,
1341                              const struct charmap_t *charmap,
1342                              struct repertoire_t *repertoire,
1343                              struct token *now,
1344                              const char *last_str,
1345                              unsigned long int class256_bit,
1346                              unsigned long int class_bit, int base,
1347                              int ignore_content, int handle_digits, int step)
1348 {
1349   const char *nowstr = now->val.str.startmb;
1350   char tmp[now->val.str.lenmb + 1];
1351   const char *cp;
1352   char *endp;
1353   unsigned long int from;
1354   unsigned long int to;
1355
1356   /* We have to compute the ellipsis values using the symbolic names.  */
1357   assert (last_str != NULL);
1358
1359   if (strlen (last_str) != now->val.str.lenmb)
1360     {
1361     invalid_range:
1362       lr_error (ldfile,
1363                 _("`%s' and `%.*s' are not valid names for symbolic range"),
1364                 last_str, (int) now->val.str.lenmb, nowstr);
1365       return;
1366     }
1367
1368   if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1369     /* Nothing to do, the names are the same.  */
1370     return;
1371
1372   for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1373     ;
1374
1375   errno = 0;
1376   from = strtoul (cp, &endp, base);
1377   if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1378     goto invalid_range;
1379
1380   to = strtoul (nowstr + (cp - last_str), &endp, base);
1381   if ((to == UINT_MAX && errno == ERANGE)
1382       || (endp - nowstr) != now->val.str.lenmb || from >= to)
1383     goto invalid_range;
1384
1385   /* OK, we have a range FROM - TO.  Now we can create the symbolic names.  */
1386   if (!ignore_content)
1387     {
1388       now->val.str.startmb = tmp;
1389       while ((from += step) <= to)
1390         {
1391           struct charseq *seq;
1392           uint32_t wch;
1393
1394           sprintf (tmp, (base == 10 ? "%.*s%0*ld" : "%.*s%0*lX"),
1395                    (int) (cp - last_str), last_str,
1396                    (int) (now->val.str.lenmb - (cp - last_str)),
1397                    from);
1398
1399           get_character (now, charmap, repertoire, &seq, &wch);
1400
1401           if (seq != NULL && seq->nbytes == 1)
1402             /* Yep, we can store information about this byte sequence.  */
1403             ctype->class256_collection[seq->bytes[0]] |= class256_bit;
1404
1405           if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1406             /* We have the UCS4 position.  */
1407             *find_idx (ctype, &ctype->class_collection,
1408                        &ctype->class_collection_max,
1409                        &ctype->class_collection_act, wch) |= class_bit;
1410
1411           if (handle_digits == 1)
1412             {
1413               /* We must store the digit values.  */
1414               if (ctype->mbdigits_act == ctype->mbdigits_max)
1415                 {
1416                   ctype->mbdigits_max *= 2;
1417                   ctype->mbdigits = xrealloc (ctype->mbdigits,
1418                                               (ctype->mbdigits_max
1419                                                * sizeof (char *)));
1420                   ctype->wcdigits_max *= 2;
1421                   ctype->wcdigits = xrealloc (ctype->wcdigits,
1422                                               (ctype->wcdigits_max
1423                                                * sizeof (uint32_t)));
1424                 }
1425
1426               ctype->mbdigits[ctype->mbdigits_act++] = seq;
1427               ctype->wcdigits[ctype->wcdigits_act++] = wch;
1428             }
1429           else if (handle_digits == 2)
1430             {
1431               /* We must store the digit values.  */
1432               if (ctype->outdigits_act >= 10)
1433                 {
1434                   lr_error (ldfile, _("\
1435 %s: field `%s' does not contain exactly ten entries"),
1436                             "LC_CTYPE", "outdigit");
1437                   return;
1438                 }
1439
1440               ctype->mboutdigits[ctype->outdigits_act] = seq;
1441               ctype->wcoutdigits[ctype->outdigits_act] = wch;
1442               ++ctype->outdigits_act;
1443             }
1444         }
1445     }
1446 }
1447
1448
1449 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'.  */
1450 static void
1451 charclass_ucs4_ellipsis (struct linereader *ldfile,
1452                          struct locale_ctype_t *ctype,
1453                          const struct charmap_t *charmap,
1454                          struct repertoire_t *repertoire,
1455                          struct token *now, uint32_t last_wch,
1456                          unsigned long int class256_bit,
1457                          unsigned long int class_bit, int ignore_content,
1458                          int handle_digits, int step)
1459 {
1460   if (last_wch > now->val.ucs4)
1461     {
1462       lr_error (ldfile, _("\
1463 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1464                 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1465                 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
1466       return;
1467     }
1468
1469   if (!ignore_content)
1470     while ((last_wch += step) <= now->val.ucs4)
1471       {
1472         /* We have to find out whether there is a byte sequence corresponding
1473            to this UCS4 value.  */
1474         struct charseq *seq;
1475         char utmp[10];
1476
1477         snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1478         seq = charmap_find_value (charmap, utmp, 9);
1479         if (seq == NULL)
1480           {
1481             snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1482             seq = charmap_find_value (charmap, utmp, 5);
1483           }
1484
1485         if (seq == NULL)
1486           /* Try looking in the repertoire map.  */
1487           seq = repertoire_find_seq (repertoire, last_wch);
1488
1489         /* If this is the first time we look for this sequence create a new
1490            entry.  */
1491         if (seq == NULL)
1492           {
1493             static const struct charseq negative
1494               = { .ucs4 = ILLEGAL_CHAR_VALUE };
1495
1496             /* Find the symbolic name for this UCS4 value.  */
1497             if (repertoire != NULL)
1498               {
1499                 const char *symbol = repertoire_find_symbol (repertoire,
1500                                                              last_wch);
1501                 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1502                                                 sizeof (uint32_t));
1503                 *newp = last_wch;
1504
1505                 if (symbol != NULL)
1506                   /* We have a name, now search the multibyte value.  */
1507                   seq = charmap_find_value (charmap, symbol, strlen (symbol));
1508
1509                 if (seq == NULL)
1510                   /* We have to create a fake entry.  */
1511                   seq = (struct charseq *) &negative;
1512                 else
1513                   seq->ucs4 = last_wch;
1514
1515                 insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1516                               seq);
1517               }
1518             else
1519               /* We have to create a fake entry.  */
1520               seq = (struct charseq *) &negative;
1521           }
1522
1523         /* We have a name, now search the multibyte value.  */
1524         if (seq->ucs4 == last_wch && seq->nbytes == 1)
1525           /* Yep, we can store information about this byte sequence.  */
1526           ctype->class256_collection[(size_t) seq->bytes[0]]
1527             |= class256_bit;
1528
1529         /* And of course we have the UCS4 position.  */
1530         if (class_bit != 0)
1531           *find_idx (ctype, &ctype->class_collection,
1532                      &ctype->class_collection_max,
1533                      &ctype->class_collection_act, last_wch) |= class_bit;
1534
1535         if (handle_digits == 1)
1536           {
1537             /* We must store the digit values.  */
1538             if (ctype->mbdigits_act == ctype->mbdigits_max)
1539               {
1540                 ctype->mbdigits_max *= 2;
1541                 ctype->mbdigits = xrealloc (ctype->mbdigits,
1542                                             (ctype->mbdigits_max
1543                                              * sizeof (char *)));
1544                 ctype->wcdigits_max *= 2;
1545                 ctype->wcdigits = xrealloc (ctype->wcdigits,
1546                                             (ctype->wcdigits_max
1547                                              * sizeof (uint32_t)));
1548               }
1549
1550             ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1551                                                       ? seq : NULL);
1552             ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1553           }
1554         else if (handle_digits == 2)
1555           {
1556             /* We must store the digit values.  */
1557             if (ctype->outdigits_act >= 10)
1558               {
1559                 lr_error (ldfile, _("\
1560 %s: field `%s' does not contain exactly ten entries"),
1561                           "LC_CTYPE", "outdigit");
1562                 return;
1563               }
1564
1565             ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1566                                                         ? seq : NULL);
1567             ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1568             ++ctype->outdigits_act;
1569           }
1570       }
1571 }
1572
1573
1574 /* Ellipsis as in `/xea/x12.../xea/x34'.  */
1575 static void
1576 charclass_charcode_ellipsis (struct linereader *ldfile,
1577                              struct locale_ctype_t *ctype,
1578                              const struct charmap_t *charmap,
1579                              struct repertoire_t *repertoire,
1580                              struct token *now, char *last_charcode,
1581                              uint32_t last_charcode_len,
1582                              unsigned long int class256_bit,
1583                              unsigned long int class_bit, int ignore_content,
1584                              int handle_digits)
1585 {
1586   /* First check whether the to-value is larger.  */
1587   if (now->val.charcode.nbytes != last_charcode_len)
1588     {
1589       lr_error (ldfile, _("\
1590 start and end character sequence of range must have the same length"));
1591       return;
1592     }
1593
1594   if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
1595     {
1596       lr_error (ldfile, _("\
1597 to-value character sequence is smaller than from-value sequence"));
1598       return;
1599     }
1600
1601   if (!ignore_content)
1602     {
1603       do
1604         {
1605           /* Increment the byte sequence value.  */
1606           struct charseq *seq;
1607           uint32_t wch;
1608           int i;
1609
1610           for (i = last_charcode_len - 1; i >= 0; --i)
1611             if (++last_charcode[i] != 0)
1612               break;
1613
1614           if (last_charcode_len == 1)
1615             /* Of course we have the charcode value.  */
1616             ctype->class256_collection[(size_t) last_charcode[0]]
1617               |= class256_bit;
1618
1619           /* Find the symbolic name.  */
1620           seq = charmap_find_symbol (charmap, last_charcode,
1621                                      last_charcode_len);
1622           if (seq != NULL)
1623             {
1624               if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1625                 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1626                                                    strlen (seq->name));
1627               wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
1628
1629               if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1630                 *find_idx (ctype, &ctype->class_collection,
1631                            &ctype->class_collection_max,
1632                            &ctype->class_collection_act, wch) |= class_bit;
1633             }
1634           else
1635             wch = ILLEGAL_CHAR_VALUE;
1636
1637           if (handle_digits == 1)
1638             {
1639               /* We must store the digit values.  */
1640               if (ctype->mbdigits_act == ctype->mbdigits_max)
1641                 {
1642                   ctype->mbdigits_max *= 2;
1643                   ctype->mbdigits = xrealloc (ctype->mbdigits,
1644                                               (ctype->mbdigits_max
1645                                                * sizeof (char *)));
1646                   ctype->wcdigits_max *= 2;
1647                   ctype->wcdigits = xrealloc (ctype->wcdigits,
1648                                               (ctype->wcdigits_max
1649                                                * sizeof (uint32_t)));
1650                 }
1651
1652               seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1653               memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1654               seq->nbytes = last_charcode_len;
1655
1656               ctype->mbdigits[ctype->mbdigits_act++] = seq;
1657               ctype->wcdigits[ctype->wcdigits_act++] = wch;
1658             }
1659           else if (handle_digits == 2)
1660             {
1661               struct charseq *seq;
1662               /* We must store the digit values.  */
1663               if (ctype->outdigits_act >= 10)
1664                 {
1665                   lr_error (ldfile, _("\
1666 %s: field `%s' does not contain exactly ten entries"),
1667                             "LC_CTYPE", "outdigit");
1668                   return;
1669                 }
1670
1671               seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1672               memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1673               seq->nbytes = last_charcode_len;
1674
1675               ctype->mboutdigits[ctype->outdigits_act] = seq;
1676               ctype->wcoutdigits[ctype->outdigits_act] = wch;
1677               ++ctype->outdigits_act;
1678             }
1679         }
1680       while (memcmp (last_charcode, now->val.charcode.bytes,
1681                      last_charcode_len) != 0);
1682     }
1683 }
1684
1685
1686 static uint32_t *
1687 find_translit2 (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
1688                 uint32_t wch)
1689 {
1690   struct translit_t *trunp = ctype->translit;
1691   struct translit_ignore_t *tirunp = ctype->translit_ignore;
1692
1693   while (trunp != NULL)
1694     {
1695       /* XXX We simplify things here.  The transliterations we look
1696          for are only allowed to have one character.  */
1697       if (trunp->from[0] == wch && trunp->from[1] == 0)
1698         {
1699           /* Found it.  Now look for a transliteration which can be
1700              represented with the character set.  */
1701           struct translit_to_t *torunp = trunp->to;
1702
1703           while (torunp != NULL)
1704             {
1705               int i;
1706
1707               for (i = 0; torunp->str[i] != 0; ++i)
1708                 {
1709                   char utmp[10];
1710
1711                   snprintf (utmp, sizeof (utmp), "U%08X", torunp->str[i]);
1712                   if (charmap_find_value (charmap, utmp, 9) == NULL)
1713                     /* This character cannot be represented.  */
1714                     break;
1715                 }
1716
1717               if (torunp->str[i] == 0)
1718                 return torunp->str;
1719
1720               torunp = torunp->next;
1721             }
1722
1723           break;
1724         }
1725
1726       trunp = trunp->next;
1727     }
1728
1729   /* Check for ignored chars.  */
1730   while (tirunp != NULL)
1731     {
1732       if (tirunp->from <= wch && tirunp->to >= wch)
1733         {
1734           uint32_t wi;
1735
1736           for (wi = tirunp->from; wi <= wch; wi += tirunp->step)
1737             if (wi == wch)
1738               return no_str;
1739         }
1740     }
1741
1742   /* Nothing found.  */
1743   return NULL;
1744 }
1745
1746
1747 uint32_t *
1748 find_translit (struct localedef_t *locale, const struct charmap_t *charmap,
1749                uint32_t wch)
1750 {
1751   struct locale_ctype_t *ctype;
1752   uint32_t *result = NULL;
1753
1754   assert (locale != NULL);
1755   ctype = locale->categories[LC_CTYPE].ctype;
1756
1757   if (ctype == NULL)
1758     return NULL;
1759
1760   if (ctype->translit != NULL)
1761     result = find_translit2 (ctype, charmap, wch);
1762
1763   if (result == NULL)
1764     {
1765       struct translit_include_t *irunp = ctype->translit_include;
1766
1767       while (irunp != NULL && result == NULL)
1768         {
1769           result = find_translit (find_locale (CTYPE_LOCALE,
1770                                                irunp->copy_locale,
1771                                                irunp->copy_repertoire,
1772                                                charmap),
1773                                   charmap, wch);
1774           irunp = irunp->next;
1775         }
1776     }
1777
1778   return result;
1779 }
1780
1781
1782 /* Read one transliteration entry.  */
1783 static uint32_t *
1784 read_widestring (struct linereader *ldfile, struct token *now,
1785                  const struct charmap_t *charmap,
1786                  struct repertoire_t *repertoire)
1787 {
1788   uint32_t *wstr;
1789
1790   if (now->tok == tok_default_missing)
1791     /* The special name "" will denote this case.  */
1792     wstr = no_str;
1793   else if (now->tok == tok_bsymbol)
1794     {
1795       /* Get the value from the repertoire.  */
1796       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1797       wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1798                                        now->val.str.lenmb);
1799       if (wstr[0] == ILLEGAL_CHAR_VALUE)
1800         {
1801           /* We cannot proceed, we don't know the UCS4 value.  */
1802           free (wstr);
1803           return NULL;
1804         }
1805
1806       wstr[1] = 0;
1807     }
1808   else if (now->tok == tok_ucs4)
1809     {
1810       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1811       wstr[0] = now->val.ucs4;
1812       wstr[1] = 0;
1813     }
1814   else if (now->tok == tok_charcode)
1815     {
1816       /* Argh, we have to convert to the symbol name first and then to the
1817          UCS4 value.  */
1818       struct charseq *seq = charmap_find_symbol (charmap,
1819                                                  now->val.str.startmb,
1820                                                  now->val.str.lenmb);
1821       if (seq == NULL)
1822         /* Cannot find the UCS4 value.  */
1823         return NULL;
1824
1825       if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1826         seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1827                                            strlen (seq->name));
1828       if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1829         /* We cannot proceed, we don't know the UCS4 value.  */
1830         return NULL;
1831
1832       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1833       wstr[0] = seq->ucs4;
1834       wstr[1] = 0;
1835     }
1836   else if (now->tok == tok_string)
1837     {
1838       wstr = now->val.str.startwc;
1839       if (wstr == NULL || wstr[0] == 0)
1840         return NULL;
1841     }
1842   else
1843     {
1844       if (now->tok != tok_eol && now->tok != tok_eof)
1845         lr_ignore_rest (ldfile, 0);
1846       SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1847       return (uint32_t *) -1l;
1848     }
1849
1850   return wstr;
1851 }
1852
1853
1854 static void
1855 read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1856                      struct token *now, const struct charmap_t *charmap,
1857                      struct repertoire_t *repertoire)
1858 {
1859   uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1860   struct translit_t *result;
1861   struct translit_to_t **top;
1862   struct obstack *ob = &ctype->mempool;
1863   int first;
1864   int ignore;
1865
1866   if (from_wstr == NULL)
1867     /* There is no valid from string.  */
1868     return;
1869
1870   result = (struct translit_t *) obstack_alloc (ob,
1871                                                 sizeof (struct translit_t));
1872   result->from = from_wstr;
1873   result->fname = ldfile->fname;
1874   result->lineno = ldfile->lineno;
1875   result->next = NULL;
1876   result->to = NULL;
1877   top = &result->to;
1878   first = 1;
1879   ignore = 0;
1880
1881   while (1)
1882     {
1883       uint32_t *to_wstr;
1884
1885       /* Next we have one or more transliterations.  They are
1886          separated by semicolons.  */
1887       now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
1888
1889       if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
1890         {
1891           /* One string read.  */
1892           const uint32_t zero = 0;
1893
1894           if (!ignore)
1895             {
1896               obstack_grow (ob, &zero, 4);
1897               to_wstr = obstack_finish (ob);
1898
1899               *top = obstack_alloc (ob, sizeof (struct translit_to_t));
1900               (*top)->str = to_wstr;
1901               (*top)->next = NULL;
1902             }
1903
1904           if (now->tok == tok_eol)
1905             {
1906               result->next = ctype->translit;
1907               ctype->translit = result;
1908               return;
1909             }
1910
1911           if (!ignore)
1912             top = &(*top)->next;
1913           ignore = 0;
1914         }
1915       else
1916         {
1917           to_wstr = read_widestring (ldfile, now, charmap, repertoire);
1918           if (to_wstr == (uint32_t *) -1l)
1919             {
1920               /* An error occurred.  */
1921               obstack_free (ob, result);
1922               return;
1923             }
1924
1925           if (to_wstr == NULL)
1926             ignore = 1;
1927           else
1928             /* This value is usable.  */
1929             obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
1930
1931           first = 0;
1932         }
1933     }
1934 }
1935
1936
1937 static void
1938 read_translit_ignore_entry (struct linereader *ldfile,
1939                             struct locale_ctype_t *ctype,
1940                             const struct charmap_t *charmap,
1941                             struct repertoire_t *repertoire)
1942 {
1943   /* We expect a semicolon-separated list of characters we ignore.  We are
1944      only interested in the wide character definitions.  These must be
1945      single characters, possibly defining a range when an ellipsis is used.  */
1946   while (1)
1947     {
1948       struct token *now = lr_token (ldfile, charmap, NULL, repertoire,
1949                                     verbose);
1950       struct translit_ignore_t *newp;
1951       uint32_t from;
1952
1953       if (now->tok == tok_eol || now->tok == tok_eof)
1954         {
1955           lr_error (ldfile,
1956                     _("premature end of `translit_ignore' definition"));
1957           return;
1958         }
1959
1960       if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
1961         {
1962           lr_error (ldfile, _("syntax error"));
1963           lr_ignore_rest (ldfile, 0);
1964           return;
1965         }
1966
1967       if (now->tok == tok_ucs4)
1968         from = now->val.ucs4;
1969       else
1970         /* Try to get the value.  */
1971         from = repertoire_find_value (repertoire, now->val.str.startmb,
1972                                       now->val.str.lenmb);
1973
1974       if (from == ILLEGAL_CHAR_VALUE)
1975         {
1976           lr_error (ldfile, "invalid character name");
1977           newp = NULL;
1978         }
1979       else
1980         {
1981           newp = (struct translit_ignore_t *)
1982             obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
1983           newp->from = from;
1984           newp->to = from;
1985           newp->step = 1;
1986
1987           newp->next = ctype->translit_ignore;
1988           ctype->translit_ignore = newp;
1989         }
1990
1991       /* Now we expect either a semicolon, an ellipsis, or the end of the
1992          line.  */
1993       now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
1994
1995       if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
1996         {
1997           /* XXX Should we bother implementing `....'?  `...' certainly
1998              will not be implemented.  */
1999           uint32_t to;
2000           int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
2001
2002           now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2003
2004           if (now->tok == tok_eol || now->tok == tok_eof)
2005             {
2006               lr_error (ldfile,
2007                         _("premature end of `translit_ignore' definition"));
2008               return;
2009             }
2010
2011           if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2012             {
2013               lr_error (ldfile, _("syntax error"));
2014               lr_ignore_rest (ldfile, 0);
2015               return;
2016             }
2017
2018           if (now->tok == tok_ucs4)
2019             to = now->val.ucs4;
2020           else
2021             /* Try to get the value.  */
2022             to = repertoire_find_value (repertoire, now->val.str.startmb,
2023                                         now->val.str.lenmb);
2024
2025           if (to == ILLEGAL_CHAR_VALUE)
2026             lr_error (ldfile, "invalid character name");
2027           else
2028             {
2029               /* Make sure the `to'-value is larger.  */
2030               if (to >= from)
2031                 {
2032                   newp->to = to;
2033                   newp->step = step;
2034                 }
2035               else
2036                 lr_error (ldfile, _("\
2037 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2038                           (to | from) < 65536 ? 4 : 8, to,
2039                           (to | from) < 65536 ? 4 : 8, from);
2040             }
2041
2042           /* And the next token.  */
2043           now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2044         }
2045
2046       if (now->tok == tok_eol || now->tok == tok_eof)
2047         /* We are done.  */
2048         return;
2049
2050       if (now->tok == tok_semicolon)
2051         /* Next round.  */
2052         continue;
2053
2054       /* If we come here something is wrong.  */
2055       lr_error (ldfile, _("syntax error"));
2056       lr_ignore_rest (ldfile, 0);
2057       return;
2058     }
2059 }
2060
2061
2062 /* The parser for the LC_CTYPE section of the locale definition.  */
2063 void
2064 ctype_read (struct linereader *ldfile, struct localedef_t *result,
2065             const struct charmap_t *charmap, const char *repertoire_name,
2066             int ignore_content)
2067 {
2068   struct repertoire_t *repertoire = NULL;
2069   struct locale_ctype_t *ctype;
2070   struct token *now;
2071   enum token_t nowtok;
2072   size_t cnt;
2073   uint32_t last_wch = 0;
2074   enum token_t last_token;
2075   enum token_t ellipsis_token;
2076   int step;
2077   char last_charcode[16];
2078   size_t last_charcode_len = 0;
2079   const char *last_str = NULL;
2080   int mapidx;
2081   struct localedef_t *copy_locale = NULL;
2082
2083   /* Get the repertoire we have to use.  */
2084   if (repertoire_name != NULL)
2085     repertoire = repertoire_read (repertoire_name);
2086
2087   /* The rest of the line containing `LC_CTYPE' must be free.  */
2088   lr_ignore_rest (ldfile, 1);
2089
2090
2091   do
2092     {
2093       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2094       nowtok = now->tok;
2095     }
2096   while (nowtok == tok_eol);
2097
2098   /* If we see `copy' now we are almost done.  */
2099   if (nowtok == tok_copy)
2100     {
2101       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2102       if (now->tok != tok_string)
2103         {
2104           SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2105
2106         skip_category:
2107           do
2108             now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2109           while (now->tok != tok_eof && now->tok != tok_end);
2110
2111           if (now->tok != tok_eof
2112               || (now = lr_token (ldfile, charmap, NULL, NULL, verbose),
2113                   now->tok == tok_eof))
2114             lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2115           else if (now->tok != tok_lc_ctype)
2116             {
2117               lr_error (ldfile, _("\
2118 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2119               lr_ignore_rest (ldfile, 0);
2120             }
2121           else
2122             lr_ignore_rest (ldfile, 1);
2123
2124           return;
2125         }
2126
2127       if (! ignore_content)
2128         {
2129           /* Get the locale definition.  */
2130           copy_locale = load_locale (LC_CTYPE, now->val.str.startmb,
2131                                      repertoire_name, charmap, NULL);
2132           if ((copy_locale->avail & CTYPE_LOCALE) == 0)
2133             {
2134               /* Not yet loaded.  So do it now.  */
2135               if (locfile_read (copy_locale, charmap) != 0)
2136                 goto skip_category;
2137             }
2138
2139           if (copy_locale->categories[LC_CTYPE].ctype == NULL)
2140             return;
2141         }
2142
2143       lr_ignore_rest (ldfile, 1);
2144
2145       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2146       nowtok = now->tok;
2147     }
2148
2149   /* Prepare the data structures.  */
2150   ctype_startup (ldfile, result, charmap, copy_locale, ignore_content);
2151   ctype = result->categories[LC_CTYPE].ctype;
2152
2153   /* Remember the repertoire we use.  */
2154   if (!ignore_content)
2155     ctype->repertoire = repertoire;
2156
2157   while (1)
2158     {
2159       unsigned long int class_bit = 0;
2160       unsigned long int class256_bit = 0;
2161       int handle_digits = 0;
2162
2163       /* Of course we don't proceed beyond the end of file.  */
2164       if (nowtok == tok_eof)
2165         break;
2166
2167       /* Ignore empty lines.  */
2168       if (nowtok == tok_eol)
2169         {
2170           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2171           nowtok = now->tok;
2172           continue;
2173         }
2174
2175       switch (nowtok)
2176         {
2177         case tok_charclass:
2178           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2179           while (now->tok == tok_ident || now->tok == tok_string)
2180             {
2181               ctype_class_new (ldfile, ctype, now->val.str.startmb);
2182               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2183               if (now->tok != tok_semicolon)
2184                 break;
2185               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2186             }
2187           if (now->tok != tok_eol)
2188             SYNTAX_ERROR (_("\
2189 %s: syntax error in definition of new character class"), "LC_CTYPE");
2190           break;
2191
2192         case tok_charconv:
2193           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2194           while (now->tok == tok_ident || now->tok == tok_string)
2195             {
2196               ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2197               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2198               if (now->tok != tok_semicolon)
2199                 break;
2200               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2201             }
2202           if (now->tok != tok_eol)
2203             SYNTAX_ERROR (_("\
2204 %s: syntax error in definition of new character map"), "LC_CTYPE");
2205           break;
2206
2207         case tok_class:
2208           /* Ignore the rest of the line if we don't need the input of
2209              this line.  */
2210           if (ignore_content)
2211             {
2212               lr_ignore_rest (ldfile, 0);
2213               break;
2214             }
2215
2216           /* We simply forget the `class' keyword and use the following
2217              operand to determine the bit.  */
2218           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2219           if (now->tok == tok_ident || now->tok == tok_string)
2220             {
2221               /* It may be one of the already known class names.  */
2222               for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2223                 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
2224                   break;
2225               if (cnt >= ctype->nr_charclass)
2226                 {
2227                   /* OK, it's a new class.  */
2228                   ctype_class_new (ldfile, ctype, now->val.str.startmb);
2229
2230                   class_bit = _ISwbit (ctype->nr_charclass - 1);
2231                 }
2232               else
2233                 {
2234                   class_bit = _ISwbit (cnt);
2235
2236                   free (now->val.str.startmb);
2237                 }
2238             }
2239           else if (now->tok == tok_digit)
2240             goto handle_tok_digit;
2241           else if (now->tok < tok_upper || now->tok > tok_blank)
2242             goto err_label;
2243           else
2244             {
2245               class_bit = BITw (now->tok);
2246               class256_bit = BIT (now->tok);
2247             }
2248
2249           /* The next character must be a semicolon.  */
2250           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2251           if (now->tok != tok_semicolon)
2252             goto err_label;
2253           goto read_charclass;
2254
2255         case tok_upper:
2256         case tok_lower:
2257         case tok_alpha:
2258         case tok_alnum:
2259         case tok_space:
2260         case tok_cntrl:
2261         case tok_punct:
2262         case tok_graph:
2263         case tok_print:
2264         case tok_xdigit:
2265         case tok_blank:
2266           /* Ignore the rest of the line if we don't need the input of
2267              this line.  */
2268           if (ignore_content)
2269             {
2270               lr_ignore_rest (ldfile, 0);
2271               break;
2272             }
2273
2274           class_bit = BITw (now->tok);
2275           class256_bit = BIT (now->tok);
2276           handle_digits = 0;
2277         read_charclass:
2278           ctype->class_done |= class_bit;
2279           last_token = tok_none;
2280           ellipsis_token = tok_none;
2281           step = 1;
2282           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2283           while (now->tok != tok_eol && now->tok != tok_eof)
2284             {
2285               uint32_t wch;
2286               struct charseq *seq;
2287
2288               if (ellipsis_token == tok_none)
2289                 {
2290                   if (get_character (now, charmap, repertoire, &seq, &wch))
2291                     goto err_label;
2292
2293                   if (!ignore_content && seq != NULL && seq->nbytes == 1)
2294                     /* Yep, we can store information about this byte
2295                        sequence.  */
2296                     ctype->class256_collection[seq->bytes[0]] |= class256_bit;
2297
2298                   if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2299                       && class_bit != 0)
2300                     /* We have the UCS4 position.  */
2301                     *find_idx (ctype, &ctype->class_collection,
2302                                &ctype->class_collection_max,
2303                                &ctype->class_collection_act, wch) |= class_bit;
2304
2305                   last_token = now->tok;
2306                   /* Terminate the string.  */
2307                   if (last_token == tok_bsymbol)
2308                     {
2309                       now->val.str.startmb[now->val.str.lenmb] = '\0';
2310                       last_str = now->val.str.startmb;
2311                     }
2312                   else
2313                     last_str = NULL;
2314                   last_wch = wch;
2315                   memcpy (last_charcode, now->val.charcode.bytes, 16);
2316                   last_charcode_len = now->val.charcode.nbytes;
2317
2318                   if (!ignore_content && handle_digits == 1)
2319                     {
2320                       /* We must store the digit values.  */
2321                       if (ctype->mbdigits_act == ctype->mbdigits_max)
2322                         {
2323                           ctype->mbdigits_max += 10;
2324                           ctype->mbdigits = xrealloc (ctype->mbdigits,
2325                                                       (ctype->mbdigits_max
2326                                                        * sizeof (char *)));
2327                           ctype->wcdigits_max += 10;
2328                           ctype->wcdigits = xrealloc (ctype->wcdigits,
2329                                                       (ctype->wcdigits_max
2330                                                        * sizeof (uint32_t)));
2331                         }
2332
2333                       ctype->mbdigits[ctype->mbdigits_act++] = seq;
2334                       ctype->wcdigits[ctype->wcdigits_act++] = wch;
2335                     }
2336                   else if (!ignore_content && handle_digits == 2)
2337                     {
2338                       /* We must store the digit values.  */
2339                       if (ctype->outdigits_act >= 10)
2340                         {
2341                           lr_error (ldfile, _("\
2342 %s: field `%s' does not contain exactly ten entries"),
2343                             "LC_CTYPE", "outdigit");
2344                           lr_ignore_rest (ldfile, 0);
2345                           break;
2346                         }
2347
2348                       ctype->mboutdigits[ctype->outdigits_act] = seq;
2349                       ctype->wcoutdigits[ctype->outdigits_act] = wch;
2350                       ++ctype->outdigits_act;
2351                     }
2352                 }
2353               else
2354                 {
2355                   /* Now it gets complicated.  We have to resolve the
2356                      ellipsis problem.  First we must distinguish between
2357                      the different kind of ellipsis and this must match the
2358                      tokens we have seen.  */
2359                   assert (last_token != tok_none);
2360
2361                   if (last_token != now->tok)
2362                     {
2363                       lr_error (ldfile, _("\
2364 ellipsis range must be marked by two operands of same type"));
2365                       lr_ignore_rest (ldfile, 0);
2366                       break;
2367                     }
2368
2369                   if (last_token == tok_bsymbol)
2370                     {
2371                       if (ellipsis_token == tok_ellipsis3)
2372                         lr_error (ldfile, _("with symbolic name range values \
2373 the absolute ellipsis `...' must not be used"));
2374
2375                       charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2376                                                    repertoire, now, last_str,
2377                                                    class256_bit, class_bit,
2378                                                    (ellipsis_token
2379                                                     == tok_ellipsis4
2380                                                     ? 10 : 16),
2381                                                    ignore_content,
2382                                                    handle_digits, step);
2383                     }
2384                   else if (last_token == tok_ucs4)
2385                     {
2386                       if (ellipsis_token != tok_ellipsis2)
2387                         lr_error (ldfile, _("\
2388 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2389
2390                       charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2391                                                repertoire, now, last_wch,
2392                                                class256_bit, class_bit,
2393                                                ignore_content, handle_digits,
2394                                                step);
2395                     }
2396                   else
2397                     {
2398                       assert (last_token == tok_charcode);
2399
2400                       if (ellipsis_token != tok_ellipsis3)
2401                         lr_error (ldfile, _("\
2402 with character code range values one must use the absolute ellipsis `...'"));
2403
2404                       charclass_charcode_ellipsis (ldfile, ctype, charmap,
2405                                                    repertoire, now,
2406                                                    last_charcode,
2407                                                    last_charcode_len,
2408                                                    class256_bit, class_bit,
2409                                                    ignore_content,
2410                                                    handle_digits);
2411                     }
2412
2413                   /* Now we have used the last value.  */
2414                   last_token = tok_none;
2415                 }
2416
2417               /* Next we expect a semicolon or the end of the line.  */
2418               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2419               if (now->tok == tok_eol || now->tok == tok_eof)
2420                 break;
2421
2422               if (last_token != tok_none
2423                   && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
2424                 {
2425                   if (now->tok == tok_ellipsis2_2)
2426                     {
2427                       now->tok = tok_ellipsis2;
2428                       step = 2;
2429                     }
2430                   else if (now->tok == tok_ellipsis4_2)
2431                     {
2432                       now->tok = tok_ellipsis4;
2433                       step = 2;
2434                     }
2435
2436                   ellipsis_token = now->tok;
2437
2438                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2439                   continue;
2440                 }
2441
2442               if (now->tok != tok_semicolon)
2443                 goto err_label;
2444
2445               /* And get the next character.  */
2446               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2447
2448               ellipsis_token = tok_none;
2449               step = 1;
2450             }
2451           break;
2452
2453         case tok_digit:
2454           /* Ignore the rest of the line if we don't need the input of
2455              this line.  */
2456           if (ignore_content)
2457             {
2458               lr_ignore_rest (ldfile, 0);
2459               break;
2460             }
2461
2462         handle_tok_digit:
2463           class_bit = _ISwdigit;
2464           class256_bit = _ISdigit;
2465           handle_digits = 1;
2466           goto read_charclass;
2467
2468         case tok_outdigit:
2469           /* Ignore the rest of the line if we don't need the input of
2470              this line.  */
2471           if (ignore_content)
2472             {
2473               lr_ignore_rest (ldfile, 0);
2474               break;
2475             }
2476
2477           if (ctype->outdigits_act != 0)
2478             lr_error (ldfile, _("\
2479 %s: field `%s' declared more than once"),
2480                       "LC_CTYPE", "outdigit");
2481           class_bit = 0;
2482           class256_bit = 0;
2483           handle_digits = 2;
2484           goto read_charclass;
2485
2486         case tok_toupper:
2487           /* Ignore the rest of the line if we don't need the input of
2488              this line.  */
2489           if (ignore_content)
2490             {
2491               lr_ignore_rest (ldfile, 0);
2492               break;
2493             }
2494
2495           mapidx = 0;
2496           goto read_mapping;
2497
2498         case tok_tolower:
2499           /* Ignore the rest of the line if we don't need the input of
2500              this line.  */
2501           if (ignore_content)
2502             {
2503               lr_ignore_rest (ldfile, 0);
2504               break;
2505             }
2506
2507           mapidx = 1;
2508           goto read_mapping;
2509
2510         case tok_map:
2511           /* Ignore the rest of the line if we don't need the input of
2512              this line.  */
2513           if (ignore_content)
2514             {
2515               lr_ignore_rest (ldfile, 0);
2516               break;
2517             }
2518
2519           /* We simply forget the `map' keyword and use the following
2520              operand to determine the mapping.  */
2521           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2522           if (now->tok == tok_ident || now->tok == tok_string)
2523             {
2524               size_t cnt;
2525
2526               for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2527                 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2528                   break;
2529
2530               if (cnt < ctype->map_collection_nr)
2531                 free (now->val.str.startmb);
2532               else
2533                 /* OK, it's a new map.  */
2534                 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2535
2536               mapidx = cnt;
2537             }
2538           else if (now->tok < tok_toupper || now->tok > tok_tolower)
2539             goto err_label;
2540           else
2541             mapidx = now->tok - tok_toupper;
2542
2543           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2544           /* This better should be a semicolon.  */
2545           if (now->tok != tok_semicolon)
2546             goto err_label;
2547
2548         read_mapping:
2549           /* Test whether this mapping was already defined.  */
2550           if (ctype->tomap_done[mapidx])
2551             {
2552               lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2553                         ctype->mapnames[mapidx]);
2554               lr_ignore_rest (ldfile, 0);
2555               break;
2556             }
2557           ctype->tomap_done[mapidx] = 1;
2558
2559           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2560           while (now->tok != tok_eol && now->tok != tok_eof)
2561             {
2562               struct charseq *from_seq;
2563               uint32_t from_wch;
2564               struct charseq *to_seq;
2565               uint32_t to_wch;
2566
2567               /* Every pair starts with an opening brace.  */
2568               if (now->tok != tok_open_brace)
2569                 goto err_label;
2570
2571               /* Next comes the from-value.  */
2572               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2573               if (get_character (now, charmap, repertoire, &from_seq,
2574                                  &from_wch) != 0)
2575                 goto err_label;
2576
2577               /* The next is a comma.  */
2578               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2579               if (now->tok != tok_comma)
2580                 goto err_label;
2581
2582               /* And the other value.  */
2583               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2584               if (get_character (now, charmap, repertoire, &to_seq,
2585                                  &to_wch) != 0)
2586                 goto err_label;
2587
2588               /* And the last thing is the closing brace.  */
2589               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2590               if (now->tok != tok_close_brace)
2591                 goto err_label;
2592
2593               if (!ignore_content)
2594                 {
2595                   /* Check whether the mapping converts from an ASCII value
2596                      to a non-ASCII value.  */
2597                   if (from_seq != NULL && from_seq->nbytes == 1
2598                       && isascii (from_seq->bytes[0])
2599                       && to_seq != NULL && (to_seq->nbytes != 1
2600                                             || !isascii (to_seq->bytes[0])))
2601                     ctype->to_nonascii = 1;
2602
2603                   if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2604                       && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2605                     /* We can use this value.  */
2606                     ctype->map256_collection[mapidx][from_seq->bytes[0]]
2607                       = to_seq->bytes[0];
2608
2609                   if (from_wch != ILLEGAL_CHAR_VALUE
2610                       && to_wch != ILLEGAL_CHAR_VALUE)
2611                     /* Both correct values.  */
2612                     *find_idx (ctype, &ctype->map_collection[mapidx],
2613                                &ctype->map_collection_max[mapidx],
2614                                &ctype->map_collection_act[mapidx],
2615                                from_wch) = to_wch;
2616                 }
2617
2618               /* Now comes a semicolon or the end of the line/file.  */
2619               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2620               if (now->tok == tok_semicolon)
2621                 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2622             }
2623           break;
2624
2625         case tok_translit_start:
2626           /* Ignore the entire translit section with its peculiar syntax
2627              if we don't need the input.  */
2628           if (ignore_content)
2629             {
2630               do
2631                 {
2632                   lr_ignore_rest (ldfile, 0);
2633                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2634                 }
2635               while (now->tok != tok_translit_end && now->tok != tok_eof);
2636
2637               if (now->tok == tok_eof)
2638                 lr_error (ldfile, _(\
2639 "%s: `translit_start' section does not end with `translit_end'"),
2640                           "LC_CTYPE");
2641
2642               break;
2643             }
2644
2645           /* The rest of the line better should be empty.  */
2646           lr_ignore_rest (ldfile, 1);
2647
2648           /* We count here the number of allocated entries in the `translit'
2649              array.  */
2650           cnt = 0;
2651
2652           ldfile->translate_strings = 1;
2653           ldfile->return_widestr = 1;
2654
2655           /* We proceed until we see the `translit_end' token.  */
2656           while (now = lr_token (ldfile, charmap, NULL, repertoire, verbose),
2657                  now->tok != tok_translit_end && now->tok != tok_eof)
2658             {
2659               if (now->tok == tok_eol)
2660                 /* Ignore empty lines.  */
2661                 continue;
2662
2663               if (now->tok == tok_include)
2664                 {
2665                   /* We have to include locale.  */
2666                   const char *locale_name;
2667                   const char *repertoire_name;
2668                   struct translit_include_t *include_stmt, **include_ptr;
2669
2670                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2671                   /* This should be a string or an identifier.  In any
2672                      case something to name a locale.  */
2673                   if (now->tok != tok_string && now->tok != tok_ident)
2674                     {
2675                     translit_syntax:
2676                       lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2677                       lr_ignore_rest (ldfile, 0);
2678                       continue;
2679                     }
2680                   locale_name = now->val.str.startmb;
2681
2682                   /* Next should be a semicolon.  */
2683                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2684                   if (now->tok != tok_semicolon)
2685                     goto translit_syntax;
2686
2687                   /* Now the repertoire name.  */
2688                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2689                   if ((now->tok != tok_string && now->tok != tok_ident)
2690                       || now->val.str.startmb == NULL)
2691                     goto translit_syntax;
2692                   repertoire_name = now->val.str.startmb;
2693                   if (repertoire_name[0] == '\0')
2694                     /* Ignore the empty string.  */
2695                     repertoire_name = NULL;
2696
2697                   /* Save the include statement for later processing.  */
2698                   include_stmt = (struct translit_include_t *)
2699                     xmalloc (sizeof (struct translit_include_t));
2700                   include_stmt->copy_locale = locale_name;
2701                   include_stmt->copy_repertoire = repertoire_name;
2702                   include_stmt->next = NULL;
2703
2704                   include_ptr = &ctype->translit_include;
2705                   while (*include_ptr != NULL)
2706                     include_ptr = &(*include_ptr)->next;
2707                   *include_ptr = include_stmt;
2708
2709                   /* The rest of the line must be empty.  */
2710                   lr_ignore_rest (ldfile, 1);
2711
2712                   /* Make sure the locale is read.  */
2713                   add_to_readlist (LC_CTYPE, locale_name, repertoire_name,
2714                                    1, NULL);
2715                   continue;
2716                 }
2717               else if (now->tok == tok_default_missing)
2718                 {
2719                   uint32_t *wstr;
2720
2721                   while (1)
2722                     {
2723                       /* We expect a single character or string as the
2724                          argument.  */
2725                       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2726                       wstr = read_widestring (ldfile, now, charmap,
2727                                               repertoire);
2728
2729                       if (wstr != NULL)
2730                         {
2731                           if (ctype->default_missing != NULL)
2732                             {
2733                               lr_error (ldfile, _("\
2734 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2735                               record_error_at_line (0, 0,
2736                                                     ctype->default_missing_file,
2737                                                     ctype->default_missing_lineno,
2738                                                     _("\
2739 previous definition was here"));
2740                             }
2741                           else
2742                             {
2743                               ctype->default_missing = wstr;
2744                               ctype->default_missing_file = ldfile->fname;
2745                               ctype->default_missing_lineno = ldfile->lineno;
2746                             }
2747                           /* We can have more entries, ignore them.  */
2748                           lr_ignore_rest (ldfile, 0);
2749                           break;
2750                         }
2751                       else if (wstr == (uint32_t *) -1l)
2752                         /* This was an syntax error.  */
2753                         break;
2754
2755                       /* Maybe there is another replacement we can use.  */
2756                       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2757                       if (now->tok == tok_eol || now->tok == tok_eof)
2758                         {
2759                           /* Nothing found.  We tell the user.  */
2760                           lr_error (ldfile, _("\
2761 %s: no representable `default_missing' definition found"), "LC_CTYPE");
2762                           break;
2763                         }
2764                       if (now->tok != tok_semicolon)
2765                         goto translit_syntax;
2766                     }
2767
2768                   continue;
2769                 }
2770               else if (now->tok == tok_translit_ignore)
2771                 {
2772                   read_translit_ignore_entry (ldfile, ctype, charmap,
2773                                               repertoire);
2774                   continue;
2775                 }
2776
2777               read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2778             }
2779           ldfile->return_widestr = 0;
2780
2781           if (now->tok == tok_eof)
2782             lr_error (ldfile, _(\
2783 "%s: `translit_start' section does not end with `translit_end'"),
2784                       "LC_CTYPE");
2785
2786           break;
2787
2788         case tok_ident:
2789           /* Ignore the rest of the line if we don't need the input of
2790              this line.  */
2791           if (ignore_content)
2792             {
2793               lr_ignore_rest (ldfile, 0);
2794               break;
2795             }
2796
2797           /* This could mean one of several things.  First test whether
2798              it's a character class name.  */
2799           for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2800             if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2801               break;
2802           if (cnt < ctype->nr_charclass)
2803             {
2804               class_bit = _ISwbit (cnt);
2805               class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2806               free (now->val.str.startmb);
2807               goto read_charclass;
2808             }
2809           for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2810             if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2811               break;
2812           if (cnt < ctype->map_collection_nr)
2813             {
2814               mapidx = cnt;
2815               free (now->val.str.startmb);
2816               goto read_mapping;
2817             }
2818           break;
2819
2820         case tok_end:
2821           /* Next we assume `LC_CTYPE'.  */
2822           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2823           if (now->tok == tok_eof)
2824             break;
2825           if (now->tok == tok_eol)
2826             lr_error (ldfile, _("%s: incomplete `END' line"),
2827                       "LC_CTYPE");
2828           else if (now->tok != tok_lc_ctype)
2829             lr_error (ldfile, _("\
2830 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2831           lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2832           return;
2833
2834         default:
2835         err_label:
2836           if (now->tok != tok_eof)
2837             SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2838         }
2839
2840       /* Prepare for the next round.  */
2841       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2842       nowtok = now->tok;
2843     }
2844
2845   /* When we come here we reached the end of the file.  */
2846   lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2847 }
2848
2849
2850 /* Subroutine of set_class_defaults, below.  */
2851 static void
2852 set_one_default (struct locale_ctype_t *ctype,
2853                  const struct charmap_t *charmap,
2854                  int bitpos, int from, int to)
2855 {
2856   char tmp[2];
2857   int ch;
2858   int bit = _ISbit (bitpos);
2859   int bitw = _ISwbit (bitpos);
2860   /* Define string.  */
2861   strcpy (tmp, "?");
2862
2863   for (ch = from; ch <= to; ++ch)
2864     {
2865       struct charseq *seq;
2866       tmp[0] = ch;
2867
2868       seq = charmap_find_value (charmap, tmp, 1);
2869       if (seq == NULL)
2870         {
2871           char buf[10];
2872           sprintf (buf, "U%08X", ch);
2873           seq = charmap_find_value (charmap, buf, 9);
2874         }
2875       if (seq == NULL)
2876         {
2877           record_error (0, 0, _("\
2878 %s: character `%s' not defined while needed as default value"),
2879                         "LC_CTYPE", tmp);
2880         }
2881       else if (seq->nbytes != 1)
2882         record_error (0, 0, _("\
2883 %s: character `%s' in charmap not representable with one byte"),
2884                       "LC_CTYPE", tmp);
2885       else
2886         ctype->class256_collection[seq->bytes[0]] |= bit;
2887
2888       /* No need to search here, the ASCII value is also the Unicode
2889          value.  */
2890       ELEM (ctype, class_collection, , ch) |= bitw;
2891     }
2892 }
2893
2894 static void
2895 set_class_defaults (struct locale_ctype_t *ctype,
2896                     const struct charmap_t *charmap,
2897                     struct repertoire_t *repertoire)
2898 {
2899 #define set_default(bitpos, from, to) \
2900   set_one_default (ctype, charmap, bitpos, from, to)
2901
2902   /* These function defines the default values for the classes and conversions
2903      according to POSIX.2 2.5.2.1.
2904      It may seem that the order of these if-blocks is arbitrary but it is NOT.
2905      Don't move them unless you know what you do!  */
2906
2907   /* Set default values if keyword was not present.  */
2908   if ((ctype->class_done & BITw (tok_upper)) == 0)
2909     /* "If this keyword [lower] is not specified, the lowercase letters
2910         `A' through `Z', ..., shall automatically belong to this class,
2911         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
2912     set_default (BITPOS (tok_upper), 'A', 'Z');
2913
2914   if ((ctype->class_done & BITw (tok_lower)) == 0)
2915     /* "If this keyword [lower] is not specified, the lowercase letters
2916         `a' through `z', ..., shall automatically belong to this class,
2917         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
2918     set_default (BITPOS (tok_lower), 'a', 'z');
2919
2920   if ((ctype->class_done & BITw (tok_alpha)) == 0)
2921     {
2922       /* Table 2-6 in P1003.2 says that characters in class `upper' or
2923          class `lower' *must* be in class `alpha'.  */
2924       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
2925       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
2926
2927       for (size_t cnt = 0; cnt < 256; ++cnt)
2928         if ((ctype->class256_collection[cnt] & mask) != 0)
2929           ctype->class256_collection[cnt] |= BIT (tok_alpha);
2930
2931       for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2932         if ((ctype->class_collection[cnt] & maskw) != 0)
2933           ctype->class_collection[cnt] |= BITw (tok_alpha);
2934     }
2935
2936   if ((ctype->class_done & BITw (tok_digit)) == 0)
2937     /* "If this keyword [digit] is not specified, the digits `0' through
2938         `9', ..., shall automatically belong to this class, with
2939         implementation-defined character values."  [P1003.2, 2.5.2.1]  */
2940     set_default (BITPOS (tok_digit), '0', '9');
2941
2942   /* "Only characters specified for the `alpha' and `digit' keyword
2943      shall be specified.  Characters specified for the keyword `alpha'
2944      and `digit' are automatically included in this class.  */
2945   {
2946     unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
2947     unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
2948
2949     for (size_t cnt = 0; cnt < 256; ++cnt)
2950       if ((ctype->class256_collection[cnt] & mask) != 0)
2951         ctype->class256_collection[cnt] |= BIT (tok_alnum);
2952
2953     for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2954       if ((ctype->class_collection[cnt] & maskw) != 0)
2955         ctype->class_collection[cnt] |= BITw (tok_alnum);
2956   }
2957
2958   if ((ctype->class_done & BITw (tok_space)) == 0)
2959     /* "If this keyword [space] is not specified, the characters <space>,
2960         <form-feed>, <newline>, <carriage-return>, <tab>, and
2961         <vertical-tab>, ..., shall automatically belong to this class,
2962         with implementation-defined character values."  [P1003.2, 2.5.2.1]  */
2963     {
2964       struct charseq *seq;
2965
2966       seq = charmap_find_value (charmap, "space", 5);
2967       if (seq == NULL)
2968         seq = charmap_find_value (charmap, "SP", 2);
2969       if (seq == NULL)
2970         seq = charmap_find_value (charmap, "U00000020", 9);
2971       if (seq == NULL)
2972         {
2973           record_error (0, 0, _("\
2974 %s: character `%s' not defined while needed as default value"),
2975                         "LC_CTYPE", "<space>");
2976         }
2977       else if (seq->nbytes != 1)
2978         record_error (0, 0, _("\
2979 %s: character `%s' in charmap not representable with one byte"),
2980                       "LC_CTYPE", "<space>");
2981       else
2982         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2983
2984       /* No need to search.  */
2985       ELEM (ctype, class_collection, , L' ') |= BITw (tok_space);
2986
2987       seq = charmap_find_value (charmap, "form-feed", 9);
2988       if (seq == NULL)
2989         seq = charmap_find_value (charmap, "U0000000C", 9);
2990       if (seq == NULL)
2991         {
2992           record_error (0, 0, _("\
2993 %s: character `%s' not defined while needed as default value"),
2994                                     "LC_CTYPE", "<form-feed>");
2995         }
2996       else if (seq->nbytes != 1)
2997         record_error (0, 0, _("\
2998 %s: character `%s' in charmap not representable with one byte"),
2999                       "LC_CTYPE", "<form-feed>");
3000       else
3001         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3002
3003       /* No need to search.  */
3004       ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space);
3005
3006
3007       seq = charmap_find_value (charmap, "newline", 7);
3008       if (seq == NULL)
3009         seq = charmap_find_value (charmap, "U0000000A", 9);
3010       if (seq == NULL)
3011         {
3012           record_error (0, 0, _("\
3013 %s: character `%s' not defined while needed as default value"),
3014                         "LC_CTYPE", "<newline>");
3015         }
3016       else if (seq->nbytes != 1)
3017         record_error (0, 0, _("\
3018 %s: character `%s' in charmap not representable with one byte"),
3019                       "LC_CTYPE", "<newline>");
3020       else
3021         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3022
3023       /* No need to search.  */
3024       ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space);
3025
3026
3027       seq = charmap_find_value (charmap, "carriage-return", 15);
3028       if (seq == NULL)
3029         seq = charmap_find_value (charmap, "U0000000D", 9);
3030       if (seq == NULL)
3031         {
3032           record_error (0, 0, _("\
3033 %s: character `%s' not defined while needed as default value"),
3034                         "LC_CTYPE", "<carriage-return>");
3035         }
3036       else if (seq->nbytes != 1)
3037         record_error (0, 0, _("\
3038 %s: character `%s' in charmap not representable with one byte"),
3039                       "LC_CTYPE", "<carriage-return>");
3040       else
3041         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3042
3043       /* No need to search.  */
3044       ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space);
3045
3046
3047       seq = charmap_find_value (charmap, "tab", 3);
3048       if (seq == NULL)
3049         seq = charmap_find_value (charmap, "U00000009", 9);
3050       if (seq == NULL)
3051         {
3052           record_error (0, 0, _("\
3053 %s: character `%s' not defined while needed as default value"),
3054                         "LC_CTYPE", "<tab>");
3055         }
3056       else if (seq->nbytes != 1)
3057         record_error (0, 0, _("\
3058 %s: character `%s' in charmap not representable with one byte"),
3059                       "LC_CTYPE", "<tab>");
3060       else
3061         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3062
3063       /* No need to search.  */
3064       ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space);
3065
3066
3067       seq = charmap_find_value (charmap, "vertical-tab", 12);
3068       if (seq == NULL)
3069         seq = charmap_find_value (charmap, "U0000000B", 9);
3070       if (seq == NULL)
3071         {
3072           record_error (0, 0, _("\
3073 %s: character `%s' not defined while needed as default value"),
3074                         "LC_CTYPE", "<vertical-tab>");
3075         }
3076       else if (seq->nbytes != 1)
3077         record_error (0, 0, _("\
3078 %s: character `%s' in charmap not representable with one byte"),
3079                       "LC_CTYPE", "<vertical-tab>");
3080       else
3081         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3082
3083       /* No need to search.  */
3084       ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space);
3085     }
3086
3087   if ((ctype->class_done & BITw (tok_xdigit)) == 0)
3088     /* "If this keyword is not specified, the digits `0' to `9', the
3089         uppercase letters `A' through `F', and the lowercase letters `a'
3090         through `f', ..., shell automatically belong to this class, with
3091         implementation defined character values."  [P1003.2, 2.5.2.1]  */
3092     {
3093       set_default (BITPOS (tok_xdigit), '0', '9');
3094       set_default (BITPOS (tok_xdigit), 'A', 'F');
3095       set_default (BITPOS (tok_xdigit), 'a', 'f');
3096     }
3097
3098   if ((ctype->class_done & BITw (tok_blank)) == 0)
3099     /* "If this keyword [blank] is unspecified, the characters <space> and
3100        <tab> shall belong to this character class."  [P1003.2, 2.5.2.1]  */
3101    {
3102       struct charseq *seq;
3103
3104       seq = charmap_find_value (charmap, "space", 5);
3105       if (seq == NULL)
3106         seq = charmap_find_value (charmap, "SP", 2);
3107       if (seq == NULL)
3108         seq = charmap_find_value (charmap, "U00000020", 9);
3109       if (seq == NULL)
3110         {
3111           record_error (0, 0, _("\
3112 %s: character `%s' not defined while needed as default value"),
3113                         "LC_CTYPE", "<space>");
3114         }
3115       else if (seq->nbytes != 1)
3116         record_error (0, 0, _("\
3117 %s: character `%s' in charmap not representable with one byte"),
3118                       "LC_CTYPE", "<space>");
3119       else
3120         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3121
3122       /* No need to search.  */
3123       ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank);
3124
3125
3126       seq = charmap_find_value (charmap, "tab", 3);
3127       if (seq == NULL)
3128         seq = charmap_find_value (charmap, "U00000009", 9);
3129       if (seq == NULL)
3130         {
3131            record_error (0, 0, _("\
3132 %s: character `%s' not defined while needed as default value"),
3133                          "LC_CTYPE", "<tab>");
3134         }
3135       else if (seq->nbytes != 1)
3136         record_error (0, 0, _("\
3137 %s: character `%s' in charmap not representable with one byte"),
3138                       "LC_CTYPE", "<tab>");
3139       else
3140         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3141
3142       /* No need to search.  */
3143       ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank);
3144     }
3145
3146   if ((ctype->class_done & BITw (tok_graph)) == 0)
3147     /* "If this keyword [graph] is not specified, characters specified for
3148         the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3149         shall belong to this character class."  [P1003.2, 2.5.2.1]  */
3150     {
3151       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3152         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3153       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3154         BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3155         BITw (tok_punct);
3156
3157       for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3158         if ((ctype->class_collection[cnt] & maskw) != 0)
3159           ctype->class_collection[cnt] |= BITw (tok_graph);
3160
3161       for (size_t cnt = 0; cnt < 256; ++cnt)
3162         if ((ctype->class256_collection[cnt] & mask) != 0)
3163           ctype->class256_collection[cnt] |= BIT (tok_graph);
3164     }
3165
3166   if ((ctype->class_done & BITw (tok_print)) == 0)
3167     /* "If this keyword [print] is not provided, characters specified for
3168         the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3169         and the <space> character shall belong to this character class."
3170         [P1003.2, 2.5.2.1]  */
3171     {
3172       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3173         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3174       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3175         BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3176         BITw (tok_punct);
3177       struct charseq *seq;
3178
3179       for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3180         if ((ctype->class_collection[cnt] & maskw) != 0)
3181           ctype->class_collection[cnt] |= BITw (tok_print);
3182
3183       for (size_t cnt = 0; cnt < 256; ++cnt)
3184         if ((ctype->class256_collection[cnt] & mask) != 0)
3185           ctype->class256_collection[cnt] |= BIT (tok_print);
3186
3187
3188       seq = charmap_find_value (charmap, "space", 5);
3189       if (seq == NULL)
3190         seq = charmap_find_value (charmap, "SP", 2);
3191       if (seq == NULL)
3192         seq = charmap_find_value (charmap, "U00000020", 9);
3193       if (seq == NULL)
3194         {
3195           record_error (0, 0, _("\
3196 %s: character `%s' not defined while needed as default value"),
3197                         "LC_CTYPE", "<space>");
3198         }
3199       else if (seq->nbytes != 1)
3200         record_error (0, 0, _("\
3201 %s: character `%s' in charmap not representable with one byte"),
3202                       "LC_CTYPE", "<space>");
3203       else
3204         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
3205
3206       /* No need to search.  */
3207       ELEM (ctype, class_collection, , L' ') |= BITw (tok_print);
3208     }
3209
3210   if (ctype->tomap_done[0] == 0)
3211     /* "If this keyword [toupper] is not specified, the lowercase letters
3212         `a' through `z', and their corresponding uppercase letters `A' to
3213         `Z', ..., shall automatically be included, with implementation-
3214         defined character values."  [P1003.2, 2.5.2.1]  */
3215     {
3216       char tmp[4];
3217       int ch;
3218
3219       strcpy (tmp, "<?>");
3220
3221       for (ch = 'a'; ch <= 'z'; ++ch)
3222         {
3223           struct charseq *seq_from, *seq_to;
3224
3225           tmp[1] = (char) ch;
3226
3227           seq_from = charmap_find_value (charmap, &tmp[1], 1);
3228           if (seq_from == NULL)
3229             {
3230               char buf[10];
3231               sprintf (buf, "U%08X", ch);
3232               seq_from = charmap_find_value (charmap, buf, 9);
3233             }
3234           if (seq_from == NULL)
3235             {
3236               record_error (0, 0, _("\
3237 %s: character `%s' not defined while needed as default value"),
3238                             "LC_CTYPE", tmp);
3239             }
3240           else if (seq_from->nbytes != 1)
3241             {
3242               record_error (0, 0, _("\
3243 %s: character `%s' needed as default value not representable with one byte"),
3244                             "LC_CTYPE", tmp);
3245             }
3246           else
3247             {
3248               /* This conversion is implementation defined.  */
3249               tmp[1] = (char) (ch + ('A' - 'a'));
3250               seq_to = charmap_find_value (charmap, &tmp[1], 1);
3251               if (seq_to == NULL)
3252                 {
3253                   char buf[10];
3254                   sprintf (buf, "U%08X", ch + ('A' - 'a'));
3255                   seq_to = charmap_find_value (charmap, buf, 9);
3256                 }
3257               if (seq_to == NULL)
3258                 {
3259                   record_error (0, 0, _("\
3260 %s: character `%s' not defined while needed as default value"),
3261                                 "LC_CTYPE", tmp);
3262                 }
3263               else if (seq_to->nbytes != 1)
3264                 {
3265                   record_error (0, 0, _("\
3266 %s: character `%s' needed as default value not representable with one byte"),
3267                                 "LC_CTYPE", tmp);
3268                 }
3269               else
3270                 /* The index [0] is determined by the order of the
3271                    `ctype_map_newP' calls in `ctype_startup'.  */
3272                 ctype->map256_collection[0][seq_from->bytes[0]]
3273                   = seq_to->bytes[0];
3274             }
3275
3276           /* No need to search.  */
3277           ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
3278         }
3279     }
3280
3281   if (ctype->tomap_done[1] == 0)
3282     /* "If this keyword [tolower] is not specified, the mapping shall be
3283        the reverse mapping of the one specified to `toupper'."  [P1003.2]  */
3284     {
3285       for (size_t cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
3286         if (ctype->map_collection[0][cnt] != 0)
3287           ELEM (ctype, map_collection, [1],
3288                 ctype->map_collection[0][cnt])
3289             = ctype->charnames[cnt];
3290
3291       for (size_t cnt = 0; cnt < 256; ++cnt)
3292         if (ctype->map256_collection[0][cnt] != 0)
3293           ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
3294     }
3295
3296   if (ctype->outdigits_act != 10)
3297     {
3298       if (ctype->outdigits_act != 0)
3299         record_error (0, 0, _("\
3300 %s: field `%s' does not contain exactly ten entries"),
3301                       "LC_CTYPE", "outdigit");
3302
3303       for (size_t cnt = ctype->outdigits_act; cnt < 10; ++cnt)
3304         {
3305           ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3306                                                          (char *) digits + cnt,
3307                                                          1);
3308
3309           if (ctype->mboutdigits[cnt] == NULL)
3310             ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3311                                                            longnames[cnt],
3312                                                            strlen (longnames[cnt]));
3313
3314           if (ctype->mboutdigits[cnt] == NULL)
3315             ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3316                                                            uninames[cnt], 9);
3317
3318           if (ctype->mboutdigits[cnt] == NULL)
3319             {
3320               /* Provide a replacement.  */
3321               record_error (0, 0, _("\
3322 no output digits defined and none of the standard names in the charmap"));
3323
3324               ctype->mboutdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
3325                                                        sizeof (struct charseq)
3326                                                        + 1);
3327
3328               /* This is better than nothing.  */
3329               ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
3330               ctype->mboutdigits[cnt]->nbytes = 1;
3331             }
3332
3333           ctype->wcoutdigits[cnt] = L'0' + cnt;
3334         }
3335
3336       ctype->outdigits_act = 10;
3337     }
3338
3339 #undef set_default
3340 }
3341
3342
3343 /* Initialize.  Assumes t->p and t->q have already been set.  */
3344 static inline void
3345 wctype_table_init (struct wctype_table *t)
3346 {
3347   t->level1 = NULL;
3348   t->level1_alloc = t->level1_size = 0;
3349   t->level2 = NULL;
3350   t->level2_alloc = t->level2_size = 0;
3351   t->level3 = NULL;
3352   t->level3_alloc = t->level3_size = 0;
3353 }
3354
3355 /* Retrieve an entry.  */
3356 static inline int
3357 wctype_table_get (struct wctype_table *t, uint32_t wc)
3358 {
3359   uint32_t index1 = wc >> (t->q + t->p + 5);
3360   if (index1 < t->level1_size)
3361     {
3362       uint32_t lookup1 = t->level1[index1];
3363       if (lookup1 != EMPTY)
3364         {
3365           uint32_t index2 = ((wc >> (t->p + 5)) & ((1 << t->q) - 1))
3366                             + (lookup1 << t->q);
3367           uint32_t lookup2 = t->level2[index2];
3368           if (lookup2 != EMPTY)
3369             {
3370               uint32_t index3 = ((wc >> 5) & ((1 << t->p) - 1))
3371                                 + (lookup2 << t->p);
3372               uint32_t lookup3 = t->level3[index3];
3373               uint32_t index4 = wc & 0x1f;
3374
3375               return (lookup3 >> index4) & 1;
3376             }
3377         }
3378     }
3379   return 0;
3380 }
3381
3382 /* Add one entry.  */
3383 static void
3384 wctype_table_add (struct wctype_table *t, uint32_t wc)
3385 {
3386   uint32_t index1 = wc >> (t->q + t->p + 5);
3387   uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1);
3388   uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1);
3389   uint32_t index4 = wc & 0x1f;
3390   size_t i, i1, i2;
3391
3392   if (index1 >= t->level1_size)
3393     {
3394       if (index1 >= t->level1_alloc)
3395         {
3396           size_t alloc = 2 * t->level1_alloc;
3397           if (alloc <= index1)
3398             alloc = index1 + 1;
3399           t->level1 = (uint32_t *) xrealloc ((char *) t->level1,
3400                                              alloc * sizeof (uint32_t));
3401           t->level1_alloc = alloc;
3402         }
3403       while (index1 >= t->level1_size)
3404         t->level1[t->level1_size++] = EMPTY;
3405     }
3406
3407   if (t->level1[index1] == EMPTY)
3408     {
3409       if (t->level2_size == t->level2_alloc)
3410         {
3411           size_t alloc = 2 * t->level2_alloc + 1;
3412           t->level2 = (uint32_t *) xrealloc ((char *) t->level2,
3413                                              (alloc << t->q) * sizeof (uint32_t));
3414           t->level2_alloc = alloc;
3415         }
3416       i1 = t->level2_size << t->q;
3417       i2 = (t->level2_size + 1) << t->q;
3418       for (i = i1; i < i2; i++)
3419         t->level2[i] = EMPTY;
3420       t->level1[index1] = t->level2_size++;
3421     }
3422
3423   index2 += t->level1[index1] << t->q;
3424
3425   if (t->level2[index2] == EMPTY)
3426     {
3427       if (t->level3_size == t->level3_alloc)
3428         {
3429           size_t alloc = 2 * t->level3_alloc + 1;
3430           t->level3 = (uint32_t *) xrealloc ((char *) t->level3,
3431                                              (alloc << t->p) * sizeof (uint32_t));
3432           t->level3_alloc = alloc;
3433         }
3434       i1 = t->level3_size << t->p;
3435       i2 = (t->level3_size + 1) << t->p;
3436       for (i = i1; i < i2; i++)
3437         t->level3[i] = 0;
3438       t->level2[index2] = t->level3_size++;
3439     }
3440
3441   index3 += t->level2[index2] << t->p;
3442
3443   t->level3[index3] |= (uint32_t)1 << index4;
3444 }
3445
3446 /* Finalize and shrink.  */
3447 static void
3448 add_locale_wctype_table (struct locale_file *file, struct wctype_table *t)
3449 {
3450   size_t i, j, k;
3451   uint32_t reorder3[t->level3_size];
3452   uint32_t reorder2[t->level2_size];
3453   uint32_t level2_offset, level3_offset;
3454
3455   /* Uniquify level3 blocks.  */
3456   k = 0;
3457   for (j = 0; j < t->level3_size; j++)
3458     {
3459       for (i = 0; i < k; i++)
3460         if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3461                     (1 << t->p) * sizeof (uint32_t)) == 0)
3462           break;
3463       /* Relocate block j to block i.  */
3464       reorder3[j] = i;
3465       if (i == k)
3466         {
3467           if (i != j)
3468             memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3469                     (1 << t->p) * sizeof (uint32_t));
3470           k++;
3471         }
3472     }
3473   t->level3_size = k;
3474
3475   for (i = 0; i < (t->level2_size << t->q); i++)
3476     if (t->level2[i] != EMPTY)
3477       t->level2[i] = reorder3[t->level2[i]];
3478
3479   /* Uniquify level2 blocks.  */
3480   k = 0;
3481   for (j = 0; j < t->level2_size; j++)
3482     {
3483       for (i = 0; i < k; i++)
3484         if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3485                     (1 << t->q) * sizeof (uint32_t)) == 0)
3486           break;
3487       /* Relocate block j to block i.  */
3488       reorder2[j] = i;
3489       if (i == k)
3490         {
3491           if (i != j)
3492             memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3493                     (1 << t->q) * sizeof (uint32_t));
3494           k++;
3495         }
3496     }
3497   t->level2_size = k;
3498
3499   for (i = 0; i < t->level1_size; i++)
3500     if (t->level1[i] != EMPTY)
3501       t->level1[i] = reorder2[t->level1[i]];
3502
3503   t->result_size =
3504     5 * sizeof (uint32_t)
3505     + t->level1_size * sizeof (uint32_t)
3506     + (t->level2_size << t->q) * sizeof (uint32_t)
3507     + (t->level3_size << t->p) * sizeof (uint32_t);
3508
3509   level2_offset =
3510     5 * sizeof (uint32_t)
3511     + t->level1_size * sizeof (uint32_t);
3512   level3_offset =
3513     5 * sizeof (uint32_t)
3514     + t->level1_size * sizeof (uint32_t)
3515     + (t->level2_size << t->q) * sizeof (uint32_t);
3516
3517   start_locale_structure (file);
3518   add_locale_uint32 (file, t->q + t->p + 5);
3519   add_locale_uint32 (file, t->level1_size);
3520   add_locale_uint32 (file, t->p + 5);
3521   add_locale_uint32 (file, (1 << t->q) - 1);
3522   add_locale_uint32 (file, (1 << t->p) - 1);
3523
3524   for (i = 0; i < t->level1_size; i++)
3525     add_locale_uint32
3526       (file,
3527        t->level1[i] == EMPTY
3528        ? 0
3529        : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3530
3531   for (i = 0; i < (t->level2_size << t->q); i++)
3532     add_locale_uint32
3533       (file,
3534        t->level2[i] == EMPTY
3535        ? 0
3536        : (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
3537
3538   add_locale_uint32_array (file, t->level3, t->level3_size << t->p);
3539   end_locale_structure (file);
3540
3541   if (t->level1_alloc > 0)
3542     free (t->level1);
3543   if (t->level2_alloc > 0)
3544     free (t->level2);
3545   if (t->level3_alloc > 0)
3546     free (t->level3);
3547 }
3548
3549 /* Flattens the included transliterations into a translit list.
3550    Inserts them in the list at `cursor', and returns the new cursor.  */
3551 static struct translit_t **
3552 translit_flatten (struct locale_ctype_t *ctype,
3553                   const struct charmap_t *charmap,
3554                   struct translit_t **cursor)
3555 {
3556   while (ctype->translit_include != NULL)
3557     {
3558       const char *copy_locale = ctype->translit_include->copy_locale;
3559       const char *copy_repertoire = ctype->translit_include->copy_repertoire;
3560       struct localedef_t *other;
3561
3562       /* Unchain the include statement.  During the depth-first traversal
3563          we don't want to visit any locale more than once.  */
3564       ctype->translit_include = ctype->translit_include->next;
3565
3566       other = find_locale (LC_CTYPE, copy_locale, copy_repertoire, charmap);
3567
3568       if (other == NULL || other->categories[LC_CTYPE].ctype == NULL)
3569         {
3570           record_error (0, 0, _("\
3571 %s: transliteration data from locale `%s' not available"),
3572                         "LC_CTYPE", copy_locale);
3573         }
3574       else
3575         {
3576           struct locale_ctype_t *other_ctype =
3577             other->categories[LC_CTYPE].ctype;
3578
3579           cursor = translit_flatten (other_ctype, charmap, cursor);
3580           assert (other_ctype->translit_include == NULL);
3581
3582           if (other_ctype->translit != NULL)
3583             {
3584               /* Insert the other_ctype->translit list at *cursor.  */
3585               struct translit_t *endp = other_ctype->translit;
3586               while (endp->next != NULL)
3587                 endp = endp->next;
3588
3589               endp->next = *cursor;
3590               *cursor = other_ctype->translit;
3591
3592               /* Avoid any risk of circular lists.  */
3593               other_ctype->translit = NULL;
3594
3595               cursor = &endp->next;
3596             }
3597
3598           if (ctype->default_missing == NULL)
3599             ctype->default_missing = other_ctype->default_missing;
3600         }
3601     }
3602
3603   return cursor;
3604 }
3605
3606 static void
3607 allocate_arrays (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
3608                  struct repertoire_t *repertoire)
3609 {
3610   size_t idx, nr;
3611   const void *key;
3612   size_t len;
3613   void *vdata;
3614   void *curs;
3615
3616   /* You wonder about this amount of memory?  This is only because some
3617      users do not manage to address the array with unsigned values or
3618      data types with range >= 256.  '\200' would result in the array
3619      index -128.  To help these poor people we duplicate the entries for
3620      128 up to 255 below the entry for \0.  */
3621   ctype->ctype_b = (char_class_t *) xcalloc (256 + 128, sizeof (char_class_t));
3622   ctype->ctype32_b = (char_class32_t *) xcalloc (256, sizeof (char_class32_t));
3623   ctype->class_b = (uint32_t **)
3624     xmalloc (ctype->nr_charclass * sizeof (uint32_t *));
3625   ctype->class_3level = (struct wctype_table *)
3626     xmalloc (ctype->nr_charclass * sizeof (struct wctype_table));
3627
3628   /* This is the array accessed using the multibyte string elements.  */
3629   for (idx = 0; idx < 256; ++idx)
3630     ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
3631
3632   /* Mirror first 127 entries.  We must take care that entry -1 is not
3633      mirrored because EOF == -1.  */
3634   for (idx = 0; idx < 127; ++idx)
3635     ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
3636
3637   /* The 32 bit array contains all characters < 0x100.  */
3638   for (idx = 0; idx < ctype->class_collection_act; ++idx)
3639     if (ctype->charnames[idx] < 0x100)
3640       ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
3641
3642   for (nr = 0; nr < ctype->nr_charclass; nr++)
3643     {
3644       ctype->class_b[nr] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
3645
3646       /* We only set CLASS_B for the bits in the ISO C classes, not
3647          the user defined classes.  The number should not change but
3648          who knows.  */
3649 #define LAST_ISO_C_BIT 11
3650       if (nr <= LAST_ISO_C_BIT)
3651         for (idx = 0; idx < 256; ++idx)
3652           if (ctype->class256_collection[idx] & _ISbit (nr))
3653             ctype->class_b[nr][idx >> 5] |= (uint32_t) 1 << (idx & 0x1f);
3654     }
3655
3656   for (nr = 0; nr < ctype->nr_charclass; nr++)
3657     {
3658       struct wctype_table *t;
3659
3660       t = &ctype->class_3level[nr];
3661       t->p = 4; /* or: 5 */
3662       t->q = 7; /* or: 6 */
3663       wctype_table_init (t);
3664
3665       for (idx = 0; idx < ctype->class_collection_act; ++idx)
3666         if (ctype->class_collection[idx] & _ISwbit (nr))
3667           wctype_table_add (t, ctype->charnames[idx]);
3668
3669       record_verbose (stderr, _("\
3670 %s: table for class \"%s\": %lu bytes"),
3671                       "LC_CTYPE", ctype->classnames[nr],
3672                       (unsigned long int) t->result_size);
3673     }
3674
3675   /* Room for table of mappings.  */
3676   ctype->map_b = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3677   ctype->map32_b = (uint32_t **) xmalloc (ctype->map_collection_nr
3678                                           * sizeof (uint32_t *));
3679   ctype->map_3level = (struct wctrans_table *)
3680     xmalloc (ctype->map_collection_nr * sizeof (struct wctrans_table));
3681
3682   /* Fill in all mappings.  */
3683   for (idx = 0; idx < 2; ++idx)
3684     {
3685       unsigned int idx2;
3686
3687       /* Allocate table.  */
3688       ctype->map_b[idx] = (uint32_t *)
3689         xmalloc ((256 + 128) * sizeof (uint32_t));
3690
3691       /* Copy values from collection.  */
3692       for (idx2 = 0; idx2 < 256; ++idx2)
3693         ctype->map_b[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
3694
3695       /* Mirror first 127 entries.  We must take care not to map entry
3696          -1 because EOF == -1.  */
3697       for (idx2 = 0; idx2 < 127; ++idx2)
3698         ctype->map_b[idx][idx2] = ctype->map_b[idx][256 + idx2];
3699
3700       /* EOF must map to EOF.  */
3701       ctype->map_b[idx][127] = EOF;
3702     }
3703
3704   for (idx = 0; idx < ctype->map_collection_nr; ++idx)
3705     {
3706       unsigned int idx2;
3707
3708       /* Allocate table.  */
3709       ctype->map32_b[idx] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
3710
3711       /* Copy values from collection.  Default is identity mapping.  */
3712       for (idx2 = 0; idx2 < 256; ++idx2)
3713         ctype->map32_b[idx][idx2] =
3714           (ctype->map_collection[idx][idx2] != 0
3715            ? ctype->map_collection[idx][idx2]
3716            : idx2);
3717     }
3718
3719   for (nr = 0; nr < ctype->map_collection_nr; nr++)
3720     {
3721       struct wctrans_table *t;
3722
3723       t = &ctype->map_3level[nr];
3724       t->p = 7;
3725       t->q = 9;
3726       wctrans_table_init (t);
3727
3728       for (idx = 0; idx < ctype->map_collection_act[nr]; ++idx)
3729         if (ctype->map_collection[nr][idx] != 0)
3730           wctrans_table_add (t, ctype->charnames[idx],
3731                              ctype->map_collection[nr][idx]);
3732
3733       record_verbose (stderr, _("\
3734 %s: table for map \"%s\": %lu bytes"),
3735                       "LC_CTYPE", ctype->mapnames[nr],
3736                       (unsigned long int) t->result_size);
3737     }
3738
3739   /* Extra array for class and map names.  */
3740   ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3741                                                 * sizeof (uint32_t));
3742   ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3743                                               * sizeof (uint32_t));
3744
3745   ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
3746   ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
3747
3748   /* Array for width information.  Because the expected widths are very
3749      small (never larger than 2) we use only one single byte.  This
3750      saves space.
3751      We put only printable characters in the table.  wcwidth is specified
3752      to return -1 for non-printable characters.  Doing the check here
3753      saves a run-time check.
3754      But we put L'\0' in the table.  This again saves a run-time check.  */
3755   {
3756     struct wcwidth_table *t;
3757
3758     t = &ctype->width;
3759     t->p = 7;
3760     t->q = 9;
3761     wcwidth_table_init (t);
3762
3763     /* First set all the printable characters of the character set to
3764        the default width.  */
3765     curs = NULL;
3766     while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
3767       {
3768         struct charseq *data = (struct charseq *) vdata;
3769
3770         if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
3771           data->ucs4 = repertoire_find_value (ctype->repertoire,
3772                                               data->name, len);
3773
3774         if (data->ucs4 != ILLEGAL_CHAR_VALUE)
3775           {
3776             uint32_t *class_bits =
3777               find_idx (ctype, &ctype->class_collection, NULL,
3778                         &ctype->class_collection_act, data->ucs4);
3779
3780             if (class_bits != NULL && (*class_bits & BITw (tok_print)))
3781               wcwidth_table_add (t, data->ucs4, charmap->width_default);
3782           }
3783       }
3784
3785     /* Now add the explicitly specified widths.  */
3786     if (charmap->width_rules != NULL)
3787       for (size_t cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
3788         {
3789           unsigned char bytes[charmap->mb_cur_max];
3790           int nbytes = charmap->width_rules[cnt].from->nbytes;
3791
3792           /* We have the range of character for which the width is
3793              specified described using byte sequences of the multibyte
3794              charset.  We have to convert this to UCS4 now.  And we
3795              cannot simply convert the beginning and the end of the
3796              sequence, we have to iterate over the byte sequence and
3797              convert it for every single character.  */
3798           memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
3799
3800           while (nbytes < charmap->width_rules[cnt].to->nbytes
3801                  || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
3802                             nbytes) <= 0)
3803             {
3804               /* Find the UCS value for `bytes'.  */
3805               int inner;
3806               uint32_t wch;
3807               struct charseq *seq =
3808                 charmap_find_symbol (charmap, (char *) bytes, nbytes);
3809
3810               if (seq == NULL)
3811                 wch = ILLEGAL_CHAR_VALUE;
3812               else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
3813                 wch = seq->ucs4;
3814               else
3815                 wch = repertoire_find_value (ctype->repertoire, seq->name,
3816                                              strlen (seq->name));
3817
3818               if (wch != ILLEGAL_CHAR_VALUE)
3819                 {
3820                   /* Store the value.  */
3821                   uint32_t *class_bits =
3822                     find_idx (ctype, &ctype->class_collection, NULL,
3823                               &ctype->class_collection_act, wch);
3824
3825                   if (class_bits != NULL && (*class_bits & BITw (tok_print)))
3826                     wcwidth_table_add (t, wch,
3827                                        charmap->width_rules[cnt].width);
3828                 }
3829
3830               /* "Increment" the bytes sequence.  */
3831               inner = nbytes - 1;
3832               while (inner >= 0 && bytes[inner] == 0xff)
3833                 --inner;
3834
3835               if (inner < 0)
3836                 {
3837                   /* We have to extend the byte sequence.  */
3838                   if (nbytes >= charmap->width_rules[cnt].to->nbytes)
3839                     break;
3840
3841                   bytes[0] = 1;
3842                   memset (&bytes[1], 0, nbytes);
3843                   ++nbytes;
3844                 }
3845               else
3846                 {
3847                   ++bytes[inner];
3848                   while (++inner < nbytes)
3849                     bytes[inner] = 0;
3850                 }
3851             }
3852         }
3853
3854     /* Set the width of L'\0' to 0.  */
3855     wcwidth_table_add (t, 0, 0);
3856
3857     record_verbose (stderr, _("%s: table for width: %lu bytes"),
3858                     "LC_CTYPE", (unsigned long int) t->result_size);
3859   }
3860
3861   /* Set MB_CUR_MAX.  */
3862   ctype->mb_cur_max = charmap->mb_cur_max;
3863
3864   /* Now determine the table for the transliteration information.
3865
3866      XXX It is not yet clear to me whether it is worth implementing a
3867      complicated algorithm which uses a hash table to locate the entries.
3868      For now I'll use a simple array which can be searching using binary
3869      search.  */
3870   if (ctype->translit_include != NULL)
3871     /* Traverse the locales mentioned in the `include' statements in a
3872        depth-first way and fold in their transliteration information.  */
3873     translit_flatten (ctype, charmap, &ctype->translit);
3874
3875   if (ctype->translit != NULL)
3876     {
3877       /* First count how many entries we have.  This is the upper limit
3878          since some entries from the included files might be overwritten.  */
3879       size_t number = 0;
3880       struct translit_t *runp = ctype->translit;
3881       struct translit_t **sorted;
3882       size_t from_len, to_len;
3883
3884       while (runp != NULL)
3885         {
3886           ++number;
3887           runp = runp->next;
3888         }
3889
3890       /* Next we allocate an array large enough and fill in the values.  */
3891       sorted = (struct translit_t **) alloca (number
3892                                               * sizeof (struct translit_t **));
3893       runp = ctype->translit;
3894       number = 0;
3895       do
3896         {
3897           /* Search for the place where to insert this string.
3898              XXX Better use a real sorting algorithm later.  */
3899           size_t idx = 0;
3900           int replace = 0;
3901
3902           while (idx < number)
3903             {
3904               int res = wcscmp ((const wchar_t *) sorted[idx]->from,
3905                                 (const wchar_t *) runp->from);
3906               if (res == 0)
3907                 {
3908                   replace = 1;
3909                   break;
3910                 }
3911               if (res > 0)
3912                 break;
3913               ++idx;
3914             }
3915
3916           if (replace)
3917             sorted[idx] = runp;
3918           else
3919             {
3920               memmove (&sorted[idx + 1], &sorted[idx],
3921                        (number - idx) * sizeof (struct translit_t *));
3922               sorted[idx] = runp;
3923               ++number;
3924             }
3925
3926           runp = runp->next;
3927         }
3928       while (runp != NULL);
3929
3930       /* The next step is putting all the possible transliteration
3931          strings in one memory block so that we can write it out.
3932          We need several different blocks:
3933          - index to the from-string array
3934          - from-string array
3935          - index to the to-string array
3936          - to-string array.
3937       */
3938       from_len = to_len = 0;
3939       for (size_t cnt = 0; cnt < number; ++cnt)
3940         {
3941           struct translit_to_t *srunp;
3942           from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
3943           srunp = sorted[cnt]->to;
3944           while (srunp != NULL)
3945             {
3946               to_len += wcslen ((const wchar_t *) srunp->str) + 1;
3947               srunp = srunp->next;
3948             }
3949           /* Plus one for the extra NUL character marking the end of
3950              the list for the current entry.  */
3951           ++to_len;
3952         }
3953
3954       /* We can allocate the arrays for the results.  */
3955       ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
3956       ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
3957       ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
3958       ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
3959
3960       from_len = 0;
3961       to_len = 0;
3962       for (size_t cnt = 0; cnt < number; ++cnt)
3963         {
3964           size_t len;
3965           struct translit_to_t *srunp;
3966
3967           ctype->translit_from_idx[cnt] = from_len;
3968           ctype->translit_to_idx[cnt] = to_len;
3969
3970           len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
3971           wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
3972                    (const wchar_t *) sorted[cnt]->from, len);
3973           from_len += len;
3974
3975           ctype->translit_to_idx[cnt] = to_len;
3976           srunp = sorted[cnt]->to;
3977           while (srunp != NULL)
3978             {
3979               len = wcslen ((const wchar_t *) srunp->str) + 1;
3980               wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
3981                        (const wchar_t *) srunp->str, len);
3982               to_len += len;
3983               srunp = srunp->next;
3984             }
3985           ctype->translit_to_tbl[to_len++] = L'\0';
3986         }
3987
3988       /* Store the information about the length.  */
3989       ctype->translit_idx_size = number;
3990       ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
3991       ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
3992     }
3993   else
3994     {
3995       ctype->translit_from_idx = no_str;
3996       ctype->translit_from_tbl = no_str;
3997       ctype->translit_to_tbl = no_str;
3998       ctype->translit_idx_size = 0;
3999       ctype->translit_from_tbl_size = 0;
4000       ctype->translit_to_tbl_size = 0;
4001     }
4002 }