locale/programs/ld-ctype.c

   1 /* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Library General Public License as
   7    published by the Free Software Foundation; either version 2 of the
   8    License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Library General Public License for more details.
  14
  15    You should have received a copy of the GNU Library General Public
  16    License along with the GNU C Library; see the file COPYING.LIB.  If not,
  17    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  18    Boston, MA 02111-1307, USA.  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 # include <config.h>
  22 #endif
  23
  24 #include <alloca.h>
  25 #include <byteswap.h>
  26 #include <endian.h>
  27 #include <errno.h>
  28 #include <limits.h>
  29 #include <obstack.h>
  30 #include <stdlib.h>
  31 #include <string.h>
  32 #include <wchar.h>
  33 #include <wctype.h>
  34 #include <sys/uio.h>
  35
  36 #include "charmap.h"
  37 #include "localeinfo.h"
  38 #include "langinfo.h"
  39 #include "linereader.h"
  40 #include "locfile-token.h"
  41 #include "locfile.h"
  42 #include "localedef.h"
  43
  44 #include <assert.h>
  45
  46
  47 /* These are the extra bits not in wctype.h since these are not preallocated
  48    classes.  */
  49 #define _ISwspecial1    (1 << 29)
  50 #define _ISwspecial2    (1 << 30)
  51 #define _ISwspecial3    (1 << 31)
  52
  53
  54 /* The bit used for representing a special class.  */
  55 #define BITPOS(class) ((class) - tok_upper)
  56 #define BIT(class) (_ISbit (BITPOS (class)))
  57 #define BITw(class) (_ISwbit (BITPOS (class)))
  58
  59 #define ELEM(ctype, collection, idx, value)                                   \
  60   *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,     \
  61              &ctype->collection##_act idx, value)
  62
  63
  64 /* To be compatible with former implementations we for now restrict
  65    the number of bits for character classes to 16.  When compatibility
  66    is not necessary anymore increase the number to 32.  */
  67 #define char_class_t uint16_t
  68 #define char_class32_t uint32_t
  69
  70
  71 /* Type to describe a transliteration action.  We have a possibly
  72    multiple character from-string and a set of multiple character
  73    to-strings.  All are 32bit values since this is what is used in
  74    the gconv functions.  */
  75 struct translit_to_t
  76 {
  77   uint32_t *str;
  78
  79   struct translit_to_t *next;
  80 };
  81
  82 struct translit_t
  83 {
  84   uint32_t *from;
  85
  86   struct translit_to_t *to;
  87
  88   struct translit_t *next;
  89 };
  90
  91
  92 /* The real definition of the struct for the LC_CTYPE locale.  */
  93 struct locale_ctype_t
  94 {
  95   uint32_t *charnames;
  96   size_t charnames_max;
  97   size_t charnames_act;
  98
  99   struct repertoire_t *repertoire;
 100
 101   /* We will allow up to 8 * sizeof (uint32_t) character classes.  */
 102 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
 103   size_t nr_charclass;
 104   const char *classnames[MAX_NR_CHARCLASS];
 105   uint32_t last_class_char;
 106   uint32_t class256_collection[256];
 107   uint32_t *class_collection;
 108   size_t class_collection_max;
 109   size_t class_collection_act;
 110   uint32_t class_done;
 111
 112   struct charseq **mbdigits;
 113   size_t mbdigits_act;
 114   size_t mbdigits_max;
 115   uint32_t *wcdigits;
 116   size_t wcdigits_act;
 117   size_t wcdigits_max;
 118
 119   struct charseq *mboutdigits[10];
 120   uint32_t wcoutdigits[10];
 121   size_t outdigits_act;
 122
 123   /* If the following number ever turns out to be too small simply
 124      increase it.  But I doubt it will.  --drepper@gnu */
 125 #define MAX_NR_CHARMAP 16
 126   const char *mapnames[MAX_NR_CHARMAP];
 127   uint32_t *map_collection[MAX_NR_CHARMAP];
 128   uint32_t map256_collection[2][256];
 129   size_t map_collection_max[MAX_NR_CHARMAP];
 130   size_t map_collection_act[MAX_NR_CHARMAP];
 131   size_t map_collection_nr;
 132   size_t last_map_idx;
 133   int tomap_done[MAX_NR_CHARMAP];
 134
 135   /* Transliteration information.  */
 136   const char *translit_copy_locale;
 137   const char *translit_copy_repertoire;
 138   struct translit_t *translit;
 139
 140   /* The arrays for the binary representation.  */
 141   uint32_t plane_size;
 142   uint32_t plane_cnt;
 143   char_class_t *ctype_b;
 144   char_class32_t *ctype32_b;
 145   uint32_t *names;
 146   uint32_t **map;
 147   uint32_t *class_name_ptr;
 148   uint32_t *map_name_ptr;
 149   unsigned char *width;
 150   uint32_t mb_cur_max;
 151   const char *codeset_name;
 152   uint32_t translit_hash_size;
 153   uint32_t translit_hash_layers;
 154   uint32_t *translit_from_idx;
 155   uint32_t *translit_from_tbl;
 156   uint32_t *translit_to_idx;
 157   uint32_t *translit_to_tbl;
 158   size_t translit_idx_size;
 159   size_t translit_from_tbl_size;
 160   size_t translit_to_tbl_size;
 161
 162   struct obstack mem_pool;
 163 };
 164
 165
 166 #define obstack_chunk_alloc xmalloc
 167 #define obstack_chunk_free free
 168
 169
 170 /* Prototypes for local functions.  */
 171 static void ctype_startup (struct linereader *lr, struct localedef_t *locale,
 172                            struct charmap_t *charmap, int ignore_content);
 173 static void ctype_class_new (struct linereader *lr,
 174                              struct locale_ctype_t *ctype, const char *name);
 175 static void ctype_map_new (struct linereader *lr,
 176                            struct locale_ctype_t *ctype,
 177                            const char *name, struct charmap_t *charmap);
 178 static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table,
 179                            size_t *max, size_t *act, unsigned int idx);
 180 static void set_class_defaults (struct locale_ctype_t *ctype,
 181                                 struct charmap_t *charmap,
 182                                 struct repertoire_t *repertoire);
 183 static void allocate_arrays (struct locale_ctype_t *ctype,
 184                              struct charmap_t *charmap,
 185                              struct repertoire_t *repertoire);
 186
 187
 188 static const char *longnames[] =
 189 {
 190   "zero", "one", "two", "three", "four",
 191   "five", "six", "seven", "eight", "nine"
 192 };
 193 static const unsigned char digits[] = "0123456789";
 194
 195
 196 static void
 197 ctype_startup (struct linereader *lr, struct localedef_t *locale,
 198                struct charmap_t *charmap, int ignore_content)
 199 {
 200   unsigned int cnt;
 201   struct locale_ctype_t *ctype;
 202
 203   if (!ignore_content)
 204     {
 205       /* Allocate the needed room.  */
 206       locale->categories[LC_CTYPE].ctype = ctype =
 207         (struct locale_ctype_t *) xcalloc (1, sizeof (struct locale_ctype_t));
 208
 209       /* We have seen no names yet.  */
 210       ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
 211       ctype->charnames =
 212         (unsigned int *) xmalloc (ctype->charnames_max
 213                                   * sizeof (unsigned int));
 214       for (cnt = 0; cnt < 256; ++cnt)
 215         ctype->charnames[cnt] = cnt;
 216       ctype->charnames_act = 256;
 217
 218       /* Fill character class information.  */
 219       ctype->last_class_char = ILLEGAL_CHAR_VALUE;
 220       /* The order of the following instructions determines the bit
 221          positions!  */
 222       ctype_class_new (lr, ctype, "upper");
 223       ctype_class_new (lr, ctype, "lower");
 224       ctype_class_new (lr, ctype, "alpha");
 225       ctype_class_new (lr, ctype, "digit");
 226       ctype_class_new (lr, ctype, "xdigit");
 227       ctype_class_new (lr, ctype, "space");
 228       ctype_class_new (lr, ctype, "print");
 229       ctype_class_new (lr, ctype, "graph");
 230       ctype_class_new (lr, ctype, "blank");
 231       ctype_class_new (lr, ctype, "cntrl");
 232       ctype_class_new (lr, ctype, "punct");
 233       ctype_class_new (lr, ctype, "alnum");
 234       /* The following are extensions from ISO 14652.  */
 235       ctype_class_new (lr, ctype, "left_to_right");
 236       ctype_class_new (lr, ctype, "right_to_left");
 237       ctype_class_new (lr, ctype, "num_terminator");
 238       ctype_class_new (lr, ctype, "num_separator");
 239       ctype_class_new (lr, ctype, "segment_separator");
 240       ctype_class_new (lr, ctype, "block_separator");
 241       ctype_class_new (lr, ctype, "direction_control");
 242       ctype_class_new (lr, ctype, "sym_swap_layout");
 243       ctype_class_new (lr, ctype, "char_shape_selector");
 244       ctype_class_new (lr, ctype, "num_shape_selector");
 245       ctype_class_new (lr, ctype, "non_spacing");
 246       ctype_class_new (lr, ctype, "non_spacing_level3");
 247       ctype_class_new (lr, ctype, "normal_connect");
 248       ctype_class_new (lr, ctype, "r_connect");
 249       ctype_class_new (lr, ctype, "no_connect");
 250       ctype_class_new (lr, ctype, "no_connect-space");
 251       ctype_class_new (lr, ctype, "vowel_connect");
 252
 253       ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
 254       ctype->class_collection
 255         = (uint32_t *) xcalloc (sizeof (unsigned long int),
 256                                 ctype->class_collection_max);
 257       ctype->class_collection_act = 256;
 258
 259       /* Fill character map information.  */
 260       ctype->map_collection_nr = 0;
 261       ctype->last_map_idx = MAX_NR_CHARMAP;
 262       ctype_map_new (lr, ctype, "toupper", charmap);
 263       ctype_map_new (lr, ctype, "tolower", charmap);
 264       ctype_map_new (lr, ctype, "tosymmetric", charmap);
 265
 266       /* Fill first 256 entries in `toXXX' arrays.  */
 267       for (cnt = 0; cnt < 256; ++cnt)
 268         {
 269           ctype->map_collection[0][cnt] = cnt;
 270           ctype->map_collection[1][cnt] = cnt;
 271           ctype->map_collection[2][cnt] = cnt;
 272           ctype->map256_collection[0][cnt] = cnt;
 273           ctype->map256_collection[1][cnt] = cnt;
 274         }
 275
 276       obstack_init (&ctype->mem_pool);
 277     }
 278 }
 279
 280
 281 void
 282 ctype_finish (struct localedef_t *locale, struct charmap_t *charmap)
 283 {
 284   /* See POSIX.2, table 2-6 for the meaning of the following table.  */
 285 #define NCLASS 12
 286   static const struct
 287   {
 288     const char *name;
 289     const char allow[NCLASS];
 290   }
 291   valid_table[NCLASS] =
 292   {
 293     /* The order is important.  See token.h for more information.
 294        M = Always, D = Default, - = Permitted, X = Mutually exclusive  */
 295     { "upper",  "--MX-XDDXXX-" },
 296     { "lower",  "--MX-XDDXXX-" },
 297     { "alpha",  "---X-XDDXXX-" },
 298     { "digit",  "XXX--XDDXXX-" },
 299     { "xdigit", "-----XDDXXX-" },
 300     { "space",  "XXXXX------X" },
 301     { "print",  "---------X--" },
 302     { "graph",  "---------X--" },
 303     { "blank",  "XXXXXM-----X" },
 304     { "cntrl",  "XXXXX-XX--XX" },
 305     { "punct",  "XXXXX-DD-X-X" },
 306     { "alnum",  "-----XDDXXX-" }
 307   };
 308   size_t cnt;
 309   int cls1, cls2;
 310   uint32_t space_value;
 311   struct charseq *space_seq;
 312   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 313   int warned;
 314
 315   /* Now resolve copying and also handle completely missing definitions.  */
 316   if (ctype == NULL)
 317     {
 318       /* First see whether we were supposed to copy.  If yes, find the
 319          actual definition.  */
 320       if (locale->copy_name[LC_CTYPE] != NULL)
 321         {
 322           /* Find the copying locale.  This has to happen transitively since
 323              the locale we are copying from might also copying another one.  */
 324           struct localedef_t *from = locale;
 325
 326           do
 327             from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
 328                                 from->repertoire_name, charmap);
 329           while (from->categories[LC_CTYPE].ctype == NULL
 330                  && from->copy_name[LC_CTYPE] != NULL);
 331
 332           ctype = locale->categories[LC_CTYPE].ctype
 333             = from->categories[LC_CTYPE].ctype;
 334         }
 335
 336       /* If there is still no definition issue an warning and create an
 337          empty one.  */
 338       if (ctype == NULL)
 339         {
 340           error (0, 0, _("No definition for %s category found"), "LC_CTYPE");
 341           ctype_startup (NULL, locale, charmap, 0);
 342           ctype = locale->categories[LC_CTYPE].ctype;
 343         }
 344     }
 345
 346   /* Set default value for classes not specified.  */
 347   set_class_defaults (ctype, charmap, ctype->repertoire);
 348
 349   /* Check according to table.  */
 350   for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
 351     {
 352       uint32_t tmp = ctype->class_collection[cnt];
 353
 354       if (tmp != 0)
 355         {
 356           for (cls1 = 0; cls1 < NCLASS; ++cls1)
 357             if ((tmp & _ISwbit (cls1)) != 0)
 358               for (cls2 = 0; cls2 < NCLASS; ++cls2)
 359                 if (valid_table[cls1].allow[cls2] != '-')
 360                   {
 361                     int eq = (tmp & _ISwbit (cls2)) != 0;
 362                     switch (valid_table[cls1].allow[cls2])
 363                       {
 364                       case 'M':
 365                         if (!eq)
 366                           {
 367                             uint32_t value = ctype->charnames[cnt];
 368
 369                             if (!be_quiet)
 370                               error (0, 0, _("\
 371 character L'\\u%0*x' in class `%s' must be in class `%s'"),
 372                                      value > 0xffff ? 8 : 4, value,
 373                                      valid_table[cls1].name,
 374                                      valid_table[cls2].name);
 375                           }
 376                         break;
 377
 378                       case 'X':
 379                         if (eq)
 380                           {
 381                             uint32_t value = ctype->charnames[cnt];
 382
 383                             if (!be_quiet)
 384                               error (0, 0, _("\
 385 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
 386                                      value > 0xffff ? 8 : 4, value,
 387                                      valid_table[cls1].name,
 388                                      valid_table[cls2].name);
 389                           }
 390                         break;
 391
 392                       case 'D':
 393                         ctype->class_collection[cnt] |= _ISwbit (cls2);
 394                         break;
 395
 396                       default:
 397                         error (5, 0, _("internal error in %s, line %u"),
 398                                __FUNCTION__, __LINE__);
 399                       }
 400                   }
 401         }
 402     }
 403
 404   for (cnt = 0; cnt < 256; ++cnt)
 405     {
 406       uint32_t tmp = ctype->class256_collection[cnt];
 407
 408       if (tmp != 0)
 409         {
 410           for (cls1 = 0; cls1 < NCLASS; ++cls1)
 411             if ((tmp & _ISbit (cls1)) != 0)
 412               for (cls2 = 0; cls2 < NCLASS; ++cls2)
 413                 if (valid_table[cls1].allow[cls2] != '-')
 414                   {
 415                     int eq = (tmp & _ISbit (cls2)) != 0;
 416                     switch (valid_table[cls1].allow[cls2])
 417                       {
 418                       case 'M':
 419                         if (!eq)
 420                           {
 421                             char buf[17];
 422
 423                             sprintf (buf, "\\%o", cnt);
 424
 425                             if (!be_quiet)
 426                               error (0, 0, _("\
 427 character '%s' in class `%s' must be in class `%s'"),
 428                                      buf, valid_table[cls1].name,
 429                                      valid_table[cls2].name);
 430                           }
 431                         break;
 432
 433                       case 'X':
 434                         if (eq)
 435                           {
 436                             char buf[17];
 437
 438                             sprintf (buf, "\\%o", cnt);
 439
 440                             if (!be_quiet)
 441                               error (0, 0, _("\
 442 character '%s' in class `%s' must not be in class `%s'"),
 443                                      buf, valid_table[cls1].name,
 444                                      valid_table[cls2].name);
 445                           }
 446                         break;
 447
 448                       case 'D':
 449                         ctype->class256_collection[cnt] |= _ISbit (cls2);
 450                         break;
 451
 452                       default:
 453                         error (5, 0, _("internal error in %s, line %u"),
 454                                __FUNCTION__, __LINE__);
 455                       }
 456                   }
 457         }
 458     }
 459
 460   /* ... and now test <SP> as a special case.  */
 461   space_value = repertoire_find_value (ctype->repertoire, "SP", 2);
 462   if (space_value == ILLEGAL_CHAR_VALUE)
 463     {
 464       if (!be_quiet)
 465         error (0, 0, _("character <SP> not defined in character map"));
 466     }
 467   else if (((cnt = BITPOS (tok_space),
 468              (ELEM (ctype, class_collection, , space_value)
 469               & BITw (tok_space)) == 0)
 470             || (cnt = BITPOS (tok_blank),
 471                 (ELEM (ctype, class_collection, , space_value)
 472                  & BITw (tok_blank)) == 0)))
 473     {
 474       if (!be_quiet)
 475         error (0, 0, _("<SP> character not in class `%s'"),
 476                valid_table[cnt].name);
 477     }
 478   else if (((cnt = BITPOS (tok_punct),
 479              (ELEM (ctype, class_collection, , space_value)
 480               & BITw (tok_punct)) != 0)
 481             || (cnt = BITPOS (tok_graph),
 482                 (ELEM (ctype, class_collection, , space_value)
 483                  & BITw (tok_graph))
 484                 != 0)))
 485     {
 486       if (!be_quiet)
 487         error (0, 0, _("<SP> character must not be in class `%s'"),
 488                valid_table[cnt].name);
 489     }
 490   else
 491     ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
 492
 493   space_seq = charmap_find_value (charmap, "SP", 2);
 494   if (space_seq == NULL || space_seq->nbytes != 1)
 495     {
 496       if (!be_quiet)
 497         error (0, 0, _("character <SP> not defined in character map"));
 498     }
 499   else if (((cnt = BITPOS (tok_space),
 500              (ctype->class256_collection[space_seq->bytes[0]]
 501               & BIT (tok_space)) == 0)
 502             || (cnt = BITPOS (tok_blank),
 503                 (ctype->class256_collection[space_seq->bytes[0]]
 504                  & BIT (tok_blank)) == 0)))
 505     {
 506       if (!be_quiet)
 507         error (0, 0, _("<SP> character not in class `%s'"),
 508                valid_table[cnt].name);
 509     }
 510   else if (((cnt = BITPOS (tok_punct),
 511              (ctype->class256_collection[space_seq->bytes[0]]
 512               & BIT (tok_punct)) != 0)
 513             || (cnt = BITPOS (tok_graph),
 514                 (ctype->class256_collection[space_seq->bytes[0]]
 515                  & BIT (tok_graph)) != 0)))
 516     {
 517       if (!be_quiet)
 518         error (0, 0, _("<SP> character must not be in class `%s'"),
 519                valid_table[cnt].name);
 520     }
 521   else
 522     ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
 523
 524   /* Now that the tests are done make sure the name array contains all
 525      characters which are handled in the WIDTH section of the
 526      character set definition file.  */
 527   if (charmap->width_rules != NULL)
 528     for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
 529       {
 530         unsigned char bytes[charmap->mb_cur_max];
 531         int nbytes = charmap->width_rules[cnt].from->nbytes;
 532
 533         /* We have the range of character for which the width is
 534            specified described using byte sequences of the multibyte
 535            charset.  We have to convert this to UCS4 now.  And we
 536            cannot simply convert the beginning and the end of the
 537            sequence, we have to iterate over the byte sequence and
 538            convert it for every single character.  */
 539         memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
 540
 541         while (nbytes < charmap->width_rules[cnt].to->nbytes
 542                || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
 543                           nbytes) <= 0)
 544           {
 545             /* Find the UCS value for `bytes'.  */
 546             uint32_t wch = repertoire_find_value (ctype->repertoire, bytes,
 547                                                   nbytes);
 548             int inner;
 549
 550             if (wch != ILLEGAL_CHAR_VALUE)
 551               /* We are only interested in the side-effects of the
 552                  `find_idx' call.  It will add appropriate entries in
 553                  the name array if this is necessary.  */
 554               (void) find_idx (ctype, NULL, NULL, NULL, wch);
 555
 556             /* "Increment" the bytes sequence.  */
 557             inner = nbytes - 1;
 558             while (inner >= 0 && bytes[inner] == 0xff)
 559               --inner;
 560
 561             if (inner < 0)
 562               {
 563                 /* We have to extend the byte sequence.  */
 564                 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
 565                   break;
 566
 567                 bytes[0] = 1;
 568                 memset (&bytes[1], 0, nbytes);
 569                 ++nbytes;
 570               }
 571             else
 572               {
 573                 ++bytes[inner];
 574                 while (++inner < nbytes)
 575                   bytes[inner] = 0;
 576               }
 577           }
 578       }
 579
 580   /* There must be a multiple of 10 digits.  */
 581   if (ctype->mbdigits_act % 10 != 0)
 582     {
 583       assert (ctype->mbdigits_act == ctype->wcdigits_act);
 584       ctype->wcdigits_act -= ctype->mbdigits_act % 10;
 585       ctype->mbdigits_act -= ctype->mbdigits_act % 10;
 586       error (0, 0, _("`digit' category has not entries in groups of ten"));
 587     }
 588
 589   /* Check the input digits.  There must be a multiple of ten available.
 590      In each group it could be that one or the other character is missing.
 591      In this case the whole group must be removed.  */
 592   cnt = 0;
 593   while (cnt < ctype->mbdigits_act)
 594     {
 595       size_t inner;
 596       for (inner = 0; inner < 10; ++inner)
 597         if (ctype->mbdigits[cnt + inner] == NULL)
 598           break;
 599
 600       if (inner == 10)
 601         cnt += 10;
 602       else
 603         {
 604           /* Remove the group.  */
 605           memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
 606                    ((ctype->wcdigits_act - cnt - 10)
 607                     * sizeof (ctype->mbdigits[0])));
 608           ctype->mbdigits_act -= 10;
 609         }
 610     }
 611
 612   /* If no input digits are given use the default.  */
 613   if (ctype->mbdigits_act == 0)
 614     {
 615       if (ctype->mbdigits_max == 0)
 616         {
 617           ctype->mbdigits = obstack_alloc (&charmap->mem_pool,
 618                                            10 * sizeof (struct charseq *));
 619           ctype->mbdigits_max = 10;
 620         }
 621
 622       for (cnt = 0; cnt < 10; ++cnt)
 623         {
 624           ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
 625                                                       digits + cnt, 1);
 626           if (ctype->mbdigits[cnt] == NULL)
 627             {
 628               ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
 629                                                           longnames[cnt],
 630                                                           strlen (longnames[cnt]));
 631               if (ctype->mbdigits[cnt] == NULL)
 632                 {
 633                   /* Hum, this ain't good.  */
 634                   error (0, 0, _("\
 635 no input digits defined and none of the standard names in the charmap"));
 636
 637                   ctype->mbdigits[cnt] = obstack_alloc (&charmap->mem_pool,
 638                                                         sizeof (struct charseq) + 1);
 639
 640                   /* This is better than nothing.  */
 641                   ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
 642                   ctype->mbdigits[cnt]->nbytes = 1;
 643                 }
 644             }
 645         }
 646
 647       ctype->mbdigits_act = 10;
 648     }
 649
 650   /* Check the wide character input digits.  There must be a multiple
 651      of ten available.  In each group it could be that one or the other
 652      character is missing.  In this case the whole group must be
 653      removed.  */
 654   cnt = 0;
 655   while (cnt < ctype->wcdigits_act)
 656     {
 657       size_t inner;
 658       for (inner = 0; inner < 10; ++inner)
 659         if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
 660           break;
 661
 662       if (inner == 10)
 663         cnt += 10;
 664       else
 665         {
 666           /* Remove the group.  */
 667           memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
 668                    ((ctype->wcdigits_act - cnt - 10)
 669                     * sizeof (ctype->wcdigits[0])));
 670           ctype->wcdigits_act -= 10;
 671         }
 672     }
 673
 674   /* If no input digits are given use the default.  */
 675   if (ctype->wcdigits_act == 0)
 676     {
 677       if (ctype->wcdigits_max == 0)
 678         {
 679           ctype->wcdigits = obstack_alloc (&charmap->mem_pool,
 680                                            10 * sizeof (uint32_t));
 681           ctype->wcdigits_max = 10;
 682         }
 683
 684       for (cnt = 0; cnt < 10; ++cnt)
 685         ctype->wcdigits[cnt] = L'0' + cnt;
 686
 687       ctype->mbdigits_act = 10;
 688     }
 689
 690   /* Check the outdigits.  */
 691   warned = 0;
 692   for (cnt = 0; cnt < 10; ++cnt)
 693     if (ctype->mboutdigits[cnt] == NULL)
 694       {
 695         static struct charseq replace[2];
 696
 697         if (!warned)
 698           {
 699             error (0, 0, _("\
 700 not all characters used in `outdigit' are available in the charmap"));
 701             warned = 1;
 702           }
 703
 704         replace[0].nbytes = 1;
 705         replace[0].bytes[0] = '?';
 706         replace[0].bytes[1] = '\0';
 707         ctype->mboutdigits[cnt] = &replace[0];
 708       }
 709
 710   warned = 0;
 711   for (cnt = 0; cnt < 10; ++cnt)
 712     if (ctype->wcoutdigits[cnt] == 0)
 713       {
 714         if (!warned)
 715           {
 716             error (0, 0, _("\
 717 not all characters used in `outdigit' are available in the repertoire"));
 718             warned = 1;
 719           }
 720
 721         ctype->wcoutdigits[cnt] = L'?';
 722       }
 723 }
 724
 725
 726 void
 727 ctype_output (struct localedef_t *locale, struct charmap_t *charmap,
 728               const char *output_path)
 729 {
 730   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 731   const size_t nelems = (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)
 732                          + (ctype->map_collection_nr - 2));
 733   struct iovec iov[2 + nelems + ctype->nr_charclass
 734                   + ctype->map_collection_nr];
 735   struct locale_file data;
 736   uint32_t idx[nelems + 1];
 737   size_t elem, cnt, offset, total;
 738   char *cp;
 739
 740   /* Now prepare the output: Find the sizes of the table we can use.  */
 741   allocate_arrays (ctype, charmap, ctype->repertoire);
 742
 743   data.magic = LIMAGIC (LC_CTYPE);
 744   data.n = nelems;
 745   iov[0].iov_base = (void *) &data;
 746   iov[0].iov_len = sizeof (data);
 747
 748   iov[1].iov_base = (void *) idx;
 749   iov[1].iov_len = sizeof (idx);
 750
 751   idx[0] = iov[0].iov_len + iov[1].iov_len;
 752   offset = 0;
 753
 754   for (elem = 0; elem < nelems; ++elem)
 755     {
 756       if (elem < _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE))
 757         switch (elem)
 758           {
 759 #define CTYPE_DATA(name, base, len)                                           \
 760           case _NL_ITEM_INDEX (name):                                         \
 761             iov[2 + elem + offset].iov_base = (base);                         \
 762             iov[2 + elem + offset].iov_len = (len);                           \
 763             if (elem + 1 < nelems)                                            \
 764               idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;     \
 765             break
 766
 767           CTYPE_DATA (_NL_CTYPE_CLASS,
 768                       ctype->ctype_b,
 769                       (256 + 128) * sizeof (char_class_t));
 770
 771           CTYPE_DATA (_NL_CTYPE_TOUPPER,
 772                       ctype->map[0],
 773                       (ctype->plane_size * ctype->plane_cnt + 128)
 774                       * sizeof (uint32_t));
 775           CTYPE_DATA (_NL_CTYPE_TOLOWER,
 776                       ctype->map[1],
 777                       (ctype->plane_size * ctype->plane_cnt + 128)
 778                       * sizeof (uint32_t));
 779
 780           CTYPE_DATA (_NL_CTYPE_CLASS32,
 781                       ctype->ctype32_b,
 782                       (ctype->plane_size * ctype->plane_cnt
 783                        * sizeof (char_class32_t)));
 784
 785           CTYPE_DATA (_NL_CTYPE_NAMES,
 786                       ctype->names, (ctype->plane_size * ctype->plane_cnt
 787                                      * sizeof (uint32_t)));
 788
 789           CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_SIZE,
 790                       &ctype->translit_hash_size, sizeof (uint32_t));
 791           CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_LAYERS,
 792                       &ctype->translit_hash_layers, sizeof (uint32_t));
 793
 794           CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX,
 795                       ctype->translit_from_idx,
 796                       ctype->translit_idx_size);
 797
 798           CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL,
 799                       ctype->translit_from_tbl,
 800                       ctype->translit_from_tbl_size);
 801
 802           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX,
 803                       ctype->translit_to_idx,
 804                       ctype->translit_idx_size);
 805
 806           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL,
 807                       ctype->translit_to_tbl, ctype->translit_to_tbl_size);
 808
 809           CTYPE_DATA (_NL_CTYPE_HASH_SIZE,
 810                       &ctype->plane_size, sizeof (uint32_t));
 811           CTYPE_DATA (_NL_CTYPE_HASH_LAYERS,
 812                       &ctype->plane_cnt, sizeof (uint32_t));
 813
 814           case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
 815             /* The class name array.  */
 816             total = 0;
 817             for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
 818               {
 819                 iov[2 + elem + offset].iov_base
 820                   = (void *) ctype->classnames[cnt];
 821                 iov[2 + elem + offset].iov_len
 822                   = strlen (ctype->classnames[cnt]) + 1;
 823                 total += iov[2 + elem + offset].iov_len;
 824               }
 825             iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
 826             iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
 827             total += 1 + (4 - ((total + 1) % 4));
 828
 829             idx[elem + 1] = idx[elem] + total;
 830             break;
 831
 832           case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
 833             /* The class name array.  */
 834             total = 0;
 835             for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
 836               {
 837                 iov[2 + elem + offset].iov_base
 838                   = (void *) ctype->mapnames[cnt];
 839                 iov[2 + elem + offset].iov_len
 840                   = strlen (ctype->mapnames[cnt]) + 1;
 841                 total += iov[2 + elem + offset].iov_len;
 842               }
 843             iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
 844             iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
 845             total += 1 + (4 - ((total + 1) % 4));
 846
 847             idx[elem + 1] = idx[elem] + total;
 848             break;
 849
 850           CTYPE_DATA (_NL_CTYPE_WIDTH,
 851                       ctype->width, ctype->plane_size * ctype->plane_cnt);
 852
 853           CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
 854                       &ctype->mb_cur_max, sizeof (uint32_t));
 855
 856           case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
 857             total = strlen (ctype->codeset_name) + 1;
 858             if (total % 4 == 0)
 859               iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
 860             else
 861               {
 862                 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
 863                 memset (mempcpy (iov[2 + elem + offset].iov_base,
 864                                  ctype->codeset_name, total),
 865                         '\0', 4 - (total & 3));
 866                 total = (total + 3) & ~3;
 867               }
 868             iov[2 + elem + offset].iov_len = total;
 869             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
 870             break;
 871
 872           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
 873             iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
 874             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
 875             *(uint32_t *) iov[2 + elem + offset].iov_base =
 876               ctype->mbdigits_act / 10;
 877             idx[elem + 1] = idx[elem] + sizeof (uint32_t);
 878             break;
 879
 880           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
 881             iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
 882             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
 883             *(uint32_t *) iov[2 + elem + offset].iov_base =
 884               ctype->wcdigits_act / 10;
 885             idx[elem + 1] = idx[elem] + sizeof (uint32_t);
 886             break;
 887
 888           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
 889             /* Compute the length of all possible characters.  For INDIGITS
 890                there might be more than one.  We simply concatenate all of
 891                them with a NUL byte following.  The NUL byte wouldn't be
 892                necessary but it makes it easier for the user.  */
 893             total = 0;
 894             for (cnt = elem - _NL_CTYPE_INDIGITS0_MB;
 895                  cnt < ctype->mbdigits_act; cnt += 10)
 896               total += ctype->mbdigits[cnt]->nbytes + 1;
 897             iov[2 + elem + offset].iov_base = (char *) alloca (total);
 898             iov[2 + elem + offset].iov_len = total;
 899
 900             cp = iov[2 + elem + offset].iov_base;
 901             for (cnt = elem - _NL_CTYPE_INDIGITS0_MB;
 902                  cnt < ctype->mbdigits_act; cnt += 10)
 903               {
 904                 cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes,
 905                               ctype->mbdigits[cnt]->nbytes);
 906                 *cp++ = '\0';
 907               }
 908             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
 909             break;
 910
 911           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
 912             /* Compute the length of all possible characters.  For INDIGITS
 913                there might be more than one.  We simply concatenate all of
 914                them with a NUL byte following.  The NUL byte wouldn't be
 915                necessary but it makes it easier for the user.  */
 916             cnt = elem - _NL_CTYPE_OUTDIGIT0_MB;
 917             total = ctype->mboutdigits[cnt]->nbytes + 1;
 918             iov[2 + elem + offset].iov_base = (char *) alloca (total);
 919             iov[2 + elem + offset].iov_len = total;
 920
 921             *(char *) mempcpy (iov[2 + elem + offset].iov_base,
 922                                ctype->mbdigits[cnt]->bytes,
 923                                ctype->mbdigits[cnt]->nbytes) = '\0';
 924             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
 925             break;
 926
 927           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
 928             total = ctype->wcdigits_act / 10;
 929
 930             iov[2 + elem + offset].iov_base =
 931               (uint32_t *) alloca (total * sizeof (uint32_t));
 932             iov[2 + elem + offset].iov_len = total * sizeof (uint32_t);
 933
 934             for (cnt = elem - _NL_CTYPE_INDIGITS0_WC;
 935                  cnt < ctype->wcdigits_act; cnt += 10)
 936               ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10]
 937                 = ctype->wcdigits[cnt];
 938             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
 939             break;
 940
 941           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
 942             cnt = elem - _NL_CTYPE_OUTDIGIT0_WC;
 943             iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt];
 944             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
 945             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
 946             break;
 947
 948           default:
 949             assert (! "unknown CTYPE element");
 950           }
 951       else
 952         {
 953           /* Handle extra maps.  */
 954           size_t nr = (elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) + 2;
 955
 956           iov[2 + elem + offset].iov_base = ctype->map[nr];
 957           iov[2 + elem + offset].iov_len = ((ctype->plane_size
 958                                              * ctype->plane_cnt + 128)
 959                                             * sizeof (uint32_t));
 960
 961           idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
 962         }
 963     }
 964
 965   assert (2 + elem + offset == (nelems + ctype->nr_charclass
 966                                 + ctype->map_collection_nr + 2));
 967
 968   write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
 969 }
 970
 971
 972 /* Local functions.  */
 973 static void
 974 ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
 975                  const char *name)
 976 {
 977   size_t cnt;
 978
 979   for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
 980     if (strcmp (ctype->classnames[cnt], name) == 0)
 981       break;
 982
 983   if (cnt < ctype->nr_charclass)
 984     {
 985       lr_error (lr, _("character class `%s' already defined"), name);
 986       return;
 987     }
 988
 989   if (ctype->nr_charclass == MAX_NR_CHARCLASS)
 990     /* Exit code 2 is prescribed in P1003.2b.  */
 991     error (2, 0, _("\
 992 implementation limit: no more than %d character classes allowed"),
 993            MAX_NR_CHARCLASS);
 994
 995   ctype->classnames[ctype->nr_charclass++] = name;
 996 }
 997
 998
 999 static void
1000 ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1001                const char *name, struct charmap_t *charmap)
1002 {
1003   size_t max_chars = 0;
1004   size_t cnt;
1005
1006   for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1007     {
1008       if (strcmp (ctype->mapnames[cnt], name) == 0)
1009         break;
1010
1011       if (max_chars < ctype->map_collection_max[cnt])
1012         max_chars = ctype->map_collection_max[cnt];
1013     }
1014
1015   if (cnt < ctype->map_collection_nr)
1016     {
1017       lr_error (lr, _("character map `%s' already defined"), name);
1018       return;
1019     }
1020
1021   if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1022     /* Exit code 2 is prescribed in P1003.2b.  */
1023     error (2, 0, _("\
1024 implementation limit: no more than %d character maps allowed"),
1025            MAX_NR_CHARMAP);
1026
1027   ctype->mapnames[cnt] = name;
1028
1029   if (max_chars == 0)
1030     ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1031   else
1032     ctype->map_collection_max[cnt] = max_chars;
1033
1034   ctype->map_collection[cnt] = (uint32_t *)
1035     xmalloc (sizeof (uint32_t) * ctype->map_collection_max[cnt]);
1036   memset (ctype->map_collection[cnt], '\0',
1037           sizeof (uint32_t) * ctype->map_collection_max[cnt]);
1038   ctype->map_collection_act[cnt] = 256;
1039
1040   ++ctype->map_collection_nr;
1041 }
1042
1043
1044 /* We have to be prepared that TABLE, MAX, and ACT can be NULL.  This
1045    is possible if we only want to extend the name array.  */
1046 static uint32_t *
1047 find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1048           size_t *act, uint32_t idx)
1049 {
1050   size_t cnt;
1051
1052   if (idx < 256)
1053     return table == NULL ? NULL : &(*table)[idx];
1054
1055   for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1056     if (ctype->charnames[cnt] == idx)
1057       break;
1058
1059   /* We have to distinguish two cases: the name is found or not.  */
1060   if (cnt == ctype->charnames_act)
1061     {
1062       /* Extend the name array.  */
1063       if (ctype->charnames_act == ctype->charnames_max)
1064         {
1065           ctype->charnames_max *= 2;
1066           ctype->charnames = (unsigned int *)
1067             xrealloc (ctype->charnames,
1068                       sizeof (unsigned int) * ctype->charnames_max);
1069         }
1070       ctype->charnames[ctype->charnames_act++] = idx;
1071     }
1072
1073   if (table == NULL)
1074     /* We have done everything we are asked to do.  */
1075     return NULL;
1076
1077   if (cnt >= *act)
1078     {
1079       if (cnt >= *max)
1080         {
1081           size_t old_max = *max;
1082           do
1083             *max *= 2;
1084           while (*max <= cnt);
1085
1086           *table =
1087             (uint32_t *) xrealloc (*table, *max * sizeof (unsigned long int));
1088           memset (&(*table)[old_max], '\0',
1089                   (*max - old_max) * sizeof (uint32_t));
1090         }
1091
1092       *act = cnt;
1093     }
1094
1095   return &(*table)[cnt];
1096 }
1097
1098
1099 static int
1100 get_character (struct token *now, struct charmap_t *charmap,
1101                struct repertoire_t *repertoire,
1102                struct charseq **seqp, uint32_t *wchp)
1103 {
1104   if (now->tok == tok_bsymbol)
1105     {
1106       /* This will hopefully be the normal case.  */
1107       *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1108                                      now->val.str.lenmb);
1109       *seqp = charmap_find_value (charmap, now->val.str.startmb,
1110                                   now->val.str.lenmb);
1111     }
1112   else if (now->tok == tok_ucs4)
1113     {
1114       *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
1115
1116       if (*seqp == NULL)
1117         {
1118           /* Compute the value in the charmap from the UCS value.  */
1119           const char *symbol = repertoire_find_symbol (repertoire,
1120                                                        now->val.ucs4);
1121
1122           if (symbol == NULL)
1123             *seqp = NULL;
1124           else
1125             *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
1126
1127           if (*seqp == NULL)
1128             {
1129               /* Insert a negative entry.  */
1130               static const struct charseq negative
1131                 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1132               uint32_t *newp = obstack_alloc (&repertoire->mem_pool, 4);
1133               *newp = now->val.ucs4;
1134
1135               insert_entry (&repertoire->seq_table, newp, 4,
1136                             (void *) &negative);
1137             }
1138           else
1139             (*seqp)->ucs4 = now->val.ucs4;
1140         }
1141       else if ((*seqp)->ucs4 != now->val.ucs4)
1142         *seqp = NULL;
1143
1144       *wchp = now->val.ucs4;
1145     }
1146   else if (now->tok == tok_charcode)
1147     {
1148       /* We must map from the byte code to UCS4.  */
1149       *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1150                                    now->val.str.lenmb);
1151
1152       if (*seqp == NULL)
1153         *wchp = ILLEGAL_CHAR_VALUE;
1154       else
1155         {
1156           if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1157             (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1158                                                    strlen ((*seqp)->name));
1159           *wchp = (*seqp)->ucs4;
1160         }
1161     }
1162   else
1163     return 1;
1164
1165   return 0;
1166 }
1167
1168
1169 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>'.  */
1170 static void
1171 charclass_symbolic_ellipsis (struct linereader *ldfile,
1172                              struct locale_ctype_t *ctype,
1173                              struct charmap_t *charmap,
1174                              struct repertoire_t *repertoire,
1175                              struct token *now,
1176                              const char *last_str,
1177                              unsigned long int class256_bit,
1178                              unsigned long int class_bit, int base,
1179                              int ignore_content, int handle_digits)
1180 {
1181   const char *nowstr = now->val.str.startmb;
1182   char tmp[now->val.str.lenmb + 1];
1183   const char *cp;
1184   char *endp;
1185   unsigned long int from;
1186   unsigned long int to;
1187
1188   /* We have to compute the ellipsis values using the symbolic names.  */
1189   assert (last_str != NULL);
1190
1191   if (strlen (last_str) != now->val.str.lenmb)
1192     {
1193     invalid_range:
1194       lr_error (ldfile,
1195                 _("`%s' and `%.*s' are no valid names for symbolic range"),
1196                 last_str, now->val.str.lenmb, nowstr);
1197       return;
1198     }
1199
1200   if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1201     /* Nothing to do, the names are the same.  */
1202     return;
1203
1204   for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1205     ;
1206
1207   errno = 0;
1208   from = strtoul (cp, &endp, base);
1209   if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1210     goto invalid_range;
1211
1212   to = strtoul (nowstr + (cp - last_str), &endp, base);
1213   if ((to == UINT_MAX && errno == ERANGE)
1214       || (endp - nowstr) != now->val.str.lenmb || from >= to)
1215     goto invalid_range;
1216
1217   /* OK, we have a range FROM - TO.  Now we can create the symbolic names.  */
1218   if (!ignore_content)
1219     {
1220       now->val.str.startmb = tmp;
1221       while (++from <= to)
1222         {
1223           struct charseq *seq;
1224           uint32_t wch;
1225
1226           sprintf (tmp, (base == 10 ? "%.*s%0*d" : "%.*s%0*X"), cp - last_str,
1227                    last_str, now->val.str.lenmb - (cp - last_str), from);
1228
1229           get_character (now, charmap, repertoire, &seq, &wch);
1230
1231           if (seq != NULL && seq->nbytes == 1)
1232             /* Yep, we can store information about this byte sequence.  */
1233             ctype->class256_collection[seq->bytes[0]] |= class256_bit;
1234
1235           if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1236             /* We have the UCS4 position.  */
1237             *find_idx (ctype, &ctype->class_collection,
1238                        &ctype->class_collection_max,
1239                        &ctype->class_collection_act, wch) |= class_bit;
1240
1241           if (handle_digits == 1)
1242             {
1243               /* We must store the digit values.  */
1244               if (ctype->mbdigits_act == ctype->mbdigits_max)
1245                 {
1246                   ctype->mbdigits_max *= 2;
1247                   ctype->mbdigits = xrealloc (ctype->mbdigits,
1248                                               (ctype->mbdigits_max
1249                                                * sizeof (char *)));
1250                   ctype->wcdigits_max *= 2;
1251                   ctype->wcdigits = xrealloc (ctype->wcdigits,
1252                                               (ctype->wcdigits_max
1253                                                * sizeof (uint32_t)));
1254                 }
1255
1256               ctype->mbdigits[ctype->mbdigits_act++] = seq;
1257               ctype->wcdigits[ctype->wcdigits_act++] = wch;
1258             }
1259           else if (handle_digits == 2)
1260             {
1261               /* We must store the digit values.  */
1262               if (ctype->outdigits_act >= 10)
1263                 {
1264                   lr_error (ldfile, _("\
1265 %s: field `%s' does not contain exactly ten entries"),
1266                             "LC_CTYPE", "outdigit");
1267                   return;
1268                 }
1269
1270               ctype->mboutdigits[ctype->outdigits_act] = seq;
1271               ctype->wcoutdigits[ctype->outdigits_act] = wch;
1272               ++ctype->outdigits_act;
1273             }
1274         }
1275     }
1276 }
1277
1278
1279 /* Ellipsis like in `<U1234>..<U2345>'.  */
1280 static void
1281 charclass_ucs4_ellipsis (struct linereader *ldfile,
1282                          struct locale_ctype_t *ctype,
1283                          struct charmap_t *charmap,
1284                          struct repertoire_t *repertoire,
1285                          struct token *now, uint32_t last_wch,
1286                          unsigned long int class256_bit,
1287                          unsigned long int class_bit, int ignore_content,
1288                          int handle_digits)
1289 {
1290   if (last_wch > now->val.ucs4)
1291     {
1292       lr_error (ldfile, _("\
1293 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1294                 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1295                 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
1296       return;
1297     }
1298
1299   if (!ignore_content)
1300     while (++last_wch <= now->val.ucs4)
1301       {
1302         /* We have to find out whether there is a byte sequence corresponding
1303            to this UCS4 value.  */
1304         struct charseq *seq = repertoire_find_seq (repertoire, last_wch);
1305
1306         /* If this is the first time we look for this sequence create a new
1307            entry.  */
1308         if (seq == NULL)
1309           {
1310             /* Find the symbolic name for this UCS4 value.  */
1311             const char *symbol = repertoire_find_symbol (repertoire, last_wch);
1312             uint32_t *newp = obstack_alloc (&repertoire->mem_pool, 4);
1313             *newp = last_wch;
1314
1315             if (symbol != NULL)
1316               /* We have a name, now search the multibyte value.  */
1317               seq = charmap_find_value (charmap, symbol, strlen (symbol));
1318
1319             if (seq == NULL)
1320               {
1321                 /* We have to create a fake entry.  */
1322                 static const struct charseq negative
1323                   = { .ucs4 = ILLEGAL_CHAR_VALUE };
1324                 seq = (struct charseq *) &negative;
1325               }
1326             else
1327               seq->ucs4 = last_wch;
1328
1329             insert_entry (&repertoire->seq_table, newp, 4, seq);
1330           }
1331
1332         /* We have a name, now search the multibyte value.  */
1333         if (seq->ucs4 == last_wch && seq->nbytes == 1)
1334           /* Yep, we can store information about this byte sequence.  */
1335           ctype->class256_collection[(size_t) seq->bytes[0]]
1336             |= class256_bit;
1337
1338         /* And of course we have the UCS4 position.  */
1339         if (class_bit != 0 && class_bit != 0)
1340           *find_idx (ctype, &ctype->class_collection,
1341                      &ctype->class_collection_max,
1342                      &ctype->class_collection_act, last_wch) |= class_bit;
1343
1344         if (handle_digits == 1)
1345           {
1346             /* We must store the digit values.  */
1347             if (ctype->mbdigits_act == ctype->mbdigits_max)
1348               {
1349                 ctype->mbdigits_max *= 2;
1350                 ctype->mbdigits = xrealloc (ctype->mbdigits,
1351                                             (ctype->mbdigits_max
1352                                              * sizeof (char *)));
1353                 ctype->wcdigits_max *= 2;
1354                 ctype->wcdigits = xrealloc (ctype->wcdigits,
1355                                             (ctype->wcdigits_max
1356                                              * sizeof (uint32_t)));
1357               }
1358
1359             ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1360                                                       ? seq : NULL);
1361             ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1362           }
1363         else if (handle_digits == 2)
1364           {
1365             /* We must store the digit values.  */
1366             if (ctype->outdigits_act >= 10)
1367               {
1368                 lr_error (ldfile, _("\
1369 %s: field `%s' does not contain exactly ten entries"),
1370                           "LC_CTYPE", "outdigit");
1371                 return;
1372               }
1373
1374             ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1375                                                         ? seq : NULL);
1376             ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1377             ++ctype->outdigits_act;
1378           }
1379       }
1380 }
1381
1382
1383 /* Ellipsis as in `/xea/x12.../xea/x34'.  */
1384 static void
1385 charclass_charcode_ellipsis (struct linereader *ldfile,
1386                              struct locale_ctype_t *ctype,
1387                              struct charmap_t *charmap,
1388                              struct repertoire_t *repertoire,
1389                              struct token *now, char *last_charcode,
1390                              uint32_t last_charcode_len,
1391                              unsigned long int class256_bit,
1392                              unsigned long int class_bit, int ignore_content,
1393                              int handle_digits)
1394 {
1395   /* First check whether the to-value is larger.  */
1396   if (now->val.charcode.nbytes != last_charcode_len)
1397     {
1398       lr_error (ldfile, _("\
1399 start end end character sequence of range must have the same length"));
1400       return;
1401     }
1402
1403   if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
1404     {
1405       lr_error (ldfile, _("\
1406 to-value character sequence is smaller than from-value sequence"));
1407       return;
1408     }
1409
1410   if (!ignore_content)
1411     {
1412       do
1413         {
1414           /* Increment the byte sequence value.  */
1415           struct charseq *seq;
1416           uint32_t wch;
1417           int i;
1418
1419           for (i = last_charcode_len - 1; i >= 0; --i)
1420             if (++last_charcode[i] != 0)
1421               break;
1422
1423           if (last_charcode_len == 1)
1424             /* Of course we have the charcode value.  */
1425             ctype->class256_collection[(size_t) last_charcode[0]]
1426               |= class256_bit;
1427
1428           /* Find the symbolic name.  */
1429           seq = charmap_find_symbol (charmap, last_charcode,
1430                                      last_charcode_len);
1431           if (seq != NULL)
1432             {
1433               if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1434                 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1435                                                    strlen (seq->name));
1436               wch = seq->ucs4;
1437
1438               if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1439                 *find_idx (ctype, &ctype->class_collection,
1440                            &ctype->class_collection_max,
1441                            &ctype->class_collection_act, wch) |= class_bit;
1442             }
1443           else
1444             wch = ILLEGAL_CHAR_VALUE;
1445
1446           if (handle_digits == 1)
1447             {
1448               /* We must store the digit values.  */
1449               if (ctype->mbdigits_act == ctype->mbdigits_max)
1450                 {
1451                   ctype->mbdigits_max *= 2;
1452                   ctype->mbdigits = xrealloc (ctype->mbdigits,
1453                                               (ctype->mbdigits_max
1454                                                * sizeof (char *)));
1455                   ctype->wcdigits_max *= 2;
1456                   ctype->wcdigits = xrealloc (ctype->wcdigits,
1457                                               (ctype->wcdigits_max
1458                                                * sizeof (uint32_t)));
1459                 }
1460
1461               seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1462               memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1463               seq->nbytes = last_charcode_len;
1464
1465               ctype->mbdigits[ctype->mbdigits_act++] = seq;
1466               ctype->wcdigits[ctype->wcdigits_act++] = wch;
1467             }
1468           else if (handle_digits == 2)
1469             {
1470               struct charseq *seq;
1471               /* We must store the digit values.  */
1472               if (ctype->outdigits_act >= 10)
1473                 {
1474                   lr_error (ldfile, _("\
1475 %s: field `%s' does not contain exactly ten entries"),
1476                             "LC_CTYPE", "outdigit");
1477                   return;
1478                 }
1479
1480               seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1481               memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1482               seq->nbytes = last_charcode_len;
1483
1484               ctype->mboutdigits[ctype->outdigits_act] = seq;
1485               ctype->wcoutdigits[ctype->outdigits_act] = wch;
1486               ++ctype->outdigits_act;
1487             }
1488         }
1489       while (memcmp (last_charcode, now->val.charcode.bytes,
1490                      last_charcode_len) != 0);
1491     }
1492 }
1493
1494
1495 /* Read one transliteration entry.  */
1496 static uint32_t *
1497 read_widestring (struct linereader *ldfile, struct token *now,
1498                  struct charmap_t *charmap, struct repertoire_t *repertoire)
1499 {
1500   uint32_t *wstr;
1501
1502   if (now->tok == tok_default_missing)
1503     /* The special name "" will denote this case.  */
1504     wstr = (uint32_t *) L"";
1505   else if (now->tok == tok_bsymbol)
1506     {
1507       /* Get the value from the repertoire.  */
1508       wstr = xmalloc (2 * sizeof (uint32_t));
1509       wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1510                                        now->val.str.lenmb);
1511       if (wstr[0] == ILLEGAL_CHAR_VALUE)
1512         /* We cannot proceed, we don't know the UCS4 value.  */
1513         return NULL;
1514
1515       wstr[1] = 0;
1516     }
1517   else if (now->tok == tok_ucs4)
1518     {
1519       wstr = xmalloc (2 * sizeof (uint32_t));
1520       wstr[0] = now->val.ucs4;
1521       wstr[1] = 0;
1522     }
1523   else if (now->tok == tok_charcode)
1524     {
1525       /* Argh, we have to convert to the symbol name first and then to the
1526          UCS4 value.  */
1527       struct charseq *seq = charmap_find_symbol (charmap,
1528                                                  now->val.str.startmb,
1529                                                  now->val.str.lenmb);
1530       if (seq == NULL)
1531         /* Cannot find the UCS4 value.  */
1532         return NULL;
1533
1534       if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1535         seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1536                                            strlen (seq->name));
1537       if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1538         /* We cannot proceed, we don't know the UCS4 value.  */
1539         return NULL;
1540
1541       wstr = xmalloc (2 * sizeof (uint32_t));
1542       wstr[0] = seq->ucs4;
1543       wstr[1] = 0;
1544     }
1545   else if (now->tok == tok_string)
1546     {
1547       wstr = now->val.str.startwc;
1548       if (wstr[0] == 0)
1549         return NULL;
1550     }
1551   else
1552     {
1553       if (now->tok != tok_eol && now->tok != tok_eof)
1554         lr_ignore_rest (ldfile, 0);
1555       SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1556       return (uint32_t *) -1l;
1557     }
1558
1559   return wstr;
1560 }
1561
1562
1563 static void
1564 read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1565                      struct token *now, struct charmap_t *charmap,
1566                      struct repertoire_t *repertoire)
1567 {
1568   uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1569   struct translit_t *result;
1570   struct translit_to_t **top;
1571   struct obstack *ob = &ctype->mem_pool;
1572   int first;
1573   int ignore;
1574
1575   if (from_wstr == NULL)
1576     /* There is no valid from string.  */
1577     return;
1578
1579   result = (struct translit_t *) obstack_alloc (ob,
1580                                                 sizeof (struct translit_t));
1581   result->from = from_wstr;
1582   result->next = NULL;
1583   result->to = NULL;
1584   top = &result->to;
1585   first = 1;
1586   ignore = 0;
1587
1588   while (1)
1589     {
1590       uint32_t *to_wstr;
1591
1592       /* Next we have one or more transliterations.  They are
1593          separated by semicolons.  */
1594       now = lr_token (ldfile, charmap, repertoire);
1595
1596       if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
1597         {
1598           /* One string read.  */
1599           const uint32_t zero = 0;
1600
1601           if (!ignore)
1602             {
1603               obstack_grow (ob, &zero, 4);
1604               to_wstr = obstack_finish (ob);
1605
1606               *top = obstack_alloc (ob, sizeof (struct translit_to_t));
1607               (*top)->str = to_wstr;
1608               (*top)->next = NULL;
1609             }
1610
1611           if (now->tok == tok_eol)
1612             {
1613               result->next = ctype->translit;
1614               ctype->translit = result;
1615               return;
1616             }
1617
1618           if (!ignore)
1619             top = &(*top)->next;
1620           ignore = 0;
1621         }
1622       else
1623         {
1624           to_wstr = read_widestring (ldfile, now, charmap, repertoire);
1625           if (to_wstr == (uint32_t *) -1l)
1626             {
1627               /* An error occurred.  */
1628               obstack_free (ob, result);
1629               return;
1630             }
1631
1632           if (to_wstr == NULL)
1633             ignore = 1;
1634           else
1635             /* This value is usable.  */
1636             obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
1637
1638           first = 0;
1639         }
1640     }
1641 }
1642
1643
1644 /* The parser for the LC_CTYPE section of the locale definition.  */
1645 void
1646 ctype_read (struct linereader *ldfile, struct localedef_t *result,
1647             struct charmap_t *charmap, const char *repertoire_name,
1648             int ignore_content)
1649 {
1650   struct repertoire_t *repertoire = NULL;
1651   struct locale_ctype_t *ctype;
1652   struct token *now;
1653   enum token_t nowtok;
1654   size_t cnt;
1655   struct charseq *last_seq;
1656   uint32_t last_wch = 0;
1657   enum token_t last_token;
1658   enum token_t ellipsis_token;
1659   char last_charcode[16];
1660   size_t last_charcode_len = 0;
1661   const char *last_str = NULL;
1662   int mapidx;
1663
1664   /* Get the repertoire we have to use.  */
1665   if (repertoire_name != NULL)
1666     repertoire = repertoire_read (repertoire_name);
1667
1668   /* The rest of the line containing `LC_CTYPE' must be free.  */
1669   lr_ignore_rest (ldfile, 1);
1670
1671
1672   do
1673     {
1674       now = lr_token (ldfile, charmap, NULL);
1675       nowtok = now->tok;
1676     }
1677   while (nowtok == tok_eol);
1678
1679   /* If we see `copy' now we are almost done.  */
1680   if (nowtok == tok_copy)
1681     {
1682       handle_copy (ldfile, charmap, repertoire, result, tok_lc_ctype, LC_CTYPE,
1683                    "LC_CTYPE", ignore_content);
1684       return;
1685     }
1686
1687   /* Prepare the data structures.  */
1688   ctype_startup (ldfile, result, charmap, ignore_content);
1689   ctype = result->categories[LC_CTYPE].ctype;
1690
1691   /* Remember the repertoire we use.  */
1692   if (!ignore_content)
1693     ctype->repertoire = repertoire;
1694
1695   while (1)
1696     {
1697       unsigned long int class_bit = 0;
1698       unsigned long int class256_bit = 0;
1699       int handle_digits = 0;
1700
1701       /* Of course we don't proceed beyond the end of file.  */
1702       if (nowtok == tok_eof)
1703         break;
1704
1705       /* Ingore empty lines.  */
1706       if (nowtok == tok_eol)
1707         {
1708           now = lr_token (ldfile, charmap, NULL);
1709           nowtok = now->tok;
1710           continue;
1711         }
1712
1713       switch (nowtok)
1714         {
1715         case tok_charclass:
1716           now = lr_token (ldfile, charmap, NULL);
1717           while (now->tok == tok_ident || now->tok == tok_string)
1718             {
1719               ctype_class_new (ldfile, ctype, now->val.str.startmb);
1720               now = lr_token (ldfile, charmap, NULL);
1721               if (now->tok != tok_semicolon)
1722                 break;
1723               now = lr_token (ldfile, charmap, NULL);
1724             }
1725           if (now->tok != tok_eol)
1726             SYNTAX_ERROR (_("\
1727 %s: syntax error in definition of new character class"), "LC_CTYPE");
1728           break;
1729
1730         case tok_charconv:
1731           now = lr_token (ldfile, charmap, NULL);
1732           while (now->tok == tok_ident || now->tok == tok_string)
1733             {
1734               ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
1735               now = lr_token (ldfile, charmap, NULL);
1736               if (now->tok != tok_semicolon)
1737                 break;
1738               now = lr_token (ldfile, charmap, NULL);
1739             }
1740           if (now->tok != tok_eol)
1741             SYNTAX_ERROR (_("\
1742 %s: syntax error in definition of new character map"), "LC_CTYPE");
1743           break;
1744
1745         case tok_class:
1746           /* Ignore the rest of the line if we don't need the input of
1747              this line.  */
1748           if (ignore_content)
1749             {
1750               lr_ignore_rest (ldfile, 0);
1751               break;
1752             }
1753
1754           /* We simply forget the `class' keyword and use the following
1755              operand to determine the bit.  */
1756           now = lr_token (ldfile, charmap, NULL);
1757           if (now->tok == tok_ident || now->tok == tok_string)
1758             {
1759               /* Must be one of the predefined class names.  */
1760               for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1761                 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
1762                   break;
1763               if (cnt >= ctype->nr_charclass)
1764                 {
1765                   if (now->val.str.lenmb == 8
1766                       && memcmp ("special1", now->val.str.startmb, 8) == 0)
1767                     class_bit = _ISwspecial1;
1768                   else if (now->val.str.lenmb == 8
1769                       && memcmp ("special2", now->val.str.startmb, 8) == 0)
1770                     class_bit = _ISwspecial2;
1771                   else if (now->val.str.lenmb == 8
1772                       && memcmp ("special3", now->val.str.startmb, 8) == 0)
1773                     class_bit = _ISwspecial3;
1774                   else
1775                     {
1776                       lr_error (ldfile, _("\
1777 unknown character class `%s' in category `LC_CTYPE'"),
1778                                 now->val.str.startmb);
1779                       free (now->val.str.startmb);
1780
1781                       lr_ignore_rest (ldfile, 0);
1782                       continue;
1783                     }
1784                 }
1785               else
1786                 class_bit = _ISwbit (cnt);
1787
1788               free (now->val.str.startmb);
1789             }
1790           else if (now->tok == tok_digit)
1791             goto handle_tok_digit;
1792           else if (now->tok < tok_upper || now->tok > tok_blank)
1793             goto err_label;
1794           else
1795             {
1796               class_bit = BITw (now->tok);
1797               class256_bit = BIT (now->tok);
1798             }
1799
1800           /* The next character must be a semicolon.  */
1801           now = lr_token (ldfile, charmap, NULL);
1802           if (now->tok != tok_semicolon)
1803             goto err_label;
1804           goto read_charclass;
1805
1806         case tok_upper:
1807         case tok_lower:
1808         case tok_alpha:
1809         case tok_alnum:
1810         case tok_space:
1811         case tok_cntrl:
1812         case tok_punct:
1813         case tok_graph:
1814         case tok_print:
1815         case tok_xdigit:
1816         case tok_blank:
1817           /* Ignore the rest of the line if we don't need the input of
1818              this line.  */
1819           if (ignore_content)
1820             {
1821               lr_ignore_rest (ldfile, 0);
1822               break;
1823             }
1824
1825           class_bit = BITw (now->tok);
1826           class256_bit = BIT (now->tok);
1827           handle_digits = 0;
1828         read_charclass:
1829           ctype->class_done |= class_bit;
1830           last_token = tok_none;
1831           ellipsis_token = tok_none;
1832           now = lr_token (ldfile, charmap, NULL);
1833           while (now->tok != tok_eol && now->tok != tok_eof)
1834             {
1835               uint32_t wch;
1836               struct charseq *seq;
1837
1838               if (now->tok != tok_bsymbol)
1839                 /* XXX Cannot be handled yet.  We will have support
1840                    for tok_ucs4 soon.  */
1841                 goto err_label;
1842
1843               if (ellipsis_token == tok_none)
1844                 {
1845                   if (get_character (now, charmap, repertoire, &seq, &wch))
1846                     goto err_label;
1847
1848                   if (!ignore_content && seq != NULL && seq->nbytes == 1)
1849                     /* Yep, we can store information about this byte
1850                        sequence.  */
1851                     ctype->class256_collection[seq->bytes[0]] |= class256_bit;
1852
1853                   if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
1854                       && class_bit != 0)
1855                     /* We have the UCS4 position.  */
1856                     *find_idx (ctype, &ctype->class_collection,
1857                                &ctype->class_collection_max,
1858                                &ctype->class_collection_act, wch) |= class_bit;
1859
1860                   last_token = now->tok;
1861                   /* Terminate the string.  */
1862                   now->val.str.startmb[now->val.str.lenmb] = '\0';
1863                   last_str = now->val.str.startmb;
1864                   last_seq = seq;
1865                   last_wch = wch;
1866                   memcpy (last_charcode, now->val.charcode.bytes, 16);
1867                   last_charcode_len = now->val.charcode.nbytes;
1868
1869                   if (!ignore_content && handle_digits == 1)
1870                     {
1871                       /* We must store the digit values.  */
1872                       if (ctype->mbdigits_act == ctype->mbdigits_max)
1873                         {
1874                           ctype->mbdigits_max += 10;
1875                           ctype->mbdigits = xrealloc (ctype->mbdigits,
1876                                                       (ctype->mbdigits_max
1877                                                        * sizeof (char *)));
1878                           ctype->wcdigits_max += 10;
1879                           ctype->wcdigits = xrealloc (ctype->wcdigits,
1880                                                       (ctype->wcdigits_max
1881                                                        * sizeof (uint32_t)));
1882                         }
1883
1884                       ctype->mbdigits[ctype->mbdigits_act++] = seq;
1885                       ctype->wcdigits[ctype->wcdigits_act++] = wch;
1886                     }
1887                   else if (!ignore_content && handle_digits == 2)
1888                     {
1889                       /* We must store the digit values.  */
1890                       if (ctype->outdigits_act >= 10)
1891                         {
1892                           lr_error (ldfile, _("\
1893 %s: field `%s' does not contain exactly ten entries"),
1894                             "LC_CTYPE", "outdigit");
1895                           goto err_label;
1896                         }
1897
1898                       ctype->mboutdigits[ctype->outdigits_act] = seq;
1899                       ctype->wcoutdigits[ctype->outdigits_act] = wch;
1900                       ++ctype->outdigits_act;
1901                     }
1902                 }
1903               else
1904                 {
1905                   /* Now it gets complicated.  We have to resolve the
1906                      ellipsis problem.  First we must distinguish between
1907                      the different kind of ellipsis and this must match the
1908                      tokens we have seen.  */
1909                   assert (last_token != tok_none);
1910
1911                   if (last_token != now->tok)
1912                     {
1913                       lr_error (ldfile, _("\
1914 ellipsis range must be marked by two operands of same type"));
1915                       lr_ignore_rest (ldfile, 0);
1916                       break;
1917                     }
1918
1919                   if (last_token == tok_bsymbol)
1920                     {
1921                       if (ellipsis_token == tok_ellipsis3)
1922                         lr_error (ldfile, _("with symbolic name range values \
1923 the absolute ellipsis `...' must not be used"));
1924
1925                       charclass_symbolic_ellipsis (ldfile, ctype, charmap,
1926                                                    repertoire, now, last_str,
1927                                                    class256_bit, class_bit,
1928                                                    (ellipsis_token
1929                                                     == tok_ellipsis4
1930                                                     ? 10 : 16),
1931                                                    ignore_content,
1932                                                    handle_digits);
1933                     }
1934                   else if (last_token == tok_ucs4)
1935                     {
1936                       if (ellipsis_token != tok_ellipsis2)
1937                         lr_error (ldfile, _("\
1938 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
1939
1940                       charclass_ucs4_ellipsis (ldfile, ctype, charmap,
1941                                                repertoire, now, last_wch,
1942                                                class256_bit, class_bit,
1943                                                ignore_content, handle_digits);
1944                     }
1945                   else
1946                     {
1947                       assert (last_token == tok_charcode);
1948
1949                       if (ellipsis_token != tok_ellipsis3)
1950                         lr_error (ldfile, _("\
1951 with character code range values one must use the absolute ellipsis `...'"));
1952
1953                       charclass_charcode_ellipsis (ldfile, ctype, charmap,
1954                                                    repertoire, now,
1955                                                    last_charcode,
1956                                                    last_charcode_len,
1957                                                    class256_bit, class_bit,
1958                                                    ignore_content,
1959                                                    handle_digits);
1960                     }
1961
1962                   /* Now we have used the last value.  */
1963                   last_token = tok_none;
1964                 }
1965
1966               /* Next we expect a semicolon or the end of the line.  */
1967               now = lr_token (ldfile, charmap, NULL);
1968               if (now->tok == tok_eol || now->tok == tok_eof)
1969                 break;
1970
1971               if (last_token != tok_none
1972                   && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4)
1973                 {
1974                   ellipsis_token = now->tok;
1975                   now = lr_token (ldfile, charmap, NULL);
1976                   continue;
1977                 }
1978
1979               if (now->tok != tok_semicolon)
1980                 goto err_label;
1981
1982               /* And get the next character.  */
1983               now = lr_token (ldfile, charmap, NULL);
1984
1985               ellipsis_token = tok_none;
1986             }
1987           break;
1988
1989         case tok_digit:
1990           /* Ignore the rest of the line if we don't need the input of
1991              this line.  */
1992           if (ignore_content)
1993             {
1994               lr_ignore_rest (ldfile, 0);
1995               break;
1996             }
1997
1998         handle_tok_digit:
1999           class_bit = _ISwdigit;
2000           class256_bit = _ISdigit;
2001           handle_digits = 1;
2002           goto read_charclass;
2003
2004         case tok_outdigit:
2005           /* Ignore the rest of the line if we don't need the input of
2006              this line.  */
2007           if (ignore_content)
2008             {
2009               lr_ignore_rest (ldfile, 0);
2010               break;
2011             }
2012
2013           if (ctype->outdigits_act != 0)
2014             lr_error (ldfile, _("\
2015 %s: field `%s' declared more than once"),
2016                       "LC_CTYPE", "outdigit");
2017           class_bit = 0;
2018           class256_bit = 0;
2019           handle_digits = 2;
2020           goto read_charclass;
2021
2022         case tok_toupper:
2023           /* Ignore the rest of the line if we don't need the input of
2024              this line.  */
2025           if (ignore_content)
2026             {
2027               lr_ignore_rest (ldfile, 0);
2028               break;
2029             }
2030
2031           mapidx = 0;
2032           goto read_mapping;
2033
2034         case tok_tolower:
2035           /* Ignore the rest of the line if we don't need the input of
2036              this line.  */
2037           if (ignore_content)
2038             {
2039               lr_ignore_rest (ldfile, 0);
2040               break;
2041             }
2042
2043           mapidx = 1;
2044           goto read_mapping;
2045
2046         case tok_map:
2047           /* Ignore the rest of the line if we don't need the input of
2048              this line.  */
2049           if (ignore_content)
2050             {
2051               lr_ignore_rest (ldfile, 0);
2052               break;
2053             }
2054
2055           /* We simply forget the `map' keyword and use the following
2056              operand to determine the mapping.  */
2057           now = lr_token (ldfile, charmap, NULL);
2058           if (now->tok == tok_ident || now->tok == tok_string)
2059             {
2060               size_t cnt;
2061
2062               for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2063                 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2064                   break;
2065
2066               if (cnt < ctype->map_collection_nr)
2067                 mapidx = cnt;
2068               else
2069                 {
2070                   lr_error (ldfile, _("unknown map `%s'"),
2071                             now->val.str.startmb);
2072                   lr_ignore_rest (ldfile, 0);
2073                   break;
2074                 }
2075             }
2076           else if (now->tok < tok_toupper || now->tok > tok_tolower)
2077             goto err_label;
2078           else
2079             mapidx = now->tok - tok_toupper;
2080
2081           now = lr_token (ldfile, charmap, NULL);
2082           /* This better should be a semicolon.  */
2083           if (now->tok != tok_semicolon)
2084             goto err_label;
2085
2086         read_mapping:
2087           /* Test whether this mapping was already defined.  */
2088           if (ctype->tomap_done[mapidx])
2089             {
2090               lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2091                         ctype->mapnames[mapidx]);
2092               lr_ignore_rest (ldfile, 0);
2093               break;
2094             }
2095           ctype->tomap_done[mapidx] = 1;
2096
2097           now = lr_token (ldfile, charmap, NULL);
2098           while (now->tok != tok_eol && now->tok != tok_eof)
2099             {
2100               struct charseq *from_seq;
2101               uint32_t from_wch;
2102               struct charseq *to_seq;
2103               uint32_t to_wch;
2104
2105               /* Every pair starts with an opening brace.  */
2106               if (now->tok != tok_open_brace)
2107                 goto err_label;
2108
2109               /* Next comes the from-value.  */
2110               now = lr_token (ldfile, charmap, NULL);
2111               if (get_character (now, charmap, repertoire, &from_seq,
2112                                  &from_wch) != 0)
2113                 goto err_label;
2114
2115               /* The next is a comma.  */
2116               now = lr_token (ldfile, charmap, NULL);
2117               if (now->tok != tok_comma)
2118                 goto err_label;
2119
2120               /* And the other value.  */
2121               now = lr_token (ldfile, charmap, NULL);
2122               if (get_character (now, charmap, repertoire, &to_seq,
2123                                  &to_wch) != 0)
2124                 goto err_label;
2125
2126               /* And the last thing is the closing brace.  */
2127               now = lr_token (ldfile, charmap, NULL);
2128               if (now->tok != tok_close_brace)
2129                 goto err_label;
2130
2131               if (!ignore_content)
2132                 {
2133                   if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2134                       && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2135                     /* We can use this value.  */
2136                     ctype->map256_collection[mapidx][from_seq->bytes[0]]
2137                       = to_seq->bytes[0];
2138
2139                   if (from_wch != ILLEGAL_CHAR_VALUE
2140                       && to_wch != ILLEGAL_CHAR_VALUE)
2141                     /* Both correct values.  */
2142                     *find_idx (ctype, &ctype->map_collection[mapidx],
2143                                &ctype->map_collection_max[mapidx],
2144                                &ctype->map_collection_act[mapidx],
2145                                from_wch) = to_wch;
2146                 }
2147
2148               /* Now comes a semicolon or the end of the line/file.  */
2149               now = lr_token (ldfile, charmap, NULL);
2150               if (now->tok == tok_semicolon)
2151                 now = lr_token (ldfile, charmap, NULL);
2152             }
2153           break;
2154
2155         case tok_translit_start:
2156           /* Ignore the rest of the line if we don't need the input of
2157              this line.  */
2158           if (ignore_content)
2159             {
2160               lr_ignore_rest (ldfile, 0);
2161               break;
2162             }
2163
2164           /* The rest of the line better should be empty.  */
2165           lr_ignore_rest (ldfile, 1);
2166
2167           /* We count here the number of allocated entries in the `translit'
2168              array.  */
2169           cnt = 0;
2170
2171           /* We proceed until we see the `translit_end' token.  */
2172           while (now = lr_token (ldfile, charmap, repertoire),
2173                  now->tok != tok_translit_end && now->tok != tok_eof)
2174             {
2175               if (now->tok == tok_eol)
2176                 /* Ignore empty lines.  */
2177                 continue;
2178
2179               if (now->tok == tok_translit_end)
2180                 {
2181                   lr_ignore_rest (ldfile, 0);
2182                   break;
2183                 }
2184
2185               if (now->tok == tok_include)
2186                 {
2187                   /* We have to include locale.  */
2188                   const char *locale_name;
2189                   const char *repertoire_name;
2190
2191                   now = lr_token (ldfile, charmap, NULL);
2192                   /* This should be a string or an identifier.  In any
2193                      case something to name a locale.  */
2194                   if (now->tok != tok_string && now->tok != tok_ident)
2195                     {
2196                     translit_syntax:
2197                       lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2198                       lr_ignore_rest (ldfile, 0);
2199                       continue;
2200                     }
2201                   locale_name = now->val.str.startmb;
2202
2203                   /* Next should be a semicolon.  */
2204                   now = lr_token (ldfile, charmap, NULL);
2205                   if (now->tok != tok_semicolon)
2206                     goto translit_syntax;
2207
2208                   /* Now the repertoire name.  */
2209                   now = lr_token (ldfile, charmap, NULL);
2210                   if ((now->tok != tok_string && now->tok != tok_ident)
2211                       || now->val.str.startmb == NULL)
2212                     goto translit_syntax;
2213                   repertoire_name = now->val.str.startmb;
2214
2215                   /* We must not have more than one `include'.  */
2216                   if (ctype->translit_copy_locale != NULL)
2217                     {
2218                       lr_error (ldfile, _("\
2219 %s: only one `include' instruction allowed"), "LC_CTYPE");
2220                       lr_ignore_rest (ldfile, 0);
2221                       continue;
2222                     }
2223
2224                   ctype->translit_copy_locale = locale_name;
2225                   ctype->translit_copy_repertoire = repertoire_name;
2226
2227                   /* The rest of the line must be empty.  */
2228                   lr_ignore_rest (ldfile, 1);
2229                   continue;
2230                 }
2231
2232               read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2233             }
2234           break;
2235
2236         case tok_ident:
2237           /* Ignore the rest of the line if we don't need the input of
2238              this line.  */
2239           if (ignore_content)
2240             {
2241               lr_ignore_rest (ldfile, 0);
2242               break;
2243             }
2244
2245           /* This could mean one of several things.  First test whether
2246              it's a character class name.  */
2247           for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2248             if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2249               break;
2250           if (cnt < ctype->nr_charclass)
2251             {
2252               class_bit = _ISwbit (cnt);
2253               class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2254               free (now->val.str.startmb);
2255               goto read_charclass;
2256             }
2257           for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2258             if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2259               break;
2260           if (cnt < ctype->map_collection_nr)
2261             {
2262               mapidx = cnt;
2263               free (now->val.str.startmb);
2264               goto read_mapping;
2265             }
2266           if (strcmp (now->val.str.startmb, "special1") == 0)
2267             {
2268               class_bit = _ISwspecial1;
2269               free (now->val.str.startmb);
2270               goto read_charclass;
2271             }
2272           if (strcmp (now->val.str.startmb, "special2") == 0)
2273             {
2274               class_bit = _ISwspecial2;
2275               free (now->val.str.startmb);
2276               goto read_charclass;
2277             }
2278           if (strcmp (now->val.str.startmb, "special3") == 0)
2279             {
2280               class_bit = _ISwspecial3;
2281               free (now->val.str.startmb);
2282               goto read_charclass;
2283             }
2284           if (strcmp (now->val.str.startmb, "tosymmetric") == 0)
2285             {
2286               mapidx = 2;
2287               goto read_mapping;
2288             }
2289           break;
2290
2291         case tok_end:
2292           /* Next we assume `LC_CTYPE'.  */
2293           now = lr_token (ldfile, charmap, NULL);
2294           if (now->tok == tok_eof)
2295             break;
2296           if (now->tok == tok_eol)
2297             lr_error (ldfile, _("%s: incomplete `END' line"),
2298                       "LC_CTYPE");
2299           else if (now->tok != tok_lc_ctype)
2300             lr_error (ldfile, _("\
2301 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2302           lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2303           return;
2304
2305         default:
2306         err_label:
2307           if (now->tok != tok_eof)
2308             SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2309         }
2310
2311       /* Prepare for the next round.  */
2312       now = lr_token (ldfile, charmap, NULL);
2313       nowtok = now->tok;
2314     }
2315
2316   /* When we come here we reached the end of the file.  */
2317   lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2318 }
2319
2320
2321 static void
2322 set_class_defaults (struct locale_ctype_t *ctype, struct charmap_t *charmap,
2323                     struct repertoire_t *repertoire)
2324 {
2325   size_t cnt;
2326
2327   /* These function defines the default values for the classes and conversions
2328      according to POSIX.2 2.5.2.1.
2329      It may seem that the order of these if-blocks is arbitrary but it is NOT.
2330      Don't move them unless you know what you do!  */
2331
2332   void set_default (int bitpos, int from, int to)
2333     {
2334       char tmp[2];
2335       int ch;
2336       int bit = _ISbit (bitpos);
2337       int bitw = _ISwbit (bitpos);
2338       /* Define string.  */
2339       strcpy (tmp, "?");
2340
2341       for (ch = from; ch <= to; ++ch)
2342         {
2343           uint32_t value;
2344           struct charseq *seq;
2345           tmp[0] = ch;
2346
2347           value = repertoire_find_value (repertoire, tmp, 1);
2348           if (value == ILLEGAL_CHAR_VALUE)
2349             {
2350               if (!be_quiet)
2351                 error (0, 0, _("\
2352 %s: character `%s' not defined in repertoire while needed as default value"),
2353                        "LC_CTYPE", tmp);
2354             }
2355           else
2356             ELEM (ctype, class_collection, , value) |= bitw;
2357
2358           seq = charmap_find_value (charmap, tmp, 1);
2359           if (seq == NULL)
2360             {
2361               if (!be_quiet)
2362                 error (0, 0, _("\
2363 %s: character `%s' not defined in charmap while needed as default value"),
2364                        "LC_CTYPE", tmp);
2365             }
2366           else if (seq->nbytes != 1)
2367             error (0, 0, _("\
2368 %s: character `%s' in charmap not representable with one byte"),
2369                    "LC_CTYPE", tmp);
2370           else
2371             ctype->class256_collection[seq->bytes[0]] |= bit;
2372         }
2373     }
2374
2375   /* Set default values if keyword was not present.  */
2376   if ((ctype->class_done & BITw (tok_upper)) == 0)
2377     /* "If this keyword [lower] is not specified, the lowercase letters
2378         `A' through `Z', ..., shall automatically belong to this class,
2379         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
2380     set_default (BITPOS (tok_upper), 'A', 'Z');
2381
2382   if ((ctype->class_done & BITw (tok_lower)) == 0)
2383     /* "If this keyword [lower] is not specified, the lowercase letters
2384         `a' through `z', ..., shall automatically belong to this class,
2385         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
2386     set_default (BITPOS (tok_lower), 'a', 'z');
2387
2388   if ((ctype->class_done & BITw (tok_alpha)) == 0)
2389     {
2390       /* Table 2-6 in P1003.2 says that characters in class `upper' or
2391          class `lower' *must* be in class `alpha'.  */
2392       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
2393       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
2394
2395       for (cnt = 0; cnt < 256; ++cnt)
2396         if ((ctype->class256_collection[cnt] & mask) != 0)
2397           ctype->class256_collection[cnt] |= BIT (tok_alpha);
2398
2399       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2400         if ((ctype->class_collection[cnt] & maskw) != 0)
2401           ctype->class_collection[cnt] |= BITw (tok_alpha);
2402     }
2403
2404   if ((ctype->class_done & BITw (tok_digit)) == 0)
2405     /* "If this keyword [digit] is not specified, the digits `0' through
2406         `9', ..., shall automatically belong to this class, with
2407         implementation-defined character values."  [P1003.2, 2.5.2.1]  */
2408     set_default (BITPOS (tok_digit), '0', '9');
2409
2410   /* "Only characters specified for the `alpha' and `digit' keyword
2411      shall be specified.  Characters specified for the keyword `alpha'
2412      and `digit' are automatically included in this class.  */
2413   {
2414     unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
2415     unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
2416
2417     for (cnt = 0; cnt < 256; ++cnt)
2418       if ((ctype->class256_collection[cnt] & mask) != 0)
2419         ctype->class256_collection[cnt] |= BIT (tok_alnum);
2420
2421     for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2422       if ((ctype->class_collection[cnt] & maskw) != 0)
2423         ctype->class_collection[cnt] |= BITw (tok_alnum);
2424   }
2425
2426   if ((ctype->class_done & BITw (tok_space)) == 0)
2427     /* "If this keyword [space] is not specified, the characters <space>,
2428         <form-feed>, <newline>, <carriage-return>, <tab>, and
2429         <vertical-tab>, ..., shall automatically belong to this class,
2430         with implementation-defined character values."  [P1003.2, 2.5.2.1]  */
2431     {
2432       uint32_t value;
2433       struct charseq *seq;
2434
2435       value = repertoire_find_value (repertoire, "space", 5);
2436       if (value == ILLEGAL_CHAR_VALUE)
2437         {
2438           if (!be_quiet)
2439             error (0, 0, _("\
2440 %s: character `%s' not defined while needed as default value"),
2441                    "LC_CTYPE", "<space>");
2442         }
2443       else
2444         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
2445
2446       seq = charmap_find_value (charmap, "space", 5);
2447       if (seq == NULL)
2448         {
2449           if (!be_quiet)
2450             error (0, 0, _("\
2451 %s: character `%s' not defined while needed as default value"),
2452                    "LC_CTYPE", "<space>");
2453         }
2454       else if (seq->nbytes != 1)
2455         error (0, 0, _("\
2456 %s: character `%s' in charmap not representable with one byte"),
2457                "LC_CTYPE", "<space>");
2458       else
2459         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2460
2461
2462       value = repertoire_find_value (repertoire, "form-feed", 9);
2463       if (value == ILLEGAL_CHAR_VALUE)
2464         {
2465           if (!be_quiet)
2466             error (0, 0, _("\
2467 %s: character `%s' not defined while needed as default value"),
2468                    "LC_CTYPE", "<form-feed>");
2469         }
2470       else
2471         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
2472
2473       seq = charmap_find_value (charmap, "form-feed", 9);
2474       if (seq == NULL)
2475         {
2476           if (!be_quiet)
2477             error (0, 0, _("\
2478 %s: character `%s' not defined while needed as default value"),
2479                    "LC_CTYPE", "<form-feed>");
2480         }
2481       else if (seq->nbytes != 1)
2482         error (0, 0, _("\
2483 %s: character `%s' in charmap not representable with one byte"),
2484                "LC_CTYPE", "<form-feed>");
2485       else
2486         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2487
2488
2489       value = repertoire_find_value (repertoire, "newline", 7);
2490       if (value == ILLEGAL_CHAR_VALUE)
2491         {
2492           if (!be_quiet)
2493             error (0, 0, _("\
2494 %s: character `%s' not defined while needed as default value"),
2495                    "LC_CTYPE", "<newline>");
2496         }
2497       else
2498         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
2499
2500       seq = charmap_find_value (charmap, "newline", 7);
2501       if (seq == NULL)
2502         {
2503           if (!be_quiet)
2504             error (0, 0, _("\
2505 character `%s' not defined while needed as default value"),
2506                    "<newline>");
2507         }
2508       else if (seq->nbytes != 1)
2509         error (0, 0, _("\
2510 %s: character `%s' in charmap not representable with one byte"),
2511                "LC_CTYPE", "<newline>");
2512       else
2513         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2514
2515
2516       value = repertoire_find_value (repertoire, "carriage-return", 15);
2517       if (value == ILLEGAL_CHAR_VALUE)
2518         {
2519           if (!be_quiet)
2520             error (0, 0, _("\
2521 %s: character `%s' not defined while needed as default value"),
2522                    "LC_CTYPE", "<carriage-return>");
2523         }
2524       else
2525         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
2526
2527       seq = charmap_find_value (charmap, "carriage-return", 15);
2528       if (seq == NULL)
2529         {
2530           if (!be_quiet)
2531             error (0, 0, _("\
2532 %s: character `%s' not defined while needed as default value"),
2533                    "LC_CTYPE", "<carriage-return>");
2534         }
2535       else if (seq->nbytes != 1)
2536         error (0, 0, _("\
2537 %s: character `%s' in charmap not representable with one byte"),
2538                "LC_CTYPE", "<carriage-return>");
2539       else
2540         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2541
2542
2543       value = repertoire_find_value (repertoire, "tab", 3);
2544       if (value == ILLEGAL_CHAR_VALUE)
2545         {
2546           if (!be_quiet)
2547             error (0, 0, _("\
2548 %s: character `%s' not defined while needed as default value"),
2549                    "LC_CTYPE", "<tab>");
2550         }
2551       else
2552         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
2553
2554       seq = charmap_find_value (charmap, "tab", 3);
2555       if (seq == NULL)
2556         {
2557           if (!be_quiet)
2558             error (0, 0, _("\
2559 %s: character `%s' not defined while needed as default value"),
2560                    "LC_CTYPE", "<tab>");
2561         }
2562       else if (seq->nbytes != 1)
2563         error (0, 0, _("\
2564 %s: character `%s' in charmap not representable with one byte"),
2565                "LC_CTYPE", "<tab>");
2566       else
2567         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2568
2569
2570       value = repertoire_find_value (repertoire, "vertical-tab", 12);
2571       if (value == ILLEGAL_CHAR_VALUE)
2572         {
2573           if (!be_quiet)
2574             error (0, 0, _("\
2575 %s: character `%s' not defined while needed as default value"),
2576                    "LC_CTYPE", "<vertical-tab>");
2577         }
2578       else
2579         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
2580
2581       seq = charmap_find_value (charmap, "vertical-tab", 12);
2582       if (seq == NULL)
2583         {
2584           if (!be_quiet)
2585             error (0, 0, _("\
2586 %s: character `%s' not defined while needed as default value"),
2587                    "LC_CTYPE", "<vertical-tab>");
2588         }
2589       else if (seq->nbytes != 1)
2590         error (0, 0, _("\
2591 %s: character `%s' in charmap not representable with one byte"),
2592                "LC_CTYPE", "<vertical-tab>");
2593       else
2594         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2595     }
2596
2597   if ((ctype->class_done & BITw (tok_xdigit)) == 0)
2598     /* "If this keyword is not specified, the digits `0' to `9', the
2599         uppercase letters `A' through `F', and the lowercase letters `a'
2600         through `f', ..., shall automatically belong to this class, with
2601         implementation defined character values."  [P1003.2, 2.5.2.1]  */
2602     {
2603       set_default (BITPOS (tok_xdigit), '0', '9');
2604       set_default (BITPOS (tok_xdigit), 'A', 'F');
2605       set_default (BITPOS (tok_xdigit), 'a', 'f');
2606     }
2607
2608   if ((ctype->class_done & BITw (tok_blank)) == 0)
2609     /* "If this keyword [blank] is unspecified, the characters <space> and
2610        <tab> shall belong to this character class."  [P1003.2, 2.5.2.1]  */
2611    {
2612       uint32_t value;
2613       struct charseq *seq;
2614
2615       value = repertoire_find_value (repertoire, "space", 5);
2616       if (value == ILLEGAL_CHAR_VALUE)
2617         {
2618           if (!be_quiet)
2619             error (0, 0, _("\
2620 %s: character `%s' not defined while needed as default value"),
2621                    "LC_CTYPE", "<space>");
2622         }
2623       else
2624         ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
2625
2626       seq = charmap_find_value (charmap, "space", 5);
2627       if (seq == NULL)
2628         {
2629           if (!be_quiet)
2630             error (0, 0, _("\
2631 %s: character `%s' not defined while needed as default value"),
2632                    "LC_CTYPE", "<space>");
2633         }
2634       else if (seq->nbytes != 1)
2635         error (0, 0, _("\
2636 %s: character `%s' in charmap not representable with one byte"),
2637                "LC_CTYPE", "<space>");
2638       else
2639         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
2640
2641
2642       value = repertoire_find_value (repertoire, "tab", 3);
2643       if (value == ILLEGAL_CHAR_VALUE)
2644         {
2645           if (!be_quiet)
2646             error (0, 0, _("\
2647 %s: character `%s' not defined while needed as default value"),
2648                    "LC_CTYPE", "<tab>");
2649         }
2650       else
2651         ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
2652
2653       seq = charmap_find_value (charmap, "tab", 3);
2654       if (seq == NULL)
2655         {
2656           if (!be_quiet)
2657             error (0, 0, _("\
2658 %s: character `%s' not defined while needed as default value"),
2659                    "LC_CTYPE", "<tab>");
2660         }
2661       else if (seq->nbytes != 1)
2662         error (0, 0, _("\
2663 %s: character `%s' in charmap not representable with one byte"),
2664                "LC_CTYPE", "<tab>");
2665       else
2666         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
2667     }
2668
2669   if ((ctype->class_done & BITw (tok_graph)) == 0)
2670     /* "If this keyword [graph] is not specified, characters specified for
2671         the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
2672         shall belong to this character class."  [P1003.2, 2.5.2.1]  */
2673     {
2674       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
2675         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
2676       size_t cnt;
2677
2678       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2679         if ((ctype->class_collection[cnt] & mask) != 0)
2680           ctype->class_collection[cnt] |= BIT (tok_graph);
2681
2682       for (cnt = 0; cnt < 256; ++cnt)
2683         if ((ctype->class256_collection[cnt] & mask) != 0)
2684           ctype->class256_collection[cnt] |= BIT (tok_graph);
2685     }
2686
2687   if ((ctype->class_done & BITw (tok_print)) == 0)
2688     /* "If this keyword [print] is not provided, characters specified for
2689         the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
2690         and the <space> character shall belong to this character class."
2691         [P1003.2, 2.5.2.1]  */
2692     {
2693       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
2694         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
2695       size_t cnt;
2696       uint32_t space;
2697       struct charseq *seq;
2698
2699       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2700         if ((ctype->class_collection[cnt] & mask) != 0)
2701           ctype->class_collection[cnt] |= BIT (tok_print);
2702
2703       for (cnt = 0; cnt < 256; ++cnt)
2704         if ((ctype->class256_collection[cnt] & mask) != 0)
2705           ctype->class256_collection[cnt] |= BIT (tok_print);
2706
2707
2708       space = repertoire_find_value (repertoire, "space", 5);
2709       if (space == ILLEGAL_CHAR_VALUE)
2710         {
2711           if (!be_quiet)
2712             error (0, 0, _("\
2713 %s: character `%s' not defined while needed as default value"),
2714                    "LC_CTYPE", "<space>");
2715         }
2716       else
2717         ELEM (ctype, class_collection, , space) |= BIT (tok_print);
2718
2719       seq = charmap_find_value (charmap, "space", 5);
2720       if (seq == NULL)
2721         {
2722           if (!be_quiet)
2723             error (0, 0, _("\
2724 %s: character `%s' not defined while needed as default value"),
2725                    "LC_CTYPE", "<space>");
2726         }
2727       else if (seq->nbytes != 1)
2728         error (0, 0, _("\
2729 %s: character `%s' in charmap not representable with one byte"),
2730                "LC_CTYPE", "<space>");
2731       else
2732         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
2733     }
2734
2735   if (ctype->tomap_done[0] == 0)
2736     /* "If this keyword [toupper] is not specified, the lowercase letters
2737         `a' through `z', and their corresponding uppercase letters `A' to
2738         `Z', ..., shall automatically be included, with implementation-
2739         defined character values."  [P1003.2, 2.5.2.1]  */
2740     {
2741       char tmp[4];
2742       int ch;
2743
2744       strcpy (tmp, "<?>");
2745
2746       for (ch = 'a'; ch <= 'z'; ++ch)
2747         {
2748           uint32_t value_from, value_to;
2749           struct charseq *seq_from, *seq_to;
2750
2751           tmp[1] = (char) ch;
2752
2753           value_from = repertoire_find_value (repertoire, &tmp[1], 1);
2754           if (value_from == ILLEGAL_CHAR_VALUE)
2755             {
2756               if (!be_quiet)
2757                 error (0, 0, _("\
2758 %s: character `%s' not defined while needed as default value"),
2759                        "LC_CTYPE", tmp);
2760             }
2761           else
2762             {
2763               /* This conversion is implementation defined.  */
2764               tmp[1] = (char) (ch + ('A' - 'a'));
2765               value_to = repertoire_find_value (repertoire, &tmp[1], 1);
2766               if (value_to == ILLEGAL_CHAR_VALUE)
2767                 {
2768                   if (!be_quiet)
2769                     error (0, 0, _("\
2770 %s: character `%s' not defined while needed as default value"),
2771                            "LC_CTYPE", tmp);
2772                 }
2773               else
2774                 /* The index [0] is determined by the order of the
2775                    `ctype_map_newP' calls in `ctype_startup'.  */
2776                 ELEM (ctype, map_collection, [0], value_from) = value_to;
2777             }
2778
2779           seq_from = charmap_find_value (charmap, &tmp[1], 1);
2780           if (seq_from == NULL)
2781             {
2782               if (!be_quiet)
2783                 error (0, 0, _("\
2784 %s: character `%s' not defined while needed as default value"),
2785                        "LC_CTYPE", tmp);
2786             }
2787           else if (seq_from->nbytes != 1)
2788             {
2789               if (!be_quiet)
2790                 error (0, 0, _("\
2791 %s: character `%s' needed as default value not representable with one byte"),
2792                        "LC_CTYPE", tmp);
2793             }
2794           else
2795             {
2796               /* This conversion is implementation defined.  */
2797               tmp[1] = (char) (ch + ('A' - 'a'));
2798               seq_to = charmap_find_value (charmap, &tmp[1], 1);
2799               if (seq_to == NULL)
2800                 {
2801                   if (!be_quiet)
2802                     error (0, 0, _("\
2803 %s: character `%s' not defined while needed as default value"),
2804                            "LC_CTYPE", tmp);
2805                 }
2806               else if (seq_to->nbytes != 1)
2807                 {
2808                   if (!be_quiet)
2809                     error (0, 0, _("\
2810 %s: character `%s' needed as default value not representable with one byte"),
2811                            "LC_CTYPE", tmp);
2812                 }
2813               else
2814                 /* The index [0] is determined by the order of the
2815                    `ctype_map_newP' calls in `ctype_startup'.  */
2816                 ctype->map256_collection[0][seq_from->bytes[0]]
2817                   = seq_to->bytes[0];
2818             }
2819         }
2820     }
2821
2822   if (ctype->tomap_done[1] == 0)
2823     /* "If this keyword [tolower] is not specified, the mapping shall be
2824        the reverse mapping of the one specified to `toupper'."  [P1003.2]  */
2825     {
2826       for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
2827         if (ctype->map_collection[0][cnt] != 0)
2828           ELEM (ctype, map_collection, [1],
2829                 ctype->map_collection[0][cnt])
2830             = ctype->charnames[cnt];
2831
2832       for (cnt = 0; cnt < 256; ++cnt)
2833         if (ctype->map256_collection[0][cnt] != 0)
2834           ctype->map_collection[1][ctype->map_collection[0][cnt]]
2835             = ctype->charnames[cnt];
2836     }
2837
2838   if (ctype->outdigits_act == 0)
2839     {
2840       for (cnt = 0; cnt < 10; ++cnt)
2841         {
2842           ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
2843                                                          digits + cnt, 1);
2844
2845           if (ctype->mboutdigits[cnt] == NULL)
2846             {
2847               ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
2848                                                              longnames[cnt],
2849                                                              strlen (longnames[cnt]));
2850
2851               if (ctype->mboutdigits[cnt] == NULL)
2852                 {
2853                   /* Provide a replacement.  */
2854                   error (0, 0, _("\
2855 no output digits defined and none of the standard names in the charmap"));
2856
2857                   ctype->mboutdigits[cnt] = obstack_alloc (&charmap->mem_pool,
2858                                                            sizeof (struct charseq) + 1);
2859
2860                   /* This is better than nothing.  */
2861                   ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
2862                   ctype->mboutdigits[cnt]->nbytes = 1;
2863                 }
2864             }
2865
2866           ctype->wcoutdigits[cnt] = repertoire_find_value (repertoire,
2867                                                            digits + cnt, 1);
2868
2869           if (ctype->wcoutdigits[cnt] == ILLEGAL_CHAR_VALUE)
2870             {
2871               ctype->wcoutdigits[cnt] = repertoire_find_value (repertoire,
2872                                                                longnames[cnt],
2873                                                                strlen (longnames[cnt]));
2874
2875               if (ctype->wcoutdigits[cnt] == ILLEGAL_CHAR_VALUE)
2876                 {
2877                   /* Provide a replacement.  */
2878                   error (0, 0, _("\
2879 no output digits defined and none of the standard names in the repertoire"));
2880
2881                   /* This is better than nothing.  */
2882                   ctype->wcoutdigits[cnt] = (uint32_t) digits[cnt];
2883                 }
2884             }
2885         }
2886
2887       ctype->outdigits_act = 10;
2888     }
2889 }
2890
2891
2892 static void
2893 allocate_arrays (struct locale_ctype_t *ctype, struct charmap_t *charmap,
2894                  struct repertoire_t *repertoire)
2895 {
2896   size_t idx;
2897
2898   /* First we have to decide how we organize the arrays.  It is easy
2899      for a one-byte character set.  But multi-byte character set
2900      cannot be stored flat because the chars might be sparsely used.
2901      So we determine an optimal hashing function for the used
2902      characters.
2903
2904      We use a very trivial hashing function to store the sparse
2905      table.  CH % TABSIZE is used as an index.  To solve multiple hits
2906      we have N planes.  This guarantees a fixed search time for a
2907      character [N / 2].  In the following code we determine the minimum
2908      value for TABSIZE * N, where TABSIZE >= 256.  */
2909   size_t min_total = UINT_MAX;
2910   size_t act_size = 256;
2911
2912   if (!be_quiet)
2913     fputs (_("\
2914 Computing table size for character classes might take a while..."),
2915            stderr);
2916
2917   while (act_size < min_total)
2918     {
2919       size_t cnt[act_size];
2920       size_t act_planes = 1;
2921
2922       memset (cnt, '\0', sizeof cnt);
2923
2924       for (idx = 0; idx < 256; ++idx)
2925         cnt[idx] = 1;
2926
2927       for (idx = 0; idx < ctype->charnames_act; ++idx)
2928         if (ctype->charnames[idx] >= 256)
2929           {
2930             size_t nr = ctype->charnames[idx] % act_size;
2931
2932             if (++cnt[nr] > act_planes)
2933               {
2934                 act_planes = cnt[nr];
2935                 if (act_size * act_planes >= min_total)
2936                   break;
2937               }
2938           }
2939
2940       if (act_size * act_planes < min_total)
2941         {
2942           min_total = act_size * act_planes;
2943           ctype->plane_size = act_size;
2944           ctype->plane_cnt = act_planes;
2945         }
2946
2947       ++act_size;
2948     }
2949
2950   if (!be_quiet)
2951     fputs (_(" done\n"), stderr);
2952
2953
2954   ctype->names = (uint32_t *) xcalloc (ctype->plane_size
2955                                        * ctype->plane_cnt,
2956                                        sizeof (uint32_t));
2957
2958   for (idx = 1; idx < 256; ++idx)
2959     ctype->names[idx] = idx;
2960
2961   /* Trick: change the 0th entry's name to 1 to mark the cell occupied.  */
2962   ctype->names[0] = 1;
2963
2964   for (idx = 256; idx < ctype->charnames_act; ++idx)
2965     {
2966       size_t nr = (ctype->charnames[idx] % ctype->plane_size);
2967       size_t depth = 0;
2968
2969       while (ctype->names[nr + depth * ctype->plane_size])
2970         ++depth;
2971       assert (depth < ctype->plane_cnt);
2972
2973       ctype->names[nr + depth * ctype->plane_size] = ctype->charnames[idx];
2974
2975       /* Now for faster access remember the index in the NAMES_B array.  */
2976       ctype->charnames[idx] = nr + depth * ctype->plane_size;
2977     }
2978   ctype->names[0] = 0;
2979
2980
2981   /* You wonder about this amount of memory?  This is only because some
2982      users do not manage to address the array with unsigned values or
2983      data types with range >= 256.  '\200' would result in the array
2984      index -128.  To help these poor people we duplicate the entries for
2985      128 up to 255 below the entry for \0.  */
2986   ctype->ctype_b = (char_class_t *) xcalloc (256 + 128,
2987                                              sizeof (char_class_t));
2988   ctype->ctype32_b = (char_class32_t *) xcalloc (ctype->plane_size
2989                                                  * ctype->plane_cnt,
2990                                                  sizeof (char_class32_t));
2991
2992   /* This is the array accessed using the multibyte string elements.  */
2993   for (idx = 0; idx < 256; ++idx)
2994     ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
2995
2996   /* Mirror first 127 entries.  We must take care that entry -1 is not
2997      mirrored because EOF == -1.  */
2998   for (idx = 0; idx < 127; ++idx)
2999     ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
3000
3001   /* The 32 bit array contains all characters.  */
3002   for (idx = 0; idx < ctype->class_collection_act; ++idx)
3003     ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
3004
3005   /* Room for table of mappings.  */
3006   ctype->map = (uint32_t **) xmalloc (ctype->map_collection_nr
3007                                       * sizeof (uint32_t *));
3008
3009   /* Fill in all mappings.  */
3010   for (idx = 0; idx < ctype->map_collection_nr; ++idx)
3011     {
3012       unsigned int idx2;
3013
3014       /* Allocate table.  */
3015       ctype->map[idx] = (uint32_t *) xmalloc ((ctype->plane_size
3016                                                * ctype->plane_cnt + 128)
3017                                               * sizeof (uint32_t));
3018
3019       /* Copy default value (identity mapping).  */
3020       memcpy (&ctype->map[idx][128], ctype->names,
3021               ctype->plane_size * ctype->plane_cnt * sizeof (uint32_t));
3022
3023       /* Copy values from collection.  */
3024       for (idx2 = 0; idx2 < 256; ++idx2)
3025         ctype->map[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
3026
3027       /* Mirror first 127 entries.  We must take care not to map entry
3028          -1 because EOF == -1.  */
3029       for (idx2 = 0; idx2 < 127; ++idx2)
3030         ctype->map[idx][idx2] = ctype->map[idx][256 + idx2];
3031
3032       /* EOF must map to EOF.  */
3033       ctype->map[idx][127] = EOF;
3034
3035       /* The 32 bit map collection.  */
3036       for (idx2 = 0; idx2 < ctype->map_collection_act[idx]; ++idx2)
3037         if (ctype->map_collection[idx][idx2] != 0)
3038           ctype->map[idx][128 + ctype->charnames[idx2]]
3039             = ctype->map_collection[idx][idx2];
3040     }
3041
3042   /* Extra array for class and map names.  */
3043   ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3044                                                 * sizeof (uint32_t));
3045   ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3046                                               * sizeof (uint32_t));
3047
3048   /* Array for width information.  Because the expected widths are very
3049      small we use only a single byte.  This saves space and we need
3050      not provide the information twice for both endiannesses.  */
3051   ctype->width = (unsigned char *) xmalloc (ctype->plane_size
3052                                             * ctype->plane_cnt);
3053   /* Initialize with default width value.  */
3054   memset (ctype->width, charmap->width_default,
3055           ctype->plane_size * ctype->plane_cnt);
3056   if (charmap->width_rules != NULL)
3057     {
3058       size_t cnt;
3059
3060       for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
3061         {
3062           unsigned char bytes[charmap->mb_cur_max];
3063           int nbytes = charmap->width_rules[cnt].from->nbytes;
3064
3065           /* We have the range of character for which the width is
3066              specified described using byte sequences of the multibyte
3067              charset.  We have to convert this to UCS4 now.  And we
3068              cannot simply convert the beginning and the end of the
3069              sequence, we have to iterate over the byte sequence and
3070              convert it for every single character.  */
3071           memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
3072
3073           while (nbytes < charmap->width_rules[cnt].to->nbytes
3074                  || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
3075                             nbytes) <= 0)
3076             {
3077               /* Find the UCS value for `bytes'.  */
3078               uint32_t wch = repertoire_find_value (ctype->repertoire, bytes,
3079                                                     nbytes);
3080               int inner;
3081
3082               if (wch != ILLEGAL_CHAR_VALUE)
3083                 {
3084                   /* Store the value.  */
3085                   size_t nr = idx % ctype->plane_size;
3086                   size_t depth = 0;
3087
3088                   while (ctype->names[nr + depth * ctype->plane_size] != nr)
3089                     ++depth;
3090                   assert (depth < ctype->plane_cnt);
3091
3092                   ctype->width[nr + depth * ctype->plane_size]
3093                     = charmap->width_rules[cnt].width;
3094                 }
3095
3096               /* "Increment" the bytes sequence.  */
3097               inner = nbytes - 1;
3098               while (inner >= 0 && bytes[inner] == 0xff)
3099                 --inner;
3100
3101               if (inner < 0)
3102                 {
3103                   /* We have to extend the byte sequence.  */
3104                   if (nbytes >= charmap->width_rules[cnt].to->nbytes)
3105                     break;
3106
3107                   bytes[0] = 1;
3108                   memset (&bytes[1], 0, nbytes);
3109                   ++nbytes;
3110                 }
3111               else
3112                 {
3113                   ++bytes[inner];
3114                   while (++inner < nbytes)
3115                     bytes[inner] = 0;
3116                 }
3117             }
3118         }
3119     }
3120
3121   /* Set MB_CUR_MAX.  */
3122   ctype->mb_cur_max = charmap->mb_cur_max;
3123
3124   /* We need the name of the currently used 8-bit character set to
3125      make correct conversion between this 8-bit representation and the
3126      ISO 10646 character set used internally for wide characters.  */
3127   ctype->codeset_name = charmap->code_set_name;
3128
3129   /* Now determine the table for the transliteration information.
3130
3131      XXX It is not yet clear to me whether it is worth implementing a
3132      complicated algorithm which uses a hash table to locate the entries.
3133      For now I'll use a simple array which can be searched using binary
3134      search.  */
3135   if (ctype->translit_copy_locale != NULL)
3136     {
3137       /* Fold in the transliteration information from the locale mentioned
3138          in the `include' statement.  */
3139       struct locale_ctype_t *here = ctype;
3140
3141       do
3142         {
3143           struct localedef_t *other = find_locale (LC_CTYPE,
3144                                                    here->translit_copy_locale,
3145                                                    repertoire->name, charmap);
3146
3147           if (other == NULL)
3148             {
3149               error (0, 0, _("\
3150 %s: transliteration data from locale `%s' not available"),
3151                      "LC_CTYPE", here->translit_copy_locale);
3152               break;
3153             }
3154
3155           here = other->categories[LC_CTYPE].ctype;
3156
3157           /* Enqueue the information if necessary.  */
3158           if (here->translit != NULL)
3159             {
3160               struct translit_t *endp = here->translit;
3161               while (endp->next != NULL)
3162                 endp = endp->next;
3163
3164               endp->next = ctype->translit;
3165               ctype->translit = here->translit;
3166             }
3167         }
3168       while (here->translit_copy_locale != NULL);
3169     }
3170
3171   if (ctype->translit != NULL)
3172     {
3173       /* First count how many entries we have.  This is the upper limit
3174          since some entries from the included files might be overwritten.  */
3175       size_t number = 0;
3176       size_t cnt;
3177       struct translit_t *runp = ctype->translit;
3178       struct translit_t **sorted;
3179       size_t from_len, to_len;
3180
3181       while (runp != NULL)
3182         {
3183           ++number;
3184           runp = runp->next;
3185         }
3186
3187       /* Next we allocate an array large enough and fill in the values.  */
3188       sorted = (struct translit_t **) alloca (number
3189                                               * sizeof (struct translit_t **));
3190       runp = ctype->translit;
3191       number = 0;
3192       do
3193         {
3194           /* Search for the place where to insert this string.
3195              XXX Better use a real sorting algorithm later.  */
3196           size_t idx = 0;
3197           int replace = 0;
3198
3199           while (idx < number)
3200             {
3201               int res = wcscmp ((const wchar_t *) sorted[idx]->from,
3202                                 (const wchar_t *) runp->from);
3203               if (res == 0)
3204                 {
3205                   replace = 1;
3206                   break;
3207                 }
3208               if (res > 0)
3209                 break;
3210               ++idx;
3211             }
3212
3213           if (replace)
3214             sorted[idx] = runp;
3215           else
3216             {
3217               memmove (&sorted[idx + 1], &sorted[idx],
3218                        (number - idx) * sizeof (struct translit_t *));
3219               sorted[idx] = runp;
3220               ++number;
3221             }
3222
3223           runp = runp->next;
3224         }
3225       while (runp != NULL);
3226
3227       /* The next step is putting all the possible transliteration
3228          strings in one memory block so that we can write it out.
3229          We need several different blocks:
3230          - index to the from-string array
3231          - from-string array
3232          - index to the to-string array
3233          - to-string array.
3234          And this all must be available for both endianness variants.
3235       */
3236       from_len = to_len = 0;
3237       for (cnt = 0; cnt < number; ++cnt)
3238         {
3239           struct translit_to_t *srunp;
3240           from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
3241           srunp = sorted[cnt]->to;
3242           while (srunp != NULL)
3243             {
3244               to_len += wcslen ((const wchar_t *) srunp->str) + 1;
3245               srunp = srunp->next;
3246             }
3247           /* Plus one for the extra NUL character marking the end of
3248              the list for the current entry.  */
3249           ++to_len;
3250         }
3251
3252       /* We can allocate the arrays for the results.  */
3253       ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
3254       ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
3255       ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
3256       ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
3257
3258       from_len = 0;
3259       to_len = 0;
3260       for (cnt = 0; cnt < number; ++cnt)
3261         {
3262           size_t len;
3263           struct translit_to_t *srunp;
3264
3265           ctype->translit_from_idx[cnt] = from_len;
3266           ctype->translit_to_idx[cnt] = to_len;
3267
3268           len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
3269           wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
3270                    (const wchar_t *) sorted[cnt]->from, len);
3271           from_len += len;
3272
3273           ctype->translit_to_idx[cnt] = to_len;
3274           srunp = sorted[cnt]->to;
3275           while (srunp != NULL)
3276             {
3277               len = wcslen ((const wchar_t *) srunp->str) + 1;
3278               wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
3279                        (const wchar_t *) srunp->str, len);
3280               to_len += len;
3281               srunp = srunp->next;
3282             }
3283           ctype->translit_to_tbl[to_len++] = L'\0';
3284         }
3285
3286       /* Store the information about the length.  */
3287       ctype->translit_idx_size = number * sizeof (uint32_t);
3288       ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
3289       ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
3290     }
3291   else
3292     {
3293       /* Provide some dummy pointers since we have nothing to write out.  */
3294       static uint32_t no_str = { 0 };
3295
3296       ctype->translit_from_idx = &no_str;
3297       ctype->translit_from_tbl = &no_str;
3298       ctype->translit_to_tbl = &no_str;
3299       ctype->translit_idx_size = 0;
3300       ctype->translit_from_tbl_size = 0;
3301       ctype->translit_to_tbl_size = 0;
3302     }
3303 }