locale/programs/ld-ctype.c

   1 /* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Library General Public License as
   7    published by the Free Software Foundation; either version 2 of the
   8    License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Library General Public License for more details.
  14
  15    You should have received a copy of the GNU Library General Public
  16    License along with the GNU C Library; see the file COPYING.LIB.  If not,
  17    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  18    Boston, MA 02111-1307, USA.  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 # include <config.h>
  22 #endif
  23
  24 #include <alloca.h>
  25 #include <byteswap.h>
  26 #include <endian.h>
  27 #include <errno.h>
  28 #include <limits.h>
  29 #include <obstack.h>
  30 #include <stdlib.h>
  31 #include <string.h>
  32 #include <wchar.h>
  33 #include <wctype.h>
  34 #include <sys/uio.h>
  35
  36 #include "charmap.h"
  37 #include "localeinfo.h"
  38 #include "langinfo.h"
  39 #include "linereader.h"
  40 #include "locfile-token.h"
  41 #include "locfile.h"
  42 #include "localedef.h"
  43
  44 #include <assert.h>
  45
  46
  47 #ifdef PREDEFINED_CLASSES
  48 /* These are the extra bits not in wctype.h since these are not preallocated
  49    classes.  */
  50 # define _ISwspecial1   (1 << 29)
  51 # define _ISwspecial2   (1 << 30)
  52 # define _ISwspecial3   (1 << 31)
  53 #endif
  54
  55
  56 /* The bit used for representing a special class.  */
  57 #define BITPOS(class) ((class) - tok_upper)
  58 #define BIT(class) (_ISbit (BITPOS (class)))
  59 #define BITw(class) (_ISwbit (BITPOS (class)))
  60
  61 #define ELEM(ctype, collection, idx, value)                                   \
  62   *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,     \
  63              &ctype->collection##_act idx, value)
  64
  65
  66 /* To be compatible with former implementations we for now restrict
  67    the number of bits for character classes to 16.  When compatibility
  68    is not necessary anymore increase the number to 32.  */
  69 #define char_class_t uint16_t
  70 #define char_class32_t uint32_t
  71
  72
  73 /* Type to describe a transliteration action.  We have a possibly
  74    multiple character from-string and a set of multiple character
  75    to-strings.  All are 32bit values since this is what is used in
  76    the gconv functions.  */
  77 struct translit_to_t
  78 {
  79   uint32_t *str;
  80
  81   struct translit_to_t *next;
  82 };
  83
  84 struct translit_t
  85 {
  86   uint32_t *from;
  87
  88   struct translit_to_t *to;
  89
  90   struct translit_t *next;
  91 };
  92
  93
  94 /* The real definition of the struct for the LC_CTYPE locale.  */
  95 struct locale_ctype_t
  96 {
  97   uint32_t *charnames;
  98   size_t charnames_max;
  99   size_t charnames_act;
 100
 101   struct repertoire_t *repertoire;
 102
 103   /* We will allow up to 8 * sizeof (uint32_t) character classes.  */
 104 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
 105   size_t nr_charclass;
 106   const char *classnames[MAX_NR_CHARCLASS];
 107   uint32_t last_class_char;
 108   uint32_t class256_collection[256];
 109   uint32_t *class_collection;
 110   size_t class_collection_max;
 111   size_t class_collection_act;
 112   uint32_t class_done;
 113
 114   struct charseq **mbdigits;
 115   size_t mbdigits_act;
 116   size_t mbdigits_max;
 117   uint32_t *wcdigits;
 118   size_t wcdigits_act;
 119   size_t wcdigits_max;
 120
 121   struct charseq *mboutdigits[10];
 122   uint32_t wcoutdigits[10];
 123   size_t outdigits_act;
 124
 125   /* If the following number ever turns out to be too small simply
 126      increase it.  But I doubt it will.  --drepper@gnu */
 127 #define MAX_NR_CHARMAP 16
 128   const char *mapnames[MAX_NR_CHARMAP];
 129   uint32_t *map_collection[MAX_NR_CHARMAP];
 130   uint32_t map256_collection[2][256];
 131   size_t map_collection_max[MAX_NR_CHARMAP];
 132   size_t map_collection_act[MAX_NR_CHARMAP];
 133   size_t map_collection_nr;
 134   size_t last_map_idx;
 135   int tomap_done[MAX_NR_CHARMAP];
 136
 137   /* Transliteration information.  */
 138   const char *translit_copy_locale;
 139   const char *translit_copy_repertoire;
 140   struct translit_t *translit;
 141
 142   /* The arrays for the binary representation.  */
 143   uint32_t plane_size;
 144   uint32_t plane_cnt;
 145   char_class_t *ctype_b;
 146   char_class32_t *ctype32_b;
 147   uint32_t *names;
 148   uint32_t **map;
 149   uint32_t *class_name_ptr;
 150   uint32_t *map_name_ptr;
 151   unsigned char *width;
 152   uint32_t mb_cur_max;
 153   const char *codeset_name;
 154   uint32_t translit_hash_size;
 155   uint32_t translit_hash_layers;
 156   uint32_t *translit_from_idx;
 157   uint32_t *translit_from_tbl;
 158   uint32_t *translit_to_idx;
 159   uint32_t *translit_to_tbl;
 160   size_t translit_idx_size;
 161   size_t translit_from_tbl_size;
 162   size_t translit_to_tbl_size;
 163
 164   struct obstack mem_pool;
 165 };
 166
 167
 168 #define obstack_chunk_alloc xmalloc
 169 #define obstack_chunk_free free
 170
 171
 172 /* Prototypes for local functions.  */
 173 static void ctype_startup (struct linereader *lr, struct localedef_t *locale,
 174                            struct charmap_t *charmap, int ignore_content);
 175 static void ctype_class_new (struct linereader *lr,
 176                              struct locale_ctype_t *ctype, const char *name);
 177 static void ctype_map_new (struct linereader *lr,
 178                            struct locale_ctype_t *ctype,
 179                            const char *name, struct charmap_t *charmap);
 180 static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table,
 181                            size_t *max, size_t *act, unsigned int idx);
 182 static void set_class_defaults (struct locale_ctype_t *ctype,
 183                                 struct charmap_t *charmap,
 184                                 struct repertoire_t *repertoire);
 185 static void allocate_arrays (struct locale_ctype_t *ctype,
 186                              struct charmap_t *charmap,
 187                              struct repertoire_t *repertoire);
 188
 189
 190 static const char *longnames[] =
 191 {
 192   "zero", "one", "two", "three", "four",
 193   "five", "six", "seven", "eight", "nine"
 194 };
 195 static const unsigned char digits[] = "0123456789";
 196
 197
 198 static void
 199 ctype_startup (struct linereader *lr, struct localedef_t *locale,
 200                struct charmap_t *charmap, int ignore_content)
 201 {
 202   unsigned int cnt;
 203   struct locale_ctype_t *ctype;
 204
 205   if (!ignore_content)
 206     {
 207       /* Allocate the needed room.  */
 208       locale->categories[LC_CTYPE].ctype = ctype =
 209         (struct locale_ctype_t *) xcalloc (1, sizeof (struct locale_ctype_t));
 210
 211       /* We have seen no names yet.  */
 212       ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
 213       ctype->charnames =
 214         (unsigned int *) xmalloc (ctype->charnames_max
 215                                   * sizeof (unsigned int));
 216       for (cnt = 0; cnt < 256; ++cnt)
 217         ctype->charnames[cnt] = cnt;
 218       ctype->charnames_act = 256;
 219
 220       /* Fill character class information.  */
 221       ctype->last_class_char = ILLEGAL_CHAR_VALUE;
 222       /* The order of the following instructions determines the bit
 223          positions!  */
 224       ctype_class_new (lr, ctype, "upper");
 225       ctype_class_new (lr, ctype, "lower");
 226       ctype_class_new (lr, ctype, "alpha");
 227       ctype_class_new (lr, ctype, "digit");
 228       ctype_class_new (lr, ctype, "xdigit");
 229       ctype_class_new (lr, ctype, "space");
 230       ctype_class_new (lr, ctype, "print");
 231       ctype_class_new (lr, ctype, "graph");
 232       ctype_class_new (lr, ctype, "blank");
 233       ctype_class_new (lr, ctype, "cntrl");
 234       ctype_class_new (lr, ctype, "punct");
 235       ctype_class_new (lr, ctype, "alnum");
 236 #ifdef PREDEFINED_CLASSES
 237       /* The following are extensions from ISO 14652.  */
 238       ctype_class_new (lr, ctype, "left_to_right");
 239       ctype_class_new (lr, ctype, "right_to_left");
 240       ctype_class_new (lr, ctype, "num_terminator");
 241       ctype_class_new (lr, ctype, "num_separator");
 242       ctype_class_new (lr, ctype, "segment_separator");
 243       ctype_class_new (lr, ctype, "block_separator");
 244       ctype_class_new (lr, ctype, "direction_control");
 245       ctype_class_new (lr, ctype, "sym_swap_layout");
 246       ctype_class_new (lr, ctype, "char_shape_selector");
 247       ctype_class_new (lr, ctype, "num_shape_selector");
 248       ctype_class_new (lr, ctype, "non_spacing");
 249       ctype_class_new (lr, ctype, "non_spacing_level3");
 250       ctype_class_new (lr, ctype, "normal_connect");
 251       ctype_class_new (lr, ctype, "r_connect");
 252       ctype_class_new (lr, ctype, "no_connect");
 253       ctype_class_new (lr, ctype, "no_connect-space");
 254       ctype_class_new (lr, ctype, "vowel_connect");
 255 #endif
 256
 257       ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
 258       ctype->class_collection
 259         = (uint32_t *) xcalloc (sizeof (unsigned long int),
 260                                 ctype->class_collection_max);
 261       ctype->class_collection_act = 256;
 262
 263       /* Fill character map information.  */
 264       ctype->map_collection_nr = 0;
 265       ctype->last_map_idx = MAX_NR_CHARMAP;
 266       ctype_map_new (lr, ctype, "toupper", charmap);
 267       ctype_map_new (lr, ctype, "tolower", charmap);
 268 #ifdef PREDEFINED_CLASSES
 269       ctype_map_new (lr, ctype, "tosymmetric", charmap);
 270 #endif
 271
 272       /* Fill first 256 entries in `toXXX' arrays.  */
 273       for (cnt = 0; cnt < 256; ++cnt)
 274         {
 275           ctype->map_collection[0][cnt] = cnt;
 276           ctype->map_collection[1][cnt] = cnt;
 277 #ifdef PREDEFINED_CLASSES
 278           ctype->map_collection[2][cnt] = cnt;
 279 #endif
 280           ctype->map256_collection[0][cnt] = cnt;
 281           ctype->map256_collection[1][cnt] = cnt;
 282         }
 283
 284       obstack_init (&ctype->mem_pool);
 285     }
 286 }
 287
 288
 289 void
 290 ctype_finish (struct localedef_t *locale, struct charmap_t *charmap)
 291 {
 292   /* See POSIX.2, table 2-6 for the meaning of the following table.  */
 293 #define NCLASS 12
 294   static const struct
 295   {
 296     const char *name;
 297     const char allow[NCLASS];
 298   }
 299   valid_table[NCLASS] =
 300   {
 301     /* The order is important.  See token.h for more information.
 302        M = Always, D = Default, - = Permitted, X = Mutually exclusive  */
 303     { "upper",  "--MX-XDDXXX-" },
 304     { "lower",  "--MX-XDDXXX-" },
 305     { "alpha",  "---X-XDDXXX-" },
 306     { "digit",  "XXX--XDDXXX-" },
 307     { "xdigit", "-----XDDXXX-" },
 308     { "space",  "XXXXX------X" },
 309     { "print",  "---------X--" },
 310     { "graph",  "---------X--" },
 311     { "blank",  "XXXXXM-----X" },
 312     { "cntrl",  "XXXXX-XX--XX" },
 313     { "punct",  "XXXXX-DD-X-X" },
 314     { "alnum",  "-----XDDXXX-" }
 315   };
 316   size_t cnt;
 317   int cls1, cls2;
 318   uint32_t space_value;
 319   struct charseq *space_seq;
 320   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 321   int warned;
 322
 323   /* Now resolve copying and also handle completely missing definitions.  */
 324   if (ctype == NULL)
 325     {
 326       /* First see whether we were supposed to copy.  If yes, find the
 327          actual definition.  */
 328       if (locale->copy_name[LC_CTYPE] != NULL)
 329         {
 330           /* Find the copying locale.  This has to happen transitively since
 331              the locale we are copying from might also copying another one.  */
 332           struct localedef_t *from = locale;
 333
 334           do
 335             from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
 336                                 from->repertoire_name, charmap);
 337           while (from->categories[LC_CTYPE].ctype == NULL
 338                  && from->copy_name[LC_CTYPE] != NULL);
 339
 340           ctype = locale->categories[LC_CTYPE].ctype
 341             = from->categories[LC_CTYPE].ctype;
 342         }
 343
 344       /* If there is still no definition issue an warning and create an
 345          empty one.  */
 346       if (ctype == NULL)
 347         {
 348           error (0, 0, _("No definition for %s category found"), "LC_CTYPE");
 349           ctype_startup (NULL, locale, charmap, 0);
 350           ctype = locale->categories[LC_CTYPE].ctype;
 351         }
 352     }
 353
 354   /* Set default value for classes not specified.  */
 355   set_class_defaults (ctype, charmap, ctype->repertoire);
 356
 357   /* Check according to table.  */
 358   for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
 359     {
 360       uint32_t tmp = ctype->class_collection[cnt];
 361
 362       if (tmp != 0)
 363         {
 364           for (cls1 = 0; cls1 < NCLASS; ++cls1)
 365             if ((tmp & _ISwbit (cls1)) != 0)
 366               for (cls2 = 0; cls2 < NCLASS; ++cls2)
 367                 if (valid_table[cls1].allow[cls2] != '-')
 368                   {
 369                     int eq = (tmp & _ISwbit (cls2)) != 0;
 370                     switch (valid_table[cls1].allow[cls2])
 371                       {
 372                       case 'M':
 373                         if (!eq)
 374                           {
 375                             uint32_t value = ctype->charnames[cnt];
 376
 377                             if (!be_quiet)
 378                               error (0, 0, _("\
 379 character L'\\u%0*x' in class `%s' must be in class `%s'"),
 380                                      value > 0xffff ? 8 : 4, value,
 381                                      valid_table[cls1].name,
 382                                      valid_table[cls2].name);
 383                           }
 384                         break;
 385
 386                       case 'X':
 387                         if (eq)
 388                           {
 389                             uint32_t value = ctype->charnames[cnt];
 390
 391                             if (!be_quiet)
 392                               error (0, 0, _("\
 393 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
 394                                      value > 0xffff ? 8 : 4, value,
 395                                      valid_table[cls1].name,
 396                                      valid_table[cls2].name);
 397                           }
 398                         break;
 399
 400                       case 'D':
 401                         ctype->class_collection[cnt] |= _ISwbit (cls2);
 402                         break;
 403
 404                       default:
 405                         error (5, 0, _("internal error in %s, line %u"),
 406                                __FUNCTION__, __LINE__);
 407                       }
 408                   }
 409         }
 410     }
 411
 412   for (cnt = 0; cnt < 256; ++cnt)
 413     {
 414       uint32_t tmp = ctype->class256_collection[cnt];
 415
 416       if (tmp != 0)
 417         {
 418           for (cls1 = 0; cls1 < NCLASS; ++cls1)
 419             if ((tmp & _ISbit (cls1)) != 0)
 420               for (cls2 = 0; cls2 < NCLASS; ++cls2)
 421                 if (valid_table[cls1].allow[cls2] != '-')
 422                   {
 423                     int eq = (tmp & _ISbit (cls2)) != 0;
 424                     switch (valid_table[cls1].allow[cls2])
 425                       {
 426                       case 'M':
 427                         if (!eq)
 428                           {
 429                             char buf[17];
 430
 431                             sprintf (buf, "\\%o", cnt);
 432
 433                             if (!be_quiet)
 434                               error (0, 0, _("\
 435 character '%s' in class `%s' must be in class `%s'"),
 436                                      buf, valid_table[cls1].name,
 437                                      valid_table[cls2].name);
 438                           }
 439                         break;
 440
 441                       case 'X':
 442                         if (eq)
 443                           {
 444                             char buf[17];
 445
 446                             sprintf (buf, "\\%o", cnt);
 447
 448                             if (!be_quiet)
 449                               error (0, 0, _("\
 450 character '%s' in class `%s' must not be in class `%s'"),
 451                                      buf, valid_table[cls1].name,
 452                                      valid_table[cls2].name);
 453                           }
 454                         break;
 455
 456                       case 'D':
 457                         ctype->class256_collection[cnt] |= _ISbit (cls2);
 458                         break;
 459
 460                       default:
 461                         error (5, 0, _("internal error in %s, line %u"),
 462                                __FUNCTION__, __LINE__);
 463                       }
 464                   }
 465         }
 466     }
 467
 468   /* ... and now test <SP> as a special case.  */
 469   space_value = repertoire_find_value (ctype->repertoire, "SP", 2);
 470   if (space_value == ILLEGAL_CHAR_VALUE)
 471     {
 472       if (!be_quiet)
 473         error (0, 0, _("character <SP> not defined in character map"));
 474     }
 475   else if (((cnt = BITPOS (tok_space),
 476              (ELEM (ctype, class_collection, , space_value)
 477               & BITw (tok_space)) == 0)
 478             || (cnt = BITPOS (tok_blank),
 479                 (ELEM (ctype, class_collection, , space_value)
 480                  & BITw (tok_blank)) == 0)))
 481     {
 482       if (!be_quiet)
 483         error (0, 0, _("<SP> character not in class `%s'"),
 484                valid_table[cnt].name);
 485     }
 486   else if (((cnt = BITPOS (tok_punct),
 487              (ELEM (ctype, class_collection, , space_value)
 488               & BITw (tok_punct)) != 0)
 489             || (cnt = BITPOS (tok_graph),
 490                 (ELEM (ctype, class_collection, , space_value)
 491                  & BITw (tok_graph))
 492                 != 0)))
 493     {
 494       if (!be_quiet)
 495         error (0, 0, _("<SP> character must not be in class `%s'"),
 496                valid_table[cnt].name);
 497     }
 498   else
 499     ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
 500
 501   space_seq = charmap_find_value (charmap, "SP", 2);
 502   if (space_seq == NULL || space_seq->nbytes != 1)
 503     {
 504       if (!be_quiet)
 505         error (0, 0, _("character <SP> not defined in character map"));
 506     }
 507   else if (((cnt = BITPOS (tok_space),
 508              (ctype->class256_collection[space_seq->bytes[0]]
 509               & BIT (tok_space)) == 0)
 510             || (cnt = BITPOS (tok_blank),
 511                 (ctype->class256_collection[space_seq->bytes[0]]
 512                  & BIT (tok_blank)) == 0)))
 513     {
 514       if (!be_quiet)
 515         error (0, 0, _("<SP> character not in class `%s'"),
 516                valid_table[cnt].name);
 517     }
 518   else if (((cnt = BITPOS (tok_punct),
 519              (ctype->class256_collection[space_seq->bytes[0]]
 520               & BIT (tok_punct)) != 0)
 521             || (cnt = BITPOS (tok_graph),
 522                 (ctype->class256_collection[space_seq->bytes[0]]
 523                  & BIT (tok_graph)) != 0)))
 524     {
 525       if (!be_quiet)
 526         error (0, 0, _("<SP> character must not be in class `%s'"),
 527                valid_table[cnt].name);
 528     }
 529   else
 530     ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
 531
 532   /* Now that the tests are done make sure the name array contains all
 533      characters which are handled in the WIDTH section of the
 534      character set definition file.  */
 535   if (charmap->width_rules != NULL)
 536     for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
 537       {
 538         unsigned char bytes[charmap->mb_cur_max];
 539         int nbytes = charmap->width_rules[cnt].from->nbytes;
 540
 541         /* We have the range of character for which the width is
 542            specified described using byte sequences of the multibyte
 543            charset.  We have to convert this to UCS4 now.  And we
 544            cannot simply convert the beginning and the end of the
 545            sequence, we have to iterate over the byte sequence and
 546            convert it for every single character.  */
 547         memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
 548
 549         while (nbytes < charmap->width_rules[cnt].to->nbytes
 550                || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
 551                           nbytes) <= 0)
 552           {
 553             /* Find the UCS value for `bytes'.  */
 554             uint32_t wch = repertoire_find_value (ctype->repertoire, bytes,
 555                                                   nbytes);
 556             int inner;
 557
 558             if (wch != ILLEGAL_CHAR_VALUE)
 559               /* We are only interested in the side-effects of the
 560                  `find_idx' call.  It will add appropriate entries in
 561                  the name array if this is necessary.  */
 562               (void) find_idx (ctype, NULL, NULL, NULL, wch);
 563
 564             /* "Increment" the bytes sequence.  */
 565             inner = nbytes - 1;
 566             while (inner >= 0 && bytes[inner] == 0xff)
 567               --inner;
 568
 569             if (inner < 0)
 570               {
 571                 /* We have to extend the byte sequence.  */
 572                 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
 573                   break;
 574
 575                 bytes[0] = 1;
 576                 memset (&bytes[1], 0, nbytes);
 577                 ++nbytes;
 578               }
 579             else
 580               {
 581                 ++bytes[inner];
 582                 while (++inner < nbytes)
 583                   bytes[inner] = 0;
 584               }
 585           }
 586       }
 587
 588   /* There must be a multiple of 10 digits.  */
 589   if (ctype->mbdigits_act % 10 != 0)
 590     {
 591       assert (ctype->mbdigits_act == ctype->wcdigits_act);
 592       ctype->wcdigits_act -= ctype->mbdigits_act % 10;
 593       ctype->mbdigits_act -= ctype->mbdigits_act % 10;
 594       error (0, 0, _("`digit' category has not entries in groups of ten"));
 595     }
 596
 597   /* Check the input digits.  There must be a multiple of ten available.
 598      In each group it could be that one or the other character is missing.
 599      In this case the whole group must be removed.  */
 600   cnt = 0;
 601   while (cnt < ctype->mbdigits_act)
 602     {
 603       size_t inner;
 604       for (inner = 0; inner < 10; ++inner)
 605         if (ctype->mbdigits[cnt + inner] == NULL)
 606           break;
 607
 608       if (inner == 10)
 609         cnt += 10;
 610       else
 611         {
 612           /* Remove the group.  */
 613           memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
 614                    ((ctype->wcdigits_act - cnt - 10)
 615                     * sizeof (ctype->mbdigits[0])));
 616           ctype->mbdigits_act -= 10;
 617         }
 618     }
 619
 620   /* If no input digits are given use the default.  */
 621   if (ctype->mbdigits_act == 0)
 622     {
 623       if (ctype->mbdigits_max == 0)
 624         {
 625           ctype->mbdigits = obstack_alloc (&charmap->mem_pool,
 626                                            10 * sizeof (struct charseq *));
 627           ctype->mbdigits_max = 10;
 628         }
 629
 630       for (cnt = 0; cnt < 10; ++cnt)
 631         {
 632           ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
 633                                                       digits + cnt, 1);
 634           if (ctype->mbdigits[cnt] == NULL)
 635             {
 636               ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
 637                                                           longnames[cnt],
 638                                                           strlen (longnames[cnt]));
 639               if (ctype->mbdigits[cnt] == NULL)
 640                 {
 641                   /* Hum, this ain't good.  */
 642                   error (0, 0, _("\
 643 no input digits defined and none of the standard names in the charmap"));
 644
 645                   ctype->mbdigits[cnt] = obstack_alloc (&charmap->mem_pool,
 646                                                         sizeof (struct charseq) + 1);
 647
 648                   /* This is better than nothing.  */
 649                   ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
 650                   ctype->mbdigits[cnt]->nbytes = 1;
 651                 }
 652             }
 653         }
 654
 655       ctype->mbdigits_act = 10;
 656     }
 657
 658   /* Check the wide character input digits.  There must be a multiple
 659      of ten available.  In each group it could be that one or the other
 660      character is missing.  In this case the whole group must be
 661      removed.  */
 662   cnt = 0;
 663   while (cnt < ctype->wcdigits_act)
 664     {
 665       size_t inner;
 666       for (inner = 0; inner < 10; ++inner)
 667         if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
 668           break;
 669
 670       if (inner == 10)
 671         cnt += 10;
 672       else
 673         {
 674           /* Remove the group.  */
 675           memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
 676                    ((ctype->wcdigits_act - cnt - 10)
 677                     * sizeof (ctype->wcdigits[0])));
 678           ctype->wcdigits_act -= 10;
 679         }
 680     }
 681
 682   /* If no input digits are given use the default.  */
 683   if (ctype->wcdigits_act == 0)
 684     {
 685       if (ctype->wcdigits_max == 0)
 686         {
 687           ctype->wcdigits = obstack_alloc (&charmap->mem_pool,
 688                                            10 * sizeof (uint32_t));
 689           ctype->wcdigits_max = 10;
 690         }
 691
 692       for (cnt = 0; cnt < 10; ++cnt)
 693         ctype->wcdigits[cnt] = L'0' + cnt;
 694
 695       ctype->mbdigits_act = 10;
 696     }
 697
 698   /* Check the outdigits.  */
 699   warned = 0;
 700   for (cnt = 0; cnt < 10; ++cnt)
 701     if (ctype->mboutdigits[cnt] == NULL)
 702       {
 703         static struct charseq replace[2];
 704
 705         if (!warned)
 706           {
 707             error (0, 0, _("\
 708 not all characters used in `outdigit' are available in the charmap"));
 709             warned = 1;
 710           }
 711
 712         replace[0].nbytes = 1;
 713         replace[0].bytes[0] = '?';
 714         replace[0].bytes[1] = '\0';
 715         ctype->mboutdigits[cnt] = &replace[0];
 716       }
 717
 718   warned = 0;
 719   for (cnt = 0; cnt < 10; ++cnt)
 720     if (ctype->wcoutdigits[cnt] == 0)
 721       {
 722         if (!warned)
 723           {
 724             error (0, 0, _("\
 725 not all characters used in `outdigit' are available in the repertoire"));
 726             warned = 1;
 727           }
 728
 729         ctype->wcoutdigits[cnt] = L'?';
 730       }
 731 }
 732
 733
 734 void
 735 ctype_output (struct localedef_t *locale, struct charmap_t *charmap,
 736               const char *output_path)
 737 {
 738   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 739   const size_t nelems = (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)
 740                          + (ctype->map_collection_nr - 2));
 741   struct iovec iov[2 + nelems + ctype->nr_charclass
 742                   + ctype->map_collection_nr];
 743   struct locale_file data;
 744   uint32_t idx[nelems + 1];
 745   size_t elem, cnt, offset, total;
 746   char *cp;
 747
 748   /* Now prepare the output: Find the sizes of the table we can use.  */
 749   allocate_arrays (ctype, charmap, ctype->repertoire);
 750
 751   data.magic = LIMAGIC (LC_CTYPE);
 752   data.n = nelems;
 753   iov[0].iov_base = (void *) &data;
 754   iov[0].iov_len = sizeof (data);
 755
 756   iov[1].iov_base = (void *) idx;
 757   iov[1].iov_len = sizeof (idx);
 758
 759   idx[0] = iov[0].iov_len + iov[1].iov_len;
 760   offset = 0;
 761
 762   for (elem = 0; elem < nelems; ++elem)
 763     {
 764       if (elem < _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE))
 765         switch (elem)
 766           {
 767 #define CTYPE_DATA(name, base, len)                                           \
 768           case _NL_ITEM_INDEX (name):                                         \
 769             iov[2 + elem + offset].iov_base = (base);                         \
 770             iov[2 + elem + offset].iov_len = (len);                           \
 771             if (elem + 1 < nelems)                                            \
 772               idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;     \
 773             break
 774
 775           CTYPE_DATA (_NL_CTYPE_CLASS,
 776                       ctype->ctype_b,
 777                       (256 + 128) * sizeof (char_class_t));
 778
 779           CTYPE_DATA (_NL_CTYPE_TOUPPER,
 780                       ctype->map[0],
 781                       (ctype->plane_size * ctype->plane_cnt + 128)
 782                       * sizeof (uint32_t));
 783           CTYPE_DATA (_NL_CTYPE_TOLOWER,
 784                       ctype->map[1],
 785                       (ctype->plane_size * ctype->plane_cnt + 128)
 786                       * sizeof (uint32_t));
 787
 788           CTYPE_DATA (_NL_CTYPE_CLASS32,
 789                       ctype->ctype32_b,
 790                       (ctype->plane_size * ctype->plane_cnt
 791                        * sizeof (char_class32_t)));
 792
 793           CTYPE_DATA (_NL_CTYPE_NAMES,
 794                       ctype->names, (ctype->plane_size * ctype->plane_cnt
 795                                      * sizeof (uint32_t)));
 796
 797           CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_SIZE,
 798                       &ctype->translit_hash_size, sizeof (uint32_t));
 799           CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_LAYERS,
 800                       &ctype->translit_hash_layers, sizeof (uint32_t));
 801
 802           CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX,
 803                       ctype->translit_from_idx,
 804                       ctype->translit_idx_size);
 805
 806           CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL,
 807                       ctype->translit_from_tbl,
 808                       ctype->translit_from_tbl_size);
 809
 810           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX,
 811                       ctype->translit_to_idx,
 812                       ctype->translit_idx_size);
 813
 814           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL,
 815                       ctype->translit_to_tbl, ctype->translit_to_tbl_size);
 816
 817           CTYPE_DATA (_NL_CTYPE_HASH_SIZE,
 818                       &ctype->plane_size, sizeof (uint32_t));
 819           CTYPE_DATA (_NL_CTYPE_HASH_LAYERS,
 820                       &ctype->plane_cnt, sizeof (uint32_t));
 821
 822           case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
 823             /* The class name array.  */
 824             total = 0;
 825             for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
 826               {
 827                 iov[2 + elem + offset].iov_base
 828                   = (void *) ctype->classnames[cnt];
 829                 iov[2 + elem + offset].iov_len
 830                   = strlen (ctype->classnames[cnt]) + 1;
 831                 total += iov[2 + elem + offset].iov_len;
 832               }
 833             iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
 834             iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
 835             total += 1 + (4 - ((total + 1) % 4));
 836
 837             idx[elem + 1] = idx[elem] + total;
 838             break;
 839
 840           case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
 841             /* The class name array.  */
 842             total = 0;
 843             for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
 844               {
 845                 iov[2 + elem + offset].iov_base
 846                   = (void *) ctype->mapnames[cnt];
 847                 iov[2 + elem + offset].iov_len
 848                   = strlen (ctype->mapnames[cnt]) + 1;
 849                 total += iov[2 + elem + offset].iov_len;
 850               }
 851             iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
 852             iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
 853             total += 1 + (4 - ((total + 1) % 4));
 854
 855             idx[elem + 1] = idx[elem] + total;
 856             break;
 857
 858           CTYPE_DATA (_NL_CTYPE_WIDTH,
 859                       ctype->width, ctype->plane_size * ctype->plane_cnt);
 860
 861           CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
 862                       &ctype->mb_cur_max, sizeof (uint32_t));
 863
 864           case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
 865             total = strlen (ctype->codeset_name) + 1;
 866             if (total % 4 == 0)
 867               iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
 868             else
 869               {
 870                 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
 871                 memset (mempcpy (iov[2 + elem + offset].iov_base,
 872                                  ctype->codeset_name, total),
 873                         '\0', 4 - (total & 3));
 874                 total = (total + 3) & ~3;
 875               }
 876             iov[2 + elem + offset].iov_len = total;
 877             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
 878             break;
 879
 880           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
 881             iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
 882             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
 883             *(uint32_t *) iov[2 + elem + offset].iov_base =
 884               ctype->mbdigits_act / 10;
 885             idx[elem + 1] = idx[elem] + sizeof (uint32_t);
 886             break;
 887
 888           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
 889             iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
 890             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
 891             *(uint32_t *) iov[2 + elem + offset].iov_base =
 892               ctype->wcdigits_act / 10;
 893             idx[elem + 1] = idx[elem] + sizeof (uint32_t);
 894             break;
 895
 896           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
 897             /* Compute the length of all possible characters.  For INDIGITS
 898                there might be more than one.  We simply concatenate all of
 899                them with a NUL byte following.  The NUL byte wouldn't be
 900                necessary but it makes it easier for the user.  */
 901             total = 0;
 902             for (cnt = elem - _NL_CTYPE_INDIGITS0_MB;
 903                  cnt < ctype->mbdigits_act; cnt += 10)
 904               total += ctype->mbdigits[cnt]->nbytes + 1;
 905             iov[2 + elem + offset].iov_base = (char *) alloca (total);
 906             iov[2 + elem + offset].iov_len = total;
 907
 908             cp = iov[2 + elem + offset].iov_base;
 909             for (cnt = elem - _NL_CTYPE_INDIGITS0_MB;
 910                  cnt < ctype->mbdigits_act; cnt += 10)
 911               {
 912                 cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes,
 913                               ctype->mbdigits[cnt]->nbytes);
 914                 *cp++ = '\0';
 915               }
 916             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
 917             break;
 918
 919           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
 920             /* Compute the length of all possible characters.  For INDIGITS
 921                there might be more than one.  We simply concatenate all of
 922                them with a NUL byte following.  The NUL byte wouldn't be
 923                necessary but it makes it easier for the user.  */
 924             cnt = elem - _NL_CTYPE_OUTDIGIT0_MB;
 925             total = ctype->mboutdigits[cnt]->nbytes + 1;
 926             iov[2 + elem + offset].iov_base = (char *) alloca (total);
 927             iov[2 + elem + offset].iov_len = total;
 928
 929             *(char *) mempcpy (iov[2 + elem + offset].iov_base,
 930                                ctype->mbdigits[cnt]->bytes,
 931                                ctype->mbdigits[cnt]->nbytes) = '\0';
 932             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
 933             break;
 934
 935           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
 936             total = ctype->wcdigits_act / 10;
 937
 938             iov[2 + elem + offset].iov_base =
 939               (uint32_t *) alloca (total * sizeof (uint32_t));
 940             iov[2 + elem + offset].iov_len = total * sizeof (uint32_t);
 941
 942             for (cnt = elem - _NL_CTYPE_INDIGITS0_WC;
 943                  cnt < ctype->wcdigits_act; cnt += 10)
 944               ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10]
 945                 = ctype->wcdigits[cnt];
 946             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
 947             break;
 948
 949           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
 950             cnt = elem - _NL_CTYPE_OUTDIGIT0_WC;
 951             iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt];
 952             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
 953             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
 954             break;
 955
 956           default:
 957             assert (! "unknown CTYPE element");
 958           }
 959       else
 960         {
 961           /* Handle extra maps.  */
 962           size_t nr = (elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) + 2;
 963
 964           iov[2 + elem + offset].iov_base = ctype->map[nr];
 965           iov[2 + elem + offset].iov_len = ((ctype->plane_size
 966                                              * ctype->plane_cnt + 128)
 967                                             * sizeof (uint32_t));
 968
 969           idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
 970         }
 971     }
 972
 973   assert (2 + elem + offset == (nelems + ctype->nr_charclass
 974                                 + ctype->map_collection_nr + 2));
 975
 976   write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
 977 }
 978
 979
 980 /* Local functions.  */
 981 static void
 982 ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
 983                  const char *name)
 984 {
 985   size_t cnt;
 986
 987   for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
 988     if (strcmp (ctype->classnames[cnt], name) == 0)
 989       break;
 990
 991   if (cnt < ctype->nr_charclass)
 992     {
 993       lr_error (lr, _("character class `%s' already defined"), name);
 994       return;
 995     }
 996
 997   if (ctype->nr_charclass == MAX_NR_CHARCLASS)
 998     /* Exit code 2 is prescribed in P1003.2b.  */
 999     error (2, 0, _("\
1000 implementation limit: no more than %d character classes allowed"),
1001            MAX_NR_CHARCLASS);
1002
1003   ctype->classnames[ctype->nr_charclass++] = name;
1004 }
1005
1006
1007 static void
1008 ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1009                const char *name, struct charmap_t *charmap)
1010 {
1011   size_t max_chars = 0;
1012   size_t cnt;
1013
1014   for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1015     {
1016       if (strcmp (ctype->mapnames[cnt], name) == 0)
1017         break;
1018
1019       if (max_chars < ctype->map_collection_max[cnt])
1020         max_chars = ctype->map_collection_max[cnt];
1021     }
1022
1023   if (cnt < ctype->map_collection_nr)
1024     {
1025       lr_error (lr, _("character map `%s' already defined"), name);
1026       return;
1027     }
1028
1029   if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1030     /* Exit code 2 is prescribed in P1003.2b.  */
1031     error (2, 0, _("\
1032 implementation limit: no more than %d character maps allowed"),
1033            MAX_NR_CHARMAP);
1034
1035   ctype->mapnames[cnt] = name;
1036
1037   if (max_chars == 0)
1038     ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1039   else
1040     ctype->map_collection_max[cnt] = max_chars;
1041
1042   ctype->map_collection[cnt] = (uint32_t *)
1043     xmalloc (sizeof (uint32_t) * ctype->map_collection_max[cnt]);
1044   memset (ctype->map_collection[cnt], '\0',
1045           sizeof (uint32_t) * ctype->map_collection_max[cnt]);
1046   ctype->map_collection_act[cnt] = 256;
1047
1048   ++ctype->map_collection_nr;
1049 }
1050
1051
1052 /* We have to be prepared that TABLE, MAX, and ACT can be NULL.  This
1053    is possible if we only want to extend the name array.  */
1054 static uint32_t *
1055 find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1056           size_t *act, uint32_t idx)
1057 {
1058   size_t cnt;
1059
1060   if (idx < 256)
1061     return table == NULL ? NULL : &(*table)[idx];
1062
1063   for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1064     if (ctype->charnames[cnt] == idx)
1065       break;
1066
1067   /* We have to distinguish two cases: the name is found or not.  */
1068   if (cnt == ctype->charnames_act)
1069     {
1070       /* Extend the name array.  */
1071       if (ctype->charnames_act == ctype->charnames_max)
1072         {
1073           ctype->charnames_max *= 2;
1074           ctype->charnames = (unsigned int *)
1075             xrealloc (ctype->charnames,
1076                       sizeof (unsigned int) * ctype->charnames_max);
1077         }
1078       ctype->charnames[ctype->charnames_act++] = idx;
1079     }
1080
1081   if (table == NULL)
1082     /* We have done everything we are asked to do.  */
1083     return NULL;
1084
1085   if (cnt >= *act)
1086     {
1087       if (cnt >= *max)
1088         {
1089           size_t old_max = *max;
1090           do
1091             *max *= 2;
1092           while (*max <= cnt);
1093
1094           *table =
1095             (uint32_t *) xrealloc (*table, *max * sizeof (unsigned long int));
1096           memset (&(*table)[old_max], '\0',
1097                   (*max - old_max) * sizeof (uint32_t));
1098         }
1099
1100       *act = cnt;
1101     }
1102
1103   return &(*table)[cnt];
1104 }
1105
1106
1107 static int
1108 get_character (struct token *now, struct charmap_t *charmap,
1109                struct repertoire_t *repertoire,
1110                struct charseq **seqp, uint32_t *wchp)
1111 {
1112   if (now->tok == tok_bsymbol)
1113     {
1114       /* This will hopefully be the normal case.  */
1115       *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1116                                      now->val.str.lenmb);
1117       *seqp = charmap_find_value (charmap, now->val.str.startmb,
1118                                   now->val.str.lenmb);
1119     }
1120   else if (now->tok == tok_ucs4)
1121     {
1122       *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
1123
1124       if (*seqp == NULL)
1125         {
1126           /* Compute the value in the charmap from the UCS value.  */
1127           const char *symbol = repertoire_find_symbol (repertoire,
1128                                                        now->val.ucs4);
1129
1130           if (symbol == NULL)
1131             *seqp = NULL;
1132           else
1133             *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
1134
1135           if (*seqp == NULL)
1136             {
1137               /* Insert a negative entry.  */
1138               static const struct charseq negative
1139                 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1140               uint32_t *newp = obstack_alloc (&repertoire->mem_pool, 4);
1141               *newp = now->val.ucs4;
1142
1143               insert_entry (&repertoire->seq_table, newp, 4,
1144                             (void *) &negative);
1145             }
1146           else
1147             (*seqp)->ucs4 = now->val.ucs4;
1148         }
1149       else if ((*seqp)->ucs4 != now->val.ucs4)
1150         *seqp = NULL;
1151
1152       *wchp = now->val.ucs4;
1153     }
1154   else if (now->tok == tok_charcode)
1155     {
1156       /* We must map from the byte code to UCS4.  */
1157       *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1158                                    now->val.str.lenmb);
1159
1160       if (*seqp == NULL)
1161         *wchp = ILLEGAL_CHAR_VALUE;
1162       else
1163         {
1164           if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1165             (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1166                                                    strlen ((*seqp)->name));
1167           *wchp = (*seqp)->ucs4;
1168         }
1169     }
1170   else
1171     return 1;
1172
1173   return 0;
1174 }
1175
1176
1177 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>'.  */
1178 static void
1179 charclass_symbolic_ellipsis (struct linereader *ldfile,
1180                              struct locale_ctype_t *ctype,
1181                              struct charmap_t *charmap,
1182                              struct repertoire_t *repertoire,
1183                              struct token *now,
1184                              const char *last_str,
1185                              unsigned long int class256_bit,
1186                              unsigned long int class_bit, int base,
1187                              int ignore_content, int handle_digits)
1188 {
1189   const char *nowstr = now->val.str.startmb;
1190   char tmp[now->val.str.lenmb + 1];
1191   const char *cp;
1192   char *endp;
1193   unsigned long int from;
1194   unsigned long int to;
1195
1196   /* We have to compute the ellipsis values using the symbolic names.  */
1197   assert (last_str != NULL);
1198
1199   if (strlen (last_str) != now->val.str.lenmb)
1200     {
1201     invalid_range:
1202       lr_error (ldfile,
1203                 _("`%s' and `%.*s' are no valid names for symbolic range"),
1204                 last_str, now->val.str.lenmb, nowstr);
1205       return;
1206     }
1207
1208   if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1209     /* Nothing to do, the names are the same.  */
1210     return;
1211
1212   for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1213     ;
1214
1215   errno = 0;
1216   from = strtoul (cp, &endp, base);
1217   if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1218     goto invalid_range;
1219
1220   to = strtoul (nowstr + (cp - last_str), &endp, base);
1221   if ((to == UINT_MAX && errno == ERANGE)
1222       || (endp - nowstr) != now->val.str.lenmb || from >= to)
1223     goto invalid_range;
1224
1225   /* OK, we have a range FROM - TO.  Now we can create the symbolic names.  */
1226   if (!ignore_content)
1227     {
1228       now->val.str.startmb = tmp;
1229       while (++from <= to)
1230         {
1231           struct charseq *seq;
1232           uint32_t wch;
1233
1234           sprintf (tmp, (base == 10 ? "%.*s%0*d" : "%.*s%0*X"), cp - last_str,
1235                    last_str, now->val.str.lenmb - (cp - last_str), from);
1236
1237           get_character (now, charmap, repertoire, &seq, &wch);
1238
1239           if (seq != NULL && seq->nbytes == 1)
1240             /* Yep, we can store information about this byte sequence.  */
1241             ctype->class256_collection[seq->bytes[0]] |= class256_bit;
1242
1243           if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1244             /* We have the UCS4 position.  */
1245             *find_idx (ctype, &ctype->class_collection,
1246                        &ctype->class_collection_max,
1247                        &ctype->class_collection_act, wch) |= class_bit;
1248
1249           if (handle_digits == 1)
1250             {
1251               /* We must store the digit values.  */
1252               if (ctype->mbdigits_act == ctype->mbdigits_max)
1253                 {
1254                   ctype->mbdigits_max *= 2;
1255                   ctype->mbdigits = xrealloc (ctype->mbdigits,
1256                                               (ctype->mbdigits_max
1257                                                * sizeof (char *)));
1258                   ctype->wcdigits_max *= 2;
1259                   ctype->wcdigits = xrealloc (ctype->wcdigits,
1260                                               (ctype->wcdigits_max
1261                                                * sizeof (uint32_t)));
1262                 }
1263
1264               ctype->mbdigits[ctype->mbdigits_act++] = seq;
1265               ctype->wcdigits[ctype->wcdigits_act++] = wch;
1266             }
1267           else if (handle_digits == 2)
1268             {
1269               /* We must store the digit values.  */
1270               if (ctype->outdigits_act >= 10)
1271                 {
1272                   lr_error (ldfile, _("\
1273 %s: field `%s' does not contain exactly ten entries"),
1274                             "LC_CTYPE", "outdigit");
1275                   return;
1276                 }
1277
1278               ctype->mboutdigits[ctype->outdigits_act] = seq;
1279               ctype->wcoutdigits[ctype->outdigits_act] = wch;
1280               ++ctype->outdigits_act;
1281             }
1282         }
1283     }
1284 }
1285
1286
1287 /* Ellipsis like in `<U1234>..<U2345>'.  */
1288 static void
1289 charclass_ucs4_ellipsis (struct linereader *ldfile,
1290                          struct locale_ctype_t *ctype,
1291                          struct charmap_t *charmap,
1292                          struct repertoire_t *repertoire,
1293                          struct token *now, uint32_t last_wch,
1294                          unsigned long int class256_bit,
1295                          unsigned long int class_bit, int ignore_content,
1296                          int handle_digits)
1297 {
1298   if (last_wch > now->val.ucs4)
1299     {
1300       lr_error (ldfile, _("\
1301 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1302                 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1303                 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
1304       return;
1305     }
1306
1307   if (!ignore_content)
1308     while (++last_wch <= now->val.ucs4)
1309       {
1310         /* We have to find out whether there is a byte sequence corresponding
1311            to this UCS4 value.  */
1312         struct charseq *seq = repertoire_find_seq (repertoire, last_wch);
1313
1314         /* If this is the first time we look for this sequence create a new
1315            entry.  */
1316         if (seq == NULL)
1317           {
1318             /* Find the symbolic name for this UCS4 value.  */
1319             const char *symbol = repertoire_find_symbol (repertoire, last_wch);
1320             uint32_t *newp = obstack_alloc (&repertoire->mem_pool, 4);
1321             *newp = last_wch;
1322
1323             if (symbol != NULL)
1324               /* We have a name, now search the multibyte value.  */
1325               seq = charmap_find_value (charmap, symbol, strlen (symbol));
1326
1327             if (seq == NULL)
1328               {
1329                 /* We have to create a fake entry.  */
1330                 static const struct charseq negative
1331                   = { .ucs4 = ILLEGAL_CHAR_VALUE };
1332                 seq = (struct charseq *) &negative;
1333               }
1334             else
1335               seq->ucs4 = last_wch;
1336
1337             insert_entry (&repertoire->seq_table, newp, 4, seq);
1338           }
1339
1340         /* We have a name, now search the multibyte value.  */
1341         if (seq->ucs4 == last_wch && seq->nbytes == 1)
1342           /* Yep, we can store information about this byte sequence.  */
1343           ctype->class256_collection[(size_t) seq->bytes[0]]
1344             |= class256_bit;
1345
1346         /* And of course we have the UCS4 position.  */
1347         if (class_bit != 0 && class_bit != 0)
1348           *find_idx (ctype, &ctype->class_collection,
1349                      &ctype->class_collection_max,
1350                      &ctype->class_collection_act, last_wch) |= class_bit;
1351
1352         if (handle_digits == 1)
1353           {
1354             /* We must store the digit values.  */
1355             if (ctype->mbdigits_act == ctype->mbdigits_max)
1356               {
1357                 ctype->mbdigits_max *= 2;
1358                 ctype->mbdigits = xrealloc (ctype->mbdigits,
1359                                             (ctype->mbdigits_max
1360                                              * sizeof (char *)));
1361                 ctype->wcdigits_max *= 2;
1362                 ctype->wcdigits = xrealloc (ctype->wcdigits,
1363                                             (ctype->wcdigits_max
1364                                              * sizeof (uint32_t)));
1365               }
1366
1367             ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1368                                                       ? seq : NULL);
1369             ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1370           }
1371         else if (handle_digits == 2)
1372           {
1373             /* We must store the digit values.  */
1374             if (ctype->outdigits_act >= 10)
1375               {
1376                 lr_error (ldfile, _("\
1377 %s: field `%s' does not contain exactly ten entries"),
1378                           "LC_CTYPE", "outdigit");
1379                 return;
1380               }
1381
1382             ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1383                                                         ? seq : NULL);
1384             ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1385             ++ctype->outdigits_act;
1386           }
1387       }
1388 }
1389
1390
1391 /* Ellipsis as in `/xea/x12.../xea/x34'.  */
1392 static void
1393 charclass_charcode_ellipsis (struct linereader *ldfile,
1394                              struct locale_ctype_t *ctype,
1395                              struct charmap_t *charmap,
1396                              struct repertoire_t *repertoire,
1397                              struct token *now, char *last_charcode,
1398                              uint32_t last_charcode_len,
1399                              unsigned long int class256_bit,
1400                              unsigned long int class_bit, int ignore_content,
1401                              int handle_digits)
1402 {
1403   /* First check whether the to-value is larger.  */
1404   if (now->val.charcode.nbytes != last_charcode_len)
1405     {
1406       lr_error (ldfile, _("\
1407 start end end character sequence of range must have the same length"));
1408       return;
1409     }
1410
1411   if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
1412     {
1413       lr_error (ldfile, _("\
1414 to-value character sequence is smaller than from-value sequence"));
1415       return;
1416     }
1417
1418   if (!ignore_content)
1419     {
1420       do
1421         {
1422           /* Increment the byte sequence value.  */
1423           struct charseq *seq;
1424           uint32_t wch;
1425           int i;
1426
1427           for (i = last_charcode_len - 1; i >= 0; --i)
1428             if (++last_charcode[i] != 0)
1429               break;
1430
1431           if (last_charcode_len == 1)
1432             /* Of course we have the charcode value.  */
1433             ctype->class256_collection[(size_t) last_charcode[0]]
1434               |= class256_bit;
1435
1436           /* Find the symbolic name.  */
1437           seq = charmap_find_symbol (charmap, last_charcode,
1438                                      last_charcode_len);
1439           if (seq != NULL)
1440             {
1441               if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1442                 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1443                                                    strlen (seq->name));
1444               wch = seq->ucs4;
1445
1446               if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1447                 *find_idx (ctype, &ctype->class_collection,
1448                            &ctype->class_collection_max,
1449                            &ctype->class_collection_act, wch) |= class_bit;
1450             }
1451           else
1452             wch = ILLEGAL_CHAR_VALUE;
1453
1454           if (handle_digits == 1)
1455             {
1456               /* We must store the digit values.  */
1457               if (ctype->mbdigits_act == ctype->mbdigits_max)
1458                 {
1459                   ctype->mbdigits_max *= 2;
1460                   ctype->mbdigits = xrealloc (ctype->mbdigits,
1461                                               (ctype->mbdigits_max
1462                                                * sizeof (char *)));
1463                   ctype->wcdigits_max *= 2;
1464                   ctype->wcdigits = xrealloc (ctype->wcdigits,
1465                                               (ctype->wcdigits_max
1466                                                * sizeof (uint32_t)));
1467                 }
1468
1469               seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1470               memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1471               seq->nbytes = last_charcode_len;
1472
1473               ctype->mbdigits[ctype->mbdigits_act++] = seq;
1474               ctype->wcdigits[ctype->wcdigits_act++] = wch;
1475             }
1476           else if (handle_digits == 2)
1477             {
1478               struct charseq *seq;
1479               /* We must store the digit values.  */
1480               if (ctype->outdigits_act >= 10)
1481                 {
1482                   lr_error (ldfile, _("\
1483 %s: field `%s' does not contain exactly ten entries"),
1484                             "LC_CTYPE", "outdigit");
1485                   return;
1486                 }
1487
1488               seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1489               memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1490               seq->nbytes = last_charcode_len;
1491
1492               ctype->mboutdigits[ctype->outdigits_act] = seq;
1493               ctype->wcoutdigits[ctype->outdigits_act] = wch;
1494               ++ctype->outdigits_act;
1495             }
1496         }
1497       while (memcmp (last_charcode, now->val.charcode.bytes,
1498                      last_charcode_len) != 0);
1499     }
1500 }
1501
1502
1503 /* Read one transliteration entry.  */
1504 static uint32_t *
1505 read_widestring (struct linereader *ldfile, struct token *now,
1506                  struct charmap_t *charmap, struct repertoire_t *repertoire)
1507 {
1508   uint32_t *wstr;
1509
1510   if (now->tok == tok_default_missing)
1511     /* The special name "" will denote this case.  */
1512     wstr = (uint32_t *) L"";
1513   else if (now->tok == tok_bsymbol)
1514     {
1515       /* Get the value from the repertoire.  */
1516       wstr = xmalloc (2 * sizeof (uint32_t));
1517       wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1518                                        now->val.str.lenmb);
1519       if (wstr[0] == ILLEGAL_CHAR_VALUE)
1520         /* We cannot proceed, we don't know the UCS4 value.  */
1521         return NULL;
1522
1523       wstr[1] = 0;
1524     }
1525   else if (now->tok == tok_ucs4)
1526     {
1527       wstr = xmalloc (2 * sizeof (uint32_t));
1528       wstr[0] = now->val.ucs4;
1529       wstr[1] = 0;
1530     }
1531   else if (now->tok == tok_charcode)
1532     {
1533       /* Argh, we have to convert to the symbol name first and then to the
1534          UCS4 value.  */
1535       struct charseq *seq = charmap_find_symbol (charmap,
1536                                                  now->val.str.startmb,
1537                                                  now->val.str.lenmb);
1538       if (seq == NULL)
1539         /* Cannot find the UCS4 value.  */
1540         return NULL;
1541
1542       if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1543         seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1544                                            strlen (seq->name));
1545       if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1546         /* We cannot proceed, we don't know the UCS4 value.  */
1547         return NULL;
1548
1549       wstr = xmalloc (2 * sizeof (uint32_t));
1550       wstr[0] = seq->ucs4;
1551       wstr[1] = 0;
1552     }
1553   else if (now->tok == tok_string)
1554     {
1555       wstr = now->val.str.startwc;
1556       if (wstr[0] == 0)
1557         return NULL;
1558     }
1559   else
1560     {
1561       if (now->tok != tok_eol && now->tok != tok_eof)
1562         lr_ignore_rest (ldfile, 0);
1563       SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1564       return (uint32_t *) -1l;
1565     }
1566
1567   return wstr;
1568 }
1569
1570
1571 static void
1572 read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1573                      struct token *now, struct charmap_t *charmap,
1574                      struct repertoire_t *repertoire)
1575 {
1576   uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1577   struct translit_t *result;
1578   struct translit_to_t **top;
1579   struct obstack *ob = &ctype->mem_pool;
1580   int first;
1581   int ignore;
1582
1583   if (from_wstr == NULL)
1584     /* There is no valid from string.  */
1585     return;
1586
1587   result = (struct translit_t *) obstack_alloc (ob,
1588                                                 sizeof (struct translit_t));
1589   result->from = from_wstr;
1590   result->next = NULL;
1591   result->to = NULL;
1592   top = &result->to;
1593   first = 1;
1594   ignore = 0;
1595
1596   while (1)
1597     {
1598       uint32_t *to_wstr;
1599
1600       /* Next we have one or more transliterations.  They are
1601          separated by semicolons.  */
1602       now = lr_token (ldfile, charmap, repertoire);
1603
1604       if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
1605         {
1606           /* One string read.  */
1607           const uint32_t zero = 0;
1608
1609           if (!ignore)
1610             {
1611               obstack_grow (ob, &zero, 4);
1612               to_wstr = obstack_finish (ob);
1613
1614               *top = obstack_alloc (ob, sizeof (struct translit_to_t));
1615               (*top)->str = to_wstr;
1616               (*top)->next = NULL;
1617             }
1618
1619           if (now->tok == tok_eol)
1620             {
1621               result->next = ctype->translit;
1622               ctype->translit = result;
1623               return;
1624             }
1625
1626           if (!ignore)
1627             top = &(*top)->next;
1628           ignore = 0;
1629         }
1630       else
1631         {
1632           to_wstr = read_widestring (ldfile, now, charmap, repertoire);
1633           if (to_wstr == (uint32_t *) -1l)
1634             {
1635               /* An error occurred.  */
1636               obstack_free (ob, result);
1637               return;
1638             }
1639
1640           if (to_wstr == NULL)
1641             ignore = 1;
1642           else
1643             /* This value is usable.  */
1644             obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
1645
1646           first = 0;
1647         }
1648     }
1649 }
1650
1651
1652 /* The parser for the LC_CTYPE section of the locale definition.  */
1653 void
1654 ctype_read (struct linereader *ldfile, struct localedef_t *result,
1655             struct charmap_t *charmap, const char *repertoire_name,
1656             int ignore_content)
1657 {
1658   struct repertoire_t *repertoire = NULL;
1659   struct locale_ctype_t *ctype;
1660   struct token *now;
1661   enum token_t nowtok;
1662   size_t cnt;
1663   struct charseq *last_seq;
1664   uint32_t last_wch = 0;
1665   enum token_t last_token;
1666   enum token_t ellipsis_token;
1667   char last_charcode[16];
1668   size_t last_charcode_len = 0;
1669   const char *last_str = NULL;
1670   int mapidx;
1671
1672   /* Get the repertoire we have to use.  */
1673   if (repertoire_name != NULL)
1674     repertoire = repertoire_read (repertoire_name);
1675
1676   /* The rest of the line containing `LC_CTYPE' must be free.  */
1677   lr_ignore_rest (ldfile, 1);
1678
1679
1680   do
1681     {
1682       now = lr_token (ldfile, charmap, NULL);
1683       nowtok = now->tok;
1684     }
1685   while (nowtok == tok_eol);
1686
1687   /* If we see `copy' now we are almost done.  */
1688   if (nowtok == tok_copy)
1689     {
1690       handle_copy (ldfile, charmap, repertoire, result, tok_lc_ctype, LC_CTYPE,
1691                    "LC_CTYPE", ignore_content);
1692       return;
1693     }
1694
1695   /* Prepare the data structures.  */
1696   ctype_startup (ldfile, result, charmap, ignore_content);
1697   ctype = result->categories[LC_CTYPE].ctype;
1698
1699   /* Remember the repertoire we use.  */
1700   if (!ignore_content)
1701     ctype->repertoire = repertoire;
1702
1703   while (1)
1704     {
1705       unsigned long int class_bit = 0;
1706       unsigned long int class256_bit = 0;
1707       int handle_digits = 0;
1708
1709       /* Of course we don't proceed beyond the end of file.  */
1710       if (nowtok == tok_eof)
1711         break;
1712
1713       /* Ingore empty lines.  */
1714       if (nowtok == tok_eol)
1715         {
1716           now = lr_token (ldfile, charmap, NULL);
1717           nowtok = now->tok;
1718           continue;
1719         }
1720
1721       switch (nowtok)
1722         {
1723         case tok_charclass:
1724           now = lr_token (ldfile, charmap, NULL);
1725           while (now->tok == tok_ident || now->tok == tok_string)
1726             {
1727               ctype_class_new (ldfile, ctype, now->val.str.startmb);
1728               now = lr_token (ldfile, charmap, NULL);
1729               if (now->tok != tok_semicolon)
1730                 break;
1731               now = lr_token (ldfile, charmap, NULL);
1732             }
1733           if (now->tok != tok_eol)
1734             SYNTAX_ERROR (_("\
1735 %s: syntax error in definition of new character class"), "LC_CTYPE");
1736           break;
1737
1738         case tok_charconv:
1739           now = lr_token (ldfile, charmap, NULL);
1740           while (now->tok == tok_ident || now->tok == tok_string)
1741             {
1742               ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
1743               now = lr_token (ldfile, charmap, NULL);
1744               if (now->tok != tok_semicolon)
1745                 break;
1746               now = lr_token (ldfile, charmap, NULL);
1747             }
1748           if (now->tok != tok_eol)
1749             SYNTAX_ERROR (_("\
1750 %s: syntax error in definition of new character map"), "LC_CTYPE");
1751           break;
1752
1753         case tok_class:
1754           /* Ignore the rest of the line if we don't need the input of
1755              this line.  */
1756           if (ignore_content)
1757             {
1758               lr_ignore_rest (ldfile, 0);
1759               break;
1760             }
1761
1762           /* We simply forget the `class' keyword and use the following
1763              operand to determine the bit.  */
1764           now = lr_token (ldfile, charmap, NULL);
1765           if (now->tok == tok_ident || now->tok == tok_string)
1766             {
1767               /* Must can be one of the predefined class names.  */
1768               for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1769                 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
1770                   break;
1771               if (cnt >= ctype->nr_charclass)
1772                 {
1773 #ifdef PREDEFINED_CLASSES
1774                   if (now->val.str.lenmb == 8
1775                       && memcmp ("special1", now->val.str.startmb, 8) == 0)
1776                     class_bit = _ISwspecial1;
1777                   else if (now->val.str.lenmb == 8
1778                       && memcmp ("special2", now->val.str.startmb, 8) == 0)
1779                     class_bit = _ISwspecial2;
1780                   else if (now->val.str.lenmb == 8
1781                       && memcmp ("special3", now->val.str.startmb, 8) == 0)
1782                     class_bit = _ISwspecial3;
1783                   else
1784 #endif
1785                     {
1786                       /* OK, it's a new class.  */
1787                       ctype_class_new (ldfile, ctype, now->val.str.startmb);
1788
1789                       class_bit = _ISwbit (ctype->nr_charclass - 1);
1790                     }
1791                 }
1792               else
1793                 class_bit = _ISwbit (cnt);
1794
1795               free (now->val.str.startmb);
1796             }
1797           else if (now->tok == tok_digit)
1798             goto handle_tok_digit;
1799           else if (now->tok < tok_upper || now->tok > tok_blank)
1800             goto err_label;
1801           else
1802             {
1803               class_bit = BITw (now->tok);
1804               class256_bit = BIT (now->tok);
1805             }
1806
1807           /* The next character must be a semicolon.  */
1808           now = lr_token (ldfile, charmap, NULL);
1809           if (now->tok != tok_semicolon)
1810             goto err_label;
1811           goto read_charclass;
1812
1813         case tok_upper:
1814         case tok_lower:
1815         case tok_alpha:
1816         case tok_alnum:
1817         case tok_space:
1818         case tok_cntrl:
1819         case tok_punct:
1820         case tok_graph:
1821         case tok_print:
1822         case tok_xdigit:
1823         case tok_blank:
1824           /* Ignore the rest of the line if we don't need the input of
1825              this line.  */
1826           if (ignore_content)
1827             {
1828               lr_ignore_rest (ldfile, 0);
1829               break;
1830             }
1831
1832           class_bit = BITw (now->tok);
1833           class256_bit = BIT (now->tok);
1834           handle_digits = 0;
1835         read_charclass:
1836           ctype->class_done |= class_bit;
1837           last_token = tok_none;
1838           ellipsis_token = tok_none;
1839           now = lr_token (ldfile, charmap, NULL);
1840           while (now->tok != tok_eol && now->tok != tok_eof)
1841             {
1842               uint32_t wch;
1843               struct charseq *seq;
1844
1845               if (ellipsis_token == tok_none)
1846                 {
1847                   if (get_character (now, charmap, repertoire, &seq, &wch))
1848                     goto err_label;
1849
1850                   if (!ignore_content && seq != NULL && seq->nbytes == 1)
1851                     /* Yep, we can store information about this byte
1852                        sequence.  */
1853                     ctype->class256_collection[seq->bytes[0]] |= class256_bit;
1854
1855                   if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
1856                       && class_bit != 0)
1857                     /* We have the UCS4 position.  */
1858                     *find_idx (ctype, &ctype->class_collection,
1859                                &ctype->class_collection_max,
1860                                &ctype->class_collection_act, wch) |= class_bit;
1861
1862                   last_token = now->tok;
1863                   /* Terminate the string.  */
1864                   if (last_token == tok_bsymbol)
1865                     {
1866                       now->val.str.startmb[now->val.str.lenmb] = '\0';
1867                       last_str = now->val.str.startmb;
1868                     }
1869                   else
1870                     last_str = NULL;
1871                   last_seq = seq;
1872                   last_wch = wch;
1873                   memcpy (last_charcode, now->val.charcode.bytes, 16);
1874                   last_charcode_len = now->val.charcode.nbytes;
1875
1876                   if (!ignore_content && handle_digits == 1)
1877                     {
1878                       /* We must store the digit values.  */
1879                       if (ctype->mbdigits_act == ctype->mbdigits_max)
1880                         {
1881                           ctype->mbdigits_max += 10;
1882                           ctype->mbdigits = xrealloc (ctype->mbdigits,
1883                                                       (ctype->mbdigits_max
1884                                                        * sizeof (char *)));
1885                           ctype->wcdigits_max += 10;
1886                           ctype->wcdigits = xrealloc (ctype->wcdigits,
1887                                                       (ctype->wcdigits_max
1888                                                        * sizeof (uint32_t)));
1889                         }
1890
1891                       ctype->mbdigits[ctype->mbdigits_act++] = seq;
1892                       ctype->wcdigits[ctype->wcdigits_act++] = wch;
1893                     }
1894                   else if (!ignore_content && handle_digits == 2)
1895                     {
1896                       /* We must store the digit values.  */
1897                       if (ctype->outdigits_act >= 10)
1898                         {
1899                           lr_error (ldfile, _("\
1900 %s: field `%s' does not contain exactly ten entries"),
1901                             "LC_CTYPE", "outdigit");
1902                           goto err_label;
1903                         }
1904
1905                       ctype->mboutdigits[ctype->outdigits_act] = seq;
1906                       ctype->wcoutdigits[ctype->outdigits_act] = wch;
1907                       ++ctype->outdigits_act;
1908                     }
1909                 }
1910               else
1911                 {
1912                   /* Now it gets complicated.  We have to resolve the
1913                      ellipsis problem.  First we must distinguish between
1914                      the different kind of ellipsis and this must match the
1915                      tokens we have seen.  */
1916                   assert (last_token != tok_none);
1917
1918                   if (last_token != now->tok)
1919                     {
1920                       lr_error (ldfile, _("\
1921 ellipsis range must be marked by two operands of same type"));
1922                       lr_ignore_rest (ldfile, 0);
1923                       break;
1924                     }
1925
1926                   if (last_token == tok_bsymbol)
1927                     {
1928                       if (ellipsis_token == tok_ellipsis3)
1929                         lr_error (ldfile, _("with symbolic name range values \
1930 the absolute ellipsis `...' must not be used"));
1931
1932                       charclass_symbolic_ellipsis (ldfile, ctype, charmap,
1933                                                    repertoire, now, last_str,
1934                                                    class256_bit, class_bit,
1935                                                    (ellipsis_token
1936                                                     == tok_ellipsis4
1937                                                     ? 10 : 16),
1938                                                    ignore_content,
1939                                                    handle_digits);
1940                     }
1941                   else if (last_token == tok_ucs4)
1942                     {
1943                       if (ellipsis_token != tok_ellipsis2)
1944                         lr_error (ldfile, _("\
1945 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
1946
1947                       charclass_ucs4_ellipsis (ldfile, ctype, charmap,
1948                                                repertoire, now, last_wch,
1949                                                class256_bit, class_bit,
1950                                                ignore_content, handle_digits);
1951                     }
1952                   else
1953                     {
1954                       assert (last_token == tok_charcode);
1955
1956                       if (ellipsis_token != tok_ellipsis3)
1957                         lr_error (ldfile, _("\
1958 with character code range values one must use the absolute ellipsis `...'"));
1959
1960                       charclass_charcode_ellipsis (ldfile, ctype, charmap,
1961                                                    repertoire, now,
1962                                                    last_charcode,
1963                                                    last_charcode_len,
1964                                                    class256_bit, class_bit,
1965                                                    ignore_content,
1966                                                    handle_digits);
1967                     }
1968
1969                   /* Now we have used the last value.  */
1970                   last_token = tok_none;
1971                 }
1972
1973               /* Next we expect a semicolon or the end of the line.  */
1974               now = lr_token (ldfile, charmap, NULL);
1975               if (now->tok == tok_eol || now->tok == tok_eof)
1976                 break;
1977
1978               if (last_token != tok_none
1979                   && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4)
1980                 {
1981                   ellipsis_token = now->tok;
1982                   now = lr_token (ldfile, charmap, NULL);
1983                   continue;
1984                 }
1985
1986               if (now->tok != tok_semicolon)
1987                 goto err_label;
1988
1989               /* And get the next character.  */
1990               now = lr_token (ldfile, charmap, NULL);
1991
1992               ellipsis_token = tok_none;
1993             }
1994           break;
1995
1996         case tok_digit:
1997           /* Ignore the rest of the line if we don't need the input of
1998              this line.  */
1999           if (ignore_content)
2000             {
2001               lr_ignore_rest (ldfile, 0);
2002               break;
2003             }
2004
2005         handle_tok_digit:
2006           class_bit = _ISwdigit;
2007           class256_bit = _ISdigit;
2008           handle_digits = 1;
2009           goto read_charclass;
2010
2011         case tok_outdigit:
2012           /* Ignore the rest of the line if we don't need the input of
2013              this line.  */
2014           if (ignore_content)
2015             {
2016               lr_ignore_rest (ldfile, 0);
2017               break;
2018             }
2019
2020           if (ctype->outdigits_act != 0)
2021             lr_error (ldfile, _("\
2022 %s: field `%s' declared more than once"),
2023                       "LC_CTYPE", "outdigit");
2024           class_bit = 0;
2025           class256_bit = 0;
2026           handle_digits = 2;
2027           goto read_charclass;
2028
2029         case tok_toupper:
2030           /* Ignore the rest of the line if we don't need the input of
2031              this line.  */
2032           if (ignore_content)
2033             {
2034               lr_ignore_rest (ldfile, 0);
2035               break;
2036             }
2037
2038           mapidx = 0;
2039           goto read_mapping;
2040
2041         case tok_tolower:
2042           /* Ignore the rest of the line if we don't need the input of
2043              this line.  */
2044           if (ignore_content)
2045             {
2046               lr_ignore_rest (ldfile, 0);
2047               break;
2048             }
2049
2050           mapidx = 1;
2051           goto read_mapping;
2052
2053         case tok_map:
2054           /* Ignore the rest of the line if we don't need the input of
2055              this line.  */
2056           if (ignore_content)
2057             {
2058               lr_ignore_rest (ldfile, 0);
2059               break;
2060             }
2061
2062           /* We simply forget the `map' keyword and use the following
2063              operand to determine the mapping.  */
2064           now = lr_token (ldfile, charmap, NULL);
2065           if (now->tok == tok_ident || now->tok == tok_string)
2066             {
2067               size_t cnt;
2068
2069               for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2070                 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2071                   break;
2072
2073               if (cnt >= ctype->map_collection_nr)
2074                 /* OK, it's a new map.  */
2075                 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2076
2077               mapidx = cnt;
2078             }
2079           else if (now->tok < tok_toupper || now->tok > tok_tolower)
2080             goto err_label;
2081           else
2082             mapidx = now->tok - tok_toupper;
2083
2084           now = lr_token (ldfile, charmap, NULL);
2085           /* This better should be a semicolon.  */
2086           if (now->tok != tok_semicolon)
2087             goto err_label;
2088
2089         read_mapping:
2090           /* Test whether this mapping was already defined.  */
2091           if (ctype->tomap_done[mapidx])
2092             {
2093               lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2094                         ctype->mapnames[mapidx]);
2095               lr_ignore_rest (ldfile, 0);
2096               break;
2097             }
2098           ctype->tomap_done[mapidx] = 1;
2099
2100           now = lr_token (ldfile, charmap, NULL);
2101           while (now->tok != tok_eol && now->tok != tok_eof)
2102             {
2103               struct charseq *from_seq;
2104               uint32_t from_wch;
2105               struct charseq *to_seq;
2106               uint32_t to_wch;
2107
2108               /* Every pair starts with an opening brace.  */
2109               if (now->tok != tok_open_brace)
2110                 goto err_label;
2111
2112               /* Next comes the from-value.  */
2113               now = lr_token (ldfile, charmap, NULL);
2114               if (get_character (now, charmap, repertoire, &from_seq,
2115                                  &from_wch) != 0)
2116                 goto err_label;
2117
2118               /* The next is a comma.  */
2119               now = lr_token (ldfile, charmap, NULL);
2120               if (now->tok != tok_comma)
2121                 goto err_label;
2122
2123               /* And the other value.  */
2124               now = lr_token (ldfile, charmap, NULL);
2125               if (get_character (now, charmap, repertoire, &to_seq,
2126                                  &to_wch) != 0)
2127                 goto err_label;
2128
2129               /* And the last thing is the closing brace.  */
2130               now = lr_token (ldfile, charmap, NULL);
2131               if (now->tok != tok_close_brace)
2132                 goto err_label;
2133
2134               if (!ignore_content)
2135                 {
2136                   if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2137                       && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2138                     /* We can use this value.  */
2139                     ctype->map256_collection[mapidx][from_seq->bytes[0]]
2140                       = to_seq->bytes[0];
2141
2142                   if (from_wch != ILLEGAL_CHAR_VALUE
2143                       && to_wch != ILLEGAL_CHAR_VALUE)
2144                     /* Both correct values.  */
2145                     *find_idx (ctype, &ctype->map_collection[mapidx],
2146                                &ctype->map_collection_max[mapidx],
2147                                &ctype->map_collection_act[mapidx],
2148                                from_wch) = to_wch;
2149                 }
2150
2151               /* Now comes a semicolon or the end of the line/file.  */
2152               now = lr_token (ldfile, charmap, NULL);
2153               if (now->tok == tok_semicolon)
2154                 now = lr_token (ldfile, charmap, NULL);
2155             }
2156           break;
2157
2158         case tok_translit_start:
2159           /* Ignore the rest of the line if we don't need the input of
2160              this line.  */
2161           if (ignore_content)
2162             {
2163               lr_ignore_rest (ldfile, 0);
2164               break;
2165             }
2166
2167           /* The rest of the line better should be empty.  */
2168           lr_ignore_rest (ldfile, 1);
2169
2170           /* We count here the number of allocated entries in the `translit'
2171              array.  */
2172           cnt = 0;
2173
2174           /* We proceed until we see the `translit_end' token.  */
2175           while (now = lr_token (ldfile, charmap, repertoire),
2176                  now->tok != tok_translit_end && now->tok != tok_eof)
2177             {
2178               if (now->tok == tok_eol)
2179                 /* Ignore empty lines.  */
2180                 continue;
2181
2182               if (now->tok == tok_translit_end)
2183                 {
2184                   lr_ignore_rest (ldfile, 0);
2185                   break;
2186                 }
2187
2188               if (now->tok == tok_include)
2189                 {
2190                   /* We have to include locale.  */
2191                   const char *locale_name;
2192                   const char *repertoire_name;
2193
2194                   now = lr_token (ldfile, charmap, NULL);
2195                   /* This should be a string or an identifier.  In any
2196                      case something to name a locale.  */
2197                   if (now->tok != tok_string && now->tok != tok_ident)
2198                     {
2199                     translit_syntax:
2200                       lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2201                       lr_ignore_rest (ldfile, 0);
2202                       continue;
2203                     }
2204                   locale_name = now->val.str.startmb;
2205
2206                   /* Next should be a semicolon.  */
2207                   now = lr_token (ldfile, charmap, NULL);
2208                   if (now->tok != tok_semicolon)
2209                     goto translit_syntax;
2210
2211                   /* Now the repertoire name.  */
2212                   now = lr_token (ldfile, charmap, NULL);
2213                   if ((now->tok != tok_string && now->tok != tok_ident)
2214                       || now->val.str.startmb == NULL)
2215                     goto translit_syntax;
2216                   repertoire_name = now->val.str.startmb;
2217
2218                   /* We must not have more than one `include'.  */
2219                   if (ctype->translit_copy_locale != NULL)
2220                     {
2221                       lr_error (ldfile, _("\
2222 %s: only one `include' instruction allowed"), "LC_CTYPE");
2223                       lr_ignore_rest (ldfile, 0);
2224                       continue;
2225                     }
2226
2227                   ctype->translit_copy_locale = locale_name;
2228                   ctype->translit_copy_repertoire = repertoire_name;
2229
2230                   /* The rest of the line must be empty.  */
2231                   lr_ignore_rest (ldfile, 1);
2232                   continue;
2233                 }
2234
2235               read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2236             }
2237           break;
2238
2239         case tok_ident:
2240           /* Ignore the rest of the line if we don't need the input of
2241              this line.  */
2242           if (ignore_content)
2243             {
2244               lr_ignore_rest (ldfile, 0);
2245               break;
2246             }
2247
2248           /* This could mean one of several things.  First test whether
2249              it's a character class name.  */
2250           for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2251             if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2252               break;
2253           if (cnt < ctype->nr_charclass)
2254             {
2255               class_bit = _ISwbit (cnt);
2256               class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2257               free (now->val.str.startmb);
2258               goto read_charclass;
2259             }
2260           for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2261             if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2262               break;
2263           if (cnt < ctype->map_collection_nr)
2264             {
2265               mapidx = cnt;
2266               free (now->val.str.startmb);
2267               goto read_mapping;
2268             }
2269 #ifdef PREDEFINED_CLASSES
2270           if (strcmp (now->val.str.startmb, "special1") == 0)
2271             {
2272               class_bit = _ISwspecial1;
2273               free (now->val.str.startmb);
2274               goto read_charclass;
2275             }
2276           if (strcmp (now->val.str.startmb, "special2") == 0)
2277             {
2278               class_bit = _ISwspecial2;
2279               free (now->val.str.startmb);
2280               goto read_charclass;
2281             }
2282           if (strcmp (now->val.str.startmb, "special3") == 0)
2283             {
2284               class_bit = _ISwspecial3;
2285               free (now->val.str.startmb);
2286               goto read_charclass;
2287             }
2288           if (strcmp (now->val.str.startmb, "tosymmetric") == 0)
2289             {
2290               mapidx = 2;
2291               goto read_mapping;
2292             }
2293 #endif
2294           break;
2295
2296         case tok_end:
2297           /* Next we assume `LC_CTYPE'.  */
2298           now = lr_token (ldfile, charmap, NULL);
2299           if (now->tok == tok_eof)
2300             break;
2301           if (now->tok == tok_eol)
2302             lr_error (ldfile, _("%s: incomplete `END' line"),
2303                       "LC_CTYPE");
2304           else if (now->tok != tok_lc_ctype)
2305             lr_error (ldfile, _("\
2306 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2307           lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2308           return;
2309
2310         default:
2311         err_label:
2312           if (now->tok != tok_eof)
2313             SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2314         }
2315
2316       /* Prepare for the next round.  */
2317       now = lr_token (ldfile, charmap, NULL);
2318       nowtok = now->tok;
2319     }
2320
2321   /* When we come here we reached the end of the file.  */
2322   lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2323 }
2324
2325
2326 static void
2327 set_class_defaults (struct locale_ctype_t *ctype, struct charmap_t *charmap,
2328                     struct repertoire_t *repertoire)
2329 {
2330   size_t cnt;
2331
2332   /* These function defines the default values for the classes and conversions
2333      according to POSIX.2 2.5.2.1.
2334      It may seem that the order of these if-blocks is arbitrary but it is NOT.
2335      Don't move them unless you know what you do!  */
2336
2337   void set_default (int bitpos, int from, int to)
2338     {
2339       char tmp[2];
2340       int ch;
2341       int bit = _ISbit (bitpos);
2342       int bitw = _ISwbit (bitpos);
2343       /* Define string.  */
2344       strcpy (tmp, "?");
2345
2346       for (ch = from; ch <= to; ++ch)
2347         {
2348           uint32_t value;
2349           struct charseq *seq;
2350           tmp[0] = ch;
2351
2352           value = repertoire_find_value (repertoire, tmp, 1);
2353           if (value == ILLEGAL_CHAR_VALUE)
2354             {
2355               if (!be_quiet)
2356                 error (0, 0, _("\
2357 %s: character `%s' not defined in repertoire while needed as default value"),
2358                        "LC_CTYPE", tmp);
2359             }
2360           else
2361             ELEM (ctype, class_collection, , value) |= bitw;
2362
2363           seq = charmap_find_value (charmap, tmp, 1);
2364           if (seq == NULL)
2365             {
2366               if (!be_quiet)
2367                 error (0, 0, _("\
2368 %s: character `%s' not defined in charmap while needed as default value"),
2369                        "LC_CTYPE", tmp);
2370             }
2371           else if (seq->nbytes != 1)
2372             error (0, 0, _("\
2373 %s: character `%s' in charmap not representable with one byte"),
2374                    "LC_CTYPE", tmp);
2375           else
2376             ctype->class256_collection[seq->bytes[0]] |= bit;
2377         }
2378     }
2379
2380   /* Set default values if keyword was not present.  */
2381   if ((ctype->class_done & BITw (tok_upper)) == 0)
2382     /* "If this keyword [lower] is not specified, the lowercase letters
2383         `A' through `Z', ..., shall automatically belong to this class,
2384         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
2385     set_default (BITPOS (tok_upper), 'A', 'Z');
2386
2387   if ((ctype->class_done & BITw (tok_lower)) == 0)
2388     /* "If this keyword [lower] is not specified, the lowercase letters
2389         `a' through `z', ..., shall automatically belong to this class,
2390         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
2391     set_default (BITPOS (tok_lower), 'a', 'z');
2392
2393   if ((ctype->class_done & BITw (tok_alpha)) == 0)
2394     {
2395       /* Table 2-6 in P1003.2 says that characters in class `upper' or
2396          class `lower' *must* be in class `alpha'.  */
2397       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
2398       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
2399
2400       for (cnt = 0; cnt < 256; ++cnt)
2401         if ((ctype->class256_collection[cnt] & mask) != 0)
2402           ctype->class256_collection[cnt] |= BIT (tok_alpha);
2403
2404       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2405         if ((ctype->class_collection[cnt] & maskw) != 0)
2406           ctype->class_collection[cnt] |= BITw (tok_alpha);
2407     }
2408
2409   if ((ctype->class_done & BITw (tok_digit)) == 0)
2410     /* "If this keyword [digit] is not specified, the digits `0' through
2411         `9', ..., shall automatically belong to this class, with
2412         implementation-defined character values."  [P1003.2, 2.5.2.1]  */
2413     set_default (BITPOS (tok_digit), '0', '9');
2414
2415   /* "Only characters specified for the `alpha' and `digit' keyword
2416      shall be specified.  Characters specified for the keyword `alpha'
2417      and `digit' are automatically included in this class.  */
2418   {
2419     unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
2420     unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
2421
2422     for (cnt = 0; cnt < 256; ++cnt)
2423       if ((ctype->class256_collection[cnt] & mask) != 0)
2424         ctype->class256_collection[cnt] |= BIT (tok_alnum);
2425
2426     for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2427       if ((ctype->class_collection[cnt] & maskw) != 0)
2428         ctype->class_collection[cnt] |= BITw (tok_alnum);
2429   }
2430
2431   if ((ctype->class_done & BITw (tok_space)) == 0)
2432     /* "If this keyword [space] is not specified, the characters <space>,
2433         <form-feed>, <newline>, <carriage-return>, <tab>, and
2434         <vertical-tab>, ..., shall automatically belong to this class,
2435         with implementation-defined character values."  [P1003.2, 2.5.2.1]  */
2436     {
2437       uint32_t value;
2438       struct charseq *seq;
2439
2440       value = repertoire_find_value (repertoire, "space", 5);
2441       if (value == ILLEGAL_CHAR_VALUE)
2442         {
2443           if (!be_quiet)
2444             error (0, 0, _("\
2445 %s: character `%s' not defined while needed as default value"),
2446                    "LC_CTYPE", "<space>");
2447         }
2448       else
2449         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
2450
2451       seq = charmap_find_value (charmap, "space", 5);
2452       if (seq == NULL)
2453         {
2454           if (!be_quiet)
2455             error (0, 0, _("\
2456 %s: character `%s' not defined while needed as default value"),
2457                    "LC_CTYPE", "<space>");
2458         }
2459       else if (seq->nbytes != 1)
2460         error (0, 0, _("\
2461 %s: character `%s' in charmap not representable with one byte"),
2462                "LC_CTYPE", "<space>");
2463       else
2464         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2465
2466
2467       value = repertoire_find_value (repertoire, "form-feed", 9);
2468       if (value == ILLEGAL_CHAR_VALUE)
2469         {
2470           if (!be_quiet)
2471             error (0, 0, _("\
2472 %s: character `%s' not defined while needed as default value"),
2473                    "LC_CTYPE", "<form-feed>");
2474         }
2475       else
2476         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
2477
2478       seq = charmap_find_value (charmap, "form-feed", 9);
2479       if (seq == NULL)
2480         {
2481           if (!be_quiet)
2482             error (0, 0, _("\
2483 %s: character `%s' not defined while needed as default value"),
2484                    "LC_CTYPE", "<form-feed>");
2485         }
2486       else if (seq->nbytes != 1)
2487         error (0, 0, _("\
2488 %s: character `%s' in charmap not representable with one byte"),
2489                "LC_CTYPE", "<form-feed>");
2490       else
2491         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2492
2493
2494       value = repertoire_find_value (repertoire, "newline", 7);
2495       if (value == ILLEGAL_CHAR_VALUE)
2496         {
2497           if (!be_quiet)
2498             error (0, 0, _("\
2499 %s: character `%s' not defined while needed as default value"),
2500                    "LC_CTYPE", "<newline>");
2501         }
2502       else
2503         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
2504
2505       seq = charmap_find_value (charmap, "newline", 7);
2506       if (seq == NULL)
2507         {
2508           if (!be_quiet)
2509             error (0, 0, _("\
2510 character `%s' not defined while needed as default value"),
2511                    "<newline>");
2512         }
2513       else if (seq->nbytes != 1)
2514         error (0, 0, _("\
2515 %s: character `%s' in charmap not representable with one byte"),
2516                "LC_CTYPE", "<newline>");
2517       else
2518         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2519
2520
2521       value = repertoire_find_value (repertoire, "carriage-return", 15);
2522       if (value == ILLEGAL_CHAR_VALUE)
2523         {
2524           if (!be_quiet)
2525             error (0, 0, _("\
2526 %s: character `%s' not defined while needed as default value"),
2527                    "LC_CTYPE", "<carriage-return>");
2528         }
2529       else
2530         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
2531
2532       seq = charmap_find_value (charmap, "carriage-return", 15);
2533       if (seq == NULL)
2534         {
2535           if (!be_quiet)
2536             error (0, 0, _("\
2537 %s: character `%s' not defined while needed as default value"),
2538                    "LC_CTYPE", "<carriage-return>");
2539         }
2540       else if (seq->nbytes != 1)
2541         error (0, 0, _("\
2542 %s: character `%s' in charmap not representable with one byte"),
2543                "LC_CTYPE", "<carriage-return>");
2544       else
2545         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2546
2547
2548       value = repertoire_find_value (repertoire, "tab", 3);
2549       if (value == ILLEGAL_CHAR_VALUE)
2550         {
2551           if (!be_quiet)
2552             error (0, 0, _("\
2553 %s: character `%s' not defined while needed as default value"),
2554                    "LC_CTYPE", "<tab>");
2555         }
2556       else
2557         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
2558
2559       seq = charmap_find_value (charmap, "tab", 3);
2560       if (seq == NULL)
2561         {
2562           if (!be_quiet)
2563             error (0, 0, _("\
2564 %s: character `%s' not defined while needed as default value"),
2565                    "LC_CTYPE", "<tab>");
2566         }
2567       else if (seq->nbytes != 1)
2568         error (0, 0, _("\
2569 %s: character `%s' in charmap not representable with one byte"),
2570                "LC_CTYPE", "<tab>");
2571       else
2572         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2573
2574
2575       value = repertoire_find_value (repertoire, "vertical-tab", 12);
2576       if (value == ILLEGAL_CHAR_VALUE)
2577         {
2578           if (!be_quiet)
2579             error (0, 0, _("\
2580 %s: character `%s' not defined while needed as default value"),
2581                    "LC_CTYPE", "<vertical-tab>");
2582         }
2583       else
2584         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
2585
2586       seq = charmap_find_value (charmap, "vertical-tab", 12);
2587       if (seq == NULL)
2588         {
2589           if (!be_quiet)
2590             error (0, 0, _("\
2591 %s: character `%s' not defined while needed as default value"),
2592                    "LC_CTYPE", "<vertical-tab>");
2593         }
2594       else if (seq->nbytes != 1)
2595         error (0, 0, _("\
2596 %s: character `%s' in charmap not representable with one byte"),
2597                "LC_CTYPE", "<vertical-tab>");
2598       else
2599         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2600     }
2601
2602   if ((ctype->class_done & BITw (tok_xdigit)) == 0)
2603     /* "If this keyword is not specified, the digits `0' to `9', the
2604         uppercase letters `A' through `F', and the lowercase letters `a'
2605         through `f', ..., shall automatically belong to this class, with
2606         implementation defined character values."  [P1003.2, 2.5.2.1]  */
2607     {
2608       set_default (BITPOS (tok_xdigit), '0', '9');
2609       set_default (BITPOS (tok_xdigit), 'A', 'F');
2610       set_default (BITPOS (tok_xdigit), 'a', 'f');
2611     }
2612
2613   if ((ctype->class_done & BITw (tok_blank)) == 0)
2614     /* "If this keyword [blank] is unspecified, the characters <space> and
2615        <tab> shall belong to this character class."  [P1003.2, 2.5.2.1]  */
2616    {
2617       uint32_t value;
2618       struct charseq *seq;
2619
2620       value = repertoire_find_value (repertoire, "space", 5);
2621       if (value == ILLEGAL_CHAR_VALUE)
2622         {
2623           if (!be_quiet)
2624             error (0, 0, _("\
2625 %s: character `%s' not defined while needed as default value"),
2626                    "LC_CTYPE", "<space>");
2627         }
2628       else
2629         ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
2630
2631       seq = charmap_find_value (charmap, "space", 5);
2632       if (seq == NULL)
2633         {
2634           if (!be_quiet)
2635             error (0, 0, _("\
2636 %s: character `%s' not defined while needed as default value"),
2637                    "LC_CTYPE", "<space>");
2638         }
2639       else if (seq->nbytes != 1)
2640         error (0, 0, _("\
2641 %s: character `%s' in charmap not representable with one byte"),
2642                "LC_CTYPE", "<space>");
2643       else
2644         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
2645
2646
2647       value = repertoire_find_value (repertoire, "tab", 3);
2648       if (value == ILLEGAL_CHAR_VALUE)
2649         {
2650           if (!be_quiet)
2651             error (0, 0, _("\
2652 %s: character `%s' not defined while needed as default value"),
2653                    "LC_CTYPE", "<tab>");
2654         }
2655       else
2656         ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
2657
2658       seq = charmap_find_value (charmap, "tab", 3);
2659       if (seq == NULL)
2660         {
2661           if (!be_quiet)
2662             error (0, 0, _("\
2663 %s: character `%s' not defined while needed as default value"),
2664                    "LC_CTYPE", "<tab>");
2665         }
2666       else if (seq->nbytes != 1)
2667         error (0, 0, _("\
2668 %s: character `%s' in charmap not representable with one byte"),
2669                "LC_CTYPE", "<tab>");
2670       else
2671         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
2672     }
2673
2674   if ((ctype->class_done & BITw (tok_graph)) == 0)
2675     /* "If this keyword [graph] is not specified, characters specified for
2676         the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
2677         shall belong to this character class."  [P1003.2, 2.5.2.1]  */
2678     {
2679       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
2680         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
2681       size_t cnt;
2682
2683       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2684         if ((ctype->class_collection[cnt] & mask) != 0)
2685           ctype->class_collection[cnt] |= BIT (tok_graph);
2686
2687       for (cnt = 0; cnt < 256; ++cnt)
2688         if ((ctype->class256_collection[cnt] & mask) != 0)
2689           ctype->class256_collection[cnt] |= BIT (tok_graph);
2690     }
2691
2692   if ((ctype->class_done & BITw (tok_print)) == 0)
2693     /* "If this keyword [print] is not provided, characters specified for
2694         the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
2695         and the <space> character shall belong to this character class."
2696         [P1003.2, 2.5.2.1]  */
2697     {
2698       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
2699         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
2700       size_t cnt;
2701       uint32_t space;
2702       struct charseq *seq;
2703
2704       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2705         if ((ctype->class_collection[cnt] & mask) != 0)
2706           ctype->class_collection[cnt] |= BIT (tok_print);
2707
2708       for (cnt = 0; cnt < 256; ++cnt)
2709         if ((ctype->class256_collection[cnt] & mask) != 0)
2710           ctype->class256_collection[cnt] |= BIT (tok_print);
2711
2712
2713       space = repertoire_find_value (repertoire, "space", 5);
2714       if (space == ILLEGAL_CHAR_VALUE)
2715         {
2716           if (!be_quiet)
2717             error (0, 0, _("\
2718 %s: character `%s' not defined while needed as default value"),
2719                    "LC_CTYPE", "<space>");
2720         }
2721       else
2722         ELEM (ctype, class_collection, , space) |= BIT (tok_print);
2723
2724       seq = charmap_find_value (charmap, "space", 5);
2725       if (seq == NULL)
2726         {
2727           if (!be_quiet)
2728             error (0, 0, _("\
2729 %s: character `%s' not defined while needed as default value"),
2730                    "LC_CTYPE", "<space>");
2731         }
2732       else if (seq->nbytes != 1)
2733         error (0, 0, _("\
2734 %s: character `%s' in charmap not representable with one byte"),
2735                "LC_CTYPE", "<space>");
2736       else
2737         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
2738     }
2739
2740   if (ctype->tomap_done[0] == 0)
2741     /* "If this keyword [toupper] is not specified, the lowercase letters
2742         `a' through `z', and their corresponding uppercase letters `A' to
2743         `Z', ..., shall automatically be included, with implementation-
2744         defined character values."  [P1003.2, 2.5.2.1]  */
2745     {
2746       char tmp[4];
2747       int ch;
2748
2749       strcpy (tmp, "<?>");
2750
2751       for (ch = 'a'; ch <= 'z'; ++ch)
2752         {
2753           uint32_t value_from, value_to;
2754           struct charseq *seq_from, *seq_to;
2755
2756           tmp[1] = (char) ch;
2757
2758           value_from = repertoire_find_value (repertoire, &tmp[1], 1);
2759           if (value_from == ILLEGAL_CHAR_VALUE)
2760             {
2761               if (!be_quiet)
2762                 error (0, 0, _("\
2763 %s: character `%s' not defined while needed as default value"),
2764                        "LC_CTYPE", tmp);
2765             }
2766           else
2767             {
2768               /* This conversion is implementation defined.  */
2769               tmp[1] = (char) (ch + ('A' - 'a'));
2770               value_to = repertoire_find_value (repertoire, &tmp[1], 1);
2771               if (value_to == ILLEGAL_CHAR_VALUE)
2772                 {
2773                   if (!be_quiet)
2774                     error (0, 0, _("\
2775 %s: character `%s' not defined while needed as default value"),
2776                            "LC_CTYPE", tmp);
2777                 }
2778               else
2779                 /* The index [0] is determined by the order of the
2780                    `ctype_map_newP' calls in `ctype_startup'.  */
2781                 ELEM (ctype, map_collection, [0], value_from) = value_to;
2782             }
2783
2784           seq_from = charmap_find_value (charmap, &tmp[1], 1);
2785           if (seq_from == NULL)
2786             {
2787               if (!be_quiet)
2788                 error (0, 0, _("\
2789 %s: character `%s' not defined while needed as default value"),
2790                        "LC_CTYPE", tmp);
2791             }
2792           else if (seq_from->nbytes != 1)
2793             {
2794               if (!be_quiet)
2795                 error (0, 0, _("\
2796 %s: character `%s' needed as default value not representable with one byte"),
2797                        "LC_CTYPE", tmp);
2798             }
2799           else
2800             {
2801               /* This conversion is implementation defined.  */
2802               tmp[1] = (char) (ch + ('A' - 'a'));
2803               seq_to = charmap_find_value (charmap, &tmp[1], 1);
2804               if (seq_to == NULL)
2805                 {
2806                   if (!be_quiet)
2807                     error (0, 0, _("\
2808 %s: character `%s' not defined while needed as default value"),
2809                            "LC_CTYPE", tmp);
2810                 }
2811               else if (seq_to->nbytes != 1)
2812                 {
2813                   if (!be_quiet)
2814                     error (0, 0, _("\
2815 %s: character `%s' needed as default value not representable with one byte"),
2816                            "LC_CTYPE", tmp);
2817                 }
2818               else
2819                 /* The index [0] is determined by the order of the
2820                    `ctype_map_newP' calls in `ctype_startup'.  */
2821                 ctype->map256_collection[0][seq_from->bytes[0]]
2822                   = seq_to->bytes[0];
2823             }
2824         }
2825     }
2826
2827   if (ctype->tomap_done[1] == 0)
2828     /* "If this keyword [tolower] is not specified, the mapping shall be
2829        the reverse mapping of the one specified to `toupper'."  [P1003.2]  */
2830     {
2831       for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
2832         if (ctype->map_collection[0][cnt] != 0)
2833           ELEM (ctype, map_collection, [1],
2834                 ctype->map_collection[0][cnt])
2835             = ctype->charnames[cnt];
2836
2837       for (cnt = 0; cnt < 256; ++cnt)
2838         if (ctype->map256_collection[0][cnt] != 0)
2839           ctype->map_collection[1][ctype->map_collection[0][cnt]]
2840             = ctype->charnames[cnt];
2841     }
2842
2843   if (ctype->outdigits_act == 0)
2844     {
2845       for (cnt = 0; cnt < 10; ++cnt)
2846         {
2847           ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
2848                                                          digits + cnt, 1);
2849
2850           if (ctype->mboutdigits[cnt] == NULL)
2851             {
2852               ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
2853                                                              longnames[cnt],
2854                                                              strlen (longnames[cnt]));
2855
2856               if (ctype->mboutdigits[cnt] == NULL)
2857                 {
2858                   /* Provide a replacement.  */
2859                   error (0, 0, _("\
2860 no output digits defined and none of the standard names in the charmap"));
2861
2862                   ctype->mboutdigits[cnt] = obstack_alloc (&charmap->mem_pool,
2863                                                            sizeof (struct charseq) + 1);
2864
2865                   /* This is better than nothing.  */
2866                   ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
2867                   ctype->mboutdigits[cnt]->nbytes = 1;
2868                 }
2869             }
2870
2871           ctype->wcoutdigits[cnt] = repertoire_find_value (repertoire,
2872                                                            digits + cnt, 1);
2873
2874           if (ctype->wcoutdigits[cnt] == ILLEGAL_CHAR_VALUE)
2875             {
2876               ctype->wcoutdigits[cnt] = repertoire_find_value (repertoire,
2877                                                                longnames[cnt],
2878                                                                strlen (longnames[cnt]));
2879
2880               if (ctype->wcoutdigits[cnt] == ILLEGAL_CHAR_VALUE)
2881                 {
2882                   /* Provide a replacement.  */
2883                   error (0, 0, _("\
2884 no output digits defined and none of the standard names in the repertoire"));
2885
2886                   /* This is better than nothing.  */
2887                   ctype->wcoutdigits[cnt] = (uint32_t) digits[cnt];
2888                 }
2889             }
2890         }
2891
2892       ctype->outdigits_act = 10;
2893     }
2894 }
2895
2896
2897 static void
2898 allocate_arrays (struct locale_ctype_t *ctype, struct charmap_t *charmap,
2899                  struct repertoire_t *repertoire)
2900 {
2901   size_t idx;
2902
2903   /* First we have to decide how we organize the arrays.  It is easy
2904      for a one-byte character set.  But multi-byte character set
2905      cannot be stored flat because the chars might be sparsely used.
2906      So we determine an optimal hashing function for the used
2907      characters.
2908
2909      We use a very trivial hashing function to store the sparse
2910      table.  CH % TABSIZE is used as an index.  To solve multiple hits
2911      we have N planes.  This guarantees a fixed search time for a
2912      character [N / 2].  In the following code we determine the minimum
2913      value for TABSIZE * N, where TABSIZE >= 256.  */
2914   size_t min_total = UINT_MAX;
2915   size_t act_size = 256;
2916
2917   if (!be_quiet)
2918     fputs (_("\
2919 Computing table size for character classes might take a while..."),
2920            stderr);
2921
2922   while (act_size < min_total)
2923     {
2924       size_t cnt[act_size];
2925       size_t act_planes = 1;
2926
2927       memset (cnt, '\0', sizeof cnt);
2928
2929       for (idx = 0; idx < 256; ++idx)
2930         cnt[idx] = 1;
2931
2932       for (idx = 0; idx < ctype->charnames_act; ++idx)
2933         if (ctype->charnames[idx] >= 256)
2934           {
2935             size_t nr = ctype->charnames[idx] % act_size;
2936
2937             if (++cnt[nr] > act_planes)
2938               {
2939                 act_planes = cnt[nr];
2940                 if (act_size * act_planes >= min_total)
2941                   break;
2942               }
2943           }
2944
2945       if (act_size * act_planes < min_total)
2946         {
2947           min_total = act_size * act_planes;
2948           ctype->plane_size = act_size;
2949           ctype->plane_cnt = act_planes;
2950         }
2951
2952       ++act_size;
2953     }
2954
2955   if (!be_quiet)
2956     fputs (_(" done\n"), stderr);
2957
2958
2959   ctype->names = (uint32_t *) xcalloc (ctype->plane_size
2960                                        * ctype->plane_cnt,
2961                                        sizeof (uint32_t));
2962
2963   for (idx = 1; idx < 256; ++idx)
2964     ctype->names[idx] = idx;
2965
2966   /* Trick: change the 0th entry's name to 1 to mark the cell occupied.  */
2967   ctype->names[0] = 1;
2968
2969   for (idx = 256; idx < ctype->charnames_act; ++idx)
2970     {
2971       size_t nr = (ctype->charnames[idx] % ctype->plane_size);
2972       size_t depth = 0;
2973
2974       while (ctype->names[nr + depth * ctype->plane_size])
2975         ++depth;
2976       assert (depth < ctype->plane_cnt);
2977
2978       ctype->names[nr + depth * ctype->plane_size] = ctype->charnames[idx];
2979
2980       /* Now for faster access remember the index in the NAMES_B array.  */
2981       ctype->charnames[idx] = nr + depth * ctype->plane_size;
2982     }
2983   ctype->names[0] = 0;
2984
2985
2986   /* You wonder about this amount of memory?  This is only because some
2987      users do not manage to address the array with unsigned values or
2988      data types with range >= 256.  '\200' would result in the array
2989      index -128.  To help these poor people we duplicate the entries for
2990      128 up to 255 below the entry for \0.  */
2991   ctype->ctype_b = (char_class_t *) xcalloc (256 + 128,
2992                                              sizeof (char_class_t));
2993   ctype->ctype32_b = (char_class32_t *) xcalloc (ctype->plane_size
2994                                                  * ctype->plane_cnt,
2995                                                  sizeof (char_class32_t));
2996
2997   /* This is the array accessed using the multibyte string elements.  */
2998   for (idx = 0; idx < 256; ++idx)
2999     ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
3000
3001   /* Mirror first 127 entries.  We must take care that entry -1 is not
3002      mirrored because EOF == -1.  */
3003   for (idx = 0; idx < 127; ++idx)
3004     ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
3005
3006   /* The 32 bit array contains all characters.  */
3007   for (idx = 0; idx < ctype->class_collection_act; ++idx)
3008     ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
3009
3010   /* Room for table of mappings.  */
3011   ctype->map = (uint32_t **) xmalloc (ctype->map_collection_nr
3012                                       * sizeof (uint32_t *));
3013
3014   /* Fill in all mappings.  */
3015   for (idx = 0; idx < ctype->map_collection_nr; ++idx)
3016     {
3017       unsigned int idx2;
3018
3019       /* Allocate table.  */
3020       ctype->map[idx] = (uint32_t *) xmalloc ((ctype->plane_size
3021                                                * ctype->plane_cnt + 128)
3022                                               * sizeof (uint32_t));
3023
3024       /* Copy default value (identity mapping).  */
3025       memcpy (&ctype->map[idx][128], ctype->names,
3026               ctype->plane_size * ctype->plane_cnt * sizeof (uint32_t));
3027
3028       /* Copy values from collection.  */
3029       for (idx2 = 0; idx2 < 256; ++idx2)
3030         ctype->map[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
3031
3032       /* Mirror first 127 entries.  We must take care not to map entry
3033          -1 because EOF == -1.  */
3034       for (idx2 = 0; idx2 < 127; ++idx2)
3035         ctype->map[idx][idx2] = ctype->map[idx][256 + idx2];
3036
3037       /* EOF must map to EOF.  */
3038       ctype->map[idx][127] = EOF;
3039
3040       /* The 32 bit map collection.  */
3041       for (idx2 = 0; idx2 < ctype->map_collection_act[idx]; ++idx2)
3042         if (ctype->map_collection[idx][idx2] != 0)
3043           ctype->map[idx][128 + ctype->charnames[idx2]]
3044             = ctype->map_collection[idx][idx2];
3045     }
3046
3047   /* Extra array for class and map names.  */
3048   ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3049                                                 * sizeof (uint32_t));
3050   ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3051                                               * sizeof (uint32_t));
3052
3053   /* Array for width information.  Because the expected widths are very
3054      small we use only a single byte each.  This saves space and we need
3055      not provide the information twice, once for each endianness.  */
3056   ctype->width = (unsigned char *) xmalloc (ctype->plane_size
3057                                             * ctype->plane_cnt);
3058   /* Initialize with default width value.  */
3059   memset (ctype->width, charmap->width_default,
3060           ctype->plane_size * ctype->plane_cnt);
3061   if (charmap->width_rules != NULL)
3062     {
3063       size_t cnt;
3064
3065       for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
3066         {
3067           unsigned char bytes[charmap->mb_cur_max];
3068           int nbytes = charmap->width_rules[cnt].from->nbytes;
3069
3070           /* We have the range of character for which the width is
3071              specified described using byte sequences of the multibyte
3072              charset.  We have to convert this to UCS4 now.  And we
3073              cannot simply convert the beginning and the end of the
3074              sequence, we have to iterate over the byte sequence and
3075              convert it for every single character.  */
3076           memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
3077
3078           while (nbytes < charmap->width_rules[cnt].to->nbytes
3079                  || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
3080                             nbytes) <= 0)
3081             {
3082               /* Find the UCS value for `bytes'.  */
3083               uint32_t wch = repertoire_find_value (ctype->repertoire, bytes,
3084                                                     nbytes);
3085               int inner;
3086
3087               if (wch != ILLEGAL_CHAR_VALUE)
3088                 {
3089                   /* Store the value.  */
3090                   size_t nr = idx % ctype->plane_size;
3091                   size_t depth = 0;
3092
3093                   while (ctype->names[nr + depth * ctype->plane_size] != nr)
3094                     ++depth;
3095                   assert (depth < ctype->plane_cnt);
3096
3097                   ctype->width[nr + depth * ctype->plane_size]
3098                     = charmap->width_rules[cnt].width;
3099                 }
3100
3101               /* "Increment" the bytes sequence.  */
3102               inner = nbytes - 1;
3103               while (inner >= 0 && bytes[inner] == 0xff)
3104                 --inner;
3105
3106               if (inner < 0)
3107                 {
3108                   /* We have to extend the byte sequence.  */
3109                   if (nbytes >= charmap->width_rules[cnt].to->nbytes)
3110                     break;
3111
3112                   bytes[0] = 1;
3113                   memset (&bytes[1], 0, nbytes);
3114                   ++nbytes;
3115                 }
3116               else
3117                 {
3118                   ++bytes[inner];
3119                   while (++inner < nbytes)
3120                     bytes[inner] = 0;
3121                 }
3122             }
3123         }
3124     }
3125
3126   /* Set MB_CUR_MAX.  */
3127   ctype->mb_cur_max = charmap->mb_cur_max;
3128
3129   /* We need the name of the currently used 8-bit character set to
3130      make correct conversion between this 8-bit representation and the
3131      ISO 10646 character set used internally for wide characters.  */
3132   ctype->codeset_name = charmap->code_set_name;
3133
3134   /* Now determine the table for the transliteration information.
3135
3136      XXX It is not yet clear to me whether it is worth implementing a
3137      complicated algorithm which uses a hash table to locate the entries.
3138      For now I'll use a simple array which can be searched using binary
3139      search.  */
3140   if (ctype->translit_copy_locale != NULL)
3141     {
3142       /* Fold in the transliteration information from the locale mentioned
3143          in the `include' statement.  */
3144       struct locale_ctype_t *here = ctype;
3145
3146       do
3147         {
3148           struct localedef_t *other = find_locale (LC_CTYPE,
3149                                                    here->translit_copy_locale,
3150                                                    repertoire->name, charmap);
3151
3152           if (other == NULL)
3153             {
3154               error (0, 0, _("\
3155 %s: transliteration data from locale `%s' not available"),
3156                      "LC_CTYPE", here->translit_copy_locale);
3157               break;
3158             }
3159
3160           here = other->categories[LC_CTYPE].ctype;
3161
3162           /* Enqueue the information if necessary.  */
3163           if (here->translit != NULL)
3164             {
3165               struct translit_t *endp = here->translit;
3166               while (endp->next != NULL)
3167                 endp = endp->next;
3168
3169               endp->next = ctype->translit;
3170               ctype->translit = here->translit;
3171             }
3172         }
3173       while (here->translit_copy_locale != NULL);
3174     }
3175
3176   if (ctype->translit != NULL)
3177     {
3178       /* First count how many entries we have.  This is the upper limit
3179          since some entries from the included files might be overwritten.  */
3180       size_t number = 0;
3181       size_t cnt;
3182       struct translit_t *runp = ctype->translit;
3183       struct translit_t **sorted;
3184       size_t from_len, to_len;
3185
3186       while (runp != NULL)
3187         {
3188           ++number;
3189           runp = runp->next;
3190         }
3191
3192       /* Next we allocate an array large enough and fill in the values.  */
3193       sorted = (struct translit_t **) alloca (number
3194                                               * sizeof (struct translit_t **));
3195       runp = ctype->translit;
3196       number = 0;
3197       do
3198         {
3199           /* Search for the place where to insert this string.
3200              XXX Better use a real sorting algorithm later.  */
3201           size_t idx = 0;
3202           int replace = 0;
3203
3204           while (idx < number)
3205             {
3206               int res = wcscmp ((const wchar_t *) sorted[idx]->from,
3207                                 (const wchar_t *) runp->from);
3208               if (res == 0)
3209                 {
3210                   replace = 1;
3211                   break;
3212                 }
3213               if (res > 0)
3214                 break;
3215               ++idx;
3216             }
3217
3218           if (replace)
3219             sorted[idx] = runp;
3220           else
3221             {
3222               memmove (&sorted[idx + 1], &sorted[idx],
3223                        (number - idx) * sizeof (struct translit_t *));
3224               sorted[idx] = runp;
3225               ++number;
3226             }
3227
3228           runp = runp->next;
3229         }
3230       while (runp != NULL);
3231
3232       /* The next step is putting all the possible transliteration
3233          strings in one memory block so that we can write it out.
3234          We need several different blocks:
3235          - index to the from-string array
3236          - from-string array
3237          - index to the to-string array
3238          - to-string array.
3239          And this all must be available for both endianness variants.
3240       */
3241       from_len = to_len = 0;
3242       for (cnt = 0; cnt < number; ++cnt)
3243         {
3244           struct translit_to_t *srunp;
3245           from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
3246           srunp = sorted[cnt]->to;
3247           while (srunp != NULL)
3248             {
3249               to_len += wcslen ((const wchar_t *) srunp->str) + 1;
3250               srunp = srunp->next;
3251             }
3252           /* Plus one for the extra NUL character marking the end of
3253              the list for the current entry.  */
3254           ++to_len;
3255         }
3256
3257       /* We can allocate the arrays for the results.  */
3258       ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
3259       ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
3260       ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
3261       ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
3262
3263       from_len = 0;
3264       to_len = 0;
3265       for (cnt = 0; cnt < number; ++cnt)
3266         {
3267           size_t len;
3268           struct translit_to_t *srunp;
3269
3270           ctype->translit_from_idx[cnt] = from_len;
3271           ctype->translit_to_idx[cnt] = to_len;
3272
3273           len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
3274           wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
3275                    (const wchar_t *) sorted[cnt]->from, len);
3276           from_len += len;
3277
3278           ctype->translit_to_idx[cnt] = to_len;
3279           srunp = sorted[cnt]->to;
3280           while (srunp != NULL)
3281             {
3282               len = wcslen ((const wchar_t *) srunp->str) + 1;
3283               wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
3284                        (const wchar_t *) srunp->str, len);
3285               to_len += len;
3286               srunp = srunp->next;
3287             }
3288           ctype->translit_to_tbl[to_len++] = L'\0';
3289         }
3290
3291       /* Store the information about the length.  */
3292       ctype->translit_idx_size = number * sizeof (uint32_t);
3293       ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
3294       ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
3295     }
3296   else
3297     {
3298       /* Provide some dummy pointers since we have nothing to write out.  */
3299       static uint32_t no_str = { 0 };
3300
3301       ctype->translit_from_idx = &no_str;
3302       ctype->translit_from_tbl = &no_str;
3303       ctype->translit_to_tbl = &no_str;
3304       ctype->translit_idx_size = 0;
3305       ctype->translit_from_tbl_size = 0;
3306       ctype->translit_to_tbl_size = 0;
3307     }
3308 }