locale/programs/ld-ctype.c

   1 /* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Library General Public License as
   7    published by the Free Software Foundation; either version 2 of the
   8    License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Library General Public License for more details.
  14
  15    You should have received a copy of the GNU Library General Public
  16    License along with the GNU C Library; see the file COPYING.LIB.  If not,
  17    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  18    Boston, MA 02111-1307, USA.  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 # include <config.h>
  22 #endif
  23
  24 #include <alloca.h>
  25 #include <byteswap.h>
  26 #include <endian.h>
  27 #include <errno.h>
  28 #include <limits.h>
  29 #include <obstack.h>
  30 #include <stdlib.h>
  31 #include <string.h>
  32 #include <wchar.h>
  33 #include <wctype.h>
  34 #include <sys/uio.h>
  35
  36 #include "charmap.h"
  37 #include "localeinfo.h"
  38 #include "langinfo.h"
  39 #include "linereader.h"
  40 #include "locfile-token.h"
  41 #include "locfile.h"
  42 #include "localedef.h"
  43
  44 #include <assert.h>
  45
  46
  47 #ifdef PREDEFINED_CLASSES
  48 /* These are the extra bits not in wctype.h since these are not preallocated
  49    classes.  */
  50 # define _ISwspecial1   (1 << 29)
  51 # define _ISwspecial2   (1 << 30)
  52 # define _ISwspecial3   (1 << 31)
  53 #endif
  54
  55
   56 /* The bit used for representing a special class.  */
      /* BITPOS is the distance of the class token from tok_upper; BIT and
         BITw feed that position to _ISbit and _ISwbit respectively.  */
   57 #define BITPOS(class) ((class) - tok_upper)
   58 #define BIT(class) (_ISbit (BITPOS (class)))
   59 #define BITw(class) (_ISwbit (BITPOS (class)))
   60
      /* Expand to the lvalue `*find_idx (...)' for VALUE in CTYPE->COLLECTION.
         IDX is appended textually to the COLLECTION, COLLECTION_max and
         COLLECTION_act member names; it is left empty for class_collection
         (presumably a subscript is passed for array members such as
         map_collection -- TODO confirm against the callers).  */
   61 #define ELEM(ctype, collection, idx, value)                                   \
   62   *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,     \
   63              &ctype->collection##_act idx, value)
  64
  65
   66 /* To stay compatible with former implementations the number of bits
   67    for character classes is restricted to 16 for now.  Once compatibility
   68    is no longer necessary, increase char_class_t to 32 bits as well.  */
   69 #define char_class_t uint16_t
   70 #define char_class32_t uint32_t
  71
  72
   73 /* Type to describe a transliteration action.  We have a possibly
   74    multiple character from-string and a set of multiple character
   75    to-strings.  All are 32bit values since this is what is used in
   76    the gconv functions.  */
   77 struct translit_to_t
   78 {
   79   uint32_t *str;                /* One to-string.  */
   80
   81   struct translit_to_t *next;   /* Next to-string of the same set.  */
   82 };
   83
   84 struct translit_t
   85 {
   86   uint32_t *from;               /* The from-string.  */
   87
   88   struct translit_to_t *to;     /* First element of the to-string set.  */
   89
   90   struct translit_t *next;      /* Next transliteration action.  */
   91 };
  92
  93
   94 /* The real definition of the struct for the LC_CTYPE locale.  */
   95 struct locale_ctype_t
   96 {
        /* Wide character values of the characters known so far, with the
           allocated and the used number of entries.  ctype_startup seeds
           the first 256 entries with their own index.  */
   97   uint32_t *charnames;
   98   size_t charnames_max;
   99   size_t charnames_act;
  100
        /* Repertoire consulted when names are converted to wide character
           values.  */
  101   struct repertoire_t *repertoire;
  102
  103   /* We will allow up to 8 * sizeof (uint32_t) character classes.  */
  104 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
  105   size_t nr_charclass;
  106   const char *classnames[MAX_NR_CHARCLASS];
  107   uint32_t last_class_char;
        /* Class bits indexed by byte value.  */
  108   uint32_t class256_collection[256];
        /* Class bits for the character stored at the same index in
           `charnames', with allocated and used number of entries.  */
  109   uint32_t *class_collection;
  110   size_t class_collection_max;
  111   size_t class_collection_act;
  112   uint32_t class_done;
  113
        /* Input digits as multibyte sequences and as wide characters;
           ctype_finish keeps only complete groups of ten.  */
  114   struct charseq **mbdigits;
  115   size_t mbdigits_act;
  116   size_t mbdigits_max;
  117   uint32_t *wcdigits;
  118   size_t wcdigits_act;
  119   size_t wcdigits_max;
  120
        /* Output digits; ctype_finish substitutes `?' for missing ones.  */
  121   struct charseq *mboutdigits[10];
  122   uint32_t wcoutdigits[10];
  123   size_t outdigits_act;
  124
  125   /* If the following number ever turns out to be too small simply
  126      increase it.  But I doubt it will.  --drepper@gnu */
  127 #define MAX_NR_CHARMAP 16
  128   const char *mapnames[MAX_NR_CHARMAP];
  129   uint32_t *map_collection[MAX_NR_CHARMAP];
        /* Byte-indexed tables; ctype_startup fills both rows with the
           identity mapping.  */
  130   uint32_t map256_collection[2][256];
  131   size_t map_collection_max[MAX_NR_CHARMAP];
  132   size_t map_collection_act[MAX_NR_CHARMAP];
  133   size_t map_collection_nr;
  134   size_t last_map_idx;
  135   int tomap_done[MAX_NR_CHARMAP];
  136
  137   /* Transliteration information.  */
  138   const char *translit_copy_locale;
  139   const char *translit_copy_repertoire;
  140   struct translit_t *translit;
  141
  142   /* The arrays for the binary representation.  */
  143   uint32_t plane_size;
  144   uint32_t plane_cnt;
  145   char_class_t *ctype_b;
  146   char_class32_t *ctype32_b;
  147   uint32_t *names;
  148   uint32_t **map;
  149   uint32_t *class_name_ptr;
  150   uint32_t *map_name_ptr;
  151   unsigned char *width;
  152   uint32_t mb_cur_max;
  153   const char *codeset_name;
  154   uint32_t translit_hash_size;
  155   uint32_t translit_hash_layers;
  156   uint32_t *translit_from_idx;
  157   uint32_t *translit_from_tbl;
  158   uint32_t *translit_to_idx;
  159   uint32_t *translit_to_tbl;
  160   size_t translit_idx_size;
  161   size_t translit_from_tbl_size;
  162   size_t translit_to_tbl_size;
  163
        /* Obstack initialized by ctype_startup.  */
  164   struct obstack mem_pool;
  165 };
 166
  167 /* The obstack macros obtain and release their chunks through these.  */
  168 #define obstack_chunk_alloc xmalloc
  169 #define obstack_chunk_free free
  170
  171
  172 /* Prototypes for local functions.  */
  173 static void ctype_startup (struct linereader *lr, struct localedef_t *locale,
  174                            struct charmap_t *charmap, int ignore_content);
  175 static void ctype_class_new (struct linereader *lr,
  176                              struct locale_ctype_t *ctype, const char *name);
  177 static void ctype_map_new (struct linereader *lr,
  178                            struct locale_ctype_t *ctype,
  179                            const char *name, struct charmap_t *charmap);
  180 static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table,
  181                            size_t *max, size_t *act, unsigned int idx);
  182 static void set_class_defaults (struct locale_ctype_t *ctype,
  183                                 struct charmap_t *charmap,
  184                                 struct repertoire_t *repertoire);
  185 static void allocate_arrays (struct locale_ctype_t *ctype,
  186                              struct charmap_t *charmap,
  187                              struct repertoire_t *repertoire);
 188
  189 /* Charmap symbol names ctype_finish looks up when no input digits were
         given: first the one-character name taken from `digits', then the
         corresponding entry of `longnames'.  The characters in `digits' also
         serve as the last-resort byte values.  */
  190 static const char *longnames[] =
  191 {
  192   "zero", "one", "two", "three", "four",
  193   "five", "six", "seven", "eight", "nine"
  194 };
  195 static const unsigned char digits[] = "0123456789";
 196
 197
 198 static void
 199 ctype_startup (struct linereader *lr, struct localedef_t *locale,
 200                struct charmap_t *charmap, int ignore_content)
 201 {
 202   unsigned int cnt;
 203   struct locale_ctype_t *ctype;
 204
 205   if (!ignore_content)
 206     {
 207       /* Allocate the needed room.  */
 208       locale->categories[LC_CTYPE].ctype = ctype =
 209         (struct locale_ctype_t *) xcalloc (1, sizeof (struct locale_ctype_t));
 210
 211       /* We have seen no names yet.  */
 212       ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
 213       ctype->charnames =
 214         (unsigned int *) xmalloc (ctype->charnames_max
 215                                   * sizeof (unsigned int));
 216       for (cnt = 0; cnt < 256; ++cnt)
 217         ctype->charnames[cnt] = cnt;
 218       ctype->charnames_act = 256;
 219
 220       /* Fill character class information.  */
 221       ctype->last_class_char = ILLEGAL_CHAR_VALUE;
 222       /* The order of the following instructions determines the bit
 223          positions!  */
 224       ctype_class_new (lr, ctype, "upper");
 225       ctype_class_new (lr, ctype, "lower");
 226       ctype_class_new (lr, ctype, "alpha");
 227       ctype_class_new (lr, ctype, "digit");
 228       ctype_class_new (lr, ctype, "xdigit");
 229       ctype_class_new (lr, ctype, "space");
 230       ctype_class_new (lr, ctype, "print");
 231       ctype_class_new (lr, ctype, "graph");
 232       ctype_class_new (lr, ctype, "blank");
 233       ctype_class_new (lr, ctype, "cntrl");
 234       ctype_class_new (lr, ctype, "punct");
 235       ctype_class_new (lr, ctype, "alnum");
 236 #ifdef PREDEFINED_CLASSES
 237       /* The following are extensions from ISO 14652.  */
 238       ctype_class_new (lr, ctype, "left_to_right");
 239       ctype_class_new (lr, ctype, "right_to_left");
 240       ctype_class_new (lr, ctype, "num_terminator");
 241       ctype_class_new (lr, ctype, "num_separator");
 242       ctype_class_new (lr, ctype, "segment_separator");
 243       ctype_class_new (lr, ctype, "block_separator");
 244       ctype_class_new (lr, ctype, "direction_control");
 245       ctype_class_new (lr, ctype, "sym_swap_layout");
 246       ctype_class_new (lr, ctype, "char_shape_selector");
 247       ctype_class_new (lr, ctype, "num_shape_selector");
 248       ctype_class_new (lr, ctype, "non_spacing");
 249       ctype_class_new (lr, ctype, "non_spacing_level3");
 250       ctype_class_new (lr, ctype, "normal_connect");
 251       ctype_class_new (lr, ctype, "r_connect");
 252       ctype_class_new (lr, ctype, "no_connect");
 253       ctype_class_new (lr, ctype, "no_connect-space");
 254       ctype_class_new (lr, ctype, "vowel_connect");
 255 #endif
 256
 257       ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
 258       ctype->class_collection
 259         = (uint32_t *) xcalloc (sizeof (unsigned long int),
 260                                 ctype->class_collection_max);
 261       ctype->class_collection_act = 256;
 262
 263       /* Fill character map information.  */
 264       ctype->map_collection_nr = 0;
 265       ctype->last_map_idx = MAX_NR_CHARMAP;
 266       ctype_map_new (lr, ctype, "toupper", charmap);
 267       ctype_map_new (lr, ctype, "tolower", charmap);
 268 #ifdef PREDEFINED_CLASSES
 269       ctype_map_new (lr, ctype, "tosymmetric", charmap);
 270 #endif
 271
 272       /* Fill first 256 entries in `toXXX' arrays.  */
 273       for (cnt = 0; cnt < 256; ++cnt)
 274         {
 275           ctype->map_collection[0][cnt] = cnt;
 276           ctype->map_collection[1][cnt] = cnt;
 277 #ifdef PREDEFINED_CLASSES
 278           ctype->map_collection[2][cnt] = cnt;
 279 #endif
 280           ctype->map256_collection[0][cnt] = cnt;
 281           ctype->map256_collection[1][cnt] = cnt;
 282         }
 283
 284       obstack_init (&ctype->mem_pool);
 285     }
 286 }
 287
 288
 289 void
 290 ctype_finish (struct localedef_t *locale, struct charmap_t *charmap)
 291 {
 292   /* See POSIX.2, table 2-6 for the meaning of the following table.  */
 293 #define NCLASS 12
 294   static const struct
 295   {
 296     const char *name;
 297     const char allow[NCLASS];
 298   }
 299   valid_table[NCLASS] =
 300   {
 301     /* The order is important.  See token.h for more information.
 302        M = Always, D = Default, - = Permitted, X = Mutually exclusive  */
 303     { "upper",  "--MX-XDDXXX-" },
 304     { "lower",  "--MX-XDDXXX-" },
 305     { "alpha",  "---X-XDDXXX-" },
 306     { "digit",  "XXX--XDDXXX-" },
 307     { "xdigit", "-----XDDXXX-" },
 308     { "space",  "XXXXX------X" },
 309     { "print",  "---------X--" },
 310     { "graph",  "---------X--" },
 311     { "blank",  "XXXXXM-----X" },
 312     { "cntrl",  "XXXXX-XX--XX" },
 313     { "punct",  "XXXXX-DD-X-X" },
 314     { "alnum",  "-----XDDXXX-" }
 315   };
 316   size_t cnt;
 317   int cls1, cls2;
 318   uint32_t space_value;
 319   struct charseq *space_seq;
 320   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 321   int warned;
 322
 323   /* Now resolve copying and also handle completely missing definitions.  */
 324   if (ctype == NULL)
 325     {
 326       /* First see whether we were supposed to copy.  If yes, find the
 327          actual definition.  */
 328       if (locale->copy_name[LC_CTYPE] != NULL)
 329         {
 330           /* Find the copying locale.  This has to happen transitively since
 331              the locale we are copying from might also copying another one.  */
 332           struct localedef_t *from = locale;
 333
 334           do
 335             from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
 336                                 from->repertoire_name, charmap);
 337           while (from->categories[LC_CTYPE].ctype == NULL
 338                  && from->copy_name[LC_CTYPE] != NULL);
 339
 340           ctype = locale->categories[LC_CTYPE].ctype
 341             = from->categories[LC_CTYPE].ctype;
 342         }
 343
 344       /* If there is still no definition issue an warning and create an
 345          empty one.  */
 346       if (ctype == NULL)
 347         {
 348           error (0, 0, _("No definition for %s category found"), "LC_CTYPE");
 349           ctype_startup (NULL, locale, charmap, 0);
 350           ctype = locale->categories[LC_CTYPE].ctype;
 351         }
 352     }
 353
 354   /* Set default value for classes not specified.  */
 355   set_class_defaults (ctype, charmap, ctype->repertoire);
 356
 357   /* Check according to table.  */
 358   for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
 359     {
 360       uint32_t tmp = ctype->class_collection[cnt];
 361
 362       if (tmp != 0)
 363         {
 364           for (cls1 = 0; cls1 < NCLASS; ++cls1)
 365             if ((tmp & _ISwbit (cls1)) != 0)
 366               for (cls2 = 0; cls2 < NCLASS; ++cls2)
 367                 if (valid_table[cls1].allow[cls2] != '-')
 368                   {
 369                     int eq = (tmp & _ISwbit (cls2)) != 0;
 370                     switch (valid_table[cls1].allow[cls2])
 371                       {
 372                       case 'M':
 373                         if (!eq)
 374                           {
 375                             uint32_t value = ctype->charnames[cnt];
 376
 377                             if (!be_quiet)
 378                               error (0, 0, _("\
 379 character L'\\u%0*x' in class `%s' must be in class `%s'"),
 380                                      value > 0xffff ? 8 : 4, value,
 381                                      valid_table[cls1].name,
 382                                      valid_table[cls2].name);
 383                           }
 384                         break;
 385
 386                       case 'X':
 387                         if (eq)
 388                           {
 389                             uint32_t value = ctype->charnames[cnt];
 390
 391                             if (!be_quiet)
 392                               error (0, 0, _("\
 393 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
 394                                      value > 0xffff ? 8 : 4, value,
 395                                      valid_table[cls1].name,
 396                                      valid_table[cls2].name);
 397                           }
 398                         break;
 399
 400                       case 'D':
 401                         ctype->class_collection[cnt] |= _ISwbit (cls2);
 402                         break;
 403
 404                       default:
 405                         error (5, 0, _("internal error in %s, line %u"),
 406                                __FUNCTION__, __LINE__);
 407                       }
 408                   }
 409         }
 410     }
 411
 412   for (cnt = 0; cnt < 256; ++cnt)
 413     {
 414       uint32_t tmp = ctype->class256_collection[cnt];
 415
 416       if (tmp != 0)
 417         {
 418           for (cls1 = 0; cls1 < NCLASS; ++cls1)
 419             if ((tmp & _ISbit (cls1)) != 0)
 420               for (cls2 = 0; cls2 < NCLASS; ++cls2)
 421                 if (valid_table[cls1].allow[cls2] != '-')
 422                   {
 423                     int eq = (tmp & _ISbit (cls2)) != 0;
 424                     switch (valid_table[cls1].allow[cls2])
 425                       {
 426                       case 'M':
 427                         if (!eq)
 428                           {
 429                             char buf[17];
 430
 431                             sprintf (buf, "\\%o", cnt);
 432
 433                             if (!be_quiet)
 434                               error (0, 0, _("\
 435 character '%s' in class `%s' must be in class `%s'"),
 436                                      buf, valid_table[cls1].name,
 437                                      valid_table[cls2].name);
 438                           }
 439                         break;
 440
 441                       case 'X':
 442                         if (eq)
 443                           {
 444                             char buf[17];
 445
 446                             sprintf (buf, "\\%o", cnt);
 447
 448                             if (!be_quiet)
 449                               error (0, 0, _("\
 450 character '%s' in class `%s' must not be in class `%s'"),
 451                                      buf, valid_table[cls1].name,
 452                                      valid_table[cls2].name);
 453                           }
 454                         break;
 455
 456                       case 'D':
 457                         ctype->class256_collection[cnt] |= _ISbit (cls2);
 458                         break;
 459
 460                       default:
 461                         error (5, 0, _("internal error in %s, line %u"),
 462                                __FUNCTION__, __LINE__);
 463                       }
 464                   }
 465         }
 466     }
 467
 468   /* ... and now test <SP> as a special case.  */
 469   space_value = repertoire_find_value (ctype->repertoire, "SP", 2);
 470   if (space_value == ILLEGAL_CHAR_VALUE)
 471     {
 472       if (!be_quiet)
 473         error (0, 0, _("character <SP> not defined in character map"));
 474     }
 475   else if (((cnt = BITPOS (tok_space),
 476              (ELEM (ctype, class_collection, , space_value)
 477               & BITw (tok_space)) == 0)
 478             || (cnt = BITPOS (tok_blank),
 479                 (ELEM (ctype, class_collection, , space_value)
 480                  & BITw (tok_blank)) == 0)))
 481     {
 482       if (!be_quiet)
 483         error (0, 0, _("<SP> character not in class `%s'"),
 484                valid_table[cnt].name);
 485     }
 486   else if (((cnt = BITPOS (tok_punct),
 487              (ELEM (ctype, class_collection, , space_value)
 488               & BITw (tok_punct)) != 0)
 489             || (cnt = BITPOS (tok_graph),
 490                 (ELEM (ctype, class_collection, , space_value)
 491                  & BITw (tok_graph))
 492                 != 0)))
 493     {
 494       if (!be_quiet)
 495         error (0, 0, _("<SP> character must not be in class `%s'"),
 496                valid_table[cnt].name);
 497     }
 498   else
 499     ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
 500
 501   space_seq = charmap_find_value (charmap, "SP", 2);
 502   if (space_seq == NULL || space_seq->nbytes != 1)
 503     {
 504       if (!be_quiet)
 505         error (0, 0, _("character <SP> not defined in character map"));
 506     }
 507   else if (((cnt = BITPOS (tok_space),
 508              (ctype->class256_collection[space_seq->bytes[0]]
 509               & BIT (tok_space)) == 0)
 510             || (cnt = BITPOS (tok_blank),
 511                 (ctype->class256_collection[space_seq->bytes[0]]
 512                  & BIT (tok_blank)) == 0)))
 513     {
 514       if (!be_quiet)
 515         error (0, 0, _("<SP> character not in class `%s'"),
 516                valid_table[cnt].name);
 517     }
 518   else if (((cnt = BITPOS (tok_punct),
 519              (ctype->class256_collection[space_seq->bytes[0]]
 520               & BIT (tok_punct)) != 0)
 521             || (cnt = BITPOS (tok_graph),
 522                 (ctype->class256_collection[space_seq->bytes[0]]
 523                  & BIT (tok_graph)) != 0)))
 524     {
 525       if (!be_quiet)
 526         error (0, 0, _("<SP> character must not be in class `%s'"),
 527                valid_table[cnt].name);
 528     }
 529   else
 530     ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
 531
 532   /* Now that the tests are done make sure the name array contains all
 533      characters which are handled in the WIDTH section of the
 534      character set definition file.  */
 535   if (charmap->width_rules != NULL)
 536     for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
 537       {
 538         unsigned char bytes[charmap->mb_cur_max];
 539         int nbytes = charmap->width_rules[cnt].from->nbytes;
 540
 541         /* We have the range of character for which the width is
 542            specified described using byte sequences of the multibyte
 543            charset.  We have to convert this to UCS4 now.  And we
 544            cannot simply convert the beginning and the end of the
 545            sequence, we have to iterate over the byte sequence and
 546            convert it for every single character.  */
 547         memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
 548
 549         while (nbytes < charmap->width_rules[cnt].to->nbytes
 550                || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
 551                           nbytes) <= 0)
 552           {
 553             /* Find the UCS value for `bytes'.  */
 554             uint32_t wch = repertoire_find_value (ctype->repertoire, bytes,
 555                                                   nbytes);
 556             int inner;
 557
 558             if (wch != ILLEGAL_CHAR_VALUE)
 559               /* We are only interested in the side-effects of the
 560                  `find_idx' call.  It will add appropriate entries in
 561                  the name array if this is necessary.  */
 562               (void) find_idx (ctype, NULL, NULL, NULL, wch);
 563
 564             /* "Increment" the bytes sequence.  */
 565             inner = nbytes - 1;
 566             while (inner >= 0 && bytes[inner] == 0xff)
 567               --inner;
 568
 569             if (inner < 0)
 570               {
 571                 /* We have to extend the byte sequence.  */
 572                 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
 573                   break;
 574
 575                 bytes[0] = 1;
 576                 memset (&bytes[1], 0, nbytes);
 577                 ++nbytes;
 578               }
 579             else
 580               {
 581                 ++bytes[inner];
 582                 while (++inner < nbytes)
 583                   bytes[inner] = 0;
 584               }
 585           }
 586       }
 587
 588   /* There must be a multiple of 10 digits.  */
 589   if (ctype->mbdigits_act % 10 != 0)
 590     {
 591       assert (ctype->mbdigits_act == ctype->wcdigits_act);
 592       ctype->wcdigits_act -= ctype->mbdigits_act % 10;
 593       ctype->mbdigits_act -= ctype->mbdigits_act % 10;
 594       error (0, 0, _("`digit' category has not entries in groups of ten"));
 595     }
 596
 597   /* Check the input digits.  There must be a multiple of ten available.
 598      In each group it could be that one or the other character is missing.
 599      In this case the whole group must be removed.  */
 600   cnt = 0;
 601   while (cnt < ctype->mbdigits_act)
 602     {
 603       size_t inner;
 604       for (inner = 0; inner < 10; ++inner)
 605         if (ctype->mbdigits[cnt + inner] == NULL)
 606           break;
 607
 608       if (inner == 10)
 609         cnt += 10;
 610       else
 611         {
 612           /* Remove the group.  */
 613           memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
 614                    ((ctype->wcdigits_act - cnt - 10)
 615                     * sizeof (ctype->mbdigits[0])));
 616           ctype->mbdigits_act -= 10;
 617         }
 618     }
 619
 620   /* If no input digits are given use the default.  */
 621   if (ctype->mbdigits_act == 0)
 622     {
 623       if (ctype->mbdigits_max == 0)
 624         {
 625           ctype->mbdigits = obstack_alloc (&charmap->mem_pool,
 626                                            10 * sizeof (struct charseq *));
 627           ctype->mbdigits_max = 10;
 628         }
 629
 630       for (cnt = 0; cnt < 10; ++cnt)
 631         {
 632           ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
 633                                                       digits + cnt, 1);
 634           if (ctype->mbdigits[cnt] == NULL)
 635             {
 636               ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
 637                                                           longnames[cnt],
 638                                                           strlen (longnames[cnt]));
 639               if (ctype->mbdigits[cnt] == NULL)
 640                 {
 641                   /* Hum, this ain't good.  */
 642                   error (0, 0, _("\
 643 no input digits defined and none of the standard names in the charmap"));
 644
 645                   ctype->mbdigits[cnt] = obstack_alloc (&charmap->mem_pool,
 646                                                         sizeof (struct charseq) + 1);
 647
 648                   /* This is better than nothing.  */
 649                   ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
 650                   ctype->mbdigits[cnt]->nbytes = 1;
 651                 }
 652             }
 653         }
 654
 655       ctype->mbdigits_act = 10;
 656     }
 657
 658   /* Check the wide character input digits.  There must be a multiple
 659      of ten available.  In each group it could be that one or the other
 660      character is missing.  In this case the whole group must be
 661      removed.  */
 662   cnt = 0;
 663   while (cnt < ctype->wcdigits_act)
 664     {
 665       size_t inner;
 666       for (inner = 0; inner < 10; ++inner)
 667         if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
 668           break;
 669
 670       if (inner == 10)
 671         cnt += 10;
 672       else
 673         {
 674           /* Remove the group.  */
 675           memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
 676                    ((ctype->wcdigits_act - cnt - 10)
 677                     * sizeof (ctype->wcdigits[0])));
 678           ctype->wcdigits_act -= 10;
 679         }
 680     }
 681
 682   /* If no input digits are given use the default.  */
 683   if (ctype->wcdigits_act == 0)
 684     {
 685       if (ctype->wcdigits_max == 0)
 686         {
 687           ctype->wcdigits = obstack_alloc (&charmap->mem_pool,
 688                                            10 * sizeof (uint32_t));
 689           ctype->wcdigits_max = 10;
 690         }
 691
 692       for (cnt = 0; cnt < 10; ++cnt)
 693         ctype->wcdigits[cnt] = L'0' + cnt;
 694
 695       ctype->mbdigits_act = 10;
 696     }
 697
 698   /* Check the outdigits.  */
 699   warned = 0;
 700   for (cnt = 0; cnt < 10; ++cnt)
 701     if (ctype->mboutdigits[cnt] == NULL)
 702       {
 703         static struct charseq replace[2];
 704
 705         if (!warned)
 706           {
 707             error (0, 0, _("\
 708 not all characters used in `outdigit' are available in the charmap"));
 709             warned = 1;
 710           }
 711
 712         replace[0].nbytes = 1;
 713         replace[0].bytes[0] = '?';
 714         replace[0].bytes[1] = '\0';
 715         ctype->mboutdigits[cnt] = &replace[0];
 716       }
 717
 718   warned = 0;
 719   for (cnt = 0; cnt < 10; ++cnt)
 720     if (ctype->wcoutdigits[cnt] == 0)
 721       {
 722         if (!warned)
 723           {
 724             error (0, 0, _("\
 725 not all characters used in `outdigit' are available in the repertoire"));
 726             warned = 1;
 727           }
 728
 729         ctype->wcoutdigits[cnt] = L'?';
 730       }
 731 }
 732
 733
 734 void
 735 ctype_output (struct localedef_t *locale, struct charmap_t *charmap,
 736               const char *output_path)
 737 {
 738   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 739   const size_t nelems = (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)
 740                          + (ctype->map_collection_nr - 2));
 741   struct iovec iov[2 + nelems + ctype->nr_charclass
 742                   + ctype->map_collection_nr];
 743   struct locale_file data;
 744   uint32_t idx[nelems + 1];
 745   size_t elem, cnt, offset, total;
 746   char *cp;
 747
 748   /* Now prepare the output: Find the sizes of the table we can use.  */
 749   allocate_arrays (ctype, charmap, ctype->repertoire);
 750
 751   data.magic = LIMAGIC (LC_CTYPE);
 752   data.n = nelems;
 753   iov[0].iov_base = (void *) &data;
 754   iov[0].iov_len = sizeof (data);
 755
 756   iov[1].iov_base = (void *) idx;
 757   iov[1].iov_len = sizeof (idx);
 758
 759   idx[0] = iov[0].iov_len + iov[1].iov_len;
 760   offset = 0;
 761
 762   for (elem = 0; elem < nelems; ++elem)
 763     {
 764       if (elem < _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE))
 765         switch (elem)
 766           {
 767 #define CTYPE_DATA(name, base, len)                                           \
 768           case _NL_ITEM_INDEX (name):                                         \
 769             iov[2 + elem + offset].iov_base = (base);                         \
 770             iov[2 + elem + offset].iov_len = (len);                           \
 771             if (elem + 1 < nelems)                                            \
 772               idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;     \
 773             break
 774
 775           CTYPE_DATA (_NL_CTYPE_CLASS,
 776                       ctype->ctype_b,
 777                       (256 + 128) * sizeof (char_class_t));
 778
 779           CTYPE_DATA (_NL_CTYPE_TOUPPER,
 780                       ctype->map[0],
 781                       (ctype->plane_size * ctype->plane_cnt + 128)
 782                       * sizeof (uint32_t));
 783           CTYPE_DATA (_NL_CTYPE_TOLOWER,
 784                       ctype->map[1],
 785                       (ctype->plane_size * ctype->plane_cnt + 128)
 786                       * sizeof (uint32_t));
 787
 788           CTYPE_DATA (_NL_CTYPE_CLASS32,
 789                       ctype->ctype32_b,
 790                       (ctype->plane_size * ctype->plane_cnt
 791                        * sizeof (char_class32_t)));
 792
 793           CTYPE_DATA (_NL_CTYPE_NAMES,
 794                       ctype->names, (ctype->plane_size * ctype->plane_cnt
 795                                      * sizeof (uint32_t)));
 796
 797           CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_SIZE,
 798                       &ctype->translit_hash_size, sizeof (uint32_t));
 799           CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_LAYERS,
 800                       &ctype->translit_hash_layers, sizeof (uint32_t));
 801
 802           CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX,
 803                       ctype->translit_from_idx,
 804                       ctype->translit_idx_size);
 805
 806           CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL,
 807                       ctype->translit_from_tbl,
 808                       ctype->translit_from_tbl_size);
 809
 810           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX,
 811                       ctype->translit_to_idx,
 812                       ctype->translit_idx_size);
 813
 814           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL,
 815                       ctype->translit_to_tbl, ctype->translit_to_tbl_size);
 816
 817           CTYPE_DATA (_NL_CTYPE_HASH_SIZE,
 818                       &ctype->plane_size, sizeof (uint32_t));
 819           CTYPE_DATA (_NL_CTYPE_HASH_LAYERS,
 820                       &ctype->plane_cnt, sizeof (uint32_t));
 821
 822           case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
 823             /* The class name array.  */
 824             total = 0;
 825             for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
 826               {
 827                 iov[2 + elem + offset].iov_base
 828                   = (void *) ctype->classnames[cnt];
 829                 iov[2 + elem + offset].iov_len
 830                   = strlen (ctype->classnames[cnt]) + 1;
 831                 total += iov[2 + elem + offset].iov_len;
 832               }
 833             iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
 834             iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
 835             total += 1 + (4 - ((total + 1) % 4));
 836
 837             idx[elem + 1] = idx[elem] + total;
 838             break;
 839
 840           case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
 841             /* The class name array.  */
 842             total = 0;
 843             for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
 844               {
 845                 iov[2 + elem + offset].iov_base
 846                   = (void *) ctype->mapnames[cnt];
 847                 iov[2 + elem + offset].iov_len
 848                   = strlen (ctype->mapnames[cnt]) + 1;
 849                 total += iov[2 + elem + offset].iov_len;
 850               }
 851             iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
 852             iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
 853             total += 1 + (4 - ((total + 1) % 4));
 854
 855             idx[elem + 1] = idx[elem] + total;
 856             break;
 857
 858           CTYPE_DATA (_NL_CTYPE_WIDTH,
 859                       ctype->width, ctype->plane_size * ctype->plane_cnt);
 860
 861           CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
 862                       &ctype->mb_cur_max, sizeof (uint32_t));
 863
 864           case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
 865             total = strlen (ctype->codeset_name) + 1;
 866             if (total % 4 == 0)
 867               iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
 868             else
 869               {
 870                 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
 871                 memset (mempcpy (iov[2 + elem + offset].iov_base,
 872                                  ctype->codeset_name, total),
 873                         '\0', 4 - (total & 3));
 874                 total = (total + 3) & ~3;
 875               }
 876             iov[2 + elem + offset].iov_len = total;
 877             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
 878             break;
 879
 880           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
 881             iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
 882             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
 883             *(uint32_t *) iov[2 + elem + offset].iov_base =
 884               ctype->mbdigits_act / 10;
 885             idx[elem + 1] = idx[elem] + sizeof (uint32_t);
 886             break;
 887
 888           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
 889             iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
 890             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
 891             *(uint32_t *) iov[2 + elem + offset].iov_base =
 892               ctype->wcdigits_act / 10;
 893             idx[elem + 1] = idx[elem] + sizeof (uint32_t);
 894             break;
 895
 896           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
 897             /* Compute the length of all possible characters.  For INDIGITS
 898                there might be more than one.  We simply concatenate all of
 899                them with a NUL byte following.  The NUL byte wouldn't be
 900                necessary but it makes it easier for the user.  */
 901             total = 0;
 902             for (cnt = elem - _NL_CTYPE_INDIGITS0_MB;
 903                  cnt < ctype->mbdigits_act; cnt += 10)
 904               total += ctype->mbdigits[cnt]->nbytes + 1;
 905             iov[2 + elem + offset].iov_base = (char *) alloca (total);
 906             iov[2 + elem + offset].iov_len = total;
 907
 908             cp = iov[2 + elem + offset].iov_base;
 909             for (cnt = elem - _NL_CTYPE_INDIGITS0_MB;
 910                  cnt < ctype->mbdigits_act; cnt += 10)
 911               {
 912                 cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes,
 913                               ctype->mbdigits[cnt]->nbytes);
 914                 *cp++ = '\0';
 915               }
 916             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
 917             break;
 918
 919           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
 920             /* Compute the length of all possible characters.  For INDIGITS
 921                there might be more than one.  We simply concatenate all of
 922                them with a NUL byte following.  The NUL byte wouldn't be
 923                necessary but it makes it easier for the user.  */
 924             cnt = elem - _NL_CTYPE_OUTDIGIT0_MB;
 925             total = ctype->mboutdigits[cnt]->nbytes + 1;
 926             iov[2 + elem + offset].iov_base = (char *) alloca (total);
 927             iov[2 + elem + offset].iov_len = total;
 928
 929             *(char *) mempcpy (iov[2 + elem + offset].iov_base,
 930                                ctype->mbdigits[cnt]->bytes,
 931                                ctype->mbdigits[cnt]->nbytes) = '\0';
 932             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
 933             break;
 934
 935           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
 936             total = ctype->wcdigits_act / 10;
 937
 938             iov[2 + elem + offset].iov_base =
 939               (uint32_t *) alloca (total * sizeof (uint32_t));
 940             iov[2 + elem + offset].iov_len = total * sizeof (uint32_t);
 941
 942             for (cnt = elem - _NL_CTYPE_INDIGITS0_WC;
 943                  cnt < ctype->wcdigits_act; cnt += 10)
 944               ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10]
 945                 = ctype->wcdigits[cnt];
 946             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
 947             break;
 948
 949           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
 950             cnt = elem - _NL_CTYPE_OUTDIGIT0_WC;
 951             iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt];
 952             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
 953             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
 954             break;
 955
 956           default:
 957             assert (! "unknown CTYPE element");
 958           }
 959       else
 960         {
 961           /* Handle extra maps.  */
 962           size_t nr = (elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) + 2;
 963
 964           iov[2 + elem + offset].iov_base = ctype->map[nr];
 965           iov[2 + elem + offset].iov_len = ((ctype->plane_size
 966                                              * ctype->plane_cnt + 128)
 967                                             * sizeof (uint32_t));
 968
 969           idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
 970         }
 971     }
 972
 973   assert (2 + elem + offset == (nelems + ctype->nr_charclass
 974                                 + ctype->map_collection_nr + 2));
 975
 976   write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
 977 }
 978
 979
 980 /* Local functions.  */
 981 static void
 982 ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
 983                  const char *name)
 984 {
 985   size_t cnt;
 986
 987   for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
 988     if (strcmp (ctype->classnames[cnt], name) == 0)
 989       break;
 990
 991   if (cnt < ctype->nr_charclass)
 992     {
 993       lr_error (lr, _("character class `%s' already defined"), name);
 994       return;
 995     }
 996
 997   if (ctype->nr_charclass == MAX_NR_CHARCLASS)
 998     /* Exit code 2 is prescribed in P1003.2b.  */
 999     error (2, 0, _("\
1000 implementation limit: no more than %d character classes allowed"),
1001            MAX_NR_CHARCLASS);
1002
1003   ctype->classnames[ctype->nr_charclass++] = name;
1004 }
1005
1006
1007 static void
1008 ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1009                const char *name, struct charmap_t *charmap)
1010 {
1011   size_t max_chars = 0;
1012   size_t cnt;
1013
1014   for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1015     {
1016       if (strcmp (ctype->mapnames[cnt], name) == 0)
1017         break;
1018
1019       if (max_chars < ctype->map_collection_max[cnt])
1020         max_chars = ctype->map_collection_max[cnt];
1021     }
1022
1023   if (cnt < ctype->map_collection_nr)
1024     {
1025       lr_error (lr, _("character map `%s' already defined"), name);
1026       return;
1027     }
1028
1029   if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1030     /* Exit code 2 is prescribed in P1003.2b.  */
1031     error (2, 0, _("\
1032 implementation limit: no more than %d character maps allowed"),
1033            MAX_NR_CHARMAP);
1034
1035   ctype->mapnames[cnt] = name;
1036
1037   if (max_chars == 0)
1038     ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1039   else
1040     ctype->map_collection_max[cnt] = max_chars;
1041
1042   ctype->map_collection[cnt] = (uint32_t *)
1043     xmalloc (sizeof (uint32_t) * ctype->map_collection_max[cnt]);
1044   memset (ctype->map_collection[cnt], '\0',
1045           sizeof (uint32_t) * ctype->map_collection_max[cnt]);
1046   ctype->map_collection_act[cnt] = 256;
1047
1048   ++ctype->map_collection_nr;
1049 }
1050
1051
1052 /* We have to be prepared that TABLE, MAX, and ACT can be NULL.  This
1053    is possible if we only want to extend the name array.  */
1054 static uint32_t *
1055 find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1056           size_t *act, uint32_t idx)
1057 {
1058   size_t cnt;
1059
1060   if (idx < 256)
1061     return table == NULL ? NULL : &(*table)[idx];
1062
1063   for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1064     if (ctype->charnames[cnt] == idx)
1065       break;
1066
1067   /* We have to distinguish two cases: the name is found or not.  */
1068   if (cnt == ctype->charnames_act)
1069     {
1070       /* Extend the name array.  */
1071       if (ctype->charnames_act == ctype->charnames_max)
1072         {
1073           ctype->charnames_max *= 2;
1074           ctype->charnames = (unsigned int *)
1075             xrealloc (ctype->charnames,
1076                       sizeof (unsigned int) * ctype->charnames_max);
1077         }
1078       ctype->charnames[ctype->charnames_act++] = idx;
1079     }
1080
1081   if (table == NULL)
1082     /* We have done everything we are asked to do.  */
1083     return NULL;
1084
1085   if (cnt >= *act)
1086     {
1087       if (cnt >= *max)
1088         {
1089           size_t old_max = *max;
1090           do
1091             *max *= 2;
1092           while (*max <= cnt);
1093
1094           *table =
1095             (uint32_t *) xrealloc (*table, *max * sizeof (unsigned long int));
1096           memset (&(*table)[old_max], '\0',
1097                   (*max - old_max) * sizeof (uint32_t));
1098         }
1099
1100       *act = cnt;
1101     }
1102
1103   return &(*table)[cnt];
1104 }
1105
1106
1107 static int
1108 get_character (struct token *now, struct charmap_t *charmap,
1109                struct repertoire_t *repertoire,
1110                struct charseq **seqp, uint32_t *wchp)
1111 {
1112   if (now->tok == tok_bsymbol)
1113     {
1114       /* This will hopefully be the normal case.  */
1115       *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1116                                      now->val.str.lenmb);
1117       *seqp = charmap_find_value (charmap, now->val.str.startmb,
1118                                   now->val.str.lenmb);
1119     }
1120   else if (now->tok == tok_ucs4)
1121     {
1122       *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
1123
1124       if (*seqp == NULL)
1125         {
1126           /* Compute the value in the charmap from the UCS value.  */
1127           const char *symbol = repertoire_find_symbol (repertoire,
1128                                                        now->val.ucs4);
1129
1130           if (symbol == NULL)
1131             *seqp = NULL;
1132           else
1133             *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
1134
1135           if (*seqp == NULL)
1136             {
1137               /* Insert a negative entry.  */
1138               static const struct charseq negative
1139                 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1140               uint32_t *newp = obstack_alloc (&repertoire->mem_pool, 4);
1141               *newp = now->val.ucs4;
1142
1143               insert_entry (&repertoire->seq_table, newp, 4,
1144                             (void *) &negative);
1145             }
1146           else
1147             (*seqp)->ucs4 = now->val.ucs4;
1148         }
1149       else if ((*seqp)->ucs4 != now->val.ucs4)
1150         *seqp = NULL;
1151
1152       *wchp = now->val.ucs4;
1153     }
1154   else if (now->tok == tok_charcode)
1155     {
1156       /* We must map from the byte code to UCS4.  */
1157       *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1158                                    now->val.str.lenmb);
1159
1160       if (*seqp == NULL)
1161         *wchp = ILLEGAL_CHAR_VALUE;
1162       else
1163         {
1164           if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1165             (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1166                                                    strlen ((*seqp)->name));
1167           *wchp = (*seqp)->ucs4;
1168         }
1169     }
1170   else
1171     return 1;
1172
1173   return 0;
1174 }
1175
1176
1177 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>'.  */
1178 static void
1179 charclass_symbolic_ellipsis (struct linereader *ldfile,
1180                              struct locale_ctype_t *ctype,
1181                              struct charmap_t *charmap,
1182                              struct repertoire_t *repertoire,
1183                              struct token *now,
1184                              const char *last_str,
1185                              unsigned long int class256_bit,
1186                              unsigned long int class_bit, int base,
1187                              int ignore_content, int handle_digits)
1188 {
1189   const char *nowstr = now->val.str.startmb;
1190   char tmp[now->val.str.lenmb + 1];
1191   const char *cp;
1192   char *endp;
1193   unsigned long int from;
1194   unsigned long int to;
1195
1196   /* We have to compute the ellipsis values using the symbolic names.  */
1197   assert (last_str != NULL);
1198
1199   if (strlen (last_str) != now->val.str.lenmb)
1200     {
1201     invalid_range:
1202       lr_error (ldfile,
1203                 _("`%s' and `%.*s' are no valid names for symbolic range"),
1204                 last_str, now->val.str.lenmb, nowstr);
1205       return;
1206     }
1207
1208   if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1209     /* Nothing to do, the names are the same.  */
1210     return;
1211
1212   for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1213     ;
1214
1215   errno = 0;
1216   from = strtoul (cp, &endp, base);
1217   if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1218     goto invalid_range;
1219
1220   to = strtoul (nowstr + (cp - last_str), &endp, base);
1221   if ((to == UINT_MAX && errno == ERANGE)
1222       || (endp - nowstr) != now->val.str.lenmb || from >= to)
1223     goto invalid_range;
1224
1225   /* OK, we have a range FROM - TO.  Now we can create the symbolic names.  */
1226   if (!ignore_content)
1227     {
1228       now->val.str.startmb = tmp;
1229       while (++from <= to)
1230         {
1231           struct charseq *seq;
1232           uint32_t wch;
1233
1234           sprintf (tmp, (base == 10 ? "%.*s%0*d" : "%.*s%0*X"), cp - last_str,
1235                    last_str, now->val.str.lenmb - (cp - last_str), from);
1236
1237           get_character (now, charmap, repertoire, &seq, &wch);
1238
1239           if (seq != NULL && seq->nbytes == 1)
1240             /* Yep, we can store information about this byte sequence.  */
1241             ctype->class256_collection[seq->bytes[0]] |= class256_bit;
1242
1243           if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1244             /* We have the UCS4 position.  */
1245             *find_idx (ctype, &ctype->class_collection,
1246                        &ctype->class_collection_max,
1247                        &ctype->class_collection_act, wch) |= class_bit;
1248
1249           if (handle_digits == 1)
1250             {
1251               /* We must store the digit values.  */
1252               if (ctype->mbdigits_act == ctype->mbdigits_max)
1253                 {
1254                   ctype->mbdigits_max *= 2;
1255                   ctype->mbdigits = xrealloc (ctype->mbdigits,
1256                                               (ctype->mbdigits_max
1257                                                * sizeof (char *)));
1258                   ctype->wcdigits_max *= 2;
1259                   ctype->wcdigits = xrealloc (ctype->wcdigits,
1260                                               (ctype->wcdigits_max
1261                                                * sizeof (uint32_t)));
1262                 }
1263
1264               ctype->mbdigits[ctype->mbdigits_act++] = seq;
1265               ctype->wcdigits[ctype->wcdigits_act++] = wch;
1266             }
1267           else if (handle_digits == 2)
1268             {
1269               /* We must store the digit values.  */
1270               if (ctype->outdigits_act >= 10)
1271                 {
1272                   lr_error (ldfile, _("\
1273 %s: field `%s' does not contain exactly ten entries"),
1274                             "LC_CTYPE", "outdigit");
1275                   return;
1276                 }
1277
1278               ctype->mboutdigits[ctype->outdigits_act] = seq;
1279               ctype->wcoutdigits[ctype->outdigits_act] = wch;
1280               ++ctype->outdigits_act;
1281             }
1282         }
1283     }
1284 }
1285
1286
1287 /* Ellipsis like in `<U1234>..<U2345>'.  */
1288 static void
1289 charclass_ucs4_ellipsis (struct linereader *ldfile,
1290                          struct locale_ctype_t *ctype,
1291                          struct charmap_t *charmap,
1292                          struct repertoire_t *repertoire,
1293                          struct token *now, uint32_t last_wch,
1294                          unsigned long int class256_bit,
1295                          unsigned long int class_bit, int ignore_content,
1296                          int handle_digits)
1297 {
1298   if (last_wch > now->val.ucs4)
1299     {
1300       lr_error (ldfile, _("\
1301 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1302                 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1303                 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
1304       return;
1305     }
1306
1307   if (!ignore_content)
1308     while (++last_wch <= now->val.ucs4)
1309       {
1310         /* We have to find out whether there is a byte sequence corresponding
1311            to this UCS4 value.  */
1312         struct charseq *seq = repertoire_find_seq (repertoire, last_wch);
1313
1314         /* If this is the first time we look for this sequence create a new
1315            entry.  */
1316         if (seq == NULL)
1317           {
1318             /* Find the symbolic name for this UCS4 value.  */
1319             const char *symbol = repertoire_find_symbol (repertoire, last_wch);
1320             uint32_t *newp = obstack_alloc (&repertoire->mem_pool, 4);
1321             *newp = last_wch;
1322
1323             if (symbol != NULL)
1324               /* We have a name, now search the multibyte value.  */
1325               seq = charmap_find_value (charmap, symbol, strlen (symbol));
1326
1327             if (seq == NULL)
1328               {
1329                 /* We have to create a fake entry.  */
1330                 static const struct charseq negative
1331                   = { .ucs4 = ILLEGAL_CHAR_VALUE };
1332                 seq = (struct charseq *) &negative;
1333               }
1334             else
1335               seq->ucs4 = last_wch;
1336
1337             insert_entry (&repertoire->seq_table, newp, 4, seq);
1338           }
1339
1340         /* We have a name, now search the multibyte value.  */
1341         if (seq->ucs4 == last_wch && seq->nbytes == 1)
1342           /* Yep, we can store information about this byte sequence.  */
1343           ctype->class256_collection[(size_t) seq->bytes[0]]
1344             |= class256_bit;
1345
1346         /* And of course we have the UCS4 position.  */
1347         if (class_bit != 0 && class_bit != 0)
1348           *find_idx (ctype, &ctype->class_collection,
1349                      &ctype->class_collection_max,
1350                      &ctype->class_collection_act, last_wch) |= class_bit;
1351
1352         if (handle_digits == 1)
1353           {
1354             /* We must store the digit values.  */
1355             if (ctype->mbdigits_act == ctype->mbdigits_max)
1356               {
1357                 ctype->mbdigits_max *= 2;
1358                 ctype->mbdigits = xrealloc (ctype->mbdigits,
1359                                             (ctype->mbdigits_max
1360                                              * sizeof (char *)));
1361                 ctype->wcdigits_max *= 2;
1362                 ctype->wcdigits = xrealloc (ctype->wcdigits,
1363                                             (ctype->wcdigits_max
1364                                              * sizeof (uint32_t)));
1365               }
1366
1367             ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1368                                                       ? seq : NULL);
1369             ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1370           }
1371         else if (handle_digits == 2)
1372           {
1373             /* We must store the digit values.  */
1374             if (ctype->outdigits_act >= 10)
1375               {
1376                 lr_error (ldfile, _("\
1377 %s: field `%s' does not contain exactly ten entries"),
1378                           "LC_CTYPE", "outdigit");
1379                 return;
1380               }
1381
1382             ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1383                                                         ? seq : NULL);
1384             ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1385             ++ctype->outdigits_act;
1386           }
1387       }
1388 }
1389
1390
1391 /* Ellipsis as in `/xea/x12.../xea/x34'.  */
1392 static void
1393 charclass_charcode_ellipsis (struct linereader *ldfile,
1394                              struct locale_ctype_t *ctype,
1395                              struct charmap_t *charmap,
1396                              struct repertoire_t *repertoire,
1397                              struct token *now, char *last_charcode,
1398                              uint32_t last_charcode_len,
1399                              unsigned long int class256_bit,
1400                              unsigned long int class_bit, int ignore_content,
1401                              int handle_digits)
1402 {
1403   /* First check whether the to-value is larger.  */
1404   if (now->val.charcode.nbytes != last_charcode_len)
1405     {
1406       lr_error (ldfile, _("\
1407 start end end character sequence of range must have the same length"));
1408       return;
1409     }
1410
1411   if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
1412     {
1413       lr_error (ldfile, _("\
1414 to-value character sequence is smaller than from-value sequence"));
1415       return;
1416     }
1417
1418   if (!ignore_content)
1419     {
1420       do
1421         {
1422           /* Increment the byte sequence value.  */
1423           struct charseq *seq;
1424           uint32_t wch;
1425           int i;
1426
1427           for (i = last_charcode_len - 1; i >= 0; --i)
1428             if (++last_charcode[i] != 0)
1429               break;
1430
1431           if (last_charcode_len == 1)
1432             /* Of course we have the charcode value.  */
1433             ctype->class256_collection[(size_t) last_charcode[0]]
1434               |= class256_bit;
1435
1436           /* Find the symbolic name.  */
1437           seq = charmap_find_symbol (charmap, last_charcode,
1438                                      last_charcode_len);
1439           if (seq != NULL)
1440             {
1441               if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1442                 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1443                                                    strlen (seq->name));
1444               wch = seq->ucs4;
1445
1446               if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1447                 *find_idx (ctype, &ctype->class_collection,
1448                            &ctype->class_collection_max,
1449                            &ctype->class_collection_act, wch) |= class_bit;
1450             }
1451           else
1452             wch = ILLEGAL_CHAR_VALUE;
1453
1454           if (handle_digits == 1)
1455             {
1456               /* We must store the digit values.  */
1457               if (ctype->mbdigits_act == ctype->mbdigits_max)
1458                 {
1459                   ctype->mbdigits_max *= 2;
1460                   ctype->mbdigits = xrealloc (ctype->mbdigits,
1461                                               (ctype->mbdigits_max
1462                                                * sizeof (char *)));
1463                   ctype->wcdigits_max *= 2;
1464                   ctype->wcdigits = xrealloc (ctype->wcdigits,
1465                                               (ctype->wcdigits_max
1466                                                * sizeof (uint32_t)));
1467                 }
1468
1469               seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1470               memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1471               seq->nbytes = last_charcode_len;
1472
1473               ctype->mbdigits[ctype->mbdigits_act++] = seq;
1474               ctype->wcdigits[ctype->wcdigits_act++] = wch;
1475             }
1476           else if (handle_digits == 2)
1477             {
1478               struct charseq *seq;
1479               /* We must store the digit values.  */
1480               if (ctype->outdigits_act >= 10)
1481                 {
1482                   lr_error (ldfile, _("\
1483 %s: field `%s' does not contain exactly ten entries"),
1484                             "LC_CTYPE", "outdigit");
1485                   return;
1486                 }
1487
1488               seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1489               memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1490               seq->nbytes = last_charcode_len;
1491
1492               ctype->mboutdigits[ctype->outdigits_act] = seq;
1493               ctype->wcoutdigits[ctype->outdigits_act] = wch;
1494               ++ctype->outdigits_act;
1495             }
1496         }
1497       while (memcmp (last_charcode, now->val.charcode.bytes,
1498                      last_charcode_len) != 0);
1499     }
1500 }
1501
1502
1503 /* Read one transliteration entry.  */
1504 static uint32_t *
1505 read_widestring (struct linereader *ldfile, struct token *now,
1506                  struct charmap_t *charmap, struct repertoire_t *repertoire)
1507 {
1508   uint32_t *wstr;
1509
1510   if (now->tok == tok_default_missing)
1511     /* The special name "" will denote this case.  */
1512     wstr = (uint32_t *) L"";
1513   else if (now->tok == tok_bsymbol)
1514     {
1515       /* Get the value from the repertoire.  */
1516       wstr = xmalloc (2 * sizeof (uint32_t));
1517       wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1518                                        now->val.str.lenmb);
1519       if (wstr[0] == ILLEGAL_CHAR_VALUE)
1520         /* We cannot proceed, we don't know the UCS4 value.  */
1521         return NULL;
1522
1523       wstr[1] = 0;
1524     }
1525   else if (now->tok == tok_ucs4)
1526     {
1527       wstr = xmalloc (2 * sizeof (uint32_t));
1528       wstr[0] = now->val.ucs4;
1529       wstr[1] = 0;
1530     }
1531   else if (now->tok == tok_charcode)
1532     {
1533       /* Argh, we have to convert to the symbol name first and then to the
1534          UCS4 value.  */
1535       struct charseq *seq = charmap_find_symbol (charmap,
1536                                                  now->val.str.startmb,
1537                                                  now->val.str.lenmb);
1538       if (seq == NULL)
1539         /* Cannot find the UCS4 value.  */
1540         return NULL;
1541
1542       if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1543         seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1544                                            strlen (seq->name));
1545       if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1546         /* We cannot proceed, we don't know the UCS4 value.  */
1547         return NULL;
1548
1549       wstr = xmalloc (2 * sizeof (uint32_t));
1550       wstr[0] = seq->ucs4;
1551       wstr[1] = 0;
1552     }
1553   else if (now->tok == tok_string)
1554     {
1555       wstr = now->val.str.startwc;
1556       if (wstr[0] == 0)
1557         return NULL;
1558     }
1559   else
1560     {
1561       if (now->tok != tok_eol && now->tok != tok_eof)
1562         lr_ignore_rest (ldfile, 0);
1563       SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1564       return (uint32_t *) -1l;
1565     }
1566
1567   return wstr;
1568 }
1569
1570
1571 static void
1572 read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1573                      struct token *now, struct charmap_t *charmap,
1574                      struct repertoire_t *repertoire)
1575 {
1576   uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1577   struct translit_t *result;
1578   struct translit_to_t **top;
1579   struct obstack *ob = &ctype->mem_pool;
1580   int first;
1581   int ignore;
1582
1583   if (from_wstr == NULL)
1584     /* There is no valid from string.  */
1585     return;
1586
1587   result = (struct translit_t *) obstack_alloc (ob,
1588                                                 sizeof (struct translit_t));
1589   result->from = from_wstr;
1590   result->next = NULL;
1591   result->to = NULL;
1592   top = &result->to;
1593   first = 1;
1594   ignore = 0;
1595
1596   while (1)
1597     {
1598       uint32_t *to_wstr;
1599
1600       /* Next we have one or more transliterations.  They are
1601          separated by semicolons.  */
1602       now = lr_token (ldfile, charmap, repertoire);
1603
1604       if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
1605         {
1606           /* One string read.  */
1607           const uint32_t zero = 0;
1608
1609           if (!ignore)
1610             {
1611               obstack_grow (ob, &zero, 4);
1612               to_wstr = obstack_finish (ob);
1613
1614               *top = obstack_alloc (ob, sizeof (struct translit_to_t));
1615               (*top)->str = to_wstr;
1616               (*top)->next = NULL;
1617             }
1618
1619           if (now->tok == tok_eol)
1620             {
1621               result->next = ctype->translit;
1622               ctype->translit = result;
1623               return;
1624             }
1625
1626           if (!ignore)
1627             top = &(*top)->next;
1628           ignore = 0;
1629         }
1630       else
1631         {
1632           to_wstr = read_widestring (ldfile, now, charmap, repertoire);
1633           if (to_wstr == (uint32_t *) -1l)
1634             {
1635               /* An error occurred.  */
1636               obstack_free (ob, result);
1637               return;
1638             }
1639
1640           if (to_wstr == NULL)
1641             ignore = 1;
1642           else
1643             /* This value is usable.  */
1644             obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
1645
1646           first = 0;
1647         }
1648     }
1649 }
1650
1651
1652 /* The parser for the LC_CTYPE section of the locale definition.  */
1653 void
1654 ctype_read (struct linereader *ldfile, struct localedef_t *result,
1655             struct charmap_t *charmap, const char *repertoire_name,
1656             int ignore_content)
1657 {
1658   struct repertoire_t *repertoire = NULL;
1659   struct locale_ctype_t *ctype;
1660   struct token *now;
1661   enum token_t nowtok;
1662   size_t cnt;
1663   struct charseq *last_seq;
1664   uint32_t last_wch = 0;
1665   enum token_t last_token;
1666   enum token_t ellipsis_token;
1667   char last_charcode[16];
1668   size_t last_charcode_len = 0;
1669   const char *last_str = NULL;
1670   int mapidx;
1671
1672   /* Get the repertoire we have to use.  */
1673   if (repertoire_name != NULL)
1674     repertoire = repertoire_read (repertoire_name);
1675
1676   /* The rest of the line containing `LC_CTYPE' must be free.  */
1677   lr_ignore_rest (ldfile, 1);
1678
1679
1680   do
1681     {
1682       now = lr_token (ldfile, charmap, NULL);
1683       nowtok = now->tok;
1684     }
1685   while (nowtok == tok_eol);
1686
1687   /* If we see `copy' now we are almost done.  */
1688   if (nowtok == tok_copy)
1689     {
1690       handle_copy (ldfile, charmap, repertoire, result, tok_lc_ctype, LC_CTYPE,
1691                    "LC_CTYPE", ignore_content);
1692       return;
1693     }
1694
1695   /* Prepare the data structures.  */
1696   ctype_startup (ldfile, result, charmap, ignore_content);
1697   ctype = result->categories[LC_CTYPE].ctype;
1698
1699   /* Remember the repertoire we use.  */
1700   if (!ignore_content)
1701     ctype->repertoire = repertoire;
1702
1703   while (1)
1704     {
1705       unsigned long int class_bit = 0;
1706       unsigned long int class256_bit = 0;
1707       int handle_digits = 0;
1708
1709       /* Of course we don't proceed beyond the end of file.  */
1710       if (nowtok == tok_eof)
1711         break;
1712
1713       /* Ingore empty lines.  */
1714       if (nowtok == tok_eol)
1715         {
1716           now = lr_token (ldfile, charmap, NULL);
1717           nowtok = now->tok;
1718           continue;
1719         }
1720
1721       switch (nowtok)
1722         {
1723         case tok_charclass:
1724           now = lr_token (ldfile, charmap, NULL);
1725           while (now->tok == tok_ident || now->tok == tok_string)
1726             {
1727               ctype_class_new (ldfile, ctype, now->val.str.startmb);
1728               now = lr_token (ldfile, charmap, NULL);
1729               if (now->tok != tok_semicolon)
1730                 break;
1731               now = lr_token (ldfile, charmap, NULL);
1732             }
1733           if (now->tok != tok_eol)
1734             SYNTAX_ERROR (_("\
1735 %s: syntax error in definition of new character class"), "LC_CTYPE");
1736           break;
1737
1738         case tok_charconv:
1739           now = lr_token (ldfile, charmap, NULL);
1740           while (now->tok == tok_ident || now->tok == tok_string)
1741             {
1742               ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
1743               now = lr_token (ldfile, charmap, NULL);
1744               if (now->tok != tok_semicolon)
1745                 break;
1746               now = lr_token (ldfile, charmap, NULL);
1747             }
1748           if (now->tok != tok_eol)
1749             SYNTAX_ERROR (_("\
1750 %s: syntax error in definition of new character map"), "LC_CTYPE");
1751           break;
1752
1753         case tok_class:
1754           /* Ignore the rest of the line if we don't need the input of
1755              this line.  */
1756           if (ignore_content)
1757             {
1758               lr_ignore_rest (ldfile, 0);
1759               break;
1760             }
1761
1762           /* We simply forget the `class' keyword and use the following
1763              operand to determine the bit.  */
1764           now = lr_token (ldfile, charmap, NULL);
1765           if (now->tok == tok_ident || now->tok == tok_string)
1766             {
1767               /* Must be one of the predefined class names.  */
1768               for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1769                 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
1770                   break;
1771               if (cnt >= ctype->nr_charclass)
1772                 {
1773 #ifdef PREDEFINED_CLASSES
1774                   if (now->val.str.lenmb == 8
1775                       && memcmp ("special1", now->val.str.startmb, 8) == 0)
1776                     class_bit = _ISwspecial1;
1777                   else if (now->val.str.lenmb == 8
1778                       && memcmp ("special2", now->val.str.startmb, 8) == 0)
1779                     class_bit = _ISwspecial2;
1780                   else if (now->val.str.lenmb == 8
1781                       && memcmp ("special3", now->val.str.startmb, 8) == 0)
1782                     class_bit = _ISwspecial3;
1783                   else
1784 #endif
1785                     {
1786                       lr_error (ldfile, _("\
1787 unknown character class `%s' in category `LC_CTYPE'"),
1788                                 now->val.str.startmb);
1789                       free (now->val.str.startmb);
1790
1791                       lr_ignore_rest (ldfile, 0);
1792                       continue;
1793                     }
1794                 }
1795               else
1796                 class_bit = _ISwbit (cnt);
1797
1798               free (now->val.str.startmb);
1799             }
1800           else if (now->tok == tok_digit)
1801             goto handle_tok_digit;
1802           else if (now->tok < tok_upper || now->tok > tok_blank)
1803             goto err_label;
1804           else
1805             {
1806               class_bit = BITw (now->tok);
1807               class256_bit = BIT (now->tok);
1808             }
1809
1810           /* The next character must be a semicolon.  */
1811           now = lr_token (ldfile, charmap, NULL);
1812           if (now->tok != tok_semicolon)
1813             goto err_label;
1814           goto read_charclass;
1815
1816         case tok_upper:
1817         case tok_lower:
1818         case tok_alpha:
1819         case tok_alnum:
1820         case tok_space:
1821         case tok_cntrl:
1822         case tok_punct:
1823         case tok_graph:
1824         case tok_print:
1825         case tok_xdigit:
1826         case tok_blank:
1827           /* Ignore the rest of the line if we don't need the input of
1828              this line.  */
1829           if (ignore_content)
1830             {
1831               lr_ignore_rest (ldfile, 0);
1832               break;
1833             }
1834
1835           class_bit = BITw (now->tok);
1836           class256_bit = BIT (now->tok);
1837           handle_digits = 0;
1838         read_charclass:
1839           ctype->class_done |= class_bit;
1840           last_token = tok_none;
1841           ellipsis_token = tok_none;
1842           now = lr_token (ldfile, charmap, NULL);
1843           while (now->tok != tok_eol && now->tok != tok_eof)
1844             {
1845               uint32_t wch;
1846               struct charseq *seq;
1847
1848               if (ellipsis_token == tok_none)
1849                 {
1850                   if (get_character (now, charmap, repertoire, &seq, &wch))
1851                     goto err_label;
1852
1853                   if (!ignore_content && seq != NULL && seq->nbytes == 1)
1854                     /* Yep, we can store information about this byte
1855                        sequence.  */
1856                     ctype->class256_collection[seq->bytes[0]] |= class256_bit;
1857
1858                   if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
1859                       && class_bit != 0)
1860                     /* We have the UCS4 position.  */
1861                     *find_idx (ctype, &ctype->class_collection,
1862                                &ctype->class_collection_max,
1863                                &ctype->class_collection_act, wch) |= class_bit;
1864
1865                   last_token = now->tok;
1866                   /* Terminate the string.  */
1867                   if (last_token == tok_bsymbol)
1868                     {
1869                       now->val.str.startmb[now->val.str.lenmb] = '\0';
1870                       last_str = now->val.str.startmb;
1871                     }
1872                   else
1873                     last_str = NULL;
1874                   last_seq = seq;
1875                   last_wch = wch;
1876                   memcpy (last_charcode, now->val.charcode.bytes, 16);
1877                   last_charcode_len = now->val.charcode.nbytes;
1878
1879                   if (!ignore_content && handle_digits == 1)
1880                     {
1881                       /* We must store the digit values.  */
1882                       if (ctype->mbdigits_act == ctype->mbdigits_max)
1883                         {
1884                           ctype->mbdigits_max += 10;
1885                           ctype->mbdigits = xrealloc (ctype->mbdigits,
1886                                                       (ctype->mbdigits_max
1887                                                        * sizeof (char *)));
1888                           ctype->wcdigits_max += 10;
1889                           ctype->wcdigits = xrealloc (ctype->wcdigits,
1890                                                       (ctype->wcdigits_max
1891                                                        * sizeof (uint32_t)));
1892                         }
1893
1894                       ctype->mbdigits[ctype->mbdigits_act++] = seq;
1895                       ctype->wcdigits[ctype->wcdigits_act++] = wch;
1896                     }
1897                   else if (!ignore_content && handle_digits == 2)
1898                     {
1899                       /* We must store the digit values.  */
1900                       if (ctype->outdigits_act >= 10)
1901                         {
1902                           lr_error (ldfile, _("\
1903 %s: field `%s' does not contain exactly ten entries"),
1904                             "LC_CTYPE", "outdigit");
1905                           goto err_label;
1906                         }
1907
1908                       ctype->mboutdigits[ctype->outdigits_act] = seq;
1909                       ctype->wcoutdigits[ctype->outdigits_act] = wch;
1910                       ++ctype->outdigits_act;
1911                     }
1912                 }
1913               else
1914                 {
1915                   /* Now it gets complicated.  We have to resolve the
1916                      ellipsis problem.  First we must distinguish between
1917                      the different kind of ellipsis and this must match the
1918                      tokens we have seen.  */
1919                   assert (last_token != tok_none);
1920
1921                   if (last_token != now->tok)
1922                     {
1923                       lr_error (ldfile, _("\
1924 ellipsis range must be marked by two operands of same type"));
1925                       lr_ignore_rest (ldfile, 0);
1926                       break;
1927                     }
1928
1929                   if (last_token == tok_bsymbol)
1930                     {
1931                       if (ellipsis_token == tok_ellipsis3)
1932                         lr_error (ldfile, _("with symbolic name range values \
1933 the absolute ellipsis `...' must not be used"));
1934
1935                       charclass_symbolic_ellipsis (ldfile, ctype, charmap,
1936                                                    repertoire, now, last_str,
1937                                                    class256_bit, class_bit,
1938                                                    (ellipsis_token
1939                                                     == tok_ellipsis4
1940                                                     ? 10 : 16),
1941                                                    ignore_content,
1942                                                    handle_digits);
1943                     }
1944                   else if (last_token == tok_ucs4)
1945                     {
1946                       if (ellipsis_token != tok_ellipsis2)
1947                         lr_error (ldfile, _("\
1948 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
1949
1950                       charclass_ucs4_ellipsis (ldfile, ctype, charmap,
1951                                                repertoire, now, last_wch,
1952                                                class256_bit, class_bit,
1953                                                ignore_content, handle_digits);
1954                     }
1955                   else
1956                     {
1957                       assert (last_token == tok_charcode);
1958
1959                       if (ellipsis_token != tok_ellipsis3)
1960                         lr_error (ldfile, _("\
1961 with character code range values one must use the absolute ellipsis `...'"));
1962
1963                       charclass_charcode_ellipsis (ldfile, ctype, charmap,
1964                                                    repertoire, now,
1965                                                    last_charcode,
1966                                                    last_charcode_len,
1967                                                    class256_bit, class_bit,
1968                                                    ignore_content,
1969                                                    handle_digits);
1970                     }
1971
1972                   /* Now we have used the last value.  */
1973                   last_token = tok_none;
1974                 }
1975
1976               /* Next we expect a semicolon or the end of the line.  */
1977               now = lr_token (ldfile, charmap, NULL);
1978               if (now->tok == tok_eol || now->tok == tok_eof)
1979                 break;
1980
1981               if (last_token != tok_none
1982                   && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4)
1983                 {
1984                   ellipsis_token = now->tok;
1985                   now = lr_token (ldfile, charmap, NULL);
1986                   continue;
1987                 }
1988
1989               if (now->tok != tok_semicolon)
1990                 goto err_label;
1991
1992               /* And get the next character.  */
1993               now = lr_token (ldfile, charmap, NULL);
1994
1995               ellipsis_token = tok_none;
1996             }
1997           break;
1998
1999         case tok_digit:
2000           /* Ignore the rest of the line if we don't need the input of
2001              this line.  */
2002           if (ignore_content)
2003             {
2004               lr_ignore_rest (ldfile, 0);
2005               break;
2006             }
2007
2008         handle_tok_digit:
2009           class_bit = _ISwdigit;
2010           class256_bit = _ISdigit;
2011           handle_digits = 1;
2012           goto read_charclass;
2013
2014         case tok_outdigit:
2015           /* Ignore the rest of the line if we don't need the input of
2016              this line.  */
2017           if (ignore_content)
2018             {
2019               lr_ignore_rest (ldfile, 0);
2020               break;
2021             }
2022
2023           if (ctype->outdigits_act != 0)
2024             lr_error (ldfile, _("\
2025 %s: field `%s' declared more than once"),
2026                       "LC_CTYPE", "outdigit");
2027           class_bit = 0;
2028           class256_bit = 0;
2029           handle_digits = 2;
2030           goto read_charclass;
2031
2032         case tok_toupper:
2033           /* Ignore the rest of the line if we don't need the input of
2034              this line.  */
2035           if (ignore_content)
2036             {
2037               lr_ignore_rest (ldfile, 0);
2038               break;
2039             }
2040
2041           mapidx = 0;
2042           goto read_mapping;
2043
2044         case tok_tolower:
2045           /* Ignore the rest of the line if we don't need the input of
2046              this line.  */
2047           if (ignore_content)
2048             {
2049               lr_ignore_rest (ldfile, 0);
2050               break;
2051             }
2052
2053           mapidx = 1;
2054           goto read_mapping;
2055
2056         case tok_map:
2057           /* Ignore the rest of the line if we don't need the input of
2058              this line.  */
2059           if (ignore_content)
2060             {
2061               lr_ignore_rest (ldfile, 0);
2062               break;
2063             }
2064
2065           /* We simply forget the `map' keyword and use the following
2066              operand to determine the mapping.  */
2067           now = lr_token (ldfile, charmap, NULL);
2068           if (now->tok == tok_ident || now->tok == tok_string)
2069             {
2070               size_t cnt;
2071
2072               for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2073                 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2074                   break;
2075
2076               if (cnt < ctype->map_collection_nr)
2077                 mapidx = cnt;
2078               else
2079                 {
2080                   lr_error (ldfile, _("unknown map `%s'"),
2081                             now->val.str.startmb);
2082                   lr_ignore_rest (ldfile, 0);
2083                   break;
2084                 }
2085             }
2086           else if (now->tok < tok_toupper || now->tok > tok_tolower)
2087             goto err_label;
2088           else
2089             mapidx = now->tok - tok_toupper;
2090
2091           now = lr_token (ldfile, charmap, NULL);
2092           /* This better should be a semicolon.  */
2093           if (now->tok != tok_semicolon)
2094             goto err_label;
2095
2096         read_mapping:
2097           /* Test whether this mapping was already defined.  */
2098           if (ctype->tomap_done[mapidx])
2099             {
2100               lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2101                         ctype->mapnames[mapidx]);
2102               lr_ignore_rest (ldfile, 0);
2103               break;
2104             }
2105           ctype->tomap_done[mapidx] = 1;
2106
2107           now = lr_token (ldfile, charmap, NULL);
2108           while (now->tok != tok_eol && now->tok != tok_eof)
2109             {
2110               struct charseq *from_seq;
2111               uint32_t from_wch;
2112               struct charseq *to_seq;
2113               uint32_t to_wch;
2114
2115               /* Every pair starts with an opening brace.  */
2116               if (now->tok != tok_open_brace)
2117                 goto err_label;
2118
2119               /* Next comes the from-value.  */
2120               now = lr_token (ldfile, charmap, NULL);
2121               if (get_character (now, charmap, repertoire, &from_seq,
2122                                  &from_wch) != 0)
2123                 goto err_label;
2124
2125               /* The next is a comma.  */
2126               now = lr_token (ldfile, charmap, NULL);
2127               if (now->tok != tok_comma)
2128                 goto err_label;
2129
2130               /* And the other value.  */
2131               now = lr_token (ldfile, charmap, NULL);
2132               if (get_character (now, charmap, repertoire, &to_seq,
2133                                  &to_wch) != 0)
2134                 goto err_label;
2135
2136               /* And the last thing is the closing brace.  */
2137               now = lr_token (ldfile, charmap, NULL);
2138               if (now->tok != tok_close_brace)
2139                 goto err_label;
2140
2141               if (!ignore_content)
2142                 {
2143                   if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2144                       && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2145                     /* We can use this value.  */
2146                     ctype->map256_collection[mapidx][from_seq->bytes[0]]
2147                       = to_seq->bytes[0];
2148
2149                   if (from_wch != ILLEGAL_CHAR_VALUE
2150                       && to_wch != ILLEGAL_CHAR_VALUE)
2151                     /* Both correct values.  */
2152                     *find_idx (ctype, &ctype->map_collection[mapidx],
2153                                &ctype->map_collection_max[mapidx],
2154                                &ctype->map_collection_act[mapidx],
2155                                from_wch) = to_wch;
2156                 }
2157
2158               /* Now comes a semicolon or the end of the line/file.  */
2159               now = lr_token (ldfile, charmap, NULL);
2160               if (now->tok == tok_semicolon)
2161                 now = lr_token (ldfile, charmap, NULL);
2162             }
2163           break;
2164
2165         case tok_translit_start:
2166           /* Ignore the rest of the line if we don't need the input of
2167              this line.  */
2168           if (ignore_content)
2169             {
2170               lr_ignore_rest (ldfile, 0);
2171               break;
2172             }
2173
2174           /* The rest of the line better should be empty.  */
2175           lr_ignore_rest (ldfile, 1);
2176
2177           /* We count here the number of allocated entries in the `translit'
2178              array.  */
2179           cnt = 0;
2180
2181           /* We proceed until we see the `translit_end' token.  */
2182           while (now = lr_token (ldfile, charmap, repertoire),
2183                  now->tok != tok_translit_end && now->tok != tok_eof)
2184             {
2185               if (now->tok == tok_eol)
2186                 /* Ignore empty lines.  */
2187                 continue;
2188
2189               if (now->tok == tok_translit_end)
2190                 {
2191                   lr_ignore_rest (ldfile, 0);
2192                   break;
2193                 }
2194
2195               if (now->tok == tok_include)
2196                 {
2197                   /* We have to include locale.  */
2198                   const char *locale_name;
2199                   const char *repertoire_name;
2200
2201                   now = lr_token (ldfile, charmap, NULL);
2202                   /* This should be a string or an identifier.  In any
2203                      case something to name a locale.  */
2204                   if (now->tok != tok_string && now->tok != tok_ident)
2205                     {
2206                     translit_syntax:
2207                       lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2208                       lr_ignore_rest (ldfile, 0);
2209                       continue;
2210                     }
2211                   locale_name = now->val.str.startmb;
2212
2213                   /* Next should be a semicolon.  */
2214                   now = lr_token (ldfile, charmap, NULL);
2215                   if (now->tok != tok_semicolon)
2216                     goto translit_syntax;
2217
2218                   /* Now the repertoire name.  */
2219                   now = lr_token (ldfile, charmap, NULL);
2220                   if ((now->tok != tok_string && now->tok != tok_ident)
2221                       || now->val.str.startmb == NULL)
2222                     goto translit_syntax;
2223                   repertoire_name = now->val.str.startmb;
2224
2225                   /* We must not have more than one `include'.  */
2226                   if (ctype->translit_copy_locale != NULL)
2227                     {
2228                       lr_error (ldfile, _("\
2229 %s: only one `include' instruction allowed"), "LC_CTYPE");
2230                       lr_ignore_rest (ldfile, 0);
2231                       continue;
2232                     }
2233
2234                   ctype->translit_copy_locale = locale_name;
2235                   ctype->translit_copy_repertoire = repertoire_name;
2236
2237                   /* The rest of the line must be empty.  */
2238                   lr_ignore_rest (ldfile, 1);
2239                   continue;
2240                 }
2241
2242               read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2243             }
2244           break;
2245
2246         case tok_ident:
2247           /* Ignore the rest of the line if we don't need the input of
2248              this line.  */
2249           if (ignore_content)
2250             {
2251               lr_ignore_rest (ldfile, 0);
2252               break;
2253             }
2254
2255           /* This could mean one of several things.  First test whether
2256              it's a character class name.  */
2257           for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2258             if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2259               break;
2260           if (cnt < ctype->nr_charclass)
2261             {
2262               class_bit = _ISwbit (cnt);
2263               class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2264               free (now->val.str.startmb);
2265               goto read_charclass;
2266             }
2267           for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2268             if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2269               break;
2270           if (cnt < ctype->map_collection_nr)
2271             {
2272               mapidx = cnt;
2273               free (now->val.str.startmb);
2274               goto read_mapping;
2275             }
2276 #ifdef PREDEFINED_CLASSES
2277           if (strcmp (now->val.str.startmb, "special1") == 0)
2278             {
2279               class_bit = _ISwspecial1;
2280               free (now->val.str.startmb);
2281               goto read_charclass;
2282             }
2283           if (strcmp (now->val.str.startmb, "special2") == 0)
2284             {
2285               class_bit = _ISwspecial2;
2286               free (now->val.str.startmb);
2287               goto read_charclass;
2288             }
2289           if (strcmp (now->val.str.startmb, "special3") == 0)
2290             {
2291               class_bit = _ISwspecial3;
2292               free (now->val.str.startmb);
2293               goto read_charclass;
2294             }
2295           if (strcmp (now->val.str.startmb, "tosymmetric") == 0)
2296             {
2297               mapidx = 2;
2298               goto read_mapping;
2299             }
2300 #endif
2301           break;
2302
2303         case tok_end:
2304           /* Next we assume `LC_CTYPE'.  */
2305           now = lr_token (ldfile, charmap, NULL);
2306           if (now->tok == tok_eof)
2307             break;
2308           if (now->tok == tok_eol)
2309             lr_error (ldfile, _("%s: incomplete `END' line"),
2310                       "LC_CTYPE");
2311           else if (now->tok != tok_lc_ctype)
2312             lr_error (ldfile, _("\
2313 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2314           lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2315           return;
2316
2317         default:
2318         err_label:
2319           if (now->tok != tok_eof)
2320             SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2321         }
2322
2323       /* Prepare for the next round.  */
2324       now = lr_token (ldfile, charmap, NULL);
2325       nowtok = now->tok;
2326     }
2327
2328   /* When we come here we reached the end of the file.  */
2329   lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2330 }
2331
2332
2333 static void
2334 set_class_defaults (struct locale_ctype_t *ctype, struct charmap_t *charmap,
2335                     struct repertoire_t *repertoire)
2336 {
2337   size_t cnt;
2338
2339   /* These function defines the default values for the classes and conversions
2340      according to POSIX.2 2.5.2.1.
2341      It may seem that the order of these if-blocks is arbitrary but it is NOT.
2342      Don't move them unless you know what you do!  */
2343
2344   void set_default (int bitpos, int from, int to)
2345     {
2346       char tmp[2];
2347       int ch;
2348       int bit = _ISbit (bitpos);
2349       int bitw = _ISwbit (bitpos);
2350       /* Define string.  */
2351       strcpy (tmp, "?");
2352
2353       for (ch = from; ch <= to; ++ch)
2354         {
2355           uint32_t value;
2356           struct charseq *seq;
2357           tmp[0] = ch;
2358
2359           value = repertoire_find_value (repertoire, tmp, 1);
2360           if (value == ILLEGAL_CHAR_VALUE)
2361             {
2362               if (!be_quiet)
2363                 error (0, 0, _("\
2364 %s: character `%s' not defined in repertoire while needed as default value"),
2365                        "LC_CTYPE", tmp);
2366             }
2367           else
2368             ELEM (ctype, class_collection, , value) |= bitw;
2369
2370           seq = charmap_find_value (charmap, tmp, 1);
2371           if (seq == NULL)
2372             {
2373               if (!be_quiet)
2374                 error (0, 0, _("\
2375 %s: character `%s' not defined in charmap while needed as default value"),
2376                        "LC_CTYPE", tmp);
2377             }
2378           else if (seq->nbytes != 1)
2379             error (0, 0, _("\
2380 %s: character `%s' in charmap not representable with one byte"),
2381                    "LC_CTYPE", tmp);
2382           else
2383             ctype->class256_collection[seq->bytes[0]] |= bit;
2384         }
2385     }
2386
2387   /* Set default values if keyword was not present.  */
2388   if ((ctype->class_done & BITw (tok_upper)) == 0)
    /* "If this keyword [upper] is not specified, the uppercase letters
        `A' through `Z', ..., shall automatically belong to this class,
        with implementation defined character values."  [P1003.2, 2.5.2.1]  */
2392     set_default (BITPOS (tok_upper), 'A', 'Z');
2393
2394   if ((ctype->class_done & BITw (tok_lower)) == 0)
2395     /* "If this keyword [lower] is not specified, the lowercase letters
2396         `a' through `z', ..., shall automatically belong to this class,
2397         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
2398     set_default (BITPOS (tok_lower), 'a', 'z');
2399
2400   if ((ctype->class_done & BITw (tok_alpha)) == 0)
2401     {
2402       /* Table 2-6 in P1003.2 says that characters in class `upper' or
2403          class `lower' *must* be in class `alpha'.  */
2404       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
2405       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
2406
2407       for (cnt = 0; cnt < 256; ++cnt)
2408         if ((ctype->class256_collection[cnt] & mask) != 0)
2409           ctype->class256_collection[cnt] |= BIT (tok_alpha);
2410
2411       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2412         if ((ctype->class_collection[cnt] & maskw) != 0)
2413           ctype->class_collection[cnt] |= BITw (tok_alpha);
2414     }
2415
2416   if ((ctype->class_done & BITw (tok_digit)) == 0)
2417     /* "If this keyword [digit] is not specified, the digits `0' through
2418         `9', ..., shall automatically belong to this class, with
2419         implementation-defined character values."  [P1003.2, 2.5.2.1]  */
2420     set_default (BITPOS (tok_digit), '0', '9');
2421
2422   /* "Only characters specified for the `alpha' and `digit' keyword
2423      shall be specified.  Characters specified for the keyword `alpha'
2424      and `digit' are automatically included in this class."  */
2425   {
2426     unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
2427     unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
2428
2429     for (cnt = 0; cnt < 256; ++cnt)
2430       if ((ctype->class256_collection[cnt] & mask) != 0)
2431         ctype->class256_collection[cnt] |= BIT (tok_alnum);
2432
2433     for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2434       if ((ctype->class_collection[cnt] & maskw) != 0)
2435         ctype->class_collection[cnt] |= BITw (tok_alnum);
2436   }
2437
2438   if ((ctype->class_done & BITw (tok_space)) == 0)
2439     /* "If this keyword [space] is not specified, the characters <space>,
2440         <form-feed>, <newline>, <carriage-return>, <tab>, and
2441         <vertical-tab>, ..., shall automatically belong to this class,
2442         with implementation-defined character values."  [P1003.2, 2.5.2.1]  */
2443     {
2444       uint32_t value;
2445       struct charseq *seq;
2446
2447       value = repertoire_find_value (repertoire, "space", 5);
2448       if (value == ILLEGAL_CHAR_VALUE)
2449         {
2450           if (!be_quiet)
2451             error (0, 0, _("\
2452 %s: character `%s' not defined while needed as default value"),
2453                    "LC_CTYPE", "<space>");
2454         }
2455       else
2456         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
2457
2458       seq = charmap_find_value (charmap, "space", 5);
2459       if (seq == NULL)
2460         {
2461           if (!be_quiet)
2462             error (0, 0, _("\
2463 %s: character `%s' not defined while needed as default value"),
2464                    "LC_CTYPE", "<space>");
2465         }
2466       else if (seq->nbytes != 1)
2467         error (0, 0, _("\
2468 %s: character `%s' in charmap not representable with one byte"),
2469                "LC_CTYPE", "<space>");
2470       else
2471         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2472
2473
2474       value = repertoire_find_value (repertoire, "form-feed", 9);
2475       if (value == ILLEGAL_CHAR_VALUE)
2476         {
2477           if (!be_quiet)
2478             error (0, 0, _("\
2479 %s: character `%s' not defined while needed as default value"),
2480                    "LC_CTYPE", "<form-feed>");
2481         }
2482       else
2483         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
2484
2485       seq = charmap_find_value (charmap, "form-feed", 9);
2486       if (seq == NULL)
2487         {
2488           if (!be_quiet)
2489             error (0, 0, _("\
2490 %s: character `%s' not defined while needed as default value"),
2491                    "LC_CTYPE", "<form-feed>");
2492         }
2493       else if (seq->nbytes != 1)
2494         error (0, 0, _("\
2495 %s: character `%s' in charmap not representable with one byte"),
2496                "LC_CTYPE", "<form-feed>");
2497       else
2498         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2499
2500
2501       value = repertoire_find_value (repertoire, "newline", 7);
2502       if (value == ILLEGAL_CHAR_VALUE)
2503         {
2504           if (!be_quiet)
2505             error (0, 0, _("\
2506 %s: character `%s' not defined while needed as default value"),
2507                    "LC_CTYPE", "<newline>");
2508         }
2509       else
2510         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
2511
2512       seq = charmap_find_value (charmap, "newline", 7);
2513       if (seq == NULL)
2514         {
2515           if (!be_quiet)
2516             error (0, 0, _("\
2517 character `%s' not defined while needed as default value"),
2518                    "<newline>");
2519         }
2520       else if (seq->nbytes != 1)
2521         error (0, 0, _("\
2522 %s: character `%s' in charmap not representable with one byte"),
2523                "LC_CTYPE", "<newline>");
2524       else
2525         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2526
2527
2528       value = repertoire_find_value (repertoire, "carriage-return", 15);
2529       if (value == ILLEGAL_CHAR_VALUE)
2530         {
2531           if (!be_quiet)
2532             error (0, 0, _("\
2533 %s: character `%s' not defined while needed as default value"),
2534                    "LC_CTYPE", "<carriage-return>");
2535         }
2536       else
2537         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
2538
2539       seq = charmap_find_value (charmap, "carriage-return", 15);
2540       if (seq == NULL)
2541         {
2542           if (!be_quiet)
2543             error (0, 0, _("\
2544 %s: character `%s' not defined while needed as default value"),
2545                    "LC_CTYPE", "<carriage-return>");
2546         }
2547       else if (seq->nbytes != 1)
2548         error (0, 0, _("\
2549 %s: character `%s' in charmap not representable with one byte"),
2550                "LC_CTYPE", "<carriage-return>");
2551       else
2552         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2553
2554
2555       value = repertoire_find_value (repertoire, "tab", 3);
2556       if (value == ILLEGAL_CHAR_VALUE)
2557         {
2558           if (!be_quiet)
2559             error (0, 0, _("\
2560 %s: character `%s' not defined while needed as default value"),
2561                    "LC_CTYPE", "<tab>");
2562         }
2563       else
2564         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
2565
2566       seq = charmap_find_value (charmap, "tab", 3);
2567       if (seq == NULL)
2568         {
2569           if (!be_quiet)
2570             error (0, 0, _("\
2571 %s: character `%s' not defined while needed as default value"),
2572                    "LC_CTYPE", "<tab>");
2573         }
2574       else if (seq->nbytes != 1)
2575         error (0, 0, _("\
2576 %s: character `%s' in charmap not representable with one byte"),
2577                "LC_CTYPE", "<tab>");
2578       else
2579         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2580
2581
2582       value = repertoire_find_value (repertoire, "vertical-tab", 12);
2583       if (value == ILLEGAL_CHAR_VALUE)
2584         {
2585           if (!be_quiet)
2586             error (0, 0, _("\
2587 %s: character `%s' not defined while needed as default value"),
2588                    "LC_CTYPE", "<vertical-tab>");
2589         }
2590       else
2591         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
2592
2593       seq = charmap_find_value (charmap, "vertical-tab", 12);
2594       if (seq == NULL)
2595         {
2596           if (!be_quiet)
2597             error (0, 0, _("\
2598 %s: character `%s' not defined while needed as default value"),
2599                    "LC_CTYPE", "<vertical-tab>");
2600         }
2601       else if (seq->nbytes != 1)
2602         error (0, 0, _("\
2603 %s: character `%s' in charmap not representable with one byte"),
2604                "LC_CTYPE", "<vertical-tab>");
2605       else
2606         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2607     }
2608
2609   if ((ctype->class_done & BITw (tok_xdigit)) == 0)
2610     /* "If this keyword is not specified, the digits `0' to `9', the
2611         uppercase letters `A' through `F', and the lowercase letters `a'
2612         through `f', ..., shall automatically belong to this class, with
2613         implementation defined character values."  [P1003.2, 2.5.2.1]  */
2614     {
2615       set_default (BITPOS (tok_xdigit), '0', '9');
2616       set_default (BITPOS (tok_xdigit), 'A', 'F');
2617       set_default (BITPOS (tok_xdigit), 'a', 'f');
2618     }
2619
2620   if ((ctype->class_done & BITw (tok_blank)) == 0)
2621     /* "If this keyword [blank] is unspecified, the characters <space> and
2622        <tab> shall belong to this character class."  [P1003.2, 2.5.2.1]  */
2623    {
2624       uint32_t value;
2625       struct charseq *seq;
2626
2627       value = repertoire_find_value (repertoire, "space", 5);
2628       if (value == ILLEGAL_CHAR_VALUE)
2629         {
2630           if (!be_quiet)
2631             error (0, 0, _("\
2632 %s: character `%s' not defined while needed as default value"),
2633                    "LC_CTYPE", "<space>");
2634         }
2635       else
2636         ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
2637
2638       seq = charmap_find_value (charmap, "space", 5);
2639       if (seq == NULL)
2640         {
2641           if (!be_quiet)
2642             error (0, 0, _("\
2643 %s: character `%s' not defined while needed as default value"),
2644                    "LC_CTYPE", "<space>");
2645         }
2646       else if (seq->nbytes != 1)
2647         error (0, 0, _("\
2648 %s: character `%s' in charmap not representable with one byte"),
2649                "LC_CTYPE", "<space>");
2650       else
2651         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
2652
2653
2654       value = repertoire_find_value (repertoire, "tab", 3);
2655       if (value == ILLEGAL_CHAR_VALUE)
2656         {
2657           if (!be_quiet)
2658             error (0, 0, _("\
2659 %s: character `%s' not defined while needed as default value"),
2660                    "LC_CTYPE", "<tab>");
2661         }
2662       else
2663         ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
2664
2665       seq = charmap_find_value (charmap, "tab", 3);
2666       if (seq == NULL)
2667         {
2668           if (!be_quiet)
2669             error (0, 0, _("\
2670 %s: character `%s' not defined while needed as default value"),
2671                    "LC_CTYPE", "<tab>");
2672         }
2673       else if (seq->nbytes != 1)
2674         error (0, 0, _("\
2675 %s: character `%s' in charmap not representable with one byte"),
2676                "LC_CTYPE", "<tab>");
2677       else
2678         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
2679     }
2680
2681   if ((ctype->class_done & BITw (tok_graph)) == 0)
2682     /* "If this keyword [graph] is not specified, characters specified for
2683         the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
2684         shall belong to this character class."  [P1003.2, 2.5.2.1]  */
2685     {
2686       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
2687         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
2688       size_t cnt;
2689
2690       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2691         if ((ctype->class_collection[cnt] & mask) != 0)
2692           ctype->class_collection[cnt] |= BIT (tok_graph);
2693
2694       for (cnt = 0; cnt < 256; ++cnt)
2695         if ((ctype->class256_collection[cnt] & mask) != 0)
2696           ctype->class256_collection[cnt] |= BIT (tok_graph);
2697     }
2698
2699   if ((ctype->class_done & BITw (tok_print)) == 0)
2700     /* "If this keyword [print] is not provided, characters specified for
2701         the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
2702         and the <space> character shall belong to this character class."
2703         [P1003.2, 2.5.2.1]  */
2704     {
2705       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
2706         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
2707       size_t cnt;
2708       uint32_t space;
2709       struct charseq *seq;
2710
2711       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2712         if ((ctype->class_collection[cnt] & mask) != 0)
2713           ctype->class_collection[cnt] |= BIT (tok_print);
2714
2715       for (cnt = 0; cnt < 256; ++cnt)
2716         if ((ctype->class256_collection[cnt] & mask) != 0)
2717           ctype->class256_collection[cnt] |= BIT (tok_print);
2718
2719
2720       space = repertoire_find_value (repertoire, "space", 5);
2721       if (space == ILLEGAL_CHAR_VALUE)
2722         {
2723           if (!be_quiet)
2724             error (0, 0, _("\
2725 %s: character `%s' not defined while needed as default value"),
2726                    "LC_CTYPE", "<space>");
2727         }
2728       else
2729         ELEM (ctype, class_collection, , space) |= BIT (tok_print);
2730
2731       seq = charmap_find_value (charmap, "space", 5);
2732       if (seq == NULL)
2733         {
2734           if (!be_quiet)
2735             error (0, 0, _("\
2736 %s: character `%s' not defined while needed as default value"),
2737                    "LC_CTYPE", "<space>");
2738         }
2739       else if (seq->nbytes != 1)
2740         error (0, 0, _("\
2741 %s: character `%s' in charmap not representable with one byte"),
2742                "LC_CTYPE", "<space>");
2743       else
2744         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
2745     }
2746
2747   if (ctype->tomap_done[0] == 0)
2748     /* "If this keyword [toupper] is not specified, the lowercase letters
2749         `a' through `z', and their corresponding uppercase letters `A' to
2750         `Z', ..., shall automatically be included, with implementation-
2751         defined character values."  [P1003.2, 2.5.2.1]  */
2752     {
2753       char tmp[4];
2754       int ch;
2755
2756       strcpy (tmp, "<?>");
2757
2758       for (ch = 'a'; ch <= 'z'; ++ch)
2759         {
2760           uint32_t value_from, value_to;
2761           struct charseq *seq_from, *seq_to;
2762
2763           tmp[1] = (char) ch;
2764
2765           value_from = repertoire_find_value (repertoire, &tmp[1], 1);
2766           if (value_from == ILLEGAL_CHAR_VALUE)
2767             {
2768               if (!be_quiet)
2769                 error (0, 0, _("\
2770 %s: character `%s' not defined while needed as default value"),
2771                        "LC_CTYPE", tmp);
2772             }
2773           else
2774             {
2775               /* This conversion is implementation defined.  */
2776               tmp[1] = (char) (ch + ('A' - 'a'));
2777               value_to = repertoire_find_value (repertoire, &tmp[1], 1);
2778               if (value_to == ILLEGAL_CHAR_VALUE)
2779                 {
2780                   if (!be_quiet)
2781                     error (0, 0, _("\
2782 %s: character `%s' not defined while needed as default value"),
2783                            "LC_CTYPE", tmp);
2784                 }
2785               else
2786                 /* The index [0] is determined by the order of the
2787                    `ctype_map_newP' calls in `ctype_startup'.  */
2788                 ELEM (ctype, map_collection, [0], value_from) = value_to;
2789             }
2790
2791           seq_from = charmap_find_value (charmap, &tmp[1], 1);
2792           if (seq_from == NULL)
2793             {
2794               if (!be_quiet)
2795                 error (0, 0, _("\
2796 %s: character `%s' not defined while needed as default value"),
2797                        "LC_CTYPE", tmp);
2798             }
2799           else if (seq_from->nbytes != 1)
2800             {
2801               if (!be_quiet)
2802                 error (0, 0, _("\
2803 %s: character `%s' needed as default value not representable with one byte"),
2804                        "LC_CTYPE", tmp);
2805             }
2806           else
2807             {
2808               /* This conversion is implementation defined.  */
2809               tmp[1] = (char) (ch + ('A' - 'a'));
2810               seq_to = charmap_find_value (charmap, &tmp[1], 1);
2811               if (seq_to == NULL)
2812                 {
2813                   if (!be_quiet)
2814                     error (0, 0, _("\
2815 %s: character `%s' not defined while needed as default value"),
2816                            "LC_CTYPE", tmp);
2817                 }
2818               else if (seq_to->nbytes != 1)
2819                 {
2820                   if (!be_quiet)
2821                     error (0, 0, _("\
2822 %s: character `%s' needed as default value not representable with one byte"),
2823                            "LC_CTYPE", tmp);
2824                 }
2825               else
2826                 /* The index [0] is determined by the order of the
2827                    `ctype_map_newP' calls in `ctype_startup'.  */
2828                 ctype->map256_collection[0][seq_from->bytes[0]]
2829                   = seq_to->bytes[0];
2830             }
2831         }
2832     }
2833
2834   if (ctype->tomap_done[1] == 0)
2835     /* "If this keyword [tolower] is not specified, the mapping shall be
2836        the reverse mapping of the one specified to `toupper'."  [P1003.2]  */
2837     {
2838       for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
2839         if (ctype->map_collection[0][cnt] != 0)
2840           ELEM (ctype, map_collection, [1],
2841                 ctype->map_collection[0][cnt])
2842             = ctype->charnames[cnt];
2843
2844       for (cnt = 0; cnt < 256; ++cnt)
2845         if (ctype->map256_collection[0][cnt] != 0)
2846           ctype->map_collection[1][ctype->map_collection[0][cnt]]
2847             = ctype->charnames[cnt];
2848     }
2849
2850   if (ctype->outdigits_act == 0)
2851     {
2852       for (cnt = 0; cnt < 10; ++cnt)
2853         {
2854           ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
2855                                                          digits + cnt, 1);
2856
2857           if (ctype->mboutdigits[cnt] == NULL)
2858             {
2859               ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
2860                                                              longnames[cnt],
2861                                                              strlen (longnames[cnt]));
2862
2863               if (ctype->mboutdigits[cnt] == NULL)
2864                 {
2865                   /* Provide a replacement.  */
2866                   error (0, 0, _("\
2867 no output digits defined and none of the standard names in the charmap"));
2868
2869                   ctype->mboutdigits[cnt] = obstack_alloc (&charmap->mem_pool,
2870                                                            sizeof (struct charseq) + 1);
2871
2872                   /* This is better than nothing.  */
2873                   ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
2874                   ctype->mboutdigits[cnt]->nbytes = 1;
2875                 }
2876             }
2877
2878           ctype->wcoutdigits[cnt] = repertoire_find_value (repertoire,
2879                                                            digits + cnt, 1);
2880
2881           if (ctype->wcoutdigits[cnt] == ILLEGAL_CHAR_VALUE)
2882             {
2883               ctype->wcoutdigits[cnt] = repertoire_find_value (repertoire,
2884                                                                longnames[cnt],
2885                                                                strlen (longnames[cnt]));
2886
2887               if (ctype->wcoutdigits[cnt] == ILLEGAL_CHAR_VALUE)
2888                 {
2889                   /* Provide a replacement.  */
2890                   error (0, 0, _("\
2891 no output digits defined and none of the standard names in the repertoire"));
2892
2893                   /* This is better than nothing.  */
2894                   ctype->wcoutdigits[cnt] = (uint32_t) digits[cnt];
2895                 }
2896             }
2897         }
2898
2899       ctype->outdigits_act = 10;
2900     }
2901 }
2902
2903
2904 static void
2905 allocate_arrays (struct locale_ctype_t *ctype, struct charmap_t *charmap,
2906                  struct repertoire_t *repertoire)
2907 {
2908   size_t idx;
2909
2910   /* First we have to decide how we organize the arrays.  It is easy
2911      for a one-byte character set.  But multi-byte character set
2912      cannot be stored flat because the chars might be sparsely used.
2913      So we determine an optimal hashing function for the used
2914      characters.
2915
2916      We use a very trivial hashing function to store the sparse
2917      table.  CH % TABSIZE is used as an index.  To solve multiple hits
2918      we have N planes.  This guarantees a fixed search time for a
2919      character [N / 2].  In the following code we determine the minimum
2920      value for TABSIZE * N, where TABSIZE >= 256.  */
2921   size_t min_total = UINT_MAX;
2922   size_t act_size = 256;
2923
2924   if (!be_quiet)
2925     fputs (_("\
2926 Computing table size for character classes might take a while..."),
2927            stderr);
2928
2929   while (act_size < min_total)
2930     {
2931       size_t cnt[act_size];
2932       size_t act_planes = 1;
2933
2934       memset (cnt, '\0', sizeof cnt);
2935
2936       for (idx = 0; idx < 256; ++idx)
2937         cnt[idx] = 1;
2938
2939       for (idx = 0; idx < ctype->charnames_act; ++idx)
2940         if (ctype->charnames[idx] >= 256)
2941           {
2942             size_t nr = ctype->charnames[idx] % act_size;
2943
2944             if (++cnt[nr] > act_planes)
2945               {
2946                 act_planes = cnt[nr];
2947                 if (act_size * act_planes >= min_total)
2948                   break;
2949               }
2950           }
2951
2952       if (act_size * act_planes < min_total)
2953         {
2954           min_total = act_size * act_planes;
2955           ctype->plane_size = act_size;
2956           ctype->plane_cnt = act_planes;
2957         }
2958
2959       ++act_size;
2960     }
2961
2962   if (!be_quiet)
2963     fputs (_(" done\n"), stderr);
2964
2965
2966   ctype->names = (uint32_t *) xcalloc (ctype->plane_size
2967                                        * ctype->plane_cnt,
2968                                        sizeof (uint32_t));
2969
2970   for (idx = 1; idx < 256; ++idx)
2971     ctype->names[idx] = idx;
2972
2973   /* Trick: change the 0th entry's name to 1 to mark the cell occupied.  */
2974   ctype->names[0] = 1;
2975
2976   for (idx = 256; idx < ctype->charnames_act; ++idx)
2977     {
2978       size_t nr = (ctype->charnames[idx] % ctype->plane_size);
2979       size_t depth = 0;
2980
2981       while (ctype->names[nr + depth * ctype->plane_size])
2982         ++depth;
2983       assert (depth < ctype->plane_cnt);
2984
2985       ctype->names[nr + depth * ctype->plane_size] = ctype->charnames[idx];
2986
2987       /* Now for faster access remember the index in the NAMES_B array.  */
2988       ctype->charnames[idx] = nr + depth * ctype->plane_size;
2989     }
2990   ctype->names[0] = 0;
2991
2992
2993   /* You wonder about this amount of memory?  This is only because some
2994      users do not manage to address the array with unsigned values or
2995      data types with range >= 256.  '\200' would result in the array
2996      index -128.  To help these poor people we duplicate the entries for
2997      128 up to 255 below the entry for \0.  */
2998   ctype->ctype_b = (char_class_t *) xcalloc (256 + 128,
2999                                              sizeof (char_class_t));
3000   ctype->ctype32_b = (char_class32_t *) xcalloc (ctype->plane_size
3001                                                  * ctype->plane_cnt,
3002                                                  sizeof (char_class32_t));
3003
3004   /* This is the array accessed using the multibyte string elements.  */
3005   for (idx = 0; idx < 256; ++idx)
3006     ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
3007
3008   /* Mirror first 127 entries.  We must take care that entry -1 is not
3009      mirrored because EOF == -1.  */
3010   for (idx = 0; idx < 127; ++idx)
3011     ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
3012
3013   /* The 32 bit array contains all characters.  */
3014   for (idx = 0; idx < ctype->class_collection_act; ++idx)
3015     ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
3016
3017   /* Room for table of mappings.  */
3018   ctype->map = (uint32_t **) xmalloc (ctype->map_collection_nr
3019                                       * sizeof (uint32_t *));
3020
3021   /* Fill in all mappings.  */
3022   for (idx = 0; idx < ctype->map_collection_nr; ++idx)
3023     {
3024       unsigned int idx2;
3025
3026       /* Allocate table.  */
3027       ctype->map[idx] = (uint32_t *) xmalloc ((ctype->plane_size
3028                                                * ctype->plane_cnt + 128)
3029                                               * sizeof (uint32_t));
3030
3031       /* Copy default value (identity mapping).  */
3032       memcpy (&ctype->map[idx][128], ctype->names,
3033               ctype->plane_size * ctype->plane_cnt * sizeof (uint32_t));
3034
3035       /* Copy values from collection.  */
3036       for (idx2 = 0; idx2 < 256; ++idx2)
3037         ctype->map[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
3038
3039       /* Mirror first 127 entries.  We must take care not to map entry
3040          -1 because EOF == -1.  */
3041       for (idx2 = 0; idx2 < 127; ++idx2)
3042         ctype->map[idx][idx2] = ctype->map[idx][256 + idx2];
3043
3044       /* EOF must map to EOF.  */
3045       ctype->map[idx][127] = EOF;
3046
3047       /* The 32 bit map collection.  */
3048       for (idx2 = 0; idx2 < ctype->map_collection_act[idx]; ++idx2)
3049         if (ctype->map_collection[idx][idx2] != 0)
3050           ctype->map[idx][128 + ctype->charnames[idx2]]
3051             = ctype->map_collection[idx][idx2];
3052     }
3053
3054   /* Extra array for class and map names.  */
3055   ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3056                                                 * sizeof (uint32_t));
3057   ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3058                                               * sizeof (uint32_t));
3059
3060   /* Array for width information.  Because the expected widths are very
3061      small we use only a single byte.  This saves space and we need
3062      not provide the information twice with both endiannesses.  */
3063   ctype->width = (unsigned char *) xmalloc (ctype->plane_size
3064                                             * ctype->plane_cnt);
3065   /* Initialize with default width value.  */
3066   memset (ctype->width, charmap->width_default,
3067           ctype->plane_size * ctype->plane_cnt);
3068   if (charmap->width_rules != NULL)
3069     {
3070       size_t cnt;
3071
3072       for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
3073         {
3074           unsigned char bytes[charmap->mb_cur_max];
3075           int nbytes = charmap->width_rules[cnt].from->nbytes;
3076
3077           /* We have the range of character for which the width is
3078              specified described using byte sequences of the multibyte
3079              charset.  We have to convert this to UCS4 now.  And we
3080              cannot simply convert the beginning and the end of the
3081              sequence, we have to iterate over the byte sequence and
3082              convert it for every single character.  */
3083           memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
3084
3085           while (nbytes < charmap->width_rules[cnt].to->nbytes
3086                  || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
3087                             nbytes) <= 0)
3088             {
3089               /* Find the UCS value for `bytes'.  */
3090               uint32_t wch = repertoire_find_value (ctype->repertoire, bytes,
3091                                                     nbytes);
3092               int inner;
3093
3094               if (wch != ILLEGAL_CHAR_VALUE)
3095                 {
3096                   /* Store the value.  */
3097                   size_t nr = idx % ctype->plane_size;
3098                   size_t depth = 0;
3099
3100                   while (ctype->names[nr + depth * ctype->plane_size] != nr)
3101                     ++depth;
3102                   assert (depth < ctype->plane_cnt);
3103
3104                   ctype->width[nr + depth * ctype->plane_size]
3105                     = charmap->width_rules[cnt].width;
3106                 }
3107
3108               /* "Increment" the bytes sequence.  */
3109               inner = nbytes - 1;
3110               while (inner >= 0 && bytes[inner] == 0xff)
3111                 --inner;
3112
3113               if (inner < 0)
3114                 {
3115                   /* We have to extend the byte sequence.  */
3116                   if (nbytes >= charmap->width_rules[cnt].to->nbytes)
3117                     break;
3118
3119                   bytes[0] = 1;
3120                   memset (&bytes[1], 0, nbytes);
3121                   ++nbytes;
3122                 }
3123               else
3124                 {
3125                   ++bytes[inner];
3126                   while (++inner < nbytes)
3127                     bytes[inner] = 0;
3128                 }
3129             }
3130         }
3131     }
3132
3133   /* Set MB_CUR_MAX.  */
3134   ctype->mb_cur_max = charmap->mb_cur_max;
3135
3136   /* We need the name of the currently used 8-bit character set to
3137      make correct conversion between this 8-bit representation and the
3138      ISO 10646 character set used internally for wide characters.  */
3139   ctype->codeset_name = charmap->code_set_name;
3140
3141   /* Now determine the table for the transliteration information.
3142
3143      XXX It is not yet clear to me whether it is worth implementing a
3144      complicated algorithm which uses a hash table to locate the entries.
3145      For now I'll use a simple array which can be searched using binary
3146      search.  */
3147   if (ctype->translit_copy_locale != NULL)
3148     {
3149       /* Fold in the transliteration information from the locale mentioned
3150          in the `include' statement.  */
3151       struct locale_ctype_t *here = ctype;
3152
3153       do
3154         {
3155           struct localedef_t *other = find_locale (LC_CTYPE,
3156                                                    here->translit_copy_locale,
3157                                                    repertoire->name, charmap);
3158
3159           if (other == NULL)
3160             {
3161               error (0, 0, _("\
3162 %s: transliteration data from locale `%s' not available"),
3163                      "LC_CTYPE", here->translit_copy_locale);
3164               break;
3165             }
3166
3167           here = other->categories[LC_CTYPE].ctype;
3168
3169           /* Enqueue the information if necessary.  */
3170           if (here->translit != NULL)
3171             {
3172               struct translit_t *endp = here->translit;
3173               while (endp->next != NULL)
3174                 endp = endp->next;
3175
3176               endp->next = ctype->translit;
3177               ctype->translit = here->translit;
3178             }
3179         }
3180       while (here->translit_copy_locale != NULL);
3181     }
3182
3183   if (ctype->translit != NULL)
3184     {
3185       /* First count how many entries we have.  This is the upper limit
3186          since some entries from the included files might be overwritten.  */
3187       size_t number = 0;
3188       size_t cnt;
3189       struct translit_t *runp = ctype->translit;
3190       struct translit_t **sorted;
3191       size_t from_len, to_len;
3192
3193       while (runp != NULL)
3194         {
3195           ++number;
3196           runp = runp->next;
3197         }
3198
3199       /* Next we allocate an array large enough and fill in the values.  */
3200       sorted = (struct translit_t **) alloca (number
3201                                               * sizeof (struct translit_t **));
3202       runp = ctype->translit;
3203       number = 0;
3204       do
3205         {
3206           /* Search for the place where to insert this string.
3207              XXX Better use a real sorting algorithm later.  */
3208           size_t idx = 0;
3209           int replace = 0;
3210
3211           while (idx < number)
3212             {
3213               int res = wcscmp ((const wchar_t *) sorted[idx]->from,
3214                                 (const wchar_t *) runp->from);
3215               if (res == 0)
3216                 {
3217                   replace = 1;
3218                   break;
3219                 }
3220               if (res > 0)
3221                 break;
3222               ++idx;
3223             }
3224
3225           if (replace)
3226             sorted[idx] = runp;
3227           else
3228             {
3229               memmove (&sorted[idx + 1], &sorted[idx],
3230                        (number - idx) * sizeof (struct translit_t *));
3231               sorted[idx] = runp;
3232               ++number;
3233             }
3234
3235           runp = runp->next;
3236         }
3237       while (runp != NULL);
3238
3239       /* The next step is putting all the possible transliteration
3240          strings in one memory block so that we can write it out.
3241          We need several different blocks:
3242          - index to the from-string array
3243          - from-string array
3244          - index to the to-string array
3245          - to-string array.
3246          And all of this must be available for both endianness variants.
3247       */
3248       from_len = to_len = 0;
3249       for (cnt = 0; cnt < number; ++cnt)
3250         {
3251           struct translit_to_t *srunp;
3252           from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
3253           srunp = sorted[cnt]->to;
3254           while (srunp != NULL)
3255             {
3256               to_len += wcslen ((const wchar_t *) srunp->str) + 1;
3257               srunp = srunp->next;
3258             }
3259           /* Plus one for the extra NUL character marking the end of
3260              the list for the current entry.  */
3261           ++to_len;
3262         }
3263
3264       /* We can allocate the arrays for the results.  */
3265       ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
3266       ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
3267       ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
3268       ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
3269
3270       from_len = 0;
3271       to_len = 0;
3272       for (cnt = 0; cnt < number; ++cnt)
3273         {
3274           size_t len;
3275           struct translit_to_t *srunp;
3276
3277           ctype->translit_from_idx[cnt] = from_len;
3278           ctype->translit_to_idx[cnt] = to_len;
3279
3280           len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
3281           wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
3282                    (const wchar_t *) sorted[cnt]->from, len);
3283           from_len += len;
3284
3285           ctype->translit_to_idx[cnt] = to_len;
3286           srunp = sorted[cnt]->to;
3287           while (srunp != NULL)
3288             {
3289               len = wcslen ((const wchar_t *) srunp->str) + 1;
3290               wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
3291                        (const wchar_t *) srunp->str, len);
3292               to_len += len;
3293               srunp = srunp->next;
3294             }
3295           ctype->translit_to_tbl[to_len++] = L'\0';
3296         }
3297
3298       /* Store the information about the length.  */
3299       ctype->translit_idx_size = number * sizeof (uint32_t);
3300       ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
3301       ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
3302     }
3303   else
3304     {
3305       /* Provide some dummy pointers since we have nothing to write out.  */
3306       static uint32_t no_str = { 0 };
3307
3308       ctype->translit_from_idx = &no_str;
3309       ctype->translit_from_tbl = &no_str;
3310       ctype->translit_to_tbl = &no_str;
3311       ctype->translit_idx_size = 0;
3312       ctype->translit_from_tbl_size = 0;
3313       ctype->translit_to_tbl_size = 0;
3314     }
3315 }