locale/programs/ld-ctype.c

   1 /* Copyright (C) 1995-2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
   4
   5    This program is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License version 2 as
   7    published by the Free Software Foundation.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program; if not, write to the Free Software Foundation,
  16    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  17
  18 #ifdef HAVE_CONFIG_H
  19 # include <config.h>
  20 #endif
  21
  22 #include <alloca.h>
  23 #include <byteswap.h>
  24 #include <endian.h>
  25 #include <errno.h>
  26 #include <limits.h>
  27 #include <obstack.h>
  28 #include <stdlib.h>
  29 #include <string.h>
  30 #include <wchar.h>
  31 #include <wctype.h>
  32 #include <sys/uio.h>
  33
  34 #include "localedef.h"
  35 #include "charmap.h"
  36 #include "localeinfo.h"
  37 #include "langinfo.h"
  38 #include "linereader.h"
  39 #include "locfile-token.h"
  40 #include "locfile.h"
  41
  42 #include <assert.h>
  43
  44
  45 #ifdef PREDEFINED_CLASSES
  46 /* These are the extra bits not in wctype.h since these are not preallocated
  47    classes.  */
  48 # define _ISwspecial1   (1 << 29)
  49 # define _ISwspecial2   (1 << 30)
  50 # define _ISwspecial3   (1 << 31)
  51 #endif
  52
  53
/* The bit used for representing a special class.  */
/* BITPOS turns a class token into its zero-based offset from `tok_upper'.
   BIT and BITw hand that offset to `_ISbit' and `_ISwbit' respectively; in
   this file the BIT masks are applied to `class256_collection' and the
   BITw masks to `class_collection'.  */
#define BITPOS(class) ((class) - tok_upper)
#define BIT(class) (_ISbit (BITPOS (class)))
#define BITw(class) (_ISwbit (BITPOS (class)))

/* Expands to the object `find_idx' points to for VALUE in the table
   CTYPE->COLLECTION, so the result can be read or assigned to.  IDX is
   pasted verbatim after the collection name and after its `_max' and
   `_act' companions; it is left empty in the uses visible in this part of
   the file (presumably it is a subscript such as `[n]' for the per-map
   arrays -- confirm against the callers).  */
#define ELEM(ctype, collection, idx, value)                                   \
  *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,     \
             &ctype->collection##_act idx, value)
  62
  63
/* To be compatible with former implementations we for now restrict
   the number of bits for character classes to 16.  When compatibility
   is not necessary anymore increase the number to 32.

   `char_class_t' is the element type of the `ctype_b' array and
   `char_class32_t' the element type of the `ctype32_b' array in
   `struct locale_ctype_t'.  */
#define char_class_t uint16_t
#define char_class32_t uint32_t
  69
  70
/* Type to describe a transliteration action.  We have a possibly
   multiple character from-string and a set of multiple character
   to-strings.  All are 32bit values since this is what is used in
   the gconv functions.  */
/* One to-string of a transliteration rule; the alternatives of a rule are
   chained through `next'.  */
struct translit_to_t
{
  /* The replacement string as 32bit values (presumably terminated by a
     zero element -- confirm against the code that builds it).  */
  uint32_t *str;

  /* Next to-string of the same rule.  */
  struct translit_to_t *next;
};
  81
/* One transliteration rule: a from-string together with the list of its
   to-strings.  The rules themselves form a singly linked list.  */
struct translit_t
{
  /* The string to be replaced, as 32bit values.  */
  uint32_t *from;

  /* File name and line number recorded for the rule (presumably the place
     of its definition, for diagnostics -- confirm against the parser).  */
  const char *fname;
  size_t lineno;

  /* Head of the list of to-strings.  */
  struct translit_to_t *to;

  /* Next rule.  */
  struct translit_t *next;
};
  93
/* One entry of the `translit_ignore' list.  `ctype_finish' orders this
   list by the `from' member and counts the entries.  */
struct translit_ignore_t
{
  /* The entry describes 32bit character values by FROM, TO and STEP
     (presumably the range FROM..TO visited in increments of STEP --
     confirm against the parser).  */
  uint32_t from;
  uint32_t to;
  uint32_t step;

  /* File name and line number recorded for the entry.  */
  const char *fname;
  size_t lineno;

  /* Next entry.  */
  struct translit_ignore_t *next;
};
 105
 106
/* Type to describe a transliteration include statement.  The statements
   form a singly linked list.  */
struct translit_include_t
{
  /* Names given in the statement: the locale to take the transliteration
     from and the repertoire to use with it (as the member names suggest;
     the parser is not part of this section).  */
  const char *copy_locale;
  const char *copy_repertoire;

  /* Next include statement.  */
  struct translit_include_t *next;
};
 115
 116
/* Sparse table of uint32_t.  */
/* "3level.h" is a template header configured through the macros defined
   right before it is included.  This instantiation is what provides
   `struct idx_table' and `idx_table_init' used further down in this file.
   TABLE names the generated type and ELEMENT is the stored value type;
   DEFAULT is presumably the value reported for entries that were never
   set, and NO_FINALIZE presumably suppresses the generation of the
   finalization code -- see 3level.h to confirm both.  */
#define TABLE idx_table
#define ELEMENT uint32_t
#define DEFAULT ((uint32_t) ~0)
#define NO_FINALIZE
#include "3level.h"
 123
 124
/* The real definition of the struct for the LC_CTYPE locale.  */
struct locale_ctype_t
{
  /* Array of the character values known so far.  `ctype_startup' enters
     the values 0..255 at the indices 0..255.  CHARNAMES_MAX is the number
     of allocated elements, CHARNAMES_ACT the number in use.  The entry at
     index I names the character whose class bits are stored in
     `class_collection[I]'.  */
  uint32_t *charnames;
  size_t charnames_max;
  size_t charnames_act;
  /* An index lookup table, to speedup find_idx.  */
  struct idx_table charnames_idx;

  /* Repertoire used to find the UCS4 value of charmap symbols which do
     not carry one themselves.  */
  struct repertoire_t *repertoire;

  /* We will allow up to 8 * sizeof (uint32_t) character classes.  */
#define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
  size_t nr_charclass;
  const char *classnames[MAX_NR_CHARCLASS];
  /* Initialized to ILLEGAL_CHAR_VALUE by `ctype_startup'.  */
  uint32_t last_class_char;
  /* Class bits (`_ISbit' masks) indexed by the value of a single byte.  */
  uint32_t class256_collection[256];
  /* Class bits (`_ISwbit' masks) for the characters in `charnames', with
     the same indices.  CLASS_COLLECTION_MAX and CLASS_COLLECTION_ACT are
     the allocated and the used number of elements.  */
  uint32_t *class_collection;
  size_t class_collection_max;
  size_t class_collection_act;
  uint32_t class_done;
  uint32_t class_offset;

  /* The input digits as multibyte sequences and as wide characters.
     `ctype_finish' keeps only complete groups of ten in both arrays and
     installs the ten default digits if an array would be empty.  */
  struct charseq **mbdigits;
  size_t mbdigits_act;
  size_t mbdigits_max;
  uint32_t *wcdigits;
  size_t wcdigits_act;
  size_t wcdigits_max;

  /* The ten output digits as multibyte sequences and as wide characters.
     `ctype_finish' replaces missing entries by a question mark.  */
  struct charseq *mboutdigits[10];
  uint32_t wcoutdigits[10];
  size_t outdigits_act;

  /* If the following number ever turns out to be too small simply
     increase it.  But I doubt it will.  --drepper@gnu */
#define MAX_NR_CHARMAP 16
  /* Per-map data.  `ctype_startup' creates the maps "toupper" and
     "tolower" (plus "tosymmetric" with PREDEFINED_CLASSES) and presets the
     first 256 entries of the first maps in `map_collection' and of both
     rows of `map256_collection' with the identity mapping.  */
  const char *mapnames[MAX_NR_CHARMAP];
  uint32_t *map_collection[MAX_NR_CHARMAP];
  uint32_t map256_collection[2][256];
  size_t map_collection_max[MAX_NR_CHARMAP];
  size_t map_collection_act[MAX_NR_CHARMAP];
  size_t map_collection_nr;
  /* Initialized to MAX_NR_CHARMAP by `ctype_startup'.  */
  size_t last_map_idx;
  int tomap_done[MAX_NR_CHARMAP];
  uint32_t map_offset;

  /* Transliteration information.  */
  struct translit_include_t *translit_include;
  struct translit_t *translit;
  /* List sorted by `from' and counted in NTRANSLIT_IGNORE by
     `ctype_finish'.  */
  struct translit_ignore_t *translit_ignore;
  uint32_t ntranslit_ignore;

  uint32_t *default_missing;
  const char *default_missing_file;
  size_t default_missing_lineno;

  /* Set to 1 by `ctype_startup' if `enc_not_ascii_compatible' is set.  */
  uint32_t to_nonascii;

  /* The arrays for the binary representation.  */
  char_class_t *ctype_b;
  char_class32_t *ctype32_b;
  uint32_t **map_b;
  uint32_t **map32_b;
  uint32_t **class_b;
  struct iovec *class_3level;
  struct iovec *map_3level;
  uint32_t *class_name_ptr;
  uint32_t *map_name_ptr;
  struct iovec width;
  uint32_t mb_cur_max;
  /* Code set name taken from the charmap by `ctype_finish', or
     "//UNKNOWN//" if the charmap has none.  */
  const char *codeset_name;
  uint32_t *translit_from_idx;
  uint32_t *translit_from_tbl;
  uint32_t *translit_to_idx;
  uint32_t *translit_to_tbl;
  uint32_t translit_idx_size;
  size_t translit_from_tbl_size;
  size_t translit_to_tbl_size;

  /* Obstack initialized by `ctype_startup'.  */
  struct obstack mempool;
};
 207
 208
/* Marker for an empty slot.  This has the value 0xFFFFFFFF, regardless
   whether 'int' is 16 bit, 32 bit, or 64 bit.  */
#define EMPTY ((uint32_t) ~0)


/* Allocation functions the obstack macros from <obstack.h> (such as
   `obstack_init') use to obtain and to release their chunks.  */
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free
 216
 217
/* Prototypes for local functions.  */
/* Set up the LC_CTYPE data of LOCALE, either by creating a fresh
   structure or by sharing the one of COPY_LOCALE.  */
static void ctype_startup (struct linereader *lr, struct localedef_t *locale,
                           const struct charmap_t *charmap,
                           struct localedef_t *copy_locale,
                           int ignore_content);
/* Called by `ctype_startup' once per predefined class name; the order of
   these calls determines the bit positions of the classes.  */
static void ctype_class_new (struct linereader *lr,
                             struct locale_ctype_t *ctype, const char *name);
/* Called by `ctype_startup' once per predefined mapping name.  */
static void ctype_map_new (struct linereader *lr,
                           struct locale_ctype_t *ctype,
                           const char *name, const struct charmap_t *charmap);
/* Return a pointer to the slot for the character value IDX in *TABLE.
   `ctype_finish' also calls it with null TABLE, MAX and ACT, purely for
   the side effect of entering IDX in the name array of CTYPE.  */
static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table,
                           size_t *max, size_t *act, unsigned int idx);
/* Called by `ctype_finish' to set default values for classes not
   specified.  */
static void set_class_defaults (struct locale_ctype_t *ctype,
                                const struct charmap_t *charmap,
                                struct repertoire_t *repertoire);
static void allocate_arrays (struct locale_ctype_t *ctype,
                             const struct charmap_t *charmap,
                             struct repertoire_t *repertoire);
 236
 237
/* Names of the ten decimal digits, indexed by digit value.  `ctype_finish'
   tries these when the plain digit character is not found in the
   charmap.  */
static const char *longnames[] =
{
  "zero", "one", "two", "three", "four",
  "five", "six", "seven", "eight", "nine"
};
/* The <Uxxxxxxxx> style names of U+0030..U+0039, indexed by digit
   value.  */
static const char *uninames[] =
{
  "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
  "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
};
/* The digit characters themselves, indexed by digit value.  */
static const unsigned char digits[] = "0123456789";
 249
 250
 251 static void
 252 ctype_startup (struct linereader *lr, struct localedef_t *locale,
 253                const struct charmap_t *charmap,
 254                struct localedef_t *copy_locale, int ignore_content)
 255 {
 256   unsigned int cnt;
 257   struct locale_ctype_t *ctype;
 258
 259   if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL)
 260     {
 261       if (copy_locale == NULL)
 262         {
 263           /* Allocate the needed room.  */
 264           locale->categories[LC_CTYPE].ctype = ctype =
 265             (struct locale_ctype_t *) xcalloc (1,
 266                                                sizeof (struct locale_ctype_t));
 267
 268           /* We have seen no names yet.  */
 269           ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
 270           ctype->charnames =
 271             (unsigned int *) xmalloc (ctype->charnames_max
 272                                       * sizeof (unsigned int));
 273           for (cnt = 0; cnt < 256; ++cnt)
 274             ctype->charnames[cnt] = cnt;
 275           ctype->charnames_act = 256;
 276           idx_table_init (&ctype->charnames_idx);
 277
 278           /* Fill character class information.  */
 279           ctype->last_class_char = ILLEGAL_CHAR_VALUE;
 280           /* The order of the following instructions determines the bit
 281              positions!  */
 282           ctype_class_new (lr, ctype, "upper");
 283           ctype_class_new (lr, ctype, "lower");
 284           ctype_class_new (lr, ctype, "alpha");
 285           ctype_class_new (lr, ctype, "digit");
 286           ctype_class_new (lr, ctype, "xdigit");
 287           ctype_class_new (lr, ctype, "space");
 288           ctype_class_new (lr, ctype, "print");
 289           ctype_class_new (lr, ctype, "graph");
 290           ctype_class_new (lr, ctype, "blank");
 291           ctype_class_new (lr, ctype, "cntrl");
 292           ctype_class_new (lr, ctype, "punct");
 293           ctype_class_new (lr, ctype, "alnum");
 294 #ifdef PREDEFINED_CLASSES
 295           /* The following are extensions from ISO 14652.  */
 296           ctype_class_new (lr, ctype, "left_to_right");
 297           ctype_class_new (lr, ctype, "right_to_left");
 298           ctype_class_new (lr, ctype, "num_terminator");
 299           ctype_class_new (lr, ctype, "num_separator");
 300           ctype_class_new (lr, ctype, "segment_separator");
 301           ctype_class_new (lr, ctype, "block_separator");
 302           ctype_class_new (lr, ctype, "direction_control");
 303           ctype_class_new (lr, ctype, "sym_swap_layout");
 304           ctype_class_new (lr, ctype, "char_shape_selector");
 305           ctype_class_new (lr, ctype, "num_shape_selector");
 306           ctype_class_new (lr, ctype, "non_spacing");
 307           ctype_class_new (lr, ctype, "non_spacing_level3");
 308           ctype_class_new (lr, ctype, "normal_connect");
 309           ctype_class_new (lr, ctype, "r_connect");
 310           ctype_class_new (lr, ctype, "no_connect");
 311           ctype_class_new (lr, ctype, "no_connect-space");
 312           ctype_class_new (lr, ctype, "vowel_connect");
 313 #endif
 314
 315           ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
 316           ctype->class_collection
 317             = (uint32_t *) xcalloc (sizeof (unsigned long int),
 318                                     ctype->class_collection_max);
 319           ctype->class_collection_act = 256;
 320
 321           /* Fill character map information.  */
 322           ctype->last_map_idx = MAX_NR_CHARMAP;
 323           ctype_map_new (lr, ctype, "toupper", charmap);
 324           ctype_map_new (lr, ctype, "tolower", charmap);
 325 #ifdef PREDEFINED_CLASSES
 326           ctype_map_new (lr, ctype, "tosymmetric", charmap);
 327 #endif
 328
 329           /* Fill first 256 entries in `toXXX' arrays.  */
 330           for (cnt = 0; cnt < 256; ++cnt)
 331             {
 332               ctype->map_collection[0][cnt] = cnt;
 333               ctype->map_collection[1][cnt] = cnt;
 334 #ifdef PREDEFINED_CLASSES
 335               ctype->map_collection[2][cnt] = cnt;
 336 #endif
 337               ctype->map256_collection[0][cnt] = cnt;
 338               ctype->map256_collection[1][cnt] = cnt;
 339             }
 340
 341           if (enc_not_ascii_compatible)
 342             ctype->to_nonascii = 1;
 343
 344           obstack_init (&ctype->mempool);
 345         }
 346       else
 347         ctype = locale->categories[LC_CTYPE].ctype =
 348           copy_locale->categories[LC_CTYPE].ctype;
 349     }
 350 }
 351
 352
 353 void
 354 ctype_finish (struct localedef_t *locale, const struct charmap_t *charmap)
 355 {
 356   /* See POSIX.2, table 2-6 for the meaning of the following table.  */
 357 #define NCLASS 12
 358   static const struct
 359   {
 360     const char *name;
 361     const char allow[NCLASS];
 362   }
 363   valid_table[NCLASS] =
 364   {
 365     /* The order is important.  See token.h for more information.
 366        M = Always, D = Default, - = Permitted, X = Mutually exclusive  */
 367     { "upper",  "--MX-XDDXXX-" },
 368     { "lower",  "--MX-XDDXXX-" },
 369     { "alpha",  "---X-XDDXXX-" },
 370     { "digit",  "XXX--XDDXXX-" },
 371     { "xdigit", "-----XDDXXX-" },
 372     { "space",  "XXXXX------X" },
 373     { "print",  "---------X--" },
 374     { "graph",  "---------X--" },
 375     { "blank",  "XXXXXM-----X" },
 376     { "cntrl",  "XXXXX-XX--XX" },
 377     { "punct",  "XXXXX-DD-X-X" },
 378     { "alnum",  "-----XDDXXX-" }
 379   };
 380   size_t cnt;
 381   int cls1, cls2;
 382   uint32_t space_value;
 383   struct charseq *space_seq;
 384   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 385   int warned;
 386   const void *key;
 387   size_t len;
 388   void *vdata;
 389   void *curs;
 390
 391   /* Now resolve copying and also handle completely missing definitions.  */
 392   if (ctype == NULL)
 393     {
 394       const char *repertoire_name;
 395
 396       /* First see whether we were supposed to copy.  If yes, find the
 397          actual definition.  */
 398       if (locale->copy_name[LC_CTYPE] != NULL)
 399         {
 400           /* Find the copying locale.  This has to happen transitively since
 401              the locale we are copying from might also copying another one.  */
 402           struct localedef_t *from = locale;
 403
 404           do
 405             from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
 406                                 from->repertoire_name, charmap);
 407           while (from->categories[LC_CTYPE].ctype == NULL
 408                  && from->copy_name[LC_CTYPE] != NULL);
 409
 410           ctype = locale->categories[LC_CTYPE].ctype
 411             = from->categories[LC_CTYPE].ctype;
 412         }
 413
 414       /* If there is still no definition issue an warning and create an
 415          empty one.  */
 416       if (ctype == NULL)
 417         {
 418           if (! be_quiet)
 419             WITH_CUR_LOCALE (error (0, 0, _("\
 420 No definition for %s category found"), "LC_CTYPE"));
 421           ctype_startup (NULL, locale, charmap, NULL, 0);
 422           ctype = locale->categories[LC_CTYPE].ctype;
 423         }
 424
 425       /* Get the repertoire we have to use.  */
 426       repertoire_name = locale->repertoire_name ?: repertoire_global;
 427       if (repertoire_name != NULL)
 428         ctype->repertoire = repertoire_read (repertoire_name);
 429     }
 430
 431   /* We need the name of the currently used 8-bit character set to
 432      make correct conversion between this 8-bit representation and the
 433      ISO 10646 character set used internally for wide characters.  */
 434   ctype->codeset_name = charmap->code_set_name;
 435   if (ctype->codeset_name == NULL)
 436     {
 437       if (! be_quiet)
 438         WITH_CUR_LOCALE (error (0, 0, _("\
 439 No character set name specified in charmap")));
 440       ctype->codeset_name = "//UNKNOWN//";
 441     }
 442
 443   /* Set default value for classes not specified.  */
 444   set_class_defaults (ctype, charmap, ctype->repertoire);
 445
 446   /* Check according to table.  */
 447   for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
 448     {
 449       uint32_t tmp = ctype->class_collection[cnt];
 450
 451       if (tmp != 0)
 452         {
 453           for (cls1 = 0; cls1 < NCLASS; ++cls1)
 454             if ((tmp & _ISwbit (cls1)) != 0)
 455               for (cls2 = 0; cls2 < NCLASS; ++cls2)
 456                 if (valid_table[cls1].allow[cls2] != '-')
 457                   {
 458                     int eq = (tmp & _ISwbit (cls2)) != 0;
 459                     switch (valid_table[cls1].allow[cls2])
 460                       {
 461                       case 'M':
 462                         if (!eq)
 463                           {
 464                             uint32_t value = ctype->charnames[cnt];
 465
 466                             if (!be_quiet)
 467                               WITH_CUR_LOCALE (error (0, 0, _("\
 468 character L'\\u%0*x' in class `%s' must be in class `%s'"),
 469                                                       value > 0xffff ? 8 : 4,
 470                                                       value,
 471                                                       valid_table[cls1].name,
 472                                                       valid_table[cls2].name));
 473                           }
 474                         break;
 475
 476                       case 'X':
 477                         if (eq)
 478                           {
 479                             uint32_t value = ctype->charnames[cnt];
 480
 481                             if (!be_quiet)
 482                               WITH_CUR_LOCALE (error (0, 0, _("\
 483 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
 484                                                       value > 0xffff ? 8 : 4,
 485                                                       value,
 486                                                       valid_table[cls1].name,
 487                                                       valid_table[cls2].name));
 488                           }
 489                         break;
 490
 491                       case 'D':
 492                         ctype->class_collection[cnt] |= _ISwbit (cls2);
 493                         break;
 494
 495                       default:
 496                         WITH_CUR_LOCALE (error (5, 0, _("\
 497 internal error in %s, line %u"), __FUNCTION__, __LINE__));
 498                       }
 499                   }
 500         }
 501     }
 502
 503   for (cnt = 0; cnt < 256; ++cnt)
 504     {
 505       uint32_t tmp = ctype->class256_collection[cnt];
 506
 507       if (tmp != 0)
 508         {
 509           for (cls1 = 0; cls1 < NCLASS; ++cls1)
 510             if ((tmp & _ISbit (cls1)) != 0)
 511               for (cls2 = 0; cls2 < NCLASS; ++cls2)
 512                 if (valid_table[cls1].allow[cls2] != '-')
 513                   {
 514                     int eq = (tmp & _ISbit (cls2)) != 0;
 515                     switch (valid_table[cls1].allow[cls2])
 516                       {
 517                       case 'M':
 518                         if (!eq)
 519                           {
 520                             char buf[17];
 521
 522                             snprintf (buf, sizeof buf, "\\%Zo", cnt);
 523
 524                             if (!be_quiet)
 525                               WITH_CUR_LOCALE (error (0, 0, _("\
 526 character '%s' in class `%s' must be in class `%s'"),
 527                                                       buf,
 528                                                       valid_table[cls1].name,
 529                                                       valid_table[cls2].name));
 530                           }
 531                         break;
 532
 533                       case 'X':
 534                         if (eq)
 535                           {
 536                             char buf[17];
 537
 538                             snprintf (buf, sizeof buf, "\\%Zo", cnt);
 539
 540                             if (!be_quiet)
 541                               WITH_CUR_LOCALE (error (0, 0, _("\
 542 character '%s' in class `%s' must not be in class `%s'"),
 543                                                       buf,
 544                                                       valid_table[cls1].name,
 545                                                       valid_table[cls2].name));
 546                           }
 547                         break;
 548
 549                       case 'D':
 550                         ctype->class256_collection[cnt] |= _ISbit (cls2);
 551                         break;
 552
 553                       default:
 554                         WITH_CUR_LOCALE (error (5, 0, _("\
 555 internal error in %s, line %u"), __FUNCTION__, __LINE__));
 556                       }
 557                   }
 558         }
 559     }
 560
 561   /* ... and now test <SP> as a special case.  */
 562   space_value = 32;
 563   if (((cnt = BITPOS (tok_space),
 564         (ELEM (ctype, class_collection, , space_value)
 565          & BITw (tok_space)) == 0)
 566        || (cnt = BITPOS (tok_blank),
 567            (ELEM (ctype, class_collection, , space_value)
 568             & BITw (tok_blank)) == 0)))
 569     {
 570       if (!be_quiet)
 571         WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
 572                                 valid_table[cnt].name));
 573     }
 574   else if (((cnt = BITPOS (tok_punct),
 575              (ELEM (ctype, class_collection, , space_value)
 576               & BITw (tok_punct)) != 0)
 577             || (cnt = BITPOS (tok_graph),
 578                 (ELEM (ctype, class_collection, , space_value)
 579                  & BITw (tok_graph))
 580                 != 0)))
 581     {
 582       if (!be_quiet)
 583         WITH_CUR_LOCALE (error (0, 0, _("\
 584 <SP> character must not be in class `%s'"),
 585                                 valid_table[cnt].name));
 586     }
 587   else
 588     ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
 589
 590   space_seq = charmap_find_value (charmap, "SP", 2);
 591   if (space_seq == NULL)
 592     space_seq = charmap_find_value (charmap, "space", 5);
 593   if (space_seq == NULL)
 594     space_seq = charmap_find_value (charmap, "U00000020", 9);
 595   if (space_seq == NULL || space_seq->nbytes != 1)
 596     {
 597       if (!be_quiet)
 598         WITH_CUR_LOCALE (error (0, 0, _("\
 599 character <SP> not defined in character map")));
 600     }
 601   else if (((cnt = BITPOS (tok_space),
 602              (ctype->class256_collection[space_seq->bytes[0]]
 603               & BIT (tok_space)) == 0)
 604             || (cnt = BITPOS (tok_blank),
 605                 (ctype->class256_collection[space_seq->bytes[0]]
 606                  & BIT (tok_blank)) == 0)))
 607     {
 608       if (!be_quiet)
 609         WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
 610                                 valid_table[cnt].name));
 611     }
 612   else if (((cnt = BITPOS (tok_punct),
 613              (ctype->class256_collection[space_seq->bytes[0]]
 614               & BIT (tok_punct)) != 0)
 615             || (cnt = BITPOS (tok_graph),
 616                 (ctype->class256_collection[space_seq->bytes[0]]
 617                  & BIT (tok_graph)) != 0)))
 618     {
 619       if (!be_quiet)
 620         WITH_CUR_LOCALE (error (0, 0, _("\
 621 <SP> character must not be in class `%s'"),
 622                                 valid_table[cnt].name));
 623     }
 624   else
 625     ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
 626
 627   /* Now that the tests are done make sure the name array contains all
 628      characters which are handled in the WIDTH section of the
 629      character set definition file.  */
 630   if (charmap->width_rules != NULL)
 631     for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
 632       {
 633         unsigned char bytes[charmap->mb_cur_max];
 634         int nbytes = charmap->width_rules[cnt].from->nbytes;
 635
 636         /* We have the range of character for which the width is
 637            specified described using byte sequences of the multibyte
 638            charset.  We have to convert this to UCS4 now.  And we
 639            cannot simply convert the beginning and the end of the
 640            sequence, we have to iterate over the byte sequence and
 641            convert it for every single character.  */
 642         memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
 643
 644         while (nbytes < charmap->width_rules[cnt].to->nbytes
 645                || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
 646                           nbytes) <= 0)
 647           {
 648             /* Find the UCS value for `bytes'.  */
 649             int inner;
 650             uint32_t wch;
 651             struct charseq *seq = charmap_find_symbol (charmap, bytes, nbytes);
 652
 653             if (seq == NULL)
 654               wch = ILLEGAL_CHAR_VALUE;
 655             else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
 656               wch = seq->ucs4;
 657             else
 658               wch = repertoire_find_value (ctype->repertoire, seq->name,
 659                                            strlen (seq->name));
 660
 661             if (wch != ILLEGAL_CHAR_VALUE)
 662               /* We are only interested in the side-effects of the
 663                  `find_idx' call.  It will add appropriate entries in
 664                  the name array if this is necessary.  */
 665               (void) find_idx (ctype, NULL, NULL, NULL, wch);
 666
 667             /* "Increment" the bytes sequence.  */
 668             inner = nbytes - 1;
 669             while (inner >= 0 && bytes[inner] == 0xff)
 670               --inner;
 671
 672             if (inner < 0)
 673               {
 674                 /* We have to extend the byte sequence.  */
 675                 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
 676                   break;
 677
 678                 bytes[0] = 1;
 679                 memset (&bytes[1], 0, nbytes);
 680                 ++nbytes;
 681               }
 682             else
 683               {
 684                 ++bytes[inner];
 685                 while (++inner < nbytes)
 686                   bytes[inner] = 0;
 687               }
 688           }
 689       }
 690
 691   /* Now set all the other characters of the character set to the
 692      default width.  */
 693   curs = NULL;
 694   while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
 695     {
 696       struct charseq *data = (struct charseq *) vdata;
 697
 698       if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
 699         data->ucs4 = repertoire_find_value (ctype->repertoire,
 700                                             data->name, len);
 701
 702       if (data->ucs4 != ILLEGAL_CHAR_VALUE)
 703         (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
 704     }
 705
 706   /* There must be a multiple of 10 digits.  */
 707   if (ctype->mbdigits_act % 10 != 0)
 708     {
 709       assert (ctype->mbdigits_act == ctype->wcdigits_act);
 710       ctype->wcdigits_act -= ctype->mbdigits_act % 10;
 711       ctype->mbdigits_act -= ctype->mbdigits_act % 10;
 712       WITH_CUR_LOCALE (error (0, 0, _("\
 713 `digit' category has not entries in groups of ten")));
 714     }
 715
 716   /* Check the input digits.  There must be a multiple of ten available.
 717      In each group it could be that one or the other character is missing.
 718      In this case the whole group must be removed.  */
 719   cnt = 0;
 720   while (cnt < ctype->mbdigits_act)
 721     {
 722       size_t inner;
 723       for (inner = 0; inner < 10; ++inner)
 724         if (ctype->mbdigits[cnt + inner] == NULL)
 725           break;
 726
 727       if (inner == 10)
 728         cnt += 10;
 729       else
 730         {
 731           /* Remove the group.  */
 732           memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
 733                    ((ctype->wcdigits_act - cnt - 10)
 734                     * sizeof (ctype->mbdigits[0])));
 735           ctype->mbdigits_act -= 10;
 736         }
 737     }
 738
 739   /* If no input digits are given use the default.  */
 740   if (ctype->mbdigits_act == 0)
 741     {
 742       if (ctype->mbdigits_max == 0)
 743         {
 744           ctype->mbdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
 745                                            10 * sizeof (struct charseq *));
 746           ctype->mbdigits_max = 10;
 747         }
 748
 749       for (cnt = 0; cnt < 10; ++cnt)
 750         {
 751           ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
 752                                                       digits + cnt, 1);
 753           if (ctype->mbdigits[cnt] == NULL)
 754             {
 755               ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
 756                                                           longnames[cnt],
 757                                                           strlen (longnames[cnt]));
 758               if (ctype->mbdigits[cnt] == NULL)
 759                 {
 760                   /* Hum, this ain't good.  */
 761                   WITH_CUR_LOCALE (error (0, 0, _("\
 762 no input digits defined and none of the standard names in the charmap")));
 763
 764                   ctype->mbdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
 765                                                         sizeof (struct charseq) + 1);
 766
 767                   /* This is better than nothing.  */
 768                   ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
 769                   ctype->mbdigits[cnt]->nbytes = 1;
 770                 }
 771             }
 772         }
 773
 774       ctype->mbdigits_act = 10;
 775     }
 776
 777   /* Check the wide character input digits.  There must be a multiple
 778      of ten available.  In each group it could be that one or the other
 779      character is missing.  In this case the whole group must be
 780      removed.  */
 781   cnt = 0;
 782   while (cnt < ctype->wcdigits_act)
 783     {
 784       size_t inner;
 785       for (inner = 0; inner < 10; ++inner)
 786         if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
 787           break;
 788
 789       if (inner == 10)
 790         cnt += 10;
 791       else
 792         {
 793           /* Remove the group.  */
 794           memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
 795                    ((ctype->wcdigits_act - cnt - 10)
 796                     * sizeof (ctype->wcdigits[0])));
 797           ctype->wcdigits_act -= 10;
 798         }
 799     }
 800
 801   /* If no input digits are given use the default.  */
 802   if (ctype->wcdigits_act == 0)
 803     {
 804       if (ctype->wcdigits_max == 0)
 805         {
 806           ctype->wcdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
 807                                            10 * sizeof (uint32_t));
 808           ctype->wcdigits_max = 10;
 809         }
 810
 811       for (cnt = 0; cnt < 10; ++cnt)
 812         ctype->wcdigits[cnt] = L'0' + cnt;
 813
 814       ctype->mbdigits_act = 10;
 815     }
 816
 817   /* Check the outdigits.  */
 818   warned = 0;
 819   for (cnt = 0; cnt < 10; ++cnt)
 820     if (ctype->mboutdigits[cnt] == NULL)
 821       {
 822         static struct charseq replace[2];
 823
 824         if (!warned)
 825           {
 826             WITH_CUR_LOCALE (error (0, 0, _("\
 827 not all characters used in `outdigit' are available in the charmap")));
 828             warned = 1;
 829           }
 830
 831         replace[0].nbytes = 1;
 832         replace[0].bytes[0] = '?';
 833         replace[0].bytes[1] = '\0';
 834         ctype->mboutdigits[cnt] = &replace[0];
 835       }
 836
 837   warned = 0;
 838   for (cnt = 0; cnt < 10; ++cnt)
 839     if (ctype->wcoutdigits[cnt] == 0)
 840       {
 841         if (!warned)
 842           {
 843             WITH_CUR_LOCALE (error (0, 0, _("\
 844 not all characters used in `outdigit' are available in the repertoire")));
 845             warned = 1;
 846           }
 847
 848         ctype->wcoutdigits[cnt] = L'?';
 849       }
 850
 851   /* Sort the entries in the translit_ignore list.  */
 852   if (ctype->translit_ignore != NULL)
 853     {
 854       struct translit_ignore_t *firstp = ctype->translit_ignore;
 855       struct translit_ignore_t *runp;
 856
 857       ctype->ntranslit_ignore = 1;
 858
 859       for (runp = firstp->next; runp != NULL; runp = runp->next)
 860         {
 861           struct translit_ignore_t *lastp = NULL;
 862           struct translit_ignore_t *cmpp;
 863
 864           ++ctype->ntranslit_ignore;
 865
 866           for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next)
 867             if (runp->from < cmpp->from)
 868               break;
 869
 870           runp->next = lastp;
 871           if (lastp == NULL)
 872             firstp = runp;
 873         }
 874
 875       ctype->translit_ignore = firstp;
 876     }
 877 }
 878
 879
 880 void
 881 ctype_output (struct localedef_t *locale, const struct charmap_t *charmap,
 882               const char *output_path)
 883 {
 884   static const char nulbytes[4] = { 0, 0, 0, 0 };
 885   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 886   const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)
 887                          + ctype->nr_charclass + ctype->map_collection_nr);
 888   struct iovec *iov = alloca (sizeof *iov
 889                               * (2 + nelems + 2 * ctype->nr_charclass
 890                                  + ctype->map_collection_nr + 4));
 891   struct locale_file data;
 892   uint32_t *idx = alloca (sizeof *idx * (nelems + 1));
 893   uint32_t default_missing_len;
 894   size_t elem, cnt, offset, total;
 895   char *cp;
 896
 897   /* Now prepare the output: Find the sizes of the table we can use.  */
 898   allocate_arrays (ctype, charmap, ctype->repertoire);
 899
 900   data.magic = LIMAGIC (LC_CTYPE);
 901   data.n = nelems;
 902   iov[0].iov_base = (void *) &data;
 903   iov[0].iov_len = sizeof (data);
 904
 905   iov[1].iov_base = (void *) idx;
 906   iov[1].iov_len = nelems * sizeof (uint32_t);
 907
 908   idx[0] = iov[0].iov_len + iov[1].iov_len;
 909   offset = 0;
 910
 911   for (elem = 0; elem < nelems; ++elem)
 912     {
 913       if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))
 914         switch (elem)
 915           {
 916 #define CTYPE_EMPTY(name) \
 917           case name:                                                          \
 918             iov[2 + elem + offset].iov_base = NULL;                           \
 919             iov[2 + elem + offset].iov_len = 0;                               \
 920             idx[elem + 1] = idx[elem];                                        \
 921             break
 922
 923           CTYPE_EMPTY(_NL_CTYPE_GAP1);
 924           CTYPE_EMPTY(_NL_CTYPE_GAP2);
 925           CTYPE_EMPTY(_NL_CTYPE_GAP3);
 926           CTYPE_EMPTY(_NL_CTYPE_GAP4);
 927           CTYPE_EMPTY(_NL_CTYPE_GAP5);
 928           CTYPE_EMPTY(_NL_CTYPE_GAP6);
 929
 930 #define CTYPE_DATA(name, base, len)                                           \
 931           case _NL_ITEM_INDEX (name):                                         \
 932             iov[2 + elem + offset].iov_base = (base);                         \
 933             iov[2 + elem + offset].iov_len = (len);                           \
 934             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;       \
 935             break
 936
 937           CTYPE_DATA (_NL_CTYPE_CLASS,
 938                       ctype->ctype_b,
 939                       (256 + 128) * sizeof (char_class_t));
 940
 941           CTYPE_DATA (_NL_CTYPE_TOUPPER,
 942                       ctype->map_b[0],
 943                       (256 + 128) * sizeof (uint32_t));
 944           CTYPE_DATA (_NL_CTYPE_TOLOWER,
 945                       ctype->map_b[1],
 946                       (256 + 128) * sizeof (uint32_t));
 947
 948           CTYPE_DATA (_NL_CTYPE_TOUPPER32,
 949                       ctype->map32_b[0],
 950                       256 * sizeof (uint32_t));
 951           CTYPE_DATA (_NL_CTYPE_TOLOWER32,
 952                       ctype->map32_b[1],
 953                       256 * sizeof (uint32_t));
 954
 955           CTYPE_DATA (_NL_CTYPE_CLASS32,
 956                       ctype->ctype32_b,
 957                       256 * sizeof (char_class32_t));
 958
 959           CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET,
 960                       &ctype->class_offset, sizeof (uint32_t));
 961
 962           CTYPE_DATA (_NL_CTYPE_MAP_OFFSET,
 963                       &ctype->map_offset, sizeof (uint32_t));
 964
 965           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE,
 966                       &ctype->translit_idx_size, sizeof (uint32_t));
 967
 968           CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX,
 969                       ctype->translit_from_idx,
 970                       ctype->translit_idx_size * sizeof (uint32_t));
 971
 972           CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL,
 973                       ctype->translit_from_tbl,
 974                       ctype->translit_from_tbl_size);
 975
 976           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX,
 977                       ctype->translit_to_idx,
 978                       ctype->translit_idx_size * sizeof (uint32_t));
 979
 980           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL,
 981                       ctype->translit_to_tbl, ctype->translit_to_tbl_size);
 982
 983           case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
 984             /* The class name array.  */
 985             total = 0;
 986             for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
 987               {
 988                 iov[2 + elem + offset].iov_base
 989                   = (void *) ctype->classnames[cnt];
 990                 iov[2 + elem + offset].iov_len
 991                   = strlen (ctype->classnames[cnt]) + 1;
 992                 total += iov[2 + elem + offset].iov_len;
 993               }
 994             iov[2 + elem + offset].iov_base = (void *) nulbytes;
 995             iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
 996             total += 1 + (4 - ((total + 1) % 4));
 997
 998             idx[elem + 1] = idx[elem] + total;
 999             break;
1000
1001           case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
1002             /* The class name array.  */
1003             total = 0;
1004             for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
1005               {
1006                 iov[2 + elem + offset].iov_base
1007                   = (void *) ctype->mapnames[cnt];
1008                 iov[2 + elem + offset].iov_len
1009                   = strlen (ctype->mapnames[cnt]) + 1;
1010                 total += iov[2 + elem + offset].iov_len;
1011               }
1012             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1013             iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
1014             total += 1 + (4 - ((total + 1) % 4));
1015
1016             idx[elem + 1] = idx[elem] + total;
1017             break;
1018
1019           CTYPE_DATA (_NL_CTYPE_WIDTH,
1020                       ctype->width.iov_base,
1021                       ctype->width.iov_len);
1022
1023           CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
1024                       &ctype->mb_cur_max, sizeof (uint32_t));
1025
1026           case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
1027             total = strlen (ctype->codeset_name) + 1;
1028             if (total % 4 == 0)
1029               iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
1030             else
1031               {
1032                 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
1033                 memset (mempcpy (iov[2 + elem + offset].iov_base,
1034                                  ctype->codeset_name, total),
1035                         '\0', 4 - (total & 3));
1036                 total = (total + 3) & ~3;
1037               }
1038             iov[2 + elem + offset].iov_len = total;
1039             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1040             break;
1041
1042
1043           CTYPE_DATA (_NL_CTYPE_MAP_TO_NONASCII,
1044                       &ctype->to_nonascii, sizeof (uint32_t));
1045
1046           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
1047             iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1048             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1049             *(uint32_t *) iov[2 + elem + offset].iov_base =
1050               ctype->mbdigits_act / 10;
1051             idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1052             break;
1053
1054           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
1055             /* Align entries.  */
1056             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1057             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1058             idx[elem] += iov[2 + elem + offset].iov_len;
1059             ++offset;
1060
1061             iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1062             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1063             *(uint32_t *) iov[2 + elem + offset].iov_base =
1064               ctype->wcdigits_act / 10;
1065             idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1066             break;
1067
1068           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
1069             /* Compute the length of all possible characters.  For INDIGITS
1070                there might be more than one.  We simply concatenate all of
1071                them with a NUL byte following.  The NUL byte wouldn't be
1072                necessary but it makes it easier for the user.  */
1073             total = 0;
1074
1075             for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1076                  cnt < ctype->mbdigits_act; cnt += 10)
1077               total += ctype->mbdigits[cnt]->nbytes + 1;
1078             iov[2 + elem + offset].iov_base = (char *) alloca (total);
1079             iov[2 + elem + offset].iov_len = total;
1080
1081             cp = iov[2 + elem + offset].iov_base;
1082             for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1083                  cnt < ctype->mbdigits_act; cnt += 10)
1084               {
1085                 cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes,
1086                               ctype->mbdigits[cnt]->nbytes);
1087                 *cp++ = '\0';
1088               }
1089             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1090             break;
1091
1092           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
1093             /* Compute the length of all possible characters.  For INDIGITS
1094                there might be more than one.  We simply concatenate all of
1095                them with a NUL byte following.  The NUL byte wouldn't be
1096                necessary but it makes it easier for the user.  */
1097             cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
1098             total = ctype->mboutdigits[cnt]->nbytes + 1;
1099             iov[2 + elem + offset].iov_base = (char *) alloca (total);
1100             iov[2 + elem + offset].iov_len = total;
1101
1102             *(char *) mempcpy (iov[2 + elem + offset].iov_base,
1103                                ctype->mboutdigits[cnt]->bytes,
1104                                ctype->mboutdigits[cnt]->nbytes) = '\0';
1105             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1106             break;
1107
1108           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
1109             total = ctype->wcdigits_act / 10;
1110
1111             iov[2 + elem + offset].iov_base =
1112               (uint32_t *) alloca (total * sizeof (uint32_t));
1113             iov[2 + elem + offset].iov_len = total * sizeof (uint32_t);
1114
1115             for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
1116                  cnt < ctype->wcdigits_act; cnt += 10)
1117               ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10]
1118                 = ctype->wcdigits[cnt];
1119             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1120             break;
1121
1122           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC):
1123             /* Align entries.  */
1124             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1125             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1126             idx[elem] += iov[2 + elem + offset].iov_len;
1127             ++offset;
1128             /* FALLTRHOUGH */
1129
1130           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT1_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
1131             cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
1132             iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt];
1133             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1134             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1135             break;
1136
1137           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
1138             /* Align entries.  */
1139             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1140             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1141             idx[elem] += iov[2 + elem + offset].iov_len;
1142             ++offset;
1143
1144             default_missing_len = (ctype->default_missing
1145                                    ? wcslen ((wchar_t *)ctype->default_missing)
1146                                    : 0);
1147             iov[2 + elem + offset].iov_base = &default_missing_len;
1148             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1149             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1150             break;
1151
1152           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
1153             iov[2 + elem + offset].iov_base =
1154               ctype->default_missing ?: (uint32_t *) L"";
1155             iov[2 + elem + offset].iov_len =
1156               wcslen (iov[2 + elem + offset].iov_base);
1157             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1158             break;
1159
1160           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
1161             /* Align entries.  */
1162             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1163             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1164             idx[elem] += iov[2 + elem + offset].iov_len;
1165             ++offset;
1166
1167             iov[2 + elem + offset].iov_base = &ctype->ntranslit_ignore;
1168             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1169             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1170             break;
1171
1172           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
1173             {
1174               uint32_t *ranges = (uint32_t *) alloca (ctype->ntranslit_ignore
1175                                                       * 3 * sizeof (uint32_t));
1176               struct translit_ignore_t *runp;
1177
1178               iov[2 + elem + offset].iov_base = ranges;
1179               iov[2 + elem + offset].iov_len = (ctype->ntranslit_ignore
1180                                                 * 3 * sizeof (uint32_t));
1181
1182               for (runp = ctype->translit_ignore; runp != NULL;
1183                    runp = runp->next)
1184                 {
1185                   *ranges++ = runp->from;
1186                   *ranges++ = runp->to;
1187                   *ranges++ = runp->step;
1188                 }
1189             }
1190             /* Remove the following line in case a new entry is added
1191                after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN.  */
1192             if (elem < nelems)
1193               idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1194             break;
1195
1196           default:
1197             assert (! "unknown CTYPE element");
1198           }
1199       else
1200         {
1201           /* Handle extra maps.  */
1202           size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
1203           if (nr < ctype->nr_charclass)
1204             {
1205               iov[2 + elem + offset].iov_base = ctype->class_b[nr];
1206               iov[2 + elem + offset].iov_len = 256 / 32 * sizeof (uint32_t);
1207               idx[elem] += iov[2 + elem + offset].iov_len;
1208               ++offset;
1209
1210               iov[2 + elem + offset] = ctype->class_3level[nr];
1211             }
1212           else
1213             {
1214               nr -= ctype->nr_charclass;
1215               assert (nr < ctype->map_collection_nr);
1216               iov[2 + elem + offset] = ctype->map_3level[nr];
1217             }
1218           idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1219         }
1220     }
1221
1222   assert (2 + elem + offset == (nelems + 2 * ctype->nr_charclass
1223                                 + ctype->map_collection_nr + 4 + 2));
1224
1225   write_locale_data (output_path, LC_CTYPE, "LC_CTYPE", 2 + elem + offset,
1226                      iov);
1227 }
1228
1229
1230 /* Local functions.  */
1231 static void
1232 ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
1233                  const char *name)
1234 {
1235   size_t cnt;
1236
1237   for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1238     if (strcmp (ctype->classnames[cnt], name) == 0)
1239       break;
1240
1241   if (cnt < ctype->nr_charclass)
1242     {
1243       lr_error (lr, _("character class `%s' already defined"), name);
1244       return;
1245     }
1246
1247   if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1248     /* Exit code 2 is prescribed in P1003.2b.  */
1249     WITH_CUR_LOCALE (error (2, 0, _("\
1250 implementation limit: no more than %Zd character classes allowed"),
1251                             MAX_NR_CHARCLASS));
1252
1253   ctype->classnames[ctype->nr_charclass++] = name;
1254 }
1255
1256
1257 static void
1258 ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1259                const char *name, const struct charmap_t *charmap)
1260 {
1261   size_t max_chars = 0;
1262   size_t cnt;
1263
1264   for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1265     {
1266       if (strcmp (ctype->mapnames[cnt], name) == 0)
1267         break;
1268
1269       if (max_chars < ctype->map_collection_max[cnt])
1270         max_chars = ctype->map_collection_max[cnt];
1271     }
1272
1273   if (cnt < ctype->map_collection_nr)
1274     {
1275       lr_error (lr, _("character map `%s' already defined"), name);
1276       return;
1277     }
1278
1279   if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1280     /* Exit code 2 is prescribed in P1003.2b.  */
1281     WITH_CUR_LOCALE (error (2, 0, _("\
1282 implementation limit: no more than %d character maps allowed"),
1283                             MAX_NR_CHARMAP));
1284
1285   ctype->mapnames[cnt] = name;
1286
1287   if (max_chars == 0)
1288     ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1289   else
1290     ctype->map_collection_max[cnt] = max_chars;
1291
1292   ctype->map_collection[cnt] = (uint32_t *)
1293     xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
1294   ctype->map_collection_act[cnt] = 256;
1295
1296   ++ctype->map_collection_nr;
1297 }
1298
1299
1300 /* We have to be prepared that TABLE, MAX, and ACT can be NULL.  This
1301    is possible if we only want to extend the name array.  */
1302 static uint32_t *
1303 find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1304           size_t *act, uint32_t idx)
1305 {
1306   size_t cnt;
1307
1308   if (idx < 256)
1309     return table == NULL ? NULL : &(*table)[idx];
1310
1311   /* Use the charnames_idx lookup table instead of the slow search loop.  */
1312 #if 1
1313   cnt = idx_table_get (&ctype->charnames_idx, idx);
1314   if (cnt == EMPTY)
1315     /* Not found.  */
1316     cnt = ctype->charnames_act;
1317 #else
1318   for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1319     if (ctype->charnames[cnt] == idx)
1320       break;
1321 #endif
1322
1323   /* We have to distinguish two cases: the name is found or not.  */
1324   if (cnt == ctype->charnames_act)
1325     {
1326       /* Extend the name array.  */
1327       if (ctype->charnames_act == ctype->charnames_max)
1328         {
1329           ctype->charnames_max *= 2;
1330           ctype->charnames = (uint32_t *)
1331             xrealloc (ctype->charnames,
1332                       sizeof (uint32_t) * ctype->charnames_max);
1333         }
1334       ctype->charnames[ctype->charnames_act++] = idx;
1335       idx_table_add (&ctype->charnames_idx, idx, cnt);
1336     }
1337
1338   if (table == NULL)
1339     /* We have done everything we are asked to do.  */
1340     return NULL;
1341
1342   if (max == NULL)
1343     /* The caller does not want to extend the table.  */
1344     return (cnt >= *act ? NULL : &(*table)[cnt]);
1345
1346   if (cnt >= *act)
1347     {
1348       if (cnt >= *max)
1349         {
1350           size_t old_max = *max;
1351           do
1352             *max *= 2;
1353           while (*max <= cnt);
1354
1355           *table =
1356             (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
1357           memset (&(*table)[old_max], '\0',
1358                   (*max - old_max) * sizeof (uint32_t));
1359         }
1360
1361       *act = cnt + 1;
1362     }
1363
1364   return &(*table)[cnt];
1365 }
1366
1367
1368 static int
1369 get_character (struct token *now, const struct charmap_t *charmap,
1370                struct repertoire_t *repertoire,
1371                struct charseq **seqp, uint32_t *wchp)
1372 {
1373   if (now->tok == tok_bsymbol)
1374     {
1375       /* This will hopefully be the normal case.  */
1376       *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1377                                      now->val.str.lenmb);
1378       *seqp = charmap_find_value (charmap, now->val.str.startmb,
1379                                   now->val.str.lenmb);
1380     }
1381   else if (now->tok == tok_ucs4)
1382     {
1383       char utmp[10];
1384
1385       snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1386       *seqp = charmap_find_value (charmap, utmp, 9);
1387
1388       if (*seqp == NULL)
1389         *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
1390
1391       if (*seqp == NULL)
1392         {
1393           /* Compute the value in the charmap from the UCS value.  */
1394           const char *symbol = repertoire_find_symbol (repertoire,
1395                                                        now->val.ucs4);
1396
1397           if (symbol == NULL)
1398             *seqp = NULL;
1399           else
1400             *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
1401
1402           if (*seqp == NULL)
1403             {
1404               if (repertoire != NULL)
1405                 {
1406                   /* Insert a negative entry.  */
1407                   static const struct charseq negative
1408                     = { .ucs4 = ILLEGAL_CHAR_VALUE };
1409                   uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1410                                                   sizeof (uint32_t));
1411                   *newp = now->val.ucs4;
1412
1413                   insert_entry (&repertoire->seq_table, newp,
1414                                 sizeof (uint32_t), (void *) &negative);
1415                 }
1416             }
1417           else
1418             (*seqp)->ucs4 = now->val.ucs4;
1419         }
1420       else if ((*seqp)->ucs4 != now->val.ucs4)
1421         *seqp = NULL;
1422
1423       *wchp = now->val.ucs4;
1424     }
1425   else if (now->tok == tok_charcode)
1426     {
1427       /* We must map from the byte code to UCS4.  */
1428       *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1429                                    now->val.str.lenmb);
1430
1431       if (*seqp == NULL)
1432         *wchp = ILLEGAL_CHAR_VALUE;
1433       else
1434         {
1435           if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1436             (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1437                                                    strlen ((*seqp)->name));
1438           *wchp = (*seqp)->ucs4;
1439         }
1440     }
1441   else
1442     return 1;
1443
1444   return 0;
1445 }
1446
1447
1448 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1449    the .(2). counterparts.  */
1450 static void
1451 charclass_symbolic_ellipsis (struct linereader *ldfile,
1452                              struct locale_ctype_t *ctype,
1453                              const struct charmap_t *charmap,
1454                              struct repertoire_t *repertoire,
1455                              struct token *now,
1456                              const char *last_str,
1457                              unsigned long int class256_bit,
1458                              unsigned long int class_bit, int base,
1459                              int ignore_content, int handle_digits, int step)
1460 {
1461   const char *nowstr = now->val.str.startmb;
1462   char tmp[now->val.str.lenmb + 1];
1463   const char *cp;
1464   char *endp;
1465   unsigned long int from;
1466   unsigned long int to;
1467
1468   /* We have to compute the ellipsis values using the symbolic names.  */
1469   assert (last_str != NULL);
1470
1471   if (strlen (last_str) != now->val.str.lenmb)
1472     {
1473     invalid_range:
1474       lr_error (ldfile,
1475                 _("`%s' and `%.*s' are no valid names for symbolic range"),
1476                 last_str, (int) now->val.str.lenmb, nowstr);
1477       return;
1478     }
1479
1480   if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1481     /* Nothing to do, the names are the same.  */
1482     return;
1483
1484   for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1485     ;
1486
1487   errno = 0;
1488   from = strtoul (cp, &endp, base);
1489   if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1490     goto invalid_range;
1491
1492   to = strtoul (nowstr + (cp - last_str), &endp, base);
1493   if ((to == UINT_MAX && errno == ERANGE)
1494       || (endp - nowstr) != now->val.str.lenmb || from >= to)
1495     goto invalid_range;
1496
1497   /* OK, we have a range FROM - TO.  Now we can create the symbolic names.  */
1498   if (!ignore_content)
1499     {
1500       now->val.str.startmb = tmp;
1501       while ((from += step) <= to)
1502         {
1503           struct charseq *seq;
1504           uint32_t wch;
1505
1506           sprintf (tmp, (base == 10 ? "%.*s%0*ld" : "%.*s%0*lX"),
1507                    (int) (cp - last_str), last_str,
1508                    (int) (now->val.str.lenmb - (cp - last_str)),
1509                    from);
1510
1511           get_character (now, charmap, repertoire, &seq, &wch);
1512
1513           if (seq != NULL && seq->nbytes == 1)
1514             /* Yep, we can store information about this byte sequence.  */
1515             ctype->class256_collection[seq->bytes[0]] |= class256_bit;
1516
1517           if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1518             /* We have the UCS4 position.  */
1519             *find_idx (ctype, &ctype->class_collection,
1520                        &ctype->class_collection_max,
1521                        &ctype->class_collection_act, wch) |= class_bit;
1522
1523           if (handle_digits == 1)
1524             {
1525               /* We must store the digit values.  */
1526               if (ctype->mbdigits_act == ctype->mbdigits_max)
1527                 {
1528                   ctype->mbdigits_max *= 2;
1529                   ctype->mbdigits = xrealloc (ctype->mbdigits,
1530                                               (ctype->mbdigits_max
1531                                                * sizeof (char *)));
1532                   ctype->wcdigits_max *= 2;
1533                   ctype->wcdigits = xrealloc (ctype->wcdigits,
1534                                               (ctype->wcdigits_max
1535                                                * sizeof (uint32_t)));
1536                 }
1537
1538               ctype->mbdigits[ctype->mbdigits_act++] = seq;
1539               ctype->wcdigits[ctype->wcdigits_act++] = wch;
1540             }
1541           else if (handle_digits == 2)
1542             {
1543               /* We must store the digit values.  */
1544               if (ctype->outdigits_act >= 10)
1545                 {
1546                   lr_error (ldfile, _("\
1547 %s: field `%s' does not contain exactly ten entries"),
1548                             "LC_CTYPE", "outdigit");
1549                   return;
1550                 }
1551
1552               ctype->mboutdigits[ctype->outdigits_act] = seq;
1553               ctype->wcoutdigits[ctype->outdigits_act] = wch;
1554               ++ctype->outdigits_act;
1555             }
1556         }
1557     }
1558 }
1559
1560
1561 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'.  */
1562 static void
1563 charclass_ucs4_ellipsis (struct linereader *ldfile,
1564                          struct locale_ctype_t *ctype,
1565                          const struct charmap_t *charmap,
1566                          struct repertoire_t *repertoire,
1567                          struct token *now, uint32_t last_wch,
1568                          unsigned long int class256_bit,
1569                          unsigned long int class_bit, int ignore_content,
1570                          int handle_digits, int step)
1571 {
1572   if (last_wch > now->val.ucs4)
1573     {
1574       lr_error (ldfile, _("\
1575 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1576                 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1577                 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
1578       return;
1579     }
1580
1581   if (!ignore_content)
1582     while ((last_wch += step) <= now->val.ucs4)
1583       {
1584         /* We have to find out whether there is a byte sequence corresponding
1585            to this UCS4 value.  */
1586         struct charseq *seq;
1587         char utmp[10];
1588
1589         snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1590         seq = charmap_find_value (charmap, utmp, 9);
1591         if (seq == NULL)
1592           {
1593             snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1594             seq = charmap_find_value (charmap, utmp, 5);
1595           }
1596
1597         if (seq == NULL)
1598           /* Try looking in the repertoire map.  */
1599           seq = repertoire_find_seq (repertoire, last_wch);
1600
1601         /* If this is the first time we look for this sequence create a new
1602            entry.  */
1603         if (seq == NULL)
1604           {
1605             static const struct charseq negative
1606               = { .ucs4 = ILLEGAL_CHAR_VALUE };
1607
1608             /* Find the symbolic name for this UCS4 value.  */
1609             if (repertoire != NULL)
1610               {
1611                 const char *symbol = repertoire_find_symbol (repertoire,
1612                                                              last_wch);
1613                 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1614                                                 sizeof (uint32_t));
1615                 *newp = last_wch;
1616
1617                 if (symbol != NULL)
1618                   /* We have a name, now search the multibyte value.  */
1619                   seq = charmap_find_value (charmap, symbol, strlen (symbol));
1620
1621                 if (seq == NULL)
1622                   /* We have to create a fake entry.  */
1623                   seq = (struct charseq *) &negative;
1624                 else
1625                   seq->ucs4 = last_wch;
1626
1627                 insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1628                               seq);
1629               }
1630             else
1631               /* We have to create a fake entry.  */
1632               seq = (struct charseq *) &negative;
1633           }
1634
1635         /* We have a name, now search the multibyte value.  */
1636         if (seq->ucs4 == last_wch && seq->nbytes == 1)
1637           /* Yep, we can store information about this byte sequence.  */
1638           ctype->class256_collection[(size_t) seq->bytes[0]]
1639             |= class256_bit;
1640
1641         /* And of course we have the UCS4 position.  */
1642         if (class_bit != 0)
1643           *find_idx (ctype, &ctype->class_collection,
1644                      &ctype->class_collection_max,
1645                      &ctype->class_collection_act, last_wch) |= class_bit;
1646
1647         if (handle_digits == 1)
1648           {
1649             /* We must store the digit values.  */
1650             if (ctype->mbdigits_act == ctype->mbdigits_max)
1651               {
1652                 ctype->mbdigits_max *= 2;
1653                 ctype->mbdigits = xrealloc (ctype->mbdigits,
1654                                             (ctype->mbdigits_max
1655                                              * sizeof (char *)));
1656                 ctype->wcdigits_max *= 2;
1657                 ctype->wcdigits = xrealloc (ctype->wcdigits,
1658                                             (ctype->wcdigits_max
1659                                              * sizeof (uint32_t)));
1660               }
1661
1662             ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1663                                                       ? seq : NULL);
1664             ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1665           }
1666         else if (handle_digits == 2)
1667           {
1668             /* We must store the digit values.  */
1669             if (ctype->outdigits_act >= 10)
1670               {
1671                 lr_error (ldfile, _("\
1672 %s: field `%s' does not contain exactly ten entries"),
1673                           "LC_CTYPE", "outdigit");
1674                 return;
1675               }
1676
1677             ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1678                                                         ? seq : NULL);
1679             ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1680             ++ctype->outdigits_act;
1681           }
1682       }
1683 }
1684
1685
1686 /* Ellipsis as in `/xea/x12.../xea/x34'.  */
1687 static void
1688 charclass_charcode_ellipsis (struct linereader *ldfile,
1689                              struct locale_ctype_t *ctype,
1690                              const struct charmap_t *charmap,
1691                              struct repertoire_t *repertoire,
1692                              struct token *now, char *last_charcode,
1693                              uint32_t last_charcode_len,
1694                              unsigned long int class256_bit,
1695                              unsigned long int class_bit, int ignore_content,
1696                              int handle_digits)
1697 {
1698   /* First check whether the to-value is larger.  */
1699   if (now->val.charcode.nbytes != last_charcode_len)
1700     {
1701       lr_error (ldfile, _("\
1702 start and end character sequence of range must have the same length"));
1703       return;
1704     }
1705
1706   if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
1707     {
1708       lr_error (ldfile, _("\
1709 to-value character sequence is smaller than from-value sequence"));
1710       return;
1711     }
1712
1713   if (!ignore_content)
1714     {
1715       do
1716         {
1717           /* Increment the byte sequence value.  */
1718           struct charseq *seq;
1719           uint32_t wch;
1720           int i;
1721
1722           for (i = last_charcode_len - 1; i >= 0; --i)
1723             if (++last_charcode[i] != 0)
1724               break;
1725
1726           if (last_charcode_len == 1)
1727             /* Of course we have the charcode value.  */
1728             ctype->class256_collection[(size_t) last_charcode[0]]
1729               |= class256_bit;
1730
1731           /* Find the symbolic name.  */
1732           seq = charmap_find_symbol (charmap, last_charcode,
1733                                      last_charcode_len);
1734           if (seq != NULL)
1735             {
1736               if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1737                 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1738                                                    strlen (seq->name));
1739               wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
1740
1741               if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1742                 *find_idx (ctype, &ctype->class_collection,
1743                            &ctype->class_collection_max,
1744                            &ctype->class_collection_act, wch) |= class_bit;
1745             }
1746           else
1747             wch = ILLEGAL_CHAR_VALUE;
1748
1749           if (handle_digits == 1)
1750             {
1751               /* We must store the digit values.  */
1752               if (ctype->mbdigits_act == ctype->mbdigits_max)
1753                 {
1754                   ctype->mbdigits_max *= 2;
1755                   ctype->mbdigits = xrealloc (ctype->mbdigits,
1756                                               (ctype->mbdigits_max
1757                                                * sizeof (char *)));
1758                   ctype->wcdigits_max *= 2;
1759                   ctype->wcdigits = xrealloc (ctype->wcdigits,
1760                                               (ctype->wcdigits_max
1761                                                * sizeof (uint32_t)));
1762                 }
1763
1764               seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1765               memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1766               seq->nbytes = last_charcode_len;
1767
1768               ctype->mbdigits[ctype->mbdigits_act++] = seq;
1769               ctype->wcdigits[ctype->wcdigits_act++] = wch;
1770             }
1771           else if (handle_digits == 2)
1772             {
1773               struct charseq *seq;
1774               /* We must store the digit values.  */
1775               if (ctype->outdigits_act >= 10)
1776                 {
1777                   lr_error (ldfile, _("\
1778 %s: field `%s' does not contain exactly ten entries"),
1779                             "LC_CTYPE", "outdigit");
1780                   return;
1781                 }
1782
1783               seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1784               memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1785               seq->nbytes = last_charcode_len;
1786
1787               ctype->mboutdigits[ctype->outdigits_act] = seq;
1788               ctype->wcoutdigits[ctype->outdigits_act] = wch;
1789               ++ctype->outdigits_act;
1790             }
1791         }
1792       while (memcmp (last_charcode, now->val.charcode.bytes,
1793                      last_charcode_len) != 0);
1794     }
1795 }
1796
1797
1798 static uint32_t *
1799 find_translit2 (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
1800                 uint32_t wch)
1801 {
1802   struct translit_t *trunp = ctype->translit;
1803   struct translit_ignore_t *tirunp = ctype->translit_ignore;
1804
1805   while (trunp != NULL)
1806     {
1807       /* XXX We simplify things here.  The transliterations we look
1808          for are only allowed to have one character.  */
1809       if (trunp->from[0] == wch && trunp->from[1] == 0)
1810         {
1811           /* Found it.  Now look for a transliteration which can be
1812              represented with the character set.  */
1813           struct translit_to_t *torunp = trunp->to;
1814
1815           while (torunp != NULL)
1816             {
1817               int i;
1818
1819               for (i = 0; torunp->str[i] != 0; ++i)
1820                 {
1821                   char utmp[10];
1822
1823                   snprintf (utmp, sizeof (utmp), "U%08X", torunp->str[i]);
1824                   if (charmap_find_value (charmap, utmp, 9) == NULL)
1825                     /* This character cannot be represented.  */
1826                     break;
1827                 }
1828
1829               if (torunp->str[i] == 0)
1830                 return torunp->str;
1831
1832               torunp = torunp->next;
1833             }
1834
1835           break;
1836         }
1837
1838       trunp = trunp->next;
1839     }
1840
1841   /* Check for ignored chars.  */
1842   while (tirunp != NULL)
1843     {
1844       if (tirunp->from <= wch && tirunp->to >= wch)
1845         {
1846           uint32_t wi;
1847
1848           for (wi = tirunp->from; wi <= wch; wi += tirunp->step)
1849             if (wi == wch)
1850               return (uint32_t []) { 0 };
1851         }
1852     }
1853
1854   /* Nothing found.  */
1855   return NULL;
1856 }
1857
1858
1859 uint32_t *
1860 find_translit (struct localedef_t *locale, const struct charmap_t *charmap,
1861                uint32_t wch)
1862 {
1863   struct locale_ctype_t *ctype;
1864   uint32_t *result = NULL;
1865
1866   assert (locale != NULL);
1867   ctype = locale->categories[LC_CTYPE].ctype;
1868
1869   if (ctype->translit != NULL)
1870     result = find_translit2 (ctype, charmap, wch);
1871
1872   if (result == NULL)
1873     {
1874       struct translit_include_t *irunp = ctype->translit_include;
1875
1876       while (irunp != NULL && result == NULL)
1877         {
1878           result = find_translit (find_locale (CTYPE_LOCALE,
1879                                                irunp->copy_locale,
1880                                                irunp->copy_repertoire,
1881                                                charmap),
1882                                   charmap, wch);
1883           irunp = irunp->next;
1884         }
1885     }
1886
1887   return result;
1888 }
1889
1890
1891 /* Read one transliteration entry.  */
1892 static uint32_t *
1893 read_widestring (struct linereader *ldfile, struct token *now,
1894                  const struct charmap_t *charmap,
1895                  struct repertoire_t *repertoire)
1896 {
1897   uint32_t *wstr;
1898
1899   if (now->tok == tok_default_missing)
1900     /* The special name "" will denote this case.  */
1901     wstr = ((uint32_t *) { 0 });
1902   else if (now->tok == tok_bsymbol)
1903     {
1904       /* Get the value from the repertoire.  */
1905       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1906       wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1907                                        now->val.str.lenmb);
1908       if (wstr[0] == ILLEGAL_CHAR_VALUE)
1909         {
1910           /* We cannot proceed, we don't know the UCS4 value.  */
1911           free (wstr);
1912           return NULL;
1913         }
1914
1915       wstr[1] = 0;
1916     }
1917   else if (now->tok == tok_ucs4)
1918     {
1919       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1920       wstr[0] = now->val.ucs4;
1921       wstr[1] = 0;
1922     }
1923   else if (now->tok == tok_charcode)
1924     {
1925       /* Argh, we have to convert to the symbol name first and then to the
1926          UCS4 value.  */
1927       struct charseq *seq = charmap_find_symbol (charmap,
1928                                                  now->val.str.startmb,
1929                                                  now->val.str.lenmb);
1930       if (seq == NULL)
1931         /* Cannot find the UCS4 value.  */
1932         return NULL;
1933
1934       if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1935         seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1936                                            strlen (seq->name));
1937       if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1938         /* We cannot proceed, we don't know the UCS4 value.  */
1939         return NULL;
1940
1941       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1942       wstr[0] = seq->ucs4;
1943       wstr[1] = 0;
1944     }
1945   else if (now->tok == tok_string)
1946     {
1947       wstr = now->val.str.startwc;
1948       if (wstr == NULL || wstr[0] == 0)
1949         return NULL;
1950     }
1951   else
1952     {
1953       if (now->tok != tok_eol && now->tok != tok_eof)
1954         lr_ignore_rest (ldfile, 0);
1955       SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1956       return (uint32_t *) -1l;
1957     }
1958
1959   return wstr;
1960 }
1961
1962
1963 static void
1964 read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1965                      struct token *now, const struct charmap_t *charmap,
1966                      struct repertoire_t *repertoire)
1967 {
1968   uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1969   struct translit_t *result;
1970   struct translit_to_t **top;
1971   struct obstack *ob = &ctype->mempool;
1972   int first;
1973   int ignore;
1974
1975   if (from_wstr == NULL)
1976     /* There is no valid from string.  */
1977     return;
1978
1979   result = (struct translit_t *) obstack_alloc (ob,
1980                                                 sizeof (struct translit_t));
1981   result->from = from_wstr;
1982   result->fname = ldfile->fname;
1983   result->lineno = ldfile->lineno;
1984   result->next = NULL;
1985   result->to = NULL;
1986   top = &result->to;
1987   first = 1;
1988   ignore = 0;
1989
1990   while (1)
1991     {
1992       uint32_t *to_wstr;
1993
1994       /* Next we have one or more transliterations.  They are
1995          separated by semicolons.  */
1996       now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
1997
1998       if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
1999         {
2000           /* One string read.  */
2001           const uint32_t zero = 0;
2002
2003           if (!ignore)
2004             {
2005               obstack_grow (ob, &zero, 4);
2006               to_wstr = obstack_finish (ob);
2007
2008               *top = obstack_alloc (ob, sizeof (struct translit_to_t));
2009               (*top)->str = to_wstr;
2010               (*top)->next = NULL;
2011             }
2012
2013           if (now->tok == tok_eol)
2014             {
2015               result->next = ctype->translit;
2016               ctype->translit = result;
2017               return;
2018             }
2019
2020           if (!ignore)
2021             top = &(*top)->next;
2022           ignore = 0;
2023         }
2024       else
2025         {
2026           to_wstr = read_widestring (ldfile, now, charmap, repertoire);
2027           if (to_wstr == (uint32_t *) -1l)
2028             {
2029               /* An error occurred.  */
2030               obstack_free (ob, result);
2031               return;
2032             }
2033
2034           if (to_wstr == NULL)
2035             ignore = 1;
2036           else
2037             /* This value is usable.  */
2038             obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
2039
2040           first = 0;
2041         }
2042     }
2043 }
2044
2045
2046 static void
2047 read_translit_ignore_entry (struct linereader *ldfile,
2048                             struct locale_ctype_t *ctype,
2049                             const struct charmap_t *charmap,
2050                             struct repertoire_t *repertoire)
2051 {
2052   /* We expect a semicolon-separated list of characters we ignore.  We are
2053      only interested in the wide character definitions.  These must be
2054      single characters, possibly defining a range when an ellipsis is used.  */
2055   while (1)
2056     {
2057       struct token *now = lr_token (ldfile, charmap, NULL, repertoire,
2058                                     verbose);
2059       struct translit_ignore_t *newp;
2060       uint32_t from;
2061
2062       if (now->tok == tok_eol || now->tok == tok_eof)
2063         {
2064           lr_error (ldfile,
2065                     _("premature end of `translit_ignore' definition"));
2066           return;
2067         }
2068
2069       if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2070         {
2071           lr_error (ldfile, _("syntax error"));
2072           lr_ignore_rest (ldfile, 0);
2073           return;
2074         }
2075
2076       if (now->tok == tok_ucs4)
2077         from = now->val.ucs4;
2078       else
2079         /* Try to get the value.  */
2080         from = repertoire_find_value (repertoire, now->val.str.startmb,
2081                                       now->val.str.lenmb);
2082
2083       if (from == ILLEGAL_CHAR_VALUE)
2084         {
2085           lr_error (ldfile, "invalid character name");
2086           newp = NULL;
2087         }
2088       else
2089         {
2090           newp = (struct translit_ignore_t *)
2091             obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
2092           newp->from = from;
2093           newp->to = from;
2094           newp->step = 1;
2095
2096           newp->next = ctype->translit_ignore;
2097           ctype->translit_ignore = newp;
2098         }
2099
2100       /* Now we expect either a semicolon, an ellipsis, or the end of the
2101          line.  */
2102       now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2103
2104       if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
2105         {
2106           /* XXX Should we bother implementing `....'?  `...' certainly
2107              will not be implemented.  */
2108           uint32_t to;
2109           int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
2110
2111           now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2112
2113           if (now->tok == tok_eol || now->tok == tok_eof)
2114             {
2115               lr_error (ldfile,
2116                         _("premature end of `translit_ignore' definition"));
2117               return;
2118             }
2119
2120           if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2121             {
2122               lr_error (ldfile, _("syntax error"));
2123               lr_ignore_rest (ldfile, 0);
2124               return;
2125             }
2126
2127           if (now->tok == tok_ucs4)
2128             to = now->val.ucs4;
2129           else
2130             /* Try to get the value.  */
2131             to = repertoire_find_value (repertoire, now->val.str.startmb,
2132                                         now->val.str.lenmb);
2133
2134           if (to == ILLEGAL_CHAR_VALUE)
2135             lr_error (ldfile, "invalid character name");
2136           else
2137             {
2138               /* Make sure the `to'-value is larger.  */
2139               if (to >= from)
2140                 {
2141                   newp->to = to;
2142                   newp->step = step;
2143                 }
2144               else
2145                 lr_error (ldfile, _("\
2146 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2147                           (to | from) < 65536 ? 4 : 8, to,
2148                           (to | from) < 65536 ? 4 : 8, from);
2149             }
2150
2151           /* And the next token.  */
2152           now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2153         }
2154
2155       if (now->tok == tok_eol || now->tok == tok_eof)
2156         /* We are done.  */
2157         return;
2158
2159       if (now->tok == tok_semicolon)
2160         /* Next round.  */
2161         continue;
2162
2163       /* If we come here something is wrong.  */
2164       lr_error (ldfile, _("syntax error"));
2165       lr_ignore_rest (ldfile, 0);
2166       return;
2167     }
2168 }
2169
2170
2171 /* The parser for the LC_CTYPE section of the locale definition.  */
2172 void
2173 ctype_read (struct linereader *ldfile, struct localedef_t *result,
2174             const struct charmap_t *charmap, const char *repertoire_name,
2175             int ignore_content)
2176 {
2177   struct repertoire_t *repertoire = NULL;
2178   struct locale_ctype_t *ctype;
2179   struct token *now;
2180   enum token_t nowtok;
2181   size_t cnt;
2182   struct charseq *last_seq;
2183   uint32_t last_wch = 0;
2184   enum token_t last_token;
2185   enum token_t ellipsis_token;
2186   int step;
2187   char last_charcode[16];
2188   size_t last_charcode_len = 0;
2189   const char *last_str = NULL;
2190   int mapidx;
2191   struct localedef_t *copy_locale = NULL;
2192
2193   /* Get the repertoire we have to use.  */
2194   if (repertoire_name != NULL)
2195     repertoire = repertoire_read (repertoire_name);
2196
2197   /* The rest of the line containing `LC_CTYPE' must be free.  */
2198   lr_ignore_rest (ldfile, 1);
2199
2200
2201   do
2202     {
2203       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2204       nowtok = now->tok;
2205     }
2206   while (nowtok == tok_eol);
2207
2208   /* If we see `copy' now we are almost done.  */
2209   if (nowtok == tok_copy)
2210     {
2211       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2212       if (now->tok != tok_string)
2213         {
2214           SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2215
2216         skip_category:
2217           do
2218             now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2219           while (now->tok != tok_eof && now->tok != tok_end);
2220
2221           if (now->tok != tok_eof
2222               || (now = lr_token (ldfile, charmap, NULL, NULL, verbose),
2223                   now->tok == tok_eof))
2224             lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2225           else if (now->tok != tok_lc_ctype)
2226             {
2227               lr_error (ldfile, _("\
2228 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2229               lr_ignore_rest (ldfile, 0);
2230             }
2231           else
2232             lr_ignore_rest (ldfile, 1);
2233
2234           return;
2235         }
2236
2237       if (! ignore_content)
2238         {
2239           /* Get the locale definition.  */
2240           copy_locale = load_locale (LC_CTYPE, now->val.str.startmb,
2241                                      repertoire_name, charmap, NULL);
2242           if ((copy_locale->avail & CTYPE_LOCALE) == 0)
2243             {
2244               /* Not yet loaded.  So do it now.  */
2245               if (locfile_read (copy_locale, charmap) != 0)
2246                 goto skip_category;
2247             }
2248         }
2249
2250       lr_ignore_rest (ldfile, 1);
2251
2252       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2253       nowtok = now->tok;
2254     }
2255
2256   /* Prepare the data structures.  */
2257   ctype_startup (ldfile, result, charmap, copy_locale, ignore_content);
2258   ctype = result->categories[LC_CTYPE].ctype;
2259
2260   /* Remember the repertoire we use.  */
2261   if (!ignore_content)
2262     ctype->repertoire = repertoire;
2263
2264   while (1)
2265     {
2266       unsigned long int class_bit = 0;
2267       unsigned long int class256_bit = 0;
2268       int handle_digits = 0;
2269
2270       /* Of course we don't proceed beyond the end of file.  */
2271       if (nowtok == tok_eof)
2272         break;
2273
2274       /* Ingore empty lines.  */
2275       if (nowtok == tok_eol)
2276         {
2277           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2278           nowtok = now->tok;
2279           continue;
2280         }
2281
2282       switch (nowtok)
2283         {
2284         case tok_charclass:
2285           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2286           while (now->tok == tok_ident || now->tok == tok_string)
2287             {
2288               ctype_class_new (ldfile, ctype, now->val.str.startmb);
2289               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2290               if (now->tok != tok_semicolon)
2291                 break;
2292               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2293             }
2294           if (now->tok != tok_eol)
2295             SYNTAX_ERROR (_("\
2296 %s: syntax error in definition of new character class"), "LC_CTYPE");
2297           break;
2298
2299         case tok_charconv:
2300           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2301           while (now->tok == tok_ident || now->tok == tok_string)
2302             {
2303               ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2304               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2305               if (now->tok != tok_semicolon)
2306                 break;
2307               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2308             }
2309           if (now->tok != tok_eol)
2310             SYNTAX_ERROR (_("\
2311 %s: syntax error in definition of new character map"), "LC_CTYPE");
2312           break;
2313
2314         case tok_class:
2315           /* Ignore the rest of the line if we don't need the input of
2316              this line.  */
2317           if (ignore_content)
2318             {
2319               lr_ignore_rest (ldfile, 0);
2320               break;
2321             }
2322
2323           /* We simply forget the `class' keyword and use the following
2324              operand to determine the bit.  */
2325           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2326           if (now->tok == tok_ident || now->tok == tok_string)
2327             {
2328               /* Must can be one of the predefined class names.  */
2329               for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2330                 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
2331                   break;
2332               if (cnt >= ctype->nr_charclass)
2333                 {
2334 #ifdef PREDEFINED_CLASSES
2335                   if (now->val.str.lenmb == 8
2336                       && memcmp ("special1", now->val.str.startmb, 8) == 0)
2337                     class_bit = _ISwspecial1;
2338                   else if (now->val.str.lenmb == 8
2339                       && memcmp ("special2", now->val.str.startmb, 8) == 0)
2340                     class_bit = _ISwspecial2;
2341                   else if (now->val.str.lenmb == 8
2342                       && memcmp ("special3", now->val.str.startmb, 8) == 0)
2343                     class_bit = _ISwspecial3;
2344                   else
2345 #endif
2346                     {
2347                       /* OK, it's a new class.  */
2348                       ctype_class_new (ldfile, ctype, now->val.str.startmb);
2349
2350                       class_bit = _ISwbit (ctype->nr_charclass - 1);
2351                     }
2352                 }
2353               else
2354                 {
2355                   class_bit = _ISwbit (cnt);
2356
2357                   free (now->val.str.startmb);
2358                 }
2359             }
2360           else if (now->tok == tok_digit)
2361             goto handle_tok_digit;
2362           else if (now->tok < tok_upper || now->tok > tok_blank)
2363             goto err_label;
2364           else
2365             {
2366               class_bit = BITw (now->tok);
2367               class256_bit = BIT (now->tok);
2368             }
2369
2370           /* The next character must be a semicolon.  */
2371           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2372           if (now->tok != tok_semicolon)
2373             goto err_label;
2374           goto read_charclass;
2375
2376         case tok_upper:
2377         case tok_lower:
2378         case tok_alpha:
2379         case tok_alnum:
2380         case tok_space:
2381         case tok_cntrl:
2382         case tok_punct:
2383         case tok_graph:
2384         case tok_print:
2385         case tok_xdigit:
2386         case tok_blank:
2387           /* Ignore the rest of the line if we don't need the input of
2388              this line.  */
2389           if (ignore_content)
2390             {
2391               lr_ignore_rest (ldfile, 0);
2392               break;
2393             }
2394
2395           class_bit = BITw (now->tok);
2396           class256_bit = BIT (now->tok);
2397           handle_digits = 0;
2398         read_charclass:
2399           ctype->class_done |= class_bit;
2400           last_token = tok_none;
2401           ellipsis_token = tok_none;
2402           step = 1;
2403           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2404           while (now->tok != tok_eol && now->tok != tok_eof)
2405             {
2406               uint32_t wch;
2407               struct charseq *seq;
2408
2409               if (ellipsis_token == tok_none)
2410                 {
2411                   if (get_character (now, charmap, repertoire, &seq, &wch))
2412                     goto err_label;
2413
2414                   if (!ignore_content && seq != NULL && seq->nbytes == 1)
2415                     /* Yep, we can store information about this byte
2416                        sequence.  */
2417                     ctype->class256_collection[seq->bytes[0]] |= class256_bit;
2418
2419                   if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2420                       && class_bit != 0)
2421                     /* We have the UCS4 position.  */
2422                     *find_idx (ctype, &ctype->class_collection,
2423                                &ctype->class_collection_max,
2424                                &ctype->class_collection_act, wch) |= class_bit;
2425
2426                   last_token = now->tok;
2427                   /* Terminate the string.  */
2428                   if (last_token == tok_bsymbol)
2429                     {
2430                       now->val.str.startmb[now->val.str.lenmb] = '\0';
2431                       last_str = now->val.str.startmb;
2432                     }
2433                   else
2434                     last_str = NULL;
2435                   last_seq = seq;
2436                   last_wch = wch;
2437                   memcpy (last_charcode, now->val.charcode.bytes, 16);
2438                   last_charcode_len = now->val.charcode.nbytes;
2439
2440                   if (!ignore_content && handle_digits == 1)
2441                     {
2442                       /* We must store the digit values.  */
2443                       if (ctype->mbdigits_act == ctype->mbdigits_max)
2444                         {
2445                           ctype->mbdigits_max += 10;
2446                           ctype->mbdigits = xrealloc (ctype->mbdigits,
2447                                                       (ctype->mbdigits_max
2448                                                        * sizeof (char *)));
2449                           ctype->wcdigits_max += 10;
2450                           ctype->wcdigits = xrealloc (ctype->wcdigits,
2451                                                       (ctype->wcdigits_max
2452                                                        * sizeof (uint32_t)));
2453                         }
2454
2455                       ctype->mbdigits[ctype->mbdigits_act++] = seq;
2456                       ctype->wcdigits[ctype->wcdigits_act++] = wch;
2457                     }
2458                   else if (!ignore_content && handle_digits == 2)
2459                     {
2460                       /* We must store the digit values.  */
2461                       if (ctype->outdigits_act >= 10)
2462                         {
2463                           lr_error (ldfile, _("\
2464 %s: field `%s' does not contain exactly ten entries"),
2465                             "LC_CTYPE", "outdigit");
2466                           lr_ignore_rest (ldfile, 0);
2467                           break;
2468                         }
2469
2470                       ctype->mboutdigits[ctype->outdigits_act] = seq;
2471                       ctype->wcoutdigits[ctype->outdigits_act] = wch;
2472                       ++ctype->outdigits_act;
2473                     }
2474                 }
2475               else
2476                 {
2477                   /* Now it gets complicated.  We have to resolve the
2478                      ellipsis problem.  First we must distinguish between
2479                      the different kinds of ellipsis and this must match the
2480                      tokens we have seen.  */
2481                   assert (last_token != tok_none);
2482
2483                   if (last_token != now->tok)
2484                     {
2485                       lr_error (ldfile, _("\
2486 ellipsis range must be marked by two operands of same type"));
2487                       lr_ignore_rest (ldfile, 0);
2488                       break;
2489                     }
2490
2491                   if (last_token == tok_bsymbol)
2492                     {
2493                       if (ellipsis_token == tok_ellipsis3)
2494                         lr_error (ldfile, _("with symbolic name range values \
2495 the absolute ellipsis `...' must not be used"));
2496
2497                       charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2498                                                    repertoire, now, last_str,
2499                                                    class256_bit, class_bit,
2500                                                    (ellipsis_token
2501                                                     == tok_ellipsis4
2502                                                     ? 10 : 16),
2503                                                    ignore_content,
2504                                                    handle_digits, step);
2505                     }
2506                   else if (last_token == tok_ucs4)
2507                     {
2508                       if (ellipsis_token != tok_ellipsis2)
2509                         lr_error (ldfile, _("\
2510 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2511
2512                       charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2513                                                repertoire, now, last_wch,
2514                                                class256_bit, class_bit,
2515                                                ignore_content, handle_digits,
2516                                                step);
2517                     }
2518                   else
2519                     {
2520                       assert (last_token == tok_charcode);
2521
2522                       if (ellipsis_token != tok_ellipsis3)
2523                         lr_error (ldfile, _("\
2524 with character code range values one must use the absolute ellipsis `...'"));
2525
2526                       charclass_charcode_ellipsis (ldfile, ctype, charmap,
2527                                                    repertoire, now,
2528                                                    last_charcode,
2529                                                    last_charcode_len,
2530                                                    class256_bit, class_bit,
2531                                                    ignore_content,
2532                                                    handle_digits);
2533                     }
2534
2535                   /* Now we have used the last value.  */
2536                   last_token = tok_none;
2537                 }
2538
2539               /* Next we expect a semicolon or the end of the line.  */
2540               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2541               if (now->tok == tok_eol || now->tok == tok_eof)
2542                 break;
2543
2544               if (last_token != tok_none
2545                   && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
2546                 {
2547                   if (now->tok == tok_ellipsis2_2)
2548                     {
2549                       now->tok = tok_ellipsis2;
2550                       step = 2;
2551                     }
2552                   else if (now->tok == tok_ellipsis4_2)
2553                     {
2554                       now->tok = tok_ellipsis4;
2555                       step = 2;
2556                     }
2557
2558                   ellipsis_token = now->tok;
2559
2560                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2561                   continue;
2562                 }
2563
2564               if (now->tok != tok_semicolon)
2565                 goto err_label;
2566
2567               /* And get the next character.  */
2568               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2569
2570               ellipsis_token = tok_none;
2571               step = 1;
2572             }
2573           break;
2574
2575         case tok_digit:
2576           /* Ignore the rest of the line if we don't need the input of
2577              this line.  */
2578           if (ignore_content)
2579             {
2580               lr_ignore_rest (ldfile, 0);
2581               break;
2582             }
2583
2584         handle_tok_digit:
2585           class_bit = _ISwdigit;
2586           class256_bit = _ISdigit;
2587           handle_digits = 1;
2588           goto read_charclass;
2589
2590         case tok_outdigit:
2591           /* Ignore the rest of the line if we don't need the input of
2592              this line.  */
2593           if (ignore_content)
2594             {
2595               lr_ignore_rest (ldfile, 0);
2596               break;
2597             }
2598
2599           if (ctype->outdigits_act != 0)
2600             lr_error (ldfile, _("\
2601 %s: field `%s' declared more than once"),
2602                       "LC_CTYPE", "outdigit");
2603           class_bit = 0;
2604           class256_bit = 0;
2605           handle_digits = 2;
2606           goto read_charclass;
2607
2608         case tok_toupper:
2609           /* Ignore the rest of the line if we don't need the input of
2610              this line.  */
2611           if (ignore_content)
2612             {
2613               lr_ignore_rest (ldfile, 0);
2614               break;
2615             }
2616
2617           mapidx = 0;
2618           goto read_mapping;
2619
2620         case tok_tolower:
2621           /* Ignore the rest of the line if we don't need the input of
2622              this line.  */
2623           if (ignore_content)
2624             {
2625               lr_ignore_rest (ldfile, 0);
2626               break;
2627             }
2628
2629           mapidx = 1;
2630           goto read_mapping;
2631
2632         case tok_map:
2633           /* Ignore the rest of the line if we don't need the input of
2634              this line.  */
2635           if (ignore_content)
2636             {
2637               lr_ignore_rest (ldfile, 0);
2638               break;
2639             }
2640
2641           /* We simply forget the `map' keyword and use the following
2642              operand to determine the mapping.  */
2643           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2644           if (now->tok == tok_ident || now->tok == tok_string)
2645             {
2646               size_t cnt;
2647
2648               for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2649                 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2650                   break;
2651
2652               if (cnt < ctype->map_collection_nr)
2653                 free (now->val.str.startmb);
2654               else
2655                 /* OK, it's a new map.  */
2656                 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2657
2658               mapidx = cnt;
2659             }
2660           else if (now->tok < tok_toupper || now->tok > tok_tolower)
2661             goto err_label;
2662           else
2663             mapidx = now->tok - tok_toupper;
2664
2665           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2666           /* This better should be a semicolon.  */
2667           if (now->tok != tok_semicolon)
2668             goto err_label;
2669
2670         read_mapping:
2671           /* Test whether this mapping was already defined.  */
2672           if (ctype->tomap_done[mapidx])
2673             {
2674               lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2675                         ctype->mapnames[mapidx]);
2676               lr_ignore_rest (ldfile, 0);
2677               break;
2678             }
2679           ctype->tomap_done[mapidx] = 1;
2680
2681           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2682           while (now->tok != tok_eol && now->tok != tok_eof)
2683             {
2684               struct charseq *from_seq;
2685               uint32_t from_wch;
2686               struct charseq *to_seq;
2687               uint32_t to_wch;
2688
2689               /* Every pair starts with an opening brace.  */
2690               if (now->tok != tok_open_brace)
2691                 goto err_label;
2692
2693               /* Next comes the from-value.  */
2694               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2695               if (get_character (now, charmap, repertoire, &from_seq,
2696                                  &from_wch) != 0)
2697                 goto err_label;
2698
2699               /* The next is a comma.  */
2700               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2701               if (now->tok != tok_comma)
2702                 goto err_label;
2703
2704               /* And the other value.  */
2705               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2706               if (get_character (now, charmap, repertoire, &to_seq,
2707                                  &to_wch) != 0)
2708                 goto err_label;
2709
2710               /* And the last thing is the closing brace.  */
2711               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2712               if (now->tok != tok_close_brace)
2713                 goto err_label;
2714
2715               if (!ignore_content)
2716                 {
2717                   /* Check whether the mapping converts from an ASCII value
2718                      to a non-ASCII value.  */
2719                   if (from_seq != NULL && from_seq->nbytes == 1
2720                       && isascii (from_seq->bytes[0])
2721                       && to_seq != NULL && (to_seq->nbytes != 1
2722                                             || !isascii (to_seq->bytes[0])))
2723                     ctype->to_nonascii = 1;
2724
2725                   if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2726                       && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2727                     /* We can use this value.  */
2728                     ctype->map256_collection[mapidx][from_seq->bytes[0]]
2729                       = to_seq->bytes[0];
2730
2731                   if (from_wch != ILLEGAL_CHAR_VALUE
2732                       && to_wch != ILLEGAL_CHAR_VALUE)
2733                     /* Both correct values.  */
2734                     *find_idx (ctype, &ctype->map_collection[mapidx],
2735                                &ctype->map_collection_max[mapidx],
2736                                &ctype->map_collection_act[mapidx],
2737                                from_wch) = to_wch;
2738                 }
2739
2740               /* Now comes a semicolon or the end of the line/file.  */
2741               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2742               if (now->tok == tok_semicolon)
2743                 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2744             }
2745           break;
2746
2747         case tok_translit_start:
2748           /* Ignore the entire translit section with its peculiar syntax
2749              if we don't need the input.  */
2750           if (ignore_content)
2751             {
2752               do
2753                 {
2754                   lr_ignore_rest (ldfile, 0);
2755                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2756                 }
2757               while (now->tok != tok_translit_end && now->tok != tok_eof);
2758
2759               if (now->tok == tok_eof)
2760                 lr_error (ldfile, _(\
2761 "%s: `translit_start' section does not end with `translit_end'"),
2762                           "LC_CTYPE");
2763
2764               break;
2765             }
2766
2767           /* The rest of the line better should be empty.  */
2768           lr_ignore_rest (ldfile, 1);
2769
2770           /* We count here the number of allocated entries in the `translit'
2771              array.  */
2772           cnt = 0;
2773
2774           ldfile->translate_strings = 1;
2775           ldfile->return_widestr = 1;
2776
2777           /* We proceed until we see the `translit_end' token.  */
2778           while (now = lr_token (ldfile, charmap, NULL, repertoire, verbose),
2779                  now->tok != tok_translit_end && now->tok != tok_eof)
2780             {
2781               if (now->tok == tok_eol)
2782                 /* Ignore empty lines.  */
2783                 continue;
2784
2785               if (now->tok == tok_include)
2786                 {
2787                   /* We have to include locale.  */
2788                   const char *locale_name;
2789                   const char *repertoire_name;
2790                   struct translit_include_t *include_stmt, **include_ptr;
2791
2792                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2793                   /* This should be a string or an identifier.  In any
2794                      case something to name a locale.  */
2795                   if (now->tok != tok_string && now->tok != tok_ident)
2796                     {
2797                     translit_syntax:
2798                       lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2799                       lr_ignore_rest (ldfile, 0);
2800                       continue;
2801                     }
2802                   locale_name = now->val.str.startmb;
2803
2804                   /* Next should be a semicolon.  */
2805                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2806                   if (now->tok != tok_semicolon)
2807                     goto translit_syntax;
2808
2809                   /* Now the repertoire name.  */
2810                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2811                   if ((now->tok != tok_string && now->tok != tok_ident)
2812                       || now->val.str.startmb == NULL)
2813                     goto translit_syntax;
2814                   repertoire_name = now->val.str.startmb;
2815                   if (repertoire_name[0] == '\0')
2816                     /* Ignore the empty string.  */
2817                     repertoire_name = NULL;
2818
2819                   /* Save the include statement for later processing.  */
2820                   include_stmt = (struct translit_include_t *)
2821                     xmalloc (sizeof (struct translit_include_t));
2822                   include_stmt->copy_locale = locale_name;
2823                   include_stmt->copy_repertoire = repertoire_name;
2824                   include_stmt->next = NULL;
2825
2826                   include_ptr = &ctype->translit_include;
2827                   while (*include_ptr != NULL)
2828                     include_ptr = &(*include_ptr)->next;
2829                   *include_ptr = include_stmt;
2830
2831                   /* The rest of the line must be empty.  */
2832                   lr_ignore_rest (ldfile, 1);
2833
2834                   /* Make sure the locale is read.  */
2835                   add_to_readlist (LC_CTYPE, locale_name, repertoire_name,
2836                                    1, NULL);
2837                   continue;
2838                 }
2839               else if (now->tok == tok_default_missing)
2840                 {
2841                   uint32_t *wstr;
2842
2843                   while (1)
2844                     {
2845                       /* We expect a single character or string as the
2846                          argument.  */
2847                       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2848                       wstr = read_widestring (ldfile, now, charmap,
2849                                               repertoire);
2850
2851                       if (wstr != NULL)
2852                         {
2853                           if (ctype->default_missing != NULL)
2854                             {
2855                               lr_error (ldfile, _("\
2856 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2857                               WITH_CUR_LOCALE (error_at_line (0, 0,
2858                                                               ctype->default_missing_file,
2859                                                               ctype->default_missing_lineno,
2860                                                               _("\
2861 previous definition was here")));
2862                             }
2863                           else
2864                             {
2865                               ctype->default_missing = wstr;
2866                               ctype->default_missing_file = ldfile->fname;
2867                               ctype->default_missing_lineno = ldfile->lineno;
2868                             }
2869                           /* We can have more entries, ignore them.  */
2870                           lr_ignore_rest (ldfile, 0);
2871                           break;
2872                         }
2873                       else if (wstr == (uint32_t *) -1l)
2874                         /* This was a syntax error.  */
2875                         break;
2876
2877                       /* Maybe there is another replacement we can use.  */
2878                       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2879                       if (now->tok == tok_eol || now->tok == tok_eof)
2880                         {
2881                           /* Nothing found.  We tell the user.  */
2882                           lr_error (ldfile, _("\
2883 %s: no representable `default_missing' definition found"), "LC_CTYPE");
2884                           break;
2885                         }
2886                       if (now->tok != tok_semicolon)
2887                         goto translit_syntax;
2888                     }
2889
2890                   continue;
2891                 }
2892               else if (now->tok == tok_translit_ignore)
2893                 {
2894                   read_translit_ignore_entry (ldfile, ctype, charmap,
2895                                               repertoire);
2896                   continue;
2897                 }
2898
2899               read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2900             }
2901           ldfile->return_widestr = 0;
2902
2903           if (now->tok == tok_eof)
2904             lr_error (ldfile, _(\
2905 "%s: `translit_start' section does not end with `translit_end'"),
2906                       "LC_CTYPE");
2907
2908           break;
2909
2910         case tok_ident:
2911           /* Ignore the rest of the line if we don't need the input of
2912              this line.  */
2913           if (ignore_content)
2914             {
2915               lr_ignore_rest (ldfile, 0);
2916               break;
2917             }
2918
2919           /* This could mean one of several things.  First test whether
2920              it's a character class name.  */
2921           for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2922             if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2923               break;
2924           if (cnt < ctype->nr_charclass)
2925             {
2926               class_bit = _ISwbit (cnt);
2927               class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2928               free (now->val.str.startmb);
2929               goto read_charclass;
2930             }
2931           for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2932             if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2933               break;
2934           if (cnt < ctype->map_collection_nr)
2935             {
2936               mapidx = cnt;
2937               free (now->val.str.startmb);
2938               goto read_mapping;
2939             }
2940 #ifdef PREDEFINED_CLASSES
2941           if (strcmp (now->val.str.startmb, "special1") == 0)
2942             {
2943               class_bit = _ISwspecial1;
2944               free (now->val.str.startmb);
2945               goto read_charclass;
2946             }
2947           if (strcmp (now->val.str.startmb, "special2") == 0)
2948             {
2949               class_bit = _ISwspecial2;
2950               free (now->val.str.startmb);
2951               goto read_charclass;
2952             }
2953           if (strcmp (now->val.str.startmb, "special3") == 0)
2954             {
2955               class_bit = _ISwspecial3;
2956               free (now->val.str.startmb);
2957               goto read_charclass;
2958             }
2959           if (strcmp (now->val.str.startmb, "tosymmetric") == 0)
2960             {
2961               mapidx = 2;
2962               goto read_mapping;
2963             }
2964 #endif
2965           break;
2966
2967         case tok_end:
2968           /* Next we assume `LC_CTYPE'.  */
2969           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2970           if (now->tok == tok_eof)
2971             break;
2972           if (now->tok == tok_eol)
2973             lr_error (ldfile, _("%s: incomplete `END' line"),
2974                       "LC_CTYPE");
2975           else if (now->tok != tok_lc_ctype)
2976             lr_error (ldfile, _("\
2977 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2978           lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2979           return;
2980
2981         default:
2982         err_label:
2983           if (now->tok != tok_eof)
2984             SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2985         }
2986
2987       /* Prepare for the next round.  */
2988       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2989       nowtok = now->tok;
2990     }
2991
2992   /* When we come here we reached the end of the file.  */
2993   lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2994 }
2995
2996
2997 static void
2998 set_class_defaults (struct locale_ctype_t *ctype,
2999                     const struct charmap_t *charmap,
3000                     struct repertoire_t *repertoire)
3001 {
3002   size_t cnt;
3003
3004   /* This function defines the default values for the classes and conversions
3005      according to POSIX.2 2.5.2.1.
3006      It may seem that the order of these if-blocks is arbitrary but it is NOT.
3007      Don't move them unless you know what you do!  */
3008
3009   auto void set_default (int bitpos, int from, int to);
3010
3011   void set_default (int bitpos, int from, int to)
3012     {
           /* Nested helper; CTYPE and CHARMAP are the parameters of the
              enclosing function.  For every character code in the inclusive
              range FROM..TO set the class bit selected by BITPOS: _ISbit
              (BITPOS) in the byte-indexed table class256_collection and
              _ISwbit (BITPOS) in class_collection.  */
3013       char tmp[2];
3014       int ch;
3015       int bit = _ISbit (bitpos);
3016       int bitw = _ISwbit (bitpos);
3017       /* Define string.  TMP is a one-character, NUL-terminated name; only
              its first byte is replaced inside the loop, so it can also be
              printed with `%s' in the diagnostics below.  */
3018       strcpy (tmp, "?");
3019
3020       for (ch = from; ch <= to; ++ch)
3021         {
3022           struct charseq *seq;
3023           tmp[0] = ch;
3024
               /* First try the charmap entry named by the character itself.  */
3025           seq = charmap_find_value (charmap, tmp, 1);
3026           if (seq == NULL)
3027             {
                   /* Fall back to the name "U" followed by eight uppercase
                      hexadecimal digits: nine characters plus the
                      terminating NUL, hence BUF[10].  */
3028               char buf[10];
3029               sprintf (buf, "U%08X", ch);
3030               seq = charmap_find_value (charmap, buf, 9);
3031             }
3032           if (seq == NULL)
3033             {
                   /* Not found under either name.  This diagnostic is
                      suppressed when be_quiet is set.  */
3034               if (!be_quiet)
3035                 WITH_CUR_LOCALE (error (0, 0, _("\
3036 %s: character `%s' not defined in charmap while needed as default value"),
3037                                         "LC_CTYPE", tmp));
3038             }
3039           else if (seq->nbytes != 1)
                 /* Found, but the encoding is not exactly one byte, so the
                    byte-indexed table cannot record it.  Unlike the case
                    above this is reported even when be_quiet is set.  */
3040             WITH_CUR_LOCALE (error (0, 0, _("\
3041 %s: character `%s' in charmap not representable with one byte"),
3042                                     "LC_CTYPE", tmp));
3043           else
                 /* Single-byte encoding: mark that byte value.  */
3044             ctype->class256_collection[seq->bytes[0]] |= bit;
3045
3046           /* No need to search here, the ASCII value is also the Unicode
3047              value.  This statement runs for every CH in the range, whether
                  or not the charmap lookup above succeeded.  ELEM is defined
                  elsewhere; presumably it yields the class_collection entry
                  for CH -- confirm against its definition.  */
3048           ELEM (ctype, class_collection, , ch) |= bitw;
3049         }
3050     }
3051
3052   /* Set default values if keyword was not present.  */
3053   if ((ctype->class_done & BITw (tok_upper)) == 0)
3054     /* "If this keyword [upper] is not specified, the uppercase letters
3055         `A' through `Z', ..., shall automatically belong to this class,
3056         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
3057     set_default (BITPOS (tok_upper), 'A', 'Z');
3058
3059   if ((ctype->class_done & BITw (tok_lower)) == 0)
3060     /* "If this keyword [lower] is not specified, the lowercase letters
3061         `a' through `z', ..., shall automatically belong to this class,
3062         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
3063     set_default (BITPOS (tok_lower), 'a', 'z');
3064
3065   if ((ctype->class_done & BITw (tok_alpha)) == 0)
3066     {
3067       /* Table 2-6 in P1003.2 says that characters in class `upper' or
3068          class `lower' *must* be in class `alpha'.  */
3069       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
3070       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
3071
3072       for (cnt = 0; cnt < 256; ++cnt)
3073         if ((ctype->class256_collection[cnt] & mask) != 0)
3074           ctype->class256_collection[cnt] |= BIT (tok_alpha);
3075
3076       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3077         if ((ctype->class_collection[cnt] & maskw) != 0)
3078           ctype->class_collection[cnt] |= BITw (tok_alpha);
3079     }
3080
3081   if ((ctype->class_done & BITw (tok_digit)) == 0)
3082     /* "If this keyword [digit] is not specified, the digits `0' through
3083         `9', ..., shall automatically belong to this class, with
3084         implementation-defined character values."  [P1003.2, 2.5.2.1]  */
3085     set_default (BITPOS (tok_digit), '0', '9');
3086
3087   /* "Only characters specified for the `alpha' and `digit' keyword
3088      shall be specified.  Characters specified for the keyword `alpha'
3089      and `digit' are automatically included in this class [alnum]."  */
3090   {
3091     unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
3092     unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
3093
3094     for (cnt = 0; cnt < 256; ++cnt)
3095       if ((ctype->class256_collection[cnt] & mask) != 0)
3096         ctype->class256_collection[cnt] |= BIT (tok_alnum);
3097
3098     for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3099       if ((ctype->class_collection[cnt] & maskw) != 0)
3100         ctype->class_collection[cnt] |= BITw (tok_alnum);
3101   }
3102
3103   if ((ctype->class_done & BITw (tok_space)) == 0)
3104     /* "If this keyword [space] is not specified, the characters <space>,
3105         <form-feed>, <newline>, <carriage-return>, <tab>, and
3106         <vertical-tab>, ..., shall automatically belong to this class,
3107         with implementation-defined character values."  [P1003.2, 2.5.2.1]  */
3108     {
3109       struct charseq *seq;
3110
3111       seq = charmap_find_value (charmap, "space", 5);
3112       if (seq == NULL)
3113         seq = charmap_find_value (charmap, "SP", 2);
3114       if (seq == NULL)
3115         seq = charmap_find_value (charmap, "U00000020", 9);
3116       if (seq == NULL)
3117         {
3118           if (!be_quiet)
3119             WITH_CUR_LOCALE (error (0, 0, _("\
3120 %s: character `%s' not defined while needed as default value"),
3121                                     "LC_CTYPE", "<space>"));
3122         }
3123       else if (seq->nbytes != 1)
3124         WITH_CUR_LOCALE (error (0, 0, _("\
3125 %s: character `%s' in charmap not representable with one byte"),
3126                                 "LC_CTYPE", "<space>"));
3127       else
3128         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3129
3130       /* No need to search.  */
3131       ELEM (ctype, class_collection, , L' ') |= BITw (tok_space);
3132
3133       seq = charmap_find_value (charmap, "form-feed", 9);
3134       if (seq == NULL)
3135         seq = charmap_find_value (charmap, "U0000000C", 9);
3136       if (seq == NULL)
3137         {
3138           if (!be_quiet)
3139             WITH_CUR_LOCALE (error (0, 0, _("\
3140 %s: character `%s' not defined while needed as default value"),
3141                                     "LC_CTYPE", "<form-feed>"));
3142         }
3143       else if (seq->nbytes != 1)
3144         WITH_CUR_LOCALE (error (0, 0, _("\
3145 %s: character `%s' in charmap not representable with one byte"),
3146                                 "LC_CTYPE", "<form-feed>"));
3147       else
3148         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3149
3150       /* No need to search.  */
3151       ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space);
3152
3153
3154       seq = charmap_find_value (charmap, "newline", 7);
3155       if (seq == NULL)
3156         seq = charmap_find_value (charmap, "U0000000A", 9);
3157       if (seq == NULL)
3158         {
3159           if (!be_quiet)
3160             WITH_CUR_LOCALE (error (0, 0, _("\
3161 character `%s' not defined while needed as default value"),
3162                                     "<newline>"));
3163         }
3164       else if (seq->nbytes != 1)
3165         WITH_CUR_LOCALE (error (0, 0, _("\
3166 %s: character `%s' in charmap not representable with one byte"),
3167                                 "LC_CTYPE", "<newline>"));
3168       else
3169         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3170
3171       /* No need to search.  */
3172       ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space);
3173
3174
3175       seq = charmap_find_value (charmap, "carriage-return", 15);
3176       if (seq == NULL)
3177         seq = charmap_find_value (charmap, "U0000000D", 9);
3178       if (seq == NULL)
3179         {
3180           if (!be_quiet)
3181             WITH_CUR_LOCALE (error (0, 0, _("\
3182 %s: character `%s' not defined while needed as default value"),
3183                                     "LC_CTYPE", "<carriage-return>"));
3184         }
3185       else if (seq->nbytes != 1)
3186         WITH_CUR_LOCALE (error (0, 0, _("\
3187 %s: character `%s' in charmap not representable with one byte"),
3188                                 "LC_CTYPE", "<carriage-return>"));
3189       else
3190         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3191
3192       /* No need to search.  */
3193       ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space);
3194
3195
3196       seq = charmap_find_value (charmap, "tab", 3);
3197       if (seq == NULL)
3198         seq = charmap_find_value (charmap, "U00000009", 9);
3199       if (seq == NULL)
3200         {
3201           if (!be_quiet)
3202             WITH_CUR_LOCALE (error (0, 0, _("\
3203 %s: character `%s' not defined while needed as default value"),
3204                                     "LC_CTYPE", "<tab>"));
3205         }
3206       else if (seq->nbytes != 1)
3207         WITH_CUR_LOCALE (error (0, 0, _("\
3208 %s: character `%s' in charmap not representable with one byte"),
3209                                 "LC_CTYPE", "<tab>"));
3210       else
3211         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3212
3213       /* No need to search.  */
3214       ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space);
3215
3216
3217       seq = charmap_find_value (charmap, "vertical-tab", 12);
3218       if (seq == NULL)
3219         seq = charmap_find_value (charmap, "U0000000B", 9);
3220       if (seq == NULL)
3221         {
3222           if (!be_quiet)
3223             WITH_CUR_LOCALE (error (0, 0, _("\
3224 %s: character `%s' not defined while needed as default value"),
3225                                     "LC_CTYPE", "<vertical-tab>"));
3226         }
3227       else if (seq->nbytes != 1)
3228         WITH_CUR_LOCALE (error (0, 0, _("\
3229 %s: character `%s' in charmap not representable with one byte"),
3230                                 "LC_CTYPE", "<vertical-tab>"));
3231       else
3232         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3233
3234       /* No need to search.  */
3235       ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space);
3236     }
3237
3238   if ((ctype->class_done & BITw (tok_xdigit)) == 0)
3239     /* "If this keyword is not specified, the digits `0' to `9', the
3240         uppercase letters `A' through `F', and the lowercase letters `a'
3241         through `f', ..., shell automatically belong to this class, with
3242         implementation defined character values."  [P1003.2, 2.5.2.1]  */
3243     {
3244       set_default (BITPOS (tok_xdigit), '0', '9');
3245       set_default (BITPOS (tok_xdigit), 'A', 'F');
3246       set_default (BITPOS (tok_xdigit), 'a', 'f');
3247     }
3248
3249   if ((ctype->class_done & BITw (tok_blank)) == 0)
3250     /* "If this keyword [blank] is unspecified, the characters <space> and
3251        <tab> shall belong to this character class."  [P1003.2, 2.5.2.1]  */
3252    {
3253       struct charseq *seq;
3254
3255       seq = charmap_find_value (charmap, "space", 5);
3256       if (seq == NULL)
3257         seq = charmap_find_value (charmap, "SP", 2);
3258       if (seq == NULL)
3259         seq = charmap_find_value (charmap, "U00000020", 9);
3260       if (seq == NULL)
3261         {
3262           if (!be_quiet)
3263             WITH_CUR_LOCALE (error (0, 0, _("\
3264 %s: character `%s' not defined while needed as default value"),
3265                                     "LC_CTYPE", "<space>"));
3266         }
3267       else if (seq->nbytes != 1)
3268         WITH_CUR_LOCALE (error (0, 0, _("\
3269 %s: character `%s' in charmap not representable with one byte"),
3270                                 "LC_CTYPE", "<space>"));
3271       else
3272         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3273
3274       /* No need to search.  */
3275       ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank);
3276
3277
3278       seq = charmap_find_value (charmap, "tab", 3);
3279       if (seq == NULL)
3280         seq = charmap_find_value (charmap, "U00000009", 9);
3281       if (seq == NULL)
3282         {
3283           if (!be_quiet)
3284             WITH_CUR_LOCALE (error (0, 0, _("\
3285 %s: character `%s' not defined while needed as default value"),
3286                                     "LC_CTYPE", "<tab>"));
3287         }
3288       else if (seq->nbytes != 1)
3289         WITH_CUR_LOCALE (error (0, 0, _("\
3290 %s: character `%s' in charmap not representable with one byte"),
3291                                 "LC_CTYPE", "<tab>"));
3292       else
3293         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3294
3295       /* No need to search.  */
3296       ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank);
3297     }
3298
3299   if ((ctype->class_done & BITw (tok_graph)) == 0)
3300     /* "If this keyword [graph] is not specified, characters specified for
3301         the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3302         shall belong to this character class."  [P1003.2, 2.5.2.1]  */
3303     {
3304       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3305         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3306       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3307         BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3308         BITw (tok_punct);
3309       size_t cnt;
3310
3311       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3312         if ((ctype->class_collection[cnt] & maskw) != 0)
3313           ctype->class_collection[cnt] |= BITw (tok_graph);
3314
3315       for (cnt = 0; cnt < 256; ++cnt)
3316         if ((ctype->class256_collection[cnt] & mask) != 0)
3317           ctype->class256_collection[cnt] |= BIT (tok_graph);
3318     }
3319
3320   if ((ctype->class_done & BITw (tok_print)) == 0)
3321     /* "If this keyword [print] is not provided, characters specified for
3322         the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3323         and the <space> character shall belong to this character class."
3324         [P1003.2, 2.5.2.1]  */
3325     {
3326       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3327         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3328       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3329         BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3330         BITw (tok_punct);
3331       size_t cnt;
3332       struct charseq *seq;
3333
3334       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3335         if ((ctype->class_collection[cnt] & maskw) != 0)
3336           ctype->class_collection[cnt] |= BITw (tok_print);
3337
3338       for (cnt = 0; cnt < 256; ++cnt)
3339         if ((ctype->class256_collection[cnt] & mask) != 0)
3340           ctype->class256_collection[cnt] |= BIT (tok_print);
3341
3342
3343       seq = charmap_find_value (charmap, "space", 5);
3344       if (seq == NULL)
3345         seq = charmap_find_value (charmap, "SP", 2);
3346       if (seq == NULL)
3347         seq = charmap_find_value (charmap, "U00000020", 9);
3348       if (seq == NULL)
3349         {
3350           if (!be_quiet)
3351             WITH_CUR_LOCALE (error (0, 0, _("\
3352 %s: character `%s' not defined while needed as default value"),
3353                                     "LC_CTYPE", "<space>"));
3354         }
3355       else if (seq->nbytes != 1)
3356         WITH_CUR_LOCALE (error (0, 0, _("\
3357 %s: character `%s' in charmap not representable with one byte"),
3358                                 "LC_CTYPE", "<space>"));
3359       else
3360         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
3361
3362       /* No need to search.  */
3363       ELEM (ctype, class_collection, , L' ') |= BITw (tok_print);
3364     }
3365
3366   if (ctype->tomap_done[0] == 0)
3367     /* "If this keyword [toupper] is not specified, the lowercase letters
3368         `a' through `z', and their corresponding uppercase letters `A' to
3369         `Z', ..., shall automatically be included, with implementation-
3370         defined character values."  [P1003.2, 2.5.2.1]  */
3371     {
3372       char tmp[4];
3373       int ch;
3374
3375       strcpy (tmp, "<?>");
3376
3377       for (ch = 'a'; ch <= 'z'; ++ch)
3378         {
3379           struct charseq *seq_from, *seq_to;
3380
3381           tmp[1] = (char) ch;
3382
3383           seq_from = charmap_find_value (charmap, &tmp[1], 1);
3384           if (seq_from == NULL)
3385             {
3386               char buf[10];
3387               sprintf (buf, "U%08X", ch);
3388               seq_from = charmap_find_value (charmap, buf, 9);
3389             }
3390           if (seq_from == NULL)
3391             {
3392               if (!be_quiet)
3393                 WITH_CUR_LOCALE (error (0, 0, _("\
3394 %s: character `%s' not defined while needed as default value"),
3395                                         "LC_CTYPE", tmp));
3396             }
3397           else if (seq_from->nbytes != 1)
3398             {
3399               if (!be_quiet)
3400                 WITH_CUR_LOCALE (error (0, 0, _("\
3401 %s: character `%s' needed as default value not representable with one byte"),
3402                                         "LC_CTYPE", tmp));
3403             }
3404           else
3405             {
3406               /* This conversion is implementation defined.  */
3407               tmp[1] = (char) (ch + ('A' - 'a'));
3408               seq_to = charmap_find_value (charmap, &tmp[1], 1);
3409               if (seq_to == NULL)
3410                 {
3411                   char buf[10];
3412                   sprintf (buf, "U%08X", ch + ('A' - 'a'));
3413                   seq_to = charmap_find_value (charmap, buf, 9);
3414                 }
3415               if (seq_to == NULL)
3416                 {
3417                   if (!be_quiet)
3418                     WITH_CUR_LOCALE (error (0, 0, _("\
3419 %s: character `%s' not defined while needed as default value"),
3420                                             "LC_CTYPE", tmp));
3421                 }
3422               else if (seq_to->nbytes != 1)
3423                 {
3424                   if (!be_quiet)
3425                     WITH_CUR_LOCALE (error (0, 0, _("\
3426 %s: character `%s' needed as default value not representable with one byte"),
3427                                             "LC_CTYPE", tmp));
3428                 }
3429               else
3430                 /* The index [0] is determined by the order of the
3431                    `ctype_map_newP' calls in `ctype_startup'.  */
3432                 ctype->map256_collection[0][seq_from->bytes[0]]
3433                   = seq_to->bytes[0];
3434             }
3435
3436           /* No need to search.  */
3437           ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
3438         }
3439     }
3440
3441   if (ctype->tomap_done[1] == 0)
3442     /* "If this keyword [tolower] is not specified, the mapping shall be
3443        the reverse mapping of the one specified to `toupper'."  [P1003.2]  */
3444     {
3445       for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
3446         if (ctype->map_collection[0][cnt] != 0)
3447           ELEM (ctype, map_collection, [1],
3448                 ctype->map_collection[0][cnt])
3449             = ctype->charnames[cnt];
3450
3451       for (cnt = 0; cnt < 256; ++cnt)
3452         if (ctype->map256_collection[0][cnt] != 0)
3453           ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
3454     }
3455
3456   if (ctype->outdigits_act != 10)
3457     {
3458       if (ctype->outdigits_act != 0)
3459         WITH_CUR_LOCALE (error (0, 0, _("\
3460 %s: field `%s' does not contain exactly ten entries"),
3461                                 "LC_CTYPE", "outdigit"));
3462
3463       for (cnt = ctype->outdigits_act; cnt < 10; ++cnt)
3464         {
3465           ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3466                                                          digits + cnt, 1);
3467
3468           if (ctype->mboutdigits[cnt] == NULL)
3469             ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3470                                                            longnames[cnt],
3471                                                            strlen (longnames[cnt]));
3472
3473           if (ctype->mboutdigits[cnt] == NULL)
3474             ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3475                                                            uninames[cnt], 9);
3476
3477           if (ctype->mboutdigits[cnt] == NULL)
3478             {
3479               /* Provide a replacement.  */
3480               WITH_CUR_LOCALE (error (0, 0, _("\
3481 no output digits defined and none of the standard names in the charmap")));
3482
3483               ctype->mboutdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
3484                                                        sizeof (struct charseq)
3485                                                        + 1);
3486
3487               /* This is better than nothing.  */
3488               ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
3489               ctype->mboutdigits[cnt]->nbytes = 1;
3490             }
3491
3492           ctype->wcoutdigits[cnt] = L'0' + cnt;
3493         }
3494
3495       ctype->outdigits_act = 10;
3496     }
3497 }
3498
3499
3500 /* Construction of sparse 3-level tables.
3501    See wchar-lookup.h for their structure and the meaning of p and q.  */
3502
struct wctype_table
{
  /* Bit widths used to split a character code: the lookup code shifts by
     p + 5 and q + p + 5 and masks with (1 << q) - 1 and (1 << p) - 1.  */
  unsigned int p, q;

  /* Growable working arrays, one per level.  For each level `alloc' is
     the allocated capacity and `size' the part currently in use.  */
  size_t level1_alloc, level1_size;
  uint32_t *level1;
  size_t level2_alloc, level2_size;
  uint32_t *level2;
  size_t level3_alloc, level3_size;
  uint32_t *level3;

  /* Compressed representation: a buffer of `result_size' bytes.  */
  size_t result_size;
  char *result;
};
3522
3523 /* Initialize.  Assumes t->p and t->q have already been set.  */
3524 static inline void
3525 wctype_table_init (struct wctype_table *t)
3526 {
3527   t->level1 = NULL;
3528   t->level1_alloc = t->level1_size = 0;
3529   t->level2 = NULL;
3530   t->level2_alloc = t->level2_size = 0;
3531   t->level3 = NULL;
3532   t->level3_alloc = t->level3_size = 0;
3533 }
3534
3535 /* Retrieve an entry.  */
3536 static inline int
3537 wctype_table_get (struct wctype_table *t, uint32_t wc)
3538 {
3539   uint32_t index1 = wc >> (t->q + t->p + 5);
3540   if (index1 < t->level1_size)
3541     {
3542       uint32_t lookup1 = t->level1[index1];
3543       if (lookup1 != EMPTY)
3544         {
3545           uint32_t index2 = ((wc >> (t->p + 5)) & ((1 << t->q) - 1))
3546                             + (lookup1 << t->q);
3547           uint32_t lookup2 = t->level2[index2];
3548           if (lookup2 != EMPTY)
3549             {
3550               uint32_t index3 = ((wc >> 5) & ((1 << t->p) - 1))
3551                                 + (lookup2 << t->p);
3552               uint32_t lookup3 = t->level3[index3];
3553               uint32_t index4 = wc & 0x1f;
3554
3555               return (lookup3 >> index4) & 1;
3556             }
3557         }
3558     }
3559   return 0;
3560 }
3561
3562 /* Add one entry.  */
3563 static void
3564 wctype_table_add (struct wctype_table *t, uint32_t wc)
3565 {
3566   uint32_t index1 = wc >> (t->q + t->p + 5);
3567   uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1);
3568   uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1);
3569   uint32_t index4 = wc & 0x1f;
3570   size_t i, i1, i2;
3571
3572   if (index1 >= t->level1_size)
3573     {
3574       if (index1 >= t->level1_alloc)
3575         {
3576           size_t alloc = 2 * t->level1_alloc;
3577           if (alloc <= index1)
3578             alloc = index1 + 1;
3579           t->level1 = (uint32_t *) xrealloc ((char *) t->level1,
3580                                              alloc * sizeof (uint32_t));
3581           t->level1_alloc = alloc;
3582         }
3583       while (index1 >= t->level1_size)
3584         t->level1[t->level1_size++] = EMPTY;
3585     }
3586
3587   if (t->level1[index1] == EMPTY)
3588     {
3589       if (t->level2_size == t->level2_alloc)
3590         {
3591           size_t alloc = 2 * t->level2_alloc + 1;
3592           t->level2 = (uint32_t *) xrealloc ((char *) t->level2,
3593                                              (alloc << t->q) * sizeof (uint32_t));
3594           t->level2_alloc = alloc;
3595         }
3596       i1 = t->level2_size << t->q;
3597       i2 = (t->level2_size + 1) << t->q;
3598       for (i = i1; i < i2; i++)
3599         t->level2[i] = EMPTY;
3600       t->level1[index1] = t->level2_size++;
3601     }
3602
3603   index2 += t->level1[index1] << t->q;
3604
3605   if (t->level2[index2] == EMPTY)
3606     {
3607       if (t->level3_size == t->level3_alloc)
3608         {
3609           size_t alloc = 2 * t->level3_alloc + 1;
3610           t->level3 = (uint32_t *) xrealloc ((char *) t->level3,
3611                                              (alloc << t->p) * sizeof (uint32_t));
3612           t->level3_alloc = alloc;
3613         }
3614       i1 = t->level3_size << t->p;
3615       i2 = (t->level3_size + 1) << t->p;
3616       for (i = i1; i < i2; i++)
3617         t->level3[i] = 0;
3618       t->level2[index2] = t->level3_size++;
3619     }
3620
3621   index3 += t->level2[index2] << t->p;
3622
3623   t->level3[index3] |= (uint32_t)1 << index4;
3624 }
3625
3626 /* Finalize and shrink.  */
3627 static void
3628 wctype_table_finalize (struct wctype_table *t)
3629 {
3630   size_t i, j, k;
3631   uint32_t reorder3[t->level3_size];
3632   uint32_t reorder2[t->level2_size];
3633   uint32_t level1_offset, level2_offset, level3_offset;
3634
3635   /* Uniquify level3 blocks.  */
3636   k = 0;
3637   for (j = 0; j < t->level3_size; j++)
3638     {
3639       for (i = 0; i < k; i++)
3640         if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3641                     (1 << t->p) * sizeof (uint32_t)) == 0)
3642           break;
3643       /* Relocate block j to block i.  */
3644       reorder3[j] = i;
3645       if (i == k)
3646         {
3647           if (i != j)
3648             memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3649                     (1 << t->p) * sizeof (uint32_t));
3650           k++;
3651         }
3652     }
3653   t->level3_size = k;
3654
3655   for (i = 0; i < (t->level2_size << t->q); i++)
3656     if (t->level2[i] != EMPTY)
3657       t->level2[i] = reorder3[t->level2[i]];
3658
3659   /* Uniquify level2 blocks.  */
3660   k = 0;
3661   for (j = 0; j < t->level2_size; j++)
3662     {
3663       for (i = 0; i < k; i++)
3664         if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3665                     (1 << t->q) * sizeof (uint32_t)) == 0)
3666           break;
3667       /* Relocate block j to block i.  */
3668       reorder2[j] = i;
3669       if (i == k)
3670         {
3671           if (i != j)
3672             memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3673                     (1 << t->q) * sizeof (uint32_t));
3674           k++;
3675         }
3676     }
3677   t->level2_size = k;
3678
3679   for (i = 0; i < t->level1_size; i++)
3680     if (t->level1[i] != EMPTY)
3681       t->level1[i] = reorder2[t->level1[i]];
3682
3683   /* Create and fill the resulting compressed representation.  */
3684   t->result_size =
3685     5 * sizeof (uint32_t)
3686     + t->level1_size * sizeof (uint32_t)
3687     + (t->level2_size << t->q) * sizeof (uint32_t)
3688     + (t->level3_size << t->p) * sizeof (uint32_t);
3689   t->result = (char *) xmalloc (t->result_size);
3690
3691   level1_offset =
3692     5 * sizeof (uint32_t);
3693   level2_offset =
3694     5 * sizeof (uint32_t)
3695     + t->level1_size * sizeof (uint32_t);
3696   level3_offset =
3697     5 * sizeof (uint32_t)
3698     + t->level1_size * sizeof (uint32_t)
3699     + (t->level2_size << t->q) * sizeof (uint32_t);
3700
3701   ((uint32_t *) t->result)[0] = t->q + t->p + 5;
3702   ((uint32_t *) t->result)[1] = t->level1_size;
3703   ((uint32_t *) t->result)[2] = t->p + 5;
3704   ((uint32_t *) t->result)[3] = (1 << t->q) - 1;
3705   ((uint32_t *) t->result)[4] = (1 << t->p) - 1;
3706
3707   for (i = 0; i < t->level1_size; i++)
3708     ((uint32_t *) (t->result + level1_offset))[i] =
3709       (t->level1[i] == EMPTY
3710        ? 0
3711        : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3712
3713   for (i = 0; i < (t->level2_size << t->q); i++)
3714     ((uint32_t *) (t->result + level2_offset))[i] =
3715       (t->level2[i] == EMPTY
3716        ? 0
3717        : (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
3718
3719   for (i = 0; i < (t->level3_size << t->p); i++)
3720     ((uint32_t *) (t->result + level3_offset))[i] = t->level3[i];
3721
3722   if (t->level1_alloc > 0)
3723     free (t->level1);
3724   if (t->level2_alloc > 0)
3725     free (t->level2);
3726   if (t->level3_alloc > 0)
3727     free (t->level3);
3728 }
3729
3730 #define TABLE wcwidth_table
3731 #define ELEMENT uint8_t
3732 #define DEFAULT 0xff
3733 #include "3level.h"
3734
3735 #define TABLE wctrans_table
3736 #define ELEMENT int32_t
3737 #define DEFAULT 0
3738 #define wctrans_table_add wctrans_table_add_internal
3739 #include "3level.h"
3740 #undef wctrans_table_add
3741 /* The wctrans_table must actually store the difference between the
3742    desired result and the argument.  */
3743 static inline void
3744 wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
3745 {
3746   wctrans_table_add_internal (t, wc, mapped_wc - wc);
3747 }
3748
3749
3750 /* Flattens the included transliterations into a translit list.
3751    Inserts them in the list at `cursor', and returns the new cursor.  */
3752 static struct translit_t **
3753 translit_flatten (struct locale_ctype_t *ctype,
3754                   const struct charmap_t *charmap,
3755                   struct translit_t **cursor)
3756 {
3757   while (ctype->translit_include != NULL)
3758     {
3759       const char *copy_locale = ctype->translit_include->copy_locale;
3760       const char *copy_repertoire = ctype->translit_include->copy_repertoire;
3761       struct localedef_t *other;
3762
3763       /* Unchain the include statement.  During the depth-first traversal
3764          we don't want to visit any locale more than once.  */
3765       ctype->translit_include = ctype->translit_include->next;
3766
3767       other = find_locale (LC_CTYPE, copy_locale, copy_repertoire, charmap);
3768
3769       if (other == NULL)
3770         {
3771           WITH_CUR_LOCALE (error (0, 0, _("\
3772 %s: transliteration data from locale `%s' not available"),
3773                                   "LC_CTYPE", copy_locale));
3774         }
3775       else
3776         {
3777           struct locale_ctype_t *other_ctype =
3778             other->categories[LC_CTYPE].ctype;
3779
3780           cursor = translit_flatten (other_ctype, charmap, cursor);
3781           assert (other_ctype->translit_include == NULL);
3782
3783           if (other_ctype->translit != NULL)
3784             {
3785               /* Insert the other_ctype->translit list at *cursor.  */
3786               struct translit_t *endp = other_ctype->translit;
3787               while (endp->next != NULL)
3788                 endp = endp->next;
3789
3790               endp->next = *cursor;
3791               *cursor = other_ctype->translit;
3792
3793               /* Avoid any risk of circular lists.  */
3794               other_ctype->translit = NULL;
3795
3796               cursor = &endp->next;
3797             }
3798
3799           if (ctype->default_missing == NULL)
3800             ctype->default_missing = other_ctype->default_missing;
3801         }
3802     }
3803
3804   return cursor;
3805 }
3806
3807 static void
3808 allocate_arrays (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
3809                  struct repertoire_t *repertoire)
3810 {
3811   size_t idx, nr;
3812   const void *key;
3813   size_t len;
3814   void *vdata;
3815   void *curs;
3816
3817   /* You wonder about this amount of memory?  This is only because some
3818      users do not manage to address the array with unsigned values or
3819      data types with range >= 256.  '\200' would result in the array
3820      index -128.  To help these poor people we duplicate the entries for
3821      128 up to 255 below the entry for \0.  */
3822   ctype->ctype_b = (char_class_t *) xcalloc (256 + 128, sizeof (char_class_t));
3823   ctype->ctype32_b = (char_class32_t *) xcalloc (256, sizeof (char_class32_t));
3824   ctype->class_b = (uint32_t **)
3825     xmalloc (ctype->nr_charclass * sizeof (uint32_t *));
3826   ctype->class_3level = (struct iovec *)
3827     xmalloc (ctype->nr_charclass * sizeof (struct iovec));
3828
3829   /* This is the array accessed using the multibyte string elements.  */
3830   for (idx = 0; idx < 256; ++idx)
3831     ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
3832
3833   /* Mirror first 127 entries.  We must take care that entry -1 is not
3834      mirrored because EOF == -1.  */
3835   for (idx = 0; idx < 127; ++idx)
3836     ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
3837
3838   /* The 32 bit array contains all characters < 0x100.  */
3839   for (idx = 0; idx < ctype->class_collection_act; ++idx)
3840     if (ctype->charnames[idx] < 0x100)
3841       ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
3842
3843   for (nr = 0; nr < ctype->nr_charclass; nr++)
3844     {
3845       ctype->class_b[nr] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
3846
3847       for (idx = 0; idx < 256; ++idx)
3848         if (ctype->class256_collection[idx] & _ISbit (nr))
3849           ctype->class_b[nr][idx >> 5] |= (uint32_t)1 << (idx & 0x1f);
3850     }
3851
3852   for (nr = 0; nr < ctype->nr_charclass; nr++)
3853     {
3854       struct wctype_table t;
3855
3856       t.p = 4; /* or: 5 */
3857       t.q = 7; /* or: 6 */
3858       wctype_table_init (&t);
3859
3860       for (idx = 0; idx < ctype->class_collection_act; ++idx)
3861         if (ctype->class_collection[idx] & _ISwbit (nr))
3862           wctype_table_add (&t, ctype->charnames[idx]);
3863
3864       wctype_table_finalize (&t);
3865
3866       if (verbose)
3867         WITH_CUR_LOCALE (fprintf (stderr, _("\
3868 %s: table for class \"%s\": %lu bytes\n"),
3869                                  "LC_CTYPE", ctype->classnames[nr],
3870                                  (unsigned long int) t.result_size));
3871
3872       ctype->class_3level[nr].iov_base = t.result;
3873       ctype->class_3level[nr].iov_len = t.result_size;
3874     }
3875
3876   /* Room for table of mappings.  */
3877   ctype->map_b = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3878   ctype->map32_b = (uint32_t **) xmalloc (ctype->map_collection_nr
3879                                           * sizeof (uint32_t *));
3880   ctype->map_3level = (struct iovec *)
3881     xmalloc (ctype->map_collection_nr * sizeof (struct iovec));
3882
3883   /* Fill in all mappings.  */
3884   for (idx = 0; idx < 2; ++idx)
3885     {
3886       unsigned int idx2;
3887
3888       /* Allocate table.  */
3889       ctype->map_b[idx] = (uint32_t *)
3890         xmalloc ((256 + 128) * sizeof (uint32_t));
3891
3892       /* Copy values from collection.  */
3893       for (idx2 = 0; idx2 < 256; ++idx2)
3894         ctype->map_b[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
3895
3896       /* Mirror first 127 entries.  We must take care not to map entry
3897          -1 because EOF == -1.  */
3898       for (idx2 = 0; idx2 < 127; ++idx2)
3899         ctype->map_b[idx][idx2] = ctype->map_b[idx][256 + idx2];
3900
3901       /* EOF must map to EOF.  */
3902       ctype->map_b[idx][127] = EOF;
3903     }
3904
3905   for (idx = 0; idx < ctype->map_collection_nr; ++idx)
3906     {
3907       unsigned int idx2;
3908
3909       /* Allocate table.  */
3910       ctype->map32_b[idx] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
3911
3912       /* Copy values from collection.  Default is identity mapping.  */
3913       for (idx2 = 0; idx2 < 256; ++idx2)
3914         ctype->map32_b[idx][idx2] =
3915           (ctype->map_collection[idx][idx2] != 0
3916            ? ctype->map_collection[idx][idx2]
3917            : idx2);
3918     }
3919
3920   for (nr = 0; nr < ctype->map_collection_nr; nr++)
3921     {
3922       struct wctrans_table t;
3923
3924       t.p = 7;
3925       t.q = 9;
3926       wctrans_table_init (&t);
3927
3928       for (idx = 0; idx < ctype->map_collection_act[nr]; ++idx)
3929         if (ctype->map_collection[nr][idx] != 0)
3930           wctrans_table_add (&t, ctype->charnames[idx],
3931                              ctype->map_collection[nr][idx]);
3932
3933       wctrans_table_finalize (&t);
3934
3935       if (verbose)
3936         WITH_CUR_LOCALE (fprintf (stderr, _("\
3937 %s: table for map \"%s\": %lu bytes\n"),
3938                                  "LC_CTYPE", ctype->mapnames[nr],
3939                                  (unsigned long int) t.result_size));
3940
3941       ctype->map_3level[nr].iov_base = t.result;
3942       ctype->map_3level[nr].iov_len = t.result_size;
3943     }
3944
3945   /* Extra array for class and map names.  */
3946   ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3947                                                 * sizeof (uint32_t));
3948   ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3949                                               * sizeof (uint32_t));
3950
3951   ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
3952   ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
3953
3954   /* Array for width information.  Because the expected widths are very
3955      small (never larger than 2) we use only one single byte.  This
3956      saves space.
3957      We put only printable characters in the table.  wcwidth is specified
3958      to return -1 for non-printable characters.  Doing the check here
3959      saves a run-time check.
3960      But we put L'\0' in the table.  This again saves a run-time check.  */
3961   {
3962     struct wcwidth_table t;
3963
3964     t.p = 7;
3965     t.q = 9;
3966     wcwidth_table_init (&t);
3967
3968     /* First set all the printable characters of the character set to
3969        the default width.  */
3970     curs = NULL;
3971     while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
3972       {
3973         struct charseq *data = (struct charseq *) vdata;
3974
3975         if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
3976           data->ucs4 = repertoire_find_value (ctype->repertoire,
3977                                               data->name, len);
3978
3979         if (data->ucs4 != ILLEGAL_CHAR_VALUE)
3980           {
3981             uint32_t *class_bits =
3982               find_idx (ctype, &ctype->class_collection, NULL,
3983                         &ctype->class_collection_act, data->ucs4);
3984
3985             if (class_bits != NULL && (*class_bits & BITw (tok_print)))
3986               wcwidth_table_add (&t, data->ucs4, charmap->width_default);
3987           }
3988       }
3989
3990     /* Now add the explicitly specified widths.  */
3991     if (charmap->width_rules != NULL)
3992       {
3993         size_t cnt;
3994
3995         for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
3996           {
3997             unsigned char bytes[charmap->mb_cur_max];
3998             int nbytes = charmap->width_rules[cnt].from->nbytes;
3999
4000             /* We have the range of characters for which the width is
4001                specified described using byte sequences of the multibyte
4002                charset.  We have to convert this to UCS4 now.  And we
4003                cannot simply convert the beginning and the end of the
4004                sequence, we have to iterate over the byte sequence and
4005                convert it for every single character.  */
4006             memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
4007
4008             while (nbytes < charmap->width_rules[cnt].to->nbytes
4009                    || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
4010                               nbytes) <= 0)
4011               {
4012                 /* Find the UCS value for `bytes'.  */
4013                 int inner;
4014                 uint32_t wch;
4015                 struct charseq *seq =
4016                   charmap_find_symbol (charmap, bytes, nbytes);
4017
4018                 if (seq == NULL)
4019                   wch = ILLEGAL_CHAR_VALUE;
4020                 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
4021                   wch = seq->ucs4;
4022                 else
4023                   wch = repertoire_find_value (ctype->repertoire, seq->name,
4024                                                strlen (seq->name));
4025
4026                 if (wch != ILLEGAL_CHAR_VALUE)
4027                   {
4028                     /* Store the value.  */
4029                     uint32_t *class_bits =
4030                       find_idx (ctype, &ctype->class_collection, NULL,
4031                                 &ctype->class_collection_act, wch);
4032
4033                     if (class_bits != NULL && (*class_bits & BITw (tok_print)))
4034                       wcwidth_table_add (&t, wch,
4035                                          charmap->width_rules[cnt].width);
4036                   }
4037
4038                 /* "Increment" the byte sequence.  */
4039                 inner = nbytes - 1;
4040                 while (inner >= 0 && bytes[inner] == 0xff)
4041                   --inner;
4042
4043                 if (inner < 0)
4044                   {
4045                     /* We have to extend the byte sequence.  */
4046                     if (nbytes >= charmap->width_rules[cnt].to->nbytes)
4047                       break;
4048
4049                     bytes[0] = 1;
4050                     memset (&bytes[1], 0, nbytes);
4051                     ++nbytes;
4052                   }
4053                 else
4054                   {
4055                     ++bytes[inner];
4056                     while (++inner < nbytes)
4057                       bytes[inner] = 0;
4058                   }
4059               }
4060           }
4061       }
4062
4063     /* Set the width of L'\0' to 0.  */
4064     wcwidth_table_add (&t, 0, 0);
4065
4066     wcwidth_table_finalize (&t);
4067
4068     if (verbose)
4069       WITH_CUR_LOCALE (fprintf (stderr, _("%s: table for width: %lu bytes\n"),
4070                                "LC_CTYPE", (unsigned long int) t.result_size));
4071
4072     ctype->width.iov_base = t.result;
4073     ctype->width.iov_len = t.result_size;
4074   }
4075
4076   /* Set MB_CUR_MAX.  */
4077   ctype->mb_cur_max = charmap->mb_cur_max;
4078
4079   /* Now determine the table for the transliteration information.
4080
4081      XXX It is not yet clear to me whether it is worth implementing a
4082      complicated algorithm which uses a hash table to locate the entries.
4083      For now I'll use a simple array which can be searched using binary
4084      search.  */
4085   if (ctype->translit_include != NULL)
4086     /* Traverse the locales mentioned in the `include' statements in a
4087        depth-first way and fold in their transliteration information.  */
4088     translit_flatten (ctype, charmap, &ctype->translit);
4089
4090   if (ctype->translit != NULL)
4091     {
4092       /* First count how many entries we have.  This is the upper limit
4093          since some entries from the included files might be overwritten.  */
4094       size_t number = 0;
4095       size_t cnt;
4096       struct translit_t *runp = ctype->translit;
4097       struct translit_t **sorted;
4098       size_t from_len, to_len;
4099
4100       while (runp != NULL)
4101         {
4102           ++number;
4103           runp = runp->next;
4104         }
4105
4106       /* Next we allocate an array large enough and fill in the values.  */
4107       sorted = (struct translit_t **) alloca (number
4108                                               * sizeof (struct translit_t **));
4109       runp = ctype->translit;
4110       number = 0;
4111       do
4112         {
4113           /* Search for the place where to insert this string.
4114              XXX Better use a real sorting algorithm later.  */
4115           size_t idx = 0;
4116           int replace = 0;
4117
4118           while (idx < number)
4119             {
4120               int res = wcscmp ((const wchar_t *) sorted[idx]->from,
4121                                 (const wchar_t *) runp->from);
4122               if (res == 0)
4123                 {
4124                   replace = 1;
4125                   break;
4126                 }
4127               if (res > 0)
4128                 break;
4129               ++idx;
4130             }
4131
4132           if (replace)
4133             sorted[idx] = runp;
4134           else
4135             {
4136               memmove (&sorted[idx + 1], &sorted[idx],
4137                        (number - idx) * sizeof (struct translit_t *));
4138               sorted[idx] = runp;
4139               ++number;
4140             }
4141
4142           runp = runp->next;
4143         }
4144       while (runp != NULL);
4145
4146       /* The next step is putting all the possible transliteration
4147          strings in one memory block so that we can write it out.
4148          We need several different blocks:
4149          - index to the from-string array
4150          - from-string array
4151          - index to the to-string array
4152          - to-string array.
4153       */
4154       from_len = to_len = 0;
4155       for (cnt = 0; cnt < number; ++cnt)
4156         {
4157           struct translit_to_t *srunp;
4158           from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4159           srunp = sorted[cnt]->to;
4160           while (srunp != NULL)
4161             {
4162               to_len += wcslen ((const wchar_t *) srunp->str) + 1;
4163               srunp = srunp->next;
4164             }
4165           /* Plus one for the extra NUL character marking the end of
4166              the list for the current entry.  */
4167           ++to_len;
4168         }
4169
4170       /* We can allocate the arrays for the results.  */
4171       ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
4172       ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
4173       ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
4174       ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
4175
4176       from_len = 0;
4177       to_len = 0;
4178       for (cnt = 0; cnt < number; ++cnt)
4179         {
4180           size_t len;
4181           struct translit_to_t *srunp;
4182
4183           ctype->translit_from_idx[cnt] = from_len;
4184           ctype->translit_to_idx[cnt] = to_len;
4185
4186           len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4187           wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
4188                    (const wchar_t *) sorted[cnt]->from, len);
4189           from_len += len;
4190
4191           ctype->translit_to_idx[cnt] = to_len;
4192           srunp = sorted[cnt]->to;
4193           while (srunp != NULL)
4194             {
4195               len = wcslen ((const wchar_t *) srunp->str) + 1;
4196               wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
4197                        (const wchar_t *) srunp->str, len);
4198               to_len += len;
4199               srunp = srunp->next;
4200             }
4201           ctype->translit_to_tbl[to_len++] = L'\0';
4202         }
4203
4204       /* Store the information about the length.  */
4205       ctype->translit_idx_size = number;
4206       ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
4207       ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
4208     }
4209   else
4210     {
4211       /* Provide some dummy pointers since we have nothing to write out.  */
4212       static uint32_t no_str = { 0 };
4213
4214       ctype->translit_from_idx = &no_str;
4215       ctype->translit_from_tbl = &no_str;
4216       ctype->translit_to_tbl = &no_str;
4217       ctype->translit_idx_size = 0;
4218       ctype->translit_from_tbl_size = 0;
4219       ctype->translit_to_tbl_size = 0;
4220     }
4221 }