locale/programs/ld-ctype.c

   1 /* Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Library General Public License as
   7    published by the Free Software Foundation; either version 2 of the
   8    License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Library General Public License for more details.
  14
  15    You should have received a copy of the GNU Library General Public
  16    License along with the GNU C Library; see the file COPYING.LIB.  If not,
  17    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  18    Boston, MA 02111-1307, USA.  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 # include <config.h>
  22 #endif
  23
  24 #include <alloca.h>
  25 #include <endian.h>
  26 #include <limits.h>
  27 #include <string.h>
  28
  29 #include "locales.h"
  30 #include "localeinfo.h"
  31 #include "langinfo.h"
  32 #include "locfile-token.h"
  33 #include "stringtrans.h"
  34
  35 /* Uncomment the following line in the production version.  */
  36 /* define NDEBUG 1 */
  37 #include <assert.h>
  38
  39
  40 void *xmalloc (size_t __n);
  41 void *xcalloc (size_t __n, size_t __s);
  42 void *xrealloc (void *__ptr, size_t __n);
  43
  44
  45 /* The bit used for representing a special class.  */
  46 #define BITPOS(class) ((class) - tok_upper)
  47 #define BIT(class) (1 << BITPOS (class))
  48
  49 #define ELEM(ctype, collection, idx, value)                                   \
  50   *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,     \
  51              &ctype->collection##_act idx, value)
  52
  53 #define SWAPU32(w) \
  54   (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))
  55
  56 #define SWAPU16(w) \
  57   ((((w)  >> 8) & 0xff) | (((w) & 0xff) << 8))
  58
  59
  60 /* To be compatible with former implementations we for now restrict
  61    the number of bits for character classes to 16.  When compatibility
  62    is not necessary anymore increase the number to 32.  */
  63 #define char_class_t u_int16_t
  64 #define CHAR_CLASS_TRANS SWAPU16
  65 #define char_class32_t u_int32_t
  66 #define CHAR_CLASS32_TRANS SWAPU32
  67
  68
  69 /* The real definition of the struct for the LC_CTYPE locale.  */
/* The real definition of the struct for the LC_CTYPE locale.  It holds
   the state collected while the LC_CTYPE section is parsed and the
   arrays which ctype_output finally writes out.  */
struct locale_ctype_t
{
  /* Character values known so far.  Slot N < 256 holds N (set up in
     ctype_startup); find_idx appends every other value it is asked
     for.  The slot number of a character is also its index in the
     class and map tables below.  */
  unsigned int *charnames;
  size_t charnames_max;         /* Allocated number of slots.  */
  size_t charnames_act;         /* Number of slots in use.  */

  /* We will allow up to 8 * sizeof(u_int32_t) - 1 character classes.  */
#define MAX_NR_CHARCLASS (8 * sizeof (u_int32_t) - 1)
  size_t nr_charclass;          /* Number of registered class names.  */
  const char *classnames[MAX_NR_CHARCLASS];
  /* Bit of the class currently being defined; zero outside of a class
     definition (see ctype_class_start / ctype_class_end).  */
  unsigned long int current_class_mask;
  /* Character given by the last ctype_class_from call; start of a
     range completed by ctype_class_to.  */
  unsigned int last_class_char;
  /* Class bit mask per character slot.  */
  u_int32_t *class_collection;
  size_t class_collection_max;  /* Allocated number of entries.  */
  size_t class_collection_act;  /* Maintained by find_idx.  */
  /* One bit per class for which ctype_class_start has been called.  */
  unsigned long int class_done;

  /* If the following number ever turns out to be too small simply
     increase it.  But I doubt it will.  --drepper@gnu */
#define MAX_NR_CHARMAP 16
  const char *mapnames[MAX_NR_CHARMAP];
  /* One mapping table per character map, indexed by character slot.  */
  u_int32_t *map_collection[MAX_NR_CHARMAP];
  size_t map_collection_max[MAX_NR_CHARMAP];
  size_t map_collection_act[MAX_NR_CHARMAP];
  size_t map_collection_nr;     /* Number of registered maps.  */
  /* Index of the map currently being defined; MAX_NR_CHARMAP when no
     map definition is active.  */
  size_t last_map_idx;
  /* Source character of the mapping pair being read, or
     ILLEGAL_CHAR_VALUE when none is pending.  */
  unsigned int from_map_char;
  int toupper_done;             /* Set by ctype_map_start.  */
  int tolower_done;             /* Set by ctype_map_start.  */

  /* The arrays for the binary representation.  ctype_output writes
     them; they are set up by allocate_arrays.  */
  u_int32_t plane_size;         /* Written as _NL_CTYPE_HASH_SIZE.  */
  u_int32_t plane_cnt;          /* Written as _NL_CTYPE_HASH_LAYERS.  */
  char_class_t *ctype_b;        /* Written with 256 + 128 entries.  */
  char_class32_t *ctype32_b;    /* plane_size * plane_cnt entries.  */
  u_int32_t *names_el;          /* plane_size * plane_cnt entries.  */
  u_int32_t *names_eb;          /* plane_size * plane_cnt entries.  */
  /* Per-map tables, each written with plane_size * plane_cnt + 128
     entries.  NOTE(review): the _eb/_el suffixes presumably mean
     big/little endian -- confirm against allocate_arrays.  */
  u_int32_t **map_eb;
  u_int32_t **map_el;
  u_int32_t *class_name_ptr;
  u_int32_t *map_name_ptr;
  unsigned char *width;         /* plane_size * plane_cnt bytes.  */
  u_int32_t mb_cur_max;
  const char *codeset_name;
};
 115
 116
/* Prototypes for local functions.  */

/* Register a new character class NAME.  */
static void ctype_class_newP (struct linereader *lr,
                              struct locale_ctype_t *ctype, const char *name);
/* Register a new character map NAME and allocate its table.  */
static void ctype_map_newP (struct linereader *lr,
                            struct locale_ctype_t *ctype,
                            const char *name, struct charset_t *charset);
/* Return the address of the entry for character IDX in *TABLE, growing
   the name array and the table as needed.  */
static u_int32_t *find_idx (struct locale_ctype_t *ctype, u_int32_t **table,
                            size_t *max, size_t *act, unsigned int idx);
/* Called by ctype_finish to set default values for classes not
   specified.  */
static void set_class_defaults (struct locale_ctype_t *ctype,
                                struct charset_t *charset);
/* Called by ctype_output before the data is written.  */
static void allocate_arrays (struct locale_ctype_t *ctype,
                             struct charset_t *charset);
 129
 130
 131 void
 132 ctype_startup (struct linereader *lr, struct localedef_t *locale,
 133                struct charset_t *charset)
 134 {
 135   unsigned int cnt;
 136   struct locale_ctype_t *ctype;
 137
 138   /* It is important that we always use UCS1 encoding for strings now.  */
 139   encoding_method = ENC_UCS1;
 140
 141   /* Allocate the needed room.  */
 142   locale->categories[LC_CTYPE].ctype = ctype =
 143     (struct locale_ctype_t *) xmalloc (sizeof (struct locale_ctype_t));
 144
 145   /* We have no names seen yet.  */
 146   ctype->charnames_max = charset->mb_cur_max == 1 ? 256 : 512;
 147   ctype->charnames =
 148     (unsigned int *) xmalloc (ctype->charnames_max * sizeof (unsigned int));
 149   for (cnt = 0; cnt < 256; ++cnt)
 150     ctype->charnames[cnt] = cnt;
 151   ctype->charnames_act = 256;
 152
 153   /* Fill character class information.  */
 154   ctype->nr_charclass = 0;
 155   ctype->current_class_mask = 0;
 156   ctype->last_class_char = ILLEGAL_CHAR_VALUE;
 157   /* The order of the following instructions determines the bit
 158      positions!  */
 159   ctype_class_newP (lr, ctype, "upper");
 160   ctype_class_newP (lr, ctype, "lower");
 161   ctype_class_newP (lr, ctype, "alpha");
 162   ctype_class_newP (lr, ctype, "digit");
 163   ctype_class_newP (lr, ctype, "xdigit");
 164   ctype_class_newP (lr, ctype, "space");
 165   ctype_class_newP (lr, ctype, "print");
 166   ctype_class_newP (lr, ctype, "graph");
 167   ctype_class_newP (lr, ctype, "blank");
 168   ctype_class_newP (lr, ctype, "cntrl");
 169   ctype_class_newP (lr, ctype, "punct");
 170   ctype_class_newP (lr, ctype, "alnum");
 171
 172   ctype->class_collection_max = charset->mb_cur_max == 1 ? 256 : 512;
 173   ctype->class_collection
 174     = (u_int32_t *) xmalloc (sizeof (unsigned long int)
 175                              * ctype->class_collection_max);
 176   memset (ctype->class_collection, '\0',
 177           sizeof (unsigned long int) * ctype->class_collection_max);
 178   ctype->class_collection_act = 256;
 179
 180   /* Fill character map information.  */
 181   ctype->map_collection_nr = 0;
 182   ctype->last_map_idx = MAX_NR_CHARMAP;
 183   ctype->from_map_char = ILLEGAL_CHAR_VALUE;
 184   ctype_map_newP (lr, ctype, "toupper", charset);
 185   ctype_map_newP (lr, ctype, "tolower", charset);
 186
 187   /* Fill first 256 entries in `toupper' and `tolower' arrays.  */
 188   for (cnt = 0; cnt < 256; ++cnt)
 189     {
 190       ctype->map_collection[0][cnt] = cnt;
 191       ctype->map_collection[1][cnt] = cnt;
 192     }
 193 }
 194
 195
 196 void
 197 ctype_finish (struct localedef_t *locale, struct charset_t *charset)
 198 {
 199   /* See POSIX.2, table 2-6 for the meaning of the following table.  */
 200 #define NCLASS 12
 201   static const struct
 202   {
 203     const char *name;
 204     const char allow[NCLASS];
 205   }
 206   valid_table[NCLASS] =
 207   {
 208     /* The order is important.  See token.h for more information.
 209        M = Always, D = Default, - = Permitted, X = Mutually exclusive  */
 210     { "upper",  "--MX-XDDXXX-" },
 211     { "lower",  "--MX-XDDXXX-" },
 212     { "alpha",  "---X-XDDXXX-" },
 213     { "digit",  "XXX--XDDXXX-" },
 214     { "xdigit", "-----XDDXXX-" },
 215     { "space",  "XXXXX------X" },
 216     { "print",  "---------X--" },
 217     { "graph",  "---------X--" },
 218     { "blank",  "XXXXXM-----X" },
 219     { "cntrl",  "XXXXX-XX--XX" },
 220     { "punct",  "XXXXX-DD-X-X" },
 221     { "alnum",  "-----XDDXXX-" }
 222   };
 223   size_t cnt;
 224   int cls1, cls2;
 225   unsigned int space_value;
 226   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 227
 228   /* Set default value for classes not specified.  */
 229   set_class_defaults (ctype, charset);
 230
 231   /* Check according to table.  */
 232   for (cnt = 0; cnt < ctype->class_collection_max; ++cnt)
 233     {
 234       unsigned long int tmp;
 235
 236       tmp = ctype->class_collection[cnt];
 237       if (tmp == 0)
 238         continue;
 239
 240       for (cls1 = 0; cls1 < NCLASS; ++cls1)
 241         if ((tmp & (1 << cls1)) != 0)
 242           for (cls2 = 0; cls2 < NCLASS; ++cls2)
 243             if (valid_table[cls1].allow[cls2] != '-')
 244               {
 245                 int eq = (tmp & (1 << cls2)) != 0;
 246                 switch (valid_table[cls1].allow[cls2])
 247                   {
 248                   case 'M':
 249                     if (!eq)
 250                       {
 251                         char buf[17];
 252                         char *cp = buf;
 253                         unsigned int value;
 254
 255                         value = ctype->charnames[cnt];
 256
 257                         if ((value & 0xff000000) != 0)
 258                           cp += sprintf (cp, "\\%o", (value >> 24) & 0xff);
 259                         if ((value & 0xffff0000) != 0)
 260                           cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
 261                         if ((value & 0xffffff00) != 0)
 262                           cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
 263                         sprintf (cp, "\\%o", value & 0xff);
 264
 265                         if (!be_quiet)
 266                           error (0, 0, _("\
 267 character %s'%s' in class `%s' must be in class `%s'"), value > 256 ? "L" : "",
 268                                  cp, valid_table[cls1].name,
 269                                  valid_table[cls2].name);
 270                       }
 271                     break;
 272
 273                   case 'X':
 274                     if (eq)
 275                       {
 276                         char buf[17];
 277                         char *cp = buf;
 278                         unsigned int value;
 279
 280                         value = ctype->charnames[cnt];
 281
 282                         if ((value & 0xff000000) != 0)
 283                           cp += sprintf (cp, "\\%o", value >> 24);
 284                         if ((value & 0xffff0000) != 0)
 285                           cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
 286                         if ((value & 0xffffff00) != 0)
 287                           cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
 288                         sprintf (cp, "\\%o", value & 0xff);
 289
 290                         if (!be_quiet)
 291                           error (0, 0, _("\
 292 character %s'%s' in class `%s' must not be in class `%s'"),
 293                                  value > 256 ? "L" : "", cp,
 294                                  valid_table[cls1].name,
 295                                  valid_table[cls2].name);
 296                       }
 297                     break;
 298
 299                   case 'D':
 300                     ctype->class_collection[cnt] |= 1 << cls2;
 301                     break;
 302
 303                   default:
 304                     error (5, 0, _("internal error in %s, line %u"),
 305                            __FUNCTION__, __LINE__);
 306                   }
 307               }
 308     }
 309
 310   /* ... and now test <SP> as a special case.  */
 311   space_value = charset_find_value (charset, "SP", 2);
 312   if ((wchar_t) space_value == ILLEGAL_CHAR_VALUE && !be_quiet)
 313     error (0, 0, _("character <SP> not defined in character map"));
 314   else if (((cnt = BITPOS (tok_space),
 315              (ELEM (ctype, class_collection, , space_value)
 316               & BIT (tok_space)) == 0)
 317             || (cnt = BITPOS (tok_blank),
 318                 (ELEM (ctype, class_collection, , space_value)
 319                  & BIT (tok_blank)) == 0))
 320            && !be_quiet)
 321     error (0, 0, _("<SP> character not in class `%s'"),
 322            valid_table[cnt].name);
 323   else if (((cnt = BITPOS (tok_punct),
 324              (ELEM (ctype, class_collection, , space_value)
 325               & BIT (tok_punct)) != 0)
 326             || (cnt = BITPOS (tok_graph),
 327                 (ELEM (ctype, class_collection, , space_value)
 328                  & BIT (tok_graph))
 329                 != 0))
 330            && !be_quiet)
 331     error (0, 0, _("<SP> character must not be in class `%s'"),
 332            valid_table[cnt].name);
 333   else
 334     ELEM (ctype, class_collection, , space_value) |= BIT (tok_print);
 335
 336   /* Now that the tests are done make sure the name array contains all
 337      characters which are handled in the WIDTH section of the
 338      character set definition file.  */
 339   if (charset->width_rules != NULL)
 340     for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
 341       {
 342         size_t inner;
 343         for (inner = charset->width_rules[cnt].from;
 344              inner <= charset->width_rules[cnt].to; ++inner)
 345           (void) find_idx (ctype, NULL, NULL, NULL, inner);
 346       }
 347 }
 348
 349
 350 void
 351 ctype_output (struct localedef_t *locale, struct charset_t *charset,
 352               const char *output_path)
 353 {
 354   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 355   const size_t nelems = (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)
 356                          + 2 * (ctype->map_collection_nr - 2));
 357   struct iovec iov[2 + nelems + ctype->nr_charclass
 358                   + ctype->map_collection_nr];
 359   struct locale_file data;
 360   u_int32_t idx[nelems];
 361   size_t elem, cnt, offset, total;
 362
 363
 364   if ((locale->binary & (1 << LC_CTYPE)) != 0)
 365     {
 366       iov[0].iov_base = ctype;
 367       iov[0].iov_len = locale->len[LC_CTYPE];
 368
 369       write_locale_data (output_path, "LC_CTYPE", 1, iov);
 370
 371       return;
 372     }
 373
 374
 375   /* Now prepare the output: Find the sizes of the table we can use.  */
 376   allocate_arrays (ctype, charset);
 377
 378   data.magic = LIMAGIC (LC_CTYPE);
 379   data.n = nelems;
 380   iov[0].iov_base = (void *) &data;
 381   iov[0].iov_len = sizeof (data);
 382
 383   iov[1].iov_base = (void *) idx;
 384   iov[1].iov_len = sizeof (idx);
 385
 386   idx[0] = iov[0].iov_len + iov[1].iov_len;
 387   offset = 0;
 388
 389   for (elem = 0; elem < nelems; ++elem)
 390     {
 391       if (elem < _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE))
 392         switch (elem)
 393           {
 394 #define CTYPE_DATA(name, base, len)                                           \
 395           case _NL_ITEM_INDEX (name):                                         \
 396             iov[2 + elem + offset].iov_base = (base);                         \
 397             iov[2 + elem + offset].iov_len = (len);                           \
 398             if (elem + 1 < nelems)                                            \
 399               idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;     \
 400             break
 401
 402           CTYPE_DATA (_NL_CTYPE_CLASS,
 403                       ctype->ctype_b,
 404                       (256 + 128) * sizeof (char_class_t));
 405
 406           CTYPE_DATA (_NL_CTYPE_TOUPPER_EB,
 407                       ctype->map_eb[0],
 408                       (ctype->plane_size * ctype->plane_cnt + 128)
 409                       * sizeof (u_int32_t));
 410           CTYPE_DATA (_NL_CTYPE_TOLOWER_EB,
 411                       ctype->map_eb[1],
 412                       (ctype->plane_size * ctype->plane_cnt + 128)
 413                       * sizeof (u_int32_t));
 414
 415           CTYPE_DATA (_NL_CTYPE_TOUPPER_EL,
 416                       ctype->map_el[0],
 417                       (ctype->plane_size * ctype->plane_cnt + 128)
 418                       * sizeof (u_int32_t));
 419           CTYPE_DATA (_NL_CTYPE_TOLOWER_EL,
 420                       ctype->map_el[1],
 421                       (ctype->plane_size * ctype->plane_cnt + 128)
 422                       * sizeof (u_int32_t));
 423
 424           CTYPE_DATA (_NL_CTYPE_CLASS32,
 425                       ctype->ctype32_b,
 426                       (ctype->plane_size * ctype->plane_cnt
 427                        * sizeof (char_class32_t)));
 428
 429           CTYPE_DATA (_NL_CTYPE_NAMES_EB,
 430                       ctype->names_eb, (ctype->plane_size * ctype->plane_cnt
 431                                         * sizeof (u_int32_t)));
 432           CTYPE_DATA (_NL_CTYPE_NAMES_EL,
 433                       ctype->names_el, (ctype->plane_size * ctype->plane_cnt
 434                                         * sizeof (u_int32_t)));
 435
 436           CTYPE_DATA (_NL_CTYPE_HASH_SIZE,
 437                       &ctype->plane_size, sizeof (u_int32_t));
 438           CTYPE_DATA (_NL_CTYPE_HASH_LAYERS,
 439                       &ctype->plane_cnt, sizeof (u_int32_t));
 440
 441           case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
 442             /* The class name array.  */
 443             total = 0;
 444             for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
 445               {
 446                 iov[2 + elem + offset].iov_base
 447                   = (void *) ctype->classnames[cnt];
 448                 iov[2 + elem + offset].iov_len
 449                   = strlen (ctype->classnames[cnt]) + 1;
 450                 total += iov[2 + elem + offset].iov_len;
 451               }
 452             iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
 453             iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
 454             total += 1 + (4 - ((total + 1) % 4));
 455
 456             if (elem + 1 < nelems)
 457               idx[elem + 1] = idx[elem] + total;
 458             break;
 459
 460           case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
 461             /* The class name array.  */
 462             total = 0;
 463             for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
 464               {
 465                 iov[2 + elem + offset].iov_base
 466                   = (void *) ctype->mapnames[cnt];
 467                 iov[2 + elem + offset].iov_len
 468                   = strlen (ctype->mapnames[cnt]) + 1;
 469                 total += iov[2 + elem + offset].iov_len;
 470               }
 471             iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
 472             iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
 473             total += 1 + (4 - ((total + 1) % 4));
 474
 475             if (elem + 1 < nelems)
 476               idx[elem + 1] = idx[elem] + total;
 477             break;
 478
 479           CTYPE_DATA (_NL_CTYPE_WIDTH,
 480                       ctype->width, ctype->plane_size * ctype->plane_cnt);
 481
 482           CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
 483                       &ctype->mb_cur_max, sizeof (u_int32_t));
 484
 485           case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
 486             total = strlen (ctype->codeset_name) + 1;
 487             if (total % 4 == 0)
 488               iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
 489             else
 490               {
 491                 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
 492                 memcpy (iov[2 + elem + offset].iov_base, ctype->codeset_name,
 493                         total);
 494                 total = (total + 3) & ~3;
 495               }
 496             iov[2 + elem + offset].iov_len = total;
 497             if (elem + 1 < nelems)
 498               idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
 499             break;
 500
 501           default:
 502             assert (! "unknown CTYPE element");
 503           }
 504       else
 505         {
 506           /* Handle extra maps.  */
 507           size_t nr = (elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) >> 1;
 508
 509           if (((elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) & 1) == 0)
 510             iov[2 + elem + offset].iov_base = ctype->map_eb[nr];
 511           else
 512             iov[2 + elem + offset].iov_base = ctype->map_el[nr];
 513
 514           iov[2 + elem + offset].iov_len = ((ctype->plane_size
 515                                              * ctype->plane_cnt + 128)
 516                                             * sizeof (u_int32_t));
 517
 518           if (elem + 1 < nelems)
 519             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
 520         }
 521     }
 522
 523   assert (2 + elem + offset == (nelems + ctype->nr_charclass
 524                                 + ctype->map_collection_nr + 2));
 525
 526   write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
 527 }
 528
 529
 530 /* Character class handling.  */
 531 void
 532 ctype_class_new (struct linereader *lr, struct localedef_t *locale,
 533                  enum token_t tok, struct token *code,
 534                  struct charset_t *charset)
 535 {
 536   ctype_class_newP (lr, locale->categories[LC_CTYPE].ctype,
 537                     code->val.str.start);
 538 }
 539
 540
 541 int
 542 ctype_is_charclass (struct linereader *lr, struct localedef_t *locale,
 543                     const char *name)
 544 {
 545   size_t cnt;
 546
 547   for (cnt = 0; cnt < locale->categories[LC_CTYPE].ctype->nr_charclass; ++cnt)
 548     if (strcmp (name, locale->categories[LC_CTYPE].ctype->classnames[cnt])
 549         == 0)
 550       return 1;
 551
 552   return 0;
 553 }
 554
 555
 556 void
 557 ctype_class_start (struct linereader *lr, struct localedef_t *locale,
 558                    enum token_t tok, const char *str,
 559                    struct charset_t *charset)
 560 {
 561   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 562   size_t cnt;
 563
 564   switch (tok)
 565     {
 566     case tok_upper:
 567       str = "upper";
 568       break;
 569     case tok_lower:
 570       str = "lower";
 571       break;
 572     case tok_alpha:
 573       str = "alpha";
 574       break;
 575     case tok_digit:
 576       str = "digit";
 577       break;
 578     case tok_xdigit:
 579       str = "xdigit";
 580       break;
 581     case tok_space:
 582       str = "space";
 583       break;
 584     case tok_print:
 585       str = "print";
 586       break;
 587     case tok_graph:
 588       str = "graph";
 589       break;
 590     case tok_blank:
 591       str = "blank";
 592       break;
 593     case tok_cntrl:
 594       str = "cntrl";
 595       break;
 596     case tok_punct:
 597       str = "punct";
 598       break;
 599     case tok_alnum:
 600       str = "alnum";
 601       break;
 602     case tok_ident:
 603       break;
 604     default:
 605       assert (! "illegal token as class name: should not happen");
 606     }
 607
 608   for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
 609     if (strcmp (str, ctype->classnames[cnt]) == 0)
 610       break;
 611
 612   if (cnt >= ctype->nr_charclass)
 613     assert (! "unknown class in class definition: should not happen");
 614
 615   ctype->class_done |= BIT (tok);
 616
 617   ctype->current_class_mask = 1 << cnt;
 618   ctype->last_class_char = ILLEGAL_CHAR_VALUE;
 619 }
 620
 621
 622 void
 623 ctype_class_from (struct linereader *lr, struct localedef_t *locale,
 624                   struct token *code, struct charset_t *charset)
 625 {
 626   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 627   unsigned int value;
 628
 629   value = charset_find_value (charset, code->val.str.start, code->val.str.len);
 630
 631   ctype->last_class_char = value;
 632
 633   if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
 634     /* In the LC_CTYPE category it is no error when a character is
 635        not found.  This has to be ignored silently.  */
 636     return;
 637
 638   *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
 639              &ctype->class_collection_act, value)
 640     |= ctype->current_class_mask;
 641 }
 642
 643
 644 void
 645 ctype_class_to (struct linereader *lr, struct localedef_t *locale,
 646                 struct token *code, struct charset_t *charset)
 647 {
 648   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 649   unsigned int value, cnt;
 650
 651   value = charset_find_value (charset, code->val.str.start, code->val.str.len);
 652
 653   assert (value >= ctype->last_class_char);
 654
 655   for (cnt = ctype->last_class_char + 1; cnt <= value; ++cnt)
 656     *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
 657                &ctype->class_collection_act, cnt)
 658       |= ctype->current_class_mask;
 659
 660   ctype->last_class_char = ILLEGAL_CHAR_VALUE;
 661 }
 662
 663
 664 void
 665 ctype_class_end (struct linereader *lr, struct localedef_t *locale)
 666 {
 667   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 668
 669   /* We have no special actions to perform here.  */
 670   ctype->current_class_mask = 0;
 671   ctype->last_class_char = ILLEGAL_CHAR_VALUE;
 672 }
 673
 674
 675 /* Character map handling.  */
 676 void
 677 ctype_map_new (struct linereader *lr, struct localedef_t *locale,
 678                enum token_t tok, struct token *code,
 679                struct charset_t *charset)
 680 {
 681   ctype_map_newP (lr, locale->categories[LC_CTYPE].ctype,
 682                   code->val.str.start, charset);
 683 }
 684
 685
 686 int
 687 ctype_is_charconv (struct linereader *lr, struct localedef_t *locale,
 688                    const char *name)
 689 {
 690   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 691   size_t cnt;
 692
 693   for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
 694     if (strcmp (name, ctype->mapnames[cnt]) == 0)
 695       return 1;
 696
 697   return 0;
 698 }
 699
 700
 701 void
 702 ctype_map_start (struct linereader *lr, struct localedef_t *locale,
 703                  enum token_t tok, const char *name, struct charset_t *charset)
 704 {
 705   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 706   size_t cnt;
 707
 708   switch (tok)
 709     {
 710     case tok_toupper:
 711       ctype->toupper_done = 1;
 712       name = "toupper";
 713       break;
 714     case tok_tolower:
 715       ctype->tolower_done = 1;
 716       name = "tolower";
 717       break;
 718     case tok_ident:
 719       break;
 720     default:
 721       assert (! "unknown token in category `LC_CTYPE' should not happen");
 722     }
 723
 724   for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
 725     if (strcmp (name, ctype->mapnames[cnt]) == 0)
 726       break;
 727
 728   if (cnt == ctype->map_collection_nr)
 729     assert (! "unknown token in category `LC_CTYPE' should not happen");
 730
 731   ctype->last_map_idx = cnt;
 732   ctype->from_map_char = ILLEGAL_CHAR_VALUE;
 733 }
 734
 735
 736 void
 737 ctype_map_from (struct linereader *lr, struct localedef_t *locale,
 738                 struct token *code, struct charset_t *charset)
 739 {
 740   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 741   unsigned int value;
 742
 743   value = charset_find_value (charset, code->val.str.start, code->val.str.len);
 744
 745   if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
 746     /* In the LC_CTYPE category it is no error when a character is
 747        not found.  This has to be ignored silently.  */
 748     return;
 749
 750   assert (ctype->last_map_idx < ctype->map_collection_nr);
 751
 752   ctype->from_map_char = value;
 753 }
 754
 755
 756 void
 757 ctype_map_to (struct linereader *lr, struct localedef_t *locale,
 758               struct token *code, struct charset_t *charset)
 759 {
 760   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 761   unsigned int value;
 762
 763   value = charset_find_value (charset, code->val.str.start, code->val.str.len);
 764
 765   if ((wchar_t) ctype->from_map_char == ILLEGAL_CHAR_VALUE
 766       || (wchar_t) value == ILLEGAL_CHAR_VALUE)
 767     {
 768       /* In the LC_CTYPE category it is no error when a character is
 769          not found.  This has to be ignored silently.  */
 770       ctype->from_map_char = ILLEGAL_CHAR_VALUE;
 771       return;
 772     }
 773
 774   *find_idx (ctype, &ctype->map_collection[ctype->last_map_idx],
 775              &ctype->map_collection_max[ctype->last_map_idx],
 776              &ctype->map_collection_act[ctype->last_map_idx],
 777              ctype->from_map_char) = value;
 778
 779   ctype->from_map_char = ILLEGAL_CHAR_VALUE;
 780 }
 781
 782
 783 void
 784 ctype_map_end (struct linereader *lr, struct localedef_t *locale)
 785 {
 786   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 787
 788   ctype->last_map_idx = MAX_NR_CHARMAP;
 789   ctype->from_map_char = ILLEGAL_CHAR_VALUE;
 790 }
 791
 792
 793 /* Local functions.  */
 794 static void
 795 ctype_class_newP (struct linereader *lr, struct locale_ctype_t *ctype,
 796                   const char *name)
 797 {
 798   size_t cnt;
 799
 800   for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
 801     if (strcmp (ctype->classnames[cnt], name) == 0)
 802       break;
 803
 804   if (cnt < ctype->nr_charclass)
 805     {
 806       lr_error (lr, _("character class `%s' already defined"), name);
 807       return;
 808     }
 809
 810   if (ctype->nr_charclass == MAX_NR_CHARCLASS)
 811     /* Exit code 2 is prescribed in P1003.2b.  */
 812     error (2, 0, _("\
 813 implementation limit: no more than %d character classes allowed"),
 814            MAX_NR_CHARCLASS);
 815
 816   ctype->classnames[ctype->nr_charclass++] = name;
 817 }
 818
 819
 820 static void
 821 ctype_map_newP (struct linereader *lr, struct locale_ctype_t *ctype,
 822                 const char *name, struct charset_t *charset)
 823 {
 824   size_t max_chars = 0;
 825   size_t cnt;
 826
 827   for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
 828     {
 829       if (strcmp (ctype->mapnames[cnt], name) == 0)
 830         break;
 831
 832       if (max_chars < ctype->map_collection_max[cnt])
 833         max_chars = ctype->map_collection_max[cnt];
 834     }
 835
 836   if (cnt < ctype->map_collection_nr)
 837     {
 838       lr_error (lr, _("character map `%s' already defined"), name);
 839       return;
 840     }
 841
 842   if (ctype->map_collection_nr == MAX_NR_CHARMAP)
 843     /* Exit code 2 is prescribed in P1003.2b.  */
 844     error (2, 0, _("\
 845 implementation limit: no more than %d character maps allowed"),
 846            MAX_NR_CHARMAP);
 847
 848   ctype->mapnames[cnt] = name;
 849
 850   if (max_chars == 0)
 851     ctype->map_collection_max[cnt] = charset->mb_cur_max == 1 ? 256 : 512;
 852   else
 853     ctype->map_collection_max[cnt] = max_chars;
 854
 855   ctype->map_collection[cnt] = (u_int32_t *)
 856     xmalloc (sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
 857   memset (ctype->map_collection[cnt], '\0',
 858           sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
 859   ctype->map_collection_act[cnt] = 256;
 860
 861   ++ctype->map_collection_nr;
 862 }
 863
 864
 865 /* We have to be prepared that TABLE, MAX, and ACT can be NULL.  This
 866    is possible if we only want ot extend the name array.  */
 867 static u_int32_t *
 868 find_idx (struct locale_ctype_t *ctype, u_int32_t **table, size_t *max,
 869           size_t *act, unsigned int idx)
 870 {
 871   size_t cnt;
 872
 873   if (idx < 256)
 874     return table == NULL ? NULL : &(*table)[idx];
 875
 876   for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
 877     if (ctype->charnames[cnt] == idx)
 878       break;
 879
 880   /* We have to distinguish two cases: the names is found or not.  */
 881   if (cnt == ctype->charnames_act)
 882     {
 883       /* Extend the name array.  */
 884       if (ctype->charnames_act == ctype->charnames_max)
 885         {
 886           ctype->charnames_max *= 2;
 887           ctype->charnames = (unsigned int *)
 888             xrealloc (ctype->charnames,
 889                       sizeof (unsigned int) * ctype->charnames_max);
 890         }
 891       ctype->charnames[ctype->charnames_act++] = idx;
 892     }
 893
 894   if (table == NULL)
 895     /* We have done everything we are asked to do.  */
 896     return NULL;
 897
 898   if (cnt >= *act)
 899     {
 900       if (cnt >= *max)
 901         {
 902           size_t old_max = *max;
 903           do
 904             *max *= 2;
 905           while (*max <= cnt);
 906
 907           *table =
 908             (u_int32_t *) xrealloc (*table, *max * sizeof (unsigned long int));
 909           memset (&(*table)[old_max], '\0',
 910                   (*max - old_max) * sizeof (u_int32_t));
 911         }
 912
 913       (*table)[cnt] = 0;
 914       *act = cnt;
 915     }
 916
 917   return &(*table)[cnt];
 918 }
 919
 920
 921 static void
 922 set_class_defaults (struct locale_ctype_t *ctype, struct charset_t *charset)
 923 {
 924   /* These function defines the default values for the classes and conversions
 925      according to POSIX.2 2.5.2.1.
 926      It may seem that the order of these if-blocks is arbitrary but it is NOT.
 927      Don't move them unless you know what you do!  */
 928
 929   void set_default (int bit, int from, int to)
 930     {
 931       char tmp[2];
 932       int ch;
 933       /* Define string.  */
 934       strcpy (tmp, "?");
 935
 936       for (ch = from; ch <= to; ++ch)
 937         {
 938           unsigned int value;
 939           tmp[0] = ch;
 940
 941           value = charset_find_value (charset, tmp, 1);
 942           if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
 943             {
 944               error (0, 0, _("\
 945 character `%s' not defined while needed as default value"),
 946                      tmp);
 947               continue;
 948             }
 949           else
 950             ELEM (ctype, class_collection, , value) |= bit;
 951         }
 952     }
 953
 954   /* Set default values if keyword was not present.  */
 955   if ((ctype->class_done & BIT (tok_upper)) == 0)
 956     /* "If this keyword [lower] is not specified, the lowercase letters
 957         `A' through `Z', ..., shall automatically belong to this class,
 958         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
 959     set_default (BIT (tok_upper), 'A', 'Z');
 960
 961   if ((ctype->class_done & BIT (tok_lower)) == 0)
 962     /* "If this keyword [lower] is not specified, the lowercase letters
 963         `a' through `z', ..., shall automatically belong to this class,
 964         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
 965     set_default (BIT (tok_lower), 'a', 'z');
 966
 967   if ((ctype->class_done & BIT (tok_alpha)) == 0)
 968     {
 969       /* Table 2-6 in P1003.2 says that characters in class `upper' or
 970          class `lower' *must* be in class `alpha'.  */
 971       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
 972       size_t cnt;
 973
 974       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
 975         if ((ctype->class_collection[cnt] & mask) != 0)
 976           ctype->class_collection[cnt] |= BIT (tok_alpha);
 977     }
 978
 979   if ((ctype->class_done & BIT (tok_digit)) == 0)
 980     /* "If this keyword [digit] is not specified, the digits `0' through
 981         `9', ..., shall automatically belong to this class, with
 982         implementation-defined character values."  [P1003.2, 2.5.2.1]  */
 983     set_default (BIT (tok_digit), '0', '9');
 984
 985   /* "Only characters specified for the `alpha' and `digit' keyword
 986      shall be specified.  Characters specified for the keyword `alpha'
 987      and `digit' are automatically included in this class.  */
 988   {
 989     unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
 990     size_t cnt;
 991
 992     for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
 993       if ((ctype->class_collection[cnt] & mask) != 0)
 994         ctype->class_collection[cnt] |= BIT (tok_alnum);
 995   }
 996
 997   if ((ctype->class_done & BIT (tok_space)) == 0)
 998     /* "If this keyword [space] is not specified, the characters <space>,
 999         <form-feed>, <newline>, <carriage-return>, <tab>, and
1000         <vertical-tab>, ..., shall automatically belong to this class,
1001         with implementation-defined character values."  [P1003.2, 2.5.2.1]  */
1002     {
1003       unsigned int value;
1004
1005       value = charset_find_value (charset, "space", 5);
1006       if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1007         error (0, 0, _("\
1008 character `%s' not defined while needed as default value"),
1009                "<space>");
1010       else
1011         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1012
1013       value = charset_find_value (charset, "form-feed", 9);
1014       if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1015         error (0, 0, _("\
1016 character `%s' not defined while needed as default value"),
1017                "<form-feed>");
1018       else
1019         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1020
1021       value = charset_find_value (charset, "newline", 7);
1022       if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1023         error (0, 0, _("\
1024 character `%s' not defined while needed as default value"),
1025                "<newline>");
1026       else
1027         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1028
1029       value = charset_find_value (charset, "carriage-return", 15);
1030       if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1031         error (0, 0, _("\
1032 character `%s' not defined while needed as default value"),
1033                "<carriage-return>");
1034       else
1035         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1036
1037       value = charset_find_value (charset, "tab", 3);
1038       if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1039         error (0, 0, _("\
1040 character `%s' not defined while needed as default value"),
1041                "<tab>");
1042       else
1043         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1044
1045       value = charset_find_value (charset, "vertical-tab", 12);
1046       if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1047         error (0, 0, _("\
1048 character `%s' not defined while needed as default value"),
1049                "<vertical-tab>");
1050       else
1051         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1052     }
1053
1054   if ((ctype->class_done & BIT (tok_xdigit)) == 0)
1055     /* "If this keyword is not specified, the digits `0' to `9', the
1056         uppercase letters `A' through `F', and the lowercase letters `a'
1057         through `f', ..., shell automatically belong to this class, with
1058         implementation defined character values."  [P1003.2, 2.5.2.1]  */
1059     {
1060       set_default (BIT (tok_xdigit), '0', '9');
1061       set_default (BIT (tok_xdigit), 'A', 'F');
1062       set_default (BIT (tok_xdigit), 'a', 'f');
1063     }
1064
1065   if ((ctype->class_done & BIT (tok_blank)) == 0)
1066     /* "If this keyword [blank] is unspecified, the characters <space> and
1067        <tab> shall belong to this character class."  [P1003.2, 2.5.2.1]  */
1068    {
1069       unsigned int value;
1070
1071       value = charset_find_value (charset, "space", 5);
1072       if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1073         error (0, 0, _("\
1074 character `%s' not defined while needed as default value"),
1075                "<space>");
1076       else
1077         ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
1078
1079       value = charset_find_value (charset, "tab", 3);
1080       if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1081         error (0, 0, _("\
1082 character `%s' not defined while needed as default value"),
1083                "<tab>");
1084       else
1085         ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
1086     }
1087
1088   if ((ctype->class_done & BIT (tok_graph)) == 0)
1089     /* "If this keyword [graph] is not specified, characters specified for
1090         the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
1091         shall belong to this character class."  [P1003.2, 2.5.2.1]  */
1092     {
1093       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
1094         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
1095       size_t cnt;
1096
1097       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1098         if ((ctype->class_collection[cnt] & mask) != 0)
1099           ctype->class_collection[cnt] |= BIT (tok_graph);
1100     }
1101
1102   if ((ctype->class_done & BIT (tok_print)) == 0)
1103     /* "If this keyword [print] is not provided, characters specified for
1104         the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
1105         and the <space> character shall belong to this character class."
1106         [P1003.2, 2.5.2.1]  */
1107     {
1108       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
1109         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
1110       size_t cnt;
1111       wchar_t space;
1112
1113       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1114         if ((ctype->class_collection[cnt] & mask) != 0)
1115           ctype->class_collection[cnt] |= BIT (tok_print);
1116
1117       space = charset_find_value (charset, "space", 5);
1118       if (space == ILLEGAL_CHAR_VALUE && !be_quiet)
1119         error (0, 0, _("\
1120 character `%s' not defined while needed as default value"),
1121                "<space>");
1122       else
1123         ELEM (ctype, class_collection, , space) |= BIT (tok_print);
1124     }
1125
1126   if (ctype->toupper_done == 0)
1127     /* "If this keyword [toupper] is not specified, the lowercase letters
1128         `a' through `z', and their corresponding uppercase letters `A' to
1129         `Z', ..., shall automatically be included, with implementation-
1130         defined character values."  [P1003.2, 2.5.2.1]  */
1131     {
1132       char tmp[4];
1133       int ch;
1134
1135       strcpy (tmp, "<?>");
1136
1137       for (ch = 'a'; ch <= 'z'; ++ch)
1138         {
1139           unsigned int value_from, value_to;
1140
1141           tmp[1] = (char) ch;
1142
1143           value_from = charset_find_value (charset, &tmp[1], 1);
1144           if ((wchar_t) value_from == ILLEGAL_CHAR_VALUE && !be_quiet)
1145             {
1146               error (0, 0, _("\
1147 character `%s' not defined while needed as default value"),
1148                      tmp);
1149               continue;
1150             }
1151
1152           /* This conversion is implementation defined.  */
1153           tmp[1] = (char) (ch + ('A' - 'a'));
1154           value_to = charset_find_value (charset, &tmp[1], 1);
1155           if ((wchar_t) value_to == ILLEGAL_CHAR_VALUE && !be_quiet)
1156             {
1157               error (0, 0, _("\
1158 character `%s' not defined while needed as default value"),
1159                      tmp);
1160               continue;
1161             }
1162
1163           /* The index [0] is determined by the order of the
1164              `ctype_map_newP' calls in `ctype_startup'.  */
1165           ELEM (ctype, map_collection, [0], value_from) = value_to;
1166         }
1167     }
1168
1169   if (ctype->tolower_done == 0)
1170     /* "If this keyword [tolower] is not specified, the mapping shall be
1171        the reverse mapping of the one specified to `toupper'."  [P1003.2]  */
1172     {
1173       size_t cnt;
1174
1175       for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
1176         if (ctype->map_collection[0][cnt] != 0)
1177           ELEM (ctype, map_collection, [1],
1178                 ctype->map_collection[0][cnt])
1179             = ctype->charnames[cnt];
1180     }
1181 }
1182
1183
1184 static void
1185 allocate_arrays (struct locale_ctype_t *ctype, struct charset_t *charset)
1186 {
1187   size_t idx;
1188
1189   /* First we have to decide how we organize the arrays.  It is easy
1190      for a one-byte character set.  But multi-byte character set
1191      cannot be stored flat because the chars might be sparsely used.
1192      So we determine an optimal hashing function for the used
1193      characters.
1194
1195      We use a very trivial hashing function to store the sparse
1196      table.  CH % TABSIZE is used as an index.  To solve multiple hits
1197      we have N planes.  This guarantees a fixed search time for a
1198      character [N / 2].  In the following code we determine the minmum
1199      value for TABSIZE * N, where TABSIZE >= 256.  */
1200   size_t min_total = UINT_MAX;
1201   size_t act_size = 256;
1202
1203   if (!be_quiet)
1204     fputs (_("\
1205 Computing table size for character classes might take a while..."),
1206            stderr);
1207
1208   while (act_size < min_total)
1209     {
1210       size_t cnt[act_size];
1211       size_t act_planes = 1;
1212
1213       memset (cnt, '\0', sizeof cnt);
1214
1215       for (idx = 0; idx < 256; ++idx)
1216         cnt[idx] = 1;
1217
1218       for (idx = 0; idx < ctype->charnames_act; ++idx)
1219         if (ctype->charnames[idx] >= 256)
1220           {
1221             size_t nr = ctype->charnames[idx] % act_size;
1222
1223             if (++cnt[nr] > act_planes)
1224               {
1225                 act_planes = cnt[nr];
1226                 if (act_size * act_planes >= min_total)
1227                   break;
1228               }
1229           }
1230
1231       if (act_size * act_planes < min_total)
1232         {
1233           min_total = act_size * act_planes;
1234           ctype->plane_size = act_size;
1235           ctype->plane_cnt = act_planes;
1236         }
1237
1238       ++act_size;
1239     }
1240
1241   if (!be_quiet)
1242     fputs (_(" done\n"), stderr);
1243
1244
1245 #if __BYTE_ORDER == __LITTLE_ENDIAN
1246 # define NAMES_B1 ctype->names_el
1247 # define NAMES_B2 ctype->names_eb
1248 #else
1249 # define NAMES_B1 ctype->names_eb
1250 # define NAMES_B2 ctype->names_el
1251 #endif
1252
1253   ctype->names_eb = (u_int32_t *) xcalloc (ctype->plane_size
1254                                            * ctype->plane_cnt,
1255                                            sizeof (u_int32_t));
1256   ctype->names_el = (u_int32_t *) xcalloc (ctype->plane_size
1257                                            * ctype->plane_cnt,
1258                                            sizeof (u_int32_t));
1259
1260   for (idx = 1; idx < 256; ++idx)
1261     NAMES_B1[idx] = idx;
1262
1263   /* Trick: change the 0th entry's name to 1 to mark the cell occupied.  */
1264   NAMES_B1[0] = 1;
1265
1266   for (idx = 256; idx < ctype->charnames_act; ++idx)
1267     {
1268       size_t nr = (ctype->charnames[idx] % ctype->plane_size);
1269       size_t depth = 0;
1270
1271       while (NAMES_B1[nr + depth * ctype->plane_size])
1272         ++depth;
1273       assert (depth < ctype->plane_cnt);
1274
1275       NAMES_B1[nr + depth * ctype->plane_size] = ctype->charnames[idx];
1276
1277       /* Now for faster access remember the index in the NAMES_B array.  */
1278       ctype->charnames[idx] = nr + depth * ctype->plane_size;
1279     }
1280   NAMES_B1[0] = 0;
1281
1282   for (idx = 0; idx < ctype->plane_size * ctype->plane_cnt; ++idx)
1283     NAMES_B2[idx] = SWAPU32 (NAMES_B1[idx]);
1284
1285
1286   /* You wonder about this amount of memory?  This is only because some
1287      users do not manage to address the array with unsigned values or
1288      data types with range >= 256.  '\200' would result in the array
1289      index -128.  To help these poor people we duplicate the entries for
1290      128 up to 255 below the entry for \0.  */
1291   ctype->ctype_b = (char_class_t *) xcalloc (256 + 128,
1292                                              sizeof (char_class_t));
1293   ctype->ctype32_b = (char_class32_t *) xcalloc (ctype->plane_size
1294                                                  * ctype->plane_cnt,
1295                                                  sizeof (char_class32_t));
1296
1297   /* Fill in the character class information.  */
1298 #if __BYTE_ORDER == __LITTLE_ENDIAN
1299 # define TRANS(w) CHAR_CLASS_TRANS (w)
1300 # define TRANS32(w) CHAR_CLASS32_TRANS (w)
1301 #else
1302 # define TRANS(w) (w)
1303 # define TRANS32(w) (w)
1304 #endif
1305
1306   for (idx = 0; idx < ctype->class_collection_act; ++idx)
1307     if (ctype->charnames[idx] < 256)
1308       ctype->ctype_b[128 + ctype->charnames[idx]]
1309         = TRANS (ctype->class_collection[idx]);
1310
1311   /* Mirror first 127 entries.  We must take care that entry -1 is not
1312      mirrored because EOF == -1.  */
1313   for (idx = 0; idx < 127; ++idx)
1314     ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
1315
1316   /* The 32 bit array contains all characters.  */
1317   for (idx = 0; idx < ctype->class_collection_act; ++idx)
1318     ctype->ctype32_b[ctype->charnames[idx]]
1319       = TRANS32 (ctype->class_collection[idx]);
1320
1321   /* Room for table of mappings.  */
1322   ctype->map_eb = (u_int32_t **) xmalloc (ctype->map_collection_nr
1323                                           * sizeof (u_int32_t *));
1324   ctype->map_el = (u_int32_t **) xmalloc (ctype->map_collection_nr
1325                                           * sizeof (u_int32_t *));
1326
1327   /* Fill in all mappings.  */
1328   for (idx = 0; idx < ctype->map_collection_nr; ++idx)
1329     {
1330       unsigned int idx2;
1331
1332       /* Allocate table.  */
1333       ctype->map_eb[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
1334                                                    * ctype->plane_cnt + 128)
1335                                                   * sizeof (u_int32_t));
1336       ctype->map_el[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
1337                                                    * ctype->plane_cnt + 128)
1338                                                   * sizeof (u_int32_t));
1339
1340 #if __BYTE_ORDER == __LITTLE_ENDIAN
1341 # define MAP_B1 ctype->map_el
1342 # define MAP_B2 ctype->map_eb
1343 #else
1344 # define MAP_B1 ctype->map_eb
1345 # define MAP_B2 ctype->map_el
1346 #endif
1347
1348       /* Copy default value (identity mapping).  */
1349       memcpy (&MAP_B1[idx][128], NAMES_B1,
1350               ctype->plane_size * ctype->plane_cnt * sizeof (u_int32_t));
1351
1352       /* Copy values from collection.  */
1353       for (idx2 = 0; idx2 < ctype->map_collection_act[idx]; ++idx2)
1354         if (ctype->map_collection[idx][idx2] != 0)
1355           MAP_B1[idx][128 + ctype->charnames[idx2]] =
1356             ctype->map_collection[idx][idx2];
1357
1358       /* Mirror first 127 entries.  We must take care not to map entry
1359          -1 because EOF == -1.  */
1360       for (idx2 = 0; idx2 < 127; ++idx2)
1361         MAP_B1[idx][idx2] = MAP_B1[idx][256 + idx2];
1362
1363       /* EOF must map to EOF.  */
1364       MAP_B1[idx][127] = EOF;
1365
1366       /* And now the other byte order.  */
1367       for (idx2 = 0; idx2 < ctype->plane_size * ctype->plane_cnt + 128; ++idx2)
1368         MAP_B2[idx][idx2] = SWAPU32 (MAP_B1[idx][idx2]);
1369     }
1370
1371   /* Extra array for class and map names.  */
1372   ctype->class_name_ptr = (u_int32_t *) xmalloc (ctype->nr_charclass
1373                                                  * sizeof (u_int32_t));
1374   ctype->map_name_ptr = (u_int32_t *) xmalloc (ctype->map_collection_nr
1375                                                * sizeof (u_int32_t));
1376
1377   /* Array for width information.  Because the expected width are very
1378      small we use only one single byte.  This save space and we need
1379      not provide the information twice with both endianesses.  */
1380   ctype->width = (unsigned char *) xmalloc (ctype->plane_size
1381                                             * ctype->plane_cnt);
1382   /* Initialize with default width value.  */
1383   memset (ctype->width, charset->width_default,
1384           ctype->plane_size * ctype->plane_cnt);
1385   if (charset->width_rules != NULL)
1386     {
1387       size_t cnt;
1388
1389       for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
1390         if (charset->width_rules[cnt].width != charset->width_default)
1391           for (idx = charset->width_rules[cnt].from;
1392                idx <= charset->width_rules[cnt].to; ++idx)
1393             {
1394               size_t nr = idx % ctype->plane_size;
1395               size_t depth = 0;
1396
1397               while (NAMES_B1[nr + depth * ctype->plane_size] != nr)
1398                 ++depth;
1399               assert (depth < ctype->plane_cnt);
1400
1401               ctype->width[nr + depth * ctype->plane_size]
1402                 = charset->width_rules[cnt].width;
1403             }
1404     }
1405
1406   /* Compute MB_CUR_MAX.  Please note the value mb_cur_max in the
1407      character set definition gives the number of bytes in the wide
1408      character representation.  We compute the number of bytes used
1409      for the UTF-8 encoded form.  */
1410   ctype->mb_cur_max = ((int []) { 2, 3, 5, 6 }) [charset->mb_cur_max - 1];
1411
1412   /* We need the name of the currently used 8-bit character set to
1413      make correct conversion between this 8-bit representation and the
1414      ISO 10646 character set used internally for wide characters.  */
1415   ctype->codeset_name = charset->code_set_name;
1416 }