locale/programs/ld-ctype.c

   1 /* Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Library General Public License as
   7    published by the Free Software Foundation; either version 2 of the
   8    License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Library General Public License for more details.
  14
  15    You should have received a copy of the GNU Library General Public
  16    License along with the GNU C Library; see the file COPYING.LIB.  If not,
  17    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  18    Boston, MA 02111-1307, USA.  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 # include <config.h>
  22 #endif
  23
  24 #include <alloca.h>
  25 #include <endian.h>
  26 #include <limits.h>
  27 #include <string.h>
  28
  29 #include "locales.h"
  30 #include "localeinfo.h"
  31 #include "langinfo.h"
  32 #include "locfile-token.h"
  33 #include "stringtrans.h"
  34
  35 /* Uncomment the following line in the production version.  */
  36 /* define NDEBUG 1 */
  37 #include <assert.h>
  38
  39
/* Allocation helpers used throughout this file.  Only the prototypes
   appear here; the definitions are not part of this file's visible
   text.  */
void *xmalloc (size_t __n);
void *xcalloc (size_t __n, size_t __s);
void *xrealloc (void *__ptr, size_t __n);
  43
  44
/* The bit used for representing a special class.  BITPOS yields the
   distance of the token CLASS from `tok_upper', BIT the mask with only
   that bit set.  */
#define BITPOS(class) ((class) - tok_upper)
#define BIT(class) (1 << BITPOS (class))

/* Access the entry for character VALUE in the table CTYPE->COLLECTION.
   IDX is pasted after the member names, so it is either empty (plain
   table such as `class_collection') or a subscript like `[n]' (array
   of tables such as `map_collection').  The expansion dereferences the
   result of find_idx and therefore is usable as an lvalue.  */
#define ELEM(ctype, collection, idx, value)                                   \
  *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,     \
             &ctype->collection##_act idx, value)

/* Reverse the order of the four bytes of W.  Presumably W is expected
   to be an unsigned 32-bit value; the outer bytes are moved by plain
   shifts without masking.  */
#define SWAPU32(w) \
  (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))

/* Exchange the two low-order bytes of W.  */
#define SWAPU16(w) \
  ((((w)  >> 8) & 0xff) | (((w) & 0xff) << 8))

/* Exchange the two bytes inside each 16-bit half of the 32-bit value W;
   the halves themselves stay in place.  */
#define XSWAPU32(w) \
  ((((w) & 0xff00ff00) >> 8) | (((w) & 0xff00ff) << 8))
  61
  62
/* To be compatible with former implementations we for now restrict
   the number of bits for character classes to 16.  When compatibility
   is not necessary anymore increase the number to 32.

   Each type is paired with the byte-swapping macro of matching layout:
   SWAPU16 for the 16-bit type, XSWAPU32 (swap inside each 16-bit half)
   for the 32-bit one.  */
#define char_class_t u_int16_t
#define CHAR_CLASS_TRANS SWAPU16
#define char_class32_t u_int32_t
#define CHAR_CLASS32_TRANS XSWAPU32
  70
  71
/* The real definition of the struct for the LC_CTYPE locale.  It holds
   the state collected while the category is read as well as the arrays
   handed to the output routine.  */
struct locale_ctype_t
{
  /* Character values known so far.  ctype_startup stores 0..255 in the
     first 256 slots, find_idx appends every other value it is asked
     for.  The position of a value in this array is also its position
     in the class and map collections below.  `_max' is the allocated
     and `_act' the used number of elements.  */
  unsigned int *charnames;
  size_t charnames_max;
  size_t charnames_act;

  /* We will allow up to 8 * sizeof(u_int32_t) - 1 character classes.  */
#define MAX_NR_CHARCLASS (8 * sizeof (u_int32_t) - 1)
  /* Number of registered classes and their names; the index of a name
     is the bit position used for the class.  */
  size_t nr_charclass;
  const char *classnames[MAX_NR_CHARCLASS];
  /* Bit of the class currently being defined (0 outside a class
     definition); OR-ed into the entry of every listed character.  */
  unsigned long int current_class_mask;
  /* Value recorded by ctype_class_from, used by ctype_class_to as the
     start of a range; ILLEGAL_CHAR_VALUE when there is none.  */
  unsigned int last_class_char;
  /* One set of class bits per character, indexed like `charnames'.  */
  u_int32_t *class_collection;
  size_t class_collection_max;
  size_t class_collection_act;
  /* Bits of the classes for which ctype_class_start was called.  */
  unsigned long int class_done;

  /* If the following number ever turns out to be too small simply
     increase it.  But I doubt it will.  --drepper@gnu */
#define MAX_NR_CHARMAP 16
  /* Names and tables of the character maps; entries 0 and 1 are
     `toupper' and `tolower' (registered in this order by
     ctype_startup).  Each table is indexed like `charnames'.  */
  const char *mapnames[MAX_NR_CHARMAP];
  u_int32_t *map_collection[MAX_NR_CHARMAP];
  size_t map_collection_max[MAX_NR_CHARMAP];
  size_t map_collection_act[MAX_NR_CHARMAP];
  /* Number of maps registered so far.  */
  size_t map_collection_nr;
  /* Index of the map currently being defined, MAX_NR_CHARMAP when
     none is.  */
  size_t last_map_idx;
  /* Source character of the pair currently being read, or
     ILLEGAL_CHAR_VALUE.  */
  unsigned int from_map_char;
  /* Set to 1 by ctype_map_start when the respective map definition is
     started.  */
  int toupper_done;
  int tolower_done;

  /* The arrays for the binary representation.  They are passed to the
     output in ctype_output; presumably allocate_arrays fills them in
     (its definition is not visible here).  The `_eb'/`_el' pairs are
     written as the `_EB'/`_EL' items of the locale file.  */
  u_int32_t plane_size;
  u_int32_t plane_cnt;
  char_class_t *ctype_b;
  char_class32_t *ctype32_b;
  u_int32_t *names_el;
  u_int32_t *names_eb;
  u_int32_t **map_eb;
  u_int32_t **map_el;
  u_int32_t *class_name_ptr;
  u_int32_t *map_name_ptr;
  unsigned char *width;
  u_int32_t mb_cur_max;
  const char *codeset_name;
};
 118
 119
/* Prototypes for local functions.  */

/* Register NAME as a further character class in CTYPE.  */
static void ctype_class_newP (struct linereader *lr,
                              struct locale_ctype_t *ctype, const char *name);
/* Register NAME as a further character map in CTYPE and allocate its
   table.  */
static void ctype_map_newP (struct linereader *lr,
                            struct locale_ctype_t *ctype,
                            const char *name, struct charset_t *charset);
/* Return the address of the entry for character IDX in *TABLE, entering
   IDX into the name array of CTYPE if necessary.  */
static u_int32_t *find_idx (struct locale_ctype_t *ctype, u_int32_t **table,
                            size_t *max, size_t *act, unsigned int idx);
/* Called first by ctype_finish; the definition lies outside the part of
   the file shown here.  */
static void set_class_defaults (struct locale_ctype_t *ctype,
                                struct charset_t *charset);
/* Called by ctype_output before the output vector is built; the
   definition lies outside the part of the file shown here.  */
static void allocate_arrays (struct locale_ctype_t *ctype,
                             struct charset_t *charset);
 132
 133
 134 void
 135 ctype_startup (struct linereader *lr, struct localedef_t *locale,
 136                struct charset_t *charset)
 137 {
 138   unsigned int cnt;
 139   struct locale_ctype_t *ctype;
 140
 141   /* It is important that we always use UCS1 encoding for strings now.  */
 142   encoding_method = ENC_UCS1;
 143
 144   /* Allocate the needed room.  */
 145   locale->categories[LC_CTYPE].ctype = ctype =
 146     (struct locale_ctype_t *) xmalloc (sizeof (struct locale_ctype_t));
 147
 148   /* We have no names seen yet.  */
 149   ctype->charnames_max = charset->mb_cur_max == 1 ? 256 : 512;
 150   ctype->charnames =
 151     (unsigned int *) xmalloc (ctype->charnames_max * sizeof (unsigned int));
 152   for (cnt = 0; cnt < 256; ++cnt)
 153     ctype->charnames[cnt] = cnt;
 154   ctype->charnames_act = 256;
 155
 156   /* Fill character class information.  */
 157   ctype->nr_charclass = 0;
 158   ctype->current_class_mask = 0;
 159   ctype->last_class_char = ILLEGAL_CHAR_VALUE;
 160   /* The order of the following instructions determines the bit
 161      positions!  */
 162   ctype_class_newP (lr, ctype, "upper");
 163   ctype_class_newP (lr, ctype, "lower");
 164   ctype_class_newP (lr, ctype, "alpha");
 165   ctype_class_newP (lr, ctype, "digit");
 166   ctype_class_newP (lr, ctype, "xdigit");
 167   ctype_class_newP (lr, ctype, "space");
 168   ctype_class_newP (lr, ctype, "print");
 169   ctype_class_newP (lr, ctype, "graph");
 170   ctype_class_newP (lr, ctype, "blank");
 171   ctype_class_newP (lr, ctype, "cntrl");
 172   ctype_class_newP (lr, ctype, "punct");
 173   ctype_class_newP (lr, ctype, "alnum");
 174
 175   ctype->class_collection_max = charset->mb_cur_max == 1 ? 256 : 512;
 176   ctype->class_collection
 177     = (u_int32_t *) xmalloc (sizeof (unsigned long int)
 178                              * ctype->class_collection_max);
 179   memset (ctype->class_collection, '\0',
 180           sizeof (unsigned long int) * ctype->class_collection_max);
 181   ctype->class_collection_act = 256;
 182
 183   /* Fill character map information.  */
 184   ctype->map_collection_nr = 0;
 185   ctype->last_map_idx = MAX_NR_CHARMAP;
 186   ctype->from_map_char = ILLEGAL_CHAR_VALUE;
 187   ctype_map_newP (lr, ctype, "toupper", charset);
 188   ctype_map_newP (lr, ctype, "tolower", charset);
 189
 190   /* Fill first 256 entries in `toupper' and `tolower' arrays.  */
 191   for (cnt = 0; cnt < 256; ++cnt)
 192     {
 193       ctype->map_collection[0][cnt] = cnt;
 194       ctype->map_collection[1][cnt] = cnt;
 195     }
 196 }
 197
 198
 199 void
 200 ctype_finish (struct localedef_t *locale, struct charset_t *charset)
 201 {
 202   /* See POSIX.2, table 2-6 for the meaning of the following table.  */
 203 #define NCLASS 12
 204   static const struct
 205   {
 206     const char *name;
 207     const char allow[NCLASS];
 208   }
 209   valid_table[NCLASS] =
 210   {
 211     /* The order is important.  See token.h for more information.
 212        M = Always, D = Default, - = Permitted, X = Mutually exclusive  */
 213     { "upper",  "--MX-XDDXXX-" },
 214     { "lower",  "--MX-XDDXXX-" },
 215     { "alpha",  "---X-XDDXXX-" },
 216     { "digit",  "XXX--XDDXXX-" },
 217     { "xdigit", "-----XDDXXX-" },
 218     { "space",  "XXXXX------X" },
 219     { "print",  "---------X--" },
 220     { "graph",  "---------X--" },
 221     { "blank",  "XXXXXM-----X" },
 222     { "cntrl",  "XXXXX-XX--XX" },
 223     { "punct",  "XXXXX-DD-X-X" },
 224     { "alnum",  "-----XDDXXX-" }
 225   };
 226   size_t cnt;
 227   int cls1, cls2;
 228   unsigned int space_value;
 229   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 230
 231   /* Set default value for classes not specified.  */
 232   set_class_defaults (ctype, charset);
 233
 234   /* Check according to table.  */
 235   for (cnt = 0; cnt < ctype->class_collection_max; ++cnt)
 236     {
 237       unsigned long int tmp;
 238
 239       tmp = ctype->class_collection[cnt];
 240       if (tmp == 0)
 241         continue;
 242
 243       for (cls1 = 0; cls1 < NCLASS; ++cls1)
 244         if ((tmp & (1 << cls1)) != 0)
 245           for (cls2 = 0; cls2 < NCLASS; ++cls2)
 246             if (valid_table[cls1].allow[cls2] != '-')
 247               {
 248                 int eq = (tmp & (1 << cls2)) != 0;
 249                 switch (valid_table[cls1].allow[cls2])
 250                   {
 251                   case 'M':
 252                     if (!eq)
 253                       {
 254                         char buf[17];
 255                         char *cp = buf;
 256                         unsigned int value;
 257
 258                         value = ctype->charnames[cnt];
 259
 260                         if ((value & 0xff000000) != 0)
 261                           cp += sprintf (cp, "\\%o", (value >> 24) & 0xff);
 262                         if ((value & 0xffff0000) != 0)
 263                           cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
 264                         if ((value & 0xffffff00) != 0)
 265                           cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
 266                         sprintf (cp, "\\%o", value & 0xff);
 267
 268                         if (!be_quiet)
 269                           error (0, 0, _("\
 270 character %s'%s' in class `%s' must be in class `%s'"), value > 256 ? "L" : "",
 271                                  cp, valid_table[cls1].name,
 272                                  valid_table[cls2].name);
 273                       }
 274                     break;
 275
 276                   case 'X':
 277                     if (eq)
 278                       {
 279                         char buf[17];
 280                         char *cp = buf;
 281                         unsigned int value;
 282
 283                         value = ctype->charnames[cnt];
 284
 285                         if ((value & 0xff000000) != 0)
 286                           cp += sprintf (cp, "\\%o", value >> 24);
 287                         if ((value & 0xffff0000) != 0)
 288                           cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
 289                         if ((value & 0xffffff00) != 0)
 290                           cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
 291                         sprintf (cp, "\\%o", value & 0xff);
 292
 293                         if (!be_quiet)
 294                           error (0, 0, _("\
 295 character %s'%s' in class `%s' must not be in class `%s'"),
 296                                  value > 256 ? "L" : "", cp,
 297                                  valid_table[cls1].name,
 298                                  valid_table[cls2].name);
 299                       }
 300                     break;
 301
 302                   case 'D':
 303                     ctype->class_collection[cnt] |= 1 << cls2;
 304                     break;
 305
 306                   default:
 307                     error (5, 0, _("internal error in %s, line %u"),
 308                            __FUNCTION__, __LINE__);
 309                   }
 310               }
 311     }
 312
 313   /* ... and now test <SP> as a special case.  */
 314   space_value = charset_find_value (charset, "SP", 2);
 315   if ((wchar_t) space_value == ILLEGAL_CHAR_VALUE && !be_quiet)
 316     error (0, 0, _("character <SP> not defined in character map"));
 317   else if (((cnt = BITPOS (tok_space),
 318              (ELEM (ctype, class_collection, , space_value)
 319               & BIT (tok_space)) == 0)
 320             || (cnt = BITPOS (tok_blank),
 321                 (ELEM (ctype, class_collection, , space_value)
 322                  & BIT (tok_blank)) == 0))
 323            && !be_quiet)
 324     error (0, 0, _("<SP> character not in class `%s'"),
 325            valid_table[cnt].name);
 326   else if (((cnt = BITPOS (tok_punct),
 327              (ELEM (ctype, class_collection, , space_value)
 328               & BIT (tok_punct)) != 0)
 329             || (cnt = BITPOS (tok_graph),
 330                 (ELEM (ctype, class_collection, , space_value)
 331                  & BIT (tok_graph))
 332                 != 0))
 333            && !be_quiet)
 334     error (0, 0, _("<SP> character must not be in class `%s'"),
 335            valid_table[cnt].name);
 336   else
 337     ELEM (ctype, class_collection, , space_value) |= BIT (tok_print);
 338
 339   /* Now that the tests are done make sure the name array contains all
 340      characters which are handled in the WIDTH section of the
 341      character set definition file.  */
 342   if (charset->width_rules != NULL)
 343     for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
 344       {
 345         size_t inner;
 346         for (inner = charset->width_rules[cnt].from;
 347              inner <= charset->width_rules[cnt].to; ++inner)
 348           (void) find_idx (ctype, NULL, NULL, NULL, inner);
 349       }
 350 }
 351
 352
 353 void
 354 ctype_output (struct localedef_t *locale, struct charset_t *charset,
 355               const char *output_path)
 356 {
 357   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 358   const size_t nelems = (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)
 359                          + 2 * (ctype->map_collection_nr - 2));
 360   struct iovec iov[2 + nelems + ctype->nr_charclass
 361                   + ctype->map_collection_nr];
 362   struct locale_file data;
 363   u_int32_t idx[nelems];
 364   size_t elem, cnt, offset, total;
 365
 366
 367   if ((locale->binary & (1 << LC_CTYPE)) != 0)
 368     {
 369       iov[0].iov_base = ctype;
 370       iov[0].iov_len = locale->len[LC_CTYPE];
 371
 372       write_locale_data (output_path, "LC_CTYPE", 1, iov);
 373
 374       return;
 375     }
 376
 377
 378   /* Now prepare the output: Find the sizes of the table we can use.  */
 379   allocate_arrays (ctype, charset);
 380
 381   data.magic = LIMAGIC (LC_CTYPE);
 382   data.n = nelems;
 383   iov[0].iov_base = (void *) &data;
 384   iov[0].iov_len = sizeof (data);
 385
 386   iov[1].iov_base = (void *) idx;
 387   iov[1].iov_len = sizeof (idx);
 388
 389   idx[0] = iov[0].iov_len + iov[1].iov_len;
 390   offset = 0;
 391
 392   for (elem = 0; elem < nelems; ++elem)
 393     {
 394       if (elem < _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE))
 395         switch (elem)
 396           {
 397 #define CTYPE_DATA(name, base, len)                                           \
 398           case _NL_ITEM_INDEX (name):                                         \
 399             iov[2 + elem + offset].iov_base = (base);                         \
 400             iov[2 + elem + offset].iov_len = (len);                           \
 401             if (elem + 1 < nelems)                                            \
 402               idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;     \
 403             break
 404
 405           CTYPE_DATA (_NL_CTYPE_CLASS,
 406                       ctype->ctype_b,
 407                       (256 + 128) * sizeof (char_class_t));
 408
 409           CTYPE_DATA (_NL_CTYPE_TOUPPER_EB,
 410                       ctype->map_eb[0],
 411                       (ctype->plane_size * ctype->plane_cnt + 128)
 412                       * sizeof (u_int32_t));
 413           CTYPE_DATA (_NL_CTYPE_TOLOWER_EB,
 414                       ctype->map_eb[1],
 415                       (ctype->plane_size * ctype->plane_cnt + 128)
 416                       * sizeof (u_int32_t));
 417
 418           CTYPE_DATA (_NL_CTYPE_TOUPPER_EL,
 419                       ctype->map_el[0],
 420                       (ctype->plane_size * ctype->plane_cnt + 128)
 421                       * sizeof (u_int32_t));
 422           CTYPE_DATA (_NL_CTYPE_TOLOWER_EL,
 423                       ctype->map_el[1],
 424                       (ctype->plane_size * ctype->plane_cnt + 128)
 425                       * sizeof (u_int32_t));
 426
 427           CTYPE_DATA (_NL_CTYPE_CLASS32,
 428                       ctype->ctype32_b,
 429                       (ctype->plane_size * ctype->plane_cnt
 430                        * sizeof (char_class32_t)));
 431
 432           CTYPE_DATA (_NL_CTYPE_NAMES_EB,
 433                       ctype->names_eb, (ctype->plane_size * ctype->plane_cnt
 434                                         * sizeof (u_int32_t)));
 435           CTYPE_DATA (_NL_CTYPE_NAMES_EL,
 436                       ctype->names_el, (ctype->plane_size * ctype->plane_cnt
 437                                         * sizeof (u_int32_t)));
 438
 439           CTYPE_DATA (_NL_CTYPE_HASH_SIZE,
 440                       &ctype->plane_size, sizeof (u_int32_t));
 441           CTYPE_DATA (_NL_CTYPE_HASH_LAYERS,
 442                       &ctype->plane_cnt, sizeof (u_int32_t));
 443
 444           case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
 445             /* The class name array.  */
 446             total = 0;
 447             for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
 448               {
 449                 iov[2 + elem + offset].iov_base
 450                   = (void *) ctype->classnames[cnt];
 451                 iov[2 + elem + offset].iov_len
 452                   = strlen (ctype->classnames[cnt]) + 1;
 453                 total += iov[2 + elem + offset].iov_len;
 454               }
 455             iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
 456             iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
 457             total += 1 + (4 - ((total + 1) % 4));
 458
 459             if (elem + 1 < nelems)
 460               idx[elem + 1] = idx[elem] + total;
 461             break;
 462
 463           case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
 464             /* The class name array.  */
 465             total = 0;
 466             for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
 467               {
 468                 iov[2 + elem + offset].iov_base
 469                   = (void *) ctype->mapnames[cnt];
 470                 iov[2 + elem + offset].iov_len
 471                   = strlen (ctype->mapnames[cnt]) + 1;
 472                 total += iov[2 + elem + offset].iov_len;
 473               }
 474             iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
 475             iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
 476             total += 1 + (4 - ((total + 1) % 4));
 477
 478             if (elem + 1 < nelems)
 479               idx[elem + 1] = idx[elem] + total;
 480             break;
 481
 482           CTYPE_DATA (_NL_CTYPE_WIDTH,
 483                       ctype->width, ctype->plane_size * ctype->plane_cnt);
 484
 485           CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
 486                       &ctype->mb_cur_max, sizeof (u_int32_t));
 487
 488           case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
 489             total = strlen (ctype->codeset_name) + 1;
 490             if (total % 4 == 0)
 491               iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
 492             else
 493               {
 494                 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
 495                 memset (mempcpy (iov[2 + elem + offset].iov_base,
 496                                  ctype->codeset_name, total),
 497                         '\0', 4 - (total & 3));
 498                 total = (total + 3) & ~3;
 499               }
 500             iov[2 + elem + offset].iov_len = total;
 501             if (elem + 1 < nelems)
 502               idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
 503             break;
 504
 505           default:
 506             assert (! "unknown CTYPE element");
 507           }
 508       else
 509         {
 510           /* Handle extra maps.  */
 511           size_t nr = (elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) >> 1;
 512
 513           if (((elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) & 1) == 0)
 514             iov[2 + elem + offset].iov_base = ctype->map_eb[nr];
 515           else
 516             iov[2 + elem + offset].iov_base = ctype->map_el[nr];
 517
 518           iov[2 + elem + offset].iov_len = ((ctype->plane_size
 519                                              * ctype->plane_cnt + 128)
 520                                             * sizeof (u_int32_t));
 521
 522           if (elem + 1 < nelems)
 523             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
 524         }
 525     }
 526
 527   assert (2 + elem + offset == (nelems + ctype->nr_charclass
 528                                 + ctype->map_collection_nr + 2));
 529
 530   write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
 531 }
 532
 533
 534 /* Character class handling.  */
 535 void
 536 ctype_class_new (struct linereader *lr, struct localedef_t *locale,
 537                  enum token_t tok, struct token *code,
 538                  struct charset_t *charset)
 539 {
 540   ctype_class_newP (lr, locale->categories[LC_CTYPE].ctype,
 541                     code->val.str.start);
 542 }
 543
 544
 545 int
 546 ctype_is_charclass (struct linereader *lr, struct localedef_t *locale,
 547                     const char *name)
 548 {
 549   size_t cnt;
 550
 551   for (cnt = 0; cnt < locale->categories[LC_CTYPE].ctype->nr_charclass; ++cnt)
 552     if (strcmp (name, locale->categories[LC_CTYPE].ctype->classnames[cnt])
 553         == 0)
 554       return 1;
 555
 556   return 0;
 557 }
 558
 559
 560 void
 561 ctype_class_start (struct linereader *lr, struct localedef_t *locale,
 562                    enum token_t tok, const char *str,
 563                    struct charset_t *charset)
 564 {
 565   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 566   size_t cnt;
 567
 568   switch (tok)
 569     {
 570     case tok_upper:
 571       str = "upper";
 572       break;
 573     case tok_lower:
 574       str = "lower";
 575       break;
 576     case tok_alpha:
 577       str = "alpha";
 578       break;
 579     case tok_digit:
 580       str = "digit";
 581       break;
 582     case tok_xdigit:
 583       str = "xdigit";
 584       break;
 585     case tok_space:
 586       str = "space";
 587       break;
 588     case tok_print:
 589       str = "print";
 590       break;
 591     case tok_graph:
 592       str = "graph";
 593       break;
 594     case tok_blank:
 595       str = "blank";
 596       break;
 597     case tok_cntrl:
 598       str = "cntrl";
 599       break;
 600     case tok_punct:
 601       str = "punct";
 602       break;
 603     case tok_alnum:
 604       str = "alnum";
 605       break;
 606     case tok_ident:
 607       break;
 608     default:
 609       assert (! "illegal token as class name: should not happen");
 610     }
 611
 612   for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
 613     if (strcmp (str, ctype->classnames[cnt]) == 0)
 614       break;
 615
 616   if (cnt >= ctype->nr_charclass)
 617     assert (! "unknown class in class definition: should not happen");
 618
 619   ctype->class_done |= BIT (tok);
 620
 621   ctype->current_class_mask = 1 << cnt;
 622   ctype->last_class_char = ILLEGAL_CHAR_VALUE;
 623 }
 624
 625
 626 void
 627 ctype_class_from (struct linereader *lr, struct localedef_t *locale,
 628                   struct token *code, struct charset_t *charset)
 629 {
 630   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 631   unsigned int value;
 632
 633   value = charset_find_value (charset, code->val.str.start, code->val.str.len);
 634
 635   ctype->last_class_char = value;
 636
 637   if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
 638     /* In the LC_CTYPE category it is no error when a character is
 639        not found.  This has to be ignored silently.  */
 640     return;
 641
 642   *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
 643              &ctype->class_collection_act, value)
 644     |= ctype->current_class_mask;
 645 }
 646
 647
 648 void
 649 ctype_class_to (struct linereader *lr, struct localedef_t *locale,
 650                 struct token *code, struct charset_t *charset)
 651 {
 652   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 653   unsigned int value, cnt;
 654
 655   value = charset_find_value (charset, code->val.str.start, code->val.str.len);
 656
 657   assert (value >= ctype->last_class_char);
 658
 659   for (cnt = ctype->last_class_char + 1; cnt <= value; ++cnt)
 660     *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
 661                &ctype->class_collection_act, cnt)
 662       |= ctype->current_class_mask;
 663
 664   ctype->last_class_char = ILLEGAL_CHAR_VALUE;
 665 }
 666
 667
 668 void
 669 ctype_class_end (struct linereader *lr, struct localedef_t *locale)
 670 {
 671   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 672
 673   /* We have no special actions to perform here.  */
 674   ctype->current_class_mask = 0;
 675   ctype->last_class_char = ILLEGAL_CHAR_VALUE;
 676 }
 677
 678
 679 /* Character map handling.  */
 680 void
 681 ctype_map_new (struct linereader *lr, struct localedef_t *locale,
 682                enum token_t tok, struct token *code,
 683                struct charset_t *charset)
 684 {
 685   ctype_map_newP (lr, locale->categories[LC_CTYPE].ctype,
 686                   code->val.str.start, charset);
 687 }
 688
 689
 690 int
 691 ctype_is_charconv (struct linereader *lr, struct localedef_t *locale,
 692                    const char *name)
 693 {
 694   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 695   size_t cnt;
 696
 697   for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
 698     if (strcmp (name, ctype->mapnames[cnt]) == 0)
 699       return 1;
 700
 701   return 0;
 702 }
 703
 704
 705 void
 706 ctype_map_start (struct linereader *lr, struct localedef_t *locale,
 707                  enum token_t tok, const char *name, struct charset_t *charset)
 708 {
 709   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 710   size_t cnt;
 711
 712   switch (tok)
 713     {
 714     case tok_toupper:
 715       ctype->toupper_done = 1;
 716       name = "toupper";
 717       break;
 718     case tok_tolower:
 719       ctype->tolower_done = 1;
 720       name = "tolower";
 721       break;
 722     case tok_ident:
 723       break;
 724     default:
 725       assert (! "unknown token in category `LC_CTYPE' should not happen");
 726     }
 727
 728   for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
 729     if (strcmp (name, ctype->mapnames[cnt]) == 0)
 730       break;
 731
 732   if (cnt == ctype->map_collection_nr)
 733     assert (! "unknown token in category `LC_CTYPE' should not happen");
 734
 735   ctype->last_map_idx = cnt;
 736   ctype->from_map_char = ILLEGAL_CHAR_VALUE;
 737 }
 738
 739
 740 void
 741 ctype_map_from (struct linereader *lr, struct localedef_t *locale,
 742                 struct token *code, struct charset_t *charset)
 743 {
 744   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 745   unsigned int value;
 746
 747   value = charset_find_value (charset, code->val.str.start, code->val.str.len);
 748
 749   if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
 750     /* In the LC_CTYPE category it is no error when a character is
 751        not found.  This has to be ignored silently.  */
 752     return;
 753
 754   assert (ctype->last_map_idx < ctype->map_collection_nr);
 755
 756   ctype->from_map_char = value;
 757 }
 758
 759
 760 void
 761 ctype_map_to (struct linereader *lr, struct localedef_t *locale,
 762               struct token *code, struct charset_t *charset)
 763 {
 764   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 765   unsigned int value;
 766
 767   value = charset_find_value (charset, code->val.str.start, code->val.str.len);
 768
 769   if ((wchar_t) ctype->from_map_char == ILLEGAL_CHAR_VALUE
 770       || (wchar_t) value == ILLEGAL_CHAR_VALUE)
 771     {
 772       /* In the LC_CTYPE category it is no error when a character is
 773          not found.  This has to be ignored silently.  */
 774       ctype->from_map_char = ILLEGAL_CHAR_VALUE;
 775       return;
 776     }
 777
 778   *find_idx (ctype, &ctype->map_collection[ctype->last_map_idx],
 779              &ctype->map_collection_max[ctype->last_map_idx],
 780              &ctype->map_collection_act[ctype->last_map_idx],
 781              ctype->from_map_char) = value;
 782
 783   ctype->from_map_char = ILLEGAL_CHAR_VALUE;
 784 }
 785
 786
 787 void
 788 ctype_map_end (struct linereader *lr, struct localedef_t *locale)
 789 {
 790   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 791
 792   ctype->last_map_idx = MAX_NR_CHARMAP;
 793   ctype->from_map_char = ILLEGAL_CHAR_VALUE;
 794 }
 795
 796
 797 /* Local functions.  */
 798 static void
 799 ctype_class_newP (struct linereader *lr, struct locale_ctype_t *ctype,
 800                   const char *name)
 801 {
 802   size_t cnt;
 803
 804   for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
 805     if (strcmp (ctype->classnames[cnt], name) == 0)
 806       break;
 807
 808   if (cnt < ctype->nr_charclass)
 809     {
 810       lr_error (lr, _("character class `%s' already defined"), name);
 811       return;
 812     }
 813
 814   if (ctype->nr_charclass == MAX_NR_CHARCLASS)
 815     /* Exit code 2 is prescribed in P1003.2b.  */
 816     error (2, 0, _("\
 817 implementation limit: no more than %d character classes allowed"),
 818            MAX_NR_CHARCLASS);
 819
 820   ctype->classnames[ctype->nr_charclass++] = name;
 821 }
 822
 823
 824 static void
 825 ctype_map_newP (struct linereader *lr, struct locale_ctype_t *ctype,
 826                 const char *name, struct charset_t *charset)
 827 {
 828   size_t max_chars = 0;
 829   size_t cnt;
 830
 831   for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
 832     {
 833       if (strcmp (ctype->mapnames[cnt], name) == 0)
 834         break;
 835
 836       if (max_chars < ctype->map_collection_max[cnt])
 837         max_chars = ctype->map_collection_max[cnt];
 838     }
 839
 840   if (cnt < ctype->map_collection_nr)
 841     {
 842       lr_error (lr, _("character map `%s' already defined"), name);
 843       return;
 844     }
 845
 846   if (ctype->map_collection_nr == MAX_NR_CHARMAP)
 847     /* Exit code 2 is prescribed in P1003.2b.  */
 848     error (2, 0, _("\
 849 implementation limit: no more than %d character maps allowed"),
 850            MAX_NR_CHARMAP);
 851
 852   ctype->mapnames[cnt] = name;
 853
 854   if (max_chars == 0)
 855     ctype->map_collection_max[cnt] = charset->mb_cur_max == 1 ? 256 : 512;
 856   else
 857     ctype->map_collection_max[cnt] = max_chars;
 858
 859   ctype->map_collection[cnt] = (u_int32_t *)
 860     xmalloc (sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
 861   memset (ctype->map_collection[cnt], '\0',
 862           sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
 863   ctype->map_collection_act[cnt] = 256;
 864
 865   ++ctype->map_collection_nr;
 866 }
 867
 868
 869 /* We have to be prepared that TABLE, MAX, and ACT can be NULL.  This
 870    is possible if we only want ot extend the name array.  */
 871 static u_int32_t *
 872 find_idx (struct locale_ctype_t *ctype, u_int32_t **table, size_t *max,
 873           size_t *act, unsigned int idx)
 874 {
 875   size_t cnt;
 876
 877   if (idx < 256)
 878     return table == NULL ? NULL : &(*table)[idx];
 879
 880   for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
 881     if (ctype->charnames[cnt] == idx)
 882       break;
 883
 884   /* We have to distinguish two cases: the names is found or not.  */
 885   if (cnt == ctype->charnames_act)
 886     {
 887       /* Extend the name array.  */
 888       if (ctype->charnames_act == ctype->charnames_max)
 889         {
 890           ctype->charnames_max *= 2;
 891           ctype->charnames = (unsigned int *)
 892             xrealloc (ctype->charnames,
 893                       sizeof (unsigned int) * ctype->charnames_max);
 894         }
 895       ctype->charnames[ctype->charnames_act++] = idx;
 896     }
 897
 898   if (table == NULL)
 899     /* We have done everything we are asked to do.  */
 900     return NULL;
 901
 902   if (cnt >= *act)
 903     {
 904       if (cnt >= *max)
 905         {
 906           size_t old_max = *max;
 907           do
 908             *max *= 2;
 909           while (*max <= cnt);
 910
 911           *table =
 912             (u_int32_t *) xrealloc (*table, *max * sizeof (unsigned long int));
 913           memset (&(*table)[old_max], '\0',
 914                   (*max - old_max) * sizeof (u_int32_t));
 915         }
 916
 917       (*table)[cnt] = 0;
 918       *act = cnt;
 919     }
 920
 921   return &(*table)[cnt];
 922 }
 923
 924
 925 static void
 926 set_class_defaults (struct locale_ctype_t *ctype, struct charset_t *charset)
 927 {
 928   /* These function defines the default values for the classes and conversions
 929      according to POSIX.2 2.5.2.1.
 930      It may seem that the order of these if-blocks is arbitrary but it is NOT.
 931      Don't move them unless you know what you do!  */
 932
 933   void set_default (int bit, int from, int to)
 934     {
 935       char tmp[2];
 936       int ch;
 937       /* Define string.  */
 938       strcpy (tmp, "?");
 939
 940       for (ch = from; ch <= to; ++ch)
 941         {
 942           unsigned int value;
 943           tmp[0] = ch;
 944
 945           value = charset_find_value (charset, tmp, 1);
 946           if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
 947             {
 948               error (0, 0, _("\
 949 character `%s' not defined while needed as default value"),
 950                      tmp);
 951               continue;
 952             }
 953           else
 954             ELEM (ctype, class_collection, , value) |= bit;
 955         }
 956     }
 957
 958   /* Set default values if keyword was not present.  */
 959   if ((ctype->class_done & BIT (tok_upper)) == 0)
 960     /* "If this keyword [lower] is not specified, the lowercase letters
 961         `A' through `Z', ..., shall automatically belong to this class,
 962         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
 963     set_default (BIT (tok_upper), 'A', 'Z');
 964
 965   if ((ctype->class_done & BIT (tok_lower)) == 0)
 966     /* "If this keyword [lower] is not specified, the lowercase letters
 967         `a' through `z', ..., shall automatically belong to this class,
 968         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
 969     set_default (BIT (tok_lower), 'a', 'z');
 970
 971   if ((ctype->class_done & BIT (tok_alpha)) == 0)
 972     {
 973       /* Table 2-6 in P1003.2 says that characters in class `upper' or
 974          class `lower' *must* be in class `alpha'.  */
 975       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
 976       size_t cnt;
 977
 978       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
 979         if ((ctype->class_collection[cnt] & mask) != 0)
 980           ctype->class_collection[cnt] |= BIT (tok_alpha);
 981     }
 982
 983   if ((ctype->class_done & BIT (tok_digit)) == 0)
 984     /* "If this keyword [digit] is not specified, the digits `0' through
 985         `9', ..., shall automatically belong to this class, with
 986         implementation-defined character values."  [P1003.2, 2.5.2.1]  */
 987     set_default (BIT (tok_digit), '0', '9');
 988
 989   /* "Only characters specified for the `alpha' and `digit' keyword
 990      shall be specified.  Characters specified for the keyword `alpha'
 991      and `digit' are automatically included in this class.  */
 992   {
 993     unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
 994     size_t cnt;
 995
 996     for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
 997       if ((ctype->class_collection[cnt] & mask) != 0)
 998         ctype->class_collection[cnt] |= BIT (tok_alnum);
 999   }
1000
1001   if ((ctype->class_done & BIT (tok_space)) == 0)
1002     /* "If this keyword [space] is not specified, the characters <space>,
1003         <form-feed>, <newline>, <carriage-return>, <tab>, and
1004         <vertical-tab>, ..., shall automatically belong to this class,
1005         with implementation-defined character values."  [P1003.2, 2.5.2.1]  */
1006     {
1007       unsigned int value;
1008
1009       value = charset_find_value (charset, "space", 5);
1010       if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1011         error (0, 0, _("\
1012 character `%s' not defined while needed as default value"),
1013                "<space>");
1014       else
1015         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1016
1017       value = charset_find_value (charset, "form-feed", 9);
1018       if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1019         error (0, 0, _("\
1020 character `%s' not defined while needed as default value"),
1021                "<form-feed>");
1022       else
1023         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1024
1025       value = charset_find_value (charset, "newline", 7);
1026       if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1027         error (0, 0, _("\
1028 character `%s' not defined while needed as default value"),
1029                "<newline>");
1030       else
1031         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1032
1033       value = charset_find_value (charset, "carriage-return", 15);
1034       if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1035         error (0, 0, _("\
1036 character `%s' not defined while needed as default value"),
1037                "<carriage-return>");
1038       else
1039         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1040
1041       value = charset_find_value (charset, "tab", 3);
1042       if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1043         error (0, 0, _("\
1044 character `%s' not defined while needed as default value"),
1045                "<tab>");
1046       else
1047         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1048
1049       value = charset_find_value (charset, "vertical-tab", 12);
1050       if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1051         error (0, 0, _("\
1052 character `%s' not defined while needed as default value"),
1053                "<vertical-tab>");
1054       else
1055         ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1056     }
1057
1058   if ((ctype->class_done & BIT (tok_xdigit)) == 0)
1059     /* "If this keyword is not specified, the digits `0' to `9', the
1060         uppercase letters `A' through `F', and the lowercase letters `a'
1061         through `f', ..., shell automatically belong to this class, with
1062         implementation defined character values."  [P1003.2, 2.5.2.1]  */
1063     {
1064       set_default (BIT (tok_xdigit), '0', '9');
1065       set_default (BIT (tok_xdigit), 'A', 'F');
1066       set_default (BIT (tok_xdigit), 'a', 'f');
1067     }
1068
1069   if ((ctype->class_done & BIT (tok_blank)) == 0)
1070     /* "If this keyword [blank] is unspecified, the characters <space> and
1071        <tab> shall belong to this character class."  [P1003.2, 2.5.2.1]  */
1072    {
1073       unsigned int value;
1074
1075       value = charset_find_value (charset, "space", 5);
1076       if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1077         error (0, 0, _("\
1078 character `%s' not defined while needed as default value"),
1079                "<space>");
1080       else
1081         ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
1082
1083       value = charset_find_value (charset, "tab", 3);
1084       if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1085         error (0, 0, _("\
1086 character `%s' not defined while needed as default value"),
1087                "<tab>");
1088       else
1089         ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
1090     }
1091
1092   if ((ctype->class_done & BIT (tok_graph)) == 0)
1093     /* "If this keyword [graph] is not specified, characters specified for
1094         the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
1095         shall belong to this character class."  [P1003.2, 2.5.2.1]  */
1096     {
1097       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
1098         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
1099       size_t cnt;
1100
1101       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1102         if ((ctype->class_collection[cnt] & mask) != 0)
1103           ctype->class_collection[cnt] |= BIT (tok_graph);
1104     }
1105
1106   if ((ctype->class_done & BIT (tok_print)) == 0)
1107     /* "If this keyword [print] is not provided, characters specified for
1108         the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
1109         and the <space> character shall belong to this character class."
1110         [P1003.2, 2.5.2.1]  */
1111     {
1112       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
1113         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
1114       size_t cnt;
1115       wchar_t space;
1116
1117       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1118         if ((ctype->class_collection[cnt] & mask) != 0)
1119           ctype->class_collection[cnt] |= BIT (tok_print);
1120
1121       space = charset_find_value (charset, "space", 5);
1122       if (space == ILLEGAL_CHAR_VALUE && !be_quiet)
1123         error (0, 0, _("\
1124 character `%s' not defined while needed as default value"),
1125                "<space>");
1126       else
1127         ELEM (ctype, class_collection, , space) |= BIT (tok_print);
1128     }
1129
1130   if (ctype->toupper_done == 0)
1131     /* "If this keyword [toupper] is not specified, the lowercase letters
1132         `a' through `z', and their corresponding uppercase letters `A' to
1133         `Z', ..., shall automatically be included, with implementation-
1134         defined character values."  [P1003.2, 2.5.2.1]  */
1135     {
1136       char tmp[4];
1137       int ch;
1138
1139       strcpy (tmp, "<?>");
1140
1141       for (ch = 'a'; ch <= 'z'; ++ch)
1142         {
1143           unsigned int value_from, value_to;
1144
1145           tmp[1] = (char) ch;
1146
1147           value_from = charset_find_value (charset, &tmp[1], 1);
1148           if ((wchar_t) value_from == ILLEGAL_CHAR_VALUE && !be_quiet)
1149             {
1150               error (0, 0, _("\
1151 character `%s' not defined while needed as default value"),
1152                      tmp);
1153               continue;
1154             }
1155
1156           /* This conversion is implementation defined.  */
1157           tmp[1] = (char) (ch + ('A' - 'a'));
1158           value_to = charset_find_value (charset, &tmp[1], 1);
1159           if ((wchar_t) value_to == ILLEGAL_CHAR_VALUE && !be_quiet)
1160             {
1161               error (0, 0, _("\
1162 character `%s' not defined while needed as default value"),
1163                      tmp);
1164               continue;
1165             }
1166
1167           /* The index [0] is determined by the order of the
1168              `ctype_map_newP' calls in `ctype_startup'.  */
1169           ELEM (ctype, map_collection, [0], value_from) = value_to;
1170         }
1171     }
1172
1173   if (ctype->tolower_done == 0)
1174     /* "If this keyword [tolower] is not specified, the mapping shall be
1175        the reverse mapping of the one specified to `toupper'."  [P1003.2]  */
1176     {
1177       size_t cnt;
1178
1179       for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
1180         if (ctype->map_collection[0][cnt] != 0)
1181           ELEM (ctype, map_collection, [1],
1182                 ctype->map_collection[0][cnt])
1183             = ctype->charnames[cnt];
1184     }
1185 }
1186
1187
1188 static void
1189 allocate_arrays (struct locale_ctype_t *ctype, struct charset_t *charset)
1190 {
1191   size_t idx;
1192
1193   /* First we have to decide how we organize the arrays.  It is easy
1194      for a one-byte character set.  But multi-byte character set
1195      cannot be stored flat because the chars might be sparsely used.
1196      So we determine an optimal hashing function for the used
1197      characters.
1198
1199      We use a very trivial hashing function to store the sparse
1200      table.  CH % TABSIZE is used as an index.  To solve multiple hits
1201      we have N planes.  This guarantees a fixed search time for a
1202      character [N / 2].  In the following code we determine the minmum
1203      value for TABSIZE * N, where TABSIZE >= 256.  */
1204   size_t min_total = UINT_MAX;
1205   size_t act_size = 256;
1206
1207   if (!be_quiet)
1208     fputs (_("\
1209 Computing table size for character classes might take a while..."),
1210            stderr);
1211
1212   while (act_size < min_total)
1213     {
1214       size_t cnt[act_size];
1215       size_t act_planes = 1;
1216
1217       memset (cnt, '\0', sizeof cnt);
1218
1219       for (idx = 0; idx < 256; ++idx)
1220         cnt[idx] = 1;
1221
1222       for (idx = 0; idx < ctype->charnames_act; ++idx)
1223         if (ctype->charnames[idx] >= 256)
1224           {
1225             size_t nr = ctype->charnames[idx] % act_size;
1226
1227             if (++cnt[nr] > act_planes)
1228               {
1229                 act_planes = cnt[nr];
1230                 if (act_size * act_planes >= min_total)
1231                   break;
1232               }
1233           }
1234
1235       if (act_size * act_planes < min_total)
1236         {
1237           min_total = act_size * act_planes;
1238           ctype->plane_size = act_size;
1239           ctype->plane_cnt = act_planes;
1240         }
1241
1242       ++act_size;
1243     }
1244
1245   if (!be_quiet)
1246     fputs (_(" done\n"), stderr);
1247
1248
1249 #if __BYTE_ORDER == __LITTLE_ENDIAN
1250 # define NAMES_B1 ctype->names_el
1251 # define NAMES_B2 ctype->names_eb
1252 #else
1253 # define NAMES_B1 ctype->names_eb
1254 # define NAMES_B2 ctype->names_el
1255 #endif
1256
1257   ctype->names_eb = (u_int32_t *) xcalloc (ctype->plane_size
1258                                            * ctype->plane_cnt,
1259                                            sizeof (u_int32_t));
1260   ctype->names_el = (u_int32_t *) xcalloc (ctype->plane_size
1261                                            * ctype->plane_cnt,
1262                                            sizeof (u_int32_t));
1263
1264   for (idx = 1; idx < 256; ++idx)
1265     NAMES_B1[idx] = idx;
1266
1267   /* Trick: change the 0th entry's name to 1 to mark the cell occupied.  */
1268   NAMES_B1[0] = 1;
1269
1270   for (idx = 256; idx < ctype->charnames_act; ++idx)
1271     {
1272       size_t nr = (ctype->charnames[idx] % ctype->plane_size);
1273       size_t depth = 0;
1274
1275       while (NAMES_B1[nr + depth * ctype->plane_size])
1276         ++depth;
1277       assert (depth < ctype->plane_cnt);
1278
1279       NAMES_B1[nr + depth * ctype->plane_size] = ctype->charnames[idx];
1280
1281       /* Now for faster access remember the index in the NAMES_B array.  */
1282       ctype->charnames[idx] = nr + depth * ctype->plane_size;
1283     }
1284   NAMES_B1[0] = 0;
1285
1286   for (idx = 0; idx < ctype->plane_size * ctype->plane_cnt; ++idx)
1287     NAMES_B2[idx] = SWAPU32 (NAMES_B1[idx]);
1288
1289
1290   /* You wonder about this amount of memory?  This is only because some
1291      users do not manage to address the array with unsigned values or
1292      data types with range >= 256.  '\200' would result in the array
1293      index -128.  To help these poor people we duplicate the entries for
1294      128 up to 255 below the entry for \0.  */
1295   ctype->ctype_b = (char_class_t *) xcalloc (256 + 128,
1296                                              sizeof (char_class_t));
1297   ctype->ctype32_b = (char_class32_t *) xcalloc (ctype->plane_size
1298                                                  * ctype->plane_cnt,
1299                                                  sizeof (char_class32_t));
1300
1301   /* Fill in the character class information.  */
1302 #if __BYTE_ORDER == __LITTLE_ENDIAN
1303 # define TRANS(w) CHAR_CLASS_TRANS (w)
1304 # define TRANS32(w) CHAR_CLASS32_TRANS (w)
1305 #else
1306 # define TRANS(w) (w)
1307 # define TRANS32(w) (w)
1308 #endif
1309
1310   for (idx = 0; idx < ctype->class_collection_act; ++idx)
1311     if (ctype->charnames[idx] < 256)
1312       ctype->ctype_b[128 + ctype->charnames[idx]]
1313         = TRANS (ctype->class_collection[idx]);
1314
1315   /* Mirror first 127 entries.  We must take care that entry -1 is not
1316      mirrored because EOF == -1.  */
1317   for (idx = 0; idx < 127; ++idx)
1318     ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
1319
1320   /* The 32 bit array contains all characters.  */
1321   for (idx = 0; idx < ctype->class_collection_act; ++idx)
1322     ctype->ctype32_b[ctype->charnames[idx]]
1323       = TRANS32 (ctype->class_collection[idx]);
1324
1325   /* Room for table of mappings.  */
1326   ctype->map_eb = (u_int32_t **) xmalloc (ctype->map_collection_nr
1327                                           * sizeof (u_int32_t *));
1328   ctype->map_el = (u_int32_t **) xmalloc (ctype->map_collection_nr
1329                                           * sizeof (u_int32_t *));
1330
1331   /* Fill in all mappings.  */
1332   for (idx = 0; idx < ctype->map_collection_nr; ++idx)
1333     {
1334       unsigned int idx2;
1335
1336       /* Allocate table.  */
1337       ctype->map_eb[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
1338                                                    * ctype->plane_cnt + 128)
1339                                                   * sizeof (u_int32_t));
1340       ctype->map_el[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
1341                                                    * ctype->plane_cnt + 128)
1342                                                   * sizeof (u_int32_t));
1343
1344 #if __BYTE_ORDER == __LITTLE_ENDIAN
1345 # define MAP_B1 ctype->map_el
1346 # define MAP_B2 ctype->map_eb
1347 #else
1348 # define MAP_B1 ctype->map_eb
1349 # define MAP_B2 ctype->map_el
1350 #endif
1351
1352       /* Copy default value (identity mapping).  */
1353       memcpy (&MAP_B1[idx][128], NAMES_B1,
1354               ctype->plane_size * ctype->plane_cnt * sizeof (u_int32_t));
1355
1356       /* Copy values from collection.  */
1357       for (idx2 = 0; idx2 < ctype->map_collection_act[idx]; ++idx2)
1358         if (ctype->map_collection[idx][idx2] != 0)
1359           MAP_B1[idx][128 + ctype->charnames[idx2]] =
1360             ctype->map_collection[idx][idx2];
1361
1362       /* Mirror first 127 entries.  We must take care not to map entry
1363          -1 because EOF == -1.  */
1364       for (idx2 = 0; idx2 < 127; ++idx2)
1365         MAP_B1[idx][idx2] = MAP_B1[idx][256 + idx2];
1366
1367       /* EOF must map to EOF.  */
1368       MAP_B1[idx][127] = EOF;
1369
1370       /* And now the other byte order.  */
1371       for (idx2 = 0; idx2 < ctype->plane_size * ctype->plane_cnt + 128; ++idx2)
1372         MAP_B2[idx][idx2] = SWAPU32 (MAP_B1[idx][idx2]);
1373     }
1374
1375   /* Extra array for class and map names.  */
1376   ctype->class_name_ptr = (u_int32_t *) xmalloc (ctype->nr_charclass
1377                                                  * sizeof (u_int32_t));
1378   ctype->map_name_ptr = (u_int32_t *) xmalloc (ctype->map_collection_nr
1379                                                * sizeof (u_int32_t));
1380
1381   /* Array for width information.  Because the expected width are very
1382      small we use only one single byte.  This save space and we need
1383      not provide the information twice with both endianesses.  */
1384   ctype->width = (unsigned char *) xmalloc (ctype->plane_size
1385                                             * ctype->plane_cnt);
1386   /* Initialize with default width value.  */
1387   memset (ctype->width, charset->width_default,
1388           ctype->plane_size * ctype->plane_cnt);
1389   if (charset->width_rules != NULL)
1390     {
1391       size_t cnt;
1392
1393       for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
1394         if (charset->width_rules[cnt].width != charset->width_default)
1395           for (idx = charset->width_rules[cnt].from;
1396                idx <= charset->width_rules[cnt].to; ++idx)
1397             {
1398               size_t nr = idx % ctype->plane_size;
1399               size_t depth = 0;
1400
1401               while (NAMES_B1[nr + depth * ctype->plane_size] != nr)
1402                 ++depth;
1403               assert (depth < ctype->plane_cnt);
1404
1405               ctype->width[nr + depth * ctype->plane_size]
1406                 = charset->width_rules[cnt].width;
1407             }
1408     }
1409
1410   /* Compute MB_CUR_MAX.  Please note the value mb_cur_max in the
1411      character set definition gives the number of bytes in the wide
1412      character representation.  We compute the number of bytes used
1413      for the UTF-8 encoded form.  */
1414   ctype->mb_cur_max = ((int []) { 2, 3, 5, 6 }) [charset->mb_cur_max - 1];
1415
1416   /* We need the name of the currently used 8-bit character set to
1417      make correct conversion between this 8-bit representation and the
1418      ISO 10646 character set used internally for wide characters.  */
1419   ctype->codeset_name = charset->code_set_name;
1420 }