Update.
[glibc.git] / locale / programs / ld-ctype.c
blob3c0c8e870d3c146c08822a7460bfdd2979ac8037
1 /* Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
24 #include <alloca.h>
25 #include <endian.h>
26 #include <limits.h>
27 #include <string.h>
29 #include "locales.h"
30 #include "localeinfo.h"
31 #include "langinfo.h"
32 #include "locfile-token.h"
33 #include "stringtrans.h"
35 /* Uncomment the following line in the production version. */
36 /* define NDEBUG 1 */
37 #include <assert.h>
/* Allocation helpers; only declared here, the definitions are not part
   of this file.  */
void *xmalloc (size_t __n);
void *xcalloc (size_t __n, size_t __s);
void *xrealloc (void *__ptr, size_t __n);


/* Bit index of a character class token, counted from `tok_upper', and
   the corresponding single-bit mask.  */
#define BITPOS(class) ((class) - tok_upper)
#define BIT(class) (1 << BITPOS (class))

/* Lvalue for the entry belonging to character VALUE in the table
   CTYPE->COLLECTION (optionally subscripted by IDX).  The lookup is
   done by `find_idx', which may grow the table.  */
#define ELEM(ctype, collection, idx, value)                                  \
  *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,    \
             &ctype->collection##_act idx, value)

/* Reverse the byte order of a 32-bit unsigned value.  */
#define SWAPU32(w) \
  (((w) >> 24) | (((w) >> 8) & 0xff00) | (((w) & 0xff00) << 8) | ((w) << 24))

/* Exchange the two low-order bytes of W.  */
#define SWAPU16(w) \
  ((((w) & 0xff) << 8) | (((w) >> 8) & 0xff))


/* To be compatible with former implementations we for now restrict
   the number of bits for character classes to 16.  When compatibility
   is not necessary anymore increase the number to 32.  */
#define char_class_t u_int16_t
#define CHAR_CLASS_TRANS SWAPU16
#define char_class32_t u_int32_t
#define CHAR_CLASS32_TRANS SWAPU32
69 /* The real definition of the struct for the LC_CTYPE locale. */
70 struct locale_ctype_t
72 unsigned int *charnames;
73 size_t charnames_max;
74 size_t charnames_act;
76 /* We will allow up to 8 * sizeof(u_int32_t) - 1 character classes. */
77 #define MAX_NR_CHARCLASS (8 * sizeof (u_int32_t) - 1)
78 size_t nr_charclass;
79 const char *classnames[MAX_NR_CHARCLASS];
80 unsigned long int current_class_mask;
81 unsigned int last_class_char;
82 u_int32_t *class_collection;
83 size_t class_collection_max;
84 size_t class_collection_act;
85 unsigned long int class_done;
87 /* If the following number ever turns out to be too small simply
88 increase it. But I doubt it will. --drepper@gnu */
89 #define MAX_NR_CHARMAP 16
90 const char *mapnames[MAX_NR_CHARMAP];
91 u_int32_t *map_collection[MAX_NR_CHARMAP];
92 size_t map_collection_max[MAX_NR_CHARMAP];
93 size_t map_collection_act[MAX_NR_CHARMAP];
94 size_t map_collection_nr;
95 size_t last_map_idx;
96 unsigned int from_map_char;
97 int toupper_done;
98 int tolower_done;
100 /* The arrays for the binary representation. */
101 u_int32_t plane_size;
102 u_int32_t plane_cnt;
103 char_class_t *ctype_b;
104 char_class32_t *ctype32_b;
105 u_int32_t *names_el;
106 u_int32_t *names_eb;
107 u_int32_t **map_eb;
108 u_int32_t **map_el;
109 u_int32_t *class_name_ptr;
110 u_int32_t *map_name_ptr;
111 unsigned char *width;
112 u_int32_t mb_cur_max;
113 const char *codeset_name;
/* Prototypes for local functions.  */

/* Register a new character class resp. character map called NAME.  */
static void ctype_class_newP (struct linereader *lr,
                              struct locale_ctype_t *ctype,
                              const char *name);
static void ctype_map_newP (struct linereader *lr,
                            struct locale_ctype_t *ctype,
                            const char *name,
                            struct charset_t *charset);

/* Look up (and create when missing) the table slot of character IDX.  */
static u_int32_t *find_idx (struct locale_ctype_t *ctype,
                            u_int32_t **table, size_t *max, size_t *act,
                            unsigned int idx);

/* Helpers used when finishing and writing the category.  */
static void set_class_defaults (struct locale_ctype_t *ctype,
                                struct charset_t *charset);
static void allocate_arrays (struct locale_ctype_t *ctype,
                             struct charset_t *charset);
131 void
132 ctype_startup (struct linereader *lr, struct localedef_t *locale,
133 struct charset_t *charset)
135 unsigned int cnt;
136 struct locale_ctype_t *ctype;
138 /* We have a definition for LC_CTYPE. */
139 copy_posix.mask &= ~(1 << LC_CTYPE);
141 /* It is important that we always use UCS1 encoding for strings now. */
142 encoding_method = ENC_UCS1;
144 /* Allocate the needed room. */
145 locale->categories[LC_CTYPE].ctype = ctype =
146 (struct locale_ctype_t *) xmalloc (sizeof (struct locale_ctype_t));
148 /* We have no names seen yet. */
149 ctype->charnames_max = charset->mb_cur_max == 1 ? 256 : 512;
150 ctype->charnames =
151 (unsigned int *) xmalloc (ctype->charnames_max * sizeof (unsigned int));
152 for (cnt = 0; cnt < 256; ++cnt)
153 ctype->charnames[cnt] = cnt;
154 ctype->charnames_act = 256;
156 /* Fill character class information. */
157 ctype->nr_charclass = 0;
158 ctype->current_class_mask = 0;
159 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
160 /* The order of the following instructions determines the bit
161 positions! */
162 ctype_class_newP (lr, ctype, "upper");
163 ctype_class_newP (lr, ctype, "lower");
164 ctype_class_newP (lr, ctype, "alpha");
165 ctype_class_newP (lr, ctype, "digit");
166 ctype_class_newP (lr, ctype, "xdigit");
167 ctype_class_newP (lr, ctype, "space");
168 ctype_class_newP (lr, ctype, "print");
169 ctype_class_newP (lr, ctype, "graph");
170 ctype_class_newP (lr, ctype, "blank");
171 ctype_class_newP (lr, ctype, "cntrl");
172 ctype_class_newP (lr, ctype, "punct");
173 ctype_class_newP (lr, ctype, "alnum");
175 ctype->class_collection_max = charset->mb_cur_max == 1 ? 256 : 512;
176 ctype->class_collection
177 = (u_int32_t *) xmalloc (sizeof (unsigned long int)
178 * ctype->class_collection_max);
179 memset (ctype->class_collection, '\0',
180 sizeof (unsigned long int) * ctype->class_collection_max);
181 ctype->class_collection_act = 256;
183 /* Fill character map information. */
184 ctype->map_collection_nr = 0;
185 ctype->last_map_idx = MAX_NR_CHARMAP;
186 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
187 ctype_map_newP (lr, ctype, "toupper", charset);
188 ctype_map_newP (lr, ctype, "tolower", charset);
190 /* Fill first 256 entries in `toupper' and `tolower' arrays. */
191 for (cnt = 0; cnt < 256; ++cnt)
193 ctype->map_collection[0][cnt] = cnt;
194 ctype->map_collection[1][cnt] = cnt;
199 void
200 ctype_finish (struct localedef_t *locale, struct charset_t *charset)
202 /* See POSIX.2, table 2-6 for the meaning of the following table. */
203 #define NCLASS 12
204 static const struct
206 const char *name;
207 const char allow[NCLASS];
209 valid_table[NCLASS] =
211 /* The order is important. See token.h for more information.
212 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
213 { "upper", "--MX-XDDXXX-" },
214 { "lower", "--MX-XDDXXX-" },
215 { "alpha", "---X-XDDXXX-" },
216 { "digit", "XXX--XDDXXX-" },
217 { "xdigit", "-----XDDXXX-" },
218 { "space", "XXXXX------X" },
219 { "print", "---------X--" },
220 { "graph", "---------X--" },
221 { "blank", "XXXXXM-----X" },
222 { "cntrl", "XXXXX-XX--XX" },
223 { "punct", "XXXXX-DD-X-X" },
224 { "alnum", "-----XDDXXX-" }
226 size_t cnt;
227 int cls1, cls2;
228 unsigned int space_value;
229 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
231 /* Set default value for classes not specified. */
232 set_class_defaults (ctype, charset);
234 /* Check according to table. */
235 for (cnt = 0; cnt < ctype->class_collection_max; ++cnt)
237 unsigned long int tmp;
239 tmp = ctype->class_collection[cnt];
240 if (tmp == 0)
241 continue;
243 for (cls1 = 0; cls1 < NCLASS; ++cls1)
244 if ((tmp & (1 << cls1)) != 0)
245 for (cls2 = 0; cls2 < NCLASS; ++cls2)
246 if (valid_table[cls1].allow[cls2] != '-')
248 int eq = (tmp & (1 << cls2)) != 0;
249 switch (valid_table[cls1].allow[cls2])
251 case 'M':
252 if (!eq)
254 char buf[17];
255 char *cp = buf;
256 unsigned int value;
258 value = ctype->charnames[cnt];
260 if ((value & 0xff000000) != 0)
261 cp += sprintf (cp, "\\%o", (value >> 24) & 0xff);
262 if ((value & 0xffff0000) != 0)
263 cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
264 if ((value & 0xffffff00) != 0)
265 cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
266 sprintf (cp, "\\%o", value & 0xff);
268 if (!be_quiet)
269 error (0, 0, _("\
270 character %s'%s' in class `%s' must be in class `%s'"), value > 256 ? "L" : "",
271 cp, valid_table[cls1].name,
272 valid_table[cls2].name);
274 break;
276 case 'X':
277 if (eq)
279 char buf[17];
280 char *cp = buf;
281 unsigned int value;
283 value = ctype->charnames[cnt];
285 if ((value & 0xff000000) != 0)
286 cp += sprintf (cp, "\\%o", value >> 24);
287 if ((value & 0xffff0000) != 0)
288 cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
289 if ((value & 0xffffff00) != 0)
290 cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
291 sprintf (cp, "\\%o", value & 0xff);
293 if (!be_quiet)
294 error (0, 0, _("\
295 character %s'%s' in class `%s' must not be in class `%s'"),
296 value > 256 ? "L" : "", cp,
297 valid_table[cls1].name,
298 valid_table[cls2].name);
300 break;
302 case 'D':
303 ctype->class_collection[cnt] |= 1 << cls2;
304 break;
306 default:
307 error (5, 0, _("internal error in %s, line %u"),
308 __FUNCTION__, __LINE__);
313 /* ... and now test <SP> as a special case. */
314 space_value = charset_find_value (&charset->char_table, "SP", 2);
315 if ((wchar_t) space_value == ILLEGAL_CHAR_VALUE)
316 space_value = charset_find_value (&charset->char_table, "space", 5);
317 if ((wchar_t) space_value == ILLEGAL_CHAR_VALUE)
319 if (!be_quiet)
320 error (0, 0, _("character <SP> not defined in character map"));
322 else if (((cnt = BITPOS (tok_space),
323 (ELEM (ctype, class_collection, , space_value)
324 & BIT (tok_space)) == 0)
325 || (cnt = BITPOS (tok_blank),
326 (ELEM (ctype, class_collection, , space_value)
327 & BIT (tok_blank)) == 0)))
329 if (!be_quiet)
330 error (0, 0, _("<SP> character not in class `%s'"),
331 valid_table[cnt].name);
333 else if (((cnt = BITPOS (tok_punct),
334 (ELEM (ctype, class_collection, , space_value)
335 & BIT (tok_punct)) != 0)
336 || (cnt = BITPOS (tok_graph),
337 (ELEM (ctype, class_collection, , space_value)
338 & BIT (tok_graph))
339 != 0)))
341 if (!be_quiet)
342 error (0, 0, _("<SP> character must not be in class `%s'"),
343 valid_table[cnt].name);
345 else
346 ELEM (ctype, class_collection, , space_value) |= BIT (tok_print);
348 /* Now that the tests are done make sure the name array contains all
349 characters which are handled in the WIDTH section of the
350 character set definition file. */
351 if (charset->width_rules != NULL)
352 for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
354 size_t inner;
355 for (inner = charset->width_rules[cnt].from;
356 inner <= charset->width_rules[cnt].to; ++inner)
357 (void) find_idx (ctype, NULL, NULL, NULL, inner);
362 void
363 ctype_output (struct localedef_t *locale, struct charset_t *charset,
364 const char *output_path)
366 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
367 const size_t nelems = (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)
368 + 2 * (ctype->map_collection_nr - 2));
369 struct iovec iov[2 + nelems + ctype->nr_charclass
370 + ctype->map_collection_nr];
371 struct locale_file data;
372 u_int32_t idx[nelems];
373 size_t elem, cnt, offset, total;
376 if ((locale->binary & (1 << LC_CTYPE)) != 0)
378 iov[0].iov_base = ctype;
379 iov[0].iov_len = locale->len[LC_CTYPE];
381 write_locale_data (output_path, "LC_CTYPE", 1, iov);
383 return;
387 /* Now prepare the output: Find the sizes of the table we can use. */
388 allocate_arrays (ctype, charset);
390 data.magic = LIMAGIC (LC_CTYPE);
391 data.n = nelems;
392 iov[0].iov_base = (void *) &data;
393 iov[0].iov_len = sizeof (data);
395 iov[1].iov_base = (void *) idx;
396 iov[1].iov_len = sizeof (idx);
398 idx[0] = iov[0].iov_len + iov[1].iov_len;
399 offset = 0;
401 for (elem = 0; elem < nelems; ++elem)
403 if (elem < _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE))
404 switch (elem)
406 #define CTYPE_DATA(name, base, len) \
407 case _NL_ITEM_INDEX (name): \
408 iov[2 + elem + offset].iov_base = (base); \
409 iov[2 + elem + offset].iov_len = (len); \
410 if (elem + 1 < nelems) \
411 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
412 break
414 CTYPE_DATA (_NL_CTYPE_CLASS,
415 ctype->ctype_b,
416 (256 + 128) * sizeof (char_class_t));
418 CTYPE_DATA (_NL_CTYPE_TOUPPER_EB,
419 ctype->map_eb[0],
420 (ctype->plane_size * ctype->plane_cnt + 128)
421 * sizeof (u_int32_t));
422 CTYPE_DATA (_NL_CTYPE_TOLOWER_EB,
423 ctype->map_eb[1],
424 (ctype->plane_size * ctype->plane_cnt + 128)
425 * sizeof (u_int32_t));
427 CTYPE_DATA (_NL_CTYPE_TOUPPER_EL,
428 ctype->map_el[0],
429 (ctype->plane_size * ctype->plane_cnt + 128)
430 * sizeof (u_int32_t));
431 CTYPE_DATA (_NL_CTYPE_TOLOWER_EL,
432 ctype->map_el[1],
433 (ctype->plane_size * ctype->plane_cnt + 128)
434 * sizeof (u_int32_t));
436 CTYPE_DATA (_NL_CTYPE_CLASS32,
437 ctype->ctype32_b,
438 (ctype->plane_size * ctype->plane_cnt
439 * sizeof (char_class32_t)));
441 CTYPE_DATA (_NL_CTYPE_NAMES_EB,
442 ctype->names_eb, (ctype->plane_size * ctype->plane_cnt
443 * sizeof (u_int32_t)));
444 CTYPE_DATA (_NL_CTYPE_NAMES_EL,
445 ctype->names_el, (ctype->plane_size * ctype->plane_cnt
446 * sizeof (u_int32_t)));
448 CTYPE_DATA (_NL_CTYPE_HASH_SIZE,
449 &ctype->plane_size, sizeof (u_int32_t));
450 CTYPE_DATA (_NL_CTYPE_HASH_LAYERS,
451 &ctype->plane_cnt, sizeof (u_int32_t));
453 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
454 /* The class name array. */
455 total = 0;
456 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
458 iov[2 + elem + offset].iov_base
459 = (void *) ctype->classnames[cnt];
460 iov[2 + elem + offset].iov_len
461 = strlen (ctype->classnames[cnt]) + 1;
462 total += iov[2 + elem + offset].iov_len;
464 iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
465 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
466 total += 1 + (4 - ((total + 1) % 4));
468 if (elem + 1 < nelems)
469 idx[elem + 1] = idx[elem] + total;
470 break;
472 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
473 /* The class name array. */
474 total = 0;
475 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
477 iov[2 + elem + offset].iov_base
478 = (void *) ctype->mapnames[cnt];
479 iov[2 + elem + offset].iov_len
480 = strlen (ctype->mapnames[cnt]) + 1;
481 total += iov[2 + elem + offset].iov_len;
483 iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
484 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
485 total += 1 + (4 - ((total + 1) % 4));
487 if (elem + 1 < nelems)
488 idx[elem + 1] = idx[elem] + total;
489 break;
491 CTYPE_DATA (_NL_CTYPE_WIDTH,
492 ctype->width, ctype->plane_size * ctype->plane_cnt);
494 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
495 &ctype->mb_cur_max, sizeof (u_int32_t));
497 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
498 total = strlen (ctype->codeset_name) + 1;
499 if (total % 4 == 0)
500 iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
501 else
503 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
504 memset (mempcpy (iov[2 + elem + offset].iov_base,
505 ctype->codeset_name, total),
506 '\0', 4 - (total & 3));
507 total = (total + 3) & ~3;
509 iov[2 + elem + offset].iov_len = total;
510 if (elem + 1 < nelems)
511 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
512 break;
514 default:
515 assert (! "unknown CTYPE element");
517 else
519 /* Handle extra maps. */
520 size_t nr = (elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) >> 1;
522 if (((elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) & 1) == 0)
523 iov[2 + elem + offset].iov_base = ctype->map_eb[nr];
524 else
525 iov[2 + elem + offset].iov_base = ctype->map_el[nr];
527 iov[2 + elem + offset].iov_len = ((ctype->plane_size
528 * ctype->plane_cnt + 128)
529 * sizeof (u_int32_t));
531 if (elem + 1 < nelems)
532 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
536 assert (2 + elem + offset == (nelems + ctype->nr_charclass
537 + ctype->map_collection_nr + 2));
539 write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
543 /* Character class handling. */
544 void
545 ctype_class_new (struct linereader *lr, struct localedef_t *locale,
546 enum token_t tok, struct token *code,
547 struct charset_t *charset)
549 ctype_class_newP (lr, locale->categories[LC_CTYPE].ctype,
550 code->val.str.start);
555 ctype_is_charclass (struct linereader *lr, struct localedef_t *locale,
556 const char *name)
558 size_t cnt;
560 for (cnt = 0; cnt < locale->categories[LC_CTYPE].ctype->nr_charclass; ++cnt)
561 if (strcmp (name, locale->categories[LC_CTYPE].ctype->classnames[cnt])
562 == 0)
563 return 1;
565 return 0;
569 void
570 ctype_class_start (struct linereader *lr, struct localedef_t *locale,
571 enum token_t tok, const char *str,
572 struct charset_t *charset)
574 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
575 size_t cnt;
577 switch (tok)
579 case tok_upper:
580 str = "upper";
581 break;
582 case tok_lower:
583 str = "lower";
584 break;
585 case tok_alpha:
586 str = "alpha";
587 break;
588 case tok_digit:
589 str = "digit";
590 break;
591 case tok_xdigit:
592 str = "xdigit";
593 break;
594 case tok_space:
595 str = "space";
596 break;
597 case tok_print:
598 str = "print";
599 break;
600 case tok_graph:
601 str = "graph";
602 break;
603 case tok_blank:
604 str = "blank";
605 break;
606 case tok_cntrl:
607 str = "cntrl";
608 break;
609 case tok_punct:
610 str = "punct";
611 break;
612 case tok_alnum:
613 str = "alnum";
614 break;
615 case tok_ident:
616 break;
617 default:
618 assert (! "illegal token as class name: should not happen");
621 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
622 if (strcmp (str, ctype->classnames[cnt]) == 0)
623 break;
625 if (cnt >= ctype->nr_charclass)
626 assert (! "unknown class in class definition: should not happen");
628 ctype->class_done |= BIT (tok);
630 ctype->current_class_mask = 1 << cnt;
631 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
635 void
636 ctype_class_from (struct linereader *lr, struct localedef_t *locale,
637 struct token *code, struct charset_t *charset)
639 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
640 unsigned int value;
642 value = charset_find_value (&charset->char_table, code->val.str.start,
643 code->val.str.len);
645 ctype->last_class_char = value;
647 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
648 /* In the LC_CTYPE category it is no error when a character is
649 not found. This has to be ignored silently. */
650 return;
652 *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
653 &ctype->class_collection_act, value)
654 |= ctype->current_class_mask;
658 void
659 ctype_class_to (struct linereader *lr, struct localedef_t *locale,
660 struct token *code, struct charset_t *charset)
662 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
663 unsigned int value, cnt;
665 value = charset_find_value (&charset->char_table, code->val.str.start,
666 code->val.str.len);
668 /* In the LC_CTYPE category it is no error when a character is
669 not found. This has to be ignored silently. */
670 if ((wchar_t) ctype->last_class_char != ILLEGAL_CHAR_VALUE
671 && (wchar_t) value != ILLEGAL_CHAR_VALUE)
672 for (cnt = ctype->last_class_char + 1; cnt <= value; ++cnt)
673 *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
674 &ctype->class_collection_act, cnt)
675 |= ctype->current_class_mask;
677 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
681 void
682 ctype_class_end (struct linereader *lr, struct localedef_t *locale)
684 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
686 /* We have no special actions to perform here. */
687 ctype->current_class_mask = 0;
688 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
692 /* Character map handling. */
693 void
694 ctype_map_new (struct linereader *lr, struct localedef_t *locale,
695 enum token_t tok, struct token *code,
696 struct charset_t *charset)
698 ctype_map_newP (lr, locale->categories[LC_CTYPE].ctype,
699 code->val.str.start, charset);
704 ctype_is_charconv (struct linereader *lr, struct localedef_t *locale,
705 const char *name)
707 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
708 size_t cnt;
710 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
711 if (strcmp (name, ctype->mapnames[cnt]) == 0)
712 return 1;
714 return 0;
718 void
719 ctype_map_start (struct linereader *lr, struct localedef_t *locale,
720 enum token_t tok, const char *name, struct charset_t *charset)
722 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
723 size_t cnt;
725 switch (tok)
727 case tok_toupper:
728 ctype->toupper_done = 1;
729 name = "toupper";
730 break;
731 case tok_tolower:
732 ctype->tolower_done = 1;
733 name = "tolower";
734 break;
735 case tok_ident:
736 break;
737 default:
738 assert (! "unknown token in category `LC_CTYPE' should not happen");
741 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
742 if (strcmp (name, ctype->mapnames[cnt]) == 0)
743 break;
745 if (cnt == ctype->map_collection_nr)
746 assert (! "unknown token in category `LC_CTYPE' should not happen");
748 ctype->last_map_idx = cnt;
749 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
753 void
754 ctype_map_from (struct linereader *lr, struct localedef_t *locale,
755 struct token *code, struct charset_t *charset)
757 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
758 unsigned int value;
760 value = charset_find_value (&charset->char_table, code->val.str.start,
761 code->val.str.len);
763 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
764 /* In the LC_CTYPE category it is no error when a character is
765 not found. This has to be ignored silently. */
766 return;
768 assert (ctype->last_map_idx < ctype->map_collection_nr);
770 ctype->from_map_char = value;
774 void
775 ctype_map_to (struct linereader *lr, struct localedef_t *locale,
776 struct token *code, struct charset_t *charset)
778 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
779 unsigned int value;
781 value = charset_find_value (&charset->char_table, code->val.str.start,
782 code->val.str.len);
784 if ((wchar_t) ctype->from_map_char == ILLEGAL_CHAR_VALUE
785 || (wchar_t) value == ILLEGAL_CHAR_VALUE)
787 /* In the LC_CTYPE category it is no error when a character is
788 not found. This has to be ignored silently. */
789 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
790 return;
793 *find_idx (ctype, &ctype->map_collection[ctype->last_map_idx],
794 &ctype->map_collection_max[ctype->last_map_idx],
795 &ctype->map_collection_act[ctype->last_map_idx],
796 ctype->from_map_char) = value;
798 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
802 void
803 ctype_map_end (struct linereader *lr, struct localedef_t *locale)
805 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
807 ctype->last_map_idx = MAX_NR_CHARMAP;
808 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
812 /* Local functions. */
813 static void
814 ctype_class_newP (struct linereader *lr, struct locale_ctype_t *ctype,
815 const char *name)
817 size_t cnt;
819 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
820 if (strcmp (ctype->classnames[cnt], name) == 0)
821 break;
823 if (cnt < ctype->nr_charclass)
825 lr_error (lr, _("character class `%s' already defined"), name);
826 return;
829 if (ctype->nr_charclass == MAX_NR_CHARCLASS)
830 /* Exit code 2 is prescribed in P1003.2b. */
831 error (2, 0, _("\
832 implementation limit: no more than %d character classes allowed"),
833 MAX_NR_CHARCLASS);
835 ctype->classnames[ctype->nr_charclass++] = name;
839 static void
840 ctype_map_newP (struct linereader *lr, struct locale_ctype_t *ctype,
841 const char *name, struct charset_t *charset)
843 size_t max_chars = 0;
844 size_t cnt;
846 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
848 if (strcmp (ctype->mapnames[cnt], name) == 0)
849 break;
851 if (max_chars < ctype->map_collection_max[cnt])
852 max_chars = ctype->map_collection_max[cnt];
855 if (cnt < ctype->map_collection_nr)
857 lr_error (lr, _("character map `%s' already defined"), name);
858 return;
861 if (ctype->map_collection_nr == MAX_NR_CHARMAP)
862 /* Exit code 2 is prescribed in P1003.2b. */
863 error (2, 0, _("\
864 implementation limit: no more than %d character maps allowed"),
865 MAX_NR_CHARMAP);
867 ctype->mapnames[cnt] = name;
869 if (max_chars == 0)
870 ctype->map_collection_max[cnt] = charset->mb_cur_max == 1 ? 256 : 512;
871 else
872 ctype->map_collection_max[cnt] = max_chars;
874 ctype->map_collection[cnt] = (u_int32_t *)
875 xmalloc (sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
876 memset (ctype->map_collection[cnt], '\0',
877 sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
878 ctype->map_collection_act[cnt] = 256;
880 ++ctype->map_collection_nr;
884 /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
885 is possible if we only want ot extend the name array. */
886 static u_int32_t *
887 find_idx (struct locale_ctype_t *ctype, u_int32_t **table, size_t *max,
888 size_t *act, unsigned int idx)
890 size_t cnt;
892 if (idx < 256)
893 return table == NULL ? NULL : &(*table)[idx];
895 for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
896 if (ctype->charnames[cnt] == idx)
897 break;
899 /* We have to distinguish two cases: the names is found or not. */
900 if (cnt == ctype->charnames_act)
902 /* Extend the name array. */
903 if (ctype->charnames_act == ctype->charnames_max)
905 ctype->charnames_max *= 2;
906 ctype->charnames = (unsigned int *)
907 xrealloc (ctype->charnames,
908 sizeof (unsigned int) * ctype->charnames_max);
910 ctype->charnames[ctype->charnames_act++] = idx;
913 if (table == NULL)
914 /* We have done everything we are asked to do. */
915 return NULL;
917 if (cnt >= *act)
919 if (cnt >= *max)
921 size_t old_max = *max;
923 *max *= 2;
924 while (*max <= cnt);
926 *table =
927 (u_int32_t *) xrealloc (*table, *max * sizeof (unsigned long int));
928 memset (&(*table)[old_max], '\0',
929 (*max - old_max) * sizeof (u_int32_t));
932 (*table)[cnt] = 0;
933 *act = cnt;
936 return &(*table)[cnt];
940 static void
941 set_class_defaults (struct locale_ctype_t *ctype, struct charset_t *charset)
943 /* These function defines the default values for the classes and conversions
944 according to POSIX.2 2.5.2.1.
945 It may seem that the order of these if-blocks is arbitrary but it is NOT.
946 Don't move them unless you know what you do! */
948 void set_default (int bit, int from, int to)
950 char tmp[2];
951 int ch;
952 /* Define string. */
953 strcpy (tmp, "?");
955 for (ch = from; ch <= to; ++ch)
957 unsigned int value;
958 tmp[0] = ch;
960 value = charset_find_value (&charset->char_table, tmp, 1);
961 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
963 if (!be_quiet)
964 error (0, 0, _("\
965 character `%s' not defined while needed as default value"),
966 tmp);
967 continue;
969 else
970 ELEM (ctype, class_collection, , value) |= bit;
974 /* Set default values if keyword was not present. */
975 if ((ctype->class_done & BIT (tok_upper)) == 0)
976 /* "If this keyword [lower] is not specified, the lowercase letters
977 `A' through `Z', ..., shall automatically belong to this class,
978 with implementation defined character values." [P1003.2, 2.5.2.1] */
979 set_default (BIT (tok_upper), 'A', 'Z');
981 if ((ctype->class_done & BIT (tok_lower)) == 0)
982 /* "If this keyword [lower] is not specified, the lowercase letters
983 `a' through `z', ..., shall automatically belong to this class,
984 with implementation defined character values." [P1003.2, 2.5.2.1] */
985 set_default (BIT (tok_lower), 'a', 'z');
987 if ((ctype->class_done & BIT (tok_alpha)) == 0)
989 /* Table 2-6 in P1003.2 says that characters in class `upper' or
990 class `lower' *must* be in class `alpha'. */
991 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
992 size_t cnt;
994 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
995 if ((ctype->class_collection[cnt] & mask) != 0)
996 ctype->class_collection[cnt] |= BIT (tok_alpha);
999 if ((ctype->class_done & BIT (tok_digit)) == 0)
1000 /* "If this keyword [digit] is not specified, the digits `0' through
1001 `9', ..., shall automatically belong to this class, with
1002 implementation-defined character values." [P1003.2, 2.5.2.1] */
1003 set_default (BIT (tok_digit), '0', '9');
1005 /* "Only characters specified for the `alpha' and `digit' keyword
1006 shall be specified. Characters specified for the keyword `alpha'
1007 and `digit' are automatically included in this class. */
1009 unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
1010 size_t cnt;
1012 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1013 if ((ctype->class_collection[cnt] & mask) != 0)
1014 ctype->class_collection[cnt] |= BIT (tok_alnum);
1017 if ((ctype->class_done & BIT (tok_space)) == 0)
1018 /* "If this keyword [space] is not specified, the characters <space>,
1019 <form-feed>, <newline>, <carriage-return>, <tab>, and
1020 <vertical-tab>, ..., shall automatically belong to this class,
1021 with implementation-defined character values." [P1003.2, 2.5.2.1] */
1023 unsigned int value;
1025 value = charset_find_value (&charset->char_table, "space", 5);
1026 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1028 if (!be_quiet)
1029 error (0, 0, _("\
1030 character `%s' not defined while needed as default value"),
1031 "<space>");
1033 else
1034 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1036 value = charset_find_value (&charset->char_table, "form-feed", 9);
1037 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1039 if (!be_quiet)
1040 error (0, 0, _("\
1041 character `%s' not defined while needed as default value"),
1042 "<form-feed>");
1044 else
1045 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1047 value = charset_find_value (&charset->char_table, "newline", 7);
1048 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1050 if (!be_quiet)
1051 error (0, 0, _("\
1052 character `%s' not defined while needed as default value"),
1053 "<newline>");
1055 else
1056 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1058 value = charset_find_value (&charset->char_table, "carriage-return", 15);
1059 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1061 if (!be_quiet)
1062 error (0, 0, _("\
1063 character `%s' not defined while needed as default value"),
1064 "<carriage-return>");
1066 else
1067 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1069 value = charset_find_value (&charset->char_table, "tab", 3);
1070 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1072 if (!be_quiet)
1073 error (0, 0, _("\
1074 character `%s' not defined while needed as default value"),
1075 "<tab>");
1077 else
1078 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1080 value = charset_find_value (&charset->char_table, "vertical-tab", 12);
1081 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1083 if (!be_quiet)
1084 error (0, 0, _("\
1085 character `%s' not defined while needed as default value"),
1086 "<vertical-tab>");
1088 else
1089 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1092 if ((ctype->class_done & BIT (tok_xdigit)) == 0)
1093 /* "If this keyword is not specified, the digits `0' to `9', the
1094 uppercase letters `A' through `F', and the lowercase letters `a'
1095 through `f', ..., shell automatically belong to this class, with
1096 implementation defined character values." [P1003.2, 2.5.2.1] */
1098 set_default (BIT (tok_xdigit), '0', '9');
1099 set_default (BIT (tok_xdigit), 'A', 'F');
1100 set_default (BIT (tok_xdigit), 'a', 'f');
1103 if ((ctype->class_done & BIT (tok_blank)) == 0)
1104 /* "If this keyword [blank] is unspecified, the characters <space> and
1105 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
1107 unsigned int value;
1109 value = charset_find_value (&charset->char_table, "space", 5);
1110 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1112 if (!be_quiet)
1113 error (0, 0, _("\
1114 character `%s' not defined while needed as default value"),
1115 "<space>");
1117 else
1118 ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
1120 value = charset_find_value (&charset->char_table, "tab", 3);
1121 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1123 if (!be_quiet)
1124 error (0, 0, _("\
1125 character `%s' not defined while needed as default value"),
1126 "<tab>");
1128 else
1129 ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
1132 if ((ctype->class_done & BIT (tok_graph)) == 0)
1133 /* "If this keyword [graph] is not specified, characters specified for
1134 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
1135 shall belong to this character class." [P1003.2, 2.5.2.1] */
1137 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
1138 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
1139 size_t cnt;
1141 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1142 if ((ctype->class_collection[cnt] & mask) != 0)
1143 ctype->class_collection[cnt] |= BIT (tok_graph);
1146 if ((ctype->class_done & BIT (tok_print)) == 0)
1147 /* "If this keyword [print] is not provided, characters specified for
1148 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
1149 and the <space> character shall belong to this character class."
1150 [P1003.2, 2.5.2.1] */
1152 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
1153 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
1154 size_t cnt;
1155 wchar_t space;
1157 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1158 if ((ctype->class_collection[cnt] & mask) != 0)
1159 ctype->class_collection[cnt] |= BIT (tok_print);
1161 space = charset_find_value (&charset->char_table, "space", 5);
1162 if (space == ILLEGAL_CHAR_VALUE)
1164 if (!be_quiet)
1165 error (0, 0, _("\
1166 character `%s' not defined while needed as default value"),
1167 "<space>");
1169 else
1170 ELEM (ctype, class_collection, , space) |= BIT (tok_print);
1173 if (ctype->toupper_done == 0)
1174 /* "If this keyword [toupper] is not specified, the lowercase letters
1175 `a' through `z', and their corresponding uppercase letters `A' to
1176 `Z', ..., shall automatically be included, with implementation-
1177 defined character values." [P1003.2, 2.5.2.1] */
1179 char tmp[4];
1180 int ch;
1182 strcpy (tmp, "<?>");
1184 for (ch = 'a'; ch <= 'z'; ++ch)
1186 unsigned int value_from, value_to;
1188 tmp[1] = (char) ch;
1190 value_from = charset_find_value (&charset->char_table, &tmp[1], 1);
1191 if ((wchar_t) value_from == ILLEGAL_CHAR_VALUE)
1193 if (!be_quiet)
1194 error (0, 0, _("\
1195 character `%s' not defined while needed as default value"),
1196 tmp);
1197 continue;
1200 /* This conversion is implementation defined. */
1201 tmp[1] = (char) (ch + ('A' - 'a'));
1202 value_to = charset_find_value (&charset->char_table, &tmp[1], 1);
1203 if ((wchar_t) value_to == ILLEGAL_CHAR_VALUE)
1205 if (!be_quiet)
1206 error (0, 0, _("\
1207 character `%s' not defined while needed as default value"),
1208 tmp);
1209 continue;
1212 /* The index [0] is determined by the order of the
1213 `ctype_map_newP' calls in `ctype_startup'. */
1214 ELEM (ctype, map_collection, [0], value_from) = value_to;
1218 if (ctype->tolower_done == 0)
1219 /* "If this keyword [tolower] is not specified, the mapping shall be
1220 the reverse mapping of the one specified to `toupper'." [P1003.2] */
1222 size_t cnt;
1224 for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
1225 if (ctype->map_collection[0][cnt] != 0)
1226 ELEM (ctype, map_collection, [1],
1227 ctype->map_collection[0][cnt])
1228 = ctype->charnames[cnt];
1233 static void
1234 allocate_arrays (struct locale_ctype_t *ctype, struct charset_t *charset)
1236 size_t idx;
1238 /* First we have to decide how we organize the arrays. It is easy
1239 for a one-byte character set. But multi-byte character set
1240 cannot be stored flat because the chars might be sparsely used.
1241 So we determine an optimal hashing function for the used
1242 characters.
1244 We use a very trivial hashing function to store the sparse
1245 table. CH % TABSIZE is used as an index. To solve multiple hits
1246 we have N planes. This guarantees a fixed search time for a
1247 character [N / 2]. In the following code we determine the minmum
1248 value for TABSIZE * N, where TABSIZE >= 256. */
1249 size_t min_total = UINT_MAX;
1250 size_t act_size = 256;
1252 if (!be_quiet)
1253 fputs (_("\
1254 Computing table size for character classes might take a while..."),
1255 stderr);
1257 while (act_size < min_total)
1259 size_t cnt[act_size];
1260 size_t act_planes = 1;
1262 memset (cnt, '\0', sizeof cnt);
1264 for (idx = 0; idx < 256; ++idx)
1265 cnt[idx] = 1;
1267 for (idx = 0; idx < ctype->charnames_act; ++idx)
1268 if (ctype->charnames[idx] >= 256)
1270 size_t nr = ctype->charnames[idx] % act_size;
1272 if (++cnt[nr] > act_planes)
1274 act_planes = cnt[nr];
1275 if (act_size * act_planes >= min_total)
1276 break;
1280 if (act_size * act_planes < min_total)
1282 min_total = act_size * act_planes;
1283 ctype->plane_size = act_size;
1284 ctype->plane_cnt = act_planes;
1287 ++act_size;
1290 if (!be_quiet)
1291 fputs (_(" done\n"), stderr);
1294 #if __BYTE_ORDER == __LITTLE_ENDIAN
1295 # define NAMES_B1 ctype->names_el
1296 # define NAMES_B2 ctype->names_eb
1297 #else
1298 # define NAMES_B1 ctype->names_eb
1299 # define NAMES_B2 ctype->names_el
1300 #endif
1302 ctype->names_eb = (u_int32_t *) xcalloc (ctype->plane_size
1303 * ctype->plane_cnt,
1304 sizeof (u_int32_t));
1305 ctype->names_el = (u_int32_t *) xcalloc (ctype->plane_size
1306 * ctype->plane_cnt,
1307 sizeof (u_int32_t));
1309 for (idx = 1; idx < 256; ++idx)
1310 NAMES_B1[idx] = idx;
1312 /* Trick: change the 0th entry's name to 1 to mark the cell occupied. */
1313 NAMES_B1[0] = 1;
1315 for (idx = 256; idx < ctype->charnames_act; ++idx)
1317 size_t nr = (ctype->charnames[idx] % ctype->plane_size);
1318 size_t depth = 0;
1320 while (NAMES_B1[nr + depth * ctype->plane_size])
1321 ++depth;
1322 assert (depth < ctype->plane_cnt);
1324 NAMES_B1[nr + depth * ctype->plane_size] = ctype->charnames[idx];
1326 /* Now for faster access remember the index in the NAMES_B array. */
1327 ctype->charnames[idx] = nr + depth * ctype->plane_size;
1329 NAMES_B1[0] = 0;
1331 for (idx = 0; idx < ctype->plane_size * ctype->plane_cnt; ++idx)
1332 NAMES_B2[idx] = SWAPU32 (NAMES_B1[idx]);
1335 /* You wonder about this amount of memory? This is only because some
1336 users do not manage to address the array with unsigned values or
1337 data types with range >= 256. '\200' would result in the array
1338 index -128. To help these poor people we duplicate the entries for
1339 128 up to 255 below the entry for \0. */
1340 ctype->ctype_b = (char_class_t *) xcalloc (256 + 128,
1341 sizeof (char_class_t));
1342 ctype->ctype32_b = (char_class32_t *) xcalloc (ctype->plane_size
1343 * ctype->plane_cnt,
1344 sizeof (char_class32_t));
1346 /* Fill in the character class information. */
1347 #if __BYTE_ORDER == __LITTLE_ENDIAN
1348 # define TRANS(w) CHAR_CLASS_TRANS (w)
1349 # define TRANS32(w) CHAR_CLASS32_TRANS (w)
1350 #else
1351 # define TRANS(w) (w)
1352 # define TRANS32(w) (w)
1353 #endif
1355 for (idx = 0; idx < ctype->class_collection_act; ++idx)
1356 if (ctype->charnames[idx] < 256)
1357 ctype->ctype_b[128 + ctype->charnames[idx]]
1358 = TRANS (ctype->class_collection[idx]);
1360 /* Mirror first 127 entries. We must take care that entry -1 is not
1361 mirrored because EOF == -1. */
1362 for (idx = 0; idx < 127; ++idx)
1363 ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
1365 /* The 32 bit array contains all characters. */
1366 for (idx = 0; idx < ctype->class_collection_act; ++idx)
1367 ctype->ctype32_b[ctype->charnames[idx]]
1368 = TRANS32 (ctype->class_collection[idx]);
1370 /* Room for table of mappings. */
1371 ctype->map_eb = (u_int32_t **) xmalloc (ctype->map_collection_nr
1372 * sizeof (u_int32_t *));
1373 ctype->map_el = (u_int32_t **) xmalloc (ctype->map_collection_nr
1374 * sizeof (u_int32_t *));
1376 /* Fill in all mappings. */
1377 for (idx = 0; idx < ctype->map_collection_nr; ++idx)
1379 unsigned int idx2;
1381 /* Allocate table. */
1382 ctype->map_eb[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
1383 * ctype->plane_cnt + 128)
1384 * sizeof (u_int32_t));
1385 ctype->map_el[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
1386 * ctype->plane_cnt + 128)
1387 * sizeof (u_int32_t));
1389 #if __BYTE_ORDER == __LITTLE_ENDIAN
1390 # define MAP_B1 ctype->map_el
1391 # define MAP_B2 ctype->map_eb
1392 #else
1393 # define MAP_B1 ctype->map_eb
1394 # define MAP_B2 ctype->map_el
1395 #endif
1397 /* Copy default value (identity mapping). */
1398 memcpy (&MAP_B1[idx][128], NAMES_B1,
1399 ctype->plane_size * ctype->plane_cnt * sizeof (u_int32_t));
1401 /* Copy values from collection. */
1402 for (idx2 = 0; idx2 < ctype->map_collection_act[idx]; ++idx2)
1403 if (ctype->map_collection[idx][idx2] != 0)
1404 MAP_B1[idx][128 + ctype->charnames[idx2]] =
1405 ctype->map_collection[idx][idx2];
1407 /* Mirror first 127 entries. We must take care not to map entry
1408 -1 because EOF == -1. */
1409 for (idx2 = 0; idx2 < 127; ++idx2)
1410 MAP_B1[idx][idx2] = MAP_B1[idx][256 + idx2];
1412 /* EOF must map to EOF. */
1413 MAP_B1[idx][127] = EOF;
1415 /* And now the other byte order. */
1416 for (idx2 = 0; idx2 < ctype->plane_size * ctype->plane_cnt + 128; ++idx2)
1417 MAP_B2[idx][idx2] = SWAPU32 (MAP_B1[idx][idx2]);
1420 /* Extra array for class and map names. */
1421 ctype->class_name_ptr = (u_int32_t *) xmalloc (ctype->nr_charclass
1422 * sizeof (u_int32_t));
1423 ctype->map_name_ptr = (u_int32_t *) xmalloc (ctype->map_collection_nr
1424 * sizeof (u_int32_t));
1426 /* Array for width information. Because the expected width are very
1427 small we use only one single byte. This save space and we need
1428 not provide the information twice with both endianesses. */
1429 ctype->width = (unsigned char *) xmalloc (ctype->plane_size
1430 * ctype->plane_cnt);
1431 /* Initialize with default width value. */
1432 memset (ctype->width, charset->width_default,
1433 ctype->plane_size * ctype->plane_cnt);
1434 if (charset->width_rules != NULL)
1436 size_t cnt;
1438 for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
1439 if (charset->width_rules[cnt].width != charset->width_default)
1440 for (idx = charset->width_rules[cnt].from;
1441 idx <= charset->width_rules[cnt].to; ++idx)
1443 size_t nr = idx % ctype->plane_size;
1444 size_t depth = 0;
1446 while (NAMES_B1[nr + depth * ctype->plane_size] != nr)
1447 ++depth;
1448 assert (depth < ctype->plane_cnt);
1450 ctype->width[nr + depth * ctype->plane_size]
1451 = charset->width_rules[cnt].width;
1455 /* Compute MB_CUR_MAX. */
1456 ctype->mb_cur_max = charset->mb_cur_max;
1458 /* We need the name of the currently used 8-bit character set to
1459 make correct conversion between this 8-bit representation and the
1460 ISO 10646 character set used internally for wide characters. */
1461 ctype->codeset_name = charset->code_set_name ? : "";