Update.
[glibc.git] / locale / programs / ld-ctype.c
blobf2ad46f09218d97d9ee570c25fc740c946ca2943
1 /* Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
24 #include <alloca.h>
25 #include <endian.h>
26 #include <limits.h>
27 #include <string.h>
29 #include "locales.h"
30 #include "localeinfo.h"
31 #include "langinfo.h"
32 #include "locfile-token.h"
33 #include "stringtrans.h"
35 /* Uncomment the following line in the production version. */
36 /* define NDEBUG 1 */
37 #include <assert.h>
40 void *xmalloc (size_t __n);
41 void *xcalloc (size_t __n, size_t __s);
42 void *xrealloc (void *__ptr, size_t __n);
45 /* The bit used for representing a special class. */
46 #define BITPOS(class) ((class) - tok_upper)
47 #define BIT(class) (1 << BITPOS (class))
49 #define ELEM(ctype, collection, idx, value) \
50 *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx, \
51 &ctype->collection##_act idx, value)
53 #define SWAPU32(w) \
54 (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))
56 #define SWAPU16(w) \
57 ((((w) >> 8) & 0xff) | (((w) & 0xff) << 8))
59 #define XSWAPU32(w) \
60 ((((w) & 0xff00ff00) >> 8) | (((w) & 0xff00ff) << 8))
63 /* To be compatible with former implementations we for now restrict
64 the number of bits for character classes to 16. When compatibility
65 is not necessary anymore increase the number to 32. */
66 #define char_class_t u_int16_t
67 #define CHAR_CLASS_TRANS SWAPU16
68 #define char_class32_t u_int32_t
69 #define CHAR_CLASS32_TRANS XSWAPU32
72 /* The real definition of the struct for the LC_CTYPE locale. */
73 struct locale_ctype_t
75 unsigned int *charnames;
76 size_t charnames_max;
77 size_t charnames_act;
79 /* We will allow up to 8 * sizeof(u_int32_t) - 1 character classes. */
80 #define MAX_NR_CHARCLASS (8 * sizeof (u_int32_t) - 1)
81 size_t nr_charclass;
82 const char *classnames[MAX_NR_CHARCLASS];
83 unsigned long int current_class_mask;
84 unsigned int last_class_char;
85 u_int32_t *class_collection;
86 size_t class_collection_max;
87 size_t class_collection_act;
88 unsigned long int class_done;
90 /* If the following number ever turns out to be too small simply
91 increase it. But I doubt it will. --drepper@gnu */
92 #define MAX_NR_CHARMAP 16
93 const char *mapnames[MAX_NR_CHARMAP];
94 u_int32_t *map_collection[MAX_NR_CHARMAP];
95 size_t map_collection_max[MAX_NR_CHARMAP];
96 size_t map_collection_act[MAX_NR_CHARMAP];
97 size_t map_collection_nr;
98 size_t last_map_idx;
99 unsigned int from_map_char;
100 int toupper_done;
101 int tolower_done;
103 /* The arrays for the binary representation. */
104 u_int32_t plane_size;
105 u_int32_t plane_cnt;
106 char_class_t *ctype_b;
107 char_class32_t *ctype32_b;
108 u_int32_t *names_el;
109 u_int32_t *names_eb;
110 u_int32_t **map_eb;
111 u_int32_t **map_el;
112 u_int32_t *class_name_ptr;
113 u_int32_t *map_name_ptr;
114 unsigned char *width;
115 u_int32_t mb_cur_max;
116 const char *codeset_name;
120 /* Prototypes for local functions. */
121 static void ctype_class_newP (struct linereader *lr,
122 struct locale_ctype_t *ctype, const char *name);
123 static void ctype_map_newP (struct linereader *lr,
124 struct locale_ctype_t *ctype,
125 const char *name, struct charset_t *charset);
126 static u_int32_t *find_idx (struct locale_ctype_t *ctype, u_int32_t **table,
127 size_t *max, size_t *act, unsigned int idx);
128 static void set_class_defaults (struct locale_ctype_t *ctype,
129 struct charset_t *charset);
130 static void allocate_arrays (struct locale_ctype_t *ctype,
131 struct charset_t *charset);
134 void
135 ctype_startup (struct linereader *lr, struct localedef_t *locale,
136 struct charset_t *charset)
138 unsigned int cnt;
139 struct locale_ctype_t *ctype;
141 /* It is important that we always use UCS1 encoding for strings now. */
142 encoding_method = ENC_UCS1;
144 /* Allocate the needed room. */
145 locale->categories[LC_CTYPE].ctype = ctype =
146 (struct locale_ctype_t *) xmalloc (sizeof (struct locale_ctype_t));
148 /* We have no names seen yet. */
149 ctype->charnames_max = charset->mb_cur_max == 1 ? 256 : 512;
150 ctype->charnames =
151 (unsigned int *) xmalloc (ctype->charnames_max * sizeof (unsigned int));
152 for (cnt = 0; cnt < 256; ++cnt)
153 ctype->charnames[cnt] = cnt;
154 ctype->charnames_act = 256;
156 /* Fill character class information. */
157 ctype->nr_charclass = 0;
158 ctype->current_class_mask = 0;
159 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
160 /* The order of the following instructions determines the bit
161 positions! */
162 ctype_class_newP (lr, ctype, "upper");
163 ctype_class_newP (lr, ctype, "lower");
164 ctype_class_newP (lr, ctype, "alpha");
165 ctype_class_newP (lr, ctype, "digit");
166 ctype_class_newP (lr, ctype, "xdigit");
167 ctype_class_newP (lr, ctype, "space");
168 ctype_class_newP (lr, ctype, "print");
169 ctype_class_newP (lr, ctype, "graph");
170 ctype_class_newP (lr, ctype, "blank");
171 ctype_class_newP (lr, ctype, "cntrl");
172 ctype_class_newP (lr, ctype, "punct");
173 ctype_class_newP (lr, ctype, "alnum");
175 ctype->class_collection_max = charset->mb_cur_max == 1 ? 256 : 512;
176 ctype->class_collection
177 = (u_int32_t *) xmalloc (sizeof (unsigned long int)
178 * ctype->class_collection_max);
179 memset (ctype->class_collection, '\0',
180 sizeof (unsigned long int) * ctype->class_collection_max);
181 ctype->class_collection_act = 256;
183 /* Fill character map information. */
184 ctype->map_collection_nr = 0;
185 ctype->last_map_idx = MAX_NR_CHARMAP;
186 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
187 ctype_map_newP (lr, ctype, "toupper", charset);
188 ctype_map_newP (lr, ctype, "tolower", charset);
190 /* Fill first 256 entries in `toupper' and `tolower' arrays. */
191 for (cnt = 0; cnt < 256; ++cnt)
193 ctype->map_collection[0][cnt] = cnt;
194 ctype->map_collection[1][cnt] = cnt;
199 void
200 ctype_finish (struct localedef_t *locale, struct charset_t *charset)
202 /* See POSIX.2, table 2-6 for the meaning of the following table. */
203 #define NCLASS 12
204 static const struct
206 const char *name;
207 const char allow[NCLASS];
209 valid_table[NCLASS] =
211 /* The order is important. See token.h for more information.
212 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
213 { "upper", "--MX-XDDXXX-" },
214 { "lower", "--MX-XDDXXX-" },
215 { "alpha", "---X-XDDXXX-" },
216 { "digit", "XXX--XDDXXX-" },
217 { "xdigit", "-----XDDXXX-" },
218 { "space", "XXXXX------X" },
219 { "print", "---------X--" },
220 { "graph", "---------X--" },
221 { "blank", "XXXXXM-----X" },
222 { "cntrl", "XXXXX-XX--XX" },
223 { "punct", "XXXXX-DD-X-X" },
224 { "alnum", "-----XDDXXX-" }
226 size_t cnt;
227 int cls1, cls2;
228 unsigned int space_value;
229 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
231 /* Set default value for classes not specified. */
232 set_class_defaults (ctype, charset);
234 /* Check according to table. */
235 for (cnt = 0; cnt < ctype->class_collection_max; ++cnt)
237 unsigned long int tmp;
239 tmp = ctype->class_collection[cnt];
240 if (tmp == 0)
241 continue;
243 for (cls1 = 0; cls1 < NCLASS; ++cls1)
244 if ((tmp & (1 << cls1)) != 0)
245 for (cls2 = 0; cls2 < NCLASS; ++cls2)
246 if (valid_table[cls1].allow[cls2] != '-')
248 int eq = (tmp & (1 << cls2)) != 0;
249 switch (valid_table[cls1].allow[cls2])
251 case 'M':
252 if (!eq)
254 char buf[17];
255 char *cp = buf;
256 unsigned int value;
258 value = ctype->charnames[cnt];
260 if ((value & 0xff000000) != 0)
261 cp += sprintf (cp, "\\%o", (value >> 24) & 0xff);
262 if ((value & 0xffff0000) != 0)
263 cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
264 if ((value & 0xffffff00) != 0)
265 cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
266 sprintf (cp, "\\%o", value & 0xff);
268 if (!be_quiet)
269 error (0, 0, _("\
270 character %s'%s' in class `%s' must be in class `%s'"), value > 256 ? "L" : "",
271 cp, valid_table[cls1].name,
272 valid_table[cls2].name);
274 break;
276 case 'X':
277 if (eq)
279 char buf[17];
280 char *cp = buf;
281 unsigned int value;
283 value = ctype->charnames[cnt];
285 if ((value & 0xff000000) != 0)
286 cp += sprintf (cp, "\\%o", value >> 24);
287 if ((value & 0xffff0000) != 0)
288 cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
289 if ((value & 0xffffff00) != 0)
290 cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
291 sprintf (cp, "\\%o", value & 0xff);
293 if (!be_quiet)
294 error (0, 0, _("\
295 character %s'%s' in class `%s' must not be in class `%s'"),
296 value > 256 ? "L" : "", cp,
297 valid_table[cls1].name,
298 valid_table[cls2].name);
300 break;
302 case 'D':
303 ctype->class_collection[cnt] |= 1 << cls2;
304 break;
306 default:
307 error (5, 0, _("internal error in %s, line %u"),
308 __FUNCTION__, __LINE__);
313 /* ... and now test <SP> as a special case. */
314 space_value = charset_find_value (charset, "SP", 2);
315 if ((wchar_t) space_value == ILLEGAL_CHAR_VALUE && !be_quiet)
316 error (0, 0, _("character <SP> not defined in character map"));
317 else if (((cnt = BITPOS (tok_space),
318 (ELEM (ctype, class_collection, , space_value)
319 & BIT (tok_space)) == 0)
320 || (cnt = BITPOS (tok_blank),
321 (ELEM (ctype, class_collection, , space_value)
322 & BIT (tok_blank)) == 0))
323 && !be_quiet)
324 error (0, 0, _("<SP> character not in class `%s'"),
325 valid_table[cnt].name);
326 else if (((cnt = BITPOS (tok_punct),
327 (ELEM (ctype, class_collection, , space_value)
328 & BIT (tok_punct)) != 0)
329 || (cnt = BITPOS (tok_graph),
330 (ELEM (ctype, class_collection, , space_value)
331 & BIT (tok_graph))
332 != 0))
333 && !be_quiet)
334 error (0, 0, _("<SP> character must not be in class `%s'"),
335 valid_table[cnt].name);
336 else
337 ELEM (ctype, class_collection, , space_value) |= BIT (tok_print);
339 /* Now that the tests are done make sure the name array contains all
340 characters which are handled in the WIDTH section of the
341 character set definition file. */
342 if (charset->width_rules != NULL)
343 for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
345 size_t inner;
346 for (inner = charset->width_rules[cnt].from;
347 inner <= charset->width_rules[cnt].to; ++inner)
348 (void) find_idx (ctype, NULL, NULL, NULL, inner);
353 void
354 ctype_output (struct localedef_t *locale, struct charset_t *charset,
355 const char *output_path)
357 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
358 const size_t nelems = (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)
359 + 2 * (ctype->map_collection_nr - 2));
360 struct iovec iov[2 + nelems + ctype->nr_charclass
361 + ctype->map_collection_nr];
362 struct locale_file data;
363 u_int32_t idx[nelems];
364 size_t elem, cnt, offset, total;
367 if ((locale->binary & (1 << LC_CTYPE)) != 0)
369 iov[0].iov_base = ctype;
370 iov[0].iov_len = locale->len[LC_CTYPE];
372 write_locale_data (output_path, "LC_CTYPE", 1, iov);
374 return;
378 /* Now prepare the output: Find the sizes of the table we can use. */
379 allocate_arrays (ctype, charset);
381 data.magic = LIMAGIC (LC_CTYPE);
382 data.n = nelems;
383 iov[0].iov_base = (void *) &data;
384 iov[0].iov_len = sizeof (data);
386 iov[1].iov_base = (void *) idx;
387 iov[1].iov_len = sizeof (idx);
389 idx[0] = iov[0].iov_len + iov[1].iov_len;
390 offset = 0;
392 for (elem = 0; elem < nelems; ++elem)
394 if (elem < _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE))
395 switch (elem)
397 #define CTYPE_DATA(name, base, len) \
398 case _NL_ITEM_INDEX (name): \
399 iov[2 + elem + offset].iov_base = (base); \
400 iov[2 + elem + offset].iov_len = (len); \
401 if (elem + 1 < nelems) \
402 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
403 break
405 CTYPE_DATA (_NL_CTYPE_CLASS,
406 ctype->ctype_b,
407 (256 + 128) * sizeof (char_class_t));
409 CTYPE_DATA (_NL_CTYPE_TOUPPER_EB,
410 ctype->map_eb[0],
411 (ctype->plane_size * ctype->plane_cnt + 128)
412 * sizeof (u_int32_t));
413 CTYPE_DATA (_NL_CTYPE_TOLOWER_EB,
414 ctype->map_eb[1],
415 (ctype->plane_size * ctype->plane_cnt + 128)
416 * sizeof (u_int32_t));
418 CTYPE_DATA (_NL_CTYPE_TOUPPER_EL,
419 ctype->map_el[0],
420 (ctype->plane_size * ctype->plane_cnt + 128)
421 * sizeof (u_int32_t));
422 CTYPE_DATA (_NL_CTYPE_TOLOWER_EL,
423 ctype->map_el[1],
424 (ctype->plane_size * ctype->plane_cnt + 128)
425 * sizeof (u_int32_t));
427 CTYPE_DATA (_NL_CTYPE_CLASS32,
428 ctype->ctype32_b,
429 (ctype->plane_size * ctype->plane_cnt
430 * sizeof (char_class32_t)));
432 CTYPE_DATA (_NL_CTYPE_NAMES_EB,
433 ctype->names_eb, (ctype->plane_size * ctype->plane_cnt
434 * sizeof (u_int32_t)));
435 CTYPE_DATA (_NL_CTYPE_NAMES_EL,
436 ctype->names_el, (ctype->plane_size * ctype->plane_cnt
437 * sizeof (u_int32_t)));
439 CTYPE_DATA (_NL_CTYPE_HASH_SIZE,
440 &ctype->plane_size, sizeof (u_int32_t));
441 CTYPE_DATA (_NL_CTYPE_HASH_LAYERS,
442 &ctype->plane_cnt, sizeof (u_int32_t));
444 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
445 /* The class name array. */
446 total = 0;
447 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
449 iov[2 + elem + offset].iov_base
450 = (void *) ctype->classnames[cnt];
451 iov[2 + elem + offset].iov_len
452 = strlen (ctype->classnames[cnt]) + 1;
453 total += iov[2 + elem + offset].iov_len;
455 iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
456 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
457 total += 1 + (4 - ((total + 1) % 4));
459 if (elem + 1 < nelems)
460 idx[elem + 1] = idx[elem] + total;
461 break;
463 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
464 /* The class name array. */
465 total = 0;
466 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
468 iov[2 + elem + offset].iov_base
469 = (void *) ctype->mapnames[cnt];
470 iov[2 + elem + offset].iov_len
471 = strlen (ctype->mapnames[cnt]) + 1;
472 total += iov[2 + elem + offset].iov_len;
474 iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
475 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
476 total += 1 + (4 - ((total + 1) % 4));
478 if (elem + 1 < nelems)
479 idx[elem + 1] = idx[elem] + total;
480 break;
482 CTYPE_DATA (_NL_CTYPE_WIDTH,
483 ctype->width, ctype->plane_size * ctype->plane_cnt);
485 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
486 &ctype->mb_cur_max, sizeof (u_int32_t));
488 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
489 total = strlen (ctype->codeset_name) + 1;
490 if (total % 4 == 0)
491 iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
492 else
494 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
495 memset (mempcpy (iov[2 + elem + offset].iov_base,
496 ctype->codeset_name, total),
497 '\0', 4 - (total & 3));
498 total = (total + 3) & ~3;
500 iov[2 + elem + offset].iov_len = total;
501 if (elem + 1 < nelems)
502 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
503 break;
505 default:
506 assert (! "unknown CTYPE element");
508 else
510 /* Handle extra maps. */
511 size_t nr = (elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) >> 1;
513 if (((elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) & 1) == 0)
514 iov[2 + elem + offset].iov_base = ctype->map_eb[nr];
515 else
516 iov[2 + elem + offset].iov_base = ctype->map_el[nr];
518 iov[2 + elem + offset].iov_len = ((ctype->plane_size
519 * ctype->plane_cnt + 128)
520 * sizeof (u_int32_t));
522 if (elem + 1 < nelems)
523 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
527 assert (2 + elem + offset == (nelems + ctype->nr_charclass
528 + ctype->map_collection_nr + 2));
530 write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
534 /* Character class handling. */
535 void
536 ctype_class_new (struct linereader *lr, struct localedef_t *locale,
537 enum token_t tok, struct token *code,
538 struct charset_t *charset)
540 ctype_class_newP (lr, locale->categories[LC_CTYPE].ctype,
541 code->val.str.start);
546 ctype_is_charclass (struct linereader *lr, struct localedef_t *locale,
547 const char *name)
549 size_t cnt;
551 for (cnt = 0; cnt < locale->categories[LC_CTYPE].ctype->nr_charclass; ++cnt)
552 if (strcmp (name, locale->categories[LC_CTYPE].ctype->classnames[cnt])
553 == 0)
554 return 1;
556 return 0;
560 void
561 ctype_class_start (struct linereader *lr, struct localedef_t *locale,
562 enum token_t tok, const char *str,
563 struct charset_t *charset)
565 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
566 size_t cnt;
568 switch (tok)
570 case tok_upper:
571 str = "upper";
572 break;
573 case tok_lower:
574 str = "lower";
575 break;
576 case tok_alpha:
577 str = "alpha";
578 break;
579 case tok_digit:
580 str = "digit";
581 break;
582 case tok_xdigit:
583 str = "xdigit";
584 break;
585 case tok_space:
586 str = "space";
587 break;
588 case tok_print:
589 str = "print";
590 break;
591 case tok_graph:
592 str = "graph";
593 break;
594 case tok_blank:
595 str = "blank";
596 break;
597 case tok_cntrl:
598 str = "cntrl";
599 break;
600 case tok_punct:
601 str = "punct";
602 break;
603 case tok_alnum:
604 str = "alnum";
605 break;
606 case tok_ident:
607 break;
608 default:
609 assert (! "illegal token as class name: should not happen");
612 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
613 if (strcmp (str, ctype->classnames[cnt]) == 0)
614 break;
616 if (cnt >= ctype->nr_charclass)
617 assert (! "unknown class in class definition: should not happen");
619 ctype->class_done |= BIT (tok);
621 ctype->current_class_mask = 1 << cnt;
622 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
626 void
627 ctype_class_from (struct linereader *lr, struct localedef_t *locale,
628 struct token *code, struct charset_t *charset)
630 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
631 unsigned int value;
633 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
635 ctype->last_class_char = value;
637 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
638 /* In the LC_CTYPE category it is no error when a character is
639 not found. This has to be ignored silently. */
640 return;
642 *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
643 &ctype->class_collection_act, value)
644 |= ctype->current_class_mask;
648 void
649 ctype_class_to (struct linereader *lr, struct localedef_t *locale,
650 struct token *code, struct charset_t *charset)
652 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
653 unsigned int value, cnt;
655 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
657 assert (value >= ctype->last_class_char);
659 for (cnt = ctype->last_class_char + 1; cnt <= value; ++cnt)
660 *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
661 &ctype->class_collection_act, cnt)
662 |= ctype->current_class_mask;
664 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
668 void
669 ctype_class_end (struct linereader *lr, struct localedef_t *locale)
671 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
673 /* We have no special actions to perform here. */
674 ctype->current_class_mask = 0;
675 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
679 /* Character map handling. */
680 void
681 ctype_map_new (struct linereader *lr, struct localedef_t *locale,
682 enum token_t tok, struct token *code,
683 struct charset_t *charset)
685 ctype_map_newP (lr, locale->categories[LC_CTYPE].ctype,
686 code->val.str.start, charset);
691 ctype_is_charconv (struct linereader *lr, struct localedef_t *locale,
692 const char *name)
694 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
695 size_t cnt;
697 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
698 if (strcmp (name, ctype->mapnames[cnt]) == 0)
699 return 1;
701 return 0;
705 void
706 ctype_map_start (struct linereader *lr, struct localedef_t *locale,
707 enum token_t tok, const char *name, struct charset_t *charset)
709 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
710 size_t cnt;
712 switch (tok)
714 case tok_toupper:
715 ctype->toupper_done = 1;
716 name = "toupper";
717 break;
718 case tok_tolower:
719 ctype->tolower_done = 1;
720 name = "tolower";
721 break;
722 case tok_ident:
723 break;
724 default:
725 assert (! "unknown token in category `LC_CTYPE' should not happen");
728 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
729 if (strcmp (name, ctype->mapnames[cnt]) == 0)
730 break;
732 if (cnt == ctype->map_collection_nr)
733 assert (! "unknown token in category `LC_CTYPE' should not happen");
735 ctype->last_map_idx = cnt;
736 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
740 void
741 ctype_map_from (struct linereader *lr, struct localedef_t *locale,
742 struct token *code, struct charset_t *charset)
744 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
745 unsigned int value;
747 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
749 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
750 /* In the LC_CTYPE category it is no error when a character is
751 not found. This has to be ignored silently. */
752 return;
754 assert (ctype->last_map_idx < ctype->map_collection_nr);
756 ctype->from_map_char = value;
760 void
761 ctype_map_to (struct linereader *lr, struct localedef_t *locale,
762 struct token *code, struct charset_t *charset)
764 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
765 unsigned int value;
767 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
769 if ((wchar_t) ctype->from_map_char == ILLEGAL_CHAR_VALUE
770 || (wchar_t) value == ILLEGAL_CHAR_VALUE)
772 /* In the LC_CTYPE category it is no error when a character is
773 not found. This has to be ignored silently. */
774 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
775 return;
778 *find_idx (ctype, &ctype->map_collection[ctype->last_map_idx],
779 &ctype->map_collection_max[ctype->last_map_idx],
780 &ctype->map_collection_act[ctype->last_map_idx],
781 ctype->from_map_char) = value;
783 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
787 void
788 ctype_map_end (struct linereader *lr, struct localedef_t *locale)
790 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
792 ctype->last_map_idx = MAX_NR_CHARMAP;
793 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
797 /* Local functions. */
798 static void
799 ctype_class_newP (struct linereader *lr, struct locale_ctype_t *ctype,
800 const char *name)
802 size_t cnt;
804 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
805 if (strcmp (ctype->classnames[cnt], name) == 0)
806 break;
808 if (cnt < ctype->nr_charclass)
810 lr_error (lr, _("character class `%s' already defined"), name);
811 return;
814 if (ctype->nr_charclass == MAX_NR_CHARCLASS)
815 /* Exit code 2 is prescribed in P1003.2b. */
816 error (2, 0, _("\
817 implementation limit: no more than %d character classes allowed"),
818 MAX_NR_CHARCLASS);
820 ctype->classnames[ctype->nr_charclass++] = name;
824 static void
825 ctype_map_newP (struct linereader *lr, struct locale_ctype_t *ctype,
826 const char *name, struct charset_t *charset)
828 size_t max_chars = 0;
829 size_t cnt;
831 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
833 if (strcmp (ctype->mapnames[cnt], name) == 0)
834 break;
836 if (max_chars < ctype->map_collection_max[cnt])
837 max_chars = ctype->map_collection_max[cnt];
840 if (cnt < ctype->map_collection_nr)
842 lr_error (lr, _("character map `%s' already defined"), name);
843 return;
846 if (ctype->map_collection_nr == MAX_NR_CHARMAP)
847 /* Exit code 2 is prescribed in P1003.2b. */
848 error (2, 0, _("\
849 implementation limit: no more than %d character maps allowed"),
850 MAX_NR_CHARMAP);
852 ctype->mapnames[cnt] = name;
854 if (max_chars == 0)
855 ctype->map_collection_max[cnt] = charset->mb_cur_max == 1 ? 256 : 512;
856 else
857 ctype->map_collection_max[cnt] = max_chars;
859 ctype->map_collection[cnt] = (u_int32_t *)
860 xmalloc (sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
861 memset (ctype->map_collection[cnt], '\0',
862 sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
863 ctype->map_collection_act[cnt] = 256;
865 ++ctype->map_collection_nr;
869 /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
870 is possible if we only want ot extend the name array. */
871 static u_int32_t *
872 find_idx (struct locale_ctype_t *ctype, u_int32_t **table, size_t *max,
873 size_t *act, unsigned int idx)
875 size_t cnt;
877 if (idx < 256)
878 return table == NULL ? NULL : &(*table)[idx];
880 for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
881 if (ctype->charnames[cnt] == idx)
882 break;
884 /* We have to distinguish two cases: the names is found or not. */
885 if (cnt == ctype->charnames_act)
887 /* Extend the name array. */
888 if (ctype->charnames_act == ctype->charnames_max)
890 ctype->charnames_max *= 2;
891 ctype->charnames = (unsigned int *)
892 xrealloc (ctype->charnames,
893 sizeof (unsigned int) * ctype->charnames_max);
895 ctype->charnames[ctype->charnames_act++] = idx;
898 if (table == NULL)
899 /* We have done everything we are asked to do. */
900 return NULL;
902 if (cnt >= *act)
904 if (cnt >= *max)
906 size_t old_max = *max;
908 *max *= 2;
909 while (*max <= cnt);
911 *table =
912 (u_int32_t *) xrealloc (*table, *max * sizeof (unsigned long int));
913 memset (&(*table)[old_max], '\0',
914 (*max - old_max) * sizeof (u_int32_t));
917 (*table)[cnt] = 0;
918 *act = cnt;
921 return &(*table)[cnt];
925 static void
926 set_class_defaults (struct locale_ctype_t *ctype, struct charset_t *charset)
928 /* This function defines the default values for the classes and conversions
929 according to POSIX.2 2.5.2.1.
930 It may seem that the order of these if-blocks is arbitrary but it is NOT.
931 Don't move them unless you know what you do! */
933 void set_default (int bit, int from, int to)
935 char tmp[2];
936 int ch;
937 /* Define string. */
938 strcpy (tmp, "?");
940 for (ch = from; ch <= to; ++ch)
942 unsigned int value;
943 tmp[0] = ch;
945 value = charset_find_value (charset, tmp, 1);
946 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
948 error (0, 0, _("\
949 character `%s' not defined while needed as default value"),
950 tmp);
951 continue;
953 else
954 ELEM (ctype, class_collection, , value) |= bit;
958 /* Set default values if keyword was not present. */
959 if ((ctype->class_done & BIT (tok_upper)) == 0)
960 /* "If this keyword [upper] is not specified, the uppercase letters
961 `A' through `Z', ..., shall automatically belong to this class,
962 with implementation defined character values." [P1003.2, 2.5.2.1] */
963 set_default (BIT (tok_upper), 'A', 'Z');
965 if ((ctype->class_done & BIT (tok_lower)) == 0)
966 /* "If this keyword [lower] is not specified, the lowercase letters
967 `a' through `z', ..., shall automatically belong to this class,
968 with implementation defined character values." [P1003.2, 2.5.2.1] */
969 set_default (BIT (tok_lower), 'a', 'z');
971 if ((ctype->class_done & BIT (tok_alpha)) == 0)
973 /* Table 2-6 in P1003.2 says that characters in class `upper' or
974 class `lower' *must* be in class `alpha'. */
975 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
976 size_t cnt;
978 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
979 if ((ctype->class_collection[cnt] & mask) != 0)
980 ctype->class_collection[cnt] |= BIT (tok_alpha);
983 if ((ctype->class_done & BIT (tok_digit)) == 0)
984 /* "If this keyword [digit] is not specified, the digits `0' through
985 `9', ..., shall automatically belong to this class, with
986 implementation-defined character values." [P1003.2, 2.5.2.1] */
987 set_default (BIT (tok_digit), '0', '9');
989 /* "Only characters specified for the `alpha' and `digit' keyword
990 shall be specified. Characters specified for the keyword `alpha'
991 and `digit' are automatically included in this class. */
993 unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
994 size_t cnt;
996 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
997 if ((ctype->class_collection[cnt] & mask) != 0)
998 ctype->class_collection[cnt] |= BIT (tok_alnum);
1001 if ((ctype->class_done & BIT (tok_space)) == 0)
1002 /* "If this keyword [space] is not specified, the characters <space>,
1003 <form-feed>, <newline>, <carriage-return>, <tab>, and
1004 <vertical-tab>, ..., shall automatically belong to this class,
1005 with implementation-defined character values." [P1003.2, 2.5.2.1] */
1007 unsigned int value;
1009 value = charset_find_value (charset, "space", 5);
1010 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1011 error (0, 0, _("\
1012 character `%s' not defined while needed as default value"),
1013 "<space>");
1014 else
1015 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1017 value = charset_find_value (charset, "form-feed", 9);
1018 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1019 error (0, 0, _("\
1020 character `%s' not defined while needed as default value"),
1021 "<form-feed>");
1022 else
1023 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1025 value = charset_find_value (charset, "newline", 7);
1026 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1027 error (0, 0, _("\
1028 character `%s' not defined while needed as default value"),
1029 "<newline>");
1030 else
1031 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1033 value = charset_find_value (charset, "carriage-return", 15);
1034 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1035 error (0, 0, _("\
1036 character `%s' not defined while needed as default value"),
1037 "<carriage-return>");
1038 else
1039 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1041 value = charset_find_value (charset, "tab", 3);
1042 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1043 error (0, 0, _("\
1044 character `%s' not defined while needed as default value"),
1045 "<tab>");
1046 else
1047 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1049 value = charset_find_value (charset, "vertical-tab", 12);
1050 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1051 error (0, 0, _("\
1052 character `%s' not defined while needed as default value"),
1053 "<vertical-tab>");
1054 else
1055 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1058 if ((ctype->class_done & BIT (tok_xdigit)) == 0)
1059 /* "If this keyword is not specified, the digits `0' to `9', the
1060 uppercase letters `A' through `F', and the lowercase letters `a'
1061 through `f', ..., shall automatically belong to this class, with
1062 implementation defined character values." [P1003.2, 2.5.2.1] */
1064 set_default (BIT (tok_xdigit), '0', '9');
1065 set_default (BIT (tok_xdigit), 'A', 'F');
1066 set_default (BIT (tok_xdigit), 'a', 'f');
1069 if ((ctype->class_done & BIT (tok_blank)) == 0)
1070 /* "If this keyword [blank] is unspecified, the characters <space> and
1071 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
1073 unsigned int value;
1075 value = charset_find_value (charset, "space", 5);
1076 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1077 error (0, 0, _("\
1078 character `%s' not defined while needed as default value"),
1079 "<space>");
1080 else
1081 ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
1083 value = charset_find_value (charset, "tab", 3);
1084 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1085 error (0, 0, _("\
1086 character `%s' not defined while needed as default value"),
1087 "<tab>");
1088 else
1089 ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
1092 if ((ctype->class_done & BIT (tok_graph)) == 0)
1093 /* "If this keyword [graph] is not specified, characters specified for
1094 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
1095 shall belong to this character class." [P1003.2, 2.5.2.1] */
1097 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
1098 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
1099 size_t cnt;
1101 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1102 if ((ctype->class_collection[cnt] & mask) != 0)
1103 ctype->class_collection[cnt] |= BIT (tok_graph);
1106 if ((ctype->class_done & BIT (tok_print)) == 0)
1107 /* "If this keyword [print] is not provided, characters specified for
1108 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
1109 and the <space> character shall belong to this character class."
1110 [P1003.2, 2.5.2.1] */
1112 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
1113 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
1114 size_t cnt;
1115 wchar_t space;
1117 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1118 if ((ctype->class_collection[cnt] & mask) != 0)
1119 ctype->class_collection[cnt] |= BIT (tok_print);
1121 space = charset_find_value (charset, "space", 5);
1122 if (space == ILLEGAL_CHAR_VALUE && !be_quiet)
1123 error (0, 0, _("\
1124 character `%s' not defined while needed as default value"),
1125 "<space>");
1126 else
1127 ELEM (ctype, class_collection, , space) |= BIT (tok_print);
1130 if (ctype->toupper_done == 0)
1131 /* "If this keyword [toupper] is not specified, the lowercase letters
1132 `a' through `z', and their corresponding uppercase letters `A' to
1133 `Z', ..., shall automatically be included, with implementation-
1134 defined character values." [P1003.2, 2.5.2.1] */
1136 char tmp[4];
1137 int ch;
1139 strcpy (tmp, "<?>");
1141 for (ch = 'a'; ch <= 'z'; ++ch)
1143 unsigned int value_from, value_to;
1145 tmp[1] = (char) ch;
1147 value_from = charset_find_value (charset, &tmp[1], 1);
1148 if ((wchar_t) value_from == ILLEGAL_CHAR_VALUE && !be_quiet)
1150 error (0, 0, _("\
1151 character `%s' not defined while needed as default value"),
1152 tmp);
1153 continue;
1156 /* This conversion is implementation defined. */
1157 tmp[1] = (char) (ch + ('A' - 'a'));
1158 value_to = charset_find_value (charset, &tmp[1], 1);
1159 if ((wchar_t) value_to == ILLEGAL_CHAR_VALUE && !be_quiet)
1161 error (0, 0, _("\
1162 character `%s' not defined while needed as default value"),
1163 tmp);
1164 continue;
1167 /* The index [0] is determined by the order of the
1168 `ctype_map_newP' calls in `ctype_startup'. */
1169 ELEM (ctype, map_collection, [0], value_from) = value_to;
1173 if (ctype->tolower_done == 0)
1174 /* "If this keyword [tolower] is not specified, the mapping shall be
1175 the reverse mapping of the one specified to `toupper'." [P1003.2] */
1177 size_t cnt;
1179 for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
1180 if (ctype->map_collection[0][cnt] != 0)
1181 ELEM (ctype, map_collection, [1],
1182 ctype->map_collection[0][cnt])
1183 = ctype->charnames[cnt];
1188 static void
1189 allocate_arrays (struct locale_ctype_t *ctype, struct charset_t *charset)
1191 size_t idx;
1193 /* First we have to decide how we organize the arrays. It is easy
1194 for a one-byte character set. But multi-byte character set
1195 cannot be stored flat because the chars might be sparsely used.
1196 So we determine an optimal hashing function for the used
1197 characters.
1199 We use a very trivial hashing function to store the sparse
1200 table. CH % TABSIZE is used as an index. To solve multiple hits
1201 we have N planes. This guarantees a fixed search time for a
1202 character [N / 2]. In the following code we determine the minmum
1203 value for TABSIZE * N, where TABSIZE >= 256. */
1204 size_t min_total = UINT_MAX;
1205 size_t act_size = 256;
1207 if (!be_quiet)
1208 fputs (_("\
1209 Computing table size for character classes might take a while..."),
1210 stderr);
1212 while (act_size < min_total)
1214 size_t cnt[act_size];
1215 size_t act_planes = 1;
1217 memset (cnt, '\0', sizeof cnt);
1219 for (idx = 0; idx < 256; ++idx)
1220 cnt[idx] = 1;
1222 for (idx = 0; idx < ctype->charnames_act; ++idx)
1223 if (ctype->charnames[idx] >= 256)
1225 size_t nr = ctype->charnames[idx] % act_size;
1227 if (++cnt[nr] > act_planes)
1229 act_planes = cnt[nr];
1230 if (act_size * act_planes >= min_total)
1231 break;
1235 if (act_size * act_planes < min_total)
1237 min_total = act_size * act_planes;
1238 ctype->plane_size = act_size;
1239 ctype->plane_cnt = act_planes;
1242 ++act_size;
1245 if (!be_quiet)
1246 fputs (_(" done\n"), stderr);
1249 #if __BYTE_ORDER == __LITTLE_ENDIAN
1250 # define NAMES_B1 ctype->names_el
1251 # define NAMES_B2 ctype->names_eb
1252 #else
1253 # define NAMES_B1 ctype->names_eb
1254 # define NAMES_B2 ctype->names_el
1255 #endif
1257 ctype->names_eb = (u_int32_t *) xcalloc (ctype->plane_size
1258 * ctype->plane_cnt,
1259 sizeof (u_int32_t));
1260 ctype->names_el = (u_int32_t *) xcalloc (ctype->plane_size
1261 * ctype->plane_cnt,
1262 sizeof (u_int32_t));
1264 for (idx = 1; idx < 256; ++idx)
1265 NAMES_B1[idx] = idx;
1267 /* Trick: change the 0th entry's name to 1 to mark the cell occupied. */
1268 NAMES_B1[0] = 1;
1270 for (idx = 256; idx < ctype->charnames_act; ++idx)
1272 size_t nr = (ctype->charnames[idx] % ctype->plane_size);
1273 size_t depth = 0;
1275 while (NAMES_B1[nr + depth * ctype->plane_size])
1276 ++depth;
1277 assert (depth < ctype->plane_cnt);
1279 NAMES_B1[nr + depth * ctype->plane_size] = ctype->charnames[idx];
1281 /* Now for faster access remember the index in the NAMES_B array. */
1282 ctype->charnames[idx] = nr + depth * ctype->plane_size;
1284 NAMES_B1[0] = 0;
1286 for (idx = 0; idx < ctype->plane_size * ctype->plane_cnt; ++idx)
1287 NAMES_B2[idx] = SWAPU32 (NAMES_B1[idx]);
1290 /* You wonder about this amount of memory? This is only because some
1291 users do not manage to address the array with unsigned values or
1292 data types with range >= 256. '\200' would result in the array
1293 index -128. To help these poor people we duplicate the entries for
1294 128 up to 255 below the entry for \0. */
1295 ctype->ctype_b = (char_class_t *) xcalloc (256 + 128,
1296 sizeof (char_class_t));
1297 ctype->ctype32_b = (char_class32_t *) xcalloc (ctype->plane_size
1298 * ctype->plane_cnt,
1299 sizeof (char_class32_t));
1301 /* Fill in the character class information. */
1302 #if __BYTE_ORDER == __LITTLE_ENDIAN
1303 # define TRANS(w) CHAR_CLASS_TRANS (w)
1304 # define TRANS32(w) CHAR_CLASS32_TRANS (w)
1305 #else
1306 # define TRANS(w) (w)
1307 # define TRANS32(w) (w)
1308 #endif
1310 for (idx = 0; idx < ctype->class_collection_act; ++idx)
1311 if (ctype->charnames[idx] < 256)
1312 ctype->ctype_b[128 + ctype->charnames[idx]]
1313 = TRANS (ctype->class_collection[idx]);
1315 /* Mirror first 127 entries. We must take care that entry -1 is not
1316 mirrored because EOF == -1. */
1317 for (idx = 0; idx < 127; ++idx)
1318 ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
1320 /* The 32 bit array contains all characters. */
1321 for (idx = 0; idx < ctype->class_collection_act; ++idx)
1322 ctype->ctype32_b[ctype->charnames[idx]]
1323 = TRANS32 (ctype->class_collection[idx]);
1325 /* Room for table of mappings. */
1326 ctype->map_eb = (u_int32_t **) xmalloc (ctype->map_collection_nr
1327 * sizeof (u_int32_t *));
1328 ctype->map_el = (u_int32_t **) xmalloc (ctype->map_collection_nr
1329 * sizeof (u_int32_t *));
1331 /* Fill in all mappings. */
1332 for (idx = 0; idx < ctype->map_collection_nr; ++idx)
1334 unsigned int idx2;
1336 /* Allocate table. */
1337 ctype->map_eb[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
1338 * ctype->plane_cnt + 128)
1339 * sizeof (u_int32_t));
1340 ctype->map_el[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
1341 * ctype->plane_cnt + 128)
1342 * sizeof (u_int32_t));
1344 #if __BYTE_ORDER == __LITTLE_ENDIAN
1345 # define MAP_B1 ctype->map_el
1346 # define MAP_B2 ctype->map_eb
1347 #else
1348 # define MAP_B1 ctype->map_eb
1349 # define MAP_B2 ctype->map_el
1350 #endif
1352 /* Copy default value (identity mapping). */
1353 memcpy (&MAP_B1[idx][128], NAMES_B1,
1354 ctype->plane_size * ctype->plane_cnt * sizeof (u_int32_t));
1356 /* Copy values from collection. */
1357 for (idx2 = 0; idx2 < ctype->map_collection_act[idx]; ++idx2)
1358 if (ctype->map_collection[idx][idx2] != 0)
1359 MAP_B1[idx][128 + ctype->charnames[idx2]] =
1360 ctype->map_collection[idx][idx2];
1362 /* Mirror first 127 entries. We must take care not to map entry
1363 -1 because EOF == -1. */
1364 for (idx2 = 0; idx2 < 127; ++idx2)
1365 MAP_B1[idx][idx2] = MAP_B1[idx][256 + idx2];
1367 /* EOF must map to EOF. */
1368 MAP_B1[idx][127] = EOF;
1370 /* And now the other byte order. */
1371 for (idx2 = 0; idx2 < ctype->plane_size * ctype->plane_cnt + 128; ++idx2)
1372 MAP_B2[idx][idx2] = SWAPU32 (MAP_B1[idx][idx2]);
1375 /* Extra array for class and map names. */
1376 ctype->class_name_ptr = (u_int32_t *) xmalloc (ctype->nr_charclass
1377 * sizeof (u_int32_t));
1378 ctype->map_name_ptr = (u_int32_t *) xmalloc (ctype->map_collection_nr
1379 * sizeof (u_int32_t));
1381 /* Array for width information. Because the expected width are very
1382 small we use only one single byte. This save space and we need
1383 not provide the information twice with both endianesses. */
1384 ctype->width = (unsigned char *) xmalloc (ctype->plane_size
1385 * ctype->plane_cnt);
1386 /* Initialize with default width value. */
1387 memset (ctype->width, charset->width_default,
1388 ctype->plane_size * ctype->plane_cnt);
1389 if (charset->width_rules != NULL)
1391 size_t cnt;
1393 for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
1394 if (charset->width_rules[cnt].width != charset->width_default)
1395 for (idx = charset->width_rules[cnt].from;
1396 idx <= charset->width_rules[cnt].to; ++idx)
1398 size_t nr = idx % ctype->plane_size;
1399 size_t depth = 0;
1401 while (NAMES_B1[nr + depth * ctype->plane_size] != nr)
1402 ++depth;
1403 assert (depth < ctype->plane_cnt);
1405 ctype->width[nr + depth * ctype->plane_size]
1406 = charset->width_rules[cnt].width;
1410 /* Compute MB_CUR_MAX. Please note the value mb_cur_max in the
1411 character set definition gives the number of bytes in the wide
1412 character representation. We compute the number of bytes used
1413 for the UTF-8 encoded form. */
1414 ctype->mb_cur_max = ((int []) { 2, 3, 5, 6 }) [charset->mb_cur_max - 1];
1416 /* We need the name of the currently used 8-bit character set to
1417 make correct conversion between this 8-bit representation and the
1418 ISO 10646 character set used internally for wide characters. */
1419 ctype->codeset_name = charset->code_set_name;