Update.
[glibc.git] / locale / programs / ld-ctype.c
blobe2ebf26254687fa51a20a89124817b3bd89a9db7
1 /* Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
24 #include <alloca.h>
25 #include <endian.h>
26 #include <limits.h>
27 #include <string.h>
29 #include "locales.h"
30 #include "localeinfo.h"
31 #include "langinfo.h"
32 #include "locfile-token.h"
33 #include "stringtrans.h"
35 /* Uncomment the following line in the production version. */
36 /* define NDEBUG 1 */
37 #include <assert.h>
/* Allocation helpers used throughout this file; their definitions are
   not part of this file.  */
extern void *xmalloc (size_t n);
extern void *xcalloc (size_t n, size_t s);
extern void *xrealloc (void *ptr, size_t n);
/* The bit used for representing a special class.  Class tokens are
   counted relative to `tok_upper'.  */
#define BITPOS(class) ((class) - tok_upper)
#define BIT(class) (1 << BITPOS (class))

/* Expands to an lvalue for the entry belonging to character VALUE in the
   growable table CTYPE->COLLECTION (optionally subscripted by IDX).  The
   lookup is done by `find_idx', which may enlarge the table.  */
#define ELEM(ctype, collection, idx, value)                                  \
  *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,    \
             &ctype->collection##_act idx, value)

/* Reverse the byte order of the low 32 bits of W.  Every byte is masked
   with an unsigned constant before it is shifted, so the result is well
   defined for signed arguments and contains no stray high bits when W is
   wider than 32 bits.  */
#define SWAPU32(w) \
  ((((w) & 0xffu) << 24) | (((w) & 0xff00u) << 8)                            \
   | (((w) >> 8) & 0xff00u) | (((w) >> 24) & 0xffu))

/* Reverse the byte order of the low 16 bits of W.  */
#define SWAPU16(w) \
  ((((w) >> 8) & 0xff) | (((w) & 0xff) << 8))


/* To be compatible with former implementations we for now restrict
   the number of bits for character classes to 16.  When compatibility
   is not necessary anymore increase the number to 32.  */
#define char_class_t u_int16_t
#define CHAR_CLASS_TRANS SWAPU16
#define char_class32_t u_int32_t
#define CHAR_CLASS32_TRANS SWAPU32
69 /* The real definition of the struct for the LC_CTYPE locale. */
70 struct locale_ctype_t
72 unsigned int *charnames;
73 size_t charnames_max;
74 size_t charnames_act;
76 /* We will allow up to 8 * sizeof(u_int32_t) - 1 character classes. */
77 #define MAX_NR_CHARCLASS (8 * sizeof (u_int32_t) - 1)
78 size_t nr_charclass;
79 const char *classnames[MAX_NR_CHARCLASS];
80 unsigned long int current_class_mask;
81 unsigned int last_class_char;
82 u_int32_t *class_collection;
83 size_t class_collection_max;
84 size_t class_collection_act;
85 unsigned long int class_done;
87 /* If the following number ever turns out to be too small simply
88 increase it. But I doubt it will. --drepper@gnu */
89 #define MAX_NR_CHARMAP 16
90 const char *mapnames[MAX_NR_CHARMAP];
91 u_int32_t *map_collection[MAX_NR_CHARMAP];
92 size_t map_collection_max[MAX_NR_CHARMAP];
93 size_t map_collection_act[MAX_NR_CHARMAP];
94 size_t map_collection_nr;
95 size_t last_map_idx;
96 unsigned int from_map_char;
97 int toupper_done;
98 int tolower_done;
100 /* The arrays for the binary representation. */
101 u_int32_t plane_size;
102 u_int32_t plane_cnt;
103 char_class_t *ctype_b;
104 char_class32_t *ctype32_b;
105 u_int32_t *names_el;
106 u_int32_t *names_eb;
107 u_int32_t **map_eb;
108 u_int32_t **map_el;
109 u_int32_t *class_name_ptr;
110 u_int32_t *map_name_ptr;
111 unsigned char *width;
112 u_int32_t mb_cur_max;
113 const char *codeset_name;
/* Prototypes for local functions.  */

/* Register a new character class resp. character map under NAME.  */
static void ctype_class_newP (struct linereader *lr,
                              struct locale_ctype_t *ctype,
                              const char *name);
static void ctype_map_newP (struct linereader *lr,
                            struct locale_ctype_t *ctype,
                            const char *name,
                            struct charset_t *charset);

/* Look up (and create if necessary) the table slot for character IDX.  */
static u_int32_t *find_idx (struct locale_ctype_t *ctype,
                            u_int32_t **table, size_t *max, size_t *act,
                            unsigned int idx);

static void set_class_defaults (struct locale_ctype_t *ctype,
                                struct charset_t *charset);
static void allocate_arrays (struct locale_ctype_t *ctype,
                             struct charset_t *charset);
131 void
132 ctype_startup (struct linereader *lr, struct localedef_t *locale,
133 struct charset_t *charset)
135 unsigned int cnt;
136 struct locale_ctype_t *ctype;
138 /* It is important that we always use UCS1 encoding for strings now. */
139 encoding_method = ENC_UCS1;
141 /* Allocate the needed room. */
142 locale->categories[LC_CTYPE].ctype = ctype =
143 (struct locale_ctype_t *) xmalloc (sizeof (struct locale_ctype_t));
145 /* We have no names seen yet. */
146 ctype->charnames_max = charset->mb_cur_max == 1 ? 256 : 512;
147 ctype->charnames =
148 (unsigned int *) xmalloc (ctype->charnames_max * sizeof (unsigned int));
149 for (cnt = 0; cnt < 256; ++cnt)
150 ctype->charnames[cnt] = cnt;
151 ctype->charnames_act = 256;
153 /* Fill character class information. */
154 ctype->nr_charclass = 0;
155 ctype->current_class_mask = 0;
156 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
157 /* The order of the following instructions determines the bit
158 positions! */
159 ctype_class_newP (lr, ctype, "upper");
160 ctype_class_newP (lr, ctype, "lower");
161 ctype_class_newP (lr, ctype, "alpha");
162 ctype_class_newP (lr, ctype, "digit");
163 ctype_class_newP (lr, ctype, "xdigit");
164 ctype_class_newP (lr, ctype, "space");
165 ctype_class_newP (lr, ctype, "print");
166 ctype_class_newP (lr, ctype, "graph");
167 ctype_class_newP (lr, ctype, "blank");
168 ctype_class_newP (lr, ctype, "cntrl");
169 ctype_class_newP (lr, ctype, "punct");
170 ctype_class_newP (lr, ctype, "alnum");
172 ctype->class_collection_max = charset->mb_cur_max == 1 ? 256 : 512;
173 ctype->class_collection
174 = (u_int32_t *) xmalloc (sizeof (unsigned long int)
175 * ctype->class_collection_max);
176 memset (ctype->class_collection, '\0',
177 sizeof (unsigned long int) * ctype->class_collection_max);
178 ctype->class_collection_act = 256;
180 /* Fill character map information. */
181 ctype->map_collection_nr = 0;
182 ctype->last_map_idx = MAX_NR_CHARMAP;
183 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
184 ctype_map_newP (lr, ctype, "toupper", charset);
185 ctype_map_newP (lr, ctype, "tolower", charset);
187 /* Fill first 256 entries in `toupper' and `tolower' arrays. */
188 for (cnt = 0; cnt < 256; ++cnt)
190 ctype->map_collection[0][cnt] = cnt;
191 ctype->map_collection[1][cnt] = cnt;
196 void
197 ctype_finish (struct localedef_t *locale, struct charset_t *charset)
199 /* See POSIX.2, table 2-6 for the meaning of the following table. */
200 #define NCLASS 12
201 static const struct
203 const char *name;
204 const char allow[NCLASS];
206 valid_table[NCLASS] =
208 /* The order is important. See token.h for more information.
209 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
210 { "upper", "--MX-XDDXXX-" },
211 { "lower", "--MX-XDDXXX-" },
212 { "alpha", "---X-XDDXXX-" },
213 { "digit", "XXX--XDDXXX-" },
214 { "xdigit", "-----XDDXXX-" },
215 { "space", "XXXXX------X" },
216 { "print", "---------X--" },
217 { "graph", "---------X--" },
218 { "blank", "XXXXXM-----X" },
219 { "cntrl", "XXXXX-XX--XX" },
220 { "punct", "XXXXX-DD-X-X" },
221 { "alnum", "-----XDDXXX-" }
223 size_t cnt;
224 int cls1, cls2;
225 unsigned int space_value;
226 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
228 /* Set default value for classes not specified. */
229 set_class_defaults (ctype, charset);
231 /* Check according to table. */
232 for (cnt = 0; cnt < ctype->class_collection_max; ++cnt)
234 unsigned long int tmp;
236 tmp = ctype->class_collection[cnt];
237 if (tmp == 0)
238 continue;
240 for (cls1 = 0; cls1 < NCLASS; ++cls1)
241 if ((tmp & (1 << cls1)) != 0)
242 for (cls2 = 0; cls2 < NCLASS; ++cls2)
243 if (valid_table[cls1].allow[cls2] != '-')
245 int eq = (tmp & (1 << cls2)) != 0;
246 switch (valid_table[cls1].allow[cls2])
248 case 'M':
249 if (!eq)
251 char buf[17];
252 char *cp = buf;
253 unsigned int value;
255 value = ctype->charnames[cnt];
257 if ((value & 0xff000000) != 0)
258 cp += sprintf (cp, "\\%o", (value >> 24) & 0xff);
259 if ((value & 0xffff0000) != 0)
260 cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
261 if ((value & 0xffffff00) != 0)
262 cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
263 sprintf (cp, "\\%o", value & 0xff);
265 if (!be_quiet)
266 error (0, 0, _("\
267 character %s'%s' in class `%s' must be in class `%s'"), value > 256 ? "L" : "",
268 cp, valid_table[cls1].name,
269 valid_table[cls2].name);
271 break;
273 case 'X':
274 if (eq)
276 char buf[17];
277 char *cp = buf;
278 unsigned int value;
280 value = ctype->charnames[cnt];
282 if ((value & 0xff000000) != 0)
283 cp += sprintf (cp, "\\%o", value >> 24);
284 if ((value & 0xffff0000) != 0)
285 cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
286 if ((value & 0xffffff00) != 0)
287 cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
288 sprintf (cp, "\\%o", value & 0xff);
290 if (!be_quiet)
291 error (0, 0, _("\
292 character %s'%s' in class `%s' must not be in class `%s'"),
293 value > 256 ? "L" : "", cp,
294 valid_table[cls1].name,
295 valid_table[cls2].name);
297 break;
299 case 'D':
300 ctype->class_collection[cnt] |= 1 << cls2;
301 break;
303 default:
304 error (5, 0, _("internal error in %s, line %u"),
305 __FUNCTION__, __LINE__);
310 /* ... and now test <SP> as a special case. */
311 space_value = charset_find_value (charset, "SP", 2);
312 if ((wchar_t) space_value == ILLEGAL_CHAR_VALUE && !be_quiet)
313 error (0, 0, _("character <SP> not defined in character map"));
314 else if (((cnt = BITPOS (tok_space),
315 (ELEM (ctype, class_collection, , space_value)
316 & BIT (tok_space)) == 0)
317 || (cnt = BITPOS (tok_blank),
318 (ELEM (ctype, class_collection, , space_value)
319 & BIT (tok_blank)) == 0))
320 && !be_quiet)
321 error (0, 0, _("<SP> character not in class `%s'"),
322 valid_table[cnt].name);
323 else if (((cnt = BITPOS (tok_punct),
324 (ELEM (ctype, class_collection, , space_value)
325 & BIT (tok_punct)) != 0)
326 || (cnt = BITPOS (tok_graph),
327 (ELEM (ctype, class_collection, , space_value)
328 & BIT (tok_graph))
329 != 0))
330 && !be_quiet)
331 error (0, 0, _("<SP> character must not be in class `%s'"),
332 valid_table[cnt].name);
333 else
334 ELEM (ctype, class_collection, , space_value) |= BIT (tok_print);
336 /* Now that the tests are done make sure the name array contains all
337 characters which are handled in the WIDTH section of the
338 character set definition file. */
339 if (charset->width_rules != NULL)
340 for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
342 size_t inner;
343 for (inner = charset->width_rules[cnt].from;
344 inner <= charset->width_rules[cnt].to; ++inner)
345 (void) find_idx (ctype, NULL, NULL, NULL, inner);
350 void
351 ctype_output (struct localedef_t *locale, struct charset_t *charset,
352 const char *output_path)
354 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
355 const size_t nelems = (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)
356 + 2 * (ctype->map_collection_nr - 2));
357 struct iovec iov[2 + nelems + ctype->nr_charclass
358 + ctype->map_collection_nr];
359 struct locale_file data;
360 u_int32_t idx[nelems];
361 size_t elem, cnt, offset, total;
364 if ((locale->binary & (1 << LC_CTYPE)) != 0)
366 iov[0].iov_base = ctype;
367 iov[0].iov_len = locale->len[LC_CTYPE];
369 write_locale_data (output_path, "LC_CTYPE", 1, iov);
371 return;
375 /* Now prepare the output: Find the sizes of the table we can use. */
376 allocate_arrays (ctype, charset);
378 data.magic = LIMAGIC (LC_CTYPE);
379 data.n = nelems;
380 iov[0].iov_base = (void *) &data;
381 iov[0].iov_len = sizeof (data);
383 iov[1].iov_base = (void *) idx;
384 iov[1].iov_len = sizeof (idx);
386 idx[0] = iov[0].iov_len + iov[1].iov_len;
387 offset = 0;
389 for (elem = 0; elem < nelems; ++elem)
391 if (elem < _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE))
392 switch (elem)
394 #define CTYPE_DATA(name, base, len) \
395 case _NL_ITEM_INDEX (name): \
396 iov[2 + elem + offset].iov_base = (base); \
397 iov[2 + elem + offset].iov_len = (len); \
398 if (elem + 1 < nelems) \
399 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
400 break
402 CTYPE_DATA (_NL_CTYPE_CLASS,
403 ctype->ctype_b,
404 (256 + 128) * sizeof (char_class_t));
406 CTYPE_DATA (_NL_CTYPE_TOUPPER_EB,
407 ctype->map_eb[0],
408 (ctype->plane_size * ctype->plane_cnt + 128)
409 * sizeof (u_int32_t));
410 CTYPE_DATA (_NL_CTYPE_TOLOWER_EB,
411 ctype->map_eb[1],
412 (ctype->plane_size * ctype->plane_cnt + 128)
413 * sizeof (u_int32_t));
415 CTYPE_DATA (_NL_CTYPE_TOUPPER_EL,
416 ctype->map_el[0],
417 (ctype->plane_size * ctype->plane_cnt + 128)
418 * sizeof (u_int32_t));
419 CTYPE_DATA (_NL_CTYPE_TOLOWER_EL,
420 ctype->map_el[1],
421 (ctype->plane_size * ctype->plane_cnt + 128)
422 * sizeof (u_int32_t));
424 CTYPE_DATA (_NL_CTYPE_CLASS32,
425 ctype->ctype32_b,
426 (ctype->plane_size * ctype->plane_cnt
427 * sizeof (char_class32_t)));
429 CTYPE_DATA (_NL_CTYPE_NAMES_EB,
430 ctype->names_eb, (ctype->plane_size * ctype->plane_cnt
431 * sizeof (u_int32_t)));
432 CTYPE_DATA (_NL_CTYPE_NAMES_EL,
433 ctype->names_el, (ctype->plane_size * ctype->plane_cnt
434 * sizeof (u_int32_t)));
436 CTYPE_DATA (_NL_CTYPE_HASH_SIZE,
437 &ctype->plane_size, sizeof (u_int32_t));
438 CTYPE_DATA (_NL_CTYPE_HASH_LAYERS,
439 &ctype->plane_cnt, sizeof (u_int32_t));
441 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
442 /* The class name array. */
443 total = 0;
444 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
446 iov[2 + elem + offset].iov_base
447 = (void *) ctype->classnames[cnt];
448 iov[2 + elem + offset].iov_len
449 = strlen (ctype->classnames[cnt]) + 1;
450 total += iov[2 + elem + offset].iov_len;
452 iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
453 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
454 total += 1 + (4 - ((total + 1) % 4));
456 if (elem + 1 < nelems)
457 idx[elem + 1] = idx[elem] + total;
458 break;
460 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
461 /* The class name array. */
462 total = 0;
463 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
465 iov[2 + elem + offset].iov_base
466 = (void *) ctype->mapnames[cnt];
467 iov[2 + elem + offset].iov_len
468 = strlen (ctype->mapnames[cnt]) + 1;
469 total += iov[2 + elem + offset].iov_len;
471 iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
472 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
473 total += 1 + (4 - ((total + 1) % 4));
475 if (elem + 1 < nelems)
476 idx[elem + 1] = idx[elem] + total;
477 break;
479 CTYPE_DATA (_NL_CTYPE_WIDTH,
480 ctype->width, ctype->plane_size * ctype->plane_cnt);
482 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
483 &ctype->mb_cur_max, sizeof (u_int32_t));
485 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
486 total = strlen (ctype->codeset_name) + 1;
487 if (total % 4 == 0)
488 iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
489 else
491 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
492 memcpy (iov[2 + elem + offset].iov_base, ctype->codeset_name,
493 total);
494 total = (total + 3) & ~3;
496 iov[2 + elem + offset].iov_len = total;
497 if (elem + 1 < nelems)
498 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
499 break;
501 default:
502 assert (! "unknown CTYPE element");
504 else
506 /* Handle extra maps. */
507 size_t nr = (elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) >> 1;
509 if (((elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) & 1) == 0)
510 iov[2 + elem + offset].iov_base = ctype->map_eb[nr];
511 else
512 iov[2 + elem + offset].iov_base = ctype->map_el[nr];
514 iov[2 + elem + offset].iov_len = ((ctype->plane_size
515 * ctype->plane_cnt + 128)
516 * sizeof (u_int32_t));
518 if (elem + 1 < nelems)
519 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
523 assert (2 + elem + offset == (nelems + ctype->nr_charclass
524 + ctype->map_collection_nr + 2));
526 write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
530 /* Character class handling. */
531 void
532 ctype_class_new (struct linereader *lr, struct localedef_t *locale,
533 enum token_t tok, struct token *code,
534 struct charset_t *charset)
536 ctype_class_newP (lr, locale->categories[LC_CTYPE].ctype,
537 code->val.str.start);
542 ctype_is_charclass (struct linereader *lr, struct localedef_t *locale,
543 const char *name)
545 size_t cnt;
547 for (cnt = 0; cnt < locale->categories[LC_CTYPE].ctype->nr_charclass; ++cnt)
548 if (strcmp (name, locale->categories[LC_CTYPE].ctype->classnames[cnt])
549 == 0)
550 return 1;
552 return 0;
556 void
557 ctype_class_start (struct linereader *lr, struct localedef_t *locale,
558 enum token_t tok, const char *str,
559 struct charset_t *charset)
561 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
562 size_t cnt;
564 switch (tok)
566 case tok_upper:
567 str = "upper";
568 break;
569 case tok_lower:
570 str = "lower";
571 break;
572 case tok_alpha:
573 str = "alpha";
574 break;
575 case tok_digit:
576 str = "digit";
577 break;
578 case tok_xdigit:
579 str = "xdigit";
580 break;
581 case tok_space:
582 str = "space";
583 break;
584 case tok_print:
585 str = "print";
586 break;
587 case tok_graph:
588 str = "graph";
589 break;
590 case tok_blank:
591 str = "blank";
592 break;
593 case tok_cntrl:
594 str = "cntrl";
595 break;
596 case tok_punct:
597 str = "punct";
598 break;
599 case tok_alnum:
600 str = "alnum";
601 break;
602 case tok_ident:
603 break;
604 default:
605 assert (! "illegal token as class name: should not happen");
608 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
609 if (strcmp (str, ctype->classnames[cnt]) == 0)
610 break;
612 if (cnt >= ctype->nr_charclass)
613 assert (! "unknown class in class definition: should not happen");
615 ctype->class_done |= BIT (tok);
617 ctype->current_class_mask = 1 << cnt;
618 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
622 void
623 ctype_class_from (struct linereader *lr, struct localedef_t *locale,
624 struct token *code, struct charset_t *charset)
626 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
627 unsigned int value;
629 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
631 ctype->last_class_char = value;
633 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
634 /* In the LC_CTYPE category it is no error when a character is
635 not found. This has to be ignored silently. */
636 return;
638 *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
639 &ctype->class_collection_act, value)
640 |= ctype->current_class_mask;
644 void
645 ctype_class_to (struct linereader *lr, struct localedef_t *locale,
646 struct token *code, struct charset_t *charset)
648 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
649 unsigned int value, cnt;
651 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
653 assert (value >= ctype->last_class_char);
655 for (cnt = ctype->last_class_char + 1; cnt <= value; ++cnt)
656 *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
657 &ctype->class_collection_act, cnt)
658 |= ctype->current_class_mask;
660 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
664 void
665 ctype_class_end (struct linereader *lr, struct localedef_t *locale)
667 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
669 /* We have no special actions to perform here. */
670 ctype->current_class_mask = 0;
671 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
675 /* Character map handling. */
676 void
677 ctype_map_new (struct linereader *lr, struct localedef_t *locale,
678 enum token_t tok, struct token *code,
679 struct charset_t *charset)
681 ctype_map_newP (lr, locale->categories[LC_CTYPE].ctype,
682 code->val.str.start, charset);
687 ctype_is_charconv (struct linereader *lr, struct localedef_t *locale,
688 const char *name)
690 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
691 size_t cnt;
693 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
694 if (strcmp (name, ctype->mapnames[cnt]) == 0)
695 return 1;
697 return 0;
701 void
702 ctype_map_start (struct linereader *lr, struct localedef_t *locale,
703 enum token_t tok, const char *name, struct charset_t *charset)
705 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
706 size_t cnt;
708 switch (tok)
710 case tok_toupper:
711 ctype->toupper_done = 1;
712 name = "toupper";
713 break;
714 case tok_tolower:
715 ctype->tolower_done = 1;
716 name = "tolower";
717 break;
718 case tok_ident:
719 break;
720 default:
721 assert (! "unknown token in category `LC_CTYPE' should not happen");
724 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
725 if (strcmp (name, ctype->mapnames[cnt]) == 0)
726 break;
728 if (cnt == ctype->map_collection_nr)
729 assert (! "unknown token in category `LC_CTYPE' should not happen");
731 ctype->last_map_idx = cnt;
732 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
736 void
737 ctype_map_from (struct linereader *lr, struct localedef_t *locale,
738 struct token *code, struct charset_t *charset)
740 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
741 unsigned int value;
743 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
745 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
746 /* In the LC_CTYPE category it is no error when a character is
747 not found. This has to be ignored silently. */
748 return;
750 assert (ctype->last_map_idx < ctype->map_collection_nr);
752 ctype->from_map_char = value;
756 void
757 ctype_map_to (struct linereader *lr, struct localedef_t *locale,
758 struct token *code, struct charset_t *charset)
760 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
761 unsigned int value;
763 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
765 if ((wchar_t) ctype->from_map_char == ILLEGAL_CHAR_VALUE
766 || (wchar_t) value == ILLEGAL_CHAR_VALUE)
768 /* In the LC_CTYPE category it is no error when a character is
769 not found. This has to be ignored silently. */
770 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
771 return;
774 *find_idx (ctype, &ctype->map_collection[ctype->last_map_idx],
775 &ctype->map_collection_max[ctype->last_map_idx],
776 &ctype->map_collection_act[ctype->last_map_idx],
777 ctype->from_map_char) = value;
779 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
783 void
784 ctype_map_end (struct linereader *lr, struct localedef_t *locale)
786 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
788 ctype->last_map_idx = MAX_NR_CHARMAP;
789 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
793 /* Local functions. */
794 static void
795 ctype_class_newP (struct linereader *lr, struct locale_ctype_t *ctype,
796 const char *name)
798 size_t cnt;
800 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
801 if (strcmp (ctype->classnames[cnt], name) == 0)
802 break;
804 if (cnt < ctype->nr_charclass)
806 lr_error (lr, _("character class `%s' already defined"), name);
807 return;
810 if (ctype->nr_charclass == MAX_NR_CHARCLASS)
811 /* Exit code 2 is prescribed in P1003.2b. */
812 error (2, 0, _("\
813 implementation limit: no more than %d character classes allowed"),
814 MAX_NR_CHARCLASS);
816 ctype->classnames[ctype->nr_charclass++] = name;
820 static void
821 ctype_map_newP (struct linereader *lr, struct locale_ctype_t *ctype,
822 const char *name, struct charset_t *charset)
824 size_t max_chars = 0;
825 size_t cnt;
827 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
829 if (strcmp (ctype->mapnames[cnt], name) == 0)
830 break;
832 if (max_chars < ctype->map_collection_max[cnt])
833 max_chars = ctype->map_collection_max[cnt];
836 if (cnt < ctype->map_collection_nr)
838 lr_error (lr, _("character map `%s' already defined"), name);
839 return;
842 if (ctype->map_collection_nr == MAX_NR_CHARMAP)
843 /* Exit code 2 is prescribed in P1003.2b. */
844 error (2, 0, _("\
845 implementation limit: no more than %d character maps allowed"),
846 MAX_NR_CHARMAP);
848 ctype->mapnames[cnt] = name;
850 if (max_chars == 0)
851 ctype->map_collection_max[cnt] = charset->mb_cur_max == 1 ? 256 : 512;
852 else
853 ctype->map_collection_max[cnt] = max_chars;
855 ctype->map_collection[cnt] = (u_int32_t *)
856 xmalloc (sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
857 memset (ctype->map_collection[cnt], '\0',
858 sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
859 ctype->map_collection_act[cnt] = 256;
861 ++ctype->map_collection_nr;
865 /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
866 is possible if we only want ot extend the name array. */
867 static u_int32_t *
868 find_idx (struct locale_ctype_t *ctype, u_int32_t **table, size_t *max,
869 size_t *act, unsigned int idx)
871 size_t cnt;
873 if (idx < 256)
874 return table == NULL ? NULL : &(*table)[idx];
876 for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
877 if (ctype->charnames[cnt] == idx)
878 break;
880 /* We have to distinguish two cases: the names is found or not. */
881 if (cnt == ctype->charnames_act)
883 /* Extend the name array. */
884 if (ctype->charnames_act == ctype->charnames_max)
886 ctype->charnames_max *= 2;
887 ctype->charnames = (unsigned int *)
888 xrealloc (ctype->charnames,
889 sizeof (unsigned int) * ctype->charnames_max);
891 ctype->charnames[ctype->charnames_act++] = idx;
894 if (table == NULL)
895 /* We have done everything we are asked to do. */
896 return NULL;
898 if (cnt >= *act)
900 if (cnt >= *max)
902 size_t old_max = *max;
904 *max *= 2;
905 while (*max <= cnt);
907 *table =
908 (u_int32_t *) xrealloc (*table, *max * sizeof (unsigned long int));
909 memset (&(*table)[old_max], '\0',
910 (*max - old_max) * sizeof (u_int32_t));
913 (*table)[cnt] = 0;
914 *act = cnt;
917 return &(*table)[cnt];
921 static void
922 set_class_defaults (struct locale_ctype_t *ctype, struct charset_t *charset)
924 /* This function defines the default values for the classes and conversions
925 according to POSIX.2 2.5.2.1.
926 It may seem that the order of these if-blocks is arbitrary but it is NOT.
927 Don't move them unless you know what you do! */
929 void set_default (int bit, int from, int to)
931 char tmp[2];
932 int ch;
933 /* Define string. */
934 strcpy (tmp, "?");
936 for (ch = from; ch <= to; ++ch)
938 unsigned int value;
939 tmp[0] = ch;
941 value = charset_find_value (charset, tmp, 1);
942 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
944 error (0, 0, _("\
945 character `%s' not defined while needed as default value"),
946 tmp);
947 continue;
949 else
950 ELEM (ctype, class_collection, , value) |= bit;
954 /* Set default values if keyword was not present. */
955 if ((ctype->class_done & BIT (tok_upper)) == 0)
956 /* "If this keyword [upper] is not specified, the uppercase letters
957 `A' through `Z', ..., shall automatically belong to this class,
958 with implementation defined character values." [P1003.2, 2.5.2.1] */
959 set_default (BIT (tok_upper), 'A', 'Z');
961 if ((ctype->class_done & BIT (tok_lower)) == 0)
962 /* "If this keyword [lower] is not specified, the lowercase letters
963 `a' through `z', ..., shall automatically belong to this class,
964 with implementation defined character values." [P1003.2, 2.5.2.1] */
965 set_default (BIT (tok_lower), 'a', 'z');
967 if ((ctype->class_done & BIT (tok_alpha)) == 0)
969 /* Table 2-6 in P1003.2 says that characters in class `upper' or
970 class `lower' *must* be in class `alpha'. */
971 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
972 size_t cnt;
974 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
975 if ((ctype->class_collection[cnt] & mask) != 0)
976 ctype->class_collection[cnt] |= BIT (tok_alpha);
979 if ((ctype->class_done & BIT (tok_digit)) == 0)
980 /* "If this keyword [digit] is not specified, the digits `0' through
981 `9', ..., shall automatically belong to this class, with
982 implementation-defined character values." [P1003.2, 2.5.2.1] */
983 set_default (BIT (tok_digit), '0', '9');
985 /* "Only characters specified for the `alpha' and `digit' keyword
986 shall be specified. Characters specified for the keyword `alpha'
987 and `digit' are automatically included in this class. */
989 unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
990 size_t cnt;
992 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
993 if ((ctype->class_collection[cnt] & mask) != 0)
994 ctype->class_collection[cnt] |= BIT (tok_alnum);
997 if ((ctype->class_done & BIT (tok_space)) == 0)
998 /* "If this keyword [space] is not specified, the characters <space>,
999 <form-feed>, <newline>, <carriage-return>, <tab>, and
1000 <vertical-tab>, ..., shall automatically belong to this class,
1001 with implementation-defined character values." [P1003.2, 2.5.2.1] */
1003 unsigned int value;
1005 value = charset_find_value (charset, "space", 5);
1006 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1007 error (0, 0, _("\
1008 character `%s' not defined while needed as default value"),
1009 "<space>");
1010 else
1011 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1013 value = charset_find_value (charset, "form-feed", 9);
1014 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1015 error (0, 0, _("\
1016 character `%s' not defined while needed as default value"),
1017 "<form-feed>");
1018 else
1019 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1021 value = charset_find_value (charset, "newline", 7);
1022 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1023 error (0, 0, _("\
1024 character `%s' not defined while needed as default value"),
1025 "<newline>");
1026 else
1027 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1029 value = charset_find_value (charset, "carriage-return", 15);
1030 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1031 error (0, 0, _("\
1032 character `%s' not defined while needed as default value"),
1033 "<carriage-return>");
1034 else
1035 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1037 value = charset_find_value (charset, "tab", 3);
1038 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1039 error (0, 0, _("\
1040 character `%s' not defined while needed as default value"),
1041 "<tab>");
1042 else
1043 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1045 value = charset_find_value (charset, "vertical-tab", 12);
1046 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1047 error (0, 0, _("\
1048 character `%s' not defined while needed as default value"),
1049 "<vertical-tab>");
1050 else
1051 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1054 if ((ctype->class_done & BIT (tok_xdigit)) == 0)
1055 /* "If this keyword is not specified, the digits `0' to `9', the
1056 uppercase letters `A' through `F', and the lowercase letters `a'
1057 through `f', ..., shall automatically belong to this class, with
1058 implementation defined character values." [P1003.2, 2.5.2.1] */
1060 set_default (BIT (tok_xdigit), '0', '9');
1061 set_default (BIT (tok_xdigit), 'A', 'F');
1062 set_default (BIT (tok_xdigit), 'a', 'f');
1065 if ((ctype->class_done & BIT (tok_blank)) == 0)
1066 /* "If this keyword [blank] is unspecified, the characters <space> and
1067 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
1069 unsigned int value;
1071 value = charset_find_value (charset, "space", 5);
1072 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1073 error (0, 0, _("\
1074 character `%s' not defined while needed as default value"),
1075 "<space>");
1076 else
1077 ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
1079 value = charset_find_value (charset, "tab", 3);
1080 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1081 error (0, 0, _("\
1082 character `%s' not defined while needed as default value"),
1083 "<tab>");
1084 else
1085 ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
1088 if ((ctype->class_done & BIT (tok_graph)) == 0)
1089 /* "If this keyword [graph] is not specified, characters specified for
1090 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
1091 shall belong to this character class." [P1003.2, 2.5.2.1] */
1093 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
1094 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
1095 size_t cnt;
1097 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1098 if ((ctype->class_collection[cnt] & mask) != 0)
1099 ctype->class_collection[cnt] |= BIT (tok_graph);
1102 if ((ctype->class_done & BIT (tok_print)) == 0)
1103 /* "If this keyword [print] is not provided, characters specified for
1104 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
1105 and the <space> character shall belong to this character class."
1106 [P1003.2, 2.5.2.1] */
1108 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
1109 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
1110 size_t cnt;
1111 wchar_t space;
1113 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1114 if ((ctype->class_collection[cnt] & mask) != 0)
1115 ctype->class_collection[cnt] |= BIT (tok_print);
1117 space = charset_find_value (charset, "space", 5);
1118 if (space == ILLEGAL_CHAR_VALUE && !be_quiet)
1119 error (0, 0, _("\
1120 character `%s' not defined while needed as default value"),
1121 "<space>");
1122 else
1123 ELEM (ctype, class_collection, , space) |= BIT (tok_print);
1126 if (ctype->toupper_done == 0)
1127 /* "If this keyword [toupper] is not specified, the lowercase letters
1128 `a' through `z', and their corresponding uppercase letters `A' to
1129 `Z', ..., shall automatically be included, with implementation-
1130 defined character values." [P1003.2, 2.5.2.1] */
1132 char tmp[4];
1133 int ch;
1135 strcpy (tmp, "<?>");
1137 for (ch = 'a'; ch <= 'z'; ++ch)
1139 unsigned int value_from, value_to;
1141 tmp[1] = (char) ch;
1143 value_from = charset_find_value (charset, &tmp[1], 1);
1144 if ((wchar_t) value_from == ILLEGAL_CHAR_VALUE && !be_quiet)
1146 error (0, 0, _("\
1147 character `%s' not defined while needed as default value"),
1148 tmp);
1149 continue;
1152 /* This conversion is implementation defined. */
1153 tmp[1] = (char) (ch + ('A' - 'a'));
1154 value_to = charset_find_value (charset, &tmp[1], 1);
1155 if ((wchar_t) value_to == ILLEGAL_CHAR_VALUE && !be_quiet)
1157 error (0, 0, _("\
1158 character `%s' not defined while needed as default value"),
1159 tmp);
1160 continue;
1163 /* The index [0] is determined by the order of the
1164 `ctype_map_newP' calls in `ctype_startup'. */
1165 ELEM (ctype, map_collection, [0], value_from) = value_to;
1169 if (ctype->tolower_done == 0)
1170 /* "If this keyword [tolower] is not specified, the mapping shall be
1171 the reverse mapping of the one specified to `toupper'." [P1003.2] */
1173 size_t cnt;
1175 for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
1176 if (ctype->map_collection[0][cnt] != 0)
1177 ELEM (ctype, map_collection, [1],
1178 ctype->map_collection[0][cnt])
1179 = ctype->charnames[cnt];
1184 static void
1185 allocate_arrays (struct locale_ctype_t *ctype, struct charset_t *charset)
1187 size_t idx;
1189 /* First we have to decide how we organize the arrays. It is easy
1190 for a one-byte character set. But multi-byte character set
1191 cannot be stored flat because the chars might be sparsely used.
1192 So we determine an optimal hashing function for the used
1193 characters.
1195 We use a very trivial hashing function to store the sparse
1196 table. CH % TABSIZE is used as an index. To solve multiple hits
1197 we have N planes. This guarantees a fixed search time for a
1198 character [N / 2]. In the following code we determine the minmum
1199 value for TABSIZE * N, where TABSIZE >= 256. */
1200 size_t min_total = UINT_MAX;
1201 size_t act_size = 256;
1203 if (!be_quiet)
1204 fputs (_("\
1205 Computing table size for character classes might take a while..."),
1206 stderr);
1208 while (act_size < min_total)
1210 size_t cnt[act_size];
1211 size_t act_planes = 1;
1213 memset (cnt, '\0', sizeof cnt);
1215 for (idx = 0; idx < 256; ++idx)
1216 cnt[idx] = 1;
1218 for (idx = 0; idx < ctype->charnames_act; ++idx)
1219 if (ctype->charnames[idx] >= 256)
1221 size_t nr = ctype->charnames[idx] % act_size;
1223 if (++cnt[nr] > act_planes)
1225 act_planes = cnt[nr];
1226 if (act_size * act_planes >= min_total)
1227 break;
1231 if (act_size * act_planes < min_total)
1233 min_total = act_size * act_planes;
1234 ctype->plane_size = act_size;
1235 ctype->plane_cnt = act_planes;
1238 ++act_size;
1241 if (!be_quiet)
1242 fputs (_(" done\n"), stderr);
1245 #if __BYTE_ORDER == __LITTLE_ENDIAN
1246 # define NAMES_B1 ctype->names_el
1247 # define NAMES_B2 ctype->names_eb
1248 #else
1249 # define NAMES_B1 ctype->names_eb
1250 # define NAMES_B2 ctype->names_el
1251 #endif
1253 ctype->names_eb = (u_int32_t *) xcalloc (ctype->plane_size
1254 * ctype->plane_cnt,
1255 sizeof (u_int32_t));
1256 ctype->names_el = (u_int32_t *) xcalloc (ctype->plane_size
1257 * ctype->plane_cnt,
1258 sizeof (u_int32_t));
1260 for (idx = 1; idx < 256; ++idx)
1261 NAMES_B1[idx] = idx;
1263 /* Trick: change the 0th entry's name to 1 to mark the cell occupied. */
1264 NAMES_B1[0] = 1;
1266 for (idx = 256; idx < ctype->charnames_act; ++idx)
1268 size_t nr = (ctype->charnames[idx] % ctype->plane_size);
1269 size_t depth = 0;
1271 while (NAMES_B1[nr + depth * ctype->plane_size])
1272 ++depth;
1273 assert (depth < ctype->plane_cnt);
1275 NAMES_B1[nr + depth * ctype->plane_size] = ctype->charnames[idx];
1277 /* Now for faster access remember the index in the NAMES_B array. */
1278 ctype->charnames[idx] = nr + depth * ctype->plane_size;
1280 NAMES_B1[0] = 0;
1282 for (idx = 0; idx < ctype->plane_size * ctype->plane_cnt; ++idx)
1283 NAMES_B2[idx] = SWAPU32 (NAMES_B1[idx]);
1286 /* You wonder about this amount of memory? This is only because some
1287 users do not manage to address the array with unsigned values or
1288 data types with range >= 256. '\200' would result in the array
1289 index -128. To help these poor people we duplicate the entries for
1290 128 up to 255 below the entry for \0. */
1291 ctype->ctype_b = (char_class_t *) xcalloc (256 + 128,
1292 sizeof (char_class_t));
1293 ctype->ctype32_b = (char_class32_t *) xcalloc (ctype->plane_size
1294 * ctype->plane_cnt,
1295 sizeof (char_class32_t));
1297 /* Fill in the character class information. */
1298 #if __BYTE_ORDER == __LITTLE_ENDIAN
1299 # define TRANS(w) CHAR_CLASS_TRANS (w)
1300 # define TRANS32(w) CHAR_CLASS32_TRANS (w)
1301 #else
1302 # define TRANS(w) (w)
1303 # define TRANS32(w) (w)
1304 #endif
1306 for (idx = 0; idx < ctype->class_collection_act; ++idx)
1307 if (ctype->charnames[idx] < 256)
1308 ctype->ctype_b[128 + ctype->charnames[idx]]
1309 = TRANS (ctype->class_collection[idx]);
1311 /* Mirror first 127 entries. We must take care that entry -1 is not
1312 mirrored because EOF == -1. */
1313 for (idx = 0; idx < 127; ++idx)
1314 ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
1316 /* The 32 bit array contains all characters. */
1317 for (idx = 0; idx < ctype->class_collection_act; ++idx)
1318 ctype->ctype32_b[ctype->charnames[idx]]
1319 = TRANS32 (ctype->class_collection[idx]);
1321 /* Room for table of mappings. */
1322 ctype->map_eb = (u_int32_t **) xmalloc (ctype->map_collection_nr
1323 * sizeof (u_int32_t *));
1324 ctype->map_el = (u_int32_t **) xmalloc (ctype->map_collection_nr
1325 * sizeof (u_int32_t *));
1327 /* Fill in all mappings. */
1328 for (idx = 0; idx < ctype->map_collection_nr; ++idx)
1330 unsigned int idx2;
1332 /* Allocate table. */
1333 ctype->map_eb[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
1334 * ctype->plane_cnt + 128)
1335 * sizeof (u_int32_t));
1336 ctype->map_el[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
1337 * ctype->plane_cnt + 128)
1338 * sizeof (u_int32_t));
1340 #if __BYTE_ORDER == __LITTLE_ENDIAN
1341 # define MAP_B1 ctype->map_el
1342 # define MAP_B2 ctype->map_eb
1343 #else
1344 # define MAP_B1 ctype->map_eb
1345 # define MAP_B2 ctype->map_el
1346 #endif
1348 /* Copy default value (identity mapping). */
1349 memcpy (&MAP_B1[idx][128], NAMES_B1,
1350 ctype->plane_size * ctype->plane_cnt * sizeof (u_int32_t));
1352 /* Copy values from collection. */
1353 for (idx2 = 0; idx2 < ctype->map_collection_act[idx]; ++idx2)
1354 if (ctype->map_collection[idx][idx2] != 0)
1355 MAP_B1[idx][128 + ctype->charnames[idx2]] =
1356 ctype->map_collection[idx][idx2];
1358 /* Mirror first 127 entries. We must take care not to map entry
1359 -1 because EOF == -1. */
1360 for (idx2 = 0; idx2 < 127; ++idx2)
1361 MAP_B1[idx][idx2] = MAP_B1[idx][256 + idx2];
1363 /* EOF must map to EOF. */
1364 MAP_B1[idx][127] = EOF;
1366 /* And now the other byte order. */
1367 for (idx2 = 0; idx2 < ctype->plane_size * ctype->plane_cnt + 128; ++idx2)
1368 MAP_B2[idx][idx2] = SWAPU32 (MAP_B1[idx][idx2]);
1371 /* Extra array for class and map names. */
1372 ctype->class_name_ptr = (u_int32_t *) xmalloc (ctype->nr_charclass
1373 * sizeof (u_int32_t));
1374 ctype->map_name_ptr = (u_int32_t *) xmalloc (ctype->map_collection_nr
1375 * sizeof (u_int32_t));
1377 /* Array for width information. Because the expected width are very
1378 small we use only one single byte. This save space and we need
1379 not provide the information twice with both endianesses. */
1380 ctype->width = (unsigned char *) xmalloc (ctype->plane_size
1381 * ctype->plane_cnt);
1382 /* Initialize with default width value. */
1383 memset (ctype->width, charset->width_default,
1384 ctype->plane_size * ctype->plane_cnt);
1385 if (charset->width_rules != NULL)
1387 size_t cnt;
1389 for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
1390 if (charset->width_rules[cnt].width != charset->width_default)
1391 for (idx = charset->width_rules[cnt].from;
1392 idx <= charset->width_rules[cnt].to; ++idx)
1394 size_t nr = idx % ctype->plane_size;
1395 size_t depth = 0;
1397 while (NAMES_B1[nr + depth * ctype->plane_size] != nr)
1398 ++depth;
1399 assert (depth < ctype->plane_cnt);
1401 ctype->width[nr + depth * ctype->plane_size]
1402 = charset->width_rules[cnt].width;
1406 /* Compute MB_CUR_MAX. Please note the value mb_cur_max in the
1407 character set definition gives the number of bytes in the wide
1408 character representation. We compute the number of bytes used
1409 for the UTF-8 encoded form. */
1410 ctype->mb_cur_max = ((int []) { 2, 3, 5, 6 }) [charset->mb_cur_max - 1];
1412 /* We need the name of the currently used 8-bit character set to
1413 make correct conversion between this 8-bit representation and the
1414 ISO 10646 character set used internally for wide characters. */
1415 ctype->codeset_name = charset->code_set_name;