Update.
[glibc.git] / locale / programs / ld-ctype.c
blob0a5e4eebf108854e31343127ba79d5a8c926d7bb
1 /* Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
24 #include <alloca.h>
25 #include <endian.h>
26 #include <limits.h>
27 #include <string.h>
29 #include "locales.h"
30 #include "localeinfo.h"
31 #include "langinfo.h"
32 #include "locfile-token.h"
33 #include "stringtrans.h"
35 /* Uncomment the following line in the production version. */
36 /* define NDEBUG 1 */
37 #include <assert.h>
/* Allocation helpers defined outside this file.  NOTE(review): they
   are presumably the usual abort-on-failure wrappers around malloc,
   calloc and realloc; nothing in this file checks their results.  */
40 void *xmalloc (size_t __n);
41 void *xcalloc (size_t __n, size_t __s);
42 void *xrealloc (void *__ptr, size_t __n);
45 /* The bit used for representing a special class.  BITPOS is the distance
   of the token CLASS from `tok_upper'; BIT is the matching one-bit mask.  */
46 #define BITPOS(class) ((class) - tok_upper)
47 #define BIT(class) (1 << BITPOS (class))
/* Lvalue for the entry of character VALUE in CTYPE->COLLECTION.  IDX is
   pasted after the collection name and after its `_max' and `_act'
   companions, so it is left empty for a plain table and is a subscript
   such as `[n]' for an array of tables.  Expands to a call of find_idx,
   which may grow the table.  */
49 #define ELEM(ctype, collection, idx, value) \
50 *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx, \
51 &ctype->collection##_act idx, value)
/* Reverse the order of the four bytes of W.  The outer shifts are not
   masked, so W has to be an unsigned value exactly 32 bits wide.  */
53 #define SWAPU32(w) \
54 (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))
/* Exchange the two low-order bytes of W.  */
56 #define SWAPU16(w) \
57 ((((w) >> 8) & 0xff) | (((w) & 0xff) << 8))
/* Exchange the two bytes inside each 16-bit half of the 32-bit value W;
   the halves themselves stay in place.  */
59 #define XSWAPU32(w) \
60 ((((w) & 0xff00ff00) >> 8) | (((w) & 0xff00ff) << 8))
63 /* To be compatible with former implementations we for now restrict
64 the number of bits for character classes to 16. When compatibility
65 is not necessary anymore increase the number to 32. */
66 #define char_class_t u_int16_t
67 #define CHAR_CLASS_TRANS SWAPU16
68 #define char_class32_t u_int32_t
69 #define CHAR_CLASS32_TRANS XSWAPU32
72 /* The real definition of the struct for the LC_CTYPE locale.  It holds
   the tables filled in while the locale source is read and the arrays
   that ctype_output hands to write_locale_data.  */
73 struct locale_ctype_t
/* Character values.  Slot N of this array names the character whose
   data is kept in slot N of the class and map tables (see find_idx).
   ctype_startup presets the first 256 slots to 0..255.  */
75 unsigned int *charnames;
76 size_t charnames_max; /* Number of slots allocated.  */
77 size_t charnames_act; /* Number of slots in use.  */
79 /* We will allow up to 8 * sizeof(u_int32_t) - 1 character classes. */
80 #define MAX_NR_CHARCLASS (8 * sizeof (u_int32_t) - 1)
81 size_t nr_charclass; /* Number of names stored in `classnames'.  */
82 const char *classnames[MAX_NR_CHARCLASS];
83 unsigned long int current_class_mask; /* Bit or'ed in by ctype_class_from/_to.  */
84 unsigned int last_class_char; /* Last character given to ctype_class_from.  */
85 u_int32_t *class_collection; /* Class bit mask per character slot.  */
86 size_t class_collection_max;
87 size_t class_collection_act;
88 unsigned long int class_done; /* Bits of classes seen by ctype_class_start.  */
90 /* If the following number ever turns out to be too small simply
91 increase it. But I doubt it will. --drepper@gnu */
92 #define MAX_NR_CHARMAP 16
93 const char *mapnames[MAX_NR_CHARMAP];
94 u_int32_t *map_collection[MAX_NR_CHARMAP]; /* One table per map name.  */
95 size_t map_collection_max[MAX_NR_CHARMAP];
96 size_t map_collection_act[MAX_NR_CHARMAP];
97 size_t map_collection_nr; /* Number of maps defined.  */
98 size_t last_map_idx; /* Map being filled; MAX_NR_CHARMAP if none.  */
99 unsigned int from_map_char; /* Source character awaiting its ctype_map_to.  */
100 int toupper_done; /* Set to 1 by ctype_map_start for `toupper'.  */
101 int tolower_done; /* Set to 1 by ctype_map_start for `tolower'.  */
103 /* The arrays for the binary representation. */
104 u_int32_t plane_size;
105 u_int32_t plane_cnt;
106 char_class_t *ctype_b;
107 char_class32_t *ctype32_b;
108 u_int32_t *names_el;
109 u_int32_t *names_eb;
110 u_int32_t **map_eb;
111 u_int32_t **map_el;
112 u_int32_t *class_name_ptr;
113 u_int32_t *map_name_ptr;
114 unsigned char *width;
115 u_int32_t mb_cur_max;
116 const char *codeset_name;
120 /* Prototypes for local functions. */
/* Register a new character class NAME; the Nth class registered is
   represented by bit N.  */
121 static void ctype_class_newP (struct linereader *lr,
122 struct locale_ctype_t *ctype, const char *name);
/* Register a new character map NAME and allocate its table.  */
123 static void ctype_map_newP (struct linereader *lr,
124 struct locale_ctype_t *ctype,
125 const char *name, struct charset_t *charset);
/* Return the slot for character IDX in *TABLE, extending the name array
   and the table as needed.  TABLE, MAX and ACT may be NULL.  */
126 static u_int32_t *find_idx (struct locale_ctype_t *ctype, u_int32_t **table,
127 size_t *max, size_t *act, unsigned int idx);
/* Fill in the POSIX.2 default members of classes the locale source
   did not define.  */
128 static void set_class_defaults (struct locale_ctype_t *ctype,
129 struct charset_t *charset);
/* Called by ctype_output before it reads the binary representation
   arrays of CTYPE.  */
130 static void allocate_arrays (struct locale_ctype_t *ctype,
131 struct charset_t *charset);
134 void
135 ctype_startup (struct linereader *lr, struct localedef_t *locale,
136 struct charset_t *charset)
138 unsigned int cnt;
139 struct locale_ctype_t *ctype;
141 /* It is important that we always use UCS1 encoding for strings now. */
142 encoding_method = ENC_UCS1;
144 /* Allocate the needed room. */
145 locale->categories[LC_CTYPE].ctype = ctype =
146 (struct locale_ctype_t *) xmalloc (sizeof (struct locale_ctype_t));
148 /* We have no names seen yet. */
149 ctype->charnames_max = charset->mb_cur_max == 1 ? 256 : 512;
150 ctype->charnames =
151 (unsigned int *) xmalloc (ctype->charnames_max * sizeof (unsigned int));
152 for (cnt = 0; cnt < 256; ++cnt)
153 ctype->charnames[cnt] = cnt;
154 ctype->charnames_act = 256;
156 /* Fill character class information. */
157 ctype->nr_charclass = 0;
158 ctype->current_class_mask = 0;
159 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
160 /* The order of the following instructions determines the bit
161 positions! */
162 ctype_class_newP (lr, ctype, "upper");
163 ctype_class_newP (lr, ctype, "lower");
164 ctype_class_newP (lr, ctype, "alpha");
165 ctype_class_newP (lr, ctype, "digit");
166 ctype_class_newP (lr, ctype, "xdigit");
167 ctype_class_newP (lr, ctype, "space");
168 ctype_class_newP (lr, ctype, "print");
169 ctype_class_newP (lr, ctype, "graph");
170 ctype_class_newP (lr, ctype, "blank");
171 ctype_class_newP (lr, ctype, "cntrl");
172 ctype_class_newP (lr, ctype, "punct");
173 ctype_class_newP (lr, ctype, "alnum");
175 ctype->class_collection_max = charset->mb_cur_max == 1 ? 256 : 512;
176 ctype->class_collection
177 = (u_int32_t *) xmalloc (sizeof (unsigned long int)
178 * ctype->class_collection_max);
179 memset (ctype->class_collection, '\0',
180 sizeof (unsigned long int) * ctype->class_collection_max);
181 ctype->class_collection_act = 256;
183 /* Fill character map information. */
184 ctype->map_collection_nr = 0;
185 ctype->last_map_idx = MAX_NR_CHARMAP;
186 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
187 ctype_map_newP (lr, ctype, "toupper", charset);
188 ctype_map_newP (lr, ctype, "tolower", charset);
190 /* Fill first 256 entries in `toupper' and `tolower' arrays. */
191 for (cnt = 0; cnt < 256; ++cnt)
193 ctype->map_collection[0][cnt] = cnt;
194 ctype->map_collection[1][cnt] = cnt;
199 void
200 ctype_finish (struct localedef_t *locale, struct charset_t *charset)
202 /* See POSIX.2, table 2-6 for the meaning of the following table. */
203 #define NCLASS 12
204 static const struct
206 const char *name;
207 const char allow[NCLASS];
209 valid_table[NCLASS] =
211 /* The order is important. See token.h for more information.
212 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
213 { "upper", "--MX-XDDXXX-" },
214 { "lower", "--MX-XDDXXX-" },
215 { "alpha", "---X-XDDXXX-" },
216 { "digit", "XXX--XDDXXX-" },
217 { "xdigit", "-----XDDXXX-" },
218 { "space", "XXXXX------X" },
219 { "print", "---------X--" },
220 { "graph", "---------X--" },
221 { "blank", "XXXXXM-----X" },
222 { "cntrl", "XXXXX-XX--XX" },
223 { "punct", "XXXXX-DD-X-X" },
224 { "alnum", "-----XDDXXX-" }
226 size_t cnt;
227 int cls1, cls2;
228 unsigned int space_value;
229 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
231 /* Set default value for classes not specified. */
232 set_class_defaults (ctype, charset);
234 /* Check according to table. */
235 for (cnt = 0; cnt < ctype->class_collection_max; ++cnt)
237 unsigned long int tmp;
239 tmp = ctype->class_collection[cnt];
240 if (tmp == 0)
241 continue;
243 for (cls1 = 0; cls1 < NCLASS; ++cls1)
244 if ((tmp & (1 << cls1)) != 0)
245 for (cls2 = 0; cls2 < NCLASS; ++cls2)
246 if (valid_table[cls1].allow[cls2] != '-')
248 int eq = (tmp & (1 << cls2)) != 0;
249 switch (valid_table[cls1].allow[cls2])
251 case 'M':
252 if (!eq)
254 char buf[17];
255 char *cp = buf;
256 unsigned int value;
258 value = ctype->charnames[cnt];
260 if ((value & 0xff000000) != 0)
261 cp += sprintf (cp, "\\%o", (value >> 24) & 0xff);
262 if ((value & 0xffff0000) != 0)
263 cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
264 if ((value & 0xffffff00) != 0)
265 cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
266 sprintf (cp, "\\%o", value & 0xff);
268 if (!be_quiet)
269 error (0, 0, _("\
270 character %s'%s' in class `%s' must be in class `%s'"), value > 256 ? "L" : "",
271 cp, valid_table[cls1].name,
272 valid_table[cls2].name);
274 break;
276 case 'X':
277 if (eq)
279 char buf[17];
280 char *cp = buf;
281 unsigned int value;
283 value = ctype->charnames[cnt];
285 if ((value & 0xff000000) != 0)
286 cp += sprintf (cp, "\\%o", value >> 24);
287 if ((value & 0xffff0000) != 0)
288 cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
289 if ((value & 0xffffff00) != 0)
290 cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
291 sprintf (cp, "\\%o", value & 0xff);
293 if (!be_quiet)
294 error (0, 0, _("\
295 character %s'%s' in class `%s' must not be in class `%s'"),
296 value > 256 ? "L" : "", cp,
297 valid_table[cls1].name,
298 valid_table[cls2].name);
300 break;
302 case 'D':
303 ctype->class_collection[cnt] |= 1 << cls2;
304 break;
306 default:
307 error (5, 0, _("internal error in %s, line %u"),
308 __FUNCTION__, __LINE__);
313 /* ... and now test <SP> as a special case. */
314 space_value = charset_find_value (charset, "SP", 2);
315 if ((wchar_t) space_value == ILLEGAL_CHAR_VALUE && !be_quiet)
316 error (0, 0, _("character <SP> not defined in character map"));
317 else if (((cnt = BITPOS (tok_space),
318 (ELEM (ctype, class_collection, , space_value)
319 & BIT (tok_space)) == 0)
320 || (cnt = BITPOS (tok_blank),
321 (ELEM (ctype, class_collection, , space_value)
322 & BIT (tok_blank)) == 0))
323 && !be_quiet)
324 error (0, 0, _("<SP> character not in class `%s'"),
325 valid_table[cnt].name);
326 else if (((cnt = BITPOS (tok_punct),
327 (ELEM (ctype, class_collection, , space_value)
328 & BIT (tok_punct)) != 0)
329 || (cnt = BITPOS (tok_graph),
330 (ELEM (ctype, class_collection, , space_value)
331 & BIT (tok_graph))
332 != 0))
333 && !be_quiet)
334 error (0, 0, _("<SP> character must not be in class `%s'"),
335 valid_table[cnt].name);
336 else
337 ELEM (ctype, class_collection, , space_value) |= BIT (tok_print);
339 /* Now that the tests are done make sure the name array contains all
340 characters which are handled in the WIDTH section of the
341 character set definition file. */
342 if (charset->width_rules != NULL)
343 for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
345 size_t inner;
346 for (inner = charset->width_rules[cnt].from;
347 inner <= charset->width_rules[cnt].to; ++inner)
348 (void) find_idx (ctype, NULL, NULL, NULL, inner);
353 void
354 ctype_output (struct localedef_t *locale, struct charset_t *charset,
355 const char *output_path)
357 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
358 const size_t nelems = (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)
359 + 2 * (ctype->map_collection_nr - 2));
360 struct iovec iov[2 + nelems + ctype->nr_charclass
361 + ctype->map_collection_nr];
362 struct locale_file data;
363 u_int32_t idx[nelems];
364 size_t elem, cnt, offset, total;
367 if ((locale->binary & (1 << LC_CTYPE)) != 0)
369 iov[0].iov_base = ctype;
370 iov[0].iov_len = locale->len[LC_CTYPE];
372 write_locale_data (output_path, "LC_CTYPE", 1, iov);
374 return;
378 /* Now prepare the output: Find the sizes of the table we can use. */
379 allocate_arrays (ctype, charset);
381 data.magic = LIMAGIC (LC_CTYPE);
382 data.n = nelems;
383 iov[0].iov_base = (void *) &data;
384 iov[0].iov_len = sizeof (data);
386 iov[1].iov_base = (void *) idx;
387 iov[1].iov_len = sizeof (idx);
389 idx[0] = iov[0].iov_len + iov[1].iov_len;
390 offset = 0;
392 for (elem = 0; elem < nelems; ++elem)
394 if (elem < _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE))
395 switch (elem)
397 #define CTYPE_DATA(name, base, len) \
398 case _NL_ITEM_INDEX (name): \
399 iov[2 + elem + offset].iov_base = (base); \
400 iov[2 + elem + offset].iov_len = (len); \
401 if (elem + 1 < nelems) \
402 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
403 break
405 CTYPE_DATA (_NL_CTYPE_CLASS,
406 ctype->ctype_b,
407 (256 + 128) * sizeof (char_class_t));
409 CTYPE_DATA (_NL_CTYPE_TOUPPER_EB,
410 ctype->map_eb[0],
411 (ctype->plane_size * ctype->plane_cnt + 128)
412 * sizeof (u_int32_t));
413 CTYPE_DATA (_NL_CTYPE_TOLOWER_EB,
414 ctype->map_eb[1],
415 (ctype->plane_size * ctype->plane_cnt + 128)
416 * sizeof (u_int32_t));
418 CTYPE_DATA (_NL_CTYPE_TOUPPER_EL,
419 ctype->map_el[0],
420 (ctype->plane_size * ctype->plane_cnt + 128)
421 * sizeof (u_int32_t));
422 CTYPE_DATA (_NL_CTYPE_TOLOWER_EL,
423 ctype->map_el[1],
424 (ctype->plane_size * ctype->plane_cnt + 128)
425 * sizeof (u_int32_t));
427 CTYPE_DATA (_NL_CTYPE_CLASS32,
428 ctype->ctype32_b,
429 (ctype->plane_size * ctype->plane_cnt
430 * sizeof (char_class32_t)));
432 CTYPE_DATA (_NL_CTYPE_NAMES_EB,
433 ctype->names_eb, (ctype->plane_size * ctype->plane_cnt
434 * sizeof (u_int32_t)));
435 CTYPE_DATA (_NL_CTYPE_NAMES_EL,
436 ctype->names_el, (ctype->plane_size * ctype->plane_cnt
437 * sizeof (u_int32_t)));
439 CTYPE_DATA (_NL_CTYPE_HASH_SIZE,
440 &ctype->plane_size, sizeof (u_int32_t));
441 CTYPE_DATA (_NL_CTYPE_HASH_LAYERS,
442 &ctype->plane_cnt, sizeof (u_int32_t));
444 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
445 /* The class name array. */
446 total = 0;
447 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
449 iov[2 + elem + offset].iov_base
450 = (void *) ctype->classnames[cnt];
451 iov[2 + elem + offset].iov_len
452 = strlen (ctype->classnames[cnt]) + 1;
453 total += iov[2 + elem + offset].iov_len;
455 iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
456 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
457 total += 1 + (4 - ((total + 1) % 4));
459 if (elem + 1 < nelems)
460 idx[elem + 1] = idx[elem] + total;
461 break;
463 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
464 /* The class name array. */
465 total = 0;
466 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
468 iov[2 + elem + offset].iov_base
469 = (void *) ctype->mapnames[cnt];
470 iov[2 + elem + offset].iov_len
471 = strlen (ctype->mapnames[cnt]) + 1;
472 total += iov[2 + elem + offset].iov_len;
474 iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
475 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
476 total += 1 + (4 - ((total + 1) % 4));
478 if (elem + 1 < nelems)
479 idx[elem + 1] = idx[elem] + total;
480 break;
482 CTYPE_DATA (_NL_CTYPE_WIDTH,
483 ctype->width, ctype->plane_size * ctype->plane_cnt);
485 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
486 &ctype->mb_cur_max, sizeof (u_int32_t));
488 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
489 total = strlen (ctype->codeset_name) + 1;
490 if (total % 4 == 0)
491 iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
492 else
494 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
495 memcpy (iov[2 + elem + offset].iov_base, ctype->codeset_name,
496 total);
497 total = (total + 3) & ~3;
499 iov[2 + elem + offset].iov_len = total;
500 if (elem + 1 < nelems)
501 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
502 break;
504 default:
505 assert (! "unknown CTYPE element");
507 else
509 /* Handle extra maps. */
510 size_t nr = (elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) >> 1;
512 if (((elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) & 1) == 0)
513 iov[2 + elem + offset].iov_base = ctype->map_eb[nr];
514 else
515 iov[2 + elem + offset].iov_base = ctype->map_el[nr];
517 iov[2 + elem + offset].iov_len = ((ctype->plane_size
518 * ctype->plane_cnt + 128)
519 * sizeof (u_int32_t));
521 if (elem + 1 < nelems)
522 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
526 assert (2 + elem + offset == (nelems + ctype->nr_charclass
527 + ctype->map_collection_nr + 2));
529 write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
533 /* Character class handling. */
534 void
535 ctype_class_new (struct linereader *lr, struct localedef_t *locale,
536 enum token_t tok, struct token *code,
537 struct charset_t *charset)
539 ctype_class_newP (lr, locale->categories[LC_CTYPE].ctype,
540 code->val.str.start);
545 ctype_is_charclass (struct linereader *lr, struct localedef_t *locale,
546 const char *name)
548 size_t cnt;
550 for (cnt = 0; cnt < locale->categories[LC_CTYPE].ctype->nr_charclass; ++cnt)
551 if (strcmp (name, locale->categories[LC_CTYPE].ctype->classnames[cnt])
552 == 0)
553 return 1;
555 return 0;
559 void
560 ctype_class_start (struct linereader *lr, struct localedef_t *locale,
561 enum token_t tok, const char *str,
562 struct charset_t *charset)
564 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
565 size_t cnt;
567 switch (tok)
569 case tok_upper:
570 str = "upper";
571 break;
572 case tok_lower:
573 str = "lower";
574 break;
575 case tok_alpha:
576 str = "alpha";
577 break;
578 case tok_digit:
579 str = "digit";
580 break;
581 case tok_xdigit:
582 str = "xdigit";
583 break;
584 case tok_space:
585 str = "space";
586 break;
587 case tok_print:
588 str = "print";
589 break;
590 case tok_graph:
591 str = "graph";
592 break;
593 case tok_blank:
594 str = "blank";
595 break;
596 case tok_cntrl:
597 str = "cntrl";
598 break;
599 case tok_punct:
600 str = "punct";
601 break;
602 case tok_alnum:
603 str = "alnum";
604 break;
605 case tok_ident:
606 break;
607 default:
608 assert (! "illegal token as class name: should not happen");
611 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
612 if (strcmp (str, ctype->classnames[cnt]) == 0)
613 break;
615 if (cnt >= ctype->nr_charclass)
616 assert (! "unknown class in class definition: should not happen");
618 ctype->class_done |= BIT (tok);
620 ctype->current_class_mask = 1 << cnt;
621 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
625 void
626 ctype_class_from (struct linereader *lr, struct localedef_t *locale,
627 struct token *code, struct charset_t *charset)
629 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
630 unsigned int value;
632 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
634 ctype->last_class_char = value;
636 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
637 /* In the LC_CTYPE category it is no error when a character is
638 not found. This has to be ignored silently. */
639 return;
641 *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
642 &ctype->class_collection_act, value)
643 |= ctype->current_class_mask;
647 void
648 ctype_class_to (struct linereader *lr, struct localedef_t *locale,
649 struct token *code, struct charset_t *charset)
651 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
652 unsigned int value, cnt;
654 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
656 assert (value >= ctype->last_class_char);
658 for (cnt = ctype->last_class_char + 1; cnt <= value; ++cnt)
659 *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
660 &ctype->class_collection_act, cnt)
661 |= ctype->current_class_mask;
663 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
667 void
668 ctype_class_end (struct linereader *lr, struct localedef_t *locale)
670 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
672 /* We have no special actions to perform here. */
673 ctype->current_class_mask = 0;
674 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
678 /* Character map handling. */
679 void
680 ctype_map_new (struct linereader *lr, struct localedef_t *locale,
681 enum token_t tok, struct token *code,
682 struct charset_t *charset)
684 ctype_map_newP (lr, locale->categories[LC_CTYPE].ctype,
685 code->val.str.start, charset);
690 ctype_is_charconv (struct linereader *lr, struct localedef_t *locale,
691 const char *name)
693 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
694 size_t cnt;
696 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
697 if (strcmp (name, ctype->mapnames[cnt]) == 0)
698 return 1;
700 return 0;
704 void
705 ctype_map_start (struct linereader *lr, struct localedef_t *locale,
706 enum token_t tok, const char *name, struct charset_t *charset)
708 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
709 size_t cnt;
711 switch (tok)
713 case tok_toupper:
714 ctype->toupper_done = 1;
715 name = "toupper";
716 break;
717 case tok_tolower:
718 ctype->tolower_done = 1;
719 name = "tolower";
720 break;
721 case tok_ident:
722 break;
723 default:
724 assert (! "unknown token in category `LC_CTYPE' should not happen");
727 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
728 if (strcmp (name, ctype->mapnames[cnt]) == 0)
729 break;
731 if (cnt == ctype->map_collection_nr)
732 assert (! "unknown token in category `LC_CTYPE' should not happen");
734 ctype->last_map_idx = cnt;
735 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
739 void
740 ctype_map_from (struct linereader *lr, struct localedef_t *locale,
741 struct token *code, struct charset_t *charset)
743 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
744 unsigned int value;
746 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
748 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
749 /* In the LC_CTYPE category it is no error when a character is
750 not found. This has to be ignored silently. */
751 return;
753 assert (ctype->last_map_idx < ctype->map_collection_nr);
755 ctype->from_map_char = value;
759 void
760 ctype_map_to (struct linereader *lr, struct localedef_t *locale,
761 struct token *code, struct charset_t *charset)
763 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
764 unsigned int value;
766 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
768 if ((wchar_t) ctype->from_map_char == ILLEGAL_CHAR_VALUE
769 || (wchar_t) value == ILLEGAL_CHAR_VALUE)
771 /* In the LC_CTYPE category it is no error when a character is
772 not found. This has to be ignored silently. */
773 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
774 return;
777 *find_idx (ctype, &ctype->map_collection[ctype->last_map_idx],
778 &ctype->map_collection_max[ctype->last_map_idx],
779 &ctype->map_collection_act[ctype->last_map_idx],
780 ctype->from_map_char) = value;
782 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
786 void
787 ctype_map_end (struct linereader *lr, struct localedef_t *locale)
789 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
791 ctype->last_map_idx = MAX_NR_CHARMAP;
792 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
796 /* Local functions. */
797 static void
798 ctype_class_newP (struct linereader *lr, struct locale_ctype_t *ctype,
799 const char *name)
801 size_t cnt;
803 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
804 if (strcmp (ctype->classnames[cnt], name) == 0)
805 break;
807 if (cnt < ctype->nr_charclass)
809 lr_error (lr, _("character class `%s' already defined"), name);
810 return;
813 if (ctype->nr_charclass == MAX_NR_CHARCLASS)
814 /* Exit code 2 is prescribed in P1003.2b. */
815 error (2, 0, _("\
816 implementation limit: no more than %d character classes allowed"),
817 MAX_NR_CHARCLASS);
819 ctype->classnames[ctype->nr_charclass++] = name;
823 static void
824 ctype_map_newP (struct linereader *lr, struct locale_ctype_t *ctype,
825 const char *name, struct charset_t *charset)
827 size_t max_chars = 0;
828 size_t cnt;
830 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
832 if (strcmp (ctype->mapnames[cnt], name) == 0)
833 break;
835 if (max_chars < ctype->map_collection_max[cnt])
836 max_chars = ctype->map_collection_max[cnt];
839 if (cnt < ctype->map_collection_nr)
841 lr_error (lr, _("character map `%s' already defined"), name);
842 return;
845 if (ctype->map_collection_nr == MAX_NR_CHARMAP)
846 /* Exit code 2 is prescribed in P1003.2b. */
847 error (2, 0, _("\
848 implementation limit: no more than %d character maps allowed"),
849 MAX_NR_CHARMAP);
851 ctype->mapnames[cnt] = name;
853 if (max_chars == 0)
854 ctype->map_collection_max[cnt] = charset->mb_cur_max == 1 ? 256 : 512;
855 else
856 ctype->map_collection_max[cnt] = max_chars;
858 ctype->map_collection[cnt] = (u_int32_t *)
859 xmalloc (sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
860 memset (ctype->map_collection[cnt], '\0',
861 sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
862 ctype->map_collection_act[cnt] = 256;
864 ++ctype->map_collection_nr;
868 /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
869 is possible if we only want ot extend the name array. */
870 static u_int32_t *
871 find_idx (struct locale_ctype_t *ctype, u_int32_t **table, size_t *max,
872 size_t *act, unsigned int idx)
874 size_t cnt;
876 if (idx < 256)
877 return table == NULL ? NULL : &(*table)[idx];
879 for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
880 if (ctype->charnames[cnt] == idx)
881 break;
883 /* We have to distinguish two cases: the names is found or not. */
884 if (cnt == ctype->charnames_act)
886 /* Extend the name array. */
887 if (ctype->charnames_act == ctype->charnames_max)
889 ctype->charnames_max *= 2;
890 ctype->charnames = (unsigned int *)
891 xrealloc (ctype->charnames,
892 sizeof (unsigned int) * ctype->charnames_max);
894 ctype->charnames[ctype->charnames_act++] = idx;
897 if (table == NULL)
898 /* We have done everything we are asked to do. */
899 return NULL;
901 if (cnt >= *act)
903 if (cnt >= *max)
905 size_t old_max = *max;
907 *max *= 2;
908 while (*max <= cnt);
910 *table =
911 (u_int32_t *) xrealloc (*table, *max * sizeof (unsigned long int));
912 memset (&(*table)[old_max], '\0',
913 (*max - old_max) * sizeof (u_int32_t));
916 (*table)[cnt] = 0;
917 *act = cnt;
920 return &(*table)[cnt];
924 static void
925 set_class_defaults (struct locale_ctype_t *ctype, struct charset_t *charset)
927 /* This function defines the default values for the classes and conversions
928 according to POSIX.2 2.5.2.1.
929 It may seem that the order of these if-blocks is arbitrary but it is NOT.
930 Don't move them unless you know what you are doing! */
932 void set_default (int bit, int from, int to)
934 char tmp[2];
935 int ch;
936 /* Define string. */
937 strcpy (tmp, "?");
939 for (ch = from; ch <= to; ++ch)
941 unsigned int value;
942 tmp[0] = ch;
944 value = charset_find_value (charset, tmp, 1);
945 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
947 error (0, 0, _("\
948 character `%s' not defined while needed as default value"),
949 tmp);
950 continue;
952 else
953 ELEM (ctype, class_collection, , value) |= bit;
957 /* Set default values if keyword was not present. */
958 if ((ctype->class_done & BIT (tok_upper)) == 0)
959 /* "If this keyword [upper] is not specified, the uppercase letters
960 `A' through `Z', ..., shall automatically belong to this class,
961 with implementation defined character values." [P1003.2, 2.5.2.1] */
962 set_default (BIT (tok_upper), 'A', 'Z');
964 if ((ctype->class_done & BIT (tok_lower)) == 0)
965 /* "If this keyword [lower] is not specified, the lowercase letters
966 `a' through `z', ..., shall automatically belong to this class,
967 with implementation defined character values." [P1003.2, 2.5.2.1] */
968 set_default (BIT (tok_lower), 'a', 'z');
970 if ((ctype->class_done & BIT (tok_alpha)) == 0)
972 /* Table 2-6 in P1003.2 says that characters in class `upper' or
973 class `lower' *must* be in class `alpha'. */
974 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
975 size_t cnt;
977 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
978 if ((ctype->class_collection[cnt] & mask) != 0)
979 ctype->class_collection[cnt] |= BIT (tok_alpha);
982 if ((ctype->class_done & BIT (tok_digit)) == 0)
983 /* "If this keyword [digit] is not specified, the digits `0' through
984 `9', ..., shall automatically belong to this class, with
985 implementation-defined character values." [P1003.2, 2.5.2.1] */
986 set_default (BIT (tok_digit), '0', '9');
988 /* "Only characters specified for the `alpha' and `digit' keyword
989 shall be specified. Characters specified for the keyword `alpha'
990 and `digit' are automatically included in this class. */
992 unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
993 size_t cnt;
995 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
996 if ((ctype->class_collection[cnt] & mask) != 0)
997 ctype->class_collection[cnt] |= BIT (tok_alnum);
1000 if ((ctype->class_done & BIT (tok_space)) == 0)
1001 /* "If this keyword [space] is not specified, the characters <space>,
1002 <form-feed>, <newline>, <carriage-return>, <tab>, and
1003 <vertical-tab>, ..., shall automatically belong to this class,
1004 with implementation-defined character values." [P1003.2, 2.5.2.1] */
1006 unsigned int value;
1008 value = charset_find_value (charset, "space", 5);
1009 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1010 error (0, 0, _("\
1011 character `%s' not defined while needed as default value"),
1012 "<space>");
1013 else
1014 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1016 value = charset_find_value (charset, "form-feed", 9);
1017 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1018 error (0, 0, _("\
1019 character `%s' not defined while needed as default value"),
1020 "<form-feed>");
1021 else
1022 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1024 value = charset_find_value (charset, "newline", 7);
1025 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1026 error (0, 0, _("\
1027 character `%s' not defined while needed as default value"),
1028 "<newline>");
1029 else
1030 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1032 value = charset_find_value (charset, "carriage-return", 15);
1033 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1034 error (0, 0, _("\
1035 character `%s' not defined while needed as default value"),
1036 "<carriage-return>");
1037 else
1038 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1040 value = charset_find_value (charset, "tab", 3);
1041 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1042 error (0, 0, _("\
1043 character `%s' not defined while needed as default value"),
1044 "<tab>");
1045 else
1046 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1048 value = charset_find_value (charset, "vertical-tab", 12);
1049 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1050 error (0, 0, _("\
1051 character `%s' not defined while needed as default value"),
1052 "<vertical-tab>");
1053 else
1054 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1057 if ((ctype->class_done & BIT (tok_xdigit)) == 0)
1058 /* "If this keyword is not specified, the digits `0' to `9', the
1059 uppercase letters `A' through `F', and the lowercase letters `a'
1060 through `f', ..., shall automatically belong to this class, with
1061 implementation defined character values." [P1003.2, 2.5.2.1] */
1063 set_default (BIT (tok_xdigit), '0', '9');
1064 set_default (BIT (tok_xdigit), 'A', 'F');
1065 set_default (BIT (tok_xdigit), 'a', 'f');
1068 if ((ctype->class_done & BIT (tok_blank)) == 0)
1069 /* "If this keyword [blank] is unspecified, the characters <space> and
1070 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
1072 unsigned int value;
1074 value = charset_find_value (charset, "space", 5);
1075 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1076 error (0, 0, _("\
1077 character `%s' not defined while needed as default value"),
1078 "<space>");
1079 else
1080 ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
1082 value = charset_find_value (charset, "tab", 3);
1083 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1084 error (0, 0, _("\
1085 character `%s' not defined while needed as default value"),
1086 "<tab>");
1087 else
1088 ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
1091 if ((ctype->class_done & BIT (tok_graph)) == 0)
1092 /* "If this keyword [graph] is not specified, characters specified for
1093 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
1094 shall belong to this character class." [P1003.2, 2.5.2.1] */
1096 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
1097 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
1098 size_t cnt;
1100 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1101 if ((ctype->class_collection[cnt] & mask) != 0)
1102 ctype->class_collection[cnt] |= BIT (tok_graph);
1105 if ((ctype->class_done & BIT (tok_print)) == 0)
1106 /* "If this keyword [print] is not provided, characters specified for
1107 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
1108 and the <space> character shall belong to this character class."
1109 [P1003.2, 2.5.2.1] */
1111 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
1112 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
1113 size_t cnt;
1114 wchar_t space;
1116 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1117 if ((ctype->class_collection[cnt] & mask) != 0)
1118 ctype->class_collection[cnt] |= BIT (tok_print);
1120 space = charset_find_value (charset, "space", 5);
1121 if (space == ILLEGAL_CHAR_VALUE && !be_quiet)
1122 error (0, 0, _("\
1123 character `%s' not defined while needed as default value"),
1124 "<space>");
1125 else
1126 ELEM (ctype, class_collection, , space) |= BIT (tok_print);
1129 if (ctype->toupper_done == 0)
1130 /* "If this keyword [toupper] is not specified, the lowercase letters
1131 `a' through `z', and their corresponding uppercase letters `A' to
1132 `Z', ..., shall automatically be included, with implementation-
1133 defined character values." [P1003.2, 2.5.2.1] */
1135 char tmp[4];
1136 int ch;
1138 strcpy (tmp, "<?>");
1140 for (ch = 'a'; ch <= 'z'; ++ch)
1142 unsigned int value_from, value_to;
1144 tmp[1] = (char) ch;
1146 value_from = charset_find_value (charset, &tmp[1], 1);
1147 if ((wchar_t) value_from == ILLEGAL_CHAR_VALUE && !be_quiet)
1149 error (0, 0, _("\
1150 character `%s' not defined while needed as default value"),
1151 tmp);
1152 continue;
1155 /* This conversion is implementation defined. */
1156 tmp[1] = (char) (ch + ('A' - 'a'));
1157 value_to = charset_find_value (charset, &tmp[1], 1);
1158 if ((wchar_t) value_to == ILLEGAL_CHAR_VALUE && !be_quiet)
1160 error (0, 0, _("\
1161 character `%s' not defined while needed as default value"),
1162 tmp);
1163 continue;
1166 /* The index [0] is determined by the order of the
1167 `ctype_map_newP' calls in `ctype_startup'. */
1168 ELEM (ctype, map_collection, [0], value_from) = value_to;
1172 if (ctype->tolower_done == 0)
1173 /* "If this keyword [tolower] is not specified, the mapping shall be
1174 the reverse mapping of the one specified to `toupper'." [P1003.2] */
1176 size_t cnt;
1178 for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
1179 if (ctype->map_collection[0][cnt] != 0)
1180 ELEM (ctype, map_collection, [1],
1181 ctype->map_collection[0][cnt])
1182 = ctype->charnames[cnt];
1187 static void
1188 allocate_arrays (struct locale_ctype_t *ctype, struct charset_t *charset)
1190 size_t idx;
1192 /* First we have to decide how we organize the arrays. It is easy
1193 for a one-byte character set. But multi-byte character set
1194 cannot be stored flat because the chars might be sparsely used.
1195 So we determine an optimal hashing function for the used
1196 characters.
1198 We use a very trivial hashing function to store the sparse
1199 table. CH % TABSIZE is used as an index. To solve multiple hits
1200 we have N planes. This guarantees a fixed search time for a
1201 character [N / 2]. In the following code we determine the minmum
1202 value for TABSIZE * N, where TABSIZE >= 256. */
1203 size_t min_total = UINT_MAX;
1204 size_t act_size = 256;
1206 if (!be_quiet)
1207 fputs (_("\
1208 Computing table size for character classes might take a while..."),
1209 stderr);
1211 while (act_size < min_total)
1213 size_t cnt[act_size];
1214 size_t act_planes = 1;
1216 memset (cnt, '\0', sizeof cnt);
1218 for (idx = 0; idx < 256; ++idx)
1219 cnt[idx] = 1;
1221 for (idx = 0; idx < ctype->charnames_act; ++idx)
1222 if (ctype->charnames[idx] >= 256)
1224 size_t nr = ctype->charnames[idx] % act_size;
1226 if (++cnt[nr] > act_planes)
1228 act_planes = cnt[nr];
1229 if (act_size * act_planes >= min_total)
1230 break;
1234 if (act_size * act_planes < min_total)
1236 min_total = act_size * act_planes;
1237 ctype->plane_size = act_size;
1238 ctype->plane_cnt = act_planes;
1241 ++act_size;
1244 if (!be_quiet)
1245 fputs (_(" done\n"), stderr);
1248 #if __BYTE_ORDER == __LITTLE_ENDIAN
1249 # define NAMES_B1 ctype->names_el
1250 # define NAMES_B2 ctype->names_eb
1251 #else
1252 # define NAMES_B1 ctype->names_eb
1253 # define NAMES_B2 ctype->names_el
1254 #endif
1256 ctype->names_eb = (u_int32_t *) xcalloc (ctype->plane_size
1257 * ctype->plane_cnt,
1258 sizeof (u_int32_t));
1259 ctype->names_el = (u_int32_t *) xcalloc (ctype->plane_size
1260 * ctype->plane_cnt,
1261 sizeof (u_int32_t));
1263 for (idx = 1; idx < 256; ++idx)
1264 NAMES_B1[idx] = idx;
1266 /* Trick: change the 0th entry's name to 1 to mark the cell occupied. */
1267 NAMES_B1[0] = 1;
1269 for (idx = 256; idx < ctype->charnames_act; ++idx)
1271 size_t nr = (ctype->charnames[idx] % ctype->plane_size);
1272 size_t depth = 0;
1274 while (NAMES_B1[nr + depth * ctype->plane_size])
1275 ++depth;
1276 assert (depth < ctype->plane_cnt);
1278 NAMES_B1[nr + depth * ctype->plane_size] = ctype->charnames[idx];
1280 /* Now for faster access remember the index in the NAMES_B array. */
1281 ctype->charnames[idx] = nr + depth * ctype->plane_size;
1283 NAMES_B1[0] = 0;
1285 for (idx = 0; idx < ctype->plane_size * ctype->plane_cnt; ++idx)
1286 NAMES_B2[idx] = SWAPU32 (NAMES_B1[idx]);
1289 /* You wonder about this amount of memory? This is only because some
1290 users do not manage to address the array with unsigned values or
1291 data types with range >= 256. '\200' would result in the array
1292 index -128. To help these poor people we duplicate the entries for
1293 128 up to 255 below the entry for \0. */
1294 ctype->ctype_b = (char_class_t *) xcalloc (256 + 128,
1295 sizeof (char_class_t));
1296 ctype->ctype32_b = (char_class32_t *) xcalloc (ctype->plane_size
1297 * ctype->plane_cnt,
1298 sizeof (char_class32_t));
1300 /* Fill in the character class information. */
1301 #if __BYTE_ORDER == __LITTLE_ENDIAN
1302 # define TRANS(w) CHAR_CLASS_TRANS (w)
1303 # define TRANS32(w) CHAR_CLASS32_TRANS (w)
1304 #else
1305 # define TRANS(w) (w)
1306 # define TRANS32(w) (w)
1307 #endif
1309 for (idx = 0; idx < ctype->class_collection_act; ++idx)
1310 if (ctype->charnames[idx] < 256)
1311 ctype->ctype_b[128 + ctype->charnames[idx]]
1312 = TRANS (ctype->class_collection[idx]);
1314 /* Mirror first 127 entries. We must take care that entry -1 is not
1315 mirrored because EOF == -1. */
1316 for (idx = 0; idx < 127; ++idx)
1317 ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
1319 /* The 32 bit array contains all characters. */
1320 for (idx = 0; idx < ctype->class_collection_act; ++idx)
1321 ctype->ctype32_b[ctype->charnames[idx]]
1322 = TRANS32 (ctype->class_collection[idx]);
1324 /* Room for table of mappings. */
1325 ctype->map_eb = (u_int32_t **) xmalloc (ctype->map_collection_nr
1326 * sizeof (u_int32_t *));
1327 ctype->map_el = (u_int32_t **) xmalloc (ctype->map_collection_nr
1328 * sizeof (u_int32_t *));
1330 /* Fill in all mappings. */
1331 for (idx = 0; idx < ctype->map_collection_nr; ++idx)
1333 unsigned int idx2;
1335 /* Allocate table. */
1336 ctype->map_eb[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
1337 * ctype->plane_cnt + 128)
1338 * sizeof (u_int32_t));
1339 ctype->map_el[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
1340 * ctype->plane_cnt + 128)
1341 * sizeof (u_int32_t));
1343 #if __BYTE_ORDER == __LITTLE_ENDIAN
1344 # define MAP_B1 ctype->map_el
1345 # define MAP_B2 ctype->map_eb
1346 #else
1347 # define MAP_B1 ctype->map_eb
1348 # define MAP_B2 ctype->map_el
1349 #endif
1351 /* Copy default value (identity mapping). */
1352 memcpy (&MAP_B1[idx][128], NAMES_B1,
1353 ctype->plane_size * ctype->plane_cnt * sizeof (u_int32_t));
1355 /* Copy values from collection. */
1356 for (idx2 = 0; idx2 < ctype->map_collection_act[idx]; ++idx2)
1357 if (ctype->map_collection[idx][idx2] != 0)
1358 MAP_B1[idx][128 + ctype->charnames[idx2]] =
1359 ctype->map_collection[idx][idx2];
1361 /* Mirror first 127 entries. We must take care not to map entry
1362 -1 because EOF == -1. */
1363 for (idx2 = 0; idx2 < 127; ++idx2)
1364 MAP_B1[idx][idx2] = MAP_B1[idx][256 + idx2];
1366 /* EOF must map to EOF. */
1367 MAP_B1[idx][127] = EOF;
1369 /* And now the other byte order. */
1370 for (idx2 = 0; idx2 < ctype->plane_size * ctype->plane_cnt + 128; ++idx2)
1371 MAP_B2[idx][idx2] = SWAPU32 (MAP_B1[idx][idx2]);
1374 /* Extra array for class and map names. */
1375 ctype->class_name_ptr = (u_int32_t *) xmalloc (ctype->nr_charclass
1376 * sizeof (u_int32_t));
1377 ctype->map_name_ptr = (u_int32_t *) xmalloc (ctype->map_collection_nr
1378 * sizeof (u_int32_t));
1380 /* Array for width information. Because the expected width are very
1381 small we use only one single byte. This save space and we need
1382 not provide the information twice with both endianesses. */
1383 ctype->width = (unsigned char *) xmalloc (ctype->plane_size
1384 * ctype->plane_cnt);
1385 /* Initialize with default width value. */
1386 memset (ctype->width, charset->width_default,
1387 ctype->plane_size * ctype->plane_cnt);
1388 if (charset->width_rules != NULL)
1390 size_t cnt;
1392 for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
1393 if (charset->width_rules[cnt].width != charset->width_default)
1394 for (idx = charset->width_rules[cnt].from;
1395 idx <= charset->width_rules[cnt].to; ++idx)
1397 size_t nr = idx % ctype->plane_size;
1398 size_t depth = 0;
1400 while (NAMES_B1[nr + depth * ctype->plane_size] != nr)
1401 ++depth;
1402 assert (depth < ctype->plane_cnt);
1404 ctype->width[nr + depth * ctype->plane_size]
1405 = charset->width_rules[cnt].width;
1409 /* Compute MB_CUR_MAX. Please note the value mb_cur_max in the
1410 character set definition gives the number of bytes in the wide
1411 character representation. We compute the number of bytes used
1412 for the UTF-8 encoded form. */
1413 ctype->mb_cur_max = ((int []) { 2, 3, 5, 6 }) [charset->mb_cur_max - 1];
1415 /* We need the name of the currently used 8-bit character set to
1416 make correct conversion between this 8-bit representation and the
1417 ISO 10646 character set used internally for wide characters. */
1418 ctype->codeset_name = charset->code_set_name;