update from main archive 961001
[glibc.git] / locale / programs / ld-ctype.c
blob64f73d430f949008c08e1b0b912a2efd326cef7d
1 /* Copyright (C) 1995, 1996 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If
17 not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
24 #include <alloca.h>
25 #include <endian.h>
26 #include <limits.h>
27 #include <string.h>
29 #include "locales.h"
30 #include "localeinfo.h"
31 #include "langinfo.h"
32 #include "locfile-token.h"
33 #include "stringtrans.h"
35 /* Uncomment the following line in the production version. */
36 /* define NDEBUG 1 */
37 #include <assert.h>
/* Allocation helpers.  Only the declarations appear here; the
   definitions are not part of this section of the file.  */
void *xmalloc (size_t n);
void *xcalloc (size_t n, size_t s);
void *xrealloc (void *ptr, size_t n);


/* The bit used for representing a special class.  The position is the
   distance of the class token from `tok_upper'.  */
#define BITPOS(class) ((class) - tok_upper)
#define BIT(class) (1 << BITPOS (class))

/* Yield an lvalue for the entry of COLLECTION (optionally subscripted
   by IDX, which may be empty) that belongs to character VALUE.  The
   lookup itself is done by `find_idx'.  */
#define ELEM(ctype, collection, idx, value) \
  *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx, \
             &ctype->collection##_act idx, value)

/* Reverse the byte order of the low 32 bits of W.  Every term is masked
   to a single byte before it is combined, so an argument wider than 32
   bits cannot leak high bits into the result and a signed argument is
   never shifted into the sign bit.  */
#define SWAPU32(w) \
  ((((w) & 0xffu) << 24) | (((w) & 0xff00u) << 8) \
   | (((w) >> 8) & 0xff00u) | (((w) >> 24) & 0xffu))

/* Exchange the two low bytes of W.  */
#define SWAPU16(w) \
  ((((w) >> 8) & 0xff) | (((w) & 0xff) << 8))


/* To be compatible with former implementations we for now restrict
   the number of bits for character classes to 16.  When compatibility
   is not necessary anymore increase the number to 32.  */
#define char_class_t u_int16_t
#define CHAR_CLASS_TRANS SWAPU16
#define char_class32_t u_int32_t
#define CHAR_CLASS32_TRANS SWAPU32
69 /* The real definition of the struct for the LC_CTYPE locale. */
70 struct locale_ctype_t
72 unsigned int *charnames;
73 size_t charnames_max;
74 size_t charnames_act;
76 /* We will allow up to 8 * sizeof(u_int32_t) - 1 character classes. */
77 #define MAX_NR_CHARCLASS (8 * sizeof (u_int32_t) - 1)
78 int nr_charclass;
79 const char *classnames[MAX_NR_CHARCLASS];
80 unsigned long int current_class_mask;
81 unsigned int last_class_char;
82 u_int32_t *class_collection;
83 size_t class_collection_max;
84 size_t class_collection_act;
85 unsigned long int class_done;
87 /* If the following number ever turns out to be too small simply
88 increase it. But I doubt it will. --drepper@gnu */
89 #define MAX_NR_CHARMAP 16
90 const char *mapnames[MAX_NR_CHARMAP];
91 u_int32_t *map_collection[MAX_NR_CHARMAP];
92 u_int32_t map_collection_max[MAX_NR_CHARMAP];
93 u_int32_t map_collection_act[MAX_NR_CHARMAP];
94 size_t map_collection_nr;
95 size_t last_map_idx;
96 unsigned int from_map_char;
97 int toupper_done;
98 int tolower_done;
100 /* The arrays for the binary representation. */
101 u_int32_t plane_size;
102 u_int32_t plane_cnt;
103 char_class_t *ctype_b;
104 char_class32_t *ctype32_b;
105 u_int32_t *names_el;
106 u_int32_t *names_eb;
107 u_int32_t **map_eb;
108 u_int32_t **map_el;
109 u_int32_t *class_name_ptr;
110 u_int32_t *map_name_ptr;
111 unsigned char *width;
112 u_int32_t mb_cur_max;
113 const char *codeset_name;
/* Prototypes for local functions.  */

/* Register a new character class name.  */
static void ctype_class_newP (struct linereader *lr,
                              struct locale_ctype_t *ctype,
                              const char *name);

/* Register a new character map name and allocate its table.  */
static void ctype_map_newP (struct linereader *lr,
                            struct locale_ctype_t *ctype,
                            const char *name,
                            struct charset_t *charset);

/* Return the slot of *TABLE that belongs to character IDX.  */
static u_int32_t *find_idx (struct locale_ctype_t *ctype,
                            u_int32_t **table,
                            size_t *max,
                            size_t *act,
                            unsigned int idx);

/* Fill in the classes and maps which were not given explicitly.  */
static void set_class_defaults (struct locale_ctype_t *ctype,
                                struct charset_t *charset);

static void allocate_arrays (struct locale_ctype_t *ctype,
                             struct charset_t *charset);
131 void
132 ctype_startup (struct linereader *lr, struct localedef_t *locale,
133 struct charset_t *charset)
135 unsigned int cnt;
136 struct locale_ctype_t *ctype;
138 /* It is important that we always use UCS1 encoding for strings now. */
139 encoding_method = ENC_UCS1;
141 /* Allocate the needed room. */
142 locale->categories[LC_CTYPE].ctype = ctype =
143 (struct locale_ctype_t *) xmalloc (sizeof (struct locale_ctype_t));
145 /* We have no names seen yet. */
146 ctype->charnames_max = charset->mb_cur_max == 1 ? 256 : 512;
147 ctype->charnames =
148 (unsigned int *) xmalloc (ctype->charnames_max * sizeof (unsigned int));
149 for (cnt = 0; cnt < 256; ++cnt)
150 ctype->charnames[cnt] = cnt;
151 ctype->charnames_act = 256;
153 /* Fill character class information. */
154 ctype->nr_charclass = 0;
155 ctype->current_class_mask = 0;
156 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
157 /* The order of the following instructions determines the bit
158 positions! */
159 ctype_class_newP (lr, ctype, "upper");
160 ctype_class_newP (lr, ctype, "lower");
161 ctype_class_newP (lr, ctype, "alpha");
162 ctype_class_newP (lr, ctype, "digit");
163 ctype_class_newP (lr, ctype, "xdigit");
164 ctype_class_newP (lr, ctype, "space");
165 ctype_class_newP (lr, ctype, "print");
166 ctype_class_newP (lr, ctype, "graph");
167 ctype_class_newP (lr, ctype, "blank");
168 ctype_class_newP (lr, ctype, "cntrl");
169 ctype_class_newP (lr, ctype, "punct");
170 ctype_class_newP (lr, ctype, "alnum");
172 ctype->class_collection_max = charset->mb_cur_max == 1 ? 256 : 512;
173 ctype->class_collection
174 = (u_int32_t *) xmalloc (sizeof (unsigned long int)
175 * ctype->class_collection_max);
176 memset (ctype->class_collection, '\0',
177 sizeof (unsigned long int) * ctype->class_collection_max);
178 ctype->class_collection_act = 256;
180 /* Fill character map information. */
181 ctype->map_collection_nr = 0;
182 ctype->last_map_idx = MAX_NR_CHARMAP;
183 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
184 ctype_map_newP (lr, ctype, "toupper", charset);
185 ctype_map_newP (lr, ctype, "tolower", charset);
187 /* Fill first 256 entries in `toupper' and `tolower' arrays. */
188 for (cnt = 0; cnt < 256; ++cnt)
190 ctype->map_collection[0][cnt] = cnt;
191 ctype->map_collection[1][cnt] = cnt;
196 void
197 ctype_finish (struct localedef_t *locale, struct charset_t *charset)
199 /* See POSIX.2, table 2-6 for the meaning of the following table. */
200 #define NCLASS 12
201 static const struct
203 const char *name;
204 const char allow[NCLASS];
206 valid_table[NCLASS] =
208 /* The order is important. See token.h for more information.
209 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
210 { "upper", "--MX-XDDXXX-" },
211 { "lower", "--MX-XDDXXX-" },
212 { "alpha", "---X-XDDXXX-" },
213 { "digit", "XXX--XDDXXX-" },
214 { "xdigit", "-----XDDXXX-" },
215 { "space", "XXXXX------X" },
216 { "print", "---------X--" },
217 { "graph", "---------X--" },
218 { "blank", "XXXXXM-----X" },
219 { "cntrl", "XXXXX-XX--XX" },
220 { "punct", "XXXXX-DD-X-X" },
221 { "alnum", "-----XDDXXX-" }
223 size_t cnt;
224 int cls1, cls2;
225 unsigned int space_value;
226 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
228 /* Set default value for classes not specified. */
229 set_class_defaults (ctype, charset);
231 /* Check according to table. */
232 for (cnt = 0; cnt < ctype->class_collection_max; ++cnt)
234 unsigned long int tmp;
236 tmp = ctype->class_collection[cnt];
237 if (tmp == 0)
238 continue;
240 for (cls1 = 0; cls1 < NCLASS; ++cls1)
241 if ((tmp & (1 << cls1)) != 0)
242 for (cls2 = 0; cls2 < NCLASS; ++cls2)
243 if (valid_table[cls1].allow[cls2] != '-')
245 int eq = (tmp & (1 << cls2)) != 0;
246 switch (valid_table[cls1].allow[cls2])
248 case 'M':
249 if (!eq)
251 char buf[17];
252 char *cp = buf;
253 unsigned int value;
255 value = ctype->charnames[cnt];
257 if ((value & 0xff000000) != 0)
258 cp += sprintf (cp, "\\%o", (value >> 24) & 0xff);
259 if ((value & 0xffff0000) != 0)
260 cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
261 if ((value & 0xffffff00) != 0)
262 cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
263 sprintf (cp, "\\%o", value & 0xff);
265 error (0, 0, _("\
266 character %s'%s' in class `%s' must be in class `%s'"), value > 256 ? "L" : "",
267 cp, valid_table[cls1].name,
268 valid_table[cls2].name);
270 break;
272 case 'X':
273 if (eq)
275 char buf[17];
276 char *cp = buf;
277 unsigned int value;
279 value = ctype->charnames[cnt];
281 if ((value & 0xff000000) != 0)
282 cp += sprintf (cp, "\\%o", value >> 24);
283 if ((value & 0xffff0000) != 0)
284 cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
285 if ((value & 0xffffff00) != 0)
286 cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
287 sprintf (cp, "\\%o", value & 0xff);
289 error (0, 0, _("\
290 character %s'%s' in class `%s' must not be in class `%s'"),
291 value > 256 ? "L" : "", cp,
292 valid_table[cls1].name, valid_table[cls2].name);
294 break;
296 case 'D':
297 ctype->class_collection[cnt] |= 1 << cls2;
298 break;
300 default:
301 error (5, 0, _("internal error in %s, line %u"),
302 __FUNCTION__, __LINE__);
307 /* ... and now test <SP> as a special case. */
308 space_value = charset_find_value (charset, "SP", 2);
309 if (space_value == ILLEGAL_CHAR_VALUE)
310 error (0, 0, _("character <SP> not defined in character map"));
311 else if ((cnt = BITPOS (tok_space),
312 (ELEM (ctype, class_collection, , space_value)
313 & BIT (tok_space)) == 0)
314 || (cnt = BITPOS (tok_blank),
315 (ELEM (ctype, class_collection, , space_value)
316 & BIT (tok_blank)) == 0))
317 error (0, 0, _("<SP> character not in class `%s'"),
318 valid_table[cnt].name);
319 else if ((cnt = BITPOS (tok_punct),
320 (ELEM (ctype, class_collection, , space_value)
321 & BIT (tok_punct)) != 0)
322 || (cnt = BITPOS (tok_graph),
323 (ELEM (ctype, class_collection, , space_value)
324 & BIT (tok_graph))
325 != 0))
326 error (0, 0, _("<SP> character must not be in class `%s'"),
327 valid_table[cnt].name);
328 else
329 ELEM (ctype, class_collection, , space_value) |= BIT (tok_print);
331 /* Now that the tests are done make sure the name array contains all
332 characters which are handled in the WIDTH section of the
333 character set definition file. */
334 if (charset->width_rules != NULL)
335 for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
337 size_t inner;
338 for (inner = charset->width_rules[cnt].from;
339 inner <= charset->width_rules[cnt].to; ++inner)
340 (void) find_idx (ctype, NULL, NULL, NULL, inner);
345 void
346 ctype_output (struct localedef_t *locale, struct charset_t *charset,
347 const char *output_path)
349 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
350 const size_t nelems = (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)
351 + 2 * (ctype->map_collection_nr - 2));
352 struct iovec iov[2 + nelems + ctype->nr_charclass
353 + ctype->map_collection_nr];
354 struct locale_file data;
355 u_int32_t idx[nelems];
356 size_t elem, cnt, offset, total;
359 if ((locale->binary & (1 << LC_CTYPE)) != 0)
361 iov[0].iov_base = ctype;
362 iov[0].iov_len = locale->len[LC_CTYPE];
364 write_locale_data (output_path, "LC_CTYPE", 1, iov);
366 return;
370 /* Now prepare the output: Find the sizes of the table we can use. */
371 allocate_arrays (ctype, charset);
373 data.magic = LIMAGIC (LC_CTYPE);
374 data.n = nelems;
375 iov[0].iov_base = (void *) &data;
376 iov[0].iov_len = sizeof (data);
378 iov[1].iov_base = (void *) idx;
379 iov[1].iov_len = sizeof (idx);
381 idx[0] = iov[0].iov_len + iov[1].iov_len;
382 offset = 0;
384 for (elem = 0; elem < nelems; ++elem)
386 if (elem < _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE))
387 switch (elem)
389 #define CTYPE_DATA(name, base, len) \
390 case _NL_ITEM_INDEX (name): \
391 iov[2 + elem + offset].iov_base = (base); \
392 iov[2 + elem + offset].iov_len = (len); \
393 if (elem + 1 < nelems) \
394 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
395 break
397 CTYPE_DATA (_NL_CTYPE_CLASS,
398 ctype->ctype_b,
399 (256 + 128) * sizeof (char_class_t));
401 CTYPE_DATA (_NL_CTYPE_TOUPPER_EB,
402 ctype->map_eb[0],
403 (ctype->plane_size * ctype->plane_cnt + 128)
404 * sizeof (u_int32_t));
405 CTYPE_DATA (_NL_CTYPE_TOLOWER_EB,
406 ctype->map_eb[1],
407 (ctype->plane_size * ctype->plane_cnt + 128)
408 * sizeof (u_int32_t));
410 CTYPE_DATA (_NL_CTYPE_TOUPPER_EL,
411 ctype->map_el[0],
412 (ctype->plane_size * ctype->plane_cnt + 128)
413 * sizeof (u_int32_t));
414 CTYPE_DATA (_NL_CTYPE_TOLOWER_EL,
415 ctype->map_el[1],
416 (ctype->plane_size * ctype->plane_cnt + 128)
417 * sizeof (u_int32_t));
419 CTYPE_DATA (_NL_CTYPE_CLASS32,
420 ctype->ctype32_b,
421 (ctype->plane_size * ctype->plane_cnt
422 * sizeof (char_class32_t)));
424 CTYPE_DATA (_NL_CTYPE_NAMES_EB,
425 ctype->names_eb, (ctype->plane_size * ctype->plane_cnt
426 * sizeof (u_int32_t)));
427 CTYPE_DATA (_NL_CTYPE_NAMES_EL,
428 ctype->names_el, (ctype->plane_size * ctype->plane_cnt
429 * sizeof (u_int32_t)));
431 CTYPE_DATA (_NL_CTYPE_HASH_SIZE,
432 &ctype->plane_size, sizeof (u_int32_t));
433 CTYPE_DATA (_NL_CTYPE_HASH_LAYERS,
434 &ctype->plane_cnt, sizeof (u_int32_t));
436 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
437 /* The class name array. */
438 total = 0;
439 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
441 iov[2 + elem + offset].iov_base
442 = (void *) ctype->classnames[cnt];
443 iov[2 + elem + offset].iov_len
444 = strlen (ctype->classnames[cnt]) + 1;
445 total += iov[2 + elem + offset].iov_len;
447 iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
448 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
449 total += 1 + (4 - ((total + 1) % 4));
451 if (elem + 1 < nelems)
452 idx[elem + 1] = idx[elem] + total;
453 break;
455 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
456 /* The class name array. */
457 total = 0;
458 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
460 iov[2 + elem + offset].iov_base
461 = (void *) ctype->mapnames[cnt];
462 iov[2 + elem + offset].iov_len
463 = strlen (ctype->mapnames[cnt]) + 1;
464 total += iov[2 + elem + offset].iov_len;
466 iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
467 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
468 total += 1 + (4 - ((total + 1) % 4));
470 if (elem + 1 < nelems)
471 idx[elem + 1] = idx[elem] + total;
472 break;
474 CTYPE_DATA (_NL_CTYPE_WIDTH,
475 ctype->width, ctype->plane_size * ctype->plane_cnt);
477 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
478 &ctype->mb_cur_max, sizeof (u_int32_t));
480 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
481 total = strlen (ctype->codeset_name) + 1;
482 if (total % 4 == 0)
483 iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
484 else
486 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
487 memcpy (iov[2 + elem + offset].iov_base, ctype->codeset_name,
488 total);
489 total = (total + 3) & ~3;
491 iov[2 + elem + offset].iov_len = total;
492 if (elem + 1 < nelems)
493 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
494 break;
496 default:
497 assert (! "unknown CTYPE element");
499 else
501 /* Handle extra maps. */
502 size_t nr = (elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) >> 1;
504 if (((elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) & 1) == 0)
505 iov[2 + elem + offset].iov_base = ctype->map_eb[nr];
506 else
507 iov[2 + elem + offset].iov_base = ctype->map_el[nr];
509 iov[2 + elem + offset].iov_len = ((ctype->plane_size
510 * ctype->plane_cnt + 128)
511 * sizeof (u_int32_t));
513 if (elem + 1 < nelems)
514 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
518 assert (2 + elem + offset == (nelems + ctype->nr_charclass
519 + ctype->map_collection_nr + 2));
521 write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
525 /* Character class handling. */
526 void
527 ctype_class_new (struct linereader *lr, struct localedef_t *locale,
528 enum token_t tok, struct token *code,
529 struct charset_t *charset)
531 ctype_class_newP (lr, locale->categories[LC_CTYPE].ctype,
532 code->val.str.start);
537 ctype_is_charclass (struct linereader *lr, struct localedef_t *locale,
538 const char *name)
540 int cnt;
542 for (cnt = 0; cnt < locale->categories[LC_CTYPE].ctype->nr_charclass; ++cnt)
543 if (strcmp (name, locale->categories[LC_CTYPE].ctype->classnames[cnt])
544 == 0)
545 return 1;
547 return 0;
551 void
552 ctype_class_start (struct linereader *lr, struct localedef_t *locale,
553 enum token_t tok, const char *str,
554 struct charset_t *charset)
556 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
557 int cnt;
559 switch (tok)
561 case tok_upper:
562 str = "upper";
563 break;
564 case tok_lower:
565 str = "lower";
566 break;
567 case tok_alpha:
568 str = "alpha";
569 break;
570 case tok_digit:
571 str = "digit";
572 break;
573 case tok_xdigit:
574 str = "xdigit";
575 break;
576 case tok_space:
577 str = "space";
578 break;
579 case tok_print:
580 str = "print";
581 break;
582 case tok_graph:
583 str = "graph";
584 break;
585 case tok_blank:
586 str = "blank";
587 break;
588 case tok_cntrl:
589 str = "cntrl";
590 break;
591 case tok_punct:
592 str = "punct";
593 break;
594 case tok_alnum:
595 str = "alnum";
596 break;
597 case tok_ident:
598 break;
599 default:
600 assert (! "illegal token as class name: should not happen");
603 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
604 if (strcmp (str, ctype->classnames[cnt]) == 0)
605 break;
607 if (cnt >= ctype->nr_charclass)
608 assert (! "unknown class in class definition: should not happen");
610 ctype->class_done |= BIT (tok);
612 ctype->current_class_mask = 1 << cnt;
613 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
617 void
618 ctype_class_from (struct linereader *lr, struct localedef_t *locale,
619 struct token *code, struct charset_t *charset)
621 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
622 unsigned int value;
624 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
626 ctype->last_class_char = value;
628 if (value == ILLEGAL_CHAR_VALUE)
629 /* In the LC_CTYPE category it is no error when a character is
630 not found. This has to be ignored silently. */
631 return;
633 *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
634 &ctype->class_collection_act, value)
635 |= ctype->current_class_mask;
639 void
640 ctype_class_to (struct linereader *lr, struct localedef_t *locale,
641 struct token *code, struct charset_t *charset)
643 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
644 unsigned int value, cnt;
646 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
648 assert (value >= ctype->last_class_char);
650 for (cnt = ctype->last_class_char + 1; cnt <= value; ++cnt)
651 *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
652 &ctype->class_collection_act, cnt)
653 |= ctype->current_class_mask;
655 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
659 void
660 ctype_class_end (struct linereader *lr, struct localedef_t *locale)
662 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
664 /* We have no special actions to perform here. */
665 ctype->current_class_mask = 0;
666 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
670 /* Character map handling. */
671 void
672 ctype_map_new (struct linereader *lr, struct localedef_t *locale,
673 enum token_t tok, struct token *code,
674 struct charset_t *charset)
676 ctype_map_newP (lr, locale->categories[LC_CTYPE].ctype,
677 code->val.str.start, charset);
682 ctype_is_charconv (struct linereader *lr, struct localedef_t *locale,
683 const char *name)
685 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
686 size_t cnt;
688 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
689 if (strcmp (name, ctype->mapnames[cnt]) == 0)
690 return 1;
692 return 0;
696 void
697 ctype_map_start (struct linereader *lr, struct localedef_t *locale,
698 enum token_t tok, const char *name, struct charset_t *charset)
700 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
701 size_t cnt;
703 switch (tok)
705 case tok_toupper:
706 ctype->toupper_done = 1;
707 name = "toupper";
708 break;
709 case tok_tolower:
710 ctype->tolower_done = 1;
711 name = "tolower";
712 break;
713 case tok_ident:
714 break;
715 default:
716 assert (! "unknown token in category `LC_CTYPE' should not happen");
719 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
720 if (strcmp (name, ctype->mapnames[cnt]) == 0)
721 break;
723 if (cnt == ctype->map_collection_nr)
724 assert (! "unknown token in category `LC_CTYPE' should not happen");
726 ctype->last_map_idx = cnt;
727 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
731 void
732 ctype_map_from (struct linereader *lr, struct localedef_t *locale,
733 struct token *code, struct charset_t *charset)
735 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
736 unsigned int value;
738 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
740 if (value == ILLEGAL_CHAR_VALUE)
741 /* In the LC_CTYPE category it is no error when a character is
742 not found. This has to be ignored silently. */
743 return;
745 assert (ctype->last_map_idx < ctype->map_collection_nr);
747 ctype->from_map_char = value;
751 void
752 ctype_map_to (struct linereader *lr, struct localedef_t *locale,
753 struct token *code, struct charset_t *charset)
755 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
756 unsigned int value;
758 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
760 if (ctype->from_map_char == ILLEGAL_CHAR_VALUE
761 || value == ILLEGAL_CHAR_VALUE)
763 /* In the LC_CTYPE category it is no error when a character is
764 not found. This has to be ignored silently. */
765 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
766 return;
769 *find_idx (ctype, &ctype->map_collection[ctype->last_map_idx],
770 &ctype->map_collection_max[ctype->last_map_idx],
771 &ctype->map_collection_act[ctype->last_map_idx],
772 ctype->from_map_char) = value;
774 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
778 void
779 ctype_map_end (struct linereader *lr, struct localedef_t *locale)
781 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
783 ctype->last_map_idx = MAX_NR_CHARMAP;
784 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
788 /* Local functions. */
789 static void
790 ctype_class_newP (struct linereader *lr, struct locale_ctype_t *ctype,
791 const char *name)
793 int cnt;
795 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
796 if (strcmp (ctype->classnames[cnt], name) == 0)
797 break;
799 if (cnt < ctype->nr_charclass)
801 lr_error (lr, _("character class `%s' already defined"));
802 return;
805 if (ctype->nr_charclass == MAX_NR_CHARCLASS)
806 /* Exit code 2 is prescribed in P1003.2b. */
807 error (2, 0, _("\
808 implementation limit: no more than %d character classes allowed"),
809 MAX_NR_CHARCLASS);
811 ctype->classnames[ctype->nr_charclass++] = name;
815 static void
816 ctype_map_newP (struct linereader *lr, struct locale_ctype_t *ctype,
817 const char *name, struct charset_t *charset)
819 size_t max_chars = 0;
820 int cnt;
822 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
824 if (strcmp (ctype->mapnames[cnt], name) == 0)
825 break;
827 if (max_chars < ctype->map_collection_max[cnt])
828 max_chars = ctype->map_collection_max[cnt];
831 if (cnt < ctype->map_collection_nr)
833 lr_error (lr, _("character map `%s' already defined"));
834 return;
837 if (ctype->map_collection_nr == MAX_NR_CHARMAP)
838 /* Exit code 2 is prescribed in P1003.2b. */
839 error (2, 0, _("\
840 implementation limit: no more than %d character maps allowed"),
841 MAX_NR_CHARMAP);
843 ctype->mapnames[cnt] = name;
845 if (max_chars == 0)
846 ctype->map_collection_max[cnt] = charset->mb_cur_max == 1 ? 256 : 512;
847 else
848 ctype->map_collection_max[cnt] = max_chars;
850 ctype->map_collection[cnt] = (u_int32_t *)
851 xmalloc (sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
852 memset (ctype->map_collection[cnt], '\0',
853 sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
854 ctype->map_collection_act[cnt] = 256;
856 ++ctype->map_collection_nr;
860 /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
861 is possible if we only want ot extend the name array. */
862 static u_int32_t *
863 find_idx (struct locale_ctype_t *ctype, u_int32_t **table, size_t *max,
864 size_t *act, unsigned int idx)
866 size_t cnt;
868 if (idx < 256)
869 return table == NULL ? NULL : &(*table)[idx];
871 for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
872 if (ctype->charnames[cnt] == idx)
873 break;
875 /* We have to distinguish two cases: the names is found or not. */
876 if (cnt == ctype->charnames_act)
878 /* Extend the name array. */
879 if (ctype->charnames_act == ctype->charnames_max)
881 ctype->charnames_max *= 2;
882 ctype->charnames = (unsigned int *)
883 xrealloc (ctype->charnames,
884 sizeof (unsigned int) * ctype->charnames_max);
886 ctype->charnames[ctype->charnames_act++] = idx;
889 if (table == NULL)
890 /* We have done everything we are asked to do. */
891 return NULL;
893 if (cnt >= *act)
895 if (cnt >= *max)
897 size_t old_max = *max;
899 *max *= 2;
900 while (*max <= cnt);
902 *table =
903 (u_int32_t *) xrealloc (*table, *max * sizeof (unsigned long int));
904 memset (&(*table)[old_max], '\0',
905 (*max - old_max) * sizeof (u_int32_t));
908 (*table)[cnt] = 0;
909 *act = cnt;
912 return &(*table)[cnt];
916 static void
917 set_class_defaults (struct locale_ctype_t *ctype, struct charset_t *charset)
919 /* These function defines the default values for the classes and conversions
920 according to POSIX.2 2.5.2.1.
921 It may seem that the order of these if-blocks is arbitrary but it is NOT.
922 Don't move them unless you know what you do! */
924 void set_default (int bit, int from, int to)
926 char tmp[2];
927 int ch;
928 /* Define string. */
929 strcpy (tmp, "?");
931 for (ch = from; ch <= to; ++ch)
933 unsigned int value;
934 tmp[0] = ch;
936 value = charset_find_value (charset, tmp, 1);
937 if (value == ILLEGAL_CHAR_VALUE)
939 error (0, 0, _("\
940 character `%s' not defined while needed as default value"),
941 tmp);
942 continue;
944 else
945 ELEM (ctype, class_collection, , value) |= bit;
949 /* Set default values if keyword was not present. */
950 if ((ctype->class_done & BIT (tok_upper)) == 0)
951 /* "If this keyword [lower] is not specified, the lowercase letters
952 `A' through `Z', ..., shall automatically belong to this class,
953 with implementation defined character values." [P1003.2, 2.5.2.1] */
954 set_default (BIT (tok_upper), 'A', 'Z');
956 if ((ctype->class_done & BIT (tok_lower)) == 0)
957 /* "If this keyword [lower] is not specified, the lowercase letters
958 `a' through `z', ..., shall automatically belong to this class,
959 with implementation defined character values." [P1003.2, 2.5.2.1] */
960 set_default (BIT (tok_lower), 'a', 'z');
962 if ((ctype->class_done & BIT (tok_alpha)) == 0)
964 /* Table 2-6 in P1003.2 says that characters in class `upper' or
965 class `lower' *must* be in class `alpha'. */
966 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
967 size_t cnt;
969 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
970 if ((ctype->class_collection[cnt] & mask) != 0)
971 ctype->class_collection[cnt] |= BIT (tok_alpha);
974 if ((ctype->class_done & BIT (tok_digit)) == 0)
975 /* "If this keyword [digit] is not specified, the digits `0' through
976 `9', ..., shall automatically belong to this class, with
977 implementation-defined character values." [P1003.2, 2.5.2.1] */
978 set_default (BIT (tok_digit), '0', '9');
980 /* "Only characters specified for the `alpha' and `digit' keyword
981 shall be specified. Characters specified for the keyword `alpha'
982 and `digit' are automatically included in this class. */
984 unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
985 size_t cnt;
987 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
988 if ((ctype->class_collection[cnt] & mask) != 0)
989 ctype->class_collection[cnt] |= BIT (tok_alnum);
992 if ((ctype->class_done & BIT (tok_space)) == 0)
993 /* "If this keyword [space] is not specified, the characters <space>,
994 <form-feed>, <newline>, <carriage-return>, <tab>, and
995 <vertical-tab>, ..., shall automatically belong to this class,
996 with implementation-defined character values." [P1003.2, 2.5.2.1] */
998 unsigned int value;
1000 value = charset_find_value (charset, "space", 5);
1001 if (value == ILLEGAL_CHAR_VALUE)
1002 error (0, 0, _("\
1003 character `%s' not defined while needed as default value"),
1004 "<space>");
1005 else
1006 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1008 value = charset_find_value (charset, "form-feed", 9);
1009 if (value == ILLEGAL_CHAR_VALUE)
1010 error (0, 0, _("\
1011 character `%s' not defined while needed as default value"),
1012 "<form-feed>");
1013 else
1014 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1016 value = charset_find_value (charset, "newline", 7);
1017 if (value == ILLEGAL_CHAR_VALUE)
1018 error (0, 0, _("\
1019 character `%s' not defined while needed as default value"),
1020 "<newline>");
1021 else
1022 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1024 value = charset_find_value (charset, "carriage-return", 15);
1025 if (value == ILLEGAL_CHAR_VALUE)
1026 error (0, 0, _("\
1027 character `%s' not defined while needed as default value"),
1028 "<carriage-return>");
1029 else
1030 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1032 value = charset_find_value (charset, "tab", 3);
1033 if (value == ILLEGAL_CHAR_VALUE)
1034 error (0, 0, _("\
1035 character `%s' not defined while needed as default value"),
1036 "<tab>");
1037 else
1038 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1040 value = charset_find_value (charset, "vertical-tab", 12);
1041 if (value == ILLEGAL_CHAR_VALUE)
1042 error (0, 0, _("\
1043 character `%s' not defined while needed as default value"),
1044 "<vertical-tab>");
1045 else
1046 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1049 if ((ctype->class_done & BIT (tok_xdigit)) == 0)
1050 /* "If this keyword is not specified, the digits `0' to `9', the
1051 uppercase letters `A' through `F', and the lowercase letters `a'
1052 through `f', ..., shell automatically belong to this class, with
1053 implementation defined character values." [P1003.2, 2.5.2.1] */
1055 set_default (BIT (tok_xdigit), '0', '9');
1056 set_default (BIT (tok_xdigit), 'A', 'F');
1057 set_default (BIT (tok_xdigit), 'a', 'f');
1060 if ((ctype->class_done & BIT (tok_blank)) == 0)
1061 /* "If this keyword [blank] is unspecified, the characters <space> and
1062 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
1064 unsigned int value;
1066 value = charset_find_value (charset, "space", 5);
1067 if (value == ILLEGAL_CHAR_VALUE)
1068 error (0, 0, _("\
1069 character `%s' not defined while needed as default value"),
1070 "<space>");
1071 else
1072 ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
1074 value = charset_find_value (charset, "tab", 3);
1075 if (value == ILLEGAL_CHAR_VALUE)
1076 error (0, 0, _("\
1077 character `%s' not defined while needed as default value"),
1078 "<tab>");
1079 else
1080 ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
1083 if ((ctype->class_done & BIT (tok_graph)) == 0)
1084 /* "If this keyword [graph] is not specified, characters specified for
1085 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
1086 shall belong to this character class." [P1003.2, 2.5.2.1] */
1088 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
1089 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
1090 size_t cnt;
1092 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1093 if ((ctype->class_collection[cnt] & mask) != 0)
1094 ctype->class_collection[cnt] |= BIT (tok_graph);
1097 if ((ctype->class_done & BIT (tok_print)) == 0)
1098 /* "If this keyword [print] is not provided, characters specified for
1099 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
1100 and the <space> character shall belong to this character class."
1101 [P1003.2, 2.5.2.1] */
1103 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
1104 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
1105 size_t cnt;
1106 int space;
1108 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1109 if ((ctype->class_collection[cnt] & mask) != 0)
1110 ctype->class_collection[cnt] |= BIT (tok_print);
1112 space = charset_find_value (charset, "space", 5);
1113 if (space == ILLEGAL_CHAR_VALUE)
1114 error (0, 0, _("\
1115 character `%s' not defined while needed as default value"),
1116 "<space>");
1117 else
1118 ELEM (ctype, class_collection, , space) |= BIT (tok_print);
1121 if (ctype->toupper_done == 0)
1122 /* "If this keyword [toupper] is not specified, the lowercase letters
1123 `a' through `z', and their corresponding uppercase letters `A' to
1124 `Z', ..., shall automatically be included, with implementation-
1125 defined character values." [P1003.2, 2.5.2.1] */
1127 char tmp[4];
1128 int ch;
1130 strcpy (tmp, "<?>");
1132 for (ch = 'a'; ch <= 'z'; ++ch)
1134 unsigned int value_from, value_to;
1136 tmp[1] = (char) ch;
1138 value_from = charset_find_value (charset, &tmp[1], 1);
1139 if (value_from == ILLEGAL_CHAR_VALUE)
1141 error (0, 0, _("\
1142 character `%c' not defined while needed as default value"),
1143 tmp);
1144 continue;
1147 /* This conversion is implementation defined. */
1148 tmp[1] = (char) (ch + ('A' - 'a'));
1149 value_to = charset_find_value (charset, &tmp[1], 1);
1150 if (value_to == -1)
1152 error (0, 0, _("\
1153 character `%s' not defined while needed as default value"),
1154 tmp);
1155 continue;
1158 /* The index [0] is determined by the order of the
1159 `ctype_map_newP' calls in `ctype_startup'. */
1160 ELEM (ctype, map_collection, [0], value_from) = value_to;
1164 if (ctype->tolower_done == 0)
1165 /* "If this keyword [tolower] is not specified, the mapping shall be
1166 the reverse mapping of the one specified to `toupper'." [P1003.2] */
1168 size_t cnt;
1170 for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
1171 if (ctype->map_collection[0][cnt] != 0)
1172 ELEM (ctype, map_collection, [1],
1173 ctype->map_collection[0][cnt])
1174 = ctype->charnames[cnt];
1179 static void
1180 allocate_arrays (struct locale_ctype_t *ctype, struct charset_t *charset)
1182 size_t idx;
1184 /* First we have to decide how we organize the arrays. It is easy for
1185 a one-byte character set. But multi-byte character set cannot be
1186 stored flat because they might be sparsly used. So we determine an
1187 optimal hashing function for the used characters.
1189 We use a very trivial hashing function to store the sparse table.
1190 CH % TABSIZE is used as an index. To solve multiple hits we have
1191 N planes. This gurantees a fixed search time for a character [N
1192 / 2]. In the following code we determine the minmum value for
1193 TABSIZE * N, where TABSIZE >= 256. */
1194 size_t min_total = UINT_MAX;
1195 size_t act_size = 256;
1197 fputs (_("\
1198 Computing table size for character classes might take a while..."),
1199 stderr);
1201 while (act_size < min_total)
1203 size_t cnt[act_size];
1204 size_t act_planes = 1;
1206 memset (cnt, '\0', sizeof cnt);
1208 for (idx = 0; idx < 256; ++idx)
1209 cnt[idx] = 1;
1211 for (idx = 0; idx < ctype->charnames_act; ++idx)
1212 if (ctype->charnames[idx] >= 256)
1214 size_t nr = ctype->charnames[idx] % act_size;
1216 if (++cnt[nr] > act_planes)
1218 act_planes = cnt[nr];
1219 if (act_size * act_planes >= min_total)
1220 break;
1224 if (act_size * act_planes < min_total)
1226 min_total = act_size * act_planes;
1227 ctype->plane_size = act_size;
1228 ctype->plane_cnt = act_planes;
1231 ++act_size;
1234 fprintf (stderr, _(" done\n"));
1237 #if __BYTE_ORDER == __LITTLE_ENDIAN
1238 # define NAMES_B1 ctype->names_el
1239 # define NAMES_B2 ctype->names_eb
1240 #else
1241 # define NAMES_B1 ctype->names_eb
1242 # define NAMES_B2 ctype->names_el
1243 #endif
1245 ctype->names_eb = (u_int32_t *) xcalloc (ctype->plane_size
1246 * ctype->plane_cnt,
1247 sizeof (u_int32_t));
1248 ctype->names_el = (u_int32_t *) xcalloc (ctype->plane_size
1249 * ctype->plane_cnt,
1250 sizeof (u_int32_t));
1252 for (idx = 1; idx < 256; ++idx)
1253 NAMES_B1[idx] = idx;
1255 /* Trick: change the 0th entry's name to 1 to mark the cell occupied. */
1256 NAMES_B1[0] = 1;
1258 for (idx = 256; idx < ctype->charnames_act; ++idx)
1260 size_t nr = (ctype->charnames[idx] % ctype->plane_size);
1261 size_t depth = 0;
1263 while (NAMES_B1[nr + depth * ctype->plane_size])
1264 ++depth;
1265 assert (depth < ctype->plane_cnt);
1267 NAMES_B1[nr + depth * ctype->plane_size] = ctype->charnames[idx];
1269 /* Now for faster access remember the index in the NAMES_B array. */
1270 ctype->charnames[idx] = nr + depth * ctype->plane_size;
1272 NAMES_B1[0] = 0;
1274 for (idx = 0; idx < ctype->plane_size * ctype->plane_cnt; ++idx)
1275 NAMES_B2[idx] = SWAPU32 (NAMES_B1[idx]);
1278 /* You wonder about this amount of memory? This is only because some
1279 users do not manage to address the array with unsigned values or
1280 data types with range >= 256. '\200' would result in the array
1281 index -128. To help these poor people we duplicate the entries for
1282 128 up to 255 below the entry for \0. */
1283 ctype->ctype_b = (char_class_t *) xcalloc (256 + 128,
1284 sizeof (char_class_t));
1285 ctype->ctype32_b = (char_class32_t *) xcalloc (ctype->plane_size
1286 * ctype->plane_cnt,
1287 sizeof (char_class32_t));
1289 /* Fill in the character class information. */
1290 #if __BYTE_ORDER == __LITTLE_ENDIAN
1291 # define TRANS(w) CHAR_CLASS_TRANS (w)
1292 # define TRANS32(w) CHAR_CLASS32_TRANS (w)
1293 #else
1294 # define TRANS(w) (w)
1295 # define TRANS32(w) (w)
1296 #endif
1298 for (idx = 0; idx < ctype->class_collection_act; ++idx)
1299 if (ctype->charnames[idx] < 256)
1300 ctype->ctype_b[128 + ctype->charnames[idx]]
1301 = TRANS (ctype->class_collection[idx]);
1303 /* Mirror first 127 entries. We must take care that entry -1 is not
1304 mirrored because EOF == -1. */
1305 for (idx = 0; idx < 127; ++idx)
1306 ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
1308 /* The 32 bit array contains all characters. */
1309 for (idx = 0; idx < ctype->class_collection_act; ++idx)
1310 ctype->ctype32_b[ctype->charnames[idx]]
1311 = TRANS32 (ctype->class_collection[idx]);
1313 /* Room for table of mappings. */
1314 ctype->map_eb = (u_int32_t **) xmalloc (ctype->map_collection_nr
1315 * sizeof (u_int32_t *));
1316 ctype->map_el = (u_int32_t **) xmalloc (ctype->map_collection_nr
1317 * sizeof (u_int32_t *));
1319 /* Fill in all mappings. */
1320 for (idx = 0; idx < ctype->map_collection_nr; ++idx)
1322 unsigned int idx2;
1324 /* Allocate table. */
1325 ctype->map_eb[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
1326 * ctype->plane_cnt + 128)
1327 * sizeof (u_int32_t));
1328 ctype->map_el[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
1329 * ctype->plane_cnt + 128)
1330 * sizeof (u_int32_t));
1332 #if __BYTE_ORDER == __LITTLE_ENDIAN
1333 # define MAP_B1 ctype->map_el
1334 # define MAP_B2 ctype->map_eb
1335 #else
1336 # define MAP_B1 ctype->map_eb
1337 # define MAP_B2 ctype->map_el
1338 #endif
1340 /* Copy default value (identity mapping). */
1341 memcpy (&MAP_B1[idx][128], NAMES_B1,
1342 ctype->plane_size * ctype->plane_cnt * sizeof (u_int32_t));
1344 /* Copy values from collection. */
1345 for (idx2 = 0; idx2 < ctype->map_collection_act[idx]; ++idx2)
1346 if (ctype->map_collection[idx][idx2] != 0)
1347 MAP_B1[idx][128 + ctype->charnames[idx2]] =
1348 ctype->map_collection[idx][idx2];
1350 /* Mirror first 127 entries. We must take care not to map entry
1351 -1 because EOF == -1. */
1352 for (idx2 = 0; idx2 < 127; ++idx2)
1353 MAP_B1[idx][idx2] = MAP_B1[idx][256 + idx2];
1355 /* EOF must map to EOF. */
1356 MAP_B1[idx][127] = EOF;
1358 /* And now the other byte order. */
1359 for (idx2 = 0; idx2 < ctype->plane_size * ctype->plane_cnt + 128; ++idx2)
1360 MAP_B2[idx][idx2] = SWAPU32 (MAP_B1[idx][idx2]);
1363 /* Extra array for class and map names. */
1364 ctype->class_name_ptr = (u_int32_t *) xmalloc (ctype->nr_charclass
1365 * sizeof (u_int32_t));
1366 ctype->map_name_ptr = (u_int32_t *) xmalloc (ctype->map_collection_nr
1367 * sizeof (u_int32_t));
1369 /* Array for width information. Because the expected width are very
1370 small we use only one single byte. This save space and we need
1371 not provide the information twice with both endianesses. */
1372 ctype->width = (unsigned char *) xmalloc (ctype->plane_size
1373 * ctype->plane_cnt);
1374 /* Initialize with default width value. */
1375 memset (ctype->width, charset->width_default,
1376 ctype->plane_size * ctype->plane_cnt);
1377 if (charset->width_rules != NULL)
1379 size_t cnt;
1381 for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
1382 if (charset->width_rules[cnt].width != charset->width_default)
1383 for (idx = charset->width_rules[cnt].from;
1384 idx <= charset->width_rules[cnt].to; ++idx)
1386 size_t nr = idx % ctype->plane_size;
1387 size_t depth = 0;
1389 while (NAMES_B1[nr + depth * ctype->plane_size] != nr)
1390 ++depth;
1391 assert (depth < ctype->plane_cnt);
1393 ctype->width[nr + depth * ctype->plane_size]
1394 = charset->width_rules[cnt].width;
1398 /* Compute MB_CUR_MAX. Please note the value mb_cur_max in the
1399 character set definition gives the number of bytes in the wide
1400 character representation. We compute the number of bytes used
1401 for the UTF-8 encoded form. */
1402 ctype->mb_cur_max = ((int []) { 2, 3, 5, 6 }) [charset->mb_cur_max - 1];
1404 /* We need the name of the currently used 8-bit character set to
1405 make correct conversion between this 8-bit representation and the
1406 ISO 10646 character set used internally for wide characters. */
1407 ctype->codeset_name = charset->code_set_name;