1 /* Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
30 #include "localeinfo.h"
32 #include "locfile-token.h"
33 #include "stringtrans.h"
35 /* Uncomment the following line in the production version. */
40 void *xmalloc (size_t __n
);
41 void *xcalloc (size_t __n
, size_t __s
);
42 void *xrealloc (void *__ptr
, size_t __n
);
45 /* The bit used for representing a special class.  BITPOS yields the
   offset of a class token from tok_upper, and BIT turns that offset
   into a single-bit mask by shifting the int constant 1.  */
46 #define BITPOS(class) ((class) - tok_upper)
47 #define BIT(class) (1 << BITPOS (class))
/* ELEM expands to a dereferenced find_idx call, so it can be used as an
   lvalue (callers in this file apply `|=' and `=' to it).  COLLECTION
   names a member of *CTYPE; the matching COLLECTION_max and
   COLLECTION_act members are passed along with it.  IDX is pasted
   after each member name: it is left empty for a plain member and is a
   subscript such as [0] for an array member.  VALUE is the character
   value handed to find_idx.  */
49 #define ELEM(ctype, collection, idx, value) \
50 *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx, \
51 &ctype->collection##_act idx, value)
54 (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))
57 ((((w) >> 8) & 0xff) | (((w) & 0xff) << 8))
60 /* To be compatible with former implementations we for now restrict
61 the number of bits for character classes to 16. When compatibility
62 is not necessary anymore increase the number to 32. */
63 #define char_class_t u_int16_t
64 #define CHAR_CLASS_TRANS SWAPU16
65 #define char_class32_t u_int32_t
66 #define CHAR_CLASS32_TRANS SWAPU32
69 /* The real definition of the struct for the LC_CTYPE locale. */
72 unsigned int *charnames
;
76 /* We will allow up to 8 * sizeof(u_int32_t) - 1 character classes. */
77 #define MAX_NR_CHARCLASS (8 * sizeof (u_int32_t) - 1)
79 const char *classnames
[MAX_NR_CHARCLASS
];
80 unsigned long int current_class_mask
;
81 unsigned int last_class_char
;
82 u_int32_t
*class_collection
;
83 size_t class_collection_max
;
84 size_t class_collection_act
;
85 unsigned long int class_done
;
87 /* If the following number ever turns out to be too small simply
88 increase it. But I doubt it will. --drepper@gnu */
89 #define MAX_NR_CHARMAP 16
90 const char *mapnames
[MAX_NR_CHARMAP
];
91 u_int32_t
*map_collection
[MAX_NR_CHARMAP
];
92 size_t map_collection_max
[MAX_NR_CHARMAP
];
93 size_t map_collection_act
[MAX_NR_CHARMAP
];
94 size_t map_collection_nr
;
96 unsigned int from_map_char
;
100 /* The arrays for the binary representation. */
101 u_int32_t plane_size
;
103 char_class_t
*ctype_b
;
104 char_class32_t
*ctype32_b
;
109 u_int32_t
*class_name_ptr
;
110 u_int32_t
*map_name_ptr
;
111 unsigned char *width
;
112 u_int32_t mb_cur_max
;
113 const char *codeset_name
;
117 /* Prototypes for local functions. */
118 static void ctype_class_newP (struct linereader
*lr
,
119 struct locale_ctype_t
*ctype
, const char *name
);
120 static void ctype_map_newP (struct linereader
*lr
,
121 struct locale_ctype_t
*ctype
,
122 const char *name
, struct charset_t
*charset
);
123 static u_int32_t
*find_idx (struct locale_ctype_t
*ctype
, u_int32_t
**table
,
124 size_t *max
, size_t *act
, unsigned int idx
);
125 static void set_class_defaults (struct locale_ctype_t
*ctype
,
126 struct charset_t
*charset
);
127 static void allocate_arrays (struct locale_ctype_t
*ctype
,
128 struct charset_t
*charset
);
132 ctype_startup (struct linereader
*lr
, struct localedef_t
*locale
,
133 struct charset_t
*charset
)
136 struct locale_ctype_t
*ctype
;
138 /* It is important that we always use UCS1 encoding for strings now. */
139 encoding_method
= ENC_UCS1
;
141 /* Allocate the needed room. */
142 locale
->categories
[LC_CTYPE
].ctype
= ctype
=
143 (struct locale_ctype_t
*) xmalloc (sizeof (struct locale_ctype_t
));
145 /* We have no names seen yet. */
146 ctype
->charnames_max
= charset
->mb_cur_max
== 1 ? 256 : 512;
148 (unsigned int *) xmalloc (ctype
->charnames_max
* sizeof (unsigned int));
149 for (cnt
= 0; cnt
< 256; ++cnt
)
150 ctype
->charnames
[cnt
] = cnt
;
151 ctype
->charnames_act
= 256;
153 /* Fill character class information. */
154 ctype
->nr_charclass
= 0;
155 ctype
->current_class_mask
= 0;
156 ctype
->last_class_char
= ILLEGAL_CHAR_VALUE
;
157 /* The order of the following instructions determines the bit
positions of the classes. */
159 ctype_class_newP (lr
, ctype
, "upper");
160 ctype_class_newP (lr
, ctype
, "lower");
161 ctype_class_newP (lr
, ctype
, "alpha");
162 ctype_class_newP (lr
, ctype
, "digit");
163 ctype_class_newP (lr
, ctype
, "xdigit");
164 ctype_class_newP (lr
, ctype
, "space");
165 ctype_class_newP (lr
, ctype
, "print");
166 ctype_class_newP (lr
, ctype
, "graph");
167 ctype_class_newP (lr
, ctype
, "blank");
168 ctype_class_newP (lr
, ctype
, "cntrl");
169 ctype_class_newP (lr
, ctype
, "punct");
170 ctype_class_newP (lr
, ctype
, "alnum");
172 ctype
->class_collection_max
= charset
->mb_cur_max
== 1 ? 256 : 512;
173 ctype
->class_collection
174 = (u_int32_t
*) xmalloc (sizeof (unsigned long int)
175 * ctype
->class_collection_max
);
176 memset (ctype
->class_collection
, '\0',
177 sizeof (unsigned long int) * ctype
->class_collection_max
);
178 ctype
->class_collection_act
= 256;
180 /* Fill character map information. */
181 ctype
->map_collection_nr
= 0;
182 ctype
->last_map_idx
= MAX_NR_CHARMAP
;
183 ctype
->from_map_char
= ILLEGAL_CHAR_VALUE
;
184 ctype_map_newP (lr
, ctype
, "toupper", charset
);
185 ctype_map_newP (lr
, ctype
, "tolower", charset
);
187 /* Fill first 256 entries in `toupper' and `tolower' arrays. */
188 for (cnt
= 0; cnt
< 256; ++cnt
)
190 ctype
->map_collection
[0][cnt
] = cnt
;
191 ctype
->map_collection
[1][cnt
] = cnt
;
197 ctype_finish (struct localedef_t
*locale
, struct charset_t
*charset
)
199 /* See POSIX.2, table 2-6 for the meaning of the following table. */
204 const char allow
[NCLASS
];
206 valid_table
[NCLASS
] =
208 /* The order is important. See token.h for more information.
209 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
210 { "upper", "--MX-XDDXXX-" },
211 { "lower", "--MX-XDDXXX-" },
212 { "alpha", "---X-XDDXXX-" },
213 { "digit", "XXX--XDDXXX-" },
214 { "xdigit", "-----XDDXXX-" },
215 { "space", "XXXXX------X" },
216 { "print", "---------X--" },
217 { "graph", "---------X--" },
218 { "blank", "XXXXXM-----X" },
219 { "cntrl", "XXXXX-XX--XX" },
220 { "punct", "XXXXX-DD-X-X" },
221 { "alnum", "-----XDDXXX-" }
225 unsigned int space_value
;
226 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
228 /* Set default value for classes not specified. */
229 set_class_defaults (ctype
, charset
);
231 /* Check according to table. */
232 for (cnt
= 0; cnt
< ctype
->class_collection_max
; ++cnt
)
234 unsigned long int tmp
;
236 tmp
= ctype
->class_collection
[cnt
];
240 for (cls1
= 0; cls1
< NCLASS
; ++cls1
)
241 if ((tmp
& (1 << cls1
)) != 0)
242 for (cls2
= 0; cls2
< NCLASS
; ++cls2
)
243 if (valid_table
[cls1
].allow
[cls2
] != '-')
245 int eq
= (tmp
& (1 << cls2
)) != 0;
246 switch (valid_table
[cls1
].allow
[cls2
])
255 value
= ctype
->charnames
[cnt
];
257 if ((value
& 0xff000000) != 0)
258 cp
+= sprintf (cp
, "\\%o", (value
>> 24) & 0xff);
259 if ((value
& 0xffff0000) != 0)
260 cp
+= sprintf (cp
, "\\%o", (value
>> 16) & 0xff);
261 if ((value
& 0xffffff00) != 0)
262 cp
+= sprintf (cp
, "\\%o", (value
>> 8) & 0xff);
263 sprintf (cp
, "\\%o", value
& 0xff);
267 character %s'%s' in class `%s' must be in class `%s'"), value
> 256 ? "L" : "",
268 cp
, valid_table
[cls1
].name
,
269 valid_table
[cls2
].name
);
280 value
= ctype
->charnames
[cnt
];
282 if ((value
& 0xff000000) != 0)
283 cp
+= sprintf (cp
, "\\%o", value
>> 24);
284 if ((value
& 0xffff0000) != 0)
285 cp
+= sprintf (cp
, "\\%o", (value
>> 16) & 0xff);
286 if ((value
& 0xffffff00) != 0)
287 cp
+= sprintf (cp
, "\\%o", (value
>> 8) & 0xff);
288 sprintf (cp
, "\\%o", value
& 0xff);
292 character %s'%s' in class `%s' must not be in class `%s'"),
293 value
> 256 ? "L" : "", cp
,
294 valid_table
[cls1
].name
,
295 valid_table
[cls2
].name
);
300 ctype
->class_collection
[cnt
] |= 1 << cls2
;
304 error (5, 0, _("internal error in %s, line %u"),
305 __FUNCTION__
, __LINE__
);
310 /* ... and now test <SP> as a special case. */
311 space_value
= charset_find_value (charset
, "SP", 2);
312 if ((wchar_t) space_value
== ILLEGAL_CHAR_VALUE
&& !be_quiet
)
313 error (0, 0, _("character <SP> not defined in character map"));
314 else if (((cnt
= BITPOS (tok_space
),
315 (ELEM (ctype
, class_collection
, , space_value
)
316 & BIT (tok_space
)) == 0)
317 || (cnt
= BITPOS (tok_blank
),
318 (ELEM (ctype
, class_collection
, , space_value
)
319 & BIT (tok_blank
)) == 0))
321 error (0, 0, _("<SP> character not in class `%s'"),
322 valid_table
[cnt
].name
);
323 else if (((cnt
= BITPOS (tok_punct
),
324 (ELEM (ctype
, class_collection
, , space_value
)
325 & BIT (tok_punct
)) != 0)
326 || (cnt
= BITPOS (tok_graph
),
327 (ELEM (ctype
, class_collection
, , space_value
)
331 error (0, 0, _("<SP> character must not be in class `%s'"),
332 valid_table
[cnt
].name
);
334 ELEM (ctype
, class_collection
, , space_value
) |= BIT (tok_print
);
336 /* Now that the tests are done make sure the name array contains all
337 characters which are handled in the WIDTH section of the
338 character set definition file. */
339 if (charset
->width_rules
!= NULL
)
340 for (cnt
= 0; cnt
< charset
->nwidth_rules
; ++cnt
)
343 for (inner
= charset
->width_rules
[cnt
].from
;
344 inner
<= charset
->width_rules
[cnt
].to
; ++inner
)
345 (void) find_idx (ctype
, NULL
, NULL
, NULL
, inner
);
351 ctype_output (struct localedef_t
*locale
, struct charset_t
*charset
,
352 const char *output_path
)
354 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
355 const size_t nelems
= (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE
)
356 + 2 * (ctype
->map_collection_nr
- 2));
357 struct iovec iov
[2 + nelems
+ ctype
->nr_charclass
358 + ctype
->map_collection_nr
];
359 struct locale_file data
;
360 u_int32_t idx
[nelems
];
361 size_t elem
, cnt
, offset
, total
;
364 if ((locale
->binary
& (1 << LC_CTYPE
)) != 0)
366 iov
[0].iov_base
= ctype
;
367 iov
[0].iov_len
= locale
->len
[LC_CTYPE
];
369 write_locale_data (output_path
, "LC_CTYPE", 1, iov
);
375 /* Now prepare the output: Find the sizes of the table we can use. */
376 allocate_arrays (ctype
, charset
);
378 data
.magic
= LIMAGIC (LC_CTYPE
);
380 iov
[0].iov_base
= (void *) &data
;
381 iov
[0].iov_len
= sizeof (data
);
383 iov
[1].iov_base
= (void *) idx
;
384 iov
[1].iov_len
= sizeof (idx
);
386 idx
[0] = iov
[0].iov_len
+ iov
[1].iov_len
;
389 for (elem
= 0; elem
< nelems
; ++elem
)
391 if (elem
< _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE
))
394 #define CTYPE_DATA(name, base, len) \
395 case _NL_ITEM_INDEX (name): \
396 iov[2 + elem + offset].iov_base = (base); \
397 iov[2 + elem + offset].iov_len = (len); \
398 if (elem + 1 < nelems) \
399 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
402 CTYPE_DATA (_NL_CTYPE_CLASS
,
404 (256 + 128) * sizeof (char_class_t
));
406 CTYPE_DATA (_NL_CTYPE_TOUPPER_EB
,
408 (ctype
->plane_size
* ctype
->plane_cnt
+ 128)
409 * sizeof (u_int32_t
));
410 CTYPE_DATA (_NL_CTYPE_TOLOWER_EB
,
412 (ctype
->plane_size
* ctype
->plane_cnt
+ 128)
413 * sizeof (u_int32_t
));
415 CTYPE_DATA (_NL_CTYPE_TOUPPER_EL
,
417 (ctype
->plane_size
* ctype
->plane_cnt
+ 128)
418 * sizeof (u_int32_t
));
419 CTYPE_DATA (_NL_CTYPE_TOLOWER_EL
,
421 (ctype
->plane_size
* ctype
->plane_cnt
+ 128)
422 * sizeof (u_int32_t
));
424 CTYPE_DATA (_NL_CTYPE_CLASS32
,
426 (ctype
->plane_size
* ctype
->plane_cnt
427 * sizeof (char_class32_t
)));
429 CTYPE_DATA (_NL_CTYPE_NAMES_EB
,
430 ctype
->names_eb
, (ctype
->plane_size
* ctype
->plane_cnt
431 * sizeof (u_int32_t
)));
432 CTYPE_DATA (_NL_CTYPE_NAMES_EL
,
433 ctype
->names_el
, (ctype
->plane_size
* ctype
->plane_cnt
434 * sizeof (u_int32_t
)));
436 CTYPE_DATA (_NL_CTYPE_HASH_SIZE
,
437 &ctype
->plane_size
, sizeof (u_int32_t
));
438 CTYPE_DATA (_NL_CTYPE_HASH_LAYERS
,
439 &ctype
->plane_cnt
, sizeof (u_int32_t
));
441 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES
):
442 /* The class name array. */
444 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
, ++offset
)
446 iov
[2 + elem
+ offset
].iov_base
447 = (void *) ctype
->classnames
[cnt
];
448 iov
[2 + elem
+ offset
].iov_len
449 = strlen (ctype
->classnames
[cnt
]) + 1;
450 total
+= iov
[2 + elem
+ offset
].iov_len
;
452 iov
[2 + elem
+ offset
].iov_base
= (void *) "\0\0\0";
453 iov
[2 + elem
+ offset
].iov_len
= 1 + (4 - ((total
+ 1) % 4));
454 total
+= 1 + (4 - ((total
+ 1) % 4));
456 if (elem
+ 1 < nelems
)
457 idx
[elem
+ 1] = idx
[elem
] + total
;
460 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES
):
461 /* The map name array. */
463 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
, ++offset
)
465 iov
[2 + elem
+ offset
].iov_base
466 = (void *) ctype
->mapnames
[cnt
];
467 iov
[2 + elem
+ offset
].iov_len
468 = strlen (ctype
->mapnames
[cnt
]) + 1;
469 total
+= iov
[2 + elem
+ offset
].iov_len
;
471 iov
[2 + elem
+ offset
].iov_base
= (void *) "\0\0\0";
472 iov
[2 + elem
+ offset
].iov_len
= 1 + (4 - ((total
+ 1) % 4));
473 total
+= 1 + (4 - ((total
+ 1) % 4));
475 if (elem
+ 1 < nelems
)
476 idx
[elem
+ 1] = idx
[elem
] + total
;
479 CTYPE_DATA (_NL_CTYPE_WIDTH
,
480 ctype
->width
, ctype
->plane_size
* ctype
->plane_cnt
);
482 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX
,
483 &ctype
->mb_cur_max
, sizeof (u_int32_t
));
485 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME
):
486 total
= strlen (ctype
->codeset_name
) + 1;
488 iov
[2 + elem
+ offset
].iov_base
= (char *) ctype
->codeset_name
;
491 iov
[2 + elem
+ offset
].iov_base
= alloca ((total
+ 3) & ~3);
492 memcpy (iov
[2 + elem
+ offset
].iov_base
, ctype
->codeset_name
,
494 total
= (total
+ 3) & ~3;
496 iov
[2 + elem
+ offset
].iov_len
= total
;
497 if (elem
+ 1 < nelems
)
498 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
502 assert (! "unknown CTYPE element");
506 /* Handle extra maps. */
507 size_t nr
= (elem
- _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE
)) >> 1;
509 if (((elem
- _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE
)) & 1) == 0)
510 iov
[2 + elem
+ offset
].iov_base
= ctype
->map_eb
[nr
];
512 iov
[2 + elem
+ offset
].iov_base
= ctype
->map_el
[nr
];
514 iov
[2 + elem
+ offset
].iov_len
= ((ctype
->plane_size
515 * ctype
->plane_cnt
+ 128)
516 * sizeof (u_int32_t
));
518 if (elem
+ 1 < nelems
)
519 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
523 assert (2 + elem
+ offset
== (nelems
+ ctype
->nr_charclass
524 + ctype
->map_collection_nr
+ 2));
526 write_locale_data (output_path
, "LC_CTYPE", 2 + elem
+ offset
, iov
);
530 /* Character class handling. */
532 ctype_class_new (struct linereader
*lr
, struct localedef_t
*locale
,
533 enum token_t tok
, struct token
*code
,
534 struct charset_t
*charset
)
536 ctype_class_newP (lr
, locale
->categories
[LC_CTYPE
].ctype
,
537 code
->val
.str
.start
);
542 ctype_is_charclass (struct linereader
*lr
, struct localedef_t
*locale
,
547 for (cnt
= 0; cnt
< locale
->categories
[LC_CTYPE
].ctype
->nr_charclass
; ++cnt
)
548 if (strcmp (name
, locale
->categories
[LC_CTYPE
].ctype
->classnames
[cnt
])
557 ctype_class_start (struct linereader
*lr
, struct localedef_t
*locale
,
558 enum token_t tok
, const char *str
,
559 struct charset_t
*charset
)
561 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
605 assert (! "illegal token as class name: should not happen");
608 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
609 if (strcmp (str
, ctype
->classnames
[cnt
]) == 0)
612 if (cnt
>= ctype
->nr_charclass
)
613 assert (! "unknown class in class definition: should not happen");
615 ctype
->class_done
|= BIT (tok
);
617 ctype
->current_class_mask
= 1 << cnt
;
618 ctype
->last_class_char
= ILLEGAL_CHAR_VALUE
;
623 ctype_class_from (struct linereader
*lr
, struct localedef_t
*locale
,
624 struct token
*code
, struct charset_t
*charset
)
626 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
629 value
= charset_find_value (charset
, code
->val
.str
.start
, code
->val
.str
.len
);
631 ctype
->last_class_char
= value
;
633 if ((wchar_t) value
== ILLEGAL_CHAR_VALUE
)
634 /* In the LC_CTYPE category it is no error when a character is
635 not found. This has to be ignored silently. */
638 *find_idx (ctype
, &ctype
->class_collection
, &ctype
->class_collection_max
,
639 &ctype
->class_collection_act
, value
)
640 |= ctype
->current_class_mask
;
645 ctype_class_to (struct linereader
*lr
, struct localedef_t
*locale
,
646 struct token
*code
, struct charset_t
*charset
)
648 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
649 unsigned int value
, cnt
;
651 value
= charset_find_value (charset
, code
->val
.str
.start
, code
->val
.str
.len
);
653 assert (value
>= ctype
->last_class_char
);
655 for (cnt
= ctype
->last_class_char
+ 1; cnt
<= value
; ++cnt
)
656 *find_idx (ctype
, &ctype
->class_collection
, &ctype
->class_collection_max
,
657 &ctype
->class_collection_act
, cnt
)
658 |= ctype
->current_class_mask
;
660 ctype
->last_class_char
= ILLEGAL_CHAR_VALUE
;
665 ctype_class_end (struct linereader
*lr
, struct localedef_t
*locale
)
667 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
669 /* We have no special actions to perform here. */
670 ctype
->current_class_mask
= 0;
671 ctype
->last_class_char
= ILLEGAL_CHAR_VALUE
;
675 /* Character map handling. */
677 ctype_map_new (struct linereader
*lr
, struct localedef_t
*locale
,
678 enum token_t tok
, struct token
*code
,
679 struct charset_t
*charset
)
681 ctype_map_newP (lr
, locale
->categories
[LC_CTYPE
].ctype
,
682 code
->val
.str
.start
, charset
);
687 ctype_is_charconv (struct linereader
*lr
, struct localedef_t
*locale
,
690 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
693 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
)
694 if (strcmp (name
, ctype
->mapnames
[cnt
]) == 0)
702 ctype_map_start (struct linereader
*lr
, struct localedef_t
*locale
,
703 enum token_t tok
, const char *name
, struct charset_t
*charset
)
705 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
711 ctype
->toupper_done
= 1;
715 ctype
->tolower_done
= 1;
721 assert (! "unknown token in category `LC_CTYPE' should not happen");
724 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
)
725 if (strcmp (name
, ctype
->mapnames
[cnt
]) == 0)
728 if (cnt
== ctype
->map_collection_nr
)
729 assert (! "unknown token in category `LC_CTYPE' should not happen");
731 ctype
->last_map_idx
= cnt
;
732 ctype
->from_map_char
= ILLEGAL_CHAR_VALUE
;
737 ctype_map_from (struct linereader
*lr
, struct localedef_t
*locale
,
738 struct token
*code
, struct charset_t
*charset
)
740 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
743 value
= charset_find_value (charset
, code
->val
.str
.start
, code
->val
.str
.len
);
745 if ((wchar_t) value
== ILLEGAL_CHAR_VALUE
)
746 /* In the LC_CTYPE category it is no error when a character is
747 not found. This has to be ignored silently. */
750 assert (ctype
->last_map_idx
< ctype
->map_collection_nr
);
752 ctype
->from_map_char
= value
;
757 ctype_map_to (struct linereader
*lr
, struct localedef_t
*locale
,
758 struct token
*code
, struct charset_t
*charset
)
760 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
763 value
= charset_find_value (charset
, code
->val
.str
.start
, code
->val
.str
.len
);
765 if ((wchar_t) ctype
->from_map_char
== ILLEGAL_CHAR_VALUE
766 || (wchar_t) value
== ILLEGAL_CHAR_VALUE
)
768 /* In the LC_CTYPE category it is no error when a character is
769 not found. This has to be ignored silently. */
770 ctype
->from_map_char
= ILLEGAL_CHAR_VALUE
;
774 *find_idx (ctype
, &ctype
->map_collection
[ctype
->last_map_idx
],
775 &ctype
->map_collection_max
[ctype
->last_map_idx
],
776 &ctype
->map_collection_act
[ctype
->last_map_idx
],
777 ctype
->from_map_char
) = value
;
779 ctype
->from_map_char
= ILLEGAL_CHAR_VALUE
;
784 ctype_map_end (struct linereader
*lr
, struct localedef_t
*locale
)
786 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
788 ctype
->last_map_idx
= MAX_NR_CHARMAP
;
789 ctype
->from_map_char
= ILLEGAL_CHAR_VALUE
;
793 /* Local functions. */
795 ctype_class_newP (struct linereader
*lr
, struct locale_ctype_t
*ctype
,
800 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
801 if (strcmp (ctype
->classnames
[cnt
], name
) == 0)
804 if (cnt
< ctype
->nr_charclass
)
806 lr_error (lr
, _("character class `%s' already defined"), name
);
810 if (ctype
->nr_charclass
== MAX_NR_CHARCLASS
)
811 /* Exit code 2 is prescribed in P1003.2b. */
813 implementation limit: no more than %d character classes allowed"),
816 ctype
->classnames
[ctype
->nr_charclass
++] = name
;
821 ctype_map_newP (struct linereader
*lr
, struct locale_ctype_t
*ctype
,
822 const char *name
, struct charset_t
*charset
)
824 size_t max_chars
= 0;
827 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
)
829 if (strcmp (ctype
->mapnames
[cnt
], name
) == 0)
832 if (max_chars
< ctype
->map_collection_max
[cnt
])
833 max_chars
= ctype
->map_collection_max
[cnt
];
836 if (cnt
< ctype
->map_collection_nr
)
838 lr_error (lr
, _("character map `%s' already defined"), name
);
842 if (ctype
->map_collection_nr
== MAX_NR_CHARMAP
)
843 /* Exit code 2 is prescribed in P1003.2b. */
845 implementation limit: no more than %d character maps allowed"),
848 ctype
->mapnames
[cnt
] = name
;
851 ctype
->map_collection_max
[cnt
] = charset
->mb_cur_max
== 1 ? 256 : 512;
853 ctype
->map_collection_max
[cnt
] = max_chars
;
855 ctype
->map_collection
[cnt
] = (u_int32_t
*)
856 xmalloc (sizeof (u_int32_t
) * ctype
->map_collection_max
[cnt
]);
857 memset (ctype
->map_collection
[cnt
], '\0',
858 sizeof (u_int32_t
) * ctype
->map_collection_max
[cnt
]);
859 ctype
->map_collection_act
[cnt
] = 256;
861 ++ctype
->map_collection_nr
;
865 /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
866 is possible if we only want to extend the name array. */
868 find_idx (struct locale_ctype_t
*ctype
, u_int32_t
**table
, size_t *max
,
869 size_t *act
, unsigned int idx
)
874 return table
== NULL
? NULL
: &(*table
)[idx
];
876 for (cnt
= 256; cnt
< ctype
->charnames_act
; ++cnt
)
877 if (ctype
->charnames
[cnt
] == idx
)
880 /* We have to distinguish two cases: the name is found or not. */
881 if (cnt
== ctype
->charnames_act
)
883 /* Extend the name array. */
884 if (ctype
->charnames_act
== ctype
->charnames_max
)
886 ctype
->charnames_max
*= 2;
887 ctype
->charnames
= (unsigned int *)
888 xrealloc (ctype
->charnames
,
889 sizeof (unsigned int) * ctype
->charnames_max
);
891 ctype
->charnames
[ctype
->charnames_act
++] = idx
;
895 /* We have done everything we are asked to do. */
902 size_t old_max
= *max
;
908 (u_int32_t
*) xrealloc (*table
, *max
* sizeof (unsigned long int));
909 memset (&(*table
)[old_max
], '\0',
910 (*max
- old_max
) * sizeof (u_int32_t
));
917 return &(*table
)[cnt
];
922 set_class_defaults (struct locale_ctype_t
*ctype
, struct charset_t
*charset
)
924 /* This function defines the default values for the classes and conversions
925 according to POSIX.2 2.5.2.1.
926 It may seem that the order of these if-blocks is arbitrary but it is NOT.
927 Don't move them unless you know what you are doing! */
929 void set_default (int bit
, int from
, int to
)
936 for (ch
= from
; ch
<= to
; ++ch
)
941 value
= charset_find_value (charset
, tmp
, 1);
942 if ((wchar_t) value
== ILLEGAL_CHAR_VALUE
&& !be_quiet
)
945 character `%s' not defined while needed as default value"),
950 ELEM (ctype
, class_collection
, , value
) |= bit
;
954 /* Set default values if keyword was not present. */
955 if ((ctype
->class_done
& BIT (tok_upper
)) == 0)
956 /* "If this keyword [upper] is not specified, the uppercase letters
957 `A' through `Z', ..., shall automatically belong to this class,
958 with implementation defined character values." [P1003.2, 2.5.2.1] */
959 set_default (BIT (tok_upper
), 'A', 'Z');
961 if ((ctype
->class_done
& BIT (tok_lower
)) == 0)
962 /* "If this keyword [lower] is not specified, the lowercase letters
963 `a' through `z', ..., shall automatically belong to this class,
964 with implementation defined character values." [P1003.2, 2.5.2.1] */
965 set_default (BIT (tok_lower
), 'a', 'z');
967 if ((ctype
->class_done
& BIT (tok_alpha
)) == 0)
969 /* Table 2-6 in P1003.2 says that characters in class `upper' or
970 class `lower' *must* be in class `alpha'. */
971 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
);
974 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
975 if ((ctype
->class_collection
[cnt
] & mask
) != 0)
976 ctype
->class_collection
[cnt
] |= BIT (tok_alpha
);
979 if ((ctype
->class_done
& BIT (tok_digit
)) == 0)
980 /* "If this keyword [digit] is not specified, the digits `0' through
981 `9', ..., shall automatically belong to this class, with
982 implementation-defined character values." [P1003.2, 2.5.2.1] */
983 set_default (BIT (tok_digit
), '0', '9');
985 /* "Only characters specified for the `alpha' and `digit' keyword
986 shall be specified. Characters specified for the keyword `alpha'
987 and `digit' are automatically included in this class [alnum]." */
989 unsigned long int mask
= BIT (tok_alpha
) | BIT (tok_digit
);
992 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
993 if ((ctype
->class_collection
[cnt
] & mask
) != 0)
994 ctype
->class_collection
[cnt
] |= BIT (tok_alnum
);
997 if ((ctype
->class_done
& BIT (tok_space
)) == 0)
998 /* "If this keyword [space] is not specified, the characters <space>,
999 <form-feed>, <newline>, <carriage-return>, <tab>, and
1000 <vertical-tab>, ..., shall automatically belong to this class,
1001 with implementation-defined character values." [P1003.2, 2.5.2.1] */
1005 value
= charset_find_value (charset
, "space", 5);
1006 if ((wchar_t) value
== ILLEGAL_CHAR_VALUE
&& !be_quiet
)
1008 character `%s' not defined while needed as default value"),
1011 ELEM (ctype
, class_collection
, , value
) |= BIT (tok_space
);
1013 value
= charset_find_value (charset
, "form-feed", 9);
1014 if ((wchar_t) value
== ILLEGAL_CHAR_VALUE
&& !be_quiet
)
1016 character `%s' not defined while needed as default value"),
1019 ELEM (ctype
, class_collection
, , value
) |= BIT (tok_space
);
1021 value
= charset_find_value (charset
, "newline", 7);
1022 if ((wchar_t) value
== ILLEGAL_CHAR_VALUE
&& !be_quiet
)
1024 character `%s' not defined while needed as default value"),
1027 ELEM (ctype
, class_collection
, , value
) |= BIT (tok_space
);
1029 value
= charset_find_value (charset
, "carriage-return", 15);
1030 if ((wchar_t) value
== ILLEGAL_CHAR_VALUE
&& !be_quiet
)
1032 character `%s' not defined while needed as default value"),
1033 "<carriage-return>");
1035 ELEM (ctype
, class_collection
, , value
) |= BIT (tok_space
);
1037 value
= charset_find_value (charset
, "tab", 3);
1038 if ((wchar_t) value
== ILLEGAL_CHAR_VALUE
&& !be_quiet
)
1040 character `%s' not defined while needed as default value"),
1043 ELEM (ctype
, class_collection
, , value
) |= BIT (tok_space
);
1045 value
= charset_find_value (charset
, "vertical-tab", 12);
1046 if ((wchar_t) value
== ILLEGAL_CHAR_VALUE
&& !be_quiet
)
1048 character `%s' not defined while needed as default value"),
1051 ELEM (ctype
, class_collection
, , value
) |= BIT (tok_space
);
1054 if ((ctype
->class_done
& BIT (tok_xdigit
)) == 0)
1055 /* "If this keyword is not specified, the digits `0' to `9', the
1056 uppercase letters `A' through `F', and the lowercase letters `a'
1057 through `f', ..., shall automatically belong to this class, with
1058 implementation defined character values." [P1003.2, 2.5.2.1] */
1060 set_default (BIT (tok_xdigit
), '0', '9');
1061 set_default (BIT (tok_xdigit
), 'A', 'F');
1062 set_default (BIT (tok_xdigit
), 'a', 'f');
1065 if ((ctype
->class_done
& BIT (tok_blank
)) == 0)
1066 /* "If this keyword [blank] is unspecified, the characters <space> and
1067 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
1071 value
= charset_find_value (charset
, "space", 5);
1072 if ((wchar_t) value
== ILLEGAL_CHAR_VALUE
&& !be_quiet
)
1074 character `%s' not defined while needed as default value"),
1077 ELEM (ctype
, class_collection
, , value
) |= BIT (tok_blank
);
1079 value
= charset_find_value (charset
, "tab", 3);
1080 if ((wchar_t) value
== ILLEGAL_CHAR_VALUE
&& !be_quiet
)
1082 character `%s' not defined while needed as default value"),
1085 ELEM (ctype
, class_collection
, , value
) |= BIT (tok_blank
);
1088 if ((ctype
->class_done
& BIT (tok_graph
)) == 0)
1089 /* "If this keyword [graph] is not specified, characters specified for
1090 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
1091 shall belong to this character class." [P1003.2, 2.5.2.1] */
1093 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
) |
1094 BIT (tok_alpha
) | BIT (tok_digit
) | BIT (tok_xdigit
) | BIT (tok_punct
);
1097 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
1098 if ((ctype
->class_collection
[cnt
] & mask
) != 0)
1099 ctype
->class_collection
[cnt
] |= BIT (tok_graph
);
1102 if ((ctype
->class_done
& BIT (tok_print
)) == 0)
1103 /* "If this keyword [print] is not provided, characters specified for
1104 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
1105 and the <space> character shall belong to this character class."
1106 [P1003.2, 2.5.2.1] */
1108 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
) |
1109 BIT (tok_alpha
) | BIT (tok_digit
) | BIT (tok_xdigit
) | BIT (tok_punct
);
1113 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
1114 if ((ctype
->class_collection
[cnt
] & mask
) != 0)
1115 ctype
->class_collection
[cnt
] |= BIT (tok_print
);
1117 space
= charset_find_value (charset
, "space", 5);
1118 if (space
== ILLEGAL_CHAR_VALUE
&& !be_quiet
)
1120 character `%s' not defined while needed as default value"),
1123 ELEM (ctype
, class_collection
, , space
) |= BIT (tok_print
);
1126 if (ctype
->toupper_done
== 0)
1127 /* "If this keyword [toupper] is not specified, the lowercase letters
1128 `a' through `z', and their corresponding uppercase letters `A' to
1129 `Z', ..., shall automatically be included, with implementation-
1130 defined character values." [P1003.2, 2.5.2.1] */
1135 strcpy (tmp
, "<?>");
1137 for (ch
= 'a'; ch
<= 'z'; ++ch
)
1139 unsigned int value_from
, value_to
;
1143 value_from
= charset_find_value (charset
, &tmp
[1], 1);
1144 if ((wchar_t) value_from
== ILLEGAL_CHAR_VALUE
&& !be_quiet
)
1147 character `%s' not defined while needed as default value"),
1152 /* This conversion is implementation defined. */
1153 tmp
[1] = (char) (ch
+ ('A' - 'a'));
1154 value_to
= charset_find_value (charset
, &tmp
[1], 1);
1155 if ((wchar_t) value_to
== ILLEGAL_CHAR_VALUE
&& !be_quiet
)
1158 character `%s' not defined while needed as default value"),
1163 /* The index [0] is determined by the order of the
1164 `ctype_map_newP' calls in `ctype_startup'. */
1165 ELEM (ctype
, map_collection
, [0], value_from
) = value_to
;
1169 if (ctype
->tolower_done
== 0)
1170 /* "If this keyword [tolower] is not specified, the mapping shall be
1171 the reverse mapping of the one specified to `toupper'." [P1003.2] */
1175 for (cnt
= 0; cnt
< ctype
->map_collection_act
[0]; ++cnt
)
1176 if (ctype
->map_collection
[0][cnt
] != 0)
1177 ELEM (ctype
, map_collection
, [1],
1178 ctype
->map_collection
[0][cnt
])
1179 = ctype
->charnames
[cnt
];
/* Build the final lookup tables in CTYPE from the data collected so far.

   CTYPE is the ctype locale description whose table members are filled
   in here.  CHARSET supplies the width rules, the default width, the
   mb_cur_max value and the code set name.

   The visible steps, in order:
   - search for a plane size and plane count for the sparse hash table
     that holds all character values >= 256 (hash is VALUE % plane_size);
   - build the name tables names_eb and names_el and overwrite
     charnames[idx], for idx >= 256, with the slot the name was stored in;
   - allocate and fill the class tables ctype_b and ctype32_b;
   - allocate and fill one mapping table per map collection, once for
     each byte order;
   - allocate class_name_ptr and map_name_ptr (the visible code does not
     fill them);
   - build the one byte per character width table;
   - set mb_cur_max and codeset_name.

   All memory is obtained through xmalloc and xcalloc and nothing is
   released in this function.

   NOTE(review): several lines of this function are not visible in this
   copy of the file (block braces, some loop bodies, the start of some
   string literals), so the comments added here describe only the
   statements that can actually be seen.  */
1185 allocate_arrays (struct locale_ctype_t
*ctype
, struct charset_t
*charset
)
1189 /* First we have to decide how we organize the arrays. It is easy
1190 for a one-byte character set. But multi-byte character set
1191 cannot be stored flat because the chars might be sparsely used.
1192 So we determine an optimal hashing function for the used
characters.
1195 We use a very trivial hashing function to store the sparse
1196 table. CH % TABSIZE is used as an index. To solve multiple hits
1197 we have N planes. This guarantees a fixed search time for a
1198 character [N / 2]. In the following code we determine the minimum
1199 value for TABSIZE * N, where TABSIZE >= 256. */
/* MIN_TOTAL holds the smallest TABSIZE * N product found so far and
   ACT_SIZE is the candidate TABSIZE currently being examined.  The
   search loop below runs while ACT_SIZE is smaller than MIN_TOTAL; for
   each candidate it counts how many characters >= 256 fall into every
   slot, the largest count being the number of planes needed.  */
1200 size_t min_total
= UINT_MAX
;
1201 size_t act_size
= 256;
1205 Computing table size for character classes might take a while..."),
1208 while (act_size
< min_total
)
1210 size_t cnt
[act_size
];
1211 size_t act_planes
= 1;
1213 memset (cnt
, '\0', sizeof cnt
);
1215 for (idx
= 0; idx
< 256; ++idx
)
1218 for (idx
= 0; idx
< ctype
->charnames_act
; ++idx
)
1219 if (ctype
->charnames
[idx
] >= 256)
1221 size_t nr
= ctype
->charnames
[idx
] % act_size
;
1223 if (++cnt
[nr
] > act_planes
)
1225 act_planes
= cnt
[nr
];
1226 if (act_size
* act_planes
>= min_total
)
1231 if (act_size
* act_planes
< min_total
)
1233 min_total
= act_size
* act_planes
;
1234 ctype
->plane_size
= act_size
;
1235 ctype
->plane_cnt
= act_planes
;
1242 fputs (_(" done\n"), stderr
);
/* NAMES_B1 is names_el when __BYTE_ORDER is __LITTLE_ENDIAN and names_eb
   in the other definition pair, so it is presumably the table in the
   byte order of the machine running this program.  NAMES_B2 is the
   other table; it is filled further down with byte-swapped copies.  */
1245 #if __BYTE_ORDER == __LITTLE_ENDIAN
1246 # define NAMES_B1 ctype->names_el
1247 # define NAMES_B2 ctype->names_eb
1249 # define NAMES_B1 ctype->names_eb
1250 # define NAMES_B2 ctype->names_el
/* Both name tables come from xcalloc.  Assuming xcalloc zero-fills like
   calloc, a zero entry marks a free slot for the probing loop below.  */
1253 ctype
->names_eb
= (u_int32_t
*) xcalloc (ctype
->plane_size
1255 sizeof (u_int32_t
));
1256 ctype
->names_el
= (u_int32_t
*) xcalloc (ctype
->plane_size
1258 sizeof (u_int32_t
));
/* Characters 1 to 255 are stored at their own index.  */
1260 for (idx
= 1; idx
< 256; ++idx
)
1261 NAMES_B1
[idx
] = idx
;
1263 /* Trick: change the 0th entry's name to 1 to mark the cell occupied. */
/* Every remaining name is hashed with NAME % plane_size.  The while
   condition tests whether the slot in plane DEPTH is already occupied;
   the loop body is not visible here, presumably it advances DEPTH to
   the next plane.  The assert checks that the plane count computed
   above is sufficient.  */
1266 for (idx
= 256; idx
< ctype
->charnames_act
; ++idx
)
1268 size_t nr
= (ctype
->charnames
[idx
] % ctype
->plane_size
);
1271 while (NAMES_B1
[nr
+ depth
* ctype
->plane_size
])
1273 assert (depth
< ctype
->plane_cnt
);
1275 NAMES_B1
[nr
+ depth
* ctype
->plane_size
] = ctype
->charnames
[idx
];
1277 /* Now for faster access remember the index in the NAMES_B array. */
1278 ctype
->charnames
[idx
] = nr
+ depth
* ctype
->plane_size
;
/* Produce the byte-swapped copy of the complete name table.  */
1282 for (idx
= 0; idx
< ctype
->plane_size
* ctype
->plane_cnt
; ++idx
)
1283 NAMES_B2
[idx
] = SWAPU32 (NAMES_B1
[idx
]);
1286 /* You wonder about this amount of memory? This is only because some
1287 users do not manage to address the array with unsigned values or
1288 data types with range >= 256. '\200' would result in the array
1289 index -128. To help these poor people we duplicate the entries for
1290 128 up to 255 below the entry for \0. */
1291 ctype
->ctype_b
= (char_class_t
*) xcalloc (256 + 128,
1292 sizeof (char_class_t
));
1293 ctype
->ctype32_b
= (char_class32_t
*) xcalloc (ctype
->plane_size
1295 sizeof (char_class32_t
));
1297 /* Fill in the character class information. */
/* TRANS and TRANS32 convert a class word before it is stored: with
   __BYTE_ORDER equal to __LITTLE_ENDIAN they apply the byte swapping
   CHAR_CLASS_TRANS and CHAR_CLASS32_TRANS macros, in the other
   definition pair they leave the value unchanged.  */
1298 #if __BYTE_ORDER == __LITTLE_ENDIAN
1299 # define TRANS(w) CHAR_CLASS_TRANS (w)
1300 # define TRANS32(w) CHAR_CLASS32_TRANS (w)
1302 # define TRANS(w) (w)
1303 # define TRANS32(w) (w)
/* Only characters below 256 go into the 16 bit table.  The offset of
   128 leaves room in front of entry 0 for the mirrored entries.  */
1306 for (idx
= 0; idx
< ctype
->class_collection_act
; ++idx
)
1307 if (ctype
->charnames
[idx
] < 256)
1308 ctype
->ctype_b
[128 + ctype
->charnames
[idx
]]
1309 = TRANS (ctype
->class_collection
[idx
]);
1311 /* Mirror first 127 entries. We must take care that entry -1 is not
1312 mirrored because EOF == -1. */
/* Slot IDX receives the value stored for character 128 + IDX, which
   lives at offset 256 + IDX; slot 127 (index -1) is left untouched.  */
1313 for (idx
= 0; idx
< 127; ++idx
)
1314 ctype
->ctype_b
[idx
] = ctype
->ctype_b
[256 + idx
];
1316 /* The 32 bit array contains all characters. */
/* At this point charnames[idx] holds the slot index for idx >= 256 (it
   was overwritten above), so it is used directly as the subscript.  */
1317 for (idx
= 0; idx
< ctype
->class_collection_act
; ++idx
)
1318 ctype
->ctype32_b
[ctype
->charnames
[idx
]]
1319 = TRANS32 (ctype
->class_collection
[idx
]);
1321 /* Room for table of mappings. */
1322 ctype
->map_eb
= (u_int32_t
**) xmalloc (ctype
->map_collection_nr
1323 * sizeof (u_int32_t
*));
1324 ctype
->map_el
= (u_int32_t
**) xmalloc (ctype
->map_collection_nr
1325 * sizeof (u_int32_t
*));
1327 /* Fill in all mappings. */
1328 for (idx
= 0; idx
< ctype
->map_collection_nr
; ++idx
)
1332 /* Allocate table. */
/* Each table has plane_size * plane_cnt entries plus 128 leading
   entries that serve the negative indices.  */
1333 ctype
->map_eb
[idx
] = (u_int32_t
*) xmalloc ((ctype
->plane_size
1334 * ctype
->plane_cnt
+ 128)
1335 * sizeof (u_int32_t
));
1336 ctype
->map_el
[idx
] = (u_int32_t
*) xmalloc ((ctype
->plane_size
1337 * ctype
->plane_cnt
+ 128)
1338 * sizeof (u_int32_t
));
/* MAP_B1 and MAP_B2 follow the same scheme as NAMES_B1 and NAMES_B2:
   MAP_B1 is filled first and MAP_B2 receives the byte-swapped copy.  */
1340 #if __BYTE_ORDER == __LITTLE_ENDIAN
1341 # define MAP_B1 ctype->map_el
1342 # define MAP_B2 ctype->map_eb
1344 # define MAP_B1 ctype->map_eb
1345 # define MAP_B2 ctype->map_el
1348 /* Copy default value (identity mapping). */
/* The name table is copied behind the 128 reserved leading entries, so
   every slot initially maps to the name stored in it.  */
1349 memcpy (&MAP_B1
[idx
][128], NAMES_B1
,
1350 ctype
->plane_size
* ctype
->plane_cnt
* sizeof (u_int32_t
));
1352 /* Copy values from collection. */
/* A zero in map_collection is skipped, which keeps the identity value
   copied above.  */
1353 for (idx2
= 0; idx2
< ctype
->map_collection_act
[idx
]; ++idx2
)
1354 if (ctype
->map_collection
[idx
][idx2
] != 0)
1355 MAP_B1
[idx
][128 + ctype
->charnames
[idx2
]] =
1356 ctype
->map_collection
[idx
][idx2
];
1358 /* Mirror first 127 entries. We must take care not to map entry
1359 -1 because EOF == -1. */
1360 for (idx2
= 0; idx2
< 127; ++idx2
)
1361 MAP_B1
[idx
][idx2
] = MAP_B1
[idx
][256 + idx2
];
1363 /* EOF must map to EOF. */
1364 MAP_B1
[idx
][127] = EOF
;
1366 /* And now the other byte order. */
1367 for (idx2
= 0; idx2
< ctype
->plane_size
* ctype
->plane_cnt
+ 128; ++idx2
)
1368 MAP_B2
[idx
][idx2
] = SWAPU32 (MAP_B1
[idx
][idx2
]);
1371 /* Extra array for class and map names. */
/* Only the storage is reserved here; the visible code does not write
   any element of these two arrays.  */
1372 ctype
->class_name_ptr
= (u_int32_t
*) xmalloc (ctype
->nr_charclass
1373 * sizeof (u_int32_t
));
1374 ctype
->map_name_ptr
= (u_int32_t
*) xmalloc (ctype
->map_collection_nr
1375 * sizeof (u_int32_t
));
1377 /* Array for width information. Because the expected widths are very
1378 small we use only one single byte. This saves space and we need
1379 not provide the information twice with both endiannesses. */
1380 ctype
->width
= (unsigned char *) xmalloc (ctype
->plane_size
1381 * ctype
->plane_cnt
);
1382 /* Initialize with default width value. */
1383 memset (ctype
->width
, charset
->width_default
,
1384 ctype
->plane_size
* ctype
->plane_cnt
);
/* Apply the explicit width rules.  Rules whose width equals the default
   are skipped because the table already holds that value.  Each rule
   covers the inclusive character range from .from to .to.  */
1385 if (charset
->width_rules
!= NULL
)
1389 for (cnt
= 0; cnt
< charset
->nwidth_rules
; ++cnt
)
1390 if (charset
->width_rules
[cnt
].width
!= charset
->width_default
)
1391 for (idx
= charset
->width_rules
[cnt
].from
;
1392 idx
<= charset
->width_rules
[cnt
].to
; ++idx
)
1394 size_t nr
= idx
% ctype
->plane_size
;
/* NOTE(review): the probe compares the stored name with NR, the hash
   value, and not with IDX, the character itself.  The two are equal
   only while IDX is below plane_size, so this looks wrong for larger
   characters -- confirm against the intended lookup.  */
1397 while (NAMES_B1
[nr
+ depth
* ctype
->plane_size
] != nr
)
1399 assert (depth
< ctype
->plane_cnt
);
1401 ctype
->width
[nr
+ depth
* ctype
->plane_size
]
1402 = charset
->width_rules
[cnt
].width
;
1406 /* Compute MB_CUR_MAX. Please note the value mb_cur_max in the
1407 character set definition gives the number of bytes in the wide
1408 character representation. We compute the number of bytes used
1409 for the UTF-8 encoded form. */
/* NOTE(review): the lookup table has four entries and no range check is
   visible here; this assumes charset->mb_cur_max is between 1 and 4 --
   confirm where that value is validated.  */
1410 ctype
->mb_cur_max
= ((int []) { 2, 3, 5, 6 }) [charset
->mb_cur_max
- 1];
1412 /* We need the name of the currently used 8-bit character set to
1413 make correct conversion between this 8-bit representation and the
1414 ISO 10646 character set used internally for wide characters. */
1415 ctype
->codeset_name
= charset
->code_set_name
;