[BZ #3137]
[glibc.git] / locale / programs / ld-ctype.c
blobca1ec7995aed4ade770f96b5fa4921bfbcf721fa
/* Copyright (C) 1995-2005, 2006 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License version 2 as
   published by the Free Software Foundation.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
18 #ifdef HAVE_CONFIG_H
19 # include <config.h>
20 #endif
22 #include <alloca.h>
23 #include <byteswap.h>
24 #include <endian.h>
25 #include <errno.h>
26 #include <limits.h>
27 #include <obstack.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <wchar.h>
31 #include <wctype.h>
32 #include <sys/uio.h>
34 #include "localedef.h"
35 #include "charmap.h"
36 #include "localeinfo.h"
37 #include "langinfo.h"
38 #include "linereader.h"
39 #include "locfile-token.h"
40 #include "locfile.h"
42 #include <assert.h>
#ifdef PREDEFINED_CLASSES
/* These are the extra bits not in wctype.h since these are not preallocated
   classes.  The constants are unsigned: shifting a signed 1 into bit 31
   overflows `int' and is undefined behavior.  */
# define _ISwspecial1   (1U << 29)
# define _ISwspecial2   (1U << 30)
# define _ISwspecial3   (1U << 31)
#endif
/* The bit used for representing a special class.  BITPOS maps a class
   token to its bit number by subtracting `tok_upper'; BIT and BITw turn
   that number into a mask via _ISbit and _ISwbit respectively.  */
#define BITPOS(class) ((class) - tok_upper)
#define BIT(class) (_ISbit (BITPOS (class)))
#define BITw(class) (_ISwbit (BITPOS (class)))

/* Yield an lvalue for the slot that `find_idx' returns for VALUE in the
   table CTYPE->COLLECTION.  IDX is either empty or an array subscript
   (e.g. `[0]') selecting one of several parallel tables; it is pasted
   after the table name and after its `_max' and `_act' counters.  */
#define ELEM(ctype, collection, idx, value)                                   \
  *find_idx (ctype, &(ctype)->collection idx, &(ctype)->collection##_max idx, \
             &(ctype)->collection##_act idx, value)
/* To be compatible with former implementations we for now restrict
   the number of bits for character classes to 16.  When compatibility
   is not necessary anymore increase the number to 32.  */
#define char_class_t uint16_t
#define char_class32_t uint32_t
/* Type to describe a transliteration action.  We have a possibly
   multiple character from-string and a set of multiple character
   to-strings.  All are 32bit values since this is what is used in
   the gconv functions.  */
struct translit_to_t
{
  /* One replacement string.  */
  uint32_t *str;

  /* Next replacement in the singly linked list.  */
  struct translit_to_t *next;
};
/* One transliteration rule: the source string, the location where the
   rule was recorded, and the list of replacement strings.  */
struct translit_t
{
  uint32_t *from;

  /* File name and line number stored alongside the rule.  */
  const char *fname;
  size_t lineno;

  /* Head of the list of replacements for FROM.  */
  struct translit_to_t *to;

  /* Next rule in the singly linked list.  */
  struct translit_t *next;
};
/* One entry of the transliteration ignore list, given as a triple of
   FROM, TO and STEP values plus the location it was recorded at.  */
struct translit_ignore_t
{
  uint32_t from;
  uint32_t to;
  uint32_t step;

  /* File name and line number stored alongside the entry.  */
  const char *fname;
  size_t lineno;

  /* Next entry in the singly linked list.  */
  struct translit_ignore_t *next;
};
/* Type to describe a transliteration include statement.  */
struct translit_include_t
{
  const char *copy_locale;
  const char *copy_repertoire;

  /* Next include statement in the singly linked list.  */
  struct translit_include_t *next;
};
117 /* Sparse table of uint32_t. */
118 #define TABLE idx_table
119 #define ELEMENT uint32_t
120 #define DEFAULT ((uint32_t) ~0)
121 #define NO_FINALIZE
122 #include "3level.h"
125 /* The real definition of the struct for the LC_CTYPE locale. */
126 struct locale_ctype_t
128 uint32_t *charnames;
129 size_t charnames_max;
130 size_t charnames_act;
131 /* An index lookup table, to speedup find_idx. */
132 struct idx_table charnames_idx;
134 struct repertoire_t *repertoire;
136 /* We will allow up to 8 * sizeof (uint32_t) character classes. */
137 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
138 size_t nr_charclass;
139 const char *classnames[MAX_NR_CHARCLASS];
140 uint32_t last_class_char;
141 uint32_t class256_collection[256];
142 uint32_t *class_collection;
143 size_t class_collection_max;
144 size_t class_collection_act;
145 uint32_t class_done;
146 uint32_t class_offset;
148 struct charseq **mbdigits;
149 size_t mbdigits_act;
150 size_t mbdigits_max;
151 uint32_t *wcdigits;
152 size_t wcdigits_act;
153 size_t wcdigits_max;
155 struct charseq *mboutdigits[10];
156 uint32_t wcoutdigits[10];
157 size_t outdigits_act;
159 /* If the following number ever turns out to be too small simply
160 increase it. But I doubt it will. --drepper@gnu */
161 #define MAX_NR_CHARMAP 16
162 const char *mapnames[MAX_NR_CHARMAP];
163 uint32_t *map_collection[MAX_NR_CHARMAP];
164 uint32_t map256_collection[2][256];
165 size_t map_collection_max[MAX_NR_CHARMAP];
166 size_t map_collection_act[MAX_NR_CHARMAP];
167 size_t map_collection_nr;
168 size_t last_map_idx;
169 int tomap_done[MAX_NR_CHARMAP];
170 uint32_t map_offset;
172 /* Transliteration information. */
173 struct translit_include_t *translit_include;
174 struct translit_t *translit;
175 struct translit_ignore_t *translit_ignore;
176 uint32_t ntranslit_ignore;
178 uint32_t *default_missing;
179 const char *default_missing_file;
180 size_t default_missing_lineno;
182 uint32_t to_nonascii;
184 /* The arrays for the binary representation. */
185 char_class_t *ctype_b;
186 char_class32_t *ctype32_b;
187 uint32_t **map_b;
188 uint32_t **map32_b;
189 uint32_t **class_b;
190 struct iovec *class_3level;
191 struct iovec *map_3level;
192 uint32_t *class_name_ptr;
193 uint32_t *map_name_ptr;
194 struct iovec width;
195 uint32_t mb_cur_max;
196 const char *codeset_name;
197 uint32_t *translit_from_idx;
198 uint32_t *translit_from_tbl;
199 uint32_t *translit_to_idx;
200 uint32_t *translit_to_tbl;
201 uint32_t translit_idx_size;
202 size_t translit_from_tbl_size;
203 size_t translit_to_tbl_size;
205 struct obstack mempool;
/* Marker for an empty slot.  This has the value 0xFFFFFFFF, regardless
   whether 'int' is 16 bit, 32 bit, or 64 bit.  */
#define EMPTY ((uint32_t) ~0)
/* Allocation hooks used by the obstack macros (see `obstack_init' in
   ctype_startup).  */
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free
/* Prototypes for local functions.  */
static void ctype_startup (struct linereader *lr, struct localedef_t *locale,
                           const struct charmap_t *charmap,
                           struct localedef_t *copy_locale,
                           int ignore_content);
static void ctype_class_new (struct linereader *lr,
                             struct locale_ctype_t *ctype, const char *name);
static void ctype_map_new (struct linereader *lr,
                           struct locale_ctype_t *ctype,
                           const char *name, const struct charmap_t *charmap);
static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table,
                           size_t *max, size_t *act, unsigned int idx);
static void set_class_defaults (struct locale_ctype_t *ctype,
                                const struct charmap_t *charmap,
                                struct repertoire_t *repertoire);
static void allocate_arrays (struct locale_ctype_t *ctype,
                             const struct charmap_t *charmap,
                             struct repertoire_t *repertoire);
/* Symbolic names of the ten decimal digits, indexed by digit value.  */
static const char *longnames[] =
{
  "zero", "one", "two", "three", "four",
  "five", "six", "seven", "eight", "nine"
};
/* The same digits written as <Uxxxxxxxx>-style names (U+0030..U+0039).  */
static const char *uninames[] =
{
  "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
  "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
};
/* The digits as plain bytes; digits[N] is the character for N.  */
static const unsigned char digits[] = "0123456789";
251 static void
252 ctype_startup (struct linereader *lr, struct localedef_t *locale,
253 const struct charmap_t *charmap,
254 struct localedef_t *copy_locale, int ignore_content)
256 unsigned int cnt;
257 struct locale_ctype_t *ctype;
259 if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL)
261 if (copy_locale == NULL)
263 /* Allocate the needed room. */
264 locale->categories[LC_CTYPE].ctype = ctype =
265 (struct locale_ctype_t *) xcalloc (1,
266 sizeof (struct locale_ctype_t));
268 /* We have seen no names yet. */
269 ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
270 ctype->charnames =
271 (unsigned int *) xmalloc (ctype->charnames_max
272 * sizeof (unsigned int));
273 for (cnt = 0; cnt < 256; ++cnt)
274 ctype->charnames[cnt] = cnt;
275 ctype->charnames_act = 256;
276 idx_table_init (&ctype->charnames_idx);
278 /* Fill character class information. */
279 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
280 /* The order of the following instructions determines the bit
281 positions! */
282 ctype_class_new (lr, ctype, "upper");
283 ctype_class_new (lr, ctype, "lower");
284 ctype_class_new (lr, ctype, "alpha");
285 ctype_class_new (lr, ctype, "digit");
286 ctype_class_new (lr, ctype, "xdigit");
287 ctype_class_new (lr, ctype, "space");
288 ctype_class_new (lr, ctype, "print");
289 ctype_class_new (lr, ctype, "graph");
290 ctype_class_new (lr, ctype, "blank");
291 ctype_class_new (lr, ctype, "cntrl");
292 ctype_class_new (lr, ctype, "punct");
293 ctype_class_new (lr, ctype, "alnum");
294 #ifdef PREDEFINED_CLASSES
295 /* The following are extensions from ISO 14652. */
296 ctype_class_new (lr, ctype, "left_to_right");
297 ctype_class_new (lr, ctype, "right_to_left");
298 ctype_class_new (lr, ctype, "num_terminator");
299 ctype_class_new (lr, ctype, "num_separator");
300 ctype_class_new (lr, ctype, "segment_separator");
301 ctype_class_new (lr, ctype, "block_separator");
302 ctype_class_new (lr, ctype, "direction_control");
303 ctype_class_new (lr, ctype, "sym_swap_layout");
304 ctype_class_new (lr, ctype, "char_shape_selector");
305 ctype_class_new (lr, ctype, "num_shape_selector");
306 ctype_class_new (lr, ctype, "non_spacing");
307 ctype_class_new (lr, ctype, "non_spacing_level3");
308 ctype_class_new (lr, ctype, "normal_connect");
309 ctype_class_new (lr, ctype, "r_connect");
310 ctype_class_new (lr, ctype, "no_connect");
311 ctype_class_new (lr, ctype, "no_connect-space");
312 ctype_class_new (lr, ctype, "vowel_connect");
313 #endif
315 ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
316 ctype->class_collection
317 = (uint32_t *) xcalloc (sizeof (unsigned long int),
318 ctype->class_collection_max);
319 ctype->class_collection_act = 256;
321 /* Fill character map information. */
322 ctype->last_map_idx = MAX_NR_CHARMAP;
323 ctype_map_new (lr, ctype, "toupper", charmap);
324 ctype_map_new (lr, ctype, "tolower", charmap);
325 #ifdef PREDEFINED_CLASSES
326 ctype_map_new (lr, ctype, "tosymmetric", charmap);
327 #endif
329 /* Fill first 256 entries in `toXXX' arrays. */
330 for (cnt = 0; cnt < 256; ++cnt)
332 ctype->map_collection[0][cnt] = cnt;
333 ctype->map_collection[1][cnt] = cnt;
334 #ifdef PREDEFINED_CLASSES
335 ctype->map_collection[2][cnt] = cnt;
336 #endif
337 ctype->map256_collection[0][cnt] = cnt;
338 ctype->map256_collection[1][cnt] = cnt;
341 if (enc_not_ascii_compatible)
342 ctype->to_nonascii = 1;
344 obstack_init (&ctype->mempool);
346 else
347 ctype = locale->categories[LC_CTYPE].ctype =
348 copy_locale->categories[LC_CTYPE].ctype;
353 void
354 ctype_finish (struct localedef_t *locale, const struct charmap_t *charmap)
356 /* See POSIX.2, table 2-6 for the meaning of the following table. */
357 #define NCLASS 12
358 static const struct
360 const char *name;
361 const char allow[NCLASS];
363 valid_table[NCLASS] =
365 /* The order is important. See token.h for more information.
366 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
367 { "upper", "--MX-XDDXXX-" },
368 { "lower", "--MX-XDDXXX-" },
369 { "alpha", "---X-XDDXXX-" },
370 { "digit", "XXX--XDDXXX-" },
371 { "xdigit", "-----XDDXXX-" },
372 { "space", "XXXXX------X" },
373 { "print", "---------X--" },
374 { "graph", "---------X--" },
375 { "blank", "XXXXXM-----X" },
376 { "cntrl", "XXXXX-XX--XX" },
377 { "punct", "XXXXX-DD-X-X" },
378 { "alnum", "-----XDDXXX-" }
380 size_t cnt;
381 int cls1, cls2;
382 uint32_t space_value;
383 struct charseq *space_seq;
384 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
385 int warned;
386 const void *key;
387 size_t len;
388 void *vdata;
389 void *curs;
391 /* Now resolve copying and also handle completely missing definitions. */
392 if (ctype == NULL)
394 const char *repertoire_name;
396 /* First see whether we were supposed to copy. If yes, find the
397 actual definition. */
398 if (locale->copy_name[LC_CTYPE] != NULL)
400 /* Find the copying locale. This has to happen transitively since
401 the locale we are copying from might also copying another one. */
402 struct localedef_t *from = locale;
405 from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
406 from->repertoire_name, charmap);
407 while (from->categories[LC_CTYPE].ctype == NULL
408 && from->copy_name[LC_CTYPE] != NULL);
410 ctype = locale->categories[LC_CTYPE].ctype
411 = from->categories[LC_CTYPE].ctype;
414 /* If there is still no definition issue an warning and create an
415 empty one. */
416 if (ctype == NULL)
418 if (! be_quiet)
419 WITH_CUR_LOCALE (error (0, 0, _("\
420 No definition for %s category found"), "LC_CTYPE"));
421 ctype_startup (NULL, locale, charmap, NULL, 0);
422 ctype = locale->categories[LC_CTYPE].ctype;
425 /* Get the repertoire we have to use. */
426 repertoire_name = locale->repertoire_name ?: repertoire_global;
427 if (repertoire_name != NULL)
428 ctype->repertoire = repertoire_read (repertoire_name);
431 /* We need the name of the currently used 8-bit character set to
432 make correct conversion between this 8-bit representation and the
433 ISO 10646 character set used internally for wide characters. */
434 ctype->codeset_name = charmap->code_set_name;
435 if (ctype->codeset_name == NULL)
437 if (! be_quiet)
438 WITH_CUR_LOCALE (error (0, 0, _("\
439 No character set name specified in charmap")));
440 ctype->codeset_name = "//UNKNOWN//";
443 /* Set default value for classes not specified. */
444 set_class_defaults (ctype, charmap, ctype->repertoire);
446 /* Check according to table. */
447 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
449 uint32_t tmp = ctype->class_collection[cnt];
451 if (tmp != 0)
453 for (cls1 = 0; cls1 < NCLASS; ++cls1)
454 if ((tmp & _ISwbit (cls1)) != 0)
455 for (cls2 = 0; cls2 < NCLASS; ++cls2)
456 if (valid_table[cls1].allow[cls2] != '-')
458 int eq = (tmp & _ISwbit (cls2)) != 0;
459 switch (valid_table[cls1].allow[cls2])
461 case 'M':
462 if (!eq)
464 uint32_t value = ctype->charnames[cnt];
466 if (!be_quiet)
467 WITH_CUR_LOCALE (error (0, 0, _("\
468 character L'\\u%0*x' in class `%s' must be in class `%s'"),
469 value > 0xffff ? 8 : 4,
470 value,
471 valid_table[cls1].name,
472 valid_table[cls2].name));
474 break;
476 case 'X':
477 if (eq)
479 uint32_t value = ctype->charnames[cnt];
481 if (!be_quiet)
482 WITH_CUR_LOCALE (error (0, 0, _("\
483 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
484 value > 0xffff ? 8 : 4,
485 value,
486 valid_table[cls1].name,
487 valid_table[cls2].name));
489 break;
491 case 'D':
492 ctype->class_collection[cnt] |= _ISwbit (cls2);
493 break;
495 default:
496 WITH_CUR_LOCALE (error (5, 0, _("\
497 internal error in %s, line %u"), __FUNCTION__, __LINE__));
503 for (cnt = 0; cnt < 256; ++cnt)
505 uint32_t tmp = ctype->class256_collection[cnt];
507 if (tmp != 0)
509 for (cls1 = 0; cls1 < NCLASS; ++cls1)
510 if ((tmp & _ISbit (cls1)) != 0)
511 for (cls2 = 0; cls2 < NCLASS; ++cls2)
512 if (valid_table[cls1].allow[cls2] != '-')
514 int eq = (tmp & _ISbit (cls2)) != 0;
515 switch (valid_table[cls1].allow[cls2])
517 case 'M':
518 if (!eq)
520 char buf[17];
522 snprintf (buf, sizeof buf, "\\%Zo", cnt);
524 if (!be_quiet)
525 WITH_CUR_LOCALE (error (0, 0, _("\
526 character '%s' in class `%s' must be in class `%s'"),
527 buf,
528 valid_table[cls1].name,
529 valid_table[cls2].name));
531 break;
533 case 'X':
534 if (eq)
536 char buf[17];
538 snprintf (buf, sizeof buf, "\\%Zo", cnt);
540 if (!be_quiet)
541 WITH_CUR_LOCALE (error (0, 0, _("\
542 character '%s' in class `%s' must not be in class `%s'"),
543 buf,
544 valid_table[cls1].name,
545 valid_table[cls2].name));
547 break;
549 case 'D':
550 ctype->class256_collection[cnt] |= _ISbit (cls2);
551 break;
553 default:
554 WITH_CUR_LOCALE (error (5, 0, _("\
555 internal error in %s, line %u"), __FUNCTION__, __LINE__));
561 /* ... and now test <SP> as a special case. */
562 space_value = 32;
563 if (((cnt = BITPOS (tok_space),
564 (ELEM (ctype, class_collection, , space_value)
565 & BITw (tok_space)) == 0)
566 || (cnt = BITPOS (tok_blank),
567 (ELEM (ctype, class_collection, , space_value)
568 & BITw (tok_blank)) == 0)))
570 if (!be_quiet)
571 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
572 valid_table[cnt].name));
574 else if (((cnt = BITPOS (tok_punct),
575 (ELEM (ctype, class_collection, , space_value)
576 & BITw (tok_punct)) != 0)
577 || (cnt = BITPOS (tok_graph),
578 (ELEM (ctype, class_collection, , space_value)
579 & BITw (tok_graph))
580 != 0)))
582 if (!be_quiet)
583 WITH_CUR_LOCALE (error (0, 0, _("\
584 <SP> character must not be in class `%s'"),
585 valid_table[cnt].name));
587 else
588 ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
590 space_seq = charmap_find_value (charmap, "SP", 2);
591 if (space_seq == NULL)
592 space_seq = charmap_find_value (charmap, "space", 5);
593 if (space_seq == NULL)
594 space_seq = charmap_find_value (charmap, "U00000020", 9);
595 if (space_seq == NULL || space_seq->nbytes != 1)
597 if (!be_quiet)
598 WITH_CUR_LOCALE (error (0, 0, _("\
599 character <SP> not defined in character map")));
601 else if (((cnt = BITPOS (tok_space),
602 (ctype->class256_collection[space_seq->bytes[0]]
603 & BIT (tok_space)) == 0)
604 || (cnt = BITPOS (tok_blank),
605 (ctype->class256_collection[space_seq->bytes[0]]
606 & BIT (tok_blank)) == 0)))
608 if (!be_quiet)
609 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
610 valid_table[cnt].name));
612 else if (((cnt = BITPOS (tok_punct),
613 (ctype->class256_collection[space_seq->bytes[0]]
614 & BIT (tok_punct)) != 0)
615 || (cnt = BITPOS (tok_graph),
616 (ctype->class256_collection[space_seq->bytes[0]]
617 & BIT (tok_graph)) != 0)))
619 if (!be_quiet)
620 WITH_CUR_LOCALE (error (0, 0, _("\
621 <SP> character must not be in class `%s'"),
622 valid_table[cnt].name));
624 else
625 ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
627 /* Now that the tests are done make sure the name array contains all
628 characters which are handled in the WIDTH section of the
629 character set definition file. */
630 if (charmap->width_rules != NULL)
631 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
633 unsigned char bytes[charmap->mb_cur_max];
634 int nbytes = charmap->width_rules[cnt].from->nbytes;
636 /* We have the range of character for which the width is
637 specified described using byte sequences of the multibyte
638 charset. We have to convert this to UCS4 now. And we
639 cannot simply convert the beginning and the end of the
640 sequence, we have to iterate over the byte sequence and
641 convert it for every single character. */
642 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
644 while (nbytes < charmap->width_rules[cnt].to->nbytes
645 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
646 nbytes) <= 0)
648 /* Find the UCS value for `bytes'. */
649 int inner;
650 uint32_t wch;
651 struct charseq *seq = charmap_find_symbol (charmap, bytes, nbytes);
653 if (seq == NULL)
654 wch = ILLEGAL_CHAR_VALUE;
655 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
656 wch = seq->ucs4;
657 else
658 wch = repertoire_find_value (ctype->repertoire, seq->name,
659 strlen (seq->name));
661 if (wch != ILLEGAL_CHAR_VALUE)
662 /* We are only interested in the side-effects of the
663 `find_idx' call. It will add appropriate entries in
664 the name array if this is necessary. */
665 (void) find_idx (ctype, NULL, NULL, NULL, wch);
667 /* "Increment" the bytes sequence. */
668 inner = nbytes - 1;
669 while (inner >= 0 && bytes[inner] == 0xff)
670 --inner;
672 if (inner < 0)
674 /* We have to extend the byte sequence. */
675 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
676 break;
678 bytes[0] = 1;
679 memset (&bytes[1], 0, nbytes);
680 ++nbytes;
682 else
684 ++bytes[inner];
685 while (++inner < nbytes)
686 bytes[inner] = 0;
691 /* Now set all the other characters of the character set to the
692 default width. */
693 curs = NULL;
694 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
696 struct charseq *data = (struct charseq *) vdata;
698 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
699 data->ucs4 = repertoire_find_value (ctype->repertoire,
700 data->name, len);
702 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
703 (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
706 /* There must be a multiple of 10 digits. */
707 if (ctype->mbdigits_act % 10 != 0)
709 assert (ctype->mbdigits_act == ctype->wcdigits_act);
710 ctype->wcdigits_act -= ctype->mbdigits_act % 10;
711 ctype->mbdigits_act -= ctype->mbdigits_act % 10;
712 WITH_CUR_LOCALE (error (0, 0, _("\
713 `digit' category has not entries in groups of ten")));
716 /* Check the input digits. There must be a multiple of ten available.
717 In each group it could be that one or the other character is missing.
718 In this case the whole group must be removed. */
719 cnt = 0;
720 while (cnt < ctype->mbdigits_act)
722 size_t inner;
723 for (inner = 0; inner < 10; ++inner)
724 if (ctype->mbdigits[cnt + inner] == NULL)
725 break;
727 if (inner == 10)
728 cnt += 10;
729 else
731 /* Remove the group. */
732 memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
733 ((ctype->wcdigits_act - cnt - 10)
734 * sizeof (ctype->mbdigits[0])));
735 ctype->mbdigits_act -= 10;
739 /* If no input digits are given use the default. */
740 if (ctype->mbdigits_act == 0)
742 if (ctype->mbdigits_max == 0)
744 ctype->mbdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
745 10 * sizeof (struct charseq *));
746 ctype->mbdigits_max = 10;
749 for (cnt = 0; cnt < 10; ++cnt)
751 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
752 digits + cnt, 1);
753 if (ctype->mbdigits[cnt] == NULL)
755 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
756 longnames[cnt],
757 strlen (longnames[cnt]));
758 if (ctype->mbdigits[cnt] == NULL)
760 /* Hum, this ain't good. */
761 WITH_CUR_LOCALE (error (0, 0, _("\
762 no input digits defined and none of the standard names in the charmap")));
764 ctype->mbdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
765 sizeof (struct charseq) + 1);
767 /* This is better than nothing. */
768 ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
769 ctype->mbdigits[cnt]->nbytes = 1;
774 ctype->mbdigits_act = 10;
777 /* Check the wide character input digits. There must be a multiple
778 of ten available. In each group it could be that one or the other
779 character is missing. In this case the whole group must be
780 removed. */
781 cnt = 0;
782 while (cnt < ctype->wcdigits_act)
784 size_t inner;
785 for (inner = 0; inner < 10; ++inner)
786 if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
787 break;
789 if (inner == 10)
790 cnt += 10;
791 else
793 /* Remove the group. */
794 memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
795 ((ctype->wcdigits_act - cnt - 10)
796 * sizeof (ctype->wcdigits[0])));
797 ctype->wcdigits_act -= 10;
801 /* If no input digits are given use the default. */
802 if (ctype->wcdigits_act == 0)
804 if (ctype->wcdigits_max == 0)
806 ctype->wcdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
807 10 * sizeof (uint32_t));
808 ctype->wcdigits_max = 10;
811 for (cnt = 0; cnt < 10; ++cnt)
812 ctype->wcdigits[cnt] = L'0' + cnt;
814 ctype->mbdigits_act = 10;
817 /* Check the outdigits. */
818 warned = 0;
819 for (cnt = 0; cnt < 10; ++cnt)
820 if (ctype->mboutdigits[cnt] == NULL)
822 static struct charseq replace[2];
824 if (!warned)
826 WITH_CUR_LOCALE (error (0, 0, _("\
827 not all characters used in `outdigit' are available in the charmap")));
828 warned = 1;
831 replace[0].nbytes = 1;
832 replace[0].bytes[0] = '?';
833 replace[0].bytes[1] = '\0';
834 ctype->mboutdigits[cnt] = &replace[0];
837 warned = 0;
838 for (cnt = 0; cnt < 10; ++cnt)
839 if (ctype->wcoutdigits[cnt] == 0)
841 if (!warned)
843 WITH_CUR_LOCALE (error (0, 0, _("\
844 not all characters used in `outdigit' are available in the repertoire")));
845 warned = 1;
848 ctype->wcoutdigits[cnt] = L'?';
851 /* Sort the entries in the translit_ignore list. */
852 if (ctype->translit_ignore != NULL)
854 struct translit_ignore_t *firstp = ctype->translit_ignore;
855 struct translit_ignore_t *runp;
857 ctype->ntranslit_ignore = 1;
859 for (runp = firstp->next; runp != NULL; runp = runp->next)
861 struct translit_ignore_t *lastp = NULL;
862 struct translit_ignore_t *cmpp;
864 ++ctype->ntranslit_ignore;
866 for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next)
867 if (runp->from < cmpp->from)
868 break;
870 runp->next = lastp;
871 if (lastp == NULL)
872 firstp = runp;
875 ctype->translit_ignore = firstp;
880 void
881 ctype_output (struct localedef_t *locale, const struct charmap_t *charmap,
882 const char *output_path)
884 static const char nulbytes[4] = { 0, 0, 0, 0 };
885 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
886 const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)
887 + ctype->nr_charclass + ctype->map_collection_nr);
888 struct iovec *iov = alloca (sizeof *iov
889 * (2 + nelems + 2 * ctype->nr_charclass
890 + ctype->map_collection_nr + 4));
891 struct locale_file data;
892 uint32_t *idx = alloca (sizeof *idx * (nelems + 1));
893 uint32_t default_missing_len;
894 size_t elem, cnt, offset, total;
895 char *cp;
897 /* Now prepare the output: Find the sizes of the table we can use. */
898 allocate_arrays (ctype, charmap, ctype->repertoire);
900 data.magic = LIMAGIC (LC_CTYPE);
901 data.n = nelems;
902 iov[0].iov_base = (void *) &data;
903 iov[0].iov_len = sizeof (data);
905 iov[1].iov_base = (void *) idx;
906 iov[1].iov_len = nelems * sizeof (uint32_t);
908 idx[0] = iov[0].iov_len + iov[1].iov_len;
909 offset = 0;
911 for (elem = 0; elem < nelems; ++elem)
913 if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))
914 switch (elem)
916 #define CTYPE_EMPTY(name) \
917 case name: \
918 iov[2 + elem + offset].iov_base = NULL; \
919 iov[2 + elem + offset].iov_len = 0; \
920 idx[elem + 1] = idx[elem]; \
921 break
923 CTYPE_EMPTY(_NL_CTYPE_GAP1);
924 CTYPE_EMPTY(_NL_CTYPE_GAP2);
925 CTYPE_EMPTY(_NL_CTYPE_GAP3);
926 CTYPE_EMPTY(_NL_CTYPE_GAP4);
927 CTYPE_EMPTY(_NL_CTYPE_GAP5);
928 CTYPE_EMPTY(_NL_CTYPE_GAP6);
930 #define CTYPE_DATA(name, base, len) \
931 case _NL_ITEM_INDEX (name): \
932 iov[2 + elem + offset].iov_base = (base); \
933 iov[2 + elem + offset].iov_len = (len); \
934 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
935 break
937 CTYPE_DATA (_NL_CTYPE_CLASS,
938 ctype->ctype_b,
939 (256 + 128) * sizeof (char_class_t));
941 CTYPE_DATA (_NL_CTYPE_TOUPPER,
942 ctype->map_b[0],
943 (256 + 128) * sizeof (uint32_t));
944 CTYPE_DATA (_NL_CTYPE_TOLOWER,
945 ctype->map_b[1],
946 (256 + 128) * sizeof (uint32_t));
948 CTYPE_DATA (_NL_CTYPE_TOUPPER32,
949 ctype->map32_b[0],
950 256 * sizeof (uint32_t));
951 CTYPE_DATA (_NL_CTYPE_TOLOWER32,
952 ctype->map32_b[1],
953 256 * sizeof (uint32_t));
955 CTYPE_DATA (_NL_CTYPE_CLASS32,
956 ctype->ctype32_b,
957 256 * sizeof (char_class32_t));
959 CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET,
960 &ctype->class_offset, sizeof (uint32_t));
962 CTYPE_DATA (_NL_CTYPE_MAP_OFFSET,
963 &ctype->map_offset, sizeof (uint32_t));
965 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE,
966 &ctype->translit_idx_size, sizeof (uint32_t));
968 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX,
969 ctype->translit_from_idx,
970 ctype->translit_idx_size * sizeof (uint32_t));
972 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL,
973 ctype->translit_from_tbl,
974 ctype->translit_from_tbl_size);
976 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX,
977 ctype->translit_to_idx,
978 ctype->translit_idx_size * sizeof (uint32_t));
980 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL,
981 ctype->translit_to_tbl, ctype->translit_to_tbl_size);
983 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
984 /* The class name array. */
985 total = 0;
986 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
988 iov[2 + elem + offset].iov_base
989 = (void *) ctype->classnames[cnt];
990 iov[2 + elem + offset].iov_len
991 = strlen (ctype->classnames[cnt]) + 1;
992 total += iov[2 + elem + offset].iov_len;
994 iov[2 + elem + offset].iov_base = (void *) nulbytes;
995 iov[2 + elem + offset].iov_len = 4 - (total % 4);
996 total += 4 - (total % 4);
998 idx[elem + 1] = idx[elem] + total;
999 break;
1001 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
1002 /* The class name array. */
1003 total = 0;
1004 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
1006 iov[2 + elem + offset].iov_base
1007 = (void *) ctype->mapnames[cnt];
1008 iov[2 + elem + offset].iov_len
1009 = strlen (ctype->mapnames[cnt]) + 1;
1010 total += iov[2 + elem + offset].iov_len;
1012 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1013 iov[2 + elem + offset].iov_len = 4 - (total % 4);
1014 total += 4 - (total % 4);
1016 idx[elem + 1] = idx[elem] + total;
1017 break;
1019 CTYPE_DATA (_NL_CTYPE_WIDTH,
1020 ctype->width.iov_base,
1021 ctype->width.iov_len);
1023 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
1024 &ctype->mb_cur_max, sizeof (uint32_t));
1026 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
1027 total = strlen (ctype->codeset_name) + 1;
1028 if (total % 4 == 0)
1029 iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
1030 else
1032 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
1033 memset (mempcpy (iov[2 + elem + offset].iov_base,
1034 ctype->codeset_name, total),
1035 '\0', 4 - (total & 3));
1036 total = (total + 3) & ~3;
1038 iov[2 + elem + offset].iov_len = total;
1039 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1040 break;
1043 CTYPE_DATA (_NL_CTYPE_MAP_TO_NONASCII,
1044 &ctype->to_nonascii, sizeof (uint32_t));
1046 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
1047 iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1048 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1049 *(uint32_t *) iov[2 + elem + offset].iov_base =
1050 ctype->mbdigits_act / 10;
1051 idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1052 break;
1054 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
1055 /* Align entries. */
1056 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1057 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1058 idx[elem] += iov[2 + elem + offset].iov_len;
1059 ++offset;
1061 iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1062 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1063 *(uint32_t *) iov[2 + elem + offset].iov_base =
1064 ctype->wcdigits_act / 10;
1065 idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1066 break;
1068 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
1069 /* Compute the length of all possible characters. For INDIGITS
1070 there might be more than one. We simply concatenate all of
1071 them with a NUL byte following. The NUL byte wouldn't be
1072 necessary but it makes it easier for the user. */
1073 total = 0;
1075 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1076 cnt < ctype->mbdigits_act; cnt += 10)
1077 total += ctype->mbdigits[cnt]->nbytes + 1;
1078 iov[2 + elem + offset].iov_base = (char *) alloca (total);
1079 iov[2 + elem + offset].iov_len = total;
1081 cp = iov[2 + elem + offset].iov_base;
1082 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1083 cnt < ctype->mbdigits_act; cnt += 10)
1085 cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes,
1086 ctype->mbdigits[cnt]->nbytes);
1087 *cp++ = '\0';
1089 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1090 break;
1092 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
1093 /* Compute the length of all possible characters. For INDIGITS
1094 there might be more than one. We simply concatenate all of
1095 them with a NUL byte following. The NUL byte wouldn't be
1096 necessary but it makes it easier for the user. */
1097 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
1098 total = ctype->mboutdigits[cnt]->nbytes + 1;
1099 iov[2 + elem + offset].iov_base = (char *) alloca (total);
1100 iov[2 + elem + offset].iov_len = total;
1102 *(char *) mempcpy (iov[2 + elem + offset].iov_base,
1103 ctype->mboutdigits[cnt]->bytes,
1104 ctype->mboutdigits[cnt]->nbytes) = '\0';
1105 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1106 break;
1108 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
1109 total = ctype->wcdigits_act / 10;
1111 iov[2 + elem + offset].iov_base =
1112 (uint32_t *) alloca (total * sizeof (uint32_t));
1113 iov[2 + elem + offset].iov_len = total * sizeof (uint32_t);
1115 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
1116 cnt < ctype->wcdigits_act; cnt += 10)
1117 ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10]
1118 = ctype->wcdigits[cnt];
1119 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1120 break;
1122 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC):
1123 /* Align entries. */
1124 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1125 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1126 idx[elem] += iov[2 + elem + offset].iov_len;
1127 ++offset;
1128 /* FALLTHROUGH */
1130 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT1_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
1131 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
1132 iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt];
1133 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1134 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1135 break;
1137 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
1138 /* Align entries. */
1139 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1140 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1141 idx[elem] += iov[2 + elem + offset].iov_len;
1142 ++offset;
1144 default_missing_len = (ctype->default_missing
1145 ? wcslen ((wchar_t *)ctype->default_missing)
1146 : 0);
1147 iov[2 + elem + offset].iov_base = &default_missing_len;
1148 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1149 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1150 break;
1152 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
1153 iov[2 + elem + offset].iov_base =
1154 ctype->default_missing ?: (uint32_t *) L"";
1155 iov[2 + elem + offset].iov_len =
1156 wcslen (iov[2 + elem + offset].iov_base) * sizeof (uint32_t);
1157 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1158 break;
1160 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
1161 /* Align entries. */
1162 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1163 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1164 idx[elem] += iov[2 + elem + offset].iov_len;
1165 ++offset;
1167 iov[2 + elem + offset].iov_base = &ctype->ntranslit_ignore;
1168 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1169 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1170 break;
1172 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
1174 uint32_t *ranges = (uint32_t *) alloca (ctype->ntranslit_ignore
1175 * 3 * sizeof (uint32_t));
1176 struct translit_ignore_t *runp;
1178 iov[2 + elem + offset].iov_base = ranges;
1179 iov[2 + elem + offset].iov_len = (ctype->ntranslit_ignore
1180 * 3 * sizeof (uint32_t));
1182 for (runp = ctype->translit_ignore; runp != NULL;
1183 runp = runp->next)
1185 *ranges++ = runp->from;
1186 *ranges++ = runp->to;
1187 *ranges++ = runp->step;
1190 /* Remove the following line in case a new entry is added
1191 after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN. */
1192 if (elem < nelems)
1193 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1194 break;
1196 default:
1197 assert (! "unknown CTYPE element");
1199 else
1201 /* Handle extra maps. */
1202 size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
1203 if (nr < ctype->nr_charclass)
1205 iov[2 + elem + offset].iov_base = ctype->class_b[nr];
1206 iov[2 + elem + offset].iov_len = 256 / 32 * sizeof (uint32_t);
1207 idx[elem] += iov[2 + elem + offset].iov_len;
1208 ++offset;
1210 iov[2 + elem + offset] = ctype->class_3level[nr];
1212 else
1214 nr -= ctype->nr_charclass;
1215 assert (nr < ctype->map_collection_nr);
1216 iov[2 + elem + offset] = ctype->map_3level[nr];
1218 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1222 assert (2 + elem + offset == (nelems + 2 * ctype->nr_charclass
1223 + ctype->map_collection_nr + 4 + 2));
1225 write_locale_data (output_path, LC_CTYPE, "LC_CTYPE", 2 + elem + offset,
1226 iov);
1230 /* Local functions. */
1231 static void
1232 ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
1233 const char *name)
1235 size_t cnt;
1237 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1238 if (strcmp (ctype->classnames[cnt], name) == 0)
1239 break;
1241 if (cnt < ctype->nr_charclass)
1243 lr_error (lr, _("character class `%s' already defined"), name);
1244 return;
1247 if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1248 /* Exit code 2 is prescribed in P1003.2b. */
1249 WITH_CUR_LOCALE (error (2, 0, _("\
1250 implementation limit: no more than %Zd character classes allowed"),
1251 MAX_NR_CHARCLASS));
1253 ctype->classnames[ctype->nr_charclass++] = name;
1257 static void
1258 ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1259 const char *name, const struct charmap_t *charmap)
1261 size_t max_chars = 0;
1262 size_t cnt;
1264 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1266 if (strcmp (ctype->mapnames[cnt], name) == 0)
1267 break;
1269 if (max_chars < ctype->map_collection_max[cnt])
1270 max_chars = ctype->map_collection_max[cnt];
1273 if (cnt < ctype->map_collection_nr)
1275 lr_error (lr, _("character map `%s' already defined"), name);
1276 return;
1279 if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1280 /* Exit code 2 is prescribed in P1003.2b. */
1281 WITH_CUR_LOCALE (error (2, 0, _("\
1282 implementation limit: no more than %d character maps allowed"),
1283 MAX_NR_CHARMAP));
1285 ctype->mapnames[cnt] = name;
1287 if (max_chars == 0)
1288 ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1289 else
1290 ctype->map_collection_max[cnt] = max_chars;
1292 ctype->map_collection[cnt] = (uint32_t *)
1293 xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
1294 ctype->map_collection_act[cnt] = 256;
1296 ++ctype->map_collection_nr;
1300 /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
1301 is possible if we only want to extend the name array. */
1302 static uint32_t *
1303 find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1304 size_t *act, uint32_t idx)
1306 size_t cnt;
1308 if (idx < 256)
1309 return table == NULL ? NULL : &(*table)[idx];
1311 /* Use the charnames_idx lookup table instead of the slow search loop. */
1312 #if 1
1313 cnt = idx_table_get (&ctype->charnames_idx, idx);
1314 if (cnt == EMPTY)
1315 /* Not found. */
1316 cnt = ctype->charnames_act;
1317 #else
1318 for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1319 if (ctype->charnames[cnt] == idx)
1320 break;
1321 #endif
1323 /* We have to distinguish two cases: the name is found or not. */
1324 if (cnt == ctype->charnames_act)
1326 /* Extend the name array. */
1327 if (ctype->charnames_act == ctype->charnames_max)
1329 ctype->charnames_max *= 2;
1330 ctype->charnames = (uint32_t *)
1331 xrealloc (ctype->charnames,
1332 sizeof (uint32_t) * ctype->charnames_max);
1334 ctype->charnames[ctype->charnames_act++] = idx;
1335 idx_table_add (&ctype->charnames_idx, idx, cnt);
1338 if (table == NULL)
1339 /* We have done everything we are asked to do. */
1340 return NULL;
1342 if (max == NULL)
1343 /* The caller does not want to extend the table. */
1344 return (cnt >= *act ? NULL : &(*table)[cnt]);
1346 if (cnt >= *act)
1348 if (cnt >= *max)
1350 size_t old_max = *max;
1352 *max *= 2;
1353 while (*max <= cnt);
1355 *table =
1356 (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
1357 memset (&(*table)[old_max], '\0',
1358 (*max - old_max) * sizeof (uint32_t));
1361 *act = cnt + 1;
1364 return &(*table)[cnt];
1368 static int
1369 get_character (struct token *now, const struct charmap_t *charmap,
1370 struct repertoire_t *repertoire,
1371 struct charseq **seqp, uint32_t *wchp)
1373 if (now->tok == tok_bsymbol)
1375 /* This will hopefully be the normal case. */
1376 *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1377 now->val.str.lenmb);
1378 *seqp = charmap_find_value (charmap, now->val.str.startmb,
1379 now->val.str.lenmb);
1381 else if (now->tok == tok_ucs4)
1383 char utmp[10];
1385 snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1386 *seqp = charmap_find_value (charmap, utmp, 9);
1388 if (*seqp == NULL)
1389 *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
1391 if (*seqp == NULL)
1393 /* Compute the value in the charmap from the UCS value. */
1394 const char *symbol = repertoire_find_symbol (repertoire,
1395 now->val.ucs4);
1397 if (symbol == NULL)
1398 *seqp = NULL;
1399 else
1400 *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
1402 if (*seqp == NULL)
1404 if (repertoire != NULL)
1406 /* Insert a negative entry. */
1407 static const struct charseq negative
1408 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1409 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1410 sizeof (uint32_t));
1411 *newp = now->val.ucs4;
1413 insert_entry (&repertoire->seq_table, newp,
1414 sizeof (uint32_t), (void *) &negative);
1417 else
1418 (*seqp)->ucs4 = now->val.ucs4;
1420 else if ((*seqp)->ucs4 != now->val.ucs4)
1421 *seqp = NULL;
1423 *wchp = now->val.ucs4;
1425 else if (now->tok == tok_charcode)
1427 /* We must map from the byte code to UCS4. */
1428 *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1429 now->val.str.lenmb);
1431 if (*seqp == NULL)
1432 *wchp = ILLEGAL_CHAR_VALUE;
1433 else
1435 if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1436 (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1437 strlen ((*seqp)->name));
1438 *wchp = (*seqp)->ucs4;
1441 else
1442 return 1;
1444 return 0;
1448 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1449 the .(2). counterparts. */
1450 static void
1451 charclass_symbolic_ellipsis (struct linereader *ldfile,
1452 struct locale_ctype_t *ctype,
1453 const struct charmap_t *charmap,
1454 struct repertoire_t *repertoire,
1455 struct token *now,
1456 const char *last_str,
1457 unsigned long int class256_bit,
1458 unsigned long int class_bit, int base,
1459 int ignore_content, int handle_digits, int step)
1461 const char *nowstr = now->val.str.startmb;
1462 char tmp[now->val.str.lenmb + 1];
1463 const char *cp;
1464 char *endp;
1465 unsigned long int from;
1466 unsigned long int to;
1468 /* We have to compute the ellipsis values using the symbolic names. */
1469 assert (last_str != NULL);
1471 if (strlen (last_str) != now->val.str.lenmb)
1473 invalid_range:
1474 lr_error (ldfile,
1475 _("`%s' and `%.*s' are not valid names for symbolic range"),
1476 last_str, (int) now->val.str.lenmb, nowstr);
1477 return;
1480 if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1481 /* Nothing to do, the names are the same. */
1482 return;
1484 for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1487 errno = 0;
1488 from = strtoul (cp, &endp, base);
1489 if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1490 goto invalid_range;
1492 to = strtoul (nowstr + (cp - last_str), &endp, base);
1493 if ((to == UINT_MAX && errno == ERANGE)
1494 || (endp - nowstr) != now->val.str.lenmb || from >= to)
1495 goto invalid_range;
1497 /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
1498 if (!ignore_content)
1500 now->val.str.startmb = tmp;
1501 while ((from += step) <= to)
1503 struct charseq *seq;
1504 uint32_t wch;
1506 sprintf (tmp, (base == 10 ? "%.*s%0*ld" : "%.*s%0*lX"),
1507 (int) (cp - last_str), last_str,
1508 (int) (now->val.str.lenmb - (cp - last_str)),
1509 from);
1511 get_character (now, charmap, repertoire, &seq, &wch);
1513 if (seq != NULL && seq->nbytes == 1)
1514 /* Yep, we can store information about this byte sequence. */
1515 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
1517 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1518 /* We have the UCS4 position. */
1519 *find_idx (ctype, &ctype->class_collection,
1520 &ctype->class_collection_max,
1521 &ctype->class_collection_act, wch) |= class_bit;
1523 if (handle_digits == 1)
1525 /* We must store the digit values. */
1526 if (ctype->mbdigits_act == ctype->mbdigits_max)
1528 ctype->mbdigits_max *= 2;
1529 ctype->mbdigits = xrealloc (ctype->mbdigits,
1530 (ctype->mbdigits_max
1531 * sizeof (char *)));
1532 ctype->wcdigits_max *= 2;
1533 ctype->wcdigits = xrealloc (ctype->wcdigits,
1534 (ctype->wcdigits_max
1535 * sizeof (uint32_t)));
1538 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1539 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1541 else if (handle_digits == 2)
1543 /* We must store the digit values. */
1544 if (ctype->outdigits_act >= 10)
1546 lr_error (ldfile, _("\
1547 %s: field `%s' does not contain exactly ten entries"),
1548 "LC_CTYPE", "outdigit");
1549 return;
1552 ctype->mboutdigits[ctype->outdigits_act] = seq;
1553 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1554 ++ctype->outdigits_act;
1561 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. */
1562 static void
1563 charclass_ucs4_ellipsis (struct linereader *ldfile,
1564 struct locale_ctype_t *ctype,
1565 const struct charmap_t *charmap,
1566 struct repertoire_t *repertoire,
1567 struct token *now, uint32_t last_wch,
1568 unsigned long int class256_bit,
1569 unsigned long int class_bit, int ignore_content,
1570 int handle_digits, int step)
1572 if (last_wch > now->val.ucs4)
1574 lr_error (ldfile, _("\
1575 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1576 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1577 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
1578 return;
1581 if (!ignore_content)
1582 while ((last_wch += step) <= now->val.ucs4)
1584 /* We have to find out whether there is a byte sequence corresponding
1585 to this UCS4 value. */
1586 struct charseq *seq;
1587 char utmp[10];
1589 snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1590 seq = charmap_find_value (charmap, utmp, 9);
1591 if (seq == NULL)
1593 snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1594 seq = charmap_find_value (charmap, utmp, 5);
1597 if (seq == NULL)
1598 /* Try looking in the repertoire map. */
1599 seq = repertoire_find_seq (repertoire, last_wch);
1601 /* If this is the first time we look for this sequence create a new
1602 entry. */
1603 if (seq == NULL)
1605 static const struct charseq negative
1606 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1608 /* Find the symbolic name for this UCS4 value. */
1609 if (repertoire != NULL)
1611 const char *symbol = repertoire_find_symbol (repertoire,
1612 last_wch);
1613 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1614 sizeof (uint32_t));
1615 *newp = last_wch;
1617 if (symbol != NULL)
1618 /* We have a name, now search the multibyte value. */
1619 seq = charmap_find_value (charmap, symbol, strlen (symbol));
1621 if (seq == NULL)
1622 /* We have to create a fake entry. */
1623 seq = (struct charseq *) &negative;
1624 else
1625 seq->ucs4 = last_wch;
1627 insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1628 seq);
1630 else
1631 /* We have to create a fake entry. */
1632 seq = (struct charseq *) &negative;
1635 /* We have a name, now search the multibyte value. */
1636 if (seq->ucs4 == last_wch && seq->nbytes == 1)
1637 /* Yep, we can store information about this byte sequence. */
1638 ctype->class256_collection[(size_t) seq->bytes[0]]
1639 |= class256_bit;
1641 /* And of course we have the UCS4 position. */
1642 if (class_bit != 0)
1643 *find_idx (ctype, &ctype->class_collection,
1644 &ctype->class_collection_max,
1645 &ctype->class_collection_act, last_wch) |= class_bit;
1647 if (handle_digits == 1)
1649 /* We must store the digit values. */
1650 if (ctype->mbdigits_act == ctype->mbdigits_max)
1652 ctype->mbdigits_max *= 2;
1653 ctype->mbdigits = xrealloc (ctype->mbdigits,
1654 (ctype->mbdigits_max
1655 * sizeof (char *)));
1656 ctype->wcdigits_max *= 2;
1657 ctype->wcdigits = xrealloc (ctype->wcdigits,
1658 (ctype->wcdigits_max
1659 * sizeof (uint32_t)));
1662 ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1663 ? seq : NULL);
1664 ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1666 else if (handle_digits == 2)
1668 /* We must store the digit values. */
1669 if (ctype->outdigits_act >= 10)
1671 lr_error (ldfile, _("\
1672 %s: field `%s' does not contain exactly ten entries"),
1673 "LC_CTYPE", "outdigit");
1674 return;
1677 ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1678 ? seq : NULL);
1679 ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1680 ++ctype->outdigits_act;
1686 /* Ellipsis as in `/xea/x12.../xea/x34'. */
1687 static void
1688 charclass_charcode_ellipsis (struct linereader *ldfile,
1689 struct locale_ctype_t *ctype,
1690 const struct charmap_t *charmap,
1691 struct repertoire_t *repertoire,
1692 struct token *now, char *last_charcode,
1693 uint32_t last_charcode_len,
1694 unsigned long int class256_bit,
1695 unsigned long int class_bit, int ignore_content,
1696 int handle_digits)
1698 /* First check whether the to-value is larger. */
1699 if (now->val.charcode.nbytes != last_charcode_len)
1701 lr_error (ldfile, _("\
1702 start and end character sequence of range must have the same length"));
1703 return;
1706 if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
1708 lr_error (ldfile, _("\
1709 to-value character sequence is smaller than from-value sequence"));
1710 return;
1713 if (!ignore_content)
1717 /* Increment the byte sequence value. */
1718 struct charseq *seq;
1719 uint32_t wch;
1720 int i;
1722 for (i = last_charcode_len - 1; i >= 0; --i)
1723 if (++last_charcode[i] != 0)
1724 break;
1726 if (last_charcode_len == 1)
1727 /* Of course we have the charcode value. */
1728 ctype->class256_collection[(size_t) last_charcode[0]]
1729 |= class256_bit;
1731 /* Find the symbolic name. */
1732 seq = charmap_find_symbol (charmap, last_charcode,
1733 last_charcode_len);
1734 if (seq != NULL)
1736 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1737 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1738 strlen (seq->name));
1739 wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
1741 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1742 *find_idx (ctype, &ctype->class_collection,
1743 &ctype->class_collection_max,
1744 &ctype->class_collection_act, wch) |= class_bit;
1746 else
1747 wch = ILLEGAL_CHAR_VALUE;
1749 if (handle_digits == 1)
1751 /* We must store the digit values. */
1752 if (ctype->mbdigits_act == ctype->mbdigits_max)
1754 ctype->mbdigits_max *= 2;
1755 ctype->mbdigits = xrealloc (ctype->mbdigits,
1756 (ctype->mbdigits_max
1757 * sizeof (char *)));
1758 ctype->wcdigits_max *= 2;
1759 ctype->wcdigits = xrealloc (ctype->wcdigits,
1760 (ctype->wcdigits_max
1761 * sizeof (uint32_t)));
1764 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1765 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1766 seq->nbytes = last_charcode_len;
1768 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1769 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1771 else if (handle_digits == 2)
1773 struct charseq *seq;
1774 /* We must store the digit values. */
1775 if (ctype->outdigits_act >= 10)
1777 lr_error (ldfile, _("\
1778 %s: field `%s' does not contain exactly ten entries"),
1779 "LC_CTYPE", "outdigit");
1780 return;
1783 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1784 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1785 seq->nbytes = last_charcode_len;
1787 ctype->mboutdigits[ctype->outdigits_act] = seq;
1788 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1789 ++ctype->outdigits_act;
1792 while (memcmp (last_charcode, now->val.charcode.bytes,
1793 last_charcode_len) != 0);
1798 static uint32_t *
1799 find_translit2 (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
1800 uint32_t wch)
1802 struct translit_t *trunp = ctype->translit;
1803 struct translit_ignore_t *tirunp = ctype->translit_ignore;
1805 while (trunp != NULL)
1807 /* XXX We simplify things here. The transliterations we look
1808 for are only allowed to have one character. */
1809 if (trunp->from[0] == wch && trunp->from[1] == 0)
1811 /* Found it. Now look for a transliteration which can be
1812 represented with the character set. */
1813 struct translit_to_t *torunp = trunp->to;
1815 while (torunp != NULL)
1817 int i;
1819 for (i = 0; torunp->str[i] != 0; ++i)
1821 char utmp[10];
1823 snprintf (utmp, sizeof (utmp), "U%08X", torunp->str[i]);
1824 if (charmap_find_value (charmap, utmp, 9) == NULL)
1825 /* This character cannot be represented. */
1826 break;
1829 if (torunp->str[i] == 0)
1830 return torunp->str;
1832 torunp = torunp->next;
1835 break;
1838 trunp = trunp->next;
1841 /* Check for ignored chars. */
1842 while (tirunp != NULL)
1844 if (tirunp->from <= wch && tirunp->to >= wch)
1846 uint32_t wi;
1848 for (wi = tirunp->from; wi <= wch; wi += tirunp->step)
1849 if (wi == wch)
1850 return (uint32_t []) { 0 };
1854 /* Nothing found. */
1855 return NULL;
1859 uint32_t *
1860 find_translit (struct localedef_t *locale, const struct charmap_t *charmap,
1861 uint32_t wch)
1863 struct locale_ctype_t *ctype;
1864 uint32_t *result = NULL;
1866 assert (locale != NULL);
1867 ctype = locale->categories[LC_CTYPE].ctype;
1869 if (ctype->translit != NULL)
1870 result = find_translit2 (ctype, charmap, wch);
1872 if (result == NULL)
1874 struct translit_include_t *irunp = ctype->translit_include;
1876 while (irunp != NULL && result == NULL)
1878 result = find_translit (find_locale (CTYPE_LOCALE,
1879 irunp->copy_locale,
1880 irunp->copy_repertoire,
1881 charmap),
1882 charmap, wch);
1883 irunp = irunp->next;
1887 return result;
1891 /* Read one transliteration entry. */
1892 static uint32_t *
1893 read_widestring (struct linereader *ldfile, struct token *now,
1894 const struct charmap_t *charmap,
1895 struct repertoire_t *repertoire)
1897 uint32_t *wstr;
1899 if (now->tok == tok_default_missing)
1900 /* The special name "" will denote this case. */
1901 wstr = ((uint32_t *) { 0 });
1902 else if (now->tok == tok_bsymbol)
1904 /* Get the value from the repertoire. */
1905 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1906 wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1907 now->val.str.lenmb);
1908 if (wstr[0] == ILLEGAL_CHAR_VALUE)
1910 /* We cannot proceed, we don't know the UCS4 value. */
1911 free (wstr);
1912 return NULL;
1915 wstr[1] = 0;
1917 else if (now->tok == tok_ucs4)
1919 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1920 wstr[0] = now->val.ucs4;
1921 wstr[1] = 0;
1923 else if (now->tok == tok_charcode)
1925 /* Argh, we have to convert to the symbol name first and then to the
1926 UCS4 value. */
1927 struct charseq *seq = charmap_find_symbol (charmap,
1928 now->val.str.startmb,
1929 now->val.str.lenmb);
1930 if (seq == NULL)
1931 /* Cannot find the UCS4 value. */
1932 return NULL;
1934 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1935 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1936 strlen (seq->name));
1937 if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1938 /* We cannot proceed, we don't know the UCS4 value. */
1939 return NULL;
1941 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1942 wstr[0] = seq->ucs4;
1943 wstr[1] = 0;
1945 else if (now->tok == tok_string)
1947 wstr = now->val.str.startwc;
1948 if (wstr == NULL || wstr[0] == 0)
1949 return NULL;
1951 else
1953 if (now->tok != tok_eol && now->tok != tok_eof)
1954 lr_ignore_rest (ldfile, 0);
1955 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1956 return (uint32_t *) -1l;
1959 return wstr;
1963 static void
1964 read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1965 struct token *now, const struct charmap_t *charmap,
1966 struct repertoire_t *repertoire)
1968 uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1969 struct translit_t *result;
1970 struct translit_to_t **top;
1971 struct obstack *ob = &ctype->mempool;
1972 int first;
1973 int ignore;
1975 if (from_wstr == NULL)
1976 /* There is no valid from string. */
1977 return;
1979 result = (struct translit_t *) obstack_alloc (ob,
1980 sizeof (struct translit_t));
1981 result->from = from_wstr;
1982 result->fname = ldfile->fname;
1983 result->lineno = ldfile->lineno;
1984 result->next = NULL;
1985 result->to = NULL;
1986 top = &result->to;
1987 first = 1;
1988 ignore = 0;
1990 while (1)
1992 uint32_t *to_wstr;
1994 /* Next we have one or more transliterations. They are
1995 separated by semicolons. */
1996 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
1998 if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
2000 /* One string read. */
2001 const uint32_t zero = 0;
2003 if (!ignore)
2005 obstack_grow (ob, &zero, 4);
2006 to_wstr = obstack_finish (ob);
2008 *top = obstack_alloc (ob, sizeof (struct translit_to_t));
2009 (*top)->str = to_wstr;
2010 (*top)->next = NULL;
2013 if (now->tok == tok_eol)
2015 result->next = ctype->translit;
2016 ctype->translit = result;
2017 return;
2020 if (!ignore)
2021 top = &(*top)->next;
2022 ignore = 0;
2024 else
2026 to_wstr = read_widestring (ldfile, now, charmap, repertoire);
2027 if (to_wstr == (uint32_t *) -1l)
2029 /* An error occurred. */
2030 obstack_free (ob, result);
2031 return;
2034 if (to_wstr == NULL)
2035 ignore = 1;
2036 else
2037 /* This value is usable. */
2038 obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
2040 first = 0;
2046 static void
2047 read_translit_ignore_entry (struct linereader *ldfile,
2048 struct locale_ctype_t *ctype,
2049 const struct charmap_t *charmap,
2050 struct repertoire_t *repertoire)
2052 /* We expect a semicolon-separated list of characters we ignore. We are
2053 only interested in the wide character definitions. These must be
2054 single characters, possibly defining a range when an ellipsis is used. */
2055 while (1)
2057 struct token *now = lr_token (ldfile, charmap, NULL, repertoire,
2058 verbose);
2059 struct translit_ignore_t *newp;
2060 uint32_t from;
2062 if (now->tok == tok_eol || now->tok == tok_eof)
2064 lr_error (ldfile,
2065 _("premature end of `translit_ignore' definition"));
2066 return;
2069 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2071 lr_error (ldfile, _("syntax error"));
2072 lr_ignore_rest (ldfile, 0);
2073 return;
2076 if (now->tok == tok_ucs4)
2077 from = now->val.ucs4;
2078 else
2079 /* Try to get the value. */
2080 from = repertoire_find_value (repertoire, now->val.str.startmb,
2081 now->val.str.lenmb);
2083 if (from == ILLEGAL_CHAR_VALUE)
2085 lr_error (ldfile, "invalid character name");
2086 newp = NULL;
2088 else
2090 newp = (struct translit_ignore_t *)
2091 obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
2092 newp->from = from;
2093 newp->to = from;
2094 newp->step = 1;
2096 newp->next = ctype->translit_ignore;
2097 ctype->translit_ignore = newp;
2100 /* Now we expect either a semicolon, an ellipsis, or the end of the
2101 line. */
2102 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2104 if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
2106 /* XXX Should we bother implementing `....'? `...' certainly
2107 will not be implemented. */
2108 uint32_t to;
2109 int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
2111 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2113 if (now->tok == tok_eol || now->tok == tok_eof)
2115 lr_error (ldfile,
2116 _("premature end of `translit_ignore' definition"));
2117 return;
2120 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2122 lr_error (ldfile, _("syntax error"));
2123 lr_ignore_rest (ldfile, 0);
2124 return;
2127 if (now->tok == tok_ucs4)
2128 to = now->val.ucs4;
2129 else
2130 /* Try to get the value. */
2131 to = repertoire_find_value (repertoire, now->val.str.startmb,
2132 now->val.str.lenmb);
2134 if (to == ILLEGAL_CHAR_VALUE)
2135 lr_error (ldfile, "invalid character name");
2136 else
2138 /* Make sure the `to'-value is larger. */
2139 if (to >= from)
2141 newp->to = to;
2142 newp->step = step;
2144 else
2145 lr_error (ldfile, _("\
2146 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2147 (to | from) < 65536 ? 4 : 8, to,
2148 (to | from) < 65536 ? 4 : 8, from);
2151 /* And the next token. */
2152 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2155 if (now->tok == tok_eol || now->tok == tok_eof)
2156 /* We are done. */
2157 return;
2159 if (now->tok == tok_semicolon)
2160 /* Next round. */
2161 continue;
2163 /* If we come here something is wrong. */
2164 lr_error (ldfile, _("syntax error"));
2165 lr_ignore_rest (ldfile, 0);
2166 return;
2171 /* The parser for the LC_CTYPE section of the locale definition. */
2172 void
2173 ctype_read (struct linereader *ldfile, struct localedef_t *result,
2174 const struct charmap_t *charmap, const char *repertoire_name,
2175 int ignore_content)
2177 struct repertoire_t *repertoire = NULL;
2178 struct locale_ctype_t *ctype;
2179 struct token *now;
2180 enum token_t nowtok;
2181 size_t cnt;
2182 struct charseq *last_seq;
2183 uint32_t last_wch = 0;
2184 enum token_t last_token;
2185 enum token_t ellipsis_token;
2186 int step;
2187 char last_charcode[16];
2188 size_t last_charcode_len = 0;
2189 const char *last_str = NULL;
2190 int mapidx;
2191 struct localedef_t *copy_locale = NULL;
2193 /* Get the repertoire we have to use. */
2194 if (repertoire_name != NULL)
2195 repertoire = repertoire_read (repertoire_name);
2197 /* The rest of the line containing `LC_CTYPE' must be free. */
2198 lr_ignore_rest (ldfile, 1);
2203 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2204 nowtok = now->tok;
2206 while (nowtok == tok_eol);
2208 /* If we see `copy' now we are almost done. */
2209 if (nowtok == tok_copy)
2211 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2212 if (now->tok != tok_string)
2214 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2216 skip_category:
2218 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2219 while (now->tok != tok_eof && now->tok != tok_end);
2221 if (now->tok != tok_eof
2222 || (now = lr_token (ldfile, charmap, NULL, NULL, verbose),
2223 now->tok == tok_eof))
2224 lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2225 else if (now->tok != tok_lc_ctype)
2227 lr_error (ldfile, _("\
2228 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2229 lr_ignore_rest (ldfile, 0);
2231 else
2232 lr_ignore_rest (ldfile, 1);
2234 return;
2237 if (! ignore_content)
2239 /* Get the locale definition. */
2240 copy_locale = load_locale (LC_CTYPE, now->val.str.startmb,
2241 repertoire_name, charmap, NULL);
2242 if ((copy_locale->avail & CTYPE_LOCALE) == 0)
2244 /* Not yet loaded. So do it now. */
2245 if (locfile_read (copy_locale, charmap) != 0)
2246 goto skip_category;
2249 if (copy_locale->categories[LC_CTYPE].ctype == NULL)
2250 return;
2253 lr_ignore_rest (ldfile, 1);
2255 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2256 nowtok = now->tok;
2259 /* Prepare the data structures. */
2260 ctype_startup (ldfile, result, charmap, copy_locale, ignore_content);
2261 ctype = result->categories[LC_CTYPE].ctype;
2263 /* Remember the repertoire we use. */
2264 if (!ignore_content)
2265 ctype->repertoire = repertoire;
2267 while (1)
2269 unsigned long int class_bit = 0;
2270 unsigned long int class256_bit = 0;
2271 int handle_digits = 0;
2273 /* Of course we don't proceed beyond the end of file. */
2274 if (nowtok == tok_eof)
2275 break;
2277 /* Ignore empty lines. */
2278 if (nowtok == tok_eol)
2280 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2281 nowtok = now->tok;
2282 continue;
2285 switch (nowtok)
2287 case tok_charclass:
2288 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2289 while (now->tok == tok_ident || now->tok == tok_string)
2291 ctype_class_new (ldfile, ctype, now->val.str.startmb);
2292 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2293 if (now->tok != tok_semicolon)
2294 break;
2295 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2297 if (now->tok != tok_eol)
2298 SYNTAX_ERROR (_("\
2299 %s: syntax error in definition of new character class"), "LC_CTYPE");
2300 break;
2302 case tok_charconv:
2303 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2304 while (now->tok == tok_ident || now->tok == tok_string)
2306 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2307 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2308 if (now->tok != tok_semicolon)
2309 break;
2310 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2312 if (now->tok != tok_eol)
2313 SYNTAX_ERROR (_("\
2314 %s: syntax error in definition of new character map"), "LC_CTYPE");
2315 break;
2317 case tok_class:
2318 /* Ignore the rest of the line if we don't need the input of
2319 this line. */
2320 if (ignore_content)
2322 lr_ignore_rest (ldfile, 0);
2323 break;
2326 /* We simply forget the `class' keyword and use the following
2327 operand to determine the bit. */
2328 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2329 if (now->tok == tok_ident || now->tok == tok_string)
2331 /* This can be one of the already known class names. */
2332 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2333 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
2334 break;
2335 if (cnt >= ctype->nr_charclass)
2337 #ifdef PREDEFINED_CLASSES
2338 if (now->val.str.lenmb == 8
2339 && memcmp ("special1", now->val.str.startmb, 8) == 0)
2340 class_bit = _ISwspecial1;
2341 else if (now->val.str.lenmb == 8
2342 && memcmp ("special2", now->val.str.startmb, 8) == 0)
2343 class_bit = _ISwspecial2;
2344 else if (now->val.str.lenmb == 8
2345 && memcmp ("special3", now->val.str.startmb, 8) == 0)
2346 class_bit = _ISwspecial3;
2347 else
2348 #endif
2350 /* OK, it's a new class. */
2351 ctype_class_new (ldfile, ctype, now->val.str.startmb);
2353 class_bit = _ISwbit (ctype->nr_charclass - 1);
2356 else
2358 class_bit = _ISwbit (cnt);
2360 free (now->val.str.startmb);
2363 else if (now->tok == tok_digit)
2364 goto handle_tok_digit;
2365 else if (now->tok < tok_upper || now->tok > tok_blank)
2366 goto err_label;
2367 else
2369 class_bit = BITw (now->tok);
2370 class256_bit = BIT (now->tok);
2373 /* The next character must be a semicolon. */
2374 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2375 if (now->tok != tok_semicolon)
2376 goto err_label;
2377 goto read_charclass;
2379 case tok_upper:
2380 case tok_lower:
2381 case tok_alpha:
2382 case tok_alnum:
2383 case tok_space:
2384 case tok_cntrl:
2385 case tok_punct:
2386 case tok_graph:
2387 case tok_print:
2388 case tok_xdigit:
2389 case tok_blank:
2390 /* Ignore the rest of the line if we don't need the input of
2391 this line. */
2392 if (ignore_content)
2394 lr_ignore_rest (ldfile, 0);
2395 break;
2398 class_bit = BITw (now->tok);
2399 class256_bit = BIT (now->tok);
2400 handle_digits = 0;
2401 read_charclass:
2402 ctype->class_done |= class_bit;
2403 last_token = tok_none;
2404 ellipsis_token = tok_none;
2405 step = 1;
2406 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2407 while (now->tok != tok_eol && now->tok != tok_eof)
2409 uint32_t wch;
2410 struct charseq *seq;
2412 if (ellipsis_token == tok_none)
2414 if (get_character (now, charmap, repertoire, &seq, &wch))
2415 goto err_label;
2417 if (!ignore_content && seq != NULL && seq->nbytes == 1)
2418 /* Yep, we can store information about this byte
2419 sequence. */
2420 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
2422 if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2423 && class_bit != 0)
2424 /* We have the UCS4 position. */
2425 *find_idx (ctype, &ctype->class_collection,
2426 &ctype->class_collection_max,
2427 &ctype->class_collection_act, wch) |= class_bit;
2429 last_token = now->tok;
2430 /* Terminate the string. */
2431 if (last_token == tok_bsymbol)
2433 now->val.str.startmb[now->val.str.lenmb] = '\0';
2434 last_str = now->val.str.startmb;
2436 else
2437 last_str = NULL;
2438 last_seq = seq;
2439 last_wch = wch;
2440 memcpy (last_charcode, now->val.charcode.bytes, 16);
2441 last_charcode_len = now->val.charcode.nbytes;
2443 if (!ignore_content && handle_digits == 1)
2445 /* We must store the digit values. */
2446 if (ctype->mbdigits_act == ctype->mbdigits_max)
2448 ctype->mbdigits_max += 10;
2449 ctype->mbdigits = xrealloc (ctype->mbdigits,
2450 (ctype->mbdigits_max
2451 * sizeof (char *)));
2452 ctype->wcdigits_max += 10;
2453 ctype->wcdigits = xrealloc (ctype->wcdigits,
2454 (ctype->wcdigits_max
2455 * sizeof (uint32_t)));
2458 ctype->mbdigits[ctype->mbdigits_act++] = seq;
2459 ctype->wcdigits[ctype->wcdigits_act++] = wch;
2461 else if (!ignore_content && handle_digits == 2)
2463 /* We must store the digit values. */
2464 if (ctype->outdigits_act >= 10)
2466 lr_error (ldfile, _("\
2467 %s: field `%s' does not contain exactly ten entries"),
2468 "LC_CTYPE", "outdigit");
2469 lr_ignore_rest (ldfile, 0);
2470 break;
2473 ctype->mboutdigits[ctype->outdigits_act] = seq;
2474 ctype->wcoutdigits[ctype->outdigits_act] = wch;
2475 ++ctype->outdigits_act;
2478 else
2480 /* Now it gets complicated. We have to resolve the
2481 ellipsis problem. First we must distinguish between
2482 the different kind of ellipsis and this must match the
2483 tokens we have seen. */
2484 assert (last_token != tok_none);
2486 if (last_token != now->tok)
2488 lr_error (ldfile, _("\
2489 ellipsis range must be marked by two operands of same type"));
2490 lr_ignore_rest (ldfile, 0);
2491 break;
2494 if (last_token == tok_bsymbol)
2496 if (ellipsis_token == tok_ellipsis3)
2497 lr_error (ldfile, _("with symbolic name range values \
2498 the absolute ellipsis `...' must not be used"));
2500 charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2501 repertoire, now, last_str,
2502 class256_bit, class_bit,
2503 (ellipsis_token
2504 == tok_ellipsis4
2505 ? 10 : 16),
2506 ignore_content,
2507 handle_digits, step);
2509 else if (last_token == tok_ucs4)
2511 if (ellipsis_token != tok_ellipsis2)
2512 lr_error (ldfile, _("\
2513 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2515 charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2516 repertoire, now, last_wch,
2517 class256_bit, class_bit,
2518 ignore_content, handle_digits,
2519 step);
2521 else
2523 assert (last_token == tok_charcode);
2525 if (ellipsis_token != tok_ellipsis3)
2526 lr_error (ldfile, _("\
2527 with character code range values one must use the absolute ellipsis `...'"));
2529 charclass_charcode_ellipsis (ldfile, ctype, charmap,
2530 repertoire, now,
2531 last_charcode,
2532 last_charcode_len,
2533 class256_bit, class_bit,
2534 ignore_content,
2535 handle_digits);
2538 /* Now we have used the last value. */
2539 last_token = tok_none;
2542 /* Next we expect a semicolon or the end of the line. */
2543 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2544 if (now->tok == tok_eol || now->tok == tok_eof)
2545 break;
2547 if (last_token != tok_none
2548 && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
2550 if (now->tok == tok_ellipsis2_2)
2552 now->tok = tok_ellipsis2;
2553 step = 2;
2555 else if (now->tok == tok_ellipsis4_2)
2557 now->tok = tok_ellipsis4;
2558 step = 2;
2561 ellipsis_token = now->tok;
2563 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2564 continue;
2567 if (now->tok != tok_semicolon)
2568 goto err_label;
2570 /* And get the next character. */
2571 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2573 ellipsis_token = tok_none;
2574 step = 1;
2576 break;
2578 case tok_digit:
2579 /* Ignore the rest of the line if we don't need the input of
2580 this line. */
2581 if (ignore_content)
2583 lr_ignore_rest (ldfile, 0);
2584 break;
2587 handle_tok_digit:
2588 class_bit = _ISwdigit;
2589 class256_bit = _ISdigit;
2590 handle_digits = 1;
2591 goto read_charclass;
2593 case tok_outdigit:
2594 /* Ignore the rest of the line if we don't need the input of
2595 this line. */
2596 if (ignore_content)
2598 lr_ignore_rest (ldfile, 0);
2599 break;
2602 if (ctype->outdigits_act != 0)
2603 lr_error (ldfile, _("\
2604 %s: field `%s' declared more than once"),
2605 "LC_CTYPE", "outdigit");
2606 class_bit = 0;
2607 class256_bit = 0;
2608 handle_digits = 2;
2609 goto read_charclass;
2611 case tok_toupper:
2612 /* Ignore the rest of the line if we don't need the input of
2613 this line. */
2614 if (ignore_content)
2616 lr_ignore_rest (ldfile, 0);
2617 break;
2620 mapidx = 0;
2621 goto read_mapping;
2623 case tok_tolower:
2624 /* Ignore the rest of the line if we don't need the input of
2625 this line. */
2626 if (ignore_content)
2628 lr_ignore_rest (ldfile, 0);
2629 break;
2632 mapidx = 1;
2633 goto read_mapping;
2635 case tok_map:
2636 /* Ignore the rest of the line if we don't need the input of
2637 this line. */
2638 if (ignore_content)
2640 lr_ignore_rest (ldfile, 0);
2641 break;
2644 /* We simply forget the `map' keyword and use the following
2645 operand to determine the mapping. */
2646 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2647 if (now->tok == tok_ident || now->tok == tok_string)
2649 size_t cnt;
2651 for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2652 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2653 break;
2655 if (cnt < ctype->map_collection_nr)
2656 free (now->val.str.startmb);
2657 else
2658 /* OK, it's a new map. */
2659 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2661 mapidx = cnt;
2663 else if (now->tok < tok_toupper || now->tok > tok_tolower)
2664 goto err_label;
2665 else
2666 mapidx = now->tok - tok_toupper;
2668 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2669 /* This better should be a semicolon. */
2670 if (now->tok != tok_semicolon)
2671 goto err_label;
2673 read_mapping:
2674 /* Test whether this mapping was already defined. */
2675 if (ctype->tomap_done[mapidx])
2677 lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2678 ctype->mapnames[mapidx]);
2679 lr_ignore_rest (ldfile, 0);
2680 break;
2682 ctype->tomap_done[mapidx] = 1;
2684 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2685 while (now->tok != tok_eol && now->tok != tok_eof)
2687 struct charseq *from_seq;
2688 uint32_t from_wch;
2689 struct charseq *to_seq;
2690 uint32_t to_wch;
2692 /* Every pair starts with an opening brace. */
2693 if (now->tok != tok_open_brace)
2694 goto err_label;
2696 /* Next comes the from-value. */
2697 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2698 if (get_character (now, charmap, repertoire, &from_seq,
2699 &from_wch) != 0)
2700 goto err_label;
2702 /* The next is a comma. */
2703 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2704 if (now->tok != tok_comma)
2705 goto err_label;
2707 /* And the other value. */
2708 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2709 if (get_character (now, charmap, repertoire, &to_seq,
2710 &to_wch) != 0)
2711 goto err_label;
2713 /* And the last thing is the closing brace. */
2714 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2715 if (now->tok != tok_close_brace)
2716 goto err_label;
2718 if (!ignore_content)
2720 /* Check whether the mapping converts from an ASCII value
2721 to a non-ASCII value. */
2722 if (from_seq != NULL && from_seq->nbytes == 1
2723 && isascii (from_seq->bytes[0])
2724 && to_seq != NULL && (to_seq->nbytes != 1
2725 || !isascii (to_seq->bytes[0])))
2726 ctype->to_nonascii = 1;
2728 if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2729 && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2730 /* We can use this value. */
2731 ctype->map256_collection[mapidx][from_seq->bytes[0]]
2732 = to_seq->bytes[0];
2734 if (from_wch != ILLEGAL_CHAR_VALUE
2735 && to_wch != ILLEGAL_CHAR_VALUE)
2736 /* Both correct values. */
2737 *find_idx (ctype, &ctype->map_collection[mapidx],
2738 &ctype->map_collection_max[mapidx],
2739 &ctype->map_collection_act[mapidx],
2740 from_wch) = to_wch;
2743 /* Now comes a semicolon or the end of the line/file. */
2744 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2745 if (now->tok == tok_semicolon)
2746 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2748 break;
2750 case tok_translit_start:
2751 /* Ignore the entire translit section with its peculiar syntax
2752 if we don't need the input. */
2753 if (ignore_content)
2757 lr_ignore_rest (ldfile, 0);
2758 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2760 while (now->tok != tok_translit_end && now->tok != tok_eof);
2762 if (now->tok == tok_eof)
2763 lr_error (ldfile, _(\
2764 "%s: `translit_start' section does not end with `translit_end'"),
2765 "LC_CTYPE");
2767 break;
2770 /* The rest of the line better should be empty. */
2771 lr_ignore_rest (ldfile, 1);
2773 /* We count here the number of allocated entries in the `translit'
2774 array. */
2775 cnt = 0;
2777 ldfile->translate_strings = 1;
2778 ldfile->return_widestr = 1;
2780 /* We proceed until we see the `translit_end' token. */
2781 while (now = lr_token (ldfile, charmap, NULL, repertoire, verbose),
2782 now->tok != tok_translit_end && now->tok != tok_eof)
2784 if (now->tok == tok_eol)
2785 /* Ignore empty lines. */
2786 continue;
2788 if (now->tok == tok_include)
2790 /* We have to include locale. */
2791 const char *locale_name;
2792 const char *repertoire_name;
2793 struct translit_include_t *include_stmt, **include_ptr;
2795 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2796 /* This should be a string or an identifier. In any
2797 case something to name a locale. */
2798 if (now->tok != tok_string && now->tok != tok_ident)
2800 translit_syntax:
2801 lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2802 lr_ignore_rest (ldfile, 0);
2803 continue;
2805 locale_name = now->val.str.startmb;
2807 /* Next should be a semicolon. */
2808 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2809 if (now->tok != tok_semicolon)
2810 goto translit_syntax;
2812 /* Now the repertoire name. */
2813 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2814 if ((now->tok != tok_string && now->tok != tok_ident)
2815 || now->val.str.startmb == NULL)
2816 goto translit_syntax;
2817 repertoire_name = now->val.str.startmb;
2818 if (repertoire_name[0] == '\0')
2819 /* Ignore the empty string. */
2820 repertoire_name = NULL;
2822 /* Save the include statement for later processing. */
2823 include_stmt = (struct translit_include_t *)
2824 xmalloc (sizeof (struct translit_include_t));
2825 include_stmt->copy_locale = locale_name;
2826 include_stmt->copy_repertoire = repertoire_name;
2827 include_stmt->next = NULL;
2829 include_ptr = &ctype->translit_include;
2830 while (*include_ptr != NULL)
2831 include_ptr = &(*include_ptr)->next;
2832 *include_ptr = include_stmt;
2834 /* The rest of the line must be empty. */
2835 lr_ignore_rest (ldfile, 1);
2837 /* Make sure the locale is read. */
2838 add_to_readlist (LC_CTYPE, locale_name, repertoire_name,
2839 1, NULL);
2840 continue;
2842 else if (now->tok == tok_default_missing)
2844 uint32_t *wstr;
2846 while (1)
2848 /* We expect a single character or string as the
2849 argument. */
2850 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2851 wstr = read_widestring (ldfile, now, charmap,
2852 repertoire);
2854 if (wstr != NULL)
2856 if (ctype->default_missing != NULL)
2858 lr_error (ldfile, _("\
2859 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2860 WITH_CUR_LOCALE (error_at_line (0, 0,
2861 ctype->default_missing_file,
2862 ctype->default_missing_lineno,
2863 _("\
2864 previous definition was here")));
2866 else
2868 ctype->default_missing = wstr;
2869 ctype->default_missing_file = ldfile->fname;
2870 ctype->default_missing_lineno = ldfile->lineno;
2872 /* We can have more entries, ignore them. */
2873 lr_ignore_rest (ldfile, 0);
2874 break;
2876 else if (wstr == (uint32_t *) -1l)
2877 /* This was a syntax error. */
2878 break;
2880 /* Maybe there is another replacement we can use. */
2881 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2882 if (now->tok == tok_eol || now->tok == tok_eof)
2884 /* Nothing found. We tell the user. */
2885 lr_error (ldfile, _("\
2886 %s: no representable `default_missing' definition found"), "LC_CTYPE");
2887 break;
2889 if (now->tok != tok_semicolon)
2890 goto translit_syntax;
2893 continue;
2895 else if (now->tok == tok_translit_ignore)
2897 read_translit_ignore_entry (ldfile, ctype, charmap,
2898 repertoire);
2899 continue;
2902 read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2904 ldfile->return_widestr = 0;
2906 if (now->tok == tok_eof)
2907 lr_error (ldfile, _(\
2908 "%s: `translit_start' section does not end with `translit_end'"),
2909 "LC_CTYPE");
2911 break;
2913 case tok_ident:
2914 /* Ignore the rest of the line if we don't need the input of
2915 this line. */
2916 if (ignore_content)
2918 lr_ignore_rest (ldfile, 0);
2919 break;
2922 /* This could mean one of several things. First test whether
2923 it's a character class name. */
2924 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2925 if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2926 break;
2927 if (cnt < ctype->nr_charclass)
2929 class_bit = _ISwbit (cnt);
2930 class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2931 free (now->val.str.startmb);
2932 goto read_charclass;
2934 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2935 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2936 break;
2937 if (cnt < ctype->map_collection_nr)
2939 mapidx = cnt;
2940 free (now->val.str.startmb);
2941 goto read_mapping;
2943 #ifdef PREDEFINED_CLASSES
2944 if (strcmp (now->val.str.startmb, "special1") == 0)
2946 class_bit = _ISwspecial1;
2947 free (now->val.str.startmb);
2948 goto read_charclass;
2950 if (strcmp (now->val.str.startmb, "special2") == 0)
2952 class_bit = _ISwspecial2;
2953 free (now->val.str.startmb);
2954 goto read_charclass;
2956 if (strcmp (now->val.str.startmb, "special3") == 0)
2958 class_bit = _ISwspecial3;
2959 free (now->val.str.startmb);
2960 goto read_charclass;
2962 if (strcmp (now->val.str.startmb, "tosymmetric") == 0)
2964 mapidx = 2;
2965 goto read_mapping;
2967 #endif
2968 break;
2970 case tok_end:
2971 /* Next we assume `LC_CTYPE'. */
2972 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2973 if (now->tok == tok_eof)
2974 break;
2975 if (now->tok == tok_eol)
2976 lr_error (ldfile, _("%s: incomplete `END' line"),
2977 "LC_CTYPE");
2978 else if (now->tok != tok_lc_ctype)
2979 lr_error (ldfile, _("\
2980 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2981 lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2982 return;
2984 default:
2985 err_label:
2986 if (now->tok != tok_eof)
2987 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2990 /* Prepare for the next round. */
2991 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2992 nowtok = now->tok;
2995 /* When we come here we reached the end of the file. */
2996 lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
3000 static void
3001 set_class_defaults (struct locale_ctype_t *ctype,
3002 const struct charmap_t *charmap,
3003 struct repertoire_t *repertoire)
3005 size_t cnt;
3007 /* This function defines the default values for the classes and conversions
3008 according to POSIX.2 2.5.2.1.
3009 It may seem that the order of these if-blocks is arbitrary but it is NOT.
3010 Don't move them unless you know what you are doing! */
3012 auto void set_default (int bitpos, int from, int to);
3014 void set_default (int bitpos, int from, int to)
3016 char tmp[2];
3017 int ch;
3018 int bit = _ISbit (bitpos);
3019 int bitw = _ISwbit (bitpos);
3020 /* Define string. */
3021 strcpy (tmp, "?");
3023 for (ch = from; ch <= to; ++ch)
3025 struct charseq *seq;
3026 tmp[0] = ch;
3028 seq = charmap_find_value (charmap, tmp, 1);
3029 if (seq == NULL)
3031 char buf[10];
3032 sprintf (buf, "U%08X", ch);
3033 seq = charmap_find_value (charmap, buf, 9);
3035 if (seq == NULL)
3037 if (!be_quiet)
3038 WITH_CUR_LOCALE (error (0, 0, _("\
3039 %s: character `%s' not defined in charmap while needed as default value"),
3040 "LC_CTYPE", tmp));
3042 else if (seq->nbytes != 1)
3043 WITH_CUR_LOCALE (error (0, 0, _("\
3044 %s: character `%s' in charmap not representable with one byte"),
3045 "LC_CTYPE", tmp));
3046 else
3047 ctype->class256_collection[seq->bytes[0]] |= bit;
3049 /* No need to search here, the ASCII value is also the Unicode
3050 value. */
3051 ELEM (ctype, class_collection, , ch) |= bitw;
3055 /* Set default values if keyword was not present. */
3056 if ((ctype->class_done & BITw (tok_upper)) == 0)
3057 /* "If this keyword [upper] is not specified, the uppercase letters
3058 `A' through `Z', ..., shall automatically belong to this class,
3059 with implementation defined character values." [P1003.2, 2.5.2.1] */
3060 set_default (BITPOS (tok_upper), 'A', 'Z');
3062 if ((ctype->class_done & BITw (tok_lower)) == 0)
3063 /* "If this keyword [lower] is not specified, the lowercase letters
3064 `a' through `z', ..., shall automatically belong to this class,
3065 with implementation defined character values." [P1003.2, 2.5.2.1] */
3066 set_default (BITPOS (tok_lower), 'a', 'z');
3068 if ((ctype->class_done & BITw (tok_alpha)) == 0)
3070 /* Table 2-6 in P1003.2 says that characters in class `upper' or
3071 class `lower' *must* be in class `alpha'. */
3072 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
3073 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
3075 for (cnt = 0; cnt < 256; ++cnt)
3076 if ((ctype->class256_collection[cnt] & mask) != 0)
3077 ctype->class256_collection[cnt] |= BIT (tok_alpha);
3079 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3080 if ((ctype->class_collection[cnt] & maskw) != 0)
3081 ctype->class_collection[cnt] |= BITw (tok_alpha);
3084 if ((ctype->class_done & BITw (tok_digit)) == 0)
3085 /* "If this keyword [digit] is not specified, the digits `0' through
3086 `9', ..., shall automatically belong to this class, with
3087 implementation-defined character values." [P1003.2, 2.5.2.1] */
3088 set_default (BITPOS (tok_digit), '0', '9');
3090 /* "Only characters specified for the `alpha' and `digit' keyword
3091 shall be specified. Characters specified for the keyword `alpha'
3092 and `digit' are automatically included in this class." */
3094 unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
3095 unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
3097 for (cnt = 0; cnt < 256; ++cnt)
3098 if ((ctype->class256_collection[cnt] & mask) != 0)
3099 ctype->class256_collection[cnt] |= BIT (tok_alnum);
3101 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3102 if ((ctype->class_collection[cnt] & maskw) != 0)
3103 ctype->class_collection[cnt] |= BITw (tok_alnum);
3106 if ((ctype->class_done & BITw (tok_space)) == 0)
3107 /* "If this keyword [space] is not specified, the characters <space>,
3108 <form-feed>, <newline>, <carriage-return>, <tab>, and
3109 <vertical-tab>, ..., shall automatically belong to this class,
3110 with implementation-defined character values." [P1003.2, 2.5.2.1] */
3112 struct charseq *seq;
3114 seq = charmap_find_value (charmap, "space", 5);
3115 if (seq == NULL)
3116 seq = charmap_find_value (charmap, "SP", 2);
3117 if (seq == NULL)
3118 seq = charmap_find_value (charmap, "U00000020", 9);
3119 if (seq == NULL)
3121 if (!be_quiet)
3122 WITH_CUR_LOCALE (error (0, 0, _("\
3123 %s: character `%s' not defined while needed as default value"),
3124 "LC_CTYPE", "<space>"));
3126 else if (seq->nbytes != 1)
3127 WITH_CUR_LOCALE (error (0, 0, _("\
3128 %s: character `%s' in charmap not representable with one byte"),
3129 "LC_CTYPE", "<space>"));
3130 else
3131 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3133 /* No need to search. */
3134 ELEM (ctype, class_collection, , L' ') |= BITw (tok_space);
3136 seq = charmap_find_value (charmap, "form-feed", 9);
3137 if (seq == NULL)
3138 seq = charmap_find_value (charmap, "U0000000C", 9);
3139 if (seq == NULL)
3141 if (!be_quiet)
3142 WITH_CUR_LOCALE (error (0, 0, _("\
3143 %s: character `%s' not defined while needed as default value"),
3144 "LC_CTYPE", "<form-feed>"));
3146 else if (seq->nbytes != 1)
3147 WITH_CUR_LOCALE (error (0, 0, _("\
3148 %s: character `%s' in charmap not representable with one byte"),
3149 "LC_CTYPE", "<form-feed>"));
3150 else
3151 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3153 /* No need to search. */
3154 ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space);
3157 seq = charmap_find_value (charmap, "newline", 7);
3158 if (seq == NULL)
3159 seq = charmap_find_value (charmap, "U0000000A", 9);
3160 if (seq == NULL)
3162 if (!be_quiet)
3163 WITH_CUR_LOCALE (error (0, 0, _("\
3164 character `%s' not defined while needed as default value"),
3165 "<newline>"));
3167 else if (seq->nbytes != 1)
3168 WITH_CUR_LOCALE (error (0, 0, _("\
3169 %s: character `%s' in charmap not representable with one byte"),
3170 "LC_CTYPE", "<newline>"));
3171 else
3172 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3174 /* No need to search. */
3175 ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space);
3178 seq = charmap_find_value (charmap, "carriage-return", 15);
3179 if (seq == NULL)
3180 seq = charmap_find_value (charmap, "U0000000D", 9);
3181 if (seq == NULL)
3183 if (!be_quiet)
3184 WITH_CUR_LOCALE (error (0, 0, _("\
3185 %s: character `%s' not defined while needed as default value"),
3186 "LC_CTYPE", "<carriage-return>"));
3188 else if (seq->nbytes != 1)
3189 WITH_CUR_LOCALE (error (0, 0, _("\
3190 %s: character `%s' in charmap not representable with one byte"),
3191 "LC_CTYPE", "<carriage-return>"));
3192 else
3193 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3195 /* No need to search. */
3196 ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space);
3199 seq = charmap_find_value (charmap, "tab", 3);
3200 if (seq == NULL)
3201 seq = charmap_find_value (charmap, "U00000009", 9);
3202 if (seq == NULL)
3204 if (!be_quiet)
3205 WITH_CUR_LOCALE (error (0, 0, _("\
3206 %s: character `%s' not defined while needed as default value"),
3207 "LC_CTYPE", "<tab>"));
3209 else if (seq->nbytes != 1)
3210 WITH_CUR_LOCALE (error (0, 0, _("\
3211 %s: character `%s' in charmap not representable with one byte"),
3212 "LC_CTYPE", "<tab>"));
3213 else
3214 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3216 /* No need to search. */
3217 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space);
3220 seq = charmap_find_value (charmap, "vertical-tab", 12);
3221 if (seq == NULL)
3222 seq = charmap_find_value (charmap, "U0000000B", 9);
3223 if (seq == NULL)
3225 if (!be_quiet)
3226 WITH_CUR_LOCALE (error (0, 0, _("\
3227 %s: character `%s' not defined while needed as default value"),
3228 "LC_CTYPE", "<vertical-tab>"));
3230 else if (seq->nbytes != 1)
3231 WITH_CUR_LOCALE (error (0, 0, _("\
3232 %s: character `%s' in charmap not representable with one byte"),
3233 "LC_CTYPE", "<vertical-tab>"));
3234 else
3235 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3237 /* No need to search. */
3238 ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space);
3241 if ((ctype->class_done & BITw (tok_xdigit)) == 0)
3242 /* "If this keyword is not specified, the digits `0' to `9', the
3243 uppercase letters `A' through `F', and the lowercase letters `a'
3244 through `f', ..., shall automatically belong to this class, with
3245 implementation defined character values." [P1003.2, 2.5.2.1] */
3247 set_default (BITPOS (tok_xdigit), '0', '9');
3248 set_default (BITPOS (tok_xdigit), 'A', 'F');
3249 set_default (BITPOS (tok_xdigit), 'a', 'f');
3252 if ((ctype->class_done & BITw (tok_blank)) == 0)
3253 /* "If this keyword [blank] is unspecified, the characters <space> and
3254 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
3256 struct charseq *seq;
3258 seq = charmap_find_value (charmap, "space", 5);
3259 if (seq == NULL)
3260 seq = charmap_find_value (charmap, "SP", 2);
3261 if (seq == NULL)
3262 seq = charmap_find_value (charmap, "U00000020", 9);
3263 if (seq == NULL)
3265 if (!be_quiet)
3266 WITH_CUR_LOCALE (error (0, 0, _("\
3267 %s: character `%s' not defined while needed as default value"),
3268 "LC_CTYPE", "<space>"));
3270 else if (seq->nbytes != 1)
3271 WITH_CUR_LOCALE (error (0, 0, _("\
3272 %s: character `%s' in charmap not representable with one byte"),
3273 "LC_CTYPE", "<space>"));
3274 else
3275 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3277 /* No need to search. */
3278 ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank);
3281 seq = charmap_find_value (charmap, "tab", 3);
3282 if (seq == NULL)
3283 seq = charmap_find_value (charmap, "U00000009", 9);
3284 if (seq == NULL)
3286 if (!be_quiet)
3287 WITH_CUR_LOCALE (error (0, 0, _("\
3288 %s: character `%s' not defined while needed as default value"),
3289 "LC_CTYPE", "<tab>"));
3291 else if (seq->nbytes != 1)
3292 WITH_CUR_LOCALE (error (0, 0, _("\
3293 %s: character `%s' in charmap not representable with one byte"),
3294 "LC_CTYPE", "<tab>"));
3295 else
3296 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3298 /* No need to search. */
3299 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank);
3302 if ((ctype->class_done & BITw (tok_graph)) == 0)
3303 /* "If this keyword [graph] is not specified, characters specified for
3304 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3305 shall belong to this character class." [P1003.2, 2.5.2.1] */
3307 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3308 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3309 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3310 BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3311 BITw (tok_punct);
3312 size_t cnt;
3314 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3315 if ((ctype->class_collection[cnt] & maskw) != 0)
3316 ctype->class_collection[cnt] |= BITw (tok_graph);
3318 for (cnt = 0; cnt < 256; ++cnt)
3319 if ((ctype->class256_collection[cnt] & mask) != 0)
3320 ctype->class256_collection[cnt] |= BIT (tok_graph);
3323 if ((ctype->class_done & BITw (tok_print)) == 0)
3324 /* "If this keyword [print] is not provided, characters specified for
3325 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3326 and the <space> character shall belong to this character class."
3327 [P1003.2, 2.5.2.1] */
3329 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3330 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3331 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3332 BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3333 BITw (tok_punct);
3334 size_t cnt;
3335 struct charseq *seq;
3337 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3338 if ((ctype->class_collection[cnt] & maskw) != 0)
3339 ctype->class_collection[cnt] |= BITw (tok_print);
3341 for (cnt = 0; cnt < 256; ++cnt)
3342 if ((ctype->class256_collection[cnt] & mask) != 0)
3343 ctype->class256_collection[cnt] |= BIT (tok_print);
3346 seq = charmap_find_value (charmap, "space", 5);
3347 if (seq == NULL)
3348 seq = charmap_find_value (charmap, "SP", 2);
3349 if (seq == NULL)
3350 seq = charmap_find_value (charmap, "U00000020", 9);
3351 if (seq == NULL)
3353 if (!be_quiet)
3354 WITH_CUR_LOCALE (error (0, 0, _("\
3355 %s: character `%s' not defined while needed as default value"),
3356 "LC_CTYPE", "<space>"));
3358 else if (seq->nbytes != 1)
3359 WITH_CUR_LOCALE (error (0, 0, _("\
3360 %s: character `%s' in charmap not representable with one byte"),
3361 "LC_CTYPE", "<space>"));
3362 else
3363 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
3365 /* No need to search. */
3366 ELEM (ctype, class_collection, , L' ') |= BITw (tok_print);
3369 if (ctype->tomap_done[0] == 0)
3370 /* "If this keyword [toupper] is not specified, the lowercase letters
3371 `a' through `z', and their corresponding uppercase letters `A' to
3372 `Z', ..., shall automatically be included, with implementation-
3373 defined character values." [P1003.2, 2.5.2.1] */
3375 char tmp[4];
3376 int ch;
3378 strcpy (tmp, "<?>");
3380 for (ch = 'a'; ch <= 'z'; ++ch)
3382 struct charseq *seq_from, *seq_to;
3384 tmp[1] = (char) ch;
3386 seq_from = charmap_find_value (charmap, &tmp[1], 1);
3387 if (seq_from == NULL)
3389 char buf[10];
3390 sprintf (buf, "U%08X", ch);
3391 seq_from = charmap_find_value (charmap, buf, 9);
3393 if (seq_from == NULL)
3395 if (!be_quiet)
3396 WITH_CUR_LOCALE (error (0, 0, _("\
3397 %s: character `%s' not defined while needed as default value"),
3398 "LC_CTYPE", tmp));
3400 else if (seq_from->nbytes != 1)
3402 if (!be_quiet)
3403 WITH_CUR_LOCALE (error (0, 0, _("\
3404 %s: character `%s' needed as default value not representable with one byte"),
3405 "LC_CTYPE", tmp));
3407 else
3409 /* This conversion is implementation defined. */
3410 tmp[1] = (char) (ch + ('A' - 'a'));
3411 seq_to = charmap_find_value (charmap, &tmp[1], 1);
3412 if (seq_to == NULL)
3414 char buf[10];
3415 sprintf (buf, "U%08X", ch + ('A' - 'a'));
3416 seq_to = charmap_find_value (charmap, buf, 9);
3418 if (seq_to == NULL)
3420 if (!be_quiet)
3421 WITH_CUR_LOCALE (error (0, 0, _("\
3422 %s: character `%s' not defined while needed as default value"),
3423 "LC_CTYPE", tmp));
3425 else if (seq_to->nbytes != 1)
3427 if (!be_quiet)
3428 WITH_CUR_LOCALE (error (0, 0, _("\
3429 %s: character `%s' needed as default value not representable with one byte"),
3430 "LC_CTYPE", tmp));
3432 else
3433 /* The index [0] is determined by the order of the
3434 `ctype_map_newP' calls in `ctype_startup'. */
3435 ctype->map256_collection[0][seq_from->bytes[0]]
3436 = seq_to->bytes[0];
3439 /* No need to search. */
3440 ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
3444 if (ctype->tomap_done[1] == 0)
3445 /* "If this keyword [tolower] is not specified, the mapping shall be
3446 the reverse mapping of the one specified to `toupper'." [P1003.2] */
3448 for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
3449 if (ctype->map_collection[0][cnt] != 0)
3450 ELEM (ctype, map_collection, [1],
3451 ctype->map_collection[0][cnt])
3452 = ctype->charnames[cnt];
3454 for (cnt = 0; cnt < 256; ++cnt)
3455 if (ctype->map256_collection[0][cnt] != 0)
3456 ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
3459 if (ctype->outdigits_act != 10)
3461 if (ctype->outdigits_act != 0)
3462 WITH_CUR_LOCALE (error (0, 0, _("\
3463 %s: field `%s' does not contain exactly ten entries"),
3464 "LC_CTYPE", "outdigit"));
3466 for (cnt = ctype->outdigits_act; cnt < 10; ++cnt)
3468 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3469 digits + cnt, 1);
3471 if (ctype->mboutdigits[cnt] == NULL)
3472 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3473 longnames[cnt],
3474 strlen (longnames[cnt]));
3476 if (ctype->mboutdigits[cnt] == NULL)
3477 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3478 uninames[cnt], 9);
3480 if (ctype->mboutdigits[cnt] == NULL)
3482 /* Provide a replacement. */
3483 WITH_CUR_LOCALE (error (0, 0, _("\
3484 no output digits defined and none of the standard names in the charmap")));
3486 ctype->mboutdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
3487 sizeof (struct charseq)
3488 + 1);
3490 /* This is better than nothing. */
3491 ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
3492 ctype->mboutdigits[cnt]->nbytes = 1;
3495 ctype->wcoutdigits[cnt] = L'0' + cnt;
3498 ctype->outdigits_act = 10;
/* Construction of sparse 3-level tables.
   See wchar-lookup.h for their structure and the meaning of p and q.  */

/* State used while building the bit table for one character class.
   The routines below split a character into a level-1 index (the bits
   above q + p + 5), a q-bit level-2 index, a p-bit level-3 index and
   the number of a bit inside one 32-bit word (the low 5 bits).  */
struct wctype_table
{
  /* Parameters.  They must be stored before wctype_table_init is
     called.  */
  unsigned int p;
  unsigned int q;

  /* Working representation.  For every level: the number of units
     allocated, the number of units in use, and the array itself.
     Level 1 counts single entries; level 2 counts blocks of 1 << q
     entries; level 3 counts blocks of 1 << p words.  */
  size_t level1_alloc;
  size_t level1_size;
  uint32_t *level1;
  size_t level2_alloc;
  size_t level2_size;
  uint32_t *level2;
  size_t level3_alloc;
  size_t level3_size;
  uint32_t *level3;

  /* Compressed representation, produced by wctype_table_finalize:
     its size in bytes and the buffer holding it.  */
  size_t result_size;
  char *result;
};
3526 /* Initialize. Assumes t->p and t->q have already been set. */
3527 static inline void
3528 wctype_table_init (struct wctype_table *t)
3530 t->level1 = NULL;
3531 t->level1_alloc = t->level1_size = 0;
3532 t->level2 = NULL;
3533 t->level2_alloc = t->level2_size = 0;
3534 t->level3 = NULL;
3535 t->level3_alloc = t->level3_size = 0;
3538 /* Retrieve an entry. */
3539 static inline int
3540 wctype_table_get (struct wctype_table *t, uint32_t wc)
3542 uint32_t index1 = wc >> (t->q + t->p + 5);
3543 if (index1 < t->level1_size)
3545 uint32_t lookup1 = t->level1[index1];
3546 if (lookup1 != EMPTY)
3548 uint32_t index2 = ((wc >> (t->p + 5)) & ((1 << t->q) - 1))
3549 + (lookup1 << t->q);
3550 uint32_t lookup2 = t->level2[index2];
3551 if (lookup2 != EMPTY)
3553 uint32_t index3 = ((wc >> 5) & ((1 << t->p) - 1))
3554 + (lookup2 << t->p);
3555 uint32_t lookup3 = t->level3[index3];
3556 uint32_t index4 = wc & 0x1f;
3558 return (lookup3 >> index4) & 1;
3562 return 0;
3565 /* Add one entry. */
3566 static void
3567 wctype_table_add (struct wctype_table *t, uint32_t wc)
3569 uint32_t index1 = wc >> (t->q + t->p + 5);
3570 uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1);
3571 uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1);
3572 uint32_t index4 = wc & 0x1f;
3573 size_t i, i1, i2;
3575 if (index1 >= t->level1_size)
3577 if (index1 >= t->level1_alloc)
3579 size_t alloc = 2 * t->level1_alloc;
3580 if (alloc <= index1)
3581 alloc = index1 + 1;
3582 t->level1 = (uint32_t *) xrealloc ((char *) t->level1,
3583 alloc * sizeof (uint32_t));
3584 t->level1_alloc = alloc;
3586 while (index1 >= t->level1_size)
3587 t->level1[t->level1_size++] = EMPTY;
3590 if (t->level1[index1] == EMPTY)
3592 if (t->level2_size == t->level2_alloc)
3594 size_t alloc = 2 * t->level2_alloc + 1;
3595 t->level2 = (uint32_t *) xrealloc ((char *) t->level2,
3596 (alloc << t->q) * sizeof (uint32_t));
3597 t->level2_alloc = alloc;
3599 i1 = t->level2_size << t->q;
3600 i2 = (t->level2_size + 1) << t->q;
3601 for (i = i1; i < i2; i++)
3602 t->level2[i] = EMPTY;
3603 t->level1[index1] = t->level2_size++;
3606 index2 += t->level1[index1] << t->q;
3608 if (t->level2[index2] == EMPTY)
3610 if (t->level3_size == t->level3_alloc)
3612 size_t alloc = 2 * t->level3_alloc + 1;
3613 t->level3 = (uint32_t *) xrealloc ((char *) t->level3,
3614 (alloc << t->p) * sizeof (uint32_t));
3615 t->level3_alloc = alloc;
3617 i1 = t->level3_size << t->p;
3618 i2 = (t->level3_size + 1) << t->p;
3619 for (i = i1; i < i2; i++)
3620 t->level3[i] = 0;
3621 t->level2[index2] = t->level3_size++;
3624 index3 += t->level2[index2] << t->p;
3626 t->level3[index3] |= (uint32_t)1 << index4;
3629 /* Finalize and shrink. */
3630 static void
3631 wctype_table_finalize (struct wctype_table *t)
3633 size_t i, j, k;
3634 uint32_t reorder3[t->level3_size];
3635 uint32_t reorder2[t->level2_size];
3636 uint32_t level1_offset, level2_offset, level3_offset;
3638 /* Uniquify level3 blocks. */
3639 k = 0;
3640 for (j = 0; j < t->level3_size; j++)
3642 for (i = 0; i < k; i++)
3643 if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3644 (1 << t->p) * sizeof (uint32_t)) == 0)
3645 break;
3646 /* Relocate block j to block i. */
3647 reorder3[j] = i;
3648 if (i == k)
3650 if (i != j)
3651 memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3652 (1 << t->p) * sizeof (uint32_t));
3653 k++;
3656 t->level3_size = k;
3658 for (i = 0; i < (t->level2_size << t->q); i++)
3659 if (t->level2[i] != EMPTY)
3660 t->level2[i] = reorder3[t->level2[i]];
3662 /* Uniquify level2 blocks. */
3663 k = 0;
3664 for (j = 0; j < t->level2_size; j++)
3666 for (i = 0; i < k; i++)
3667 if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3668 (1 << t->q) * sizeof (uint32_t)) == 0)
3669 break;
3670 /* Relocate block j to block i. */
3671 reorder2[j] = i;
3672 if (i == k)
3674 if (i != j)
3675 memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3676 (1 << t->q) * sizeof (uint32_t));
3677 k++;
3680 t->level2_size = k;
3682 for (i = 0; i < t->level1_size; i++)
3683 if (t->level1[i] != EMPTY)
3684 t->level1[i] = reorder2[t->level1[i]];
3686 /* Create and fill the resulting compressed representation. */
3687 t->result_size =
3688 5 * sizeof (uint32_t)
3689 + t->level1_size * sizeof (uint32_t)
3690 + (t->level2_size << t->q) * sizeof (uint32_t)
3691 + (t->level3_size << t->p) * sizeof (uint32_t);
3692 t->result = (char *) xmalloc (t->result_size);
3694 level1_offset =
3695 5 * sizeof (uint32_t);
3696 level2_offset =
3697 5 * sizeof (uint32_t)
3698 + t->level1_size * sizeof (uint32_t);
3699 level3_offset =
3700 5 * sizeof (uint32_t)
3701 + t->level1_size * sizeof (uint32_t)
3702 + (t->level2_size << t->q) * sizeof (uint32_t);
3704 ((uint32_t *) t->result)[0] = t->q + t->p + 5;
3705 ((uint32_t *) t->result)[1] = t->level1_size;
3706 ((uint32_t *) t->result)[2] = t->p + 5;
3707 ((uint32_t *) t->result)[3] = (1 << t->q) - 1;
3708 ((uint32_t *) t->result)[4] = (1 << t->p) - 1;
3710 for (i = 0; i < t->level1_size; i++)
3711 ((uint32_t *) (t->result + level1_offset))[i] =
3712 (t->level1[i] == EMPTY
3714 : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3716 for (i = 0; i < (t->level2_size << t->q); i++)
3717 ((uint32_t *) (t->result + level2_offset))[i] =
3718 (t->level2[i] == EMPTY
3720 : (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
3722 for (i = 0; i < (t->level3_size << t->p); i++)
3723 ((uint32_t *) (t->result + level3_offset))[i] = t->level3[i];
3725 if (t->level1_alloc > 0)
3726 free (t->level1);
3727 if (t->level2_alloc > 0)
3728 free (t->level2);
3729 if (t->level3_alloc > 0)
3730 free (t->level3);
3733 #define TABLE wcwidth_table
3734 #define ELEMENT uint8_t
3735 #define DEFAULT 0xff
3736 #include "3level.h"
3738 #define TABLE wctrans_table
3739 #define ELEMENT int32_t
3740 #define DEFAULT 0
3741 #define wctrans_table_add wctrans_table_add_internal
3742 #include "3level.h"
3743 #undef wctrans_table_add
3744 /* The wctrans_table must actually store the difference between the
3745 desired result and the argument. */
3746 static inline void
3747 wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
3749 wctrans_table_add_internal (t, wc, mapped_wc - wc);
3753 /* Flattens the included transliterations into a translit list.
3754 Inserts them in the list at `cursor', and returns the new cursor. */
3755 static struct translit_t **
3756 translit_flatten (struct locale_ctype_t *ctype,
3757 const struct charmap_t *charmap,
3758 struct translit_t **cursor)
3760 while (ctype->translit_include != NULL)
3762 const char *copy_locale = ctype->translit_include->copy_locale;
3763 const char *copy_repertoire = ctype->translit_include->copy_repertoire;
3764 struct localedef_t *other;
3766 /* Unchain the include statement. During the depth-first traversal
3767 we don't want to visit any locale more than once. */
3768 ctype->translit_include = ctype->translit_include->next;
3770 other = find_locale (LC_CTYPE, copy_locale, copy_repertoire, charmap);
3772 if (other == NULL || other->categories[LC_CTYPE].ctype == NULL)
3774 WITH_CUR_LOCALE (error (0, 0, _("\
3775 %s: transliteration data from locale `%s' not available"),
3776 "LC_CTYPE", copy_locale));
3778 else
3780 struct locale_ctype_t *other_ctype =
3781 other->categories[LC_CTYPE].ctype;
3783 cursor = translit_flatten (other_ctype, charmap, cursor);
3784 assert (other_ctype->translit_include == NULL);
3786 if (other_ctype->translit != NULL)
3788 /* Insert the other_ctype->translit list at *cursor. */
3789 struct translit_t *endp = other_ctype->translit;
3790 while (endp->next != NULL)
3791 endp = endp->next;
3793 endp->next = *cursor;
3794 *cursor = other_ctype->translit;
3796 /* Avoid any risk of circular lists. */
3797 other_ctype->translit = NULL;
3799 cursor = &endp->next;
3802 if (ctype->default_missing == NULL)
3803 ctype->default_missing = other_ctype->default_missing;
3807 return cursor;
3810 static void
3811 allocate_arrays (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
3812 struct repertoire_t *repertoire)
3814 size_t idx, nr;
3815 const void *key;
3816 size_t len;
3817 void *vdata;
3818 void *curs;
3820 /* You wonder about this amount of memory? This is only because some
3821 users do not manage to address the array with unsigned values or
3822 data types with range >= 256. '\200' would result in the array
3823 index -128. To help these poor people we duplicate the entries for
3824 128 up to 255 below the entry for \0. */
3825 ctype->ctype_b = (char_class_t *) xcalloc (256 + 128, sizeof (char_class_t));
3826 ctype->ctype32_b = (char_class32_t *) xcalloc (256, sizeof (char_class32_t));
3827 ctype->class_b = (uint32_t **)
3828 xmalloc (ctype->nr_charclass * sizeof (uint32_t *));
3829 ctype->class_3level = (struct iovec *)
3830 xmalloc (ctype->nr_charclass * sizeof (struct iovec));
3832 /* This is the array accessed using the multibyte string elements. */
3833 for (idx = 0; idx < 256; ++idx)
3834 ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
3836 /* Mirror first 127 entries. We must take care that entry -1 is not
3837 mirrored because EOF == -1. */
3838 for (idx = 0; idx < 127; ++idx)
3839 ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
3841 /* The 32 bit array contains all characters < 0x100. */
3842 for (idx = 0; idx < ctype->class_collection_act; ++idx)
3843 if (ctype->charnames[idx] < 0x100)
3844 ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
3846 for (nr = 0; nr < ctype->nr_charclass; nr++)
3848 ctype->class_b[nr] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
3850 /* We only set CLASS_B for the bits in the ISO C classes, not
3851 the user defined classes. The number should not change but
3852 who knows. */
3853 #define LAST_ISO_C_BIT 11
3854 if (nr <= LAST_ISO_C_BIT)
3855 for (idx = 0; idx < 256; ++idx)
3856 if (ctype->class256_collection[idx] & _ISbit (nr))
3857 ctype->class_b[nr][idx >> 5] |= (uint32_t) 1 << (idx & 0x1f);
3860 for (nr = 0; nr < ctype->nr_charclass; nr++)
3862 struct wctype_table t;
3864 t.p = 4; /* or: 5 */
3865 t.q = 7; /* or: 6 */
3866 wctype_table_init (&t);
3868 for (idx = 0; idx < ctype->class_collection_act; ++idx)
3869 if (ctype->class_collection[idx] & _ISwbit (nr))
3870 wctype_table_add (&t, ctype->charnames[idx]);
3872 wctype_table_finalize (&t);
3874 if (verbose)
3875 WITH_CUR_LOCALE (fprintf (stderr, _("\
3876 %s: table for class \"%s\": %lu bytes\n"),
3877 "LC_CTYPE", ctype->classnames[nr],
3878 (unsigned long int) t.result_size));
3880 ctype->class_3level[nr].iov_base = t.result;
3881 ctype->class_3level[nr].iov_len = t.result_size;
3884 /* Room for table of mappings. */
3885 ctype->map_b = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3886 ctype->map32_b = (uint32_t **) xmalloc (ctype->map_collection_nr
3887 * sizeof (uint32_t *));
3888 ctype->map_3level = (struct iovec *)
3889 xmalloc (ctype->map_collection_nr * sizeof (struct iovec));
3891 /* Fill in all mappings. */
3892 for (idx = 0; idx < 2; ++idx)
3894 unsigned int idx2;
3896 /* Allocate table. */
3897 ctype->map_b[idx] = (uint32_t *)
3898 xmalloc ((256 + 128) * sizeof (uint32_t));
3900 /* Copy values from collection. */
3901 for (idx2 = 0; idx2 < 256; ++idx2)
3902 ctype->map_b[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
3904 /* Mirror first 127 entries. We must take care not to map entry
3905 -1 because EOF == -1. */
3906 for (idx2 = 0; idx2 < 127; ++idx2)
3907 ctype->map_b[idx][idx2] = ctype->map_b[idx][256 + idx2];
3909 /* EOF must map to EOF. */
3910 ctype->map_b[idx][127] = EOF;
3913 for (idx = 0; idx < ctype->map_collection_nr; ++idx)
3915 unsigned int idx2;
3917 /* Allocate table. */
3918 ctype->map32_b[idx] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
3920 /* Copy values from collection. Default is identity mapping. */
3921 for (idx2 = 0; idx2 < 256; ++idx2)
3922 ctype->map32_b[idx][idx2] =
3923 (ctype->map_collection[idx][idx2] != 0
3924 ? ctype->map_collection[idx][idx2]
3925 : idx2);
3928 for (nr = 0; nr < ctype->map_collection_nr; nr++)
3930 struct wctrans_table t;
3932 t.p = 7;
3933 t.q = 9;
3934 wctrans_table_init (&t);
3936 for (idx = 0; idx < ctype->map_collection_act[nr]; ++idx)
3937 if (ctype->map_collection[nr][idx] != 0)
3938 wctrans_table_add (&t, ctype->charnames[idx],
3939 ctype->map_collection[nr][idx]);
3941 wctrans_table_finalize (&t);
3943 if (verbose)
3944 WITH_CUR_LOCALE (fprintf (stderr, _("\
3945 %s: table for map \"%s\": %lu bytes\n"),
3946 "LC_CTYPE", ctype->mapnames[nr],
3947 (unsigned long int) t.result_size));
3949 ctype->map_3level[nr].iov_base = t.result;
3950 ctype->map_3level[nr].iov_len = t.result_size;
3953 /* Extra array for class and map names. */
3954 ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3955 * sizeof (uint32_t));
3956 ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3957 * sizeof (uint32_t));
3959 ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
3960 ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
3962 /* Array for width information. Because the expected widths are very
3963 small (never larger than 2) we use only one single byte. This
3964 saves space.
3965 We put only printable characters in the table. wcwidth is specified
3966 to return -1 for non-printable characters. Doing the check here
3967 saves a run-time check.
3968 But we put L'\0' in the table. This again saves a run-time check. */
3970 struct wcwidth_table t;
3972 t.p = 7;
3973 t.q = 9;
3974 wcwidth_table_init (&t);
3976 /* First set all the printable characters of the character set to
3977 the default width. */
3978 curs = NULL;
3979 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
3981 struct charseq *data = (struct charseq *) vdata;
3983 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
3984 data->ucs4 = repertoire_find_value (ctype->repertoire,
3985 data->name, len);
3987 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
3989 uint32_t *class_bits =
3990 find_idx (ctype, &ctype->class_collection, NULL,
3991 &ctype->class_collection_act, data->ucs4);
3993 if (class_bits != NULL && (*class_bits & BITw (tok_print)))
3994 wcwidth_table_add (&t, data->ucs4, charmap->width_default);
3998 /* Now add the explicitly specified widths. */
3999 if (charmap->width_rules != NULL)
4001 size_t cnt;
4003 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
4005 unsigned char bytes[charmap->mb_cur_max];
4006 int nbytes = charmap->width_rules[cnt].from->nbytes;
4008 /* We have the range of character for which the width is
4009 specified described using byte sequences of the multibyte
4010 charset. We have to convert this to UCS4 now. And we
4011 cannot simply convert the beginning and the end of the
4012 sequence, we have to iterate over the byte sequence and
4013 convert it for every single character. */
4014 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
4016 while (nbytes < charmap->width_rules[cnt].to->nbytes
4017 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
4018 nbytes) <= 0)
4020 /* Find the UCS value for `bytes'. */
4021 int inner;
4022 uint32_t wch;
4023 struct charseq *seq =
4024 charmap_find_symbol (charmap, bytes, nbytes);
4026 if (seq == NULL)
4027 wch = ILLEGAL_CHAR_VALUE;
4028 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
4029 wch = seq->ucs4;
4030 else
4031 wch = repertoire_find_value (ctype->repertoire, seq->name,
4032 strlen (seq->name));
4034 if (wch != ILLEGAL_CHAR_VALUE)
4036 /* Store the value. */
4037 uint32_t *class_bits =
4038 find_idx (ctype, &ctype->class_collection, NULL,
4039 &ctype->class_collection_act, wch);
4041 if (class_bits != NULL && (*class_bits & BITw (tok_print)))
4042 wcwidth_table_add (&t, wch,
4043 charmap->width_rules[cnt].width);
4046 /* "Increment" the bytes sequence. */
4047 inner = nbytes - 1;
4048 while (inner >= 0 && bytes[inner] == 0xff)
4049 --inner;
4051 if (inner < 0)
4053 /* We have to extend the byte sequence. */
4054 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
4055 break;
4057 bytes[0] = 1;
4058 memset (&bytes[1], 0, nbytes);
4059 ++nbytes;
4061 else
4063 ++bytes[inner];
4064 while (++inner < nbytes)
4065 bytes[inner] = 0;
4071 /* Set the width of L'\0' to 0. */
4072 wcwidth_table_add (&t, 0, 0);
4074 wcwidth_table_finalize (&t);
4076 if (verbose)
4077 WITH_CUR_LOCALE (fprintf (stderr, _("%s: table for width: %lu bytes\n"),
4078 "LC_CTYPE", (unsigned long int) t.result_size));
4080 ctype->width.iov_base = t.result;
4081 ctype->width.iov_len = t.result_size;
4084 /* Set MB_CUR_MAX. */
4085 ctype->mb_cur_max = charmap->mb_cur_max;
4087 /* Now determine the table for the transliteration information.
4089 XXX It is not yet clear to me whether it is worth implementing a
4090 complicated algorithm which uses a hash table to locate the entries.
4091 For now I'll use a simple array which can be searching using binary
4092 search. */
4093 if (ctype->translit_include != NULL)
4094 /* Traverse the locales mentioned in the `include' statements in a
4095 depth-first way and fold in their transliteration information. */
4096 translit_flatten (ctype, charmap, &ctype->translit);
4098 if (ctype->translit != NULL)
4100 /* First count how many entries we have. This is the upper limit
4101 since some entries from the included files might be overwritten. */
4102 size_t number = 0;
4103 size_t cnt;
4104 struct translit_t *runp = ctype->translit;
4105 struct translit_t **sorted;
4106 size_t from_len, to_len;
4108 while (runp != NULL)
4110 ++number;
4111 runp = runp->next;
4114 /* Next we allocate an array large enough and fill in the values. */
4115 sorted = (struct translit_t **) alloca (number
4116 * sizeof (struct translit_t **));
4117 runp = ctype->translit;
4118 number = 0;
4121 /* Search for the place where to insert this string.
4122 XXX Better use a real sorting algorithm later. */
4123 size_t idx = 0;
4124 int replace = 0;
4126 while (idx < number)
4128 int res = wcscmp ((const wchar_t *) sorted[idx]->from,
4129 (const wchar_t *) runp->from);
4130 if (res == 0)
4132 replace = 1;
4133 break;
4135 if (res > 0)
4136 break;
4137 ++idx;
4140 if (replace)
4141 sorted[idx] = runp;
4142 else
4144 memmove (&sorted[idx + 1], &sorted[idx],
4145 (number - idx) * sizeof (struct translit_t *));
4146 sorted[idx] = runp;
4147 ++number;
4150 runp = runp->next;
4152 while (runp != NULL);
4154 /* The next step is putting all the possible transliteration
4155 strings in one memory block so that we can write it out.
4156 We need several different blocks:
4157 - index to the from-string array
4158 - from-string array
4159 - index to the to-string array
4160 - to-string array.
4162 from_len = to_len = 0;
4163 for (cnt = 0; cnt < number; ++cnt)
4165 struct translit_to_t *srunp;
4166 from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4167 srunp = sorted[cnt]->to;
4168 while (srunp != NULL)
4170 to_len += wcslen ((const wchar_t *) srunp->str) + 1;
4171 srunp = srunp->next;
4173 /* Plus one for the extra NUL character marking the end of
4174 the list for the current entry. */
4175 ++to_len;
4178 /* We can allocate the arrays for the results. */
4179 ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
4180 ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
4181 ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
4182 ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
4184 from_len = 0;
4185 to_len = 0;
4186 for (cnt = 0; cnt < number; ++cnt)
4188 size_t len;
4189 struct translit_to_t *srunp;
4191 ctype->translit_from_idx[cnt] = from_len;
4192 ctype->translit_to_idx[cnt] = to_len;
4194 len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4195 wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
4196 (const wchar_t *) sorted[cnt]->from, len);
4197 from_len += len;
4199 ctype->translit_to_idx[cnt] = to_len;
4200 srunp = sorted[cnt]->to;
4201 while (srunp != NULL)
4203 len = wcslen ((const wchar_t *) srunp->str) + 1;
4204 wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
4205 (const wchar_t *) srunp->str, len);
4206 to_len += len;
4207 srunp = srunp->next;
4209 ctype->translit_to_tbl[to_len++] = L'\0';
4212 /* Store the information about the length. */
4213 ctype->translit_idx_size = number;
4214 ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
4215 ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
4217 else
4219 /* Provide some dummy pointers since we have nothing to write out. */
4220 static uint32_t no_str = { 0 };
4222 ctype->translit_from_idx = &no_str;
4223 ctype->translit_from_tbl = &no_str;
4224 ctype->translit_to_tbl = &no_str;
4225 ctype->translit_idx_size = 0;
4226 ctype->translit_from_tbl_size = 0;
4227 ctype->translit_to_tbl_size = 0;