* include/features.h: Grok _ATFILE_SOURCE and define __USE_ATFILE when
[glibc.git] / locale / programs / ld-ctype.c
blobb0b2e3f805327435e80c31ea300cb7e18a7f5cb2
1 /* Copyright (C) 1995-2002, 2003, 2004, 2005 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License version 2 as
7 published by the Free Software Foundation.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 #ifdef HAVE_CONFIG_H
19 # include <config.h>
20 #endif
22 #include <alloca.h>
23 #include <byteswap.h>
24 #include <endian.h>
25 #include <errno.h>
26 #include <limits.h>
27 #include <obstack.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <wchar.h>
31 #include <wctype.h>
32 #include <sys/uio.h>
34 #include "localedef.h"
35 #include "charmap.h"
36 #include "localeinfo.h"
37 #include "langinfo.h"
38 #include "linereader.h"
39 #include "locfile-token.h"
40 #include "locfile.h"
42 #include <assert.h>
#ifdef PREDEFINED_CLASSES
/* These are the extra bits not in wctype.h since these are not preallocated
   classes.  The constants are unsigned: shifting a signed 1 into bit 31
   is undefined behaviour, while the unsigned shift is well defined and
   yields the intended 32-bit mask.  */
# define _ISwspecial1   (1U << 29)
# define _ISwspecial2   (1U << 30)
# define _ISwspecial3   (1U << 31)
#endif
54 /* The bit used for representing a special class. */
/* BITPOS maps a class token to its offset from tok_upper.  BIT and BITw
   hand that offset to _ISbit and _ISwbit respectively; in this file BIT
   masks are applied to class256_collection entries and BITw masks to
   class_collection entries.  */
55 #define BITPOS(class) ((class) - tok_upper)
56 #define BIT(class) (_ISbit (BITPOS (class)))
57 #define BITw(class) (_ISwbit (BITPOS (class)))
/* ELEM dereferences the pointer returned by find_idx for VALUE in the
   table CTYPE->COLLECTION, passing the matching COLLECTION_max and
   COLLECTION_act counters.  IDX is pasted verbatim after each member
   name, so it may be left empty (as the callers in ctype_finish do) or,
   presumably, be an array subscript for per-map tables.  The expansion
   is used both as a value and as the target of `|='.  */
59 #define ELEM(ctype, collection, idx, value) \
60 *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx, \
61 &ctype->collection##_act idx, value)
64 /* To be compatible with former implementations we for now restrict
65 the number of bits for character classes to 16. When compatibility
66 is not necessary anymore increase the number to 32. */
/* char_class_t is the element type of the ctype_b table and
   char_class32_t that of the ctype32_b table in struct locale_ctype_t.  */
67 #define char_class_t uint16_t
68 #define char_class32_t uint32_t
71 /* Type to describe a transliteration action. We have a possibly
72 multiple character from-string and a set of multiple character
73 to-strings. All are 32bit values since this is what is used in
74 the gconv functions. */
/* One to-string of a transliteration entry.  */
75 struct translit_to_t
/* The replacement as 32-bit values (how the string is terminated is not
   visible here -- TODO confirm).  */
77 uint32_t *str;
/* Link to a further to-string; presumably the next alternative for the
   same from-string.  */
79 struct translit_to_t *next;
/* One transliteration entry: a from-string together with its list of
   to-strings.  Entries are chained through `next'.  */
82 struct translit_t
/* The string to be replaced, as 32-bit values.  */
84 uint32_t *from;
/* File name and line number; presumably where the entry was defined,
   kept for diagnostics.  */
86 const char *fname;
87 size_t lineno;
/* Head of the list of replacement strings.  */
89 struct translit_to_t *to;
91 struct translit_t *next;
/* One entry of the translit_ignore list.  ctype_finish orders the list
   by the `from' member and counts the entries.  */
94 struct translit_ignore_t
/* Presumably a range of character values FROM..TO visited in increments
   of STEP -- confirm against the code that fills these in.  */
96 uint32_t from;
97 uint32_t to;
98 uint32_t step;
/* File name and line number; presumably the place of definition.  */
100 const char *fname;
101 size_t lineno;
103 struct translit_ignore_t *next;
107 /* Type to describe a transliteration include statement. */
108 struct translit_include_t
/* Names of the locale and of the repertoire given in the include
   statement (semantics inferred from the member names -- confirm).  */
110 const char *copy_locale;
111 const char *copy_repertoire;
/* Include statements are chained in a singly linked list.  */
113 struct translit_include_t *next;
117 /* Sparse table of uint32_t. */
/* The macros below parameterise "3level.h".  After the inclusion this
   file uses `struct idx_table' (member charnames_idx of struct
   locale_ctype_t) and idx_table_init.  DEFAULT has the same value as the
   EMPTY marker defined further down.  */
118 #define TABLE idx_table
119 #define ELEMENT uint32_t
120 #define DEFAULT ((uint32_t) ~0)
121 #define NO_FINALIZE
122 #include "3level.h"
125 /* The real definition of the struct for the LC_CTYPE locale. */
126 struct locale_ctype_t
/* Table of known character values.  ctype_startup allocates
   charnames_max entries and presets the first 256 to their own index;
   charnames_act is the number of entries in use.  */
128 uint32_t *charnames;
129 size_t charnames_max;
130 size_t charnames_act;
131 /* An index lookup table, to speedup find_idx. */
132 struct idx_table charnames_idx;
/* Repertoire read in ctype_finish when a repertoire name is known.  */
134 struct repertoire_t *repertoire;
136 /* We will allow up to 8 * sizeof (uint32_t) character classes. */
137 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
138 size_t nr_charclass;
139 const char *classnames[MAX_NR_CHARCLASS];
140 uint32_t last_class_char;
/* Class bit masks.  class256_collection is indexed by byte value and
   tested with _ISbit masks; class_collection is tested with _ISwbit
   masks and is indexed in parallel with charnames.  */
141 uint32_t class256_collection[256];
142 uint32_t *class_collection;
143 size_t class_collection_max;
144 size_t class_collection_act;
145 uint32_t class_done;
146 uint32_t class_offset;
/* Input digits in multibyte and wide form.  ctype_finish keeps only
   complete groups of ten and installs defaults when none remain.  */
148 struct charseq **mbdigits;
149 size_t mbdigits_act;
150 size_t mbdigits_max;
151 uint32_t *wcdigits;
152 size_t wcdigits_act;
153 size_t wcdigits_max;
/* Output digits.  ctype_finish replaces unset entries (NULL resp. 0)
   with a question mark.  */
155 struct charseq *mboutdigits[10];
156 uint32_t wcoutdigits[10];
157 size_t outdigits_act;
159 /* If the following number ever turns out to be too small simply
160 increase it. But I doubt it will. --drepper@gnu */
161 #define MAX_NR_CHARMAP 16
/* Character maps.  ctype_startup registers "toupper" and "tolower"
   first and presets the first 256 entries of map_collection[0],
   map_collection[1] and both map256_collection rows to the identity.  */
162 const char *mapnames[MAX_NR_CHARMAP];
163 uint32_t *map_collection[MAX_NR_CHARMAP];
164 uint32_t map256_collection[2][256];
165 size_t map_collection_max[MAX_NR_CHARMAP];
166 size_t map_collection_act[MAX_NR_CHARMAP];
167 size_t map_collection_nr;
168 size_t last_map_idx;
169 int tomap_done[MAX_NR_CHARMAP];
170 uint32_t map_offset;
172 /* Transliteration information. */
173 struct translit_include_t *translit_include;
174 struct translit_t *translit;
175 struct translit_ignore_t *translit_ignore;
/* Number of translit_ignore entries, counted in ctype_finish.  */
176 uint32_t ntranslit_ignore;
178 uint32_t *default_missing;
179 const char *default_missing_file;
180 size_t default_missing_lineno;
/* Set to 1 by ctype_startup when enc_not_ascii_compatible is nonzero.  */
182 uint32_t to_nonascii;
184 /* The arrays for the binary representation. */
/* ctype_output writes these members to the locale file after calling
   allocate_arrays (which presumably fills them in -- not visible here).  */
185 char_class_t *ctype_b;
186 char_class32_t *ctype32_b;
187 uint32_t **map_b;
188 uint32_t **map32_b;
189 uint32_t **class_b;
190 struct iovec *class_3level;
191 struct iovec *map_3level;
192 uint32_t *class_name_ptr;
193 uint32_t *map_name_ptr;
194 struct iovec width;
195 uint32_t mb_cur_max;
/* Taken from the charmap in ctype_finish; "//UNKNOWN//" if the charmap
   names no code set.  */
196 const char *codeset_name;
197 uint32_t *translit_from_idx;
198 uint32_t *translit_from_tbl;
199 uint32_t *translit_to_idx;
200 uint32_t *translit_to_tbl;
201 uint32_t translit_idx_size;
202 size_t translit_from_tbl_size;
203 size_t translit_to_tbl_size;
/* Obstack initialised at the end of ctype_startup.  */
205 struct obstack mempool;
209 /* Marker for an empty slot. This has the value 0xFFFFFFFF, regardless
210 whether 'int' is 16 bit, 32 bit, or 64 bit. */
211 #define EMPTY ((uint32_t) ~0)
/* Allocation hooks used by the obstack macros from <obstack.h>: chunks
   are obtained with xmalloc and released with free.  */
214 #define obstack_chunk_alloc xmalloc
215 #define obstack_chunk_free free
218 /* Prototypes for local functions. */
/* ctype_startup creates (or shares, when COPY_LOCALE is given) the
   LC_CTYPE data of LOCALE; it does nothing if IGNORE_CONTENT is nonzero
   or the data already exists.  */
219 static void ctype_startup (struct linereader *lr, struct localedef_t *locale,
220 const struct charmap_t *charmap,
221 struct localedef_t *copy_locale,
222 int ignore_content);
/* Register a new character class resp. character map called NAME.  */
223 static void ctype_class_new (struct linereader *lr,
224 struct locale_ctype_t *ctype, const char *name);
225 static void ctype_map_new (struct linereader *lr,
226 struct locale_ctype_t *ctype,
227 const char *name, const struct charmap_t *charmap);
/* Return a pointer to the slot for IDX in *TABLE.  Callers in this file
   also pass NULL for TABLE, MAX and ACT when only the side effect of
   entering IDX into the name array is wanted.  */
228 static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table,
229 size_t *max, size_t *act, unsigned int idx);
/* Both are called with the repertoire stored in CTYPE: the first from
   ctype_finish, the second from ctype_output.  */
230 static void set_class_defaults (struct locale_ctype_t *ctype,
231 const struct charmap_t *charmap,
232 struct repertoire_t *repertoire);
233 static void allocate_arrays (struct locale_ctype_t *ctype,
234 const struct charmap_t *charmap,
235 struct repertoire_t *repertoire);
/* Symbolic names of the ten decimal digits.  ctype_finish tries these
   when the digit bytes themselves are not found in the charmap.  */
238 static const char *longnames[] =
240 "zero", "one", "two", "three", "four",
241 "five", "six", "seven", "eight", "nine"
/* Names of the form U<8 hex digits> for the values 0x30 .. 0x39.  Not
   referenced in the part of the file visible here.  */
243 static const char *uninames[] =
245 "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
246 "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
/* The digit bytes used as default input digits in ctype_finish.  */
248 static const unsigned char digits[] = "0123456789";
251 static void
252 ctype_startup (struct linereader *lr, struct localedef_t *locale,
253 const struct charmap_t *charmap,
254 struct localedef_t *copy_locale, int ignore_content)
256 unsigned int cnt;
257 struct locale_ctype_t *ctype;
259 if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL)
261 if (copy_locale == NULL)
263 /* Allocate the needed room. */
264 locale->categories[LC_CTYPE].ctype = ctype =
265 (struct locale_ctype_t *) xcalloc (1,
266 sizeof (struct locale_ctype_t));
268 /* We have seen no names yet. */
269 ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
270 ctype->charnames =
271 (unsigned int *) xmalloc (ctype->charnames_max
272 * sizeof (unsigned int));
273 for (cnt = 0; cnt < 256; ++cnt)
274 ctype->charnames[cnt] = cnt;
275 ctype->charnames_act = 256;
276 idx_table_init (&ctype->charnames_idx);
278 /* Fill character class information. */
279 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
280 /* The order of the following instructions determines the bit
281 positions! */
282 ctype_class_new (lr, ctype, "upper");
283 ctype_class_new (lr, ctype, "lower");
284 ctype_class_new (lr, ctype, "alpha");
285 ctype_class_new (lr, ctype, "digit");
286 ctype_class_new (lr, ctype, "xdigit");
287 ctype_class_new (lr, ctype, "space");
288 ctype_class_new (lr, ctype, "print");
289 ctype_class_new (lr, ctype, "graph");
290 ctype_class_new (lr, ctype, "blank");
291 ctype_class_new (lr, ctype, "cntrl");
292 ctype_class_new (lr, ctype, "punct");
293 ctype_class_new (lr, ctype, "alnum");
294 #ifdef PREDEFINED_CLASSES
295 /* The following are extensions from ISO 14652. */
296 ctype_class_new (lr, ctype, "left_to_right");
297 ctype_class_new (lr, ctype, "right_to_left");
298 ctype_class_new (lr, ctype, "num_terminator");
299 ctype_class_new (lr, ctype, "num_separator");
300 ctype_class_new (lr, ctype, "segment_separator");
301 ctype_class_new (lr, ctype, "block_separator");
302 ctype_class_new (lr, ctype, "direction_control");
303 ctype_class_new (lr, ctype, "sym_swap_layout");
304 ctype_class_new (lr, ctype, "char_shape_selector");
305 ctype_class_new (lr, ctype, "num_shape_selector");
306 ctype_class_new (lr, ctype, "non_spacing");
307 ctype_class_new (lr, ctype, "non_spacing_level3");
308 ctype_class_new (lr, ctype, "normal_connect");
309 ctype_class_new (lr, ctype, "r_connect");
310 ctype_class_new (lr, ctype, "no_connect");
311 ctype_class_new (lr, ctype, "no_connect-space");
312 ctype_class_new (lr, ctype, "vowel_connect");
313 #endif
315 ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
316 ctype->class_collection
317 = (uint32_t *) xcalloc (sizeof (unsigned long int),
318 ctype->class_collection_max);
319 ctype->class_collection_act = 256;
321 /* Fill character map information. */
322 ctype->last_map_idx = MAX_NR_CHARMAP;
323 ctype_map_new (lr, ctype, "toupper", charmap);
324 ctype_map_new (lr, ctype, "tolower", charmap);
325 #ifdef PREDEFINED_CLASSES
326 ctype_map_new (lr, ctype, "tosymmetric", charmap);
327 #endif
329 /* Fill first 256 entries in `toXXX' arrays. */
330 for (cnt = 0; cnt < 256; ++cnt)
332 ctype->map_collection[0][cnt] = cnt;
333 ctype->map_collection[1][cnt] = cnt;
334 #ifdef PREDEFINED_CLASSES
335 ctype->map_collection[2][cnt] = cnt;
336 #endif
337 ctype->map256_collection[0][cnt] = cnt;
338 ctype->map256_collection[1][cnt] = cnt;
341 if (enc_not_ascii_compatible)
342 ctype->to_nonascii = 1;
344 obstack_init (&ctype->mempool);
346 else
347 ctype = locale->categories[LC_CTYPE].ctype =
348 copy_locale->categories[LC_CTYPE].ctype;
353 void
354 ctype_finish (struct localedef_t *locale, const struct charmap_t *charmap)
356 /* See POSIX.2, table 2-6 for the meaning of the following table. */
357 #define NCLASS 12
358 static const struct
360 const char *name;
361 const char allow[NCLASS];
363 valid_table[NCLASS] =
365 /* The order is important. See token.h for more information.
366 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
367 { "upper", "--MX-XDDXXX-" },
368 { "lower", "--MX-XDDXXX-" },
369 { "alpha", "---X-XDDXXX-" },
370 { "digit", "XXX--XDDXXX-" },
371 { "xdigit", "-----XDDXXX-" },
372 { "space", "XXXXX------X" },
373 { "print", "---------X--" },
374 { "graph", "---------X--" },
375 { "blank", "XXXXXM-----X" },
376 { "cntrl", "XXXXX-XX--XX" },
377 { "punct", "XXXXX-DD-X-X" },
378 { "alnum", "-----XDDXXX-" }
380 size_t cnt;
381 int cls1, cls2;
382 uint32_t space_value;
383 struct charseq *space_seq;
384 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
385 int warned;
386 const void *key;
387 size_t len;
388 void *vdata;
389 void *curs;
391 /* Now resolve copying and also handle completely missing definitions. */
392 if (ctype == NULL)
394 const char *repertoire_name;
396 /* First see whether we were supposed to copy. If yes, find the
397 actual definition. */
398 if (locale->copy_name[LC_CTYPE] != NULL)
400 /* Find the copying locale. This has to happen transitively since
401 the locale we are copying from might also copying another one. */
402 struct localedef_t *from = locale;
405 from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
406 from->repertoire_name, charmap);
407 while (from->categories[LC_CTYPE].ctype == NULL
408 && from->copy_name[LC_CTYPE] != NULL);
410 ctype = locale->categories[LC_CTYPE].ctype
411 = from->categories[LC_CTYPE].ctype;
414 /* If there is still no definition issue an warning and create an
415 empty one. */
416 if (ctype == NULL)
418 if (! be_quiet)
419 WITH_CUR_LOCALE (error (0, 0, _("\
420 No definition for %s category found"), "LC_CTYPE"));
421 ctype_startup (NULL, locale, charmap, NULL, 0);
422 ctype = locale->categories[LC_CTYPE].ctype;
425 /* Get the repertoire we have to use. */
426 repertoire_name = locale->repertoire_name ?: repertoire_global;
427 if (repertoire_name != NULL)
428 ctype->repertoire = repertoire_read (repertoire_name);
431 /* We need the name of the currently used 8-bit character set to
432 make correct conversion between this 8-bit representation and the
433 ISO 10646 character set used internally for wide characters. */
434 ctype->codeset_name = charmap->code_set_name;
435 if (ctype->codeset_name == NULL)
437 if (! be_quiet)
438 WITH_CUR_LOCALE (error (0, 0, _("\
439 No character set name specified in charmap")));
440 ctype->codeset_name = "//UNKNOWN//";
443 /* Set default value for classes not specified. */
444 set_class_defaults (ctype, charmap, ctype->repertoire);
446 /* Check according to table. */
447 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
449 uint32_t tmp = ctype->class_collection[cnt];
451 if (tmp != 0)
453 for (cls1 = 0; cls1 < NCLASS; ++cls1)
454 if ((tmp & _ISwbit (cls1)) != 0)
455 for (cls2 = 0; cls2 < NCLASS; ++cls2)
456 if (valid_table[cls1].allow[cls2] != '-')
458 int eq = (tmp & _ISwbit (cls2)) != 0;
459 switch (valid_table[cls1].allow[cls2])
461 case 'M':
462 if (!eq)
464 uint32_t value = ctype->charnames[cnt];
466 if (!be_quiet)
467 WITH_CUR_LOCALE (error (0, 0, _("\
468 character L'\\u%0*x' in class `%s' must be in class `%s'"),
469 value > 0xffff ? 8 : 4,
470 value,
471 valid_table[cls1].name,
472 valid_table[cls2].name));
474 break;
476 case 'X':
477 if (eq)
479 uint32_t value = ctype->charnames[cnt];
481 if (!be_quiet)
482 WITH_CUR_LOCALE (error (0, 0, _("\
483 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
484 value > 0xffff ? 8 : 4,
485 value,
486 valid_table[cls1].name,
487 valid_table[cls2].name));
489 break;
491 case 'D':
492 ctype->class_collection[cnt] |= _ISwbit (cls2);
493 break;
495 default:
496 WITH_CUR_LOCALE (error (5, 0, _("\
497 internal error in %s, line %u"), __FUNCTION__, __LINE__));
503 for (cnt = 0; cnt < 256; ++cnt)
505 uint32_t tmp = ctype->class256_collection[cnt];
507 if (tmp != 0)
509 for (cls1 = 0; cls1 < NCLASS; ++cls1)
510 if ((tmp & _ISbit (cls1)) != 0)
511 for (cls2 = 0; cls2 < NCLASS; ++cls2)
512 if (valid_table[cls1].allow[cls2] != '-')
514 int eq = (tmp & _ISbit (cls2)) != 0;
515 switch (valid_table[cls1].allow[cls2])
517 case 'M':
518 if (!eq)
520 char buf[17];
522 snprintf (buf, sizeof buf, "\\%Zo", cnt);
524 if (!be_quiet)
525 WITH_CUR_LOCALE (error (0, 0, _("\
526 character '%s' in class `%s' must be in class `%s'"),
527 buf,
528 valid_table[cls1].name,
529 valid_table[cls2].name));
531 break;
533 case 'X':
534 if (eq)
536 char buf[17];
538 snprintf (buf, sizeof buf, "\\%Zo", cnt);
540 if (!be_quiet)
541 WITH_CUR_LOCALE (error (0, 0, _("\
542 character '%s' in class `%s' must not be in class `%s'"),
543 buf,
544 valid_table[cls1].name,
545 valid_table[cls2].name));
547 break;
549 case 'D':
550 ctype->class256_collection[cnt] |= _ISbit (cls2);
551 break;
553 default:
554 WITH_CUR_LOCALE (error (5, 0, _("\
555 internal error in %s, line %u"), __FUNCTION__, __LINE__));
561 /* ... and now test <SP> as a special case. */
562 space_value = 32;
563 if (((cnt = BITPOS (tok_space),
564 (ELEM (ctype, class_collection, , space_value)
565 & BITw (tok_space)) == 0)
566 || (cnt = BITPOS (tok_blank),
567 (ELEM (ctype, class_collection, , space_value)
568 & BITw (tok_blank)) == 0)))
570 if (!be_quiet)
571 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
572 valid_table[cnt].name));
574 else if (((cnt = BITPOS (tok_punct),
575 (ELEM (ctype, class_collection, , space_value)
576 & BITw (tok_punct)) != 0)
577 || (cnt = BITPOS (tok_graph),
578 (ELEM (ctype, class_collection, , space_value)
579 & BITw (tok_graph))
580 != 0)))
582 if (!be_quiet)
583 WITH_CUR_LOCALE (error (0, 0, _("\
584 <SP> character must not be in class `%s'"),
585 valid_table[cnt].name));
587 else
588 ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
590 space_seq = charmap_find_value (charmap, "SP", 2);
591 if (space_seq == NULL)
592 space_seq = charmap_find_value (charmap, "space", 5);
593 if (space_seq == NULL)
594 space_seq = charmap_find_value (charmap, "U00000020", 9);
595 if (space_seq == NULL || space_seq->nbytes != 1)
597 if (!be_quiet)
598 WITH_CUR_LOCALE (error (0, 0, _("\
599 character <SP> not defined in character map")));
601 else if (((cnt = BITPOS (tok_space),
602 (ctype->class256_collection[space_seq->bytes[0]]
603 & BIT (tok_space)) == 0)
604 || (cnt = BITPOS (tok_blank),
605 (ctype->class256_collection[space_seq->bytes[0]]
606 & BIT (tok_blank)) == 0)))
608 if (!be_quiet)
609 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
610 valid_table[cnt].name));
612 else if (((cnt = BITPOS (tok_punct),
613 (ctype->class256_collection[space_seq->bytes[0]]
614 & BIT (tok_punct)) != 0)
615 || (cnt = BITPOS (tok_graph),
616 (ctype->class256_collection[space_seq->bytes[0]]
617 & BIT (tok_graph)) != 0)))
619 if (!be_quiet)
620 WITH_CUR_LOCALE (error (0, 0, _("\
621 <SP> character must not be in class `%s'"),
622 valid_table[cnt].name));
624 else
625 ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
627 /* Now that the tests are done make sure the name array contains all
628 characters which are handled in the WIDTH section of the
629 character set definition file. */
630 if (charmap->width_rules != NULL)
631 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
633 unsigned char bytes[charmap->mb_cur_max];
634 int nbytes = charmap->width_rules[cnt].from->nbytes;
636 /* We have the range of character for which the width is
637 specified described using byte sequences of the multibyte
638 charset. We have to convert this to UCS4 now. And we
639 cannot simply convert the beginning and the end of the
640 sequence, we have to iterate over the byte sequence and
641 convert it for every single character. */
642 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
644 while (nbytes < charmap->width_rules[cnt].to->nbytes
645 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
646 nbytes) <= 0)
648 /* Find the UCS value for `bytes'. */
649 int inner;
650 uint32_t wch;
651 struct charseq *seq = charmap_find_symbol (charmap, bytes, nbytes);
653 if (seq == NULL)
654 wch = ILLEGAL_CHAR_VALUE;
655 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
656 wch = seq->ucs4;
657 else
658 wch = repertoire_find_value (ctype->repertoire, seq->name,
659 strlen (seq->name));
661 if (wch != ILLEGAL_CHAR_VALUE)
662 /* We are only interested in the side-effects of the
663 `find_idx' call. It will add appropriate entries in
664 the name array if this is necessary. */
665 (void) find_idx (ctype, NULL, NULL, NULL, wch);
667 /* "Increment" the bytes sequence. */
668 inner = nbytes - 1;
669 while (inner >= 0 && bytes[inner] == 0xff)
670 --inner;
672 if (inner < 0)
674 /* We have to extend the byte sequence. */
675 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
676 break;
678 bytes[0] = 1;
679 memset (&bytes[1], 0, nbytes);
680 ++nbytes;
682 else
684 ++bytes[inner];
685 while (++inner < nbytes)
686 bytes[inner] = 0;
691 /* Now set all the other characters of the character set to the
692 default width. */
693 curs = NULL;
694 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
696 struct charseq *data = (struct charseq *) vdata;
698 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
699 data->ucs4 = repertoire_find_value (ctype->repertoire,
700 data->name, len);
702 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
703 (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
706 /* There must be a multiple of 10 digits. */
707 if (ctype->mbdigits_act % 10 != 0)
709 assert (ctype->mbdigits_act == ctype->wcdigits_act);
710 ctype->wcdigits_act -= ctype->mbdigits_act % 10;
711 ctype->mbdigits_act -= ctype->mbdigits_act % 10;
712 WITH_CUR_LOCALE (error (0, 0, _("\
713 `digit' category has not entries in groups of ten")));
716 /* Check the input digits. There must be a multiple of ten available.
717 In each group it could be that one or the other character is missing.
718 In this case the whole group must be removed. */
719 cnt = 0;
720 while (cnt < ctype->mbdigits_act)
722 size_t inner;
723 for (inner = 0; inner < 10; ++inner)
724 if (ctype->mbdigits[cnt + inner] == NULL)
725 break;
727 if (inner == 10)
728 cnt += 10;
729 else
731 /* Remove the group. */
732 memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
733 ((ctype->wcdigits_act - cnt - 10)
734 * sizeof (ctype->mbdigits[0])));
735 ctype->mbdigits_act -= 10;
739 /* If no input digits are given use the default. */
740 if (ctype->mbdigits_act == 0)
742 if (ctype->mbdigits_max == 0)
744 ctype->mbdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
745 10 * sizeof (struct charseq *));
746 ctype->mbdigits_max = 10;
749 for (cnt = 0; cnt < 10; ++cnt)
751 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
752 digits + cnt, 1);
753 if (ctype->mbdigits[cnt] == NULL)
755 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
756 longnames[cnt],
757 strlen (longnames[cnt]));
758 if (ctype->mbdigits[cnt] == NULL)
760 /* Hum, this ain't good. */
761 WITH_CUR_LOCALE (error (0, 0, _("\
762 no input digits defined and none of the standard names in the charmap")));
764 ctype->mbdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
765 sizeof (struct charseq) + 1);
767 /* This is better than nothing. */
768 ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
769 ctype->mbdigits[cnt]->nbytes = 1;
774 ctype->mbdigits_act = 10;
777 /* Check the wide character input digits. There must be a multiple
778 of ten available. In each group it could be that one or the other
779 character is missing. In this case the whole group must be
780 removed. */
781 cnt = 0;
782 while (cnt < ctype->wcdigits_act)
784 size_t inner;
785 for (inner = 0; inner < 10; ++inner)
786 if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
787 break;
789 if (inner == 10)
790 cnt += 10;
791 else
793 /* Remove the group. */
794 memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
795 ((ctype->wcdigits_act - cnt - 10)
796 * sizeof (ctype->wcdigits[0])));
797 ctype->wcdigits_act -= 10;
801 /* If no input digits are given use the default. */
802 if (ctype->wcdigits_act == 0)
804 if (ctype->wcdigits_max == 0)
806 ctype->wcdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
807 10 * sizeof (uint32_t));
808 ctype->wcdigits_max = 10;
811 for (cnt = 0; cnt < 10; ++cnt)
812 ctype->wcdigits[cnt] = L'0' + cnt;
814 ctype->mbdigits_act = 10;
817 /* Check the outdigits. */
818 warned = 0;
819 for (cnt = 0; cnt < 10; ++cnt)
820 if (ctype->mboutdigits[cnt] == NULL)
822 static struct charseq replace[2];
824 if (!warned)
826 WITH_CUR_LOCALE (error (0, 0, _("\
827 not all characters used in `outdigit' are available in the charmap")));
828 warned = 1;
831 replace[0].nbytes = 1;
832 replace[0].bytes[0] = '?';
833 replace[0].bytes[1] = '\0';
834 ctype->mboutdigits[cnt] = &replace[0];
837 warned = 0;
838 for (cnt = 0; cnt < 10; ++cnt)
839 if (ctype->wcoutdigits[cnt] == 0)
841 if (!warned)
843 WITH_CUR_LOCALE (error (0, 0, _("\
844 not all characters used in `outdigit' are available in the repertoire")));
845 warned = 1;
848 ctype->wcoutdigits[cnt] = L'?';
851 /* Sort the entries in the translit_ignore list. */
852 if (ctype->translit_ignore != NULL)
854 struct translit_ignore_t *firstp = ctype->translit_ignore;
855 struct translit_ignore_t *runp;
857 ctype->ntranslit_ignore = 1;
859 for (runp = firstp->next; runp != NULL; runp = runp->next)
861 struct translit_ignore_t *lastp = NULL;
862 struct translit_ignore_t *cmpp;
864 ++ctype->ntranslit_ignore;
866 for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next)
867 if (runp->from < cmpp->from)
868 break;
870 runp->next = lastp;
871 if (lastp == NULL)
872 firstp = runp;
875 ctype->translit_ignore = firstp;
880 void
881 ctype_output (struct localedef_t *locale, const struct charmap_t *charmap,
882 const char *output_path)
884 static const char nulbytes[4] = { 0, 0, 0, 0 };
885 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
886 const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)
887 + ctype->nr_charclass + ctype->map_collection_nr);
888 struct iovec *iov = alloca (sizeof *iov
889 * (2 + nelems + 2 * ctype->nr_charclass
890 + ctype->map_collection_nr + 4));
891 struct locale_file data;
892 uint32_t *idx = alloca (sizeof *idx * (nelems + 1));
893 uint32_t default_missing_len;
894 size_t elem, cnt, offset, total;
895 char *cp;
897 /* Now prepare the output: Find the sizes of the table we can use. */
898 allocate_arrays (ctype, charmap, ctype->repertoire);
900 data.magic = LIMAGIC (LC_CTYPE);
901 data.n = nelems;
902 iov[0].iov_base = (void *) &data;
903 iov[0].iov_len = sizeof (data);
905 iov[1].iov_base = (void *) idx;
906 iov[1].iov_len = nelems * sizeof (uint32_t);
908 idx[0] = iov[0].iov_len + iov[1].iov_len;
909 offset = 0;
911 for (elem = 0; elem < nelems; ++elem)
913 if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))
914 switch (elem)
916 #define CTYPE_EMPTY(name) \
917 case name: \
918 iov[2 + elem + offset].iov_base = NULL; \
919 iov[2 + elem + offset].iov_len = 0; \
920 idx[elem + 1] = idx[elem]; \
921 break
923 CTYPE_EMPTY(_NL_CTYPE_GAP1);
924 CTYPE_EMPTY(_NL_CTYPE_GAP2);
925 CTYPE_EMPTY(_NL_CTYPE_GAP3);
926 CTYPE_EMPTY(_NL_CTYPE_GAP4);
927 CTYPE_EMPTY(_NL_CTYPE_GAP5);
928 CTYPE_EMPTY(_NL_CTYPE_GAP6);
930 #define CTYPE_DATA(name, base, len) \
931 case _NL_ITEM_INDEX (name): \
932 iov[2 + elem + offset].iov_base = (base); \
933 iov[2 + elem + offset].iov_len = (len); \
934 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
935 break
937 CTYPE_DATA (_NL_CTYPE_CLASS,
938 ctype->ctype_b,
939 (256 + 128) * sizeof (char_class_t));
941 CTYPE_DATA (_NL_CTYPE_TOUPPER,
942 ctype->map_b[0],
943 (256 + 128) * sizeof (uint32_t));
944 CTYPE_DATA (_NL_CTYPE_TOLOWER,
945 ctype->map_b[1],
946 (256 + 128) * sizeof (uint32_t));
948 CTYPE_DATA (_NL_CTYPE_TOUPPER32,
949 ctype->map32_b[0],
950 256 * sizeof (uint32_t));
951 CTYPE_DATA (_NL_CTYPE_TOLOWER32,
952 ctype->map32_b[1],
953 256 * sizeof (uint32_t));
955 CTYPE_DATA (_NL_CTYPE_CLASS32,
956 ctype->ctype32_b,
957 256 * sizeof (char_class32_t));
959 CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET,
960 &ctype->class_offset, sizeof (uint32_t));
962 CTYPE_DATA (_NL_CTYPE_MAP_OFFSET,
963 &ctype->map_offset, sizeof (uint32_t));
965 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE,
966 &ctype->translit_idx_size, sizeof (uint32_t));
968 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX,
969 ctype->translit_from_idx,
970 ctype->translit_idx_size * sizeof (uint32_t));
972 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL,
973 ctype->translit_from_tbl,
974 ctype->translit_from_tbl_size);
976 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX,
977 ctype->translit_to_idx,
978 ctype->translit_idx_size * sizeof (uint32_t));
980 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL,
981 ctype->translit_to_tbl, ctype->translit_to_tbl_size);
983 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
984 /* The class name array. */
985 total = 0;
986 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
988 iov[2 + elem + offset].iov_base
989 = (void *) ctype->classnames[cnt];
990 iov[2 + elem + offset].iov_len
991 = strlen (ctype->classnames[cnt]) + 1;
992 total += iov[2 + elem + offset].iov_len;
994 iov[2 + elem + offset].iov_base = (void *) nulbytes;
995 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
996 total += 1 + (4 - ((total + 1) % 4));
998 idx[elem + 1] = idx[elem] + total;
999 break;
1001 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
1002 /* The map name array. */
1003 total = 0;
1004 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
1006 iov[2 + elem + offset].iov_base
1007 = (void *) ctype->mapnames[cnt];
1008 iov[2 + elem + offset].iov_len
1009 = strlen (ctype->mapnames[cnt]) + 1;
1010 total += iov[2 + elem + offset].iov_len;
1012 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1013 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
1014 total += 1 + (4 - ((total + 1) % 4));
1016 idx[elem + 1] = idx[elem] + total;
1017 break;
1019 CTYPE_DATA (_NL_CTYPE_WIDTH,
1020 ctype->width.iov_base,
1021 ctype->width.iov_len);
1023 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
1024 &ctype->mb_cur_max, sizeof (uint32_t));
1026 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
1027 total = strlen (ctype->codeset_name) + 1;
1028 if (total % 4 == 0)
1029 iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
1030 else
1032 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
1033 memset (mempcpy (iov[2 + elem + offset].iov_base,
1034 ctype->codeset_name, total),
1035 '\0', 4 - (total & 3));
1036 total = (total + 3) & ~3;
1038 iov[2 + elem + offset].iov_len = total;
1039 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1040 break;
1043 CTYPE_DATA (_NL_CTYPE_MAP_TO_NONASCII,
1044 &ctype->to_nonascii, sizeof (uint32_t));
1046 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
1047 iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1048 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1049 *(uint32_t *) iov[2 + elem + offset].iov_base =
1050 ctype->mbdigits_act / 10;
1051 idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1052 break;
1054 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
1055 /* Align entries. */
1056 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1057 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1058 idx[elem] += iov[2 + elem + offset].iov_len;
1059 ++offset;
1061 iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1062 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1063 *(uint32_t *) iov[2 + elem + offset].iov_base =
1064 ctype->wcdigits_act / 10;
1065 idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1066 break;
1068 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
1069 /* Compute the length of all possible characters. For INDIGITS
1070 there might be more than one. We simply concatenate all of
1071 them with a NUL byte following. The NUL byte wouldn't be
1072 necessary but it makes it easier for the user. */
1073 total = 0;
1075 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1076 cnt < ctype->mbdigits_act; cnt += 10)
1077 total += ctype->mbdigits[cnt]->nbytes + 1;
1078 iov[2 + elem + offset].iov_base = (char *) alloca (total);
1079 iov[2 + elem + offset].iov_len = total;
1081 cp = iov[2 + elem + offset].iov_base;
1082 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1083 cnt < ctype->mbdigits_act; cnt += 10)
1085 cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes,
1086 ctype->mbdigits[cnt]->nbytes);
1087 *cp++ = '\0';
1089 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1090 break;
1092 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
1093 /* Compute the length of the character. Unlike INDIGITS there is
1094 exactly one character for each OUTDIGIT. We write it
1095 with a NUL byte following. The NUL byte wouldn't be
1096 necessary but it makes it easier for the user. */
1097 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
1098 total = ctype->mboutdigits[cnt]->nbytes + 1;
1099 iov[2 + elem + offset].iov_base = (char *) alloca (total);
1100 iov[2 + elem + offset].iov_len = total;
1102 *(char *) mempcpy (iov[2 + elem + offset].iov_base,
1103 ctype->mboutdigits[cnt]->bytes,
1104 ctype->mboutdigits[cnt]->nbytes) = '\0';
1105 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1106 break;
1108 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
1109 total = ctype->wcdigits_act / 10;
1111 iov[2 + elem + offset].iov_base =
1112 (uint32_t *) alloca (total * sizeof (uint32_t));
1113 iov[2 + elem + offset].iov_len = total * sizeof (uint32_t);
1115 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
1116 cnt < ctype->wcdigits_act; cnt += 10)
1117 ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10]
1118 = ctype->wcdigits[cnt];
1119 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1120 break;
1122 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC):
1123 /* Align entries. */
1124 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1125 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1126 idx[elem] += iov[2 + elem + offset].iov_len;
1127 ++offset;
1128 /* FALLTHROUGH */
1130 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT1_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
1131 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
1132 iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt];
1133 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1134 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1135 break;
1137 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
1138 /* Align entries. */
1139 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1140 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1141 idx[elem] += iov[2 + elem + offset].iov_len;
1142 ++offset;
1144 default_missing_len = (ctype->default_missing
1145 ? wcslen ((wchar_t *)ctype->default_missing)
1146 : 0);
1147 iov[2 + elem + offset].iov_base = &default_missing_len;
1148 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1149 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1150 break;
1152 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
1153 iov[2 + elem + offset].iov_base =
1154 ctype->default_missing ?: (uint32_t *) L"";
1155 iov[2 + elem + offset].iov_len =
1156 wcslen (iov[2 + elem + offset].iov_base);
1157 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1158 break;
1160 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
1161 /* Align entries. */
1162 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1163 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1164 idx[elem] += iov[2 + elem + offset].iov_len;
1165 ++offset;
1167 iov[2 + elem + offset].iov_base = &ctype->ntranslit_ignore;
1168 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1169 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1170 break;
1172 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
1174 uint32_t *ranges = (uint32_t *) alloca (ctype->ntranslit_ignore
1175 * 3 * sizeof (uint32_t));
1176 struct translit_ignore_t *runp;
1178 iov[2 + elem + offset].iov_base = ranges;
1179 iov[2 + elem + offset].iov_len = (ctype->ntranslit_ignore
1180 * 3 * sizeof (uint32_t));
1182 for (runp = ctype->translit_ignore; runp != NULL;
1183 runp = runp->next)
1185 *ranges++ = runp->from;
1186 *ranges++ = runp->to;
1187 *ranges++ = runp->step;
1190 /* Remove the following line in case a new entry is added
1191 after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN. */
1192 if (elem < nelems)
1193 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1194 break;
1196 default:
1197 assert (! "unknown CTYPE element");
1199 else
1201 /* Handle extra maps. */
1202 size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
1203 if (nr < ctype->nr_charclass)
1205 iov[2 + elem + offset].iov_base = ctype->class_b[nr];
1206 iov[2 + elem + offset].iov_len = 256 / 32 * sizeof (uint32_t);
1207 idx[elem] += iov[2 + elem + offset].iov_len;
1208 ++offset;
1210 iov[2 + elem + offset] = ctype->class_3level[nr];
1212 else
1214 nr -= ctype->nr_charclass;
1215 assert (nr < ctype->map_collection_nr);
1216 iov[2 + elem + offset] = ctype->map_3level[nr];
1218 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1222 assert (2 + elem + offset == (nelems + 2 * ctype->nr_charclass
1223 + ctype->map_collection_nr + 4 + 2));
1225 write_locale_data (output_path, LC_CTYPE, "LC_CTYPE", 2 + elem + offset,
1226 iov);
1230 /* Local functions. */
1231 static void
1232 ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
1233 const char *name)
1235 size_t cnt;
1237 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1238 if (strcmp (ctype->classnames[cnt], name) == 0)
1239 break;
1241 if (cnt < ctype->nr_charclass)
1243 lr_error (lr, _("character class `%s' already defined"), name);
1244 return;
1247 if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1248 /* Exit code 2 is prescribed in P1003.2b. */
1249 WITH_CUR_LOCALE (error (2, 0, _("\
1250 implementation limit: no more than %Zd character classes allowed"),
1251 MAX_NR_CHARCLASS));
1253 ctype->classnames[ctype->nr_charclass++] = name;
1257 static void
1258 ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1259 const char *name, const struct charmap_t *charmap)
1261 size_t max_chars = 0;
1262 size_t cnt;
1264 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1266 if (strcmp (ctype->mapnames[cnt], name) == 0)
1267 break;
1269 if (max_chars < ctype->map_collection_max[cnt])
1270 max_chars = ctype->map_collection_max[cnt];
1273 if (cnt < ctype->map_collection_nr)
1275 lr_error (lr, _("character map `%s' already defined"), name);
1276 return;
1279 if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1280 /* Exit code 2 is prescribed in P1003.2b. */
1281 WITH_CUR_LOCALE (error (2, 0, _("\
1282 implementation limit: no more than %d character maps allowed"),
1283 MAX_NR_CHARMAP));
1285 ctype->mapnames[cnt] = name;
1287 if (max_chars == 0)
1288 ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1289 else
1290 ctype->map_collection_max[cnt] = max_chars;
1292 ctype->map_collection[cnt] = (uint32_t *)
1293 xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
1294 ctype->map_collection_act[cnt] = 256;
1296 ++ctype->map_collection_nr;
1300 /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
1301 is possible if we only want to extend the name array. */
1302 static uint32_t *
1303 find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1304 size_t *act, uint32_t idx)
1306 size_t cnt;
1308 if (idx < 256)
1309 return table == NULL ? NULL : &(*table)[idx];
1311 /* Use the charnames_idx lookup table instead of the slow search loop. */
1312 #if 1
1313 cnt = idx_table_get (&ctype->charnames_idx, idx);
1314 if (cnt == EMPTY)
1315 /* Not found. */
1316 cnt = ctype->charnames_act;
1317 #else
1318 for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1319 if (ctype->charnames[cnt] == idx)
1320 break;
1321 #endif
1323 /* We have to distinguish two cases: the name is found or not. */
1324 if (cnt == ctype->charnames_act)
1326 /* Extend the name array. */
1327 if (ctype->charnames_act == ctype->charnames_max)
1329 ctype->charnames_max *= 2;
1330 ctype->charnames = (uint32_t *)
1331 xrealloc (ctype->charnames,
1332 sizeof (uint32_t) * ctype->charnames_max);
1334 ctype->charnames[ctype->charnames_act++] = idx;
1335 idx_table_add (&ctype->charnames_idx, idx, cnt);
1338 if (table == NULL)
1339 /* We have done everything we are asked to do. */
1340 return NULL;
1342 if (max == NULL)
1343 /* The caller does not want to extend the table. */
1344 return (cnt >= *act ? NULL : &(*table)[cnt]);
1346 if (cnt >= *act)
1348 if (cnt >= *max)
1350 size_t old_max = *max;
1352 *max *= 2;
1353 while (*max <= cnt);
1355 *table =
1356 (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
1357 memset (&(*table)[old_max], '\0',
1358 (*max - old_max) * sizeof (uint32_t));
1361 *act = cnt + 1;
1364 return &(*table)[cnt];
1368 static int
1369 get_character (struct token *now, const struct charmap_t *charmap,
1370 struct repertoire_t *repertoire,
1371 struct charseq **seqp, uint32_t *wchp)
1373 if (now->tok == tok_bsymbol)
1375 /* This will hopefully be the normal case. */
1376 *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1377 now->val.str.lenmb);
1378 *seqp = charmap_find_value (charmap, now->val.str.startmb,
1379 now->val.str.lenmb);
1381 else if (now->tok == tok_ucs4)
1383 char utmp[10];
1385 snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1386 *seqp = charmap_find_value (charmap, utmp, 9);
1388 if (*seqp == NULL)
1389 *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
1391 if (*seqp == NULL)
1393 /* Compute the value in the charmap from the UCS value. */
1394 const char *symbol = repertoire_find_symbol (repertoire,
1395 now->val.ucs4);
1397 if (symbol == NULL)
1398 *seqp = NULL;
1399 else
1400 *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
1402 if (*seqp == NULL)
1404 if (repertoire != NULL)
1406 /* Insert a negative entry. */
1407 static const struct charseq negative
1408 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1409 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1410 sizeof (uint32_t));
1411 *newp = now->val.ucs4;
1413 insert_entry (&repertoire->seq_table, newp,
1414 sizeof (uint32_t), (void *) &negative);
1417 else
1418 (*seqp)->ucs4 = now->val.ucs4;
1420 else if ((*seqp)->ucs4 != now->val.ucs4)
1421 *seqp = NULL;
1423 *wchp = now->val.ucs4;
1425 else if (now->tok == tok_charcode)
1427 /* We must map from the byte code to UCS4. */
1428 *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1429 now->val.str.lenmb);
1431 if (*seqp == NULL)
1432 *wchp = ILLEGAL_CHAR_VALUE;
1433 else
1435 if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1436 (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1437 strlen ((*seqp)->name));
1438 *wchp = (*seqp)->ucs4;
1441 else
1442 return 1;
1444 return 0;
1448 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1449 the .(2). counterparts. */
1450 static void
1451 charclass_symbolic_ellipsis (struct linereader *ldfile,
1452 struct locale_ctype_t *ctype,
1453 const struct charmap_t *charmap,
1454 struct repertoire_t *repertoire,
1455 struct token *now,
1456 const char *last_str,
1457 unsigned long int class256_bit,
1458 unsigned long int class_bit, int base,
1459 int ignore_content, int handle_digits, int step)
1461 const char *nowstr = now->val.str.startmb;
1462 char tmp[now->val.str.lenmb + 1];
1463 const char *cp;
1464 char *endp;
1465 unsigned long int from;
1466 unsigned long int to;
1468 /* We have to compute the ellipsis values using the symbolic names. */
1469 assert (last_str != NULL);
1471 if (strlen (last_str) != now->val.str.lenmb)
1473 invalid_range:
1474 lr_error (ldfile,
1475 _("`%s' and `%.*s' are no valid names for symbolic range"),
1476 last_str, (int) now->val.str.lenmb, nowstr);
1477 return;
1480 if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1481 /* Nothing to do, the names are the same. */
1482 return;
1484 for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1487 errno = 0;
1488 from = strtoul (cp, &endp, base);
1489 if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1490 goto invalid_range;
1492 to = strtoul (nowstr + (cp - last_str), &endp, base);
1493 if ((to == UINT_MAX && errno == ERANGE)
1494 || (endp - nowstr) != now->val.str.lenmb || from >= to)
1495 goto invalid_range;
1497 /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
1498 if (!ignore_content)
1500 now->val.str.startmb = tmp;
1501 while ((from += step) <= to)
1503 struct charseq *seq;
1504 uint32_t wch;
1506 sprintf (tmp, (base == 10 ? "%.*s%0*ld" : "%.*s%0*lX"),
1507 (int) (cp - last_str), last_str,
1508 (int) (now->val.str.lenmb - (cp - last_str)),
1509 from);
1511 get_character (now, charmap, repertoire, &seq, &wch);
1513 if (seq != NULL && seq->nbytes == 1)
1514 /* Yep, we can store information about this byte sequence. */
1515 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
1517 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1518 /* We have the UCS4 position. */
1519 *find_idx (ctype, &ctype->class_collection,
1520 &ctype->class_collection_max,
1521 &ctype->class_collection_act, wch) |= class_bit;
1523 if (handle_digits == 1)
1525 /* We must store the digit values. */
1526 if (ctype->mbdigits_act == ctype->mbdigits_max)
1528 ctype->mbdigits_max *= 2;
1529 ctype->mbdigits = xrealloc (ctype->mbdigits,
1530 (ctype->mbdigits_max
1531 * sizeof (char *)));
1532 ctype->wcdigits_max *= 2;
1533 ctype->wcdigits = xrealloc (ctype->wcdigits,
1534 (ctype->wcdigits_max
1535 * sizeof (uint32_t)));
1538 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1539 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1541 else if (handle_digits == 2)
1543 /* We must store the digit values. */
1544 if (ctype->outdigits_act >= 10)
1546 lr_error (ldfile, _("\
1547 %s: field `%s' does not contain exactly ten entries"),
1548 "LC_CTYPE", "outdigit");
1549 return;
1552 ctype->mboutdigits[ctype->outdigits_act] = seq;
1553 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1554 ++ctype->outdigits_act;
1561 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. */
1562 static void
1563 charclass_ucs4_ellipsis (struct linereader *ldfile,
1564 struct locale_ctype_t *ctype,
1565 const struct charmap_t *charmap,
1566 struct repertoire_t *repertoire,
1567 struct token *now, uint32_t last_wch,
1568 unsigned long int class256_bit,
1569 unsigned long int class_bit, int ignore_content,
1570 int handle_digits, int step)
1572 if (last_wch > now->val.ucs4)
1574 lr_error (ldfile, _("\
1575 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1576 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1577 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
1578 return;
1581 if (!ignore_content)
1582 while ((last_wch += step) <= now->val.ucs4)
1584 /* We have to find out whether there is a byte sequence corresponding
1585 to this UCS4 value. */
1586 struct charseq *seq;
1587 char utmp[10];
1589 snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1590 seq = charmap_find_value (charmap, utmp, 9);
1591 if (seq == NULL)
1593 snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1594 seq = charmap_find_value (charmap, utmp, 5);
1597 if (seq == NULL)
1598 /* Try looking in the repertoire map. */
1599 seq = repertoire_find_seq (repertoire, last_wch);
1601 /* If this is the first time we look for this sequence create a new
1602 entry. */
1603 if (seq == NULL)
1605 static const struct charseq negative
1606 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1608 /* Find the symbolic name for this UCS4 value. */
1609 if (repertoire != NULL)
1611 const char *symbol = repertoire_find_symbol (repertoire,
1612 last_wch);
1613 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1614 sizeof (uint32_t));
1615 *newp = last_wch;
1617 if (symbol != NULL)
1618 /* We have a name, now search the multibyte value. */
1619 seq = charmap_find_value (charmap, symbol, strlen (symbol));
1621 if (seq == NULL)
1622 /* We have to create a fake entry. */
1623 seq = (struct charseq *) &negative;
1624 else
1625 seq->ucs4 = last_wch;
1627 insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1628 seq);
1630 else
1631 /* We have to create a fake entry. */
1632 seq = (struct charseq *) &negative;
1635 /* We have a name, now search the multibyte value. */
1636 if (seq->ucs4 == last_wch && seq->nbytes == 1)
1637 /* Yep, we can store information about this byte sequence. */
1638 ctype->class256_collection[(size_t) seq->bytes[0]]
1639 |= class256_bit;
1641 /* And of course we have the UCS4 position. */
1642 if (class_bit != 0)
1643 *find_idx (ctype, &ctype->class_collection,
1644 &ctype->class_collection_max,
1645 &ctype->class_collection_act, last_wch) |= class_bit;
1647 if (handle_digits == 1)
1649 /* We must store the digit values. */
1650 if (ctype->mbdigits_act == ctype->mbdigits_max)
1652 ctype->mbdigits_max *= 2;
1653 ctype->mbdigits = xrealloc (ctype->mbdigits,
1654 (ctype->mbdigits_max
1655 * sizeof (char *)));
1656 ctype->wcdigits_max *= 2;
1657 ctype->wcdigits = xrealloc (ctype->wcdigits,
1658 (ctype->wcdigits_max
1659 * sizeof (uint32_t)));
1662 ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1663 ? seq : NULL);
1664 ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1666 else if (handle_digits == 2)
1668 /* We must store the digit values. */
1669 if (ctype->outdigits_act >= 10)
1671 lr_error (ldfile, _("\
1672 %s: field `%s' does not contain exactly ten entries"),
1673 "LC_CTYPE", "outdigit");
1674 return;
1677 ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1678 ? seq : NULL);
1679 ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1680 ++ctype->outdigits_act;
1686 /* Ellipsis as in `/xea/x12.../xea/x34'. */
1687 static void
1688 charclass_charcode_ellipsis (struct linereader *ldfile,
1689 struct locale_ctype_t *ctype,
1690 const struct charmap_t *charmap,
1691 struct repertoire_t *repertoire,
1692 struct token *now, char *last_charcode,
1693 uint32_t last_charcode_len,
1694 unsigned long int class256_bit,
1695 unsigned long int class_bit, int ignore_content,
1696 int handle_digits)
1698 /* First check whether the to-value is larger. */
1699 if (now->val.charcode.nbytes != last_charcode_len)
1701 lr_error (ldfile, _("\
1702 start and end character sequence of range must have the same length"));
1703 return;
1706 if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
1708 lr_error (ldfile, _("\
1709 to-value character sequence is smaller than from-value sequence"));
1710 return;
1713 if (!ignore_content)
1717 /* Increment the byte sequence value. */
1718 struct charseq *seq;
1719 uint32_t wch;
1720 int i;
1722 for (i = last_charcode_len - 1; i >= 0; --i)
1723 if (++last_charcode[i] != 0)
1724 break;
1726 if (last_charcode_len == 1)
1727 /* Of course we have the charcode value. */
1728 ctype->class256_collection[(size_t) last_charcode[0]]
1729 |= class256_bit;
1731 /* Find the symbolic name. */
1732 seq = charmap_find_symbol (charmap, last_charcode,
1733 last_charcode_len);
1734 if (seq != NULL)
1736 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1737 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1738 strlen (seq->name));
1739 wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
1741 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1742 *find_idx (ctype, &ctype->class_collection,
1743 &ctype->class_collection_max,
1744 &ctype->class_collection_act, wch) |= class_bit;
1746 else
1747 wch = ILLEGAL_CHAR_VALUE;
1749 if (handle_digits == 1)
1751 /* We must store the digit values. */
1752 if (ctype->mbdigits_act == ctype->mbdigits_max)
1754 ctype->mbdigits_max *= 2;
1755 ctype->mbdigits = xrealloc (ctype->mbdigits,
1756 (ctype->mbdigits_max
1757 * sizeof (char *)));
1758 ctype->wcdigits_max *= 2;
1759 ctype->wcdigits = xrealloc (ctype->wcdigits,
1760 (ctype->wcdigits_max
1761 * sizeof (uint32_t)));
1764 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1765 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1766 seq->nbytes = last_charcode_len;
1768 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1769 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1771 else if (handle_digits == 2)
1773 struct charseq *seq;
1774 /* We must store the digit values. */
1775 if (ctype->outdigits_act >= 10)
1777 lr_error (ldfile, _("\
1778 %s: field `%s' does not contain exactly ten entries"),
1779 "LC_CTYPE", "outdigit");
1780 return;
1783 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1784 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1785 seq->nbytes = last_charcode_len;
1787 ctype->mboutdigits[ctype->outdigits_act] = seq;
1788 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1789 ++ctype->outdigits_act;
1792 while (memcmp (last_charcode, now->val.charcode.bytes,
1793 last_charcode_len) != 0);
1798 static uint32_t *
1799 find_translit2 (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
1800 uint32_t wch)
1802 struct translit_t *trunp = ctype->translit;
1803 struct translit_ignore_t *tirunp = ctype->translit_ignore;
1805 while (trunp != NULL)
1807 /* XXX We simplify things here. The transliterations we look
1808 for are only allowed to have one character. */
1809 if (trunp->from[0] == wch && trunp->from[1] == 0)
1811 /* Found it. Now look for a transliteration which can be
1812 represented with the character set. */
1813 struct translit_to_t *torunp = trunp->to;
1815 while (torunp != NULL)
1817 int i;
1819 for (i = 0; torunp->str[i] != 0; ++i)
1821 char utmp[10];
1823 snprintf (utmp, sizeof (utmp), "U%08X", torunp->str[i]);
1824 if (charmap_find_value (charmap, utmp, 9) == NULL)
1825 /* This character cannot be represented. */
1826 break;
1829 if (torunp->str[i] == 0)
1830 return torunp->str;
1832 torunp = torunp->next;
1835 break;
1838 trunp = trunp->next;
1841 /* Check for ignored chars. */
1842 while (tirunp != NULL)
1844 if (tirunp->from <= wch && tirunp->to >= wch)
1846 uint32_t wi;
1848 for (wi = tirunp->from; wi <= wch; wi += tirunp->step)
1849 if (wi == wch)
1850 return (uint32_t []) { 0 };
1854 /* Nothing found. */
1855 return NULL;
1859 uint32_t *
1860 find_translit (struct localedef_t *locale, const struct charmap_t *charmap,
1861 uint32_t wch)
1863 struct locale_ctype_t *ctype;
1864 uint32_t *result = NULL;
1866 assert (locale != NULL);
1867 ctype = locale->categories[LC_CTYPE].ctype;
1869 if (ctype->translit != NULL)
1870 result = find_translit2 (ctype, charmap, wch);
1872 if (result == NULL)
1874 struct translit_include_t *irunp = ctype->translit_include;
1876 while (irunp != NULL && result == NULL)
1878 result = find_translit (find_locale (CTYPE_LOCALE,
1879 irunp->copy_locale,
1880 irunp->copy_repertoire,
1881 charmap),
1882 charmap, wch);
1883 irunp = irunp->next;
1887 return result;
1891 /* Read one transliteration entry. */
1892 static uint32_t *
1893 read_widestring (struct linereader *ldfile, struct token *now,
1894 const struct charmap_t *charmap,
1895 struct repertoire_t *repertoire)
1897 uint32_t *wstr;
1899 if (now->tok == tok_default_missing)
1900 /* The special name "" will denote this case. */
1901 wstr = ((uint32_t *) { 0 });
1902 else if (now->tok == tok_bsymbol)
1904 /* Get the value from the repertoire. */
1905 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1906 wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1907 now->val.str.lenmb);
1908 if (wstr[0] == ILLEGAL_CHAR_VALUE)
1910 /* We cannot proceed, we don't know the UCS4 value. */
1911 free (wstr);
1912 return NULL;
1915 wstr[1] = 0;
1917 else if (now->tok == tok_ucs4)
1919 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1920 wstr[0] = now->val.ucs4;
1921 wstr[1] = 0;
1923 else if (now->tok == tok_charcode)
1925 /* Argh, we have to convert to the symbol name first and then to the
1926 UCS4 value. */
1927 struct charseq *seq = charmap_find_symbol (charmap,
1928 now->val.str.startmb,
1929 now->val.str.lenmb);
1930 if (seq == NULL)
1931 /* Cannot find the UCS4 value. */
1932 return NULL;
1934 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1935 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1936 strlen (seq->name));
1937 if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1938 /* We cannot proceed, we don't know the UCS4 value. */
1939 return NULL;
1941 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1942 wstr[0] = seq->ucs4;
1943 wstr[1] = 0;
1945 else if (now->tok == tok_string)
1947 wstr = now->val.str.startwc;
1948 if (wstr == NULL || wstr[0] == 0)
1949 return NULL;
1951 else
1953 if (now->tok != tok_eol && now->tok != tok_eof)
1954 lr_ignore_rest (ldfile, 0);
1955 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1956 return (uint32_t *) -1l;
1959 return wstr;
1963 static void
1964 read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1965 struct token *now, const struct charmap_t *charmap,
1966 struct repertoire_t *repertoire)
1968 uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1969 struct translit_t *result;
1970 struct translit_to_t **top;
1971 struct obstack *ob = &ctype->mempool;
1972 int first;
1973 int ignore;
1975 if (from_wstr == NULL)
1976 /* There is no valid from string. */
1977 return;
1979 result = (struct translit_t *) obstack_alloc (ob,
1980 sizeof (struct translit_t));
1981 result->from = from_wstr;
1982 result->fname = ldfile->fname;
1983 result->lineno = ldfile->lineno;
1984 result->next = NULL;
1985 result->to = NULL;
1986 top = &result->to;
1987 first = 1;
1988 ignore = 0;
1990 while (1)
1992 uint32_t *to_wstr;
1994 /* Next we have one or more transliterations. They are
1995 separated by semicolons. */
1996 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
1998 if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
2000 /* One string read. */
2001 const uint32_t zero = 0;
2003 if (!ignore)
2005 obstack_grow (ob, &zero, 4);
2006 to_wstr = obstack_finish (ob);
2008 *top = obstack_alloc (ob, sizeof (struct translit_to_t));
2009 (*top)->str = to_wstr;
2010 (*top)->next = NULL;
2013 if (now->tok == tok_eol)
2015 result->next = ctype->translit;
2016 ctype->translit = result;
2017 return;
2020 if (!ignore)
2021 top = &(*top)->next;
2022 ignore = 0;
2024 else
2026 to_wstr = read_widestring (ldfile, now, charmap, repertoire);
2027 if (to_wstr == (uint32_t *) -1l)
2029 /* An error occurred. */
2030 obstack_free (ob, result);
2031 return;
2034 if (to_wstr == NULL)
2035 ignore = 1;
2036 else
2037 /* This value is usable. */
2038 obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
2040 first = 0;
2046 static void
2047 read_translit_ignore_entry (struct linereader *ldfile,
2048 struct locale_ctype_t *ctype,
2049 const struct charmap_t *charmap,
2050 struct repertoire_t *repertoire)
2052 /* We expect a semicolon-separated list of characters we ignore. We are
2053 only interested in the wide character definitions. These must be
2054 single characters, possibly defining a range when an ellipsis is used. */
2055 while (1)
2057 struct token *now = lr_token (ldfile, charmap, NULL, repertoire,
2058 verbose);
2059 struct translit_ignore_t *newp;
2060 uint32_t from;
2062 if (now->tok == tok_eol || now->tok == tok_eof)
2064 lr_error (ldfile,
2065 _("premature end of `translit_ignore' definition"));
2066 return;
2069 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2071 lr_error (ldfile, _("syntax error"));
2072 lr_ignore_rest (ldfile, 0);
2073 return;
2076 if (now->tok == tok_ucs4)
2077 from = now->val.ucs4;
2078 else
2079 /* Try to get the value. */
2080 from = repertoire_find_value (repertoire, now->val.str.startmb,
2081 now->val.str.lenmb);
2083 if (from == ILLEGAL_CHAR_VALUE)
2085 lr_error (ldfile, "invalid character name");
2086 newp = NULL;
2088 else
2090 newp = (struct translit_ignore_t *)
2091 obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
2092 newp->from = from;
2093 newp->to = from;
2094 newp->step = 1;
2096 newp->next = ctype->translit_ignore;
2097 ctype->translit_ignore = newp;
2100 /* Now we expect either a semicolon, an ellipsis, or the end of the
2101 line. */
2102 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2104 if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
2106 /* XXX Should we bother implementing `....'? `...' certainly
2107 will not be implemented. */
2108 uint32_t to;
2109 int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
2111 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2113 if (now->tok == tok_eol || now->tok == tok_eof)
2115 lr_error (ldfile,
2116 _("premature end of `translit_ignore' definition"));
2117 return;
2120 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2122 lr_error (ldfile, _("syntax error"));
2123 lr_ignore_rest (ldfile, 0);
2124 return;
2127 if (now->tok == tok_ucs4)
2128 to = now->val.ucs4;
2129 else
2130 /* Try to get the value. */
2131 to = repertoire_find_value (repertoire, now->val.str.startmb,
2132 now->val.str.lenmb);
2134 if (to == ILLEGAL_CHAR_VALUE)
2135 lr_error (ldfile, "invalid character name");
2136 else
2138 /* Make sure the `to'-value is larger. */
2139 if (to >= from)
2141 newp->to = to;
2142 newp->step = step;
2144 else
2145 lr_error (ldfile, _("\
2146 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2147 (to | from) < 65536 ? 4 : 8, to,
2148 (to | from) < 65536 ? 4 : 8, from);
2151 /* And the next token. */
2152 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2155 if (now->tok == tok_eol || now->tok == tok_eof)
2156 /* We are done. */
2157 return;
2159 if (now->tok == tok_semicolon)
2160 /* Next round. */
2161 continue;
2163 /* If we come here something is wrong. */
2164 lr_error (ldfile, _("syntax error"));
2165 lr_ignore_rest (ldfile, 0);
2166 return;
2171 /* The parser for the LC_CTYPE section of the locale definition. */
2172 void
2173 ctype_read (struct linereader *ldfile, struct localedef_t *result,
2174 const struct charmap_t *charmap, const char *repertoire_name,
2175 int ignore_content)
2177 struct repertoire_t *repertoire = NULL;
2178 struct locale_ctype_t *ctype;
2179 struct token *now;
2180 enum token_t nowtok;
2181 size_t cnt;
2182 struct charseq *last_seq;
2183 uint32_t last_wch = 0;
2184 enum token_t last_token;
2185 enum token_t ellipsis_token;
2186 int step;
2187 char last_charcode[16];
2188 size_t last_charcode_len = 0;
2189 const char *last_str = NULL;
2190 int mapidx;
2191 struct localedef_t *copy_locale = NULL;
2193 /* Get the repertoire we have to use. */
2194 if (repertoire_name != NULL)
2195 repertoire = repertoire_read (repertoire_name);
2197 /* The rest of the line containing `LC_CTYPE' must be free. */
2198 lr_ignore_rest (ldfile, 1);
2203 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2204 nowtok = now->tok;
2206 while (nowtok == tok_eol);
2208 /* If we see `copy' now we are almost done. */
2209 if (nowtok == tok_copy)
2211 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2212 if (now->tok != tok_string)
2214 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2216 skip_category:
2218 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2219 while (now->tok != tok_eof && now->tok != tok_end);
2221 if (now->tok != tok_eof
2222 || (now = lr_token (ldfile, charmap, NULL, NULL, verbose),
2223 now->tok == tok_eof))
2224 lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2225 else if (now->tok != tok_lc_ctype)
2227 lr_error (ldfile, _("\
2228 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2229 lr_ignore_rest (ldfile, 0);
2231 else
2232 lr_ignore_rest (ldfile, 1);
2234 return;
2237 if (! ignore_content)
2239 /* Get the locale definition. */
2240 copy_locale = load_locale (LC_CTYPE, now->val.str.startmb,
2241 repertoire_name, charmap, NULL);
2242 if ((copy_locale->avail & CTYPE_LOCALE) == 0)
2244 /* Not yet loaded. So do it now. */
2245 if (locfile_read (copy_locale, charmap) != 0)
2246 goto skip_category;
2250 lr_ignore_rest (ldfile, 1);
2252 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2253 nowtok = now->tok;
2256 /* Prepare the data structures. */
2257 ctype_startup (ldfile, result, charmap, copy_locale, ignore_content);
2258 ctype = result->categories[LC_CTYPE].ctype;
2260 /* Remember the repertoire we use. */
2261 if (!ignore_content)
2262 ctype->repertoire = repertoire;
2264 while (1)
2266 unsigned long int class_bit = 0;
2267 unsigned long int class256_bit = 0;
2268 int handle_digits = 0;
2270 /* Of course we don't proceed beyond the end of file. */
2271 if (nowtok == tok_eof)
2272 break;
2274 /* Ignore empty lines. */
2275 if (nowtok == tok_eol)
2277 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2278 nowtok = now->tok;
2279 continue;
2282 switch (nowtok)
2284 case tok_charclass:
2285 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2286 while (now->tok == tok_ident || now->tok == tok_string)
2288 ctype_class_new (ldfile, ctype, now->val.str.startmb);
2289 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2290 if (now->tok != tok_semicolon)
2291 break;
2292 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2294 if (now->tok != tok_eol)
2295 SYNTAX_ERROR (_("\
2296 %s: syntax error in definition of new character class"), "LC_CTYPE");
2297 break;
2299 case tok_charconv:
2300 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2301 while (now->tok == tok_ident || now->tok == tok_string)
2303 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2304 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2305 if (now->tok != tok_semicolon)
2306 break;
2307 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2309 if (now->tok != tok_eol)
2310 SYNTAX_ERROR (_("\
2311 %s: syntax error in definition of new character map"), "LC_CTYPE");
2312 break;
2314 case tok_class:
2315 /* Ignore the rest of the line if we don't need the input of
2316 this line. */
2317 if (ignore_content)
2319 lr_ignore_rest (ldfile, 0);
2320 break;
2323 /* We simply forget the `class' keyword and use the following
2324 operand to determine the bit. */
2325 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2326 if (now->tok == tok_ident || now->tok == tok_string)
2328 /* Can be one of the predefined class names. */
2329 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2330 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
2331 break;
2332 if (cnt >= ctype->nr_charclass)
2334 #ifdef PREDEFINED_CLASSES
2335 if (now->val.str.lenmb == 8
2336 && memcmp ("special1", now->val.str.startmb, 8) == 0)
2337 class_bit = _ISwspecial1;
2338 else if (now->val.str.lenmb == 8
2339 && memcmp ("special2", now->val.str.startmb, 8) == 0)
2340 class_bit = _ISwspecial2;
2341 else if (now->val.str.lenmb == 8
2342 && memcmp ("special3", now->val.str.startmb, 8) == 0)
2343 class_bit = _ISwspecial3;
2344 else
2345 #endif
2347 /* OK, it's a new class. */
2348 ctype_class_new (ldfile, ctype, now->val.str.startmb);
2350 class_bit = _ISwbit (ctype->nr_charclass - 1);
2353 else
2355 class_bit = _ISwbit (cnt);
2357 free (now->val.str.startmb);
2360 else if (now->tok == tok_digit)
2361 goto handle_tok_digit;
2362 else if (now->tok < tok_upper || now->tok > tok_blank)
2363 goto err_label;
2364 else
2366 class_bit = BITw (now->tok);
2367 class256_bit = BIT (now->tok);
2370 /* The next character must be a semicolon. */
2371 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2372 if (now->tok != tok_semicolon)
2373 goto err_label;
2374 goto read_charclass;
2376 case tok_upper:
2377 case tok_lower:
2378 case tok_alpha:
2379 case tok_alnum:
2380 case tok_space:
2381 case tok_cntrl:
2382 case tok_punct:
2383 case tok_graph:
2384 case tok_print:
2385 case tok_xdigit:
2386 case tok_blank:
2387 /* Ignore the rest of the line if we don't need the input of
2388 this line. */
2389 if (ignore_content)
2391 lr_ignore_rest (ldfile, 0);
2392 break;
2395 class_bit = BITw (now->tok);
2396 class256_bit = BIT (now->tok);
2397 handle_digits = 0;
2398 read_charclass:
2399 ctype->class_done |= class_bit;
2400 last_token = tok_none;
2401 ellipsis_token = tok_none;
2402 step = 1;
2403 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2404 while (now->tok != tok_eol && now->tok != tok_eof)
2406 uint32_t wch;
2407 struct charseq *seq;
2409 if (ellipsis_token == tok_none)
2411 if (get_character (now, charmap, repertoire, &seq, &wch))
2412 goto err_label;
2414 if (!ignore_content && seq != NULL && seq->nbytes == 1)
2415 /* Yep, we can store information about this byte
2416 sequence. */
2417 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
2419 if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2420 && class_bit != 0)
2421 /* We have the UCS4 position. */
2422 *find_idx (ctype, &ctype->class_collection,
2423 &ctype->class_collection_max,
2424 &ctype->class_collection_act, wch) |= class_bit;
2426 last_token = now->tok;
2427 /* Terminate the string. */
2428 if (last_token == tok_bsymbol)
2430 now->val.str.startmb[now->val.str.lenmb] = '\0';
2431 last_str = now->val.str.startmb;
2433 else
2434 last_str = NULL;
2435 last_seq = seq;
2436 last_wch = wch;
2437 memcpy (last_charcode, now->val.charcode.bytes, 16);
2438 last_charcode_len = now->val.charcode.nbytes;
2440 if (!ignore_content && handle_digits == 1)
2442 /* We must store the digit values. */
2443 if (ctype->mbdigits_act == ctype->mbdigits_max)
2445 ctype->mbdigits_max += 10;
2446 ctype->mbdigits = xrealloc (ctype->mbdigits,
2447 (ctype->mbdigits_max
2448 * sizeof (char *)));
2449 ctype->wcdigits_max += 10;
2450 ctype->wcdigits = xrealloc (ctype->wcdigits,
2451 (ctype->wcdigits_max
2452 * sizeof (uint32_t)));
2455 ctype->mbdigits[ctype->mbdigits_act++] = seq;
2456 ctype->wcdigits[ctype->wcdigits_act++] = wch;
2458 else if (!ignore_content && handle_digits == 2)
2460 /* We must store the digit values. */
2461 if (ctype->outdigits_act >= 10)
2463 lr_error (ldfile, _("\
2464 %s: field `%s' does not contain exactly ten entries"),
2465 "LC_CTYPE", "outdigit");
2466 lr_ignore_rest (ldfile, 0);
2467 break;
2470 ctype->mboutdigits[ctype->outdigits_act] = seq;
2471 ctype->wcoutdigits[ctype->outdigits_act] = wch;
2472 ++ctype->outdigits_act;
2475 else
2477 /* Now it gets complicated. We have to resolve the
2478 ellipsis problem. First we must distinguish between
2479 the different kinds of ellipsis and this must match the
2480 tokens we have seen. */
2481 assert (last_token != tok_none);
2483 if (last_token != now->tok)
2485 lr_error (ldfile, _("\
2486 ellipsis range must be marked by two operands of same type"));
2487 lr_ignore_rest (ldfile, 0);
2488 break;
2491 if (last_token == tok_bsymbol)
2493 if (ellipsis_token == tok_ellipsis3)
2494 lr_error (ldfile, _("with symbolic name range values \
2495 the absolute ellipsis `...' must not be used"));
2497 charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2498 repertoire, now, last_str,
2499 class256_bit, class_bit,
2500 (ellipsis_token
2501 == tok_ellipsis4
2502 ? 10 : 16),
2503 ignore_content,
2504 handle_digits, step);
2506 else if (last_token == tok_ucs4)
2508 if (ellipsis_token != tok_ellipsis2)
2509 lr_error (ldfile, _("\
2510 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2512 charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2513 repertoire, now, last_wch,
2514 class256_bit, class_bit,
2515 ignore_content, handle_digits,
2516 step);
2518 else
2520 assert (last_token == tok_charcode);
2522 if (ellipsis_token != tok_ellipsis3)
2523 lr_error (ldfile, _("\
2524 with character code range values one must use the absolute ellipsis `...'"));
2526 charclass_charcode_ellipsis (ldfile, ctype, charmap,
2527 repertoire, now,
2528 last_charcode,
2529 last_charcode_len,
2530 class256_bit, class_bit,
2531 ignore_content,
2532 handle_digits);
2535 /* Now we have used the last value. */
2536 last_token = tok_none;
2539 /* Next we expect a semicolon or the end of the line. */
2540 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2541 if (now->tok == tok_eol || now->tok == tok_eof)
2542 break;
2544 if (last_token != tok_none
2545 && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
2547 if (now->tok == tok_ellipsis2_2)
2549 now->tok = tok_ellipsis2;
2550 step = 2;
2552 else if (now->tok == tok_ellipsis4_2)
2554 now->tok = tok_ellipsis4;
2555 step = 2;
2558 ellipsis_token = now->tok;
2560 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2561 continue;
2564 if (now->tok != tok_semicolon)
2565 goto err_label;
2567 /* And get the next character. */
2568 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2570 ellipsis_token = tok_none;
2571 step = 1;
2573 break;
2575 case tok_digit:
2576 /* Ignore the rest of the line if we don't need the input of
2577 this line. */
2578 if (ignore_content)
2580 lr_ignore_rest (ldfile, 0);
2581 break;
2584 handle_tok_digit:
2585 class_bit = _ISwdigit;
2586 class256_bit = _ISdigit;
2587 handle_digits = 1;
2588 goto read_charclass;
2590 case tok_outdigit:
2591 /* Ignore the rest of the line if we don't need the input of
2592 this line. */
2593 if (ignore_content)
2595 lr_ignore_rest (ldfile, 0);
2596 break;
2599 if (ctype->outdigits_act != 0)
2600 lr_error (ldfile, _("\
2601 %s: field `%s' declared more than once"),
2602 "LC_CTYPE", "outdigit");
2603 class_bit = 0;
2604 class256_bit = 0;
2605 handle_digits = 2;
2606 goto read_charclass;
2608 case tok_toupper:
2609 /* Ignore the rest of the line if we don't need the input of
2610 this line. */
2611 if (ignore_content)
2613 lr_ignore_rest (ldfile, 0);
2614 break;
2617 mapidx = 0;
2618 goto read_mapping;
2620 case tok_tolower:
2621 /* Ignore the rest of the line if we don't need the input of
2622 this line. */
2623 if (ignore_content)
2625 lr_ignore_rest (ldfile, 0);
2626 break;
2629 mapidx = 1;
2630 goto read_mapping;
2632 case tok_map:
2633 /* Ignore the rest of the line if we don't need the input of
2634 this line. */
2635 if (ignore_content)
2637 lr_ignore_rest (ldfile, 0);
2638 break;
2641 /* We simply forget the `map' keyword and use the following
2642 operand to determine the mapping. */
2643 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2644 if (now->tok == tok_ident || now->tok == tok_string)
2646 size_t cnt;
2648 for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2649 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2650 break;
2652 if (cnt < ctype->map_collection_nr)
2653 free (now->val.str.startmb);
2654 else
2655 /* OK, it's a new map. */
2656 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2658 mapidx = cnt;
2660 else if (now->tok < tok_toupper || now->tok > tok_tolower)
2661 goto err_label;
2662 else
2663 mapidx = now->tok - tok_toupper;
2665 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2666 /* This better should be a semicolon. */
2667 if (now->tok != tok_semicolon)
2668 goto err_label;
2670 read_mapping:
2671 /* Test whether this mapping was already defined. */
2672 if (ctype->tomap_done[mapidx])
2674 lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2675 ctype->mapnames[mapidx]);
2676 lr_ignore_rest (ldfile, 0);
2677 break;
2679 ctype->tomap_done[mapidx] = 1;
2681 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2682 while (now->tok != tok_eol && now->tok != tok_eof)
2684 struct charseq *from_seq;
2685 uint32_t from_wch;
2686 struct charseq *to_seq;
2687 uint32_t to_wch;
2689 /* Every pair starts with an opening brace. */
2690 if (now->tok != tok_open_brace)
2691 goto err_label;
2693 /* Next comes the from-value. */
2694 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2695 if (get_character (now, charmap, repertoire, &from_seq,
2696 &from_wch) != 0)
2697 goto err_label;
2699 /* The next is a comma. */
2700 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2701 if (now->tok != tok_comma)
2702 goto err_label;
2704 /* And the other value. */
2705 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2706 if (get_character (now, charmap, repertoire, &to_seq,
2707 &to_wch) != 0)
2708 goto err_label;
2710 /* And the last thing is the closing brace. */
2711 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2712 if (now->tok != tok_close_brace)
2713 goto err_label;
2715 if (!ignore_content)
2717 /* Check whether the mapping converts from an ASCII value
2718 to a non-ASCII value. */
2719 if (from_seq != NULL && from_seq->nbytes == 1
2720 && isascii (from_seq->bytes[0])
2721 && to_seq != NULL && (to_seq->nbytes != 1
2722 || !isascii (to_seq->bytes[0])))
2723 ctype->to_nonascii = 1;
2725 if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2726 && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2727 /* We can use this value. */
2728 ctype->map256_collection[mapidx][from_seq->bytes[0]]
2729 = to_seq->bytes[0];
2731 if (from_wch != ILLEGAL_CHAR_VALUE
2732 && to_wch != ILLEGAL_CHAR_VALUE)
2733 /* Both correct values. */
2734 *find_idx (ctype, &ctype->map_collection[mapidx],
2735 &ctype->map_collection_max[mapidx],
2736 &ctype->map_collection_act[mapidx],
2737 from_wch) = to_wch;
2740 /* Now comes a semicolon or the end of the line/file. */
2741 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2742 if (now->tok == tok_semicolon)
2743 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2745 break;
2747 case tok_translit_start:
2748 /* Ignore the entire translit section with its peculiar syntax
2749 if we don't need the input. */
2750 if (ignore_content)
2754 lr_ignore_rest (ldfile, 0);
2755 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2757 while (now->tok != tok_translit_end && now->tok != tok_eof);
2759 if (now->tok == tok_eof)
2760 lr_error (ldfile, _(\
2761 "%s: `translit_start' section does not end with `translit_end'"),
2762 "LC_CTYPE");
2764 break;
2767 /* The rest of the line better should be empty. */
2768 lr_ignore_rest (ldfile, 1);
2770 /* We count here the number of allocated entries in the `translit'
2771 array. */
2772 cnt = 0;
2774 ldfile->translate_strings = 1;
2775 ldfile->return_widestr = 1;
2777 /* We proceed until we see the `translit_end' token. */
2778 while (now = lr_token (ldfile, charmap, NULL, repertoire, verbose),
2779 now->tok != tok_translit_end && now->tok != tok_eof)
2781 if (now->tok == tok_eol)
2782 /* Ignore empty lines. */
2783 continue;
2785 if (now->tok == tok_include)
2787 /* We have to include locale. */
2788 const char *locale_name;
2789 const char *repertoire_name;
2790 struct translit_include_t *include_stmt, **include_ptr;
2792 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2793 /* This should be a string or an identifier. In any
2794 case something to name a locale. */
2795 if (now->tok != tok_string && now->tok != tok_ident)
2797 translit_syntax:
2798 lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2799 lr_ignore_rest (ldfile, 0);
2800 continue;
2802 locale_name = now->val.str.startmb;
2804 /* Next should be a semicolon. */
2805 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2806 if (now->tok != tok_semicolon)
2807 goto translit_syntax;
2809 /* Now the repertoire name. */
2810 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2811 if ((now->tok != tok_string && now->tok != tok_ident)
2812 || now->val.str.startmb == NULL)
2813 goto translit_syntax;
2814 repertoire_name = now->val.str.startmb;
2815 if (repertoire_name[0] == '\0')
2816 /* Ignore the empty string. */
2817 repertoire_name = NULL;
2819 /* Save the include statement for later processing. */
2820 include_stmt = (struct translit_include_t *)
2821 xmalloc (sizeof (struct translit_include_t));
2822 include_stmt->copy_locale = locale_name;
2823 include_stmt->copy_repertoire = repertoire_name;
2824 include_stmt->next = NULL;
2826 include_ptr = &ctype->translit_include;
2827 while (*include_ptr != NULL)
2828 include_ptr = &(*include_ptr)->next;
2829 *include_ptr = include_stmt;
2831 /* The rest of the line must be empty. */
2832 lr_ignore_rest (ldfile, 1);
2834 /* Make sure the locale is read. */
2835 add_to_readlist (LC_CTYPE, locale_name, repertoire_name,
2836 1, NULL);
2837 continue;
2839 else if (now->tok == tok_default_missing)
2841 uint32_t *wstr;
2843 while (1)
2845 /* We expect a single character or string as the
2846 argument. */
2847 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2848 wstr = read_widestring (ldfile, now, charmap,
2849 repertoire);
2851 if (wstr != NULL)
2853 if (ctype->default_missing != NULL)
2855 lr_error (ldfile, _("\
2856 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2857 WITH_CUR_LOCALE (error_at_line (0, 0,
2858 ctype->default_missing_file,
2859 ctype->default_missing_lineno,
2860 _("\
2861 previous definition was here")));
2863 else
2865 ctype->default_missing = wstr;
2866 ctype->default_missing_file = ldfile->fname;
2867 ctype->default_missing_lineno = ldfile->lineno;
2869 /* We can have more entries, ignore them. */
2870 lr_ignore_rest (ldfile, 0);
2871 break;
2873 else if (wstr == (uint32_t *) -1l)
2874 /* This was a syntax error. */
2875 break;
2877 /* Maybe there is another replacement we can use. */
2878 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2879 if (now->tok == tok_eol || now->tok == tok_eof)
2881 /* Nothing found. We tell the user. */
2882 lr_error (ldfile, _("\
2883 %s: no representable `default_missing' definition found"), "LC_CTYPE");
2884 break;
2886 if (now->tok != tok_semicolon)
2887 goto translit_syntax;
2890 continue;
2892 else if (now->tok == tok_translit_ignore)
2894 read_translit_ignore_entry (ldfile, ctype, charmap,
2895 repertoire);
2896 continue;
2899 read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2901 ldfile->return_widestr = 0;
2903 if (now->tok == tok_eof)
2904 lr_error (ldfile, _(\
2905 "%s: `translit_start' section does not end with `translit_end'"),
2906 "LC_CTYPE");
2908 break;
2910 case tok_ident:
2911 /* Ignore the rest of the line if we don't need the input of
2912 this line. */
2913 if (ignore_content)
2915 lr_ignore_rest (ldfile, 0);
2916 break;
2919 /* This could mean one of several things. First test whether
2920 it's a character class name. */
2921 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2922 if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2923 break;
2924 if (cnt < ctype->nr_charclass)
2926 class_bit = _ISwbit (cnt);
2927 class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2928 free (now->val.str.startmb);
2929 goto read_charclass;
2931 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2932 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2933 break;
2934 if (cnt < ctype->map_collection_nr)
2936 mapidx = cnt;
2937 free (now->val.str.startmb);
2938 goto read_mapping;
2940 #ifdef PREDEFINED_CLASSES
2941 if (strcmp (now->val.str.startmb, "special1") == 0)
2943 class_bit = _ISwspecial1;
2944 free (now->val.str.startmb);
2945 goto read_charclass;
2947 if (strcmp (now->val.str.startmb, "special2") == 0)
2949 class_bit = _ISwspecial2;
2950 free (now->val.str.startmb);
2951 goto read_charclass;
2953 if (strcmp (now->val.str.startmb, "special3") == 0)
2955 class_bit = _ISwspecial3;
2956 free (now->val.str.startmb);
2957 goto read_charclass;
2959 if (strcmp (now->val.str.startmb, "tosymmetric") == 0)
2961 mapidx = 2;
2962 goto read_mapping;
2964 #endif
2965 break;
2967 case tok_end:
2968 /* Next we assume `LC_CTYPE'. */
2969 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2970 if (now->tok == tok_eof)
2971 break;
2972 if (now->tok == tok_eol)
2973 lr_error (ldfile, _("%s: incomplete `END' line"),
2974 "LC_CTYPE");
2975 else if (now->tok != tok_lc_ctype)
2976 lr_error (ldfile, _("\
2977 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2978 lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2979 return;
2981 default:
2982 err_label:
2983 if (now->tok != tok_eof)
2984 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2987 /* Prepare for the next round. */
2988 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2989 nowtok = now->tok;
2992 /* When we come here we reached the end of the file. */
2993 lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2997 static void
2998 set_class_defaults (struct locale_ctype_t *ctype,
2999 const struct charmap_t *charmap,
3000 struct repertoire_t *repertoire)
3002 size_t cnt;
3004 /* This function defines the default values for the classes and conversions
3005 according to POSIX.2 2.5.2.1.
3006 It may seem that the order of these if-blocks is arbitrary but it is NOT.
3007 Don't move them unless you know what you do! */
3009 auto void set_default (int bitpos, int from, int to);
3011 void set_default (int bitpos, int from, int to)
3013 char tmp[2];
3014 int ch;
3015 int bit = _ISbit (bitpos);
3016 int bitw = _ISwbit (bitpos);
3017 /* Define string. */
3018 strcpy (tmp, "?");
3020 for (ch = from; ch <= to; ++ch)
3022 struct charseq *seq;
3023 tmp[0] = ch;
3025 seq = charmap_find_value (charmap, tmp, 1);
3026 if (seq == NULL)
3028 char buf[10];
3029 sprintf (buf, "U%08X", ch);
3030 seq = charmap_find_value (charmap, buf, 9);
3032 if (seq == NULL)
3034 if (!be_quiet)
3035 WITH_CUR_LOCALE (error (0, 0, _("\
3036 %s: character `%s' not defined in charmap while needed as default value"),
3037 "LC_CTYPE", tmp));
3039 else if (seq->nbytes != 1)
3040 WITH_CUR_LOCALE (error (0, 0, _("\
3041 %s: character `%s' in charmap not representable with one byte"),
3042 "LC_CTYPE", tmp));
3043 else
3044 ctype->class256_collection[seq->bytes[0]] |= bit;
3046 /* No need to search here, the ASCII value is also the Unicode
3047 value. */
3048 ELEM (ctype, class_collection, , ch) |= bitw;
3052 /* Set default values if keyword was not present. */
3053 if ((ctype->class_done & BITw (tok_upper)) == 0)
3054 /* "If this keyword [upper] is not specified, the uppercase letters
3055 `A' through `Z', ..., shall automatically belong to this class,
3056 with implementation defined character values." [P1003.2, 2.5.2.1] */
3057 set_default (BITPOS (tok_upper), 'A', 'Z');
3059 if ((ctype->class_done & BITw (tok_lower)) == 0)
3060 /* "If this keyword [lower] is not specified, the lowercase letters
3061 `a' through `z', ..., shall automatically belong to this class,
3062 with implementation defined character values." [P1003.2, 2.5.2.1] */
3063 set_default (BITPOS (tok_lower), 'a', 'z');
3065 if ((ctype->class_done & BITw (tok_alpha)) == 0)
3067 /* Table 2-6 in P1003.2 says that characters in class `upper' or
3068 class `lower' *must* be in class `alpha'. */
3069 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
3070 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
3072 for (cnt = 0; cnt < 256; ++cnt)
3073 if ((ctype->class256_collection[cnt] & mask) != 0)
3074 ctype->class256_collection[cnt] |= BIT (tok_alpha);
3076 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3077 if ((ctype->class_collection[cnt] & maskw) != 0)
3078 ctype->class_collection[cnt] |= BITw (tok_alpha);
3081 if ((ctype->class_done & BITw (tok_digit)) == 0)
3082 /* "If this keyword [digit] is not specified, the digits `0' through
3083 `9', ..., shall automatically belong to this class, with
3084 implementation-defined character values." [P1003.2, 2.5.2.1] */
3085 set_default (BITPOS (tok_digit), '0', '9');
3087 /* "Only characters specified for the `alpha' and `digit' keyword
3088 shall be specified. Characters specified for the keyword `alpha'
3089 and `digit' are automatically included in this class. */
3091 unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
3092 unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
3094 for (cnt = 0; cnt < 256; ++cnt)
3095 if ((ctype->class256_collection[cnt] & mask) != 0)
3096 ctype->class256_collection[cnt] |= BIT (tok_alnum);
3098 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3099 if ((ctype->class_collection[cnt] & maskw) != 0)
3100 ctype->class_collection[cnt] |= BITw (tok_alnum);
3103 if ((ctype->class_done & BITw (tok_space)) == 0)
3104 /* "If this keyword [space] is not specified, the characters <space>,
3105 <form-feed>, <newline>, <carriage-return>, <tab>, and
3106 <vertical-tab>, ..., shall automatically belong to this class,
3107 with implementation-defined character values." [P1003.2, 2.5.2.1] */
3109 struct charseq *seq;
3111 seq = charmap_find_value (charmap, "space", 5);
3112 if (seq == NULL)
3113 seq = charmap_find_value (charmap, "SP", 2);
3114 if (seq == NULL)
3115 seq = charmap_find_value (charmap, "U00000020", 9);
3116 if (seq == NULL)
3118 if (!be_quiet)
3119 WITH_CUR_LOCALE (error (0, 0, _("\
3120 %s: character `%s' not defined while needed as default value"),
3121 "LC_CTYPE", "<space>"));
3123 else if (seq->nbytes != 1)
3124 WITH_CUR_LOCALE (error (0, 0, _("\
3125 %s: character `%s' in charmap not representable with one byte"),
3126 "LC_CTYPE", "<space>"));
3127 else
3128 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3130 /* No need to search. */
3131 ELEM (ctype, class_collection, , L' ') |= BITw (tok_space);
3133 seq = charmap_find_value (charmap, "form-feed", 9);
3134 if (seq == NULL)
3135 seq = charmap_find_value (charmap, "U0000000C", 9);
3136 if (seq == NULL)
3138 if (!be_quiet)
3139 WITH_CUR_LOCALE (error (0, 0, _("\
3140 %s: character `%s' not defined while needed as default value"),
3141 "LC_CTYPE", "<form-feed>"));
3143 else if (seq->nbytes != 1)
3144 WITH_CUR_LOCALE (error (0, 0, _("\
3145 %s: character `%s' in charmap not representable with one byte"),
3146 "LC_CTYPE", "<form-feed>"));
3147 else
3148 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3150 /* No need to search. */
3151 ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space);
3154 seq = charmap_find_value (charmap, "newline", 7);
3155 if (seq == NULL)
3156 seq = charmap_find_value (charmap, "U0000000A", 9);
3157 if (seq == NULL)
3159 if (!be_quiet)
3160 WITH_CUR_LOCALE (error (0, 0, _("\
3161 character `%s' not defined while needed as default value"),
3162 "<newline>"));
3164 else if (seq->nbytes != 1)
3165 WITH_CUR_LOCALE (error (0, 0, _("\
3166 %s: character `%s' in charmap not representable with one byte"),
3167 "LC_CTYPE", "<newline>"));
3168 else
3169 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3171 /* No need to search. */
3172 ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space);
3175 seq = charmap_find_value (charmap, "carriage-return", 15);
3176 if (seq == NULL)
3177 seq = charmap_find_value (charmap, "U0000000D", 9);
3178 if (seq == NULL)
3180 if (!be_quiet)
3181 WITH_CUR_LOCALE (error (0, 0, _("\
3182 %s: character `%s' not defined while needed as default value"),
3183 "LC_CTYPE", "<carriage-return>"));
3185 else if (seq->nbytes != 1)
3186 WITH_CUR_LOCALE (error (0, 0, _("\
3187 %s: character `%s' in charmap not representable with one byte"),
3188 "LC_CTYPE", "<carriage-return>"));
3189 else
3190 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3192 /* No need to search. */
3193 ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space);
3196 seq = charmap_find_value (charmap, "tab", 3);
3197 if (seq == NULL)
3198 seq = charmap_find_value (charmap, "U00000009", 9);
3199 if (seq == NULL)
3201 if (!be_quiet)
3202 WITH_CUR_LOCALE (error (0, 0, _("\
3203 %s: character `%s' not defined while needed as default value"),
3204 "LC_CTYPE", "<tab>"));
3206 else if (seq->nbytes != 1)
3207 WITH_CUR_LOCALE (error (0, 0, _("\
3208 %s: character `%s' in charmap not representable with one byte"),
3209 "LC_CTYPE", "<tab>"));
3210 else
3211 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3213 /* No need to search. */
3214 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space);
3217 seq = charmap_find_value (charmap, "vertical-tab", 12);
3218 if (seq == NULL)
3219 seq = charmap_find_value (charmap, "U0000000B", 9);
3220 if (seq == NULL)
3222 if (!be_quiet)
3223 WITH_CUR_LOCALE (error (0, 0, _("\
3224 %s: character `%s' not defined while needed as default value"),
3225 "LC_CTYPE", "<vertical-tab>"));
3227 else if (seq->nbytes != 1)
3228 WITH_CUR_LOCALE (error (0, 0, _("\
3229 %s: character `%s' in charmap not representable with one byte"),
3230 "LC_CTYPE", "<vertical-tab>"));
3231 else
3232 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3234 /* No need to search. */
3235 ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space);
3238 if ((ctype->class_done & BITw (tok_xdigit)) == 0)
3239 /* "If this keyword is not specified, the digits `0' to `9', the
3240 uppercase letters `A' through `F', and the lowercase letters `a'
3241 through `f', ..., shall automatically belong to this class, with
3242 implementation defined character values." [P1003.2, 2.5.2.1] */
3244 set_default (BITPOS (tok_xdigit), '0', '9');
3245 set_default (BITPOS (tok_xdigit), 'A', 'F');
3246 set_default (BITPOS (tok_xdigit), 'a', 'f');
3249 if ((ctype->class_done & BITw (tok_blank)) == 0)
3250 /* "If this keyword [blank] is unspecified, the characters <space> and
3251 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
3253 struct charseq *seq;
3255 seq = charmap_find_value (charmap, "space", 5);
3256 if (seq == NULL)
3257 seq = charmap_find_value (charmap, "SP", 2);
3258 if (seq == NULL)
3259 seq = charmap_find_value (charmap, "U00000020", 9);
3260 if (seq == NULL)
3262 if (!be_quiet)
3263 WITH_CUR_LOCALE (error (0, 0, _("\
3264 %s: character `%s' not defined while needed as default value"),
3265 "LC_CTYPE", "<space>"));
3267 else if (seq->nbytes != 1)
3268 WITH_CUR_LOCALE (error (0, 0, _("\
3269 %s: character `%s' in charmap not representable with one byte"),
3270 "LC_CTYPE", "<space>"));
3271 else
3272 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3274 /* No need to search. */
3275 ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank);
3278 seq = charmap_find_value (charmap, "tab", 3);
3279 if (seq == NULL)
3280 seq = charmap_find_value (charmap, "U00000009", 9);
3281 if (seq == NULL)
3283 if (!be_quiet)
3284 WITH_CUR_LOCALE (error (0, 0, _("\
3285 %s: character `%s' not defined while needed as default value"),
3286 "LC_CTYPE", "<tab>"));
3288 else if (seq->nbytes != 1)
3289 WITH_CUR_LOCALE (error (0, 0, _("\
3290 %s: character `%s' in charmap not representable with one byte"),
3291 "LC_CTYPE", "<tab>"));
3292 else
3293 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3295 /* No need to search. */
3296 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank);
3299 if ((ctype->class_done & BITw (tok_graph)) == 0)
3300 /* "If this keyword [graph] is not specified, characters specified for
3301 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3302 shall belong to this character class." [P1003.2, 2.5.2.1] */
3304 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3305 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3306 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3307 BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3308 BITw (tok_punct);
3309 size_t cnt;
3311 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3312 if ((ctype->class_collection[cnt] & maskw) != 0)
3313 ctype->class_collection[cnt] |= BITw (tok_graph);
3315 for (cnt = 0; cnt < 256; ++cnt)
3316 if ((ctype->class256_collection[cnt] & mask) != 0)
3317 ctype->class256_collection[cnt] |= BIT (tok_graph);
3320 if ((ctype->class_done & BITw (tok_print)) == 0)
3321 /* "If this keyword [print] is not provided, characters specified for
3322 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3323 and the <space> character shall belong to this character class."
3324 [P1003.2, 2.5.2.1] */
3326 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3327 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3328 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3329 BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3330 BITw (tok_punct);
3331 size_t cnt;
3332 struct charseq *seq;
3334 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3335 if ((ctype->class_collection[cnt] & maskw) != 0)
3336 ctype->class_collection[cnt] |= BITw (tok_print);
3338 for (cnt = 0; cnt < 256; ++cnt)
3339 if ((ctype->class256_collection[cnt] & mask) != 0)
3340 ctype->class256_collection[cnt] |= BIT (tok_print);
3343 seq = charmap_find_value (charmap, "space", 5);
3344 if (seq == NULL)
3345 seq = charmap_find_value (charmap, "SP", 2);
3346 if (seq == NULL)
3347 seq = charmap_find_value (charmap, "U00000020", 9);
3348 if (seq == NULL)
3350 if (!be_quiet)
3351 WITH_CUR_LOCALE (error (0, 0, _("\
3352 %s: character `%s' not defined while needed as default value"),
3353 "LC_CTYPE", "<space>"));
3355 else if (seq->nbytes != 1)
3356 WITH_CUR_LOCALE (error (0, 0, _("\
3357 %s: character `%s' in charmap not representable with one byte"),
3358 "LC_CTYPE", "<space>"));
3359 else
3360 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
3362 /* No need to search. */
3363 ELEM (ctype, class_collection, , L' ') |= BITw (tok_print);
3366 if (ctype->tomap_done[0] == 0)
3367 /* "If this keyword [toupper] is not specified, the lowercase letters
3368 `a' through `z', and their corresponding uppercase letters `A' to
3369 `Z', ..., shall automatically be included, with implementation-
3370 defined character values." [P1003.2, 2.5.2.1] */
3372 char tmp[4];
3373 int ch;
3375 strcpy (tmp, "<?>");
3377 for (ch = 'a'; ch <= 'z'; ++ch)
3379 struct charseq *seq_from, *seq_to;
3381 tmp[1] = (char) ch;
3383 seq_from = charmap_find_value (charmap, &tmp[1], 1);
3384 if (seq_from == NULL)
3386 char buf[10];
3387 sprintf (buf, "U%08X", ch);
3388 seq_from = charmap_find_value (charmap, buf, 9);
3390 if (seq_from == NULL)
3392 if (!be_quiet)
3393 WITH_CUR_LOCALE (error (0, 0, _("\
3394 %s: character `%s' not defined while needed as default value"),
3395 "LC_CTYPE", tmp));
3397 else if (seq_from->nbytes != 1)
3399 if (!be_quiet)
3400 WITH_CUR_LOCALE (error (0, 0, _("\
3401 %s: character `%s' needed as default value not representable with one byte"),
3402 "LC_CTYPE", tmp));
3404 else
3406 /* This conversion is implementation defined. */
3407 tmp[1] = (char) (ch + ('A' - 'a'));
3408 seq_to = charmap_find_value (charmap, &tmp[1], 1);
3409 if (seq_to == NULL)
3411 char buf[10];
3412 sprintf (buf, "U%08X", ch + ('A' - 'a'));
3413 seq_to = charmap_find_value (charmap, buf, 9);
3415 if (seq_to == NULL)
3417 if (!be_quiet)
3418 WITH_CUR_LOCALE (error (0, 0, _("\
3419 %s: character `%s' not defined while needed as default value"),
3420 "LC_CTYPE", tmp));
3422 else if (seq_to->nbytes != 1)
3424 if (!be_quiet)
3425 WITH_CUR_LOCALE (error (0, 0, _("\
3426 %s: character `%s' needed as default value not representable with one byte"),
3427 "LC_CTYPE", tmp));
3429 else
3430 /* The index [0] is determined by the order of the
3431 `ctype_map_newP' calls in `ctype_startup'. */
3432 ctype->map256_collection[0][seq_from->bytes[0]]
3433 = seq_to->bytes[0];
3436 /* No need to search. */
3437 ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
3441 if (ctype->tomap_done[1] == 0)
3442 /* "If this keyword [tolower] is not specified, the mapping shall be
3443 the reverse mapping of the one specified to `toupper'." [P1003.2] */
3445 for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
3446 if (ctype->map_collection[0][cnt] != 0)
3447 ELEM (ctype, map_collection, [1],
3448 ctype->map_collection[0][cnt])
3449 = ctype->charnames[cnt];
3451 for (cnt = 0; cnt < 256; ++cnt)
3452 if (ctype->map256_collection[0][cnt] != 0)
3453 ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
3456 if (ctype->outdigits_act != 10)
3458 if (ctype->outdigits_act != 0)
3459 WITH_CUR_LOCALE (error (0, 0, _("\
3460 %s: field `%s' does not contain exactly ten entries"),
3461 "LC_CTYPE", "outdigit"));
3463 for (cnt = ctype->outdigits_act; cnt < 10; ++cnt)
3465 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3466 digits + cnt, 1);
3468 if (ctype->mboutdigits[cnt] == NULL)
3469 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3470 longnames[cnt],
3471 strlen (longnames[cnt]));
3473 if (ctype->mboutdigits[cnt] == NULL)
3474 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3475 uninames[cnt], 9);
3477 if (ctype->mboutdigits[cnt] == NULL)
3479 /* Provide a replacement. */
3480 WITH_CUR_LOCALE (error (0, 0, _("\
3481 no output digits defined and none of the standard names in the charmap")));
3483 ctype->mboutdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
3484 sizeof (struct charseq)
3485 + 1);
3487 /* This is better than nothing. */
3488 ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
3489 ctype->mboutdigits[cnt]->nbytes = 1;
3492 ctype->wcoutdigits[cnt] = L'0' + cnt;
3495 ctype->outdigits_act = 10;
3500 /* Construction of sparse 3-level tables.
3501 See wchar-lookup.h for their structure and the meaning of p and q. */
/* State used while building the sparse three-level bitmap of one
   wide-character class, plus the packed image produced at the end.  */
struct wctype_table
{
  /* Shape of the table.  A level-3 block holds (1 << p) 32-bit words and
     a level-2 block holds (1 << q) entries.  */
  unsigned int p;
  unsigned int q;

  /* Growable arrays used during construction.  level1_alloc and
     level1_size count single entries; the level-2 and level-3 counters
     count whole blocks.  */
  size_t level1_alloc;
  size_t level1_size;
  uint32_t *level1;
  size_t level2_alloc;
  size_t level2_size;
  uint32_t *level2;
  size_t level3_alloc;
  size_t level3_size;
  uint32_t *level3;

  /* Packed image written by the finalize step: its size in bytes and the
     buffer itself.  */
  size_t result_size;
  char *result;
};
3523 /* Initialize. Assumes t->p and t->q have already been set. */
3524 static inline void
3525 wctype_table_init (struct wctype_table *t)
3527 t->level1 = NULL;
3528 t->level1_alloc = t->level1_size = 0;
3529 t->level2 = NULL;
3530 t->level2_alloc = t->level2_size = 0;
3531 t->level3 = NULL;
3532 t->level3_alloc = t->level3_size = 0;
3535 /* Retrieve an entry. */
3536 static inline int
3537 wctype_table_get (struct wctype_table *t, uint32_t wc)
3539 uint32_t index1 = wc >> (t->q + t->p + 5);
3540 if (index1 < t->level1_size)
3542 uint32_t lookup1 = t->level1[index1];
3543 if (lookup1 != EMPTY)
3545 uint32_t index2 = ((wc >> (t->p + 5)) & ((1 << t->q) - 1))
3546 + (lookup1 << t->q);
3547 uint32_t lookup2 = t->level2[index2];
3548 if (lookup2 != EMPTY)
3550 uint32_t index3 = ((wc >> 5) & ((1 << t->p) - 1))
3551 + (lookup2 << t->p);
3552 uint32_t lookup3 = t->level3[index3];
3553 uint32_t index4 = wc & 0x1f;
3555 return (lookup3 >> index4) & 1;
3559 return 0;
3562 /* Add one entry. */
3563 static void
3564 wctype_table_add (struct wctype_table *t, uint32_t wc)
3566 uint32_t index1 = wc >> (t->q + t->p + 5);
3567 uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1);
3568 uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1);
3569 uint32_t index4 = wc & 0x1f;
3570 size_t i, i1, i2;
3572 if (index1 >= t->level1_size)
3574 if (index1 >= t->level1_alloc)
3576 size_t alloc = 2 * t->level1_alloc;
3577 if (alloc <= index1)
3578 alloc = index1 + 1;
3579 t->level1 = (uint32_t *) xrealloc ((char *) t->level1,
3580 alloc * sizeof (uint32_t));
3581 t->level1_alloc = alloc;
3583 while (index1 >= t->level1_size)
3584 t->level1[t->level1_size++] = EMPTY;
3587 if (t->level1[index1] == EMPTY)
3589 if (t->level2_size == t->level2_alloc)
3591 size_t alloc = 2 * t->level2_alloc + 1;
3592 t->level2 = (uint32_t *) xrealloc ((char *) t->level2,
3593 (alloc << t->q) * sizeof (uint32_t));
3594 t->level2_alloc = alloc;
3596 i1 = t->level2_size << t->q;
3597 i2 = (t->level2_size + 1) << t->q;
3598 for (i = i1; i < i2; i++)
3599 t->level2[i] = EMPTY;
3600 t->level1[index1] = t->level2_size++;
3603 index2 += t->level1[index1] << t->q;
3605 if (t->level2[index2] == EMPTY)
3607 if (t->level3_size == t->level3_alloc)
3609 size_t alloc = 2 * t->level3_alloc + 1;
3610 t->level3 = (uint32_t *) xrealloc ((char *) t->level3,
3611 (alloc << t->p) * sizeof (uint32_t));
3612 t->level3_alloc = alloc;
3614 i1 = t->level3_size << t->p;
3615 i2 = (t->level3_size + 1) << t->p;
3616 for (i = i1; i < i2; i++)
3617 t->level3[i] = 0;
3618 t->level2[index2] = t->level3_size++;
3621 index3 += t->level2[index2] << t->p;
3623 t->level3[index3] |= (uint32_t)1 << index4;
3626 /* Finalize and shrink. */
3627 static void
3628 wctype_table_finalize (struct wctype_table *t)
3630 size_t i, j, k;
3631 uint32_t reorder3[t->level3_size];
3632 uint32_t reorder2[t->level2_size];
3633 uint32_t level1_offset, level2_offset, level3_offset;
3635 /* Uniquify level3 blocks. */
3636 k = 0;
3637 for (j = 0; j < t->level3_size; j++)
3639 for (i = 0; i < k; i++)
3640 if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3641 (1 << t->p) * sizeof (uint32_t)) == 0)
3642 break;
3643 /* Relocate block j to block i. */
3644 reorder3[j] = i;
3645 if (i == k)
3647 if (i != j)
3648 memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3649 (1 << t->p) * sizeof (uint32_t));
3650 k++;
3653 t->level3_size = k;
3655 for (i = 0; i < (t->level2_size << t->q); i++)
3656 if (t->level2[i] != EMPTY)
3657 t->level2[i] = reorder3[t->level2[i]];
3659 /* Uniquify level2 blocks. */
3660 k = 0;
3661 for (j = 0; j < t->level2_size; j++)
3663 for (i = 0; i < k; i++)
3664 if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3665 (1 << t->q) * sizeof (uint32_t)) == 0)
3666 break;
3667 /* Relocate block j to block i. */
3668 reorder2[j] = i;
3669 if (i == k)
3671 if (i != j)
3672 memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3673 (1 << t->q) * sizeof (uint32_t));
3674 k++;
3677 t->level2_size = k;
3679 for (i = 0; i < t->level1_size; i++)
3680 if (t->level1[i] != EMPTY)
3681 t->level1[i] = reorder2[t->level1[i]];
3683 /* Create and fill the resulting compressed representation. */
3684 t->result_size =
3685 5 * sizeof (uint32_t)
3686 + t->level1_size * sizeof (uint32_t)
3687 + (t->level2_size << t->q) * sizeof (uint32_t)
3688 + (t->level3_size << t->p) * sizeof (uint32_t);
3689 t->result = (char *) xmalloc (t->result_size);
3691 level1_offset =
3692 5 * sizeof (uint32_t);
3693 level2_offset =
3694 5 * sizeof (uint32_t)
3695 + t->level1_size * sizeof (uint32_t);
3696 level3_offset =
3697 5 * sizeof (uint32_t)
3698 + t->level1_size * sizeof (uint32_t)
3699 + (t->level2_size << t->q) * sizeof (uint32_t);
3701 ((uint32_t *) t->result)[0] = t->q + t->p + 5;
3702 ((uint32_t *) t->result)[1] = t->level1_size;
3703 ((uint32_t *) t->result)[2] = t->p + 5;
3704 ((uint32_t *) t->result)[3] = (1 << t->q) - 1;
3705 ((uint32_t *) t->result)[4] = (1 << t->p) - 1;
3707 for (i = 0; i < t->level1_size; i++)
3708 ((uint32_t *) (t->result + level1_offset))[i] =
3709 (t->level1[i] == EMPTY
3711 : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3713 for (i = 0; i < (t->level2_size << t->q); i++)
3714 ((uint32_t *) (t->result + level2_offset))[i] =
3715 (t->level2[i] == EMPTY
3717 : (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
3719 for (i = 0; i < (t->level3_size << t->p); i++)
3720 ((uint32_t *) (t->result + level3_offset))[i] = t->level3[i];
3722 if (t->level1_alloc > 0)
3723 free (t->level1);
3724 if (t->level2_alloc > 0)
3725 free (t->level2);
3726 if (t->level3_alloc > 0)
3727 free (t->level3);
3730 #define TABLE wcwidth_table
3731 #define ELEMENT uint8_t
3732 #define DEFAULT 0xff
3733 #include "3level.h"
3735 #define TABLE wctrans_table
3736 #define ELEMENT int32_t
3737 #define DEFAULT 0
3738 #define wctrans_table_add wctrans_table_add_internal
3739 #include "3level.h"
3740 #undef wctrans_table_add
3741 /* The wctrans_table must actually store the difference between the
3742 desired result and the argument. */
3743 static inline void
3744 wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
3746 wctrans_table_add_internal (t, wc, mapped_wc - wc);
3750 /* Flattens the included transliterations into a translit list.
3751 Inserts them in the list at `cursor', and returns the new cursor. */
3752 static struct translit_t **
3753 translit_flatten (struct locale_ctype_t *ctype,
3754 const struct charmap_t *charmap,
3755 struct translit_t **cursor)
3757 while (ctype->translit_include != NULL)
3759 const char *copy_locale = ctype->translit_include->copy_locale;
3760 const char *copy_repertoire = ctype->translit_include->copy_repertoire;
3761 struct localedef_t *other;
3763 /* Unchain the include statement. During the depth-first traversal
3764 we don't want to visit any locale more than once. */
3765 ctype->translit_include = ctype->translit_include->next;
3767 other = find_locale (LC_CTYPE, copy_locale, copy_repertoire, charmap);
3769 if (other == NULL)
3771 WITH_CUR_LOCALE (error (0, 0, _("\
3772 %s: transliteration data from locale `%s' not available"),
3773 "LC_CTYPE", copy_locale));
3775 else
3777 struct locale_ctype_t *other_ctype =
3778 other->categories[LC_CTYPE].ctype;
3780 cursor = translit_flatten (other_ctype, charmap, cursor);
3781 assert (other_ctype->translit_include == NULL);
3783 if (other_ctype->translit != NULL)
3785 /* Insert the other_ctype->translit list at *cursor. */
3786 struct translit_t *endp = other_ctype->translit;
3787 while (endp->next != NULL)
3788 endp = endp->next;
3790 endp->next = *cursor;
3791 *cursor = other_ctype->translit;
3793 /* Avoid any risk of circular lists. */
3794 other_ctype->translit = NULL;
3796 cursor = &endp->next;
3799 if (ctype->default_missing == NULL)
3800 ctype->default_missing = other_ctype->default_missing;
3804 return cursor;
3807 static void
3808 allocate_arrays (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
3809 struct repertoire_t *repertoire)
3811 size_t idx, nr;
3812 const void *key;
3813 size_t len;
3814 void *vdata;
3815 void *curs;
3817 /* You wonder about this amount of memory? This is only because some
3818 users do not manage to address the array with unsigned values or
3819 data types with range >= 256. '\200' would result in the array
3820 index -128. To help these poor people we duplicate the entries for
3821 128 up to 255 below the entry for \0. */
3822 ctype->ctype_b = (char_class_t *) xcalloc (256 + 128, sizeof (char_class_t));
3823 ctype->ctype32_b = (char_class32_t *) xcalloc (256, sizeof (char_class32_t));
3824 ctype->class_b = (uint32_t **)
3825 xmalloc (ctype->nr_charclass * sizeof (uint32_t *));
3826 ctype->class_3level = (struct iovec *)
3827 xmalloc (ctype->nr_charclass * sizeof (struct iovec));
3829 /* This is the array accessed using the multibyte string elements. */
3830 for (idx = 0; idx < 256; ++idx)
3831 ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
3833 /* Mirror first 127 entries. We must take care that entry -1 is not
3834 mirrored because EOF == -1. */
3835 for (idx = 0; idx < 127; ++idx)
3836 ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
3838 /* The 32 bit array contains all characters < 0x100. */
3839 for (idx = 0; idx < ctype->class_collection_act; ++idx)
3840 if (ctype->charnames[idx] < 0x100)
3841 ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
3843 for (nr = 0; nr < ctype->nr_charclass; nr++)
3845 ctype->class_b[nr] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
3847 for (idx = 0; idx < 256; ++idx)
3848 if (ctype->class256_collection[idx] & _ISbit (nr))
3849 ctype->class_b[nr][idx >> 5] |= (uint32_t)1 << (idx & 0x1f);
3852 for (nr = 0; nr < ctype->nr_charclass; nr++)
3854 struct wctype_table t;
3856 t.p = 4; /* or: 5 */
3857 t.q = 7; /* or: 6 */
3858 wctype_table_init (&t);
3860 for (idx = 0; idx < ctype->class_collection_act; ++idx)
3861 if (ctype->class_collection[idx] & _ISwbit (nr))
3862 wctype_table_add (&t, ctype->charnames[idx]);
3864 wctype_table_finalize (&t);
3866 if (verbose)
3867 WITH_CUR_LOCALE (fprintf (stderr, _("\
3868 %s: table for class \"%s\": %lu bytes\n"),
3869 "LC_CTYPE", ctype->classnames[nr],
3870 (unsigned long int) t.result_size));
3872 ctype->class_3level[nr].iov_base = t.result;
3873 ctype->class_3level[nr].iov_len = t.result_size;
3876 /* Room for table of mappings. */
3877 ctype->map_b = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3878 ctype->map32_b = (uint32_t **) xmalloc (ctype->map_collection_nr
3879 * sizeof (uint32_t *));
3880 ctype->map_3level = (struct iovec *)
3881 xmalloc (ctype->map_collection_nr * sizeof (struct iovec));
3883 /* Fill in all mappings. */
3884 for (idx = 0; idx < 2; ++idx)
3886 unsigned int idx2;
3888 /* Allocate table. */
3889 ctype->map_b[idx] = (uint32_t *)
3890 xmalloc ((256 + 128) * sizeof (uint32_t));
3892 /* Copy values from collection. */
3893 for (idx2 = 0; idx2 < 256; ++idx2)
3894 ctype->map_b[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
3896 /* Mirror first 127 entries. We must take care not to map entry
3897 -1 because EOF == -1. */
3898 for (idx2 = 0; idx2 < 127; ++idx2)
3899 ctype->map_b[idx][idx2] = ctype->map_b[idx][256 + idx2];
3901 /* EOF must map to EOF. */
3902 ctype->map_b[idx][127] = EOF;
3905 for (idx = 0; idx < ctype->map_collection_nr; ++idx)
3907 unsigned int idx2;
3909 /* Allocate table. */
3910 ctype->map32_b[idx] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
3912 /* Copy values from collection. Default is identity mapping. */
3913 for (idx2 = 0; idx2 < 256; ++idx2)
3914 ctype->map32_b[idx][idx2] =
3915 (ctype->map_collection[idx][idx2] != 0
3916 ? ctype->map_collection[idx][idx2]
3917 : idx2);
3920 for (nr = 0; nr < ctype->map_collection_nr; nr++)
3922 struct wctrans_table t;
3924 t.p = 7;
3925 t.q = 9;
3926 wctrans_table_init (&t);
3928 for (idx = 0; idx < ctype->map_collection_act[nr]; ++idx)
3929 if (ctype->map_collection[nr][idx] != 0)
3930 wctrans_table_add (&t, ctype->charnames[idx],
3931 ctype->map_collection[nr][idx]);
3933 wctrans_table_finalize (&t);
3935 if (verbose)
3936 WITH_CUR_LOCALE (fprintf (stderr, _("\
3937 %s: table for map \"%s\": %lu bytes\n"),
3938 "LC_CTYPE", ctype->mapnames[nr],
3939 (unsigned long int) t.result_size));
3941 ctype->map_3level[nr].iov_base = t.result;
3942 ctype->map_3level[nr].iov_len = t.result_size;
3945 /* Extra array for class and map names. */
3946 ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3947 * sizeof (uint32_t));
3948 ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3949 * sizeof (uint32_t));
3951 ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
3952 ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
3954 /* Array for width information. Because the expected widths are very
3955 small (never larger than 2) we use only one single byte. This
3956 saves space.
3957 We put only printable characters in the table. wcwidth is specified
3958 to return -1 for non-printable characters. Doing the check here
3959 saves a run-time check.
3960 But we put L'\0' in the table. This again saves a run-time check. */
3962 struct wcwidth_table t;
3964 t.p = 7;
3965 t.q = 9;
3966 wcwidth_table_init (&t);
3968 /* First set all the printable characters of the character set to
3969 the default width. */
3970 curs = NULL;
3971 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
3973 struct charseq *data = (struct charseq *) vdata;
3975 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
3976 data->ucs4 = repertoire_find_value (ctype->repertoire,
3977 data->name, len);
3979 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
3981 uint32_t *class_bits =
3982 find_idx (ctype, &ctype->class_collection, NULL,
3983 &ctype->class_collection_act, data->ucs4);
3985 if (class_bits != NULL && (*class_bits & BITw (tok_print)))
3986 wcwidth_table_add (&t, data->ucs4, charmap->width_default);
3990 /* Now add the explicitly specified widths. */
3991 if (charmap->width_rules != NULL)
3993 size_t cnt;
3995 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
3997 unsigned char bytes[charmap->mb_cur_max];
3998 int nbytes = charmap->width_rules[cnt].from->nbytes;
4000 /* We have the range of character for which the width is
4001 specified described using byte sequences of the multibyte
4002 charset. We have to convert this to UCS4 now. And we
4003 cannot simply convert the beginning and the end of the
4004 sequence, we have to iterate over the byte sequence and
4005 convert it for every single character. */
4006 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
4008 while (nbytes < charmap->width_rules[cnt].to->nbytes
4009 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
4010 nbytes) <= 0)
4012 /* Find the UCS value for `bytes'. */
4013 int inner;
4014 uint32_t wch;
4015 struct charseq *seq =
4016 charmap_find_symbol (charmap, bytes, nbytes);
4018 if (seq == NULL)
4019 wch = ILLEGAL_CHAR_VALUE;
4020 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
4021 wch = seq->ucs4;
4022 else
4023 wch = repertoire_find_value (ctype->repertoire, seq->name,
4024 strlen (seq->name));
4026 if (wch != ILLEGAL_CHAR_VALUE)
4028 /* Store the value. */
4029 uint32_t *class_bits =
4030 find_idx (ctype, &ctype->class_collection, NULL,
4031 &ctype->class_collection_act, wch);
4033 if (class_bits != NULL && (*class_bits & BITw (tok_print)))
4034 wcwidth_table_add (&t, wch,
4035 charmap->width_rules[cnt].width);
4038 /* "Increment" the bytes sequence. */
4039 inner = nbytes - 1;
4040 while (inner >= 0 && bytes[inner] == 0xff)
4041 --inner;
4043 if (inner < 0)
4045 /* We have to extend the byte sequence. */
4046 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
4047 break;
4049 bytes[0] = 1;
4050 memset (&bytes[1], 0, nbytes);
4051 ++nbytes;
4053 else
4055 ++bytes[inner];
4056 while (++inner < nbytes)
4057 bytes[inner] = 0;
4063 /* Set the width of L'\0' to 0. */
4064 wcwidth_table_add (&t, 0, 0);
4066 wcwidth_table_finalize (&t);
4068 if (verbose)
4069 WITH_CUR_LOCALE (fprintf (stderr, _("%s: table for width: %lu bytes\n"),
4070 "LC_CTYPE", (unsigned long int) t.result_size));
4072 ctype->width.iov_base = t.result;
4073 ctype->width.iov_len = t.result_size;
4076 /* Set MB_CUR_MAX. */
4077 ctype->mb_cur_max = charmap->mb_cur_max;
4079 /* Now determine the table for the transliteration information.
4081 XXX It is not yet clear to me whether it is worth implementing a
4082 complicated algorithm which uses a hash table to locate the entries.
4083 For now I'll use a simple array which can be searching using binary
4084 search. */
4085 if (ctype->translit_include != NULL)
4086 /* Traverse the locales mentioned in the `include' statements in a
4087 depth-first way and fold in their transliteration information. */
4088 translit_flatten (ctype, charmap, &ctype->translit);
4090 if (ctype->translit != NULL)
4092 /* First count how many entries we have. This is the upper limit
4093 since some entries from the included files might be overwritten. */
4094 size_t number = 0;
4095 size_t cnt;
4096 struct translit_t *runp = ctype->translit;
4097 struct translit_t **sorted;
4098 size_t from_len, to_len;
4100 while (runp != NULL)
4102 ++number;
4103 runp = runp->next;
4106 /* Next we allocate an array large enough and fill in the values. */
4107 sorted = (struct translit_t **) alloca (number
4108 * sizeof (struct translit_t **));
4109 runp = ctype->translit;
4110 number = 0;
4113 /* Search for the place where to insert this string.
4114 XXX Better use a real sorting algorithm later. */
4115 size_t idx = 0;
4116 int replace = 0;
4118 while (idx < number)
4120 int res = wcscmp ((const wchar_t *) sorted[idx]->from,
4121 (const wchar_t *) runp->from);
4122 if (res == 0)
4124 replace = 1;
4125 break;
4127 if (res > 0)
4128 break;
4129 ++idx;
4132 if (replace)
4133 sorted[idx] = runp;
4134 else
4136 memmove (&sorted[idx + 1], &sorted[idx],
4137 (number - idx) * sizeof (struct translit_t *));
4138 sorted[idx] = runp;
4139 ++number;
4142 runp = runp->next;
4144 while (runp != NULL);
4146 /* The next step is putting all the possible transliteration
4147 strings in one memory block so that we can write it out.
4148 We need several different blocks:
4149 - index to the from-string array
4150 - from-string array
4151 - index to the to-string array
4152 - to-string array.
4154 from_len = to_len = 0;
4155 for (cnt = 0; cnt < number; ++cnt)
4157 struct translit_to_t *srunp;
4158 from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4159 srunp = sorted[cnt]->to;
4160 while (srunp != NULL)
4162 to_len += wcslen ((const wchar_t *) srunp->str) + 1;
4163 srunp = srunp->next;
4165 /* Plus one for the extra NUL character marking the end of
4166 the list for the current entry. */
4167 ++to_len;
4170 /* We can allocate the arrays for the results. */
4171 ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
4172 ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
4173 ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
4174 ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
4176 from_len = 0;
4177 to_len = 0;
4178 for (cnt = 0; cnt < number; ++cnt)
4180 size_t len;
4181 struct translit_to_t *srunp;
4183 ctype->translit_from_idx[cnt] = from_len;
4184 ctype->translit_to_idx[cnt] = to_len;
4186 len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4187 wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
4188 (const wchar_t *) sorted[cnt]->from, len);
4189 from_len += len;
4191 ctype->translit_to_idx[cnt] = to_len;
4192 srunp = sorted[cnt]->to;
4193 while (srunp != NULL)
4195 len = wcslen ((const wchar_t *) srunp->str) + 1;
4196 wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
4197 (const wchar_t *) srunp->str, len);
4198 to_len += len;
4199 srunp = srunp->next;
4201 ctype->translit_to_tbl[to_len++] = L'\0';
4204 /* Store the information about the length. */
4205 ctype->translit_idx_size = number;
4206 ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
4207 ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
4209 else
4211 /* Provide some dummy pointers since we have nothing to write out. */
4212 static uint32_t no_str = { 0 };
4214 ctype->translit_from_idx = &no_str;
4215 ctype->translit_from_tbl = &no_str;
4216 ctype->translit_to_tbl = &no_str;
4217 ctype->translit_idx_size = 0;
4218 ctype->translit_from_tbl_size = 0;
4219 ctype->translit_to_tbl_size = 0;