2.5-18.1
[glibc.git] / locale / programs / ld-ctype.c
blob40d5e4b17f375ebe61913c427b73e30c7fcc2f1e
1 /* Copyright (C) 1995-2006, 2007 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License version 2 as
7 published by the Free Software Foundation.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 #ifdef HAVE_CONFIG_H
19 # include <config.h>
20 #endif
22 #include <alloca.h>
23 #include <byteswap.h>
24 #include <endian.h>
25 #include <errno.h>
26 #include <limits.h>
27 #include <obstack.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <wchar.h>
31 #include <wctype.h>
32 #include <sys/uio.h>
34 #include "localedef.h"
35 #include "charmap.h"
36 #include "localeinfo.h"
37 #include "langinfo.h"
38 #include "linereader.h"
39 #include "locfile-token.h"
40 #include "locfile.h"
42 #include <assert.h>
#ifdef PREDEFINED_CLASSES
/* These are the extra bits not in wctype.h since these are not preallocated
   classes.  The constants are unsigned: `1 << 31' would shift into the
   sign bit of a 32-bit `int', which is undefined behaviour.  */
# define _ISwspecial1	(1U << 29)
# define _ISwspecial2	(1U << 30)
# define _ISwspecial3	(1U << 31)
#endif
/* Zero-based position of a class token, counted from `tok_upper'.  */
#define BITPOS(cls) ((cls) - tok_upper)
/* Mask for a class token as produced by `_ISbit'; used in this file with
   the 256-entry `class256_collection' table.  */
#define BIT(cls) (_ISbit (BITPOS (cls)))
/* Mask for a class token as produced by `_ISwbit'; used in this file with
   the `class_collection' table.  */
#define BITw(cls) (_ISwbit (BITPOS (cls)))

/* Yield the element (as an lvalue) that `find_idx' returns for VAL in the
   table CT->COLL.  SUB is either empty or an array subscript selecting one
   of several parallel tables; the matching `_max' and `_act' members are
   passed along so `find_idx' can work on them.  */
#define ELEM(ct, coll, sub, val)					      \
  *find_idx (ct, &(ct)->coll sub, &(ct)->coll##_max sub,		      \
	     &(ct)->coll##_act sub, val)


/* To be compatible with former implementations the number of bits for
   character classes is restricted to 16 for now.  Once that compatibility
   is no longer needed the number can be raised to 32.  */
#define char_class_t uint16_t
#define char_class32_t uint32_t
/* Types describing a transliteration action.  There is one from-string,
   possibly several characters long, and a set of to-strings which may
   each be several characters long as well.  Everything is stored as
   32-bit values because that is what the gconv functions use.  */

/* One to-string; the candidates for a rule are chained through NEXT.  */
struct translit_to_t
{
  uint32_t *str;

  struct translit_to_t *next;
};
/* One transliteration rule: the from-string, the place it was defined
   (file name and line number), the list of to-strings and the link to
   the next rule.  */
struct translit_t
{
  uint32_t *from;

  const char *fname;
  size_t lineno;

  struct translit_to_t *to;

  struct translit_t *next;
};
/* One `translit_ignore' entry: a character range given by FROM, TO and
   STEP, the place it was defined, and the link to the next entry.  */
struct translit_ignore_t
{
  uint32_t from;
  uint32_t to;
  uint32_t step;

  const char *fname;
  size_t lineno;

  struct translit_ignore_t *next;
};
/* Type describing one transliteration include statement: the locale and
   the repertoire named by the statement plus the link to the next one.  */
struct translit_include_t
{
  const char *copy_locale;
  const char *copy_repertoire;

  struct translit_include_t *next;
};
117 /* Sparse table of uint32_t. */
118 #define TABLE idx_table
119 #define ELEMENT uint32_t
120 #define DEFAULT ((uint32_t) ~0)
121 #define NO_FINALIZE
122 #include "3level.h"
125 /* The real definition of the struct for the LC_CTYPE locale. */
126 struct locale_ctype_t
128 uint32_t *charnames;
129 size_t charnames_max;
130 size_t charnames_act;
131 /* An index lookup table, to speedup find_idx. */
132 struct idx_table charnames_idx;
134 struct repertoire_t *repertoire;
136 /* We will allow up to 8 * sizeof (uint32_t) character classes. */
137 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
138 size_t nr_charclass;
139 const char *classnames[MAX_NR_CHARCLASS];
140 uint32_t last_class_char;
141 uint32_t class256_collection[256];
142 uint32_t *class_collection;
143 size_t class_collection_max;
144 size_t class_collection_act;
145 uint32_t class_done;
146 uint32_t class_offset;
148 struct charseq **mbdigits;
149 size_t mbdigits_act;
150 size_t mbdigits_max;
151 uint32_t *wcdigits;
152 size_t wcdigits_act;
153 size_t wcdigits_max;
155 struct charseq *mboutdigits[10];
156 uint32_t wcoutdigits[10];
157 size_t outdigits_act;
159 /* If the following number ever turns out to be too small simply
160 increase it. But I doubt it will. --drepper@gnu */
161 #define MAX_NR_CHARMAP 16
162 const char *mapnames[MAX_NR_CHARMAP];
163 uint32_t *map_collection[MAX_NR_CHARMAP];
164 uint32_t map256_collection[2][256];
165 size_t map_collection_max[MAX_NR_CHARMAP];
166 size_t map_collection_act[MAX_NR_CHARMAP];
167 size_t map_collection_nr;
168 size_t last_map_idx;
169 int tomap_done[MAX_NR_CHARMAP];
170 uint32_t map_offset;
172 /* Transliteration information. */
173 struct translit_include_t *translit_include;
174 struct translit_t *translit;
175 struct translit_ignore_t *translit_ignore;
176 uint32_t ntranslit_ignore;
178 uint32_t *default_missing;
179 const char *default_missing_file;
180 size_t default_missing_lineno;
182 uint32_t to_nonascii;
184 /* The arrays for the binary representation. */
185 char_class_t *ctype_b;
186 char_class32_t *ctype32_b;
187 uint32_t **map_b;
188 uint32_t **map32_b;
189 uint32_t **class_b;
190 struct iovec *class_3level;
191 struct iovec *map_3level;
192 uint32_t *class_name_ptr;
193 uint32_t *map_name_ptr;
194 struct iovec width;
195 uint32_t mb_cur_max;
196 const char *codeset_name;
197 uint32_t *translit_from_idx;
198 uint32_t *translit_from_tbl;
199 uint32_t *translit_to_idx;
200 uint32_t *translit_to_tbl;
201 uint32_t translit_idx_size;
202 size_t translit_from_tbl_size;
203 size_t translit_to_tbl_size;
205 struct obstack mempool;
/* Marker for an empty slot.  The cast makes the value 0xFFFFFFFF no
   matter whether `int' is 16, 32 or 64 bits wide.  */
#define EMPTY ((uint32_t) ~0)


/* Allocation hooks used by the obstack macros.  */
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free
/* Prototypes for local functions.  */

/* Create and initialise the LC_CTYPE data of LOCALE, or share the data
   of COPY_LOCALE.  */
static void ctype_startup (struct linereader *lr,
			   struct localedef_t *locale,
			   const struct charmap_t *charmap,
			   struct localedef_t *copy_locale,
			   int ignore_content);

/* Register the character class NAME.  */
static void ctype_class_new (struct linereader *lr,
			     struct locale_ctype_t *ctype,
			     const char *name);

/* Register the character map NAME.  */
static void ctype_map_new (struct linereader *lr,
			   struct locale_ctype_t *ctype,
			   const char *name,
			   const struct charmap_t *charmap);

/* Return the slot for IDX in *TABLE; the call sites in this file also
   use it with a NULL table purely for its side effect on the name
   array.  */
static uint32_t *find_idx (struct locale_ctype_t *ctype,
			   uint32_t **table,
			   size_t *max,
			   size_t *act,
			   unsigned int idx);

static void set_class_defaults (struct locale_ctype_t *ctype,
				const struct charmap_t *charmap,
				struct repertoire_t *repertoire);

static void allocate_arrays (struct locale_ctype_t *ctype,
			     const struct charmap_t *charmap,
			     struct repertoire_t *repertoire);
/* Spelled-out names of the ten decimal digits, indexed by digit value.  */
static const char *longnames[] =
{
  [0] = "zero",
  [1] = "one",
  [2] = "two",
  [3] = "three",
  [4] = "four",
  [5] = "five",
  [6] = "six",
  [7] = "seven",
  [8] = "eight",
  [9] = "nine"
};
/* The same digits written as U<8 hex digits> names, indexed by digit
   value.  */
static const char *uninames[] =
{
  [0] = "U00000030",
  [1] = "U00000031",
  [2] = "U00000032",
  [3] = "U00000033",
  [4] = "U00000034",
  [5] = "U00000035",
  [6] = "U00000036",
  [7] = "U00000037",
  [8] = "U00000038",
  [9] = "U00000039"
};
/* The ASCII digit characters, indexed by digit value.  */
static const unsigned char digits[] = "0123456789";
251 static void
252 ctype_startup (struct linereader *lr, struct localedef_t *locale,
253 const struct charmap_t *charmap,
254 struct localedef_t *copy_locale, int ignore_content)
256 unsigned int cnt;
257 struct locale_ctype_t *ctype;
259 if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL)
261 if (copy_locale == NULL)
263 /* Allocate the needed room. */
264 locale->categories[LC_CTYPE].ctype = ctype =
265 (struct locale_ctype_t *) xcalloc (1,
266 sizeof (struct locale_ctype_t));
268 /* We have seen no names yet. */
269 ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
270 ctype->charnames =
271 (unsigned int *) xmalloc (ctype->charnames_max
272 * sizeof (unsigned int));
273 for (cnt = 0; cnt < 256; ++cnt)
274 ctype->charnames[cnt] = cnt;
275 ctype->charnames_act = 256;
276 idx_table_init (&ctype->charnames_idx);
278 /* Fill character class information. */
279 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
280 /* The order of the following instructions determines the bit
281 positions! */
282 ctype_class_new (lr, ctype, "upper");
283 ctype_class_new (lr, ctype, "lower");
284 ctype_class_new (lr, ctype, "alpha");
285 ctype_class_new (lr, ctype, "digit");
286 ctype_class_new (lr, ctype, "xdigit");
287 ctype_class_new (lr, ctype, "space");
288 ctype_class_new (lr, ctype, "print");
289 ctype_class_new (lr, ctype, "graph");
290 ctype_class_new (lr, ctype, "blank");
291 ctype_class_new (lr, ctype, "cntrl");
292 ctype_class_new (lr, ctype, "punct");
293 ctype_class_new (lr, ctype, "alnum");
294 #ifdef PREDEFINED_CLASSES
295 /* The following are extensions from ISO 14652. */
296 ctype_class_new (lr, ctype, "left_to_right");
297 ctype_class_new (lr, ctype, "right_to_left");
298 ctype_class_new (lr, ctype, "num_terminator");
299 ctype_class_new (lr, ctype, "num_separator");
300 ctype_class_new (lr, ctype, "segment_separator");
301 ctype_class_new (lr, ctype, "block_separator");
302 ctype_class_new (lr, ctype, "direction_control");
303 ctype_class_new (lr, ctype, "sym_swap_layout");
304 ctype_class_new (lr, ctype, "char_shape_selector");
305 ctype_class_new (lr, ctype, "num_shape_selector");
306 ctype_class_new (lr, ctype, "non_spacing");
307 ctype_class_new (lr, ctype, "non_spacing_level3");
308 ctype_class_new (lr, ctype, "normal_connect");
309 ctype_class_new (lr, ctype, "r_connect");
310 ctype_class_new (lr, ctype, "no_connect");
311 ctype_class_new (lr, ctype, "no_connect-space");
312 ctype_class_new (lr, ctype, "vowel_connect");
313 #endif
315 ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
316 ctype->class_collection
317 = (uint32_t *) xcalloc (sizeof (unsigned long int),
318 ctype->class_collection_max);
319 ctype->class_collection_act = 256;
321 /* Fill character map information. */
322 ctype->last_map_idx = MAX_NR_CHARMAP;
323 ctype_map_new (lr, ctype, "toupper", charmap);
324 ctype_map_new (lr, ctype, "tolower", charmap);
325 #ifdef PREDEFINED_CLASSES
326 ctype_map_new (lr, ctype, "tosymmetric", charmap);
327 #endif
329 /* Fill first 256 entries in `toXXX' arrays. */
330 for (cnt = 0; cnt < 256; ++cnt)
332 ctype->map_collection[0][cnt] = cnt;
333 ctype->map_collection[1][cnt] = cnt;
334 #ifdef PREDEFINED_CLASSES
335 ctype->map_collection[2][cnt] = cnt;
336 #endif
337 ctype->map256_collection[0][cnt] = cnt;
338 ctype->map256_collection[1][cnt] = cnt;
341 if (enc_not_ascii_compatible)
342 ctype->to_nonascii = 1;
344 obstack_init (&ctype->mempool);
346 else
347 ctype = locale->categories[LC_CTYPE].ctype =
348 copy_locale->categories[LC_CTYPE].ctype;
353 void
354 ctype_finish (struct localedef_t *locale, const struct charmap_t *charmap)
356 /* See POSIX.2, table 2-6 for the meaning of the following table. */
357 #define NCLASS 12
358 static const struct
360 const char *name;
361 const char allow[NCLASS];
363 valid_table[NCLASS] =
365 /* The order is important. See token.h for more information.
366 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
367 { "upper", "--MX-XDDXXX-" },
368 { "lower", "--MX-XDDXXX-" },
369 { "alpha", "---X-XDDXXX-" },
370 { "digit", "XXX--XDDXXX-" },
371 { "xdigit", "-----XDDXXX-" },
372 { "space", "XXXXX------X" },
373 { "print", "---------X--" },
374 { "graph", "---------X--" },
375 { "blank", "XXXXXM-----X" },
376 { "cntrl", "XXXXX-XX--XX" },
377 { "punct", "XXXXX-DD-X-X" },
378 { "alnum", "-----XDDXXX-" }
380 size_t cnt;
381 int cls1, cls2;
382 uint32_t space_value;
383 struct charseq *space_seq;
384 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
385 int warned;
386 const void *key;
387 size_t len;
388 void *vdata;
389 void *curs;
391 /* Now resolve copying and also handle completely missing definitions. */
392 if (ctype == NULL)
394 const char *repertoire_name;
396 /* First see whether we were supposed to copy. If yes, find the
397 actual definition. */
398 if (locale->copy_name[LC_CTYPE] != NULL)
400 /* Find the copying locale. This has to happen transitively since
401 the locale we are copying from might also copying another one. */
402 struct localedef_t *from = locale;
405 from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
406 from->repertoire_name, charmap);
407 while (from->categories[LC_CTYPE].ctype == NULL
408 && from->copy_name[LC_CTYPE] != NULL);
410 ctype = locale->categories[LC_CTYPE].ctype
411 = from->categories[LC_CTYPE].ctype;
414 /* If there is still no definition issue an warning and create an
415 empty one. */
416 if (ctype == NULL)
418 if (! be_quiet)
419 WITH_CUR_LOCALE (error (0, 0, _("\
420 No definition for %s category found"), "LC_CTYPE"));
421 ctype_startup (NULL, locale, charmap, NULL, 0);
422 ctype = locale->categories[LC_CTYPE].ctype;
425 /* Get the repertoire we have to use. */
426 repertoire_name = locale->repertoire_name ?: repertoire_global;
427 if (repertoire_name != NULL)
428 ctype->repertoire = repertoire_read (repertoire_name);
431 /* We need the name of the currently used 8-bit character set to
432 make correct conversion between this 8-bit representation and the
433 ISO 10646 character set used internally for wide characters. */
434 ctype->codeset_name = charmap->code_set_name;
435 if (ctype->codeset_name == NULL)
437 if (! be_quiet)
438 WITH_CUR_LOCALE (error (0, 0, _("\
439 No character set name specified in charmap")));
440 ctype->codeset_name = "//UNKNOWN//";
443 /* Set default value for classes not specified. */
444 set_class_defaults (ctype, charmap, ctype->repertoire);
446 /* Check according to table. */
447 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
449 uint32_t tmp = ctype->class_collection[cnt];
451 if (tmp != 0)
453 for (cls1 = 0; cls1 < NCLASS; ++cls1)
454 if ((tmp & _ISwbit (cls1)) != 0)
455 for (cls2 = 0; cls2 < NCLASS; ++cls2)
456 if (valid_table[cls1].allow[cls2] != '-')
458 int eq = (tmp & _ISwbit (cls2)) != 0;
459 switch (valid_table[cls1].allow[cls2])
461 case 'M':
462 if (!eq)
464 uint32_t value = ctype->charnames[cnt];
466 if (!be_quiet)
467 WITH_CUR_LOCALE (error (0, 0, _("\
468 character L'\\u%0*x' in class `%s' must be in class `%s'"),
469 value > 0xffff ? 8 : 4,
470 value,
471 valid_table[cls1].name,
472 valid_table[cls2].name));
474 break;
476 case 'X':
477 if (eq)
479 uint32_t value = ctype->charnames[cnt];
481 if (!be_quiet)
482 WITH_CUR_LOCALE (error (0, 0, _("\
483 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
484 value > 0xffff ? 8 : 4,
485 value,
486 valid_table[cls1].name,
487 valid_table[cls2].name));
489 break;
491 case 'D':
492 ctype->class_collection[cnt] |= _ISwbit (cls2);
493 break;
495 default:
496 WITH_CUR_LOCALE (error (5, 0, _("\
497 internal error in %s, line %u"), __FUNCTION__, __LINE__));
503 for (cnt = 0; cnt < 256; ++cnt)
505 uint32_t tmp = ctype->class256_collection[cnt];
507 if (tmp != 0)
509 for (cls1 = 0; cls1 < NCLASS; ++cls1)
510 if ((tmp & _ISbit (cls1)) != 0)
511 for (cls2 = 0; cls2 < NCLASS; ++cls2)
512 if (valid_table[cls1].allow[cls2] != '-')
514 int eq = (tmp & _ISbit (cls2)) != 0;
515 switch (valid_table[cls1].allow[cls2])
517 case 'M':
518 if (!eq)
520 char buf[17];
522 snprintf (buf, sizeof buf, "\\%Zo", cnt);
524 if (!be_quiet)
525 WITH_CUR_LOCALE (error (0, 0, _("\
526 character '%s' in class `%s' must be in class `%s'"),
527 buf,
528 valid_table[cls1].name,
529 valid_table[cls2].name));
531 break;
533 case 'X':
534 if (eq)
536 char buf[17];
538 snprintf (buf, sizeof buf, "\\%Zo", cnt);
540 if (!be_quiet)
541 WITH_CUR_LOCALE (error (0, 0, _("\
542 character '%s' in class `%s' must not be in class `%s'"),
543 buf,
544 valid_table[cls1].name,
545 valid_table[cls2].name));
547 break;
549 case 'D':
550 ctype->class256_collection[cnt] |= _ISbit (cls2);
551 break;
553 default:
554 WITH_CUR_LOCALE (error (5, 0, _("\
555 internal error in %s, line %u"), __FUNCTION__, __LINE__));
561 /* ... and now test <SP> as a special case. */
562 space_value = 32;
563 if (((cnt = BITPOS (tok_space),
564 (ELEM (ctype, class_collection, , space_value)
565 & BITw (tok_space)) == 0)
566 || (cnt = BITPOS (tok_blank),
567 (ELEM (ctype, class_collection, , space_value)
568 & BITw (tok_blank)) == 0)))
570 if (!be_quiet)
571 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
572 valid_table[cnt].name));
574 else if (((cnt = BITPOS (tok_punct),
575 (ELEM (ctype, class_collection, , space_value)
576 & BITw (tok_punct)) != 0)
577 || (cnt = BITPOS (tok_graph),
578 (ELEM (ctype, class_collection, , space_value)
579 & BITw (tok_graph))
580 != 0)))
582 if (!be_quiet)
583 WITH_CUR_LOCALE (error (0, 0, _("\
584 <SP> character must not be in class `%s'"),
585 valid_table[cnt].name));
587 else
588 ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
590 space_seq = charmap_find_value (charmap, "SP", 2);
591 if (space_seq == NULL)
592 space_seq = charmap_find_value (charmap, "space", 5);
593 if (space_seq == NULL)
594 space_seq = charmap_find_value (charmap, "U00000020", 9);
595 if (space_seq == NULL || space_seq->nbytes != 1)
597 if (!be_quiet)
598 WITH_CUR_LOCALE (error (0, 0, _("\
599 character <SP> not defined in character map")));
601 else if (((cnt = BITPOS (tok_space),
602 (ctype->class256_collection[space_seq->bytes[0]]
603 & BIT (tok_space)) == 0)
604 || (cnt = BITPOS (tok_blank),
605 (ctype->class256_collection[space_seq->bytes[0]]
606 & BIT (tok_blank)) == 0)))
608 if (!be_quiet)
609 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
610 valid_table[cnt].name));
612 else if (((cnt = BITPOS (tok_punct),
613 (ctype->class256_collection[space_seq->bytes[0]]
614 & BIT (tok_punct)) != 0)
615 || (cnt = BITPOS (tok_graph),
616 (ctype->class256_collection[space_seq->bytes[0]]
617 & BIT (tok_graph)) != 0)))
619 if (!be_quiet)
620 WITH_CUR_LOCALE (error (0, 0, _("\
621 <SP> character must not be in class `%s'"),
622 valid_table[cnt].name));
624 else
625 ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
627 /* Now that the tests are done make sure the name array contains all
628 characters which are handled in the WIDTH section of the
629 character set definition file. */
630 if (charmap->width_rules != NULL)
631 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
633 unsigned char bytes[charmap->mb_cur_max];
634 int nbytes = charmap->width_rules[cnt].from->nbytes;
636 /* We have the range of character for which the width is
637 specified described using byte sequences of the multibyte
638 charset. We have to convert this to UCS4 now. And we
639 cannot simply convert the beginning and the end of the
640 sequence, we have to iterate over the byte sequence and
641 convert it for every single character. */
642 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
644 while (nbytes < charmap->width_rules[cnt].to->nbytes
645 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
646 nbytes) <= 0)
648 /* Find the UCS value for `bytes'. */
649 int inner;
650 uint32_t wch;
651 struct charseq *seq = charmap_find_symbol (charmap, bytes, nbytes);
653 if (seq == NULL)
654 wch = ILLEGAL_CHAR_VALUE;
655 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
656 wch = seq->ucs4;
657 else
658 wch = repertoire_find_value (ctype->repertoire, seq->name,
659 strlen (seq->name));
661 if (wch != ILLEGAL_CHAR_VALUE)
662 /* We are only interested in the side-effects of the
663 `find_idx' call. It will add appropriate entries in
664 the name array if this is necessary. */
665 (void) find_idx (ctype, NULL, NULL, NULL, wch);
667 /* "Increment" the bytes sequence. */
668 inner = nbytes - 1;
669 while (inner >= 0 && bytes[inner] == 0xff)
670 --inner;
672 if (inner < 0)
674 /* We have to extend the byte sequence. */
675 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
676 break;
678 bytes[0] = 1;
679 memset (&bytes[1], 0, nbytes);
680 ++nbytes;
682 else
684 ++bytes[inner];
685 while (++inner < nbytes)
686 bytes[inner] = 0;
691 /* Now set all the other characters of the character set to the
692 default width. */
693 curs = NULL;
694 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
696 struct charseq *data = (struct charseq *) vdata;
698 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
699 data->ucs4 = repertoire_find_value (ctype->repertoire,
700 data->name, len);
702 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
703 (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
706 /* There must be a multiple of 10 digits. */
707 if (ctype->mbdigits_act % 10 != 0)
709 assert (ctype->mbdigits_act == ctype->wcdigits_act);
710 ctype->wcdigits_act -= ctype->mbdigits_act % 10;
711 ctype->mbdigits_act -= ctype->mbdigits_act % 10;
712 WITH_CUR_LOCALE (error (0, 0, _("\
713 `digit' category has not entries in groups of ten")));
716 /* Check the input digits. There must be a multiple of ten available.
717 In each group it could be that one or the other character is missing.
718 In this case the whole group must be removed. */
719 cnt = 0;
720 while (cnt < ctype->mbdigits_act)
722 size_t inner;
723 for (inner = 0; inner < 10; ++inner)
724 if (ctype->mbdigits[cnt + inner] == NULL)
725 break;
727 if (inner == 10)
728 cnt += 10;
729 else
731 /* Remove the group. */
732 memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
733 ((ctype->wcdigits_act - cnt - 10)
734 * sizeof (ctype->mbdigits[0])));
735 ctype->mbdigits_act -= 10;
739 /* If no input digits are given use the default. */
740 if (ctype->mbdigits_act == 0)
742 if (ctype->mbdigits_max == 0)
744 ctype->mbdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
745 10 * sizeof (struct charseq *));
746 ctype->mbdigits_max = 10;
749 for (cnt = 0; cnt < 10; ++cnt)
751 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
752 digits + cnt, 1);
753 if (ctype->mbdigits[cnt] == NULL)
755 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
756 longnames[cnt],
757 strlen (longnames[cnt]));
758 if (ctype->mbdigits[cnt] == NULL)
760 /* Hum, this ain't good. */
761 WITH_CUR_LOCALE (error (0, 0, _("\
762 no input digits defined and none of the standard names in the charmap")));
764 ctype->mbdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
765 sizeof (struct charseq) + 1);
767 /* This is better than nothing. */
768 ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
769 ctype->mbdigits[cnt]->nbytes = 1;
774 ctype->mbdigits_act = 10;
777 /* Check the wide character input digits. There must be a multiple
778 of ten available. In each group it could be that one or the other
779 character is missing. In this case the whole group must be
780 removed. */
781 cnt = 0;
782 while (cnt < ctype->wcdigits_act)
784 size_t inner;
785 for (inner = 0; inner < 10; ++inner)
786 if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
787 break;
789 if (inner == 10)
790 cnt += 10;
791 else
793 /* Remove the group. */
794 memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
795 ((ctype->wcdigits_act - cnt - 10)
796 * sizeof (ctype->wcdigits[0])));
797 ctype->wcdigits_act -= 10;
801 /* If no input digits are given use the default. */
802 if (ctype->wcdigits_act == 0)
804 if (ctype->wcdigits_max == 0)
806 ctype->wcdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
807 10 * sizeof (uint32_t));
808 ctype->wcdigits_max = 10;
811 for (cnt = 0; cnt < 10; ++cnt)
812 ctype->wcdigits[cnt] = L'0' + cnt;
814 ctype->mbdigits_act = 10;
817 /* Check the outdigits. */
818 warned = 0;
819 for (cnt = 0; cnt < 10; ++cnt)
820 if (ctype->mboutdigits[cnt] == NULL)
822 static struct charseq replace[2];
824 if (!warned)
826 WITH_CUR_LOCALE (error (0, 0, _("\
827 not all characters used in `outdigit' are available in the charmap")));
828 warned = 1;
831 replace[0].nbytes = 1;
832 replace[0].bytes[0] = '?';
833 replace[0].bytes[1] = '\0';
834 ctype->mboutdigits[cnt] = &replace[0];
837 warned = 0;
838 for (cnt = 0; cnt < 10; ++cnt)
839 if (ctype->wcoutdigits[cnt] == 0)
841 if (!warned)
843 WITH_CUR_LOCALE (error (0, 0, _("\
844 not all characters used in `outdigit' are available in the repertoire")));
845 warned = 1;
848 ctype->wcoutdigits[cnt] = L'?';
851 /* Sort the entries in the translit_ignore list. */
852 if (ctype->translit_ignore != NULL)
854 struct translit_ignore_t *firstp = ctype->translit_ignore;
855 struct translit_ignore_t *runp;
857 ctype->ntranslit_ignore = 1;
859 for (runp = firstp->next; runp != NULL; runp = runp->next)
861 struct translit_ignore_t *lastp = NULL;
862 struct translit_ignore_t *cmpp;
864 ++ctype->ntranslit_ignore;
866 for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next)
867 if (runp->from < cmpp->from)
868 break;
870 runp->next = lastp;
871 if (lastp == NULL)
872 firstp = runp;
875 ctype->translit_ignore = firstp;
880 void
881 ctype_output (struct localedef_t *locale, const struct charmap_t *charmap,
882 const char *output_path)
884 static const char nulbytes[4] = { 0, 0, 0, 0 };
885 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
886 const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)
887 + ctype->nr_charclass + ctype->map_collection_nr);
888 struct iovec *iov = alloca (sizeof *iov
889 * (2 + nelems + 2 * ctype->nr_charclass
890 + ctype->map_collection_nr + 4));
891 struct locale_file data;
892 uint32_t *idx = alloca (sizeof *idx * (nelems + 1));
893 uint32_t default_missing_len;
894 size_t elem, cnt, offset, total;
895 char *cp;
897 /* Now prepare the output: Find the sizes of the table we can use. */
898 allocate_arrays (ctype, charmap, ctype->repertoire);
900 data.magic = LIMAGIC (LC_CTYPE);
901 data.n = nelems;
902 iov[0].iov_base = (void *) &data;
903 iov[0].iov_len = sizeof (data);
905 iov[1].iov_base = (void *) idx;
906 iov[1].iov_len = nelems * sizeof (uint32_t);
908 idx[0] = iov[0].iov_len + iov[1].iov_len;
909 offset = 0;
911 for (elem = 0; elem < nelems; ++elem)
913 if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))
914 switch (elem)
916 #define CTYPE_EMPTY(name) \
917 case name: \
918 iov[2 + elem + offset].iov_base = NULL; \
919 iov[2 + elem + offset].iov_len = 0; \
920 idx[elem + 1] = idx[elem]; \
921 break
923 CTYPE_EMPTY(_NL_CTYPE_GAP1);
924 CTYPE_EMPTY(_NL_CTYPE_GAP2);
925 CTYPE_EMPTY(_NL_CTYPE_GAP3);
926 CTYPE_EMPTY(_NL_CTYPE_GAP4);
927 CTYPE_EMPTY(_NL_CTYPE_GAP5);
928 CTYPE_EMPTY(_NL_CTYPE_GAP6);
930 #define CTYPE_DATA(name, base, len) \
931 case _NL_ITEM_INDEX (name): \
932 iov[2 + elem + offset].iov_base = (base); \
933 iov[2 + elem + offset].iov_len = (len); \
934 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
935 break
937 CTYPE_DATA (_NL_CTYPE_CLASS,
938 ctype->ctype_b,
939 (256 + 128) * sizeof (char_class_t));
941 CTYPE_DATA (_NL_CTYPE_TOUPPER,
942 ctype->map_b[0],
943 (256 + 128) * sizeof (uint32_t));
944 CTYPE_DATA (_NL_CTYPE_TOLOWER,
945 ctype->map_b[1],
946 (256 + 128) * sizeof (uint32_t));
948 CTYPE_DATA (_NL_CTYPE_TOUPPER32,
949 ctype->map32_b[0],
950 256 * sizeof (uint32_t));
951 CTYPE_DATA (_NL_CTYPE_TOLOWER32,
952 ctype->map32_b[1],
953 256 * sizeof (uint32_t));
955 CTYPE_DATA (_NL_CTYPE_CLASS32,
956 ctype->ctype32_b,
957 256 * sizeof (char_class32_t));
959 CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET,
960 &ctype->class_offset, sizeof (uint32_t));
962 CTYPE_DATA (_NL_CTYPE_MAP_OFFSET,
963 &ctype->map_offset, sizeof (uint32_t));
965 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE,
966 &ctype->translit_idx_size, sizeof (uint32_t));
968 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX,
969 ctype->translit_from_idx,
970 ctype->translit_idx_size * sizeof (uint32_t));
972 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL,
973 ctype->translit_from_tbl,
974 ctype->translit_from_tbl_size);
976 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX,
977 ctype->translit_to_idx,
978 ctype->translit_idx_size * sizeof (uint32_t));
980 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL,
981 ctype->translit_to_tbl, ctype->translit_to_tbl_size);
983 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
984 /* The class name array. */
985 total = 0;
986 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
988 iov[2 + elem + offset].iov_base
989 = (void *) ctype->classnames[cnt];
990 iov[2 + elem + offset].iov_len
991 = strlen (ctype->classnames[cnt]) + 1;
992 total += iov[2 + elem + offset].iov_len;
994 iov[2 + elem + offset].iov_base = (void *) nulbytes;
995 iov[2 + elem + offset].iov_len = 4 - (total % 4);
996 total += 4 - (total % 4);
998 idx[elem + 1] = idx[elem] + total;
999 break;
1001 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
1002 /* The class name array. */
1003 total = 0;
1004 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
1006 iov[2 + elem + offset].iov_base
1007 = (void *) ctype->mapnames[cnt];
1008 iov[2 + elem + offset].iov_len
1009 = strlen (ctype->mapnames[cnt]) + 1;
1010 total += iov[2 + elem + offset].iov_len;
1012 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1013 iov[2 + elem + offset].iov_len = 4 - (total % 4);
1014 total += 4 - (total % 4);
1016 idx[elem + 1] = idx[elem] + total;
1017 break;
1019 CTYPE_DATA (_NL_CTYPE_WIDTH,
1020 ctype->width.iov_base,
1021 ctype->width.iov_len);
1023 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
1024 &ctype->mb_cur_max, sizeof (uint32_t));
1026 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
1027 total = strlen (ctype->codeset_name) + 1;
1028 if (total % 4 == 0)
1029 iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
1030 else
1032 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
1033 memset (mempcpy (iov[2 + elem + offset].iov_base,
1034 ctype->codeset_name, total),
1035 '\0', 4 - (total & 3));
1036 total = (total + 3) & ~3;
1038 iov[2 + elem + offset].iov_len = total;
1039 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1040 break;
1043 CTYPE_DATA (_NL_CTYPE_MAP_TO_NONASCII,
1044 &ctype->to_nonascii, sizeof (uint32_t));
1046 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
1047 iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1048 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1049 *(uint32_t *) iov[2 + elem + offset].iov_base =
1050 ctype->mbdigits_act / 10;
1051 idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1052 break;
1054 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
1055 /* Align entries. */
1056 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1057 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1058 idx[elem] += iov[2 + elem + offset].iov_len;
1059 ++offset;
1061 iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1062 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1063 *(uint32_t *) iov[2 + elem + offset].iov_base =
1064 ctype->wcdigits_act / 10;
1065 idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1066 break;
1068 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
1069 /* Compute the length of all possible characters. For INDIGITS
1070 there might be more than one. We simply concatenate all of
1071 them with a NUL byte following. The NUL byte wouldn't be
1072 necessary but it makes it easier for the user. */
1073 total = 0;
1075 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1076 cnt < ctype->mbdigits_act; cnt += 10)
1077 total += ctype->mbdigits[cnt]->nbytes + 1;
1078 iov[2 + elem + offset].iov_base = (char *) alloca (total);
1079 iov[2 + elem + offset].iov_len = total;
1081 cp = iov[2 + elem + offset].iov_base;
1082 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1083 cnt < ctype->mbdigits_act; cnt += 10)
1085 cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes,
1086 ctype->mbdigits[cnt]->nbytes);
1087 *cp++ = '\0';
1089 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1090 break;
1092 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
1093 /* Compute the length of all possible characters. For INDIGITS
1094 there might be more than one. We simply concatenate all of
1095 them with a NUL byte following. The NUL byte wouldn't be
1096 necessary but it makes it easier for the user. */
1097 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
1098 total = ctype->mboutdigits[cnt]->nbytes + 1;
1099 iov[2 + elem + offset].iov_base = (char *) alloca (total);
1100 iov[2 + elem + offset].iov_len = total;
1102 *(char *) mempcpy (iov[2 + elem + offset].iov_base,
1103 ctype->mboutdigits[cnt]->bytes,
1104 ctype->mboutdigits[cnt]->nbytes) = '\0';
1105 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1106 break;
1108 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
1109 total = ctype->wcdigits_act / 10;
1111 iov[2 + elem + offset].iov_base =
1112 (uint32_t *) alloca (total * sizeof (uint32_t));
1113 iov[2 + elem + offset].iov_len = total * sizeof (uint32_t);
1115 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
1116 cnt < ctype->wcdigits_act; cnt += 10)
1117 ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10]
1118 = ctype->wcdigits[cnt];
1119 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1120 break;
1122 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC):
1123 /* Align entries. */
1124 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1125 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1126 idx[elem] += iov[2 + elem + offset].iov_len;
1127 ++offset;
1128 /* FALLTHROUGH */
1130 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT1_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
1131 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
1132 iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt];
1133 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1134 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1135 break;
1137 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
1138 /* Align entries. */
1139 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1140 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1141 idx[elem] += iov[2 + elem + offset].iov_len;
1142 ++offset;
1144 default_missing_len = (ctype->default_missing
1145 ? wcslen ((wchar_t *)ctype->default_missing)
1146 : 0);
1147 iov[2 + elem + offset].iov_base = &default_missing_len;
1148 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1149 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1150 break;
1152 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
1153 iov[2 + elem + offset].iov_base =
1154 ctype->default_missing ?: (uint32_t *) L"";
1155 iov[2 + elem + offset].iov_len =
1156 wcslen (iov[2 + elem + offset].iov_base) * sizeof (uint32_t);
1157 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1158 break;
1160 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
1161 /* Align entries. */
1162 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1163 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1164 idx[elem] += iov[2 + elem + offset].iov_len;
1165 ++offset;
1167 iov[2 + elem + offset].iov_base = &ctype->ntranslit_ignore;
1168 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1169 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1170 break;
1172 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
1174 uint32_t *ranges = (uint32_t *) alloca (ctype->ntranslit_ignore
1175 * 3 * sizeof (uint32_t));
1176 struct translit_ignore_t *runp;
1178 iov[2 + elem + offset].iov_base = ranges;
1179 iov[2 + elem + offset].iov_len = (ctype->ntranslit_ignore
1180 * 3 * sizeof (uint32_t));
1182 for (runp = ctype->translit_ignore; runp != NULL;
1183 runp = runp->next)
1185 *ranges++ = runp->from;
1186 *ranges++ = runp->to;
1187 *ranges++ = runp->step;
1190 /* Remove the following line in case a new entry is added
1191 after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN. */
1192 if (elem < nelems)
1193 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1194 break;
1196 default:
1197 assert (! "unknown CTYPE element");
1199 else
1201 /* Handle extra maps. */
1202 size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
1203 if (nr < ctype->nr_charclass)
1205 iov[2 + elem + offset].iov_base = ctype->class_b[nr];
1206 iov[2 + elem + offset].iov_len = 256 / 32 * sizeof (uint32_t);
1207 idx[elem] += iov[2 + elem + offset].iov_len;
1208 ++offset;
1210 iov[2 + elem + offset] = ctype->class_3level[nr];
1212 else
1214 nr -= ctype->nr_charclass;
1215 assert (nr < ctype->map_collection_nr);
1216 iov[2 + elem + offset] = ctype->map_3level[nr];
1218 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1222 assert (2 + elem + offset == (nelems + 2 * ctype->nr_charclass
1223 + ctype->map_collection_nr + 4 + 2));
1225 write_locale_data (output_path, LC_CTYPE, "LC_CTYPE", 2 + elem + offset,
1226 iov);
1230 /* Local functions. */
1231 static void
1232 ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
1233 const char *name)
1235 size_t cnt;
1237 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1238 if (strcmp (ctype->classnames[cnt], name) == 0)
1239 break;
1241 if (cnt < ctype->nr_charclass)
1243 lr_error (lr, _("character class `%s' already defined"), name);
1244 return;
1247 if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1248 /* Exit code 2 is prescribed in P1003.2b. */
1249 WITH_CUR_LOCALE (error (2, 0, _("\
1250 implementation limit: no more than %Zd character classes allowed"),
1251 MAX_NR_CHARCLASS));
1253 ctype->classnames[ctype->nr_charclass++] = name;
1257 static void
1258 ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1259 const char *name, const struct charmap_t *charmap)
1261 size_t max_chars = 0;
1262 size_t cnt;
1264 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1266 if (strcmp (ctype->mapnames[cnt], name) == 0)
1267 break;
1269 if (max_chars < ctype->map_collection_max[cnt])
1270 max_chars = ctype->map_collection_max[cnt];
1273 if (cnt < ctype->map_collection_nr)
1275 lr_error (lr, _("character map `%s' already defined"), name);
1276 return;
1279 if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1280 /* Exit code 2 is prescribed in P1003.2b. */
1281 WITH_CUR_LOCALE (error (2, 0, _("\
1282 implementation limit: no more than %d character maps allowed"),
1283 MAX_NR_CHARMAP));
1285 ctype->mapnames[cnt] = name;
1287 if (max_chars == 0)
1288 ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1289 else
1290 ctype->map_collection_max[cnt] = max_chars;
1292 ctype->map_collection[cnt] = (uint32_t *)
1293 xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
1294 ctype->map_collection_act[cnt] = 256;
1296 ++ctype->map_collection_nr;
1300 /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
1301 is possible if we only want to extend the name array. */
1302 static uint32_t *
1303 find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1304 size_t *act, uint32_t idx)
1306 size_t cnt;
1308 if (idx < 256)
1309 return table == NULL ? NULL : &(*table)[idx];
1311 /* Use the charnames_idx lookup table instead of the slow search loop. */
1312 #if 1
1313 cnt = idx_table_get (&ctype->charnames_idx, idx);
1314 if (cnt == EMPTY)
1315 /* Not found. */
1316 cnt = ctype->charnames_act;
1317 #else
1318 for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1319 if (ctype->charnames[cnt] == idx)
1320 break;
1321 #endif
1323 /* We have to distinguish two cases: the name is found or not. */
1324 if (cnt == ctype->charnames_act)
1326 /* Extend the name array. */
1327 if (ctype->charnames_act == ctype->charnames_max)
1329 ctype->charnames_max *= 2;
1330 ctype->charnames = (uint32_t *)
1331 xrealloc (ctype->charnames,
1332 sizeof (uint32_t) * ctype->charnames_max);
1334 ctype->charnames[ctype->charnames_act++] = idx;
1335 idx_table_add (&ctype->charnames_idx, idx, cnt);
1338 if (table == NULL)
1339 /* We have done everything we are asked to do. */
1340 return NULL;
1342 if (max == NULL)
1343 /* The caller does not want to extend the table. */
1344 return (cnt >= *act ? NULL : &(*table)[cnt]);
1346 if (cnt >= *act)
1348 if (cnt >= *max)
1350 size_t old_max = *max;
1352 *max *= 2;
1353 while (*max <= cnt);
1355 *table =
1356 (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
1357 memset (&(*table)[old_max], '\0',
1358 (*max - old_max) * sizeof (uint32_t));
1361 *act = cnt + 1;
1364 return &(*table)[cnt];
1368 static int
1369 get_character (struct token *now, const struct charmap_t *charmap,
1370 struct repertoire_t *repertoire,
1371 struct charseq **seqp, uint32_t *wchp)
1373 if (now->tok == tok_bsymbol)
1375 /* This will hopefully be the normal case. */
1376 *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1377 now->val.str.lenmb);
1378 *seqp = charmap_find_value (charmap, now->val.str.startmb,
1379 now->val.str.lenmb);
1381 else if (now->tok == tok_ucs4)
1383 char utmp[10];
1385 snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1386 *seqp = charmap_find_value (charmap, utmp, 9);
1388 if (*seqp == NULL)
1389 *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
1391 if (*seqp == NULL)
1393 /* Compute the value in the charmap from the UCS value. */
1394 const char *symbol = repertoire_find_symbol (repertoire,
1395 now->val.ucs4);
1397 if (symbol == NULL)
1398 *seqp = NULL;
1399 else
1400 *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
1402 if (*seqp == NULL)
1404 if (repertoire != NULL)
1406 /* Insert a negative entry. */
1407 static const struct charseq negative
1408 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1409 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1410 sizeof (uint32_t));
1411 *newp = now->val.ucs4;
1413 insert_entry (&repertoire->seq_table, newp,
1414 sizeof (uint32_t), (void *) &negative);
1417 else
1418 (*seqp)->ucs4 = now->val.ucs4;
1420 else if ((*seqp)->ucs4 != now->val.ucs4)
1421 *seqp = NULL;
1423 *wchp = now->val.ucs4;
1425 else if (now->tok == tok_charcode)
1427 /* We must map from the byte code to UCS4. */
1428 *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1429 now->val.str.lenmb);
1431 if (*seqp == NULL)
1432 *wchp = ILLEGAL_CHAR_VALUE;
1433 else
1435 if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1436 (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1437 strlen ((*seqp)->name));
1438 *wchp = (*seqp)->ucs4;
1441 else
1442 return 1;
1444 return 0;
1448 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1449 the .(2). counterparts. */
1450 static void
1451 charclass_symbolic_ellipsis (struct linereader *ldfile,
1452 struct locale_ctype_t *ctype,
1453 const struct charmap_t *charmap,
1454 struct repertoire_t *repertoire,
1455 struct token *now,
1456 const char *last_str,
1457 unsigned long int class256_bit,
1458 unsigned long int class_bit, int base,
1459 int ignore_content, int handle_digits, int step)
1461 const char *nowstr = now->val.str.startmb;
1462 char tmp[now->val.str.lenmb + 1];
1463 const char *cp;
1464 char *endp;
1465 unsigned long int from;
1466 unsigned long int to;
1468 /* We have to compute the ellipsis values using the symbolic names. */
1469 assert (last_str != NULL);
1471 if (strlen (last_str) != now->val.str.lenmb)
1473 invalid_range:
1474 lr_error (ldfile,
1475 _("`%s' and `%.*s' are not valid names for symbolic range"),
1476 last_str, (int) now->val.str.lenmb, nowstr);
1477 return;
1480 if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1481 /* Nothing to do, the names are the same. */
1482 return;
1484 for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1487 errno = 0;
1488 from = strtoul (cp, &endp, base);
1489 if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1490 goto invalid_range;
1492 to = strtoul (nowstr + (cp - last_str), &endp, base);
1493 if ((to == UINT_MAX && errno == ERANGE)
1494 || (endp - nowstr) != now->val.str.lenmb || from >= to)
1495 goto invalid_range;
1497 /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
1498 if (!ignore_content)
1500 now->val.str.startmb = tmp;
1501 while ((from += step) <= to)
1503 struct charseq *seq;
1504 uint32_t wch;
1506 sprintf (tmp, (base == 10 ? "%.*s%0*ld" : "%.*s%0*lX"),
1507 (int) (cp - last_str), last_str,
1508 (int) (now->val.str.lenmb - (cp - last_str)),
1509 from);
1511 get_character (now, charmap, repertoire, &seq, &wch);
1513 if (seq != NULL && seq->nbytes == 1)
1514 /* Yep, we can store information about this byte sequence. */
1515 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
1517 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1518 /* We have the UCS4 position. */
1519 *find_idx (ctype, &ctype->class_collection,
1520 &ctype->class_collection_max,
1521 &ctype->class_collection_act, wch) |= class_bit;
1523 if (handle_digits == 1)
1525 /* We must store the digit values. */
1526 if (ctype->mbdigits_act == ctype->mbdigits_max)
1528 ctype->mbdigits_max *= 2;
1529 ctype->mbdigits = xrealloc (ctype->mbdigits,
1530 (ctype->mbdigits_max
1531 * sizeof (char *)));
1532 ctype->wcdigits_max *= 2;
1533 ctype->wcdigits = xrealloc (ctype->wcdigits,
1534 (ctype->wcdigits_max
1535 * sizeof (uint32_t)));
1538 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1539 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1541 else if (handle_digits == 2)
1543 /* We must store the digit values. */
1544 if (ctype->outdigits_act >= 10)
1546 lr_error (ldfile, _("\
1547 %s: field `%s' does not contain exactly ten entries"),
1548 "LC_CTYPE", "outdigit");
1549 return;
1552 ctype->mboutdigits[ctype->outdigits_act] = seq;
1553 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1554 ++ctype->outdigits_act;
1561 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. */
1562 static void
1563 charclass_ucs4_ellipsis (struct linereader *ldfile,
1564 struct locale_ctype_t *ctype,
1565 const struct charmap_t *charmap,
1566 struct repertoire_t *repertoire,
1567 struct token *now, uint32_t last_wch,
1568 unsigned long int class256_bit,
1569 unsigned long int class_bit, int ignore_content,
1570 int handle_digits, int step)
1572 if (last_wch > now->val.ucs4)
1574 lr_error (ldfile, _("\
1575 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1576 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1577 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
1578 return;
1581 if (!ignore_content)
1582 while ((last_wch += step) <= now->val.ucs4)
1584 /* We have to find out whether there is a byte sequence corresponding
1585 to this UCS4 value. */
1586 struct charseq *seq;
1587 char utmp[10];
1589 snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1590 seq = charmap_find_value (charmap, utmp, 9);
1591 if (seq == NULL)
1593 snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1594 seq = charmap_find_value (charmap, utmp, 5);
1597 if (seq == NULL)
1598 /* Try looking in the repertoire map. */
1599 seq = repertoire_find_seq (repertoire, last_wch);
1601 /* If this is the first time we look for this sequence create a new
1602 entry. */
1603 if (seq == NULL)
1605 static const struct charseq negative
1606 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1608 /* Find the symbolic name for this UCS4 value. */
1609 if (repertoire != NULL)
1611 const char *symbol = repertoire_find_symbol (repertoire,
1612 last_wch);
1613 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1614 sizeof (uint32_t));
1615 *newp = last_wch;
1617 if (symbol != NULL)
1618 /* We have a name, now search the multibyte value. */
1619 seq = charmap_find_value (charmap, symbol, strlen (symbol));
1621 if (seq == NULL)
1622 /* We have to create a fake entry. */
1623 seq = (struct charseq *) &negative;
1624 else
1625 seq->ucs4 = last_wch;
1627 insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1628 seq);
1630 else
1631 /* We have to create a fake entry. */
1632 seq = (struct charseq *) &negative;
1635 /* We have a name, now search the multibyte value. */
1636 if (seq->ucs4 == last_wch && seq->nbytes == 1)
1637 /* Yep, we can store information about this byte sequence. */
1638 ctype->class256_collection[(size_t) seq->bytes[0]]
1639 |= class256_bit;
1641 /* And of course we have the UCS4 position. */
1642 if (class_bit != 0)
1643 *find_idx (ctype, &ctype->class_collection,
1644 &ctype->class_collection_max,
1645 &ctype->class_collection_act, last_wch) |= class_bit;
1647 if (handle_digits == 1)
1649 /* We must store the digit values. */
1650 if (ctype->mbdigits_act == ctype->mbdigits_max)
1652 ctype->mbdigits_max *= 2;
1653 ctype->mbdigits = xrealloc (ctype->mbdigits,
1654 (ctype->mbdigits_max
1655 * sizeof (char *)));
1656 ctype->wcdigits_max *= 2;
1657 ctype->wcdigits = xrealloc (ctype->wcdigits,
1658 (ctype->wcdigits_max
1659 * sizeof (uint32_t)));
1662 ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1663 ? seq : NULL);
1664 ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1666 else if (handle_digits == 2)
1668 /* We must store the digit values. */
1669 if (ctype->outdigits_act >= 10)
1671 lr_error (ldfile, _("\
1672 %s: field `%s' does not contain exactly ten entries"),
1673 "LC_CTYPE", "outdigit");
1674 return;
1677 ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1678 ? seq : NULL);
1679 ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1680 ++ctype->outdigits_act;
1686 /* Ellipsis as in `/xea/x12.../xea/x34'. */
1687 static void
1688 charclass_charcode_ellipsis (struct linereader *ldfile,
1689 struct locale_ctype_t *ctype,
1690 const struct charmap_t *charmap,
1691 struct repertoire_t *repertoire,
1692 struct token *now, char *last_charcode,
1693 uint32_t last_charcode_len,
1694 unsigned long int class256_bit,
1695 unsigned long int class_bit, int ignore_content,
1696 int handle_digits)
1698 /* First check whether the to-value is larger. */
1699 if (now->val.charcode.nbytes != last_charcode_len)
1701 lr_error (ldfile, _("\
1702 start and end character sequence of range must have the same length"));
1703 return;
1706 if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
1708 lr_error (ldfile, _("\
1709 to-value character sequence is smaller than from-value sequence"));
1710 return;
1713 if (!ignore_content)
1717 /* Increment the byte sequence value. */
1718 struct charseq *seq;
1719 uint32_t wch;
1720 int i;
1722 for (i = last_charcode_len - 1; i >= 0; --i)
1723 if (++last_charcode[i] != 0)
1724 break;
1726 if (last_charcode_len == 1)
1727 /* Of course we have the charcode value. */
1728 ctype->class256_collection[(size_t) last_charcode[0]]
1729 |= class256_bit;
1731 /* Find the symbolic name. */
1732 seq = charmap_find_symbol (charmap, last_charcode,
1733 last_charcode_len);
1734 if (seq != NULL)
1736 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1737 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1738 strlen (seq->name));
1739 wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
1741 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1742 *find_idx (ctype, &ctype->class_collection,
1743 &ctype->class_collection_max,
1744 &ctype->class_collection_act, wch) |= class_bit;
1746 else
1747 wch = ILLEGAL_CHAR_VALUE;
1749 if (handle_digits == 1)
1751 /* We must store the digit values. */
1752 if (ctype->mbdigits_act == ctype->mbdigits_max)
1754 ctype->mbdigits_max *= 2;
1755 ctype->mbdigits = xrealloc (ctype->mbdigits,
1756 (ctype->mbdigits_max
1757 * sizeof (char *)));
1758 ctype->wcdigits_max *= 2;
1759 ctype->wcdigits = xrealloc (ctype->wcdigits,
1760 (ctype->wcdigits_max
1761 * sizeof (uint32_t)));
1764 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1765 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1766 seq->nbytes = last_charcode_len;
1768 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1769 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1771 else if (handle_digits == 2)
1773 struct charseq *seq;
1774 /* We must store the digit values. */
1775 if (ctype->outdigits_act >= 10)
1777 lr_error (ldfile, _("\
1778 %s: field `%s' does not contain exactly ten entries"),
1779 "LC_CTYPE", "outdigit");
1780 return;
1783 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1784 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1785 seq->nbytes = last_charcode_len;
1787 ctype->mboutdigits[ctype->outdigits_act] = seq;
1788 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1789 ++ctype->outdigits_act;
1792 while (memcmp (last_charcode, now->val.charcode.bytes,
1793 last_charcode_len) != 0);
1798 static uint32_t *
1799 find_translit2 (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
1800 uint32_t wch)
1802 struct translit_t *trunp = ctype->translit;
1803 struct translit_ignore_t *tirunp = ctype->translit_ignore;
1805 while (trunp != NULL)
1807 /* XXX We simplify things here. The transliterations we look
1808 for are only allowed to have one character. */
1809 if (trunp->from[0] == wch && trunp->from[1] == 0)
1811 /* Found it. Now look for a transliteration which can be
1812 represented with the character set. */
1813 struct translit_to_t *torunp = trunp->to;
1815 while (torunp != NULL)
1817 int i;
1819 for (i = 0; torunp->str[i] != 0; ++i)
1821 char utmp[10];
1823 snprintf (utmp, sizeof (utmp), "U%08X", torunp->str[i]);
1824 if (charmap_find_value (charmap, utmp, 9) == NULL)
1825 /* This character cannot be represented. */
1826 break;
1829 if (torunp->str[i] == 0)
1830 return torunp->str;
1832 torunp = torunp->next;
1835 break;
1838 trunp = trunp->next;
1841 /* Check for ignored chars. */
1842 while (tirunp != NULL)
1844 if (tirunp->from <= wch && tirunp->to >= wch)
1846 uint32_t wi;
1848 for (wi = tirunp->from; wi <= wch; wi += tirunp->step)
1849 if (wi == wch)
1850 return (uint32_t []) { 0 };
1854 /* Nothing found. */
1855 return NULL;
1859 uint32_t *
1860 find_translit (struct localedef_t *locale, const struct charmap_t *charmap,
1861 uint32_t wch)
1863 struct locale_ctype_t *ctype;
1864 uint32_t *result = NULL;
1866 assert (locale != NULL);
1867 ctype = locale->categories[LC_CTYPE].ctype;
1869 if (ctype == NULL)
1870 return NULL;
1872 if (ctype->translit != NULL)
1873 result = find_translit2 (ctype, charmap, wch);
1875 if (result == NULL)
1877 struct translit_include_t *irunp = ctype->translit_include;
1879 while (irunp != NULL && result == NULL)
1881 result = find_translit (find_locale (CTYPE_LOCALE,
1882 irunp->copy_locale,
1883 irunp->copy_repertoire,
1884 charmap),
1885 charmap, wch);
1886 irunp = irunp->next;
1890 return result;
1894 /* Read one transliteration entry. */
1895 static uint32_t *
1896 read_widestring (struct linereader *ldfile, struct token *now,
1897 const struct charmap_t *charmap,
1898 struct repertoire_t *repertoire)
1900 uint32_t *wstr;
1902 if (now->tok == tok_default_missing)
1903 /* The special name "" will denote this case. */
1904 wstr = ((uint32_t *) { 0 });
1905 else if (now->tok == tok_bsymbol)
1907 /* Get the value from the repertoire. */
1908 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1909 wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1910 now->val.str.lenmb);
1911 if (wstr[0] == ILLEGAL_CHAR_VALUE)
1913 /* We cannot proceed, we don't know the UCS4 value. */
1914 free (wstr);
1915 return NULL;
1918 wstr[1] = 0;
1920 else if (now->tok == tok_ucs4)
1922 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1923 wstr[0] = now->val.ucs4;
1924 wstr[1] = 0;
1926 else if (now->tok == tok_charcode)
1928 /* Argh, we have to convert to the symbol name first and then to the
1929 UCS4 value. */
1930 struct charseq *seq = charmap_find_symbol (charmap,
1931 now->val.str.startmb,
1932 now->val.str.lenmb);
1933 if (seq == NULL)
1934 /* Cannot find the UCS4 value. */
1935 return NULL;
1937 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1938 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1939 strlen (seq->name));
1940 if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1941 /* We cannot proceed, we don't know the UCS4 value. */
1942 return NULL;
1944 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1945 wstr[0] = seq->ucs4;
1946 wstr[1] = 0;
1948 else if (now->tok == tok_string)
1950 wstr = now->val.str.startwc;
1951 if (wstr == NULL || wstr[0] == 0)
1952 return NULL;
1954 else
1956 if (now->tok != tok_eol && now->tok != tok_eof)
1957 lr_ignore_rest (ldfile, 0);
1958 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1959 return (uint32_t *) -1l;
1962 return wstr;
1966 static void
1967 read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1968 struct token *now, const struct charmap_t *charmap,
1969 struct repertoire_t *repertoire)
1971 uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1972 struct translit_t *result;
1973 struct translit_to_t **top;
1974 struct obstack *ob = &ctype->mempool;
1975 int first;
1976 int ignore;
1978 if (from_wstr == NULL)
1979 /* There is no valid from string. */
1980 return;
1982 result = (struct translit_t *) obstack_alloc (ob,
1983 sizeof (struct translit_t));
1984 result->from = from_wstr;
1985 result->fname = ldfile->fname;
1986 result->lineno = ldfile->lineno;
1987 result->next = NULL;
1988 result->to = NULL;
1989 top = &result->to;
1990 first = 1;
1991 ignore = 0;
1993 while (1)
1995 uint32_t *to_wstr;
1997 /* Next we have one or more transliterations. They are
1998 separated by semicolons. */
1999 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2001 if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
2003 /* One string read. */
2004 const uint32_t zero = 0;
2006 if (!ignore)
2008 obstack_grow (ob, &zero, 4);
2009 to_wstr = obstack_finish (ob);
2011 *top = obstack_alloc (ob, sizeof (struct translit_to_t));
2012 (*top)->str = to_wstr;
2013 (*top)->next = NULL;
2016 if (now->tok == tok_eol)
2018 result->next = ctype->translit;
2019 ctype->translit = result;
2020 return;
2023 if (!ignore)
2024 top = &(*top)->next;
2025 ignore = 0;
2027 else
2029 to_wstr = read_widestring (ldfile, now, charmap, repertoire);
2030 if (to_wstr == (uint32_t *) -1l)
2032 /* An error occurred. */
2033 obstack_free (ob, result);
2034 return;
2037 if (to_wstr == NULL)
2038 ignore = 1;
2039 else
2040 /* This value is usable. */
2041 obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
2043 first = 0;
2049 static void
2050 read_translit_ignore_entry (struct linereader *ldfile,
2051 struct locale_ctype_t *ctype,
2052 const struct charmap_t *charmap,
2053 struct repertoire_t *repertoire)
2055 /* We expect a semicolon-separated list of characters we ignore. We are
2056 only interested in the wide character definitions. These must be
2057 single characters, possibly defining a range when an ellipsis is used. */
2058 while (1)
2060 struct token *now = lr_token (ldfile, charmap, NULL, repertoire,
2061 verbose);
2062 struct translit_ignore_t *newp;
2063 uint32_t from;
2065 if (now->tok == tok_eol || now->tok == tok_eof)
2067 lr_error (ldfile,
2068 _("premature end of `translit_ignore' definition"));
2069 return;
2072 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2074 lr_error (ldfile, _("syntax error"));
2075 lr_ignore_rest (ldfile, 0);
2076 return;
2079 if (now->tok == tok_ucs4)
2080 from = now->val.ucs4;
2081 else
2082 /* Try to get the value. */
2083 from = repertoire_find_value (repertoire, now->val.str.startmb,
2084 now->val.str.lenmb);
2086 if (from == ILLEGAL_CHAR_VALUE)
2088 lr_error (ldfile, "invalid character name");
2089 newp = NULL;
2091 else
2093 newp = (struct translit_ignore_t *)
2094 obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
2095 newp->from = from;
2096 newp->to = from;
2097 newp->step = 1;
2099 newp->next = ctype->translit_ignore;
2100 ctype->translit_ignore = newp;
2103 /* Now we expect either a semicolon, an ellipsis, or the end of the
2104 line. */
2105 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2107 if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
2109 /* XXX Should we bother implementing `....'? `...' certainly
2110 will not be implemented. */
2111 uint32_t to;
2112 int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
2114 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2116 if (now->tok == tok_eol || now->tok == tok_eof)
2118 lr_error (ldfile,
2119 _("premature end of `translit_ignore' definition"));
2120 return;
2123 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2125 lr_error (ldfile, _("syntax error"));
2126 lr_ignore_rest (ldfile, 0);
2127 return;
2130 if (now->tok == tok_ucs4)
2131 to = now->val.ucs4;
2132 else
2133 /* Try to get the value. */
2134 to = repertoire_find_value (repertoire, now->val.str.startmb,
2135 now->val.str.lenmb);
2137 if (to == ILLEGAL_CHAR_VALUE)
2138 lr_error (ldfile, "invalid character name");
2139 else
2141 /* Make sure the `to'-value is larger. */
2142 if (to >= from)
2144 newp->to = to;
2145 newp->step = step;
2147 else
2148 lr_error (ldfile, _("\
2149 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2150 (to | from) < 65536 ? 4 : 8, to,
2151 (to | from) < 65536 ? 4 : 8, from);
2154 /* And the next token. */
2155 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2158 if (now->tok == tok_eol || now->tok == tok_eof)
2159 /* We are done. */
2160 return;
2162 if (now->tok == tok_semicolon)
2163 /* Next round. */
2164 continue;
2166 /* If we come here something is wrong. */
2167 lr_error (ldfile, _("syntax error"));
2168 lr_ignore_rest (ldfile, 0);
2169 return;
2174 /* The parser for the LC_CTYPE section of the locale definition. */
2175 void
2176 ctype_read (struct linereader *ldfile, struct localedef_t *result,
2177 const struct charmap_t *charmap, const char *repertoire_name,
2178 int ignore_content)
2180 struct repertoire_t *repertoire = NULL;
2181 struct locale_ctype_t *ctype;
2182 struct token *now;
2183 enum token_t nowtok;
2184 size_t cnt;
2185 struct charseq *last_seq;
2186 uint32_t last_wch = 0;
2187 enum token_t last_token;
2188 enum token_t ellipsis_token;
2189 int step;
2190 char last_charcode[16];
2191 size_t last_charcode_len = 0;
2192 const char *last_str = NULL;
2193 int mapidx;
2194 struct localedef_t *copy_locale = NULL;
2196 /* Get the repertoire we have to use. */
2197 if (repertoire_name != NULL)
2198 repertoire = repertoire_read (repertoire_name);
2200 /* The rest of the line containing `LC_CTYPE' must be free. */
2201 lr_ignore_rest (ldfile, 1);
2206 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2207 nowtok = now->tok;
2209 while (nowtok == tok_eol);
2211 /* If we see `copy' now we are almost done. */
2212 if (nowtok == tok_copy)
2214 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2215 if (now->tok != tok_string)
2217 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2219 skip_category:
2221 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2222 while (now->tok != tok_eof && now->tok != tok_end);
2224 if (now->tok != tok_eof
2225 || (now = lr_token (ldfile, charmap, NULL, NULL, verbose),
2226 now->tok == tok_eof))
2227 lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2228 else if (now->tok != tok_lc_ctype)
2230 lr_error (ldfile, _("\
2231 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2232 lr_ignore_rest (ldfile, 0);
2234 else
2235 lr_ignore_rest (ldfile, 1);
2237 return;
2240 if (! ignore_content)
2242 /* Get the locale definition. */
2243 copy_locale = load_locale (LC_CTYPE, now->val.str.startmb,
2244 repertoire_name, charmap, NULL);
2245 if ((copy_locale->avail & CTYPE_LOCALE) == 0)
2247 /* Not yet loaded. So do it now. */
2248 if (locfile_read (copy_locale, charmap) != 0)
2249 goto skip_category;
2252 if (copy_locale->categories[LC_CTYPE].ctype == NULL)
2253 return;
2256 lr_ignore_rest (ldfile, 1);
2258 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2259 nowtok = now->tok;
2262 /* Prepare the data structures. */
2263 ctype_startup (ldfile, result, charmap, copy_locale, ignore_content);
2264 ctype = result->categories[LC_CTYPE].ctype;
2266 /* Remember the repertoire we use. */
2267 if (!ignore_content)
2268 ctype->repertoire = repertoire;
2270 while (1)
2272 unsigned long int class_bit = 0;
2273 unsigned long int class256_bit = 0;
2274 int handle_digits = 0;
2276 /* Of course we don't proceed beyond the end of file. */
2277 if (nowtok == tok_eof)
2278 break;
2280 /* Ignore empty lines. */
2281 if (nowtok == tok_eol)
2283 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2284 nowtok = now->tok;
2285 continue;
2288 switch (nowtok)
2290 case tok_charclass:
2291 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2292 while (now->tok == tok_ident || now->tok == tok_string)
2294 ctype_class_new (ldfile, ctype, now->val.str.startmb);
2295 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2296 if (now->tok != tok_semicolon)
2297 break;
2298 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2300 if (now->tok != tok_eol)
2301 SYNTAX_ERROR (_("\
2302 %s: syntax error in definition of new character class"), "LC_CTYPE");
2303 break;
2305 case tok_charconv:
2306 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2307 while (now->tok == tok_ident || now->tok == tok_string)
2309 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2310 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2311 if (now->tok != tok_semicolon)
2312 break;
2313 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2315 if (now->tok != tok_eol)
2316 SYNTAX_ERROR (_("\
2317 %s: syntax error in definition of new character map"), "LC_CTYPE");
2318 break;
2320 case tok_class:
2321 /* Ignore the rest of the line if we don't need the input of
2322 this line. */
2323 if (ignore_content)
2325 lr_ignore_rest (ldfile, 0);
2326 break;
2329 /* We simply forget the `class' keyword and use the following
2330 operand to determine the bit. */
2331 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2332 if (now->tok == tok_ident || now->tok == tok_string)
2334 /* This may be one of the predefined class names. */
2335 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2336 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
2337 break;
2338 if (cnt >= ctype->nr_charclass)
2340 #ifdef PREDEFINED_CLASSES
2341 if (now->val.str.lenmb == 8
2342 && memcmp ("special1", now->val.str.startmb, 8) == 0)
2343 class_bit = _ISwspecial1;
2344 else if (now->val.str.lenmb == 8
2345 && memcmp ("special2", now->val.str.startmb, 8) == 0)
2346 class_bit = _ISwspecial2;
2347 else if (now->val.str.lenmb == 8
2348 && memcmp ("special3", now->val.str.startmb, 8) == 0)
2349 class_bit = _ISwspecial3;
2350 else
2351 #endif
2353 /* OK, it's a new class. */
2354 ctype_class_new (ldfile, ctype, now->val.str.startmb);
2356 class_bit = _ISwbit (ctype->nr_charclass - 1);
2359 else
2361 class_bit = _ISwbit (cnt);
2363 free (now->val.str.startmb);
2366 else if (now->tok == tok_digit)
2367 goto handle_tok_digit;
2368 else if (now->tok < tok_upper || now->tok > tok_blank)
2369 goto err_label;
2370 else
2372 class_bit = BITw (now->tok);
2373 class256_bit = BIT (now->tok);
2376 /* The next character must be a semicolon. */
2377 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2378 if (now->tok != tok_semicolon)
2379 goto err_label;
2380 goto read_charclass;
2382 case tok_upper:
2383 case tok_lower:
2384 case tok_alpha:
2385 case tok_alnum:
2386 case tok_space:
2387 case tok_cntrl:
2388 case tok_punct:
2389 case tok_graph:
2390 case tok_print:
2391 case tok_xdigit:
2392 case tok_blank:
2393 /* Ignore the rest of the line if we don't need the input of
2394 this line. */
2395 if (ignore_content)
2397 lr_ignore_rest (ldfile, 0);
2398 break;
2401 class_bit = BITw (now->tok);
2402 class256_bit = BIT (now->tok);
2403 handle_digits = 0;
2404 read_charclass:
2405 ctype->class_done |= class_bit;
2406 last_token = tok_none;
2407 ellipsis_token = tok_none;
2408 step = 1;
2409 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2410 while (now->tok != tok_eol && now->tok != tok_eof)
2412 uint32_t wch;
2413 struct charseq *seq;
2415 if (ellipsis_token == tok_none)
2417 if (get_character (now, charmap, repertoire, &seq, &wch))
2418 goto err_label;
2420 if (!ignore_content && seq != NULL && seq->nbytes == 1)
2421 /* Yep, we can store information about this byte
2422 sequence. */
2423 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
2425 if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2426 && class_bit != 0)
2427 /* We have the UCS4 position. */
2428 *find_idx (ctype, &ctype->class_collection,
2429 &ctype->class_collection_max,
2430 &ctype->class_collection_act, wch) |= class_bit;
2432 last_token = now->tok;
2433 /* Terminate the string. */
2434 if (last_token == tok_bsymbol)
2436 now->val.str.startmb[now->val.str.lenmb] = '\0';
2437 last_str = now->val.str.startmb;
2439 else
2440 last_str = NULL;
2441 last_seq = seq;
2442 last_wch = wch;
2443 memcpy (last_charcode, now->val.charcode.bytes, 16);
2444 last_charcode_len = now->val.charcode.nbytes;
2446 if (!ignore_content && handle_digits == 1)
2448 /* We must store the digit values. */
2449 if (ctype->mbdigits_act == ctype->mbdigits_max)
2451 ctype->mbdigits_max += 10;
2452 ctype->mbdigits = xrealloc (ctype->mbdigits,
2453 (ctype->mbdigits_max
2454 * sizeof (char *)));
2455 ctype->wcdigits_max += 10;
2456 ctype->wcdigits = xrealloc (ctype->wcdigits,
2457 (ctype->wcdigits_max
2458 * sizeof (uint32_t)));
2461 ctype->mbdigits[ctype->mbdigits_act++] = seq;
2462 ctype->wcdigits[ctype->wcdigits_act++] = wch;
2464 else if (!ignore_content && handle_digits == 2)
2466 /* We must store the digit values. */
2467 if (ctype->outdigits_act >= 10)
2469 lr_error (ldfile, _("\
2470 %s: field `%s' does not contain exactly ten entries"),
2471 "LC_CTYPE", "outdigit");
2472 lr_ignore_rest (ldfile, 0);
2473 break;
2476 ctype->mboutdigits[ctype->outdigits_act] = seq;
2477 ctype->wcoutdigits[ctype->outdigits_act] = wch;
2478 ++ctype->outdigits_act;
2481 else
2483 /* Now it gets complicated. We have to resolve the
2484 ellipsis problem. First we must distinguish between
2485 the different kind of ellipsis and this must match the
2486 tokens we have seen. */
2487 assert (last_token != tok_none);
2489 if (last_token != now->tok)
2491 lr_error (ldfile, _("\
2492 ellipsis range must be marked by two operands of same type"));
2493 lr_ignore_rest (ldfile, 0);
2494 break;
2497 if (last_token == tok_bsymbol)
2499 if (ellipsis_token == tok_ellipsis3)
2500 lr_error (ldfile, _("with symbolic name range values \
2501 the absolute ellipsis `...' must not be used"));
2503 charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2504 repertoire, now, last_str,
2505 class256_bit, class_bit,
2506 (ellipsis_token
2507 == tok_ellipsis4
2508 ? 10 : 16),
2509 ignore_content,
2510 handle_digits, step);
2512 else if (last_token == tok_ucs4)
2514 if (ellipsis_token != tok_ellipsis2)
2515 lr_error (ldfile, _("\
2516 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2518 charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2519 repertoire, now, last_wch,
2520 class256_bit, class_bit,
2521 ignore_content, handle_digits,
2522 step);
2524 else
2526 assert (last_token == tok_charcode);
2528 if (ellipsis_token != tok_ellipsis3)
2529 lr_error (ldfile, _("\
2530 with character code range values one must use the absolute ellipsis `...'"));
2532 charclass_charcode_ellipsis (ldfile, ctype, charmap,
2533 repertoire, now,
2534 last_charcode,
2535 last_charcode_len,
2536 class256_bit, class_bit,
2537 ignore_content,
2538 handle_digits);
2541 /* Now we have used the last value. */
2542 last_token = tok_none;
2545 /* Next we expect a semicolon or the end of the line. */
2546 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2547 if (now->tok == tok_eol || now->tok == tok_eof)
2548 break;
2550 if (last_token != tok_none
2551 && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
2553 if (now->tok == tok_ellipsis2_2)
2555 now->tok = tok_ellipsis2;
2556 step = 2;
2558 else if (now->tok == tok_ellipsis4_2)
2560 now->tok = tok_ellipsis4;
2561 step = 2;
2564 ellipsis_token = now->tok;
2566 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2567 continue;
2570 if (now->tok != tok_semicolon)
2571 goto err_label;
2573 /* And get the next character. */
2574 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2576 ellipsis_token = tok_none;
2577 step = 1;
2579 break;
2581 case tok_digit:
2582 /* Ignore the rest of the line if we don't need the input of
2583 this line. */
2584 if (ignore_content)
2586 lr_ignore_rest (ldfile, 0);
2587 break;
2590 handle_tok_digit:
2591 class_bit = _ISwdigit;
2592 class256_bit = _ISdigit;
2593 handle_digits = 1;
2594 goto read_charclass;
2596 case tok_outdigit:
2597 /* Ignore the rest of the line if we don't need the input of
2598 this line. */
2599 if (ignore_content)
2601 lr_ignore_rest (ldfile, 0);
2602 break;
2605 if (ctype->outdigits_act != 0)
2606 lr_error (ldfile, _("\
2607 %s: field `%s' declared more than once"),
2608 "LC_CTYPE", "outdigit");
2609 class_bit = 0;
2610 class256_bit = 0;
2611 handle_digits = 2;
2612 goto read_charclass;
2614 case tok_toupper:
2615 /* Ignore the rest of the line if we don't need the input of
2616 this line. */
2617 if (ignore_content)
2619 lr_ignore_rest (ldfile, 0);
2620 break;
2623 mapidx = 0;
2624 goto read_mapping;
2626 case tok_tolower:
2627 /* Ignore the rest of the line if we don't need the input of
2628 this line. */
2629 if (ignore_content)
2631 lr_ignore_rest (ldfile, 0);
2632 break;
2635 mapidx = 1;
2636 goto read_mapping;
2638 case tok_map:
2639 /* Ignore the rest of the line if we don't need the input of
2640 this line. */
2641 if (ignore_content)
2643 lr_ignore_rest (ldfile, 0);
2644 break;
2647 /* We simply forget the `map' keyword and use the following
2648 operand to determine the mapping. */
2649 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2650 if (now->tok == tok_ident || now->tok == tok_string)
2652 size_t cnt;
2654 for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2655 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2656 break;
2658 if (cnt < ctype->map_collection_nr)
2659 free (now->val.str.startmb);
2660 else
2661 /* OK, it's a new map. */
2662 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2664 mapidx = cnt;
2666 else if (now->tok < tok_toupper || now->tok > tok_tolower)
2667 goto err_label;
2668 else
2669 mapidx = now->tok - tok_toupper;
2671 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2672 /* This better should be a semicolon. */
2673 if (now->tok != tok_semicolon)
2674 goto err_label;
2676 read_mapping:
2677 /* Test whether this mapping was already defined. */
2678 if (ctype->tomap_done[mapidx])
2680 lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2681 ctype->mapnames[mapidx]);
2682 lr_ignore_rest (ldfile, 0);
2683 break;
2685 ctype->tomap_done[mapidx] = 1;
2687 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2688 while (now->tok != tok_eol && now->tok != tok_eof)
2690 struct charseq *from_seq;
2691 uint32_t from_wch;
2692 struct charseq *to_seq;
2693 uint32_t to_wch;
2695 /* Every pair starts with an opening brace. */
2696 if (now->tok != tok_open_brace)
2697 goto err_label;
2699 /* Next comes the from-value. */
2700 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2701 if (get_character (now, charmap, repertoire, &from_seq,
2702 &from_wch) != 0)
2703 goto err_label;
2705 /* The next is a comma. */
2706 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2707 if (now->tok != tok_comma)
2708 goto err_label;
2710 /* And the other value. */
2711 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2712 if (get_character (now, charmap, repertoire, &to_seq,
2713 &to_wch) != 0)
2714 goto err_label;
2716 /* And the last thing is the closing brace. */
2717 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2718 if (now->tok != tok_close_brace)
2719 goto err_label;
2721 if (!ignore_content)
2723 /* Check whether the mapping converts from an ASCII value
2724 to a non-ASCII value. */
2725 if (from_seq != NULL && from_seq->nbytes == 1
2726 && isascii (from_seq->bytes[0])
2727 && to_seq != NULL && (to_seq->nbytes != 1
2728 || !isascii (to_seq->bytes[0])))
2729 ctype->to_nonascii = 1;
2731 if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2732 && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2733 /* We can use this value. */
2734 ctype->map256_collection[mapidx][from_seq->bytes[0]]
2735 = to_seq->bytes[0];
2737 if (from_wch != ILLEGAL_CHAR_VALUE
2738 && to_wch != ILLEGAL_CHAR_VALUE)
2739 /* Both correct values. */
2740 *find_idx (ctype, &ctype->map_collection[mapidx],
2741 &ctype->map_collection_max[mapidx],
2742 &ctype->map_collection_act[mapidx],
2743 from_wch) = to_wch;
2746 /* Now comes a semicolon or the end of the line/file. */
2747 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2748 if (now->tok == tok_semicolon)
2749 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2751 break;
2753 case tok_translit_start:
2754 /* Ignore the entire translit section with its peculiar syntax
2755 if we don't need the input. */
2756 if (ignore_content)
2760 lr_ignore_rest (ldfile, 0);
2761 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2763 while (now->tok != tok_translit_end && now->tok != tok_eof);
2765 if (now->tok == tok_eof)
2766 lr_error (ldfile, _(\
2767 "%s: `translit_start' section does not end with `translit_end'"),
2768 "LC_CTYPE");
2770 break;
2773 /* The rest of the line better should be empty. */
2774 lr_ignore_rest (ldfile, 1);
2776 /* We count here the number of allocated entries in the `translit'
2777 array. */
2778 cnt = 0;
2780 ldfile->translate_strings = 1;
2781 ldfile->return_widestr = 1;
2783 /* We proceed until we see the `translit_end' token. */
2784 while (now = lr_token (ldfile, charmap, NULL, repertoire, verbose),
2785 now->tok != tok_translit_end && now->tok != tok_eof)
2787 if (now->tok == tok_eol)
2788 /* Ignore empty lines. */
2789 continue;
2791 if (now->tok == tok_include)
2793 /* We have to include locale. */
2794 const char *locale_name;
2795 const char *repertoire_name;
2796 struct translit_include_t *include_stmt, **include_ptr;
2798 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2799 /* This should be a string or an identifier. In any
2800 case something to name a locale. */
2801 if (now->tok != tok_string && now->tok != tok_ident)
2803 translit_syntax:
2804 lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2805 lr_ignore_rest (ldfile, 0);
2806 continue;
2808 locale_name = now->val.str.startmb;
2810 /* Next should be a semicolon. */
2811 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2812 if (now->tok != tok_semicolon)
2813 goto translit_syntax;
2815 /* Now the repertoire name. */
2816 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2817 if ((now->tok != tok_string && now->tok != tok_ident)
2818 || now->val.str.startmb == NULL)
2819 goto translit_syntax;
2820 repertoire_name = now->val.str.startmb;
2821 if (repertoire_name[0] == '\0')
2822 /* Ignore the empty string. */
2823 repertoire_name = NULL;
2825 /* Save the include statement for later processing. */
2826 include_stmt = (struct translit_include_t *)
2827 xmalloc (sizeof (struct translit_include_t));
2828 include_stmt->copy_locale = locale_name;
2829 include_stmt->copy_repertoire = repertoire_name;
2830 include_stmt->next = NULL;
2832 include_ptr = &ctype->translit_include;
2833 while (*include_ptr != NULL)
2834 include_ptr = &(*include_ptr)->next;
2835 *include_ptr = include_stmt;
2837 /* The rest of the line must be empty. */
2838 lr_ignore_rest (ldfile, 1);
2840 /* Make sure the locale is read. */
2841 add_to_readlist (LC_CTYPE, locale_name, repertoire_name,
2842 1, NULL);
2843 continue;
2845 else if (now->tok == tok_default_missing)
2847 uint32_t *wstr;
2849 while (1)
2851 /* We expect a single character or string as the
2852 argument. */
2853 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2854 wstr = read_widestring (ldfile, now, charmap,
2855 repertoire);
2857 if (wstr != NULL)
2859 if (ctype->default_missing != NULL)
2861 lr_error (ldfile, _("\
2862 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2863 WITH_CUR_LOCALE (error_at_line (0, 0,
2864 ctype->default_missing_file,
2865 ctype->default_missing_lineno,
2866 _("\
2867 previous definition was here")));
2869 else
2871 ctype->default_missing = wstr;
2872 ctype->default_missing_file = ldfile->fname;
2873 ctype->default_missing_lineno = ldfile->lineno;
2875 /* We can have more entries, ignore them. */
2876 lr_ignore_rest (ldfile, 0);
2877 break;
2879 else if (wstr == (uint32_t *) -1l)
2880 /* This was a syntax error. */
2881 break;
2883 /* Maybe there is another replacement we can use. */
2884 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2885 if (now->tok == tok_eol || now->tok == tok_eof)
2887 /* Nothing found. We tell the user. */
2888 lr_error (ldfile, _("\
2889 %s: no representable `default_missing' definition found"), "LC_CTYPE");
2890 break;
2892 if (now->tok != tok_semicolon)
2893 goto translit_syntax;
2896 continue;
2898 else if (now->tok == tok_translit_ignore)
2900 read_translit_ignore_entry (ldfile, ctype, charmap,
2901 repertoire);
2902 continue;
2905 read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2907 ldfile->return_widestr = 0;
2909 if (now->tok == tok_eof)
2910 lr_error (ldfile, _(\
2911 "%s: `translit_start' section does not end with `translit_end'"),
2912 "LC_CTYPE");
2914 break;
2916 case tok_ident:
2917 /* Ignore the rest of the line if we don't need the input of
2918 this line. */
2919 if (ignore_content)
2921 lr_ignore_rest (ldfile, 0);
2922 break;
2925 /* This could mean one of several things. First test whether
2926 it's a character class name. */
2927 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2928 if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2929 break;
2930 if (cnt < ctype->nr_charclass)
2932 class_bit = _ISwbit (cnt);
2933 class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2934 free (now->val.str.startmb);
2935 goto read_charclass;
2937 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2938 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2939 break;
2940 if (cnt < ctype->map_collection_nr)
2942 mapidx = cnt;
2943 free (now->val.str.startmb);
2944 goto read_mapping;
2946 #ifdef PREDEFINED_CLASSES
2947 if (strcmp (now->val.str.startmb, "special1") == 0)
2949 class_bit = _ISwspecial1;
2950 free (now->val.str.startmb);
2951 goto read_charclass;
2953 if (strcmp (now->val.str.startmb, "special2") == 0)
2955 class_bit = _ISwspecial2;
2956 free (now->val.str.startmb);
2957 goto read_charclass;
2959 if (strcmp (now->val.str.startmb, "special3") == 0)
2961 class_bit = _ISwspecial3;
2962 free (now->val.str.startmb);
2963 goto read_charclass;
2965 if (strcmp (now->val.str.startmb, "tosymmetric") == 0)
2967 mapidx = 2;
2968 goto read_mapping;
2970 #endif
2971 break;
2973 case tok_end:
2974 /* Next we assume `LC_CTYPE'. */
2975 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2976 if (now->tok == tok_eof)
2977 break;
2978 if (now->tok == tok_eol)
2979 lr_error (ldfile, _("%s: incomplete `END' line"),
2980 "LC_CTYPE");
2981 else if (now->tok != tok_lc_ctype)
2982 lr_error (ldfile, _("\
2983 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2984 lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2985 return;
2987 default:
2988 err_label:
2989 if (now->tok != tok_eof)
2990 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2993 /* Prepare for the next round. */
2994 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2995 nowtok = now->tok;
2998 /* When we come here we reached the end of the file. */
2999 lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
3003 static void
3004 set_class_defaults (struct locale_ctype_t *ctype,
3005 const struct charmap_t *charmap,
3006 struct repertoire_t *repertoire)
3008 size_t cnt;
3010 /* This function defines the default values for the classes and conversions
3011 according to POSIX.2 2.5.2.1.
3012 It may seem that the order of these if-blocks is arbitrary but it is NOT.
3013 Don't move them unless you know what you do! */
/* Helper declared inside set_class_defaults (hence the auto prototype);
   it uses the ctype and charmap parameters of the enclosing function.
   For every character code CH in FROM..TO (inclusive) it adds CH to the
   class with bit position BITPOS:
   - the charmap is searched for the one-character name first and, if
     that fails, for the nine-character U%08X name;
   - when a one-byte sequence is found, _ISbit (BITPOS) is ORed into
     ctype->class256_collection for that byte;
   - a missing entry is reported unless be_quiet is set, a multi-byte
     entry is reported unconditionally;
   - regardless of the lookup result, _ISwbit (BITPOS) is ORed into the
     class_collection entry that ELEM (find_idx) yields for CH.  */
3015 auto void set_default (int bitpos, int from, int to);
3017 void set_default (int bitpos, int from, int to)
3019 char tmp[2];
3020 int ch;
3021 int bit = _ISbit (bitpos);
3022 int bitw = _ISwbit (bitpos);
3023 /* Make TMP a NUL-terminated one-character string; tmp[0] is replaced in the loop. */
3024 strcpy (tmp, "?");
3026 for (ch = from; ch <= to; ++ch)
3028 struct charseq *seq;
3029 tmp[0] = ch;
/* First try the plain one-character symbolic name.  */
3031 seq = charmap_find_value (charmap, tmp, 1);
3032 if (seq == NULL)
/* Fall back to the U%08X name: U plus eight hex digits plus the
   terminating NUL fills BUF exactly (assuming a 32-bit int).  */
3034 char buf[10];
3035 sprintf (buf, "U%08X", ch);
3036 seq = charmap_find_value (charmap, buf, 9);
3038 if (seq == NULL)
3040 if (!be_quiet)
3041 WITH_CUR_LOCALE (error (0, 0, _("\
3042 %s: character `%s' not defined in charmap while needed as default value"),
3043 "LC_CTYPE", tmp));
/* NOTE(review): unlike the branch above, this diagnostic is not guarded
   by be_quiet -- confirm whether that is intended.  */
3045 else if (seq->nbytes != 1)
3046 WITH_CUR_LOCALE (error (0, 0, _("\
3047 %s: character `%s' in charmap not representable with one byte"),
3048 "LC_CTYPE", tmp));
3049 else
3050 ctype->class256_collection[seq->bytes[0]] |= bit;
3052 /* No need to search here, the ASCII value is also the Unicode
3053 value. */
3054 ELEM (ctype, class_collection, , ch) |= bitw;
3058 /* Set default values if keyword was not present. */
3059 if ((ctype->class_done & BITw (tok_upper)) == 0)
3060 /* "If this keyword [upper] is not specified, the uppercase letters
3061 `A' through `Z', ..., shall automatically belong to this class,
3062 with implementation defined character values." [P1003.2, 2.5.2.1] */
3063 set_default (BITPOS (tok_upper), 'A', 'Z');
3065 if ((ctype->class_done & BITw (tok_lower)) == 0)
3066 /* "If this keyword [lower] is not specified, the lowercase letters
3067 `a' through `z', ..., shall automatically belong to this class,
3068 with implementation defined character values." [P1003.2, 2.5.2.1] */
3069 set_default (BITPOS (tok_lower), 'a', 'z');
3071 if ((ctype->class_done & BITw (tok_alpha)) == 0)
3073 /* Table 2-6 in P1003.2 says that characters in class `upper' or
3074 class `lower' *must* be in class `alpha'. */
3075 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
3076 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
3078 for (cnt = 0; cnt < 256; ++cnt)
3079 if ((ctype->class256_collection[cnt] & mask) != 0)
3080 ctype->class256_collection[cnt] |= BIT (tok_alpha);
3082 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3083 if ((ctype->class_collection[cnt] & maskw) != 0)
3084 ctype->class_collection[cnt] |= BITw (tok_alpha);
3087 if ((ctype->class_done & BITw (tok_digit)) == 0)
3088 /* "If this keyword [digit] is not specified, the digits `0' through
3089 `9', ..., shall automatically belong to this class, with
3090 implementation-defined character values." [P1003.2, 2.5.2.1] */
3091 set_default (BITPOS (tok_digit), '0', '9');
3093 /* "Only characters specified for the `alpha' and `digit' keyword
3094 shall be specified. Characters specified for the keyword `alpha'
3095 and `digit' are automatically included in this class." */
3097 unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
3098 unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
3100 for (cnt = 0; cnt < 256; ++cnt)
3101 if ((ctype->class256_collection[cnt] & mask) != 0)
3102 ctype->class256_collection[cnt] |= BIT (tok_alnum);
3104 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3105 if ((ctype->class_collection[cnt] & maskw) != 0)
3106 ctype->class_collection[cnt] |= BITw (tok_alnum);
3109 if ((ctype->class_done & BITw (tok_space)) == 0)
3110 /* "If this keyword [space] is not specified, the characters <space>,
3111 <form-feed>, <newline>, <carriage-return>, <tab>, and
3112 <vertical-tab>, ..., shall automatically belong to this class,
3113 with implementation-defined character values." [P1003.2, 2.5.2.1] */
3115 struct charseq *seq;
3117 seq = charmap_find_value (charmap, "space", 5);
3118 if (seq == NULL)
3119 seq = charmap_find_value (charmap, "SP", 2);
3120 if (seq == NULL)
3121 seq = charmap_find_value (charmap, "U00000020", 9);
3122 if (seq == NULL)
3124 if (!be_quiet)
3125 WITH_CUR_LOCALE (error (0, 0, _("\
3126 %s: character `%s' not defined while needed as default value"),
3127 "LC_CTYPE", "<space>"));
3129 else if (seq->nbytes != 1)
3130 WITH_CUR_LOCALE (error (0, 0, _("\
3131 %s: character `%s' in charmap not representable with one byte"),
3132 "LC_CTYPE", "<space>"));
3133 else
3134 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3136 /* No need to search. */
3137 ELEM (ctype, class_collection, , L' ') |= BITw (tok_space);
3139 seq = charmap_find_value (charmap, "form-feed", 9);
3140 if (seq == NULL)
3141 seq = charmap_find_value (charmap, "U0000000C", 9);
3142 if (seq == NULL)
3144 if (!be_quiet)
3145 WITH_CUR_LOCALE (error (0, 0, _("\
3146 %s: character `%s' not defined while needed as default value"),
3147 "LC_CTYPE", "<form-feed>"));
3149 else if (seq->nbytes != 1)
3150 WITH_CUR_LOCALE (error (0, 0, _("\
3151 %s: character `%s' in charmap not representable with one byte"),
3152 "LC_CTYPE", "<form-feed>"));
3153 else
3154 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3156 /* No need to search. */
3157 ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space);
3160 seq = charmap_find_value (charmap, "newline", 7);
3161 if (seq == NULL)
3162 seq = charmap_find_value (charmap, "U0000000A", 9);
3163 if (seq == NULL)
3165 if (!be_quiet)
3166 WITH_CUR_LOCALE (error (0, 0, _("\
3167 character `%s' not defined while needed as default value"),
3168 "<newline>"));
3170 else if (seq->nbytes != 1)
3171 WITH_CUR_LOCALE (error (0, 0, _("\
3172 %s: character `%s' in charmap not representable with one byte"),
3173 "LC_CTYPE", "<newline>"));
3174 else
3175 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3177 /* No need to search. */
3178 ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space);
3181 seq = charmap_find_value (charmap, "carriage-return", 15);
3182 if (seq == NULL)
3183 seq = charmap_find_value (charmap, "U0000000D", 9);
3184 if (seq == NULL)
3186 if (!be_quiet)
3187 WITH_CUR_LOCALE (error (0, 0, _("\
3188 %s: character `%s' not defined while needed as default value"),
3189 "LC_CTYPE", "<carriage-return>"));
3191 else if (seq->nbytes != 1)
3192 WITH_CUR_LOCALE (error (0, 0, _("\
3193 %s: character `%s' in charmap not representable with one byte"),
3194 "LC_CTYPE", "<carriage-return>"));
3195 else
3196 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3198 /* No need to search. */
3199 ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space);
3202 seq = charmap_find_value (charmap, "tab", 3);
3203 if (seq == NULL)
3204 seq = charmap_find_value (charmap, "U00000009", 9);
3205 if (seq == NULL)
3207 if (!be_quiet)
3208 WITH_CUR_LOCALE (error (0, 0, _("\
3209 %s: character `%s' not defined while needed as default value"),
3210 "LC_CTYPE", "<tab>"));
3212 else if (seq->nbytes != 1)
3213 WITH_CUR_LOCALE (error (0, 0, _("\
3214 %s: character `%s' in charmap not representable with one byte"),
3215 "LC_CTYPE", "<tab>"));
3216 else
3217 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3219 /* No need to search. */
3220 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space);
3223 seq = charmap_find_value (charmap, "vertical-tab", 12);
3224 if (seq == NULL)
3225 seq = charmap_find_value (charmap, "U0000000B", 9);
3226 if (seq == NULL)
3228 if (!be_quiet)
3229 WITH_CUR_LOCALE (error (0, 0, _("\
3230 %s: character `%s' not defined while needed as default value"),
3231 "LC_CTYPE", "<vertical-tab>"));
3233 else if (seq->nbytes != 1)
3234 WITH_CUR_LOCALE (error (0, 0, _("\
3235 %s: character `%s' in charmap not representable with one byte"),
3236 "LC_CTYPE", "<vertical-tab>"));
3237 else
3238 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3240 /* No need to search. */
3241 ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space);
3244 if ((ctype->class_done & BITw (tok_xdigit)) == 0)
3245 /* "If this keyword is not specified, the digits `0' to `9', the
3246 uppercase letters `A' through `F', and the lowercase letters `a'
3247 through `f', ..., shall automatically belong to this class, with
3248 implementation defined character values." [P1003.2, 2.5.2.1] */
3250 set_default (BITPOS (tok_xdigit), '0', '9');
3251 set_default (BITPOS (tok_xdigit), 'A', 'F');
3252 set_default (BITPOS (tok_xdigit), 'a', 'f');
3255 if ((ctype->class_done & BITw (tok_blank)) == 0)
3256 /* "If this keyword [blank] is unspecified, the characters <space> and
3257 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
3259 struct charseq *seq;
3261 seq = charmap_find_value (charmap, "space", 5);
3262 if (seq == NULL)
3263 seq = charmap_find_value (charmap, "SP", 2);
3264 if (seq == NULL)
3265 seq = charmap_find_value (charmap, "U00000020", 9);
3266 if (seq == NULL)
3268 if (!be_quiet)
3269 WITH_CUR_LOCALE (error (0, 0, _("\
3270 %s: character `%s' not defined while needed as default value"),
3271 "LC_CTYPE", "<space>"));
3273 else if (seq->nbytes != 1)
3274 WITH_CUR_LOCALE (error (0, 0, _("\
3275 %s: character `%s' in charmap not representable with one byte"),
3276 "LC_CTYPE", "<space>"));
3277 else
3278 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3280 /* No need to search. */
3281 ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank);
3284 seq = charmap_find_value (charmap, "tab", 3);
3285 if (seq == NULL)
3286 seq = charmap_find_value (charmap, "U00000009", 9);
3287 if (seq == NULL)
3289 if (!be_quiet)
3290 WITH_CUR_LOCALE (error (0, 0, _("\
3291 %s: character `%s' not defined while needed as default value"),
3292 "LC_CTYPE", "<tab>"));
3294 else if (seq->nbytes != 1)
3295 WITH_CUR_LOCALE (error (0, 0, _("\
3296 %s: character `%s' in charmap not representable with one byte"),
3297 "LC_CTYPE", "<tab>"));
3298 else
3299 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3301 /* No need to search. */
3302 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank);
3305 if ((ctype->class_done & BITw (tok_graph)) == 0)
3306 /* "If this keyword [graph] is not specified, characters specified for
3307 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3308 shall belong to this character class." [P1003.2, 2.5.2.1] */
3310 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3311 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3312 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3313 BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3314 BITw (tok_punct);
3315 size_t cnt;
3317 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3318 if ((ctype->class_collection[cnt] & maskw) != 0)
3319 ctype->class_collection[cnt] |= BITw (tok_graph);
3321 for (cnt = 0; cnt < 256; ++cnt)
3322 if ((ctype->class256_collection[cnt] & mask) != 0)
3323 ctype->class256_collection[cnt] |= BIT (tok_graph);
3326 if ((ctype->class_done & BITw (tok_print)) == 0)
3327 /* "If this keyword [print] is not provided, characters specified for
3328 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3329 and the <space> character shall belong to this character class."
3330 [P1003.2, 2.5.2.1] */
3332 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3333 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3334 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3335 BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3336 BITw (tok_punct);
3337 size_t cnt;
3338 struct charseq *seq;
3340 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3341 if ((ctype->class_collection[cnt] & maskw) != 0)
3342 ctype->class_collection[cnt] |= BITw (tok_print);
3344 for (cnt = 0; cnt < 256; ++cnt)
3345 if ((ctype->class256_collection[cnt] & mask) != 0)
3346 ctype->class256_collection[cnt] |= BIT (tok_print);
3349 seq = charmap_find_value (charmap, "space", 5);
3350 if (seq == NULL)
3351 seq = charmap_find_value (charmap, "SP", 2);
3352 if (seq == NULL)
3353 seq = charmap_find_value (charmap, "U00000020", 9);
3354 if (seq == NULL)
3356 if (!be_quiet)
3357 WITH_CUR_LOCALE (error (0, 0, _("\
3358 %s: character `%s' not defined while needed as default value"),
3359 "LC_CTYPE", "<space>"));
3361 else if (seq->nbytes != 1)
3362 WITH_CUR_LOCALE (error (0, 0, _("\
3363 %s: character `%s' in charmap not representable with one byte"),
3364 "LC_CTYPE", "<space>"));
3365 else
3366 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
3368 /* No need to search. */
3369 ELEM (ctype, class_collection, , L' ') |= BITw (tok_print);
3372 if (ctype->tomap_done[0] == 0)
3373 /* "If this keyword [toupper] is not specified, the lowercase letters
3374 `a' through `z', and their corresponding uppercase letters `A' to
3375 `Z', ..., shall automatically be included, with implementation-
3376 defined character values." [P1003.2, 2.5.2.1] */
3378 char tmp[4];
3379 int ch;
3381 strcpy (tmp, "<?>");
3383 for (ch = 'a'; ch <= 'z'; ++ch)
3385 struct charseq *seq_from, *seq_to;
3387 tmp[1] = (char) ch;
3389 seq_from = charmap_find_value (charmap, &tmp[1], 1);
3390 if (seq_from == NULL)
3392 char buf[10];
3393 sprintf (buf, "U%08X", ch);
3394 seq_from = charmap_find_value (charmap, buf, 9);
3396 if (seq_from == NULL)
3398 if (!be_quiet)
3399 WITH_CUR_LOCALE (error (0, 0, _("\
3400 %s: character `%s' not defined while needed as default value"),
3401 "LC_CTYPE", tmp));
3403 else if (seq_from->nbytes != 1)
3405 if (!be_quiet)
3406 WITH_CUR_LOCALE (error (0, 0, _("\
3407 %s: character `%s' needed as default value not representable with one byte"),
3408 "LC_CTYPE", tmp));
3410 else
3412 /* This conversion is implementation defined. */
3413 tmp[1] = (char) (ch + ('A' - 'a'));
3414 seq_to = charmap_find_value (charmap, &tmp[1], 1);
3415 if (seq_to == NULL)
3417 char buf[10];
3418 sprintf (buf, "U%08X", ch + ('A' - 'a'));
3419 seq_to = charmap_find_value (charmap, buf, 9);
3421 if (seq_to == NULL)
3423 if (!be_quiet)
3424 WITH_CUR_LOCALE (error (0, 0, _("\
3425 %s: character `%s' not defined while needed as default value"),
3426 "LC_CTYPE", tmp));
3428 else if (seq_to->nbytes != 1)
3430 if (!be_quiet)
3431 WITH_CUR_LOCALE (error (0, 0, _("\
3432 %s: character `%s' needed as default value not representable with one byte"),
3433 "LC_CTYPE", tmp));
3435 else
3436 /* The index [0] is determined by the order of the
3437 `ctype_map_newP' calls in `ctype_startup'. */
3438 ctype->map256_collection[0][seq_from->bytes[0]]
3439 = seq_to->bytes[0];
3442 /* No need to search. */
3443 ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
3447 if (ctype->tomap_done[1] == 0)
3448 /* "If this keyword [tolower] is not specified, the mapping shall be
3449 the reverse mapping of the one specified to `toupper'." [P1003.2] */
3451 for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
3452 if (ctype->map_collection[0][cnt] != 0)
3453 ELEM (ctype, map_collection, [1],
3454 ctype->map_collection[0][cnt])
3455 = ctype->charnames[cnt];
3457 for (cnt = 0; cnt < 256; ++cnt)
3458 if (ctype->map256_collection[0][cnt] != 0)
3459 ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
3462 if (ctype->outdigits_act != 10)
3464 if (ctype->outdigits_act != 0)
3465 WITH_CUR_LOCALE (error (0, 0, _("\
3466 %s: field `%s' does not contain exactly ten entries"),
3467 "LC_CTYPE", "outdigit"));
3469 for (cnt = ctype->outdigits_act; cnt < 10; ++cnt)
3471 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3472 digits + cnt, 1);
3474 if (ctype->mboutdigits[cnt] == NULL)
3475 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3476 longnames[cnt],
3477 strlen (longnames[cnt]));
3479 if (ctype->mboutdigits[cnt] == NULL)
3480 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3481 uninames[cnt], 9);
3483 if (ctype->mboutdigits[cnt] == NULL)
3485 /* Provide a replacement. */
3486 WITH_CUR_LOCALE (error (0, 0, _("\
3487 no output digits defined and none of the standard names in the charmap")));
3489 ctype->mboutdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
3490 sizeof (struct charseq)
3491 + 1);
3493 /* This is better than nothing. */
3494 ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
3495 ctype->mboutdigits[cnt]->nbytes = 1;
3498 ctype->wcoutdigits[cnt] = L'0' + cnt;
3501 ctype->outdigits_act = 10;
/* Construction of sparse 3-level tables.
   See wchar-lookup.h for their structure and the meaning of p and q.  */

struct wctype_table
{
  /* Parameters.  The table code splits a character value into a 5-bit
     bit position (lowest bits), P bits of level-3 index, Q bits of
     level-2 index, and the remaining high bits as level-1 index.  */
  unsigned int p;
  unsigned int q;
  /* Working representation.  For every level: the capacity currently
     allocated, the amount in use, and the storage itself.  Level 1 is
     counted in entries; level 2 and level 3 are counted in blocks of
     (1 << q) respectively (1 << p) entries.  */
  size_t level1_alloc;
  size_t level1_size;
  uint32_t *level1;
  size_t level2_alloc;
  size_t level2_size;
  uint32_t *level2;
  size_t level3_alloc;
  size_t level3_size;
  uint32_t *level3;
  /* Compressed representation: size in bytes and the buffer holding
     the finished table.  */
  size_t result_size;
  char *result;
};
3529 /* Initialize. Assumes t->p and t->q have already been set. */
3530 static inline void
3531 wctype_table_init (struct wctype_table *t)
3533 t->level1 = NULL;
3534 t->level1_alloc = t->level1_size = 0;
3535 t->level2 = NULL;
3536 t->level2_alloc = t->level2_size = 0;
3537 t->level3 = NULL;
3538 t->level3_alloc = t->level3_size = 0;
3541 /* Retrieve an entry. */
3542 static inline int
3543 wctype_table_get (struct wctype_table *t, uint32_t wc)
3545 uint32_t index1 = wc >> (t->q + t->p + 5);
3546 if (index1 < t->level1_size)
3548 uint32_t lookup1 = t->level1[index1];
3549 if (lookup1 != EMPTY)
3551 uint32_t index2 = ((wc >> (t->p + 5)) & ((1 << t->q) - 1))
3552 + (lookup1 << t->q);
3553 uint32_t lookup2 = t->level2[index2];
3554 if (lookup2 != EMPTY)
3556 uint32_t index3 = ((wc >> 5) & ((1 << t->p) - 1))
3557 + (lookup2 << t->p);
3558 uint32_t lookup3 = t->level3[index3];
3559 uint32_t index4 = wc & 0x1f;
3561 return (lookup3 >> index4) & 1;
3565 return 0;
3568 /* Add one entry. */
3569 static void
3570 wctype_table_add (struct wctype_table *t, uint32_t wc)
3572 uint32_t index1 = wc >> (t->q + t->p + 5);
3573 uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1);
3574 uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1);
3575 uint32_t index4 = wc & 0x1f;
3576 size_t i, i1, i2;
3578 if (index1 >= t->level1_size)
3580 if (index1 >= t->level1_alloc)
3582 size_t alloc = 2 * t->level1_alloc;
3583 if (alloc <= index1)
3584 alloc = index1 + 1;
3585 t->level1 = (uint32_t *) xrealloc ((char *) t->level1,
3586 alloc * sizeof (uint32_t));
3587 t->level1_alloc = alloc;
3589 while (index1 >= t->level1_size)
3590 t->level1[t->level1_size++] = EMPTY;
3593 if (t->level1[index1] == EMPTY)
3595 if (t->level2_size == t->level2_alloc)
3597 size_t alloc = 2 * t->level2_alloc + 1;
3598 t->level2 = (uint32_t *) xrealloc ((char *) t->level2,
3599 (alloc << t->q) * sizeof (uint32_t));
3600 t->level2_alloc = alloc;
3602 i1 = t->level2_size << t->q;
3603 i2 = (t->level2_size + 1) << t->q;
3604 for (i = i1; i < i2; i++)
3605 t->level2[i] = EMPTY;
3606 t->level1[index1] = t->level2_size++;
3609 index2 += t->level1[index1] << t->q;
3611 if (t->level2[index2] == EMPTY)
3613 if (t->level3_size == t->level3_alloc)
3615 size_t alloc = 2 * t->level3_alloc + 1;
3616 t->level3 = (uint32_t *) xrealloc ((char *) t->level3,
3617 (alloc << t->p) * sizeof (uint32_t));
3618 t->level3_alloc = alloc;
3620 i1 = t->level3_size << t->p;
3621 i2 = (t->level3_size + 1) << t->p;
3622 for (i = i1; i < i2; i++)
3623 t->level3[i] = 0;
3624 t->level2[index2] = t->level3_size++;
3627 index3 += t->level2[index2] << t->p;
3629 t->level3[index3] |= (uint32_t)1 << index4;
3632 /* Finalize and shrink. */
3633 static void
3634 wctype_table_finalize (struct wctype_table *t)
3636 size_t i, j, k;
3637 uint32_t reorder3[t->level3_size];
3638 uint32_t reorder2[t->level2_size];
3639 uint32_t level1_offset, level2_offset, level3_offset;
3641 /* Uniquify level3 blocks. */
3642 k = 0;
3643 for (j = 0; j < t->level3_size; j++)
3645 for (i = 0; i < k; i++)
3646 if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3647 (1 << t->p) * sizeof (uint32_t)) == 0)
3648 break;
3649 /* Relocate block j to block i. */
3650 reorder3[j] = i;
3651 if (i == k)
3653 if (i != j)
3654 memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3655 (1 << t->p) * sizeof (uint32_t));
3656 k++;
3659 t->level3_size = k;
3661 for (i = 0; i < (t->level2_size << t->q); i++)
3662 if (t->level2[i] != EMPTY)
3663 t->level2[i] = reorder3[t->level2[i]];
3665 /* Uniquify level2 blocks. */
3666 k = 0;
3667 for (j = 0; j < t->level2_size; j++)
3669 for (i = 0; i < k; i++)
3670 if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3671 (1 << t->q) * sizeof (uint32_t)) == 0)
3672 break;
3673 /* Relocate block j to block i. */
3674 reorder2[j] = i;
3675 if (i == k)
3677 if (i != j)
3678 memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3679 (1 << t->q) * sizeof (uint32_t));
3680 k++;
3683 t->level2_size = k;
3685 for (i = 0; i < t->level1_size; i++)
3686 if (t->level1[i] != EMPTY)
3687 t->level1[i] = reorder2[t->level1[i]];
3689 /* Create and fill the resulting compressed representation. */
3690 t->result_size =
3691 5 * sizeof (uint32_t)
3692 + t->level1_size * sizeof (uint32_t)
3693 + (t->level2_size << t->q) * sizeof (uint32_t)
3694 + (t->level3_size << t->p) * sizeof (uint32_t);
3695 t->result = (char *) xmalloc (t->result_size);
3697 level1_offset =
3698 5 * sizeof (uint32_t);
3699 level2_offset =
3700 5 * sizeof (uint32_t)
3701 + t->level1_size * sizeof (uint32_t);
3702 level3_offset =
3703 5 * sizeof (uint32_t)
3704 + t->level1_size * sizeof (uint32_t)
3705 + (t->level2_size << t->q) * sizeof (uint32_t);
3707 ((uint32_t *) t->result)[0] = t->q + t->p + 5;
3708 ((uint32_t *) t->result)[1] = t->level1_size;
3709 ((uint32_t *) t->result)[2] = t->p + 5;
3710 ((uint32_t *) t->result)[3] = (1 << t->q) - 1;
3711 ((uint32_t *) t->result)[4] = (1 << t->p) - 1;
3713 for (i = 0; i < t->level1_size; i++)
3714 ((uint32_t *) (t->result + level1_offset))[i] =
3715 (t->level1[i] == EMPTY
3717 : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3719 for (i = 0; i < (t->level2_size << t->q); i++)
3720 ((uint32_t *) (t->result + level2_offset))[i] =
3721 (t->level2[i] == EMPTY
3723 : (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
3725 for (i = 0; i < (t->level3_size << t->p); i++)
3726 ((uint32_t *) (t->result + level3_offset))[i] = t->level3[i];
3728 if (t->level1_alloc > 0)
3729 free (t->level1);
3730 if (t->level2_alloc > 0)
3731 free (t->level2);
3732 if (t->level3_alloc > 0)
3733 free (t->level3);
3736 #define TABLE wcwidth_table
3737 #define ELEMENT uint8_t
3738 #define DEFAULT 0xff
3739 #include "3level.h"
3741 #define TABLE wctrans_table
3742 #define ELEMENT int32_t
3743 #define DEFAULT 0
3744 #define wctrans_table_add wctrans_table_add_internal
3745 #include "3level.h"
3746 #undef wctrans_table_add
/* The wctrans_table must actually store the difference between the
   desired result and the argument.
   Records in T that WC maps to MAPPED_WC by handing the difference
   MAPPED_WC - WC to the generated add function.  The subtraction is
   done in uint32_t arithmetic, so it wraps modulo 2^32 when MAPPED_WC
   is smaller than WC.  */
static inline void
wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
{
  wctrans_table_add_internal (t, wc, mapped_wc - wc);
}
3756 /* Flattens the included transliterations into a translit list.
3757 Inserts them in the list at `cursor', and returns the new cursor. */
3758 static struct translit_t **
3759 translit_flatten (struct locale_ctype_t *ctype,
3760 const struct charmap_t *charmap,
3761 struct translit_t **cursor)
3763 while (ctype->translit_include != NULL)
3765 const char *copy_locale = ctype->translit_include->copy_locale;
3766 const char *copy_repertoire = ctype->translit_include->copy_repertoire;
3767 struct localedef_t *other;
3769 /* Unchain the include statement. During the depth-first traversal
3770 we don't want to visit any locale more than once. */
3771 ctype->translit_include = ctype->translit_include->next;
3773 other = find_locale (LC_CTYPE, copy_locale, copy_repertoire, charmap);
3775 if (other == NULL || other->categories[LC_CTYPE].ctype == NULL)
3777 WITH_CUR_LOCALE (error (0, 0, _("\
3778 %s: transliteration data from locale `%s' not available"),
3779 "LC_CTYPE", copy_locale));
3781 else
3783 struct locale_ctype_t *other_ctype =
3784 other->categories[LC_CTYPE].ctype;
3786 cursor = translit_flatten (other_ctype, charmap, cursor);
3787 assert (other_ctype->translit_include == NULL);
3789 if (other_ctype->translit != NULL)
3791 /* Insert the other_ctype->translit list at *cursor. */
3792 struct translit_t *endp = other_ctype->translit;
3793 while (endp->next != NULL)
3794 endp = endp->next;
3796 endp->next = *cursor;
3797 *cursor = other_ctype->translit;
3799 /* Avoid any risk of circular lists. */
3800 other_ctype->translit = NULL;
3802 cursor = &endp->next;
3805 if (ctype->default_missing == NULL)
3806 ctype->default_missing = other_ctype->default_missing;
3810 return cursor;
3813 static void
3814 allocate_arrays (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
3815 struct repertoire_t *repertoire)
3817 size_t idx, nr;
3818 const void *key;
3819 size_t len;
3820 void *vdata;
3821 void *curs;
3823 /* You wonder about this amount of memory? This is only because some
3824 users do not manage to address the array with unsigned values or
3825 data types with range >= 256. '\200' would result in the array
3826 index -128. To help these poor people we duplicate the entries for
3827 128 up to 255 below the entry for \0. */
3828 ctype->ctype_b = (char_class_t *) xcalloc (256 + 128, sizeof (char_class_t));
3829 ctype->ctype32_b = (char_class32_t *) xcalloc (256, sizeof (char_class32_t));
3830 ctype->class_b = (uint32_t **)
3831 xmalloc (ctype->nr_charclass * sizeof (uint32_t *));
3832 ctype->class_3level = (struct iovec *)
3833 xmalloc (ctype->nr_charclass * sizeof (struct iovec));
3835 /* This is the array accessed using the multibyte string elements. */
3836 for (idx = 0; idx < 256; ++idx)
3837 ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
3839 /* Mirror first 127 entries. We must take care that entry -1 is not
3840 mirrored because EOF == -1. */
3841 for (idx = 0; idx < 127; ++idx)
3842 ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
3844 /* The 32 bit array contains all characters < 0x100. */
3845 for (idx = 0; idx < ctype->class_collection_act; ++idx)
3846 if (ctype->charnames[idx] < 0x100)
3847 ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
3849 for (nr = 0; nr < ctype->nr_charclass; nr++)
3851 ctype->class_b[nr] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
3853 /* We only set CLASS_B for the bits in the ISO C classes, not
3854 the user defined classes. The number should not change but
3855 who knows. */
3856 #define LAST_ISO_C_BIT 11
3857 if (nr <= LAST_ISO_C_BIT)
3858 for (idx = 0; idx < 256; ++idx)
3859 if (ctype->class256_collection[idx] & _ISbit (nr))
3860 ctype->class_b[nr][idx >> 5] |= (uint32_t) 1 << (idx & 0x1f);
3863 for (nr = 0; nr < ctype->nr_charclass; nr++)
3865 struct wctype_table t;
3867 t.p = 4; /* or: 5 */
3868 t.q = 7; /* or: 6 */
3869 wctype_table_init (&t);
3871 for (idx = 0; idx < ctype->class_collection_act; ++idx)
3872 if (ctype->class_collection[idx] & _ISwbit (nr))
3873 wctype_table_add (&t, ctype->charnames[idx]);
3875 wctype_table_finalize (&t);
3877 if (verbose)
3878 WITH_CUR_LOCALE (fprintf (stderr, _("\
3879 %s: table for class \"%s\": %lu bytes\n"),
3880 "LC_CTYPE", ctype->classnames[nr],
3881 (unsigned long int) t.result_size));
3883 ctype->class_3level[nr].iov_base = t.result;
3884 ctype->class_3level[nr].iov_len = t.result_size;
3887 /* Room for table of mappings. */
3888 ctype->map_b = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3889 ctype->map32_b = (uint32_t **) xmalloc (ctype->map_collection_nr
3890 * sizeof (uint32_t *));
3891 ctype->map_3level = (struct iovec *)
3892 xmalloc (ctype->map_collection_nr * sizeof (struct iovec));
3894 /* Fill in all mappings. */
3895 for (idx = 0; idx < 2; ++idx)
3897 unsigned int idx2;
3899 /* Allocate table. */
3900 ctype->map_b[idx] = (uint32_t *)
3901 xmalloc ((256 + 128) * sizeof (uint32_t));
3903 /* Copy values from collection. */
3904 for (idx2 = 0; idx2 < 256; ++idx2)
3905 ctype->map_b[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
3907 /* Mirror first 127 entries. We must take care not to map entry
3908 -1 because EOF == -1. */
3909 for (idx2 = 0; idx2 < 127; ++idx2)
3910 ctype->map_b[idx][idx2] = ctype->map_b[idx][256 + idx2];
3912 /* EOF must map to EOF. */
3913 ctype->map_b[idx][127] = EOF;
3916 for (idx = 0; idx < ctype->map_collection_nr; ++idx)
3918 unsigned int idx2;
3920 /* Allocate table. */
3921 ctype->map32_b[idx] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
3923 /* Copy values from collection. Default is identity mapping. */
3924 for (idx2 = 0; idx2 < 256; ++idx2)
3925 ctype->map32_b[idx][idx2] =
3926 (ctype->map_collection[idx][idx2] != 0
3927 ? ctype->map_collection[idx][idx2]
3928 : idx2);
3931 for (nr = 0; nr < ctype->map_collection_nr; nr++)
3933 struct wctrans_table t;
3935 t.p = 7;
3936 t.q = 9;
3937 wctrans_table_init (&t);
3939 for (idx = 0; idx < ctype->map_collection_act[nr]; ++idx)
3940 if (ctype->map_collection[nr][idx] != 0)
3941 wctrans_table_add (&t, ctype->charnames[idx],
3942 ctype->map_collection[nr][idx]);
3944 wctrans_table_finalize (&t);
3946 if (verbose)
3947 WITH_CUR_LOCALE (fprintf (stderr, _("\
3948 %s: table for map \"%s\": %lu bytes\n"),
3949 "LC_CTYPE", ctype->mapnames[nr],
3950 (unsigned long int) t.result_size));
3952 ctype->map_3level[nr].iov_base = t.result;
3953 ctype->map_3level[nr].iov_len = t.result_size;
3956 /* Extra array for class and map names. */
3957 ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3958 * sizeof (uint32_t));
3959 ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3960 * sizeof (uint32_t));
3962 ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
3963 ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
3965 /* Array for width information. Because the expected widths are very
3966 small (never larger than 2) we use only one single byte. This
3967 saves space.
3968 We put only printable characters in the table. wcwidth is specified
3969 to return -1 for non-printable characters. Doing the check here
3970 saves a run-time check.
3971 But we put L'\0' in the table. This again saves a run-time check. */
3973 struct wcwidth_table t;
3975 t.p = 7;
3976 t.q = 9;
3977 wcwidth_table_init (&t);
3979 /* First set all the printable characters of the character set to
3980 the default width. */
3981 curs = NULL;
3982 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
3984 struct charseq *data = (struct charseq *) vdata;
3986 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
3987 data->ucs4 = repertoire_find_value (ctype->repertoire,
3988 data->name, len);
3990 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
3992 uint32_t *class_bits =
3993 find_idx (ctype, &ctype->class_collection, NULL,
3994 &ctype->class_collection_act, data->ucs4);
3996 if (class_bits != NULL && (*class_bits & BITw (tok_print)))
3997 wcwidth_table_add (&t, data->ucs4, charmap->width_default);
4001 /* Now add the explicitly specified widths. */
4002 if (charmap->width_rules != NULL)
4004 size_t cnt;
4006 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
4008 unsigned char bytes[charmap->mb_cur_max];
4009 int nbytes = charmap->width_rules[cnt].from->nbytes;
4011 /* We have the range of character for which the width is
4012 specified described using byte sequences of the multibyte
4013 charset. We have to convert this to UCS4 now. And we
4014 cannot simply convert the beginning and the end of the
4015 sequence, we have to iterate over the byte sequence and
4016 convert it for every single character. */
4017 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
4019 while (nbytes < charmap->width_rules[cnt].to->nbytes
4020 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
4021 nbytes) <= 0)
4023 /* Find the UCS value for `bytes'. */
4024 int inner;
4025 uint32_t wch;
4026 struct charseq *seq =
4027 charmap_find_symbol (charmap, bytes, nbytes);
4029 if (seq == NULL)
4030 wch = ILLEGAL_CHAR_VALUE;
4031 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
4032 wch = seq->ucs4;
4033 else
4034 wch = repertoire_find_value (ctype->repertoire, seq->name,
4035 strlen (seq->name));
4037 if (wch != ILLEGAL_CHAR_VALUE)
4039 /* Store the value. */
4040 uint32_t *class_bits =
4041 find_idx (ctype, &ctype->class_collection, NULL,
4042 &ctype->class_collection_act, wch);
4044 if (class_bits != NULL && (*class_bits & BITw (tok_print)))
4045 wcwidth_table_add (&t, wch,
4046 charmap->width_rules[cnt].width);
4049 /* "Increment" the bytes sequence. */
4050 inner = nbytes - 1;
4051 while (inner >= 0 && bytes[inner] == 0xff)
4052 --inner;
4054 if (inner < 0)
4056 /* We have to extend the byte sequence. */
4057 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
4058 break;
4060 bytes[0] = 1;
4061 memset (&bytes[1], 0, nbytes);
4062 ++nbytes;
4064 else
4066 ++bytes[inner];
4067 while (++inner < nbytes)
4068 bytes[inner] = 0;
4074 /* Set the width of L'\0' to 0. */
4075 wcwidth_table_add (&t, 0, 0);
4077 wcwidth_table_finalize (&t);
4079 if (verbose)
4080 WITH_CUR_LOCALE (fprintf (stderr, _("%s: table for width: %lu bytes\n"),
4081 "LC_CTYPE", (unsigned long int) t.result_size));
4083 ctype->width.iov_base = t.result;
4084 ctype->width.iov_len = t.result_size;
4087 /* Set MB_CUR_MAX. */
4088 ctype->mb_cur_max = charmap->mb_cur_max;
4090 /* Now determine the table for the transliteration information.
4092 XXX It is not yet clear to me whether it is worth implementing a
4093 complicated algorithm which uses a hash table to locate the entries.
4094 For now I'll use a simple array which can be searched using binary
4095 search. */
4096 if (ctype->translit_include != NULL)
4097 /* Traverse the locales mentioned in the `include' statements in a
4098 depth-first way and fold in their transliteration information. */
4099 translit_flatten (ctype, charmap, &ctype->translit);
4101 if (ctype->translit != NULL)
4103 /* First count how many entries we have. This is the upper limit
4104 since some entries from the included files might be overwritten. */
4105 size_t number = 0;
4106 size_t cnt;
4107 struct translit_t *runp = ctype->translit;
4108 struct translit_t **sorted;
4109 size_t from_len, to_len;
4111 while (runp != NULL)
4113 ++number;
4114 runp = runp->next;
4117 /* Next we allocate an array large enough and fill in the values. */
4118 sorted = (struct translit_t **) alloca (number
4119 * sizeof (struct translit_t **));
4120 runp = ctype->translit;
4121 number = 0;
4124 /* Search for the place where to insert this string.
4125 XXX Better use a real sorting algorithm later. */
4126 size_t idx = 0;
4127 int replace = 0;
4129 while (idx < number)
4131 int res = wcscmp ((const wchar_t *) sorted[idx]->from,
4132 (const wchar_t *) runp->from);
4133 if (res == 0)
4135 replace = 1;
4136 break;
4138 if (res > 0)
4139 break;
4140 ++idx;
4143 if (replace)
4144 sorted[idx] = runp;
4145 else
4147 memmove (&sorted[idx + 1], &sorted[idx],
4148 (number - idx) * sizeof (struct translit_t *));
4149 sorted[idx] = runp;
4150 ++number;
4153 runp = runp->next;
4155 while (runp != NULL);
4157 /* The next step is putting all the possible transliteration
4158 strings in one memory block so that we can write it out.
4159 We need several different blocks:
4160 - index to the from-string array
4161 - from-string array
4162 - index to the to-string array
4163 - to-string array.
4165 from_len = to_len = 0;
4166 for (cnt = 0; cnt < number; ++cnt)
4168 struct translit_to_t *srunp;
4169 from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4170 srunp = sorted[cnt]->to;
4171 while (srunp != NULL)
4173 to_len += wcslen ((const wchar_t *) srunp->str) + 1;
4174 srunp = srunp->next;
4176 /* Plus one for the extra NUL character marking the end of
4177 the list for the current entry. */
4178 ++to_len;
4181 /* We can allocate the arrays for the results. */
4182 ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
4183 ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
4184 ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
4185 ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
4187 from_len = 0;
4188 to_len = 0;
4189 for (cnt = 0; cnt < number; ++cnt)
4191 size_t len;
4192 struct translit_to_t *srunp;
4194 ctype->translit_from_idx[cnt] = from_len;
4195 ctype->translit_to_idx[cnt] = to_len;
4197 len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4198 wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
4199 (const wchar_t *) sorted[cnt]->from, len);
4200 from_len += len;
4202 ctype->translit_to_idx[cnt] = to_len;
4203 srunp = sorted[cnt]->to;
4204 while (srunp != NULL)
4206 len = wcslen ((const wchar_t *) srunp->str) + 1;
4207 wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
4208 (const wchar_t *) srunp->str, len);
4209 to_len += len;
4210 srunp = srunp->next;
4212 ctype->translit_to_tbl[to_len++] = L'\0';
4215 /* Store the information about the length. */
4216 ctype->translit_idx_size = number;
4217 ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
4218 ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
4220 else
4222 /* Provide some dummy pointers since we have nothing to write out. */
4223 static uint32_t no_str = { 0 };
4225 ctype->translit_from_idx = &no_str;
4226 ctype->translit_from_tbl = &no_str;
4227 ctype->translit_to_tbl = &no_str;
4228 ctype->translit_idx_size = 0;
4229 ctype->translit_from_tbl_size = 0;
4230 ctype->translit_to_tbl_size = 0;