Replace FSF snail mail address with URLs.
[glibc.git] / locale / programs / ld-ctype.c
blobc4790d8b14c18335e3583a28f4e7b444e09d5950
1 /* Copyright (C) 1995-2006,2007,2009,2011 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, see <http://www.gnu.org/licenses/>. */
18 #ifdef HAVE_CONFIG_H
19 # include <config.h>
20 #endif
22 #include <alloca.h>
23 #include <byteswap.h>
24 #include <endian.h>
25 #include <errno.h>
26 #include <limits.h>
27 #include <obstack.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <wchar.h>
31 #include <wctype.h>
32 #include <sys/uio.h>
34 #include "localedef.h"
35 #include "charmap.h"
36 #include "localeinfo.h"
37 #include "langinfo.h"
38 #include "linereader.h"
39 #include "locfile-token.h"
40 #include "locfile.h"
42 #include <assert.h>
#ifdef PREDEFINED_CLASSES
/* These are the extra bits not in wctype.h since these are not preallocated
   classes.  The operand is unsigned so that setting bit 31 does not shift
   into the sign bit of a signed int.  */
# define _ISwspecial1   ((uint32_t) 1 << 29)
# define _ISwspecial2   ((uint32_t) 1 << 30)
# define _ISwspecial3   ((uint32_t) 1 << 31)
#endif


/* The bit used for representing a special class.  BITPOS yields the
   position of a class token relative to tok_upper; BIT and BITw turn
   that position into the corresponding mask via _ISbit and _ISwbit.  */
#define BITPOS(class) ((class) - tok_upper)
#define BIT(class) (_ISbit (BITPOS (class)))
#define BITw(class) (_ISwbit (BITPOS (class)))

/* Expand to an lvalue for the slot find_idx returns for VALUE in the
   table CTYPE->COLLECTION.  IDX is either empty or a subscript such as
   `[n]' that is appended to the table and to its `_max' and `_act'
   counters.  */
#define ELEM(ctype, collection, idx, value)                                   \
  *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,     \
             &ctype->collection##_act idx, value)
/* To be compatible with former implementations we for now restrict
   the number of bits for character classes to 16.  When compatibility
   is not necessary anymore increase the number to 32.  */
#define char_class_t uint16_t
#define char_class32_t uint32_t
/* Type to describe a transliteration action.  We have a possibly
   multiple character from-string and a set of multiple character
   to-strings.  All are 32bit values since this is what is used in
   the gconv functions.  */

/* One to-string of a transliteration rule; the alternatives of a rule
   are chained through NEXT.  */
struct translit_to_t
{
  uint32_t *str;

  struct translit_to_t *next;
};
/* One transliteration rule: the from-string, the list of to-strings and
   the file name and line number recorded alongside the rule.  Rules are
   chained through NEXT.  */
struct translit_t
{
  uint32_t *from;

  const char *fname;
  size_t lineno;

  struct translit_to_t *to;

  struct translit_t *next;
};
/* One entry of the translit_ignore list, described by the three values
   FROM, TO and STEP, plus the file name and line number recorded
   alongside it.  Entries are chained through NEXT; ctype_finish orders
   the list by FROM.  */
struct translit_ignore_t
{
  uint32_t from;
  uint32_t to;
  uint32_t step;

  const char *fname;
  size_t lineno;

  struct translit_ignore_t *next;
};
/* Type to describe a transliteration include statement.  Each entry
   carries a locale name and a repertoire name; entries are chained
   through NEXT.  */
struct translit_include_t
{
  const char *copy_locale;
  const char *copy_repertoire;

  struct translit_include_t *next;
};
117 /* Sparse table of uint32_t. */
118 #define TABLE idx_table
119 #define ELEMENT uint32_t
120 #define DEFAULT ((uint32_t) ~0)
121 #define NO_FINALIZE
122 #include "3level.h"
125 /* The real definition of the struct for the LC_CTYPE locale. */
126 struct locale_ctype_t
128 uint32_t *charnames;
129 size_t charnames_max;
130 size_t charnames_act;
131 /* An index lookup table, to speedup find_idx. */
132 struct idx_table charnames_idx;
134 struct repertoire_t *repertoire;
136 /* We will allow up to 8 * sizeof (uint32_t) character classes. */
137 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
138 size_t nr_charclass;
139 const char *classnames[MAX_NR_CHARCLASS];
140 uint32_t last_class_char;
141 uint32_t class256_collection[256];
142 uint32_t *class_collection;
143 size_t class_collection_max;
144 size_t class_collection_act;
145 uint32_t class_done;
146 uint32_t class_offset;
148 struct charseq **mbdigits;
149 size_t mbdigits_act;
150 size_t mbdigits_max;
151 uint32_t *wcdigits;
152 size_t wcdigits_act;
153 size_t wcdigits_max;
155 struct charseq *mboutdigits[10];
156 uint32_t wcoutdigits[10];
157 size_t outdigits_act;
159 /* If the following number ever turns out to be too small simply
160 increase it. But I doubt it will. --drepper@gnu */
161 #define MAX_NR_CHARMAP 16
162 const char *mapnames[MAX_NR_CHARMAP];
163 uint32_t *map_collection[MAX_NR_CHARMAP];
164 uint32_t map256_collection[2][256];
165 size_t map_collection_max[MAX_NR_CHARMAP];
166 size_t map_collection_act[MAX_NR_CHARMAP];
167 size_t map_collection_nr;
168 size_t last_map_idx;
169 int tomap_done[MAX_NR_CHARMAP];
170 uint32_t map_offset;
172 /* Transliteration information. */
173 struct translit_include_t *translit_include;
174 struct translit_t *translit;
175 struct translit_ignore_t *translit_ignore;
176 uint32_t ntranslit_ignore;
178 uint32_t *default_missing;
179 const char *default_missing_file;
180 size_t default_missing_lineno;
182 uint32_t to_nonascii;
183 uint32_t nonascii_case;
185 /* The arrays for the binary representation. */
186 char_class_t *ctype_b;
187 char_class32_t *ctype32_b;
188 uint32_t **map_b;
189 uint32_t **map32_b;
190 uint32_t **class_b;
191 struct iovec *class_3level;
192 struct iovec *map_3level;
193 uint32_t *class_name_ptr;
194 uint32_t *map_name_ptr;
195 struct iovec width;
196 uint32_t mb_cur_max;
197 const char *codeset_name;
198 uint32_t *translit_from_idx;
199 uint32_t *translit_from_tbl;
200 uint32_t *translit_to_idx;
201 uint32_t *translit_to_tbl;
202 uint32_t translit_idx_size;
203 size_t translit_from_tbl_size;
204 size_t translit_to_tbl_size;
206 struct obstack mempool;
/* Marker for an empty slot.  This has the value 0xFFFFFFFF, regardless
   whether 'int' is 16 bit, 32 bit, or 64 bit.  */
#define EMPTY ((uint32_t) ~0)


/* Allocation hooks used by the obstack macros.  */
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free
/* Prototypes for local functions.  */
static void ctype_startup (struct linereader *lr, struct localedef_t *locale,
                           const struct charmap_t *charmap,
                           struct localedef_t *copy_locale,
                           int ignore_content);
static void ctype_class_new (struct linereader *lr,
                             struct locale_ctype_t *ctype, const char *name);
static void ctype_map_new (struct linereader *lr,
                           struct locale_ctype_t *ctype,
                           const char *name, const struct charmap_t *charmap);
static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table,
                           size_t *max, size_t *act, unsigned int idx);
static void set_class_defaults (struct locale_ctype_t *ctype,
                                const struct charmap_t *charmap,
                                struct repertoire_t *repertoire);
static void allocate_arrays (struct locale_ctype_t *ctype,
                             const struct charmap_t *charmap,
                             struct repertoire_t *repertoire);
/* Spelled-out symbolic names of the ten decimal digits, tried by
   ctype_finish when a digit is not found under its one-character
   name.  */
static const char *longnames[] =
{
  "zero", "one", "two", "three", "four",
  "five", "six", "seven", "eight", "nine"
};
/* Symbolic names of the form U<8 hex digits> for U+0030 .. U+0039.  */
static const char *uninames[] =
{
  "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
  "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
};
/* The ten ASCII digit characters, indexed by digit value.  */
static const unsigned char digits[] = "0123456789";
252 static void
253 ctype_startup (struct linereader *lr, struct localedef_t *locale,
254 const struct charmap_t *charmap,
255 struct localedef_t *copy_locale, int ignore_content)
257 unsigned int cnt;
258 struct locale_ctype_t *ctype;
260 if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL)
262 if (copy_locale == NULL)
264 /* Allocate the needed room. */
265 locale->categories[LC_CTYPE].ctype = ctype =
266 (struct locale_ctype_t *) xcalloc (1,
267 sizeof (struct locale_ctype_t));
269 /* We have seen no names yet. */
270 ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
271 ctype->charnames =
272 (unsigned int *) xmalloc (ctype->charnames_max
273 * sizeof (unsigned int));
274 for (cnt = 0; cnt < 256; ++cnt)
275 ctype->charnames[cnt] = cnt;
276 ctype->charnames_act = 256;
277 idx_table_init (&ctype->charnames_idx);
279 /* Fill character class information. */
280 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
281 /* The order of the following instructions determines the bit
282 positions! */
283 ctype_class_new (lr, ctype, "upper");
284 ctype_class_new (lr, ctype, "lower");
285 ctype_class_new (lr, ctype, "alpha");
286 ctype_class_new (lr, ctype, "digit");
287 ctype_class_new (lr, ctype, "xdigit");
288 ctype_class_new (lr, ctype, "space");
289 ctype_class_new (lr, ctype, "print");
290 ctype_class_new (lr, ctype, "graph");
291 ctype_class_new (lr, ctype, "blank");
292 ctype_class_new (lr, ctype, "cntrl");
293 ctype_class_new (lr, ctype, "punct");
294 ctype_class_new (lr, ctype, "alnum");
295 #ifdef PREDEFINED_CLASSES
296 /* The following are extensions from ISO 14652. */
297 ctype_class_new (lr, ctype, "left_to_right");
298 ctype_class_new (lr, ctype, "right_to_left");
299 ctype_class_new (lr, ctype, "num_terminator");
300 ctype_class_new (lr, ctype, "num_separator");
301 ctype_class_new (lr, ctype, "segment_separator");
302 ctype_class_new (lr, ctype, "block_separator");
303 ctype_class_new (lr, ctype, "direction_control");
304 ctype_class_new (lr, ctype, "sym_swap_layout");
305 ctype_class_new (lr, ctype, "char_shape_selector");
306 ctype_class_new (lr, ctype, "num_shape_selector");
307 ctype_class_new (lr, ctype, "non_spacing");
308 ctype_class_new (lr, ctype, "non_spacing_level3");
309 ctype_class_new (lr, ctype, "normal_connect");
310 ctype_class_new (lr, ctype, "r_connect");
311 ctype_class_new (lr, ctype, "no_connect");
312 ctype_class_new (lr, ctype, "no_connect-space");
313 ctype_class_new (lr, ctype, "vowel_connect");
314 #endif
316 ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
317 ctype->class_collection
318 = (uint32_t *) xcalloc (sizeof (unsigned long int),
319 ctype->class_collection_max);
320 ctype->class_collection_act = 256;
322 /* Fill character map information. */
323 ctype->last_map_idx = MAX_NR_CHARMAP;
324 ctype_map_new (lr, ctype, "toupper", charmap);
325 ctype_map_new (lr, ctype, "tolower", charmap);
326 #ifdef PREDEFINED_CLASSES
327 ctype_map_new (lr, ctype, "tosymmetric", charmap);
328 #endif
330 /* Fill first 256 entries in `toXXX' arrays. */
331 for (cnt = 0; cnt < 256; ++cnt)
333 ctype->map_collection[0][cnt] = cnt;
334 ctype->map_collection[1][cnt] = cnt;
335 #ifdef PREDEFINED_CLASSES
336 ctype->map_collection[2][cnt] = cnt;
337 #endif
338 ctype->map256_collection[0][cnt] = cnt;
339 ctype->map256_collection[1][cnt] = cnt;
342 if (enc_not_ascii_compatible)
343 ctype->to_nonascii = 1;
345 obstack_init (&ctype->mempool);
347 else
348 ctype = locale->categories[LC_CTYPE].ctype =
349 copy_locale->categories[LC_CTYPE].ctype;
354 void
355 ctype_finish (struct localedef_t *locale, const struct charmap_t *charmap)
357 /* See POSIX.2, table 2-6 for the meaning of the following table. */
358 #define NCLASS 12
359 static const struct
361 const char *name;
362 const char allow[NCLASS];
364 valid_table[NCLASS] =
366 /* The order is important. See token.h for more information.
367 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
368 { "upper", "--MX-XDDXXX-" },
369 { "lower", "--MX-XDDXXX-" },
370 { "alpha", "---X-XDDXXX-" },
371 { "digit", "XXX--XDDXXX-" },
372 { "xdigit", "-----XDDXXX-" },
373 { "space", "XXXXX------X" },
374 { "print", "---------X--" },
375 { "graph", "---------X--" },
376 { "blank", "XXXXXM-----X" },
377 { "cntrl", "XXXXX-XX--XX" },
378 { "punct", "XXXXX-DD-X-X" },
379 { "alnum", "-----XDDXXX-" }
381 size_t cnt;
382 int cls1, cls2;
383 uint32_t space_value;
384 struct charseq *space_seq;
385 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
386 int warned;
387 const void *key;
388 size_t len;
389 void *vdata;
390 void *curs;
392 /* Now resolve copying and also handle completely missing definitions. */
393 if (ctype == NULL)
395 const char *repertoire_name;
397 /* First see whether we were supposed to copy. If yes, find the
398 actual definition. */
399 if (locale->copy_name[LC_CTYPE] != NULL)
401 /* Find the copying locale. This has to happen transitively since
402 the locale we are copying from might also copying another one. */
403 struct localedef_t *from = locale;
406 from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
407 from->repertoire_name, charmap);
408 while (from->categories[LC_CTYPE].ctype == NULL
409 && from->copy_name[LC_CTYPE] != NULL);
411 ctype = locale->categories[LC_CTYPE].ctype
412 = from->categories[LC_CTYPE].ctype;
415 /* If there is still no definition issue an warning and create an
416 empty one. */
417 if (ctype == NULL)
419 if (! be_quiet)
420 WITH_CUR_LOCALE (error (0, 0, _("\
421 No definition for %s category found"), "LC_CTYPE"));
422 ctype_startup (NULL, locale, charmap, NULL, 0);
423 ctype = locale->categories[LC_CTYPE].ctype;
426 /* Get the repertoire we have to use. */
427 repertoire_name = locale->repertoire_name ?: repertoire_global;
428 if (repertoire_name != NULL)
429 ctype->repertoire = repertoire_read (repertoire_name);
432 /* We need the name of the currently used 8-bit character set to
433 make correct conversion between this 8-bit representation and the
434 ISO 10646 character set used internally for wide characters. */
435 ctype->codeset_name = charmap->code_set_name;
436 if (ctype->codeset_name == NULL)
438 if (! be_quiet)
439 WITH_CUR_LOCALE (error (0, 0, _("\
440 No character set name specified in charmap")));
441 ctype->codeset_name = "//UNKNOWN//";
444 /* Set default value for classes not specified. */
445 set_class_defaults (ctype, charmap, ctype->repertoire);
447 /* Check according to table. */
448 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
450 uint32_t tmp = ctype->class_collection[cnt];
452 if (tmp != 0)
454 for (cls1 = 0; cls1 < NCLASS; ++cls1)
455 if ((tmp & _ISwbit (cls1)) != 0)
456 for (cls2 = 0; cls2 < NCLASS; ++cls2)
457 if (valid_table[cls1].allow[cls2] != '-')
459 int eq = (tmp & _ISwbit (cls2)) != 0;
460 switch (valid_table[cls1].allow[cls2])
462 case 'M':
463 if (!eq)
465 uint32_t value = ctype->charnames[cnt];
467 if (!be_quiet)
468 WITH_CUR_LOCALE (error (0, 0, _("\
469 character L'\\u%0*x' in class `%s' must be in class `%s'"),
470 value > 0xffff ? 8 : 4,
471 value,
472 valid_table[cls1].name,
473 valid_table[cls2].name));
475 break;
477 case 'X':
478 if (eq)
480 uint32_t value = ctype->charnames[cnt];
482 if (!be_quiet)
483 WITH_CUR_LOCALE (error (0, 0, _("\
484 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
485 value > 0xffff ? 8 : 4,
486 value,
487 valid_table[cls1].name,
488 valid_table[cls2].name));
490 break;
492 case 'D':
493 ctype->class_collection[cnt] |= _ISwbit (cls2);
494 break;
496 default:
497 WITH_CUR_LOCALE (error (5, 0, _("\
498 internal error in %s, line %u"), __FUNCTION__, __LINE__));
504 for (cnt = 0; cnt < 256; ++cnt)
506 uint32_t tmp = ctype->class256_collection[cnt];
508 if (tmp != 0)
510 for (cls1 = 0; cls1 < NCLASS; ++cls1)
511 if ((tmp & _ISbit (cls1)) != 0)
512 for (cls2 = 0; cls2 < NCLASS; ++cls2)
513 if (valid_table[cls1].allow[cls2] != '-')
515 int eq = (tmp & _ISbit (cls2)) != 0;
516 switch (valid_table[cls1].allow[cls2])
518 case 'M':
519 if (!eq)
521 char buf[17];
523 snprintf (buf, sizeof buf, "\\%Zo", cnt);
525 if (!be_quiet)
526 WITH_CUR_LOCALE (error (0, 0, _("\
527 character '%s' in class `%s' must be in class `%s'"),
528 buf,
529 valid_table[cls1].name,
530 valid_table[cls2].name));
532 break;
534 case 'X':
535 if (eq)
537 char buf[17];
539 snprintf (buf, sizeof buf, "\\%Zo", cnt);
541 if (!be_quiet)
542 WITH_CUR_LOCALE (error (0, 0, _("\
543 character '%s' in class `%s' must not be in class `%s'"),
544 buf,
545 valid_table[cls1].name,
546 valid_table[cls2].name));
548 break;
550 case 'D':
551 ctype->class256_collection[cnt] |= _ISbit (cls2);
552 break;
554 default:
555 WITH_CUR_LOCALE (error (5, 0, _("\
556 internal error in %s, line %u"), __FUNCTION__, __LINE__));
562 /* ... and now test <SP> as a special case. */
563 space_value = 32;
564 if (((cnt = BITPOS (tok_space),
565 (ELEM (ctype, class_collection, , space_value)
566 & BITw (tok_space)) == 0)
567 || (cnt = BITPOS (tok_blank),
568 (ELEM (ctype, class_collection, , space_value)
569 & BITw (tok_blank)) == 0)))
571 if (!be_quiet)
572 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
573 valid_table[cnt].name));
575 else if (((cnt = BITPOS (tok_punct),
576 (ELEM (ctype, class_collection, , space_value)
577 & BITw (tok_punct)) != 0)
578 || (cnt = BITPOS (tok_graph),
579 (ELEM (ctype, class_collection, , space_value)
580 & BITw (tok_graph))
581 != 0)))
583 if (!be_quiet)
584 WITH_CUR_LOCALE (error (0, 0, _("\
585 <SP> character must not be in class `%s'"),
586 valid_table[cnt].name));
588 else
589 ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
591 space_seq = charmap_find_value (charmap, "SP", 2);
592 if (space_seq == NULL)
593 space_seq = charmap_find_value (charmap, "space", 5);
594 if (space_seq == NULL)
595 space_seq = charmap_find_value (charmap, "U00000020", 9);
596 if (space_seq == NULL || space_seq->nbytes != 1)
598 if (!be_quiet)
599 WITH_CUR_LOCALE (error (0, 0, _("\
600 character <SP> not defined in character map")));
602 else if (((cnt = BITPOS (tok_space),
603 (ctype->class256_collection[space_seq->bytes[0]]
604 & BIT (tok_space)) == 0)
605 || (cnt = BITPOS (tok_blank),
606 (ctype->class256_collection[space_seq->bytes[0]]
607 & BIT (tok_blank)) == 0)))
609 if (!be_quiet)
610 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
611 valid_table[cnt].name));
613 else if (((cnt = BITPOS (tok_punct),
614 (ctype->class256_collection[space_seq->bytes[0]]
615 & BIT (tok_punct)) != 0)
616 || (cnt = BITPOS (tok_graph),
617 (ctype->class256_collection[space_seq->bytes[0]]
618 & BIT (tok_graph)) != 0)))
620 if (!be_quiet)
621 WITH_CUR_LOCALE (error (0, 0, _("\
622 <SP> character must not be in class `%s'"),
623 valid_table[cnt].name));
625 else
626 ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
628 /* Check whether all single-byte characters make to their upper/lowercase
629 equivalent according to the ASCII rules. */
630 for (cnt = 'A'; cnt <= 'Z'; ++cnt)
632 uint32_t uppval = ctype->map256_collection[0][cnt];
633 uint32_t lowval = ctype->map256_collection[1][cnt];
634 uint32_t lowuppval = ctype->map256_collection[0][lowval];
635 uint32_t lowlowval = ctype->map256_collection[1][lowval];
637 if (uppval != cnt
638 || lowval != cnt + 0x20
639 || lowuppval != cnt
640 || lowlowval != cnt + 0x20)
641 ctype->nonascii_case = 1;
643 for (cnt = 0; cnt < 256; ++cnt)
644 if (cnt < 'A' || (cnt > 'Z' && cnt < 'a') || cnt > 'z')
645 if (ctype->map256_collection[0][cnt] != cnt
646 || ctype->map256_collection[1][cnt] != cnt)
647 ctype->nonascii_case = 1;
649 /* Now that the tests are done make sure the name array contains all
650 characters which are handled in the WIDTH section of the
651 character set definition file. */
652 if (charmap->width_rules != NULL)
653 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
655 unsigned char bytes[charmap->mb_cur_max];
656 int nbytes = charmap->width_rules[cnt].from->nbytes;
658 /* We have the range of character for which the width is
659 specified described using byte sequences of the multibyte
660 charset. We have to convert this to UCS4 now. And we
661 cannot simply convert the beginning and the end of the
662 sequence, we have to iterate over the byte sequence and
663 convert it for every single character. */
664 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
666 while (nbytes < charmap->width_rules[cnt].to->nbytes
667 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
668 nbytes) <= 0)
670 /* Find the UCS value for `bytes'. */
671 int inner;
672 uint32_t wch;
673 struct charseq *seq
674 = charmap_find_symbol (charmap, (char *) bytes, nbytes);
676 if (seq == NULL)
677 wch = ILLEGAL_CHAR_VALUE;
678 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
679 wch = seq->ucs4;
680 else
681 wch = repertoire_find_value (ctype->repertoire, seq->name,
682 strlen (seq->name));
684 if (wch != ILLEGAL_CHAR_VALUE)
685 /* We are only interested in the side-effects of the
686 `find_idx' call. It will add appropriate entries in
687 the name array if this is necessary. */
688 (void) find_idx (ctype, NULL, NULL, NULL, wch);
690 /* "Increment" the bytes sequence. */
691 inner = nbytes - 1;
692 while (inner >= 0 && bytes[inner] == 0xff)
693 --inner;
695 if (inner < 0)
697 /* We have to extend the byte sequence. */
698 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
699 break;
701 bytes[0] = 1;
702 memset (&bytes[1], 0, nbytes);
703 ++nbytes;
705 else
707 ++bytes[inner];
708 while (++inner < nbytes)
709 bytes[inner] = 0;
714 /* Now set all the other characters of the character set to the
715 default width. */
716 curs = NULL;
717 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
719 struct charseq *data = (struct charseq *) vdata;
721 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
722 data->ucs4 = repertoire_find_value (ctype->repertoire,
723 data->name, len);
725 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
726 (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
729 /* There must be a multiple of 10 digits. */
730 if (ctype->mbdigits_act % 10 != 0)
732 assert (ctype->mbdigits_act == ctype->wcdigits_act);
733 ctype->wcdigits_act -= ctype->mbdigits_act % 10;
734 ctype->mbdigits_act -= ctype->mbdigits_act % 10;
735 WITH_CUR_LOCALE (error (0, 0, _("\
736 `digit' category has not entries in groups of ten")));
739 /* Check the input digits. There must be a multiple of ten available.
740 In each group it could be that one or the other character is missing.
741 In this case the whole group must be removed. */
742 cnt = 0;
743 while (cnt < ctype->mbdigits_act)
745 size_t inner;
746 for (inner = 0; inner < 10; ++inner)
747 if (ctype->mbdigits[cnt + inner] == NULL)
748 break;
750 if (inner == 10)
751 cnt += 10;
752 else
754 /* Remove the group. */
755 memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
756 ((ctype->wcdigits_act - cnt - 10)
757 * sizeof (ctype->mbdigits[0])));
758 ctype->mbdigits_act -= 10;
762 /* If no input digits are given use the default. */
763 if (ctype->mbdigits_act == 0)
765 if (ctype->mbdigits_max == 0)
767 ctype->mbdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
768 10 * sizeof (struct charseq *));
769 ctype->mbdigits_max = 10;
772 for (cnt = 0; cnt < 10; ++cnt)
774 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
775 (char *) digits + cnt, 1);
776 if (ctype->mbdigits[cnt] == NULL)
778 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
779 longnames[cnt],
780 strlen (longnames[cnt]));
781 if (ctype->mbdigits[cnt] == NULL)
783 /* Hum, this ain't good. */
784 WITH_CUR_LOCALE (error (0, 0, _("\
785 no input digits defined and none of the standard names in the charmap")));
787 ctype->mbdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
788 sizeof (struct charseq) + 1);
790 /* This is better than nothing. */
791 ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
792 ctype->mbdigits[cnt]->nbytes = 1;
797 ctype->mbdigits_act = 10;
800 /* Check the wide character input digits. There must be a multiple
801 of ten available. In each group it could be that one or the other
802 character is missing. In this case the whole group must be
803 removed. */
804 cnt = 0;
805 while (cnt < ctype->wcdigits_act)
807 size_t inner;
808 for (inner = 0; inner < 10; ++inner)
809 if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
810 break;
812 if (inner == 10)
813 cnt += 10;
814 else
816 /* Remove the group. */
817 memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
818 ((ctype->wcdigits_act - cnt - 10)
819 * sizeof (ctype->wcdigits[0])));
820 ctype->wcdigits_act -= 10;
824 /* If no input digits are given use the default. */
825 if (ctype->wcdigits_act == 0)
827 if (ctype->wcdigits_max == 0)
829 ctype->wcdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
830 10 * sizeof (uint32_t));
831 ctype->wcdigits_max = 10;
834 for (cnt = 0; cnt < 10; ++cnt)
835 ctype->wcdigits[cnt] = L'0' + cnt;
837 ctype->mbdigits_act = 10;
840 /* Check the outdigits. */
841 warned = 0;
842 for (cnt = 0; cnt < 10; ++cnt)
843 if (ctype->mboutdigits[cnt] == NULL)
845 static struct charseq replace[2];
847 if (!warned)
849 WITH_CUR_LOCALE (error (0, 0, _("\
850 not all characters used in `outdigit' are available in the charmap")));
851 warned = 1;
854 replace[0].nbytes = 1;
855 replace[0].bytes[0] = '?';
856 replace[0].bytes[1] = '\0';
857 ctype->mboutdigits[cnt] = &replace[0];
860 warned = 0;
861 for (cnt = 0; cnt < 10; ++cnt)
862 if (ctype->wcoutdigits[cnt] == 0)
864 if (!warned)
866 WITH_CUR_LOCALE (error (0, 0, _("\
867 not all characters used in `outdigit' are available in the repertoire")));
868 warned = 1;
871 ctype->wcoutdigits[cnt] = L'?';
874 /* Sort the entries in the translit_ignore list. */
875 if (ctype->translit_ignore != NULL)
877 struct translit_ignore_t *firstp = ctype->translit_ignore;
878 struct translit_ignore_t *runp;
880 ctype->ntranslit_ignore = 1;
882 for (runp = firstp->next; runp != NULL; runp = runp->next)
884 struct translit_ignore_t *lastp = NULL;
885 struct translit_ignore_t *cmpp;
887 ++ctype->ntranslit_ignore;
889 for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next)
890 if (runp->from < cmpp->from)
891 break;
893 runp->next = lastp;
894 if (lastp == NULL)
895 firstp = runp;
898 ctype->translit_ignore = firstp;
903 void
904 ctype_output (struct localedef_t *locale, const struct charmap_t *charmap,
905 const char *output_path)
907 static const char nulbytes[4] = { 0, 0, 0, 0 };
908 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
909 const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)
910 + ctype->nr_charclass + ctype->map_collection_nr);
911 struct iovec *iov = alloca (sizeof *iov
912 * (2 + nelems + 2 * ctype->nr_charclass
913 + ctype->map_collection_nr + 4));
914 struct locale_file data;
915 uint32_t *idx = alloca (sizeof *idx * (nelems + 1));
916 uint32_t default_missing_len;
917 size_t elem, cnt, offset, total;
918 char *cp;
920 /* Now prepare the output: Find the sizes of the table we can use. */
921 allocate_arrays (ctype, charmap, ctype->repertoire);
923 data.magic = LIMAGIC (LC_CTYPE);
924 data.n = nelems;
925 iov[0].iov_base = (void *) &data;
926 iov[0].iov_len = sizeof (data);
928 iov[1].iov_base = (void *) idx;
929 iov[1].iov_len = nelems * sizeof (uint32_t);
931 idx[0] = iov[0].iov_len + iov[1].iov_len;
932 offset = 0;
934 for (elem = 0; elem < nelems; ++elem)
936 if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))
937 switch (elem)
939 #define CTYPE_EMPTY(name) \
940 case name: \
941 iov[2 + elem + offset].iov_base = NULL; \
942 iov[2 + elem + offset].iov_len = 0; \
943 idx[elem + 1] = idx[elem]; \
944 break
946 CTYPE_EMPTY(_NL_CTYPE_GAP1);
947 CTYPE_EMPTY(_NL_CTYPE_GAP2);
948 CTYPE_EMPTY(_NL_CTYPE_GAP3);
949 CTYPE_EMPTY(_NL_CTYPE_GAP4);
950 CTYPE_EMPTY(_NL_CTYPE_GAP5);
951 CTYPE_EMPTY(_NL_CTYPE_GAP6);
953 #define CTYPE_DATA(name, base, len) \
954 case _NL_ITEM_INDEX (name): \
955 iov[2 + elem + offset].iov_base = (base); \
956 iov[2 + elem + offset].iov_len = (len); \
957 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
958 break
960 CTYPE_DATA (_NL_CTYPE_CLASS,
961 ctype->ctype_b,
962 (256 + 128) * sizeof (char_class_t));
964 CTYPE_DATA (_NL_CTYPE_TOUPPER,
965 ctype->map_b[0],
966 (256 + 128) * sizeof (uint32_t));
967 CTYPE_DATA (_NL_CTYPE_TOLOWER,
968 ctype->map_b[1],
969 (256 + 128) * sizeof (uint32_t));
971 CTYPE_DATA (_NL_CTYPE_TOUPPER32,
972 ctype->map32_b[0],
973 256 * sizeof (uint32_t));
974 CTYPE_DATA (_NL_CTYPE_TOLOWER32,
975 ctype->map32_b[1],
976 256 * sizeof (uint32_t));
978 CTYPE_DATA (_NL_CTYPE_CLASS32,
979 ctype->ctype32_b,
980 256 * sizeof (char_class32_t));
982 CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET,
983 &ctype->class_offset, sizeof (uint32_t));
985 CTYPE_DATA (_NL_CTYPE_MAP_OFFSET,
986 &ctype->map_offset, sizeof (uint32_t));
988 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE,
989 &ctype->translit_idx_size, sizeof (uint32_t));
991 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX,
992 ctype->translit_from_idx,
993 ctype->translit_idx_size * sizeof (uint32_t));
995 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL,
996 ctype->translit_from_tbl,
997 ctype->translit_from_tbl_size);
999 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX,
1000 ctype->translit_to_idx,
1001 ctype->translit_idx_size * sizeof (uint32_t));
1003 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL,
1004 ctype->translit_to_tbl, ctype->translit_to_tbl_size);
1006 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
1007 /* The class name array. */
1008 total = 0;
1009 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
1011 iov[2 + elem + offset].iov_base
1012 = (void *) ctype->classnames[cnt];
1013 iov[2 + elem + offset].iov_len
1014 = strlen (ctype->classnames[cnt]) + 1;
1015 total += iov[2 + elem + offset].iov_len;
1017 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1018 iov[2 + elem + offset].iov_len = 4 - (total % 4);
1019 total += 4 - (total % 4);
1021 idx[elem + 1] = idx[elem] + total;
1022 break;
1024 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
1025 /* The class name array. */
1026 total = 0;
1027 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
1029 iov[2 + elem + offset].iov_base
1030 = (void *) ctype->mapnames[cnt];
1031 iov[2 + elem + offset].iov_len
1032 = strlen (ctype->mapnames[cnt]) + 1;
1033 total += iov[2 + elem + offset].iov_len;
1035 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1036 iov[2 + elem + offset].iov_len = 4 - (total % 4);
1037 total += 4 - (total % 4);
1039 idx[elem + 1] = idx[elem] + total;
1040 break;
1042 CTYPE_DATA (_NL_CTYPE_WIDTH,
1043 ctype->width.iov_base,
1044 ctype->width.iov_len);
1046 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
1047 &ctype->mb_cur_max, sizeof (uint32_t));
1049 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
1050 total = strlen (ctype->codeset_name) + 1;
1051 if (total % 4 == 0)
1052 iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
1053 else
1055 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
1056 memset (mempcpy (iov[2 + elem + offset].iov_base,
1057 ctype->codeset_name, total),
1058 '\0', 4 - (total & 3));
1059 total = (total + 3) & ~3;
1061 iov[2 + elem + offset].iov_len = total;
1062 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1063 break;
1066 CTYPE_DATA (_NL_CTYPE_MAP_TO_NONASCII,
1067 &ctype->to_nonascii, sizeof (uint32_t));
1069 CTYPE_DATA (_NL_CTYPE_NONASCII_CASE,
1070 &ctype->nonascii_case, sizeof (uint32_t));
1072 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
1073 iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1074 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1075 *(uint32_t *) iov[2 + elem + offset].iov_base =
1076 ctype->mbdigits_act / 10;
1077 idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1078 break;
1080 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
1081 /* Align entries. */
1082 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1083 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1084 idx[elem] += iov[2 + elem + offset].iov_len;
1085 ++offset;
1087 iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1088 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1089 *(uint32_t *) iov[2 + elem + offset].iov_base =
1090 ctype->wcdigits_act / 10;
1091 idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1092 break;
1094 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
1095 /* Compute the length of all possible characters. For INDIGITS
1096 there might be more than one. We simply concatenate all of
1097 them with a NUL byte following. The NUL byte wouldn't be
1098 necessary but it makes it easier for the user. */
1099 total = 0;
1101 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1102 cnt < ctype->mbdigits_act; cnt += 10)
1103 total += ctype->mbdigits[cnt]->nbytes + 1;
1104 iov[2 + elem + offset].iov_base = (char *) alloca (total);
1105 iov[2 + elem + offset].iov_len = total;
1107 cp = iov[2 + elem + offset].iov_base;
1108 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1109 cnt < ctype->mbdigits_act; cnt += 10)
1111 cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes,
1112 ctype->mbdigits[cnt]->nbytes);
1113 *cp++ = '\0';
1115 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1116 break;
1118 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
1119 /* Compute the length of all possible characters. For INDIGITS
1120 there might be more than one. We simply concatenate all of
1121 them with a NUL byte following. The NUL byte wouldn't be
1122 necessary but it makes it easier for the user. */
1123 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
1124 total = ctype->mboutdigits[cnt]->nbytes + 1;
1125 iov[2 + elem + offset].iov_base = (char *) alloca (total);
1126 iov[2 + elem + offset].iov_len = total;
1128 *(char *) mempcpy (iov[2 + elem + offset].iov_base,
1129 ctype->mboutdigits[cnt]->bytes,
1130 ctype->mboutdigits[cnt]->nbytes) = '\0';
1131 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1132 break;
1134 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
1135 total = ctype->wcdigits_act / 10;
1137 iov[2 + elem + offset].iov_base =
1138 (uint32_t *) alloca (total * sizeof (uint32_t));
1139 iov[2 + elem + offset].iov_len = total * sizeof (uint32_t);
1141 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
1142 cnt < ctype->wcdigits_act; cnt += 10)
1143 ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10]
1144 = ctype->wcdigits[cnt];
1145 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1146 break;
1148 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC):
1149 /* Align entries. */
1150 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1151 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1152 idx[elem] += iov[2 + elem + offset].iov_len;
1153 ++offset;
1154 /* FALLTRHOUGH */
1156 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT1_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
1157 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
1158 iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt];
1159 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1160 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1161 break;
1163 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
1164 /* Align entries. */
1165 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1166 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1167 idx[elem] += iov[2 + elem + offset].iov_len;
1168 ++offset;
1170 default_missing_len = (ctype->default_missing
1171 ? wcslen ((wchar_t *)ctype->default_missing)
1172 : 0);
1173 iov[2 + elem + offset].iov_base = &default_missing_len;
1174 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1175 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1176 break;
1178 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
1179 iov[2 + elem + offset].iov_base =
1180 ctype->default_missing ?: (uint32_t *) L"";
1181 iov[2 + elem + offset].iov_len =
1182 wcslen (iov[2 + elem + offset].iov_base) * sizeof (uint32_t);
1183 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1184 break;
1186 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
1187 /* Align entries. */
1188 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1189 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1190 idx[elem] += iov[2 + elem + offset].iov_len;
1191 ++offset;
1193 iov[2 + elem + offset].iov_base = &ctype->ntranslit_ignore;
1194 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1195 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1196 break;
1198 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
1200 uint32_t *ranges = (uint32_t *) alloca (ctype->ntranslit_ignore
1201 * 3 * sizeof (uint32_t));
1202 struct translit_ignore_t *runp;
1204 iov[2 + elem + offset].iov_base = ranges;
1205 iov[2 + elem + offset].iov_len = (ctype->ntranslit_ignore
1206 * 3 * sizeof (uint32_t));
1208 for (runp = ctype->translit_ignore; runp != NULL;
1209 runp = runp->next)
1211 *ranges++ = runp->from;
1212 *ranges++ = runp->to;
1213 *ranges++ = runp->step;
1216 /* Remove the following line in case a new entry is added
1217 after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN. */
1218 if (elem < nelems)
1219 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1220 break;
1222 default:
1223 assert (! "unknown CTYPE element");
1225 else
1227 /* Handle extra maps. */
1228 size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
1229 if (nr < ctype->nr_charclass)
1231 iov[2 + elem + offset].iov_base = ctype->class_b[nr];
1232 iov[2 + elem + offset].iov_len = 256 / 32 * sizeof (uint32_t);
1233 idx[elem] += iov[2 + elem + offset].iov_len;
1234 ++offset;
1236 iov[2 + elem + offset] = ctype->class_3level[nr];
1238 else
1240 nr -= ctype->nr_charclass;
1241 assert (nr < ctype->map_collection_nr);
1242 iov[2 + elem + offset] = ctype->map_3level[nr];
1244 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1248 assert (2 + elem + offset == (nelems + 2 * ctype->nr_charclass
1249 + ctype->map_collection_nr + 4 + 2));
1251 write_locale_data (output_path, LC_CTYPE, "LC_CTYPE", 2 + elem + offset,
1252 iov);
1256 /* Local functions. */
1257 static void
1258 ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
1259 const char *name)
1261 size_t cnt;
1263 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1264 if (strcmp (ctype->classnames[cnt], name) == 0)
1265 break;
1267 if (cnt < ctype->nr_charclass)
1269 lr_error (lr, _("character class `%s' already defined"), name);
1270 return;
1273 if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1274 /* Exit code 2 is prescribed in P1003.2b. */
1275 WITH_CUR_LOCALE (error (2, 0, _("\
1276 implementation limit: no more than %Zd character classes allowed"),
1277 MAX_NR_CHARCLASS));
1279 ctype->classnames[ctype->nr_charclass++] = name;
1283 static void
1284 ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1285 const char *name, const struct charmap_t *charmap)
1287 size_t max_chars = 0;
1288 size_t cnt;
1290 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1292 if (strcmp (ctype->mapnames[cnt], name) == 0)
1293 break;
1295 if (max_chars < ctype->map_collection_max[cnt])
1296 max_chars = ctype->map_collection_max[cnt];
1299 if (cnt < ctype->map_collection_nr)
1301 lr_error (lr, _("character map `%s' already defined"), name);
1302 return;
1305 if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1306 /* Exit code 2 is prescribed in P1003.2b. */
1307 WITH_CUR_LOCALE (error (2, 0, _("\
1308 implementation limit: no more than %d character maps allowed"),
1309 MAX_NR_CHARMAP));
1311 ctype->mapnames[cnt] = name;
1313 if (max_chars == 0)
1314 ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1315 else
1316 ctype->map_collection_max[cnt] = max_chars;
1318 ctype->map_collection[cnt] = (uint32_t *)
1319 xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
1320 ctype->map_collection_act[cnt] = 256;
1322 ++ctype->map_collection_nr;
1326 /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
1327 is possible if we only want to extend the name array. */
1328 static uint32_t *
1329 find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1330 size_t *act, uint32_t idx)
1332 size_t cnt;
1334 if (idx < 256)
1335 return table == NULL ? NULL : &(*table)[idx];
1337 /* Use the charnames_idx lookup table instead of the slow search loop. */
1338 #if 1
1339 cnt = idx_table_get (&ctype->charnames_idx, idx);
1340 if (cnt == EMPTY)
1341 /* Not found. */
1342 cnt = ctype->charnames_act;
1343 #else
1344 for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1345 if (ctype->charnames[cnt] == idx)
1346 break;
1347 #endif
1349 /* We have to distinguish two cases: the name is found or not. */
1350 if (cnt == ctype->charnames_act)
1352 /* Extend the name array. */
1353 if (ctype->charnames_act == ctype->charnames_max)
1355 ctype->charnames_max *= 2;
1356 ctype->charnames = (uint32_t *)
1357 xrealloc (ctype->charnames,
1358 sizeof (uint32_t) * ctype->charnames_max);
1360 ctype->charnames[ctype->charnames_act++] = idx;
1361 idx_table_add (&ctype->charnames_idx, idx, cnt);
1364 if (table == NULL)
1365 /* We have done everything we are asked to do. */
1366 return NULL;
1368 if (max == NULL)
1369 /* The caller does not want to extend the table. */
1370 return (cnt >= *act ? NULL : &(*table)[cnt]);
1372 if (cnt >= *act)
1374 if (cnt >= *max)
1376 size_t old_max = *max;
1378 *max *= 2;
1379 while (*max <= cnt);
1381 *table =
1382 (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
1383 memset (&(*table)[old_max], '\0',
1384 (*max - old_max) * sizeof (uint32_t));
1387 *act = cnt + 1;
1390 return &(*table)[cnt];
1394 static int
1395 get_character (struct token *now, const struct charmap_t *charmap,
1396 struct repertoire_t *repertoire,
1397 struct charseq **seqp, uint32_t *wchp)
1399 if (now->tok == tok_bsymbol)
1401 /* This will hopefully be the normal case. */
1402 *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1403 now->val.str.lenmb);
1404 *seqp = charmap_find_value (charmap, now->val.str.startmb,
1405 now->val.str.lenmb);
1407 else if (now->tok == tok_ucs4)
1409 char utmp[10];
1411 snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1412 *seqp = charmap_find_value (charmap, utmp, 9);
1414 if (*seqp == NULL)
1415 *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
1417 if (*seqp == NULL)
1419 /* Compute the value in the charmap from the UCS value. */
1420 const char *symbol = repertoire_find_symbol (repertoire,
1421 now->val.ucs4);
1423 if (symbol == NULL)
1424 *seqp = NULL;
1425 else
1426 *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
1428 if (*seqp == NULL)
1430 if (repertoire != NULL)
1432 /* Insert a negative entry. */
1433 static const struct charseq negative
1434 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1435 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1436 sizeof (uint32_t));
1437 *newp = now->val.ucs4;
1439 insert_entry (&repertoire->seq_table, newp,
1440 sizeof (uint32_t), (void *) &negative);
1443 else
1444 (*seqp)->ucs4 = now->val.ucs4;
1446 else if ((*seqp)->ucs4 != now->val.ucs4)
1447 *seqp = NULL;
1449 *wchp = now->val.ucs4;
1451 else if (now->tok == tok_charcode)
1453 /* We must map from the byte code to UCS4. */
1454 *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1455 now->val.str.lenmb);
1457 if (*seqp == NULL)
1458 *wchp = ILLEGAL_CHAR_VALUE;
1459 else
1461 if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1462 (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1463 strlen ((*seqp)->name));
1464 *wchp = (*seqp)->ucs4;
1467 else
1468 return 1;
1470 return 0;
1474 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1475 the .(2). counterparts. */
1476 static void
1477 charclass_symbolic_ellipsis (struct linereader *ldfile,
1478 struct locale_ctype_t *ctype,
1479 const struct charmap_t *charmap,
1480 struct repertoire_t *repertoire,
1481 struct token *now,
1482 const char *last_str,
1483 unsigned long int class256_bit,
1484 unsigned long int class_bit, int base,
1485 int ignore_content, int handle_digits, int step)
1487 const char *nowstr = now->val.str.startmb;
1488 char tmp[now->val.str.lenmb + 1];
1489 const char *cp;
1490 char *endp;
1491 unsigned long int from;
1492 unsigned long int to;
1494 /* We have to compute the ellipsis values using the symbolic names. */
1495 assert (last_str != NULL);
1497 if (strlen (last_str) != now->val.str.lenmb)
1499 invalid_range:
1500 lr_error (ldfile,
1501 _("`%s' and `%.*s' are not valid names for symbolic range"),
1502 last_str, (int) now->val.str.lenmb, nowstr);
1503 return;
1506 if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1507 /* Nothing to do, the names are the same. */
1508 return;
1510 for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1513 errno = 0;
1514 from = strtoul (cp, &endp, base);
1515 if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1516 goto invalid_range;
1518 to = strtoul (nowstr + (cp - last_str), &endp, base);
1519 if ((to == UINT_MAX && errno == ERANGE)
1520 || (endp - nowstr) != now->val.str.lenmb || from >= to)
1521 goto invalid_range;
1523 /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
1524 if (!ignore_content)
1526 now->val.str.startmb = tmp;
1527 while ((from += step) <= to)
1529 struct charseq *seq;
1530 uint32_t wch;
1532 sprintf (tmp, (base == 10 ? "%.*s%0*ld" : "%.*s%0*lX"),
1533 (int) (cp - last_str), last_str,
1534 (int) (now->val.str.lenmb - (cp - last_str)),
1535 from);
1537 get_character (now, charmap, repertoire, &seq, &wch);
1539 if (seq != NULL && seq->nbytes == 1)
1540 /* Yep, we can store information about this byte sequence. */
1541 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
1543 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1544 /* We have the UCS4 position. */
1545 *find_idx (ctype, &ctype->class_collection,
1546 &ctype->class_collection_max,
1547 &ctype->class_collection_act, wch) |= class_bit;
1549 if (handle_digits == 1)
1551 /* We must store the digit values. */
1552 if (ctype->mbdigits_act == ctype->mbdigits_max)
1554 ctype->mbdigits_max *= 2;
1555 ctype->mbdigits = xrealloc (ctype->mbdigits,
1556 (ctype->mbdigits_max
1557 * sizeof (char *)));
1558 ctype->wcdigits_max *= 2;
1559 ctype->wcdigits = xrealloc (ctype->wcdigits,
1560 (ctype->wcdigits_max
1561 * sizeof (uint32_t)));
1564 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1565 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1567 else if (handle_digits == 2)
1569 /* We must store the digit values. */
1570 if (ctype->outdigits_act >= 10)
1572 lr_error (ldfile, _("\
1573 %s: field `%s' does not contain exactly ten entries"),
1574 "LC_CTYPE", "outdigit");
1575 return;
1578 ctype->mboutdigits[ctype->outdigits_act] = seq;
1579 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1580 ++ctype->outdigits_act;
1587 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. */
1588 static void
1589 charclass_ucs4_ellipsis (struct linereader *ldfile,
1590 struct locale_ctype_t *ctype,
1591 const struct charmap_t *charmap,
1592 struct repertoire_t *repertoire,
1593 struct token *now, uint32_t last_wch,
1594 unsigned long int class256_bit,
1595 unsigned long int class_bit, int ignore_content,
1596 int handle_digits, int step)
1598 if (last_wch > now->val.ucs4)
1600 lr_error (ldfile, _("\
1601 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1602 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1603 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
1604 return;
1607 if (!ignore_content)
1608 while ((last_wch += step) <= now->val.ucs4)
1610 /* We have to find out whether there is a byte sequence corresponding
1611 to this UCS4 value. */
1612 struct charseq *seq;
1613 char utmp[10];
1615 snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1616 seq = charmap_find_value (charmap, utmp, 9);
1617 if (seq == NULL)
1619 snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1620 seq = charmap_find_value (charmap, utmp, 5);
1623 if (seq == NULL)
1624 /* Try looking in the repertoire map. */
1625 seq = repertoire_find_seq (repertoire, last_wch);
1627 /* If this is the first time we look for this sequence create a new
1628 entry. */
1629 if (seq == NULL)
1631 static const struct charseq negative
1632 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1634 /* Find the symbolic name for this UCS4 value. */
1635 if (repertoire != NULL)
1637 const char *symbol = repertoire_find_symbol (repertoire,
1638 last_wch);
1639 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1640 sizeof (uint32_t));
1641 *newp = last_wch;
1643 if (symbol != NULL)
1644 /* We have a name, now search the multibyte value. */
1645 seq = charmap_find_value (charmap, symbol, strlen (symbol));
1647 if (seq == NULL)
1648 /* We have to create a fake entry. */
1649 seq = (struct charseq *) &negative;
1650 else
1651 seq->ucs4 = last_wch;
1653 insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1654 seq);
1656 else
1657 /* We have to create a fake entry. */
1658 seq = (struct charseq *) &negative;
1661 /* We have a name, now search the multibyte value. */
1662 if (seq->ucs4 == last_wch && seq->nbytes == 1)
1663 /* Yep, we can store information about this byte sequence. */
1664 ctype->class256_collection[(size_t) seq->bytes[0]]
1665 |= class256_bit;
1667 /* And of course we have the UCS4 position. */
1668 if (class_bit != 0)
1669 *find_idx (ctype, &ctype->class_collection,
1670 &ctype->class_collection_max,
1671 &ctype->class_collection_act, last_wch) |= class_bit;
1673 if (handle_digits == 1)
1675 /* We must store the digit values. */
1676 if (ctype->mbdigits_act == ctype->mbdigits_max)
1678 ctype->mbdigits_max *= 2;
1679 ctype->mbdigits = xrealloc (ctype->mbdigits,
1680 (ctype->mbdigits_max
1681 * sizeof (char *)));
1682 ctype->wcdigits_max *= 2;
1683 ctype->wcdigits = xrealloc (ctype->wcdigits,
1684 (ctype->wcdigits_max
1685 * sizeof (uint32_t)));
1688 ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1689 ? seq : NULL);
1690 ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1692 else if (handle_digits == 2)
1694 /* We must store the digit values. */
1695 if (ctype->outdigits_act >= 10)
1697 lr_error (ldfile, _("\
1698 %s: field `%s' does not contain exactly ten entries"),
1699 "LC_CTYPE", "outdigit");
1700 return;
1703 ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1704 ? seq : NULL);
1705 ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1706 ++ctype->outdigits_act;
1712 /* Ellipsis as in `/xea/x12.../xea/x34'. */
1713 static void
1714 charclass_charcode_ellipsis (struct linereader *ldfile,
1715 struct locale_ctype_t *ctype,
1716 const struct charmap_t *charmap,
1717 struct repertoire_t *repertoire,
1718 struct token *now, char *last_charcode,
1719 uint32_t last_charcode_len,
1720 unsigned long int class256_bit,
1721 unsigned long int class_bit, int ignore_content,
1722 int handle_digits)
1724 /* First check whether the to-value is larger. */
1725 if (now->val.charcode.nbytes != last_charcode_len)
1727 lr_error (ldfile, _("\
1728 start and end character sequence of range must have the same length"));
1729 return;
1732 if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
1734 lr_error (ldfile, _("\
1735 to-value character sequence is smaller than from-value sequence"));
1736 return;
1739 if (!ignore_content)
1743 /* Increment the byte sequence value. */
1744 struct charseq *seq;
1745 uint32_t wch;
1746 int i;
1748 for (i = last_charcode_len - 1; i >= 0; --i)
1749 if (++last_charcode[i] != 0)
1750 break;
1752 if (last_charcode_len == 1)
1753 /* Of course we have the charcode value. */
1754 ctype->class256_collection[(size_t) last_charcode[0]]
1755 |= class256_bit;
1757 /* Find the symbolic name. */
1758 seq = charmap_find_symbol (charmap, last_charcode,
1759 last_charcode_len);
1760 if (seq != NULL)
1762 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1763 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1764 strlen (seq->name));
1765 wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
1767 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1768 *find_idx (ctype, &ctype->class_collection,
1769 &ctype->class_collection_max,
1770 &ctype->class_collection_act, wch) |= class_bit;
1772 else
1773 wch = ILLEGAL_CHAR_VALUE;
1775 if (handle_digits == 1)
1777 /* We must store the digit values. */
1778 if (ctype->mbdigits_act == ctype->mbdigits_max)
1780 ctype->mbdigits_max *= 2;
1781 ctype->mbdigits = xrealloc (ctype->mbdigits,
1782 (ctype->mbdigits_max
1783 * sizeof (char *)));
1784 ctype->wcdigits_max *= 2;
1785 ctype->wcdigits = xrealloc (ctype->wcdigits,
1786 (ctype->wcdigits_max
1787 * sizeof (uint32_t)));
1790 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1791 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1792 seq->nbytes = last_charcode_len;
1794 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1795 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1797 else if (handle_digits == 2)
1799 struct charseq *seq;
1800 /* We must store the digit values. */
1801 if (ctype->outdigits_act >= 10)
1803 lr_error (ldfile, _("\
1804 %s: field `%s' does not contain exactly ten entries"),
1805 "LC_CTYPE", "outdigit");
1806 return;
1809 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1810 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1811 seq->nbytes = last_charcode_len;
1813 ctype->mboutdigits[ctype->outdigits_act] = seq;
1814 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1815 ++ctype->outdigits_act;
1818 while (memcmp (last_charcode, now->val.charcode.bytes,
1819 last_charcode_len) != 0);
1824 static uint32_t *
1825 find_translit2 (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
1826 uint32_t wch)
1828 struct translit_t *trunp = ctype->translit;
1829 struct translit_ignore_t *tirunp = ctype->translit_ignore;
1831 while (trunp != NULL)
1833 /* XXX We simplify things here. The transliterations we look
1834 for are only allowed to have one character. */
1835 if (trunp->from[0] == wch && trunp->from[1] == 0)
1837 /* Found it. Now look for a transliteration which can be
1838 represented with the character set. */
1839 struct translit_to_t *torunp = trunp->to;
1841 while (torunp != NULL)
1843 int i;
1845 for (i = 0; torunp->str[i] != 0; ++i)
1847 char utmp[10];
1849 snprintf (utmp, sizeof (utmp), "U%08X", torunp->str[i]);
1850 if (charmap_find_value (charmap, utmp, 9) == NULL)
1851 /* This character cannot be represented. */
1852 break;
1855 if (torunp->str[i] == 0)
1856 return torunp->str;
1858 torunp = torunp->next;
1861 break;
1864 trunp = trunp->next;
1867 /* Check for ignored chars. */
1868 while (tirunp != NULL)
1870 if (tirunp->from <= wch && tirunp->to >= wch)
1872 uint32_t wi;
1874 for (wi = tirunp->from; wi <= wch; wi += tirunp->step)
1875 if (wi == wch)
1876 return (uint32_t []) { 0 };
1880 /* Nothing found. */
1881 return NULL;
1885 uint32_t *
1886 find_translit (struct localedef_t *locale, const struct charmap_t *charmap,
1887 uint32_t wch)
1889 struct locale_ctype_t *ctype;
1890 uint32_t *result = NULL;
1892 assert (locale != NULL);
1893 ctype = locale->categories[LC_CTYPE].ctype;
1895 if (ctype == NULL)
1896 return NULL;
1898 if (ctype->translit != NULL)
1899 result = find_translit2 (ctype, charmap, wch);
1901 if (result == NULL)
1903 struct translit_include_t *irunp = ctype->translit_include;
1905 while (irunp != NULL && result == NULL)
1907 result = find_translit (find_locale (CTYPE_LOCALE,
1908 irunp->copy_locale,
1909 irunp->copy_repertoire,
1910 charmap),
1911 charmap, wch);
1912 irunp = irunp->next;
1916 return result;
1920 /* Read one transliteration entry. */
1921 static uint32_t *
1922 read_widestring (struct linereader *ldfile, struct token *now,
1923 const struct charmap_t *charmap,
1924 struct repertoire_t *repertoire)
1926 uint32_t *wstr;
1928 if (now->tok == tok_default_missing)
1929 /* The special name "" will denote this case. */
1930 wstr = ((uint32_t *) { 0 });
1931 else if (now->tok == tok_bsymbol)
1933 /* Get the value from the repertoire. */
1934 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1935 wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1936 now->val.str.lenmb);
1937 if (wstr[0] == ILLEGAL_CHAR_VALUE)
1939 /* We cannot proceed, we don't know the UCS4 value. */
1940 free (wstr);
1941 return NULL;
1944 wstr[1] = 0;
1946 else if (now->tok == tok_ucs4)
1948 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1949 wstr[0] = now->val.ucs4;
1950 wstr[1] = 0;
1952 else if (now->tok == tok_charcode)
1954 /* Argh, we have to convert to the symbol name first and then to the
1955 UCS4 value. */
1956 struct charseq *seq = charmap_find_symbol (charmap,
1957 now->val.str.startmb,
1958 now->val.str.lenmb);
1959 if (seq == NULL)
1960 /* Cannot find the UCS4 value. */
1961 return NULL;
1963 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1964 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1965 strlen (seq->name));
1966 if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1967 /* We cannot proceed, we don't know the UCS4 value. */
1968 return NULL;
1970 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1971 wstr[0] = seq->ucs4;
1972 wstr[1] = 0;
1974 else if (now->tok == tok_string)
1976 wstr = now->val.str.startwc;
1977 if (wstr == NULL || wstr[0] == 0)
1978 return NULL;
1980 else
1982 if (now->tok != tok_eol && now->tok != tok_eof)
1983 lr_ignore_rest (ldfile, 0);
1984 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1985 return (uint32_t *) -1l;
1988 return wstr;
1992 static void
1993 read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1994 struct token *now, const struct charmap_t *charmap,
1995 struct repertoire_t *repertoire)
1997 uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1998 struct translit_t *result;
1999 struct translit_to_t **top;
2000 struct obstack *ob = &ctype->mempool;
2001 int first;
2002 int ignore;
2004 if (from_wstr == NULL)
2005 /* There is no valid from string. */
2006 return;
2008 result = (struct translit_t *) obstack_alloc (ob,
2009 sizeof (struct translit_t));
2010 result->from = from_wstr;
2011 result->fname = ldfile->fname;
2012 result->lineno = ldfile->lineno;
2013 result->next = NULL;
2014 result->to = NULL;
2015 top = &result->to;
2016 first = 1;
2017 ignore = 0;
2019 while (1)
2021 uint32_t *to_wstr;
2023 /* Next we have one or more transliterations. They are
2024 separated by semicolons. */
2025 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2027 if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
2029 /* One string read. */
2030 const uint32_t zero = 0;
2032 if (!ignore)
2034 obstack_grow (ob, &zero, 4);
2035 to_wstr = obstack_finish (ob);
2037 *top = obstack_alloc (ob, sizeof (struct translit_to_t));
2038 (*top)->str = to_wstr;
2039 (*top)->next = NULL;
2042 if (now->tok == tok_eol)
2044 result->next = ctype->translit;
2045 ctype->translit = result;
2046 return;
2049 if (!ignore)
2050 top = &(*top)->next;
2051 ignore = 0;
2053 else
2055 to_wstr = read_widestring (ldfile, now, charmap, repertoire);
2056 if (to_wstr == (uint32_t *) -1l)
2058 /* An error occurred. */
2059 obstack_free (ob, result);
2060 return;
2063 if (to_wstr == NULL)
2064 ignore = 1;
2065 else
2066 /* This value is usable. */
2067 obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
2069 first = 0;
2075 static void
2076 read_translit_ignore_entry (struct linereader *ldfile,
2077 struct locale_ctype_t *ctype,
2078 const struct charmap_t *charmap,
2079 struct repertoire_t *repertoire)
2081 /* We expect a semicolon-separated list of characters we ignore. We are
2082 only interested in the wide character definitions. These must be
2083 single characters, possibly defining a range when an ellipsis is used. */
2084 while (1)
2086 struct token *now = lr_token (ldfile, charmap, NULL, repertoire,
2087 verbose);
2088 struct translit_ignore_t *newp;
2089 uint32_t from;
2091 if (now->tok == tok_eol || now->tok == tok_eof)
2093 lr_error (ldfile,
2094 _("premature end of `translit_ignore' definition"));
2095 return;
2098 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2100 lr_error (ldfile, _("syntax error"));
2101 lr_ignore_rest (ldfile, 0);
2102 return;
2105 if (now->tok == tok_ucs4)
2106 from = now->val.ucs4;
2107 else
2108 /* Try to get the value. */
2109 from = repertoire_find_value (repertoire, now->val.str.startmb,
2110 now->val.str.lenmb);
2112 if (from == ILLEGAL_CHAR_VALUE)
2114 lr_error (ldfile, "invalid character name");
2115 newp = NULL;
2117 else
2119 newp = (struct translit_ignore_t *)
2120 obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
2121 newp->from = from;
2122 newp->to = from;
2123 newp->step = 1;
2125 newp->next = ctype->translit_ignore;
2126 ctype->translit_ignore = newp;
2129 /* Now we expect either a semicolon, an ellipsis, or the end of the
2130 line. */
2131 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2133 if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
2135 /* XXX Should we bother implementing `....'? `...' certainly
2136 will not be implemented. */
2137 uint32_t to;
2138 int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
2140 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2142 if (now->tok == tok_eol || now->tok == tok_eof)
2144 lr_error (ldfile,
2145 _("premature end of `translit_ignore' definition"));
2146 return;
2149 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2151 lr_error (ldfile, _("syntax error"));
2152 lr_ignore_rest (ldfile, 0);
2153 return;
2156 if (now->tok == tok_ucs4)
2157 to = now->val.ucs4;
2158 else
2159 /* Try to get the value. */
2160 to = repertoire_find_value (repertoire, now->val.str.startmb,
2161 now->val.str.lenmb);
2163 if (to == ILLEGAL_CHAR_VALUE)
2164 lr_error (ldfile, "invalid character name");
2165 else
2167 /* Make sure the `to'-value is larger. */
2168 if (to >= from)
2170 newp->to = to;
2171 newp->step = step;
2173 else
2174 lr_error (ldfile, _("\
2175 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2176 (to | from) < 65536 ? 4 : 8, to,
2177 (to | from) < 65536 ? 4 : 8, from);
2180 /* And the next token. */
2181 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2184 if (now->tok == tok_eol || now->tok == tok_eof)
2185 /* We are done. */
2186 return;
2188 if (now->tok == tok_semicolon)
2189 /* Next round. */
2190 continue;
2192 /* If we come here something is wrong. */
2193 lr_error (ldfile, _("syntax error"));
2194 lr_ignore_rest (ldfile, 0);
2195 return;
2200 /* The parser for the LC_CTYPE section of the locale definition. */
2201 void
2202 ctype_read (struct linereader *ldfile, struct localedef_t *result,
2203 const struct charmap_t *charmap, const char *repertoire_name,
2204 int ignore_content)
2206 struct repertoire_t *repertoire = NULL;
2207 struct locale_ctype_t *ctype;
2208 struct token *now;
2209 enum token_t nowtok;
2210 size_t cnt;
2211 uint32_t last_wch = 0;
2212 enum token_t last_token;
2213 enum token_t ellipsis_token;
2214 int step;
2215 char last_charcode[16];
2216 size_t last_charcode_len = 0;
2217 const char *last_str = NULL;
2218 int mapidx;
2219 struct localedef_t *copy_locale = NULL;
2221 /* Get the repertoire we have to use. */
2222 if (repertoire_name != NULL)
2223 repertoire = repertoire_read (repertoire_name);
2225 /* The rest of the line containing `LC_CTYPE' must be free. */
2226 lr_ignore_rest (ldfile, 1);
2231 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2232 nowtok = now->tok;
2234 while (nowtok == tok_eol);
2236 /* If we see `copy' now we are almost done. */
2237 if (nowtok == tok_copy)
2239 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2240 if (now->tok != tok_string)
2242 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2244 skip_category:
2246 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2247 while (now->tok != tok_eof && now->tok != tok_end);
2249 if (now->tok != tok_eof
2250 || (now = lr_token (ldfile, charmap, NULL, NULL, verbose),
2251 now->tok == tok_eof))
2252 lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2253 else if (now->tok != tok_lc_ctype)
2255 lr_error (ldfile, _("\
2256 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2257 lr_ignore_rest (ldfile, 0);
2259 else
2260 lr_ignore_rest (ldfile, 1);
2262 return;
2265 if (! ignore_content)
2267 /* Get the locale definition. */
2268 copy_locale = load_locale (LC_CTYPE, now->val.str.startmb,
2269 repertoire_name, charmap, NULL);
2270 if ((copy_locale->avail & CTYPE_LOCALE) == 0)
2272 /* Not yet loaded. So do it now. */
2273 if (locfile_read (copy_locale, charmap) != 0)
2274 goto skip_category;
2277 if (copy_locale->categories[LC_CTYPE].ctype == NULL)
2278 return;
2281 lr_ignore_rest (ldfile, 1);
2283 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2284 nowtok = now->tok;
2287 /* Prepare the data structures. */
2288 ctype_startup (ldfile, result, charmap, copy_locale, ignore_content);
2289 ctype = result->categories[LC_CTYPE].ctype;
2291 /* Remember the repertoire we use. */
2292 if (!ignore_content)
2293 ctype->repertoire = repertoire;
2295 while (1)
2297 unsigned long int class_bit = 0;
2298 unsigned long int class256_bit = 0;
2299 int handle_digits = 0;
2301 /* Of course we don't proceed beyond the end of file. */
2302 if (nowtok == tok_eof)
2303 break;
2305 /* Ignore empty lines. */
2306 if (nowtok == tok_eol)
2308 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2309 nowtok = now->tok;
2310 continue;
2313 switch (nowtok)
2315 case tok_charclass:
2316 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2317 while (now->tok == tok_ident || now->tok == tok_string)
2319 ctype_class_new (ldfile, ctype, now->val.str.startmb);
2320 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2321 if (now->tok != tok_semicolon)
2322 break;
2323 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2325 if (now->tok != tok_eol)
2326 SYNTAX_ERROR (_("\
2327 %s: syntax error in definition of new character class"), "LC_CTYPE");
2328 break;
2330 case tok_charconv:
2331 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2332 while (now->tok == tok_ident || now->tok == tok_string)
2334 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2335 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2336 if (now->tok != tok_semicolon)
2337 break;
2338 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2340 if (now->tok != tok_eol)
2341 SYNTAX_ERROR (_("\
2342 %s: syntax error in definition of new character map"), "LC_CTYPE");
2343 break;
2345 case tok_class:
2346 /* Ignore the rest of the line if we don't need the input of
2347 this line. */
2348 if (ignore_content)
2350 lr_ignore_rest (ldfile, 0);
2351 break;
2354 /* We simply forget the `class' keyword and use the following
2355 operand to determine the bit. */
2356 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2357 if (now->tok == tok_ident || now->tok == tok_string)
2359 /* Must be one of the predefined class names. */
2360 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2361 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
2362 break;
2363 if (cnt >= ctype->nr_charclass)
2365 #ifdef PREDEFINED_CLASSES
2366 if (now->val.str.lenmb == 8
2367 && memcmp ("special1", now->val.str.startmb, 8) == 0)
2368 class_bit = _ISwspecial1;
2369 else if (now->val.str.lenmb == 8
2370 && memcmp ("special2", now->val.str.startmb, 8) == 0)
2371 class_bit = _ISwspecial2;
2372 else if (now->val.str.lenmb == 8
2373 && memcmp ("special3", now->val.str.startmb, 8) == 0)
2374 class_bit = _ISwspecial3;
2375 else
2376 #endif
2378 /* OK, it's a new class. */
2379 ctype_class_new (ldfile, ctype, now->val.str.startmb);
2381 class_bit = _ISwbit (ctype->nr_charclass - 1);
2384 else
2386 class_bit = _ISwbit (cnt);
2388 free (now->val.str.startmb);
2391 else if (now->tok == tok_digit)
2392 goto handle_tok_digit;
2393 else if (now->tok < tok_upper || now->tok > tok_blank)
2394 goto err_label;
2395 else
2397 class_bit = BITw (now->tok);
2398 class256_bit = BIT (now->tok);
2401 /* The next character must be a semicolon. */
2402 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2403 if (now->tok != tok_semicolon)
2404 goto err_label;
2405 goto read_charclass;
2407 case tok_upper:
2408 case tok_lower:
2409 case tok_alpha:
2410 case tok_alnum:
2411 case tok_space:
2412 case tok_cntrl:
2413 case tok_punct:
2414 case tok_graph:
2415 case tok_print:
2416 case tok_xdigit:
2417 case tok_blank:
2418 /* Ignore the rest of the line if we don't need the input of
2419 this line. */
2420 if (ignore_content)
2422 lr_ignore_rest (ldfile, 0);
2423 break;
2426 class_bit = BITw (now->tok);
2427 class256_bit = BIT (now->tok);
2428 handle_digits = 0;
2429 read_charclass:
2430 ctype->class_done |= class_bit;
2431 last_token = tok_none;
2432 ellipsis_token = tok_none;
2433 step = 1;
2434 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2435 while (now->tok != tok_eol && now->tok != tok_eof)
2437 uint32_t wch;
2438 struct charseq *seq;
2440 if (ellipsis_token == tok_none)
2442 if (get_character (now, charmap, repertoire, &seq, &wch))
2443 goto err_label;
2445 if (!ignore_content && seq != NULL && seq->nbytes == 1)
2446 /* Yep, we can store information about this byte
2447 sequence. */
2448 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
2450 if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2451 && class_bit != 0)
2452 /* We have the UCS4 position. */
2453 *find_idx (ctype, &ctype->class_collection,
2454 &ctype->class_collection_max,
2455 &ctype->class_collection_act, wch) |= class_bit;
2457 last_token = now->tok;
2458 /* Terminate the string. */
2459 if (last_token == tok_bsymbol)
2461 now->val.str.startmb[now->val.str.lenmb] = '\0';
2462 last_str = now->val.str.startmb;
2464 else
2465 last_str = NULL;
2466 last_wch = wch;
2467 memcpy (last_charcode, now->val.charcode.bytes, 16);
2468 last_charcode_len = now->val.charcode.nbytes;
2470 if (!ignore_content && handle_digits == 1)
2472 /* We must store the digit values. */
2473 if (ctype->mbdigits_act == ctype->mbdigits_max)
2475 ctype->mbdigits_max += 10;
2476 ctype->mbdigits = xrealloc (ctype->mbdigits,
2477 (ctype->mbdigits_max
2478 * sizeof (char *)));
2479 ctype->wcdigits_max += 10;
2480 ctype->wcdigits = xrealloc (ctype->wcdigits,
2481 (ctype->wcdigits_max
2482 * sizeof (uint32_t)));
2485 ctype->mbdigits[ctype->mbdigits_act++] = seq;
2486 ctype->wcdigits[ctype->wcdigits_act++] = wch;
2488 else if (!ignore_content && handle_digits == 2)
2490 /* We must store the digit values. */
2491 if (ctype->outdigits_act >= 10)
2493 lr_error (ldfile, _("\
2494 %s: field `%s' does not contain exactly ten entries"),
2495 "LC_CTYPE", "outdigit");
2496 lr_ignore_rest (ldfile, 0);
2497 break;
2500 ctype->mboutdigits[ctype->outdigits_act] = seq;
2501 ctype->wcoutdigits[ctype->outdigits_act] = wch;
2502 ++ctype->outdigits_act;
2505 else
2507 /* Now it gets complicated. We have to resolve the
2508 ellipsis problem. First we must distinguish between
2509 the different kind of ellipsis and this must match the
2510 tokens we have seen. */
2511 assert (last_token != tok_none);
2513 if (last_token != now->tok)
2515 lr_error (ldfile, _("\
2516 ellipsis range must be marked by two operands of same type"));
2517 lr_ignore_rest (ldfile, 0);
2518 break;
2521 if (last_token == tok_bsymbol)
2523 if (ellipsis_token == tok_ellipsis3)
2524 lr_error (ldfile, _("with symbolic name range values \
2525 the absolute ellipsis `...' must not be used"));
2527 charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2528 repertoire, now, last_str,
2529 class256_bit, class_bit,
2530 (ellipsis_token
2531 == tok_ellipsis4
2532 ? 10 : 16),
2533 ignore_content,
2534 handle_digits, step);
2536 else if (last_token == tok_ucs4)
2538 if (ellipsis_token != tok_ellipsis2)
2539 lr_error (ldfile, _("\
2540 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2542 charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2543 repertoire, now, last_wch,
2544 class256_bit, class_bit,
2545 ignore_content, handle_digits,
2546 step);
2548 else
2550 assert (last_token == tok_charcode);
2552 if (ellipsis_token != tok_ellipsis3)
2553 lr_error (ldfile, _("\
2554 with character code range values one must use the absolute ellipsis `...'"));
2556 charclass_charcode_ellipsis (ldfile, ctype, charmap,
2557 repertoire, now,
2558 last_charcode,
2559 last_charcode_len,
2560 class256_bit, class_bit,
2561 ignore_content,
2562 handle_digits);
2565 /* Now we have used the last value. */
2566 last_token = tok_none;
2569 /* Next we expect a semicolon or the end of the line. */
2570 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2571 if (now->tok == tok_eol || now->tok == tok_eof)
2572 break;
2574 if (last_token != tok_none
2575 && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
2577 if (now->tok == tok_ellipsis2_2)
2579 now->tok = tok_ellipsis2;
2580 step = 2;
2582 else if (now->tok == tok_ellipsis4_2)
2584 now->tok = tok_ellipsis4;
2585 step = 2;
2588 ellipsis_token = now->tok;
2590 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2591 continue;
2594 if (now->tok != tok_semicolon)
2595 goto err_label;
2597 /* And get the next character. */
2598 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2600 ellipsis_token = tok_none;
2601 step = 1;
2603 break;
2605 case tok_digit:
2606 /* Ignore the rest of the line if we don't need the input of
2607 this line. */
2608 if (ignore_content)
2610 lr_ignore_rest (ldfile, 0);
2611 break;
2614 handle_tok_digit:
2615 class_bit = _ISwdigit;
2616 class256_bit = _ISdigit;
2617 handle_digits = 1;
2618 goto read_charclass;
2620 case tok_outdigit:
2621 /* Ignore the rest of the line if we don't need the input of
2622 this line. */
2623 if (ignore_content)
2625 lr_ignore_rest (ldfile, 0);
2626 break;
2629 if (ctype->outdigits_act != 0)
2630 lr_error (ldfile, _("\
2631 %s: field `%s' declared more than once"),
2632 "LC_CTYPE", "outdigit");
2633 class_bit = 0;
2634 class256_bit = 0;
2635 handle_digits = 2;
2636 goto read_charclass;
2638 case tok_toupper:
2639 /* Ignore the rest of the line if we don't need the input of
2640 this line. */
2641 if (ignore_content)
2643 lr_ignore_rest (ldfile, 0);
2644 break;
2647 mapidx = 0;
2648 goto read_mapping;
2650 case tok_tolower:
2651 /* Ignore the rest of the line if we don't need the input of
2652 this line. */
2653 if (ignore_content)
2655 lr_ignore_rest (ldfile, 0);
2656 break;
2659 mapidx = 1;
2660 goto read_mapping;
2662 case tok_map:
2663 /* Ignore the rest of the line if we don't need the input of
2664 this line. */
2665 if (ignore_content)
2667 lr_ignore_rest (ldfile, 0);
2668 break;
2671 /* We simply forget the `map' keyword and use the following
2672 operand to determine the mapping. */
2673 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2674 if (now->tok == tok_ident || now->tok == tok_string)
2676 size_t cnt;
2678 for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2679 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2680 break;
2682 if (cnt < ctype->map_collection_nr)
2683 free (now->val.str.startmb);
2684 else
2685 /* OK, it's a new map. */
2686 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2688 mapidx = cnt;
2690 else if (now->tok < tok_toupper || now->tok > tok_tolower)
2691 goto err_label;
2692 else
2693 mapidx = now->tok - tok_toupper;
2695 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2696 /* This better should be a semicolon. */
2697 if (now->tok != tok_semicolon)
2698 goto err_label;
2700 read_mapping:
2701 /* Test whether this mapping was already defined. */
2702 if (ctype->tomap_done[mapidx])
2704 lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2705 ctype->mapnames[mapidx]);
2706 lr_ignore_rest (ldfile, 0);
2707 break;
2709 ctype->tomap_done[mapidx] = 1;
2711 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2712 while (now->tok != tok_eol && now->tok != tok_eof)
2714 struct charseq *from_seq;
2715 uint32_t from_wch;
2716 struct charseq *to_seq;
2717 uint32_t to_wch;
2719 /* Every pair starts with an opening brace. */
2720 if (now->tok != tok_open_brace)
2721 goto err_label;
2723 /* Next comes the from-value. */
2724 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2725 if (get_character (now, charmap, repertoire, &from_seq,
2726 &from_wch) != 0)
2727 goto err_label;
2729 /* The next is a comma. */
2730 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2731 if (now->tok != tok_comma)
2732 goto err_label;
2734 /* And the other value. */
2735 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2736 if (get_character (now, charmap, repertoire, &to_seq,
2737 &to_wch) != 0)
2738 goto err_label;
2740 /* And the last thing is the closing brace. */
2741 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2742 if (now->tok != tok_close_brace)
2743 goto err_label;
2745 if (!ignore_content)
2747 /* Check whether the mapping converts from an ASCII value
2748 to a non-ASCII value. */
2749 if (from_seq != NULL && from_seq->nbytes == 1
2750 && isascii (from_seq->bytes[0])
2751 && to_seq != NULL && (to_seq->nbytes != 1
2752 || !isascii (to_seq->bytes[0])))
2753 ctype->to_nonascii = 1;
2755 if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2756 && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2757 /* We can use this value. */
2758 ctype->map256_collection[mapidx][from_seq->bytes[0]]
2759 = to_seq->bytes[0];
2761 if (from_wch != ILLEGAL_CHAR_VALUE
2762 && to_wch != ILLEGAL_CHAR_VALUE)
2763 /* Both correct values. */
2764 *find_idx (ctype, &ctype->map_collection[mapidx],
2765 &ctype->map_collection_max[mapidx],
2766 &ctype->map_collection_act[mapidx],
2767 from_wch) = to_wch;
2770 /* Now comes a semicolon or the end of the line/file. */
2771 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2772 if (now->tok == tok_semicolon)
2773 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2775 break;
2777 case tok_translit_start:
2778 /* Ignore the entire translit section with its peculiar syntax
2779 if we don't need the input. */
2780 if (ignore_content)
2784 lr_ignore_rest (ldfile, 0);
2785 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2787 while (now->tok != tok_translit_end && now->tok != tok_eof);
2789 if (now->tok == tok_eof)
2790 lr_error (ldfile, _(\
2791 "%s: `translit_start' section does not end with `translit_end'"),
2792 "LC_CTYPE");
2794 break;
2797 /* The rest of the line better should be empty. */
2798 lr_ignore_rest (ldfile, 1);
2800 /* We count here the number of allocated entries in the `translit'
2801 array. */
2802 cnt = 0;
2804 ldfile->translate_strings = 1;
2805 ldfile->return_widestr = 1;
2807 /* We proceed until we see the `translit_end' token. */
2808 while (now = lr_token (ldfile, charmap, NULL, repertoire, verbose),
2809 now->tok != tok_translit_end && now->tok != tok_eof)
2811 if (now->tok == tok_eol)
2812 /* Ignore empty lines. */
2813 continue;
2815 if (now->tok == tok_include)
2817 /* We have to include locale. */
2818 const char *locale_name;
2819 const char *repertoire_name;
2820 struct translit_include_t *include_stmt, **include_ptr;
2822 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2823 /* This should be a string or an identifier. In any
2824 case something to name a locale. */
2825 if (now->tok != tok_string && now->tok != tok_ident)
2827 translit_syntax:
2828 lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2829 lr_ignore_rest (ldfile, 0);
2830 continue;
2832 locale_name = now->val.str.startmb;
2834 /* Next should be a semicolon. */
2835 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2836 if (now->tok != tok_semicolon)
2837 goto translit_syntax;
2839 /* Now the repertoire name. */
2840 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2841 if ((now->tok != tok_string && now->tok != tok_ident)
2842 || now->val.str.startmb == NULL)
2843 goto translit_syntax;
2844 repertoire_name = now->val.str.startmb;
2845 if (repertoire_name[0] == '\0')
2846 /* Ignore the empty string. */
2847 repertoire_name = NULL;
2849 /* Save the include statement for later processing. */
2850 include_stmt = (struct translit_include_t *)
2851 xmalloc (sizeof (struct translit_include_t));
2852 include_stmt->copy_locale = locale_name;
2853 include_stmt->copy_repertoire = repertoire_name;
2854 include_stmt->next = NULL;
2856 include_ptr = &ctype->translit_include;
2857 while (*include_ptr != NULL)
2858 include_ptr = &(*include_ptr)->next;
2859 *include_ptr = include_stmt;
2861 /* The rest of the line must be empty. */
2862 lr_ignore_rest (ldfile, 1);
2864 /* Make sure the locale is read. */
2865 add_to_readlist (LC_CTYPE, locale_name, repertoire_name,
2866 1, NULL);
2867 continue;
2869 else if (now->tok == tok_default_missing)
2871 uint32_t *wstr;
2873 while (1)
2875 /* We expect a single character or string as the
2876 argument. */
2877 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2878 wstr = read_widestring (ldfile, now, charmap,
2879 repertoire);
2881 if (wstr != NULL)
2883 if (ctype->default_missing != NULL)
2885 lr_error (ldfile, _("\
2886 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2887 WITH_CUR_LOCALE (error_at_line (0, 0,
2888 ctype->default_missing_file,
2889 ctype->default_missing_lineno,
2890 _("\
2891 previous definition was here")));
2893 else
2895 ctype->default_missing = wstr;
2896 ctype->default_missing_file = ldfile->fname;
2897 ctype->default_missing_lineno = ldfile->lineno;
2899 /* We can have more entries, ignore them. */
2900 lr_ignore_rest (ldfile, 0);
2901 break;
2903 else if (wstr == (uint32_t *) -1l)
2904 /* This was a syntax error. */
2905 break;
2907 /* Maybe there is another replacement we can use. */
2908 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2909 if (now->tok == tok_eol || now->tok == tok_eof)
2911 /* Nothing found. We tell the user. */
2912 lr_error (ldfile, _("\
2913 %s: no representable `default_missing' definition found"), "LC_CTYPE");
2914 break;
2916 if (now->tok != tok_semicolon)
2917 goto translit_syntax;
2920 continue;
2922 else if (now->tok == tok_translit_ignore)
2924 read_translit_ignore_entry (ldfile, ctype, charmap,
2925 repertoire);
2926 continue;
2929 read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2931 ldfile->return_widestr = 0;
2933 if (now->tok == tok_eof)
2934 lr_error (ldfile, _(\
2935 "%s: `translit_start' section does not end with `translit_end'"),
2936 "LC_CTYPE");
2938 break;
2940 case tok_ident:
2941 /* Ignore the rest of the line if we don't need the input of
2942 this line. */
2943 if (ignore_content)
2945 lr_ignore_rest (ldfile, 0);
2946 break;
2949 /* This could mean one of several things. First test whether
2950 it's a character class name. */
2951 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2952 if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2953 break;
2954 if (cnt < ctype->nr_charclass)
2956 class_bit = _ISwbit (cnt);
2957 class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2958 free (now->val.str.startmb);
2959 goto read_charclass;
2961 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2962 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2963 break;
2964 if (cnt < ctype->map_collection_nr)
2966 mapidx = cnt;
2967 free (now->val.str.startmb);
2968 goto read_mapping;
2970 #ifdef PREDEFINED_CLASSES
2971 if (strcmp (now->val.str.startmb, "special1") == 0)
2973 class_bit = _ISwspecial1;
2974 free (now->val.str.startmb);
2975 goto read_charclass;
2977 if (strcmp (now->val.str.startmb, "special2") == 0)
2979 class_bit = _ISwspecial2;
2980 free (now->val.str.startmb);
2981 goto read_charclass;
2983 if (strcmp (now->val.str.startmb, "special3") == 0)
2985 class_bit = _ISwspecial3;
2986 free (now->val.str.startmb);
2987 goto read_charclass;
2989 if (strcmp (now->val.str.startmb, "tosymmetric") == 0)
2991 mapidx = 2;
2992 goto read_mapping;
2994 #endif
2995 break;
2997 case tok_end:
2998 /* Next we assume `LC_CTYPE'. */
2999 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
3000 if (now->tok == tok_eof)
3001 break;
3002 if (now->tok == tok_eol)
3003 lr_error (ldfile, _("%s: incomplete `END' line"),
3004 "LC_CTYPE");
3005 else if (now->tok != tok_lc_ctype)
3006 lr_error (ldfile, _("\
3007 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
3008 lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
3009 return;
3011 default:
3012 err_label:
3013 if (now->tok != tok_eof)
3014 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
3017 /* Prepare for the next round. */
3018 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
3019 nowtok = now->tok;
3022 /* When we come here we reached the end of the file. */
3023 lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
3027 static void
3028 set_class_defaults (struct locale_ctype_t *ctype,
3029 const struct charmap_t *charmap,
3030 struct repertoire_t *repertoire)
3032 size_t cnt;
3034 /* This function defines the default values for the classes and conversions
3035 according to POSIX.2 2.5.2.1.
3036 It may seem that the order of these if-blocks is arbitrary but it is NOT.
3037 Don't move them unless you know what you do! */
3039 auto void set_default (int bitpos, int from, int to);
3041 void set_default (int bitpos, int from, int to)
3043 char tmp[2];
3044 int ch;
3045 int bit = _ISbit (bitpos);
3046 int bitw = _ISwbit (bitpos);
3047 /* Define string. */
3048 strcpy (tmp, "?");
3050 for (ch = from; ch <= to; ++ch)
3052 struct charseq *seq;
3053 tmp[0] = ch;
3055 seq = charmap_find_value (charmap, tmp, 1);
3056 if (seq == NULL)
3058 char buf[10];
3059 sprintf (buf, "U%08X", ch);
3060 seq = charmap_find_value (charmap, buf, 9);
3062 if (seq == NULL)
3064 if (!be_quiet)
3065 WITH_CUR_LOCALE (error (0, 0, _("\
3066 %s: character `%s' not defined while needed as default value"),
3067 "LC_CTYPE", tmp));
3069 else if (seq->nbytes != 1)
3070 WITH_CUR_LOCALE (error (0, 0, _("\
3071 %s: character `%s' in charmap not representable with one byte"),
3072 "LC_CTYPE", tmp));
3073 else
3074 ctype->class256_collection[seq->bytes[0]] |= bit;
3076 /* No need to search here, the ASCII value is also the Unicode
3077 value. */
3078 ELEM (ctype, class_collection, , ch) |= bitw;
3082 /* Set default values if keyword was not present. */
3083 if ((ctype->class_done & BITw (tok_upper)) == 0)
3084 /* "If this keyword [upper] is not specified, the uppercase letters
3085 `A' through `Z', ..., shall automatically belong to this class,
3086 with implementation defined character values." [P1003.2, 2.5.2.1] */
3087 set_default (BITPOS (tok_upper), 'A', 'Z');
3089 if ((ctype->class_done & BITw (tok_lower)) == 0)
3090 /* "If this keyword [lower] is not specified, the lowercase letters
3091 `a' through `z', ..., shall automatically belong to this class,
3092 with implementation defined character values." [P1003.2, 2.5.2.1] */
3093 set_default (BITPOS (tok_lower), 'a', 'z');
3095 if ((ctype->class_done & BITw (tok_alpha)) == 0)
3097 /* Table 2-6 in P1003.2 says that characters in class `upper' or
3098 class `lower' *must* be in class `alpha'. */
3099 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
3100 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
3102 for (cnt = 0; cnt < 256; ++cnt)
3103 if ((ctype->class256_collection[cnt] & mask) != 0)
3104 ctype->class256_collection[cnt] |= BIT (tok_alpha);
3106 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3107 if ((ctype->class_collection[cnt] & maskw) != 0)
3108 ctype->class_collection[cnt] |= BITw (tok_alpha);
3111 if ((ctype->class_done & BITw (tok_digit)) == 0)
3112 /* "If this keyword [digit] is not specified, the digits `0' through
3113 `9', ..., shall automatically belong to this class, with
3114 implementation-defined character values." [P1003.2, 2.5.2.1] */
3115 set_default (BITPOS (tok_digit), '0', '9');
3117 /* "Only characters specified for the `alpha' and `digit' keyword
3118 shall be specified. Characters specified for the keyword `alpha'
3119 and `digit' are automatically included in this class. */
3121 unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
3122 unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
3124 for (cnt = 0; cnt < 256; ++cnt)
3125 if ((ctype->class256_collection[cnt] & mask) != 0)
3126 ctype->class256_collection[cnt] |= BIT (tok_alnum);
3128 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3129 if ((ctype->class_collection[cnt] & maskw) != 0)
3130 ctype->class_collection[cnt] |= BITw (tok_alnum);
3133 if ((ctype->class_done & BITw (tok_space)) == 0)
3134 /* "If this keyword [space] is not specified, the characters <space>,
3135 <form-feed>, <newline>, <carriage-return>, <tab>, and
3136 <vertical-tab>, ..., shall automatically belong to this class,
3137 with implementation-defined character values." [P1003.2, 2.5.2.1] */
3139 struct charseq *seq;
3141 seq = charmap_find_value (charmap, "space", 5);
3142 if (seq == NULL)
3143 seq = charmap_find_value (charmap, "SP", 2);
3144 if (seq == NULL)
3145 seq = charmap_find_value (charmap, "U00000020", 9);
3146 if (seq == NULL)
3148 if (!be_quiet)
3149 WITH_CUR_LOCALE (error (0, 0, _("\
3150 %s: character `%s' not defined while needed as default value"),
3151 "LC_CTYPE", "<space>"));
3153 else if (seq->nbytes != 1)
3154 WITH_CUR_LOCALE (error (0, 0, _("\
3155 %s: character `%s' in charmap not representable with one byte"),
3156 "LC_CTYPE", "<space>"));
3157 else
3158 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3160 /* No need to search. */
3161 ELEM (ctype, class_collection, , L' ') |= BITw (tok_space);
3163 seq = charmap_find_value (charmap, "form-feed", 9);
3164 if (seq == NULL)
3165 seq = charmap_find_value (charmap, "U0000000C", 9);
3166 if (seq == NULL)
3168 if (!be_quiet)
3169 WITH_CUR_LOCALE (error (0, 0, _("\
3170 %s: character `%s' not defined while needed as default value"),
3171 "LC_CTYPE", "<form-feed>"));
3173 else if (seq->nbytes != 1)
3174 WITH_CUR_LOCALE (error (0, 0, _("\
3175 %s: character `%s' in charmap not representable with one byte"),
3176 "LC_CTYPE", "<form-feed>"));
3177 else
3178 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3180 /* No need to search. */
3181 ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space);
3184 seq = charmap_find_value (charmap, "newline", 7);
3185 if (seq == NULL)
3186 seq = charmap_find_value (charmap, "U0000000A", 9);
3187 if (seq == NULL)
3189 if (!be_quiet)
3190 WITH_CUR_LOCALE (error (0, 0, _("\
3191 %s: character `%s' not defined while needed as default value"),
3192 "LC_CTYPE", "<newline>"));
3194 else if (seq->nbytes != 1)
3195 WITH_CUR_LOCALE (error (0, 0, _("\
3196 %s: character `%s' in charmap not representable with one byte"),
3197 "LC_CTYPE", "<newline>"));
3198 else
3199 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3201 /* No need to search. */
3202 ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space);
3205 seq = charmap_find_value (charmap, "carriage-return", 15);
3206 if (seq == NULL)
3207 seq = charmap_find_value (charmap, "U0000000D", 9);
3208 if (seq == NULL)
3210 if (!be_quiet)
3211 WITH_CUR_LOCALE (error (0, 0, _("\
3212 %s: character `%s' not defined while needed as default value"),
3213 "LC_CTYPE", "<carriage-return>"));
3215 else if (seq->nbytes != 1)
3216 WITH_CUR_LOCALE (error (0, 0, _("\
3217 %s: character `%s' in charmap not representable with one byte"),
3218 "LC_CTYPE", "<carriage-return>"));
3219 else
3220 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3222 /* No need to search. */
3223 ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space);
3226 seq = charmap_find_value (charmap, "tab", 3);
3227 if (seq == NULL)
3228 seq = charmap_find_value (charmap, "U00000009", 9);
3229 if (seq == NULL)
3231 if (!be_quiet)
3232 WITH_CUR_LOCALE (error (0, 0, _("\
3233 %s: character `%s' not defined while needed as default value"),
3234 "LC_CTYPE", "<tab>"));
3236 else if (seq->nbytes != 1)
3237 WITH_CUR_LOCALE (error (0, 0, _("\
3238 %s: character `%s' in charmap not representable with one byte"),
3239 "LC_CTYPE", "<tab>"));
3240 else
3241 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3243 /* No need to search. */
3244 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space);
3247 seq = charmap_find_value (charmap, "vertical-tab", 12);
3248 if (seq == NULL)
3249 seq = charmap_find_value (charmap, "U0000000B", 9);
3250 if (seq == NULL)
3252 if (!be_quiet)
3253 WITH_CUR_LOCALE (error (0, 0, _("\
3254 %s: character `%s' not defined while needed as default value"),
3255 "LC_CTYPE", "<vertical-tab>"));
3257 else if (seq->nbytes != 1)
3258 WITH_CUR_LOCALE (error (0, 0, _("\
3259 %s: character `%s' in charmap not representable with one byte"),
3260 "LC_CTYPE", "<vertical-tab>"));
3261 else
3262 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3264 /* No need to search. */
3265 ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space);
3268 if ((ctype->class_done & BITw (tok_xdigit)) == 0)
3269 /* "If this keyword is not specified, the digits `0' to `9', the
3270 uppercase letters `A' through `F', and the lowercase letters `a'
3271 through `f', ..., shall automatically belong to this class, with
3272 implementation defined character values." [P1003.2, 2.5.2.1] */
3274 set_default (BITPOS (tok_xdigit), '0', '9');
3275 set_default (BITPOS (tok_xdigit), 'A', 'F');
3276 set_default (BITPOS (tok_xdigit), 'a', 'f');
3279 if ((ctype->class_done & BITw (tok_blank)) == 0)
3280 /* "If this keyword [blank] is unspecified, the characters <space> and
3281 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
3283 struct charseq *seq;
3285 seq = charmap_find_value (charmap, "space", 5);
3286 if (seq == NULL)
3287 seq = charmap_find_value (charmap, "SP", 2);
3288 if (seq == NULL)
3289 seq = charmap_find_value (charmap, "U00000020", 9);
3290 if (seq == NULL)
3292 if (!be_quiet)
3293 WITH_CUR_LOCALE (error (0, 0, _("\
3294 %s: character `%s' not defined while needed as default value"),
3295 "LC_CTYPE", "<space>"));
3297 else if (seq->nbytes != 1)
3298 WITH_CUR_LOCALE (error (0, 0, _("\
3299 %s: character `%s' in charmap not representable with one byte"),
3300 "LC_CTYPE", "<space>"));
3301 else
3302 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3304 /* No need to search. */
3305 ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank);
3308 seq = charmap_find_value (charmap, "tab", 3);
3309 if (seq == NULL)
3310 seq = charmap_find_value (charmap, "U00000009", 9);
3311 if (seq == NULL)
3313 if (!be_quiet)
3314 WITH_CUR_LOCALE (error (0, 0, _("\
3315 %s: character `%s' not defined while needed as default value"),
3316 "LC_CTYPE", "<tab>"));
3318 else if (seq->nbytes != 1)
3319 WITH_CUR_LOCALE (error (0, 0, _("\
3320 %s: character `%s' in charmap not representable with one byte"),
3321 "LC_CTYPE", "<tab>"));
3322 else
3323 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3325 /* No need to search. */
3326 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank);
3329 if ((ctype->class_done & BITw (tok_graph)) == 0)
3330 /* "If this keyword [graph] is not specified, characters specified for
3331 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3332 shall belong to this character class." [P1003.2, 2.5.2.1] */
3334 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3335 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3336 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3337 BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3338 BITw (tok_punct);
3339 size_t cnt;
3341 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3342 if ((ctype->class_collection[cnt] & maskw) != 0)
3343 ctype->class_collection[cnt] |= BITw (tok_graph);
3345 for (cnt = 0; cnt < 256; ++cnt)
3346 if ((ctype->class256_collection[cnt] & mask) != 0)
3347 ctype->class256_collection[cnt] |= BIT (tok_graph);
3350 if ((ctype->class_done & BITw (tok_print)) == 0)
3351 /* "If this keyword [print] is not provided, characters specified for
3352 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3353 and the <space> character shall belong to this character class."
3354 [P1003.2, 2.5.2.1] */
3356 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3357 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3358 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3359 BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3360 BITw (tok_punct);
3361 size_t cnt;
3362 struct charseq *seq;
3364 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3365 if ((ctype->class_collection[cnt] & maskw) != 0)
3366 ctype->class_collection[cnt] |= BITw (tok_print);
3368 for (cnt = 0; cnt < 256; ++cnt)
3369 if ((ctype->class256_collection[cnt] & mask) != 0)
3370 ctype->class256_collection[cnt] |= BIT (tok_print);
3373 seq = charmap_find_value (charmap, "space", 5);
3374 if (seq == NULL)
3375 seq = charmap_find_value (charmap, "SP", 2);
3376 if (seq == NULL)
3377 seq = charmap_find_value (charmap, "U00000020", 9);
3378 if (seq == NULL)
3380 if (!be_quiet)
3381 WITH_CUR_LOCALE (error (0, 0, _("\
3382 %s: character `%s' not defined while needed as default value"),
3383 "LC_CTYPE", "<space>"));
3385 else if (seq->nbytes != 1)
3386 WITH_CUR_LOCALE (error (0, 0, _("\
3387 %s: character `%s' in charmap not representable with one byte"),
3388 "LC_CTYPE", "<space>"));
3389 else
3390 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
3392 /* No need to search. */
3393 ELEM (ctype, class_collection, , L' ') |= BITw (tok_print);
3396 if (ctype->tomap_done[0] == 0)
3397 /* "If this keyword [toupper] is not specified, the lowercase letters
3398 `a' through `z', and their corresponding uppercase letters `A' to
3399 `Z', ..., shall automatically be included, with implementation-
3400 defined character values." [P1003.2, 2.5.2.1] */
3402 char tmp[4];
3403 int ch;
3405 strcpy (tmp, "<?>");
3407 for (ch = 'a'; ch <= 'z'; ++ch)
3409 struct charseq *seq_from, *seq_to;
3411 tmp[1] = (char) ch;
3413 seq_from = charmap_find_value (charmap, &tmp[1], 1);
3414 if (seq_from == NULL)
3416 char buf[10];
3417 sprintf (buf, "U%08X", ch);
3418 seq_from = charmap_find_value (charmap, buf, 9);
3420 if (seq_from == NULL)
3422 if (!be_quiet)
3423 WITH_CUR_LOCALE (error (0, 0, _("\
3424 %s: character `%s' not defined while needed as default value"),
3425 "LC_CTYPE", tmp));
3427 else if (seq_from->nbytes != 1)
3429 if (!be_quiet)
3430 WITH_CUR_LOCALE (error (0, 0, _("\
3431 %s: character `%s' needed as default value not representable with one byte"),
3432 "LC_CTYPE", tmp));
3434 else
3436 /* This conversion is implementation defined. */
3437 tmp[1] = (char) (ch + ('A' - 'a'));
3438 seq_to = charmap_find_value (charmap, &tmp[1], 1);
3439 if (seq_to == NULL)
3441 char buf[10];
3442 sprintf (buf, "U%08X", ch + ('A' - 'a'));
3443 seq_to = charmap_find_value (charmap, buf, 9);
3445 if (seq_to == NULL)
3447 if (!be_quiet)
3448 WITH_CUR_LOCALE (error (0, 0, _("\
3449 %s: character `%s' not defined while needed as default value"),
3450 "LC_CTYPE", tmp));
3452 else if (seq_to->nbytes != 1)
3454 if (!be_quiet)
3455 WITH_CUR_LOCALE (error (0, 0, _("\
3456 %s: character `%s' needed as default value not representable with one byte"),
3457 "LC_CTYPE", tmp));
3459 else
3460 /* The index [0] is determined by the order of the
3461 `ctype_map_newP' calls in `ctype_startup'. */
3462 ctype->map256_collection[0][seq_from->bytes[0]]
3463 = seq_to->bytes[0];
3466 /* No need to search. */
3467 ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
3471 if (ctype->tomap_done[1] == 0)
3472 /* "If this keyword [tolower] is not specified, the mapping shall be
3473 the reverse mapping of the one specified to `toupper'." [P1003.2] */
3475 for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
3476 if (ctype->map_collection[0][cnt] != 0)
3477 ELEM (ctype, map_collection, [1],
3478 ctype->map_collection[0][cnt])
3479 = ctype->charnames[cnt];
3481 for (cnt = 0; cnt < 256; ++cnt)
3482 if (ctype->map256_collection[0][cnt] != 0)
3483 ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
3486 if (ctype->outdigits_act != 10)
3488 if (ctype->outdigits_act != 0)
3489 WITH_CUR_LOCALE (error (0, 0, _("\
3490 %s: field `%s' does not contain exactly ten entries"),
3491 "LC_CTYPE", "outdigit"));
3493 for (cnt = ctype->outdigits_act; cnt < 10; ++cnt)
3495 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3496 (char *) digits + cnt,
3499 if (ctype->mboutdigits[cnt] == NULL)
3500 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3501 longnames[cnt],
3502 strlen (longnames[cnt]));
3504 if (ctype->mboutdigits[cnt] == NULL)
3505 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3506 uninames[cnt], 9);
3508 if (ctype->mboutdigits[cnt] == NULL)
3510 /* Provide a replacement. */
3511 WITH_CUR_LOCALE (error (0, 0, _("\
3512 no output digits defined and none of the standard names in the charmap")));
3514 ctype->mboutdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
3515 sizeof (struct charseq)
3516 + 1);
3518 /* This is better than nothing. */
3519 ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
3520 ctype->mboutdigits[cnt]->nbytes = 1;
3523 ctype->wcoutdigits[cnt] = L'0' + cnt;
3526 ctype->outdigits_act = 10;
/* Construction of sparse 3-level tables.
   See wchar-lookup.h for their structure and the meaning of p and q.  */

/* One bit per wide character, stored as a three-level sparse table.
   The table is built with wctype_table_init and wctype_table_add using
   the "working representation" and then converted once, by
   wctype_table_finalize, into the "compressed representation".  */
struct wctype_table
{
  /* Parameters.  Both must be set by the user before wctype_table_init
     is called.  A level3 block holds 1 << p 32-bit words, a level2 block
     holds 1 << q entries.  */
  unsigned int p;
  unsigned int q;
  /* Working representation.  */
  /* level1: one entry per value of wc >> (q + p + 5); each entry is the
     number of a level2 block or EMPTY.  Counted in entries.  */
  size_t level1_alloc;
  size_t level1_size;
  uint32_t *level1;
  /* level2: blocks of 1 << q entries; each entry is the number of a
     level3 block or EMPTY.  Counted in blocks.  */
  size_t level2_alloc;
  size_t level2_size;
  uint32_t *level2;
  /* level3: blocks of 1 << p words; each word is a bitmap for 32
     consecutive characters.  Counted in blocks.  */
  size_t level3_alloc;
  size_t level3_size;
  uint32_t *level3;
  /* Compressed representation.  Only valid after
     wctype_table_finalize has run.  */
  size_t result_size;
  char *result;
};
3554 /* Initialize. Assumes t->p and t->q have already been set. */
3555 static inline void
3556 wctype_table_init (struct wctype_table *t)
3558 t->level1 = NULL;
3559 t->level1_alloc = t->level1_size = 0;
3560 t->level2 = NULL;
3561 t->level2_alloc = t->level2_size = 0;
3562 t->level3 = NULL;
3563 t->level3_alloc = t->level3_size = 0;
3566 /* Retrieve an entry. */
3567 static inline int
3568 wctype_table_get (struct wctype_table *t, uint32_t wc)
3570 uint32_t index1 = wc >> (t->q + t->p + 5);
3571 if (index1 < t->level1_size)
3573 uint32_t lookup1 = t->level1[index1];
3574 if (lookup1 != EMPTY)
3576 uint32_t index2 = ((wc >> (t->p + 5)) & ((1 << t->q) - 1))
3577 + (lookup1 << t->q);
3578 uint32_t lookup2 = t->level2[index2];
3579 if (lookup2 != EMPTY)
3581 uint32_t index3 = ((wc >> 5) & ((1 << t->p) - 1))
3582 + (lookup2 << t->p);
3583 uint32_t lookup3 = t->level3[index3];
3584 uint32_t index4 = wc & 0x1f;
3586 return (lookup3 >> index4) & 1;
3590 return 0;
3593 /* Add one entry. */
3594 static void
3595 wctype_table_add (struct wctype_table *t, uint32_t wc)
3597 uint32_t index1 = wc >> (t->q + t->p + 5);
3598 uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1);
3599 uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1);
3600 uint32_t index4 = wc & 0x1f;
3601 size_t i, i1, i2;
3603 if (index1 >= t->level1_size)
3605 if (index1 >= t->level1_alloc)
3607 size_t alloc = 2 * t->level1_alloc;
3608 if (alloc <= index1)
3609 alloc = index1 + 1;
3610 t->level1 = (uint32_t *) xrealloc ((char *) t->level1,
3611 alloc * sizeof (uint32_t));
3612 t->level1_alloc = alloc;
3614 while (index1 >= t->level1_size)
3615 t->level1[t->level1_size++] = EMPTY;
3618 if (t->level1[index1] == EMPTY)
3620 if (t->level2_size == t->level2_alloc)
3622 size_t alloc = 2 * t->level2_alloc + 1;
3623 t->level2 = (uint32_t *) xrealloc ((char *) t->level2,
3624 (alloc << t->q) * sizeof (uint32_t));
3625 t->level2_alloc = alloc;
3627 i1 = t->level2_size << t->q;
3628 i2 = (t->level2_size + 1) << t->q;
3629 for (i = i1; i < i2; i++)
3630 t->level2[i] = EMPTY;
3631 t->level1[index1] = t->level2_size++;
3634 index2 += t->level1[index1] << t->q;
3636 if (t->level2[index2] == EMPTY)
3638 if (t->level3_size == t->level3_alloc)
3640 size_t alloc = 2 * t->level3_alloc + 1;
3641 t->level3 = (uint32_t *) xrealloc ((char *) t->level3,
3642 (alloc << t->p) * sizeof (uint32_t));
3643 t->level3_alloc = alloc;
3645 i1 = t->level3_size << t->p;
3646 i2 = (t->level3_size + 1) << t->p;
3647 for (i = i1; i < i2; i++)
3648 t->level3[i] = 0;
3649 t->level2[index2] = t->level3_size++;
3652 index3 += t->level2[index2] << t->p;
3654 t->level3[index3] |= (uint32_t)1 << index4;
3657 /* Finalize and shrink. */
3658 static void
3659 wctype_table_finalize (struct wctype_table *t)
3661 size_t i, j, k;
3662 uint32_t reorder3[t->level3_size];
3663 uint32_t reorder2[t->level2_size];
3664 uint32_t level1_offset, level2_offset, level3_offset;
3666 /* Uniquify level3 blocks. */
3667 k = 0;
3668 for (j = 0; j < t->level3_size; j++)
3670 for (i = 0; i < k; i++)
3671 if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3672 (1 << t->p) * sizeof (uint32_t)) == 0)
3673 break;
3674 /* Relocate block j to block i. */
3675 reorder3[j] = i;
3676 if (i == k)
3678 if (i != j)
3679 memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3680 (1 << t->p) * sizeof (uint32_t));
3681 k++;
3684 t->level3_size = k;
3686 for (i = 0; i < (t->level2_size << t->q); i++)
3687 if (t->level2[i] != EMPTY)
3688 t->level2[i] = reorder3[t->level2[i]];
3690 /* Uniquify level2 blocks. */
3691 k = 0;
3692 for (j = 0; j < t->level2_size; j++)
3694 for (i = 0; i < k; i++)
3695 if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3696 (1 << t->q) * sizeof (uint32_t)) == 0)
3697 break;
3698 /* Relocate block j to block i. */
3699 reorder2[j] = i;
3700 if (i == k)
3702 if (i != j)
3703 memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3704 (1 << t->q) * sizeof (uint32_t));
3705 k++;
3708 t->level2_size = k;
3710 for (i = 0; i < t->level1_size; i++)
3711 if (t->level1[i] != EMPTY)
3712 t->level1[i] = reorder2[t->level1[i]];
3714 /* Create and fill the resulting compressed representation. */
3715 t->result_size =
3716 5 * sizeof (uint32_t)
3717 + t->level1_size * sizeof (uint32_t)
3718 + (t->level2_size << t->q) * sizeof (uint32_t)
3719 + (t->level3_size << t->p) * sizeof (uint32_t);
3720 t->result = (char *) xmalloc (t->result_size);
3722 level1_offset =
3723 5 * sizeof (uint32_t);
3724 level2_offset =
3725 5 * sizeof (uint32_t)
3726 + t->level1_size * sizeof (uint32_t);
3727 level3_offset =
3728 5 * sizeof (uint32_t)
3729 + t->level1_size * sizeof (uint32_t)
3730 + (t->level2_size << t->q) * sizeof (uint32_t);
3732 ((uint32_t *) t->result)[0] = t->q + t->p + 5;
3733 ((uint32_t *) t->result)[1] = t->level1_size;
3734 ((uint32_t *) t->result)[2] = t->p + 5;
3735 ((uint32_t *) t->result)[3] = (1 << t->q) - 1;
3736 ((uint32_t *) t->result)[4] = (1 << t->p) - 1;
3738 for (i = 0; i < t->level1_size; i++)
3739 ((uint32_t *) (t->result + level1_offset))[i] =
3740 (t->level1[i] == EMPTY
3742 : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3744 for (i = 0; i < (t->level2_size << t->q); i++)
3745 ((uint32_t *) (t->result + level2_offset))[i] =
3746 (t->level2[i] == EMPTY
3748 : (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
3750 for (i = 0; i < (t->level3_size << t->p); i++)
3751 ((uint32_t *) (t->result + level3_offset))[i] = t->level3[i];
3753 if (t->level1_alloc > 0)
3754 free (t->level1);
3755 if (t->level2_alloc > 0)
3756 free (t->level2);
3757 if (t->level3_alloc > 0)
3758 free (t->level3);
3761 #define TABLE wcwidth_table
3762 #define ELEMENT uint8_t
3763 #define DEFAULT 0xff
3764 #include "3level.h"
3766 #define TABLE wctrans_table
3767 #define ELEMENT int32_t
3768 #define DEFAULT 0
3769 #define wctrans_table_add wctrans_table_add_internal
3770 #include "3level.h"
3771 #undef wctrans_table_add
3772 /* The wctrans_table must actually store the difference between the
3773 desired result and the argument. */
3774 static inline void
3775 wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
3777 wctrans_table_add_internal (t, wc, mapped_wc - wc);
3781 /* Flattens the included transliterations into a translit list.
3782 Inserts them in the list at `cursor', and returns the new cursor. */
3783 static struct translit_t **
3784 translit_flatten (struct locale_ctype_t *ctype,
3785 const struct charmap_t *charmap,
3786 struct translit_t **cursor)
3788 while (ctype->translit_include != NULL)
3790 const char *copy_locale = ctype->translit_include->copy_locale;
3791 const char *copy_repertoire = ctype->translit_include->copy_repertoire;
3792 struct localedef_t *other;
3794 /* Unchain the include statement. During the depth-first traversal
3795 we don't want to visit any locale more than once. */
3796 ctype->translit_include = ctype->translit_include->next;
3798 other = find_locale (LC_CTYPE, copy_locale, copy_repertoire, charmap);
3800 if (other == NULL || other->categories[LC_CTYPE].ctype == NULL)
3802 WITH_CUR_LOCALE (error (0, 0, _("\
3803 %s: transliteration data from locale `%s' not available"),
3804 "LC_CTYPE", copy_locale));
3806 else
3808 struct locale_ctype_t *other_ctype =
3809 other->categories[LC_CTYPE].ctype;
3811 cursor = translit_flatten (other_ctype, charmap, cursor);
3812 assert (other_ctype->translit_include == NULL);
3814 if (other_ctype->translit != NULL)
3816 /* Insert the other_ctype->translit list at *cursor. */
3817 struct translit_t *endp = other_ctype->translit;
3818 while (endp->next != NULL)
3819 endp = endp->next;
3821 endp->next = *cursor;
3822 *cursor = other_ctype->translit;
3824 /* Avoid any risk of circular lists. */
3825 other_ctype->translit = NULL;
3827 cursor = &endp->next;
3830 if (ctype->default_missing == NULL)
3831 ctype->default_missing = other_ctype->default_missing;
3835 return cursor;
3838 static void
3839 allocate_arrays (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
3840 struct repertoire_t *repertoire)
3842 size_t idx, nr;
3843 const void *key;
3844 size_t len;
3845 void *vdata;
3846 void *curs;
3848 /* You wonder about this amount of memory? This is only because some
3849 users do not manage to address the array with unsigned values or
3850 data types with range >= 256. '\200' would result in the array
3851 index -128. To help these poor people we duplicate the entries for
3852 128 up to 255 below the entry for \0. */
3853 ctype->ctype_b = (char_class_t *) xcalloc (256 + 128, sizeof (char_class_t));
3854 ctype->ctype32_b = (char_class32_t *) xcalloc (256, sizeof (char_class32_t));
3855 ctype->class_b = (uint32_t **)
3856 xmalloc (ctype->nr_charclass * sizeof (uint32_t *));
3857 ctype->class_3level = (struct iovec *)
3858 xmalloc (ctype->nr_charclass * sizeof (struct iovec));
3860 /* This is the array accessed using the multibyte string elements. */
3861 for (idx = 0; idx < 256; ++idx)
3862 ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
3864 /* Mirror first 127 entries. We must take care that entry -1 is not
3865 mirrored because EOF == -1. */
3866 for (idx = 0; idx < 127; ++idx)
3867 ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
3869 /* The 32 bit array contains all characters < 0x100. */
3870 for (idx = 0; idx < ctype->class_collection_act; ++idx)
3871 if (ctype->charnames[idx] < 0x100)
3872 ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
3874 for (nr = 0; nr < ctype->nr_charclass; nr++)
3876 ctype->class_b[nr] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
3878 /* We only set CLASS_B for the bits in the ISO C classes, not
3879 the user defined classes. The number should not change but
3880 who knows. */
3881 #define LAST_ISO_C_BIT 11
3882 if (nr <= LAST_ISO_C_BIT)
3883 for (idx = 0; idx < 256; ++idx)
3884 if (ctype->class256_collection[idx] & _ISbit (nr))
3885 ctype->class_b[nr][idx >> 5] |= (uint32_t) 1 << (idx & 0x1f);
3888 for (nr = 0; nr < ctype->nr_charclass; nr++)
3890 struct wctype_table t;
3892 t.p = 4; /* or: 5 */
3893 t.q = 7; /* or: 6 */
3894 wctype_table_init (&t);
3896 for (idx = 0; idx < ctype->class_collection_act; ++idx)
3897 if (ctype->class_collection[idx] & _ISwbit (nr))
3898 wctype_table_add (&t, ctype->charnames[idx]);
3900 wctype_table_finalize (&t);
3902 if (verbose)
3903 WITH_CUR_LOCALE (fprintf (stderr, _("\
3904 %s: table for class \"%s\": %lu bytes\n"),
3905 "LC_CTYPE", ctype->classnames[nr],
3906 (unsigned long int) t.result_size));
3908 ctype->class_3level[nr].iov_base = t.result;
3909 ctype->class_3level[nr].iov_len = t.result_size;
3912 /* Room for table of mappings. */
3913 ctype->map_b = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3914 ctype->map32_b = (uint32_t **) xmalloc (ctype->map_collection_nr
3915 * sizeof (uint32_t *));
3916 ctype->map_3level = (struct iovec *)
3917 xmalloc (ctype->map_collection_nr * sizeof (struct iovec));
3919 /* Fill in all mappings. */
3920 for (idx = 0; idx < 2; ++idx)
3922 unsigned int idx2;
3924 /* Allocate table. */
3925 ctype->map_b[idx] = (uint32_t *)
3926 xmalloc ((256 + 128) * sizeof (uint32_t));
3928 /* Copy values from collection. */
3929 for (idx2 = 0; idx2 < 256; ++idx2)
3930 ctype->map_b[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
3932 /* Mirror first 127 entries. We must take care not to map entry
3933 -1 because EOF == -1. */
3934 for (idx2 = 0; idx2 < 127; ++idx2)
3935 ctype->map_b[idx][idx2] = ctype->map_b[idx][256 + idx2];
3937 /* EOF must map to EOF. */
3938 ctype->map_b[idx][127] = EOF;
3941 for (idx = 0; idx < ctype->map_collection_nr; ++idx)
3943 unsigned int idx2;
3945 /* Allocate table. */
3946 ctype->map32_b[idx] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
3948 /* Copy values from collection. Default is identity mapping. */
3949 for (idx2 = 0; idx2 < 256; ++idx2)
3950 ctype->map32_b[idx][idx2] =
3951 (ctype->map_collection[idx][idx2] != 0
3952 ? ctype->map_collection[idx][idx2]
3953 : idx2);
3956 for (nr = 0; nr < ctype->map_collection_nr; nr++)
3958 struct wctrans_table t;
3960 t.p = 7;
3961 t.q = 9;
3962 wctrans_table_init (&t);
3964 for (idx = 0; idx < ctype->map_collection_act[nr]; ++idx)
3965 if (ctype->map_collection[nr][idx] != 0)
3966 wctrans_table_add (&t, ctype->charnames[idx],
3967 ctype->map_collection[nr][idx]);
3969 wctrans_table_finalize (&t);
3971 if (verbose)
3972 WITH_CUR_LOCALE (fprintf (stderr, _("\
3973 %s: table for map \"%s\": %lu bytes\n"),
3974 "LC_CTYPE", ctype->mapnames[nr],
3975 (unsigned long int) t.result_size));
3977 ctype->map_3level[nr].iov_base = t.result;
3978 ctype->map_3level[nr].iov_len = t.result_size;
3981 /* Extra array for class and map names. */
3982 ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3983 * sizeof (uint32_t));
3984 ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3985 * sizeof (uint32_t));
3987 ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
3988 ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
3990 /* Array for width information. Because the expected widths are very
3991 small (never larger than 2) we use only one single byte. This
3992 saves space.
3993 We put only printable characters in the table. wcwidth is specified
3994 to return -1 for non-printable characters. Doing the check here
3995 saves a run-time check.
3996 But we put L'\0' in the table. This again saves a run-time check. */
3998 struct wcwidth_table t;
4000 t.p = 7;
4001 t.q = 9;
4002 wcwidth_table_init (&t);
4004 /* First set all the printable characters of the character set to
4005 the default width. */
4006 curs = NULL;
4007 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
4009 struct charseq *data = (struct charseq *) vdata;
4011 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
4012 data->ucs4 = repertoire_find_value (ctype->repertoire,
4013 data->name, len);
4015 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
4017 uint32_t *class_bits =
4018 find_idx (ctype, &ctype->class_collection, NULL,
4019 &ctype->class_collection_act, data->ucs4);
4021 if (class_bits != NULL && (*class_bits & BITw (tok_print)))
4022 wcwidth_table_add (&t, data->ucs4, charmap->width_default);
4026 /* Now add the explicitly specified widths. */
4027 if (charmap->width_rules != NULL)
4029 size_t cnt;
4031 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
4033 unsigned char bytes[charmap->mb_cur_max];
4034 int nbytes = charmap->width_rules[cnt].from->nbytes;
4036 /* We have the range of character for which the width is
4037 specified described using byte sequences of the multibyte
4038 charset. We have to convert this to UCS4 now. And we
4039 cannot simply convert the beginning and the end of the
4040 sequence, we have to iterate over the byte sequence and
4041 convert it for every single character. */
4042 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
4044 while (nbytes < charmap->width_rules[cnt].to->nbytes
4045 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
4046 nbytes) <= 0)
4048 /* Find the UCS value for `bytes'. */
4049 int inner;
4050 uint32_t wch;
4051 struct charseq *seq =
4052 charmap_find_symbol (charmap, (char *) bytes, nbytes);
4054 if (seq == NULL)
4055 wch = ILLEGAL_CHAR_VALUE;
4056 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
4057 wch = seq->ucs4;
4058 else
4059 wch = repertoire_find_value (ctype->repertoire, seq->name,
4060 strlen (seq->name));
4062 if (wch != ILLEGAL_CHAR_VALUE)
4064 /* Store the value. */
4065 uint32_t *class_bits =
4066 find_idx (ctype, &ctype->class_collection, NULL,
4067 &ctype->class_collection_act, wch);
4069 if (class_bits != NULL && (*class_bits & BITw (tok_print)))
4070 wcwidth_table_add (&t, wch,
4071 charmap->width_rules[cnt].width);
4074 /* "Increment" the bytes sequence. */
4075 inner = nbytes - 1;
4076 while (inner >= 0 && bytes[inner] == 0xff)
4077 --inner;
4079 if (inner < 0)
4081 /* We have to extend the byte sequence. */
4082 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
4083 break;
4085 bytes[0] = 1;
4086 memset (&bytes[1], 0, nbytes);
4087 ++nbytes;
4089 else
4091 ++bytes[inner];
4092 while (++inner < nbytes)
4093 bytes[inner] = 0;
4099 /* Set the width of L'\0' to 0. */
4100 wcwidth_table_add (&t, 0, 0);
4102 wcwidth_table_finalize (&t);
4104 if (verbose)
4105 WITH_CUR_LOCALE (fprintf (stderr, _("%s: table for width: %lu bytes\n"),
4106 "LC_CTYPE", (unsigned long int) t.result_size));
4108 ctype->width.iov_base = t.result;
4109 ctype->width.iov_len = t.result_size;
4112 /* Set MB_CUR_MAX. */
4113 ctype->mb_cur_max = charmap->mb_cur_max;
4115 /* Now determine the table for the transliteration information.
4117 XXX It is not yet clear to me whether it is worth implementing a
4118 complicated algorithm which uses a hash table to locate the entries.
4119 For now I'll use a simple array which can be searching using binary
4120 search. */
4121 if (ctype->translit_include != NULL)
4122 /* Traverse the locales mentioned in the `include' statements in a
4123 depth-first way and fold in their transliteration information. */
4124 translit_flatten (ctype, charmap, &ctype->translit);
4126 if (ctype->translit != NULL)
4128 /* First count how many entries we have. This is the upper limit
4129 since some entries from the included files might be overwritten. */
4130 size_t number = 0;
4131 size_t cnt;
4132 struct translit_t *runp = ctype->translit;
4133 struct translit_t **sorted;
4134 size_t from_len, to_len;
4136 while (runp != NULL)
4138 ++number;
4139 runp = runp->next;
4142 /* Next we allocate an array large enough and fill in the values. */
4143 sorted = (struct translit_t **) alloca (number
4144 * sizeof (struct translit_t **));
4145 runp = ctype->translit;
4146 number = 0;
4149 /* Search for the place where to insert this string.
4150 XXX Better use a real sorting algorithm later. */
4151 size_t idx = 0;
4152 int replace = 0;
4154 while (idx < number)
4156 int res = wcscmp ((const wchar_t *) sorted[idx]->from,
4157 (const wchar_t *) runp->from);
4158 if (res == 0)
4160 replace = 1;
4161 break;
4163 if (res > 0)
4164 break;
4165 ++idx;
4168 if (replace)
4169 sorted[idx] = runp;
4170 else
4172 memmove (&sorted[idx + 1], &sorted[idx],
4173 (number - idx) * sizeof (struct translit_t *));
4174 sorted[idx] = runp;
4175 ++number;
4178 runp = runp->next;
4180 while (runp != NULL);
4182 /* The next step is putting all the possible transliteration
4183 strings in one memory block so that we can write it out.
4184 We need several different blocks:
4185 - index to the from-string array
4186 - from-string array
4187 - index to the to-string array
4188 - to-string array.
4190 from_len = to_len = 0;
4191 for (cnt = 0; cnt < number; ++cnt)
4193 struct translit_to_t *srunp;
4194 from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4195 srunp = sorted[cnt]->to;
4196 while (srunp != NULL)
4198 to_len += wcslen ((const wchar_t *) srunp->str) + 1;
4199 srunp = srunp->next;
4201 /* Plus one for the extra NUL character marking the end of
4202 the list for the current entry. */
4203 ++to_len;
4206 /* We can allocate the arrays for the results. */
4207 ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
4208 ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
4209 ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
4210 ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
4212 from_len = 0;
4213 to_len = 0;
4214 for (cnt = 0; cnt < number; ++cnt)
4216 size_t len;
4217 struct translit_to_t *srunp;
4219 ctype->translit_from_idx[cnt] = from_len;
4220 ctype->translit_to_idx[cnt] = to_len;
4222 len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4223 wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
4224 (const wchar_t *) sorted[cnt]->from, len);
4225 from_len += len;
4227 ctype->translit_to_idx[cnt] = to_len;
4228 srunp = sorted[cnt]->to;
4229 while (srunp != NULL)
4231 len = wcslen ((const wchar_t *) srunp->str) + 1;
4232 wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
4233 (const wchar_t *) srunp->str, len);
4234 to_len += len;
4235 srunp = srunp->next;
4237 ctype->translit_to_tbl[to_len++] = L'\0';
4240 /* Store the information about the length. */
4241 ctype->translit_idx_size = number;
4242 ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
4243 ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
4245 else
4247 /* Provide some dummy pointers since we have nothing to write out. */
4248 static uint32_t no_str = { 0 };
4250 ctype->translit_from_idx = &no_str;
4251 ctype->translit_from_tbl = &no_str;
4252 ctype->translit_to_tbl = &no_str;
4253 ctype->translit_idx_size = 0;
4254 ctype->translit_from_tbl_size = 0;
4255 ctype->translit_to_tbl_size = 0;