SysV shared memory definitions for Linux/PA.
[glibc.git] / locale / programs / ld-ctype.c
blob2a2c8314815602effd99d137e7bf1ce50fc423f8
1 /* Copyright (C) 1995-1999, 2000, 2001, 2002 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18 02111-1307 USA. */
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
24 #include <alloca.h>
25 #include <byteswap.h>
26 #include <endian.h>
27 #include <errno.h>
28 #include <limits.h>
29 #include <obstack.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <wchar.h>
33 #include <wctype.h>
34 #include <sys/uio.h>
36 #include "localedef.h"
37 #include "charmap.h"
38 #include "localeinfo.h"
39 #include "langinfo.h"
40 #include "linereader.h"
41 #include "locfile-token.h"
42 #include "locfile.h"
44 #include <assert.h>
#ifdef PREDEFINED_CLASSES
/* These are the extra bits not in wctype.h since these are not preallocated
   classes.  The constants are unsigned: `1 << 31' would shift into the
   sign bit of an `int', which is undefined behaviour.  */
# define _ISwspecial1   (1u << 29)
# define _ISwspecial2   (1u << 30)
# define _ISwspecial3   (1u << 31)
#endif


/* The bit used for representing a special class.  BITPOS gives the
   position of a class token relative to `tok_upper'; BIT and BITw feed
   that position to _ISbit and _ISwbit respectively.  */
#define BITPOS(class) ((class) - tok_upper)
#define BIT(class) (_ISbit (BITPOS (class)))
#define BITw(class) (_ISwbit (BITPOS (class)))

/* Expand to the object find_idx returns for VALUE in the table
   CTYPE->COLLECTION (optionally subscripted by IDX).  The result is
   dereferenced, so the macro can be read from and assigned to.  */
#define ELEM(ctype, collection, idx, value)                                   \
  *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,     \
             &ctype->collection##_act idx, value)


/* To be compatible with former implementations we for now restrict
   the number of bits for character classes to 16.  When compatibility
   is not necessary anymore increase the number to 32.  */
#define char_class_t uint16_t
#define char_class32_t uint32_t
/* Type to describe a transliteration action.  We have a possibly
   multiple character from-string and a set of multiple character
   to-strings.  All are 32bit values since this is what is used in
   the gconv functions.  */
struct translit_to_t
{
  /* One to-string, stored as 32-bit characters.  */
  uint32_t *str;

  /* Link to the next to-string of the same rule.  */
  struct translit_to_t *next;
};
/* One transliteration rule: a from-string and its list of to-strings.  */
struct translit_t
{
  /* The from-string, stored as 32-bit characters.  */
  uint32_t *from;

  /* File name and line number recorded for this rule (presumably where
     it was defined, for diagnostics -- confirm against the parser).  */
  const char *fname;
  size_t lineno;

  /* List of to-strings for this rule.  */
  struct translit_to_t *to;

  /* Link to the next rule.  */
  struct translit_t *next;
};
/* One entry of the transliteration ignore list.  ctype_finish orders
   these entries by their FROM value.  */
struct translit_ignore_t
{
  /* Range description: first value, last value and step
     (NOTE(review): inclusive/exclusive bounds are not visible here).  */
  uint32_t from;
  uint32_t to;
  uint32_t step;

  /* File name and line number recorded for this entry.  */
  const char *fname;
  size_t lineno;

  /* Link to the next entry.  */
  struct translit_ignore_t *next;
};
/* Type to describe a transliteration include statement.  */
struct translit_include_t
{
  /* Names of the locale and of the repertoire given in the statement.  */
  const char *copy_locale;
  const char *copy_repertoire;

  /* Link to the next include statement.  */
  struct translit_include_t *next;
};
119 /* Sparse table of uint32_t. */
120 #define TABLE idx_table
121 #define ELEMENT uint32_t
122 #define DEFAULT ((uint32_t) ~0)
123 #define NO_FINALIZE
124 #include "3level.h"
127 /* The real definition of the struct for the LC_CTYPE locale. */
128 struct locale_ctype_t
130 uint32_t *charnames;
131 size_t charnames_max;
132 size_t charnames_act;
133 /* An index lookup table, to speedup find_idx. */
134 struct idx_table charnames_idx;
136 struct repertoire_t *repertoire;
138 /* We will allow up to 8 * sizeof (uint32_t) character classes. */
139 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
140 size_t nr_charclass;
141 const char *classnames[MAX_NR_CHARCLASS];
142 uint32_t last_class_char;
143 uint32_t class256_collection[256];
144 uint32_t *class_collection;
145 size_t class_collection_max;
146 size_t class_collection_act;
147 uint32_t class_done;
148 uint32_t class_offset;
150 struct charseq **mbdigits;
151 size_t mbdigits_act;
152 size_t mbdigits_max;
153 uint32_t *wcdigits;
154 size_t wcdigits_act;
155 size_t wcdigits_max;
157 struct charseq *mboutdigits[10];
158 uint32_t wcoutdigits[10];
159 size_t outdigits_act;
161 /* If the following number ever turns out to be too small simply
162 increase it. But I doubt it will. --drepper@gnu */
163 #define MAX_NR_CHARMAP 16
164 const char *mapnames[MAX_NR_CHARMAP];
165 uint32_t *map_collection[MAX_NR_CHARMAP];
166 uint32_t map256_collection[2][256];
167 size_t map_collection_max[MAX_NR_CHARMAP];
168 size_t map_collection_act[MAX_NR_CHARMAP];
169 size_t map_collection_nr;
170 size_t last_map_idx;
171 int tomap_done[MAX_NR_CHARMAP];
172 uint32_t map_offset;
174 /* Transliteration information. */
175 struct translit_include_t *translit_include;
176 struct translit_t *translit;
177 struct translit_ignore_t *translit_ignore;
178 uint32_t ntranslit_ignore;
180 uint32_t *default_missing;
181 const char *default_missing_file;
182 size_t default_missing_lineno;
184 /* The arrays for the binary representation. */
185 char_class_t *ctype_b;
186 char_class32_t *ctype32_b;
187 uint32_t **map_b;
188 uint32_t **map32_b;
189 uint32_t **class_b;
190 struct iovec *class_3level;
191 struct iovec *map_3level;
192 uint32_t *class_name_ptr;
193 uint32_t *map_name_ptr;
194 struct iovec width;
195 uint32_t mb_cur_max;
196 const char *codeset_name;
197 uint32_t *translit_from_idx;
198 uint32_t *translit_from_tbl;
199 uint32_t *translit_to_idx;
200 uint32_t *translit_to_tbl;
201 uint32_t translit_idx_size;
202 size_t translit_from_tbl_size;
203 size_t translit_to_tbl_size;
205 struct obstack mempool;
/* Marker for an empty slot.  This has the value 0xFFFFFFFF, regardless
   whether 'int' is 16 bit, 32 bit, or 64 bit.  */
#define EMPTY ((uint32_t) ~0)


/* Allocation hooks the obstack macros expand to.  */
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free
/* Prototypes for local functions.  */
static void ctype_startup (struct linereader *lr, struct localedef_t *locale,
                           const struct charmap_t *charmap,
                           struct localedef_t *copy_locale,
                           int ignore_content);
static void ctype_class_new (struct linereader *lr,
                             struct locale_ctype_t *ctype, const char *name);
static void ctype_map_new (struct linereader *lr,
                           struct locale_ctype_t *ctype,
                           const char *name, const struct charmap_t *charmap);
static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table,
                           size_t *max, size_t *act, unsigned int idx);
static void set_class_defaults (struct locale_ctype_t *ctype,
                                const struct charmap_t *charmap,
                                struct repertoire_t *repertoire);
static void allocate_arrays (struct locale_ctype_t *ctype,
                             const struct charmap_t *charmap,
                             struct repertoire_t *repertoire);
/* Symbolic names tried for the decimal digits, indexed by digit value.  */
static const char *longnames[] =
{
  "zero", "one", "two", "three", "four",
  "five", "six", "seven", "eight", "nine"
};
/* The same digits spelled as UCS names U00000030 ... U00000039.  */
static const char *uninames[] =
{
  "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
  "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
};
/* The digits as single bytes, indexed by digit value.  */
static const unsigned char digits[] = "0123456789";
251 static void
252 ctype_startup (struct linereader *lr, struct localedef_t *locale,
253 const struct charmap_t *charmap,
254 struct localedef_t *copy_locale, int ignore_content)
256 unsigned int cnt;
257 struct locale_ctype_t *ctype;
259 if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL)
261 if (copy_locale == NULL)
263 /* Allocate the needed room. */
264 locale->categories[LC_CTYPE].ctype = ctype =
265 (struct locale_ctype_t *) xcalloc (1,
266 sizeof (struct locale_ctype_t));
268 /* We have seen no names yet. */
269 ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
270 ctype->charnames =
271 (unsigned int *) xmalloc (ctype->charnames_max
272 * sizeof (unsigned int));
273 for (cnt = 0; cnt < 256; ++cnt)
274 ctype->charnames[cnt] = cnt;
275 ctype->charnames_act = 256;
276 idx_table_init (&ctype->charnames_idx);
278 /* Fill character class information. */
279 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
280 /* The order of the following instructions determines the bit
281 positions! */
282 ctype_class_new (lr, ctype, "upper");
283 ctype_class_new (lr, ctype, "lower");
284 ctype_class_new (lr, ctype, "alpha");
285 ctype_class_new (lr, ctype, "digit");
286 ctype_class_new (lr, ctype, "xdigit");
287 ctype_class_new (lr, ctype, "space");
288 ctype_class_new (lr, ctype, "print");
289 ctype_class_new (lr, ctype, "graph");
290 ctype_class_new (lr, ctype, "blank");
291 ctype_class_new (lr, ctype, "cntrl");
292 ctype_class_new (lr, ctype, "punct");
293 ctype_class_new (lr, ctype, "alnum");
294 #ifdef PREDEFINED_CLASSES
295 /* The following are extensions from ISO 14652. */
296 ctype_class_new (lr, ctype, "left_to_right");
297 ctype_class_new (lr, ctype, "right_to_left");
298 ctype_class_new (lr, ctype, "num_terminator");
299 ctype_class_new (lr, ctype, "num_separator");
300 ctype_class_new (lr, ctype, "segment_separator");
301 ctype_class_new (lr, ctype, "block_separator");
302 ctype_class_new (lr, ctype, "direction_control");
303 ctype_class_new (lr, ctype, "sym_swap_layout");
304 ctype_class_new (lr, ctype, "char_shape_selector");
305 ctype_class_new (lr, ctype, "num_shape_selector");
306 ctype_class_new (lr, ctype, "non_spacing");
307 ctype_class_new (lr, ctype, "non_spacing_level3");
308 ctype_class_new (lr, ctype, "normal_connect");
309 ctype_class_new (lr, ctype, "r_connect");
310 ctype_class_new (lr, ctype, "no_connect");
311 ctype_class_new (lr, ctype, "no_connect-space");
312 ctype_class_new (lr, ctype, "vowel_connect");
313 #endif
315 ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
316 ctype->class_collection
317 = (uint32_t *) xcalloc (sizeof (unsigned long int),
318 ctype->class_collection_max);
319 ctype->class_collection_act = 256;
321 /* Fill character map information. */
322 ctype->last_map_idx = MAX_NR_CHARMAP;
323 ctype_map_new (lr, ctype, "toupper", charmap);
324 ctype_map_new (lr, ctype, "tolower", charmap);
325 #ifdef PREDEFINED_CLASSES
326 ctype_map_new (lr, ctype, "tosymmetric", charmap);
327 #endif
329 /* Fill first 256 entries in `toXXX' arrays. */
330 for (cnt = 0; cnt < 256; ++cnt)
332 ctype->map_collection[0][cnt] = cnt;
333 ctype->map_collection[1][cnt] = cnt;
334 #ifdef PREDEFINED_CLASSES
335 ctype->map_collection[2][cnt] = cnt;
336 #endif
337 ctype->map256_collection[0][cnt] = cnt;
338 ctype->map256_collection[1][cnt] = cnt;
341 obstack_init (&ctype->mempool);
343 else
344 ctype = locale->categories[LC_CTYPE].ctype =
345 copy_locale->categories[LC_CTYPE].ctype;
350 void
351 ctype_finish (struct localedef_t *locale, const struct charmap_t *charmap)
353 /* See POSIX.2, table 2-6 for the meaning of the following table. */
354 #define NCLASS 12
355 static const struct
357 const char *name;
358 const char allow[NCLASS];
360 valid_table[NCLASS] =
362 /* The order is important. See token.h for more information.
363 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
364 { "upper", "--MX-XDDXXX-" },
365 { "lower", "--MX-XDDXXX-" },
366 { "alpha", "---X-XDDXXX-" },
367 { "digit", "XXX--XDDXXX-" },
368 { "xdigit", "-----XDDXXX-" },
369 { "space", "XXXXX------X" },
370 { "print", "---------X--" },
371 { "graph", "---------X--" },
372 { "blank", "XXXXXM-----X" },
373 { "cntrl", "XXXXX-XX--XX" },
374 { "punct", "XXXXX-DD-X-X" },
375 { "alnum", "-----XDDXXX-" }
377 size_t cnt;
378 int cls1, cls2;
379 uint32_t space_value;
380 struct charseq *space_seq;
381 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
382 int warned;
383 const void *key;
384 size_t len;
385 void *vdata;
386 void *curs;
388 /* Now resolve copying and also handle completely missing definitions. */
389 if (ctype == NULL)
391 const char *repertoire_name;
393 /* First see whether we were supposed to copy. If yes, find the
394 actual definition. */
395 if (locale->copy_name[LC_CTYPE] != NULL)
397 /* Find the copying locale. This has to happen transitively since
398 the locale we are copying from might also copying another one. */
399 struct localedef_t *from = locale;
402 from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
403 from->repertoire_name, charmap);
404 while (from->categories[LC_CTYPE].ctype == NULL
405 && from->copy_name[LC_CTYPE] != NULL);
407 ctype = locale->categories[LC_CTYPE].ctype
408 = from->categories[LC_CTYPE].ctype;
411 /* If there is still no definition issue an warning and create an
412 empty one. */
413 if (ctype == NULL)
415 if (! be_quiet)
416 WITH_CUR_LOCALE (error (0, 0, _("\
417 No definition for %s category found"), "LC_CTYPE"));
418 ctype_startup (NULL, locale, charmap, NULL, 0);
419 ctype = locale->categories[LC_CTYPE].ctype;
422 /* Get the repertoire we have to use. */
423 repertoire_name = locale->repertoire_name ?: repertoire_global;
424 if (repertoire_name != NULL)
425 ctype->repertoire = repertoire_read (repertoire_name);
428 /* We need the name of the currently used 8-bit character set to
429 make correct conversion between this 8-bit representation and the
430 ISO 10646 character set used internally for wide characters. */
431 ctype->codeset_name = charmap->code_set_name;
432 if (ctype->codeset_name == NULL)
434 if (! be_quiet)
435 WITH_CUR_LOCALE (error (0, 0, _("\
436 No character set name specified in charmap")));
437 ctype->codeset_name = "//UNKNOWN//";
440 /* Set default value for classes not specified. */
441 set_class_defaults (ctype, charmap, ctype->repertoire);
443 /* Check according to table. */
444 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
446 uint32_t tmp = ctype->class_collection[cnt];
448 if (tmp != 0)
450 for (cls1 = 0; cls1 < NCLASS; ++cls1)
451 if ((tmp & _ISwbit (cls1)) != 0)
452 for (cls2 = 0; cls2 < NCLASS; ++cls2)
453 if (valid_table[cls1].allow[cls2] != '-')
455 int eq = (tmp & _ISwbit (cls2)) != 0;
456 switch (valid_table[cls1].allow[cls2])
458 case 'M':
459 if (!eq)
461 uint32_t value = ctype->charnames[cnt];
463 if (!be_quiet)
464 WITH_CUR_LOCALE (error (0, 0, _("\
465 character L'\\u%0*x' in class `%s' must be in class `%s'"),
466 value > 0xffff ? 8 : 4,
467 value,
468 valid_table[cls1].name,
469 valid_table[cls2].name));
471 break;
473 case 'X':
474 if (eq)
476 uint32_t value = ctype->charnames[cnt];
478 if (!be_quiet)
479 WITH_CUR_LOCALE (error (0, 0, _("\
480 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
481 value > 0xffff ? 8 : 4,
482 value,
483 valid_table[cls1].name,
484 valid_table[cls2].name));
486 break;
488 case 'D':
489 ctype->class_collection[cnt] |= _ISwbit (cls2);
490 break;
492 default:
493 WITH_CUR_LOCALE (error (5, 0, _("\
494 internal error in %s, line %u"), __FUNCTION__, __LINE__));
500 for (cnt = 0; cnt < 256; ++cnt)
502 uint32_t tmp = ctype->class256_collection[cnt];
504 if (tmp != 0)
506 for (cls1 = 0; cls1 < NCLASS; ++cls1)
507 if ((tmp & _ISbit (cls1)) != 0)
508 for (cls2 = 0; cls2 < NCLASS; ++cls2)
509 if (valid_table[cls1].allow[cls2] != '-')
511 int eq = (tmp & _ISbit (cls2)) != 0;
512 switch (valid_table[cls1].allow[cls2])
514 case 'M':
515 if (!eq)
517 char buf[17];
519 snprintf (buf, sizeof buf, "\\%Zo", cnt);
521 if (!be_quiet)
522 WITH_CUR_LOCALE (error (0, 0, _("\
523 character '%s' in class `%s' must be in class `%s'"),
524 buf,
525 valid_table[cls1].name,
526 valid_table[cls2].name));
528 break;
530 case 'X':
531 if (eq)
533 char buf[17];
535 snprintf (buf, sizeof buf, "\\%Zo", cnt);
537 if (!be_quiet)
538 WITH_CUR_LOCALE (error (0, 0, _("\
539 character '%s' in class `%s' must not be in class `%s'"),
540 buf,
541 valid_table[cls1].name,
542 valid_table[cls2].name));
544 break;
546 case 'D':
547 ctype->class256_collection[cnt] |= _ISbit (cls2);
548 break;
550 default:
551 WITH_CUR_LOCALE (error (5, 0, _("\
552 internal error in %s, line %u"), __FUNCTION__, __LINE__));
558 /* ... and now test <SP> as a special case. */
559 space_value = 32;
560 if (((cnt = BITPOS (tok_space),
561 (ELEM (ctype, class_collection, , space_value)
562 & BITw (tok_space)) == 0)
563 || (cnt = BITPOS (tok_blank),
564 (ELEM (ctype, class_collection, , space_value)
565 & BITw (tok_blank)) == 0)))
567 if (!be_quiet)
568 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
569 valid_table[cnt].name));
571 else if (((cnt = BITPOS (tok_punct),
572 (ELEM (ctype, class_collection, , space_value)
573 & BITw (tok_punct)) != 0)
574 || (cnt = BITPOS (tok_graph),
575 (ELEM (ctype, class_collection, , space_value)
576 & BITw (tok_graph))
577 != 0)))
579 if (!be_quiet)
580 WITH_CUR_LOCALE (error (0, 0, _("\
581 <SP> character must not be in class `%s'"),
582 valid_table[cnt].name));
584 else
585 ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
587 space_seq = charmap_find_value (charmap, "SP", 2);
588 if (space_seq == NULL)
589 space_seq = charmap_find_value (charmap, "space", 5);
590 if (space_seq == NULL)
591 space_seq = charmap_find_value (charmap, "U00000020", 9);
592 if (space_seq == NULL || space_seq->nbytes != 1)
594 if (!be_quiet)
595 WITH_CUR_LOCALE (error (0, 0, _("\
596 character <SP> not defined in character map")));
598 else if (((cnt = BITPOS (tok_space),
599 (ctype->class256_collection[space_seq->bytes[0]]
600 & BIT (tok_space)) == 0)
601 || (cnt = BITPOS (tok_blank),
602 (ctype->class256_collection[space_seq->bytes[0]]
603 & BIT (tok_blank)) == 0)))
605 if (!be_quiet)
606 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
607 valid_table[cnt].name));
609 else if (((cnt = BITPOS (tok_punct),
610 (ctype->class256_collection[space_seq->bytes[0]]
611 & BIT (tok_punct)) != 0)
612 || (cnt = BITPOS (tok_graph),
613 (ctype->class256_collection[space_seq->bytes[0]]
614 & BIT (tok_graph)) != 0)))
616 if (!be_quiet)
617 WITH_CUR_LOCALE (error (0, 0, _("\
618 <SP> character must not be in class `%s'"),
619 valid_table[cnt].name));
621 else
622 ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
624 /* Now that the tests are done make sure the name array contains all
625 characters which are handled in the WIDTH section of the
626 character set definition file. */
627 if (charmap->width_rules != NULL)
628 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
630 unsigned char bytes[charmap->mb_cur_max];
631 int nbytes = charmap->width_rules[cnt].from->nbytes;
633 /* We have the range of character for which the width is
634 specified described using byte sequences of the multibyte
635 charset. We have to convert this to UCS4 now. And we
636 cannot simply convert the beginning and the end of the
637 sequence, we have to iterate over the byte sequence and
638 convert it for every single character. */
639 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
641 while (nbytes < charmap->width_rules[cnt].to->nbytes
642 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
643 nbytes) <= 0)
645 /* Find the UCS value for `bytes'. */
646 int inner;
647 uint32_t wch;
648 struct charseq *seq = charmap_find_symbol (charmap, bytes, nbytes);
650 if (seq == NULL)
651 wch = ILLEGAL_CHAR_VALUE;
652 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
653 wch = seq->ucs4;
654 else
655 wch = repertoire_find_value (ctype->repertoire, seq->name,
656 strlen (seq->name));
658 if (wch != ILLEGAL_CHAR_VALUE)
659 /* We are only interested in the side-effects of the
660 `find_idx' call. It will add appropriate entries in
661 the name array if this is necessary. */
662 (void) find_idx (ctype, NULL, NULL, NULL, wch);
664 /* "Increment" the bytes sequence. */
665 inner = nbytes - 1;
666 while (inner >= 0 && bytes[inner] == 0xff)
667 --inner;
669 if (inner < 0)
671 /* We have to extend the byte sequence. */
672 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
673 break;
675 bytes[0] = 1;
676 memset (&bytes[1], 0, nbytes);
677 ++nbytes;
679 else
681 ++bytes[inner];
682 while (++inner < nbytes)
683 bytes[inner] = 0;
688 /* Now set all the other characters of the character set to the
689 default width. */
690 curs = NULL;
691 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
693 struct charseq *data = (struct charseq *) vdata;
695 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
696 data->ucs4 = repertoire_find_value (ctype->repertoire,
697 data->name, len);
699 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
700 (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
703 /* There must be a multiple of 10 digits. */
704 if (ctype->mbdigits_act % 10 != 0)
706 assert (ctype->mbdigits_act == ctype->wcdigits_act);
707 ctype->wcdigits_act -= ctype->mbdigits_act % 10;
708 ctype->mbdigits_act -= ctype->mbdigits_act % 10;
709 WITH_CUR_LOCALE (error (0, 0, _("\
710 `digit' category has not entries in groups of ten")));
713 /* Check the input digits. There must be a multiple of ten available.
714 In each group it could be that one or the other character is missing.
715 In this case the whole group must be removed. */
716 cnt = 0;
717 while (cnt < ctype->mbdigits_act)
719 size_t inner;
720 for (inner = 0; inner < 10; ++inner)
721 if (ctype->mbdigits[cnt + inner] == NULL)
722 break;
724 if (inner == 10)
725 cnt += 10;
726 else
728 /* Remove the group. */
729 memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
730 ((ctype->wcdigits_act - cnt - 10)
731 * sizeof (ctype->mbdigits[0])));
732 ctype->mbdigits_act -= 10;
736 /* If no input digits are given use the default. */
737 if (ctype->mbdigits_act == 0)
739 if (ctype->mbdigits_max == 0)
741 ctype->mbdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
742 10 * sizeof (struct charseq *));
743 ctype->mbdigits_max = 10;
746 for (cnt = 0; cnt < 10; ++cnt)
748 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
749 digits + cnt, 1);
750 if (ctype->mbdigits[cnt] == NULL)
752 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
753 longnames[cnt],
754 strlen (longnames[cnt]));
755 if (ctype->mbdigits[cnt] == NULL)
757 /* Hum, this ain't good. */
758 WITH_CUR_LOCALE (error (0, 0, _("\
759 no input digits defined and none of the standard names in the charmap")));
761 ctype->mbdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
762 sizeof (struct charseq) + 1);
764 /* This is better than nothing. */
765 ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
766 ctype->mbdigits[cnt]->nbytes = 1;
771 ctype->mbdigits_act = 10;
774 /* Check the wide character input digits. There must be a multiple
775 of ten available. In each group it could be that one or the other
776 character is missing. In this case the whole group must be
777 removed. */
778 cnt = 0;
779 while (cnt < ctype->wcdigits_act)
781 size_t inner;
782 for (inner = 0; inner < 10; ++inner)
783 if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
784 break;
786 if (inner == 10)
787 cnt += 10;
788 else
790 /* Remove the group. */
791 memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
792 ((ctype->wcdigits_act - cnt - 10)
793 * sizeof (ctype->wcdigits[0])));
794 ctype->wcdigits_act -= 10;
798 /* If no input digits are given use the default. */
799 if (ctype->wcdigits_act == 0)
801 if (ctype->wcdigits_max == 0)
803 ctype->wcdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
804 10 * sizeof (uint32_t));
805 ctype->wcdigits_max = 10;
808 for (cnt = 0; cnt < 10; ++cnt)
809 ctype->wcdigits[cnt] = L'0' + cnt;
811 ctype->mbdigits_act = 10;
814 /* Check the outdigits. */
815 warned = 0;
816 for (cnt = 0; cnt < 10; ++cnt)
817 if (ctype->mboutdigits[cnt] == NULL)
819 static struct charseq replace[2];
821 if (!warned)
823 WITH_CUR_LOCALE (error (0, 0, _("\
824 not all characters used in `outdigit' are available in the charmap")));
825 warned = 1;
828 replace[0].nbytes = 1;
829 replace[0].bytes[0] = '?';
830 replace[0].bytes[1] = '\0';
831 ctype->mboutdigits[cnt] = &replace[0];
834 warned = 0;
835 for (cnt = 0; cnt < 10; ++cnt)
836 if (ctype->wcoutdigits[cnt] == 0)
838 if (!warned)
840 WITH_CUR_LOCALE (error (0, 0, _("\
841 not all characters used in `outdigit' are available in the repertoire")));
842 warned = 1;
845 ctype->wcoutdigits[cnt] = L'?';
848 /* Sort the entries in the translit_ignore list. */
849 if (ctype->translit_ignore != NULL)
851 struct translit_ignore_t *firstp = ctype->translit_ignore;
852 struct translit_ignore_t *runp;
854 ctype->ntranslit_ignore = 1;
856 for (runp = firstp->next; runp != NULL; runp = runp->next)
858 struct translit_ignore_t *lastp = NULL;
859 struct translit_ignore_t *cmpp;
861 ++ctype->ntranslit_ignore;
863 for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next)
864 if (runp->from < cmpp->from)
865 break;
867 runp->next = lastp;
868 if (lastp == NULL)
869 firstp = runp;
872 ctype->translit_ignore = firstp;
877 void
878 ctype_output (struct localedef_t *locale, const struct charmap_t *charmap,
879 const char *output_path)
881 static const char nulbytes[4] = { 0, 0, 0, 0 };
882 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
883 const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)
884 + ctype->nr_charclass + ctype->map_collection_nr);
885 struct iovec iov[2 + nelems + 2 * ctype->nr_charclass
886 + ctype->map_collection_nr + 4];
887 struct locale_file data;
888 uint32_t idx[nelems + 1];
889 uint32_t default_missing_len;
890 size_t elem, cnt, offset, total;
891 char *cp;
893 /* Now prepare the output: Find the sizes of the table we can use. */
894 allocate_arrays (ctype, charmap, ctype->repertoire);
896 data.magic = LIMAGIC (LC_CTYPE);
897 data.n = nelems;
898 iov[0].iov_base = (void *) &data;
899 iov[0].iov_len = sizeof (data);
901 iov[1].iov_base = (void *) idx;
902 iov[1].iov_len = nelems * sizeof (uint32_t);
904 idx[0] = iov[0].iov_len + iov[1].iov_len;
905 offset = 0;
907 for (elem = 0; elem < nelems; ++elem)
909 if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))
910 switch (elem)
912 #define CTYPE_EMPTY(name) \
913 case name: \
914 iov[2 + elem + offset].iov_base = NULL; \
915 iov[2 + elem + offset].iov_len = 0; \
916 idx[elem + 1] = idx[elem]; \
917 break
919 CTYPE_EMPTY(_NL_CTYPE_GAP1);
920 CTYPE_EMPTY(_NL_CTYPE_GAP2);
921 CTYPE_EMPTY(_NL_CTYPE_GAP3);
922 CTYPE_EMPTY(_NL_CTYPE_GAP4);
923 CTYPE_EMPTY(_NL_CTYPE_GAP5);
924 CTYPE_EMPTY(_NL_CTYPE_GAP6);
926 #define CTYPE_DATA(name, base, len) \
927 case _NL_ITEM_INDEX (name): \
928 iov[2 + elem + offset].iov_base = (base); \
929 iov[2 + elem + offset].iov_len = (len); \
930 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
931 break
933 CTYPE_DATA (_NL_CTYPE_CLASS,
934 ctype->ctype_b,
935 (256 + 128) * sizeof (char_class_t));
937 CTYPE_DATA (_NL_CTYPE_TOUPPER,
938 ctype->map_b[0],
939 (256 + 128) * sizeof (uint32_t));
940 CTYPE_DATA (_NL_CTYPE_TOLOWER,
941 ctype->map_b[1],
942 (256 + 128) * sizeof (uint32_t));
944 CTYPE_DATA (_NL_CTYPE_TOUPPER32,
945 ctype->map32_b[0],
946 256 * sizeof (uint32_t));
947 CTYPE_DATA (_NL_CTYPE_TOLOWER32,
948 ctype->map32_b[1],
949 256 * sizeof (uint32_t));
951 CTYPE_DATA (_NL_CTYPE_CLASS32,
952 ctype->ctype32_b,
953 256 * sizeof (char_class32_t));
955 CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET,
956 &ctype->class_offset, sizeof (uint32_t));
958 CTYPE_DATA (_NL_CTYPE_MAP_OFFSET,
959 &ctype->map_offset, sizeof (uint32_t));
961 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE,
962 &ctype->translit_idx_size, sizeof (uint32_t));
964 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX,
965 ctype->translit_from_idx,
966 ctype->translit_idx_size * sizeof (uint32_t));
968 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL,
969 ctype->translit_from_tbl,
970 ctype->translit_from_tbl_size);
972 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX,
973 ctype->translit_to_idx,
974 ctype->translit_idx_size * sizeof (uint32_t));
976 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL,
977 ctype->translit_to_tbl, ctype->translit_to_tbl_size);
979 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
980 /* The class name array. */
981 total = 0;
982 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
984 iov[2 + elem + offset].iov_base
985 = (void *) ctype->classnames[cnt];
986 iov[2 + elem + offset].iov_len
987 = strlen (ctype->classnames[cnt]) + 1;
988 total += iov[2 + elem + offset].iov_len;
990 iov[2 + elem + offset].iov_base = (void *) nulbytes;
991 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
992 total += 1 + (4 - ((total + 1) % 4));
994 idx[elem + 1] = idx[elem] + total;
995 break;
997 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
998 /* The class name array. */
999 total = 0;
1000 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
1002 iov[2 + elem + offset].iov_base
1003 = (void *) ctype->mapnames[cnt];
1004 iov[2 + elem + offset].iov_len
1005 = strlen (ctype->mapnames[cnt]) + 1;
1006 total += iov[2 + elem + offset].iov_len;
1008 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1009 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
1010 total += 1 + (4 - ((total + 1) % 4));
1012 idx[elem + 1] = idx[elem] + total;
1013 break;
1015 CTYPE_DATA (_NL_CTYPE_WIDTH,
1016 ctype->width.iov_base,
1017 ctype->width.iov_len);
1019 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
1020 &ctype->mb_cur_max, sizeof (uint32_t));
1022 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
1023 total = strlen (ctype->codeset_name) + 1;
1024 if (total % 4 == 0)
1025 iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
1026 else
1028 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
1029 memset (mempcpy (iov[2 + elem + offset].iov_base,
1030 ctype->codeset_name, total),
1031 '\0', 4 - (total & 3));
1032 total = (total + 3) & ~3;
1034 iov[2 + elem + offset].iov_len = total;
1035 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1036 break;
1038 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
1039 iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1040 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1041 *(uint32_t *) iov[2 + elem + offset].iov_base =
1042 ctype->mbdigits_act / 10;
1043 idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1044 break;
1046 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
1047 /* Align entries. */
1048 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1049 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1050 idx[elem] += iov[2 + elem + offset].iov_len;
1051 ++offset;
1053 iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1054 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1055 *(uint32_t *) iov[2 + elem + offset].iov_base =
1056 ctype->wcdigits_act / 10;
1057 idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1058 break;
1060 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
1061 /* Compute the length of all possible characters. For INDIGITS
1062 there might be more than one. We simply concatenate all of
1063 them with a NUL byte following. The NUL byte wouldn't be
1064 necessary but it makes it easier for the user. */
1065 total = 0;
1067 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1068 cnt < ctype->mbdigits_act; cnt += 10)
1069 total += ctype->mbdigits[cnt]->nbytes + 1;
1070 iov[2 + elem + offset].iov_base = (char *) alloca (total);
1071 iov[2 + elem + offset].iov_len = total;
1073 cp = iov[2 + elem + offset].iov_base;
1074 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1075 cnt < ctype->mbdigits_act; cnt += 10)
1077 cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes,
1078 ctype->mbdigits[cnt]->nbytes);
1079 *cp++ = '\0';
1081 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1082 break;
1084 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
1085 /* Compute the length of all possible characters. For INDIGITS
1086 there might be more than one. We simply concatenate all of
1087 them with a NUL byte following. The NUL byte wouldn't be
1088 necessary but it makes it easier for the user. */
1089 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
1090 total = ctype->mboutdigits[cnt]->nbytes + 1;
1091 iov[2 + elem + offset].iov_base = (char *) alloca (total);
1092 iov[2 + elem + offset].iov_len = total;
1094 *(char *) mempcpy (iov[2 + elem + offset].iov_base,
1095 ctype->mboutdigits[cnt]->bytes,
1096 ctype->mboutdigits[cnt]->nbytes) = '\0';
1097 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1098 break;
1100 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
1101 total = ctype->wcdigits_act / 10;
1103 iov[2 + elem + offset].iov_base =
1104 (uint32_t *) alloca (total * sizeof (uint32_t));
1105 iov[2 + elem + offset].iov_len = total * sizeof (uint32_t);
1107 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
1108 cnt < ctype->wcdigits_act; cnt += 10)
1109 ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10]
1110 = ctype->wcdigits[cnt];
1111 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1112 break;
1114 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC):
1115 /* Align entries. */
1116 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1117 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1118 idx[elem] += iov[2 + elem + offset].iov_len;
1119 ++offset;
1120 /* FALLTHROUGH */
1122 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT1_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
1123 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
1124 iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt];
1125 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1126 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1127 break;
1129 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
1130 /* Align entries. */
1131 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1132 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1133 idx[elem] += iov[2 + elem + offset].iov_len;
1134 ++offset;
1136 default_missing_len = (ctype->default_missing
1137 ? wcslen ((wchar_t *)ctype->default_missing)
1138 : 0);
1139 iov[2 + elem + offset].iov_base = &default_missing_len;
1140 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1141 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1142 break;
1144 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
1145 iov[2 + elem + offset].iov_base =
1146 ctype->default_missing ?: (uint32_t *) L"";
1147 iov[2 + elem + offset].iov_len =
1148 wcslen (iov[2 + elem + offset].iov_base);
1149 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1150 break;
1152 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
1153 /* Align entries. */
1154 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1155 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1156 idx[elem] += iov[2 + elem + offset].iov_len;
1157 ++offset;
1159 iov[2 + elem + offset].iov_base = &ctype->ntranslit_ignore;
1160 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1161 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1162 break;
1164 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
1166 uint32_t *ranges = (uint32_t *) alloca (ctype->ntranslit_ignore
1167 * 3 * sizeof (uint32_t));
1168 struct translit_ignore_t *runp;
1170 iov[2 + elem + offset].iov_base = ranges;
1171 iov[2 + elem + offset].iov_len = (ctype->ntranslit_ignore
1172 * 3 * sizeof (uint32_t));
1174 for (runp = ctype->translit_ignore; runp != NULL;
1175 runp = runp->next)
1177 *ranges++ = runp->from;
1178 *ranges++ = runp->to;
1179 *ranges++ = runp->step;
1182 /* Remove the following line in case a new entry is added
1183 after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN. */
1184 if (elem < nelems)
1185 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1186 break;
1188 default:
1189 assert (! "unknown CTYPE element");
1191 else
1193 /* Handle extra maps. */
1194 size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
1195 if (nr < ctype->nr_charclass)
1197 iov[2 + elem + offset].iov_base = ctype->class_b[nr];
1198 iov[2 + elem + offset].iov_len = 256 / 32 * sizeof (uint32_t);
1199 idx[elem] += iov[2 + elem + offset].iov_len;
1200 ++offset;
1202 iov[2 + elem + offset] = ctype->class_3level[nr];
1204 else
1206 nr -= ctype->nr_charclass;
1207 assert (nr < ctype->map_collection_nr);
1208 iov[2 + elem + offset] = ctype->map_3level[nr];
1210 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1214 assert (2 + elem + offset == (nelems + 2 * ctype->nr_charclass
1215 + ctype->map_collection_nr + 4 + 2));
1217 write_locale_data (output_path, LC_CTYPE, "LC_CTYPE", 2 + elem + offset,
1218 iov);
1222 /* Local functions. */
1223 static void
1224 ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
1225 const char *name)
1227 size_t cnt;
1229 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1230 if (strcmp (ctype->classnames[cnt], name) == 0)
1231 break;
1233 if (cnt < ctype->nr_charclass)
1235 lr_error (lr, _("character class `%s' already defined"), name);
1236 return;
1239 if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1240 /* Exit code 2 is prescribed in P1003.2b. */
1241 WITH_CUR_LOCALE (error (2, 0, _("\
1242 implementation limit: no more than %Zd character classes allowed"),
1243 MAX_NR_CHARCLASS));
1245 ctype->classnames[ctype->nr_charclass++] = name;
1249 static void
1250 ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1251 const char *name, const struct charmap_t *charmap)
1253 size_t max_chars = 0;
1254 size_t cnt;
1256 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1258 if (strcmp (ctype->mapnames[cnt], name) == 0)
1259 break;
1261 if (max_chars < ctype->map_collection_max[cnt])
1262 max_chars = ctype->map_collection_max[cnt];
1265 if (cnt < ctype->map_collection_nr)
1267 lr_error (lr, _("character map `%s' already defined"), name);
1268 return;
1271 if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1272 /* Exit code 2 is prescribed in P1003.2b. */
1273 WITH_CUR_LOCALE (error (2, 0, _("\
1274 implementation limit: no more than %d character maps allowed"),
1275 MAX_NR_CHARMAP));
1277 ctype->mapnames[cnt] = name;
1279 if (max_chars == 0)
1280 ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1281 else
1282 ctype->map_collection_max[cnt] = max_chars;
1284 ctype->map_collection[cnt] = (uint32_t *)
1285 xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
1286 ctype->map_collection_act[cnt] = 256;
1288 ++ctype->map_collection_nr;
1292 /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
1293 is possible if we only want to extend the name array. */
1294 static uint32_t *
1295 find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1296 size_t *act, uint32_t idx)
1298 size_t cnt;
1300 if (idx < 256)
1301 return table == NULL ? NULL : &(*table)[idx];
1303 /* Use the charnames_idx lookup table instead of the slow search loop. */
1304 #if 1
1305 cnt = idx_table_get (&ctype->charnames_idx, idx);
1306 if (cnt == EMPTY)
1307 /* Not found. */
1308 cnt = ctype->charnames_act;
1309 #else
1310 for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1311 if (ctype->charnames[cnt] == idx)
1312 break;
1313 #endif
1315 /* We have to distinguish two cases: the name is found or not. */
1316 if (cnt == ctype->charnames_act)
1318 /* Extend the name array. */
1319 if (ctype->charnames_act == ctype->charnames_max)
1321 ctype->charnames_max *= 2;
1322 ctype->charnames = (uint32_t *)
1323 xrealloc (ctype->charnames,
1324 sizeof (uint32_t) * ctype->charnames_max);
1326 ctype->charnames[ctype->charnames_act++] = idx;
1327 idx_table_add (&ctype->charnames_idx, idx, cnt);
1330 if (table == NULL)
1331 /* We have done everything we are asked to do. */
1332 return NULL;
1334 if (max == NULL)
1335 /* The caller does not want to extend the table. */
1336 return (cnt >= *act ? NULL : &(*table)[cnt]);
1338 if (cnt >= *act)
1340 if (cnt >= *max)
1342 size_t old_max = *max;
1344 *max *= 2;
1345 while (*max <= cnt);
1347 *table =
1348 (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
1349 memset (&(*table)[old_max], '\0',
1350 (*max - old_max) * sizeof (uint32_t));
1353 *act = cnt + 1;
1356 return &(*table)[cnt];
1360 static int
1361 get_character (struct token *now, const struct charmap_t *charmap,
1362 struct repertoire_t *repertoire,
1363 struct charseq **seqp, uint32_t *wchp)
1365 if (now->tok == tok_bsymbol)
1367 /* This will hopefully be the normal case. */
1368 *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1369 now->val.str.lenmb);
1370 *seqp = charmap_find_value (charmap, now->val.str.startmb,
1371 now->val.str.lenmb);
1373 else if (now->tok == tok_ucs4)
1375 char utmp[10];
1377 snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1378 *seqp = charmap_find_value (charmap, utmp, 9);
1380 if (*seqp == NULL)
1381 *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
1383 if (*seqp == NULL)
1385 /* Compute the value in the charmap from the UCS value. */
1386 const char *symbol = repertoire_find_symbol (repertoire,
1387 now->val.ucs4);
1389 if (symbol == NULL)
1390 *seqp = NULL;
1391 else
1392 *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
1394 if (*seqp == NULL)
1396 if (repertoire != NULL)
1398 /* Insert a negative entry. */
1399 static const struct charseq negative
1400 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1401 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1402 sizeof (uint32_t));
1403 *newp = now->val.ucs4;
1405 insert_entry (&repertoire->seq_table, newp,
1406 sizeof (uint32_t), (void *) &negative);
1409 else
1410 (*seqp)->ucs4 = now->val.ucs4;
1412 else if ((*seqp)->ucs4 != now->val.ucs4)
1413 *seqp = NULL;
1415 *wchp = now->val.ucs4;
1417 else if (now->tok == tok_charcode)
1419 /* We must map from the byte code to UCS4. */
1420 *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1421 now->val.str.lenmb);
1423 if (*seqp == NULL)
1424 *wchp = ILLEGAL_CHAR_VALUE;
1425 else
1427 if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1428 (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1429 strlen ((*seqp)->name));
1430 *wchp = (*seqp)->ucs4;
1433 else
1434 return 1;
1436 return 0;
1440 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1441 the .(2). counterparts. */
1442 static void
1443 charclass_symbolic_ellipsis (struct linereader *ldfile,
1444 struct locale_ctype_t *ctype,
1445 const struct charmap_t *charmap,
1446 struct repertoire_t *repertoire,
1447 struct token *now,
1448 const char *last_str,
1449 unsigned long int class256_bit,
1450 unsigned long int class_bit, int base,
1451 int ignore_content, int handle_digits, int step)
1453 const char *nowstr = now->val.str.startmb;
1454 char tmp[now->val.str.lenmb + 1];
1455 const char *cp;
1456 char *endp;
1457 unsigned long int from;
1458 unsigned long int to;
1460 /* We have to compute the ellipsis values using the symbolic names. */
1461 assert (last_str != NULL);
1463 if (strlen (last_str) != now->val.str.lenmb)
1465 invalid_range:
1466 lr_error (ldfile,
1467 _("`%s' and `%.*s' are no valid names for symbolic range"),
1468 last_str, (int) now->val.str.lenmb, nowstr);
1469 return;
1472 if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1473 /* Nothing to do, the names are the same. */
1474 return;
1476 for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1479 errno = 0;
1480 from = strtoul (cp, &endp, base);
1481 if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1482 goto invalid_range;
1484 to = strtoul (nowstr + (cp - last_str), &endp, base);
1485 if ((to == UINT_MAX && errno == ERANGE)
1486 || (endp - nowstr) != now->val.str.lenmb || from >= to)
1487 goto invalid_range;
1489 /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
1490 if (!ignore_content)
1492 now->val.str.startmb = tmp;
1493 while ((from += step) <= to)
1495 struct charseq *seq;
1496 uint32_t wch;
1498 sprintf (tmp, (base == 10 ? "%.*s%0*ld" : "%.*s%0*lX"),
1499 (int) (cp - last_str), last_str,
1500 (int) (now->val.str.lenmb - (cp - last_str)),
1501 from);
1503 get_character (now, charmap, repertoire, &seq, &wch);
1505 if (seq != NULL && seq->nbytes == 1)
1506 /* Yep, we can store information about this byte sequence. */
1507 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
1509 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1510 /* We have the UCS4 position. */
1511 *find_idx (ctype, &ctype->class_collection,
1512 &ctype->class_collection_max,
1513 &ctype->class_collection_act, wch) |= class_bit;
1515 if (handle_digits == 1)
1517 /* We must store the digit values. */
1518 if (ctype->mbdigits_act == ctype->mbdigits_max)
1520 ctype->mbdigits_max *= 2;
1521 ctype->mbdigits = xrealloc (ctype->mbdigits,
1522 (ctype->mbdigits_max
1523 * sizeof (char *)));
1524 ctype->wcdigits_max *= 2;
1525 ctype->wcdigits = xrealloc (ctype->wcdigits,
1526 (ctype->wcdigits_max
1527 * sizeof (uint32_t)));
1530 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1531 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1533 else if (handle_digits == 2)
1535 /* We must store the digit values. */
1536 if (ctype->outdigits_act >= 10)
1538 lr_error (ldfile, _("\
1539 %s: field `%s' does not contain exactly ten entries"),
1540 "LC_CTYPE", "outdigit");
1541 return;
1544 ctype->mboutdigits[ctype->outdigits_act] = seq;
1545 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1546 ++ctype->outdigits_act;
1553 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. */
1554 static void
1555 charclass_ucs4_ellipsis (struct linereader *ldfile,
1556 struct locale_ctype_t *ctype,
1557 const struct charmap_t *charmap,
1558 struct repertoire_t *repertoire,
1559 struct token *now, uint32_t last_wch,
1560 unsigned long int class256_bit,
1561 unsigned long int class_bit, int ignore_content,
1562 int handle_digits, int step)
1564 if (last_wch > now->val.ucs4)
1566 lr_error (ldfile, _("\
1567 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1568 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1569 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
1570 return;
1573 if (!ignore_content)
1574 while ((last_wch += step) <= now->val.ucs4)
1576 /* We have to find out whether there is a byte sequence corresponding
1577 to this UCS4 value. */
1578 struct charseq *seq;
1579 char utmp[10];
1581 snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1582 seq = charmap_find_value (charmap, utmp, 9);
1583 if (seq == NULL)
1585 snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1586 seq = charmap_find_value (charmap, utmp, 5);
1589 if (seq == NULL)
1590 /* Try looking in the repertoire map. */
1591 seq = repertoire_find_seq (repertoire, last_wch);
1593 /* If this is the first time we look for this sequence create a new
1594 entry. */
1595 if (seq == NULL)
1597 static const struct charseq negative
1598 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1600 /* Find the symbolic name for this UCS4 value. */
1601 if (repertoire != NULL)
1603 const char *symbol = repertoire_find_symbol (repertoire,
1604 last_wch);
1605 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1606 sizeof (uint32_t));
1607 *newp = last_wch;
1609 if (symbol != NULL)
1610 /* We have a name, now search the multibyte value. */
1611 seq = charmap_find_value (charmap, symbol, strlen (symbol));
1613 if (seq == NULL)
1614 /* We have to create a fake entry. */
1615 seq = (struct charseq *) &negative;
1616 else
1617 seq->ucs4 = last_wch;
1619 insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1620 seq);
1622 else
1623 /* We have to create a fake entry. */
1624 seq = (struct charseq *) &negative;
1627 /* We have a name, now search the multibyte value. */
1628 if (seq->ucs4 == last_wch && seq->nbytes == 1)
1629 /* Yep, we can store information about this byte sequence. */
1630 ctype->class256_collection[(size_t) seq->bytes[0]]
1631 |= class256_bit;
1633 /* And of course we have the UCS4 position. */
1634 if (class_bit != 0)
1635 *find_idx (ctype, &ctype->class_collection,
1636 &ctype->class_collection_max,
1637 &ctype->class_collection_act, last_wch) |= class_bit;
1639 if (handle_digits == 1)
1641 /* We must store the digit values. */
1642 if (ctype->mbdigits_act == ctype->mbdigits_max)
1644 ctype->mbdigits_max *= 2;
1645 ctype->mbdigits = xrealloc (ctype->mbdigits,
1646 (ctype->mbdigits_max
1647 * sizeof (char *)));
1648 ctype->wcdigits_max *= 2;
1649 ctype->wcdigits = xrealloc (ctype->wcdigits,
1650 (ctype->wcdigits_max
1651 * sizeof (uint32_t)));
1654 ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1655 ? seq : NULL);
1656 ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1658 else if (handle_digits == 2)
1660 /* We must store the digit values. */
1661 if (ctype->outdigits_act >= 10)
1663 lr_error (ldfile, _("\
1664 %s: field `%s' does not contain exactly ten entries"),
1665 "LC_CTYPE", "outdigit");
1666 return;
1669 ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1670 ? seq : NULL);
1671 ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1672 ++ctype->outdigits_act;
1678 /* Ellipsis as in `/xea/x12.../xea/x34'. */
1679 static void
1680 charclass_charcode_ellipsis (struct linereader *ldfile,
1681 struct locale_ctype_t *ctype,
1682 const struct charmap_t *charmap,
1683 struct repertoire_t *repertoire,
1684 struct token *now, char *last_charcode,
1685 uint32_t last_charcode_len,
1686 unsigned long int class256_bit,
1687 unsigned long int class_bit, int ignore_content,
1688 int handle_digits)
1690 /* First check whether the to-value is larger. */
1691 if (now->val.charcode.nbytes != last_charcode_len)
1693 lr_error (ldfile, _("\
1694 start and end character sequence of range must have the same length"));
1695 return;
1698 if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
1700 lr_error (ldfile, _("\
1701 to-value character sequence is smaller than from-value sequence"));
1702 return;
1705 if (!ignore_content)
1709 /* Increment the byte sequence value. */
1710 struct charseq *seq;
1711 uint32_t wch;
1712 int i;
1714 for (i = last_charcode_len - 1; i >= 0; --i)
1715 if (++last_charcode[i] != 0)
1716 break;
1718 if (last_charcode_len == 1)
1719 /* Of course we have the charcode value. */
1720 ctype->class256_collection[(size_t) last_charcode[0]]
1721 |= class256_bit;
1723 /* Find the symbolic name. */
1724 seq = charmap_find_symbol (charmap, last_charcode,
1725 last_charcode_len);
1726 if (seq != NULL)
1728 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1729 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1730 strlen (seq->name));
1731 wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
1733 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1734 *find_idx (ctype, &ctype->class_collection,
1735 &ctype->class_collection_max,
1736 &ctype->class_collection_act, wch) |= class_bit;
1738 else
1739 wch = ILLEGAL_CHAR_VALUE;
1741 if (handle_digits == 1)
1743 /* We must store the digit values. */
1744 if (ctype->mbdigits_act == ctype->mbdigits_max)
1746 ctype->mbdigits_max *= 2;
1747 ctype->mbdigits = xrealloc (ctype->mbdigits,
1748 (ctype->mbdigits_max
1749 * sizeof (char *)));
1750 ctype->wcdigits_max *= 2;
1751 ctype->wcdigits = xrealloc (ctype->wcdigits,
1752 (ctype->wcdigits_max
1753 * sizeof (uint32_t)));
1756 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1757 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1758 seq->nbytes = last_charcode_len;
1760 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1761 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1763 else if (handle_digits == 2)
1765 struct charseq *seq;
1766 /* We must store the digit values. */
1767 if (ctype->outdigits_act >= 10)
1769 lr_error (ldfile, _("\
1770 %s: field `%s' does not contain exactly ten entries"),
1771 "LC_CTYPE", "outdigit");
1772 return;
1775 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1776 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1777 seq->nbytes = last_charcode_len;
1779 ctype->mboutdigits[ctype->outdigits_act] = seq;
1780 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1781 ++ctype->outdigits_act;
1784 while (memcmp (last_charcode, now->val.charcode.bytes,
1785 last_charcode_len) != 0);
1790 static uint32_t *
1791 find_translit2 (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
1792 uint32_t wch)
1794 struct translit_t *trunp = ctype->translit;
1795 struct translit_ignore_t *tirunp = ctype->translit_ignore;
1797 while (trunp != NULL)
1799 /* XXX We simplify things here. The transliterations we look
1800 for are only allowed to have one character. */
1801 if (trunp->from[0] == wch && trunp->from[1] == 0)
1803 /* Found it. Now look for a transliteration which can be
1804 represented with the character set. */
1805 struct translit_to_t *torunp = trunp->to;
1807 while (torunp != NULL)
1809 int i;
1811 for (i = 0; torunp->str[i] != 0; ++i)
1813 char utmp[10];
1815 snprintf (utmp, sizeof (utmp), "U%08X", torunp->str[i]);
1816 if (charmap_find_value (charmap, utmp, 9) == NULL)
1817 /* This character cannot be represented. */
1818 break;
1821 if (torunp->str[i] == 0)
1822 return torunp->str;
1824 torunp = torunp->next;
1827 break;
1830 trunp = trunp->next;
1833 /* Check for ignored chars. */
1834 while (tirunp != NULL)
1836 if (tirunp->from <= wch && tirunp->to >= wch)
1838 uint32_t wi;
1840 for (wi = tirunp->from; wi <= wch; wi += tirunp->step)
1841 if (wi == wch)
1842 return (uint32_t []) { 0 };
1846 /* Nothing found. */
1847 return NULL;
1851 uint32_t *
1852 find_translit (struct localedef_t *locale, const struct charmap_t *charmap,
1853 uint32_t wch)
1855 struct locale_ctype_t *ctype;
1856 uint32_t *result = NULL;
1858 assert (locale != NULL);
1859 ctype = locale->categories[LC_CTYPE].ctype;
1861 if (ctype->translit != NULL)
1862 result = find_translit2 (ctype, charmap, wch);
1864 if (result == NULL)
1866 struct translit_include_t *irunp = ctype->translit_include;
1868 while (irunp != NULL && result == NULL)
1870 result = find_translit (find_locale (CTYPE_LOCALE,
1871 irunp->copy_locale,
1872 irunp->copy_repertoire,
1873 charmap),
1874 charmap, wch);
1875 irunp = irunp->next;
1879 return result;
1883 /* Read one transliteration entry. */
1884 static uint32_t *
1885 read_widestring (struct linereader *ldfile, struct token *now,
1886 const struct charmap_t *charmap,
1887 struct repertoire_t *repertoire)
1889 uint32_t *wstr;
1891 if (now->tok == tok_default_missing)
1892 /* The special name "" will denote this case. */
1893 wstr = ((uint32_t *) { 0 });
1894 else if (now->tok == tok_bsymbol)
1896 /* Get the value from the repertoire. */
1897 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1898 wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1899 now->val.str.lenmb);
1900 if (wstr[0] == ILLEGAL_CHAR_VALUE)
1902 /* We cannot proceed, we don't know the UCS4 value. */
1903 free (wstr);
1904 return NULL;
1907 wstr[1] = 0;
1909 else if (now->tok == tok_ucs4)
1911 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1912 wstr[0] = now->val.ucs4;
1913 wstr[1] = 0;
1915 else if (now->tok == tok_charcode)
1917 /* Argh, we have to convert to the symbol name first and then to the
1918 UCS4 value. */
1919 struct charseq *seq = charmap_find_symbol (charmap,
1920 now->val.str.startmb,
1921 now->val.str.lenmb);
1922 if (seq == NULL)
1923 /* Cannot find the UCS4 value. */
1924 return NULL;
1926 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1927 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1928 strlen (seq->name));
1929 if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1930 /* We cannot proceed, we don't know the UCS4 value. */
1931 return NULL;
1933 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1934 wstr[0] = seq->ucs4;
1935 wstr[1] = 0;
1937 else if (now->tok == tok_string)
1939 wstr = now->val.str.startwc;
1940 if (wstr == NULL || wstr[0] == 0)
1941 return NULL;
1943 else
1945 if (now->tok != tok_eol && now->tok != tok_eof)
1946 lr_ignore_rest (ldfile, 0);
1947 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1948 return (uint32_t *) -1l;
1951 return wstr;
1955 static void
1956 read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1957 struct token *now, const struct charmap_t *charmap,
1958 struct repertoire_t *repertoire)
1960 uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1961 struct translit_t *result;
1962 struct translit_to_t **top;
1963 struct obstack *ob = &ctype->mempool;
1964 int first;
1965 int ignore;
1967 if (from_wstr == NULL)
1968 /* There is no valid from string. */
1969 return;
1971 result = (struct translit_t *) obstack_alloc (ob,
1972 sizeof (struct translit_t));
1973 result->from = from_wstr;
1974 result->fname = ldfile->fname;
1975 result->lineno = ldfile->lineno;
1976 result->next = NULL;
1977 result->to = NULL;
1978 top = &result->to;
1979 first = 1;
1980 ignore = 0;
1982 while (1)
1984 uint32_t *to_wstr;
1986 /* Next we have one or more transliterations. They are
1987 separated by semicolons. */
1988 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
1990 if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
1992 /* One string read. */
1993 const uint32_t zero = 0;
1995 if (!ignore)
1997 obstack_grow (ob, &zero, 4);
1998 to_wstr = obstack_finish (ob);
2000 *top = obstack_alloc (ob, sizeof (struct translit_to_t));
2001 (*top)->str = to_wstr;
2002 (*top)->next = NULL;
2005 if (now->tok == tok_eol)
2007 result->next = ctype->translit;
2008 ctype->translit = result;
2009 return;
2012 if (!ignore)
2013 top = &(*top)->next;
2014 ignore = 0;
2016 else
2018 to_wstr = read_widestring (ldfile, now, charmap, repertoire);
2019 if (to_wstr == (uint32_t *) -1l)
2021 /* An error occurred. */
2022 obstack_free (ob, result);
2023 return;
2026 if (to_wstr == NULL)
2027 ignore = 1;
2028 else
2029 /* This value is usable. */
2030 obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
2032 first = 0;
2038 static void
2039 read_translit_ignore_entry (struct linereader *ldfile,
2040 struct locale_ctype_t *ctype,
2041 const struct charmap_t *charmap,
2042 struct repertoire_t *repertoire)
2044 /* We expect a semicolon-separated list of characters we ignore. We are
2045 only interested in the wide character definitions. These must be
2046 single characters, possibly defining a range when an ellipsis is used. */
2047 while (1)
2049 struct token *now = lr_token (ldfile, charmap, NULL, repertoire,
2050 verbose);
2051 struct translit_ignore_t *newp;
2052 uint32_t from;
2054 if (now->tok == tok_eol || now->tok == tok_eof)
2056 lr_error (ldfile,
2057 _("premature end of `translit_ignore' definition"));
2058 return;
2061 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2063 lr_error (ldfile, _("syntax error"));
2064 lr_ignore_rest (ldfile, 0);
2065 return;
2068 if (now->tok == tok_ucs4)
2069 from = now->val.ucs4;
2070 else
2071 /* Try to get the value. */
2072 from = repertoire_find_value (repertoire, now->val.str.startmb,
2073 now->val.str.lenmb);
2075 if (from == ILLEGAL_CHAR_VALUE)
2077 lr_error (ldfile, "invalid character name");
2078 newp = NULL;
2080 else
2082 newp = (struct translit_ignore_t *)
2083 obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
2084 newp->from = from;
2085 newp->to = from;
2086 newp->step = 1;
2088 newp->next = ctype->translit_ignore;
2089 ctype->translit_ignore = newp;
2092 /* Now we expect either a semicolon, an ellipsis, or the end of the
2093 line. */
2094 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2096 if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
2098 /* XXX Should we bother implementing `....'? `...' certainly
2099 will not be implemented. */
2100 uint32_t to;
2101 int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
2103 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2105 if (now->tok == tok_eol || now->tok == tok_eof)
2107 lr_error (ldfile,
2108 _("premature end of `translit_ignore' definition"));
2109 return;
2112 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2114 lr_error (ldfile, _("syntax error"));
2115 lr_ignore_rest (ldfile, 0);
2116 return;
2119 if (now->tok == tok_ucs4)
2120 to = now->val.ucs4;
2121 else
2122 /* Try to get the value. */
2123 to = repertoire_find_value (repertoire, now->val.str.startmb,
2124 now->val.str.lenmb);
2126 if (to == ILLEGAL_CHAR_VALUE)
2127 lr_error (ldfile, "invalid character name");
2128 else
2130 /* Make sure the `to'-value is larger. */
2131 if (to >= from)
2133 newp->to = to;
2134 newp->step = step;
2136 else
2137 lr_error (ldfile, _("\
2138 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2139 (to | from) < 65536 ? 4 : 8, to,
2140 (to | from) < 65536 ? 4 : 8, from);
2143 /* And the next token. */
2144 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2147 if (now->tok == tok_eol || now->tok == tok_eof)
2148 /* We are done. */
2149 return;
2151 if (now->tok == tok_semicolon)
2152 /* Next round. */
2153 continue;
2155 /* If we come here something is wrong. */
2156 lr_error (ldfile, _("syntax error"));
2157 lr_ignore_rest (ldfile, 0);
2158 return;
2163 /* The parser for the LC_CTYPE section of the locale definition. */
2164 void
2165 ctype_read (struct linereader *ldfile, struct localedef_t *result,
2166 const struct charmap_t *charmap, const char *repertoire_name,
2167 int ignore_content)
2169 struct repertoire_t *repertoire = NULL;
2170 struct locale_ctype_t *ctype;
2171 struct token *now;
2172 enum token_t nowtok;
2173 size_t cnt;
2174 struct charseq *last_seq;
2175 uint32_t last_wch = 0;
2176 enum token_t last_token;
2177 enum token_t ellipsis_token;
2178 int step;
2179 char last_charcode[16];
2180 size_t last_charcode_len = 0;
2181 const char *last_str = NULL;
2182 int mapidx;
2183 struct localedef_t *copy_locale = NULL;
2185 /* Get the repertoire we have to use. */
2186 if (repertoire_name != NULL)
2187 repertoire = repertoire_read (repertoire_name);
2189 /* The rest of the line containing `LC_CTYPE' must be free. */
2190 lr_ignore_rest (ldfile, 1);
2195 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2196 nowtok = now->tok;
2198 while (nowtok == tok_eol);
2200 /* If we see `copy' now we are almost done. */
2201 if (nowtok == tok_copy)
2203 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2204 if (now->tok != tok_string)
2206 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2208 skip_category:
2210 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2211 while (now->tok != tok_eof && now->tok != tok_end);
2213 if (now->tok != tok_eof
2214 || (now = lr_token (ldfile, charmap, NULL, NULL, verbose),
2215 now->tok == tok_eof))
2216 lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2217 else if (now->tok != tok_lc_ctype)
2219 lr_error (ldfile, _("\
2220 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2221 lr_ignore_rest (ldfile, 0);
2223 else
2224 lr_ignore_rest (ldfile, 1);
2226 return;
2229 if (! ignore_content)
2231 /* Get the locale definition. */
2232 copy_locale = load_locale (LC_CTYPE, now->val.str.startmb,
2233 repertoire_name, charmap, NULL);
2234 if ((copy_locale->avail & CTYPE_LOCALE) == 0)
2236 /* Not yet loaded. So do it now. */
2237 if (locfile_read (copy_locale, charmap) != 0)
2238 goto skip_category;
2242 lr_ignore_rest (ldfile, 1);
2244 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2245 nowtok = now->tok;
2248 /* Prepare the data structures. */
2249 ctype_startup (ldfile, result, charmap, copy_locale, ignore_content);
2250 ctype = result->categories[LC_CTYPE].ctype;
2252 /* Remember the repertoire we use. */
2253 if (!ignore_content)
2254 ctype->repertoire = repertoire;
2256 while (1)
2258 unsigned long int class_bit = 0;
2259 unsigned long int class256_bit = 0;
2260 int handle_digits = 0;
2262 /* Of course we don't proceed beyond the end of file. */
2263 if (nowtok == tok_eof)
2264 break;
2266 /* Ignore empty lines. */
2267 if (nowtok == tok_eol)
2269 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2270 nowtok = now->tok;
2271 continue;
2274 switch (nowtok)
2276 case tok_charclass:
2277 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2278 while (now->tok == tok_ident || now->tok == tok_string)
2280 ctype_class_new (ldfile, ctype, now->val.str.startmb);
2281 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2282 if (now->tok != tok_semicolon)
2283 break;
2284 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2286 if (now->tok != tok_eol)
2287 SYNTAX_ERROR (_("\
2288 %s: syntax error in definition of new character class"), "LC_CTYPE");
2289 break;
2291 case tok_charconv:
2292 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2293 while (now->tok == tok_ident || now->tok == tok_string)
2295 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2296 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2297 if (now->tok != tok_semicolon)
2298 break;
2299 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2301 if (now->tok != tok_eol)
2302 SYNTAX_ERROR (_("\
2303 %s: syntax error in definition of new character map"), "LC_CTYPE");
2304 break;
2306 case tok_class:
2307 /* Ignore the rest of the line if we don't need the input of
2308 this line. */
2309 if (ignore_content)
2311 lr_ignore_rest (ldfile, 0);
2312 break;
2315 /* We simply forget the `class' keyword and use the following
2316 operand to determine the bit. */
2317 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2318 if (now->tok == tok_ident || now->tok == tok_string)
2320 /* This can be one of the predefined class names. */
2321 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2322 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
2323 break;
2324 if (cnt >= ctype->nr_charclass)
2326 #ifdef PREDEFINED_CLASSES
2327 if (now->val.str.lenmb == 8
2328 && memcmp ("special1", now->val.str.startmb, 8) == 0)
2329 class_bit = _ISwspecial1;
2330 else if (now->val.str.lenmb == 8
2331 && memcmp ("special2", now->val.str.startmb, 8) == 0)
2332 class_bit = _ISwspecial2;
2333 else if (now->val.str.lenmb == 8
2334 && memcmp ("special3", now->val.str.startmb, 8) == 0)
2335 class_bit = _ISwspecial3;
2336 else
2337 #endif
2339 /* OK, it's a new class. */
2340 ctype_class_new (ldfile, ctype, now->val.str.startmb);
2342 class_bit = _ISwbit (ctype->nr_charclass - 1);
2345 else
2347 class_bit = _ISwbit (cnt);
2349 free (now->val.str.startmb);
2352 else if (now->tok == tok_digit)
2353 goto handle_tok_digit;
2354 else if (now->tok < tok_upper || now->tok > tok_blank)
2355 goto err_label;
2356 else
2358 class_bit = BITw (now->tok);
2359 class256_bit = BIT (now->tok);
2362 /* The next character must be a semicolon. */
2363 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2364 if (now->tok != tok_semicolon)
2365 goto err_label;
2366 goto read_charclass;
2368 case tok_upper:
2369 case tok_lower:
2370 case tok_alpha:
2371 case tok_alnum:
2372 case tok_space:
2373 case tok_cntrl:
2374 case tok_punct:
2375 case tok_graph:
2376 case tok_print:
2377 case tok_xdigit:
2378 case tok_blank:
2379 /* Ignore the rest of the line if we don't need the input of
2380 this line. */
2381 if (ignore_content)
2383 lr_ignore_rest (ldfile, 0);
2384 break;
2387 class_bit = BITw (now->tok);
2388 class256_bit = BIT (now->tok);
2389 handle_digits = 0;
2390 read_charclass:
2391 ctype->class_done |= class_bit;
2392 last_token = tok_none;
2393 ellipsis_token = tok_none;
2394 step = 1;
2395 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2396 while (now->tok != tok_eol && now->tok != tok_eof)
2398 uint32_t wch;
2399 struct charseq *seq;
2401 if (ellipsis_token == tok_none)
2403 if (get_character (now, charmap, repertoire, &seq, &wch))
2404 goto err_label;
2406 if (!ignore_content && seq != NULL && seq->nbytes == 1)
2407 /* Yep, we can store information about this byte
2408 sequence. */
2409 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
2411 if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2412 && class_bit != 0)
2413 /* We have the UCS4 position. */
2414 *find_idx (ctype, &ctype->class_collection,
2415 &ctype->class_collection_max,
2416 &ctype->class_collection_act, wch) |= class_bit;
2418 last_token = now->tok;
2419 /* Terminate the string. */
2420 if (last_token == tok_bsymbol)
2422 now->val.str.startmb[now->val.str.lenmb] = '\0';
2423 last_str = now->val.str.startmb;
2425 else
2426 last_str = NULL;
2427 last_seq = seq;
2428 last_wch = wch;
2429 memcpy (last_charcode, now->val.charcode.bytes, 16);
2430 last_charcode_len = now->val.charcode.nbytes;
2432 if (!ignore_content && handle_digits == 1)
2434 /* We must store the digit values. */
2435 if (ctype->mbdigits_act == ctype->mbdigits_max)
2437 ctype->mbdigits_max += 10;
2438 ctype->mbdigits = xrealloc (ctype->mbdigits,
2439 (ctype->mbdigits_max
2440 * sizeof (char *)));
2441 ctype->wcdigits_max += 10;
2442 ctype->wcdigits = xrealloc (ctype->wcdigits,
2443 (ctype->wcdigits_max
2444 * sizeof (uint32_t)));
2447 ctype->mbdigits[ctype->mbdigits_act++] = seq;
2448 ctype->wcdigits[ctype->wcdigits_act++] = wch;
2450 else if (!ignore_content && handle_digits == 2)
2452 /* We must store the digit values. */
2453 if (ctype->outdigits_act >= 10)
2455 lr_error (ldfile, _("\
2456 %s: field `%s' does not contain exactly ten entries"),
2457 "LC_CTYPE", "outdigit");
2458 lr_ignore_rest (ldfile, 0);
2459 break;
2462 ctype->mboutdigits[ctype->outdigits_act] = seq;
2463 ctype->wcoutdigits[ctype->outdigits_act] = wch;
2464 ++ctype->outdigits_act;
2467 else
2469 /* Now it gets complicated. We have to resolve the
2470 ellipsis problem. First we must distinguish between
2471 the different kind of ellipsis and this must match the
2472 tokens we have seen. */
2473 assert (last_token != tok_none);
2475 if (last_token != now->tok)
2477 lr_error (ldfile, _("\
2478 ellipsis range must be marked by two operands of same type"));
2479 lr_ignore_rest (ldfile, 0);
2480 break;
2483 if (last_token == tok_bsymbol)
2485 if (ellipsis_token == tok_ellipsis3)
2486 lr_error (ldfile, _("with symbolic name range values \
2487 the absolute ellipsis `...' must not be used"));
2489 charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2490 repertoire, now, last_str,
2491 class256_bit, class_bit,
2492 (ellipsis_token
2493 == tok_ellipsis4
2494 ? 10 : 16),
2495 ignore_content,
2496 handle_digits, step);
2498 else if (last_token == tok_ucs4)
2500 if (ellipsis_token != tok_ellipsis2)
2501 lr_error (ldfile, _("\
2502 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2504 charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2505 repertoire, now, last_wch,
2506 class256_bit, class_bit,
2507 ignore_content, handle_digits,
2508 step);
2510 else
2512 assert (last_token == tok_charcode);
2514 if (ellipsis_token != tok_ellipsis3)
2515 lr_error (ldfile, _("\
2516 with character code range values one must use the absolute ellipsis `...'"));
2518 charclass_charcode_ellipsis (ldfile, ctype, charmap,
2519 repertoire, now,
2520 last_charcode,
2521 last_charcode_len,
2522 class256_bit, class_bit,
2523 ignore_content,
2524 handle_digits);
2527 /* Now we have used the last value. */
2528 last_token = tok_none;
2531 /* Next we expect a semicolon or the end of the line. */
2532 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2533 if (now->tok == tok_eol || now->tok == tok_eof)
2534 break;
2536 if (last_token != tok_none
2537 && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
2539 if (now->tok == tok_ellipsis2_2)
2541 now->tok = tok_ellipsis2;
2542 step = 2;
2544 else if (now->tok == tok_ellipsis4_2)
2546 now->tok = tok_ellipsis4;
2547 step = 2;
2550 ellipsis_token = now->tok;
2552 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2553 continue;
2556 if (now->tok != tok_semicolon)
2557 goto err_label;
2559 /* And get the next character. */
2560 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2562 ellipsis_token = tok_none;
2563 step = 1;
2565 break;
2567 case tok_digit:
2568 /* Ignore the rest of the line if we don't need the input of
2569 this line. */
2570 if (ignore_content)
2572 lr_ignore_rest (ldfile, 0);
2573 break;
2576 handle_tok_digit:
2577 class_bit = _ISwdigit;
2578 class256_bit = _ISdigit;
2579 handle_digits = 1;
2580 goto read_charclass;
2582 case tok_outdigit:
2583 /* Ignore the rest of the line if we don't need the input of
2584 this line. */
2585 if (ignore_content)
2587 lr_ignore_rest (ldfile, 0);
2588 break;
2591 if (ctype->outdigits_act != 0)
2592 lr_error (ldfile, _("\
2593 %s: field `%s' declared more than once"),
2594 "LC_CTYPE", "outdigit");
2595 class_bit = 0;
2596 class256_bit = 0;
2597 handle_digits = 2;
2598 goto read_charclass;
2600 case tok_toupper:
2601 /* Ignore the rest of the line if we don't need the input of
2602 this line. */
2603 if (ignore_content)
2605 lr_ignore_rest (ldfile, 0);
2606 break;
2609 mapidx = 0;
2610 goto read_mapping;
2612 case tok_tolower:
2613 /* Ignore the rest of the line if we don't need the input of
2614 this line. */
2615 if (ignore_content)
2617 lr_ignore_rest (ldfile, 0);
2618 break;
2621 mapidx = 1;
2622 goto read_mapping;
2624 case tok_map:
2625 /* Ignore the rest of the line if we don't need the input of
2626 this line. */
2627 if (ignore_content)
2629 lr_ignore_rest (ldfile, 0);
2630 break;
2633 /* We simply forget the `map' keyword and use the following
2634 operand to determine the mapping. */
2635 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2636 if (now->tok == tok_ident || now->tok == tok_string)
2638 size_t cnt;
2640 for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2641 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2642 break;
2644 if (cnt < ctype->map_collection_nr)
2645 free (now->val.str.startmb);
2646 else
2647 /* OK, it's a new map. */
2648 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2650 mapidx = cnt;
2652 else if (now->tok < tok_toupper || now->tok > tok_tolower)
2653 goto err_label;
2654 else
2655 mapidx = now->tok - tok_toupper;
2657 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2658 /* This better should be a semicolon. */
2659 if (now->tok != tok_semicolon)
2660 goto err_label;
2662 read_mapping:
2663 /* Test whether this mapping was already defined. */
2664 if (ctype->tomap_done[mapidx])
2666 lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2667 ctype->mapnames[mapidx]);
2668 lr_ignore_rest (ldfile, 0);
2669 break;
2671 ctype->tomap_done[mapidx] = 1;
2673 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2674 while (now->tok != tok_eol && now->tok != tok_eof)
2676 struct charseq *from_seq;
2677 uint32_t from_wch;
2678 struct charseq *to_seq;
2679 uint32_t to_wch;
2681 /* Every pair starts with an opening brace. */
2682 if (now->tok != tok_open_brace)
2683 goto err_label;
2685 /* Next comes the from-value. */
2686 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2687 if (get_character (now, charmap, repertoire, &from_seq,
2688 &from_wch) != 0)
2689 goto err_label;
2691 /* The next is a comma. */
2692 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2693 if (now->tok != tok_comma)
2694 goto err_label;
2696 /* And the other value. */
2697 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2698 if (get_character (now, charmap, repertoire, &to_seq,
2699 &to_wch) != 0)
2700 goto err_label;
2702 /* And the last thing is the closing brace. */
2703 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2704 if (now->tok != tok_close_brace)
2705 goto err_label;
2707 if (!ignore_content)
2709 if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2710 && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2711 /* We can use this value. */
2712 ctype->map256_collection[mapidx][from_seq->bytes[0]]
2713 = to_seq->bytes[0];
2715 if (from_wch != ILLEGAL_CHAR_VALUE
2716 && to_wch != ILLEGAL_CHAR_VALUE)
2717 /* Both correct values. */
2718 *find_idx (ctype, &ctype->map_collection[mapidx],
2719 &ctype->map_collection_max[mapidx],
2720 &ctype->map_collection_act[mapidx],
2721 from_wch) = to_wch;
2724 /* Now comes a semicolon or the end of the line/file. */
2725 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2726 if (now->tok == tok_semicolon)
2727 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2729 break;
2731 case tok_translit_start:
2732 /* Ignore the entire translit section with its peculiar syntax
2733 if we don't need the input. */
2734 if (ignore_content)
2738 lr_ignore_rest (ldfile, 0);
2739 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2741 while (now->tok != tok_translit_end && now->tok != tok_eof);
2743 if (now->tok == tok_eof)
2744 lr_error (ldfile, _(\
2745 "%s: `translit_start' section does not end with `translit_end'"),
2746 "LC_CTYPE");
2748 break;
2751 /* The rest of the line better should be empty. */
2752 lr_ignore_rest (ldfile, 1);
2754 /* We count here the number of allocated entries in the `translit'
2755 array. */
2756 cnt = 0;
2758 ldfile->translate_strings = 1;
2759 ldfile->return_widestr = 1;
2761 /* We proceed until we see the `translit_end' token. */
2762 while (now = lr_token (ldfile, charmap, NULL, repertoire, verbose),
2763 now->tok != tok_translit_end && now->tok != tok_eof)
2765 if (now->tok == tok_eol)
2766 /* Ignore empty lines. */
2767 continue;
2769 if (now->tok == tok_include)
2771 /* We have to include locale. */
2772 const char *locale_name;
2773 const char *repertoire_name;
2774 struct translit_include_t *include_stmt, **include_ptr;
2776 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2777 /* This should be a string or an identifier. In any
2778 case something to name a locale. */
2779 if (now->tok != tok_string && now->tok != tok_ident)
2781 translit_syntax:
2782 lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2783 lr_ignore_rest (ldfile, 0);
2784 continue;
2786 locale_name = now->val.str.startmb;
2788 /* Next should be a semicolon. */
2789 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2790 if (now->tok != tok_semicolon)
2791 goto translit_syntax;
2793 /* Now the repertoire name. */
2794 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2795 if ((now->tok != tok_string && now->tok != tok_ident)
2796 || now->val.str.startmb == NULL)
2797 goto translit_syntax;
2798 repertoire_name = now->val.str.startmb;
2799 if (repertoire_name[0] == '\0')
2800 /* Ignore the empty string. */
2801 repertoire_name = NULL;
2803 /* Save the include statement for later processing. */
2804 include_stmt = (struct translit_include_t *)
2805 xmalloc (sizeof (struct translit_include_t));
2806 include_stmt->copy_locale = locale_name;
2807 include_stmt->copy_repertoire = repertoire_name;
2808 include_stmt->next = NULL;
2810 include_ptr = &ctype->translit_include;
2811 while (*include_ptr != NULL)
2812 include_ptr = &(*include_ptr)->next;
2813 *include_ptr = include_stmt;
2815 /* The rest of the line must be empty. */
2816 lr_ignore_rest (ldfile, 1);
2818 /* Make sure the locale is read. */
2819 add_to_readlist (LC_CTYPE, locale_name, repertoire_name,
2820 1, NULL);
2821 continue;
2823 else if (now->tok == tok_default_missing)
2825 uint32_t *wstr;
2827 while (1)
2829 /* We expect a single character or string as the
2830 argument. */
2831 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2832 wstr = read_widestring (ldfile, now, charmap,
2833 repertoire);
2835 if (wstr != NULL)
2837 if (ctype->default_missing != NULL)
2839 lr_error (ldfile, _("\
2840 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2841 WITH_CUR_LOCALE (error_at_line (0, 0,
2842 ctype->default_missing_file,
2843 ctype->default_missing_lineno,
2844 _("\
2845 previous definition was here")));
2847 else
2849 ctype->default_missing = wstr;
2850 ctype->default_missing_file = ldfile->fname;
2851 ctype->default_missing_lineno = ldfile->lineno;
2853 /* We can have more entries, ignore them. */
2854 lr_ignore_rest (ldfile, 0);
2855 break;
2857 else if (wstr == (uint32_t *) -1l)
2858 /* This was a syntax error. */
2859 break;
2861 /* Maybe there is another replacement we can use. */
2862 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2863 if (now->tok == tok_eol || now->tok == tok_eof)
2865 /* Nothing found. We tell the user. */
2866 lr_error (ldfile, _("\
2867 %s: no representable `default_missing' definition found"), "LC_CTYPE");
2868 break;
2870 if (now->tok != tok_semicolon)
2871 goto translit_syntax;
2874 continue;
2876 else if (now->tok == tok_translit_ignore)
2878 read_translit_ignore_entry (ldfile, ctype, charmap,
2879 repertoire);
2880 continue;
2883 read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2885 ldfile->return_widestr = 0;
2887 if (now->tok == tok_eof)
2888 lr_error (ldfile, _(\
2889 "%s: `translit_start' section does not end with `translit_end'"),
2890 "LC_CTYPE");
2892 break;
2894 case tok_ident:
2895 /* Ignore the rest of the line if we don't need the input of
2896 this line. */
2897 if (ignore_content)
2899 lr_ignore_rest (ldfile, 0);
2900 break;
2903 /* This could mean one of several things. First test whether
2904 it's a character class name. */
2905 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2906 if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2907 break;
2908 if (cnt < ctype->nr_charclass)
2910 class_bit = _ISwbit (cnt);
2911 class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2912 free (now->val.str.startmb);
2913 goto read_charclass;
2915 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2916 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2917 break;
2918 if (cnt < ctype->map_collection_nr)
2920 mapidx = cnt;
2921 free (now->val.str.startmb);
2922 goto read_mapping;
2924 #ifdef PREDEFINED_CLASSES
2925 if (strcmp (now->val.str.startmb, "special1") == 0)
2927 class_bit = _ISwspecial1;
2928 free (now->val.str.startmb);
2929 goto read_charclass;
2931 if (strcmp (now->val.str.startmb, "special2") == 0)
2933 class_bit = _ISwspecial2;
2934 free (now->val.str.startmb);
2935 goto read_charclass;
2937 if (strcmp (now->val.str.startmb, "special3") == 0)
2939 class_bit = _ISwspecial3;
2940 free (now->val.str.startmb);
2941 goto read_charclass;
2943 if (strcmp (now->val.str.startmb, "tosymmetric") == 0)
2945 mapidx = 2;
2946 goto read_mapping;
2948 #endif
2949 break;
2951 case tok_end:
2952 /* Next we assume `LC_CTYPE'. */
2953 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2954 if (now->tok == tok_eof)
2955 break;
2956 if (now->tok == tok_eol)
2957 lr_error (ldfile, _("%s: incomplete `END' line"),
2958 "LC_CTYPE");
2959 else if (now->tok != tok_lc_ctype)
2960 lr_error (ldfile, _("\
2961 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2962 lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2963 return;
2965 default:
2966 err_label:
2967 if (now->tok != tok_eof)
2968 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2971 /* Prepare for the next round. */
2972 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2973 nowtok = now->tok;
2976 /* When we come here we reached the end of the file. */
2977 lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2981 static void
2982 set_class_defaults (struct locale_ctype_t *ctype,
2983 const struct charmap_t *charmap,
2984 struct repertoire_t *repertoire)
2986 size_t cnt;
2988 /* This function defines the default values for the classes and conversions
2989 according to POSIX.2 2.5.2.1.
2990 It may seem that the order of these if-blocks is arbitrary but it is NOT.
2991 Don't move them unless you know what you do! */
2993 auto void set_default (int bitpos, int from, int to);
2995 void set_default (int bitpos, int from, int to)
2997 char tmp[2];
2998 int ch;
2999 int bit = _ISbit (bitpos);
3000 int bitw = _ISwbit (bitpos);
3001 /* Define string. */
3002 strcpy (tmp, "?");
3004 for (ch = from; ch <= to; ++ch)
3006 struct charseq *seq;
3007 tmp[0] = ch;
3009 seq = charmap_find_value (charmap, tmp, 1);
3010 if (seq == NULL)
3012 char buf[10];
3013 sprintf (buf, "U%08X", ch);
3014 seq = charmap_find_value (charmap, buf, 9);
3016 if (seq == NULL)
3018 if (!be_quiet)
3019 WITH_CUR_LOCALE (error (0, 0, _("\
3020 %s: character `%s' not defined in charmap while needed as default value"),
3021 "LC_CTYPE", tmp));
3023 else if (seq->nbytes != 1)
3024 WITH_CUR_LOCALE (error (0, 0, _("\
3025 %s: character `%s' in charmap not representable with one byte"),
3026 "LC_CTYPE", tmp));
3027 else
3028 ctype->class256_collection[seq->bytes[0]] |= bit;
3030 /* No need to search here, the ASCII value is also the Unicode
3031 value. */
3032 ELEM (ctype, class_collection, , ch) |= bitw;
3036 /* Set default values if keyword was not present. */
3037 if ((ctype->class_done & BITw (tok_upper)) == 0)
3038 /* "If this keyword [upper] is not specified, the uppercase letters
3039 `A' through `Z', ..., shall automatically belong to this class,
3040 with implementation defined character values." [P1003.2, 2.5.2.1] */
3041 set_default (BITPOS (tok_upper), 'A', 'Z');
3043 if ((ctype->class_done & BITw (tok_lower)) == 0)
3044 /* "If this keyword [lower] is not specified, the lowercase letters
3045 `a' through `z', ..., shall automatically belong to this class,
3046 with implementation defined character values." [P1003.2, 2.5.2.1] */
3047 set_default (BITPOS (tok_lower), 'a', 'z');
3049 if ((ctype->class_done & BITw (tok_alpha)) == 0)
3051 /* Table 2-6 in P1003.2 says that characters in class `upper' or
3052 class `lower' *must* be in class `alpha'. */
3053 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
3054 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
3056 for (cnt = 0; cnt < 256; ++cnt)
3057 if ((ctype->class256_collection[cnt] & mask) != 0)
3058 ctype->class256_collection[cnt] |= BIT (tok_alpha);
3060 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3061 if ((ctype->class_collection[cnt] & maskw) != 0)
3062 ctype->class_collection[cnt] |= BITw (tok_alpha);
3065 if ((ctype->class_done & BITw (tok_digit)) == 0)
3066 /* "If this keyword [digit] is not specified, the digits `0' through
3067 `9', ..., shall automatically belong to this class, with
3068 implementation-defined character values." [P1003.2, 2.5.2.1] */
3069 set_default (BITPOS (tok_digit), '0', '9');
3071 /* "Only characters specified for the `alpha' and `digit' keyword
3072 shall be specified. Characters specified for the keyword `alpha'
3073 and `digit' are automatically included in this class. */
3075 unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
3076 unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
3078 for (cnt = 0; cnt < 256; ++cnt)
3079 if ((ctype->class256_collection[cnt] & mask) != 0)
3080 ctype->class256_collection[cnt] |= BIT (tok_alnum);
3082 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3083 if ((ctype->class_collection[cnt] & maskw) != 0)
3084 ctype->class_collection[cnt] |= BITw (tok_alnum);
3087 if ((ctype->class_done & BITw (tok_space)) == 0)
3088 /* "If this keyword [space] is not specified, the characters <space>,
3089 <form-feed>, <newline>, <carriage-return>, <tab>, and
3090 <vertical-tab>, ..., shall automatically belong to this class,
3091 with implementation-defined character values." [P1003.2, 2.5.2.1] */
3093 struct charseq *seq;
3095 seq = charmap_find_value (charmap, "space", 5);
3096 if (seq == NULL)
3097 seq = charmap_find_value (charmap, "SP", 2);
3098 if (seq == NULL)
3099 seq = charmap_find_value (charmap, "U00000020", 9);
3100 if (seq == NULL)
3102 if (!be_quiet)
3103 WITH_CUR_LOCALE (error (0, 0, _("\
3104 %s: character `%s' not defined while needed as default value"),
3105 "LC_CTYPE", "<space>"));
3107 else if (seq->nbytes != 1)
3108 WITH_CUR_LOCALE (error (0, 0, _("\
3109 %s: character `%s' in charmap not representable with one byte"),
3110 "LC_CTYPE", "<space>"));
3111 else
3112 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3114 /* No need to search. */
3115 ELEM (ctype, class_collection, , L' ') |= BITw (tok_space);
3117 seq = charmap_find_value (charmap, "form-feed", 9);
3118 if (seq == NULL)
3119 seq = charmap_find_value (charmap, "U0000000C", 9);
3120 if (seq == NULL)
3122 if (!be_quiet)
3123 WITH_CUR_LOCALE (error (0, 0, _("\
3124 %s: character `%s' not defined while needed as default value"),
3125 "LC_CTYPE", "<form-feed>"));
3127 else if (seq->nbytes != 1)
3128 WITH_CUR_LOCALE (error (0, 0, _("\
3129 %s: character `%s' in charmap not representable with one byte"),
3130 "LC_CTYPE", "<form-feed>"));
3131 else
3132 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3134 /* No need to search. */
3135 ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space);
3138 seq = charmap_find_value (charmap, "newline", 7);
3139 if (seq == NULL)
3140 seq = charmap_find_value (charmap, "U0000000A", 9);
3141 if (seq == NULL)
3143 if (!be_quiet)
3144 WITH_CUR_LOCALE (error (0, 0, _("\
3145 character `%s' not defined while needed as default value"),
3146 "<newline>"));
3148 else if (seq->nbytes != 1)
3149 WITH_CUR_LOCALE (error (0, 0, _("\
3150 %s: character `%s' in charmap not representable with one byte"),
3151 "LC_CTYPE", "<newline>"));
3152 else
3153 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3155 /* No need to search. */
3156 ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space);
3159 seq = charmap_find_value (charmap, "carriage-return", 15);
3160 if (seq == NULL)
3161 seq = charmap_find_value (charmap, "U0000000D", 9);
3162 if (seq == NULL)
3164 if (!be_quiet)
3165 WITH_CUR_LOCALE (error (0, 0, _("\
3166 %s: character `%s' not defined while needed as default value"),
3167 "LC_CTYPE", "<carriage-return>"));
3169 else if (seq->nbytes != 1)
3170 WITH_CUR_LOCALE (error (0, 0, _("\
3171 %s: character `%s' in charmap not representable with one byte"),
3172 "LC_CTYPE", "<carriage-return>"));
3173 else
3174 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3176 /* No need to search. */
3177 ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space);
3180 seq = charmap_find_value (charmap, "tab", 3);
3181 if (seq == NULL)
3182 seq = charmap_find_value (charmap, "U00000009", 9);
3183 if (seq == NULL)
3185 if (!be_quiet)
3186 WITH_CUR_LOCALE (error (0, 0, _("\
3187 %s: character `%s' not defined while needed as default value"),
3188 "LC_CTYPE", "<tab>"));
3190 else if (seq->nbytes != 1)
3191 WITH_CUR_LOCALE (error (0, 0, _("\
3192 %s: character `%s' in charmap not representable with one byte"),
3193 "LC_CTYPE", "<tab>"));
3194 else
3195 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3197 /* No need to search. */
3198 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space);
3201 seq = charmap_find_value (charmap, "vertical-tab", 12);
3202 if (seq == NULL)
3203 seq = charmap_find_value (charmap, "U0000000B", 9);
3204 if (seq == NULL)
3206 if (!be_quiet)
3207 WITH_CUR_LOCALE (error (0, 0, _("\
3208 %s: character `%s' not defined while needed as default value"),
3209 "LC_CTYPE", "<vertical-tab>"));
3211 else if (seq->nbytes != 1)
3212 WITH_CUR_LOCALE (error (0, 0, _("\
3213 %s: character `%s' in charmap not representable with one byte"),
3214 "LC_CTYPE", "<vertical-tab>"));
3215 else
3216 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3218 /* No need to search. */
3219 ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space);
3222 if ((ctype->class_done & BITw (tok_xdigit)) == 0)
3223 /* "If this keyword is not specified, the digits `0' to `9', the
3224 uppercase letters `A' through `F', and the lowercase letters `a'
3225 through `f', ..., shall automatically belong to this class, with
3226 implementation defined character values." [P1003.2, 2.5.2.1] */
3228 set_default (BITPOS (tok_xdigit), '0', '9');
3229 set_default (BITPOS (tok_xdigit), 'A', 'F');
3230 set_default (BITPOS (tok_xdigit), 'a', 'f');
3233 if ((ctype->class_done & BITw (tok_blank)) == 0)
3234 /* "If this keyword [blank] is unspecified, the characters <space> and
3235 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
3237 struct charseq *seq;
3239 seq = charmap_find_value (charmap, "space", 5);
3240 if (seq == NULL)
3241 seq = charmap_find_value (charmap, "SP", 2);
3242 if (seq == NULL)
3243 seq = charmap_find_value (charmap, "U00000020", 9);
3244 if (seq == NULL)
3246 if (!be_quiet)
3247 WITH_CUR_LOCALE (error (0, 0, _("\
3248 %s: character `%s' not defined while needed as default value"),
3249 "LC_CTYPE", "<space>"));
3251 else if (seq->nbytes != 1)
3252 WITH_CUR_LOCALE (error (0, 0, _("\
3253 %s: character `%s' in charmap not representable with one byte"),
3254 "LC_CTYPE", "<space>"));
3255 else
3256 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3258 /* No need to search. */
3259 ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank);
3262 seq = charmap_find_value (charmap, "tab", 3);
3263 if (seq == NULL)
3264 seq = charmap_find_value (charmap, "U00000009", 9);
3265 if (seq == NULL)
3267 if (!be_quiet)
3268 WITH_CUR_LOCALE (error (0, 0, _("\
3269 %s: character `%s' not defined while needed as default value"),
3270 "LC_CTYPE", "<tab>"));
3272 else if (seq->nbytes != 1)
3273 WITH_CUR_LOCALE (error (0, 0, _("\
3274 %s: character `%s' in charmap not representable with one byte"),
3275 "LC_CTYPE", "<tab>"));
3276 else
3277 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3279 /* No need to search. */
3280 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank);
3283 if ((ctype->class_done & BITw (tok_graph)) == 0)
3284 /* "If this keyword [graph] is not specified, characters specified for
3285 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3286 shall belong to this character class." [P1003.2, 2.5.2.1] */
3288 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3289 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3290 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3291 BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3292 BITw (tok_punct);
3293 size_t cnt;
3295 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3296 if ((ctype->class_collection[cnt] & maskw) != 0)
3297 ctype->class_collection[cnt] |= BITw (tok_graph);
3299 for (cnt = 0; cnt < 256; ++cnt)
3300 if ((ctype->class256_collection[cnt] & mask) != 0)
3301 ctype->class256_collection[cnt] |= BIT (tok_graph);
3304 if ((ctype->class_done & BITw (tok_print)) == 0)
3305 /* "If this keyword [print] is not provided, characters specified for
3306 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3307 and the <space> character shall belong to this character class."
3308 [P1003.2, 2.5.2.1] */
3310 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3311 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3312 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3313 BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3314 BITw (tok_punct);
3315 size_t cnt;
3316 struct charseq *seq;
3318 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3319 if ((ctype->class_collection[cnt] & maskw) != 0)
3320 ctype->class_collection[cnt] |= BITw (tok_print);
3322 for (cnt = 0; cnt < 256; ++cnt)
3323 if ((ctype->class256_collection[cnt] & mask) != 0)
3324 ctype->class256_collection[cnt] |= BIT (tok_print);
3327 seq = charmap_find_value (charmap, "space", 5);
3328 if (seq == NULL)
3329 seq = charmap_find_value (charmap, "SP", 2);
3330 if (seq == NULL)
3331 seq = charmap_find_value (charmap, "U00000020", 9);
3332 if (seq == NULL)
3334 if (!be_quiet)
3335 WITH_CUR_LOCALE (error (0, 0, _("\
3336 %s: character `%s' not defined while needed as default value"),
3337 "LC_CTYPE", "<space>"));
3339 else if (seq->nbytes != 1)
3340 WITH_CUR_LOCALE (error (0, 0, _("\
3341 %s: character `%s' in charmap not representable with one byte"),
3342 "LC_CTYPE", "<space>"));
3343 else
3344 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
3346 /* No need to search. */
3347 ELEM (ctype, class_collection, , L' ') |= BITw (tok_print);
3350 if (ctype->tomap_done[0] == 0)
3351 /* "If this keyword [toupper] is not specified, the lowercase letters
3352 `a' through `z', and their corresponding uppercase letters `A' to
3353 `Z', ..., shall automatically be included, with implementation-
3354 defined character values." [P1003.2, 2.5.2.1] */
3356 char tmp[4];
3357 int ch;
3359 strcpy (tmp, "<?>");
3361 for (ch = 'a'; ch <= 'z'; ++ch)
3363 struct charseq *seq_from, *seq_to;
3365 tmp[1] = (char) ch;
3367 seq_from = charmap_find_value (charmap, &tmp[1], 1);
3368 if (seq_from == NULL)
3370 char buf[10];
3371 sprintf (buf, "U%08X", ch);
3372 seq_from = charmap_find_value (charmap, buf, 9);
3374 if (seq_from == NULL)
3376 if (!be_quiet)
3377 WITH_CUR_LOCALE (error (0, 0, _("\
3378 %s: character `%s' not defined while needed as default value"),
3379 "LC_CTYPE", tmp));
3381 else if (seq_from->nbytes != 1)
3383 if (!be_quiet)
3384 WITH_CUR_LOCALE (error (0, 0, _("\
3385 %s: character `%s' needed as default value not representable with one byte"),
3386 "LC_CTYPE", tmp));
3388 else
3390 /* This conversion is implementation defined. */
3391 tmp[1] = (char) (ch + ('A' - 'a'));
3392 seq_to = charmap_find_value (charmap, &tmp[1], 1);
3393 if (seq_to == NULL)
3395 char buf[10];
3396 sprintf (buf, "U%08X", ch + ('A' - 'a'));
3397 seq_to = charmap_find_value (charmap, buf, 9);
3399 if (seq_to == NULL)
3401 if (!be_quiet)
3402 WITH_CUR_LOCALE (error (0, 0, _("\
3403 %s: character `%s' not defined while needed as default value"),
3404 "LC_CTYPE", tmp));
3406 else if (seq_to->nbytes != 1)
3408 if (!be_quiet)
3409 WITH_CUR_LOCALE (error (0, 0, _("\
3410 %s: character `%s' needed as default value not representable with one byte"),
3411 "LC_CTYPE", tmp));
3413 else
3414 /* The index [0] is determined by the order of the
3415 `ctype_map_newP' calls in `ctype_startup'. */
3416 ctype->map256_collection[0][seq_from->bytes[0]]
3417 = seq_to->bytes[0];
3420 /* No need to search. */
3421 ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
3425 if (ctype->tomap_done[1] == 0)
3426 /* "If this keyword [tolower] is not specified, the mapping shall be
3427 the reverse mapping of the one specified to `toupper'." [P1003.2] */
3429 for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
3430 if (ctype->map_collection[0][cnt] != 0)
3431 ELEM (ctype, map_collection, [1],
3432 ctype->map_collection[0][cnt])
3433 = ctype->charnames[cnt];
3435 for (cnt = 0; cnt < 256; ++cnt)
3436 if (ctype->map256_collection[0][cnt] != 0)
3437 ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
3440 if (ctype->outdigits_act != 10)
3442 if (ctype->outdigits_act != 0)
3443 WITH_CUR_LOCALE (error (0, 0, _("\
3444 %s: field `%s' does not contain exactly ten entries"),
3445 "LC_CTYPE", "outdigit"));
3447 for (cnt = ctype->outdigits_act; cnt < 10; ++cnt)
3449 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3450 digits + cnt, 1);
3452 if (ctype->mboutdigits[cnt] == NULL)
3453 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3454 longnames[cnt],
3455 strlen (longnames[cnt]));
3457 if (ctype->mboutdigits[cnt] == NULL)
3458 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3459 uninames[cnt], 9);
3461 if (ctype->mboutdigits[cnt] == NULL)
3463 /* Provide a replacement. */
3464 WITH_CUR_LOCALE (error (0, 0, _("\
3465 no output digits defined and none of the standard names in the charmap")));
3467 ctype->mboutdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
3468 sizeof (struct charseq)
3469 + 1);
3471 /* This is better than nothing. */
3472 ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
3473 ctype->mboutdigits[cnt]->nbytes = 1;
3476 ctype->wcoutdigits[cnt] = L'0' + cnt;
3479 ctype->outdigits_act = 10;
/* Construction of sparse 3-level tables.
   See wchar-lookup.h for their structure and the meaning of p and q.  */

/* A set of wide characters stored as a three-level table.  A character
   WC is split into four indices: the bits above q + p + 5 select an
   entry in level1, the next q bits an entry inside a level2 block, the
   next p bits a 32-bit word inside a level3 block, and the low 5 bits a
   single bit in that word.  */
struct wctype_table
{
  /* Parameters.  */
  unsigned int p;       /* A level3 block holds 1 << p words.  */
  unsigned int q;       /* A level2 block holds 1 << q entries.  */
  /* Working representation.  The level1 counters are in elements, the
     level2 and level3 counters are in blocks.  */
  size_t level1_alloc;
  size_t level1_size;
  uint32_t *level1;     /* level2 block number, or EMPTY.  */
  size_t level2_alloc;
  size_t level2_size;
  uint32_t *level2;     /* level3 block number, or EMPTY.  */
  size_t level3_alloc;
  size_t level3_size;
  uint32_t *level3;     /* One bit per character.  */
  /* Compressed representation.  */
  size_t result_size;   /* Size of `result' in bytes.  */
  char *result;
};
3507 /* Initialize. Assumes t->p and t->q have already been set. */
3508 static inline void
3509 wctype_table_init (struct wctype_table *t)
3511 t->level1 = NULL;
3512 t->level1_alloc = t->level1_size = 0;
3513 t->level2 = NULL;
3514 t->level2_alloc = t->level2_size = 0;
3515 t->level3 = NULL;
3516 t->level3_alloc = t->level3_size = 0;
3519 /* Retrieve an entry. */
3520 static inline int
3521 wctype_table_get (struct wctype_table *t, uint32_t wc)
3523 uint32_t index1 = wc >> (t->q + t->p + 5);
3524 if (index1 < t->level1_size)
3526 uint32_t lookup1 = t->level1[index1];
3527 if (lookup1 != EMPTY)
3529 uint32_t index2 = ((wc >> (t->p + 5)) & ((1 << t->q) - 1))
3530 + (lookup1 << t->q);
3531 uint32_t lookup2 = t->level2[index2];
3532 if (lookup2 != EMPTY)
3534 uint32_t index3 = ((wc >> 5) & ((1 << t->p) - 1))
3535 + (lookup2 << t->p);
3536 uint32_t lookup3 = t->level3[index3];
3537 uint32_t index4 = wc & 0x1f;
3539 return (lookup3 >> index4) & 1;
3543 return 0;
3546 /* Add one entry. */
3547 static void
3548 wctype_table_add (struct wctype_table *t, uint32_t wc)
3550 uint32_t index1 = wc >> (t->q + t->p + 5);
3551 uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1);
3552 uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1);
3553 uint32_t index4 = wc & 0x1f;
3554 size_t i, i1, i2;
3556 if (index1 >= t->level1_size)
3558 if (index1 >= t->level1_alloc)
3560 size_t alloc = 2 * t->level1_alloc;
3561 if (alloc <= index1)
3562 alloc = index1 + 1;
3563 t->level1 = (uint32_t *) xrealloc ((char *) t->level1,
3564 alloc * sizeof (uint32_t));
3565 t->level1_alloc = alloc;
3567 while (index1 >= t->level1_size)
3568 t->level1[t->level1_size++] = EMPTY;
3571 if (t->level1[index1] == EMPTY)
3573 if (t->level2_size == t->level2_alloc)
3575 size_t alloc = 2 * t->level2_alloc + 1;
3576 t->level2 = (uint32_t *) xrealloc ((char *) t->level2,
3577 (alloc << t->q) * sizeof (uint32_t));
3578 t->level2_alloc = alloc;
3580 i1 = t->level2_size << t->q;
3581 i2 = (t->level2_size + 1) << t->q;
3582 for (i = i1; i < i2; i++)
3583 t->level2[i] = EMPTY;
3584 t->level1[index1] = t->level2_size++;
3587 index2 += t->level1[index1] << t->q;
3589 if (t->level2[index2] == EMPTY)
3591 if (t->level3_size == t->level3_alloc)
3593 size_t alloc = 2 * t->level3_alloc + 1;
3594 t->level3 = (uint32_t *) xrealloc ((char *) t->level3,
3595 (alloc << t->p) * sizeof (uint32_t));
3596 t->level3_alloc = alloc;
3598 i1 = t->level3_size << t->p;
3599 i2 = (t->level3_size + 1) << t->p;
3600 for (i = i1; i < i2; i++)
3601 t->level3[i] = 0;
3602 t->level2[index2] = t->level3_size++;
3605 index3 += t->level2[index2] << t->p;
3607 t->level3[index3] |= (uint32_t)1 << index4;
3610 /* Finalize and shrink. */
3611 static void
3612 wctype_table_finalize (struct wctype_table *t)
3614 size_t i, j, k;
3615 uint32_t reorder3[t->level3_size];
3616 uint32_t reorder2[t->level2_size];
3617 uint32_t level1_offset, level2_offset, level3_offset;
3619 /* Uniquify level3 blocks. */
3620 k = 0;
3621 for (j = 0; j < t->level3_size; j++)
3623 for (i = 0; i < k; i++)
3624 if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3625 (1 << t->p) * sizeof (uint32_t)) == 0)
3626 break;
3627 /* Relocate block j to block i. */
3628 reorder3[j] = i;
3629 if (i == k)
3631 if (i != j)
3632 memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3633 (1 << t->p) * sizeof (uint32_t));
3634 k++;
3637 t->level3_size = k;
3639 for (i = 0; i < (t->level2_size << t->q); i++)
3640 if (t->level2[i] != EMPTY)
3641 t->level2[i] = reorder3[t->level2[i]];
3643 /* Uniquify level2 blocks. */
3644 k = 0;
3645 for (j = 0; j < t->level2_size; j++)
3647 for (i = 0; i < k; i++)
3648 if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3649 (1 << t->q) * sizeof (uint32_t)) == 0)
3650 break;
3651 /* Relocate block j to block i. */
3652 reorder2[j] = i;
3653 if (i == k)
3655 if (i != j)
3656 memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3657 (1 << t->q) * sizeof (uint32_t));
3658 k++;
3661 t->level2_size = k;
3663 for (i = 0; i < t->level1_size; i++)
3664 if (t->level1[i] != EMPTY)
3665 t->level1[i] = reorder2[t->level1[i]];
3667 /* Create and fill the resulting compressed representation. */
3668 t->result_size =
3669 5 * sizeof (uint32_t)
3670 + t->level1_size * sizeof (uint32_t)
3671 + (t->level2_size << t->q) * sizeof (uint32_t)
3672 + (t->level3_size << t->p) * sizeof (uint32_t);
3673 t->result = (char *) xmalloc (t->result_size);
3675 level1_offset =
3676 5 * sizeof (uint32_t);
3677 level2_offset =
3678 5 * sizeof (uint32_t)
3679 + t->level1_size * sizeof (uint32_t);
3680 level3_offset =
3681 5 * sizeof (uint32_t)
3682 + t->level1_size * sizeof (uint32_t)
3683 + (t->level2_size << t->q) * sizeof (uint32_t);
3685 ((uint32_t *) t->result)[0] = t->q + t->p + 5;
3686 ((uint32_t *) t->result)[1] = t->level1_size;
3687 ((uint32_t *) t->result)[2] = t->p + 5;
3688 ((uint32_t *) t->result)[3] = (1 << t->q) - 1;
3689 ((uint32_t *) t->result)[4] = (1 << t->p) - 1;
3691 for (i = 0; i < t->level1_size; i++)
3692 ((uint32_t *) (t->result + level1_offset))[i] =
3693 (t->level1[i] == EMPTY
3695 : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3697 for (i = 0; i < (t->level2_size << t->q); i++)
3698 ((uint32_t *) (t->result + level2_offset))[i] =
3699 (t->level2[i] == EMPTY
3701 : (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
3703 for (i = 0; i < (t->level3_size << t->p); i++)
3704 ((uint32_t *) (t->result + level3_offset))[i] = t->level3[i];
3706 if (t->level1_alloc > 0)
3707 free (t->level1);
3708 if (t->level2_alloc > 0)
3709 free (t->level2);
3710 if (t->level3_alloc > 0)
3711 free (t->level3);
3714 #define TABLE wcwidth_table
3715 #define ELEMENT uint8_t
3716 #define DEFAULT 0xff
3717 #include "3level.h"
3719 #define TABLE wctrans_table
3720 #define ELEMENT int32_t
3721 #define DEFAULT 0
3722 #define wctrans_table_add wctrans_table_add_internal
3723 #include "3level.h"
3724 #undef wctrans_table_add
/* The wctrans_table must actually store the difference between the
   desired result and the argument.

   Records that WC maps to MAPPED_WC by storing MAPPED_WC - WC (computed
   in uint32_t arithmetic) for WC in T.  */
static inline void
wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
{
  wctrans_table_add_internal (t, wc, mapped_wc - wc);
}
3734 /* Flattens the included transliterations into a translit list.
3735 Inserts them in the list at `cursor', and returns the new cursor. */
3736 static struct translit_t **
3737 translit_flatten (struct locale_ctype_t *ctype,
3738 const struct charmap_t *charmap,
3739 struct translit_t **cursor)
3741 while (ctype->translit_include != NULL)
3743 const char *copy_locale = ctype->translit_include->copy_locale;
3744 const char *copy_repertoire = ctype->translit_include->copy_repertoire;
3745 struct localedef_t *other;
3747 /* Unchain the include statement. During the depth-first traversal
3748 we don't want to visit any locale more than once. */
3749 ctype->translit_include = ctype->translit_include->next;
3751 other = find_locale (LC_CTYPE, copy_locale, copy_repertoire, charmap);
3753 if (other == NULL)
3755 WITH_CUR_LOCALE (error (0, 0, _("\
3756 %s: transliteration data from locale `%s' not available"),
3757 "LC_CTYPE", copy_locale));
3759 else
3761 struct locale_ctype_t *other_ctype =
3762 other->categories[LC_CTYPE].ctype;
3764 cursor = translit_flatten (other_ctype, charmap, cursor);
3765 assert (other_ctype->translit_include == NULL);
3767 if (other_ctype->translit != NULL)
3769 /* Insert the other_ctype->translit list at *cursor. */
3770 struct translit_t *endp = other_ctype->translit;
3771 while (endp->next != NULL)
3772 endp = endp->next;
3774 endp->next = *cursor;
3775 *cursor = other_ctype->translit;
3777 /* Avoid any risk of circular lists. */
3778 other_ctype->translit = NULL;
3780 cursor = &endp->next;
3783 if (ctype->default_missing == NULL)
3784 ctype->default_missing = other_ctype->default_missing;
3788 return cursor;
3791 static void
3792 allocate_arrays (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
3793 struct repertoire_t *repertoire)
3795 size_t idx, nr;
3796 const void *key;
3797 size_t len;
3798 void *vdata;
3799 void *curs;
3801 /* You wonder about this amount of memory? This is only because some
3802 users do not manage to address the array with unsigned values or
3803 data types with range >= 256. '\200' would result in the array
3804 index -128. To help these poor people we duplicate the entries for
3805 128 up to 255 below the entry for \0. */
3806 ctype->ctype_b = (char_class_t *) xcalloc (256 + 128, sizeof (char_class_t));
3807 ctype->ctype32_b = (char_class32_t *) xcalloc (256, sizeof (char_class32_t));
3808 ctype->class_b = (uint32_t **)
3809 xmalloc (ctype->nr_charclass * sizeof (uint32_t *));
3810 ctype->class_3level = (struct iovec *)
3811 xmalloc (ctype->nr_charclass * sizeof (struct iovec));
3813 /* This is the array accessed using the multibyte string elements. */
3814 for (idx = 0; idx < 256; ++idx)
3815 ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
3817 /* Mirror first 127 entries. We must take care that entry -1 is not
3818 mirrored because EOF == -1. */
3819 for (idx = 0; idx < 127; ++idx)
3820 ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
3822 /* The 32 bit array contains all characters < 0x100. */
3823 for (idx = 0; idx < ctype->class_collection_act; ++idx)
3824 if (ctype->charnames[idx] < 0x100)
3825 ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
3827 for (nr = 0; nr < ctype->nr_charclass; nr++)
3829 ctype->class_b[nr] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
3831 for (idx = 0; idx < 256; ++idx)
3832 if (ctype->class256_collection[idx] & _ISbit (nr))
3833 ctype->class_b[nr][idx >> 5] |= (uint32_t)1 << (idx & 0x1f);
3836 for (nr = 0; nr < ctype->nr_charclass; nr++)
3838 struct wctype_table t;
3840 t.p = 4; /* or: 5 */
3841 t.q = 7; /* or: 6 */
3842 wctype_table_init (&t);
3844 for (idx = 0; idx < ctype->class_collection_act; ++idx)
3845 if (ctype->class_collection[idx] & _ISwbit (nr))
3846 wctype_table_add (&t, ctype->charnames[idx]);
3848 wctype_table_finalize (&t);
3850 if (verbose)
3851 WITH_CUR_LOCALE (fprintf (stderr, _("\
3852 %s: table for class \"%s\": %lu bytes\n"),
3853 "LC_CTYPE", ctype->classnames[nr],
3854 (unsigned long int) t.result_size));
3856 ctype->class_3level[nr].iov_base = t.result;
3857 ctype->class_3level[nr].iov_len = t.result_size;
3860 /* Room for table of mappings. */
3861 ctype->map_b = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3862 ctype->map32_b = (uint32_t **) xmalloc (ctype->map_collection_nr
3863 * sizeof (uint32_t *));
3864 ctype->map_3level = (struct iovec *)
3865 xmalloc (ctype->map_collection_nr * sizeof (struct iovec));
3867 /* Fill in all mappings. */
3868 for (idx = 0; idx < 2; ++idx)
3870 unsigned int idx2;
3872 /* Allocate table. */
3873 ctype->map_b[idx] = (uint32_t *)
3874 xmalloc ((256 + 128) * sizeof (uint32_t));
3876 /* Copy values from collection. */
3877 for (idx2 = 0; idx2 < 256; ++idx2)
3878 ctype->map_b[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
3880 /* Mirror first 127 entries. We must take care not to map entry
3881 -1 because EOF == -1. */
3882 for (idx2 = 0; idx2 < 127; ++idx2)
3883 ctype->map_b[idx][idx2] = ctype->map_b[idx][256 + idx2];
3885 /* EOF must map to EOF. */
3886 ctype->map_b[idx][127] = EOF;
3889 for (idx = 0; idx < ctype->map_collection_nr; ++idx)
3891 unsigned int idx2;
3893 /* Allocate table. */
3894 ctype->map32_b[idx] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
3896 /* Copy values from collection. Default is identity mapping. */
3897 for (idx2 = 0; idx2 < 256; ++idx2)
3898 ctype->map32_b[idx][idx2] =
3899 (ctype->map_collection[idx][idx2] != 0
3900 ? ctype->map_collection[idx][idx2]
3901 : idx2);
3904 for (nr = 0; nr < ctype->map_collection_nr; nr++)
3906 struct wctrans_table t;
3908 t.p = 7;
3909 t.q = 9;
3910 wctrans_table_init (&t);
3912 for (idx = 0; idx < ctype->map_collection_act[nr]; ++idx)
3913 if (ctype->map_collection[nr][idx] != 0)
3914 wctrans_table_add (&t, ctype->charnames[idx],
3915 ctype->map_collection[nr][idx]);
3917 wctrans_table_finalize (&t);
3919 if (verbose)
3920 WITH_CUR_LOCALE (fprintf (stderr, _("\
3921 %s: table for map \"%s\": %lu bytes\n"),
3922 "LC_CTYPE", ctype->mapnames[nr],
3923 (unsigned long int) t.result_size));
3925 ctype->map_3level[nr].iov_base = t.result;
3926 ctype->map_3level[nr].iov_len = t.result_size;
3929 /* Extra array for class and map names. */
3930 ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3931 * sizeof (uint32_t));
3932 ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3933 * sizeof (uint32_t));
3935 ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
3936 ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
3938 /* Array for width information. Because the expected widths are very
3939 small (never larger than 2) we use only one single byte. This
3940 saves space.
3941 We put only printable characters in the table. wcwidth is specified
3942 to return -1 for non-printable characters. Doing the check here
3943 saves a run-time check.
3944 But we put L'\0' in the table. This again saves a run-time check. */
3946 struct wcwidth_table t;
3948 t.p = 7;
3949 t.q = 9;
3950 wcwidth_table_init (&t);
3952 /* First set all the printable characters of the character set to
3953 the default width. */
3954 curs = NULL;
3955 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
3957 struct charseq *data = (struct charseq *) vdata;
3959 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
3960 data->ucs4 = repertoire_find_value (ctype->repertoire,
3961 data->name, len);
3963 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
3965 uint32_t *class_bits =
3966 find_idx (ctype, &ctype->class_collection, NULL,
3967 &ctype->class_collection_act, data->ucs4);
3969 if (class_bits != NULL && (*class_bits & BITw (tok_print)))
3970 wcwidth_table_add (&t, data->ucs4, charmap->width_default);
3974 /* Now add the explicitly specified widths. */
3975 if (charmap->width_rules != NULL)
3977 size_t cnt;
3979 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
3981 unsigned char bytes[charmap->mb_cur_max];
3982 int nbytes = charmap->width_rules[cnt].from->nbytes;
3984 /* We have the range of character for which the width is
3985 specified described using byte sequences of the multibyte
3986 charset. We have to convert this to UCS4 now. And we
3987 cannot simply convert the beginning and the end of the
3988 sequence, we have to iterate over the byte sequence and
3989 convert it for every single character. */
3990 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
3992 while (nbytes < charmap->width_rules[cnt].to->nbytes
3993 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
3994 nbytes) <= 0)
3996 /* Find the UCS value for `bytes'. */
3997 int inner;
3998 uint32_t wch;
3999 struct charseq *seq =
4000 charmap_find_symbol (charmap, bytes, nbytes);
4002 if (seq == NULL)
4003 wch = ILLEGAL_CHAR_VALUE;
4004 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
4005 wch = seq->ucs4;
4006 else
4007 wch = repertoire_find_value (ctype->repertoire, seq->name,
4008 strlen (seq->name));
4010 if (wch != ILLEGAL_CHAR_VALUE)
4012 /* Store the value. */
4013 uint32_t *class_bits =
4014 find_idx (ctype, &ctype->class_collection, NULL,
4015 &ctype->class_collection_act, wch);
4017 if (class_bits != NULL && (*class_bits & BITw (tok_print)))
4018 wcwidth_table_add (&t, wch,
4019 charmap->width_rules[cnt].width);
4022 /* "Increment" the bytes sequence. */
4023 inner = nbytes - 1;
4024 while (inner >= 0 && bytes[inner] == 0xff)
4025 --inner;
4027 if (inner < 0)
4029 /* We have to extend the byte sequence. */
4030 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
4031 break;
4033 bytes[0] = 1;
4034 memset (&bytes[1], 0, nbytes);
4035 ++nbytes;
4037 else
4039 ++bytes[inner];
4040 while (++inner < nbytes)
4041 bytes[inner] = 0;
4047 /* Set the width of L'\0' to 0. */
4048 wcwidth_table_add (&t, 0, 0);
4050 wcwidth_table_finalize (&t);
4052 if (verbose)
4053 WITH_CUR_LOCALE (fprintf (stderr, _("%s: table for width: %lu bytes\n"),
4054 "LC_CTYPE", (unsigned long int) t.result_size));
4056 ctype->width.iov_base = t.result;
4057 ctype->width.iov_len = t.result_size;
4060 /* Set MB_CUR_MAX. */
4061 ctype->mb_cur_max = charmap->mb_cur_max;
4063 /* Now determine the table for the transliteration information.
4065 XXX It is not yet clear to me whether it is worth implementing a
4066 complicated algorithm which uses a hash table to locate the entries.
4067 For now I'll use a simple array which can be searching using binary
4068 search. */
4069 if (ctype->translit_include != NULL)
4070 /* Traverse the locales mentioned in the `include' statements in a
4071 depth-first way and fold in their transliteration information. */
4072 translit_flatten (ctype, charmap, &ctype->translit);
4074 if (ctype->translit != NULL)
4076 /* First count how many entries we have. This is the upper limit
4077 since some entries from the included files might be overwritten. */
4078 size_t number = 0;
4079 size_t cnt;
4080 struct translit_t *runp = ctype->translit;
4081 struct translit_t **sorted;
4082 size_t from_len, to_len;
4084 while (runp != NULL)
4086 ++number;
4087 runp = runp->next;
4090 /* Next we allocate an array large enough and fill in the values. */
4091 sorted = (struct translit_t **) alloca (number
4092 * sizeof (struct translit_t **));
4093 runp = ctype->translit;
4094 number = 0;
4097 /* Search for the place where to insert this string.
4098 XXX Better use a real sorting algorithm later. */
4099 size_t idx = 0;
4100 int replace = 0;
4102 while (idx < number)
4104 int res = wcscmp ((const wchar_t *) sorted[idx]->from,
4105 (const wchar_t *) runp->from);
4106 if (res == 0)
4108 replace = 1;
4109 break;
4111 if (res > 0)
4112 break;
4113 ++idx;
4116 if (replace)
4117 sorted[idx] = runp;
4118 else
4120 memmove (&sorted[idx + 1], &sorted[idx],
4121 (number - idx) * sizeof (struct translit_t *));
4122 sorted[idx] = runp;
4123 ++number;
4126 runp = runp->next;
4128 while (runp != NULL);
4130 /* The next step is putting all the possible transliteration
4131 strings in one memory block so that we can write it out.
4132 We need several different blocks:
4133 - index to the from-string array
4134 - from-string array
4135 - index to the to-string array
4136 - to-string array.
4138 from_len = to_len = 0;
4139 for (cnt = 0; cnt < number; ++cnt)
4141 struct translit_to_t *srunp;
4142 from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4143 srunp = sorted[cnt]->to;
4144 while (srunp != NULL)
4146 to_len += wcslen ((const wchar_t *) srunp->str) + 1;
4147 srunp = srunp->next;
4149 /* Plus one for the extra NUL character marking the end of
4150 the list for the current entry. */
4151 ++to_len;
4154 /* We can allocate the arrays for the results. */
4155 ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
4156 ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
4157 ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
4158 ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
4160 from_len = 0;
4161 to_len = 0;
4162 for (cnt = 0; cnt < number; ++cnt)
4164 size_t len;
4165 struct translit_to_t *srunp;
4167 ctype->translit_from_idx[cnt] = from_len;
4168 ctype->translit_to_idx[cnt] = to_len;
4170 len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4171 wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
4172 (const wchar_t *) sorted[cnt]->from, len);
4173 from_len += len;
4175 ctype->translit_to_idx[cnt] = to_len;
4176 srunp = sorted[cnt]->to;
4177 while (srunp != NULL)
4179 len = wcslen ((const wchar_t *) srunp->str) + 1;
4180 wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
4181 (const wchar_t *) srunp->str, len);
4182 to_len += len;
4183 srunp = srunp->next;
4185 ctype->translit_to_tbl[to_len++] = L'\0';
4188 /* Store the information about the length. */
4189 ctype->translit_idx_size = number;
4190 ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
4191 ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
4193 else
4195 /* Provide some dummy pointers since we have nothing to write out. */
4196 static uint32_t no_str = { 0 };
4198 ctype->translit_from_idx = &no_str;
4199 ctype->translit_from_tbl = &no_str;
4200 ctype->translit_to_tbl = &no_str;
4201 ctype->translit_idx_size = 0;
4202 ctype->translit_from_tbl_size = 0;
4203 ctype->translit_to_tbl_size = 0;