Remove whitespace from last change
[glibc.git] / locale / programs / ld-ctype.c
blobed8fa919d547881115299dbf8c356fddef0f0fb7
1 /* Copyright (C) 1995-2002, 2003, 2004 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18 02111-1307 USA. */
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
24 #include <alloca.h>
25 #include <byteswap.h>
26 #include <endian.h>
27 #include <errno.h>
28 #include <limits.h>
29 #include <obstack.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <wchar.h>
33 #include <wctype.h>
34 #include <sys/uio.h>
36 #include "localedef.h"
37 #include "charmap.h"
38 #include "localeinfo.h"
39 #include "langinfo.h"
40 #include "linereader.h"
41 #include "locfile-token.h"
42 #include "locfile.h"
44 #include <assert.h>
#ifdef PREDEFINED_CLASSES
/* These are the extra bits not in wctype.h since these are not preallocated
   classes.  The masks are built from an unsigned one: `1 << 31' would shift
   into the sign bit of an `int', which is undefined.  */
# define _ISwspecial1   (1U << 29)
# define _ISwspecial2   (1U << 30)
# define _ISwspecial3   (1U << 31)
#endif
/* The bit used for representing a special class.  BITPOS turns a class
   token into its zero-based distance from tok_upper; BIT and BITw turn
   that distance into a mask by means of _ISbit and _ISwbit.  */
#define BITPOS(class) ((class) - tok_upper)
#define BIT(class) (_ISbit (BITPOS (class)))
#define BITw(class) (_ISwbit (BITPOS (class)))

/* Expand to the object find_idx returns for VALUE in COLLECTION, so the
   result can be read or assigned.  IDX is pasted after the member names;
   it may be empty or a subscript selecting one of several collections.  */
#define ELEM(ctype, collection, idx, value)                                   \
  *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,     \
             &ctype->collection##_act idx, value)


/* To be compatible with former implementations we for now restrict
   the number of bits for character classes to 16.  When compatibility
   is not necessary anymore increase the number to 32.  */
#define char_class_t uint16_t
#define char_class32_t uint32_t
/* Type to describe a transliteration action.  We have a possibly
   multiple character from-string and a set of multiple character
   to-strings.  All are 32bit values since this is what is used in
   the gconv functions.  */
struct translit_to_t
{
  /* One to-string.  */
  uint32_t *str;

  /* Link to the next to-string of the same set.  */
  struct translit_to_t *next;
};
/* One transliteration rule: a from-string together with the list of
   to-strings it may be replaced by.  */
struct translit_t
{
  /* The from-string.  */
  uint32_t *from;

  /* File name and line number recorded with the rule.  */
  const char *fname;
  size_t lineno;

  /* Head of the list of to-strings.  */
  struct translit_to_t *to;

  /* Link to the next rule.  */
  struct translit_t *next;
};
/* One entry of the translit_ignore list.  */
struct translit_ignore_t
{
  /* NOTE(review): presumably the first value, last value and increment
     of an ignored range -- confirm against the code filling the list.  */
  uint32_t from;
  uint32_t to;
  uint32_t step;

  /* File name and line number recorded with the entry.  */
  const char *fname;
  size_t lineno;

  /* Link to the next entry.  */
  struct translit_ignore_t *next;
};
/* Type to describe a transliteration include statement.  */
struct translit_include_t
{
  /* Locale and repertoire names given in the statement.  */
  const char *copy_locale;
  const char *copy_repertoire;

  /* Link to the next include statement.  */
  struct translit_include_t *next;
};
119 /* Sparse table of uint32_t. */
120 #define TABLE idx_table
121 #define ELEMENT uint32_t
122 #define DEFAULT ((uint32_t) ~0)
123 #define NO_FINALIZE
124 #include "3level.h"
127 /* The real definition of the struct for the LC_CTYPE locale. */
128 struct locale_ctype_t
130 uint32_t *charnames;
131 size_t charnames_max;
132 size_t charnames_act;
133 /* An index lookup table, to speedup find_idx. */
134 struct idx_table charnames_idx;
136 struct repertoire_t *repertoire;
138 /* We will allow up to 8 * sizeof (uint32_t) character classes. */
139 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
140 size_t nr_charclass;
141 const char *classnames[MAX_NR_CHARCLASS];
142 uint32_t last_class_char;
143 uint32_t class256_collection[256];
144 uint32_t *class_collection;
145 size_t class_collection_max;
146 size_t class_collection_act;
147 uint32_t class_done;
148 uint32_t class_offset;
150 struct charseq **mbdigits;
151 size_t mbdigits_act;
152 size_t mbdigits_max;
153 uint32_t *wcdigits;
154 size_t wcdigits_act;
155 size_t wcdigits_max;
157 struct charseq *mboutdigits[10];
158 uint32_t wcoutdigits[10];
159 size_t outdigits_act;
161 /* If the following number ever turns out to be too small simply
162 increase it. But I doubt it will. --drepper@gnu */
163 #define MAX_NR_CHARMAP 16
164 const char *mapnames[MAX_NR_CHARMAP];
165 uint32_t *map_collection[MAX_NR_CHARMAP];
166 uint32_t map256_collection[2][256];
167 size_t map_collection_max[MAX_NR_CHARMAP];
168 size_t map_collection_act[MAX_NR_CHARMAP];
169 size_t map_collection_nr;
170 size_t last_map_idx;
171 int tomap_done[MAX_NR_CHARMAP];
172 uint32_t map_offset;
174 /* Transliteration information. */
175 struct translit_include_t *translit_include;
176 struct translit_t *translit;
177 struct translit_ignore_t *translit_ignore;
178 uint32_t ntranslit_ignore;
180 uint32_t *default_missing;
181 const char *default_missing_file;
182 size_t default_missing_lineno;
184 uint32_t to_nonascii;
186 /* The arrays for the binary representation. */
187 char_class_t *ctype_b;
188 char_class32_t *ctype32_b;
189 uint32_t **map_b;
190 uint32_t **map32_b;
191 uint32_t **class_b;
192 struct iovec *class_3level;
193 struct iovec *map_3level;
194 uint32_t *class_name_ptr;
195 uint32_t *map_name_ptr;
196 struct iovec width;
197 uint32_t mb_cur_max;
198 const char *codeset_name;
199 uint32_t *translit_from_idx;
200 uint32_t *translit_from_tbl;
201 uint32_t *translit_to_idx;
202 uint32_t *translit_to_tbl;
203 uint32_t translit_idx_size;
204 size_t translit_from_tbl_size;
205 size_t translit_to_tbl_size;
207 struct obstack mempool;
/* Marker for an empty slot.  This has the value 0xFFFFFFFF, regardless
   whether 'int' is 16 bit, 32 bit, or 64 bit.  */
#define EMPTY ((uint32_t) ~0)


/* Allocation hooks used by the obstack macros.  */
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free
/* Prototypes for local functions.  */

/* Create the initial LC_CTYPE data of LOCALE, or share COPY_LOCALE's.  */
static void ctype_startup (struct linereader *lr, struct localedef_t *locale,
                           const struct charmap_t *charmap,
                           struct localedef_t *copy_locale,
                           int ignore_content);
/* Register the character class NAME.  */
static void ctype_class_new (struct linereader *lr,
                             struct locale_ctype_t *ctype, const char *name);
/* Register the character map NAME.  */
static void ctype_map_new (struct linereader *lr,
                           struct locale_ctype_t *ctype,
                           const char *name, const struct charmap_t *charmap);
/* Return the slot of TABLE belonging to character IDX; callers may pass
   NULL for TABLE, MAX and ACT when only the side effect on the name
   array is wanted.  */
static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table,
                           size_t *max, size_t *act, unsigned int idx);
/* Set default value for classes not specified.  */
static void set_class_defaults (struct locale_ctype_t *ctype,
                                const struct charmap_t *charmap,
                                struct repertoire_t *repertoire);
/* Prepare the arrays for the binary representation.  */
static void allocate_arrays (struct locale_ctype_t *ctype,
                             const struct charmap_t *charmap,
                             struct repertoire_t *repertoire);
/* Spelled-out names of the ten decimal digits, indexed by digit value.  */
static const char *longnames[] =
{
  "zero", "one", "two", "three", "four",
  "five", "six", "seven", "eight", "nine"
};
/* The same ten digits written as <Uxxxxxxxx> style names.  */
static const char *uninames[] =
{
  "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
  "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
};
/* The ten digits as single bytes.  */
static const unsigned char digits[] = "0123456789";
253 static void
254 ctype_startup (struct linereader *lr, struct localedef_t *locale,
255 const struct charmap_t *charmap,
256 struct localedef_t *copy_locale, int ignore_content)
258 unsigned int cnt;
259 struct locale_ctype_t *ctype;
261 if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL)
263 if (copy_locale == NULL)
265 /* Allocate the needed room. */
266 locale->categories[LC_CTYPE].ctype = ctype =
267 (struct locale_ctype_t *) xcalloc (1,
268 sizeof (struct locale_ctype_t));
270 /* We have seen no names yet. */
271 ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
272 ctype->charnames =
273 (unsigned int *) xmalloc (ctype->charnames_max
274 * sizeof (unsigned int));
275 for (cnt = 0; cnt < 256; ++cnt)
276 ctype->charnames[cnt] = cnt;
277 ctype->charnames_act = 256;
278 idx_table_init (&ctype->charnames_idx);
280 /* Fill character class information. */
281 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
282 /* The order of the following instructions determines the bit
283 positions! */
284 ctype_class_new (lr, ctype, "upper");
285 ctype_class_new (lr, ctype, "lower");
286 ctype_class_new (lr, ctype, "alpha");
287 ctype_class_new (lr, ctype, "digit");
288 ctype_class_new (lr, ctype, "xdigit");
289 ctype_class_new (lr, ctype, "space");
290 ctype_class_new (lr, ctype, "print");
291 ctype_class_new (lr, ctype, "graph");
292 ctype_class_new (lr, ctype, "blank");
293 ctype_class_new (lr, ctype, "cntrl");
294 ctype_class_new (lr, ctype, "punct");
295 ctype_class_new (lr, ctype, "alnum");
296 #ifdef PREDEFINED_CLASSES
297 /* The following are extensions from ISO 14652. */
298 ctype_class_new (lr, ctype, "left_to_right");
299 ctype_class_new (lr, ctype, "right_to_left");
300 ctype_class_new (lr, ctype, "num_terminator");
301 ctype_class_new (lr, ctype, "num_separator");
302 ctype_class_new (lr, ctype, "segment_separator");
303 ctype_class_new (lr, ctype, "block_separator");
304 ctype_class_new (lr, ctype, "direction_control");
305 ctype_class_new (lr, ctype, "sym_swap_layout");
306 ctype_class_new (lr, ctype, "char_shape_selector");
307 ctype_class_new (lr, ctype, "num_shape_selector");
308 ctype_class_new (lr, ctype, "non_spacing");
309 ctype_class_new (lr, ctype, "non_spacing_level3");
310 ctype_class_new (lr, ctype, "normal_connect");
311 ctype_class_new (lr, ctype, "r_connect");
312 ctype_class_new (lr, ctype, "no_connect");
313 ctype_class_new (lr, ctype, "no_connect-space");
314 ctype_class_new (lr, ctype, "vowel_connect");
315 #endif
317 ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
318 ctype->class_collection
319 = (uint32_t *) xcalloc (sizeof (unsigned long int),
320 ctype->class_collection_max);
321 ctype->class_collection_act = 256;
323 /* Fill character map information. */
324 ctype->last_map_idx = MAX_NR_CHARMAP;
325 ctype_map_new (lr, ctype, "toupper", charmap);
326 ctype_map_new (lr, ctype, "tolower", charmap);
327 #ifdef PREDEFINED_CLASSES
328 ctype_map_new (lr, ctype, "tosymmetric", charmap);
329 #endif
331 /* Fill first 256 entries in `toXXX' arrays. */
332 for (cnt = 0; cnt < 256; ++cnt)
334 ctype->map_collection[0][cnt] = cnt;
335 ctype->map_collection[1][cnt] = cnt;
336 #ifdef PREDEFINED_CLASSES
337 ctype->map_collection[2][cnt] = cnt;
338 #endif
339 ctype->map256_collection[0][cnt] = cnt;
340 ctype->map256_collection[1][cnt] = cnt;
343 if (enc_not_ascii_compatible)
344 ctype->to_nonascii = 1;
346 obstack_init (&ctype->mempool);
348 else
349 ctype = locale->categories[LC_CTYPE].ctype =
350 copy_locale->categories[LC_CTYPE].ctype;
355 void
356 ctype_finish (struct localedef_t *locale, const struct charmap_t *charmap)
358 /* See POSIX.2, table 2-6 for the meaning of the following table. */
359 #define NCLASS 12
360 static const struct
362 const char *name;
363 const char allow[NCLASS];
365 valid_table[NCLASS] =
367 /* The order is important. See token.h for more information.
368 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
369 { "upper", "--MX-XDDXXX-" },
370 { "lower", "--MX-XDDXXX-" },
371 { "alpha", "---X-XDDXXX-" },
372 { "digit", "XXX--XDDXXX-" },
373 { "xdigit", "-----XDDXXX-" },
374 { "space", "XXXXX------X" },
375 { "print", "---------X--" },
376 { "graph", "---------X--" },
377 { "blank", "XXXXXM-----X" },
378 { "cntrl", "XXXXX-XX--XX" },
379 { "punct", "XXXXX-DD-X-X" },
380 { "alnum", "-----XDDXXX-" }
382 size_t cnt;
383 int cls1, cls2;
384 uint32_t space_value;
385 struct charseq *space_seq;
386 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
387 int warned;
388 const void *key;
389 size_t len;
390 void *vdata;
391 void *curs;
393 /* Now resolve copying and also handle completely missing definitions. */
394 if (ctype == NULL)
396 const char *repertoire_name;
398 /* First see whether we were supposed to copy. If yes, find the
399 actual definition. */
400 if (locale->copy_name[LC_CTYPE] != NULL)
402 /* Find the copying locale. This has to happen transitively since
403 the locale we are copying from might also copying another one. */
404 struct localedef_t *from = locale;
407 from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
408 from->repertoire_name, charmap);
409 while (from->categories[LC_CTYPE].ctype == NULL
410 && from->copy_name[LC_CTYPE] != NULL);
412 ctype = locale->categories[LC_CTYPE].ctype
413 = from->categories[LC_CTYPE].ctype;
416 /* If there is still no definition issue an warning and create an
417 empty one. */
418 if (ctype == NULL)
420 if (! be_quiet)
421 WITH_CUR_LOCALE (error (0, 0, _("\
422 No definition for %s category found"), "LC_CTYPE"));
423 ctype_startup (NULL, locale, charmap, NULL, 0);
424 ctype = locale->categories[LC_CTYPE].ctype;
427 /* Get the repertoire we have to use. */
428 repertoire_name = locale->repertoire_name ?: repertoire_global;
429 if (repertoire_name != NULL)
430 ctype->repertoire = repertoire_read (repertoire_name);
433 /* We need the name of the currently used 8-bit character set to
434 make correct conversion between this 8-bit representation and the
435 ISO 10646 character set used internally for wide characters. */
436 ctype->codeset_name = charmap->code_set_name;
437 if (ctype->codeset_name == NULL)
439 if (! be_quiet)
440 WITH_CUR_LOCALE (error (0, 0, _("\
441 No character set name specified in charmap")));
442 ctype->codeset_name = "//UNKNOWN//";
445 /* Set default value for classes not specified. */
446 set_class_defaults (ctype, charmap, ctype->repertoire);
448 /* Check according to table. */
449 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
451 uint32_t tmp = ctype->class_collection[cnt];
453 if (tmp != 0)
455 for (cls1 = 0; cls1 < NCLASS; ++cls1)
456 if ((tmp & _ISwbit (cls1)) != 0)
457 for (cls2 = 0; cls2 < NCLASS; ++cls2)
458 if (valid_table[cls1].allow[cls2] != '-')
460 int eq = (tmp & _ISwbit (cls2)) != 0;
461 switch (valid_table[cls1].allow[cls2])
463 case 'M':
464 if (!eq)
466 uint32_t value = ctype->charnames[cnt];
468 if (!be_quiet)
469 WITH_CUR_LOCALE (error (0, 0, _("\
470 character L'\\u%0*x' in class `%s' must be in class `%s'"),
471 value > 0xffff ? 8 : 4,
472 value,
473 valid_table[cls1].name,
474 valid_table[cls2].name));
476 break;
478 case 'X':
479 if (eq)
481 uint32_t value = ctype->charnames[cnt];
483 if (!be_quiet)
484 WITH_CUR_LOCALE (error (0, 0, _("\
485 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
486 value > 0xffff ? 8 : 4,
487 value,
488 valid_table[cls1].name,
489 valid_table[cls2].name));
491 break;
493 case 'D':
494 ctype->class_collection[cnt] |= _ISwbit (cls2);
495 break;
497 default:
498 WITH_CUR_LOCALE (error (5, 0, _("\
499 internal error in %s, line %u"), __FUNCTION__, __LINE__));
505 for (cnt = 0; cnt < 256; ++cnt)
507 uint32_t tmp = ctype->class256_collection[cnt];
509 if (tmp != 0)
511 for (cls1 = 0; cls1 < NCLASS; ++cls1)
512 if ((tmp & _ISbit (cls1)) != 0)
513 for (cls2 = 0; cls2 < NCLASS; ++cls2)
514 if (valid_table[cls1].allow[cls2] != '-')
516 int eq = (tmp & _ISbit (cls2)) != 0;
517 switch (valid_table[cls1].allow[cls2])
519 case 'M':
520 if (!eq)
522 char buf[17];
524 snprintf (buf, sizeof buf, "\\%Zo", cnt);
526 if (!be_quiet)
527 WITH_CUR_LOCALE (error (0, 0, _("\
528 character '%s' in class `%s' must be in class `%s'"),
529 buf,
530 valid_table[cls1].name,
531 valid_table[cls2].name));
533 break;
535 case 'X':
536 if (eq)
538 char buf[17];
540 snprintf (buf, sizeof buf, "\\%Zo", cnt);
542 if (!be_quiet)
543 WITH_CUR_LOCALE (error (0, 0, _("\
544 character '%s' in class `%s' must not be in class `%s'"),
545 buf,
546 valid_table[cls1].name,
547 valid_table[cls2].name));
549 break;
551 case 'D':
552 ctype->class256_collection[cnt] |= _ISbit (cls2);
553 break;
555 default:
556 WITH_CUR_LOCALE (error (5, 0, _("\
557 internal error in %s, line %u"), __FUNCTION__, __LINE__));
563 /* ... and now test <SP> as a special case. */
564 space_value = 32;
565 if (((cnt = BITPOS (tok_space),
566 (ELEM (ctype, class_collection, , space_value)
567 & BITw (tok_space)) == 0)
568 || (cnt = BITPOS (tok_blank),
569 (ELEM (ctype, class_collection, , space_value)
570 & BITw (tok_blank)) == 0)))
572 if (!be_quiet)
573 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
574 valid_table[cnt].name));
576 else if (((cnt = BITPOS (tok_punct),
577 (ELEM (ctype, class_collection, , space_value)
578 & BITw (tok_punct)) != 0)
579 || (cnt = BITPOS (tok_graph),
580 (ELEM (ctype, class_collection, , space_value)
581 & BITw (tok_graph))
582 != 0)))
584 if (!be_quiet)
585 WITH_CUR_LOCALE (error (0, 0, _("\
586 <SP> character must not be in class `%s'"),
587 valid_table[cnt].name));
589 else
590 ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
592 space_seq = charmap_find_value (charmap, "SP", 2);
593 if (space_seq == NULL)
594 space_seq = charmap_find_value (charmap, "space", 5);
595 if (space_seq == NULL)
596 space_seq = charmap_find_value (charmap, "U00000020", 9);
597 if (space_seq == NULL || space_seq->nbytes != 1)
599 if (!be_quiet)
600 WITH_CUR_LOCALE (error (0, 0, _("\
601 character <SP> not defined in character map")));
603 else if (((cnt = BITPOS (tok_space),
604 (ctype->class256_collection[space_seq->bytes[0]]
605 & BIT (tok_space)) == 0)
606 || (cnt = BITPOS (tok_blank),
607 (ctype->class256_collection[space_seq->bytes[0]]
608 & BIT (tok_blank)) == 0)))
610 if (!be_quiet)
611 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
612 valid_table[cnt].name));
614 else if (((cnt = BITPOS (tok_punct),
615 (ctype->class256_collection[space_seq->bytes[0]]
616 & BIT (tok_punct)) != 0)
617 || (cnt = BITPOS (tok_graph),
618 (ctype->class256_collection[space_seq->bytes[0]]
619 & BIT (tok_graph)) != 0)))
621 if (!be_quiet)
622 WITH_CUR_LOCALE (error (0, 0, _("\
623 <SP> character must not be in class `%s'"),
624 valid_table[cnt].name));
626 else
627 ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
629 /* Now that the tests are done make sure the name array contains all
630 characters which are handled in the WIDTH section of the
631 character set definition file. */
632 if (charmap->width_rules != NULL)
633 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
635 unsigned char bytes[charmap->mb_cur_max];
636 int nbytes = charmap->width_rules[cnt].from->nbytes;
638 /* We have the range of character for which the width is
639 specified described using byte sequences of the multibyte
640 charset. We have to convert this to UCS4 now. And we
641 cannot simply convert the beginning and the end of the
642 sequence, we have to iterate over the byte sequence and
643 convert it for every single character. */
644 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
646 while (nbytes < charmap->width_rules[cnt].to->nbytes
647 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
648 nbytes) <= 0)
650 /* Find the UCS value for `bytes'. */
651 int inner;
652 uint32_t wch;
653 struct charseq *seq = charmap_find_symbol (charmap, bytes, nbytes);
655 if (seq == NULL)
656 wch = ILLEGAL_CHAR_VALUE;
657 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
658 wch = seq->ucs4;
659 else
660 wch = repertoire_find_value (ctype->repertoire, seq->name,
661 strlen (seq->name));
663 if (wch != ILLEGAL_CHAR_VALUE)
664 /* We are only interested in the side-effects of the
665 `find_idx' call. It will add appropriate entries in
666 the name array if this is necessary. */
667 (void) find_idx (ctype, NULL, NULL, NULL, wch);
669 /* "Increment" the bytes sequence. */
670 inner = nbytes - 1;
671 while (inner >= 0 && bytes[inner] == 0xff)
672 --inner;
674 if (inner < 0)
676 /* We have to extend the byte sequence. */
677 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
678 break;
680 bytes[0] = 1;
681 memset (&bytes[1], 0, nbytes);
682 ++nbytes;
684 else
686 ++bytes[inner];
687 while (++inner < nbytes)
688 bytes[inner] = 0;
693 /* Now set all the other characters of the character set to the
694 default width. */
695 curs = NULL;
696 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
698 struct charseq *data = (struct charseq *) vdata;
700 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
701 data->ucs4 = repertoire_find_value (ctype->repertoire,
702 data->name, len);
704 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
705 (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
708 /* There must be a multiple of 10 digits. */
709 if (ctype->mbdigits_act % 10 != 0)
711 assert (ctype->mbdigits_act == ctype->wcdigits_act);
712 ctype->wcdigits_act -= ctype->mbdigits_act % 10;
713 ctype->mbdigits_act -= ctype->mbdigits_act % 10;
714 WITH_CUR_LOCALE (error (0, 0, _("\
715 `digit' category has not entries in groups of ten")));
718 /* Check the input digits. There must be a multiple of ten available.
719 In each group it could be that one or the other character is missing.
720 In this case the whole group must be removed. */
721 cnt = 0;
722 while (cnt < ctype->mbdigits_act)
724 size_t inner;
725 for (inner = 0; inner < 10; ++inner)
726 if (ctype->mbdigits[cnt + inner] == NULL)
727 break;
729 if (inner == 10)
730 cnt += 10;
731 else
733 /* Remove the group. */
734 memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
735 ((ctype->wcdigits_act - cnt - 10)
736 * sizeof (ctype->mbdigits[0])));
737 ctype->mbdigits_act -= 10;
741 /* If no input digits are given use the default. */
742 if (ctype->mbdigits_act == 0)
744 if (ctype->mbdigits_max == 0)
746 ctype->mbdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
747 10 * sizeof (struct charseq *));
748 ctype->mbdigits_max = 10;
751 for (cnt = 0; cnt < 10; ++cnt)
753 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
754 digits + cnt, 1);
755 if (ctype->mbdigits[cnt] == NULL)
757 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
758 longnames[cnt],
759 strlen (longnames[cnt]));
760 if (ctype->mbdigits[cnt] == NULL)
762 /* Hum, this ain't good. */
763 WITH_CUR_LOCALE (error (0, 0, _("\
764 no input digits defined and none of the standard names in the charmap")));
766 ctype->mbdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
767 sizeof (struct charseq) + 1);
769 /* This is better than nothing. */
770 ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
771 ctype->mbdigits[cnt]->nbytes = 1;
776 ctype->mbdigits_act = 10;
779 /* Check the wide character input digits. There must be a multiple
780 of ten available. In each group it could be that one or the other
781 character is missing. In this case the whole group must be
782 removed. */
783 cnt = 0;
784 while (cnt < ctype->wcdigits_act)
786 size_t inner;
787 for (inner = 0; inner < 10; ++inner)
788 if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
789 break;
791 if (inner == 10)
792 cnt += 10;
793 else
795 /* Remove the group. */
796 memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
797 ((ctype->wcdigits_act - cnt - 10)
798 * sizeof (ctype->wcdigits[0])));
799 ctype->wcdigits_act -= 10;
803 /* If no input digits are given use the default. */
804 if (ctype->wcdigits_act == 0)
806 if (ctype->wcdigits_max == 0)
808 ctype->wcdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
809 10 * sizeof (uint32_t));
810 ctype->wcdigits_max = 10;
813 for (cnt = 0; cnt < 10; ++cnt)
814 ctype->wcdigits[cnt] = L'0' + cnt;
816 ctype->mbdigits_act = 10;
819 /* Check the outdigits. */
820 warned = 0;
821 for (cnt = 0; cnt < 10; ++cnt)
822 if (ctype->mboutdigits[cnt] == NULL)
824 static struct charseq replace[2];
826 if (!warned)
828 WITH_CUR_LOCALE (error (0, 0, _("\
829 not all characters used in `outdigit' are available in the charmap")));
830 warned = 1;
833 replace[0].nbytes = 1;
834 replace[0].bytes[0] = '?';
835 replace[0].bytes[1] = '\0';
836 ctype->mboutdigits[cnt] = &replace[0];
839 warned = 0;
840 for (cnt = 0; cnt < 10; ++cnt)
841 if (ctype->wcoutdigits[cnt] == 0)
843 if (!warned)
845 WITH_CUR_LOCALE (error (0, 0, _("\
846 not all characters used in `outdigit' are available in the repertoire")));
847 warned = 1;
850 ctype->wcoutdigits[cnt] = L'?';
853 /* Sort the entries in the translit_ignore list. */
854 if (ctype->translit_ignore != NULL)
856 struct translit_ignore_t *firstp = ctype->translit_ignore;
857 struct translit_ignore_t *runp;
859 ctype->ntranslit_ignore = 1;
861 for (runp = firstp->next; runp != NULL; runp = runp->next)
863 struct translit_ignore_t *lastp = NULL;
864 struct translit_ignore_t *cmpp;
866 ++ctype->ntranslit_ignore;
868 for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next)
869 if (runp->from < cmpp->from)
870 break;
872 runp->next = lastp;
873 if (lastp == NULL)
874 firstp = runp;
877 ctype->translit_ignore = firstp;
882 void
883 ctype_output (struct localedef_t *locale, const struct charmap_t *charmap,
884 const char *output_path)
886 static const char nulbytes[4] = { 0, 0, 0, 0 };
887 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
888 const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)
889 + ctype->nr_charclass + ctype->map_collection_nr);
890 struct iovec *iov = alloca (sizeof *iov
891 * (2 + nelems + 2 * ctype->nr_charclass
892 + ctype->map_collection_nr + 4));
893 struct locale_file data;
894 uint32_t *idx = alloca (sizeof *idx * (nelems + 1));
895 uint32_t default_missing_len;
896 size_t elem, cnt, offset, total;
897 char *cp;
899 /* Now prepare the output: Find the sizes of the table we can use. */
900 allocate_arrays (ctype, charmap, ctype->repertoire);
902 data.magic = LIMAGIC (LC_CTYPE);
903 data.n = nelems;
904 iov[0].iov_base = (void *) &data;
905 iov[0].iov_len = sizeof (data);
907 iov[1].iov_base = (void *) idx;
908 iov[1].iov_len = nelems * sizeof (uint32_t);
910 idx[0] = iov[0].iov_len + iov[1].iov_len;
911 offset = 0;
913 for (elem = 0; elem < nelems; ++elem)
915 if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))
916 switch (elem)
918 #define CTYPE_EMPTY(name) \
919 case name: \
920 iov[2 + elem + offset].iov_base = NULL; \
921 iov[2 + elem + offset].iov_len = 0; \
922 idx[elem + 1] = idx[elem]; \
923 break
925 CTYPE_EMPTY(_NL_CTYPE_GAP1);
926 CTYPE_EMPTY(_NL_CTYPE_GAP2);
927 CTYPE_EMPTY(_NL_CTYPE_GAP3);
928 CTYPE_EMPTY(_NL_CTYPE_GAP4);
929 CTYPE_EMPTY(_NL_CTYPE_GAP5);
930 CTYPE_EMPTY(_NL_CTYPE_GAP6);
932 #define CTYPE_DATA(name, base, len) \
933 case _NL_ITEM_INDEX (name): \
934 iov[2 + elem + offset].iov_base = (base); \
935 iov[2 + elem + offset].iov_len = (len); \
936 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
937 break
939 CTYPE_DATA (_NL_CTYPE_CLASS,
940 ctype->ctype_b,
941 (256 + 128) * sizeof (char_class_t));
943 CTYPE_DATA (_NL_CTYPE_TOUPPER,
944 ctype->map_b[0],
945 (256 + 128) * sizeof (uint32_t));
946 CTYPE_DATA (_NL_CTYPE_TOLOWER,
947 ctype->map_b[1],
948 (256 + 128) * sizeof (uint32_t));
950 CTYPE_DATA (_NL_CTYPE_TOUPPER32,
951 ctype->map32_b[0],
952 256 * sizeof (uint32_t));
953 CTYPE_DATA (_NL_CTYPE_TOLOWER32,
954 ctype->map32_b[1],
955 256 * sizeof (uint32_t));
957 CTYPE_DATA (_NL_CTYPE_CLASS32,
958 ctype->ctype32_b,
959 256 * sizeof (char_class32_t));
961 CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET,
962 &ctype->class_offset, sizeof (uint32_t));
964 CTYPE_DATA (_NL_CTYPE_MAP_OFFSET,
965 &ctype->map_offset, sizeof (uint32_t));
967 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE,
968 &ctype->translit_idx_size, sizeof (uint32_t));
970 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX,
971 ctype->translit_from_idx,
972 ctype->translit_idx_size * sizeof (uint32_t));
974 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL,
975 ctype->translit_from_tbl,
976 ctype->translit_from_tbl_size);
978 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX,
979 ctype->translit_to_idx,
980 ctype->translit_idx_size * sizeof (uint32_t));
982 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL,
983 ctype->translit_to_tbl, ctype->translit_to_tbl_size);
985 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
986 /* The class name array. */
987 total = 0;
988 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
990 iov[2 + elem + offset].iov_base
991 = (void *) ctype->classnames[cnt];
992 iov[2 + elem + offset].iov_len
993 = strlen (ctype->classnames[cnt]) + 1;
994 total += iov[2 + elem + offset].iov_len;
996 iov[2 + elem + offset].iov_base = (void *) nulbytes;
997 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
998 total += 1 + (4 - ((total + 1) % 4));
1000 idx[elem + 1] = idx[elem] + total;
1001 break;
1003 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
1004 /* The class name array. */
1005 total = 0;
1006 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
1008 iov[2 + elem + offset].iov_base
1009 = (void *) ctype->mapnames[cnt];
1010 iov[2 + elem + offset].iov_len
1011 = strlen (ctype->mapnames[cnt]) + 1;
1012 total += iov[2 + elem + offset].iov_len;
1014 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1015 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
1016 total += 1 + (4 - ((total + 1) % 4));
1018 idx[elem + 1] = idx[elem] + total;
1019 break;
1021 CTYPE_DATA (_NL_CTYPE_WIDTH,
1022 ctype->width.iov_base,
1023 ctype->width.iov_len);
1025 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
1026 &ctype->mb_cur_max, sizeof (uint32_t));
1028 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
1029 total = strlen (ctype->codeset_name) + 1;
1030 if (total % 4 == 0)
1031 iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
1032 else
1034 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
1035 memset (mempcpy (iov[2 + elem + offset].iov_base,
1036 ctype->codeset_name, total),
1037 '\0', 4 - (total & 3));
1038 total = (total + 3) & ~3;
1040 iov[2 + elem + offset].iov_len = total;
1041 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1042 break;
1045 CTYPE_DATA (_NL_CTYPE_MAP_TO_NONASCII,
1046 &ctype->to_nonascii, sizeof (uint32_t));
1048 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
1049 iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1050 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1051 *(uint32_t *) iov[2 + elem + offset].iov_base =
1052 ctype->mbdigits_act / 10;
1053 idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1054 break;
1056 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
1057 /* Align entries. */
1058 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1059 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1060 idx[elem] += iov[2 + elem + offset].iov_len;
1061 ++offset;
1063 iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1064 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1065 *(uint32_t *) iov[2 + elem + offset].iov_base =
1066 ctype->wcdigits_act / 10;
1067 idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1068 break;
1070 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
1071 /* Compute the length of all possible characters. For INDIGITS
1072 there might be more than one. We simply concatenate all of
1073 them with a NUL byte following. The NUL byte wouldn't be
1074 necessary but it makes it easier for the user. */
1075 total = 0;
1077 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1078 cnt < ctype->mbdigits_act; cnt += 10)
1079 total += ctype->mbdigits[cnt]->nbytes + 1;
1080 iov[2 + elem + offset].iov_base = (char *) alloca (total);
1081 iov[2 + elem + offset].iov_len = total;
1083 cp = iov[2 + elem + offset].iov_base;
1084 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1085 cnt < ctype->mbdigits_act; cnt += 10)
1087 cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes,
1088 ctype->mbdigits[cnt]->nbytes);
1089 *cp++ = '\0';
1091 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1092 break;
1094 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
1095 /* Compute the length of all possible characters. For INDIGITS
1096 there might be more than one. We simply concatenate all of
1097 them with a NUL byte following. The NUL byte wouldn't be
1098 necessary but it makes it easier for the user. */
1099 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
1100 total = ctype->mboutdigits[cnt]->nbytes + 1;
1101 iov[2 + elem + offset].iov_base = (char *) alloca (total);
1102 iov[2 + elem + offset].iov_len = total;
1104 *(char *) mempcpy (iov[2 + elem + offset].iov_base,
1105 ctype->mboutdigits[cnt]->bytes,
1106 ctype->mboutdigits[cnt]->nbytes) = '\0';
1107 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1108 break;
1110 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
1111 total = ctype->wcdigits_act / 10;
1113 iov[2 + elem + offset].iov_base =
1114 (uint32_t *) alloca (total * sizeof (uint32_t));
1115 iov[2 + elem + offset].iov_len = total * sizeof (uint32_t);
1117 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
1118 cnt < ctype->wcdigits_act; cnt += 10)
1119 ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10]
1120 = ctype->wcdigits[cnt];
1121 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1122 break;
1124 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC):
1125 /* Align entries. */
1126 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1127 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1128 idx[elem] += iov[2 + elem + offset].iov_len;
1129 ++offset;
1130 /* FALLTHROUGH */
1132 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT1_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
1133 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
1134 iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt];
1135 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1136 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1137 break;
1139 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
1140 /* Align entries. */
1141 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1142 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1143 idx[elem] += iov[2 + elem + offset].iov_len;
1144 ++offset;
1146 default_missing_len = (ctype->default_missing
1147 ? wcslen ((wchar_t *)ctype->default_missing)
1148 : 0);
1149 iov[2 + elem + offset].iov_base = &default_missing_len;
1150 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1151 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1152 break;
1154 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
1155 iov[2 + elem + offset].iov_base =
1156 ctype->default_missing ?: (uint32_t *) L"";
1157 iov[2 + elem + offset].iov_len =
1158 wcslen (iov[2 + elem + offset].iov_base);
1159 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1160 break;
1162 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
1163 /* Align entries. */
1164 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1165 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1166 idx[elem] += iov[2 + elem + offset].iov_len;
1167 ++offset;
1169 iov[2 + elem + offset].iov_base = &ctype->ntranslit_ignore;
1170 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1171 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1172 break;
1174 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
1176 uint32_t *ranges = (uint32_t *) alloca (ctype->ntranslit_ignore
1177 * 3 * sizeof (uint32_t));
1178 struct translit_ignore_t *runp;
1180 iov[2 + elem + offset].iov_base = ranges;
1181 iov[2 + elem + offset].iov_len = (ctype->ntranslit_ignore
1182 * 3 * sizeof (uint32_t));
1184 for (runp = ctype->translit_ignore; runp != NULL;
1185 runp = runp->next)
1187 *ranges++ = runp->from;
1188 *ranges++ = runp->to;
1189 *ranges++ = runp->step;
1192 /* Remove the following line in case a new entry is added
1193 after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN. */
1194 if (elem < nelems)
1195 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1196 break;
1198 default:
1199 assert (! "unknown CTYPE element");
1201 else
1203 /* Handle extra maps. */
1204 size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
1205 if (nr < ctype->nr_charclass)
1207 iov[2 + elem + offset].iov_base = ctype->class_b[nr];
1208 iov[2 + elem + offset].iov_len = 256 / 32 * sizeof (uint32_t);
1209 idx[elem] += iov[2 + elem + offset].iov_len;
1210 ++offset;
1212 iov[2 + elem + offset] = ctype->class_3level[nr];
1214 else
1216 nr -= ctype->nr_charclass;
1217 assert (nr < ctype->map_collection_nr);
1218 iov[2 + elem + offset] = ctype->map_3level[nr];
1220 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1224 assert (2 + elem + offset == (nelems + 2 * ctype->nr_charclass
1225 + ctype->map_collection_nr + 4 + 2));
1227 write_locale_data (output_path, LC_CTYPE, "LC_CTYPE", 2 + elem + offset,
1228 iov);
1232 /* Local functions. */
1233 static void
1234 ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
1235 const char *name)
1237 size_t cnt;
1239 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1240 if (strcmp (ctype->classnames[cnt], name) == 0)
1241 break;
1243 if (cnt < ctype->nr_charclass)
1245 lr_error (lr, _("character class `%s' already defined"), name);
1246 return;
1249 if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1250 /* Exit code 2 is prescribed in P1003.2b. */
1251 WITH_CUR_LOCALE (error (2, 0, _("\
1252 implementation limit: no more than %Zd character classes allowed"),
1253 MAX_NR_CHARCLASS));
1255 ctype->classnames[ctype->nr_charclass++] = name;
1259 static void
1260 ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1261 const char *name, const struct charmap_t *charmap)
1263 size_t max_chars = 0;
1264 size_t cnt;
1266 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1268 if (strcmp (ctype->mapnames[cnt], name) == 0)
1269 break;
1271 if (max_chars < ctype->map_collection_max[cnt])
1272 max_chars = ctype->map_collection_max[cnt];
1275 if (cnt < ctype->map_collection_nr)
1277 lr_error (lr, _("character map `%s' already defined"), name);
1278 return;
1281 if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1282 /* Exit code 2 is prescribed in P1003.2b. */
1283 WITH_CUR_LOCALE (error (2, 0, _("\
1284 implementation limit: no more than %d character maps allowed"),
1285 MAX_NR_CHARMAP));
1287 ctype->mapnames[cnt] = name;
1289 if (max_chars == 0)
1290 ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1291 else
1292 ctype->map_collection_max[cnt] = max_chars;
1294 ctype->map_collection[cnt] = (uint32_t *)
1295 xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
1296 ctype->map_collection_act[cnt] = 256;
1298 ++ctype->map_collection_nr;
1302 /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
1303 is possible if we only want to extend the name array. */
1304 static uint32_t *
1305 find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1306 size_t *act, uint32_t idx)
1308 size_t cnt;
1310 if (idx < 256)
1311 return table == NULL ? NULL : &(*table)[idx];
1313 /* Use the charnames_idx lookup table instead of the slow search loop. */
1314 #if 1
1315 cnt = idx_table_get (&ctype->charnames_idx, idx);
1316 if (cnt == EMPTY)
1317 /* Not found. */
1318 cnt = ctype->charnames_act;
1319 #else
1320 for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1321 if (ctype->charnames[cnt] == idx)
1322 break;
1323 #endif
1325 /* We have to distinguish two cases: the name is found or not. */
1326 if (cnt == ctype->charnames_act)
1328 /* Extend the name array. */
1329 if (ctype->charnames_act == ctype->charnames_max)
1331 ctype->charnames_max *= 2;
1332 ctype->charnames = (uint32_t *)
1333 xrealloc (ctype->charnames,
1334 sizeof (uint32_t) * ctype->charnames_max);
1336 ctype->charnames[ctype->charnames_act++] = idx;
1337 idx_table_add (&ctype->charnames_idx, idx, cnt);
1340 if (table == NULL)
1341 /* We have done everything we are asked to do. */
1342 return NULL;
1344 if (max == NULL)
1345 /* The caller does not want to extend the table. */
1346 return (cnt >= *act ? NULL : &(*table)[cnt]);
1348 if (cnt >= *act)
1350 if (cnt >= *max)
1352 size_t old_max = *max;
1354 *max *= 2;
1355 while (*max <= cnt);
1357 *table =
1358 (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
1359 memset (&(*table)[old_max], '\0',
1360 (*max - old_max) * sizeof (uint32_t));
1363 *act = cnt + 1;
1366 return &(*table)[cnt];
1370 static int
1371 get_character (struct token *now, const struct charmap_t *charmap,
1372 struct repertoire_t *repertoire,
1373 struct charseq **seqp, uint32_t *wchp)
1375 if (now->tok == tok_bsymbol)
1377 /* This will hopefully be the normal case. */
1378 *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1379 now->val.str.lenmb);
1380 *seqp = charmap_find_value (charmap, now->val.str.startmb,
1381 now->val.str.lenmb);
1383 else if (now->tok == tok_ucs4)
1385 char utmp[10];
1387 snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1388 *seqp = charmap_find_value (charmap, utmp, 9);
1390 if (*seqp == NULL)
1391 *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
1393 if (*seqp == NULL)
1395 /* Compute the value in the charmap from the UCS value. */
1396 const char *symbol = repertoire_find_symbol (repertoire,
1397 now->val.ucs4);
1399 if (symbol == NULL)
1400 *seqp = NULL;
1401 else
1402 *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
1404 if (*seqp == NULL)
1406 if (repertoire != NULL)
1408 /* Insert a negative entry. */
1409 static const struct charseq negative
1410 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1411 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1412 sizeof (uint32_t));
1413 *newp = now->val.ucs4;
1415 insert_entry (&repertoire->seq_table, newp,
1416 sizeof (uint32_t), (void *) &negative);
1419 else
1420 (*seqp)->ucs4 = now->val.ucs4;
1422 else if ((*seqp)->ucs4 != now->val.ucs4)
1423 *seqp = NULL;
1425 *wchp = now->val.ucs4;
1427 else if (now->tok == tok_charcode)
1429 /* We must map from the byte code to UCS4. */
1430 *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1431 now->val.str.lenmb);
1433 if (*seqp == NULL)
1434 *wchp = ILLEGAL_CHAR_VALUE;
1435 else
1437 if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1438 (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1439 strlen ((*seqp)->name));
1440 *wchp = (*seqp)->ucs4;
1443 else
1444 return 1;
1446 return 0;
1450 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1451 the .(2). counterparts. */
1452 static void
1453 charclass_symbolic_ellipsis (struct linereader *ldfile,
1454 struct locale_ctype_t *ctype,
1455 const struct charmap_t *charmap,
1456 struct repertoire_t *repertoire,
1457 struct token *now,
1458 const char *last_str,
1459 unsigned long int class256_bit,
1460 unsigned long int class_bit, int base,
1461 int ignore_content, int handle_digits, int step)
1463 const char *nowstr = now->val.str.startmb;
1464 char tmp[now->val.str.lenmb + 1];
1465 const char *cp;
1466 char *endp;
1467 unsigned long int from;
1468 unsigned long int to;
1470 /* We have to compute the ellipsis values using the symbolic names. */
1471 assert (last_str != NULL);
1473 if (strlen (last_str) != now->val.str.lenmb)
1475 invalid_range:
1476 lr_error (ldfile,
1477 _("`%s' and `%.*s' are no valid names for symbolic range"),
1478 last_str, (int) now->val.str.lenmb, nowstr);
1479 return;
1482 if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1483 /* Nothing to do, the names are the same. */
1484 return;
1486 for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1489 errno = 0;
1490 from = strtoul (cp, &endp, base);
1491 if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1492 goto invalid_range;
1494 to = strtoul (nowstr + (cp - last_str), &endp, base);
1495 if ((to == UINT_MAX && errno == ERANGE)
1496 || (endp - nowstr) != now->val.str.lenmb || from >= to)
1497 goto invalid_range;
1499 /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
1500 if (!ignore_content)
1502 now->val.str.startmb = tmp;
1503 while ((from += step) <= to)
1505 struct charseq *seq;
1506 uint32_t wch;
1508 sprintf (tmp, (base == 10 ? "%.*s%0*ld" : "%.*s%0*lX"),
1509 (int) (cp - last_str), last_str,
1510 (int) (now->val.str.lenmb - (cp - last_str)),
1511 from);
1513 get_character (now, charmap, repertoire, &seq, &wch);
1515 if (seq != NULL && seq->nbytes == 1)
1516 /* Yep, we can store information about this byte sequence. */
1517 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
1519 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1520 /* We have the UCS4 position. */
1521 *find_idx (ctype, &ctype->class_collection,
1522 &ctype->class_collection_max,
1523 &ctype->class_collection_act, wch) |= class_bit;
1525 if (handle_digits == 1)
1527 /* We must store the digit values. */
1528 if (ctype->mbdigits_act == ctype->mbdigits_max)
1530 ctype->mbdigits_max *= 2;
1531 ctype->mbdigits = xrealloc (ctype->mbdigits,
1532 (ctype->mbdigits_max
1533 * sizeof (char *)));
1534 ctype->wcdigits_max *= 2;
1535 ctype->wcdigits = xrealloc (ctype->wcdigits,
1536 (ctype->wcdigits_max
1537 * sizeof (uint32_t)));
1540 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1541 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1543 else if (handle_digits == 2)
1545 /* We must store the digit values. */
1546 if (ctype->outdigits_act >= 10)
1548 lr_error (ldfile, _("\
1549 %s: field `%s' does not contain exactly ten entries"),
1550 "LC_CTYPE", "outdigit");
1551 return;
1554 ctype->mboutdigits[ctype->outdigits_act] = seq;
1555 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1556 ++ctype->outdigits_act;
1563 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. */
1564 static void
1565 charclass_ucs4_ellipsis (struct linereader *ldfile,
1566 struct locale_ctype_t *ctype,
1567 const struct charmap_t *charmap,
1568 struct repertoire_t *repertoire,
1569 struct token *now, uint32_t last_wch,
1570 unsigned long int class256_bit,
1571 unsigned long int class_bit, int ignore_content,
1572 int handle_digits, int step)
1574 if (last_wch > now->val.ucs4)
1576 lr_error (ldfile, _("\
1577 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1578 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1579 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
1580 return;
1583 if (!ignore_content)
1584 while ((last_wch += step) <= now->val.ucs4)
1586 /* We have to find out whether there is a byte sequence corresponding
1587 to this UCS4 value. */
1588 struct charseq *seq;
1589 char utmp[10];
1591 snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1592 seq = charmap_find_value (charmap, utmp, 9);
1593 if (seq == NULL)
1595 snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1596 seq = charmap_find_value (charmap, utmp, 5);
1599 if (seq == NULL)
1600 /* Try looking in the repertoire map. */
1601 seq = repertoire_find_seq (repertoire, last_wch);
1603 /* If this is the first time we look for this sequence create a new
1604 entry. */
1605 if (seq == NULL)
1607 static const struct charseq negative
1608 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1610 /* Find the symbolic name for this UCS4 value. */
1611 if (repertoire != NULL)
1613 const char *symbol = repertoire_find_symbol (repertoire,
1614 last_wch);
1615 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1616 sizeof (uint32_t));
1617 *newp = last_wch;
1619 if (symbol != NULL)
1620 /* We have a name, now search the multibyte value. */
1621 seq = charmap_find_value (charmap, symbol, strlen (symbol));
1623 if (seq == NULL)
1624 /* We have to create a fake entry. */
1625 seq = (struct charseq *) &negative;
1626 else
1627 seq->ucs4 = last_wch;
1629 insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1630 seq);
1632 else
1633 /* We have to create a fake entry. */
1634 seq = (struct charseq *) &negative;
1637 /* We have a name, now search the multibyte value. */
1638 if (seq->ucs4 == last_wch && seq->nbytes == 1)
1639 /* Yep, we can store information about this byte sequence. */
1640 ctype->class256_collection[(size_t) seq->bytes[0]]
1641 |= class256_bit;
1643 /* And of course we have the UCS4 position. */
1644 if (class_bit != 0)
1645 *find_idx (ctype, &ctype->class_collection,
1646 &ctype->class_collection_max,
1647 &ctype->class_collection_act, last_wch) |= class_bit;
1649 if (handle_digits == 1)
1651 /* We must store the digit values. */
1652 if (ctype->mbdigits_act == ctype->mbdigits_max)
1654 ctype->mbdigits_max *= 2;
1655 ctype->mbdigits = xrealloc (ctype->mbdigits,
1656 (ctype->mbdigits_max
1657 * sizeof (char *)));
1658 ctype->wcdigits_max *= 2;
1659 ctype->wcdigits = xrealloc (ctype->wcdigits,
1660 (ctype->wcdigits_max
1661 * sizeof (uint32_t)));
1664 ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1665 ? seq : NULL);
1666 ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1668 else if (handle_digits == 2)
1670 /* We must store the digit values. */
1671 if (ctype->outdigits_act >= 10)
1673 lr_error (ldfile, _("\
1674 %s: field `%s' does not contain exactly ten entries"),
1675 "LC_CTYPE", "outdigit");
1676 return;
1679 ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1680 ? seq : NULL);
1681 ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1682 ++ctype->outdigits_act;
1688 /* Ellipsis as in `/xea/x12.../xea/x34'. */
1689 static void
1690 charclass_charcode_ellipsis (struct linereader *ldfile,
1691 struct locale_ctype_t *ctype,
1692 const struct charmap_t *charmap,
1693 struct repertoire_t *repertoire,
1694 struct token *now, char *last_charcode,
1695 uint32_t last_charcode_len,
1696 unsigned long int class256_bit,
1697 unsigned long int class_bit, int ignore_content,
1698 int handle_digits)
1700 /* First check whether the to-value is larger. */
1701 if (now->val.charcode.nbytes != last_charcode_len)
1703 lr_error (ldfile, _("\
1704 start and end character sequence of range must have the same length"));
1705 return;
1708 if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
1710 lr_error (ldfile, _("\
1711 to-value character sequence is smaller than from-value sequence"));
1712 return;
1715 if (!ignore_content)
1719 /* Increment the byte sequence value. */
1720 struct charseq *seq;
1721 uint32_t wch;
1722 int i;
1724 for (i = last_charcode_len - 1; i >= 0; --i)
1725 if (++last_charcode[i] != 0)
1726 break;
1728 if (last_charcode_len == 1)
1729 /* Of course we have the charcode value. */
1730 ctype->class256_collection[(size_t) last_charcode[0]]
1731 |= class256_bit;
1733 /* Find the symbolic name. */
1734 seq = charmap_find_symbol (charmap, last_charcode,
1735 last_charcode_len);
1736 if (seq != NULL)
1738 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1739 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1740 strlen (seq->name));
1741 wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
1743 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1744 *find_idx (ctype, &ctype->class_collection,
1745 &ctype->class_collection_max,
1746 &ctype->class_collection_act, wch) |= class_bit;
1748 else
1749 wch = ILLEGAL_CHAR_VALUE;
1751 if (handle_digits == 1)
1753 /* We must store the digit values. */
1754 if (ctype->mbdigits_act == ctype->mbdigits_max)
1756 ctype->mbdigits_max *= 2;
1757 ctype->mbdigits = xrealloc (ctype->mbdigits,
1758 (ctype->mbdigits_max
1759 * sizeof (char *)));
1760 ctype->wcdigits_max *= 2;
1761 ctype->wcdigits = xrealloc (ctype->wcdigits,
1762 (ctype->wcdigits_max
1763 * sizeof (uint32_t)));
1766 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1767 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1768 seq->nbytes = last_charcode_len;
1770 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1771 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1773 else if (handle_digits == 2)
1775 struct charseq *seq;
1776 /* We must store the digit values. */
1777 if (ctype->outdigits_act >= 10)
1779 lr_error (ldfile, _("\
1780 %s: field `%s' does not contain exactly ten entries"),
1781 "LC_CTYPE", "outdigit");
1782 return;
1785 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1786 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1787 seq->nbytes = last_charcode_len;
1789 ctype->mboutdigits[ctype->outdigits_act] = seq;
1790 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1791 ++ctype->outdigits_act;
1794 while (memcmp (last_charcode, now->val.charcode.bytes,
1795 last_charcode_len) != 0);
1800 static uint32_t *
1801 find_translit2 (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
1802 uint32_t wch)
1804 struct translit_t *trunp = ctype->translit;
1805 struct translit_ignore_t *tirunp = ctype->translit_ignore;
1807 while (trunp != NULL)
1809 /* XXX We simplify things here. The transliterations we look
1810 for are only allowed to have one character. */
1811 if (trunp->from[0] == wch && trunp->from[1] == 0)
1813 /* Found it. Now look for a transliteration which can be
1814 represented with the character set. */
1815 struct translit_to_t *torunp = trunp->to;
1817 while (torunp != NULL)
1819 int i;
1821 for (i = 0; torunp->str[i] != 0; ++i)
1823 char utmp[10];
1825 snprintf (utmp, sizeof (utmp), "U%08X", torunp->str[i]);
1826 if (charmap_find_value (charmap, utmp, 9) == NULL)
1827 /* This character cannot be represented. */
1828 break;
1831 if (torunp->str[i] == 0)
1832 return torunp->str;
1834 torunp = torunp->next;
1837 break;
1840 trunp = trunp->next;
1843 /* Check for ignored chars. */
1844 while (tirunp != NULL)
1846 if (tirunp->from <= wch && tirunp->to >= wch)
1848 uint32_t wi;
1850 for (wi = tirunp->from; wi <= wch; wi += tirunp->step)
1851 if (wi == wch)
1852 return (uint32_t []) { 0 };
1856 /* Nothing found. */
1857 return NULL;
1861 uint32_t *
1862 find_translit (struct localedef_t *locale, const struct charmap_t *charmap,
1863 uint32_t wch)
1865 struct locale_ctype_t *ctype;
1866 uint32_t *result = NULL;
1868 assert (locale != NULL);
1869 ctype = locale->categories[LC_CTYPE].ctype;
1871 if (ctype->translit != NULL)
1872 result = find_translit2 (ctype, charmap, wch);
1874 if (result == NULL)
1876 struct translit_include_t *irunp = ctype->translit_include;
1878 while (irunp != NULL && result == NULL)
1880 result = find_translit (find_locale (CTYPE_LOCALE,
1881 irunp->copy_locale,
1882 irunp->copy_repertoire,
1883 charmap),
1884 charmap, wch);
1885 irunp = irunp->next;
1889 return result;
1893 /* Read one transliteration entry. */
1894 static uint32_t *
1895 read_widestring (struct linereader *ldfile, struct token *now,
1896 const struct charmap_t *charmap,
1897 struct repertoire_t *repertoire)
1899 uint32_t *wstr;
1901 if (now->tok == tok_default_missing)
1902 /* The special name "" will denote this case. */
1903 wstr = ((uint32_t *) { 0 });
1904 else if (now->tok == tok_bsymbol)
1906 /* Get the value from the repertoire. */
1907 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1908 wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1909 now->val.str.lenmb);
1910 if (wstr[0] == ILLEGAL_CHAR_VALUE)
1912 /* We cannot proceed, we don't know the UCS4 value. */
1913 free (wstr);
1914 return NULL;
1917 wstr[1] = 0;
1919 else if (now->tok == tok_ucs4)
1921 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1922 wstr[0] = now->val.ucs4;
1923 wstr[1] = 0;
1925 else if (now->tok == tok_charcode)
1927 /* Argh, we have to convert to the symbol name first and then to the
1928 UCS4 value. */
1929 struct charseq *seq = charmap_find_symbol (charmap,
1930 now->val.str.startmb,
1931 now->val.str.lenmb);
1932 if (seq == NULL)
1933 /* Cannot find the UCS4 value. */
1934 return NULL;
1936 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1937 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1938 strlen (seq->name));
1939 if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1940 /* We cannot proceed, we don't know the UCS4 value. */
1941 return NULL;
1943 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1944 wstr[0] = seq->ucs4;
1945 wstr[1] = 0;
1947 else if (now->tok == tok_string)
1949 wstr = now->val.str.startwc;
1950 if (wstr == NULL || wstr[0] == 0)
1951 return NULL;
1953 else
1955 if (now->tok != tok_eol && now->tok != tok_eof)
1956 lr_ignore_rest (ldfile, 0);
1957 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1958 return (uint32_t *) -1l;
1961 return wstr;
1965 static void
1966 read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1967 struct token *now, const struct charmap_t *charmap,
1968 struct repertoire_t *repertoire)
1970 uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1971 struct translit_t *result;
1972 struct translit_to_t **top;
1973 struct obstack *ob = &ctype->mempool;
1974 int first;
1975 int ignore;
1977 if (from_wstr == NULL)
1978 /* There is no valid from string. */
1979 return;
1981 result = (struct translit_t *) obstack_alloc (ob,
1982 sizeof (struct translit_t));
1983 result->from = from_wstr;
1984 result->fname = ldfile->fname;
1985 result->lineno = ldfile->lineno;
1986 result->next = NULL;
1987 result->to = NULL;
1988 top = &result->to;
1989 first = 1;
1990 ignore = 0;
1992 while (1)
1994 uint32_t *to_wstr;
1996 /* Next we have one or more transliterations. They are
1997 separated by semicolons. */
1998 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2000 if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
2002 /* One string read. */
2003 const uint32_t zero = 0;
2005 if (!ignore)
2007 obstack_grow (ob, &zero, 4);
2008 to_wstr = obstack_finish (ob);
2010 *top = obstack_alloc (ob, sizeof (struct translit_to_t));
2011 (*top)->str = to_wstr;
2012 (*top)->next = NULL;
2015 if (now->tok == tok_eol)
2017 result->next = ctype->translit;
2018 ctype->translit = result;
2019 return;
2022 if (!ignore)
2023 top = &(*top)->next;
2024 ignore = 0;
2026 else
2028 to_wstr = read_widestring (ldfile, now, charmap, repertoire);
2029 if (to_wstr == (uint32_t *) -1l)
2031 /* An error occurred. */
2032 obstack_free (ob, result);
2033 return;
2036 if (to_wstr == NULL)
2037 ignore = 1;
2038 else
2039 /* This value is usable. */
2040 obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
2042 first = 0;
2048 static void
2049 read_translit_ignore_entry (struct linereader *ldfile,
2050 struct locale_ctype_t *ctype,
2051 const struct charmap_t *charmap,
2052 struct repertoire_t *repertoire)
2054 /* We expect a semicolon-separated list of characters we ignore. We are
2055 only interested in the wide character definitions. These must be
2056 single characters, possibly defining a range when an ellipsis is used. */
2057 while (1)
2059 struct token *now = lr_token (ldfile, charmap, NULL, repertoire,
2060 verbose);
2061 struct translit_ignore_t *newp;
2062 uint32_t from;
2064 if (now->tok == tok_eol || now->tok == tok_eof)
2066 lr_error (ldfile,
2067 _("premature end of `translit_ignore' definition"));
2068 return;
2071 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2073 lr_error (ldfile, _("syntax error"));
2074 lr_ignore_rest (ldfile, 0);
2075 return;
2078 if (now->tok == tok_ucs4)
2079 from = now->val.ucs4;
2080 else
2081 /* Try to get the value. */
2082 from = repertoire_find_value (repertoire, now->val.str.startmb,
2083 now->val.str.lenmb);
2085 if (from == ILLEGAL_CHAR_VALUE)
2087 lr_error (ldfile, "invalid character name");
2088 newp = NULL;
2090 else
2092 newp = (struct translit_ignore_t *)
2093 obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
2094 newp->from = from;
2095 newp->to = from;
2096 newp->step = 1;
2098 newp->next = ctype->translit_ignore;
2099 ctype->translit_ignore = newp;
2102 /* Now we expect either a semicolon, an ellipsis, or the end of the
2103 line. */
2104 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2106 if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
2108 /* XXX Should we bother implementing `....'? `...' certainly
2109 will not be implemented. */
2110 uint32_t to;
2111 int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
2113 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2115 if (now->tok == tok_eol || now->tok == tok_eof)
2117 lr_error (ldfile,
2118 _("premature end of `translit_ignore' definition"));
2119 return;
2122 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2124 lr_error (ldfile, _("syntax error"));
2125 lr_ignore_rest (ldfile, 0);
2126 return;
2129 if (now->tok == tok_ucs4)
2130 to = now->val.ucs4;
2131 else
2132 /* Try to get the value. */
2133 to = repertoire_find_value (repertoire, now->val.str.startmb,
2134 now->val.str.lenmb);
2136 if (to == ILLEGAL_CHAR_VALUE)
2137 lr_error (ldfile, "invalid character name");
2138 else
2140 /* Make sure the `to'-value is larger. */
2141 if (to >= from)
2143 newp->to = to;
2144 newp->step = step;
2146 else
2147 lr_error (ldfile, _("\
2148 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2149 (to | from) < 65536 ? 4 : 8, to,
2150 (to | from) < 65536 ? 4 : 8, from);
2153 /* And the next token. */
2154 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2157 if (now->tok == tok_eol || now->tok == tok_eof)
2158 /* We are done. */
2159 return;
2161 if (now->tok == tok_semicolon)
2162 /* Next round. */
2163 continue;
2165 /* If we come here something is wrong. */
2166 lr_error (ldfile, _("syntax error"));
2167 lr_ignore_rest (ldfile, 0);
2168 return;
2173 /* The parser for the LC_CTYPE section of the locale definition. */
2174 void
2175 ctype_read (struct linereader *ldfile, struct localedef_t *result,
2176 const struct charmap_t *charmap, const char *repertoire_name,
2177 int ignore_content)
2179 struct repertoire_t *repertoire = NULL;
2180 struct locale_ctype_t *ctype;
2181 struct token *now;
2182 enum token_t nowtok;
2183 size_t cnt;
2184 struct charseq *last_seq;
2185 uint32_t last_wch = 0;
2186 enum token_t last_token;
2187 enum token_t ellipsis_token;
2188 int step;
2189 char last_charcode[16];
2190 size_t last_charcode_len = 0;
2191 const char *last_str = NULL;
2192 int mapidx;
2193 struct localedef_t *copy_locale = NULL;
2195 /* Get the repertoire we have to use. */
2196 if (repertoire_name != NULL)
2197 repertoire = repertoire_read (repertoire_name);
2199 /* The rest of the line containing `LC_CTYPE' must be free. */
2200 lr_ignore_rest (ldfile, 1);
2205 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2206 nowtok = now->tok;
2208 while (nowtok == tok_eol);
2210 /* If we see `copy' now we are almost done. */
2211 if (nowtok == tok_copy)
2213 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2214 if (now->tok != tok_string)
2216 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2218 skip_category:
2220 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2221 while (now->tok != tok_eof && now->tok != tok_end);
2223 if (now->tok != tok_eof
2224 || (now = lr_token (ldfile, charmap, NULL, NULL, verbose),
2225 now->tok == tok_eof))
2226 lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2227 else if (now->tok != tok_lc_ctype)
2229 lr_error (ldfile, _("\
2230 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2231 lr_ignore_rest (ldfile, 0);
2233 else
2234 lr_ignore_rest (ldfile, 1);
2236 return;
2239 if (! ignore_content)
2241 /* Get the locale definition. */
2242 copy_locale = load_locale (LC_CTYPE, now->val.str.startmb,
2243 repertoire_name, charmap, NULL);
2244 if ((copy_locale->avail & CTYPE_LOCALE) == 0)
2246 /* Not yet loaded. So do it now. */
2247 if (locfile_read (copy_locale, charmap) != 0)
2248 goto skip_category;
2252 lr_ignore_rest (ldfile, 1);
2254 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2255 nowtok = now->tok;
2258 /* Prepare the data structures. */
2259 ctype_startup (ldfile, result, charmap, copy_locale, ignore_content);
2260 ctype = result->categories[LC_CTYPE].ctype;
2262 /* Remember the repertoire we use. */
2263 if (!ignore_content)
2264 ctype->repertoire = repertoire;
2266 while (1)
2268 unsigned long int class_bit = 0;
2269 unsigned long int class256_bit = 0;
2270 int handle_digits = 0;
2272 /* Of course we don't proceed beyond the end of file. */
2273 if (nowtok == tok_eof)
2274 break;
2276 /* Ignore empty lines. */
2277 if (nowtok == tok_eol)
2279 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2280 nowtok = now->tok;
2281 continue;
2284 switch (nowtok)
2286 case tok_charclass:
2287 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2288 while (now->tok == tok_ident || now->tok == tok_string)
2290 ctype_class_new (ldfile, ctype, now->val.str.startmb);
2291 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2292 if (now->tok != tok_semicolon)
2293 break;
2294 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2296 if (now->tok != tok_eol)
2297 SYNTAX_ERROR (_("\
2298 %s: syntax error in definition of new character class"), "LC_CTYPE");
2299 break;
2301 case tok_charconv:
2302 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2303 while (now->tok == tok_ident || now->tok == tok_string)
2305 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2306 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2307 if (now->tok != tok_semicolon)
2308 break;
2309 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2311 if (now->tok != tok_eol)
2312 SYNTAX_ERROR (_("\
2313 %s: syntax error in definition of new character map"), "LC_CTYPE");
2314 break;
2316 case tok_class:
2317 /* Ignore the rest of the line if we don't need the input of
2318 this line. */
2319 if (ignore_content)
2321 lr_ignore_rest (ldfile, 0);
2322 break;
2325 /* We simply forget the `class' keyword and use the following
2326 operand to determine the bit. */
2327 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2328 if (now->tok == tok_ident || now->tok == tok_string)
2330 /* This may be one of the predefined class names. */
2331 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2332 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
2333 break;
2334 if (cnt >= ctype->nr_charclass)
2336 #ifdef PREDEFINED_CLASSES
2337 if (now->val.str.lenmb == 8
2338 && memcmp ("special1", now->val.str.startmb, 8) == 0)
2339 class_bit = _ISwspecial1;
2340 else if (now->val.str.lenmb == 8
2341 && memcmp ("special2", now->val.str.startmb, 8) == 0)
2342 class_bit = _ISwspecial2;
2343 else if (now->val.str.lenmb == 8
2344 && memcmp ("special3", now->val.str.startmb, 8) == 0)
2345 class_bit = _ISwspecial3;
2346 else
2347 #endif
2349 /* OK, it's a new class. */
2350 ctype_class_new (ldfile, ctype, now->val.str.startmb);
2352 class_bit = _ISwbit (ctype->nr_charclass - 1);
2355 else
2357 class_bit = _ISwbit (cnt);
2359 free (now->val.str.startmb);
2362 else if (now->tok == tok_digit)
2363 goto handle_tok_digit;
2364 else if (now->tok < tok_upper || now->tok > tok_blank)
2365 goto err_label;
2366 else
2368 class_bit = BITw (now->tok);
2369 class256_bit = BIT (now->tok);
2372 /* The next character must be a semicolon. */
2373 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2374 if (now->tok != tok_semicolon)
2375 goto err_label;
2376 goto read_charclass;
2378 case tok_upper:
2379 case tok_lower:
2380 case tok_alpha:
2381 case tok_alnum:
2382 case tok_space:
2383 case tok_cntrl:
2384 case tok_punct:
2385 case tok_graph:
2386 case tok_print:
2387 case tok_xdigit:
2388 case tok_blank:
2389 /* Ignore the rest of the line if we don't need the input of
2390 this line. */
2391 if (ignore_content)
2393 lr_ignore_rest (ldfile, 0);
2394 break;
2397 class_bit = BITw (now->tok);
2398 class256_bit = BIT (now->tok);
2399 handle_digits = 0;
2400 read_charclass:
2401 ctype->class_done |= class_bit;
2402 last_token = tok_none;
2403 ellipsis_token = tok_none;
2404 step = 1;
2405 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2406 while (now->tok != tok_eol && now->tok != tok_eof)
2408 uint32_t wch;
2409 struct charseq *seq;
2411 if (ellipsis_token == tok_none)
2413 if (get_character (now, charmap, repertoire, &seq, &wch))
2414 goto err_label;
2416 if (!ignore_content && seq != NULL && seq->nbytes == 1)
2417 /* Yep, we can store information about this byte
2418 sequence. */
2419 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
2421 if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2422 && class_bit != 0)
2423 /* We have the UCS4 position. */
2424 *find_idx (ctype, &ctype->class_collection,
2425 &ctype->class_collection_max,
2426 &ctype->class_collection_act, wch) |= class_bit;
2428 last_token = now->tok;
2429 /* Terminate the string. */
2430 if (last_token == tok_bsymbol)
2432 now->val.str.startmb[now->val.str.lenmb] = '\0';
2433 last_str = now->val.str.startmb;
2435 else
2436 last_str = NULL;
2437 last_seq = seq;
2438 last_wch = wch;
2439 memcpy (last_charcode, now->val.charcode.bytes, 16);
2440 last_charcode_len = now->val.charcode.nbytes;
2442 if (!ignore_content && handle_digits == 1)
2444 /* We must store the digit values. */
2445 if (ctype->mbdigits_act == ctype->mbdigits_max)
2447 ctype->mbdigits_max += 10;
2448 ctype->mbdigits = xrealloc (ctype->mbdigits,
2449 (ctype->mbdigits_max
2450 * sizeof (char *)));
2451 ctype->wcdigits_max += 10;
2452 ctype->wcdigits = xrealloc (ctype->wcdigits,
2453 (ctype->wcdigits_max
2454 * sizeof (uint32_t)));
2457 ctype->mbdigits[ctype->mbdigits_act++] = seq;
2458 ctype->wcdigits[ctype->wcdigits_act++] = wch;
2460 else if (!ignore_content && handle_digits == 2)
2462 /* We must store the digit values. */
2463 if (ctype->outdigits_act >= 10)
2465 lr_error (ldfile, _("\
2466 %s: field `%s' does not contain exactly ten entries"),
2467 "LC_CTYPE", "outdigit");
2468 lr_ignore_rest (ldfile, 0);
2469 break;
2472 ctype->mboutdigits[ctype->outdigits_act] = seq;
2473 ctype->wcoutdigits[ctype->outdigits_act] = wch;
2474 ++ctype->outdigits_act;
2477 else
2479 /* Now it gets complicated. We have to resolve the
2480 ellipsis problem. First we must distinguish between
2481 the different kind of ellipsis and this must match the
2482 tokens we have seen. */
2483 assert (last_token != tok_none);
2485 if (last_token != now->tok)
2487 lr_error (ldfile, _("\
2488 ellipsis range must be marked by two operands of same type"));
2489 lr_ignore_rest (ldfile, 0);
2490 break;
2493 if (last_token == tok_bsymbol)
2495 if (ellipsis_token == tok_ellipsis3)
2496 lr_error (ldfile, _("with symbolic name range values \
2497 the absolute ellipsis `...' must not be used"));
2499 charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2500 repertoire, now, last_str,
2501 class256_bit, class_bit,
2502 (ellipsis_token
2503 == tok_ellipsis4
2504 ? 10 : 16),
2505 ignore_content,
2506 handle_digits, step);
2508 else if (last_token == tok_ucs4)
2510 if (ellipsis_token != tok_ellipsis2)
2511 lr_error (ldfile, _("\
2512 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2514 charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2515 repertoire, now, last_wch,
2516 class256_bit, class_bit,
2517 ignore_content, handle_digits,
2518 step);
2520 else
2522 assert (last_token == tok_charcode);
2524 if (ellipsis_token != tok_ellipsis3)
2525 lr_error (ldfile, _("\
2526 with character code range values one must use the absolute ellipsis `...'"));
2528 charclass_charcode_ellipsis (ldfile, ctype, charmap,
2529 repertoire, now,
2530 last_charcode,
2531 last_charcode_len,
2532 class256_bit, class_bit,
2533 ignore_content,
2534 handle_digits);
2537 /* Now we have used the last value. */
2538 last_token = tok_none;
2541 /* Next we expect a semicolon or the end of the line. */
2542 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2543 if (now->tok == tok_eol || now->tok == tok_eof)
2544 break;
2546 if (last_token != tok_none
2547 && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
2549 if (now->tok == tok_ellipsis2_2)
2551 now->tok = tok_ellipsis2;
2552 step = 2;
2554 else if (now->tok == tok_ellipsis4_2)
2556 now->tok = tok_ellipsis4;
2557 step = 2;
2560 ellipsis_token = now->tok;
2562 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2563 continue;
2566 if (now->tok != tok_semicolon)
2567 goto err_label;
2569 /* And get the next character. */
2570 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2572 ellipsis_token = tok_none;
2573 step = 1;
2575 break;
2577 case tok_digit:
2578 /* Ignore the rest of the line if we don't need the input of
2579 this line. */
2580 if (ignore_content)
2582 lr_ignore_rest (ldfile, 0);
2583 break;
2586 handle_tok_digit:
2587 class_bit = _ISwdigit;
2588 class256_bit = _ISdigit;
2589 handle_digits = 1;
2590 goto read_charclass;
2592 case tok_outdigit:
2593 /* Ignore the rest of the line if we don't need the input of
2594 this line. */
2595 if (ignore_content)
2597 lr_ignore_rest (ldfile, 0);
2598 break;
2601 if (ctype->outdigits_act != 0)
2602 lr_error (ldfile, _("\
2603 %s: field `%s' declared more than once"),
2604 "LC_CTYPE", "outdigit");
2605 class_bit = 0;
2606 class256_bit = 0;
2607 handle_digits = 2;
2608 goto read_charclass;
2610 case tok_toupper:
2611 /* Ignore the rest of the line if we don't need the input of
2612 this line. */
2613 if (ignore_content)
2615 lr_ignore_rest (ldfile, 0);
2616 break;
2619 mapidx = 0;
2620 goto read_mapping;
2622 case tok_tolower:
2623 /* Ignore the rest of the line if we don't need the input of
2624 this line. */
2625 if (ignore_content)
2627 lr_ignore_rest (ldfile, 0);
2628 break;
2631 mapidx = 1;
2632 goto read_mapping;
2634 case tok_map:
2635 /* Ignore the rest of the line if we don't need the input of
2636 this line. */
2637 if (ignore_content)
2639 lr_ignore_rest (ldfile, 0);
2640 break;
2643 /* We simply forget the `map' keyword and use the following
2644 operand to determine the mapping. */
2645 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2646 if (now->tok == tok_ident || now->tok == tok_string)
2648 size_t cnt;
2650 for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2651 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2652 break;
2654 if (cnt < ctype->map_collection_nr)
2655 free (now->val.str.startmb);
2656 else
2657 /* OK, it's a new map. */
2658 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2660 mapidx = cnt;
2662 else if (now->tok < tok_toupper || now->tok > tok_tolower)
2663 goto err_label;
2664 else
2665 mapidx = now->tok - tok_toupper;
2667 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2668 /* This better should be a semicolon. */
2669 if (now->tok != tok_semicolon)
2670 goto err_label;
2672 read_mapping:
2673 /* Test whether this mapping was already defined. */
2674 if (ctype->tomap_done[mapidx])
2676 lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2677 ctype->mapnames[mapidx]);
2678 lr_ignore_rest (ldfile, 0);
2679 break;
2681 ctype->tomap_done[mapidx] = 1;
2683 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2684 while (now->tok != tok_eol && now->tok != tok_eof)
2686 struct charseq *from_seq;
2687 uint32_t from_wch;
2688 struct charseq *to_seq;
2689 uint32_t to_wch;
2691 /* Every pair starts with an opening brace. */
2692 if (now->tok != tok_open_brace)
2693 goto err_label;
2695 /* Next comes the from-value. */
2696 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2697 if (get_character (now, charmap, repertoire, &from_seq,
2698 &from_wch) != 0)
2699 goto err_label;
2701 /* The next is a comma. */
2702 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2703 if (now->tok != tok_comma)
2704 goto err_label;
2706 /* And the other value. */
2707 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2708 if (get_character (now, charmap, repertoire, &to_seq,
2709 &to_wch) != 0)
2710 goto err_label;
2712 /* And the last thing is the closing brace. */
2713 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2714 if (now->tok != tok_close_brace)
2715 goto err_label;
2717 if (!ignore_content)
2719 /* Check whether the mapping converts from an ASCII value
2720 to a non-ASCII value. */
2721 if (from_seq != NULL && from_seq->nbytes == 1
2722 && isascii (from_seq->bytes[0])
2723 && to_seq != NULL && (to_seq->nbytes != 1
2724 || !isascii (to_seq->bytes[0])))
2725 ctype->to_nonascii = 1;
2727 if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2728 && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2729 /* We can use this value. */
2730 ctype->map256_collection[mapidx][from_seq->bytes[0]]
2731 = to_seq->bytes[0];
2733 if (from_wch != ILLEGAL_CHAR_VALUE
2734 && to_wch != ILLEGAL_CHAR_VALUE)
2735 /* Both correct values. */
2736 *find_idx (ctype, &ctype->map_collection[mapidx],
2737 &ctype->map_collection_max[mapidx],
2738 &ctype->map_collection_act[mapidx],
2739 from_wch) = to_wch;
2742 /* Now comes a semicolon or the end of the line/file. */
2743 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2744 if (now->tok == tok_semicolon)
2745 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2747 break;
2749 case tok_translit_start:
2750 /* Ignore the entire translit section with its peculiar syntax
2751 if we don't need the input. */
2752 if (ignore_content)
2756 lr_ignore_rest (ldfile, 0);
2757 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2759 while (now->tok != tok_translit_end && now->tok != tok_eof);
2761 if (now->tok == tok_eof)
2762 lr_error (ldfile, _(\
2763 "%s: `translit_start' section does not end with `translit_end'"),
2764 "LC_CTYPE");
2766 break;
2769 /* The rest of the line better should be empty. */
2770 lr_ignore_rest (ldfile, 1);
2772 /* We count here the number of allocated entries in the `translit'
2773 array. */
2774 cnt = 0;
2776 ldfile->translate_strings = 1;
2777 ldfile->return_widestr = 1;
2779 /* We proceed until we see the `translit_end' token. */
2780 while (now = lr_token (ldfile, charmap, NULL, repertoire, verbose),
2781 now->tok != tok_translit_end && now->tok != tok_eof)
2783 if (now->tok == tok_eol)
2784 /* Ignore empty lines. */
2785 continue;
2787 if (now->tok == tok_include)
2789 /* We have to include locale. */
2790 const char *locale_name;
2791 const char *repertoire_name;
2792 struct translit_include_t *include_stmt, **include_ptr;
2794 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2795 /* This should be a string or an identifier. In any
2796 case something to name a locale. */
2797 if (now->tok != tok_string && now->tok != tok_ident)
2799 translit_syntax:
2800 lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2801 lr_ignore_rest (ldfile, 0);
2802 continue;
2804 locale_name = now->val.str.startmb;
2806 /* Next should be a semicolon. */
2807 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2808 if (now->tok != tok_semicolon)
2809 goto translit_syntax;
2811 /* Now the repertoire name. */
2812 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2813 if ((now->tok != tok_string && now->tok != tok_ident)
2814 || now->val.str.startmb == NULL)
2815 goto translit_syntax;
2816 repertoire_name = now->val.str.startmb;
2817 if (repertoire_name[0] == '\0')
2818 /* Ignore the empty string. */
2819 repertoire_name = NULL;
2821 /* Save the include statement for later processing. */
2822 include_stmt = (struct translit_include_t *)
2823 xmalloc (sizeof (struct translit_include_t));
2824 include_stmt->copy_locale = locale_name;
2825 include_stmt->copy_repertoire = repertoire_name;
2826 include_stmt->next = NULL;
2828 include_ptr = &ctype->translit_include;
2829 while (*include_ptr != NULL)
2830 include_ptr = &(*include_ptr)->next;
2831 *include_ptr = include_stmt;
2833 /* The rest of the line must be empty. */
2834 lr_ignore_rest (ldfile, 1);
2836 /* Make sure the locale is read. */
2837 add_to_readlist (LC_CTYPE, locale_name, repertoire_name,
2838 1, NULL);
2839 continue;
2841 else if (now->tok == tok_default_missing)
2843 uint32_t *wstr;
2845 while (1)
2847 /* We expect a single character or string as the
2848 argument. */
2849 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2850 wstr = read_widestring (ldfile, now, charmap,
2851 repertoire);
2853 if (wstr != NULL)
2855 if (ctype->default_missing != NULL)
2857 lr_error (ldfile, _("\
2858 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2859 WITH_CUR_LOCALE (error_at_line (0, 0,
2860 ctype->default_missing_file,
2861 ctype->default_missing_lineno,
2862 _("\
2863 previous definition was here")));
2865 else
2867 ctype->default_missing = wstr;
2868 ctype->default_missing_file = ldfile->fname;
2869 ctype->default_missing_lineno = ldfile->lineno;
2871 /* We can have more entries, ignore them. */
2872 lr_ignore_rest (ldfile, 0);
2873 break;
2875 else if (wstr == (uint32_t *) -1l)
2876 /* This was a syntax error. */
2877 break;
2879 /* Maybe there is another replacement we can use. */
2880 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2881 if (now->tok == tok_eol || now->tok == tok_eof)
2883 /* Nothing found. We tell the user. */
2884 lr_error (ldfile, _("\
2885 %s: no representable `default_missing' definition found"), "LC_CTYPE");
2886 break;
2888 if (now->tok != tok_semicolon)
2889 goto translit_syntax;
2892 continue;
2894 else if (now->tok == tok_translit_ignore)
2896 read_translit_ignore_entry (ldfile, ctype, charmap,
2897 repertoire);
2898 continue;
2901 read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2903 ldfile->return_widestr = 0;
2905 if (now->tok == tok_eof)
2906 lr_error (ldfile, _(\
2907 "%s: `translit_start' section does not end with `translit_end'"),
2908 "LC_CTYPE");
2910 break;
2912 case tok_ident:
2913 /* Ignore the rest of the line if we don't need the input of
2914 this line. */
2915 if (ignore_content)
2917 lr_ignore_rest (ldfile, 0);
2918 break;
2921 /* This could mean one of several things. First test whether
2922 it's a character class name. */
2923 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2924 if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2925 break;
2926 if (cnt < ctype->nr_charclass)
2928 class_bit = _ISwbit (cnt);
2929 class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2930 free (now->val.str.startmb);
2931 goto read_charclass;
2933 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2934 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2935 break;
2936 if (cnt < ctype->map_collection_nr)
2938 mapidx = cnt;
2939 free (now->val.str.startmb);
2940 goto read_mapping;
2942 #ifdef PREDEFINED_CLASSES
2943 if (strcmp (now->val.str.startmb, "special1") == 0)
2945 class_bit = _ISwspecial1;
2946 free (now->val.str.startmb);
2947 goto read_charclass;
2949 if (strcmp (now->val.str.startmb, "special2") == 0)
2951 class_bit = _ISwspecial2;
2952 free (now->val.str.startmb);
2953 goto read_charclass;
2955 if (strcmp (now->val.str.startmb, "special3") == 0)
2957 class_bit = _ISwspecial3;
2958 free (now->val.str.startmb);
2959 goto read_charclass;
2961 if (strcmp (now->val.str.startmb, "tosymmetric") == 0)
2963 mapidx = 2;
2964 goto read_mapping;
2966 #endif
2967 break;
2969 case tok_end:
2970 /* Next we assume `LC_CTYPE'. */
2971 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2972 if (now->tok == tok_eof)
2973 break;
2974 if (now->tok == tok_eol)
2975 lr_error (ldfile, _("%s: incomplete `END' line"),
2976 "LC_CTYPE");
2977 else if (now->tok != tok_lc_ctype)
2978 lr_error (ldfile, _("\
2979 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2980 lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2981 return;
2983 default:
2984 err_label:
2985 if (now->tok != tok_eof)
2986 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2989 /* Prepare for the next round. */
2990 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2991 nowtok = now->tok;
2994 /* When we come here we reached the end of the file. */
2995 lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2999 static void
3000 set_class_defaults (struct locale_ctype_t *ctype,
3001 const struct charmap_t *charmap,
3002 struct repertoire_t *repertoire)
3004 size_t cnt;
3006 /* This function defines the default values for the classes and conversions
3007 according to POSIX.2 2.5.2.1.
3008 It may seem that the order of these if-blocks is arbitrary but it is NOT.
3009 Don't move them unless you know what you are doing! */
3011 auto void set_default (int bitpos, int from, int to);
/* Nested helper: mark every character code from FROM through TO (inclusive)
   as a member of the class whose bit position is BITPOS.  Each code is
   looked up in CHARMAP, first under its one-character name and then under
   its `U%08X' name.  The 8-bit table `class256_collection' is updated only
   when a one-byte sequence is found; the `class_collection' entry for the
   code (accessed through the ELEM macro, defined elsewhere) is updated in
   either case.  Uses CTYPE and CHARMAP of the enclosing function.  */
3013 void set_default (int bitpos, int from, int to)
3015 char tmp[2];
3016 int ch;
3017 int bit = _ISbit (bitpos);
3018 int bitw = _ISwbit (bitpos);
3019 /* Make TMP a NUL-terminated one-character string; tmp[0] is set per iteration. */
3020 strcpy (tmp, "?");
3022 for (ch = from; ch <= to; ++ch)
3024 struct charseq *seq;
3025 tmp[0] = ch;
/* First try the character itself as the symbolic name.  */
3027 seq = charmap_find_value (charmap, tmp, 1);
3028 if (seq == NULL)
/* Fall back to the `U%08X' name of the same code: `U' plus eight hex
   digits plus the terminating NUL fit exactly into BUF.  */
3030 char buf[10];
3031 sprintf (buf, "U%08X", ch);
3032 seq = charmap_find_value (charmap, buf, 9);
3034 if (seq == NULL)
/* Neither name is known to the charmap: report it unless be_quiet is set.  */
3036 if (!be_quiet)
3037 WITH_CUR_LOCALE (error (0, 0, _("\
%s: character `%s' not defined in charmap while needed as default value"),
3039 "LC_CTYPE", tmp));
/* Found, but not a single byte: this message is not guarded by be_quiet.  */
3041 else if (seq->nbytes != 1)
3042 WITH_CUR_LOCALE (error (0, 0, _("\
%s: character `%s' in charmap not representable with one byte"),
3044 "LC_CTYPE", tmp));
3045 else
3046 ctype->class256_collection[seq->bytes[0]] |= bit;
3048 /* No need to search here, the ASCII value is also the Unicode
3049 value. */
3050 ELEM (ctype, class_collection, , ch) |= bitw;
3054 /* Set default values if keyword was not present. */
3055 if ((ctype->class_done & BITw (tok_upper)) == 0)
3056 /* "If this keyword [upper] is not specified, the uppercase letters
3057 `A' through `Z', ..., shall automatically belong to this class,
3058 with implementation defined character values." [P1003.2, 2.5.2.1] */
3059 set_default (BITPOS (tok_upper), 'A', 'Z');
3061 if ((ctype->class_done & BITw (tok_lower)) == 0)
3062 /* "If this keyword [lower] is not specified, the lowercase letters
3063 `a' through `z', ..., shall automatically belong to this class,
3064 with implementation defined character values." [P1003.2, 2.5.2.1] */
3065 set_default (BITPOS (tok_lower), 'a', 'z');
3067 if ((ctype->class_done & BITw (tok_alpha)) == 0)
3069 /* Table 2-6 in P1003.2 says that characters in class `upper' or
3070 class `lower' *must* be in class `alpha'. */
3071 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
3072 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
3074 for (cnt = 0; cnt < 256; ++cnt)
3075 if ((ctype->class256_collection[cnt] & mask) != 0)
3076 ctype->class256_collection[cnt] |= BIT (tok_alpha);
3078 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3079 if ((ctype->class_collection[cnt] & maskw) != 0)
3080 ctype->class_collection[cnt] |= BITw (tok_alpha);
3083 if ((ctype->class_done & BITw (tok_digit)) == 0)
3084 /* "If this keyword [digit] is not specified, the digits `0' through
3085 `9', ..., shall automatically belong to this class, with
3086 implementation-defined character values." [P1003.2, 2.5.2.1] */
3087 set_default (BITPOS (tok_digit), '0', '9');
3089 /* "Only characters specified for the `alpha' and `digit' keyword
3090 shall be specified. Characters specified for the keyword `alpha'
3091 and `digit' are automatically included in this class. */
3093 unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
3094 unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
3096 for (cnt = 0; cnt < 256; ++cnt)
3097 if ((ctype->class256_collection[cnt] & mask) != 0)
3098 ctype->class256_collection[cnt] |= BIT (tok_alnum);
3100 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3101 if ((ctype->class_collection[cnt] & maskw) != 0)
3102 ctype->class_collection[cnt] |= BITw (tok_alnum);
3105 if ((ctype->class_done & BITw (tok_space)) == 0)
3106 /* "If this keyword [space] is not specified, the characters <space>,
3107 <form-feed>, <newline>, <carriage-return>, <tab>, and
3108 <vertical-tab>, ..., shall automatically belong to this class,
3109 with implementation-defined character values." [P1003.2, 2.5.2.1] */
3111 struct charseq *seq;
3113 seq = charmap_find_value (charmap, "space", 5);
3114 if (seq == NULL)
3115 seq = charmap_find_value (charmap, "SP", 2);
3116 if (seq == NULL)
3117 seq = charmap_find_value (charmap, "U00000020", 9);
3118 if (seq == NULL)
3120 if (!be_quiet)
3121 WITH_CUR_LOCALE (error (0, 0, _("\
3122 %s: character `%s' not defined while needed as default value"),
3123 "LC_CTYPE", "<space>"));
3125 else if (seq->nbytes != 1)
3126 WITH_CUR_LOCALE (error (0, 0, _("\
3127 %s: character `%s' in charmap not representable with one byte"),
3128 "LC_CTYPE", "<space>"));
3129 else
3130 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3132 /* No need to search. */
3133 ELEM (ctype, class_collection, , L' ') |= BITw (tok_space);
3135 seq = charmap_find_value (charmap, "form-feed", 9);
3136 if (seq == NULL)
3137 seq = charmap_find_value (charmap, "U0000000C", 9);
3138 if (seq == NULL)
3140 if (!be_quiet)
3141 WITH_CUR_LOCALE (error (0, 0, _("\
3142 %s: character `%s' not defined while needed as default value"),
3143 "LC_CTYPE", "<form-feed>"));
3145 else if (seq->nbytes != 1)
3146 WITH_CUR_LOCALE (error (0, 0, _("\
3147 %s: character `%s' in charmap not representable with one byte"),
3148 "LC_CTYPE", "<form-feed>"));
3149 else
3150 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3152 /* No need to search. */
3153 ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space);
3156 seq = charmap_find_value (charmap, "newline", 7);
3157 if (seq == NULL)
3158 seq = charmap_find_value (charmap, "U0000000A", 9);
3159 if (seq == NULL)
3161 if (!be_quiet)
3162 WITH_CUR_LOCALE (error (0, 0, _("\
3163 character `%s' not defined while needed as default value"),
3164 "<newline>"));
3166 else if (seq->nbytes != 1)
3167 WITH_CUR_LOCALE (error (0, 0, _("\
3168 %s: character `%s' in charmap not representable with one byte"),
3169 "LC_CTYPE", "<newline>"));
3170 else
3171 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3173 /* No need to search. */
3174 ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space);
3177 seq = charmap_find_value (charmap, "carriage-return", 15);
3178 if (seq == NULL)
3179 seq = charmap_find_value (charmap, "U0000000D", 9);
3180 if (seq == NULL)
3182 if (!be_quiet)
3183 WITH_CUR_LOCALE (error (0, 0, _("\
3184 %s: character `%s' not defined while needed as default value"),
3185 "LC_CTYPE", "<carriage-return>"));
3187 else if (seq->nbytes != 1)
3188 WITH_CUR_LOCALE (error (0, 0, _("\
3189 %s: character `%s' in charmap not representable with one byte"),
3190 "LC_CTYPE", "<carriage-return>"));
3191 else
3192 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3194 /* No need to search. */
3195 ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space);
3198 seq = charmap_find_value (charmap, "tab", 3);
3199 if (seq == NULL)
3200 seq = charmap_find_value (charmap, "U00000009", 9);
3201 if (seq == NULL)
3203 if (!be_quiet)
3204 WITH_CUR_LOCALE (error (0, 0, _("\
3205 %s: character `%s' not defined while needed as default value"),
3206 "LC_CTYPE", "<tab>"));
3208 else if (seq->nbytes != 1)
3209 WITH_CUR_LOCALE (error (0, 0, _("\
3210 %s: character `%s' in charmap not representable with one byte"),
3211 "LC_CTYPE", "<tab>"));
3212 else
3213 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3215 /* No need to search. */
3216 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space);
3219 seq = charmap_find_value (charmap, "vertical-tab", 12);
3220 if (seq == NULL)
3221 seq = charmap_find_value (charmap, "U0000000B", 9);
3222 if (seq == NULL)
3224 if (!be_quiet)
3225 WITH_CUR_LOCALE (error (0, 0, _("\
3226 %s: character `%s' not defined while needed as default value"),
3227 "LC_CTYPE", "<vertical-tab>"));
3229 else if (seq->nbytes != 1)
3230 WITH_CUR_LOCALE (error (0, 0, _("\
3231 %s: character `%s' in charmap not representable with one byte"),
3232 "LC_CTYPE", "<vertical-tab>"));
3233 else
3234 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3236 /* No need to search. */
3237 ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space);
3240 if ((ctype->class_done & BITw (tok_xdigit)) == 0)
3241 /* "If this keyword is not specified, the digits `0' to `9', the
3242 uppercase letters `A' through `F', and the lowercase letters `a'
3243 through `f', ..., shall automatically belong to this class, with
3244 implementation defined character values." [P1003.2, 2.5.2.1] */
3246 set_default (BITPOS (tok_xdigit), '0', '9');
3247 set_default (BITPOS (tok_xdigit), 'A', 'F');
3248 set_default (BITPOS (tok_xdigit), 'a', 'f');
3251 if ((ctype->class_done & BITw (tok_blank)) == 0)
3252 /* "If this keyword [blank] is unspecified, the characters <space> and
3253 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
3255 struct charseq *seq;
3257 seq = charmap_find_value (charmap, "space", 5);
3258 if (seq == NULL)
3259 seq = charmap_find_value (charmap, "SP", 2);
3260 if (seq == NULL)
3261 seq = charmap_find_value (charmap, "U00000020", 9);
3262 if (seq == NULL)
3264 if (!be_quiet)
3265 WITH_CUR_LOCALE (error (0, 0, _("\
3266 %s: character `%s' not defined while needed as default value"),
3267 "LC_CTYPE", "<space>"));
3269 else if (seq->nbytes != 1)
3270 WITH_CUR_LOCALE (error (0, 0, _("\
3271 %s: character `%s' in charmap not representable with one byte"),
3272 "LC_CTYPE", "<space>"));
3273 else
3274 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3276 /* No need to search. */
3277 ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank);
3280 seq = charmap_find_value (charmap, "tab", 3);
3281 if (seq == NULL)
3282 seq = charmap_find_value (charmap, "U00000009", 9);
3283 if (seq == NULL)
3285 if (!be_quiet)
3286 WITH_CUR_LOCALE (error (0, 0, _("\
3287 %s: character `%s' not defined while needed as default value"),
3288 "LC_CTYPE", "<tab>"));
3290 else if (seq->nbytes != 1)
3291 WITH_CUR_LOCALE (error (0, 0, _("\
3292 %s: character `%s' in charmap not representable with one byte"),
3293 "LC_CTYPE", "<tab>"));
3294 else
3295 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3297 /* No need to search. */
3298 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank);
3301 if ((ctype->class_done & BITw (tok_graph)) == 0)
3302 /* "If this keyword [graph] is not specified, characters specified for
3303 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3304 shall belong to this character class." [P1003.2, 2.5.2.1] */
3306 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3307 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3308 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3309 BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3310 BITw (tok_punct);
3311 size_t cnt;
3313 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3314 if ((ctype->class_collection[cnt] & maskw) != 0)
3315 ctype->class_collection[cnt] |= BITw (tok_graph);
3317 for (cnt = 0; cnt < 256; ++cnt)
3318 if ((ctype->class256_collection[cnt] & mask) != 0)
3319 ctype->class256_collection[cnt] |= BIT (tok_graph);
3322 if ((ctype->class_done & BITw (tok_print)) == 0)
3323 /* "If this keyword [print] is not provided, characters specified for
3324 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3325 and the <space> character shall belong to this character class."
3326 [P1003.2, 2.5.2.1] */
3328 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3329 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3330 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3331 BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3332 BITw (tok_punct);
3333 size_t cnt;
3334 struct charseq *seq;
3336 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3337 if ((ctype->class_collection[cnt] & maskw) != 0)
3338 ctype->class_collection[cnt] |= BITw (tok_print);
3340 for (cnt = 0; cnt < 256; ++cnt)
3341 if ((ctype->class256_collection[cnt] & mask) != 0)
3342 ctype->class256_collection[cnt] |= BIT (tok_print);
3345 seq = charmap_find_value (charmap, "space", 5);
3346 if (seq == NULL)
3347 seq = charmap_find_value (charmap, "SP", 2);
3348 if (seq == NULL)
3349 seq = charmap_find_value (charmap, "U00000020", 9);
3350 if (seq == NULL)
3352 if (!be_quiet)
3353 WITH_CUR_LOCALE (error (0, 0, _("\
3354 %s: character `%s' not defined while needed as default value"),
3355 "LC_CTYPE", "<space>"));
3357 else if (seq->nbytes != 1)
3358 WITH_CUR_LOCALE (error (0, 0, _("\
3359 %s: character `%s' in charmap not representable with one byte"),
3360 "LC_CTYPE", "<space>"));
3361 else
3362 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
3364 /* No need to search. */
3365 ELEM (ctype, class_collection, , L' ') |= BITw (tok_print);
3368 if (ctype->tomap_done[0] == 0)
3369 /* "If this keyword [toupper] is not specified, the lowercase letters
3370 `a' through `z', and their corresponding uppercase letters `A' to
3371 `Z', ..., shall automatically be included, with implementation-
3372 defined character values." [P1003.2, 2.5.2.1] */
3374 char tmp[4];
3375 int ch;
3377 strcpy (tmp, "<?>");
3379 for (ch = 'a'; ch <= 'z'; ++ch)
3381 struct charseq *seq_from, *seq_to;
3383 tmp[1] = (char) ch;
3385 seq_from = charmap_find_value (charmap, &tmp[1], 1);
3386 if (seq_from == NULL)
3388 char buf[10];
3389 sprintf (buf, "U%08X", ch);
3390 seq_from = charmap_find_value (charmap, buf, 9);
3392 if (seq_from == NULL)
3394 if (!be_quiet)
3395 WITH_CUR_LOCALE (error (0, 0, _("\
3396 %s: character `%s' not defined while needed as default value"),
3397 "LC_CTYPE", tmp));
3399 else if (seq_from->nbytes != 1)
3401 if (!be_quiet)
3402 WITH_CUR_LOCALE (error (0, 0, _("\
3403 %s: character `%s' needed as default value not representable with one byte"),
3404 "LC_CTYPE", tmp));
3406 else
3408 /* This conversion is implementation defined. */
3409 tmp[1] = (char) (ch + ('A' - 'a'));
3410 seq_to = charmap_find_value (charmap, &tmp[1], 1);
3411 if (seq_to == NULL)
3413 char buf[10];
3414 sprintf (buf, "U%08X", ch + ('A' - 'a'));
3415 seq_to = charmap_find_value (charmap, buf, 9);
3417 if (seq_to == NULL)
3419 if (!be_quiet)
3420 WITH_CUR_LOCALE (error (0, 0, _("\
3421 %s: character `%s' not defined while needed as default value"),
3422 "LC_CTYPE", tmp));
3424 else if (seq_to->nbytes != 1)
3426 if (!be_quiet)
3427 WITH_CUR_LOCALE (error (0, 0, _("\
3428 %s: character `%s' needed as default value not representable with one byte"),
3429 "LC_CTYPE", tmp));
3431 else
3432 /* The index [0] is determined by the order of the
3433 `ctype_map_newP' calls in `ctype_startup'. */
3434 ctype->map256_collection[0][seq_from->bytes[0]]
3435 = seq_to->bytes[0];
3438 /* No need to search. */
3439 ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
3443 if (ctype->tomap_done[1] == 0)
3444 /* "If this keyword [tolower] is not specified, the mapping shall be
3445 the reverse mapping of the one specified to `toupper'." [P1003.2] */
3447 for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
3448 if (ctype->map_collection[0][cnt] != 0)
3449 ELEM (ctype, map_collection, [1],
3450 ctype->map_collection[0][cnt])
3451 = ctype->charnames[cnt];
3453 for (cnt = 0; cnt < 256; ++cnt)
3454 if (ctype->map256_collection[0][cnt] != 0)
3455 ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
3458 if (ctype->outdigits_act != 10)
3460 if (ctype->outdigits_act != 0)
3461 WITH_CUR_LOCALE (error (0, 0, _("\
3462 %s: field `%s' does not contain exactly ten entries"),
3463 "LC_CTYPE", "outdigit"));
3465 for (cnt = ctype->outdigits_act; cnt < 10; ++cnt)
3467 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3468 digits + cnt, 1);
3470 if (ctype->mboutdigits[cnt] == NULL)
3471 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3472 longnames[cnt],
3473 strlen (longnames[cnt]));
3475 if (ctype->mboutdigits[cnt] == NULL)
3476 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3477 uninames[cnt], 9);
3479 if (ctype->mboutdigits[cnt] == NULL)
3481 /* Provide a replacement. */
3482 WITH_CUR_LOCALE (error (0, 0, _("\
3483 no output digits defined and none of the standard names in the charmap")));
3485 ctype->mboutdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
3486 sizeof (struct charseq)
3487 + 1);
3489 /* This is better than nothing. */
3490 ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
3491 ctype->mboutdigits[cnt]->nbytes = 1;
3494 ctype->wcoutdigits[cnt] = L'0' + cnt;
3497 ctype->outdigits_act = 10;
/* Sparse three-level bit tables for wide-character classes.
   The on-disk layout and the meaning of the parameters p and q are
   described in wchar-lookup.h.  */

struct wctype_table
{
  /* Parameters.  A character WC is split by the accessors in this file
     into a level-1 index (WC >> (q + p + 5)), q bits of level-2 index,
     p bits of level-3 index and the low 5 bits selecting one bit inside
     a 32-bit word.  */
  unsigned int p;
  unsigned int q;

  /* Working representation, grown on demand while entries are added.
     level1 is counted in single entries; level2 and level3 are counted
     in blocks of (1 << q) and (1 << p) words respectively.  */
  size_t level1_alloc;
  size_t level1_size;
  uint32_t *level1;
  size_t level2_alloc;
  size_t level2_size;
  uint32_t *level2;
  size_t level3_alloc;
  size_t level3_size;
  uint32_t *level3;

  /* Compressed representation, produced by wctype_table_finalize.  */
  size_t result_size;
  char *result;
};
3525 /* Initialize. Assumes t->p and t->q have already been set. */
3526 static inline void
3527 wctype_table_init (struct wctype_table *t)
3529 t->level1 = NULL;
3530 t->level1_alloc = t->level1_size = 0;
3531 t->level2 = NULL;
3532 t->level2_alloc = t->level2_size = 0;
3533 t->level3 = NULL;
3534 t->level3_alloc = t->level3_size = 0;
3537 /* Retrieve an entry. */
3538 static inline int
3539 wctype_table_get (struct wctype_table *t, uint32_t wc)
3541 uint32_t index1 = wc >> (t->q + t->p + 5);
3542 if (index1 < t->level1_size)
3544 uint32_t lookup1 = t->level1[index1];
3545 if (lookup1 != EMPTY)
3547 uint32_t index2 = ((wc >> (t->p + 5)) & ((1 << t->q) - 1))
3548 + (lookup1 << t->q);
3549 uint32_t lookup2 = t->level2[index2];
3550 if (lookup2 != EMPTY)
3552 uint32_t index3 = ((wc >> 5) & ((1 << t->p) - 1))
3553 + (lookup2 << t->p);
3554 uint32_t lookup3 = t->level3[index3];
3555 uint32_t index4 = wc & 0x1f;
3557 return (lookup3 >> index4) & 1;
3561 return 0;
3564 /* Add one entry. */
3565 static void
3566 wctype_table_add (struct wctype_table *t, uint32_t wc)
3568 uint32_t index1 = wc >> (t->q + t->p + 5);
3569 uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1);
3570 uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1);
3571 uint32_t index4 = wc & 0x1f;
3572 size_t i, i1, i2;
3574 if (index1 >= t->level1_size)
3576 if (index1 >= t->level1_alloc)
3578 size_t alloc = 2 * t->level1_alloc;
3579 if (alloc <= index1)
3580 alloc = index1 + 1;
3581 t->level1 = (uint32_t *) xrealloc ((char *) t->level1,
3582 alloc * sizeof (uint32_t));
3583 t->level1_alloc = alloc;
3585 while (index1 >= t->level1_size)
3586 t->level1[t->level1_size++] = EMPTY;
3589 if (t->level1[index1] == EMPTY)
3591 if (t->level2_size == t->level2_alloc)
3593 size_t alloc = 2 * t->level2_alloc + 1;
3594 t->level2 = (uint32_t *) xrealloc ((char *) t->level2,
3595 (alloc << t->q) * sizeof (uint32_t));
3596 t->level2_alloc = alloc;
3598 i1 = t->level2_size << t->q;
3599 i2 = (t->level2_size + 1) << t->q;
3600 for (i = i1; i < i2; i++)
3601 t->level2[i] = EMPTY;
3602 t->level1[index1] = t->level2_size++;
3605 index2 += t->level1[index1] << t->q;
3607 if (t->level2[index2] == EMPTY)
3609 if (t->level3_size == t->level3_alloc)
3611 size_t alloc = 2 * t->level3_alloc + 1;
3612 t->level3 = (uint32_t *) xrealloc ((char *) t->level3,
3613 (alloc << t->p) * sizeof (uint32_t));
3614 t->level3_alloc = alloc;
3616 i1 = t->level3_size << t->p;
3617 i2 = (t->level3_size + 1) << t->p;
3618 for (i = i1; i < i2; i++)
3619 t->level3[i] = 0;
3620 t->level2[index2] = t->level3_size++;
3623 index3 += t->level2[index2] << t->p;
3625 t->level3[index3] |= (uint32_t)1 << index4;
3628 /* Finalize and shrink. */
3629 static void
3630 wctype_table_finalize (struct wctype_table *t)
3632 size_t i, j, k;
3633 uint32_t reorder3[t->level3_size];
3634 uint32_t reorder2[t->level2_size];
3635 uint32_t level1_offset, level2_offset, level3_offset;
3637 /* Uniquify level3 blocks. */
3638 k = 0;
3639 for (j = 0; j < t->level3_size; j++)
3641 for (i = 0; i < k; i++)
3642 if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3643 (1 << t->p) * sizeof (uint32_t)) == 0)
3644 break;
3645 /* Relocate block j to block i. */
3646 reorder3[j] = i;
3647 if (i == k)
3649 if (i != j)
3650 memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3651 (1 << t->p) * sizeof (uint32_t));
3652 k++;
3655 t->level3_size = k;
3657 for (i = 0; i < (t->level2_size << t->q); i++)
3658 if (t->level2[i] != EMPTY)
3659 t->level2[i] = reorder3[t->level2[i]];
3661 /* Uniquify level2 blocks. */
3662 k = 0;
3663 for (j = 0; j < t->level2_size; j++)
3665 for (i = 0; i < k; i++)
3666 if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3667 (1 << t->q) * sizeof (uint32_t)) == 0)
3668 break;
3669 /* Relocate block j to block i. */
3670 reorder2[j] = i;
3671 if (i == k)
3673 if (i != j)
3674 memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3675 (1 << t->q) * sizeof (uint32_t));
3676 k++;
3679 t->level2_size = k;
3681 for (i = 0; i < t->level1_size; i++)
3682 if (t->level1[i] != EMPTY)
3683 t->level1[i] = reorder2[t->level1[i]];
3685 /* Create and fill the resulting compressed representation. */
3686 t->result_size =
3687 5 * sizeof (uint32_t)
3688 + t->level1_size * sizeof (uint32_t)
3689 + (t->level2_size << t->q) * sizeof (uint32_t)
3690 + (t->level3_size << t->p) * sizeof (uint32_t);
3691 t->result = (char *) xmalloc (t->result_size);
3693 level1_offset =
3694 5 * sizeof (uint32_t);
3695 level2_offset =
3696 5 * sizeof (uint32_t)
3697 + t->level1_size * sizeof (uint32_t);
3698 level3_offset =
3699 5 * sizeof (uint32_t)
3700 + t->level1_size * sizeof (uint32_t)
3701 + (t->level2_size << t->q) * sizeof (uint32_t);
3703 ((uint32_t *) t->result)[0] = t->q + t->p + 5;
3704 ((uint32_t *) t->result)[1] = t->level1_size;
3705 ((uint32_t *) t->result)[2] = t->p + 5;
3706 ((uint32_t *) t->result)[3] = (1 << t->q) - 1;
3707 ((uint32_t *) t->result)[4] = (1 << t->p) - 1;
3709 for (i = 0; i < t->level1_size; i++)
3710 ((uint32_t *) (t->result + level1_offset))[i] =
3711 (t->level1[i] == EMPTY
3713 : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3715 for (i = 0; i < (t->level2_size << t->q); i++)
3716 ((uint32_t *) (t->result + level2_offset))[i] =
3717 (t->level2[i] == EMPTY
3719 : (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
3721 for (i = 0; i < (t->level3_size << t->p); i++)
3722 ((uint32_t *) (t->result + level3_offset))[i] = t->level3[i];
3724 if (t->level1_alloc > 0)
3725 free (t->level1);
3726 if (t->level2_alloc > 0)
3727 free (t->level2);
3728 if (t->level3_alloc > 0)
3729 free (t->level3);
3732 #define TABLE wcwidth_table
3733 #define ELEMENT uint8_t
3734 #define DEFAULT 0xff
3735 #include "3level.h"
3737 #define TABLE wctrans_table
3738 #define ELEMENT int32_t
3739 #define DEFAULT 0
3740 #define wctrans_table_add wctrans_table_add_internal
3741 #include "3level.h"
3742 #undef wctrans_table_add
3743 /* The wctrans_table must actually store the difference between the
3744 desired result and the argument. */
3745 static inline void
3746 wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
3748 wctrans_table_add_internal (t, wc, mapped_wc - wc);
3752 /* Flattens the included transliterations into a translit list.
3753 Inserts them in the list at `cursor', and returns the new cursor. */
3754 static struct translit_t **
3755 translit_flatten (struct locale_ctype_t *ctype,
3756 const struct charmap_t *charmap,
3757 struct translit_t **cursor)
3759 while (ctype->translit_include != NULL)
3761 const char *copy_locale = ctype->translit_include->copy_locale;
3762 const char *copy_repertoire = ctype->translit_include->copy_repertoire;
3763 struct localedef_t *other;
3765 /* Unchain the include statement. During the depth-first traversal
3766 we don't want to visit any locale more than once. */
3767 ctype->translit_include = ctype->translit_include->next;
3769 other = find_locale (LC_CTYPE, copy_locale, copy_repertoire, charmap);
3771 if (other == NULL)
3773 WITH_CUR_LOCALE (error (0, 0, _("\
3774 %s: transliteration data from locale `%s' not available"),
3775 "LC_CTYPE", copy_locale));
3777 else
3779 struct locale_ctype_t *other_ctype =
3780 other->categories[LC_CTYPE].ctype;
3782 cursor = translit_flatten (other_ctype, charmap, cursor);
3783 assert (other_ctype->translit_include == NULL);
3785 if (other_ctype->translit != NULL)
3787 /* Insert the other_ctype->translit list at *cursor. */
3788 struct translit_t *endp = other_ctype->translit;
3789 while (endp->next != NULL)
3790 endp = endp->next;
3792 endp->next = *cursor;
3793 *cursor = other_ctype->translit;
3795 /* Avoid any risk of circular lists. */
3796 other_ctype->translit = NULL;
3798 cursor = &endp->next;
3801 if (ctype->default_missing == NULL)
3802 ctype->default_missing = other_ctype->default_missing;
3806 return cursor;
3809 static void
3810 allocate_arrays (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
3811 struct repertoire_t *repertoire)
3813 size_t idx, nr;
3814 const void *key;
3815 size_t len;
3816 void *vdata;
3817 void *curs;
3819 /* You wonder about this amount of memory? This is only because some
3820 users do not manage to address the array with unsigned values or
3821 data types with range >= 256. '\200' would result in the array
3822 index -128. To help these poor people we duplicate the entries for
3823 128 up to 255 below the entry for \0. */
3824 ctype->ctype_b = (char_class_t *) xcalloc (256 + 128, sizeof (char_class_t));
3825 ctype->ctype32_b = (char_class32_t *) xcalloc (256, sizeof (char_class32_t));
3826 ctype->class_b = (uint32_t **)
3827 xmalloc (ctype->nr_charclass * sizeof (uint32_t *));
3828 ctype->class_3level = (struct iovec *)
3829 xmalloc (ctype->nr_charclass * sizeof (struct iovec));
3831 /* This is the array accessed using the multibyte string elements. */
3832 for (idx = 0; idx < 256; ++idx)
3833 ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
3835 /* Mirror first 127 entries. We must take care that entry -1 is not
3836 mirrored because EOF == -1. */
3837 for (idx = 0; idx < 127; ++idx)
3838 ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
3840 /* The 32 bit array contains all characters < 0x100. */
3841 for (idx = 0; idx < ctype->class_collection_act; ++idx)
3842 if (ctype->charnames[idx] < 0x100)
3843 ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
3845 for (nr = 0; nr < ctype->nr_charclass; nr++)
3847 ctype->class_b[nr] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
3849 for (idx = 0; idx < 256; ++idx)
3850 if (ctype->class256_collection[idx] & _ISbit (nr))
3851 ctype->class_b[nr][idx >> 5] |= (uint32_t)1 << (idx & 0x1f);
3854 for (nr = 0; nr < ctype->nr_charclass; nr++)
3856 struct wctype_table t;
3858 t.p = 4; /* or: 5 */
3859 t.q = 7; /* or: 6 */
3860 wctype_table_init (&t);
3862 for (idx = 0; idx < ctype->class_collection_act; ++idx)
3863 if (ctype->class_collection[idx] & _ISwbit (nr))
3864 wctype_table_add (&t, ctype->charnames[idx]);
3866 wctype_table_finalize (&t);
3868 if (verbose)
3869 WITH_CUR_LOCALE (fprintf (stderr, _("\
3870 %s: table for class \"%s\": %lu bytes\n"),
3871 "LC_CTYPE", ctype->classnames[nr],
3872 (unsigned long int) t.result_size));
3874 ctype->class_3level[nr].iov_base = t.result;
3875 ctype->class_3level[nr].iov_len = t.result_size;
3878 /* Room for table of mappings. */
3879 ctype->map_b = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3880 ctype->map32_b = (uint32_t **) xmalloc (ctype->map_collection_nr
3881 * sizeof (uint32_t *));
3882 ctype->map_3level = (struct iovec *)
3883 xmalloc (ctype->map_collection_nr * sizeof (struct iovec));
3885 /* Fill in all mappings. */
3886 for (idx = 0; idx < 2; ++idx)
3888 unsigned int idx2;
3890 /* Allocate table. */
3891 ctype->map_b[idx] = (uint32_t *)
3892 xmalloc ((256 + 128) * sizeof (uint32_t));
3894 /* Copy values from collection. */
3895 for (idx2 = 0; idx2 < 256; ++idx2)
3896 ctype->map_b[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
3898 /* Mirror first 127 entries. We must take care not to map entry
3899 -1 because EOF == -1. */
3900 for (idx2 = 0; idx2 < 127; ++idx2)
3901 ctype->map_b[idx][idx2] = ctype->map_b[idx][256 + idx2];
3903 /* EOF must map to EOF. */
3904 ctype->map_b[idx][127] = EOF;
3907 for (idx = 0; idx < ctype->map_collection_nr; ++idx)
3909 unsigned int idx2;
3911 /* Allocate table. */
3912 ctype->map32_b[idx] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
3914 /* Copy values from collection. Default is identity mapping. */
3915 for (idx2 = 0; idx2 < 256; ++idx2)
3916 ctype->map32_b[idx][idx2] =
3917 (ctype->map_collection[idx][idx2] != 0
3918 ? ctype->map_collection[idx][idx2]
3919 : idx2);
3922 for (nr = 0; nr < ctype->map_collection_nr; nr++)
3924 struct wctrans_table t;
3926 t.p = 7;
3927 t.q = 9;
3928 wctrans_table_init (&t);
3930 for (idx = 0; idx < ctype->map_collection_act[nr]; ++idx)
3931 if (ctype->map_collection[nr][idx] != 0)
3932 wctrans_table_add (&t, ctype->charnames[idx],
3933 ctype->map_collection[nr][idx]);
3935 wctrans_table_finalize (&t);
3937 if (verbose)
3938 WITH_CUR_LOCALE (fprintf (stderr, _("\
3939 %s: table for map \"%s\": %lu bytes\n"),
3940 "LC_CTYPE", ctype->mapnames[nr],
3941 (unsigned long int) t.result_size));
3943 ctype->map_3level[nr].iov_base = t.result;
3944 ctype->map_3level[nr].iov_len = t.result_size;
3947 /* Extra array for class and map names. */
3948 ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3949 * sizeof (uint32_t));
3950 ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3951 * sizeof (uint32_t));
3953 ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
3954 ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
3956 /* Array for width information. Because the expected widths are very
3957 small (never larger than 2) we use only one single byte. This
3958 saves space.
3959 We put only printable characters in the table. wcwidth is specified
3960 to return -1 for non-printable characters. Doing the check here
3961 saves a run-time check.
3962 But we put L'\0' in the table. This again saves a run-time check. */
3964 struct wcwidth_table t;
3966 t.p = 7;
3967 t.q = 9;
3968 wcwidth_table_init (&t);
3970 /* First set all the printable characters of the character set to
3971 the default width. */
3972 curs = NULL;
3973 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
3975 struct charseq *data = (struct charseq *) vdata;
3977 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
3978 data->ucs4 = repertoire_find_value (ctype->repertoire,
3979 data->name, len);
3981 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
3983 uint32_t *class_bits =
3984 find_idx (ctype, &ctype->class_collection, NULL,
3985 &ctype->class_collection_act, data->ucs4);
3987 if (class_bits != NULL && (*class_bits & BITw (tok_print)))
3988 wcwidth_table_add (&t, data->ucs4, charmap->width_default);
3992 /* Now add the explicitly specified widths. */
3993 if (charmap->width_rules != NULL)
3995 size_t cnt;
3997 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
3999 unsigned char bytes[charmap->mb_cur_max];
4000 int nbytes = charmap->width_rules[cnt].from->nbytes;
4002 /* We have the range of character for which the width is
4003 specified described using byte sequences of the multibyte
4004 charset. We have to convert this to UCS4 now. And we
4005 cannot simply convert the beginning and the end of the
4006 sequence, we have to iterate over the byte sequence and
4007 convert it for every single character. */
4008 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
4010 while (nbytes < charmap->width_rules[cnt].to->nbytes
4011 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
4012 nbytes) <= 0)
4014 /* Find the UCS value for `bytes'. */
4015 int inner;
4016 uint32_t wch;
4017 struct charseq *seq =
4018 charmap_find_symbol (charmap, bytes, nbytes);
4020 if (seq == NULL)
4021 wch = ILLEGAL_CHAR_VALUE;
4022 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
4023 wch = seq->ucs4;
4024 else
4025 wch = repertoire_find_value (ctype->repertoire, seq->name,
4026 strlen (seq->name));
4028 if (wch != ILLEGAL_CHAR_VALUE)
4030 /* Store the value. */
4031 uint32_t *class_bits =
4032 find_idx (ctype, &ctype->class_collection, NULL,
4033 &ctype->class_collection_act, wch);
4035 if (class_bits != NULL && (*class_bits & BITw (tok_print)))
4036 wcwidth_table_add (&t, wch,
4037 charmap->width_rules[cnt].width);
4040 /* "Increment" the bytes sequence. */
4041 inner = nbytes - 1;
4042 while (inner >= 0 && bytes[inner] == 0xff)
4043 --inner;
4045 if (inner < 0)
4047 /* We have to extend the byte sequence. */
4048 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
4049 break;
4051 bytes[0] = 1;
4052 memset (&bytes[1], 0, nbytes);
4053 ++nbytes;
4055 else
4057 ++bytes[inner];
4058 while (++inner < nbytes)
4059 bytes[inner] = 0;
4065 /* Set the width of L'\0' to 0. */
4066 wcwidth_table_add (&t, 0, 0);
4068 wcwidth_table_finalize (&t);
4070 if (verbose)
4071 WITH_CUR_LOCALE (fprintf (stderr, _("%s: table for width: %lu bytes\n"),
4072 "LC_CTYPE", (unsigned long int) t.result_size));
4074 ctype->width.iov_base = t.result;
4075 ctype->width.iov_len = t.result_size;
4078 /* Set MB_CUR_MAX. */
4079 ctype->mb_cur_max = charmap->mb_cur_max;
4081 /* Now determine the table for the transliteration information.
4083 XXX It is not yet clear to me whether it is worth implementing a
4084 complicated algorithm which uses a hash table to locate the entries.
4085 For now I'll use a simple array which can be searching using binary
4086 search. */
4087 if (ctype->translit_include != NULL)
4088 /* Traverse the locales mentioned in the `include' statements in a
4089 depth-first way and fold in their transliteration information. */
4090 translit_flatten (ctype, charmap, &ctype->translit);
4092 if (ctype->translit != NULL)
4094 /* First count how many entries we have. This is the upper limit
4095 since some entries from the included files might be overwritten. */
4096 size_t number = 0;
4097 size_t cnt;
4098 struct translit_t *runp = ctype->translit;
4099 struct translit_t **sorted;
4100 size_t from_len, to_len;
4102 while (runp != NULL)
4104 ++number;
4105 runp = runp->next;
4108 /* Next we allocate an array large enough and fill in the values. */
4109 sorted = (struct translit_t **) alloca (number
4110 * sizeof (struct translit_t **));
4111 runp = ctype->translit;
4112 number = 0;
4115 /* Search for the place where to insert this string.
4116 XXX Better use a real sorting algorithm later. */
4117 size_t idx = 0;
4118 int replace = 0;
4120 while (idx < number)
4122 int res = wcscmp ((const wchar_t *) sorted[idx]->from,
4123 (const wchar_t *) runp->from);
4124 if (res == 0)
4126 replace = 1;
4127 break;
4129 if (res > 0)
4130 break;
4131 ++idx;
4134 if (replace)
4135 sorted[idx] = runp;
4136 else
4138 memmove (&sorted[idx + 1], &sorted[idx],
4139 (number - idx) * sizeof (struct translit_t *));
4140 sorted[idx] = runp;
4141 ++number;
4144 runp = runp->next;
4146 while (runp != NULL);
4148 /* The next step is putting all the possible transliteration
4149 strings in one memory block so that we can write it out.
4150 We need several different blocks:
4151 - index to the from-string array
4152 - from-string array
4153 - index to the to-string array
4154 - to-string array.
4156 from_len = to_len = 0;
4157 for (cnt = 0; cnt < number; ++cnt)
4159 struct translit_to_t *srunp;
4160 from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4161 srunp = sorted[cnt]->to;
4162 while (srunp != NULL)
4164 to_len += wcslen ((const wchar_t *) srunp->str) + 1;
4165 srunp = srunp->next;
4167 /* Plus one for the extra NUL character marking the end of
4168 the list for the current entry. */
4169 ++to_len;
4172 /* We can allocate the arrays for the results. */
4173 ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
4174 ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
4175 ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
4176 ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
4178 from_len = 0;
4179 to_len = 0;
4180 for (cnt = 0; cnt < number; ++cnt)
4182 size_t len;
4183 struct translit_to_t *srunp;
4185 ctype->translit_from_idx[cnt] = from_len;
4186 ctype->translit_to_idx[cnt] = to_len;
4188 len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4189 wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
4190 (const wchar_t *) sorted[cnt]->from, len);
4191 from_len += len;
4193 ctype->translit_to_idx[cnt] = to_len;
4194 srunp = sorted[cnt]->to;
4195 while (srunp != NULL)
4197 len = wcslen ((const wchar_t *) srunp->str) + 1;
4198 wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
4199 (const wchar_t *) srunp->str, len);
4200 to_len += len;
4201 srunp = srunp->next;
4203 ctype->translit_to_tbl[to_len++] = L'\0';
4206 /* Store the information about the length. */
4207 ctype->translit_idx_size = number;
4208 ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
4209 ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
4211 else
4213 /* Provide some dummy pointers since we have nothing to write out. */
4214 static uint32_t no_str = { 0 };
4216 ctype->translit_from_idx = &no_str;
4217 ctype->translit_from_tbl = &no_str;
4218 ctype->translit_to_tbl = &no_str;
4219 ctype->translit_idx_size = 0;
4220 ctype->translit_from_tbl_size = 0;
4221 ctype->translit_to_tbl_size = 0;