Merge remote branch 'origin/roland/hwcap_mask'
[glibc.git] / locale / programs / ld-ctype.c
blob376a02c2f00eb50d7bad6ff2d9d9ecd45d729843
1 /* Copyright (C) 1995-2006, 2007, 2009 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
23 #include <alloca.h>
24 #include <byteswap.h>
25 #include <endian.h>
26 #include <errno.h>
27 #include <limits.h>
28 #include <obstack.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <wchar.h>
32 #include <wctype.h>
33 #include <sys/uio.h>
35 #include "localedef.h"
36 #include "charmap.h"
37 #include "localeinfo.h"
38 #include "langinfo.h"
39 #include "linereader.h"
40 #include "locfile-token.h"
41 #include "locfile.h"
43 #include <assert.h>
#ifdef PREDEFINED_CLASSES
/* These are the extra bits not in wctype.h since these are not preallocated
   classes.  The constants are unsigned so that setting bit 31 does not
   overflow a signed int.  */
# define _ISwspecial1   (1U << 29)
# define _ISwspecial2   (1U << 30)
# define _ISwspecial3   (1U << 31)
#endif


/* Position of a class token relative to the first class token,
   tok_upper.  */
#define BITPOS(class) ((class) - tok_upper)
/* The bit used for representing a special class, in the 8-bit
   (_ISbit) and the wide character (_ISwbit) encodings.  */
#define BIT(class) (_ISbit (BITPOS (class)))
#define BITw(class) (_ISwbit (BITPOS (class)))

/* Expand to an lvalue for the entry of CTYPE->COLLECTION (optionally
   subscripted by IDX, e.g. `[2]') which belongs to the character VALUE.
   The lookup is done by find_idx, which is handed the table together
   with its `_max' and `_act' bookkeeping members.  */
#define ELEM(ctype, collection, idx, value)                                   \
  *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,     \
             &ctype->collection##_act idx, value)


/* To be compatible with former implementations we for now restrict
   the number of bits for character classes to 16.  When compatibility
   is not necessary anymore increase the number to 32.  */
#define char_class_t uint16_t
#define char_class32_t uint32_t
/* Type to describe a transliteration action.  We have a possibly
   multiple character from-string and a set of multiple character
   to-strings.  All are 32bit values since this is what is used in
   the gconv functions.  */
struct translit_to_t
{
  /* One replacement string, stored as 32-bit characters.  */
  uint32_t *str;

  /* Next alternative replacement, or NULL at the end of the list.  */
  struct translit_to_t *next;
};
/* One transliteration rule: the from-string, the place where the rule
   was defined, and the list of possible replacements.  */
struct translit_t
{
  /* The string to be replaced, stored as 32-bit characters.  */
  uint32_t *from;

  /* File name and line number recorded for this rule.  */
  const char *fname;
  size_t lineno;

  /* List of replacement strings.  */
  struct translit_to_t *to;

  /* Next rule, or NULL at the end of the list.  */
  struct translit_t *next;
};
/* One entry of the list of characters to be ignored during
   transliteration, described by the three values FROM, TO and STEP.  */
struct translit_ignore_t
{
  uint32_t from;
  uint32_t to;
  uint32_t step;

  /* File name and line number recorded for this entry.  */
  const char *fname;
  size_t lineno;

  /* Next entry, or NULL at the end of the list.  */
  struct translit_ignore_t *next;
};
/* Type to describe a transliteration include statement.  */
struct translit_include_t
{
  /* Names of the locale and of the repertoire given in the statement.  */
  const char *copy_locale;
  const char *copy_repertoire;

  /* Next include statement, or NULL at the end of the list.  */
  struct translit_include_t *next;
};
118 /* Sparse table of uint32_t. */
119 #define TABLE idx_table
120 #define ELEMENT uint32_t
121 #define DEFAULT ((uint32_t) ~0)
122 #define NO_FINALIZE
123 #include "3level.h"
126 /* The real definition of the struct for the LC_CTYPE locale. */
127 struct locale_ctype_t
129 uint32_t *charnames;
130 size_t charnames_max;
131 size_t charnames_act;
132 /* An index lookup table, to speedup find_idx. */
133 struct idx_table charnames_idx;
135 struct repertoire_t *repertoire;
137 /* We will allow up to 8 * sizeof (uint32_t) character classes. */
138 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
139 size_t nr_charclass;
140 const char *classnames[MAX_NR_CHARCLASS];
141 uint32_t last_class_char;
142 uint32_t class256_collection[256];
143 uint32_t *class_collection;
144 size_t class_collection_max;
145 size_t class_collection_act;
146 uint32_t class_done;
147 uint32_t class_offset;
149 struct charseq **mbdigits;
150 size_t mbdigits_act;
151 size_t mbdigits_max;
152 uint32_t *wcdigits;
153 size_t wcdigits_act;
154 size_t wcdigits_max;
156 struct charseq *mboutdigits[10];
157 uint32_t wcoutdigits[10];
158 size_t outdigits_act;
160 /* If the following number ever turns out to be too small simply
161 increase it. But I doubt it will. --drepper@gnu */
162 #define MAX_NR_CHARMAP 16
163 const char *mapnames[MAX_NR_CHARMAP];
164 uint32_t *map_collection[MAX_NR_CHARMAP];
165 uint32_t map256_collection[2][256];
166 size_t map_collection_max[MAX_NR_CHARMAP];
167 size_t map_collection_act[MAX_NR_CHARMAP];
168 size_t map_collection_nr;
169 size_t last_map_idx;
170 int tomap_done[MAX_NR_CHARMAP];
171 uint32_t map_offset;
173 /* Transliteration information. */
174 struct translit_include_t *translit_include;
175 struct translit_t *translit;
176 struct translit_ignore_t *translit_ignore;
177 uint32_t ntranslit_ignore;
179 uint32_t *default_missing;
180 const char *default_missing_file;
181 size_t default_missing_lineno;
183 uint32_t to_nonascii;
184 uint32_t nonascii_case;
186 /* The arrays for the binary representation. */
187 char_class_t *ctype_b;
188 char_class32_t *ctype32_b;
189 uint32_t **map_b;
190 uint32_t **map32_b;
191 uint32_t **class_b;
192 struct iovec *class_3level;
193 struct iovec *map_3level;
194 uint32_t *class_name_ptr;
195 uint32_t *map_name_ptr;
196 struct iovec width;
197 uint32_t mb_cur_max;
198 const char *codeset_name;
199 uint32_t *translit_from_idx;
200 uint32_t *translit_from_tbl;
201 uint32_t *translit_to_idx;
202 uint32_t *translit_to_tbl;
203 uint32_t translit_idx_size;
204 size_t translit_from_tbl_size;
205 size_t translit_to_tbl_size;
207 struct obstack mempool;
/* Marker for an empty slot.  This has the value 0xFFFFFFFF, regardless
   whether 'int' is 16 bit, 32 bit, or 64 bit.  */
#define EMPTY ((uint32_t) ~0)


/* Allocation hooks used by the obstack macros.  */
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free  free
/* Prototypes for local functions.  */
static void ctype_startup (struct linereader *lr, struct localedef_t *locale,
                           const struct charmap_t *charmap,
                           struct localedef_t *copy_locale,
                           int ignore_content);
static void ctype_class_new (struct linereader *lr,
                             struct locale_ctype_t *ctype, const char *name);
static void ctype_map_new (struct linereader *lr,
                           struct locale_ctype_t *ctype,
                           const char *name, const struct charmap_t *charmap);
static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table,
                           size_t *max, size_t *act, unsigned int idx);
static void set_class_defaults (struct locale_ctype_t *ctype,
                                const struct charmap_t *charmap,
                                struct repertoire_t *repertoire);
static void allocate_arrays (struct locale_ctype_t *ctype,
                             const struct charmap_t *charmap,
                             struct repertoire_t *repertoire);
/* Spelled-out names of the ten decimal digits, indexed by digit value.  */
static const char *longnames[] =
{
  "zero", "one", "two", "three", "four",
  "five", "six", "seven", "eight", "nine"
};
/* U-notation names of the ten ASCII digits, indexed by digit value.  */
static const char *uninames[] =
{
  "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
  "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
};
/* The ten ASCII digit characters, indexed by digit value.  */
static const unsigned char digits[] = "0123456789";
253 static void
254 ctype_startup (struct linereader *lr, struct localedef_t *locale,
255 const struct charmap_t *charmap,
256 struct localedef_t *copy_locale, int ignore_content)
258 unsigned int cnt;
259 struct locale_ctype_t *ctype;
261 if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL)
263 if (copy_locale == NULL)
265 /* Allocate the needed room. */
266 locale->categories[LC_CTYPE].ctype = ctype =
267 (struct locale_ctype_t *) xcalloc (1,
268 sizeof (struct locale_ctype_t));
270 /* We have seen no names yet. */
271 ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
272 ctype->charnames =
273 (unsigned int *) xmalloc (ctype->charnames_max
274 * sizeof (unsigned int));
275 for (cnt = 0; cnt < 256; ++cnt)
276 ctype->charnames[cnt] = cnt;
277 ctype->charnames_act = 256;
278 idx_table_init (&ctype->charnames_idx);
280 /* Fill character class information. */
281 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
282 /* The order of the following instructions determines the bit
283 positions! */
284 ctype_class_new (lr, ctype, "upper");
285 ctype_class_new (lr, ctype, "lower");
286 ctype_class_new (lr, ctype, "alpha");
287 ctype_class_new (lr, ctype, "digit");
288 ctype_class_new (lr, ctype, "xdigit");
289 ctype_class_new (lr, ctype, "space");
290 ctype_class_new (lr, ctype, "print");
291 ctype_class_new (lr, ctype, "graph");
292 ctype_class_new (lr, ctype, "blank");
293 ctype_class_new (lr, ctype, "cntrl");
294 ctype_class_new (lr, ctype, "punct");
295 ctype_class_new (lr, ctype, "alnum");
296 #ifdef PREDEFINED_CLASSES
297 /* The following are extensions from ISO 14652. */
298 ctype_class_new (lr, ctype, "left_to_right");
299 ctype_class_new (lr, ctype, "right_to_left");
300 ctype_class_new (lr, ctype, "num_terminator");
301 ctype_class_new (lr, ctype, "num_separator");
302 ctype_class_new (lr, ctype, "segment_separator");
303 ctype_class_new (lr, ctype, "block_separator");
304 ctype_class_new (lr, ctype, "direction_control");
305 ctype_class_new (lr, ctype, "sym_swap_layout");
306 ctype_class_new (lr, ctype, "char_shape_selector");
307 ctype_class_new (lr, ctype, "num_shape_selector");
308 ctype_class_new (lr, ctype, "non_spacing");
309 ctype_class_new (lr, ctype, "non_spacing_level3");
310 ctype_class_new (lr, ctype, "normal_connect");
311 ctype_class_new (lr, ctype, "r_connect");
312 ctype_class_new (lr, ctype, "no_connect");
313 ctype_class_new (lr, ctype, "no_connect-space");
314 ctype_class_new (lr, ctype, "vowel_connect");
315 #endif
317 ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
318 ctype->class_collection
319 = (uint32_t *) xcalloc (sizeof (unsigned long int),
320 ctype->class_collection_max);
321 ctype->class_collection_act = 256;
323 /* Fill character map information. */
324 ctype->last_map_idx = MAX_NR_CHARMAP;
325 ctype_map_new (lr, ctype, "toupper", charmap);
326 ctype_map_new (lr, ctype, "tolower", charmap);
327 #ifdef PREDEFINED_CLASSES
328 ctype_map_new (lr, ctype, "tosymmetric", charmap);
329 #endif
331 /* Fill first 256 entries in `toXXX' arrays. */
332 for (cnt = 0; cnt < 256; ++cnt)
334 ctype->map_collection[0][cnt] = cnt;
335 ctype->map_collection[1][cnt] = cnt;
336 #ifdef PREDEFINED_CLASSES
337 ctype->map_collection[2][cnt] = cnt;
338 #endif
339 ctype->map256_collection[0][cnt] = cnt;
340 ctype->map256_collection[1][cnt] = cnt;
343 if (enc_not_ascii_compatible)
344 ctype->to_nonascii = 1;
346 obstack_init (&ctype->mempool);
348 else
349 ctype = locale->categories[LC_CTYPE].ctype =
350 copy_locale->categories[LC_CTYPE].ctype;
355 void
356 ctype_finish (struct localedef_t *locale, const struct charmap_t *charmap)
358 /* See POSIX.2, table 2-6 for the meaning of the following table. */
359 #define NCLASS 12
360 static const struct
362 const char *name;
363 const char allow[NCLASS];
365 valid_table[NCLASS] =
367 /* The order is important. See token.h for more information.
368 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
369 { "upper", "--MX-XDDXXX-" },
370 { "lower", "--MX-XDDXXX-" },
371 { "alpha", "---X-XDDXXX-" },
372 { "digit", "XXX--XDDXXX-" },
373 { "xdigit", "-----XDDXXX-" },
374 { "space", "XXXXX------X" },
375 { "print", "---------X--" },
376 { "graph", "---------X--" },
377 { "blank", "XXXXXM-----X" },
378 { "cntrl", "XXXXX-XX--XX" },
379 { "punct", "XXXXX-DD-X-X" },
380 { "alnum", "-----XDDXXX-" }
382 size_t cnt;
383 int cls1, cls2;
384 uint32_t space_value;
385 struct charseq *space_seq;
386 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
387 int warned;
388 const void *key;
389 size_t len;
390 void *vdata;
391 void *curs;
393 /* Now resolve copying and also handle completely missing definitions. */
394 if (ctype == NULL)
396 const char *repertoire_name;
398 /* First see whether we were supposed to copy. If yes, find the
399 actual definition. */
400 if (locale->copy_name[LC_CTYPE] != NULL)
402 /* Find the copying locale. This has to happen transitively since
403 the locale we are copying from might also copying another one. */
404 struct localedef_t *from = locale;
407 from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
408 from->repertoire_name, charmap);
409 while (from->categories[LC_CTYPE].ctype == NULL
410 && from->copy_name[LC_CTYPE] != NULL);
412 ctype = locale->categories[LC_CTYPE].ctype
413 = from->categories[LC_CTYPE].ctype;
416 /* If there is still no definition issue an warning and create an
417 empty one. */
418 if (ctype == NULL)
420 if (! be_quiet)
421 WITH_CUR_LOCALE (error (0, 0, _("\
422 No definition for %s category found"), "LC_CTYPE"));
423 ctype_startup (NULL, locale, charmap, NULL, 0);
424 ctype = locale->categories[LC_CTYPE].ctype;
427 /* Get the repertoire we have to use. */
428 repertoire_name = locale->repertoire_name ?: repertoire_global;
429 if (repertoire_name != NULL)
430 ctype->repertoire = repertoire_read (repertoire_name);
433 /* We need the name of the currently used 8-bit character set to
434 make correct conversion between this 8-bit representation and the
435 ISO 10646 character set used internally for wide characters. */
436 ctype->codeset_name = charmap->code_set_name;
437 if (ctype->codeset_name == NULL)
439 if (! be_quiet)
440 WITH_CUR_LOCALE (error (0, 0, _("\
441 No character set name specified in charmap")));
442 ctype->codeset_name = "//UNKNOWN//";
445 /* Set default value for classes not specified. */
446 set_class_defaults (ctype, charmap, ctype->repertoire);
448 /* Check according to table. */
449 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
451 uint32_t tmp = ctype->class_collection[cnt];
453 if (tmp != 0)
455 for (cls1 = 0; cls1 < NCLASS; ++cls1)
456 if ((tmp & _ISwbit (cls1)) != 0)
457 for (cls2 = 0; cls2 < NCLASS; ++cls2)
458 if (valid_table[cls1].allow[cls2] != '-')
460 int eq = (tmp & _ISwbit (cls2)) != 0;
461 switch (valid_table[cls1].allow[cls2])
463 case 'M':
464 if (!eq)
466 uint32_t value = ctype->charnames[cnt];
468 if (!be_quiet)
469 WITH_CUR_LOCALE (error (0, 0, _("\
470 character L'\\u%0*x' in class `%s' must be in class `%s'"),
471 value > 0xffff ? 8 : 4,
472 value,
473 valid_table[cls1].name,
474 valid_table[cls2].name));
476 break;
478 case 'X':
479 if (eq)
481 uint32_t value = ctype->charnames[cnt];
483 if (!be_quiet)
484 WITH_CUR_LOCALE (error (0, 0, _("\
485 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
486 value > 0xffff ? 8 : 4,
487 value,
488 valid_table[cls1].name,
489 valid_table[cls2].name));
491 break;
493 case 'D':
494 ctype->class_collection[cnt] |= _ISwbit (cls2);
495 break;
497 default:
498 WITH_CUR_LOCALE (error (5, 0, _("\
499 internal error in %s, line %u"), __FUNCTION__, __LINE__));
505 for (cnt = 0; cnt < 256; ++cnt)
507 uint32_t tmp = ctype->class256_collection[cnt];
509 if (tmp != 0)
511 for (cls1 = 0; cls1 < NCLASS; ++cls1)
512 if ((tmp & _ISbit (cls1)) != 0)
513 for (cls2 = 0; cls2 < NCLASS; ++cls2)
514 if (valid_table[cls1].allow[cls2] != '-')
516 int eq = (tmp & _ISbit (cls2)) != 0;
517 switch (valid_table[cls1].allow[cls2])
519 case 'M':
520 if (!eq)
522 char buf[17];
524 snprintf (buf, sizeof buf, "\\%Zo", cnt);
526 if (!be_quiet)
527 WITH_CUR_LOCALE (error (0, 0, _("\
528 character '%s' in class `%s' must be in class `%s'"),
529 buf,
530 valid_table[cls1].name,
531 valid_table[cls2].name));
533 break;
535 case 'X':
536 if (eq)
538 char buf[17];
540 snprintf (buf, sizeof buf, "\\%Zo", cnt);
542 if (!be_quiet)
543 WITH_CUR_LOCALE (error (0, 0, _("\
544 character '%s' in class `%s' must not be in class `%s'"),
545 buf,
546 valid_table[cls1].name,
547 valid_table[cls2].name));
549 break;
551 case 'D':
552 ctype->class256_collection[cnt] |= _ISbit (cls2);
553 break;
555 default:
556 WITH_CUR_LOCALE (error (5, 0, _("\
557 internal error in %s, line %u"), __FUNCTION__, __LINE__));
563 /* ... and now test <SP> as a special case. */
564 space_value = 32;
565 if (((cnt = BITPOS (tok_space),
566 (ELEM (ctype, class_collection, , space_value)
567 & BITw (tok_space)) == 0)
568 || (cnt = BITPOS (tok_blank),
569 (ELEM (ctype, class_collection, , space_value)
570 & BITw (tok_blank)) == 0)))
572 if (!be_quiet)
573 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
574 valid_table[cnt].name));
576 else if (((cnt = BITPOS (tok_punct),
577 (ELEM (ctype, class_collection, , space_value)
578 & BITw (tok_punct)) != 0)
579 || (cnt = BITPOS (tok_graph),
580 (ELEM (ctype, class_collection, , space_value)
581 & BITw (tok_graph))
582 != 0)))
584 if (!be_quiet)
585 WITH_CUR_LOCALE (error (0, 0, _("\
586 <SP> character must not be in class `%s'"),
587 valid_table[cnt].name));
589 else
590 ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
592 space_seq = charmap_find_value (charmap, "SP", 2);
593 if (space_seq == NULL)
594 space_seq = charmap_find_value (charmap, "space", 5);
595 if (space_seq == NULL)
596 space_seq = charmap_find_value (charmap, "U00000020", 9);
597 if (space_seq == NULL || space_seq->nbytes != 1)
599 if (!be_quiet)
600 WITH_CUR_LOCALE (error (0, 0, _("\
601 character <SP> not defined in character map")));
603 else if (((cnt = BITPOS (tok_space),
604 (ctype->class256_collection[space_seq->bytes[0]]
605 & BIT (tok_space)) == 0)
606 || (cnt = BITPOS (tok_blank),
607 (ctype->class256_collection[space_seq->bytes[0]]
608 & BIT (tok_blank)) == 0)))
610 if (!be_quiet)
611 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
612 valid_table[cnt].name));
614 else if (((cnt = BITPOS (tok_punct),
615 (ctype->class256_collection[space_seq->bytes[0]]
616 & BIT (tok_punct)) != 0)
617 || (cnt = BITPOS (tok_graph),
618 (ctype->class256_collection[space_seq->bytes[0]]
619 & BIT (tok_graph)) != 0)))
621 if (!be_quiet)
622 WITH_CUR_LOCALE (error (0, 0, _("\
623 <SP> character must not be in class `%s'"),
624 valid_table[cnt].name));
626 else
627 ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
629 /* Check whether all single-byte characters make to their upper/lowercase
630 equivalent according to the ASCII rules. */
631 for (cnt = 'A'; cnt <= 'Z'; ++cnt)
633 uint32_t uppval = ctype->map256_collection[0][cnt];
634 uint32_t lowval = ctype->map256_collection[1][cnt];
635 uint32_t lowuppval = ctype->map256_collection[0][lowval];
636 uint32_t lowlowval = ctype->map256_collection[1][lowval];
638 if (uppval != cnt
639 || lowval != cnt + 0x20
640 || lowuppval != cnt
641 || lowlowval != cnt + 0x20)
642 ctype->nonascii_case = 1;
644 for (cnt = 0; cnt < 256; ++cnt)
645 if (cnt < 'A' || (cnt > 'Z' && cnt < 'a') || cnt > 'z')
646 if (ctype->map256_collection[0][cnt] != cnt
647 || ctype->map256_collection[1][cnt] != cnt)
648 ctype->nonascii_case = 1;
650 /* Now that the tests are done make sure the name array contains all
651 characters which are handled in the WIDTH section of the
652 character set definition file. */
653 if (charmap->width_rules != NULL)
654 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
656 unsigned char bytes[charmap->mb_cur_max];
657 int nbytes = charmap->width_rules[cnt].from->nbytes;
659 /* We have the range of character for which the width is
660 specified described using byte sequences of the multibyte
661 charset. We have to convert this to UCS4 now. And we
662 cannot simply convert the beginning and the end of the
663 sequence, we have to iterate over the byte sequence and
664 convert it for every single character. */
665 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
667 while (nbytes < charmap->width_rules[cnt].to->nbytes
668 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
669 nbytes) <= 0)
671 /* Find the UCS value for `bytes'. */
672 int inner;
673 uint32_t wch;
674 struct charseq *seq
675 = charmap_find_symbol (charmap, (char *) bytes, nbytes);
677 if (seq == NULL)
678 wch = ILLEGAL_CHAR_VALUE;
679 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
680 wch = seq->ucs4;
681 else
682 wch = repertoire_find_value (ctype->repertoire, seq->name,
683 strlen (seq->name));
685 if (wch != ILLEGAL_CHAR_VALUE)
686 /* We are only interested in the side-effects of the
687 `find_idx' call. It will add appropriate entries in
688 the name array if this is necessary. */
689 (void) find_idx (ctype, NULL, NULL, NULL, wch);
691 /* "Increment" the bytes sequence. */
692 inner = nbytes - 1;
693 while (inner >= 0 && bytes[inner] == 0xff)
694 --inner;
696 if (inner < 0)
698 /* We have to extend the byte sequence. */
699 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
700 break;
702 bytes[0] = 1;
703 memset (&bytes[1], 0, nbytes);
704 ++nbytes;
706 else
708 ++bytes[inner];
709 while (++inner < nbytes)
710 bytes[inner] = 0;
715 /* Now set all the other characters of the character set to the
716 default width. */
717 curs = NULL;
718 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
720 struct charseq *data = (struct charseq *) vdata;
722 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
723 data->ucs4 = repertoire_find_value (ctype->repertoire,
724 data->name, len);
726 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
727 (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
730 /* There must be a multiple of 10 digits. */
731 if (ctype->mbdigits_act % 10 != 0)
733 assert (ctype->mbdigits_act == ctype->wcdigits_act);
734 ctype->wcdigits_act -= ctype->mbdigits_act % 10;
735 ctype->mbdigits_act -= ctype->mbdigits_act % 10;
736 WITH_CUR_LOCALE (error (0, 0, _("\
737 `digit' category has not entries in groups of ten")));
740 /* Check the input digits. There must be a multiple of ten available.
741 In each group it could be that one or the other character is missing.
742 In this case the whole group must be removed. */
743 cnt = 0;
744 while (cnt < ctype->mbdigits_act)
746 size_t inner;
747 for (inner = 0; inner < 10; ++inner)
748 if (ctype->mbdigits[cnt + inner] == NULL)
749 break;
751 if (inner == 10)
752 cnt += 10;
753 else
755 /* Remove the group. */
756 memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
757 ((ctype->wcdigits_act - cnt - 10)
758 * sizeof (ctype->mbdigits[0])));
759 ctype->mbdigits_act -= 10;
763 /* If no input digits are given use the default. */
764 if (ctype->mbdigits_act == 0)
766 if (ctype->mbdigits_max == 0)
768 ctype->mbdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
769 10 * sizeof (struct charseq *));
770 ctype->mbdigits_max = 10;
773 for (cnt = 0; cnt < 10; ++cnt)
775 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
776 (char *) digits + cnt, 1);
777 if (ctype->mbdigits[cnt] == NULL)
779 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
780 longnames[cnt],
781 strlen (longnames[cnt]));
782 if (ctype->mbdigits[cnt] == NULL)
784 /* Hum, this ain't good. */
785 WITH_CUR_LOCALE (error (0, 0, _("\
786 no input digits defined and none of the standard names in the charmap")));
788 ctype->mbdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
789 sizeof (struct charseq) + 1);
791 /* This is better than nothing. */
792 ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
793 ctype->mbdigits[cnt]->nbytes = 1;
798 ctype->mbdigits_act = 10;
801 /* Check the wide character input digits. There must be a multiple
802 of ten available. In each group it could be that one or the other
803 character is missing. In this case the whole group must be
804 removed. */
805 cnt = 0;
806 while (cnt < ctype->wcdigits_act)
808 size_t inner;
809 for (inner = 0; inner < 10; ++inner)
810 if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
811 break;
813 if (inner == 10)
814 cnt += 10;
815 else
817 /* Remove the group. */
818 memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
819 ((ctype->wcdigits_act - cnt - 10)
820 * sizeof (ctype->wcdigits[0])));
821 ctype->wcdigits_act -= 10;
825 /* If no input digits are given use the default. */
826 if (ctype->wcdigits_act == 0)
828 if (ctype->wcdigits_max == 0)
830 ctype->wcdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
831 10 * sizeof (uint32_t));
832 ctype->wcdigits_max = 10;
835 for (cnt = 0; cnt < 10; ++cnt)
836 ctype->wcdigits[cnt] = L'0' + cnt;
838 ctype->mbdigits_act = 10;
841 /* Check the outdigits. */
842 warned = 0;
843 for (cnt = 0; cnt < 10; ++cnt)
844 if (ctype->mboutdigits[cnt] == NULL)
846 static struct charseq replace[2];
848 if (!warned)
850 WITH_CUR_LOCALE (error (0, 0, _("\
851 not all characters used in `outdigit' are available in the charmap")));
852 warned = 1;
855 replace[0].nbytes = 1;
856 replace[0].bytes[0] = '?';
857 replace[0].bytes[1] = '\0';
858 ctype->mboutdigits[cnt] = &replace[0];
861 warned = 0;
862 for (cnt = 0; cnt < 10; ++cnt)
863 if (ctype->wcoutdigits[cnt] == 0)
865 if (!warned)
867 WITH_CUR_LOCALE (error (0, 0, _("\
868 not all characters used in `outdigit' are available in the repertoire")));
869 warned = 1;
872 ctype->wcoutdigits[cnt] = L'?';
875 /* Sort the entries in the translit_ignore list. */
876 if (ctype->translit_ignore != NULL)
878 struct translit_ignore_t *firstp = ctype->translit_ignore;
879 struct translit_ignore_t *runp;
881 ctype->ntranslit_ignore = 1;
883 for (runp = firstp->next; runp != NULL; runp = runp->next)
885 struct translit_ignore_t *lastp = NULL;
886 struct translit_ignore_t *cmpp;
888 ++ctype->ntranslit_ignore;
890 for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next)
891 if (runp->from < cmpp->from)
892 break;
894 runp->next = lastp;
895 if (lastp == NULL)
896 firstp = runp;
899 ctype->translit_ignore = firstp;
904 void
905 ctype_output (struct localedef_t *locale, const struct charmap_t *charmap,
906 const char *output_path)
908 static const char nulbytes[4] = { 0, 0, 0, 0 };
909 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
910 const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)
911 + ctype->nr_charclass + ctype->map_collection_nr);
912 struct iovec *iov = alloca (sizeof *iov
913 * (2 + nelems + 2 * ctype->nr_charclass
914 + ctype->map_collection_nr + 4));
915 struct locale_file data;
916 uint32_t *idx = alloca (sizeof *idx * (nelems + 1));
917 uint32_t default_missing_len;
918 size_t elem, cnt, offset, total;
919 char *cp;
921 /* Now prepare the output: Find the sizes of the table we can use. */
922 allocate_arrays (ctype, charmap, ctype->repertoire);
924 data.magic = LIMAGIC (LC_CTYPE);
925 data.n = nelems;
926 iov[0].iov_base = (void *) &data;
927 iov[0].iov_len = sizeof (data);
929 iov[1].iov_base = (void *) idx;
930 iov[1].iov_len = nelems * sizeof (uint32_t);
932 idx[0] = iov[0].iov_len + iov[1].iov_len;
933 offset = 0;
935 for (elem = 0; elem < nelems; ++elem)
937 if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))
938 switch (elem)
940 #define CTYPE_EMPTY(name) \
941 case name: \
942 iov[2 + elem + offset].iov_base = NULL; \
943 iov[2 + elem + offset].iov_len = 0; \
944 idx[elem + 1] = idx[elem]; \
945 break
947 CTYPE_EMPTY(_NL_CTYPE_GAP1);
948 CTYPE_EMPTY(_NL_CTYPE_GAP2);
949 CTYPE_EMPTY(_NL_CTYPE_GAP3);
950 CTYPE_EMPTY(_NL_CTYPE_GAP4);
951 CTYPE_EMPTY(_NL_CTYPE_GAP5);
952 CTYPE_EMPTY(_NL_CTYPE_GAP6);
954 #define CTYPE_DATA(name, base, len) \
955 case _NL_ITEM_INDEX (name): \
956 iov[2 + elem + offset].iov_base = (base); \
957 iov[2 + elem + offset].iov_len = (len); \
958 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
959 break
961 CTYPE_DATA (_NL_CTYPE_CLASS,
962 ctype->ctype_b,
963 (256 + 128) * sizeof (char_class_t));
965 CTYPE_DATA (_NL_CTYPE_TOUPPER,
966 ctype->map_b[0],
967 (256 + 128) * sizeof (uint32_t));
968 CTYPE_DATA (_NL_CTYPE_TOLOWER,
969 ctype->map_b[1],
970 (256 + 128) * sizeof (uint32_t));
972 CTYPE_DATA (_NL_CTYPE_TOUPPER32,
973 ctype->map32_b[0],
974 256 * sizeof (uint32_t));
975 CTYPE_DATA (_NL_CTYPE_TOLOWER32,
976 ctype->map32_b[1],
977 256 * sizeof (uint32_t));
979 CTYPE_DATA (_NL_CTYPE_CLASS32,
980 ctype->ctype32_b,
981 256 * sizeof (char_class32_t));
983 CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET,
984 &ctype->class_offset, sizeof (uint32_t));
986 CTYPE_DATA (_NL_CTYPE_MAP_OFFSET,
987 &ctype->map_offset, sizeof (uint32_t));
989 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE,
990 &ctype->translit_idx_size, sizeof (uint32_t));
992 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX,
993 ctype->translit_from_idx,
994 ctype->translit_idx_size * sizeof (uint32_t));
996 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL,
997 ctype->translit_from_tbl,
998 ctype->translit_from_tbl_size);
1000 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX,
1001 ctype->translit_to_idx,
1002 ctype->translit_idx_size * sizeof (uint32_t));
1004 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL,
1005 ctype->translit_to_tbl, ctype->translit_to_tbl_size);
1007 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
1008 /* The class name array. */
1009 total = 0;
1010 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
1012 iov[2 + elem + offset].iov_base
1013 = (void *) ctype->classnames[cnt];
1014 iov[2 + elem + offset].iov_len
1015 = strlen (ctype->classnames[cnt]) + 1;
1016 total += iov[2 + elem + offset].iov_len;
1018 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1019 iov[2 + elem + offset].iov_len = 4 - (total % 4);
1020 total += 4 - (total % 4);
1022 idx[elem + 1] = idx[elem] + total;
1023 break;
1025 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
1026 /* The class name array. */
1027 total = 0;
1028 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
1030 iov[2 + elem + offset].iov_base
1031 = (void *) ctype->mapnames[cnt];
1032 iov[2 + elem + offset].iov_len
1033 = strlen (ctype->mapnames[cnt]) + 1;
1034 total += iov[2 + elem + offset].iov_len;
1036 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1037 iov[2 + elem + offset].iov_len = 4 - (total % 4);
1038 total += 4 - (total % 4);
1040 idx[elem + 1] = idx[elem] + total;
1041 break;
1043 CTYPE_DATA (_NL_CTYPE_WIDTH,
1044 ctype->width.iov_base,
1045 ctype->width.iov_len);
1047 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
1048 &ctype->mb_cur_max, sizeof (uint32_t));
1050 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
1051 total = strlen (ctype->codeset_name) + 1;
1052 if (total % 4 == 0)
1053 iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
1054 else
1056 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
1057 memset (mempcpy (iov[2 + elem + offset].iov_base,
1058 ctype->codeset_name, total),
1059 '\0', 4 - (total & 3));
1060 total = (total + 3) & ~3;
1062 iov[2 + elem + offset].iov_len = total;
1063 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1064 break;
1067 CTYPE_DATA (_NL_CTYPE_MAP_TO_NONASCII,
1068 &ctype->to_nonascii, sizeof (uint32_t));
1070 CTYPE_DATA (_NL_CTYPE_NONASCII_CASE,
1071 &ctype->nonascii_case, sizeof (uint32_t));
1073 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
1074 iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1075 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1076 *(uint32_t *) iov[2 + elem + offset].iov_base =
1077 ctype->mbdigits_act / 10;
1078 idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1079 break;
1081 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
1082 /* Align entries. */
1083 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1084 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1085 idx[elem] += iov[2 + elem + offset].iov_len;
1086 ++offset;
1088 iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1089 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1090 *(uint32_t *) iov[2 + elem + offset].iov_base =
1091 ctype->wcdigits_act / 10;
1092 idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1093 break;
1095 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
1096 /* Compute the length of all possible characters. For INDIGITS
1097 there might be more than one. We simply concatenate all of
1098 them with a NUL byte following. The NUL byte wouldn't be
1099 necessary but it makes it easier for the user. */
1100 total = 0;
1102 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1103 cnt < ctype->mbdigits_act; cnt += 10)
1104 total += ctype->mbdigits[cnt]->nbytes + 1;
1105 iov[2 + elem + offset].iov_base = (char *) alloca (total);
1106 iov[2 + elem + offset].iov_len = total;
1108 cp = iov[2 + elem + offset].iov_base;
1109 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1110 cnt < ctype->mbdigits_act; cnt += 10)
1112 cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes,
1113 ctype->mbdigits[cnt]->nbytes);
1114 *cp++ = '\0';
1116 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1117 break;
1119 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
1120 /* Compute the length of all possible characters. For INDIGITS
1121 there might be more than one. We simply concatenate all of
1122 them with a NUL byte following. The NUL byte wouldn't be
1123 necessary but it makes it easier for the user. */
1124 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
1125 total = ctype->mboutdigits[cnt]->nbytes + 1;
1126 iov[2 + elem + offset].iov_base = (char *) alloca (total);
1127 iov[2 + elem + offset].iov_len = total;
1129 *(char *) mempcpy (iov[2 + elem + offset].iov_base,
1130 ctype->mboutdigits[cnt]->bytes,
1131 ctype->mboutdigits[cnt]->nbytes) = '\0';
1132 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1133 break;
1135 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
1136 total = ctype->wcdigits_act / 10;
1138 iov[2 + elem + offset].iov_base =
1139 (uint32_t *) alloca (total * sizeof (uint32_t));
1140 iov[2 + elem + offset].iov_len = total * sizeof (uint32_t);
1142 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
1143 cnt < ctype->wcdigits_act; cnt += 10)
1144 ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10]
1145 = ctype->wcdigits[cnt];
1146 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1147 break;
1149 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC):
1150 /* Align entries. */
1151 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1152 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1153 idx[elem] += iov[2 + elem + offset].iov_len;
1154 ++offset;
1155 /* FALLTHROUGH */
1157 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT1_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
1158 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
1159 iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt];
1160 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1161 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1162 break;
1164 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
1165 /* Align entries. */
1166 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1167 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1168 idx[elem] += iov[2 + elem + offset].iov_len;
1169 ++offset;
1171 default_missing_len = (ctype->default_missing
1172 ? wcslen ((wchar_t *)ctype->default_missing)
1173 : 0);
1174 iov[2 + elem + offset].iov_base = &default_missing_len;
1175 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1176 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1177 break;
1179 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
1180 iov[2 + elem + offset].iov_base =
1181 ctype->default_missing ?: (uint32_t *) L"";
1182 iov[2 + elem + offset].iov_len =
1183 wcslen (iov[2 + elem + offset].iov_base) * sizeof (uint32_t);
1184 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1185 break;
1187 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
1188 /* Align entries. */
1189 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1190 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1191 idx[elem] += iov[2 + elem + offset].iov_len;
1192 ++offset;
1194 iov[2 + elem + offset].iov_base = &ctype->ntranslit_ignore;
1195 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1196 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1197 break;
1199 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
1201 uint32_t *ranges = (uint32_t *) alloca (ctype->ntranslit_ignore
1202 * 3 * sizeof (uint32_t));
1203 struct translit_ignore_t *runp;
1205 iov[2 + elem + offset].iov_base = ranges;
1206 iov[2 + elem + offset].iov_len = (ctype->ntranslit_ignore
1207 * 3 * sizeof (uint32_t));
1209 for (runp = ctype->translit_ignore; runp != NULL;
1210 runp = runp->next)
1212 *ranges++ = runp->from;
1213 *ranges++ = runp->to;
1214 *ranges++ = runp->step;
1217 /* Remove the following line in case a new entry is added
1218 after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN. */
1219 if (elem < nelems)
1220 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1221 break;
1223 default:
1224 assert (! "unknown CTYPE element");
1226 else
1228 /* Handle extra maps. */
1229 size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
1230 if (nr < ctype->nr_charclass)
1232 iov[2 + elem + offset].iov_base = ctype->class_b[nr];
1233 iov[2 + elem + offset].iov_len = 256 / 32 * sizeof (uint32_t);
1234 idx[elem] += iov[2 + elem + offset].iov_len;
1235 ++offset;
1237 iov[2 + elem + offset] = ctype->class_3level[nr];
1239 else
1241 nr -= ctype->nr_charclass;
1242 assert (nr < ctype->map_collection_nr);
1243 iov[2 + elem + offset] = ctype->map_3level[nr];
1245 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1249 assert (2 + elem + offset == (nelems + 2 * ctype->nr_charclass
1250 + ctype->map_collection_nr + 4 + 2));
1252 write_locale_data (output_path, LC_CTYPE, "LC_CTYPE", 2 + elem + offset,
1253 iov);
1257 /* Local functions. */
1258 static void
1259 ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
1260 const char *name)
1262 size_t cnt;
1264 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1265 if (strcmp (ctype->classnames[cnt], name) == 0)
1266 break;
1268 if (cnt < ctype->nr_charclass)
1270 lr_error (lr, _("character class `%s' already defined"), name);
1271 return;
1274 if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1275 /* Exit code 2 is prescribed in P1003.2b. */
1276 WITH_CUR_LOCALE (error (2, 0, _("\
1277 implementation limit: no more than %Zd character classes allowed"),
1278 MAX_NR_CHARCLASS));
1280 ctype->classnames[ctype->nr_charclass++] = name;
1284 static void
1285 ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1286 const char *name, const struct charmap_t *charmap)
1288 size_t max_chars = 0;
1289 size_t cnt;
1291 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1293 if (strcmp (ctype->mapnames[cnt], name) == 0)
1294 break;
1296 if (max_chars < ctype->map_collection_max[cnt])
1297 max_chars = ctype->map_collection_max[cnt];
1300 if (cnt < ctype->map_collection_nr)
1302 lr_error (lr, _("character map `%s' already defined"), name);
1303 return;
1306 if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1307 /* Exit code 2 is prescribed in P1003.2b. */
1308 WITH_CUR_LOCALE (error (2, 0, _("\
1309 implementation limit: no more than %d character maps allowed"),
1310 MAX_NR_CHARMAP));
1312 ctype->mapnames[cnt] = name;
1314 if (max_chars == 0)
1315 ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1316 else
1317 ctype->map_collection_max[cnt] = max_chars;
1319 ctype->map_collection[cnt] = (uint32_t *)
1320 xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
1321 ctype->map_collection_act[cnt] = 256;
1323 ++ctype->map_collection_nr;
1327 /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
1328 is possible if we only want to extend the name array. */
1329 static uint32_t *
1330 find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1331 size_t *act, uint32_t idx)
1333 size_t cnt;
1335 if (idx < 256)
1336 return table == NULL ? NULL : &(*table)[idx];
1338 /* Use the charnames_idx lookup table instead of the slow search loop. */
1339 #if 1
1340 cnt = idx_table_get (&ctype->charnames_idx, idx);
1341 if (cnt == EMPTY)
1342 /* Not found. */
1343 cnt = ctype->charnames_act;
1344 #else
1345 for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1346 if (ctype->charnames[cnt] == idx)
1347 break;
1348 #endif
1350 /* We have to distinguish two cases: the name is found or not. */
1351 if (cnt == ctype->charnames_act)
1353 /* Extend the name array. */
1354 if (ctype->charnames_act == ctype->charnames_max)
1356 ctype->charnames_max *= 2;
1357 ctype->charnames = (uint32_t *)
1358 xrealloc (ctype->charnames,
1359 sizeof (uint32_t) * ctype->charnames_max);
1361 ctype->charnames[ctype->charnames_act++] = idx;
1362 idx_table_add (&ctype->charnames_idx, idx, cnt);
1365 if (table == NULL)
1366 /* We have done everything we are asked to do. */
1367 return NULL;
1369 if (max == NULL)
1370 /* The caller does not want to extend the table. */
1371 return (cnt >= *act ? NULL : &(*table)[cnt]);
1373 if (cnt >= *act)
1375 if (cnt >= *max)
1377 size_t old_max = *max;
1379 *max *= 2;
1380 while (*max <= cnt);
1382 *table =
1383 (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
1384 memset (&(*table)[old_max], '\0',
1385 (*max - old_max) * sizeof (uint32_t));
1388 *act = cnt + 1;
1391 return &(*table)[cnt];
1395 static int
1396 get_character (struct token *now, const struct charmap_t *charmap,
1397 struct repertoire_t *repertoire,
1398 struct charseq **seqp, uint32_t *wchp)
1400 if (now->tok == tok_bsymbol)
1402 /* This will hopefully be the normal case. */
1403 *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1404 now->val.str.lenmb);
1405 *seqp = charmap_find_value (charmap, now->val.str.startmb,
1406 now->val.str.lenmb);
1408 else if (now->tok == tok_ucs4)
1410 char utmp[10];
1412 snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1413 *seqp = charmap_find_value (charmap, utmp, 9);
1415 if (*seqp == NULL)
1416 *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
1418 if (*seqp == NULL)
1420 /* Compute the value in the charmap from the UCS value. */
1421 const char *symbol = repertoire_find_symbol (repertoire,
1422 now->val.ucs4);
1424 if (symbol == NULL)
1425 *seqp = NULL;
1426 else
1427 *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
1429 if (*seqp == NULL)
1431 if (repertoire != NULL)
1433 /* Insert a negative entry. */
1434 static const struct charseq negative
1435 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1436 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1437 sizeof (uint32_t));
1438 *newp = now->val.ucs4;
1440 insert_entry (&repertoire->seq_table, newp,
1441 sizeof (uint32_t), (void *) &negative);
1444 else
1445 (*seqp)->ucs4 = now->val.ucs4;
1447 else if ((*seqp)->ucs4 != now->val.ucs4)
1448 *seqp = NULL;
1450 *wchp = now->val.ucs4;
1452 else if (now->tok == tok_charcode)
1454 /* We must map from the byte code to UCS4. */
1455 *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1456 now->val.str.lenmb);
1458 if (*seqp == NULL)
1459 *wchp = ILLEGAL_CHAR_VALUE;
1460 else
1462 if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1463 (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1464 strlen ((*seqp)->name));
1465 *wchp = (*seqp)->ucs4;
1468 else
1469 return 1;
1471 return 0;
1475 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1476 the .(2). counterparts. */
1477 static void
1478 charclass_symbolic_ellipsis (struct linereader *ldfile,
1479 struct locale_ctype_t *ctype,
1480 const struct charmap_t *charmap,
1481 struct repertoire_t *repertoire,
1482 struct token *now,
1483 const char *last_str,
1484 unsigned long int class256_bit,
1485 unsigned long int class_bit, int base,
1486 int ignore_content, int handle_digits, int step)
1488 const char *nowstr = now->val.str.startmb;
1489 char tmp[now->val.str.lenmb + 1];
1490 const char *cp;
1491 char *endp;
1492 unsigned long int from;
1493 unsigned long int to;
1495 /* We have to compute the ellipsis values using the symbolic names. */
1496 assert (last_str != NULL);
1498 if (strlen (last_str) != now->val.str.lenmb)
1500 invalid_range:
1501 lr_error (ldfile,
1502 _("`%s' and `%.*s' are not valid names for symbolic range"),
1503 last_str, (int) now->val.str.lenmb, nowstr);
1504 return;
1507 if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1508 /* Nothing to do, the names are the same. */
1509 return;
1511 for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1514 errno = 0;
1515 from = strtoul (cp, &endp, base);
1516 if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1517 goto invalid_range;
1519 to = strtoul (nowstr + (cp - last_str), &endp, base);
1520 if ((to == UINT_MAX && errno == ERANGE)
1521 || (endp - nowstr) != now->val.str.lenmb || from >= to)
1522 goto invalid_range;
1524 /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
1525 if (!ignore_content)
1527 now->val.str.startmb = tmp;
1528 while ((from += step) <= to)
1530 struct charseq *seq;
1531 uint32_t wch;
1533 sprintf (tmp, (base == 10 ? "%.*s%0*ld" : "%.*s%0*lX"),
1534 (int) (cp - last_str), last_str,
1535 (int) (now->val.str.lenmb - (cp - last_str)),
1536 from);
1538 get_character (now, charmap, repertoire, &seq, &wch);
1540 if (seq != NULL && seq->nbytes == 1)
1541 /* Yep, we can store information about this byte sequence. */
1542 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
1544 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1545 /* We have the UCS4 position. */
1546 *find_idx (ctype, &ctype->class_collection,
1547 &ctype->class_collection_max,
1548 &ctype->class_collection_act, wch) |= class_bit;
1550 if (handle_digits == 1)
1552 /* We must store the digit values. */
1553 if (ctype->mbdigits_act == ctype->mbdigits_max)
1555 ctype->mbdigits_max *= 2;
1556 ctype->mbdigits = xrealloc (ctype->mbdigits,
1557 (ctype->mbdigits_max
1558 * sizeof (char *)));
1559 ctype->wcdigits_max *= 2;
1560 ctype->wcdigits = xrealloc (ctype->wcdigits,
1561 (ctype->wcdigits_max
1562 * sizeof (uint32_t)));
1565 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1566 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1568 else if (handle_digits == 2)
1570 /* We must store the digit values. */
1571 if (ctype->outdigits_act >= 10)
1573 lr_error (ldfile, _("\
1574 %s: field `%s' does not contain exactly ten entries"),
1575 "LC_CTYPE", "outdigit");
1576 return;
1579 ctype->mboutdigits[ctype->outdigits_act] = seq;
1580 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1581 ++ctype->outdigits_act;
1588 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. */
1589 static void
1590 charclass_ucs4_ellipsis (struct linereader *ldfile,
1591 struct locale_ctype_t *ctype,
1592 const struct charmap_t *charmap,
1593 struct repertoire_t *repertoire,
1594 struct token *now, uint32_t last_wch,
1595 unsigned long int class256_bit,
1596 unsigned long int class_bit, int ignore_content,
1597 int handle_digits, int step)
1599 if (last_wch > now->val.ucs4)
1601 lr_error (ldfile, _("\
1602 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1603 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1604 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
1605 return;
1608 if (!ignore_content)
1609 while ((last_wch += step) <= now->val.ucs4)
1611 /* We have to find out whether there is a byte sequence corresponding
1612 to this UCS4 value. */
1613 struct charseq *seq;
1614 char utmp[10];
1616 snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1617 seq = charmap_find_value (charmap, utmp, 9);
1618 if (seq == NULL)
1620 snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1621 seq = charmap_find_value (charmap, utmp, 5);
1624 if (seq == NULL)
1625 /* Try looking in the repertoire map. */
1626 seq = repertoire_find_seq (repertoire, last_wch);
1628 /* If this is the first time we look for this sequence create a new
1629 entry. */
1630 if (seq == NULL)
1632 static const struct charseq negative
1633 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1635 /* Find the symbolic name for this UCS4 value. */
1636 if (repertoire != NULL)
1638 const char *symbol = repertoire_find_symbol (repertoire,
1639 last_wch);
1640 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1641 sizeof (uint32_t));
1642 *newp = last_wch;
1644 if (symbol != NULL)
1645 /* We have a name, now search the multibyte value. */
1646 seq = charmap_find_value (charmap, symbol, strlen (symbol));
1648 if (seq == NULL)
1649 /* We have to create a fake entry. */
1650 seq = (struct charseq *) &negative;
1651 else
1652 seq->ucs4 = last_wch;
1654 insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1655 seq);
1657 else
1658 /* We have to create a fake entry. */
1659 seq = (struct charseq *) &negative;
1662 /* We have a name, now search the multibyte value. */
1663 if (seq->ucs4 == last_wch && seq->nbytes == 1)
1664 /* Yep, we can store information about this byte sequence. */
1665 ctype->class256_collection[(size_t) seq->bytes[0]]
1666 |= class256_bit;
1668 /* And of course we have the UCS4 position. */
1669 if (class_bit != 0)
1670 *find_idx (ctype, &ctype->class_collection,
1671 &ctype->class_collection_max,
1672 &ctype->class_collection_act, last_wch) |= class_bit;
1674 if (handle_digits == 1)
1676 /* We must store the digit values. */
1677 if (ctype->mbdigits_act == ctype->mbdigits_max)
1679 ctype->mbdigits_max *= 2;
1680 ctype->mbdigits = xrealloc (ctype->mbdigits,
1681 (ctype->mbdigits_max
1682 * sizeof (char *)));
1683 ctype->wcdigits_max *= 2;
1684 ctype->wcdigits = xrealloc (ctype->wcdigits,
1685 (ctype->wcdigits_max
1686 * sizeof (uint32_t)));
1689 ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1690 ? seq : NULL);
1691 ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1693 else if (handle_digits == 2)
1695 /* We must store the digit values. */
1696 if (ctype->outdigits_act >= 10)
1698 lr_error (ldfile, _("\
1699 %s: field `%s' does not contain exactly ten entries"),
1700 "LC_CTYPE", "outdigit");
1701 return;
1704 ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1705 ? seq : NULL);
1706 ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1707 ++ctype->outdigits_act;
1713 /* Ellipsis as in `/xea/x12.../xea/x34'. */
1714 static void
1715 charclass_charcode_ellipsis (struct linereader *ldfile,
1716 struct locale_ctype_t *ctype,
1717 const struct charmap_t *charmap,
1718 struct repertoire_t *repertoire,
1719 struct token *now, char *last_charcode,
1720 uint32_t last_charcode_len,
1721 unsigned long int class256_bit,
1722 unsigned long int class_bit, int ignore_content,
1723 int handle_digits)
1725 /* First check whether the to-value is larger. */
1726 if (now->val.charcode.nbytes != last_charcode_len)
1728 lr_error (ldfile, _("\
1729 start and end character sequence of range must have the same length"));
1730 return;
1733 if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
1735 lr_error (ldfile, _("\
1736 to-value character sequence is smaller than from-value sequence"));
1737 return;
1740 if (!ignore_content)
1744 /* Increment the byte sequence value. */
1745 struct charseq *seq;
1746 uint32_t wch;
1747 int i;
1749 for (i = last_charcode_len - 1; i >= 0; --i)
1750 if (++last_charcode[i] != 0)
1751 break;
1753 if (last_charcode_len == 1)
1754 /* Of course we have the charcode value. */
1755 ctype->class256_collection[(size_t) last_charcode[0]]
1756 |= class256_bit;
1758 /* Find the symbolic name. */
1759 seq = charmap_find_symbol (charmap, last_charcode,
1760 last_charcode_len);
1761 if (seq != NULL)
1763 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1764 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1765 strlen (seq->name));
1766 wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
1768 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1769 *find_idx (ctype, &ctype->class_collection,
1770 &ctype->class_collection_max,
1771 &ctype->class_collection_act, wch) |= class_bit;
1773 else
1774 wch = ILLEGAL_CHAR_VALUE;
1776 if (handle_digits == 1)
1778 /* We must store the digit values. */
1779 if (ctype->mbdigits_act == ctype->mbdigits_max)
1781 ctype->mbdigits_max *= 2;
1782 ctype->mbdigits = xrealloc (ctype->mbdigits,
1783 (ctype->mbdigits_max
1784 * sizeof (char *)));
1785 ctype->wcdigits_max *= 2;
1786 ctype->wcdigits = xrealloc (ctype->wcdigits,
1787 (ctype->wcdigits_max
1788 * sizeof (uint32_t)));
1791 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1792 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1793 seq->nbytes = last_charcode_len;
1795 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1796 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1798 else if (handle_digits == 2)
1800 struct charseq *seq;
1801 /* We must store the digit values. */
1802 if (ctype->outdigits_act >= 10)
1804 lr_error (ldfile, _("\
1805 %s: field `%s' does not contain exactly ten entries"),
1806 "LC_CTYPE", "outdigit");
1807 return;
1810 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1811 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1812 seq->nbytes = last_charcode_len;
1814 ctype->mboutdigits[ctype->outdigits_act] = seq;
1815 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1816 ++ctype->outdigits_act;
1819 while (memcmp (last_charcode, now->val.charcode.bytes,
1820 last_charcode_len) != 0);
1825 static uint32_t *
1826 find_translit2 (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
1827 uint32_t wch)
1829 struct translit_t *trunp = ctype->translit;
1830 struct translit_ignore_t *tirunp = ctype->translit_ignore;
1832 while (trunp != NULL)
1834 /* XXX We simplify things here. The transliterations we look
1835 for are only allowed to have one character. */
1836 if (trunp->from[0] == wch && trunp->from[1] == 0)
1838 /* Found it. Now look for a transliteration which can be
1839 represented with the character set. */
1840 struct translit_to_t *torunp = trunp->to;
1842 while (torunp != NULL)
1844 int i;
1846 for (i = 0; torunp->str[i] != 0; ++i)
1848 char utmp[10];
1850 snprintf (utmp, sizeof (utmp), "U%08X", torunp->str[i]);
1851 if (charmap_find_value (charmap, utmp, 9) == NULL)
1852 /* This character cannot be represented. */
1853 break;
1856 if (torunp->str[i] == 0)
1857 return torunp->str;
1859 torunp = torunp->next;
1862 break;
1865 trunp = trunp->next;
1868 /* Check for ignored chars. */
1869 while (tirunp != NULL)
1871 if (tirunp->from <= wch && tirunp->to >= wch)
1873 uint32_t wi;
1875 for (wi = tirunp->from; wi <= wch; wi += tirunp->step)
1876 if (wi == wch)
1877 return (uint32_t []) { 0 };
1881 /* Nothing found. */
1882 return NULL;
1886 uint32_t *
1887 find_translit (struct localedef_t *locale, const struct charmap_t *charmap,
1888 uint32_t wch)
1890 struct locale_ctype_t *ctype;
1891 uint32_t *result = NULL;
1893 assert (locale != NULL);
1894 ctype = locale->categories[LC_CTYPE].ctype;
1896 if (ctype == NULL)
1897 return NULL;
1899 if (ctype->translit != NULL)
1900 result = find_translit2 (ctype, charmap, wch);
1902 if (result == NULL)
1904 struct translit_include_t *irunp = ctype->translit_include;
1906 while (irunp != NULL && result == NULL)
1908 result = find_translit (find_locale (CTYPE_LOCALE,
1909 irunp->copy_locale,
1910 irunp->copy_repertoire,
1911 charmap),
1912 charmap, wch);
1913 irunp = irunp->next;
1917 return result;
1921 /* Read one transliteration entry. */
1922 static uint32_t *
1923 read_widestring (struct linereader *ldfile, struct token *now,
1924 const struct charmap_t *charmap,
1925 struct repertoire_t *repertoire)
1927 uint32_t *wstr;
1929 if (now->tok == tok_default_missing)
1930 /* The special name "" will denote this case. */
1931 wstr = ((uint32_t *) { 0 });
1932 else if (now->tok == tok_bsymbol)
1934 /* Get the value from the repertoire. */
1935 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1936 wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1937 now->val.str.lenmb);
1938 if (wstr[0] == ILLEGAL_CHAR_VALUE)
1940 /* We cannot proceed, we don't know the UCS4 value. */
1941 free (wstr);
1942 return NULL;
1945 wstr[1] = 0;
1947 else if (now->tok == tok_ucs4)
1949 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1950 wstr[0] = now->val.ucs4;
1951 wstr[1] = 0;
1953 else if (now->tok == tok_charcode)
1955 /* Argh, we have to convert to the symbol name first and then to the
1956 UCS4 value. */
1957 struct charseq *seq = charmap_find_symbol (charmap,
1958 now->val.str.startmb,
1959 now->val.str.lenmb);
1960 if (seq == NULL)
1961 /* Cannot find the UCS4 value. */
1962 return NULL;
1964 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1965 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1966 strlen (seq->name));
1967 if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1968 /* We cannot proceed, we don't know the UCS4 value. */
1969 return NULL;
1971 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1972 wstr[0] = seq->ucs4;
1973 wstr[1] = 0;
1975 else if (now->tok == tok_string)
1977 wstr = now->val.str.startwc;
1978 if (wstr == NULL || wstr[0] == 0)
1979 return NULL;
1981 else
1983 if (now->tok != tok_eol && now->tok != tok_eof)
1984 lr_ignore_rest (ldfile, 0);
1985 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1986 return (uint32_t *) -1l;
1989 return wstr;
1993 static void
1994 read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1995 struct token *now, const struct charmap_t *charmap,
1996 struct repertoire_t *repertoire)
1998 uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1999 struct translit_t *result;
2000 struct translit_to_t **top;
2001 struct obstack *ob = &ctype->mempool;
2002 int first;
2003 int ignore;
2005 if (from_wstr == NULL)
2006 /* There is no valid from string. */
2007 return;
2009 result = (struct translit_t *) obstack_alloc (ob,
2010 sizeof (struct translit_t));
2011 result->from = from_wstr;
2012 result->fname = ldfile->fname;
2013 result->lineno = ldfile->lineno;
2014 result->next = NULL;
2015 result->to = NULL;
2016 top = &result->to;
2017 first = 1;
2018 ignore = 0;
2020 while (1)
2022 uint32_t *to_wstr;
2024 /* Next we have one or more transliterations. They are
2025 separated by semicolons. */
2026 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2028 if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
2030 /* One string read. */
2031 const uint32_t zero = 0;
2033 if (!ignore)
2035 obstack_grow (ob, &zero, 4);
2036 to_wstr = obstack_finish (ob);
2038 *top = obstack_alloc (ob, sizeof (struct translit_to_t));
2039 (*top)->str = to_wstr;
2040 (*top)->next = NULL;
2043 if (now->tok == tok_eol)
2045 result->next = ctype->translit;
2046 ctype->translit = result;
2047 return;
2050 if (!ignore)
2051 top = &(*top)->next;
2052 ignore = 0;
2054 else
2056 to_wstr = read_widestring (ldfile, now, charmap, repertoire);
2057 if (to_wstr == (uint32_t *) -1l)
2059 /* An error occurred. */
2060 obstack_free (ob, result);
2061 return;
2064 if (to_wstr == NULL)
2065 ignore = 1;
2066 else
2067 /* This value is usable. */
2068 obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
2070 first = 0;
2076 static void
2077 read_translit_ignore_entry (struct linereader *ldfile,
2078 struct locale_ctype_t *ctype,
2079 const struct charmap_t *charmap,
2080 struct repertoire_t *repertoire)
2082 /* We expect a semicolon-separated list of characters we ignore. We are
2083 only interested in the wide character definitions. These must be
2084 single characters, possibly defining a range when an ellipsis is used. */
2085 while (1)
2087 struct token *now = lr_token (ldfile, charmap, NULL, repertoire,
2088 verbose);
2089 struct translit_ignore_t *newp;
2090 uint32_t from;
2092 if (now->tok == tok_eol || now->tok == tok_eof)
2094 lr_error (ldfile,
2095 _("premature end of `translit_ignore' definition"));
2096 return;
2099 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2101 lr_error (ldfile, _("syntax error"));
2102 lr_ignore_rest (ldfile, 0);
2103 return;
2106 if (now->tok == tok_ucs4)
2107 from = now->val.ucs4;
2108 else
2109 /* Try to get the value. */
2110 from = repertoire_find_value (repertoire, now->val.str.startmb,
2111 now->val.str.lenmb);
2113 if (from == ILLEGAL_CHAR_VALUE)
2115 lr_error (ldfile, "invalid character name");
2116 newp = NULL;
2118 else
2120 newp = (struct translit_ignore_t *)
2121 obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
2122 newp->from = from;
2123 newp->to = from;
2124 newp->step = 1;
2126 newp->next = ctype->translit_ignore;
2127 ctype->translit_ignore = newp;
2130 /* Now we expect either a semicolon, an ellipsis, or the end of the
2131 line. */
2132 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2134 if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
2136 /* XXX Should we bother implementing `....'? `...' certainly
2137 will not be implemented. */
2138 uint32_t to;
2139 int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
2141 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2143 if (now->tok == tok_eol || now->tok == tok_eof)
2145 lr_error (ldfile,
2146 _("premature end of `translit_ignore' definition"));
2147 return;
2150 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2152 lr_error (ldfile, _("syntax error"));
2153 lr_ignore_rest (ldfile, 0);
2154 return;
2157 if (now->tok == tok_ucs4)
2158 to = now->val.ucs4;
2159 else
2160 /* Try to get the value. */
2161 to = repertoire_find_value (repertoire, now->val.str.startmb,
2162 now->val.str.lenmb);
2164 if (to == ILLEGAL_CHAR_VALUE)
2165 lr_error (ldfile, "invalid character name");
2166 else
2168 /* Make sure the `to'-value is larger. */
2169 if (to >= from)
2171 newp->to = to;
2172 newp->step = step;
2174 else
2175 lr_error (ldfile, _("\
2176 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2177 (to | from) < 65536 ? 4 : 8, to,
2178 (to | from) < 65536 ? 4 : 8, from);
2181 /* And the next token. */
2182 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2185 if (now->tok == tok_eol || now->tok == tok_eof)
2186 /* We are done. */
2187 return;
2189 if (now->tok == tok_semicolon)
2190 /* Next round. */
2191 continue;
2193 /* If we come here something is wrong. */
2194 lr_error (ldfile, _("syntax error"));
2195 lr_ignore_rest (ldfile, 0);
2196 return;
2201 /* The parser for the LC_CTYPE section of the locale definition. */
2202 void
2203 ctype_read (struct linereader *ldfile, struct localedef_t *result,
2204 const struct charmap_t *charmap, const char *repertoire_name,
2205 int ignore_content)
2207 struct repertoire_t *repertoire = NULL;
2208 struct locale_ctype_t *ctype;
2209 struct token *now;
2210 enum token_t nowtok;
2211 size_t cnt;
2212 struct charseq *last_seq;
2213 uint32_t last_wch = 0;
2214 enum token_t last_token;
2215 enum token_t ellipsis_token;
2216 int step;
2217 char last_charcode[16];
2218 size_t last_charcode_len = 0;
2219 const char *last_str = NULL;
2220 int mapidx;
2221 struct localedef_t *copy_locale = NULL;
2223 /* Get the repertoire we have to use. */
2224 if (repertoire_name != NULL)
2225 repertoire = repertoire_read (repertoire_name);
2227 /* The rest of the line containing `LC_CTYPE' must be free. */
2228 lr_ignore_rest (ldfile, 1);
2233 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2234 nowtok = now->tok;
2236 while (nowtok == tok_eol);
2238 /* If we see `copy' now we are almost done. */
2239 if (nowtok == tok_copy)
2241 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2242 if (now->tok != tok_string)
2244 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2246 skip_category:
2248 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2249 while (now->tok != tok_eof && now->tok != tok_end);
2251 if (now->tok != tok_eof
2252 || (now = lr_token (ldfile, charmap, NULL, NULL, verbose),
2253 now->tok == tok_eof))
2254 lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2255 else if (now->tok != tok_lc_ctype)
2257 lr_error (ldfile, _("\
2258 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2259 lr_ignore_rest (ldfile, 0);
2261 else
2262 lr_ignore_rest (ldfile, 1);
2264 return;
2267 if (! ignore_content)
2269 /* Get the locale definition. */
2270 copy_locale = load_locale (LC_CTYPE, now->val.str.startmb,
2271 repertoire_name, charmap, NULL);
2272 if ((copy_locale->avail & CTYPE_LOCALE) == 0)
2274 /* Not yet loaded. So do it now. */
2275 if (locfile_read (copy_locale, charmap) != 0)
2276 goto skip_category;
2279 if (copy_locale->categories[LC_CTYPE].ctype == NULL)
2280 return;
2283 lr_ignore_rest (ldfile, 1);
2285 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2286 nowtok = now->tok;
2289 /* Prepare the data structures. */
2290 ctype_startup (ldfile, result, charmap, copy_locale, ignore_content);
2291 ctype = result->categories[LC_CTYPE].ctype;
2293 /* Remember the repertoire we use. */
2294 if (!ignore_content)
2295 ctype->repertoire = repertoire;
2297 while (1)
2299 unsigned long int class_bit = 0;
2300 unsigned long int class256_bit = 0;
2301 int handle_digits = 0;
2303 /* Of course we don't proceed beyond the end of file. */
2304 if (nowtok == tok_eof)
2305 break;
2307 /* Ingore empty lines. */
2308 if (nowtok == tok_eol)
2310 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2311 nowtok = now->tok;
2312 continue;
2315 switch (nowtok)
2317 case tok_charclass:
2318 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2319 while (now->tok == tok_ident || now->tok == tok_string)
2321 ctype_class_new (ldfile, ctype, now->val.str.startmb);
2322 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2323 if (now->tok != tok_semicolon)
2324 break;
2325 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2327 if (now->tok != tok_eol)
2328 SYNTAX_ERROR (_("\
2329 %s: syntax error in definition of new character class"), "LC_CTYPE");
2330 break;
2332 case tok_charconv:
2333 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2334 while (now->tok == tok_ident || now->tok == tok_string)
2336 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2337 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2338 if (now->tok != tok_semicolon)
2339 break;
2340 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2342 if (now->tok != tok_eol)
2343 SYNTAX_ERROR (_("\
2344 %s: syntax error in definition of new character map"), "LC_CTYPE");
2345 break;
2347 case tok_class:
2348 /* Ignore the rest of the line if we don't need the input of
2349 this line. */
2350 if (ignore_content)
2352 lr_ignore_rest (ldfile, 0);
2353 break;
2356 /* We simply forget the `class' keyword and use the following
2357 operand to determine the bit. */
2358 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2359 if (now->tok == tok_ident || now->tok == tok_string)
2361 /* Must can be one of the predefined class names. */
2362 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2363 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
2364 break;
2365 if (cnt >= ctype->nr_charclass)
2367 #ifdef PREDEFINED_CLASSES
2368 if (now->val.str.lenmb == 8
2369 && memcmp ("special1", now->val.str.startmb, 8) == 0)
2370 class_bit = _ISwspecial1;
2371 else if (now->val.str.lenmb == 8
2372 && memcmp ("special2", now->val.str.startmb, 8) == 0)
2373 class_bit = _ISwspecial2;
2374 else if (now->val.str.lenmb == 8
2375 && memcmp ("special3", now->val.str.startmb, 8) == 0)
2376 class_bit = _ISwspecial3;
2377 else
2378 #endif
2380 /* OK, it's a new class. */
2381 ctype_class_new (ldfile, ctype, now->val.str.startmb);
2383 class_bit = _ISwbit (ctype->nr_charclass - 1);
2386 else
2388 class_bit = _ISwbit (cnt);
2390 free (now->val.str.startmb);
2393 else if (now->tok == tok_digit)
2394 goto handle_tok_digit;
2395 else if (now->tok < tok_upper || now->tok > tok_blank)
2396 goto err_label;
2397 else
2399 class_bit = BITw (now->tok);
2400 class256_bit = BIT (now->tok);
2403 /* The next character must be a semicolon. */
2404 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2405 if (now->tok != tok_semicolon)
2406 goto err_label;
2407 goto read_charclass;
2409 case tok_upper:
2410 case tok_lower:
2411 case tok_alpha:
2412 case tok_alnum:
2413 case tok_space:
2414 case tok_cntrl:
2415 case tok_punct:
2416 case tok_graph:
2417 case tok_print:
2418 case tok_xdigit:
2419 case tok_blank:
2420 /* Ignore the rest of the line if we don't need the input of
2421 this line. */
2422 if (ignore_content)
2424 lr_ignore_rest (ldfile, 0);
2425 break;
2428 class_bit = BITw (now->tok);
2429 class256_bit = BIT (now->tok);
2430 handle_digits = 0;
2431 read_charclass:
2432 ctype->class_done |= class_bit;
2433 last_token = tok_none;
2434 ellipsis_token = tok_none;
2435 step = 1;
2436 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2437 while (now->tok != tok_eol && now->tok != tok_eof)
2439 uint32_t wch;
2440 struct charseq *seq;
2442 if (ellipsis_token == tok_none)
2444 if (get_character (now, charmap, repertoire, &seq, &wch))
2445 goto err_label;
2447 if (!ignore_content && seq != NULL && seq->nbytes == 1)
2448 /* Yep, we can store information about this byte
2449 sequence. */
2450 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
2452 if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2453 && class_bit != 0)
2454 /* We have the UCS4 position. */
2455 *find_idx (ctype, &ctype->class_collection,
2456 &ctype->class_collection_max,
2457 &ctype->class_collection_act, wch) |= class_bit;
2459 last_token = now->tok;
2460 /* Terminate the string. */
2461 if (last_token == tok_bsymbol)
2463 now->val.str.startmb[now->val.str.lenmb] = '\0';
2464 last_str = now->val.str.startmb;
2466 else
2467 last_str = NULL;
2468 last_seq = seq;
2469 last_wch = wch;
2470 memcpy (last_charcode, now->val.charcode.bytes, 16);
2471 last_charcode_len = now->val.charcode.nbytes;
2473 if (!ignore_content && handle_digits == 1)
2475 /* We must store the digit values. */
2476 if (ctype->mbdigits_act == ctype->mbdigits_max)
2478 ctype->mbdigits_max += 10;
2479 ctype->mbdigits = xrealloc (ctype->mbdigits,
2480 (ctype->mbdigits_max
2481 * sizeof (char *)));
2482 ctype->wcdigits_max += 10;
2483 ctype->wcdigits = xrealloc (ctype->wcdigits,
2484 (ctype->wcdigits_max
2485 * sizeof (uint32_t)));
2488 ctype->mbdigits[ctype->mbdigits_act++] = seq;
2489 ctype->wcdigits[ctype->wcdigits_act++] = wch;
2491 else if (!ignore_content && handle_digits == 2)
2493 /* We must store the digit values. */
2494 if (ctype->outdigits_act >= 10)
2496 lr_error (ldfile, _("\
2497 %s: field `%s' does not contain exactly ten entries"),
2498 "LC_CTYPE", "outdigit");
2499 lr_ignore_rest (ldfile, 0);
2500 break;
2503 ctype->mboutdigits[ctype->outdigits_act] = seq;
2504 ctype->wcoutdigits[ctype->outdigits_act] = wch;
2505 ++ctype->outdigits_act;
2508 else
2510 /* Now it gets complicated. We have to resolve the
2511 ellipsis problem. First we must distinguish between
2512 the different kind of ellipsis and this must match the
2513 tokens we have seen. */
2514 assert (last_token != tok_none);
2516 if (last_token != now->tok)
2518 lr_error (ldfile, _("\
2519 ellipsis range must be marked by two operands of same type"));
2520 lr_ignore_rest (ldfile, 0);
2521 break;
2524 if (last_token == tok_bsymbol)
2526 if (ellipsis_token == tok_ellipsis3)
2527 lr_error (ldfile, _("with symbolic name range values \
2528 the absolute ellipsis `...' must not be used"));
2530 charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2531 repertoire, now, last_str,
2532 class256_bit, class_bit,
2533 (ellipsis_token
2534 == tok_ellipsis4
2535 ? 10 : 16),
2536 ignore_content,
2537 handle_digits, step);
2539 else if (last_token == tok_ucs4)
2541 if (ellipsis_token != tok_ellipsis2)
2542 lr_error (ldfile, _("\
2543 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2545 charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2546 repertoire, now, last_wch,
2547 class256_bit, class_bit,
2548 ignore_content, handle_digits,
2549 step);
2551 else
2553 assert (last_token == tok_charcode);
2555 if (ellipsis_token != tok_ellipsis3)
2556 lr_error (ldfile, _("\
2557 with character code range values one must use the absolute ellipsis `...'"));
2559 charclass_charcode_ellipsis (ldfile, ctype, charmap,
2560 repertoire, now,
2561 last_charcode,
2562 last_charcode_len,
2563 class256_bit, class_bit,
2564 ignore_content,
2565 handle_digits);
2568 /* Now we have used the last value. */
2569 last_token = tok_none;
2572 /* Next we expect a semicolon or the end of the line. */
2573 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2574 if (now->tok == tok_eol || now->tok == tok_eof)
2575 break;
2577 if (last_token != tok_none
2578 && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
2580 if (now->tok == tok_ellipsis2_2)
2582 now->tok = tok_ellipsis2;
2583 step = 2;
2585 else if (now->tok == tok_ellipsis4_2)
2587 now->tok = tok_ellipsis4;
2588 step = 2;
2591 ellipsis_token = now->tok;
2593 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2594 continue;
2597 if (now->tok != tok_semicolon)
2598 goto err_label;
2600 /* And get the next character. */
2601 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2603 ellipsis_token = tok_none;
2604 step = 1;
2606 break;
2608 case tok_digit:
2609 /* Ignore the rest of the line if we don't need the input of
2610 this line. */
2611 if (ignore_content)
2613 lr_ignore_rest (ldfile, 0);
2614 break;
2617 handle_tok_digit:
2618 class_bit = _ISwdigit;
2619 class256_bit = _ISdigit;
2620 handle_digits = 1;
2621 goto read_charclass;
2623 case tok_outdigit:
2624 /* Ignore the rest of the line if we don't need the input of
2625 this line. */
2626 if (ignore_content)
2628 lr_ignore_rest (ldfile, 0);
2629 break;
2632 if (ctype->outdigits_act != 0)
2633 lr_error (ldfile, _("\
2634 %s: field `%s' declared more than once"),
2635 "LC_CTYPE", "outdigit");
2636 class_bit = 0;
2637 class256_bit = 0;
2638 handle_digits = 2;
2639 goto read_charclass;
2641 case tok_toupper:
2642 /* Ignore the rest of the line if we don't need the input of
2643 this line. */
2644 if (ignore_content)
2646 lr_ignore_rest (ldfile, 0);
2647 break;
2650 mapidx = 0;
2651 goto read_mapping;
2653 case tok_tolower:
2654 /* Ignore the rest of the line if we don't need the input of
2655 this line. */
2656 if (ignore_content)
2658 lr_ignore_rest (ldfile, 0);
2659 break;
2662 mapidx = 1;
2663 goto read_mapping;
2665 case tok_map:
2666 /* Ignore the rest of the line if we don't need the input of
2667 this line. */
2668 if (ignore_content)
2670 lr_ignore_rest (ldfile, 0);
2671 break;
2674 /* We simply forget the `map' keyword and use the following
2675 operand to determine the mapping. */
2676 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2677 if (now->tok == tok_ident || now->tok == tok_string)
2679 size_t cnt;
2681 for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2682 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2683 break;
2685 if (cnt < ctype->map_collection_nr)
2686 free (now->val.str.startmb);
2687 else
2688 /* OK, it's a new map. */
2689 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2691 mapidx = cnt;
2693 else if (now->tok < tok_toupper || now->tok > tok_tolower)
2694 goto err_label;
2695 else
2696 mapidx = now->tok - tok_toupper;
2698 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2699 /* This better should be a semicolon. */
2700 if (now->tok != tok_semicolon)
2701 goto err_label;
2703 read_mapping:
2704 /* Test whether this mapping was already defined. */
2705 if (ctype->tomap_done[mapidx])
2707 lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2708 ctype->mapnames[mapidx]);
2709 lr_ignore_rest (ldfile, 0);
2710 break;
2712 ctype->tomap_done[mapidx] = 1;
2714 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2715 while (now->tok != tok_eol && now->tok != tok_eof)
2717 struct charseq *from_seq;
2718 uint32_t from_wch;
2719 struct charseq *to_seq;
2720 uint32_t to_wch;
2722 /* Every pair starts with an opening brace. */
2723 if (now->tok != tok_open_brace)
2724 goto err_label;
2726 /* Next comes the from-value. */
2727 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2728 if (get_character (now, charmap, repertoire, &from_seq,
2729 &from_wch) != 0)
2730 goto err_label;
2732 /* The next is a comma. */
2733 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2734 if (now->tok != tok_comma)
2735 goto err_label;
2737 /* And the other value. */
2738 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2739 if (get_character (now, charmap, repertoire, &to_seq,
2740 &to_wch) != 0)
2741 goto err_label;
2743 /* And the last thing is the closing brace. */
2744 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2745 if (now->tok != tok_close_brace)
2746 goto err_label;
2748 if (!ignore_content)
2750 /* Check whether the mapping converts from an ASCII value
2751 to a non-ASCII value. */
2752 if (from_seq != NULL && from_seq->nbytes == 1
2753 && isascii (from_seq->bytes[0])
2754 && to_seq != NULL && (to_seq->nbytes != 1
2755 || !isascii (to_seq->bytes[0])))
2756 ctype->to_nonascii = 1;
2758 if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2759 && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2760 /* We can use this value. */
2761 ctype->map256_collection[mapidx][from_seq->bytes[0]]
2762 = to_seq->bytes[0];
2764 if (from_wch != ILLEGAL_CHAR_VALUE
2765 && to_wch != ILLEGAL_CHAR_VALUE)
2766 /* Both correct values. */
2767 *find_idx (ctype, &ctype->map_collection[mapidx],
2768 &ctype->map_collection_max[mapidx],
2769 &ctype->map_collection_act[mapidx],
2770 from_wch) = to_wch;
2773 /* Now comes a semicolon or the end of the line/file. */
2774 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2775 if (now->tok == tok_semicolon)
2776 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2778 break;
2780 case tok_translit_start:
2781 /* Ignore the entire translit section with its peculiar syntax
2782 if we don't need the input. */
2783 if (ignore_content)
2787 lr_ignore_rest (ldfile, 0);
2788 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2790 while (now->tok != tok_translit_end && now->tok != tok_eof);
2792 if (now->tok == tok_eof)
2793 lr_error (ldfile, _(\
2794 "%s: `translit_start' section does not end with `translit_end'"),
2795 "LC_CTYPE");
2797 break;
2800 /* The rest of the line better should be empty. */
2801 lr_ignore_rest (ldfile, 1);
2803 /* We count here the number of allocated entries in the `translit'
2804 array. */
2805 cnt = 0;
2807 ldfile->translate_strings = 1;
2808 ldfile->return_widestr = 1;
2810 /* We proceed until we see the `translit_end' token. */
2811 while (now = lr_token (ldfile, charmap, NULL, repertoire, verbose),
2812 now->tok != tok_translit_end && now->tok != tok_eof)
2814 if (now->tok == tok_eol)
2815 /* Ignore empty lines. */
2816 continue;
2818 if (now->tok == tok_include)
2820 /* We have to include locale. */
2821 const char *locale_name;
2822 const char *repertoire_name;
2823 struct translit_include_t *include_stmt, **include_ptr;
2825 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2826 /* This should be a string or an identifier. In any
2827 case something to name a locale. */
2828 if (now->tok != tok_string && now->tok != tok_ident)
2830 translit_syntax:
2831 lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2832 lr_ignore_rest (ldfile, 0);
2833 continue;
2835 locale_name = now->val.str.startmb;
2837 /* Next should be a semicolon. */
2838 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2839 if (now->tok != tok_semicolon)
2840 goto translit_syntax;
2842 /* Now the repertoire name. */
2843 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2844 if ((now->tok != tok_string && now->tok != tok_ident)
2845 || now->val.str.startmb == NULL)
2846 goto translit_syntax;
2847 repertoire_name = now->val.str.startmb;
2848 if (repertoire_name[0] == '\0')
2849 /* Ignore the empty string. */
2850 repertoire_name = NULL;
2852 /* Save the include statement for later processing. */
2853 include_stmt = (struct translit_include_t *)
2854 xmalloc (sizeof (struct translit_include_t));
2855 include_stmt->copy_locale = locale_name;
2856 include_stmt->copy_repertoire = repertoire_name;
2857 include_stmt->next = NULL;
2859 include_ptr = &ctype->translit_include;
2860 while (*include_ptr != NULL)
2861 include_ptr = &(*include_ptr)->next;
2862 *include_ptr = include_stmt;
2864 /* The rest of the line must be empty. */
2865 lr_ignore_rest (ldfile, 1);
2867 /* Make sure the locale is read. */
2868 add_to_readlist (LC_CTYPE, locale_name, repertoire_name,
2869 1, NULL);
2870 continue;
2872 else if (now->tok == tok_default_missing)
2874 uint32_t *wstr;
2876 while (1)
2878 /* We expect a single character or string as the
2879 argument. */
2880 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2881 wstr = read_widestring (ldfile, now, charmap,
2882 repertoire);
2884 if (wstr != NULL)
2886 if (ctype->default_missing != NULL)
2888 lr_error (ldfile, _("\
2889 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2890 WITH_CUR_LOCALE (error_at_line (0, 0,
2891 ctype->default_missing_file,
2892 ctype->default_missing_lineno,
2893 _("\
2894 previous definition was here")));
2896 else
2898 ctype->default_missing = wstr;
2899 ctype->default_missing_file = ldfile->fname;
2900 ctype->default_missing_lineno = ldfile->lineno;
2902 /* We can have more entries, ignore them. */
2903 lr_ignore_rest (ldfile, 0);
2904 break;
2906 else if (wstr == (uint32_t *) -1l)
2907 /* This was an syntax error. */
2908 break;
2910 /* Maybe there is another replacement we can use. */
2911 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2912 if (now->tok == tok_eol || now->tok == tok_eof)
2914 /* Nothing found. We tell the user. */
2915 lr_error (ldfile, _("\
2916 %s: no representable `default_missing' definition found"), "LC_CTYPE");
2917 break;
2919 if (now->tok != tok_semicolon)
2920 goto translit_syntax;
2923 continue;
2925 else if (now->tok == tok_translit_ignore)
2927 read_translit_ignore_entry (ldfile, ctype, charmap,
2928 repertoire);
2929 continue;
2932 read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2934 ldfile->return_widestr = 0;
2936 if (now->tok == tok_eof)
2937 lr_error (ldfile, _(\
2938 "%s: `translit_start' section does not end with `translit_end'"),
2939 "LC_CTYPE");
2941 break;
2943 case tok_ident:
2944 /* Ignore the rest of the line if we don't need the input of
2945 this line. */
2946 if (ignore_content)
2948 lr_ignore_rest (ldfile, 0);
2949 break;
2952 /* This could mean one of several things. First test whether
2953 it's a character class name. */
2954 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2955 if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2956 break;
2957 if (cnt < ctype->nr_charclass)
2959 class_bit = _ISwbit (cnt);
2960 class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2961 free (now->val.str.startmb);
2962 goto read_charclass;
2964 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2965 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2966 break;
2967 if (cnt < ctype->map_collection_nr)
2969 mapidx = cnt;
2970 free (now->val.str.startmb);
2971 goto read_mapping;
2973 #ifdef PREDEFINED_CLASSES
2974 if (strcmp (now->val.str.startmb, "special1") == 0)
2976 class_bit = _ISwspecial1;
2977 free (now->val.str.startmb);
2978 goto read_charclass;
2980 if (strcmp (now->val.str.startmb, "special2") == 0)
2982 class_bit = _ISwspecial2;
2983 free (now->val.str.startmb);
2984 goto read_charclass;
2986 if (strcmp (now->val.str.startmb, "special3") == 0)
2988 class_bit = _ISwspecial3;
2989 free (now->val.str.startmb);
2990 goto read_charclass;
2992 if (strcmp (now->val.str.startmb, "tosymmetric") == 0)
2994 mapidx = 2;
2995 goto read_mapping;
2997 #endif
2998 break;
3000 case tok_end:
3001 /* Next we assume `LC_CTYPE'. */
3002 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
3003 if (now->tok == tok_eof)
3004 break;
3005 if (now->tok == tok_eol)
3006 lr_error (ldfile, _("%s: incomplete `END' line"),
3007 "LC_CTYPE");
3008 else if (now->tok != tok_lc_ctype)
3009 lr_error (ldfile, _("\
3010 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
3011 lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
3012 return;
3014 default:
3015 err_label:
3016 if (now->tok != tok_eof)
3017 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
3020 /* Prepare for the next round. */
3021 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
3022 nowtok = now->tok;
3025 /* When we come here we reached the end of the file. */
3026 lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
3030 static void
3031 set_class_defaults (struct locale_ctype_t *ctype,
3032 const struct charmap_t *charmap,
3033 struct repertoire_t *repertoire)
3035 size_t cnt;
3037 /* These function defines the default values for the classes and conversions
3038 according to POSIX.2 2.5.2.1.
3039 It may seem that the order of these if-blocks is arbitrary but it is NOT.
3040 Don't move them unless you know what you do! */
3042 auto void set_default (int bitpos, int from, int to);
3044 void set_default (int bitpos, int from, int to)
3046 char tmp[2];
3047 int ch;
3048 int bit = _ISbit (bitpos);
3049 int bitw = _ISwbit (bitpos);
3050 /* Define string. */
3051 strcpy (tmp, "?");
3053 for (ch = from; ch <= to; ++ch)
3055 struct charseq *seq;
3056 tmp[0] = ch;
3058 seq = charmap_find_value (charmap, tmp, 1);
3059 if (seq == NULL)
3061 char buf[10];
3062 sprintf (buf, "U%08X", ch);
3063 seq = charmap_find_value (charmap, buf, 9);
3065 if (seq == NULL)
3067 if (!be_quiet)
3068 WITH_CUR_LOCALE (error (0, 0, _("\
3069 %s: character `%s' not defined while needed as default value"),
3070 "LC_CTYPE", tmp));
3072 else if (seq->nbytes != 1)
3073 WITH_CUR_LOCALE (error (0, 0, _("\
3074 %s: character `%s' in charmap not representable with one byte"),
3075 "LC_CTYPE", tmp));
3076 else
3077 ctype->class256_collection[seq->bytes[0]] |= bit;
3079 /* No need to search here, the ASCII value is also the Unicode
3080 value. */
3081 ELEM (ctype, class_collection, , ch) |= bitw;
3085 /* Set default values if keyword was not present. */
3086 if ((ctype->class_done & BITw (tok_upper)) == 0)
3087 /* "If this keyword [lower] is not specified, the lowercase letters
3088 `A' through `Z', ..., shall automatically belong to this class,
3089 with implementation defined character values." [P1003.2, 2.5.2.1] */
3090 set_default (BITPOS (tok_upper), 'A', 'Z');
3092 if ((ctype->class_done & BITw (tok_lower)) == 0)
3093 /* "If this keyword [lower] is not specified, the lowercase letters
3094 `a' through `z', ..., shall automatically belong to this class,
3095 with implementation defined character values." [P1003.2, 2.5.2.1] */
3096 set_default (BITPOS (tok_lower), 'a', 'z');
3098 if ((ctype->class_done & BITw (tok_alpha)) == 0)
3100 /* Table 2-6 in P1003.2 says that characters in class `upper' or
3101 class `lower' *must* be in class `alpha'. */
3102 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
3103 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
3105 for (cnt = 0; cnt < 256; ++cnt)
3106 if ((ctype->class256_collection[cnt] & mask) != 0)
3107 ctype->class256_collection[cnt] |= BIT (tok_alpha);
3109 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3110 if ((ctype->class_collection[cnt] & maskw) != 0)
3111 ctype->class_collection[cnt] |= BITw (tok_alpha);
3114 if ((ctype->class_done & BITw (tok_digit)) == 0)
3115 /* "If this keyword [digit] is not specified, the digits `0' through
3116 `9', ..., shall automatically belong to this class, with
3117 implementation-defined character values." [P1003.2, 2.5.2.1] */
3118 set_default (BITPOS (tok_digit), '0', '9');
3120 /* "Only characters specified for the `alpha' and `digit' keyword
3121 shall be specified. Characters specified for the keyword `alpha'
3122 and `digit' are automatically included in this class. */
3124 unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
3125 unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
3127 for (cnt = 0; cnt < 256; ++cnt)
3128 if ((ctype->class256_collection[cnt] & mask) != 0)
3129 ctype->class256_collection[cnt] |= BIT (tok_alnum);
3131 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3132 if ((ctype->class_collection[cnt] & maskw) != 0)
3133 ctype->class_collection[cnt] |= BITw (tok_alnum);
3136 if ((ctype->class_done & BITw (tok_space)) == 0)
3137 /* "If this keyword [space] is not specified, the characters <space>,
3138 <form-feed>, <newline>, <carriage-return>, <tab>, and
3139 <vertical-tab>, ..., shall automatically belong to this class,
3140 with implementation-defined character values." [P1003.2, 2.5.2.1] */
3142 struct charseq *seq;
3144 seq = charmap_find_value (charmap, "space", 5);
3145 if (seq == NULL)
3146 seq = charmap_find_value (charmap, "SP", 2);
3147 if (seq == NULL)
3148 seq = charmap_find_value (charmap, "U00000020", 9);
3149 if (seq == NULL)
3151 if (!be_quiet)
3152 WITH_CUR_LOCALE (error (0, 0, _("\
3153 %s: character `%s' not defined while needed as default value"),
3154 "LC_CTYPE", "<space>"));
3156 else if (seq->nbytes != 1)
3157 WITH_CUR_LOCALE (error (0, 0, _("\
3158 %s: character `%s' in charmap not representable with one byte"),
3159 "LC_CTYPE", "<space>"));
3160 else
3161 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3163 /* No need to search. */
3164 ELEM (ctype, class_collection, , L' ') |= BITw (tok_space);
3166 seq = charmap_find_value (charmap, "form-feed", 9);
3167 if (seq == NULL)
3168 seq = charmap_find_value (charmap, "U0000000C", 9);
3169 if (seq == NULL)
3171 if (!be_quiet)
3172 WITH_CUR_LOCALE (error (0, 0, _("\
3173 %s: character `%s' not defined while needed as default value"),
3174 "LC_CTYPE", "<form-feed>"));
3176 else if (seq->nbytes != 1)
3177 WITH_CUR_LOCALE (error (0, 0, _("\
3178 %s: character `%s' in charmap not representable with one byte"),
3179 "LC_CTYPE", "<form-feed>"));
3180 else
3181 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3183 /* No need to search. */
3184 ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space);
3187 seq = charmap_find_value (charmap, "newline", 7);
3188 if (seq == NULL)
3189 seq = charmap_find_value (charmap, "U0000000A", 9);
3190 if (seq == NULL)
3192 if (!be_quiet)
3193 WITH_CUR_LOCALE (error (0, 0, _("\
3194 %s: character `%s' not defined while needed as default value"),
3195 "LC_CTYPE", "<newline>"));
3197 else if (seq->nbytes != 1)
3198 WITH_CUR_LOCALE (error (0, 0, _("\
3199 %s: character `%s' in charmap not representable with one byte"),
3200 "LC_CTYPE", "<newline>"));
3201 else
3202 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3204 /* No need to search. */
3205 ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space);
3208 seq = charmap_find_value (charmap, "carriage-return", 15);
3209 if (seq == NULL)
3210 seq = charmap_find_value (charmap, "U0000000D", 9);
3211 if (seq == NULL)
3213 if (!be_quiet)
3214 WITH_CUR_LOCALE (error (0, 0, _("\
3215 %s: character `%s' not defined while needed as default value"),
3216 "LC_CTYPE", "<carriage-return>"));
3218 else if (seq->nbytes != 1)
3219 WITH_CUR_LOCALE (error (0, 0, _("\
3220 %s: character `%s' in charmap not representable with one byte"),
3221 "LC_CTYPE", "<carriage-return>"));
3222 else
3223 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3225 /* No need to search. */
3226 ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space);
3229 seq = charmap_find_value (charmap, "tab", 3);
3230 if (seq == NULL)
3231 seq = charmap_find_value (charmap, "U00000009", 9);
3232 if (seq == NULL)
3234 if (!be_quiet)
3235 WITH_CUR_LOCALE (error (0, 0, _("\
3236 %s: character `%s' not defined while needed as default value"),
3237 "LC_CTYPE", "<tab>"));
3239 else if (seq->nbytes != 1)
3240 WITH_CUR_LOCALE (error (0, 0, _("\
3241 %s: character `%s' in charmap not representable with one byte"),
3242 "LC_CTYPE", "<tab>"));
3243 else
3244 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3246 /* No need to search. */
3247 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space);
3250 seq = charmap_find_value (charmap, "vertical-tab", 12);
3251 if (seq == NULL)
3252 seq = charmap_find_value (charmap, "U0000000B", 9);
3253 if (seq == NULL)
3255 if (!be_quiet)
3256 WITH_CUR_LOCALE (error (0, 0, _("\
3257 %s: character `%s' not defined while needed as default value"),
3258 "LC_CTYPE", "<vertical-tab>"));
3260 else if (seq->nbytes != 1)
3261 WITH_CUR_LOCALE (error (0, 0, _("\
3262 %s: character `%s' in charmap not representable with one byte"),
3263 "LC_CTYPE", "<vertical-tab>"));
3264 else
3265 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3267 /* No need to search. */
3268 ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space);
3271 if ((ctype->class_done & BITw (tok_xdigit)) == 0)
3272 /* "If this keyword is not specified, the digits `0' to `9', the
3273 uppercase letters `A' through `F', and the lowercase letters `a'
3274 through `f', ..., shell automatically belong to this class, with
3275 implementation defined character values." [P1003.2, 2.5.2.1] */
3277 set_default (BITPOS (tok_xdigit), '0', '9');
3278 set_default (BITPOS (tok_xdigit), 'A', 'F');
3279 set_default (BITPOS (tok_xdigit), 'a', 'f');
3282 if ((ctype->class_done & BITw (tok_blank)) == 0)
3283 /* "If this keyword [blank] is unspecified, the characters <space> and
3284 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
3286 struct charseq *seq;
3288 seq = charmap_find_value (charmap, "space", 5);
3289 if (seq == NULL)
3290 seq = charmap_find_value (charmap, "SP", 2);
3291 if (seq == NULL)
3292 seq = charmap_find_value (charmap, "U00000020", 9);
3293 if (seq == NULL)
3295 if (!be_quiet)
3296 WITH_CUR_LOCALE (error (0, 0, _("\
3297 %s: character `%s' not defined while needed as default value"),
3298 "LC_CTYPE", "<space>"));
3300 else if (seq->nbytes != 1)
3301 WITH_CUR_LOCALE (error (0, 0, _("\
3302 %s: character `%s' in charmap not representable with one byte"),
3303 "LC_CTYPE", "<space>"));
3304 else
3305 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3307 /* No need to search. */
3308 ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank);
3311 seq = charmap_find_value (charmap, "tab", 3);
3312 if (seq == NULL)
3313 seq = charmap_find_value (charmap, "U00000009", 9);
3314 if (seq == NULL)
3316 if (!be_quiet)
3317 WITH_CUR_LOCALE (error (0, 0, _("\
3318 %s: character `%s' not defined while needed as default value"),
3319 "LC_CTYPE", "<tab>"));
3321 else if (seq->nbytes != 1)
3322 WITH_CUR_LOCALE (error (0, 0, _("\
3323 %s: character `%s' in charmap not representable with one byte"),
3324 "LC_CTYPE", "<tab>"));
3325 else
3326 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3328 /* No need to search. */
3329 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank);
3332 if ((ctype->class_done & BITw (tok_graph)) == 0)
3333 /* "If this keyword [graph] is not specified, characters specified for
3334 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3335 shall belong to this character class." [P1003.2, 2.5.2.1] */
3337 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3338 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3339 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3340 BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3341 BITw (tok_punct);
3342 size_t cnt;
3344 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3345 if ((ctype->class_collection[cnt] & maskw) != 0)
3346 ctype->class_collection[cnt] |= BITw (tok_graph);
3348 for (cnt = 0; cnt < 256; ++cnt)
3349 if ((ctype->class256_collection[cnt] & mask) != 0)
3350 ctype->class256_collection[cnt] |= BIT (tok_graph);
3353 if ((ctype->class_done & BITw (tok_print)) == 0)
3354 /* "If this keyword [print] is not provided, characters specified for
3355 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3356 and the <space> character shall belong to this character class."
3357 [P1003.2, 2.5.2.1] */
3359 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3360 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3361 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3362 BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3363 BITw (tok_punct);
3364 size_t cnt;
3365 struct charseq *seq;
3367 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3368 if ((ctype->class_collection[cnt] & maskw) != 0)
3369 ctype->class_collection[cnt] |= BITw (tok_print);
3371 for (cnt = 0; cnt < 256; ++cnt)
3372 if ((ctype->class256_collection[cnt] & mask) != 0)
3373 ctype->class256_collection[cnt] |= BIT (tok_print);
3376 seq = charmap_find_value (charmap, "space", 5);
3377 if (seq == NULL)
3378 seq = charmap_find_value (charmap, "SP", 2);
3379 if (seq == NULL)
3380 seq = charmap_find_value (charmap, "U00000020", 9);
3381 if (seq == NULL)
3383 if (!be_quiet)
3384 WITH_CUR_LOCALE (error (0, 0, _("\
3385 %s: character `%s' not defined while needed as default value"),
3386 "LC_CTYPE", "<space>"));
3388 else if (seq->nbytes != 1)
3389 WITH_CUR_LOCALE (error (0, 0, _("\
3390 %s: character `%s' in charmap not representable with one byte"),
3391 "LC_CTYPE", "<space>"));
3392 else
3393 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
3395 /* No need to search. */
3396 ELEM (ctype, class_collection, , L' ') |= BITw (tok_print);
3399 if (ctype->tomap_done[0] == 0)
3400 /* "If this keyword [toupper] is not specified, the lowercase letters
3401 `a' through `z', and their corresponding uppercase letters `A' to
3402 `Z', ..., shall automatically be included, with implementation-
3403 defined character values." [P1003.2, 2.5.2.1] */
3405 char tmp[4];
3406 int ch;
3408 strcpy (tmp, "<?>");
3410 for (ch = 'a'; ch <= 'z'; ++ch)
3412 struct charseq *seq_from, *seq_to;
3414 tmp[1] = (char) ch;
3416 seq_from = charmap_find_value (charmap, &tmp[1], 1);
3417 if (seq_from == NULL)
3419 char buf[10];
3420 sprintf (buf, "U%08X", ch);
3421 seq_from = charmap_find_value (charmap, buf, 9);
3423 if (seq_from == NULL)
3425 if (!be_quiet)
3426 WITH_CUR_LOCALE (error (0, 0, _("\
3427 %s: character `%s' not defined while needed as default value"),
3428 "LC_CTYPE", tmp));
3430 else if (seq_from->nbytes != 1)
3432 if (!be_quiet)
3433 WITH_CUR_LOCALE (error (0, 0, _("\
3434 %s: character `%s' needed as default value not representable with one byte"),
3435 "LC_CTYPE", tmp));
3437 else
3439 /* This conversion is implementation defined. */
3440 tmp[1] = (char) (ch + ('A' - 'a'));
3441 seq_to = charmap_find_value (charmap, &tmp[1], 1);
3442 if (seq_to == NULL)
3444 char buf[10];
3445 sprintf (buf, "U%08X", ch + ('A' - 'a'));
3446 seq_to = charmap_find_value (charmap, buf, 9);
3448 if (seq_to == NULL)
3450 if (!be_quiet)
3451 WITH_CUR_LOCALE (error (0, 0, _("\
3452 %s: character `%s' not defined while needed as default value"),
3453 "LC_CTYPE", tmp));
3455 else if (seq_to->nbytes != 1)
3457 if (!be_quiet)
3458 WITH_CUR_LOCALE (error (0, 0, _("\
3459 %s: character `%s' needed as default value not representable with one byte"),
3460 "LC_CTYPE", tmp));
3462 else
3463 /* The index [0] is determined by the order of the
3464 `ctype_map_newP' calls in `ctype_startup'. */
3465 ctype->map256_collection[0][seq_from->bytes[0]]
3466 = seq_to->bytes[0];
3469 /* No need to search. */
3470 ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
3474 if (ctype->tomap_done[1] == 0)
3475 /* "If this keyword [tolower] is not specified, the mapping shall be
3476 the reverse mapping of the one specified to `toupper'." [P1003.2] */
3478 for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
3479 if (ctype->map_collection[0][cnt] != 0)
3480 ELEM (ctype, map_collection, [1],
3481 ctype->map_collection[0][cnt])
3482 = ctype->charnames[cnt];
3484 for (cnt = 0; cnt < 256; ++cnt)
3485 if (ctype->map256_collection[0][cnt] != 0)
3486 ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
3489 if (ctype->outdigits_act != 10)
3491 if (ctype->outdigits_act != 0)
3492 WITH_CUR_LOCALE (error (0, 0, _("\
3493 %s: field `%s' does not contain exactly ten entries"),
3494 "LC_CTYPE", "outdigit"));
3496 for (cnt = ctype->outdigits_act; cnt < 10; ++cnt)
3498 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3499 (char *) digits + cnt,
3502 if (ctype->mboutdigits[cnt] == NULL)
3503 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3504 longnames[cnt],
3505 strlen (longnames[cnt]));
3507 if (ctype->mboutdigits[cnt] == NULL)
3508 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3509 uninames[cnt], 9);
3511 if (ctype->mboutdigits[cnt] == NULL)
3513 /* Provide a replacement. */
3514 WITH_CUR_LOCALE (error (0, 0, _("\
3515 no output digits defined and none of the standard names in the charmap")));
3517 ctype->mboutdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
3518 sizeof (struct charseq)
3519 + 1);
3521 /* This is better than nothing. */
3522 ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
3523 ctype->mboutdigits[cnt]->nbytes = 1;
3526 ctype->wcoutdigits[cnt] = L'0' + cnt;
3529 ctype->outdigits_act = 10;
/* Construction of sparse 3-level tables.
   See wchar-lookup.h for their structure and the meaning of p and q.  */

struct wctype_table
{
  /* Parameters.  A character WC is split into four indices:
     WC >> (q + p + 5) selects the level-1 entry, the next q bits the
     entry inside a level-2 block, the next p bits the word inside a
     level-3 block, and the low 5 bits the bit inside that word.  */
  unsigned int p, q;

  /* Working representation.  level1_alloc/level1_size count single
     entries; the level-2 counters count blocks of (1 << q) entries and
     the level-3 counters count blocks of (1 << p) words.  */
  size_t level1_alloc, level1_size;
  uint32_t *level1;
  size_t level2_alloc, level2_size;
  uint32_t *level2;
  size_t level3_alloc, level3_size;
  uint32_t *level3;

  /* Compressed representation, produced by wctype_table_finalize.  */
  size_t result_size;
  char *result;
};
3557 /* Initialize. Assumes t->p and t->q have already been set. */
3558 static inline void
3559 wctype_table_init (struct wctype_table *t)
3561 t->level1 = NULL;
3562 t->level1_alloc = t->level1_size = 0;
3563 t->level2 = NULL;
3564 t->level2_alloc = t->level2_size = 0;
3565 t->level3 = NULL;
3566 t->level3_alloc = t->level3_size = 0;
3569 /* Retrieve an entry. */
3570 static inline int
3571 wctype_table_get (struct wctype_table *t, uint32_t wc)
3573 uint32_t index1 = wc >> (t->q + t->p + 5);
3574 if (index1 < t->level1_size)
3576 uint32_t lookup1 = t->level1[index1];
3577 if (lookup1 != EMPTY)
3579 uint32_t index2 = ((wc >> (t->p + 5)) & ((1 << t->q) - 1))
3580 + (lookup1 << t->q);
3581 uint32_t lookup2 = t->level2[index2];
3582 if (lookup2 != EMPTY)
3584 uint32_t index3 = ((wc >> 5) & ((1 << t->p) - 1))
3585 + (lookup2 << t->p);
3586 uint32_t lookup3 = t->level3[index3];
3587 uint32_t index4 = wc & 0x1f;
3589 return (lookup3 >> index4) & 1;
3593 return 0;
3596 /* Add one entry. */
3597 static void
3598 wctype_table_add (struct wctype_table *t, uint32_t wc)
3600 uint32_t index1 = wc >> (t->q + t->p + 5);
3601 uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1);
3602 uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1);
3603 uint32_t index4 = wc & 0x1f;
3604 size_t i, i1, i2;
3606 if (index1 >= t->level1_size)
3608 if (index1 >= t->level1_alloc)
3610 size_t alloc = 2 * t->level1_alloc;
3611 if (alloc <= index1)
3612 alloc = index1 + 1;
3613 t->level1 = (uint32_t *) xrealloc ((char *) t->level1,
3614 alloc * sizeof (uint32_t));
3615 t->level1_alloc = alloc;
3617 while (index1 >= t->level1_size)
3618 t->level1[t->level1_size++] = EMPTY;
3621 if (t->level1[index1] == EMPTY)
3623 if (t->level2_size == t->level2_alloc)
3625 size_t alloc = 2 * t->level2_alloc + 1;
3626 t->level2 = (uint32_t *) xrealloc ((char *) t->level2,
3627 (alloc << t->q) * sizeof (uint32_t));
3628 t->level2_alloc = alloc;
3630 i1 = t->level2_size << t->q;
3631 i2 = (t->level2_size + 1) << t->q;
3632 for (i = i1; i < i2; i++)
3633 t->level2[i] = EMPTY;
3634 t->level1[index1] = t->level2_size++;
3637 index2 += t->level1[index1] << t->q;
3639 if (t->level2[index2] == EMPTY)
3641 if (t->level3_size == t->level3_alloc)
3643 size_t alloc = 2 * t->level3_alloc + 1;
3644 t->level3 = (uint32_t *) xrealloc ((char *) t->level3,
3645 (alloc << t->p) * sizeof (uint32_t));
3646 t->level3_alloc = alloc;
3648 i1 = t->level3_size << t->p;
3649 i2 = (t->level3_size + 1) << t->p;
3650 for (i = i1; i < i2; i++)
3651 t->level3[i] = 0;
3652 t->level2[index2] = t->level3_size++;
3655 index3 += t->level2[index2] << t->p;
3657 t->level3[index3] |= (uint32_t)1 << index4;
3660 /* Finalize and shrink. */
3661 static void
3662 wctype_table_finalize (struct wctype_table *t)
3664 size_t i, j, k;
3665 uint32_t reorder3[t->level3_size];
3666 uint32_t reorder2[t->level2_size];
3667 uint32_t level1_offset, level2_offset, level3_offset;
3669 /* Uniquify level3 blocks. */
3670 k = 0;
3671 for (j = 0; j < t->level3_size; j++)
3673 for (i = 0; i < k; i++)
3674 if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3675 (1 << t->p) * sizeof (uint32_t)) == 0)
3676 break;
3677 /* Relocate block j to block i. */
3678 reorder3[j] = i;
3679 if (i == k)
3681 if (i != j)
3682 memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3683 (1 << t->p) * sizeof (uint32_t));
3684 k++;
3687 t->level3_size = k;
3689 for (i = 0; i < (t->level2_size << t->q); i++)
3690 if (t->level2[i] != EMPTY)
3691 t->level2[i] = reorder3[t->level2[i]];
3693 /* Uniquify level2 blocks. */
3694 k = 0;
3695 for (j = 0; j < t->level2_size; j++)
3697 for (i = 0; i < k; i++)
3698 if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3699 (1 << t->q) * sizeof (uint32_t)) == 0)
3700 break;
3701 /* Relocate block j to block i. */
3702 reorder2[j] = i;
3703 if (i == k)
3705 if (i != j)
3706 memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3707 (1 << t->q) * sizeof (uint32_t));
3708 k++;
3711 t->level2_size = k;
3713 for (i = 0; i < t->level1_size; i++)
3714 if (t->level1[i] != EMPTY)
3715 t->level1[i] = reorder2[t->level1[i]];
3717 /* Create and fill the resulting compressed representation. */
3718 t->result_size =
3719 5 * sizeof (uint32_t)
3720 + t->level1_size * sizeof (uint32_t)
3721 + (t->level2_size << t->q) * sizeof (uint32_t)
3722 + (t->level3_size << t->p) * sizeof (uint32_t);
3723 t->result = (char *) xmalloc (t->result_size);
3725 level1_offset =
3726 5 * sizeof (uint32_t);
3727 level2_offset =
3728 5 * sizeof (uint32_t)
3729 + t->level1_size * sizeof (uint32_t);
3730 level3_offset =
3731 5 * sizeof (uint32_t)
3732 + t->level1_size * sizeof (uint32_t)
3733 + (t->level2_size << t->q) * sizeof (uint32_t);
3735 ((uint32_t *) t->result)[0] = t->q + t->p + 5;
3736 ((uint32_t *) t->result)[1] = t->level1_size;
3737 ((uint32_t *) t->result)[2] = t->p + 5;
3738 ((uint32_t *) t->result)[3] = (1 << t->q) - 1;
3739 ((uint32_t *) t->result)[4] = (1 << t->p) - 1;
3741 for (i = 0; i < t->level1_size; i++)
3742 ((uint32_t *) (t->result + level1_offset))[i] =
3743 (t->level1[i] == EMPTY
3745 : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3747 for (i = 0; i < (t->level2_size << t->q); i++)
3748 ((uint32_t *) (t->result + level2_offset))[i] =
3749 (t->level2[i] == EMPTY
3751 : (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
3753 for (i = 0; i < (t->level3_size << t->p); i++)
3754 ((uint32_t *) (t->result + level3_offset))[i] = t->level3[i];
3756 if (t->level1_alloc > 0)
3757 free (t->level1);
3758 if (t->level2_alloc > 0)
3759 free (t->level2);
3760 if (t->level3_alloc > 0)
3761 free (t->level3);
3764 #define TABLE wcwidth_table
3765 #define ELEMENT uint8_t
3766 #define DEFAULT 0xff
3767 #include "3level.h"
3769 #define TABLE wctrans_table
3770 #define ELEMENT int32_t
3771 #define DEFAULT 0
3772 #define wctrans_table_add wctrans_table_add_internal
3773 #include "3level.h"
3774 #undef wctrans_table_add
3775 /* The wctrans_table must actually store the difference between the
3776 desired result and the argument. */
3777 static inline void
3778 wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
3780 wctrans_table_add_internal (t, wc, mapped_wc - wc);
3784 /* Flattens the included transliterations into a translit list.
3785 Inserts them in the list at `cursor', and returns the new cursor. */
3786 static struct translit_t **
3787 translit_flatten (struct locale_ctype_t *ctype,
3788 const struct charmap_t *charmap,
3789 struct translit_t **cursor)
3791 while (ctype->translit_include != NULL)
3793 const char *copy_locale = ctype->translit_include->copy_locale;
3794 const char *copy_repertoire = ctype->translit_include->copy_repertoire;
3795 struct localedef_t *other;
3797 /* Unchain the include statement. During the depth-first traversal
3798 we don't want to visit any locale more than once. */
3799 ctype->translit_include = ctype->translit_include->next;
3801 other = find_locale (LC_CTYPE, copy_locale, copy_repertoire, charmap);
3803 if (other == NULL || other->categories[LC_CTYPE].ctype == NULL)
3805 WITH_CUR_LOCALE (error (0, 0, _("\
3806 %s: transliteration data from locale `%s' not available"),
3807 "LC_CTYPE", copy_locale));
3809 else
3811 struct locale_ctype_t *other_ctype =
3812 other->categories[LC_CTYPE].ctype;
3814 cursor = translit_flatten (other_ctype, charmap, cursor);
3815 assert (other_ctype->translit_include == NULL);
3817 if (other_ctype->translit != NULL)
3819 /* Insert the other_ctype->translit list at *cursor. */
3820 struct translit_t *endp = other_ctype->translit;
3821 while (endp->next != NULL)
3822 endp = endp->next;
3824 endp->next = *cursor;
3825 *cursor = other_ctype->translit;
3827 /* Avoid any risk of circular lists. */
3828 other_ctype->translit = NULL;
3830 cursor = &endp->next;
3833 if (ctype->default_missing == NULL)
3834 ctype->default_missing = other_ctype->default_missing;
3838 return cursor;
3841 static void
3842 allocate_arrays (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
3843 struct repertoire_t *repertoire)
3845 size_t idx, nr;
3846 const void *key;
3847 size_t len;
3848 void *vdata;
3849 void *curs;
3851 /* You wonder about this amount of memory? This is only because some
3852 users do not manage to address the array with unsigned values or
3853 data types with range >= 256. '\200' would result in the array
3854 index -128. To help these poor people we duplicate the entries for
3855 128 up to 255 below the entry for \0. */
3856 ctype->ctype_b = (char_class_t *) xcalloc (256 + 128, sizeof (char_class_t));
3857 ctype->ctype32_b = (char_class32_t *) xcalloc (256, sizeof (char_class32_t));
3858 ctype->class_b = (uint32_t **)
3859 xmalloc (ctype->nr_charclass * sizeof (uint32_t *));
3860 ctype->class_3level = (struct iovec *)
3861 xmalloc (ctype->nr_charclass * sizeof (struct iovec));
3863 /* This is the array accessed using the multibyte string elements. */
3864 for (idx = 0; idx < 256; ++idx)
3865 ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
3867 /* Mirror first 127 entries. We must take care that entry -1 is not
3868 mirrored because EOF == -1. */
3869 for (idx = 0; idx < 127; ++idx)
3870 ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
3872 /* The 32 bit array contains all characters < 0x100. */
3873 for (idx = 0; idx < ctype->class_collection_act; ++idx)
3874 if (ctype->charnames[idx] < 0x100)
3875 ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
3877 for (nr = 0; nr < ctype->nr_charclass; nr++)
3879 ctype->class_b[nr] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
3881 /* We only set CLASS_B for the bits in the ISO C classes, not
3882 the user defined classes. The number should not change but
3883 who knows. */
3884 #define LAST_ISO_C_BIT 11
3885 if (nr <= LAST_ISO_C_BIT)
3886 for (idx = 0; idx < 256; ++idx)
3887 if (ctype->class256_collection[idx] & _ISbit (nr))
3888 ctype->class_b[nr][idx >> 5] |= (uint32_t) 1 << (idx & 0x1f);
3891 for (nr = 0; nr < ctype->nr_charclass; nr++)
3893 struct wctype_table t;
3895 t.p = 4; /* or: 5 */
3896 t.q = 7; /* or: 6 */
3897 wctype_table_init (&t);
3899 for (idx = 0; idx < ctype->class_collection_act; ++idx)
3900 if (ctype->class_collection[idx] & _ISwbit (nr))
3901 wctype_table_add (&t, ctype->charnames[idx]);
3903 wctype_table_finalize (&t);
3905 if (verbose)
3906 WITH_CUR_LOCALE (fprintf (stderr, _("\
3907 %s: table for class \"%s\": %lu bytes\n"),
3908 "LC_CTYPE", ctype->classnames[nr],
3909 (unsigned long int) t.result_size));
3911 ctype->class_3level[nr].iov_base = t.result;
3912 ctype->class_3level[nr].iov_len = t.result_size;
3915 /* Room for table of mappings. */
3916 ctype->map_b = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3917 ctype->map32_b = (uint32_t **) xmalloc (ctype->map_collection_nr
3918 * sizeof (uint32_t *));
3919 ctype->map_3level = (struct iovec *)
3920 xmalloc (ctype->map_collection_nr * sizeof (struct iovec));
3922 /* Fill in all mappings. */
3923 for (idx = 0; idx < 2; ++idx)
3925 unsigned int idx2;
3927 /* Allocate table. */
3928 ctype->map_b[idx] = (uint32_t *)
3929 xmalloc ((256 + 128) * sizeof (uint32_t));
3931 /* Copy values from collection. */
3932 for (idx2 = 0; idx2 < 256; ++idx2)
3933 ctype->map_b[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
3935 /* Mirror first 127 entries. We must take care not to map entry
3936 -1 because EOF == -1. */
3937 for (idx2 = 0; idx2 < 127; ++idx2)
3938 ctype->map_b[idx][idx2] = ctype->map_b[idx][256 + idx2];
3940 /* EOF must map to EOF. */
3941 ctype->map_b[idx][127] = EOF;
3944 for (idx = 0; idx < ctype->map_collection_nr; ++idx)
3946 unsigned int idx2;
3948 /* Allocate table. */
3949 ctype->map32_b[idx] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
3951 /* Copy values from collection. Default is identity mapping. */
3952 for (idx2 = 0; idx2 < 256; ++idx2)
3953 ctype->map32_b[idx][idx2] =
3954 (ctype->map_collection[idx][idx2] != 0
3955 ? ctype->map_collection[idx][idx2]
3956 : idx2);
3959 for (nr = 0; nr < ctype->map_collection_nr; nr++)
3961 struct wctrans_table t;
3963 t.p = 7;
3964 t.q = 9;
3965 wctrans_table_init (&t);
3967 for (idx = 0; idx < ctype->map_collection_act[nr]; ++idx)
3968 if (ctype->map_collection[nr][idx] != 0)
3969 wctrans_table_add (&t, ctype->charnames[idx],
3970 ctype->map_collection[nr][idx]);
3972 wctrans_table_finalize (&t);
3974 if (verbose)
3975 WITH_CUR_LOCALE (fprintf (stderr, _("\
3976 %s: table for map \"%s\": %lu bytes\n"),
3977 "LC_CTYPE", ctype->mapnames[nr],
3978 (unsigned long int) t.result_size));
3980 ctype->map_3level[nr].iov_base = t.result;
3981 ctype->map_3level[nr].iov_len = t.result_size;
3984 /* Extra array for class and map names. */
3985 ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3986 * sizeof (uint32_t));
3987 ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3988 * sizeof (uint32_t));
3990 ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
3991 ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
3993 /* Array for width information. Because the expected widths are very
3994 small (never larger than 2) we use only one single byte. This
3995 saves space.
3996 We put only printable characters in the table. wcwidth is specified
3997 to return -1 for non-printable characters. Doing the check here
3998 saves a run-time check.
3999 But we put L'\0' in the table. This again saves a run-time check. */
4001 struct wcwidth_table t;
4003 t.p = 7;
4004 t.q = 9;
4005 wcwidth_table_init (&t);
4007 /* First set all the printable characters of the character set to
4008 the default width. */
4009 curs = NULL;
4010 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
4012 struct charseq *data = (struct charseq *) vdata;
4014 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
4015 data->ucs4 = repertoire_find_value (ctype->repertoire,
4016 data->name, len);
4018 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
4020 uint32_t *class_bits =
4021 find_idx (ctype, &ctype->class_collection, NULL,
4022 &ctype->class_collection_act, data->ucs4);
4024 if (class_bits != NULL && (*class_bits & BITw (tok_print)))
4025 wcwidth_table_add (&t, data->ucs4, charmap->width_default);
4029 /* Now add the explicitly specified widths. */
4030 if (charmap->width_rules != NULL)
4032 size_t cnt;
4034 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
4036 unsigned char bytes[charmap->mb_cur_max];
4037 int nbytes = charmap->width_rules[cnt].from->nbytes;
4039 /* We have the range of character for which the width is
4040 specified described using byte sequences of the multibyte
4041 charset. We have to convert this to UCS4 now. And we
4042 cannot simply convert the beginning and the end of the
4043 sequence, we have to iterate over the byte sequence and
4044 convert it for every single character. */
4045 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
4047 while (nbytes < charmap->width_rules[cnt].to->nbytes
4048 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
4049 nbytes) <= 0)
4051 /* Find the UCS value for `bytes'. */
4052 int inner;
4053 uint32_t wch;
4054 struct charseq *seq =
4055 charmap_find_symbol (charmap, (char *) bytes, nbytes);
4057 if (seq == NULL)
4058 wch = ILLEGAL_CHAR_VALUE;
4059 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
4060 wch = seq->ucs4;
4061 else
4062 wch = repertoire_find_value (ctype->repertoire, seq->name,
4063 strlen (seq->name));
4065 if (wch != ILLEGAL_CHAR_VALUE)
4067 /* Store the value. */
4068 uint32_t *class_bits =
4069 find_idx (ctype, &ctype->class_collection, NULL,
4070 &ctype->class_collection_act, wch);
4072 if (class_bits != NULL && (*class_bits & BITw (tok_print)))
4073 wcwidth_table_add (&t, wch,
4074 charmap->width_rules[cnt].width);
4077 /* "Increment" the bytes sequence. */
4078 inner = nbytes - 1;
4079 while (inner >= 0 && bytes[inner] == 0xff)
4080 --inner;
4082 if (inner < 0)
4084 /* We have to extend the byte sequence. */
4085 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
4086 break;
4088 bytes[0] = 1;
4089 memset (&bytes[1], 0, nbytes);
4090 ++nbytes;
4092 else
4094 ++bytes[inner];
4095 while (++inner < nbytes)
4096 bytes[inner] = 0;
4102 /* Set the width of L'\0' to 0. */
4103 wcwidth_table_add (&t, 0, 0);
4105 wcwidth_table_finalize (&t);
4107 if (verbose)
4108 WITH_CUR_LOCALE (fprintf (stderr, _("%s: table for width: %lu bytes\n"),
4109 "LC_CTYPE", (unsigned long int) t.result_size));
4111 ctype->width.iov_base = t.result;
4112 ctype->width.iov_len = t.result_size;
4115 /* Set MB_CUR_MAX. */
4116 ctype->mb_cur_max = charmap->mb_cur_max;
4118 /* Now determine the table for the transliteration information.
4120 XXX It is not yet clear to me whether it is worth implementing a
4121 complicated algorithm which uses a hash table to locate the entries.
4122 For now I'll use a simple array which can be searching using binary
4123 search. */
4124 if (ctype->translit_include != NULL)
4125 /* Traverse the locales mentioned in the `include' statements in a
4126 depth-first way and fold in their transliteration information. */
4127 translit_flatten (ctype, charmap, &ctype->translit);
4129 if (ctype->translit != NULL)
4131 /* First count how many entries we have. This is the upper limit
4132 since some entries from the included files might be overwritten. */
4133 size_t number = 0;
4134 size_t cnt;
4135 struct translit_t *runp = ctype->translit;
4136 struct translit_t **sorted;
4137 size_t from_len, to_len;
4139 while (runp != NULL)
4141 ++number;
4142 runp = runp->next;
4145 /* Next we allocate an array large enough and fill in the values. */
4146 sorted = (struct translit_t **) alloca (number
4147 * sizeof (struct translit_t **));
4148 runp = ctype->translit;
4149 number = 0;
4152 /* Search for the place where to insert this string.
4153 XXX Better use a real sorting algorithm later. */
4154 size_t idx = 0;
4155 int replace = 0;
4157 while (idx < number)
4159 int res = wcscmp ((const wchar_t *) sorted[idx]->from,
4160 (const wchar_t *) runp->from);
4161 if (res == 0)
4163 replace = 1;
4164 break;
4166 if (res > 0)
4167 break;
4168 ++idx;
4171 if (replace)
4172 sorted[idx] = runp;
4173 else
4175 memmove (&sorted[idx + 1], &sorted[idx],
4176 (number - idx) * sizeof (struct translit_t *));
4177 sorted[idx] = runp;
4178 ++number;
4181 runp = runp->next;
4183 while (runp != NULL);
4185 /* The next step is putting all the possible transliteration
4186 strings in one memory block so that we can write it out.
4187 We need several different blocks:
4188 - index to the from-string array
4189 - from-string array
4190 - index to the to-string array
4191 - to-string array.
4193 from_len = to_len = 0;
4194 for (cnt = 0; cnt < number; ++cnt)
4196 struct translit_to_t *srunp;
4197 from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4198 srunp = sorted[cnt]->to;
4199 while (srunp != NULL)
4201 to_len += wcslen ((const wchar_t *) srunp->str) + 1;
4202 srunp = srunp->next;
4204 /* Plus one for the extra NUL character marking the end of
4205 the list for the current entry. */
4206 ++to_len;
4209 /* We can allocate the arrays for the results. */
4210 ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
4211 ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
4212 ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
4213 ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
4215 from_len = 0;
4216 to_len = 0;
4217 for (cnt = 0; cnt < number; ++cnt)
4219 size_t len;
4220 struct translit_to_t *srunp;
4222 ctype->translit_from_idx[cnt] = from_len;
4223 ctype->translit_to_idx[cnt] = to_len;
4225 len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4226 wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
4227 (const wchar_t *) sorted[cnt]->from, len);
4228 from_len += len;
4230 ctype->translit_to_idx[cnt] = to_len;
4231 srunp = sorted[cnt]->to;
4232 while (srunp != NULL)
4234 len = wcslen ((const wchar_t *) srunp->str) + 1;
4235 wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
4236 (const wchar_t *) srunp->str, len);
4237 to_len += len;
4238 srunp = srunp->next;
4240 ctype->translit_to_tbl[to_len++] = L'\0';
4243 /* Store the information about the length. */
4244 ctype->translit_idx_size = number;
4245 ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
4246 ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
4248 else
4250 /* Provide some dummy pointers since we have nothing to write out. */
4251 static uint32_t no_str = { 0 };
4253 ctype->translit_from_idx = &no_str;
4254 ctype->translit_from_tbl = &no_str;
4255 ctype->translit_to_tbl = &no_str;
4256 ctype->translit_idx_size = 0;
4257 ctype->translit_from_tbl_size = 0;
4258 ctype->translit_to_tbl_size = 0;