1 /* Copyright (C) 1995-2006,2007,2009,2011 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, see <http://www.gnu.org/licenses/>. */
34 #include "localedef.h"
36 #include "localeinfo.h"
38 #include "linereader.h"
39 #include "locfile-token.h"
45 #ifdef PREDEFINED_CLASSES
46 /* These are the extra bits not in wctype.h since these are not preallocated
48 # define _ISwspecial1 (1 << 29)
49 # define _ISwspecial2 (1 << 30)
50 # define _ISwspecial3 (1 << 31)
54 /* The bit used for representing a special class. */
/* Zero-based bit position of a class token: its offset from tok_upper. */
55 #define BITPOS(class) ((class) - tok_upper)
/* Mask for CLASS built with _ISbit; ctype_finish applies it to the
   class256_collection entries. */
56 #define BIT(class) (_ISbit (BITPOS (class)))
/* Mask for CLASS built with _ISwbit; ctype_finish applies it to the
   class_collection entries. */
57 #define BITw(class) (_ISwbit (BITPOS (class)))
/* ELEM expands to a dereferenced find_idx call, so it can be used as an
   lvalue.  COLLECTION names a member of CTYPE; the matching _max and _act
   members are derived by token pasting.  IDX is pasted textually after each
   of the three member names and may be left empty.  VALUE is passed through
   as the last find_idx argument. */
59 #define ELEM(ctype, collection, idx, value) \
60 *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx, \
61 &ctype->collection##_act idx, value)
64 /* To be compatible with former implementations we for now restrict
65 the number of bits for character classes to 16. When compatibility
66 is not necessary anymore increase the number to 32. */
67 #define char_class_t uint16_t
68 #define char_class32_t uint32_t
71 /* Type to describe a transliteration action. We have a possibly
72 multiple character from-string and a set of multiple character
73 to-strings. All are 32bit values since this is what is used in
74 the gconv functions. */
/* Link members of the transliteration node types.  Each next member is a
   pointer to the struct type named on the same declaration, and the to
   member points at a translit_to_t node.
   NOTE(review): the struct headers and all other members are not visible
   in this excerpt, so which struct each member belongs to must be
   confirmed against the full declarations. */
79 struct translit_to_t
*next
;
89 struct translit_to_t
*to
;
91 struct translit_t
*next
;
94 struct translit_ignore_t
103 struct translit_ignore_t
*next
;
107 /* Type to describe a transliteration include statement. */
108 struct translit_include_t
/* Presumably the name of the locale whose transliteration data is
   included -- TODO confirm against the parser. */
110 const char *copy_locale
;
/* Presumably the repertoire name that goes with copy_locale -- TODO
   confirm against the parser. */
111 const char *copy_repertoire
;
/* Pointer to another node of the same type. */
113 struct translit_include_t
*next
;
117 /* Sparse table of uint32_t. */
118 #define TABLE idx_table
119 #define ELEMENT uint32_t
120 #define DEFAULT ((uint32_t) ~0)
125 /* The real definition of the struct for the LC_CTYPE locale. */
/* In-memory LC_CTYPE state.  ctype_startup allocates and seeds it and
   ctype_finish validates and completes it.
   NOTE(review): several members used elsewhere in this file (for example
   charnames, mbdigits_act, wcdigits and nr_charclass) are not visible in
   this excerpt of the declaration. */
126 struct locale_ctype_t
/* Capacity and number of used entries of the charnames array;
   ctype_startup starts out with 256 used entries. */
129 size_t charnames_max
;
130 size_t charnames_act
;
131 /* An index lookup table, to speedup find_idx. */
132 struct idx_table charnames_idx
;
/* Repertoire loaded by ctype_finish through repertoire_read when a
   repertoire name is available. */
134 struct repertoire_t
*repertoire
;
136 /* We will allow up to 8 * sizeof (uint32_t) character classes. */
137 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
139 const char *classnames
[MAX_NR_CHARCLASS
];
/* Initialised to ILLEGAL_CHAR_VALUE by ctype_startup. */
140 uint32_t last_class_char
;
/* Class bits of the 256 single-byte values; ctype_finish tests them with
   _ISbit and BIT. */
141 uint32_t class256_collection
[256];
/* Class bits per wide character.  ctype_finish walks it with the same
   index it uses for charnames and tests the bits with _ISwbit and BITw. */
142 uint32_t *class_collection
;
/* Capacity and used count of class_collection. */
143 size_t class_collection_max
;
144 size_t class_collection_act
;
146 uint32_t class_offset
;
/* Input digits as charmap sequences; ctype_finish keeps them in groups of
   ten. */
148 struct charseq
**mbdigits
;
/* Output digits in multibyte and wide form.  ctype_finish substitutes a
   question mark for entries that are still unset. */
155 struct charseq
*mboutdigits
[10];
156 uint32_t wcoutdigits
[10];
157 size_t outdigits_act
;
159 /* If the following number ever turns out to be too small simply
160 increase it. But I doubt it will. --drepper@gnu */
161 #define MAX_NR_CHARMAP 16
162 const char *mapnames
[MAX_NR_CHARMAP
];
/* Per-map tables; ctype_startup fills the first 256 entries of the
   predefined maps with the identity mapping. */
163 uint32_t *map_collection
[MAX_NR_CHARMAP
];
/* Single-byte case maps.  ctype_finish reads row 0 as the upper-case and
   row 1 as the lower-case mapping. */
164 uint32_t map256_collection
[2][256];
165 size_t map_collection_max
[MAX_NR_CHARMAP
];
166 size_t map_collection_act
[MAX_NR_CHARMAP
];
/* Number of maps; ctype_output iterates mapnames up to this count. */
167 size_t map_collection_nr
;
169 int tomap_done
[MAX_NR_CHARMAP
];
172 /* Transliteration information. */
173 struct translit_include_t
*translit_include
;
174 struct translit_t
*translit
;
/* List of ignore ranges and its length; ctype_finish counts the nodes
   and stores the list head back after its sorting pass. */
175 struct translit_ignore_t
*translit_ignore
;
176 uint32_t ntranslit_ignore
;
/* Wide string, or NULL; ctype_output measures it with wcslen and writes
   an empty string when it is NULL. */
178 uint32_t *default_missing
;
179 const char *default_missing_file
;
180 size_t default_missing_lineno
;
/* Set to 1 by ctype_startup when enc_not_ascii_compatible is set. */
182 uint32_t to_nonascii
;
/* Set to 1 by ctype_finish when the single-byte case maps deviate from
   the ASCII rules. */
183 uint32_t nonascii_case
;
185 /* The arrays for the binary representation. */
186 char_class_t
*ctype_b
;
187 char_class32_t
*ctype32_b
;
191 struct iovec
*class_3level
;
192 struct iovec
*map_3level
;
193 uint32_t *class_name_ptr
;
194 uint32_t *map_name_ptr
;
/* Taken from the charmap by ctype_finish, with a fixed placeholder when
   the charmap names no code set. */
197 const char *codeset_name
;
/* Transliteration tables and their sizes as written out by ctype_output. */
198 uint32_t *translit_from_idx
;
199 uint32_t *translit_from_tbl
;
200 uint32_t *translit_to_idx
;
201 uint32_t *translit_to_tbl
;
202 uint32_t translit_idx_size
;
203 size_t translit_from_tbl_size
;
204 size_t translit_to_tbl_size
;
/* Obstack initialised by ctype_startup. */
206 struct obstack mempool
;
210 /* Marker for an empty slot. This has the value 0xFFFFFFFF, regardless
211 whether 'int' is 16 bit, 32 bit, or 64 bit. */
212 #define EMPTY ((uint32_t) ~0)
215 #define obstack_chunk_alloc xmalloc
216 #define obstack_chunk_free free
219 /* Prototypes for local functions. */
/* Sets up the LC_CTYPE data of a locale, or shares the data of the locale
   being copied.
   NOTE(review): the tail of this prototype is not visible in this excerpt;
   the definition takes a further int ignore_content parameter. */
220 static void ctype_startup (struct linereader
*lr
, struct localedef_t
*locale
,
221 const struct charmap_t
*charmap
,
222 struct localedef_t
*copy_locale
,
/* Registers a character class by name; ctype_startup calls it once per
   predefined class. */
224 static void ctype_class_new (struct linereader
*lr
,
225 struct locale_ctype_t
*ctype
, const char *name
);
/* Registers a character map by name; ctype_startup calls it for toupper
   and tolower. */
226 static void ctype_map_new (struct linereader
*lr
,
227 struct locale_ctype_t
*ctype
,
228 const char *name
, const struct charmap_t
*charmap
);
/* Returns a pointer to a uint32_t slot (the ELEM macro dereferences it).
   ctype_finish also calls it with NULL for table, max and act purely for
   its side effect of adding entries to the name array. */
229 static uint32_t *find_idx (struct locale_ctype_t
*ctype
, uint32_t **table
,
230 size_t *max
, size_t *act
, unsigned int idx
);
/* Called by ctype_finish to set default values for classes that were not
   specified. */
231 static void set_class_defaults (struct locale_ctype_t
*ctype
,
232 const struct charmap_t
*charmap
,
233 struct repertoire_t
*repertoire
);
/* Called by ctype_output before the output vectors are filled. */
234 static void allocate_arrays (struct locale_ctype_t
*ctype
,
235 const struct charmap_t
*charmap
,
236 struct repertoire_t
*repertoire
);
/* Spelled-out names of the ten decimal digits, in order.  ctype_finish
   falls back to these names when a digit cannot be found in the charmap
   by its one-character name. */
239 static const char *longnames
[] =
241 "zero", "one", "two", "three", "four",
242 "five", "six", "seven", "eight", "nine"
/* Names of the form U followed by eight hex digits for the values 0x30
   through 0x39, in order. */
244 static const char *uninames
[] =
246 "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
247 "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
/* The ten digit characters.  ctype_finish looks up digits + cnt as a
   one-character charmap symbol and uses digits[cnt] as the last-resort
   byte value. */
249 static const unsigned char digits
[] = "0123456789";
/* Prepare the LC_CTYPE data of LOCALE.

   Nothing is done unless IGNORE_CONTENT is zero and LOCALE has no LC_CTYPE
   data yet.  With COPY_LOCALE == NULL a zeroed locale_ctype_t is allocated
   and seeded as follows:
     - the first 256 charnames entries are set to the identity;
     - the predefined classes are registered in a fixed order;
     - class_collection is allocated;
     - the toupper and tolower maps are registered and their first 256
       entries are set to the identity;
     - to_nonascii is set when enc_not_ascii_compatible is set;
     - the obstack is initialised.
   The final assignment makes LOCALE share the LC_CTYPE data of
   COPY_LOCALE.  It is presumably the else branch of the COPY_LOCALE test;
   the else keyword is not visible in this excerpt.

   LR is passed through to ctype_class_new and ctype_map_new.  CHARMAP
   supplies mb_cur_max, which selects the initial table capacity (256 for
   single-byte charmaps, 512 otherwise). */
253 ctype_startup (struct linereader
*lr
, struct localedef_t
*locale
,
254 const struct charmap_t
*charmap
,
255 struct localedef_t
*copy_locale
, int ignore_content
)
258 struct locale_ctype_t
*ctype
;
260 if (!ignore_content
&& locale
->categories
[LC_CTYPE
].ctype
== NULL
)
262 if (copy_locale
== NULL
)
264 /* Allocate the needed room. */
265 locale
->categories
[LC_CTYPE
].ctype
= ctype
=
266 (struct locale_ctype_t
*) xcalloc (1,
267 sizeof (struct locale_ctype_t
));
269 /* We have seen no names yet. */
270 ctype
->charnames_max
= charmap
->mb_cur_max
== 1 ? 256 : 512;
/* NOTE(review): the assignment target of the following allocation is not
   visible in this excerpt; the loop below writes ctype->charnames. */
272 (unsigned int *) xmalloc (ctype
->charnames_max
273 * sizeof (unsigned int));
274 for (cnt
= 0; cnt
< 256; ++cnt
)
275 ctype
->charnames
[cnt
] = cnt
;
276 ctype
->charnames_act
= 256;
277 idx_table_init (&ctype
->charnames_idx
);
279 /* Fill character class information. */
280 ctype
->last_class_char
= ILLEGAL_CHAR_VALUE
;
281 /* The order of the following instructions determines the bit
283 ctype_class_new (lr
, ctype
, "upper");
284 ctype_class_new (lr
, ctype
, "lower");
285 ctype_class_new (lr
, ctype
, "alpha");
286 ctype_class_new (lr
, ctype
, "digit");
287 ctype_class_new (lr
, ctype
, "xdigit");
288 ctype_class_new (lr
, ctype
, "space");
289 ctype_class_new (lr
, ctype
, "print");
290 ctype_class_new (lr
, ctype
, "graph");
291 ctype_class_new (lr
, ctype
, "blank");
292 ctype_class_new (lr
, ctype
, "cntrl");
293 ctype_class_new (lr
, ctype
, "punct");
294 ctype_class_new (lr
, ctype
, "alnum");
295 #ifdef PREDEFINED_CLASSES
296 /* The following are extensions from ISO 14652. */
297 ctype_class_new (lr
, ctype
, "left_to_right");
298 ctype_class_new (lr
, ctype
, "right_to_left");
299 ctype_class_new (lr
, ctype
, "num_terminator");
300 ctype_class_new (lr
, ctype
, "num_separator");
301 ctype_class_new (lr
, ctype
, "segment_separator");
302 ctype_class_new (lr
, ctype
, "block_separator");
303 ctype_class_new (lr
, ctype
, "direction_control");
304 ctype_class_new (lr
, ctype
, "sym_swap_layout");
305 ctype_class_new (lr
, ctype
, "char_shape_selector");
306 ctype_class_new (lr
, ctype
, "num_shape_selector");
307 ctype_class_new (lr
, ctype
, "non_spacing");
308 ctype_class_new (lr
, ctype
, "non_spacing_level3");
309 ctype_class_new (lr
, ctype
, "normal_connect");
310 ctype_class_new (lr
, ctype
, "r_connect");
311 ctype_class_new (lr
, ctype
, "no_connect");
312 ctype_class_new (lr
, ctype
, "no_connect-space");
313 ctype_class_new (lr
, ctype
, "vowel_connect");
316 ctype
->class_collection_max
= charmap
->mb_cur_max
== 1 ? 256 : 512;
/* NOTE(review): the element size passed to xcalloc is
   sizeof (unsigned long int) although class_collection is an array of
   uint32_t.  Where unsigned long int is wider this only over-allocates --
   confirm whether sizeof (uint32_t) was meant. */
317 ctype
->class_collection
318 = (uint32_t *) xcalloc (sizeof (unsigned long int),
319 ctype
->class_collection_max
);
320 ctype
->class_collection_act
= 256;
322 /* Fill character map information. */
323 ctype
->last_map_idx
= MAX_NR_CHARMAP
;
324 ctype_map_new (lr
, ctype
, "toupper", charmap
);
325 ctype_map_new (lr
, ctype
, "tolower", charmap
);
326 #ifdef PREDEFINED_CLASSES
327 ctype_map_new (lr
, ctype
, "tosymmetric", charmap
);
330 /* Fill first 256 entries in `toXXX' arrays. */
331 for (cnt
= 0; cnt
< 256; ++cnt
)
333 ctype
->map_collection
[0][cnt
] = cnt
;
334 ctype
->map_collection
[1][cnt
] = cnt
;
335 #ifdef PREDEFINED_CLASSES
336 ctype
->map_collection
[2][cnt
] = cnt
;
338 ctype
->map256_collection
[0][cnt
] = cnt
;
339 ctype
->map256_collection
[1][cnt
] = cnt
;
342 if (enc_not_ascii_compatible
)
343 ctype
->to_nonascii
= 1;
345 obstack_init (&ctype
->mempool
);
/* Share the LC_CTYPE data of the locale being copied. */
348 ctype
= locale
->categories
[LC_CTYPE
].ctype
=
349 copy_locale
->categories
[LC_CTYPE
].ctype
;
/* Validate and complete the LC_CTYPE data of LOCALE after parsing.

   In order, the visible code:
     - resolves a copy directive by following copy_name through
       find_locale, and creates a fresh definition with ctype_startup
       after reporting that none was found;
     - loads the repertoire and records the code set name of CHARMAP;
     - applies class defaults with set_class_defaults;
     - checks every character against valid_table, once for the wide
       table and once for the 256 single-byte entries;
     - checks the class membership of the <SP> character;
     - sets nonascii_case when the single-byte case maps differ from the
       ASCII rules;
     - makes sure the name array knows the characters covered by the
       width rules and by the charmap (through find_idx side effects);
     - trims the input digits to complete groups of ten and installs
       defaults when none are given;
     - replaces missing output digits by a question mark;
     - orders and counts the translit_ignore list.

   NOTE(review): braces, case labels and several statements are missing
   from this excerpt, so the exact nesting must be checked against the
   full source. */
355 ctype_finish (struct localedef_t
*locale
, const struct charmap_t
*charmap
)
357 /* See POSIX.2, table 2-6 for the meaning of the following table. */
362 const char allow
[NCLASS
];
364 valid_table
[NCLASS
] =
366 /* The order is important. See token.h for more information.
367 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
368 { "upper", "--MX-XDDXXX-" },
369 { "lower", "--MX-XDDXXX-" },
370 { "alpha", "---X-XDDXXX-" },
371 { "digit", "XXX--XDDXXX-" },
372 { "xdigit", "-----XDDXXX-" },
373 { "space", "XXXXX------X" },
374 { "print", "---------X--" },
375 { "graph", "---------X--" },
376 { "blank", "XXXXXM-----X" },
377 { "cntrl", "XXXXX-XX--XX" },
378 { "punct", "XXXXX-DD-X-X" },
379 { "alnum", "-----XDDXXX-" }
383 uint32_t space_value
;
384 struct charseq
*space_seq
;
385 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
392 /* Now resolve copying and also handle completely missing definitions. */
395 const char *repertoire_name
;
397 /* First see whether we were supposed to copy. If yes, find the
398 actual definition. */
399 if (locale
->copy_name
[LC_CTYPE
] != NULL
)
401 /* Find the copying locale. This has to happen transitively since
402 the locale we are copying from might also be copying another one. */
403 struct localedef_t
*from
= locale
;
406 from
= find_locale (LC_CTYPE
, from
->copy_name
[LC_CTYPE
],
407 from
->repertoire_name
, charmap
);
408 while (from
->categories
[LC_CTYPE
].ctype
== NULL
409 && from
->copy_name
[LC_CTYPE
] != NULL
);
411 ctype
= locale
->categories
[LC_CTYPE
].ctype
412 = from
->categories
[LC_CTYPE
].ctype
;
415 /* If there is still no definition issue a warning and create an
420 WITH_CUR_LOCALE (error (0, 0, _("\
421 No definition for %s category found"), "LC_CTYPE"));
422 ctype_startup (NULL
, locale
, charmap
, NULL
, 0);
423 ctype
= locale
->categories
[LC_CTYPE
].ctype
;
426 /* Get the repertoire we have to use. */
427 repertoire_name
= locale
->repertoire_name
?: repertoire_global
;
428 if (repertoire_name
!= NULL
)
429 ctype
->repertoire
= repertoire_read (repertoire_name
);
432 /* We need the name of the currently used 8-bit character set to
433 make correct conversion between this 8-bit representation and the
434 ISO 10646 character set used internally for wide characters. */
435 ctype
->codeset_name
= charmap
->code_set_name
;
436 if (ctype
->codeset_name
== NULL
)
439 WITH_CUR_LOCALE (error (0, 0, _("\
440 No character set name specified in charmap")));
441 ctype
->codeset_name
= "//UNKNOWN//";
444 /* Set default value for classes not specified. */
445 set_class_defaults (ctype
, charmap
, ctype
->repertoire
);
447 /* Check according to table. */
448 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
450 uint32_t tmp
= ctype
->class_collection
[cnt
];
454 for (cls1
= 0; cls1
< NCLASS
; ++cls1
)
455 if ((tmp
& _ISwbit (cls1
)) != 0)
456 for (cls2
= 0; cls2
< NCLASS
; ++cls2
)
457 if (valid_table
[cls1
].allow
[cls2
] != '-')
459 int eq
= (tmp
& _ISwbit (cls2
)) != 0;
460 switch (valid_table
[cls1
].allow
[cls2
])
465 uint32_t value
= ctype
->charnames
[cnt
];
468 WITH_CUR_LOCALE (error (0, 0, _("\
469 character L'\\u%0*x' in class `%s' must be in class `%s'"),
470 value
> 0xffff ? 8 : 4,
472 valid_table
[cls1
].name
,
473 valid_table
[cls2
].name
));
480 uint32_t value
= ctype
->charnames
[cnt
];
483 WITH_CUR_LOCALE (error (0, 0, _("\
484 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
485 value
> 0xffff ? 8 : 4,
487 valid_table
[cls1
].name
,
488 valid_table
[cls2
].name
));
493 ctype
->class_collection
[cnt
] |= _ISwbit (cls2
);
497 WITH_CUR_LOCALE (error (5, 0, _("\
498 internal error in %s, line %u"), __FUNCTION__
, __LINE__
));
/* The same table check again, now for the 256 single-byte entries. */
504 for (cnt
= 0; cnt
< 256; ++cnt
)
506 uint32_t tmp
= ctype
->class256_collection
[cnt
];
510 for (cls1
= 0; cls1
< NCLASS
; ++cls1
)
511 if ((tmp
& _ISbit (cls1
)) != 0)
512 for (cls2
= 0; cls2
< NCLASS
; ++cls2
)
513 if (valid_table
[cls1
].allow
[cls2
] != '-')
515 int eq
= (tmp
& _ISbit (cls2
)) != 0;
516 switch (valid_table
[cls1
].allow
[cls2
])
/* NOTE(review): Z is the legacy glibc spelling of the z (size_t) length
   modifier.  The declaration of cnt is not visible in this excerpt --
   confirm it is a size_t. */
523 snprintf (buf
, sizeof buf
, "\\%Zo", cnt
);
526 WITH_CUR_LOCALE (error (0, 0, _("\
527 character '%s' in class `%s' must be in class `%s'"),
529 valid_table
[cls1
].name
,
530 valid_table
[cls2
].name
));
539 snprintf (buf
, sizeof buf
, "\\%Zo", cnt
);
542 WITH_CUR_LOCALE (error (0, 0, _("\
543 character '%s' in class `%s' must not be in class `%s'"),
545 valid_table
[cls1
].name
,
546 valid_table
[cls2
].name
));
551 ctype
->class256_collection
[cnt
] |= _ISbit (cls2
);
555 WITH_CUR_LOCALE (error (5, 0, _("\
556 internal error in %s, line %u"), __FUNCTION__
, __LINE__
));
562 /* ... and now test <SP> as a special case. */
564 if (((cnt
= BITPOS (tok_space
),
565 (ELEM (ctype
, class_collection
, , space_value
)
566 & BITw (tok_space
)) == 0)
567 || (cnt
= BITPOS (tok_blank
),
568 (ELEM (ctype
, class_collection
, , space_value
)
569 & BITw (tok_blank
)) == 0)))
572 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
573 valid_table
[cnt
].name
));
575 else if (((cnt
= BITPOS (tok_punct
),
576 (ELEM (ctype
, class_collection
, , space_value
)
577 & BITw (tok_punct
)) != 0)
578 || (cnt
= BITPOS (tok_graph
),
579 (ELEM (ctype
, class_collection
, , space_value
)
584 WITH_CUR_LOCALE (error (0, 0, _("\
585 <SP> character must not be in class `%s'"),
586 valid_table
[cnt
].name
));
589 ELEM (ctype
, class_collection
, , space_value
) |= BITw (tok_print
);
/* Look the space character up in the charmap under three names in turn;
   only a one-byte sequence is accepted for the single-byte checks. */
591 space_seq
= charmap_find_value (charmap
, "SP", 2);
592 if (space_seq
== NULL
)
593 space_seq
= charmap_find_value (charmap
, "space", 5);
594 if (space_seq
== NULL
)
595 space_seq
= charmap_find_value (charmap
, "U00000020", 9);
596 if (space_seq
== NULL
|| space_seq
->nbytes
!= 1)
599 WITH_CUR_LOCALE (error (0, 0, _("\
600 character <SP> not defined in character map")));
602 else if (((cnt
= BITPOS (tok_space
),
603 (ctype
->class256_collection
[space_seq
->bytes
[0]]
604 & BIT (tok_space
)) == 0)
605 || (cnt
= BITPOS (tok_blank
),
606 (ctype
->class256_collection
[space_seq
->bytes
[0]]
607 & BIT (tok_blank
)) == 0)))
610 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
611 valid_table
[cnt
].name
));
613 else if (((cnt
= BITPOS (tok_punct
),
614 (ctype
->class256_collection
[space_seq
->bytes
[0]]
615 & BIT (tok_punct
)) != 0)
616 || (cnt
= BITPOS (tok_graph
),
617 (ctype
->class256_collection
[space_seq
->bytes
[0]]
618 & BIT (tok_graph
)) != 0)))
621 WITH_CUR_LOCALE (error (0, 0, _("\
622 <SP> character must not be in class `%s'"),
623 valid_table
[cnt
].name
));
626 ctype
->class256_collection
[space_seq
->bytes
[0]] |= BIT (tok_print
);
628 /* Check whether all single-byte characters map to their upper/lowercase
629 equivalent according to the ASCII rules. */
630 for (cnt
= 'A'; cnt
<= 'Z'; ++cnt
)
632 uint32_t uppval
= ctype
->map256_collection
[0][cnt
];
633 uint32_t lowval
= ctype
->map256_collection
[1][cnt
];
634 uint32_t lowuppval
= ctype
->map256_collection
[0][lowval
];
635 uint32_t lowlowval
= ctype
->map256_collection
[1][lowval
];
638 || lowval
!= cnt
+ 0x20
640 || lowlowval
!= cnt
+ 0x20)
641 ctype
->nonascii_case
= 1;
/* Every byte outside the two ASCII letter ranges must map to itself in
   both case maps. */
643 for (cnt
= 0; cnt
< 256; ++cnt
)
644 if (cnt
< 'A' || (cnt
> 'Z' && cnt
< 'a') || cnt
> 'z')
645 if (ctype
->map256_collection
[0][cnt
] != cnt
646 || ctype
->map256_collection
[1][cnt
] != cnt
)
647 ctype
->nonascii_case
= 1;
649 /* Now that the tests are done make sure the name array contains all
650 characters which are handled in the WIDTH section of the
651 character set definition file. */
652 if (charmap
->width_rules
!= NULL
)
653 for (cnt
= 0; cnt
< charmap
->nwidth_rules
; ++cnt
)
655 unsigned char bytes
[charmap
->mb_cur_max
];
656 int nbytes
= charmap
->width_rules
[cnt
].from
->nbytes
;
658 /* We have the range of character for which the width is
659 specified described using byte sequences of the multibyte
660 charset. We have to convert this to UCS4 now. And we
661 cannot simply convert the beginning and the end of the
662 sequence, we have to iterate over the byte sequence and
663 convert it for every single character. */
664 memcpy (bytes
, charmap
->width_rules
[cnt
].from
->bytes
, nbytes
);
666 while (nbytes
< charmap
->width_rules
[cnt
].to
->nbytes
667 || memcmp (bytes
, charmap
->width_rules
[cnt
].to
->bytes
,
670 /* Find the UCS value for `bytes'. */
674 = charmap_find_symbol (charmap
, (char *) bytes
, nbytes
);
677 wch
= ILLEGAL_CHAR_VALUE
;
678 else if (seq
->ucs4
!= UNINITIALIZED_CHAR_VALUE
)
681 wch
= repertoire_find_value (ctype
->repertoire
, seq
->name
,
684 if (wch
!= ILLEGAL_CHAR_VALUE
)
685 /* We are only interested in the side-effects of the
686 `find_idx' call. It will add appropriate entries in
687 the name array if this is necessary. */
688 (void) find_idx (ctype
, NULL
, NULL
, NULL
, wch
);
690 /* "Increment" the bytes sequence. */
692 while (inner
>= 0 && bytes
[inner
] == 0xff)
697 /* We have to extend the byte sequence. */
698 if (nbytes
>= charmap
->width_rules
[cnt
].to
->nbytes
)
702 memset (&bytes
[1], 0, nbytes
);
708 while (++inner
< nbytes
)
714 /* Now set all the other characters of the character set to the
717 while (iterate_table (&charmap
->char_table
, &curs
, &key
, &len
, &vdata
) == 0)
719 struct charseq
*data
= (struct charseq
*) vdata
;
721 if (data
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
722 data
->ucs4
= repertoire_find_value (ctype
->repertoire
,
725 if (data
->ucs4
!= ILLEGAL_CHAR_VALUE
)
726 (void) find_idx (ctype
, NULL
, NULL
, NULL
, data
->ucs4
);
729 /* There must be a multiple of 10 digits. */
730 if (ctype
->mbdigits_act
% 10 != 0)
732 assert (ctype
->mbdigits_act
== ctype
->wcdigits_act
);
733 ctype
->wcdigits_act
-= ctype
->mbdigits_act
% 10;
734 ctype
->mbdigits_act
-= ctype
->mbdigits_act
% 10;
735 WITH_CUR_LOCALE (error (0, 0, _("\
736 `digit' category has not entries in groups of ten")));
739 /* Check the input digits. There must be a multiple of ten available.
740 In each group it could be that one or the other character is missing.
741 In this case the whole group must be removed. */
743 while (cnt
< ctype
->mbdigits_act
)
746 for (inner
= 0; inner
< 10; ++inner
)
747 if (ctype
->mbdigits
[cnt
+ inner
] == NULL
)
754 /* Remove the group. */
/* NOTE(review): the number of elements moved is computed from
   wcdigits_act, while the array being compacted and the counter
   decremented below are the mbdigits ones.  The assert above appears to
   establish equality of the two counters only in the
   not-a-multiple-of-ten branch, and mbdigits_act shrinks here while
   wcdigits_act does not -- confirm this length is intended. */
755 memmove (&ctype
->mbdigits
[cnt
], &ctype
->mbdigits
[cnt
+ 10],
756 ((ctype
->wcdigits_act
- cnt
- 10)
757 * sizeof (ctype
->mbdigits
[0])));
758 ctype
->mbdigits_act
-= 10;
762 /* If no input digits are given use the default. */
763 if (ctype
->mbdigits_act
== 0)
765 if (ctype
->mbdigits_max
== 0)
767 ctype
->mbdigits
= obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
768 10 * sizeof (struct charseq
*));
769 ctype
->mbdigits_max
= 10;
772 for (cnt
= 0; cnt
< 10; ++cnt
)
774 ctype
->mbdigits
[cnt
] = charmap_find_symbol (charmap
,
775 (char *) digits
+ cnt
, 1);
776 if (ctype
->mbdigits
[cnt
] == NULL
)
778 ctype
->mbdigits
[cnt
] = charmap_find_symbol (charmap
,
780 strlen (longnames
[cnt
]));
781 if (ctype
->mbdigits
[cnt
] == NULL
)
783 /* Hum, this ain't good. */
784 WITH_CUR_LOCALE (error (0, 0, _("\
785 no input digits defined and none of the standard names in the charmap")));
787 ctype
->mbdigits
[cnt
] = obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
788 sizeof (struct charseq
) + 1);
790 /* This is better than nothing. */
791 ctype
->mbdigits
[cnt
]->bytes
[0] = digits
[cnt
];
792 ctype
->mbdigits
[cnt
]->nbytes
= 1;
797 ctype
->mbdigits_act
= 10;
800 /* Check the wide character input digits. There must be a multiple
801 of ten available. In each group it could be that one or the other
802 character is missing. In this case the whole group must be
805 while (cnt
< ctype
->wcdigits_act
)
808 for (inner
= 0; inner
< 10; ++inner
)
809 if (ctype
->wcdigits
[cnt
+ inner
] == ILLEGAL_CHAR_VALUE
)
816 /* Remove the group. */
817 memmove (&ctype
->wcdigits
[cnt
], &ctype
->wcdigits
[cnt
+ 10],
818 ((ctype
->wcdigits_act
- cnt
- 10)
819 * sizeof (ctype
->wcdigits
[0])));
820 ctype
->wcdigits_act
-= 10;
824 /* If no input digits are given use the default. */
825 if (ctype
->wcdigits_act
== 0)
827 if (ctype
->wcdigits_max
== 0)
829 ctype
->wcdigits
= obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
830 10 * sizeof (uint32_t));
831 ctype
->wcdigits_max
= 10;
834 for (cnt
= 0; cnt
< 10; ++cnt
)
835 ctype
->wcdigits
[cnt
] = L
'0' + cnt
;
/* NOTE(review): this branch is taken when wcdigits_act is zero and it has
   just filled the ten wcdigits entries, yet the counter assigned here is
   mbdigits_act.  No assignment to wcdigits_act is visible in this branch
   -- confirm whether wcdigits_act was meant. */
837 ctype
->mbdigits_act
= 10;
840 /* Check the outdigits. */
842 for (cnt
= 0; cnt
< 10; ++cnt
)
843 if (ctype
->mboutdigits
[cnt
] == NULL
)
/* Shared static replacement sequence holding a question mark.  Two bytes
   are written through element 0; presumably the second array element
   provides the storage for the byte after the first -- TODO confirm
   against the declaration of struct charseq. */
845 static struct charseq replace
[2];
849 WITH_CUR_LOCALE (error (0, 0, _("\
850 not all characters used in `outdigit' are available in the charmap")));
854 replace
[0].nbytes
= 1;
855 replace
[0].bytes
[0] = '?';
856 replace
[0].bytes
[1] = '\0';
857 ctype
->mboutdigits
[cnt
] = &replace
[0];
861 for (cnt
= 0; cnt
< 10; ++cnt
)
862 if (ctype
->wcoutdigits
[cnt
] == 0)
866 WITH_CUR_LOCALE (error (0, 0, _("\
867 not all characters used in `outdigit' are available in the repertoire")));
871 ctype
->wcoutdigits
[cnt
] = L
'?';
874 /* Sort the entries in the translit_ignore list. */
875 if (ctype
->translit_ignore
!= NULL
)
877 struct translit_ignore_t
*firstp
= ctype
->translit_ignore
;
878 struct translit_ignore_t
*runp
;
880 ctype
->ntranslit_ignore
= 1;
/* Walk the remaining nodes, counting each one, and compare its from value
   against the nodes starting at firstp.  The re-linking statements are
   not visible in this excerpt. */
882 for (runp
= firstp
->next
; runp
!= NULL
; runp
= runp
->next
)
884 struct translit_ignore_t
*lastp
= NULL
;
885 struct translit_ignore_t
*cmpp
;
887 ++ctype
->ntranslit_ignore
;
889 for (cmpp
= firstp
; cmpp
!= NULL
; lastp
= cmpp
, cmpp
= cmpp
->next
)
890 if (runp
->from
< cmpp
->from
)
898 ctype
->translit_ignore
= firstp
;
904 ctype_output (struct localedef_t
*locale
, const struct charmap_t
*charmap
,
905 const char *output_path
)
907 static const char nulbytes
[4] = { 0, 0, 0, 0 };
908 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
909 const size_t nelems
= (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
)
910 + ctype
->nr_charclass
+ ctype
->map_collection_nr
);
911 struct iovec
*iov
= alloca (sizeof *iov
912 * (2 + nelems
+ 2 * ctype
->nr_charclass
913 + ctype
->map_collection_nr
+ 4));
914 struct locale_file data
;
915 uint32_t *idx
= alloca (sizeof *idx
* (nelems
+ 1));
916 uint32_t default_missing_len
;
917 size_t elem
, cnt
, offset
, total
;
920 /* Now prepare the output: Find the sizes of the table we can use. */
921 allocate_arrays (ctype
, charmap
, ctype
->repertoire
);
923 data
.magic
= LIMAGIC (LC_CTYPE
);
925 iov
[0].iov_base
= (void *) &data
;
926 iov
[0].iov_len
= sizeof (data
);
928 iov
[1].iov_base
= (void *) idx
;
929 iov
[1].iov_len
= nelems
* sizeof (uint32_t);
931 idx
[0] = iov
[0].iov_len
+ iov
[1].iov_len
;
934 for (elem
= 0; elem
< nelems
; ++elem
)
936 if (elem
< _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
))
939 #define CTYPE_EMPTY(name) \
941 iov[2 + elem + offset].iov_base = NULL; \
942 iov[2 + elem + offset].iov_len = 0; \
943 idx[elem + 1] = idx[elem]; \
946 CTYPE_EMPTY(_NL_CTYPE_GAP1
);
947 CTYPE_EMPTY(_NL_CTYPE_GAP2
);
948 CTYPE_EMPTY(_NL_CTYPE_GAP3
);
949 CTYPE_EMPTY(_NL_CTYPE_GAP4
);
950 CTYPE_EMPTY(_NL_CTYPE_GAP5
);
951 CTYPE_EMPTY(_NL_CTYPE_GAP6
);
953 #define CTYPE_DATA(name, base, len) \
954 case _NL_ITEM_INDEX (name): \
955 iov[2 + elem + offset].iov_base = (base); \
956 iov[2 + elem + offset].iov_len = (len); \
957 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
960 CTYPE_DATA (_NL_CTYPE_CLASS
,
962 (256 + 128) * sizeof (char_class_t
));
964 CTYPE_DATA (_NL_CTYPE_TOUPPER
,
966 (256 + 128) * sizeof (uint32_t));
967 CTYPE_DATA (_NL_CTYPE_TOLOWER
,
969 (256 + 128) * sizeof (uint32_t));
971 CTYPE_DATA (_NL_CTYPE_TOUPPER32
,
973 256 * sizeof (uint32_t));
974 CTYPE_DATA (_NL_CTYPE_TOLOWER32
,
976 256 * sizeof (uint32_t));
978 CTYPE_DATA (_NL_CTYPE_CLASS32
,
980 256 * sizeof (char_class32_t
));
982 CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET
,
983 &ctype
->class_offset
, sizeof (uint32_t));
985 CTYPE_DATA (_NL_CTYPE_MAP_OFFSET
,
986 &ctype
->map_offset
, sizeof (uint32_t));
988 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE
,
989 &ctype
->translit_idx_size
, sizeof (uint32_t));
991 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX
,
992 ctype
->translit_from_idx
,
993 ctype
->translit_idx_size
* sizeof (uint32_t));
995 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL
,
996 ctype
->translit_from_tbl
,
997 ctype
->translit_from_tbl_size
);
999 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX
,
1000 ctype
->translit_to_idx
,
1001 ctype
->translit_idx_size
* sizeof (uint32_t));
1003 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL
,
1004 ctype
->translit_to_tbl
, ctype
->translit_to_tbl_size
);
1006 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES
):
1007 /* The class name array. */
1009 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
, ++offset
)
1011 iov
[2 + elem
+ offset
].iov_base
1012 = (void *) ctype
->classnames
[cnt
];
1013 iov
[2 + elem
+ offset
].iov_len
1014 = strlen (ctype
->classnames
[cnt
]) + 1;
1015 total
+= iov
[2 + elem
+ offset
].iov_len
;
1017 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1018 iov
[2 + elem
+ offset
].iov_len
= 4 - (total
% 4);
1019 total
+= 4 - (total
% 4);
1021 idx
[elem
+ 1] = idx
[elem
] + total
;
1024 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES
):
1025 /* The map name array. */
1027 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
, ++offset
)
1029 iov
[2 + elem
+ offset
].iov_base
1030 = (void *) ctype
->mapnames
[cnt
];
1031 iov
[2 + elem
+ offset
].iov_len
1032 = strlen (ctype
->mapnames
[cnt
]) + 1;
1033 total
+= iov
[2 + elem
+ offset
].iov_len
;
1035 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1036 iov
[2 + elem
+ offset
].iov_len
= 4 - (total
% 4);
1037 total
+= 4 - (total
% 4);
1039 idx
[elem
+ 1] = idx
[elem
] + total
;
1042 CTYPE_DATA (_NL_CTYPE_WIDTH
,
1043 ctype
->width
.iov_base
,
1044 ctype
->width
.iov_len
);
1046 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX
,
1047 &ctype
->mb_cur_max
, sizeof (uint32_t));
1049 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME
):
1050 total
= strlen (ctype
->codeset_name
) + 1;
1052 iov
[2 + elem
+ offset
].iov_base
= (char *) ctype
->codeset_name
;
1055 iov
[2 + elem
+ offset
].iov_base
= alloca ((total
+ 3) & ~3);
1056 memset (mempcpy (iov
[2 + elem
+ offset
].iov_base
,
1057 ctype
->codeset_name
, total
),
1058 '\0', 4 - (total
& 3));
1059 total
= (total
+ 3) & ~3;
1061 iov
[2 + elem
+ offset
].iov_len
= total
;
1062 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1066 CTYPE_DATA (_NL_CTYPE_MAP_TO_NONASCII
,
1067 &ctype
->to_nonascii
, sizeof (uint32_t));
1069 CTYPE_DATA (_NL_CTYPE_NONASCII_CASE
,
1070 &ctype
->nonascii_case
, sizeof (uint32_t));
1072 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN
):
1073 iov
[2 + elem
+ offset
].iov_base
= alloca (sizeof (uint32_t));
1074 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1075 *(uint32_t *) iov
[2 + elem
+ offset
].iov_base
=
1076 ctype
->mbdigits_act
/ 10;
1077 idx
[elem
+ 1] = idx
[elem
] + sizeof (uint32_t);
1080 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN
):
1081 /* Align entries. */
1082 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1083 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1084 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1087 iov
[2 + elem
+ offset
].iov_base
= alloca (sizeof (uint32_t));
1088 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1089 *(uint32_t *) iov
[2 + elem
+ offset
].iov_base
=
1090 ctype
->wcdigits_act
/ 10;
1091 idx
[elem
+ 1] = idx
[elem
] + sizeof (uint32_t);
1094 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB
):
1095 /* Compute the length of all possible characters. For INDIGITS
1096 there might be more than one. We simply concatenate all of
1097 them with a NUL byte following. The NUL byte wouldn't be
1098 necessary but it makes it easier for the user. */
1101 for (cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
);
1102 cnt
< ctype
->mbdigits_act
; cnt
+= 10)
1103 total
+= ctype
->mbdigits
[cnt
]->nbytes
+ 1;
1104 iov
[2 + elem
+ offset
].iov_base
= (char *) alloca (total
);
1105 iov
[2 + elem
+ offset
].iov_len
= total
;
1107 cp
= iov
[2 + elem
+ offset
].iov_base
;
1108 for (cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
);
1109 cnt
< ctype
->mbdigits_act
; cnt
+= 10)
1111 cp
= mempcpy (cp
, ctype
->mbdigits
[cnt
]->bytes
,
1112 ctype
->mbdigits
[cnt
]->nbytes
);
1115 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1118 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB
) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB
):
1119 /* Compute the length of this output digit. Unlike INDIGITS
1120 there is exactly one character per entry. We copy its bytes
1121 with a NUL byte following. The NUL byte wouldn't be
1122 necessary but it makes it easier for the user. */
1123 cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB
);
1124 total
= ctype
->mboutdigits
[cnt
]->nbytes
+ 1;
1125 iov
[2 + elem
+ offset
].iov_base
= (char *) alloca (total
);
1126 iov
[2 + elem
+ offset
].iov_len
= total
;
1128 *(char *) mempcpy (iov
[2 + elem
+ offset
].iov_base
,
1129 ctype
->mboutdigits
[cnt
]->bytes
,
1130 ctype
->mboutdigits
[cnt
]->nbytes
) = '\0';
1131 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1134 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC
) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC
):
1135 total
= ctype
->wcdigits_act
/ 10;
1137 iov
[2 + elem
+ offset
].iov_base
=
1138 (uint32_t *) alloca (total
* sizeof (uint32_t));
1139 iov
[2 + elem
+ offset
].iov_len
= total
* sizeof (uint32_t);
1141 for (cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC
);
1142 cnt
< ctype
->wcdigits_act
; cnt
+= 10)
1143 ((uint32_t *) iov
[2 + elem
+ offset
].iov_base
)[cnt
/ 10]
1144 = ctype
->wcdigits
[cnt
];
1145 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1148 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC
):
1149 /* Align entries. */
1150 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1151 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1152 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1156 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT1_WC
) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC
):
1157 cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC
);
1158 iov
[2 + elem
+ offset
].iov_base
= &ctype
->wcoutdigits
[cnt
];
1159 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1160 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1163 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN
):
1164 /* Align entries. */
1165 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1166 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1167 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1170 default_missing_len
= (ctype
->default_missing
1171 ? wcslen ((wchar_t *)ctype
->default_missing
)
1173 iov
[2 + elem
+ offset
].iov_base
= &default_missing_len
;
1174 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1175 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1178 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING
):
1179 iov
[2 + elem
+ offset
].iov_base
=
1180 ctype
->default_missing
?: (uint32_t *) L
"";
1181 iov
[2 + elem
+ offset
].iov_len
=
1182 wcslen (iov
[2 + elem
+ offset
].iov_base
) * sizeof (uint32_t);
1183 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1186 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN
):
1187 /* Align entries. */
1188 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1189 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1190 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1193 iov
[2 + elem
+ offset
].iov_base
= &ctype
->ntranslit_ignore
;
1194 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1195 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1198 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE
):
1200 uint32_t *ranges
= (uint32_t *) alloca (ctype
->ntranslit_ignore
1201 * 3 * sizeof (uint32_t));
1202 struct translit_ignore_t
*runp
;
1204 iov
[2 + elem
+ offset
].iov_base
= ranges
;
1205 iov
[2 + elem
+ offset
].iov_len
= (ctype
->ntranslit_ignore
1206 * 3 * sizeof (uint32_t));
1208 for (runp
= ctype
->translit_ignore
; runp
!= NULL
;
1211 *ranges
++ = runp
->from
;
1212 *ranges
++ = runp
->to
;
1213 *ranges
++ = runp
->step
;
1216 /* Remove the following line in case a new entry is added
1217 after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN. */
1219 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1223 assert (! "unknown CTYPE element");
1227 /* Handle extra maps. */
1228 size_t nr
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
);
1229 if (nr
< ctype
->nr_charclass
)
1231 iov
[2 + elem
+ offset
].iov_base
= ctype
->class_b
[nr
];
1232 iov
[2 + elem
+ offset
].iov_len
= 256 / 32 * sizeof (uint32_t);
1233 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1236 iov
[2 + elem
+ offset
] = ctype
->class_3level
[nr
];
1240 nr
-= ctype
->nr_charclass
;
1241 assert (nr
< ctype
->map_collection_nr
);
1242 iov
[2 + elem
+ offset
] = ctype
->map_3level
[nr
];
1244 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1248 assert (2 + elem
+ offset
== (nelems
+ 2 * ctype
->nr_charclass
1249 + ctype
->map_collection_nr
+ 4 + 2));
1251 write_locale_data (output_path
, LC_CTYPE
, "LC_CTYPE", 2 + elem
+ offset
,
1256 /* Local functions. */
1258 ctype_class_new (struct linereader
*lr
, struct locale_ctype_t
*ctype
,
1263 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
1264 if (strcmp (ctype
->classnames
[cnt
], name
) == 0)
1267 if (cnt
< ctype
->nr_charclass
)
1269 lr_error (lr
, _("character class `%s' already defined"), name
);
1273 if (ctype
->nr_charclass
== MAX_NR_CHARCLASS
)
1274 /* Exit code 2 is prescribed in P1003.2b. */
1275 WITH_CUR_LOCALE (error (2, 0, _("\
1276 implementation limit: no more than %Zd character classes allowed"),
1279 ctype
->classnames
[ctype
->nr_charclass
++] = name
;
1284 ctype_map_new (struct linereader
*lr
, struct locale_ctype_t
*ctype
,
1285 const char *name
, const struct charmap_t
*charmap
)
1287 size_t max_chars
= 0;
1290 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
)
1292 if (strcmp (ctype
->mapnames
[cnt
], name
) == 0)
1295 if (max_chars
< ctype
->map_collection_max
[cnt
])
1296 max_chars
= ctype
->map_collection_max
[cnt
];
1299 if (cnt
< ctype
->map_collection_nr
)
1301 lr_error (lr
, _("character map `%s' already defined"), name
);
1305 if (ctype
->map_collection_nr
== MAX_NR_CHARMAP
)
1306 /* Exit code 2 is prescribed in P1003.2b. */
1307 WITH_CUR_LOCALE (error (2, 0, _("\
1308 implementation limit: no more than %d character maps allowed"),
1311 ctype
->mapnames
[cnt
] = name
;
1314 ctype
->map_collection_max
[cnt
] = charmap
->mb_cur_max
== 1 ? 256 : 512;
1316 ctype
->map_collection_max
[cnt
] = max_chars
;
1318 ctype
->map_collection
[cnt
] = (uint32_t *)
1319 xcalloc (sizeof (uint32_t), ctype
->map_collection_max
[cnt
]);
1320 ctype
->map_collection_act
[cnt
] = 256;
1322 ++ctype
->map_collection_nr
;
1326 /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
1327 is possible if we only want to extend the name array. */
1329 find_idx (struct locale_ctype_t
*ctype
, uint32_t **table
, size_t *max
,
1330 size_t *act
, uint32_t idx
)
1335 return table
== NULL
? NULL
: &(*table
)[idx
];
1337 /* Use the charnames_idx lookup table instead of the slow search loop. */
1339 cnt
= idx_table_get (&ctype
->charnames_idx
, idx
);
1342 cnt
= ctype
->charnames_act
;
1344 for (cnt
= 256; cnt
< ctype
->charnames_act
; ++cnt
)
1345 if (ctype
->charnames
[cnt
] == idx
)
1349 /* We have to distinguish two cases: the name is found or not. */
1350 if (cnt
== ctype
->charnames_act
)
1352 /* Extend the name array. */
1353 if (ctype
->charnames_act
== ctype
->charnames_max
)
1355 ctype
->charnames_max
*= 2;
1356 ctype
->charnames
= (uint32_t *)
1357 xrealloc (ctype
->charnames
,
1358 sizeof (uint32_t) * ctype
->charnames_max
);
1360 ctype
->charnames
[ctype
->charnames_act
++] = idx
;
1361 idx_table_add (&ctype
->charnames_idx
, idx
, cnt
);
1365 /* We have done everything we are asked to do. */
1369 /* The caller does not want to extend the table. */
1370 return (cnt
>= *act
? NULL
: &(*table
)[cnt
]);
1376 size_t old_max
= *max
;
1379 while (*max
<= cnt
);
1382 (uint32_t *) xrealloc (*table
, *max
* sizeof (uint32_t));
1383 memset (&(*table
)[old_max
], '\0',
1384 (*max
- old_max
) * sizeof (uint32_t));
1390 return &(*table
)[cnt
];
1395 get_character (struct token
*now
, const struct charmap_t
*charmap
,
1396 struct repertoire_t
*repertoire
,
1397 struct charseq
**seqp
, uint32_t *wchp
)
1399 if (now
->tok
== tok_bsymbol
)
1401 /* This will hopefully be the normal case. */
1402 *wchp
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
1403 now
->val
.str
.lenmb
);
1404 *seqp
= charmap_find_value (charmap
, now
->val
.str
.startmb
,
1405 now
->val
.str
.lenmb
);
1407 else if (now
->tok
== tok_ucs4
)
1411 snprintf (utmp
, sizeof (utmp
), "U%08X", now
->val
.ucs4
);
1412 *seqp
= charmap_find_value (charmap
, utmp
, 9);
1415 *seqp
= repertoire_find_seq (repertoire
, now
->val
.ucs4
);
1419 /* Compute the value in the charmap from the UCS value. */
1420 const char *symbol
= repertoire_find_symbol (repertoire
,
1426 *seqp
= charmap_find_value (charmap
, symbol
, strlen (symbol
));
1430 if (repertoire
!= NULL
)
1432 /* Insert a negative entry. */
1433 static const struct charseq negative
1434 = { .ucs4
= ILLEGAL_CHAR_VALUE
};
1435 uint32_t *newp
= obstack_alloc (&repertoire
->mem_pool
,
1437 *newp
= now
->val
.ucs4
;
1439 insert_entry (&repertoire
->seq_table
, newp
,
1440 sizeof (uint32_t), (void *) &negative
);
1444 (*seqp
)->ucs4
= now
->val
.ucs4
;
1446 else if ((*seqp
)->ucs4
!= now
->val
.ucs4
)
1449 *wchp
= now
->val
.ucs4
;
1451 else if (now
->tok
== tok_charcode
)
1453 /* We must map from the byte code to UCS4. */
1454 *seqp
= charmap_find_symbol (charmap
, now
->val
.str
.startmb
,
1455 now
->val
.str
.lenmb
);
1458 *wchp
= ILLEGAL_CHAR_VALUE
;
1461 if ((*seqp
)->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1462 (*seqp
)->ucs4
= repertoire_find_value (repertoire
, (*seqp
)->name
,
1463 strlen ((*seqp
)->name
));
1464 *wchp
= (*seqp
)->ucs4
;
1474 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1475 the .(2). counterparts. */
1477 charclass_symbolic_ellipsis (struct linereader
*ldfile
,
1478 struct locale_ctype_t
*ctype
,
1479 const struct charmap_t
*charmap
,
1480 struct repertoire_t
*repertoire
,
1482 const char *last_str
,
1483 unsigned long int class256_bit
,
1484 unsigned long int class_bit
, int base
,
1485 int ignore_content
, int handle_digits
, int step
)
1487 const char *nowstr
= now
->val
.str
.startmb
;
1488 char tmp
[now
->val
.str
.lenmb
+ 1];
1491 unsigned long int from
;
1492 unsigned long int to
;
1494 /* We have to compute the ellipsis values using the symbolic names. */
1495 assert (last_str
!= NULL
);
1497 if (strlen (last_str
) != now
->val
.str
.lenmb
)
1501 _("`%s' and `%.*s' are not valid names for symbolic range"),
1502 last_str
, (int) now
->val
.str
.lenmb
, nowstr
);
1506 if (memcmp (last_str
, nowstr
, now
->val
.str
.lenmb
) == 0)
1507 /* Nothing to do, the names are the same. */
1510 for (cp
= last_str
; *cp
== *(nowstr
+ (cp
- last_str
)); ++cp
)
1514 from
= strtoul (cp
, &endp
, base
);
1515 if ((from
== UINT_MAX
&& errno
== ERANGE
) || *endp
!= '\0')
1518 to
= strtoul (nowstr
+ (cp
- last_str
), &endp
, base
);
1519 if ((to
== UINT_MAX
&& errno
== ERANGE
)
1520 || (endp
- nowstr
) != now
->val
.str
.lenmb
|| from
>= to
)
1523 /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
1524 if (!ignore_content
)
1526 now
->val
.str
.startmb
= tmp
;
1527 while ((from
+= step
) <= to
)
1529 struct charseq
*seq
;
1532 sprintf (tmp
, (base
== 10 ? "%.*s%0*ld" : "%.*s%0*lX"),
1533 (int) (cp
- last_str
), last_str
,
1534 (int) (now
->val
.str
.lenmb
- (cp
- last_str
)),
1537 get_character (now
, charmap
, repertoire
, &seq
, &wch
);
1539 if (seq
!= NULL
&& seq
->nbytes
== 1)
1540 /* Yep, we can store information about this byte sequence. */
1541 ctype
->class256_collection
[seq
->bytes
[0]] |= class256_bit
;
1543 if (wch
!= ILLEGAL_CHAR_VALUE
&& class_bit
!= 0)
1544 /* We have the UCS4 position. */
1545 *find_idx (ctype
, &ctype
->class_collection
,
1546 &ctype
->class_collection_max
,
1547 &ctype
->class_collection_act
, wch
) |= class_bit
;
1549 if (handle_digits
== 1)
1551 /* We must store the digit values. */
1552 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1554 ctype
->mbdigits_max
*= 2;
1555 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1556 (ctype
->mbdigits_max
1557 * sizeof (char *)));
1558 ctype
->wcdigits_max
*= 2;
1559 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1560 (ctype
->wcdigits_max
1561 * sizeof (uint32_t)));
1564 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
1565 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
1567 else if (handle_digits
== 2)
1569 /* We must store the digit values. */
1570 if (ctype
->outdigits_act
>= 10)
1572 lr_error (ldfile
, _("\
1573 %s: field `%s' does not contain exactly ten entries"),
1574 "LC_CTYPE", "outdigit");
1578 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
1579 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
1580 ++ctype
->outdigits_act
;
1587 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. */
1589 charclass_ucs4_ellipsis (struct linereader
*ldfile
,
1590 struct locale_ctype_t
*ctype
,
1591 const struct charmap_t
*charmap
,
1592 struct repertoire_t
*repertoire
,
1593 struct token
*now
, uint32_t last_wch
,
1594 unsigned long int class256_bit
,
1595 unsigned long int class_bit
, int ignore_content
,
1596 int handle_digits
, int step
)
1598 if (last_wch
> now
->val
.ucs4
)
1600 lr_error (ldfile
, _("\
1601 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1602 (now
->val
.ucs4
| last_wch
) < 65536 ? 4 : 8, now
->val
.ucs4
,
1603 (now
->val
.ucs4
| last_wch
) < 65536 ? 4 : 8, last_wch
);
1607 if (!ignore_content
)
1608 while ((last_wch
+= step
) <= now
->val
.ucs4
)
1610 /* We have to find out whether there is a byte sequence corresponding
1611 to this UCS4 value. */
1612 struct charseq
*seq
;
1615 snprintf (utmp
, sizeof (utmp
), "U%08X", last_wch
);
1616 seq
= charmap_find_value (charmap
, utmp
, 9);
1619 snprintf (utmp
, sizeof (utmp
), "U%04X", last_wch
);
1620 seq
= charmap_find_value (charmap
, utmp
, 5);
1624 /* Try looking in the repertoire map. */
1625 seq
= repertoire_find_seq (repertoire
, last_wch
);
1627 /* If this is the first time we look for this sequence create a new
1631 static const struct charseq negative
1632 = { .ucs4
= ILLEGAL_CHAR_VALUE
};
1634 /* Find the symbolic name for this UCS4 value. */
1635 if (repertoire
!= NULL
)
1637 const char *symbol
= repertoire_find_symbol (repertoire
,
1639 uint32_t *newp
= obstack_alloc (&repertoire
->mem_pool
,
1644 /* We have a name, now search the multibyte value. */
1645 seq
= charmap_find_value (charmap
, symbol
, strlen (symbol
));
1648 /* We have to create a fake entry. */
1649 seq
= (struct charseq
*) &negative
;
1651 seq
->ucs4
= last_wch
;
1653 insert_entry (&repertoire
->seq_table
, newp
, sizeof (uint32_t),
1657 /* We have to create a fake entry. */
1658 seq
= (struct charseq
*) &negative
;
1661 /* We have a name, now search the multibyte value. */
1662 if (seq
->ucs4
== last_wch
&& seq
->nbytes
== 1)
1663 /* Yep, we can store information about this byte sequence. */
1664 ctype
->class256_collection
[(size_t) seq
->bytes
[0]]
1667 /* And of course we have the UCS4 position. */
1669 *find_idx (ctype
, &ctype
->class_collection
,
1670 &ctype
->class_collection_max
,
1671 &ctype
->class_collection_act
, last_wch
) |= class_bit
;
1673 if (handle_digits
== 1)
1675 /* We must store the digit values. */
1676 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1678 ctype
->mbdigits_max
*= 2;
1679 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1680 (ctype
->mbdigits_max
1681 * sizeof (char *)));
1682 ctype
->wcdigits_max
*= 2;
1683 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1684 (ctype
->wcdigits_max
1685 * sizeof (uint32_t)));
1688 ctype
->mbdigits
[ctype
->mbdigits_act
++] = (seq
->ucs4
== last_wch
1690 ctype
->wcdigits
[ctype
->wcdigits_act
++] = last_wch
;
1692 else if (handle_digits
== 2)
1694 /* We must store the digit values. */
1695 if (ctype
->outdigits_act
>= 10)
1697 lr_error (ldfile
, _("\
1698 %s: field `%s' does not contain exactly ten entries"),
1699 "LC_CTYPE", "outdigit");
1703 ctype
->mboutdigits
[ctype
->outdigits_act
] = (seq
->ucs4
== last_wch
1705 ctype
->wcoutdigits
[ctype
->outdigits_act
] = last_wch
;
1706 ++ctype
->outdigits_act
;
1712 /* Ellipsis as in `/xea/x12.../xea/x34'. */
1714 charclass_charcode_ellipsis (struct linereader
*ldfile
,
1715 struct locale_ctype_t
*ctype
,
1716 const struct charmap_t
*charmap
,
1717 struct repertoire_t
*repertoire
,
1718 struct token
*now
, char *last_charcode
,
1719 uint32_t last_charcode_len
,
1720 unsigned long int class256_bit
,
1721 unsigned long int class_bit
, int ignore_content
,
1724 /* First check whether the to-value is larger. */
1725 if (now
->val
.charcode
.nbytes
!= last_charcode_len
)
1727 lr_error (ldfile
, _("\
1728 start and end character sequence of range must have the same length"));
1732 if (memcmp (last_charcode
, now
->val
.charcode
.bytes
, last_charcode_len
) > 0)
1734 lr_error (ldfile
, _("\
1735 to-value character sequence is smaller than from-value sequence"));
1739 if (!ignore_content
)
1743 /* Increment the byte sequence value. */
1744 struct charseq
*seq
;
1748 for (i
= last_charcode_len
- 1; i
>= 0; --i
)
1749 if (++last_charcode
[i
] != 0)
1752 if (last_charcode_len
== 1)
1753 /* Of course we have the charcode value. */
1754 ctype
->class256_collection
[(size_t) last_charcode
[0]]
1757 /* Find the symbolic name. */
1758 seq
= charmap_find_symbol (charmap
, last_charcode
,
1762 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1763 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1764 strlen (seq
->name
));
1765 wch
= seq
== NULL
? ILLEGAL_CHAR_VALUE
: seq
->ucs4
;
1767 if (wch
!= ILLEGAL_CHAR_VALUE
&& class_bit
!= 0)
1768 *find_idx (ctype
, &ctype
->class_collection
,
1769 &ctype
->class_collection_max
,
1770 &ctype
->class_collection_act
, wch
) |= class_bit
;
1773 wch
= ILLEGAL_CHAR_VALUE
;
1775 if (handle_digits
== 1)
1777 /* We must store the digit values. */
1778 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1780 ctype
->mbdigits_max
*= 2;
1781 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1782 (ctype
->mbdigits_max
1783 * sizeof (char *)));
1784 ctype
->wcdigits_max
*= 2;
1785 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1786 (ctype
->wcdigits_max
1787 * sizeof (uint32_t)));
1790 seq
= xmalloc (sizeof (struct charseq
) + last_charcode_len
);
1791 memcpy ((char *) (seq
+ 1), last_charcode
, last_charcode_len
);
1792 seq
->nbytes
= last_charcode_len
;
1794 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
1795 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
1797 else if (handle_digits
== 2)
1799 struct charseq
*seq
;
1800 /* We must store the digit values. */
1801 if (ctype
->outdigits_act
>= 10)
1803 lr_error (ldfile
, _("\
1804 %s: field `%s' does not contain exactly ten entries"),
1805 "LC_CTYPE", "outdigit");
1809 seq
= xmalloc (sizeof (struct charseq
) + last_charcode_len
);
1810 memcpy ((char *) (seq
+ 1), last_charcode
, last_charcode_len
);
1811 seq
->nbytes
= last_charcode_len
;
1813 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
1814 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
1815 ++ctype
->outdigits_act
;
1818 while (memcmp (last_charcode
, now
->val
.charcode
.bytes
,
1819 last_charcode_len
) != 0);
1825 find_translit2 (struct locale_ctype_t
*ctype
, const struct charmap_t
*charmap
,
1828 struct translit_t
*trunp
= ctype
->translit
;
1829 struct translit_ignore_t
*tirunp
= ctype
->translit_ignore
;
1831 while (trunp
!= NULL
)
1833 /* XXX We simplify things here. The transliterations we look
1834 for are only allowed to have one character. */
1835 if (trunp
->from
[0] == wch
&& trunp
->from
[1] == 0)
1837 /* Found it. Now look for a transliteration which can be
1838 represented with the character set. */
1839 struct translit_to_t
*torunp
= trunp
->to
;
1841 while (torunp
!= NULL
)
1845 for (i
= 0; torunp
->str
[i
] != 0; ++i
)
1849 snprintf (utmp
, sizeof (utmp
), "U%08X", torunp
->str
[i
]);
1850 if (charmap_find_value (charmap
, utmp
, 9) == NULL
)
1851 /* This character cannot be represented. */
1855 if (torunp
->str
[i
] == 0)
1858 torunp
= torunp
->next
;
1864 trunp
= trunp
->next
;
1867 /* Check for ignored chars. */
1868 while (tirunp
!= NULL
)
1870 if (tirunp
->from
<= wch
&& tirunp
->to
>= wch
)
1874 for (wi
= tirunp
->from
; wi
<= wch
; wi
+= tirunp
->step
)
1876 return (uint32_t []) { 0 };
1880 /* Nothing found. */
1886 find_translit (struct localedef_t
*locale
, const struct charmap_t
*charmap
,
1889 struct locale_ctype_t
*ctype
;
1890 uint32_t *result
= NULL
;
1892 assert (locale
!= NULL
);
1893 ctype
= locale
->categories
[LC_CTYPE
].ctype
;
1898 if (ctype
->translit
!= NULL
)
1899 result
= find_translit2 (ctype
, charmap
, wch
);
1903 struct translit_include_t
*irunp
= ctype
->translit_include
;
1905 while (irunp
!= NULL
&& result
== NULL
)
1907 result
= find_translit (find_locale (CTYPE_LOCALE
,
1909 irunp
->copy_repertoire
,
1912 irunp
= irunp
->next
;
1920 /* Read one transliteration entry. */
1922 read_widestring (struct linereader
*ldfile
, struct token
*now
,
1923 const struct charmap_t
*charmap
,
1924 struct repertoire_t
*repertoire
)
1928 if (now
->tok
== tok_default_missing
)
1929 /* The special name "" will denote this case. */
1930 wstr
= ((uint32_t *) { 0 });
1931 else if (now
->tok
== tok_bsymbol
)
1933 /* Get the value from the repertoire. */
1934 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1935 wstr
[0] = repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
1936 now
->val
.str
.lenmb
);
1937 if (wstr
[0] == ILLEGAL_CHAR_VALUE
)
1939 /* We cannot proceed, we don't know the UCS4 value. */
1946 else if (now
->tok
== tok_ucs4
)
1948 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1949 wstr
[0] = now
->val
.ucs4
;
1952 else if (now
->tok
== tok_charcode
)
1954 /* Argh, we have to convert to the symbol name first and then to the
1956 struct charseq
*seq
= charmap_find_symbol (charmap
,
1957 now
->val
.str
.startmb
,
1958 now
->val
.str
.lenmb
);
1960 /* Cannot find the UCS4 value. */
1963 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1964 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1965 strlen (seq
->name
));
1966 if (seq
->ucs4
== ILLEGAL_CHAR_VALUE
)
1967 /* We cannot proceed, we don't know the UCS4 value. */
1970 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1971 wstr
[0] = seq
->ucs4
;
1974 else if (now
->tok
== tok_string
)
1976 wstr
= now
->val
.str
.startwc
;
1977 if (wstr
== NULL
|| wstr
[0] == 0)
1982 if (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
1983 lr_ignore_rest (ldfile
, 0);
1984 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1985 return (uint32_t *) -1l;
1993 read_translit_entry (struct linereader
*ldfile
, struct locale_ctype_t
*ctype
,
1994 struct token
*now
, const struct charmap_t
*charmap
,
1995 struct repertoire_t
*repertoire
)
1997 uint32_t *from_wstr
= read_widestring (ldfile
, now
, charmap
, repertoire
);
1998 struct translit_t
*result
;
1999 struct translit_to_t
**top
;
2000 struct obstack
*ob
= &ctype
->mempool
;
2004 if (from_wstr
== NULL
)
2005 /* There is no valid from string. */
2008 result
= (struct translit_t
*) obstack_alloc (ob
,
2009 sizeof (struct translit_t
));
2010 result
->from
= from_wstr
;
2011 result
->fname
= ldfile
->fname
;
2012 result
->lineno
= ldfile
->lineno
;
2013 result
->next
= NULL
;
2023 /* Next we have one or more transliterations. They are
2024 separated by semicolons. */
2025 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
2027 if (!first
&& (now
->tok
== tok_semicolon
|| now
->tok
== tok_eol
))
2029 /* One string read. */
2030 const uint32_t zero
= 0;
2034 obstack_grow (ob
, &zero
, 4);
2035 to_wstr
= obstack_finish (ob
);
2037 *top
= obstack_alloc (ob
, sizeof (struct translit_to_t
));
2038 (*top
)->str
= to_wstr
;
2039 (*top
)->next
= NULL
;
2042 if (now
->tok
== tok_eol
)
2044 result
->next
= ctype
->translit
;
2045 ctype
->translit
= result
;
2050 top
= &(*top
)->next
;
2055 to_wstr
= read_widestring (ldfile
, now
, charmap
, repertoire
);
2056 if (to_wstr
== (uint32_t *) -1l)
2058 /* An error occurred. */
2059 obstack_free (ob
, result
);
2063 if (to_wstr
== NULL
)
2066 /* This value is usable. */
2067 obstack_grow (ob
, to_wstr
, wcslen ((wchar_t *) to_wstr
) * 4);
2076 read_translit_ignore_entry (struct linereader
*ldfile
,
2077 struct locale_ctype_t
*ctype
,
2078 const struct charmap_t
*charmap
,
2079 struct repertoire_t
*repertoire
)
2081 /* We expect a semicolon-separated list of characters we ignore. We are
2082 only interested in the wide character definitions. These must be
2083 single characters, possibly defining a range when an ellipsis is used. */
2086 struct token
*now
= lr_token (ldfile
, charmap
, NULL
, repertoire
,
2088 struct translit_ignore_t
*newp
;
2091 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2094 _("premature end of `translit_ignore' definition"));
2098 if (now
->tok
!= tok_bsymbol
&& now
->tok
!= tok_ucs4
)
2100 lr_error (ldfile
, _("syntax error"));
2101 lr_ignore_rest (ldfile
, 0);
2105 if (now
->tok
== tok_ucs4
)
2106 from
= now
->val
.ucs4
;
2108 /* Try to get the value. */
2109 from
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
2110 now
->val
.str
.lenmb
);
2112 if (from
== ILLEGAL_CHAR_VALUE
)
2114 lr_error (ldfile
, "invalid character name");
2119 newp
= (struct translit_ignore_t
*)
2120 obstack_alloc (&ctype
->mempool
, sizeof (struct translit_ignore_t
));
2125 newp
->next
= ctype
->translit_ignore
;
2126 ctype
->translit_ignore
= newp
;
2129 /* Now we expect either a semicolon, an ellipsis, or the end of the
2131 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
2133 if (now
->tok
== tok_ellipsis2
|| now
->tok
== tok_ellipsis2_2
)
2135 /* XXX Should we bother implementing `....'? `...' certainly
2136 will not be implemented. */
2138 int step
= now
->tok
== tok_ellipsis2_2
? 2 : 1;
2140 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
2142 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2145 _("premature end of `translit_ignore' definition"));
2149 if (now
->tok
!= tok_bsymbol
&& now
->tok
!= tok_ucs4
)
2151 lr_error (ldfile
, _("syntax error"));
2152 lr_ignore_rest (ldfile
, 0);
2156 if (now
->tok
== tok_ucs4
)
2159 /* Try to get the value. */
2160 to
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
2161 now
->val
.str
.lenmb
);
2163 if (to
== ILLEGAL_CHAR_VALUE
)
2164 lr_error (ldfile
, "invalid character name");
2167 /* Make sure the `to'-value is larger. */
2174 lr_error (ldfile
, _("\
2175 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2176 (to
| from
) < 65536 ? 4 : 8, to
,
2177 (to
| from
) < 65536 ? 4 : 8, from
);
2180 /* And the next token. */
2181 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
2184 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2188 if (now
->tok
== tok_semicolon
)
2192 /* If we come here something is wrong. */
2193 lr_error (ldfile
, _("syntax error"));
2194 lr_ignore_rest (ldfile
, 0);
2200 /* The parser for the LC_CTYPE section of the locale definition. */
2202 ctype_read (struct linereader
*ldfile
, struct localedef_t
*result
,
2203 const struct charmap_t
*charmap
, const char *repertoire_name
,
2206 struct repertoire_t
*repertoire
= NULL
;
2207 struct locale_ctype_t
*ctype
;
2209 enum token_t nowtok
;
2211 uint32_t last_wch
= 0;
2212 enum token_t last_token
;
2213 enum token_t ellipsis_token
;
2215 char last_charcode
[16];
2216 size_t last_charcode_len
= 0;
2217 const char *last_str
= NULL
;
2219 struct localedef_t
*copy_locale
= NULL
;
2221 /* Get the repertoire we have to use. */
2222 if (repertoire_name
!= NULL
)
2223 repertoire
= repertoire_read (repertoire_name
);
2225 /* The rest of the line containing `LC_CTYPE' must be free. */
2226 lr_ignore_rest (ldfile
, 1);
2231 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2234 while (nowtok
== tok_eol
);
2236 /* If we see `copy' now we are almost done. */
2237 if (nowtok
== tok_copy
)
2239 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2240 if (now
->tok
!= tok_string
)
2242 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2246 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2247 while (now
->tok
!= tok_eof
&& now
->tok
!= tok_end
);
2249 if (now
->tok
!= tok_eof
2250 || (now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
),
2251 now
->tok
== tok_eof
))
2252 lr_error (ldfile
, _("%s: premature end of file"), "LC_CTYPE");
2253 else if (now
->tok
!= tok_lc_ctype
)
2255 lr_error (ldfile
, _("\
2256 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2257 lr_ignore_rest (ldfile
, 0);
2260 lr_ignore_rest (ldfile
, 1);
2265 if (! ignore_content
)
2267 /* Get the locale definition. */
2268 copy_locale
= load_locale (LC_CTYPE
, now
->val
.str
.startmb
,
2269 repertoire_name
, charmap
, NULL
);
2270 if ((copy_locale
->avail
& CTYPE_LOCALE
) == 0)
2272 /* Not yet loaded. So do it now. */
2273 if (locfile_read (copy_locale
, charmap
) != 0)
2277 if (copy_locale
->categories
[LC_CTYPE
].ctype
== NULL
)
2281 lr_ignore_rest (ldfile
, 1);
2283 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2287 /* Prepare the data structures. */
2288 ctype_startup (ldfile
, result
, charmap
, copy_locale
, ignore_content
);
2289 ctype
= result
->categories
[LC_CTYPE
].ctype
;
2291 /* Remember the repertoire we use. */
2292 if (!ignore_content
)
2293 ctype
->repertoire
= repertoire
;
2297 unsigned long int class_bit
= 0;
2298 unsigned long int class256_bit
= 0;
2299 int handle_digits
= 0;
2301 /* Of course we don't proceed beyond the end of file. */
2302 if (nowtok
== tok_eof
)
2305 /* Ignore empty lines. */
2306 if (nowtok
== tok_eol
)
2308 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2316 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2317 while (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2319 ctype_class_new (ldfile
, ctype
, now
->val
.str
.startmb
);
2320 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2321 if (now
->tok
!= tok_semicolon
)
2323 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2325 if (now
->tok
!= tok_eol
)
2327 %s: syntax error in definition of new character class"), "LC_CTYPE");
2331 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2332 while (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2334 ctype_map_new (ldfile
, ctype
, now
->val
.str
.startmb
, charmap
);
2335 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2336 if (now
->tok
!= tok_semicolon
)
2338 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2340 if (now
->tok
!= tok_eol
)
2342 %s: syntax error in definition of new character map"), "LC_CTYPE");
2346 /* Ignore the rest of the line if we don't need the input of
2350 lr_ignore_rest (ldfile
, 0);
2354 /* We simply forget the `class' keyword and use the following
2355 operand to determine the bit. */
2356 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2357 if (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2359 /* Check whether it is one of the already known class names. */
2360 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
2361 if (strcmp (ctype
->classnames
[cnt
], now
->val
.str
.startmb
) == 0)
2363 if (cnt
>= ctype
->nr_charclass
)
2365 #ifdef PREDEFINED_CLASSES
2366 if (now
->val
.str
.lenmb
== 8
2367 && memcmp ("special1", now
->val
.str
.startmb
, 8) == 0)
2368 class_bit
= _ISwspecial1
;
2369 else if (now
->val
.str
.lenmb
== 8
2370 && memcmp ("special2", now
->val
.str
.startmb
, 8) == 0)
2371 class_bit
= _ISwspecial2
;
2372 else if (now
->val
.str
.lenmb
== 8
2373 && memcmp ("special3", now
->val
.str
.startmb
, 8) == 0)
2374 class_bit
= _ISwspecial3
;
2378 /* OK, it's a new class. */
2379 ctype_class_new (ldfile
, ctype
, now
->val
.str
.startmb
);
2381 class_bit
= _ISwbit (ctype
->nr_charclass
- 1);
2386 class_bit
= _ISwbit (cnt
);
2388 free (now
->val
.str
.startmb
);
2391 else if (now
->tok
== tok_digit
)
2392 goto handle_tok_digit
;
2393 else if (now
->tok
< tok_upper
|| now
->tok
> tok_blank
)
2397 class_bit
= BITw (now
->tok
);
2398 class256_bit
= BIT (now
->tok
);
2401 /* The next character must be a semicolon. */
2402 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2403 if (now
->tok
!= tok_semicolon
)
2405 goto read_charclass
;
2418 /* Ignore the rest of the line if we don't need the input of
2422 lr_ignore_rest (ldfile
, 0);
2426 class_bit
= BITw (now
->tok
);
2427 class256_bit
= BIT (now
->tok
);
2430 ctype
->class_done
|= class_bit
;
2431 last_token
= tok_none
;
2432 ellipsis_token
= tok_none
;
2434 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2435 while (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
2438 struct charseq
*seq
;
2440 if (ellipsis_token
== tok_none
)
2442 if (get_character (now
, charmap
, repertoire
, &seq
, &wch
))
2445 if (!ignore_content
&& seq
!= NULL
&& seq
->nbytes
== 1)
2446 /* Yep, we can store information about this byte
2448 ctype
->class256_collection
[seq
->bytes
[0]] |= class256_bit
;
2450 if (!ignore_content
&& wch
!= ILLEGAL_CHAR_VALUE
2452 /* We have the UCS4 position. */
2453 *find_idx (ctype
, &ctype
->class_collection
,
2454 &ctype
->class_collection_max
,
2455 &ctype
->class_collection_act
, wch
) |= class_bit
;
2457 last_token
= now
->tok
;
2458 /* Terminate the string. */
2459 if (last_token
== tok_bsymbol
)
2461 now
->val
.str
.startmb
[now
->val
.str
.lenmb
] = '\0';
2462 last_str
= now
->val
.str
.startmb
;
2467 memcpy (last_charcode
, now
->val
.charcode
.bytes
, 16);
2468 last_charcode_len
= now
->val
.charcode
.nbytes
;
2470 if (!ignore_content
&& handle_digits
== 1)
2472 /* We must store the digit values. */
2473 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
2475 ctype
->mbdigits_max
+= 10;
2476 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
2477 (ctype
->mbdigits_max
2478 * sizeof (char *)));
2479 ctype
->wcdigits_max
+= 10;
2480 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
2481 (ctype
->wcdigits_max
2482 * sizeof (uint32_t)));
2485 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
2486 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
2488 else if (!ignore_content
&& handle_digits
== 2)
2490 /* We must store the digit values. */
2491 if (ctype
->outdigits_act
>= 10)
2493 lr_error (ldfile
, _("\
2494 %s: field `%s' does not contain exactly ten entries"),
2495 "LC_CTYPE", "outdigit");
2496 lr_ignore_rest (ldfile
, 0);
2500 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
2501 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
2502 ++ctype
->outdigits_act
;
2507 /* Now it gets complicated. We have to resolve the
2508 ellipsis problem. First we must distinguish between
2509 the different kind of ellipsis and this must match the
2510 tokens we have seen. */
2511 assert (last_token
!= tok_none
);
2513 if (last_token
!= now
->tok
)
2515 lr_error (ldfile
, _("\
2516 ellipsis range must be marked by two operands of same type"));
2517 lr_ignore_rest (ldfile
, 0);
2521 if (last_token
== tok_bsymbol
)
2523 if (ellipsis_token
== tok_ellipsis3
)
2524 lr_error (ldfile
, _("with symbolic name range values \
2525 the absolute ellipsis `...' must not be used"));
2527 charclass_symbolic_ellipsis (ldfile
, ctype
, charmap
,
2528 repertoire
, now
, last_str
,
2529 class256_bit
, class_bit
,
2534 handle_digits
, step
);
2536 else if (last_token
== tok_ucs4
)
2538 if (ellipsis_token
!= tok_ellipsis2
)
2539 lr_error (ldfile
, _("\
2540 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2542 charclass_ucs4_ellipsis (ldfile
, ctype
, charmap
,
2543 repertoire
, now
, last_wch
,
2544 class256_bit
, class_bit
,
2545 ignore_content
, handle_digits
,
2550 assert (last_token
== tok_charcode
);
2552 if (ellipsis_token
!= tok_ellipsis3
)
2553 lr_error (ldfile
, _("\
2554 with character code range values one must use the absolute ellipsis `...'"));
2556 charclass_charcode_ellipsis (ldfile
, ctype
, charmap
,
2560 class256_bit
, class_bit
,
2565 /* Now we have used the last value. */
2566 last_token
= tok_none
;
2569 /* Next we expect a semicolon or the end of the line. */
2570 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2571 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2574 if (last_token
!= tok_none
2575 && now
->tok
>= tok_ellipsis2
&& now
->tok
<= tok_ellipsis4_2
)
2577 if (now
->tok
== tok_ellipsis2_2
)
2579 now
->tok
= tok_ellipsis2
;
2582 else if (now
->tok
== tok_ellipsis4_2
)
2584 now
->tok
= tok_ellipsis4
;
2588 ellipsis_token
= now
->tok
;
2590 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2594 if (now
->tok
!= tok_semicolon
)
2597 /* And get the next character. */
2598 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2600 ellipsis_token
= tok_none
;
2606 /* Ignore the rest of the line if we don't need the input of
2610 lr_ignore_rest (ldfile
, 0);
2615 class_bit
= _ISwdigit
;
2616 class256_bit
= _ISdigit
;
2618 goto read_charclass
;
2621 /* Ignore the rest of the line if we don't need the input of
2625 lr_ignore_rest (ldfile
, 0);
2629 if (ctype
->outdigits_act
!= 0)
2630 lr_error (ldfile
, _("\
2631 %s: field `%s' declared more than once"),
2632 "LC_CTYPE", "outdigit");
2636 goto read_charclass
;
2639 /* Ignore the rest of the line if we don't need the input of
2643 lr_ignore_rest (ldfile
, 0);
2651 /* Ignore the rest of the line if we don't need the input of
2655 lr_ignore_rest (ldfile
, 0);
2663 /* Ignore the rest of the line if we don't need the input of
2667 lr_ignore_rest (ldfile
, 0);
2671 /* We simply forget the `map' keyword and use the following
2672 operand to determine the mapping. */
2673 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2674 if (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2678 for (cnt
= 2; cnt
< ctype
->map_collection_nr
; ++cnt
)
2679 if (strcmp (now
->val
.str
.startmb
, ctype
->mapnames
[cnt
]) == 0)
2682 if (cnt
< ctype
->map_collection_nr
)
2683 free (now
->val
.str
.startmb
);
2685 /* OK, it's a new map. */
2686 ctype_map_new (ldfile
, ctype
, now
->val
.str
.startmb
, charmap
);
2690 else if (now
->tok
< tok_toupper
|| now
->tok
> tok_tolower
)
2693 mapidx
= now
->tok
- tok_toupper
;
2695 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2696 /* This better should be a semicolon. */
2697 if (now
->tok
!= tok_semicolon
)
2701 /* Test whether this mapping was already defined. */
2702 if (ctype
->tomap_done
[mapidx
])
2704 lr_error (ldfile
, _("duplicated definition for mapping `%s'"),
2705 ctype
->mapnames
[mapidx
]);
2706 lr_ignore_rest (ldfile
, 0);
2709 ctype
->tomap_done
[mapidx
] = 1;
2711 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2712 while (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
2714 struct charseq
*from_seq
;
2716 struct charseq
*to_seq
;
2719 /* Every pair starts with an opening brace. */
2720 if (now
->tok
!= tok_open_brace
)
2723 /* Next comes the from-value. */
2724 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2725 if (get_character (now
, charmap
, repertoire
, &from_seq
,
2729 /* The next is a comma. */
2730 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2731 if (now
->tok
!= tok_comma
)
2734 /* And the other value. */
2735 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2736 if (get_character (now
, charmap
, repertoire
, &to_seq
,
2740 /* And the last thing is the closing brace. */
2741 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2742 if (now
->tok
!= tok_close_brace
)
2745 if (!ignore_content
)
2747 /* Check whether the mapping converts from an ASCII value
2748 to a non-ASCII value. */
2749 if (from_seq
!= NULL
&& from_seq
->nbytes
== 1
2750 && isascii (from_seq
->bytes
[0])
2751 && to_seq
!= NULL
&& (to_seq
->nbytes
!= 1
2752 || !isascii (to_seq
->bytes
[0])))
2753 ctype
->to_nonascii
= 1;
2755 if (mapidx
< 2 && from_seq
!= NULL
&& to_seq
!= NULL
2756 && from_seq
->nbytes
== 1 && to_seq
->nbytes
== 1)
2757 /* We can use this value. */
2758 ctype
->map256_collection
[mapidx
][from_seq
->bytes
[0]]
2761 if (from_wch
!= ILLEGAL_CHAR_VALUE
2762 && to_wch
!= ILLEGAL_CHAR_VALUE
)
2763 /* Both correct values. */
2764 *find_idx (ctype
, &ctype
->map_collection
[mapidx
],
2765 &ctype
->map_collection_max
[mapidx
],
2766 &ctype
->map_collection_act
[mapidx
],
2770 /* Now comes a semicolon or the end of the line/file. */
2771 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2772 if (now
->tok
== tok_semicolon
)
2773 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2777 case tok_translit_start
:
2778 /* Ignore the entire translit section with its peculiar syntax
2779 if we don't need the input. */
2784 lr_ignore_rest (ldfile
, 0);
2785 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2787 while (now
->tok
!= tok_translit_end
&& now
->tok
!= tok_eof
);
2789 if (now
->tok
== tok_eof
)
2790 lr_error (ldfile
, _(\
2791 "%s: `translit_start' section does not end with `translit_end'"),
2797 /* The rest of the line better should be empty. */
2798 lr_ignore_rest (ldfile
, 1);
2800 /* We count here the number of allocated entries in the `translit'
2804 ldfile
->translate_strings
= 1;
2805 ldfile
->return_widestr
= 1;
2807 /* We proceed until we see the `translit_end' token. */
2808 while (now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
),
2809 now
->tok
!= tok_translit_end
&& now
->tok
!= tok_eof
)
2811 if (now
->tok
== tok_eol
)
2812 /* Ignore empty lines. */
2815 if (now
->tok
== tok_include
)
2817 /* We have to include locale. */
2818 const char *locale_name
;
2819 const char *repertoire_name
;
2820 struct translit_include_t
*include_stmt
, **include_ptr
;
2822 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2823 /* This should be a string or an identifier. In any
2824 case something to name a locale. */
2825 if (now
->tok
!= tok_string
&& now
->tok
!= tok_ident
)
2828 lr_error (ldfile
, _("%s: syntax error"), "LC_CTYPE");
2829 lr_ignore_rest (ldfile
, 0);
2832 locale_name
= now
->val
.str
.startmb
;
2834 /* Next should be a semicolon. */
2835 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2836 if (now
->tok
!= tok_semicolon
)
2837 goto translit_syntax
;
2839 /* Now the repertoire name. */
2840 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2841 if ((now
->tok
!= tok_string
&& now
->tok
!= tok_ident
)
2842 || now
->val
.str
.startmb
== NULL
)
2843 goto translit_syntax
;
2844 repertoire_name
= now
->val
.str
.startmb
;
2845 if (repertoire_name
[0] == '\0')
2846 /* Ignore the empty string. */
2847 repertoire_name
= NULL
;
2849 /* Save the include statement for later processing. */
2850 include_stmt
= (struct translit_include_t
*)
2851 xmalloc (sizeof (struct translit_include_t
));
2852 include_stmt
->copy_locale
= locale_name
;
2853 include_stmt
->copy_repertoire
= repertoire_name
;
2854 include_stmt
->next
= NULL
;
2856 include_ptr
= &ctype
->translit_include
;
2857 while (*include_ptr
!= NULL
)
2858 include_ptr
= &(*include_ptr
)->next
;
2859 *include_ptr
= include_stmt
;
2861 /* The rest of the line must be empty. */
2862 lr_ignore_rest (ldfile
, 1);
2864 /* Make sure the locale is read. */
2865 add_to_readlist (LC_CTYPE
, locale_name
, repertoire_name
,
2869 else if (now
->tok
== tok_default_missing
)
2875 /* We expect a single character or string as the
2877 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2878 wstr
= read_widestring (ldfile
, now
, charmap
,
2883 if (ctype
->default_missing
!= NULL
)
2885 lr_error (ldfile
, _("\
2886 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2887 WITH_CUR_LOCALE (error_at_line (0, 0,
2888 ctype
->default_missing_file
,
2889 ctype
->default_missing_lineno
,
2891 previous definition was here")));
2895 ctype
->default_missing
= wstr
;
2896 ctype
->default_missing_file
= ldfile
->fname
;
2897 ctype
->default_missing_lineno
= ldfile
->lineno
;
2899 /* We can have more entries, ignore them. */
2900 lr_ignore_rest (ldfile
, 0);
2903 else if (wstr
== (uint32_t *) -1l)
2904 /* This was a syntax error. */
2907 /* Maybe there is another replacement we can use. */
2908 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2909 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2911 /* Nothing found. We tell the user. */
2912 lr_error (ldfile
, _("\
2913 %s: no representable `default_missing' definition found"), "LC_CTYPE");
2916 if (now
->tok
!= tok_semicolon
)
2917 goto translit_syntax
;
2922 else if (now
->tok
== tok_translit_ignore
)
2924 read_translit_ignore_entry (ldfile
, ctype
, charmap
,
2929 read_translit_entry (ldfile
, ctype
, now
, charmap
, repertoire
);
2931 ldfile
->return_widestr
= 0;
2933 if (now
->tok
== tok_eof
)
2934 lr_error (ldfile
, _(\
2935 "%s: `translit_start' section does not end with `translit_end'"),
2941 /* Ignore the rest of the line if we don't need the input of
2945 lr_ignore_rest (ldfile
, 0);
2949 /* This could mean one of several things. First test whether
2950 it's a character class name. */
2951 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
2952 if (strcmp (now
->val
.str
.startmb
, ctype
->classnames
[cnt
]) == 0)
2954 if (cnt
< ctype
->nr_charclass
)
2956 class_bit
= _ISwbit (cnt
);
2957 class256_bit
= cnt
<= 11 ? _ISbit (cnt
) : 0;
2958 free (now
->val
.str
.startmb
);
2959 goto read_charclass
;
2961 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
)
2962 if (strcmp (now
->val
.str
.startmb
, ctype
->mapnames
[cnt
]) == 0)
2964 if (cnt
< ctype
->map_collection_nr
)
2967 free (now
->val
.str
.startmb
);
2970 #ifdef PREDEFINED_CLASSES
2971 if (strcmp (now
->val
.str
.startmb
, "special1") == 0)
2973 class_bit
= _ISwspecial1
;
2974 free (now
->val
.str
.startmb
);
2975 goto read_charclass
;
2977 if (strcmp (now
->val
.str
.startmb
, "special2") == 0)
2979 class_bit
= _ISwspecial2
;
2980 free (now
->val
.str
.startmb
);
2981 goto read_charclass
;
2983 if (strcmp (now
->val
.str
.startmb
, "special3") == 0)
2985 class_bit
= _ISwspecial3
;
2986 free (now
->val
.str
.startmb
);
2987 goto read_charclass
;
2989 if (strcmp (now
->val
.str
.startmb
, "tosymmetric") == 0)
2998 /* Next we assume `LC_CTYPE'. */
2999 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
3000 if (now
->tok
== tok_eof
)
3002 if (now
->tok
== tok_eol
)
3003 lr_error (ldfile
, _("%s: incomplete `END' line"),
3005 else if (now
->tok
!= tok_lc_ctype
)
3006 lr_error (ldfile
, _("\
3007 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
3008 lr_ignore_rest (ldfile
, now
->tok
== tok_lc_ctype
);
3013 if (now
->tok
!= tok_eof
)
3014 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
3017 /* Prepare for the next round. */
3018 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
3022 /* When we come here we reached the end of the file. */
3023 lr_error (ldfile
, _("%s: premature end of file"), "LC_CTYPE");
3028 set_class_defaults (struct locale_ctype_t
*ctype
,
3029 const struct charmap_t
*charmap
,
3030 struct repertoire_t
*repertoire
)
3034 /* This function defines the default values for the classes and conversions
3035 according to POSIX.2 2.5.2.1.
3036 It may seem that the order of these if-blocks is arbitrary but it is NOT.
3037 Don't move them unless you know what you are doing! */
3039 auto void set_default (int bitpos
, int from
, int to
);
3041 void set_default (int bitpos
, int from
, int to
)
3045 int bit
= _ISbit (bitpos
);
3046 int bitw
= _ISwbit (bitpos
);
3047 /* Define string. */
3050 for (ch
= from
; ch
<= to
; ++ch
)
3052 struct charseq
*seq
;
3055 seq
= charmap_find_value (charmap
, tmp
, 1);
3059 sprintf (buf
, "U%08X", ch
);
3060 seq
= charmap_find_value (charmap
, buf
, 9);
3065 WITH_CUR_LOCALE (error (0, 0, _("\
3066 %s: character `%s' not defined while needed as default value"),
3069 else if (seq
->nbytes
!= 1)
3070 WITH_CUR_LOCALE (error (0, 0, _("\
3071 %s: character `%s' in charmap not representable with one byte"),
3074 ctype
->class256_collection
[seq
->bytes
[0]] |= bit
;
3076 /* No need to search here, the ASCII value is also the Unicode
3078 ELEM (ctype
, class_collection
, , ch
) |= bitw
;
3082 /* Set default values if keyword was not present. */
3083 if ((ctype
->class_done
& BITw (tok_upper
)) == 0)
3084 /* "If this keyword [upper] is not specified, the uppercase letters
3085 `A' through `Z', ..., shall automatically belong to this class,
3086 with implementation defined character values." [P1003.2, 2.5.2.1] */
3087 set_default (BITPOS (tok_upper
), 'A', 'Z');
3089 if ((ctype
->class_done
& BITw (tok_lower
)) == 0)
3090 /* "If this keyword [lower] is not specified, the lowercase letters
3091 `a' through `z', ..., shall automatically belong to this class,
3092 with implementation defined character values." [P1003.2, 2.5.2.1] */
3093 set_default (BITPOS (tok_lower
), 'a', 'z');
3095 if ((ctype
->class_done
& BITw (tok_alpha
)) == 0)
3097 /* Table 2-6 in P1003.2 says that characters in class `upper' or
3098 class `lower' *must* be in class `alpha'. */
3099 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
);
3100 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
);
3102 for (cnt
= 0; cnt
< 256; ++cnt
)
3103 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3104 ctype
->class256_collection
[cnt
] |= BIT (tok_alpha
);
3106 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3107 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3108 ctype
->class_collection
[cnt
] |= BITw (tok_alpha
);
3111 if ((ctype
->class_done
& BITw (tok_digit
)) == 0)
3112 /* "If this keyword [digit] is not specified, the digits `0' through
3113 `9', ..., shall automatically belong to this class, with
3114 implementation-defined character values." [P1003.2, 2.5.2.1] */
3115 set_default (BITPOS (tok_digit
), '0', '9');
3117 /* "Only characters specified for the `alpha' and `digit' keyword
3118 shall be specified. Characters specified for the keyword `alpha'
3119 and `digit' are automatically included in this class." */
3121 unsigned long int mask
= BIT (tok_alpha
) | BIT (tok_digit
);
3122 unsigned long int maskw
= BITw (tok_alpha
) | BITw (tok_digit
);
3124 for (cnt
= 0; cnt
< 256; ++cnt
)
3125 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3126 ctype
->class256_collection
[cnt
] |= BIT (tok_alnum
);
3128 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3129 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3130 ctype
->class_collection
[cnt
] |= BITw (tok_alnum
);
3133 if ((ctype
->class_done
& BITw (tok_space
)) == 0)
3134 /* "If this keyword [space] is not specified, the characters <space>,
3135 <form-feed>, <newline>, <carriage-return>, <tab>, and
3136 <vertical-tab>, ..., shall automatically belong to this class,
3137 with implementation-defined character values." [P1003.2, 2.5.2.1] */
3139 struct charseq
*seq
;
3141 seq
= charmap_find_value (charmap
, "space", 5);
3143 seq
= charmap_find_value (charmap
, "SP", 2);
3145 seq
= charmap_find_value (charmap
, "U00000020", 9);
3149 WITH_CUR_LOCALE (error (0, 0, _("\
3150 %s: character `%s' not defined while needed as default value"),
3151 "LC_CTYPE", "<space>"));
3153 else if (seq
->nbytes
!= 1)
3154 WITH_CUR_LOCALE (error (0, 0, _("\
3155 %s: character `%s' in charmap not representable with one byte"),
3156 "LC_CTYPE", "<space>"));
3158 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3160 /* No need to search. */
3161 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_space
);
3163 seq
= charmap_find_value (charmap
, "form-feed", 9);
3165 seq
= charmap_find_value (charmap
, "U0000000C", 9);
3169 WITH_CUR_LOCALE (error (0, 0, _("\
3170 %s: character `%s' not defined while needed as default value"),
3171 "LC_CTYPE", "<form-feed>"));
3173 else if (seq
->nbytes
!= 1)
3174 WITH_CUR_LOCALE (error (0, 0, _("\
3175 %s: character `%s' in charmap not representable with one byte"),
3176 "LC_CTYPE", "<form-feed>"));
3178 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3180 /* No need to search. */
3181 ELEM (ctype
, class_collection
, , L
'\f') |= BITw (tok_space
);
3184 seq
= charmap_find_value (charmap
, "newline", 7);
3186 seq
= charmap_find_value (charmap
, "U0000000A", 9);
3190 WITH_CUR_LOCALE (error (0, 0, _("\
3191 %s: character `%s' not defined while needed as default value"),
3192 "LC_CTYPE", "<newline>"));
3194 else if (seq
->nbytes
!= 1)
3195 WITH_CUR_LOCALE (error (0, 0, _("\
3196 %s: character `%s' in charmap not representable with one byte"),
3197 "LC_CTYPE", "<newline>"));
3199 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3201 /* No need to search. */
3202 ELEM (ctype
, class_collection
, , L
'\n') |= BITw (tok_space
);
3205 seq
= charmap_find_value (charmap
, "carriage-return", 15);
3207 seq
= charmap_find_value (charmap
, "U0000000D", 9);
3211 WITH_CUR_LOCALE (error (0, 0, _("\
3212 %s: character `%s' not defined while needed as default value"),
3213 "LC_CTYPE", "<carriage-return>"));
3215 else if (seq
->nbytes
!= 1)
3216 WITH_CUR_LOCALE (error (0, 0, _("\
3217 %s: character `%s' in charmap not representable with one byte"),
3218 "LC_CTYPE", "<carriage-return>"));
3220 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3222 /* No need to search. */
3223 ELEM (ctype
, class_collection
, , L
'\r') |= BITw (tok_space
);
3226 seq
= charmap_find_value (charmap
, "tab", 3);
3228 seq
= charmap_find_value (charmap
, "U00000009", 9);
3232 WITH_CUR_LOCALE (error (0, 0, _("\
3233 %s: character `%s' not defined while needed as default value"),
3234 "LC_CTYPE", "<tab>"));
3236 else if (seq
->nbytes
!= 1)
3237 WITH_CUR_LOCALE (error (0, 0, _("\
3238 %s: character `%s' in charmap not representable with one byte"),
3239 "LC_CTYPE", "<tab>"));
3241 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3243 /* No need to search. */
3244 ELEM (ctype
, class_collection
, , L
'\t') |= BITw (tok_space
);
3247 seq
= charmap_find_value (charmap
, "vertical-tab", 12);
3249 seq
= charmap_find_value (charmap
, "U0000000B", 9);
3253 WITH_CUR_LOCALE (error (0, 0, _("\
3254 %s: character `%s' not defined while needed as default value"),
3255 "LC_CTYPE", "<vertical-tab>"));
3257 else if (seq
->nbytes
!= 1)
3258 WITH_CUR_LOCALE (error (0, 0, _("\
3259 %s: character `%s' in charmap not representable with one byte"),
3260 "LC_CTYPE", "<vertical-tab>"));
3262 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3264 /* No need to search. */
3265 ELEM (ctype
, class_collection
, , L
'\v') |= BITw (tok_space
);
3268 if ((ctype
->class_done
& BITw (tok_xdigit
)) == 0)
3269 /* "If this keyword is not specified, the digits `0' to `9', the
3270 uppercase letters `A' through `F', and the lowercase letters `a'
3271 through `f', ..., shall automatically belong to this class, with
3272 implementation defined character values." [P1003.2, 2.5.2.1] */
3274 set_default (BITPOS (tok_xdigit
), '0', '9');
3275 set_default (BITPOS (tok_xdigit
), 'A', 'F');
3276 set_default (BITPOS (tok_xdigit
), 'a', 'f');
3279 if ((ctype
->class_done
& BITw (tok_blank
)) == 0)
3280 /* "If this keyword [blank] is unspecified, the characters <space> and
3281 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
3283 struct charseq
*seq
;
3285 seq
= charmap_find_value (charmap
, "space", 5);
3287 seq
= charmap_find_value (charmap
, "SP", 2);
3289 seq
= charmap_find_value (charmap
, "U00000020", 9);
3293 WITH_CUR_LOCALE (error (0, 0, _("\
3294 %s: character `%s' not defined while needed as default value"),
3295 "LC_CTYPE", "<space>"));
3297 else if (seq
->nbytes
!= 1)
3298 WITH_CUR_LOCALE (error (0, 0, _("\
3299 %s: character `%s' in charmap not representable with one byte"),
3300 "LC_CTYPE", "<space>"));
3302 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_blank
);
3304 /* No need to search. */
3305 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_blank
);
3308 seq
= charmap_find_value (charmap
, "tab", 3);
3310 seq
= charmap_find_value (charmap
, "U00000009", 9);
3314 WITH_CUR_LOCALE (error (0, 0, _("\
3315 %s: character `%s' not defined while needed as default value"),
3316 "LC_CTYPE", "<tab>"));
3318 else if (seq
->nbytes
!= 1)
3319 WITH_CUR_LOCALE (error (0, 0, _("\
3320 %s: character `%s' in charmap not representable with one byte"),
3321 "LC_CTYPE", "<tab>"));
3323 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_blank
);
3325 /* No need to search. */
3326 ELEM (ctype
, class_collection
, , L
'\t') |= BITw (tok_blank
);
3329 if ((ctype
->class_done
& BITw (tok_graph
)) == 0)
3330 /* "If this keyword [graph] is not specified, characters specified for
3331 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3332 shall belong to this character class." [P1003.2, 2.5.2.1] */
3334 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
) |
3335 BIT (tok_alpha
) | BIT (tok_digit
) | BIT (tok_xdigit
) | BIT (tok_punct
);
3336 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
) |
3337 BITw (tok_alpha
) | BITw (tok_digit
) | BITw (tok_xdigit
) |
3341 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3342 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3343 ctype
->class_collection
[cnt
] |= BITw (tok_graph
);
3345 for (cnt
= 0; cnt
< 256; ++cnt
)
3346 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3347 ctype
->class256_collection
[cnt
] |= BIT (tok_graph
);
3350 if ((ctype
->class_done
& BITw (tok_print
)) == 0)
3351 /* "If this keyword [print] is not provided, characters specified for
3352 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3353 and the <space> character shall belong to this character class."
3354 [P1003.2, 2.5.2.1] */
3356 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
) |
3357 BIT (tok_alpha
) | BIT (tok_digit
) | BIT (tok_xdigit
) | BIT (tok_punct
);
3358 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
) |
3359 BITw (tok_alpha
) | BITw (tok_digit
) | BITw (tok_xdigit
) |
3362 struct charseq
*seq
;
3364 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3365 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3366 ctype
->class_collection
[cnt
] |= BITw (tok_print
);
3368 for (cnt
= 0; cnt
< 256; ++cnt
)
3369 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3370 ctype
->class256_collection
[cnt
] |= BIT (tok_print
);
3373 seq
= charmap_find_value (charmap
, "space", 5);
3375 seq
= charmap_find_value (charmap
, "SP", 2);
3377 seq
= charmap_find_value (charmap
, "U00000020", 9);
3381 WITH_CUR_LOCALE (error (0, 0, _("\
3382 %s: character `%s' not defined while needed as default value"),
3383 "LC_CTYPE", "<space>"));
3385 else if (seq
->nbytes
!= 1)
3386 WITH_CUR_LOCALE (error (0, 0, _("\
3387 %s: character `%s' in charmap not representable with one byte"),
3388 "LC_CTYPE", "<space>"));
3390 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_print
);
3392 /* No need to search. */
3393 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_print
);
3396 if (ctype
->tomap_done
[0] == 0)
3397 /* "If this keyword [toupper] is not specified, the lowercase letters
3398 `a' through `z', and their corresponding uppercase letters `A' to
3399 `Z', ..., shall automatically be included, with implementation-
3400 defined character values." [P1003.2, 2.5.2.1] */
3405 strcpy (tmp
, "<?>");
3407 for (ch
= 'a'; ch
<= 'z'; ++ch
)
3409 struct charseq
*seq_from
, *seq_to
;
3413 seq_from
= charmap_find_value (charmap
, &tmp
[1], 1);
3414 if (seq_from
== NULL
)
3417 sprintf (buf
, "U%08X", ch
);
3418 seq_from
= charmap_find_value (charmap
, buf
, 9);
3420 if (seq_from
== NULL
)
3423 WITH_CUR_LOCALE (error (0, 0, _("\
3424 %s: character `%s' not defined while needed as default value"),
3427 else if (seq_from
->nbytes
!= 1)
3430 WITH_CUR_LOCALE (error (0, 0, _("\
3431 %s: character `%s' needed as default value not representable with one byte"),
3436 /* This conversion is implementation defined. */
3437 tmp
[1] = (char) (ch
+ ('A' - 'a'));
3438 seq_to
= charmap_find_value (charmap
, &tmp
[1], 1);
3442 sprintf (buf
, "U%08X", ch
+ ('A' - 'a'));
3443 seq_to
= charmap_find_value (charmap
, buf
, 9);
3448 WITH_CUR_LOCALE (error (0, 0, _("\
3449 %s: character `%s' not defined while needed as default value"),
3452 else if (seq_to
->nbytes
!= 1)
3455 WITH_CUR_LOCALE (error (0, 0, _("\
3456 %s: character `%s' needed as default value not representable with one byte"),
3460 /* The index [0] is determined by the order of the
3461 `ctype_map_newP' calls in `ctype_startup'. */
3462 ctype
->map256_collection
[0][seq_from
->bytes
[0]]
3466 /* No need to search. */
3467 ELEM (ctype
, map_collection
, [0], ch
) = ch
+ ('A' - 'a');
3471 if (ctype
->tomap_done
[1] == 0)
3472 /* "If this keyword [tolower] is not specified, the mapping shall be
3473 the reverse mapping of the one specified to `toupper'." [P1003.2] */
3475 for (cnt
= 0; cnt
< ctype
->map_collection_act
[0]; ++cnt
)
3476 if (ctype
->map_collection
[0][cnt
] != 0)
3477 ELEM (ctype
, map_collection
, [1],
3478 ctype
->map_collection
[0][cnt
])
3479 = ctype
->charnames
[cnt
];
3481 for (cnt
= 0; cnt
< 256; ++cnt
)
3482 if (ctype
->map256_collection
[0][cnt
] != 0)
3483 ctype
->map256_collection
[1][ctype
->map256_collection
[0][cnt
]] = cnt
;
3486 if (ctype
->outdigits_act
!= 10)
3488 if (ctype
->outdigits_act
!= 0)
3489 WITH_CUR_LOCALE (error (0, 0, _("\
3490 %s: field `%s' does not contain exactly ten entries"),
3491 "LC_CTYPE", "outdigit"));
3493 for (cnt
= ctype
->outdigits_act
; cnt
< 10; ++cnt
)
3495 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3496 (char *) digits
+ cnt
,
3499 if (ctype
->mboutdigits
[cnt
] == NULL
)
3500 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3502 strlen (longnames
[cnt
]));
3504 if (ctype
->mboutdigits
[cnt
] == NULL
)
3505 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3508 if (ctype
->mboutdigits
[cnt
] == NULL
)
3510 /* Provide a replacement. */
3511 WITH_CUR_LOCALE (error (0, 0, _("\
3512 no output digits defined and none of the standard names in the charmap")));
3514 ctype
->mboutdigits
[cnt
] = obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
3515 sizeof (struct charseq
)
3518 /* This is better than nothing. */
3519 ctype
->mboutdigits
[cnt
]->bytes
[0] = digits
[cnt
];
3520 ctype
->mboutdigits
[cnt
]->nbytes
= 1;
3523 ctype
->wcoutdigits
[cnt
] = L
'0' + cnt
;
3526 ctype
->outdigits_act
= 10;
3531 /* Construction of sparse 3-level tables.
3532 See wchar-lookup.h for their structure and the meaning of p and q. */
3539 /* Working representation. */
3540 size_t level1_alloc
;
3543 size_t level2_alloc
;
3546 size_t level3_alloc
;
3549 /* Compressed representation. */
3554 /* Initialize. Assumes t->p and t->q have already been set. */
3556 wctype_table_init (struct wctype_table
*t
)
3559 t
->level1_alloc
= t
->level1_size
= 0;
3561 t
->level2_alloc
= t
->level2_size
= 0;
3563 t
->level3_alloc
= t
->level3_size
= 0;
3566 /* Retrieve an entry. */
3568 wctype_table_get (struct wctype_table
*t
, uint32_t wc
)
3570 uint32_t index1
= wc
>> (t
->q
+ t
->p
+ 5);
3571 if (index1
< t
->level1_size
)
3573 uint32_t lookup1
= t
->level1
[index1
];
3574 if (lookup1
!= EMPTY
)
3576 uint32_t index2
= ((wc
>> (t
->p
+ 5)) & ((1 << t
->q
) - 1))
3577 + (lookup1
<< t
->q
);
3578 uint32_t lookup2
= t
->level2
[index2
];
3579 if (lookup2
!= EMPTY
)
3581 uint32_t index3
= ((wc
>> 5) & ((1 << t
->p
) - 1))
3582 + (lookup2
<< t
->p
);
3583 uint32_t lookup3
= t
->level3
[index3
];
3584 uint32_t index4
= wc
& 0x1f;
3586 return (lookup3
>> index4
) & 1;
3593 /* Add one entry. */
3595 wctype_table_add (struct wctype_table
*t
, uint32_t wc
)
3597 uint32_t index1
= wc
>> (t
->q
+ t
->p
+ 5);
3598 uint32_t index2
= (wc
>> (t
->p
+ 5)) & ((1 << t
->q
) - 1);
3599 uint32_t index3
= (wc
>> 5) & ((1 << t
->p
) - 1);
3600 uint32_t index4
= wc
& 0x1f;
3603 if (index1
>= t
->level1_size
)
3605 if (index1
>= t
->level1_alloc
)
3607 size_t alloc
= 2 * t
->level1_alloc
;
3608 if (alloc
<= index1
)
3610 t
->level1
= (uint32_t *) xrealloc ((char *) t
->level1
,
3611 alloc
* sizeof (uint32_t));
3612 t
->level1_alloc
= alloc
;
3614 while (index1
>= t
->level1_size
)
3615 t
->level1
[t
->level1_size
++] = EMPTY
;
3618 if (t
->level1
[index1
] == EMPTY
)
3620 if (t
->level2_size
== t
->level2_alloc
)
3622 size_t alloc
= 2 * t
->level2_alloc
+ 1;
3623 t
->level2
= (uint32_t *) xrealloc ((char *) t
->level2
,
3624 (alloc
<< t
->q
) * sizeof (uint32_t));
3625 t
->level2_alloc
= alloc
;
3627 i1
= t
->level2_size
<< t
->q
;
3628 i2
= (t
->level2_size
+ 1) << t
->q
;
3629 for (i
= i1
; i
< i2
; i
++)
3630 t
->level2
[i
] = EMPTY
;
3631 t
->level1
[index1
] = t
->level2_size
++;
3634 index2
+= t
->level1
[index1
] << t
->q
;
3636 if (t
->level2
[index2
] == EMPTY
)
3638 if (t
->level3_size
== t
->level3_alloc
)
3640 size_t alloc
= 2 * t
->level3_alloc
+ 1;
3641 t
->level3
= (uint32_t *) xrealloc ((char *) t
->level3
,
3642 (alloc
<< t
->p
) * sizeof (uint32_t));
3643 t
->level3_alloc
= alloc
;
3645 i1
= t
->level3_size
<< t
->p
;
3646 i2
= (t
->level3_size
+ 1) << t
->p
;
3647 for (i
= i1
; i
< i2
; i
++)
3649 t
->level2
[index2
] = t
->level3_size
++;
3652 index3
+= t
->level2
[index2
] << t
->p
;
3654 t
->level3
[index3
] |= (uint32_t)1 << index4
;
3657 /* Finalize and shrink.

   Collapses duplicate level-3 blocks and then duplicate level-2 blocks
   of the table T, renumbers the references that pointed to them, and
   serializes the table into one buffer obtained from xmalloc and stored
   in T->result.  The buffer starts with five uint32_t header words; the
   level-1, level-2 and level-3 arrays are written at level1_offset,
   level2_offset and level3_offset respectively.

   NOTE(review): this listing omits some source lines (the embedded
   numbers skip), so a few statements referred to below are not
   visible here.  */
3659 wctype_table_finalize (struct wctype_table
*t
)
/* reorder3 and reorder2 are indexed with old block numbers when the
   level-2 and level-1 references are rewritten further down.
   NOTE(review): both are variable-length arrays sized by the current
   block counts; confirm that neither count can be zero or very large
   when this function is called.  */
3662 uint32_t reorder3
[t
->level3_size
];
3663 uint32_t reorder2
[t
->level2_size
];
3664 uint32_t level1_offset
, level2_offset
, level3_offset
;
3666 /* Uniquify level3 blocks. */
/* A level-3 block is (1 << t->p) uint32_t words.  Block j is compared
   with memcmp against the blocks 0 .. k-1.
   NOTE(review): the initialization and update of k, and the stores into
   reorder3, are on lines not shown in this listing; presumably k counts
   the distinct blocks kept so far.  */
3668 for (j
= 0; j
< t
->level3_size
; j
++)
3670 for (i
= 0; i
< k
; i
++)
3671 if (memcmp (&t
->level3
[i
<< t
->p
], &t
->level3
[j
<< t
->p
],
3672 (1 << t
->p
) * sizeof (uint32_t)) == 0)
3674 /* Relocate block j to block i. */
3679 memcpy (&t
->level3
[i
<< t
->p
], &t
->level3
[j
<< t
->p
],
3680 (1 << t
->p
) * sizeof (uint32_t));
/* Redirect every non-EMPTY level-2 entry to the new number of the
   level-3 block it referenced.  */
3686 for (i
= 0; i
< (t
->level2_size
<< t
->q
); i
++)
3687 if (t
->level2
[i
] != EMPTY
)
3688 t
->level2
[i
] = reorder3
[t
->level2
[i
]];
3690 /* Uniquify level2 blocks. */
/* Same procedure one level up: a level-2 block is (1 << t->q) words.  */
3692 for (j
= 0; j
< t
->level2_size
; j
++)
3694 for (i
= 0; i
< k
; i
++)
3695 if (memcmp (&t
->level2
[i
<< t
->q
], &t
->level2
[j
<< t
->q
],
3696 (1 << t
->q
) * sizeof (uint32_t)) == 0)
3698 /* Relocate block j to block i. */
3703 memcpy (&t
->level2
[i
<< t
->q
], &t
->level2
[j
<< t
->q
],
3704 (1 << t
->q
) * sizeof (uint32_t));
/* Redirect every non-EMPTY level-1 entry to the new number of the
   level-2 block it referenced.  */
3710 for (i
= 0; i
< t
->level1_size
; i
++)
3711 if (t
->level1
[i
] != EMPTY
)
3712 t
->level1
[i
] = reorder2
[t
->level1
[i
]];
3714 /* Create and fill the resulting compressed representation. */
/* Byte size of the header (5 words) plus the three levels.
   NOTE(review): the left-hand side of this sum is on a line not shown
   here; the xmalloc call right below consumes t->result_size.  */
3716 5 * sizeof (uint32_t)
3717 + t
->level1_size
* sizeof (uint32_t)
3718 + (t
->level2_size
<< t
->q
) * sizeof (uint32_t)
3719 + (t
->level3_size
<< t
->p
) * sizeof (uint32_t);
3720 t
->result
= (char *) xmalloc (t
->result_size
);
/* The next three expressions are byte positions inside the result: just
   past the header, just past level 1, and just past level 2.
   NOTE(review): their assignment targets are not visible in this
   listing; level1_offset, level2_offset and level3_offset are the
   variables used as such positions below.  */
3723 5 * sizeof (uint32_t);
3725 5 * sizeof (uint32_t)
3726 + t
->level1_size
* sizeof (uint32_t);
3728 5 * sizeof (uint32_t)
3729 + t
->level1_size
* sizeof (uint32_t)
3730 + (t
->level2_size
<< t
->q
) * sizeof (uint32_t);
/* Header words: [0] = q + p + 5, [1] = number of level-1 entries,
   [2] = p + 5, [3] = mask with the low q bits set, [4] = mask with the
   low p bits set.
   NOTE(review): the constant 5 presumably reflects the 32 = 1 << 5 bits
   held by each level-3 word; confirm against the lookup code.  */
3732 ((uint32_t *) t
->result
)[0] = t
->q
+ t
->p
+ 5;
3733 ((uint32_t *) t
->result
)[1] = t
->level1_size
;
3734 ((uint32_t *) t
->result
)[2] = t
->p
+ 5;
3735 ((uint32_t *) t
->result
)[3] = (1 << t
->q
) - 1;
3736 ((uint32_t *) t
->result
)[4] = (1 << t
->p
) - 1;
/* A non-EMPTY level-1 entry is written as the byte offset, counted from
   the start of the result, of the level-2 block it refers to.
   NOTE(review): the value written for EMPTY entries is on a line not
   shown in this listing.  */
3738 for (i
= 0; i
< t
->level1_size
; i
++)
3739 ((uint32_t *) (t
->result
+ level1_offset
))[i
] =
3740 (t
->level1
[i
] == EMPTY
3742 : (t
->level1
[i
] << t
->q
) * sizeof (uint32_t) + level2_offset
);
/* Likewise, a non-EMPTY level-2 entry becomes the byte offset of the
   level-3 block it refers to.  */
3744 for (i
= 0; i
< (t
->level2_size
<< t
->q
); i
++)
3745 ((uint32_t *) (t
->result
+ level2_offset
))[i
] =
3746 (t
->level2
[i
] == EMPTY
3748 : (t
->level2
[i
] << t
->p
) * sizeof (uint32_t) + level3_offset
);
/* The level-3 words are copied unchanged.  */
3750 for (i
= 0; i
< (t
->level3_size
<< t
->p
); i
++)
3751 ((uint32_t *) (t
->result
+ level3_offset
))[i
] = t
->level3
[i
];
/* NOTE(review): the statements guarded by the three checks below are
   not shown in this listing; presumably they release the working
   arrays that were actually allocated.  */
3753 if (t
->level1_alloc
> 0)
3755 if (t
->level2_alloc
> 0)
3757 if (t
->level3_alloc
> 0)
/* Parameter macros for a table type named wcwidth_table: ELEMENT is
   uint8_t and DEFAULT is 0xff.
   NOTE(review): the construct that consumes TABLE, ELEMENT and DEFAULT
   (presumably the inclusion of a generic table template) is on a line
   not shown in this listing.  */
3761 #define TABLE wcwidth_table
3762 #define ELEMENT uint8_t
3763 #define DEFAULT 0xff
/* Same parameter macros for a table type named wctrans_table, with
   int32_t as ELEMENT.  */
3766 #define TABLE wctrans_table
3767 #define ELEMENT int32_t
/* While this rename is active, anything declared under the name
   wctrans_table_add really gets the name wctrans_table_add_internal.
   The rename is dropped again right after, which leaves the plain name
   free for the wrapper function that follows.  */
3769 #define wctrans_table_add wctrans_table_add_internal
3771 #undef wctrans_table_add
3772 /* The wctrans_table must actually store the difference between the
   desired result and the argument.

   T is the table to update, WC the character being mapped and MAPPED_WC
   the character it maps to.  The value handed on to
   wctrans_table_add_internal is MAPPED_WC - WC, computed on uint32_t
   operands, so a mapping to a smaller value wraps around instead of
   going negative at this point.  */
3775 wctrans_table_add (struct wctrans_table
*t
, uint32_t wc
, uint32_t mapped_wc
)
3777 wctrans_table_add_internal (t
, wc
, mapped_wc
- wc
);
3781 /* Flattens the included transliterations into a translit list.
   Inserts them in the list at `cursor', and returns the new cursor.

   CTYPE is the LC_CTYPE data whose pending translit_include entries are
   consumed; each entry is unchained before it is processed, and the
   loop runs until that list is empty.  CHARMAP is only passed on to
   find_locale and to the recursive call.  CURSOR points at the link
   where the next included list is spliced in.

   The rule list of an included locale is moved, not copied: after the
   splice the translit pointer of that locale is set to NULL.

   NOTE(review): this listing omits some source lines (braces, the else
   keyword, the body of the list walk and the return statement).  */
3783 static struct translit_t
**
3784 translit_flatten (struct locale_ctype_t
*ctype
,
3785 const struct charmap_t
*charmap
,
3786 struct translit_t
**cursor
)
3788 while (ctype
->translit_include
!= NULL
)
/* Remember what the head include entry names before it is unchained.  */
3790 const char *copy_locale
= ctype
->translit_include
->copy_locale
;
3791 const char *copy_repertoire
= ctype
->translit_include
->copy_repertoire
;
3792 struct localedef_t
*other
;
3794 /* Unchain the include statement. During the depth-first traversal
   we don't want to visit any locale more than once. */
3796 ctype
->translit_include
= ctype
->translit_include
->next
;
/* Ask find_locale for the LC_CTYPE category of the named locale.  */
3798 other
= find_locale (LC_CTYPE
, copy_locale
, copy_repertoire
, charmap
);
/* No locale, or a locale without LC_CTYPE data: report it.  */
3800 if (other
== NULL
|| other
->categories
[LC_CTYPE
].ctype
== NULL
)
3802 WITH_CUR_LOCALE (error (0, 0, _("\
3803 %s: transliteration data from locale `%s' not available"),
3804 "LC_CTYPE", copy_locale
));
3808 struct locale_ctype_t
*other_ctype
=
3809 other
->categories
[LC_CTYPE
].ctype
;
/* Depth first: resolve the includes of the included locale before its
   own rules are spliced in, threading the same cursor through.  The
   assert checks that the recursion consumed every include entry.  */
3811 cursor
= translit_flatten (other_ctype
, charmap
, cursor
);
3812 assert (other_ctype
->translit_include
== NULL
);
3814 if (other_ctype
->translit
!= NULL
)
3816 /* Insert the other_ctype->translit list at *cursor. */
3817 struct translit_t
*endp
= other_ctype
->translit
;
/* Find the last element of the included list.
   NOTE(review): the loop body is on a line not shown in this listing.  */
3818 while (endp
->next
!= NULL
)
/* Splice: the tail of the included list links to whatever was at
   *cursor, and *cursor now heads the included list.  */
3821 endp
->next
= *cursor
;
3822 *cursor
= other_ctype
->translit
;
3824 /* Avoid any risk of circular lists. */
3825 other_ctype
->translit
= NULL
;
/* The next insertion goes right after the elements just inserted.  */
3827 cursor
= &endp
->next
;
/* Take over default_missing from the included locale if this locale
   has none of its own.  */
3830 if (ctype
->default_missing
== NULL
)
3831 ctype
->default_missing
= other_ctype
->default_missing
;
3839 allocate_arrays (struct locale_ctype_t
*ctype
, const struct charmap_t
*charmap
,
3840 struct repertoire_t
*repertoire
)
3848 /* You wonder about this amount of memory? This is only because some
3849 users do not manage to address the array with unsigned values or
3850 data types with range >= 256. '\200' would result in the array
3851 index -128. To help these poor people we duplicate the entries for
3852 128 up to 255 below the entry for \0. */
3853 ctype
->ctype_b
= (char_class_t
*) xcalloc (256 + 128, sizeof (char_class_t
));
3854 ctype
->ctype32_b
= (char_class32_t
*) xcalloc (256, sizeof (char_class32_t
));
3855 ctype
->class_b
= (uint32_t **)
3856 xmalloc (ctype
->nr_charclass
* sizeof (uint32_t *));
3857 ctype
->class_3level
= (struct iovec
*)
3858 xmalloc (ctype
->nr_charclass
* sizeof (struct iovec
));
3860 /* This is the array accessed using the multibyte string elements. */
3861 for (idx
= 0; idx
< 256; ++idx
)
3862 ctype
->ctype_b
[128 + idx
] = ctype
->class256_collection
[idx
];
3864 /* Mirror first 127 entries. We must take care that entry -1 is not
3865 mirrored because EOF == -1. */
3866 for (idx
= 0; idx
< 127; ++idx
)
3867 ctype
->ctype_b
[idx
] = ctype
->ctype_b
[256 + idx
];
3869 /* The 32 bit array contains all characters < 0x100. */
3870 for (idx
= 0; idx
< ctype
->class_collection_act
; ++idx
)
3871 if (ctype
->charnames
[idx
] < 0x100)
3872 ctype
->ctype32_b
[ctype
->charnames
[idx
]] = ctype
->class_collection
[idx
];
3874 for (nr
= 0; nr
< ctype
->nr_charclass
; nr
++)
3876 ctype
->class_b
[nr
] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
3878 /* We only set CLASS_B for the bits in the ISO C classes, not
3879 the user defined classes. The number should not change but
3881 #define LAST_ISO_C_BIT 11
3882 if (nr
<= LAST_ISO_C_BIT
)
3883 for (idx
= 0; idx
< 256; ++idx
)
3884 if (ctype
->class256_collection
[idx
] & _ISbit (nr
))
3885 ctype
->class_b
[nr
][idx
>> 5] |= (uint32_t) 1 << (idx
& 0x1f);
3888 for (nr
= 0; nr
< ctype
->nr_charclass
; nr
++)
3890 struct wctype_table t
;
3892 t
.p
= 4; /* or: 5 */
3893 t
.q
= 7; /* or: 6 */
3894 wctype_table_init (&t
);
3896 for (idx
= 0; idx
< ctype
->class_collection_act
; ++idx
)
3897 if (ctype
->class_collection
[idx
] & _ISwbit (nr
))
3898 wctype_table_add (&t
, ctype
->charnames
[idx
]);
3900 wctype_table_finalize (&t
);
3903 WITH_CUR_LOCALE (fprintf (stderr
, _("\
3904 %s: table for class \"%s\": %lu bytes\n"),
3905 "LC_CTYPE", ctype
->classnames
[nr
],
3906 (unsigned long int) t
.result_size
));
3908 ctype
->class_3level
[nr
].iov_base
= t
.result
;
3909 ctype
->class_3level
[nr
].iov_len
= t
.result_size
;
3912 /* Room for table of mappings. */
3913 ctype
->map_b
= (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3914 ctype
->map32_b
= (uint32_t **) xmalloc (ctype
->map_collection_nr
3915 * sizeof (uint32_t *));
3916 ctype
->map_3level
= (struct iovec
*)
3917 xmalloc (ctype
->map_collection_nr
* sizeof (struct iovec
));
3919 /* Fill in all mappings. */
3920 for (idx
= 0; idx
< 2; ++idx
)
3924 /* Allocate table. */
3925 ctype
->map_b
[idx
] = (uint32_t *)
3926 xmalloc ((256 + 128) * sizeof (uint32_t));
3928 /* Copy values from collection. */
3929 for (idx2
= 0; idx2
< 256; ++idx2
)
3930 ctype
->map_b
[idx
][128 + idx2
] = ctype
->map256_collection
[idx
][idx2
];
3932 /* Mirror first 127 entries. We must take care not to map entry
3933 -1 because EOF == -1. */
3934 for (idx2
= 0; idx2
< 127; ++idx2
)
3935 ctype
->map_b
[idx
][idx2
] = ctype
->map_b
[idx
][256 + idx2
];
3937 /* EOF must map to EOF. */
3938 ctype
->map_b
[idx
][127] = EOF
;
3941 for (idx
= 0; idx
< ctype
->map_collection_nr
; ++idx
)
3945 /* Allocate table. */
3946 ctype
->map32_b
[idx
] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
3948 /* Copy values from collection. Default is identity mapping. */
3949 for (idx2
= 0; idx2
< 256; ++idx2
)
3950 ctype
->map32_b
[idx
][idx2
] =
3951 (ctype
->map_collection
[idx
][idx2
] != 0
3952 ? ctype
->map_collection
[idx
][idx2
]
3956 for (nr
= 0; nr
< ctype
->map_collection_nr
; nr
++)
3958 struct wctrans_table t
;
3962 wctrans_table_init (&t
);
3964 for (idx
= 0; idx
< ctype
->map_collection_act
[nr
]; ++idx
)
3965 if (ctype
->map_collection
[nr
][idx
] != 0)
3966 wctrans_table_add (&t
, ctype
->charnames
[idx
],
3967 ctype
->map_collection
[nr
][idx
]);
3969 wctrans_table_finalize (&t
);
3972 WITH_CUR_LOCALE (fprintf (stderr
, _("\
3973 %s: table for map \"%s\": %lu bytes\n"),
3974 "LC_CTYPE", ctype
->mapnames
[nr
],
3975 (unsigned long int) t
.result_size
));
3977 ctype
->map_3level
[nr
].iov_base
= t
.result
;
3978 ctype
->map_3level
[nr
].iov_len
= t
.result_size
;
3981 /* Extra array for class and map names. */
3982 ctype
->class_name_ptr
= (uint32_t *) xmalloc (ctype
->nr_charclass
3983 * sizeof (uint32_t));
3984 ctype
->map_name_ptr
= (uint32_t *) xmalloc (ctype
->map_collection_nr
3985 * sizeof (uint32_t));
3987 ctype
->class_offset
= _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
);
3988 ctype
->map_offset
= ctype
->class_offset
+ ctype
->nr_charclass
;
3990 /* Array for width information. Because the expected widths are very
3991 small (never larger than 2) we use only one single byte. This
3993 We put only printable characters in the table. wcwidth is specified
3994 to return -1 for non-printable characters. Doing the check here
3995 saves a run-time check.
3996 But we put L'\0' in the table. This again saves a run-time check. */
3998 struct wcwidth_table t
;
4002 wcwidth_table_init (&t
);
4004 /* First set all the printable characters of the character set to
4005 the default width. */
4007 while (iterate_table (&charmap
->char_table
, &curs
, &key
, &len
, &vdata
) == 0)
4009 struct charseq
*data
= (struct charseq
*) vdata
;
4011 if (data
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
4012 data
->ucs4
= repertoire_find_value (ctype
->repertoire
,
4015 if (data
->ucs4
!= ILLEGAL_CHAR_VALUE
)
4017 uint32_t *class_bits
=
4018 find_idx (ctype
, &ctype
->class_collection
, NULL
,
4019 &ctype
->class_collection_act
, data
->ucs4
);
4021 if (class_bits
!= NULL
&& (*class_bits
& BITw (tok_print
)))
4022 wcwidth_table_add (&t
, data
->ucs4
, charmap
->width_default
);
4026 /* Now add the explicitly specified widths. */
4027 if (charmap
->width_rules
!= NULL
)
4031 for (cnt
= 0; cnt
< charmap
->nwidth_rules
; ++cnt
)
4033 unsigned char bytes
[charmap
->mb_cur_max
];
4034 int nbytes
= charmap
->width_rules
[cnt
].from
->nbytes
;
4036 /* We have the range of character for which the width is
4037 specified described using byte sequences of the multibyte
4038 charset. We have to convert this to UCS4 now. And we
4039 cannot simply convert the beginning and the end of the
4040 sequence, we have to iterate over the byte sequence and
4041 convert it for every single character. */
4042 memcpy (bytes
, charmap
->width_rules
[cnt
].from
->bytes
, nbytes
);
4044 while (nbytes
< charmap
->width_rules
[cnt
].to
->nbytes
4045 || memcmp (bytes
, charmap
->width_rules
[cnt
].to
->bytes
,
4048 /* Find the UCS value for `bytes'. */
4051 struct charseq
*seq
=
4052 charmap_find_symbol (charmap
, (char *) bytes
, nbytes
);
4055 wch
= ILLEGAL_CHAR_VALUE
;
4056 else if (seq
->ucs4
!= UNINITIALIZED_CHAR_VALUE
)
4059 wch
= repertoire_find_value (ctype
->repertoire
, seq
->name
,
4060 strlen (seq
->name
));
4062 if (wch
!= ILLEGAL_CHAR_VALUE
)
4064 /* Store the value. */
4065 uint32_t *class_bits
=
4066 find_idx (ctype
, &ctype
->class_collection
, NULL
,
4067 &ctype
->class_collection_act
, wch
);
4069 if (class_bits
!= NULL
&& (*class_bits
& BITw (tok_print
)))
4070 wcwidth_table_add (&t
, wch
,
4071 charmap
->width_rules
[cnt
].width
);
4074 /* "Increment" the bytes sequence. */
4076 while (inner
>= 0 && bytes
[inner
] == 0xff)
4081 /* We have to extend the byte sequence. */
4082 if (nbytes
>= charmap
->width_rules
[cnt
].to
->nbytes
)
4086 memset (&bytes
[1], 0, nbytes
);
4092 while (++inner
< nbytes
)
4099 /* Set the width of L'\0' to 0. */
4100 wcwidth_table_add (&t
, 0, 0);
4102 wcwidth_table_finalize (&t
);
4105 WITH_CUR_LOCALE (fprintf (stderr
, _("%s: table for width: %lu bytes\n"),
4106 "LC_CTYPE", (unsigned long int) t
.result_size
));
4108 ctype
->width
.iov_base
= t
.result
;
4109 ctype
->width
.iov_len
= t
.result_size
;
4112 /* Set MB_CUR_MAX. */
4113 ctype
->mb_cur_max
= charmap
->mb_cur_max
;
4115 /* Now determine the table for the transliteration information.
4117 XXX It is not yet clear to me whether it is worth implementing a
4118 complicated algorithm which uses a hash table to locate the entries.
4119 For now I'll use a simple array which can be searching using binary
4121 if (ctype
->translit_include
!= NULL
)
4122 /* Traverse the locales mentioned in the `include' statements in a
4123 depth-first way and fold in their transliteration information. */
4124 translit_flatten (ctype
, charmap
, &ctype
->translit
);
4126 if (ctype
->translit
!= NULL
)
4128 /* First count how many entries we have. This is the upper limit
4129 since some entries from the included files might be overwritten. */
4132 struct translit_t
*runp
= ctype
->translit
;
4133 struct translit_t
**sorted
;
4134 size_t from_len
, to_len
;
4136 while (runp
!= NULL
)
4142 /* Next we allocate an array large enough and fill in the values. */
4143 sorted
= (struct translit_t
**) alloca (number
4144 * sizeof (struct translit_t
**));
4145 runp
= ctype
->translit
;
4149 /* Search for the place where to insert this string.
4150 XXX Better use a real sorting algorithm later. */
4154 while (idx
< number
)
4156 int res
= wcscmp ((const wchar_t *) sorted
[idx
]->from
,
4157 (const wchar_t *) runp
->from
);
4172 memmove (&sorted
[idx
+ 1], &sorted
[idx
],
4173 (number
- idx
) * sizeof (struct translit_t
*));
4180 while (runp
!= NULL
);
4182 /* The next step is putting all the possible transliteration
4183 strings in one memory block so that we can write it out.
4184 We need several different blocks:
4185 - index to the from-string array
4187 - index to the to-string array
4190 from_len
= to_len
= 0;
4191 for (cnt
= 0; cnt
< number
; ++cnt
)
4193 struct translit_to_t
*srunp
;
4194 from_len
+= wcslen ((const wchar_t *) sorted
[cnt
]->from
) + 1;
4195 srunp
= sorted
[cnt
]->to
;
4196 while (srunp
!= NULL
)
4198 to_len
+= wcslen ((const wchar_t *) srunp
->str
) + 1;
4199 srunp
= srunp
->next
;
4201 /* Plus one for the extra NUL character marking the end of
4202 the list for the current entry. */
4206 /* We can allocate the arrays for the results. */
4207 ctype
->translit_from_idx
= xmalloc (number
* sizeof (uint32_t));
4208 ctype
->translit_from_tbl
= xmalloc (from_len
* sizeof (uint32_t));
4209 ctype
->translit_to_idx
= xmalloc (number
* sizeof (uint32_t));
4210 ctype
->translit_to_tbl
= xmalloc (to_len
* sizeof (uint32_t));
4214 for (cnt
= 0; cnt
< number
; ++cnt
)
4217 struct translit_to_t
*srunp
;
4219 ctype
->translit_from_idx
[cnt
] = from_len
;
4220 ctype
->translit_to_idx
[cnt
] = to_len
;
4222 len
= wcslen ((const wchar_t *) sorted
[cnt
]->from
) + 1;
4223 wmemcpy ((wchar_t *) &ctype
->translit_from_tbl
[from_len
],
4224 (const wchar_t *) sorted
[cnt
]->from
, len
);
4227 ctype
->translit_to_idx
[cnt
] = to_len
;
4228 srunp
= sorted
[cnt
]->to
;
4229 while (srunp
!= NULL
)
4231 len
= wcslen ((const wchar_t *) srunp
->str
) + 1;
4232 wmemcpy ((wchar_t *) &ctype
->translit_to_tbl
[to_len
],
4233 (const wchar_t *) srunp
->str
, len
);
4235 srunp
= srunp
->next
;
4237 ctype
->translit_to_tbl
[to_len
++] = L
'\0';
4240 /* Store the information about the length. */
4241 ctype
->translit_idx_size
= number
;
4242 ctype
->translit_from_tbl_size
= from_len
* sizeof (uint32_t);
4243 ctype
->translit_to_tbl_size
= to_len
* sizeof (uint32_t);
4247 /* Provide some dummy pointers since we have nothing to write out. */
4248 static uint32_t no_str
= { 0 };
4250 ctype
->translit_from_idx
= &no_str
;
4251 ctype
->translit_from_tbl
= &no_str
;
4252 ctype
->translit_to_tbl
= &no_str
;
4253 ctype
->translit_idx_size
= 0;
4254 ctype
->translit_from_tbl_size
= 0;
4255 ctype
->translit_to_tbl_size
= 0;