1 /* Copyright (C) 1995-2006, 2007, 2009 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
35 #include "localedef.h"
37 #include "localeinfo.h"
39 #include "linereader.h"
40 #include "locfile-token.h"
46 #ifdef PREDEFINED_CLASSES
47 /* These are the extra bits not in wctype.h since these are not preallocated classes. */
49 # define _ISwspecial1 (1 << 29)
50 # define _ISwspecial2 (1 << 30)
51 # define _ISwspecial3 (1 << 31)
55 /* The bit used for representing a special class. */
/* BITPOS converts a class token into a bit index counted from tok_upper.  */
56 #define BITPOS(class) ((class) - tok_upper)
/* BIT and BITw pass that index to _ISbit and _ISwbit respectively.  */
57 #define BIT(class) (_ISbit (BITPOS (class)))
58 #define BITw(class) (_ISwbit (BITPOS (class)))
/* ELEM expands to a dereferenced find_idx call, so the result is an
   lvalue (find_idx returns uint32_t *) that can be tested or assigned.
   COLLECTION names a member of *CTYPE; the macro also passes the
   matching COLLECTION_max and COLLECTION_act members.  IDX is pasted
   textually after each member name, so it may be left empty or be a
   subscript.  CTYPE is used unparenthesized and more than once, so it
   should be a plain identifier.  */
60 #define ELEM(ctype, collection, idx, value) \
61 *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx, \
62 &ctype->collection##_act idx, value)
65 /* To be compatible with former implementations we for now restrict
66 the number of bits for character classes to 16. When compatibility
67 is not necessary anymore increase the number to 32. */
68 #define char_class_t uint16_t
69 #define char_class32_t uint32_t
72 /* Type to describe a transliteration action. We have a possibly
73 multiple character from-string and a set of multiple character
74 to-strings. All are 32bit values since this is what is used in
75 the gconv functions. */
80 struct translit_to_t
*next
;
90 struct translit_to_t
*to
;
92 struct translit_t
*next
;
95 struct translit_ignore_t
104 struct translit_ignore_t
*next
;
108 /* Type to describe a transliteration include statement. */
109 struct translit_include_t
111 const char *copy_locale
;
112 const char *copy_repertoire
;
114 struct translit_include_t
*next
;
118 /* Sparse table of uint32_t. */
119 #define TABLE idx_table
120 #define ELEMENT uint32_t
121 #define DEFAULT ((uint32_t) ~0)
126 /* The real definition of the struct for the LC_CTYPE locale. */
127 struct locale_ctype_t
130 size_t charnames_max
;
131 size_t charnames_act
;
132 /* An index lookup table, to speedup find_idx. */
133 struct idx_table charnames_idx
;
135 struct repertoire_t
*repertoire
;
137 /* We will allow up to 8 * sizeof (uint32_t) character classes. */
138 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
140 const char *classnames
[MAX_NR_CHARCLASS
];
141 uint32_t last_class_char
;
142 uint32_t class256_collection
[256];
143 uint32_t *class_collection
;
144 size_t class_collection_max
;
145 size_t class_collection_act
;
147 uint32_t class_offset
;
149 struct charseq
**mbdigits
;
156 struct charseq
*mboutdigits
[10];
157 uint32_t wcoutdigits
[10];
158 size_t outdigits_act
;
160 /* If the following number ever turns out to be too small simply
161 increase it. But I doubt it will. --drepper@gnu */
162 #define MAX_NR_CHARMAP 16
163 const char *mapnames
[MAX_NR_CHARMAP
];
164 uint32_t *map_collection
[MAX_NR_CHARMAP
];
165 uint32_t map256_collection
[2][256];
166 size_t map_collection_max
[MAX_NR_CHARMAP
];
167 size_t map_collection_act
[MAX_NR_CHARMAP
];
168 size_t map_collection_nr
;
170 int tomap_done
[MAX_NR_CHARMAP
];
173 /* Transliteration information. */
174 struct translit_include_t
*translit_include
;
175 struct translit_t
*translit
;
176 struct translit_ignore_t
*translit_ignore
;
177 uint32_t ntranslit_ignore
;
179 uint32_t *default_missing
;
180 const char *default_missing_file
;
181 size_t default_missing_lineno
;
183 uint32_t to_nonascii
;
184 uint32_t nonascii_case
;
186 /* The arrays for the binary representation. */
187 char_class_t
*ctype_b
;
188 char_class32_t
*ctype32_b
;
192 struct iovec
*class_3level
;
193 struct iovec
*map_3level
;
194 uint32_t *class_name_ptr
;
195 uint32_t *map_name_ptr
;
198 const char *codeset_name
;
199 uint32_t *translit_from_idx
;
200 uint32_t *translit_from_tbl
;
201 uint32_t *translit_to_idx
;
202 uint32_t *translit_to_tbl
;
203 uint32_t translit_idx_size
;
204 size_t translit_from_tbl_size
;
205 size_t translit_to_tbl_size
;
207 struct obstack mempool
;
211 /* Marker for an empty slot. This has the value 0xFFFFFFFF, regardless
212 whether 'int' is 16 bit, 32 bit, or 64 bit. */
213 #define EMPTY ((uint32_t) ~0)
216 #define obstack_chunk_alloc xmalloc
217 #define obstack_chunk_free free
220 /* Prototypes for local functions. */
221 static void ctype_startup (struct linereader
*lr
, struct localedef_t
*locale
,
222 const struct charmap_t
*charmap
,
223 struct localedef_t
*copy_locale
,
225 static void ctype_class_new (struct linereader
*lr
,
226 struct locale_ctype_t
*ctype
, const char *name
);
227 static void ctype_map_new (struct linereader
*lr
,
228 struct locale_ctype_t
*ctype
,
229 const char *name
, const struct charmap_t
*charmap
);
230 static uint32_t *find_idx (struct locale_ctype_t
*ctype
, uint32_t **table
,
231 size_t *max
, size_t *act
, unsigned int idx
);
232 static void set_class_defaults (struct locale_ctype_t
*ctype
,
233 const struct charmap_t
*charmap
,
234 struct repertoire_t
*repertoire
);
235 static void allocate_arrays (struct locale_ctype_t
*ctype
,
236 const struct charmap_t
*charmap
,
237 struct repertoire_t
*repertoire
);
240 static const char *longnames
[] =
242 "zero", "one", "two", "three", "four",
243 "five", "six", "seven", "eight", "nine"
245 static const char *uninames
[] =
247 "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
248 "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
250 static const unsigned char digits
[] = "0123456789";
254 ctype_startup (struct linereader
*lr
, struct localedef_t
*locale
,
255 const struct charmap_t
*charmap
,
256 struct localedef_t
*copy_locale
, int ignore_content
)
259 struct locale_ctype_t
*ctype
;
261 if (!ignore_content
&& locale
->categories
[LC_CTYPE
].ctype
== NULL
)
263 if (copy_locale
== NULL
)
265 /* Allocate the needed room. */
266 locale
->categories
[LC_CTYPE
].ctype
= ctype
=
267 (struct locale_ctype_t
*) xcalloc (1,
268 sizeof (struct locale_ctype_t
));
270 /* We have seen no names yet. */
271 ctype
->charnames_max
= charmap
->mb_cur_max
== 1 ? 256 : 512;
273 (unsigned int *) xmalloc (ctype
->charnames_max
274 * sizeof (unsigned int));
275 for (cnt
= 0; cnt
< 256; ++cnt
)
276 ctype
->charnames
[cnt
] = cnt
;
277 ctype
->charnames_act
= 256;
278 idx_table_init (&ctype
->charnames_idx
);
280 /* Fill character class information. */
281 ctype
->last_class_char
= ILLEGAL_CHAR_VALUE
;
282 /* The order of the following instructions determines the bit positions! */
284 ctype_class_new (lr
, ctype
, "upper");
285 ctype_class_new (lr
, ctype
, "lower");
286 ctype_class_new (lr
, ctype
, "alpha");
287 ctype_class_new (lr
, ctype
, "digit");
288 ctype_class_new (lr
, ctype
, "xdigit");
289 ctype_class_new (lr
, ctype
, "space");
290 ctype_class_new (lr
, ctype
, "print");
291 ctype_class_new (lr
, ctype
, "graph");
292 ctype_class_new (lr
, ctype
, "blank");
293 ctype_class_new (lr
, ctype
, "cntrl");
294 ctype_class_new (lr
, ctype
, "punct");
295 ctype_class_new (lr
, ctype
, "alnum");
296 #ifdef PREDEFINED_CLASSES
297 /* The following are extensions from ISO 14652. */
298 ctype_class_new (lr
, ctype
, "left_to_right");
299 ctype_class_new (lr
, ctype
, "right_to_left");
300 ctype_class_new (lr
, ctype
, "num_terminator");
301 ctype_class_new (lr
, ctype
, "num_separator");
302 ctype_class_new (lr
, ctype
, "segment_separator");
303 ctype_class_new (lr
, ctype
, "block_separator");
304 ctype_class_new (lr
, ctype
, "direction_control");
305 ctype_class_new (lr
, ctype
, "sym_swap_layout");
306 ctype_class_new (lr
, ctype
, "char_shape_selector");
307 ctype_class_new (lr
, ctype
, "num_shape_selector");
308 ctype_class_new (lr
, ctype
, "non_spacing");
309 ctype_class_new (lr
, ctype
, "non_spacing_level3");
310 ctype_class_new (lr
, ctype
, "normal_connect");
311 ctype_class_new (lr
, ctype
, "r_connect");
312 ctype_class_new (lr
, ctype
, "no_connect");
313 ctype_class_new (lr
, ctype
, "no_connect-space");
314 ctype_class_new (lr
, ctype
, "vowel_connect");
317 ctype
->class_collection_max
= charmap
->mb_cur_max
== 1 ? 256 : 512;
318 ctype
->class_collection
319 = (uint32_t *) xcalloc (sizeof (unsigned long int),
320 ctype
->class_collection_max
);
321 ctype
->class_collection_act
= 256;
323 /* Fill character map information. */
324 ctype
->last_map_idx
= MAX_NR_CHARMAP
;
325 ctype_map_new (lr
, ctype
, "toupper", charmap
);
326 ctype_map_new (lr
, ctype
, "tolower", charmap
);
327 #ifdef PREDEFINED_CLASSES
328 ctype_map_new (lr
, ctype
, "tosymmetric", charmap
);
331 /* Fill first 256 entries in `toXXX' arrays. */
332 for (cnt
= 0; cnt
< 256; ++cnt
)
334 ctype
->map_collection
[0][cnt
] = cnt
;
335 ctype
->map_collection
[1][cnt
] = cnt
;
336 #ifdef PREDEFINED_CLASSES
337 ctype
->map_collection
[2][cnt
] = cnt
;
339 ctype
->map256_collection
[0][cnt
] = cnt
;
340 ctype
->map256_collection
[1][cnt
] = cnt
;
343 if (enc_not_ascii_compatible
)
344 ctype
->to_nonascii
= 1;
346 obstack_init (&ctype
->mempool
);
349 ctype
= locale
->categories
[LC_CTYPE
].ctype
=
350 copy_locale
->categories
[LC_CTYPE
].ctype
;
356 ctype_finish (struct localedef_t
*locale
, const struct charmap_t
*charmap
)
358 /* See POSIX.2, table 2-6 for the meaning of the following table. */
363 const char allow
[NCLASS
];
365 valid_table
[NCLASS
] =
367 /* The order is important. See token.h for more information.
368 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
369 { "upper", "--MX-XDDXXX-" },
370 { "lower", "--MX-XDDXXX-" },
371 { "alpha", "---X-XDDXXX-" },
372 { "digit", "XXX--XDDXXX-" },
373 { "xdigit", "-----XDDXXX-" },
374 { "space", "XXXXX------X" },
375 { "print", "---------X--" },
376 { "graph", "---------X--" },
377 { "blank", "XXXXXM-----X" },
378 { "cntrl", "XXXXX-XX--XX" },
379 { "punct", "XXXXX-DD-X-X" },
380 { "alnum", "-----XDDXXX-" }
384 uint32_t space_value
;
385 struct charseq
*space_seq
;
386 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
393 /* Now resolve copying and also handle completely missing definitions. */
396 const char *repertoire_name
;
398 /* First see whether we were supposed to copy. If yes, find the
399 actual definition. */
400 if (locale
->copy_name
[LC_CTYPE
] != NULL
)
402 /* Find the copying locale. This has to happen transitively since
403 the locale we are copying from might itself be copying another one. */
404 struct localedef_t
*from
= locale
;
407 from
= find_locale (LC_CTYPE
, from
->copy_name
[LC_CTYPE
],
408 from
->repertoire_name
, charmap
);
409 while (from
->categories
[LC_CTYPE
].ctype
== NULL
410 && from
->copy_name
[LC_CTYPE
] != NULL
);
412 ctype
= locale
->categories
[LC_CTYPE
].ctype
413 = from
->categories
[LC_CTYPE
].ctype
;
416 /* If there is still no definition, issue a warning and create an empty one. */
421 WITH_CUR_LOCALE (error (0, 0, _("\
422 No definition for %s category found"), "LC_CTYPE"));
423 ctype_startup (NULL
, locale
, charmap
, NULL
, 0);
424 ctype
= locale
->categories
[LC_CTYPE
].ctype
;
427 /* Get the repertoire we have to use. */
428 repertoire_name
= locale
->repertoire_name
?: repertoire_global
;
429 if (repertoire_name
!= NULL
)
430 ctype
->repertoire
= repertoire_read (repertoire_name
);
433 /* We need the name of the currently used 8-bit character set to
434 make correct conversion between this 8-bit representation and the
435 ISO 10646 character set used internally for wide characters. */
436 ctype
->codeset_name
= charmap
->code_set_name
;
437 if (ctype
->codeset_name
== NULL
)
440 WITH_CUR_LOCALE (error (0, 0, _("\
441 No character set name specified in charmap")));
442 ctype
->codeset_name
= "//UNKNOWN//";
445 /* Set default value for classes not specified. */
446 set_class_defaults (ctype
, charmap
, ctype
->repertoire
);
448 /* Check according to table. */
449 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
451 uint32_t tmp
= ctype
->class_collection
[cnt
];
455 for (cls1
= 0; cls1
< NCLASS
; ++cls1
)
456 if ((tmp
& _ISwbit (cls1
)) != 0)
457 for (cls2
= 0; cls2
< NCLASS
; ++cls2
)
458 if (valid_table
[cls1
].allow
[cls2
] != '-')
460 int eq
= (tmp
& _ISwbit (cls2
)) != 0;
461 switch (valid_table
[cls1
].allow
[cls2
])
466 uint32_t value
= ctype
->charnames
[cnt
];
469 WITH_CUR_LOCALE (error (0, 0, _("\
470 character L'\\u%0*x' in class `%s' must be in class `%s'"),
471 value
> 0xffff ? 8 : 4,
473 valid_table
[cls1
].name
,
474 valid_table
[cls2
].name
));
481 uint32_t value
= ctype
->charnames
[cnt
];
484 WITH_CUR_LOCALE (error (0, 0, _("\
485 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
486 value
> 0xffff ? 8 : 4,
488 valid_table
[cls1
].name
,
489 valid_table
[cls2
].name
));
494 ctype
->class_collection
[cnt
] |= _ISwbit (cls2
);
498 WITH_CUR_LOCALE (error (5, 0, _("\
499 internal error in %s, line %u"), __FUNCTION__
, __LINE__
));
505 for (cnt
= 0; cnt
< 256; ++cnt
)
507 uint32_t tmp
= ctype
->class256_collection
[cnt
];
511 for (cls1
= 0; cls1
< NCLASS
; ++cls1
)
512 if ((tmp
& _ISbit (cls1
)) != 0)
513 for (cls2
= 0; cls2
< NCLASS
; ++cls2
)
514 if (valid_table
[cls1
].allow
[cls2
] != '-')
516 int eq
= (tmp
& _ISbit (cls2
)) != 0;
517 switch (valid_table
[cls1
].allow
[cls2
])
524 snprintf (buf
, sizeof buf
, "\\%Zo", cnt
);
527 WITH_CUR_LOCALE (error (0, 0, _("\
528 character '%s' in class `%s' must be in class `%s'"),
530 valid_table
[cls1
].name
,
531 valid_table
[cls2
].name
));
540 snprintf (buf
, sizeof buf
, "\\%Zo", cnt
);
543 WITH_CUR_LOCALE (error (0, 0, _("\
544 character '%s' in class `%s' must not be in class `%s'"),
546 valid_table
[cls1
].name
,
547 valid_table
[cls2
].name
));
552 ctype
->class256_collection
[cnt
] |= _ISbit (cls2
);
556 WITH_CUR_LOCALE (error (5, 0, _("\
557 internal error in %s, line %u"), __FUNCTION__
, __LINE__
));
563 /* ... and now test <SP> as a special case. */
565 if (((cnt
= BITPOS (tok_space
),
566 (ELEM (ctype
, class_collection
, , space_value
)
567 & BITw (tok_space
)) == 0)
568 || (cnt
= BITPOS (tok_blank
),
569 (ELEM (ctype
, class_collection
, , space_value
)
570 & BITw (tok_blank
)) == 0)))
573 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
574 valid_table
[cnt
].name
));
576 else if (((cnt
= BITPOS (tok_punct
),
577 (ELEM (ctype
, class_collection
, , space_value
)
578 & BITw (tok_punct
)) != 0)
579 || (cnt
= BITPOS (tok_graph
),
580 (ELEM (ctype
, class_collection
, , space_value
)
585 WITH_CUR_LOCALE (error (0, 0, _("\
586 <SP> character must not be in class `%s'"),
587 valid_table
[cnt
].name
));
590 ELEM (ctype
, class_collection
, , space_value
) |= BITw (tok_print
);
592 space_seq
= charmap_find_value (charmap
, "SP", 2);
593 if (space_seq
== NULL
)
594 space_seq
= charmap_find_value (charmap
, "space", 5);
595 if (space_seq
== NULL
)
596 space_seq
= charmap_find_value (charmap
, "U00000020", 9);
597 if (space_seq
== NULL
|| space_seq
->nbytes
!= 1)
600 WITH_CUR_LOCALE (error (0, 0, _("\
601 character <SP> not defined in character map")));
603 else if (((cnt
= BITPOS (tok_space
),
604 (ctype
->class256_collection
[space_seq
->bytes
[0]]
605 & BIT (tok_space
)) == 0)
606 || (cnt
= BITPOS (tok_blank
),
607 (ctype
->class256_collection
[space_seq
->bytes
[0]]
608 & BIT (tok_blank
)) == 0)))
611 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
612 valid_table
[cnt
].name
));
614 else if (((cnt
= BITPOS (tok_punct
),
615 (ctype
->class256_collection
[space_seq
->bytes
[0]]
616 & BIT (tok_punct
)) != 0)
617 || (cnt
= BITPOS (tok_graph
),
618 (ctype
->class256_collection
[space_seq
->bytes
[0]]
619 & BIT (tok_graph
)) != 0)))
622 WITH_CUR_LOCALE (error (0, 0, _("\
623 <SP> character must not be in class `%s'"),
624 valid_table
[cnt
].name
));
627 ctype
->class256_collection
[space_seq
->bytes
[0]] |= BIT (tok_print
);
629 /* Check whether all single-byte characters map to their upper/lowercase
630 equivalent according to the ASCII rules. */
631 for (cnt
= 'A'; cnt
<= 'Z'; ++cnt
)
633 uint32_t uppval
= ctype
->map256_collection
[0][cnt
];
634 uint32_t lowval
= ctype
->map256_collection
[1][cnt
];
635 uint32_t lowuppval
= ctype
->map256_collection
[0][lowval
];
636 uint32_t lowlowval
= ctype
->map256_collection
[1][lowval
];
639 || lowval
!= cnt
+ 0x20
641 || lowlowval
!= cnt
+ 0x20)
642 ctype
->nonascii_case
= 1;
644 for (cnt
= 0; cnt
< 256; ++cnt
)
645 if (cnt
< 'A' || (cnt
> 'Z' && cnt
< 'a') || cnt
> 'z')
646 if (ctype
->map256_collection
[0][cnt
] != cnt
647 || ctype
->map256_collection
[1][cnt
] != cnt
)
648 ctype
->nonascii_case
= 1;
650 /* Now that the tests are done make sure the name array contains all
651 characters which are handled in the WIDTH section of the
652 character set definition file. */
653 if (charmap
->width_rules
!= NULL
)
654 for (cnt
= 0; cnt
< charmap
->nwidth_rules
; ++cnt
)
656 unsigned char bytes
[charmap
->mb_cur_max
];
657 int nbytes
= charmap
->width_rules
[cnt
].from
->nbytes
;
659 /* We have the range of character for which the width is
660 specified described using byte sequences of the multibyte
661 charset. We have to convert this to UCS4 now. And we
662 cannot simply convert the beginning and the end of the
663 sequence, we have to iterate over the byte sequence and
664 convert it for every single character. */
665 memcpy (bytes
, charmap
->width_rules
[cnt
].from
->bytes
, nbytes
);
667 while (nbytes
< charmap
->width_rules
[cnt
].to
->nbytes
668 || memcmp (bytes
, charmap
->width_rules
[cnt
].to
->bytes
,
671 /* Find the UCS value for `bytes'. */
675 = charmap_find_symbol (charmap
, (char *) bytes
, nbytes
);
678 wch
= ILLEGAL_CHAR_VALUE
;
679 else if (seq
->ucs4
!= UNINITIALIZED_CHAR_VALUE
)
682 wch
= repertoire_find_value (ctype
->repertoire
, seq
->name
,
685 if (wch
!= ILLEGAL_CHAR_VALUE
)
686 /* We are only interested in the side-effects of the
687 `find_idx' call. It will add appropriate entries in
688 the name array if this is necessary. */
689 (void) find_idx (ctype
, NULL
, NULL
, NULL
, wch
);
691 /* "Increment" the bytes sequence. */
693 while (inner
>= 0 && bytes
[inner
] == 0xff)
698 /* We have to extend the byte sequence. */
699 if (nbytes
>= charmap
->width_rules
[cnt
].to
->nbytes
)
703 memset (&bytes
[1], 0, nbytes
);
709 while (++inner
< nbytes
)
715 /* Now set all the other characters of the character set to the default width. */
718 while (iterate_table (&charmap
->char_table
, &curs
, &key
, &len
, &vdata
) == 0)
720 struct charseq
*data
= (struct charseq
*) vdata
;
722 if (data
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
723 data
->ucs4
= repertoire_find_value (ctype
->repertoire
,
726 if (data
->ucs4
!= ILLEGAL_CHAR_VALUE
)
727 (void) find_idx (ctype
, NULL
, NULL
, NULL
, data
->ucs4
);
730 /* There must be a multiple of 10 digits. */
731 if (ctype
->mbdigits_act
% 10 != 0)
733 assert (ctype
->mbdigits_act
== ctype
->wcdigits_act
);
734 ctype
->wcdigits_act
-= ctype
->mbdigits_act
% 10;
735 ctype
->mbdigits_act
-= ctype
->mbdigits_act
% 10;
736 WITH_CUR_LOCALE (error (0, 0, _("\
737 `digit' category has not entries in groups of ten")));
740 /* Check the input digits. There must be a multiple of ten available.
741 In each group it could be that one or the other character is missing.
742 In this case the whole group must be removed. */
744 while (cnt
< ctype
->mbdigits_act
)
747 for (inner
= 0; inner
< 10; ++inner
)
748 if (ctype
->mbdigits
[cnt
+ inner
] == NULL
)
755 /* Remove the group. */
756 memmove (&ctype
->mbdigits
[cnt
], &ctype
->mbdigits
[cnt
+ 10],
757 ((ctype
->wcdigits_act
- cnt
- 10)
758 * sizeof (ctype
->mbdigits
[0])));
759 ctype
->mbdigits_act
-= 10;
763 /* If no input digits are given use the default. */
764 if (ctype
->mbdigits_act
== 0)
766 if (ctype
->mbdigits_max
== 0)
768 ctype
->mbdigits
= obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
769 10 * sizeof (struct charseq
*));
770 ctype
->mbdigits_max
= 10;
773 for (cnt
= 0; cnt
< 10; ++cnt
)
775 ctype
->mbdigits
[cnt
] = charmap_find_symbol (charmap
,
776 (char *) digits
+ cnt
, 1);
777 if (ctype
->mbdigits
[cnt
] == NULL
)
779 ctype
->mbdigits
[cnt
] = charmap_find_symbol (charmap
,
781 strlen (longnames
[cnt
]));
782 if (ctype
->mbdigits
[cnt
] == NULL
)
784 /* Hum, this ain't good. */
785 WITH_CUR_LOCALE (error (0, 0, _("\
786 no input digits defined and none of the standard names in the charmap")));
788 ctype
->mbdigits
[cnt
] = obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
789 sizeof (struct charseq
) + 1);
791 /* This is better than nothing. */
792 ctype
->mbdigits
[cnt
]->bytes
[0] = digits
[cnt
];
793 ctype
->mbdigits
[cnt
]->nbytes
= 1;
798 ctype
->mbdigits_act
= 10;
801 /* Check the wide character input digits. There must be a multiple
802 of ten available. In each group it could be that one or the other
803 character is missing. In this case the whole group must be removed. */
806 while (cnt
< ctype
->wcdigits_act
)
809 for (inner
= 0; inner
< 10; ++inner
)
810 if (ctype
->wcdigits
[cnt
+ inner
] == ILLEGAL_CHAR_VALUE
)
817 /* Remove the group. */
818 memmove (&ctype
->wcdigits
[cnt
], &ctype
->wcdigits
[cnt
+ 10],
819 ((ctype
->wcdigits_act
- cnt
- 10)
820 * sizeof (ctype
->wcdigits
[0])));
821 ctype
->wcdigits_act
-= 10;
825 /* If no input digits are given use the default. */
826 if (ctype
->wcdigits_act
== 0)
828 if (ctype
->wcdigits_max
== 0)
830 ctype
->wcdigits
= obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
831 10 * sizeof (uint32_t));
832 ctype
->wcdigits_max
= 10;
835 for (cnt
= 0; cnt
< 10; ++cnt
)
836 ctype
->wcdigits
[cnt
] = L
'0' + cnt
;
838 ctype
->mbdigits_act
= 10;
841 /* Check the outdigits. */
843 for (cnt
= 0; cnt
< 10; ++cnt
)
844 if (ctype
->mboutdigits
[cnt
] == NULL
)
846 static struct charseq replace
[2];
850 WITH_CUR_LOCALE (error (0, 0, _("\
851 not all characters used in `outdigit' are available in the charmap")));
855 replace
[0].nbytes
= 1;
856 replace
[0].bytes
[0] = '?';
857 replace
[0].bytes
[1] = '\0';
858 ctype
->mboutdigits
[cnt
] = &replace
[0];
862 for (cnt
= 0; cnt
< 10; ++cnt
)
863 if (ctype
->wcoutdigits
[cnt
] == 0)
867 WITH_CUR_LOCALE (error (0, 0, _("\
868 not all characters used in `outdigit' are available in the repertoire")));
872 ctype
->wcoutdigits
[cnt
] = L
'?';
875 /* Sort the entries in the translit_ignore list. */
876 if (ctype
->translit_ignore
!= NULL
)
878 struct translit_ignore_t
*firstp
= ctype
->translit_ignore
;
879 struct translit_ignore_t
*runp
;
881 ctype
->ntranslit_ignore
= 1;
883 for (runp
= firstp
->next
; runp
!= NULL
; runp
= runp
->next
)
885 struct translit_ignore_t
*lastp
= NULL
;
886 struct translit_ignore_t
*cmpp
;
888 ++ctype
->ntranslit_ignore
;
890 for (cmpp
= firstp
; cmpp
!= NULL
; lastp
= cmpp
, cmpp
= cmpp
->next
)
891 if (runp
->from
< cmpp
->from
)
899 ctype
->translit_ignore
= firstp
;
905 ctype_output (struct localedef_t
*locale
, const struct charmap_t
*charmap
,
906 const char *output_path
)
908 static const char nulbytes
[4] = { 0, 0, 0, 0 };
909 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
910 const size_t nelems
= (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
)
911 + ctype
->nr_charclass
+ ctype
->map_collection_nr
);
912 struct iovec
*iov
= alloca (sizeof *iov
913 * (2 + nelems
+ 2 * ctype
->nr_charclass
914 + ctype
->map_collection_nr
+ 4));
915 struct locale_file data
;
916 uint32_t *idx
= alloca (sizeof *idx
* (nelems
+ 1));
917 uint32_t default_missing_len
;
918 size_t elem
, cnt
, offset
, total
;
921 /* Now prepare the output: Find the sizes of the table we can use. */
922 allocate_arrays (ctype
, charmap
, ctype
->repertoire
);
924 data
.magic
= LIMAGIC (LC_CTYPE
);
926 iov
[0].iov_base
= (void *) &data
;
927 iov
[0].iov_len
= sizeof (data
);
929 iov
[1].iov_base
= (void *) idx
;
930 iov
[1].iov_len
= nelems
* sizeof (uint32_t);
932 idx
[0] = iov
[0].iov_len
+ iov
[1].iov_len
;
935 for (elem
= 0; elem
< nelems
; ++elem
)
937 if (elem
< _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
))
940 #define CTYPE_EMPTY(name) \
942 iov[2 + elem + offset].iov_base = NULL; \
943 iov[2 + elem + offset].iov_len = 0; \
944 idx[elem + 1] = idx[elem]; \
947 CTYPE_EMPTY(_NL_CTYPE_GAP1
);
948 CTYPE_EMPTY(_NL_CTYPE_GAP2
);
949 CTYPE_EMPTY(_NL_CTYPE_GAP3
);
950 CTYPE_EMPTY(_NL_CTYPE_GAP4
);
951 CTYPE_EMPTY(_NL_CTYPE_GAP5
);
952 CTYPE_EMPTY(_NL_CTYPE_GAP6
);
954 #define CTYPE_DATA(name, base, len) \
955 case _NL_ITEM_INDEX (name): \
956 iov[2 + elem + offset].iov_base = (base); \
957 iov[2 + elem + offset].iov_len = (len); \
958 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
961 CTYPE_DATA (_NL_CTYPE_CLASS
,
963 (256 + 128) * sizeof (char_class_t
));
965 CTYPE_DATA (_NL_CTYPE_TOUPPER
,
967 (256 + 128) * sizeof (uint32_t));
968 CTYPE_DATA (_NL_CTYPE_TOLOWER
,
970 (256 + 128) * sizeof (uint32_t));
972 CTYPE_DATA (_NL_CTYPE_TOUPPER32
,
974 256 * sizeof (uint32_t));
975 CTYPE_DATA (_NL_CTYPE_TOLOWER32
,
977 256 * sizeof (uint32_t));
979 CTYPE_DATA (_NL_CTYPE_CLASS32
,
981 256 * sizeof (char_class32_t
));
983 CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET
,
984 &ctype
->class_offset
, sizeof (uint32_t));
986 CTYPE_DATA (_NL_CTYPE_MAP_OFFSET
,
987 &ctype
->map_offset
, sizeof (uint32_t));
989 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE
,
990 &ctype
->translit_idx_size
, sizeof (uint32_t));
992 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX
,
993 ctype
->translit_from_idx
,
994 ctype
->translit_idx_size
* sizeof (uint32_t));
996 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL
,
997 ctype
->translit_from_tbl
,
998 ctype
->translit_from_tbl_size
);
1000 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX
,
1001 ctype
->translit_to_idx
,
1002 ctype
->translit_idx_size
* sizeof (uint32_t));
1004 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL
,
1005 ctype
->translit_to_tbl
, ctype
->translit_to_tbl_size
);
1007 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES
):
1008 /* The class name array. */
1010 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
, ++offset
)
1012 iov
[2 + elem
+ offset
].iov_base
1013 = (void *) ctype
->classnames
[cnt
];
1014 iov
[2 + elem
+ offset
].iov_len
1015 = strlen (ctype
->classnames
[cnt
]) + 1;
1016 total
+= iov
[2 + elem
+ offset
].iov_len
;
1018 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1019 iov
[2 + elem
+ offset
].iov_len
= 4 - (total
% 4);
1020 total
+= 4 - (total
% 4);
1022 idx
[elem
+ 1] = idx
[elem
] + total
;
1025 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES
):
1026 /* The map name array. */
1028 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
, ++offset
)
1030 iov
[2 + elem
+ offset
].iov_base
1031 = (void *) ctype
->mapnames
[cnt
];
1032 iov
[2 + elem
+ offset
].iov_len
1033 = strlen (ctype
->mapnames
[cnt
]) + 1;
1034 total
+= iov
[2 + elem
+ offset
].iov_len
;
1036 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1037 iov
[2 + elem
+ offset
].iov_len
= 4 - (total
% 4);
1038 total
+= 4 - (total
% 4);
1040 idx
[elem
+ 1] = idx
[elem
] + total
;
1043 CTYPE_DATA (_NL_CTYPE_WIDTH
,
1044 ctype
->width
.iov_base
,
1045 ctype
->width
.iov_len
);
1047 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX
,
1048 &ctype
->mb_cur_max
, sizeof (uint32_t));
1050 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME
):
1051 total
= strlen (ctype
->codeset_name
) + 1;
1053 iov
[2 + elem
+ offset
].iov_base
= (char *) ctype
->codeset_name
;
1056 iov
[2 + elem
+ offset
].iov_base
= alloca ((total
+ 3) & ~3);
1057 memset (mempcpy (iov
[2 + elem
+ offset
].iov_base
,
1058 ctype
->codeset_name
, total
),
1059 '\0', 4 - (total
& 3));
1060 total
= (total
+ 3) & ~3;
1062 iov
[2 + elem
+ offset
].iov_len
= total
;
1063 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1067 CTYPE_DATA (_NL_CTYPE_MAP_TO_NONASCII
,
1068 &ctype
->to_nonascii
, sizeof (uint32_t));
1070 CTYPE_DATA (_NL_CTYPE_NONASCII_CASE
,
1071 &ctype
->nonascii_case
, sizeof (uint32_t));
1073 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN
):
1074 iov
[2 + elem
+ offset
].iov_base
= alloca (sizeof (uint32_t));
1075 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1076 *(uint32_t *) iov
[2 + elem
+ offset
].iov_base
=
1077 ctype
->mbdigits_act
/ 10;
1078 idx
[elem
+ 1] = idx
[elem
] + sizeof (uint32_t);
1081 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN
):
1082 /* Align entries. */
1083 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1084 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1085 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1088 iov
[2 + elem
+ offset
].iov_base
= alloca (sizeof (uint32_t));
1089 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1090 *(uint32_t *) iov
[2 + elem
+ offset
].iov_base
=
1091 ctype
->wcdigits_act
/ 10;
1092 idx
[elem
+ 1] = idx
[elem
] + sizeof (uint32_t);
1095 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB
):
1096 /* Compute the length of all possible characters. For INDIGITS
1097 there might be more than one. We simply concatenate all of
1098 them with a NUL byte following. The NUL byte wouldn't be
1099 necessary but it makes it easier for the user. */
1102 for (cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
);
1103 cnt
< ctype
->mbdigits_act
; cnt
+= 10)
1104 total
+= ctype
->mbdigits
[cnt
]->nbytes
+ 1;
1105 iov
[2 + elem
+ offset
].iov_base
= (char *) alloca (total
);
1106 iov
[2 + elem
+ offset
].iov_len
= total
;
1108 cp
= iov
[2 + elem
+ offset
].iov_base
;
1109 for (cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
);
1110 cnt
< ctype
->mbdigits_act
; cnt
+= 10)
1112 cp
= mempcpy (cp
, ctype
->mbdigits
[cnt
]->bytes
,
1113 ctype
->mbdigits
[cnt
]->nbytes
);
1116 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1119 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB
) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB
):
1120 /* Each OUTDIGIT entry is exactly one multibyte character. We copy
1121 its bytes and append a NUL byte. The NUL byte wouldn't be
1122 necessary but it makes it easier for the user. */
1124 cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB
);
1125 total
= ctype
->mboutdigits
[cnt
]->nbytes
+ 1;
1126 iov
[2 + elem
+ offset
].iov_base
= (char *) alloca (total
);
1127 iov
[2 + elem
+ offset
].iov_len
= total
;
1129 *(char *) mempcpy (iov
[2 + elem
+ offset
].iov_base
,
1130 ctype
->mboutdigits
[cnt
]->bytes
,
1131 ctype
->mboutdigits
[cnt
]->nbytes
) = '\0';
1132 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1135 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC
) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC
):
1136 total
= ctype
->wcdigits_act
/ 10;
1138 iov
[2 + elem
+ offset
].iov_base
=
1139 (uint32_t *) alloca (total
* sizeof (uint32_t));
1140 iov
[2 + elem
+ offset
].iov_len
= total
* sizeof (uint32_t);
1142 for (cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC
);
1143 cnt
< ctype
->wcdigits_act
; cnt
+= 10)
1144 ((uint32_t *) iov
[2 + elem
+ offset
].iov_base
)[cnt
/ 10]
1145 = ctype
->wcdigits
[cnt
];
1146 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1149 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC
):
1150 /* Align entries. */
1151 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1152 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1153 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1157 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT1_WC
) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC
):
1158 cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC
);
1159 iov
[2 + elem
+ offset
].iov_base
= &ctype
->wcoutdigits
[cnt
];
1160 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1161 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1164 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN
):
1165 /* Align entries. */
1166 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1167 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1168 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1171 default_missing_len
= (ctype
->default_missing
1172 ? wcslen ((wchar_t *)ctype
->default_missing
)
1174 iov
[2 + elem
+ offset
].iov_base
= &default_missing_len
;
1175 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1176 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1179 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING
):
1180 iov
[2 + elem
+ offset
].iov_base
=
1181 ctype
->default_missing
?: (uint32_t *) L
"";
1182 iov
[2 + elem
+ offset
].iov_len
=
1183 wcslen (iov
[2 + elem
+ offset
].iov_base
) * sizeof (uint32_t);
1184 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1187 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN
):
1188 /* Align entries. */
1189 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1190 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1191 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1194 iov
[2 + elem
+ offset
].iov_base
= &ctype
->ntranslit_ignore
;
1195 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1196 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1199 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE
):
1201 uint32_t *ranges
= (uint32_t *) alloca (ctype
->ntranslit_ignore
1202 * 3 * sizeof (uint32_t));
1203 struct translit_ignore_t
*runp
;
1205 iov
[2 + elem
+ offset
].iov_base
= ranges
;
1206 iov
[2 + elem
+ offset
].iov_len
= (ctype
->ntranslit_ignore
1207 * 3 * sizeof (uint32_t));
1209 for (runp
= ctype
->translit_ignore
; runp
!= NULL
;
1212 *ranges
++ = runp
->from
;
1213 *ranges
++ = runp
->to
;
1214 *ranges
++ = runp
->step
;
1217 /* Remove the following line in case a new entry is added
1218 after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN. */
1220 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1224 assert (! "unknown CTYPE element");
1228 /* Handle extra maps. */
1229 size_t nr
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
);
1230 if (nr
< ctype
->nr_charclass
)
1232 iov
[2 + elem
+ offset
].iov_base
= ctype
->class_b
[nr
];
1233 iov
[2 + elem
+ offset
].iov_len
= 256 / 32 * sizeof (uint32_t);
1234 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1237 iov
[2 + elem
+ offset
] = ctype
->class_3level
[nr
];
1241 nr
-= ctype
->nr_charclass
;
1242 assert (nr
< ctype
->map_collection_nr
);
1243 iov
[2 + elem
+ offset
] = ctype
->map_3level
[nr
];
1245 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1249 assert (2 + elem
+ offset
== (nelems
+ 2 * ctype
->nr_charclass
1250 + ctype
->map_collection_nr
+ 4 + 2));
1252 write_locale_data (output_path
, LC_CTYPE
, "LC_CTYPE", 2 + elem
+ offset
,
1257 /* Local functions. */
1259 ctype_class_new (struct linereader
*lr
, struct locale_ctype_t
*ctype
,
1264 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
1265 if (strcmp (ctype
->classnames
[cnt
], name
) == 0)
1268 if (cnt
< ctype
->nr_charclass
)
1270 lr_error (lr
, _("character class `%s' already defined"), name
);
1274 if (ctype
->nr_charclass
== MAX_NR_CHARCLASS
)
1275 /* Exit code 2 is prescribed in P1003.2b. */
1276 WITH_CUR_LOCALE (error (2, 0, _("\
1277 implementation limit: no more than %Zd character classes allowed"),
1280 ctype
->classnames
[ctype
->nr_charclass
++] = name
;
1285 ctype_map_new (struct linereader
*lr
, struct locale_ctype_t
*ctype
,
1286 const char *name
, const struct charmap_t
*charmap
)
1288 size_t max_chars
= 0;
1291 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
)
1293 if (strcmp (ctype
->mapnames
[cnt
], name
) == 0)
1296 if (max_chars
< ctype
->map_collection_max
[cnt
])
1297 max_chars
= ctype
->map_collection_max
[cnt
];
1300 if (cnt
< ctype
->map_collection_nr
)
1302 lr_error (lr
, _("character map `%s' already defined"), name
);
1306 if (ctype
->map_collection_nr
== MAX_NR_CHARMAP
)
1307 /* Exit code 2 is prescribed in P1003.2b. */
1308 WITH_CUR_LOCALE (error (2, 0, _("\
1309 implementation limit: no more than %d character maps allowed"),
1312 ctype
->mapnames
[cnt
] = name
;
1315 ctype
->map_collection_max
[cnt
] = charmap
->mb_cur_max
== 1 ? 256 : 512;
1317 ctype
->map_collection_max
[cnt
] = max_chars
;
1319 ctype
->map_collection
[cnt
] = (uint32_t *)
1320 xcalloc (sizeof (uint32_t), ctype
->map_collection_max
[cnt
]);
1321 ctype
->map_collection_act
[cnt
] = 256;
1323 ++ctype
->map_collection_nr
;
1327 /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
1328 is possible if we only want to extend the name array. */
1330 find_idx (struct locale_ctype_t
*ctype
, uint32_t **table
, size_t *max
,
1331 size_t *act
, uint32_t idx
)
1336 return table
== NULL
? NULL
: &(*table
)[idx
];
1338 /* Use the charnames_idx lookup table instead of the slow search loop. */
1340 cnt
= idx_table_get (&ctype
->charnames_idx
, idx
);
1343 cnt
= ctype
->charnames_act
;
1345 for (cnt
= 256; cnt
< ctype
->charnames_act
; ++cnt
)
1346 if (ctype
->charnames
[cnt
] == idx
)
1350 /* We have to distinguish two cases: the name is found or not. */
1351 if (cnt
== ctype
->charnames_act
)
1353 /* Extend the name array. */
1354 if (ctype
->charnames_act
== ctype
->charnames_max
)
1356 ctype
->charnames_max
*= 2;
1357 ctype
->charnames
= (uint32_t *)
1358 xrealloc (ctype
->charnames
,
1359 sizeof (uint32_t) * ctype
->charnames_max
);
1361 ctype
->charnames
[ctype
->charnames_act
++] = idx
;
1362 idx_table_add (&ctype
->charnames_idx
, idx
, cnt
);
1366 /* We have done everything we are asked to do. */
1370 /* The caller does not want to extend the table. */
1371 return (cnt
>= *act
? NULL
: &(*table
)[cnt
]);
1377 size_t old_max
= *max
;
1380 while (*max
<= cnt
);
1383 (uint32_t *) xrealloc (*table
, *max
* sizeof (uint32_t));
1384 memset (&(*table
)[old_max
], '\0',
1385 (*max
- old_max
) * sizeof (uint32_t));
1391 return &(*table
)[cnt
];
1396 get_character (struct token
*now
, const struct charmap_t
*charmap
,
1397 struct repertoire_t
*repertoire
,
1398 struct charseq
**seqp
, uint32_t *wchp
)
1400 if (now
->tok
== tok_bsymbol
)
1402 /* This will hopefully be the normal case. */
1403 *wchp
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
1404 now
->val
.str
.lenmb
);
1405 *seqp
= charmap_find_value (charmap
, now
->val
.str
.startmb
,
1406 now
->val
.str
.lenmb
);
1408 else if (now
->tok
== tok_ucs4
)
1412 snprintf (utmp
, sizeof (utmp
), "U%08X", now
->val
.ucs4
);
1413 *seqp
= charmap_find_value (charmap
, utmp
, 9);
1416 *seqp
= repertoire_find_seq (repertoire
, now
->val
.ucs4
);
1420 /* Compute the value in the charmap from the UCS value. */
1421 const char *symbol
= repertoire_find_symbol (repertoire
,
1427 *seqp
= charmap_find_value (charmap
, symbol
, strlen (symbol
));
1431 if (repertoire
!= NULL
)
1433 /* Insert a negative entry. */
1434 static const struct charseq negative
1435 = { .ucs4
= ILLEGAL_CHAR_VALUE
};
1436 uint32_t *newp
= obstack_alloc (&repertoire
->mem_pool
,
1438 *newp
= now
->val
.ucs4
;
1440 insert_entry (&repertoire
->seq_table
, newp
,
1441 sizeof (uint32_t), (void *) &negative
);
1445 (*seqp
)->ucs4
= now
->val
.ucs4
;
1447 else if ((*seqp
)->ucs4
!= now
->val
.ucs4
)
1450 *wchp
= now
->val
.ucs4
;
1452 else if (now
->tok
== tok_charcode
)
1454 /* We must map from the byte code to UCS4. */
1455 *seqp
= charmap_find_symbol (charmap
, now
->val
.str
.startmb
,
1456 now
->val
.str
.lenmb
);
1459 *wchp
= ILLEGAL_CHAR_VALUE
;
1462 if ((*seqp
)->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1463 (*seqp
)->ucs4
= repertoire_find_value (repertoire
, (*seqp
)->name
,
1464 strlen ((*seqp
)->name
));
1465 *wchp
= (*seqp
)->ucs4
;
1475 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1476 the .(2). counterparts. */
1478 charclass_symbolic_ellipsis (struct linereader
*ldfile
,
1479 struct locale_ctype_t
*ctype
,
1480 const struct charmap_t
*charmap
,
1481 struct repertoire_t
*repertoire
,
1483 const char *last_str
,
1484 unsigned long int class256_bit
,
1485 unsigned long int class_bit
, int base
,
1486 int ignore_content
, int handle_digits
, int step
)
1488 const char *nowstr
= now
->val
.str
.startmb
;
1489 char tmp
[now
->val
.str
.lenmb
+ 1];
1492 unsigned long int from
;
1493 unsigned long int to
;
1495 /* We have to compute the ellipsis values using the symbolic names. */
1496 assert (last_str
!= NULL
);
1498 if (strlen (last_str
) != now
->val
.str
.lenmb
)
1502 _("`%s' and `%.*s' are not valid names for symbolic range"),
1503 last_str
, (int) now
->val
.str
.lenmb
, nowstr
);
1507 if (memcmp (last_str
, nowstr
, now
->val
.str
.lenmb
) == 0)
1508 /* Nothing to do, the names are the same. */
1511 for (cp
= last_str
; *cp
== *(nowstr
+ (cp
- last_str
)); ++cp
)
1515 from
= strtoul (cp
, &endp
, base
);
1516 if ((from
== UINT_MAX
&& errno
== ERANGE
) || *endp
!= '\0')
1519 to
= strtoul (nowstr
+ (cp
- last_str
), &endp
, base
);
1520 if ((to
== UINT_MAX
&& errno
== ERANGE
)
1521 || (endp
- nowstr
) != now
->val
.str
.lenmb
|| from
>= to
)
1524 /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
1525 if (!ignore_content
)
1527 now
->val
.str
.startmb
= tmp
;
1528 while ((from
+= step
) <= to
)
1530 struct charseq
*seq
;
1533 sprintf (tmp
, (base
== 10 ? "%.*s%0*ld" : "%.*s%0*lX"),
1534 (int) (cp
- last_str
), last_str
,
1535 (int) (now
->val
.str
.lenmb
- (cp
- last_str
)),
1538 get_character (now
, charmap
, repertoire
, &seq
, &wch
);
1540 if (seq
!= NULL
&& seq
->nbytes
== 1)
1541 /* Yep, we can store information about this byte sequence. */
1542 ctype
->class256_collection
[seq
->bytes
[0]] |= class256_bit
;
1544 if (wch
!= ILLEGAL_CHAR_VALUE
&& class_bit
!= 0)
1545 /* We have the UCS4 position. */
1546 *find_idx (ctype
, &ctype
->class_collection
,
1547 &ctype
->class_collection_max
,
1548 &ctype
->class_collection_act
, wch
) |= class_bit
;
1550 if (handle_digits
== 1)
1552 /* We must store the digit values. */
1553 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1555 ctype
->mbdigits_max
*= 2;
1556 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1557 (ctype
->mbdigits_max
1558 * sizeof (char *)));
1559 ctype
->wcdigits_max
*= 2;
1560 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1561 (ctype
->wcdigits_max
1562 * sizeof (uint32_t)));
1565 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
1566 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
1568 else if (handle_digits
== 2)
1570 /* We must store the digit values. */
1571 if (ctype
->outdigits_act
>= 10)
1573 lr_error (ldfile
, _("\
1574 %s: field `%s' does not contain exactly ten entries"),
1575 "LC_CTYPE", "outdigit");
1579 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
1580 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
1581 ++ctype
->outdigits_act
;
1588 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. */
1590 charclass_ucs4_ellipsis (struct linereader
*ldfile
,
1591 struct locale_ctype_t
*ctype
,
1592 const struct charmap_t
*charmap
,
1593 struct repertoire_t
*repertoire
,
1594 struct token
*now
, uint32_t last_wch
,
1595 unsigned long int class256_bit
,
1596 unsigned long int class_bit
, int ignore_content
,
1597 int handle_digits
, int step
)
1599 if (last_wch
> now
->val
.ucs4
)
1601 lr_error (ldfile
, _("\
1602 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1603 (now
->val
.ucs4
| last_wch
) < 65536 ? 4 : 8, now
->val
.ucs4
,
1604 (now
->val
.ucs4
| last_wch
) < 65536 ? 4 : 8, last_wch
);
1608 if (!ignore_content
)
1609 while ((last_wch
+= step
) <= now
->val
.ucs4
)
1611 /* We have to find out whether there is a byte sequence corresponding
1612 to this UCS4 value. */
1613 struct charseq
*seq
;
1616 snprintf (utmp
, sizeof (utmp
), "U%08X", last_wch
);
1617 seq
= charmap_find_value (charmap
, utmp
, 9);
1620 snprintf (utmp
, sizeof (utmp
), "U%04X", last_wch
);
1621 seq
= charmap_find_value (charmap
, utmp
, 5);
1625 /* Try looking in the repertoire map. */
1626 seq
= repertoire_find_seq (repertoire
, last_wch
);
1628 /* If this is the first time we look for this sequence create a new
1632 static const struct charseq negative
1633 = { .ucs4
= ILLEGAL_CHAR_VALUE
};
1635 /* Find the symbolic name for this UCS4 value. */
1636 if (repertoire
!= NULL
)
1638 const char *symbol
= repertoire_find_symbol (repertoire
,
1640 uint32_t *newp
= obstack_alloc (&repertoire
->mem_pool
,
1645 /* We have a name, now search the multibyte value. */
1646 seq
= charmap_find_value (charmap
, symbol
, strlen (symbol
));
1649 /* We have to create a fake entry. */
1650 seq
= (struct charseq
*) &negative
;
1652 seq
->ucs4
= last_wch
;
1654 insert_entry (&repertoire
->seq_table
, newp
, sizeof (uint32_t),
1658 /* We have to create a fake entry. */
1659 seq
= (struct charseq
*) &negative
;
1662 /* We have a name, now search the multibyte value. */
1663 if (seq
->ucs4
== last_wch
&& seq
->nbytes
== 1)
1664 /* Yep, we can store information about this byte sequence. */
1665 ctype
->class256_collection
[(size_t) seq
->bytes
[0]]
1668 /* And of course we have the UCS4 position. */
1670 *find_idx (ctype
, &ctype
->class_collection
,
1671 &ctype
->class_collection_max
,
1672 &ctype
->class_collection_act
, last_wch
) |= class_bit
;
1674 if (handle_digits
== 1)
1676 /* We must store the digit values. */
1677 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1679 ctype
->mbdigits_max
*= 2;
1680 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1681 (ctype
->mbdigits_max
1682 * sizeof (char *)));
1683 ctype
->wcdigits_max
*= 2;
1684 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1685 (ctype
->wcdigits_max
1686 * sizeof (uint32_t)));
1689 ctype
->mbdigits
[ctype
->mbdigits_act
++] = (seq
->ucs4
== last_wch
1691 ctype
->wcdigits
[ctype
->wcdigits_act
++] = last_wch
;
1693 else if (handle_digits
== 2)
1695 /* We must store the digit values. */
1696 if (ctype
->outdigits_act
>= 10)
1698 lr_error (ldfile
, _("\
1699 %s: field `%s' does not contain exactly ten entries"),
1700 "LC_CTYPE", "outdigit");
1704 ctype
->mboutdigits
[ctype
->outdigits_act
] = (seq
->ucs4
== last_wch
1706 ctype
->wcoutdigits
[ctype
->outdigits_act
] = last_wch
;
1707 ++ctype
->outdigits_act
;
1713 /* Ellipsis as in `/xea/x12.../xea/x34'. */
1715 charclass_charcode_ellipsis (struct linereader
*ldfile
,
1716 struct locale_ctype_t
*ctype
,
1717 const struct charmap_t
*charmap
,
1718 struct repertoire_t
*repertoire
,
1719 struct token
*now
, char *last_charcode
,
1720 uint32_t last_charcode_len
,
1721 unsigned long int class256_bit
,
1722 unsigned long int class_bit
, int ignore_content
,
1725 /* First check whether the to-value is larger. */
1726 if (now
->val
.charcode
.nbytes
!= last_charcode_len
)
1728 lr_error (ldfile
, _("\
1729 start and end character sequence of range must have the same length"));
1733 if (memcmp (last_charcode
, now
->val
.charcode
.bytes
, last_charcode_len
) > 0)
1735 lr_error (ldfile
, _("\
1736 to-value character sequence is smaller than from-value sequence"));
1740 if (!ignore_content
)
1744 /* Increment the byte sequence value. */
1745 struct charseq
*seq
;
1749 for (i
= last_charcode_len
- 1; i
>= 0; --i
)
1750 if (++last_charcode
[i
] != 0)
1753 if (last_charcode_len
== 1)
1754 /* Of course we have the charcode value. */
1755 ctype
->class256_collection
[(size_t) last_charcode
[0]]
1758 /* Find the symbolic name. */
1759 seq
= charmap_find_symbol (charmap
, last_charcode
,
1763 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1764 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1765 strlen (seq
->name
));
1766 wch
= seq
== NULL
? ILLEGAL_CHAR_VALUE
: seq
->ucs4
;
1768 if (wch
!= ILLEGAL_CHAR_VALUE
&& class_bit
!= 0)
1769 *find_idx (ctype
, &ctype
->class_collection
,
1770 &ctype
->class_collection_max
,
1771 &ctype
->class_collection_act
, wch
) |= class_bit
;
1774 wch
= ILLEGAL_CHAR_VALUE
;
1776 if (handle_digits
== 1)
1778 /* We must store the digit values. */
1779 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1781 ctype
->mbdigits_max
*= 2;
1782 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1783 (ctype
->mbdigits_max
1784 * sizeof (char *)));
1785 ctype
->wcdigits_max
*= 2;
1786 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1787 (ctype
->wcdigits_max
1788 * sizeof (uint32_t)));
1791 seq
= xmalloc (sizeof (struct charseq
) + last_charcode_len
);
1792 memcpy ((char *) (seq
+ 1), last_charcode
, last_charcode_len
);
1793 seq
->nbytes
= last_charcode_len
;
1795 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
1796 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
1798 else if (handle_digits
== 2)
1800 struct charseq
*seq
;
1801 /* We must store the digit values. */
1802 if (ctype
->outdigits_act
>= 10)
1804 lr_error (ldfile
, _("\
1805 %s: field `%s' does not contain exactly ten entries"),
1806 "LC_CTYPE", "outdigit");
1810 seq
= xmalloc (sizeof (struct charseq
) + last_charcode_len
);
1811 memcpy ((char *) (seq
+ 1), last_charcode
, last_charcode_len
);
1812 seq
->nbytes
= last_charcode_len
;
1814 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
1815 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
1816 ++ctype
->outdigits_act
;
1819 while (memcmp (last_charcode
, now
->val
.charcode
.bytes
,
1820 last_charcode_len
) != 0);
1826 find_translit2 (struct locale_ctype_t
*ctype
, const struct charmap_t
*charmap
,
1829 struct translit_t
*trunp
= ctype
->translit
;
1830 struct translit_ignore_t
*tirunp
= ctype
->translit_ignore
;
1832 while (trunp
!= NULL
)
1834 /* XXX We simplify things here. The transliterations we look
1835 for are only allowed to have one character. */
1836 if (trunp
->from
[0] == wch
&& trunp
->from
[1] == 0)
1838 /* Found it. Now look for a transliteration which can be
1839 represented with the character set. */
1840 struct translit_to_t
*torunp
= trunp
->to
;
1842 while (torunp
!= NULL
)
1846 for (i
= 0; torunp
->str
[i
] != 0; ++i
)
1850 snprintf (utmp
, sizeof (utmp
), "U%08X", torunp
->str
[i
]);
1851 if (charmap_find_value (charmap
, utmp
, 9) == NULL
)
1852 /* This character cannot be represented. */
1856 if (torunp
->str
[i
] == 0)
1859 torunp
= torunp
->next
;
1865 trunp
= trunp
->next
;
1868 /* Check for ignored chars. */
1869 while (tirunp
!= NULL
)
1871 if (tirunp
->from
<= wch
&& tirunp
->to
>= wch
)
1875 for (wi
= tirunp
->from
; wi
<= wch
; wi
+= tirunp
->step
)
1877 return (uint32_t []) { 0 };
1881 /* Nothing found. */
1887 find_translit (struct localedef_t
*locale
, const struct charmap_t
*charmap
,
1890 struct locale_ctype_t
*ctype
;
1891 uint32_t *result
= NULL
;
1893 assert (locale
!= NULL
);
1894 ctype
= locale
->categories
[LC_CTYPE
].ctype
;
1899 if (ctype
->translit
!= NULL
)
1900 result
= find_translit2 (ctype
, charmap
, wch
);
1904 struct translit_include_t
*irunp
= ctype
->translit_include
;
1906 while (irunp
!= NULL
&& result
== NULL
)
1908 result
= find_translit (find_locale (CTYPE_LOCALE
,
1910 irunp
->copy_repertoire
,
1913 irunp
= irunp
->next
;
1921 /* Read one transliteration entry. */
1923 read_widestring (struct linereader
*ldfile
, struct token
*now
,
1924 const struct charmap_t
*charmap
,
1925 struct repertoire_t
*repertoire
)
1929 if (now
->tok
== tok_default_missing
)
1930 /* The special name "" will denote this case. */
1931 wstr
= ((uint32_t *) { 0 });
1932 else if (now
->tok
== tok_bsymbol
)
1934 /* Get the value from the repertoire. */
1935 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1936 wstr
[0] = repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
1937 now
->val
.str
.lenmb
);
1938 if (wstr
[0] == ILLEGAL_CHAR_VALUE
)
1940 /* We cannot proceed, we don't know the UCS4 value. */
1947 else if (now
->tok
== tok_ucs4
)
1949 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1950 wstr
[0] = now
->val
.ucs4
;
1953 else if (now
->tok
== tok_charcode
)
1955 /* Argh, we have to convert to the symbol name first and then to the
1957 struct charseq
*seq
= charmap_find_symbol (charmap
,
1958 now
->val
.str
.startmb
,
1959 now
->val
.str
.lenmb
);
1961 /* Cannot find the UCS4 value. */
1964 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1965 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1966 strlen (seq
->name
));
1967 if (seq
->ucs4
== ILLEGAL_CHAR_VALUE
)
1968 /* We cannot proceed, we don't know the UCS4 value. */
1971 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1972 wstr
[0] = seq
->ucs4
;
1975 else if (now
->tok
== tok_string
)
1977 wstr
= now
->val
.str
.startwc
;
1978 if (wstr
== NULL
|| wstr
[0] == 0)
1983 if (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
1984 lr_ignore_rest (ldfile
, 0);
1985 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1986 return (uint32_t *) -1l;
1994 read_translit_entry (struct linereader
*ldfile
, struct locale_ctype_t
*ctype
,
1995 struct token
*now
, const struct charmap_t
*charmap
,
1996 struct repertoire_t
*repertoire
)
1998 uint32_t *from_wstr
= read_widestring (ldfile
, now
, charmap
, repertoire
);
1999 struct translit_t
*result
;
2000 struct translit_to_t
**top
;
2001 struct obstack
*ob
= &ctype
->mempool
;
2005 if (from_wstr
== NULL
)
2006 /* There is no valid from string. */
2009 result
= (struct translit_t
*) obstack_alloc (ob
,
2010 sizeof (struct translit_t
));
2011 result
->from
= from_wstr
;
2012 result
->fname
= ldfile
->fname
;
2013 result
->lineno
= ldfile
->lineno
;
2014 result
->next
= NULL
;
2024 /* Next we have one or more transliterations. They are
2025 separated by semicolons. */
2026 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
2028 if (!first
&& (now
->tok
== tok_semicolon
|| now
->tok
== tok_eol
))
2030 /* One string read. */
2031 const uint32_t zero
= 0;
2035 obstack_grow (ob
, &zero
, 4);
2036 to_wstr
= obstack_finish (ob
);
2038 *top
= obstack_alloc (ob
, sizeof (struct translit_to_t
));
2039 (*top
)->str
= to_wstr
;
2040 (*top
)->next
= NULL
;
2043 if (now
->tok
== tok_eol
)
2045 result
->next
= ctype
->translit
;
2046 ctype
->translit
= result
;
2051 top
= &(*top
)->next
;
2056 to_wstr
= read_widestring (ldfile
, now
, charmap
, repertoire
);
2057 if (to_wstr
== (uint32_t *) -1l)
2059 /* An error occurred. */
2060 obstack_free (ob
, result
);
2064 if (to_wstr
== NULL
)
2067 /* This value is usable. */
2068 obstack_grow (ob
, to_wstr
, wcslen ((wchar_t *) to_wstr
) * 4);
2077 read_translit_ignore_entry (struct linereader
*ldfile
,
2078 struct locale_ctype_t
*ctype
,
2079 const struct charmap_t
*charmap
,
2080 struct repertoire_t
*repertoire
)
2082 /* We expect a semicolon-separated list of characters we ignore. We are
2083 only interested in the wide character definitions. These must be
2084 single characters, possibly defining a range when an ellipsis is used. */
2087 struct token
*now
= lr_token (ldfile
, charmap
, NULL
, repertoire
,
2089 struct translit_ignore_t
*newp
;
2092 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2095 _("premature end of `translit_ignore' definition"));
2099 if (now
->tok
!= tok_bsymbol
&& now
->tok
!= tok_ucs4
)
2101 lr_error (ldfile
, _("syntax error"));
2102 lr_ignore_rest (ldfile
, 0);
2106 if (now
->tok
== tok_ucs4
)
2107 from
= now
->val
.ucs4
;
2109 /* Try to get the value. */
2110 from
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
2111 now
->val
.str
.lenmb
);
2113 if (from
== ILLEGAL_CHAR_VALUE
)
2115 lr_error (ldfile
, "invalid character name");
2120 newp
= (struct translit_ignore_t
*)
2121 obstack_alloc (&ctype
->mempool
, sizeof (struct translit_ignore_t
));
2126 newp
->next
= ctype
->translit_ignore
;
2127 ctype
->translit_ignore
= newp
;
2130 /* Now we expect either a semicolon, an ellipsis, or the end of the
2132 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
2134 if (now
->tok
== tok_ellipsis2
|| now
->tok
== tok_ellipsis2_2
)
2136 /* XXX Should we bother implementing `....'? `...' certainly
2137 will not be implemented. */
2139 int step
= now
->tok
== tok_ellipsis2_2
? 2 : 1;
2141 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
2143 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2146 _("premature end of `translit_ignore' definition"));
2150 if (now
->tok
!= tok_bsymbol
&& now
->tok
!= tok_ucs4
)
2152 lr_error (ldfile
, _("syntax error"));
2153 lr_ignore_rest (ldfile
, 0);
2157 if (now
->tok
== tok_ucs4
)
2160 /* Try to get the value. */
2161 to
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
2162 now
->val
.str
.lenmb
);
2164 if (to
== ILLEGAL_CHAR_VALUE
)
2165 lr_error (ldfile
, "invalid character name");
2168 /* Make sure the `to'-value is larger. */
2175 lr_error (ldfile
, _("\
2176 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2177 (to
| from
) < 65536 ? 4 : 8, to
,
2178 (to
| from
) < 65536 ? 4 : 8, from
);
2181 /* And the next token. */
2182 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
2185 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2189 if (now
->tok
== tok_semicolon
)
2193 /* If we come here something is wrong. */
2194 lr_error (ldfile
, _("syntax error"));
2195 lr_ignore_rest (ldfile
, 0);
2201 /* The parser for the LC_CTYPE section of the locale definition. */
2203 ctype_read (struct linereader
*ldfile
, struct localedef_t
*result
,
2204 const struct charmap_t
*charmap
, const char *repertoire_name
,
2207 struct repertoire_t
*repertoire
= NULL
;
2208 struct locale_ctype_t
*ctype
;
2210 enum token_t nowtok
;
2212 struct charseq
*last_seq
;
2213 uint32_t last_wch
= 0;
2214 enum token_t last_token
;
2215 enum token_t ellipsis_token
;
2217 char last_charcode
[16];
2218 size_t last_charcode_len
= 0;
2219 const char *last_str
= NULL
;
2221 struct localedef_t
*copy_locale
= NULL
;
2223 /* Get the repertoire we have to use. */
2224 if (repertoire_name
!= NULL
)
2225 repertoire
= repertoire_read (repertoire_name
);
2227 /* The rest of the line containing `LC_CTYPE' must be free. */
2228 lr_ignore_rest (ldfile
, 1);
2233 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2236 while (nowtok
== tok_eol
);
2238 /* If we see `copy' now we are almost done. */
2239 if (nowtok
== tok_copy
)
2241 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2242 if (now
->tok
!= tok_string
)
2244 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2248 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2249 while (now
->tok
!= tok_eof
&& now
->tok
!= tok_end
);
2251 if (now
->tok
!= tok_eof
2252 || (now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
),
2253 now
->tok
== tok_eof
))
2254 lr_error (ldfile
, _("%s: premature end of file"), "LC_CTYPE");
2255 else if (now
->tok
!= tok_lc_ctype
)
2257 lr_error (ldfile
, _("\
2258 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2259 lr_ignore_rest (ldfile
, 0);
2262 lr_ignore_rest (ldfile
, 1);
2267 if (! ignore_content
)
2269 /* Get the locale definition. */
2270 copy_locale
= load_locale (LC_CTYPE
, now
->val
.str
.startmb
,
2271 repertoire_name
, charmap
, NULL
);
2272 if ((copy_locale
->avail
& CTYPE_LOCALE
) == 0)
2274 /* Not yet loaded. So do it now. */
2275 if (locfile_read (copy_locale
, charmap
) != 0)
2279 if (copy_locale
->categories
[LC_CTYPE
].ctype
== NULL
)
2283 lr_ignore_rest (ldfile
, 1);
2285 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2289 /* Prepare the data structures. */
2290 ctype_startup (ldfile
, result
, charmap
, copy_locale
, ignore_content
);
2291 ctype
= result
->categories
[LC_CTYPE
].ctype
;
2293 /* Remember the repertoire we use. */
2294 if (!ignore_content
)
2295 ctype
->repertoire
= repertoire
;
2299 unsigned long int class_bit
= 0;
2300 unsigned long int class256_bit
= 0;
2301 int handle_digits
= 0;
2303 /* Of course we don't proceed beyond the end of file. */
2304 if (nowtok
== tok_eof
)
2307 /* Ignore empty lines. */
2308 if (nowtok
== tok_eol
)
2310 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2318 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2319 while (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2321 ctype_class_new (ldfile
, ctype
, now
->val
.str
.startmb
);
2322 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2323 if (now
->tok
!= tok_semicolon
)
2325 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2327 if (now
->tok
!= tok_eol
)
2329 %s: syntax error in definition of new character class"), "LC_CTYPE");
2333 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2334 while (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2336 ctype_map_new (ldfile
, ctype
, now
->val
.str
.startmb
, charmap
);
2337 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2338 if (now
->tok
!= tok_semicolon
)
2340 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2342 if (now
->tok
!= tok_eol
)
2344 %s: syntax error in definition of new character map"), "LC_CTYPE");
2348 /* Ignore the rest of the line if we don't need the input of
2352 lr_ignore_rest (ldfile
, 0);
2356 /* We simply forget the `class' keyword and use the following
2357 operand to determine the bit. */
2358 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2359 if (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2361 /* Must be one of the predefined class names. */
2362 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
2363 if (strcmp (ctype
->classnames
[cnt
], now
->val
.str
.startmb
) == 0)
2365 if (cnt
>= ctype
->nr_charclass
)
2367 #ifdef PREDEFINED_CLASSES
2368 if (now
->val
.str
.lenmb
== 8
2369 && memcmp ("special1", now
->val
.str
.startmb
, 8) == 0)
2370 class_bit
= _ISwspecial1
;
2371 else if (now
->val
.str
.lenmb
== 8
2372 && memcmp ("special2", now
->val
.str
.startmb
, 8) == 0)
2373 class_bit
= _ISwspecial2
;
2374 else if (now
->val
.str
.lenmb
== 8
2375 && memcmp ("special3", now
->val
.str
.startmb
, 8) == 0)
2376 class_bit
= _ISwspecial3
;
2380 /* OK, it's a new class. */
2381 ctype_class_new (ldfile
, ctype
, now
->val
.str
.startmb
);
2383 class_bit
= _ISwbit (ctype
->nr_charclass
- 1);
2388 class_bit
= _ISwbit (cnt
);
2390 free (now
->val
.str
.startmb
);
2393 else if (now
->tok
== tok_digit
)
2394 goto handle_tok_digit
;
2395 else if (now
->tok
< tok_upper
|| now
->tok
> tok_blank
)
2399 class_bit
= BITw (now
->tok
);
2400 class256_bit
= BIT (now
->tok
);
2403 /* The next character must be a semicolon. */
2404 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2405 if (now
->tok
!= tok_semicolon
)
2407 goto read_charclass
;
2420 /* Ignore the rest of the line if we don't need the input of
2424 lr_ignore_rest (ldfile
, 0);
2428 class_bit
= BITw (now
->tok
);
2429 class256_bit
= BIT (now
->tok
);
2432 ctype
->class_done
|= class_bit
;
2433 last_token
= tok_none
;
2434 ellipsis_token
= tok_none
;
2436 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2437 while (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
2440 struct charseq
*seq
;
2442 if (ellipsis_token
== tok_none
)
2444 if (get_character (now
, charmap
, repertoire
, &seq
, &wch
))
2447 if (!ignore_content
&& seq
!= NULL
&& seq
->nbytes
== 1)
2448 /* Yep, we can store information about this byte
2450 ctype
->class256_collection
[seq
->bytes
[0]] |= class256_bit
;
2452 if (!ignore_content
&& wch
!= ILLEGAL_CHAR_VALUE
2454 /* We have the UCS4 position. */
2455 *find_idx (ctype
, &ctype
->class_collection
,
2456 &ctype
->class_collection_max
,
2457 &ctype
->class_collection_act
, wch
) |= class_bit
;
2459 last_token
= now
->tok
;
2460 /* Terminate the string. */
2461 if (last_token
== tok_bsymbol
)
2463 now
->val
.str
.startmb
[now
->val
.str
.lenmb
] = '\0';
2464 last_str
= now
->val
.str
.startmb
;
2470 memcpy (last_charcode
, now
->val
.charcode
.bytes
, 16);
2471 last_charcode_len
= now
->val
.charcode
.nbytes
;
2473 if (!ignore_content
&& handle_digits
== 1)
2475 /* We must store the digit values. */
2476 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
2478 ctype
->mbdigits_max
+= 10;
2479 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
2480 (ctype
->mbdigits_max
2481 * sizeof (char *)));
2482 ctype
->wcdigits_max
+= 10;
2483 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
2484 (ctype
->wcdigits_max
2485 * sizeof (uint32_t)));
2488 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
2489 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
2491 else if (!ignore_content
&& handle_digits
== 2)
2493 /* We must store the digit values. */
2494 if (ctype
->outdigits_act
>= 10)
2496 lr_error (ldfile
, _("\
2497 %s: field `%s' does not contain exactly ten entries"),
2498 "LC_CTYPE", "outdigit");
2499 lr_ignore_rest (ldfile
, 0);
2503 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
2504 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
2505 ++ctype
->outdigits_act
;
2510 /* Now it gets complicated. We have to resolve the
2511 ellipsis problem. First we must distinguish between
2512 the different kind of ellipsis and this must match the
2513 tokens we have seen. */
2514 assert (last_token
!= tok_none
);
2516 if (last_token
!= now
->tok
)
2518 lr_error (ldfile
, _("\
2519 ellipsis range must be marked by two operands of same type"));
2520 lr_ignore_rest (ldfile
, 0);
2524 if (last_token
== tok_bsymbol
)
2526 if (ellipsis_token
== tok_ellipsis3
)
2527 lr_error (ldfile
, _("with symbolic name range values \
2528 the absolute ellipsis `...' must not be used"));
2530 charclass_symbolic_ellipsis (ldfile
, ctype
, charmap
,
2531 repertoire
, now
, last_str
,
2532 class256_bit
, class_bit
,
2537 handle_digits
, step
);
2539 else if (last_token
== tok_ucs4
)
2541 if (ellipsis_token
!= tok_ellipsis2
)
2542 lr_error (ldfile
, _("\
2543 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2545 charclass_ucs4_ellipsis (ldfile
, ctype
, charmap
,
2546 repertoire
, now
, last_wch
,
2547 class256_bit
, class_bit
,
2548 ignore_content
, handle_digits
,
2553 assert (last_token
== tok_charcode
);
2555 if (ellipsis_token
!= tok_ellipsis3
)
2556 lr_error (ldfile
, _("\
2557 with character code range values one must use the absolute ellipsis `...'"));
2559 charclass_charcode_ellipsis (ldfile
, ctype
, charmap
,
2563 class256_bit
, class_bit
,
2568 /* Now we have used the last value. */
2569 last_token
= tok_none
;
2572 /* Next we expect a semicolon or the end of the line. */
2573 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2574 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2577 if (last_token
!= tok_none
2578 && now
->tok
>= tok_ellipsis2
&& now
->tok
<= tok_ellipsis4_2
)
2580 if (now
->tok
== tok_ellipsis2_2
)
2582 now
->tok
= tok_ellipsis2
;
2585 else if (now
->tok
== tok_ellipsis4_2
)
2587 now
->tok
= tok_ellipsis4
;
2591 ellipsis_token
= now
->tok
;
2593 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2597 if (now
->tok
!= tok_semicolon
)
2600 /* And get the next character. */
2601 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2603 ellipsis_token
= tok_none
;
2609 /* Ignore the rest of the line if we don't need the input of
2613 lr_ignore_rest (ldfile
, 0);
2618 class_bit
= _ISwdigit
;
2619 class256_bit
= _ISdigit
;
2621 goto read_charclass
;
2624 /* Ignore the rest of the line if we don't need the input of
2628 lr_ignore_rest (ldfile
, 0);
2632 if (ctype
->outdigits_act
!= 0)
2633 lr_error (ldfile
, _("\
2634 %s: field `%s' declared more than once"),
2635 "LC_CTYPE", "outdigit");
2639 goto read_charclass
;
2642 /* Ignore the rest of the line if we don't need the input of
2646 lr_ignore_rest (ldfile
, 0);
2654 /* Ignore the rest of the line if we don't need the input of
2658 lr_ignore_rest (ldfile
, 0);
2666 /* Ignore the rest of the line if we don't need the input of
2670 lr_ignore_rest (ldfile
, 0);
2674 /* We simply forget the `map' keyword and use the following
2675 operand to determine the mapping. */
2676 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2677 if (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2681 for (cnt
= 2; cnt
< ctype
->map_collection_nr
; ++cnt
)
2682 if (strcmp (now
->val
.str
.startmb
, ctype
->mapnames
[cnt
]) == 0)
2685 if (cnt
< ctype
->map_collection_nr
)
2686 free (now
->val
.str
.startmb
);
2688 /* OK, it's a new map. */
2689 ctype_map_new (ldfile
, ctype
, now
->val
.str
.startmb
, charmap
);
2693 else if (now
->tok
< tok_toupper
|| now
->tok
> tok_tolower
)
2696 mapidx
= now
->tok
- tok_toupper
;
2698 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2699 /* This better should be a semicolon. */
2700 if (now
->tok
!= tok_semicolon
)
2704 /* Test whether this mapping was already defined. */
2705 if (ctype
->tomap_done
[mapidx
])
2707 lr_error (ldfile
, _("duplicated definition for mapping `%s'"),
2708 ctype
->mapnames
[mapidx
]);
2709 lr_ignore_rest (ldfile
, 0);
2712 ctype
->tomap_done
[mapidx
] = 1;
2714 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2715 while (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
2717 struct charseq
*from_seq
;
2719 struct charseq
*to_seq
;
2722 /* Every pair starts with an opening brace. */
2723 if (now
->tok
!= tok_open_brace
)
2726 /* Next comes the from-value. */
2727 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2728 if (get_character (now
, charmap
, repertoire
, &from_seq
,
2732 /* The next is a comma. */
2733 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2734 if (now
->tok
!= tok_comma
)
2737 /* And the other value. */
2738 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2739 if (get_character (now
, charmap
, repertoire
, &to_seq
,
2743 /* And the last thing is the closing brace. */
2744 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2745 if (now
->tok
!= tok_close_brace
)
2748 if (!ignore_content
)
2750 /* Check whether the mapping converts from an ASCII value
2751 to a non-ASCII value. */
2752 if (from_seq
!= NULL
&& from_seq
->nbytes
== 1
2753 && isascii (from_seq
->bytes
[0])
2754 && to_seq
!= NULL
&& (to_seq
->nbytes
!= 1
2755 || !isascii (to_seq
->bytes
[0])))
2756 ctype
->to_nonascii
= 1;
2758 if (mapidx
< 2 && from_seq
!= NULL
&& to_seq
!= NULL
2759 && from_seq
->nbytes
== 1 && to_seq
->nbytes
== 1)
2760 /* We can use this value. */
2761 ctype
->map256_collection
[mapidx
][from_seq
->bytes
[0]]
2764 if (from_wch
!= ILLEGAL_CHAR_VALUE
2765 && to_wch
!= ILLEGAL_CHAR_VALUE
)
2766 /* Both correct values. */
2767 *find_idx (ctype
, &ctype
->map_collection
[mapidx
],
2768 &ctype
->map_collection_max
[mapidx
],
2769 &ctype
->map_collection_act
[mapidx
],
2773 /* Now comes a semicolon or the end of the line/file. */
2774 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2775 if (now
->tok
== tok_semicolon
)
2776 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2780 case tok_translit_start
:
2781 /* Ignore the entire translit section with its peculiar syntax
2782 if we don't need the input. */
2787 lr_ignore_rest (ldfile
, 0);
2788 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2790 while (now
->tok
!= tok_translit_end
&& now
->tok
!= tok_eof
);
2792 if (now
->tok
== tok_eof
)
2793 lr_error (ldfile
, _(\
2794 "%s: `translit_start' section does not end with `translit_end'"),
2800 /* The rest of the line better should be empty. */
2801 lr_ignore_rest (ldfile
, 1);
2803 /* We count here the number of allocated entries in the `translit'
2807 ldfile
->translate_strings
= 1;
2808 ldfile
->return_widestr
= 1;
2810 /* We proceed until we see the `translit_end' token. */
2811 while (now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
),
2812 now
->tok
!= tok_translit_end
&& now
->tok
!= tok_eof
)
2814 if (now
->tok
== tok_eol
)
2815 /* Ignore empty lines. */
2818 if (now
->tok
== tok_include
)
2820 /* We have to include locale. */
2821 const char *locale_name
;
2822 const char *repertoire_name
;
2823 struct translit_include_t
*include_stmt
, **include_ptr
;
2825 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2826 /* This should be a string or an identifier. In any
2827 case something to name a locale. */
2828 if (now
->tok
!= tok_string
&& now
->tok
!= tok_ident
)
2831 lr_error (ldfile
, _("%s: syntax error"), "LC_CTYPE");
2832 lr_ignore_rest (ldfile
, 0);
2835 locale_name
= now
->val
.str
.startmb
;
2837 /* Next should be a semicolon. */
2838 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2839 if (now
->tok
!= tok_semicolon
)
2840 goto translit_syntax
;
2842 /* Now the repertoire name. */
2843 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2844 if ((now
->tok
!= tok_string
&& now
->tok
!= tok_ident
)
2845 || now
->val
.str
.startmb
== NULL
)
2846 goto translit_syntax
;
2847 repertoire_name
= now
->val
.str
.startmb
;
2848 if (repertoire_name
[0] == '\0')
2849 /* Ignore the empty string. */
2850 repertoire_name
= NULL
;
2852 /* Save the include statement for later processing. */
2853 include_stmt
= (struct translit_include_t
*)
2854 xmalloc (sizeof (struct translit_include_t
));
2855 include_stmt
->copy_locale
= locale_name
;
2856 include_stmt
->copy_repertoire
= repertoire_name
;
2857 include_stmt
->next
= NULL
;
2859 include_ptr
= &ctype
->translit_include
;
2860 while (*include_ptr
!= NULL
)
2861 include_ptr
= &(*include_ptr
)->next
;
2862 *include_ptr
= include_stmt
;
2864 /* The rest of the line must be empty. */
2865 lr_ignore_rest (ldfile
, 1);
2867 /* Make sure the locale is read. */
2868 add_to_readlist (LC_CTYPE
, locale_name
, repertoire_name
,
2872 else if (now
->tok
== tok_default_missing
)
2878 /* We expect a single character or string as the
2880 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2881 wstr
= read_widestring (ldfile
, now
, charmap
,
2886 if (ctype
->default_missing
!= NULL
)
2888 lr_error (ldfile
, _("\
2889 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2890 WITH_CUR_LOCALE (error_at_line (0, 0,
2891 ctype
->default_missing_file
,
2892 ctype
->default_missing_lineno
,
2894 previous definition was here")));
2898 ctype
->default_missing
= wstr
;
2899 ctype
->default_missing_file
= ldfile
->fname
;
2900 ctype
->default_missing_lineno
= ldfile
->lineno
;
2902 /* We can have more entries, ignore them. */
2903 lr_ignore_rest (ldfile
, 0);
2906 else if (wstr
== (uint32_t *) -1l)
2907 /* This was a syntax error. */
2910 /* Maybe there is another replacement we can use. */
2911 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2912 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2914 /* Nothing found. We tell the user. */
2915 lr_error (ldfile
, _("\
2916 %s: no representable `default_missing' definition found"), "LC_CTYPE");
2919 if (now
->tok
!= tok_semicolon
)
2920 goto translit_syntax
;
2925 else if (now
->tok
== tok_translit_ignore
)
2927 read_translit_ignore_entry (ldfile
, ctype
, charmap
,
2932 read_translit_entry (ldfile
, ctype
, now
, charmap
, repertoire
);
2934 ldfile
->return_widestr
= 0;
2936 if (now
->tok
== tok_eof
)
2937 lr_error (ldfile
, _(\
2938 "%s: `translit_start' section does not end with `translit_end'"),
2944 /* Ignore the rest of the line if we don't need the input of
2948 lr_ignore_rest (ldfile
, 0);
2952 /* This could mean one of several things. First test whether
2953 it's a character class name. */
2954 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
2955 if (strcmp (now
->val
.str
.startmb
, ctype
->classnames
[cnt
]) == 0)
2957 if (cnt
< ctype
->nr_charclass
)
2959 class_bit
= _ISwbit (cnt
);
2960 class256_bit
= cnt
<= 11 ? _ISbit (cnt
) : 0;
2961 free (now
->val
.str
.startmb
);
2962 goto read_charclass
;
2964 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
)
2965 if (strcmp (now
->val
.str
.startmb
, ctype
->mapnames
[cnt
]) == 0)
2967 if (cnt
< ctype
->map_collection_nr
)
2970 free (now
->val
.str
.startmb
);
2973 #ifdef PREDEFINED_CLASSES
2974 if (strcmp (now
->val
.str
.startmb
, "special1") == 0)
2976 class_bit
= _ISwspecial1
;
2977 free (now
->val
.str
.startmb
);
2978 goto read_charclass
;
2980 if (strcmp (now
->val
.str
.startmb
, "special2") == 0)
2982 class_bit
= _ISwspecial2
;
2983 free (now
->val
.str
.startmb
);
2984 goto read_charclass
;
2986 if (strcmp (now
->val
.str
.startmb
, "special3") == 0)
2988 class_bit
= _ISwspecial3
;
2989 free (now
->val
.str
.startmb
);
2990 goto read_charclass
;
2992 if (strcmp (now
->val
.str
.startmb
, "tosymmetric") == 0)
3001 /* Next we assume `LC_CTYPE'. */
3002 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
3003 if (now
->tok
== tok_eof
)
3005 if (now
->tok
== tok_eol
)
3006 lr_error (ldfile
, _("%s: incomplete `END' line"),
3008 else if (now
->tok
!= tok_lc_ctype
)
3009 lr_error (ldfile
, _("\
3010 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
3011 lr_ignore_rest (ldfile
, now
->tok
== tok_lc_ctype
);
3016 if (now
->tok
!= tok_eof
)
3017 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
3020 /* Prepare for the next round. */
3021 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
3025 /* When we come here we reached the end of the file. */
3026 lr_error (ldfile
, _("%s: premature end of file"), "LC_CTYPE");
/* Supply default values for everything in CTYPE that the locale source
   did not define: class membership is guarded by the bits recorded in
   ctype->class_done, the toupper/tolower maps by ctype->tomap_done[0]
   and [1], and the outdigit table is filled up to ten entries.
   CHARMAP is used to look characters up by name.  REPERTOIRE is not
   referenced in the lines visible here.  */
3031 set_class_defaults (struct locale_ctype_t
*ctype
,
3032 const struct charmap_t
*charmap
,
3033 struct repertoire_t
*repertoire
)
3037 /* This function defines the default values for the classes and conversions
3038 according to POSIX.2 2.5.2.1.
3039 It may seem that the order of these if-blocks is arbitrary but it is NOT.
3040 Don't move them unless you know what you do! */
/* Nested helper: for every character code CH in FROM..TO, look the
   character up in the charmap and set the class at bit position BITPOS
   in both the 8-bit table and the wide-character table.  */
3042 auto void set_default (int bitpos
, int from
, int to
);
3044 void set_default (int bitpos
, int from
, int to
)
/* The same class expressed as a mask for the 8-bit table (bit) and
   for the wide-character table (bitw).  */
3048 int bit
= _ISbit (bitpos
);
3049 int bitw
= _ISwbit (bitpos
);
3050 /* Define string. */
3053 for (ch
= from
; ch
<= to
; ++ch
)
3055 struct charseq
*seq
;
/* First try the one-character name held in tmp ...  */
3058 seq
= charmap_find_value (charmap
, tmp
, 1);
/* ... then the nine-character "Uxxxxxxxx" spelling of the code.
   NOTE(review): the condition guarding this fallback is not visible
   in this extract.  */
3062 sprintf (buf
, "U%08X", ch
);
3063 seq
= charmap_find_value (charmap
, buf
, 9);
3068 WITH_CUR_LOCALE (error (0, 0, _("\
3069 %s: character `%s' not defined while needed as default value"),
3072 else if (seq
->nbytes
!= 1)
3073 WITH_CUR_LOCALE (error (0, 0, _("\
3074 %s: character `%s' in charmap not representable with one byte"),
3077 ctype
->class256_collection
[seq
->bytes
[0]] |= bit
;
3079 /* No need to search here, the ASCII value is also the Unicode
3081 ELEM (ctype
, class_collection
, , ch
) |= bitw
;
3085 /* Set default values if keyword was not present. */
3086 if ((ctype
->class_done
& BITw (tok_upper
)) == 0)
3087 /* "If this keyword [upper] is not specified, the uppercase letters
3088 `A' through `Z', ..., shall automatically belong to this class,
3089 with implementation defined character values." [P1003.2, 2.5.2.1] */
3090 set_default (BITPOS (tok_upper
), 'A', 'Z');
3092 if ((ctype
->class_done
& BITw (tok_lower
)) == 0)
3093 /* "If this keyword [lower] is not specified, the lowercase letters
3094 `a' through `z', ..., shall automatically belong to this class,
3095 with implementation defined character values." [P1003.2, 2.5.2.1] */
3096 set_default (BITPOS (tok_lower
), 'a', 'z');
3098 if ((ctype
->class_done
& BITw (tok_alpha
)) == 0)
3100 /* Table 2-6 in P1003.2 says that characters in class `upper' or
3101 class `lower' *must* be in class `alpha'. */
3102 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
);
3103 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
);
/* 8-bit table first, then every entry of the wide-character table.  */
3105 for (cnt
= 0; cnt
< 256; ++cnt
)
3106 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3107 ctype
->class256_collection
[cnt
] |= BIT (tok_alpha
);
3109 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3110 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3111 ctype
->class_collection
[cnt
] |= BITw (tok_alpha
);
3114 if ((ctype
->class_done
& BITw (tok_digit
)) == 0)
3115 /* "If this keyword [digit] is not specified, the digits `0' through
3116 `9', ..., shall automatically belong to this class, with
3117 implementation-defined character values." [P1003.2, 2.5.2.1] */
3118 set_default (BITPOS (tok_digit
), '0', '9');
3120 /* "Only characters specified for the `alpha' and `digit' keyword
3121 shall be specified. Characters specified for the keyword `alpha'
3122 and `digit' are automatically included in this class."
   The loops below therefore add `alnum' to every character that is
   already `alpha' or `digit'. */
3124 unsigned long int mask
= BIT (tok_alpha
) | BIT (tok_digit
);
3125 unsigned long int maskw
= BITw (tok_alpha
) | BITw (tok_digit
);
3127 for (cnt
= 0; cnt
< 256; ++cnt
)
3128 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3129 ctype
->class256_collection
[cnt
] |= BIT (tok_alnum
);
3131 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3132 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3133 ctype
->class_collection
[cnt
] |= BITw (tok_alnum
);
3136 if ((ctype
->class_done
& BITw (tok_space
)) == 0)
3137 /* "If this keyword [space] is not specified, the characters <space>,
3138 <form-feed>, <newline>, <carriage-return>, <tab>, and
3139 <vertical-tab>, ..., shall automatically belong to this class,
3140 with implementation-defined character values." [P1003.2, 2.5.2.1] */
3142 struct charseq
*seq
;
/* Each of the six characters is handled the same way: look it up
   under its symbolic name(s) and then under its "Uxxxxxxxx" name,
   report an error if it is missing or needs more than one byte,
   otherwise mark the byte in the 8-bit table; the wide-character
   table is updated through ELEM with the character constant.  */
3144 seq
= charmap_find_value (charmap
, "space", 5);
3146 seq
= charmap_find_value (charmap
, "SP", 2);
3148 seq
= charmap_find_value (charmap
, "U00000020", 9);
3152 WITH_CUR_LOCALE (error (0, 0, _("\
3153 %s: character `%s' not defined while needed as default value"),
3154 "LC_CTYPE", "<space>"));
3156 else if (seq
->nbytes
!= 1)
3157 WITH_CUR_LOCALE (error (0, 0, _("\
3158 %s: character `%s' in charmap not representable with one byte"),
3159 "LC_CTYPE", "<space>"));
3161 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3163 /* No need to search. */
3164 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_space
);
3166 seq
= charmap_find_value (charmap
, "form-feed", 9);
3168 seq
= charmap_find_value (charmap
, "U0000000C", 9);
3172 WITH_CUR_LOCALE (error (0, 0, _("\
3173 %s: character `%s' not defined while needed as default value"),
3174 "LC_CTYPE", "<form-feed>"));
3176 else if (seq
->nbytes
!= 1)
3177 WITH_CUR_LOCALE (error (0, 0, _("\
3178 %s: character `%s' in charmap not representable with one byte"),
3179 "LC_CTYPE", "<form-feed>"));
3181 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3183 /* No need to search. */
3184 ELEM (ctype
, class_collection
, , L
'\f') |= BITw (tok_space
);
3187 seq
= charmap_find_value (charmap
, "newline", 7);
3189 seq
= charmap_find_value (charmap
, "U0000000A", 9);
3193 WITH_CUR_LOCALE (error (0, 0, _("\
3194 %s: character `%s' not defined while needed as default value"),
3195 "LC_CTYPE", "<newline>"));
3197 else if (seq
->nbytes
!= 1)
3198 WITH_CUR_LOCALE (error (0, 0, _("\
3199 %s: character `%s' in charmap not representable with one byte"),
3200 "LC_CTYPE", "<newline>"));
3202 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3204 /* No need to search. */
3205 ELEM (ctype
, class_collection
, , L
'\n') |= BITw (tok_space
);
3208 seq
= charmap_find_value (charmap
, "carriage-return", 15);
3210 seq
= charmap_find_value (charmap
, "U0000000D", 9);
3214 WITH_CUR_LOCALE (error (0, 0, _("\
3215 %s: character `%s' not defined while needed as default value"),
3216 "LC_CTYPE", "<carriage-return>"));
3218 else if (seq
->nbytes
!= 1)
3219 WITH_CUR_LOCALE (error (0, 0, _("\
3220 %s: character `%s' in charmap not representable with one byte"),
3221 "LC_CTYPE", "<carriage-return>"));
3223 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3225 /* No need to search. */
3226 ELEM (ctype
, class_collection
, , L
'\r') |= BITw (tok_space
);
3229 seq
= charmap_find_value (charmap
, "tab", 3);
3231 seq
= charmap_find_value (charmap
, "U00000009", 9);
3235 WITH_CUR_LOCALE (error (0, 0, _("\
3236 %s: character `%s' not defined while needed as default value"),
3237 "LC_CTYPE", "<tab>"));
3239 else if (seq
->nbytes
!= 1)
3240 WITH_CUR_LOCALE (error (0, 0, _("\
3241 %s: character `%s' in charmap not representable with one byte"),
3242 "LC_CTYPE", "<tab>"));
3244 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3246 /* No need to search. */
3247 ELEM (ctype
, class_collection
, , L
'\t') |= BITw (tok_space
);
3250 seq
= charmap_find_value (charmap
, "vertical-tab", 12);
3252 seq
= charmap_find_value (charmap
, "U0000000B", 9);
3256 WITH_CUR_LOCALE (error (0, 0, _("\
3257 %s: character `%s' not defined while needed as default value"),
3258 "LC_CTYPE", "<vertical-tab>"));
3260 else if (seq
->nbytes
!= 1)
3261 WITH_CUR_LOCALE (error (0, 0, _("\
3262 %s: character `%s' in charmap not representable with one byte"),
3263 "LC_CTYPE", "<vertical-tab>"));
3265 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3267 /* No need to search. */
3268 ELEM (ctype
, class_collection
, , L
'\v') |= BITw (tok_space
);
3271 if ((ctype
->class_done
& BITw (tok_xdigit
)) == 0)
3272 /* "If this keyword is not specified, the digits `0' to `9', the
3273 uppercase letters `A' through `F', and the lowercase letters `a'
3274 through `f', ..., shall automatically belong to this class, with
3275 implementation defined character values." [P1003.2, 2.5.2.1] */
3277 set_default (BITPOS (tok_xdigit
), '0', '9');
3278 set_default (BITPOS (tok_xdigit
), 'A', 'F');
3279 set_default (BITPOS (tok_xdigit
), 'a', 'f');
3282 if ((ctype
->class_done
& BITw (tok_blank
)) == 0)
3283 /* "If this keyword [blank] is unspecified, the characters <space> and
3284 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
3286 struct charseq
*seq
;
3288 seq
= charmap_find_value (charmap
, "space", 5);
3290 seq
= charmap_find_value (charmap
, "SP", 2);
3292 seq
= charmap_find_value (charmap
, "U00000020", 9);
3296 WITH_CUR_LOCALE (error (0, 0, _("\
3297 %s: character `%s' not defined while needed as default value"),
3298 "LC_CTYPE", "<space>"));
3300 else if (seq
->nbytes
!= 1)
3301 WITH_CUR_LOCALE (error (0, 0, _("\
3302 %s: character `%s' in charmap not representable with one byte"),
3303 "LC_CTYPE", "<space>"));
3305 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_blank
);
3307 /* No need to search. */
3308 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_blank
);
3311 seq
= charmap_find_value (charmap
, "tab", 3);
3313 seq
= charmap_find_value (charmap
, "U00000009", 9);
3317 WITH_CUR_LOCALE (error (0, 0, _("\
3318 %s: character `%s' not defined while needed as default value"),
3319 "LC_CTYPE", "<tab>"));
3321 else if (seq
->nbytes
!= 1)
3322 WITH_CUR_LOCALE (error (0, 0, _("\
3323 %s: character `%s' in charmap not representable with one byte"),
3324 "LC_CTYPE", "<tab>"));
3326 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_blank
);
3328 /* No need to search. */
3329 ELEM (ctype
, class_collection
, , L
'\t') |= BITw (tok_blank
);
3332 if ((ctype
->class_done
& BITw (tok_graph
)) == 0)
3333 /* "If this keyword [graph] is not specified, characters specified for
3334 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3335 shall belong to this character class." [P1003.2, 2.5.2.1] */
3337 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
) |
3338 BIT (tok_alpha
) | BIT (tok_digit
) | BIT (tok_xdigit
) | BIT (tok_punct
);
3339 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
) |
3340 BITw (tok_alpha
) | BITw (tok_digit
) | BITw (tok_xdigit
) |
3344 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3345 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3346 ctype
->class_collection
[cnt
] |= BITw (tok_graph
);
3348 for (cnt
= 0; cnt
< 256; ++cnt
)
3349 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3350 ctype
->class256_collection
[cnt
] |= BIT (tok_graph
);
3353 if ((ctype
->class_done
& BITw (tok_print
)) == 0)
3354 /* "If this keyword [print] is not provided, characters specified for
3355 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3356 and the <space> character shall belong to this character class."
3357 [P1003.2, 2.5.2.1] */
3359 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
) |
3360 BIT (tok_alpha
) | BIT (tok_digit
) | BIT (tok_xdigit
) | BIT (tok_punct
);
3361 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
) |
3362 BITw (tok_alpha
) | BITw (tok_digit
) | BITw (tok_xdigit
) |
3365 struct charseq
*seq
;
3367 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3368 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3369 ctype
->class_collection
[cnt
] |= BITw (tok_print
);
3371 for (cnt
= 0; cnt
< 256; ++cnt
)
3372 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3373 ctype
->class256_collection
[cnt
] |= BIT (tok_print
);
/* In addition to the classes above, <space> itself is printable.  */
3376 seq
= charmap_find_value (charmap
, "space", 5);
3378 seq
= charmap_find_value (charmap
, "SP", 2);
3380 seq
= charmap_find_value (charmap
, "U00000020", 9);
3384 WITH_CUR_LOCALE (error (0, 0, _("\
3385 %s: character `%s' not defined while needed as default value"),
3386 "LC_CTYPE", "<space>"));
3388 else if (seq
->nbytes
!= 1)
3389 WITH_CUR_LOCALE (error (0, 0, _("\
3390 %s: character `%s' in charmap not representable with one byte"),
3391 "LC_CTYPE", "<space>"));
3393 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_print
);
3395 /* No need to search. */
3396 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_print
);
3399 if (ctype
->tomap_done
[0] == 0)
3400 /* "If this keyword [toupper] is not specified, the lowercase letters
3401 `a' through `z', and their corresponding uppercase letters `A' to
3402 `Z', ..., shall automatically be included, with implementation-
3403 defined character values." [P1003.2, 2.5.2.1] */
3408 strcpy (tmp
, "<?>");
3410 for (ch
= 'a'; ch
<= 'z'; ++ch
)
3412 struct charseq
*seq_from
, *seq_to
;
/* Find the lowercase letter: by its one-character name first, then by
   its "Uxxxxxxxx" name.  */
3416 seq_from
= charmap_find_value (charmap
, &tmp
[1], 1);
3417 if (seq_from
== NULL
)
3420 sprintf (buf
, "U%08X", ch
);
3421 seq_from
= charmap_find_value (charmap
, buf
, 9);
3423 if (seq_from
== NULL
)
3426 WITH_CUR_LOCALE (error (0, 0, _("\
3427 %s: character `%s' not defined while needed as default value"),
3430 else if (seq_from
->nbytes
!= 1)
3433 WITH_CUR_LOCALE (error (0, 0, _("\
3434 %s: character `%s' needed as default value not representable with one byte"),
3439 /* This conversion is implementation defined. */
3440 tmp
[1] = (char) (ch
+ ('A' - 'a'));
3441 seq_to
= charmap_find_value (charmap
, &tmp
[1], 1);
3445 sprintf (buf
, "U%08X", ch
+ ('A' - 'a'));
3446 seq_to
= charmap_find_value (charmap
, buf
, 9);
3451 WITH_CUR_LOCALE (error (0, 0, _("\
3452 %s: character `%s' not defined while needed as default value"),
3455 else if (seq_to
->nbytes
!= 1)
3458 WITH_CUR_LOCALE (error (0, 0, _("\
3459 %s: character `%s' needed as default value not representable with one byte"),
3463 /* The index [0] is determined by the order of the
3464 `ctype_map_newP' calls in `ctype_startup'. */
3465 ctype
->map256_collection
[0][seq_from
->bytes
[0]]
3469 /* No need to search. */
3470 ELEM (ctype
, map_collection
, [0], ch
) = ch
+ ('A' - 'a');
3474 if (ctype
->tomap_done
[1] == 0)
3475 /* "If this keyword [tolower] is not specified, the mapping shall be
3476 the reverse mapping of the one specified to `toupper'." [P1003.2] */
/* Wide table: every non-zero toupper target is mapped back to
   ctype->charnames[cnt].  NOTE(review): presumably charnames[cnt] is
   the character whose toupper value sits in slot cnt; confirm against
   find_idx.  */
3478 for (cnt
= 0; cnt
< ctype
->map_collection_act
[0]; ++cnt
)
3479 if (ctype
->map_collection
[0][cnt
] != 0)
3480 ELEM (ctype
, map_collection
, [1],
3481 ctype
->map_collection
[0][cnt
])
3482 = ctype
->charnames
[cnt
];
/* 8-bit table: invert the toupper table byte by byte.  */
3484 for (cnt
= 0; cnt
< 256; ++cnt
)
3485 if (ctype
->map256_collection
[0][cnt
] != 0)
3486 ctype
->map256_collection
[1][ctype
->map256_collection
[0][cnt
]] = cnt
;
/* Exactly ten output digits are needed.  A partial list is reported as
   an error, and the missing slots are then filled from the charmap.  */
3489 if (ctype
->outdigits_act
!= 10)
3491 if (ctype
->outdigits_act
!= 0)
3492 WITH_CUR_LOCALE (error (0, 0, _("\
3493 %s: field `%s' does not contain exactly ten entries"),
3494 "LC_CTYPE", "outdigit"));
3496 for (cnt
= ctype
->outdigits_act
; cnt
< 10; ++cnt
)
/* Up to three charmap lookups are tried in turn, each only if the
   previous one returned NULL.  */
3498 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3499 (char *) digits
+ cnt
,
3502 if (ctype
->mboutdigits
[cnt
] == NULL
)
3503 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3505 strlen (longnames
[cnt
]));
3507 if (ctype
->mboutdigits
[cnt
] == NULL
)
3508 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3511 if (ctype
->mboutdigits
[cnt
] == NULL
)
3513 /* Provide a replacement. */
3514 WITH_CUR_LOCALE (error (0, 0, _("\
3515 no output digits defined and none of the standard names in the charmap")));
3517 ctype
->mboutdigits
[cnt
] = obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
3518 sizeof (struct charseq
)
3521 /* This is better than nothing. */
3522 ctype
->mboutdigits
[cnt
]->bytes
[0] = digits
[cnt
];
3523 ctype
->mboutdigits
[cnt
]->nbytes
= 1;
3526 ctype
->wcoutdigits
[cnt
] = L
'0' + cnt
;
3529 ctype
->outdigits_act
= 10;
3534 /* Construction of sparse 3-level tables.
3535 See wchar-lookup.h for their structure and the meaning of p and q. */
3542 /* Working representation. */
/* Capacity of the level1 array, counted in uint32_t entries.  */
3543 size_t level1_alloc
;
/* Capacity of the level2 array, counted in blocks of (1 << q) entries.  */
3546 size_t level2_alloc
;
/* Capacity of the level3 array, counted in blocks of (1 << p) words.  */
3549 size_t level3_alloc
;
3552 /* Compressed representation. */
3557 /* Initialize the working representation of table T: every level starts
   with zero allocated capacity and zero entries in use.
   Assumes t->p and t->q have already been set. */
3559 wctype_table_init (struct wctype_table
*t
)
/* Level 1: nothing allocated, nothing used.  */
3562 t
->level1_alloc
= t
->level1_size
= 0;
/* Level 2: likewise.  */
3564 t
->level2_alloc
= t
->level2_size
= 0;
/* Level 3: likewise.  */
3566 t
->level3_alloc
= t
->level3_size
= 0;
3569 /* Retrieve an entry: return the bit stored in table T for the wide
   character WC.  The character code is split, from the top down, into a
   level-1 index, a q-bit level-2 index, a p-bit level-3 index and a
   5-bit position inside a 32-bit word.
   NOTE(review): the value returned when a level is out of range or
   EMPTY is not visible in this extract; presumably 0.  */
3571 wctype_table_get (struct wctype_table
*t
, uint32_t wc
)
/* Everything above the low q + p + 5 bits selects the level-1 slot.  */
3573 uint32_t index1
= wc
>> (t
->q
+ t
->p
+ 5);
3574 if (index1
< t
->level1_size
)
3576 uint32_t lookup1
= t
->level1
[index1
];
3577 if (lookup1
!= EMPTY
)
/* lookup1 is a level-2 block number; a block holds 1 << q entries.  */
3579 uint32_t index2
= ((wc
>> (t
->p
+ 5)) & ((1 << t
->q
) - 1))
3580 + (lookup1
<< t
->q
);
3581 uint32_t lookup2
= t
->level2
[index2
];
3582 if (lookup2
!= EMPTY
)
/* lookup2 is a level-3 block number; a block holds 1 << p words.  */
3584 uint32_t index3
= ((wc
>> 5) & ((1 << t
->p
) - 1))
3585 + (lookup2
<< t
->p
);
3586 uint32_t lookup3
= t
->level3
[index3
];
/* The low five bits pick one of the 32 bits in that word.  */
3587 uint32_t index4
= wc
& 0x1f;
3589 return (lookup3
>> index4
) & 1;
3596 /* Add one entry: set the bit for wide character WC in table T,
   growing the level-1 array and appending new level-2 / level-3 blocks
   as needed.  */
3598 wctype_table_add (struct wctype_table
*t
, uint32_t wc
)
/* Split WC exactly as wctype_table_get does: level-1 slot, q-bit
   level-2 offset, p-bit level-3 offset, 5-bit position in the word.  */
3600 uint32_t index1
= wc
>> (t
->q
+ t
->p
+ 5);
3601 uint32_t index2
= (wc
>> (t
->p
+ 5)) & ((1 << t
->q
) - 1);
3602 uint32_t index3
= (wc
>> 5) & ((1 << t
->p
) - 1);
3603 uint32_t index4
= wc
& 0x1f;
/* Make sure level1 has a slot for index1.  */
3606 if (index1
>= t
->level1_size
)
3608 if (index1
>= t
->level1_alloc
)
/* Grow by doubling.
   NOTE(review): the statement guarded by the test below is missing
   from this extract; presumably it raises alloc above index1.  */
3610 size_t alloc
= 2 * t
->level1_alloc
;
3611 if (alloc
<= index1
)
3613 t
->level1
= (uint32_t *) xrealloc ((char *) t
->level1
,
3614 alloc
* sizeof (uint32_t));
3615 t
->level1_alloc
= alloc
;
/* Every slot up to and including index1 starts out EMPTY.  */
3617 while (index1
>= t
->level1_size
)
3618 t
->level1
[t
->level1_size
++] = EMPTY
;
/* No level-2 block for this slot yet: append one.  */
3621 if (t
->level1
[index1
] == EMPTY
)
3623 if (t
->level2_size
== t
->level2_alloc
)
/* level2_alloc counts blocks of 1 << q entries, hence the shift.  */
3625 size_t alloc
= 2 * t
->level2_alloc
+ 1;
3626 t
->level2
= (uint32_t *) xrealloc ((char *) t
->level2
,
3627 (alloc
<< t
->q
) * sizeof (uint32_t));
3628 t
->level2_alloc
= alloc
;
/* Mark the whole new block EMPTY and record its number in level1.  */
3630 i1
= t
->level2_size
<< t
->q
;
3631 i2
= (t
->level2_size
+ 1) << t
->q
;
3632 for (i
= i1
; i
< i2
; i
++)
3633 t
->level2
[i
] = EMPTY
;
3634 t
->level1
[index1
] = t
->level2_size
++;
/* Turn the in-block offset into an absolute level-2 index.  */
3637 index2
+= t
->level1
[index1
] << t
->q
;
/* No level-3 block for this entry yet: append one.  */
3639 if (t
->level2
[index2
] == EMPTY
)
3641 if (t
->level3_size
== t
->level3_alloc
)
/* level3_alloc counts blocks of 1 << p words.  */
3643 size_t alloc
= 2 * t
->level3_alloc
+ 1;
3644 t
->level3
= (uint32_t *) xrealloc ((char *) t
->level3
,
3645 (alloc
<< t
->p
) * sizeof (uint32_t));
3646 t
->level3_alloc
= alloc
;
/* NOTE(review): the body of the loop below is missing from this
   extract; presumably it clears the words of the new block.  */
3648 i1
= t
->level3_size
<< t
->p
;
3649 i2
= (t
->level3_size
+ 1) << t
->p
;
3650 for (i
= i1
; i
< i2
; i
++)
3652 t
->level2
[index2
] = t
->level3_size
++;
/* Turn the in-block offset into an absolute level-3 index.  */
3655 index3
+= t
->level2
[index2
] << t
->p
;
/* Finally set the bit for WC.  */
3657 t
->level3
[index3
] |= (uint32_t)1 << index4
;
3660 /* Finalize and shrink. */
/* wctype_table_finalize: post-process the three-level table T after all
   entries have been added.  From the visible lines it
     1. compares each level3 block j (memcmp) against blocks 0..k-1 and
        memcpy's block j into slot i, then rewrites every non-EMPTY level2
        entry through reorder3;
     2. does the same for level2 blocks, rewriting every non-EMPTY level1
        entry through reorder2;
     3. allocates t->result with xmalloc (t->result_size) and writes five
        header words followed by the level1, level2 and level3 arrays.
   T is modified in place.
   NOTE(review): this listing is missing original lines (the return type,
   braces, the declarations and updates of i, j and k, the statements that
   fill reorder3/reorder2, and several assignment left-hand sides).  Every
   remark below that says "presumably" describes such a gap and must be
   confirmed against the complete file.  */
3662 wctype_table_finalize (struct wctype_table
*t
)
/* Variable-length arrays with one slot per currently allocated level3 /
   level2 block.  They are read below as "old block index -> new block
   index" maps; the code that fills them is not visible here.  */
3665 uint32_t reorder3
[t
->level3_size
];
3666 uint32_t reorder2
[t
->level2_size
];
/* Byte offsets, relative to t->result, used when writing each level.  */
3667 uint32_t level1_offset
, level2_offset
, level3_offset
;
3669 /* Uniquify level3 blocks. */
/* One level3 block is (1 << t->p) uint32_t words; block n starts at index
   n << t->p.  k is presumably the number of distinct blocks kept so far
   (its declaration and updates are not visible).  */
3671 for (j
= 0; j
< t
->level3_size
; j
++)
3673 for (i
= 0; i
< k
; i
++)
3674 if (memcmp (&t
->level3
[i
<< t
->p
], &t
->level3
[j
<< t
->p
],
3675 (1 << t
->p
) * sizeof (uint32_t)) == 0)
3677 /* Relocate block j to block i. */
/* The conditions guarding this copy are not visible in this listing.  */
3682 memcpy (&t
->level3
[i
<< t
->p
], &t
->level3
[j
<< t
->p
],
3683 (1 << t
->p
) * sizeof (uint32_t));
/* Redirect every used level2 entry to the new index of the level3 block it
   referred to; EMPTY entries are left untouched.  */
3689 for (i
= 0; i
< (t
->level2_size
<< t
->q
); i
++)
3690 if (t
->level2
[i
] != EMPTY
)
3691 t
->level2
[i
] = reorder3
[t
->level2
[i
]];
3693 /* Uniquify level2 blocks. */
/* Same procedure one level up: a level2 block is (1 << t->q) words and
   block n starts at index n << t->q.  */
3695 for (j
= 0; j
< t
->level2_size
; j
++)
3697 for (i
= 0; i
< k
; i
++)
3698 if (memcmp (&t
->level2
[i
<< t
->q
], &t
->level2
[j
<< t
->q
],
3699 (1 << t
->q
) * sizeof (uint32_t)) == 0)
3701 /* Relocate block j to block i. */
3706 memcpy (&t
->level2
[i
<< t
->q
], &t
->level2
[j
<< t
->q
],
3707 (1 << t
->q
) * sizeof (uint32_t));
/* Redirect every used level1 entry to the new index of its level2 block.  */
3713 for (i
= 0; i
< t
->level1_size
; i
++)
3714 if (t
->level1
[i
] != EMPTY
)
3715 t
->level1
[i
] = reorder2
[t
->level1
[i
]];
3717 /* Create and fill the resulting compressed representation. */
/* Total size in bytes: 5 header words + level1_size words
   + (level2_size << q) words + (level3_size << p) words.  The left-hand
   side of this assignment is not visible; presumably t->result_size, which
   is what the xmalloc call below reads.  */
3719 5 * sizeof (uint32_t)
3720 + t
->level1_size
* sizeof (uint32_t)
3721 + (t
->level2_size
<< t
->q
) * sizeof (uint32_t)
3722 + (t
->level3_size
<< t
->p
) * sizeof (uint32_t);
3723 t
->result
= (char *) xmalloc (t
->result_size
);
/* Three running byte offsets: after the header, after level1, and after
   level2.  The assignment targets are not visible; presumably
   level1_offset, level2_offset and level3_offset in that order.  */
3726 5 * sizeof (uint32_t);
3728 5 * sizeof (uint32_t)
3729 + t
->level1_size
* sizeof (uint32_t);
3731 5 * sizeof (uint32_t)
3732 + t
->level1_size
* sizeof (uint32_t)
3733 + (t
->level2_size
<< t
->q
) * sizeof (uint32_t);
/* Header word 0: q + p + 5;  word 1: number of level1 entries;
   word 2: p + 5;  word 3: (1 << q) - 1;  word 4: (1 << p) - 1.
   NOTE(review): these look like the shift counts and index masks a reader
   of the table needs -- confirm against the lookup code.  */
3735 ((uint32_t *) t
->result
)[0] = t
->q
+ t
->p
+ 5;
3736 ((uint32_t *) t
->result
)[1] = t
->level1_size
;
3737 ((uint32_t *) t
->result
)[2] = t
->p
+ 5;
3738 ((uint32_t *) t
->result
)[3] = (1 << t
->q
) - 1;
3739 ((uint32_t *) t
->result
)[4] = (1 << t
->p
) - 1;
/* Level1 is written as byte offsets (from the start of the result) of the
   referenced level2 block.  The value stored for EMPTY entries is on a
   line missing from this listing.  */
3741 for (i
= 0; i
< t
->level1_size
; i
++)
3742 ((uint32_t *) (t
->result
+ level1_offset
))[i
] =
3743 (t
->level1
[i
] == EMPTY
3745 : (t
->level1
[i
] << t
->q
) * sizeof (uint32_t) + level2_offset
);
/* Level2 likewise becomes byte offsets of the referenced level3 block.  */
3747 for (i
= 0; i
< (t
->level2_size
<< t
->q
); i
++)
3748 ((uint32_t *) (t
->result
+ level2_offset
))[i
] =
3749 (t
->level2
[i
] == EMPTY
3751 : (t
->level2
[i
] << t
->p
) * sizeof (uint32_t) + level3_offset
);
/* Level3 words are copied unchanged.  */
3753 for (i
= 0; i
< (t
->level3_size
<< t
->p
); i
++)
3754 ((uint32_t *) (t
->result
+ level3_offset
))[i
] = t
->level3
[i
];
/* The statements controlled by these three tests are not visible;
   presumably each releases the corresponding work array when one was
   allocated -- TODO confirm.  */
3756 if (t
->level1_alloc
> 0)
3758 if (t
->level2_alloc
> 0)
3760 if (t
->level3_alloc
> 0)
/* Parameter macros for a table named wcwidth_table: element type uint8_t,
   DEFAULT value 0xff.
   NOTE(review): nothing visible here consumes these macros; presumably a
   generic table template is included on a line missing from this listing,
   and presumably it also undefines them, since TABLE and ELEMENT are
   defined again below with no visible #undef -- confirm.  */
3764 #define TABLE wcwidth_table
3765 #define ELEMENT uint8_t
3766 #define DEFAULT 0xff
/* Second set of parameters: a table named wctrans_table with int32_t
   elements.  */
3769 #define TABLE wctrans_table
3770 #define ELEMENT int32_t
/* While this macro is in effect the identifier wctrans_table_add expands to
   wctrans_table_add_internal.  It is removed again right below so that the
   name wctrans_table_add is free for the wrapper function that follows and
   calls wctrans_table_add_internal.  */
3772 #define wctrans_table_add wctrans_table_add_internal
3774 #undef wctrans_table_add
3775 /* The wctrans_table must actually store the difference between the
3776 desired result and the argument. */
/* wctrans_table_add: record in table T that character WC maps to
   MAPPED_WC.
     t          table to update (passed through unchanged)
     wc         source character
     mapped_wc  character WC maps to
   Forwards to wctrans_table_add_internal with the value MAPPED_WC - WC.
   Both operands are uint32_t, so the subtraction is unsigned and wraps
   modulo 2^32 when MAPPED_WC is smaller than WC.
   NOTE(review): the return type and braces are on lines missing from this
   listing.  */
3778 wctrans_table_add (struct wctrans_table
*t
, uint32_t wc
, uint32_t mapped_wc
)
3780 wctrans_table_add_internal (t
, wc
, mapped_wc
- wc
);
3784 /* Flattens the included transliterations into a translit list.
3785 Inserts them in the list at `cursor', and returns the new cursor. */
/* Parameters:
     ctype    locale whose translit_include chain is consumed; the chain is
              unlinked entry by entry, and ctype->default_missing may be
              filled in from an included locale.
     charmap  passed to find_locale and to the recursive call.
     cursor   address of the list link at which included entries are
              spliced in.
   Included locales are processed recursively before their own translit
   list is spliced, so nested includes come first.
   NOTE(review): this listing is missing original lines (braces, the `else'
   that presumably separates the error report from the splice code, the
   body of the loop that walks to the list end, and the return statement).
   Remarks saying "presumably" cover those gaps -- confirm against the
   complete file.  */
3786 static struct translit_t
**
3787 translit_flatten (struct locale_ctype_t
*ctype
,
3788 const struct charmap_t
*charmap
,
3789 struct translit_t
**cursor
)
3791 while (ctype
->translit_include
!= NULL
)
/* Remember the names from the head entry before it is unlinked.  */
3793 const char *copy_locale
= ctype
->translit_include
->copy_locale
;
3794 const char *copy_repertoire
= ctype
->translit_include
->copy_repertoire
;
3795 struct localedef_t
*other
;
3797 /* Unchain the include statement. During the depth-first traversal
3798 we don't want to visit any locale more than once. */
3799 ctype
->translit_include
= ctype
->translit_include
->next
;
3801 other
= find_locale (LC_CTYPE
, copy_locale
, copy_repertoire
, charmap
);
/* No locale found, or it has no LC_CTYPE data: report it.  error is called
   with a status argument of 0.  */
3803 if (other
== NULL
|| other
->categories
[LC_CTYPE
].ctype
== NULL
)
3805 WITH_CUR_LOCALE (error (0, 0, _("\
3806 %s: transliteration data from locale `%s' not available"),
3807 "LC_CTYPE", copy_locale
));
/* Presumably the else branch: `other' is dereferenced from here on.  */
3811 struct locale_ctype_t
*other_ctype
=
3812 other
->categories
[LC_CTYPE
].ctype
;
/* Flatten the included locale's own includes first; afterwards its include
   chain must be empty.  */
3814 cursor
= translit_flatten (other_ctype
, charmap
, cursor
);
3815 assert (other_ctype
->translit_include
== NULL
);
3817 if (other_ctype
->translit
!= NULL
)
3819 /* Insert the other_ctype->translit list at *cursor. */
3820 struct translit_t
*endp
= other_ctype
->translit
;
/* The loop body is not visible; presumably it advances endp to the last
   element of the included list.  */
3821 while (endp
->next
!= NULL
)
/* Link the included list in front of whatever *cursor pointed to.  */
3824 endp
->next
= *cursor
;
3825 *cursor
= other_ctype
->translit
;
3827 /* Avoid any risk of circular lists. */
3828 other_ctype
->translit
= NULL
;
/* Later insertions go after the entries just spliced in.  */
3830 cursor
= &endp
->next
;
/* Take over the included locale's default_missing only when this locale
   has none of its own.  */
3833 if (ctype
->default_missing
== NULL
)
3834 ctype
->default_missing
= other_ctype
->default_missing
;
3842 allocate_arrays (struct locale_ctype_t
*ctype
, const struct charmap_t
*charmap
,
3843 struct repertoire_t
*repertoire
)
3851 /* You wonder about this amount of memory? This is only because some
3852 users do not manage to address the array with unsigned values or
3853 data types with range >= 256. '\200' would result in the array
3854 index -128. To help these poor people we duplicate the entries for
3855 128 up to 255 below the entry for \0. */
3856 ctype
->ctype_b
= (char_class_t
*) xcalloc (256 + 128, sizeof (char_class_t
));
3857 ctype
->ctype32_b
= (char_class32_t
*) xcalloc (256, sizeof (char_class32_t
));
3858 ctype
->class_b
= (uint32_t **)
3859 xmalloc (ctype
->nr_charclass
* sizeof (uint32_t *));
3860 ctype
->class_3level
= (struct iovec
*)
3861 xmalloc (ctype
->nr_charclass
* sizeof (struct iovec
));
3863 /* This is the array accessed using the multibyte string elements. */
3864 for (idx
= 0; idx
< 256; ++idx
)
3865 ctype
->ctype_b
[128 + idx
] = ctype
->class256_collection
[idx
];
3867 /* Mirror first 127 entries. We must take care that entry -1 is not
3868 mirrored because EOF == -1. */
3869 for (idx
= 0; idx
< 127; ++idx
)
3870 ctype
->ctype_b
[idx
] = ctype
->ctype_b
[256 + idx
];
3872 /* The 32 bit array contains all characters < 0x100. */
3873 for (idx
= 0; idx
< ctype
->class_collection_act
; ++idx
)
3874 if (ctype
->charnames
[idx
] < 0x100)
3875 ctype
->ctype32_b
[ctype
->charnames
[idx
]] = ctype
->class_collection
[idx
];
3877 for (nr
= 0; nr
< ctype
->nr_charclass
; nr
++)
3879 ctype
->class_b
[nr
] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
3881 /* We only set CLASS_B for the bits in the ISO C classes, not
3882 the user defined classes. The number should not change but
3884 #define LAST_ISO_C_BIT 11
3885 if (nr
<= LAST_ISO_C_BIT
)
3886 for (idx
= 0; idx
< 256; ++idx
)
3887 if (ctype
->class256_collection
[idx
] & _ISbit (nr
))
3888 ctype
->class_b
[nr
][idx
>> 5] |= (uint32_t) 1 << (idx
& 0x1f);
3891 for (nr
= 0; nr
< ctype
->nr_charclass
; nr
++)
3893 struct wctype_table t
;
3895 t
.p
= 4; /* or: 5 */
3896 t
.q
= 7; /* or: 6 */
3897 wctype_table_init (&t
);
3899 for (idx
= 0; idx
< ctype
->class_collection_act
; ++idx
)
3900 if (ctype
->class_collection
[idx
] & _ISwbit (nr
))
3901 wctype_table_add (&t
, ctype
->charnames
[idx
]);
3903 wctype_table_finalize (&t
);
3906 WITH_CUR_LOCALE (fprintf (stderr
, _("\
3907 %s: table for class \"%s\": %lu bytes\n"),
3908 "LC_CTYPE", ctype
->classnames
[nr
],
3909 (unsigned long int) t
.result_size
));
3911 ctype
->class_3level
[nr
].iov_base
= t
.result
;
3912 ctype
->class_3level
[nr
].iov_len
= t
.result_size
;
3915 /* Room for table of mappings. */
3916 ctype
->map_b
= (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3917 ctype
->map32_b
= (uint32_t **) xmalloc (ctype
->map_collection_nr
3918 * sizeof (uint32_t *));
3919 ctype
->map_3level
= (struct iovec
*)
3920 xmalloc (ctype
->map_collection_nr
* sizeof (struct iovec
));
3922 /* Fill in all mappings. */
3923 for (idx
= 0; idx
< 2; ++idx
)
3927 /* Allocate table. */
3928 ctype
->map_b
[idx
] = (uint32_t *)
3929 xmalloc ((256 + 128) * sizeof (uint32_t));
3931 /* Copy values from collection. */
3932 for (idx2
= 0; idx2
< 256; ++idx2
)
3933 ctype
->map_b
[idx
][128 + idx2
] = ctype
->map256_collection
[idx
][idx2
];
3935 /* Mirror first 127 entries. We must take care not to map entry
3936 -1 because EOF == -1. */
3937 for (idx2
= 0; idx2
< 127; ++idx2
)
3938 ctype
->map_b
[idx
][idx2
] = ctype
->map_b
[idx
][256 + idx2
];
3940 /* EOF must map to EOF. */
3941 ctype
->map_b
[idx
][127] = EOF
;
3944 for (idx
= 0; idx
< ctype
->map_collection_nr
; ++idx
)
3948 /* Allocate table. */
3949 ctype
->map32_b
[idx
] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
3951 /* Copy values from collection. Default is identity mapping. */
3952 for (idx2
= 0; idx2
< 256; ++idx2
)
3953 ctype
->map32_b
[idx
][idx2
] =
3954 (ctype
->map_collection
[idx
][idx2
] != 0
3955 ? ctype
->map_collection
[idx
][idx2
]
3959 for (nr
= 0; nr
< ctype
->map_collection_nr
; nr
++)
3961 struct wctrans_table t
;
3965 wctrans_table_init (&t
);
3967 for (idx
= 0; idx
< ctype
->map_collection_act
[nr
]; ++idx
)
3968 if (ctype
->map_collection
[nr
][idx
] != 0)
3969 wctrans_table_add (&t
, ctype
->charnames
[idx
],
3970 ctype
->map_collection
[nr
][idx
]);
3972 wctrans_table_finalize (&t
);
3975 WITH_CUR_LOCALE (fprintf (stderr
, _("\
3976 %s: table for map \"%s\": %lu bytes\n"),
3977 "LC_CTYPE", ctype
->mapnames
[nr
],
3978 (unsigned long int) t
.result_size
));
3980 ctype
->map_3level
[nr
].iov_base
= t
.result
;
3981 ctype
->map_3level
[nr
].iov_len
= t
.result_size
;
3984 /* Extra array for class and map names. */
3985 ctype
->class_name_ptr
= (uint32_t *) xmalloc (ctype
->nr_charclass
3986 * sizeof (uint32_t));
3987 ctype
->map_name_ptr
= (uint32_t *) xmalloc (ctype
->map_collection_nr
3988 * sizeof (uint32_t));
3990 ctype
->class_offset
= _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
);
3991 ctype
->map_offset
= ctype
->class_offset
+ ctype
->nr_charclass
;
3993 /* Array for width information. Because the expected widths are very
3994 small (never larger than 2) we use only one single byte. This
3996 We put only printable characters in the table. wcwidth is specified
3997 to return -1 for non-printable characters. Doing the check here
3998 saves a run-time check.
3999 But we put L'\0' in the table. This again saves a run-time check. */
4001 struct wcwidth_table t
;
4005 wcwidth_table_init (&t
);
4007 /* First set all the printable characters of the character set to
4008 the default width. */
4010 while (iterate_table (&charmap
->char_table
, &curs
, &key
, &len
, &vdata
) == 0)
4012 struct charseq
*data
= (struct charseq
*) vdata
;
4014 if (data
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
4015 data
->ucs4
= repertoire_find_value (ctype
->repertoire
,
4018 if (data
->ucs4
!= ILLEGAL_CHAR_VALUE
)
4020 uint32_t *class_bits
=
4021 find_idx (ctype
, &ctype
->class_collection
, NULL
,
4022 &ctype
->class_collection_act
, data
->ucs4
);
4024 if (class_bits
!= NULL
&& (*class_bits
& BITw (tok_print
)))
4025 wcwidth_table_add (&t
, data
->ucs4
, charmap
->width_default
);
4029 /* Now add the explicitly specified widths. */
4030 if (charmap
->width_rules
!= NULL
)
4034 for (cnt
= 0; cnt
< charmap
->nwidth_rules
; ++cnt
)
4036 unsigned char bytes
[charmap
->mb_cur_max
];
4037 int nbytes
= charmap
->width_rules
[cnt
].from
->nbytes
;
4039 /* We have the range of characters for which the width is
4040 specified described using byte sequences of the multibyte
4041 charset. We have to convert this to UCS4 now. And we
4042 cannot simply convert the beginning and the end of the
4043 sequence, we have to iterate over the byte sequence and
4044 convert it for every single character. */
4045 memcpy (bytes
, charmap
->width_rules
[cnt
].from
->bytes
, nbytes
);
4047 while (nbytes
< charmap
->width_rules
[cnt
].to
->nbytes
4048 || memcmp (bytes
, charmap
->width_rules
[cnt
].to
->bytes
,
4051 /* Find the UCS value for `bytes'. */
4054 struct charseq
*seq
=
4055 charmap_find_symbol (charmap
, (char *) bytes
, nbytes
);
4058 wch
= ILLEGAL_CHAR_VALUE
;
4059 else if (seq
->ucs4
!= UNINITIALIZED_CHAR_VALUE
)
4062 wch
= repertoire_find_value (ctype
->repertoire
, seq
->name
,
4063 strlen (seq
->name
));
4065 if (wch
!= ILLEGAL_CHAR_VALUE
)
4067 /* Store the value. */
4068 uint32_t *class_bits
=
4069 find_idx (ctype
, &ctype
->class_collection
, NULL
,
4070 &ctype
->class_collection_act
, wch
);
4072 if (class_bits
!= NULL
&& (*class_bits
& BITw (tok_print
)))
4073 wcwidth_table_add (&t
, wch
,
4074 charmap
->width_rules
[cnt
].width
);
4077 /* "Increment" the bytes sequence. */
4079 while (inner
>= 0 && bytes
[inner
] == 0xff)
4084 /* We have to extend the byte sequence. */
4085 if (nbytes
>= charmap
->width_rules
[cnt
].to
->nbytes
)
4089 memset (&bytes
[1], 0, nbytes
);
4095 while (++inner
< nbytes
)
4102 /* Set the width of L'\0' to 0. */
4103 wcwidth_table_add (&t
, 0, 0);
4105 wcwidth_table_finalize (&t
);
4108 WITH_CUR_LOCALE (fprintf (stderr
, _("%s: table for width: %lu bytes\n"),
4109 "LC_CTYPE", (unsigned long int) t
.result_size
));
4111 ctype
->width
.iov_base
= t
.result
;
4112 ctype
->width
.iov_len
= t
.result_size
;
4115 /* Set MB_CUR_MAX. */
4116 ctype
->mb_cur_max
= charmap
->mb_cur_max
;
4118 /* Now determine the table for the transliteration information.
4120 XXX It is not yet clear to me whether it is worth implementing a
4121 complicated algorithm which uses a hash table to locate the entries.
4122 For now I'll use a simple array which can be searched using binary
4124 if (ctype
->translit_include
!= NULL
)
4125 /* Traverse the locales mentioned in the `include' statements in a
4126 depth-first way and fold in their transliteration information. */
4127 translit_flatten (ctype
, charmap
, &ctype
->translit
);
4129 if (ctype
->translit
!= NULL
)
4131 /* First count how many entries we have. This is the upper limit
4132 since some entries from the included files might be overwritten. */
4135 struct translit_t
*runp
= ctype
->translit
;
4136 struct translit_t
**sorted
;
4137 size_t from_len
, to_len
;
4139 while (runp
!= NULL
)
4145 /* Next we allocate an array large enough and fill in the values. */
4146 sorted
= (struct translit_t
**) alloca (number
4147 * sizeof (struct translit_t
**));
4148 runp
= ctype
->translit
;
4152 /* Search for the place where to insert this string.
4153 XXX Better use a real sorting algorithm later. */
4157 while (idx
< number
)
4159 int res
= wcscmp ((const wchar_t *) sorted
[idx
]->from
,
4160 (const wchar_t *) runp
->from
);
4175 memmove (&sorted
[idx
+ 1], &sorted
[idx
],
4176 (number
- idx
) * sizeof (struct translit_t
*));
4183 while (runp
!= NULL
);
4185 /* The next step is putting all the possible transliteration
4186 strings in one memory block so that we can write it out.
4187 We need several different blocks:
4188 - index to the from-string array
4190 - index to the to-string array
4193 from_len
= to_len
= 0;
4194 for (cnt
= 0; cnt
< number
; ++cnt
)
4196 struct translit_to_t
*srunp
;
4197 from_len
+= wcslen ((const wchar_t *) sorted
[cnt
]->from
) + 1;
4198 srunp
= sorted
[cnt
]->to
;
4199 while (srunp
!= NULL
)
4201 to_len
+= wcslen ((const wchar_t *) srunp
->str
) + 1;
4202 srunp
= srunp
->next
;
4204 /* Plus one for the extra NUL character marking the end of
4205 the list for the current entry. */
4209 /* We can allocate the arrays for the results. */
4210 ctype
->translit_from_idx
= xmalloc (number
* sizeof (uint32_t));
4211 ctype
->translit_from_tbl
= xmalloc (from_len
* sizeof (uint32_t));
4212 ctype
->translit_to_idx
= xmalloc (number
* sizeof (uint32_t));
4213 ctype
->translit_to_tbl
= xmalloc (to_len
* sizeof (uint32_t));
4217 for (cnt
= 0; cnt
< number
; ++cnt
)
4220 struct translit_to_t
*srunp
;
4222 ctype
->translit_from_idx
[cnt
] = from_len
;
4223 ctype
->translit_to_idx
[cnt
] = to_len
;
4225 len
= wcslen ((const wchar_t *) sorted
[cnt
]->from
) + 1;
4226 wmemcpy ((wchar_t *) &ctype
->translit_from_tbl
[from_len
],
4227 (const wchar_t *) sorted
[cnt
]->from
, len
);
4230 ctype
->translit_to_idx
[cnt
] = to_len
;
4231 srunp
= sorted
[cnt
]->to
;
4232 while (srunp
!= NULL
)
4234 len
= wcslen ((const wchar_t *) srunp
->str
) + 1;
4235 wmemcpy ((wchar_t *) &ctype
->translit_to_tbl
[to_len
],
4236 (const wchar_t *) srunp
->str
, len
);
4238 srunp
= srunp
->next
;
4240 ctype
->translit_to_tbl
[to_len
++] = L
'\0';
4243 /* Store the information about the length. */
4244 ctype
->translit_idx_size
= number
;
4245 ctype
->translit_from_tbl_size
= from_len
* sizeof (uint32_t);
4246 ctype
->translit_to_tbl_size
= to_len
* sizeof (uint32_t);
4250 /* Provide some dummy pointers since we have nothing to write out. */
4251 static uint32_t no_str
= { 0 };
4253 ctype
->translit_from_idx
= &no_str
;
4254 ctype
->translit_from_tbl
= &no_str
;
4255 ctype
->translit_to_tbl
= &no_str
;
4256 ctype
->translit_idx_size
= 0;
4257 ctype
->translit_from_tbl_size
= 0;
4258 ctype
->translit_to_tbl_size
= 0;