1 /* Copyright (C) 1995-2002, 2003, 2004 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
36 #include "localedef.h"
38 #include "localeinfo.h"
40 #include "linereader.h"
41 #include "locfile-token.h"
47 #ifdef PREDEFINED_CLASSES
48 /* These are the extra bits not in wctype.h since these are not preallocated
50 # define _ISwspecial1 (1 << 29)
51 # define _ISwspecial2 (1 << 30)
52 # define _ISwspecial3 (1 << 31)
56 /* The bit used for representing a special class.
   BITPOS turns a class token into a zero-based bit position by
   subtracting tok_upper.  BIT and BITw feed that position to _ISbit
   and _ISwbit respectively to obtain the corresponding mask.  */
57 #define BITPOS(class) ((class) - tok_upper)
58 #define BIT(class) (_ISbit (BITPOS (class)))
59 #define BITw(class) (_ISwbit (BITPOS (class)))
/* Expand to the object that find_idx returns a pointer to for VALUE in
   the table COLLECTION of CTYPE, so the expansion can be read or
   assigned to.  The companion members COLLECTION_max and COLLECTION_act
   are passed along by token pasting.  IDX is spliced in textually after
   each member name: pass a subscript when the members are arrays, or
   leave it empty when they are not.  CTYPE is not parenthesized, so it
   should be a plain identifier.  */
61 #define ELEM(ctype, collection, idx, value) \
62 *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx, \
63 &ctype->collection##_act idx, value)
66 /* To be compatible with former implementations we for now restrict
67 the number of bits for character classes to 16. When compatibility
68 is not necessary anymore increase the number to 32. */
69 #define char_class_t uint16_t
70 #define char_class32_t uint32_t
73 /* Type to describe a transliteration action. We have a possibly
74 multiple character from-string and a set of multiple character
75 to-strings. All are 32bit values since this is what is used in
76 the gconv functions. */
81 struct translit_to_t
*next
;
91 struct translit_to_t
*to
;
93 struct translit_t
*next
;
96 struct translit_ignore_t
105 struct translit_ignore_t
*next
;
109 /* Type to describe a transliteration include statement. */
110 struct translit_include_t
112 const char *copy_locale
;
113 const char *copy_repertoire
;
115 struct translit_include_t
*next
;
119 /* Sparse table of uint32_t. */
120 #define TABLE idx_table
121 #define ELEMENT uint32_t
122 #define DEFAULT ((uint32_t) ~0)
127 /* The real definition of the struct for the LC_CTYPE locale. */
128 struct locale_ctype_t
131 size_t charnames_max
;
132 size_t charnames_act
;
133 /* An index lookup table, to speed up find_idx. */
134 struct idx_table charnames_idx
;
136 struct repertoire_t
*repertoire
;
138 /* We will allow up to 8 * sizeof (uint32_t) character classes. */
139 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
141 const char *classnames
[MAX_NR_CHARCLASS
];
142 uint32_t last_class_char
;
143 uint32_t class256_collection
[256];
144 uint32_t *class_collection
;
145 size_t class_collection_max
;
146 size_t class_collection_act
;
148 uint32_t class_offset
;
150 struct charseq
**mbdigits
;
157 struct charseq
*mboutdigits
[10];
158 uint32_t wcoutdigits
[10];
159 size_t outdigits_act
;
161 /* If the following number ever turns out to be too small simply
162 increase it. But I doubt it will. --drepper@gnu */
163 #define MAX_NR_CHARMAP 16
164 const char *mapnames
[MAX_NR_CHARMAP
];
165 uint32_t *map_collection
[MAX_NR_CHARMAP
];
166 uint32_t map256_collection
[2][256];
167 size_t map_collection_max
[MAX_NR_CHARMAP
];
168 size_t map_collection_act
[MAX_NR_CHARMAP
];
169 size_t map_collection_nr
;
171 int tomap_done
[MAX_NR_CHARMAP
];
174 /* Transliteration information. */
175 struct translit_include_t
*translit_include
;
176 struct translit_t
*translit
;
177 struct translit_ignore_t
*translit_ignore
;
178 uint32_t ntranslit_ignore
;
180 uint32_t *default_missing
;
181 const char *default_missing_file
;
182 size_t default_missing_lineno
;
184 uint32_t to_nonascii
;
186 /* The arrays for the binary representation. */
187 char_class_t
*ctype_b
;
188 char_class32_t
*ctype32_b
;
192 struct iovec
*class_3level
;
193 struct iovec
*map_3level
;
194 uint32_t *class_name_ptr
;
195 uint32_t *map_name_ptr
;
198 const char *codeset_name
;
199 uint32_t *translit_from_idx
;
200 uint32_t *translit_from_tbl
;
201 uint32_t *translit_to_idx
;
202 uint32_t *translit_to_tbl
;
203 uint32_t translit_idx_size
;
204 size_t translit_from_tbl_size
;
205 size_t translit_to_tbl_size
;
207 struct obstack mempool
;
211 /* Marker for an empty slot. This has the value 0xFFFFFFFF, regardless
212 of whether 'int' is 16 bit, 32 bit, or 64 bit. */
213 #define EMPTY ((uint32_t) ~0)
216 #define obstack_chunk_alloc xmalloc
217 #define obstack_chunk_free free
220 /* Prototypes for local functions. */
221 static void ctype_startup (struct linereader
*lr
, struct localedef_t
*locale
,
222 const struct charmap_t
*charmap
,
223 struct localedef_t
*copy_locale
,
225 static void ctype_class_new (struct linereader
*lr
,
226 struct locale_ctype_t
*ctype
, const char *name
);
227 static void ctype_map_new (struct linereader
*lr
,
228 struct locale_ctype_t
*ctype
,
229 const char *name
, const struct charmap_t
*charmap
);
230 static uint32_t *find_idx (struct locale_ctype_t
*ctype
, uint32_t **table
,
231 size_t *max
, size_t *act
, unsigned int idx
);
232 static void set_class_defaults (struct locale_ctype_t
*ctype
,
233 const struct charmap_t
*charmap
,
234 struct repertoire_t
*repertoire
);
235 static void allocate_arrays (struct locale_ctype_t
*ctype
,
236 const struct charmap_t
*charmap
,
237 struct repertoire_t
*repertoire
);
240 static const char *longnames
[] =
242 "zero", "one", "two", "three", "four",
243 "five", "six", "seven", "eight", "nine"
245 static const char *uninames
[] =
247 "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
248 "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
250 static const unsigned char digits
[] = "0123456789";
254 ctype_startup (struct linereader
*lr
, struct localedef_t
*locale
,
255 const struct charmap_t
*charmap
,
256 struct localedef_t
*copy_locale
, int ignore_content
)
259 struct locale_ctype_t
*ctype
;
261 if (!ignore_content
&& locale
->categories
[LC_CTYPE
].ctype
== NULL
)
263 if (copy_locale
== NULL
)
265 /* Allocate the needed room. */
266 locale
->categories
[LC_CTYPE
].ctype
= ctype
=
267 (struct locale_ctype_t
*) xcalloc (1,
268 sizeof (struct locale_ctype_t
));
270 /* We have seen no names yet. */
271 ctype
->charnames_max
= charmap
->mb_cur_max
== 1 ? 256 : 512;
273 (unsigned int *) xmalloc (ctype
->charnames_max
274 * sizeof (unsigned int));
275 for (cnt
= 0; cnt
< 256; ++cnt
)
276 ctype
->charnames
[cnt
] = cnt
;
277 ctype
->charnames_act
= 256;
278 idx_table_init (&ctype
->charnames_idx
);
280 /* Fill character class information. */
281 ctype
->last_class_char
= ILLEGAL_CHAR_VALUE
;
282 /* The order of the following instructions determines the bit
284 ctype_class_new (lr
, ctype
, "upper");
285 ctype_class_new (lr
, ctype
, "lower");
286 ctype_class_new (lr
, ctype
, "alpha");
287 ctype_class_new (lr
, ctype
, "digit");
288 ctype_class_new (lr
, ctype
, "xdigit");
289 ctype_class_new (lr
, ctype
, "space");
290 ctype_class_new (lr
, ctype
, "print");
291 ctype_class_new (lr
, ctype
, "graph");
292 ctype_class_new (lr
, ctype
, "blank");
293 ctype_class_new (lr
, ctype
, "cntrl");
294 ctype_class_new (lr
, ctype
, "punct");
295 ctype_class_new (lr
, ctype
, "alnum");
296 #ifdef PREDEFINED_CLASSES
297 /* The following are extensions from ISO 14652. */
298 ctype_class_new (lr
, ctype
, "left_to_right");
299 ctype_class_new (lr
, ctype
, "right_to_left");
300 ctype_class_new (lr
, ctype
, "num_terminator");
301 ctype_class_new (lr
, ctype
, "num_separator");
302 ctype_class_new (lr
, ctype
, "segment_separator");
303 ctype_class_new (lr
, ctype
, "block_separator");
304 ctype_class_new (lr
, ctype
, "direction_control");
305 ctype_class_new (lr
, ctype
, "sym_swap_layout");
306 ctype_class_new (lr
, ctype
, "char_shape_selector");
307 ctype_class_new (lr
, ctype
, "num_shape_selector");
308 ctype_class_new (lr
, ctype
, "non_spacing");
309 ctype_class_new (lr
, ctype
, "non_spacing_level3");
310 ctype_class_new (lr
, ctype
, "normal_connect");
311 ctype_class_new (lr
, ctype
, "r_connect");
312 ctype_class_new (lr
, ctype
, "no_connect");
313 ctype_class_new (lr
, ctype
, "no_connect-space");
314 ctype_class_new (lr
, ctype
, "vowel_connect");
317 ctype
->class_collection_max
= charmap
->mb_cur_max
== 1 ? 256 : 512;
318 ctype
->class_collection
319 = (uint32_t *) xcalloc (sizeof (unsigned long int),
320 ctype
->class_collection_max
);
321 ctype
->class_collection_act
= 256;
323 /* Fill character map information. */
324 ctype
->last_map_idx
= MAX_NR_CHARMAP
;
325 ctype_map_new (lr
, ctype
, "toupper", charmap
);
326 ctype_map_new (lr
, ctype
, "tolower", charmap
);
327 #ifdef PREDEFINED_CLASSES
328 ctype_map_new (lr
, ctype
, "tosymmetric", charmap
);
331 /* Fill first 256 entries in `toXXX' arrays. */
332 for (cnt
= 0; cnt
< 256; ++cnt
)
334 ctype
->map_collection
[0][cnt
] = cnt
;
335 ctype
->map_collection
[1][cnt
] = cnt
;
336 #ifdef PREDEFINED_CLASSES
337 ctype
->map_collection
[2][cnt
] = cnt
;
339 ctype
->map256_collection
[0][cnt
] = cnt
;
340 ctype
->map256_collection
[1][cnt
] = cnt
;
343 if (enc_not_ascii_compatible
)
344 ctype
->to_nonascii
= 1;
346 obstack_init (&ctype
->mempool
);
349 ctype
= locale
->categories
[LC_CTYPE
].ctype
=
350 copy_locale
->categories
[LC_CTYPE
].ctype
;
356 ctype_finish (struct localedef_t
*locale
, const struct charmap_t
*charmap
)
358 /* See POSIX.2, table 2-6 for the meaning of the following table. */
363 const char allow
[NCLASS
];
365 valid_table
[NCLASS
] =
367 /* The order is important. See token.h for more information.
368 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
369 { "upper", "--MX-XDDXXX-" },
370 { "lower", "--MX-XDDXXX-" },
371 { "alpha", "---X-XDDXXX-" },
372 { "digit", "XXX--XDDXXX-" },
373 { "xdigit", "-----XDDXXX-" },
374 { "space", "XXXXX------X" },
375 { "print", "---------X--" },
376 { "graph", "---------X--" },
377 { "blank", "XXXXXM-----X" },
378 { "cntrl", "XXXXX-XX--XX" },
379 { "punct", "XXXXX-DD-X-X" },
380 { "alnum", "-----XDDXXX-" }
384 uint32_t space_value
;
385 struct charseq
*space_seq
;
386 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
393 /* Now resolve copying and also handle completely missing definitions. */
396 const char *repertoire_name
;
398 /* First see whether we were supposed to copy. If yes, find the
399 actual definition. */
400 if (locale
->copy_name
[LC_CTYPE
] != NULL
)
402 /* Find the copying locale. This has to happen transitively since
403 the locale we are copying from might itself be copying another one. */
404 struct localedef_t
*from
= locale
;
407 from
= find_locale (LC_CTYPE
, from
->copy_name
[LC_CTYPE
],
408 from
->repertoire_name
, charmap
);
409 while (from
->categories
[LC_CTYPE
].ctype
== NULL
410 && from
->copy_name
[LC_CTYPE
] != NULL
);
412 ctype
= locale
->categories
[LC_CTYPE
].ctype
413 = from
->categories
[LC_CTYPE
].ctype
;
416 /* If there is still no definition issue a warning and create an
421 WITH_CUR_LOCALE (error (0, 0, _("\
422 No definition for %s category found"), "LC_CTYPE"));
423 ctype_startup (NULL
, locale
, charmap
, NULL
, 0);
424 ctype
= locale
->categories
[LC_CTYPE
].ctype
;
427 /* Get the repertoire we have to use. */
428 repertoire_name
= locale
->repertoire_name
?: repertoire_global
;
429 if (repertoire_name
!= NULL
)
430 ctype
->repertoire
= repertoire_read (repertoire_name
);
433 /* We need the name of the currently used 8-bit character set to
434 make correct conversion between this 8-bit representation and the
435 ISO 10646 character set used internally for wide characters. */
436 ctype
->codeset_name
= charmap
->code_set_name
;
437 if (ctype
->codeset_name
== NULL
)
440 WITH_CUR_LOCALE (error (0, 0, _("\
441 No character set name specified in charmap")));
442 ctype
->codeset_name
= "//UNKNOWN//";
445 /* Set default value for classes not specified. */
446 set_class_defaults (ctype
, charmap
, ctype
->repertoire
);
448 /* Check according to table. */
449 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
451 uint32_t tmp
= ctype
->class_collection
[cnt
];
455 for (cls1
= 0; cls1
< NCLASS
; ++cls1
)
456 if ((tmp
& _ISwbit (cls1
)) != 0)
457 for (cls2
= 0; cls2
< NCLASS
; ++cls2
)
458 if (valid_table
[cls1
].allow
[cls2
] != '-')
460 int eq
= (tmp
& _ISwbit (cls2
)) != 0;
461 switch (valid_table
[cls1
].allow
[cls2
])
466 uint32_t value
= ctype
->charnames
[cnt
];
469 WITH_CUR_LOCALE (error (0, 0, _("\
470 character L'\\u%0*x' in class `%s' must be in class `%s'"),
471 value
> 0xffff ? 8 : 4,
473 valid_table
[cls1
].name
,
474 valid_table
[cls2
].name
));
481 uint32_t value
= ctype
->charnames
[cnt
];
484 WITH_CUR_LOCALE (error (0, 0, _("\
485 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
486 value
> 0xffff ? 8 : 4,
488 valid_table
[cls1
].name
,
489 valid_table
[cls2
].name
));
494 ctype
->class_collection
[cnt
] |= _ISwbit (cls2
);
498 WITH_CUR_LOCALE (error (5, 0, _("\
499 internal error in %s, line %u"), __FUNCTION__
, __LINE__
));
505 for (cnt
= 0; cnt
< 256; ++cnt
)
507 uint32_t tmp
= ctype
->class256_collection
[cnt
];
511 for (cls1
= 0; cls1
< NCLASS
; ++cls1
)
512 if ((tmp
& _ISbit (cls1
)) != 0)
513 for (cls2
= 0; cls2
< NCLASS
; ++cls2
)
514 if (valid_table
[cls1
].allow
[cls2
] != '-')
516 int eq
= (tmp
& _ISbit (cls2
)) != 0;
517 switch (valid_table
[cls1
].allow
[cls2
])
524 snprintf (buf
, sizeof buf
, "\\%Zo", cnt
);
527 WITH_CUR_LOCALE (error (0, 0, _("\
528 character '%s' in class `%s' must be in class `%s'"),
530 valid_table
[cls1
].name
,
531 valid_table
[cls2
].name
));
540 snprintf (buf
, sizeof buf
, "\\%Zo", cnt
);
543 WITH_CUR_LOCALE (error (0, 0, _("\
544 character '%s' in class `%s' must not be in class `%s'"),
546 valid_table
[cls1
].name
,
547 valid_table
[cls2
].name
));
552 ctype
->class256_collection
[cnt
] |= _ISbit (cls2
);
556 WITH_CUR_LOCALE (error (5, 0, _("\
557 internal error in %s, line %u"), __FUNCTION__
, __LINE__
));
563 /* ... and now test <SP> as a special case. */
565 if (((cnt
= BITPOS (tok_space
),
566 (ELEM (ctype
, class_collection
, , space_value
)
567 & BITw (tok_space
)) == 0)
568 || (cnt
= BITPOS (tok_blank
),
569 (ELEM (ctype
, class_collection
, , space_value
)
570 & BITw (tok_blank
)) == 0)))
573 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
574 valid_table
[cnt
].name
));
576 else if (((cnt
= BITPOS (tok_punct
),
577 (ELEM (ctype
, class_collection
, , space_value
)
578 & BITw (tok_punct
)) != 0)
579 || (cnt
= BITPOS (tok_graph
),
580 (ELEM (ctype
, class_collection
, , space_value
)
585 WITH_CUR_LOCALE (error (0, 0, _("\
586 <SP> character must not be in class `%s'"),
587 valid_table
[cnt
].name
));
590 ELEM (ctype
, class_collection
, , space_value
) |= BITw (tok_print
);
592 space_seq
= charmap_find_value (charmap
, "SP", 2);
593 if (space_seq
== NULL
)
594 space_seq
= charmap_find_value (charmap
, "space", 5);
595 if (space_seq
== NULL
)
596 space_seq
= charmap_find_value (charmap
, "U00000020", 9);
597 if (space_seq
== NULL
|| space_seq
->nbytes
!= 1)
600 WITH_CUR_LOCALE (error (0, 0, _("\
601 character <SP> not defined in character map")));
603 else if (((cnt
= BITPOS (tok_space
),
604 (ctype
->class256_collection
[space_seq
->bytes
[0]]
605 & BIT (tok_space
)) == 0)
606 || (cnt
= BITPOS (tok_blank
),
607 (ctype
->class256_collection
[space_seq
->bytes
[0]]
608 & BIT (tok_blank
)) == 0)))
611 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
612 valid_table
[cnt
].name
));
614 else if (((cnt
= BITPOS (tok_punct
),
615 (ctype
->class256_collection
[space_seq
->bytes
[0]]
616 & BIT (tok_punct
)) != 0)
617 || (cnt
= BITPOS (tok_graph
),
618 (ctype
->class256_collection
[space_seq
->bytes
[0]]
619 & BIT (tok_graph
)) != 0)))
622 WITH_CUR_LOCALE (error (0, 0, _("\
623 <SP> character must not be in class `%s'"),
624 valid_table
[cnt
].name
));
627 ctype
->class256_collection
[space_seq
->bytes
[0]] |= BIT (tok_print
);
629 /* Now that the tests are done make sure the name array contains all
630 characters which are handled in the WIDTH section of the
631 character set definition file. */
632 if (charmap
->width_rules
!= NULL
)
633 for (cnt
= 0; cnt
< charmap
->nwidth_rules
; ++cnt
)
635 unsigned char bytes
[charmap
->mb_cur_max
];
636 int nbytes
= charmap
->width_rules
[cnt
].from
->nbytes
;
638 /* We have the range of characters for which the width is
639 specified, described using byte sequences of the multibyte
640 charset. We have to convert this to UCS4 now. And we
641 cannot simply convert the beginning and the end of the
642 sequence, we have to iterate over the byte sequence and
643 convert it for every single character. */
644 memcpy (bytes
, charmap
->width_rules
[cnt
].from
->bytes
, nbytes
);
646 while (nbytes
< charmap
->width_rules
[cnt
].to
->nbytes
647 || memcmp (bytes
, charmap
->width_rules
[cnt
].to
->bytes
,
650 /* Find the UCS value for `bytes'. */
653 struct charseq
*seq
= charmap_find_symbol (charmap
, bytes
, nbytes
);
656 wch
= ILLEGAL_CHAR_VALUE
;
657 else if (seq
->ucs4
!= UNINITIALIZED_CHAR_VALUE
)
660 wch
= repertoire_find_value (ctype
->repertoire
, seq
->name
,
663 if (wch
!= ILLEGAL_CHAR_VALUE
)
664 /* We are only interested in the side-effects of the
665 `find_idx' call. It will add appropriate entries in
666 the name array if this is necessary. */
667 (void) find_idx (ctype
, NULL
, NULL
, NULL
, wch
);
669 /* "Increment" the bytes sequence. */
671 while (inner
>= 0 && bytes
[inner
] == 0xff)
676 /* We have to extend the byte sequence. */
677 if (nbytes
>= charmap
->width_rules
[cnt
].to
->nbytes
)
681 memset (&bytes
[1], 0, nbytes
);
687 while (++inner
< nbytes
)
693 /* Now set all the other characters of the character set to the
696 while (iterate_table (&charmap
->char_table
, &curs
, &key
, &len
, &vdata
) == 0)
698 struct charseq
*data
= (struct charseq
*) vdata
;
700 if (data
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
701 data
->ucs4
= repertoire_find_value (ctype
->repertoire
,
704 if (data
->ucs4
!= ILLEGAL_CHAR_VALUE
)
705 (void) find_idx (ctype
, NULL
, NULL
, NULL
, data
->ucs4
);
708 /* There must be a multiple of 10 digits. */
709 if (ctype
->mbdigits_act
% 10 != 0)
711 assert (ctype
->mbdigits_act
== ctype
->wcdigits_act
);
712 ctype
->wcdigits_act
-= ctype
->mbdigits_act
% 10;
713 ctype
->mbdigits_act
-= ctype
->mbdigits_act
% 10;
714 WITH_CUR_LOCALE (error (0, 0, _("\
715 `digit' category has not entries in groups of ten")));
718 /* Check the input digits. There must be a multiple of ten available.
719 In each group it could be that one or the other character is missing.
720 In this case the whole group must be removed. */
722 while (cnt
< ctype
->mbdigits_act
)
725 for (inner
= 0; inner
< 10; ++inner
)
726 if (ctype
->mbdigits
[cnt
+ inner
] == NULL
)
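733 /* Remove the group. NOTE(review): the element count passed to
   memmove here is computed from wcdigits_act, not mbdigits_act, although
   the array being compacted is mbdigits. This is only equivalent while
   both counts are equal -- confirm. */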
734 memmove (&ctype
->mbdigits
[cnt
], &ctype
->mbdigits
[cnt
+ 10],
735 ((ctype
->wcdigits_act
- cnt
- 10)
736 * sizeof (ctype
->mbdigits
[0])));
737 ctype
->mbdigits_act
-= 10;
741 /* If no input digits are given use the default. */
742 if (ctype
->mbdigits_act
== 0)
744 if (ctype
->mbdigits_max
== 0)
746 ctype
->mbdigits
= obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
747 10 * sizeof (struct charseq
*));
748 ctype
->mbdigits_max
= 10;
751 for (cnt
= 0; cnt
< 10; ++cnt
)
753 ctype
->mbdigits
[cnt
] = charmap_find_symbol (charmap
,
755 if (ctype
->mbdigits
[cnt
] == NULL
)
757 ctype
->mbdigits
[cnt
] = charmap_find_symbol (charmap
,
759 strlen (longnames
[cnt
]));
760 if (ctype
->mbdigits
[cnt
] == NULL
)
762 /* Hum, this ain't good. */
763 WITH_CUR_LOCALE (error (0, 0, _("\
764 no input digits defined and none of the standard names in the charmap")));
766 ctype
->mbdigits
[cnt
] = obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
767 sizeof (struct charseq
) + 1);
769 /* This is better than nothing. */
770 ctype
->mbdigits
[cnt
]->bytes
[0] = digits
[cnt
];
771 ctype
->mbdigits
[cnt
]->nbytes
= 1;
776 ctype
->mbdigits_act
= 10;
779 /* Check the wide character input digits. There must be a multiple
780 of ten available. In each group it could be that one or the other
781 character is missing. In this case the whole group must be
784 while (cnt
< ctype
->wcdigits_act
)
787 for (inner
= 0; inner
< 10; ++inner
)
788 if (ctype
->wcdigits
[cnt
+ inner
] == ILLEGAL_CHAR_VALUE
)
795 /* Remove the group. */
796 memmove (&ctype
->wcdigits
[cnt
], &ctype
->wcdigits
[cnt
+ 10],
797 ((ctype
->wcdigits_act
- cnt
- 10)
798 * sizeof (ctype
->wcdigits
[0])));
799 ctype
->wcdigits_act
-= 10;
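803 /* If no wide character input digits are given use the default.
   NOTE(review): this branch finishes by setting mbdigits_act, not
   wcdigits_act, to 10 -- confirm that this is intended and not a
   copy of the multibyte case. */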
804 if (ctype
->wcdigits_act
== 0)
806 if (ctype
->wcdigits_max
== 0)
808 ctype
->wcdigits
= obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
809 10 * sizeof (uint32_t));
810 ctype
->wcdigits_max
= 10;
813 for (cnt
= 0; cnt
< 10; ++cnt
)
814 ctype
->wcdigits
[cnt
] = L
'0' + cnt
;
816 ctype
->mbdigits_act
= 10;
819 /* Check the outdigits. */
821 for (cnt
= 0; cnt
< 10; ++cnt
)
822 if (ctype
->mboutdigits
[cnt
] == NULL
)
824 static struct charseq replace
[2];
828 WITH_CUR_LOCALE (error (0, 0, _("\
829 not all characters used in `outdigit' are available in the charmap")));
833 replace
[0].nbytes
= 1;
834 replace
[0].bytes
[0] = '?';
835 replace
[0].bytes
[1] = '\0';
836 ctype
->mboutdigits
[cnt
] = &replace
[0];
840 for (cnt
= 0; cnt
< 10; ++cnt
)
841 if (ctype
->wcoutdigits
[cnt
] == 0)
845 WITH_CUR_LOCALE (error (0, 0, _("\
846 not all characters used in `outdigit' are available in the repertoire")));
850 ctype
->wcoutdigits
[cnt
] = L
'?';
853 /* Sort the entries in the translit_ignore list. */
854 if (ctype
->translit_ignore
!= NULL
)
856 struct translit_ignore_t
*firstp
= ctype
->translit_ignore
;
857 struct translit_ignore_t
*runp
;
859 ctype
->ntranslit_ignore
= 1;
861 for (runp
= firstp
->next
; runp
!= NULL
; runp
= runp
->next
)
863 struct translit_ignore_t
*lastp
= NULL
;
864 struct translit_ignore_t
*cmpp
;
866 ++ctype
->ntranslit_ignore
;
868 for (cmpp
= firstp
; cmpp
!= NULL
; lastp
= cmpp
, cmpp
= cmpp
->next
)
869 if (runp
->from
< cmpp
->from
)
877 ctype
->translit_ignore
= firstp
;
883 ctype_output (struct localedef_t
*locale
, const struct charmap_t
*charmap
,
884 const char *output_path
)
886 static const char nulbytes
[4] = { 0, 0, 0, 0 };
887 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
888 const size_t nelems
= (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
)
889 + ctype
->nr_charclass
+ ctype
->map_collection_nr
);
890 struct iovec
*iov
= alloca (sizeof *iov
891 * (2 + nelems
+ 2 * ctype
->nr_charclass
892 + ctype
->map_collection_nr
+ 4));
893 struct locale_file data
;
894 uint32_t *idx
= alloca (sizeof *idx
* (nelems
+ 1));
895 uint32_t default_missing_len
;
896 size_t elem
, cnt
, offset
, total
;
899 /* Now prepare the output: Find the sizes of the table we can use. */
900 allocate_arrays (ctype
, charmap
, ctype
->repertoire
);
902 data
.magic
= LIMAGIC (LC_CTYPE
);
904 iov
[0].iov_base
= (void *) &data
;
905 iov
[0].iov_len
= sizeof (data
);
907 iov
[1].iov_base
= (void *) idx
;
908 iov
[1].iov_len
= nelems
* sizeof (uint32_t);
910 idx
[0] = iov
[0].iov_len
+ iov
[1].iov_len
;
913 for (elem
= 0; elem
< nelems
; ++elem
)
915 if (elem
< _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
))
918 #define CTYPE_EMPTY(name) \
920 iov[2 + elem + offset].iov_base = NULL; \
921 iov[2 + elem + offset].iov_len = 0; \
922 idx[elem + 1] = idx[elem]; \
925 CTYPE_EMPTY(_NL_CTYPE_GAP1
);
926 CTYPE_EMPTY(_NL_CTYPE_GAP2
);
927 CTYPE_EMPTY(_NL_CTYPE_GAP3
);
928 CTYPE_EMPTY(_NL_CTYPE_GAP4
);
929 CTYPE_EMPTY(_NL_CTYPE_GAP5
);
930 CTYPE_EMPTY(_NL_CTYPE_GAP6
);
932 #define CTYPE_DATA(name, base, len) \
933 case _NL_ITEM_INDEX (name): \
934 iov[2 + elem + offset].iov_base = (base); \
935 iov[2 + elem + offset].iov_len = (len); \
936 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
939 CTYPE_DATA (_NL_CTYPE_CLASS
,
941 (256 + 128) * sizeof (char_class_t
));
943 CTYPE_DATA (_NL_CTYPE_TOUPPER
,
945 (256 + 128) * sizeof (uint32_t));
946 CTYPE_DATA (_NL_CTYPE_TOLOWER
,
948 (256 + 128) * sizeof (uint32_t));
950 CTYPE_DATA (_NL_CTYPE_TOUPPER32
,
952 256 * sizeof (uint32_t));
953 CTYPE_DATA (_NL_CTYPE_TOLOWER32
,
955 256 * sizeof (uint32_t));
957 CTYPE_DATA (_NL_CTYPE_CLASS32
,
959 256 * sizeof (char_class32_t
));
961 CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET
,
962 &ctype
->class_offset
, sizeof (uint32_t));
964 CTYPE_DATA (_NL_CTYPE_MAP_OFFSET
,
965 &ctype
->map_offset
, sizeof (uint32_t));
967 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE
,
968 &ctype
->translit_idx_size
, sizeof (uint32_t));
970 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX
,
971 ctype
->translit_from_idx
,
972 ctype
->translit_idx_size
* sizeof (uint32_t));
974 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL
,
975 ctype
->translit_from_tbl
,
976 ctype
->translit_from_tbl_size
);
978 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX
,
979 ctype
->translit_to_idx
,
980 ctype
->translit_idx_size
* sizeof (uint32_t));
982 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL
,
983 ctype
->translit_to_tbl
, ctype
->translit_to_tbl_size
);
985 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES
):
986 /* The class name array. */
988 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
, ++offset
)
990 iov
[2 + elem
+ offset
].iov_base
991 = (void *) ctype
->classnames
[cnt
];
992 iov
[2 + elem
+ offset
].iov_len
993 = strlen (ctype
->classnames
[cnt
]) + 1;
994 total
+= iov
[2 + elem
+ offset
].iov_len
;
996 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
997 iov
[2 + elem
+ offset
].iov_len
= 1 + (4 - ((total
+ 1) % 4));
998 total
+= 1 + (4 - ((total
+ 1) % 4));
1000 idx
[elem
+ 1] = idx
[elem
] + total
;
1003 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES
):
1004 /* The map name array. */
1006 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
, ++offset
)
1008 iov
[2 + elem
+ offset
].iov_base
1009 = (void *) ctype
->mapnames
[cnt
];
1010 iov
[2 + elem
+ offset
].iov_len
1011 = strlen (ctype
->mapnames
[cnt
]) + 1;
1012 total
+= iov
[2 + elem
+ offset
].iov_len
;
1014 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1015 iov
[2 + elem
+ offset
].iov_len
= 1 + (4 - ((total
+ 1) % 4));
1016 total
+= 1 + (4 - ((total
+ 1) % 4));
1018 idx
[elem
+ 1] = idx
[elem
] + total
;
1021 CTYPE_DATA (_NL_CTYPE_WIDTH
,
1022 ctype
->width
.iov_base
,
1023 ctype
->width
.iov_len
);
1025 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX
,
1026 &ctype
->mb_cur_max
, sizeof (uint32_t));
1028 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME
):
1029 total
= strlen (ctype
->codeset_name
) + 1;
1031 iov
[2 + elem
+ offset
].iov_base
= (char *) ctype
->codeset_name
;
1034 iov
[2 + elem
+ offset
].iov_base
= alloca ((total
+ 3) & ~3);
1035 memset (mempcpy (iov
[2 + elem
+ offset
].iov_base
,
1036 ctype
->codeset_name
, total
),
1037 '\0', 4 - (total
& 3));
1038 total
= (total
+ 3) & ~3;
1040 iov
[2 + elem
+ offset
].iov_len
= total
;
1041 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1045 CTYPE_DATA (_NL_CTYPE_MAP_TO_NONASCII
,
1046 &ctype
->to_nonascii
, sizeof (uint32_t));
1048 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN
):
1049 iov
[2 + elem
+ offset
].iov_base
= alloca (sizeof (uint32_t));
1050 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1051 *(uint32_t *) iov
[2 + elem
+ offset
].iov_base
=
1052 ctype
->mbdigits_act
/ 10;
1053 idx
[elem
+ 1] = idx
[elem
] + sizeof (uint32_t);
1056 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN
):
1057 /* Align entries. */
1058 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1059 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1060 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1063 iov
[2 + elem
+ offset
].iov_base
= alloca (sizeof (uint32_t));
1064 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1065 *(uint32_t *) iov
[2 + elem
+ offset
].iov_base
=
1066 ctype
->wcdigits_act
/ 10;
1067 idx
[elem
+ 1] = idx
[elem
] + sizeof (uint32_t);
1070 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB
):
1071 /* Compute the length of all possible characters. For INDIGITS
1072 there might be more than one. We simply concatenate all of
1073 them with a NUL byte following. The NUL byte wouldn't be
1074 necessary but it makes it easier for the user. */
1077 for (cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
);
1078 cnt
< ctype
->mbdigits_act
; cnt
+= 10)
1079 total
+= ctype
->mbdigits
[cnt
]->nbytes
+ 1;
1080 iov
[2 + elem
+ offset
].iov_base
= (char *) alloca (total
);
1081 iov
[2 + elem
+ offset
].iov_len
= total
;
1083 cp
= iov
[2 + elem
+ offset
].iov_base
;
1084 for (cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
);
1085 cnt
< ctype
->mbdigits_act
; cnt
+= 10)
1087 cp
= mempcpy (cp
, ctype
->mbdigits
[cnt
]->bytes
,
1088 ctype
->mbdigits
[cnt
]->nbytes
);
1091 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1094 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB
) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB
):
1095 /* Each OUTDIGIT entry holds exactly one multibyte sequence, the
1096 one stored in mboutdigits for this digit. We copy it and append
1097 a NUL byte. The NUL byte wouldn't be necessary but it makes it
1098 easier for the user. */
1099 cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB
);
1100 total
= ctype
->mboutdigits
[cnt
]->nbytes
+ 1;
1101 iov
[2 + elem
+ offset
].iov_base
= (char *) alloca (total
);
1102 iov
[2 + elem
+ offset
].iov_len
= total
;
1104 *(char *) mempcpy (iov
[2 + elem
+ offset
].iov_base
,
1105 ctype
->mboutdigits
[cnt
]->bytes
,
1106 ctype
->mboutdigits
[cnt
]->nbytes
) = '\0';
1107 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1110 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC
) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC
):
1111 total
= ctype
->wcdigits_act
/ 10;
1113 iov
[2 + elem
+ offset
].iov_base
=
1114 (uint32_t *) alloca (total
* sizeof (uint32_t));
1115 iov
[2 + elem
+ offset
].iov_len
= total
* sizeof (uint32_t);
1117 for (cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC
);
1118 cnt
< ctype
->wcdigits_act
; cnt
+= 10)
1119 ((uint32_t *) iov
[2 + elem
+ offset
].iov_base
)[cnt
/ 10]
1120 = ctype
->wcdigits
[cnt
];
1121 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1124 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC
):
1125 /* Align entries. */
1126 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1127 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1128 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1132 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT1_WC
) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC
):
1133 cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC
);
1134 iov
[2 + elem
+ offset
].iov_base
= &ctype
->wcoutdigits
[cnt
];
1135 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1136 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1139 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN
):
1140 /* Align entries. */
1141 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1142 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1143 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1146 default_missing_len
= (ctype
->default_missing
1147 ? wcslen ((wchar_t *)ctype
->default_missing
)
1149 iov
[2 + elem
+ offset
].iov_base
= &default_missing_len
;
1150 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1151 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1154 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING
):
1155 iov
[2 + elem
+ offset
].iov_base
=
1156 ctype
->default_missing
?: (uint32_t *) L
"";
1157 iov
[2 + elem
+ offset
].iov_len
=
1158 wcslen (iov
[2 + elem
+ offset
].iov_base
);
1159 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1162 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN
):
1163 /* Align entries. */
1164 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1165 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1166 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1169 iov
[2 + elem
+ offset
].iov_base
= &ctype
->ntranslit_ignore
;
1170 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1171 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1174 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE
):
1176 uint32_t *ranges
= (uint32_t *) alloca (ctype
->ntranslit_ignore
1177 * 3 * sizeof (uint32_t));
1178 struct translit_ignore_t
*runp
;
1180 iov
[2 + elem
+ offset
].iov_base
= ranges
;
1181 iov
[2 + elem
+ offset
].iov_len
= (ctype
->ntranslit_ignore
1182 * 3 * sizeof (uint32_t));
1184 for (runp
= ctype
->translit_ignore
; runp
!= NULL
;
1187 *ranges
++ = runp
->from
;
1188 *ranges
++ = runp
->to
;
1189 *ranges
++ = runp
->step
;
1192 /* Remove the following line in case a new entry is added
1193 after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN. */
1195 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1199 assert (! "unknown CTYPE element");
1203 /* Handle extra maps. */
1204 size_t nr
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
);
1205 if (nr
< ctype
->nr_charclass
)
1207 iov
[2 + elem
+ offset
].iov_base
= ctype
->class_b
[nr
];
1208 iov
[2 + elem
+ offset
].iov_len
= 256 / 32 * sizeof (uint32_t);
1209 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1212 iov
[2 + elem
+ offset
] = ctype
->class_3level
[nr
];
1216 nr
-= ctype
->nr_charclass
;
1217 assert (nr
< ctype
->map_collection_nr
);
1218 iov
[2 + elem
+ offset
] = ctype
->map_3level
[nr
];
1220 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1224 assert (2 + elem
+ offset
== (nelems
+ 2 * ctype
->nr_charclass
1225 + ctype
->map_collection_nr
+ 4 + 2));
1227 write_locale_data (output_path
, LC_CTYPE
, "LC_CTYPE", 2 + elem
+ offset
,
1232 /* Local functions. */
1234 ctype_class_new (struct linereader
*lr
, struct locale_ctype_t
*ctype
,
1239 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
1240 if (strcmp (ctype
->classnames
[cnt
], name
) == 0)
1243 if (cnt
< ctype
->nr_charclass
)
1245 lr_error (lr
, _("character class `%s' already defined"), name
);
1249 if (ctype
->nr_charclass
== MAX_NR_CHARCLASS
)
1250 /* Exit code 2 is prescribed in P1003.2b. */
1251 WITH_CUR_LOCALE (error (2, 0, _("\
1252 implementation limit: no more than %Zd character classes allowed"),
1255 ctype
->classnames
[ctype
->nr_charclass
++] = name
;
1260 ctype_map_new (struct linereader
*lr
, struct locale_ctype_t
*ctype
,
1261 const char *name
, const struct charmap_t
*charmap
)
1263 size_t max_chars
= 0;
1266 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
)
1268 if (strcmp (ctype
->mapnames
[cnt
], name
) == 0)
1271 if (max_chars
< ctype
->map_collection_max
[cnt
])
1272 max_chars
= ctype
->map_collection_max
[cnt
];
1275 if (cnt
< ctype
->map_collection_nr
)
1277 lr_error (lr
, _("character map `%s' already defined"), name
);
1281 if (ctype
->map_collection_nr
== MAX_NR_CHARMAP
)
1282 /* Exit code 2 is prescribed in P1003.2b. */
1283 WITH_CUR_LOCALE (error (2, 0, _("\
1284 implementation limit: no more than %d character maps allowed"),
1287 ctype
->mapnames
[cnt
] = name
;
1290 ctype
->map_collection_max
[cnt
] = charmap
->mb_cur_max
== 1 ? 256 : 512;
1292 ctype
->map_collection_max
[cnt
] = max_chars
;
1294 ctype
->map_collection
[cnt
] = (uint32_t *)
1295 xcalloc (sizeof (uint32_t), ctype
->map_collection_max
[cnt
]);
1296 ctype
->map_collection_act
[cnt
] = 256;
1298 ++ctype
->map_collection_nr
;
1302 /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
1303 is possible if we only want to extend the name array. */
1305 find_idx (struct locale_ctype_t
*ctype
, uint32_t **table
, size_t *max
,
1306 size_t *act
, uint32_t idx
)
1311 return table
== NULL
? NULL
: &(*table
)[idx
];
1313 /* Use the charnames_idx lookup table instead of the slow search loop. */
1315 cnt
= idx_table_get (&ctype
->charnames_idx
, idx
);
1318 cnt
= ctype
->charnames_act
;
1320 for (cnt
= 256; cnt
< ctype
->charnames_act
; ++cnt
)
1321 if (ctype
->charnames
[cnt
] == idx
)
1325 /* We have to distinguish two cases: the name is found or not. */
1326 if (cnt
== ctype
->charnames_act
)
1328 /* Extend the name array. */
1329 if (ctype
->charnames_act
== ctype
->charnames_max
)
1331 ctype
->charnames_max
*= 2;
1332 ctype
->charnames
= (uint32_t *)
1333 xrealloc (ctype
->charnames
,
1334 sizeof (uint32_t) * ctype
->charnames_max
);
1336 ctype
->charnames
[ctype
->charnames_act
++] = idx
;
1337 idx_table_add (&ctype
->charnames_idx
, idx
, cnt
);
1341 /* We have done everything we are asked to do. */
1345 /* The caller does not want to extend the table. */
1346 return (cnt
>= *act
? NULL
: &(*table
)[cnt
]);
1352 size_t old_max
= *max
;
1355 while (*max
<= cnt
);
1358 (uint32_t *) xrealloc (*table
, *max
* sizeof (uint32_t));
1359 memset (&(*table
)[old_max
], '\0',
1360 (*max
- old_max
) * sizeof (uint32_t));
1366 return &(*table
)[cnt
];
1371 get_character (struct token
*now
, const struct charmap_t
*charmap
,
1372 struct repertoire_t
*repertoire
,
1373 struct charseq
**seqp
, uint32_t *wchp
)
1375 if (now
->tok
== tok_bsymbol
)
1377 /* This will hopefully be the normal case. */
1378 *wchp
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
1379 now
->val
.str
.lenmb
);
1380 *seqp
= charmap_find_value (charmap
, now
->val
.str
.startmb
,
1381 now
->val
.str
.lenmb
);
1383 else if (now
->tok
== tok_ucs4
)
1387 snprintf (utmp
, sizeof (utmp
), "U%08X", now
->val
.ucs4
);
1388 *seqp
= charmap_find_value (charmap
, utmp
, 9);
1391 *seqp
= repertoire_find_seq (repertoire
, now
->val
.ucs4
);
1395 /* Compute the value in the charmap from the UCS value. */
1396 const char *symbol
= repertoire_find_symbol (repertoire
,
1402 *seqp
= charmap_find_value (charmap
, symbol
, strlen (symbol
));
1406 if (repertoire
!= NULL
)
1408 /* Insert a negative entry. */
1409 static const struct charseq negative
1410 = { .ucs4
= ILLEGAL_CHAR_VALUE
};
1411 uint32_t *newp
= obstack_alloc (&repertoire
->mem_pool
,
1413 *newp
= now
->val
.ucs4
;
1415 insert_entry (&repertoire
->seq_table
, newp
,
1416 sizeof (uint32_t), (void *) &negative
);
1420 (*seqp
)->ucs4
= now
->val
.ucs4
;
1422 else if ((*seqp
)->ucs4
!= now
->val
.ucs4
)
1425 *wchp
= now
->val
.ucs4
;
1427 else if (now
->tok
== tok_charcode
)
1429 /* We must map from the byte code to UCS4. */
1430 *seqp
= charmap_find_symbol (charmap
, now
->val
.str
.startmb
,
1431 now
->val
.str
.lenmb
);
1434 *wchp
= ILLEGAL_CHAR_VALUE
;
1437 if ((*seqp
)->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1438 (*seqp
)->ucs4
= repertoire_find_value (repertoire
, (*seqp
)->name
,
1439 strlen ((*seqp
)->name
));
1440 *wchp
= (*seqp
)->ucs4
;
1450 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1451 the .(2). counterparts. */
1453 charclass_symbolic_ellipsis (struct linereader
*ldfile
,
1454 struct locale_ctype_t
*ctype
,
1455 const struct charmap_t
*charmap
,
1456 struct repertoire_t
*repertoire
,
1458 const char *last_str
,
1459 unsigned long int class256_bit
,
1460 unsigned long int class_bit
, int base
,
1461 int ignore_content
, int handle_digits
, int step
)
1463 const char *nowstr
= now
->val
.str
.startmb
;
1464 char tmp
[now
->val
.str
.lenmb
+ 1];
1467 unsigned long int from
;
1468 unsigned long int to
;
1470 /* We have to compute the ellipsis values using the symbolic names. */
1471 assert (last_str
!= NULL
);
1473 if (strlen (last_str
) != now
->val
.str
.lenmb
)
1477 _("`%s' and `%.*s' are no valid names for symbolic range"),
1478 last_str
, (int) now
->val
.str
.lenmb
, nowstr
);
1482 if (memcmp (last_str
, nowstr
, now
->val
.str
.lenmb
) == 0)
1483 /* Nothing to do, the names are the same. */
1486 for (cp
= last_str
; *cp
== *(nowstr
+ (cp
- last_str
)); ++cp
)
1490 from
= strtoul (cp
, &endp
, base
);
1491 if ((from
== UINT_MAX
&& errno
== ERANGE
) || *endp
!= '\0')
1494 to
= strtoul (nowstr
+ (cp
- last_str
), &endp
, base
);
1495 if ((to
== UINT_MAX
&& errno
== ERANGE
)
1496 || (endp
- nowstr
) != now
->val
.str
.lenmb
|| from
>= to
)
1499 /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
1500 if (!ignore_content
)
1502 now
->val
.str
.startmb
= tmp
;
1503 while ((from
+= step
) <= to
)
1505 struct charseq
*seq
;
1508 sprintf (tmp
, (base
== 10 ? "%.*s%0*ld" : "%.*s%0*lX"),
1509 (int) (cp
- last_str
), last_str
,
1510 (int) (now
->val
.str
.lenmb
- (cp
- last_str
)),
1513 get_character (now
, charmap
, repertoire
, &seq
, &wch
);
1515 if (seq
!= NULL
&& seq
->nbytes
== 1)
1516 /* Yep, we can store information about this byte sequence. */
1517 ctype
->class256_collection
[seq
->bytes
[0]] |= class256_bit
;
1519 if (wch
!= ILLEGAL_CHAR_VALUE
&& class_bit
!= 0)
1520 /* We have the UCS4 position. */
1521 *find_idx (ctype
, &ctype
->class_collection
,
1522 &ctype
->class_collection_max
,
1523 &ctype
->class_collection_act
, wch
) |= class_bit
;
1525 if (handle_digits
== 1)
1527 /* We must store the digit values. */
1528 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1530 ctype
->mbdigits_max
*= 2;
1531 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1532 (ctype
->mbdigits_max
1533 * sizeof (char *)));
1534 ctype
->wcdigits_max
*= 2;
1535 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1536 (ctype
->wcdigits_max
1537 * sizeof (uint32_t)));
1540 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
1541 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
1543 else if (handle_digits
== 2)
1545 /* We must store the digit values. */
1546 if (ctype
->outdigits_act
>= 10)
1548 lr_error (ldfile
, _("\
1549 %s: field `%s' does not contain exactly ten entries"),
1550 "LC_CTYPE", "outdigit");
1554 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
1555 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
1556 ++ctype
->outdigits_act
;
1563 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. */
1565 charclass_ucs4_ellipsis (struct linereader
*ldfile
,
1566 struct locale_ctype_t
*ctype
,
1567 const struct charmap_t
*charmap
,
1568 struct repertoire_t
*repertoire
,
1569 struct token
*now
, uint32_t last_wch
,
1570 unsigned long int class256_bit
,
1571 unsigned long int class_bit
, int ignore_content
,
1572 int handle_digits
, int step
)
1574 if (last_wch
> now
->val
.ucs4
)
1576 lr_error (ldfile
, _("\
1577 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1578 (now
->val
.ucs4
| last_wch
) < 65536 ? 4 : 8, now
->val
.ucs4
,
1579 (now
->val
.ucs4
| last_wch
) < 65536 ? 4 : 8, last_wch
);
1583 if (!ignore_content
)
1584 while ((last_wch
+= step
) <= now
->val
.ucs4
)
1586 /* We have to find out whether there is a byte sequence corresponding
1587 to this UCS4 value. */
1588 struct charseq
*seq
;
1591 snprintf (utmp
, sizeof (utmp
), "U%08X", last_wch
);
1592 seq
= charmap_find_value (charmap
, utmp
, 9);
1595 snprintf (utmp
, sizeof (utmp
), "U%04X", last_wch
);
1596 seq
= charmap_find_value (charmap
, utmp
, 5);
1600 /* Try looking in the repertoire map. */
1601 seq
= repertoire_find_seq (repertoire
, last_wch
);
1603 /* If this is the first time we look for this sequence create a new
1607 static const struct charseq negative
1608 = { .ucs4
= ILLEGAL_CHAR_VALUE
};
1610 /* Find the symbolic name for this UCS4 value. */
1611 if (repertoire
!= NULL
)
1613 const char *symbol
= repertoire_find_symbol (repertoire
,
1615 uint32_t *newp
= obstack_alloc (&repertoire
->mem_pool
,
1620 /* We have a name, now search the multibyte value. */
1621 seq
= charmap_find_value (charmap
, symbol
, strlen (symbol
));
1624 /* We have to create a fake entry. */
1625 seq
= (struct charseq
*) &negative
;
1627 seq
->ucs4
= last_wch
;
1629 insert_entry (&repertoire
->seq_table
, newp
, sizeof (uint32_t),
1633 /* We have to create a fake entry. */
1634 seq
= (struct charseq
*) &negative
;
1637 /* We have a name, now search the multibyte value. */
1638 if (seq
->ucs4
== last_wch
&& seq
->nbytes
== 1)
1639 /* Yep, we can store information about this byte sequence. */
1640 ctype
->class256_collection
[(size_t) seq
->bytes
[0]]
1643 /* And of course we have the UCS4 position. */
1645 *find_idx (ctype
, &ctype
->class_collection
,
1646 &ctype
->class_collection_max
,
1647 &ctype
->class_collection_act
, last_wch
) |= class_bit
;
1649 if (handle_digits
== 1)
1651 /* We must store the digit values. */
1652 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1654 ctype
->mbdigits_max
*= 2;
1655 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1656 (ctype
->mbdigits_max
1657 * sizeof (char *)));
1658 ctype
->wcdigits_max
*= 2;
1659 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1660 (ctype
->wcdigits_max
1661 * sizeof (uint32_t)));
1664 ctype
->mbdigits
[ctype
->mbdigits_act
++] = (seq
->ucs4
== last_wch
1666 ctype
->wcdigits
[ctype
->wcdigits_act
++] = last_wch
;
1668 else if (handle_digits
== 2)
1670 /* We must store the digit values. */
1671 if (ctype
->outdigits_act
>= 10)
1673 lr_error (ldfile
, _("\
1674 %s: field `%s' does not contain exactly ten entries"),
1675 "LC_CTYPE", "outdigit");
1679 ctype
->mboutdigits
[ctype
->outdigits_act
] = (seq
->ucs4
== last_wch
1681 ctype
->wcoutdigits
[ctype
->outdigits_act
] = last_wch
;
1682 ++ctype
->outdigits_act
;
1688 /* Ellipsis as in `/xea/x12.../xea/x34'. */
1690 charclass_charcode_ellipsis (struct linereader
*ldfile
,
1691 struct locale_ctype_t
*ctype
,
1692 const struct charmap_t
*charmap
,
1693 struct repertoire_t
*repertoire
,
1694 struct token
*now
, char *last_charcode
,
1695 uint32_t last_charcode_len
,
1696 unsigned long int class256_bit
,
1697 unsigned long int class_bit
, int ignore_content
,
1700 /* First check whether the to-value is larger. */
1701 if (now
->val
.charcode
.nbytes
!= last_charcode_len
)
1703 lr_error (ldfile
, _("\
1704 start and end character sequence of range must have the same length"));
1708 if (memcmp (last_charcode
, now
->val
.charcode
.bytes
, last_charcode_len
) > 0)
1710 lr_error (ldfile
, _("\
1711 to-value character sequence is smaller than from-value sequence"));
1715 if (!ignore_content
)
1719 /* Increment the byte sequence value. */
1720 struct charseq
*seq
;
1724 for (i
= last_charcode_len
- 1; i
>= 0; --i
)
1725 if (++last_charcode
[i
] != 0)
1728 if (last_charcode_len
== 1)
1729 /* Of course we have the charcode value. */
1730 ctype
->class256_collection
[(size_t) last_charcode
[0]]
1733 /* Find the symbolic name. */
1734 seq
= charmap_find_symbol (charmap
, last_charcode
,
1738 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1739 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1740 strlen (seq
->name
));
1741 wch
= seq
== NULL
? ILLEGAL_CHAR_VALUE
: seq
->ucs4
;
1743 if (wch
!= ILLEGAL_CHAR_VALUE
&& class_bit
!= 0)
1744 *find_idx (ctype
, &ctype
->class_collection
,
1745 &ctype
->class_collection_max
,
1746 &ctype
->class_collection_act
, wch
) |= class_bit
;
1749 wch
= ILLEGAL_CHAR_VALUE
;
1751 if (handle_digits
== 1)
1753 /* We must store the digit values. */
1754 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1756 ctype
->mbdigits_max
*= 2;
1757 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1758 (ctype
->mbdigits_max
1759 * sizeof (char *)));
1760 ctype
->wcdigits_max
*= 2;
1761 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1762 (ctype
->wcdigits_max
1763 * sizeof (uint32_t)));
1766 seq
= xmalloc (sizeof (struct charseq
) + last_charcode_len
);
1767 memcpy ((char *) (seq
+ 1), last_charcode
, last_charcode_len
);
1768 seq
->nbytes
= last_charcode_len
;
1770 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
1771 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
1773 else if (handle_digits
== 2)
1775 struct charseq
*seq
;
1776 /* We must store the digit values. */
1777 if (ctype
->outdigits_act
>= 10)
1779 lr_error (ldfile
, _("\
1780 %s: field `%s' does not contain exactly ten entries"),
1781 "LC_CTYPE", "outdigit");
1785 seq
= xmalloc (sizeof (struct charseq
) + last_charcode_len
);
1786 memcpy ((char *) (seq
+ 1), last_charcode
, last_charcode_len
);
1787 seq
->nbytes
= last_charcode_len
;
1789 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
1790 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
1791 ++ctype
->outdigits_act
;
1794 while (memcmp (last_charcode
, now
->val
.charcode
.bytes
,
1795 last_charcode_len
) != 0);
1801 find_translit2 (struct locale_ctype_t
*ctype
, const struct charmap_t
*charmap
,
1804 struct translit_t
*trunp
= ctype
->translit
;
1805 struct translit_ignore_t
*tirunp
= ctype
->translit_ignore
;
1807 while (trunp
!= NULL
)
1809 /* XXX We simplify things here. The transliterations we look
1810 for are only allowed to have one character. */
1811 if (trunp
->from
[0] == wch
&& trunp
->from
[1] == 0)
1813 /* Found it. Now look for a transliteration which can be
1814 represented with the character set. */
1815 struct translit_to_t
*torunp
= trunp
->to
;
1817 while (torunp
!= NULL
)
1821 for (i
= 0; torunp
->str
[i
] != 0; ++i
)
1825 snprintf (utmp
, sizeof (utmp
), "U%08X", torunp
->str
[i
]);
1826 if (charmap_find_value (charmap
, utmp
, 9) == NULL
)
1827 /* This character cannot be represented. */
1831 if (torunp
->str
[i
] == 0)
1834 torunp
= torunp
->next
;
1840 trunp
= trunp
->next
;
1843 /* Check for ignored chars. */
1844 while (tirunp
!= NULL
)
1846 if (tirunp
->from
<= wch
&& tirunp
->to
>= wch
)
1850 for (wi
= tirunp
->from
; wi
<= wch
; wi
+= tirunp
->step
)
1852 return (uint32_t []) { 0 };
1856 /* Nothing found. */
1862 find_translit (struct localedef_t
*locale
, const struct charmap_t
*charmap
,
1865 struct locale_ctype_t
*ctype
;
1866 uint32_t *result
= NULL
;
1868 assert (locale
!= NULL
);
1869 ctype
= locale
->categories
[LC_CTYPE
].ctype
;
1871 if (ctype
->translit
!= NULL
)
1872 result
= find_translit2 (ctype
, charmap
, wch
);
1876 struct translit_include_t
*irunp
= ctype
->translit_include
;
1878 while (irunp
!= NULL
&& result
== NULL
)
1880 result
= find_translit (find_locale (CTYPE_LOCALE
,
1882 irunp
->copy_repertoire
,
1885 irunp
= irunp
->next
;
1893 /* Read one transliteration entry. */
1895 read_widestring (struct linereader
*ldfile
, struct token
*now
,
1896 const struct charmap_t
*charmap
,
1897 struct repertoire_t
*repertoire
)
1901 if (now
->tok
== tok_default_missing
)
1902 /* The special name "" will denote this case. */
1903 wstr
= ((uint32_t *) { 0 });
1904 else if (now
->tok
== tok_bsymbol
)
1906 /* Get the value from the repertoire. */
1907 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1908 wstr
[0] = repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
1909 now
->val
.str
.lenmb
);
1910 if (wstr
[0] == ILLEGAL_CHAR_VALUE
)
1912 /* We cannot proceed, we don't know the UCS4 value. */
1919 else if (now
->tok
== tok_ucs4
)
1921 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1922 wstr
[0] = now
->val
.ucs4
;
1925 else if (now
->tok
== tok_charcode
)
1927 /* Argh, we have to convert to the symbol name first and then to the
1929 struct charseq
*seq
= charmap_find_symbol (charmap
,
1930 now
->val
.str
.startmb
,
1931 now
->val
.str
.lenmb
);
1933 /* Cannot find the UCS4 value. */
1936 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1937 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1938 strlen (seq
->name
));
1939 if (seq
->ucs4
== ILLEGAL_CHAR_VALUE
)
1940 /* We cannot proceed, we don't know the UCS4 value. */
1943 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1944 wstr
[0] = seq
->ucs4
;
1947 else if (now
->tok
== tok_string
)
1949 wstr
= now
->val
.str
.startwc
;
1950 if (wstr
== NULL
|| wstr
[0] == 0)
1955 if (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
1956 lr_ignore_rest (ldfile
, 0);
1957 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1958 return (uint32_t *) -1l;
1966 read_translit_entry (struct linereader
*ldfile
, struct locale_ctype_t
*ctype
,
1967 struct token
*now
, const struct charmap_t
*charmap
,
1968 struct repertoire_t
*repertoire
)
1970 uint32_t *from_wstr
= read_widestring (ldfile
, now
, charmap
, repertoire
);
1971 struct translit_t
*result
;
1972 struct translit_to_t
**top
;
1973 struct obstack
*ob
= &ctype
->mempool
;
1977 if (from_wstr
== NULL
)
1978 /* There is no valid from string. */
1981 result
= (struct translit_t
*) obstack_alloc (ob
,
1982 sizeof (struct translit_t
));
1983 result
->from
= from_wstr
;
1984 result
->fname
= ldfile
->fname
;
1985 result
->lineno
= ldfile
->lineno
;
1986 result
->next
= NULL
;
1996 /* Next we have one or more transliterations. They are
1997 separated by semicolons. */
1998 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
2000 if (!first
&& (now
->tok
== tok_semicolon
|| now
->tok
== tok_eol
))
2002 /* One string read. */
2003 const uint32_t zero
= 0;
2007 obstack_grow (ob
, &zero
, 4);
2008 to_wstr
= obstack_finish (ob
);
2010 *top
= obstack_alloc (ob
, sizeof (struct translit_to_t
));
2011 (*top
)->str
= to_wstr
;
2012 (*top
)->next
= NULL
;
2015 if (now
->tok
== tok_eol
)
2017 result
->next
= ctype
->translit
;
2018 ctype
->translit
= result
;
2023 top
= &(*top
)->next
;
2028 to_wstr
= read_widestring (ldfile
, now
, charmap
, repertoire
);
2029 if (to_wstr
== (uint32_t *) -1l)
2031 /* An error occurred. */
2032 obstack_free (ob
, result
);
2036 if (to_wstr
== NULL
)
2039 /* This value is usable. */
2040 obstack_grow (ob
, to_wstr
, wcslen ((wchar_t *) to_wstr
) * 4);
2049 read_translit_ignore_entry (struct linereader
*ldfile
,
2050 struct locale_ctype_t
*ctype
,
2051 const struct charmap_t
*charmap
,
2052 struct repertoire_t
*repertoire
)
2054 /* We expect a semicolon-separated list of characters we ignore. We are
2055 only interested in the wide character definitions. These must be
2056 single characters, possibly defining a range when an ellipsis is used. */
2059 struct token
*now
= lr_token (ldfile
, charmap
, NULL
, repertoire
,
2061 struct translit_ignore_t
*newp
;
2064 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2067 _("premature end of `translit_ignore' definition"));
2071 if (now
->tok
!= tok_bsymbol
&& now
->tok
!= tok_ucs4
)
2073 lr_error (ldfile
, _("syntax error"));
2074 lr_ignore_rest (ldfile
, 0);
2078 if (now
->tok
== tok_ucs4
)
2079 from
= now
->val
.ucs4
;
2081 /* Try to get the value. */
2082 from
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
2083 now
->val
.str
.lenmb
);
2085 if (from
== ILLEGAL_CHAR_VALUE
)
2087 lr_error (ldfile
, "invalid character name");
2092 newp
= (struct translit_ignore_t
*)
2093 obstack_alloc (&ctype
->mempool
, sizeof (struct translit_ignore_t
));
2098 newp
->next
= ctype
->translit_ignore
;
2099 ctype
->translit_ignore
= newp
;
2102 /* Now we expect either a semicolon, an ellipsis, or the end of the
2104 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
2106 if (now
->tok
== tok_ellipsis2
|| now
->tok
== tok_ellipsis2_2
)
2108 /* XXX Should we bother implementing `....'? `...' certainly
2109 will not be implemented. */
2111 int step
= now
->tok
== tok_ellipsis2_2
? 2 : 1;
2113 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
2115 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2118 _("premature end of `translit_ignore' definition"));
2122 if (now
->tok
!= tok_bsymbol
&& now
->tok
!= tok_ucs4
)
2124 lr_error (ldfile
, _("syntax error"));
2125 lr_ignore_rest (ldfile
, 0);
2129 if (now
->tok
== tok_ucs4
)
2132 /* Try to get the value. */
2133 to
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
2134 now
->val
.str
.lenmb
);
2136 if (to
== ILLEGAL_CHAR_VALUE
)
2137 lr_error (ldfile
, "invalid character name");
2140 /* Make sure the `to'-value is larger. */
2147 lr_error (ldfile
, _("\
2148 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2149 (to
| from
) < 65536 ? 4 : 8, to
,
2150 (to
| from
) < 65536 ? 4 : 8, from
);
2153 /* And the next token. */
2154 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
2157 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2161 if (now
->tok
== tok_semicolon
)
2165 /* If we come here something is wrong. */
2166 lr_error (ldfile
, _("syntax error"));
2167 lr_ignore_rest (ldfile
, 0);
2173 /* The parser for the LC_CTYPE section of the locale definition. */
2175 ctype_read (struct linereader
*ldfile
, struct localedef_t
*result
,
2176 const struct charmap_t
*charmap
, const char *repertoire_name
,
2179 struct repertoire_t
*repertoire
= NULL
;
2180 struct locale_ctype_t
*ctype
;
2182 enum token_t nowtok
;
2184 struct charseq
*last_seq
;
2185 uint32_t last_wch
= 0;
2186 enum token_t last_token
;
2187 enum token_t ellipsis_token
;
2189 char last_charcode
[16];
2190 size_t last_charcode_len
= 0;
2191 const char *last_str
= NULL
;
2193 struct localedef_t
*copy_locale
= NULL
;
2195 /* Get the repertoire we have to use. */
2196 if (repertoire_name
!= NULL
)
2197 repertoire
= repertoire_read (repertoire_name
);
2199 /* The rest of the line containing `LC_CTYPE' must be free. */
2200 lr_ignore_rest (ldfile
, 1);
2205 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2208 while (nowtok
== tok_eol
);
2210 /* If we see `copy' now we are almost done. */
2211 if (nowtok
== tok_copy
)
2213 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2214 if (now
->tok
!= tok_string
)
2216 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2220 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2221 while (now
->tok
!= tok_eof
&& now
->tok
!= tok_end
);
2223 if (now
->tok
!= tok_eof
2224 || (now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
),
2225 now
->tok
== tok_eof
))
2226 lr_error (ldfile
, _("%s: premature end of file"), "LC_CTYPE");
2227 else if (now
->tok
!= tok_lc_ctype
)
2229 lr_error (ldfile
, _("\
2230 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2231 lr_ignore_rest (ldfile
, 0);
2234 lr_ignore_rest (ldfile
, 1);
2239 if (! ignore_content
)
2241 /* Get the locale definition. */
2242 copy_locale
= load_locale (LC_CTYPE
, now
->val
.str
.startmb
,
2243 repertoire_name
, charmap
, NULL
);
2244 if ((copy_locale
->avail
& CTYPE_LOCALE
) == 0)
2246 /* Not yet loaded. So do it now. */
2247 if (locfile_read (copy_locale
, charmap
) != 0)
2252 lr_ignore_rest (ldfile
, 1);
2254 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2258 /* Prepare the data structures. */
2259 ctype_startup (ldfile
, result
, charmap
, copy_locale
, ignore_content
);
2260 ctype
= result
->categories
[LC_CTYPE
].ctype
;
2262 /* Remember the repertoire we use. */
2263 if (!ignore_content
)
2264 ctype
->repertoire
= repertoire
;
2268 unsigned long int class_bit
= 0;
2269 unsigned long int class256_bit
= 0;
2270 int handle_digits
= 0;
2272 /* Of course we don't proceed beyond the end of file. */
2273 if (nowtok
== tok_eof
)
2276 /* Ignore empty lines. */
2277 if (nowtok
== tok_eol
)
2279 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2287 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2288 while (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2290 ctype_class_new (ldfile
, ctype
, now
->val
.str
.startmb
);
2291 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2292 if (now
->tok
!= tok_semicolon
)
2294 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2296 if (now
->tok
!= tok_eol
)
2298 %s: syntax error in definition of new character class"), "LC_CTYPE");
2302 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2303 while (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2305 ctype_map_new (ldfile
, ctype
, now
->val
.str
.startmb
, charmap
);
2306 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2307 if (now
->tok
!= tok_semicolon
)
2309 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2311 if (now
->tok
!= tok_eol
)
2313 %s: syntax error in definition of new character map"), "LC_CTYPE");
2317 /* Ignore the rest of the line if we don't need the input of
2321 lr_ignore_rest (ldfile
, 0);
2325 /* We simply forget the `class' keyword and use the following
2326 operand to determine the bit. */
2327 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2328 if (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2330 /* It can be one of the predefined class names. */
2331 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
2332 if (strcmp (ctype
->classnames
[cnt
], now
->val
.str
.startmb
) == 0)
2334 if (cnt
>= ctype
->nr_charclass
)
2336 #ifdef PREDEFINED_CLASSES
2337 if (now
->val
.str
.lenmb
== 8
2338 && memcmp ("special1", now
->val
.str
.startmb
, 8) == 0)
2339 class_bit
= _ISwspecial1
;
2340 else if (now
->val
.str
.lenmb
== 8
2341 && memcmp ("special2", now
->val
.str
.startmb
, 8) == 0)
2342 class_bit
= _ISwspecial2
;
2343 else if (now
->val
.str
.lenmb
== 8
2344 && memcmp ("special3", now
->val
.str
.startmb
, 8) == 0)
2345 class_bit
= _ISwspecial3
;
2349 /* OK, it's a new class. */
2350 ctype_class_new (ldfile
, ctype
, now
->val
.str
.startmb
);
2352 class_bit
= _ISwbit (ctype
->nr_charclass
- 1);
2357 class_bit
= _ISwbit (cnt
);
2359 free (now
->val
.str
.startmb
);
2362 else if (now
->tok
== tok_digit
)
2363 goto handle_tok_digit
;
2364 else if (now
->tok
< tok_upper
|| now
->tok
> tok_blank
)
2368 class_bit
= BITw (now
->tok
);
2369 class256_bit
= BIT (now
->tok
);
2372 /* The next character must be a semicolon. */
2373 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2374 if (now
->tok
!= tok_semicolon
)
2376 goto read_charclass
;
2389 /* Ignore the rest of the line if we don't need the input of
2393 lr_ignore_rest (ldfile
, 0);
2397 class_bit
= BITw (now
->tok
);
2398 class256_bit
= BIT (now
->tok
);
2401 ctype
->class_done
|= class_bit
;
2402 last_token
= tok_none
;
2403 ellipsis_token
= tok_none
;
2405 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2406 while (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
2409 struct charseq
*seq
;
2411 if (ellipsis_token
== tok_none
)
2413 if (get_character (now
, charmap
, repertoire
, &seq
, &wch
))
2416 if (!ignore_content
&& seq
!= NULL
&& seq
->nbytes
== 1)
2417 /* Yep, we can store information about this byte
2419 ctype
->class256_collection
[seq
->bytes
[0]] |= class256_bit
;
2421 if (!ignore_content
&& wch
!= ILLEGAL_CHAR_VALUE
2423 /* We have the UCS4 position. */
2424 *find_idx (ctype
, &ctype
->class_collection
,
2425 &ctype
->class_collection_max
,
2426 &ctype
->class_collection_act
, wch
) |= class_bit
;
2428 last_token
= now
->tok
;
2429 /* Terminate the string. */
2430 if (last_token
== tok_bsymbol
)
2432 now
->val
.str
.startmb
[now
->val
.str
.lenmb
] = '\0';
2433 last_str
= now
->val
.str
.startmb
;
2439 memcpy (last_charcode
, now
->val
.charcode
.bytes
, 16);
2440 last_charcode_len
= now
->val
.charcode
.nbytes
;
2442 if (!ignore_content
&& handle_digits
== 1)
2444 /* We must store the digit values. */
2445 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
2447 ctype
->mbdigits_max
+= 10;
2448 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
2449 (ctype
->mbdigits_max
2450 * sizeof (char *)));
2451 ctype
->wcdigits_max
+= 10;
2452 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
2453 (ctype
->wcdigits_max
2454 * sizeof (uint32_t)));
2457 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
2458 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
2460 else if (!ignore_content
&& handle_digits
== 2)
2462 /* We must store the digit values. */
2463 if (ctype
->outdigits_act
>= 10)
2465 lr_error (ldfile
, _("\
2466 %s: field `%s' does not contain exactly ten entries"),
2467 "LC_CTYPE", "outdigit");
2468 lr_ignore_rest (ldfile
, 0);
2472 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
2473 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
2474 ++ctype
->outdigits_act
;
2479 /* Now it gets complicated. We have to resolve the
2480 ellipsis problem. First we must distinguish between
2481 the different kind of ellipsis and this must match the
2482 tokens we have seen. */
2483 assert (last_token
!= tok_none
);
2485 if (last_token
!= now
->tok
)
2487 lr_error (ldfile
, _("\
2488 ellipsis range must be marked by two operands of same type"));
2489 lr_ignore_rest (ldfile
, 0);
2493 if (last_token
== tok_bsymbol
)
2495 if (ellipsis_token
== tok_ellipsis3
)
2496 lr_error (ldfile
, _("with symbolic name range values \
2497 the absolute ellipsis `...' must not be used"));
2499 charclass_symbolic_ellipsis (ldfile
, ctype
, charmap
,
2500 repertoire
, now
, last_str
,
2501 class256_bit
, class_bit
,
2506 handle_digits
, step
);
2508 else if (last_token
== tok_ucs4
)
2510 if (ellipsis_token
!= tok_ellipsis2
)
2511 lr_error (ldfile
, _("\
2512 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2514 charclass_ucs4_ellipsis (ldfile
, ctype
, charmap
,
2515 repertoire
, now
, last_wch
,
2516 class256_bit
, class_bit
,
2517 ignore_content
, handle_digits
,
2522 assert (last_token
== tok_charcode
);
2524 if (ellipsis_token
!= tok_ellipsis3
)
2525 lr_error (ldfile
, _("\
2526 with character code range values one must use the absolute ellipsis `...'"));
2528 charclass_charcode_ellipsis (ldfile
, ctype
, charmap
,
2532 class256_bit
, class_bit
,
2537 /* Now we have used the last value. */
2538 last_token
= tok_none
;
2541 /* Next we expect a semicolon or the end of the line. */
2542 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2543 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2546 if (last_token
!= tok_none
2547 && now
->tok
>= tok_ellipsis2
&& now
->tok
<= tok_ellipsis4_2
)
2549 if (now
->tok
== tok_ellipsis2_2
)
2551 now
->tok
= tok_ellipsis2
;
2554 else if (now
->tok
== tok_ellipsis4_2
)
2556 now
->tok
= tok_ellipsis4
;
2560 ellipsis_token
= now
->tok
;
2562 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2566 if (now
->tok
!= tok_semicolon
)
2569 /* And get the next character. */
2570 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2572 ellipsis_token
= tok_none
;
2578 /* Ignore the rest of the line if we don't need the input of
2582 lr_ignore_rest (ldfile
, 0);
2587 class_bit
= _ISwdigit
;
2588 class256_bit
= _ISdigit
;
2590 goto read_charclass
;
2593 /* Ignore the rest of the line if we don't need the input of
2597 lr_ignore_rest (ldfile
, 0);
2601 if (ctype
->outdigits_act
!= 0)
2602 lr_error (ldfile
, _("\
2603 %s: field `%s' declared more than once"),
2604 "LC_CTYPE", "outdigit");
2608 goto read_charclass
;
2611 /* Ignore the rest of the line if we don't need the input of
2615 lr_ignore_rest (ldfile
, 0);
2623 /* Ignore the rest of the line if we don't need the input of
2627 lr_ignore_rest (ldfile
, 0);
2635 /* Ignore the rest of the line if we don't need the input of
2639 lr_ignore_rest (ldfile
, 0);
2643 /* We simply forget the `map' keyword and use the following
2644 operand to determine the mapping. */
2645 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2646 if (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2650 for (cnt
= 2; cnt
< ctype
->map_collection_nr
; ++cnt
)
2651 if (strcmp (now
->val
.str
.startmb
, ctype
->mapnames
[cnt
]) == 0)
2654 if (cnt
< ctype
->map_collection_nr
)
2655 free (now
->val
.str
.startmb
);
2657 /* OK, it's a new map. */
2658 ctype_map_new (ldfile
, ctype
, now
->val
.str
.startmb
, charmap
);
2662 else if (now
->tok
< tok_toupper
|| now
->tok
> tok_tolower
)
2665 mapidx
= now
->tok
- tok_toupper
;
2667 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2668 /* This had better be a semicolon. */
2669 if (now
->tok
!= tok_semicolon
)
2673 /* Test whether this mapping was already defined. */
2674 if (ctype
->tomap_done
[mapidx
])
2676 lr_error (ldfile
, _("duplicated definition for mapping `%s'"),
2677 ctype
->mapnames
[mapidx
]);
2678 lr_ignore_rest (ldfile
, 0);
2681 ctype
->tomap_done
[mapidx
] = 1;
2683 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2684 while (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
2686 struct charseq
*from_seq
;
2688 struct charseq
*to_seq
;
2691 /* Every pair starts with an opening brace. */
2692 if (now
->tok
!= tok_open_brace
)
2695 /* Next comes the from-value. */
2696 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2697 if (get_character (now
, charmap
, repertoire
, &from_seq
,
2701 /* The next is a comma. */
2702 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2703 if (now
->tok
!= tok_comma
)
2706 /* And the other value. */
2707 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2708 if (get_character (now
, charmap
, repertoire
, &to_seq
,
2712 /* And the last thing is the closing brace. */
2713 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2714 if (now
->tok
!= tok_close_brace
)
2717 if (!ignore_content
)
2719 /* Check whether the mapping converts from an ASCII value
2720 to a non-ASCII value. */
2721 if (from_seq
!= NULL
&& from_seq
->nbytes
== 1
2722 && isascii (from_seq
->bytes
[0])
2723 && to_seq
!= NULL
&& (to_seq
->nbytes
!= 1
2724 || !isascii (to_seq
->bytes
[0])))
2725 ctype
->to_nonascii
= 1;
2727 if (mapidx
< 2 && from_seq
!= NULL
&& to_seq
!= NULL
2728 && from_seq
->nbytes
== 1 && to_seq
->nbytes
== 1)
2729 /* We can use this value. */
2730 ctype
->map256_collection
[mapidx
][from_seq
->bytes
[0]]
2733 if (from_wch
!= ILLEGAL_CHAR_VALUE
2734 && to_wch
!= ILLEGAL_CHAR_VALUE
)
2735 /* Both correct values. */
2736 *find_idx (ctype
, &ctype
->map_collection
[mapidx
],
2737 &ctype
->map_collection_max
[mapidx
],
2738 &ctype
->map_collection_act
[mapidx
],
2742 /* Now comes a semicolon or the end of the line/file. */
2743 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2744 if (now
->tok
== tok_semicolon
)
2745 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2749 case tok_translit_start
:
2750 /* Ignore the entire translit section with its peculiar syntax
2751 if we don't need the input. */
2756 lr_ignore_rest (ldfile
, 0);
2757 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2759 while (now
->tok
!= tok_translit_end
&& now
->tok
!= tok_eof
);
2761 if (now
->tok
== tok_eof
)
2762 lr_error (ldfile
, _(\
2763 "%s: `translit_start' section does not end with `translit_end'"),
2769 /* The rest of the line had better be empty. */
2770 lr_ignore_rest (ldfile
, 1);
2772 /* We count here the number of allocated entries in the `translit'
2776 ldfile
->translate_strings
= 1;
2777 ldfile
->return_widestr
= 1;
2779 /* We proceed until we see the `translit_end' token. */
2780 while (now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
),
2781 now
->tok
!= tok_translit_end
&& now
->tok
!= tok_eof
)
2783 if (now
->tok
== tok_eol
)
2784 /* Ignore empty lines. */
2787 if (now
->tok
== tok_include
)
2789 /* We have to include locale. */
2790 const char *locale_name
;
2791 const char *repertoire_name
;
2792 struct translit_include_t
*include_stmt
, **include_ptr
;
2794 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2795 /* This should be a string or an identifier. In any
2796 case something to name a locale. */
2797 if (now
->tok
!= tok_string
&& now
->tok
!= tok_ident
)
2800 lr_error (ldfile
, _("%s: syntax error"), "LC_CTYPE");
2801 lr_ignore_rest (ldfile
, 0);
2804 locale_name
= now
->val
.str
.startmb
;
2806 /* Next should be a semicolon. */
2807 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2808 if (now
->tok
!= tok_semicolon
)
2809 goto translit_syntax
;
2811 /* Now the repertoire name. */
2812 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2813 if ((now
->tok
!= tok_string
&& now
->tok
!= tok_ident
)
2814 || now
->val
.str
.startmb
== NULL
)
2815 goto translit_syntax
;
2816 repertoire_name
= now
->val
.str
.startmb
;
2817 if (repertoire_name
[0] == '\0')
2818 /* Ignore the empty string. */
2819 repertoire_name
= NULL
;
2821 /* Save the include statement for later processing. */
2822 include_stmt
= (struct translit_include_t
*)
2823 xmalloc (sizeof (struct translit_include_t
));
2824 include_stmt
->copy_locale
= locale_name
;
2825 include_stmt
->copy_repertoire
= repertoire_name
;
2826 include_stmt
->next
= NULL
;
2828 include_ptr
= &ctype
->translit_include
;
2829 while (*include_ptr
!= NULL
)
2830 include_ptr
= &(*include_ptr
)->next
;
2831 *include_ptr
= include_stmt
;
2833 /* The rest of the line must be empty. */
2834 lr_ignore_rest (ldfile
, 1);
2836 /* Make sure the locale is read. */
2837 add_to_readlist (LC_CTYPE
, locale_name
, repertoire_name
,
2841 else if (now
->tok
== tok_default_missing
)
2847 /* We expect a single character or string as the
2849 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2850 wstr
= read_widestring (ldfile
, now
, charmap
,
2855 if (ctype
->default_missing
!= NULL
)
2857 lr_error (ldfile
, _("\
2858 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2859 WITH_CUR_LOCALE (error_at_line (0, 0,
2860 ctype
->default_missing_file
,
2861 ctype
->default_missing_lineno
,
2863 previous definition was here")));
2867 ctype
->default_missing
= wstr
;
2868 ctype
->default_missing_file
= ldfile
->fname
;
2869 ctype
->default_missing_lineno
= ldfile
->lineno
;
2871 /* We can have more entries, ignore them. */
2872 lr_ignore_rest (ldfile
, 0);
2875 else if (wstr
== (uint32_t *) -1l)
2876 /* This was a syntax error. */
2879 /* Maybe there is another replacement we can use. */
2880 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2881 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2883 /* Nothing found. We tell the user. */
2884 lr_error (ldfile
, _("\
2885 %s: no representable `default_missing' definition found"), "LC_CTYPE");
2888 if (now
->tok
!= tok_semicolon
)
2889 goto translit_syntax
;
2894 else if (now
->tok
== tok_translit_ignore
)
2896 read_translit_ignore_entry (ldfile
, ctype
, charmap
,
2901 read_translit_entry (ldfile
, ctype
, now
, charmap
, repertoire
);
2903 ldfile
->return_widestr
= 0;
2905 if (now
->tok
== tok_eof
)
2906 lr_error (ldfile
, _(\
2907 "%s: `translit_start' section does not end with `translit_end'"),
2913 /* Ignore the rest of the line if we don't need the input of
2917 lr_ignore_rest (ldfile
, 0);
2921 /* This could mean one of several things. First test whether
2922 it's a character class name. */
2923 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
2924 if (strcmp (now
->val
.str
.startmb
, ctype
->classnames
[cnt
]) == 0)
2926 if (cnt
< ctype
->nr_charclass
)
2928 class_bit
= _ISwbit (cnt
);
2929 class256_bit
= cnt
<= 11 ? _ISbit (cnt
) : 0;
2930 free (now
->val
.str
.startmb
);
2931 goto read_charclass
;
2933 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
)
2934 if (strcmp (now
->val
.str
.startmb
, ctype
->mapnames
[cnt
]) == 0)
2936 if (cnt
< ctype
->map_collection_nr
)
2939 free (now
->val
.str
.startmb
);
2942 #ifdef PREDEFINED_CLASSES
2943 if (strcmp (now
->val
.str
.startmb
, "special1") == 0)
2945 class_bit
= _ISwspecial1
;
2946 free (now
->val
.str
.startmb
);
2947 goto read_charclass
;
2949 if (strcmp (now
->val
.str
.startmb
, "special2") == 0)
2951 class_bit
= _ISwspecial2
;
2952 free (now
->val
.str
.startmb
);
2953 goto read_charclass
;
2955 if (strcmp (now
->val
.str
.startmb
, "special3") == 0)
2957 class_bit
= _ISwspecial3
;
2958 free (now
->val
.str
.startmb
);
2959 goto read_charclass
;
2961 if (strcmp (now
->val
.str
.startmb
, "tosymmetric") == 0)
2970 /* Next we assume `LC_CTYPE'. */
2971 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2972 if (now
->tok
== tok_eof
)
2974 if (now
->tok
== tok_eol
)
2975 lr_error (ldfile
, _("%s: incomplete `END' line"),
2977 else if (now
->tok
!= tok_lc_ctype
)
2978 lr_error (ldfile
, _("\
2979 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2980 lr_ignore_rest (ldfile
, now
->tok
== tok_lc_ctype
);
2985 if (now
->tok
!= tok_eof
)
2986 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2989 /* Prepare for the next round. */
2990 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2994 /* When we come here we reached the end of the file. */
2995 lr_error (ldfile
, _("%s: premature end of file"), "LC_CTYPE");
3000 set_class_defaults (struct locale_ctype_t
*ctype
,
3001 const struct charmap_t
*charmap
,
3002 struct repertoire_t
*repertoire
)
3006 /* This function defines the default values for the classes and conversions
3007 according to POSIX.2 2.5.2.1.
3008 It may seem that the order of these if-blocks is arbitrary but it is NOT.
3009 Don't move them unless you know what you are doing! */
3011 auto void set_default (int bitpos
, int from
, int to
);
3013 void set_default (int bitpos
, int from
, int to
)
3017 int bit
= _ISbit (bitpos
);
3018 int bitw
= _ISwbit (bitpos
);
3019 /* Define string. */
3022 for (ch
= from
; ch
<= to
; ++ch
)
3024 struct charseq
*seq
;
3027 seq
= charmap_find_value (charmap
, tmp
, 1);
3031 sprintf (buf
, "U%08X", ch
);
3032 seq
= charmap_find_value (charmap
, buf
, 9);
3037 WITH_CUR_LOCALE (error (0, 0, _("\
3038 %s: character `%s' not defined in charmap while needed as default value"),
3041 else if (seq
->nbytes
!= 1)
3042 WITH_CUR_LOCALE (error (0, 0, _("\
3043 %s: character `%s' in charmap not representable with one byte"),
3046 ctype
->class256_collection
[seq
->bytes
[0]] |= bit
;
3048 /* No need to search here, the ASCII value is also the Unicode
3050 ELEM (ctype
, class_collection
, , ch
) |= bitw
;
3054 /* Set default values if keyword was not present. */
3055 if ((ctype
->class_done
& BITw (tok_upper
)) == 0)
3056 /* "If this keyword [upper] is not specified, the uppercase letters
3057 `A' through `Z', ..., shall automatically belong to this class,
3058 with implementation defined character values." [P1003.2, 2.5.2.1] */
3059 set_default (BITPOS (tok_upper
), 'A', 'Z');
3061 if ((ctype
->class_done
& BITw (tok_lower
)) == 0)
3062 /* "If this keyword [lower] is not specified, the lowercase letters
3063 `a' through `z', ..., shall automatically belong to this class,
3064 with implementation defined character values." [P1003.2, 2.5.2.1] */
3065 set_default (BITPOS (tok_lower
), 'a', 'z');
3067 if ((ctype
->class_done
& BITw (tok_alpha
)) == 0)
3069 /* Table 2-6 in P1003.2 says that characters in class `upper' or
3070 class `lower' *must* be in class `alpha'. */
3071 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
);
3072 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
);
3074 for (cnt
= 0; cnt
< 256; ++cnt
)
3075 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3076 ctype
->class256_collection
[cnt
] |= BIT (tok_alpha
);
3078 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3079 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3080 ctype
->class_collection
[cnt
] |= BITw (tok_alpha
);
3083 if ((ctype
->class_done
& BITw (tok_digit
)) == 0)
3084 /* "If this keyword [digit] is not specified, the digits `0' through
3085 `9', ..., shall automatically belong to this class, with
3086 implementation-defined character values." [P1003.2, 2.5.2.1] */
3087 set_default (BITPOS (tok_digit
), '0', '9');
3089 /* "Only characters specified for the `alpha' and `digit' keyword
3090 shall be specified. Characters specified for the keyword `alpha'
3091 and `digit' are automatically included in this class. */
3093 unsigned long int mask
= BIT (tok_alpha
) | BIT (tok_digit
);
3094 unsigned long int maskw
= BITw (tok_alpha
) | BITw (tok_digit
);
3096 for (cnt
= 0; cnt
< 256; ++cnt
)
3097 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3098 ctype
->class256_collection
[cnt
] |= BIT (tok_alnum
);
3100 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3101 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3102 ctype
->class_collection
[cnt
] |= BITw (tok_alnum
);
3105 if ((ctype
->class_done
& BITw (tok_space
)) == 0)
3106 /* "If this keyword [space] is not specified, the characters <space>,
3107 <form-feed>, <newline>, <carriage-return>, <tab>, and
3108 <vertical-tab>, ..., shall automatically belong to this class,
3109 with implementation-defined character values." [P1003.2, 2.5.2.1] */
3111 struct charseq
*seq
;
3113 seq
= charmap_find_value (charmap
, "space", 5);
3115 seq
= charmap_find_value (charmap
, "SP", 2);
3117 seq
= charmap_find_value (charmap
, "U00000020", 9);
3121 WITH_CUR_LOCALE (error (0, 0, _("\
3122 %s: character `%s' not defined while needed as default value"),
3123 "LC_CTYPE", "<space>"));
3125 else if (seq
->nbytes
!= 1)
3126 WITH_CUR_LOCALE (error (0, 0, _("\
3127 %s: character `%s' in charmap not representable with one byte"),
3128 "LC_CTYPE", "<space>"));
3130 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3132 /* No need to search. */
3133 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_space
);
3135 seq
= charmap_find_value (charmap
, "form-feed", 9);
3137 seq
= charmap_find_value (charmap
, "U0000000C", 9);
3141 WITH_CUR_LOCALE (error (0, 0, _("\
3142 %s: character `%s' not defined while needed as default value"),
3143 "LC_CTYPE", "<form-feed>"));
3145 else if (seq
->nbytes
!= 1)
3146 WITH_CUR_LOCALE (error (0, 0, _("\
3147 %s: character `%s' in charmap not representable with one byte"),
3148 "LC_CTYPE", "<form-feed>"));
3150 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3152 /* No need to search. */
3153 ELEM (ctype
, class_collection
, , L
'\f') |= BITw (tok_space
);
3156 seq
= charmap_find_value (charmap
, "newline", 7);
3158 seq
= charmap_find_value (charmap
, "U0000000A", 9);
3162 WITH_CUR_LOCALE (error (0, 0, _("\
3163 character `%s' not defined while needed as default value"),
3166 else if (seq
->nbytes
!= 1)
3167 WITH_CUR_LOCALE (error (0, 0, _("\
3168 %s: character `%s' in charmap not representable with one byte"),
3169 "LC_CTYPE", "<newline>"));
3171 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3173 /* No need to search. */
3174 ELEM (ctype
, class_collection
, , L
'\n') |= BITw (tok_space
);
3177 seq
= charmap_find_value (charmap
, "carriage-return", 15);
3179 seq
= charmap_find_value (charmap
, "U0000000D", 9);
3183 WITH_CUR_LOCALE (error (0, 0, _("\
3184 %s: character `%s' not defined while needed as default value"),
3185 "LC_CTYPE", "<carriage-return>"));
3187 else if (seq
->nbytes
!= 1)
3188 WITH_CUR_LOCALE (error (0, 0, _("\
3189 %s: character `%s' in charmap not representable with one byte"),
3190 "LC_CTYPE", "<carriage-return>"));
3192 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3194 /* No need to search. */
3195 ELEM (ctype
, class_collection
, , L
'\r') |= BITw (tok_space
);
3198 seq
= charmap_find_value (charmap
, "tab", 3);
3200 seq
= charmap_find_value (charmap
, "U00000009", 9);
3204 WITH_CUR_LOCALE (error (0, 0, _("\
3205 %s: character `%s' not defined while needed as default value"),
3206 "LC_CTYPE", "<tab>"));
3208 else if (seq
->nbytes
!= 1)
3209 WITH_CUR_LOCALE (error (0, 0, _("\
3210 %s: character `%s' in charmap not representable with one byte"),
3211 "LC_CTYPE", "<tab>"));
3213 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3215 /* No need to search. */
3216 ELEM (ctype
, class_collection
, , L
'\t') |= BITw (tok_space
);
3219 seq
= charmap_find_value (charmap
, "vertical-tab", 12);
3221 seq
= charmap_find_value (charmap
, "U0000000B", 9);
3225 WITH_CUR_LOCALE (error (0, 0, _("\
3226 %s: character `%s' not defined while needed as default value"),
3227 "LC_CTYPE", "<vertical-tab>"));
3229 else if (seq
->nbytes
!= 1)
3230 WITH_CUR_LOCALE (error (0, 0, _("\
3231 %s: character `%s' in charmap not representable with one byte"),
3232 "LC_CTYPE", "<vertical-tab>"));
3234 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3236 /* No need to search. */
3237 ELEM (ctype
, class_collection
, , L
'\v') |= BITw (tok_space
);
3240 if ((ctype
->class_done
& BITw (tok_xdigit
)) == 0)
3241 /* "If this keyword is not specified, the digits `0' to `9', the
3242 uppercase letters `A' through `F', and the lowercase letters `a'
3243 through `f', ..., shall automatically belong to this class, with
3244 implementation defined character values." [P1003.2, 2.5.2.1] */
3246 set_default (BITPOS (tok_xdigit
), '0', '9');
3247 set_default (BITPOS (tok_xdigit
), 'A', 'F');
3248 set_default (BITPOS (tok_xdigit
), 'a', 'f');
3251 if ((ctype
->class_done
& BITw (tok_blank
)) == 0)
3252 /* "If this keyword [blank] is unspecified, the characters <space> and
3253 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
3255 struct charseq
*seq
;
3257 seq
= charmap_find_value (charmap
, "space", 5);
3259 seq
= charmap_find_value (charmap
, "SP", 2);
3261 seq
= charmap_find_value (charmap
, "U00000020", 9);
3265 WITH_CUR_LOCALE (error (0, 0, _("\
3266 %s: character `%s' not defined while needed as default value"),
3267 "LC_CTYPE", "<space>"));
3269 else if (seq
->nbytes
!= 1)
3270 WITH_CUR_LOCALE (error (0, 0, _("\
3271 %s: character `%s' in charmap not representable with one byte"),
3272 "LC_CTYPE", "<space>"));
3274 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_blank
);
3276 /* No need to search. */
3277 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_blank
);
3280 seq
= charmap_find_value (charmap
, "tab", 3);
3282 seq
= charmap_find_value (charmap
, "U00000009", 9);
3286 WITH_CUR_LOCALE (error (0, 0, _("\
3287 %s: character `%s' not defined while needed as default value"),
3288 "LC_CTYPE", "<tab>"));
3290 else if (seq
->nbytes
!= 1)
3291 WITH_CUR_LOCALE (error (0, 0, _("\
3292 %s: character `%s' in charmap not representable with one byte"),
3293 "LC_CTYPE", "<tab>"));
3295 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_blank
);
3297 /* No need to search. */
3298 ELEM (ctype
, class_collection
, , L
'\t') |= BITw (tok_blank
);
3301 if ((ctype
->class_done
& BITw (tok_graph
)) == 0)
3302 /* "If this keyword [graph] is not specified, characters specified for
3303 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3304 shall belong to this character class." [P1003.2, 2.5.2.1] */
3306 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
) |
3307 BIT (tok_alpha
) | BIT (tok_digit
) | BIT (tok_xdigit
) | BIT (tok_punct
);
3308 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
) |
3309 BITw (tok_alpha
) | BITw (tok_digit
) | BITw (tok_xdigit
) |
3313 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3314 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3315 ctype
->class_collection
[cnt
] |= BITw (tok_graph
);
3317 for (cnt
= 0; cnt
< 256; ++cnt
)
3318 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3319 ctype
->class256_collection
[cnt
] |= BIT (tok_graph
);
3322 if ((ctype
->class_done
& BITw (tok_print
)) == 0)
3323 /* "If this keyword [print] is not provided, characters specified for
3324 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3325 and the <space> character shall belong to this character class."
3326 [P1003.2, 2.5.2.1] */
3328 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
) |
3329 BIT (tok_alpha
) | BIT (tok_digit
) | BIT (tok_xdigit
) | BIT (tok_punct
);
3330 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
) |
3331 BITw (tok_alpha
) | BITw (tok_digit
) | BITw (tok_xdigit
) |
3334 struct charseq
*seq
;
3336 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3337 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3338 ctype
->class_collection
[cnt
] |= BITw (tok_print
);
3340 for (cnt
= 0; cnt
< 256; ++cnt
)
3341 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3342 ctype
->class256_collection
[cnt
] |= BIT (tok_print
);
3345 seq
= charmap_find_value (charmap
, "space", 5);
3347 seq
= charmap_find_value (charmap
, "SP", 2);
3349 seq
= charmap_find_value (charmap
, "U00000020", 9);
3353 WITH_CUR_LOCALE (error (0, 0, _("\
3354 %s: character `%s' not defined while needed as default value"),
3355 "LC_CTYPE", "<space>"));
3357 else if (seq
->nbytes
!= 1)
3358 WITH_CUR_LOCALE (error (0, 0, _("\
3359 %s: character `%s' in charmap not representable with one byte"),
3360 "LC_CTYPE", "<space>"));
3362 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_print
);
3364 /* No need to search. */
3365 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_print
);
3368 if (ctype
->tomap_done
[0] == 0)
3369 /* "If this keyword [toupper] is not specified, the lowercase letters
3370 `a' through `z', and their corresponding uppercase letters `A' to
3371 `Z', ..., shall automatically be included, with implementation-
3372 defined character values." [P1003.2, 2.5.2.1] */
3377 strcpy (tmp
, "<?>");
3379 for (ch
= 'a'; ch
<= 'z'; ++ch
)
3381 struct charseq
*seq_from
, *seq_to
;
3385 seq_from
= charmap_find_value (charmap
, &tmp
[1], 1);
3386 if (seq_from
== NULL
)
3389 sprintf (buf
, "U%08X", ch
);
3390 seq_from
= charmap_find_value (charmap
, buf
, 9);
3392 if (seq_from
== NULL
)
3395 WITH_CUR_LOCALE (error (0, 0, _("\
3396 %s: character `%s' not defined while needed as default value"),
3399 else if (seq_from
->nbytes
!= 1)
3402 WITH_CUR_LOCALE (error (0, 0, _("\
3403 %s: character `%s' needed as default value not representable with one byte"),
3408 /* This conversion is implementation defined. */
3409 tmp
[1] = (char) (ch
+ ('A' - 'a'));
3410 seq_to
= charmap_find_value (charmap
, &tmp
[1], 1);
3414 sprintf (buf
, "U%08X", ch
+ ('A' - 'a'));
3415 seq_to
= charmap_find_value (charmap
, buf
, 9);
3420 WITH_CUR_LOCALE (error (0, 0, _("\
3421 %s: character `%s' not defined while needed as default value"),
3424 else if (seq_to
->nbytes
!= 1)
3427 WITH_CUR_LOCALE (error (0, 0, _("\
3428 %s: character `%s' needed as default value not representable with one byte"),
3432 /* The index [0] is determined by the order of the
3433 `ctype_map_newP' calls in `ctype_startup'. */
3434 ctype
->map256_collection
[0][seq_from
->bytes
[0]]
3438 /* No need to search. */
3439 ELEM (ctype
, map_collection
, [0], ch
) = ch
+ ('A' - 'a');
3443 if (ctype
->tomap_done
[1] == 0)
3444 /* "If this keyword [tolower] is not specified, the mapping shall be
3445 the reverse mapping of the one specified to `toupper'." [P1003.2] */
3447 for (cnt
= 0; cnt
< ctype
->map_collection_act
[0]; ++cnt
)
3448 if (ctype
->map_collection
[0][cnt
] != 0)
3449 ELEM (ctype
, map_collection
, [1],
3450 ctype
->map_collection
[0][cnt
])
3451 = ctype
->charnames
[cnt
];
3453 for (cnt
= 0; cnt
< 256; ++cnt
)
3454 if (ctype
->map256_collection
[0][cnt
] != 0)
3455 ctype
->map256_collection
[1][ctype
->map256_collection
[0][cnt
]] = cnt
;
3458 if (ctype
->outdigits_act
!= 10)
3460 if (ctype
->outdigits_act
!= 0)
3461 WITH_CUR_LOCALE (error (0, 0, _("\
3462 %s: field `%s' does not contain exactly ten entries"),
3463 "LC_CTYPE", "outdigit"));
3465 for (cnt
= ctype
->outdigits_act
; cnt
< 10; ++cnt
)
3467 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3470 if (ctype
->mboutdigits
[cnt
] == NULL
)
3471 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3473 strlen (longnames
[cnt
]));
3475 if (ctype
->mboutdigits
[cnt
] == NULL
)
3476 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3479 if (ctype
->mboutdigits
[cnt
] == NULL
)
3481 /* Provide a replacement. */
3482 WITH_CUR_LOCALE (error (0, 0, _("\
3483 no output digits defined and none of the standard names in the charmap")));
3485 ctype
->mboutdigits
[cnt
] = obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
3486 sizeof (struct charseq
)
3489 /* This is better than nothing. */
3490 ctype
->mboutdigits
[cnt
]->bytes
[0] = digits
[cnt
];
3491 ctype
->mboutdigits
[cnt
]->nbytes
= 1;
3494 ctype
->wcoutdigits
[cnt
] = L
'0' + cnt
;
3497 ctype
->outdigits_act
= 10;
3502 /* Construction of sparse 3-level tables.
3503 See wchar-lookup.h for their structure and the meaning of p and q. */
3510 /* Working representation. */
3511 size_t level1_alloc
;
3514 size_t level2_alloc
;
3517 size_t level3_alloc
;
3520 /* Compressed representation. */
3525 /* Initialize. Assumes t->p and t->q have already been set. */
3527 wctype_table_init (struct wctype_table
*t
)
3530 t
->level1_alloc
= t
->level1_size
= 0;
3532 t
->level2_alloc
= t
->level2_size
= 0;
3534 t
->level3_alloc
= t
->level3_size
= 0;
3537 /* Retrieve an entry. */
3539 wctype_table_get (struct wctype_table
*t
, uint32_t wc
)
3541 uint32_t index1
= wc
>> (t
->q
+ t
->p
+ 5);
3542 if (index1
< t
->level1_size
)
3544 uint32_t lookup1
= t
->level1
[index1
];
3545 if (lookup1
!= EMPTY
)
3547 uint32_t index2
= ((wc
>> (t
->p
+ 5)) & ((1 << t
->q
) - 1))
3548 + (lookup1
<< t
->q
);
3549 uint32_t lookup2
= t
->level2
[index2
];
3550 if (lookup2
!= EMPTY
)
3552 uint32_t index3
= ((wc
>> 5) & ((1 << t
->p
) - 1))
3553 + (lookup2
<< t
->p
);
3554 uint32_t lookup3
= t
->level3
[index3
];
3555 uint32_t index4
= wc
& 0x1f;
3557 return (lookup3
>> index4
) & 1;
3564 /* Add one entry. */
3566 wctype_table_add (struct wctype_table
*t
, uint32_t wc
)
3568 uint32_t index1
= wc
>> (t
->q
+ t
->p
+ 5);
3569 uint32_t index2
= (wc
>> (t
->p
+ 5)) & ((1 << t
->q
) - 1);
3570 uint32_t index3
= (wc
>> 5) & ((1 << t
->p
) - 1);
3571 uint32_t index4
= wc
& 0x1f;
3574 if (index1
>= t
->level1_size
)
3576 if (index1
>= t
->level1_alloc
)
3578 size_t alloc
= 2 * t
->level1_alloc
;
3579 if (alloc
<= index1
)
3581 t
->level1
= (uint32_t *) xrealloc ((char *) t
->level1
,
3582 alloc
* sizeof (uint32_t));
3583 t
->level1_alloc
= alloc
;
3585 while (index1
>= t
->level1_size
)
3586 t
->level1
[t
->level1_size
++] = EMPTY
;
3589 if (t
->level1
[index1
] == EMPTY
)
3591 if (t
->level2_size
== t
->level2_alloc
)
3593 size_t alloc
= 2 * t
->level2_alloc
+ 1;
3594 t
->level2
= (uint32_t *) xrealloc ((char *) t
->level2
,
3595 (alloc
<< t
->q
) * sizeof (uint32_t));
3596 t
->level2_alloc
= alloc
;
3598 i1
= t
->level2_size
<< t
->q
;
3599 i2
= (t
->level2_size
+ 1) << t
->q
;
3600 for (i
= i1
; i
< i2
; i
++)
3601 t
->level2
[i
] = EMPTY
;
3602 t
->level1
[index1
] = t
->level2_size
++;
3605 index2
+= t
->level1
[index1
] << t
->q
;
3607 if (t
->level2
[index2
] == EMPTY
)
3609 if (t
->level3_size
== t
->level3_alloc
)
3611 size_t alloc
= 2 * t
->level3_alloc
+ 1;
3612 t
->level3
= (uint32_t *) xrealloc ((char *) t
->level3
,
3613 (alloc
<< t
->p
) * sizeof (uint32_t));
3614 t
->level3_alloc
= alloc
;
3616 i1
= t
->level3_size
<< t
->p
;
3617 i2
= (t
->level3_size
+ 1) << t
->p
;
3618 for (i
= i1
; i
< i2
; i
++)
3620 t
->level2
[index2
] = t
->level3_size
++;
3623 index3
+= t
->level2
[index2
] << t
->p
;
3625 t
->level3
[index3
] |= (uint32_t)1 << index4
;
3628 /* Finalize and shrink.  Identical level3 blocks and identical level2
   blocks are merged, the references held in level2 and level1 are
   renumbered accordingly, and the three levels are then serialized
   behind a five-word header into one buffer, t->result.
   NOTE(review): this listing omits some source lines (see the gaps in
   the embedded line numbers); the return type, the declarations of
   i, j and k, and several statements are not visible here.  */
3630 wctype_table_finalize (struct wctype_table
*t
)
/* reorder3 and reorder2 are indexed by a block number; they are read
   back further down to rewrite the non-EMPTY entries of level2 and
   level1 respectively.  Both are sized from the current level sizes.  */
3633 uint32_t reorder3
[t
->level3_size
];
3634 uint32_t reorder2
[t
->level2_size
];
/* Offsets, in bytes, that are added to t->result when the levels are
   written out below.  */
3635 uint32_t level1_offset
, level2_offset
, level3_offset
;
3637 /* Uniquify level3 blocks.  A level3 block is (1 << t->p) uint32_t
   words; block j is compared with memcmp against each block i < k.
   NOTE(review): how k is initialized and advanced, and how the inner
   loop is left on a match, is on lines not shown here.  */
3639 for (j
= 0; j
< t
->level3_size
; j
++)
3641 for (i
= 0; i
< k
; i
++)
3642 if (memcmp (&t
->level3
[i
<< t
->p
], &t
->level3
[j
<< t
->p
],
3643 (1 << t
->p
) * sizeof (uint32_t)) == 0)
3645 /* Relocate block j to block i. */
3650 memcpy (&t
->level3
[i
<< t
->p
], &t
->level3
[j
<< t
->p
],
3651 (1 << t
->p
) * sizeof (uint32_t));
/* Renumber every non-EMPTY level2 entry through reorder3.  The loop
   covers all (level2_size << q) entries.  */
3657 for (i
= 0; i
< (t
->level2_size
<< t
->q
); i
++)
3658 if (t
->level2
[i
] != EMPTY
)
3659 t
->level2
[i
] = reorder3
[t
->level2
[i
]];
3661 /* Uniquify level2 blocks.  Same scheme as for level3 above, but a
   level2 block is (1 << t->q) uint32_t entries.  */
3663 for (j
= 0; j
< t
->level2_size
; j
++)
3665 for (i
= 0; i
< k
; i
++)
3666 if (memcmp (&t
->level2
[i
<< t
->q
], &t
->level2
[j
<< t
->q
],
3667 (1 << t
->q
) * sizeof (uint32_t)) == 0)
3669 /* Relocate block j to block i. */
3674 memcpy (&t
->level2
[i
<< t
->q
], &t
->level2
[j
<< t
->q
],
3675 (1 << t
->q
) * sizeof (uint32_t));
/* Renumber every non-EMPTY level1 entry through reorder2.  */
3681 for (i
= 0; i
< t
->level1_size
; i
++)
3682 if (t
->level1
[i
] != EMPTY
)
3683 t
->level1
[i
] = reorder2
[t
->level1
[i
]];
3685 /* Create and fill the resulting compressed representation. */
/* The sum below is a size in bytes: five header words, then level1_size
   words, then (level2_size << q) words, then (level3_size << p) words.
   NOTE(review): the assignment target is on a line not shown here;
   presumably t->result_size, which is what xmalloc is given next.  */
3687 5 * sizeof (uint32_t)
3688 + t
->level1_size
* sizeof (uint32_t)
3689 + (t
->level2_size
<< t
->q
) * sizeof (uint32_t)
3690 + (t
->level3_size
<< t
->p
) * sizeof (uint32_t);
3691 t
->result
= (char *) xmalloc (t
->result_size
);
/* Three running byte offsets: past the header; past header + level1;
   past header + level1 + level2.
   NOTE(review): the assignment targets are on lines not shown here;
   presumably level1_offset, level2_offset and level3_offset in that
   order -- confirm.  */
3694 5 * sizeof (uint32_t);
3696 5 * sizeof (uint32_t)
3697 + t
->level1_size
* sizeof (uint32_t);
3699 5 * sizeof (uint32_t)
3700 + t
->level1_size
* sizeof (uint32_t)
3701 + (t
->level2_size
<< t
->q
) * sizeof (uint32_t);
/* Header words 0..4: q + p + 5, level1_size, p + 5, the q-bit mask and
   the p-bit mask.
   NOTE(review): the constant 5 presumably stands for the 32 (= 1 << 5)
   bits kept in each level3 word -- confirm against the reader of this
   table.  */
3703 ((uint32_t *) t
->result
)[0] = t
->q
+ t
->p
+ 5;
3704 ((uint32_t *) t
->result
)[1] = t
->level1_size
;
3705 ((uint32_t *) t
->result
)[2] = t
->p
+ 5;
3706 ((uint32_t *) t
->result
)[3] = (1 << t
->q
) - 1;
3707 ((uint32_t *) t
->result
)[4] = (1 << t
->p
) - 1;
/* Level1 is written as byte offsets: a non-EMPTY entry becomes the
   start of its level2 block, (entry << q) words past level2_offset.
   The value stored for an EMPTY entry is on a line not shown here.  */
3709 for (i
= 0; i
< t
->level1_size
; i
++)
3710 ((uint32_t *) (t
->result
+ level1_offset
))[i
] =
3711 (t
->level1
[i
] == EMPTY
3713 : (t
->level1
[i
] << t
->q
) * sizeof (uint32_t) + level2_offset
);
/* Level2 likewise: a non-EMPTY entry becomes the byte offset of its
   level3 block, (entry << p) words past level3_offset.  */
3715 for (i
= 0; i
< (t
->level2_size
<< t
->q
); i
++)
3716 ((uint32_t *) (t
->result
+ level2_offset
))[i
] =
3717 (t
->level2
[i
] == EMPTY
3719 : (t
->level2
[i
] << t
->p
) * sizeof (uint32_t) + level3_offset
);
/* Level3 words are copied into the result unchanged.  */
3721 for (i
= 0; i
< (t
->level3_size
<< t
->p
); i
++)
3722 ((uint32_t *) (t
->result
+ level3_offset
))[i
] = t
->level3
[i
];
/* NOTE(review): the statements guarded by the three tests below are on
   lines not shown here; presumably each working array is released when
   its allocation count is non-zero -- confirm.  */
3724 if (t
->level1_alloc
> 0)
3726 if (t
->level2_alloc
> 0)
3728 if (t
->level3_alloc
> 0)
/* First parameter set: a table type named wcwidth_table with uint8_t
   elements and 0xff as DEFAULT.
   NOTE(review): the line that consumes TABLE/ELEMENT/DEFAULT is not
   visible in this listing.  */
3732 #define TABLE wcwidth_table
3733 #define ELEMENT uint8_t
3734 #define DEFAULT 0xff
/* Second parameter set: a table type named wctrans_table with int32_t
   elements.  While this set is in effect the name wctrans_table_add is
   redirected to wctrans_table_add_internal; the #undef afterwards frees
   the plain name for the wrapper defined next, which calls
   wctrans_table_add_internal itself.  */
3737 #define TABLE wctrans_table
3738 #define ELEMENT int32_t
3740 #define wctrans_table_add wctrans_table_add_internal
3742 #undef wctrans_table_add
3743 /* The wctrans_table must actually store the difference between the
   desired result and the argument.  This wrapper therefore hands
   mapped_wc - wc (computed in uint32_t arithmetic) to
   wctrans_table_add_internal rather than mapped_wc itself.

   t          table to add the entry to
   wc         character being mapped
   mapped_wc  desired result of the mapping for wc

   NOTE(review): the return type and the braces are on lines not shown
   in this listing.  */
3746 wctrans_table_add (struct wctrans_table
*t
, uint32_t wc
, uint32_t mapped_wc
)
3748 wctrans_table_add_internal (t
, wc
, mapped_wc
- wc
);
3752 /* Flattens the included transliterations into a translit list.
   Inserts them in the list at `cursor', and returns the new cursor.

   ctype    locale whose translit_include chain is consumed; every entry
            is unchained as it is processed
   charmap  passed on unchanged to find_locale and to the recursive call
   cursor   address of the list link at which the next included translit
            list is spliced in

   NOTE(review): this listing omits some source lines (braces, the test
   that selects the error branch, the return statement).  */
3754 static struct translit_t
**
3755 translit_flatten (struct locale_ctype_t
*ctype
,
3756 const struct charmap_t
*charmap
,
3757 struct translit_t
**cursor
)
/* Process include entries until the chain is empty.  */
3759 while (ctype
->translit_include
!= NULL
)
/* Remember the locale and repertoire names of the head entry before it
   is unchained below.  */
3761 const char *copy_locale
= ctype
->translit_include
->copy_locale
;
3762 const char *copy_repertoire
= ctype
->translit_include
->copy_repertoire
;
3763 struct localedef_t
*other
;
3765 /* Unchain the include statement. During the depth-first traversal
   we don't want to visit any locale more than once. */
3767 ctype
->translit_include
= ctype
->translit_include
->next
;
/* Look up the LC_CTYPE data of the named locale.  */
3769 other
= find_locale (LC_CTYPE
, copy_locale
, copy_repertoire
, charmap
);
/* NOTE(review): the condition guarding this diagnostic is on a line not
   shown here; presumably it is issued when the lookup yields no usable
   LC_CTYPE data -- confirm.  */
3773 WITH_CUR_LOCALE (error (0, 0, _("\
3774 %s: transliteration data from locale `%s' not available"),
3775 "LC_CTYPE", copy_locale
));
3779 struct locale_ctype_t
*other_ctype
=
3780 other
->categories
[LC_CTYPE
].ctype
;
/* Recurse first so that the included locale's own includes are folded
   in ahead of its translit list; the assert checks that the recursion
   emptied its include chain.  */
3782 cursor
= translit_flatten (other_ctype
, charmap
, cursor
);
3783 assert (other_ctype
->translit_include
== NULL
);
3785 if (other_ctype
->translit
!= NULL
)
3787 /* Insert the other_ctype->translit list at *cursor. */
3788 struct translit_t
*endp
= other_ctype
->translit
;
/* NOTE(review): the body of this loop is on a line not shown here;
   presumably it advances endp to the last node of the list.  */
3789 while (endp
->next
!= NULL
)
/* Splice: whatever followed *cursor now follows endp, and *cursor
   points at the head of the included list.  */
3792 endp
->next
= *cursor
;
3793 *cursor
= other_ctype
->translit
;
3795 /* Avoid any risk of circular lists. */
3796 other_ctype
->translit
= NULL
;
/* Later insertions go after the list that was just spliced in.  */
3798 cursor
= &endp
->next
;
/* Take over the included locale's default_missing only when this locale
   has none of its own.  */
3801 if (ctype
->default_missing
== NULL
)
3802 ctype
->default_missing
= other_ctype
->default_missing
;
3810 allocate_arrays (struct locale_ctype_t
*ctype
, const struct charmap_t
*charmap
,
3811 struct repertoire_t
*repertoire
)
3819 /* You wonder about this amount of memory? This is only because some
3820 users do not manage to address the array with unsigned values or
3821 data types with range >= 256. '\200' would result in the array
3822 index -128. To help these poor people we duplicate the entries for
3823 128 up to 255 below the entry for \0. */
3824 ctype
->ctype_b
= (char_class_t
*) xcalloc (256 + 128, sizeof (char_class_t
));
3825 ctype
->ctype32_b
= (char_class32_t
*) xcalloc (256, sizeof (char_class32_t
));
3826 ctype
->class_b
= (uint32_t **)
3827 xmalloc (ctype
->nr_charclass
* sizeof (uint32_t *));
3828 ctype
->class_3level
= (struct iovec
*)
3829 xmalloc (ctype
->nr_charclass
* sizeof (struct iovec
));
3831 /* This is the array accessed using the multibyte string elements. */
3832 for (idx
= 0; idx
< 256; ++idx
)
3833 ctype
->ctype_b
[128 + idx
] = ctype
->class256_collection
[idx
];
3835 /* Mirror first 127 entries. We must take care that entry -1 is not
3836 mirrored because EOF == -1. */
3837 for (idx
= 0; idx
< 127; ++idx
)
3838 ctype
->ctype_b
[idx
] = ctype
->ctype_b
[256 + idx
];
3840 /* The 32 bit array contains all characters < 0x100. */
3841 for (idx
= 0; idx
< ctype
->class_collection_act
; ++idx
)
3842 if (ctype
->charnames
[idx
] < 0x100)
3843 ctype
->ctype32_b
[ctype
->charnames
[idx
]] = ctype
->class_collection
[idx
];
3845 for (nr
= 0; nr
< ctype
->nr_charclass
; nr
++)
3847 ctype
->class_b
[nr
] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
3849 for (idx
= 0; idx
< 256; ++idx
)
3850 if (ctype
->class256_collection
[idx
] & _ISbit (nr
))
3851 ctype
->class_b
[nr
][idx
>> 5] |= (uint32_t)1 << (idx
& 0x1f);
3854 for (nr
= 0; nr
< ctype
->nr_charclass
; nr
++)
3856 struct wctype_table t
;
3858 t
.p
= 4; /* or: 5 */
3859 t
.q
= 7; /* or: 6 */
3860 wctype_table_init (&t
);
3862 for (idx
= 0; idx
< ctype
->class_collection_act
; ++idx
)
3863 if (ctype
->class_collection
[idx
] & _ISwbit (nr
))
3864 wctype_table_add (&t
, ctype
->charnames
[idx
]);
3866 wctype_table_finalize (&t
);
3869 WITH_CUR_LOCALE (fprintf (stderr
, _("\
3870 %s: table for class \"%s\": %lu bytes\n"),
3871 "LC_CTYPE", ctype
->classnames
[nr
],
3872 (unsigned long int) t
.result_size
));
3874 ctype
->class_3level
[nr
].iov_base
= t
.result
;
3875 ctype
->class_3level
[nr
].iov_len
= t
.result_size
;
3878 /* Room for table of mappings. */
3879 ctype
->map_b
= (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3880 ctype
->map32_b
= (uint32_t **) xmalloc (ctype
->map_collection_nr
3881 * sizeof (uint32_t *));
3882 ctype
->map_3level
= (struct iovec
*)
3883 xmalloc (ctype
->map_collection_nr
* sizeof (struct iovec
));
3885 /* Fill in all mappings. */
3886 for (idx
= 0; idx
< 2; ++idx
)
3890 /* Allocate table. */
3891 ctype
->map_b
[idx
] = (uint32_t *)
3892 xmalloc ((256 + 128) * sizeof (uint32_t));
3894 /* Copy values from collection. */
3895 for (idx2
= 0; idx2
< 256; ++idx2
)
3896 ctype
->map_b
[idx
][128 + idx2
] = ctype
->map256_collection
[idx
][idx2
];
3898 /* Mirror first 127 entries. We must take care not to map entry
3899 -1 because EOF == -1. */
3900 for (idx2
= 0; idx2
< 127; ++idx2
)
3901 ctype
->map_b
[idx
][idx2
] = ctype
->map_b
[idx
][256 + idx2
];
3903 /* EOF must map to EOF. */
3904 ctype
->map_b
[idx
][127] = EOF
;
3907 for (idx
= 0; idx
< ctype
->map_collection_nr
; ++idx
)
3911 /* Allocate table. */
3912 ctype
->map32_b
[idx
] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
3914 /* Copy values from collection. Default is identity mapping. */
3915 for (idx2
= 0; idx2
< 256; ++idx2
)
3916 ctype
->map32_b
[idx
][idx2
] =
3917 (ctype
->map_collection
[idx
][idx2
] != 0
3918 ? ctype
->map_collection
[idx
][idx2
]
3922 for (nr
= 0; nr
< ctype
->map_collection_nr
; nr
++)
3924 struct wctrans_table t
;
3928 wctrans_table_init (&t
);
3930 for (idx
= 0; idx
< ctype
->map_collection_act
[nr
]; ++idx
)
3931 if (ctype
->map_collection
[nr
][idx
] != 0)
3932 wctrans_table_add (&t
, ctype
->charnames
[idx
],
3933 ctype
->map_collection
[nr
][idx
]);
3935 wctrans_table_finalize (&t
);
3938 WITH_CUR_LOCALE (fprintf (stderr
, _("\
3939 %s: table for map \"%s\": %lu bytes\n"),
3940 "LC_CTYPE", ctype
->mapnames
[nr
],
3941 (unsigned long int) t
.result_size
));
3943 ctype
->map_3level
[nr
].iov_base
= t
.result
;
3944 ctype
->map_3level
[nr
].iov_len
= t
.result_size
;
3947 /* Extra array for class and map names. */
3948 ctype
->class_name_ptr
= (uint32_t *) xmalloc (ctype
->nr_charclass
3949 * sizeof (uint32_t));
3950 ctype
->map_name_ptr
= (uint32_t *) xmalloc (ctype
->map_collection_nr
3951 * sizeof (uint32_t));
3953 ctype
->class_offset
= _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
);
3954 ctype
->map_offset
= ctype
->class_offset
+ ctype
->nr_charclass
;
3956 /* Array for width information. Because the expected widths are very
3957 small (never larger than 2) we use only one single byte. This
3959 We put only printable characters in the table. wcwidth is specified
3960 to return -1 for non-printable characters. Doing the check here
3961 saves a run-time check.
3962 But we put L'\0' in the table. This again saves a run-time check. */
3964 struct wcwidth_table t
;
3968 wcwidth_table_init (&t
);
3970 /* First set all the printable characters of the character set to
3971 the default width. */
3973 while (iterate_table (&charmap
->char_table
, &curs
, &key
, &len
, &vdata
) == 0)
3975 struct charseq
*data
= (struct charseq
*) vdata
;
3977 if (data
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
3978 data
->ucs4
= repertoire_find_value (ctype
->repertoire
,
3981 if (data
->ucs4
!= ILLEGAL_CHAR_VALUE
)
3983 uint32_t *class_bits
=
3984 find_idx (ctype
, &ctype
->class_collection
, NULL
,
3985 &ctype
->class_collection_act
, data
->ucs4
);
3987 if (class_bits
!= NULL
&& (*class_bits
& BITw (tok_print
)))
3988 wcwidth_table_add (&t
, data
->ucs4
, charmap
->width_default
);
3992 /* Now add the explicitly specified widths. */
3993 if (charmap
->width_rules
!= NULL
)
3997 for (cnt
= 0; cnt
< charmap
->nwidth_rules
; ++cnt
)
3999 unsigned char bytes
[charmap
->mb_cur_max
];
4000 int nbytes
= charmap
->width_rules
[cnt
].from
->nbytes
;
4002 /* We have the range of character for which the width is
4003 specified described using byte sequences of the multibyte
4004 charset. We have to convert this to UCS4 now. And we
4005 cannot simply convert the beginning and the end of the
4006 sequence, we have to iterate over the byte sequence and
4007 convert it for every single character. */
4008 memcpy (bytes
, charmap
->width_rules
[cnt
].from
->bytes
, nbytes
);
4010 while (nbytes
< charmap
->width_rules
[cnt
].to
->nbytes
4011 || memcmp (bytes
, charmap
->width_rules
[cnt
].to
->bytes
,
4014 /* Find the UCS value for `bytes'. */
4017 struct charseq
*seq
=
4018 charmap_find_symbol (charmap
, bytes
, nbytes
);
4021 wch
= ILLEGAL_CHAR_VALUE
;
4022 else if (seq
->ucs4
!= UNINITIALIZED_CHAR_VALUE
)
4025 wch
= repertoire_find_value (ctype
->repertoire
, seq
->name
,
4026 strlen (seq
->name
));
4028 if (wch
!= ILLEGAL_CHAR_VALUE
)
4030 /* Store the value. */
4031 uint32_t *class_bits
=
4032 find_idx (ctype
, &ctype
->class_collection
, NULL
,
4033 &ctype
->class_collection_act
, wch
);
4035 if (class_bits
!= NULL
&& (*class_bits
& BITw (tok_print
)))
4036 wcwidth_table_add (&t
, wch
,
4037 charmap
->width_rules
[cnt
].width
);
4040 /* "Increment" the bytes sequence. */
4042 while (inner
>= 0 && bytes
[inner
] == 0xff)
4047 /* We have to extend the byte sequence. */
4048 if (nbytes
>= charmap
->width_rules
[cnt
].to
->nbytes
)
4052 memset (&bytes
[1], 0, nbytes
);
4058 while (++inner
< nbytes
)
4065 /* Set the width of L'\0' to 0. */
4066 wcwidth_table_add (&t
, 0, 0);
4068 wcwidth_table_finalize (&t
);
4071 WITH_CUR_LOCALE (fprintf (stderr
, _("%s: table for width: %lu bytes\n"),
4072 "LC_CTYPE", (unsigned long int) t
.result_size
));
4074 ctype
->width
.iov_base
= t
.result
;
4075 ctype
->width
.iov_len
= t
.result_size
;
4078 /* Set MB_CUR_MAX. */
4079 ctype
->mb_cur_max
= charmap
->mb_cur_max
;
4081 /* Now determine the table for the transliteration information.
4083 XXX It is not yet clear to me whether it is worth implementing a
4084 complicated algorithm which uses a hash table to locate the entries.
4085 For now I'll use a simple array which can be searched using binary search. */
4087 if (ctype
->translit_include
!= NULL
)
4088 /* Traverse the locales mentioned in the `include' statements in a
4089 depth-first way and fold in their transliteration information. */
4090 translit_flatten (ctype
, charmap
, &ctype
->translit
);
4092 if (ctype
->translit
!= NULL
)
4094 /* First count how many entries we have. This is the upper limit
4095 since some entries from the included files might be overwritten. */
4098 struct translit_t
*runp
= ctype
->translit
;
4099 struct translit_t
**sorted
;
4100 size_t from_len
, to_len
;
4102 while (runp
!= NULL
)
4108 /* Next we allocate an array large enough and fill in the values. */
4109 sorted
= (struct translit_t
**) alloca (number
4110 * sizeof (struct translit_t
**));
4111 runp
= ctype
->translit
;
4115 /* Search for the place where to insert this string.
4116 XXX Better use a real sorting algorithm later. */
4120 while (idx
< number
)
4122 int res
= wcscmp ((const wchar_t *) sorted
[idx
]->from
,
4123 (const wchar_t *) runp
->from
);
4138 memmove (&sorted
[idx
+ 1], &sorted
[idx
],
4139 (number
- idx
) * sizeof (struct translit_t
*));
4146 while (runp
!= NULL
);
4148 /* The next step is putting all the possible transliteration
4149 strings in one memory block so that we can write it out.
4150 We need several different blocks:
4151 - index to the from-string array
4153 - index to the to-string array
4156 from_len
= to_len
= 0;
4157 for (cnt
= 0; cnt
< number
; ++cnt
)
4159 struct translit_to_t
*srunp
;
4160 from_len
+= wcslen ((const wchar_t *) sorted
[cnt
]->from
) + 1;
4161 srunp
= sorted
[cnt
]->to
;
4162 while (srunp
!= NULL
)
4164 to_len
+= wcslen ((const wchar_t *) srunp
->str
) + 1;
4165 srunp
= srunp
->next
;
4167 /* Plus one for the extra NUL character marking the end of
4168 the list for the current entry. */
4172 /* We can allocate the arrays for the results. */
4173 ctype
->translit_from_idx
= xmalloc (number
* sizeof (uint32_t));
4174 ctype
->translit_from_tbl
= xmalloc (from_len
* sizeof (uint32_t));
4175 ctype
->translit_to_idx
= xmalloc (number
* sizeof (uint32_t));
4176 ctype
->translit_to_tbl
= xmalloc (to_len
* sizeof (uint32_t));
4180 for (cnt
= 0; cnt
< number
; ++cnt
)
4183 struct translit_to_t
*srunp
;
4185 ctype
->translit_from_idx
[cnt
] = from_len
;
4186 ctype
->translit_to_idx
[cnt
] = to_len
;
4188 len
= wcslen ((const wchar_t *) sorted
[cnt
]->from
) + 1;
4189 wmemcpy ((wchar_t *) &ctype
->translit_from_tbl
[from_len
],
4190 (const wchar_t *) sorted
[cnt
]->from
, len
);
4193 ctype
->translit_to_idx
[cnt
] = to_len
;
4194 srunp
= sorted
[cnt
]->to
;
4195 while (srunp
!= NULL
)
4197 len
= wcslen ((const wchar_t *) srunp
->str
) + 1;
4198 wmemcpy ((wchar_t *) &ctype
->translit_to_tbl
[to_len
],
4199 (const wchar_t *) srunp
->str
, len
);
4201 srunp
= srunp
->next
;
4203 ctype
->translit_to_tbl
[to_len
++] = L
'\0';
4206 /* Store the information about the length. */
4207 ctype
->translit_idx_size
= number
;
4208 ctype
->translit_from_tbl_size
= from_len
* sizeof (uint32_t);
4209 ctype
->translit_to_tbl_size
= to_len
* sizeof (uint32_t);
4213 /* Provide some dummy pointers since we have nothing to write out. */
4214 static uint32_t no_str
= { 0 };
4216 ctype
->translit_from_idx
= &no_str
;
4217 ctype
->translit_from_tbl
= &no_str
;
4218 ctype
->translit_to_tbl
= &no_str
;
4219 ctype
->translit_idx_size
= 0;
4220 ctype
->translit_from_tbl_size
= 0;
4221 ctype
->translit_to_tbl_size
= 0;