1 /* Copyright (C) 1995-1999, 2000, 2001 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
37 #include "localeinfo.h"
39 #include "linereader.h"
40 #include "locfile-token.h"
42 #include "localedef.h"
47 #ifdef PREDEFINED_CLASSES
48 /* These are the extra bits not in wctype.h since these are not preallocated classes. */
50 # define _ISwspecial1 (1 << 29)
51 # define _ISwspecial2 (1 << 30)
52 # define _ISwspecial3 (1 << 31)
56 /* The bit used for representing a special class. */
/* BITPOS maps a class token to its zero-based position by subtracting
   tok_upper; BIT and BITw feed that position to _ISbit and _ISwbit
   respectively.  */
57 #define BITPOS(class) ((class) - tok_upper)
58 #define BIT(class) (_ISbit (BITPOS (class)))
59 #define BITw(class) (_ISwbit (BITPOS (class)))
/* ELEM expands to the dereferenced result of find_idx, which is why it can
   appear on the left side of an assignment.  COLLECTION names a table
   member of CTYPE; the matching COLLECTION_max and COLLECTION_act members
   are passed by address.  IDX is an optional subscript appended to each of
   the three members and may be left empty.  VALUE is forwarded unchanged
   as the last argument of find_idx.  */
61 #define ELEM(ctype, collection, idx, value) \
62 *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx, \
63 &ctype->collection##_act idx, value)
66 /* To be compatible with former implementations we for now restrict
67 the number of bits for character classes to 16. When compatibility
68 is not necessary anymore increase the number to 32. */
69 #define char_class_t uint16_t
70 #define char_class32_t uint32_t
73 /* Type to describe a transliteration action. We have a possibly
74 multiple character from-string and a set of multiple character
75 to-strings. All are 32bit values since this is what is used in
76 the gconv functions. */
81 struct translit_to_t
*next
;
91 struct translit_to_t
*to
;
93 struct translit_t
*next
;
96 struct translit_ignore_t
105 struct translit_ignore_t
*next
;
109 /* Type to describe a transliteration include statement. */
110 struct translit_include_t
112 const char *copy_locale
;
113 const char *copy_repertoire
;
115 struct translit_include_t
*next
;
119 /* Sparse table of uint32_t. */
120 #define TABLE idx_table
121 #define ELEMENT uint32_t
122 #define DEFAULT ((uint32_t) ~0)
127 /* The real definition of the struct for the LC_CTYPE locale. */
128 struct locale_ctype_t
131 size_t charnames_max
;
132 size_t charnames_act
;
133 /* An index lookup table, to speed up find_idx. */
134 struct idx_table charnames_idx
;
136 struct repertoire_t
*repertoire
;
138 /* We will allow up to 8 * sizeof (uint32_t) character classes. */
139 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
141 const char *classnames
[MAX_NR_CHARCLASS
];
142 uint32_t last_class_char
;
143 uint32_t class256_collection
[256];
144 uint32_t *class_collection
;
145 size_t class_collection_max
;
146 size_t class_collection_act
;
148 uint32_t class_offset
;
150 struct charseq
**mbdigits
;
157 struct charseq
*mboutdigits
[10];
158 uint32_t wcoutdigits
[10];
159 size_t outdigits_act
;
161 /* If the following number ever turns out to be too small simply
162 increase it. But I doubt it will. --drepper@gnu */
163 #define MAX_NR_CHARMAP 16
164 const char *mapnames
[MAX_NR_CHARMAP
];
165 uint32_t *map_collection
[MAX_NR_CHARMAP
];
166 uint32_t map256_collection
[2][256];
167 size_t map_collection_max
[MAX_NR_CHARMAP
];
168 size_t map_collection_act
[MAX_NR_CHARMAP
];
169 size_t map_collection_nr
;
171 int tomap_done
[MAX_NR_CHARMAP
];
174 /* Transliteration information. */
175 struct translit_include_t
*translit_include
;
176 struct translit_t
*translit
;
177 struct translit_ignore_t
*translit_ignore
;
178 uint32_t ntranslit_ignore
;
180 uint32_t *default_missing
;
181 const char *default_missing_file
;
182 size_t default_missing_lineno
;
184 /* The arrays for the binary representation. */
185 char_class_t
*ctype_b
;
186 char_class32_t
*ctype32_b
;
190 struct iovec
*class_3level
;
191 struct iovec
*map_3level
;
192 uint32_t *class_name_ptr
;
193 uint32_t *map_name_ptr
;
196 const char *codeset_name
;
197 uint32_t *translit_from_idx
;
198 uint32_t *translit_from_tbl
;
199 uint32_t *translit_to_idx
;
200 uint32_t *translit_to_tbl
;
201 uint32_t translit_idx_size
;
202 size_t translit_from_tbl_size
;
203 size_t translit_to_tbl_size
;
205 struct obstack mempool
;
209 /* Marker for an empty slot. This has the value 0xFFFFFFFF, regardless
210 whether 'int' is 16 bit, 32 bit, or 64 bit. */
211 #define EMPTY ((uint32_t) ~0)
214 #define obstack_chunk_alloc xmalloc
215 #define obstack_chunk_free free
218 /* Prototypes for local functions. */
219 static void ctype_startup (struct linereader
*lr
, struct localedef_t
*locale
,
220 const struct charmap_t
*charmap
,
221 struct localedef_t
*copy_locale
,
223 static void ctype_class_new (struct linereader
*lr
,
224 struct locale_ctype_t
*ctype
, const char *name
);
225 static void ctype_map_new (struct linereader
*lr
,
226 struct locale_ctype_t
*ctype
,
227 const char *name
, const struct charmap_t
*charmap
);
228 static uint32_t *find_idx (struct locale_ctype_t
*ctype
, uint32_t **table
,
229 size_t *max
, size_t *act
, unsigned int idx
);
230 static void set_class_defaults (struct locale_ctype_t
*ctype
,
231 const struct charmap_t
*charmap
,
232 struct repertoire_t
*repertoire
);
233 static void allocate_arrays (struct locale_ctype_t
*ctype
,
234 const struct charmap_t
*charmap
,
235 struct repertoire_t
*repertoire
);
238 static const char *longnames
[] =
240 "zero", "one", "two", "three", "four",
241 "five", "six", "seven", "eight", "nine"
243 static const char *uninames
[] =
245 "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
246 "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
248 static const unsigned char digits
[] = "0123456789";
252 ctype_startup (struct linereader
*lr
, struct localedef_t
*locale
,
253 const struct charmap_t
*charmap
,
254 struct localedef_t
*copy_locale
, int ignore_content
)
257 struct locale_ctype_t
*ctype
;
259 if (!ignore_content
&& locale
->categories
[LC_CTYPE
].ctype
== NULL
)
261 if (copy_locale
== NULL
)
263 /* Allocate the needed room. */
264 locale
->categories
[LC_CTYPE
].ctype
= ctype
=
265 (struct locale_ctype_t
*) xcalloc (1,
266 sizeof (struct locale_ctype_t
));
268 /* We have seen no names yet. */
269 ctype
->charnames_max
= charmap
->mb_cur_max
== 1 ? 256 : 512;
271 (unsigned int *) xmalloc (ctype
->charnames_max
272 * sizeof (unsigned int));
273 for (cnt
= 0; cnt
< 256; ++cnt
)
274 ctype
->charnames
[cnt
] = cnt
;
275 ctype
->charnames_act
= 256;
276 idx_table_init (&ctype
->charnames_idx
);
278 /* Fill character class information. */
279 ctype
->last_class_char
= ILLEGAL_CHAR_VALUE
;
280 /* The order of the following instructions determines the bit positions! */
282 ctype_class_new (lr
, ctype
, "upper");
283 ctype_class_new (lr
, ctype
, "lower");
284 ctype_class_new (lr
, ctype
, "alpha");
285 ctype_class_new (lr
, ctype
, "digit");
286 ctype_class_new (lr
, ctype
, "xdigit");
287 ctype_class_new (lr
, ctype
, "space");
288 ctype_class_new (lr
, ctype
, "print");
289 ctype_class_new (lr
, ctype
, "graph");
290 ctype_class_new (lr
, ctype
, "blank");
291 ctype_class_new (lr
, ctype
, "cntrl");
292 ctype_class_new (lr
, ctype
, "punct");
293 ctype_class_new (lr
, ctype
, "alnum");
294 #ifdef PREDEFINED_CLASSES
295 /* The following are extensions from ISO 14652. */
296 ctype_class_new (lr
, ctype
, "left_to_right");
297 ctype_class_new (lr
, ctype
, "right_to_left");
298 ctype_class_new (lr
, ctype
, "num_terminator");
299 ctype_class_new (lr
, ctype
, "num_separator");
300 ctype_class_new (lr
, ctype
, "segment_separator");
301 ctype_class_new (lr
, ctype
, "block_separator");
302 ctype_class_new (lr
, ctype
, "direction_control");
303 ctype_class_new (lr
, ctype
, "sym_swap_layout");
304 ctype_class_new (lr
, ctype
, "char_shape_selector");
305 ctype_class_new (lr
, ctype
, "num_shape_selector");
306 ctype_class_new (lr
, ctype
, "non_spacing");
307 ctype_class_new (lr
, ctype
, "non_spacing_level3");
308 ctype_class_new (lr
, ctype
, "normal_connect");
309 ctype_class_new (lr
, ctype
, "r_connect");
310 ctype_class_new (lr
, ctype
, "no_connect");
311 ctype_class_new (lr
, ctype
, "no_connect-space");
312 ctype_class_new (lr
, ctype
, "vowel_connect");
315 ctype
->class_collection_max
= charmap
->mb_cur_max
== 1 ? 256 : 512;
316 ctype
->class_collection
317 = (uint32_t *) xcalloc (sizeof (unsigned long int),
318 ctype
->class_collection_max
);
319 ctype
->class_collection_act
= 256;
321 /* Fill character map information. */
322 ctype
->last_map_idx
= MAX_NR_CHARMAP
;
323 ctype_map_new (lr
, ctype
, "toupper", charmap
);
324 ctype_map_new (lr
, ctype
, "tolower", charmap
);
325 #ifdef PREDEFINED_CLASSES
326 ctype_map_new (lr
, ctype
, "tosymmetric", charmap
);
329 /* Fill first 256 entries in `toXXX' arrays. */
330 for (cnt
= 0; cnt
< 256; ++cnt
)
332 ctype
->map_collection
[0][cnt
] = cnt
;
333 ctype
->map_collection
[1][cnt
] = cnt
;
334 #ifdef PREDEFINED_CLASSES
335 ctype
->map_collection
[2][cnt
] = cnt
;
337 ctype
->map256_collection
[0][cnt
] = cnt
;
338 ctype
->map256_collection
[1][cnt
] = cnt
;
341 obstack_init (&ctype
->mempool
);
344 ctype
= locale
->categories
[LC_CTYPE
].ctype
=
345 copy_locale
->categories
[LC_CTYPE
].ctype
;
351 ctype_finish (struct localedef_t
*locale
, const struct charmap_t
*charmap
)
353 /* See POSIX.2, table 2-6 for the meaning of the following table. */
358 const char allow
[NCLASS
];
360 valid_table
[NCLASS
] =
362 /* The order is important. See token.h for more information.
363 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
364 { "upper", "--MX-XDDXXX-" },
365 { "lower", "--MX-XDDXXX-" },
366 { "alpha", "---X-XDDXXX-" },
367 { "digit", "XXX--XDDXXX-" },
368 { "xdigit", "-----XDDXXX-" },
369 { "space", "XXXXX------X" },
370 { "print", "---------X--" },
371 { "graph", "---------X--" },
372 { "blank", "XXXXXM-----X" },
373 { "cntrl", "XXXXX-XX--XX" },
374 { "punct", "XXXXX-DD-X-X" },
375 { "alnum", "-----XDDXXX-" }
379 uint32_t space_value
;
380 struct charseq
*space_seq
;
381 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
388 /* Now resolve copying and also handle completely missing definitions. */
391 const char *repertoire_name
;
393 /* First see whether we were supposed to copy. If yes, find the
394 actual definition. */
395 if (locale
->copy_name
[LC_CTYPE
] != NULL
)
397 /* Find the copying locale. This has to happen transitively since
398 the locale we are copying from might also be copying another one. */
399 struct localedef_t
*from
= locale
;
402 from
= find_locale (LC_CTYPE
, from
->copy_name
[LC_CTYPE
],
403 from
->repertoire_name
, charmap
);
404 while (from
->categories
[LC_CTYPE
].ctype
== NULL
405 && from
->copy_name
[LC_CTYPE
] != NULL
);
407 ctype
= locale
->categories
[LC_CTYPE
].ctype
408 = from
->categories
[LC_CTYPE
].ctype
;
411 /* If there is still no definition issue a warning and create an empty one. */
416 error (0, 0, _("No definition for %s category found"), "LC_CTYPE");
417 ctype_startup (NULL
, locale
, charmap
, NULL
, 0);
418 ctype
= locale
->categories
[LC_CTYPE
].ctype
;
421 /* Get the repertoire we have to use. */
422 repertoire_name
= locale
->repertoire_name
?: repertoire_global
;
423 if (repertoire_name
!= NULL
)
424 ctype
->repertoire
= repertoire_read (repertoire_name
);
427 /* We need the name of the currently used 8-bit character set to
428 make correct conversion between this 8-bit representation and the
429 ISO 10646 character set used internally for wide characters. */
430 ctype
->codeset_name
= charmap
->code_set_name
;
431 if (ctype
->codeset_name
== NULL
)
434 error (0, 0, _("No character set name specified in charmap"));
435 ctype
->codeset_name
= "//UNKNOWN//";
438 /* Set default value for classes not specified. */
439 set_class_defaults (ctype
, charmap
, ctype
->repertoire
);
441 /* Check according to table. */
442 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
444 uint32_t tmp
= ctype
->class_collection
[cnt
];
448 for (cls1
= 0; cls1
< NCLASS
; ++cls1
)
449 if ((tmp
& _ISwbit (cls1
)) != 0)
450 for (cls2
= 0; cls2
< NCLASS
; ++cls2
)
451 if (valid_table
[cls1
].allow
[cls2
] != '-')
453 int eq
= (tmp
& _ISwbit (cls2
)) != 0;
454 switch (valid_table
[cls1
].allow
[cls2
])
459 uint32_t value
= ctype
->charnames
[cnt
];
463 character L'\\u%0*x' in class `%s' must be in class `%s'"),
464 value
> 0xffff ? 8 : 4, value
,
465 valid_table
[cls1
].name
,
466 valid_table
[cls2
].name
);
473 uint32_t value
= ctype
->charnames
[cnt
];
477 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
478 value
> 0xffff ? 8 : 4, value
,
479 valid_table
[cls1
].name
,
480 valid_table
[cls2
].name
);
485 ctype
->class_collection
[cnt
] |= _ISwbit (cls2
);
489 error (5, 0, _("internal error in %s, line %u"),
490 __FUNCTION__
, __LINE__
);
496 for (cnt
= 0; cnt
< 256; ++cnt
)
498 uint32_t tmp
= ctype
->class256_collection
[cnt
];
502 for (cls1
= 0; cls1
< NCLASS
; ++cls1
)
503 if ((tmp
& _ISbit (cls1
)) != 0)
504 for (cls2
= 0; cls2
< NCLASS
; ++cls2
)
505 if (valid_table
[cls1
].allow
[cls2
] != '-')
507 int eq
= (tmp
& _ISbit (cls2
)) != 0;
508 switch (valid_table
[cls1
].allow
[cls2
])
515 snprintf (buf
, sizeof buf
, "\\%Zo", cnt
);
519 character '%s' in class `%s' must be in class `%s'"),
520 buf
, valid_table
[cls1
].name
,
521 valid_table
[cls2
].name
);
530 snprintf (buf
, sizeof buf
, "\\%Zo", cnt
);
534 character '%s' in class `%s' must not be in class `%s'"),
535 buf
, valid_table
[cls1
].name
,
536 valid_table
[cls2
].name
);
541 ctype
->class256_collection
[cnt
] |= _ISbit (cls2
);
545 error (5, 0, _("internal error in %s, line %u"),
546 __FUNCTION__
, __LINE__
);
552 /* ... and now test <SP> as a special case. */
554 if (((cnt
= BITPOS (tok_space
),
555 (ELEM (ctype
, class_collection
, , space_value
)
556 & BITw (tok_space
)) == 0)
557 || (cnt
= BITPOS (tok_blank
),
558 (ELEM (ctype
, class_collection
, , space_value
)
559 & BITw (tok_blank
)) == 0)))
562 error (0, 0, _("<SP> character not in class `%s'"),
563 valid_table
[cnt
].name
);
565 else if (((cnt
= BITPOS (tok_punct
),
566 (ELEM (ctype
, class_collection
, , space_value
)
567 & BITw (tok_punct
)) != 0)
568 || (cnt
= BITPOS (tok_graph
),
569 (ELEM (ctype
, class_collection
, , space_value
)
574 error (0, 0, _("<SP> character must not be in class `%s'"),
575 valid_table
[cnt
].name
);
578 ELEM (ctype
, class_collection
, , space_value
) |= BITw (tok_print
);
580 space_seq
= charmap_find_value (charmap
, "SP", 2);
581 if (space_seq
== NULL
)
582 space_seq
= charmap_find_value (charmap
, "space", 5);
583 if (space_seq
== NULL
)
584 space_seq
= charmap_find_value (charmap
, "U00000020", 9);
585 if (space_seq
== NULL
|| space_seq
->nbytes
!= 1)
588 error (0, 0, _("character <SP> not defined in character map"));
590 else if (((cnt
= BITPOS (tok_space
),
591 (ctype
->class256_collection
[space_seq
->bytes
[0]]
592 & BIT (tok_space
)) == 0)
593 || (cnt
= BITPOS (tok_blank
),
594 (ctype
->class256_collection
[space_seq
->bytes
[0]]
595 & BIT (tok_blank
)) == 0)))
598 error (0, 0, _("<SP> character not in class `%s'"),
599 valid_table
[cnt
].name
);
601 else if (((cnt
= BITPOS (tok_punct
),
602 (ctype
->class256_collection
[space_seq
->bytes
[0]]
603 & BIT (tok_punct
)) != 0)
604 || (cnt
= BITPOS (tok_graph
),
605 (ctype
->class256_collection
[space_seq
->bytes
[0]]
606 & BIT (tok_graph
)) != 0)))
609 error (0, 0, _("<SP> character must not be in class `%s'"),
610 valid_table
[cnt
].name
);
613 ctype
->class256_collection
[space_seq
->bytes
[0]] |= BIT (tok_print
);
615 /* Now that the tests are done make sure the name array contains all
616 characters which are handled in the WIDTH section of the
617 character set definition file. */
618 if (charmap
->width_rules
!= NULL
)
619 for (cnt
= 0; cnt
< charmap
->nwidth_rules
; ++cnt
)
621 unsigned char bytes
[charmap
->mb_cur_max
];
622 int nbytes
= charmap
->width_rules
[cnt
].from
->nbytes
;
624 /* We have the range of character for which the width is
625 specified described using byte sequences of the multibyte
626 charset. We have to convert this to UCS4 now. And we
627 cannot simply convert the beginning and the end of the
628 sequence, we have to iterate over the byte sequence and
629 convert it for every single character. */
630 memcpy (bytes
, charmap
->width_rules
[cnt
].from
->bytes
, nbytes
);
632 while (nbytes
< charmap
->width_rules
[cnt
].to
->nbytes
633 || memcmp (bytes
, charmap
->width_rules
[cnt
].to
->bytes
,
636 /* Find the UCS value for `bytes'. */
639 struct charseq
*seq
= charmap_find_symbol (charmap
, bytes
, nbytes
);
642 wch
= ILLEGAL_CHAR_VALUE
;
643 else if (seq
->ucs4
!= UNINITIALIZED_CHAR_VALUE
)
646 wch
= repertoire_find_value (ctype
->repertoire
, seq
->name
,
649 if (wch
!= ILLEGAL_CHAR_VALUE
)
650 /* We are only interested in the side-effects of the
651 `find_idx' call. It will add appropriate entries in
652 the name array if this is necessary. */
653 (void) find_idx (ctype
, NULL
, NULL
, NULL
, wch
);
655 /* "Increment" the bytes sequence. */
657 while (inner
>= 0 && bytes
[inner
] == 0xff)
662 /* We have to extend the byte sequence. */
663 if (nbytes
>= charmap
->width_rules
[cnt
].to
->nbytes
)
667 memset (&bytes
[1], 0, nbytes
);
673 while (++inner
< nbytes
)
679 /* Now set all the other characters of the character set to the default width. */
682 while (iterate_table (&charmap
->char_table
, &curs
, &key
, &len
, &vdata
) == 0)
684 struct charseq
*data
= (struct charseq
*) vdata
;
686 if (data
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
687 data
->ucs4
= repertoire_find_value (ctype
->repertoire
,
690 if (data
->ucs4
!= ILLEGAL_CHAR_VALUE
)
691 (void) find_idx (ctype
, NULL
, NULL
, NULL
, data
->ucs4
);
694 /* There must be a multiple of 10 digits. */
695 if (ctype
->mbdigits_act
% 10 != 0)
697 assert (ctype
->mbdigits_act
== ctype
->wcdigits_act
);
698 ctype
->wcdigits_act
-= ctype
->mbdigits_act
% 10;
699 ctype
->mbdigits_act
-= ctype
->mbdigits_act
% 10;
700 error (0, 0, _("`digit' category has not entries in groups of ten"));
703 /* Check the input digits. There must be a multiple of ten available.
704 In each group it could be that one or the other character is missing.
705 In this case the whole group must be removed. */
707 while (cnt
< ctype
->mbdigits_act
)
710 for (inner
= 0; inner
< 10; ++inner
)
711 if (ctype
->mbdigits
[cnt
+ inner
] == NULL
)
718 /* Remove the group. */
719 memmove (&ctype
->mbdigits
[cnt
], &ctype
->mbdigits
[cnt
+ 10],
720 ((ctype
->wcdigits_act
- cnt
- 10)
721 * sizeof (ctype
->mbdigits
[0])));
722 ctype
->mbdigits_act
-= 10;
726 /* If no input digits are given use the default. */
727 if (ctype
->mbdigits_act
== 0)
729 if (ctype
->mbdigits_max
== 0)
731 ctype
->mbdigits
= obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
732 10 * sizeof (struct charseq
*));
733 ctype
->mbdigits_max
= 10;
736 for (cnt
= 0; cnt
< 10; ++cnt
)
738 ctype
->mbdigits
[cnt
] = charmap_find_symbol (charmap
,
740 if (ctype
->mbdigits
[cnt
] == NULL
)
742 ctype
->mbdigits
[cnt
] = charmap_find_symbol (charmap
,
744 strlen (longnames
[cnt
]));
745 if (ctype
->mbdigits
[cnt
] == NULL
)
747 /* Hum, this ain't good. */
749 no input digits defined and none of the standard names in the charmap"));
751 ctype
->mbdigits
[cnt
] = obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
752 sizeof (struct charseq
) + 1);
754 /* This is better than nothing. */
755 ctype
->mbdigits
[cnt
]->bytes
[0] = digits
[cnt
];
756 ctype
->mbdigits
[cnt
]->nbytes
= 1;
761 ctype
->mbdigits_act
= 10;
764 /* Check the wide character input digits. There must be a multiple
765 of ten available. In each group it could be that one or the other
766 character is missing. In this case the whole group must be removed. */
769 while (cnt
< ctype
->wcdigits_act
)
772 for (inner
= 0; inner
< 10; ++inner
)
773 if (ctype
->wcdigits
[cnt
+ inner
] == ILLEGAL_CHAR_VALUE
)
780 /* Remove the group. */
781 memmove (&ctype
->wcdigits
[cnt
], &ctype
->wcdigits
[cnt
+ 10],
782 ((ctype
->wcdigits_act
- cnt
- 10)
783 * sizeof (ctype
->wcdigits
[0])));
784 ctype
->wcdigits_act
-= 10;
788 /* If no input digits are given use the default. */
789 if (ctype
->wcdigits_act
== 0)
791 if (ctype
->wcdigits_max
== 0)
793 ctype
->wcdigits
= obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
794 10 * sizeof (uint32_t));
795 ctype
->wcdigits_max
= 10;
798 for (cnt
= 0; cnt
< 10; ++cnt
)
799 ctype
->wcdigits
[cnt
] = L
'0' + cnt
;
801 ctype
->mbdigits_act
= 10;
804 /* Check the outdigits. */
806 for (cnt
= 0; cnt
< 10; ++cnt
)
807 if (ctype
->mboutdigits
[cnt
] == NULL
)
809 static struct charseq replace
[2];
814 not all characters used in `outdigit' are available in the charmap"));
818 replace
[0].nbytes
= 1;
819 replace
[0].bytes
[0] = '?';
820 replace
[0].bytes
[1] = '\0';
821 ctype
->mboutdigits
[cnt
] = &replace
[0];
825 for (cnt
= 0; cnt
< 10; ++cnt
)
826 if (ctype
->wcoutdigits
[cnt
] == 0)
831 not all characters used in `outdigit' are available in the repertoire"));
835 ctype
->wcoutdigits
[cnt
] = L
'?';
838 /* Sort the entries in the translit_ignore list. */
839 if (ctype
->translit_ignore
!= NULL
)
841 struct translit_ignore_t
*firstp
= ctype
->translit_ignore
;
842 struct translit_ignore_t
*runp
;
844 ctype
->ntranslit_ignore
= 1;
846 for (runp
= firstp
->next
; runp
!= NULL
; runp
= runp
->next
)
848 struct translit_ignore_t
*lastp
= NULL
;
849 struct translit_ignore_t
*cmpp
;
851 ++ctype
->ntranslit_ignore
;
853 for (cmpp
= firstp
; cmpp
!= NULL
; lastp
= cmpp
, cmpp
= cmpp
->next
)
854 if (runp
->from
< cmpp
->from
)
862 ctype
->translit_ignore
= firstp
;
868 ctype_output (struct localedef_t
*locale
, const struct charmap_t
*charmap
,
869 const char *output_path
)
871 static const char nulbytes
[4] = { 0, 0, 0, 0 };
872 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
873 const size_t nelems
= (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
)
874 + ctype
->nr_charclass
+ ctype
->map_collection_nr
);
875 struct iovec iov
[2 + nelems
+ 2 * ctype
->nr_charclass
876 + ctype
->map_collection_nr
+ 4];
877 struct locale_file data
;
878 uint32_t idx
[nelems
+ 1];
879 uint32_t default_missing_len
;
880 size_t elem
, cnt
, offset
, total
;
883 /* Now prepare the output: Find the sizes of the table we can use. */
884 allocate_arrays (ctype
, charmap
, ctype
->repertoire
);
886 data
.magic
= LIMAGIC (LC_CTYPE
);
888 iov
[0].iov_base
= (void *) &data
;
889 iov
[0].iov_len
= sizeof (data
);
891 iov
[1].iov_base
= (void *) idx
;
892 iov
[1].iov_len
= nelems
* sizeof (uint32_t);
894 idx
[0] = iov
[0].iov_len
+ iov
[1].iov_len
;
897 for (elem
= 0; elem
< nelems
; ++elem
)
899 if (elem
< _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
))
902 #define CTYPE_EMPTY(name) \
904 iov[2 + elem + offset].iov_base = NULL; \
905 iov[2 + elem + offset].iov_len = 0; \
906 idx[elem + 1] = idx[elem]; \
909 CTYPE_EMPTY(_NL_CTYPE_GAP1
);
910 CTYPE_EMPTY(_NL_CTYPE_GAP2
);
911 CTYPE_EMPTY(_NL_CTYPE_GAP3
);
912 CTYPE_EMPTY(_NL_CTYPE_GAP4
);
913 CTYPE_EMPTY(_NL_CTYPE_GAP5
);
914 CTYPE_EMPTY(_NL_CTYPE_GAP6
);
916 #define CTYPE_DATA(name, base, len) \
917 case _NL_ITEM_INDEX (name): \
918 iov[2 + elem + offset].iov_base = (base); \
919 iov[2 + elem + offset].iov_len = (len); \
920 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
923 CTYPE_DATA (_NL_CTYPE_CLASS
,
925 (256 + 128) * sizeof (char_class_t
));
927 CTYPE_DATA (_NL_CTYPE_TOUPPER
,
929 (256 + 128) * sizeof (uint32_t));
930 CTYPE_DATA (_NL_CTYPE_TOLOWER
,
932 (256 + 128) * sizeof (uint32_t));
934 CTYPE_DATA (_NL_CTYPE_TOUPPER32
,
936 256 * sizeof (uint32_t));
937 CTYPE_DATA (_NL_CTYPE_TOLOWER32
,
939 256 * sizeof (uint32_t));
941 CTYPE_DATA (_NL_CTYPE_CLASS32
,
943 256 * sizeof (char_class32_t
));
945 CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET
,
946 &ctype
->class_offset
, sizeof (uint32_t));
948 CTYPE_DATA (_NL_CTYPE_MAP_OFFSET
,
949 &ctype
->map_offset
, sizeof (uint32_t));
951 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE
,
952 &ctype
->translit_idx_size
, sizeof (uint32_t));
954 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX
,
955 ctype
->translit_from_idx
,
956 ctype
->translit_idx_size
* sizeof (uint32_t));
958 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL
,
959 ctype
->translit_from_tbl
,
960 ctype
->translit_from_tbl_size
);
962 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX
,
963 ctype
->translit_to_idx
,
964 ctype
->translit_idx_size
* sizeof (uint32_t));
966 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL
,
967 ctype
->translit_to_tbl
, ctype
->translit_to_tbl_size
);
969 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES
):
970 /* The class name array. */
972 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
, ++offset
)
974 iov
[2 + elem
+ offset
].iov_base
975 = (void *) ctype
->classnames
[cnt
];
976 iov
[2 + elem
+ offset
].iov_len
977 = strlen (ctype
->classnames
[cnt
]) + 1;
978 total
+= iov
[2 + elem
+ offset
].iov_len
;
980 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
981 iov
[2 + elem
+ offset
].iov_len
= 1 + (4 - ((total
+ 1) % 4));
982 total
+= 1 + (4 - ((total
+ 1) % 4));
984 idx
[elem
+ 1] = idx
[elem
] + total
;
987 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES
):
988 /* The map name array. */
990 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
, ++offset
)
992 iov
[2 + elem
+ offset
].iov_base
993 = (void *) ctype
->mapnames
[cnt
];
994 iov
[2 + elem
+ offset
].iov_len
995 = strlen (ctype
->mapnames
[cnt
]) + 1;
996 total
+= iov
[2 + elem
+ offset
].iov_len
;
998 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
999 iov
[2 + elem
+ offset
].iov_len
= 1 + (4 - ((total
+ 1) % 4));
1000 total
+= 1 + (4 - ((total
+ 1) % 4));
1002 idx
[elem
+ 1] = idx
[elem
] + total
;
1005 CTYPE_DATA (_NL_CTYPE_WIDTH
,
1006 ctype
->width
.iov_base
,
1007 ctype
->width
.iov_len
);
1009 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX
,
1010 &ctype
->mb_cur_max
, sizeof (uint32_t));
1012 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME
):
1013 total
= strlen (ctype
->codeset_name
) + 1;
1015 iov
[2 + elem
+ offset
].iov_base
= (char *) ctype
->codeset_name
;
1018 iov
[2 + elem
+ offset
].iov_base
= alloca ((total
+ 3) & ~3);
1019 memset (mempcpy (iov
[2 + elem
+ offset
].iov_base
,
1020 ctype
->codeset_name
, total
),
1021 '\0', 4 - (total
& 3));
1022 total
= (total
+ 3) & ~3;
1024 iov
[2 + elem
+ offset
].iov_len
= total
;
1025 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1028 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN
):
1029 iov
[2 + elem
+ offset
].iov_base
= alloca (sizeof (uint32_t));
1030 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1031 *(uint32_t *) iov
[2 + elem
+ offset
].iov_base
=
1032 ctype
->mbdigits_act
/ 10;
1033 idx
[elem
+ 1] = idx
[elem
] + sizeof (uint32_t);
1036 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN
):
1037 /* Align entries. */
1038 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1039 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1040 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1043 iov
[2 + elem
+ offset
].iov_base
= alloca (sizeof (uint32_t));
1044 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1045 *(uint32_t *) iov
[2 + elem
+ offset
].iov_base
=
1046 ctype
->wcdigits_act
/ 10;
1047 idx
[elem
+ 1] = idx
[elem
] + sizeof (uint32_t);
1050 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB
):
1051 /* Compute the length of all possible characters. For INDIGITS
1052 there might be more than one. We simply concatenate all of
1053 them with a NUL byte following. The NUL byte wouldn't be
1054 necessary but it makes it easier for the user. */
1057 for (cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
);
1058 cnt
< ctype
->mbdigits_act
; cnt
+= 10)
1059 total
+= ctype
->mbdigits
[cnt
]->nbytes
+ 1;
1060 iov
[2 + elem
+ offset
].iov_base
= (char *) alloca (total
);
1061 iov
[2 + elem
+ offset
].iov_len
= total
;
1063 cp
= iov
[2 + elem
+ offset
].iov_base
;
1064 for (cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
);
1065 cnt
< ctype
->mbdigits_act
; cnt
+= 10)
1067 cp
= mempcpy (cp
, ctype
->mbdigits
[cnt
]->bytes
,
1068 ctype
->mbdigits
[cnt
]->nbytes
);
1071 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1074 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB
) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB
):
1075 /* Compute the length of the character.  Unlike INDIGITS, each
1076 OUTDIGIT entry holds exactly one character.  We copy its bytes
1077 with a NUL byte following.  The NUL byte wouldn't be
1078 necessary but it makes it easier for the user. */
1079 cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB
);
1080 total
= ctype
->mboutdigits
[cnt
]->nbytes
+ 1;
1081 iov
[2 + elem
+ offset
].iov_base
= (char *) alloca (total
);
1082 iov
[2 + elem
+ offset
].iov_len
= total
;
1084 *(char *) mempcpy (iov
[2 + elem
+ offset
].iov_base
,
1085 ctype
->mboutdigits
[cnt
]->bytes
,
1086 ctype
->mboutdigits
[cnt
]->nbytes
) = '\0';
1087 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1090 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC
) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC
):
1091 total
= ctype
->wcdigits_act
/ 10;
1093 iov
[2 + elem
+ offset
].iov_base
=
1094 (uint32_t *) alloca (total
* sizeof (uint32_t));
1095 iov
[2 + elem
+ offset
].iov_len
= total
* sizeof (uint32_t);
1097 for (cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC
);
1098 cnt
< ctype
->wcdigits_act
; cnt
+= 10)
1099 ((uint32_t *) iov
[2 + elem
+ offset
].iov_base
)[cnt
/ 10]
1100 = ctype
->wcdigits
[cnt
];
1101 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1104 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC
):
1105 /* Align entries. */
1106 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1107 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1108 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1112 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT1_WC
) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC
):
1113 cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC
);
1114 iov
[2 + elem
+ offset
].iov_base
= &ctype
->wcoutdigits
[cnt
];
1115 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1116 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1119 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN
):
1120 /* Align entries. */
1121 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1122 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1123 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1126 default_missing_len
= (ctype
->default_missing
1127 ? wcslen ((wchar_t *)ctype
->default_missing
)
1129 iov
[2 + elem
+ offset
].iov_base
= &default_missing_len
;
1130 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1131 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1134 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING
):
1135 iov
[2 + elem
+ offset
].iov_base
=
1136 ctype
->default_missing
?: (uint32_t *) L
"";
1137 iov
[2 + elem
+ offset
].iov_len
=
1138 wcslen (iov
[2 + elem
+ offset
].iov_base
);
1139 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1142 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN
):
1143 /* Align entries. */
1144 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1145 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1146 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1149 iov
[2 + elem
+ offset
].iov_base
= &ctype
->ntranslit_ignore
;
1150 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1151 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1154 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE
):
1156 uint32_t *ranges
= (uint32_t *) alloca (ctype
->ntranslit_ignore
1157 * 3 * sizeof (uint32_t));
1158 struct translit_ignore_t
*runp
;
1160 iov
[2 + elem
+ offset
].iov_base
= ranges
;
1161 iov
[2 + elem
+ offset
].iov_len
= (ctype
->ntranslit_ignore
1162 * 3 * sizeof (uint32_t));
1164 for (runp
= ctype
->translit_ignore
; runp
!= NULL
;
1167 *ranges
++ = runp
->from
;
1168 *ranges
++ = runp
->to
;
1169 *ranges
++ = runp
->step
;
1172 /* Remove the following line in case a new entry is added
1173 after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN. */
1175 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1179 assert (! "unknown CTYPE element");
1183 /* Handle extra maps. */
1184 size_t nr
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
);
1185 if (nr
< ctype
->nr_charclass
)
1187 iov
[2 + elem
+ offset
].iov_base
= ctype
->class_b
[nr
];
1188 iov
[2 + elem
+ offset
].iov_len
= 256 / 32 * sizeof (uint32_t);
1189 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1192 iov
[2 + elem
+ offset
] = ctype
->class_3level
[nr
];
1196 nr
-= ctype
->nr_charclass
;
1197 assert (nr
< ctype
->map_collection_nr
);
1198 iov
[2 + elem
+ offset
] = ctype
->map_3level
[nr
];
1200 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1204 assert (2 + elem
+ offset
== (nelems
+ 2 * ctype
->nr_charclass
1205 + ctype
->map_collection_nr
+ 4 + 2));
1207 write_locale_data (output_path
, "LC_CTYPE", 2 + elem
+ offset
, iov
);
1211 /* Local functions. */
1213 ctype_class_new (struct linereader
*lr
, struct locale_ctype_t
*ctype
,
1218 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
1219 if (strcmp (ctype
->classnames
[cnt
], name
) == 0)
1222 if (cnt
< ctype
->nr_charclass
)
1224 lr_error (lr
, _("character class `%s' already defined"), name
);
1228 if (ctype
->nr_charclass
== MAX_NR_CHARCLASS
)
1229 /* Exit code 2 is prescribed in P1003.2b. */
1231 implementation limit: no more than %Zd character classes allowed"),
1234 ctype
->classnames
[ctype
->nr_charclass
++] = name
;
1239 ctype_map_new (struct linereader
*lr
, struct locale_ctype_t
*ctype
,
1240 const char *name
, const struct charmap_t
*charmap
)
1242 size_t max_chars
= 0;
1245 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
)
1247 if (strcmp (ctype
->mapnames
[cnt
], name
) == 0)
1250 if (max_chars
< ctype
->map_collection_max
[cnt
])
1251 max_chars
= ctype
->map_collection_max
[cnt
];
1254 if (cnt
< ctype
->map_collection_nr
)
1256 lr_error (lr
, _("character map `%s' already defined"), name
);
1260 if (ctype
->map_collection_nr
== MAX_NR_CHARMAP
)
1261 /* Exit code 2 is prescribed in P1003.2b. */
1263 implementation limit: no more than %d character maps allowed"),
1266 ctype
->mapnames
[cnt
] = name
;
1269 ctype
->map_collection_max
[cnt
] = charmap
->mb_cur_max
== 1 ? 256 : 512;
1271 ctype
->map_collection_max
[cnt
] = max_chars
;
1273 ctype
->map_collection
[cnt
] = (uint32_t *)
1274 xcalloc (sizeof (uint32_t), ctype
->map_collection_max
[cnt
]);
1275 ctype
->map_collection_act
[cnt
] = 256;
1277 ++ctype
->map_collection_nr
;
1281 /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
1282 is possible if we only want to extend the name array. */
1284 find_idx (struct locale_ctype_t
*ctype
, uint32_t **table
, size_t *max
,
1285 size_t *act
, uint32_t idx
)
1290 return table
== NULL
? NULL
: &(*table
)[idx
];
1292 /* Use the charnames_idx lookup table instead of the slow search loop. */
1294 cnt
= idx_table_get (&ctype
->charnames_idx
, idx
);
1297 cnt
= ctype
->charnames_act
;
1299 for (cnt
= 256; cnt
< ctype
->charnames_act
; ++cnt
)
1300 if (ctype
->charnames
[cnt
] == idx
)
1304 /* We have to distinguish two cases: the name is found or not. */
1305 if (cnt
== ctype
->charnames_act
)
1307 /* Extend the name array. */
1308 if (ctype
->charnames_act
== ctype
->charnames_max
)
1310 ctype
->charnames_max
*= 2;
1311 ctype
->charnames
= (uint32_t *)
1312 xrealloc (ctype
->charnames
,
1313 sizeof (uint32_t) * ctype
->charnames_max
);
1315 ctype
->charnames
[ctype
->charnames_act
++] = idx
;
1316 idx_table_add (&ctype
->charnames_idx
, idx
, cnt
);
1320 /* We have done everything we are asked to do. */
1324 /* The caller does not want to extend the table. */
1325 return (cnt
>= *act
? NULL
: &(*table
)[cnt
]);
1331 size_t old_max
= *max
;
1334 while (*max
<= cnt
);
1337 (uint32_t *) xrealloc (*table
, *max
* sizeof (uint32_t));
1338 memset (&(*table
)[old_max
], '\0',
1339 (*max
- old_max
) * sizeof (uint32_t));
1345 return &(*table
)[cnt
];
1350 get_character (struct token
*now
, const struct charmap_t
*charmap
,
1351 struct repertoire_t
*repertoire
,
1352 struct charseq
**seqp
, uint32_t *wchp
)
1354 if (now
->tok
== tok_bsymbol
)
1356 /* This will hopefully be the normal case. */
1357 *wchp
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
1358 now
->val
.str
.lenmb
);
1359 *seqp
= charmap_find_value (charmap
, now
->val
.str
.startmb
,
1360 now
->val
.str
.lenmb
);
1362 else if (now
->tok
== tok_ucs4
)
1366 snprintf (utmp
, sizeof (utmp
), "U%08X", now
->val
.ucs4
);
1367 *seqp
= charmap_find_value (charmap
, utmp
, 9);
1370 *seqp
= repertoire_find_seq (repertoire
, now
->val
.ucs4
);
1374 /* Compute the value in the charmap from the UCS value. */
1375 const char *symbol
= repertoire_find_symbol (repertoire
,
1381 *seqp
= charmap_find_value (charmap
, symbol
, strlen (symbol
));
1385 if (repertoire
!= NULL
)
1387 /* Insert a negative entry. */
1388 static const struct charseq negative
1389 = { .ucs4
= ILLEGAL_CHAR_VALUE
};
1390 uint32_t *newp
= obstack_alloc (&repertoire
->mem_pool
,
1392 *newp
= now
->val
.ucs4
;
1394 insert_entry (&repertoire
->seq_table
, newp
,
1395 sizeof (uint32_t), (void *) &negative
);
1399 (*seqp
)->ucs4
= now
->val
.ucs4
;
1401 else if ((*seqp
)->ucs4
!= now
->val
.ucs4
)
1404 *wchp
= now
->val
.ucs4
;
1406 else if (now
->tok
== tok_charcode
)
1408 /* We must map from the byte code to UCS4. */
1409 *seqp
= charmap_find_symbol (charmap
, now
->val
.str
.startmb
,
1410 now
->val
.str
.lenmb
);
1413 *wchp
= ILLEGAL_CHAR_VALUE
;
1416 if ((*seqp
)->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1417 (*seqp
)->ucs4
= repertoire_find_value (repertoire
, (*seqp
)->name
,
1418 strlen ((*seqp
)->name
));
1419 *wchp
= (*seqp
)->ucs4
;
1429 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1430 the .(2). counterparts. */
1432 charclass_symbolic_ellipsis (struct linereader
*ldfile
,
1433 struct locale_ctype_t
*ctype
,
1434 const struct charmap_t
*charmap
,
1435 struct repertoire_t
*repertoire
,
1437 const char *last_str
,
1438 unsigned long int class256_bit
,
1439 unsigned long int class_bit
, int base
,
1440 int ignore_content
, int handle_digits
, int step
)
1442 const char *nowstr
= now
->val
.str
.startmb
;
1443 char tmp
[now
->val
.str
.lenmb
+ 1];
1446 unsigned long int from
;
1447 unsigned long int to
;
1449 /* We have to compute the ellipsis values using the symbolic names. */
1450 assert (last_str
!= NULL
);
1452 if (strlen (last_str
) != now
->val
.str
.lenmb
)
1456 _("`%s' and `%.*s' are no valid names for symbolic range"),
1457 last_str
, (int) now
->val
.str
.lenmb
, nowstr
);
1461 if (memcmp (last_str
, nowstr
, now
->val
.str
.lenmb
) == 0)
1462 /* Nothing to do, the names are the same. */
1465 for (cp
= last_str
; *cp
== *(nowstr
+ (cp
- last_str
)); ++cp
)
1469 from
= strtoul (cp
, &endp
, base
);
1470 if ((from
== UINT_MAX
&& errno
== ERANGE
) || *endp
!= '\0')
1473 to
= strtoul (nowstr
+ (cp
- last_str
), &endp
, base
);
1474 if ((to
== UINT_MAX
&& errno
== ERANGE
)
1475 || (endp
- nowstr
) != now
->val
.str
.lenmb
|| from
>= to
)
1478 /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
1479 if (!ignore_content
)
1481 now
->val
.str
.startmb
= tmp
;
1482 while ((from
+= step
) <= to
)
1484 struct charseq
*seq
;
1487 sprintf (tmp
, (base
== 10 ? "%.*s%0*ld" : "%.*s%0*lX"),
1488 (int) (cp
- last_str
), last_str
,
1489 (int) (now
->val
.str
.lenmb
- (cp
- last_str
)),
1492 get_character (now
, charmap
, repertoire
, &seq
, &wch
);
1494 if (seq
!= NULL
&& seq
->nbytes
== 1)
1495 /* Yep, we can store information about this byte sequence. */
1496 ctype
->class256_collection
[seq
->bytes
[0]] |= class256_bit
;
1498 if (wch
!= ILLEGAL_CHAR_VALUE
&& class_bit
!= 0)
1499 /* We have the UCS4 position. */
1500 *find_idx (ctype
, &ctype
->class_collection
,
1501 &ctype
->class_collection_max
,
1502 &ctype
->class_collection_act
, wch
) |= class_bit
;
1504 if (handle_digits
== 1)
1506 /* We must store the digit values. */
1507 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1509 ctype
->mbdigits_max
*= 2;
1510 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1511 (ctype
->mbdigits_max
1512 * sizeof (char *)));
1513 ctype
->wcdigits_max
*= 2;
1514 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1515 (ctype
->wcdigits_max
1516 * sizeof (uint32_t)));
1519 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
1520 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
1522 else if (handle_digits
== 2)
1524 /* We must store the digit values. */
1525 if (ctype
->outdigits_act
>= 10)
1527 lr_error (ldfile
, _("\
1528 %s: field `%s' does not contain exactly ten entries"),
1529 "LC_CTYPE", "outdigit");
1533 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
1534 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
1535 ++ctype
->outdigits_act
;
1542 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. */
1544 charclass_ucs4_ellipsis (struct linereader
*ldfile
,
1545 struct locale_ctype_t
*ctype
,
1546 const struct charmap_t
*charmap
,
1547 struct repertoire_t
*repertoire
,
1548 struct token
*now
, uint32_t last_wch
,
1549 unsigned long int class256_bit
,
1550 unsigned long int class_bit
, int ignore_content
,
1551 int handle_digits
, int step
)
1553 if (last_wch
> now
->val
.ucs4
)
1555 lr_error (ldfile
, _("\
1556 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1557 (now
->val
.ucs4
| last_wch
) < 65536 ? 4 : 8, now
->val
.ucs4
,
1558 (now
->val
.ucs4
| last_wch
) < 65536 ? 4 : 8, last_wch
);
1562 if (!ignore_content
)
1563 while ((last_wch
+= step
) <= now
->val
.ucs4
)
1565 /* We have to find out whether there is a byte sequence corresponding
1566 to this UCS4 value. */
1567 struct charseq
*seq
;
1570 snprintf (utmp
, sizeof (utmp
), "U%08X", last_wch
);
1571 seq
= charmap_find_value (charmap
, utmp
, 9);
1574 snprintf (utmp
, sizeof (utmp
), "U%04X", last_wch
);
1575 seq
= charmap_find_value (charmap
, utmp
, 5);
1579 /* Try looking in the repertoire map. */
1580 seq
= repertoire_find_seq (repertoire
, last_wch
);
1582 /* If this is the first time we look for this sequence create a new
1586 static const struct charseq negative
1587 = { .ucs4
= ILLEGAL_CHAR_VALUE
};
1589 /* Find the symbolic name for this UCS4 value. */
1590 if (repertoire
!= NULL
)
1592 const char *symbol
= repertoire_find_symbol (repertoire
,
1594 uint32_t *newp
= obstack_alloc (&repertoire
->mem_pool
,
1599 /* We have a name, now search the multibyte value. */
1600 seq
= charmap_find_value (charmap
, symbol
, strlen (symbol
));
1603 /* We have to create a fake entry. */
1604 seq
= (struct charseq
*) &negative
;
1606 seq
->ucs4
= last_wch
;
1608 insert_entry (&repertoire
->seq_table
, newp
, sizeof (uint32_t),
1612 /* We have to create a fake entry. */
1613 seq
= (struct charseq
*) &negative
;
1616 /* We have a name, now search the multibyte value. */
1617 if (seq
->ucs4
== last_wch
&& seq
->nbytes
== 1)
1618 /* Yep, we can store information about this byte sequence. */
1619 ctype
->class256_collection
[(size_t) seq
->bytes
[0]]
1622 /* And of course we have the UCS4 position. */
1624 *find_idx (ctype
, &ctype
->class_collection
,
1625 &ctype
->class_collection_max
,
1626 &ctype
->class_collection_act
, last_wch
) |= class_bit
;
1628 if (handle_digits
== 1)
1630 /* We must store the digit values. */
1631 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1633 ctype
->mbdigits_max
*= 2;
1634 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1635 (ctype
->mbdigits_max
1636 * sizeof (char *)));
1637 ctype
->wcdigits_max
*= 2;
1638 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1639 (ctype
->wcdigits_max
1640 * sizeof (uint32_t)));
1643 ctype
->mbdigits
[ctype
->mbdigits_act
++] = (seq
->ucs4
== last_wch
1645 ctype
->wcdigits
[ctype
->wcdigits_act
++] = last_wch
;
1647 else if (handle_digits
== 2)
1649 /* We must store the digit values. */
1650 if (ctype
->outdigits_act
>= 10)
1652 lr_error (ldfile
, _("\
1653 %s: field `%s' does not contain exactly ten entries"),
1654 "LC_CTYPE", "outdigit");
1658 ctype
->mboutdigits
[ctype
->outdigits_act
] = (seq
->ucs4
== last_wch
1660 ctype
->wcoutdigits
[ctype
->outdigits_act
] = last_wch
;
1661 ++ctype
->outdigits_act
;
1667 /* Ellipsis as in `/xea/x12.../xea/x34'. */
1669 charclass_charcode_ellipsis (struct linereader
*ldfile
,
1670 struct locale_ctype_t
*ctype
,
1671 const struct charmap_t
*charmap
,
1672 struct repertoire_t
*repertoire
,
1673 struct token
*now
, char *last_charcode
,
1674 uint32_t last_charcode_len
,
1675 unsigned long int class256_bit
,
1676 unsigned long int class_bit
, int ignore_content
,
1679 /* First check whether the to-value is larger. */
1680 if (now
->val
.charcode
.nbytes
!= last_charcode_len
)
1682 lr_error (ldfile
, _("\
1683 start and end character sequence of range must have the same length"));
1687 if (memcmp (last_charcode
, now
->val
.charcode
.bytes
, last_charcode_len
) > 0)
1689 lr_error (ldfile
, _("\
1690 to-value character sequence is smaller than from-value sequence"));
1694 if (!ignore_content
)
1698 /* Increment the byte sequence value. */
1699 struct charseq
*seq
;
1703 for (i
= last_charcode_len
- 1; i
>= 0; --i
)
1704 if (++last_charcode
[i
] != 0)
1707 if (last_charcode_len
== 1)
1708 /* Of course we have the charcode value. */
1709 ctype
->class256_collection
[(size_t) last_charcode
[0]]
1712 /* Find the symbolic name. */
1713 seq
= charmap_find_symbol (charmap
, last_charcode
,
1717 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1718 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1719 strlen (seq
->name
));
1720 wch
= seq
== NULL
? ILLEGAL_CHAR_VALUE
: seq
->ucs4
;
1722 if (wch
!= ILLEGAL_CHAR_VALUE
&& class_bit
!= 0)
1723 *find_idx (ctype
, &ctype
->class_collection
,
1724 &ctype
->class_collection_max
,
1725 &ctype
->class_collection_act
, wch
) |= class_bit
;
1728 wch
= ILLEGAL_CHAR_VALUE
;
1730 if (handle_digits
== 1)
1732 /* We must store the digit values. */
1733 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1735 ctype
->mbdigits_max
*= 2;
1736 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1737 (ctype
->mbdigits_max
1738 * sizeof (char *)));
1739 ctype
->wcdigits_max
*= 2;
1740 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1741 (ctype
->wcdigits_max
1742 * sizeof (uint32_t)));
1745 seq
= xmalloc (sizeof (struct charseq
) + last_charcode_len
);
1746 memcpy ((char *) (seq
+ 1), last_charcode
, last_charcode_len
);
1747 seq
->nbytes
= last_charcode_len
;
1749 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
1750 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
1752 else if (handle_digits
== 2)
1754 struct charseq
*seq
;
1755 /* We must store the digit values. */
1756 if (ctype
->outdigits_act
>= 10)
1758 lr_error (ldfile
, _("\
1759 %s: field `%s' does not contain exactly ten entries"),
1760 "LC_CTYPE", "outdigit");
1764 seq
= xmalloc (sizeof (struct charseq
) + last_charcode_len
);
1765 memcpy ((char *) (seq
+ 1), last_charcode
, last_charcode_len
);
1766 seq
->nbytes
= last_charcode_len
;
1768 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
1769 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
1770 ++ctype
->outdigits_act
;
1773 while (memcmp (last_charcode
, now
->val
.charcode
.bytes
,
1774 last_charcode_len
) != 0);
1780 find_translit2 (struct locale_ctype_t
*ctype
, const struct charmap_t
*charmap
,
1783 struct translit_t
*trunp
= ctype
->translit
;
1784 struct translit_ignore_t
*tirunp
= ctype
->translit_ignore
;
1786 while (trunp
!= NULL
)
1788 /* XXX We simplify things here. The transliterations we look
1789 for are only allowed to have one character. */
1790 if (trunp
->from
[0] == wch
&& trunp
->from
[1] == 0)
1792 /* Found it. Now look for a transliteration which can be
1793 represented with the character set. */
1794 struct translit_to_t
*torunp
= trunp
->to
;
1796 while (torunp
!= NULL
)
1800 for (i
= 0; torunp
->str
[i
] != 0; ++i
)
1804 snprintf (utmp
, sizeof (utmp
), "U%08X", torunp
->str
[i
]);
1805 if (charmap_find_value (charmap
, utmp
, 9) == NULL
)
1806 /* This character cannot be represented. */
1810 if (torunp
->str
[i
] == 0)
1813 torunp
= torunp
->next
;
1819 trunp
= trunp
->next
;
1822 /* Check for ignored chars. */
1823 while (tirunp
!= NULL
)
1825 if (tirunp
->from
<= wch
&& tirunp
->to
>= wch
)
1829 for (wi
= tirunp
->from
; wi
<= wch
; wi
+= tirunp
->step
)
1831 return (uint32_t []) { 0 };
1835 /* Nothing found. */
1841 find_translit (struct localedef_t
*locale
, const struct charmap_t
*charmap
,
1844 struct locale_ctype_t
*ctype
;
1845 uint32_t *result
= NULL
;
1847 assert (locale
!= NULL
);
1848 ctype
= locale
->categories
[LC_CTYPE
].ctype
;
1850 if (ctype
->translit
!= NULL
)
1851 result
= find_translit2 (ctype
, charmap
, wch
);
1855 struct translit_include_t
*irunp
= ctype
->translit_include
;
1857 while (irunp
!= NULL
&& result
== NULL
)
1859 result
= find_translit (find_locale (CTYPE_LOCALE
,
1861 irunp
->copy_repertoire
,
1864 irunp
= irunp
->next
;
1872 /* Read one transliteration entry. */
1874 read_widestring (struct linereader
*ldfile
, struct token
*now
,
1875 const struct charmap_t
*charmap
,
1876 struct repertoire_t
*repertoire
)
1880 if (now
->tok
== tok_default_missing
)
1881 /* The special name "" will denote this case. */
1882 wstr
= ((uint32_t *) { 0 });
1883 else if (now
->tok
== tok_bsymbol
)
1885 /* Get the value from the repertoire. */
1886 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1887 wstr
[0] = repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
1888 now
->val
.str
.lenmb
);
1889 if (wstr
[0] == ILLEGAL_CHAR_VALUE
)
1891 /* We cannot proceed, we don't know the UCS4 value. */
1898 else if (now
->tok
== tok_ucs4
)
1900 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1901 wstr
[0] = now
->val
.ucs4
;
1904 else if (now
->tok
== tok_charcode
)
1906 /* Argh, we have to convert to the symbol name first and then to the
1908 struct charseq
*seq
= charmap_find_symbol (charmap
,
1909 now
->val
.str
.startmb
,
1910 now
->val
.str
.lenmb
);
1912 /* Cannot find the UCS4 value. */
1915 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1916 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1917 strlen (seq
->name
));
1918 if (seq
->ucs4
== ILLEGAL_CHAR_VALUE
)
1919 /* We cannot proceed, we don't know the UCS4 value. */
1922 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1923 wstr
[0] = seq
->ucs4
;
1926 else if (now
->tok
== tok_string
)
1928 wstr
= now
->val
.str
.startwc
;
1929 if (wstr
== NULL
|| wstr
[0] == 0)
1934 if (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
1935 lr_ignore_rest (ldfile
, 0);
1936 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1937 return (uint32_t *) -1l;
1945 read_translit_entry (struct linereader
*ldfile
, struct locale_ctype_t
*ctype
,
1946 struct token
*now
, const struct charmap_t
*charmap
,
1947 struct repertoire_t
*repertoire
)
1949 uint32_t *from_wstr
= read_widestring (ldfile
, now
, charmap
, repertoire
);
1950 struct translit_t
*result
;
1951 struct translit_to_t
**top
;
1952 struct obstack
*ob
= &ctype
->mempool
;
1956 if (from_wstr
== NULL
)
1957 /* There is no valid from string. */
1960 result
= (struct translit_t
*) obstack_alloc (ob
,
1961 sizeof (struct translit_t
));
1962 result
->from
= from_wstr
;
1963 result
->fname
= ldfile
->fname
;
1964 result
->lineno
= ldfile
->lineno
;
1965 result
->next
= NULL
;
1975 /* Next we have one or more transliterations. They are
1976 separated by semicolons. */
1977 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
1979 if (!first
&& (now
->tok
== tok_semicolon
|| now
->tok
== tok_eol
))
1981 /* One string read. */
1982 const uint32_t zero
= 0;
1986 obstack_grow (ob
, &zero
, 4);
1987 to_wstr
= obstack_finish (ob
);
1989 *top
= obstack_alloc (ob
, sizeof (struct translit_to_t
));
1990 (*top
)->str
= to_wstr
;
1991 (*top
)->next
= NULL
;
1994 if (now
->tok
== tok_eol
)
1996 result
->next
= ctype
->translit
;
1997 ctype
->translit
= result
;
2002 top
= &(*top
)->next
;
2007 to_wstr
= read_widestring (ldfile
, now
, charmap
, repertoire
);
2008 if (to_wstr
== (uint32_t *) -1l)
2010 /* An error occurred. */
2011 obstack_free (ob
, result
);
2015 if (to_wstr
== NULL
)
2018 /* This value is usable. */
2019 obstack_grow (ob
, to_wstr
, wcslen ((wchar_t *) to_wstr
) * 4);
2028 read_translit_ignore_entry (struct linereader
*ldfile
,
2029 struct locale_ctype_t
*ctype
,
2030 const struct charmap_t
*charmap
,
2031 struct repertoire_t
*repertoire
)
2033 /* We expect a semicolon-separated list of characters we ignore. We are
2034 only interested in the wide character definitions. These must be
2035 single characters, possibly defining a range when an ellipsis is used. */
2038 struct token
*now
= lr_token (ldfile
, charmap
, NULL
, repertoire
,
2040 struct translit_ignore_t
*newp
;
2043 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2046 _("premature end of `translit_ignore' definition"));
2050 if (now
->tok
!= tok_bsymbol
&& now
->tok
!= tok_ucs4
)
2052 lr_error (ldfile
, _("syntax error"));
2053 lr_ignore_rest (ldfile
, 0);
2057 if (now
->tok
== tok_ucs4
)
2058 from
= now
->val
.ucs4
;
2060 /* Try to get the value. */
2061 from
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
2062 now
->val
.str
.lenmb
);
2064 if (from
== ILLEGAL_CHAR_VALUE
)
2066 lr_error (ldfile
, "invalid character name");
2071 newp
= (struct translit_ignore_t
*)
2072 obstack_alloc (&ctype
->mempool
, sizeof (struct translit_ignore_t
));
2077 newp
->next
= ctype
->translit_ignore
;
2078 ctype
->translit_ignore
= newp
;
2081 /* Now we expect either a semicolon, an ellipsis, or the end of the
2083 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
2085 if (now
->tok
== tok_ellipsis2
|| now
->tok
== tok_ellipsis2_2
)
2087 /* XXX Should we bother implementing `....'? `...' certainly
2088 will not be implemented. */
2090 int step
= now
->tok
== tok_ellipsis2_2
? 2 : 1;
2092 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
2094 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2097 _("premature end of `translit_ignore' definition"));
2101 if (now
->tok
!= tok_bsymbol
&& now
->tok
!= tok_ucs4
)
2103 lr_error (ldfile
, _("syntax error"));
2104 lr_ignore_rest (ldfile
, 0);
2108 if (now
->tok
== tok_ucs4
)
2111 /* Try to get the value. */
2112 to
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
2113 now
->val
.str
.lenmb
);
2115 if (to
== ILLEGAL_CHAR_VALUE
)
2116 lr_error (ldfile
, "invalid character name");
2119 /* Make sure the `to'-value is larger. */
2126 lr_error (ldfile
, _("\
2127 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2128 (to
| from
) < 65536 ? 4 : 8, to
,
2129 (to
| from
) < 65536 ? 4 : 8, from
);
2132 /* And the next token. */
2133 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
2136 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2140 if (now
->tok
== tok_semicolon
)
2144 /* If we come here something is wrong. */
2145 lr_error (ldfile
, _("syntax error"));
2146 lr_ignore_rest (ldfile
, 0);
2152 /* The parser for the LC_CTYPE section of the locale definition. */
2154 ctype_read (struct linereader
*ldfile
, struct localedef_t
*result
,
2155 const struct charmap_t
*charmap
, const char *repertoire_name
,
2158 struct repertoire_t
*repertoire
= NULL
;
2159 struct locale_ctype_t
*ctype
;
2161 enum token_t nowtok
;
2163 struct charseq
*last_seq
;
2164 uint32_t last_wch
= 0;
2165 enum token_t last_token
;
2166 enum token_t ellipsis_token
;
2168 char last_charcode
[16];
2169 size_t last_charcode_len
= 0;
2170 const char *last_str
= NULL
;
2172 struct localedef_t
*copy_locale
= NULL
;
2174 /* Get the repertoire we have to use. */
2175 if (repertoire_name
!= NULL
)
2176 repertoire
= repertoire_read (repertoire_name
);
2178 /* The rest of the line containing `LC_CTYPE' must be free. */
2179 lr_ignore_rest (ldfile
, 1);
2184 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2187 while (nowtok
== tok_eol
);
2189 /* If we see `copy' now we are almost done. */
2190 if (nowtok
== tok_copy
)
2192 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2193 if (now
->tok
!= tok_string
)
2195 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2199 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2200 while (now
->tok
!= tok_eof
&& now
->tok
!= tok_end
);
2202 if (now
->tok
!= tok_eof
2203 || (now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
),
2204 now
->tok
== tok_eof
))
2205 lr_error (ldfile
, _("%s: premature end of file"), "LC_CTYPE");
2206 else if (now
->tok
!= tok_lc_ctype
)
2208 lr_error (ldfile
, _("\
2209 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2210 lr_ignore_rest (ldfile
, 0);
2213 lr_ignore_rest (ldfile
, 1);
2218 if (! ignore_content
)
2220 /* Get the locale definition. */
2221 copy_locale
= load_locale (LC_CTYPE
, now
->val
.str
.startmb
,
2222 repertoire_name
, charmap
, NULL
);
2223 if ((copy_locale
->avail
& CTYPE_LOCALE
) == 0)
2225 /* Not yet loaded. So do it now. */
2226 if (locfile_read (copy_locale
, charmap
) != 0)
2231 lr_ignore_rest (ldfile
, 1);
2233 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2237 /* Prepare the data structures. */
2238 ctype_startup (ldfile
, result
, charmap
, copy_locale
, ignore_content
);
2239 ctype
= result
->categories
[LC_CTYPE
].ctype
;
2241 /* Remember the repertoire we use. */
2242 if (!ignore_content
)
2243 ctype
->repertoire
= repertoire
;
2247 unsigned long int class_bit
= 0;
2248 unsigned long int class256_bit
= 0;
2249 int handle_digits
= 0;
2251 /* Of course we don't proceed beyond the end of file. */
2252 if (nowtok
== tok_eof
)
2255 /* Ingore empty lines. */
2256 if (nowtok
== tok_eol
)
2258 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2266 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2267 while (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2269 ctype_class_new (ldfile
, ctype
, now
->val
.str
.startmb
);
2270 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2271 if (now
->tok
!= tok_semicolon
)
2273 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2275 if (now
->tok
!= tok_eol
)
2277 %s: syntax error in definition of new character class"), "LC_CTYPE");
2281 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2282 while (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2284 ctype_map_new (ldfile
, ctype
, now
->val
.str
.startmb
, charmap
);
2285 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2286 if (now
->tok
!= tok_semicolon
)
2288 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2290 if (now
->tok
!= tok_eol
)
2292 %s: syntax error in definition of new character map"), "LC_CTYPE");
2296 /* Ignore the rest of the line if we don't need the input of
2300 lr_ignore_rest (ldfile
, 0);
2304 /* We simply forget the `class' keyword and use the following
2305 operand to determine the bit. */
2306 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2307 if (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2309 /* Must can be one of the predefined class names. */
2310 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
2311 if (strcmp (ctype
->classnames
[cnt
], now
->val
.str
.startmb
) == 0)
2313 if (cnt
>= ctype
->nr_charclass
)
2315 #ifdef PREDEFINED_CLASSES
2316 if (now
->val
.str
.lenmb
== 8
2317 && memcmp ("special1", now
->val
.str
.startmb
, 8) == 0)
2318 class_bit
= _ISwspecial1
;
2319 else if (now
->val
.str
.lenmb
== 8
2320 && memcmp ("special2", now
->val
.str
.startmb
, 8) == 0)
2321 class_bit
= _ISwspecial2
;
2322 else if (now
->val
.str
.lenmb
== 8
2323 && memcmp ("special3", now
->val
.str
.startmb
, 8) == 0)
2324 class_bit
= _ISwspecial3
;
2328 /* OK, it's a new class. */
2329 ctype_class_new (ldfile
, ctype
, now
->val
.str
.startmb
);
2331 class_bit
= _ISwbit (ctype
->nr_charclass
- 1);
2336 class_bit
= _ISwbit (cnt
);
2338 free (now
->val
.str
.startmb
);
2341 else if (now
->tok
== tok_digit
)
2342 goto handle_tok_digit
;
2343 else if (now
->tok
< tok_upper
|| now
->tok
> tok_blank
)
2347 class_bit
= BITw (now
->tok
);
2348 class256_bit
= BIT (now
->tok
);
2351 /* The next character must be a semicolon. */
2352 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2353 if (now
->tok
!= tok_semicolon
)
2355 goto read_charclass
;
2368 /* Ignore the rest of the line if we don't need the input of
2372 lr_ignore_rest (ldfile
, 0);
2376 class_bit
= BITw (now
->tok
);
2377 class256_bit
= BIT (now
->tok
);
2380 ctype
->class_done
|= class_bit
;
2381 last_token
= tok_none
;
2382 ellipsis_token
= tok_none
;
2384 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2385 while (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
2388 struct charseq
*seq
;
2390 if (ellipsis_token
== tok_none
)
2392 if (get_character (now
, charmap
, repertoire
, &seq
, &wch
))
2395 if (!ignore_content
&& seq
!= NULL
&& seq
->nbytes
== 1)
2396 /* Yep, we can store information about this byte
2398 ctype
->class256_collection
[seq
->bytes
[0]] |= class256_bit
;
2400 if (!ignore_content
&& wch
!= ILLEGAL_CHAR_VALUE
2402 /* We have the UCS4 position. */
2403 *find_idx (ctype
, &ctype
->class_collection
,
2404 &ctype
->class_collection_max
,
2405 &ctype
->class_collection_act
, wch
) |= class_bit
;
2407 last_token
= now
->tok
;
2408 /* Terminate the string. */
2409 if (last_token
== tok_bsymbol
)
2411 now
->val
.str
.startmb
[now
->val
.str
.lenmb
] = '\0';
2412 last_str
= now
->val
.str
.startmb
;
2418 memcpy (last_charcode
, now
->val
.charcode
.bytes
, 16);
2419 last_charcode_len
= now
->val
.charcode
.nbytes
;
2421 if (!ignore_content
&& handle_digits
== 1)
2423 /* We must store the digit values. */
2424 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
2426 ctype
->mbdigits_max
+= 10;
2427 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
2428 (ctype
->mbdigits_max
2429 * sizeof (char *)));
2430 ctype
->wcdigits_max
+= 10;
2431 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
2432 (ctype
->wcdigits_max
2433 * sizeof (uint32_t)));
2436 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
2437 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
2439 else if (!ignore_content
&& handle_digits
== 2)
2441 /* We must store the digit values. */
2442 if (ctype
->outdigits_act
>= 10)
2444 lr_error (ldfile
, _("\
2445 %s: field `%s' does not contain exactly ten entries"),
2446 "LC_CTYPE", "outdigit");
2447 lr_ignore_rest (ldfile
, 0);
2451 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
2452 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
2453 ++ctype
->outdigits_act
;
2458 /* Now it gets complicated. We have to resolve the
2459 ellipsis problem. First we must distinguish between
2460 the different kind of ellipsis and this must match the
2461 tokens we have seen. */
2462 assert (last_token
!= tok_none
);
2464 if (last_token
!= now
->tok
)
2466 lr_error (ldfile
, _("\
2467 ellipsis range must be marked by two operands of same type"));
2468 lr_ignore_rest (ldfile
, 0);
2472 if (last_token
== tok_bsymbol
)
2474 if (ellipsis_token
== tok_ellipsis3
)
2475 lr_error (ldfile
, _("with symbolic name range values \
2476 the absolute ellipsis `...' must not be used"));
2478 charclass_symbolic_ellipsis (ldfile
, ctype
, charmap
,
2479 repertoire
, now
, last_str
,
2480 class256_bit
, class_bit
,
2485 handle_digits
, step
);
2487 else if (last_token
== tok_ucs4
)
2489 if (ellipsis_token
!= tok_ellipsis2
)
2490 lr_error (ldfile
, _("\
2491 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2493 charclass_ucs4_ellipsis (ldfile
, ctype
, charmap
,
2494 repertoire
, now
, last_wch
,
2495 class256_bit
, class_bit
,
2496 ignore_content
, handle_digits
,
2501 assert (last_token
== tok_charcode
);
2503 if (ellipsis_token
!= tok_ellipsis3
)
2504 lr_error (ldfile
, _("\
2505 with character code range values one must use the absolute ellipsis `...'"));
2507 charclass_charcode_ellipsis (ldfile
, ctype
, charmap
,
2511 class256_bit
, class_bit
,
2516 /* Now we have used the last value. */
2517 last_token
= tok_none
;
2520 /* Next we expect a semicolon or the end of the line. */
2521 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2522 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2525 if (last_token
!= tok_none
2526 && now
->tok
>= tok_ellipsis2
&& now
->tok
<= tok_ellipsis4_2
)
2528 if (now
->tok
== tok_ellipsis2_2
)
2530 now
->tok
= tok_ellipsis2
;
2533 else if (now
->tok
== tok_ellipsis4_2
)
2535 now
->tok
= tok_ellipsis4
;
2539 ellipsis_token
= now
->tok
;
2541 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2545 if (now
->tok
!= tok_semicolon
)
2548 /* And get the next character. */
2549 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2551 ellipsis_token
= tok_none
;
2557 /* Ignore the rest of the line if we don't need the input of
2561 lr_ignore_rest (ldfile
, 0);
2566 class_bit
= _ISwdigit
;
2567 class256_bit
= _ISdigit
;
2569 goto read_charclass
;
2572 /* Ignore the rest of the line if we don't need the input of
2576 lr_ignore_rest (ldfile
, 0);
2580 if (ctype
->outdigits_act
!= 0)
2581 lr_error (ldfile
, _("\
2582 %s: field `%s' declared more than once"),
2583 "LC_CTYPE", "outdigit");
2587 goto read_charclass
;
2590 /* Ignore the rest of the line if we don't need the input of
2594 lr_ignore_rest (ldfile
, 0);
2602 /* Ignore the rest of the line if we don't need the input of
2606 lr_ignore_rest (ldfile
, 0);
2614 /* Ignore the rest of the line if we don't need the input of
2618 lr_ignore_rest (ldfile
, 0);
2622 /* We simply forget the `map' keyword and use the following
2623 operand to determine the mapping. */
2624 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2625 if (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2629 for (cnt
= 2; cnt
< ctype
->map_collection_nr
; ++cnt
)
2630 if (strcmp (now
->val
.str
.startmb
, ctype
->mapnames
[cnt
]) == 0)
2633 if (cnt
< ctype
->map_collection_nr
)
2634 free (now
->val
.str
.startmb
);
2636 /* OK, it's a new map. */
2637 ctype_map_new (ldfile
, ctype
, now
->val
.str
.startmb
, charmap
);
2641 else if (now
->tok
< tok_toupper
|| now
->tok
> tok_tolower
)
2644 mapidx
= now
->tok
- tok_toupper
;
2646 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2647 /* This better should be a semicolon. */
2648 if (now
->tok
!= tok_semicolon
)
2652 /* Test whether this mapping was already defined. */
2653 if (ctype
->tomap_done
[mapidx
])
2655 lr_error (ldfile
, _("duplicated definition for mapping `%s'"),
2656 ctype
->mapnames
[mapidx
]);
2657 lr_ignore_rest (ldfile
, 0);
2660 ctype
->tomap_done
[mapidx
] = 1;
2662 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2663 while (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
2665 struct charseq
*from_seq
;
2667 struct charseq
*to_seq
;
2670 /* Every pair starts with an opening brace. */
2671 if (now
->tok
!= tok_open_brace
)
2674 /* Next comes the from-value. */
2675 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2676 if (get_character (now
, charmap
, repertoire
, &from_seq
,
2680 /* The next is a comma. */
2681 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2682 if (now
->tok
!= tok_comma
)
2685 /* And the other value. */
2686 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2687 if (get_character (now
, charmap
, repertoire
, &to_seq
,
2691 /* And the last thing is the closing brace. */
2692 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2693 if (now
->tok
!= tok_close_brace
)
2696 if (!ignore_content
)
2698 if (mapidx
< 2 && from_seq
!= NULL
&& to_seq
!= NULL
2699 && from_seq
->nbytes
== 1 && to_seq
->nbytes
== 1)
2700 /* We can use this value. */
2701 ctype
->map256_collection
[mapidx
][from_seq
->bytes
[0]]
2704 if (from_wch
!= ILLEGAL_CHAR_VALUE
2705 && to_wch
!= ILLEGAL_CHAR_VALUE
)
2706 /* Both correct values. */
2707 *find_idx (ctype
, &ctype
->map_collection
[mapidx
],
2708 &ctype
->map_collection_max
[mapidx
],
2709 &ctype
->map_collection_act
[mapidx
],
2713 /* Now comes a semicolon or the end of the line/file. */
2714 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2715 if (now
->tok
== tok_semicolon
)
2716 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2720 case tok_translit_start
:
2721 /* Ignore the entire translit section with its peculiar syntax
2722 if we don't need the input. */
2727 lr_ignore_rest (ldfile
, 0);
2728 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2730 while (now
->tok
!= tok_translit_end
&& now
->tok
!= tok_eof
);
2732 if (now
->tok
== tok_eof
)
2733 lr_error (ldfile
, _(\
2734 "%s: `translit_start' section does not end with `translit_end'"),
2740 /* The rest of the line better should be empty. */
2741 lr_ignore_rest (ldfile
, 1);
2743 /* We count here the number of allocated entries in the `translit'
2747 ldfile
->translate_strings
= 1;
2748 ldfile
->return_widestr
= 1;
2750 /* We proceed until we see the `translit_end' token. */
2751 while (now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
),
2752 now
->tok
!= tok_translit_end
&& now
->tok
!= tok_eof
)
2754 if (now
->tok
== tok_eol
)
2755 /* Ignore empty lines. */
2758 if (now
->tok
== tok_include
)
2760 /* We have to include locale. */
2761 const char *locale_name
;
2762 const char *repertoire_name
;
2763 struct translit_include_t
*include_stmt
, **include_ptr
;
2765 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2766 /* This should be a string or an identifier. In any
2767 case something to name a locale. */
2768 if (now
->tok
!= tok_string
&& now
->tok
!= tok_ident
)
2771 lr_error (ldfile
, _("%s: syntax error"), "LC_CTYPE");
2772 lr_ignore_rest (ldfile
, 0);
2775 locale_name
= now
->val
.str
.startmb
;
2777 /* Next should be a semicolon. */
2778 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2779 if (now
->tok
!= tok_semicolon
)
2780 goto translit_syntax
;
2782 /* Now the repertoire name. */
2783 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2784 if ((now
->tok
!= tok_string
&& now
->tok
!= tok_ident
)
2785 || now
->val
.str
.startmb
== NULL
)
2786 goto translit_syntax
;
2787 repertoire_name
= now
->val
.str
.startmb
;
2789 /* Save the include statement for later processing. */
2790 include_stmt
= (struct translit_include_t
*)
2791 xmalloc (sizeof (struct translit_include_t
));
2792 include_stmt
->copy_locale
= locale_name
;
2793 include_stmt
->copy_repertoire
= repertoire_name
;
2794 include_stmt
->next
= NULL
;
2796 include_ptr
= &ctype
->translit_include
;
2797 while (*include_ptr
!= NULL
)
2798 include_ptr
= &(*include_ptr
)->next
;
2799 *include_ptr
= include_stmt
;
2801 /* The rest of the line must be empty. */
2802 lr_ignore_rest (ldfile
, 1);
2804 /* Make sure the locale is read. */
2805 add_to_readlist (LC_CTYPE
, locale_name
, repertoire_name
,
2809 else if (now
->tok
== tok_default_missing
)
2815 /* We expect a single character or string as the
2817 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2818 wstr
= read_widestring (ldfile
, now
, charmap
,
2823 if (ctype
->default_missing
!= NULL
)
2825 lr_error (ldfile
, _("\
2826 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2827 error_at_line (0, 0, ctype
->default_missing_file
,
2828 ctype
->default_missing_lineno
,
2830 previous definition was here"));
2834 ctype
->default_missing
= wstr
;
2835 ctype
->default_missing_file
= ldfile
->fname
;
2836 ctype
->default_missing_lineno
= ldfile
->lineno
;
2838 /* We can have more entries, ignore them. */
2839 lr_ignore_rest (ldfile
, 0);
2842 else if (wstr
== (uint32_t *) -1l)
2843 /* This was an syntax error. */
2846 /* Maybe there is another replacement we can use. */
2847 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2848 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2850 /* Nothing found. We tell the user. */
2851 lr_error (ldfile
, _("\
2852 %s: no representable `default_missing' definition found"), "LC_CTYPE");
2855 if (now
->tok
!= tok_semicolon
)
2856 goto translit_syntax
;
2861 else if (now
->tok
== tok_translit_ignore
)
2863 read_translit_ignore_entry (ldfile
, ctype
, charmap
,
2868 read_translit_entry (ldfile
, ctype
, now
, charmap
, repertoire
);
2870 ldfile
->return_widestr
= 0;
2872 if (now
->tok
== tok_eof
)
2873 lr_error (ldfile
, _(\
2874 "%s: `translit_start' section does not end with `translit_end'"),
2880 /* Ignore the rest of the line if we don't need the input of
2884 lr_ignore_rest (ldfile
, 0);
2888 /* This could mean one of several things. First test whether
2889 it's a character class name. */
2890 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
2891 if (strcmp (now
->val
.str
.startmb
, ctype
->classnames
[cnt
]) == 0)
2893 if (cnt
< ctype
->nr_charclass
)
2895 class_bit
= _ISwbit (cnt
);
2896 class256_bit
= cnt
<= 11 ? _ISbit (cnt
) : 0;
2897 free (now
->val
.str
.startmb
);
2898 goto read_charclass
;
2900 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
)
2901 if (strcmp (now
->val
.str
.startmb
, ctype
->mapnames
[cnt
]) == 0)
2903 if (cnt
< ctype
->map_collection_nr
)
2906 free (now
->val
.str
.startmb
);
2909 #ifdef PREDEFINED_CLASSES
2910 if (strcmp (now
->val
.str
.startmb
, "special1") == 0)
2912 class_bit
= _ISwspecial1
;
2913 free (now
->val
.str
.startmb
);
2914 goto read_charclass
;
2916 if (strcmp (now
->val
.str
.startmb
, "special2") == 0)
2918 class_bit
= _ISwspecial2
;
2919 free (now
->val
.str
.startmb
);
2920 goto read_charclass
;
2922 if (strcmp (now
->val
.str
.startmb
, "special3") == 0)
2924 class_bit
= _ISwspecial3
;
2925 free (now
->val
.str
.startmb
);
2926 goto read_charclass
;
2928 if (strcmp (now
->val
.str
.startmb
, "tosymmetric") == 0)
2937 /* Next we assume `LC_CTYPE'. */
2938 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2939 if (now
->tok
== tok_eof
)
2941 if (now
->tok
== tok_eol
)
2942 lr_error (ldfile
, _("%s: incomplete `END' line"),
2944 else if (now
->tok
!= tok_lc_ctype
)
2945 lr_error (ldfile
, _("\
2946 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2947 lr_ignore_rest (ldfile
, now
->tok
== tok_lc_ctype
);
2952 if (now
->tok
!= tok_eof
)
2953 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2956 /* Prepare for the next round. */
2957 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2961 /* When we come here we reached the end of the file. */
2962 lr_error (ldfile
, _("%s: premature end of file"), "LC_CTYPE");
/* Supply default members for the standard character classes, for the
   toupper/tolower mappings and for the output digits wherever the locale
   source left them undefined.  Whether a keyword was given is read from
   ctype->class_done, ctype->tomap_done[] and ctype->outdigits_act.  */
2967 set_class_defaults (struct locale_ctype_t
*ctype
,
2968 const struct charmap_t
*charmap
,
2969 struct repertoire_t
*repertoire
)
2973 /* This function defines the default values for the classes and conversions
2974 according to POSIX.2 2.5.2.1.
2975 It may seem that the order of these if-blocks is arbitrary but it is NOT.
2976 Don't move them unless you know what you are doing! */
/* Nested helper: for every character code FROM..TO, set the class bit
   BITPOS in the single-byte table (class256_collection) and in the
   wide-character table (class_collection).  */
2978 auto void set_default (int bitpos
, int from
, int to
);
2980 void set_default (int bitpos
, int from
, int to
)
2984 int bit
= _ISbit (bitpos
);
2985 int bitw
= _ISwbit (bitpos
);
2986 /* Define string. */
2989 for (ch
= from
; ch
<= to
; ++ch
)
2991 struct charseq
*seq
;
2994 seq
= charmap_find_value (charmap
, tmp
, 1);
/* Look the character up under its nine-character Uxxxxxxxx name.  */
2998 sprintf (buf
, "U%08X", ch
);
2999 seq
= charmap_find_value (charmap
, buf
, 9);
3005 %s: character `%s' not defined in charmap while needed as default value"),
3008 else if (seq
->nbytes
!= 1)
3010 %s: character `%s' in charmap not representable with one byte"),
3013 ctype
->class256_collection
[seq
->bytes
[0]] |= bit
;
3015 /* No need to search here, the ASCII value is also the Unicode
3017 ELEM (ctype
, class_collection
, , ch
) |= bitw
;
3021 /* Set default values if keyword was not present. */
3022 if ((ctype
->class_done
& BITw (tok_upper
)) == 0)
3023 /* "If this keyword [upper] is not specified, the uppercase letters
3024 `A' through `Z', ..., shall automatically belong to this class,
3025 with implementation defined character values." [P1003.2, 2.5.2.1] */
3026 set_default (BITPOS (tok_upper
), 'A', 'Z');
3028 if ((ctype
->class_done
& BITw (tok_lower
)) == 0)
3029 /* "If this keyword [lower] is not specified, the lowercase letters
3030 `a' through `z', ..., shall automatically belong to this class,
3031 with implementation defined character values." [P1003.2, 2.5.2.1] */
3032 set_default (BITPOS (tok_lower
), 'a', 'z');
3034 if ((ctype
->class_done
& BITw (tok_alpha
)) == 0)
3036 /* Table 2-6 in P1003.2 says that characters in class `upper' or
3037 class `lower' *must* be in class `alpha'. */
3038 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
);
3039 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
);
3041 for (cnt
= 0; cnt
< 256; ++cnt
)
3042 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3043 ctype
->class256_collection
[cnt
] |= BIT (tok_alpha
);
3045 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3046 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3047 ctype
->class_collection
[cnt
] |= BITw (tok_alpha
);
3050 if ((ctype
->class_done
& BITw (tok_digit
)) == 0)
3051 /* "If this keyword [digit] is not specified, the digits `0' through
3052 `9', ..., shall automatically belong to this class, with
3053 implementation-defined character values." [P1003.2, 2.5.2.1] */
3054 set_default (BITPOS (tok_digit
), '0', '9');
3056 /* "Only characters specified for the `alpha' and `digit' keyword
3057 shall be specified. Characters specified for the keyword `alpha'
3058 and `digit' are automatically included in this class." */
3060 unsigned long int mask
= BIT (tok_alpha
) | BIT (tok_digit
);
3061 unsigned long int maskw
= BITw (tok_alpha
) | BITw (tok_digit
);
3063 for (cnt
= 0; cnt
< 256; ++cnt
)
3064 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3065 ctype
->class256_collection
[cnt
] |= BIT (tok_alnum
);
3067 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3068 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3069 ctype
->class_collection
[cnt
] |= BITw (tok_alnum
);
3072 if ((ctype
->class_done
& BITw (tok_space
)) == 0)
3073 /* "If this keyword [space] is not specified, the characters <space>,
3074 <form-feed>, <newline>, <carriage-return>, <tab>, and
3075 <vertical-tab>, ..., shall automatically belong to this class,
3076 with implementation-defined character values." [P1003.2, 2.5.2.1] */
3078 struct charseq
*seq
;
3080 seq
= charmap_find_value (charmap
, "space", 5);
3082 seq
= charmap_find_value (charmap
, "SP", 2);
3084 seq
= charmap_find_value (charmap
, "U00000020", 9);
3089 %s: character `%s' not defined while needed as default value"),
3090 "LC_CTYPE", "<space>");
3092 else if (seq
->nbytes
!= 1)
3094 %s: character `%s' in charmap not representable with one byte"),
3095 "LC_CTYPE", "<space>");
3097 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3099 /* No need to search. */
3100 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_space
);
3102 seq
= charmap_find_value (charmap
, "form-feed", 9);
3104 seq
= charmap_find_value (charmap
, "U0000000C", 9);
3109 %s: character `%s' not defined while needed as default value"),
3110 "LC_CTYPE", "<form-feed>");
3112 else if (seq
->nbytes
!= 1)
3114 %s: character `%s' in charmap not representable with one byte"),
3115 "LC_CTYPE", "<form-feed>");
3117 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3119 /* No need to search. */
3120 ELEM (ctype
, class_collection
, , L
'\f') |= BITw (tok_space
);
3123 seq
= charmap_find_value (charmap
, "newline", 7);
3125 seq
= charmap_find_value (charmap
, "U0000000A", 9);
3130 character `%s' not defined while needed as default value"),
3133 else if (seq
->nbytes
!= 1)
3135 %s: character `%s' in charmap not representable with one byte"),
3136 "LC_CTYPE", "<newline>");
3138 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3140 /* No need to search. */
3141 ELEM (ctype
, class_collection
, , L
'\n') |= BITw (tok_space
);
3144 seq
= charmap_find_value (charmap
, "carriage-return", 15);
3146 seq
= charmap_find_value (charmap
, "U0000000D", 9);
3151 %s: character `%s' not defined while needed as default value"),
3152 "LC_CTYPE", "<carriage-return>");
3154 else if (seq
->nbytes
!= 1)
3156 %s: character `%s' in charmap not representable with one byte"),
3157 "LC_CTYPE", "<carriage-return>");
3159 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3161 /* No need to search. */
3162 ELEM (ctype
, class_collection
, , L
'\r') |= BITw (tok_space
);
3165 seq
= charmap_find_value (charmap
, "tab", 3);
3167 seq
= charmap_find_value (charmap
, "U00000009", 9);
3172 %s: character `%s' not defined while needed as default value"),
3173 "LC_CTYPE", "<tab>");
3175 else if (seq
->nbytes
!= 1)
3177 %s: character `%s' in charmap not representable with one byte"),
3178 "LC_CTYPE", "<tab>");
3180 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3182 /* No need to search. */
3183 ELEM (ctype
, class_collection
, , L
'\t') |= BITw (tok_space
);
3186 seq
= charmap_find_value (charmap
, "vertical-tab", 12);
3188 seq
= charmap_find_value (charmap
, "U0000000B", 9);
3193 %s: character `%s' not defined while needed as default value"),
3194 "LC_CTYPE", "<vertical-tab>");
3196 else if (seq
->nbytes
!= 1)
3198 %s: character `%s' in charmap not representable with one byte"),
3199 "LC_CTYPE", "<vertical-tab>");
3201 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3203 /* No need to search. */
3204 ELEM (ctype
, class_collection
, , L
'\v') |= BITw (tok_space
);
3207 if ((ctype
->class_done
& BITw (tok_xdigit
)) == 0)
3208 /* "If this keyword is not specified, the digits `0' to `9', the
3209 uppercase letters `A' through `F', and the lowercase letters `a'
3210 through `f', ..., shall automatically belong to this class, with
3211 implementation defined character values." [P1003.2, 2.5.2.1] */
3213 set_default (BITPOS (tok_xdigit
), '0', '9');
3214 set_default (BITPOS (tok_xdigit
), 'A', 'F');
3215 set_default (BITPOS (tok_xdigit
), 'a', 'f');
3218 if ((ctype
->class_done
& BITw (tok_blank
)) == 0)
3219 /* "If this keyword [blank] is unspecified, the characters <space> and
3220 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
3222 struct charseq
*seq
;
3224 seq
= charmap_find_value (charmap
, "space", 5);
3226 seq
= charmap_find_value (charmap
, "SP", 2);
3228 seq
= charmap_find_value (charmap
, "U00000020", 9);
3233 %s: character `%s' not defined while needed as default value"),
3234 "LC_CTYPE", "<space>");
3236 else if (seq
->nbytes
!= 1)
3238 %s: character `%s' in charmap not representable with one byte"),
3239 "LC_CTYPE", "<space>");
3241 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_blank
);
3243 /* No need to search. */
3244 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_blank
);
3247 seq
= charmap_find_value (charmap
, "tab", 3);
3249 seq
= charmap_find_value (charmap
, "U00000009", 9);
3254 %s: character `%s' not defined while needed as default value"),
3255 "LC_CTYPE", "<tab>");
3257 else if (seq
->nbytes
!= 1)
3259 %s: character `%s' in charmap not representable with one byte"),
3260 "LC_CTYPE", "<tab>");
3262 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_blank
);
3264 /* No need to search. */
3265 ELEM (ctype
, class_collection
, , L
'\t') |= BITw (tok_blank
);
3268 if ((ctype
->class_done
& BITw (tok_graph
)) == 0)
3269 /* "If this keyword [graph] is not specified, characters specified for
3270 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3271 shall belong to this character class." [P1003.2, 2.5.2.1] */
3273 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
) |
3274 BIT (tok_alpha
) | BIT (tok_digit
) | BIT (tok_xdigit
) | BIT (tok_punct
);
3275 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
) |
3276 BITw (tok_alpha
) | BITw (tok_digit
) | BITw (tok_xdigit
) |
3280 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3281 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3282 ctype
->class_collection
[cnt
] |= BITw (tok_graph
);
3284 for (cnt
= 0; cnt
< 256; ++cnt
)
3285 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3286 ctype
->class256_collection
[cnt
] |= BIT (tok_graph
);
3289 if ((ctype
->class_done
& BITw (tok_print
)) == 0)
3290 /* "If this keyword [print] is not provided, characters specified for
3291 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3292 and the <space> character shall belong to this character class."
3293 [P1003.2, 2.5.2.1] */
3295 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
) |
3296 BIT (tok_alpha
) | BIT (tok_digit
) | BIT (tok_xdigit
) | BIT (tok_punct
);
3297 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
) |
3298 BITw (tok_alpha
) | BITw (tok_digit
) | BITw (tok_xdigit
) |
3301 struct charseq
*seq
;
3303 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3304 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3305 ctype
->class_collection
[cnt
] |= BITw (tok_print
);
3307 for (cnt
= 0; cnt
< 256; ++cnt
)
3308 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3309 ctype
->class256_collection
[cnt
] |= BIT (tok_print
);
3312 seq
= charmap_find_value (charmap
, "space", 5);
3314 seq
= charmap_find_value (charmap
, "SP", 2);
3316 seq
= charmap_find_value (charmap
, "U00000020", 9);
3321 %s: character `%s' not defined while needed as default value"),
3322 "LC_CTYPE", "<space>");
3324 else if (seq
->nbytes
!= 1)
3326 %s: character `%s' in charmap not representable with one byte"),
3327 "LC_CTYPE", "<space>");
3329 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_print
);
3331 /* No need to search. */
3332 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_print
);
3335 if (ctype
->tomap_done
[0] == 0)
3336 /* "If this keyword [toupper] is not specified, the lowercase letters
3337 `a' through `z', and their corresponding uppercase letters `A' to
3338 `Z', ..., shall automatically be included, with implementation-
3339 defined character values." [P1003.2, 2.5.2.1] */
3344 strcpy (tmp
, "<?>");
3346 for (ch
= 'a'; ch
<= 'z'; ++ch
)
3348 struct charseq
*seq_from
, *seq_to
;
3352 seq_from
= charmap_find_value (charmap
, &tmp
[1], 1);
3353 if (seq_from
== NULL
)
3356 sprintf (buf
, "U%08X", ch
);
3357 seq_from
= charmap_find_value (charmap
, buf
, 9);
3359 if (seq_from
== NULL
)
3363 %s: character `%s' not defined while needed as default value"),
3366 else if (seq_from
->nbytes
!= 1)
3370 %s: character `%s' needed as default value not representable with one byte"),
3375 /* This conversion is implementation defined. */
3376 tmp
[1] = (char) (ch
+ ('A' - 'a'));
3377 seq_to
= charmap_find_value (charmap
, &tmp
[1], 1);
3381 sprintf (buf
, "U%08X", ch
+ ('A' - 'a'));
3382 seq_to
= charmap_find_value (charmap
, buf
, 9);
3388 %s: character `%s' not defined while needed as default value"),
3391 else if (seq_to
->nbytes
!= 1)
3395 %s: character `%s' needed as default value not representable with one byte"),
3399 /* The index [0] is determined by the order of the
3400 `ctype_map_newP' calls in `ctype_startup'. */
3401 ctype
->map256_collection
[0][seq_from
->bytes
[0]]
3405 /* No need to search. */
3406 ELEM (ctype
, map_collection
, [0], ch
) = ch
+ ('A' - 'a');
3410 if (ctype
->tomap_done
[1] == 0)
3411 /* "If this keyword [tolower] is not specified, the mapping shall be
3412 the reverse mapping of the one specified to `toupper'." [P1003.2] */
/* Invert the wide-character toupper table: every non-zero mapped value
   becomes a key in map [1] whose value is charnames[cnt] (presumably the
   character stored in slot CNT -- confirm against find_idx).  */
3414 for (cnt
= 0; cnt
< ctype
->map_collection_act
[0]; ++cnt
)
3415 if (ctype
->map_collection
[0][cnt
] != 0)
3416 ELEM (ctype
, map_collection
, [1],
3417 ctype
->map_collection
[0][cnt
])
3418 = ctype
->charnames
[cnt
];
/* The same inversion for the 256-entry single-byte table.  */
3420 for (cnt
= 0; cnt
< 256; ++cnt
)
3421 if (ctype
->map256_collection
[0][cnt
] != 0)
3422 ctype
->map256_collection
[1][ctype
->map256_collection
[0][cnt
]] = cnt
;
/* Complete the output digits.  A partially filled list is reported as an
   error; slots from outdigits_act up to 9 are then looked up in the
   charmap, with a one-byte replacement as the last resort, so that ten
   entries exist afterwards.  */
3425 if (ctype
->outdigits_act
!= 10)
3427 if (ctype
->outdigits_act
!= 0)
3428 error (0,0, _("%s: field `%s' does not contain exactly ten entries"),
3429 "LC_CTYPE", "outdigit");
3431 for (cnt
= ctype
->outdigits_act
; cnt
< 10; ++cnt
)
3433 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3436 if (ctype
->mboutdigits
[cnt
] == NULL
)
3437 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3439 strlen (longnames
[cnt
]));
3441 if (ctype
->mboutdigits
[cnt
] == NULL
)
3442 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3445 if (ctype
->mboutdigits
[cnt
] == NULL
)
3447 /* Provide a replacement. */
3449 no output digits defined and none of the standard names in the charmap"));
3451 ctype
->mboutdigits
[cnt
] = obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
3452 sizeof (struct charseq
)
3455 /* This is better than nothing. */
3456 ctype
->mboutdigits
[cnt
]->bytes
[0] = digits
[cnt
];
3457 ctype
->mboutdigits
[cnt
]->nbytes
= 1;
3460 ctype
->wcoutdigits
[cnt
] = L
'0' + cnt
;
3463 ctype
->outdigits_act
= 10;
3468 /* Construction of sparse 3-level tables.
3469 See wchar-lookup.h for their structure and the meaning of p and q. */
3476 /* Working representation. */
/* Capacity of level1, counted in uint32_t entries.  */
3477 size_t level1_alloc
;
/* Capacity of level2, counted in blocks of (1 << q) entries.  */
3480 size_t level2_alloc
;
/* Capacity of level3, counted in blocks of (1 << p) entries.  */
3483 size_t level3_alloc
;
3486 /* Compressed representation. */
3491 /* Initialize T as an empty table: for each of the three levels both the
   allocated and the used counts start at zero.  Assumes t->p and t->q
   have already been set. */
3493 wctype_table_init (struct wctype_table
*t
)
3496 t
->level1_alloc
= t
->level1_size
= 0;
3498 t
->level2_alloc
= t
->level2_size
= 0;
3500 t
->level3_alloc
= t
->level3_size
= 0;
3503 /* Retrieve an entry.  WC is split into four bit fields: the top bits
   select a level-1 slot, the next q bits a slot within a level-2 block,
   the next p bits a word within a level-3 block, and the low 5 bits one
   bit of that 32-bit word.  When no level on the path is EMPTY the value
   of that bit is returned.
   NOTE(review): the result for a missing path is on lines not visible
   here. */
3505 wctype_table_get (struct wctype_table
*t
, uint32_t wc
)
3507 uint32_t index1
= wc
>> (t
->q
+ t
->p
+ 5);
3508 if (index1
< t
->level1_size
)
3510 uint32_t lookup1
= t
->level1
[index1
];
3511 if (lookup1
!= EMPTY
)
/* lookup1 is a level-2 block number; scale it by the block size.  */
3513 uint32_t index2
= ((wc
>> (t
->p
+ 5)) & ((1 << t
->q
) - 1))
3514 + (lookup1
<< t
->q
);
3515 uint32_t lookup2
= t
->level2
[index2
];
3516 if (lookup2
!= EMPTY
)
/* lookup2 is a level-3 block number; scale it by the block size.  */
3518 uint32_t index3
= ((wc
>> 5) & ((1 << t
->p
) - 1))
3519 + (lookup2
<< t
->p
);
3520 uint32_t lookup3
= t
->level3
[index3
];
3521 uint32_t index4
= wc
& 0x1f;
3523 return (lookup3
>> index4
) & 1;
3530 /* Add one entry: set the bit for WC in T, growing level 1 and appending
   level-2 and level-3 blocks as needed. */
3532 wctype_table_add (struct wctype_table
*t
, uint32_t wc
)
3534 uint32_t index1
= wc
>> (t
->q
+ t
->p
+ 5);
3535 uint32_t index2
= (wc
>> (t
->p
+ 5)) & ((1 << t
->q
) - 1);
3536 uint32_t index3
= (wc
>> 5) & ((1 << t
->p
) - 1);
3537 uint32_t index4
= wc
& 0x1f;
/* Extend level 1 until index1 is a valid slot; new slots start out
   EMPTY.  */
3540 if (index1
>= t
->level1_size
)
3542 if (index1
>= t
->level1_alloc
)
3544 size_t alloc
= 2 * t
->level1_alloc
;
3545 if (alloc
<= index1
)
3547 t
->level1
= (uint32_t *) xrealloc ((char *) t
->level1
,
3548 alloc
* sizeof (uint32_t));
3549 t
->level1_alloc
= alloc
;
3551 while (index1
>= t
->level1_size
)
3552 t
->level1
[t
->level1_size
++] = EMPTY
;
/* No level-2 block for this slot yet: append one, fill it with EMPTY
   and record its block number in level 1.  */
3555 if (t
->level1
[index1
] == EMPTY
)
3557 if (t
->level2_size
== t
->level2_alloc
)
3559 size_t alloc
= 2 * t
->level2_alloc
+ 1;
3560 t
->level2
= (uint32_t *) xrealloc ((char *) t
->level2
,
3561 (alloc
<< t
->q
) * sizeof (uint32_t));
3562 t
->level2_alloc
= alloc
;
3564 i1
= t
->level2_size
<< t
->q
;
3565 i2
= (t
->level2_size
+ 1) << t
->q
;
3566 for (i
= i1
; i
< i2
; i
++)
3567 t
->level2
[i
] = EMPTY
;
3568 t
->level1
[index1
] = t
->level2_size
++;
/* Turn index2 into an absolute position inside level2.  */
3571 index2
+= t
->level1
[index1
] << t
->q
;
/* Likewise append a level-3 block when the level-2 slot is EMPTY.  */
3573 if (t
->level2
[index2
] == EMPTY
)
3575 if (t
->level3_size
== t
->level3_alloc
)
3577 size_t alloc
= 2 * t
->level3_alloc
+ 1;
3578 t
->level3
= (uint32_t *) xrealloc ((char *) t
->level3
,
3579 (alloc
<< t
->p
) * sizeof (uint32_t));
3580 t
->level3_alloc
= alloc
;
3582 i1
= t
->level3_size
<< t
->p
;
3583 i2
= (t
->level3_size
+ 1) << t
->p
;
3584 for (i
= i1
; i
< i2
; i
++)
3586 t
->level2
[index2
] = t
->level3_size
++;
/* Turn index3 into an absolute position inside level3.  */
3589 index3
+= t
->level2
[index2
] << t
->p
;
/* Set bit index4 of the 32-bit word that covers WC.  */
3591 t
->level3
[index3
] |= (uint32_t)1 << index4
;
3594 /* Finalize and shrink: merge identical level-3 and level-2 blocks,
   renumber the references to them, and build the contiguous buffer
   t->result of t->result_size bytes holding a five-word header followed
   by the three levels. */
3596 wctype_table_finalize (struct wctype_table
*t
)
/* reorder3 and reorder2 translate an old block number into the block's
   number after merging.
   NOTE(review): both are variable-length arrays sized by the current
   block counts, so a very large table costs stack space -- confirm this
   is acceptable.  */
3599 uint32_t reorder3
[t
->level3_size
];
3600 uint32_t reorder2
[t
->level2_size
];
3601 uint32_t level1_offset
, level2_offset
, level3_offset
;
3603 /* Uniquify level3 blocks. */
3605 for (j
= 0; j
< t
->level3_size
; j
++)
3607 for (i
= 0; i
< k
; i
++)
3608 if (memcmp (&t
->level3
[i
<< t
->p
], &t
->level3
[j
<< t
->p
],
3609 (1 << t
->p
) * sizeof (uint32_t)) == 0)
3611 /* Relocate block j to block i. */
3616 memcpy (&t
->level3
[i
<< t
->p
], &t
->level3
[j
<< t
->p
],
3617 (1 << t
->p
) * sizeof (uint32_t));
/* Point every used level-2 entry at the renumbered level-3 block.  */
3623 for (i
= 0; i
< (t
->level2_size
<< t
->q
); i
++)
3624 if (t
->level2
[i
] != EMPTY
)
3625 t
->level2
[i
] = reorder3
[t
->level2
[i
]];
3627 /* Uniquify level2 blocks. */
3629 for (j
= 0; j
< t
->level2_size
; j
++)
3631 for (i
= 0; i
< k
; i
++)
3632 if (memcmp (&t
->level2
[i
<< t
->q
], &t
->level2
[j
<< t
->q
],
3633 (1 << t
->q
) * sizeof (uint32_t)) == 0)
3635 /* Relocate block j to block i. */
3640 memcpy (&t
->level2
[i
<< t
->q
], &t
->level2
[j
<< t
->q
],
3641 (1 << t
->q
) * sizeof (uint32_t));
/* Point every used level-1 entry at the renumbered level-2 block.  */
3647 for (i
= 0; i
< t
->level1_size
; i
++)
3648 if (t
->level1
[i
] != EMPTY
)
3649 t
->level1
[i
] = reorder2
[t
->level1
[i
]];
3651 /* Create and fill the resulting compressed representation. */
3653 5 * sizeof (uint32_t)
3654 + t
->level1_size
* sizeof (uint32_t)
3655 + (t
->level2_size
<< t
->q
) * sizeof (uint32_t)
3656 + (t
->level3_size
<< t
->p
) * sizeof (uint32_t);
3657 t
->result
= (char *) xmalloc (t
->result_size
);
3660 5 * sizeof (uint32_t);
3662 5 * sizeof (uint32_t)
3663 + t
->level1_size
* sizeof (uint32_t);
3665 5 * sizeof (uint32_t)
3666 + t
->level1_size
* sizeof (uint32_t)
3667 + (t
->level2_size
<< t
->q
) * sizeof (uint32_t);
/* Header words: the level-1 shift (q + p + 5), the number of level-1
   entries, the level-2 shift (p + 5), the level-2 mask and the level-3
   mask.  */
3669 ((uint32_t *) t
->result
)[0] = t
->q
+ t
->p
+ 5;
3670 ((uint32_t *) t
->result
)[1] = t
->level1_size
;
3671 ((uint32_t *) t
->result
)[2] = t
->p
+ 5;
3672 ((uint32_t *) t
->result
)[3] = (1 << t
->q
) - 1;
3673 ((uint32_t *) t
->result
)[4] = (1 << t
->p
) - 1;
/* Used level-1 and level-2 entries are written as byte offsets into the
   result buffer instead of block numbers.  */
3675 for (i
= 0; i
< t
->level1_size
; i
++)
3676 ((uint32_t *) (t
->result
+ level1_offset
))[i
] =
3677 (t
->level1
[i
] == EMPTY
3679 : (t
->level1
[i
] << t
->q
) * sizeof (uint32_t) + level2_offset
);
3681 for (i
= 0; i
< (t
->level2_size
<< t
->q
); i
++)
3682 ((uint32_t *) (t
->result
+ level2_offset
))[i
] =
3683 (t
->level2
[i
] == EMPTY
3685 : (t
->level2
[i
] << t
->p
) * sizeof (uint32_t) + level3_offset
);
/* The level-3 bit words are copied unchanged.  */
3687 for (i
= 0; i
< (t
->level3_size
<< t
->p
); i
++)
3688 ((uint32_t *) (t
->result
+ level3_offset
))[i
] = t
->level3
[i
];
3690 if (t
->level1_alloc
> 0)
3692 if (t
->level2_alloc
> 0)
3694 if (t
->level3_alloc
> 0)
/* Template parameters for a table type named wcwidth_table with uint8_t
   elements and 0xff as the default element value.
   NOTE(review): the header that consumes TABLE/ELEMENT/DEFAULT is included
   on a line not visible here; presumably it also #undefs them -- confirm.  */
3698 #define TABLE wcwidth_table
3699 #define ELEMENT uint8_t
3700 #define DEFAULT 0xff
/* Template parameters for a table type named wctrans_table with int32_t
   elements.  */
3703 #define TABLE wctrans_table
3704 #define ELEMENT int32_t
/* While this macro is defined the name wctrans_table_add expands to
   wctrans_table_add_internal; the wrapper defined after the #undef calls
   that internal function.  */
3706 #define wctrans_table_add wctrans_table_add_internal
3708 #undef wctrans_table_add
3709 /* The wctrans_table must actually store the difference between the
3710 desired result and the argument.  This wrapper therefore records
   MAPPED_WC - WC (computed in uint32_t arithmetic) as the entry for WC. */
3712 wctrans_table_add (struct wctrans_table
*t
, uint32_t wc
, uint32_t mapped_wc
)
3714 wctrans_table_add_internal (t
, wc
, mapped_wc
- wc
);
3718 /* Flattens the included transliterations into a translit list.
3719 Inserts them in the list at `cursor', and returns the new cursor.
   Every include statement of CTYPE is unchained and resolved with
   find_locale; the included locale is flattened recursively before its
   own translit list is spliced in. */
static struct translit_t
**
3721 translit_flatten (struct locale_ctype_t
*ctype
,
3722 const struct charmap_t
*charmap
,
3723 struct translit_t
**cursor
)
3725 while (ctype
->translit_include
!= NULL
)
3727 const char *copy_locale
= ctype
->translit_include
->copy_locale
;
3728 const char *copy_repertoire
= ctype
->translit_include
->copy_repertoire
;
3729 struct localedef_t
*other
;
3731 /* Unchain the include statement. During the depth-first traversal
3732 we don't want to visit any locale more than once. */
3733 ctype
->translit_include
= ctype
->translit_include
->next
;
3735 other
= find_locale (LC_CTYPE
, copy_locale
, copy_repertoire
, charmap
);
3740 %s: transliteration data from locale `%s' not available"),
3741 "LC_CTYPE", copy_locale
);
3745 struct locale_ctype_t
*other_ctype
=
3746 other
->categories
[LC_CTYPE
].ctype
;
/* Resolve the included locale's own includes first.  */
3748 cursor
= translit_flatten (other_ctype
, charmap
, cursor
);
3749 assert (other_ctype
->translit_include
== NULL
);
3751 if (other_ctype
->translit
!= NULL
)
3753 /* Insert the other_ctype->translit list at *cursor. */
3754 struct translit_t
*endp
= other_ctype
->translit
;
3755 while (endp
->next
!= NULL
)
3758 endp
->next
= *cursor
;
3759 *cursor
= other_ctype
->translit
;
3761 /* Avoid any risk of circular lists. */
3762 other_ctype
->translit
= NULL
;
/* Later insertions go after the entries just spliced in.  */
3764 cursor
= &endp
->next
;
/* Take over the included locale's default_missing string when this
   locale has none of its own.  */
3767 if (ctype
->default_missing
== NULL
)
3768 ctype
->default_missing
= other_ctype
->default_missing
;
3776 allocate_arrays (struct locale_ctype_t
*ctype
, const struct charmap_t
*charmap
,
3777 struct repertoire_t
*repertoire
)
3785 /* You wonder about this amount of memory? This is only because some
3786 users do not manage to address the array with unsigned values or
3787 data types with range >= 256. '\200' would result in the array
3788 index -128. To help these poor people we duplicate the entries for
3789 128 up to 255 below the entry for \0. */
3790 ctype
->ctype_b
= (char_class_t
*) xcalloc (256 + 128, sizeof (char_class_t
));
3791 ctype
->ctype32_b
= (char_class32_t
*) xcalloc (256, sizeof (char_class32_t
));
3792 ctype
->class_b
= (uint32_t **)
3793 xmalloc (ctype
->nr_charclass
* sizeof (uint32_t *));
3794 ctype
->class_3level
= (struct iovec
*)
3795 xmalloc (ctype
->nr_charclass
* sizeof (struct iovec
));
3797 /* This is the array accessed using the multibyte string elements. */
3798 for (idx
= 0; idx
< 256; ++idx
)
3799 ctype
->ctype_b
[128 + idx
] = ctype
->class256_collection
[idx
];
3801 /* Mirror first 127 entries. We must take care that entry -1 is not
3802 mirrored because EOF == -1. */
3803 for (idx
= 0; idx
< 127; ++idx
)
3804 ctype
->ctype_b
[idx
] = ctype
->ctype_b
[256 + idx
];
3806 /* The 32 bit array contains all characters < 0x100. */
3807 for (idx
= 0; idx
< ctype
->class_collection_act
; ++idx
)
3808 if (ctype
->charnames
[idx
] < 0x100)
3809 ctype
->ctype32_b
[ctype
->charnames
[idx
]] = ctype
->class_collection
[idx
];
3811 for (nr
= 0; nr
< ctype
->nr_charclass
; nr
++)
3813 ctype
->class_b
[nr
] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
3815 for (idx
= 0; idx
< 256; ++idx
)
3816 if (ctype
->class256_collection
[idx
] & _ISbit (nr
))
3817 ctype
->class_b
[nr
][idx
>> 5] |= (uint32_t)1 << (idx
& 0x1f);
3820 for (nr
= 0; nr
< ctype
->nr_charclass
; nr
++)
3822 struct wctype_table t
;
3824 t
.p
= 4; /* or: 5 */
3825 t
.q
= 7; /* or: 6 */
3826 wctype_table_init (&t
);
3828 for (idx
= 0; idx
< ctype
->class_collection_act
; ++idx
)
3829 if (ctype
->class_collection
[idx
] & _ISwbit (nr
))
3830 wctype_table_add (&t
, ctype
->charnames
[idx
]);
3832 wctype_table_finalize (&t
);
3835 fprintf (stderr
, _("%s: table for class \"%s\": %lu bytes\n"),
3836 "LC_CTYPE", ctype
->classnames
[nr
],
3837 (unsigned long int) t
.result_size
);
3839 ctype
->class_3level
[nr
].iov_base
= t
.result
;
3840 ctype
->class_3level
[nr
].iov_len
= t
.result_size
;
3843 /* Room for table of mappings. */
3844 ctype
->map_b
= (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3845 ctype
->map32_b
= (uint32_t **) xmalloc (ctype
->map_collection_nr
3846 * sizeof (uint32_t *));
3847 ctype
->map_3level
= (struct iovec
*)
3848 xmalloc (ctype
->map_collection_nr
* sizeof (struct iovec
));
3850 /* Fill in all mappings. */
3851 for (idx
= 0; idx
< 2; ++idx
)
3855 /* Allocate table. */
3856 ctype
->map_b
[idx
] = (uint32_t *)
3857 xmalloc ((256 + 128) * sizeof (uint32_t));
3859 /* Copy values from collection. */
3860 for (idx2
= 0; idx2
< 256; ++idx2
)
3861 ctype
->map_b
[idx
][128 + idx2
] = ctype
->map256_collection
[idx
][idx2
];
3863 /* Mirror first 127 entries. We must take care not to map entry
3864 -1 because EOF == -1. */
3865 for (idx2
= 0; idx2
< 127; ++idx2
)
3866 ctype
->map_b
[idx
][idx2
] = ctype
->map_b
[idx
][256 + idx2
];
3868 /* EOF must map to EOF. */
3869 ctype
->map_b
[idx
][127] = EOF
;
3872 for (idx
= 0; idx
< ctype
->map_collection_nr
; ++idx
)
3876 /* Allocate table. */
3877 ctype
->map32_b
[idx
] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
3879 /* Copy values from collection. Default is identity mapping. */
3880 for (idx2
= 0; idx2
< 256; ++idx2
)
3881 ctype
->map32_b
[idx
][idx2
] =
3882 (ctype
->map_collection
[idx
][idx2
] != 0
3883 ? ctype
->map_collection
[idx
][idx2
]
3887 for (nr
= 0; nr
< ctype
->map_collection_nr
; nr
++)
3889 struct wctrans_table t
;
3893 wctrans_table_init (&t
);
3895 for (idx
= 0; idx
< ctype
->map_collection_act
[nr
]; ++idx
)
3896 if (ctype
->map_collection
[nr
][idx
] != 0)
3897 wctrans_table_add (&t
, ctype
->charnames
[idx
],
3898 ctype
->map_collection
[nr
][idx
]);
3900 wctrans_table_finalize (&t
);
3903 fprintf (stderr
, _("%s: table for map \"%s\": %lu bytes\n"),
3904 "LC_CTYPE", ctype
->mapnames
[nr
],
3905 (unsigned long int) t
.result_size
);
3907 ctype
->map_3level
[nr
].iov_base
= t
.result
;
3908 ctype
->map_3level
[nr
].iov_len
= t
.result_size
;
3911 /* Extra array for class and map names. */
3912 ctype
->class_name_ptr
= (uint32_t *) xmalloc (ctype
->nr_charclass
3913 * sizeof (uint32_t));
3914 ctype
->map_name_ptr
= (uint32_t *) xmalloc (ctype
->map_collection_nr
3915 * sizeof (uint32_t));
3917 ctype
->class_offset
= _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
);
3918 ctype
->map_offset
= ctype
->class_offset
+ ctype
->nr_charclass
;
3920 /* Array for width information. Because the expected widths are very
3921 small (never larger than 2) we use only one single byte. This
3923 We put only printable characters in the table. wcwidth is specified
3924 to return -1 for non-printable characters. Doing the check here
3925 saves a run-time check.
3926 But we put L'\0' in the table. This again saves a run-time check. */
3928 struct wcwidth_table t
;
3932 wcwidth_table_init (&t
);
3934 /* First set all the printable characters of the character set to
3935 the default width. */
3937 while (iterate_table (&charmap
->char_table
, &curs
, &key
, &len
, &vdata
) == 0)
3939 struct charseq
*data
= (struct charseq
*) vdata
;
3941 if (data
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
3942 data
->ucs4
= repertoire_find_value (ctype
->repertoire
,
3945 if (data
->ucs4
!= ILLEGAL_CHAR_VALUE
)
3947 uint32_t *class_bits
=
3948 find_idx (ctype
, &ctype
->class_collection
, NULL
,
3949 &ctype
->class_collection_act
, data
->ucs4
);
3951 if (class_bits
!= NULL
&& (*class_bits
& BITw (tok_print
)))
3952 wcwidth_table_add (&t
, data
->ucs4
, charmap
->width_default
);
3956 /* Now add the explicitly specified widths. */
3957 if (charmap
->width_rules
!= NULL
)
3961 for (cnt
= 0; cnt
< charmap
->nwidth_rules
; ++cnt
)
3963 unsigned char bytes
[charmap
->mb_cur_max
];
3964 int nbytes
= charmap
->width_rules
[cnt
].from
->nbytes
;
3966 /* We have the range of characters for which the width is
3967 specified described using byte sequences of the multibyte
3968 charset. We have to convert this to UCS4 now. And we
3969 cannot simply convert the beginning and the end of the
3970 sequence, we have to iterate over the byte sequence and
3971 convert it for every single character. */
3972 memcpy (bytes
, charmap
->width_rules
[cnt
].from
->bytes
, nbytes
);
3974 while (nbytes
< charmap
->width_rules
[cnt
].to
->nbytes
3975 || memcmp (bytes
, charmap
->width_rules
[cnt
].to
->bytes
,
3978 /* Find the UCS value for `bytes'. */
3981 struct charseq
*seq
=
3982 charmap_find_symbol (charmap
, bytes
, nbytes
);
3985 wch
= ILLEGAL_CHAR_VALUE
;
3986 else if (seq
->ucs4
!= UNINITIALIZED_CHAR_VALUE
)
3989 wch
= repertoire_find_value (ctype
->repertoire
, seq
->name
,
3990 strlen (seq
->name
));
3992 if (wch
!= ILLEGAL_CHAR_VALUE
)
3994 /* Store the value. */
3995 uint32_t *class_bits
=
3996 find_idx (ctype
, &ctype
->class_collection
, NULL
,
3997 &ctype
->class_collection_act
, wch
);
3999 if (class_bits
!= NULL
&& (*class_bits
& BITw (tok_print
)))
4000 wcwidth_table_add (&t
, wch
,
4001 charmap
->width_rules
[cnt
].width
);
4004 /* "Increment" the bytes sequence. */
4006 while (inner
>= 0 && bytes
[inner
] == 0xff)
4011 /* We have to extend the byte sequence. */
4012 if (nbytes
>= charmap
->width_rules
[cnt
].to
->nbytes
)
4016 memset (&bytes
[1], 0, nbytes
);
4022 while (++inner
< nbytes
)
4029 /* Set the width of L'\0' to 0. */
4030 wcwidth_table_add (&t
, 0, 0);
4032 wcwidth_table_finalize (&t
);
4035 fprintf (stderr
, _("%s: table for width: %lu bytes\n"),
4036 "LC_CTYPE", (unsigned long int) t
.result_size
);
4038 ctype
->width
.iov_base
= t
.result
;
4039 ctype
->width
.iov_len
= t
.result_size
;
4042 /* Set MB_CUR_MAX. */
4043 ctype
->mb_cur_max
= charmap
->mb_cur_max
;
4045 /* Now determine the table for the transliteration information.
4047 XXX It is not yet clear to me whether it is worth implementing a
4048 complicated algorithm which uses a hash table to locate the entries.
4049 For now I'll use a simple array which can be searched using binary
4051 if (ctype
->translit_include
!= NULL
)
4052 /* Traverse the locales mentioned in the `include' statements in a
4053 depth-first way and fold in their transliteration information. */
4054 translit_flatten (ctype
, charmap
, &ctype
->translit
);
4056 if (ctype
->translit
!= NULL
)
4058 /* First count how many entries we have. This is the upper limit
4059 since some entries from the included files might be overwritten. */
4062 struct translit_t
*runp
= ctype
->translit
;
4063 struct translit_t
**sorted
;
4064 size_t from_len
, to_len
;
4066 while (runp
!= NULL
)
4072 /* Next we allocate an array large enough and fill in the values. */
4073 sorted
= (struct translit_t
**) alloca (number
4074 * sizeof (struct translit_t
**));
4075 runp
= ctype
->translit
;
4079 /* Search for the place where to insert this string.
4080 XXX Better use a real sorting algorithm later. */
4084 while (idx
< number
)
4086 int res
= wcscmp ((const wchar_t *) sorted
[idx
]->from
,
4087 (const wchar_t *) runp
->from
);
4102 memmove (&sorted
[idx
+ 1], &sorted
[idx
],
4103 (number
- idx
) * sizeof (struct translit_t
*));
4110 while (runp
!= NULL
);
4112 /* The next step is putting all the possible transliteration
4113 strings in one memory block so that we can write it out.
4114 We need several different blocks:
4115 - index to the from-string array
4117 - index to the to-string array
4120 from_len
= to_len
= 0;
4121 for (cnt
= 0; cnt
< number
; ++cnt
)
4123 struct translit_to_t
*srunp
;
4124 from_len
+= wcslen ((const wchar_t *) sorted
[cnt
]->from
) + 1;
4125 srunp
= sorted
[cnt
]->to
;
4126 while (srunp
!= NULL
)
4128 to_len
+= wcslen ((const wchar_t *) srunp
->str
) + 1;
4129 srunp
= srunp
->next
;
4131 /* Plus one for the extra NUL character marking the end of
4132 the list for the current entry. */
4136 /* We can allocate the arrays for the results. */
4137 ctype
->translit_from_idx
= xmalloc (number
* sizeof (uint32_t));
4138 ctype
->translit_from_tbl
= xmalloc (from_len
* sizeof (uint32_t));
4139 ctype
->translit_to_idx
= xmalloc (number
* sizeof (uint32_t));
4140 ctype
->translit_to_tbl
= xmalloc (to_len
* sizeof (uint32_t));
4144 for (cnt
= 0; cnt
< number
; ++cnt
)
4147 struct translit_to_t
*srunp
;
4149 ctype
->translit_from_idx
[cnt
] = from_len
;
4150 ctype
->translit_to_idx
[cnt
] = to_len
;
4152 len
= wcslen ((const wchar_t *) sorted
[cnt
]->from
) + 1;
4153 wmemcpy ((wchar_t *) &ctype
->translit_from_tbl
[from_len
],
4154 (const wchar_t *) sorted
[cnt
]->from
, len
);
4157 ctype
->translit_to_idx
[cnt
] = to_len
;
4158 srunp
= sorted
[cnt
]->to
;
4159 while (srunp
!= NULL
)
4161 len
= wcslen ((const wchar_t *) srunp
->str
) + 1;
4162 wmemcpy ((wchar_t *) &ctype
->translit_to_tbl
[to_len
],
4163 (const wchar_t *) srunp
->str
, len
);
4165 srunp
= srunp
->next
;
4167 ctype
->translit_to_tbl
[to_len
++] = L
'\0';
4170 /* Store the information about the length. */
4171 ctype
->translit_idx_size
= number
;
4172 ctype
->translit_from_tbl_size
= from_len
* sizeof (uint32_t);
4173 ctype
->translit_to_tbl_size
= to_len
* sizeof (uint32_t);
4177 /* Provide some dummy pointers since we have nothing to write out. */
4178 static uint32_t no_str
= { 0 };
4180 ctype
->translit_from_idx
= &no_str
;
4181 ctype
->translit_from_tbl
= &no_str
;
4182 ctype
->translit_to_tbl
= &no_str
;
4183 ctype
->translit_idx_size
= 0;
4184 ctype
->translit_from_tbl_size
= 0;
4185 ctype
->translit_to_tbl_size
= 0;