1 /* Copyright (C) 1995-2005, 2006 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License version 2 as
7 published by the Free Software Foundation.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
34 #include "localedef.h"
36 #include "localeinfo.h"
38 #include "linereader.h"
39 #include "locfile-token.h"
45 #ifdef PREDEFINED_CLASSES
46 /* These are the extra bits not in wctype.h since these are not preallocated
48 # define _ISwspecial1 (1 << 29)
49 # define _ISwspecial2 (1 << 30)
50 # define _ISwspecial3 (1 << 31)
54 /* The bit used for representing a special class. */
/* BITPOS yields the zero-based position of a class token, counted from
   tok_upper.  */
55 #define BITPOS(class) ((class) - tok_upper)
/* BIT and BITw hand that position to _ISbit and _ISwbit respectively
   (both defined outside this file) to obtain the class mask.  */
56 #define BIT(class) (_ISbit (BITPOS (class)))
57 #define BITw(class) (_ISwbit (BITPOS (class)))
/* ELEM expands to a dereferenced find_idx call, so the result can be
   read or assigned to.  IDX is pasted textually after the member names
   COLLECTION, COLLECTION_max and COLLECTION_act; the uses visible in
   this file leave it empty (presumably it is a subscript when the
   members are arrays -- confirm against the callers).  VALUE is passed
   through as the last find_idx argument.  */
59 #define ELEM(ctype, collection, idx, value) \
60 *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx, \
61 &ctype->collection##_act idx, value)
64 /* To be compatible with former implementations we for now restrict
65 the number of bits for character classes to 16. When compatibility
66 is not necessary anymore increase the number to 32. */
67 #define char_class_t uint16_t
68 #define char_class32_t uint32_t
71 /* Type to describe a transliteration action. We have a possibly
72 multiple character from-string and a set of multiple character
73 to-strings. All are 32bit values since this is what is used in
74 the gconv functions. */
79 struct translit_to_t
*next
;
89 struct translit_to_t
*to
;
91 struct translit_t
*next
;
94 struct translit_ignore_t
103 struct translit_ignore_t
*next
;
107 /* Type to describe a transliteration include statement. */
108 struct translit_include_t
110 const char *copy_locale
;
111 const char *copy_repertoire
;
113 struct translit_include_t
*next
;
117 /* Sparse table of uint32_t. */
118 #define TABLE idx_table
119 #define ELEMENT uint32_t
120 #define DEFAULT ((uint32_t) ~0)
125 /* The real definition of the struct for the LC_CTYPE locale. */
126 struct locale_ctype_t
129 size_t charnames_max
;
130 size_t charnames_act
;
131 /* An index lookup table, to speedup find_idx. */
132 struct idx_table charnames_idx
;
134 struct repertoire_t
*repertoire
;
136 /* We will allow up to 8 * sizeof (uint32_t) character classes. */
137 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
139 const char *classnames
[MAX_NR_CHARCLASS
];
140 uint32_t last_class_char
;
141 uint32_t class256_collection
[256];
142 uint32_t *class_collection
;
143 size_t class_collection_max
;
144 size_t class_collection_act
;
146 uint32_t class_offset
;
148 struct charseq
**mbdigits
;
155 struct charseq
*mboutdigits
[10];
156 uint32_t wcoutdigits
[10];
157 size_t outdigits_act
;
159 /* If the following number ever turns out to be too small simply
160 increase it. But I doubt it will. --drepper@gnu */
161 #define MAX_NR_CHARMAP 16
162 const char *mapnames
[MAX_NR_CHARMAP
];
163 uint32_t *map_collection
[MAX_NR_CHARMAP
];
164 uint32_t map256_collection
[2][256];
165 size_t map_collection_max
[MAX_NR_CHARMAP
];
166 size_t map_collection_act
[MAX_NR_CHARMAP
];
167 size_t map_collection_nr
;
169 int tomap_done
[MAX_NR_CHARMAP
];
172 /* Transliteration information. */
173 struct translit_include_t
*translit_include
;
174 struct translit_t
*translit
;
175 struct translit_ignore_t
*translit_ignore
;
176 uint32_t ntranslit_ignore
;
178 uint32_t *default_missing
;
179 const char *default_missing_file
;
180 size_t default_missing_lineno
;
182 uint32_t to_nonascii
;
184 /* The arrays for the binary representation. */
185 char_class_t
*ctype_b
;
186 char_class32_t
*ctype32_b
;
190 struct iovec
*class_3level
;
191 struct iovec
*map_3level
;
192 uint32_t *class_name_ptr
;
193 uint32_t *map_name_ptr
;
196 const char *codeset_name
;
197 uint32_t *translit_from_idx
;
198 uint32_t *translit_from_tbl
;
199 uint32_t *translit_to_idx
;
200 uint32_t *translit_to_tbl
;
201 uint32_t translit_idx_size
;
202 size_t translit_from_tbl_size
;
203 size_t translit_to_tbl_size
;
205 struct obstack mempool
;
209 /* Marker for an empty slot. This has the value 0xFFFFFFFF, regardless
210 whether 'int' is 16 bit, 32 bit, or 64 bit. */
211 #define EMPTY ((uint32_t) ~0)
214 #define obstack_chunk_alloc xmalloc
215 #define obstack_chunk_free free
218 /* Prototypes for local functions. */
219 static void ctype_startup (struct linereader
*lr
, struct localedef_t
*locale
,
220 const struct charmap_t
*charmap
,
221 struct localedef_t
*copy_locale
,
223 static void ctype_class_new (struct linereader
*lr
,
224 struct locale_ctype_t
*ctype
, const char *name
);
225 static void ctype_map_new (struct linereader
*lr
,
226 struct locale_ctype_t
*ctype
,
227 const char *name
, const struct charmap_t
*charmap
);
228 static uint32_t *find_idx (struct locale_ctype_t
*ctype
, uint32_t **table
,
229 size_t *max
, size_t *act
, unsigned int idx
);
230 static void set_class_defaults (struct locale_ctype_t
*ctype
,
231 const struct charmap_t
*charmap
,
232 struct repertoire_t
*repertoire
);
233 static void allocate_arrays (struct locale_ctype_t
*ctype
,
234 const struct charmap_t
*charmap
,
235 struct repertoire_t
*repertoire
);
238 static const char *longnames
[] =
240 "zero", "one", "two", "three", "four",
241 "five", "six", "seven", "eight", "nine"
243 static const char *uninames
[] =
245 "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
246 "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
248 static const unsigned char digits
[] = "0123456789";
252 ctype_startup (struct linereader
*lr
, struct localedef_t
*locale
,
253 const struct charmap_t
*charmap
,
254 struct localedef_t
*copy_locale
, int ignore_content
)
257 struct locale_ctype_t
*ctype
;
259 if (!ignore_content
&& locale
->categories
[LC_CTYPE
].ctype
== NULL
)
261 if (copy_locale
== NULL
)
263 /* Allocate the needed room. */
264 locale
->categories
[LC_CTYPE
].ctype
= ctype
=
265 (struct locale_ctype_t
*) xcalloc (1,
266 sizeof (struct locale_ctype_t
));
268 /* We have seen no names yet. */
269 ctype
->charnames_max
= charmap
->mb_cur_max
== 1 ? 256 : 512;
271 (unsigned int *) xmalloc (ctype
->charnames_max
272 * sizeof (unsigned int));
273 for (cnt
= 0; cnt
< 256; ++cnt
)
274 ctype
->charnames
[cnt
] = cnt
;
275 ctype
->charnames_act
= 256;
276 idx_table_init (&ctype
->charnames_idx
);
278 /* Fill character class information. */
279 ctype
->last_class_char
= ILLEGAL_CHAR_VALUE
;
280 /* The order of the following instructions determines the bit
282 ctype_class_new (lr
, ctype
, "upper");
283 ctype_class_new (lr
, ctype
, "lower");
284 ctype_class_new (lr
, ctype
, "alpha");
285 ctype_class_new (lr
, ctype
, "digit");
286 ctype_class_new (lr
, ctype
, "xdigit");
287 ctype_class_new (lr
, ctype
, "space");
288 ctype_class_new (lr
, ctype
, "print");
289 ctype_class_new (lr
, ctype
, "graph");
290 ctype_class_new (lr
, ctype
, "blank");
291 ctype_class_new (lr
, ctype
, "cntrl");
292 ctype_class_new (lr
, ctype
, "punct");
293 ctype_class_new (lr
, ctype
, "alnum");
294 #ifdef PREDEFINED_CLASSES
295 /* The following are extensions from ISO 14652. */
296 ctype_class_new (lr
, ctype
, "left_to_right");
297 ctype_class_new (lr
, ctype
, "right_to_left");
298 ctype_class_new (lr
, ctype
, "num_terminator");
299 ctype_class_new (lr
, ctype
, "num_separator");
300 ctype_class_new (lr
, ctype
, "segment_separator");
301 ctype_class_new (lr
, ctype
, "block_separator");
302 ctype_class_new (lr
, ctype
, "direction_control");
303 ctype_class_new (lr
, ctype
, "sym_swap_layout");
304 ctype_class_new (lr
, ctype
, "char_shape_selector");
305 ctype_class_new (lr
, ctype
, "num_shape_selector");
306 ctype_class_new (lr
, ctype
, "non_spacing");
307 ctype_class_new (lr
, ctype
, "non_spacing_level3");
308 ctype_class_new (lr
, ctype
, "normal_connect");
309 ctype_class_new (lr
, ctype
, "r_connect");
310 ctype_class_new (lr
, ctype
, "no_connect");
311 ctype_class_new (lr
, ctype
, "no_connect-space");
312 ctype_class_new (lr
, ctype
, "vowel_connect");
315 ctype
->class_collection_max
= charmap
->mb_cur_max
== 1 ? 256 : 512;
316 ctype
->class_collection
317 = (uint32_t *) xcalloc (sizeof (unsigned long int),
318 ctype
->class_collection_max
);
319 ctype
->class_collection_act
= 256;
321 /* Fill character map information. */
322 ctype
->last_map_idx
= MAX_NR_CHARMAP
;
323 ctype_map_new (lr
, ctype
, "toupper", charmap
);
324 ctype_map_new (lr
, ctype
, "tolower", charmap
);
325 #ifdef PREDEFINED_CLASSES
326 ctype_map_new (lr
, ctype
, "tosymmetric", charmap
);
329 /* Fill first 256 entries in `toXXX' arrays. */
330 for (cnt
= 0; cnt
< 256; ++cnt
)
332 ctype
->map_collection
[0][cnt
] = cnt
;
333 ctype
->map_collection
[1][cnt
] = cnt
;
334 #ifdef PREDEFINED_CLASSES
335 ctype
->map_collection
[2][cnt
] = cnt
;
337 ctype
->map256_collection
[0][cnt
] = cnt
;
338 ctype
->map256_collection
[1][cnt
] = cnt
;
341 if (enc_not_ascii_compatible
)
342 ctype
->to_nonascii
= 1;
344 obstack_init (&ctype
->mempool
);
347 ctype
= locale
->categories
[LC_CTYPE
].ctype
=
348 copy_locale
->categories
[LC_CTYPE
].ctype
;
354 ctype_finish (struct localedef_t
*locale
, const struct charmap_t
*charmap
)
356 /* See POSIX.2, table 2-6 for the meaning of the following table. */
361 const char allow
[NCLASS
];
363 valid_table
[NCLASS
] =
365 /* The order is important. See token.h for more information.
366 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
367 { "upper", "--MX-XDDXXX-" },
368 { "lower", "--MX-XDDXXX-" },
369 { "alpha", "---X-XDDXXX-" },
370 { "digit", "XXX--XDDXXX-" },
371 { "xdigit", "-----XDDXXX-" },
372 { "space", "XXXXX------X" },
373 { "print", "---------X--" },
374 { "graph", "---------X--" },
375 { "blank", "XXXXXM-----X" },
376 { "cntrl", "XXXXX-XX--XX" },
377 { "punct", "XXXXX-DD-X-X" },
378 { "alnum", "-----XDDXXX-" }
382 uint32_t space_value
;
383 struct charseq
*space_seq
;
384 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
391 /* Now resolve copying and also handle completely missing definitions. */
394 const char *repertoire_name
;
396 /* First see whether we were supposed to copy. If yes, find the
397 actual definition. */
398 if (locale
->copy_name
[LC_CTYPE
] != NULL
)
400 /* Find the locale to copy from. This has to happen transitively since
401 the locale we are copying from might itself be copying another one. */
402 struct localedef_t
*from
= locale
;
405 from
= find_locale (LC_CTYPE
, from
->copy_name
[LC_CTYPE
],
406 from
->repertoire_name
, charmap
);
407 while (from
->categories
[LC_CTYPE
].ctype
== NULL
408 && from
->copy_name
[LC_CTYPE
] != NULL
);
410 ctype
= locale
->categories
[LC_CTYPE
].ctype
411 = from
->categories
[LC_CTYPE
].ctype
;
414 /* If there is still no definition, issue a warning and create an
419 WITH_CUR_LOCALE (error (0, 0, _("\
420 No definition for %s category found"), "LC_CTYPE"));
421 ctype_startup (NULL
, locale
, charmap
, NULL
, 0);
422 ctype
= locale
->categories
[LC_CTYPE
].ctype
;
425 /* Get the repertoire we have to use. */
426 repertoire_name
= locale
->repertoire_name
?: repertoire_global
;
427 if (repertoire_name
!= NULL
)
428 ctype
->repertoire
= repertoire_read (repertoire_name
);
431 /* We need the name of the currently used 8-bit character set to
432 make correct conversion between this 8-bit representation and the
433 ISO 10646 character set used internally for wide characters. */
434 ctype
->codeset_name
= charmap
->code_set_name
;
435 if (ctype
->codeset_name
== NULL
)
438 WITH_CUR_LOCALE (error (0, 0, _("\
439 No character set name specified in charmap")));
440 ctype
->codeset_name
= "//UNKNOWN//";
443 /* Set default value for classes not specified. */
444 set_class_defaults (ctype
, charmap
, ctype
->repertoire
);
446 /* Check according to table. */
447 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
449 uint32_t tmp
= ctype
->class_collection
[cnt
];
453 for (cls1
= 0; cls1
< NCLASS
; ++cls1
)
454 if ((tmp
& _ISwbit (cls1
)) != 0)
455 for (cls2
= 0; cls2
< NCLASS
; ++cls2
)
456 if (valid_table
[cls1
].allow
[cls2
] != '-')
458 int eq
= (tmp
& _ISwbit (cls2
)) != 0;
459 switch (valid_table
[cls1
].allow
[cls2
])
464 uint32_t value
= ctype
->charnames
[cnt
];
467 WITH_CUR_LOCALE (error (0, 0, _("\
468 character L'\\u%0*x' in class `%s' must be in class `%s'"),
469 value
> 0xffff ? 8 : 4,
471 valid_table
[cls1
].name
,
472 valid_table
[cls2
].name
));
479 uint32_t value
= ctype
->charnames
[cnt
];
482 WITH_CUR_LOCALE (error (0, 0, _("\
483 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
484 value
> 0xffff ? 8 : 4,
486 valid_table
[cls1
].name
,
487 valid_table
[cls2
].name
));
492 ctype
->class_collection
[cnt
] |= _ISwbit (cls2
);
496 WITH_CUR_LOCALE (error (5, 0, _("\
497 internal error in %s, line %u"), __FUNCTION__
, __LINE__
));
503 for (cnt
= 0; cnt
< 256; ++cnt
)
505 uint32_t tmp
= ctype
->class256_collection
[cnt
];
509 for (cls1
= 0; cls1
< NCLASS
; ++cls1
)
510 if ((tmp
& _ISbit (cls1
)) != 0)
511 for (cls2
= 0; cls2
< NCLASS
; ++cls2
)
512 if (valid_table
[cls1
].allow
[cls2
] != '-')
514 int eq
= (tmp
& _ISbit (cls2
)) != 0;
515 switch (valid_table
[cls1
].allow
[cls2
])
522 snprintf (buf
, sizeof buf
, "\\%Zo", cnt
);
525 WITH_CUR_LOCALE (error (0, 0, _("\
526 character '%s' in class `%s' must be in class `%s'"),
528 valid_table
[cls1
].name
,
529 valid_table
[cls2
].name
));
538 snprintf (buf
, sizeof buf
, "\\%Zo", cnt
);
541 WITH_CUR_LOCALE (error (0, 0, _("\
542 character '%s' in class `%s' must not be in class `%s'"),
544 valid_table
[cls1
].name
,
545 valid_table
[cls2
].name
));
550 ctype
->class256_collection
[cnt
] |= _ISbit (cls2
);
554 WITH_CUR_LOCALE (error (5, 0, _("\
555 internal error in %s, line %u"), __FUNCTION__
, __LINE__
));
561 /* ... and now test <SP> as a special case. */
563 if (((cnt
= BITPOS (tok_space
),
564 (ELEM (ctype
, class_collection
, , space_value
)
565 & BITw (tok_space
)) == 0)
566 || (cnt
= BITPOS (tok_blank
),
567 (ELEM (ctype
, class_collection
, , space_value
)
568 & BITw (tok_blank
)) == 0)))
571 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
572 valid_table
[cnt
].name
));
574 else if (((cnt
= BITPOS (tok_punct
),
575 (ELEM (ctype
, class_collection
, , space_value
)
576 & BITw (tok_punct
)) != 0)
577 || (cnt
= BITPOS (tok_graph
),
578 (ELEM (ctype
, class_collection
, , space_value
)
583 WITH_CUR_LOCALE (error (0, 0, _("\
584 <SP> character must not be in class `%s'"),
585 valid_table
[cnt
].name
));
588 ELEM (ctype
, class_collection
, , space_value
) |= BITw (tok_print
);
590 space_seq
= charmap_find_value (charmap
, "SP", 2);
591 if (space_seq
== NULL
)
592 space_seq
= charmap_find_value (charmap
, "space", 5);
593 if (space_seq
== NULL
)
594 space_seq
= charmap_find_value (charmap
, "U00000020", 9);
595 if (space_seq
== NULL
|| space_seq
->nbytes
!= 1)
598 WITH_CUR_LOCALE (error (0, 0, _("\
599 character <SP> not defined in character map")));
601 else if (((cnt
= BITPOS (tok_space
),
602 (ctype
->class256_collection
[space_seq
->bytes
[0]]
603 & BIT (tok_space
)) == 0)
604 || (cnt
= BITPOS (tok_blank
),
605 (ctype
->class256_collection
[space_seq
->bytes
[0]]
606 & BIT (tok_blank
)) == 0)))
609 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
610 valid_table
[cnt
].name
));
612 else if (((cnt
= BITPOS (tok_punct
),
613 (ctype
->class256_collection
[space_seq
->bytes
[0]]
614 & BIT (tok_punct
)) != 0)
615 || (cnt
= BITPOS (tok_graph
),
616 (ctype
->class256_collection
[space_seq
->bytes
[0]]
617 & BIT (tok_graph
)) != 0)))
620 WITH_CUR_LOCALE (error (0, 0, _("\
621 <SP> character must not be in class `%s'"),
622 valid_table
[cnt
].name
));
625 ctype
->class256_collection
[space_seq
->bytes
[0]] |= BIT (tok_print
);
627 /* Now that the tests are done make sure the name array contains all
628 characters which are handled in the WIDTH section of the
629 character set definition file. */
630 if (charmap
->width_rules
!= NULL
)
631 for (cnt
= 0; cnt
< charmap
->nwidth_rules
; ++cnt
)
633 unsigned char bytes
[charmap
->mb_cur_max
];
634 int nbytes
= charmap
->width_rules
[cnt
].from
->nbytes
;
636 /* We have the range of characters for which the width is
637 specified described using byte sequences of the multibyte
638 charset. We have to convert this to UCS4 now. And we
639 cannot simply convert the beginning and the end of the
640 sequence, we have to iterate over the byte sequence and
641 convert it for every single character. */
642 memcpy (bytes
, charmap
->width_rules
[cnt
].from
->bytes
, nbytes
);
644 while (nbytes
< charmap
->width_rules
[cnt
].to
->nbytes
645 || memcmp (bytes
, charmap
->width_rules
[cnt
].to
->bytes
,
648 /* Find the UCS value for `bytes'. */
651 struct charseq
*seq
= charmap_find_symbol (charmap
, bytes
, nbytes
);
654 wch
= ILLEGAL_CHAR_VALUE
;
655 else if (seq
->ucs4
!= UNINITIALIZED_CHAR_VALUE
)
658 wch
= repertoire_find_value (ctype
->repertoire
, seq
->name
,
661 if (wch
!= ILLEGAL_CHAR_VALUE
)
662 /* We are only interested in the side-effects of the
663 `find_idx' call. It will add appropriate entries in
664 the name array if this is necessary. */
665 (void) find_idx (ctype
, NULL
, NULL
, NULL
, wch
);
667 /* "Increment" the bytes sequence. */
669 while (inner
>= 0 && bytes
[inner
] == 0xff)
674 /* We have to extend the byte sequence. */
675 if (nbytes
>= charmap
->width_rules
[cnt
].to
->nbytes
)
679 memset (&bytes
[1], 0, nbytes
);
685 while (++inner
< nbytes
)
691 /* Now set all the other characters of the character set to the
694 while (iterate_table (&charmap
->char_table
, &curs
, &key
, &len
, &vdata
) == 0)
696 struct charseq
*data
= (struct charseq
*) vdata
;
698 if (data
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
699 data
->ucs4
= repertoire_find_value (ctype
->repertoire
,
702 if (data
->ucs4
!= ILLEGAL_CHAR_VALUE
)
703 (void) find_idx (ctype
, NULL
, NULL
, NULL
, data
->ucs4
);
706 /* There must be a multiple of 10 digits. */
707 if (ctype
->mbdigits_act
% 10 != 0)
709 assert (ctype
->mbdigits_act
== ctype
->wcdigits_act
);
710 ctype
->wcdigits_act
-= ctype
->mbdigits_act
% 10;
711 ctype
->mbdigits_act
-= ctype
->mbdigits_act
% 10;
712 WITH_CUR_LOCALE (error (0, 0, _("\
713 `digit' category has not entries in groups of ten")));
716 /* Check the input digits. There must be a multiple of ten available.
717 In each group it could be that one or the other character is missing.
718 In this case the whole group must be removed. */
720 while (cnt
< ctype
->mbdigits_act
)
723 for (inner
= 0; inner
< 10; ++inner
)
724 if (ctype
->mbdigits
[cnt
+ inner
] == NULL
)
731 /* Remove the group. */
/* Close the gap by moving every multibyte digit behind this group ten
   slots down.  The number of entries to move is derived from
   mbdigits_act, the counter that describes the mbdigits array and that
   is decremented right below; the wide-character counter belongs to
   the wcdigits array and is not adjusted in this loop, so using it here
   would move stale entries from beyond the valid part of mbdigits once
   a group has been removed.  */
732 memmove (&ctype
->mbdigits
[cnt
], &ctype
->mbdigits
[cnt
+ 10],
733 ((ctype
->mbdigits_act
- cnt
- 10)
734 * sizeof (ctype
->mbdigits
[0])));
/* One complete group of ten is gone.  */
735 ctype
->mbdigits_act
-= 10;
739 /* If no input digits are given use the default. */
740 if (ctype
->mbdigits_act
== 0)
742 if (ctype
->mbdigits_max
== 0)
744 ctype
->mbdigits
= obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
745 10 * sizeof (struct charseq
*));
746 ctype
->mbdigits_max
= 10;
749 for (cnt
= 0; cnt
< 10; ++cnt
)
751 ctype
->mbdigits
[cnt
] = charmap_find_symbol (charmap
,
753 if (ctype
->mbdigits
[cnt
] == NULL
)
755 ctype
->mbdigits
[cnt
] = charmap_find_symbol (charmap
,
757 strlen (longnames
[cnt
]));
758 if (ctype
->mbdigits
[cnt
] == NULL
)
760 /* Hum, this ain't good. */
761 WITH_CUR_LOCALE (error (0, 0, _("\
762 no input digits defined and none of the standard names in the charmap")));
764 ctype
->mbdigits
[cnt
] = obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
765 sizeof (struct charseq
) + 1);
767 /* This is better than nothing. */
768 ctype
->mbdigits
[cnt
]->bytes
[0] = digits
[cnt
];
769 ctype
->mbdigits
[cnt
]->nbytes
= 1;
774 ctype
->mbdigits_act
= 10;
777 /* Check the wide character input digits. There must be a multiple
778 of ten available. In each group it could be that one or the other
779 character is missing. In this case the whole group must be
782 while (cnt
< ctype
->wcdigits_act
)
785 for (inner
= 0; inner
< 10; ++inner
)
786 if (ctype
->wcdigits
[cnt
+ inner
] == ILLEGAL_CHAR_VALUE
)
793 /* Remove the group. */
794 memmove (&ctype
->wcdigits
[cnt
], &ctype
->wcdigits
[cnt
+ 10],
795 ((ctype
->wcdigits_act
- cnt
- 10)
796 * sizeof (ctype
->wcdigits
[0])));
797 ctype
->wcdigits_act
-= 10;
801 /* If no input digits are given use the default. */
802 if (ctype
->wcdigits_act
== 0)
804 if (ctype
->wcdigits_max
== 0)
806 ctype
->wcdigits
= obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
807 10 * sizeof (uint32_t));
808 ctype
->wcdigits_max
= 10;
/* Use the ten ASCII digits L'0' .. L'9' as the default wide-character
   input digits.  */
811 for (cnt
= 0; cnt
< 10; ++cnt
)
812 ctype
->wcdigits
[cnt
] = L
'0' + cnt
;
/* Record that one group of ten wide digits is now present.  This
   branch is entered because wcdigits_act is zero, and the output code
   derives the number of wide input digits from wcdigits_act, so this is
   the counter that has to be set; the multibyte counter is maintained
   by the multibyte default handling.  */
814 ctype
->wcdigits_act
= 10;
817 /* Check the outdigits. */
819 for (cnt
= 0; cnt
< 10; ++cnt
)
820 if (ctype
->mboutdigits
[cnt
] == NULL
)
822 static struct charseq replace
[2];
826 WITH_CUR_LOCALE (error (0, 0, _("\
827 not all characters used in `outdigit' are available in the charmap")));
831 replace
[0].nbytes
= 1;
832 replace
[0].bytes
[0] = '?';
833 replace
[0].bytes
[1] = '\0';
834 ctype
->mboutdigits
[cnt
] = &replace
[0];
838 for (cnt
= 0; cnt
< 10; ++cnt
)
839 if (ctype
->wcoutdigits
[cnt
] == 0)
843 WITH_CUR_LOCALE (error (0, 0, _("\
844 not all characters used in `outdigit' are available in the repertoire")));
848 ctype
->wcoutdigits
[cnt
] = L
'?';
851 /* Sort the entries in the translit_ignore list. */
852 if (ctype
->translit_ignore
!= NULL
)
854 struct translit_ignore_t
*firstp
= ctype
->translit_ignore
;
855 struct translit_ignore_t
*runp
;
857 ctype
->ntranslit_ignore
= 1;
859 for (runp
= firstp
->next
; runp
!= NULL
; runp
= runp
->next
)
861 struct translit_ignore_t
*lastp
= NULL
;
862 struct translit_ignore_t
*cmpp
;
864 ++ctype
->ntranslit_ignore
;
866 for (cmpp
= firstp
; cmpp
!= NULL
; lastp
= cmpp
, cmpp
= cmpp
->next
)
867 if (runp
->from
< cmpp
->from
)
875 ctype
->translit_ignore
= firstp
;
881 ctype_output (struct localedef_t
*locale
, const struct charmap_t
*charmap
,
882 const char *output_path
)
884 static const char nulbytes
[4] = { 0, 0, 0, 0 };
885 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
886 const size_t nelems
= (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
)
887 + ctype
->nr_charclass
+ ctype
->map_collection_nr
);
888 struct iovec
*iov
= alloca (sizeof *iov
889 * (2 + nelems
+ 2 * ctype
->nr_charclass
890 + ctype
->map_collection_nr
+ 4));
891 struct locale_file data
;
892 uint32_t *idx
= alloca (sizeof *idx
* (nelems
+ 1));
893 uint32_t default_missing_len
;
894 size_t elem
, cnt
, offset
, total
;
897 /* Now prepare the output: Find the sizes of the table we can use. */
898 allocate_arrays (ctype
, charmap
, ctype
->repertoire
);
900 data
.magic
= LIMAGIC (LC_CTYPE
);
902 iov
[0].iov_base
= (void *) &data
;
903 iov
[0].iov_len
= sizeof (data
);
905 iov
[1].iov_base
= (void *) idx
;
906 iov
[1].iov_len
= nelems
* sizeof (uint32_t);
908 idx
[0] = iov
[0].iov_len
+ iov
[1].iov_len
;
911 for (elem
= 0; elem
< nelems
; ++elem
)
913 if (elem
< _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
))
916 #define CTYPE_EMPTY(name) \
918 iov[2 + elem + offset].iov_base = NULL; \
919 iov[2 + elem + offset].iov_len = 0; \
920 idx[elem + 1] = idx[elem]; \
923 CTYPE_EMPTY(_NL_CTYPE_GAP1
);
924 CTYPE_EMPTY(_NL_CTYPE_GAP2
);
925 CTYPE_EMPTY(_NL_CTYPE_GAP3
);
926 CTYPE_EMPTY(_NL_CTYPE_GAP4
);
927 CTYPE_EMPTY(_NL_CTYPE_GAP5
);
928 CTYPE_EMPTY(_NL_CTYPE_GAP6
);
930 #define CTYPE_DATA(name, base, len) \
931 case _NL_ITEM_INDEX (name): \
932 iov[2 + elem + offset].iov_base = (base); \
933 iov[2 + elem + offset].iov_len = (len); \
934 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
937 CTYPE_DATA (_NL_CTYPE_CLASS
,
939 (256 + 128) * sizeof (char_class_t
));
941 CTYPE_DATA (_NL_CTYPE_TOUPPER
,
943 (256 + 128) * sizeof (uint32_t));
944 CTYPE_DATA (_NL_CTYPE_TOLOWER
,
946 (256 + 128) * sizeof (uint32_t));
948 CTYPE_DATA (_NL_CTYPE_TOUPPER32
,
950 256 * sizeof (uint32_t));
951 CTYPE_DATA (_NL_CTYPE_TOLOWER32
,
953 256 * sizeof (uint32_t));
955 CTYPE_DATA (_NL_CTYPE_CLASS32
,
957 256 * sizeof (char_class32_t
));
959 CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET
,
960 &ctype
->class_offset
, sizeof (uint32_t));
962 CTYPE_DATA (_NL_CTYPE_MAP_OFFSET
,
963 &ctype
->map_offset
, sizeof (uint32_t));
965 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE
,
966 &ctype
->translit_idx_size
, sizeof (uint32_t));
968 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX
,
969 ctype
->translit_from_idx
,
970 ctype
->translit_idx_size
* sizeof (uint32_t));
972 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL
,
973 ctype
->translit_from_tbl
,
974 ctype
->translit_from_tbl_size
);
976 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX
,
977 ctype
->translit_to_idx
,
978 ctype
->translit_idx_size
* sizeof (uint32_t));
980 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL
,
981 ctype
->translit_to_tbl
, ctype
->translit_to_tbl_size
);
983 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES
):
984 /* The class name array. */
986 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
, ++offset
)
988 iov
[2 + elem
+ offset
].iov_base
989 = (void *) ctype
->classnames
[cnt
];
990 iov
[2 + elem
+ offset
].iov_len
991 = strlen (ctype
->classnames
[cnt
]) + 1;
992 total
+= iov
[2 + elem
+ offset
].iov_len
;
994 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
995 iov
[2 + elem
+ offset
].iov_len
= 4 - (total
% 4);
996 total
+= 4 - (total
% 4);
998 idx
[elem
+ 1] = idx
[elem
] + total
;
1001 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES
):
1002 /* The map name array. */
1004 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
, ++offset
)
1006 iov
[2 + elem
+ offset
].iov_base
1007 = (void *) ctype
->mapnames
[cnt
];
1008 iov
[2 + elem
+ offset
].iov_len
1009 = strlen (ctype
->mapnames
[cnt
]) + 1;
1010 total
+= iov
[2 + elem
+ offset
].iov_len
;
1012 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1013 iov
[2 + elem
+ offset
].iov_len
= 4 - (total
% 4);
1014 total
+= 4 - (total
% 4);
1016 idx
[elem
+ 1] = idx
[elem
] + total
;
1019 CTYPE_DATA (_NL_CTYPE_WIDTH
,
1020 ctype
->width
.iov_base
,
1021 ctype
->width
.iov_len
);
1023 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX
,
1024 &ctype
->mb_cur_max
, sizeof (uint32_t));
1026 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME
):
1027 total
= strlen (ctype
->codeset_name
) + 1;
1029 iov
[2 + elem
+ offset
].iov_base
= (char *) ctype
->codeset_name
;
1032 iov
[2 + elem
+ offset
].iov_base
= alloca ((total
+ 3) & ~3);
1033 memset (mempcpy (iov
[2 + elem
+ offset
].iov_base
,
1034 ctype
->codeset_name
, total
),
1035 '\0', 4 - (total
& 3));
1036 total
= (total
+ 3) & ~3;
1038 iov
[2 + elem
+ offset
].iov_len
= total
;
1039 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1043 CTYPE_DATA (_NL_CTYPE_MAP_TO_NONASCII
,
1044 &ctype
->to_nonascii
, sizeof (uint32_t));
1046 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN
):
1047 iov
[2 + elem
+ offset
].iov_base
= alloca (sizeof (uint32_t));
1048 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1049 *(uint32_t *) iov
[2 + elem
+ offset
].iov_base
=
1050 ctype
->mbdigits_act
/ 10;
1051 idx
[elem
+ 1] = idx
[elem
] + sizeof (uint32_t);
1054 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN
):
1055 /* Align entries. */
1056 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1057 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1058 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1061 iov
[2 + elem
+ offset
].iov_base
= alloca (sizeof (uint32_t));
1062 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1063 *(uint32_t *) iov
[2 + elem
+ offset
].iov_base
=
1064 ctype
->wcdigits_act
/ 10;
1065 idx
[elem
+ 1] = idx
[elem
] + sizeof (uint32_t);
1068 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB
):
1069 /* Compute the length of all possible characters. For INDIGITS
1070 there might be more than one. We simply concatenate all of
1071 them with a NUL byte following. The NUL byte wouldn't be
1072 necessary but it makes it easier for the user. */
1075 for (cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
);
1076 cnt
< ctype
->mbdigits_act
; cnt
+= 10)
1077 total
+= ctype
->mbdigits
[cnt
]->nbytes
+ 1;
1078 iov
[2 + elem
+ offset
].iov_base
= (char *) alloca (total
);
1079 iov
[2 + elem
+ offset
].iov_len
= total
;
1081 cp
= iov
[2 + elem
+ offset
].iov_base
;
1082 for (cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
);
1083 cnt
< ctype
->mbdigits_act
; cnt
+= 10)
1085 cp
= mempcpy (cp
, ctype
->mbdigits
[cnt
]->bytes
,
1086 ctype
->mbdigits
[cnt
]->nbytes
);
1089 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1092 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB
) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB
):
1093 /* Compute the length of the output digit. Unlike INDIGITS there
1094 is exactly one multibyte sequence per OUTDIGIT entry. We copy it
1095 with a NUL byte following. The NUL byte wouldn't be
1096 necessary but it makes it easier for the user. */
1097 cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB
);
1098 total
= ctype
->mboutdigits
[cnt
]->nbytes
+ 1;
1099 iov
[2 + elem
+ offset
].iov_base
= (char *) alloca (total
);
1100 iov
[2 + elem
+ offset
].iov_len
= total
;
1102 *(char *) mempcpy (iov
[2 + elem
+ offset
].iov_base
,
1103 ctype
->mboutdigits
[cnt
]->bytes
,
1104 ctype
->mboutdigits
[cnt
]->nbytes
) = '\0';
1105 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1108 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC
) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC
):
1109 total
= ctype
->wcdigits_act
/ 10;
1111 iov
[2 + elem
+ offset
].iov_base
=
1112 (uint32_t *) alloca (total
* sizeof (uint32_t));
1113 iov
[2 + elem
+ offset
].iov_len
= total
* sizeof (uint32_t);
1115 for (cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC
);
1116 cnt
< ctype
->wcdigits_act
; cnt
+= 10)
1117 ((uint32_t *) iov
[2 + elem
+ offset
].iov_base
)[cnt
/ 10]
1118 = ctype
->wcdigits
[cnt
];
1119 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1122 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC
):
1123 /* Align entries. */
1124 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1125 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1126 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1130 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT1_WC
) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC
):
1131 cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC
);
1132 iov
[2 + elem
+ offset
].iov_base
= &ctype
->wcoutdigits
[cnt
];
1133 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1134 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1137 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN
):
1138 /* Align entries. */
1139 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1140 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1141 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1144 default_missing_len
= (ctype
->default_missing
1145 ? wcslen ((wchar_t *)ctype
->default_missing
)
1147 iov
[2 + elem
+ offset
].iov_base
= &default_missing_len
;
1148 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1149 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1152 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING
):
1153 iov
[2 + elem
+ offset
].iov_base
=
1154 ctype
->default_missing
?: (uint32_t *) L
"";
1155 iov
[2 + elem
+ offset
].iov_len
=
1156 wcslen (iov
[2 + elem
+ offset
].iov_base
) * sizeof (uint32_t);
1157 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1160 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN
):
1161 /* Align entries. */
1162 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1163 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1164 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1167 iov
[2 + elem
+ offset
].iov_base
= &ctype
->ntranslit_ignore
;
1168 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1169 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1172 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE
):
1174 uint32_t *ranges
= (uint32_t *) alloca (ctype
->ntranslit_ignore
1175 * 3 * sizeof (uint32_t));
1176 struct translit_ignore_t
*runp
;
1178 iov
[2 + elem
+ offset
].iov_base
= ranges
;
1179 iov
[2 + elem
+ offset
].iov_len
= (ctype
->ntranslit_ignore
1180 * 3 * sizeof (uint32_t));
1182 for (runp
= ctype
->translit_ignore
; runp
!= NULL
;
1185 *ranges
++ = runp
->from
;
1186 *ranges
++ = runp
->to
;
1187 *ranges
++ = runp
->step
;
1190 /* Remove the following line in case a new entry is added
1191 after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN. */
1193 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1197 assert (! "unknown CTYPE element");
1201 /* Handle extra maps. */
1202 size_t nr
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
);
1203 if (nr
< ctype
->nr_charclass
)
1205 iov
[2 + elem
+ offset
].iov_base
= ctype
->class_b
[nr
];
1206 iov
[2 + elem
+ offset
].iov_len
= 256 / 32 * sizeof (uint32_t);
1207 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1210 iov
[2 + elem
+ offset
] = ctype
->class_3level
[nr
];
1214 nr
-= ctype
->nr_charclass
;
1215 assert (nr
< ctype
->map_collection_nr
);
1216 iov
[2 + elem
+ offset
] = ctype
->map_3level
[nr
];
1218 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1222 assert (2 + elem
+ offset
== (nelems
+ 2 * ctype
->nr_charclass
1223 + ctype
->map_collection_nr
+ 4 + 2));
1225 write_locale_data (output_path
, LC_CTYPE
, "LC_CTYPE", 2 + elem
+ offset
,
1230 /* Local functions. */
1232 ctype_class_new (struct linereader
*lr
, struct locale_ctype_t
*ctype
,
1237 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
1238 if (strcmp (ctype
->classnames
[cnt
], name
) == 0)
1241 if (cnt
< ctype
->nr_charclass
)
1243 lr_error (lr
, _("character class `%s' already defined"), name
);
1247 if (ctype
->nr_charclass
== MAX_NR_CHARCLASS
)
1248 /* Exit code 2 is prescribed in P1003.2b. */
1249 WITH_CUR_LOCALE (error (2, 0, _("\
1250 implementation limit: no more than %Zd character classes allowed"),
1253 ctype
->classnames
[ctype
->nr_charclass
++] = name
;
1258 ctype_map_new (struct linereader
*lr
, struct locale_ctype_t
*ctype
,
1259 const char *name
, const struct charmap_t
*charmap
)
1261 size_t max_chars
= 0;
1264 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
)
1266 if (strcmp (ctype
->mapnames
[cnt
], name
) == 0)
1269 if (max_chars
< ctype
->map_collection_max
[cnt
])
1270 max_chars
= ctype
->map_collection_max
[cnt
];
1273 if (cnt
< ctype
->map_collection_nr
)
1275 lr_error (lr
, _("character map `%s' already defined"), name
);
1279 if (ctype
->map_collection_nr
== MAX_NR_CHARMAP
)
1280 /* Exit code 2 is prescribed in P1003.2b. */
1281 WITH_CUR_LOCALE (error (2, 0, _("\
1282 implementation limit: no more than %d character maps allowed"),
1285 ctype
->mapnames
[cnt
] = name
;
1288 ctype
->map_collection_max
[cnt
] = charmap
->mb_cur_max
== 1 ? 256 : 512;
1290 ctype
->map_collection_max
[cnt
] = max_chars
;
1292 ctype
->map_collection
[cnt
] = (uint32_t *)
1293 xcalloc (sizeof (uint32_t), ctype
->map_collection_max
[cnt
]);
1294 ctype
->map_collection_act
[cnt
] = 256;
1296 ++ctype
->map_collection_nr
;
1300 /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
1301 is possible if we only want to extend the name array. */
1303 find_idx (struct locale_ctype_t
*ctype
, uint32_t **table
, size_t *max
,
1304 size_t *act
, uint32_t idx
)
1309 return table
== NULL
? NULL
: &(*table
)[idx
];
1311 /* Use the charnames_idx lookup table instead of the slow search loop. */
1313 cnt
= idx_table_get (&ctype
->charnames_idx
, idx
);
1316 cnt
= ctype
->charnames_act
;
1318 for (cnt
= 256; cnt
< ctype
->charnames_act
; ++cnt
)
1319 if (ctype
->charnames
[cnt
] == idx
)
1323 /* We have to distinguish two cases: the name is found or not. */
1324 if (cnt
== ctype
->charnames_act
)
1326 /* Extend the name array. */
1327 if (ctype
->charnames_act
== ctype
->charnames_max
)
1329 ctype
->charnames_max
*= 2;
1330 ctype
->charnames
= (uint32_t *)
1331 xrealloc (ctype
->charnames
,
1332 sizeof (uint32_t) * ctype
->charnames_max
);
1334 ctype
->charnames
[ctype
->charnames_act
++] = idx
;
1335 idx_table_add (&ctype
->charnames_idx
, idx
, cnt
);
1339 /* We have done everything we are asked to do. */
1343 /* The caller does not want to extend the table. */
1344 return (cnt
>= *act
? NULL
: &(*table
)[cnt
]);
1350 size_t old_max
= *max
;
1353 while (*max
<= cnt
);
1356 (uint32_t *) xrealloc (*table
, *max
* sizeof (uint32_t));
1357 memset (&(*table
)[old_max
], '\0',
1358 (*max
- old_max
) * sizeof (uint32_t));
1364 return &(*table
)[cnt
];
1369 get_character (struct token
*now
, const struct charmap_t
*charmap
,
1370 struct repertoire_t
*repertoire
,
1371 struct charseq
**seqp
, uint32_t *wchp
)
1373 if (now
->tok
== tok_bsymbol
)
1375 /* This will hopefully be the normal case. */
1376 *wchp
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
1377 now
->val
.str
.lenmb
);
1378 *seqp
= charmap_find_value (charmap
, now
->val
.str
.startmb
,
1379 now
->val
.str
.lenmb
);
1381 else if (now
->tok
== tok_ucs4
)
1385 snprintf (utmp
, sizeof (utmp
), "U%08X", now
->val
.ucs4
);
1386 *seqp
= charmap_find_value (charmap
, utmp
, 9);
1389 *seqp
= repertoire_find_seq (repertoire
, now
->val
.ucs4
);
1393 /* Compute the value in the charmap from the UCS value. */
1394 const char *symbol
= repertoire_find_symbol (repertoire
,
1400 *seqp
= charmap_find_value (charmap
, symbol
, strlen (symbol
));
1404 if (repertoire
!= NULL
)
1406 /* Insert a negative entry. */
1407 static const struct charseq negative
1408 = { .ucs4
= ILLEGAL_CHAR_VALUE
};
1409 uint32_t *newp
= obstack_alloc (&repertoire
->mem_pool
,
1411 *newp
= now
->val
.ucs4
;
1413 insert_entry (&repertoire
->seq_table
, newp
,
1414 sizeof (uint32_t), (void *) &negative
);
1418 (*seqp
)->ucs4
= now
->val
.ucs4
;
1420 else if ((*seqp
)->ucs4
!= now
->val
.ucs4
)
1423 *wchp
= now
->val
.ucs4
;
1425 else if (now
->tok
== tok_charcode
)
1427 /* We must map from the byte code to UCS4. */
1428 *seqp
= charmap_find_symbol (charmap
, now
->val
.str
.startmb
,
1429 now
->val
.str
.lenmb
);
1432 *wchp
= ILLEGAL_CHAR_VALUE
;
1435 if ((*seqp
)->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1436 (*seqp
)->ucs4
= repertoire_find_value (repertoire
, (*seqp
)->name
,
1437 strlen ((*seqp
)->name
));
1438 *wchp
= (*seqp
)->ucs4
;
1448 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1449 the .(2). counterparts. */
1451 charclass_symbolic_ellipsis (struct linereader
*ldfile
,
1452 struct locale_ctype_t
*ctype
,
1453 const struct charmap_t
*charmap
,
1454 struct repertoire_t
*repertoire
,
1456 const char *last_str
,
1457 unsigned long int class256_bit
,
1458 unsigned long int class_bit
, int base
,
1459 int ignore_content
, int handle_digits
, int step
)
1461 const char *nowstr
= now
->val
.str
.startmb
;
1462 char tmp
[now
->val
.str
.lenmb
+ 1];
1465 unsigned long int from
;
1466 unsigned long int to
;
1468 /* We have to compute the ellipsis values using the symbolic names. */
1469 assert (last_str
!= NULL
);
1471 if (strlen (last_str
) != now
->val
.str
.lenmb
)
1475 _("`%s' and `%.*s' are not valid names for symbolic range"),
1476 last_str
, (int) now
->val
.str
.lenmb
, nowstr
);
1480 if (memcmp (last_str
, nowstr
, now
->val
.str
.lenmb
) == 0)
1481 /* Nothing to do, the names are the same. */
1484 for (cp
= last_str
; *cp
== *(nowstr
+ (cp
- last_str
)); ++cp
)
1488 from
= strtoul (cp
, &endp
, base
);
1489 if ((from
== UINT_MAX
&& errno
== ERANGE
) || *endp
!= '\0')
1492 to
= strtoul (nowstr
+ (cp
- last_str
), &endp
, base
);
1493 if ((to
== UINT_MAX
&& errno
== ERANGE
)
1494 || (endp
- nowstr
) != now
->val
.str
.lenmb
|| from
>= to
)
1497 /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
1498 if (!ignore_content
)
1500 now
->val
.str
.startmb
= tmp
;
1501 while ((from
+= step
) <= to
)
1503 struct charseq
*seq
;
1506 sprintf (tmp
, (base
== 10 ? "%.*s%0*ld" : "%.*s%0*lX"),
1507 (int) (cp
- last_str
), last_str
,
1508 (int) (now
->val
.str
.lenmb
- (cp
- last_str
)),
1511 get_character (now
, charmap
, repertoire
, &seq
, &wch
);
1513 if (seq
!= NULL
&& seq
->nbytes
== 1)
1514 /* Yep, we can store information about this byte sequence. */
1515 ctype
->class256_collection
[seq
->bytes
[0]] |= class256_bit
;
1517 if (wch
!= ILLEGAL_CHAR_VALUE
&& class_bit
!= 0)
1518 /* We have the UCS4 position. */
1519 *find_idx (ctype
, &ctype
->class_collection
,
1520 &ctype
->class_collection_max
,
1521 &ctype
->class_collection_act
, wch
) |= class_bit
;
1523 if (handle_digits
== 1)
1525 /* We must store the digit values. */
1526 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1528 ctype
->mbdigits_max
*= 2;
1529 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1530 (ctype
->mbdigits_max
1531 * sizeof (char *)));
1532 ctype
->wcdigits_max
*= 2;
1533 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1534 (ctype
->wcdigits_max
1535 * sizeof (uint32_t)));
1538 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
1539 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
1541 else if (handle_digits
== 2)
1543 /* We must store the digit values. */
1544 if (ctype
->outdigits_act
>= 10)
1546 lr_error (ldfile
, _("\
1547 %s: field `%s' does not contain exactly ten entries"),
1548 "LC_CTYPE", "outdigit");
1552 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
1553 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
1554 ++ctype
->outdigits_act
;
1561 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. */
1563 charclass_ucs4_ellipsis (struct linereader
*ldfile
,
1564 struct locale_ctype_t
*ctype
,
1565 const struct charmap_t
*charmap
,
1566 struct repertoire_t
*repertoire
,
1567 struct token
*now
, uint32_t last_wch
,
1568 unsigned long int class256_bit
,
1569 unsigned long int class_bit
, int ignore_content
,
1570 int handle_digits
, int step
)
1572 if (last_wch
> now
->val
.ucs4
)
1574 lr_error (ldfile
, _("\
1575 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1576 (now
->val
.ucs4
| last_wch
) < 65536 ? 4 : 8, now
->val
.ucs4
,
1577 (now
->val
.ucs4
| last_wch
) < 65536 ? 4 : 8, last_wch
);
1581 if (!ignore_content
)
1582 while ((last_wch
+= step
) <= now
->val
.ucs4
)
1584 /* We have to find out whether there is a byte sequence corresponding
1585 to this UCS4 value. */
1586 struct charseq
*seq
;
1589 snprintf (utmp
, sizeof (utmp
), "U%08X", last_wch
);
1590 seq
= charmap_find_value (charmap
, utmp
, 9);
1593 snprintf (utmp
, sizeof (utmp
), "U%04X", last_wch
);
1594 seq
= charmap_find_value (charmap
, utmp
, 5);
1598 /* Try looking in the repertoire map. */
1599 seq
= repertoire_find_seq (repertoire
, last_wch
);
1601 /* If this is the first time we look for this sequence create a new
1605 static const struct charseq negative
1606 = { .ucs4
= ILLEGAL_CHAR_VALUE
};
1608 /* Find the symbolic name for this UCS4 value. */
1609 if (repertoire
!= NULL
)
1611 const char *symbol
= repertoire_find_symbol (repertoire
,
1613 uint32_t *newp
= obstack_alloc (&repertoire
->mem_pool
,
1618 /* We have a name, now search the multibyte value. */
1619 seq
= charmap_find_value (charmap
, symbol
, strlen (symbol
));
1622 /* We have to create a fake entry. */
1623 seq
= (struct charseq
*) &negative
;
1625 seq
->ucs4
= last_wch
;
1627 insert_entry (&repertoire
->seq_table
, newp
, sizeof (uint32_t),
1631 /* We have to create a fake entry. */
1632 seq
= (struct charseq
*) &negative
;
1635 /* We have a name, now search the multibyte value. */
1636 if (seq
->ucs4
== last_wch
&& seq
->nbytes
== 1)
1637 /* Yep, we can store information about this byte sequence. */
1638 ctype
->class256_collection
[(size_t) seq
->bytes
[0]]
1641 /* And of course we have the UCS4 position. */
1643 *find_idx (ctype
, &ctype
->class_collection
,
1644 &ctype
->class_collection_max
,
1645 &ctype
->class_collection_act
, last_wch
) |= class_bit
;
1647 if (handle_digits
== 1)
1649 /* We must store the digit values. */
1650 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1652 ctype
->mbdigits_max
*= 2;
1653 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1654 (ctype
->mbdigits_max
1655 * sizeof (char *)));
1656 ctype
->wcdigits_max
*= 2;
1657 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1658 (ctype
->wcdigits_max
1659 * sizeof (uint32_t)));
1662 ctype
->mbdigits
[ctype
->mbdigits_act
++] = (seq
->ucs4
== last_wch
1664 ctype
->wcdigits
[ctype
->wcdigits_act
++] = last_wch
;
1666 else if (handle_digits
== 2)
1668 /* We must store the digit values. */
1669 if (ctype
->outdigits_act
>= 10)
1671 lr_error (ldfile
, _("\
1672 %s: field `%s' does not contain exactly ten entries"),
1673 "LC_CTYPE", "outdigit");
1677 ctype
->mboutdigits
[ctype
->outdigits_act
] = (seq
->ucs4
== last_wch
1679 ctype
->wcoutdigits
[ctype
->outdigits_act
] = last_wch
;
1680 ++ctype
->outdigits_act
;
1686 /* Ellipsis as in `/xea/x12.../xea/x34'. */
1688 charclass_charcode_ellipsis (struct linereader
*ldfile
,
1689 struct locale_ctype_t
*ctype
,
1690 const struct charmap_t
*charmap
,
1691 struct repertoire_t
*repertoire
,
1692 struct token
*now
, char *last_charcode
,
1693 uint32_t last_charcode_len
,
1694 unsigned long int class256_bit
,
1695 unsigned long int class_bit
, int ignore_content
,
1698 /* First check whether the to-value is larger. */
1699 if (now
->val
.charcode
.nbytes
!= last_charcode_len
)
1701 lr_error (ldfile
, _("\
1702 start and end character sequence of range must have the same length"));
1706 if (memcmp (last_charcode
, now
->val
.charcode
.bytes
, last_charcode_len
) > 0)
1708 lr_error (ldfile
, _("\
1709 to-value character sequence is smaller than from-value sequence"));
1713 if (!ignore_content
)
1717 /* Increment the byte sequence value. */
1718 struct charseq
*seq
;
1722 for (i
= last_charcode_len
- 1; i
>= 0; --i
)
1723 if (++last_charcode
[i
] != 0)
1726 if (last_charcode_len
== 1)
1727 /* Of course we have the charcode value. */
1728 ctype
->class256_collection
[(size_t) last_charcode
[0]]
1731 /* Find the symbolic name. */
1732 seq
= charmap_find_symbol (charmap
, last_charcode
,
1736 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1737 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1738 strlen (seq
->name
));
1739 wch
= seq
== NULL
? ILLEGAL_CHAR_VALUE
: seq
->ucs4
;
1741 if (wch
!= ILLEGAL_CHAR_VALUE
&& class_bit
!= 0)
1742 *find_idx (ctype
, &ctype
->class_collection
,
1743 &ctype
->class_collection_max
,
1744 &ctype
->class_collection_act
, wch
) |= class_bit
;
1747 wch
= ILLEGAL_CHAR_VALUE
;
1749 if (handle_digits
== 1)
1751 /* We must store the digit values. */
1752 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1754 ctype
->mbdigits_max
*= 2;
1755 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1756 (ctype
->mbdigits_max
1757 * sizeof (char *)));
1758 ctype
->wcdigits_max
*= 2;
1759 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1760 (ctype
->wcdigits_max
1761 * sizeof (uint32_t)));
1764 seq
= xmalloc (sizeof (struct charseq
) + last_charcode_len
);
1765 memcpy ((char *) (seq
+ 1), last_charcode
, last_charcode_len
);
1766 seq
->nbytes
= last_charcode_len
;
1768 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
1769 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
1771 else if (handle_digits
== 2)
1773 struct charseq
*seq
;
1774 /* We must store the digit values. */
1775 if (ctype
->outdigits_act
>= 10)
1777 lr_error (ldfile
, _("\
1778 %s: field `%s' does not contain exactly ten entries"),
1779 "LC_CTYPE", "outdigit");
1783 seq
= xmalloc (sizeof (struct charseq
) + last_charcode_len
);
1784 memcpy ((char *) (seq
+ 1), last_charcode
, last_charcode_len
);
1785 seq
->nbytes
= last_charcode_len
;
1787 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
1788 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
1789 ++ctype
->outdigits_act
;
1792 while (memcmp (last_charcode
, now
->val
.charcode
.bytes
,
1793 last_charcode_len
) != 0);
1799 find_translit2 (struct locale_ctype_t
*ctype
, const struct charmap_t
*charmap
,
1802 struct translit_t
*trunp
= ctype
->translit
;
1803 struct translit_ignore_t
*tirunp
= ctype
->translit_ignore
;
1805 while (trunp
!= NULL
)
1807 /* XXX We simplify things here. The transliterations we look
1808 for are only allowed to have one character. */
1809 if (trunp
->from
[0] == wch
&& trunp
->from
[1] == 0)
1811 /* Found it. Now look for a transliteration which can be
1812 represented with the character set. */
1813 struct translit_to_t
*torunp
= trunp
->to
;
1815 while (torunp
!= NULL
)
1819 for (i
= 0; torunp
->str
[i
] != 0; ++i
)
1823 snprintf (utmp
, sizeof (utmp
), "U%08X", torunp
->str
[i
]);
1824 if (charmap_find_value (charmap
, utmp
, 9) == NULL
)
1825 /* This character cannot be represented. */
1829 if (torunp
->str
[i
] == 0)
1832 torunp
= torunp
->next
;
1838 trunp
= trunp
->next
;
1841 /* Check for ignored chars. */
1842 while (tirunp
!= NULL
)
1844 if (tirunp
->from
<= wch
&& tirunp
->to
>= wch
)
1848 for (wi
= tirunp
->from
; wi
<= wch
; wi
+= tirunp
->step
)
1850 return (uint32_t []) { 0 };
1854 /* Nothing found. */
1860 find_translit (struct localedef_t
*locale
, const struct charmap_t
*charmap
,
1863 struct locale_ctype_t
*ctype
;
1864 uint32_t *result
= NULL
;
1866 assert (locale
!= NULL
);
1867 ctype
= locale
->categories
[LC_CTYPE
].ctype
;
1869 if (ctype
->translit
!= NULL
)
1870 result
= find_translit2 (ctype
, charmap
, wch
);
1874 struct translit_include_t
*irunp
= ctype
->translit_include
;
1876 while (irunp
!= NULL
&& result
== NULL
)
1878 result
= find_translit (find_locale (CTYPE_LOCALE
,
1880 irunp
->copy_repertoire
,
1883 irunp
= irunp
->next
;
1891 /* Read one transliteration entry. */
1893 read_widestring (struct linereader
*ldfile
, struct token
*now
,
1894 const struct charmap_t
*charmap
,
1895 struct repertoire_t
*repertoire
)
1899 if (now
->tok
== tok_default_missing
)
1900 /* The special name "" will denote this case. */
1901 wstr
= ((uint32_t *) { 0 });
1902 else if (now
->tok
== tok_bsymbol
)
1904 /* Get the value from the repertoire. */
1905 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1906 wstr
[0] = repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
1907 now
->val
.str
.lenmb
);
1908 if (wstr
[0] == ILLEGAL_CHAR_VALUE
)
1910 /* We cannot proceed, we don't know the UCS4 value. */
1917 else if (now
->tok
== tok_ucs4
)
1919 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1920 wstr
[0] = now
->val
.ucs4
;
1923 else if (now
->tok
== tok_charcode
)
1925 /* Argh, we have to convert to the symbol name first and then to the
1927 struct charseq
*seq
= charmap_find_symbol (charmap
,
1928 now
->val
.str
.startmb
,
1929 now
->val
.str
.lenmb
);
1931 /* Cannot find the UCS4 value. */
1934 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1935 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1936 strlen (seq
->name
));
1937 if (seq
->ucs4
== ILLEGAL_CHAR_VALUE
)
1938 /* We cannot proceed, we don't know the UCS4 value. */
1941 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1942 wstr
[0] = seq
->ucs4
;
1945 else if (now
->tok
== tok_string
)
1947 wstr
= now
->val
.str
.startwc
;
1948 if (wstr
== NULL
|| wstr
[0] == 0)
1953 if (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
1954 lr_ignore_rest (ldfile
, 0);
1955 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1956 return (uint32_t *) -1l;
1964 read_translit_entry (struct linereader
*ldfile
, struct locale_ctype_t
*ctype
,
1965 struct token
*now
, const struct charmap_t
*charmap
,
1966 struct repertoire_t
*repertoire
)
1968 uint32_t *from_wstr
= read_widestring (ldfile
, now
, charmap
, repertoire
);
1969 struct translit_t
*result
;
1970 struct translit_to_t
**top
;
1971 struct obstack
*ob
= &ctype
->mempool
;
1975 if (from_wstr
== NULL
)
1976 /* There is no valid from string. */
1979 result
= (struct translit_t
*) obstack_alloc (ob
,
1980 sizeof (struct translit_t
));
1981 result
->from
= from_wstr
;
1982 result
->fname
= ldfile
->fname
;
1983 result
->lineno
= ldfile
->lineno
;
1984 result
->next
= NULL
;
1994 /* Next we have one or more transliterations. They are
1995 separated by semicolons. */
1996 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
1998 if (!first
&& (now
->tok
== tok_semicolon
|| now
->tok
== tok_eol
))
2000 /* One string read. */
2001 const uint32_t zero
= 0;
2005 obstack_grow (ob
, &zero
, 4);
2006 to_wstr
= obstack_finish (ob
);
2008 *top
= obstack_alloc (ob
, sizeof (struct translit_to_t
));
2009 (*top
)->str
= to_wstr
;
2010 (*top
)->next
= NULL
;
2013 if (now
->tok
== tok_eol
)
2015 result
->next
= ctype
->translit
;
2016 ctype
->translit
= result
;
2021 top
= &(*top
)->next
;
2026 to_wstr
= read_widestring (ldfile
, now
, charmap
, repertoire
);
2027 if (to_wstr
== (uint32_t *) -1l)
2029 /* An error occurred. */
2030 obstack_free (ob
, result
);
2034 if (to_wstr
== NULL
)
2037 /* This value is usable. */
2038 obstack_grow (ob
, to_wstr
, wcslen ((wchar_t *) to_wstr
) * 4);
2047 read_translit_ignore_entry (struct linereader
*ldfile
,
2048 struct locale_ctype_t
*ctype
,
2049 const struct charmap_t
*charmap
,
2050 struct repertoire_t
*repertoire
)
2052 /* We expect a semicolon-separated list of characters we ignore. We are
2053 only interested in the wide character definitions. These must be
2054 single characters, possibly defining a range when an ellipsis is used. */
2057 struct token
*now
= lr_token (ldfile
, charmap
, NULL
, repertoire
,
2059 struct translit_ignore_t
*newp
;
2062 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2065 _("premature end of `translit_ignore' definition"));
2069 if (now
->tok
!= tok_bsymbol
&& now
->tok
!= tok_ucs4
)
2071 lr_error (ldfile
, _("syntax error"));
2072 lr_ignore_rest (ldfile
, 0);
2076 if (now
->tok
== tok_ucs4
)
2077 from
= now
->val
.ucs4
;
2079 /* Try to get the value. */
2080 from
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
2081 now
->val
.str
.lenmb
);
2083 if (from
== ILLEGAL_CHAR_VALUE
)
2085 lr_error (ldfile
, "invalid character name");
2090 newp
= (struct translit_ignore_t
*)
2091 obstack_alloc (&ctype
->mempool
, sizeof (struct translit_ignore_t
));
2096 newp
->next
= ctype
->translit_ignore
;
2097 ctype
->translit_ignore
= newp
;
2100 /* Now we expect either a semicolon, an ellipsis, or the end of the
2102 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
2104 if (now
->tok
== tok_ellipsis2
|| now
->tok
== tok_ellipsis2_2
)
2106 /* XXX Should we bother implementing `....'? `...' certainly
2107 will not be implemented. */
2109 int step
= now
->tok
== tok_ellipsis2_2
? 2 : 1;
2111 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
2113 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2116 _("premature end of `translit_ignore' definition"));
2120 if (now
->tok
!= tok_bsymbol
&& now
->tok
!= tok_ucs4
)
2122 lr_error (ldfile
, _("syntax error"));
2123 lr_ignore_rest (ldfile
, 0);
2127 if (now
->tok
== tok_ucs4
)
2130 /* Try to get the value. */
2131 to
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
2132 now
->val
.str
.lenmb
);
2134 if (to
== ILLEGAL_CHAR_VALUE
)
2135 lr_error (ldfile
, "invalid character name");
2138 /* Make sure the `to'-value is larger. */
2145 lr_error (ldfile
, _("\
2146 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2147 (to
| from
) < 65536 ? 4 : 8, to
,
2148 (to
| from
) < 65536 ? 4 : 8, from
);
2151 /* And the next token. */
2152 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
2155 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2159 if (now
->tok
== tok_semicolon
)
2163 /* If we come here something is wrong. */
2164 lr_error (ldfile
, _("syntax error"));
2165 lr_ignore_rest (ldfile
, 0);
2171 /* The parser for the LC_CTYPE section of the locale definition. */
2173 ctype_read (struct linereader
*ldfile
, struct localedef_t
*result
,
2174 const struct charmap_t
*charmap
, const char *repertoire_name
,
2177 struct repertoire_t
*repertoire
= NULL
;
2178 struct locale_ctype_t
*ctype
;
2180 enum token_t nowtok
;
2182 struct charseq
*last_seq
;
2183 uint32_t last_wch
= 0;
2184 enum token_t last_token
;
2185 enum token_t ellipsis_token
;
2187 char last_charcode
[16];
2188 size_t last_charcode_len
= 0;
2189 const char *last_str
= NULL
;
2191 struct localedef_t
*copy_locale
= NULL
;
2193 /* Get the repertoire we have to use. */
2194 if (repertoire_name
!= NULL
)
2195 repertoire
= repertoire_read (repertoire_name
);
2197 /* The rest of the line containing `LC_CTYPE' must be free. */
2198 lr_ignore_rest (ldfile
, 1);
2203 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2206 while (nowtok
== tok_eol
);
2208 /* If we see `copy' now we are almost done. */
2209 if (nowtok
== tok_copy
)
2211 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2212 if (now
->tok
!= tok_string
)
2214 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2218 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2219 while (now
->tok
!= tok_eof
&& now
->tok
!= tok_end
);
2221 if (now
->tok
!= tok_eof
2222 || (now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
),
2223 now
->tok
== tok_eof
))
2224 lr_error (ldfile
, _("%s: premature end of file"), "LC_CTYPE");
2225 else if (now
->tok
!= tok_lc_ctype
)
2227 lr_error (ldfile
, _("\
2228 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2229 lr_ignore_rest (ldfile
, 0);
2232 lr_ignore_rest (ldfile
, 1);
2237 if (! ignore_content
)
2239 /* Get the locale definition. */
2240 copy_locale
= load_locale (LC_CTYPE
, now
->val
.str
.startmb
,
2241 repertoire_name
, charmap
, NULL
);
2242 if ((copy_locale
->avail
& CTYPE_LOCALE
) == 0)
2244 /* Not yet loaded. So do it now. */
2245 if (locfile_read (copy_locale
, charmap
) != 0)
2249 if (copy_locale
->categories
[LC_CTYPE
].ctype
== NULL
)
2253 lr_ignore_rest (ldfile
, 1);
2255 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2259 /* Prepare the data structures. */
2260 ctype_startup (ldfile
, result
, charmap
, copy_locale
, ignore_content
);
2261 ctype
= result
->categories
[LC_CTYPE
].ctype
;
2263 /* Remember the repertoire we use. */
2264 if (!ignore_content
)
2265 ctype
->repertoire
= repertoire
;
2269 unsigned long int class_bit
= 0;
2270 unsigned long int class256_bit
= 0;
2271 int handle_digits
= 0;
2273 /* Of course we don't proceed beyond the end of file. */
2274 if (nowtok
== tok_eof
)
2277 /* Ignore empty lines. */
2278 if (nowtok
== tok_eol
)
2280 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2288 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2289 while (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2291 ctype_class_new (ldfile
, ctype
, now
->val
.str
.startmb
);
2292 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2293 if (now
->tok
!= tok_semicolon
)
2295 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2297 if (now
->tok
!= tok_eol
)
2299 %s: syntax error in definition of new character class"), "LC_CTYPE");
2303 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2304 while (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2306 ctype_map_new (ldfile
, ctype
, now
->val
.str
.startmb
, charmap
);
2307 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2308 if (now
->tok
!= tok_semicolon
)
2310 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2312 if (now
->tok
!= tok_eol
)
2314 %s: syntax error in definition of new character map"), "LC_CTYPE");
2318 /* Ignore the rest of the line if we don't need the input of
2322 lr_ignore_rest (ldfile
, 0);
2326 /* We simply forget the `class' keyword and use the following
2327 operand to determine the bit. */
2328 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2329 if (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2331 /* Must be one of the predefined class names. */
2332 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
2333 if (strcmp (ctype
->classnames
[cnt
], now
->val
.str
.startmb
) == 0)
2335 if (cnt
>= ctype
->nr_charclass
)
2337 #ifdef PREDEFINED_CLASSES
2338 if (now
->val
.str
.lenmb
== 8
2339 && memcmp ("special1", now
->val
.str
.startmb
, 8) == 0)
2340 class_bit
= _ISwspecial1
;
2341 else if (now
->val
.str
.lenmb
== 8
2342 && memcmp ("special2", now
->val
.str
.startmb
, 8) == 0)
2343 class_bit
= _ISwspecial2
;
2344 else if (now
->val
.str
.lenmb
== 8
2345 && memcmp ("special3", now
->val
.str
.startmb
, 8) == 0)
2346 class_bit
= _ISwspecial3
;
2350 /* OK, it's a new class. */
2351 ctype_class_new (ldfile
, ctype
, now
->val
.str
.startmb
);
2353 class_bit
= _ISwbit (ctype
->nr_charclass
- 1);
2358 class_bit
= _ISwbit (cnt
);
2360 free (now
->val
.str
.startmb
);
2363 else if (now
->tok
== tok_digit
)
2364 goto handle_tok_digit
;
2365 else if (now
->tok
< tok_upper
|| now
->tok
> tok_blank
)
2369 class_bit
= BITw (now
->tok
);
2370 class256_bit
= BIT (now
->tok
);
2373 /* The next character must be a semicolon. */
2374 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2375 if (now
->tok
!= tok_semicolon
)
2377 goto read_charclass
;
2390 /* Ignore the rest of the line if we don't need the input of
2394 lr_ignore_rest (ldfile
, 0);
2398 class_bit
= BITw (now
->tok
);
2399 class256_bit
= BIT (now
->tok
);
2402 ctype
->class_done
|= class_bit
;
2403 last_token
= tok_none
;
2404 ellipsis_token
= tok_none
;
2406 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2407 while (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
2410 struct charseq
*seq
;
2412 if (ellipsis_token
== tok_none
)
2414 if (get_character (now
, charmap
, repertoire
, &seq
, &wch
))
2417 if (!ignore_content
&& seq
!= NULL
&& seq
->nbytes
== 1)
2418 /* Yep, we can store information about this byte
2420 ctype
->class256_collection
[seq
->bytes
[0]] |= class256_bit
;
2422 if (!ignore_content
&& wch
!= ILLEGAL_CHAR_VALUE
2424 /* We have the UCS4 position. */
2425 *find_idx (ctype
, &ctype
->class_collection
,
2426 &ctype
->class_collection_max
,
2427 &ctype
->class_collection_act
, wch
) |= class_bit
;
2429 last_token
= now
->tok
;
2430 /* Terminate the string. */
2431 if (last_token
== tok_bsymbol
)
2433 now
->val
.str
.startmb
[now
->val
.str
.lenmb
] = '\0';
2434 last_str
= now
->val
.str
.startmb
;
2440 memcpy (last_charcode
, now
->val
.charcode
.bytes
, 16);
2441 last_charcode_len
= now
->val
.charcode
.nbytes
;
2443 if (!ignore_content
&& handle_digits
== 1)
2445 /* We must store the digit values. */
2446 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
2448 ctype
->mbdigits_max
+= 10;
2449 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
2450 (ctype
->mbdigits_max
2451 * sizeof (char *)));
2452 ctype
->wcdigits_max
+= 10;
2453 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
2454 (ctype
->wcdigits_max
2455 * sizeof (uint32_t)));
2458 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
2459 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
2461 else if (!ignore_content
&& handle_digits
== 2)
2463 /* We must store the digit values. */
2464 if (ctype
->outdigits_act
>= 10)
2466 lr_error (ldfile
, _("\
2467 %s: field `%s' does not contain exactly ten entries"),
2468 "LC_CTYPE", "outdigit");
2469 lr_ignore_rest (ldfile
, 0);
2473 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
2474 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
2475 ++ctype
->outdigits_act
;
2480 /* Now it gets complicated. We have to resolve the
2481 ellipsis problem. First we must distinguish between
2482 the different kind of ellipsis and this must match the
2483 tokens we have seen. */
2484 assert (last_token
!= tok_none
);
2486 if (last_token
!= now
->tok
)
2488 lr_error (ldfile
, _("\
2489 ellipsis range must be marked by two operands of same type"));
2490 lr_ignore_rest (ldfile
, 0);
2494 if (last_token
== tok_bsymbol
)
2496 if (ellipsis_token
== tok_ellipsis3
)
2497 lr_error (ldfile
, _("with symbolic name range values \
2498 the absolute ellipsis `...' must not be used"));
2500 charclass_symbolic_ellipsis (ldfile
, ctype
, charmap
,
2501 repertoire
, now
, last_str
,
2502 class256_bit
, class_bit
,
2507 handle_digits
, step
);
2509 else if (last_token
== tok_ucs4
)
2511 if (ellipsis_token
!= tok_ellipsis2
)
2512 lr_error (ldfile
, _("\
2513 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2515 charclass_ucs4_ellipsis (ldfile
, ctype
, charmap
,
2516 repertoire
, now
, last_wch
,
2517 class256_bit
, class_bit
,
2518 ignore_content
, handle_digits
,
2523 assert (last_token
== tok_charcode
);
2525 if (ellipsis_token
!= tok_ellipsis3
)
2526 lr_error (ldfile
, _("\
2527 with character code range values one must use the absolute ellipsis `...'"));
2529 charclass_charcode_ellipsis (ldfile
, ctype
, charmap
,
2533 class256_bit
, class_bit
,
2538 /* Now we have used the last value. */
2539 last_token
= tok_none
;
2542 /* Next we expect a semicolon or the end of the line. */
2543 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2544 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2547 if (last_token
!= tok_none
2548 && now
->tok
>= tok_ellipsis2
&& now
->tok
<= tok_ellipsis4_2
)
2550 if (now
->tok
== tok_ellipsis2_2
)
2552 now
->tok
= tok_ellipsis2
;
2555 else if (now
->tok
== tok_ellipsis4_2
)
2557 now
->tok
= tok_ellipsis4
;
2561 ellipsis_token
= now
->tok
;
2563 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2567 if (now
->tok
!= tok_semicolon
)
2570 /* And get the next character. */
2571 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2573 ellipsis_token
= tok_none
;
2579 /* Ignore the rest of the line if we don't need the input of
2583 lr_ignore_rest (ldfile
, 0);
2588 class_bit
= _ISwdigit
;
2589 class256_bit
= _ISdigit
;
2591 goto read_charclass
;
2594 /* Ignore the rest of the line if we don't need the input of
2598 lr_ignore_rest (ldfile
, 0);
2602 if (ctype
->outdigits_act
!= 0)
2603 lr_error (ldfile
, _("\
2604 %s: field `%s' declared more than once"),
2605 "LC_CTYPE", "outdigit");
2609 goto read_charclass
;
2612 /* Ignore the rest of the line if we don't need the input of
2616 lr_ignore_rest (ldfile
, 0);
2624 /* Ignore the rest of the line if we don't need the input of
2628 lr_ignore_rest (ldfile
, 0);
2636 /* Ignore the rest of the line if we don't need the input of
2640 lr_ignore_rest (ldfile
, 0);
2644 /* We simply forget the `map' keyword and use the following
2645 operand to determine the mapping. */
2646 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2647 if (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2651 for (cnt
= 2; cnt
< ctype
->map_collection_nr
; ++cnt
)
2652 if (strcmp (now
->val
.str
.startmb
, ctype
->mapnames
[cnt
]) == 0)
2655 if (cnt
< ctype
->map_collection_nr
)
2656 free (now
->val
.str
.startmb
);
2658 /* OK, it's a new map. */
2659 ctype_map_new (ldfile
, ctype
, now
->val
.str
.startmb
, charmap
);
2663 else if (now
->tok
< tok_toupper
|| now
->tok
> tok_tolower
)
2666 mapidx
= now
->tok
- tok_toupper
;
2668 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2669 /* This better should be a semicolon. */
2670 if (now
->tok
!= tok_semicolon
)
2674 /* Test whether this mapping was already defined. */
2675 if (ctype
->tomap_done
[mapidx
])
2677 lr_error (ldfile
, _("duplicated definition for mapping `%s'"),
2678 ctype
->mapnames
[mapidx
]);
2679 lr_ignore_rest (ldfile
, 0);
2682 ctype
->tomap_done
[mapidx
] = 1;
2684 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2685 while (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
2687 struct charseq
*from_seq
;
2689 struct charseq
*to_seq
;
2692 /* Every pair starts with an opening brace. */
2693 if (now
->tok
!= tok_open_brace
)
2696 /* Next comes the from-value. */
2697 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2698 if (get_character (now
, charmap
, repertoire
, &from_seq
,
2702 /* The next is a comma. */
2703 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2704 if (now
->tok
!= tok_comma
)
2707 /* And the other value. */
2708 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2709 if (get_character (now
, charmap
, repertoire
, &to_seq
,
2713 /* And the last thing is the closing brace. */
2714 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2715 if (now
->tok
!= tok_close_brace
)
2718 if (!ignore_content
)
2720 /* Check whether the mapping converts from an ASCII value
2721 to a non-ASCII value. */
2722 if (from_seq
!= NULL
&& from_seq
->nbytes
== 1
2723 && isascii (from_seq
->bytes
[0])
2724 && to_seq
!= NULL
&& (to_seq
->nbytes
!= 1
2725 || !isascii (to_seq
->bytes
[0])))
2726 ctype
->to_nonascii
= 1;
2728 if (mapidx
< 2 && from_seq
!= NULL
&& to_seq
!= NULL
2729 && from_seq
->nbytes
== 1 && to_seq
->nbytes
== 1)
2730 /* We can use this value. */
2731 ctype
->map256_collection
[mapidx
][from_seq
->bytes
[0]]
2734 if (from_wch
!= ILLEGAL_CHAR_VALUE
2735 && to_wch
!= ILLEGAL_CHAR_VALUE
)
2736 /* Both correct values. */
2737 *find_idx (ctype
, &ctype
->map_collection
[mapidx
],
2738 &ctype
->map_collection_max
[mapidx
],
2739 &ctype
->map_collection_act
[mapidx
],
2743 /* Now comes a semicolon or the end of the line/file. */
2744 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2745 if (now
->tok
== tok_semicolon
)
2746 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2750 case tok_translit_start
:
2751 /* Ignore the entire translit section with its peculiar syntax
2752 if we don't need the input. */
2757 lr_ignore_rest (ldfile
, 0);
2758 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2760 while (now
->tok
!= tok_translit_end
&& now
->tok
!= tok_eof
);
2762 if (now
->tok
== tok_eof
)
2763 lr_error (ldfile
, _(\
2764 "%s: `translit_start' section does not end with `translit_end'"),
2770 /* The rest of the line better should be empty. */
2771 lr_ignore_rest (ldfile
, 1);
2773 /* We count here the number of allocated entries in the `translit'
2777 ldfile
->translate_strings
= 1;
2778 ldfile
->return_widestr
= 1;
2780 /* We proceed until we see the `translit_end' token. */
2781 while (now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
),
2782 now
->tok
!= tok_translit_end
&& now
->tok
!= tok_eof
)
2784 if (now
->tok
== tok_eol
)
2785 /* Ignore empty lines. */
2788 if (now
->tok
== tok_include
)
2790 /* We have to include locale. */
2791 const char *locale_name
;
2792 const char *repertoire_name
;
2793 struct translit_include_t
*include_stmt
, **include_ptr
;
2795 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2796 /* This should be a string or an identifier. In any
2797 case something to name a locale. */
2798 if (now
->tok
!= tok_string
&& now
->tok
!= tok_ident
)
2801 lr_error (ldfile
, _("%s: syntax error"), "LC_CTYPE");
2802 lr_ignore_rest (ldfile
, 0);
2805 locale_name
= now
->val
.str
.startmb
;
2807 /* Next should be a semicolon. */
2808 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2809 if (now
->tok
!= tok_semicolon
)
2810 goto translit_syntax
;
2812 /* Now the repertoire name. */
2813 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2814 if ((now
->tok
!= tok_string
&& now
->tok
!= tok_ident
)
2815 || now
->val
.str
.startmb
== NULL
)
2816 goto translit_syntax
;
2817 repertoire_name
= now
->val
.str
.startmb
;
2818 if (repertoire_name
[0] == '\0')
2819 /* Ignore the empty string. */
2820 repertoire_name
= NULL
;
2822 /* Save the include statement for later processing. */
2823 include_stmt
= (struct translit_include_t
*)
2824 xmalloc (sizeof (struct translit_include_t
));
2825 include_stmt
->copy_locale
= locale_name
;
2826 include_stmt
->copy_repertoire
= repertoire_name
;
2827 include_stmt
->next
= NULL
;
2829 include_ptr
= &ctype
->translit_include
;
2830 while (*include_ptr
!= NULL
)
2831 include_ptr
= &(*include_ptr
)->next
;
2832 *include_ptr
= include_stmt
;
2834 /* The rest of the line must be empty. */
2835 lr_ignore_rest (ldfile
, 1);
2837 /* Make sure the locale is read. */
2838 add_to_readlist (LC_CTYPE
, locale_name
, repertoire_name
,
2842 else if (now
->tok
== tok_default_missing
)
2848 /* We expect a single character or string as the
2850 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2851 wstr
= read_widestring (ldfile
, now
, charmap
,
2856 if (ctype
->default_missing
!= NULL
)
2858 lr_error (ldfile
, _("\
2859 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2860 WITH_CUR_LOCALE (error_at_line (0, 0,
2861 ctype
->default_missing_file
,
2862 ctype
->default_missing_lineno
,
2864 previous definition was here")));
2868 ctype
->default_missing
= wstr
;
2869 ctype
->default_missing_file
= ldfile
->fname
;
2870 ctype
->default_missing_lineno
= ldfile
->lineno
;
2872 /* We can have more entries, ignore them. */
2873 lr_ignore_rest (ldfile
, 0);
2876 else if (wstr
== (uint32_t *) -1l)
2877 /* This was a syntax error. */
2880 /* Maybe there is another replacement we can use. */
2881 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2882 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2884 /* Nothing found. We tell the user. */
2885 lr_error (ldfile
, _("\
2886 %s: no representable `default_missing' definition found"), "LC_CTYPE");
2889 if (now
->tok
!= tok_semicolon
)
2890 goto translit_syntax
;
2895 else if (now
->tok
== tok_translit_ignore
)
2897 read_translit_ignore_entry (ldfile
, ctype
, charmap
,
2902 read_translit_entry (ldfile
, ctype
, now
, charmap
, repertoire
);
2904 ldfile
->return_widestr
= 0;
2906 if (now
->tok
== tok_eof
)
2907 lr_error (ldfile
, _(\
2908 "%s: `translit_start' section does not end with `translit_end'"),
2914 /* Ignore the rest of the line if we don't need the input of
2918 lr_ignore_rest (ldfile
, 0);
2922 /* This could mean one of several things. First test whether
2923 it's a character class name. */
2924 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
2925 if (strcmp (now
->val
.str
.startmb
, ctype
->classnames
[cnt
]) == 0)
2927 if (cnt
< ctype
->nr_charclass
)
2929 class_bit
= _ISwbit (cnt
);
2930 class256_bit
= cnt
<= 11 ? _ISbit (cnt
) : 0;
2931 free (now
->val
.str
.startmb
);
2932 goto read_charclass
;
2934 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
)
2935 if (strcmp (now
->val
.str
.startmb
, ctype
->mapnames
[cnt
]) == 0)
2937 if (cnt
< ctype
->map_collection_nr
)
2940 free (now
->val
.str
.startmb
);
2943 #ifdef PREDEFINED_CLASSES
2944 if (strcmp (now
->val
.str
.startmb
, "special1") == 0)
2946 class_bit
= _ISwspecial1
;
2947 free (now
->val
.str
.startmb
);
2948 goto read_charclass
;
2950 if (strcmp (now
->val
.str
.startmb
, "special2") == 0)
2952 class_bit
= _ISwspecial2
;
2953 free (now
->val
.str
.startmb
);
2954 goto read_charclass
;
2956 if (strcmp (now
->val
.str
.startmb
, "special3") == 0)
2958 class_bit
= _ISwspecial3
;
2959 free (now
->val
.str
.startmb
);
2960 goto read_charclass
;
2962 if (strcmp (now
->val
.str
.startmb
, "tosymmetric") == 0)
2971 /* Next we assume `LC_CTYPE'. */
2972 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2973 if (now
->tok
== tok_eof
)
2975 if (now
->tok
== tok_eol
)
2976 lr_error (ldfile
, _("%s: incomplete `END' line"),
2978 else if (now
->tok
!= tok_lc_ctype
)
2979 lr_error (ldfile
, _("\
2980 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2981 lr_ignore_rest (ldfile
, now
->tok
== tok_lc_ctype
);
2986 if (now
->tok
!= tok_eof
)
2987 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2990 /* Prepare for the next round. */
2991 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2995 /* When we come here we reached the end of the file. */
2996 lr_error (ldfile
, _("%s: premature end of file"), "LC_CTYPE");
3001 set_class_defaults (struct locale_ctype_t
*ctype
,
3002 const struct charmap_t
*charmap
,
3003 struct repertoire_t
*repertoire
)
3007 /* These function defines the default values for the classes and conversions
3008 according to POSIX.2 2.5.2.1.
3009 It may seem that the order of these if-blocks is arbitrary but it is NOT.
3010 Don't move them unless you know what you do! */
3012 auto void set_default (int bitpos
, int from
, int to
);
3014 void set_default (int bitpos
, int from
, int to
)
3018 int bit
= _ISbit (bitpos
);
3019 int bitw
= _ISwbit (bitpos
);
3020 /* Define string. */
3023 for (ch
= from
; ch
<= to
; ++ch
)
3025 struct charseq
*seq
;
3028 seq
= charmap_find_value (charmap
, tmp
, 1);
3032 sprintf (buf
, "U%08X", ch
);
3033 seq
= charmap_find_value (charmap
, buf
, 9);
3038 WITH_CUR_LOCALE (error (0, 0, _("\
3039 %s: character `%s' not defined in charmap while needed as default value"),
3042 else if (seq
->nbytes
!= 1)
3043 WITH_CUR_LOCALE (error (0, 0, _("\
3044 %s: character `%s' in charmap not representable with one byte"),
3047 ctype
->class256_collection
[seq
->bytes
[0]] |= bit
;
3049 /* No need to search here, the ASCII value is also the Unicode
3051 ELEM (ctype
, class_collection
, , ch
) |= bitw
;
3055 /* Set default values if keyword was not present. */
3056 if ((ctype
->class_done
& BITw (tok_upper
)) == 0)
3057 /* "If this keyword [lower] is not specified, the lowercase letters
3058 `A' through `Z', ..., shall automatically belong to this class,
3059 with implementation defined character values." [P1003.2, 2.5.2.1] */
3060 set_default (BITPOS (tok_upper
), 'A', 'Z');
3062 if ((ctype
->class_done
& BITw (tok_lower
)) == 0)
3063 /* "If this keyword [lower] is not specified, the lowercase letters
3064 `a' through `z', ..., shall automatically belong to this class,
3065 with implementation defined character values." [P1003.2, 2.5.2.1] */
3066 set_default (BITPOS (tok_lower
), 'a', 'z');
3068 if ((ctype
->class_done
& BITw (tok_alpha
)) == 0)
3070 /* Table 2-6 in P1003.2 says that characters in class `upper' or
3071 class `lower' *must* be in class `alpha'. */
3072 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
);
3073 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
);
3075 for (cnt
= 0; cnt
< 256; ++cnt
)
3076 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3077 ctype
->class256_collection
[cnt
] |= BIT (tok_alpha
);
3079 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3080 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3081 ctype
->class_collection
[cnt
] |= BITw (tok_alpha
);
3084 if ((ctype
->class_done
& BITw (tok_digit
)) == 0)
3085 /* "If this keyword [digit] is not specified, the digits `0' through
3086 `9', ..., shall automatically belong to this class, with
3087 implementation-defined character values." [P1003.2, 2.5.2.1] */
3088 set_default (BITPOS (tok_digit
), '0', '9');
3090 /* "Only characters specified for the `alpha' and `digit' keyword
3091 shall be specified. Characters specified for the keyword `alpha'
3092 and `digit' are automatically included in this class. */
3094 unsigned long int mask
= BIT (tok_alpha
) | BIT (tok_digit
);
3095 unsigned long int maskw
= BITw (tok_alpha
) | BITw (tok_digit
);
3097 for (cnt
= 0; cnt
< 256; ++cnt
)
3098 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3099 ctype
->class256_collection
[cnt
] |= BIT (tok_alnum
);
3101 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3102 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3103 ctype
->class_collection
[cnt
] |= BITw (tok_alnum
);
3106 if ((ctype
->class_done
& BITw (tok_space
)) == 0)
3107 /* "If this keyword [space] is not specified, the characters <space>,
3108 <form-feed>, <newline>, <carriage-return>, <tab>, and
3109 <vertical-tab>, ..., shall automatically belong to this class,
3110 with implementation-defined character values." [P1003.2, 2.5.2.1] */
3112 struct charseq
*seq
;
3114 seq
= charmap_find_value (charmap
, "space", 5);
3116 seq
= charmap_find_value (charmap
, "SP", 2);
3118 seq
= charmap_find_value (charmap
, "U00000020", 9);
3122 WITH_CUR_LOCALE (error (0, 0, _("\
3123 %s: character `%s' not defined while needed as default value"),
3124 "LC_CTYPE", "<space>"));
3126 else if (seq
->nbytes
!= 1)
3127 WITH_CUR_LOCALE (error (0, 0, _("\
3128 %s: character `%s' in charmap not representable with one byte"),
3129 "LC_CTYPE", "<space>"));
3131 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3133 /* No need to search. */
3134 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_space
);
3136 seq
= charmap_find_value (charmap
, "form-feed", 9);
3138 seq
= charmap_find_value (charmap
, "U0000000C", 9);
3142 WITH_CUR_LOCALE (error (0, 0, _("\
3143 %s: character `%s' not defined while needed as default value"),
3144 "LC_CTYPE", "<form-feed>"));
3146 else if (seq
->nbytes
!= 1)
3147 WITH_CUR_LOCALE (error (0, 0, _("\
3148 %s: character `%s' in charmap not representable with one byte"),
3149 "LC_CTYPE", "<form-feed>"));
3151 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3153 /* No need to search. */
3154 ELEM (ctype
, class_collection
, , L
'\f') |= BITw (tok_space
);
3157 seq
= charmap_find_value (charmap
, "newline", 7);
3159 seq
= charmap_find_value (charmap
, "U0000000A", 9);
3163 WITH_CUR_LOCALE (error (0, 0, _("\
3164 character `%s' not defined while needed as default value"),
3167 else if (seq
->nbytes
!= 1)
3168 WITH_CUR_LOCALE (error (0, 0, _("\
3169 %s: character `%s' in charmap not representable with one byte"),
3170 "LC_CTYPE", "<newline>"));
3172 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3174 /* No need to search. */
3175 ELEM (ctype
, class_collection
, , L
'\n') |= BITw (tok_space
);
3178 seq
= charmap_find_value (charmap
, "carriage-return", 15);
3180 seq
= charmap_find_value (charmap
, "U0000000D", 9);
3184 WITH_CUR_LOCALE (error (0, 0, _("\
3185 %s: character `%s' not defined while needed as default value"),
3186 "LC_CTYPE", "<carriage-return>"));
3188 else if (seq
->nbytes
!= 1)
3189 WITH_CUR_LOCALE (error (0, 0, _("\
3190 %s: character `%s' in charmap not representable with one byte"),
3191 "LC_CTYPE", "<carriage-return>"));
3193 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3195 /* No need to search. */
3196 ELEM (ctype
, class_collection
, , L
'\r') |= BITw (tok_space
);
3199 seq
= charmap_find_value (charmap
, "tab", 3);
3201 seq
= charmap_find_value (charmap
, "U00000009", 9);
3205 WITH_CUR_LOCALE (error (0, 0, _("\
3206 %s: character `%s' not defined while needed as default value"),
3207 "LC_CTYPE", "<tab>"));
3209 else if (seq
->nbytes
!= 1)
3210 WITH_CUR_LOCALE (error (0, 0, _("\
3211 %s: character `%s' in charmap not representable with one byte"),
3212 "LC_CTYPE", "<tab>"));
3214 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3216 /* No need to search. */
3217 ELEM (ctype
, class_collection
, , L
'\t') |= BITw (tok_space
);
3220 seq
= charmap_find_value (charmap
, "vertical-tab", 12);
3222 seq
= charmap_find_value (charmap
, "U0000000B", 9);
3226 WITH_CUR_LOCALE (error (0, 0, _("\
3227 %s: character `%s' not defined while needed as default value"),
3228 "LC_CTYPE", "<vertical-tab>"));
3230 else if (seq
->nbytes
!= 1)
3231 WITH_CUR_LOCALE (error (0, 0, _("\
3232 %s: character `%s' in charmap not representable with one byte"),
3233 "LC_CTYPE", "<vertical-tab>"));
3235 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3237 /* No need to search. */
3238 ELEM (ctype
, class_collection
, , L
'\v') |= BITw (tok_space
);
3241 if ((ctype
->class_done
& BITw (tok_xdigit
)) == 0)
3242 /* "If this keyword is not specified, the digits `0' to `9', the
3243 uppercase letters `A' through `F', and the lowercase letters `a'
3244 through `f', ..., shell automatically belong to this class, with
3245 implementation defined character values." [P1003.2, 2.5.2.1] */
3247 set_default (BITPOS (tok_xdigit
), '0', '9');
3248 set_default (BITPOS (tok_xdigit
), 'A', 'F');
3249 set_default (BITPOS (tok_xdigit
), 'a', 'f');
3252 if ((ctype
->class_done
& BITw (tok_blank
)) == 0)
3253 /* "If this keyword [blank] is unspecified, the characters <space> and
3254 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
3256 struct charseq
*seq
;
3258 seq
= charmap_find_value (charmap
, "space", 5);
3260 seq
= charmap_find_value (charmap
, "SP", 2);
3262 seq
= charmap_find_value (charmap
, "U00000020", 9);
3266 WITH_CUR_LOCALE (error (0, 0, _("\
3267 %s: character `%s' not defined while needed as default value"),
3268 "LC_CTYPE", "<space>"));
3270 else if (seq
->nbytes
!= 1)
3271 WITH_CUR_LOCALE (error (0, 0, _("\
3272 %s: character `%s' in charmap not representable with one byte"),
3273 "LC_CTYPE", "<space>"));
3275 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_blank
);
3277 /* No need to search. */
3278 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_blank
);
3281 seq
= charmap_find_value (charmap
, "tab", 3);
3283 seq
= charmap_find_value (charmap
, "U00000009", 9);
3287 WITH_CUR_LOCALE (error (0, 0, _("\
3288 %s: character `%s' not defined while needed as default value"),
3289 "LC_CTYPE", "<tab>"));
3291 else if (seq
->nbytes
!= 1)
3292 WITH_CUR_LOCALE (error (0, 0, _("\
3293 %s: character `%s' in charmap not representable with one byte"),
3294 "LC_CTYPE", "<tab>"));
3296 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_blank
);
3298 /* No need to search. */
3299 ELEM (ctype
, class_collection
, , L
'\t') |= BITw (tok_blank
);
3302 if ((ctype
->class_done
& BITw (tok_graph
)) == 0)
3303 /* "If this keyword [graph] is not specified, characters specified for
3304 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3305 shall belong to this character class." [P1003.2, 2.5.2.1] */
3307 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
) |
3308 BIT (tok_alpha
) | BIT (tok_digit
) | BIT (tok_xdigit
) | BIT (tok_punct
);
3309 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
) |
3310 BITw (tok_alpha
) | BITw (tok_digit
) | BITw (tok_xdigit
) |
3314 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3315 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3316 ctype
->class_collection
[cnt
] |= BITw (tok_graph
);
3318 for (cnt
= 0; cnt
< 256; ++cnt
)
3319 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3320 ctype
->class256_collection
[cnt
] |= BIT (tok_graph
);
3323 if ((ctype
->class_done
& BITw (tok_print
)) == 0)
3324 /* "If this keyword [print] is not provided, characters specified for
3325 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3326 and the <space> character shall belong to this character class."
3327 [P1003.2, 2.5.2.1] */
3329 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
) |
3330 BIT (tok_alpha
) | BIT (tok_digit
) | BIT (tok_xdigit
) | BIT (tok_punct
);
3331 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
) |
3332 BITw (tok_alpha
) | BITw (tok_digit
) | BITw (tok_xdigit
) |
3335 struct charseq
*seq
;
3337 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3338 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3339 ctype
->class_collection
[cnt
] |= BITw (tok_print
);
3341 for (cnt
= 0; cnt
< 256; ++cnt
)
3342 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3343 ctype
->class256_collection
[cnt
] |= BIT (tok_print
);
3346 seq
= charmap_find_value (charmap
, "space", 5);
3348 seq
= charmap_find_value (charmap
, "SP", 2);
3350 seq
= charmap_find_value (charmap
, "U00000020", 9);
3354 WITH_CUR_LOCALE (error (0, 0, _("\
3355 %s: character `%s' not defined while needed as default value"),
3356 "LC_CTYPE", "<space>"));
3358 else if (seq
->nbytes
!= 1)
3359 WITH_CUR_LOCALE (error (0, 0, _("\
3360 %s: character `%s' in charmap not representable with one byte"),
3361 "LC_CTYPE", "<space>"));
3363 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_print
);
3365 /* No need to search. */
3366 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_print
);
3369 if (ctype
->tomap_done
[0] == 0)
3370 /* "If this keyword [toupper] is not specified, the lowercase letters
3371 `a' through `z', and their corresponding uppercase letters `A' to
3372 `Z', ..., shall automatically be included, with implementation-
3373 defined character values." [P1003.2, 2.5.2.1] */
3378 strcpy (tmp
, "<?>");
3380 for (ch
= 'a'; ch
<= 'z'; ++ch
)
3382 struct charseq
*seq_from
, *seq_to
;
3386 seq_from
= charmap_find_value (charmap
, &tmp
[1], 1);
3387 if (seq_from
== NULL
)
3390 sprintf (buf
, "U%08X", ch
);
3391 seq_from
= charmap_find_value (charmap
, buf
, 9);
3393 if (seq_from
== NULL
)
3396 WITH_CUR_LOCALE (error (0, 0, _("\
3397 %s: character `%s' not defined while needed as default value"),
3400 else if (seq_from
->nbytes
!= 1)
3403 WITH_CUR_LOCALE (error (0, 0, _("\
3404 %s: character `%s' needed as default value not representable with one byte"),
3409 /* This conversion is implementation defined. */
3410 tmp
[1] = (char) (ch
+ ('A' - 'a'));
3411 seq_to
= charmap_find_value (charmap
, &tmp
[1], 1);
3415 sprintf (buf
, "U%08X", ch
+ ('A' - 'a'));
3416 seq_to
= charmap_find_value (charmap
, buf
, 9);
3421 WITH_CUR_LOCALE (error (0, 0, _("\
3422 %s: character `%s' not defined while needed as default value"),
3425 else if (seq_to
->nbytes
!= 1)
3428 WITH_CUR_LOCALE (error (0, 0, _("\
3429 %s: character `%s' needed as default value not representable with one byte"),
3433 /* The index [0] is determined by the order of the
3434 `ctype_map_newP' calls in `ctype_startup'. */
3435 ctype
->map256_collection
[0][seq_from
->bytes
[0]]
3439 /* No need to search. */
3440 ELEM (ctype
, map_collection
, [0], ch
) = ch
+ ('A' - 'a');
3444 if (ctype
->tomap_done
[1] == 0)
3445 /* "If this keyword [tolower] is not specified, the mapping shall be
3446 the reverse mapping of the one specified to `toupper'." [P1003.2] */
3448 for (cnt
= 0; cnt
< ctype
->map_collection_act
[0]; ++cnt
)
3449 if (ctype
->map_collection
[0][cnt
] != 0)
3450 ELEM (ctype
, map_collection
, [1],
3451 ctype
->map_collection
[0][cnt
])
3452 = ctype
->charnames
[cnt
];
3454 for (cnt
= 0; cnt
< 256; ++cnt
)
3455 if (ctype
->map256_collection
[0][cnt
] != 0)
3456 ctype
->map256_collection
[1][ctype
->map256_collection
[0][cnt
]] = cnt
;
3459 if (ctype
->outdigits_act
!= 10)
3461 if (ctype
->outdigits_act
!= 0)
3462 WITH_CUR_LOCALE (error (0, 0, _("\
3463 %s: field `%s' does not contain exactly ten entries"),
3464 "LC_CTYPE", "outdigit"));
3466 for (cnt
= ctype
->outdigits_act
; cnt
< 10; ++cnt
)
3468 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3471 if (ctype
->mboutdigits
[cnt
] == NULL
)
3472 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3474 strlen (longnames
[cnt
]));
3476 if (ctype
->mboutdigits
[cnt
] == NULL
)
3477 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3480 if (ctype
->mboutdigits
[cnt
] == NULL
)
3482 /* Provide a replacement. */
3483 WITH_CUR_LOCALE (error (0, 0, _("\
3484 no output digits defined and none of the standard names in the charmap")));
3486 ctype
->mboutdigits
[cnt
] = obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
3487 sizeof (struct charseq
)
3490 /* This is better than nothing. */
3491 ctype
->mboutdigits
[cnt
]->bytes
[0] = digits
[cnt
];
3492 ctype
->mboutdigits
[cnt
]->nbytes
= 1;
3495 ctype
->wcoutdigits
[cnt
] = L
'0' + cnt
;
3498 ctype
->outdigits_act
= 10;
/* Construction of sparse 3-level tables.
   See wchar-lookup.h for their structure and the meaning of p and q.  */

struct wctype_table
{
  /* Parameters: a wide character is split into a level-1 index, q bits of
     level-2 index, p bits of level-3 index and 5 bits selecting one bit
     inside a 32-bit word.  */
  unsigned int p;
  unsigned int q;
  /* Working representation.  For each level: number of blocks storage has
     been reserved for, number of blocks in use, and the block array.  */
  size_t level1_alloc;
  size_t level1_size;
  uint32_t *level1;
  size_t level2_alloc;
  size_t level2_size;
  uint32_t *level2;
  size_t level3_alloc;
  size_t level3_size;
  uint32_t *level3;
  /* Compressed representation, produced by wctype_table_finalize.  */
  size_t result_size;
  char *result;
};
3526 /* Initialize. Assumes t->p and t->q have already been set. */
3528 wctype_table_init (struct wctype_table
*t
)
3531 t
->level1_alloc
= t
->level1_size
= 0;
3533 t
->level2_alloc
= t
->level2_size
= 0;
3535 t
->level3_alloc
= t
->level3_size
= 0;
3538 /* Retrieve an entry. */
3540 wctype_table_get (struct wctype_table
*t
, uint32_t wc
)
3542 uint32_t index1
= wc
>> (t
->q
+ t
->p
+ 5);
3543 if (index1
< t
->level1_size
)
3545 uint32_t lookup1
= t
->level1
[index1
];
3546 if (lookup1
!= EMPTY
)
3548 uint32_t index2
= ((wc
>> (t
->p
+ 5)) & ((1 << t
->q
) - 1))
3549 + (lookup1
<< t
->q
);
3550 uint32_t lookup2
= t
->level2
[index2
];
3551 if (lookup2
!= EMPTY
)
3553 uint32_t index3
= ((wc
>> 5) & ((1 << t
->p
) - 1))
3554 + (lookup2
<< t
->p
);
3555 uint32_t lookup3
= t
->level3
[index3
];
3556 uint32_t index4
= wc
& 0x1f;
3558 return (lookup3
>> index4
) & 1;
3565 /* Add one entry. */
3567 wctype_table_add (struct wctype_table
*t
, uint32_t wc
)
3569 uint32_t index1
= wc
>> (t
->q
+ t
->p
+ 5);
3570 uint32_t index2
= (wc
>> (t
->p
+ 5)) & ((1 << t
->q
) - 1);
3571 uint32_t index3
= (wc
>> 5) & ((1 << t
->p
) - 1);
3572 uint32_t index4
= wc
& 0x1f;
3575 if (index1
>= t
->level1_size
)
3577 if (index1
>= t
->level1_alloc
)
3579 size_t alloc
= 2 * t
->level1_alloc
;
3580 if (alloc
<= index1
)
3582 t
->level1
= (uint32_t *) xrealloc ((char *) t
->level1
,
3583 alloc
* sizeof (uint32_t));
3584 t
->level1_alloc
= alloc
;
3586 while (index1
>= t
->level1_size
)
3587 t
->level1
[t
->level1_size
++] = EMPTY
;
3590 if (t
->level1
[index1
] == EMPTY
)
3592 if (t
->level2_size
== t
->level2_alloc
)
3594 size_t alloc
= 2 * t
->level2_alloc
+ 1;
3595 t
->level2
= (uint32_t *) xrealloc ((char *) t
->level2
,
3596 (alloc
<< t
->q
) * sizeof (uint32_t));
3597 t
->level2_alloc
= alloc
;
3599 i1
= t
->level2_size
<< t
->q
;
3600 i2
= (t
->level2_size
+ 1) << t
->q
;
3601 for (i
= i1
; i
< i2
; i
++)
3602 t
->level2
[i
] = EMPTY
;
3603 t
->level1
[index1
] = t
->level2_size
++;
3606 index2
+= t
->level1
[index1
] << t
->q
;
3608 if (t
->level2
[index2
] == EMPTY
)
3610 if (t
->level3_size
== t
->level3_alloc
)
3612 size_t alloc
= 2 * t
->level3_alloc
+ 1;
3613 t
->level3
= (uint32_t *) xrealloc ((char *) t
->level3
,
3614 (alloc
<< t
->p
) * sizeof (uint32_t));
3615 t
->level3_alloc
= alloc
;
3617 i1
= t
->level3_size
<< t
->p
;
3618 i2
= (t
->level3_size
+ 1) << t
->p
;
3619 for (i
= i1
; i
< i2
; i
++)
3621 t
->level2
[index2
] = t
->level3_size
++;
3624 index3
+= t
->level2
[index2
] << t
->p
;
3626 t
->level3
[index3
] |= (uint32_t)1 << index4
;
3629 /* Finalize and shrink. */
3631 wctype_table_finalize (struct wctype_table
*t
)
3634 uint32_t reorder3
[t
->level3_size
];
3635 uint32_t reorder2
[t
->level2_size
];
3636 uint32_t level1_offset
, level2_offset
, level3_offset
;
3638 /* Uniquify level3 blocks. */
3640 for (j
= 0; j
< t
->level3_size
; j
++)
3642 for (i
= 0; i
< k
; i
++)
3643 if (memcmp (&t
->level3
[i
<< t
->p
], &t
->level3
[j
<< t
->p
],
3644 (1 << t
->p
) * sizeof (uint32_t)) == 0)
3646 /* Relocate block j to block i. */
3651 memcpy (&t
->level3
[i
<< t
->p
], &t
->level3
[j
<< t
->p
],
3652 (1 << t
->p
) * sizeof (uint32_t));
3658 for (i
= 0; i
< (t
->level2_size
<< t
->q
); i
++)
3659 if (t
->level2
[i
] != EMPTY
)
3660 t
->level2
[i
] = reorder3
[t
->level2
[i
]];
3662 /* Uniquify level2 blocks. */
3664 for (j
= 0; j
< t
->level2_size
; j
++)
3666 for (i
= 0; i
< k
; i
++)
3667 if (memcmp (&t
->level2
[i
<< t
->q
], &t
->level2
[j
<< t
->q
],
3668 (1 << t
->q
) * sizeof (uint32_t)) == 0)
3670 /* Relocate block j to block i. */
3675 memcpy (&t
->level2
[i
<< t
->q
], &t
->level2
[j
<< t
->q
],
3676 (1 << t
->q
) * sizeof (uint32_t));
3682 for (i
= 0; i
< t
->level1_size
; i
++)
3683 if (t
->level1
[i
] != EMPTY
)
3684 t
->level1
[i
] = reorder2
[t
->level1
[i
]];
3686 /* Create and fill the resulting compressed representation. */
3688 5 * sizeof (uint32_t)
3689 + t
->level1_size
* sizeof (uint32_t)
3690 + (t
->level2_size
<< t
->q
) * sizeof (uint32_t)
3691 + (t
->level3_size
<< t
->p
) * sizeof (uint32_t);
3692 t
->result
= (char *) xmalloc (t
->result_size
);
3695 5 * sizeof (uint32_t);
3697 5 * sizeof (uint32_t)
3698 + t
->level1_size
* sizeof (uint32_t);
3700 5 * sizeof (uint32_t)
3701 + t
->level1_size
* sizeof (uint32_t)
3702 + (t
->level2_size
<< t
->q
) * sizeof (uint32_t);
3704 ((uint32_t *) t
->result
)[0] = t
->q
+ t
->p
+ 5;
3705 ((uint32_t *) t
->result
)[1] = t
->level1_size
;
3706 ((uint32_t *) t
->result
)[2] = t
->p
+ 5;
3707 ((uint32_t *) t
->result
)[3] = (1 << t
->q
) - 1;
3708 ((uint32_t *) t
->result
)[4] = (1 << t
->p
) - 1;
3710 for (i
= 0; i
< t
->level1_size
; i
++)
3711 ((uint32_t *) (t
->result
+ level1_offset
))[i
] =
3712 (t
->level1
[i
] == EMPTY
3714 : (t
->level1
[i
] << t
->q
) * sizeof (uint32_t) + level2_offset
);
3716 for (i
= 0; i
< (t
->level2_size
<< t
->q
); i
++)
3717 ((uint32_t *) (t
->result
+ level2_offset
))[i
] =
3718 (t
->level2
[i
] == EMPTY
3720 : (t
->level2
[i
] << t
->p
) * sizeof (uint32_t) + level3_offset
);
3722 for (i
= 0; i
< (t
->level3_size
<< t
->p
); i
++)
3723 ((uint32_t *) (t
->result
+ level3_offset
))[i
] = t
->level3
[i
];
3725 if (t
->level1_alloc
> 0)
3727 if (t
->level2_alloc
> 0)
3729 if (t
->level3_alloc
> 0)
3733 #define TABLE wcwidth_table
3734 #define ELEMENT uint8_t
3735 #define DEFAULT 0xff
3738 #define TABLE wctrans_table
3739 #define ELEMENT int32_t
3741 #define wctrans_table_add wctrans_table_add_internal
3743 #undef wctrans_table_add
/* The wctrans_table must actually store the difference between the
   desired result and the argument.

   T is the table to update, WC the character being mapped and
   MAPPED_WC the character it maps to.  The stored element is
   MAPPED_WC - WC, computed in uint32_t arithmetic.  */
static inline void
wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
{
  wctrans_table_add_internal (t, wc, mapped_wc - wc);
}
3753 /* Flattens the included transliterations into a translit list.
3754 Inserts them in the list at `cursor', and returns the new cursor. */
3755 static struct translit_t
**
3756 translit_flatten (struct locale_ctype_t
*ctype
,
3757 const struct charmap_t
*charmap
,
3758 struct translit_t
**cursor
)
3760 while (ctype
->translit_include
!= NULL
)
3762 const char *copy_locale
= ctype
->translit_include
->copy_locale
;
3763 const char *copy_repertoire
= ctype
->translit_include
->copy_repertoire
;
3764 struct localedef_t
*other
;
3766 /* Unchain the include statement. During the depth-first traversal
3767 we don't want to visit any locale more than once. */
3768 ctype
->translit_include
= ctype
->translit_include
->next
;
3770 other
= find_locale (LC_CTYPE
, copy_locale
, copy_repertoire
, charmap
);
3772 if (other
== NULL
|| other
->categories
[LC_CTYPE
].ctype
== NULL
)
3774 WITH_CUR_LOCALE (error (0, 0, _("\
3775 %s: transliteration data from locale `%s' not available"),
3776 "LC_CTYPE", copy_locale
));
3780 struct locale_ctype_t
*other_ctype
=
3781 other
->categories
[LC_CTYPE
].ctype
;
3783 cursor
= translit_flatten (other_ctype
, charmap
, cursor
);
3784 assert (other_ctype
->translit_include
== NULL
);
3786 if (other_ctype
->translit
!= NULL
)
3788 /* Insert the other_ctype->translit list at *cursor. */
3789 struct translit_t
*endp
= other_ctype
->translit
;
3790 while (endp
->next
!= NULL
)
3793 endp
->next
= *cursor
;
3794 *cursor
= other_ctype
->translit
;
3796 /* Avoid any risk of circular lists. */
3797 other_ctype
->translit
= NULL
;
3799 cursor
= &endp
->next
;
3802 if (ctype
->default_missing
== NULL
)
3803 ctype
->default_missing
= other_ctype
->default_missing
;
3811 allocate_arrays (struct locale_ctype_t
*ctype
, const struct charmap_t
*charmap
,
3812 struct repertoire_t
*repertoire
)
3820 /* You wonder about this amount of memory? This is only because some
3821 users do not manage to address the array with unsigned values or
3822 data types with range >= 256. '\200' would result in the array
3823 index -128. To help these poor people we duplicate the entries for
3824 128 up to 255 below the entry for \0. */
3825 ctype
->ctype_b
= (char_class_t
*) xcalloc (256 + 128, sizeof (char_class_t
));
3826 ctype
->ctype32_b
= (char_class32_t
*) xcalloc (256, sizeof (char_class32_t
));
3827 ctype
->class_b
= (uint32_t **)
3828 xmalloc (ctype
->nr_charclass
* sizeof (uint32_t *));
3829 ctype
->class_3level
= (struct iovec
*)
3830 xmalloc (ctype
->nr_charclass
* sizeof (struct iovec
));
3832 /* This is the array accessed using the multibyte string elements. */
3833 for (idx
= 0; idx
< 256; ++idx
)
3834 ctype
->ctype_b
[128 + idx
] = ctype
->class256_collection
[idx
];
3836 /* Mirror first 127 entries. We must take care that entry -1 is not
3837 mirrored because EOF == -1. */
3838 for (idx
= 0; idx
< 127; ++idx
)
3839 ctype
->ctype_b
[idx
] = ctype
->ctype_b
[256 + idx
];
3841 /* The 32 bit array contains all characters < 0x100. */
3842 for (idx
= 0; idx
< ctype
->class_collection_act
; ++idx
)
3843 if (ctype
->charnames
[idx
] < 0x100)
3844 ctype
->ctype32_b
[ctype
->charnames
[idx
]] = ctype
->class_collection
[idx
];
3846 for (nr
= 0; nr
< ctype
->nr_charclass
; nr
++)
3848 ctype
->class_b
[nr
] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
3850 /* We only set CLASS_B for the bits in the ISO C classes, not
3851 the user defined classes. The number should not change but
3853 #define LAST_ISO_C_BIT 11
3854 if (nr
<= LAST_ISO_C_BIT
)
3855 for (idx
= 0; idx
< 256; ++idx
)
3856 if (ctype
->class256_collection
[idx
] & _ISbit (nr
))
3857 ctype
->class_b
[nr
][idx
>> 5] |= (uint32_t) 1 << (idx
& 0x1f);
3860 for (nr
= 0; nr
< ctype
->nr_charclass
; nr
++)
3862 struct wctype_table t
;
3864 t
.p
= 4; /* or: 5 */
3865 t
.q
= 7; /* or: 6 */
3866 wctype_table_init (&t
);
3868 for (idx
= 0; idx
< ctype
->class_collection_act
; ++idx
)
3869 if (ctype
->class_collection
[idx
] & _ISwbit (nr
))
3870 wctype_table_add (&t
, ctype
->charnames
[idx
]);
3872 wctype_table_finalize (&t
);
3875 WITH_CUR_LOCALE (fprintf (stderr
, _("\
3876 %s: table for class \"%s\": %lu bytes\n"),
3877 "LC_CTYPE", ctype
->classnames
[nr
],
3878 (unsigned long int) t
.result_size
));
3880 ctype
->class_3level
[nr
].iov_base
= t
.result
;
3881 ctype
->class_3level
[nr
].iov_len
= t
.result_size
;
3884 /* Room for table of mappings. */
3885 ctype
->map_b
= (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3886 ctype
->map32_b
= (uint32_t **) xmalloc (ctype
->map_collection_nr
3887 * sizeof (uint32_t *));
3888 ctype
->map_3level
= (struct iovec
*)
3889 xmalloc (ctype
->map_collection_nr
* sizeof (struct iovec
));
3891 /* Fill in all mappings. */
3892 for (idx
= 0; idx
< 2; ++idx
)
3896 /* Allocate table. */
3897 ctype
->map_b
[idx
] = (uint32_t *)
3898 xmalloc ((256 + 128) * sizeof (uint32_t));
3900 /* Copy values from collection. */
3901 for (idx2
= 0; idx2
< 256; ++idx2
)
3902 ctype
->map_b
[idx
][128 + idx2
] = ctype
->map256_collection
[idx
][idx2
];
3904 /* Mirror first 127 entries. We must take care not to map entry
3905 -1 because EOF == -1. */
3906 for (idx2
= 0; idx2
< 127; ++idx2
)
3907 ctype
->map_b
[idx
][idx2
] = ctype
->map_b
[idx
][256 + idx2
];
3909 /* EOF must map to EOF. */
3910 ctype
->map_b
[idx
][127] = EOF
;
3913 for (idx
= 0; idx
< ctype
->map_collection_nr
; ++idx
)
3917 /* Allocate table. */
3918 ctype
->map32_b
[idx
] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
3920 /* Copy values from collection. Default is identity mapping. */
3921 for (idx2
= 0; idx2
< 256; ++idx2
)
3922 ctype
->map32_b
[idx
][idx2
] =
3923 (ctype
->map_collection
[idx
][idx2
] != 0
3924 ? ctype
->map_collection
[idx
][idx2
]
3928 for (nr
= 0; nr
< ctype
->map_collection_nr
; nr
++)
3930 struct wctrans_table t
;
3934 wctrans_table_init (&t
);
3936 for (idx
= 0; idx
< ctype
->map_collection_act
[nr
]; ++idx
)
3937 if (ctype
->map_collection
[nr
][idx
] != 0)
3938 wctrans_table_add (&t
, ctype
->charnames
[idx
],
3939 ctype
->map_collection
[nr
][idx
]);
3941 wctrans_table_finalize (&t
);
3944 WITH_CUR_LOCALE (fprintf (stderr
, _("\
3945 %s: table for map \"%s\": %lu bytes\n"),
3946 "LC_CTYPE", ctype
->mapnames
[nr
],
3947 (unsigned long int) t
.result_size
));
3949 ctype
->map_3level
[nr
].iov_base
= t
.result
;
3950 ctype
->map_3level
[nr
].iov_len
= t
.result_size
;
3953 /* Extra array for class and map names. */
3954 ctype
->class_name_ptr
= (uint32_t *) xmalloc (ctype
->nr_charclass
3955 * sizeof (uint32_t));
3956 ctype
->map_name_ptr
= (uint32_t *) xmalloc (ctype
->map_collection_nr
3957 * sizeof (uint32_t));
3959 ctype
->class_offset
= _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
);
3960 ctype
->map_offset
= ctype
->class_offset
+ ctype
->nr_charclass
;
3962 /* Array for width information. Because the expected widths are very
3963 small (never larger than 2) we use only one single byte. This
3965 We put only printable characters in the table. wcwidth is specified
3966 to return -1 for non-printable characters. Doing the check here
3967 saves a run-time check.
3968 But we put L'\0' in the table. This again saves a run-time check. */
3970 struct wcwidth_table t
;
3974 wcwidth_table_init (&t
);
3976 /* First set all the printable characters of the character set to
3977 the default width. */
3979 while (iterate_table (&charmap
->char_table
, &curs
, &key
, &len
, &vdata
) == 0)
3981 struct charseq
*data
= (struct charseq
*) vdata
;
3983 if (data
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
3984 data
->ucs4
= repertoire_find_value (ctype
->repertoire
,
3987 if (data
->ucs4
!= ILLEGAL_CHAR_VALUE
)
3989 uint32_t *class_bits
=
3990 find_idx (ctype
, &ctype
->class_collection
, NULL
,
3991 &ctype
->class_collection_act
, data
->ucs4
);
3993 if (class_bits
!= NULL
&& (*class_bits
& BITw (tok_print
)))
3994 wcwidth_table_add (&t
, data
->ucs4
, charmap
->width_default
);
3998 /* Now add the explicitly specified widths. */
3999 if (charmap
->width_rules
!= NULL
)
4003 for (cnt
= 0; cnt
< charmap
->nwidth_rules
; ++cnt
)
4005 unsigned char bytes
[charmap
->mb_cur_max
];
4006 int nbytes
= charmap
->width_rules
[cnt
].from
->nbytes
;
4008 /* We have the range of characters for which the width is
4009 specified described using byte sequences of the multibyte
4010 charset. We have to convert this to UCS4 now. And we
4011 cannot simply convert the beginning and the end of the
4012 sequence, we have to iterate over the byte sequence and
4013 convert it for every single character. */
4014 memcpy (bytes
, charmap
->width_rules
[cnt
].from
->bytes
, nbytes
);
4016 while (nbytes
< charmap
->width_rules
[cnt
].to
->nbytes
4017 || memcmp (bytes
, charmap
->width_rules
[cnt
].to
->bytes
,
4020 /* Find the UCS value for `bytes'. */
4023 struct charseq
*seq
=
4024 charmap_find_symbol (charmap
, bytes
, nbytes
);
4027 wch
= ILLEGAL_CHAR_VALUE
;
4028 else if (seq
->ucs4
!= UNINITIALIZED_CHAR_VALUE
)
4031 wch
= repertoire_find_value (ctype
->repertoire
, seq
->name
,
4032 strlen (seq
->name
));
4034 if (wch
!= ILLEGAL_CHAR_VALUE
)
4036 /* Store the value. */
4037 uint32_t *class_bits
=
4038 find_idx (ctype
, &ctype
->class_collection
, NULL
,
4039 &ctype
->class_collection_act
, wch
);
4041 if (class_bits
!= NULL
&& (*class_bits
& BITw (tok_print
)))
4042 wcwidth_table_add (&t
, wch
,
4043 charmap
->width_rules
[cnt
].width
);
4046 /* "Increment" the bytes sequence. */
4048 while (inner
>= 0 && bytes
[inner
] == 0xff)
4053 /* We have to extend the byte sequence. */
4054 if (nbytes
>= charmap
->width_rules
[cnt
].to
->nbytes
)
4058 memset (&bytes
[1], 0, nbytes
);
4064 while (++inner
< nbytes
)
4071 /* Set the width of L'\0' to 0. */
4072 wcwidth_table_add (&t
, 0, 0);
4074 wcwidth_table_finalize (&t
);
4077 WITH_CUR_LOCALE (fprintf (stderr
, _("%s: table for width: %lu bytes\n"),
4078 "LC_CTYPE", (unsigned long int) t
.result_size
));
4080 ctype
->width
.iov_base
= t
.result
;
4081 ctype
->width
.iov_len
= t
.result_size
;
4084 /* Set MB_CUR_MAX. */
4085 ctype
->mb_cur_max
= charmap
->mb_cur_max
;
4087 /* Now determine the table for the transliteration information.
4089 XXX It is not yet clear to me whether it is worth implementing a
4090 complicated algorithm which uses a hash table to locate the entries.
4091 For now I'll use a simple array which can be searched using binary
4093 if (ctype
->translit_include
!= NULL
)
4094 /* Traverse the locales mentioned in the `include' statements in a
4095 depth-first way and fold in their transliteration information. */
4096 translit_flatten (ctype
, charmap
, &ctype
->translit
);
4098 if (ctype
->translit
!= NULL
)
4100 /* First count how many entries we have. This is the upper limit
4101 since some entries from the included files might be overwritten. */
4104 struct translit_t
*runp
= ctype
->translit
;
4105 struct translit_t
**sorted
;
4106 size_t from_len
, to_len
;
4108 while (runp
!= NULL
)
4114 /* Next we allocate an array large enough and fill in the values. */
4115 sorted
= (struct translit_t
**) alloca (number
4116 * sizeof (struct translit_t
**));
4117 runp
= ctype
->translit
;
4121 /* Search for the place where to insert this string.
4122 XXX Better use a real sorting algorithm later. */
4126 while (idx
< number
)
4128 int res
= wcscmp ((const wchar_t *) sorted
[idx
]->from
,
4129 (const wchar_t *) runp
->from
);
4144 memmove (&sorted
[idx
+ 1], &sorted
[idx
],
4145 (number
- idx
) * sizeof (struct translit_t
*));
4152 while (runp
!= NULL
);
4154 /* The next step is putting all the possible transliteration
4155 strings in one memory block so that we can write it out.
4156 We need several different blocks:
4157 - index to the from-string array
4159 - index to the to-string array
4162 from_len
= to_len
= 0;
4163 for (cnt
= 0; cnt
< number
; ++cnt
)
4165 struct translit_to_t
*srunp
;
4166 from_len
+= wcslen ((const wchar_t *) sorted
[cnt
]->from
) + 1;
4167 srunp
= sorted
[cnt
]->to
;
4168 while (srunp
!= NULL
)
4170 to_len
+= wcslen ((const wchar_t *) srunp
->str
) + 1;
4171 srunp
= srunp
->next
;
4173 /* Plus one for the extra NUL character marking the end of
4174 the list for the current entry. */
4178 /* We can allocate the arrays for the results. */
4179 ctype
->translit_from_idx
= xmalloc (number
* sizeof (uint32_t));
4180 ctype
->translit_from_tbl
= xmalloc (from_len
* sizeof (uint32_t));
4181 ctype
->translit_to_idx
= xmalloc (number
* sizeof (uint32_t));
4182 ctype
->translit_to_tbl
= xmalloc (to_len
* sizeof (uint32_t));
4186 for (cnt
= 0; cnt
< number
; ++cnt
)
4189 struct translit_to_t
*srunp
;
4191 ctype
->translit_from_idx
[cnt
] = from_len
;
4192 ctype
->translit_to_idx
[cnt
] = to_len
;
4194 len
= wcslen ((const wchar_t *) sorted
[cnt
]->from
) + 1;
4195 wmemcpy ((wchar_t *) &ctype
->translit_from_tbl
[from_len
],
4196 (const wchar_t *) sorted
[cnt
]->from
, len
);
4199 ctype
->translit_to_idx
[cnt
] = to_len
;
4200 srunp
= sorted
[cnt
]->to
;
4201 while (srunp
!= NULL
)
4203 len
= wcslen ((const wchar_t *) srunp
->str
) + 1;
4204 wmemcpy ((wchar_t *) &ctype
->translit_to_tbl
[to_len
],
4205 (const wchar_t *) srunp
->str
, len
);
4207 srunp
= srunp
->next
;
4209 ctype
->translit_to_tbl
[to_len
++] = L
'\0';
4212 /* Store the information about the length. */
4213 ctype
->translit_idx_size
= number
;
4214 ctype
->translit_from_tbl_size
= from_len
* sizeof (uint32_t);
4215 ctype
->translit_to_tbl_size
= to_len
* sizeof (uint32_t);
4219 /* Provide some dummy pointers since we have nothing to write out. */
4220 static uint32_t no_str
= { 0 };
4222 ctype
->translit_from_idx
= &no_str
;
4223 ctype
->translit_from_tbl
= &no_str
;
4224 ctype
->translit_to_tbl
= &no_str
;
4225 ctype
->translit_idx_size
= 0;
4226 ctype
->translit_from_tbl_size
= 0;
4227 ctype
->translit_to_tbl_size
= 0;