1 /* Copyright (C) 1995-2006, 2007 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License version 2 as
7 published by the Free Software Foundation.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
34 #include "localedef.h"
36 #include "localeinfo.h"
38 #include "linereader.h"
39 #include "locfile-token.h"
45 #ifdef PREDEFINED_CLASSES
46 /* These are the extra bits not in wctype.h since these are not preallocated
48 # define _ISwspecial1 (1 << 29)
49 # define _ISwspecial2 (1 << 30)
50 # define _ISwspecial3 (1 << 31)
54 /* The bit used for representing a special class.
   BITPOS (class) is the offset of the class token from tok_upper;
   BIT (class) and BITw (class) hand that offset to _ISbit and
   _ISwbit respectively to form the corresponding mask. */
55 #define BITPOS(class) ((class) - tok_upper)
56 #define BIT(class) (_ISbit (BITPOS (class)))
57 #define BITw(class) (_ISwbit (BITPOS (class)))
/* Expands to the dereferenced result of find_idx for VALUE, called with
   the table ctype->COLLECTION and its COLLECTION_max and COLLECTION_act
   counters.  IDX is placed after each of these three members (e.g. a
   subscript) and may be left empty.  Because the expansion is a
   dereference it can be used as the target of an assignment. */
59 #define ELEM(ctype, collection, idx, value) \
60 *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx, \
61 &ctype->collection##_act idx, value)
64 /* To be compatible with former implementations we for now restrict
65 the number of bits for character classes to 16. When compatibility
66 is not necessary anymore increase the number to 32. */
67 #define char_class_t uint16_t
68 #define char_class32_t uint32_t
71 /* Type to describe a transliteration action. We have a possibly
72 multiple character from-string and a set of multiple character
73 to-strings. All are 32bit values since this is what is used in
74 the gconv functions. */
79 struct translit_to_t
*next
;
89 struct translit_to_t
*to
;
91 struct translit_t
*next
;
94 struct translit_ignore_t
103 struct translit_ignore_t
*next
;
107 /* Type to describe a transliteration include statement. */
108 struct translit_include_t
110 const char *copy_locale
;
111 const char *copy_repertoire
;
113 struct translit_include_t
*next
;
117 /* Sparse table of uint32_t. */
118 #define TABLE idx_table
119 #define ELEMENT uint32_t
120 #define DEFAULT ((uint32_t) ~0)
125 /* The real definition of the struct for the LC_CTYPE locale. */
126 struct locale_ctype_t
129 size_t charnames_max
;
130 size_t charnames_act
;
131 /* An index lookup table, to speedup find_idx. */
132 struct idx_table charnames_idx
;
134 struct repertoire_t
*repertoire
;
136 /* We will allow up to 8 * sizeof (uint32_t) character classes. */
137 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
139 const char *classnames
[MAX_NR_CHARCLASS
];
140 uint32_t last_class_char
;
141 uint32_t class256_collection
[256];
142 uint32_t *class_collection
;
143 size_t class_collection_max
;
144 size_t class_collection_act
;
146 uint32_t class_offset
;
148 struct charseq
**mbdigits
;
155 struct charseq
*mboutdigits
[10];
156 uint32_t wcoutdigits
[10];
157 size_t outdigits_act
;
159 /* If the following number ever turns out to be too small simply
160 increase it. But I doubt it will. --drepper@gnu */
161 #define MAX_NR_CHARMAP 16
162 const char *mapnames
[MAX_NR_CHARMAP
];
163 uint32_t *map_collection
[MAX_NR_CHARMAP
];
164 uint32_t map256_collection
[2][256];
165 size_t map_collection_max
[MAX_NR_CHARMAP
];
166 size_t map_collection_act
[MAX_NR_CHARMAP
];
167 size_t map_collection_nr
;
169 int tomap_done
[MAX_NR_CHARMAP
];
172 /* Transliteration information. */
173 struct translit_include_t
*translit_include
;
174 struct translit_t
*translit
;
175 struct translit_ignore_t
*translit_ignore
;
176 uint32_t ntranslit_ignore
;
178 uint32_t *default_missing
;
179 const char *default_missing_file
;
180 size_t default_missing_lineno
;
182 uint32_t to_nonascii
;
184 /* The arrays for the binary representation. */
185 char_class_t
*ctype_b
;
186 char_class32_t
*ctype32_b
;
190 struct iovec
*class_3level
;
191 struct iovec
*map_3level
;
192 uint32_t *class_name_ptr
;
193 uint32_t *map_name_ptr
;
196 const char *codeset_name
;
197 uint32_t *translit_from_idx
;
198 uint32_t *translit_from_tbl
;
199 uint32_t *translit_to_idx
;
200 uint32_t *translit_to_tbl
;
201 uint32_t translit_idx_size
;
202 size_t translit_from_tbl_size
;
203 size_t translit_to_tbl_size
;
205 struct obstack mempool
;
209 /* Marker for an empty slot. This has the value 0xFFFFFFFF, regardless
210 whether 'int' is 16 bit, 32 bit, or 64 bit. */
211 #define EMPTY ((uint32_t) ~0)
/* Chunk allocation and release routines: xmalloc to obtain a chunk,
   free to give it back.  NOTE(review): presumably picked up by the
   obstack macros such as obstack_init -- confirm against obstack.h. */
214 #define obstack_chunk_alloc xmalloc
215 #define obstack_chunk_free free
218 /* Prototypes for local functions. */
219 static void ctype_startup (struct linereader
*lr
, struct localedef_t
*locale
,
220 const struct charmap_t
*charmap
,
221 struct localedef_t
*copy_locale
,
223 static void ctype_class_new (struct linereader
*lr
,
224 struct locale_ctype_t
*ctype
, const char *name
);
225 static void ctype_map_new (struct linereader
*lr
,
226 struct locale_ctype_t
*ctype
,
227 const char *name
, const struct charmap_t
*charmap
);
228 static uint32_t *find_idx (struct locale_ctype_t
*ctype
, uint32_t **table
,
229 size_t *max
, size_t *act
, unsigned int idx
);
230 static void set_class_defaults (struct locale_ctype_t
*ctype
,
231 const struct charmap_t
*charmap
,
232 struct repertoire_t
*repertoire
);
233 static void allocate_arrays (struct locale_ctype_t
*ctype
,
234 const struct charmap_t
*charmap
,
235 struct repertoire_t
*repertoire
);
238 static const char *longnames
[] =
240 "zero", "one", "two", "three", "four",
241 "five", "six", "seven", "eight", "nine"
243 static const char *uninames
[] =
245 "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
246 "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
248 static const unsigned char digits
[] = "0123456789";
252 ctype_startup (struct linereader
*lr
, struct localedef_t
*locale
,
253 const struct charmap_t
*charmap
,
254 struct localedef_t
*copy_locale
, int ignore_content
)
257 struct locale_ctype_t
*ctype
;
259 if (!ignore_content
&& locale
->categories
[LC_CTYPE
].ctype
== NULL
)
261 if (copy_locale
== NULL
)
263 /* Allocate the needed room. */
264 locale
->categories
[LC_CTYPE
].ctype
= ctype
=
265 (struct locale_ctype_t
*) xcalloc (1,
266 sizeof (struct locale_ctype_t
));
268 /* We have seen no names yet. */
269 ctype
->charnames_max
= charmap
->mb_cur_max
== 1 ? 256 : 512;
271 (unsigned int *) xmalloc (ctype
->charnames_max
272 * sizeof (unsigned int));
273 for (cnt
= 0; cnt
< 256; ++cnt
)
274 ctype
->charnames
[cnt
] = cnt
;
275 ctype
->charnames_act
= 256;
276 idx_table_init (&ctype
->charnames_idx
);
278 /* Fill character class information. */
279 ctype
->last_class_char
= ILLEGAL_CHAR_VALUE
;
280 /* The order of the following instructions determines the bit
282 ctype_class_new (lr
, ctype
, "upper");
283 ctype_class_new (lr
, ctype
, "lower");
284 ctype_class_new (lr
, ctype
, "alpha");
285 ctype_class_new (lr
, ctype
, "digit");
286 ctype_class_new (lr
, ctype
, "xdigit");
287 ctype_class_new (lr
, ctype
, "space");
288 ctype_class_new (lr
, ctype
, "print");
289 ctype_class_new (lr
, ctype
, "graph");
290 ctype_class_new (lr
, ctype
, "blank");
291 ctype_class_new (lr
, ctype
, "cntrl");
292 ctype_class_new (lr
, ctype
, "punct");
293 ctype_class_new (lr
, ctype
, "alnum");
294 #ifdef PREDEFINED_CLASSES
295 /* The following are extensions from ISO 14652. */
296 ctype_class_new (lr
, ctype
, "left_to_right");
297 ctype_class_new (lr
, ctype
, "right_to_left");
298 ctype_class_new (lr
, ctype
, "num_terminator");
299 ctype_class_new (lr
, ctype
, "num_separator");
300 ctype_class_new (lr
, ctype
, "segment_separator");
301 ctype_class_new (lr
, ctype
, "block_separator");
302 ctype_class_new (lr
, ctype
, "direction_control");
303 ctype_class_new (lr
, ctype
, "sym_swap_layout");
304 ctype_class_new (lr
, ctype
, "char_shape_selector");
305 ctype_class_new (lr
, ctype
, "num_shape_selector");
306 ctype_class_new (lr
, ctype
, "non_spacing");
307 ctype_class_new (lr
, ctype
, "non_spacing_level3");
308 ctype_class_new (lr
, ctype
, "normal_connect");
309 ctype_class_new (lr
, ctype
, "r_connect");
310 ctype_class_new (lr
, ctype
, "no_connect");
311 ctype_class_new (lr
, ctype
, "no_connect-space");
312 ctype_class_new (lr
, ctype
, "vowel_connect");
315 ctype
->class_collection_max
= charmap
->mb_cur_max
== 1 ? 256 : 512;
316 ctype
->class_collection
317 = (uint32_t *) xcalloc (sizeof (unsigned long int),
318 ctype
->class_collection_max
);
319 ctype
->class_collection_act
= 256;
321 /* Fill character map information. */
322 ctype
->last_map_idx
= MAX_NR_CHARMAP
;
323 ctype_map_new (lr
, ctype
, "toupper", charmap
);
324 ctype_map_new (lr
, ctype
, "tolower", charmap
);
325 #ifdef PREDEFINED_CLASSES
326 ctype_map_new (lr
, ctype
, "tosymmetric", charmap
);
329 /* Fill first 256 entries in `toXXX' arrays. */
330 for (cnt
= 0; cnt
< 256; ++cnt
)
332 ctype
->map_collection
[0][cnt
] = cnt
;
333 ctype
->map_collection
[1][cnt
] = cnt
;
334 #ifdef PREDEFINED_CLASSES
335 ctype
->map_collection
[2][cnt
] = cnt
;
337 ctype
->map256_collection
[0][cnt
] = cnt
;
338 ctype
->map256_collection
[1][cnt
] = cnt
;
341 if (enc_not_ascii_compatible
)
342 ctype
->to_nonascii
= 1;
344 obstack_init (&ctype
->mempool
);
347 ctype
= locale
->categories
[LC_CTYPE
].ctype
=
348 copy_locale
->categories
[LC_CTYPE
].ctype
;
354 ctype_finish (struct localedef_t
*locale
, const struct charmap_t
*charmap
)
356 /* See POSIX.2, table 2-6 for the meaning of the following table. */
361 const char allow
[NCLASS
];
363 valid_table
[NCLASS
] =
365 /* The order is important. See token.h for more information.
366 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
367 { "upper", "--MX-XDDXXX-" },
368 { "lower", "--MX-XDDXXX-" },
369 { "alpha", "---X-XDDXXX-" },
370 { "digit", "XXX--XDDXXX-" },
371 { "xdigit", "-----XDDXXX-" },
372 { "space", "XXXXX------X" },
373 { "print", "---------X--" },
374 { "graph", "---------X--" },
375 { "blank", "XXXXXM-----X" },
376 { "cntrl", "XXXXX-XX--XX" },
377 { "punct", "XXXXX-DD-X-X" },
378 { "alnum", "-----XDDXXX-" }
382 uint32_t space_value
;
383 struct charseq
*space_seq
;
384 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
391 /* Now resolve copying and also handle completely missing definitions. */
394 const char *repertoire_name
;
396 /* First see whether we were supposed to copy. If yes, find the
397 actual definition. */
398 if (locale
->copy_name
[LC_CTYPE
] != NULL
)
400 /* Find the locale to copy from. This has to happen transitively since
401 the locale we are copying from might itself be copying another one. */
402 struct localedef_t
*from
= locale
;
405 from
= find_locale (LC_CTYPE
, from
->copy_name
[LC_CTYPE
],
406 from
->repertoire_name
, charmap
);
407 while (from
->categories
[LC_CTYPE
].ctype
== NULL
408 && from
->copy_name
[LC_CTYPE
] != NULL
);
410 ctype
= locale
->categories
[LC_CTYPE
].ctype
411 = from
->categories
[LC_CTYPE
].ctype
;
414 /* If there is still no definition issue a warning and create an
419 WITH_CUR_LOCALE (error (0, 0, _("\
420 No definition for %s category found"), "LC_CTYPE"));
421 ctype_startup (NULL
, locale
, charmap
, NULL
, 0);
422 ctype
= locale
->categories
[LC_CTYPE
].ctype
;
425 /* Get the repertoire we have to use. */
426 repertoire_name
= locale
->repertoire_name
?: repertoire_global
;
427 if (repertoire_name
!= NULL
)
428 ctype
->repertoire
= repertoire_read (repertoire_name
);
431 /* We need the name of the currently used 8-bit character set to
432 make correct conversion between this 8-bit representation and the
433 ISO 10646 character set used internally for wide characters. */
434 ctype
->codeset_name
= charmap
->code_set_name
;
435 if (ctype
->codeset_name
== NULL
)
438 WITH_CUR_LOCALE (error (0, 0, _("\
439 No character set name specified in charmap")));
440 ctype
->codeset_name
= "//UNKNOWN//";
443 /* Set default value for classes not specified. */
444 set_class_defaults (ctype
, charmap
, ctype
->repertoire
);
446 /* Check according to table. */
447 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
449 uint32_t tmp
= ctype
->class_collection
[cnt
];
453 for (cls1
= 0; cls1
< NCLASS
; ++cls1
)
454 if ((tmp
& _ISwbit (cls1
)) != 0)
455 for (cls2
= 0; cls2
< NCLASS
; ++cls2
)
456 if (valid_table
[cls1
].allow
[cls2
] != '-')
458 int eq
= (tmp
& _ISwbit (cls2
)) != 0;
459 switch (valid_table
[cls1
].allow
[cls2
])
464 uint32_t value
= ctype
->charnames
[cnt
];
467 WITH_CUR_LOCALE (error (0, 0, _("\
468 character L'\\u%0*x' in class `%s' must be in class `%s'"),
469 value
> 0xffff ? 8 : 4,
471 valid_table
[cls1
].name
,
472 valid_table
[cls2
].name
));
479 uint32_t value
= ctype
->charnames
[cnt
];
482 WITH_CUR_LOCALE (error (0, 0, _("\
483 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
484 value
> 0xffff ? 8 : 4,
486 valid_table
[cls1
].name
,
487 valid_table
[cls2
].name
));
492 ctype
->class_collection
[cnt
] |= _ISwbit (cls2
);
496 WITH_CUR_LOCALE (error (5, 0, _("\
497 internal error in %s, line %u"), __FUNCTION__
, __LINE__
));
503 for (cnt
= 0; cnt
< 256; ++cnt
)
505 uint32_t tmp
= ctype
->class256_collection
[cnt
];
509 for (cls1
= 0; cls1
< NCLASS
; ++cls1
)
510 if ((tmp
& _ISbit (cls1
)) != 0)
511 for (cls2
= 0; cls2
< NCLASS
; ++cls2
)
512 if (valid_table
[cls1
].allow
[cls2
] != '-')
514 int eq
= (tmp
& _ISbit (cls2
)) != 0;
515 switch (valid_table
[cls1
].allow
[cls2
])
522 snprintf (buf
, sizeof buf
, "\\%Zo", cnt
);
525 WITH_CUR_LOCALE (error (0, 0, _("\
526 character '%s' in class `%s' must be in class `%s'"),
528 valid_table
[cls1
].name
,
529 valid_table
[cls2
].name
));
538 snprintf (buf
, sizeof buf
, "\\%Zo", cnt
);
541 WITH_CUR_LOCALE (error (0, 0, _("\
542 character '%s' in class `%s' must not be in class `%s'"),
544 valid_table
[cls1
].name
,
545 valid_table
[cls2
].name
));
550 ctype
->class256_collection
[cnt
] |= _ISbit (cls2
);
554 WITH_CUR_LOCALE (error (5, 0, _("\
555 internal error in %s, line %u"), __FUNCTION__
, __LINE__
));
561 /* ... and now test <SP> as a special case. */
563 if (((cnt
= BITPOS (tok_space
),
564 (ELEM (ctype
, class_collection
, , space_value
)
565 & BITw (tok_space
)) == 0)
566 || (cnt
= BITPOS (tok_blank
),
567 (ELEM (ctype
, class_collection
, , space_value
)
568 & BITw (tok_blank
)) == 0)))
571 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
572 valid_table
[cnt
].name
));
574 else if (((cnt
= BITPOS (tok_punct
),
575 (ELEM (ctype
, class_collection
, , space_value
)
576 & BITw (tok_punct
)) != 0)
577 || (cnt
= BITPOS (tok_graph
),
578 (ELEM (ctype
, class_collection
, , space_value
)
583 WITH_CUR_LOCALE (error (0, 0, _("\
584 <SP> character must not be in class `%s'"),
585 valid_table
[cnt
].name
));
588 ELEM (ctype
, class_collection
, , space_value
) |= BITw (tok_print
);
590 space_seq
= charmap_find_value (charmap
, "SP", 2);
591 if (space_seq
== NULL
)
592 space_seq
= charmap_find_value (charmap
, "space", 5);
593 if (space_seq
== NULL
)
594 space_seq
= charmap_find_value (charmap
, "U00000020", 9);
595 if (space_seq
== NULL
|| space_seq
->nbytes
!= 1)
598 WITH_CUR_LOCALE (error (0, 0, _("\
599 character <SP> not defined in character map")));
601 else if (((cnt
= BITPOS (tok_space
),
602 (ctype
->class256_collection
[space_seq
->bytes
[0]]
603 & BIT (tok_space
)) == 0)
604 || (cnt
= BITPOS (tok_blank
),
605 (ctype
->class256_collection
[space_seq
->bytes
[0]]
606 & BIT (tok_blank
)) == 0)))
609 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
610 valid_table
[cnt
].name
));
612 else if (((cnt
= BITPOS (tok_punct
),
613 (ctype
->class256_collection
[space_seq
->bytes
[0]]
614 & BIT (tok_punct
)) != 0)
615 || (cnt
= BITPOS (tok_graph
),
616 (ctype
->class256_collection
[space_seq
->bytes
[0]]
617 & BIT (tok_graph
)) != 0)))
620 WITH_CUR_LOCALE (error (0, 0, _("\
621 <SP> character must not be in class `%s'"),
622 valid_table
[cnt
].name
));
625 ctype
->class256_collection
[space_seq
->bytes
[0]] |= BIT (tok_print
);
627 /* Now that the tests are done make sure the name array contains all
628 characters which are handled in the WIDTH section of the
629 character set definition file. */
630 if (charmap
->width_rules
!= NULL
)
631 for (cnt
= 0; cnt
< charmap
->nwidth_rules
; ++cnt
)
633 unsigned char bytes
[charmap
->mb_cur_max
];
634 int nbytes
= charmap
->width_rules
[cnt
].from
->nbytes
;
636 /* The range of characters for which the width is specified
637 is described using byte sequences of the multibyte
638 charset. We have to convert this to UCS4 now. And we
639 cannot simply convert the beginning and the end of the
640 sequence; we have to iterate over the byte sequence and
641 convert it for every single character. */
642 memcpy (bytes
, charmap
->width_rules
[cnt
].from
->bytes
, nbytes
);
644 while (nbytes
< charmap
->width_rules
[cnt
].to
->nbytes
645 || memcmp (bytes
, charmap
->width_rules
[cnt
].to
->bytes
,
648 /* Find the UCS value for `bytes'. */
651 struct charseq
*seq
= charmap_find_symbol (charmap
, bytes
, nbytes
);
654 wch
= ILLEGAL_CHAR_VALUE
;
655 else if (seq
->ucs4
!= UNINITIALIZED_CHAR_VALUE
)
658 wch
= repertoire_find_value (ctype
->repertoire
, seq
->name
,
661 if (wch
!= ILLEGAL_CHAR_VALUE
)
662 /* We are only interested in the side-effects of the
663 `find_idx' call. It will add appropriate entries in
664 the name array if this is necessary. */
665 (void) find_idx (ctype
, NULL
, NULL
, NULL
, wch
);
667 /* "Increment" the bytes sequence. */
669 while (inner
>= 0 && bytes
[inner
] == 0xff)
674 /* We have to extend the byte sequence. */
675 if (nbytes
>= charmap
->width_rules
[cnt
].to
->nbytes
)
679 memset (&bytes
[1], 0, nbytes
);
685 while (++inner
< nbytes
)
691 /* Now set all the other characters of the character set to the
694 while (iterate_table (&charmap
->char_table
, &curs
, &key
, &len
, &vdata
) == 0)
696 struct charseq
*data
= (struct charseq
*) vdata
;
698 if (data
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
699 data
->ucs4
= repertoire_find_value (ctype
->repertoire
,
702 if (data
->ucs4
!= ILLEGAL_CHAR_VALUE
)
703 (void) find_idx (ctype
, NULL
, NULL
, NULL
, data
->ucs4
);
706 /* There must be a multiple of 10 digits. */
707 if (ctype
->mbdigits_act
% 10 != 0)
709 assert (ctype
->mbdigits_act
== ctype
->wcdigits_act
);
710 ctype
->wcdigits_act
-= ctype
->mbdigits_act
% 10;
711 ctype
->mbdigits_act
-= ctype
->mbdigits_act
% 10;
712 WITH_CUR_LOCALE (error (0, 0, _("\
713 `digit' category has not entries in groups of ten")));
716 /* Check the input digits. There must be a multiple of ten available.
717 In each group it could be that one or the other character is missing.
718 In this case the whole group must be removed. */
720 while (cnt
< ctype
->mbdigits_act
)
723 for (inner
= 0; inner
< 10; ++inner
)
724 if (ctype
->mbdigits
[cnt
+ inner
] == NULL
)
731 /* Remove the group. */
732 memmove (&ctype
->mbdigits
[cnt
], &ctype
->mbdigits
[cnt
+ 10],
733 ((ctype
->wcdigits_act
- cnt
- 10)
734 * sizeof (ctype
->mbdigits
[0])));
735 ctype
->mbdigits_act
-= 10;
739 /* If no input digits are given use the default. */
740 if (ctype
->mbdigits_act
== 0)
742 if (ctype
->mbdigits_max
== 0)
744 ctype
->mbdigits
= obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
745 10 * sizeof (struct charseq
*));
746 ctype
->mbdigits_max
= 10;
749 for (cnt
= 0; cnt
< 10; ++cnt
)
751 ctype
->mbdigits
[cnt
] = charmap_find_symbol (charmap
,
753 if (ctype
->mbdigits
[cnt
] == NULL
)
755 ctype
->mbdigits
[cnt
] = charmap_find_symbol (charmap
,
757 strlen (longnames
[cnt
]));
758 if (ctype
->mbdigits
[cnt
] == NULL
)
760 /* Hum, this ain't good. */
761 WITH_CUR_LOCALE (error (0, 0, _("\
762 no input digits defined and none of the standard names in the charmap")));
764 ctype
->mbdigits
[cnt
] = obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
765 sizeof (struct charseq
) + 1);
767 /* This is better than nothing. */
768 ctype
->mbdigits
[cnt
]->bytes
[0] = digits
[cnt
];
769 ctype
->mbdigits
[cnt
]->nbytes
= 1;
774 ctype
->mbdigits_act
= 10;
777 /* Check the wide character input digits. There must be a multiple
778 of ten available. In each group it could be that one or the other
779 character is missing. In this case the whole group must be
782 while (cnt
< ctype
->wcdigits_act
)
785 for (inner
= 0; inner
< 10; ++inner
)
786 if (ctype
->wcdigits
[cnt
+ inner
] == ILLEGAL_CHAR_VALUE
)
793 /* Remove the group. */
794 memmove (&ctype
->wcdigits
[cnt
], &ctype
->wcdigits
[cnt
+ 10],
795 ((ctype
->wcdigits_act
- cnt
- 10)
796 * sizeof (ctype
->wcdigits
[0])));
797 ctype
->wcdigits_act
-= 10;
801 /* If no input digits are given use the default. */
802 if (ctype
->wcdigits_act
== 0)
804 if (ctype
->wcdigits_max
== 0)
806 ctype
->wcdigits
= obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
807 10 * sizeof (uint32_t));
808 ctype
->wcdigits_max
= 10;
811 for (cnt
= 0; cnt
< 10; ++cnt
)
812 ctype
->wcdigits
[cnt
] = L
'0' + cnt
;
814 ctype
->mbdigits_act
= 10;
817 /* Check the outdigits. */
819 for (cnt
= 0; cnt
< 10; ++cnt
)
820 if (ctype
->mboutdigits
[cnt
] == NULL
)
822 static struct charseq replace
[2];
826 WITH_CUR_LOCALE (error (0, 0, _("\
827 not all characters used in `outdigit' are available in the charmap")));
831 replace
[0].nbytes
= 1;
832 replace
[0].bytes
[0] = '?';
833 replace
[0].bytes
[1] = '\0';
834 ctype
->mboutdigits
[cnt
] = &replace
[0];
838 for (cnt
= 0; cnt
< 10; ++cnt
)
839 if (ctype
->wcoutdigits
[cnt
] == 0)
843 WITH_CUR_LOCALE (error (0, 0, _("\
844 not all characters used in `outdigit' are available in the repertoire")));
848 ctype
->wcoutdigits
[cnt
] = L
'?';
851 /* Sort the entries in the translit_ignore list. */
852 if (ctype
->translit_ignore
!= NULL
)
854 struct translit_ignore_t
*firstp
= ctype
->translit_ignore
;
855 struct translit_ignore_t
*runp
;
857 ctype
->ntranslit_ignore
= 1;
859 for (runp
= firstp
->next
; runp
!= NULL
; runp
= runp
->next
)
861 struct translit_ignore_t
*lastp
= NULL
;
862 struct translit_ignore_t
*cmpp
;
864 ++ctype
->ntranslit_ignore
;
866 for (cmpp
= firstp
; cmpp
!= NULL
; lastp
= cmpp
, cmpp
= cmpp
->next
)
867 if (runp
->from
< cmpp
->from
)
875 ctype
->translit_ignore
= firstp
;
881 ctype_output (struct localedef_t
*locale
, const struct charmap_t
*charmap
,
882 const char *output_path
)
884 static const char nulbytes
[4] = { 0, 0, 0, 0 };
885 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
886 const size_t nelems
= (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
)
887 + ctype
->nr_charclass
+ ctype
->map_collection_nr
);
888 struct iovec
*iov
= alloca (sizeof *iov
889 * (2 + nelems
+ 2 * ctype
->nr_charclass
890 + ctype
->map_collection_nr
+ 4));
891 struct locale_file data
;
892 uint32_t *idx
= alloca (sizeof *idx
* (nelems
+ 1));
893 uint32_t default_missing_len
;
894 size_t elem
, cnt
, offset
, total
;
897 /* Now prepare the output: Find the sizes of the table we can use. */
898 allocate_arrays (ctype
, charmap
, ctype
->repertoire
);
900 data
.magic
= LIMAGIC (LC_CTYPE
);
902 iov
[0].iov_base
= (void *) &data
;
903 iov
[0].iov_len
= sizeof (data
);
905 iov
[1].iov_base
= (void *) idx
;
906 iov
[1].iov_len
= nelems
* sizeof (uint32_t);
908 idx
[0] = iov
[0].iov_len
+ iov
[1].iov_len
;
911 for (elem
= 0; elem
< nelems
; ++elem
)
913 if (elem
< _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
))
916 #define CTYPE_EMPTY(name) \
918 iov[2 + elem + offset].iov_base = NULL; \
919 iov[2 + elem + offset].iov_len = 0; \
920 idx[elem + 1] = idx[elem]; \
923 CTYPE_EMPTY(_NL_CTYPE_GAP1
);
924 CTYPE_EMPTY(_NL_CTYPE_GAP2
);
925 CTYPE_EMPTY(_NL_CTYPE_GAP3
);
926 CTYPE_EMPTY(_NL_CTYPE_GAP4
);
927 CTYPE_EMPTY(_NL_CTYPE_GAP5
);
928 CTYPE_EMPTY(_NL_CTYPE_GAP6
);
930 #define CTYPE_DATA(name, base, len) \
931 case _NL_ITEM_INDEX (name): \
932 iov[2 + elem + offset].iov_base = (base); \
933 iov[2 + elem + offset].iov_len = (len); \
934 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
937 CTYPE_DATA (_NL_CTYPE_CLASS
,
939 (256 + 128) * sizeof (char_class_t
));
941 CTYPE_DATA (_NL_CTYPE_TOUPPER
,
943 (256 + 128) * sizeof (uint32_t));
944 CTYPE_DATA (_NL_CTYPE_TOLOWER
,
946 (256 + 128) * sizeof (uint32_t));
948 CTYPE_DATA (_NL_CTYPE_TOUPPER32
,
950 256 * sizeof (uint32_t));
951 CTYPE_DATA (_NL_CTYPE_TOLOWER32
,
953 256 * sizeof (uint32_t));
955 CTYPE_DATA (_NL_CTYPE_CLASS32
,
957 256 * sizeof (char_class32_t
));
959 CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET
,
960 &ctype
->class_offset
, sizeof (uint32_t));
962 CTYPE_DATA (_NL_CTYPE_MAP_OFFSET
,
963 &ctype
->map_offset
, sizeof (uint32_t));
965 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE
,
966 &ctype
->translit_idx_size
, sizeof (uint32_t));
968 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX
,
969 ctype
->translit_from_idx
,
970 ctype
->translit_idx_size
* sizeof (uint32_t));
972 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL
,
973 ctype
->translit_from_tbl
,
974 ctype
->translit_from_tbl_size
);
976 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX
,
977 ctype
->translit_to_idx
,
978 ctype
->translit_idx_size
* sizeof (uint32_t));
980 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL
,
981 ctype
->translit_to_tbl
, ctype
->translit_to_tbl_size
);
983 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES
):
984 /* The class name array. */
986 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
, ++offset
)
988 iov
[2 + elem
+ offset
].iov_base
989 = (void *) ctype
->classnames
[cnt
];
990 iov
[2 + elem
+ offset
].iov_len
991 = strlen (ctype
->classnames
[cnt
]) + 1;
992 total
+= iov
[2 + elem
+ offset
].iov_len
;
994 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
995 iov
[2 + elem
+ offset
].iov_len
= 4 - (total
% 4);
996 total
+= 4 - (total
% 4);
998 idx
[elem
+ 1] = idx
[elem
] + total
;
1001 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES
):
1002 /* The map name array. */
1004 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
, ++offset
)
1006 iov
[2 + elem
+ offset
].iov_base
1007 = (void *) ctype
->mapnames
[cnt
];
1008 iov
[2 + elem
+ offset
].iov_len
1009 = strlen (ctype
->mapnames
[cnt
]) + 1;
1010 total
+= iov
[2 + elem
+ offset
].iov_len
;
1012 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1013 iov
[2 + elem
+ offset
].iov_len
= 4 - (total
% 4);
1014 total
+= 4 - (total
% 4);
1016 idx
[elem
+ 1] = idx
[elem
] + total
;
1019 CTYPE_DATA (_NL_CTYPE_WIDTH
,
1020 ctype
->width
.iov_base
,
1021 ctype
->width
.iov_len
);
1023 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX
,
1024 &ctype
->mb_cur_max
, sizeof (uint32_t));
1026 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME
):
1027 total
= strlen (ctype
->codeset_name
) + 1;
1029 iov
[2 + elem
+ offset
].iov_base
= (char *) ctype
->codeset_name
;
1032 iov
[2 + elem
+ offset
].iov_base
= alloca ((total
+ 3) & ~3);
1033 memset (mempcpy (iov
[2 + elem
+ offset
].iov_base
,
1034 ctype
->codeset_name
, total
),
1035 '\0', 4 - (total
& 3));
1036 total
= (total
+ 3) & ~3;
1038 iov
[2 + elem
+ offset
].iov_len
= total
;
1039 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1043 CTYPE_DATA (_NL_CTYPE_MAP_TO_NONASCII
,
1044 &ctype
->to_nonascii
, sizeof (uint32_t));
1046 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN
):
1047 iov
[2 + elem
+ offset
].iov_base
= alloca (sizeof (uint32_t));
1048 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1049 *(uint32_t *) iov
[2 + elem
+ offset
].iov_base
=
1050 ctype
->mbdigits_act
/ 10;
1051 idx
[elem
+ 1] = idx
[elem
] + sizeof (uint32_t);
1054 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN
):
1055 /* Align entries. */
1056 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1057 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1058 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1061 iov
[2 + elem
+ offset
].iov_base
= alloca (sizeof (uint32_t));
1062 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1063 *(uint32_t *) iov
[2 + elem
+ offset
].iov_base
=
1064 ctype
->wcdigits_act
/ 10;
1065 idx
[elem
+ 1] = idx
[elem
] + sizeof (uint32_t);
1068 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB
):
1069 /* Compute the length of all possible characters. For INDIGITS
1070 there might be more than one. We simply concatenate all of
1071 them with a NUL byte following. The NUL byte wouldn't be
1072 necessary but it makes it easier for the user. */
1075 for (cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
);
1076 cnt
< ctype
->mbdigits_act
; cnt
+= 10)
1077 total
+= ctype
->mbdigits
[cnt
]->nbytes
+ 1;
1078 iov
[2 + elem
+ offset
].iov_base
= (char *) alloca (total
);
1079 iov
[2 + elem
+ offset
].iov_len
= total
;
1081 cp
= iov
[2 + elem
+ offset
].iov_base
;
1082 for (cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
);
1083 cnt
< ctype
->mbdigits_act
; cnt
+= 10)
1085 cp
= mempcpy (cp
, ctype
->mbdigits
[cnt
]->bytes
,
1086 ctype
->mbdigits
[cnt
]->nbytes
);
1089 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1092 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB
) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB
):
1093 /* Compute the length of the character. Unlike INDIGITS, each
1094 OUTDIGIT entry holds exactly one character. We emit its bytes
1095 with a NUL byte following. The NUL byte wouldn't be
1096 necessary but it makes it easier for the user. */
1097 cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB
);
1098 total
= ctype
->mboutdigits
[cnt
]->nbytes
+ 1;
1099 iov
[2 + elem
+ offset
].iov_base
= (char *) alloca (total
);
1100 iov
[2 + elem
+ offset
].iov_len
= total
;
1102 *(char *) mempcpy (iov
[2 + elem
+ offset
].iov_base
,
1103 ctype
->mboutdigits
[cnt
]->bytes
,
1104 ctype
->mboutdigits
[cnt
]->nbytes
) = '\0';
1105 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1108 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC
) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC
):
1109 total
= ctype
->wcdigits_act
/ 10;
1111 iov
[2 + elem
+ offset
].iov_base
=
1112 (uint32_t *) alloca (total
* sizeof (uint32_t));
1113 iov
[2 + elem
+ offset
].iov_len
= total
* sizeof (uint32_t);
1115 for (cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC
);
1116 cnt
< ctype
->wcdigits_act
; cnt
+= 10)
1117 ((uint32_t *) iov
[2 + elem
+ offset
].iov_base
)[cnt
/ 10]
1118 = ctype
->wcdigits
[cnt
];
1119 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1122 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC
):
1123 /* Align entries. */
1124 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1125 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1126 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1130 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT1_WC
) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC
):
1131 cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC
);
1132 iov
[2 + elem
+ offset
].iov_base
= &ctype
->wcoutdigits
[cnt
];
1133 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1134 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1137 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN
):
1138 /* Align entries. */
1139 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1140 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1141 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1144 default_missing_len
= (ctype
->default_missing
1145 ? wcslen ((wchar_t *)ctype
->default_missing
)
1147 iov
[2 + elem
+ offset
].iov_base
= &default_missing_len
;
1148 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1149 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1152 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING
):
1153 iov
[2 + elem
+ offset
].iov_base
=
1154 ctype
->default_missing
?: (uint32_t *) L
"";
1155 iov
[2 + elem
+ offset
].iov_len
=
1156 wcslen (iov
[2 + elem
+ offset
].iov_base
) * sizeof (uint32_t);
1157 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1160 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN
):
1161 /* Align entries. */
1162 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1163 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1164 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1167 iov
[2 + elem
+ offset
].iov_base
= &ctype
->ntranslit_ignore
;
1168 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1169 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1172 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE
):
1174 uint32_t *ranges
= (uint32_t *) alloca (ctype
->ntranslit_ignore
1175 * 3 * sizeof (uint32_t));
1176 struct translit_ignore_t
*runp
;
1178 iov
[2 + elem
+ offset
].iov_base
= ranges
;
1179 iov
[2 + elem
+ offset
].iov_len
= (ctype
->ntranslit_ignore
1180 * 3 * sizeof (uint32_t));
1182 for (runp
= ctype
->translit_ignore
; runp
!= NULL
;
1185 *ranges
++ = runp
->from
;
1186 *ranges
++ = runp
->to
;
1187 *ranges
++ = runp
->step
;
1190 /* Remove the following line in case a new entry is added
1191 after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN. */
1193 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1197 assert (! "unknown CTYPE element");
1201 /* Handle extra maps. */
1202 size_t nr
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
);
1203 if (nr
< ctype
->nr_charclass
)
1205 iov
[2 + elem
+ offset
].iov_base
= ctype
->class_b
[nr
];
1206 iov
[2 + elem
+ offset
].iov_len
= 256 / 32 * sizeof (uint32_t);
1207 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1210 iov
[2 + elem
+ offset
] = ctype
->class_3level
[nr
];
1214 nr
-= ctype
->nr_charclass
;
1215 assert (nr
< ctype
->map_collection_nr
);
1216 iov
[2 + elem
+ offset
] = ctype
->map_3level
[nr
];
1218 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1222 assert (2 + elem
+ offset
== (nelems
+ 2 * ctype
->nr_charclass
1223 + ctype
->map_collection_nr
+ 4 + 2));
1225 write_locale_data (output_path
, LC_CTYPE
, "LC_CTYPE", 2 + elem
+ offset
,
1230 /* Local functions. */
/* Add NAME to the list of character class names kept in CTYPE.
   The existing ctype->classnames entries are compared against NAME with
   strcmp; a match is reported through lr_error (using LR) as an already
   defined class.  When ctype->nr_charclass has reached MAX_NR_CHARCLASS,
   error is called with status 2.  Otherwise NAME is stored in the next
   free slot and ctype->nr_charclass is incremented.
   NOTE(review): this listing has line numbers fused into the text and
   skips some source lines, so the declarations of NAME and CNT and any
   early exit after the lr_error call are not visible here.  */
1232 ctype_class_new (struct linereader
*lr
, struct locale_ctype_t
*ctype
,
/* Search the names registered so far for NAME.  */
1237 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
1238 if (strcmp (ctype
->classnames
[cnt
], name
) == 0)
/* CNT below the number of classes means the search found NAME.  */
1241 if (cnt
< ctype
->nr_charclass
)
1243 lr_error (lr
, _("character class `%s' already defined"), name
);
1247 if (ctype
->nr_charclass
== MAX_NR_CHARCLASS
)
1248 /* Exit code 2 is prescribed in P1003.2b. */
1249 WITH_CUR_LOCALE (error (2, 0, _("\
1250 implementation limit: no more than %Zd character classes allowed"),
/* Only the pointer NAME is stored here; no copy of the string is made.  */
1253 ctype
->classnames
[ctype
->nr_charclass
++] = name
;
/* Add NAME as a new character map of CTYPE.
   LR is used for error reporting and CHARMAP supplies mb_cur_max.
   The existing ctype->mapnames entries are searched for NAME while
   max_chars collects the largest map_collection_max value seen.  A
   duplicate is reported through lr_error; when map_collection_nr has
   reached MAX_NR_CHARMAP, error is called with status 2.  For the new
   slot CNT the name pointer is stored, a zero-filled table with
   map_collection_max[cnt] uint32_t entries is obtained from xcalloc,
   map_collection_act[cnt] is set to 256 and map_collection_nr is
   incremented.
   NOTE(review): some source lines are missing from this listing; the
   declaration of CNT and the condition that selects between the two
   map_collection_max assignments are not visible.  */
1258 ctype_map_new (struct linereader
*lr
, struct locale_ctype_t
*ctype
,
1259 const char *name
, const struct charmap_t
*charmap
)
1261 size_t max_chars
= 0;
1264 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
)
1266 if (strcmp (ctype
->mapnames
[cnt
], name
) == 0)
/* Remember the largest table size used by any existing map.  */
1269 if (max_chars
< ctype
->map_collection_max
[cnt
])
1270 max_chars
= ctype
->map_collection_max
[cnt
];
/* CNT below the number of maps means the search found NAME.  */
1273 if (cnt
< ctype
->map_collection_nr
)
1275 lr_error (lr
, _("character map `%s' already defined"), name
);
1279 if (ctype
->map_collection_nr
== MAX_NR_CHARMAP
)
1280 /* Exit code 2 is prescribed in P1003.2b. */
1281 WITH_CUR_LOCALE (error (2, 0, _("\
1282 implementation limit: no more than %d character maps allowed"),
1285 ctype
->mapnames
[cnt
] = name
;
/* Two candidate table sizes follow: a default derived from mb_cur_max
   (256 for single-byte charmaps, 512 otherwise) and the largest size
   found above.  Presumably the default applies only while max_chars is
   still 0 -- the selecting condition is not visible here.  */
1288 ctype
->map_collection_max
[cnt
] = charmap
->mb_cur_max
== 1 ? 256 : 512;
1290 ctype
->map_collection_max
[cnt
] = max_chars
;
1292 ctype
->map_collection
[cnt
] = (uint32_t *)
1293 xcalloc (sizeof (uint32_t), ctype
->map_collection_max
[cnt
]);
1294 ctype
->map_collection_act
[cnt
] = 256;
1296 ++ctype
->map_collection_nr
;
/* Return a pointer to the element of *TABLE that belongs to the character
   value IDX, registering IDX in the name array of CTYPE when it is not
   known yet.  MAX and ACT point to the allocated and the used number of
   elements of *TABLE.
   NOTE(review): several source lines (conditions, braces, the growth
   statement of the resize loop) are missing from this listing.  */
1300 /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
1301 is possible if we only want to extend the name array. */
1303 find_idx (struct locale_ctype_t
*ctype
, uint32_t **table
, size_t *max
,
1304 size_t *act
, uint32_t idx
)
/* Direct case: IDX itself is used as the table index.  The guarding
   condition is not visible here; presumably it covers the values below
   256, since the search further down starts at 256.  */
1309 return table
== NULL
? NULL
: &(*table
)[idx
];
1311 /* Use the charnames_idx lookup table instead of the slow search loop. */
1313 cnt
= idx_table_get (&ctype
->charnames_idx
, idx
);
/* Position one past the last used name, i.e. the value that marks
   "not found" in the test further down.  */
1316 cnt
= ctype
->charnames_act
;
/* Linear search over the registered names; going by the comment above
   this is the slow alternative to the lookup table.  */
1318 for (cnt
= 256; cnt
< ctype
->charnames_act
; ++cnt
)
1319 if (ctype
->charnames
[cnt
] == idx
)
1323 /* We have to distinguish two cases: the name is found or not. */
1324 if (cnt
== ctype
->charnames_act
)
1326 /* Extend the name array. */
1327 if (ctype
->charnames_act
== ctype
->charnames_max
)
1329 ctype
->charnames_max
*= 2;
1330 ctype
->charnames
= (uint32_t *)
1331 xrealloc (ctype
->charnames
,
1332 sizeof (uint32_t) * ctype
->charnames_max
);
/* Append IDX and record its position in the lookup table.  */
1334 ctype
->charnames
[ctype
->charnames_act
++] = idx
;
1335 idx_table_add (&ctype
->charnames_idx
, idx
, cnt
);
1339 /* We have done everything we are asked to do. */
1343 /* The caller does not want to extend the table. */
1344 return (cnt
>= *act
? NULL
: &(*table
)[cnt
]);
/* Growing path: *MAX is raised until position CNT fits, *TABLE is
   reallocated to *MAX elements and the newly added tail is cleared.  */
1350 size_t old_max
= *max
;
1353 while (*max
<= cnt
);
1356 (uint32_t *) xrealloc (*table
, *max
* sizeof (uint32_t));
1357 memset (&(*table
)[old_max
], '\0',
1358 (*max
- old_max
) * sizeof (uint32_t));
1364 return &(*table
)[cnt
];
/* Determine for the token NOW both the charmap entry (stored in *SEQP)
   and the wide character value (stored in *WCHP).  Three token kinds are
   handled: tok_bsymbol (symbolic name), tok_ucs4 (UCS4 value) and
   tok_charcode (byte sequence).  CHARMAP and REPERTOIRE are the tables
   consulted for the lookups.
   NOTE(review): the return type, the return statements and a number of
   conditions are not visible in this listing.  */
1369 get_character (struct token
*now
, const struct charmap_t
*charmap
,
1370 struct repertoire_t
*repertoire
,
1371 struct charseq
**seqp
, uint32_t *wchp
)
1373 if (now
->tok
== tok_bsymbol
)
1375 /* This will hopefully be the normal case. */
1376 *wchp
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
1377 now
->val
.str
.lenmb
);
1378 *seqp
= charmap_find_value (charmap
, now
->val
.str
.startmb
,
1379 now
->val
.str
.lenmb
);
1381 else if (now
->tok
== tok_ucs4
)
/* First try the charmap under the name "U" followed by eight hex
   digits, which is nine characters long.  */
1385 snprintf (utmp
, sizeof (utmp
), "U%08X", now
->val
.ucs4
);
1386 *seqp
= charmap_find_value (charmap
, utmp
, 9);
/* Next source of information: the sequence table of the repertoire.  */
1389 *seqp
= repertoire_find_seq (repertoire
, now
->val
.ucs4
);
1393 /* Compute the value in the charmap from the UCS value. */
1394 const char *symbol
= repertoire_find_symbol (repertoire
,
1400 *seqp
= charmap_find_value (charmap
, symbol
, strlen (symbol
));
1404 if (repertoire
!= NULL
)
1406 /* Insert a negative entry. */
1407 static const struct charseq negative
1408 = { .ucs4
= ILLEGAL_CHAR_VALUE
};
1409 uint32_t *newp
= obstack_alloc (&repertoire
->mem_pool
,
1411 *newp
= now
->val
.ucs4
;
1413 insert_entry (&repertoire
->seq_table
, newp
,
1414 sizeof (uint32_t), (void *) &negative
);
/* Record the UCS4 value in the sequence that was found.  */
1418 (*seqp
)->ucs4
= now
->val
.ucs4
;
1420 else if ((*seqp
)->ucs4
!= now
->val
.ucs4
)
1423 *wchp
= now
->val
.ucs4
;
1425 else if (now
->tok
== tok_charcode
)
1427 /* We must map from the byte code to UCS4. */
1428 *seqp
= charmap_find_symbol (charmap
, now
->val
.str
.startmb
,
1429 now
->val
.str
.lenmb
);
1432 *wchp
= ILLEGAL_CHAR_VALUE
;
/* The UCS4 value of a charmap entry is looked up in the repertoire the
   first time it is needed and then kept in the entry.  */
1435 if ((*seqp
)->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1436 (*seqp
)->ucs4
= repertoire_find_value (repertoire
, (*seqp
)->name
,
1437 strlen ((*seqp
)->name
));
1438 *wchp
= (*seqp
)->ucs4
;
1448 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1449 the .(2). counterparts. */
/* LAST_STR is the symbolic name that started the range and NOW holds the
   name that ends it.  Both names must have the same length; their common
   prefix is skipped and the remaining suffixes are converted to numbers
   with strtoul using BASE.  Unless IGNORE_CONTENT is set, every name
   between the two (counting in increments of STEP) is formatted into TMP
   and resolved with get_character; CLASS256_BIT and CLASS_BIT are then
   ORed into the class tables of CTYPE.  HANDLE_DIGITS == 1 appends the
   character to the mbdigits/wcdigits arrays, HANDLE_DIGITS == 2 stores it
   in the outdigit arrays, which take at most ten entries.  Problems are
   reported through LDFILE.
   NOTE(review): some source lines (among them the declaration of NOW,
   error calls and returns) are missing from this listing.  */
1451 charclass_symbolic_ellipsis (struct linereader
*ldfile
,
1452 struct locale_ctype_t
*ctype
,
1453 const struct charmap_t
*charmap
,
1454 struct repertoire_t
*repertoire
,
1456 const char *last_str
,
1457 unsigned long int class256_bit
,
1458 unsigned long int class_bit
, int base
,
1459 int ignore_content
, int handle_digits
, int step
)
1461 const char *nowstr
= now
->val
.str
.startmb
;
/* Buffer for the generated names: same length as the end name plus the
   terminating NUL byte.  */
1462 char tmp
[now
->val
.str
.lenmb
+ 1];
1465 unsigned long int from
;
1466 unsigned long int to
;
1468 /* We have to compute the ellipsis values using the symbolic names. */
1469 assert (last_str
!= NULL
);
1471 if (strlen (last_str
) != now
->val
.str
.lenmb
)
1475 _("`%s' and `%.*s' are not valid names for symbolic range"),
1476 last_str
, (int) now
->val
.str
.lenmb
, nowstr
);
1480 if (memcmp (last_str
, nowstr
, now
->val
.str
.lenmb
) == 0)
1481 /* Nothing to do, the names are the same. */
/* Skip the prefix that both names have in common.  */
1484 for (cp
= last_str
; *cp
== *(nowstr
+ (cp
- last_str
)); ++cp
)
/* NOTE(review): strtoul reports overflow by returning ULONG_MAX, while
   the two checks below compare against UINT_MAX; no reset of errno before
   the calls is visible in this listing either -- confirm.  */
1488 from
= strtoul (cp
, &endp
, base
);
1489 if ((from
== UINT_MAX
&& errno
== ERANGE
) || *endp
!= '\0')
1492 to
= strtoul (nowstr
+ (cp
- last_str
), &endp
, base
);
1493 if ((to
== UINT_MAX
&& errno
== ERANGE
)
1494 || (endp
- nowstr
) != now
->val
.str
.lenmb
|| from
>= to
)
1497 /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
1498 if (!ignore_content
)
/* The token NOW is reused for the lookups: its string is redirected to
   the generated name in TMP.  */
1500 now
->val
.str
.startmb
= tmp
;
/* FROM itself is not processed again; the loop starts at FROM + STEP and
   includes TO.  */
1501 while ((from
+= step
) <= to
)
1503 struct charseq
*seq
;
/* Rebuild the name from the common prefix and the zero-padded number,
   decimal for base 10 and upper-case hexadecimal otherwise.  */
1506 sprintf (tmp
, (base
== 10 ? "%.*s%0*ld" : "%.*s%0*lX"),
1507 (int) (cp
- last_str
), last_str
,
1508 (int) (now
->val
.str
.lenmb
- (cp
- last_str
)),
1511 get_character (now
, charmap
, repertoire
, &seq
, &wch
);
1513 if (seq
!= NULL
&& seq
->nbytes
== 1)
1514 /* Yep, we can store information about this byte sequence. */
1515 ctype
->class256_collection
[seq
->bytes
[0]] |= class256_bit
;
1517 if (wch
!= ILLEGAL_CHAR_VALUE
&& class_bit
!= 0)
1518 /* We have the UCS4 position. */
1519 *find_idx (ctype
, &ctype
->class_collection
,
1520 &ctype
->class_collection_max
,
1521 &ctype
->class_collection_act
, wch
) |= class_bit
;
1523 if (handle_digits
== 1)
1525 /* We must store the digit values. */
/* Both digit arrays are doubled together when the multibyte one is
   full.  */
1526 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1528 ctype
->mbdigits_max
*= 2;
1529 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1530 (ctype
->mbdigits_max
1531 * sizeof (char *)));
1532 ctype
->wcdigits_max
*= 2;
1533 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1534 (ctype
->wcdigits_max
1535 * sizeof (uint32_t)));
1538 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
1539 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
1541 else if (handle_digits
== 2)
1543 /* We must store the digit values. */
1544 if (ctype
->outdigits_act
>= 10)
1546 lr_error (ldfile
, _("\
1547 %s: field `%s' does not contain exactly ten entries"),
1548 "LC_CTYPE", "outdigit");
1552 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
1553 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
1554 ++ctype
->outdigits_act
;
1561 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. */
/* LAST_WCH is the UCS4 value that started the range and NOW->val.ucs4 the
   one that ends it; a start value larger than the end value is reported
   through lr_error on LDFILE.  Unless IGNORE_CONTENT is set, every value
   obtained by repeatedly adding STEP to LAST_WCH, up to and including the
   end value, is looked up in CHARMAP (under its U%08X and U%04X names)
   and in REPERTOIRE.  CLASS256_BIT is applied when a matching one-byte
   sequence exists and CLASS_BIT is ORed into the wide character class
   table.  HANDLE_DIGITS == 1 appends the value to the digit arrays,
   HANDLE_DIGITS == 2 stores it in the outdigit arrays, which take at most
   ten entries.
   NOTE(review): many source lines (conditions, braces, continuation
   lines of calls and comments) are missing from this listing.  */
1563 charclass_ucs4_ellipsis (struct linereader
*ldfile
,
1564 struct locale_ctype_t
*ctype
,
1565 const struct charmap_t
*charmap
,
1566 struct repertoire_t
*repertoire
,
1567 struct token
*now
, uint32_t last_wch
,
1568 unsigned long int class256_bit
,
1569 unsigned long int class_bit
, int ignore_content
,
1570 int handle_digits
, int step
)
1572 if (last_wch
> now
->val
.ucs4
)
/* The field width of the printed values is 4 digits when both fit into
   16 bits and 8 digits otherwise.  */
1574 lr_error (ldfile
, _("\
1575 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1576 (now
->val
.ucs4
| last_wch
) < 65536 ? 4 : 8, now
->val
.ucs4
,
1577 (now
->val
.ucs4
| last_wch
) < 65536 ? 4 : 8, last_wch
);
1581 if (!ignore_content
)
/* LAST_WCH itself is not processed again; the loop starts one STEP after
   it.  */
1582 while ((last_wch
+= step
) <= now
->val
.ucs4
)
1584 /* We have to find out whether there is a byte sequence corresponding
1585 to this UCS4 value. */
1586 struct charseq
*seq
;
1589 snprintf (utmp
, sizeof (utmp
), "U%08X", last_wch
);
1590 seq
= charmap_find_value (charmap
, utmp
, 9);
1593 snprintf (utmp
, sizeof (utmp
), "U%04X", last_wch
);
1594 seq
= charmap_find_value (charmap
, utmp
, 5);
1598 /* Try looking in the repertoire map. */
1599 seq
= repertoire_find_seq (repertoire
, last_wch
);
1601 /* If this is the first time we look for this sequence create a new
1605 static const struct charseq negative
1606 = { .ucs4
= ILLEGAL_CHAR_VALUE
};
1608 /* Find the symbolic name for this UCS4 value. */
1609 if (repertoire
!= NULL
)
1611 const char *symbol
= repertoire_find_symbol (repertoire
,
1613 uint32_t *newp
= obstack_alloc (&repertoire
->mem_pool
,
1618 /* We have a name, now search the multibyte value. */
1619 seq
= charmap_find_value (charmap
, symbol
, strlen (symbol
));
1622 /* We have to create a fake entry. */
1623 seq
= (struct charseq
*) &negative
;
1625 seq
->ucs4
= last_wch
;
1627 insert_entry (&repertoire
->seq_table
, newp
, sizeof (uint32_t),
1631 /* We have to create a fake entry. */
1632 seq
= (struct charseq
*) &negative
;
1635 /* We have a name, now search the multibyte value. */
1636 if (seq
->ucs4
== last_wch
&& seq
->nbytes
== 1)
1637 /* Yep, we can store information about this byte sequence. */
1638 ctype
->class256_collection
[(size_t) seq
->bytes
[0]]
1641 /* And of course we have the UCS4 position. */
1643 *find_idx (ctype
, &ctype
->class_collection
,
1644 &ctype
->class_collection_max
,
1645 &ctype
->class_collection_act
, last_wch
) |= class_bit
;
1647 if (handle_digits
== 1)
1649 /* We must store the digit values. */
/* Both digit arrays are doubled together when the multibyte one is
   full.  */
1650 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1652 ctype
->mbdigits_max
*= 2;
1653 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1654 (ctype
->mbdigits_max
1655 * sizeof (char *)));
1656 ctype
->wcdigits_max
*= 2;
1657 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1658 (ctype
->wcdigits_max
1659 * sizeof (uint32_t)));
1662 ctype
->mbdigits
[ctype
->mbdigits_act
++] = (seq
->ucs4
== last_wch
1664 ctype
->wcdigits
[ctype
->wcdigits_act
++] = last_wch
;
1666 else if (handle_digits
== 2)
1668 /* We must store the digit values. */
1669 if (ctype
->outdigits_act
>= 10)
1671 lr_error (ldfile
, _("\
1672 %s: field `%s' does not contain exactly ten entries"),
1673 "LC_CTYPE", "outdigit");
1677 ctype
->mboutdigits
[ctype
->outdigits_act
] = (seq
->ucs4
== last_wch
1679 ctype
->wcoutdigits
[ctype
->outdigits_act
] = last_wch
;
1680 ++ctype
->outdigits_act
;
1686 /* Ellipsis as in `/xea/x12.../xea/x34'. */
/* LAST_CHARCODE (LAST_CHARCODE_LEN bytes) is the byte sequence that
   started the range and NOW->val.charcode the one that ends it.  The two
   sequences must have the same length and the start must not compare
   greater than the end under memcmp; violations are reported through
   lr_error on LDFILE.  Unless IGNORE_CONTENT is set, LAST_CHARCODE is
   incremented in place, starting with its last byte, until it equals the
   end sequence.  Each intermediate sequence gets CLASS256_BIT (one-byte
   sequences only) and, when CHARMAP and REPERTOIRE yield a UCS4 value,
   CLASS_BIT.  When digits are being collected a freshly allocated
   struct charseq holding a copy of the bytes is stored in the digit or
   outdigit arrays.
   NOTE(review): some source lines (the HANDLE_DIGITS and STEP parameter
   line, local declarations, braces, break statements) are missing from
   this listing.  */
1688 charclass_charcode_ellipsis (struct linereader
*ldfile
,
1689 struct locale_ctype_t
*ctype
,
1690 const struct charmap_t
*charmap
,
1691 struct repertoire_t
*repertoire
,
1692 struct token
*now
, char *last_charcode
,
1693 uint32_t last_charcode_len
,
1694 unsigned long int class256_bit
,
1695 unsigned long int class_bit
, int ignore_content
,
1698 /* First check whether the to-value is larger. */
1699 if (now
->val
.charcode
.nbytes
!= last_charcode_len
)
1701 lr_error (ldfile
, _("\
1702 start and end character sequence of range must have the same length"));
1706 if (memcmp (last_charcode
, now
->val
.charcode
.bytes
, last_charcode_len
) > 0)
1708 lr_error (ldfile
, _("\
1709 to-value character sequence is smaller than from-value sequence"));
1713 if (!ignore_content
)
1717 /* Increment the byte sequence value. */
1718 struct charseq
*seq
;
/* NOTE(review): the condition i >= 0 only terminates the loop if i has
   a signed type; its declaration is not visible here -- confirm.  */
1722 for (i
= last_charcode_len
- 1; i
>= 0; --i
)
1723 if (++last_charcode
[i
] != 0)
/* NOTE(review): last_charcode points to plain char; where char is signed
   a byte above 0x7f converts to a very large size_t index -- confirm.  */
1726 if (last_charcode_len
== 1)
1727 /* Of course we have the charcode value. */
1728 ctype
->class256_collection
[(size_t) last_charcode
[0]]
1731 /* Find the symbolic name. */
1732 seq
= charmap_find_symbol (charmap
, last_charcode
,
/* The UCS4 value of a charmap entry is looked up in the repertoire the
   first time it is needed and then kept in the entry.  */
1736 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1737 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1738 strlen (seq
->name
));
1739 wch
= seq
== NULL
? ILLEGAL_CHAR_VALUE
: seq
->ucs4
;
1741 if (wch
!= ILLEGAL_CHAR_VALUE
&& class_bit
!= 0)
1742 *find_idx (ctype
, &ctype
->class_collection
,
1743 &ctype
->class_collection_max
,
1744 &ctype
->class_collection_act
, wch
) |= class_bit
;
1747 wch
= ILLEGAL_CHAR_VALUE
;
1749 if (handle_digits
== 1)
1751 /* We must store the digit values. */
/* Both digit arrays are doubled together when the multibyte one is
   full.  */
1752 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1754 ctype
->mbdigits_max
*= 2;
1755 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1756 (ctype
->mbdigits_max
1757 * sizeof (char *)));
1758 ctype
->wcdigits_max
*= 2;
1759 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1760 (ctype
->wcdigits_max
1761 * sizeof (uint32_t)));
/* Build a private charseq: the header is followed directly by a copy of
   the current byte sequence.  Only nbytes is set in the lines visible
   here.  */
1764 seq
= xmalloc (sizeof (struct charseq
) + last_charcode_len
);
1765 memcpy ((char *) (seq
+ 1), last_charcode
, last_charcode_len
);
1766 seq
->nbytes
= last_charcode_len
;
1768 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
1769 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
1771 else if (handle_digits
== 2)
1773 struct charseq
*seq
;
1774 /* We must store the digit values. */
1775 if (ctype
->outdigits_act
>= 10)
1777 lr_error (ldfile
, _("\
1778 %s: field `%s' does not contain exactly ten entries"),
1779 "LC_CTYPE", "outdigit");
/* Same construction of a private charseq as in the digit case above.  */
1783 seq
= xmalloc (sizeof (struct charseq
) + last_charcode_len
);
1784 memcpy ((char *) (seq
+ 1), last_charcode
, last_charcode_len
);
1785 seq
->nbytes
= last_charcode_len
;
1787 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
1788 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
1789 ++ctype
->outdigits_act
;
/* The loop ends once the incremented sequence equals the end value.  */
1792 while (memcmp (last_charcode
, now
->val
.charcode
.bytes
,
1793 last_charcode_len
) != 0);
/* Search the transliteration data of CTYPE for the character WCH.
   First the list ctype->translit is walked for an entry whose from-string
   consists of exactly WCH; for such an entry the to-strings are tried in
   order, and a to-string qualifies when every one of its characters is
   found in CHARMAP under its U%08X name.  Afterwards the ranges in
   ctype->translit_ignore are checked; for a character hit by a range an
   empty string is returned.
   NOTE(review): some source lines (the WCH parameter line, local
   declarations, returns, braces) are missing from this listing.  */
1799 find_translit2 (struct locale_ctype_t
*ctype
, const struct charmap_t
*charmap
,
1802 struct translit_t
*trunp
= ctype
->translit
;
1803 struct translit_ignore_t
*tirunp
= ctype
->translit_ignore
;
1805 while (trunp
!= NULL
)
1807 /* XXX We simplify things here. The transliterations we look
1808 for are only allowed to have one character. */
1809 if (trunp
->from
[0] == wch
&& trunp
->from
[1] == 0)
1811 /* Found it. Now look for a transliteration which can be
1812 represented with the character set. */
1813 struct translit_to_t
*torunp
= trunp
->to
;
1815 while (torunp
!= NULL
)
/* Check each character of this candidate against the charmap.  */
1819 for (i
= 0; torunp
->str
[i
] != 0; ++i
)
1823 snprintf (utmp
, sizeof (utmp
), "U%08X", torunp
->str
[i
]);
1824 if (charmap_find_value (charmap
, utmp
, 9) == NULL
)
1825 /* This character cannot be represented. */
/* Reaching the terminating zero means no character was rejected.  */
1829 if (torunp
->str
[i
] == 0)
1832 torunp
= torunp
->next
;
1838 trunp
= trunp
->next
;
1841 /* Check for ignored chars. */
1842 while (tirunp
!= NULL
)
1844 if (tirunp
->from
<= wch
&& tirunp
->to
>= wch
)
/* Walk the range in its step width to see whether WCH is one of the
   values it actually contains.  */
1848 for (wi
= tirunp
->from
; wi
<= wch
; wi
+= tirunp
->step
)
/* NOTE(review): a compound literal inside a function body has automatic
   storage duration, so the pointer returned here refers to an object
   whose lifetime ends with the enclosing block -- confirm and consider a
   static object instead.  */
1850 return (uint32_t []) { 0 };
1854 /* Nothing found. */
/* Look up a transliteration for the character WCH in the LC_CTYPE data of
   LOCALE, using CHARMAP to judge representability.  When the locale has
   its own transliteration table, find_translit2 is consulted first.  The
   locales named in ctype->translit_include are then searched one after
   the other by calling this function recursively, until a result is
   found or the list ends.
   NOTE(review): some source lines (the WCH parameter line, conditions,
   the remaining find_locale arguments, the return) are missing from this
   listing.  */
1860 find_translit (struct localedef_t
*locale
, const struct charmap_t
*charmap
,
1863 struct locale_ctype_t
*ctype
;
1864 uint32_t *result
= NULL
;
1866 assert (locale
!= NULL
);
1867 ctype
= locale
->categories
[LC_CTYPE
].ctype
;
1872 if (ctype
->translit
!= NULL
)
1873 result
= find_translit2 (ctype
, charmap
, wch
);
1877 struct translit_include_t
*irunp
= ctype
->translit_include
;
/* Stop at the first included locale that provides a result.  */
1879 while (irunp
!= NULL
&& result
== NULL
)
1881 result
= find_translit (find_locale (CTYPE_LOCALE
,
1883 irunp
->copy_repertoire
,
1886 irunp
= irunp
->next
;
1894 /* Read one transliteration entry. */
/* Convert the token NOW into a wide character string.  The token kinds
   handled are tok_default_missing, tok_bsymbol (value taken from
   REPERTOIRE), tok_ucs4, tok_charcode (looked up in CHARMAP, then in
   REPERTOIRE) and tok_string (the wide string of the token is used as
   is).  For the single-character kinds a two-element array is obtained
   from xmalloc.  For any other token a syntax error is reported on
   LDFILE and (uint32_t *) -1l is returned.
   NOTE(review): some source lines (declarations, the statements storing
   the terminating element, returns, braces) are missing from this
   listing.  */
1896 read_widestring (struct linereader
*ldfile
, struct token
*now
,
1897 const struct charmap_t
*charmap
,
1898 struct repertoire_t
*repertoire
)
1902 if (now
->tok
== tok_default_missing
)
1903 /* The special name "" will denote this case. */
/* NOTE(review): the compound literal below has pointer type and is
   initialized with 0, so this assigns a null pointer to wstr rather than
   a pointer to an empty string -- confirm this is intended.  */
1904 wstr
= ((uint32_t *) { 0 });
1905 else if (now
->tok
== tok_bsymbol
)
1907 /* Get the value from the repertoire. */
1908 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1909 wstr
[0] = repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
1910 now
->val
.str
.lenmb
);
1911 if (wstr
[0] == ILLEGAL_CHAR_VALUE
)
1913 /* We cannot proceed, we don't know the UCS4 value. */
1920 else if (now
->tok
== tok_ucs4
)
1922 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1923 wstr
[0] = now
->val
.ucs4
;
1926 else if (now
->tok
== tok_charcode
)
1928 /* Argh, we have to convert to the symbol name first and then to the
1930 struct charseq
*seq
= charmap_find_symbol (charmap
,
1931 now
->val
.str
.startmb
,
1932 now
->val
.str
.lenmb
);
1934 /* Cannot find the UCS4 value. */
/* The UCS4 value of a charmap entry is looked up in the repertoire the
   first time it is needed and then kept in the entry.  */
1937 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1938 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1939 strlen (seq
->name
));
1940 if (seq
->ucs4
== ILLEGAL_CHAR_VALUE
)
1941 /* We cannot proceed, we don't know the UCS4 value. */
1944 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1945 wstr
[0] = seq
->ucs4
;
1948 else if (now
->tok
== tok_string
)
1950 wstr
= now
->val
.str
.startwc
;
1951 if (wstr
== NULL
|| wstr
[0] == 0)
/* Error path for unexpected tokens: the rest of the line is skipped
   unless the token already ended it.  */
1956 if (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
1957 lr_ignore_rest (ldfile
, 0);
1958 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1959 return (uint32_t *) -1l;
/* Parse one transliteration definition whose first token is NOW.  The
   from-string is obtained with read_widestring; a struct translit_t for
   it is allocated from the obstack ctype->mempool and filled with the
   file name and line number of LDFILE.  The to-strings that follow are
   separated by semicolons: each one is collected in the obstack,
   terminated with a zero element and appended as a struct translit_to_t
   to the list reached through TOP.  At the end of the line the finished
   entry is put at the head of ctype->translit.  When read_widestring
   signals an error, everything allocated since RESULT is released with
   obstack_free.
   NOTE(review): some source lines (the loop construct, declarations,
   returns, braces) are missing from this listing.  */
1967 read_translit_entry (struct linereader
*ldfile
, struct locale_ctype_t
*ctype
,
1968 struct token
*now
, const struct charmap_t
*charmap
,
1969 struct repertoire_t
*repertoire
)
1971 uint32_t *from_wstr
= read_widestring (ldfile
, now
, charmap
, repertoire
);
1972 struct translit_t
*result
;
1973 struct translit_to_t
**top
;
1974 struct obstack
*ob
= &ctype
->mempool
;
1978 if (from_wstr
== NULL
)
1979 /* There is no valid from string. */
1982 result
= (struct translit_t
*) obstack_alloc (ob
,
1983 sizeof (struct translit_t
));
1984 result
->from
= from_wstr
;
1985 result
->fname
= ldfile
->fname
;
1986 result
->lineno
= ldfile
->lineno
;
1987 result
->next
= NULL
;
1997 /* Next we have one or more transliterations. They are
1998 separated by semicolons. */
1999 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
2001 if (!first
&& (now
->tok
== tok_semicolon
|| now
->tok
== tok_eol
))
2003 /* One string read. */
2004 const uint32_t zero
= 0;
/* Terminate the collected string; the literal 4 is the size of one
   uint32_t element.  */
2008 obstack_grow (ob
, &zero
, 4);
2009 to_wstr
= obstack_finish (ob
);
2011 *top
= obstack_alloc (ob
, sizeof (struct translit_to_t
));
2012 (*top
)->str
= to_wstr
;
2013 (*top
)->next
= NULL
;
2016 if (now
->tok
== tok_eol
)
/* The definition is complete: link it in front of the existing ones.  */
2018 result
->next
= ctype
->translit
;
2019 ctype
->translit
= result
;
2024 top
= &(*top
)->next
;
2029 to_wstr
= read_widestring (ldfile
, now
, charmap
, repertoire
);
2030 if (to_wstr
== (uint32_t *) -1l)
2032 /* An error occurred. */
2033 obstack_free (ob
, result
);
2037 if (to_wstr
== NULL
)
2040 /* This value is usable. */
/* NOTE(review): the length is computed with wcslen on a uint32_t string
   and scaled by a literal 4, which presumes a 32-bit wchar_t -- confirm
   for the supported targets.  */
2041 obstack_grow (ob
, to_wstr
, wcslen ((wchar_t *) to_wstr
) * 4);
/* Parse the operand list of a translit_ignore definition read from
   LDFILE.  Each list element is a tok_bsymbol (resolved through
   REPERTOIRE) or a tok_ucs4 token.  For every element a struct
   translit_ignore_t is allocated from ctype->mempool and put at the head
   of ctype->translit_ignore.  An element may be followed by one of the
   ellipsis tokens tok_ellipsis2 or tok_ellipsis2_2 (step 1 or 2) and a
   to-value, which is checked against the from-value.  The list ends at
   the end of the line or file; a semicolon continues it and any other
   token is reported as a syntax error.
   NOTE(review): some source lines (the loop construct, declarations, the
   assignments filling NEWP, returns, braces) are missing from this
   listing.  */
2050 read_translit_ignore_entry (struct linereader
*ldfile
,
2051 struct locale_ctype_t
*ctype
,
2052 const struct charmap_t
*charmap
,
2053 struct repertoire_t
*repertoire
)
2055 /* We expect a semicolon-separated list of characters we ignore. We are
2056 only interested in the wide character definitions. These must be
2057 single characters, possibly defining a range when an ellipsis is used. */
2060 struct token
*now
= lr_token (ldfile
, charmap
, NULL
, repertoire
,
2062 struct translit_ignore_t
*newp
;
2065 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2068 _("premature end of `translit_ignore' definition"));
2072 if (now
->tok
!= tok_bsymbol
&& now
->tok
!= tok_ucs4
)
2074 lr_error (ldfile
, _("syntax error"));
2075 lr_ignore_rest (ldfile
, 0);
2079 if (now
->tok
== tok_ucs4
)
2080 from
= now
->val
.ucs4
;
2082 /* Try to get the value. */
2083 from
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
2084 now
->val
.str
.lenmb
);
2086 if (from
== ILLEGAL_CHAR_VALUE
)
2088 lr_error (ldfile
, "invalid character name");
2093 newp
= (struct translit_ignore_t
*)
2094 obstack_alloc (&ctype
->mempool
, sizeof (struct translit_ignore_t
));
/* New entries are linked in front, so the list is in reverse input
   order.  */
2099 newp
->next
= ctype
->translit_ignore
;
2100 ctype
->translit_ignore
= newp
;
2103 /* Now we expect either a semicolon, an ellipsis, or the end of the
2105 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
2107 if (now
->tok
== tok_ellipsis2
|| now
->tok
== tok_ellipsis2_2
)
2109 /* XXX Should we bother implementing `....'? `...' certainly
2110 will not be implemented. */
2112 int step
= now
->tok
== tok_ellipsis2_2
? 2 : 1;
2114 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
2116 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2119 _("premature end of `translit_ignore' definition"));
2123 if (now
->tok
!= tok_bsymbol
&& now
->tok
!= tok_ucs4
)
2125 lr_error (ldfile
, _("syntax error"));
2126 lr_ignore_rest (ldfile
, 0);
2130 if (now
->tok
== tok_ucs4
)
2133 /* Try to get the value. */
2134 to
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
2135 now
->val
.str
.lenmb
);
2137 if (to
== ILLEGAL_CHAR_VALUE
)
2138 lr_error (ldfile
, "invalid character name");
2141 /* Make sure the `to'-value is larger. */
/* The field width of the printed values is 4 digits when both fit into
   16 bits and 8 digits otherwise.  */
2148 lr_error (ldfile
, _("\
2149 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2150 (to
| from
) < 65536 ? 4 : 8, to
,
2151 (to
| from
) < 65536 ? 4 : 8, from
);
2154 /* And the next token. */
2155 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
2158 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2162 if (now
->tok
== tok_semicolon
)
2166 /* If we come here something is wrong. */
2167 lr_error (ldfile
, _("syntax error"));
2168 lr_ignore_rest (ldfile
, 0);
2174 /* The parser for the LC_CTYPE section of the locale definition. */
2176 ctype_read (struct linereader
*ldfile
, struct localedef_t
*result
,
2177 const struct charmap_t
*charmap
, const char *repertoire_name
,
2180 struct repertoire_t
*repertoire
= NULL
;
2181 struct locale_ctype_t
*ctype
;
2183 enum token_t nowtok
;
2185 struct charseq
*last_seq
;
2186 uint32_t last_wch
= 0;
2187 enum token_t last_token
;
2188 enum token_t ellipsis_token
;
2190 char last_charcode
[16];
2191 size_t last_charcode_len
= 0;
2192 const char *last_str
= NULL
;
2194 struct localedef_t
*copy_locale
= NULL
;
2196 /* Get the repertoire we have to use. */
2197 if (repertoire_name
!= NULL
)
2198 repertoire
= repertoire_read (repertoire_name
);
2200 /* The rest of the line containing `LC_CTYPE' must be free. */
2201 lr_ignore_rest (ldfile
, 1);
2206 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2209 while (nowtok
== tok_eol
);
2211 /* If we see `copy' now we are almost done. */
2212 if (nowtok
== tok_copy
)
2214 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2215 if (now
->tok
!= tok_string
)
2217 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2221 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2222 while (now
->tok
!= tok_eof
&& now
->tok
!= tok_end
);
2224 if (now
->tok
!= tok_eof
2225 || (now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
),
2226 now
->tok
== tok_eof
))
2227 lr_error (ldfile
, _("%s: premature end of file"), "LC_CTYPE");
2228 else if (now
->tok
!= tok_lc_ctype
)
2230 lr_error (ldfile
, _("\
2231 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2232 lr_ignore_rest (ldfile
, 0);
2235 lr_ignore_rest (ldfile
, 1);
2240 if (! ignore_content
)
2242 /* Get the locale definition. */
2243 copy_locale
= load_locale (LC_CTYPE
, now
->val
.str
.startmb
,
2244 repertoire_name
, charmap
, NULL
);
2245 if ((copy_locale
->avail
& CTYPE_LOCALE
) == 0)
2247 /* Not yet loaded. So do it now. */
2248 if (locfile_read (copy_locale
, charmap
) != 0)
2252 if (copy_locale
->categories
[LC_CTYPE
].ctype
== NULL
)
2256 lr_ignore_rest (ldfile
, 1);
2258 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2262 /* Prepare the data structures. */
2263 ctype_startup (ldfile
, result
, charmap
, copy_locale
, ignore_content
);
2264 ctype
= result
->categories
[LC_CTYPE
].ctype
;
2266 /* Remember the repertoire we use. */
2267 if (!ignore_content
)
2268 ctype
->repertoire
= repertoire
;
2272 unsigned long int class_bit
= 0;
2273 unsigned long int class256_bit
= 0;
2274 int handle_digits
= 0;
2276 /* Of course we don't proceed beyond the end of file. */
2277 if (nowtok
== tok_eof
)
2280 /* Ignore empty lines. */
2281 if (nowtok
== tok_eol
)
2283 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2291 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2292 while (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2294 ctype_class_new (ldfile
, ctype
, now
->val
.str
.startmb
);
2295 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2296 if (now
->tok
!= tok_semicolon
)
2298 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2300 if (now
->tok
!= tok_eol
)
2302 %s: syntax error in definition of new character class"), "LC_CTYPE");
2306 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2307 while (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2309 ctype_map_new (ldfile
, ctype
, now
->val
.str
.startmb
, charmap
);
2310 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2311 if (now
->tok
!= tok_semicolon
)
2313 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2315 if (now
->tok
!= tok_eol
)
2317 %s: syntax error in definition of new character map"), "LC_CTYPE");
2321 /* Ignore the rest of the line if we don't need the input of
2325 lr_ignore_rest (ldfile
, 0);
2329 /* We simply forget the `class' keyword and use the following
2330 operand to determine the bit. */
2331 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2332 if (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2334 /* This may be one of the already defined class names. */
2335 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
2336 if (strcmp (ctype
->classnames
[cnt
], now
->val
.str
.startmb
) == 0)
2338 if (cnt
>= ctype
->nr_charclass
)
2340 #ifdef PREDEFINED_CLASSES
2341 if (now
->val
.str
.lenmb
== 8
2342 && memcmp ("special1", now
->val
.str
.startmb
, 8) == 0)
2343 class_bit
= _ISwspecial1
;
2344 else if (now
->val
.str
.lenmb
== 8
2345 && memcmp ("special2", now
->val
.str
.startmb
, 8) == 0)
2346 class_bit
= _ISwspecial2
;
2347 else if (now
->val
.str
.lenmb
== 8
2348 && memcmp ("special3", now
->val
.str
.startmb
, 8) == 0)
2349 class_bit
= _ISwspecial3
;
2353 /* OK, it's a new class. */
2354 ctype_class_new (ldfile
, ctype
, now
->val
.str
.startmb
);
2356 class_bit
= _ISwbit (ctype
->nr_charclass
- 1);
2361 class_bit
= _ISwbit (cnt
);
2363 free (now
->val
.str
.startmb
);
2366 else if (now
->tok
== tok_digit
)
2367 goto handle_tok_digit
;
2368 else if (now
->tok
< tok_upper
|| now
->tok
> tok_blank
)
2372 class_bit
= BITw (now
->tok
);
2373 class256_bit
= BIT (now
->tok
);
2376 /* The next character must be a semicolon. */
2377 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2378 if (now
->tok
!= tok_semicolon
)
2380 goto read_charclass
;
2393 /* Ignore the rest of the line if we don't need the input of
2397 lr_ignore_rest (ldfile
, 0);
2401 class_bit
= BITw (now
->tok
);
2402 class256_bit
= BIT (now
->tok
);
2405 ctype
->class_done
|= class_bit
;
2406 last_token
= tok_none
;
2407 ellipsis_token
= tok_none
;
2409 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2410 while (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
2413 struct charseq
*seq
;
2415 if (ellipsis_token
== tok_none
)
2417 if (get_character (now
, charmap
, repertoire
, &seq
, &wch
))
2420 if (!ignore_content
&& seq
!= NULL
&& seq
->nbytes
== 1)
2421 /* Yep, we can store information about this byte
2423 ctype
->class256_collection
[seq
->bytes
[0]] |= class256_bit
;
2425 if (!ignore_content
&& wch
!= ILLEGAL_CHAR_VALUE
2427 /* We have the UCS4 position. */
2428 *find_idx (ctype
, &ctype
->class_collection
,
2429 &ctype
->class_collection_max
,
2430 &ctype
->class_collection_act
, wch
) |= class_bit
;
2432 last_token
= now
->tok
;
2433 /* Terminate the string. */
2434 if (last_token
== tok_bsymbol
)
2436 now
->val
.str
.startmb
[now
->val
.str
.lenmb
] = '\0';
2437 last_str
= now
->val
.str
.startmb
;
2443 memcpy (last_charcode
, now
->val
.charcode
.bytes
, 16);
2444 last_charcode_len
= now
->val
.charcode
.nbytes
;
2446 if (!ignore_content
&& handle_digits
== 1)
2448 /* We must store the digit values. */
2449 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
2451 ctype
->mbdigits_max
+= 10;
2452 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
2453 (ctype
->mbdigits_max
2454 * sizeof (char *)));
2455 ctype
->wcdigits_max
+= 10;
2456 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
2457 (ctype
->wcdigits_max
2458 * sizeof (uint32_t)));
2461 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
2462 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
2464 else if (!ignore_content
&& handle_digits
== 2)
2466 /* We must store the digit values. */
2467 if (ctype
->outdigits_act
>= 10)
2469 lr_error (ldfile
, _("\
2470 %s: field `%s' does not contain exactly ten entries"),
2471 "LC_CTYPE", "outdigit");
2472 lr_ignore_rest (ldfile
, 0);
2476 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
2477 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
2478 ++ctype
->outdigits_act
;
2483 /* Now it gets complicated. We have to resolve the
2484 ellipsis problem. First we must distinguish between
2485 the different kind of ellipsis and this must match the
2486 tokens we have seen. */
2487 assert (last_token
!= tok_none
);
2489 if (last_token
!= now
->tok
)
2491 lr_error (ldfile
, _("\
2492 ellipsis range must be marked by two operands of same type"));
2493 lr_ignore_rest (ldfile
, 0);
2497 if (last_token
== tok_bsymbol
)
2499 if (ellipsis_token
== tok_ellipsis3
)
2500 lr_error (ldfile
, _("with symbolic name range values \
2501 the absolute ellipsis `...' must not be used"));
2503 charclass_symbolic_ellipsis (ldfile
, ctype
, charmap
,
2504 repertoire
, now
, last_str
,
2505 class256_bit
, class_bit
,
2510 handle_digits
, step
);
2512 else if (last_token
== tok_ucs4
)
2514 if (ellipsis_token
!= tok_ellipsis2
)
2515 lr_error (ldfile
, _("\
2516 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2518 charclass_ucs4_ellipsis (ldfile
, ctype
, charmap
,
2519 repertoire
, now
, last_wch
,
2520 class256_bit
, class_bit
,
2521 ignore_content
, handle_digits
,
2526 assert (last_token
== tok_charcode
);
2528 if (ellipsis_token
!= tok_ellipsis3
)
2529 lr_error (ldfile
, _("\
2530 with character code range values one must use the absolute ellipsis `...'"));
2532 charclass_charcode_ellipsis (ldfile
, ctype
, charmap
,
2536 class256_bit
, class_bit
,
2541 /* Now we have used the last value. */
2542 last_token
= tok_none
;
2545 /* Next we expect a semicolon or the end of the line. */
2546 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2547 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2550 if (last_token
!= tok_none
2551 && now
->tok
>= tok_ellipsis2
&& now
->tok
<= tok_ellipsis4_2
)
2553 if (now
->tok
== tok_ellipsis2_2
)
2555 now
->tok
= tok_ellipsis2
;
2558 else if (now
->tok
== tok_ellipsis4_2
)
2560 now
->tok
= tok_ellipsis4
;
2564 ellipsis_token
= now
->tok
;
2566 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2570 if (now
->tok
!= tok_semicolon
)
2573 /* And get the next character. */
2574 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2576 ellipsis_token
= tok_none
;
2582 /* Ignore the rest of the line if we don't need the input of
2586 lr_ignore_rest (ldfile
, 0);
2591 class_bit
= _ISwdigit
;
2592 class256_bit
= _ISdigit
;
2594 goto read_charclass
;
2597 /* Ignore the rest of the line if we don't need the input of
2601 lr_ignore_rest (ldfile
, 0);
2605 if (ctype
->outdigits_act
!= 0)
2606 lr_error (ldfile
, _("\
2607 %s: field `%s' declared more than once"),
2608 "LC_CTYPE", "outdigit");
2612 goto read_charclass
;
2615 /* Ignore the rest of the line if we don't need the input of
2619 lr_ignore_rest (ldfile
, 0);
2627 /* Ignore the rest of the line if we don't need the input of
2631 lr_ignore_rest (ldfile
, 0);
2639 /* Ignore the rest of the line if we don't need the input of
2643 lr_ignore_rest (ldfile
, 0);
2647 /* We simply forget the `map' keyword and use the following
2648 operand to determine the mapping. */
2649 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2650 if (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2654 for (cnt
= 2; cnt
< ctype
->map_collection_nr
; ++cnt
)
2655 if (strcmp (now
->val
.str
.startmb
, ctype
->mapnames
[cnt
]) == 0)
2658 if (cnt
< ctype
->map_collection_nr
)
2659 free (now
->val
.str
.startmb
);
2661 /* OK, it's a new map. */
2662 ctype_map_new (ldfile
, ctype
, now
->val
.str
.startmb
, charmap
);
2666 else if (now
->tok
< tok_toupper
|| now
->tok
> tok_tolower
)
2669 mapidx
= now
->tok
- tok_toupper
;
2671 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2672 /* This had better be a semicolon. */
2673 if (now
->tok
!= tok_semicolon
)
2677 /* Test whether this mapping was already defined. */
2678 if (ctype
->tomap_done
[mapidx
])
2680 lr_error (ldfile
, _("duplicated definition for mapping `%s'"),
2681 ctype
->mapnames
[mapidx
]);
2682 lr_ignore_rest (ldfile
, 0);
2685 ctype
->tomap_done
[mapidx
] = 1;
2687 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2688 while (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
2690 struct charseq
*from_seq
;
2692 struct charseq
*to_seq
;
2695 /* Every pair starts with an opening brace. */
2696 if (now
->tok
!= tok_open_brace
)
2699 /* Next comes the from-value. */
2700 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2701 if (get_character (now
, charmap
, repertoire
, &from_seq
,
2705 /* The next is a comma. */
2706 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2707 if (now
->tok
!= tok_comma
)
2710 /* And the other value. */
2711 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2712 if (get_character (now
, charmap
, repertoire
, &to_seq
,
2716 /* And the last thing is the closing brace. */
2717 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2718 if (now
->tok
!= tok_close_brace
)
2721 if (!ignore_content
)
2723 /* Check whether the mapping converts from an ASCII value
2724 to a non-ASCII value. */
2725 if (from_seq
!= NULL
&& from_seq
->nbytes
== 1
2726 && isascii (from_seq
->bytes
[0])
2727 && to_seq
!= NULL
&& (to_seq
->nbytes
!= 1
2728 || !isascii (to_seq
->bytes
[0])))
2729 ctype
->to_nonascii
= 1;
2731 if (mapidx
< 2 && from_seq
!= NULL
&& to_seq
!= NULL
2732 && from_seq
->nbytes
== 1 && to_seq
->nbytes
== 1)
2733 /* We can use this value. */
2734 ctype
->map256_collection
[mapidx
][from_seq
->bytes
[0]]
2737 if (from_wch
!= ILLEGAL_CHAR_VALUE
2738 && to_wch
!= ILLEGAL_CHAR_VALUE
)
2739 /* Both correct values. */
2740 *find_idx (ctype
, &ctype
->map_collection
[mapidx
],
2741 &ctype
->map_collection_max
[mapidx
],
2742 &ctype
->map_collection_act
[mapidx
],
2746 /* Now comes a semicolon or the end of the line/file. */
2747 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2748 if (now
->tok
== tok_semicolon
)
2749 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2753 case tok_translit_start
:
2754 /* Ignore the entire translit section with its peculiar syntax
2755 if we don't need the input. */
2760 lr_ignore_rest (ldfile
, 0);
2761 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2763 while (now
->tok
!= tok_translit_end
&& now
->tok
!= tok_eof
);
2765 if (now
->tok
== tok_eof
)
2766 lr_error (ldfile
, _(\
2767 "%s: `translit_start' section does not end with `translit_end'"),
2773 /* The rest of the line better should be empty. */
2774 lr_ignore_rest (ldfile
, 1);
2776 /* We count here the number of allocated entries in the `translit'
2780 ldfile
->translate_strings
= 1;
2781 ldfile
->return_widestr
= 1;
2783 /* We proceed until we see the `translit_end' token. */
2784 while (now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
),
2785 now
->tok
!= tok_translit_end
&& now
->tok
!= tok_eof
)
2787 if (now
->tok
== tok_eol
)
2788 /* Ignore empty lines. */
2791 if (now
->tok
== tok_include
)
2793 /* We have to include locale. */
2794 const char *locale_name
;
2795 const char *repertoire_name
;
2796 struct translit_include_t
*include_stmt
, **include_ptr
;
2798 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2799 /* This should be a string or an identifier. In any
2800 case something to name a locale. */
2801 if (now
->tok
!= tok_string
&& now
->tok
!= tok_ident
)
2804 lr_error (ldfile
, _("%s: syntax error"), "LC_CTYPE");
2805 lr_ignore_rest (ldfile
, 0);
2808 locale_name
= now
->val
.str
.startmb
;
2810 /* Next should be a semicolon. */
2811 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2812 if (now
->tok
!= tok_semicolon
)
2813 goto translit_syntax
;
2815 /* Now the repertoire name. */
2816 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2817 if ((now
->tok
!= tok_string
&& now
->tok
!= tok_ident
)
2818 || now
->val
.str
.startmb
== NULL
)
2819 goto translit_syntax
;
2820 repertoire_name
= now
->val
.str
.startmb
;
2821 if (repertoire_name
[0] == '\0')
2822 /* Ignore the empty string. */
2823 repertoire_name
= NULL
;
2825 /* Save the include statement for later processing. */
2826 include_stmt
= (struct translit_include_t
*)
2827 xmalloc (sizeof (struct translit_include_t
));
2828 include_stmt
->copy_locale
= locale_name
;
2829 include_stmt
->copy_repertoire
= repertoire_name
;
2830 include_stmt
->next
= NULL
;
2832 include_ptr
= &ctype
->translit_include
;
2833 while (*include_ptr
!= NULL
)
2834 include_ptr
= &(*include_ptr
)->next
;
2835 *include_ptr
= include_stmt
;
2837 /* The rest of the line must be empty. */
2838 lr_ignore_rest (ldfile
, 1);
2840 /* Make sure the locale is read. */
2841 add_to_readlist (LC_CTYPE
, locale_name
, repertoire_name
,
2845 else if (now
->tok
== tok_default_missing
)
2851 /* We expect a single character or string as the
2853 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2854 wstr
= read_widestring (ldfile
, now
, charmap
,
2859 if (ctype
->default_missing
!= NULL
)
2861 lr_error (ldfile
, _("\
2862 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2863 WITH_CUR_LOCALE (error_at_line (0, 0,
2864 ctype
->default_missing_file
,
2865 ctype
->default_missing_lineno
,
2867 previous definition was here")));
2871 ctype
->default_missing
= wstr
;
2872 ctype
->default_missing_file
= ldfile
->fname
;
2873 ctype
->default_missing_lineno
= ldfile
->lineno
;
2875 /* We can have more entries, ignore them. */
2876 lr_ignore_rest (ldfile
, 0);
2879 else if (wstr
== (uint32_t *) -1l)
2880 /* This was a syntax error. */
2883 /* Maybe there is another replacement we can use. */
2884 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2885 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2887 /* Nothing found. We tell the user. */
2888 lr_error (ldfile
, _("\
2889 %s: no representable `default_missing' definition found"), "LC_CTYPE");
2892 if (now
->tok
!= tok_semicolon
)
2893 goto translit_syntax
;
2898 else if (now
->tok
== tok_translit_ignore
)
2900 read_translit_ignore_entry (ldfile
, ctype
, charmap
,
2905 read_translit_entry (ldfile
, ctype
, now
, charmap
, repertoire
);
2907 ldfile
->return_widestr
= 0;
2909 if (now
->tok
== tok_eof
)
2910 lr_error (ldfile
, _(\
2911 "%s: `translit_start' section does not end with `translit_end'"),
2917 /* Ignore the rest of the line if we don't need the input of
2921 lr_ignore_rest (ldfile
, 0);
2925 /* This could mean one of several things. First test whether
2926 it's a character class name. */
2927 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
2928 if (strcmp (now
->val
.str
.startmb
, ctype
->classnames
[cnt
]) == 0)
2930 if (cnt
< ctype
->nr_charclass
)
2932 class_bit
= _ISwbit (cnt
);
2933 class256_bit
= cnt
<= 11 ? _ISbit (cnt
) : 0;
2934 free (now
->val
.str
.startmb
);
2935 goto read_charclass
;
2937 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
)
2938 if (strcmp (now
->val
.str
.startmb
, ctype
->mapnames
[cnt
]) == 0)
2940 if (cnt
< ctype
->map_collection_nr
)
2943 free (now
->val
.str
.startmb
);
2946 #ifdef PREDEFINED_CLASSES
2947 if (strcmp (now
->val
.str
.startmb
, "special1") == 0)
2949 class_bit
= _ISwspecial1
;
2950 free (now
->val
.str
.startmb
);
2951 goto read_charclass
;
2953 if (strcmp (now
->val
.str
.startmb
, "special2") == 0)
2955 class_bit
= _ISwspecial2
;
2956 free (now
->val
.str
.startmb
);
2957 goto read_charclass
;
2959 if (strcmp (now
->val
.str
.startmb
, "special3") == 0)
2961 class_bit
= _ISwspecial3
;
2962 free (now
->val
.str
.startmb
);
2963 goto read_charclass
;
2965 if (strcmp (now
->val
.str
.startmb
, "tosymmetric") == 0)
2974 /* Next we assume `LC_CTYPE'. */
2975 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2976 if (now
->tok
== tok_eof
)
2978 if (now
->tok
== tok_eol
)
2979 lr_error (ldfile
, _("%s: incomplete `END' line"),
2981 else if (now
->tok
!= tok_lc_ctype
)
2982 lr_error (ldfile
, _("\
2983 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2984 lr_ignore_rest (ldfile
, now
->tok
== tok_lc_ctype
);
2989 if (now
->tok
!= tok_eof
)
2990 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2993 /* Prepare for the next round. */
2994 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2998 /* When we come here we reached the end of the file. */
2999 lr_error (ldfile
, _("%s: premature end of file"), "LC_CTYPE");
/* Supply defaults for everything the locale source left unspecified:
   each character class whose bit is still clear in ctype->class_done,
   the two case mappings whose ctype->tomap_done[] entry is still 0, and
   any of the ten outdigit slots beyond ctype->outdigits_act.  Every
   default is recorded twice: in the 256-entry byte table (class256_
   collection / map256_collection) and in the wide-character collection
   reached through ELEM.  */
3004 set_class_defaults (struct locale_ctype_t
*ctype
,
3005 const struct charmap_t
*charmap
,
3006 struct repertoire_t
*repertoire
)
3010 /* This function defines the default values for the classes and conversions
3011 according to POSIX.2 2.5.2.1.
3012 It may seem that the order of these if-blocks is arbitrary but it is NOT.
3013 Don't move them unless you know what you do! */
/* Nested helper: for every character value FROM..TO set class bit BITPOS
   in both the byte table and the wide-character collection.  */
3015 auto void set_default (int bitpos
, int from
, int to
);
3017 void set_default (int bitpos
, int from
, int to
)
3021 int bit
= _ISbit (bitpos
);
3022 int bitw
= _ISwbit (bitpos
);
3023 /* Define string. */
3026 for (ch
= from
; ch
<= to
; ++ch
)
3028 struct charseq
*seq
;
3031 seq
= charmap_find_value (charmap
, tmp
, 1);
3035 sprintf (buf
, "U%08X", ch
);
3036 seq
= charmap_find_value (charmap
, buf
, 9);
3041 WITH_CUR_LOCALE (error (0, 0, _("\
3042 %s: character `%s' not defined in charmap while needed as default value"),
3045 else if (seq
->nbytes
!= 1)
3046 WITH_CUR_LOCALE (error (0, 0, _("\
3047 %s: character `%s' in charmap not representable with one byte"),
3050 ctype
->class256_collection
[seq
->bytes
[0]] |= bit
;
3052 /* No need to search here, the ASCII value is also the Unicode
3054 ELEM (ctype
, class_collection
, , ch
) |= bitw
;
3058 /* Set default values if keyword was not present. */
3059 if ((ctype
->class_done
& BITw (tok_upper
)) == 0)
3060 /* "If this keyword [upper] is not specified, the uppercase letters
3061 `A' through `Z', ..., shall automatically belong to this class,
3062 with implementation defined character values." [P1003.2, 2.5.2.1] */
3063 set_default (BITPOS (tok_upper
), 'A', 'Z');
3065 if ((ctype
->class_done
& BITw (tok_lower
)) == 0)
3066 /* "If this keyword [lower] is not specified, the lowercase letters
3067 `a' through `z', ..., shall automatically belong to this class,
3068 with implementation defined character values." [P1003.2, 2.5.2.1] */
3069 set_default (BITPOS (tok_lower
), 'a', 'z');
3071 if ((ctype
->class_done
& BITw (tok_alpha
)) == 0)
3073 /* Table 2-6 in P1003.2 says that characters in class `upper' or
3074 class `lower' *must* be in class `alpha'. */
3075 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
);
3076 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
);
3078 for (cnt
= 0; cnt
< 256; ++cnt
)
3079 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3080 ctype
->class256_collection
[cnt
] |= BIT (tok_alpha
);
3082 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3083 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3084 ctype
->class_collection
[cnt
] |= BITw (tok_alpha
);
3087 if ((ctype
->class_done
& BITw (tok_digit
)) == 0)
3088 /* "If this keyword [digit] is not specified, the digits `0' through
3089 `9', ..., shall automatically belong to this class, with
3090 implementation-defined character values." [P1003.2, 2.5.2.1] */
3091 set_default (BITPOS (tok_digit
), '0', '9');
3093 /* "Only characters specified for the `alpha' and `digit' keyword
3094 shall be specified. Characters specified for the keyword `alpha'
3095 and `digit' are automatically included in this class. */
3097 unsigned long int mask
= BIT (tok_alpha
) | BIT (tok_digit
);
3098 unsigned long int maskw
= BITw (tok_alpha
) | BITw (tok_digit
);
3100 for (cnt
= 0; cnt
< 256; ++cnt
)
3101 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3102 ctype
->class256_collection
[cnt
] |= BIT (tok_alnum
);
3104 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3105 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3106 ctype
->class_collection
[cnt
] |= BITw (tok_alnum
);
3109 if ((ctype
->class_done
& BITw (tok_space
)) == 0)
3110 /* "If this keyword [space] is not specified, the characters <space>,
3111 <form-feed>, <newline>, <carriage-return>, <tab>, and
3112 <vertical-tab>, ..., shall automatically belong to this class,
3113 with implementation-defined character values." [P1003.2, 2.5.2.1] */
3115 struct charseq
*seq
;
3117 seq
= charmap_find_value (charmap
, "space", 5);
3119 seq
= charmap_find_value (charmap
, "SP", 2);
3121 seq
= charmap_find_value (charmap
, "U00000020", 9);
3125 WITH_CUR_LOCALE (error (0, 0, _("\
3126 %s: character `%s' not defined while needed as default value"),
3127 "LC_CTYPE", "<space>"));
3129 else if (seq
->nbytes
!= 1)
3130 WITH_CUR_LOCALE (error (0, 0, _("\
3131 %s: character `%s' in charmap not representable with one byte"),
3132 "LC_CTYPE", "<space>"));
3134 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3136 /* No need to search. */
3137 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_space
);
3139 seq
= charmap_find_value (charmap
, "form-feed", 9);
3141 seq
= charmap_find_value (charmap
, "U0000000C", 9);
3145 WITH_CUR_LOCALE (error (0, 0, _("\
3146 %s: character `%s' not defined while needed as default value"),
3147 "LC_CTYPE", "<form-feed>"));
3149 else if (seq
->nbytes
!= 1)
3150 WITH_CUR_LOCALE (error (0, 0, _("\
3151 %s: character `%s' in charmap not representable with one byte"),
3152 "LC_CTYPE", "<form-feed>"));
3154 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3156 /* No need to search. */
3157 ELEM (ctype
, class_collection
, , L
'\f') |= BITw (tok_space
);
3160 seq
= charmap_find_value (charmap
, "newline", 7);
3162 seq
= charmap_find_value (charmap
, "U0000000A", 9);
3166 WITH_CUR_LOCALE (error (0, 0, _("\
3167 character `%s' not defined while needed as default value"),
3170 else if (seq
->nbytes
!= 1)
3171 WITH_CUR_LOCALE (error (0, 0, _("\
3172 %s: character `%s' in charmap not representable with one byte"),
3173 "LC_CTYPE", "<newline>"));
3175 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3177 /* No need to search. */
3178 ELEM (ctype
, class_collection
, , L
'\n') |= BITw (tok_space
);
3181 seq
= charmap_find_value (charmap
, "carriage-return", 15);
3183 seq
= charmap_find_value (charmap
, "U0000000D", 9);
3187 WITH_CUR_LOCALE (error (0, 0, _("\
3188 %s: character `%s' not defined while needed as default value"),
3189 "LC_CTYPE", "<carriage-return>"));
3191 else if (seq
->nbytes
!= 1)
3192 WITH_CUR_LOCALE (error (0, 0, _("\
3193 %s: character `%s' in charmap not representable with one byte"),
3194 "LC_CTYPE", "<carriage-return>"));
3196 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3198 /* No need to search. */
3199 ELEM (ctype
, class_collection
, , L
'\r') |= BITw (tok_space
);
3202 seq
= charmap_find_value (charmap
, "tab", 3);
3204 seq
= charmap_find_value (charmap
, "U00000009", 9);
3208 WITH_CUR_LOCALE (error (0, 0, _("\
3209 %s: character `%s' not defined while needed as default value"),
3210 "LC_CTYPE", "<tab>"));
3212 else if (seq
->nbytes
!= 1)
3213 WITH_CUR_LOCALE (error (0, 0, _("\
3214 %s: character `%s' in charmap not representable with one byte"),
3215 "LC_CTYPE", "<tab>"));
3217 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3219 /* No need to search. */
3220 ELEM (ctype
, class_collection
, , L
'\t') |= BITw (tok_space
);
3223 seq
= charmap_find_value (charmap
, "vertical-tab", 12);
3225 seq
= charmap_find_value (charmap
, "U0000000B", 9);
3229 WITH_CUR_LOCALE (error (0, 0, _("\
3230 %s: character `%s' not defined while needed as default value"),
3231 "LC_CTYPE", "<vertical-tab>"));
3233 else if (seq
->nbytes
!= 1)
3234 WITH_CUR_LOCALE (error (0, 0, _("\
3235 %s: character `%s' in charmap not representable with one byte"),
3236 "LC_CTYPE", "<vertical-tab>"));
3238 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3240 /* No need to search. */
3241 ELEM (ctype
, class_collection
, , L
'\v') |= BITw (tok_space
);
3244 if ((ctype
->class_done
& BITw (tok_xdigit
)) == 0)
3245 /* "If this keyword is not specified, the digits `0' to `9', the
3246 uppercase letters `A' through `F', and the lowercase letters `a'
3247 through `f', ..., shall automatically belong to this class, with
3248 implementation defined character values." [P1003.2, 2.5.2.1] */
3250 set_default (BITPOS (tok_xdigit
), '0', '9');
3251 set_default (BITPOS (tok_xdigit
), 'A', 'F');
3252 set_default (BITPOS (tok_xdigit
), 'a', 'f');
3255 if ((ctype
->class_done
& BITw (tok_blank
)) == 0)
3256 /* "If this keyword [blank] is unspecified, the characters <space> and
3257 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
3259 struct charseq
*seq
;
3261 seq
= charmap_find_value (charmap
, "space", 5);
3263 seq
= charmap_find_value (charmap
, "SP", 2);
3265 seq
= charmap_find_value (charmap
, "U00000020", 9);
3269 WITH_CUR_LOCALE (error (0, 0, _("\
3270 %s: character `%s' not defined while needed as default value"),
3271 "LC_CTYPE", "<space>"));
3273 else if (seq
->nbytes
!= 1)
3274 WITH_CUR_LOCALE (error (0, 0, _("\
3275 %s: character `%s' in charmap not representable with one byte"),
3276 "LC_CTYPE", "<space>"));
3278 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_blank
);
3280 /* No need to search. */
3281 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_blank
);
3284 seq
= charmap_find_value (charmap
, "tab", 3);
3286 seq
= charmap_find_value (charmap
, "U00000009", 9);
3290 WITH_CUR_LOCALE (error (0, 0, _("\
3291 %s: character `%s' not defined while needed as default value"),
3292 "LC_CTYPE", "<tab>"));
3294 else if (seq
->nbytes
!= 1)
3295 WITH_CUR_LOCALE (error (0, 0, _("\
3296 %s: character `%s' in charmap not representable with one byte"),
3297 "LC_CTYPE", "<tab>"));
3299 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_blank
);
3301 /* No need to search. */
3302 ELEM (ctype
, class_collection
, , L
'\t') |= BITw (tok_blank
);
3305 if ((ctype
->class_done
& BITw (tok_graph
)) == 0)
3306 /* "If this keyword [graph] is not specified, characters specified for
3307 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3308 shall belong to this character class." [P1003.2, 2.5.2.1] */
3310 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
) |
3311 BIT (tok_alpha
) | BIT (tok_digit
) | BIT (tok_xdigit
) | BIT (tok_punct
);
3312 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
) |
3313 BITw (tok_alpha
) | BITw (tok_digit
) | BITw (tok_xdigit
) |
3317 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3318 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3319 ctype
->class_collection
[cnt
] |= BITw (tok_graph
);
3321 for (cnt
= 0; cnt
< 256; ++cnt
)
3322 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3323 ctype
->class256_collection
[cnt
] |= BIT (tok_graph
);
3326 if ((ctype
->class_done
& BITw (tok_print
)) == 0)
3327 /* "If this keyword [print] is not provided, characters specified for
3328 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3329 and the <space> character shall belong to this character class."
3330 [P1003.2, 2.5.2.1] */
3332 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
) |
3333 BIT (tok_alpha
) | BIT (tok_digit
) | BIT (tok_xdigit
) | BIT (tok_punct
);
3334 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
) |
3335 BITw (tok_alpha
) | BITw (tok_digit
) | BITw (tok_xdigit
) |
3338 struct charseq
*seq
;
3340 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3341 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3342 ctype
->class_collection
[cnt
] |= BITw (tok_print
);
3344 for (cnt
= 0; cnt
< 256; ++cnt
)
3345 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3346 ctype
->class256_collection
[cnt
] |= BIT (tok_print
);
3349 seq
= charmap_find_value (charmap
, "space", 5);
3351 seq
= charmap_find_value (charmap
, "SP", 2);
3353 seq
= charmap_find_value (charmap
, "U00000020", 9);
3357 WITH_CUR_LOCALE (error (0, 0, _("\
3358 %s: character `%s' not defined while needed as default value"),
3359 "LC_CTYPE", "<space>"));
3361 else if (seq
->nbytes
!= 1)
3362 WITH_CUR_LOCALE (error (0, 0, _("\
3363 %s: character `%s' in charmap not representable with one byte"),
3364 "LC_CTYPE", "<space>"));
3366 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_print
);
3368 /* No need to search. */
3369 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_print
);
3372 if (ctype
->tomap_done
[0] == 0)
3373 /* "If this keyword [toupper] is not specified, the lowercase letters
3374 `a' through `z', and their corresponding uppercase letters `A' to
3375 `Z', ..., shall automatically be included, with implementation-
3376 defined character values." [P1003.2, 2.5.2.1] */
3381 strcpy (tmp
, "<?>");
3383 for (ch
= 'a'; ch
<= 'z'; ++ch
)
3385 struct charseq
*seq_from
, *seq_to
;
3389 seq_from
= charmap_find_value (charmap
, &tmp
[1], 1);
3390 if (seq_from
== NULL
)
3393 sprintf (buf
, "U%08X", ch
);
3394 seq_from
= charmap_find_value (charmap
, buf
, 9);
3396 if (seq_from
== NULL
)
3399 WITH_CUR_LOCALE (error (0, 0, _("\
3400 %s: character `%s' not defined while needed as default value"),
3403 else if (seq_from
->nbytes
!= 1)
3406 WITH_CUR_LOCALE (error (0, 0, _("\
3407 %s: character `%s' needed as default value not representable with one byte"),
3412 /* This conversion is implementation defined. */
3413 tmp
[1] = (char) (ch
+ ('A' - 'a'));
3414 seq_to
= charmap_find_value (charmap
, &tmp
[1], 1);
3418 sprintf (buf
, "U%08X", ch
+ ('A' - 'a'));
3419 seq_to
= charmap_find_value (charmap
, buf
, 9);
3424 WITH_CUR_LOCALE (error (0, 0, _("\
3425 %s: character `%s' not defined while needed as default value"),
3428 else if (seq_to
->nbytes
!= 1)
3431 WITH_CUR_LOCALE (error (0, 0, _("\
3432 %s: character `%s' needed as default value not representable with one byte"),
3436 /* The index [0] is determined by the order of the
3437 `ctype_map_newP' calls in `ctype_startup'. */
3438 ctype
->map256_collection
[0][seq_from
->bytes
[0]]
3442 /* No need to search. */
3443 ELEM (ctype
, map_collection
, [0], ch
) = ch
+ ('A' - 'a');
3447 if (ctype
->tomap_done
[1] == 0)
3448 /* "If this keyword [tolower] is not specified, the mapping shall be
3449 the reverse mapping of the one specified to `toupper'." [P1003.2] */
3451 for (cnt
= 0; cnt
< ctype
->map_collection_act
[0]; ++cnt
)
3452 if (ctype
->map_collection
[0][cnt
] != 0)
3453 ELEM (ctype
, map_collection
, [1],
3454 ctype
->map_collection
[0][cnt
])
3455 = ctype
->charnames
[cnt
];
3457 for (cnt
= 0; cnt
< 256; ++cnt
)
3458 if (ctype
->map256_collection
[0][cnt
] != 0)
3459 ctype
->map256_collection
[1][ctype
->map256_collection
[0][cnt
]] = cnt
;
/* Fewer than ten outdigits were supplied: complain if the list was
   started but left incomplete, then fill the remaining slots from the
   charmap (or a plain replacement) and record that all ten exist.  */
3462 if (ctype
->outdigits_act
!= 10)
3464 if (ctype
->outdigits_act
!= 0)
3465 WITH_CUR_LOCALE (error (0, 0, _("\
3466 %s: field `%s' does not contain exactly ten entries"),
3467 "LC_CTYPE", "outdigit"));
3469 for (cnt
= ctype
->outdigits_act
; cnt
< 10; ++cnt
)
3471 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3474 if (ctype
->mboutdigits
[cnt
] == NULL
)
3475 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3477 strlen (longnames
[cnt
]));
3479 if (ctype
->mboutdigits
[cnt
] == NULL
)
3480 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3483 if (ctype
->mboutdigits
[cnt
] == NULL
)
3485 /* Provide a replacement. */
3486 WITH_CUR_LOCALE (error (0, 0, _("\
3487 no output digits defined and none of the standard names in the charmap")));
3489 ctype
->mboutdigits
[cnt
] = obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
3490 sizeof (struct charseq
)
3493 /* This is better than nothing. */
3494 ctype
->mboutdigits
[cnt
]->bytes
[0] = digits
[cnt
];
3495 ctype
->mboutdigits
[cnt
]->nbytes
= 1;
3498 ctype
->wcoutdigits
[cnt
] = L
'0' + cnt
;
3501 ctype
->outdigits_act
= 10;
/* Construction of sparse 3-level tables.
   See wchar-lookup.h for their structure and the meaning of p and q.  */

struct wctype_table
{
  /* Parameters: a level-3 block holds 1 << p words, a level-2 block
     holds 1 << q entries.  */
  unsigned int p;
  unsigned int q;
  /* Working representation.  For each level: the number of blocks
     allocated, the number of blocks in use, and the storage itself.  */
  size_t level1_alloc;
  size_t level1_size;
  uint32_t *level1;
  size_t level2_alloc;
  size_t level2_size;
  uint32_t *level2;
  size_t level3_alloc;
  size_t level3_size;
  uint32_t *level3;
  /* Compressed representation, produced by wctype_table_finalize.  */
  size_t result_size;
  char *result;
};
3529 /* Initialize. Assumes t->p and t->q have already been set. */
3531 wctype_table_init (struct wctype_table
*t
)
3534 t
->level1_alloc
= t
->level1_size
= 0;
3536 t
->level2_alloc
= t
->level2_size
= 0;
3538 t
->level3_alloc
= t
->level3_size
= 0;
3541 /* Retrieve an entry. */
3543 wctype_table_get (struct wctype_table
*t
, uint32_t wc
)
3545 uint32_t index1
= wc
>> (t
->q
+ t
->p
+ 5);
3546 if (index1
< t
->level1_size
)
3548 uint32_t lookup1
= t
->level1
[index1
];
3549 if (lookup1
!= EMPTY
)
3551 uint32_t index2
= ((wc
>> (t
->p
+ 5)) & ((1 << t
->q
) - 1))
3552 + (lookup1
<< t
->q
);
3553 uint32_t lookup2
= t
->level2
[index2
];
3554 if (lookup2
!= EMPTY
)
3556 uint32_t index3
= ((wc
>> 5) & ((1 << t
->p
) - 1))
3557 + (lookup2
<< t
->p
);
3558 uint32_t lookup3
= t
->level3
[index3
];
3559 uint32_t index4
= wc
& 0x1f;
3561 return (lookup3
>> index4
) & 1;
3568 /* Add one entry. */
3570 wctype_table_add (struct wctype_table
*t
, uint32_t wc
)
3572 uint32_t index1
= wc
>> (t
->q
+ t
->p
+ 5);
3573 uint32_t index2
= (wc
>> (t
->p
+ 5)) & ((1 << t
->q
) - 1);
3574 uint32_t index3
= (wc
>> 5) & ((1 << t
->p
) - 1);
3575 uint32_t index4
= wc
& 0x1f;
3578 if (index1
>= t
->level1_size
)
3580 if (index1
>= t
->level1_alloc
)
3582 size_t alloc
= 2 * t
->level1_alloc
;
3583 if (alloc
<= index1
)
3585 t
->level1
= (uint32_t *) xrealloc ((char *) t
->level1
,
3586 alloc
* sizeof (uint32_t));
3587 t
->level1_alloc
= alloc
;
3589 while (index1
>= t
->level1_size
)
3590 t
->level1
[t
->level1_size
++] = EMPTY
;
3593 if (t
->level1
[index1
] == EMPTY
)
3595 if (t
->level2_size
== t
->level2_alloc
)
3597 size_t alloc
= 2 * t
->level2_alloc
+ 1;
3598 t
->level2
= (uint32_t *) xrealloc ((char *) t
->level2
,
3599 (alloc
<< t
->q
) * sizeof (uint32_t));
3600 t
->level2_alloc
= alloc
;
3602 i1
= t
->level2_size
<< t
->q
;
3603 i2
= (t
->level2_size
+ 1) << t
->q
;
3604 for (i
= i1
; i
< i2
; i
++)
3605 t
->level2
[i
] = EMPTY
;
3606 t
->level1
[index1
] = t
->level2_size
++;
3609 index2
+= t
->level1
[index1
] << t
->q
;
3611 if (t
->level2
[index2
] == EMPTY
)
3613 if (t
->level3_size
== t
->level3_alloc
)
3615 size_t alloc
= 2 * t
->level3_alloc
+ 1;
3616 t
->level3
= (uint32_t *) xrealloc ((char *) t
->level3
,
3617 (alloc
<< t
->p
) * sizeof (uint32_t));
3618 t
->level3_alloc
= alloc
;
3620 i1
= t
->level3_size
<< t
->p
;
3621 i2
= (t
->level3_size
+ 1) << t
->p
;
3622 for (i
= i1
; i
< i2
; i
++)
3624 t
->level2
[index2
] = t
->level3_size
++;
3627 index3
+= t
->level2
[index2
] << t
->p
;
3629 t
->level3
[index3
] |= (uint32_t)1 << index4
;
3632 /* Finalize and shrink. */
/* Merges identical level3 blocks and then identical level2 blocks of T,
   rewrites the block numbers that refer to them, and serializes the three
   levels behind a five-word header into a buffer obtained from xmalloc
   and stored in T->result.  */
3634 wctype_table_finalize (struct wctype_table
*t
)
/* Renumbering tables, sized by the block counts on entry.  They are
   indexed further down with the block numbers stored in level2 and
   level1.  */
3637 uint32_t reorder3
[t
->level3_size
];
3638 uint32_t reorder2
[t
->level2_size
];
3639 uint32_t level1_offset
, level2_offset
, level3_offset
;
3641 /* Uniquify level3 blocks. */
/* A level3 block is (1 << p) uint32_t words.  Block J is compared with
   each of the blocks 0 .. K-1.
   NOTE(review): the statements that maintain K and fill reorder3 are not
   among the lines visible here.  */
3643 for (j
= 0; j
< t
->level3_size
; j
++)
3645 for (i
= 0; i
< k
; i
++)
3646 if (memcmp (&t
->level3
[i
<< t
->p
], &t
->level3
[j
<< t
->p
],
3647 (1 << t
->p
) * sizeof (uint32_t)) == 0)
3649 /* Relocate block j to block i. */
3654 memcpy (&t
->level3
[i
<< t
->p
], &t
->level3
[j
<< t
->p
],
3655 (1 << t
->p
) * sizeof (uint32_t));
/* Pass every non-EMPTY level2 entry through reorder3.  level2 holds
   (level2_size << q) entries in total.  */
3661 for (i
= 0; i
< (t
->level2_size
<< t
->q
); i
++)
3662 if (t
->level2
[i
] != EMPTY
)
3663 t
->level2
[i
] = reorder3
[t
->level2
[i
]];
3665 /* Uniquify level2 blocks. */
/* Same scheme as for level3, with blocks of (1 << q) uint32_t words.
   NOTE(review): the statements that maintain K and fill reorder2 are not
   among the lines visible here.  */
3667 for (j
= 0; j
< t
->level2_size
; j
++)
3669 for (i
= 0; i
< k
; i
++)
3670 if (memcmp (&t
->level2
[i
<< t
->q
], &t
->level2
[j
<< t
->q
],
3671 (1 << t
->q
) * sizeof (uint32_t)) == 0)
3673 /* Relocate block j to block i. */
3678 memcpy (&t
->level2
[i
<< t
->q
], &t
->level2
[j
<< t
->q
],
3679 (1 << t
->q
) * sizeof (uint32_t));
/* Pass every non-EMPTY level1 entry through reorder2.  */
3685 for (i
= 0; i
< t
->level1_size
; i
++)
3686 if (t
->level1
[i
] != EMPTY
)
3687 t
->level1
[i
] = reorder2
[t
->level1
[i
]];
3689 /* Create and fill the resulting compressed representation. */
/* Size in bytes: five header words followed by the level1, level2 and
   level3 arrays.
   NOTE(review): the left-hand side of this sum is not visible here;
   presumably it is t->result_size, which sizes the allocation below.  */
3691 5 * sizeof (uint32_t)
3692 + t
->level1_size
* sizeof (uint32_t)
3693 + (t
->level2_size
<< t
->q
) * sizeof (uint32_t)
3694 + (t
->level3_size
<< t
->p
) * sizeof (uint32_t);
3695 t
->result
= (char *) xmalloc (t
->result_size
);
/* Three byte offsets: past the header, past the header plus level1, and
   past the header plus level1 and level2.
   NOTE(review): the assignment targets are not visible here; presumably
   level1_offset, level2_offset and level3_offset in this order.  */
3698 5 * sizeof (uint32_t);
3700 5 * sizeof (uint32_t)
3701 + t
->level1_size
* sizeof (uint32_t);
3703 5 * sizeof (uint32_t)
3704 + t
->level1_size
* sizeof (uint32_t)
3705 + (t
->level2_size
<< t
->q
) * sizeof (uint32_t);
/* Header words: [0] = q + p + 5, [1] = number of level1 entries,
   [2] = p + 5, [3] = mask with the q low bits set, [4] = mask with the
   p low bits set.
   NOTE(review): the constant 5 presumably accounts for the 32 bits held
   in each level3 word -- confirm against the reader of this format.  */
3707 ((uint32_t *) t
->result
)[0] = t
->q
+ t
->p
+ 5;
3708 ((uint32_t *) t
->result
)[1] = t
->level1_size
;
3709 ((uint32_t *) t
->result
)[2] = t
->p
+ 5;
3710 ((uint32_t *) t
->result
)[3] = (1 << t
->q
) - 1;
3711 ((uint32_t *) t
->result
)[4] = (1 << t
->p
) - 1;
/* A non-EMPTY level1 entry is stored as the byte offset, counted from the
   start of the result, of the level2 block it names.
   NOTE(review): the value stored for EMPTY entries is on a line not
   visible here.  */
3713 for (i
= 0; i
< t
->level1_size
; i
++)
3714 ((uint32_t *) (t
->result
+ level1_offset
))[i
] =
3715 (t
->level1
[i
] == EMPTY
3717 : (t
->level1
[i
] << t
->q
) * sizeof (uint32_t) + level2_offset
);
/* Likewise, a non-EMPTY level2 entry becomes the byte offset of the
   level3 block it names.  */
3719 for (i
= 0; i
< (t
->level2_size
<< t
->q
); i
++)
3720 ((uint32_t *) (t
->result
+ level2_offset
))[i
] =
3721 (t
->level2
[i
] == EMPTY
3723 : (t
->level2
[i
] << t
->p
) * sizeof (uint32_t) + level3_offset
);
/* The level3 words are copied unchanged.  */
3725 for (i
= 0; i
< (t
->level3_size
<< t
->p
); i
++)
3726 ((uint32_t *) (t
->result
+ level3_offset
))[i
] = t
->level3
[i
];
/* NOTE(review): the statements guarded by the next three checks are not
   visible here; presumably they release the work arrays that were
   actually allocated -- confirm.  */
3728 if (t
->level1_alloc
> 0)
3730 if (t
->level2_alloc
> 0)
3732 if (t
->level3_alloc
> 0)
/* NOTE(review): TABLE, ELEMENT and DEFAULT look like parameters for a
   generic table implementation that is pulled in on lines not visible
   here -- confirm.  The first group selects the name wcwidth_table with
   uint8_t elements and DEFAULT 0xff.  */
3736 #define TABLE wcwidth_table
3737 #define ELEMENT uint8_t
3738 #define DEFAULT 0xff
/* The second group selects the name wctrans_table with int32_t
   elements.  */
3741 #define TABLE wctrans_table
3742 #define ELEMENT int32_t
/* While this macro is in effect, any definition spelled wctrans_table_add
   is really named wctrans_table_add_internal.  The #undef frees the plain
   name again for the wrapper that follows.  */
3744 #define wctrans_table_add wctrans_table_add_internal
3746 #undef wctrans_table_add
3747 /* The wctrans_table must actually store the difference between the
3748 desired result and the argument. */
/* Wrapper around wctrans_table_add_internal: forwards T and WC unchanged
   and passes MAPPED_WC - WC as the third argument.  Both operands are
   uint32_t, so the subtraction is done in unsigned arithmetic.  */
3750 wctrans_table_add (struct wctrans_table
*t
, uint32_t wc
, uint32_t mapped_wc
)
3752 wctrans_table_add_internal (t
, wc
, mapped_wc
- wc
);
3756 /* Flattens the included transliterations into a translit list.
3757 Inserts them in the list at `cursor', and returns the new cursor. */
/* CTYPE is the locale whose pending `include' entries are consumed,
   CHARMAP is handed on to find_locale, and CURSOR points at the link
   where the next included list is to be spliced in.  The function calls
   itself for every included locale before splicing that locale's own
   list.  */
3758 static struct translit_t
**
3759 translit_flatten (struct locale_ctype_t
*ctype
,
3760 const struct charmap_t
*charmap
,
3761 struct translit_t
**cursor
)
/* Each iteration removes the first pending include entry from CTYPE.  */
3763 while (ctype
->translit_include
!= NULL
)
/* Remember the names first; the entry is unchained right below.  */
3765 const char *copy_locale
= ctype
->translit_include
->copy_locale
;
3766 const char *copy_repertoire
= ctype
->translit_include
->copy_repertoire
;
3767 struct localedef_t
*other
;
3769 /* Unchain the include statement. During the depth-first traversal
3770 we don't want to visit any locale more than once. */
3771 ctype
->translit_include
= ctype
->translit_include
->next
;
3773 other
= find_locale (LC_CTYPE
, copy_locale
, copy_repertoire
, charmap
);
/* Report the include when the locale, or its LC_CTYPE data, is not
   available.  */
3775 if (other
== NULL
|| other
->categories
[LC_CTYPE
].ctype
== NULL
)
3777 WITH_CUR_LOCALE (error (0, 0, _("\
3778 %s: transliteration data from locale `%s' not available"),
3779 "LC_CTYPE", copy_locale
));
/* NOTE(review): the keyword separating the error path from the code
   below is not visible here; presumably what follows runs only when the
   lookup succeeded -- confirm.  */
3783 struct locale_ctype_t
*other_ctype
=
3784 other
->categories
[LC_CTYPE
].ctype
;
/* Flatten the other locale's own includes first.  The assert checks that
   its include list has been emptied by that call.  */
3786 cursor
= translit_flatten (other_ctype
, charmap
, cursor
);
3787 assert (other_ctype
->translit_include
== NULL
);
3789 if (other_ctype
->translit
!= NULL
)
3791 /* Insert the other_ctype->translit list at *cursor. */
3792 struct translit_t
*endp
= other_ctype
->translit
;
/* NOTE(review): the body of this loop is not visible here; presumably it
   advances ENDP to the last element of the list.  */
3793 while (endp
->next
!= NULL
)
/* Link the element ENDP points at to whatever followed *CURSOR, then make
   *CURSOR point at the head of the other list.  */
3796 endp
->next
= *cursor
;
3797 *cursor
= other_ctype
->translit
;
3799 /* Avoid any risk of circular lists. */
3800 other_ctype
->translit
= NULL
;
/* Later insertions go behind the elements just spliced in.  */
3802 cursor
= &endp
->next
;
/* Take over the other locale's default_missing when this locale has
   none.  */
3805 if (ctype
->default_missing
== NULL
)
3806 ctype
->default_missing
= other_ctype
->default_missing
;
3814 allocate_arrays (struct locale_ctype_t
*ctype
, const struct charmap_t
*charmap
,
3815 struct repertoire_t
*repertoire
)
3823 /* You wonder about this amount of memory? This is only because some
3824 users do not manage to address the array with unsigned values or
3825 data types with range >= 256. '\200' would result in the array
3826 index -128. To help these poor people we duplicate the entries for
3827 128 up to 255 below the entry for \0. */
3828 ctype
->ctype_b
= (char_class_t
*) xcalloc (256 + 128, sizeof (char_class_t
));
3829 ctype
->ctype32_b
= (char_class32_t
*) xcalloc (256, sizeof (char_class32_t
));
3830 ctype
->class_b
= (uint32_t **)
3831 xmalloc (ctype
->nr_charclass
* sizeof (uint32_t *));
3832 ctype
->class_3level
= (struct iovec
*)
3833 xmalloc (ctype
->nr_charclass
* sizeof (struct iovec
));
3835 /* This is the array accessed using the multibyte string elements. */
3836 for (idx
= 0; idx
< 256; ++idx
)
3837 ctype
->ctype_b
[128 + idx
] = ctype
->class256_collection
[idx
];
3839 /* Mirror first 127 entries. We must take care that entry -1 is not
3840 mirrored because EOF == -1. */
3841 for (idx
= 0; idx
< 127; ++idx
)
3842 ctype
->ctype_b
[idx
] = ctype
->ctype_b
[256 + idx
];
3844 /* The 32 bit array contains all characters < 0x100. */
3845 for (idx
= 0; idx
< ctype
->class_collection_act
; ++idx
)
3846 if (ctype
->charnames
[idx
] < 0x100)
3847 ctype
->ctype32_b
[ctype
->charnames
[idx
]] = ctype
->class_collection
[idx
];
3849 for (nr
= 0; nr
< ctype
->nr_charclass
; nr
++)
3851 ctype
->class_b
[nr
] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
3853 /* We only set CLASS_B for the bits in the ISO C classes, not
3854 the user defined classes. The number should not change but
3856 #define LAST_ISO_C_BIT 11
3857 if (nr
<= LAST_ISO_C_BIT
)
3858 for (idx
= 0; idx
< 256; ++idx
)
3859 if (ctype
->class256_collection
[idx
] & _ISbit (nr
))
3860 ctype
->class_b
[nr
][idx
>> 5] |= (uint32_t) 1 << (idx
& 0x1f);
3863 for (nr
= 0; nr
< ctype
->nr_charclass
; nr
++)
3865 struct wctype_table t
;
3867 t
.p
= 4; /* or: 5 */
3868 t
.q
= 7; /* or: 6 */
3869 wctype_table_init (&t
);
3871 for (idx
= 0; idx
< ctype
->class_collection_act
; ++idx
)
3872 if (ctype
->class_collection
[idx
] & _ISwbit (nr
))
3873 wctype_table_add (&t
, ctype
->charnames
[idx
]);
3875 wctype_table_finalize (&t
);
3878 WITH_CUR_LOCALE (fprintf (stderr
, _("\
3879 %s: table for class \"%s\": %lu bytes\n"),
3880 "LC_CTYPE", ctype
->classnames
[nr
],
3881 (unsigned long int) t
.result_size
));
3883 ctype
->class_3level
[nr
].iov_base
= t
.result
;
3884 ctype
->class_3level
[nr
].iov_len
= t
.result_size
;
3887 /* Room for table of mappings. */
3888 ctype
->map_b
= (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3889 ctype
->map32_b
= (uint32_t **) xmalloc (ctype
->map_collection_nr
3890 * sizeof (uint32_t *));
3891 ctype
->map_3level
= (struct iovec
*)
3892 xmalloc (ctype
->map_collection_nr
* sizeof (struct iovec
));
3894 /* Fill in all mappings. */
3895 for (idx
= 0; idx
< 2; ++idx
)
3899 /* Allocate table. */
3900 ctype
->map_b
[idx
] = (uint32_t *)
3901 xmalloc ((256 + 128) * sizeof (uint32_t));
3903 /* Copy values from collection. */
3904 for (idx2
= 0; idx2
< 256; ++idx2
)
3905 ctype
->map_b
[idx
][128 + idx2
] = ctype
->map256_collection
[idx
][idx2
];
3907 /* Mirror first 127 entries. We must take care not to map entry
3908 -1 because EOF == -1. */
3909 for (idx2
= 0; idx2
< 127; ++idx2
)
3910 ctype
->map_b
[idx
][idx2
] = ctype
->map_b
[idx
][256 + idx2
];
3912 /* EOF must map to EOF. */
3913 ctype
->map_b
[idx
][127] = EOF
;
3916 for (idx
= 0; idx
< ctype
->map_collection_nr
; ++idx
)
3920 /* Allocate table. */
3921 ctype
->map32_b
[idx
] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
3923 /* Copy values from collection. Default is identity mapping. */
3924 for (idx2
= 0; idx2
< 256; ++idx2
)
3925 ctype
->map32_b
[idx
][idx2
] =
3926 (ctype
->map_collection
[idx
][idx2
] != 0
3927 ? ctype
->map_collection
[idx
][idx2
]
3931 for (nr
= 0; nr
< ctype
->map_collection_nr
; nr
++)
3933 struct wctrans_table t
;
3937 wctrans_table_init (&t
);
3939 for (idx
= 0; idx
< ctype
->map_collection_act
[nr
]; ++idx
)
3940 if (ctype
->map_collection
[nr
][idx
] != 0)
3941 wctrans_table_add (&t
, ctype
->charnames
[idx
],
3942 ctype
->map_collection
[nr
][idx
]);
3944 wctrans_table_finalize (&t
);
3947 WITH_CUR_LOCALE (fprintf (stderr
, _("\
3948 %s: table for map \"%s\": %lu bytes\n"),
3949 "LC_CTYPE", ctype
->mapnames
[nr
],
3950 (unsigned long int) t
.result_size
));
3952 ctype
->map_3level
[nr
].iov_base
= t
.result
;
3953 ctype
->map_3level
[nr
].iov_len
= t
.result_size
;
3956 /* Extra array for class and map names. */
3957 ctype
->class_name_ptr
= (uint32_t *) xmalloc (ctype
->nr_charclass
3958 * sizeof (uint32_t));
3959 ctype
->map_name_ptr
= (uint32_t *) xmalloc (ctype
->map_collection_nr
3960 * sizeof (uint32_t));
3962 ctype
->class_offset
= _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
);
3963 ctype
->map_offset
= ctype
->class_offset
+ ctype
->nr_charclass
;
3965 /* Array for width information. Because the expected widths are very
3966 small (never larger than 2) we use only one single byte. This
3968 We put only printable characters in the table. wcwidth is specified
3969 to return -1 for non-printable characters. Doing the check here
3970 saves a run-time check.
3971 But we put L'\0' in the table. This again saves a run-time check. */
3973 struct wcwidth_table t
;
3977 wcwidth_table_init (&t
);
3979 /* First set all the printable characters of the character set to
3980 the default width. */
3982 while (iterate_table (&charmap
->char_table
, &curs
, &key
, &len
, &vdata
) == 0)
3984 struct charseq
*data
= (struct charseq
*) vdata
;
3986 if (data
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
3987 data
->ucs4
= repertoire_find_value (ctype
->repertoire
,
3990 if (data
->ucs4
!= ILLEGAL_CHAR_VALUE
)
3992 uint32_t *class_bits
=
3993 find_idx (ctype
, &ctype
->class_collection
, NULL
,
3994 &ctype
->class_collection_act
, data
->ucs4
);
3996 if (class_bits
!= NULL
&& (*class_bits
& BITw (tok_print
)))
3997 wcwidth_table_add (&t
, data
->ucs4
, charmap
->width_default
);
4001 /* Now add the explicitly specified widths. */
4002 if (charmap
->width_rules
!= NULL
)
4006 for (cnt
= 0; cnt
< charmap
->nwidth_rules
; ++cnt
)
4008 unsigned char bytes
[charmap
->mb_cur_max
];
4009 int nbytes
= charmap
->width_rules
[cnt
].from
->nbytes
;
4011 /* We have the range of characters for which the width is
4012 specified described using byte sequences of the multibyte
4013 charset. We have to convert this to UCS4 now. And we
4014 cannot simply convert the beginning and the end of the
4015 sequence, we have to iterate over the byte sequence and
4016 convert it for every single character. */
4017 memcpy (bytes
, charmap
->width_rules
[cnt
].from
->bytes
, nbytes
);
4019 while (nbytes
< charmap
->width_rules
[cnt
].to
->nbytes
4020 || memcmp (bytes
, charmap
->width_rules
[cnt
].to
->bytes
,
4023 /* Find the UCS value for `bytes'. */
4026 struct charseq
*seq
=
4027 charmap_find_symbol (charmap
, bytes
, nbytes
);
4030 wch
= ILLEGAL_CHAR_VALUE
;
4031 else if (seq
->ucs4
!= UNINITIALIZED_CHAR_VALUE
)
4034 wch
= repertoire_find_value (ctype
->repertoire
, seq
->name
,
4035 strlen (seq
->name
));
4037 if (wch
!= ILLEGAL_CHAR_VALUE
)
4039 /* Store the value. */
4040 uint32_t *class_bits
=
4041 find_idx (ctype
, &ctype
->class_collection
, NULL
,
4042 &ctype
->class_collection_act
, wch
);
4044 if (class_bits
!= NULL
&& (*class_bits
& BITw (tok_print
)))
4045 wcwidth_table_add (&t
, wch
,
4046 charmap
->width_rules
[cnt
].width
);
4049 /* "Increment" the bytes sequence. */
4051 while (inner
>= 0 && bytes
[inner
] == 0xff)
4056 /* We have to extend the byte sequence. */
4057 if (nbytes
>= charmap
->width_rules
[cnt
].to
->nbytes
)
4061 memset (&bytes
[1], 0, nbytes
);
4067 while (++inner
< nbytes
)
4074 /* Set the width of L'\0' to 0. */
4075 wcwidth_table_add (&t
, 0, 0);
4077 wcwidth_table_finalize (&t
);
4080 WITH_CUR_LOCALE (fprintf (stderr
, _("%s: table for width: %lu bytes\n"),
4081 "LC_CTYPE", (unsigned long int) t
.result_size
));
4083 ctype
->width
.iov_base
= t
.result
;
4084 ctype
->width
.iov_len
= t
.result_size
;
4087 /* Set MB_CUR_MAX. */
4088 ctype
->mb_cur_max
= charmap
->mb_cur_max
;
4090 /* Now determine the table for the transliteration information.
4092 XXX It is not yet clear to me whether it is worth implementing a
4093 complicated algorithm which uses a hash table to locate the entries.
4094 For now I'll use a simple array which can be searched using binary
4096 if (ctype
->translit_include
!= NULL
)
4097 /* Traverse the locales mentioned in the `include' statements in a
4098 depth-first way and fold in their transliteration information. */
4099 translit_flatten (ctype
, charmap
, &ctype
->translit
);
4101 if (ctype
->translit
!= NULL
)
4103 /* First count how many entries we have. This is the upper limit
4104 since some entries from the included files might be overwritten. */
4107 struct translit_t
*runp
= ctype
->translit
;
4108 struct translit_t
**sorted
;
4109 size_t from_len
, to_len
;
4111 while (runp
!= NULL
)
4117 /* Next we allocate an array large enough and fill in the values. */
4118 sorted
= (struct translit_t
**) alloca (number
4119 * sizeof (struct translit_t
**));
4120 runp
= ctype
->translit
;
4124 /* Search for the place where to insert this string.
4125 XXX Better use a real sorting algorithm later. */
4129 while (idx
< number
)
4131 int res
= wcscmp ((const wchar_t *) sorted
[idx
]->from
,
4132 (const wchar_t *) runp
->from
);
4147 memmove (&sorted
[idx
+ 1], &sorted
[idx
],
4148 (number
- idx
) * sizeof (struct translit_t
*));
4155 while (runp
!= NULL
);
4157 /* The next step is putting all the possible transliteration
4158 strings in one memory block so that we can write it out.
4159 We need several different blocks:
4160 - index to the from-string array
4162 - index to the to-string array
4165 from_len
= to_len
= 0;
4166 for (cnt
= 0; cnt
< number
; ++cnt
)
4168 struct translit_to_t
*srunp
;
4169 from_len
+= wcslen ((const wchar_t *) sorted
[cnt
]->from
) + 1;
4170 srunp
= sorted
[cnt
]->to
;
4171 while (srunp
!= NULL
)
4173 to_len
+= wcslen ((const wchar_t *) srunp
->str
) + 1;
4174 srunp
= srunp
->next
;
4176 /* Plus one for the extra NUL character marking the end of
4177 the list for the current entry. */
4181 /* We can allocate the arrays for the results. */
4182 ctype
->translit_from_idx
= xmalloc (number
* sizeof (uint32_t));
4183 ctype
->translit_from_tbl
= xmalloc (from_len
* sizeof (uint32_t));
4184 ctype
->translit_to_idx
= xmalloc (number
* sizeof (uint32_t));
4185 ctype
->translit_to_tbl
= xmalloc (to_len
* sizeof (uint32_t));
4189 for (cnt
= 0; cnt
< number
; ++cnt
)
4192 struct translit_to_t
*srunp
;
4194 ctype
->translit_from_idx
[cnt
] = from_len
;
4195 ctype
->translit_to_idx
[cnt
] = to_len
;
4197 len
= wcslen ((const wchar_t *) sorted
[cnt
]->from
) + 1;
4198 wmemcpy ((wchar_t *) &ctype
->translit_from_tbl
[from_len
],
4199 (const wchar_t *) sorted
[cnt
]->from
, len
);
4202 ctype
->translit_to_idx
[cnt
] = to_len
;
4203 srunp
= sorted
[cnt
]->to
;
4204 while (srunp
!= NULL
)
4206 len
= wcslen ((const wchar_t *) srunp
->str
) + 1;
4207 wmemcpy ((wchar_t *) &ctype
->translit_to_tbl
[to_len
],
4208 (const wchar_t *) srunp
->str
, len
);
4210 srunp
= srunp
->next
;
4212 ctype
->translit_to_tbl
[to_len
++] = L
'\0';
4215 /* Store the information about the length. */
4216 ctype
->translit_idx_size
= number
;
4217 ctype
->translit_from_tbl_size
= from_len
* sizeof (uint32_t);
4218 ctype
->translit_to_tbl_size
= to_len
* sizeof (uint32_t);
4222 /* Provide some dummy pointers since we have nothing to write out. */
4223 static uint32_t no_str
= { 0 };
4225 ctype
->translit_from_idx
= &no_str
;
4226 ctype
->translit_from_tbl
= &no_str
;
4227 ctype
->translit_to_tbl
= &no_str
;
4228 ctype
->translit_idx_size
= 0;
4229 ctype
->translit_from_tbl_size
= 0;
4230 ctype
->translit_to_tbl_size
= 0;