1 /* Copyright (C) 1995-2002, 2003, 2004, 2005 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License version 2 as
7 published by the Free Software Foundation.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
34 #include "localedef.h"
36 #include "localeinfo.h"
38 #include "linereader.h"
39 #include "locfile-token.h"
45 #ifdef PREDEFINED_CLASSES
46 /* These are the extra bits not in wctype.h since these are not preallocated classes. */
48 # define _ISwspecial1 (1 << 29)
49 # define _ISwspecial2 (1 << 30)
50 # define _ISwspecial3 (1 << 31)
54 /* The bit used for representing a special class.
   BITPOS yields the offset of the class token CLASS from tok_upper;
   BIT and BITw pass that offset to _ISbit and _ISwbit respectively.
   CLASS is presumably one of the tok_* class tokens at or after
   tok_upper -- confirm against locfile-token.h. */
55 #define BITPOS(class) ((class) - tok_upper)
56 #define BIT(class) (_ISbit (BITPOS (class)))
57 #define BITw(class) (_ISwbit (BITPOS (class)))
/* Expands to the dereferenced result of find_idx for VALUE, so it can be
   read or assigned to.  find_idx is handed the addresses of
   ctype->COLLECTION, ctype->COLLECTION_max and ctype->COLLECTION_act,
   each followed by IDX; IDX is pasted in verbatim and callers may leave
   it empty when the collection is not an array of tables. */
59 #define ELEM(ctype, collection, idx, value) \
60 *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx, \
61 &ctype->collection##_act idx, value)
64 /* To be compatible with former implementations we for now restrict
65 the number of bits for character classes to 16. When compatibility
66 is not necessary anymore increase the number to 32. */
67 #define char_class_t uint16_t
68 #define char_class32_t uint32_t
71 /* Type to describe a transliteration action. We have a possibly
72 multiple character from-string and a set of multiple character
73 to-strings. All are 32bit values since this is what is used in
74 the gconv functions. */
79 struct translit_to_t
*next
;
89 struct translit_to_t
*to
;
91 struct translit_t
*next
;
94 struct translit_ignore_t
103 struct translit_ignore_t
*next
;
107 /* Type to describe a transliteration include statement. */
108 struct translit_include_t
110 const char *copy_locale
;
111 const char *copy_repertoire
;
113 struct translit_include_t
*next
;
117 /* Sparse table of uint32_t. */
118 #define TABLE idx_table
119 #define ELEMENT uint32_t
120 #define DEFAULT ((uint32_t) ~0)
125 /* The real definition of the struct for the LC_CTYPE locale. */
126 struct locale_ctype_t
129 size_t charnames_max
;
130 size_t charnames_act
;
131 /* An index lookup table, to speed up find_idx. */
132 struct idx_table charnames_idx
;
134 struct repertoire_t
*repertoire
;
136 /* We will allow up to 8 * sizeof (uint32_t) character classes. */
137 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
139 const char *classnames
[MAX_NR_CHARCLASS
];
140 uint32_t last_class_char
;
141 uint32_t class256_collection
[256];
142 uint32_t *class_collection
;
143 size_t class_collection_max
;
144 size_t class_collection_act
;
146 uint32_t class_offset
;
148 struct charseq
**mbdigits
;
155 struct charseq
*mboutdigits
[10];
156 uint32_t wcoutdigits
[10];
157 size_t outdigits_act
;
159 /* If the following number ever turns out to be too small simply
160 increase it. But I doubt it will. --drepper@gnu */
161 #define MAX_NR_CHARMAP 16
162 const char *mapnames
[MAX_NR_CHARMAP
];
163 uint32_t *map_collection
[MAX_NR_CHARMAP
];
164 uint32_t map256_collection
[2][256];
165 size_t map_collection_max
[MAX_NR_CHARMAP
];
166 size_t map_collection_act
[MAX_NR_CHARMAP
];
167 size_t map_collection_nr
;
169 int tomap_done
[MAX_NR_CHARMAP
];
172 /* Transliteration information. */
173 struct translit_include_t
*translit_include
;
174 struct translit_t
*translit
;
175 struct translit_ignore_t
*translit_ignore
;
176 uint32_t ntranslit_ignore
;
178 uint32_t *default_missing
;
179 const char *default_missing_file
;
180 size_t default_missing_lineno
;
182 uint32_t to_nonascii
;
184 /* The arrays for the binary representation. */
185 char_class_t
*ctype_b
;
186 char_class32_t
*ctype32_b
;
190 struct iovec
*class_3level
;
191 struct iovec
*map_3level
;
192 uint32_t *class_name_ptr
;
193 uint32_t *map_name_ptr
;
196 const char *codeset_name
;
197 uint32_t *translit_from_idx
;
198 uint32_t *translit_from_tbl
;
199 uint32_t *translit_to_idx
;
200 uint32_t *translit_to_tbl
;
201 uint32_t translit_idx_size
;
202 size_t translit_from_tbl_size
;
203 size_t translit_to_tbl_size
;
205 struct obstack mempool
;
209 /* Marker for an empty slot. This has the value 0xFFFFFFFF, regardless
210 whether 'int' is 16 bit, 32 bit, or 64 bit. */
211 #define EMPTY ((uint32_t) ~0)
214 #define obstack_chunk_alloc xmalloc
215 #define obstack_chunk_free free
218 /* Prototypes for local functions. */
219 static void ctype_startup (struct linereader
*lr
, struct localedef_t
*locale
,
220 const struct charmap_t
*charmap
,
221 struct localedef_t
*copy_locale
,
223 static void ctype_class_new (struct linereader
*lr
,
224 struct locale_ctype_t
*ctype
, const char *name
);
225 static void ctype_map_new (struct linereader
*lr
,
226 struct locale_ctype_t
*ctype
,
227 const char *name
, const struct charmap_t
*charmap
);
228 static uint32_t *find_idx (struct locale_ctype_t
*ctype
, uint32_t **table
,
229 size_t *max
, size_t *act
, unsigned int idx
);
230 static void set_class_defaults (struct locale_ctype_t
*ctype
,
231 const struct charmap_t
*charmap
,
232 struct repertoire_t
*repertoire
);
233 static void allocate_arrays (struct locale_ctype_t
*ctype
,
234 const struct charmap_t
*charmap
,
235 struct repertoire_t
*repertoire
);
238 static const char *longnames
[] =
240 "zero", "one", "two", "three", "four",
241 "five", "six", "seven", "eight", "nine"
243 static const char *uninames
[] =
245 "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
246 "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
248 static const unsigned char digits
[] = "0123456789";
252 ctype_startup (struct linereader
*lr
, struct localedef_t
*locale
,
253 const struct charmap_t
*charmap
,
254 struct localedef_t
*copy_locale
, int ignore_content
)
257 struct locale_ctype_t
*ctype
;
259 if (!ignore_content
&& locale
->categories
[LC_CTYPE
].ctype
== NULL
)
261 if (copy_locale
== NULL
)
263 /* Allocate the needed room. */
264 locale
->categories
[LC_CTYPE
].ctype
= ctype
=
265 (struct locale_ctype_t
*) xcalloc (1,
266 sizeof (struct locale_ctype_t
));
268 /* We have seen no names yet. */
269 ctype
->charnames_max
= charmap
->mb_cur_max
== 1 ? 256 : 512;
271 (unsigned int *) xmalloc (ctype
->charnames_max
272 * sizeof (unsigned int));
273 for (cnt
= 0; cnt
< 256; ++cnt
)
274 ctype
->charnames
[cnt
] = cnt
;
275 ctype
->charnames_act
= 256;
276 idx_table_init (&ctype
->charnames_idx
);
278 /* Fill character class information. */
279 ctype
->last_class_char
= ILLEGAL_CHAR_VALUE
;
280 /* The order of the following instructions determines the bit positions! */
282 ctype_class_new (lr
, ctype
, "upper");
283 ctype_class_new (lr
, ctype
, "lower");
284 ctype_class_new (lr
, ctype
, "alpha");
285 ctype_class_new (lr
, ctype
, "digit");
286 ctype_class_new (lr
, ctype
, "xdigit");
287 ctype_class_new (lr
, ctype
, "space");
288 ctype_class_new (lr
, ctype
, "print");
289 ctype_class_new (lr
, ctype
, "graph");
290 ctype_class_new (lr
, ctype
, "blank");
291 ctype_class_new (lr
, ctype
, "cntrl");
292 ctype_class_new (lr
, ctype
, "punct");
293 ctype_class_new (lr
, ctype
, "alnum");
294 #ifdef PREDEFINED_CLASSES
295 /* The following are extensions from ISO 14652. */
296 ctype_class_new (lr
, ctype
, "left_to_right");
297 ctype_class_new (lr
, ctype
, "right_to_left");
298 ctype_class_new (lr
, ctype
, "num_terminator");
299 ctype_class_new (lr
, ctype
, "num_separator");
300 ctype_class_new (lr
, ctype
, "segment_separator");
301 ctype_class_new (lr
, ctype
, "block_separator");
302 ctype_class_new (lr
, ctype
, "direction_control");
303 ctype_class_new (lr
, ctype
, "sym_swap_layout");
304 ctype_class_new (lr
, ctype
, "char_shape_selector");
305 ctype_class_new (lr
, ctype
, "num_shape_selector");
306 ctype_class_new (lr
, ctype
, "non_spacing");
307 ctype_class_new (lr
, ctype
, "non_spacing_level3");
308 ctype_class_new (lr
, ctype
, "normal_connect");
309 ctype_class_new (lr
, ctype
, "r_connect");
310 ctype_class_new (lr
, ctype
, "no_connect");
311 ctype_class_new (lr
, ctype
, "no_connect-space");
312 ctype_class_new (lr
, ctype
, "vowel_connect");
315 ctype
->class_collection_max
= charmap
->mb_cur_max
== 1 ? 256 : 512;
316 ctype
->class_collection
317 = (uint32_t *) xcalloc (sizeof (unsigned long int),
318 ctype
->class_collection_max
);
319 ctype
->class_collection_act
= 256;
321 /* Fill character map information. */
322 ctype
->last_map_idx
= MAX_NR_CHARMAP
;
323 ctype_map_new (lr
, ctype
, "toupper", charmap
);
324 ctype_map_new (lr
, ctype
, "tolower", charmap
);
325 #ifdef PREDEFINED_CLASSES
326 ctype_map_new (lr
, ctype
, "tosymmetric", charmap
);
329 /* Fill first 256 entries in `toXXX' arrays. */
330 for (cnt
= 0; cnt
< 256; ++cnt
)
332 ctype
->map_collection
[0][cnt
] = cnt
;
333 ctype
->map_collection
[1][cnt
] = cnt
;
334 #ifdef PREDEFINED_CLASSES
335 ctype
->map_collection
[2][cnt
] = cnt
;
337 ctype
->map256_collection
[0][cnt
] = cnt
;
338 ctype
->map256_collection
[1][cnt
] = cnt
;
341 if (enc_not_ascii_compatible
)
342 ctype
->to_nonascii
= 1;
344 obstack_init (&ctype
->mempool
);
347 ctype
= locale
->categories
[LC_CTYPE
].ctype
=
348 copy_locale
->categories
[LC_CTYPE
].ctype
;
354 ctype_finish (struct localedef_t
*locale
, const struct charmap_t
*charmap
)
356 /* See POSIX.2, table 2-6 for the meaning of the following table. */
361 const char allow
[NCLASS
];
363 valid_table
[NCLASS
] =
365 /* The order is important. See token.h for more information.
366 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
367 { "upper", "--MX-XDDXXX-" },
368 { "lower", "--MX-XDDXXX-" },
369 { "alpha", "---X-XDDXXX-" },
370 { "digit", "XXX--XDDXXX-" },
371 { "xdigit", "-----XDDXXX-" },
372 { "space", "XXXXX------X" },
373 { "print", "---------X--" },
374 { "graph", "---------X--" },
375 { "blank", "XXXXXM-----X" },
376 { "cntrl", "XXXXX-XX--XX" },
377 { "punct", "XXXXX-DD-X-X" },
378 { "alnum", "-----XDDXXX-" }
382 uint32_t space_value
;
383 struct charseq
*space_seq
;
384 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
391 /* Now resolve copying and also handle completely missing definitions. */
394 const char *repertoire_name
;
396 /* First see whether we were supposed to copy. If yes, find the
397 actual definition. */
398 if (locale
->copy_name
[LC_CTYPE
] != NULL
)
400 /* Find the copying locale. This has to happen transitively since
401 the locale we are copying from might itself be copying another one. */
402 struct localedef_t
*from
= locale
;
405 from
= find_locale (LC_CTYPE
, from
->copy_name
[LC_CTYPE
],
406 from
->repertoire_name
, charmap
);
407 while (from
->categories
[LC_CTYPE
].ctype
== NULL
408 && from
->copy_name
[LC_CTYPE
] != NULL
);
410 ctype
= locale
->categories
[LC_CTYPE
].ctype
411 = from
->categories
[LC_CTYPE
].ctype
;
414 /* If there is still no definition, issue a warning and create an empty one. */
419 WITH_CUR_LOCALE (error (0, 0, _("\
420 No definition for %s category found"), "LC_CTYPE"));
421 ctype_startup (NULL
, locale
, charmap
, NULL
, 0);
422 ctype
= locale
->categories
[LC_CTYPE
].ctype
;
425 /* Get the repertoire we have to use. */
426 repertoire_name
= locale
->repertoire_name
?: repertoire_global
;
427 if (repertoire_name
!= NULL
)
428 ctype
->repertoire
= repertoire_read (repertoire_name
);
431 /* We need the name of the currently used 8-bit character set to
432 make correct conversion between this 8-bit representation and the
433 ISO 10646 character set used internally for wide characters. */
434 ctype
->codeset_name
= charmap
->code_set_name
;
435 if (ctype
->codeset_name
== NULL
)
438 WITH_CUR_LOCALE (error (0, 0, _("\
439 No character set name specified in charmap")));
440 ctype
->codeset_name
= "//UNKNOWN//";
443 /* Set default value for classes not specified. */
444 set_class_defaults (ctype
, charmap
, ctype
->repertoire
);
446 /* Check according to table. */
447 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
449 uint32_t tmp
= ctype
->class_collection
[cnt
];
453 for (cls1
= 0; cls1
< NCLASS
; ++cls1
)
454 if ((tmp
& _ISwbit (cls1
)) != 0)
455 for (cls2
= 0; cls2
< NCLASS
; ++cls2
)
456 if (valid_table
[cls1
].allow
[cls2
] != '-')
458 int eq
= (tmp
& _ISwbit (cls2
)) != 0;
459 switch (valid_table
[cls1
].allow
[cls2
])
464 uint32_t value
= ctype
->charnames
[cnt
];
467 WITH_CUR_LOCALE (error (0, 0, _("\
468 character L'\\u%0*x' in class `%s' must be in class `%s'"),
469 value
> 0xffff ? 8 : 4,
471 valid_table
[cls1
].name
,
472 valid_table
[cls2
].name
));
479 uint32_t value
= ctype
->charnames
[cnt
];
482 WITH_CUR_LOCALE (error (0, 0, _("\
483 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
484 value
> 0xffff ? 8 : 4,
486 valid_table
[cls1
].name
,
487 valid_table
[cls2
].name
));
492 ctype
->class_collection
[cnt
] |= _ISwbit (cls2
);
496 WITH_CUR_LOCALE (error (5, 0, _("\
497 internal error in %s, line %u"), __FUNCTION__
, __LINE__
));
503 for (cnt
= 0; cnt
< 256; ++cnt
)
505 uint32_t tmp
= ctype
->class256_collection
[cnt
];
509 for (cls1
= 0; cls1
< NCLASS
; ++cls1
)
510 if ((tmp
& _ISbit (cls1
)) != 0)
511 for (cls2
= 0; cls2
< NCLASS
; ++cls2
)
512 if (valid_table
[cls1
].allow
[cls2
] != '-')
514 int eq
= (tmp
& _ISbit (cls2
)) != 0;
515 switch (valid_table
[cls1
].allow
[cls2
])
522 snprintf (buf
, sizeof buf
, "\\%Zo", cnt
);
525 WITH_CUR_LOCALE (error (0, 0, _("\
526 character '%s' in class `%s' must be in class `%s'"),
528 valid_table
[cls1
].name
,
529 valid_table
[cls2
].name
));
538 snprintf (buf
, sizeof buf
, "\\%Zo", cnt
);
541 WITH_CUR_LOCALE (error (0, 0, _("\
542 character '%s' in class `%s' must not be in class `%s'"),
544 valid_table
[cls1
].name
,
545 valid_table
[cls2
].name
));
550 ctype
->class256_collection
[cnt
] |= _ISbit (cls2
);
554 WITH_CUR_LOCALE (error (5, 0, _("\
555 internal error in %s, line %u"), __FUNCTION__
, __LINE__
));
561 /* ... and now test <SP> as a special case. */
563 if (((cnt
= BITPOS (tok_space
),
564 (ELEM (ctype
, class_collection
, , space_value
)
565 & BITw (tok_space
)) == 0)
566 || (cnt
= BITPOS (tok_blank
),
567 (ELEM (ctype
, class_collection
, , space_value
)
568 & BITw (tok_blank
)) == 0)))
571 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
572 valid_table
[cnt
].name
));
574 else if (((cnt
= BITPOS (tok_punct
),
575 (ELEM (ctype
, class_collection
, , space_value
)
576 & BITw (tok_punct
)) != 0)
577 || (cnt
= BITPOS (tok_graph
),
578 (ELEM (ctype
, class_collection
, , space_value
)
583 WITH_CUR_LOCALE (error (0, 0, _("\
584 <SP> character must not be in class `%s'"),
585 valid_table
[cnt
].name
));
588 ELEM (ctype
, class_collection
, , space_value
) |= BITw (tok_print
);
590 space_seq
= charmap_find_value (charmap
, "SP", 2);
591 if (space_seq
== NULL
)
592 space_seq
= charmap_find_value (charmap
, "space", 5);
593 if (space_seq
== NULL
)
594 space_seq
= charmap_find_value (charmap
, "U00000020", 9);
595 if (space_seq
== NULL
|| space_seq
->nbytes
!= 1)
598 WITH_CUR_LOCALE (error (0, 0, _("\
599 character <SP> not defined in character map")));
601 else if (((cnt
= BITPOS (tok_space
),
602 (ctype
->class256_collection
[space_seq
->bytes
[0]]
603 & BIT (tok_space
)) == 0)
604 || (cnt
= BITPOS (tok_blank
),
605 (ctype
->class256_collection
[space_seq
->bytes
[0]]
606 & BIT (tok_blank
)) == 0)))
609 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
610 valid_table
[cnt
].name
));
612 else if (((cnt
= BITPOS (tok_punct
),
613 (ctype
->class256_collection
[space_seq
->bytes
[0]]
614 & BIT (tok_punct
)) != 0)
615 || (cnt
= BITPOS (tok_graph
),
616 (ctype
->class256_collection
[space_seq
->bytes
[0]]
617 & BIT (tok_graph
)) != 0)))
620 WITH_CUR_LOCALE (error (0, 0, _("\
621 <SP> character must not be in class `%s'"),
622 valid_table
[cnt
].name
));
625 ctype
->class256_collection
[space_seq
->bytes
[0]] |= BIT (tok_print
);
627 /* Now that the tests are done make sure the name array contains all
628 characters which are handled in the WIDTH section of the
629 character set definition file. */
630 if (charmap
->width_rules
!= NULL
)
631 for (cnt
= 0; cnt
< charmap
->nwidth_rules
; ++cnt
)
633 unsigned char bytes
[charmap
->mb_cur_max
];
634 int nbytes
= charmap
->width_rules
[cnt
].from
->nbytes
;
636 /* We have the range of character for which the width is
637 specified described using byte sequences of the multibyte
638 charset. We have to convert this to UCS4 now. And we
639 cannot simply convert the beginning and the end of the
640 sequence, we have to iterate over the byte sequence and
641 convert it for every single character. */
642 memcpy (bytes
, charmap
->width_rules
[cnt
].from
->bytes
, nbytes
);
644 while (nbytes
< charmap
->width_rules
[cnt
].to
->nbytes
645 || memcmp (bytes
, charmap
->width_rules
[cnt
].to
->bytes
,
648 /* Find the UCS value for `bytes'. */
651 struct charseq
*seq
= charmap_find_symbol (charmap
, bytes
, nbytes
);
654 wch
= ILLEGAL_CHAR_VALUE
;
655 else if (seq
->ucs4
!= UNINITIALIZED_CHAR_VALUE
)
658 wch
= repertoire_find_value (ctype
->repertoire
, seq
->name
,
661 if (wch
!= ILLEGAL_CHAR_VALUE
)
662 /* We are only interested in the side-effects of the
663 `find_idx' call. It will add appropriate entries in
664 the name array if this is necessary. */
665 (void) find_idx (ctype
, NULL
, NULL
, NULL
, wch
);
667 /* "Increment" the bytes sequence. */
669 while (inner
>= 0 && bytes
[inner
] == 0xff)
674 /* We have to extend the byte sequence. */
675 if (nbytes
>= charmap
->width_rules
[cnt
].to
->nbytes
)
679 memset (&bytes
[1], 0, nbytes
);
685 while (++inner
< nbytes
)
691 /* Now set all the other characters of the character set to the default width. */
694 while (iterate_table (&charmap
->char_table
, &curs
, &key
, &len
, &vdata
) == 0)
696 struct charseq
*data
= (struct charseq
*) vdata
;
698 if (data
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
699 data
->ucs4
= repertoire_find_value (ctype
->repertoire
,
702 if (data
->ucs4
!= ILLEGAL_CHAR_VALUE
)
703 (void) find_idx (ctype
, NULL
, NULL
, NULL
, data
->ucs4
);
706 /* There must be a multiple of 10 digits. */
707 if (ctype
->mbdigits_act
% 10 != 0)
709 assert (ctype
->mbdigits_act
== ctype
->wcdigits_act
);
710 ctype
->wcdigits_act
-= ctype
->mbdigits_act
% 10;
711 ctype
->mbdigits_act
-= ctype
->mbdigits_act
% 10;
712 WITH_CUR_LOCALE (error (0, 0, _("\
713 `digit' category has not entries in groups of ten")));
716 /* Check the input digits. There must be a multiple of ten available.
717 In each group it could be that one or the other character is missing.
718 In this case the whole group must be removed. */
720 while (cnt
< ctype
->mbdigits_act
)
723 for (inner
= 0; inner
< 10; ++inner
)
724 if (ctype
->mbdigits
[cnt
+ inner
] == NULL
)
731 /* Remove the group. */
732 memmove (&ctype
->mbdigits
[cnt
], &ctype
->mbdigits
[cnt
+ 10],
733 ((ctype
->wcdigits_act
- cnt
- 10)
734 * sizeof (ctype
->mbdigits
[0])));
735 ctype
->mbdigits_act
-= 10;
739 /* If no input digits are given use the default. */
740 if (ctype
->mbdigits_act
== 0)
742 if (ctype
->mbdigits_max
== 0)
744 ctype
->mbdigits
= obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
745 10 * sizeof (struct charseq
*));
746 ctype
->mbdigits_max
= 10;
749 for (cnt
= 0; cnt
< 10; ++cnt
)
751 ctype
->mbdigits
[cnt
] = charmap_find_symbol (charmap
,
753 if (ctype
->mbdigits
[cnt
] == NULL
)
755 ctype
->mbdigits
[cnt
] = charmap_find_symbol (charmap
,
757 strlen (longnames
[cnt
]));
758 if (ctype
->mbdigits
[cnt
] == NULL
)
760 /* Hum, this ain't good. */
761 WITH_CUR_LOCALE (error (0, 0, _("\
762 no input digits defined and none of the standard names in the charmap")));
764 ctype
->mbdigits
[cnt
] = obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
765 sizeof (struct charseq
) + 1);
767 /* This is better than nothing. */
768 ctype
->mbdigits
[cnt
]->bytes
[0] = digits
[cnt
];
769 ctype
->mbdigits
[cnt
]->nbytes
= 1;
774 ctype
->mbdigits_act
= 10;
777 /* Check the wide character input digits. There must be a multiple
778 of ten available. In each group it could be that one or the other
779 character is missing. In this case the whole group must be removed. */
782 while (cnt
< ctype
->wcdigits_act
)
785 for (inner
= 0; inner
< 10; ++inner
)
786 if (ctype
->wcdigits
[cnt
+ inner
] == ILLEGAL_CHAR_VALUE
)
793 /* Remove the group. */
794 memmove (&ctype
->wcdigits
[cnt
], &ctype
->wcdigits
[cnt
+ 10],
795 ((ctype
->wcdigits_act
- cnt
- 10)
796 * sizeof (ctype
->wcdigits
[0])));
797 ctype
->wcdigits_act
-= 10;
801 /* If no input digits are given use the default. */
802 if (ctype
->wcdigits_act
== 0)
804 if (ctype
->wcdigits_max
== 0)
806 ctype
->wcdigits
= obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
807 10 * sizeof (uint32_t));
808 ctype
->wcdigits_max
= 10;
811 for (cnt
= 0; cnt
< 10; ++cnt
)
812 ctype
->wcdigits
[cnt
] = L
'0' + cnt
;
814 ctype
->mbdigits_act
= 10;
817 /* Check the outdigits. */
819 for (cnt
= 0; cnt
< 10; ++cnt
)
820 if (ctype
->mboutdigits
[cnt
] == NULL
)
822 static struct charseq replace
[2];
826 WITH_CUR_LOCALE (error (0, 0, _("\
827 not all characters used in `outdigit' are available in the charmap")));
831 replace
[0].nbytes
= 1;
832 replace
[0].bytes
[0] = '?';
833 replace
[0].bytes
[1] = '\0';
834 ctype
->mboutdigits
[cnt
] = &replace
[0];
838 for (cnt
= 0; cnt
< 10; ++cnt
)
839 if (ctype
->wcoutdigits
[cnt
] == 0)
843 WITH_CUR_LOCALE (error (0, 0, _("\
844 not all characters used in `outdigit' are available in the repertoire")));
848 ctype
->wcoutdigits
[cnt
] = L
'?';
851 /* Sort the entries in the translit_ignore list. */
852 if (ctype
->translit_ignore
!= NULL
)
854 struct translit_ignore_t
*firstp
= ctype
->translit_ignore
;
855 struct translit_ignore_t
*runp
;
857 ctype
->ntranslit_ignore
= 1;
859 for (runp
= firstp
->next
; runp
!= NULL
; runp
= runp
->next
)
861 struct translit_ignore_t
*lastp
= NULL
;
862 struct translit_ignore_t
*cmpp
;
864 ++ctype
->ntranslit_ignore
;
866 for (cmpp
= firstp
; cmpp
!= NULL
; lastp
= cmpp
, cmpp
= cmpp
->next
)
867 if (runp
->from
< cmpp
->from
)
875 ctype
->translit_ignore
= firstp
;
881 ctype_output (struct localedef_t
*locale
, const struct charmap_t
*charmap
,
882 const char *output_path
)
884 static const char nulbytes
[4] = { 0, 0, 0, 0 };
885 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
886 const size_t nelems
= (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
)
887 + ctype
->nr_charclass
+ ctype
->map_collection_nr
);
888 struct iovec
*iov
= alloca (sizeof *iov
889 * (2 + nelems
+ 2 * ctype
->nr_charclass
890 + ctype
->map_collection_nr
+ 4));
891 struct locale_file data
;
892 uint32_t *idx
= alloca (sizeof *idx
* (nelems
+ 1));
893 uint32_t default_missing_len
;
894 size_t elem
, cnt
, offset
, total
;
897 /* Now prepare the output: Find the sizes of the table we can use. */
898 allocate_arrays (ctype
, charmap
, ctype
->repertoire
);
900 data
.magic
= LIMAGIC (LC_CTYPE
);
902 iov
[0].iov_base
= (void *) &data
;
903 iov
[0].iov_len
= sizeof (data
);
905 iov
[1].iov_base
= (void *) idx
;
906 iov
[1].iov_len
= nelems
* sizeof (uint32_t);
908 idx
[0] = iov
[0].iov_len
+ iov
[1].iov_len
;
911 for (elem
= 0; elem
< nelems
; ++elem
)
913 if (elem
< _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
))
916 #define CTYPE_EMPTY(name) \
918 iov[2 + elem + offset].iov_base = NULL; \
919 iov[2 + elem + offset].iov_len = 0; \
920 idx[elem + 1] = idx[elem]; \
923 CTYPE_EMPTY(_NL_CTYPE_GAP1
);
924 CTYPE_EMPTY(_NL_CTYPE_GAP2
);
925 CTYPE_EMPTY(_NL_CTYPE_GAP3
);
926 CTYPE_EMPTY(_NL_CTYPE_GAP4
);
927 CTYPE_EMPTY(_NL_CTYPE_GAP5
);
928 CTYPE_EMPTY(_NL_CTYPE_GAP6
);
930 #define CTYPE_DATA(name, base, len) \
931 case _NL_ITEM_INDEX (name): \
932 iov[2 + elem + offset].iov_base = (base); \
933 iov[2 + elem + offset].iov_len = (len); \
934 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
937 CTYPE_DATA (_NL_CTYPE_CLASS
,
939 (256 + 128) * sizeof (char_class_t
));
941 CTYPE_DATA (_NL_CTYPE_TOUPPER
,
943 (256 + 128) * sizeof (uint32_t));
944 CTYPE_DATA (_NL_CTYPE_TOLOWER
,
946 (256 + 128) * sizeof (uint32_t));
948 CTYPE_DATA (_NL_CTYPE_TOUPPER32
,
950 256 * sizeof (uint32_t));
951 CTYPE_DATA (_NL_CTYPE_TOLOWER32
,
953 256 * sizeof (uint32_t));
955 CTYPE_DATA (_NL_CTYPE_CLASS32
,
957 256 * sizeof (char_class32_t
));
959 CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET
,
960 &ctype
->class_offset
, sizeof (uint32_t));
962 CTYPE_DATA (_NL_CTYPE_MAP_OFFSET
,
963 &ctype
->map_offset
, sizeof (uint32_t));
965 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE
,
966 &ctype
->translit_idx_size
, sizeof (uint32_t));
968 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX
,
969 ctype
->translit_from_idx
,
970 ctype
->translit_idx_size
* sizeof (uint32_t));
972 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL
,
973 ctype
->translit_from_tbl
,
974 ctype
->translit_from_tbl_size
);
976 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX
,
977 ctype
->translit_to_idx
,
978 ctype
->translit_idx_size
* sizeof (uint32_t));
980 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL
,
981 ctype
->translit_to_tbl
, ctype
->translit_to_tbl_size
);
983 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES
):
984 /* The class name array. */
986 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
, ++offset
)
988 iov
[2 + elem
+ offset
].iov_base
989 = (void *) ctype
->classnames
[cnt
];
990 iov
[2 + elem
+ offset
].iov_len
991 = strlen (ctype
->classnames
[cnt
]) + 1;
992 total
+= iov
[2 + elem
+ offset
].iov_len
;
994 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
995 iov
[2 + elem
+ offset
].iov_len
= 1 + (4 - ((total
+ 1) % 4));
996 total
+= 1 + (4 - ((total
+ 1) % 4));
998 idx
[elem
+ 1] = idx
[elem
] + total
;
1001 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES
):
1002 /* The map name array. */
1004 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
, ++offset
)
1006 iov
[2 + elem
+ offset
].iov_base
1007 = (void *) ctype
->mapnames
[cnt
];
1008 iov
[2 + elem
+ offset
].iov_len
1009 = strlen (ctype
->mapnames
[cnt
]) + 1;
1010 total
+= iov
[2 + elem
+ offset
].iov_len
;
1012 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1013 iov
[2 + elem
+ offset
].iov_len
= 1 + (4 - ((total
+ 1) % 4));
1014 total
+= 1 + (4 - ((total
+ 1) % 4));
1016 idx
[elem
+ 1] = idx
[elem
] + total
;
1019 CTYPE_DATA (_NL_CTYPE_WIDTH
,
1020 ctype
->width
.iov_base
,
1021 ctype
->width
.iov_len
);
1023 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX
,
1024 &ctype
->mb_cur_max
, sizeof (uint32_t));
1026 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME
):
1027 total
= strlen (ctype
->codeset_name
) + 1;
1029 iov
[2 + elem
+ offset
].iov_base
= (char *) ctype
->codeset_name
;
1032 iov
[2 + elem
+ offset
].iov_base
= alloca ((total
+ 3) & ~3);
1033 memset (mempcpy (iov
[2 + elem
+ offset
].iov_base
,
1034 ctype
->codeset_name
, total
),
1035 '\0', 4 - (total
& 3));
1036 total
= (total
+ 3) & ~3;
1038 iov
[2 + elem
+ offset
].iov_len
= total
;
1039 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1043 CTYPE_DATA (_NL_CTYPE_MAP_TO_NONASCII
,
1044 &ctype
->to_nonascii
, sizeof (uint32_t));
1046 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN
):
1047 iov
[2 + elem
+ offset
].iov_base
= alloca (sizeof (uint32_t));
1048 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1049 *(uint32_t *) iov
[2 + elem
+ offset
].iov_base
=
1050 ctype
->mbdigits_act
/ 10;
1051 idx
[elem
+ 1] = idx
[elem
] + sizeof (uint32_t);
1054 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN
):
1055 /* Align entries. */
1056 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1057 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1058 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1061 iov
[2 + elem
+ offset
].iov_base
= alloca (sizeof (uint32_t));
1062 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1063 *(uint32_t *) iov
[2 + elem
+ offset
].iov_base
=
1064 ctype
->wcdigits_act
/ 10;
1065 idx
[elem
+ 1] = idx
[elem
] + sizeof (uint32_t);
1068 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB
):
1069 /* Compute the length of all possible characters. For INDIGITS
1070 there might be more than one. We simply concatenate all of
1071 them with a NUL byte following. The NUL byte wouldn't be
1072 necessary but it makes it easier for the user. */
1075 for (cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
);
1076 cnt
< ctype
->mbdigits_act
; cnt
+= 10)
1077 total
+= ctype
->mbdigits
[cnt
]->nbytes
+ 1;
1078 iov
[2 + elem
+ offset
].iov_base
= (char *) alloca (total
);
1079 iov
[2 + elem
+ offset
].iov_len
= total
;
1081 cp
= iov
[2 + elem
+ offset
].iov_base
;
1082 for (cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
);
1083 cnt
< ctype
->mbdigits_act
; cnt
+= 10)
1085 cp
= mempcpy (cp
, ctype
->mbdigits
[cnt
]->bytes
,
1086 ctype
->mbdigits
[cnt
]->nbytes
);
1089 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1092 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB
) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB
):
1093 /* Compute the length of the character. Unlike INDIGITS, each
1094 OUTDIGIT has exactly one multibyte sequence. We copy it
1095 with a NUL byte following. The NUL byte wouldn't be
1096 necessary but it makes it easier for the user. */
1097 cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB
);
1098 total
= ctype
->mboutdigits
[cnt
]->nbytes
+ 1;
1099 iov
[2 + elem
+ offset
].iov_base
= (char *) alloca (total
);
1100 iov
[2 + elem
+ offset
].iov_len
= total
;
1102 *(char *) mempcpy (iov
[2 + elem
+ offset
].iov_base
,
1103 ctype
->mboutdigits
[cnt
]->bytes
,
1104 ctype
->mboutdigits
[cnt
]->nbytes
) = '\0';
1105 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1108 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC
) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC
):
1109 total
= ctype
->wcdigits_act
/ 10;
1111 iov
[2 + elem
+ offset
].iov_base
=
1112 (uint32_t *) alloca (total
* sizeof (uint32_t));
1113 iov
[2 + elem
+ offset
].iov_len
= total
* sizeof (uint32_t);
1115 for (cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC
);
1116 cnt
< ctype
->wcdigits_act
; cnt
+= 10)
1117 ((uint32_t *) iov
[2 + elem
+ offset
].iov_base
)[cnt
/ 10]
1118 = ctype
->wcdigits
[cnt
];
1119 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1122 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC
):
1123 /* Align entries. */
1124 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1125 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1126 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1130 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT1_WC
) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC
):
1131 cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC
);
1132 iov
[2 + elem
+ offset
].iov_base
= &ctype
->wcoutdigits
[cnt
];
1133 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1134 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1137 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN
):
1138 /* Align entries. */
1139 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1140 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1141 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1144 default_missing_len
= (ctype
->default_missing
1145 ? wcslen ((wchar_t *)ctype
->default_missing
)
1147 iov
[2 + elem
+ offset
].iov_base
= &default_missing_len
;
1148 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1149 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1152 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING
):
1153 iov
[2 + elem
+ offset
].iov_base
=
1154 ctype
->default_missing
?: (uint32_t *) L
"";
1155 iov
[2 + elem
+ offset
].iov_len
=
1156 wcslen (iov
[2 + elem
+ offset
].iov_base
);
1157 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1160 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN
):
1161 /* Align entries. */
1162 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1163 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1164 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1167 iov
[2 + elem
+ offset
].iov_base
= &ctype
->ntranslit_ignore
;
1168 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1169 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1172 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE
):
1174 uint32_t *ranges
= (uint32_t *) alloca (ctype
->ntranslit_ignore
1175 * 3 * sizeof (uint32_t));
1176 struct translit_ignore_t
*runp
;
1178 iov
[2 + elem
+ offset
].iov_base
= ranges
;
1179 iov
[2 + elem
+ offset
].iov_len
= (ctype
->ntranslit_ignore
1180 * 3 * sizeof (uint32_t));
1182 for (runp
= ctype
->translit_ignore
; runp
!= NULL
;
1185 *ranges
++ = runp
->from
;
1186 *ranges
++ = runp
->to
;
1187 *ranges
++ = runp
->step
;
1190 /* Remove the following line in case a new entry is added
1191 after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN. */
1193 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1197 assert (! "unknown CTYPE element");
1201 /* Handle extra maps. */
1202 size_t nr
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
);
1203 if (nr
< ctype
->nr_charclass
)
1205 iov
[2 + elem
+ offset
].iov_base
= ctype
->class_b
[nr
];
1206 iov
[2 + elem
+ offset
].iov_len
= 256 / 32 * sizeof (uint32_t);
1207 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1210 iov
[2 + elem
+ offset
] = ctype
->class_3level
[nr
];
1214 nr
-= ctype
->nr_charclass
;
1215 assert (nr
< ctype
->map_collection_nr
);
1216 iov
[2 + elem
+ offset
] = ctype
->map_3level
[nr
];
1218 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1222 assert (2 + elem
+ offset
== (nelems
+ 2 * ctype
->nr_charclass
1223 + ctype
->map_collection_nr
+ 4 + 2));
1225 write_locale_data (output_path
, LC_CTYPE
, "LC_CTYPE", 2 + elem
+ offset
,
1230 /* Local functions. */
1232 ctype_class_new (struct linereader
*lr
, struct locale_ctype_t
*ctype
,
1237 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
1238 if (strcmp (ctype
->classnames
[cnt
], name
) == 0)
1241 if (cnt
< ctype
->nr_charclass
)
1243 lr_error (lr
, _("character class `%s' already defined"), name
);
1247 if (ctype
->nr_charclass
== MAX_NR_CHARCLASS
)
1248 /* Exit code 2 is prescribed in P1003.2b. */
1249 WITH_CUR_LOCALE (error (2, 0, _("\
1250 implementation limit: no more than %Zd character classes allowed"),
1253 ctype
->classnames
[ctype
->nr_charclass
++] = name
;
1258 ctype_map_new (struct linereader
*lr
, struct locale_ctype_t
*ctype
,
1259 const char *name
, const struct charmap_t
*charmap
)
1261 size_t max_chars
= 0;
1264 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
)
1266 if (strcmp (ctype
->mapnames
[cnt
], name
) == 0)
1269 if (max_chars
< ctype
->map_collection_max
[cnt
])
1270 max_chars
= ctype
->map_collection_max
[cnt
];
1273 if (cnt
< ctype
->map_collection_nr
)
1275 lr_error (lr
, _("character map `%s' already defined"), name
);
1279 if (ctype
->map_collection_nr
== MAX_NR_CHARMAP
)
1280 /* Exit code 2 is prescribed in P1003.2b. */
1281 WITH_CUR_LOCALE (error (2, 0, _("\
1282 implementation limit: no more than %d character maps allowed"),
1285 ctype
->mapnames
[cnt
] = name
;
1288 ctype
->map_collection_max
[cnt
] = charmap
->mb_cur_max
== 1 ? 256 : 512;
1290 ctype
->map_collection_max
[cnt
] = max_chars
;
1292 ctype
->map_collection
[cnt
] = (uint32_t *)
1293 xcalloc (sizeof (uint32_t), ctype
->map_collection_max
[cnt
]);
1294 ctype
->map_collection_act
[cnt
] = 256;
1296 ++ctype
->map_collection_nr
;
1300 /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
1301 is possible if we only want to extend the name array. */
1303 find_idx (struct locale_ctype_t
*ctype
, uint32_t **table
, size_t *max
,
1304 size_t *act
, uint32_t idx
)
1309 return table
== NULL
? NULL
: &(*table
)[idx
];
1311 /* Use the charnames_idx lookup table instead of the slow search loop. */
1313 cnt
= idx_table_get (&ctype
->charnames_idx
, idx
);
1316 cnt
= ctype
->charnames_act
;
1318 for (cnt
= 256; cnt
< ctype
->charnames_act
; ++cnt
)
1319 if (ctype
->charnames
[cnt
] == idx
)
1323 /* We have to distinguish two cases: the name is found or not. */
1324 if (cnt
== ctype
->charnames_act
)
1326 /* Extend the name array. */
1327 if (ctype
->charnames_act
== ctype
->charnames_max
)
1329 ctype
->charnames_max
*= 2;
1330 ctype
->charnames
= (uint32_t *)
1331 xrealloc (ctype
->charnames
,
1332 sizeof (uint32_t) * ctype
->charnames_max
);
1334 ctype
->charnames
[ctype
->charnames_act
++] = idx
;
1335 idx_table_add (&ctype
->charnames_idx
, idx
, cnt
);
1339 /* We have done everything we are asked to do. */
1343 /* The caller does not want to extend the table. */
1344 return (cnt
>= *act
? NULL
: &(*table
)[cnt
]);
1350 size_t old_max
= *max
;
1353 while (*max
<= cnt
);
1356 (uint32_t *) xrealloc (*table
, *max
* sizeof (uint32_t));
1357 memset (&(*table
)[old_max
], '\0',
1358 (*max
- old_max
) * sizeof (uint32_t));
1364 return &(*table
)[cnt
];
1369 get_character (struct token
*now
, const struct charmap_t
*charmap
,
1370 struct repertoire_t
*repertoire
,
1371 struct charseq
**seqp
, uint32_t *wchp
)
1373 if (now
->tok
== tok_bsymbol
)
1375 /* This will hopefully be the normal case. */
1376 *wchp
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
1377 now
->val
.str
.lenmb
);
1378 *seqp
= charmap_find_value (charmap
, now
->val
.str
.startmb
,
1379 now
->val
.str
.lenmb
);
1381 else if (now
->tok
== tok_ucs4
)
1385 snprintf (utmp
, sizeof (utmp
), "U%08X", now
->val
.ucs4
);
1386 *seqp
= charmap_find_value (charmap
, utmp
, 9);
1389 *seqp
= repertoire_find_seq (repertoire
, now
->val
.ucs4
);
1393 /* Compute the value in the charmap from the UCS value. */
1394 const char *symbol
= repertoire_find_symbol (repertoire
,
1400 *seqp
= charmap_find_value (charmap
, symbol
, strlen (symbol
));
1404 if (repertoire
!= NULL
)
1406 /* Insert a negative entry. */
1407 static const struct charseq negative
1408 = { .ucs4
= ILLEGAL_CHAR_VALUE
};
1409 uint32_t *newp
= obstack_alloc (&repertoire
->mem_pool
,
1411 *newp
= now
->val
.ucs4
;
1413 insert_entry (&repertoire
->seq_table
, newp
,
1414 sizeof (uint32_t), (void *) &negative
);
1418 (*seqp
)->ucs4
= now
->val
.ucs4
;
1420 else if ((*seqp
)->ucs4
!= now
->val
.ucs4
)
1423 *wchp
= now
->val
.ucs4
;
1425 else if (now
->tok
== tok_charcode
)
1427 /* We must map from the byte code to UCS4. */
1428 *seqp
= charmap_find_symbol (charmap
, now
->val
.str
.startmb
,
1429 now
->val
.str
.lenmb
);
1432 *wchp
= ILLEGAL_CHAR_VALUE
;
1435 if ((*seqp
)->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1436 (*seqp
)->ucs4
= repertoire_find_value (repertoire
, (*seqp
)->name
,
1437 strlen ((*seqp
)->name
));
1438 *wchp
= (*seqp
)->ucs4
;
1448 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1449 the .(2). counterparts. */
1451 charclass_symbolic_ellipsis (struct linereader
*ldfile
,
1452 struct locale_ctype_t
*ctype
,
1453 const struct charmap_t
*charmap
,
1454 struct repertoire_t
*repertoire
,
1456 const char *last_str
,
1457 unsigned long int class256_bit
,
1458 unsigned long int class_bit
, int base
,
1459 int ignore_content
, int handle_digits
, int step
)
1461 const char *nowstr
= now
->val
.str
.startmb
;
1462 char tmp
[now
->val
.str
.lenmb
+ 1];
1465 unsigned long int from
;
1466 unsigned long int to
;
1468 /* We have to compute the ellipsis values using the symbolic names. */
1469 assert (last_str
!= NULL
);
1471 if (strlen (last_str
) != now
->val
.str
.lenmb
)
1475 _("`%s' and `%.*s' are no valid names for symbolic range"),
1476 last_str
, (int) now
->val
.str
.lenmb
, nowstr
);
1480 if (memcmp (last_str
, nowstr
, now
->val
.str
.lenmb
) == 0)
1481 /* Nothing to do, the names are the same. */
1484 for (cp
= last_str
; *cp
== *(nowstr
+ (cp
- last_str
)); ++cp
)
1488 from
= strtoul (cp
, &endp
, base
);
1489 if ((from
== UINT_MAX
&& errno
== ERANGE
) || *endp
!= '\0')
1492 to
= strtoul (nowstr
+ (cp
- last_str
), &endp
, base
);
1493 if ((to
== UINT_MAX
&& errno
== ERANGE
)
1494 || (endp
- nowstr
) != now
->val
.str
.lenmb
|| from
>= to
)
1497 /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
1498 if (!ignore_content
)
1500 now
->val
.str
.startmb
= tmp
;
1501 while ((from
+= step
) <= to
)
1503 struct charseq
*seq
;
1506 sprintf (tmp
, (base
== 10 ? "%.*s%0*ld" : "%.*s%0*lX"),
1507 (int) (cp
- last_str
), last_str
,
1508 (int) (now
->val
.str
.lenmb
- (cp
- last_str
)),
1511 get_character (now
, charmap
, repertoire
, &seq
, &wch
);
1513 if (seq
!= NULL
&& seq
->nbytes
== 1)
1514 /* Yep, we can store information about this byte sequence. */
1515 ctype
->class256_collection
[seq
->bytes
[0]] |= class256_bit
;
1517 if (wch
!= ILLEGAL_CHAR_VALUE
&& class_bit
!= 0)
1518 /* We have the UCS4 position. */
1519 *find_idx (ctype
, &ctype
->class_collection
,
1520 &ctype
->class_collection_max
,
1521 &ctype
->class_collection_act
, wch
) |= class_bit
;
1523 if (handle_digits
== 1)
1525 /* We must store the digit values. */
1526 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1528 ctype
->mbdigits_max
*= 2;
1529 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1530 (ctype
->mbdigits_max
1531 * sizeof (char *)));
1532 ctype
->wcdigits_max
*= 2;
1533 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1534 (ctype
->wcdigits_max
1535 * sizeof (uint32_t)));
1538 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
1539 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
1541 else if (handle_digits
== 2)
1543 /* We must store the digit values. */
1544 if (ctype
->outdigits_act
>= 10)
1546 lr_error (ldfile
, _("\
1547 %s: field `%s' does not contain exactly ten entries"),
1548 "LC_CTYPE", "outdigit");
1552 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
1553 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
1554 ++ctype
->outdigits_act
;
1561 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. */
1563 charclass_ucs4_ellipsis (struct linereader
*ldfile
,
1564 struct locale_ctype_t
*ctype
,
1565 const struct charmap_t
*charmap
,
1566 struct repertoire_t
*repertoire
,
1567 struct token
*now
, uint32_t last_wch
,
1568 unsigned long int class256_bit
,
1569 unsigned long int class_bit
, int ignore_content
,
1570 int handle_digits
, int step
)
1572 if (last_wch
> now
->val
.ucs4
)
1574 lr_error (ldfile
, _("\
1575 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1576 (now
->val
.ucs4
| last_wch
) < 65536 ? 4 : 8, now
->val
.ucs4
,
1577 (now
->val
.ucs4
| last_wch
) < 65536 ? 4 : 8, last_wch
);
1581 if (!ignore_content
)
1582 while ((last_wch
+= step
) <= now
->val
.ucs4
)
1584 /* We have to find out whether there is a byte sequence corresponding
1585 to this UCS4 value. */
1586 struct charseq
*seq
;
1589 snprintf (utmp
, sizeof (utmp
), "U%08X", last_wch
);
1590 seq
= charmap_find_value (charmap
, utmp
, 9);
1593 snprintf (utmp
, sizeof (utmp
), "U%04X", last_wch
);
1594 seq
= charmap_find_value (charmap
, utmp
, 5);
1598 /* Try looking in the repertoire map. */
1599 seq
= repertoire_find_seq (repertoire
, last_wch
);
1601 /* If this is the first time we look for this sequence create a new
1605 static const struct charseq negative
1606 = { .ucs4
= ILLEGAL_CHAR_VALUE
};
1608 /* Find the symbolic name for this UCS4 value. */
1609 if (repertoire
!= NULL
)
1611 const char *symbol
= repertoire_find_symbol (repertoire
,
1613 uint32_t *newp
= obstack_alloc (&repertoire
->mem_pool
,
1618 /* We have a name, now search the multibyte value. */
1619 seq
= charmap_find_value (charmap
, symbol
, strlen (symbol
));
1622 /* We have to create a fake entry. */
1623 seq
= (struct charseq
*) &negative
;
1625 seq
->ucs4
= last_wch
;
1627 insert_entry (&repertoire
->seq_table
, newp
, sizeof (uint32_t),
1631 /* We have to create a fake entry. */
1632 seq
= (struct charseq
*) &negative
;
1635 /* We have a name, now search the multibyte value. */
1636 if (seq
->ucs4
== last_wch
&& seq
->nbytes
== 1)
1637 /* Yep, we can store information about this byte sequence. */
1638 ctype
->class256_collection
[(size_t) seq
->bytes
[0]]
1641 /* And of course we have the UCS4 position. */
1643 *find_idx (ctype
, &ctype
->class_collection
,
1644 &ctype
->class_collection_max
,
1645 &ctype
->class_collection_act
, last_wch
) |= class_bit
;
1647 if (handle_digits
== 1)
1649 /* We must store the digit values. */
1650 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1652 ctype
->mbdigits_max
*= 2;
1653 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1654 (ctype
->mbdigits_max
1655 * sizeof (char *)));
1656 ctype
->wcdigits_max
*= 2;
1657 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1658 (ctype
->wcdigits_max
1659 * sizeof (uint32_t)));
1662 ctype
->mbdigits
[ctype
->mbdigits_act
++] = (seq
->ucs4
== last_wch
1664 ctype
->wcdigits
[ctype
->wcdigits_act
++] = last_wch
;
1666 else if (handle_digits
== 2)
1668 /* We must store the digit values. */
1669 if (ctype
->outdigits_act
>= 10)
1671 lr_error (ldfile
, _("\
1672 %s: field `%s' does not contain exactly ten entries"),
1673 "LC_CTYPE", "outdigit");
1677 ctype
->mboutdigits
[ctype
->outdigits_act
] = (seq
->ucs4
== last_wch
1679 ctype
->wcoutdigits
[ctype
->outdigits_act
] = last_wch
;
1680 ++ctype
->outdigits_act
;
1686 /* Ellipsis as in `/xea/x12.../xea/x34'. */
1688 charclass_charcode_ellipsis (struct linereader
*ldfile
,
1689 struct locale_ctype_t
*ctype
,
1690 const struct charmap_t
*charmap
,
1691 struct repertoire_t
*repertoire
,
1692 struct token
*now
, char *last_charcode
,
1693 uint32_t last_charcode_len
,
1694 unsigned long int class256_bit
,
1695 unsigned long int class_bit
, int ignore_content
,
1698 /* First check whether the to-value is larger. */
1699 if (now
->val
.charcode
.nbytes
!= last_charcode_len
)
1701 lr_error (ldfile
, _("\
1702 start and end character sequence of range must have the same length"));
1706 if (memcmp (last_charcode
, now
->val
.charcode
.bytes
, last_charcode_len
) > 0)
1708 lr_error (ldfile
, _("\
1709 to-value character sequence is smaller than from-value sequence"));
1713 if (!ignore_content
)
1717 /* Increment the byte sequence value. */
1718 struct charseq
*seq
;
1722 for (i
= last_charcode_len
- 1; i
>= 0; --i
)
1723 if (++last_charcode
[i
] != 0)
1726 if (last_charcode_len
== 1)
1727 /* Of course we have the charcode value. */
1728 ctype
->class256_collection
[(size_t) last_charcode
[0]]
1731 /* Find the symbolic name. */
1732 seq
= charmap_find_symbol (charmap
, last_charcode
,
1736 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1737 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1738 strlen (seq
->name
));
1739 wch
= seq
== NULL
? ILLEGAL_CHAR_VALUE
: seq
->ucs4
;
1741 if (wch
!= ILLEGAL_CHAR_VALUE
&& class_bit
!= 0)
1742 *find_idx (ctype
, &ctype
->class_collection
,
1743 &ctype
->class_collection_max
,
1744 &ctype
->class_collection_act
, wch
) |= class_bit
;
1747 wch
= ILLEGAL_CHAR_VALUE
;
1749 if (handle_digits
== 1)
1751 /* We must store the digit values. */
1752 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1754 ctype
->mbdigits_max
*= 2;
1755 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1756 (ctype
->mbdigits_max
1757 * sizeof (char *)));
1758 ctype
->wcdigits_max
*= 2;
1759 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1760 (ctype
->wcdigits_max
1761 * sizeof (uint32_t)));
1764 seq
= xmalloc (sizeof (struct charseq
) + last_charcode_len
);
1765 memcpy ((char *) (seq
+ 1), last_charcode
, last_charcode_len
);
1766 seq
->nbytes
= last_charcode_len
;
1768 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
1769 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
1771 else if (handle_digits
== 2)
1773 struct charseq
*seq
;
1774 /* We must store the digit values. */
1775 if (ctype
->outdigits_act
>= 10)
1777 lr_error (ldfile
, _("\
1778 %s: field `%s' does not contain exactly ten entries"),
1779 "LC_CTYPE", "outdigit");
1783 seq
= xmalloc (sizeof (struct charseq
) + last_charcode_len
);
1784 memcpy ((char *) (seq
+ 1), last_charcode
, last_charcode_len
);
1785 seq
->nbytes
= last_charcode_len
;
1787 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
1788 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
1789 ++ctype
->outdigits_act
;
1792 while (memcmp (last_charcode
, now
->val
.charcode
.bytes
,
1793 last_charcode_len
) != 0);
1799 find_translit2 (struct locale_ctype_t
*ctype
, const struct charmap_t
*charmap
,
1802 struct translit_t
*trunp
= ctype
->translit
;
1803 struct translit_ignore_t
*tirunp
= ctype
->translit_ignore
;
1805 while (trunp
!= NULL
)
1807 /* XXX We simplify things here. The transliterations we look
1808 for are only allowed to have one character. */
1809 if (trunp
->from
[0] == wch
&& trunp
->from
[1] == 0)
1811 /* Found it. Now look for a transliteration which can be
1812 represented with the character set. */
1813 struct translit_to_t
*torunp
= trunp
->to
;
1815 while (torunp
!= NULL
)
1819 for (i
= 0; torunp
->str
[i
] != 0; ++i
)
1823 snprintf (utmp
, sizeof (utmp
), "U%08X", torunp
->str
[i
]);
1824 if (charmap_find_value (charmap
, utmp
, 9) == NULL
)
1825 /* This character cannot be represented. */
1829 if (torunp
->str
[i
] == 0)
1832 torunp
= torunp
->next
;
1838 trunp
= trunp
->next
;
1841 /* Check for ignored chars. */
1842 while (tirunp
!= NULL
)
1844 if (tirunp
->from
<= wch
&& tirunp
->to
>= wch
)
1848 for (wi
= tirunp
->from
; wi
<= wch
; wi
+= tirunp
->step
)
1850 return (uint32_t []) { 0 };
1854 /* Nothing found. */
1860 find_translit (struct localedef_t
*locale
, const struct charmap_t
*charmap
,
1863 struct locale_ctype_t
*ctype
;
1864 uint32_t *result
= NULL
;
1866 assert (locale
!= NULL
);
1867 ctype
= locale
->categories
[LC_CTYPE
].ctype
;
1869 if (ctype
->translit
!= NULL
)
1870 result
= find_translit2 (ctype
, charmap
, wch
);
1874 struct translit_include_t
*irunp
= ctype
->translit_include
;
1876 while (irunp
!= NULL
&& result
== NULL
)
1878 result
= find_translit (find_locale (CTYPE_LOCALE
,
1880 irunp
->copy_repertoire
,
1883 irunp
= irunp
->next
;
1891 /* Read one transliteration entry. */
1893 read_widestring (struct linereader
*ldfile
, struct token
*now
,
1894 const struct charmap_t
*charmap
,
1895 struct repertoire_t
*repertoire
)
1899 if (now
->tok
== tok_default_missing
)
1900 /* The special name "" will denote this case. */
1901 wstr
= ((uint32_t *) { 0 });
1902 else if (now
->tok
== tok_bsymbol
)
1904 /* Get the value from the repertoire. */
1905 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1906 wstr
[0] = repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
1907 now
->val
.str
.lenmb
);
1908 if (wstr
[0] == ILLEGAL_CHAR_VALUE
)
1910 /* We cannot proceed, we don't know the UCS4 value. */
1917 else if (now
->tok
== tok_ucs4
)
1919 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1920 wstr
[0] = now
->val
.ucs4
;
1923 else if (now
->tok
== tok_charcode
)
1925 /* Argh, we have to convert to the symbol name first and then to the
1927 struct charseq
*seq
= charmap_find_symbol (charmap
,
1928 now
->val
.str
.startmb
,
1929 now
->val
.str
.lenmb
);
1931 /* Cannot find the UCS4 value. */
1934 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1935 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1936 strlen (seq
->name
));
1937 if (seq
->ucs4
== ILLEGAL_CHAR_VALUE
)
1938 /* We cannot proceed, we don't know the UCS4 value. */
1941 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1942 wstr
[0] = seq
->ucs4
;
1945 else if (now
->tok
== tok_string
)
1947 wstr
= now
->val
.str
.startwc
;
1948 if (wstr
== NULL
|| wstr
[0] == 0)
1953 if (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
1954 lr_ignore_rest (ldfile
, 0);
1955 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1956 return (uint32_t *) -1l;
1964 read_translit_entry (struct linereader
*ldfile
, struct locale_ctype_t
*ctype
,
1965 struct token
*now
, const struct charmap_t
*charmap
,
1966 struct repertoire_t
*repertoire
)
1968 uint32_t *from_wstr
= read_widestring (ldfile
, now
, charmap
, repertoire
);
1969 struct translit_t
*result
;
1970 struct translit_to_t
**top
;
1971 struct obstack
*ob
= &ctype
->mempool
;
1975 if (from_wstr
== NULL
)
1976 /* There is no valid from string. */
1979 result
= (struct translit_t
*) obstack_alloc (ob
,
1980 sizeof (struct translit_t
));
1981 result
->from
= from_wstr
;
1982 result
->fname
= ldfile
->fname
;
1983 result
->lineno
= ldfile
->lineno
;
1984 result
->next
= NULL
;
1994 /* Next we have one or more transliterations. They are
1995 separated by semicolons. */
1996 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
1998 if (!first
&& (now
->tok
== tok_semicolon
|| now
->tok
== tok_eol
))
2000 /* One string read. */
2001 const uint32_t zero
= 0;
2005 obstack_grow (ob
, &zero
, 4);
2006 to_wstr
= obstack_finish (ob
);
2008 *top
= obstack_alloc (ob
, sizeof (struct translit_to_t
));
2009 (*top
)->str
= to_wstr
;
2010 (*top
)->next
= NULL
;
2013 if (now
->tok
== tok_eol
)
2015 result
->next
= ctype
->translit
;
2016 ctype
->translit
= result
;
2021 top
= &(*top
)->next
;
2026 to_wstr
= read_widestring (ldfile
, now
, charmap
, repertoire
);
2027 if (to_wstr
== (uint32_t *) -1l)
2029 /* An error occurred. */
2030 obstack_free (ob
, result
);
2034 if (to_wstr
== NULL
)
2037 /* This value is usable. */
2038 obstack_grow (ob
, to_wstr
, wcslen ((wchar_t *) to_wstr
) * 4);
2047 read_translit_ignore_entry (struct linereader
*ldfile
,
2048 struct locale_ctype_t
*ctype
,
2049 const struct charmap_t
*charmap
,
2050 struct repertoire_t
*repertoire
)
2052 /* We expect a semicolon-separated list of characters we ignore. We are
2053 only interested in the wide character definitions. These must be
2054 single characters, possibly defining a range when an ellipsis is used. */
2057 struct token
*now
= lr_token (ldfile
, charmap
, NULL
, repertoire
,
2059 struct translit_ignore_t
*newp
;
2062 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2065 _("premature end of `translit_ignore' definition"));
2069 if (now
->tok
!= tok_bsymbol
&& now
->tok
!= tok_ucs4
)
2071 lr_error (ldfile
, _("syntax error"));
2072 lr_ignore_rest (ldfile
, 0);
2076 if (now
->tok
== tok_ucs4
)
2077 from
= now
->val
.ucs4
;
2079 /* Try to get the value. */
2080 from
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
2081 now
->val
.str
.lenmb
);
2083 if (from
== ILLEGAL_CHAR_VALUE
)
2085 lr_error (ldfile
, "invalid character name");
2090 newp
= (struct translit_ignore_t
*)
2091 obstack_alloc (&ctype
->mempool
, sizeof (struct translit_ignore_t
));
2096 newp
->next
= ctype
->translit_ignore
;
2097 ctype
->translit_ignore
= newp
;
2100 /* Now we expect either a semicolon, an ellipsis, or the end of the
2102 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
2104 if (now
->tok
== tok_ellipsis2
|| now
->tok
== tok_ellipsis2_2
)
2106 /* XXX Should we bother implementing `....'? `...' certainly
2107 will not be implemented. */
2109 int step
= now
->tok
== tok_ellipsis2_2
? 2 : 1;
2111 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
2113 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2116 _("premature end of `translit_ignore' definition"));
2120 if (now
->tok
!= tok_bsymbol
&& now
->tok
!= tok_ucs4
)
2122 lr_error (ldfile
, _("syntax error"));
2123 lr_ignore_rest (ldfile
, 0);
2127 if (now
->tok
== tok_ucs4
)
2130 /* Try to get the value. */
2131 to
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
2132 now
->val
.str
.lenmb
);
2134 if (to
== ILLEGAL_CHAR_VALUE
)
2135 lr_error (ldfile
, "invalid character name");
2138 /* Make sure the `to'-value is larger. */
2145 lr_error (ldfile
, _("\
2146 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2147 (to
| from
) < 65536 ? 4 : 8, to
,
2148 (to
| from
) < 65536 ? 4 : 8, from
);
2151 /* And the next token. */
2152 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
2155 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2159 if (now
->tok
== tok_semicolon
)
2163 /* If we come here something is wrong. */
2164 lr_error (ldfile
, _("syntax error"));
2165 lr_ignore_rest (ldfile
, 0);
2171 /* The parser for the LC_CTYPE section of the locale definition. */
2173 ctype_read (struct linereader
*ldfile
, struct localedef_t
*result
,
2174 const struct charmap_t
*charmap
, const char *repertoire_name
,
2177 struct repertoire_t
*repertoire
= NULL
;
2178 struct locale_ctype_t
*ctype
;
2180 enum token_t nowtok
;
2182 struct charseq
*last_seq
;
2183 uint32_t last_wch
= 0;
2184 enum token_t last_token
;
2185 enum token_t ellipsis_token
;
2187 char last_charcode
[16];
2188 size_t last_charcode_len
= 0;
2189 const char *last_str
= NULL
;
2191 struct localedef_t
*copy_locale
= NULL
;
2193 /* Get the repertoire we have to use. */
2194 if (repertoire_name
!= NULL
)
2195 repertoire
= repertoire_read (repertoire_name
);
2197 /* The rest of the line containing `LC_CTYPE' must be free. */
2198 lr_ignore_rest (ldfile
, 1);
2203 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2206 while (nowtok
== tok_eol
);
2208 /* If we see `copy' now we are almost done. */
2209 if (nowtok
== tok_copy
)
2211 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2212 if (now
->tok
!= tok_string
)
2214 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2218 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2219 while (now
->tok
!= tok_eof
&& now
->tok
!= tok_end
);
2221 if (now
->tok
!= tok_eof
2222 || (now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
),
2223 now
->tok
== tok_eof
))
2224 lr_error (ldfile
, _("%s: premature end of file"), "LC_CTYPE");
2225 else if (now
->tok
!= tok_lc_ctype
)
2227 lr_error (ldfile
, _("\
2228 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2229 lr_ignore_rest (ldfile
, 0);
2232 lr_ignore_rest (ldfile
, 1);
2237 if (! ignore_content
)
2239 /* Get the locale definition. */
2240 copy_locale
= load_locale (LC_CTYPE
, now
->val
.str
.startmb
,
2241 repertoire_name
, charmap
, NULL
);
2242 if ((copy_locale
->avail
& CTYPE_LOCALE
) == 0)
2244 /* Not yet loaded. So do it now. */
2245 if (locfile_read (copy_locale
, charmap
) != 0)
2250 lr_ignore_rest (ldfile
, 1);
2252 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2256 /* Prepare the data structures. */
2257 ctype_startup (ldfile
, result
, charmap
, copy_locale
, ignore_content
);
2258 ctype
= result
->categories
[LC_CTYPE
].ctype
;
2260 /* Remember the repertoire we use. */
2261 if (!ignore_content
)
2262 ctype
->repertoire
= repertoire
;
2266 unsigned long int class_bit
= 0;
2267 unsigned long int class256_bit
= 0;
2268 int handle_digits
= 0;
2270 /* Of course we don't proceed beyond the end of file. */
2271 if (nowtok
== tok_eof
)
2274 /* Ignore empty lines. */
2275 if (nowtok
== tok_eol
)
2277 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2285 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2286 while (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2288 ctype_class_new (ldfile
, ctype
, now
->val
.str
.startmb
);
2289 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2290 if (now
->tok
!= tok_semicolon
)
2292 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2294 if (now
->tok
!= tok_eol
)
2296 %s: syntax error in definition of new character class"), "LC_CTYPE");
2300 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2301 while (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2303 ctype_map_new (ldfile
, ctype
, now
->val
.str
.startmb
, charmap
);
2304 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2305 if (now
->tok
!= tok_semicolon
)
2307 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2309 if (now
->tok
!= tok_eol
)
2311 %s: syntax error in definition of new character map"), "LC_CTYPE");
2315 /* Ignore the rest of the line if we don't need the input of
2319 lr_ignore_rest (ldfile
, 0);
2323 /* We simply forget the `class' keyword and use the following
2324 operand to determine the bit. */
2325 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2326 if (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2328 /* Can be one of the predefined class names. */
2329 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
2330 if (strcmp (ctype
->classnames
[cnt
], now
->val
.str
.startmb
) == 0)
2332 if (cnt
>= ctype
->nr_charclass
)
2334 #ifdef PREDEFINED_CLASSES
2335 if (now
->val
.str
.lenmb
== 8
2336 && memcmp ("special1", now
->val
.str
.startmb
, 8) == 0)
2337 class_bit
= _ISwspecial1
;
2338 else if (now
->val
.str
.lenmb
== 8
2339 && memcmp ("special2", now
->val
.str
.startmb
, 8) == 0)
2340 class_bit
= _ISwspecial2
;
2341 else if (now
->val
.str
.lenmb
== 8
2342 && memcmp ("special3", now
->val
.str
.startmb
, 8) == 0)
2343 class_bit
= _ISwspecial3
;
2347 /* OK, it's a new class. */
2348 ctype_class_new (ldfile
, ctype
, now
->val
.str
.startmb
);
2350 class_bit
= _ISwbit (ctype
->nr_charclass
- 1);
2355 class_bit
= _ISwbit (cnt
);
2357 free (now
->val
.str
.startmb
);
2360 else if (now
->tok
== tok_digit
)
2361 goto handle_tok_digit
;
2362 else if (now
->tok
< tok_upper
|| now
->tok
> tok_blank
)
2366 class_bit
= BITw (now
->tok
);
2367 class256_bit
= BIT (now
->tok
);
2370 /* The next character must be a semicolon. */
2371 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2372 if (now
->tok
!= tok_semicolon
)
2374 goto read_charclass
;
2387 /* Ignore the rest of the line if we don't need the input of
2391 lr_ignore_rest (ldfile
, 0);
2395 class_bit
= BITw (now
->tok
);
2396 class256_bit
= BIT (now
->tok
);
2399 ctype
->class_done
|= class_bit
;
2400 last_token
= tok_none
;
2401 ellipsis_token
= tok_none
;
2403 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2404 while (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
2407 struct charseq
*seq
;
2409 if (ellipsis_token
== tok_none
)
2411 if (get_character (now
, charmap
, repertoire
, &seq
, &wch
))
2414 if (!ignore_content
&& seq
!= NULL
&& seq
->nbytes
== 1)
2415 /* Yep, we can store information about this byte
2417 ctype
->class256_collection
[seq
->bytes
[0]] |= class256_bit
;
2419 if (!ignore_content
&& wch
!= ILLEGAL_CHAR_VALUE
2421 /* We have the UCS4 position. */
2422 *find_idx (ctype
, &ctype
->class_collection
,
2423 &ctype
->class_collection_max
,
2424 &ctype
->class_collection_act
, wch
) |= class_bit
;
2426 last_token
= now
->tok
;
2427 /* Terminate the string. */
2428 if (last_token
== tok_bsymbol
)
2430 now
->val
.str
.startmb
[now
->val
.str
.lenmb
] = '\0';
2431 last_str
= now
->val
.str
.startmb
;
2437 memcpy (last_charcode
, now
->val
.charcode
.bytes
, 16);
2438 last_charcode_len
= now
->val
.charcode
.nbytes
;
2440 if (!ignore_content
&& handle_digits
== 1)
2442 /* We must store the digit values. */
2443 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
2445 ctype
->mbdigits_max
+= 10;
2446 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
2447 (ctype
->mbdigits_max
2448 * sizeof (char *)));
2449 ctype
->wcdigits_max
+= 10;
2450 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
2451 (ctype
->wcdigits_max
2452 * sizeof (uint32_t)));
2455 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
2456 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
2458 else if (!ignore_content
&& handle_digits
== 2)
2460 /* We must store the digit values. */
2461 if (ctype
->outdigits_act
>= 10)
2463 lr_error (ldfile
, _("\
2464 %s: field `%s' does not contain exactly ten entries"),
2465 "LC_CTYPE", "outdigit");
2466 lr_ignore_rest (ldfile
, 0);
2470 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
2471 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
2472 ++ctype
->outdigits_act
;
2477 /* Now it gets complicated. We have to resolve the
2478 ellipsis problem. First we must distinguish between
2479 the different kind of ellipsis and this must match the
2480 tokens we have seen. */
2481 assert (last_token
!= tok_none
);
2483 if (last_token
!= now
->tok
)
2485 lr_error (ldfile
, _("\
2486 ellipsis range must be marked by two operands of same type"));
2487 lr_ignore_rest (ldfile
, 0);
2491 if (last_token
== tok_bsymbol
)
2493 if (ellipsis_token
== tok_ellipsis3
)
2494 lr_error (ldfile
, _("with symbolic name range values \
2495 the absolute ellipsis `...' must not be used"));
2497 charclass_symbolic_ellipsis (ldfile
, ctype
, charmap
,
2498 repertoire
, now
, last_str
,
2499 class256_bit
, class_bit
,
2504 handle_digits
, step
);
2506 else if (last_token
== tok_ucs4
)
2508 if (ellipsis_token
!= tok_ellipsis2
)
2509 lr_error (ldfile
, _("\
2510 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2512 charclass_ucs4_ellipsis (ldfile
, ctype
, charmap
,
2513 repertoire
, now
, last_wch
,
2514 class256_bit
, class_bit
,
2515 ignore_content
, handle_digits
,
2520 assert (last_token
== tok_charcode
);
2522 if (ellipsis_token
!= tok_ellipsis3
)
2523 lr_error (ldfile
, _("\
2524 with character code range values one must use the absolute ellipsis `...'"));
2526 charclass_charcode_ellipsis (ldfile
, ctype
, charmap
,
2530 class256_bit
, class_bit
,
2535 /* Now we have used the last value. */
2536 last_token
= tok_none
;
2539 /* Next we expect a semicolon or the end of the line. */
2540 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2541 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2544 if (last_token
!= tok_none
2545 && now
->tok
>= tok_ellipsis2
&& now
->tok
<= tok_ellipsis4_2
)
2547 if (now
->tok
== tok_ellipsis2_2
)
2549 now
->tok
= tok_ellipsis2
;
2552 else if (now
->tok
== tok_ellipsis4_2
)
2554 now
->tok
= tok_ellipsis4
;
2558 ellipsis_token
= now
->tok
;
2560 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2564 if (now
->tok
!= tok_semicolon
)
2567 /* And get the next character. */
2568 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2570 ellipsis_token
= tok_none
;
2576 /* Ignore the rest of the line if we don't need the input of
2580 lr_ignore_rest (ldfile
, 0);
2585 class_bit
= _ISwdigit
;
2586 class256_bit
= _ISdigit
;
2588 goto read_charclass
;
2591 /* Ignore the rest of the line if we don't need the input of
2595 lr_ignore_rest (ldfile
, 0);
2599 if (ctype
->outdigits_act
!= 0)
2600 lr_error (ldfile
, _("\
2601 %s: field `%s' declared more than once"),
2602 "LC_CTYPE", "outdigit");
2606 goto read_charclass
;
2609 /* Ignore the rest of the line if we don't need the input of
2613 lr_ignore_rest (ldfile
, 0);
2621 /* Ignore the rest of the line if we don't need the input of
2625 lr_ignore_rest (ldfile
, 0);
2633 /* Ignore the rest of the line if we don't need the input of
2637 lr_ignore_rest (ldfile
, 0);
2641 /* We simply forget the `map' keyword and use the following
2642 operand to determine the mapping. */
2643 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2644 if (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2648 for (cnt
= 2; cnt
< ctype
->map_collection_nr
; ++cnt
)
2649 if (strcmp (now
->val
.str
.startmb
, ctype
->mapnames
[cnt
]) == 0)
2652 if (cnt
< ctype
->map_collection_nr
)
2653 free (now
->val
.str
.startmb
);
2655 /* OK, it's a new map. */
2656 ctype_map_new (ldfile
, ctype
, now
->val
.str
.startmb
, charmap
);
2660 else if (now
->tok
< tok_toupper
|| now
->tok
> tok_tolower
)
2663 mapidx
= now
->tok
- tok_toupper
;
2665 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2666 /* This had better be a semicolon. */
2667 if (now
->tok
!= tok_semicolon
)
2671 /* Test whether this mapping was already defined. */
2672 if (ctype
->tomap_done
[mapidx
])
2674 lr_error (ldfile
, _("duplicated definition for mapping `%s'"),
2675 ctype
->mapnames
[mapidx
]);
2676 lr_ignore_rest (ldfile
, 0);
2679 ctype
->tomap_done
[mapidx
] = 1;
2681 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2682 while (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
2684 struct charseq
*from_seq
;
2686 struct charseq
*to_seq
;
2689 /* Every pair starts with an opening brace. */
2690 if (now
->tok
!= tok_open_brace
)
2693 /* Next comes the from-value. */
2694 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2695 if (get_character (now
, charmap
, repertoire
, &from_seq
,
2699 /* The next is a comma. */
2700 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2701 if (now
->tok
!= tok_comma
)
2704 /* And the other value. */
2705 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2706 if (get_character (now
, charmap
, repertoire
, &to_seq
,
2710 /* And the last thing is the closing brace. */
2711 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2712 if (now
->tok
!= tok_close_brace
)
2715 if (!ignore_content
)
2717 /* Check whether the mapping converts from an ASCII value
2718 to a non-ASCII value. */
2719 if (from_seq
!= NULL
&& from_seq
->nbytes
== 1
2720 && isascii (from_seq
->bytes
[0])
2721 && to_seq
!= NULL
&& (to_seq
->nbytes
!= 1
2722 || !isascii (to_seq
->bytes
[0])))
2723 ctype
->to_nonascii
= 1;
2725 if (mapidx
< 2 && from_seq
!= NULL
&& to_seq
!= NULL
2726 && from_seq
->nbytes
== 1 && to_seq
->nbytes
== 1)
2727 /* We can use this value. */
2728 ctype
->map256_collection
[mapidx
][from_seq
->bytes
[0]]
2731 if (from_wch
!= ILLEGAL_CHAR_VALUE
2732 && to_wch
!= ILLEGAL_CHAR_VALUE
)
2733 /* Both correct values. */
2734 *find_idx (ctype
, &ctype
->map_collection
[mapidx
],
2735 &ctype
->map_collection_max
[mapidx
],
2736 &ctype
->map_collection_act
[mapidx
],
2740 /* Now comes a semicolon or the end of the line/file. */
2741 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2742 if (now
->tok
== tok_semicolon
)
2743 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2747 case tok_translit_start
:
2748 /* Ignore the entire translit section with its peculiar syntax
2749 if we don't need the input. */
2754 lr_ignore_rest (ldfile
, 0);
2755 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2757 while (now
->tok
!= tok_translit_end
&& now
->tok
!= tok_eof
);
2759 if (now
->tok
== tok_eof
)
2760 lr_error (ldfile
, _(\
2761 "%s: `translit_start' section does not end with `translit_end'"),
2767 /* The rest of the line had better be empty. */
2768 lr_ignore_rest (ldfile
, 1);
2770 /* We count here the number of allocated entries in the `translit'
2774 ldfile
->translate_strings
= 1;
2775 ldfile
->return_widestr
= 1;
2777 /* We proceed until we see the `translit_end' token. */
2778 while (now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
),
2779 now
->tok
!= tok_translit_end
&& now
->tok
!= tok_eof
)
2781 if (now
->tok
== tok_eol
)
2782 /* Ignore empty lines. */
2785 if (now
->tok
== tok_include
)
2787 /* We have to include locale. */
2788 const char *locale_name
;
2789 const char *repertoire_name
;
2790 struct translit_include_t
*include_stmt
, **include_ptr
;
2792 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2793 /* This should be a string or an identifier. In any
2794 case something to name a locale. */
2795 if (now
->tok
!= tok_string
&& now
->tok
!= tok_ident
)
2798 lr_error (ldfile
, _("%s: syntax error"), "LC_CTYPE");
2799 lr_ignore_rest (ldfile
, 0);
2802 locale_name
= now
->val
.str
.startmb
;
2804 /* Next should be a semicolon. */
2805 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2806 if (now
->tok
!= tok_semicolon
)
2807 goto translit_syntax
;
2809 /* Now the repertoire name. */
2810 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2811 if ((now
->tok
!= tok_string
&& now
->tok
!= tok_ident
)
2812 || now
->val
.str
.startmb
== NULL
)
2813 goto translit_syntax
;
2814 repertoire_name
= now
->val
.str
.startmb
;
2815 if (repertoire_name
[0] == '\0')
2816 /* Ignore the empty string. */
2817 repertoire_name
= NULL
;
2819 /* Save the include statement for later processing. */
2820 include_stmt
= (struct translit_include_t
*)
2821 xmalloc (sizeof (struct translit_include_t
));
2822 include_stmt
->copy_locale
= locale_name
;
2823 include_stmt
->copy_repertoire
= repertoire_name
;
2824 include_stmt
->next
= NULL
;
2826 include_ptr
= &ctype
->translit_include
;
2827 while (*include_ptr
!= NULL
)
2828 include_ptr
= &(*include_ptr
)->next
;
2829 *include_ptr
= include_stmt
;
2831 /* The rest of the line must be empty. */
2832 lr_ignore_rest (ldfile
, 1);
2834 /* Make sure the locale is read. */
2835 add_to_readlist (LC_CTYPE
, locale_name
, repertoire_name
,
2839 else if (now
->tok
== tok_default_missing
)
2845 /* We expect a single character or string as the
2847 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2848 wstr
= read_widestring (ldfile
, now
, charmap
,
2853 if (ctype
->default_missing
!= NULL
)
2855 lr_error (ldfile
, _("\
2856 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2857 WITH_CUR_LOCALE (error_at_line (0, 0,
2858 ctype
->default_missing_file
,
2859 ctype
->default_missing_lineno
,
2861 previous definition was here")));
2865 ctype
->default_missing
= wstr
;
2866 ctype
->default_missing_file
= ldfile
->fname
;
2867 ctype
->default_missing_lineno
= ldfile
->lineno
;
2869 /* We can have more entries, ignore them. */
2870 lr_ignore_rest (ldfile
, 0);
2873 else if (wstr
== (uint32_t *) -1l)
2874 /* This was a syntax error. */
2877 /* Maybe there is another replacement we can use. */
2878 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2879 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2881 /* Nothing found. We tell the user. */
2882 lr_error (ldfile
, _("\
2883 %s: no representable `default_missing' definition found"), "LC_CTYPE");
2886 if (now
->tok
!= tok_semicolon
)
2887 goto translit_syntax
;
2892 else if (now
->tok
== tok_translit_ignore
)
2894 read_translit_ignore_entry (ldfile
, ctype
, charmap
,
2899 read_translit_entry (ldfile
, ctype
, now
, charmap
, repertoire
);
2901 ldfile
->return_widestr
= 0;
2903 if (now
->tok
== tok_eof
)
2904 lr_error (ldfile
, _(\
2905 "%s: `translit_start' section does not end with `translit_end'"),
2911 /* Ignore the rest of the line if we don't need the input of
2915 lr_ignore_rest (ldfile
, 0);
2919 /* This could mean one of several things. First test whether
2920 it's a character class name. */
2921 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
2922 if (strcmp (now
->val
.str
.startmb
, ctype
->classnames
[cnt
]) == 0)
2924 if (cnt
< ctype
->nr_charclass
)
2926 class_bit
= _ISwbit (cnt
);
2927 class256_bit
= cnt
<= 11 ? _ISbit (cnt
) : 0;
2928 free (now
->val
.str
.startmb
);
2929 goto read_charclass
;
2931 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
)
2932 if (strcmp (now
->val
.str
.startmb
, ctype
->mapnames
[cnt
]) == 0)
2934 if (cnt
< ctype
->map_collection_nr
)
2937 free (now
->val
.str
.startmb
);
2940 #ifdef PREDEFINED_CLASSES
2941 if (strcmp (now
->val
.str
.startmb
, "special1") == 0)
2943 class_bit
= _ISwspecial1
;
2944 free (now
->val
.str
.startmb
);
2945 goto read_charclass
;
2947 if (strcmp (now
->val
.str
.startmb
, "special2") == 0)
2949 class_bit
= _ISwspecial2
;
2950 free (now
->val
.str
.startmb
);
2951 goto read_charclass
;
2953 if (strcmp (now
->val
.str
.startmb
, "special3") == 0)
2955 class_bit
= _ISwspecial3
;
2956 free (now
->val
.str
.startmb
);
2957 goto read_charclass
;
2959 if (strcmp (now
->val
.str
.startmb
, "tosymmetric") == 0)
2968 /* Next we assume `LC_CTYPE'. */
2969 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2970 if (now
->tok
== tok_eof
)
2972 if (now
->tok
== tok_eol
)
2973 lr_error (ldfile
, _("%s: incomplete `END' line"),
2975 else if (now
->tok
!= tok_lc_ctype
)
2976 lr_error (ldfile
, _("\
2977 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2978 lr_ignore_rest (ldfile
, now
->tok
== tok_lc_ctype
);
2983 if (now
->tok
!= tok_eof
)
2984 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2987 /* Prepare for the next round. */
2988 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2992 /* When we come here we reached the end of the file. */
2993 lr_error (ldfile
, _("%s: premature end of file"), "LC_CTYPE");
/* Supply default character classes and case mappings for every LC_CTYPE
   keyword that the locale definition left out, as recorded in
   ctype->class_done and ctype->tomap_done, and fill the `outdigit' list
   up to ten entries.  Default characters are looked up by name in
   CHARMAP; a diagnostic is issued through error () when a needed
   character is not defined or is not encoded as exactly one byte.  */
2998 set_class_defaults (struct locale_ctype_t
*ctype
,
2999 const struct charmap_t
*charmap
,
3000 struct repertoire_t
*repertoire
)
3004 /* This function defines the default values for the classes and conversions
3005 according to POSIX.2 2.5.2.1.
3006 It may seem that the order of these if-blocks is arbitrary but it is NOT.
3007 Don't move them unless you know what you are doing! */
/* Nested helper: for every character code CH in [FROM, TO] set class bit
   BITPOS both in the byte-indexed table class256_collection and in the
   wide-character collection class_collection.  */
3009 auto void set_default (int bitpos
, int from
, int to
);
3011 void set_default (int bitpos
, int from
, int to
)
3015 int bit
= _ISbit (bitpos
);
3016 int bitw
= _ISwbit (bitpos
);
3017 /* Define string. */
3020 for (ch
= from
; ch
<= to
; ++ch
)
3022 struct charseq
*seq
;
3025 seq
= charmap_find_value (charmap
, tmp
, 1);
3029 sprintf (buf
, "U%08X", ch
);
3030 seq
= charmap_find_value (charmap
, buf
, 9);
3035 WITH_CUR_LOCALE (error (0, 0, _("\
3036 %s: character `%s' not defined in charmap while needed as default value"),
3039 else if (seq
->nbytes
!= 1)
3040 WITH_CUR_LOCALE (error (0, 0, _("\
3041 %s: character `%s' in charmap not representable with one byte"),
3044 ctype
->class256_collection
[seq
->bytes
[0]] |= bit
;
3046 /* No need to search here, the ASCII value is also the Unicode
3048 ELEM (ctype
, class_collection
, , ch
) |= bitw
;
3052 /* Set default values if keyword was not present. */
3053 if ((ctype
->class_done
& BITw (tok_upper
)) == 0)
3054 /* "If this keyword [upper] is not specified, the uppercase letters
3055 `A' through `Z', ..., shall automatically belong to this class,
3056 with implementation defined character values." [P1003.2, 2.5.2.1] */
3057 set_default (BITPOS (tok_upper
), 'A', 'Z');
3059 if ((ctype
->class_done
& BITw (tok_lower
)) == 0)
3060 /* "If this keyword [lower] is not specified, the lowercase letters
3061 `a' through `z', ..., shall automatically belong to this class,
3062 with implementation defined character values." [P1003.2, 2.5.2.1] */
3063 set_default (BITPOS (tok_lower
), 'a', 'z');
3065 if ((ctype
->class_done
& BITw (tok_alpha
)) == 0)
3067 /* Table 2-6 in P1003.2 says that characters in class `upper' or
3068 class `lower' *must* be in class `alpha'. */
3069 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
);
3070 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
);
3072 for (cnt
= 0; cnt
< 256; ++cnt
)
3073 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3074 ctype
->class256_collection
[cnt
] |= BIT (tok_alpha
);
3076 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3077 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3078 ctype
->class_collection
[cnt
] |= BITw (tok_alpha
);
3081 if ((ctype
->class_done
& BITw (tok_digit
)) == 0)
3082 /* "If this keyword [digit] is not specified, the digits `0' through
3083 `9', ..., shall automatically belong to this class, with
3084 implementation-defined character values." [P1003.2, 2.5.2.1] */
3085 set_default (BITPOS (tok_digit
), '0', '9');
3087 /* "Only characters specified for the `alpha' and `digit' keyword
3088 shall be specified. Characters specified for the keyword `alpha'
3089 and `digit' are automatically included in this class. */
3091 unsigned long int mask
= BIT (tok_alpha
) | BIT (tok_digit
);
3092 unsigned long int maskw
= BITw (tok_alpha
) | BITw (tok_digit
);
3094 for (cnt
= 0; cnt
< 256; ++cnt
)
3095 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3096 ctype
->class256_collection
[cnt
] |= BIT (tok_alnum
);
3098 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3099 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3100 ctype
->class_collection
[cnt
] |= BITw (tok_alnum
);
3103 if ((ctype
->class_done
& BITw (tok_space
)) == 0)
3104 /* "If this keyword [space] is not specified, the characters <space>,
3105 <form-feed>, <newline>, <carriage-return>, <tab>, and
3106 <vertical-tab>, ..., shall automatically belong to this class,
3107 with implementation-defined character values." [P1003.2, 2.5.2.1] */
3109 struct charseq
*seq
;
3111 seq
= charmap_find_value (charmap
, "space", 5);
3113 seq
= charmap_find_value (charmap
, "SP", 2);
3115 seq
= charmap_find_value (charmap
, "U00000020", 9);
3119 WITH_CUR_LOCALE (error (0, 0, _("\
3120 %s: character `%s' not defined while needed as default value"),
3121 "LC_CTYPE", "<space>"));
3123 else if (seq
->nbytes
!= 1)
3124 WITH_CUR_LOCALE (error (0, 0, _("\
3125 %s: character `%s' in charmap not representable with one byte"),
3126 "LC_CTYPE", "<space>"));
3128 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3130 /* No need to search. */
3131 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_space
);
3133 seq
= charmap_find_value (charmap
, "form-feed", 9);
3135 seq
= charmap_find_value (charmap
, "U0000000C", 9);
3139 WITH_CUR_LOCALE (error (0, 0, _("\
3140 %s: character `%s' not defined while needed as default value"),
3141 "LC_CTYPE", "<form-feed>"));
3143 else if (seq
->nbytes
!= 1)
3144 WITH_CUR_LOCALE (error (0, 0, _("\
3145 %s: character `%s' in charmap not representable with one byte"),
3146 "LC_CTYPE", "<form-feed>"));
3148 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3150 /* No need to search. */
3151 ELEM (ctype
, class_collection
, , L
'\f') |= BITw (tok_space
);
3154 seq
= charmap_find_value (charmap
, "newline", 7);
3156 seq
= charmap_find_value (charmap
, "U0000000A", 9);
3160 WITH_CUR_LOCALE (error (0, 0, _("\
3161 character `%s' not defined while needed as default value"),
3164 else if (seq
->nbytes
!= 1)
3165 WITH_CUR_LOCALE (error (0, 0, _("\
3166 %s: character `%s' in charmap not representable with one byte"),
3167 "LC_CTYPE", "<newline>"));
3169 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3171 /* No need to search. */
3172 ELEM (ctype
, class_collection
, , L
'\n') |= BITw (tok_space
);
3175 seq
= charmap_find_value (charmap
, "carriage-return", 15);
3177 seq
= charmap_find_value (charmap
, "U0000000D", 9);
3181 WITH_CUR_LOCALE (error (0, 0, _("\
3182 %s: character `%s' not defined while needed as default value"),
3183 "LC_CTYPE", "<carriage-return>"));
3185 else if (seq
->nbytes
!= 1)
3186 WITH_CUR_LOCALE (error (0, 0, _("\
3187 %s: character `%s' in charmap not representable with one byte"),
3188 "LC_CTYPE", "<carriage-return>"));
3190 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3192 /* No need to search. */
3193 ELEM (ctype
, class_collection
, , L
'\r') |= BITw (tok_space
);
3196 seq
= charmap_find_value (charmap
, "tab", 3);
3198 seq
= charmap_find_value (charmap
, "U00000009", 9);
3202 WITH_CUR_LOCALE (error (0, 0, _("\
3203 %s: character `%s' not defined while needed as default value"),
3204 "LC_CTYPE", "<tab>"));
3206 else if (seq
->nbytes
!= 1)
3207 WITH_CUR_LOCALE (error (0, 0, _("\
3208 %s: character `%s' in charmap not representable with one byte"),
3209 "LC_CTYPE", "<tab>"));
3211 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3213 /* No need to search. */
3214 ELEM (ctype
, class_collection
, , L
'\t') |= BITw (tok_space
);
3217 seq
= charmap_find_value (charmap
, "vertical-tab", 12);
3219 seq
= charmap_find_value (charmap
, "U0000000B", 9);
3223 WITH_CUR_LOCALE (error (0, 0, _("\
3224 %s: character `%s' not defined while needed as default value"),
3225 "LC_CTYPE", "<vertical-tab>"));
3227 else if (seq
->nbytes
!= 1)
3228 WITH_CUR_LOCALE (error (0, 0, _("\
3229 %s: character `%s' in charmap not representable with one byte"),
3230 "LC_CTYPE", "<vertical-tab>"));
3232 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3234 /* No need to search. */
3235 ELEM (ctype
, class_collection
, , L
'\v') |= BITw (tok_space
);
3238 if ((ctype
->class_done
& BITw (tok_xdigit
)) == 0)
3239 /* "If this keyword is not specified, the digits `0' to `9', the
3240 uppercase letters `A' through `F', and the lowercase letters `a'
3241 through `f', ..., shall automatically belong to this class, with
3242 implementation defined character values." [P1003.2, 2.5.2.1] */
3244 set_default (BITPOS (tok_xdigit
), '0', '9');
3245 set_default (BITPOS (tok_xdigit
), 'A', 'F');
3246 set_default (BITPOS (tok_xdigit
), 'a', 'f');
3249 if ((ctype
->class_done
& BITw (tok_blank
)) == 0)
3250 /* "If this keyword [blank] is unspecified, the characters <space> and
3251 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
3253 struct charseq
*seq
;
3255 seq
= charmap_find_value (charmap
, "space", 5);
3257 seq
= charmap_find_value (charmap
, "SP", 2);
3259 seq
= charmap_find_value (charmap
, "U00000020", 9);
3263 WITH_CUR_LOCALE (error (0, 0, _("\
3264 %s: character `%s' not defined while needed as default value"),
3265 "LC_CTYPE", "<space>"));
3267 else if (seq
->nbytes
!= 1)
3268 WITH_CUR_LOCALE (error (0, 0, _("\
3269 %s: character `%s' in charmap not representable with one byte"),
3270 "LC_CTYPE", "<space>"));
3272 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_blank
);
3274 /* No need to search. */
3275 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_blank
);
3278 seq
= charmap_find_value (charmap
, "tab", 3);
3280 seq
= charmap_find_value (charmap
, "U00000009", 9);
3284 WITH_CUR_LOCALE (error (0, 0, _("\
3285 %s: character `%s' not defined while needed as default value"),
3286 "LC_CTYPE", "<tab>"));
3288 else if (seq
->nbytes
!= 1)
3289 WITH_CUR_LOCALE (error (0, 0, _("\
3290 %s: character `%s' in charmap not representable with one byte"),
3291 "LC_CTYPE", "<tab>"));
3293 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_blank
);
3295 /* No need to search. */
3296 ELEM (ctype
, class_collection
, , L
'\t') |= BITw (tok_blank
);
3299 if ((ctype
->class_done
& BITw (tok_graph
)) == 0)
3300 /* "If this keyword [graph] is not specified, characters specified for
3301 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3302 shall belong to this character class." [P1003.2, 2.5.2.1] */
3304 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
) |
3305 BIT (tok_alpha
) | BIT (tok_digit
) | BIT (tok_xdigit
) | BIT (tok_punct
);
3306 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
) |
3307 BITw (tok_alpha
) | BITw (tok_digit
) | BITw (tok_xdigit
) |
3311 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3312 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3313 ctype
->class_collection
[cnt
] |= BITw (tok_graph
);
3315 for (cnt
= 0; cnt
< 256; ++cnt
)
3316 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3317 ctype
->class256_collection
[cnt
] |= BIT (tok_graph
);
3320 if ((ctype
->class_done
& BITw (tok_print
)) == 0)
3321 /* "If this keyword [print] is not provided, characters specified for
3322 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3323 and the <space> character shall belong to this character class."
3324 [P1003.2, 2.5.2.1] */
3326 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
) |
3327 BIT (tok_alpha
) | BIT (tok_digit
) | BIT (tok_xdigit
) | BIT (tok_punct
);
3328 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
) |
3329 BITw (tok_alpha
) | BITw (tok_digit
) | BITw (tok_xdigit
) |
3332 struct charseq
*seq
;
3334 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3335 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3336 ctype
->class_collection
[cnt
] |= BITw (tok_print
);
3338 for (cnt
= 0; cnt
< 256; ++cnt
)
3339 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3340 ctype
->class256_collection
[cnt
] |= BIT (tok_print
);
3343 seq
= charmap_find_value (charmap
, "space", 5);
3345 seq
= charmap_find_value (charmap
, "SP", 2);
3347 seq
= charmap_find_value (charmap
, "U00000020", 9);
3351 WITH_CUR_LOCALE (error (0, 0, _("\
3352 %s: character `%s' not defined while needed as default value"),
3353 "LC_CTYPE", "<space>"));
3355 else if (seq
->nbytes
!= 1)
3356 WITH_CUR_LOCALE (error (0, 0, _("\
3357 %s: character `%s' in charmap not representable with one byte"),
3358 "LC_CTYPE", "<space>"));
3360 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_print
);
3362 /* No need to search. */
3363 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_print
);
3366 if (ctype
->tomap_done
[0] == 0)
3367 /* "If this keyword [toupper] is not specified, the lowercase letters
3368 `a' through `z', and their corresponding uppercase letters `A' to
3369 `Z', ..., shall automatically be included, with implementation-
3370 defined character values." [P1003.2, 2.5.2.1] */
3375 strcpy (tmp
, "<?>");
3377 for (ch
= 'a'; ch
<= 'z'; ++ch
)
3379 struct charseq
*seq_from
, *seq_to
;
3383 seq_from
= charmap_find_value (charmap
, &tmp
[1], 1);
3384 if (seq_from
== NULL
)
3387 sprintf (buf
, "U%08X", ch
);
3388 seq_from
= charmap_find_value (charmap
, buf
, 9);
3390 if (seq_from
== NULL
)
3393 WITH_CUR_LOCALE (error (0, 0, _("\
3394 %s: character `%s' not defined while needed as default value"),
3397 else if (seq_from
->nbytes
!= 1)
3400 WITH_CUR_LOCALE (error (0, 0, _("\
3401 %s: character `%s' needed as default value not representable with one byte"),
3406 /* This conversion is implementation defined. */
3407 tmp
[1] = (char) (ch
+ ('A' - 'a'));
3408 seq_to
= charmap_find_value (charmap
, &tmp
[1], 1);
3412 sprintf (buf
, "U%08X", ch
+ ('A' - 'a'));
3413 seq_to
= charmap_find_value (charmap
, buf
, 9);
3418 WITH_CUR_LOCALE (error (0, 0, _("\
3419 %s: character `%s' not defined while needed as default value"),
3422 else if (seq_to
->nbytes
!= 1)
3425 WITH_CUR_LOCALE (error (0, 0, _("\
3426 %s: character `%s' needed as default value not representable with one byte"),
3430 /* The index [0] is determined by the order of the
3431 `ctype_map_newP' calls in `ctype_startup'. */
3432 ctype
->map256_collection
[0][seq_from
->bytes
[0]]
3436 /* No need to search. */
3437 ELEM (ctype
, map_collection
, [0], ch
) = ch
+ ('A' - 'a');
3441 if (ctype
->tomap_done
[1] == 0)
3442 /* "If this keyword [tolower] is not specified, the mapping shall be
3443 the reverse mapping of the one specified to `toupper'." [P1003.2] */
3445 for (cnt
= 0; cnt
< ctype
->map_collection_act
[0]; ++cnt
)
3446 if (ctype
->map_collection
[0][cnt
] != 0)
3447 ELEM (ctype
, map_collection
, [1],
3448 ctype
->map_collection
[0][cnt
])
3449 = ctype
->charnames
[cnt
];
3451 for (cnt
= 0; cnt
< 256; ++cnt
)
3452 if (ctype
->map256_collection
[0][cnt
] != 0)
3453 ctype
->map256_collection
[1][ctype
->map256_collection
[0][cnt
]] = cnt
;
/* Complete the `outdigit' list: entries from outdigits_act up to 9 are
   looked up in the charmap, with a one-byte replacement taken from
   `digits' when no symbol is found.  */
3456 if (ctype
->outdigits_act
!= 10)
3458 if (ctype
->outdigits_act
!= 0)
3459 WITH_CUR_LOCALE (error (0, 0, _("\
3460 %s: field `%s' does not contain exactly ten entries"),
3461 "LC_CTYPE", "outdigit"));
3463 for (cnt
= ctype
->outdigits_act
; cnt
< 10; ++cnt
)
3465 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3468 if (ctype
->mboutdigits
[cnt
] == NULL
)
3469 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3471 strlen (longnames
[cnt
]));
3473 if (ctype
->mboutdigits
[cnt
] == NULL
)
3474 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3477 if (ctype
->mboutdigits
[cnt
] == NULL
)
3479 /* Provide a replacement. */
3480 WITH_CUR_LOCALE (error (0, 0, _("\
3481 no output digits defined and none of the standard names in the charmap")));
3483 ctype
->mboutdigits
[cnt
] = obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
3484 sizeof (struct charseq
)
3487 /* This is better than nothing. */
3488 ctype
->mboutdigits
[cnt
]->bytes
[0] = digits
[cnt
];
3489 ctype
->mboutdigits
[cnt
]->nbytes
= 1;
3492 ctype
->wcoutdigits
[cnt
] = L
'0' + cnt
;
3495 ctype
->outdigits_act
= 10;
3500 /* Construction of sparse 3-level tables.
3501 See wchar-lookup.h for their structure and the meaning of p and q. */
3508 /* Working representation. */
/* Capacity of level1, counted in uint32_t entries (see the xrealloc
   call in wctype_table_add).  */
3509 size_t level1_alloc
;
/* Capacity of level2, counted in blocks of (1 << q) entries.  */
3512 size_t level2_alloc
;
/* Capacity of level3, counted in blocks of (1 << p) entries.  */
3515 size_t level3_alloc
;
3518 /* Compressed representation. */
3523 /* Initialize the working representation of T by resetting the
   allocated and used counts of all three levels to zero.  Assumes
   t->p and t->q have already been set; they are not modified here.
   NOTE(review): wctype_table_add hands t->level1, t->level2 and
   t->level3 to xrealloc, so presumably these pointers are cleared on
   lines that are not visible in this listing -- confirm.  */
3525 wctype_table_init (struct wctype_table
*t
)
3528 t
->level1_alloc
= t
->level1_size
= 0;
3530 t
->level2_alloc
= t
->level2_size
= 0;
3532 t
->level3_alloc
= t
->level3_size
= 0;
3535 /* Retrieve an entry: look up the bit stored for WC in table T.
   WC is split into four indices.  The bits above q + p + 5 select a
   level1 slot, the next q bits select an entry inside the level2 block
   named by that slot, the next p bits select a word inside the level3
   block named by that entry, and the low 5 bits select one bit of that
   32-bit word.  When all three levels are populated the result is
   (lookup3 >> index4) & 1.
   NOTE(review): the value returned when index1 is out of range or a
   level holds EMPTY is not visible in this listing (presumably 0) --
   confirm.  */
3537 wctype_table_get (struct wctype_table
*t
, uint32_t wc
)
3539 uint32_t index1
= wc
>> (t
->q
+ t
->p
+ 5);
3540 if (index1
< t
->level1_size
)
3542 uint32_t lookup1
= t
->level1
[index1
];
3543 if (lookup1
!= EMPTY
)
3545 uint32_t index2
= ((wc
>> (t
->p
+ 5)) & ((1 << t
->q
) - 1))
3546 + (lookup1
<< t
->q
);
3547 uint32_t lookup2
= t
->level2
[index2
];
3548 if (lookup2
!= EMPTY
)
3550 uint32_t index3
= ((wc
>> 5) & ((1 << t
->p
) - 1))
3551 + (lookup2
<< t
->p
);
3552 uint32_t lookup3
= t
->level3
[index3
];
3553 uint32_t index4
= wc
& 0x1f;
3555 return (lookup3
>> index4
) & 1;
3562 /* Add one entry: set the bit for WC in table T, growing the three
   levels on demand.
   WC is split into index1 (the bits above q + p + 5), index2 (the next
   q bits), index3 (the next p bits) and index4 (the low 5 bits).
   level1 is extended with EMPTY slots up to and including index1.  A
   level1 or level2 slot that is still EMPTY receives a freshly
   appended block in the level below; the capacity of those levels
   grows as 2 * alloc + 1 blocks.  Finally bit index4 of the selected
   level3 word is set.
   NOTE(review): two statements appear to be absent from this listing:
   the one controlled by `if (alloc <= index1)' and the body of the
   loop over a newly appended level3 block (presumably it clears the
   words) -- confirm against the complete file.  */
3564 wctype_table_add (struct wctype_table
*t
, uint32_t wc
)
3566 uint32_t index1
= wc
>> (t
->q
+ t
->p
+ 5);
3567 uint32_t index2
= (wc
>> (t
->p
+ 5)) & ((1 << t
->q
) - 1);
3568 uint32_t index3
= (wc
>> 5) & ((1 << t
->p
) - 1);
3569 uint32_t index4
= wc
& 0x1f;
/* Make sure level1 has a slot for index1.  */
3572 if (index1
>= t
->level1_size
)
3574 if (index1
>= t
->level1_alloc
)
3576 size_t alloc
= 2 * t
->level1_alloc
;
3577 if (alloc
<= index1
)
3579 t
->level1
= (uint32_t *) xrealloc ((char *) t
->level1
,
3580 alloc
* sizeof (uint32_t));
3581 t
->level1_alloc
= alloc
;
3583 while (index1
>= t
->level1_size
)
3584 t
->level1
[t
->level1_size
++] = EMPTY
;
/* Append a new level2 block, filled with EMPTY, if this level1 slot has
   none yet.  */
3587 if (t
->level1
[index1
] == EMPTY
)
3589 if (t
->level2_size
== t
->level2_alloc
)
3591 size_t alloc
= 2 * t
->level2_alloc
+ 1;
3592 t
->level2
= (uint32_t *) xrealloc ((char *) t
->level2
,
3593 (alloc
<< t
->q
) * sizeof (uint32_t));
3594 t
->level2_alloc
= alloc
;
3596 i1
= t
->level2_size
<< t
->q
;
3597 i2
= (t
->level2_size
+ 1) << t
->q
;
3598 for (i
= i1
; i
< i2
; i
++)
3599 t
->level2
[i
] = EMPTY
;
3600 t
->level1
[index1
] = t
->level2_size
++;
/* Turn index2 into an absolute index into level2.  */
3603 index2
+= t
->level1
[index1
] << t
->q
;
/* Append a new level3 block if this level2 entry has none yet.  */
3605 if (t
->level2
[index2
] == EMPTY
)
3607 if (t
->level3_size
== t
->level3_alloc
)
3609 size_t alloc
= 2 * t
->level3_alloc
+ 1;
3610 t
->level3
= (uint32_t *) xrealloc ((char *) t
->level3
,
3611 (alloc
<< t
->p
) * sizeof (uint32_t));
3612 t
->level3_alloc
= alloc
;
3614 i1
= t
->level3_size
<< t
->p
;
3615 i2
= (t
->level3_size
+ 1) << t
->p
;
3616 for (i
= i1
; i
< i2
; i
++)
3618 t
->level2
[index2
] = t
->level3_size
++;
/* Turn index3 into an absolute index into level3 and set the bit.  */
3621 index3
+= t
->level2
[index2
] << t
->p
;
3623 t
->level3
[index3
] |= (uint32_t)1 << index4
;
3626 /* Finalize and shrink: merge identical blocks and build the
   compressed representation in t->result, which is obtained from
   xmalloc (t->result_size).
   Blocks of (1 << p) level3 words that compare equal under memcmp are
   merged and the level2 entries are renumbered through reorder3; the
   same is then done for blocks of (1 << q) level2 entries, with the
   level1 entries renumbered through reorder2.
   The result starts with five uint32_t header words -- q + p + 5,
   level1_size, p + 5, (1 << q) - 1 and (1 << p) - 1 -- and the level1,
   level2 and level3 arrays are copied in at level1_offset,
   level2_offset and level3_offset.  Non-EMPTY level1 and level2
   entries are stored as the byte offset of the block they refer to.
   NOTE(review): several lines are not visible in this listing, among
   them the left-hand sides of the size and offset computations, the
   value stored for EMPTY entries and the statements guarded by the
   final three `alloc > 0' tests -- confirm against the complete
   file.  */
3628 wctype_table_finalize (struct wctype_table
*t
)
3631 uint32_t reorder3
[t
->level3_size
];
3632 uint32_t reorder2
[t
->level2_size
];
3633 uint32_t level1_offset
, level2_offset
, level3_offset
;
3635 /* Uniquify level3 blocks. */
3637 for (j
= 0; j
< t
->level3_size
; j
++)
3639 for (i
= 0; i
< k
; i
++)
3640 if (memcmp (&t
->level3
[i
<< t
->p
], &t
->level3
[j
<< t
->p
],
3641 (1 << t
->p
) * sizeof (uint32_t)) == 0)
3643 /* Relocate block j to block i. */
3648 memcpy (&t
->level3
[i
<< t
->p
], &t
->level3
[j
<< t
->p
],
3649 (1 << t
->p
) * sizeof (uint32_t));
/* Redirect the level2 entries to the renumbered level3 blocks.  */
3655 for (i
= 0; i
< (t
->level2_size
<< t
->q
); i
++)
3656 if (t
->level2
[i
] != EMPTY
)
3657 t
->level2
[i
] = reorder3
[t
->level2
[i
]];
3659 /* Uniquify level2 blocks. */
3661 for (j
= 0; j
< t
->level2_size
; j
++)
3663 for (i
= 0; i
< k
; i
++)
3664 if (memcmp (&t
->level2
[i
<< t
->q
], &t
->level2
[j
<< t
->q
],
3665 (1 << t
->q
) * sizeof (uint32_t)) == 0)
3667 /* Relocate block j to block i. */
3672 memcpy (&t
->level2
[i
<< t
->q
], &t
->level2
[j
<< t
->q
],
3673 (1 << t
->q
) * sizeof (uint32_t));
/* Redirect the level1 entries to the renumbered level2 blocks.  */
3679 for (i
= 0; i
< t
->level1_size
; i
++)
3680 if (t
->level1
[i
] != EMPTY
)
3681 t
->level1
[i
] = reorder2
[t
->level1
[i
]];
3683 /* Create and fill the resulting compressed representation. */
3685 5 * sizeof (uint32_t)
3686 + t
->level1_size
* sizeof (uint32_t)
3687 + (t
->level2_size
<< t
->q
) * sizeof (uint32_t)
3688 + (t
->level3_size
<< t
->p
) * sizeof (uint32_t);
3689 t
->result
= (char *) xmalloc (t
->result_size
);
3692 5 * sizeof (uint32_t);
3694 5 * sizeof (uint32_t)
3695 + t
->level1_size
* sizeof (uint32_t);
3697 5 * sizeof (uint32_t)
3698 + t
->level1_size
* sizeof (uint32_t)
3699 + (t
->level2_size
<< t
->q
) * sizeof (uint32_t);
/* Header words of the compressed table.  */
3701 ((uint32_t *) t
->result
)[0] = t
->q
+ t
->p
+ 5;
3702 ((uint32_t *) t
->result
)[1] = t
->level1_size
;
3703 ((uint32_t *) t
->result
)[2] = t
->p
+ 5;
3704 ((uint32_t *) t
->result
)[3] = (1 << t
->q
) - 1;
3705 ((uint32_t *) t
->result
)[4] = (1 << t
->p
) - 1;
3707 for (i
= 0; i
< t
->level1_size
; i
++)
3708 ((uint32_t *) (t
->result
+ level1_offset
))[i
] =
3709 (t
->level1
[i
] == EMPTY
3711 : (t
->level1
[i
] << t
->q
) * sizeof (uint32_t) + level2_offset
);
3713 for (i
= 0; i
< (t
->level2_size
<< t
->q
); i
++)
3714 ((uint32_t *) (t
->result
+ level2_offset
))[i
] =
3715 (t
->level2
[i
] == EMPTY
3717 : (t
->level2
[i
] << t
->p
) * sizeof (uint32_t) + level3_offset
);
3719 for (i
= 0; i
< (t
->level3_size
<< t
->p
); i
++)
3720 ((uint32_t *) (t
->result
+ level3_offset
))[i
] = t
->level3
[i
];
3722 if (t
->level1_alloc
> 0)
3724 if (t
->level2_alloc
> 0)
3726 if (t
->level3_alloc
> 0)
/* Parameters for a table type named wcwidth_table: elements are uint8_t
   and DEFAULT is 0xff.
   NOTE(review): the directive that consumes TABLE, ELEMENT and DEFAULT is
   not visible in this listing; presumably a shared template header is
   included after each group of definitions -- confirm.  */
3730 #define TABLE wcwidth_table
3731 #define ELEMENT uint8_t
3732 #define DEFAULT 0xff
/* Parameters for a table type named wctrans_table with int32_t elements.  */
3735 #define TABLE wctrans_table
3736 #define ELEMENT int32_t
/* While this macro is in effect the name wctrans_table_add expands to
   wctrans_table_add_internal; after the #undef the plain name is free
   again for the wrapper function defined below.  */
3738 #define wctrans_table_add wctrans_table_add_internal
3740 #undef wctrans_table_add
3741 /* The wctrans_table must actually store the difference between the
3742 desired result and the argument. */
/* Add the mapping WC -> MAPPED_WC to table T.
   The call is forwarded to wctrans_table_add_internal with the value
   mapped_wc - wc.  Both operands are uint32_t, so the subtraction is done
   in unsigned arithmetic and wraps modulo 2^32 when mapped_wc is smaller
   than wc.
   NOTE(review): the line carrying the return type and storage class is
   not present in this listing.  */
3744 wctrans_table_add (struct wctrans_table
*t
, uint32_t wc
, uint32_t mapped_wc
)
3746 wctrans_table_add_internal (t
, wc
, mapped_wc
- wc
);
3750 /* Flattens the included transliterations into a translit list.
3751 Inserts them in the list at `cursor', and returns the new cursor. */
/* Parameters:
     ctype   - locale data whose translit_include chain is consumed; each
               entry is unlinked from the chain before it is processed.
     charmap - handed on to find_locale and to the recursive call.
     cursor  - address of the link where included lists are spliced in.
   For every include entry the referenced LC_CTYPE data is looked up with
   find_locale, its own includes are flattened first by recursion, and its
   translit list is then moved (not copied) into the list at *cursor.
   NOTE(review): the return statement, the test selecting the error
   branch and the body of the tail-walking loop are not present in this
   listing.  */
3752 static struct translit_t
**
3753 translit_flatten (struct locale_ctype_t
*ctype
,
3754 const struct charmap_t
*charmap
,
3755 struct translit_t
**cursor
)
3757 while (ctype
->translit_include
!= NULL
)
/* Remember the names of the include entry before it is unlinked.  */
3759 const char *copy_locale
= ctype
->translit_include
->copy_locale
;
3760 const char *copy_repertoire
= ctype
->translit_include
->copy_repertoire
;
3761 struct localedef_t
*other
;
3763 /* Unchain the include statement. During the depth-first traversal
3764 we don't want to visit any locale more than once. */
3765 ctype
->translit_include
= ctype
->translit_include
->next
;
/* Look up the LC_CTYPE data of the included locale.  */
3767 other
= find_locale (LC_CTYPE
, copy_locale
, copy_repertoire
, charmap
);
/* Diagnostic for an include whose data could not be obtained; the
   guarding condition is not visible in this listing.  */
3771 WITH_CUR_LOCALE (error (0, 0, _("\
3772 %s: transliteration data from locale `%s' not available"),
3773 "LC_CTYPE", copy_locale
));
3777 struct locale_ctype_t
*other_ctype
=
3778 other
->categories
[LC_CTYPE
].ctype
;
/* Flatten the includes of the other locale first; afterwards its
   include chain is required to be empty.  */
3780 cursor
= translit_flatten (other_ctype
, charmap
, cursor
);
3781 assert (other_ctype
->translit_include
== NULL
);
3783 if (other_ctype
->translit
!= NULL
)
3785 /* Insert the other_ctype->translit list at *cursor. */
3786 struct translit_t
*endp
= other_ctype
->translit
;
/* NOTE(review): presumably advances endp to the last element of the
   other list; the loop body is not visible.  */
3787 while (endp
->next
!= NULL
)
/* Hook whatever followed *cursor behind endp, then let *cursor point at
   the head of the other list.  */
3790 endp
->next
= *cursor
;
3791 *cursor
= other_ctype
->translit
;
3793 /* Avoid any risk of circular lists. */
3794 other_ctype
->translit
= NULL
;
/* Later insertions go behind the elements just spliced in.  */
3796 cursor
= &endp
->next
;
/* Take over default_missing from the included locale when this locale
   has none of its own.  */
3799 if (ctype
->default_missing
== NULL
)
3800 ctype
->default_missing
= other_ctype
->default_missing
;
3808 allocate_arrays (struct locale_ctype_t
*ctype
, const struct charmap_t
*charmap
,
3809 struct repertoire_t
*repertoire
)
3817 /* You wonder about this amount of memory? This is only because some
3818 users do not manage to address the array with unsigned values or
3819 data types with range >= 256. '\200' would result in the array
3820 index -128. To help these poor people we duplicate the entries for
3821 128 up to 255 below the entry for \0. */
3822 ctype
->ctype_b
= (char_class_t
*) xcalloc (256 + 128, sizeof (char_class_t
));
3823 ctype
->ctype32_b
= (char_class32_t
*) xcalloc (256, sizeof (char_class32_t
));
3824 ctype
->class_b
= (uint32_t **)
3825 xmalloc (ctype
->nr_charclass
* sizeof (uint32_t *));
3826 ctype
->class_3level
= (struct iovec
*)
3827 xmalloc (ctype
->nr_charclass
* sizeof (struct iovec
));
3829 /* This is the array accessed using the multibyte string elements. */
3830 for (idx
= 0; idx
< 256; ++idx
)
3831 ctype
->ctype_b
[128 + idx
] = ctype
->class256_collection
[idx
];
3833 /* Mirror first 127 entries. We must take care that entry -1 is not
3834 mirrored because EOF == -1. */
3835 for (idx
= 0; idx
< 127; ++idx
)
3836 ctype
->ctype_b
[idx
] = ctype
->ctype_b
[256 + idx
];
3838 /* The 32 bit array contains all characters < 0x100. */
3839 for (idx
= 0; idx
< ctype
->class_collection_act
; ++idx
)
3840 if (ctype
->charnames
[idx
] < 0x100)
3841 ctype
->ctype32_b
[ctype
->charnames
[idx
]] = ctype
->class_collection
[idx
];
3843 for (nr
= 0; nr
< ctype
->nr_charclass
; nr
++)
3845 ctype
->class_b
[nr
] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
3847 for (idx
= 0; idx
< 256; ++idx
)
3848 if (ctype
->class256_collection
[idx
] & _ISbit (nr
))
3849 ctype
->class_b
[nr
][idx
>> 5] |= (uint32_t)1 << (idx
& 0x1f);
3852 for (nr
= 0; nr
< ctype
->nr_charclass
; nr
++)
3854 struct wctype_table t
;
3856 t
.p
= 4; /* or: 5 */
3857 t
.q
= 7; /* or: 6 */
3858 wctype_table_init (&t
);
3860 for (idx
= 0; idx
< ctype
->class_collection_act
; ++idx
)
3861 if (ctype
->class_collection
[idx
] & _ISwbit (nr
))
3862 wctype_table_add (&t
, ctype
->charnames
[idx
]);
3864 wctype_table_finalize (&t
);
3867 WITH_CUR_LOCALE (fprintf (stderr
, _("\
3868 %s: table for class \"%s\": %lu bytes\n"),
3869 "LC_CTYPE", ctype
->classnames
[nr
],
3870 (unsigned long int) t
.result_size
));
3872 ctype
->class_3level
[nr
].iov_base
= t
.result
;
3873 ctype
->class_3level
[nr
].iov_len
= t
.result_size
;
3876 /* Room for table of mappings. */
3877 ctype
->map_b
= (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3878 ctype
->map32_b
= (uint32_t **) xmalloc (ctype
->map_collection_nr
3879 * sizeof (uint32_t *));
3880 ctype
->map_3level
= (struct iovec
*)
3881 xmalloc (ctype
->map_collection_nr
* sizeof (struct iovec
));
3883 /* Fill in all mappings. */
3884 for (idx
= 0; idx
< 2; ++idx
)
3888 /* Allocate table. */
3889 ctype
->map_b
[idx
] = (uint32_t *)
3890 xmalloc ((256 + 128) * sizeof (uint32_t));
3892 /* Copy values from collection. */
3893 for (idx2
= 0; idx2
< 256; ++idx2
)
3894 ctype
->map_b
[idx
][128 + idx2
] = ctype
->map256_collection
[idx
][idx2
];
3896 /* Mirror first 127 entries. We must take care not to map entry
3897 -1 because EOF == -1. */
3898 for (idx2
= 0; idx2
< 127; ++idx2
)
3899 ctype
->map_b
[idx
][idx2
] = ctype
->map_b
[idx
][256 + idx2
];
3901 /* EOF must map to EOF. */
3902 ctype
->map_b
[idx
][127] = EOF
;
3905 for (idx
= 0; idx
< ctype
->map_collection_nr
; ++idx
)
3909 /* Allocate table. */
3910 ctype
->map32_b
[idx
] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
3912 /* Copy values from collection. Default is identity mapping. */
3913 for (idx2
= 0; idx2
< 256; ++idx2
)
3914 ctype
->map32_b
[idx
][idx2
] =
3915 (ctype
->map_collection
[idx
][idx2
] != 0
3916 ? ctype
->map_collection
[idx
][idx2
]
3920 for (nr
= 0; nr
< ctype
->map_collection_nr
; nr
++)
3922 struct wctrans_table t
;
3926 wctrans_table_init (&t
);
3928 for (idx
= 0; idx
< ctype
->map_collection_act
[nr
]; ++idx
)
3929 if (ctype
->map_collection
[nr
][idx
] != 0)
3930 wctrans_table_add (&t
, ctype
->charnames
[idx
],
3931 ctype
->map_collection
[nr
][idx
]);
3933 wctrans_table_finalize (&t
);
3936 WITH_CUR_LOCALE (fprintf (stderr
, _("\
3937 %s: table for map \"%s\": %lu bytes\n"),
3938 "LC_CTYPE", ctype
->mapnames
[nr
],
3939 (unsigned long int) t
.result_size
));
3941 ctype
->map_3level
[nr
].iov_base
= t
.result
;
3942 ctype
->map_3level
[nr
].iov_len
= t
.result_size
;
3945 /* Extra array for class and map names. */
3946 ctype
->class_name_ptr
= (uint32_t *) xmalloc (ctype
->nr_charclass
3947 * sizeof (uint32_t));
3948 ctype
->map_name_ptr
= (uint32_t *) xmalloc (ctype
->map_collection_nr
3949 * sizeof (uint32_t));
3951 ctype
->class_offset
= _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
);
3952 ctype
->map_offset
= ctype
->class_offset
+ ctype
->nr_charclass
;
3954 /* Array for width information. Because the expected widths are very
3955 small (never larger than 2) we use only one single byte. This
3957 We put only printable characters in the table. wcwidth is specified
3958 to return -1 for non-printable characters. Doing the check here
3959 saves a run-time check.
3960 But we put L'\0' in the table. This again saves a run-time check. */
3962 struct wcwidth_table t
;
3966 wcwidth_table_init (&t
);
3968 /* First set all the printable characters of the character set to
3969 the default width. */
3971 while (iterate_table (&charmap
->char_table
, &curs
, &key
, &len
, &vdata
) == 0)
3973 struct charseq
*data
= (struct charseq
*) vdata
;
3975 if (data
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
3976 data
->ucs4
= repertoire_find_value (ctype
->repertoire
,
3979 if (data
->ucs4
!= ILLEGAL_CHAR_VALUE
)
3981 uint32_t *class_bits
=
3982 find_idx (ctype
, &ctype
->class_collection
, NULL
,
3983 &ctype
->class_collection_act
, data
->ucs4
);
3985 if (class_bits
!= NULL
&& (*class_bits
& BITw (tok_print
)))
3986 wcwidth_table_add (&t
, data
->ucs4
, charmap
->width_default
);
3990 /* Now add the explicitly specified widths. */
3991 if (charmap
->width_rules
!= NULL
)
3995 for (cnt
= 0; cnt
< charmap
->nwidth_rules
; ++cnt
)
3997 unsigned char bytes
[charmap
->mb_cur_max
];
3998 int nbytes
= charmap
->width_rules
[cnt
].from
->nbytes
;
4000 /* We have the range of characters for which the width is
4001 specified described using byte sequences of the multibyte
4002 charset. We have to convert this to UCS4 now. And we
4003 cannot simply convert the beginning and the end of the
4004 sequence, we have to iterate over the byte sequence and
4005 convert it for every single character. */
4006 memcpy (bytes
, charmap
->width_rules
[cnt
].from
->bytes
, nbytes
);
4008 while (nbytes
< charmap
->width_rules
[cnt
].to
->nbytes
4009 || memcmp (bytes
, charmap
->width_rules
[cnt
].to
->bytes
,
4012 /* Find the UCS value for `bytes'. */
4015 struct charseq
*seq
=
4016 charmap_find_symbol (charmap
, bytes
, nbytes
);
4019 wch
= ILLEGAL_CHAR_VALUE
;
4020 else if (seq
->ucs4
!= UNINITIALIZED_CHAR_VALUE
)
4023 wch
= repertoire_find_value (ctype
->repertoire
, seq
->name
,
4024 strlen (seq
->name
));
4026 if (wch
!= ILLEGAL_CHAR_VALUE
)
4028 /* Store the value. */
4029 uint32_t *class_bits
=
4030 find_idx (ctype
, &ctype
->class_collection
, NULL
,
4031 &ctype
->class_collection_act
, wch
);
4033 if (class_bits
!= NULL
&& (*class_bits
& BITw (tok_print
)))
4034 wcwidth_table_add (&t
, wch
,
4035 charmap
->width_rules
[cnt
].width
);
4038 /* "Increment" the bytes sequence. */
4040 while (inner
>= 0 && bytes
[inner
] == 0xff)
4045 /* We have to extend the byte sequence. */
4046 if (nbytes
>= charmap
->width_rules
[cnt
].to
->nbytes
)
4050 memset (&bytes
[1], 0, nbytes
);
4056 while (++inner
< nbytes
)
4063 /* Set the width of L'\0' to 0. */
4064 wcwidth_table_add (&t
, 0, 0);
4066 wcwidth_table_finalize (&t
);
4069 WITH_CUR_LOCALE (fprintf (stderr
, _("%s: table for width: %lu bytes\n"),
4070 "LC_CTYPE", (unsigned long int) t
.result_size
));
4072 ctype
->width
.iov_base
= t
.result
;
4073 ctype
->width
.iov_len
= t
.result_size
;
4076 /* Set MB_CUR_MAX. */
4077 ctype
->mb_cur_max
= charmap
->mb_cur_max
;
4079 /* Now determine the table for the transliteration information.
4081 XXX It is not yet clear to me whether it is worth implementing a
4082 complicated algorithm which uses a hash table to locate the entries.
4083 For now I'll use a simple array which can be searched using binary
4085 if (ctype
->translit_include
!= NULL
)
4086 /* Traverse the locales mentioned in the `include' statements in a
4087 depth-first way and fold in their transliteration information. */
4088 translit_flatten (ctype
, charmap
, &ctype
->translit
);
4090 if (ctype
->translit
!= NULL
)
4092 /* First count how many entries we have. This is the upper limit
4093 since some entries from the included files might be overwritten. */
4096 struct translit_t
*runp
= ctype
->translit
;
4097 struct translit_t
**sorted
;
4098 size_t from_len
, to_len
;
4100 while (runp
!= NULL
)
4106 /* Next we allocate an array large enough and fill in the values. */
4107 sorted
= (struct translit_t
**) alloca (number
4108 * sizeof (struct translit_t
**));
4109 runp
= ctype
->translit
;
4113 /* Search for the place where to insert this string.
4114 XXX Better use a real sorting algorithm later. */
4118 while (idx
< number
)
4120 int res
= wcscmp ((const wchar_t *) sorted
[idx
]->from
,
4121 (const wchar_t *) runp
->from
);
4136 memmove (&sorted
[idx
+ 1], &sorted
[idx
],
4137 (number
- idx
) * sizeof (struct translit_t
*));
4144 while (runp
!= NULL
);
4146 /* The next step is putting all the possible transliteration
4147 strings in one memory block so that we can write it out.
4148 We need several different blocks:
4149 - index to the from-string array
4151 - index to the to-string array
4154 from_len
= to_len
= 0;
4155 for (cnt
= 0; cnt
< number
; ++cnt
)
4157 struct translit_to_t
*srunp
;
4158 from_len
+= wcslen ((const wchar_t *) sorted
[cnt
]->from
) + 1;
4159 srunp
= sorted
[cnt
]->to
;
4160 while (srunp
!= NULL
)
4162 to_len
+= wcslen ((const wchar_t *) srunp
->str
) + 1;
4163 srunp
= srunp
->next
;
4165 /* Plus one for the extra NUL character marking the end of
4166 the list for the current entry. */
4170 /* We can allocate the arrays for the results. */
4171 ctype
->translit_from_idx
= xmalloc (number
* sizeof (uint32_t));
4172 ctype
->translit_from_tbl
= xmalloc (from_len
* sizeof (uint32_t));
4173 ctype
->translit_to_idx
= xmalloc (number
* sizeof (uint32_t));
4174 ctype
->translit_to_tbl
= xmalloc (to_len
* sizeof (uint32_t));
4178 for (cnt
= 0; cnt
< number
; ++cnt
)
4181 struct translit_to_t
*srunp
;
4183 ctype
->translit_from_idx
[cnt
] = from_len
;
4184 ctype
->translit_to_idx
[cnt
] = to_len
;
4186 len
= wcslen ((const wchar_t *) sorted
[cnt
]->from
) + 1;
4187 wmemcpy ((wchar_t *) &ctype
->translit_from_tbl
[from_len
],
4188 (const wchar_t *) sorted
[cnt
]->from
, len
);
4191 ctype
->translit_to_idx
[cnt
] = to_len
;
4192 srunp
= sorted
[cnt
]->to
;
4193 while (srunp
!= NULL
)
4195 len
= wcslen ((const wchar_t *) srunp
->str
) + 1;
4196 wmemcpy ((wchar_t *) &ctype
->translit_to_tbl
[to_len
],
4197 (const wchar_t *) srunp
->str
, len
);
4199 srunp
= srunp
->next
;
4201 ctype
->translit_to_tbl
[to_len
++] = L
'\0';
4204 /* Store the information about the length. */
4205 ctype
->translit_idx_size
= number
;
4206 ctype
->translit_from_tbl_size
= from_len
* sizeof (uint32_t);
4207 ctype
->translit_to_tbl_size
= to_len
* sizeof (uint32_t);
4211 /* Provide some dummy pointers since we have nothing to write out. */
4212 static uint32_t no_str
= { 0 };
4214 ctype
->translit_from_idx
= &no_str
;
4215 ctype
->translit_from_tbl
= &no_str
;
4216 ctype
->translit_to_tbl
= &no_str
;
4217 ctype
->translit_idx_size
= 0;
4218 ctype
->translit_from_tbl_size
= 0;
4219 ctype
->translit_to_tbl_size
= 0;