1 /* Copyright (C) 1995-2017 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, see <http://www.gnu.org/licenses/>. */
35 #include "localedef.h"
37 #include "localeinfo.h"
39 #include "linereader.h"
40 #include "locfile-token.h"
46 /* The bit used for representing a special class.  BITPOS turns a class
token into its bit number by subtracting tok_upper; BIT and BITw hand that
number to _ISbit and _ISwbit respectively. */
47 #define BITPOS(class) ((class) - tok_upper)
48 #define BIT(class) (_ISbit (BITPOS (class)))
49 #define BITw(class) (_ISwbit (BITPOS (class)))
/* ELEM expands to a dereferenced find_idx call, so the result can be both
   read and assigned.  COLLECTION names the table member; its `_max' and
   `_act' companions are derived by token pasting.  IDX is appended
   verbatim to each of the three member names and may be left empty. */
51 #define ELEM(ctype, collection, idx, value) \
52 *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx, \
53 &ctype->collection##_act idx, value)
56 /* To be compatible with former implementations we for now restrict
57 the number of bits for character classes to 16. When compatibility
58 is not necessary anymore increase the number to 32. */
59 #define char_class_t uint16_t
60 #define char_class32_t uint32_t
63 /* Type to describe a transliteration action. We have a possibly
64 multiple character from-string and a set of multiple character
65 to-strings. All are 32bit values since this is what is used in
66 the gconv functions. */
71 struct translit_to_t
*next
;
81 struct translit_to_t
*to
;
83 struct translit_t
*next
;
86 struct translit_ignore_t
95 struct translit_ignore_t
*next
;
99 /* Type to describe a transliteration include statement. */
100 struct translit_include_t
102 const char *copy_locale
;
103 const char *copy_repertoire
;
105 struct translit_include_t
*next
;
108 /* Provide some dummy pointer for empty string. */
109 static uint32_t no_str
[] = { 0 };
112 /* Sparse table of uint32_t. */
113 #define TABLE idx_table
114 #define ELEMENT uint32_t
115 #define DEFAULT ((uint32_t) ~0)
116 #define NO_ADD_LOCALE
119 #define TABLE wcwidth_table
120 #define ELEMENT uint8_t
124 #define TABLE wctrans_table
125 #define ELEMENT int32_t
127 #define wctrans_table_add wctrans_table_add_internal
129 #undef wctrans_table_add
130 /* The wctrans_table must actually store the difference between the
131 desired result and the argument.  This wrapper performs that
subtraction: it forwards T and WC unchanged and passes MAPPED_WC - WC as
the value to wctrans_table_add_internal. */
133 wctrans_table_add (struct wctrans_table
*t
, uint32_t wc
, uint32_t mapped_wc
)
135 wctrans_table_add_internal (t
, wc
, mapped_wc
- wc
);
138 /* Construction of sparse 3-level tables.
139 See wchar-lookup.h for their structure and the meaning of p and q. */
146 /* Working representation. */
159 static void add_locale_wctype_table (struct locale_file
*file
,
160 struct wctype_table
*t
);
162 /* The real definition of the struct for the LC_CTYPE locale. */
163 struct locale_ctype_t
166 size_t charnames_max
;
167 size_t charnames_act
;
168 /* An index lookup table, to speedup find_idx. */
169 struct idx_table charnames_idx
;
171 struct repertoire_t
*repertoire
;
173 /* We will allow up to 8 * sizeof (uint32_t) character classes. */
174 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
176 const char *classnames
[MAX_NR_CHARCLASS
];
177 uint32_t last_class_char
;
178 uint32_t class256_collection
[256];
179 uint32_t *class_collection
;
180 size_t class_collection_max
;
181 size_t class_collection_act
;
183 uint32_t class_offset
;
185 struct charseq
**mbdigits
;
192 struct charseq
*mboutdigits
[10];
193 uint32_t wcoutdigits
[10];
194 size_t outdigits_act
;
196 /* If the following number ever turns out to be too small simply
197 increase it. But I doubt it will. --drepper@gnu */
198 #define MAX_NR_CHARMAP 16
199 const char *mapnames
[MAX_NR_CHARMAP
];
200 uint32_t *map_collection
[MAX_NR_CHARMAP
];
201 uint32_t map256_collection
[2][256];
202 size_t map_collection_max
[MAX_NR_CHARMAP
];
203 size_t map_collection_act
[MAX_NR_CHARMAP
];
204 size_t map_collection_nr
;
206 int tomap_done
[MAX_NR_CHARMAP
];
209 /* Transliteration information. */
210 struct translit_include_t
*translit_include
;
211 struct translit_t
*translit
;
212 struct translit_ignore_t
*translit_ignore
;
213 uint32_t ntranslit_ignore
;
215 uint32_t *default_missing
;
216 const char *default_missing_file
;
217 size_t default_missing_lineno
;
219 uint32_t to_nonascii
;
220 uint32_t nonascii_case
;
222 /* The arrays for the binary representation. */
223 char_class_t
*ctype_b
;
224 char_class32_t
*ctype32_b
;
228 struct wctype_table
*class_3level
;
229 struct wctrans_table
*map_3level
;
230 uint32_t *class_name_ptr
;
231 uint32_t *map_name_ptr
;
232 struct wcwidth_table width
;
234 const char *codeset_name
;
235 uint32_t *translit_from_idx
;
236 uint32_t *translit_from_tbl
;
237 uint32_t *translit_to_idx
;
238 uint32_t *translit_to_tbl
;
239 uint32_t translit_idx_size
;
240 size_t translit_from_tbl_size
;
241 size_t translit_to_tbl_size
;
243 struct obstack mempool
;
247 /* Marker for an empty slot. This has the value 0xFFFFFFFF, regardless
248 whether 'int' is 16 bit, 32 bit, or 64 bit. */
249 #define EMPTY ((uint32_t) ~0)
252 #define obstack_chunk_alloc xmalloc
253 #define obstack_chunk_free free
256 /* Prototypes for local functions. */
257 static void ctype_startup (struct linereader
*lr
, struct localedef_t
*locale
,
258 const struct charmap_t
*charmap
,
259 struct localedef_t
*copy_locale
,
261 static void ctype_class_new (struct linereader
*lr
,
262 struct locale_ctype_t
*ctype
, const char *name
);
263 static void ctype_map_new (struct linereader
*lr
,
264 struct locale_ctype_t
*ctype
,
265 const char *name
, const struct charmap_t
*charmap
);
266 static uint32_t *find_idx (struct locale_ctype_t
*ctype
, uint32_t **table
,
267 size_t *max
, size_t *act
, uint32_t idx
);
268 static void set_class_defaults (struct locale_ctype_t
*ctype
,
269 const struct charmap_t
*charmap
,
270 struct repertoire_t
*repertoire
);
271 static void allocate_arrays (struct locale_ctype_t
*ctype
,
272 const struct charmap_t
*charmap
,
273 struct repertoire_t
*repertoire
);
276 static const char *longnames
[] =
278 "zero", "one", "two", "three", "four",
279 "five", "six", "seven", "eight", "nine"
281 static const char *uninames
[] =
283 "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
284 "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
286 static const unsigned char digits
[] = "0123456789";
290 ctype_startup (struct linereader
*lr
, struct localedef_t
*locale
,
291 const struct charmap_t
*charmap
,
292 struct localedef_t
*copy_locale
, int ignore_content
)
295 struct locale_ctype_t
*ctype
;
297 if (!ignore_content
&& locale
->categories
[LC_CTYPE
].ctype
== NULL
)
299 if (copy_locale
== NULL
)
301 /* Allocate the needed room. */
302 locale
->categories
[LC_CTYPE
].ctype
= ctype
=
303 (struct locale_ctype_t
*) xcalloc (1,
304 sizeof (struct locale_ctype_t
));
306 /* We have seen no names yet. */
307 ctype
->charnames_max
= charmap
->mb_cur_max
== 1 ? 256 : 512;
308 ctype
->charnames
= (uint32_t *) xmalloc (ctype
->charnames_max
309 * sizeof (uint32_t));
310 for (cnt
= 0; cnt
< 256; ++cnt
)
311 ctype
->charnames
[cnt
] = cnt
;
312 ctype
->charnames_act
= 256;
313 idx_table_init (&ctype
->charnames_idx
);
315 /* Fill character class information. */
316 ctype
->last_class_char
= ILLEGAL_CHAR_VALUE
;
317 /* The order of the following instructions determines the bit
319 ctype_class_new (lr
, ctype
, "upper");
320 ctype_class_new (lr
, ctype
, "lower");
321 ctype_class_new (lr
, ctype
, "alpha");
322 ctype_class_new (lr
, ctype
, "digit");
323 ctype_class_new (lr
, ctype
, "xdigit");
324 ctype_class_new (lr
, ctype
, "space");
325 ctype_class_new (lr
, ctype
, "print");
326 ctype_class_new (lr
, ctype
, "graph");
327 ctype_class_new (lr
, ctype
, "blank");
328 ctype_class_new (lr
, ctype
, "cntrl");
329 ctype_class_new (lr
, ctype
, "punct");
330 ctype_class_new (lr
, ctype
, "alnum");
332 ctype
->class_collection_max
= charmap
->mb_cur_max
== 1 ? 256 : 512;
333 ctype
->class_collection
334 = (uint32_t *) xcalloc (sizeof (unsigned long int),
335 ctype
->class_collection_max
);
336 ctype
->class_collection_act
= 256;
338 /* Fill character map information. */
339 ctype
->last_map_idx
= MAX_NR_CHARMAP
;
340 ctype_map_new (lr
, ctype
, "toupper", charmap
);
341 ctype_map_new (lr
, ctype
, "tolower", charmap
);
343 /* Fill first 256 entries in `toXXX' arrays. */
344 for (cnt
= 0; cnt
< 256; ++cnt
)
346 ctype
->map_collection
[0][cnt
] = cnt
;
347 ctype
->map_collection
[1][cnt
] = cnt
;
349 ctype
->map256_collection
[0][cnt
] = cnt
;
350 ctype
->map256_collection
[1][cnt
] = cnt
;
353 if (enc_not_ascii_compatible
)
354 ctype
->to_nonascii
= 1;
356 obstack_init (&ctype
->mempool
);
359 ctype
= locale
->categories
[LC_CTYPE
].ctype
=
360 copy_locale
->categories
[LC_CTYPE
].ctype
;
366 ctype_finish (struct localedef_t
*locale
, const struct charmap_t
*charmap
)
368 /* See POSIX.2, table 2-6 for the meaning of the following table. */
373 const char allow
[NCLASS
];
375 valid_table
[NCLASS
] =
377 /* The order is important. See token.h for more information.
378 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
379 { "upper", "--MX-XDDXXX-" },
380 { "lower", "--MX-XDDXXX-" },
381 { "alpha", "---X-XDDXXX-" },
382 { "digit", "XXX--XDDXXX-" },
383 { "xdigit", "-----XDDXXX-" },
384 { "space", "XXXXX------X" },
385 { "print", "---------X--" },
386 { "graph", "---------X--" },
387 { "blank", "XXXXXM-----X" },
388 { "cntrl", "XXXXX-XX--XX" },
389 { "punct", "XXXXX-DD-X-X" },
390 { "alnum", "-----XDDXXX-" }
394 uint32_t space_value
;
395 struct charseq
*space_seq
;
396 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
403 /* Now resolve copying and also handle completely missing definitions. */
406 const char *repertoire_name
;
408 /* First see whether we were supposed to copy. If yes, find the
409 actual definition. */
410 if (locale
->copy_name
[LC_CTYPE
] != NULL
)
412 /* Find the copying locale. This has to happen transitively since
413 the locale we are copying from might itself be copying another one. */
414 struct localedef_t
*from
= locale
;
417 from
= find_locale (LC_CTYPE
, from
->copy_name
[LC_CTYPE
],
418 from
->repertoire_name
, charmap
);
419 while (from
->categories
[LC_CTYPE
].ctype
== NULL
420 && from
->copy_name
[LC_CTYPE
] != NULL
);
422 ctype
= locale
->categories
[LC_CTYPE
].ctype
423 = from
->categories
[LC_CTYPE
].ctype
;
426 /* If there is still no definition issue a warning and create an
431 WITH_CUR_LOCALE (error (0, 0, _("\
432 No definition for %s category found"), "LC_CTYPE"));
433 ctype_startup (NULL
, locale
, charmap
, NULL
, 0);
434 ctype
= locale
->categories
[LC_CTYPE
].ctype
;
437 /* Get the repertoire we have to use. */
438 repertoire_name
= locale
->repertoire_name
?: repertoire_global
;
439 if (repertoire_name
!= NULL
)
440 ctype
->repertoire
= repertoire_read (repertoire_name
);
443 /* We need the name of the currently used 8-bit character set to
444 make correct conversion between this 8-bit representation and the
445 ISO 10646 character set used internally for wide characters. */
446 ctype
->codeset_name
= charmap
->code_set_name
;
447 if (ctype
->codeset_name
== NULL
)
450 WITH_CUR_LOCALE (error (0, 0, _("\
451 No character set name specified in charmap")));
452 ctype
->codeset_name
= "//UNKNOWN//";
455 /* Set default value for classes not specified. */
456 set_class_defaults (ctype
, charmap
, ctype
->repertoire
);
458 /* Check according to table. */
459 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
461 uint32_t tmp
= ctype
->class_collection
[cnt
];
465 for (cls1
= 0; cls1
< NCLASS
; ++cls1
)
466 if ((tmp
& _ISwbit (cls1
)) != 0)
467 for (cls2
= 0; cls2
< NCLASS
; ++cls2
)
468 if (valid_table
[cls1
].allow
[cls2
] != '-')
470 int eq
= (tmp
& _ISwbit (cls2
)) != 0;
471 switch (valid_table
[cls1
].allow
[cls2
])
476 uint32_t value
= ctype
->charnames
[cnt
];
479 WITH_CUR_LOCALE (error (0, 0, _("\
480 character L'\\u%0*x' in class `%s' must be in class `%s'"),
481 value
> 0xffff ? 8 : 4,
483 valid_table
[cls1
].name
,
484 valid_table
[cls2
].name
));
491 uint32_t value
= ctype
->charnames
[cnt
];
494 WITH_CUR_LOCALE (error (0, 0, _("\
495 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
496 value
> 0xffff ? 8 : 4,
498 valid_table
[cls1
].name
,
499 valid_table
[cls2
].name
));
504 ctype
->class_collection
[cnt
] |= _ISwbit (cls2
);
508 WITH_CUR_LOCALE (error (5, 0, _("\
509 internal error in %s, line %u"), __FUNCTION__
, __LINE__
));
515 for (cnt
= 0; cnt
< 256; ++cnt
)
517 uint32_t tmp
= ctype
->class256_collection
[cnt
];
521 for (cls1
= 0; cls1
< NCLASS
; ++cls1
)
522 if ((tmp
& _ISbit (cls1
)) != 0)
523 for (cls2
= 0; cls2
< NCLASS
; ++cls2
)
524 if (valid_table
[cls1
].allow
[cls2
] != '-')
526 int eq
= (tmp
& _ISbit (cls2
)) != 0;
527 switch (valid_table
[cls1
].allow
[cls2
])
534 snprintf (buf
, sizeof buf
, "\\%Zo", cnt
);
537 WITH_CUR_LOCALE (error (0, 0, _("\
538 character '%s' in class `%s' must be in class `%s'"),
540 valid_table
[cls1
].name
,
541 valid_table
[cls2
].name
));
550 snprintf (buf
, sizeof buf
, "\\%Zo", cnt
);
553 WITH_CUR_LOCALE (error (0, 0, _("\
554 character '%s' in class `%s' must not be in class `%s'"),
556 valid_table
[cls1
].name
,
557 valid_table
[cls2
].name
));
562 ctype
->class256_collection
[cnt
] |= _ISbit (cls2
);
566 WITH_CUR_LOCALE (error (5, 0, _("\
567 internal error in %s, line %u"), __FUNCTION__
, __LINE__
));
573 /* ... and now test <SP> as a special case. */
575 if (((cnt
= BITPOS (tok_space
),
576 (ELEM (ctype
, class_collection
, , space_value
)
577 & BITw (tok_space
)) == 0)
578 || (cnt
= BITPOS (tok_blank
),
579 (ELEM (ctype
, class_collection
, , space_value
)
580 & BITw (tok_blank
)) == 0)))
583 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
584 valid_table
[cnt
].name
));
586 else if (((cnt
= BITPOS (tok_punct
),
587 (ELEM (ctype
, class_collection
, , space_value
)
588 & BITw (tok_punct
)) != 0)
589 || (cnt
= BITPOS (tok_graph
),
590 (ELEM (ctype
, class_collection
, , space_value
)
595 WITH_CUR_LOCALE (error (0, 0, _("\
596 <SP> character must not be in class `%s'"),
597 valid_table
[cnt
].name
));
600 ELEM (ctype
, class_collection
, , space_value
) |= BITw (tok_print
);
602 space_seq
= charmap_find_value (charmap
, "SP", 2);
603 if (space_seq
== NULL
)
604 space_seq
= charmap_find_value (charmap
, "space", 5);
605 if (space_seq
== NULL
)
606 space_seq
= charmap_find_value (charmap
, "U00000020", 9);
607 if (space_seq
== NULL
|| space_seq
->nbytes
!= 1)
610 WITH_CUR_LOCALE (error (0, 0, _("\
611 character <SP> not defined in character map")));
613 else if (((cnt
= BITPOS (tok_space
),
614 (ctype
->class256_collection
[space_seq
->bytes
[0]]
615 & BIT (tok_space
)) == 0)
616 || (cnt
= BITPOS (tok_blank
),
617 (ctype
->class256_collection
[space_seq
->bytes
[0]]
618 & BIT (tok_blank
)) == 0)))
621 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
622 valid_table
[cnt
].name
));
624 else if (((cnt
= BITPOS (tok_punct
),
625 (ctype
->class256_collection
[space_seq
->bytes
[0]]
626 & BIT (tok_punct
)) != 0)
627 || (cnt
= BITPOS (tok_graph
),
628 (ctype
->class256_collection
[space_seq
->bytes
[0]]
629 & BIT (tok_graph
)) != 0)))
632 WITH_CUR_LOCALE (error (0, 0, _("\
633 <SP> character must not be in class `%s'"),
634 valid_table
[cnt
].name
));
637 ctype
->class256_collection
[space_seq
->bytes
[0]] |= BIT (tok_print
);
639 /* Check whether all single-byte characters map to their upper/lowercase
640 equivalents according to the ASCII rules. */
641 for (cnt
= 'A'; cnt
<= 'Z'; ++cnt
)
643 uint32_t uppval
= ctype
->map256_collection
[0][cnt
];
644 uint32_t lowval
= ctype
->map256_collection
[1][cnt
];
645 uint32_t lowuppval
= ctype
->map256_collection
[0][lowval
];
646 uint32_t lowlowval
= ctype
->map256_collection
[1][lowval
];
649 || lowval
!= cnt
+ 0x20
651 || lowlowval
!= cnt
+ 0x20)
652 ctype
->nonascii_case
= 1;
654 for (cnt
= 0; cnt
< 256; ++cnt
)
655 if (cnt
< 'A' || (cnt
> 'Z' && cnt
< 'a') || cnt
> 'z')
656 if (ctype
->map256_collection
[0][cnt
] != cnt
657 || ctype
->map256_collection
[1][cnt
] != cnt
)
658 ctype
->nonascii_case
= 1;
660 /* Now that the tests are done make sure the name array contains all
661 characters which are handled in the WIDTH section of the
662 character set definition file. */
663 if (charmap
->width_rules
!= NULL
)
664 for (cnt
= 0; cnt
< charmap
->nwidth_rules
; ++cnt
)
666 unsigned char bytes
[charmap
->mb_cur_max
];
667 int nbytes
= charmap
->width_rules
[cnt
].from
->nbytes
;
669 /* We have the range of character for which the width is
670 specified described using byte sequences of the multibyte
671 charset. We have to convert this to UCS4 now. And we
672 cannot simply convert the beginning and the end of the
673 sequence, we have to iterate over the byte sequence and
674 convert it for every single character. */
675 memcpy (bytes
, charmap
->width_rules
[cnt
].from
->bytes
, nbytes
);
677 while (nbytes
< charmap
->width_rules
[cnt
].to
->nbytes
678 || memcmp (bytes
, charmap
->width_rules
[cnt
].to
->bytes
,
681 /* Find the UCS value for `bytes'. */
685 = charmap_find_symbol (charmap
, (char *) bytes
, nbytes
);
688 wch
= ILLEGAL_CHAR_VALUE
;
689 else if (seq
->ucs4
!= UNINITIALIZED_CHAR_VALUE
)
692 wch
= repertoire_find_value (ctype
->repertoire
, seq
->name
,
695 if (wch
!= ILLEGAL_CHAR_VALUE
)
696 /* We are only interested in the side-effects of the
697 `find_idx' call. It will add appropriate entries in
698 the name array if this is necessary. */
699 (void) find_idx (ctype
, NULL
, NULL
, NULL
, wch
);
701 /* "Increment" the bytes sequence. */
703 while (inner
>= 0 && bytes
[inner
] == 0xff)
708 /* We have to extend the byte sequence. */
709 if (nbytes
>= charmap
->width_rules
[cnt
].to
->nbytes
)
713 memset (&bytes
[1], 0, nbytes
);
719 while (++inner
< nbytes
)
725 /* Now set all the other characters of the character set to the
728 while (iterate_table (&charmap
->char_table
, &curs
, &key
, &len
, &vdata
) == 0)
730 struct charseq
*data
= (struct charseq
*) vdata
;
732 if (data
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
733 data
->ucs4
= repertoire_find_value (ctype
->repertoire
,
736 if (data
->ucs4
!= ILLEGAL_CHAR_VALUE
)
737 (void) find_idx (ctype
, NULL
, NULL
, NULL
, data
->ucs4
);
740 /* There must be a multiple of 10 digits. */
741 if (ctype
->mbdigits_act
% 10 != 0)
743 assert (ctype
->mbdigits_act
== ctype
->wcdigits_act
);
744 ctype
->wcdigits_act
-= ctype
->mbdigits_act
% 10;
745 ctype
->mbdigits_act
-= ctype
->mbdigits_act
% 10;
746 WITH_CUR_LOCALE (error (0, 0, _("\
747 `digit' category has not entries in groups of ten")));
750 /* Check the input digits. There must be a multiple of ten available.
751 In each group it could be that one or the other character is missing.
752 In this case the whole group must be removed. */
754 while (cnt
< ctype
->mbdigits_act
)
757 for (inner
= 0; inner
< 10; ++inner
)
758 if (ctype
->mbdigits
[cnt
+ inner
] == NULL
)
765 /* Remove the group. */
766 memmove (&ctype
->mbdigits
[cnt
], &ctype
->mbdigits
[cnt
+ 10],
767 ((ctype
->wcdigits_act
- cnt
- 10)
768 * sizeof (ctype
->mbdigits
[0])));
769 ctype
->mbdigits_act
-= 10;
773 /* If no input digits are given use the default. */
774 if (ctype
->mbdigits_act
== 0)
776 if (ctype
->mbdigits_max
== 0)
778 ctype
->mbdigits
= obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
779 10 * sizeof (struct charseq
*));
780 ctype
->mbdigits_max
= 10;
783 for (cnt
= 0; cnt
< 10; ++cnt
)
785 ctype
->mbdigits
[cnt
] = charmap_find_symbol (charmap
,
786 (char *) digits
+ cnt
, 1);
787 if (ctype
->mbdigits
[cnt
] == NULL
)
789 ctype
->mbdigits
[cnt
] = charmap_find_symbol (charmap
,
791 strlen (longnames
[cnt
]));
792 if (ctype
->mbdigits
[cnt
] == NULL
)
794 /* Hum, this ain't good. */
795 WITH_CUR_LOCALE (error (0, 0, _("\
796 no input digits defined and none of the standard names in the charmap")));
798 ctype
->mbdigits
[cnt
] = obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
799 sizeof (struct charseq
) + 1);
801 /* This is better than nothing. */
802 ctype
->mbdigits
[cnt
]->bytes
[0] = digits
[cnt
];
803 ctype
->mbdigits
[cnt
]->nbytes
= 1;
808 ctype
->mbdigits_act
= 10;
811 /* Check the wide character input digits. There must be a multiple
812 of ten available. In each group it could be that one or the other
813 character is missing. In this case the whole group must be
816 while (cnt
< ctype
->wcdigits_act
)
819 for (inner
= 0; inner
< 10; ++inner
)
820 if (ctype
->wcdigits
[cnt
+ inner
] == ILLEGAL_CHAR_VALUE
)
827 /* Remove the group. */
828 memmove (&ctype
->wcdigits
[cnt
], &ctype
->wcdigits
[cnt
+ 10],
829 ((ctype
->wcdigits_act
- cnt
- 10)
830 * sizeof (ctype
->wcdigits
[0])));
831 ctype
->wcdigits_act
-= 10;
835 /* If no input digits are given use the default. */
836 if (ctype
->wcdigits_act
== 0)
838 if (ctype
->wcdigits_max
== 0)
840 ctype
->wcdigits
= obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
841 10 * sizeof (uint32_t));
842 ctype
->wcdigits_max
= 10;
845 for (cnt
= 0; cnt
< 10; ++cnt
)
846 ctype
->wcdigits
[cnt
] = L
'0' + cnt
;
848 ctype
->mbdigits_act
= 10;
851 /* Check the outdigits. */
853 for (cnt
= 0; cnt
< 10; ++cnt
)
854 if (ctype
->mboutdigits
[cnt
] == NULL
)
856 static struct charseq replace
[2];
860 WITH_CUR_LOCALE (error (0, 0, _("\
861 not all characters used in `outdigit' are available in the charmap")));
865 replace
[0].nbytes
= 1;
866 replace
[0].bytes
[0] = '?';
867 replace
[0].bytes
[1] = '\0';
868 ctype
->mboutdigits
[cnt
] = &replace
[0];
872 for (cnt
= 0; cnt
< 10; ++cnt
)
873 if (ctype
->wcoutdigits
[cnt
] == 0)
877 WITH_CUR_LOCALE (error (0, 0, _("\
878 not all characters used in `outdigit' are available in the repertoire")));
882 ctype
->wcoutdigits
[cnt
] = L
'?';
885 /* Sort the entries in the translit_ignore list. */
886 if (ctype
->translit_ignore
!= NULL
)
888 struct translit_ignore_t
*firstp
= ctype
->translit_ignore
;
889 struct translit_ignore_t
*runp
;
891 ctype
->ntranslit_ignore
= 1;
893 for (runp
= firstp
->next
; runp
!= NULL
; runp
= runp
->next
)
895 struct translit_ignore_t
*lastp
= NULL
;
896 struct translit_ignore_t
*cmpp
;
898 ++ctype
->ntranslit_ignore
;
900 for (cmpp
= firstp
; cmpp
!= NULL
; lastp
= cmpp
, cmpp
= cmpp
->next
)
901 if (runp
->from
< cmpp
->from
)
909 ctype
->translit_ignore
= firstp
;
915 ctype_output (struct localedef_t
*locale
, const struct charmap_t
*charmap
,
916 const char *output_path
)
918 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
919 const size_t nelems
= (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
)
920 + ctype
->nr_charclass
+ ctype
->map_collection_nr
);
921 struct locale_file file
;
922 uint32_t default_missing_len
;
925 /* Now prepare the output: Find the sizes of the table we can use. */
926 allocate_arrays (ctype
, charmap
, ctype
->repertoire
);
928 default_missing_len
= (ctype
->default_missing
929 ? wcslen ((wchar_t *) ctype
->default_missing
)
932 init_locale_data (&file
, nelems
);
933 for (elem
= 0; elem
< nelems
; ++elem
)
935 if (elem
< _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
))
938 #define CTYPE_EMPTY(name) \
940 add_locale_empty (&file); \
943 CTYPE_EMPTY(_NL_CTYPE_GAP1
);
944 CTYPE_EMPTY(_NL_CTYPE_GAP2
);
945 CTYPE_EMPTY(_NL_CTYPE_GAP3
);
946 CTYPE_EMPTY(_NL_CTYPE_GAP4
);
947 CTYPE_EMPTY(_NL_CTYPE_GAP5
);
948 CTYPE_EMPTY(_NL_CTYPE_GAP6
);
950 #define CTYPE_RAW_DATA(name, base, size) \
951 case _NL_ITEM_INDEX (name): \
952 add_locale_raw_data (&file, base, size); \
955 CTYPE_RAW_DATA (_NL_CTYPE_CLASS
,
957 (256 + 128) * sizeof (char_class_t
));
959 #define CTYPE_UINT32_ARRAY(name, base, n_elems) \
960 case _NL_ITEM_INDEX (name): \
961 add_locale_uint32_array (&file, base, n_elems); \
964 CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER
, ctype
->map_b
[0], 256 + 128);
965 CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER
, ctype
->map_b
[1], 256 + 128);
966 CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER32
, ctype
->map32_b
[0], 256);
967 CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER32
, ctype
->map32_b
[1], 256);
968 CTYPE_RAW_DATA (_NL_CTYPE_CLASS32
,
970 256 * sizeof (char_class32_t
));
972 #define CTYPE_UINT32(name, value) \
973 case _NL_ITEM_INDEX (name): \
974 add_locale_uint32 (&file, value); \
977 CTYPE_UINT32 (_NL_CTYPE_CLASS_OFFSET
, ctype
->class_offset
);
978 CTYPE_UINT32 (_NL_CTYPE_MAP_OFFSET
, ctype
->map_offset
);
979 CTYPE_UINT32 (_NL_CTYPE_TRANSLIT_TAB_SIZE
, ctype
->translit_idx_size
);
981 CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_IDX
,
982 ctype
->translit_from_idx
,
983 ctype
->translit_idx_size
);
985 CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_TBL
,
986 ctype
->translit_from_tbl
,
987 ctype
->translit_from_tbl_size
988 / sizeof (uint32_t));
990 CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_IDX
,
991 ctype
->translit_to_idx
,
992 ctype
->translit_idx_size
);
994 CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_TBL
,
995 ctype
->translit_to_tbl
,
996 ctype
->translit_to_tbl_size
/ sizeof (uint32_t));
998 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES
):
999 /* The class name array. */
1000 start_locale_structure (&file
);
1001 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
1002 add_locale_string (&file
, ctype
->classnames
[cnt
]);
1003 add_locale_char (&file
, 0);
1004 align_locale_data (&file
, LOCFILE_ALIGN
);
1005 end_locale_structure (&file
);
1008 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES
):
1009 /* The map name array. */
1010 start_locale_structure (&file
);
1011 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
)
1012 add_locale_string (&file
, ctype
->mapnames
[cnt
]);
1013 add_locale_char (&file
, 0);
1014 align_locale_data (&file
, LOCFILE_ALIGN
);
1015 end_locale_structure (&file
);
1018 case _NL_ITEM_INDEX (_NL_CTYPE_WIDTH
):
1019 add_locale_wcwidth_table (&file
, &ctype
->width
);
1022 CTYPE_UINT32 (_NL_CTYPE_MB_CUR_MAX
, ctype
->mb_cur_max
);
1024 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME
):
1025 add_locale_string (&file
, ctype
->codeset_name
);
1028 CTYPE_UINT32 (_NL_CTYPE_MAP_TO_NONASCII
, ctype
->to_nonascii
);
1030 CTYPE_UINT32 (_NL_CTYPE_NONASCII_CASE
, ctype
->nonascii_case
);
1032 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN
):
1033 add_locale_uint32 (&file
, ctype
->mbdigits_act
/ 10);
1036 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN
):
1037 add_locale_uint32 (&file
, ctype
->wcdigits_act
/ 10);
1040 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB
):
1041 start_locale_structure (&file
);
1042 for (cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
);
1043 cnt
< ctype
->mbdigits_act
; cnt
+= 10)
1045 add_locale_raw_data (&file
, ctype
->mbdigits
[cnt
]->bytes
,
1046 ctype
->mbdigits
[cnt
]->nbytes
);
1047 add_locale_char (&file
, 0);
1049 end_locale_structure (&file
);
1052 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB
) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB
):
1053 start_locale_structure (&file
);
1054 cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB
);
1055 add_locale_raw_data (&file
, ctype
->mboutdigits
[cnt
]->bytes
,
1056 ctype
->mboutdigits
[cnt
]->nbytes
);
1057 add_locale_char (&file
, 0);
1058 end_locale_structure (&file
);
1061 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC
) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC
):
1062 start_locale_structure (&file
);
1063 for (cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC
);
1064 cnt
< ctype
->wcdigits_act
; cnt
+= 10)
1065 add_locale_uint32 (&file
, ctype
->wcdigits
[cnt
]);
1066 end_locale_structure (&file
);
1069 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC
) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC
):
1070 cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC
);
1071 add_locale_uint32 (&file
, ctype
->wcoutdigits
[cnt
]);
1074 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN
):
1075 add_locale_uint32 (&file
, default_missing_len
);
1078 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING
):
1079 add_locale_uint32_array (&file
, ctype
->default_missing
,
1080 default_missing_len
);
1083 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN
):
1084 add_locale_uint32 (&file
, ctype
->ntranslit_ignore
);
1087 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE
):
1088 start_locale_structure (&file
);
1090 struct translit_ignore_t
*runp
;
1091 for (runp
= ctype
->translit_ignore
; runp
!= NULL
;
1094 add_locale_uint32 (&file
, runp
->from
);
1095 add_locale_uint32 (&file
, runp
->to
);
1096 add_locale_uint32 (&file
, runp
->step
);
1099 end_locale_structure (&file
);
1103 assert (! "unknown CTYPE element");
1107 /* Handle extra maps. */
1108 size_t nr
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
);
1109 if (nr
< ctype
->nr_charclass
)
1111 start_locale_prelude (&file
);
1112 add_locale_uint32_array (&file
, ctype
->class_b
[nr
], 256 / 32);
1113 end_locale_prelude (&file
);
1114 add_locale_wctype_table (&file
, &ctype
->class_3level
[nr
]);
1118 nr
-= ctype
->nr_charclass
;
1119 assert (nr
< ctype
->map_collection_nr
);
1120 add_locale_wctrans_table (&file
, &ctype
->map_3level
[nr
]);
1125 write_locale_data (output_path
, LC_CTYPE
, "LC_CTYPE", &file
);
1129 /* Local functions. */
/* Register NAME as a new character class in CTYPE.  The existing
   entries of ctype->classnames are compared against NAME with strcmp; a
   match is reported through lr_error on LR.  When the table already
   holds MAX_NR_CHARCLASS entries, error is called with status 2.
   Otherwise the NAME pointer itself (not a copy) is stored in the next
   free slot and nr_charclass is incremented. */
1131 ctype_class_new (struct linereader
*lr
, struct locale_ctype_t
*ctype
,
/* Look for an already registered class with the same name. */
1136 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
1137 if (strcmp (ctype
->classnames
[cnt
], name
) == 0)
/* cnt below nr_charclass here means the name was found. */
1140 if (cnt
< ctype
->nr_charclass
)
1142 lr_error (lr
, _("character class `%s' already defined"), name
);
/* Refuse to go beyond the fixed capacity of classnames. */
1146 if (ctype
->nr_charclass
== MAX_NR_CHARCLASS
)
1147 /* Exit code 2 is prescribed in P1003.2b. */
1148 WITH_CUR_LOCALE (error (2, 0, _("\
1149 implementation limit: no more than %Zd character classes allowed"),
/* Append the name; only the pointer is kept. */
1152 ctype
->classnames
[ctype
->nr_charclass
++] = name
;
/* Register NAME as a new character map in CTYPE.  While scanning
   ctype->mapnames for a duplicate, the largest map_collection_max of the
   existing maps is remembered in max_chars.  A duplicate is reported
   through lr_error on LR; a full table (MAX_NR_CHARMAP entries) ends in
   error with status 2.  For a new map the NAME pointer is stored, a
   zero-initialised uint32_t table is allocated with xcalloc, its active
   count is set to 256, and map_collection_nr is incremented.  CHARMAP is
   consulted only for mb_cur_max when picking a capacity. */
1157 ctype_map_new (struct linereader
*lr
, struct locale_ctype_t
*ctype
,
1158 const char *name
, const struct charmap_t
*charmap
)
1160 size_t max_chars
= 0;
/* Search for NAME and track the largest capacity among existing maps. */
1163 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
)
1165 if (strcmp (ctype
->mapnames
[cnt
], name
) == 0)
1168 if (max_chars
< ctype
->map_collection_max
[cnt
])
1169 max_chars
= ctype
->map_collection_max
[cnt
];
/* cnt below map_collection_nr here means the name was found. */
1172 if (cnt
< ctype
->map_collection_nr
)
1174 lr_error (lr
, _("character map `%s' already defined"), name
);
/* Refuse to go beyond the fixed capacity of the map arrays. */
1178 if (ctype
->map_collection_nr
== MAX_NR_CHARMAP
)
1179 /* Exit code 2 is prescribed in P1003.2b. */
1180 WITH_CUR_LOCALE (error (2, 0, _("\
1181 implementation limit: no more than %d character maps allowed"),
/* Store the name; only the pointer is kept. */
1184 ctype
->mapnames
[cnt
] = name
;
/* Capacity of the new table: either 256/512 depending on mb_cur_max, or
   the max_chars value gathered above.  NOTE(review): the condition that
   selects between the two assignments is not visible here; presumably
   max_chars == 0 picks the first -- confirm. */
1187 ctype
->map_collection_max
[cnt
] = charmap
->mb_cur_max
== 1 ? 256 : 512;
1189 ctype
->map_collection_max
[cnt
] = max_chars
;
/* Allocate the zero-filled table with the chosen capacity. */
1191 ctype
->map_collection
[cnt
] = (uint32_t *)
1192 xcalloc (sizeof (uint32_t), ctype
->map_collection_max
[cnt
]);
1193 ctype
->map_collection_act
[cnt
] = 256;
1195 ++ctype
->map_collection_nr
;
1199 /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
1200 is possible if we only want to extend the name array.

Look up the position of character IDX in ctype->charnames, appending it
(and recording it in ctype->charnames_idx) when it is not yet known.
With a non-NULL TABLE the result is a pointer to the matching entry of
*TABLE.  When MAX is NULL the table is never grown and NULL is returned
for positions at or beyond *ACT; otherwise *TABLE is reallocated as
needed and the added part is zero-filled.  Returns NULL when TABLE is
NULL. */
1202 find_idx (struct locale_ctype_t
*ctype
, uint32_t **table
, size_t *max
,
1203 size_t *act
, uint32_t idx
)
/* Direct answer using IDX itself as the position.  NOTE(review): the
   guarding condition is not visible here; since ctype_startup fills
   charnames[0..255] with the identity and the search below starts at
   256, this is presumably the idx < 256 case -- confirm. */
1208 return table
== NULL
? NULL
: &(*table
)[idx
];
1210 /* Use the charnames_idx lookup table instead of the slow search loop. */
1212 cnt
= idx_table_get (&ctype
->charnames_idx
, idx
);
/* charnames_act is the position the code below treats as "not found". */
1215 cnt
= ctype
->charnames_act
;
/* Linear search over the names past the first 256; the alternative to
   the lookup table mentioned above. */
1217 for (cnt
= 256; cnt
< ctype
->charnames_act
; ++cnt
)
1218 if (ctype
->charnames
[cnt
] == idx
)
1222 /* We have to distinguish two cases: the name is found or not. */
1223 if (cnt
== ctype
->charnames_act
)
1225 /* Extend the name array. */
1226 if (ctype
->charnames_act
== ctype
->charnames_max
)
/* Out of room: double the capacity of charnames. */
1228 ctype
->charnames_max
*= 2;
1229 ctype
->charnames
= (uint32_t *)
1230 xrealloc (ctype
->charnames
,
1231 sizeof (uint32_t) * ctype
->charnames_max
);
/* Append IDX at position cnt and register that position in the index. */
1233 ctype
->charnames
[ctype
->charnames_act
++] = idx
;
1234 idx_table_add (&ctype
->charnames_idx
, idx
, cnt
);
1238 /* We have done everything we are asked to do. */
1242 /* The caller does not want to extend the table. */
1243 return (cnt
>= *act
? NULL
: &(*table
)[cnt
]);
/* Grow *TABLE: remember the old capacity, enlarge *max until it exceeds
   cnt, reallocate, and clear the newly added tail. */
1249 size_t old_max
= *max
;
1252 while (*max
<= cnt
);
1255 (uint32_t *) xrealloc (*table
, *max
* sizeof (uint32_t));
1256 memset (&(*table
)[old_max
], '\0',
1257 (*max
- old_max
) * sizeof (uint32_t));
1263 return &(*table
)[cnt
];
/* Resolve the character named by token NOW into a charmap entry
   (*SEQP) and a UCS4 value (*WCHP).

   tok_bsymbol:  both values are looked up by the symbolic name, through
                 repertoire_find_value and charmap_find_value.
   tok_ucs4:     the charmap is searched under the name "U%08X"; further
                 lookups go through repertoire_find_seq and
                 repertoire_find_symbol.  *WCHP is set to now->val.ucs4.
   tok_charcode: the entry is looked up with charmap_find_symbol; *WCHP
                 is ILLEGAL_CHAR_VALUE or the entry's ucs4 member, which
                 is filled in from the repertoire while it is still
                 UNINITIALIZED_CHAR_VALUE.

   NOTE(review): the return statements and several of the NULL tests are
   not visible in this listing (the embedded line numbers skip).  A
   caller later in this file tests the result in an `if', so presumably
   non-zero signals a token that is none of the three kinds -- confirm
   against the pristine file.  */
1268 get_character (struct token
*now
, const struct charmap_t
*charmap
,
1269 struct repertoire_t
*repertoire
,
1270 struct charseq
**seqp
, uint32_t *wchp
)
1272 if (now
->tok
== tok_bsymbol
)
1274 /* This will hopefully be the normal case. */
1275 *wchp
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
1276 now
->val
.str
.lenmb
);
1277 *seqp
= charmap_find_value (charmap
, now
->val
.str
.startmb
,
1278 now
->val
.str
.lenmb
);
1280 else if (now
->tok
== tok_ucs4
)
/* "U" followed by eight hex digits is nine characters, which is the
   length passed to charmap_find_value.  */
1284 snprintf (utmp
, sizeof (utmp
), "U%08X", now
->val
.ucs4
);
1285 *seqp
= charmap_find_value (charmap
, utmp
, 9);
1288 *seqp
= repertoire_find_seq (repertoire
, now
->val
.ucs4
);
1292 /* Compute the value in the charmap from the UCS value. */
1293 const char *symbol
= repertoire_find_symbol (repertoire
,
1299 *seqp
= charmap_find_value (charmap
, symbol
, strlen (symbol
));
1303 if (repertoire
!= NULL
)
1305 /* Insert a negative entry. */
1306 static const struct charseq negative
1307 = { .ucs4
= ILLEGAL_CHAR_VALUE
};
/* The key is a copy of the UCS4 value allocated from the repertoire's
   obstack.  */
1308 uint32_t *newp
= obstack_alloc (&repertoire
->mem_pool
,
1310 *newp
= now
->val
.ucs4
;
1312 insert_entry (&repertoire
->seq_table
, newp
,
1313 sizeof (uint32_t), (void *) &negative
);
1317 (*seqp
)->ucs4
= now
->val
.ucs4
;
1319 else if ((*seqp
)->ucs4
!= now
->val
.ucs4
)
1322 *wchp
= now
->val
.ucs4
;
1324 else if (now
->tok
== tok_charcode
)
1326 /* We must map from the byte code to UCS4. */
1327 *seqp
= charmap_find_symbol (charmap
, now
->val
.str
.startmb
,
1328 now
->val
.str
.lenmb
);
1331 *wchp
= ILLEGAL_CHAR_VALUE
;
/* Resolve the entry's UCS4 value lazily, by name, the first time it is
   needed.  */
1334 if ((*seqp
)->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1335 (*seqp
)->ucs4
= repertoire_find_value (repertoire
, (*seqp
)->name
,
1336 strlen ((*seqp
)->name
));
1337 *wchp
= (*seqp
)->ucs4
;
1347 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1348 the .(2). counterparts. */
1350 charclass_symbolic_ellipsis (struct linereader
*ldfile
,
1351 struct locale_ctype_t
*ctype
,
1352 const struct charmap_t
*charmap
,
1353 struct repertoire_t
*repertoire
,
1355 const char *last_str
,
1356 unsigned long int class256_bit
,
1357 unsigned long int class_bit
, int base
,
1358 int ignore_content
, int handle_digits
, int step
)
1360 const char *nowstr
= now
->val
.str
.startmb
;
1361 char tmp
[now
->val
.str
.lenmb
+ 1];
1364 unsigned long int from
;
1365 unsigned long int to
;
1367 /* We have to compute the ellipsis values using the symbolic names. */
1368 assert (last_str
!= NULL
);
1370 if (strlen (last_str
) != now
->val
.str
.lenmb
)
1374 _("`%s' and `%.*s' are not valid names for symbolic range"),
1375 last_str
, (int) now
->val
.str
.lenmb
, nowstr
);
1379 if (memcmp (last_str
, nowstr
, now
->val
.str
.lenmb
) == 0)
1380 /* Nothing to do, the names are the same. */
1383 for (cp
= last_str
; *cp
== *(nowstr
+ (cp
- last_str
)); ++cp
)
1387 from
= strtoul (cp
, &endp
, base
);
1388 if ((from
== UINT_MAX
&& errno
== ERANGE
) || *endp
!= '\0')
1391 to
= strtoul (nowstr
+ (cp
- last_str
), &endp
, base
);
1392 if ((to
== UINT_MAX
&& errno
== ERANGE
)
1393 || (endp
- nowstr
) != now
->val
.str
.lenmb
|| from
>= to
)
1396 /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
1397 if (!ignore_content
)
1399 now
->val
.str
.startmb
= tmp
;
1400 while ((from
+= step
) <= to
)
1402 struct charseq
*seq
;
1405 sprintf (tmp
, (base
== 10 ? "%.*s%0*ld" : "%.*s%0*lX"),
1406 (int) (cp
- last_str
), last_str
,
1407 (int) (now
->val
.str
.lenmb
- (cp
- last_str
)),
1410 get_character (now
, charmap
, repertoire
, &seq
, &wch
);
1412 if (seq
!= NULL
&& seq
->nbytes
== 1)
1413 /* Yep, we can store information about this byte sequence. */
1414 ctype
->class256_collection
[seq
->bytes
[0]] |= class256_bit
;
1416 if (wch
!= ILLEGAL_CHAR_VALUE
&& class_bit
!= 0)
1417 /* We have the UCS4 position. */
1418 *find_idx (ctype
, &ctype
->class_collection
,
1419 &ctype
->class_collection_max
,
1420 &ctype
->class_collection_act
, wch
) |= class_bit
;
1422 if (handle_digits
== 1)
1424 /* We must store the digit values. */
1425 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1427 ctype
->mbdigits_max
*= 2;
1428 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1429 (ctype
->mbdigits_max
1430 * sizeof (char *)));
1431 ctype
->wcdigits_max
*= 2;
1432 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1433 (ctype
->wcdigits_max
1434 * sizeof (uint32_t)));
1437 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
1438 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
1440 else if (handle_digits
== 2)
1442 /* We must store the digit values. */
1443 if (ctype
->outdigits_act
>= 10)
1445 lr_error (ldfile
, _("\
1446 %s: field `%s' does not contain exactly ten entries"),
1447 "LC_CTYPE", "outdigit");
1451 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
1452 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
1453 ++ctype
->outdigits_act
;
1460 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. */
/* Apply CLASS_BIT and CLASS256_BIT to the UCS4 values of a range that
   starts after LAST_WCH and ends at now->val.ucs4, advancing by STEP.

   A to-value smaller than LAST_WCH is reported with lr_error.  Nothing
   is stored when IGNORE_CONTENT is non-zero.  For each value the
   charmap is searched under the names "U%08X" and "U%04X" and then the
   repertoire is consulted; when nothing is found a shared "negative"
   entry stands in for the sequence.  HANDLE_DIGITS == 1 appends to the
   mbdigits/wcdigits arrays (doubling them when full), HANDLE_DIGITS == 2
   fills the outdigit arrays and reports an error once outdigits_act has
   reached 10.

   NOTE(review): the embedded line numbers skip, so several NULL tests,
   returns and continuation lines are not visible here.  The comment
   opened on source line 1500 is also not closed in this listing, which
   hides the following declaration from a compiler -- confirm against
   the pristine file.  */
1462 charclass_ucs4_ellipsis (struct linereader
*ldfile
,
1463 struct locale_ctype_t
*ctype
,
1464 const struct charmap_t
*charmap
,
1465 struct repertoire_t
*repertoire
,
1466 struct token
*now
, uint32_t last_wch
,
1467 unsigned long int class256_bit
,
1468 unsigned long int class_bit
, int ignore_content
,
1469 int handle_digits
, int step
)
1471 if (last_wch
> now
->val
.ucs4
)
1473 lr_error (ldfile
, _("\
1474 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1475 (now
->val
.ucs4
| last_wch
) < 65536 ? 4 : 8, now
->val
.ucs4
,
1476 (now
->val
.ucs4
| last_wch
) < 65536 ? 4 : 8, last_wch
);
1480 if (!ignore_content
)
1481 while ((last_wch
+= step
) <= now
->val
.ucs4
)
1483 /* We have to find out whether there is a byte sequence corresponding
1484 to this UCS4 value. */
1485 struct charseq
*seq
;
/* Try the eight-digit name first, then the four-digit one.  */
1488 snprintf (utmp
, sizeof (utmp
), "U%08X", last_wch
);
1489 seq
= charmap_find_value (charmap
, utmp
, 9);
1492 snprintf (utmp
, sizeof (utmp
), "U%04X", last_wch
);
1493 seq
= charmap_find_value (charmap
, utmp
, 5);
1497 /* Try looking in the repertoire map. */
1498 seq
= repertoire_find_seq (repertoire
, last_wch
);
1500 /* If this is the first time we look for this sequence create a new
1504 static const struct charseq negative
1505 = { .ucs4
= ILLEGAL_CHAR_VALUE
};
1507 /* Find the symbolic name for this UCS4 value. */
1508 if (repertoire
!= NULL
)
1510 const char *symbol
= repertoire_find_symbol (repertoire
,
1512 uint32_t *newp
= obstack_alloc (&repertoire
->mem_pool
,
1517 /* We have a name, now search the multibyte value. */
1518 seq
= charmap_find_value (charmap
, symbol
, strlen (symbol
));
1521 /* We have to create a fake entry. */
1522 seq
= (struct charseq
*) &negative
;
1524 seq
->ucs4
= last_wch
;
1526 insert_entry (&repertoire
->seq_table
, newp
, sizeof (uint32_t),
1530 /* We have to create a fake entry. */
1531 seq
= (struct charseq
*) &negative
;
1534 /* We have a name, now search the multibyte value. */
1535 if (seq
->ucs4
== last_wch
&& seq
->nbytes
== 1)
1536 /* Yep, we can store information about this byte sequence. */
1537 ctype
->class256_collection
[(size_t) seq
->bytes
[0]]
1540 /* And of course we have the UCS4 position. */
1542 *find_idx (ctype
, &ctype
->class_collection
,
1543 &ctype
->class_collection_max
,
1544 &ctype
->class_collection_act
, last_wch
) |= class_bit
;
1546 if (handle_digits
== 1)
1548 /* We must store the digit values. */
1549 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
/* Both digit arrays are doubled together.  */
1551 ctype
->mbdigits_max
*= 2;
1552 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1553 (ctype
->mbdigits_max
1554 * sizeof (char *)));
1555 ctype
->wcdigits_max
*= 2;
1556 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1557 (ctype
->wcdigits_max
1558 * sizeof (uint32_t)));
1561 ctype
->mbdigits
[ctype
->mbdigits_act
++] = (seq
->ucs4
== last_wch
1563 ctype
->wcdigits
[ctype
->wcdigits_act
++] = last_wch
;
1565 else if (handle_digits
== 2)
1567 /* We must store the digit values. */
1568 if (ctype
->outdigits_act
>= 10)
1570 lr_error (ldfile
, _("\
1571 %s: field `%s' does not contain exactly ten entries"),
1572 "LC_CTYPE", "outdigit");
1576 ctype
->mboutdigits
[ctype
->outdigits_act
] = (seq
->ucs4
== last_wch
1578 ctype
->wcoutdigits
[ctype
->outdigits_act
] = last_wch
;
1579 ++ctype
->outdigits_act
;
1585 /* Ellipsis as in `/xea/x12.../xea/x34'. */
1587 charclass_charcode_ellipsis (struct linereader
*ldfile
,
1588 struct locale_ctype_t
*ctype
,
1589 const struct charmap_t
*charmap
,
1590 struct repertoire_t
*repertoire
,
1591 struct token
*now
, char *last_charcode
,
1592 uint32_t last_charcode_len
,
1593 unsigned long int class256_bit
,
1594 unsigned long int class_bit
, int ignore_content
,
1597 /* First check whether the to-value is larger. */
1598 if (now
->val
.charcode
.nbytes
!= last_charcode_len
)
1600 lr_error (ldfile
, _("\
1601 start and end character sequence of range must have the same length"));
1605 if (memcmp (last_charcode
, now
->val
.charcode
.bytes
, last_charcode_len
) > 0)
1607 lr_error (ldfile
, _("\
1608 to-value character sequence is smaller than from-value sequence"));
1612 if (!ignore_content
)
1616 /* Increment the byte sequence value. */
1617 struct charseq
*seq
;
1621 for (i
= last_charcode_len
- 1; i
>= 0; --i
)
1622 if (++last_charcode
[i
] != 0)
1625 if (last_charcode_len
== 1)
1626 /* Of course we have the charcode value. */
1627 ctype
->class256_collection
[(size_t) last_charcode
[0]]
1630 /* Find the symbolic name. */
1631 seq
= charmap_find_symbol (charmap
, last_charcode
,
1635 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1636 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1637 strlen (seq
->name
));
1638 wch
= seq
== NULL
? ILLEGAL_CHAR_VALUE
: seq
->ucs4
;
1640 if (wch
!= ILLEGAL_CHAR_VALUE
&& class_bit
!= 0)
1641 *find_idx (ctype
, &ctype
->class_collection
,
1642 &ctype
->class_collection_max
,
1643 &ctype
->class_collection_act
, wch
) |= class_bit
;
1646 wch
= ILLEGAL_CHAR_VALUE
;
1648 if (handle_digits
== 1)
1650 /* We must store the digit values. */
1651 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1653 ctype
->mbdigits_max
*= 2;
1654 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1655 (ctype
->mbdigits_max
1656 * sizeof (char *)));
1657 ctype
->wcdigits_max
*= 2;
1658 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1659 (ctype
->wcdigits_max
1660 * sizeof (uint32_t)));
1663 seq
= xmalloc (sizeof (struct charseq
) + last_charcode_len
);
1664 memcpy ((char *) (seq
+ 1), last_charcode
, last_charcode_len
);
1665 seq
->nbytes
= last_charcode_len
;
1667 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
1668 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
1670 else if (handle_digits
== 2)
1672 struct charseq
*seq
;
1673 /* We must store the digit values. */
1674 if (ctype
->outdigits_act
>= 10)
1676 lr_error (ldfile
, _("\
1677 %s: field `%s' does not contain exactly ten entries"),
1678 "LC_CTYPE", "outdigit");
1682 seq
= xmalloc (sizeof (struct charseq
) + last_charcode_len
);
1683 memcpy ((char *) (seq
+ 1), last_charcode
, last_charcode_len
);
1684 seq
->nbytes
= last_charcode_len
;
1686 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
1687 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
1688 ++ctype
->outdigits_act
;
1691 while (memcmp (last_charcode
, now
->val
.charcode
.bytes
,
1692 last_charcode_len
) != 0);
/* Search CTYPE's own transliteration data for the character WCH.

   First the translit list is walked for an entry whose from-string is
   exactly the single character WCH.  Each of its to-strings is then
   checked character by character against CHARMAP, every character
   being looked up under the name "U%08X".  Afterwards the
   translit_ignore list is walked for a range [from, to] that contains
   WCH, and that range is stepped through in increments of the entry's
   step.

   NOTE(review): the return statements, the loop exits and the rest of
   the parameter list are not visible in this listing (the embedded line
   numbers skip).  The trailing comment shows that falling through means
   "nothing found" -- confirm the exact return values against the
   pristine file.  */
1698 find_translit2 (struct locale_ctype_t
*ctype
, const struct charmap_t
*charmap
,
1701 struct translit_t
*trunp
= ctype
->translit
;
1702 struct translit_ignore_t
*tirunp
= ctype
->translit_ignore
;
1704 while (trunp
!= NULL
)
1706 /* XXX We simplify things here. The transliterations we look
1707 for are only allowed to have one character. */
1708 if (trunp
->from
[0] == wch
&& trunp
->from
[1] == 0)
1710 /* Found it. Now look for a transliteration which can be
1711 represented with the character set. */
1712 struct translit_to_t
*torunp
= trunp
->to
;
1714 while (torunp
!= NULL
)
/* Scan the candidate up to its terminating zero.  */
1718 for (i
= 0; torunp
->str
[i
] != 0; ++i
)
1722 snprintf (utmp
, sizeof (utmp
), "U%08X", torunp
->str
[i
]);
1723 if (charmap_find_value (charmap
, utmp
, 9) == NULL
)
1724 /* This character cannot be represented. */
/* Reaching the terminator means every character passed the check.  */
1728 if (torunp
->str
[i
] == 0)
1731 torunp
= torunp
->next
;
1737 trunp
= trunp
->next
;
1740 /* Check for ignored chars. */
1741 while (tirunp
!= NULL
)
1743 if (tirunp
->from
<= wch
&& tirunp
->to
>= wch
)
1747 for (wi
= tirunp
->from
; wi
<= wch
; wi
+= tirunp
->step
)
1753 /* Nothing found. */
/* Look up a transliteration for the character WCH in LOCALE.

   The LC_CTYPE data is taken from locale->categories[LC_CTYPE].ctype.
   When it has a translit list, find_translit2 is consulted first.
   While RESULT is still NULL the translit_include list is walked, and
   find_translit is called recursively on the locale that find_locale
   returns for each include entry.

   NOTE(review): the rest of the parameter list, the remaining
   find_locale arguments and the final return are not visible in this
   listing -- confirm against the pristine file.  */
1759 find_translit (struct localedef_t
*locale
, const struct charmap_t
*charmap
,
1762 struct locale_ctype_t
*ctype
;
1763 uint32_t *result
= NULL
;
1765 assert (locale
!= NULL
);
1766 ctype
= locale
->categories
[LC_CTYPE
].ctype
;
/* Try the locale's own transliteration table first.  */
1771 if (ctype
->translit
!= NULL
)
1772 result
= find_translit2 (ctype
, charmap
, wch
);
1776 struct translit_include_t
*irunp
= ctype
->translit_include
;
/* Stop at the first included locale that yields a result.  */
1778 while (irunp
!= NULL
&& result
== NULL
)
1780 result
= find_translit (find_locale (CTYPE_LOCALE
,
1782 irunp
->copy_repertoire
,
1785 irunp
= irunp
->next
;
1793 /* Read one transliteration entry. */
/* Convert token NOW into a wide-character string.

   tok_bsymbol, tok_ucs4 and tok_charcode each produce a freshly
   xmalloc'ed array of two uint32_t whose first element is the UCS4
   value (from the repertoire, from the token itself, or from the
   charmap entry found with charmap_find_symbol).  tok_string yields
   now->val.str.startwc directly.  Any other token is a syntax error:
   the rest of the line is skipped unless NOW is already tok_eol or
   tok_eof, and (uint32_t *) -1 is returned.

   NOTE(review): the handling of tok_default_missing, the
   NULL-returning paths announced by the "cannot proceed" comments and
   the assignment to the second array element are not visible in this
   listing.  The comment opened on source line 1827 is also not closed
   here -- confirm against the pristine file.  */
1795 read_widestring (struct linereader
*ldfile
, struct token
*now
,
1796 const struct charmap_t
*charmap
,
1797 struct repertoire_t
*repertoire
)
1801 if (now
->tok
== tok_default_missing
)
1802 /* The special name "" will denote this case. */
1804 else if (now
->tok
== tok_bsymbol
)
1806 /* Get the value from the repertoire. */
1807 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1808 wstr
[0] = repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
1809 now
->val
.str
.lenmb
);
1810 if (wstr
[0] == ILLEGAL_CHAR_VALUE
)
1812 /* We cannot proceed, we don't know the UCS4 value. */
1819 else if (now
->tok
== tok_ucs4
)
1821 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1822 wstr
[0] = now
->val
.ucs4
;
1825 else if (now
->tok
== tok_charcode
)
1827 /* Argh, we have to convert to the symbol name first and then to the
1829 struct charseq
*seq
= charmap_find_symbol (charmap
,
1830 now
->val
.str
.startmb
,
1831 now
->val
.str
.lenmb
);
1833 /* Cannot find the UCS4 value. */
1836 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1837 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1838 strlen (seq
->name
));
1839 if (seq
->ucs4
== ILLEGAL_CHAR_VALUE
)
1840 /* We cannot proceed, we don't know the UCS4 value. */
1843 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1844 wstr
[0] = seq
->ucs4
;
1847 else if (now
->tok
== tok_string
)
/* The wide string is taken straight from the token.  */
1849 wstr
= now
->val
.str
.startwc
;
1850 if (wstr
== NULL
|| wstr
[0] == 0)
/* Remaining token kinds are a syntax error.  */
1855 if (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
1856 lr_ignore_rest (ldfile
, 0);
1857 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1858 return (uint32_t *) -1l;
/* Parse one transliteration definition whose from-string starts at NOW
   and link it into ctype->translit.

   The from-string is read with read_widestring.  The entry is allocated
   from ctype->mempool and records the file name and line number of
   LDFILE.  The to-strings that follow are separated by semicolons: each
   one is accumulated on the obstack, terminated with a 32-bit zero,
   finished, and appended to the entry's list through TOP.  At tok_eol
   the completed entry is pushed onto the front of ctype->translit.  If
   read_widestring reports an error ((uint32_t *) -1) the partially
   built entry is released with obstack_free.

   NOTE(review): the loop construct, the initialisation of TOP and
   FIRST, and the early returns are not visible in this listing (the
   embedded line numbers skip) -- confirm against the pristine file.  */
1866 read_translit_entry (struct linereader
*ldfile
, struct locale_ctype_t
*ctype
,
1867 struct token
*now
, const struct charmap_t
*charmap
,
1868 struct repertoire_t
*repertoire
)
1870 uint32_t *from_wstr
= read_widestring (ldfile
, now
, charmap
, repertoire
);
1871 struct translit_t
*result
;
1872 struct translit_to_t
**top
;
1873 struct obstack
*ob
= &ctype
->mempool
;
1877 if (from_wstr
== NULL
)
1878 /* There is no valid from string. */
1881 result
= (struct translit_t
*) obstack_alloc (ob
,
1882 sizeof (struct translit_t
));
1883 result
->from
= from_wstr
;
1884 result
->fname
= ldfile
->fname
;
1885 result
->lineno
= ldfile
->lineno
;
1886 result
->next
= NULL
;
1896 /* Next we have one or more transliterations. They are
1897 separated by semicolons. */
1898 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
1900 if (!first
&& (now
->tok
== tok_semicolon
|| now
->tok
== tok_eol
))
1902 /* One string read. */
1903 const uint32_t zero
= 0;
/* Terminate the accumulated string with a 32-bit zero (4 bytes) and
   close the growing object.  */
1907 obstack_grow (ob
, &zero
, 4);
1908 to_wstr
= obstack_finish (ob
);
1910 *top
= obstack_alloc (ob
, sizeof (struct translit_to_t
));
1911 (*top
)->str
= to_wstr
;
1912 (*top
)->next
= NULL
;
/* End of line: the entry is complete, so publish it.  */
1915 if (now
->tok
== tok_eol
)
1917 result
->next
= ctype
->translit
;
1918 ctype
->translit
= result
;
1923 top
= &(*top
)->next
;
1928 to_wstr
= read_widestring (ldfile
, now
, charmap
, repertoire
);
1929 if (to_wstr
== (uint32_t *) -1l)
1931 /* An error occurred. */
1932 obstack_free (ob
, result
);
1936 if (to_wstr
== NULL
)
1939 /* This value is usable. */
1940 obstack_grow (ob
, to_wstr
, wcslen ((wchar_t *) to_wstr
) * 4);
/* Parse the operand list of a `translit_ignore' line and prepend one
   translit_ignore_t per item to ctype->translit_ignore.

   Each item is a tok_bsymbol or tok_ucs4 character.  It may be followed
   by an ellipsis (tok_ellipsis2, or tok_ellipsis2_2 which selects a
   step of 2) and a closing character of the same two kinds.  Symbolic
   names are resolved through repertoire_find_value.  An unknown name, a
   premature end of line or file, an unexpected token and a to-value
   smaller than the from-value are all reported with lr_error.

   NOTE(review): the enclosing loop, the assignments to the new entry's
   fields and the returns are not visible in this listing (the embedded
   line numbers skip).  The comment opened on source line 2002 is also
   not closed here -- confirm against the pristine file.  */
1949 read_translit_ignore_entry (struct linereader
*ldfile
,
1950 struct locale_ctype_t
*ctype
,
1951 const struct charmap_t
*charmap
,
1952 struct repertoire_t
*repertoire
)
1954 /* We expect a semicolon-separated list of characters we ignore. We are
1955 only interested in the wide character definitions. These must be
1956 single characters, possibly defining a range when an ellipsis is used. */
1959 struct token
*now
= lr_token (ldfile
, charmap
, NULL
, repertoire
,
1961 struct translit_ignore_t
*newp
;
1964 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
1967 _("premature end of `translit_ignore' definition"));
1971 if (now
->tok
!= tok_bsymbol
&& now
->tok
!= tok_ucs4
)
1973 lr_error (ldfile
, _("syntax error"));
1974 lr_ignore_rest (ldfile
, 0);
1978 if (now
->tok
== tok_ucs4
)
1979 from
= now
->val
.ucs4
;
1981 /* Try to get the value. */
1982 from
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
1983 now
->val
.str
.lenmb
);
1985 if (from
== ILLEGAL_CHAR_VALUE
)
1987 lr_error (ldfile
, "invalid character name");
1992 newp
= (struct translit_ignore_t
*)
1993 obstack_alloc (&ctype
->mempool
, sizeof (struct translit_ignore_t
));
/* Push the new entry onto the front of the ignore list.  */
1998 newp
->next
= ctype
->translit_ignore
;
1999 ctype
->translit_ignore
= newp
;
2002 /* Now we expect either a semicolon, an ellipsis, or the end of the
2004 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
2006 if (now
->tok
== tok_ellipsis2
|| now
->tok
== tok_ellipsis2_2
)
2008 /* XXX Should we bother implementing `....'? `...' certainly
2009 will not be implemented. */
2011 int step
= now
->tok
== tok_ellipsis2_2
? 2 : 1;
2013 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
2015 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2018 _("premature end of `translit_ignore' definition"));
2022 if (now
->tok
!= tok_bsymbol
&& now
->tok
!= tok_ucs4
)
2024 lr_error (ldfile
, _("syntax error"));
2025 lr_ignore_rest (ldfile
, 0);
2029 if (now
->tok
== tok_ucs4
)
2032 /* Try to get the value. */
2033 to
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
2034 now
->val
.str
.lenmb
);
2036 if (to
== ILLEGAL_CHAR_VALUE
)
2037 lr_error (ldfile
, "invalid character name");
2040 /* Make sure the `to'-value is larger. */
2047 lr_error (ldfile
, _("\
2048 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2049 (to
| from
) < 65536 ? 4 : 8, to
,
2050 (to
| from
) < 65536 ? 4 : 8, from
);
2053 /* And the next token. */
2054 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
2057 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2061 if (now
->tok
== tok_semicolon
)
2065 /* If we come here something is wrong. */
2066 lr_error (ldfile
, _("syntax error"));
2067 lr_ignore_rest (ldfile
, 0);
2073 /* The parser for the LC_CTYPE section of the locale definition. */
2075 ctype_read (struct linereader
*ldfile
, struct localedef_t
*result
,
2076 const struct charmap_t
*charmap
, const char *repertoire_name
,
2079 struct repertoire_t
*repertoire
= NULL
;
2080 struct locale_ctype_t
*ctype
;
2082 enum token_t nowtok
;
2084 uint32_t last_wch
= 0;
2085 enum token_t last_token
;
2086 enum token_t ellipsis_token
;
2088 char last_charcode
[16];
2089 size_t last_charcode_len
= 0;
2090 const char *last_str
= NULL
;
2092 struct localedef_t
*copy_locale
= NULL
;
2094 /* Get the repertoire we have to use. */
2095 if (repertoire_name
!= NULL
)
2096 repertoire
= repertoire_read (repertoire_name
);
2098 /* The rest of the line containing `LC_CTYPE' must be free. */
2099 lr_ignore_rest (ldfile
, 1);
2104 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2107 while (nowtok
== tok_eol
);
2109 /* If we see `copy' now we are almost done. */
2110 if (nowtok
== tok_copy
)
2112 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2113 if (now
->tok
!= tok_string
)
2115 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2119 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2120 while (now
->tok
!= tok_eof
&& now
->tok
!= tok_end
);
2122 if (now
->tok
!= tok_eof
2123 || (now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
),
2124 now
->tok
== tok_eof
))
2125 lr_error (ldfile
, _("%s: premature end of file"), "LC_CTYPE");
2126 else if (now
->tok
!= tok_lc_ctype
)
2128 lr_error (ldfile
, _("\
2129 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2130 lr_ignore_rest (ldfile
, 0);
2133 lr_ignore_rest (ldfile
, 1);
2138 if (! ignore_content
)
2140 /* Get the locale definition. */
2141 copy_locale
= load_locale (LC_CTYPE
, now
->val
.str
.startmb
,
2142 repertoire_name
, charmap
, NULL
);
2143 if ((copy_locale
->avail
& CTYPE_LOCALE
) == 0)
2145 /* Not yet loaded. So do it now. */
2146 if (locfile_read (copy_locale
, charmap
) != 0)
2150 if (copy_locale
->categories
[LC_CTYPE
].ctype
== NULL
)
2154 lr_ignore_rest (ldfile
, 1);
2156 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2160 /* Prepare the data structures. */
2161 ctype_startup (ldfile
, result
, charmap
, copy_locale
, ignore_content
);
2162 ctype
= result
->categories
[LC_CTYPE
].ctype
;
2164 /* Remember the repertoire we use. */
2165 if (!ignore_content
)
2166 ctype
->repertoire
= repertoire
;
2170 unsigned long int class_bit
= 0;
2171 unsigned long int class256_bit
= 0;
2172 int handle_digits
= 0;
2174 /* Of course we don't proceed beyond the end of file. */
2175 if (nowtok
== tok_eof
)
2178 /* Ignore empty lines. */
2179 if (nowtok
== tok_eol
)
2181 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2189 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2190 while (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2192 ctype_class_new (ldfile
, ctype
, now
->val
.str
.startmb
);
2193 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2194 if (now
->tok
!= tok_semicolon
)
2196 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2198 if (now
->tok
!= tok_eol
)
2200 %s: syntax error in definition of new character class"), "LC_CTYPE");
2204 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2205 while (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2207 ctype_map_new (ldfile
, ctype
, now
->val
.str
.startmb
, charmap
);
2208 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2209 if (now
->tok
!= tok_semicolon
)
2211 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2213 if (now
->tok
!= tok_eol
)
2215 %s: syntax error in definition of new character map"), "LC_CTYPE");
2219 /* Ignore the rest of the line if we don't need the input of
2223 lr_ignore_rest (ldfile
, 0);
2227 /* We simply forget the `class' keyword and use the following
2228 operand to determine the bit. */
2229 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2230 if (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2232 /* Must can be one of the predefined class names. */
2233 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
2234 if (strcmp (ctype
->classnames
[cnt
], now
->val
.str
.startmb
) == 0)
2236 if (cnt
>= ctype
->nr_charclass
)
2238 /* OK, it's a new class. */
2239 ctype_class_new (ldfile
, ctype
, now
->val
.str
.startmb
);
2241 class_bit
= _ISwbit (ctype
->nr_charclass
- 1);
2245 class_bit
= _ISwbit (cnt
);
2247 free (now
->val
.str
.startmb
);
2250 else if (now
->tok
== tok_digit
)
2251 goto handle_tok_digit
;
2252 else if (now
->tok
< tok_upper
|| now
->tok
> tok_blank
)
2256 class_bit
= BITw (now
->tok
);
2257 class256_bit
= BIT (now
->tok
);
2260 /* The next character must be a semicolon. */
2261 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2262 if (now
->tok
!= tok_semicolon
)
2264 goto read_charclass
;
2277 /* Ignore the rest of the line if we don't need the input of
2281 lr_ignore_rest (ldfile
, 0);
2285 class_bit
= BITw (now
->tok
);
2286 class256_bit
= BIT (now
->tok
);
2289 ctype
->class_done
|= class_bit
;
2290 last_token
= tok_none
;
2291 ellipsis_token
= tok_none
;
2293 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2294 while (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
2297 struct charseq
*seq
;
2299 if (ellipsis_token
== tok_none
)
2301 if (get_character (now
, charmap
, repertoire
, &seq
, &wch
))
2304 if (!ignore_content
&& seq
!= NULL
&& seq
->nbytes
== 1)
2305 /* Yep, we can store information about this byte
2307 ctype
->class256_collection
[seq
->bytes
[0]] |= class256_bit
;
2309 if (!ignore_content
&& wch
!= ILLEGAL_CHAR_VALUE
2311 /* We have the UCS4 position. */
2312 *find_idx (ctype
, &ctype
->class_collection
,
2313 &ctype
->class_collection_max
,
2314 &ctype
->class_collection_act
, wch
) |= class_bit
;
2316 last_token
= now
->tok
;
2317 /* Terminate the string. */
2318 if (last_token
== tok_bsymbol
)
2320 now
->val
.str
.startmb
[now
->val
.str
.lenmb
] = '\0';
2321 last_str
= now
->val
.str
.startmb
;
2326 memcpy (last_charcode
, now
->val
.charcode
.bytes
, 16);
2327 last_charcode_len
= now
->val
.charcode
.nbytes
;
2329 if (!ignore_content
&& handle_digits
== 1)
2331 /* We must store the digit values. */
2332 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
2334 ctype
->mbdigits_max
+= 10;
2335 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
2336 (ctype
->mbdigits_max
2337 * sizeof (char *)));
2338 ctype
->wcdigits_max
+= 10;
2339 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
2340 (ctype
->wcdigits_max
2341 * sizeof (uint32_t)));
2344 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
2345 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
2347 else if (!ignore_content
&& handle_digits
== 2)
2349 /* We must store the digit values. */
2350 if (ctype
->outdigits_act
>= 10)
2352 lr_error (ldfile
, _("\
2353 %s: field `%s' does not contain exactly ten entries"),
2354 "LC_CTYPE", "outdigit");
2355 lr_ignore_rest (ldfile
, 0);
2359 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
2360 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
2361 ++ctype
->outdigits_act
;
2366 /* Now it gets complicated. We have to resolve the
2367 ellipsis problem. First we must distinguish between
2368 the different kind of ellipsis and this must match the
2369 tokens we have seen. */
2370 assert (last_token
!= tok_none
);
2372 if (last_token
!= now
->tok
)
2374 lr_error (ldfile
, _("\
2375 ellipsis range must be marked by two operands of same type"));
2376 lr_ignore_rest (ldfile
, 0);
2380 if (last_token
== tok_bsymbol
)
2382 if (ellipsis_token
== tok_ellipsis3
)
2383 lr_error (ldfile
, _("with symbolic name range values \
2384 the absolute ellipsis `...' must not be used"));
2386 charclass_symbolic_ellipsis (ldfile
, ctype
, charmap
,
2387 repertoire
, now
, last_str
,
2388 class256_bit
, class_bit
,
2393 handle_digits
, step
);
2395 else if (last_token
== tok_ucs4
)
2397 if (ellipsis_token
!= tok_ellipsis2
)
2398 lr_error (ldfile
, _("\
2399 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2401 charclass_ucs4_ellipsis (ldfile
, ctype
, charmap
,
2402 repertoire
, now
, last_wch
,
2403 class256_bit
, class_bit
,
2404 ignore_content
, handle_digits
,
2409 assert (last_token
== tok_charcode
);
2411 if (ellipsis_token
!= tok_ellipsis3
)
2412 lr_error (ldfile
, _("\
2413 with character code range values one must use the absolute ellipsis `...'"));
2415 charclass_charcode_ellipsis (ldfile
, ctype
, charmap
,
2419 class256_bit
, class_bit
,
2424 /* Now we have used the last value. */
2425 last_token
= tok_none
;
2428 /* Next we expect a semicolon or the end of the line. */
2429 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2430 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2433 if (last_token
!= tok_none
2434 && now
->tok
>= tok_ellipsis2
&& now
->tok
<= tok_ellipsis4_2
)
2436 if (now
->tok
== tok_ellipsis2_2
)
2438 now
->tok
= tok_ellipsis2
;
2441 else if (now
->tok
== tok_ellipsis4_2
)
2443 now
->tok
= tok_ellipsis4
;
2447 ellipsis_token
= now
->tok
;
2449 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2453 if (now
->tok
!= tok_semicolon
)
2456 /* And get the next character. */
2457 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2459 ellipsis_token
= tok_none
;
2465 /* Ignore the rest of the line if we don't need the input of
2469 lr_ignore_rest (ldfile
, 0);
2474 class_bit
= _ISwdigit
;
2475 class256_bit
= _ISdigit
;
2477 goto read_charclass
;
2480 /* Ignore the rest of the line if we don't need the input of
2484 lr_ignore_rest (ldfile
, 0);
2488 if (ctype
->outdigits_act
!= 0)
2489 lr_error (ldfile
, _("\
2490 %s: field `%s' declared more than once"),
2491 "LC_CTYPE", "outdigit");
2495 goto read_charclass
;
2498 /* Ignore the rest of the line if we don't need the input of
2502 lr_ignore_rest (ldfile
, 0);
2510 /* Ignore the rest of the line if we don't need the input of
2514 lr_ignore_rest (ldfile
, 0);
2522 /* Ignore the rest of the line if we don't need the input of
2526 lr_ignore_rest (ldfile
, 0);
2530 /* We simply forget the `map' keyword and use the following
2531 operand to determine the mapping. */
2532 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2533 if (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2537 for (cnt
= 2; cnt
< ctype
->map_collection_nr
; ++cnt
)
2538 if (strcmp (now
->val
.str
.startmb
, ctype
->mapnames
[cnt
]) == 0)
2541 if (cnt
< ctype
->map_collection_nr
)
2542 free (now
->val
.str
.startmb
);
2544 /* OK, it's a new map. */
2545 ctype_map_new (ldfile
, ctype
, now
->val
.str
.startmb
, charmap
);
2549 else if (now
->tok
< tok_toupper
|| now
->tok
> tok_tolower
)
2552 mapidx
= now
->tok
- tok_toupper
;
2554 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2555 /* This better should be a semicolon. */
2556 if (now
->tok
!= tok_semicolon
)
2560 /* Test whether this mapping was already defined. */
2561 if (ctype
->tomap_done
[mapidx
])
2563 lr_error (ldfile
, _("duplicated definition for mapping `%s'"),
2564 ctype
->mapnames
[mapidx
]);
2565 lr_ignore_rest (ldfile
, 0);
2568 ctype
->tomap_done
[mapidx
] = 1;
2570 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2571 while (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
2573 struct charseq
*from_seq
;
2575 struct charseq
*to_seq
;
2578 /* Every pair starts with an opening brace. */
2579 if (now
->tok
!= tok_open_brace
)
2582 /* Next comes the from-value. */
2583 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2584 if (get_character (now
, charmap
, repertoire
, &from_seq
,
2588 /* The next is a comma. */
2589 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2590 if (now
->tok
!= tok_comma
)
2593 /* And the other value. */
2594 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2595 if (get_character (now
, charmap
, repertoire
, &to_seq
,
2599 /* And the last thing is the closing brace. */
2600 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2601 if (now
->tok
!= tok_close_brace
)
2604 if (!ignore_content
)
2606 /* Check whether the mapping converts from an ASCII value
2607 to a non-ASCII value. */
2608 if (from_seq
!= NULL
&& from_seq
->nbytes
== 1
2609 && isascii (from_seq
->bytes
[0])
2610 && to_seq
!= NULL
&& (to_seq
->nbytes
!= 1
2611 || !isascii (to_seq
->bytes
[0])))
2612 ctype
->to_nonascii
= 1;
2614 if (mapidx
< 2 && from_seq
!= NULL
&& to_seq
!= NULL
2615 && from_seq
->nbytes
== 1 && to_seq
->nbytes
== 1)
2616 /* We can use this value. */
2617 ctype
->map256_collection
[mapidx
][from_seq
->bytes
[0]]
2620 if (from_wch
!= ILLEGAL_CHAR_VALUE
2621 && to_wch
!= ILLEGAL_CHAR_VALUE
)
2622 /* Both correct values. */
2623 *find_idx (ctype
, &ctype
->map_collection
[mapidx
],
2624 &ctype
->map_collection_max
[mapidx
],
2625 &ctype
->map_collection_act
[mapidx
],
2629 /* Now comes a semicolon or the end of the line/file. */
2630 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2631 if (now
->tok
== tok_semicolon
)
2632 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2636 case tok_translit_start
:
2637 /* Ignore the entire translit section with its peculiar syntax
2638 if we don't need the input. */
2643 lr_ignore_rest (ldfile
, 0);
2644 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2646 while (now
->tok
!= tok_translit_end
&& now
->tok
!= tok_eof
);
2648 if (now
->tok
== tok_eof
)
2649 lr_error (ldfile
, _(\
2650 "%s: `translit_start' section does not end with `translit_end'"),
2656 /* The rest of the line had better be empty. */
2657 lr_ignore_rest (ldfile
, 1);
2659 /* We count here the number of allocated entries in the `translit'
2663 ldfile
->translate_strings
= 1;
2664 ldfile
->return_widestr
= 1;
2666 /* We proceed until we see the `translit_end' token. */
2667 while (now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
),
2668 now
->tok
!= tok_translit_end
&& now
->tok
!= tok_eof
)
2670 if (now
->tok
== tok_eol
)
2671 /* Ignore empty lines. */
2674 if (now
->tok
== tok_include
)
2676 /* We have to include locale. */
2677 const char *locale_name
;
2678 const char *repertoire_name
;
2679 struct translit_include_t
*include_stmt
, **include_ptr
;
2681 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2682 /* This should be a string or an identifier. In any
2683 case something to name a locale. */
2684 if (now
->tok
!= tok_string
&& now
->tok
!= tok_ident
)
2687 lr_error (ldfile
, _("%s: syntax error"), "LC_CTYPE");
2688 lr_ignore_rest (ldfile
, 0);
2691 locale_name
= now
->val
.str
.startmb
;
2693 /* Next should be a semicolon. */
2694 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2695 if (now
->tok
!= tok_semicolon
)
2696 goto translit_syntax
;
2698 /* Now the repertoire name. */
2699 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2700 if ((now
->tok
!= tok_string
&& now
->tok
!= tok_ident
)
2701 || now
->val
.str
.startmb
== NULL
)
2702 goto translit_syntax
;
2703 repertoire_name
= now
->val
.str
.startmb
;
2704 if (repertoire_name
[0] == '\0')
2705 /* Ignore the empty string. */
2706 repertoire_name
= NULL
;
2708 /* Save the include statement for later processing. */
2709 include_stmt
= (struct translit_include_t
*)
2710 xmalloc (sizeof (struct translit_include_t
));
2711 include_stmt
->copy_locale
= locale_name
;
2712 include_stmt
->copy_repertoire
= repertoire_name
;
2713 include_stmt
->next
= NULL
;
2715 include_ptr
= &ctype
->translit_include
;
2716 while (*include_ptr
!= NULL
)
2717 include_ptr
= &(*include_ptr
)->next
;
2718 *include_ptr
= include_stmt
;
2720 /* The rest of the line must be empty. */
2721 lr_ignore_rest (ldfile
, 1);
2723 /* Make sure the locale is read. */
2724 add_to_readlist (LC_CTYPE
, locale_name
, repertoire_name
,
2728 else if (now
->tok
== tok_default_missing
)
2734 /* We expect a single character or string as the
2736 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2737 wstr
= read_widestring (ldfile
, now
, charmap
,
2742 if (ctype
->default_missing
!= NULL
)
2744 lr_error (ldfile
, _("\
2745 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2746 WITH_CUR_LOCALE (error_at_line (0, 0,
2747 ctype
->default_missing_file
,
2748 ctype
->default_missing_lineno
,
2750 previous definition was here")));
2754 ctype
->default_missing
= wstr
;
2755 ctype
->default_missing_file
= ldfile
->fname
;
2756 ctype
->default_missing_lineno
= ldfile
->lineno
;
2758 /* We can have more entries, ignore them. */
2759 lr_ignore_rest (ldfile
, 0);
2762 else if (wstr
== (uint32_t *) -1l)
2763 /* This was a syntax error. */
2766 /* Maybe there is another replacement we can use. */
2767 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2768 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2770 /* Nothing found. We tell the user. */
2771 lr_error (ldfile
, _("\
2772 %s: no representable `default_missing' definition found"), "LC_CTYPE");
2775 if (now
->tok
!= tok_semicolon
)
2776 goto translit_syntax
;
2781 else if (now
->tok
== tok_translit_ignore
)
2783 read_translit_ignore_entry (ldfile
, ctype
, charmap
,
2788 read_translit_entry (ldfile
, ctype
, now
, charmap
, repertoire
);
2790 ldfile
->return_widestr
= 0;
2792 if (now
->tok
== tok_eof
)
2793 lr_error (ldfile
, _(\
2794 "%s: `translit_start' section does not end with `translit_end'"),
2800 /* Ignore the rest of the line if we don't need the input of
2804 lr_ignore_rest (ldfile
, 0);
2808 /* This could mean one of several things. First test whether
2809 it's a character class name. */
2810 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
2811 if (strcmp (now
->val
.str
.startmb
, ctype
->classnames
[cnt
]) == 0)
2813 if (cnt
< ctype
->nr_charclass
)
2815 class_bit
= _ISwbit (cnt
);
2816 class256_bit
= cnt
<= 11 ? _ISbit (cnt
) : 0;
2817 free (now
->val
.str
.startmb
);
2818 goto read_charclass
;
2820 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
)
2821 if (strcmp (now
->val
.str
.startmb
, ctype
->mapnames
[cnt
]) == 0)
2823 if (cnt
< ctype
->map_collection_nr
)
2826 free (now
->val
.str
.startmb
);
2832 /* Next we assume `LC_CTYPE'. */
2833 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2834 if (now
->tok
== tok_eof
)
2836 if (now
->tok
== tok_eol
)
2837 lr_error (ldfile
, _("%s: incomplete `END' line"),
2839 else if (now
->tok
!= tok_lc_ctype
)
2840 lr_error (ldfile
, _("\
2841 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2842 lr_ignore_rest (ldfile
, now
->tok
== tok_lc_ctype
);
2847 if (now
->tok
!= tok_eof
)
2848 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2851 /* Prepare for the next round. */
2852 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2856 /* When we come here we reached the end of the file. */
2857 lr_error (ldfile
, _("%s: premature end of file"), "LC_CTYPE");
2861 /* Subroutine of set_class_defaults, below.
   For each character code CH in the inclusive range FROM..TO, mark the
   class bit BITPOS in both tables of CTYPE: the byte-indexed
   class256_collection (using the _ISbit form of the bit) and the
   wide-character class_collection (using the _ISwbit form).  CHARMAP
   supplies the byte encoding of each character; a character that is
   missing from the charmap or that needs more than one byte is reported
   through error.
   NOTE(review): several lines of this function (return type, braces,
   local declarations, the guards around the two lookups) are not visible
   in this view.  */
2863 set_one_default (struct locale_ctype_t
*ctype
,
2864 const struct charmap_t
*charmap
,
2865 int bitpos
, int from
, int to
)
2869 int bit
= _ISbit (bitpos
);
2870 int bitw
= _ISwbit (bitpos
);
2871 /* Define string. */
2874 for (ch
= from
; ch
<= to
; ++ch
)
2876 struct charseq
*seq
;
/* First lookup: a name of length 1 taken from tmp.  */
2879 seq
= charmap_find_value (charmap
, tmp
, 1);
/* Second lookup under the nine-character name U%08X built from CH;
   presumably the fallback when the first lookup found nothing (the
   guarding condition is not visible here).  */
2883 sprintf (buf
, "U%08X", ch
);
2884 seq
= charmap_find_value (charmap
, buf
, 9);
2889 WITH_CUR_LOCALE (error (0, 0, _("\
2890 %s: character `%s' not defined while needed as default value"),
2893 else if (seq
->nbytes
!= 1)
2894 WITH_CUR_LOCALE (error (0, 0, _("\
2895 %s: character `%s' in charmap not representable with one byte"),
/* The byte table is indexed by the single byte of the encoding.  */
2898 ctype
->class256_collection
[seq
->bytes
[0]] |= bit
;
2900 /* No need to search here, the ASCII value is also the Unicode
2902 ELEM (ctype
, class_collection
, , ch
) |= bitw
;
2907 set_class_defaults (struct locale_ctype_t
*ctype
,
2908 const struct charmap_t
*charmap
,
2909 struct repertoire_t
*repertoire
)
2911 #define set_default(bitpos, from, to) \
2912 set_one_default (ctype, charmap, bitpos, from, to)
2914 /* This function defines the default values for the classes and conversions
2915 according to POSIX.2 2.5.2.1.
2916 It may seem that the order of these if-blocks is arbitrary but it is NOT.
2917 Don't move them unless you know what you are doing! */
2919 /* Set default values if keyword was not present. */
2920 if ((ctype
->class_done
& BITw (tok_upper
)) == 0)
2921 /* "If this keyword [upper] is not specified, the uppercase letters
2922 `A' through `Z', ..., shall automatically belong to this class,
2923 with implementation defined character values." [P1003.2, 2.5.2.1] */
2924 set_default (BITPOS (tok_upper
), 'A', 'Z');
2926 if ((ctype
->class_done
& BITw (tok_lower
)) == 0)
2927 /* "If this keyword [lower] is not specified, the lowercase letters
2928 `a' through `z', ..., shall automatically belong to this class,
2929 with implementation defined character values." [P1003.2, 2.5.2.1] */
2930 set_default (BITPOS (tok_lower
), 'a', 'z');
2932 if ((ctype
->class_done
& BITw (tok_alpha
)) == 0)
2934 /* Table 2-6 in P1003.2 says that characters in class `upper' or
2935 class `lower' *must* be in class `alpha'. */
2936 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
);
2937 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
);
2939 for (size_t cnt
= 0; cnt
< 256; ++cnt
)
2940 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
2941 ctype
->class256_collection
[cnt
] |= BIT (tok_alpha
);
2943 for (size_t cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
2944 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
2945 ctype
->class_collection
[cnt
] |= BITw (tok_alpha
);
2948 if ((ctype
->class_done
& BITw (tok_digit
)) == 0)
2949 /* "If this keyword [digit] is not specified, the digits `0' through
2950 `9', ..., shall automatically belong to this class, with
2951 implementation-defined character values." [P1003.2, 2.5.2.1] */
2952 set_default (BITPOS (tok_digit
), '0', '9');
2954 /* "Only characters specified for the `alpha' and `digit' keywords
2955 shall be specified. Characters specified for the keywords `alpha'
2956 and `digit' are automatically included in this class." */
2958 unsigned long int mask
= BIT (tok_alpha
) | BIT (tok_digit
);
2959 unsigned long int maskw
= BITw (tok_alpha
) | BITw (tok_digit
);
2961 for (size_t cnt
= 0; cnt
< 256; ++cnt
)
2962 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
2963 ctype
->class256_collection
[cnt
] |= BIT (tok_alnum
);
2965 for (size_t cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
2966 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
2967 ctype
->class_collection
[cnt
] |= BITw (tok_alnum
);
2970 if ((ctype
->class_done
& BITw (tok_space
)) == 0)
2971 /* "If this keyword [space] is not specified, the characters <space>,
2972 <form-feed>, <newline>, <carriage-return>, <tab>, and
2973 <vertical-tab>, ..., shall automatically belong to this class,
2974 with implementation-defined character values." [P1003.2, 2.5.2.1] */
2976 struct charseq
*seq
;
2978 seq
= charmap_find_value (charmap
, "space", 5);
2980 seq
= charmap_find_value (charmap
, "SP", 2);
2982 seq
= charmap_find_value (charmap
, "U00000020", 9);
2986 WITH_CUR_LOCALE (error (0, 0, _("\
2987 %s: character `%s' not defined while needed as default value"),
2988 "LC_CTYPE", "<space>"));
2990 else if (seq
->nbytes
!= 1)
2991 WITH_CUR_LOCALE (error (0, 0, _("\
2992 %s: character `%s' in charmap not representable with one byte"),
2993 "LC_CTYPE", "<space>"));
2995 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
2997 /* No need to search. */
2998 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_space
);
3000 seq
= charmap_find_value (charmap
, "form-feed", 9);
3002 seq
= charmap_find_value (charmap
, "U0000000C", 9);
3006 WITH_CUR_LOCALE (error (0, 0, _("\
3007 %s: character `%s' not defined while needed as default value"),
3008 "LC_CTYPE", "<form-feed>"));
3010 else if (seq
->nbytes
!= 1)
3011 WITH_CUR_LOCALE (error (0, 0, _("\
3012 %s: character `%s' in charmap not representable with one byte"),
3013 "LC_CTYPE", "<form-feed>"));
3015 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3017 /* No need to search. */
3018 ELEM (ctype
, class_collection
, , L
'\f') |= BITw (tok_space
);
3021 seq
= charmap_find_value (charmap
, "newline", 7);
3023 seq
= charmap_find_value (charmap
, "U0000000A", 9);
3027 WITH_CUR_LOCALE (error (0, 0, _("\
3028 %s: character `%s' not defined while needed as default value"),
3029 "LC_CTYPE", "<newline>"));
3031 else if (seq
->nbytes
!= 1)
3032 WITH_CUR_LOCALE (error (0, 0, _("\
3033 %s: character `%s' in charmap not representable with one byte"),
3034 "LC_CTYPE", "<newline>"));
3036 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3038 /* No need to search. */
3039 ELEM (ctype
, class_collection
, , L
'\n') |= BITw (tok_space
);
3042 seq
= charmap_find_value (charmap
, "carriage-return", 15);
3044 seq
= charmap_find_value (charmap
, "U0000000D", 9);
3048 WITH_CUR_LOCALE (error (0, 0, _("\
3049 %s: character `%s' not defined while needed as default value"),
3050 "LC_CTYPE", "<carriage-return>"));
3052 else if (seq
->nbytes
!= 1)
3053 WITH_CUR_LOCALE (error (0, 0, _("\
3054 %s: character `%s' in charmap not representable with one byte"),
3055 "LC_CTYPE", "<carriage-return>"));
3057 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3059 /* No need to search. */
3060 ELEM (ctype
, class_collection
, , L
'\r') |= BITw (tok_space
);
3063 seq
= charmap_find_value (charmap
, "tab", 3);
3065 seq
= charmap_find_value (charmap
, "U00000009", 9);
3069 WITH_CUR_LOCALE (error (0, 0, _("\
3070 %s: character `%s' not defined while needed as default value"),
3071 "LC_CTYPE", "<tab>"));
3073 else if (seq
->nbytes
!= 1)
3074 WITH_CUR_LOCALE (error (0, 0, _("\
3075 %s: character `%s' in charmap not representable with one byte"),
3076 "LC_CTYPE", "<tab>"));
3078 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3080 /* No need to search. */
3081 ELEM (ctype
, class_collection
, , L
'\t') |= BITw (tok_space
);
3084 seq
= charmap_find_value (charmap
, "vertical-tab", 12);
3086 seq
= charmap_find_value (charmap
, "U0000000B", 9);
3090 WITH_CUR_LOCALE (error (0, 0, _("\
3091 %s: character `%s' not defined while needed as default value"),
3092 "LC_CTYPE", "<vertical-tab>"));
3094 else if (seq
->nbytes
!= 1)
3095 WITH_CUR_LOCALE (error (0, 0, _("\
3096 %s: character `%s' in charmap not representable with one byte"),
3097 "LC_CTYPE", "<vertical-tab>"));
3099 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3101 /* No need to search. */
3102 ELEM (ctype
, class_collection
, , L
'\v') |= BITw (tok_space
);
3105 if ((ctype
->class_done
& BITw (tok_xdigit
)) == 0)
3106 /* "If this keyword [xdigit] is not specified, the digits `0' to `9', the
3107 uppercase letters `A' through `F', and the lowercase letters `a'
3108 through `f', ..., shall automatically belong to this class, with
3109 implementation defined character values." [P1003.2, 2.5.2.1] */
3111 set_default (BITPOS (tok_xdigit
), '0', '9');
3112 set_default (BITPOS (tok_xdigit
), 'A', 'F');
3113 set_default (BITPOS (tok_xdigit
), 'a', 'f');
3116 if ((ctype
->class_done
& BITw (tok_blank
)) == 0)
3117 /* "If this keyword [blank] is unspecified, the characters <space> and
3118 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
3120 struct charseq
*seq
;
3122 seq
= charmap_find_value (charmap
, "space", 5);
3124 seq
= charmap_find_value (charmap
, "SP", 2);
3126 seq
= charmap_find_value (charmap
, "U00000020", 9);
3130 WITH_CUR_LOCALE (error (0, 0, _("\
3131 %s: character `%s' not defined while needed as default value"),
3132 "LC_CTYPE", "<space>"));
3134 else if (seq
->nbytes
!= 1)
3135 WITH_CUR_LOCALE (error (0, 0, _("\
3136 %s: character `%s' in charmap not representable with one byte"),
3137 "LC_CTYPE", "<space>"));
3139 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_blank
);
3141 /* No need to search. */
3142 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_blank
);
3145 seq
= charmap_find_value (charmap
, "tab", 3);
3147 seq
= charmap_find_value (charmap
, "U00000009", 9);
3151 WITH_CUR_LOCALE (error (0, 0, _("\
3152 %s: character `%s' not defined while needed as default value"),
3153 "LC_CTYPE", "<tab>"));
3155 else if (seq
->nbytes
!= 1)
3156 WITH_CUR_LOCALE (error (0, 0, _("\
3157 %s: character `%s' in charmap not representable with one byte"),
3158 "LC_CTYPE", "<tab>"));
3160 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_blank
);
3162 /* No need to search. */
3163 ELEM (ctype
, class_collection
, , L
'\t') |= BITw (tok_blank
);
3166 if ((ctype
->class_done
& BITw (tok_graph
)) == 0)
3167 /* "If this keyword [graph] is not specified, characters specified for
3168 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3169 shall belong to this character class." [P1003.2, 2.5.2.1] */
3171 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
) |
3172 BIT (tok_alpha
) | BIT (tok_digit
) | BIT (tok_xdigit
) | BIT (tok_punct
);
3173 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
) |
3174 BITw (tok_alpha
) | BITw (tok_digit
) | BITw (tok_xdigit
) |
3177 for (size_t cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3178 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3179 ctype
->class_collection
[cnt
] |= BITw (tok_graph
);
3181 for (size_t cnt
= 0; cnt
< 256; ++cnt
)
3182 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3183 ctype
->class256_collection
[cnt
] |= BIT (tok_graph
);
3186 if ((ctype
->class_done
& BITw (tok_print
)) == 0)
3187 /* "If this keyword [print] is not provided, characters specified for
3188 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3189 and the <space> character shall belong to this character class."
3190 [P1003.2, 2.5.2.1] */
3192 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
) |
3193 BIT (tok_alpha
) | BIT (tok_digit
) | BIT (tok_xdigit
) | BIT (tok_punct
);
3194 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
) |
3195 BITw (tok_alpha
) | BITw (tok_digit
) | BITw (tok_xdigit
) |
3197 struct charseq
*seq
;
3199 for (size_t cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3200 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3201 ctype
->class_collection
[cnt
] |= BITw (tok_print
);
3203 for (size_t cnt
= 0; cnt
< 256; ++cnt
)
3204 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3205 ctype
->class256_collection
[cnt
] |= BIT (tok_print
);
3208 seq
= charmap_find_value (charmap
, "space", 5);
3210 seq
= charmap_find_value (charmap
, "SP", 2);
3212 seq
= charmap_find_value (charmap
, "U00000020", 9);
3216 WITH_CUR_LOCALE (error (0, 0, _("\
3217 %s: character `%s' not defined while needed as default value"),
3218 "LC_CTYPE", "<space>"));
3220 else if (seq
->nbytes
!= 1)
3221 WITH_CUR_LOCALE (error (0, 0, _("\
3222 %s: character `%s' in charmap not representable with one byte"),
3223 "LC_CTYPE", "<space>"));
3225 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_print
);
3227 /* No need to search. */
3228 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_print
);
3231 if (ctype
->tomap_done
[0] == 0)
3232 /* "If this keyword [toupper] is not specified, the lowercase letters
3233 `a' through `z', and their corresponding uppercase letters `A' to
3234 `Z', ..., shall automatically be included, with implementation-
3235 defined character values." [P1003.2, 2.5.2.1] */
3240 strcpy (tmp
, "<?>");
3242 for (ch
= 'a'; ch
<= 'z'; ++ch
)
3244 struct charseq
*seq_from
, *seq_to
;
3248 seq_from
= charmap_find_value (charmap
, &tmp
[1], 1);
3249 if (seq_from
== NULL
)
3252 sprintf (buf
, "U%08X", ch
);
3253 seq_from
= charmap_find_value (charmap
, buf
, 9);
3255 if (seq_from
== NULL
)
3258 WITH_CUR_LOCALE (error (0, 0, _("\
3259 %s: character `%s' not defined while needed as default value"),
3262 else if (seq_from
->nbytes
!= 1)
3265 WITH_CUR_LOCALE (error (0, 0, _("\
3266 %s: character `%s' needed as default value not representable with one byte"),
3271 /* This conversion is implementation defined. */
3272 tmp
[1] = (char) (ch
+ ('A' - 'a'));
3273 seq_to
= charmap_find_value (charmap
, &tmp
[1], 1);
3277 sprintf (buf
, "U%08X", ch
+ ('A' - 'a'));
3278 seq_to
= charmap_find_value (charmap
, buf
, 9);
3283 WITH_CUR_LOCALE (error (0, 0, _("\
3284 %s: character `%s' not defined while needed as default value"),
3287 else if (seq_to
->nbytes
!= 1)
3290 WITH_CUR_LOCALE (error (0, 0, _("\
3291 %s: character `%s' needed as default value not representable with one byte"),
3295 /* The index [0] is determined by the order of the
3296 `ctype_map_newP' calls in `ctype_startup'. */
3297 ctype
->map256_collection
[0][seq_from
->bytes
[0]]
3301 /* No need to search. */
3302 ELEM (ctype
, map_collection
, [0], ch
) = ch
+ ('A' - 'a');
3306 if (ctype
->tomap_done
[1] == 0)
3307 /* "If this keyword [tolower] is not specified, the mapping shall be
3308 the reverse mapping of the one specified to `toupper'." [P1003.2] */
3310 for (size_t cnt
= 0; cnt
< ctype
->map_collection_act
[0]; ++cnt
)
3311 if (ctype
->map_collection
[0][cnt
] != 0)
3312 ELEM (ctype
, map_collection
, [1],
3313 ctype
->map_collection
[0][cnt
])
3314 = ctype
->charnames
[cnt
];
3316 for (size_t cnt
= 0; cnt
< 256; ++cnt
)
3317 if (ctype
->map256_collection
[0][cnt
] != 0)
3318 ctype
->map256_collection
[1][ctype
->map256_collection
[0][cnt
]] = cnt
;
3321 if (ctype
->outdigits_act
!= 10)
3323 if (ctype
->outdigits_act
!= 0)
3324 WITH_CUR_LOCALE (error (0, 0, _("\
3325 %s: field `%s' does not contain exactly ten entries"),
3326 "LC_CTYPE", "outdigit"));
3328 for (size_t cnt
= ctype
->outdigits_act
; cnt
< 10; ++cnt
)
3330 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3331 (char *) digits
+ cnt
,
3334 if (ctype
->mboutdigits
[cnt
] == NULL
)
3335 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3337 strlen (longnames
[cnt
]));
3339 if (ctype
->mboutdigits
[cnt
] == NULL
)
3340 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3343 if (ctype
->mboutdigits
[cnt
] == NULL
)
3345 /* Provide a replacement. */
3346 WITH_CUR_LOCALE (error (0, 0, _("\
3347 no output digits defined and none of the standard names in the charmap")));
3349 ctype
->mboutdigits
[cnt
] = obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
3350 sizeof (struct charseq
)
3353 /* This is better than nothing. */
3354 ctype
->mboutdigits
[cnt
]->bytes
[0] = digits
[cnt
];
3355 ctype
->mboutdigits
[cnt
]->nbytes
= 1;
3358 ctype
->wcoutdigits
[cnt
] = L
'0' + cnt
;
3361 ctype
->outdigits_act
= 10;
3368 /* Initialize. Assumes t->p and t->q have already been set.
   Puts the table T into the empty state: the allocated and the used
   counts of all three levels are reset to zero, so the first
   wctype_table_add call has to grow every level.
   NOTE(review): the statements on the intervening lines (presumably
   clearing the level1/level2/level3 pointers) are not visible in this
   view.  */
3370 wctype_table_init (struct wctype_table
*t
)
3373 t
->level1_alloc
= t
->level1_size
= 0;
3375 t
->level2_alloc
= t
->level2_size
= 0;
3377 t
->level3_alloc
= t
->level3_size
= 0;
3380 /* Retrieve an entry.
   Returns the single bit stored for the wide character WC in the
   three-level table T.  The bits of WC are split, from most to least
   significant, into a level-1 index (everything above the low q + p + 5
   bits), a q-bit index inside a level-2 block, a p-bit index inside a
   level-3 block, and a 5-bit bit number inside one 32-bit word.
   NOTE(review): the value returned when the level-1 index is out of
   range or an EMPTY entry is met is on lines not visible here
   (presumably 0).  */
3382 wctype_table_get (struct wctype_table
*t
, uint32_t wc
)
3384 uint32_t index1
= wc
>> (t
->q
+ t
->p
+ 5);
3385 if (index1
< t
->level1_size
)
3387 uint32_t lookup1
= t
->level1
[index1
];
3388 if (lookup1
!= EMPTY
)
/* lookup1 is a level-2 block number; a block spans 1 << q entries.  */
3390 uint32_t index2
= ((wc
>> (t
->p
+ 5)) & ((1 << t
->q
) - 1))
3391 + (lookup1
<< t
->q
);
3392 uint32_t lookup2
= t
->level2
[index2
];
3393 if (lookup2
!= EMPTY
)
/* lookup2 is a level-3 block number; a block spans 1 << p words.  */
3395 uint32_t index3
= ((wc
>> 5) & ((1 << t
->p
) - 1))
3396 + (lookup2
<< t
->p
);
3397 uint32_t lookup3
= t
->level3
[index3
];
3398 uint32_t index4
= wc
& 0x1f;
/* Extract bit number index4 of the selected 32-bit word.  */
3400 return (lookup3
>> index4
) & 1;
3407 /* Add one entry.
   Sets the bit for the wide character WC in the three-level table T,
   growing the levels on demand.  WC is split into the same four index
   fields that wctype_table_get uses.  Level 1 is extended with EMPTY
   entries up to the needed index; a new level-2 or level-3 block is
   appended whenever the entry that should point to it is still EMPTY.
   NOTE(review): the return type, braces, the declarations of i, i1 and
   i2, and a few statement bodies are not visible in this view.  */
3409 wctype_table_add (struct wctype_table
*t
, uint32_t wc
)
3411 uint32_t index1
= wc
>> (t
->q
+ t
->p
+ 5);
3412 uint32_t index2
= (wc
>> (t
->p
+ 5)) & ((1 << t
->q
) - 1);
3413 uint32_t index3
= (wc
>> 5) & ((1 << t
->p
) - 1);
3414 uint32_t index4
= wc
& 0x1f;
/* Make sure level 1 has an entry for index1.  */
3417 if (index1
>= t
->level1_size
)
3419 if (index1
>= t
->level1_alloc
)
/* Double the capacity; the statement guarded by the test on alloc
   below is not visible (presumably it raises alloc above index1).  */
3421 size_t alloc
= 2 * t
->level1_alloc
;
3422 if (alloc
<= index1
)
3424 t
->level1
= (uint32_t *) xrealloc ((char *) t
->level1
,
3425 alloc
* sizeof (uint32_t));
3426 t
->level1_alloc
= alloc
;
/* Every newly exposed level-1 slot starts out EMPTY.  */
3428 while (index1
>= t
->level1_size
)
3429 t
->level1
[t
->level1_size
++] = EMPTY
;
/* No level-2 block for this index yet: append one.  */
3432 if (t
->level1
[index1
] == EMPTY
)
3434 if (t
->level2_size
== t
->level2_alloc
)
/* Capacity is counted in blocks of 1 << q entries.  */
3436 size_t alloc
= 2 * t
->level2_alloc
+ 1;
3437 t
->level2
= (uint32_t *) xrealloc ((char *) t
->level2
,
3438 (alloc
<< t
->q
) * sizeof (uint32_t));
3439 t
->level2_alloc
= alloc
;
/* Fill the new block with EMPTY and record its number in level 1.  */
3441 i1
= t
->level2_size
<< t
->q
;
3442 i2
= (t
->level2_size
+ 1) << t
->q
;
3443 for (i
= i1
; i
< i2
; i
++)
3444 t
->level2
[i
] = EMPTY
;
3445 t
->level1
[index1
] = t
->level2_size
++;
/* Turn the index inside the block into an index into all of level 2.  */
3448 index2
+= t
->level1
[index1
] << t
->q
;
/* No level-3 block for this entry yet: append one.  */
3450 if (t
->level2
[index2
] == EMPTY
)
3452 if (t
->level3_size
== t
->level3_alloc
)
/* Capacity is counted in blocks of 1 << p words.  */
3454 size_t alloc
= 2 * t
->level3_alloc
+ 1;
3455 t
->level3
= (uint32_t *) xrealloc ((char *) t
->level3
,
3456 (alloc
<< t
->p
) * sizeof (uint32_t));
3457 t
->level3_alloc
= alloc
;
3459 i1
= t
->level3_size
<< t
->p
;
3460 i2
= (t
->level3_size
+ 1) << t
->p
;
3461 for (i
= i1
; i
< i2
; i
++)
/* NOTE(review): the body of the loop above is not visible here;
   presumably it clears the words of the new block.  */
3463 t
->level2
[index2
] = t
->level3_size
++;
/* Turn the index inside the block into an index into all of level 3.  */
3466 index3
+= t
->level2
[index2
] << t
->p
;
/* Finally set the bit that stands for WC.  */
3468 t
->level3
[index3
] |= (uint32_t)1 << index4
;
3471 /* Finalize and shrink.
   Removes duplicate blocks from the three-level table T and writes the
   result to FILE as one locale structure.  Duplicate level-3 blocks are
   merged first and the level-2 entries are renumbered through reorder3;
   then duplicate level-2 blocks are merged and the level-1 entries are
   renumbered through reorder2.  The output consists of five header
   words, the level-1 and level-2 arrays with block numbers converted to
   byte offsets, and the raw level-3 words.
   NOTE(review): the return type, braces, the declarations of i, j and
   k, the bookkeeping inside the two merge loops, the targets of the
   three size computations and the final statements are not visible in
   this view.  */
3473 add_locale_wctype_table (struct locale_file
*file
, struct wctype_table
*t
)
/* Old block number to new block number, one entry per existing block.  */
3476 uint32_t reorder3
[t
->level3_size
];
3477 uint32_t reorder2
[t
->level2_size
];
3478 uint32_t level2_offset
, level3_offset
;
3480 /* Uniquify level3 blocks. */
3482 for (j
= 0; j
< t
->level3_size
; j
++)
/* Compare block j with each block i below k, 1 << p words at a time.  */
3484 for (i
= 0; i
< k
; i
++)
3485 if (memcmp (&t
->level3
[i
<< t
->p
], &t
->level3
[j
<< t
->p
],
3486 (1 << t
->p
) * sizeof (uint32_t)) == 0)
3488 /* Relocate block j to block i. */
3493 memcpy (&t
->level3
[i
<< t
->p
], &t
->level3
[j
<< t
->p
],
3494 (1 << t
->p
) * sizeof (uint32_t));
/* Make every used level-2 entry point at the renumbered level-3 block.  */
3500 for (i
= 0; i
< (t
->level2_size
<< t
->q
); i
++)
3501 if (t
->level2
[i
] != EMPTY
)
3502 t
->level2
[i
] = reorder3
[t
->level2
[i
]];
3504 /* Uniquify level2 blocks. */
3506 for (j
= 0; j
< t
->level2_size
; j
++)
/* Same scheme as for level 3, with blocks of 1 << q entries.  */
3508 for (i
= 0; i
< k
; i
++)
3509 if (memcmp (&t
->level2
[i
<< t
->q
], &t
->level2
[j
<< t
->q
],
3510 (1 << t
->q
) * sizeof (uint32_t)) == 0)
3512 /* Relocate block j to block i. */
3517 memcpy (&t
->level2
[i
<< t
->q
], &t
->level2
[j
<< t
->q
],
3518 (1 << t
->q
) * sizeof (uint32_t));
/* Make every used level-1 entry point at the renumbered level-2 block.  */
3524 for (i
= 0; i
< t
->level1_size
; i
++)
3525 if (t
->level1
[i
] != EMPTY
)
3526 t
->level1
[i
] = reorder2
[t
->level1
[i
]];
/* Three byte counts follow; their assignment targets are not visible
   here.  In order: header plus all three levels, header plus level 1,
   header plus levels 1 and 2.  Presumably these are the total size and
   the level2_offset and level3_offset values used below.  */
3529 5 * sizeof (uint32_t)
3530 + t
->level1_size
* sizeof (uint32_t)
3531 + (t
->level2_size
<< t
->q
) * sizeof (uint32_t)
3532 + (t
->level3_size
<< t
->p
) * sizeof (uint32_t);
3535 5 * sizeof (uint32_t)
3536 + t
->level1_size
* sizeof (uint32_t);
3538 5 * sizeof (uint32_t)
3539 + t
->level1_size
* sizeof (uint32_t)
3540 + (t
->level2_size
<< t
->q
) * sizeof (uint32_t);
/* Header: level-1 shift, level-1 size, level-2 shift, level-2 mask,
   level-3 mask.  */
3542 start_locale_structure (file
);
3543 add_locale_uint32 (file
, t
->q
+ t
->p
+ 5);
3544 add_locale_uint32 (file
, t
->level1_size
);
3545 add_locale_uint32 (file
, t
->p
+ 5);
3546 add_locale_uint32 (file
, (1 << t
->q
) - 1);
3547 add_locale_uint32 (file
, (1 << t
->p
) - 1);
/* Level-1 entries: a used entry becomes the byte offset of its level-2
   block; the value written for EMPTY is on a line not visible here.  */
3549 for (i
= 0; i
< t
->level1_size
; i
++)
3552 t
->level1
[i
] == EMPTY
3554 : (t
->level1
[i
] << t
->q
) * sizeof (uint32_t) + level2_offset
);
/* Level-2 entries: likewise, as byte offsets of the level-3 blocks.  */
3556 for (i
= 0; i
< (t
->level2_size
<< t
->q
); i
++)
3559 t
->level2
[i
] == EMPTY
3561 : (t
->level2
[i
] << t
->p
) * sizeof (uint32_t) + level3_offset
);
3563 add_locale_uint32_array (file
, t
->level3
, t
->level3_size
<< t
->p
);
3564 end_locale_structure (file
);
/* NOTE(review): the statements guarded by the three tests below are not
   visible here; presumably they release the level arrays.  */
3566 if (t
->level1_alloc
> 0)
3568 if (t
->level2_alloc
> 0)
3570 if (t
->level3_alloc
> 0)
3574 /* Flattens the included transliterations into a translit list.
3575 Inserts them in the list at `cursor', and returns the new cursor.
   Each pending include of CTYPE is unchained, its locale is looked up
   with find_locale for CHARMAP, and that locale is flattened
   recursively before its own translit list is spliced in at *cursor.
   If CTYPE has no default_missing yet, it takes over the one of the
   included locale.  A locale that cannot be found, or that has no
   LC_CTYPE data, is reported through error.
   NOTE(review): the braces, the else keyword and the return statement
   are not visible in this view.  */
3576 static struct translit_t
**
3577 translit_flatten (struct locale_ctype_t
*ctype
,
3578 const struct charmap_t
*charmap
,
3579 struct translit_t
**cursor
)
3581 while (ctype
->translit_include
!= NULL
)
3583 const char *copy_locale
= ctype
->translit_include
->copy_locale
;
3584 const char *copy_repertoire
= ctype
->translit_include
->copy_repertoire
;
3585 struct localedef_t
*other
;
3587 /* Unchain the include statement. During the depth-first traversal
3588 we don't want to visit any locale more than once. */
3589 ctype
->translit_include
= ctype
->translit_include
->next
;
3591 other
= find_locale (LC_CTYPE
, copy_locale
, copy_repertoire
, charmap
);
3593 if (other
== NULL
|| other
->categories
[LC_CTYPE
].ctype
== NULL
)
3595 WITH_CUR_LOCALE (error (0, 0, _("\
3596 %s: transliteration data from locale `%s' not available"),
3597 "LC_CTYPE", copy_locale
));
3601 struct locale_ctype_t
*other_ctype
=
3602 other
->categories
[LC_CTYPE
].ctype
;
/* Resolve the includes of the other locale first; afterwards its
   include chain must be exhausted.  */
3604 cursor
= translit_flatten (other_ctype
, charmap
, cursor
);
3605 assert (other_ctype
->translit_include
== NULL
);
3607 if (other_ctype
->translit
!= NULL
)
3609 /* Insert the other_ctype->translit list at *cursor. */
3610 struct translit_t
*endp
= other_ctype
->translit
;
3611 while (endp
->next
!= NULL
)
/* Hook the old continuation behind the last entry of the inserted
   list, then make *cursor point at its first entry.  */
3614 endp
->next
= *cursor
;
3615 *cursor
= other_ctype
->translit
;
3617 /* Avoid any risk of circular lists. */
3618 other_ctype
->translit
= NULL
;
/* Later insertions go after the list that was just spliced in.  */
3620 cursor
= &endp
->next
;
3623 if (ctype
->default_missing
== NULL
)
3624 ctype
->default_missing
= other_ctype
->default_missing
;
3632 allocate_arrays (struct locale_ctype_t
*ctype
, const struct charmap_t
*charmap
,
3633 struct repertoire_t
*repertoire
)
3641 /* You wonder about this amount of memory? This is only because some
3642 users do not manage to address the array with unsigned values or
3643 data types with range >= 256. '\200' would result in the array
3644 index -128. To help these poor people we duplicate the entries for
3645 128 up to 255 below the entry for \0. */
3646 ctype
->ctype_b
= (char_class_t
*) xcalloc (256 + 128, sizeof (char_class_t
));
3647 ctype
->ctype32_b
= (char_class32_t
*) xcalloc (256, sizeof (char_class32_t
));
3648 ctype
->class_b
= (uint32_t **)
3649 xmalloc (ctype
->nr_charclass
* sizeof (uint32_t *));
3650 ctype
->class_3level
= (struct wctype_table
*)
3651 xmalloc (ctype
->nr_charclass
* sizeof (struct wctype_table
));
3653 /* This is the array accessed using the multibyte string elements. */
3654 for (idx
= 0; idx
< 256; ++idx
)
3655 ctype
->ctype_b
[128 + idx
] = ctype
->class256_collection
[idx
];
3657 /* Mirror first 127 entries. We must take care that entry -1 is not
3658 mirrored because EOF == -1. */
3659 for (idx
= 0; idx
< 127; ++idx
)
3660 ctype
->ctype_b
[idx
] = ctype
->ctype_b
[256 + idx
];
3662 /* The 32 bit array contains all characters < 0x100. */
3663 for (idx
= 0; idx
< ctype
->class_collection_act
; ++idx
)
3664 if (ctype
->charnames
[idx
] < 0x100)
3665 ctype
->ctype32_b
[ctype
->charnames
[idx
]] = ctype
->class_collection
[idx
];
3667 for (nr
= 0; nr
< ctype
->nr_charclass
; nr
++)
3669 ctype
->class_b
[nr
] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
3671 /* We only set CLASS_B for the bits in the ISO C classes, not
3672 the user defined classes. The number should not change but
3674 #define LAST_ISO_C_BIT 11
3675 if (nr
<= LAST_ISO_C_BIT
)
3676 for (idx
= 0; idx
< 256; ++idx
)
3677 if (ctype
->class256_collection
[idx
] & _ISbit (nr
))
3678 ctype
->class_b
[nr
][idx
>> 5] |= (uint32_t) 1 << (idx
& 0x1f);
3681 for (nr
= 0; nr
< ctype
->nr_charclass
; nr
++)
3683 struct wctype_table
*t
;
3685 t
= &ctype
->class_3level
[nr
];
3686 t
->p
= 4; /* or: 5 */
3687 t
->q
= 7; /* or: 6 */
3688 wctype_table_init (t
);
3690 for (idx
= 0; idx
< ctype
->class_collection_act
; ++idx
)
3691 if (ctype
->class_collection
[idx
] & _ISwbit (nr
))
3692 wctype_table_add (t
, ctype
->charnames
[idx
]);
3695 WITH_CUR_LOCALE (fprintf (stderr
, _("\
3696 %s: table for class \"%s\": %lu bytes\n"),
3697 "LC_CTYPE", ctype
->classnames
[nr
],
3698 (unsigned long int) t
->result_size
));
3701 /* Room for table of mappings. */
3702 ctype
->map_b
= (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3703 ctype
->map32_b
= (uint32_t **) xmalloc (ctype
->map_collection_nr
3704 * sizeof (uint32_t *));
3705 ctype
->map_3level
= (struct wctrans_table
*)
3706 xmalloc (ctype
->map_collection_nr
* sizeof (struct wctrans_table
));
3708 /* Fill in all mappings. */
3709 for (idx
= 0; idx
< 2; ++idx
)
3713 /* Allocate table. */
3714 ctype
->map_b
[idx
] = (uint32_t *)
3715 xmalloc ((256 + 128) * sizeof (uint32_t));
3717 /* Copy values from collection. */
3718 for (idx2
= 0; idx2
< 256; ++idx2
)
3719 ctype
->map_b
[idx
][128 + idx2
] = ctype
->map256_collection
[idx
][idx2
];
3721 /* Mirror first 127 entries. We must take care not to map entry
3722 -1 because EOF == -1. */
3723 for (idx2
= 0; idx2
< 127; ++idx2
)
3724 ctype
->map_b
[idx
][idx2
] = ctype
->map_b
[idx
][256 + idx2
];
3726 /* EOF must map to EOF. */
3727 ctype
->map_b
[idx
][127] = EOF
;
3730 for (idx
= 0; idx
< ctype
->map_collection_nr
; ++idx
)
3734 /* Allocate table. */
3735 ctype
->map32_b
[idx
] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
3737 /* Copy values from collection. Default is identity mapping. */
3738 for (idx2
= 0; idx2
< 256; ++idx2
)
3739 ctype
->map32_b
[idx
][idx2
] =
3740 (ctype
->map_collection
[idx
][idx2
] != 0
3741 ? ctype
->map_collection
[idx
][idx2
]
3745 for (nr
= 0; nr
< ctype
->map_collection_nr
; nr
++)
3747 struct wctrans_table
*t
;
3749 t
= &ctype
->map_3level
[nr
];
3752 wctrans_table_init (t
);
3754 for (idx
= 0; idx
< ctype
->map_collection_act
[nr
]; ++idx
)
3755 if (ctype
->map_collection
[nr
][idx
] != 0)
3756 wctrans_table_add (t
, ctype
->charnames
[idx
],
3757 ctype
->map_collection
[nr
][idx
]);
3760 WITH_CUR_LOCALE (fprintf (stderr
, _("\
3761 %s: table for map \"%s\": %lu bytes\n"),
3762 "LC_CTYPE", ctype
->mapnames
[nr
],
3763 (unsigned long int) t
->result_size
));
3766 /* Extra array for class and map names. */
3767 ctype
->class_name_ptr
= (uint32_t *) xmalloc (ctype
->nr_charclass
3768 * sizeof (uint32_t));
3769 ctype
->map_name_ptr
= (uint32_t *) xmalloc (ctype
->map_collection_nr
3770 * sizeof (uint32_t));
3772 ctype
->class_offset
= _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
);
3773 ctype
->map_offset
= ctype
->class_offset
+ ctype
->nr_charclass
;
3775 /* Array for width information. Because the expected widths are very
3776 small (never larger than 2) we use only one single byte. This
3778 We put only printable characters in the table. wcwidth is specified
3779 to return -1 for non-printable characters. Doing the check here
3780 saves a run-time check.
3781 But we put L'\0' in the table. This again saves a run-time check. */
3783 struct wcwidth_table
*t
;
3788 wcwidth_table_init (t
);
3790 /* First set all the printable characters of the character set to
3791 the default width. */
3793 while (iterate_table (&charmap
->char_table
, &curs
, &key
, &len
, &vdata
) == 0)
3795 struct charseq
*data
= (struct charseq
*) vdata
;
3797 if (data
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
3798 data
->ucs4
= repertoire_find_value (ctype
->repertoire
,
3801 if (data
->ucs4
!= ILLEGAL_CHAR_VALUE
)
3803 uint32_t *class_bits
=
3804 find_idx (ctype
, &ctype
->class_collection
, NULL
,
3805 &ctype
->class_collection_act
, data
->ucs4
);
3807 if (class_bits
!= NULL
&& (*class_bits
& BITw (tok_print
)))
3808 wcwidth_table_add (t
, data
->ucs4
, charmap
->width_default
);
3812 /* Now add the explicitly specified widths. */
3813 if (charmap
->width_rules
!= NULL
)
3814 for (size_t cnt
= 0; cnt
< charmap
->nwidth_rules
; ++cnt
)
3816 unsigned char bytes
[charmap
->mb_cur_max
];
3817 int nbytes
= charmap
->width_rules
[cnt
].from
->nbytes
;
3819 /* We have the range of characters for which the width is
3820 specified described using byte sequences of the multibyte
3821 charset. We have to convert this to UCS4 now. And we
3822 cannot simply convert the beginning and the end of the
3823 sequence, we have to iterate over the byte sequence and
3824 convert it for every single character. */
3825 memcpy (bytes
, charmap
->width_rules
[cnt
].from
->bytes
, nbytes
);
3827 while (nbytes
< charmap
->width_rules
[cnt
].to
->nbytes
3828 || memcmp (bytes
, charmap
->width_rules
[cnt
].to
->bytes
,
3831 /* Find the UCS value for `bytes'. */
3834 struct charseq
*seq
=
3835 charmap_find_symbol (charmap
, (char *) bytes
, nbytes
);
3838 wch
= ILLEGAL_CHAR_VALUE
;
3839 else if (seq
->ucs4
!= UNINITIALIZED_CHAR_VALUE
)
3842 wch
= repertoire_find_value (ctype
->repertoire
, seq
->name
,
3843 strlen (seq
->name
));
3845 if (wch
!= ILLEGAL_CHAR_VALUE
)
3847 /* Store the value. */
3848 uint32_t *class_bits
=
3849 find_idx (ctype
, &ctype
->class_collection
, NULL
,
3850 &ctype
->class_collection_act
, wch
);
3852 if (class_bits
!= NULL
&& (*class_bits
& BITw (tok_print
)))
3853 wcwidth_table_add (t
, wch
,
3854 charmap
->width_rules
[cnt
].width
);
3857 /* "Increment" the bytes sequence. */
3859 while (inner
>= 0 && bytes
[inner
] == 0xff)
3864 /* We have to extend the byte sequence. */
3865 if (nbytes
>= charmap
->width_rules
[cnt
].to
->nbytes
)
3869 memset (&bytes
[1], 0, nbytes
);
3875 while (++inner
< nbytes
)
3881 /* Set the width of L'\0' to 0. */
3882 wcwidth_table_add (t
, 0, 0);
3885 WITH_CUR_LOCALE (fprintf (stderr
, _("%s: table for width: %lu bytes\n"),
3886 "LC_CTYPE", (unsigned long int) t
->result_size
));
3889 /* Set MB_CUR_MAX. */
3890 ctype
->mb_cur_max
= charmap
->mb_cur_max
;
3892 /* Now determine the table for the transliteration information.
3894 XXX It is not yet clear to me whether it is worth implementing a
3895 complicated algorithm which uses a hash table to locate the entries.
3896 For now I'll use a simple array which can be searched using binary
3898 if (ctype
->translit_include
!= NULL
)
3899 /* Traverse the locales mentioned in the `include' statements in a
3900 depth-first way and fold in their transliteration information. */
3901 translit_flatten (ctype
, charmap
, &ctype
->translit
);
3903 if (ctype
->translit
!= NULL
)
3905 /* First count how many entries we have. This is the upper limit
3906 since some entries from the included files might be overwritten. */
3908 struct translit_t
*runp
= ctype
->translit
;
3909 struct translit_t
**sorted
;
3910 size_t from_len
, to_len
;
3912 while (runp
!= NULL
)
3918 /* Next we allocate an array large enough and fill in the values. */
3919 sorted
= (struct translit_t
**) alloca (number
3920 * sizeof (struct translit_t
**));
3921 runp
= ctype
->translit
;
3925 /* Search for the place where to insert this string.
3926 XXX Better use a real sorting algorithm later. */
3930 while (idx
< number
)
3932 int res
= wcscmp ((const wchar_t *) sorted
[idx
]->from
,
3933 (const wchar_t *) runp
->from
);
3948 memmove (&sorted
[idx
+ 1], &sorted
[idx
],
3949 (number
- idx
) * sizeof (struct translit_t
*));
3956 while (runp
!= NULL
);
3958 /* The next step is putting all the possible transliteration
3959 strings in one memory block so that we can write it out.
3960 We need several different blocks:
3961 - index to the from-string array
3963 - index to the to-string array
3966 from_len
= to_len
= 0;
3967 for (size_t cnt
= 0; cnt
< number
; ++cnt
)
3969 struct translit_to_t
*srunp
;
3970 from_len
+= wcslen ((const wchar_t *) sorted
[cnt
]->from
) + 1;
3971 srunp
= sorted
[cnt
]->to
;
3972 while (srunp
!= NULL
)
3974 to_len
+= wcslen ((const wchar_t *) srunp
->str
) + 1;
3975 srunp
= srunp
->next
;
3977 /* Plus one for the extra NUL character marking the end of
3978 the list for the current entry. */
3982 /* We can allocate the arrays for the results. */
3983 ctype
->translit_from_idx
= xmalloc (number
* sizeof (uint32_t));
3984 ctype
->translit_from_tbl
= xmalloc (from_len
* sizeof (uint32_t));
3985 ctype
->translit_to_idx
= xmalloc (number
* sizeof (uint32_t));
3986 ctype
->translit_to_tbl
= xmalloc (to_len
* sizeof (uint32_t));
3990 for (size_t cnt
= 0; cnt
< number
; ++cnt
)
3993 struct translit_to_t
*srunp
;
3995 ctype
->translit_from_idx
[cnt
] = from_len
;
3996 ctype
->translit_to_idx
[cnt
] = to_len
;
3998 len
= wcslen ((const wchar_t *) sorted
[cnt
]->from
) + 1;
3999 wmemcpy ((wchar_t *) &ctype
->translit_from_tbl
[from_len
],
4000 (const wchar_t *) sorted
[cnt
]->from
, len
);
4003 ctype
->translit_to_idx
[cnt
] = to_len
;
4004 srunp
= sorted
[cnt
]->to
;
4005 while (srunp
!= NULL
)
4007 len
= wcslen ((const wchar_t *) srunp
->str
) + 1;
4008 wmemcpy ((wchar_t *) &ctype
->translit_to_tbl
[to_len
],
4009 (const wchar_t *) srunp
->str
, len
);
4011 srunp
= srunp
->next
;
4013 ctype
->translit_to_tbl
[to_len
++] = L
'\0';
4016 /* Store the information about the length. */
4017 ctype
->translit_idx_size
= number
;
4018 ctype
->translit_from_tbl_size
= from_len
* sizeof (uint32_t);
4019 ctype
->translit_to_tbl_size
= to_len
* sizeof (uint32_t);
4023 ctype
->translit_from_idx
= no_str
;
4024 ctype
->translit_from_tbl
= no_str
;
4025 ctype
->translit_to_tbl
= no_str
;
4026 ctype
->translit_idx_size
= 0;
4027 ctype
->translit_from_tbl_size
= 0;
4028 ctype
->translit_to_tbl_size
= 0;