1 /* Copyright (C) 1995-2017 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, see <http://www.gnu.org/licenses/>. */
35 #include "localedef.h"
37 #include "localeinfo.h"
39 #include "linereader.h"
40 #include "locfile-token.h"
/* BITPOS maps a class token to its position counted from tok_upper.
   BIT and BITw feed that position to _ISbit and _ISwbit respectively
   to obtain the corresponding mask.  */
46 /* The bit used for representing a special class. */
47 #define BITPOS(class) ((class) - tok_upper)
48 #define BIT(class) (_ISbit (BITPOS (class)))
49 #define BITw(class) (_ISwbit (BITPOS (class)))
/* ELEM expands to a dereferenced find_idx call, i.e. an lvalue for the
   slot that belongs to VALUE in ctype->COLLECTION.  The COLLECTION_max
   and COLLECTION_act members are passed along so find_idx can grow the
   table.  IDX is pasted verbatim after each of the three member names
   (callers in this file pass it empty).  */
51 #define ELEM(ctype, collection, idx, value) \
52 *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx, \
53 &ctype->collection##_act idx, value)
56 /* To be compatible with former implementations we for now restrict
57 the number of bits for character classes to 16. When compatibility
58 is not necessary anymore increase the number to 32. */
59 #define char_class_t uint16_t
60 #define char_class32_t uint32_t
63 /* Type to describe a transliteration action. We have a possibly
64 multiple character from-string and a set of multiple character
65 to-strings. All are 32bit values since this is what is used in
66 the gconv functions. */
71 struct translit_to_t
*next
;
81 struct translit_to_t
*to
;
83 struct translit_t
*next
;
86 struct translit_ignore_t
95 struct translit_ignore_t
*next
;
99 /* Type to describe a transliteration include statement. */
100 struct translit_include_t
102 const char *copy_locale
;
103 const char *copy_repertoire
;
105 struct translit_include_t
*next
;
108 /* Provide some dummy pointer for empty string. */
109 static uint32_t no_str
[] = { 0 };
/* Parameter macros (TABLE, ELEMENT, DEFAULT, NO_ADD_LOCALE) for three
   sparse table flavours: idx_table, wcwidth_table and wctrans_table.
   NOTE(review): the lines that consume these macros are not visible
   in this excerpt -- presumably a template header is included after
   each group; confirm against the full file.  */
112 /* Sparse table of uint32_t. */
113 #define TABLE idx_table
114 #define ELEMENT uint32_t
115 #define DEFAULT ((uint32_t) ~0)
116 #define NO_ADD_LOCALE
119 #define TABLE wcwidth_table
120 #define ELEMENT uint8_t
124 #define TABLE wctrans_table
125 #define ELEMENT int32_t
/* While this macro is in effect the name wctrans_table_add refers to
   wctrans_table_add_internal; the #undef below frees the name for the
   wrapper that follows.  */
127 #define wctrans_table_add wctrans_table_add_internal
129 #undef wctrans_table_add
130 /* The wctrans_table must actually store the difference between the
131 desired result and the argument. */
/* Wrapper: records the mapping WC -> MAPPED_WC in T by handing the
   difference MAPPED_WC - WC to wctrans_table_add_internal.  */
133 wctrans_table_add (struct wctrans_table
*t
, uint32_t wc
, uint32_t mapped_wc
)
135 wctrans_table_add_internal (t
, wc
, mapped_wc
- wc
);
138 /* Construction of sparse 3-level tables.
139 See wchar-lookup.h for their structure and the meaning of p and q. */
146 /* Working representation. */
159 static void add_locale_wctype_table (struct locale_file
*file
,
160 struct wctype_table
*t
);
/* Working state for one LC_CTYPE category while it is being compiled.
   The members fall into these groups: the growable list of known
   character values (charnames*), the class names and per-character
   class bit masks (class*), the digit/outdigit sequences, the
   character maps (map*), the transliteration data (translit*,
   default_missing*), and finally the arrays that are built for the
   binary output.
   NOTE(review): several members used elsewhere in this file
   (e.g. charnames, nr_charclass, mbdigits_act, wcdigits) are not
   visible in this excerpt of the struct.  */
162 /* The real definition of the struct for the LC_CTYPE locale. */
163 struct locale_ctype_t
/* Capacity and fill level of the charnames array; find_idx doubles
   charnames_max when the array is full.  */
166 size_t charnames_max
;
167 size_t charnames_act
;
168 /* An index lookup table, to speedup find_idx. */
169 struct idx_table charnames_idx
;
171 struct repertoire_t
*repertoire
;
173 /* We will allow up to 8 * sizeof (uint32_t) character classes. */
174 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
176 const char *classnames
[MAX_NR_CHARCLASS
];
177 uint32_t last_class_char
;
/* Class bit masks: one fixed slot per single-byte value, plus a
   growable table addressed through find_idx.  */
178 uint32_t class256_collection
[256];
179 uint32_t *class_collection
;
180 size_t class_collection_max
;
181 size_t class_collection_act
;
183 uint32_t class_offset
;
185 struct charseq
**mbdigits
;
/* Ten output digits, as multibyte sequences and as wide characters.  */
192 struct charseq
*mboutdigits
[10];
193 uint32_t wcoutdigits
[10];
194 size_t outdigits_act
;
196 /* If the following number ever turns out to be too small simply
197 increase it. But I doubt it will. --drepper@gnu */
198 #define MAX_NR_CHARMAP 16
199 const char *mapnames
[MAX_NR_CHARMAP
];
200 uint32_t *map_collection
[MAX_NR_CHARMAP
];
201 uint32_t map256_collection
[2][256];
202 size_t map_collection_max
[MAX_NR_CHARMAP
];
203 size_t map_collection_act
[MAX_NR_CHARMAP
];
204 size_t map_collection_nr
;
206 int tomap_done
[MAX_NR_CHARMAP
];
209 /* Transliteration information. */
210 struct translit_include_t
*translit_include
;
211 struct translit_t
*translit
;
212 struct translit_ignore_t
*translit_ignore
;
213 uint32_t ntranslit_ignore
;
215 uint32_t *default_missing
;
216 const char *default_missing_file
;
217 size_t default_missing_lineno
;
/* Flags written to the output as _NL_CTYPE_MAP_TO_NONASCII and
   _NL_CTYPE_NONASCII_CASE.  */
219 uint32_t to_nonascii
;
220 uint32_t nonascii_case
;
222 /* The arrays for the binary representation. */
223 char_class_t
*ctype_b
;
224 char_class32_t
*ctype32_b
;
228 struct wctype_table
*class_3level
;
229 struct wctrans_table
*map_3level
;
230 uint32_t *class_name_ptr
;
231 uint32_t *map_name_ptr
;
232 struct wcwidth_table width
;
234 const char *codeset_name
;
235 uint32_t *translit_from_idx
;
236 uint32_t *translit_from_tbl
;
237 uint32_t *translit_to_idx
;
238 uint32_t *translit_to_tbl
;
239 uint32_t translit_idx_size
;
240 size_t translit_from_tbl_size
;
241 size_t translit_to_tbl_size
;
243 struct obstack mempool
;
247 /* Marker for an empty slot. This has the value 0xFFFFFFFF, regardless
248 whether 'int' is 16 bit, 32 bit, or 64 bit. */
249 #define EMPTY ((uint32_t) ~0)
252 #define obstack_chunk_alloc xmalloc
253 #define obstack_chunk_free free
256 /* Prototypes for local functions. */
257 static void ctype_startup (struct linereader
*lr
, struct localedef_t
*locale
,
258 const struct charmap_t
*charmap
,
259 struct localedef_t
*copy_locale
,
261 static void ctype_class_new (struct linereader
*lr
,
262 struct locale_ctype_t
*ctype
, const char *name
);
263 static void ctype_map_new (struct linereader
*lr
,
264 struct locale_ctype_t
*ctype
,
265 const char *name
, const struct charmap_t
*charmap
);
266 static uint32_t *find_idx (struct locale_ctype_t
*ctype
, uint32_t **table
,
267 size_t *max
, size_t *act
, uint32_t idx
);
268 static void set_class_defaults (struct locale_ctype_t
*ctype
,
269 const struct charmap_t
*charmap
,
270 struct repertoire_t
*repertoire
);
271 static void allocate_arrays (struct locale_ctype_t
*ctype
,
272 const struct charmap_t
*charmap
,
273 struct repertoire_t
*repertoire
);
/* Names and byte values for the ten decimal digits, each table indexed
   by digit value.  ctype_finish falls back to `digits' and then to
   `longnames' when a locale defines no input digits.
   NOTE(review): the use of `uninames' is not visible in this excerpt;
   presumably it is a further lookup fallback -- confirm.  */
276 static const char *longnames
[] =
278 "zero", "one", "two", "three", "four",
279 "five", "six", "seven", "eight", "nine"
281 static const char *uninames
[] =
283 "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
284 "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
286 static const unsigned char digits
[] = "0123456789";
/* Prepare the LC_CTYPE working data of LOCALE.

   Work is only done when IGNORE_CONTENT is zero and
   locale->categories[LC_CTYPE].ctype is still NULL.  If COPY_LOCALE is
   NULL a zero-filled struct locale_ctype_t is allocated and seeded:
     - charnames gets room for 256 entries (512 when the charmap is
       multibyte) and its first 256 entries are set to 0..255;
     - the twelve standard classes are registered, in the order that
       fixes their bit positions;
     - class_collection is allocated zero-filled with 256 active slots;
     - the "toupper" and "tolower" maps are registered and their first
       256 entries (both the growable and the 256-entry tables) are
       initialised to the identity mapping;
     - to_nonascii is set when enc_not_ascii_compatible is nonzero;
     - the private obstack is initialised.
   The final assignment lets LOCALE share COPY_LOCALE's ctype pointer.
   The branch keyword selecting it is not visible in this excerpt
   (presumably the `else' of the COPY_LOCALE test -- confirm).

   NOTE(review): class_collection holds uint32_t elements but is
   allocated with sizeof (unsigned long int) per element.  This looks
   like harmless over-allocation -- confirm before changing.  */
290 ctype_startup (struct linereader
*lr
, struct localedef_t
*locale
,
291 const struct charmap_t
*charmap
,
292 struct localedef_t
*copy_locale
, int ignore_content
)
295 struct locale_ctype_t
*ctype
;
297 if (!ignore_content
&& locale
->categories
[LC_CTYPE
].ctype
== NULL
)
299 if (copy_locale
== NULL
)
301 /* Allocate the needed room. */
302 locale
->categories
[LC_CTYPE
].ctype
= ctype
=
303 (struct locale_ctype_t
*) xcalloc (1,
304 sizeof (struct locale_ctype_t
));
306 /* We have seen no names yet. */
307 ctype
->charnames_max
= charmap
->mb_cur_max
== 1 ? 256 : 512;
308 ctype
->charnames
= (uint32_t *) xmalloc (ctype
->charnames_max
309 * sizeof (uint32_t));
310 for (cnt
= 0; cnt
< 256; ++cnt
)
311 ctype
->charnames
[cnt
] = cnt
;
312 ctype
->charnames_act
= 256;
313 idx_table_init (&ctype
->charnames_idx
);
315 /* Fill character class information. */
316 ctype
->last_class_char
= ILLEGAL_CHAR_VALUE
;
317 /* The order of the following instructions determines the bit
319 ctype_class_new (lr
, ctype
, "upper");
320 ctype_class_new (lr
, ctype
, "lower");
321 ctype_class_new (lr
, ctype
, "alpha");
322 ctype_class_new (lr
, ctype
, "digit");
323 ctype_class_new (lr
, ctype
, "xdigit");
324 ctype_class_new (lr
, ctype
, "space");
325 ctype_class_new (lr
, ctype
, "print");
326 ctype_class_new (lr
, ctype
, "graph");
327 ctype_class_new (lr
, ctype
, "blank");
328 ctype_class_new (lr
, ctype
, "cntrl");
329 ctype_class_new (lr
, ctype
, "punct");
330 ctype_class_new (lr
, ctype
, "alnum");
332 ctype
->class_collection_max
= charmap
->mb_cur_max
== 1 ? 256 : 512;
333 ctype
->class_collection
334 = (uint32_t *) xcalloc (sizeof (unsigned long int),
335 ctype
->class_collection_max
);
336 ctype
->class_collection_act
= 256;
338 /* Fill character map information. */
339 ctype
->last_map_idx
= MAX_NR_CHARMAP
;
340 ctype_map_new (lr
, ctype
, "toupper", charmap
);
341 ctype_map_new (lr
, ctype
, "tolower", charmap
);
343 /* Fill first 256 entries in `toXXX' arrays. */
344 for (cnt
= 0; cnt
< 256; ++cnt
)
346 ctype
->map_collection
[0][cnt
] = cnt
;
347 ctype
->map_collection
[1][cnt
] = cnt
;
349 ctype
->map256_collection
[0][cnt
] = cnt
;
350 ctype
->map256_collection
[1][cnt
] = cnt
;
353 if (enc_not_ascii_compatible
)
354 ctype
->to_nonascii
= 1;
356 obstack_init (&ctype
->mempool
);
359 ctype
= locale
->categories
[LC_CTYPE
].ctype
=
360 copy_locale
->categories
[LC_CTYPE
].ctype
;
/* Validate and complete the LC_CTYPE data of LOCALE once parsing is
   finished.  The visible code performs these steps in order:
     1. Resolve a `copy' directive by following copy_name[LC_CTYPE]
	through find_locale until a definition is found; if there is
	still none, report it and create a default via ctype_startup.
     2. Load the repertoire named by the locale (or the global one).
     3. Record the charmap's code set name, using "//UNKNOWN//" (with
	an error) when the charmap has none.
     4. Apply the class defaults (set_class_defaults).
     5. Check every character's class bits against valid_table, first
	for the wide-character table, then for the 256 single-byte
	entries.
     6. Check <SP> specially and force it into class `print'.
     7. Set nonascii_case when the single-byte case maps deviate from
	the ASCII rules.
     8. Register every character covered by the charmap's WIDTH rules
	and every charmap entry with find_idx.
     9. Trim the input digit groups to whole groups of ten, drop
	incomplete groups, and install defaults when none remain.
    10. Replace missing output digits by '?'.
    11. Sort the translit_ignore list and count its entries.
   Problems are reported through record_error.  */
366 ctype_finish (struct localedef_t
*locale
, const struct charmap_t
*charmap
)
368 /* See POSIX.2, table 2-6 for the meaning of the following table. */
373 const char allow
[NCLASS
];
375 valid_table
[NCLASS
] =
377 /* The order is important. See token.h for more information.
378 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
379 { "upper", "--MX-XDDXXX-" },
380 { "lower", "--MX-XDDXXX-" },
381 { "alpha", "---X-XDDXXX-" },
382 { "digit", "XXX--XDDXXX-" },
383 { "xdigit", "-----XDDXXX-" },
384 { "space", "XXXXX------X" },
385 { "print", "---------X--" },
386 { "graph", "---------X--" },
387 { "blank", "XXXXXM-----X" },
388 { "cntrl", "XXXXX-XX--XX" },
389 { "punct", "XXXXX-DD-X-X" },
390 { "alnum", "-----XDDXXX-" }
394 uint32_t space_value
;
395 struct charseq
*space_seq
;
396 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
403 /* Now resolve copying and also handle completely missing definitions. */
406 const char *repertoire_name
;
408 /* First see whether we were supposed to copy. If yes, find the
409 actual definition. */
410 if (locale
->copy_name
[LC_CTYPE
] != NULL
)
412 /* Find the copying locale. This has to happen transitively since
413 the locale we are copying from might also be copying another one. */
414 struct localedef_t
*from
= locale
;
417 from
= find_locale (LC_CTYPE
, from
->copy_name
[LC_CTYPE
],
418 from
->repertoire_name
, charmap
);
419 while (from
->categories
[LC_CTYPE
].ctype
== NULL
420 && from
->copy_name
[LC_CTYPE
] != NULL
);
422 ctype
= locale
->categories
[LC_CTYPE
].ctype
423 = from
->categories
[LC_CTYPE
].ctype
;
426 /* If there is still no definition issue a warning and create an
431 No definition for %s category found"), "LC_CTYPE");
432 ctype_startup (NULL
, locale
, charmap
, NULL
, 0);
433 ctype
= locale
->categories
[LC_CTYPE
].ctype
;
436 /* Get the repertoire we have to use. */
437 repertoire_name
= locale
->repertoire_name
?: repertoire_global
;
438 if (repertoire_name
!= NULL
)
439 ctype
->repertoire
= repertoire_read (repertoire_name
);
442 /* We need the name of the currently used 8-bit character set to
443 make correct conversion between this 8-bit representation and the
444 ISO 10646 character set used internally for wide characters. */
445 ctype
->codeset_name
= charmap
->code_set_name
;
446 if (ctype
->codeset_name
== NULL
)
448 record_error (0, 0, _("\
449 No character set name specified in charmap"));
450 ctype
->codeset_name
= "//UNKNOWN//";
453 /* Set default value for classes not specified. */
454 set_class_defaults (ctype
, charmap
, ctype
->repertoire
);
456 /* Check according to table. */
457 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
459 uint32_t tmp
= ctype
->class_collection
[cnt
];
463 for (cls1
= 0; cls1
< NCLASS
; ++cls1
)
464 if ((tmp
& _ISwbit (cls1
)) != 0)
465 for (cls2
= 0; cls2
< NCLASS
; ++cls2
)
466 if (valid_table
[cls1
].allow
[cls2
] != '-')
468 int eq
= (tmp
& _ISwbit (cls2
)) != 0;
469 switch (valid_table
[cls1
].allow
[cls2
])
474 uint32_t value
= ctype
->charnames
[cnt
];
476 record_error (0, 0, _("\
477 character L'\\u%0*x' in class `%s' must be in class `%s'"),
478 value
> 0xffff ? 8 : 4,
480 valid_table
[cls1
].name
,
481 valid_table
[cls2
].name
);
488 uint32_t value
= ctype
->charnames
[cnt
];
490 record_error (0, 0, _("\
491 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
492 value
> 0xffff ? 8 : 4,
494 valid_table
[cls1
].name
,
495 valid_table
[cls2
].name
);
500 ctype
->class_collection
[cnt
] |= _ISwbit (cls2
);
504 record_error (5, 0, _("\
505 internal error in %s, line %u"), __FUNCTION__
, __LINE__
);
/* Same table-driven check for the 256 single-byte entries, using the
   _ISbit masks instead of _ISwbit.  */
511 for (cnt
= 0; cnt
< 256; ++cnt
)
513 uint32_t tmp
= ctype
->class256_collection
[cnt
];
517 for (cls1
= 0; cls1
< NCLASS
; ++cls1
)
518 if ((tmp
& _ISbit (cls1
)) != 0)
519 for (cls2
= 0; cls2
< NCLASS
; ++cls2
)
520 if (valid_table
[cls1
].allow
[cls2
] != '-')
522 int eq
= (tmp
& _ISbit (cls2
)) != 0;
523 switch (valid_table
[cls1
].allow
[cls2
])
530 snprintf (buf
, sizeof buf
, "\\%Zo", cnt
);
532 record_error (0, 0, _("\
533 character '%s' in class `%s' must be in class `%s'"),
535 valid_table
[cls1
].name
,
536 valid_table
[cls2
].name
);
545 snprintf (buf
, sizeof buf
, "\\%Zo", cnt
);
547 record_error (0, 0, _("\
548 character '%s' in class `%s' must not be in class `%s'"),
550 valid_table
[cls1
].name
,
551 valid_table
[cls2
].name
);
556 ctype
->class256_collection
[cnt
] |= _ISbit (cls2
);
560 record_error (5, 0, _("\
561 internal error in %s, line %u"), __FUNCTION__
, __LINE__
);
567 /* ... and now test <SP> as a special case. */
/* The comma expressions below store the class index in CNT before each
   test so that the error message can name the offending class.  */
569 if (((cnt
= BITPOS (tok_space
),
570 (ELEM (ctype
, class_collection
, , space_value
)
571 & BITw (tok_space
)) == 0)
572 || (cnt
= BITPOS (tok_blank
),
573 (ELEM (ctype
, class_collection
, , space_value
)
574 & BITw (tok_blank
)) == 0)))
576 record_error (0, 0, _("<SP> character not in class `%s'"),
577 valid_table
[cnt
].name
);
579 else if (((cnt
= BITPOS (tok_punct
),
580 (ELEM (ctype
, class_collection
, , space_value
)
581 & BITw (tok_punct
)) != 0)
582 || (cnt
= BITPOS (tok_graph
),
583 (ELEM (ctype
, class_collection
, , space_value
)
587 record_error (0, 0, _("\
588 <SP> character must not be in class `%s'"),
589 valid_table
[cnt
].name
);
592 ELEM (ctype
, class_collection
, , space_value
) |= BITw (tok_print
);
/* Repeat the <SP> checks for the single-byte table; the character is
   looked up in the charmap under the names "SP", "space" and
   "U00000020" in turn.  */
594 space_seq
= charmap_find_value (charmap
, "SP", 2);
595 if (space_seq
== NULL
)
596 space_seq
= charmap_find_value (charmap
, "space", 5);
597 if (space_seq
== NULL
)
598 space_seq
= charmap_find_value (charmap
, "U00000020", 9);
599 if (space_seq
== NULL
|| space_seq
->nbytes
!= 1)
601 record_error (0, 0, _("\
602 character <SP> not defined in character map"));
604 else if (((cnt
= BITPOS (tok_space
),
605 (ctype
->class256_collection
[space_seq
->bytes
[0]]
606 & BIT (tok_space
)) == 0)
607 || (cnt
= BITPOS (tok_blank
),
608 (ctype
->class256_collection
[space_seq
->bytes
[0]]
609 & BIT (tok_blank
)) == 0)))
611 record_error (0, 0, _("<SP> character not in class `%s'"),
612 valid_table
[cnt
].name
);
614 else if (((cnt
= BITPOS (tok_punct
),
615 (ctype
->class256_collection
[space_seq
->bytes
[0]]
616 & BIT (tok_punct
)) != 0)
617 || (cnt
= BITPOS (tok_graph
),
618 (ctype
->class256_collection
[space_seq
->bytes
[0]]
619 & BIT (tok_graph
)) != 0)))
621 record_error (0, 0, _("\
622 <SP> character must not be in class `%s'"),
623 valid_table
[cnt
].name
);
626 ctype
->class256_collection
[space_seq
->bytes
[0]] |= BIT (tok_print
);
628 /* Check whether all single-byte characters map to their upper/lowercase
629 equivalent according to the ASCII rules. */
630 for (cnt
= 'A'; cnt
<= 'Z'; ++cnt
)
632 uint32_t uppval
= ctype
->map256_collection
[0][cnt
];
633 uint32_t lowval
= ctype
->map256_collection
[1][cnt
];
634 uint32_t lowuppval
= ctype
->map256_collection
[0][lowval
];
635 uint32_t lowlowval
= ctype
->map256_collection
[1][lowval
];
638 || lowval
!= cnt
+ 0x20
640 || lowlowval
!= cnt
+ 0x20)
641 ctype
->nonascii_case
= 1;
643 for (cnt
= 0; cnt
< 256; ++cnt
)
644 if (cnt
< 'A' || (cnt
> 'Z' && cnt
< 'a') || cnt
> 'z')
645 if (ctype
->map256_collection
[0][cnt
] != cnt
646 || ctype
->map256_collection
[1][cnt
] != cnt
)
647 ctype
->nonascii_case
= 1;
649 /* Now that the tests are done make sure the name array contains all
650 characters which are handled in the WIDTH section of the
651 character set definition file. */
652 if (charmap
->width_rules
!= NULL
)
653 for (cnt
= 0; cnt
< charmap
->nwidth_rules
; ++cnt
)
655 unsigned char bytes
[charmap
->mb_cur_max
];
656 int nbytes
= charmap
->width_rules
[cnt
].from
->nbytes
;
658 /* We have the range of character for which the width is
659 specified described using byte sequences of the multibyte
660 charset. We have to convert this to UCS4 now. And we
661 cannot simply convert the beginning and the end of the
662 sequence, we have to iterate over the byte sequence and
663 convert it for every single character. */
664 memcpy (bytes
, charmap
->width_rules
[cnt
].from
->bytes
, nbytes
);
666 while (nbytes
< charmap
->width_rules
[cnt
].to
->nbytes
667 || memcmp (bytes
, charmap
->width_rules
[cnt
].to
->bytes
,
670 /* Find the UCS value for `bytes'. */
674 = charmap_find_symbol (charmap
, (char *) bytes
, nbytes
);
677 wch
= ILLEGAL_CHAR_VALUE
;
678 else if (seq
->ucs4
!= UNINITIALIZED_CHAR_VALUE
)
681 wch
= repertoire_find_value (ctype
->repertoire
, seq
->name
,
684 if (wch
!= ILLEGAL_CHAR_VALUE
)
685 /* We are only interested in the side-effects of the
686 `find_idx' call. It will add appropriate entries in
687 the name array if this is necessary. */
688 (void) find_idx (ctype
, NULL
, NULL
, NULL
, wch
);
690 /* "Increment" the bytes sequence. */
692 while (inner
>= 0 && bytes
[inner
] == 0xff)
697 /* We have to extend the byte sequence. */
698 if (nbytes
>= charmap
->width_rules
[cnt
].to
->nbytes
)
702 memset (&bytes
[1], 0, nbytes
);
708 while (++inner
< nbytes
)
714 /* Now set all the other characters of the character set to the
717 while (iterate_table (&charmap
->char_table
, &curs
, &key
, &len
, &vdata
) == 0)
719 struct charseq
*data
= (struct charseq
*) vdata
;
721 if (data
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
722 data
->ucs4
= repertoire_find_value (ctype
->repertoire
,
725 if (data
->ucs4
!= ILLEGAL_CHAR_VALUE
)
726 (void) find_idx (ctype
, NULL
, NULL
, NULL
, data
->ucs4
);
729 /* There must be a multiple of 10 digits. */
730 if (ctype
->mbdigits_act
% 10 != 0)
732 assert (ctype
->mbdigits_act
== ctype
->wcdigits_act
);
733 ctype
->wcdigits_act
-= ctype
->mbdigits_act
% 10;
734 ctype
->mbdigits_act
-= ctype
->mbdigits_act
% 10;
735 record_error (0, 0, _("\
736 `digit' category has not entries in groups of ten"));
739 /* Check the input digits. There must be a multiple of ten available.
740 In each group it could be that one or the other character is missing.
741 In this case the whole group must be removed. */
743 while (cnt
< ctype
->mbdigits_act
)
746 for (inner
= 0; inner
< 10; ++inner
)
747 if (ctype
->mbdigits
[cnt
+ inner
] == NULL
)
754 /* Remove the group. */
/* NOTE(review): the move length for the mbdigits array is computed
   from wcdigits_act, not mbdigits_act.  The two counters are only
   asserted equal in the "not a multiple of ten" branch above --
   confirm they always agree here.  */
755 memmove (&ctype
->mbdigits
[cnt
], &ctype
->mbdigits
[cnt
+ 10],
756 ((ctype
->wcdigits_act
- cnt
- 10)
757 * sizeof (ctype
->mbdigits
[0])));
758 ctype
->mbdigits_act
-= 10;
762 /* If no input digits are given use the default. */
763 if (ctype
->mbdigits_act
== 0)
765 if (ctype
->mbdigits_max
== 0)
767 ctype
->mbdigits
= obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
768 10 * sizeof (struct charseq
*));
769 ctype
->mbdigits_max
= 10;
772 for (cnt
= 0; cnt
< 10; ++cnt
)
774 ctype
->mbdigits
[cnt
] = charmap_find_symbol (charmap
,
775 (char *) digits
+ cnt
, 1);
776 if (ctype
->mbdigits
[cnt
] == NULL
)
778 ctype
->mbdigits
[cnt
] = charmap_find_symbol (charmap
,
780 strlen (longnames
[cnt
]));
781 if (ctype
->mbdigits
[cnt
] == NULL
)
783 /* Hum, this ain't good. */
784 record_error (0, 0, _("\
785 no input digits defined and none of the standard names in the charmap"));
787 ctype
->mbdigits
[cnt
] = obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
788 sizeof (struct charseq
) + 1);
790 /* This is better than nothing. */
791 ctype
->mbdigits
[cnt
]->bytes
[0] = digits
[cnt
];
792 ctype
->mbdigits
[cnt
]->nbytes
= 1;
797 ctype
->mbdigits_act
= 10;
800 /* Check the wide character input digits. There must be a multiple
801 of ten available. In each group it could be that one or the other
802 character is missing. In this case the whole group must be
805 while (cnt
< ctype
->wcdigits_act
)
808 for (inner
= 0; inner
< 10; ++inner
)
809 if (ctype
->wcdigits
[cnt
+ inner
] == ILLEGAL_CHAR_VALUE
)
816 /* Remove the group. */
817 memmove (&ctype
->wcdigits
[cnt
], &ctype
->wcdigits
[cnt
+ 10],
818 ((ctype
->wcdigits_act
- cnt
- 10)
819 * sizeof (ctype
->wcdigits
[0])));
820 ctype
->wcdigits_act
-= 10;
824 /* If no input digits are given use the default. */
825 if (ctype
->wcdigits_act
== 0)
827 if (ctype
->wcdigits_max
== 0)
829 ctype
->wcdigits
= obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
830 10 * sizeof (uint32_t));
831 ctype
->wcdigits_max
= 10;
834 for (cnt
= 0; cnt
< 10; ++cnt
)
835 ctype
->wcdigits
[cnt
] = L
'0' + cnt
;
/* NOTE(review): this is the wide-character default branch, yet the
   counter updated is mbdigits_act; wcdigits_act stays 0.  This looks
   like a copy-and-paste slip for `wcdigits_act = 10' -- confirm
   against the consumers of _NL_CTYPE_INDIGITS_WC_LEN before changing.  */
837 ctype
->mbdigits_act
= 10;
840 /* Check the outdigits. */
842 for (cnt
= 0; cnt
< 10; ++cnt
)
843 if (ctype
->mboutdigits
[cnt
] == NULL
)
845 static struct charseq replace
[2];
849 record_error (0, 0, _("\
850 not all characters used in `outdigit' are available in the charmap"));
854 replace
[0].nbytes
= 1;
855 replace
[0].bytes
[0] = '?';
856 replace
[0].bytes
[1] = '\0';
857 ctype
->mboutdigits
[cnt
] = &replace
[0];
861 for (cnt
= 0; cnt
< 10; ++cnt
)
862 if (ctype
->wcoutdigits
[cnt
] == 0)
866 record_error (0, 0, _("\
867 not all characters used in `outdigit' are available in the repertoire"));
871 ctype
->wcoutdigits
[cnt
] = L
'?';
874 /* Sort the entries in the translit_ignore list. */
875 if (ctype
->translit_ignore
!= NULL
)
877 struct translit_ignore_t
*firstp
= ctype
->translit_ignore
;
878 struct translit_ignore_t
*runp
;
880 ctype
->ntranslit_ignore
= 1;
882 for (runp
= firstp
->next
; runp
!= NULL
; runp
= runp
->next
)
884 struct translit_ignore_t
*lastp
= NULL
;
885 struct translit_ignore_t
*cmpp
;
887 ++ctype
->ntranslit_ignore
;
889 for (cmpp
= firstp
; cmpp
!= NULL
; lastp
= cmpp
, cmpp
= cmpp
->next
)
890 if (runp
->from
< cmpp
->from
)
898 ctype
->translit_ignore
= firstp
;
/* Write the LC_CTYPE category of LOCALE to OUTPUT_PATH.

   The number of emitted elements is the index of
   _NL_CTYPE_EXTRA_MAP_1 plus one element per character class and one
   per character map.  allocate_arrays builds the binary tables first.
   The loop then appends the value for each element index to FILE:
   fixed elements are selected by the case labels below, the elements
   past _NL_CTYPE_EXTRA_MAP_1 carry first the per-class tables and then
   the per-map tables.  Finally write_locale_data stores the result
   under the name "LC_CTYPE".  */
904 ctype_output (struct localedef_t
*locale
, const struct charmap_t
*charmap
,
905 const char *output_path
)
907 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
908 const size_t nelems
= (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
)
909 + ctype
->nr_charclass
+ ctype
->map_collection_nr
);
910 struct locale_file file
;
911 uint32_t default_missing_len
;
914 /* Now prepare the output: Find the sizes of the table we can use. */
915 allocate_arrays (ctype
, charmap
, ctype
->repertoire
);
917 default_missing_len
= (ctype
->default_missing
918 ? wcslen ((wchar_t *) ctype
->default_missing
)
921 init_locale_data (&file
, nelems
);
922 for (elem
= 0; elem
< nelems
; ++elem
)
924 if (elem
< _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
))
927 #define CTYPE_EMPTY(name) \
929 add_locale_empty (&file); \
932 CTYPE_EMPTY(_NL_CTYPE_GAP1
);
933 CTYPE_EMPTY(_NL_CTYPE_GAP2
);
934 CTYPE_EMPTY(_NL_CTYPE_GAP3
);
935 CTYPE_EMPTY(_NL_CTYPE_GAP4
);
936 CTYPE_EMPTY(_NL_CTYPE_GAP5
);
937 CTYPE_EMPTY(_NL_CTYPE_GAP6
);
939 #define CTYPE_RAW_DATA(name, base, size) \
940 case _NL_ITEM_INDEX (name): \
941 add_locale_raw_data (&file, base, size); \
944 CTYPE_RAW_DATA (_NL_CTYPE_CLASS
,
946 (256 + 128) * sizeof (char_class_t
));
948 #define CTYPE_UINT32_ARRAY(name, base, n_elems) \
949 case _NL_ITEM_INDEX (name): \
950 add_locale_uint32_array (&file, base, n_elems); \
953 CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER
, ctype
->map_b
[0], 256 + 128);
954 CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER
, ctype
->map_b
[1], 256 + 128);
955 CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER32
, ctype
->map32_b
[0], 256);
956 CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER32
, ctype
->map32_b
[1], 256);
957 CTYPE_RAW_DATA (_NL_CTYPE_CLASS32
,
959 256 * sizeof (char_class32_t
));
961 #define CTYPE_UINT32(name, value) \
962 case _NL_ITEM_INDEX (name): \
963 add_locale_uint32 (&file, value); \
966 CTYPE_UINT32 (_NL_CTYPE_CLASS_OFFSET
, ctype
->class_offset
);
967 CTYPE_UINT32 (_NL_CTYPE_MAP_OFFSET
, ctype
->map_offset
);
968 CTYPE_UINT32 (_NL_CTYPE_TRANSLIT_TAB_SIZE
, ctype
->translit_idx_size
);
970 CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_IDX
,
971 ctype
->translit_from_idx
,
972 ctype
->translit_idx_size
);
/* The two *_tbl_size members are byte counts; they are divided by
   sizeof (uint32_t) to obtain the element counts passed on.  */
974 CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_TBL
,
975 ctype
->translit_from_tbl
,
976 ctype
->translit_from_tbl_size
977 / sizeof (uint32_t));
979 CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_IDX
,
980 ctype
->translit_to_idx
,
981 ctype
->translit_idx_size
);
983 CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_TBL
,
984 ctype
->translit_to_tbl
,
985 ctype
->translit_to_tbl_size
/ sizeof (uint32_t));
987 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES
):
988 /* The class name array. */
989 start_locale_structure (&file
);
990 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
991 add_locale_string (&file
, ctype
->classnames
[cnt
]);
992 add_locale_char (&file
, 0);
993 align_locale_data (&file
, LOCFILE_ALIGN
);
994 end_locale_structure (&file
);
997 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES
):
998 /* The map name array. */
999 start_locale_structure (&file
);
1000 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
)
1001 add_locale_string (&file
, ctype
->mapnames
[cnt
]);
1002 add_locale_char (&file
, 0);
1003 align_locale_data (&file
, LOCFILE_ALIGN
);
1004 end_locale_structure (&file
);
1007 case _NL_ITEM_INDEX (_NL_CTYPE_WIDTH
):
1008 add_locale_wcwidth_table (&file
, &ctype
->width
);
1011 CTYPE_UINT32 (_NL_CTYPE_MB_CUR_MAX
, ctype
->mb_cur_max
);
1013 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME
):
1014 add_locale_string (&file
, ctype
->codeset_name
);
1017 CTYPE_UINT32 (_NL_CTYPE_MAP_TO_NONASCII
, ctype
->to_nonascii
);
1019 CTYPE_UINT32 (_NL_CTYPE_NONASCII_CASE
, ctype
->nonascii_case
);
/* The digit counters hold individual digits; dividing by ten yields
   the number of complete digit groups.  */
1021 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN
):
1022 add_locale_uint32 (&file
, ctype
->mbdigits_act
/ 10);
1025 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN
):
1026 add_locale_uint32 (&file
, ctype
->wcdigits_act
/ 10);
/* For digit value N (the offset of ELEM from the ...0_MB item), emit
   the N-th member of every group by stepping through the array in
   strides of ten; each sequence is followed by a zero byte.  */
1029 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB
):
1030 start_locale_structure (&file
);
1031 for (cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
);
1032 cnt
< ctype
->mbdigits_act
; cnt
+= 10)
1034 add_locale_raw_data (&file
, ctype
->mbdigits
[cnt
]->bytes
,
1035 ctype
->mbdigits
[cnt
]->nbytes
);
1036 add_locale_char (&file
, 0);
1038 end_locale_structure (&file
);
1041 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB
) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB
):
1042 start_locale_structure (&file
);
1043 cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB
);
1044 add_locale_raw_data (&file
, ctype
->mboutdigits
[cnt
]->bytes
,
1045 ctype
->mboutdigits
[cnt
]->nbytes
);
1046 add_locale_char (&file
, 0);
1047 end_locale_structure (&file
);
1050 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC
) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC
):
1051 start_locale_structure (&file
);
1052 for (cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC
);
1053 cnt
< ctype
->wcdigits_act
; cnt
+= 10)
1054 add_locale_uint32 (&file
, ctype
->wcdigits
[cnt
]);
1055 end_locale_structure (&file
);
1058 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC
) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC
):
1059 cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC
);
1060 add_locale_uint32 (&file
, ctype
->wcoutdigits
[cnt
]);
1063 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN
):
1064 add_locale_uint32 (&file
, default_missing_len
);
1067 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING
):
1068 add_locale_uint32_array (&file
, ctype
->default_missing
,
1069 default_missing_len
);
1072 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN
):
1073 add_locale_uint32 (&file
, ctype
->ntranslit_ignore
);
/* Each translit_ignore entry is written as three uint32 values:
   from, to and step.  */
1076 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE
):
1077 start_locale_structure (&file
);
1079 struct translit_ignore_t
*runp
;
1080 for (runp
= ctype
->translit_ignore
; runp
!= NULL
;
1083 add_locale_uint32 (&file
, runp
->from
);
1084 add_locale_uint32 (&file
, runp
->to
);
1085 add_locale_uint32 (&file
, runp
->step
);
1088 end_locale_structure (&file
);
1092 assert (! "unknown CTYPE element");
1096 /* Handle extra maps. */
/* Indices below nr_charclass select a character class (a 256-bit
   prelude followed by its wctype table); the remaining indices select
   a character map.  */
1097 size_t nr
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
);
1098 if (nr
< ctype
->nr_charclass
)
1100 start_locale_prelude (&file
);
1101 add_locale_uint32_array (&file
, ctype
->class_b
[nr
], 256 / 32);
1102 end_locale_prelude (&file
);
1103 add_locale_wctype_table (&file
, &ctype
->class_3level
[nr
]);
1107 nr
-= ctype
->nr_charclass
;
1108 assert (nr
< ctype
->map_collection_nr
);
1109 add_locale_wctrans_table (&file
, &ctype
->map_3level
[nr
]);
1114 write_locale_data (output_path
, LC_CTYPE
, "LC_CTYPE", &file
);
1118 /* Local functions. */
/* Register NAME as a new character class of CTYPE.

   The existing class names are compared against NAME with strcmp; a
   match is reported through lr_error on LR.  Reaching
   MAX_NR_CHARCLASS classes is reported through record_error with
   status 2.  Otherwise NAME is stored in the next free classnames
   slot and nr_charclass is incremented.  The pointer itself is stored,
   the string is not copied.  */
1120 ctype_class_new (struct linereader
*lr
, struct locale_ctype_t
*ctype
,
1125 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
1126 if (strcmp (ctype
->classnames
[cnt
], name
) == 0)
1129 if (cnt
< ctype
->nr_charclass
)
1131 lr_error (lr
, _("character class `%s' already defined"), name
);
1135 if (ctype
->nr_charclass
== MAX_NR_CHARCLASS
)
1136 /* Exit code 2 is prescribed in P1003.2b. */
1137 record_error (2, 0, _("\
1138 implementation limit: no more than %Zd character classes allowed"),
1141 ctype
->classnames
[ctype
->nr_charclass
++] = name
;
/* Register NAME as a new character map of CTYPE.

   While searching the existing map names for NAME the largest
   map_collection_max seen so far is tracked in MAX_CHARS.  A duplicate
   name is reported through lr_error on LR; reaching MAX_NR_CHARMAP
   maps is reported through record_error with status 2.  Otherwise the
   new map gets a zero-filled table with 256 active entries and
   map_collection_nr is incremented.  The capacity is either 256/512
   (depending on charmap->mb_cur_max) or MAX_CHARS; the condition
   choosing between the two assignments is not visible in this excerpt
   (presumably "MAX_CHARS == 0" -- confirm).  */
1146 ctype_map_new (struct linereader
*lr
, struct locale_ctype_t
*ctype
,
1147 const char *name
, const struct charmap_t
*charmap
)
1149 size_t max_chars
= 0;
1152 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
)
1154 if (strcmp (ctype
->mapnames
[cnt
], name
) == 0)
1157 if (max_chars
< ctype
->map_collection_max
[cnt
])
1158 max_chars
= ctype
->map_collection_max
[cnt
];
1161 if (cnt
< ctype
->map_collection_nr
)
1163 lr_error (lr
, _("character map `%s' already defined"), name
);
1167 if (ctype
->map_collection_nr
== MAX_NR_CHARMAP
)
1168 /* Exit code 2 is prescribed in P1003.2b. */
1169 record_error (2, 0, _("\
1170 implementation limit: no more than %d character maps allowed"),
1173 ctype
->mapnames
[cnt
] = name
;
1176 ctype
->map_collection_max
[cnt
] = charmap
->mb_cur_max
== 1 ? 256 : 512;
1178 ctype
->map_collection_max
[cnt
] = max_chars
;
1180 ctype
->map_collection
[cnt
] = (uint32_t *)
1181 xcalloc (sizeof (uint32_t), ctype
->map_collection_max
[cnt
]);
1182 ctype
->map_collection_act
[cnt
] = 256;
1184 ++ctype
->map_collection_nr
;
/* Return a pointer to the slot of *TABLE that belongs to character
   value IDX, registering IDX in CTYPE's name array when necessary.

   The first return uses IDX directly as the index; its guarding
   condition is not visible in this excerpt (presumably "IDX < 256",
   since the search below starts at position 256 -- confirm).
   Otherwise the position CNT of IDX in ctype->charnames is looked up.
   An unknown IDX is appended to charnames (doubling charnames_max when
   full) and entered into charnames_idx.  If the caller does not want
   the table extended, NULL is returned for positions at or beyond
   *ACT.  Otherwise *TABLE is enlarged with xrealloc until *MAX exceeds
   CNT, the new tail is zero-filled, and the address of entry CNT is
   returned.  */
1188 /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
1189 is possible if we only want to extend the name array. */
1191 find_idx (struct locale_ctype_t
*ctype
, uint32_t **table
, size_t *max
,
1192 size_t *act
, uint32_t idx
)
1197 return table
== NULL
? NULL
: &(*table
)[idx
];
1199 /* Use the charnames_idx lookup table instead of the slow search loop. */
1201 cnt
= idx_table_get (&ctype
->charnames_idx
, idx
);
1204 cnt
= ctype
->charnames_act
;
1206 for (cnt
= 256; cnt
< ctype
->charnames_act
; ++cnt
)
1207 if (ctype
->charnames
[cnt
] == idx
)
1211 /* We have to distinguish two cases: the name is found or not. */
1212 if (cnt
== ctype
->charnames_act
)
1214 /* Extend the name array. */
1215 if (ctype
->charnames_act
== ctype
->charnames_max
)
1217 ctype
->charnames_max
*= 2;
1218 ctype
->charnames
= (uint32_t *)
1219 xrealloc (ctype
->charnames
,
1220 sizeof (uint32_t) * ctype
->charnames_max
);
1222 ctype
->charnames
[ctype
->charnames_act
++] = idx
;
1223 idx_table_add (&ctype
->charnames_idx
, idx
, cnt
);
1227 /* We have done everything we are asked to do. */
1231 /* The caller does not want to extend the table. */
1232 return (cnt
>= *act
? NULL
: &(*table
)[cnt
]);
1238 size_t old_max
= *max
;
1241 while (*max
<= cnt
);
1244 (uint32_t *) xrealloc (*table
, *max
* sizeof (uint32_t));
1245 memset (&(*table
)[old_max
], '\0',
1246 (*max
- old_max
) * sizeof (uint32_t));
1252 return &(*table
)[cnt
];
1257 get_character (struct token
*now
, const struct charmap_t
*charmap
,
1258 struct repertoire_t
*repertoire
,
1259 struct charseq
**seqp
, uint32_t *wchp
)
1261 if (now
->tok
== tok_bsymbol
)
1263 /* This will hopefully be the normal case. */
1264 *wchp
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
1265 now
->val
.str
.lenmb
);
1266 *seqp
= charmap_find_value (charmap
, now
->val
.str
.startmb
,
1267 now
->val
.str
.lenmb
);
1269 else if (now
->tok
== tok_ucs4
)
1273 snprintf (utmp
, sizeof (utmp
), "U%08X", now
->val
.ucs4
);
1274 *seqp
= charmap_find_value (charmap
, utmp
, 9);
1277 *seqp
= repertoire_find_seq (repertoire
, now
->val
.ucs4
);
1281 /* Compute the value in the charmap from the UCS value. */
1282 const char *symbol
= repertoire_find_symbol (repertoire
,
1288 *seqp
= charmap_find_value (charmap
, symbol
, strlen (symbol
));
1292 if (repertoire
!= NULL
)
1294 /* Insert a negative entry. */
1295 static const struct charseq negative
1296 = { .ucs4
= ILLEGAL_CHAR_VALUE
};
1297 uint32_t *newp
= obstack_alloc (&repertoire
->mem_pool
,
1299 *newp
= now
->val
.ucs4
;
1301 insert_entry (&repertoire
->seq_table
, newp
,
1302 sizeof (uint32_t), (void *) &negative
);
1306 (*seqp
)->ucs4
= now
->val
.ucs4
;
1308 else if ((*seqp
)->ucs4
!= now
->val
.ucs4
)
1311 *wchp
= now
->val
.ucs4
;
1313 else if (now
->tok
== tok_charcode
)
1315 /* We must map from the byte code to UCS4. */
1316 *seqp
= charmap_find_symbol (charmap
, now
->val
.str
.startmb
,
1317 now
->val
.str
.lenmb
);
1320 *wchp
= ILLEGAL_CHAR_VALUE
;
1323 if ((*seqp
)->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1324 (*seqp
)->ucs4
= repertoire_find_value (repertoire
, (*seqp
)->name
,
1325 strlen ((*seqp
)->name
));
1326 *wchp
= (*seqp
)->ucs4
;
1336 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1337 the .(2). counterparts. */
1339 charclass_symbolic_ellipsis (struct linereader
*ldfile
,
1340 struct locale_ctype_t
*ctype
,
1341 const struct charmap_t
*charmap
,
1342 struct repertoire_t
*repertoire
,
1344 const char *last_str
,
1345 unsigned long int class256_bit
,
1346 unsigned long int class_bit
, int base
,
1347 int ignore_content
, int handle_digits
, int step
)
1349 const char *nowstr
= now
->val
.str
.startmb
;
1350 char tmp
[now
->val
.str
.lenmb
+ 1];
1353 unsigned long int from
;
1354 unsigned long int to
;
1356 /* We have to compute the ellipsis values using the symbolic names. */
1357 assert (last_str
!= NULL
);
1359 if (strlen (last_str
) != now
->val
.str
.lenmb
)
1363 _("`%s' and `%.*s' are not valid names for symbolic range"),
1364 last_str
, (int) now
->val
.str
.lenmb
, nowstr
);
1368 if (memcmp (last_str
, nowstr
, now
->val
.str
.lenmb
) == 0)
1369 /* Nothing to do, the names are the same. */
1372 for (cp
= last_str
; *cp
== *(nowstr
+ (cp
- last_str
)); ++cp
)
1376 from
= strtoul (cp
, &endp
, base
);
1377 if ((from
== UINT_MAX
&& errno
== ERANGE
) || *endp
!= '\0')
1380 to
= strtoul (nowstr
+ (cp
- last_str
), &endp
, base
);
1381 if ((to
== UINT_MAX
&& errno
== ERANGE
)
1382 || (endp
- nowstr
) != now
->val
.str
.lenmb
|| from
>= to
)
1385 /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
1386 if (!ignore_content
)
1388 now
->val
.str
.startmb
= tmp
;
1389 while ((from
+= step
) <= to
)
1391 struct charseq
*seq
;
1394 sprintf (tmp
, (base
== 10 ? "%.*s%0*ld" : "%.*s%0*lX"),
1395 (int) (cp
- last_str
), last_str
,
1396 (int) (now
->val
.str
.lenmb
- (cp
- last_str
)),
1399 get_character (now
, charmap
, repertoire
, &seq
, &wch
);
1401 if (seq
!= NULL
&& seq
->nbytes
== 1)
1402 /* Yep, we can store information about this byte sequence. */
1403 ctype
->class256_collection
[seq
->bytes
[0]] |= class256_bit
;
1405 if (wch
!= ILLEGAL_CHAR_VALUE
&& class_bit
!= 0)
1406 /* We have the UCS4 position. */
1407 *find_idx (ctype
, &ctype
->class_collection
,
1408 &ctype
->class_collection_max
,
1409 &ctype
->class_collection_act
, wch
) |= class_bit
;
1411 if (handle_digits
== 1)
1413 /* We must store the digit values. */
1414 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1416 ctype
->mbdigits_max
*= 2;
1417 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1418 (ctype
->mbdigits_max
1419 * sizeof (char *)));
1420 ctype
->wcdigits_max
*= 2;
1421 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1422 (ctype
->wcdigits_max
1423 * sizeof (uint32_t)));
1426 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
1427 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
1429 else if (handle_digits
== 2)
1431 /* We must store the digit values. */
1432 if (ctype
->outdigits_act
>= 10)
1434 lr_error (ldfile
, _("\
1435 %s: field `%s' does not contain exactly ten entries"),
1436 "LC_CTYPE", "outdigit");
1440 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
1441 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
1442 ++ctype
->outdigits_act
;
1449 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. */
1451 charclass_ucs4_ellipsis (struct linereader
*ldfile
,
1452 struct locale_ctype_t
*ctype
,
1453 const struct charmap_t
*charmap
,
1454 struct repertoire_t
*repertoire
,
1455 struct token
*now
, uint32_t last_wch
,
1456 unsigned long int class256_bit
,
1457 unsigned long int class_bit
, int ignore_content
,
1458 int handle_digits
, int step
)
1460 if (last_wch
> now
->val
.ucs4
)
1462 lr_error (ldfile
, _("\
1463 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1464 (now
->val
.ucs4
| last_wch
) < 65536 ? 4 : 8, now
->val
.ucs4
,
1465 (now
->val
.ucs4
| last_wch
) < 65536 ? 4 : 8, last_wch
);
1469 if (!ignore_content
)
1470 while ((last_wch
+= step
) <= now
->val
.ucs4
)
1472 /* We have to find out whether there is a byte sequence corresponding
1473 to this UCS4 value. */
1474 struct charseq
*seq
;
1477 snprintf (utmp
, sizeof (utmp
), "U%08X", last_wch
);
1478 seq
= charmap_find_value (charmap
, utmp
, 9);
1481 snprintf (utmp
, sizeof (utmp
), "U%04X", last_wch
);
1482 seq
= charmap_find_value (charmap
, utmp
, 5);
1486 /* Try looking in the repertoire map. */
1487 seq
= repertoire_find_seq (repertoire
, last_wch
);
1489 /* If this is the first time we look for this sequence create a new
1493 static const struct charseq negative
1494 = { .ucs4
= ILLEGAL_CHAR_VALUE
};
1496 /* Find the symbolic name for this UCS4 value. */
1497 if (repertoire
!= NULL
)
1499 const char *symbol
= repertoire_find_symbol (repertoire
,
1501 uint32_t *newp
= obstack_alloc (&repertoire
->mem_pool
,
1506 /* We have a name, now search the multibyte value. */
1507 seq
= charmap_find_value (charmap
, symbol
, strlen (symbol
));
1510 /* We have to create a fake entry. */
1511 seq
= (struct charseq
*) &negative
;
1513 seq
->ucs4
= last_wch
;
1515 insert_entry (&repertoire
->seq_table
, newp
, sizeof (uint32_t),
1519 /* We have to create a fake entry. */
1520 seq
= (struct charseq
*) &negative
;
1523 /* We have a name, now search the multibyte value. */
1524 if (seq
->ucs4
== last_wch
&& seq
->nbytes
== 1)
1525 /* Yep, we can store information about this byte sequence. */
1526 ctype
->class256_collection
[(size_t) seq
->bytes
[0]]
1529 /* And of course we have the UCS4 position. */
1531 *find_idx (ctype
, &ctype
->class_collection
,
1532 &ctype
->class_collection_max
,
1533 &ctype
->class_collection_act
, last_wch
) |= class_bit
;
1535 if (handle_digits
== 1)
1537 /* We must store the digit values. */
1538 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1540 ctype
->mbdigits_max
*= 2;
1541 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1542 (ctype
->mbdigits_max
1543 * sizeof (char *)));
1544 ctype
->wcdigits_max
*= 2;
1545 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1546 (ctype
->wcdigits_max
1547 * sizeof (uint32_t)));
1550 ctype
->mbdigits
[ctype
->mbdigits_act
++] = (seq
->ucs4
== last_wch
1552 ctype
->wcdigits
[ctype
->wcdigits_act
++] = last_wch
;
1554 else if (handle_digits
== 2)
1556 /* We must store the digit values. */
1557 if (ctype
->outdigits_act
>= 10)
1559 lr_error (ldfile
, _("\
1560 %s: field `%s' does not contain exactly ten entries"),
1561 "LC_CTYPE", "outdigit");
1565 ctype
->mboutdigits
[ctype
->outdigits_act
] = (seq
->ucs4
== last_wch
1567 ctype
->wcoutdigits
[ctype
->outdigits_act
] = last_wch
;
1568 ++ctype
->outdigits_act
;
1574 /* Ellipsis as in `/xea/x12.../xea/x34'. */
1576 charclass_charcode_ellipsis (struct linereader
*ldfile
,
1577 struct locale_ctype_t
*ctype
,
1578 const struct charmap_t
*charmap
,
1579 struct repertoire_t
*repertoire
,
1580 struct token
*now
, char *last_charcode
,
1581 uint32_t last_charcode_len
,
1582 unsigned long int class256_bit
,
1583 unsigned long int class_bit
, int ignore_content
,
1586 /* First check whether the to-value is larger. */
1587 if (now
->val
.charcode
.nbytes
!= last_charcode_len
)
1589 lr_error (ldfile
, _("\
1590 start and end character sequence of range must have the same length"));
1594 if (memcmp (last_charcode
, now
->val
.charcode
.bytes
, last_charcode_len
) > 0)
1596 lr_error (ldfile
, _("\
1597 to-value character sequence is smaller than from-value sequence"));
1601 if (!ignore_content
)
1605 /* Increment the byte sequence value. */
1606 struct charseq
*seq
;
1610 for (i
= last_charcode_len
- 1; i
>= 0; --i
)
1611 if (++last_charcode
[i
] != 0)
1614 if (last_charcode_len
== 1)
1615 /* Of course we have the charcode value. */
1616 ctype
->class256_collection
[(size_t) last_charcode
[0]]
1619 /* Find the symbolic name. */
1620 seq
= charmap_find_symbol (charmap
, last_charcode
,
1624 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1625 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1626 strlen (seq
->name
));
1627 wch
= seq
== NULL
? ILLEGAL_CHAR_VALUE
: seq
->ucs4
;
1629 if (wch
!= ILLEGAL_CHAR_VALUE
&& class_bit
!= 0)
1630 *find_idx (ctype
, &ctype
->class_collection
,
1631 &ctype
->class_collection_max
,
1632 &ctype
->class_collection_act
, wch
) |= class_bit
;
1635 wch
= ILLEGAL_CHAR_VALUE
;
1637 if (handle_digits
== 1)
1639 /* We must store the digit values. */
1640 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1642 ctype
->mbdigits_max
*= 2;
1643 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1644 (ctype
->mbdigits_max
1645 * sizeof (char *)));
1646 ctype
->wcdigits_max
*= 2;
1647 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1648 (ctype
->wcdigits_max
1649 * sizeof (uint32_t)));
1652 seq
= xmalloc (sizeof (struct charseq
) + last_charcode_len
);
1653 memcpy ((char *) (seq
+ 1), last_charcode
, last_charcode_len
);
1654 seq
->nbytes
= last_charcode_len
;
1656 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
1657 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
1659 else if (handle_digits
== 2)
1661 struct charseq
*seq
;
1662 /* We must store the digit values. */
1663 if (ctype
->outdigits_act
>= 10)
1665 lr_error (ldfile
, _("\
1666 %s: field `%s' does not contain exactly ten entries"),
1667 "LC_CTYPE", "outdigit");
1671 seq
= xmalloc (sizeof (struct charseq
) + last_charcode_len
);
1672 memcpy ((char *) (seq
+ 1), last_charcode
, last_charcode_len
);
1673 seq
->nbytes
= last_charcode_len
;
1675 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
1676 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
1677 ++ctype
->outdigits_act
;
1680 while (memcmp (last_charcode
, now
->val
.charcode
.bytes
,
1681 last_charcode_len
) != 0);
1687 find_translit2 (struct locale_ctype_t
*ctype
, const struct charmap_t
*charmap
,
1690 struct translit_t
*trunp
= ctype
->translit
;
1691 struct translit_ignore_t
*tirunp
= ctype
->translit_ignore
;
1693 while (trunp
!= NULL
)
1695 /* XXX We simplify things here. The transliterations we look
1696 for are only allowed to have one character. */
1697 if (trunp
->from
[0] == wch
&& trunp
->from
[1] == 0)
1699 /* Found it. Now look for a transliteration which can be
1700 represented with the character set. */
1701 struct translit_to_t
*torunp
= trunp
->to
;
1703 while (torunp
!= NULL
)
1707 for (i
= 0; torunp
->str
[i
] != 0; ++i
)
1711 snprintf (utmp
, sizeof (utmp
), "U%08X", torunp
->str
[i
]);
1712 if (charmap_find_value (charmap
, utmp
, 9) == NULL
)
1713 /* This character cannot be represented. */
1717 if (torunp
->str
[i
] == 0)
1720 torunp
= torunp
->next
;
1726 trunp
= trunp
->next
;
1729 /* Check for ignored chars. */
1730 while (tirunp
!= NULL
)
1732 if (tirunp
->from
<= wch
&& tirunp
->to
>= wch
)
1736 for (wi
= tirunp
->from
; wi
<= wch
; wi
+= tirunp
->step
)
1742 /* Nothing found. */
1748 find_translit (struct localedef_t
*locale
, const struct charmap_t
*charmap
,
1751 struct locale_ctype_t
*ctype
;
1752 uint32_t *result
= NULL
;
1754 assert (locale
!= NULL
);
1755 ctype
= locale
->categories
[LC_CTYPE
].ctype
;
1760 if (ctype
->translit
!= NULL
)
1761 result
= find_translit2 (ctype
, charmap
, wch
);
1765 struct translit_include_t
*irunp
= ctype
->translit_include
;
1767 while (irunp
!= NULL
&& result
== NULL
)
1769 result
= find_translit (find_locale (CTYPE_LOCALE
,
1771 irunp
->copy_repertoire
,
1774 irunp
= irunp
->next
;
1782 /* Read one transliteration entry. */
1784 read_widestring (struct linereader
*ldfile
, struct token
*now
,
1785 const struct charmap_t
*charmap
,
1786 struct repertoire_t
*repertoire
)
1790 if (now
->tok
== tok_default_missing
)
1791 /* The special name "" will denote this case. */
1793 else if (now
->tok
== tok_bsymbol
)
1795 /* Get the value from the repertoire. */
1796 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1797 wstr
[0] = repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
1798 now
->val
.str
.lenmb
);
1799 if (wstr
[0] == ILLEGAL_CHAR_VALUE
)
1801 /* We cannot proceed, we don't know the UCS4 value. */
1808 else if (now
->tok
== tok_ucs4
)
1810 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1811 wstr
[0] = now
->val
.ucs4
;
1814 else if (now
->tok
== tok_charcode
)
1816 /* Argh, we have to convert to the symbol name first and then to the
1818 struct charseq
*seq
= charmap_find_symbol (charmap
,
1819 now
->val
.str
.startmb
,
1820 now
->val
.str
.lenmb
);
1822 /* Cannot find the UCS4 value. */
1825 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1826 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1827 strlen (seq
->name
));
1828 if (seq
->ucs4
== ILLEGAL_CHAR_VALUE
)
1829 /* We cannot proceed, we don't know the UCS4 value. */
1832 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1833 wstr
[0] = seq
->ucs4
;
1836 else if (now
->tok
== tok_string
)
1838 wstr
= now
->val
.str
.startwc
;
1839 if (wstr
== NULL
|| wstr
[0] == 0)
1844 if (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
1845 lr_ignore_rest (ldfile
, 0);
1846 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1847 return (uint32_t *) -1l;
1855 read_translit_entry (struct linereader
*ldfile
, struct locale_ctype_t
*ctype
,
1856 struct token
*now
, const struct charmap_t
*charmap
,
1857 struct repertoire_t
*repertoire
)
1859 uint32_t *from_wstr
= read_widestring (ldfile
, now
, charmap
, repertoire
);
1860 struct translit_t
*result
;
1861 struct translit_to_t
**top
;
1862 struct obstack
*ob
= &ctype
->mempool
;
1866 if (from_wstr
== NULL
)
1867 /* There is no valid from string. */
1870 result
= (struct translit_t
*) obstack_alloc (ob
,
1871 sizeof (struct translit_t
));
1872 result
->from
= from_wstr
;
1873 result
->fname
= ldfile
->fname
;
1874 result
->lineno
= ldfile
->lineno
;
1875 result
->next
= NULL
;
1885 /* Next we have one or more transliterations. They are
1886 separated by semicolons. */
1887 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
1889 if (!first
&& (now
->tok
== tok_semicolon
|| now
->tok
== tok_eol
))
1891 /* One string read. */
1892 const uint32_t zero
= 0;
1896 obstack_grow (ob
, &zero
, 4);
1897 to_wstr
= obstack_finish (ob
);
1899 *top
= obstack_alloc (ob
, sizeof (struct translit_to_t
));
1900 (*top
)->str
= to_wstr
;
1901 (*top
)->next
= NULL
;
1904 if (now
->tok
== tok_eol
)
1906 result
->next
= ctype
->translit
;
1907 ctype
->translit
= result
;
1912 top
= &(*top
)->next
;
1917 to_wstr
= read_widestring (ldfile
, now
, charmap
, repertoire
);
1918 if (to_wstr
== (uint32_t *) -1l)
1920 /* An error occurred. */
1921 obstack_free (ob
, result
);
1925 if (to_wstr
== NULL
)
1928 /* This value is usable. */
1929 obstack_grow (ob
, to_wstr
, wcslen ((wchar_t *) to_wstr
) * 4);
1938 read_translit_ignore_entry (struct linereader
*ldfile
,
1939 struct locale_ctype_t
*ctype
,
1940 const struct charmap_t
*charmap
,
1941 struct repertoire_t
*repertoire
)
1943 /* We expect a semicolon-separated list of characters we ignore. We are
1944 only interested in the wide character definitions. These must be
1945 single characters, possibly defining a range when an ellipsis is used. */
1948 struct token
*now
= lr_token (ldfile
, charmap
, NULL
, repertoire
,
1950 struct translit_ignore_t
*newp
;
1953 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
1956 _("premature end of `translit_ignore' definition"));
1960 if (now
->tok
!= tok_bsymbol
&& now
->tok
!= tok_ucs4
)
1962 lr_error (ldfile
, _("syntax error"));
1963 lr_ignore_rest (ldfile
, 0);
1967 if (now
->tok
== tok_ucs4
)
1968 from
= now
->val
.ucs4
;
1970 /* Try to get the value. */
1971 from
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
1972 now
->val
.str
.lenmb
);
1974 if (from
== ILLEGAL_CHAR_VALUE
)
1976 lr_error (ldfile
, "invalid character name");
1981 newp
= (struct translit_ignore_t
*)
1982 obstack_alloc (&ctype
->mempool
, sizeof (struct translit_ignore_t
));
1987 newp
->next
= ctype
->translit_ignore
;
1988 ctype
->translit_ignore
= newp
;
1991 /* Now we expect either a semicolon, an ellipsis, or the end of the
1993 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
1995 if (now
->tok
== tok_ellipsis2
|| now
->tok
== tok_ellipsis2_2
)
1997 /* XXX Should we bother implementing `....'? `...' certainly
1998 will not be implemented. */
2000 int step
= now
->tok
== tok_ellipsis2_2
? 2 : 1;
2002 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
2004 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2007 _("premature end of `translit_ignore' definition"));
2011 if (now
->tok
!= tok_bsymbol
&& now
->tok
!= tok_ucs4
)
2013 lr_error (ldfile
, _("syntax error"));
2014 lr_ignore_rest (ldfile
, 0);
2018 if (now
->tok
== tok_ucs4
)
2021 /* Try to get the value. */
2022 to
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
2023 now
->val
.str
.lenmb
);
2025 if (to
== ILLEGAL_CHAR_VALUE
)
2026 lr_error (ldfile
, "invalid character name");
2029 /* Make sure the `to'-value is larger. */
2036 lr_error (ldfile
, _("\
2037 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2038 (to
| from
) < 65536 ? 4 : 8, to
,
2039 (to
| from
) < 65536 ? 4 : 8, from
);
2042 /* And the next token. */
2043 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
2046 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2050 if (now
->tok
== tok_semicolon
)
2054 /* If we come here something is wrong. */
2055 lr_error (ldfile
, _("syntax error"));
2056 lr_ignore_rest (ldfile
, 0);
2062 /* The parser for the LC_CTYPE section of the locale definition. */
2064 ctype_read (struct linereader
*ldfile
, struct localedef_t
*result
,
2065 const struct charmap_t
*charmap
, const char *repertoire_name
,
2068 struct repertoire_t
*repertoire
= NULL
;
2069 struct locale_ctype_t
*ctype
;
2071 enum token_t nowtok
;
2073 uint32_t last_wch
= 0;
2074 enum token_t last_token
;
2075 enum token_t ellipsis_token
;
2077 char last_charcode
[16];
2078 size_t last_charcode_len
= 0;
2079 const char *last_str
= NULL
;
2081 struct localedef_t
*copy_locale
= NULL
;
2083 /* Get the repertoire we have to use. */
2084 if (repertoire_name
!= NULL
)
2085 repertoire
= repertoire_read (repertoire_name
);
2087 /* The rest of the line containing `LC_CTYPE' must be free. */
2088 lr_ignore_rest (ldfile
, 1);
2093 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2096 while (nowtok
== tok_eol
);
2098 /* If we see `copy' now we are almost done. */
2099 if (nowtok
== tok_copy
)
2101 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2102 if (now
->tok
!= tok_string
)
2104 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2108 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2109 while (now
->tok
!= tok_eof
&& now
->tok
!= tok_end
);
2111 if (now
->tok
!= tok_eof
2112 || (now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
),
2113 now
->tok
== tok_eof
))
2114 lr_error (ldfile
, _("%s: premature end of file"), "LC_CTYPE");
2115 else if (now
->tok
!= tok_lc_ctype
)
2117 lr_error (ldfile
, _("\
2118 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2119 lr_ignore_rest (ldfile
, 0);
2122 lr_ignore_rest (ldfile
, 1);
2127 if (! ignore_content
)
2129 /* Get the locale definition. */
2130 copy_locale
= load_locale (LC_CTYPE
, now
->val
.str
.startmb
,
2131 repertoire_name
, charmap
, NULL
);
2132 if ((copy_locale
->avail
& CTYPE_LOCALE
) == 0)
2134 /* Not yet loaded. So do it now. */
2135 if (locfile_read (copy_locale
, charmap
) != 0)
2139 if (copy_locale
->categories
[LC_CTYPE
].ctype
== NULL
)
2143 lr_ignore_rest (ldfile
, 1);
2145 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2149 /* Prepare the data structures. */
2150 ctype_startup (ldfile
, result
, charmap
, copy_locale
, ignore_content
);
2151 ctype
= result
->categories
[LC_CTYPE
].ctype
;
2153 /* Remember the repertoire we use. */
2154 if (!ignore_content
)
2155 ctype
->repertoire
= repertoire
;
2159 unsigned long int class_bit
= 0;
2160 unsigned long int class256_bit
= 0;
2161 int handle_digits
= 0;
2163 /* Of course we don't proceed beyond the end of file. */
2164 if (nowtok
== tok_eof
)
2167 /* Ingore empty lines. */
2168 if (nowtok
== tok_eol
)
2170 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2178 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2179 while (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2181 ctype_class_new (ldfile
, ctype
, now
->val
.str
.startmb
);
2182 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2183 if (now
->tok
!= tok_semicolon
)
2185 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2187 if (now
->tok
!= tok_eol
)
2189 %s: syntax error in definition of new character class"), "LC_CTYPE");
2193 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2194 while (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2196 ctype_map_new (ldfile
, ctype
, now
->val
.str
.startmb
, charmap
);
2197 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2198 if (now
->tok
!= tok_semicolon
)
2200 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2202 if (now
->tok
!= tok_eol
)
2204 %s: syntax error in definition of new character map"), "LC_CTYPE");
2208 /* Ignore the rest of the line if we don't need the input of
2212 lr_ignore_rest (ldfile
, 0);
2216 /* We simply forget the `class' keyword and use the following
2217 operand to determine the bit. */
2218 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2219 if (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2221 /* Must can be one of the predefined class names. */
2222 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
2223 if (strcmp (ctype
->classnames
[cnt
], now
->val
.str
.startmb
) == 0)
2225 if (cnt
>= ctype
->nr_charclass
)
2227 /* OK, it's a new class. */
2228 ctype_class_new (ldfile
, ctype
, now
->val
.str
.startmb
);
2230 class_bit
= _ISwbit (ctype
->nr_charclass
- 1);
2234 class_bit
= _ISwbit (cnt
);
2236 free (now
->val
.str
.startmb
);
2239 else if (now
->tok
== tok_digit
)
2240 goto handle_tok_digit
;
2241 else if (now
->tok
< tok_upper
|| now
->tok
> tok_blank
)
2245 class_bit
= BITw (now
->tok
);
2246 class256_bit
= BIT (now
->tok
);
2249 /* The next character must be a semicolon. */
2250 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2251 if (now
->tok
!= tok_semicolon
)
2253 goto read_charclass
;
2266 /* Ignore the rest of the line if we don't need the input of
2270 lr_ignore_rest (ldfile
, 0);
2274 class_bit
= BITw (now
->tok
);
2275 class256_bit
= BIT (now
->tok
);
2278 ctype
->class_done
|= class_bit
;
2279 last_token
= tok_none
;
2280 ellipsis_token
= tok_none
;
2282 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2283 while (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
2286 struct charseq
*seq
;
2288 if (ellipsis_token
== tok_none
)
2290 if (get_character (now
, charmap
, repertoire
, &seq
, &wch
))
2293 if (!ignore_content
&& seq
!= NULL
&& seq
->nbytes
== 1)
2294 /* Yep, we can store information about this byte
2296 ctype
->class256_collection
[seq
->bytes
[0]] |= class256_bit
;
2298 if (!ignore_content
&& wch
!= ILLEGAL_CHAR_VALUE
2300 /* We have the UCS4 position. */
2301 *find_idx (ctype
, &ctype
->class_collection
,
2302 &ctype
->class_collection_max
,
2303 &ctype
->class_collection_act
, wch
) |= class_bit
;
2305 last_token
= now
->tok
;
2306 /* Terminate the string. */
2307 if (last_token
== tok_bsymbol
)
2309 now
->val
.str
.startmb
[now
->val
.str
.lenmb
] = '\0';
2310 last_str
= now
->val
.str
.startmb
;
2315 memcpy (last_charcode
, now
->val
.charcode
.bytes
, 16);
2316 last_charcode_len
= now
->val
.charcode
.nbytes
;
2318 if (!ignore_content
&& handle_digits
== 1)
2320 /* We must store the digit values. */
2321 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
2323 ctype
->mbdigits_max
+= 10;
2324 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
2325 (ctype
->mbdigits_max
2326 * sizeof (char *)));
2327 ctype
->wcdigits_max
+= 10;
2328 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
2329 (ctype
->wcdigits_max
2330 * sizeof (uint32_t)));
2333 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
2334 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
2336 else if (!ignore_content
&& handle_digits
== 2)
2338 /* We must store the digit values. */
2339 if (ctype
->outdigits_act
>= 10)
2341 lr_error (ldfile
, _("\
2342 %s: field `%s' does not contain exactly ten entries"),
2343 "LC_CTYPE", "outdigit");
2344 lr_ignore_rest (ldfile
, 0);
2348 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
2349 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
2350 ++ctype
->outdigits_act
;
2355 /* Now it gets complicated. We have to resolve the
2356 ellipsis problem. First we must distinguish between
2357 the different kind of ellipsis and this must match the
2358 tokens we have seen. */
2359 assert (last_token
!= tok_none
);
2361 if (last_token
!= now
->tok
)
2363 lr_error (ldfile
, _("\
2364 ellipsis range must be marked by two operands of same type"));
2365 lr_ignore_rest (ldfile
, 0);
2369 if (last_token
== tok_bsymbol
)
2371 if (ellipsis_token
== tok_ellipsis3
)
2372 lr_error (ldfile
, _("with symbolic name range values \
2373 the absolute ellipsis `...' must not be used"));
2375 charclass_symbolic_ellipsis (ldfile
, ctype
, charmap
,
2376 repertoire
, now
, last_str
,
2377 class256_bit
, class_bit
,
2382 handle_digits
, step
);
2384 else if (last_token
== tok_ucs4
)
2386 if (ellipsis_token
!= tok_ellipsis2
)
2387 lr_error (ldfile
, _("\
2388 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2390 charclass_ucs4_ellipsis (ldfile
, ctype
, charmap
,
2391 repertoire
, now
, last_wch
,
2392 class256_bit
, class_bit
,
2393 ignore_content
, handle_digits
,
2398 assert (last_token
== tok_charcode
);
2400 if (ellipsis_token
!= tok_ellipsis3
)
2401 lr_error (ldfile
, _("\
2402 with character code range values one must use the absolute ellipsis `...'"));
2404 charclass_charcode_ellipsis (ldfile
, ctype
, charmap
,
2408 class256_bit
, class_bit
,
2413 /* Now we have used the last value. */
2414 last_token
= tok_none
;
2417 /* Next we expect a semicolon or the end of the line. */
2418 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2419 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2422 if (last_token
!= tok_none
2423 && now
->tok
>= tok_ellipsis2
&& now
->tok
<= tok_ellipsis4_2
)
2425 if (now
->tok
== tok_ellipsis2_2
)
2427 now
->tok
= tok_ellipsis2
;
2430 else if (now
->tok
== tok_ellipsis4_2
)
2432 now
->tok
= tok_ellipsis4
;
2436 ellipsis_token
= now
->tok
;
2438 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2442 if (now
->tok
!= tok_semicolon
)
2445 /* And get the next character. */
2446 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2448 ellipsis_token
= tok_none
;
2454 /* Ignore the rest of the line if we don't need the input of
2458 lr_ignore_rest (ldfile
, 0);
2463 class_bit
= _ISwdigit
;
2464 class256_bit
= _ISdigit
;
2466 goto read_charclass
;
2469 /* Ignore the rest of the line if we don't need the input of
2473 lr_ignore_rest (ldfile
, 0);
2477 if (ctype
->outdigits_act
!= 0)
2478 lr_error (ldfile
, _("\
2479 %s: field `%s' declared more than once"),
2480 "LC_CTYPE", "outdigit");
2484 goto read_charclass
;
2487 /* Ignore the rest of the line if we don't need the input of
2491 lr_ignore_rest (ldfile
, 0);
2499 /* Ignore the rest of the line if we don't need the input of
2503 lr_ignore_rest (ldfile
, 0);
2511 /* Ignore the rest of the line if we don't need the input of
2515 lr_ignore_rest (ldfile
, 0);
2519 /* We simply forget the `map' keyword and use the following
2520 operand to determine the mapping. */
2521 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2522 if (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2526 for (cnt
= 2; cnt
< ctype
->map_collection_nr
; ++cnt
)
2527 if (strcmp (now
->val
.str
.startmb
, ctype
->mapnames
[cnt
]) == 0)
2530 if (cnt
< ctype
->map_collection_nr
)
2531 free (now
->val
.str
.startmb
);
2533 /* OK, it's a new map. */
2534 ctype_map_new (ldfile
, ctype
, now
->val
.str
.startmb
, charmap
);
2538 else if (now
->tok
< tok_toupper
|| now
->tok
> tok_tolower
)
2541 mapidx
= now
->tok
- tok_toupper
;
2543 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2544 /* This better should be a semicolon. */
2545 if (now
->tok
!= tok_semicolon
)
2549 /* Test whether this mapping was already defined. */
2550 if (ctype
->tomap_done
[mapidx
])
2552 lr_error (ldfile
, _("duplicated definition for mapping `%s'"),
2553 ctype
->mapnames
[mapidx
]);
2554 lr_ignore_rest (ldfile
, 0);
2557 ctype
->tomap_done
[mapidx
] = 1;
2559 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2560 while (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
2562 struct charseq
*from_seq
;
2564 struct charseq
*to_seq
;
2567 /* Every pair starts with an opening brace. */
2568 if (now
->tok
!= tok_open_brace
)
2571 /* Next comes the from-value. */
2572 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2573 if (get_character (now
, charmap
, repertoire
, &from_seq
,
2577 /* The next is a comma. */
2578 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2579 if (now
->tok
!= tok_comma
)
2582 /* And the other value. */
2583 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2584 if (get_character (now
, charmap
, repertoire
, &to_seq
,
2588 /* And the last thing is the closing brace. */
2589 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2590 if (now
->tok
!= tok_close_brace
)
2593 if (!ignore_content
)
2595 /* Check whether the mapping converts from an ASCII value
2596 to a non-ASCII value. */
2597 if (from_seq
!= NULL
&& from_seq
->nbytes
== 1
2598 && isascii (from_seq
->bytes
[0])
2599 && to_seq
!= NULL
&& (to_seq
->nbytes
!= 1
2600 || !isascii (to_seq
->bytes
[0])))
2601 ctype
->to_nonascii
= 1;
2603 if (mapidx
< 2 && from_seq
!= NULL
&& to_seq
!= NULL
2604 && from_seq
->nbytes
== 1 && to_seq
->nbytes
== 1)
2605 /* We can use this value. */
2606 ctype
->map256_collection
[mapidx
][from_seq
->bytes
[0]]
2609 if (from_wch
!= ILLEGAL_CHAR_VALUE
2610 && to_wch
!= ILLEGAL_CHAR_VALUE
)
2611 /* Both correct values. */
2612 *find_idx (ctype
, &ctype
->map_collection
[mapidx
],
2613 &ctype
->map_collection_max
[mapidx
],
2614 &ctype
->map_collection_act
[mapidx
],
2618 /* Now comes a semicolon or the end of the line/file. */
2619 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2620 if (now
->tok
== tok_semicolon
)
2621 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2625 case tok_translit_start
:
2626 /* Ignore the entire translit section with its peculiar syntax
2627 if we don't need the input. */
2632 lr_ignore_rest (ldfile
, 0);
2633 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2635 while (now
->tok
!= tok_translit_end
&& now
->tok
!= tok_eof
);
2637 if (now
->tok
== tok_eof
)
2638 lr_error (ldfile
, _(\
2639 "%s: `translit_start' section does not end with `translit_end'"),
2645 /* The rest of the line better should be empty. */
2646 lr_ignore_rest (ldfile
, 1);
2648 /* We count here the number of allocated entries in the `translit'
2652 ldfile
->translate_strings
= 1;
2653 ldfile
->return_widestr
= 1;
2655 /* We proceed until we see the `translit_end' token. */
2656 while (now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
),
2657 now
->tok
!= tok_translit_end
&& now
->tok
!= tok_eof
)
2659 if (now
->tok
== tok_eol
)
2660 /* Ignore empty lines. */
2663 if (now
->tok
== tok_include
)
2665 /* We have to include locale. */
2666 const char *locale_name
;
2667 const char *repertoire_name
;
2668 struct translit_include_t
*include_stmt
, **include_ptr
;
2670 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2671 /* This should be a string or an identifier. In any
2672 case something to name a locale. */
2673 if (now
->tok
!= tok_string
&& now
->tok
!= tok_ident
)
2676 lr_error (ldfile
, _("%s: syntax error"), "LC_CTYPE");
2677 lr_ignore_rest (ldfile
, 0);
2680 locale_name
= now
->val
.str
.startmb
;
2682 /* Next should be a semicolon. */
2683 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2684 if (now
->tok
!= tok_semicolon
)
2685 goto translit_syntax
;
2687 /* Now the repertoire name. */
2688 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2689 if ((now
->tok
!= tok_string
&& now
->tok
!= tok_ident
)
2690 || now
->val
.str
.startmb
== NULL
)
2691 goto translit_syntax
;
2692 repertoire_name
= now
->val
.str
.startmb
;
2693 if (repertoire_name
[0] == '\0')
2694 /* Ignore the empty string. */
2695 repertoire_name
= NULL
;
2697 /* Save the include statement for later processing. */
2698 include_stmt
= (struct translit_include_t
*)
2699 xmalloc (sizeof (struct translit_include_t
));
2700 include_stmt
->copy_locale
= locale_name
;
2701 include_stmt
->copy_repertoire
= repertoire_name
;
2702 include_stmt
->next
= NULL
;
2704 include_ptr
= &ctype
->translit_include
;
2705 while (*include_ptr
!= NULL
)
2706 include_ptr
= &(*include_ptr
)->next
;
2707 *include_ptr
= include_stmt
;
2709 /* The rest of the line must be empty. */
2710 lr_ignore_rest (ldfile
, 1);
2712 /* Make sure the locale is read. */
2713 add_to_readlist (LC_CTYPE
, locale_name
, repertoire_name
,
2717 else if (now
->tok
== tok_default_missing
)
2723 /* We expect a single character or string as the
2725 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2726 wstr
= read_widestring (ldfile
, now
, charmap
,
2731 if (ctype
->default_missing
!= NULL
)
2733 lr_error (ldfile
, _("\
2734 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2735 record_error_at_line (0, 0,
2736 ctype
->default_missing_file
,
2737 ctype
->default_missing_lineno
,
2739 previous definition was here"));
2743 ctype
->default_missing
= wstr
;
2744 ctype
->default_missing_file
= ldfile
->fname
;
2745 ctype
->default_missing_lineno
= ldfile
->lineno
;
2747 /* We can have more entries, ignore them. */
2748 lr_ignore_rest (ldfile
, 0);
2751 else if (wstr
== (uint32_t *) -1l)
2752 /* This was a syntax error. */
2755 /* Maybe there is another replacement we can use. */
2756 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2757 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2759 /* Nothing found. We tell the user. */
2760 lr_error (ldfile
, _("\
2761 %s: no representable `default_missing' definition found"), "LC_CTYPE");
2764 if (now
->tok
!= tok_semicolon
)
2765 goto translit_syntax
;
2770 else if (now
->tok
== tok_translit_ignore
)
2772 read_translit_ignore_entry (ldfile
, ctype
, charmap
,
2777 read_translit_entry (ldfile
, ctype
, now
, charmap
, repertoire
);
2779 ldfile
->return_widestr
= 0;
2781 if (now
->tok
== tok_eof
)
2782 lr_error (ldfile
, _(\
2783 "%s: `translit_start' section does not end with `translit_end'"),
2789 /* Ignore the rest of the line if we don't need the input of
2793 lr_ignore_rest (ldfile
, 0);
2797 /* This could mean one of several things. First test whether
2798 it's a character class name. */
2799 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
2800 if (strcmp (now
->val
.str
.startmb
, ctype
->classnames
[cnt
]) == 0)
2802 if (cnt
< ctype
->nr_charclass
)
2804 class_bit
= _ISwbit (cnt
);
2805 class256_bit
= cnt
<= 11 ? _ISbit (cnt
) : 0;
2806 free (now
->val
.str
.startmb
);
2807 goto read_charclass
;
2809 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
)
2810 if (strcmp (now
->val
.str
.startmb
, ctype
->mapnames
[cnt
]) == 0)
2812 if (cnt
< ctype
->map_collection_nr
)
2815 free (now
->val
.str
.startmb
);
2821 /* Next we assume `LC_CTYPE'. */
2822 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2823 if (now
->tok
== tok_eof
)
2825 if (now
->tok
== tok_eol
)
2826 lr_error (ldfile
, _("%s: incomplete `END' line"),
2828 else if (now
->tok
!= tok_lc_ctype
)
2829 lr_error (ldfile
, _("\
2830 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2831 lr_ignore_rest (ldfile
, now
->tok
== tok_lc_ctype
);
2836 if (now
->tok
!= tok_eof
)
2837 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2840 /* Prepare for the next round. */
2841 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2845 /* When we come here we reached the end of the file. */
2846 lr_error (ldfile
, _("%s: premature end of file"), "LC_CTYPE");
2850 /* Subroutine of set_class_defaults, below.
   Walks the character values FROM through TO (inclusive) and gives each
   of them the class bit selected by BITPOS.  A character is looked up in
   CHARMAP under the name held in TMP (length 1) and under its `U%08X'
   name (length 9).  A character that cannot be found, or whose charmap
   sequence is not exactly one byte long, is reported through
   record_error; otherwise the bit is ORed into the
   CTYPE->class256_collection entry indexed by that byte.  The
   wide-character bit is ORed into CTYPE->class_collection for CH via
   ELEM.  */
2852 set_one_default (struct locale_ctype_t
*ctype
,
2853 const struct charmap_t
*charmap
,
2854 int bitpos
, int from
, int to
)
/* BIT is the mask applied to the byte-indexed table, BITW the mask
   applied to the wide-character table.  */
2858 int bit
= _ISbit (bitpos
);
2859 int bitw
= _ISwbit (bitpos
);
2860 /* Define string. */
2863 for (ch
= from
; ch
<= to
; ++ch
)
2865 struct charseq
*seq
;
/* Lookup under the one-character name in TMP.  */
2868 seq
= charmap_find_value (charmap
, tmp
, 1);
/* Lookup under the nine-character `Uxxxxxxxx' name built from CH.  */
2872 sprintf (buf
, "U%08X", ch
);
2873 seq
= charmap_find_value (charmap
, buf
, 9);
2877 record_error (0, 0, _("\
2878 %s: character `%s' not defined while needed as default value"),
2881 else if (seq
->nbytes
!= 1)
2882 record_error (0, 0, _("\
2883 %s: character `%s' in charmap not representable with one byte"),
2886 ctype
->class256_collection
[seq
->bytes
[0]] |= bit
;
2888 /* No need to search here, the ASCII value is also the Unicode
2890 ELEM (ctype
, class_collection
, , ch
) |= bitw
;
/* Supply default values for everything in CTYPE that the locale
   definition did not specify itself.  Character classes are tested
   through the bits in CTYPE->class_done, the two case mappings through
   CTYPE->tomap_done[0] and [1], and the output digits through
   CTYPE->outdigits_act.  CHARMAP is used to find the byte sequence of
   each character that is needed as a default; problems are reported
   with record_error.  Both the byte-indexed tables (class256_collection,
   map256_collection) and the wide-character tables (class_collection,
   map_collection) are updated.  */
2895 set_class_defaults (struct locale_ctype_t
*ctype
,
2896 const struct charmap_t
*charmap
,
2897 struct repertoire_t
*repertoire
)
/* Shorthand: apply set_one_default to this function's CTYPE and
   CHARMAP.  */
2899 #define set_default(bitpos, from, to) \
2900 set_one_default (ctype, charmap, bitpos, from, to)
2902 /* This function defines the default values for the classes and conversions
2903 according to POSIX.2 2.5.2.1.
2904 It may seem that the order of these if-blocks is arbitrary but it is NOT.
2905 Don't move them unless you know what you do! */
2907 /* Set default values if keyword was not present. */
2908 if ((ctype
->class_done
& BITw (tok_upper
)) == 0)
2909 /* "If this keyword [upper] is not specified, the uppercase letters
2910 `A' through `Z', ..., shall automatically belong to this class,
2911 with implementation defined character values." [P1003.2, 2.5.2.1] */
2912 set_default (BITPOS (tok_upper
), 'A', 'Z');
2914 if ((ctype
->class_done
& BITw (tok_lower
)) == 0)
2915 /* "If this keyword [lower] is not specified, the lowercase letters
2916 `a' through `z', ..., shall automatically belong to this class,
2917 with implementation defined character values." [P1003.2, 2.5.2.1] */
2918 set_default (BITPOS (tok_lower
), 'a', 'z');
2920 if ((ctype
->class_done
& BITw (tok_alpha
)) == 0)
2922 /* Table 2-6 in P1003.2 says that characters in class `upper' or
2923 class `lower' *must* be in class `alpha'. */
2924 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
);
2925 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
);
/* Byte-indexed table first, then the wide-character table.  */
2927 for (size_t cnt
= 0; cnt
< 256; ++cnt
)
2928 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
2929 ctype
->class256_collection
[cnt
] |= BIT (tok_alpha
);
2931 for (size_t cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
2932 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
2933 ctype
->class_collection
[cnt
] |= BITw (tok_alpha
);
2936 if ((ctype
->class_done
& BITw (tok_digit
)) == 0)
2937 /* "If this keyword [digit] is not specified, the digits `0' through
2938 `9', ..., shall automatically belong to this class, with
2939 implementation-defined character values." [P1003.2, 2.5.2.1] */
2940 set_default (BITPOS (tok_digit
), '0', '9');
2942 /* "Only characters specified for the `alpha' and `digit' keyword
2943 shall be specified. Characters specified for the keyword `alpha'
2944 and `digit' are automatically included in this class."
   The two loops below set the `alnum' bit on every entry that has the
   `alpha' or the `digit' bit. */
2946 unsigned long int mask
= BIT (tok_alpha
) | BIT (tok_digit
);
2947 unsigned long int maskw
= BITw (tok_alpha
) | BITw (tok_digit
);
2949 for (size_t cnt
= 0; cnt
< 256; ++cnt
)
2950 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
2951 ctype
->class256_collection
[cnt
] |= BIT (tok_alnum
);
2953 for (size_t cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
2954 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
2955 ctype
->class_collection
[cnt
] |= BITw (tok_alnum
);
2958 if ((ctype
->class_done
& BITw (tok_space
)) == 0)
2959 /* "If this keyword [space] is not specified, the characters <space>,
2960 <form-feed>, <newline>, <carriage-return>, <tab>, and
2961 <vertical-tab>, ..., shall automatically belong to this class,
2962 with implementation-defined character values." [P1003.2, 2.5.2.1] */
2964 struct charseq
*seq
;
/* <space>: tried under the names "space", "SP" and "U00000020".  */
2966 seq
= charmap_find_value (charmap
, "space", 5);
2968 seq
= charmap_find_value (charmap
, "SP", 2);
2970 seq
= charmap_find_value (charmap
, "U00000020", 9);
2973 record_error (0, 0, _("\
2974 %s: character `%s' not defined while needed as default value"),
2975 "LC_CTYPE", "<space>");
2977 else if (seq
->nbytes
!= 1)
2978 record_error (0, 0, _("\
2979 %s: character `%s' in charmap not representable with one byte"),
2980 "LC_CTYPE", "<space>");
2982 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
2984 /* No need to search. */
2985 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_space
);
/* <form-feed>: tried under "form-feed" and "U0000000C".  */
2987 seq
= charmap_find_value (charmap
, "form-feed", 9);
2989 seq
= charmap_find_value (charmap
, "U0000000C", 9);
2992 record_error (0, 0, _("\
2993 %s: character `%s' not defined while needed as default value"),
2994 "LC_CTYPE", "<form-feed>");
2996 else if (seq
->nbytes
!= 1)
2997 record_error (0, 0, _("\
2998 %s: character `%s' in charmap not representable with one byte"),
2999 "LC_CTYPE", "<form-feed>");
3001 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3003 /* No need to search. */
3004 ELEM (ctype
, class_collection
, , L
'\f') |= BITw (tok_space
);
/* <newline>: tried under "newline" and "U0000000A".  */
3007 seq
= charmap_find_value (charmap
, "newline", 7);
3009 seq
= charmap_find_value (charmap
, "U0000000A", 9);
3012 record_error (0, 0, _("\
3013 %s: character `%s' not defined while needed as default value"),
3014 "LC_CTYPE", "<newline>");
3016 else if (seq
->nbytes
!= 1)
3017 record_error (0, 0, _("\
3018 %s: character `%s' in charmap not representable with one byte"),
3019 "LC_CTYPE", "<newline>");
3021 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3023 /* No need to search. */
3024 ELEM (ctype
, class_collection
, , L
'\n') |= BITw (tok_space
);
/* <carriage-return>: tried under "carriage-return" and "U0000000D".  */
3027 seq
= charmap_find_value (charmap
, "carriage-return", 15);
3029 seq
= charmap_find_value (charmap
, "U0000000D", 9);
3032 record_error (0, 0, _("\
3033 %s: character `%s' not defined while needed as default value"),
3034 "LC_CTYPE", "<carriage-return>");
3036 else if (seq
->nbytes
!= 1)
3037 record_error (0, 0, _("\
3038 %s: character `%s' in charmap not representable with one byte"),
3039 "LC_CTYPE", "<carriage-return>");
3041 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3043 /* No need to search. */
3044 ELEM (ctype
, class_collection
, , L
'\r') |= BITw (tok_space
);
/* <tab>: tried under "tab" and "U00000009".  */
3047 seq
= charmap_find_value (charmap
, "tab", 3);
3049 seq
= charmap_find_value (charmap
, "U00000009", 9);
3052 record_error (0, 0, _("\
3053 %s: character `%s' not defined while needed as default value"),
3054 "LC_CTYPE", "<tab>");
3056 else if (seq
->nbytes
!= 1)
3057 record_error (0, 0, _("\
3058 %s: character `%s' in charmap not representable with one byte"),
3059 "LC_CTYPE", "<tab>");
3061 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3063 /* No need to search. */
3064 ELEM (ctype
, class_collection
, , L
'\t') |= BITw (tok_space
);
/* <vertical-tab>: tried under "vertical-tab" and "U0000000B".  */
3067 seq
= charmap_find_value (charmap
, "vertical-tab", 12);
3069 seq
= charmap_find_value (charmap
, "U0000000B", 9);
3072 record_error (0, 0, _("\
3073 %s: character `%s' not defined while needed as default value"),
3074 "LC_CTYPE", "<vertical-tab>");
3076 else if (seq
->nbytes
!= 1)
3077 record_error (0, 0, _("\
3078 %s: character `%s' in charmap not representable with one byte"),
3079 "LC_CTYPE", "<vertical-tab>");
3081 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3083 /* No need to search. */
3084 ELEM (ctype
, class_collection
, , L
'\v') |= BITw (tok_space
);
3087 if ((ctype
->class_done
& BITw (tok_xdigit
)) == 0)
3088 /* "If this keyword is not specified, the digits `0' to `9', the
3089 uppercase letters `A' through `F', and the lowercase letters `a'
3090 through `f', ..., shall automatically belong to this class, with
3091 implementation defined character values." [P1003.2, 2.5.2.1] */
3093 set_default (BITPOS (tok_xdigit
), '0', '9');
3094 set_default (BITPOS (tok_xdigit
), 'A', 'F');
3095 set_default (BITPOS (tok_xdigit
), 'a', 'f');
3098 if ((ctype
->class_done
& BITw (tok_blank
)) == 0)
3099 /* "If this keyword [blank] is unspecified, the characters <space> and
3100 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
3102 struct charseq
*seq
;
3104 seq
= charmap_find_value (charmap
, "space", 5);
3106 seq
= charmap_find_value (charmap
, "SP", 2);
3108 seq
= charmap_find_value (charmap
, "U00000020", 9);
3111 record_error (0, 0, _("\
3112 %s: character `%s' not defined while needed as default value"),
3113 "LC_CTYPE", "<space>");
3115 else if (seq
->nbytes
!= 1)
3116 record_error (0, 0, _("\
3117 %s: character `%s' in charmap not representable with one byte"),
3118 "LC_CTYPE", "<space>");
3120 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_blank
);
3122 /* No need to search. */
3123 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_blank
);
3126 seq
= charmap_find_value (charmap
, "tab", 3);
3128 seq
= charmap_find_value (charmap
, "U00000009", 9);
3131 record_error (0, 0, _("\
3132 %s: character `%s' not defined while needed as default value"),
3133 "LC_CTYPE", "<tab>");
3135 else if (seq
->nbytes
!= 1)
3136 record_error (0, 0, _("\
3137 %s: character `%s' in charmap not representable with one byte"),
3138 "LC_CTYPE", "<tab>");
3140 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_blank
);
3142 /* No need to search. */
3143 ELEM (ctype
, class_collection
, , L
'\t') |= BITw (tok_blank
);
3146 if ((ctype
->class_done
& BITw (tok_graph
)) == 0)
3147 /* "If this keyword [graph] is not specified, characters specified for
3148 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3149 shall belong to this character class." [P1003.2, 2.5.2.1] */
3151 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
) |
3152 BIT (tok_alpha
) | BIT (tok_digit
) | BIT (tok_xdigit
) | BIT (tok_punct
);
3153 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
) |
3154 BITw (tok_alpha
) | BITw (tok_digit
) | BITw (tok_xdigit
) |
3157 for (size_t cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3158 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3159 ctype
->class_collection
[cnt
] |= BITw (tok_graph
);
3161 for (size_t cnt
= 0; cnt
< 256; ++cnt
)
3162 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3163 ctype
->class256_collection
[cnt
] |= BIT (tok_graph
);
3166 if ((ctype
->class_done
& BITw (tok_print
)) == 0)
3167 /* "If this keyword [print] is not provided, characters specified for
3168 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3169 and the <space> character shall belong to this character class."
3170 [P1003.2, 2.5.2.1] */
3172 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
) |
3173 BIT (tok_alpha
) | BIT (tok_digit
) | BIT (tok_xdigit
) | BIT (tok_punct
);
3174 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
) |
3175 BITw (tok_alpha
) | BITw (tok_digit
) | BITw (tok_xdigit
) |
3177 struct charseq
*seq
;
3179 for (size_t cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3180 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3181 ctype
->class_collection
[cnt
] |= BITw (tok_print
);
3183 for (size_t cnt
= 0; cnt
< 256; ++cnt
)
3184 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3185 ctype
->class256_collection
[cnt
] |= BIT (tok_print
);
/* The <space> character is added to `print' separately.  */
3188 seq
= charmap_find_value (charmap
, "space", 5);
3190 seq
= charmap_find_value (charmap
, "SP", 2);
3192 seq
= charmap_find_value (charmap
, "U00000020", 9);
3195 record_error (0, 0, _("\
3196 %s: character `%s' not defined while needed as default value"),
3197 "LC_CTYPE", "<space>");
3199 else if (seq
->nbytes
!= 1)
3200 record_error (0, 0, _("\
3201 %s: character `%s' in charmap not representable with one byte"),
3202 "LC_CTYPE", "<space>");
3204 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_print
);
3206 /* No need to search. */
3207 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_print
);
3210 if (ctype
->tomap_done
[0] == 0)
3211 /* "If this keyword [toupper] is not specified, the lowercase letters
3212 `a' through `z', and their corresponding uppercase letters `A' to
3213 `Z', ..., shall automatically be included, with implementation-
3214 defined character values." [P1003.2, 2.5.2.1] */
3219 strcpy (tmp
, "<?>");
3221 for (ch
= 'a'; ch
<= 'z'; ++ch
)
3223 struct charseq
*seq_from
, *seq_to
;
/* Find the lowercase source character, by its one-character name or by
   its `Uxxxxxxxx' name.  */
3227 seq_from
= charmap_find_value (charmap
, &tmp
[1], 1);
3228 if (seq_from
== NULL
)
3231 sprintf (buf
, "U%08X", ch
);
3232 seq_from
= charmap_find_value (charmap
, buf
, 9);
3234 if (seq_from
== NULL
)
3236 record_error (0, 0, _("\
3237 %s: character `%s' not defined while needed as default value"),
3240 else if (seq_from
->nbytes
!= 1)
3242 record_error (0, 0, _("\
3243 %s: character `%s' needed as default value not representable with one byte"),
3248 /* This conversion is implementation defined. */
3249 tmp
[1] = (char) (ch
+ ('A' - 'a'));
3250 seq_to
= charmap_find_value (charmap
, &tmp
[1], 1);
3254 sprintf (buf
, "U%08X", ch
+ ('A' - 'a'));
3255 seq_to
= charmap_find_value (charmap
, buf
, 9);
3259 record_error (0, 0, _("\
3260 %s: character `%s' not defined while needed as default value"),
3263 else if (seq_to
->nbytes
!= 1)
3265 record_error (0, 0, _("\
3266 %s: character `%s' needed as default value not representable with one byte"),
3270 /* The index [0] is determined by the order of the
3271 `ctype_map_newP' calls in `ctype_startup'. */
3272 ctype
->map256_collection
[0][seq_from
->bytes
[0]]
3276 /* No need to search. */
3277 ELEM (ctype
, map_collection
, [0], ch
) = ch
+ ('A' - 'a');
3281 if (ctype
->tomap_done
[1] == 0)
3282 /* "If this keyword [tolower] is not specified, the mapping shall be
3283 the reverse mapping of the one specified to `toupper'." [P1003.2] */
/* Wide-character table: every nonzero entry of mapping [0] is entered
   into mapping [1] with source and target swapped.  */
3285 for (size_t cnt
= 0; cnt
< ctype
->map_collection_act
[0]; ++cnt
)
3286 if (ctype
->map_collection
[0][cnt
] != 0)
3287 ELEM (ctype
, map_collection
, [1],
3288 ctype
->map_collection
[0][cnt
])
3289 = ctype
->charnames
[cnt
];
/* Byte-indexed table: same inversion.  */
3291 for (size_t cnt
= 0; cnt
< 256; ++cnt
)
3292 if (ctype
->map256_collection
[0][cnt
] != 0)
3293 ctype
->map256_collection
[1][ctype
->map256_collection
[0][cnt
]] = cnt
;
/* Output digits: anything other than exactly ten entries is completed
   here (and reported as an error unless no entry was given at all).
   Each missing entry is looked up with charmap_find_symbol under several
   names; if none is found an error is recorded and a one-byte
   replacement taken from DIGITS is installed.  */
3296 if (ctype
->outdigits_act
!= 10)
3298 if (ctype
->outdigits_act
!= 0)
3299 record_error (0, 0, _("\
3300 %s: field `%s' does not contain exactly ten entries"),
3301 "LC_CTYPE", "outdigit");
3303 for (size_t cnt
= ctype
->outdigits_act
; cnt
< 10; ++cnt
)
3305 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3306 (char *) digits
+ cnt
,
3309 if (ctype
->mboutdigits
[cnt
] == NULL
)
3310 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3312 strlen (longnames
[cnt
]));
3314 if (ctype
->mboutdigits
[cnt
] == NULL
)
3315 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3318 if (ctype
->mboutdigits
[cnt
] == NULL
)
3320 /* Provide a replacement. */
3321 record_error (0, 0, _("\
3322 no output digits defined and none of the standard names in the charmap"));
3324 ctype
->mboutdigits
[cnt
] = obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
3325 sizeof (struct charseq
)
3328 /* This is better than nothing. */
3329 ctype
->mboutdigits
[cnt
]->bytes
[0] = digits
[cnt
];
3330 ctype
->mboutdigits
[cnt
]->nbytes
= 1;
/* The wide-character output digit is always the ASCII digit.  */
3333 ctype
->wcoutdigits
[cnt
] = L
'0' + cnt
;
3336 ctype
->outdigits_act
= 10;
3343 /* Initialize the working representation of the table T: the allocated
   and the used size of each of the three levels are reset to zero.
   Assumes t->p and t->q have already been set. */
3345 wctype_table_init (struct wctype_table
*t
)
3348 t
->level1_alloc
= t
->level1_size
= 0;
3350 t
->level2_alloc
= t
->level2_size
= 0;
3352 t
->level3_alloc
= t
->level3_size
= 0;
3355 /* Retrieve an entry: look up the bit stored for the wide character WC
   in table T.  WC is split into four fields: the bits above position
   q + p + 5 index level1, the next q bits select an entry inside a
   level2 block, the next p bits select a word inside a level3 block, and
   the lowest five bits select one bit of that 32-bit word.  The walk
   only continues while INDEX1 is within level1_size and the entries
   found in level1 and level2 are not EMPTY; when it reaches level3 the
   selected bit (0 or 1) is returned. */
3357 wctype_table_get (struct wctype_table
*t
, uint32_t wc
)
3359 uint32_t index1
= wc
>> (t
->q
+ t
->p
+ 5);
3360 if (index1
< t
->level1_size
)
3362 uint32_t lookup1
= t
->level1
[index1
];
3363 if (lookup1
!= EMPTY
)
/* LOOKUP1 is a level2 block number; a block holds 1 << q entries.  */
3365 uint32_t index2
= ((wc
>> (t
->p
+ 5)) & ((1 << t
->q
) - 1))
3366 + (lookup1
<< t
->q
);
3367 uint32_t lookup2
= t
->level2
[index2
];
3368 if (lookup2
!= EMPTY
)
/* LOOKUP2 is a level3 block number; a block holds 1 << p words.  */
3370 uint32_t index3
= ((wc
>> 5) & ((1 << t
->p
) - 1))
3371 + (lookup2
<< t
->p
);
3372 uint32_t lookup3
= t
->level3
[index3
];
3373 uint32_t index4
= wc
& 0x1f;
3375 return (lookup3
>> index4
) & 1;
3382 /* Add one entry: set the bit for the wide character WC in table T.
   WC is split into the same four index fields that wctype_table_get
   uses.  level1 is extended (and its new slots filled with EMPTY) until
   INDEX1 is valid; a level2 block and a level3 block are appended on
   demand when the slot that should reference them is still EMPTY.  All
   three arrays are grown with xrealloc. */
3384 wctype_table_add (struct wctype_table
*t
, uint32_t wc
)
3386 uint32_t index1
= wc
>> (t
->q
+ t
->p
+ 5);
3387 uint32_t index2
= (wc
>> (t
->p
+ 5)) & ((1 << t
->q
) - 1);
3388 uint32_t index3
= (wc
>> 5) & ((1 << t
->p
) - 1);
3389 uint32_t index4
= wc
& 0x1f;
/* Step 1: make sure level1 has a slot for INDEX1.  */
3392 if (index1
>= t
->level1_size
)
3394 if (index1
>= t
->level1_alloc
)
/* Grow geometrically: start from twice the current allocation.  */
3396 size_t alloc
= 2 * t
->level1_alloc
;
3397 if (alloc
<= index1
)
3399 t
->level1
= (uint32_t *) xrealloc ((char *) t
->level1
,
3400 alloc
* sizeof (uint32_t));
3401 t
->level1_alloc
= alloc
;
/* Newly exposed level1 slots reference no level2 block yet.  */
3403 while (index1
>= t
->level1_size
)
3404 t
->level1
[t
->level1_size
++] = EMPTY
;
/* Step 2: make sure the level1 slot references a level2 block.  */
3407 if (t
->level1
[index1
] == EMPTY
)
3409 if (t
->level2_size
== t
->level2_alloc
)
/* Sizes of level2 are counted in blocks of 1 << q entries.  */
3411 size_t alloc
= 2 * t
->level2_alloc
+ 1;
3412 t
->level2
= (uint32_t *) xrealloc ((char *) t
->level2
,
3413 (alloc
<< t
->q
) * sizeof (uint32_t));
3414 t
->level2_alloc
= alloc
;
/* Initialize the entries of the new block to EMPTY and record its
   block number in level1.  */
3416 i1
= t
->level2_size
<< t
->q
;
3417 i2
= (t
->level2_size
+ 1) << t
->q
;
3418 for (i
= i1
; i
< i2
; i
++)
3419 t
->level2
[i
] = EMPTY
;
3420 t
->level1
[index1
] = t
->level2_size
++;
/* Turn INDEX2 from a block-relative into an absolute level2 index.  */
3423 index2
+= t
->level1
[index1
] << t
->q
;
/* Step 3: make sure the level2 slot references a level3 block.  */
3425 if (t
->level2
[index2
] == EMPTY
)
3427 if (t
->level3_size
== t
->level3_alloc
)
/* Sizes of level3 are counted in blocks of 1 << p words.  */
3429 size_t alloc
= 2 * t
->level3_alloc
+ 1;
3430 t
->level3
= (uint32_t *) xrealloc ((char *) t
->level3
,
3431 (alloc
<< t
->p
) * sizeof (uint32_t));
3432 t
->level3_alloc
= alloc
;
3434 i1
= t
->level3_size
<< t
->p
;
3435 i2
= (t
->level3_size
+ 1) << t
->p
;
3436 for (i
= i1
; i
< i2
; i
++)
3438 t
->level2
[index2
] = t
->level3_size
++;
/* Turn INDEX3 into an absolute level3 index and set the bit.  */
3441 index3
+= t
->level2
[index2
] << t
->p
;
3443 t
->level3
[index3
] |= (uint32_t)1 << index4
;
3446 /* Finalize and shrink the table T and write it to FILE.
   Identical level3 blocks, and afterwards identical level2 blocks, are
   detected with memcmp; REORDER3 and REORDER2 are indexed by the old
   block number and are used to rewrite the references held in level2 and
   level1.  The table is then emitted between start_locale_structure and
   end_locale_structure: five header words, followed by level1, level2
   and level3.  Non-EMPTY level1 and level2 entries are written as byte
   offsets (block number scaled by the block size in bytes, plus
   LEVEL2_OFFSET resp. LEVEL3_OFFSET). */
3448 add_locale_wctype_table (struct locale_file
*file
, struct wctype_table
*t
)
/* Variable-length arrays sized by the current number of blocks.  */
3451 uint32_t reorder3
[t
->level3_size
];
3452 uint32_t reorder2
[t
->level2_size
];
3453 uint32_t level2_offset
, level3_offset
;
3455 /* Uniquify level3 blocks. */
3457 for (j
= 0; j
< t
->level3_size
; j
++)
/* Compare block J against the K blocks examined in this scan.  */
3459 for (i
= 0; i
< k
; i
++)
3460 if (memcmp (&t
->level3
[i
<< t
->p
], &t
->level3
[j
<< t
->p
],
3461 (1 << t
->p
) * sizeof (uint32_t)) == 0)
3463 /* Relocate block j to block i. */
3468 memcpy (&t
->level3
[i
<< t
->p
], &t
->level3
[j
<< t
->p
],
3469 (1 << t
->p
) * sizeof (uint32_t));
/* Rewrite the level3 block numbers stored in level2.  */
3475 for (i
= 0; i
< (t
->level2_size
<< t
->q
); i
++)
3476 if (t
->level2
[i
] != EMPTY
)
3477 t
->level2
[i
] = reorder3
[t
->level2
[i
]];
3479 /* Uniquify level2 blocks. */
3481 for (j
= 0; j
< t
->level2_size
; j
++)
3483 for (i
= 0; i
< k
; i
++)
3484 if (memcmp (&t
->level2
[i
<< t
->q
], &t
->level2
[j
<< t
->q
],
3485 (1 << t
->q
) * sizeof (uint32_t)) == 0)
3487 /* Relocate block j to block i. */
3492 memcpy (&t
->level2
[i
<< t
->q
], &t
->level2
[j
<< t
->q
],
3493 (1 << t
->q
) * sizeof (uint32_t));
/* Rewrite the level2 block numbers stored in level1.  */
3499 for (i
= 0; i
< t
->level1_size
; i
++)
3500 if (t
->level1
[i
] != EMPTY
)
3501 t
->level1
[i
] = reorder2
[t
->level1
[i
]];
/* Size arithmetic in bytes: a five-word header, then level1, then the
   level2 blocks, then the level3 blocks.  The three sums below cover
   the whole table, everything before level2, and everything before
   level3 respectively.  */
3504 5 * sizeof (uint32_t)
3505 + t
->level1_size
* sizeof (uint32_t)
3506 + (t
->level2_size
<< t
->q
) * sizeof (uint32_t)
3507 + (t
->level3_size
<< t
->p
) * sizeof (uint32_t);
3510 5 * sizeof (uint32_t)
3511 + t
->level1_size
* sizeof (uint32_t);
3513 5 * sizeof (uint32_t)
3514 + t
->level1_size
* sizeof (uint32_t)
3515 + (t
->level2_size
<< t
->q
) * sizeof (uint32_t);
/* Header: shift for level1, number of level1 entries, shift for level2,
   mask for level2, mask for level3.  */
3517 start_locale_structure (file
);
3518 add_locale_uint32 (file
, t
->q
+ t
->p
+ 5);
3519 add_locale_uint32 (file
, t
->level1_size
);
3520 add_locale_uint32 (file
, t
->p
+ 5);
3521 add_locale_uint32 (file
, (1 << t
->q
) - 1);
3522 add_locale_uint32 (file
, (1 << t
->p
) - 1);
3524 for (i
= 0; i
< t
->level1_size
; i
++)
3527 t
->level1
[i
] == EMPTY
3529 : (t
->level1
[i
] << t
->q
) * sizeof (uint32_t) + level2_offset
);
3531 for (i
= 0; i
< (t
->level2_size
<< t
->q
); i
++)
3534 t
->level2
[i
] == EMPTY
3536 : (t
->level2
[i
] << t
->p
) * sizeof (uint32_t) + level3_offset
);
3538 add_locale_uint32_array (file
, t
->level3
, t
->level3_size
<< t
->p
);
3539 end_locale_structure (file
);
/* Per-level handling for the arrays that were actually allocated.  */
3541 if (t
->level1_alloc
> 0)
3543 if (t
->level2_alloc
> 0)
3545 if (t
->level3_alloc
> 0)
3549 /* Flattens the included transliterations into a translit list.
3550 Inserts them in the list at `cursor', and returns the new cursor.
   Every entry of CTYPE->translit_include is removed from that list
   before it is processed.  The named locale is looked up with
   find_locale for category LC_CTYPE; if it or its LC_CTYPE data is not
   available an error is recorded.  Otherwise the function recurses into
   that locale's own includes, splices its translit list in at *CURSOR
   (leaving that locale's `translit' member NULL), and moves CURSOR
   behind the spliced entries.  A `default_missing' value is taken over
   from the included locale when CTYPE has none. */
3551 static struct translit_t
**
3552 translit_flatten (struct locale_ctype_t
*ctype
,
3553 const struct charmap_t
*charmap
,
3554 struct translit_t
**cursor
)
3556 while (ctype
->translit_include
!= NULL
)
/* Remember what to include before the statement is unchained.  */
3558 const char *copy_locale
= ctype
->translit_include
->copy_locale
;
3559 const char *copy_repertoire
= ctype
->translit_include
->copy_repertoire
;
3560 struct localedef_t
*other
;
3562 /* Unchain the include statement. During the depth-first traversal
3563 we don't want to visit any locale more than once. */
3564 ctype
->translit_include
= ctype
->translit_include
->next
;
3566 other
= find_locale (LC_CTYPE
, copy_locale
, copy_repertoire
, charmap
);
3568 if (other
== NULL
|| other
->categories
[LC_CTYPE
].ctype
== NULL
)
3570 record_error (0, 0, _("\
3571 %s: transliteration data from locale `%s' not available"),
3572 "LC_CTYPE", copy_locale
);
3576 struct locale_ctype_t
*other_ctype
=
3577 other
->categories
[LC_CTYPE
].ctype
;
/* Depth first: flatten the includes of the included locale; afterwards
   its include list must be empty.  */
3579 cursor
= translit_flatten (other_ctype
, charmap
, cursor
);
3580 assert (other_ctype
->translit_include
== NULL
);
3582 if (other_ctype
->translit
!= NULL
)
3584 /* Insert the other_ctype->translit list at *cursor. */
3585 struct translit_t
*endp
= other_ctype
->translit
;
3586 while (endp
->next
!= NULL
)
/* ENDP is the last node of the list being spliced in: hook the old
   continuation behind it and make *CURSOR point to the list head.  */
3589 endp
->next
= *cursor
;
3590 *cursor
= other_ctype
->translit
;
3592 /* Avoid any risk of circular lists. */
3593 other_ctype
->translit
= NULL
;
3595 cursor
= &endp
->next
;
/* Inherit the default replacement only if none was defined locally.  */
3598 if (ctype
->default_missing
== NULL
)
3599 ctype
->default_missing
= other_ctype
->default_missing
;
3607 allocate_arrays (struct locale_ctype_t
*ctype
, const struct charmap_t
*charmap
,
3608 struct repertoire_t
*repertoire
)
3616 /* You wonder about this amount of memory? This is only because some
3617 users do not manage to address the array with unsigned values or
3618 data types with range >= 256. '\200' would result in the array
3619 index -128. To help these poor people we duplicate the entries for
3620 128 up to 255 below the entry for \0. */
3621 ctype
->ctype_b
= (char_class_t
*) xcalloc (256 + 128, sizeof (char_class_t
));
3622 ctype
->ctype32_b
= (char_class32_t
*) xcalloc (256, sizeof (char_class32_t
));
3623 ctype
->class_b
= (uint32_t **)
3624 xmalloc (ctype
->nr_charclass
* sizeof (uint32_t *));
3625 ctype
->class_3level
= (struct wctype_table
*)
3626 xmalloc (ctype
->nr_charclass
* sizeof (struct wctype_table
));
3628 /* This is the array accessed using the multibyte string elements. */
3629 for (idx
= 0; idx
< 256; ++idx
)
3630 ctype
->ctype_b
[128 + idx
] = ctype
->class256_collection
[idx
];
3632 /* Mirror first 127 entries. We must take care that entry -1 is not
3633 mirrored because EOF == -1. */
3634 for (idx
= 0; idx
< 127; ++idx
)
3635 ctype
->ctype_b
[idx
] = ctype
->ctype_b
[256 + idx
];
3637 /* The 32 bit array contains all characters < 0x100. */
3638 for (idx
= 0; idx
< ctype
->class_collection_act
; ++idx
)
3639 if (ctype
->charnames
[idx
] < 0x100)
3640 ctype
->ctype32_b
[ctype
->charnames
[idx
]] = ctype
->class_collection
[idx
];
3642 for (nr
= 0; nr
< ctype
->nr_charclass
; nr
++)
3644 ctype
->class_b
[nr
] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
3646 /* We only set CLASS_B for the bits in the ISO C classes, not
3647 the user defined classes. The number should not change but
3649 #define LAST_ISO_C_BIT 11
3650 if (nr
<= LAST_ISO_C_BIT
)
3651 for (idx
= 0; idx
< 256; ++idx
)
3652 if (ctype
->class256_collection
[idx
] & _ISbit (nr
))
3653 ctype
->class_b
[nr
][idx
>> 5] |= (uint32_t) 1 << (idx
& 0x1f);
3656 for (nr
= 0; nr
< ctype
->nr_charclass
; nr
++)
3658 struct wctype_table
*t
;
3660 t
= &ctype
->class_3level
[nr
];
3661 t
->p
= 4; /* or: 5 */
3662 t
->q
= 7; /* or: 6 */
3663 wctype_table_init (t
);
3665 for (idx
= 0; idx
< ctype
->class_collection_act
; ++idx
)
3666 if (ctype
->class_collection
[idx
] & _ISwbit (nr
))
3667 wctype_table_add (t
, ctype
->charnames
[idx
]);
3669 record_verbose (stderr
, _("\
3670 %s: table for class \"%s\": %lu bytes"),
3671 "LC_CTYPE", ctype
->classnames
[nr
],
3672 (unsigned long int) t
->result_size
);
3675 /* Room for table of mappings. */
3676 ctype
->map_b
= (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3677 ctype
->map32_b
= (uint32_t **) xmalloc (ctype
->map_collection_nr
3678 * sizeof (uint32_t *));
3679 ctype
->map_3level
= (struct wctrans_table
*)
3680 xmalloc (ctype
->map_collection_nr
* sizeof (struct wctrans_table
));
3682 /* Fill in all mappings. */
3683 for (idx
= 0; idx
< 2; ++idx
)
3687 /* Allocate table. */
3688 ctype
->map_b
[idx
] = (uint32_t *)
3689 xmalloc ((256 + 128) * sizeof (uint32_t));
3691 /* Copy values from collection. */
3692 for (idx2
= 0; idx2
< 256; ++idx2
)
3693 ctype
->map_b
[idx
][128 + idx2
] = ctype
->map256_collection
[idx
][idx2
];
3695 /* Mirror first 127 entries. We must take care not to map entry
3696 -1 because EOF == -1. */
3697 for (idx2
= 0; idx2
< 127; ++idx2
)
3698 ctype
->map_b
[idx
][idx2
] = ctype
->map_b
[idx
][256 + idx2
];
3700 /* EOF must map to EOF. */
3701 ctype
->map_b
[idx
][127] = EOF
;
3704 for (idx
= 0; idx
< ctype
->map_collection_nr
; ++idx
)
3708 /* Allocate table. */
3709 ctype
->map32_b
[idx
] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
3711 /* Copy values from collection. Default is identity mapping. */
3712 for (idx2
= 0; idx2
< 256; ++idx2
)
3713 ctype
->map32_b
[idx
][idx2
] =
3714 (ctype
->map_collection
[idx
][idx2
] != 0
3715 ? ctype
->map_collection
[idx
][idx2
]
3719 for (nr
= 0; nr
< ctype
->map_collection_nr
; nr
++)
3721 struct wctrans_table
*t
;
3723 t
= &ctype
->map_3level
[nr
];
3726 wctrans_table_init (t
);
3728 for (idx
= 0; idx
< ctype
->map_collection_act
[nr
]; ++idx
)
3729 if (ctype
->map_collection
[nr
][idx
] != 0)
3730 wctrans_table_add (t
, ctype
->charnames
[idx
],
3731 ctype
->map_collection
[nr
][idx
]);
3733 record_verbose (stderr
, _("\
3734 %s: table for map \"%s\": %lu bytes"),
3735 "LC_CTYPE", ctype
->mapnames
[nr
],
3736 (unsigned long int) t
->result_size
);
3739 /* Extra array for class and map names. */
3740 ctype
->class_name_ptr
= (uint32_t *) xmalloc (ctype
->nr_charclass
3741 * sizeof (uint32_t));
3742 ctype
->map_name_ptr
= (uint32_t *) xmalloc (ctype
->map_collection_nr
3743 * sizeof (uint32_t));
3745 ctype
->class_offset
= _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
);
3746 ctype
->map_offset
= ctype
->class_offset
+ ctype
->nr_charclass
;
3748 /* Array for width information. Because the expected widths are very
3749 small (never larger than 2) we use only one single byte. This
3751 We put only printable characters in the table. wcwidth is specified
3752 to return -1 for non-printable characters. Doing the check here
3753 saves a run-time check.
3754 But we put L'\0' in the table. This again saves a run-time check. */
3756 struct wcwidth_table
*t
;
3761 wcwidth_table_init (t
);
3763 /* First set all the printable characters of the character set to
3764 the default width. */
3766 while (iterate_table (&charmap
->char_table
, &curs
, &key
, &len
, &vdata
) == 0)
3768 struct charseq
*data
= (struct charseq
*) vdata
;
3770 if (data
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
3771 data
->ucs4
= repertoire_find_value (ctype
->repertoire
,
3774 if (data
->ucs4
!= ILLEGAL_CHAR_VALUE
)
3776 uint32_t *class_bits
=
3777 find_idx (ctype
, &ctype
->class_collection
, NULL
,
3778 &ctype
->class_collection_act
, data
->ucs4
);
3780 if (class_bits
!= NULL
&& (*class_bits
& BITw (tok_print
)))
3781 wcwidth_table_add (t
, data
->ucs4
, charmap
->width_default
);
3785 /* Now add the explicitly specified widths. */
3786 if (charmap
->width_rules
!= NULL
)
3787 for (size_t cnt
= 0; cnt
< charmap
->nwidth_rules
; ++cnt
)
3789 unsigned char bytes
[charmap
->mb_cur_max
];
3790 int nbytes
= charmap
->width_rules
[cnt
].from
->nbytes
;
3792 /* We have the range of characters for which the width is
3793 specified described using byte sequences of the multibyte
3794 charset. We have to convert this to UCS4 now. And we
3795 cannot simply convert the beginning and the end of the
3796 sequence, we have to iterate over the byte sequence and
3797 convert it for every single character. */
3798 memcpy (bytes
, charmap
->width_rules
[cnt
].from
->bytes
, nbytes
);
3800 while (nbytes
< charmap
->width_rules
[cnt
].to
->nbytes
3801 || memcmp (bytes
, charmap
->width_rules
[cnt
].to
->bytes
,
3804 /* Find the UCS value for `bytes'. */
3807 struct charseq
*seq
=
3808 charmap_find_symbol (charmap
, (char *) bytes
, nbytes
);
3811 wch
= ILLEGAL_CHAR_VALUE
;
3812 else if (seq
->ucs4
!= UNINITIALIZED_CHAR_VALUE
)
3815 wch
= repertoire_find_value (ctype
->repertoire
, seq
->name
,
3816 strlen (seq
->name
));
3818 if (wch
!= ILLEGAL_CHAR_VALUE
)
3820 /* Store the value. */
3821 uint32_t *class_bits
=
3822 find_idx (ctype
, &ctype
->class_collection
, NULL
,
3823 &ctype
->class_collection_act
, wch
);
3825 if (class_bits
!= NULL
&& (*class_bits
& BITw (tok_print
)))
3826 wcwidth_table_add (t
, wch
,
3827 charmap
->width_rules
[cnt
].width
);
3830 /* "Increment" the bytes sequence. */
3832 while (inner
>= 0 && bytes
[inner
] == 0xff)
3837 /* We have to extend the byte sequence. */
3838 if (nbytes
>= charmap
->width_rules
[cnt
].to
->nbytes
)
3842 memset (&bytes
[1], 0, nbytes
);
3848 while (++inner
< nbytes
)
3854 /* Set the width of L'\0' to 0. */
3855 wcwidth_table_add (t
, 0, 0);
3857 record_verbose (stderr
, _("%s: table for width: %lu bytes"),
3858 "LC_CTYPE", (unsigned long int) t
->result_size
);
3861 /* Set MB_CUR_MAX. */
3862 ctype
->mb_cur_max
= charmap
->mb_cur_max
;
3864 /* Now determine the table for the transliteration information.
3866 XXX It is not yet clear to me whether it is worth implementing a
3867 complicated algorithm which uses a hash table to locate the entries.
3868 For now I'll use a simple array which can be searched using binary
3870 if (ctype
->translit_include
!= NULL
)
3871 /* Traverse the locales mentioned in the `include' statements in a
3872 depth-first way and fold in their transliteration information. */
3873 translit_flatten (ctype
, charmap
, &ctype
->translit
);
3875 if (ctype
->translit
!= NULL
)
3877 /* First count how many entries we have. This is the upper limit
3878 since some entries from the included files might be overwritten. */
3880 struct translit_t
*runp
= ctype
->translit
;
3881 struct translit_t
**sorted
;
3882 size_t from_len
, to_len
;
3884 while (runp
!= NULL
)
3890 /* Next we allocate an array large enough and fill in the values. */
3891 sorted
= (struct translit_t
**) alloca (number
3892 * sizeof (struct translit_t
**));
3893 runp
= ctype
->translit
;
3897 /* Search for the place where to insert this string.
3898 XXX Better use a real sorting algorithm later. */
3902 while (idx
< number
)
3904 int res
= wcscmp ((const wchar_t *) sorted
[idx
]->from
,
3905 (const wchar_t *) runp
->from
);
3920 memmove (&sorted
[idx
+ 1], &sorted
[idx
],
3921 (number
- idx
) * sizeof (struct translit_t
*));
3928 while (runp
!= NULL
);
3930 /* The next step is putting all the possible transliteration
3931 strings in one memory block so that we can write it out.
3932 We need several different blocks:
3933 - index to the from-string array
3935 - index to the to-string array
3938 from_len
= to_len
= 0;
3939 for (size_t cnt
= 0; cnt
< number
; ++cnt
)
3941 struct translit_to_t
*srunp
;
3942 from_len
+= wcslen ((const wchar_t *) sorted
[cnt
]->from
) + 1;
3943 srunp
= sorted
[cnt
]->to
;
3944 while (srunp
!= NULL
)
3946 to_len
+= wcslen ((const wchar_t *) srunp
->str
) + 1;
3947 srunp
= srunp
->next
;
3949 /* Plus one for the extra NUL character marking the end of
3950 the list for the current entry. */
3954 /* We can allocate the arrays for the results. */
3955 ctype
->translit_from_idx
= xmalloc (number
* sizeof (uint32_t));
3956 ctype
->translit_from_tbl
= xmalloc (from_len
* sizeof (uint32_t));
3957 ctype
->translit_to_idx
= xmalloc (number
* sizeof (uint32_t));
3958 ctype
->translit_to_tbl
= xmalloc (to_len
* sizeof (uint32_t));
3962 for (size_t cnt
= 0; cnt
< number
; ++cnt
)
3965 struct translit_to_t
*srunp
;
3967 ctype
->translit_from_idx
[cnt
] = from_len
;
3968 ctype
->translit_to_idx
[cnt
] = to_len
;
3970 len
= wcslen ((const wchar_t *) sorted
[cnt
]->from
) + 1;
3971 wmemcpy ((wchar_t *) &ctype
->translit_from_tbl
[from_len
],
3972 (const wchar_t *) sorted
[cnt
]->from
, len
);
3975 ctype
->translit_to_idx
[cnt
] = to_len
;
3976 srunp
= sorted
[cnt
]->to
;
3977 while (srunp
!= NULL
)
3979 len
= wcslen ((const wchar_t *) srunp
->str
) + 1;
3980 wmemcpy ((wchar_t *) &ctype
->translit_to_tbl
[to_len
],
3981 (const wchar_t *) srunp
->str
, len
);
3983 srunp
= srunp
->next
;
3985 ctype
->translit_to_tbl
[to_len
++] = L
'\0';
3988 /* Store the information about the length. */
3989 ctype
->translit_idx_size
= number
;
3990 ctype
->translit_from_tbl_size
= from_len
* sizeof (uint32_t);
3991 ctype
->translit_to_tbl_size
= to_len
* sizeof (uint32_t);
3995 ctype
->translit_from_idx
= no_str
;
3996 ctype
->translit_from_tbl
= no_str
;
3997 ctype
->translit_to_tbl
= no_str
;
3998 ctype
->translit_idx_size
= 0;
3999 ctype
->translit_from_tbl_size
= 0;
4000 ctype
->translit_to_tbl_size
= 0;