1 /* Copyright (C) 1995-2019 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, see <http://www.gnu.org/licenses/>. */
35 #include "localedef.h"
37 #include "localeinfo.h"
39 #include "linereader.h"
40 #include "locfile-token.h"
46 /* The bit used for representing a special class. */
47 #define BITPOS(class) ((class) - tok_upper)
48 #define BIT(class) (_ISbit (BITPOS (class)))
49 #define BITw(class) (_ISwbit (BITPOS (class)))
51 #define ELEM(ctype, collection, idx, value) \
52 *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx, \
53 &ctype->collection##_act idx, value)
56 /* To be compatible with former implementations we for now restrict
57 the number of bits for character classes to 16. When compatibility
58 is not necessary anymore increase the number to 32. */
59 #define char_class_t uint16_t
60 #define char_class32_t uint32_t
63 /* Type to describe a transliteration action. We have a possibly
64 multiple character from-string and a set of multiple character
65 to-strings. All are 32bit values since this is what is used in
66 the gconv functions. */
71 struct translit_to_t
*next
;
81 struct translit_to_t
*to
;
83 struct translit_t
*next
;
86 struct translit_ignore_t
95 struct translit_ignore_t
*next
;
99 /* Type to describe a transliteration include statement. */
100 struct translit_include_t
102 const char *copy_locale
;
103 const char *copy_repertoire
;
105 struct translit_include_t
*next
;
108 /* Provide some dummy pointer for empty string. */
109 static uint32_t no_str
[] = { 0 };
112 /* Sparse table of uint32_t. */
113 #define TABLE idx_table
114 #define ELEMENT uint32_t
115 #define DEFAULT ((uint32_t) ~0)
116 #define NO_ADD_LOCALE
119 #define TABLE wcwidth_table
120 #define ELEMENT uint8_t
124 #define TABLE wctrans_table
125 #define ELEMENT int32_t
127 #define wctrans_table_add wctrans_table_add_internal
129 #undef wctrans_table_add
/* A wctrans_table does not hold the mapped character itself: every slot
   holds the distance between the mapped value and the argument.  Turn
   MAPPED_WC into that distance and store it for WC in T by way of the
   table routine that was given the name wctrans_table_add_internal
   through the #define above.  */
static inline void
wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
{
  /* Plain uint32_t subtraction; the value is handed on unchanged.  */
  const uint32_t distance = mapped_wc - wc;

  wctrans_table_add_internal (t, wc, distance);
}
138 /* Construction of sparse 3-level tables.
139 See wchar-lookup.h for their structure and the meaning of p and q. */
146 /* Working representation. */
159 static void add_locale_wctype_table (struct locale_file
*file
,
160 struct wctype_table
*t
);
162 /* The real definition of the struct for the LC_CTYPE locale. */
163 struct locale_ctype_t
166 size_t charnames_max
;
167 size_t charnames_act
;
168 /* An index lookup table, to speedup find_idx. */
169 struct idx_table charnames_idx
;
171 struct repertoire_t
*repertoire
;
173 /* We will allow up to 8 * sizeof (uint32_t) character classes. */
174 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
176 const char *classnames
[MAX_NR_CHARCLASS
];
177 uint32_t last_class_char
;
178 uint32_t class256_collection
[256];
179 uint32_t *class_collection
;
180 size_t class_collection_max
;
181 size_t class_collection_act
;
183 uint32_t class_offset
;
185 struct charseq
**mbdigits
;
192 struct charseq
*mboutdigits
[10];
193 uint32_t wcoutdigits
[10];
194 size_t outdigits_act
;
196 /* If the following number ever turns out to be too small simply
197 increase it. But I doubt it will. --drepper@gnu */
198 #define MAX_NR_CHARMAP 16
199 const char *mapnames
[MAX_NR_CHARMAP
];
200 uint32_t *map_collection
[MAX_NR_CHARMAP
];
201 uint32_t map256_collection
[2][256];
202 size_t map_collection_max
[MAX_NR_CHARMAP
];
203 size_t map_collection_act
[MAX_NR_CHARMAP
];
204 size_t map_collection_nr
;
206 int tomap_done
[MAX_NR_CHARMAP
];
209 /* Transliteration information. */
210 struct translit_include_t
*translit_include
;
211 struct translit_t
*translit
;
212 struct translit_ignore_t
*translit_ignore
;
213 uint32_t ntranslit_ignore
;
215 uint32_t *default_missing
;
216 const char *default_missing_file
;
217 size_t default_missing_lineno
;
219 uint32_t to_nonascii
;
220 uint32_t nonascii_case
;
222 /* The arrays for the binary representation. */
223 char_class_t
*ctype_b
;
224 char_class32_t
*ctype32_b
;
228 struct wctype_table
*class_3level
;
229 struct wctrans_table
*map_3level
;
230 uint32_t *class_name_ptr
;
231 uint32_t *map_name_ptr
;
232 struct wcwidth_table width
;
234 const char *codeset_name
;
235 uint32_t *translit_from_idx
;
236 uint32_t *translit_from_tbl
;
237 uint32_t *translit_to_idx
;
238 uint32_t *translit_to_tbl
;
239 uint32_t translit_idx_size
;
240 size_t translit_from_tbl_size
;
241 size_t translit_to_tbl_size
;
243 struct obstack mempool
;
247 /* Marker for an empty slot. This has the value 0xFFFFFFFF, regardless
248 whether 'int' is 16 bit, 32 bit, or 64 bit. */
249 #define EMPTY ((uint32_t) ~0)
252 #define obstack_chunk_alloc xmalloc
253 #define obstack_chunk_free free
256 /* Prototypes for local functions. */
257 static void ctype_startup (struct linereader
*lr
, struct localedef_t
*locale
,
258 const struct charmap_t
*charmap
,
259 struct localedef_t
*copy_locale
,
261 static void ctype_class_new (struct linereader
*lr
,
262 struct locale_ctype_t
*ctype
, const char *name
);
263 static void ctype_map_new (struct linereader
*lr
,
264 struct locale_ctype_t
*ctype
,
265 const char *name
, const struct charmap_t
*charmap
);
266 static uint32_t *find_idx (struct locale_ctype_t
*ctype
, uint32_t **table
,
267 size_t *max
, size_t *act
, uint32_t idx
);
268 static void set_class_defaults (struct locale_ctype_t
*ctype
,
269 const struct charmap_t
*charmap
,
270 struct repertoire_t
*repertoire
);
271 static void allocate_arrays (struct locale_ctype_t
*ctype
,
272 const struct charmap_t
*charmap
,
273 struct repertoire_t
*repertoire
);
276 static const char *longnames
[] =
278 "zero", "one", "two", "three", "four",
279 "five", "six", "seven", "eight", "nine"
281 static const char *uninames
[] =
283 "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
284 "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
286 static const unsigned char digits
[] = "0123456789";
290 ctype_startup (struct linereader
*lr
, struct localedef_t
*locale
,
291 const struct charmap_t
*charmap
,
292 struct localedef_t
*copy_locale
, int ignore_content
)
295 struct locale_ctype_t
*ctype
;
297 if (!ignore_content
&& locale
->categories
[LC_CTYPE
].ctype
== NULL
)
299 if (copy_locale
== NULL
)
301 /* Allocate the needed room. */
302 locale
->categories
[LC_CTYPE
].ctype
= ctype
=
303 (struct locale_ctype_t
*) xcalloc (1,
304 sizeof (struct locale_ctype_t
));
306 /* We have seen no names yet. */
307 ctype
->charnames_max
= charmap
->mb_cur_max
== 1 ? 256 : 512;
308 ctype
->charnames
= (uint32_t *) xmalloc (ctype
->charnames_max
309 * sizeof (uint32_t));
310 for (cnt
= 0; cnt
< 256; ++cnt
)
311 ctype
->charnames
[cnt
] = cnt
;
312 ctype
->charnames_act
= 256;
313 idx_table_init (&ctype
->charnames_idx
);
315 /* Fill character class information. */
316 ctype
->last_class_char
= ILLEGAL_CHAR_VALUE
;
317 /* The order of the following instructions determines the bit
319 ctype_class_new (lr
, ctype
, "upper");
320 ctype_class_new (lr
, ctype
, "lower");
321 ctype_class_new (lr
, ctype
, "alpha");
322 ctype_class_new (lr
, ctype
, "digit");
323 ctype_class_new (lr
, ctype
, "xdigit");
324 ctype_class_new (lr
, ctype
, "space");
325 ctype_class_new (lr
, ctype
, "print");
326 ctype_class_new (lr
, ctype
, "graph");
327 ctype_class_new (lr
, ctype
, "blank");
328 ctype_class_new (lr
, ctype
, "cntrl");
329 ctype_class_new (lr
, ctype
, "punct");
330 ctype_class_new (lr
, ctype
, "alnum");
332 ctype
->class_collection_max
= charmap
->mb_cur_max
== 1 ? 256 : 512;
333 ctype
->class_collection
334 = (uint32_t *) xcalloc (sizeof (unsigned long int),
335 ctype
->class_collection_max
);
336 ctype
->class_collection_act
= 256;
338 /* Fill character map information. */
339 ctype
->last_map_idx
= MAX_NR_CHARMAP
;
340 ctype_map_new (lr
, ctype
, "toupper", charmap
);
341 ctype_map_new (lr
, ctype
, "tolower", charmap
);
343 /* Fill first 256 entries in `toXXX' arrays. */
344 for (cnt
= 0; cnt
< 256; ++cnt
)
346 ctype
->map_collection
[0][cnt
] = cnt
;
347 ctype
->map_collection
[1][cnt
] = cnt
;
349 ctype
->map256_collection
[0][cnt
] = cnt
;
350 ctype
->map256_collection
[1][cnt
] = cnt
;
353 if (enc_not_ascii_compatible
)
354 ctype
->to_nonascii
= 1;
356 obstack_init (&ctype
->mempool
);
359 ctype
= locale
->categories
[LC_CTYPE
].ctype
=
360 copy_locale
->categories
[LC_CTYPE
].ctype
;
366 ctype_finish (struct localedef_t
*locale
, const struct charmap_t
*charmap
)
368 /* See POSIX.2, table 2-6 for the meaning of the following table. */
373 const char allow
[NCLASS
];
375 valid_table
[NCLASS
] =
377 /* The order is important. See token.h for more information.
378 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
379 { "upper", "--MX-XDDXXX-" },
380 { "lower", "--MX-XDDXXX-" },
381 { "alpha", "---X-XDDXXX-" },
382 { "digit", "XXX--XDDXXX-" },
383 { "xdigit", "-----XDDXXX-" },
384 { "space", "XXXXX------X" },
385 { "print", "---------X--" },
386 { "graph", "---------X--" },
387 { "blank", "XXXXXM-----X" },
388 { "cntrl", "XXXXX-XX--XX" },
389 { "punct", "XXXXX-DD-X-X" },
390 { "alnum", "-----XDDXXX-" }
394 uint32_t space_value
;
395 struct charseq
*space_seq
;
396 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
403 /* Now resolve copying and also handle completely missing definitions. */
406 const char *repertoire_name
;
408 /* First see whether we were supposed to copy. If yes, find the
409 actual definition. */
410 if (locale
->copy_name
[LC_CTYPE
] != NULL
)
412 /* Find the copying locale. This has to happen transitively since
413 the locale we are copying from might itself be copying another one. */
414 struct localedef_t
*from
= locale
;
417 from
= find_locale (LC_CTYPE
, from
->copy_name
[LC_CTYPE
],
418 from
->repertoire_name
, charmap
);
419 while (from
->categories
[LC_CTYPE
].ctype
== NULL
420 && from
->copy_name
[LC_CTYPE
] != NULL
);
422 ctype
= locale
->categories
[LC_CTYPE
].ctype
423 = from
->categories
[LC_CTYPE
].ctype
;
426 /* If there is still no definition issue a warning and create an
431 No definition for %s category found"), "LC_CTYPE");
432 ctype_startup (NULL
, locale
, charmap
, NULL
, 0);
433 ctype
= locale
->categories
[LC_CTYPE
].ctype
;
436 /* Get the repertoire we have to use. */
437 repertoire_name
= locale
->repertoire_name
?: repertoire_global
;
438 if (repertoire_name
!= NULL
)
439 ctype
->repertoire
= repertoire_read (repertoire_name
);
442 /* We need the name of the currently used 8-bit character set to
443 make correct conversion between this 8-bit representation and the
444 ISO 10646 character set used internally for wide characters. */
445 ctype
->codeset_name
= charmap
->code_set_name
;
446 if (ctype
->codeset_name
== NULL
)
448 record_error (0, 0, _("\
449 No character set name specified in charmap"));
450 ctype
->codeset_name
= "//UNKNOWN//";
453 /* Set default value for classes not specified. */
454 set_class_defaults (ctype
, charmap
, ctype
->repertoire
);
456 /* Check according to table. */
457 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
459 uint32_t tmp
= ctype
->class_collection
[cnt
];
463 for (cls1
= 0; cls1
< NCLASS
; ++cls1
)
464 if ((tmp
& _ISwbit (cls1
)) != 0)
465 for (cls2
= 0; cls2
< NCLASS
; ++cls2
)
466 if (valid_table
[cls1
].allow
[cls2
] != '-')
468 int eq
= (tmp
& _ISwbit (cls2
)) != 0;
469 switch (valid_table
[cls1
].allow
[cls2
])
474 uint32_t value
= ctype
->charnames
[cnt
];
476 record_error (0, 0, _("\
477 character L'\\u%0*x' in class `%s' must be in class `%s'"),
478 value
> 0xffff ? 8 : 4,
480 valid_table
[cls1
].name
,
481 valid_table
[cls2
].name
);
488 uint32_t value
= ctype
->charnames
[cnt
];
490 record_error (0, 0, _("\
491 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
492 value
> 0xffff ? 8 : 4,
494 valid_table
[cls1
].name
,
495 valid_table
[cls2
].name
);
500 ctype
->class_collection
[cnt
] |= _ISwbit (cls2
);
504 record_error (5, 0, _("\
505 internal error in %s, line %u"), __FUNCTION__
, __LINE__
);
511 for (cnt
= 0; cnt
< 256; ++cnt
)
513 uint32_t tmp
= ctype
->class256_collection
[cnt
];
517 for (cls1
= 0; cls1
< NCLASS
; ++cls1
)
518 if ((tmp
& _ISbit (cls1
)) != 0)
519 for (cls2
= 0; cls2
< NCLASS
; ++cls2
)
520 if (valid_table
[cls1
].allow
[cls2
] != '-')
522 int eq
= (tmp
& _ISbit (cls2
)) != 0;
523 switch (valid_table
[cls1
].allow
[cls2
])
530 snprintf (buf
, sizeof buf
, "\\%Zo", cnt
);
532 record_error (0, 0, _("\
533 character '%s' in class `%s' must be in class `%s'"),
535 valid_table
[cls1
].name
,
536 valid_table
[cls2
].name
);
545 snprintf (buf
, sizeof buf
, "\\%Zo", cnt
);
547 record_error (0, 0, _("\
548 character '%s' in class `%s' must not be in class `%s'"),
550 valid_table
[cls1
].name
,
551 valid_table
[cls2
].name
);
556 ctype
->class256_collection
[cnt
] |= _ISbit (cls2
);
560 record_error (5, 0, _("\
561 internal error in %s, line %u"), __FUNCTION__
, __LINE__
);
567 /* ... and now test <SP> as a special case. */
569 if (((cnt
= BITPOS (tok_space
),
570 (ELEM (ctype
, class_collection
, , space_value
)
571 & BITw (tok_space
)) == 0)
572 || (cnt
= BITPOS (tok_blank
),
573 (ELEM (ctype
, class_collection
, , space_value
)
574 & BITw (tok_blank
)) == 0)))
576 record_error (0, 0, _("<SP> character not in class `%s'"),
577 valid_table
[cnt
].name
);
579 else if (((cnt
= BITPOS (tok_punct
),
580 (ELEM (ctype
, class_collection
, , space_value
)
581 & BITw (tok_punct
)) != 0)
582 || (cnt
= BITPOS (tok_graph
),
583 (ELEM (ctype
, class_collection
, , space_value
)
587 record_error (0, 0, _("\
588 <SP> character must not be in class `%s'"),
589 valid_table
[cnt
].name
);
592 ELEM (ctype
, class_collection
, , space_value
) |= BITw (tok_print
);
594 space_seq
= charmap_find_value (charmap
, "SP", 2);
595 if (space_seq
== NULL
)
596 space_seq
= charmap_find_value (charmap
, "space", 5);
597 if (space_seq
== NULL
)
598 space_seq
= charmap_find_value (charmap
, "U00000020", 9);
599 if (space_seq
== NULL
|| space_seq
->nbytes
!= 1)
601 record_error (0, 0, _("\
602 character <SP> not defined in character map"));
604 else if (((cnt
= BITPOS (tok_space
),
605 (ctype
->class256_collection
[space_seq
->bytes
[0]]
606 & BIT (tok_space
)) == 0)
607 || (cnt
= BITPOS (tok_blank
),
608 (ctype
->class256_collection
[space_seq
->bytes
[0]]
609 & BIT (tok_blank
)) == 0)))
611 record_error (0, 0, _("<SP> character not in class `%s'"),
612 valid_table
[cnt
].name
);
614 else if (((cnt
= BITPOS (tok_punct
),
615 (ctype
->class256_collection
[space_seq
->bytes
[0]]
616 & BIT (tok_punct
)) != 0)
617 || (cnt
= BITPOS (tok_graph
),
618 (ctype
->class256_collection
[space_seq
->bytes
[0]]
619 & BIT (tok_graph
)) != 0)))
621 record_error (0, 0, _("\
622 <SP> character must not be in class `%s'"),
623 valid_table
[cnt
].name
);
626 ctype
->class256_collection
[space_seq
->bytes
[0]] |= BIT (tok_print
);
628 /* Check whether all single-byte characters map to their upper/lowercase
629 equivalent according to the ASCII rules. */
630 for (cnt
= 'A'; cnt
<= 'Z'; ++cnt
)
632 uint32_t uppval
= ctype
->map256_collection
[0][cnt
];
633 uint32_t lowval
= ctype
->map256_collection
[1][cnt
];
634 uint32_t lowuppval
= ctype
->map256_collection
[0][lowval
];
635 uint32_t lowlowval
= ctype
->map256_collection
[1][lowval
];
638 || lowval
!= cnt
+ 0x20
640 || lowlowval
!= cnt
+ 0x20)
641 ctype
->nonascii_case
= 1;
643 for (cnt
= 0; cnt
< 256; ++cnt
)
644 if (cnt
< 'A' || (cnt
> 'Z' && cnt
< 'a') || cnt
> 'z')
645 if (ctype
->map256_collection
[0][cnt
] != cnt
646 || ctype
->map256_collection
[1][cnt
] != cnt
)
647 ctype
->nonascii_case
= 1;
649 /* Now that the tests are done make sure the name array contains all
650 characters which are handled in the WIDTH section of the
651 character set definition file. */
652 if (charmap
->width_rules
!= NULL
)
653 for (cnt
= 0; cnt
< charmap
->nwidth_rules
; ++cnt
)
655 unsigned char bytes
[charmap
->mb_cur_max
];
656 int nbytes
= charmap
->width_rules
[cnt
].from
->nbytes
;
658 /* We have the range of character for which the width is
659 specified described using byte sequences of the multibyte
660 charset. We have to convert this to UCS4 now. And we
661 cannot simply convert the beginning and the end of the
662 sequence, we have to iterate over the byte sequence and
663 convert it for every single character. */
664 memcpy (bytes
, charmap
->width_rules
[cnt
].from
->bytes
, nbytes
);
666 while (nbytes
< charmap
->width_rules
[cnt
].to
->nbytes
667 || memcmp (bytes
, charmap
->width_rules
[cnt
].to
->bytes
,
670 /* Find the UCS value for `bytes'. */
674 = charmap_find_symbol (charmap
, (char *) bytes
, nbytes
);
677 wch
= ILLEGAL_CHAR_VALUE
;
678 else if (seq
->ucs4
!= UNINITIALIZED_CHAR_VALUE
)
681 wch
= repertoire_find_value (ctype
->repertoire
, seq
->name
,
684 if (wch
!= ILLEGAL_CHAR_VALUE
)
685 /* We are only interested in the side-effects of the
686 `find_idx' call. It will add appropriate entries in
687 the name array if this is necessary. */
688 (void) find_idx (ctype
, NULL
, NULL
, NULL
, wch
);
690 /* "Increment" the bytes sequence. */
692 while (inner
>= 0 && bytes
[inner
] == 0xff)
697 /* We have to extend the byte sequence. */
698 if (nbytes
>= charmap
->width_rules
[cnt
].to
->nbytes
)
702 memset (&bytes
[1], 0, nbytes
);
708 while (++inner
< nbytes
)
714 /* Now set all the other characters of the character set to the
717 while (iterate_table (&charmap
->char_table
, &curs
, &key
, &len
, &vdata
) == 0)
719 struct charseq
*data
= (struct charseq
*) vdata
;
721 if (data
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
722 data
->ucs4
= repertoire_find_value (ctype
->repertoire
,
725 if (data
->ucs4
!= ILLEGAL_CHAR_VALUE
)
726 (void) find_idx (ctype
, NULL
, NULL
, NULL
, data
->ucs4
);
729 /* There must be a multiple of 10 digits. */
730 if (ctype
->mbdigits_act
% 10 != 0)
732 assert (ctype
->mbdigits_act
== ctype
->wcdigits_act
);
733 ctype
->wcdigits_act
-= ctype
->mbdigits_act
% 10;
734 ctype
->mbdigits_act
-= ctype
->mbdigits_act
% 10;
735 record_error (0, 0, _("\
736 `digit' category has not entries in groups of ten"));
739 /* Check the input digits. There must be a multiple of ten available.
740 In each group it could be that one or the other character is missing.
741 In this case the whole group must be removed. */
743 while (cnt
< ctype
->mbdigits_act
)
746 for (inner
= 0; inner
< 10; ++inner
)
747 if (ctype
->mbdigits
[cnt
+ inner
] == NULL
)
754 /* Remove the group. */
755 memmove (&ctype
->mbdigits
[cnt
], &ctype
->mbdigits
[cnt
+ 10],
756 ((ctype
->wcdigits_act
- cnt
- 10)
757 * sizeof (ctype
->mbdigits
[0])));
758 ctype
->mbdigits_act
-= 10;
762 /* If no input digits are given use the default. */
763 if (ctype
->mbdigits_act
== 0)
765 if (ctype
->mbdigits_max
== 0)
767 ctype
->mbdigits
= obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
768 10 * sizeof (struct charseq
*));
769 ctype
->mbdigits_max
= 10;
772 for (cnt
= 0; cnt
< 10; ++cnt
)
774 ctype
->mbdigits
[cnt
] = charmap_find_symbol (charmap
,
775 (char *) digits
+ cnt
, 1);
776 if (ctype
->mbdigits
[cnt
] == NULL
)
778 ctype
->mbdigits
[cnt
] = charmap_find_symbol (charmap
,
780 strlen (longnames
[cnt
]));
781 if (ctype
->mbdigits
[cnt
] == NULL
)
783 /* Hum, this ain't good. */
784 record_error (0, 0, _("\
785 no input digits defined and none of the standard names in the charmap"));
787 ctype
->mbdigits
[cnt
] = obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
788 sizeof (struct charseq
) + 1);
790 /* This is better than nothing. */
791 ctype
->mbdigits
[cnt
]->bytes
[0] = digits
[cnt
];
792 ctype
->mbdigits
[cnt
]->nbytes
= 1;
797 ctype
->mbdigits_act
= 10;
800 /* Check the wide character input digits. There must be a multiple
801 of ten available. In each group it could be that one or the other
802 character is missing. In this case the whole group must be
805 while (cnt
< ctype
->wcdigits_act
)
808 for (inner
= 0; inner
< 10; ++inner
)
809 if (ctype
->wcdigits
[cnt
+ inner
] == ILLEGAL_CHAR_VALUE
)
816 /* Remove the group. */
817 memmove (&ctype
->wcdigits
[cnt
], &ctype
->wcdigits
[cnt
+ 10],
818 ((ctype
->wcdigits_act
- cnt
- 10)
819 * sizeof (ctype
->wcdigits
[0])));
820 ctype
->wcdigits_act
-= 10;
824 /* If no input digits are given use the default. */
825 if (ctype
->wcdigits_act
== 0)
827 if (ctype
->wcdigits_max
== 0)
829 ctype
->wcdigits
= obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
830 10 * sizeof (uint32_t));
831 ctype
->wcdigits_max
= 10;
834 for (cnt
= 0; cnt
< 10; ++cnt
)
835 ctype
->wcdigits
[cnt
] = L
'0' + cnt
;
837 ctype
->mbdigits_act
= 10;
840 /* Check the outdigits. */
842 for (cnt
= 0; cnt
< 10; ++cnt
)
843 if (ctype
->mboutdigits
[cnt
] == NULL
)
845 static struct charseq replace
[2];
849 record_error (0, 0, _("\
850 not all characters used in `outdigit' are available in the charmap"));
854 replace
[0].nbytes
= 1;
855 replace
[0].bytes
[0] = '?';
856 replace
[0].bytes
[1] = '\0';
857 ctype
->mboutdigits
[cnt
] = &replace
[0];
861 for (cnt
= 0; cnt
< 10; ++cnt
)
862 if (ctype
->wcoutdigits
[cnt
] == 0)
866 record_error (0, 0, _("\
867 not all characters used in `outdigit' are available in the repertoire"));
871 ctype
->wcoutdigits
[cnt
] = L
'?';
874 /* Sort the entries in the translit_ignore list. */
875 if (ctype
->translit_ignore
!= NULL
)
877 struct translit_ignore_t
*firstp
= ctype
->translit_ignore
;
878 struct translit_ignore_t
*runp
;
880 ctype
->ntranslit_ignore
= 1;
882 for (runp
= firstp
->next
; runp
!= NULL
; runp
= runp
->next
)
884 struct translit_ignore_t
*lastp
= NULL
;
885 struct translit_ignore_t
*cmpp
;
887 ++ctype
->ntranslit_ignore
;
889 for (cmpp
= firstp
; cmpp
!= NULL
; lastp
= cmpp
, cmpp
= cmpp
->next
)
890 if (runp
->from
< cmpp
->from
)
898 ctype
->translit_ignore
= firstp
;
904 ctype_output (struct localedef_t
*locale
, const struct charmap_t
*charmap
,
905 const char *output_path
)
907 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
908 const size_t nelems
= (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
)
909 + ctype
->nr_charclass
+ ctype
->map_collection_nr
);
910 struct locale_file file
;
911 uint32_t default_missing_len
;
914 /* Now prepare the output: Find the sizes of the table we can use. */
915 allocate_arrays (ctype
, charmap
, ctype
->repertoire
);
917 default_missing_len
= (ctype
->default_missing
918 ? wcslen ((wchar_t *) ctype
->default_missing
)
921 init_locale_data (&file
, nelems
);
922 for (elem
= 0; elem
< nelems
; ++elem
)
924 if (elem
< _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
))
927 #define CTYPE_EMPTY(name) \
929 add_locale_empty (&file); \
932 CTYPE_EMPTY(_NL_CTYPE_GAP1
);
933 CTYPE_EMPTY(_NL_CTYPE_GAP2
);
934 CTYPE_EMPTY(_NL_CTYPE_GAP3
);
935 CTYPE_EMPTY(_NL_CTYPE_GAP4
);
936 CTYPE_EMPTY(_NL_CTYPE_GAP5
);
937 CTYPE_EMPTY(_NL_CTYPE_GAP6
);
939 #define CTYPE_RAW_DATA(name, base, size) \
940 case _NL_ITEM_INDEX (name): \
941 add_locale_raw_data (&file, base, size); \
944 CTYPE_RAW_DATA (_NL_CTYPE_CLASS
,
946 (256 + 128) * sizeof (char_class_t
));
948 #define CTYPE_UINT32_ARRAY(name, base, n_elems) \
949 case _NL_ITEM_INDEX (name): \
950 add_locale_uint32_array (&file, base, n_elems); \
953 CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER
, ctype
->map_b
[0], 256 + 128);
954 CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER
, ctype
->map_b
[1], 256 + 128);
955 CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER32
, ctype
->map32_b
[0], 256);
956 CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER32
, ctype
->map32_b
[1], 256);
957 CTYPE_RAW_DATA (_NL_CTYPE_CLASS32
,
959 256 * sizeof (char_class32_t
));
961 #define CTYPE_UINT32(name, value) \
962 case _NL_ITEM_INDEX (name): \
963 add_locale_uint32 (&file, value); \
966 CTYPE_UINT32 (_NL_CTYPE_CLASS_OFFSET
, ctype
->class_offset
);
967 CTYPE_UINT32 (_NL_CTYPE_MAP_OFFSET
, ctype
->map_offset
);
968 CTYPE_UINT32 (_NL_CTYPE_TRANSLIT_TAB_SIZE
, ctype
->translit_idx_size
);
970 CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_IDX
,
971 ctype
->translit_from_idx
,
972 ctype
->translit_idx_size
);
974 CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_TBL
,
975 ctype
->translit_from_tbl
,
976 ctype
->translit_from_tbl_size
977 / sizeof (uint32_t));
979 CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_IDX
,
980 ctype
->translit_to_idx
,
981 ctype
->translit_idx_size
);
983 CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_TBL
,
984 ctype
->translit_to_tbl
,
985 ctype
->translit_to_tbl_size
/ sizeof (uint32_t));
987 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES
):
988 /* The class name array. */
989 start_locale_structure (&file
);
990 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
991 add_locale_string (&file
, ctype
->classnames
[cnt
]);
992 add_locale_char (&file
, 0);
993 align_locale_data (&file
, LOCFILE_ALIGN
);
994 end_locale_structure (&file
);
997 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES
):
998 /* The map name array. */
999 start_locale_structure (&file
);
1000 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
)
1001 add_locale_string (&file
, ctype
->mapnames
[cnt
]);
1002 add_locale_char (&file
, 0);
1003 align_locale_data (&file
, LOCFILE_ALIGN
);
1004 end_locale_structure (&file
);
1007 case _NL_ITEM_INDEX (_NL_CTYPE_WIDTH
):
1008 add_locale_wcwidth_table (&file
, &ctype
->width
);
1011 CTYPE_UINT32 (_NL_CTYPE_MB_CUR_MAX
, ctype
->mb_cur_max
);
1013 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME
):
1014 add_locale_string (&file
, ctype
->codeset_name
);
1017 CTYPE_UINT32 (_NL_CTYPE_MAP_TO_NONASCII
, ctype
->to_nonascii
);
1019 CTYPE_UINT32 (_NL_CTYPE_NONASCII_CASE
, ctype
->nonascii_case
);
1021 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN
):
1022 add_locale_uint32 (&file
, ctype
->mbdigits_act
/ 10);
1025 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN
):
1026 add_locale_uint32 (&file
, ctype
->wcdigits_act
/ 10);
1029 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB
):
1030 start_locale_structure (&file
);
1031 for (cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
);
1032 cnt
< ctype
->mbdigits_act
; cnt
+= 10)
1034 add_locale_raw_data (&file
, ctype
->mbdigits
[cnt
]->bytes
,
1035 ctype
->mbdigits
[cnt
]->nbytes
);
1036 add_locale_char (&file
, 0);
1038 end_locale_structure (&file
);
1041 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB
) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB
):
1042 start_locale_structure (&file
);
1043 cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB
);
1044 add_locale_raw_data (&file
, ctype
->mboutdigits
[cnt
]->bytes
,
1045 ctype
->mboutdigits
[cnt
]->nbytes
);
1046 add_locale_char (&file
, 0);
1047 end_locale_structure (&file
);
1050 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC
) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC
):
1051 start_locale_structure (&file
);
1052 for (cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC
);
1053 cnt
< ctype
->wcdigits_act
; cnt
+= 10)
1054 add_locale_uint32 (&file
, ctype
->wcdigits
[cnt
]);
1055 end_locale_structure (&file
);
1058 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC
) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC
):
1059 cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC
);
1060 add_locale_uint32 (&file
, ctype
->wcoutdigits
[cnt
]);
1063 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN
):
1064 add_locale_uint32 (&file
, default_missing_len
);
1067 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING
):
1068 add_locale_uint32_array (&file
, ctype
->default_missing
,
1069 default_missing_len
);
1072 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN
):
1073 add_locale_uint32 (&file
, ctype
->ntranslit_ignore
);
1076 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE
):
1077 start_locale_structure (&file
);
1079 struct translit_ignore_t
*runp
;
1080 for (runp
= ctype
->translit_ignore
; runp
!= NULL
;
1083 add_locale_uint32 (&file
, runp
->from
);
1084 add_locale_uint32 (&file
, runp
->to
);
1085 add_locale_uint32 (&file
, runp
->step
);
1088 end_locale_structure (&file
);
1092 assert (! "unknown CTYPE element");
1096 /* Handle extra maps. */
1097 size_t nr
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
);
1098 if (nr
< ctype
->nr_charclass
)
1100 start_locale_prelude (&file
);
1101 add_locale_uint32_array (&file
, ctype
->class_b
[nr
], 256 / 32);
1102 end_locale_prelude (&file
);
1103 add_locale_wctype_table (&file
, &ctype
->class_3level
[nr
]);
1107 nr
-= ctype
->nr_charclass
;
1108 assert (nr
< ctype
->map_collection_nr
);
1109 add_locale_wctrans_table (&file
, &ctype
->map_3level
[nr
]);
1114 write_locale_data (output_path
, LC_CTYPE
, "LC_CTYPE", &file
);
1118 /* Local functions. */
1120 ctype_class_new (struct linereader
*lr
, struct locale_ctype_t
*ctype
,
1125 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
1126 if (strcmp (ctype
->classnames
[cnt
], name
) == 0)
1129 if (cnt
< ctype
->nr_charclass
)
1131 lr_error (lr
, _("character class `%s' already defined"), name
);
1135 if (ctype
->nr_charclass
== MAX_NR_CHARCLASS
)
1136 /* Exit code 2 is prescribed in P1003.2b. */
1137 record_error (2, 0, _("\
1138 implementation limit: no more than %Zd character classes allowed"),
1141 ctype
->classnames
[ctype
->nr_charclass
++] = name
;
1146 ctype_map_new (struct linereader
*lr
, struct locale_ctype_t
*ctype
,
1147 const char *name
, const struct charmap_t
*charmap
)
1149 size_t max_chars
= 0;
1152 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
)
1154 if (strcmp (ctype
->mapnames
[cnt
], name
) == 0)
1157 if (max_chars
< ctype
->map_collection_max
[cnt
])
1158 max_chars
= ctype
->map_collection_max
[cnt
];
1161 if (cnt
< ctype
->map_collection_nr
)
1163 lr_error (lr
, _("character map `%s' already defined"), name
);
1167 if (ctype
->map_collection_nr
== MAX_NR_CHARMAP
)
1168 /* Exit code 2 is prescribed in P1003.2b. */
1169 record_error (2, 0, _("\
1170 implementation limit: no more than %d character maps allowed"),
1173 ctype
->mapnames
[cnt
] = name
;
1176 ctype
->map_collection_max
[cnt
] = charmap
->mb_cur_max
== 1 ? 256 : 512;
1178 ctype
->map_collection_max
[cnt
] = max_chars
;
1180 ctype
->map_collection
[cnt
] = (uint32_t *)
1181 xcalloc (sizeof (uint32_t), ctype
->map_collection_max
[cnt
]);
1182 ctype
->map_collection_act
[cnt
] = 256;
1184 ++ctype
->map_collection_nr
;
1188 /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
1189 is possible if we only want to extend the name array. */
1191 find_idx (struct locale_ctype_t
*ctype
, uint32_t **table
, size_t *max
,
1192 size_t *act
, uint32_t idx
)
1197 return table
== NULL
? NULL
: &(*table
)[idx
];
1199 /* Use the charnames_idx lookup table instead of the slow search loop. */
1201 cnt
= idx_table_get (&ctype
->charnames_idx
, idx
);
1204 cnt
= ctype
->charnames_act
;
1206 for (cnt
= 256; cnt
< ctype
->charnames_act
; ++cnt
)
1207 if (ctype
->charnames
[cnt
] == idx
)
1211 /* We have to distinguish two cases: the name is found or not. */
1212 if (cnt
== ctype
->charnames_act
)
1214 /* Extend the name array. */
1215 if (ctype
->charnames_act
== ctype
->charnames_max
)
1217 ctype
->charnames_max
*= 2;
1218 ctype
->charnames
= (uint32_t *)
1219 xrealloc (ctype
->charnames
,
1220 sizeof (uint32_t) * ctype
->charnames_max
);
1222 ctype
->charnames
[ctype
->charnames_act
++] = idx
;
1223 idx_table_add (&ctype
->charnames_idx
, idx
, cnt
);
1227 /* We have done everything we are asked to do. */
1231 /* The caller does not want to extend the table. */
1232 return (cnt
>= *act
? NULL
: &(*table
)[cnt
]);
1238 size_t old_max
= *max
;
1241 while (*max
<= cnt
);
1244 (uint32_t *) xrealloc (*table
, *max
* sizeof (uint32_t));
1245 memset (&(*table
)[old_max
], '\0',
1246 (*max
- old_max
) * sizeof (uint32_t));
1252 return &(*table
)[cnt
];
1257 get_character (struct token
*now
, const struct charmap_t
*charmap
,
1258 struct repertoire_t
*repertoire
,
1259 struct charseq
**seqp
, uint32_t *wchp
)
1261 if (now
->tok
== tok_bsymbol
)
1263 /* This will hopefully be the normal case. */
1264 *wchp
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
1265 now
->val
.str
.lenmb
);
1266 *seqp
= charmap_find_value (charmap
, now
->val
.str
.startmb
,
1267 now
->val
.str
.lenmb
);
1269 else if (now
->tok
== tok_ucs4
)
1273 snprintf (utmp
, sizeof (utmp
), "U%08X", now
->val
.ucs4
);
1274 *seqp
= charmap_find_value (charmap
, utmp
, 9);
1277 *seqp
= repertoire_find_seq (repertoire
, now
->val
.ucs4
);
1281 /* Compute the value in the charmap from the UCS value. */
1282 const char *symbol
= repertoire_find_symbol (repertoire
,
1288 *seqp
= charmap_find_value (charmap
, symbol
, strlen (symbol
));
1292 if (repertoire
!= NULL
)
1294 /* Insert a negative entry. */
1295 static const struct charseq negative
1296 = { .ucs4
= ILLEGAL_CHAR_VALUE
};
1297 uint32_t *newp
= obstack_alloc (&repertoire
->mem_pool
,
1299 *newp
= now
->val
.ucs4
;
1301 insert_entry (&repertoire
->seq_table
, newp
,
1302 sizeof (uint32_t), (void *) &negative
);
1306 (*seqp
)->ucs4
= now
->val
.ucs4
;
1308 else if ((*seqp
)->ucs4
!= now
->val
.ucs4
)
1311 *wchp
= now
->val
.ucs4
;
1313 else if (now
->tok
== tok_charcode
)
1315 /* We must map from the byte code to UCS4. */
1316 *seqp
= charmap_find_symbol (charmap
, now
->val
.str
.startmb
,
1317 now
->val
.str
.lenmb
);
1320 *wchp
= ILLEGAL_CHAR_VALUE
;
1323 if ((*seqp
)->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1324 (*seqp
)->ucs4
= repertoire_find_value (repertoire
, (*seqp
)->name
,
1325 strlen ((*seqp
)->name
));
1326 *wchp
= (*seqp
)->ucs4
;
1336 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1337 the .(2). counterparts. */
1339 charclass_symbolic_ellipsis (struct linereader
*ldfile
,
1340 struct locale_ctype_t
*ctype
,
1341 const struct charmap_t
*charmap
,
1342 struct repertoire_t
*repertoire
,
1344 const char *last_str
,
1345 unsigned long int class256_bit
,
1346 unsigned long int class_bit
, int base
,
1347 int ignore_content
, int handle_digits
, int step
)
1349 const char *nowstr
= now
->val
.str
.startmb
;
1350 char tmp
[now
->val
.str
.lenmb
+ 1];
1353 unsigned long int from
;
1354 unsigned long int to
;
1356 /* We have to compute the ellipsis values using the symbolic names. */
1357 assert (last_str
!= NULL
);
1359 if (strlen (last_str
) != now
->val
.str
.lenmb
)
1363 _("`%s' and `%.*s' are not valid names for symbolic range"),
1364 last_str
, (int) now
->val
.str
.lenmb
, nowstr
);
1368 if (memcmp (last_str
, nowstr
, now
->val
.str
.lenmb
) == 0)
1369 /* Nothing to do, the names are the same. */
1372 for (cp
= last_str
; *cp
== *(nowstr
+ (cp
- last_str
)); ++cp
)
1376 from
= strtoul (cp
, &endp
, base
);
1377 if ((from
== UINT_MAX
&& errno
== ERANGE
) || *endp
!= '\0')
1380 to
= strtoul (nowstr
+ (cp
- last_str
), &endp
, base
);
1381 if ((to
== UINT_MAX
&& errno
== ERANGE
)
1382 || (endp
- nowstr
) != now
->val
.str
.lenmb
|| from
>= to
)
1385 /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
1386 if (!ignore_content
)
1388 now
->val
.str
.startmb
= tmp
;
1389 while ((from
+= step
) <= to
)
1391 struct charseq
*seq
;
1394 sprintf (tmp
, (base
== 10 ? "%.*s%0*ld" : "%.*s%0*lX"),
1395 (int) (cp
- last_str
), last_str
,
1396 (int) (now
->val
.str
.lenmb
- (cp
- last_str
)),
1399 if (get_character (now
, charmap
, repertoire
, &seq
, &wch
))
1402 if (seq
!= NULL
&& seq
->nbytes
== 1)
1403 /* Yep, we can store information about this byte sequence. */
1404 ctype
->class256_collection
[seq
->bytes
[0]] |= class256_bit
;
1406 if (wch
!= ILLEGAL_CHAR_VALUE
&& class_bit
!= 0)
1407 /* We have the UCS4 position. */
1408 *find_idx (ctype
, &ctype
->class_collection
,
1409 &ctype
->class_collection_max
,
1410 &ctype
->class_collection_act
, wch
) |= class_bit
;
1412 if (handle_digits
== 1)
1414 /* We must store the digit values. */
1415 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1417 ctype
->mbdigits_max
*= 2;
1418 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1419 (ctype
->mbdigits_max
1420 * sizeof (char *)));
1421 ctype
->wcdigits_max
*= 2;
1422 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1423 (ctype
->wcdigits_max
1424 * sizeof (uint32_t)));
1427 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
1428 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
1430 else if (handle_digits
== 2)
1432 /* We must store the digit values. */
1433 if (ctype
->outdigits_act
>= 10)
1435 lr_error (ldfile
, _("\
1436 %s: field `%s' does not contain exactly ten entries"),
1437 "LC_CTYPE", "outdigit");
1441 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
1442 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
1443 ++ctype
->outdigits_act
;
1450 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. */
1452 charclass_ucs4_ellipsis (struct linereader
*ldfile
,
1453 struct locale_ctype_t
*ctype
,
1454 const struct charmap_t
*charmap
,
1455 struct repertoire_t
*repertoire
,
1456 struct token
*now
, uint32_t last_wch
,
1457 unsigned long int class256_bit
,
1458 unsigned long int class_bit
, int ignore_content
,
1459 int handle_digits
, int step
)
1461 if (last_wch
> now
->val
.ucs4
)
1463 lr_error (ldfile
, _("\
1464 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1465 (now
->val
.ucs4
| last_wch
) < 65536 ? 4 : 8, now
->val
.ucs4
,
1466 (now
->val
.ucs4
| last_wch
) < 65536 ? 4 : 8, last_wch
);
1470 if (!ignore_content
)
1471 while ((last_wch
+= step
) <= now
->val
.ucs4
)
1473 /* We have to find out whether there is a byte sequence corresponding
1474 to this UCS4 value. */
1475 struct charseq
*seq
;
1478 snprintf (utmp
, sizeof (utmp
), "U%08X", last_wch
);
1479 seq
= charmap_find_value (charmap
, utmp
, 9);
1482 snprintf (utmp
, sizeof (utmp
), "U%04X", last_wch
);
1483 seq
= charmap_find_value (charmap
, utmp
, 5);
1487 /* Try looking in the repertoire map. */
1488 seq
= repertoire_find_seq (repertoire
, last_wch
);
1490 /* If this is the first time we look for this sequence create a new
1494 static const struct charseq negative
1495 = { .ucs4
= ILLEGAL_CHAR_VALUE
};
1497 /* Find the symbolic name for this UCS4 value. */
1498 if (repertoire
!= NULL
)
1500 const char *symbol
= repertoire_find_symbol (repertoire
,
1502 uint32_t *newp
= obstack_alloc (&repertoire
->mem_pool
,
1507 /* We have a name, now search the multibyte value. */
1508 seq
= charmap_find_value (charmap
, symbol
, strlen (symbol
));
1511 /* We have to create a fake entry. */
1512 seq
= (struct charseq
*) &negative
;
1514 seq
->ucs4
= last_wch
;
1516 insert_entry (&repertoire
->seq_table
, newp
, sizeof (uint32_t),
1520 /* We have to create a fake entry. */
1521 seq
= (struct charseq
*) &negative
;
1524 /* We have a name, now search the multibyte value. */
1525 if (seq
->ucs4
== last_wch
&& seq
->nbytes
== 1)
1526 /* Yep, we can store information about this byte sequence. */
1527 ctype
->class256_collection
[(size_t) seq
->bytes
[0]]
1530 /* And of course we have the UCS4 position. */
1532 *find_idx (ctype
, &ctype
->class_collection
,
1533 &ctype
->class_collection_max
,
1534 &ctype
->class_collection_act
, last_wch
) |= class_bit
;
1536 if (handle_digits
== 1)
1538 /* We must store the digit values. */
1539 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1541 ctype
->mbdigits_max
*= 2;
1542 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1543 (ctype
->mbdigits_max
1544 * sizeof (char *)));
1545 ctype
->wcdigits_max
*= 2;
1546 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1547 (ctype
->wcdigits_max
1548 * sizeof (uint32_t)));
1551 ctype
->mbdigits
[ctype
->mbdigits_act
++] = (seq
->ucs4
== last_wch
1553 ctype
->wcdigits
[ctype
->wcdigits_act
++] = last_wch
;
1555 else if (handle_digits
== 2)
1557 /* We must store the digit values. */
1558 if (ctype
->outdigits_act
>= 10)
1560 lr_error (ldfile
, _("\
1561 %s: field `%s' does not contain exactly ten entries"),
1562 "LC_CTYPE", "outdigit");
1566 ctype
->mboutdigits
[ctype
->outdigits_act
] = (seq
->ucs4
== last_wch
1568 ctype
->wcoutdigits
[ctype
->outdigits_act
] = last_wch
;
1569 ++ctype
->outdigits_act
;
1575 /* Ellipsis as in `/xea/x12.../xea/x34'. */
1577 charclass_charcode_ellipsis (struct linereader
*ldfile
,
1578 struct locale_ctype_t
*ctype
,
1579 const struct charmap_t
*charmap
,
1580 struct repertoire_t
*repertoire
,
1581 struct token
*now
, char *last_charcode
,
1582 uint32_t last_charcode_len
,
1583 unsigned long int class256_bit
,
1584 unsigned long int class_bit
, int ignore_content
,
1587 /* First check whether the to-value is larger. */
1588 if (now
->val
.charcode
.nbytes
!= last_charcode_len
)
1590 lr_error (ldfile
, _("\
1591 start and end character sequence of range must have the same length"));
1595 if (memcmp (last_charcode
, now
->val
.charcode
.bytes
, last_charcode_len
) > 0)
1597 lr_error (ldfile
, _("\
1598 to-value character sequence is smaller than from-value sequence"));
1602 if (!ignore_content
)
1606 /* Increment the byte sequence value. */
1607 struct charseq
*seq
;
1611 for (i
= last_charcode_len
- 1; i
>= 0; --i
)
1612 if (++last_charcode
[i
] != 0)
1615 if (last_charcode_len
== 1)
1616 /* Of course we have the charcode value. */
1617 ctype
->class256_collection
[(size_t) last_charcode
[0]]
1620 /* Find the symbolic name. */
1621 seq
= charmap_find_symbol (charmap
, last_charcode
,
1625 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1626 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1627 strlen (seq
->name
));
1628 wch
= seq
== NULL
? ILLEGAL_CHAR_VALUE
: seq
->ucs4
;
1630 if (wch
!= ILLEGAL_CHAR_VALUE
&& class_bit
!= 0)
1631 *find_idx (ctype
, &ctype
->class_collection
,
1632 &ctype
->class_collection_max
,
1633 &ctype
->class_collection_act
, wch
) |= class_bit
;
1636 wch
= ILLEGAL_CHAR_VALUE
;
1638 if (handle_digits
== 1)
1640 /* We must store the digit values. */
1641 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1643 ctype
->mbdigits_max
*= 2;
1644 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1645 (ctype
->mbdigits_max
1646 * sizeof (char *)));
1647 ctype
->wcdigits_max
*= 2;
1648 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1649 (ctype
->wcdigits_max
1650 * sizeof (uint32_t)));
1653 seq
= xmalloc (sizeof (struct charseq
) + last_charcode_len
);
1654 memcpy ((char *) (seq
+ 1), last_charcode
, last_charcode_len
);
1655 seq
->nbytes
= last_charcode_len
;
1657 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
1658 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
1660 else if (handle_digits
== 2)
1662 struct charseq
*seq
;
1663 /* We must store the digit values. */
1664 if (ctype
->outdigits_act
>= 10)
1666 lr_error (ldfile
, _("\
1667 %s: field `%s' does not contain exactly ten entries"),
1668 "LC_CTYPE", "outdigit");
1672 seq
= xmalloc (sizeof (struct charseq
) + last_charcode_len
);
1673 memcpy ((char *) (seq
+ 1), last_charcode
, last_charcode_len
);
1674 seq
->nbytes
= last_charcode_len
;
1676 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
1677 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
1678 ++ctype
->outdigits_act
;
1681 while (memcmp (last_charcode
, now
->val
.charcode
.bytes
,
1682 last_charcode_len
) != 0);
1688 find_translit2 (struct locale_ctype_t
*ctype
, const struct charmap_t
*charmap
,
1691 struct translit_t
*trunp
= ctype
->translit
;
1692 struct translit_ignore_t
*tirunp
= ctype
->translit_ignore
;
1694 while (trunp
!= NULL
)
1696 /* XXX We simplify things here. The transliterations we look
1697 for are only allowed to have one character. */
1698 if (trunp
->from
[0] == wch
&& trunp
->from
[1] == 0)
1700 /* Found it. Now look for a transliteration which can be
1701 represented with the character set. */
1702 struct translit_to_t
*torunp
= trunp
->to
;
1704 while (torunp
!= NULL
)
1708 for (i
= 0; torunp
->str
[i
] != 0; ++i
)
1712 snprintf (utmp
, sizeof (utmp
), "U%08X", torunp
->str
[i
]);
1713 if (charmap_find_value (charmap
, utmp
, 9) == NULL
)
1714 /* This character cannot be represented. */
1718 if (torunp
->str
[i
] == 0)
1721 torunp
= torunp
->next
;
1727 trunp
= trunp
->next
;
1730 /* Check for ignored chars. */
1731 while (tirunp
!= NULL
)
1733 if (tirunp
->from
<= wch
&& tirunp
->to
>= wch
)
1737 for (wi
= tirunp
->from
; wi
<= wch
; wi
+= tirunp
->step
)
1743 /* Nothing found. */
1749 find_translit (struct localedef_t
*locale
, const struct charmap_t
*charmap
,
1752 struct locale_ctype_t
*ctype
;
1753 uint32_t *result
= NULL
;
1755 assert (locale
!= NULL
);
1756 ctype
= locale
->categories
[LC_CTYPE
].ctype
;
1761 if (ctype
->translit
!= NULL
)
1762 result
= find_translit2 (ctype
, charmap
, wch
);
1766 struct translit_include_t
*irunp
= ctype
->translit_include
;
1768 while (irunp
!= NULL
&& result
== NULL
)
1770 result
= find_translit (find_locale (CTYPE_LOCALE
,
1772 irunp
->copy_repertoire
,
1775 irunp
= irunp
->next
;
1783 /* Read one transliteration entry. */
1785 read_widestring (struct linereader
*ldfile
, struct token
*now
,
1786 const struct charmap_t
*charmap
,
1787 struct repertoire_t
*repertoire
)
1791 if (now
->tok
== tok_default_missing
)
1792 /* The special name "" will denote this case. */
1794 else if (now
->tok
== tok_bsymbol
)
1796 /* Get the value from the repertoire. */
1797 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1798 wstr
[0] = repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
1799 now
->val
.str
.lenmb
);
1800 if (wstr
[0] == ILLEGAL_CHAR_VALUE
)
1802 /* We cannot proceed, we don't know the UCS4 value. */
1809 else if (now
->tok
== tok_ucs4
)
1811 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1812 wstr
[0] = now
->val
.ucs4
;
1815 else if (now
->tok
== tok_charcode
)
1817 /* Argh, we have to convert to the symbol name first and then to the
1819 struct charseq
*seq
= charmap_find_symbol (charmap
,
1820 now
->val
.str
.startmb
,
1821 now
->val
.str
.lenmb
);
1823 /* Cannot find the UCS4 value. */
1826 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1827 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1828 strlen (seq
->name
));
1829 if (seq
->ucs4
== ILLEGAL_CHAR_VALUE
)
1830 /* We cannot proceed, we don't know the UCS4 value. */
1833 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1834 wstr
[0] = seq
->ucs4
;
1837 else if (now
->tok
== tok_string
)
1839 wstr
= now
->val
.str
.startwc
;
1840 if (wstr
== NULL
|| wstr
[0] == 0)
1845 if (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
1846 lr_ignore_rest (ldfile
, 0);
1847 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1848 return (uint32_t *) -1l;
1856 read_translit_entry (struct linereader
*ldfile
, struct locale_ctype_t
*ctype
,
1857 struct token
*now
, const struct charmap_t
*charmap
,
1858 struct repertoire_t
*repertoire
)
1860 uint32_t *from_wstr
= read_widestring (ldfile
, now
, charmap
, repertoire
);
1861 struct translit_t
*result
;
1862 struct translit_to_t
**top
;
1863 struct obstack
*ob
= &ctype
->mempool
;
1867 if (from_wstr
== NULL
)
1868 /* There is no valid from string. */
1871 result
= (struct translit_t
*) obstack_alloc (ob
,
1872 sizeof (struct translit_t
));
1873 result
->from
= from_wstr
;
1874 result
->fname
= ldfile
->fname
;
1875 result
->lineno
= ldfile
->lineno
;
1876 result
->next
= NULL
;
1886 /* Next we have one or more transliterations. They are
1887 separated by semicolons. */
1888 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
1890 if (!first
&& (now
->tok
== tok_semicolon
|| now
->tok
== tok_eol
))
1892 /* One string read. */
1893 const uint32_t zero
= 0;
1897 obstack_grow (ob
, &zero
, 4);
1898 to_wstr
= obstack_finish (ob
);
1900 *top
= obstack_alloc (ob
, sizeof (struct translit_to_t
));
1901 (*top
)->str
= to_wstr
;
1902 (*top
)->next
= NULL
;
1905 if (now
->tok
== tok_eol
)
1907 result
->next
= ctype
->translit
;
1908 ctype
->translit
= result
;
1913 top
= &(*top
)->next
;
1918 to_wstr
= read_widestring (ldfile
, now
, charmap
, repertoire
);
1919 if (to_wstr
== (uint32_t *) -1l)
1921 /* An error occurred. */
1922 obstack_free (ob
, result
);
1926 if (to_wstr
== NULL
)
1929 /* This value is usable. */
1930 obstack_grow (ob
, to_wstr
, wcslen ((wchar_t *) to_wstr
) * 4);
1939 read_translit_ignore_entry (struct linereader
*ldfile
,
1940 struct locale_ctype_t
*ctype
,
1941 const struct charmap_t
*charmap
,
1942 struct repertoire_t
*repertoire
)
1944 /* We expect a semicolon-separated list of characters we ignore. We are
1945 only interested in the wide character definitions. These must be
1946 single characters, possibly defining a range when an ellipsis is used. */
1949 struct token
*now
= lr_token (ldfile
, charmap
, NULL
, repertoire
,
1951 struct translit_ignore_t
*newp
;
1954 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
1957 _("premature end of `translit_ignore' definition"));
1961 if (now
->tok
!= tok_bsymbol
&& now
->tok
!= tok_ucs4
)
1963 lr_error (ldfile
, _("syntax error"));
1964 lr_ignore_rest (ldfile
, 0);
1968 if (now
->tok
== tok_ucs4
)
1969 from
= now
->val
.ucs4
;
1971 /* Try to get the value. */
1972 from
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
1973 now
->val
.str
.lenmb
);
1975 if (from
== ILLEGAL_CHAR_VALUE
)
1977 lr_error (ldfile
, "invalid character name");
1982 newp
= (struct translit_ignore_t
*)
1983 obstack_alloc (&ctype
->mempool
, sizeof (struct translit_ignore_t
));
1988 newp
->next
= ctype
->translit_ignore
;
1989 ctype
->translit_ignore
= newp
;
1992 /* Now we expect either a semicolon, an ellipsis, or the end of the
1994 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
1996 if (now
->tok
== tok_ellipsis2
|| now
->tok
== tok_ellipsis2_2
)
1998 /* XXX Should we bother implementing `....'? `...' certainly
1999 will not be implemented. */
2001 int step
= now
->tok
== tok_ellipsis2_2
? 2 : 1;
2003 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
2005 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2008 _("premature end of `translit_ignore' definition"));
2012 if (now
->tok
!= tok_bsymbol
&& now
->tok
!= tok_ucs4
)
2014 lr_error (ldfile
, _("syntax error"));
2015 lr_ignore_rest (ldfile
, 0);
2019 if (now
->tok
== tok_ucs4
)
2022 /* Try to get the value. */
2023 to
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
2024 now
->val
.str
.lenmb
);
2026 if (to
== ILLEGAL_CHAR_VALUE
)
2027 lr_error (ldfile
, "invalid character name");
2030 /* Make sure the `to'-value is larger. */
2037 lr_error (ldfile
, _("\
2038 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2039 (to
| from
) < 65536 ? 4 : 8, to
,
2040 (to
| from
) < 65536 ? 4 : 8, from
);
2043 /* And the next token. */
2044 now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
);
2047 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2051 if (now
->tok
== tok_semicolon
)
2055 /* If we come here something is wrong. */
2056 lr_error (ldfile
, _("syntax error"));
2057 lr_ignore_rest (ldfile
, 0);
2063 /* The parser for the LC_CTYPE section of the locale definition. */
2065 ctype_read (struct linereader
*ldfile
, struct localedef_t
*result
,
2066 const struct charmap_t
*charmap
, const char *repertoire_name
,
2069 struct repertoire_t
*repertoire
= NULL
;
2070 struct locale_ctype_t
*ctype
;
2072 enum token_t nowtok
;
2074 uint32_t last_wch
= 0;
2075 enum token_t last_token
;
2076 enum token_t ellipsis_token
;
2078 char last_charcode
[16];
2079 size_t last_charcode_len
= 0;
2080 const char *last_str
= NULL
;
2082 struct localedef_t
*copy_locale
= NULL
;
2084 /* Get the repertoire we have to use. */
2085 if (repertoire_name
!= NULL
)
2086 repertoire
= repertoire_read (repertoire_name
);
2088 /* The rest of the line containing `LC_CTYPE' must be free. */
2089 lr_ignore_rest (ldfile
, 1);
2094 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2097 while (nowtok
== tok_eol
);
2099 /* If we see `copy' now we are almost done. */
2100 if (nowtok
== tok_copy
)
2102 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2103 if (now
->tok
!= tok_string
)
2105 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2109 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2110 while (now
->tok
!= tok_eof
&& now
->tok
!= tok_end
);
2112 if (now
->tok
!= tok_eof
2113 || (now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
),
2114 now
->tok
== tok_eof
))
2115 lr_error (ldfile
, _("%s: premature end of file"), "LC_CTYPE");
2116 else if (now
->tok
!= tok_lc_ctype
)
2118 lr_error (ldfile
, _("\
2119 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2120 lr_ignore_rest (ldfile
, 0);
2123 lr_ignore_rest (ldfile
, 1);
2128 if (! ignore_content
)
2130 /* Get the locale definition. */
2131 copy_locale
= load_locale (LC_CTYPE
, now
->val
.str
.startmb
,
2132 repertoire_name
, charmap
, NULL
);
2133 if ((copy_locale
->avail
& CTYPE_LOCALE
) == 0)
2135 /* Not yet loaded. So do it now. */
2136 if (locfile_read (copy_locale
, charmap
) != 0)
2140 if (copy_locale
->categories
[LC_CTYPE
].ctype
== NULL
)
2144 lr_ignore_rest (ldfile
, 1);
2146 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2150 /* Prepare the data structures. */
2151 ctype_startup (ldfile
, result
, charmap
, copy_locale
, ignore_content
);
2152 ctype
= result
->categories
[LC_CTYPE
].ctype
;
2154 /* Remember the repertoire we use. */
2155 if (!ignore_content
)
2156 ctype
->repertoire
= repertoire
;
2160 unsigned long int class_bit
= 0;
2161 unsigned long int class256_bit
= 0;
2162 int handle_digits
= 0;
2164 /* Of course we don't proceed beyond the end of file. */
2165 if (nowtok
== tok_eof
)
2168 /* Ignore empty lines. */
2169 if (nowtok
== tok_eol
)
2171 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2179 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2180 while (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2182 ctype_class_new (ldfile
, ctype
, now
->val
.str
.startmb
);
2183 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2184 if (now
->tok
!= tok_semicolon
)
2186 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2188 if (now
->tok
!= tok_eol
)
2190 %s: syntax error in definition of new character class"), "LC_CTYPE");
2194 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2195 while (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2197 ctype_map_new (ldfile
, ctype
, now
->val
.str
.startmb
, charmap
);
2198 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2199 if (now
->tok
!= tok_semicolon
)
2201 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2203 if (now
->tok
!= tok_eol
)
2205 %s: syntax error in definition of new character map"), "LC_CTYPE");
2209 /* Ignore the rest of the line if we don't need the input of
2213 lr_ignore_rest (ldfile
, 0);
2217 /* We simply forget the `class' keyword and use the following
2218 operand to determine the bit. */
2219 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2220 if (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2222 /* This can be one of the predefined class names. */
2223 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
2224 if (strcmp (ctype
->classnames
[cnt
], now
->val
.str
.startmb
) == 0)
2226 if (cnt
>= ctype
->nr_charclass
)
2228 /* OK, it's a new class. */
2229 ctype_class_new (ldfile
, ctype
, now
->val
.str
.startmb
);
2231 class_bit
= _ISwbit (ctype
->nr_charclass
- 1);
2235 class_bit
= _ISwbit (cnt
);
2237 free (now
->val
.str
.startmb
);
2240 else if (now
->tok
== tok_digit
)
2241 goto handle_tok_digit
;
2242 else if (now
->tok
< tok_upper
|| now
->tok
> tok_blank
)
2246 class_bit
= BITw (now
->tok
);
2247 class256_bit
= BIT (now
->tok
);
2250 /* The next character must be a semicolon. */
2251 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2252 if (now
->tok
!= tok_semicolon
)
2254 goto read_charclass
;
2267 /* Ignore the rest of the line if we don't need the input of
2271 lr_ignore_rest (ldfile
, 0);
2275 class_bit
= BITw (now
->tok
);
2276 class256_bit
= BIT (now
->tok
);
2279 ctype
->class_done
|= class_bit
;
2280 last_token
= tok_none
;
2281 ellipsis_token
= tok_none
;
2283 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2284 while (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
2287 struct charseq
*seq
;
2289 if (ellipsis_token
== tok_none
)
2291 if (get_character (now
, charmap
, repertoire
, &seq
, &wch
))
2294 if (!ignore_content
&& seq
!= NULL
&& seq
->nbytes
== 1)
2295 /* Yep, we can store information about this byte
2297 ctype
->class256_collection
[seq
->bytes
[0]] |= class256_bit
;
2299 if (!ignore_content
&& wch
!= ILLEGAL_CHAR_VALUE
2301 /* We have the UCS4 position. */
2302 *find_idx (ctype
, &ctype
->class_collection
,
2303 &ctype
->class_collection_max
,
2304 &ctype
->class_collection_act
, wch
) |= class_bit
;
2306 last_token
= now
->tok
;
2307 /* Terminate the string. */
2308 if (last_token
== tok_bsymbol
)
2310 now
->val
.str
.startmb
[now
->val
.str
.lenmb
] = '\0';
2311 last_str
= now
->val
.str
.startmb
;
2316 memcpy (last_charcode
, now
->val
.charcode
.bytes
, 16);
2317 last_charcode_len
= now
->val
.charcode
.nbytes
;
2319 if (!ignore_content
&& handle_digits
== 1)
2321 /* We must store the digit values. */
2322 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
2324 ctype
->mbdigits_max
+= 10;
2325 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
2326 (ctype
->mbdigits_max
2327 * sizeof (char *)));
2328 ctype
->wcdigits_max
+= 10;
2329 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
2330 (ctype
->wcdigits_max
2331 * sizeof (uint32_t)));
2334 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
2335 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
2337 else if (!ignore_content
&& handle_digits
== 2)
2339 /* We must store the digit values. */
2340 if (ctype
->outdigits_act
>= 10)
2342 lr_error (ldfile
, _("\
2343 %s: field `%s' does not contain exactly ten entries"),
2344 "LC_CTYPE", "outdigit");
2345 lr_ignore_rest (ldfile
, 0);
2349 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
2350 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
2351 ++ctype
->outdigits_act
;
2356 /* Now it gets complicated. We have to resolve the
2357 ellipsis problem. First we must distinguish between
2358 the different kind of ellipsis and this must match the
2359 tokens we have seen. */
2360 assert (last_token
!= tok_none
);
2362 if (last_token
!= now
->tok
)
2364 lr_error (ldfile
, _("\
2365 ellipsis range must be marked by two operands of same type"));
2366 lr_ignore_rest (ldfile
, 0);
2370 if (last_token
== tok_bsymbol
)
2372 if (ellipsis_token
== tok_ellipsis3
)
2373 lr_error (ldfile
, _("with symbolic name range values \
2374 the absolute ellipsis `...' must not be used"));
2376 charclass_symbolic_ellipsis (ldfile
, ctype
, charmap
,
2377 repertoire
, now
, last_str
,
2378 class256_bit
, class_bit
,
2383 handle_digits
, step
);
2385 else if (last_token
== tok_ucs4
)
2387 if (ellipsis_token
!= tok_ellipsis2
)
2388 lr_error (ldfile
, _("\
2389 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2391 charclass_ucs4_ellipsis (ldfile
, ctype
, charmap
,
2392 repertoire
, now
, last_wch
,
2393 class256_bit
, class_bit
,
2394 ignore_content
, handle_digits
,
2399 assert (last_token
== tok_charcode
);
2401 if (ellipsis_token
!= tok_ellipsis3
)
2402 lr_error (ldfile
, _("\
2403 with character code range values one must use the absolute ellipsis `...'"));
2405 charclass_charcode_ellipsis (ldfile
, ctype
, charmap
,
2409 class256_bit
, class_bit
,
2414 /* Now we have used the last value. */
2415 last_token
= tok_none
;
2418 /* Next we expect a semicolon or the end of the line. */
2419 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2420 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2423 if (last_token
!= tok_none
2424 && now
->tok
>= tok_ellipsis2
&& now
->tok
<= tok_ellipsis4_2
)
2426 if (now
->tok
== tok_ellipsis2_2
)
2428 now
->tok
= tok_ellipsis2
;
2431 else if (now
->tok
== tok_ellipsis4_2
)
2433 now
->tok
= tok_ellipsis4
;
2437 ellipsis_token
= now
->tok
;
2439 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2443 if (now
->tok
!= tok_semicolon
)
2446 /* And get the next character. */
2447 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2449 ellipsis_token
= tok_none
;
2455 /* Ignore the rest of the line if we don't need the input of
2459 lr_ignore_rest (ldfile
, 0);
2464 class_bit
= _ISwdigit
;
2465 class256_bit
= _ISdigit
;
2467 goto read_charclass
;
2470 /* Ignore the rest of the line if we don't need the input of
2474 lr_ignore_rest (ldfile
, 0);
2478 if (ctype
->outdigits_act
!= 0)
2479 lr_error (ldfile
, _("\
2480 %s: field `%s' declared more than once"),
2481 "LC_CTYPE", "outdigit");
2485 goto read_charclass
;
2488 /* Ignore the rest of the line if we don't need the input of
2492 lr_ignore_rest (ldfile
, 0);
2500 /* Ignore the rest of the line if we don't need the input of
2504 lr_ignore_rest (ldfile
, 0);
2512 /* Ignore the rest of the line if we don't need the input of
2516 lr_ignore_rest (ldfile
, 0);
2520 /* We simply forget the `map' keyword and use the following
2521 operand to determine the mapping. */
2522 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2523 if (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2527 for (cnt
= 2; cnt
< ctype
->map_collection_nr
; ++cnt
)
2528 if (strcmp (now
->val
.str
.startmb
, ctype
->mapnames
[cnt
]) == 0)
2531 if (cnt
< ctype
->map_collection_nr
)
2532 free (now
->val
.str
.startmb
);
2534 /* OK, it's a new map. */
2535 ctype_map_new (ldfile
, ctype
, now
->val
.str
.startmb
, charmap
);
2539 else if (now
->tok
< tok_toupper
|| now
->tok
> tok_tolower
)
2542 mapidx
= now
->tok
- tok_toupper
;
2544 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2545 /* This better should be a semicolon. */
2546 if (now
->tok
!= tok_semicolon
)
2550 /* Test whether this mapping was already defined. */
2551 if (ctype
->tomap_done
[mapidx
])
2553 lr_error (ldfile
, _("duplicated definition for mapping `%s'"),
2554 ctype
->mapnames
[mapidx
]);
2555 lr_ignore_rest (ldfile
, 0);
2558 ctype
->tomap_done
[mapidx
] = 1;
2560 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2561 while (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
2563 struct charseq
*from_seq
;
2565 struct charseq
*to_seq
;
2568 /* Every pair starts with an opening brace. */
2569 if (now
->tok
!= tok_open_brace
)
2572 /* Next comes the from-value. */
2573 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2574 if (get_character (now
, charmap
, repertoire
, &from_seq
,
2578 /* The next is a comma. */
2579 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2580 if (now
->tok
!= tok_comma
)
2583 /* And the other value. */
2584 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2585 if (get_character (now
, charmap
, repertoire
, &to_seq
,
2589 /* And the last thing is the closing brace. */
2590 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2591 if (now
->tok
!= tok_close_brace
)
2594 if (!ignore_content
)
2596 /* Check whether the mapping converts from an ASCII value
2597 to a non-ASCII value. */
2598 if (from_seq
!= NULL
&& from_seq
->nbytes
== 1
2599 && isascii (from_seq
->bytes
[0])
2600 && to_seq
!= NULL
&& (to_seq
->nbytes
!= 1
2601 || !isascii (to_seq
->bytes
[0])))
2602 ctype
->to_nonascii
= 1;
2604 if (mapidx
< 2 && from_seq
!= NULL
&& to_seq
!= NULL
2605 && from_seq
->nbytes
== 1 && to_seq
->nbytes
== 1)
2606 /* We can use this value. */
2607 ctype
->map256_collection
[mapidx
][from_seq
->bytes
[0]]
2610 if (from_wch
!= ILLEGAL_CHAR_VALUE
2611 && to_wch
!= ILLEGAL_CHAR_VALUE
)
2612 /* Both correct values. */
2613 *find_idx (ctype
, &ctype
->map_collection
[mapidx
],
2614 &ctype
->map_collection_max
[mapidx
],
2615 &ctype
->map_collection_act
[mapidx
],
2619 /* Now comes a semicolon or the end of the line/file. */
2620 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2621 if (now
->tok
== tok_semicolon
)
2622 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2626 case tok_translit_start
:
2627 /* Ignore the entire translit section with its peculiar syntax
2628 if we don't need the input. */
2633 lr_ignore_rest (ldfile
, 0);
2634 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2636 while (now
->tok
!= tok_translit_end
&& now
->tok
!= tok_eof
);
2638 if (now
->tok
== tok_eof
)
2639 lr_error (ldfile
, _(\
2640 "%s: `translit_start' section does not end with `translit_end'"),
2646 /* The rest of the line better should be empty. */
2647 lr_ignore_rest (ldfile
, 1);
2649 /* We count here the number of allocated entries in the `translit'
2653 ldfile
->translate_strings
= 1;
2654 ldfile
->return_widestr
= 1;
2656 /* We proceed until we see the `translit_end' token. */
2657 while (now
= lr_token (ldfile
, charmap
, NULL
, repertoire
, verbose
),
2658 now
->tok
!= tok_translit_end
&& now
->tok
!= tok_eof
)
2660 if (now
->tok
== tok_eol
)
2661 /* Ignore empty lines. */
2664 if (now
->tok
== tok_include
)
2666 /* We have to include locale. */
2667 const char *locale_name
;
2668 const char *repertoire_name
;
2669 struct translit_include_t
*include_stmt
, **include_ptr
;
2671 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2672 /* This should be a string or an identifier. In any
2673 case something to name a locale. */
2674 if (now
->tok
!= tok_string
&& now
->tok
!= tok_ident
)
2677 lr_error (ldfile
, _("%s: syntax error"), "LC_CTYPE");
2678 lr_ignore_rest (ldfile
, 0);
2681 locale_name
= now
->val
.str
.startmb
;
2683 /* Next should be a semicolon. */
2684 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2685 if (now
->tok
!= tok_semicolon
)
2686 goto translit_syntax
;
2688 /* Now the repertoire name. */
2689 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2690 if ((now
->tok
!= tok_string
&& now
->tok
!= tok_ident
)
2691 || now
->val
.str
.startmb
== NULL
)
2692 goto translit_syntax
;
2693 repertoire_name
= now
->val
.str
.startmb
;
2694 if (repertoire_name
[0] == '\0')
2695 /* Ignore the empty string. */
2696 repertoire_name
= NULL
;
2698 /* Save the include statement for later processing. */
2699 include_stmt
= (struct translit_include_t
*)
2700 xmalloc (sizeof (struct translit_include_t
));
2701 include_stmt
->copy_locale
= locale_name
;
2702 include_stmt
->copy_repertoire
= repertoire_name
;
2703 include_stmt
->next
= NULL
;
2705 include_ptr
= &ctype
->translit_include
;
2706 while (*include_ptr
!= NULL
)
2707 include_ptr
= &(*include_ptr
)->next
;
2708 *include_ptr
= include_stmt
;
2710 /* The rest of the line must be empty. */
2711 lr_ignore_rest (ldfile
, 1);
2713 /* Make sure the locale is read. */
2714 add_to_readlist (LC_CTYPE
, locale_name
, repertoire_name
,
2718 else if (now
->tok
== tok_default_missing
)
2724 /* We expect a single character or string as the
2726 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2727 wstr
= read_widestring (ldfile
, now
, charmap
,
2732 if (ctype
->default_missing
!= NULL
)
2734 lr_error (ldfile
, _("\
2735 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2736 record_error_at_line (0, 0,
2737 ctype
->default_missing_file
,
2738 ctype
->default_missing_lineno
,
2740 previous definition was here"));
2744 ctype
->default_missing
= wstr
;
2745 ctype
->default_missing_file
= ldfile
->fname
;
2746 ctype
->default_missing_lineno
= ldfile
->lineno
;
2748 /* We can have more entries, ignore them. */
2749 lr_ignore_rest (ldfile
, 0);
2752 else if (wstr
== (uint32_t *) -1l)
2753 /* This was an syntax error. */
2756 /* Maybe there is another replacement we can use. */
2757 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2758 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2760 /* Nothing found. We tell the user. */
2761 lr_error (ldfile
, _("\
2762 %s: no representable `default_missing' definition found"), "LC_CTYPE");
2765 if (now
->tok
!= tok_semicolon
)
2766 goto translit_syntax
;
2771 else if (now
->tok
== tok_translit_ignore
)
2773 read_translit_ignore_entry (ldfile
, ctype
, charmap
,
2778 read_translit_entry (ldfile
, ctype
, now
, charmap
, repertoire
);
2780 ldfile
->return_widestr
= 0;
2782 if (now
->tok
== tok_eof
)
2783 lr_error (ldfile
, _(\
2784 "%s: `translit_start' section does not end with `translit_end'"),
2790 /* Ignore the rest of the line if we don't need the input of
2794 lr_ignore_rest (ldfile
, 0);
2798 /* This could mean one of several things. First test whether
2799 it's a character class name. */
2800 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
2801 if (strcmp (now
->val
.str
.startmb
, ctype
->classnames
[cnt
]) == 0)
2803 if (cnt
< ctype
->nr_charclass
)
2805 class_bit
= _ISwbit (cnt
);
2806 class256_bit
= cnt
<= 11 ? _ISbit (cnt
) : 0;
2807 free (now
->val
.str
.startmb
);
2808 goto read_charclass
;
2810 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
)
2811 if (strcmp (now
->val
.str
.startmb
, ctype
->mapnames
[cnt
]) == 0)
2813 if (cnt
< ctype
->map_collection_nr
)
2816 free (now
->val
.str
.startmb
);
2822 /* Next we assume `LC_CTYPE'. */
2823 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2824 if (now
->tok
== tok_eof
)
2826 if (now
->tok
== tok_eol
)
2827 lr_error (ldfile
, _("%s: incomplete `END' line"),
2829 else if (now
->tok
!= tok_lc_ctype
)
2830 lr_error (ldfile
, _("\
2831 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2832 lr_ignore_rest (ldfile
, now
->tok
== tok_lc_ctype
);
2837 if (now
->tok
!= tok_eof
)
2838 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2841 /* Prepare for the next round. */
2842 now
= lr_token (ldfile
, charmap
, NULL
, NULL
, verbose
);
2846 /* When we come here we reached the end of the file. */
2847 lr_error (ldfile
, _("%s: premature end of file"), "LC_CTYPE");
2851 /* Subroutine of set_class_defaults, below. */
2853 set_one_default (struct locale_ctype_t
*ctype
,
2854 const struct charmap_t
*charmap
,
2855 int bitpos
, int from
, int to
)
2859 int bit
= _ISbit (bitpos
);
2860 int bitw
= _ISwbit (bitpos
);
2861 /* Define string. */
2864 for (ch
= from
; ch
<= to
; ++ch
)
2866 struct charseq
*seq
;
2869 seq
= charmap_find_value (charmap
, tmp
, 1);
2873 sprintf (buf
, "U%08X", ch
);
2874 seq
= charmap_find_value (charmap
, buf
, 9);
2878 record_error (0, 0, _("\
2879 %s: character `%s' not defined while needed as default value"),
2882 else if (seq
->nbytes
!= 1)
2883 record_error (0, 0, _("\
2884 %s: character `%s' in charmap not representable with one byte"),
2887 ctype
->class256_collection
[seq
->bytes
[0]] |= bit
;
2889 /* No need to search here, the ASCII value is also the Unicode
2891 ELEM (ctype
, class_collection
, , ch
) |= bitw
;
2896 set_class_defaults (struct locale_ctype_t
*ctype
,
2897 const struct charmap_t
*charmap
,
2898 struct repertoire_t
*repertoire
)
2900 #define set_default(bitpos, from, to) \
2901 set_one_default (ctype, charmap, bitpos, from, to)
2903 /* These function defines the default values for the classes and conversions
2904 according to POSIX.2 2.5.2.1.
2905 It may seem that the order of these if-blocks is arbitrary but it is NOT.
2906 Don't move them unless you know what you do! */
2908 /* Set default values if keyword was not present. */
2909 if ((ctype
->class_done
& BITw (tok_upper
)) == 0)
2910 /* "If this keyword [lower] is not specified, the lowercase letters
2911 `A' through `Z', ..., shall automatically belong to this class,
2912 with implementation defined character values." [P1003.2, 2.5.2.1] */
2913 set_default (BITPOS (tok_upper
), 'A', 'Z');
2915 if ((ctype
->class_done
& BITw (tok_lower
)) == 0)
2916 /* "If this keyword [lower] is not specified, the lowercase letters
2917 `a' through `z', ..., shall automatically belong to this class,
2918 with implementation defined character values." [P1003.2, 2.5.2.1] */
2919 set_default (BITPOS (tok_lower
), 'a', 'z');
2921 if ((ctype
->class_done
& BITw (tok_alpha
)) == 0)
2923 /* Table 2-6 in P1003.2 says that characters in class `upper' or
2924 class `lower' *must* be in class `alpha'. */
2925 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
);
2926 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
);
2928 for (size_t cnt
= 0; cnt
< 256; ++cnt
)
2929 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
2930 ctype
->class256_collection
[cnt
] |= BIT (tok_alpha
);
2932 for (size_t cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
2933 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
2934 ctype
->class_collection
[cnt
] |= BITw (tok_alpha
);
2937 if ((ctype
->class_done
& BITw (tok_digit
)) == 0)
2938 /* "If this keyword [digit] is not specified, the digits `0' through
2939 `9', ..., shall automatically belong to this class, with
2940 implementation-defined character values." [P1003.2, 2.5.2.1] */
2941 set_default (BITPOS (tok_digit
), '0', '9');
2943 /* "Only characters specified for the `alpha' and `digit' keyword
2944 shall be specified. Characters specified for the keyword `alpha'
2945 and `digit' are automatically included in this class. */
2947 unsigned long int mask
= BIT (tok_alpha
) | BIT (tok_digit
);
2948 unsigned long int maskw
= BITw (tok_alpha
) | BITw (tok_digit
);
2950 for (size_t cnt
= 0; cnt
< 256; ++cnt
)
2951 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
2952 ctype
->class256_collection
[cnt
] |= BIT (tok_alnum
);
2954 for (size_t cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
2955 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
2956 ctype
->class_collection
[cnt
] |= BITw (tok_alnum
);
2959 if ((ctype
->class_done
& BITw (tok_space
)) == 0)
2960 /* "If this keyword [space] is not specified, the characters <space>,
2961 <form-feed>, <newline>, <carriage-return>, <tab>, and
2962 <vertical-tab>, ..., shall automatically belong to this class,
2963 with implementation-defined character values." [P1003.2, 2.5.2.1] */
2965 struct charseq
*seq
;
2967 seq
= charmap_find_value (charmap
, "space", 5);
2969 seq
= charmap_find_value (charmap
, "SP", 2);
2971 seq
= charmap_find_value (charmap
, "U00000020", 9);
2974 record_error (0, 0, _("\
2975 %s: character `%s' not defined while needed as default value"),
2976 "LC_CTYPE", "<space>");
2978 else if (seq
->nbytes
!= 1)
2979 record_error (0, 0, _("\
2980 %s: character `%s' in charmap not representable with one byte"),
2981 "LC_CTYPE", "<space>");
2983 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
2985 /* No need to search. */
2986 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_space
);
2988 seq
= charmap_find_value (charmap
, "form-feed", 9);
2990 seq
= charmap_find_value (charmap
, "U0000000C", 9);
2993 record_error (0, 0, _("\
2994 %s: character `%s' not defined while needed as default value"),
2995 "LC_CTYPE", "<form-feed>");
2997 else if (seq
->nbytes
!= 1)
2998 record_error (0, 0, _("\
2999 %s: character `%s' in charmap not representable with one byte"),
3000 "LC_CTYPE", "<form-feed>");
3002 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3004 /* No need to search. */
3005 ELEM (ctype
, class_collection
, , L
'\f') |= BITw (tok_space
);
3008 seq
= charmap_find_value (charmap
, "newline", 7);
3010 seq
= charmap_find_value (charmap
, "U0000000A", 9);
3013 record_error (0, 0, _("\
3014 %s: character `%s' not defined while needed as default value"),
3015 "LC_CTYPE", "<newline>");
3017 else if (seq
->nbytes
!= 1)
3018 record_error (0, 0, _("\
3019 %s: character `%s' in charmap not representable with one byte"),
3020 "LC_CTYPE", "<newline>");
3022 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3024 /* No need to search. */
3025 ELEM (ctype
, class_collection
, , L
'\n') |= BITw (tok_space
);
3028 seq
= charmap_find_value (charmap
, "carriage-return", 15);
3030 seq
= charmap_find_value (charmap
, "U0000000D", 9);
3033 record_error (0, 0, _("\
3034 %s: character `%s' not defined while needed as default value"),
3035 "LC_CTYPE", "<carriage-return>");
3037 else if (seq
->nbytes
!= 1)
3038 record_error (0, 0, _("\
3039 %s: character `%s' in charmap not representable with one byte"),
3040 "LC_CTYPE", "<carriage-return>");
3042 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3044 /* No need to search. */
3045 ELEM (ctype
, class_collection
, , L
'\r') |= BITw (tok_space
);
3048 seq
= charmap_find_value (charmap
, "tab", 3);
3050 seq
= charmap_find_value (charmap
, "U00000009", 9);
3053 record_error (0, 0, _("\
3054 %s: character `%s' not defined while needed as default value"),
3055 "LC_CTYPE", "<tab>");
3057 else if (seq
->nbytes
!= 1)
3058 record_error (0, 0, _("\
3059 %s: character `%s' in charmap not representable with one byte"),
3060 "LC_CTYPE", "<tab>");
3062 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3064 /* No need to search. */
3065 ELEM (ctype
, class_collection
, , L
'\t') |= BITw (tok_space
);
3068 seq
= charmap_find_value (charmap
, "vertical-tab", 12);
3070 seq
= charmap_find_value (charmap
, "U0000000B", 9);
3073 record_error (0, 0, _("\
3074 %s: character `%s' not defined while needed as default value"),
3075 "LC_CTYPE", "<vertical-tab>");
3077 else if (seq
->nbytes
!= 1)
3078 record_error (0, 0, _("\
3079 %s: character `%s' in charmap not representable with one byte"),
3080 "LC_CTYPE", "<vertical-tab>");
3082 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3084 /* No need to search. */
3085 ELEM (ctype
, class_collection
, , L
'\v') |= BITw (tok_space
);
3088 if ((ctype
->class_done
& BITw (tok_xdigit
)) == 0)
3089 /* "If this keyword is not specified, the digits `0' to `9', the
3090 uppercase letters `A' through `F', and the lowercase letters `a'
3091 through `f', ..., shell automatically belong to this class, with
3092 implementation defined character values." [P1003.2, 2.5.2.1] */
3094 set_default (BITPOS (tok_xdigit
), '0', '9');
3095 set_default (BITPOS (tok_xdigit
), 'A', 'F');
3096 set_default (BITPOS (tok_xdigit
), 'a', 'f');
3099 if ((ctype
->class_done
& BITw (tok_blank
)) == 0)
3100 /* "If this keyword [blank] is unspecified, the characters <space> and
3101 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
3103 struct charseq
*seq
;
3105 seq
= charmap_find_value (charmap
, "space", 5);
3107 seq
= charmap_find_value (charmap
, "SP", 2);
3109 seq
= charmap_find_value (charmap
, "U00000020", 9);
3112 record_error (0, 0, _("\
3113 %s: character `%s' not defined while needed as default value"),
3114 "LC_CTYPE", "<space>");
3116 else if (seq
->nbytes
!= 1)
3117 record_error (0, 0, _("\
3118 %s: character `%s' in charmap not representable with one byte"),
3119 "LC_CTYPE", "<space>");
3121 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_blank
);
3123 /* No need to search. */
3124 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_blank
);
3127 seq
= charmap_find_value (charmap
, "tab", 3);
3129 seq
= charmap_find_value (charmap
, "U00000009", 9);
3132 record_error (0, 0, _("\
3133 %s: character `%s' not defined while needed as default value"),
3134 "LC_CTYPE", "<tab>");
3136 else if (seq
->nbytes
!= 1)
3137 record_error (0, 0, _("\
3138 %s: character `%s' in charmap not representable with one byte"),
3139 "LC_CTYPE", "<tab>");
3141 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_blank
);
3143 /* No need to search. */
3144 ELEM (ctype
, class_collection
, , L
'\t') |= BITw (tok_blank
);
3147 if ((ctype
->class_done
& BITw (tok_graph
)) == 0)
3148 /* "If this keyword [graph] is not specified, characters specified for
3149 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3150 shall belong to this character class." [P1003.2, 2.5.2.1] */
3152 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
)
3153 | BIT (tok_alpha
) | BIT (tok_digit
) | BIT (tok_xdigit
)
3155 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
)
3156 | BITw (tok_alpha
) | BITw (tok_digit
) | BITw (tok_xdigit
)
3159 for (size_t cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3160 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3161 ctype
->class_collection
[cnt
] |= BITw (tok_graph
);
3163 for (size_t cnt
= 0; cnt
< 256; ++cnt
)
3164 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3165 ctype
->class256_collection
[cnt
] |= BIT (tok_graph
);
3168 if ((ctype
->class_done
& BITw (tok_print
)) == 0)
3169 /* "If this keyword [print] is not provided, characters specified for
3170 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3171 and the <space> character shall belong to this character class."
3172 [P1003.2, 2.5.2.1] */
3174 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
)
3175 | BIT (tok_alpha
) | BIT (tok_digit
) | BIT (tok_xdigit
)
3177 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
)
3178 | BITw (tok_alpha
) | BITw (tok_digit
) | BITw (tok_xdigit
)
3180 struct charseq
*seq
;
3182 for (size_t cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3183 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3184 ctype
->class_collection
[cnt
] |= BITw (tok_print
);
3186 for (size_t cnt
= 0; cnt
< 256; ++cnt
)
3187 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3188 ctype
->class256_collection
[cnt
] |= BIT (tok_print
);
3191 seq
= charmap_find_value (charmap
, "space", 5);
3193 seq
= charmap_find_value (charmap
, "SP", 2);
3195 seq
= charmap_find_value (charmap
, "U00000020", 9);
3198 record_error (0, 0, _("\
3199 %s: character `%s' not defined while needed as default value"),
3200 "LC_CTYPE", "<space>");
3202 else if (seq
->nbytes
!= 1)
3203 record_error (0, 0, _("\
3204 %s: character `%s' in charmap not representable with one byte"),
3205 "LC_CTYPE", "<space>");
3207 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_print
);
3209 /* No need to search. */
3210 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_print
);
3213 if (ctype
->tomap_done
[0] == 0)
3214 /* "If this keyword [toupper] is not specified, the lowercase letters
3215 `a' through `z', and their corresponding uppercase letters `A' to
3216 `Z', ..., shall automatically be included, with implementation-
3217 defined character values." [P1003.2, 2.5.2.1] */
3222 strcpy (tmp
, "<?>");
3224 for (ch
= 'a'; ch
<= 'z'; ++ch
)
3226 struct charseq
*seq_from
, *seq_to
;
3230 seq_from
= charmap_find_value (charmap
, &tmp
[1], 1);
3231 if (seq_from
== NULL
)
3234 sprintf (buf
, "U%08X", ch
);
3235 seq_from
= charmap_find_value (charmap
, buf
, 9);
3237 if (seq_from
== NULL
)
3239 record_error (0, 0, _("\
3240 %s: character `%s' not defined while needed as default value"),
3243 else if (seq_from
->nbytes
!= 1)
3245 record_error (0, 0, _("\
3246 %s: character `%s' needed as default value not representable with one byte"),
3251 /* This conversion is implementation defined. */
3252 tmp
[1] = (char) (ch
+ ('A' - 'a'));
3253 seq_to
= charmap_find_value (charmap
, &tmp
[1], 1);
3257 sprintf (buf
, "U%08X", ch
+ ('A' - 'a'));
3258 seq_to
= charmap_find_value (charmap
, buf
, 9);
3262 record_error (0, 0, _("\
3263 %s: character `%s' not defined while needed as default value"),
3266 else if (seq_to
->nbytes
!= 1)
3268 record_error (0, 0, _("\
3269 %s: character `%s' needed as default value not representable with one byte"),
3273 /* The index [0] is determined by the order of the
3274 `ctype_map_newP' calls in `ctype_startup'. */
3275 ctype
->map256_collection
[0][seq_from
->bytes
[0]]
3279 /* No need to search. */
3280 ELEM (ctype
, map_collection
, [0], ch
) = ch
+ ('A' - 'a');
3284 if (ctype
->tomap_done
[1] == 0)
3285 /* "If this keyword [tolower] is not specified, the mapping shall be
3286 the reverse mapping of the one specified to `toupper'." [P1003.2] */
3288 for (size_t cnt
= 0; cnt
< ctype
->map_collection_act
[0]; ++cnt
)
3289 if (ctype
->map_collection
[0][cnt
] != 0)
3290 ELEM (ctype
, map_collection
, [1],
3291 ctype
->map_collection
[0][cnt
])
3292 = ctype
->charnames
[cnt
];
3294 for (size_t cnt
= 0; cnt
< 256; ++cnt
)
3295 if (ctype
->map256_collection
[0][cnt
] != 0)
3296 ctype
->map256_collection
[1][ctype
->map256_collection
[0][cnt
]] = cnt
;
3299 if (ctype
->outdigits_act
!= 10)
3301 if (ctype
->outdigits_act
!= 0)
3302 record_error (0, 0, _("\
3303 %s: field `%s' does not contain exactly ten entries"),
3304 "LC_CTYPE", "outdigit");
3306 for (size_t cnt
= ctype
->outdigits_act
; cnt
< 10; ++cnt
)
3308 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3309 (char *) digits
+ cnt
,
3312 if (ctype
->mboutdigits
[cnt
] == NULL
)
3313 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3315 strlen (longnames
[cnt
]));
3317 if (ctype
->mboutdigits
[cnt
] == NULL
)
3318 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3321 if (ctype
->mboutdigits
[cnt
] == NULL
)
3323 /* Provide a replacement. */
3324 record_error (0, 0, _("\
3325 no output digits defined and none of the standard names in the charmap"));
3327 ctype
->mboutdigits
[cnt
] = obstack_alloc (&((struct charmap_t
*) charmap
)->mem_pool
,
3328 sizeof (struct charseq
)
3331 /* This is better than nothing. */
3332 ctype
->mboutdigits
[cnt
]->bytes
[0] = digits
[cnt
];
3333 ctype
->mboutdigits
[cnt
]->nbytes
= 1;
3336 ctype
->wcoutdigits
[cnt
] = L
'0' + cnt
;
3339 ctype
->outdigits_act
= 10;
3346 /* Initialize. Assumes t->p and t->q have already been set. */
3348 wctype_table_init (struct wctype_table
*t
)
3351 t
->level1_alloc
= t
->level1_size
= 0;
3353 t
->level2_alloc
= t
->level2_size
= 0;
3355 t
->level3_alloc
= t
->level3_size
= 0;
3358 /* Retrieve an entry. */
3360 wctype_table_get (struct wctype_table
*t
, uint32_t wc
)
3362 uint32_t index1
= wc
>> (t
->q
+ t
->p
+ 5);
3363 if (index1
< t
->level1_size
)
3365 uint32_t lookup1
= t
->level1
[index1
];
3366 if (lookup1
!= EMPTY
)
3368 uint32_t index2
= ((wc
>> (t
->p
+ 5)) & ((1 << t
->q
) - 1))
3369 + (lookup1
<< t
->q
);
3370 uint32_t lookup2
= t
->level2
[index2
];
3371 if (lookup2
!= EMPTY
)
3373 uint32_t index3
= ((wc
>> 5) & ((1 << t
->p
) - 1))
3374 + (lookup2
<< t
->p
);
3375 uint32_t lookup3
= t
->level3
[index3
];
3376 uint32_t index4
= wc
& 0x1f;
3378 return (lookup3
>> index4
) & 1;
3385 /* Add one entry. */
3387 wctype_table_add (struct wctype_table
*t
, uint32_t wc
)
3389 uint32_t index1
= wc
>> (t
->q
+ t
->p
+ 5);
3390 uint32_t index2
= (wc
>> (t
->p
+ 5)) & ((1 << t
->q
) - 1);
3391 uint32_t index3
= (wc
>> 5) & ((1 << t
->p
) - 1);
3392 uint32_t index4
= wc
& 0x1f;
3395 if (index1
>= t
->level1_size
)
3397 if (index1
>= t
->level1_alloc
)
3399 size_t alloc
= 2 * t
->level1_alloc
;
3400 if (alloc
<= index1
)
3402 t
->level1
= (uint32_t *) xrealloc ((char *) t
->level1
,
3403 alloc
* sizeof (uint32_t));
3404 t
->level1_alloc
= alloc
;
3406 while (index1
>= t
->level1_size
)
3407 t
->level1
[t
->level1_size
++] = EMPTY
;
3410 if (t
->level1
[index1
] == EMPTY
)
3412 if (t
->level2_size
== t
->level2_alloc
)
3414 size_t alloc
= 2 * t
->level2_alloc
+ 1;
3415 t
->level2
= (uint32_t *) xrealloc ((char *) t
->level2
,
3416 (alloc
<< t
->q
) * sizeof (uint32_t));
3417 t
->level2_alloc
= alloc
;
3419 i1
= t
->level2_size
<< t
->q
;
3420 i2
= (t
->level2_size
+ 1) << t
->q
;
3421 for (i
= i1
; i
< i2
; i
++)
3422 t
->level2
[i
] = EMPTY
;
3423 t
->level1
[index1
] = t
->level2_size
++;
3426 index2
+= t
->level1
[index1
] << t
->q
;
3428 if (t
->level2
[index2
] == EMPTY
)
3430 if (t
->level3_size
== t
->level3_alloc
)
3432 size_t alloc
= 2 * t
->level3_alloc
+ 1;
3433 t
->level3
= (uint32_t *) xrealloc ((char *) t
->level3
,
3434 (alloc
<< t
->p
) * sizeof (uint32_t));
3435 t
->level3_alloc
= alloc
;
3437 i1
= t
->level3_size
<< t
->p
;
3438 i2
= (t
->level3_size
+ 1) << t
->p
;
3439 for (i
= i1
; i
< i2
; i
++)
3441 t
->level2
[index2
] = t
->level3_size
++;
3444 index3
+= t
->level2
[index2
] << t
->p
;
3446 t
->level3
[index3
] |= (uint32_t)1 << index4
;
3449 /* Finalize and shrink. */
3451 add_locale_wctype_table (struct locale_file
*file
, struct wctype_table
*t
)
3454 uint32_t reorder3
[t
->level3_size
];
3455 uint32_t reorder2
[t
->level2_size
];
3456 uint32_t level2_offset
, level3_offset
;
3458 /* Uniquify level3 blocks. */
3460 for (j
= 0; j
< t
->level3_size
; j
++)
3462 for (i
= 0; i
< k
; i
++)
3463 if (memcmp (&t
->level3
[i
<< t
->p
], &t
->level3
[j
<< t
->p
],
3464 (1 << t
->p
) * sizeof (uint32_t)) == 0)
3466 /* Relocate block j to block i. */
3471 memcpy (&t
->level3
[i
<< t
->p
], &t
->level3
[j
<< t
->p
],
3472 (1 << t
->p
) * sizeof (uint32_t));
3478 for (i
= 0; i
< (t
->level2_size
<< t
->q
); i
++)
3479 if (t
->level2
[i
] != EMPTY
)
3480 t
->level2
[i
] = reorder3
[t
->level2
[i
]];
3482 /* Uniquify level2 blocks. */
3484 for (j
= 0; j
< t
->level2_size
; j
++)
3486 for (i
= 0; i
< k
; i
++)
3487 if (memcmp (&t
->level2
[i
<< t
->q
], &t
->level2
[j
<< t
->q
],
3488 (1 << t
->q
) * sizeof (uint32_t)) == 0)
3490 /* Relocate block j to block i. */
3495 memcpy (&t
->level2
[i
<< t
->q
], &t
->level2
[j
<< t
->q
],
3496 (1 << t
->q
) * sizeof (uint32_t));
3502 for (i
= 0; i
< t
->level1_size
; i
++)
3503 if (t
->level1
[i
] != EMPTY
)
3504 t
->level1
[i
] = reorder2
[t
->level1
[i
]];
3507 5 * sizeof (uint32_t)
3508 + t
->level1_size
* sizeof (uint32_t)
3509 + (t
->level2_size
<< t
->q
) * sizeof (uint32_t)
3510 + (t
->level3_size
<< t
->p
) * sizeof (uint32_t);
3513 5 * sizeof (uint32_t)
3514 + t
->level1_size
* sizeof (uint32_t);
3516 5 * sizeof (uint32_t)
3517 + t
->level1_size
* sizeof (uint32_t)
3518 + (t
->level2_size
<< t
->q
) * sizeof (uint32_t);
3520 start_locale_structure (file
);
3521 add_locale_uint32 (file
, t
->q
+ t
->p
+ 5);
3522 add_locale_uint32 (file
, t
->level1_size
);
3523 add_locale_uint32 (file
, t
->p
+ 5);
3524 add_locale_uint32 (file
, (1 << t
->q
) - 1);
3525 add_locale_uint32 (file
, (1 << t
->p
) - 1);
3527 for (i
= 0; i
< t
->level1_size
; i
++)
3530 t
->level1
[i
] == EMPTY
3532 : (t
->level1
[i
] << t
->q
) * sizeof (uint32_t) + level2_offset
);
3534 for (i
= 0; i
< (t
->level2_size
<< t
->q
); i
++)
3537 t
->level2
[i
] == EMPTY
3539 : (t
->level2
[i
] << t
->p
) * sizeof (uint32_t) + level3_offset
);
3541 add_locale_uint32_array (file
, t
->level3
, t
->level3_size
<< t
->p
);
3542 end_locale_structure (file
);
3544 if (t
->level1_alloc
> 0)
3546 if (t
->level2_alloc
> 0)
3548 if (t
->level3_alloc
> 0)
3552 /* Flattens the included transliterations into a translit list.
3553 Inserts them in the list at `cursor', and returns the new cursor. */
3554 static struct translit_t
**
3555 translit_flatten (struct locale_ctype_t
*ctype
,
3556 const struct charmap_t
*charmap
,
3557 struct translit_t
**cursor
)
3559 while (ctype
->translit_include
!= NULL
)
3561 const char *copy_locale
= ctype
->translit_include
->copy_locale
;
3562 const char *copy_repertoire
= ctype
->translit_include
->copy_repertoire
;
3563 struct localedef_t
*other
;
3565 /* Unchain the include statement. During the depth-first traversal
3566 we don't want to visit any locale more than once. */
3567 ctype
->translit_include
= ctype
->translit_include
->next
;
3569 other
= find_locale (LC_CTYPE
, copy_locale
, copy_repertoire
, charmap
);
3571 if (other
== NULL
|| other
->categories
[LC_CTYPE
].ctype
== NULL
)
3573 record_error (0, 0, _("\
3574 %s: transliteration data from locale `%s' not available"),
3575 "LC_CTYPE", copy_locale
);
3579 struct locale_ctype_t
*other_ctype
=
3580 other
->categories
[LC_CTYPE
].ctype
;
3582 cursor
= translit_flatten (other_ctype
, charmap
, cursor
);
3583 assert (other_ctype
->translit_include
== NULL
);
3585 if (other_ctype
->translit
!= NULL
)
3587 /* Insert the other_ctype->translit list at *cursor. */
3588 struct translit_t
*endp
= other_ctype
->translit
;
3589 while (endp
->next
!= NULL
)
3592 endp
->next
= *cursor
;
3593 *cursor
= other_ctype
->translit
;
3595 /* Avoid any risk of circular lists. */
3596 other_ctype
->translit
= NULL
;
3598 cursor
= &endp
->next
;
3601 if (ctype
->default_missing
== NULL
)
3602 ctype
->default_missing
= other_ctype
->default_missing
;
3610 allocate_arrays (struct locale_ctype_t
*ctype
, const struct charmap_t
*charmap
,
3611 struct repertoire_t
*repertoire
)
3619 /* You wonder about this amount of memory? This is only because some
3620 users do not manage to address the array with unsigned values or
3621 data types with range >= 256. '\200' would result in the array
3622 index -128. To help these poor people we duplicate the entries for
3623 128 up to 255 below the entry for \0. */
3624 ctype
->ctype_b
= (char_class_t
*) xcalloc (256 + 128, sizeof (char_class_t
));
3625 ctype
->ctype32_b
= (char_class32_t
*) xcalloc (256, sizeof (char_class32_t
));
3626 ctype
->class_b
= (uint32_t **)
3627 xmalloc (ctype
->nr_charclass
* sizeof (uint32_t *));
3628 ctype
->class_3level
= (struct wctype_table
*)
3629 xmalloc (ctype
->nr_charclass
* sizeof (struct wctype_table
));
3631 /* This is the array accessed using the multibyte string elements. */
3632 for (idx
= 0; idx
< 256; ++idx
)
3633 ctype
->ctype_b
[128 + idx
] = ctype
->class256_collection
[idx
];
3635 /* Mirror first 127 entries. We must take care that entry -1 is not
3636 mirrored because EOF == -1. */
3637 for (idx
= 0; idx
< 127; ++idx
)
3638 ctype
->ctype_b
[idx
] = ctype
->ctype_b
[256 + idx
];
3640 /* The 32 bit array contains all characters < 0x100. */
3641 for (idx
= 0; idx
< ctype
->class_collection_act
; ++idx
)
3642 if (ctype
->charnames
[idx
] < 0x100)
3643 ctype
->ctype32_b
[ctype
->charnames
[idx
]] = ctype
->class_collection
[idx
];
3645 for (nr
= 0; nr
< ctype
->nr_charclass
; nr
++)
3647 ctype
->class_b
[nr
] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
3649 /* We only set CLASS_B for the bits in the ISO C classes, not
3650 the user defined classes. The number should not change but
3652 #define LAST_ISO_C_BIT 11
3653 if (nr
<= LAST_ISO_C_BIT
)
3654 for (idx
= 0; idx
< 256; ++idx
)
3655 if (ctype
->class256_collection
[idx
] & _ISbit (nr
))
3656 ctype
->class_b
[nr
][idx
>> 5] |= (uint32_t) 1 << (idx
& 0x1f);
3659 for (nr
= 0; nr
< ctype
->nr_charclass
; nr
++)
3661 struct wctype_table
*t
;
3663 t
= &ctype
->class_3level
[nr
];
3664 t
->p
= 4; /* or: 5 */
3665 t
->q
= 7; /* or: 6 */
3666 wctype_table_init (t
);
3668 for (idx
= 0; idx
< ctype
->class_collection_act
; ++idx
)
3669 if (ctype
->class_collection
[idx
] & _ISwbit (nr
))
3670 wctype_table_add (t
, ctype
->charnames
[idx
]);
3672 record_verbose (stderr
, _("\
3673 %s: table for class \"%s\": %lu bytes"),
3674 "LC_CTYPE", ctype
->classnames
[nr
],
3675 (unsigned long int) t
->result_size
);
3678 /* Room for table of mappings. */
3679 ctype
->map_b
= (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3680 ctype
->map32_b
= (uint32_t **) xmalloc (ctype
->map_collection_nr
3681 * sizeof (uint32_t *));
3682 ctype
->map_3level
= (struct wctrans_table
*)
3683 xmalloc (ctype
->map_collection_nr
* sizeof (struct wctrans_table
));
3685 /* Fill in all mappings. */
3686 for (idx
= 0; idx
< 2; ++idx
)
3690 /* Allocate table. */
3691 ctype
->map_b
[idx
] = (uint32_t *)
3692 xmalloc ((256 + 128) * sizeof (uint32_t));
3694 /* Copy values from collection. */
3695 for (idx2
= 0; idx2
< 256; ++idx2
)
3696 ctype
->map_b
[idx
][128 + idx2
] = ctype
->map256_collection
[idx
][idx2
];
3698 /* Mirror first 127 entries. We must take care not to map entry
3699 -1 because EOF == -1. */
3700 for (idx2
= 0; idx2
< 127; ++idx2
)
3701 ctype
->map_b
[idx
][idx2
] = ctype
->map_b
[idx
][256 + idx2
];
3703 /* EOF must map to EOF. */
3704 ctype
->map_b
[idx
][127] = EOF
;
3707 for (idx
= 0; idx
< ctype
->map_collection_nr
; ++idx
)
3711 /* Allocate table. */
3712 ctype
->map32_b
[idx
] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
3714 /* Copy values from collection. Default is identity mapping. */
3715 for (idx2
= 0; idx2
< 256; ++idx2
)
3716 ctype
->map32_b
[idx
][idx2
] =
3717 (ctype
->map_collection
[idx
][idx2
] != 0
3718 ? ctype
->map_collection
[idx
][idx2
]
3722 for (nr
= 0; nr
< ctype
->map_collection_nr
; nr
++)
3724 struct wctrans_table
*t
;
3726 t
= &ctype
->map_3level
[nr
];
3729 wctrans_table_init (t
);
3731 for (idx
= 0; idx
< ctype
->map_collection_act
[nr
]; ++idx
)
3732 if (ctype
->map_collection
[nr
][idx
] != 0)
3733 wctrans_table_add (t
, ctype
->charnames
[idx
],
3734 ctype
->map_collection
[nr
][idx
]);
3736 record_verbose (stderr
, _("\
3737 %s: table for map \"%s\": %lu bytes"),
3738 "LC_CTYPE", ctype
->mapnames
[nr
],
3739 (unsigned long int) t
->result_size
);
3742 /* Extra array for class and map names. */
3743 ctype
->class_name_ptr
= (uint32_t *) xmalloc (ctype
->nr_charclass
3744 * sizeof (uint32_t));
3745 ctype
->map_name_ptr
= (uint32_t *) xmalloc (ctype
->map_collection_nr
3746 * sizeof (uint32_t));
3748 ctype
->class_offset
= _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
);
3749 ctype
->map_offset
= ctype
->class_offset
+ ctype
->nr_charclass
;
3751 /* Array for width information. Because the expected widths are very
3752 small (never larger than 2) we use only one single byte. This
3754 We put only printable characters in the table. wcwidth is specified
3755 to return -1 for non-printable characters. Doing the check here
3756 saves a run-time check.
3757 But we put L'\0' in the table. This again saves a run-time check. */
3759 struct wcwidth_table
*t
;
3764 wcwidth_table_init (t
);
3766 /* First set all the printable characters of the character set to
3767 the default width. */
3769 while (iterate_table (&charmap
->char_table
, &curs
, &key
, &len
, &vdata
) == 0)
3771 struct charseq
*data
= (struct charseq
*) vdata
;
3773 if (data
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
3774 data
->ucs4
= repertoire_find_value (ctype
->repertoire
,
3777 if (data
->ucs4
!= ILLEGAL_CHAR_VALUE
)
3779 uint32_t *class_bits
=
3780 find_idx (ctype
, &ctype
->class_collection
, NULL
,
3781 &ctype
->class_collection_act
, data
->ucs4
);
3783 if (class_bits
!= NULL
&& (*class_bits
& BITw (tok_print
)))
3784 wcwidth_table_add (t
, data
->ucs4
, charmap
->width_default
);
3788 /* Now add the explicitly specified widths. */
3789 if (charmap
->width_rules
!= NULL
)
3790 for (size_t cnt
= 0; cnt
< charmap
->nwidth_rules
; ++cnt
)
3792 unsigned char bytes
[charmap
->mb_cur_max
];
3793 int nbytes
= charmap
->width_rules
[cnt
].from
->nbytes
;
3795 /* We have the range of characters for which the width is
3796 specified described using byte sequences of the multibyte
3797 charset. We have to convert this to UCS4 now. And we
3798 cannot simply convert the beginning and the end of the
3799 sequence, we have to iterate over the byte sequence and
3800 convert it for every single character. */
3801 memcpy (bytes
, charmap
->width_rules
[cnt
].from
->bytes
, nbytes
);
3803 while (nbytes
< charmap
->width_rules
[cnt
].to
->nbytes
3804 || memcmp (bytes
, charmap
->width_rules
[cnt
].to
->bytes
,
3807 /* Find the UCS value for `bytes'. */
3810 struct charseq
*seq
=
3811 charmap_find_symbol (charmap
, (char *) bytes
, nbytes
);
3814 wch
= ILLEGAL_CHAR_VALUE
;
3815 else if (seq
->ucs4
!= UNINITIALIZED_CHAR_VALUE
)
3818 wch
= repertoire_find_value (ctype
->repertoire
, seq
->name
,
3819 strlen (seq
->name
));
3821 if (wch
!= ILLEGAL_CHAR_VALUE
)
3823 /* Store the value. */
3824 uint32_t *class_bits
=
3825 find_idx (ctype
, &ctype
->class_collection
, NULL
,
3826 &ctype
->class_collection_act
, wch
);
3828 if (class_bits
!= NULL
&& (*class_bits
& BITw (tok_print
)))
3829 wcwidth_table_add (t
, wch
,
3830 charmap
->width_rules
[cnt
].width
);
3833 /* "Increment" the bytes sequence. */
3835 while (inner
>= 0 && bytes
[inner
] == 0xff)
3840 /* We have to extend the byte sequence. */
3841 if (nbytes
>= charmap
->width_rules
[cnt
].to
->nbytes
)
3845 memset (&bytes
[1], 0, nbytes
);
3851 while (++inner
< nbytes
)
3857 /* Set the width of L'\0' to 0. */
3858 wcwidth_table_add (t
, 0, 0);
3860 record_verbose (stderr
, _("%s: table for width: %lu bytes"),
3861 "LC_CTYPE", (unsigned long int) t
->result_size
);
3864 /* Set MB_CUR_MAX. */
3865 ctype
->mb_cur_max
= charmap
->mb_cur_max
;
3867 /* Now determine the table for the transliteration information.
3869 XXX It is not yet clear to me whether it is worth implementing a
3870 complicated algorithm which uses a hash table to locate the entries.
3871 For now I'll use a simple array which can be searched using binary
3873 if (ctype
->translit_include
!= NULL
)
3874 /* Traverse the locales mentioned in the `include' statements in a
3875 depth-first way and fold in their transliteration information. */
3876 translit_flatten (ctype
, charmap
, &ctype
->translit
);
3878 if (ctype
->translit
!= NULL
)
3880 /* First count how many entries we have. This is the upper limit
3881 since some entries from the included files might be overwritten. */
3883 struct translit_t
*runp
= ctype
->translit
;
3884 struct translit_t
**sorted
;
3885 size_t from_len
, to_len
;
3887 while (runp
!= NULL
)
3893 /* Next we allocate an array large enough and fill in the values. */
3894 sorted
= (struct translit_t
**) alloca (number
3895 * sizeof (struct translit_t
**));
3896 runp
= ctype
->translit
;
3900 /* Search for the place where to insert this string.
3901 XXX Better use a real sorting algorithm later. */
3905 while (idx
< number
)
3907 int res
= wcscmp ((const wchar_t *) sorted
[idx
]->from
,
3908 (const wchar_t *) runp
->from
);
3923 memmove (&sorted
[idx
+ 1], &sorted
[idx
],
3924 (number
- idx
) * sizeof (struct translit_t
*));
3931 while (runp
!= NULL
);
3933 /* The next step is putting all the possible transliteration
3934 strings in one memory block so that we can write it out.
3935 We need several different blocks:
3936 - index to the from-string array
3938 - index to the to-string array
3941 from_len
= to_len
= 0;
3942 for (size_t cnt
= 0; cnt
< number
; ++cnt
)
3944 struct translit_to_t
*srunp
;
3945 from_len
+= wcslen ((const wchar_t *) sorted
[cnt
]->from
) + 1;
3946 srunp
= sorted
[cnt
]->to
;
3947 while (srunp
!= NULL
)
3949 to_len
+= wcslen ((const wchar_t *) srunp
->str
) + 1;
3950 srunp
= srunp
->next
;
3952 /* Plus one for the extra NUL character marking the end of
3953 the list for the current entry. */
3957 /* We can allocate the arrays for the results. */
3958 ctype
->translit_from_idx
= xmalloc (number
* sizeof (uint32_t));
3959 ctype
->translit_from_tbl
= xmalloc (from_len
* sizeof (uint32_t));
3960 ctype
->translit_to_idx
= xmalloc (number
* sizeof (uint32_t));
3961 ctype
->translit_to_tbl
= xmalloc (to_len
* sizeof (uint32_t));
3965 for (size_t cnt
= 0; cnt
< number
; ++cnt
)
3968 struct translit_to_t
*srunp
;
3970 ctype
->translit_from_idx
[cnt
] = from_len
;
3971 ctype
->translit_to_idx
[cnt
] = to_len
;
3973 len
= wcslen ((const wchar_t *) sorted
[cnt
]->from
) + 1;
3974 wmemcpy ((wchar_t *) &ctype
->translit_from_tbl
[from_len
],
3975 (const wchar_t *) sorted
[cnt
]->from
, len
);
3978 ctype
->translit_to_idx
[cnt
] = to_len
;
3979 srunp
= sorted
[cnt
]->to
;
3980 while (srunp
!= NULL
)
3982 len
= wcslen ((const wchar_t *) srunp
->str
) + 1;
3983 wmemcpy ((wchar_t *) &ctype
->translit_to_tbl
[to_len
],
3984 (const wchar_t *) srunp
->str
, len
);
3986 srunp
= srunp
->next
;
3988 ctype
->translit_to_tbl
[to_len
++] = L
'\0';
3991 /* Store the information about the length. */
3992 ctype
->translit_idx_size
= number
;
3993 ctype
->translit_from_tbl_size
= from_len
* sizeof (uint32_t);
3994 ctype
->translit_to_tbl_size
= to_len
* sizeof (uint32_t);
3998 ctype
->translit_from_idx
= no_str
;
3999 ctype
->translit_from_tbl
= no_str
;
4000 ctype
->translit_to_tbl
= no_str
;
4001 ctype
->translit_idx_size
= 0;
4002 ctype
->translit_from_tbl_size
= 0;
4003 ctype
->translit_to_tbl_size
= 0;