1 /* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
37 #include "localeinfo.h"
39 #include "linereader.h"
40 #include "locfile-token.h"
42 #include "localedef.h"
47 #ifdef PREDEFINED_CLASSES
48 /* These are the extra bits not in wctype.h since these are not preallocated
50 # define _ISwspecial1 (1 << 29)
51 # define _ISwspecial2 (1 << 30)
52 # define _ISwspecial3 (1 << 31)
56 /* The bit used for representing a special class. */
57 #define BITPOS(class) ((class) - tok_upper)
58 #define BIT(class) (_ISbit (BITPOS (class)))
59 #define BITw(class) (_ISwbit (BITPOS (class)))
61 #define ELEM(ctype, collection, idx, value) \
62 *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx, \
63 &ctype->collection##_act idx, value)
66 /* To be compatible with former implementations we for now restrict
67 the number of bits for character classes to 16. When compatibility
68 is not necessary anymore increase the number to 32. */
69 #define char_class_t uint16_t
70 #define char_class32_t uint32_t
73 /* Type to describe a transliteration action. We have a possibly
74 multi-character from-string and a set of multi-character
75 to-strings. All are 32-bit values since this is what is used in
76 the gconv functions. */
81 struct translit_to_t
*next
;
88 struct translit_to_t
*to
;
90 struct translit_t
*next
;
94 /* The real definition of the struct for the LC_CTYPE locale. */
101 struct repertoire_t
*repertoire
;
103 /* We will allow up to 8 * sizeof (uint32_t) character classes. */
104 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
106 const char *classnames
[MAX_NR_CHARCLASS
];
107 uint32_t last_class_char
;
108 uint32_t class256_collection
[256];
109 uint32_t *class_collection
;
110 size_t class_collection_max
;
111 size_t class_collection_act
;
114 struct charseq
**mbdigits
;
121 struct charseq
*mboutdigits
[10];
122 uint32_t wcoutdigits
[10];
123 size_t outdigits_act
;
125 /* If the following number ever turns out to be too small simply
126 increase it. But I doubt it will. --drepper@gnu */
127 #define MAX_NR_CHARMAP 16
128 const char *mapnames
[MAX_NR_CHARMAP
];
129 uint32_t *map_collection
[MAX_NR_CHARMAP
];
130 uint32_t map256_collection
[2][256];
131 size_t map_collection_max
[MAX_NR_CHARMAP
];
132 size_t map_collection_act
[MAX_NR_CHARMAP
];
133 size_t map_collection_nr
;
135 int tomap_done
[MAX_NR_CHARMAP
];
137 /* Transliteration information. */
138 const char *translit_copy_locale
;
139 const char *translit_copy_repertoire
;
140 struct translit_t
*translit
;
142 /* The arrays for the binary representation. */
145 char_class_t
*ctype_b
;
146 char_class32_t
*ctype32_b
;
149 uint32_t *class_name_ptr
;
150 uint32_t *map_name_ptr
;
151 unsigned char *width
;
153 const char *codeset_name
;
154 uint32_t translit_hash_size
;
155 uint32_t translit_hash_layers
;
156 uint32_t *translit_from_idx
;
157 uint32_t *translit_from_tbl
;
158 uint32_t *translit_to_idx
;
159 uint32_t *translit_to_tbl
;
160 size_t translit_idx_size
;
161 size_t translit_from_tbl_size
;
162 size_t translit_to_tbl_size
;
164 struct obstack mem_pool
;
168 #define obstack_chunk_alloc xmalloc
169 #define obstack_chunk_free free
172 /* Prototypes for local functions. */
173 static void ctype_startup (struct linereader
*lr
, struct localedef_t
*locale
,
174 struct charmap_t
*charmap
, int ignore_content
);
175 static void ctype_class_new (struct linereader
*lr
,
176 struct locale_ctype_t
*ctype
, const char *name
);
177 static void ctype_map_new (struct linereader
*lr
,
178 struct locale_ctype_t
*ctype
,
179 const char *name
, struct charmap_t
*charmap
);
180 static uint32_t *find_idx (struct locale_ctype_t
*ctype
, uint32_t **table
,
181 size_t *max
, size_t *act
, unsigned int idx
);
182 static void set_class_defaults (struct locale_ctype_t
*ctype
,
183 struct charmap_t
*charmap
,
184 struct repertoire_t
*repertoire
);
185 static void allocate_arrays (struct locale_ctype_t
*ctype
,
186 struct charmap_t
*charmap
,
187 struct repertoire_t
*repertoire
);
190 static const char *longnames
[] =
192 "zero", "one", "two", "three", "four",
193 "five", "six", "seven", "eight", "nine"
195 static const unsigned char digits
[] = "0123456789";
199 ctype_startup (struct linereader
*lr
, struct localedef_t
*locale
,
200 struct charmap_t
*charmap
, int ignore_content
)
203 struct locale_ctype_t
*ctype
;
207 /* Allocate the needed room. */
208 locale
->categories
[LC_CTYPE
].ctype
= ctype
=
209 (struct locale_ctype_t
*) xcalloc (1, sizeof (struct locale_ctype_t
));
211 /* We have seen no names yet. */
212 ctype
->charnames_max
= charmap
->mb_cur_max
== 1 ? 256 : 512;
214 (unsigned int *) xmalloc (ctype
->charnames_max
215 * sizeof (unsigned int));
216 for (cnt
= 0; cnt
< 256; ++cnt
)
217 ctype
->charnames
[cnt
] = cnt
;
218 ctype
->charnames_act
= 256;
220 /* Fill character class information. */
221 ctype
->last_class_char
= ILLEGAL_CHAR_VALUE
;
222 /* The order of the following instructions determines the bit
224 ctype_class_new (lr
, ctype
, "upper");
225 ctype_class_new (lr
, ctype
, "lower");
226 ctype_class_new (lr
, ctype
, "alpha");
227 ctype_class_new (lr
, ctype
, "digit");
228 ctype_class_new (lr
, ctype
, "xdigit");
229 ctype_class_new (lr
, ctype
, "space");
230 ctype_class_new (lr
, ctype
, "print");
231 ctype_class_new (lr
, ctype
, "graph");
232 ctype_class_new (lr
, ctype
, "blank");
233 ctype_class_new (lr
, ctype
, "cntrl");
234 ctype_class_new (lr
, ctype
, "punct");
235 ctype_class_new (lr
, ctype
, "alnum");
236 #ifdef PREDEFINED_CLASSES
237 /* The following are extensions from ISO 14652. */
238 ctype_class_new (lr
, ctype
, "left_to_right");
239 ctype_class_new (lr
, ctype
, "right_to_left");
240 ctype_class_new (lr
, ctype
, "num_terminator");
241 ctype_class_new (lr
, ctype
, "num_separator");
242 ctype_class_new (lr
, ctype
, "segment_separator");
243 ctype_class_new (lr
, ctype
, "block_separator");
244 ctype_class_new (lr
, ctype
, "direction_control");
245 ctype_class_new (lr
, ctype
, "sym_swap_layout");
246 ctype_class_new (lr
, ctype
, "char_shape_selector");
247 ctype_class_new (lr
, ctype
, "num_shape_selector");
248 ctype_class_new (lr
, ctype
, "non_spacing");
249 ctype_class_new (lr
, ctype
, "non_spacing_level3");
250 ctype_class_new (lr
, ctype
, "normal_connect");
251 ctype_class_new (lr
, ctype
, "r_connect");
252 ctype_class_new (lr
, ctype
, "no_connect");
253 ctype_class_new (lr
, ctype
, "no_connect-space");
254 ctype_class_new (lr
, ctype
, "vowel_connect");
257 ctype
->class_collection_max
= charmap
->mb_cur_max
== 1 ? 256 : 512;
258 ctype
->class_collection
259 = (uint32_t *) xcalloc (sizeof (unsigned long int),
260 ctype
->class_collection_max
);
261 ctype
->class_collection_act
= 256;
263 /* Fill character map information. */
264 ctype
->map_collection_nr
= 0;
265 ctype
->last_map_idx
= MAX_NR_CHARMAP
;
266 ctype_map_new (lr
, ctype
, "toupper", charmap
);
267 ctype_map_new (lr
, ctype
, "tolower", charmap
);
268 #ifdef PREDEFINED_CLASSES
269 ctype_map_new (lr
, ctype
, "tosymmetric", charmap
);
272 /* Fill first 256 entries in `toXXX' arrays. */
273 for (cnt
= 0; cnt
< 256; ++cnt
)
275 ctype
->map_collection
[0][cnt
] = cnt
;
276 ctype
->map_collection
[1][cnt
] = cnt
;
277 #ifdef PREDEFINED_CLASSES
278 ctype
->map_collection
[2][cnt
] = cnt
;
280 ctype
->map256_collection
[0][cnt
] = cnt
;
281 ctype
->map256_collection
[1][cnt
] = cnt
;
284 obstack_init (&ctype
->mem_pool
);
290 ctype_finish (struct localedef_t
*locale
, struct charmap_t
*charmap
)
292 /* See POSIX.2, table 2-6 for the meaning of the following table. */
297 const char allow
[NCLASS
];
299 valid_table
[NCLASS
] =
301 /* The order is important. See token.h for more information.
302 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
303 { "upper", "--MX-XDDXXX-" },
304 { "lower", "--MX-XDDXXX-" },
305 { "alpha", "---X-XDDXXX-" },
306 { "digit", "XXX--XDDXXX-" },
307 { "xdigit", "-----XDDXXX-" },
308 { "space", "XXXXX------X" },
309 { "print", "---------X--" },
310 { "graph", "---------X--" },
311 { "blank", "XXXXXM-----X" },
312 { "cntrl", "XXXXX-XX--XX" },
313 { "punct", "XXXXX-DD-X-X" },
314 { "alnum", "-----XDDXXX-" }
318 uint32_t space_value
;
319 struct charseq
*space_seq
;
320 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
323 /* Now resolve copying and also handle completely missing definitions. */
326 /* First see whether we were supposed to copy. If yes, find the
327 actual definition. */
328 if (locale
->copy_name
[LC_CTYPE
] != NULL
)
330 /* Find the locale to copy from. This has to happen transitively since
331 the locale we are copying from might itself be copying another one. */
332 struct localedef_t
*from
= locale
;
335 from
= find_locale (LC_CTYPE
, from
->copy_name
[LC_CTYPE
],
336 from
->repertoire_name
, charmap
);
337 while (from
->categories
[LC_CTYPE
].ctype
== NULL
338 && from
->copy_name
[LC_CTYPE
] != NULL
);
340 ctype
= locale
->categories
[LC_CTYPE
].ctype
341 = from
->categories
[LC_CTYPE
].ctype
;
344 /* If there is still no definition issue a warning and create an
348 error (0, 0, _("No definition for %s category found"), "LC_CTYPE");
349 ctype_startup (NULL
, locale
, charmap
, 0);
350 ctype
= locale
->categories
[LC_CTYPE
].ctype
;
354 /* Set default value for classes not specified. */
355 set_class_defaults (ctype
, charmap
, ctype
->repertoire
);
357 /* Check according to table. */
358 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
360 uint32_t tmp
= ctype
->class_collection
[cnt
];
364 for (cls1
= 0; cls1
< NCLASS
; ++cls1
)
365 if ((tmp
& _ISwbit (cls1
)) != 0)
366 for (cls2
= 0; cls2
< NCLASS
; ++cls2
)
367 if (valid_table
[cls1
].allow
[cls2
] != '-')
369 int eq
= (tmp
& _ISwbit (cls2
)) != 0;
370 switch (valid_table
[cls1
].allow
[cls2
])
375 uint32_t value
= ctype
->charnames
[cnt
];
379 character L'\\u%0*x' in class `%s' must be in class `%s'"),
380 value
> 0xffff ? 8 : 4, value
,
381 valid_table
[cls1
].name
,
382 valid_table
[cls2
].name
);
389 uint32_t value
= ctype
->charnames
[cnt
];
393 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
394 value
> 0xffff ? 8 : 4, value
,
395 valid_table
[cls1
].name
,
396 valid_table
[cls2
].name
);
401 ctype
->class_collection
[cnt
] |= _ISwbit (cls2
);
405 error (5, 0, _("internal error in %s, line %u"),
406 __FUNCTION__
, __LINE__
);
412 for (cnt
= 0; cnt
< 256; ++cnt
)
414 uint32_t tmp
= ctype
->class256_collection
[cnt
];
418 for (cls1
= 0; cls1
< NCLASS
; ++cls1
)
419 if ((tmp
& _ISbit (cls1
)) != 0)
420 for (cls2
= 0; cls2
< NCLASS
; ++cls2
)
421 if (valid_table
[cls1
].allow
[cls2
] != '-')
423 int eq
= (tmp
& _ISbit (cls2
)) != 0;
424 switch (valid_table
[cls1
].allow
[cls2
])
431 sprintf (buf
, "\\%o", cnt
);
435 character '%s' in class `%s' must be in class `%s'"),
436 buf
, valid_table
[cls1
].name
,
437 valid_table
[cls2
].name
);
446 sprintf (buf
, "\\%o", cnt
);
450 character '%s' in class `%s' must not be in class `%s'"),
451 buf
, valid_table
[cls1
].name
,
452 valid_table
[cls2
].name
);
457 ctype
->class256_collection
[cnt
] |= _ISbit (cls2
);
461 error (5, 0, _("internal error in %s, line %u"),
462 __FUNCTION__
, __LINE__
);
468 /* ... and now test <SP> as a special case. */
469 space_value
= repertoire_find_value (ctype
->repertoire
, "SP", 2);
470 if (space_value
== ILLEGAL_CHAR_VALUE
)
473 error (0, 0, _("character <SP> not defined in character map"));
475 else if (((cnt
= BITPOS (tok_space
),
476 (ELEM (ctype
, class_collection
, , space_value
)
477 & BITw (tok_space
)) == 0)
478 || (cnt
= BITPOS (tok_blank
),
479 (ELEM (ctype
, class_collection
, , space_value
)
480 & BITw (tok_blank
)) == 0)))
483 error (0, 0, _("<SP> character not in class `%s'"),
484 valid_table
[cnt
].name
);
486 else if (((cnt
= BITPOS (tok_punct
),
487 (ELEM (ctype
, class_collection
, , space_value
)
488 & BITw (tok_punct
)) != 0)
489 || (cnt
= BITPOS (tok_graph
),
490 (ELEM (ctype
, class_collection
, , space_value
)
495 error (0, 0, _("<SP> character must not be in class `%s'"),
496 valid_table
[cnt
].name
);
499 ELEM (ctype
, class_collection
, , space_value
) |= BITw (tok_print
);
501 space_seq
= charmap_find_value (charmap
, "SP", 2);
502 if (space_seq
== NULL
|| space_seq
->nbytes
!= 1)
505 error (0, 0, _("character <SP> not defined in character map"));
507 else if (((cnt
= BITPOS (tok_space
),
508 (ctype
->class256_collection
[space_seq
->bytes
[0]]
509 & BIT (tok_space
)) == 0)
510 || (cnt
= BITPOS (tok_blank
),
511 (ctype
->class256_collection
[space_seq
->bytes
[0]]
512 & BIT (tok_blank
)) == 0)))
515 error (0, 0, _("<SP> character not in class `%s'"),
516 valid_table
[cnt
].name
);
518 else if (((cnt
= BITPOS (tok_punct
),
519 (ctype
->class256_collection
[space_seq
->bytes
[0]]
520 & BIT (tok_punct
)) != 0)
521 || (cnt
= BITPOS (tok_graph
),
522 (ctype
->class256_collection
[space_seq
->bytes
[0]]
523 & BIT (tok_graph
)) != 0)))
526 error (0, 0, _("<SP> character must not be in class `%s'"),
527 valid_table
[cnt
].name
);
530 ctype
->class256_collection
[space_seq
->bytes
[0]] |= BIT (tok_print
);
532 /* Now that the tests are done make sure the name array contains all
533 characters which are handled in the WIDTH section of the
534 character set definition file. */
535 if (charmap
->width_rules
!= NULL
)
536 for (cnt
= 0; cnt
< charmap
->nwidth_rules
; ++cnt
)
538 unsigned char bytes
[charmap
->mb_cur_max
];
539 int nbytes
= charmap
->width_rules
[cnt
].from
->nbytes
;
541 /* We have the range of characters for which the width is
542 specified described using byte sequences of the multibyte
543 charset. We have to convert this to UCS4 now. And we
544 cannot simply convert the beginning and the end of the
545 sequence, we have to iterate over the byte sequence and
546 convert it for every single character. */
547 memcpy (bytes
, charmap
->width_rules
[cnt
].from
->bytes
, nbytes
);
549 while (nbytes
< charmap
->width_rules
[cnt
].to
->nbytes
550 || memcmp (bytes
, charmap
->width_rules
[cnt
].to
->bytes
,
553 /* Find the UCS value for `bytes'. */
554 uint32_t wch
= repertoire_find_value (ctype
->repertoire
, bytes
,
558 if (wch
!= ILLEGAL_CHAR_VALUE
)
559 /* We are only interested in the side-effects of the
560 `find_idx' call. It will add appropriate entries in
561 the name array if this is necessary. */
562 (void) find_idx (ctype
, NULL
, NULL
, NULL
, wch
);
564 /* "Increment" the bytes sequence. */
566 while (inner
>= 0 && bytes
[inner
] == 0xff)
571 /* We have to extend the byte sequence. */
572 if (nbytes
>= charmap
->width_rules
[cnt
].to
->nbytes
)
576 memset (&bytes
[1], 0, nbytes
);
582 while (++inner
< nbytes
)
588 /* There must be a multiple of 10 digits. */
589 if (ctype
->mbdigits_act
% 10 != 0)
591 assert (ctype
->mbdigits_act
== ctype
->wcdigits_act
);
592 ctype
->wcdigits_act
-= ctype
->mbdigits_act
% 10;
593 ctype
->mbdigits_act
-= ctype
->mbdigits_act
% 10;
594 error (0, 0, _("`digit' category has not entries in groups of ten"));
597 /* Check the input digits. There must be a multiple of ten available.
598 In each group it could be that one or the other character is missing.
599 In this case the whole group must be removed. */
601 while (cnt
< ctype
->mbdigits_act
)
604 for (inner
= 0; inner
< 10; ++inner
)
605 if (ctype
->mbdigits
[cnt
+ inner
] == NULL
)
612 /* Remove the group. */
613 memmove (&ctype
->mbdigits
[cnt
], &ctype
->mbdigits
[cnt
+ 10],
614 ((ctype
->wcdigits_act
- cnt
- 10)
615 * sizeof (ctype
->mbdigits
[0])));
616 ctype
->mbdigits_act
-= 10;
620 /* If no input digits are given use the default. */
621 if (ctype
->mbdigits_act
== 0)
623 if (ctype
->mbdigits_max
== 0)
625 ctype
->mbdigits
= obstack_alloc (&charmap
->mem_pool
,
626 10 * sizeof (struct charseq
*));
627 ctype
->mbdigits_max
= 10;
630 for (cnt
= 0; cnt
< 10; ++cnt
)
632 ctype
->mbdigits
[cnt
] = charmap_find_symbol (charmap
,
634 if (ctype
->mbdigits
[cnt
] == NULL
)
636 ctype
->mbdigits
[cnt
] = charmap_find_symbol (charmap
,
638 strlen (longnames
[cnt
]));
639 if (ctype
->mbdigits
[cnt
] == NULL
)
641 /* Hum, this ain't good. */
643 no input digits defined and none of the standard names in the charmap"));
645 ctype
->mbdigits
[cnt
] = obstack_alloc (&charmap
->mem_pool
,
646 sizeof (struct charseq
) + 1);
648 /* This is better than nothing. */
649 ctype
->mbdigits
[cnt
]->bytes
[0] = digits
[cnt
];
650 ctype
->mbdigits
[cnt
]->nbytes
= 1;
655 ctype
->mbdigits_act
= 10;
658 /* Check the wide character input digits. There must be a multiple
659 of ten available. In each group it could be that one or the other
660 character is missing. In this case the whole group must be
663 while (cnt
< ctype
->wcdigits_act
)
666 for (inner
= 0; inner
< 10; ++inner
)
667 if (ctype
->wcdigits
[cnt
+ inner
] == ILLEGAL_CHAR_VALUE
)
674 /* Remove the group. */
675 memmove (&ctype
->wcdigits
[cnt
], &ctype
->wcdigits
[cnt
+ 10],
676 ((ctype
->wcdigits_act
- cnt
- 10)
677 * sizeof (ctype
->wcdigits
[0])));
678 ctype
->wcdigits_act
-= 10;
682 /* If no input digits are given use the default. */
683 if (ctype
->wcdigits_act
== 0)
685 if (ctype
->wcdigits_max
== 0)
687 ctype
->wcdigits
= obstack_alloc (&charmap
->mem_pool
,
688 10 * sizeof (uint32_t));
689 ctype
->wcdigits_max
= 10;
692 for (cnt
= 0; cnt
< 10; ++cnt
)
693 ctype
->wcdigits
[cnt
] = L
'0' + cnt
;
695 ctype
->mbdigits_act
= 10;
698 /* Check the outdigits. */
700 for (cnt
= 0; cnt
< 10; ++cnt
)
701 if (ctype
->mboutdigits
[cnt
] == NULL
)
703 static struct charseq replace
[2];
708 not all characters used in `outdigit' are available in the charmap"));
712 replace
[0].nbytes
= 1;
713 replace
[0].bytes
[0] = '?';
714 replace
[0].bytes
[1] = '\0';
715 ctype
->mboutdigits
[cnt
] = &replace
[0];
719 for (cnt
= 0; cnt
< 10; ++cnt
)
720 if (ctype
->wcoutdigits
[cnt
] == 0)
725 not all characters used in `outdigit' are available in the repertoire"));
729 ctype
->wcoutdigits
[cnt
] = L
'?';
735 ctype_output (struct localedef_t
*locale
, struct charmap_t
*charmap
,
736 const char *output_path
)
738 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
739 const size_t nelems
= (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE
)
740 + (ctype
->map_collection_nr
- 2));
741 struct iovec iov
[2 + nelems
+ ctype
->nr_charclass
742 + ctype
->map_collection_nr
];
743 struct locale_file data
;
744 uint32_t idx
[nelems
+ 1];
745 size_t elem
, cnt
, offset
, total
;
748 /* Now prepare the output: Find the sizes of the table we can use. */
749 allocate_arrays (ctype
, charmap
, ctype
->repertoire
);
751 data
.magic
= LIMAGIC (LC_CTYPE
);
753 iov
[0].iov_base
= (void *) &data
;
754 iov
[0].iov_len
= sizeof (data
);
756 iov
[1].iov_base
= (void *) idx
;
757 iov
[1].iov_len
= sizeof (idx
);
759 idx
[0] = iov
[0].iov_len
+ iov
[1].iov_len
;
762 for (elem
= 0; elem
< nelems
; ++elem
)
764 if (elem
< _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE
))
767 #define CTYPE_DATA(name, base, len) \
768 case _NL_ITEM_INDEX (name): \
769 iov[2 + elem + offset].iov_base = (base); \
770 iov[2 + elem + offset].iov_len = (len); \
771 if (elem + 1 < nelems) \
772 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
775 CTYPE_DATA (_NL_CTYPE_CLASS
,
777 (256 + 128) * sizeof (char_class_t
));
779 CTYPE_DATA (_NL_CTYPE_TOUPPER
,
781 (ctype
->plane_size
* ctype
->plane_cnt
+ 128)
782 * sizeof (uint32_t));
783 CTYPE_DATA (_NL_CTYPE_TOLOWER
,
785 (ctype
->plane_size
* ctype
->plane_cnt
+ 128)
786 * sizeof (uint32_t));
788 CTYPE_DATA (_NL_CTYPE_CLASS32
,
790 (ctype
->plane_size
* ctype
->plane_cnt
791 * sizeof (char_class32_t
)));
793 CTYPE_DATA (_NL_CTYPE_NAMES
,
794 ctype
->names
, (ctype
->plane_size
* ctype
->plane_cnt
795 * sizeof (uint32_t)));
797 CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_SIZE
,
798 &ctype
->translit_hash_size
, sizeof (uint32_t));
799 CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_LAYERS
,
800 &ctype
->translit_hash_layers
, sizeof (uint32_t));
802 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX
,
803 ctype
->translit_from_idx
,
804 ctype
->translit_idx_size
);
806 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL
,
807 ctype
->translit_from_tbl
,
808 ctype
->translit_from_tbl_size
);
810 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX
,
811 ctype
->translit_to_idx
,
812 ctype
->translit_idx_size
);
814 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL
,
815 ctype
->translit_to_tbl
, ctype
->translit_to_tbl_size
);
817 CTYPE_DATA (_NL_CTYPE_HASH_SIZE
,
818 &ctype
->plane_size
, sizeof (uint32_t));
819 CTYPE_DATA (_NL_CTYPE_HASH_LAYERS
,
820 &ctype
->plane_cnt
, sizeof (uint32_t));
822 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES
):
823 /* The class name array. */
825 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
, ++offset
)
827 iov
[2 + elem
+ offset
].iov_base
828 = (void *) ctype
->classnames
[cnt
];
829 iov
[2 + elem
+ offset
].iov_len
830 = strlen (ctype
->classnames
[cnt
]) + 1;
831 total
+= iov
[2 + elem
+ offset
].iov_len
;
833 iov
[2 + elem
+ offset
].iov_base
= (void *) "\0\0\0";
834 iov
[2 + elem
+ offset
].iov_len
= 1 + (4 - ((total
+ 1) % 4));
835 total
+= 1 + (4 - ((total
+ 1) % 4));
837 idx
[elem
+ 1] = idx
[elem
] + total
;
840 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES
):
841 /* The map name array. */
843 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
, ++offset
)
845 iov
[2 + elem
+ offset
].iov_base
846 = (void *) ctype
->mapnames
[cnt
];
847 iov
[2 + elem
+ offset
].iov_len
848 = strlen (ctype
->mapnames
[cnt
]) + 1;
849 total
+= iov
[2 + elem
+ offset
].iov_len
;
851 iov
[2 + elem
+ offset
].iov_base
= (void *) "\0\0\0";
852 iov
[2 + elem
+ offset
].iov_len
= 1 + (4 - ((total
+ 1) % 4));
853 total
+= 1 + (4 - ((total
+ 1) % 4));
855 idx
[elem
+ 1] = idx
[elem
] + total
;
858 CTYPE_DATA (_NL_CTYPE_WIDTH
,
859 ctype
->width
, ctype
->plane_size
* ctype
->plane_cnt
);
861 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX
,
862 &ctype
->mb_cur_max
, sizeof (uint32_t));
864 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME
):
865 total
= strlen (ctype
->codeset_name
) + 1;
867 iov
[2 + elem
+ offset
].iov_base
= (char *) ctype
->codeset_name
;
870 iov
[2 + elem
+ offset
].iov_base
= alloca ((total
+ 3) & ~3);
871 memset (mempcpy (iov
[2 + elem
+ offset
].iov_base
,
872 ctype
->codeset_name
, total
),
873 '\0', 4 - (total
& 3));
874 total
= (total
+ 3) & ~3;
876 iov
[2 + elem
+ offset
].iov_len
= total
;
877 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
880 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN
):
881 iov
[2 + elem
+ offset
].iov_base
= alloca (sizeof (uint32_t));
882 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
883 *(uint32_t *) iov
[2 + elem
+ offset
].iov_base
=
884 ctype
->mbdigits_act
/ 10;
885 idx
[elem
+ 1] = idx
[elem
] + sizeof (uint32_t);
888 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN
):
889 iov
[2 + elem
+ offset
].iov_base
= alloca (sizeof (uint32_t));
890 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
891 *(uint32_t *) iov
[2 + elem
+ offset
].iov_base
=
892 ctype
->wcdigits_act
/ 10;
893 idx
[elem
+ 1] = idx
[elem
] + sizeof (uint32_t);
896 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB
):
897 /* Compute the length of all possible characters. For INDIGITS
898 there might be more than one. We simply concatenate all of
899 them with a NUL byte following. The NUL byte wouldn't be
900 necessary but it makes it easier for the user. */
902 for (cnt
= elem
- _NL_CTYPE_INDIGITS0_MB
;
903 cnt
< ctype
->mbdigits_act
; cnt
+= 10)
904 total
+= ctype
->mbdigits
[cnt
]->nbytes
+ 1;
905 iov
[2 + elem
+ offset
].iov_base
= (char *) alloca (total
);
906 iov
[2 + elem
+ offset
].iov_len
= total
;
908 cp
= iov
[2 + elem
+ offset
].iov_base
;
909 for (cnt
= elem
- _NL_CTYPE_INDIGITS0_MB
;
910 cnt
< ctype
->mbdigits_act
; cnt
+= 10)
912 cp
= mempcpy (cp
, ctype
->mbdigits
[cnt
]->bytes
,
913 ctype
->mbdigits
[cnt
]->nbytes
);
916 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
919 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB
) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB
):
920 /* Compute the length of all possible characters. For INDIGITS
921 there might be more than one. We simply concatenate all of
922 them with a NUL byte following. The NUL byte wouldn't be
923 necessary but it makes it easier for the user. */
924 cnt
= elem
- _NL_CTYPE_OUTDIGIT0_MB
;
925 total
= ctype
->mboutdigits
[cnt
]->nbytes
+ 1;
926 iov
[2 + elem
+ offset
].iov_base
= (char *) alloca (total
);
927 iov
[2 + elem
+ offset
].iov_len
= total
;
929 *(char *) mempcpy (iov
[2 + elem
+ offset
].iov_base
,
930 ctype
->mbdigits
[cnt
]->bytes
,
931 ctype
->mbdigits
[cnt
]->nbytes
) = '\0';
932 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
935 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC
) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC
):
936 total
= ctype
->wcdigits_act
/ 10;
938 iov
[2 + elem
+ offset
].iov_base
=
939 (uint32_t *) alloca (total
* sizeof (uint32_t));
940 iov
[2 + elem
+ offset
].iov_len
= total
* sizeof (uint32_t);
942 for (cnt
= elem
- _NL_CTYPE_INDIGITS0_WC
;
943 cnt
< ctype
->wcdigits_act
; cnt
+= 10)
944 ((uint32_t *) iov
[2 + elem
+ offset
].iov_base
)[cnt
/ 10]
945 = ctype
->wcdigits
[cnt
];
946 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
949 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC
) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC
):
950 cnt
= elem
- _NL_CTYPE_OUTDIGIT0_WC
;
951 iov
[2 + elem
+ offset
].iov_base
= &ctype
->wcoutdigits
[cnt
];
952 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
953 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
957 assert (! "unknown CTYPE element");
961 /* Handle extra maps. */
962 size_t nr
= (elem
- _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE
)) + 2;
964 iov
[2 + elem
+ offset
].iov_base
= ctype
->map
[nr
];
965 iov
[2 + elem
+ offset
].iov_len
= ((ctype
->plane_size
966 * ctype
->plane_cnt
+ 128)
967 * sizeof (uint32_t));
969 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
973 assert (2 + elem
+ offset
== (nelems
+ ctype
->nr_charclass
974 + ctype
->map_collection_nr
+ 2));
976 write_locale_data (output_path
, "LC_CTYPE", 2 + elem
+ offset
, iov
);
980 /* Local functions. */
982 ctype_class_new (struct linereader
*lr
, struct locale_ctype_t
*ctype
,
987 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
988 if (strcmp (ctype
->classnames
[cnt
], name
) == 0)
991 if (cnt
< ctype
->nr_charclass
)
993 lr_error (lr
, _("character class `%s' already defined"), name
);
997 if (ctype
->nr_charclass
== MAX_NR_CHARCLASS
)
998 /* Exit code 2 is prescribed in P1003.2b. */
1000 implementation limit: no more than %d character classes allowed"),
1003 ctype
->classnames
[ctype
->nr_charclass
++] = name
;
1008 ctype_map_new (struct linereader
*lr
, struct locale_ctype_t
*ctype
,
1009 const char *name
, struct charmap_t
*charmap
)
1011 size_t max_chars
= 0;
1014 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
)
1016 if (strcmp (ctype
->mapnames
[cnt
], name
) == 0)
1019 if (max_chars
< ctype
->map_collection_max
[cnt
])
1020 max_chars
= ctype
->map_collection_max
[cnt
];
1023 if (cnt
< ctype
->map_collection_nr
)
1025 lr_error (lr
, _("character map `%s' already defined"), name
);
1029 if (ctype
->map_collection_nr
== MAX_NR_CHARMAP
)
1030 /* Exit code 2 is prescribed in P1003.2b. */
1032 implementation limit: no more than %d character maps allowed"),
1035 ctype
->mapnames
[cnt
] = name
;
1038 ctype
->map_collection_max
[cnt
] = charmap
->mb_cur_max
== 1 ? 256 : 512;
1040 ctype
->map_collection_max
[cnt
] = max_chars
;
1042 ctype
->map_collection
[cnt
] = (uint32_t *)
1043 xmalloc (sizeof (uint32_t) * ctype
->map_collection_max
[cnt
]);
1044 memset (ctype
->map_collection
[cnt
], '\0',
1045 sizeof (uint32_t) * ctype
->map_collection_max
[cnt
]);
1046 ctype
->map_collection_act
[cnt
] = 256;
1048 ++ctype
->map_collection_nr
;
1052 /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
1053 is possible if we only want to extend the name array. */
/* Return a pointer to the entry of *TABLE that belongs to character
   IDX, first adding IDX to CTYPE->charnames when it is not yet known.
   NOTE(review): this listing omits several lines of the function (the
   return type, the conditions in front of the early returns and the
   statements that update *MAX and *ACT), so the comments below only
   describe the statements that are visible.  */
1055 find_idx (struct locale_ctype_t
*ctype
, uint32_t **table
, size_t *max
,
1056 size_t *act
, uint32_t idx
)
/* Early exit: index *TABLE directly with IDX, or yield NULL when no
   table was passed.  NOTE(review): the guarding condition is not
   visible here; presumably IDX < 256 -- confirm.  */
1061 return table
== NULL
? NULL
: &(*table
)[idx
];
/* Search the names from slot 256 up to `charnames_act' for IDX.  */
1063 for (cnt
= 256; cnt
< ctype
->charnames_act
; ++cnt
)
1064 if (ctype
->charnames
[cnt
] == idx
)
1067 /* We have to distinguish two cases: the name is found or not. */
1068 if (cnt
== ctype
->charnames_act
)
1070 /* Extend the name array. */
1071 if (ctype
->charnames_act
== ctype
->charnames_max
)
/* The name array is full: double its capacity.  */
1073 ctype
->charnames_max
*= 2;
1074 ctype
->charnames
= (unsigned int *)
1075 xrealloc (ctype
->charnames
,
1076 sizeof (unsigned int) * ctype
->charnames_max
);
/* Append IDX as a new name.  */
1078 ctype
->charnames
[ctype
->charnames_act
++] = idx
;
1082 /* We have done everything we are asked to do. */
/* Remember the old capacity so that only the newly added tail of the
   table is cleared after it has been enlarged.  */
1089 size_t old_max
= *max
;
1092 while (*max
<= cnt
);
/* NOTE(review): the new size is computed with
   `sizeof (unsigned long int)' although the elements are `uint32_t'
   (see the memset below).  This over-allocates wherever
   `unsigned long int' is wider than 32 bits; `sizeof (uint32_t)' looks
   intended -- confirm.  */
1095 (uint32_t *) xrealloc (*table
, *max
* sizeof (unsigned long int));
/* Zero the entries between the old and the new capacity.  */
1096 memset (&(*table
)[old_max
], '\0',
1097 (*max
- old_max
) * sizeof (uint32_t));
/* CNT is the position of IDX in the name array; hand back the table
   entry at the same position.  */
1103 return &(*table
)[cnt
];
1108 get_character (struct token
*now
, struct charmap_t
*charmap
,
1109 struct repertoire_t
*repertoire
,
1110 struct charseq
**seqp
, uint32_t *wchp
)
1112 if (now
->tok
== tok_bsymbol
)
1114 /* This will hopefully be the normal case. */
1115 *wchp
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
1116 now
->val
.str
.lenmb
);
1117 *seqp
= charmap_find_value (charmap
, now
->val
.str
.startmb
,
1118 now
->val
.str
.lenmb
);
1120 else if (now
->tok
== tok_ucs4
)
1122 *seqp
= repertoire_find_seq (repertoire
, now
->val
.ucs4
);
1126 /* Compute the value in the charmap from the UCS value. */
1127 const char *symbol
= repertoire_find_symbol (repertoire
,
1133 *seqp
= charmap_find_value (charmap
, symbol
, strlen (symbol
));
1137 /* Insert a negative entry. */
1138 static const struct charseq negative
1139 = { .ucs4
= ILLEGAL_CHAR_VALUE
};
1140 uint32_t *newp
= obstack_alloc (&repertoire
->mem_pool
, 4);
1141 *newp
= now
->val
.ucs4
;
1143 insert_entry (&repertoire
->seq_table
, newp
, 4,
1144 (void *) &negative
);
1147 (*seqp
)->ucs4
= now
->val
.ucs4
;
1149 else if ((*seqp
)->ucs4
!= now
->val
.ucs4
)
1152 *wchp
= now
->val
.ucs4
;
1154 else if (now
->tok
== tok_charcode
)
1156 /* We must map from the byte code to UCS4. */
1157 *seqp
= charmap_find_symbol (charmap
, now
->val
.str
.startmb
,
1158 now
->val
.str
.lenmb
);
1161 *wchp
= ILLEGAL_CHAR_VALUE
;
1164 if ((*seqp
)->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1165 (*seqp
)->ucs4
= repertoire_find_value (repertoire
, (*seqp
)->name
,
1166 strlen ((*seqp
)->name
));
1167 *wchp
= (*seqp
)->ucs4
;
1177 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>'. */
1179 charclass_symbolic_ellipsis (struct linereader
*ldfile
,
1180 struct locale_ctype_t
*ctype
,
1181 struct charmap_t
*charmap
,
1182 struct repertoire_t
*repertoire
,
1184 const char *last_str
,
1185 unsigned long int class256_bit
,
1186 unsigned long int class_bit
, int base
,
1187 int ignore_content
, int handle_digits
)
1189 const char *nowstr
= now
->val
.str
.startmb
;
1190 char tmp
[now
->val
.str
.lenmb
+ 1];
1193 unsigned long int from
;
1194 unsigned long int to
;
1196 /* We have to compute the ellipsis values using the symbolic names. */
1197 assert (last_str
!= NULL
);
1199 if (strlen (last_str
) != now
->val
.str
.lenmb
)
1203 _("`%s' and `%.*s' are no valid names for symbolic range"),
1204 last_str
, now
->val
.str
.lenmb
, nowstr
);
1208 if (memcmp (last_str
, nowstr
, now
->val
.str
.lenmb
) == 0)
1209 /* Nothing to do, the names are the same. */
1212 for (cp
= last_str
; *cp
== *(nowstr
+ (cp
- last_str
)); ++cp
)
1216 from
= strtoul (cp
, &endp
, base
);
1217 if ((from
== UINT_MAX
&& errno
== ERANGE
) || *endp
!= '\0')
1220 to
= strtoul (nowstr
+ (cp
- last_str
), &endp
, base
);
1221 if ((to
== UINT_MAX
&& errno
== ERANGE
)
1222 || (endp
- nowstr
) != now
->val
.str
.lenmb
|| from
>= to
)
1225 /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
1226 if (!ignore_content
)
1228 now
->val
.str
.startmb
= tmp
;
1229 while (++from
<= to
)
1231 struct charseq
*seq
;
1234 sprintf (tmp
, (base
== 10 ? "%.*s%0*d" : "%.*s%0*X"), cp
- last_str
,
1235 last_str
, now
->val
.str
.lenmb
- (cp
- last_str
), from
);
1237 get_character (now
, charmap
, repertoire
, &seq
, &wch
);
1239 if (seq
!= NULL
&& seq
->nbytes
== 1)
1240 /* Yep, we can store information about this byte sequence. */
1241 ctype
->class256_collection
[seq
->bytes
[0]] |= class256_bit
;
1243 if (wch
!= ILLEGAL_CHAR_VALUE
&& class_bit
!= 0)
1244 /* We have the UCS4 position. */
1245 *find_idx (ctype
, &ctype
->class_collection
,
1246 &ctype
->class_collection_max
,
1247 &ctype
->class_collection_act
, wch
) |= class_bit
;
1249 if (handle_digits
== 1)
1251 /* We must store the digit values. */
1252 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1254 ctype
->mbdigits_max
*= 2;
1255 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1256 (ctype
->mbdigits_max
1257 * sizeof (char *)));
1258 ctype
->wcdigits_max
*= 2;
1259 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1260 (ctype
->wcdigits_max
1261 * sizeof (uint32_t)));
1264 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
1265 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
1267 else if (handle_digits
== 2)
1269 /* We must store the digit values. */
1270 if (ctype
->outdigits_act
>= 10)
1272 lr_error (ldfile
, _("\
1273 %s: field `%s' does not contain exactly ten entries"),
1274 "LC_CTYPE", "outdigit");
1278 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
1279 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
1280 ++ctype
->outdigits_act
;
1287 /* Ellipsis like in `<U1234>..<U2345>'. */
1289 charclass_ucs4_ellipsis (struct linereader
*ldfile
,
1290 struct locale_ctype_t
*ctype
,
1291 struct charmap_t
*charmap
,
1292 struct repertoire_t
*repertoire
,
1293 struct token
*now
, uint32_t last_wch
,
1294 unsigned long int class256_bit
,
1295 unsigned long int class_bit
, int ignore_content
,
1298 if (last_wch
> now
->val
.ucs4
)
1300 lr_error (ldfile
, _("\
1301 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1302 (now
->val
.ucs4
| last_wch
) < 65536 ? 4 : 8, now
->val
.ucs4
,
1303 (now
->val
.ucs4
| last_wch
) < 65536 ? 4 : 8, last_wch
);
1307 if (!ignore_content
)
1308 while (++last_wch
<= now
->val
.ucs4
)
1310 /* We have to find out whether there is a byte sequence corresponding
1311 to this UCS4 value. */
1312 struct charseq
*seq
= repertoire_find_seq (repertoire
, last_wch
);
1314 /* If this is the first time we look for this sequence create a new
1318 /* Find the symbolic name for this UCS4 value. */
1319 const char *symbol
= repertoire_find_symbol (repertoire
, last_wch
);
1320 uint32_t *newp
= obstack_alloc (&repertoire
->mem_pool
, 4);
1324 /* We have a name, now search the multibyte value. */
1325 seq
= charmap_find_value (charmap
, symbol
, strlen (symbol
));
1329 /* We have to create a fake entry. */
1330 static const struct charseq negative
1331 = { .ucs4
= ILLEGAL_CHAR_VALUE
};
1332 seq
= (struct charseq
*) &negative
;
1335 seq
->ucs4
= last_wch
;
1337 insert_entry (&repertoire
->seq_table
, newp
, 4, seq
);
1340 /* We have a name, now search the multibyte value. */
1341 if (seq
->ucs4
== last_wch
&& seq
->nbytes
== 1)
1342 /* Yep, we can store information about this byte sequence. */
1343 ctype
->class256_collection
[(size_t) seq
->bytes
[0]]
1346 /* And of course we have the UCS4 position. */
1347 if (class_bit
!= 0 && class_bit
!= 0)
1348 *find_idx (ctype
, &ctype
->class_collection
,
1349 &ctype
->class_collection_max
,
1350 &ctype
->class_collection_act
, last_wch
) |= class_bit
;
1352 if (handle_digits
== 1)
1354 /* We must store the digit values. */
1355 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1357 ctype
->mbdigits_max
*= 2;
1358 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1359 (ctype
->mbdigits_max
1360 * sizeof (char *)));
1361 ctype
->wcdigits_max
*= 2;
1362 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1363 (ctype
->wcdigits_max
1364 * sizeof (uint32_t)));
1367 ctype
->mbdigits
[ctype
->mbdigits_act
++] = (seq
->ucs4
== last_wch
1369 ctype
->wcdigits
[ctype
->wcdigits_act
++] = last_wch
;
1371 else if (handle_digits
== 2)
1373 /* We must store the digit values. */
1374 if (ctype
->outdigits_act
>= 10)
1376 lr_error (ldfile
, _("\
1377 %s: field `%s' does not contain exactly ten entries"),
1378 "LC_CTYPE", "outdigit");
1382 ctype
->mboutdigits
[ctype
->outdigits_act
] = (seq
->ucs4
== last_wch
1384 ctype
->wcoutdigits
[ctype
->outdigits_act
] = last_wch
;
1385 ++ctype
->outdigits_act
;
1391 /* Ellipsis as in `/xea/x12.../xea/x34'. */
1393 charclass_charcode_ellipsis (struct linereader
*ldfile
,
1394 struct locale_ctype_t
*ctype
,
1395 struct charmap_t
*charmap
,
1396 struct repertoire_t
*repertoire
,
1397 struct token
*now
, char *last_charcode
,
1398 uint32_t last_charcode_len
,
1399 unsigned long int class256_bit
,
1400 unsigned long int class_bit
, int ignore_content
,
1403 /* First check whether the to-value is larger. */
1404 if (now
->val
.charcode
.nbytes
!= last_charcode_len
)
1406 lr_error (ldfile
, _("\
1407 start end end character sequence of range must have the same length"));
1411 if (memcmp (last_charcode
, now
->val
.charcode
.bytes
, last_charcode_len
) > 0)
1413 lr_error (ldfile
, _("\
1414 to-value character sequence is smaller than from-value sequence"));
1418 if (!ignore_content
)
1422 /* Increment the byte sequence value. */
1423 struct charseq
*seq
;
1427 for (i
= last_charcode_len
- 1; i
>= 0; --i
)
1428 if (++last_charcode
[i
] != 0)
1431 if (last_charcode_len
== 1)
1432 /* Of course we have the charcode value. */
1433 ctype
->class256_collection
[(size_t) last_charcode
[0]]
1436 /* Find the symbolic name. */
1437 seq
= charmap_find_symbol (charmap
, last_charcode
,
1441 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1442 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1443 strlen (seq
->name
));
1446 if (wch
!= ILLEGAL_CHAR_VALUE
&& class_bit
!= 0)
1447 *find_idx (ctype
, &ctype
->class_collection
,
1448 &ctype
->class_collection_max
,
1449 &ctype
->class_collection_act
, wch
) |= class_bit
;
1452 wch
= ILLEGAL_CHAR_VALUE
;
1454 if (handle_digits
== 1)
1456 /* We must store the digit values. */
1457 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1459 ctype
->mbdigits_max
*= 2;
1460 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1461 (ctype
->mbdigits_max
1462 * sizeof (char *)));
1463 ctype
->wcdigits_max
*= 2;
1464 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1465 (ctype
->wcdigits_max
1466 * sizeof (uint32_t)));
1469 seq
= xmalloc (sizeof (struct charseq
) + last_charcode_len
);
1470 memcpy ((char *) (seq
+ 1), last_charcode
, last_charcode_len
);
1471 seq
->nbytes
= last_charcode_len
;
1473 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
1474 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
1476 else if (handle_digits
== 2)
1478 struct charseq
*seq
;
1479 /* We must store the digit values. */
1480 if (ctype
->outdigits_act
>= 10)
1482 lr_error (ldfile
, _("\
1483 %s: field `%s' does not contain exactly ten entries"),
1484 "LC_CTYPE", "outdigit");
1488 seq
= xmalloc (sizeof (struct charseq
) + last_charcode_len
);
1489 memcpy ((char *) (seq
+ 1), last_charcode
, last_charcode_len
);
1490 seq
->nbytes
= last_charcode_len
;
1492 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
1493 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
1494 ++ctype
->outdigits_act
;
1497 while (memcmp (last_charcode
, now
->val
.charcode
.bytes
,
1498 last_charcode_len
) != 0);
1503 /* Read one transliteration entry. */
1505 read_widestring (struct linereader
*ldfile
, struct token
*now
,
1506 struct charmap_t
*charmap
, struct repertoire_t
*repertoire
)
1510 if (now
->tok
== tok_default_missing
)
1511 /* The special name "" will denote this case. */
1512 wstr
= (uint32_t *) L
"";
1513 else if (now
->tok
== tok_bsymbol
)
1515 /* Get the value from the repertoire. */
1516 wstr
= xmalloc (2 * sizeof (uint32_t));
1517 wstr
[0] = repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
1518 now
->val
.str
.lenmb
);
1519 if (wstr
[0] == ILLEGAL_CHAR_VALUE
)
1520 /* We cannot proceed, we don't know the UCS4 value. */
1525 else if (now
->tok
== tok_ucs4
)
1527 wstr
= xmalloc (2 * sizeof (uint32_t));
1528 wstr
[0] = now
->val
.ucs4
;
1531 else if (now
->tok
== tok_charcode
)
1533 /* Argh, we have to convert to the symbol name first and then to the
1535 struct charseq
*seq
= charmap_find_symbol (charmap
,
1536 now
->val
.str
.startmb
,
1537 now
->val
.str
.lenmb
);
1539 /* Cannot find the UCS4 value. */
1542 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1543 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1544 strlen (seq
->name
));
1545 if (seq
->ucs4
== ILLEGAL_CHAR_VALUE
)
1546 /* We cannot proceed, we don't know the UCS4 value. */
1549 wstr
= xmalloc (2 * sizeof (uint32_t));
1550 wstr
[0] = seq
->ucs4
;
1553 else if (now
->tok
== tok_string
)
1555 wstr
= now
->val
.str
.startwc
;
1561 if (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
1562 lr_ignore_rest (ldfile
, 0);
1563 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1564 return (uint32_t *) -1l;
1572 read_translit_entry (struct linereader
*ldfile
, struct locale_ctype_t
*ctype
,
1573 struct token
*now
, struct charmap_t
*charmap
,
1574 struct repertoire_t
*repertoire
)
1576 uint32_t *from_wstr
= read_widestring (ldfile
, now
, charmap
, repertoire
);
1577 struct translit_t
*result
;
1578 struct translit_to_t
**top
;
1579 struct obstack
*ob
= &ctype
->mem_pool
;
1583 if (from_wstr
== NULL
)
1584 /* There is no valid from string. */
1587 result
= (struct translit_t
*) obstack_alloc (ob
,
1588 sizeof (struct translit_t
));
1589 result
->from
= from_wstr
;
1590 result
->next
= NULL
;
1600 /* Next we have one or more transliterations. They are
1601 separated by semicolons. */
1602 now
= lr_token (ldfile
, charmap
, repertoire
);
1604 if (!first
&& (now
->tok
== tok_semicolon
|| now
->tok
== tok_eol
))
1606 /* One string read. */
1607 const uint32_t zero
= 0;
1611 obstack_grow (ob
, &zero
, 4);
1612 to_wstr
= obstack_finish (ob
);
1614 *top
= obstack_alloc (ob
, sizeof (struct translit_to_t
));
1615 (*top
)->str
= to_wstr
;
1616 (*top
)->next
= NULL
;
1619 if (now
->tok
== tok_eol
)
1621 result
->next
= ctype
->translit
;
1622 ctype
->translit
= result
;
1627 top
= &(*top
)->next
;
1632 to_wstr
= read_widestring (ldfile
, now
, charmap
, repertoire
);
1633 if (to_wstr
== (uint32_t *) -1l)
1635 /* An error occurred. */
1636 obstack_free (ob
, result
);
1640 if (to_wstr
== NULL
)
1643 /* This value is usable. */
1644 obstack_grow (ob
, to_wstr
, wcslen ((wchar_t *) to_wstr
) * 4);
1652 /* The parser for the LC_CTYPE section of the locale definition.

   Tokens are read from LDFILE with lr_token and the result is stored in
   the LC_CTYPE data of RESULT (result->categories[LC_CTYPE].ctype, which
   is set up by ctype_startup).  If REPERTOIRE_NAME is not NULL the
   repertoire is loaded with repertoire_read and used for the character
   lookups.  A `copy' statement at the beginning is handed to handle_copy.

   Otherwise the lines are processed one after the other, depending on
   the first token: definitions of new class and map names, lists of
   characters belonging to a class (ranges written with an ellipsis are
   handed to charclass_symbolic_ellipsis, charclass_ucs4_ellipsis or
   charclass_charcode_ellipsis depending on the kind of the operands),
   the digit and outdigit lists, mappings written as pairs in braces,
   the transliteration entries between translit_start and translit_end
   (with at most one `include'), identifiers naming a class or map
   defined earlier, and the line ending the section.

   With IGNORE_CONTENT set the characters are still parsed but not
   entered into the tables.  If the end of the file is reached before
   the section is closed `premature end of file' is reported.  */
1654 ctype_read (struct linereader
*ldfile
, struct localedef_t
*result
,
1655 struct charmap_t
*charmap
, const char *repertoire_name
,
1658 struct repertoire_t
*repertoire
= NULL
;
1659 struct locale_ctype_t
*ctype
;
1661 enum token_t nowtok
;
1663 struct charseq
*last_seq
;
1664 uint32_t last_wch
= 0;
1665 enum token_t last_token
;
1666 enum token_t ellipsis_token
;
1667 char last_charcode
[16];
1668 size_t last_charcode_len
= 0;
1669 const char *last_str
= NULL
;
1672 /* Get the repertoire we have to use. */
1673 if (repertoire_name
!= NULL
)
1674 repertoire
= repertoire_read (repertoire_name
);
1676 /* The rest of the line containing `LC_CTYPE' must be free. */
1677 lr_ignore_rest (ldfile
, 1);
1682 now
= lr_token (ldfile
, charmap
, NULL
);
1685 while (nowtok
== tok_eol
);
1687 /* If we see `copy' now we are almost done. */
1688 if (nowtok
== tok_copy
)
1690 handle_copy (ldfile
, charmap
, repertoire
, result
, tok_lc_ctype
, LC_CTYPE
,
1691 "LC_CTYPE", ignore_content
);
1695 /* Prepare the data structures. */
1696 ctype_startup (ldfile
, result
, charmap
, ignore_content
);
1697 ctype
= result
->categories
[LC_CTYPE
].ctype
;
1699 /* Remember the repertoire we use. */
1700 if (!ignore_content
)
1701 ctype
->repertoire
= repertoire
;
1705 unsigned long int class_bit
= 0;
1706 unsigned long int class256_bit
= 0;
1707 int handle_digits
= 0;
1709 /* Of course we don't proceed beyond the end of file. */
1710 if (nowtok
== tok_eof
)
1713 /* Ignore empty lines. */
1714 if (nowtok
== tok_eol
)
1716 now
= lr_token (ldfile
, charmap
, NULL
);
1724 now
= lr_token (ldfile
, charmap
, NULL
);
1725 while (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
1727 ctype_class_new (ldfile
, ctype
, now
->val
.str
.startmb
);
1728 now
= lr_token (ldfile
, charmap
, NULL
);
1729 if (now
->tok
!= tok_semicolon
)
1731 now
= lr_token (ldfile
, charmap
, NULL
);
1733 if (now
->tok
!= tok_eol
)
1735 %s: syntax error in definition of new character class"), "LC_CTYPE");
1739 now
= lr_token (ldfile
, charmap
, NULL
);
1740 while (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
1742 ctype_map_new (ldfile
, ctype
, now
->val
.str
.startmb
, charmap
);
1743 now
= lr_token (ldfile
, charmap
, NULL
);
1744 if (now
->tok
!= tok_semicolon
)
1746 now
= lr_token (ldfile
, charmap
, NULL
);
1748 if (now
->tok
!= tok_eol
)
1750 %s: syntax error in definition of new character map"), "LC_CTYPE");
1754 /* Ignore the rest of the line if we don't need the input of
1758 lr_ignore_rest (ldfile
, 0);
1762 /* We simply forget the `class' keyword and use the following
1763 operand to determine the bit. */
1764 now
= lr_token (ldfile
, charmap
, NULL
);
1765 if (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
1767 /* Must can be one of the predefined class names. */
1768 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
1769 if (strcmp (ctype
->classnames
[cnt
], now
->val
.str
.startmb
) == 0)
1771 if (cnt
>= ctype
->nr_charclass
)
1773 #ifdef PREDEFINED_CLASSES
1774 if (now
->val
.str
.lenmb
== 8
1775 && memcmp ("special1", now
->val
.str
.startmb
, 8) == 0)
1776 class_bit
= _ISwspecial1
;
1777 else if (now
->val
.str
.lenmb
== 8
1778 && memcmp ("special2", now
->val
.str
.startmb
, 8) == 0)
1779 class_bit
= _ISwspecial2
;
1780 else if (now
->val
.str
.lenmb
== 8
1781 && memcmp ("special3", now
->val
.str
.startmb
, 8) == 0)
1782 class_bit
= _ISwspecial3
;
1786 /* OK, it's a new class. */
1787 ctype_class_new (ldfile
, ctype
, now
->val
.str
.startmb
);
1789 class_bit
= _ISwbit (ctype
->nr_charclass
- 1);
1793 class_bit
= _ISwbit (cnt
);
1795 free (now
->val
.str
.startmb
);
1797 else if (now
->tok
== tok_digit
)
1798 goto handle_tok_digit
;
1799 else if (now
->tok
< tok_upper
|| now
->tok
> tok_blank
)
1803 class_bit
= BITw (now
->tok
);
1804 class256_bit
= BIT (now
->tok
);
1807 /* The next character must be a semicolon. */
1808 now
= lr_token (ldfile
, charmap
, NULL
);
1809 if (now
->tok
!= tok_semicolon
)
1811 goto read_charclass
;
1824 /* Ignore the rest of the line if we don't need the input of
1828 lr_ignore_rest (ldfile
, 0);
1832 class_bit
= BITw (now
->tok
);
1833 class256_bit
= BIT (now
->tok
);
1836 ctype
->class_done
|= class_bit
;
1837 last_token
= tok_none
;
1838 ellipsis_token
= tok_none
;
1839 now
= lr_token (ldfile
, charmap
, NULL
);
1840 while (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
1843 struct charseq
*seq
;
1845 if (ellipsis_token
== tok_none
)
1847 if (get_character (now
, charmap
, repertoire
, &seq
, &wch
))
1850 if (!ignore_content
&& seq
!= NULL
&& seq
->nbytes
== 1)
1851 /* Yep, we can store information about this byte
1853 ctype
->class256_collection
[seq
->bytes
[0]] |= class256_bit
;
1855 if (!ignore_content
&& wch
!= ILLEGAL_CHAR_VALUE
1857 /* We have the UCS4 position. */
1858 *find_idx (ctype
, &ctype
->class_collection
,
1859 &ctype
->class_collection_max
,
1860 &ctype
->class_collection_act
, wch
) |= class_bit
;
1862 last_token
= now
->tok
;
1863 /* Terminate the string. */
1864 if (last_token
== tok_bsymbol
)
1866 now
->val
.str
.startmb
[now
->val
.str
.lenmb
] = '\0';
1867 last_str
= now
->val
.str
.startmb
;
1873 memcpy (last_charcode
, now
->val
.charcode
.bytes
, 16);
1874 last_charcode_len
= now
->val
.charcode
.nbytes
;
1876 if (!ignore_content
&& handle_digits
== 1)
1878 /* We must store the digit values. */
1879 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1881 ctype
->mbdigits_max
+= 10;
1882 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1883 (ctype
->mbdigits_max
1884 * sizeof (char *)));
1885 ctype
->wcdigits_max
+= 10;
1886 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1887 (ctype
->wcdigits_max
1888 * sizeof (uint32_t)));
1891 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
1892 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
1894 else if (!ignore_content
&& handle_digits
== 2)
1896 /* We must store the digit values. */
1897 if (ctype
->outdigits_act
>= 10)
1899 lr_error (ldfile
, _("\
1900 %s: field `%s' does not contain exactly ten entries"),
1901 "LC_CTYPE", "outdigit");
1905 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
1906 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
1907 ++ctype
->outdigits_act
;
1912 /* Now it gets complicated. We have to resolve the
1913 ellipsis problem. First we must distinguish between
1914 the different kind of ellipsis and this must match the
1915 tokens we have seen. */
1916 assert (last_token
!= tok_none
);
1918 if (last_token
!= now
->tok
)
1920 lr_error (ldfile
, _("\
1921 ellipsis range must be marked by two operands of same type"));
1922 lr_ignore_rest (ldfile
, 0);
1926 if (last_token
== tok_bsymbol
)
1928 if (ellipsis_token
== tok_ellipsis3
)
1929 lr_error (ldfile
, _("with symbolic name range values \
1930 the absolute ellipsis `...' must not be used"));
1932 charclass_symbolic_ellipsis (ldfile
, ctype
, charmap
,
1933 repertoire
, now
, last_str
,
1934 class256_bit
, class_bit
,
1941 else if (last_token
== tok_ucs4
)
1943 if (ellipsis_token
!= tok_ellipsis2
)
1944 lr_error (ldfile
, _("\
1945 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
1947 charclass_ucs4_ellipsis (ldfile
, ctype
, charmap
,
1948 repertoire
, now
, last_wch
,
1949 class256_bit
, class_bit
,
1950 ignore_content
, handle_digits
);
1954 assert (last_token
== tok_charcode
);
1956 if (ellipsis_token
!= tok_ellipsis3
)
1957 lr_error (ldfile
, _("\
1958 with character code range values one must use the absolute ellipsis `...'"));
1960 charclass_charcode_ellipsis (ldfile
, ctype
, charmap
,
1964 class256_bit
, class_bit
,
1969 /* Now we have used the last value. */
1970 last_token
= tok_none
;
1973 /* Next we expect a semicolon or the end of the line. */
1974 now
= lr_token (ldfile
, charmap
, NULL
);
1975 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
1978 if (last_token
!= tok_none
1979 && now
->tok
>= tok_ellipsis2
&& now
->tok
<= tok_ellipsis4
)
1981 ellipsis_token
= now
->tok
;
1982 now
= lr_token (ldfile
, charmap
, NULL
);
1986 if (now
->tok
!= tok_semicolon
)
1989 /* And get the next character. */
1990 now
= lr_token (ldfile
, charmap
, NULL
);
1992 ellipsis_token
= tok_none
;
1997 /* Ignore the rest of the line if we don't need the input of
2001 lr_ignore_rest (ldfile
, 0);
2006 class_bit
= _ISwdigit
;
2007 class256_bit
= _ISdigit
;
2009 goto read_charclass
;
2012 /* Ignore the rest of the line if we don't need the input of
2016 lr_ignore_rest (ldfile
, 0);
2020 if (ctype
->outdigits_act
!= 0)
2021 lr_error (ldfile
, _("\
2022 %s: field `%s' declared more than once"),
2023 "LC_CTYPE", "outdigit");
2027 goto read_charclass
;
2030 /* Ignore the rest of the line if we don't need the input of
2034 lr_ignore_rest (ldfile
, 0);
2042 /* Ignore the rest of the line if we don't need the input of
2046 lr_ignore_rest (ldfile
, 0);
2054 /* Ignore the rest of the line if we don't need the input of
2058 lr_ignore_rest (ldfile
, 0);
2062 /* We simply forget the `map' keyword and use the following
2063 operand to determine the mapping. */
2064 now
= lr_token (ldfile
, charmap
, NULL
);
2065 if (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2069 for (cnt
= 2; cnt
< ctype
->map_collection_nr
; ++cnt
)
2070 if (strcmp (now
->val
.str
.startmb
, ctype
->mapnames
[cnt
]) == 0)
2073 if (cnt
>= ctype
->map_collection_nr
)
2074 /* OK, it's a new map. */
2075 ctype_map_new (ldfile
, ctype
, now
->val
.str
.startmb
, charmap
);
2079 else if (now
->tok
< tok_toupper
|| now
->tok
> tok_tolower
)
2082 mapidx
= now
->tok
- tok_toupper
;
2084 now
= lr_token (ldfile
, charmap
, NULL
);
2085 /* This better should be a semicolon. */
2086 if (now
->tok
!= tok_semicolon
)
2090 /* Test whether this mapping was already defined. */
2091 if (ctype
->tomap_done
[mapidx
])
2093 lr_error (ldfile
, _("duplicated definition for mapping `%s'"),
2094 ctype
->mapnames
[mapidx
]);
2095 lr_ignore_rest (ldfile
, 0);
2098 ctype
->tomap_done
[mapidx
] = 1;
2100 now
= lr_token (ldfile
, charmap
, NULL
);
2101 while (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
2103 struct charseq
*from_seq
;
2105 struct charseq
*to_seq
;
2108 /* Every pair starts with an opening brace. */
2109 if (now
->tok
!= tok_open_brace
)
2112 /* Next comes the from-value. */
2113 now
= lr_token (ldfile
, charmap
, NULL
);
2114 if (get_character (now
, charmap
, repertoire
, &from_seq
,
2118 /* The next is a comma. */
2119 now
= lr_token (ldfile
, charmap
, NULL
);
2120 if (now
->tok
!= tok_comma
)
2123 /* And the other value. */
2124 now
= lr_token (ldfile
, charmap
, NULL
);
2125 if (get_character (now
, charmap
, repertoire
, &to_seq
,
2129 /* And the last thing is the closing brace. */
2130 now
= lr_token (ldfile
, charmap
, NULL
);
2131 if (now
->tok
!= tok_close_brace
)
2134 if (!ignore_content
)
2136 if (mapidx
< 2 && from_seq
!= NULL
&& to_seq
!= NULL
2137 && from_seq
->nbytes
== 1 && to_seq
->nbytes
== 1)
2138 /* We can use this value. */
2139 ctype
->map256_collection
[mapidx
][from_seq
->bytes
[0]]
2142 if (from_wch
!= ILLEGAL_CHAR_VALUE
2143 && to_wch
!= ILLEGAL_CHAR_VALUE
)
2144 /* Both correct values. */
2145 *find_idx (ctype
, &ctype
->map_collection
[mapidx
],
2146 &ctype
->map_collection_max
[mapidx
],
2147 &ctype
->map_collection_act
[mapidx
],
2151 /* Now comes a semicolon or the end of the line/file. */
2152 now
= lr_token (ldfile
, charmap
, NULL
);
2153 if (now
->tok
== tok_semicolon
)
2154 now
= lr_token (ldfile
, charmap
, NULL
);
2158 case tok_translit_start
:
2159 /* Ignore the rest of the line if we don't need the input of
2163 lr_ignore_rest (ldfile
, 0);
2167 /* The rest of the line better should be empty. */
2168 lr_ignore_rest (ldfile
, 1);
2170 /* We count here the number of allocated entries in the `translit'
2174 /* We proceed until we see the `translit_end' token. */
2175 while (now
= lr_token (ldfile
, charmap
, repertoire
),
2176 now
->tok
!= tok_translit_end
&& now
->tok
!= tok_eof
)
2178 if (now
->tok
== tok_eol
)
2179 /* Ignore empty lines. */
2182 if (now
->tok
== tok_translit_end
)
2184 lr_ignore_rest (ldfile
, 0);
2188 if (now
->tok
== tok_include
)
2190 /* We have to include locale. */
2191 const char *locale_name
;
2192 const char *repertoire_name
;
2194 now
= lr_token (ldfile
, charmap
, NULL
);
2195 /* This should be a string or an identifier. In any
2196 case something to name a locale. */
2197 if (now
->tok
!= tok_string
&& now
->tok
!= tok_ident
)
2200 lr_error (ldfile
, _("%s: syntax error"), "LC_CTYPE");
2201 lr_ignore_rest (ldfile
, 0);
2204 locale_name
= now
->val
.str
.startmb
;
2206 /* Next should be a semicolon. */
2207 now
= lr_token (ldfile
, charmap
, NULL
);
2208 if (now
->tok
!= tok_semicolon
)
2209 goto translit_syntax
;
2211 /* Now the repertoire name. */
2212 now
= lr_token (ldfile
, charmap
, NULL
);
2213 if ((now
->tok
!= tok_string
&& now
->tok
!= tok_ident
)
2214 || now
->val
.str
.startmb
== NULL
)
2215 goto translit_syntax
;
2216 repertoire_name
= now
->val
.str
.startmb
;
2218 /* We must not have more than one `include'. */
2219 if (ctype
->translit_copy_locale
!= NULL
)
2221 lr_error (ldfile
, _("\
2222 %s: only one `include' instruction allowed"), "LC_CTYPE");
2223 lr_ignore_rest (ldfile
, 0);
2227 ctype
->translit_copy_locale
= locale_name
;
2228 ctype
->translit_copy_repertoire
= repertoire_name
;
2230 /* The rest of the line must be empty. */
2231 lr_ignore_rest (ldfile
, 1);
2235 read_translit_entry (ldfile
, ctype
, now
, charmap
, repertoire
);
2240 /* Ignore the rest of the line if we don't need the input of
2244 lr_ignore_rest (ldfile
, 0);
2248 /* This could mean one of several things. First test whether
2249 it's a character class name. */
2250 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
2251 if (strcmp (now
->val
.str
.startmb
, ctype
->classnames
[cnt
]) == 0)
2253 if (cnt
< ctype
->nr_charclass
)
2255 class_bit
= _ISwbit (cnt
);
2256 class256_bit
= cnt
<= 11 ? _ISbit (cnt
) : 0;
2257 free (now
->val
.str
.startmb
);
2258 goto read_charclass
;
2260 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
)
2261 if (strcmp (now
->val
.str
.startmb
, ctype
->mapnames
[cnt
]) == 0)
2263 if (cnt
< ctype
->map_collection_nr
)
2266 free (now
->val
.str
.startmb
);
2269 #ifdef PREDEFINED_CLASSES
2270 if (strcmp (now
->val
.str
.startmb
, "special1") == 0)
2272 class_bit
= _ISwspecial1
;
2273 free (now
->val
.str
.startmb
);
2274 goto read_charclass
;
2276 if (strcmp (now
->val
.str
.startmb
, "special2") == 0)
2278 class_bit
= _ISwspecial2
;
2279 free (now
->val
.str
.startmb
);
2280 goto read_charclass
;
2282 if (strcmp (now
->val
.str
.startmb
, "special3") == 0)
2284 class_bit
= _ISwspecial3
;
2285 free (now
->val
.str
.startmb
);
2286 goto read_charclass
;
2288 if (strcmp (now
->val
.str
.startmb
, "tosymmetric") == 0)
2297 /* Next we assume `LC_CTYPE'. */
2298 now
= lr_token (ldfile
, charmap
, NULL
);
2299 if (now
->tok
== tok_eof
)
2301 if (now
->tok
== tok_eol
)
2302 lr_error (ldfile
, _("%s: incomplete `END' line"),
2304 else if (now
->tok
!= tok_lc_ctype
)
2305 lr_error (ldfile
, _("\
2306 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2307 lr_ignore_rest (ldfile
, now
->tok
== tok_lc_ctype
);
2312 if (now
->tok
!= tok_eof
)
2313 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2316 /* Prepare for the next round. */
2317 now
= lr_token (ldfile
, charmap
, NULL
);
2321 /* When we come here we reached the end of the file. */
2322 lr_error (ldfile
, _("%s: premature end of file"), "LC_CTYPE");
2327 set_class_defaults (struct locale_ctype_t
*ctype
, struct charmap_t
*charmap
,
2328 struct repertoire_t
*repertoire
)
2332 /* This function defines the default values for the classes and conversions
2333 according to POSIX.2 2.5.2.1.
2334 It may seem that the order of these if-blocks is arbitrary but it is NOT.
2335 Don't move them unless you know what you do! */
2337 void set_default (int bitpos
, int from
, int to
)
2341 int bit
= _ISbit (bitpos
);
2342 int bitw
= _ISwbit (bitpos
);
2343 /* Define string. */
2346 for (ch
= from
; ch
<= to
; ++ch
)
2349 struct charseq
*seq
;
2352 value
= repertoire_find_value (repertoire
, tmp
, 1);
2353 if (value
== ILLEGAL_CHAR_VALUE
)
2357 %s: character `%s' not defined in repertoire while needed as default value"),
2361 ELEM (ctype
, class_collection
, , value
) |= bitw
;
2363 seq
= charmap_find_value (charmap
, tmp
, 1);
2368 %s: character `%s' not defined in charmap while needed as default value"),
2371 else if (seq
->nbytes
!= 1)
2373 %s: character `%s' in charmap not representable with one byte"),
2376 ctype
->class256_collection
[seq
->bytes
[0]] |= bit
;
2380 /* Set default values if keyword was not present. */
2381 if ((ctype
->class_done
& BITw (tok_upper
)) == 0)
2382 /* "If this keyword [upper] is not specified, the uppercase letters
2383 `A' through `Z', ..., shall automatically belong to this class,
2384 with implementation defined character values." [P1003.2, 2.5.2.1] */
2385 set_default (BITPOS (tok_upper
), 'A', 'Z');
2387 if ((ctype
->class_done
& BITw (tok_lower
)) == 0)
2388 /* "If this keyword [lower] is not specified, the lowercase letters
2389 `a' through `z', ..., shall automatically belong to this class,
2390 with implementation defined character values." [P1003.2, 2.5.2.1] */
2391 set_default (BITPOS (tok_lower
), 'a', 'z');
2393 if ((ctype
->class_done
& BITw (tok_alpha
)) == 0)
2395 /* Table 2-6 in P1003.2 says that characters in class `upper' or
2396 class `lower' *must* be in class `alpha'. */
2397 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
);
2398 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
);
2400 for (cnt
= 0; cnt
< 256; ++cnt
)
2401 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
2402 ctype
->class256_collection
[cnt
] |= BIT (tok_alpha
);
2404 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
2405 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
2406 ctype
->class_collection
[cnt
] |= BITw (tok_alpha
);
2409 if ((ctype
->class_done
& BITw (tok_digit
)) == 0)
2410 /* "If this keyword [digit] is not specified, the digits `0' through
2411 `9', ..., shall automatically belong to this class, with
2412 implementation-defined character values." [P1003.2, 2.5.2.1] */
2413 set_default (BITPOS (tok_digit
), '0', '9');
2415 /* "Only characters specified for the `alpha' and `digit' keyword
2416 shall be specified. Characters specified for the keyword `alpha'
2417 and `digit' are automatically included in this class. */
2419 unsigned long int mask
= BIT (tok_alpha
) | BIT (tok_digit
);
2420 unsigned long int maskw
= BITw (tok_alpha
) | BITw (tok_digit
);
2422 for (cnt
= 0; cnt
< 256; ++cnt
)
2423 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
2424 ctype
->class256_collection
[cnt
] |= BIT (tok_alnum
);
2426 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
2427 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
2428 ctype
->class_collection
[cnt
] |= BITw (tok_alnum
);
2431 if ((ctype
->class_done
& BITw (tok_space
)) == 0)
2432 /* "If this keyword [space] is not specified, the characters <space>,
2433 <form-feed>, <newline>, <carriage-return>, <tab>, and
2434 <vertical-tab>, ..., shall automatically belong to this class,
2435 with implementation-defined character values." [P1003.2, 2.5.2.1] */
2438 struct charseq
*seq
;
2440 value
= repertoire_find_value (repertoire
, "space", 5);
2441 if (value
== ILLEGAL_CHAR_VALUE
)
2445 %s: character `%s' not defined while needed as default value"),
2446 "LC_CTYPE", "<space>");
2449 ELEM (ctype
, class_collection
, , value
) |= BIT (tok_space
);
2451 seq
= charmap_find_value (charmap
, "space", 5);
2456 %s: character `%s' not defined while needed as default value"),
2457 "LC_CTYPE", "<space>");
2459 else if (seq
->nbytes
!= 1)
2461 %s: character `%s' in charmap not representable with one byte"),
2462 "LC_CTYPE", "<space>");
2464 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
2467 value
= repertoire_find_value (repertoire
, "form-feed", 9);
2468 if (value
== ILLEGAL_CHAR_VALUE
)
2472 %s: character `%s' not defined while needed as default value"),
2473 "LC_CTYPE", "<form-feed>");
2476 ELEM (ctype
, class_collection
, , value
) |= BIT (tok_space
);
2478 seq
= charmap_find_value (charmap
, "form-feed", 9);
2483 %s: character `%s' not defined while needed as default value"),
2484 "LC_CTYPE", "<form-feed>");
2486 else if (seq
->nbytes
!= 1)
2488 %s: character `%s' in charmap not representable with one byte"),
2489 "LC_CTYPE", "<form-feed>");
2491 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
2494 value
= repertoire_find_value (repertoire
, "newline", 7);
2495 if (value
== ILLEGAL_CHAR_VALUE
)
2499 %s: character `%s' not defined while needed as default value"),
2500 "LC_CTYPE", "<newline>");
2503 ELEM (ctype
, class_collection
, , value
) |= BIT (tok_space
);
2505 seq
= charmap_find_value (charmap
, "newline", 7);
2510 character `%s' not defined while needed as default value"),
2513 else if (seq
->nbytes
!= 1)
2515 %s: character `%s' in charmap not representable with one byte"),
2516 "LC_CTYPE", "<newline>");
2518 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
2521 value
= repertoire_find_value (repertoire
, "carriage-return", 15);
2522 if (value
== ILLEGAL_CHAR_VALUE
)
2526 %s: character `%s' not defined while needed as default value"),
2527 "LC_CTYPE", "<carriage-return>");
2530 ELEM (ctype
, class_collection
, , value
) |= BIT (tok_space
);
2532 seq
= charmap_find_value (charmap
, "carriage-return", 15);
2537 %s: character `%s' not defined while needed as default value"),
2538 "LC_CTYPE", "<carriage-return>");
2540 else if (seq
->nbytes
!= 1)
2542 %s: character `%s' in charmap not representable with one byte"),
2543 "LC_CTYPE", "<carriage-return>");
2545 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
2548 value
= repertoire_find_value (repertoire
, "tab", 3);
2549 if (value
== ILLEGAL_CHAR_VALUE
)
2553 %s: character `%s' not defined while needed as default value"),
2554 "LC_CTYPE", "<tab>");
2557 ELEM (ctype
, class_collection
, , value
) |= BIT (tok_space
);
2559 seq
= charmap_find_value (charmap
, "tab", 3);
2564 %s: character `%s' not defined while needed as default value"),
2565 "LC_CTYPE", "<tab>");
2567 else if (seq
->nbytes
!= 1)
2569 %s: character `%s' in charmap not representable with one byte"),
2570 "LC_CTYPE", "<tab>");
2572 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
2575 value
= repertoire_find_value (repertoire
, "vertical-tab", 12);
2576 if (value
== ILLEGAL_CHAR_VALUE
)
2580 %s: character `%s' not defined while needed as default value"),
2581 "LC_CTYPE", "<vertical-tab>");
2584 ELEM (ctype
, class_collection
, , value
) |= BIT (tok_space
);
2586 seq
= charmap_find_value (charmap
, "vertical-tab", 12);
2591 %s: character `%s' not defined while needed as default value"),
2592 "LC_CTYPE", "<vertical-tab>");
2594 else if (seq
->nbytes
!= 1)
2596 %s: character `%s' in charmap not representable with one byte"),
2597 "LC_CTYPE", "<vertical-tab>");
2599 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
2602 if ((ctype
->class_done
& BITw (tok_xdigit
)) == 0)
2603 /* "If this keyword is not specified, the digits `0' to `9', the
2604 uppercase letters `A' through `F', and the lowercase letters `a'
2605 through `f', ..., shall automatically belong to this class, with
2606 implementation defined character values." [P1003.2, 2.5.2.1] */
2608 set_default (BITPOS (tok_xdigit
), '0', '9');
2609 set_default (BITPOS (tok_xdigit
), 'A', 'F');
2610 set_default (BITPOS (tok_xdigit
), 'a', 'f');
2613 if ((ctype
->class_done
& BITw (tok_blank
)) == 0)
2614 /* "If this keyword [blank] is unspecified, the characters <space> and
2615 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
2618 struct charseq
*seq
;
2620 value
= repertoire_find_value (repertoire
, "space", 5);
2621 if (value
== ILLEGAL_CHAR_VALUE
)
2625 %s: character `%s' not defined while needed as default value"),
2626 "LC_CTYPE", "<space>");
2629 ELEM (ctype
, class_collection
, , value
) |= BIT (tok_blank
);
2631 seq
= charmap_find_value (charmap
, "space", 5);
2636 %s: character `%s' not defined while needed as default value"),
2637 "LC_CTYPE", "<space>");
2639 else if (seq
->nbytes
!= 1)
2641 %s: character `%s' in charmap not representable with one byte"),
2642 "LC_CTYPE", "<space>");
2644 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_blank
);
2647 value
= repertoire_find_value (repertoire
, "tab", 3);
2648 if (value
== ILLEGAL_CHAR_VALUE
)
2652 %s: character `%s' not defined while needed as default value"),
2653 "LC_CTYPE", "<tab>");
2656 ELEM (ctype
, class_collection
, , value
) |= BIT (tok_blank
);
2658 seq
= charmap_find_value (charmap
, "tab", 3);
2663 %s: character `%s' not defined while needed as default value"),
2664 "LC_CTYPE", "<tab>");
2666 else if (seq
->nbytes
!= 1)
2668 %s: character `%s' in charmap not representable with one byte"),
2669 "LC_CTYPE", "<tab>");
2671 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_blank
);
2674 if ((ctype
->class_done
& BITw (tok_graph
)) == 0)
2675 /* "If this keyword [graph] is not specified, characters specified for
2676 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
2677 shall belong to this character class." [P1003.2, 2.5.2.1] */
2679 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
) |
2680 BIT (tok_alpha
) | BIT (tok_digit
) | BIT (tok_xdigit
) | BIT (tok_punct
);
2683 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
2684 if ((ctype
->class_collection
[cnt
] & mask
) != 0)
2685 ctype
->class_collection
[cnt
] |= BIT (tok_graph
);
2687 for (cnt
= 0; cnt
< 256; ++cnt
)
2688 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
2689 ctype
->class256_collection
[cnt
] |= BIT (tok_graph
);
2692 if ((ctype
->class_done
& BITw (tok_print
)) == 0)
2693 /* "If this keyword [print] is not provided, characters specified for
2694 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
2695 and the <space> character shall belong to this character class."
2696 [P1003.2, 2.5.2.1] */
2698 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
) |
2699 BIT (tok_alpha
) | BIT (tok_digit
) | BIT (tok_xdigit
) | BIT (tok_punct
);
2702 struct charseq
*seq
;
2704 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
2705 if ((ctype
->class_collection
[cnt
] & mask
) != 0)
2706 ctype
->class_collection
[cnt
] |= BIT (tok_print
);
2708 for (cnt
= 0; cnt
< 256; ++cnt
)
2709 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
2710 ctype
->class256_collection
[cnt
] |= BIT (tok_print
);
2713 space
= repertoire_find_value (repertoire
, "space", 5);
2714 if (space
== ILLEGAL_CHAR_VALUE
)
2718 %s: character `%s' not defined while needed as default value"),
2719 "LC_CTYPE", "<space>");
2722 ELEM (ctype
, class_collection
, , space
) |= BIT (tok_print
);
2724 seq
= charmap_find_value (charmap
, "space", 5);
2729 %s: character `%s' not defined while needed as default value"),
2730 "LC_CTYPE", "<space>");
2732 else if (seq
->nbytes
!= 1)
2734 %s: character `%s' in charmap not representable with one byte"),
2735 "LC_CTYPE", "<space>");
2737 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_print
);
2740 if (ctype
->tomap_done
[0] == 0)
2741 /* "If this keyword [toupper] is not specified, the lowercase letters
2742 `a' through `z', and their corresponding uppercase letters `A' to
2743 `Z', ..., shall automatically be included, with implementation-
2744 defined character values." [P1003.2, 2.5.2.1] */
2749 strcpy (tmp
, "<?>");
2751 for (ch
= 'a'; ch
<= 'z'; ++ch
)
2753 uint32_t value_from
, value_to
;
2754 struct charseq
*seq_from
, *seq_to
;
2758 value_from
= repertoire_find_value (repertoire
, &tmp
[1], 1);
2759 if (value_from
== ILLEGAL_CHAR_VALUE
)
2763 %s: character `%s' not defined while needed as default value"),
2768 /* This conversion is implementation defined. */
2769 tmp
[1] = (char) (ch
+ ('A' - 'a'));
2770 value_to
= repertoire_find_value (repertoire
, &tmp
[1], 1);
2771 if (value_to
== ILLEGAL_CHAR_VALUE
)
2775 %s: character `%s' not defined while needed as default value"),
2779 /* The index [0] is determined by the order of the
2780 `ctype_map_newP' calls in `ctype_startup'. */
2781 ELEM (ctype
, map_collection
, [0], value_from
) = value_to
;
2784 seq_from
= charmap_find_value (charmap
, &tmp
[1], 1);
2785 if (seq_from
== NULL
)
2789 %s: character `%s' not defined while needed as default value"),
2792 else if (seq_from
->nbytes
!= 1)
2796 %s: character `%s' needed as default value not representable with one byte"),
2801 /* This conversion is implementation defined. */
2802 tmp
[1] = (char) (ch
+ ('A' - 'a'));
2803 seq_to
= charmap_find_value (charmap
, &tmp
[1], 1);
2808 %s: character `%s' not defined while needed as default value"),
2811 else if (seq_to
->nbytes
!= 1)
2815 %s: character `%s' needed as default value not representable with one byte"),
2819 /* The index [0] is determined by the order of the
2820 `ctype_map_newP' calls in `ctype_startup'. */
2821 ctype
->map256_collection
[0][seq_from
->bytes
[0]]
2827 if (ctype
->tomap_done
[1] == 0)
2828 /* "If this keyword [tolower] is not specified, the mapping shall be
2829 the reverse mapping of the one specified to `toupper'." [P1003.2] */
2831 for (cnt
= 0; cnt
< ctype
->map_collection_act
[0]; ++cnt
)
2832 if (ctype
->map_collection
[0][cnt
] != 0)
2833 ELEM (ctype
, map_collection
, [1],
2834 ctype
->map_collection
[0][cnt
])
2835 = ctype
->charnames
[cnt
];
2837 for (cnt
= 0; cnt
< 256; ++cnt
)
2838 if (ctype
->map256_collection
[0][cnt
] != 0)
2839 ctype
->map_collection
[1][ctype
->map_collection
[0][cnt
]]
2840 = ctype
->charnames
[cnt
];
2843 if (ctype
->outdigits_act
== 0)
2845 for (cnt
= 0; cnt
< 10; ++cnt
)
2847 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
2850 if (ctype
->mboutdigits
[cnt
] == NULL
)
2852 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
2854 strlen (longnames
[cnt
]));
2856 if (ctype
->mboutdigits
[cnt
] == NULL
)
2858 /* Provide a replacement. */
2860 no output digits defined and none of the standard names in the charmap"));
2862 ctype
->mboutdigits
[cnt
] = obstack_alloc (&charmap
->mem_pool
,
2863 sizeof (struct charseq
) + 1);
2865 /* This is better than nothing. */
2866 ctype
->mboutdigits
[cnt
]->bytes
[0] = digits
[cnt
];
2867 ctype
->mboutdigits
[cnt
]->nbytes
= 1;
2871 ctype
->wcoutdigits
[cnt
] = repertoire_find_value (repertoire
,
2874 if (ctype
->wcoutdigits
[cnt
] == ILLEGAL_CHAR_VALUE
)
2876 ctype
->wcoutdigits
[cnt
] = repertoire_find_value (repertoire
,
2878 strlen (longnames
[cnt
]));
2880 if (ctype
->wcoutdigits
[cnt
] == ILLEGAL_CHAR_VALUE
)
2882 /* Provide a replacement. */
2884 no output digits defined and none of the standard names in the repertoire"));
2886 /* This is better than nothing. */
2887 ctype
->wcoutdigits
[cnt
] = (uint32_t) digits
[cnt
];
2892 ctype
->outdigits_act
= 10;
2898 allocate_arrays (struct locale_ctype_t
*ctype
, struct charmap_t
*charmap
,
2899 struct repertoire_t
*repertoire
)
2903 /* First we have to decide how we organize the arrays. It is easy
2904 for a one-byte character set. But a multi-byte character set
2905 cannot be stored flat because the chars might be sparsely used.
2906 So we determine an optimal hashing function for the used
2909 We use a very trivial hashing function to store the sparse
2910 table. CH % TABSIZE is used as an index. To solve multiple hits
2911 we have N planes. This guarantees a fixed search time for a
2912 character [N / 2]. In the following code we determine the minimum
2913 value for TABSIZE * N, where TABSIZE >= 256. */
2914 size_t min_total
= UINT_MAX
;
2915 size_t act_size
= 256;
2919 Computing table size for character classes might take a while..."),
2922 while (act_size
< min_total
)
2924 size_t cnt
[act_size
];
2925 size_t act_planes
= 1;
2927 memset (cnt
, '\0', sizeof cnt
);
2929 for (idx
= 0; idx
< 256; ++idx
)
2932 for (idx
= 0; idx
< ctype
->charnames_act
; ++idx
)
2933 if (ctype
->charnames
[idx
] >= 256)
2935 size_t nr
= ctype
->charnames
[idx
] % act_size
;
2937 if (++cnt
[nr
] > act_planes
)
2939 act_planes
= cnt
[nr
];
2940 if (act_size
* act_planes
>= min_total
)
2945 if (act_size
* act_planes
< min_total
)
2947 min_total
= act_size
* act_planes
;
2948 ctype
->plane_size
= act_size
;
2949 ctype
->plane_cnt
= act_planes
;
2956 fputs (_(" done\n"), stderr
);
2959 ctype
->names
= (uint32_t *) xcalloc (ctype
->plane_size
2963 for (idx
= 1; idx
< 256; ++idx
)
2964 ctype
->names
[idx
] = idx
;
2966 /* Trick: change the 0th entry's name to 1 to mark the cell occupied. */
2967 ctype
->names
[0] = 1;
2969 for (idx
= 256; idx
< ctype
->charnames_act
; ++idx
)
2971 size_t nr
= (ctype
->charnames
[idx
] % ctype
->plane_size
);
2974 while (ctype
->names
[nr
+ depth
* ctype
->plane_size
])
2976 assert (depth
< ctype
->plane_cnt
);
2978 ctype
->names
[nr
+ depth
* ctype
->plane_size
] = ctype
->charnames
[idx
];
2980 /* Now for faster access remember the index in the NAMES_B array. */
2981 ctype
->charnames
[idx
] = nr
+ depth
* ctype
->plane_size
;
2983 ctype
->names
[0] = 0;
2986 /* You wonder about this amount of memory? This is only because some
2987 users do not manage to address the array with unsigned values or
2988 data types with range >= 256. '\200' would result in the array
2989 index -128. To help these poor people we duplicate the entries for
2990 128 up to 255 below the entry for \0. */
2991 ctype
->ctype_b
= (char_class_t
*) xcalloc (256 + 128,
2992 sizeof (char_class_t
));
2993 ctype
->ctype32_b
= (char_class32_t
*) xcalloc (ctype
->plane_size
2995 sizeof (char_class32_t
));
2997 /* This is the array accessed using the multibyte string elements. */
2998 for (idx
= 0; idx
< 256; ++idx
)
2999 ctype
->ctype_b
[128 + idx
] = ctype
->class256_collection
[idx
];
3001 /* Mirror first 127 entries. We must take care that entry -1 is not
3002 mirrored because EOF == -1. */
3003 for (idx
= 0; idx
< 127; ++idx
)
3004 ctype
->ctype_b
[idx
] = ctype
->ctype_b
[256 + idx
];
3006 /* The 32 bit array contains all characters. */
3007 for (idx
= 0; idx
< ctype
->class_collection_act
; ++idx
)
3008 ctype
->ctype32_b
[ctype
->charnames
[idx
]] = ctype
->class_collection
[idx
];
3010 /* Room for table of mappings. */
3011 ctype
->map
= (uint32_t **) xmalloc (ctype
->map_collection_nr
3012 * sizeof (uint32_t *));
3014 /* Fill in all mappings. */
3015 for (idx
= 0; idx
< ctype
->map_collection_nr
; ++idx
)
3019 /* Allocate table. */
3020 ctype
->map
[idx
] = (uint32_t *) xmalloc ((ctype
->plane_size
3021 * ctype
->plane_cnt
+ 128)
3022 * sizeof (uint32_t));
3024 /* Copy default value (identity mapping). */
3025 memcpy (&ctype
->map
[idx
][128], ctype
->names
,
3026 ctype
->plane_size
* ctype
->plane_cnt
* sizeof (uint32_t));
3028 /* Copy values from collection. */
3029 for (idx2
= 0; idx2
< 256; ++idx2
)
3030 ctype
->map
[idx
][128 + idx2
] = ctype
->map256_collection
[idx
][idx2
];
3032 /* Mirror first 127 entries. We must take care not to map entry
3033 -1 because EOF == -1. */
3034 for (idx2
= 0; idx2
< 127; ++idx2
)
3035 ctype
->map
[idx
][idx2
] = ctype
->map
[idx
][256 + idx2
];
3037 /* EOF must map to EOF. */
3038 ctype
->map
[idx
][127] = EOF
;
3040 /* The 32 bit map collection. */
3041 for (idx2
= 0; idx2
< ctype
->map_collection_act
[idx
]; ++idx2
)
3042 if (ctype
->map_collection
[idx
][idx2
] != 0)
3043 ctype
->map
[idx
][128 + ctype
->charnames
[idx2
]]
3044 = ctype
->map_collection
[idx
][idx2
];
3047 /* Extra array for class and map names. */
3048 ctype
->class_name_ptr
= (uint32_t *) xmalloc (ctype
->nr_charclass
3049 * sizeof (uint32_t));
3050 ctype
->map_name_ptr
= (uint32_t *) xmalloc (ctype
->map_collection_nr
3051 * sizeof (uint32_t));
3053 /* Array for width information. Because the expected widths are very
3054 small we use only a single byte. This saves space and we need
3055 not provide the information twice for both endiannesses. */
3056 ctype
->width
= (unsigned char *) xmalloc (ctype
->plane_size
3057 * ctype
->plane_cnt
);
3058 /* Initialize with default width value. */
3059 memset (ctype
->width
, charmap
->width_default
,
3060 ctype
->plane_size
* ctype
->plane_cnt
);
3061 if (charmap
->width_rules
!= NULL
)
3065 for (cnt
= 0; cnt
< charmap
->nwidth_rules
; ++cnt
)
3067 unsigned char bytes
[charmap
->mb_cur_max
];
3068 int nbytes
= charmap
->width_rules
[cnt
].from
->nbytes
;
3070 /* We have the range of character for which the width is
3071 specified described using byte sequences of the multibyte
3072 charset. We have to convert this to UCS4 now. And we
3073 cannot simply convert the beginning and the end of the
3074 sequence, we have to iterate over the byte sequence and
3075 convert it for every single character. */
3076 memcpy (bytes
, charmap
->width_rules
[cnt
].from
->bytes
, nbytes
);
3078 while (nbytes
< charmap
->width_rules
[cnt
].to
->nbytes
3079 || memcmp (bytes
, charmap
->width_rules
[cnt
].to
->bytes
,
3082 /* Find the UCS value for `bytes'. */
3083 uint32_t wch
= repertoire_find_value (ctype
->repertoire
, bytes
,
3087 if (wch
!= ILLEGAL_CHAR_VALUE
)
3089 /* Store the value. */
3090 size_t nr
= idx
% ctype
->plane_size
;
3093 while (ctype
->names
[nr
+ depth
* ctype
->plane_size
] != nr
)
3095 assert (depth
< ctype
->plane_cnt
);
3097 ctype
->width
[nr
+ depth
* ctype
->plane_size
]
3098 = charmap
->width_rules
[cnt
].width
;
3101 /* "Increment" the bytes sequence. */
3103 while (inner
>= 0 && bytes
[inner
] == 0xff)
3108 /* We have to extend the byte sequence. */
3109 if (nbytes
>= charmap
->width_rules
[cnt
].to
->nbytes
)
3113 memset (&bytes
[1], 0, nbytes
);
3119 while (++inner
< nbytes
)
3126 /* Set MB_CUR_MAX. */
3127 ctype
->mb_cur_max
= charmap
->mb_cur_max
;
3129 /* We need the name of the currently used 8-bit character set to
3130 make correct conversion between this 8-bit representation and the
3131 ISO 10646 character set used internally for wide characters. */
3132 ctype
->codeset_name
= charmap
->code_set_name
;
3134 /* Now determine the table for the transliteration information.
3136 XXX It is not yet clear to me whether it is worth implementing a
3137 complicated algorithm which uses a hash table to locate the entries.
3138 For now I'll use a simple array which can be searched using binary
3140 if (ctype
->translit_copy_locale
!= NULL
)
3142 /* Fold in the transliteration information from the locale mentioned
3143 in the `include' statement. */
3144 struct locale_ctype_t
*here
= ctype
;
3148 struct localedef_t
*other
= find_locale (LC_CTYPE
,
3149 here
->translit_copy_locale
,
3150 repertoire
->name
, charmap
);
3155 %s: transliteration data from locale `%s' not available"),
3156 "LC_CTYPE", here
->translit_copy_locale
);
3160 here
= other
->categories
[LC_CTYPE
].ctype
;
3162 /* Enqueue the information if necessary. */
3163 if (here
->translit
!= NULL
)
3165 struct translit_t
*endp
= here
->translit
;
3166 while (endp
->next
!= NULL
)
3169 endp
->next
= ctype
->translit
;
3170 ctype
->translit
= here
->translit
;
3173 while (here
->translit_copy_locale
!= NULL
);
3176 if (ctype
->translit
!= NULL
)
3178 /* First count how many entries we have. This is the upper limit
3179 since some entries from the included files might be overwritten. */
3182 struct translit_t
*runp
= ctype
->translit
;
3183 struct translit_t
**sorted
;
3184 size_t from_len
, to_len
;
3186 while (runp
!= NULL
)
3192 /* Next we allocate an array large enough and fill in the values. */
3193 sorted
= (struct translit_t
**) alloca (number
3194 * sizeof (struct translit_t
**));
3195 runp
= ctype
->translit
;
3199 /* Search for the place where to insert this string.
3200 XXX Better use a real sorting algorithm later. */
3204 while (idx
< number
)
3206 int res
= wcscmp ((const wchar_t *) sorted
[idx
]->from
,
3207 (const wchar_t *) runp
->from
);
3222 memmove (&sorted
[idx
+ 1], &sorted
[idx
],
3223 (number
- idx
) * sizeof (struct translit_t
*));
3230 while (runp
!= NULL
);
3232 /* The next step is putting all the possible transliteration
3233 strings in one memory block so that we can write it out.
3234 We need several different blocks:
3235 - index to the from-string array
3237 - index to the to-string array
3239 And all of this must be available for both endianness variants.
3241 from_len
= to_len
= 0;
3242 for (cnt
= 0; cnt
< number
; ++cnt
)
3244 struct translit_to_t
*srunp
;
3245 from_len
+= wcslen ((const wchar_t *) sorted
[cnt
]->from
) + 1;
3246 srunp
= sorted
[cnt
]->to
;
3247 while (srunp
!= NULL
)
3249 to_len
+= wcslen ((const wchar_t *) srunp
->str
) + 1;
3250 srunp
= srunp
->next
;
3252 /* Plus one for the extra NUL character marking the end of
3253 the list for the current entry. */
3257 /* We can allocate the arrays for the results. */
3258 ctype
->translit_from_idx
= xmalloc (number
* sizeof (uint32_t));
3259 ctype
->translit_from_tbl
= xmalloc (from_len
* sizeof (uint32_t));
3260 ctype
->translit_to_idx
= xmalloc (number
* sizeof (uint32_t));
3261 ctype
->translit_to_tbl
= xmalloc (to_len
* sizeof (uint32_t));
3265 for (cnt
= 0; cnt
< number
; ++cnt
)
3268 struct translit_to_t
*srunp
;
3270 ctype
->translit_from_idx
[cnt
] = from_len
;
3271 ctype
->translit_to_idx
[cnt
] = to_len
;
3273 len
= wcslen ((const wchar_t *) sorted
[cnt
]->from
) + 1;
3274 wmemcpy ((wchar_t *) &ctype
->translit_from_tbl
[from_len
],
3275 (const wchar_t *) sorted
[cnt
]->from
, len
);
3278 ctype
->translit_to_idx
[cnt
] = to_len
;
3279 srunp
= sorted
[cnt
]->to
;
3280 while (srunp
!= NULL
)
3282 len
= wcslen ((const wchar_t *) srunp
->str
) + 1;
3283 wmemcpy ((wchar_t *) &ctype
->translit_to_tbl
[to_len
],
3284 (const wchar_t *) srunp
->str
, len
);
3286 srunp
= srunp
->next
;
3288 ctype
->translit_to_tbl
[to_len
++] = L
'\0';
3291 /* Store the information about the length. */
3292 ctype
->translit_idx_size
= number
* sizeof (uint32_t);
3293 ctype
->translit_from_tbl_size
= from_len
* sizeof (uint32_t);
3294 ctype
->translit_to_tbl_size
= to_len
* sizeof (uint32_t);
3298 /* Provide some dummy pointers since we have nothing to write out. */
3299 static uint32_t no_str
= { 0 };
3301 ctype
->translit_from_idx
= &no_str
;
3302 ctype
->translit_from_tbl
= &no_str
;
3303 ctype
->translit_to_tbl
= &no_str
;
3304 ctype
->translit_idx_size
= 0;
3305 ctype
->translit_from_tbl_size
= 0;
3306 ctype
->translit_to_tbl_size
= 0;