1 /* Copyright (C) 1995-2023 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published
6 by the Free Software Foundation; version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, see <https://www.gnu.org/licenses/>. */
25 #include <sys/param.h>
26 #include <array_length.h>
28 #include "localedef.h"
30 #include "localeinfo.h"
31 #include "linereader.h"
33 #include "elem-hash.h"
35 /* Uncomment the following line in the production version. */
36 /* #define NDEBUG 1 */
39 #define obstack_chunk_alloc malloc
40 #define obstack_chunk_free free
/* Append the 32-bit value DATA to the object currently growing in
   OBSTACK.  The current object size is asserted to satisfy
   LOCFILE_ALIGNED_P, and DATA is passed through maybe_swap_uint32 before
   it is stored.  When int32_t and int have the same size the value is
   added with obstack_int_grow; the obstack_grow call copies
   sizeof (int32_t) bytes instead (presumably the `else' branch -- the
   keyword itself is not visible in this excerpt).  */
43 __attribute ((always_inline
))
44 obstack_int32_grow (struct obstack
*obstack
, int32_t data
)
46 assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack
)));
47 data
= maybe_swap_uint32 (data
);
48 if (sizeof (int32_t) == sizeof (int))
49 obstack_int_grow (obstack
, data
);
51 obstack_grow (obstack
, &data
, sizeof (int32_t));
/* Variant of obstack_int32_grow that uses obstack_int_grow_fast when
   int32_t and int have the same size.  The alignment assertion and the
   maybe_swap_uint32 conversion are the same.  Note that the other call
   visible here is the ordinary obstack_grow, not a `_fast' variant.
   NOTE(review): the `_fast' obstack primitives normally require the
   caller to have reserved room beforehand -- confirm at the call
   sites.  */
55 __attribute ((always_inline
))
56 obstack_int32_grow_fast (struct obstack
*obstack
, int32_t data
)
58 assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack
)));
59 data
= maybe_swap_uint32 (data
);
60 if (sizeof (int32_t) == sizeof (int))
61 obstack_int_grow_fast (obstack
, data
);
63 obstack_grow (obstack
, &data
, sizeof (int32_t));
66 /* Forward declaration. */
69 /* Data type for list of strings. */
72 /* Successor in the known_sections list. */
73 struct section_list
*def_next
;
74 /* Successor in the sections list. */
75 struct section_list
*next
;
76 /* Name of the section. */
78 /* First element of this section. */
79 struct element_t
*first
;
80 /* Last element of this section. */
81 struct element_t
*last
;
82 /* These are the rules for this section. */
83 enum coll_sort_rule
*rules
;
84 /* Index of the rule set in the appropriate section of the output file. */
92 /* Number of elements. */
98 /* Data type for collating element. */
110 /* The following is a bit mask which bits are set if this element is
111 used in the appropriate level. Interesting for the singlebyte
114 XXX The type here restricts the number of levels to 32. It could
115 be changed if necessary but I doubt this is necessary. */
116 unsigned int used_in_level
;
118 struct element_list_t
*weights
;
120 /* Nonzero if this is a real character definition. */
123 /* Order of the character in the sequence. This information will
124 be used in range expressions. */
128 /* Where does the definition come from. */
132 /* Which section does this belong to. */
133 struct section_list
*section
;
135 /* Predecessor and successor in the order list. */
136 struct element_t
*last
;
137 struct element_t
*next
;
139 /* Next element in multibyte output list. */
140 struct element_t
*mbnext
;
141 struct element_t
*mblast
;
143 /* Next element in wide character output list. */
144 struct element_t
*wcnext
;
145 struct element_t
*wclast
;
148 /* Special element value. */
149 #define ELEMENT_ELLIPSIS2 ((struct element_t *) 1)
150 #define ELEMENT_ELLIPSIS3 ((struct element_t *) 2)
151 #define ELEMENT_ELLIPSIS4 ((struct element_t *) 3)
153 /* Data type for collating symbol. */
158 /* Point to place in the order list. */
159 struct element_t
*order
;
161 /* Where does the definition come from. */
166 /* Sparse table of struct element_t *. */
167 #define TABLE wchead_table
168 #define ELEMENT struct element_t *
171 #define NO_ADD_LOCALE
174 /* Sparse table of int32_t. */
175 #define TABLE collidx_table
176 #define ELEMENT int32_t
180 /* Sparse table of uint32_t. */
181 #define TABLE collseq_table
182 #define ELEMENT uint32_t
183 #define DEFAULT ~((uint32_t) 0)
187 /* Simple name list for the preprocessor. */
190 struct name_list
*next
;
195 /* The real definition of the struct for the LC_COLLATE locale. */
196 struct locale_collate_t
198 /* Does the locale use code points to compare the encoding? */
199 bool codepoint_collation
;
204 /* List of known scripts. */
205 struct section_list
*known_sections
;
206 /* List of used sections. */
207 struct section_list
*sections
;
208 /* Current section using definition. */
209 struct section_list
*current_section
;
210 /* There always can be an unnamed section. */
211 struct section_list unnamed_section
;
212 /* Flag whether the unnamed section has been defined. */
213 bool unnamed_section_defined
;
214 /* To make handling of errors easier we have another section. */
215 struct section_list error_section
;
216 /* Sometimes we are defining the values for collating symbols before
217 the first actual section. */
218 struct section_list symbol_section
;
220 /* Start of the order list. */
221 struct element_t
*start
;
223 /* The undefined element. */
224 struct element_t undefined
;
226 /* This is the cursor for `reorder_after' insertions. It is also the
   position after which insert_weights links a new element. */
227 struct element_t
*cursor
;
229 /* This value is used when handling ellipsis. */
230 struct element_t ellipsis_weight
;
232 /* Known collating elements. */
233 hash_table elem_table
;
235 /* Known collating symbols. */
236 hash_table sym_table
;
238 /* Known collation sequences. */
239 hash_table seq_table
;
/* Obstack from which the sections, elements, symbols and weight arrays
   of this category are allocated. */
241 struct obstack mempool
;
243 /* The LC_COLLATE category is a bit special as it is sometimes possible
244 that the definitions from more than one input file contain information.
245 Therefore we keep all relevant input in a list. */
246 struct locale_collate_t
*next
;
248 /* Arrays with heads of the list for each of the leading bytes in
249 the multibyte sequences. */
250 struct element_t
*mbheads
[256];
252 /* Sparse table with the heads of the element lists for wide
253 characters (presumably the counterpart of mbheads -- confirm). */
254 struct wchead_table wcheads
;
256 /* The arrays with the collation sequence order. */
257 unsigned char mbseqorder
[256];
258 struct collseq_table wcseqorder
;
260 /* State of the preprocessor. */
271 /* We have a few global variables which are used for reading all
272 LC_COLLATE category descriptions in all files. */
/* Number of sorting rules (weight levels). It sizes the per-element
   weight arrays and is the count reported by the "too many rules"
   diagnostic in read_directions; zero means it has not been fixed yet
   (read_directions then starts with room for 10 rules). */
273 static uint32_t nrules
;
275 /* List of defined preprocessor symbols. */
276 static struct name_list
*defined
;
279 /* We need UTF-8 encoding of numbers. */
/* Write the UTF-8 encoding of VAL to BUF.  Only the multi-byte part is
   visible in this excerpt: the loop looks for the smallest length STEP
   in 2..5 such that VAL has no bits set at or above bit 5 * STEP + 1,
   the lead byte is seeded with the length marker ~0xff >> STEP
   (0xc0 for STEP == 2, 0xe0 for 3, ...), and each continuation byte is
   0x80 plus six payload bits.  The single-byte case, the loop around the
   continuation-byte store and the return value are not visible here.  */
281 __attribute ((always_inline
))
282 utf8_encode (char *buf
, int val
)
295 for (step
= 2; step
< 6; ++step
)
296 if ((val
& (~(uint32_t)0 << (5 * step
+ 1))) == 0)
300 *buf
= (unsigned char) (~0xff >> step
);
304 buf
[step
] = 0x80 | (val
& 0x3f);
/* Allocate a new struct section_list from the mempool obstack of
   COLLATE.  STRING and NEXT presumably initialise the name and the
   successor of the new entry; the assignments and the return statement
   are not visible in this excerpt.  */
315 static struct section_list
*
316 make_seclist_elem (struct locale_collate_t
*collate
, const char *string
,
317 struct section_list
*next
)
319 struct section_list
*newp
;
321 newp
= (struct section_list
*) obstack_alloc (&collate
->mempool
,
/* Allocate and initialise a struct element_t from the mempool obstack
   of COLLATE.

   NAME (NAMELEN bytes) may be NULL, in which case the element has no
   name; otherwise a copy is stored.  MBS (MBSLEN bytes) is copied with
   obstack_copy0.  WCS is measured with wcslen and copied followed by an
   explicit zero terminator.  The remaining fields start out empty:
   mborder and weights are NULL, used_in_level and the two sequence
   order numbers are 0, and the section is the current section of
   COLLATE.  The parameter list continues past the visible lines (an
   `is_character' argument is stored in the element).  */
332 static struct element_t
*
333 new_element (struct locale_collate_t
*collate
, const char *mbs
, size_t mbslen
,
334 const uint32_t *wcs
, const char *name
, size_t namelen
,
337 struct element_t
*newp
;
339 newp
= (struct element_t
*) obstack_alloc (&collate
->mempool
,
341 newp
->name
= name
== NULL
? NULL
: obstack_copy0 (&collate
->mempool
,
345 newp
->mbs
= obstack_copy0 (&collate
->mempool
, mbs
, mbslen
);
355 size_t nwcs
= wcslen ((wchar_t *) wcs
);
357 /* Handle <U0000> as a single character. */
360 obstack_grow (&collate
->mempool
, wcs
, nwcs
* sizeof (uint32_t));
361 obstack_grow (&collate
->mempool
, &zero
, sizeof (uint32_t));
362 newp
->wcs
= (uint32_t *) obstack_finish (&collate
->mempool
);
370 newp
->mborder
= NULL
;
372 newp
->used_in_level
= 0;
373 newp
->is_character
= is_character
;
375 /* Will be assigned later. XXX */
376 newp
->mbseqorder
= 0;
377 newp
->wcseqorder
= 0;
379 /* Will be allocated later. */
380 newp
->weights
= NULL
;
385 newp
->section
= collate
->current_section
;
/* Allocate a struct symbol_t from the mempool obstack of COLLATE and
   give it a private copy (made with obstack_copy0) of the LEN bytes at
   NAME.  Initialisation of the remaining fields and the return
   statement are not visible in this excerpt.  */
400 static struct symbol_t
*
401 new_symbol (struct locale_collate_t
*collate
, const char *name
, size_t len
)
403 struct symbol_t
*newp
;
405 newp
= (struct symbol_t
*) obstack_alloc (&collate
->mempool
, sizeof (*newp
));
407 newp
->name
= obstack_copy0 (&collate
->mempool
, name
, len
);
417 /* Test whether this name is already defined somewhere. */
/* SYMBOL (SYMBOL_LEN bytes) is looked up, in this order, in the
   char_table of CHARMAP, in the char_table of REPERTOIRE (only when
   REPERTOIRE is not NULL), and in the sym_table and elem_table of
   COLLATE.  A find_entry result of 0 means the name exists, and a
   matching diagnostic is issued through lr_error at the position of
   LDFILE.  The remaining parameters and the return statements are not
   visible in this excerpt.  */
419 check_duplicate (struct linereader
*ldfile
, struct locale_collate_t
*collate
,
420 const struct charmap_t
*charmap
,
421 struct repertoire_t
*repertoire
, const char *symbol
,
426 if (find_entry (&charmap
->char_table
, symbol
, symbol_len
, &ignore
) == 0)
428 lr_error (ldfile
, _("`%.*s' already defined in charmap"),
429 (int) symbol_len
, symbol
);
433 if (repertoire
!= NULL
434 && (find_entry (&repertoire
->char_table
, symbol
, symbol_len
, &ignore
)
437 lr_error (ldfile
, _("`%.*s' already defined in repertoire"),
438 (int) symbol_len
, symbol
);
442 if (find_entry (&collate
->sym_table
, symbol
, symbol_len
, &ignore
) == 0)
444 lr_error (ldfile
, _("`%.*s' already defined as collating symbol"),
445 (int) symbol_len
, symbol
);
449 if (find_entry (&collate
->elem_table
, symbol
, symbol_len
, &ignore
) == 0)
451 lr_error (ldfile
, _("`%.*s' already defined as collating element"),
452 (int) symbol_len
, symbol
);
460 /* Read the direction specification. */
/* Starting with the token ARG, the keywords `forward', `backward' and
   `position' are collected into one enum coll_sort_rule bit set per
   weight level.  `forward' and `backward' exclude each other and no
   keyword may be repeated within a level; a level for which only
   `position' was given also gets `forward'.  The level counter advances
   when a token other than a comma follows a non-empty rule, and end of
   line or end of file terminates the list.

   The rules array starts with room for NRULES entries (10 while NRULES
   is still zero) and is grown with xrealloc.  A later specification
   with more rules than the first is an error, one with fewer rules is
   reported and padded with `forward'.  The result is stored in the
   `rules' member of the current section of the LC_COLLATE category in
   RESULT.  Several statements, including the loop headers, are not
   visible in this excerpt.  */
462 read_directions (struct linereader
*ldfile
, struct token
*arg
,
463 const struct charmap_t
*charmap
,
464 struct repertoire_t
*repertoire
, struct localedef_t
*result
)
467 int max
= nrules
?: 10;
/* NOTE(review): no check of the calloc result is visible before the
   array is indexed below, unlike the later xrealloc calls -- confirm. */
468 enum coll_sort_rule
*rules
= calloc (max
, sizeof (*rules
));
470 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
476 if (arg
->tok
== tok_forward
)
478 if (rules
[cnt
] & sort_backward
)
482 lr_error (ldfile
, _("\
483 %s: `forward' and `backward' are mutually excluding each other"),
488 else if (rules
[cnt
] & sort_forward
)
492 lr_error (ldfile
, _("\
493 %s: `%s' mentioned more than once in definition of weight %d"),
494 "LC_COLLATE", "forward", cnt
+ 1);
498 rules
[cnt
] |= sort_forward
;
502 else if (arg
->tok
== tok_backward
)
504 if (rules
[cnt
] & sort_forward
)
508 lr_error (ldfile
, _("\
509 %s: `forward' and `backward' are mutually excluding each other"),
514 else if (rules
[cnt
] & sort_backward
)
518 lr_error (ldfile
, _("\
519 %s: `%s' mentioned more than once in definition of weight %d"),
520 "LC_COLLATE", "backward", cnt
+ 1);
524 rules
[cnt
] |= sort_backward
;
528 else if (arg
->tok
== tok_position
)
530 if (rules
[cnt
] & sort_position
)
534 lr_error (ldfile
, _("\
535 %s: `%s' mentioned more than once in definition of weight %d"),
536 "LC_COLLATE", "position", cnt
+ 1);
540 rules
[cnt
] |= sort_position
;
546 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
548 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
|| arg
->tok
== tok_comma
549 || arg
->tok
== tok_semicolon
)
551 if (! valid
&& ! warned
)
553 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
557 /* See whether we have to increment the counter. */
558 if (arg
->tok
!= tok_comma
&& rules
[cnt
] != 0)
560 /* Add the default `forward' if we have seen only `position'. */
561 if (rules
[cnt
] == sort_position
)
562 rules
[cnt
] = sort_position
| sort_forward
;
567 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
)
568 /* End of line or file, so we exit the loop. */
573 /* See whether we have enough room in the array. */
577 rules
= (enum coll_sort_rule
*) xrealloc (rules
,
580 memset (&rules
[cnt
], '\0', (max
- cnt
) * sizeof (*rules
));
587 /* There must not be any more rules. */
590 lr_error (ldfile
, _("\
591 %s: too many rules; first entry only had %d"),
592 "LC_COLLATE", nrules
);
596 lr_ignore_rest (ldfile
, 0);
605 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
610 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
615 /* Now we know how many rules we have. */
617 rules
= (enum coll_sort_rule
*) xrealloc (rules
,
618 nrules
* sizeof (*rules
));
624 /* Not enough rules in this specification. */
626 lr_error (ldfile
, _("%s: not enough sorting rules"), "LC_COLLATE");
629 rules
[cnt
] = sort_forward
;
630 while (++cnt
< nrules
);
634 collate
->current_section
->rules
= rules
;
/* Return the element named by the LEN bytes at STR.

   The name is first looked up among the collation sequences
   (seq_table).  On a miss it is tried as a collating symbol
   (sym_table); a symbol that is found gets a newly created element
   stored in its `order' member.  Otherwise the collating elements
   (elem_table) are consulted, and if the name is unknown there too a
   new character element is created with new_element and entered into
   seq_table, so that a character may be referenced before its
   definition.  LDFILE is not used in the visible lines.  */
638 static struct element_t
*
639 find_element (struct linereader
*ldfile
, struct locale_collate_t
*collate
,
640 const char *str
, size_t len
)
644 /* Search for the entries among the collation sequences already defined. */
645 if (find_entry (&collate
->seq_table
, str
, len
, &result
) != 0)
647 /* Nope, not defined yet. So we see whether it is a
651 if (find_entry (&collate
->sym_table
, str
, len
, &ptr
) == 0)
653 /* It's a collation symbol. */
654 struct symbol_t
*sym
= (struct symbol_t
*) ptr
;
658 result
= sym
->order
= new_element (collate
, NULL
, 0, NULL
,
661 else if (find_entry (&collate
->elem_table
, str
, len
, &result
) != 0)
663 /* It's also no collation element. So it is a character
664 element defined later. */
665 result
= new_element (collate
, NULL
, 0, NULL
, str
, len
, 1);
666 /* Insert it into the sequence table. */
667 insert_entry (&collate
->seq_table
, str
, len
, result
);
671 return (struct element_t
*) result
;
/* Remove the element at the cursor of COLLATE from the order list.

   When the cursor is also the start of the list, the element is
   asserted to have neither a successor nor a predecessor and the cursor
   is simply reset to NULL.  In the remaining statements (presumably the
   `else' branch, which is not visible here) the neighbours are linked
   to each other and the cursor moves back to the predecessor of the
   removed element.  */
676 unlink_element (struct locale_collate_t
*collate
)
678 if (collate
->cursor
== collate
->start
)
680 assert (collate
->cursor
->next
== NULL
);
681 assert (collate
->cursor
->last
== NULL
);
682 collate
->cursor
= NULL
;
686 if (collate
->cursor
->next
!= NULL
)
687 collate
->cursor
->next
->last
= collate
->cursor
->last
;
688 if (collate
->cursor
->last
!= NULL
)
689 collate
->cursor
->last
->next
= collate
->cursor
->next
;
690 collate
->cursor
= collate
->cursor
->last
;
/* Link ELEM into the order list and read its weights from LDFILE.

   ELEM records the current file name, line number and section, is
   inserted directly after the cursor of the LC_COLLATE category in
   RESULT (becoming the list start if the list was empty) and then
   becomes the new cursor.  An array of NRULES weight lists is allocated
   from the mempool and filled from the tokens on the rest of the line:

   - the `ignore' token is stored as a single NULL pointer;
   - a symbol or UCS4 value is resolved with find_element (UCS4 values
     are first formatted as "U%08X");
   - a string is split into symbols and single characters, each of which
     is resolved with find_element;
   - when ELLIPSIS is not tok_none, an ellipsis token is stored as the
     placeholder ELEMENT_ELLIPSIS2; it has to be the same ellipsis as
     ELLIPSIS.

   Levels for which the line has no weight are filled with ELEM itself
   or, for an ellipsis line, with ELEMENT_ELLIPSIS2.  Surplus weights
   are reported as "too many values".  Several statements, including
   the loop headers, are not visible in this excerpt.  */
696 insert_weights (struct linereader
*ldfile
, struct element_t
*elem
,
697 const struct charmap_t
*charmap
,
698 struct repertoire_t
*repertoire
, struct localedef_t
*result
,
699 enum token_t ellipsis
)
703 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
705 /* Initialize all the fields. */
706 elem
->file
= ldfile
->fname
;
707 elem
->line
= ldfile
->lineno
;
709 elem
->last
= collate
->cursor
;
710 elem
->next
= collate
->cursor
? collate
->cursor
->next
: NULL
;
711 if (collate
->cursor
!= NULL
&& collate
->cursor
->next
!= NULL
)
712 collate
->cursor
->next
->last
= elem
;
713 if (collate
->cursor
!= NULL
)
714 collate
->cursor
->next
= elem
;
715 if (collate
->start
== NULL
)
717 assert (collate
->cursor
== NULL
);
718 collate
->start
= elem
;
721 elem
->section
= collate
->current_section
;
723 if (collate
->current_section
->first
== NULL
)
724 collate
->current_section
->first
= elem
;
725 if (collate
->current_section
->last
== collate
->cursor
)
726 collate
->current_section
->last
= elem
;
728 collate
->cursor
= elem
;
730 elem
->weights
= (struct element_list_t
*)
731 obstack_alloc (&collate
->mempool
, nrules
* sizeof (struct element_list_t
));
732 memset (elem
->weights
, '\0', nrules
* sizeof (struct element_list_t
));
736 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
739 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
)
742 if (arg
->tok
== tok_ignore
)
744 /* The weight for this level has to be ignored. We use the
745 null pointer to indicate this. */
746 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
747 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
748 elem
->weights
[weight_cnt
].w
[0] = NULL
;
749 elem
->weights
[weight_cnt
].cnt
= 1;
751 else if (arg
->tok
== tok_bsymbol
|| arg
->tok
== tok_ucs4
)
754 struct element_t
*val
;
758 if (arg
->tok
== tok_bsymbol
)
760 symstr
= arg
->val
.str
.startmb
;
761 symlen
= arg
->val
.str
.lenmb
;
765 snprintf (ucs4str
, sizeof (ucs4str
), "U%08X", arg
->val
.ucs4
);
770 val
= find_element (ldfile
, collate
, symstr
, symlen
);
774 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
775 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
776 elem
->weights
[weight_cnt
].w
[0] = val
;
777 elem
->weights
[weight_cnt
].cnt
= 1;
779 else if (arg
->tok
== tok_string
)
781 /* Split the string up in the individual characters and put
782 the element definitions in the list. */
783 const char *cp
= arg
->val
.str
.startmb
;
785 struct element_t
*charelem
;
786 struct element_t
**weights
= NULL
;
791 lr_error (ldfile
, _("%s: empty weight string not allowed"),
793 lr_ignore_rest (ldfile
, 0);
801 /* Ahh, it's a bsymbol or an UCS4 value. If it's
802 the latter we have to unify the name. */
803 const char *startp
= ++cp
;
808 if (*cp
== ldfile
->escape_char
)
811 /* It's a syntax error. */
817 if (cp
- startp
== 5 && startp
[0] == 'U'
818 && isxdigit (startp
[1]) && isxdigit (startp
[2])
819 && isxdigit (startp
[3]) && isxdigit (startp
[4]))
821 unsigned int ucs4
= strtoul (startp
+ 1, NULL
, 16);
824 newstr
= (char *) xmalloc (10);
825 snprintf (newstr
, 10, "U%08X", ucs4
);
833 charelem
= find_element (ldfile
, collate
, startp
, len
);
838 /* People really shouldn't use characters directly in
839 the string. Especially since it's not really clear
840 what this means. We interpret all characters in the
841 string as if that would be bsymbols. Otherwise we
842 would have to match back to bsymbols somehow and this
843 is not what people normally expect. */
844 charelem
= find_element (ldfile
, collate
, cp
++, 1);
847 if (charelem
== NULL
)
849 /* We ignore the rest of the line. */
850 lr_ignore_rest (ldfile
, 0);
854 /* Add the pointer. */
857 struct element_t
**newp
;
859 newp
= (struct element_t
**)
860 alloca (max
* sizeof (struct element_t
*));
861 memcpy (newp
, weights
, cnt
* sizeof (struct element_t
*));
864 weights
[cnt
++] = charelem
;
868 /* Now store the information. */
869 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
870 obstack_alloc (&collate
->mempool
,
871 cnt
* sizeof (struct element_t
*));
872 memcpy (elem
->weights
[weight_cnt
].w
, weights
,
873 cnt
* sizeof (struct element_t
*));
874 elem
->weights
[weight_cnt
].cnt
= cnt
;
876 /* We don't need the string anymore. */
877 free (arg
->val
.str
.startmb
);
879 else if (ellipsis
!= tok_none
880 && (arg
->tok
== tok_ellipsis2
881 || arg
->tok
== tok_ellipsis3
882 || arg
->tok
== tok_ellipsis4
))
884 /* It must be the same ellipsis as used in the initial column. */
885 if (arg
->tok
!= ellipsis
)
886 lr_error (ldfile
, _("\
887 %s: weights must use the same ellipsis symbol as the name"),
890 /* The weight for this level will depend on the element
891 iterating over the range. Put a placeholder. */
892 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
893 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
894 elem
->weights
[weight_cnt
].w
[0] = ELEMENT_ELLIPSIS2
;
895 elem
->weights
[weight_cnt
].cnt
= 1;
900 /* It's a syntax error. */
901 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
902 lr_ignore_rest (ldfile
, 0);
906 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
907 /* This had better be the end of the line or a semicolon. */
908 if (arg
->tok
== tok_semicolon
)
909 /* OK, ignore this and read the next token. */
910 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
911 else if (arg
->tok
!= tok_eof
&& arg
->tok
!= tok_eol
)
913 /* It's a syntax error. */
914 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
915 lr_ignore_rest (ldfile
, 0);
919 while (++weight_cnt
< nrules
);
921 if (weight_cnt
< nrules
)
923 /* This means the rest of the line uses the current element as
927 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
928 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
929 if (ellipsis
== tok_none
)
930 elem
->weights
[weight_cnt
].w
[0] = elem
;
932 elem
->weights
[weight_cnt
].w
[0] = ELEMENT_ELLIPSIS2
;
933 elem
->weights
[weight_cnt
].cnt
= 1;
935 while (++weight_cnt
< nrules
);
939 if (arg
->tok
== tok_ignore
|| arg
->tok
== tok_bsymbol
)
941 /* Too many rule values. */
942 lr_error (ldfile
, _("%s: too many values"), "LC_COLLATE");
943 lr_ignore_rest (ldfile
, 0);
946 lr_ignore_rest (ldfile
, arg
->tok
!= tok_eol
&& arg
->tok
!= tok_eof
);
/* Insert the character or collating symbol named by the SYMLEN bytes
   at SYMSTR into the order list and read its weights.

   The name is looked up with charmap_find_value; when that finds
   nothing or the entry has no UCS4 value yet, REPERTOIRE is asked for
   the wide character.  A name that is neither a character nor known in
   elem_table is either a collating symbol (sym_table) or an unsupported
   character, for which a dummy element is created and entered into
   seq_table.  A character gets a seq_table entry on first use; an
   element that was referenced before its definition has its byte
   sequence and wide character filled in here.

   An element that is already linked into the order list is reported as
   "order for ... already defined" and the rest of the line is skipped.
   Otherwise the weights are read with insert_weights using tok_none.
   Several statements, including the return statements, are not visible
   in this excerpt.  */
952 insert_value (struct linereader
*ldfile
, const char *symstr
, size_t symlen
,
953 const struct charmap_t
*charmap
, struct repertoire_t
*repertoire
,
954 struct localedef_t
*result
)
956 /* First find out what kind of symbol this is. */
959 struct element_t
*elem
= NULL
;
960 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
962 /* Try to find the character in the charmap. */
963 seq
= charmap_find_value (charmap
, symstr
, symlen
);
965 /* Determine the wide character. */
966 if (seq
== NULL
|| seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
968 wc
= repertoire_find_value (repertoire
, symstr
, symlen
);
975 if (wc
== ILLEGAL_CHAR_VALUE
&& seq
== NULL
)
977 /* It's no character, so look through the collation elements and
980 if (find_entry (&collate
->elem_table
, symstr
, symlen
, &ptr
) != 0)
983 struct symbol_t
*sym
= NULL
;
985 /* It's also no collation element. Therefore it's either a
986 collating symbol or it's a character which is not
987 supported by the character set. In the latter case we
988 simply create a dummy entry. */
989 if (find_entry (&collate
->sym_table
, symstr
, symlen
, &result
) == 0)
991 /* It's a collation symbol. */
992 sym
= (struct symbol_t
*) result
;
999 elem
= new_element (collate
, NULL
, 0, NULL
, symstr
, symlen
, 0);
1004 /* Enter a fake element in the sequence table. This
1005 won't cause anything in the output since there is
1006 no multibyte or wide character associated with
1008 insert_entry (&collate
->seq_table
, symstr
, symlen
, elem
);
1012 /* Copy the result back. */
1017 /* Otherwise the symbol stands for a character. */
1019 if (find_entry (&collate
->seq_table
, symstr
, symlen
, &ptr
) != 0)
1021 uint32_t wcs
[2] = { wc
, 0 };
1023 /* We have to allocate an entry. */
1024 elem
= new_element (collate
,
1025 seq
!= NULL
? (char *) seq
->bytes
: NULL
,
1026 seq
!= NULL
? seq
->nbytes
: 0,
1027 wc
== ILLEGAL_CHAR_VALUE
? NULL
: wcs
,
1030 /* And add it to the table. */
1031 if (insert_entry (&collate
->seq_table
, symstr
, symlen
, elem
) != 0)
1032 /* This cannot happen. */
1033 assert (! "Internal error");
1037 /* Copy the result back. */
1040 /* Maybe the character was used before the definition. In this case
1041 we have to insert the byte sequences now. */
1042 if (elem
->mbs
== NULL
&& seq
!= NULL
)
1044 elem
->mbs
= obstack_copy0 (&collate
->mempool
,
1045 seq
->bytes
, seq
->nbytes
);
1046 elem
->nmbs
= seq
->nbytes
;
1049 if (elem
->wcs
== NULL
&& wc
!= ILLEGAL_CHAR_VALUE
)
1051 uint32_t wcs
[2] = { wc
, 0 };
1053 elem
->wcs
= obstack_copy (&collate
->mempool
, wcs
, sizeof (wcs
));
1059 /* Test whether this element is not already in the list. */
1060 if (elem
->next
!= NULL
|| elem
== collate
->cursor
)
1062 lr_error (ldfile
, _("order for `%.*s' already defined at %s:%zu"),
1063 (int) symlen
, symstr
, elem
->file
, elem
->line
);
1064 lr_ignore_rest (ldfile
, 0);
1068 insert_weights (ldfile
, elem
, charmap
, repertoire
, result
, tok_none
);
1075 handle_ellipsis (struct linereader
*ldfile
, const char *symstr
, size_t symlen
,
1076 enum token_t ellipsis
, const struct charmap_t
*charmap
,
1077 struct repertoire_t
*repertoire
,
1078 struct localedef_t
*result
)
1080 struct element_t
*startp
;
1081 struct element_t
*endp
;
1082 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
1084 /* Unlink the entry added for the ellipsis. */
1085 unlink_element (collate
);
1086 startp
= collate
->cursor
;
1088 /* Process and add the end-entry. */
1090 && insert_value (ldfile
, symstr
, symlen
, charmap
, repertoire
, result
))
1091 /* Something went wrong with inserting the to-value. This means
1092 we cannot process the ellipsis. */
1095 /* Reset the cursor. */
1096 collate
->cursor
= startp
;
1098 /* Now we have to handle many different situations:
1099 - we have to distinguish between the three different ellipsis forms
1100 - whether the ellipsis is at the beginning, in the middle, or at the end.
1102 endp
= collate
->cursor
->next
;
1103 assert (symstr
== NULL
|| endp
!= NULL
);
1105 /* XXX The following is probably very wrong since also collating symbols
1106 can appear in ranges. But do we want/can refine the test for that? */
1108 /* Both, the start and the end symbol, must stand for characters. */
1109 if ((startp
!= NULL
&& (startp
->name
== NULL
|| ! startp
->is_character
))
1110 || (endp
!= NULL
&& (endp
->name
== NULL
|| ! endp
->is_character
)))
1112 lr_error (ldfile
, _("\
1113 %s: the start and the end symbol of a range must stand for characters"),
1119 if (ellipsis
== tok_ellipsis3
)
1121 /* One requirement we make here: the length of the byte
1122 sequences for the first and end character must be the same.
1123 This is mainly to prevent unwanted effects and this is often
1124 not what is wanted. */
1125 size_t len
= (startp
->mbs
!= NULL
? startp
->nmbs
1126 : (endp
->mbs
!= NULL
? endp
->nmbs
: 0));
1127 char mbcnt
[len
+ 1];
1128 char mbend
[len
+ 1];
1130 /* Well, this should be caught somewhere else already. Just to
1132 assert (startp
== NULL
|| startp
->wcs
== NULL
|| startp
->wcs
[1] == 0);
1133 assert (endp
== NULL
|| endp
->wcs
== NULL
|| endp
->wcs
[1] == 0);
1135 if (startp
!= NULL
&& endp
!= NULL
1136 && startp
->mbs
!= NULL
&& endp
->mbs
!= NULL
1137 && startp
->nmbs
!= endp
->nmbs
)
1139 lr_error (ldfile
, _("\
1140 %s: byte sequences of first and last character must have the same length"),
1145 /* Determine whether we have to generate multibyte sequences. */
1146 if ((startp
== NULL
|| startp
->mbs
!= NULL
)
1147 && (endp
== NULL
|| endp
->mbs
!= NULL
))
1152 /* Prepare the beginning byte sequence. This is either from the
1153 beginning byte sequence or it is all nulls if it was an
1154 initial ellipsis. */
1155 if (startp
== NULL
|| startp
->mbs
== NULL
)
1156 memset (mbcnt
, '\0', len
);
1159 memcpy (mbcnt
, startp
->mbs
, len
);
1161 /* And increment it so that the value is the first one we will
1163 for (cnt
= len
- 1; cnt
>= 0; --cnt
)
1164 if (++mbcnt
[cnt
] != '\0')
1169 /* And the end sequence. */
1170 if (endp
== NULL
|| endp
->mbs
== NULL
)
1171 memset (mbend
, '\0', len
);
1173 memcpy (mbend
, endp
->mbs
, len
);
1176 /* Test whether we have a correct range. */
1177 ret
= memcmp (mbcnt
, mbend
, len
);
1181 lr_error (ldfile
, _("%s: byte sequence of first character of \
1182 range is not lower than that of the last character"), "LC_COLLATE");
1186 /* Generate the byte sequences data. */
1189 struct charseq
*seq
;
1191 /* Quite a bit of work ahead. We have to find the character
1192 definition for the byte sequence and then determine the
1193 wide character belonging to it. */
1194 seq
= charmap_find_symbol (charmap
, mbcnt
, len
);
1197 struct element_t
*elem
;
1200 /* I don't think this can ever happen. */
1201 assert (seq
->name
!= NULL
);
1202 namelen
= strlen (seq
->name
);
1204 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1205 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1208 /* Now we are ready to insert the new value in the
1209 sequence. Find out whether the element is
1212 if (find_entry (&collate
->seq_table
, seq
->name
, namelen
,
1215 uint32_t wcs
[2] = { seq
->ucs4
, 0 };
1217 /* We have to allocate an entry. */
1218 elem
= new_element (collate
, mbcnt
, len
,
1219 seq
->ucs4
== ILLEGAL_CHAR_VALUE
1220 ? NULL
: wcs
, seq
->name
,
1223 /* And add it to the table. */
1224 if (insert_entry (&collate
->seq_table
, seq
->name
,
1225 namelen
, elem
) != 0)
1226 /* This cannot happen. */
1227 assert (! "Internal error");
1230 /* Copy the result. */
1233 /* Test whether this element is not already in the list. */
1234 if (elem
->next
!= NULL
|| (collate
->cursor
!= NULL
1235 && elem
->next
== collate
->cursor
))
1237 lr_error (ldfile
, _("\
1238 order for `%.*s' already defined at %s:%zu"),
1239 (int) namelen
, seq
->name
,
1240 elem
->file
, elem
->line
);
1244 /* Enqueue the new element. */
1245 elem
->last
= collate
->cursor
;
1246 if (collate
->cursor
== NULL
)
1250 elem
->next
= collate
->cursor
->next
;
1251 elem
->last
->next
= elem
;
1252 if (elem
->next
!= NULL
)
1253 elem
->next
->last
= elem
;
1255 if (collate
->start
== NULL
)
1257 assert (collate
->cursor
== NULL
);
1258 collate
->start
= elem
;
1260 collate
->cursor
= elem
;
1262 /* Add the weight value. We take them from the
1263 `ellipsis_weight' member of `collate'. */
1264 elem
->weights
= (struct element_list_t
*)
1265 obstack_alloc (&collate
->mempool
,
1266 nrules
* sizeof (struct element_list_t
));
1267 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1268 if (collate
->ellipsis_weight
.weights
[cnt
].cnt
== 1
1269 && (collate
->ellipsis_weight
.weights
[cnt
].w
[0]
1270 == ELEMENT_ELLIPSIS2
))
1272 elem
->weights
[cnt
].w
= (struct element_t
**)
1273 obstack_alloc (&collate
->mempool
,
1274 sizeof (struct element_t
*));
1275 elem
->weights
[cnt
].w
[0] = elem
;
1276 elem
->weights
[cnt
].cnt
= 1;
1280 /* Simply use the weight from `ellipsis_weight'. */
1281 elem
->weights
[cnt
].w
=
1282 collate
->ellipsis_weight
.weights
[cnt
].w
;
1283 elem
->weights
[cnt
].cnt
=
1284 collate
->ellipsis_weight
.weights
[cnt
].cnt
;
1288 /* Increment for the next round. */
1290 for (cnt
= len
- 1; cnt
>= 0; --cnt
)
1291 if (++mbcnt
[cnt
] != '\0')
1294 /* Find out whether this was all. */
1295 if (cnt
< 0 || memcmp (mbcnt
, mbend
, len
) >= 0)
1296 /* Yep, that's all. */
1303 /* For symbolic range we naturally must have a beginning and an
1304 end specified by the user. */
1306 lr_error (ldfile
, _("\
1307 %s: symbolic range ellipsis must not directly follow `order_start'"),
1309 else if (endp
== NULL
)
1310 lr_error (ldfile
, _("\
1311 %s: symbolic range ellipsis must not be directly followed by `order_end'"),
1315 /* Determine the range. To do so we have to determine the
1316 common prefix of the both names and then the numeric
1317 values of both ends. */
1318 size_t lenfrom
= strlen (startp
->name
);
1319 size_t lento
= strlen (endp
->name
);
1320 char buf
[lento
+ 1];
1325 int base
= ellipsis
== tok_ellipsis2
? 16 : 10;
1327 if (lenfrom
!= lento
)
1330 lr_error (ldfile
, _("\
1331 `%s' and `%.*s' are not valid names for symbolic range"),
1332 startp
->name
, (int) lento
, endp
->name
);
1336 while (startp
->name
[preflen
] == endp
->name
[preflen
])
1337 if (startp
->name
[preflen
] == '\0')
1338 /* Nothing to be done. The start and end point are identical
1339 and while inserting the end point we have already given
1340 the user an error message. */
1346 from
= strtol (startp
->name
+ preflen
, &cp
, base
);
1347 if ((from
== UINT_MAX
&& errno
== ERANGE
) || *cp
!= '\0')
1351 to
= strtol (endp
->name
+ preflen
, &cp
, base
);
1352 if ((to
== UINT_MAX
&& errno
== ERANGE
) || *cp
!= '\0')
1355 /* Copy the prefix. */
1356 memcpy (buf
, startp
->name
, preflen
);
1358 /* Loop over all values. */
1359 for (++from
; from
< to
; ++from
)
1361 struct element_t
*elem
= NULL
;
1362 struct charseq
*seq
;
1366 /* Generate the name. */
1367 sprintf (buf
+ preflen
, base
== 10 ? "%0*ld" : "%0*lX",
1368 (int) (lenfrom
- preflen
), from
);
1370 /* Look whether this name is already defined. */
1372 if (find_entry (&collate
->seq_table
, buf
, symlen
, &ptr
) == 0)
1374 /* Copy back the result. */
1377 if (elem
->next
!= NULL
|| (collate
->cursor
!= NULL
1378 && elem
->next
== collate
->cursor
))
1380 lr_error (ldfile
, _("\
1381 %s: order for `%.*s' already defined at %s:%zu"),
1382 "LC_COLLATE", (int) lenfrom
, buf
,
1383 elem
->file
, elem
->line
);
1387 if (elem
->name
== NULL
)
1389 lr_error (ldfile
, _("%s: `%s' must be a character"),
1395 if (elem
== NULL
|| (elem
->mbs
== NULL
&& elem
->wcs
== NULL
))
1397 /* Search for a character of this name. */
1398 seq
= charmap_find_value (charmap
, buf
, lenfrom
);
1399 if (seq
== NULL
|| seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1401 wc
= repertoire_find_value (repertoire
, buf
, lenfrom
);
1409 if (wc
== ILLEGAL_CHAR_VALUE
&& seq
== NULL
)
1410 /* We don't know anything about a character with this
1411 name. XXX Should we warn? */
1416 uint32_t wcs
[2] = { wc
, 0 };
1418 /* We have to allocate an entry. */
1419 elem
= new_element (collate
,
1421 ? (char *) seq
->bytes
: NULL
,
1422 seq
!= NULL
? seq
->nbytes
: 0,
1423 wc
== ILLEGAL_CHAR_VALUE
1424 ? NULL
: wcs
, buf
, lenfrom
, 1);
1428 /* Update the element. */
1431 elem
->mbs
= obstack_copy0 (&collate
->mempool
,
1432 seq
->bytes
, seq
->nbytes
);
1433 elem
->nmbs
= seq
->nbytes
;
1436 if (wc
!= ILLEGAL_CHAR_VALUE
)
1440 obstack_grow (&collate
->mempool
,
1441 &wc
, sizeof (uint32_t));
1442 obstack_grow (&collate
->mempool
,
1443 &zero
, sizeof (uint32_t));
1444 elem
->wcs
= obstack_finish (&collate
->mempool
);
1449 elem
->file
= ldfile
->fname
;
1450 elem
->line
= ldfile
->lineno
;
1451 elem
->section
= collate
->current_section
;
1454 /* Enqueue the new element. */
1455 elem
->last
= collate
->cursor
;
1456 elem
->next
= collate
->cursor
->next
;
1457 elem
->last
->next
= elem
;
1458 if (elem
->next
!= NULL
)
1459 elem
->next
->last
= elem
;
1460 collate
->cursor
= elem
;
1462 /* Now add the weights. They come from the `ellipsis_weights'
1463 member of `collate'. */
1464 elem
->weights
= (struct element_list_t
*)
1465 obstack_alloc (&collate
->mempool
,
1466 nrules
* sizeof (struct element_list_t
));
1467 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1468 if (collate
->ellipsis_weight
.weights
[cnt
].cnt
== 1
1469 && (collate
->ellipsis_weight
.weights
[cnt
].w
[0]
1470 == ELEMENT_ELLIPSIS2
))
1472 elem
->weights
[cnt
].w
= (struct element_t
**)
1473 obstack_alloc (&collate
->mempool
,
1474 sizeof (struct element_t
*));
1475 elem
->weights
[cnt
].w
[0] = elem
;
1476 elem
->weights
[cnt
].cnt
= 1;
1480 /* Simply use the weight from `ellipsis_weight'. */
1481 elem
->weights
[cnt
].w
=
1482 collate
->ellipsis_weight
.weights
[cnt
].w
;
1483 elem
->weights
[cnt
].cnt
=
1484 collate
->ellipsis_weight
.weights
[cnt
].cnt
;
1489 /* Move the cursor to the last entry in the ellipsis.
1490 Subsequent operations need to start from the last entry. */
1491 collate
->cursor
= endp
;
/* Prepare the LC_COLLATE category of LOCALE before its definition is
   read from LDFILE.  The set-up is guarded by IGNORE_CONTENT being zero
   and LOCALE not yet having collation data.  With COPY_LOCALE == NULL a
   zeroed `struct locale_collate_t' is obtained from xcalloc, its three
   hash tables and its obstack are initialized, `col_weight_max' is set
   to -1 and `codepoint_collation' to false.  The other visible path
   makes LOCALE share the collate pointer of COPY_LOCALE.  The
   `translate_strings' and `return_widestr' members of LDFILE are set
   to 0.  */
1496 collate_startup (struct linereader
*ldfile
, struct localedef_t
*locale
,
1497 struct localedef_t
*copy_locale
, int ignore_content
)
1499 if (!ignore_content
&& locale
->categories
[LC_COLLATE
].collate
== NULL
)
1501 struct locale_collate_t
*collate
;
1503 if (copy_locale
== NULL
)
1505 collate
= locale
->categories
[LC_COLLATE
].collate
=
1506 (struct locale_collate_t
*)
1507 xcalloc (1, sizeof (struct locale_collate_t
));
1509 /* Init the various data structures. */
1510 init_hash (&collate
->elem_table
, 100);
1511 init_hash (&collate
->sym_table
, 100);
1512 init_hash (&collate
->seq_table
, 500);
1513 obstack_init (&collate
->mempool
);
1515 collate
->col_weight_max
= -1;
1516 collate
->codepoint_collation
= false;
1519 /* Reuse the copy_locale's data structures. */
1520 collate
= locale
->categories
[LC_COLLATE
].collate
=
1521 copy_locale
->categories
[LC_COLLATE
].collate
;
1524 ldfile
->translate_strings
= 0;
1525 ldfile
->return_widestr
= 0;
/* Post-process the LC_COLLATE data of LOCALE after parsing.  The
   statements visible below perform these steps in order:
   - warn when no collation data exists, and stop early for codepoint
     collation;
   - check that `position' is used for a level in all sections or none;
   - walk the `collate->start' list, mark in `used_in_level' which
     elements serve as weights at which level, and replace weights that
     refer to undefined symbols by `collate->undefined';
   - assign `mborder', `wcorder', `mbseqorder' and `wcseqorder' values
     and link every element with a multibyte sequence into the
     `mbheads' list selected by its first byte;
   - point unset `mbheads' entries at the UNDEFINED element;
   - build the `wcheads' and `wcseqorder' tables for wide characters;
   - give the UNDEFINED element its ordering values;
   - assign `ruleidx' so that sections with identical rules share one
     index (at most 128 are supported, see the final assert).
   CHARMAP is not referenced by any statement visible here.  */
1530 collate_finish (struct localedef_t
*locale
, const struct charmap_t
*charmap
)
1532 /* Now is the time when we can assign the individual collation
1533 values for all the symbols. We have possibly different values
1534 for the wide- and the multibyte-character symbols. This is done
1535 since it might make a difference in the encoding if there is in
1536 some cases no multibyte-character but there are wide-characters.
1537 (The other way around it is not important since the encoded
1538 collation value in the wide-character case is 32 bits wide and
1539 therefore requires no encoding).
1541 The lowest collation value assigned is 2. Zero is reserved for
1542 the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1543 functions and 1 is used to separate the individual passes for the
1546 We also have to construct a list with all the bytes/words which
1547 can come first in a sequence, followed by all the elements which
1548 also start with this byte/word. The order is reverse which has
1549 among others the important effect that longer strings are located
1550 first in the list. This is required for the output data since
1551 the algorithm used in `strcoll' etc depends on this.
1553 The multibyte case is easy. We simply sort into an array with
1555 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1560 struct element_t
*runp
;
1562 int need_undefined
= 0;
1563 struct section_list
*sect
;
1566 if (collate
== NULL
)
1568 /* No data, no check. Issue a warning. */
1569 record_warning (_("No definition for %s category found"),
1574 /* No data required. */
1575 if (collate
->codepoint_collation
)
1578 /* If this assertion is hit change the type in `element_t'. */
1579 assert (nrules
<= sizeof (runp
->used_in_level
) * 8);
1581 /* Make sure that the `position' rule is used either in all sections
1583 for (i
= 0; i
< nrules
; ++i
)
1584 for (sect
= collate
->sections
; sect
!= NULL
; sect
= sect
->next
)
1585 if (sect
!= collate
->current_section
1586 && sect
->rules
!= NULL
1587 && ((sect
->rules
[i
] & sort_position
)
1588 != (collate
->current_section
->rules
[i
] & sort_position
)))
1590 record_error (0, 0, _("\
1591 %s: `position' must be used for a specific level in all sections or none"),
1596 /* Find out which elements are used at which level. At the same
1597 time we find out whether we have any undefined symbols. */
1598 runp
= collate
->start
;
1599 while (runp
!= NULL
)
1601 if (runp
->mbs
!= NULL
)
1603 for (i
= 0; i
< nrules
; ++i
)
1607 for (j
= 0; j
< runp
->weights
[i
].cnt
; ++j
)
1608 /* A NULL pointer as the weight means IGNORE. */
1609 if (runp
->weights
[i
].w
[j
] != NULL
)
1611 if (runp
->weights
[i
].w
[j
]->weights
== NULL
)
1613 record_error_at_line (0, 0, runp
->file
, runp
->line
,
1614 _("symbol `%s' not defined"),
1615 runp
->weights
[i
].w
[j
]->name
);
1618 runp
->weights
[i
].w
[j
] = &collate
->undefined
;
1621 /* Set the bit for the level. */
1622 runp
->weights
[i
].w
[j
]->used_in_level
|= 1 << i
;
1627 /* Up to the next entry. */
1631 /* Walk through the list of defined sequences and assign weights. Also
1632 create the data structure which will allow generating the single byte
1633 character based tables.
1635 Since at each time only the weights for each of the rules are
1636 only compared to other weights for this rule it is possible to
1637 assign more compact weight values than simply counting all
1638 weights in sequence. We can assign weights from 3, one for each
1639 rule individually and only for those elements, which are actually
1642 Why is this important? It is not for the wide char table. But
1643 it is for the singlebyte output since here larger numbers have to
1644 be encoded to make it possible to emit the value as a byte
1646 for (i
= 0; i
< nrules
; ++i
)
1651 runp
= collate
->start
;
1652 while (runp
!= NULL
)
1654 /* Determine the order. */
1655 if (runp
->used_in_level
!= 0)
1657 runp
->mborder
= (int *) obstack_alloc (&collate
->mempool
,
1658 nrules
* sizeof (int));
1660 for (i
= 0; i
< nrules
; ++i
)
1661 if ((runp
->used_in_level
& (1 << i
)) != 0)
1662 runp
->mborder
[i
] = mbact
[i
]++;
1664 runp
->mborder
[i
] = 0;
1667 if (runp
->mbs
!= NULL
)
1669 struct element_t
**eptr
;
1670 struct element_t
*lastp
= NULL
;
1672 /* Find the point where to insert in the list. */
1673 eptr
= &collate
->mbheads
[((unsigned char *) runp
->mbs
)[0]];
1674 while (*eptr
!= NULL
)
1676 if ((*eptr
)->nmbs
< runp
->nmbs
)
1679 if ((*eptr
)->nmbs
== runp
->nmbs
)
1681 int c
= memcmp ((*eptr
)->mbs
, runp
->mbs
, runp
->nmbs
);
1685 /* This should not happen. It means that we have
1686 two symbols with the same byte sequence. It is
1687 of course an error. */
1688 record_error_at_line (0, 0, (*eptr
)->file
,
1691 symbol `%s' has the same encoding as"), (*eptr
)->name
);
1693 record_error_at_line (0, 0, runp
->file
, runp
->line
,
1694 _("symbol `%s'"), runp
->name
);
1698 /* Insert it here. */
1702 /* To the next entry. */
1704 eptr
= &(*eptr
)->mbnext
;
1707 /* Set the pointers. */
1708 runp
->mbnext
= *eptr
;
1709 runp
->mblast
= lastp
;
1711 (*eptr
)->mblast
= runp
;
1717 if (runp
->used_in_level
)
1718 runp
->wcorder
= wcact
++;
1720 if (runp
->is_character
)
1722 if (runp
->nmbs
== 1)
1723 collate
->mbseqorder
[((unsigned char *) runp
->mbs
)[0]] = mbseqact
++;
1725 runp
->wcseqorder
= wcseqact
++;
1727 else if (runp
->mbs
!= NULL
&& runp
->weights
!= NULL
)
1728 /* This is for collation elements. */
1729 runp
->wcseqorder
= wcseqact
++;
1731 /* Up to the next entry. */
1735 /* Find out whether any of the `mbheads' entries is unset. In this
1736 case we use the UNDEFINED entry. */
1737 for (i
= 1; i
< 256; ++i
)
1738 if (collate
->mbheads
[i
] == NULL
)
1741 collate
->mbheads
[i
] = &collate
->undefined
;
1744 /* Now to the wide character case. */
1745 collate
->wcheads
.p
= 6;
1746 collate
->wcheads
.q
= 10;
1747 wchead_table_init (&collate
->wcheads
);
1749 collate
->wcseqorder
.p
= 6;
1750 collate
->wcseqorder
.q
= 10;
1751 collseq_table_init (&collate
->wcseqorder
);
1754 runp
= collate
->start
;
1755 while (runp
!= NULL
)
1757 if (runp
->wcs
!= NULL
)
1759 struct element_t
*e
;
1760 struct element_t
**eptr
;
1761 struct element_t
*lastp
;
1763 /* Insert the collation sequence value. */
1764 if (runp
->is_character
)
1765 collseq_table_add (&collate
->wcseqorder
, runp
->wcs
[0],
1768 /* Find the point where to insert in the list. */
1769 e
= wchead_table_get (&collate
->wcheads
, runp
->wcs
[0]);
1772 while (*eptr
!= NULL
)
1774 if ((*eptr
)->nwcs
< runp
->nwcs
)
1777 if ((*eptr
)->nwcs
== runp
->nwcs
)
1779 int c
= wmemcmp ((wchar_t *) (*eptr
)->wcs
,
1780 (wchar_t *) runp
->wcs
, runp
->nwcs
);
1784 /* This should not happen. It means that we have
1785 two symbols with the same byte sequence. It is
1786 of course an error. */
1787 record_error_at_line (0, 0, (*eptr
)->file
,
1790 symbol `%s' has the same encoding as"), (*eptr
)->name
);
1792 record_error_at_line (0, 0, runp
->file
, runp
->line
,
1793 _("symbol `%s'"), runp
->name
);
1797 /* Insert it here. */
1801 /* To the next entry. */
1803 eptr
= &(*eptr
)->wcnext
;
1806 /* Set the pointers. */
1807 runp
->wcnext
= *eptr
;
1808 runp
->wclast
= lastp
;
1810 (*eptr
)->wclast
= runp
;
1813 wchead_table_add (&collate
->wcheads
, runp
->wcs
[0], e
);
1818 /* Up to the next entry. */
1822 /* Now determine whether the UNDEFINED entry is needed and if yes,
1823 whether it was defined. */
1824 collate
->undefined
.used_in_level
= need_undefined
? ~0ul : 0;
1825 if (collate
->undefined
.file
== NULL
)
1829 /* This seems not to be enforced by recent standards. Don't
1830 emit an error, simply append UNDEFINED at the end. */
1831 collate
->undefined
.mborder
=
1832 (int *) obstack_alloc (&collate
->mempool
, nrules
* sizeof (int));
1834 for (i
= 0; i
< nrules
; ++i
)
1835 collate
->undefined
.mborder
[i
] = mbact
[i
]++;
1838 /* In any case we will need the definition for the wide character
1839 case. But we will not complain that it is missing since the
1840 specification strangely enough does not seem to account for
1842 collate
->undefined
.wcorder
= wcact
++;
1845 /* Finally, try to unify the rules for the sections. Whenever the rules
1846 for a section are the same as those for another section give the
1847 ruleset the same index. Since there are never many section we can
1848 use an O(n^2) algorithm here. */
1849 sect
= collate
->sections
;
1850 while (sect
!= NULL
&& sect
->rules
== NULL
)
1853 /* Bail out if we have no sections because of earlier errors. */
1856 record_error (EXIT_FAILURE
, 0, _("too many errors; giving up"));
1863 struct section_list
*osect
= collate
->sections
;
1865 while (osect
!= sect
)
1866 if (osect
->rules
!= NULL
1867 && memcmp (osect
->rules
, sect
->rules
,
1868 nrules
* sizeof (osect
->rules
[0])) == 0)
1871 osect
= osect
->next
;
1874 sect
->ruleidx
= ruleidx
++;
1876 sect
->ruleidx
= osect
->ruleidx
;
1881 while (sect
!= NULL
&& sect
->rules
== NULL
);
1883 while (sect
!= NULL
);
1884 /* We are currently not prepared for more than 128 rulesets. But this
1885 should never really be a problem. */
1886 assert (ruleidx
<= 128);
/* Append the multibyte weight record of ELEM to POOL and return an
   index for it.  The index is the object size of POOL taken before the
   record is written, combined with the low seven bits of the `ruleidx'
   of ELEM's section shifted into bits 24 and up.  For each of the
   `nrules' levels the record holds one length byte followed by the
   utf8_encode'd `mborder' values of all weights that are not IGNORE
   (NULL).  The UNDEFINED element of COLLATE is special-cased first; per
   the comment below its weights are already in the pool.  */
1891 output_weight (struct obstack
*pool
, struct locale_collate_t
*collate
,
1892 struct element_t
*elem
)
1897 /* Optimize the use of UNDEFINED. */
1898 if (elem
== &collate
->undefined
)
1899 /* The weights are already inserted. */
1902 /* This byte can start exactly one collation element and this is
1903 a single byte. We can directly give the index to the weights. */
1904 retval
= obstack_object_size (pool
);
1906 /* Construct the weight. */
1907 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1909 char buf
[elem
->weights
[cnt
].cnt
* 7];
1913 for (i
= 0; i
< elem
->weights
[cnt
].cnt
; ++i
)
1914 /* Encode the weight value. We do nothing for IGNORE entries. */
1915 if (elem
->weights
[cnt
].w
[i
] != NULL
)
1916 len
+= utf8_encode (&buf
[len
],
1917 elem
->weights
[cnt
].w
[i
]->mborder
[cnt
]);
1919 /* And add the buffer content. */
1920 obstack_1grow (pool
, len
);
1921 obstack_grow (pool
, buf
, len
);
1924 return retval
| ((elem
->section
->ruleidx
& 0x7f) << 24);
/* Wide character counterpart of output_weight: append the weight
   record of ELEM to POOL as 32-bit words and return an index for it.
   The index is the object size of POOL divided by sizeof (int32_t),
   taken before the record is written, combined with the low seven bits
   of the section's `ruleidx' shifted into bits 24 and up.  For each of
   the `nrules' levels the number of non-IGNORE weights is emitted with
   obstack_int32_grow, followed by their `wcorder' values; the values
   are then passed through maybe_swap_uint32_obstack.  The UNDEFINED
   element of COLLATE is special-cased first.  */
1929 output_weightwc (struct obstack
*pool
, struct locale_collate_t
*collate
,
1930 struct element_t
*elem
)
1935 /* Optimize the use of UNDEFINED. */
1936 if (elem
== &collate
->undefined
)
1937 /* The weights are already inserted. */
1940 /* This byte can start exactly one collation element and this is
1941 a single byte. We can directly give the index to the weights. */
1942 retval
= obstack_object_size (pool
) / sizeof (int32_t);
1944 /* Construct the weight. */
1945 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1947 int32_t buf
[elem
->weights
[cnt
].cnt
];
1951 for (i
= 0, j
= 0; i
< elem
->weights
[cnt
].cnt
; ++i
)
1952 if (elem
->weights
[cnt
].w
[i
] != NULL
)
1953 buf
[j
++] = elem
->weights
[cnt
].w
[i
]->wcorder
;
1955 /* And add the buffer content. */
1956 obstack_int32_grow (pool
, j
);
1958 obstack_grow (pool
, buf
, j
* sizeof (int32_t));
1959 maybe_swap_uint32_obstack (pool
, j
);
1962 return retval
| ((elem
->section
->ruleidx
& 0x7f) << 24);
1965 /* If localedef is ever threaded, this would need to be __thread var. */
/* Members of the `atwc' state object read by add_to_tablewc.
   collate_output points them at its local weight, extra and indirect
   pools, at the collation data and at the wide character index table
   before calling wchead_table_iterate, and clears the whole object
   with memset afterwards.  */
1968 struct obstack
*weightpool
;
1969 struct obstack
*extrapool
;
1970 struct obstack
*indpool
;
1971 struct locale_collate_t
*collate
;
1972 struct collidx_table
*tablewc
;
/* Callback that collate_output hands to wchead_table_iterate.  Enter
   the list of collation elements starting with wide character CH,
   whose head is RUNP, into `atwc.tablewc'.  When RUNP is the only list
   member and consists of a single wide character, the index returned
   by output_weightwc is stored for CH.  Otherwise the negated word
   offset into `atwc.extrapool' is stored and the list is written
   there, following the `wcnext' links.  A run of consecutive
   sequences is written as one entry with a negative index into
   `atwc.indpool', where the individual weight indices are collected.
   A single sequence is written as its weight index, its length minus
   one and its remaining characters.  */
1975 static void add_to_tablewc (uint32_t ch
, struct element_t
*runp
);
1978 add_to_tablewc (uint32_t ch
, struct element_t
*runp
)
1980 if (runp
->wcnext
== NULL
&& runp
->nwcs
== 1)
1982 int32_t weigthidx
= output_weightwc (atwc
.weightpool
, atwc
.collate
,
1984 collidx_table_add (atwc
.tablewc
, ch
, weigthidx
);
1988 /* As for the singlebyte table, we recognize sequences and
1991 collidx_table_add (atwc
.tablewc
, ch
,
1992 -(obstack_object_size (atwc
.extrapool
)
1993 / sizeof (uint32_t)));
1997 /* Store the current index in the weight table. We know that
1998 the current position in the `extrapool' is aligned on a
2003 /* Find out whether this is a single entry or we have more than
2004 one consecutive entry. */
2005 if (runp
->wcnext
!= NULL
2006 && runp
->nwcs
== runp
->wcnext
->nwcs
2007 && wmemcmp ((wchar_t *) runp
->wcs
,
2008 (wchar_t *)runp
->wcnext
->wcs
,
2009 runp
->nwcs
- 1) == 0
2010 && (runp
->wcs
[runp
->nwcs
- 1]
2011 == runp
->wcnext
->wcs
[runp
->nwcs
- 1] + 1))
2014 struct element_t
*series_startp
= runp
;
2015 struct element_t
*curp
;
2017 /* Now add first the initial byte sequence. */
2018 added
= (1 + 1 + 2 * (runp
->nwcs
- 1)) * sizeof (int32_t);
2019 if (sizeof (int32_t) == sizeof (int))
2020 obstack_make_room (atwc
.extrapool
, added
);
2022 /* More than one consecutive entry. We mark this by having
2023 a negative index into the indirect table. */
2024 obstack_int32_grow_fast (atwc
.extrapool
,
2025 -(obstack_object_size (atwc
.indpool
)
2026 / sizeof (int32_t)));
2027 obstack_int32_grow_fast (atwc
.extrapool
, runp
->nwcs
- 1);
2030 runp
= runp
->wcnext
;
2031 while (runp
->wcnext
!= NULL
2032 && runp
->nwcs
== runp
->wcnext
->nwcs
2033 && wmemcmp ((wchar_t *) runp
->wcs
,
2034 (wchar_t *)runp
->wcnext
->wcs
,
2035 runp
->nwcs
- 1) == 0
2036 && (runp
->wcs
[runp
->nwcs
- 1]
2037 == runp
->wcnext
->wcs
[runp
->nwcs
- 1] + 1));
2039 /* Now walk backward from here to the beginning. */
2042 for (i
= 1; i
< runp
->nwcs
; ++i
)
2043 obstack_int32_grow_fast (atwc
.extrapool
, curp
->wcs
[i
]);
2045 /* Now find the end of the consecutive sequence and
2046 add all the indices in the indirect pool. */
2049 weightidx
= output_weightwc (atwc
.weightpool
, atwc
.collate
,
2051 obstack_int32_grow (atwc
.indpool
, weightidx
);
2053 curp
= curp
->wclast
;
2055 while (curp
!= series_startp
);
2057 /* Add the final weight. */
2058 weightidx
= output_weightwc (atwc
.weightpool
, atwc
.collate
,
2060 obstack_int32_grow (atwc
.indpool
, weightidx
);
2062 /* And add the end byte sequence. Without length this
2064 for (i
= 1; i
< curp
->nwcs
; ++i
)
2065 obstack_int32_grow (atwc
.extrapool
, curp
->wcs
[i
]);
2069 /* A single entry. Simply add the index and the length and
2070 string (except for the first character which is already
2074 /* Output the weight info. */
2075 weightidx
= output_weightwc (atwc
.weightpool
, atwc
.collate
,
2078 assert (runp
->nwcs
> 0);
2079 added
= (1 + 1 + runp
->nwcs
- 1) * sizeof (int32_t);
2080 if (sizeof (int) == sizeof (int32_t))
2081 obstack_make_room (atwc
.extrapool
, added
);
2083 obstack_int32_grow_fast (atwc
.extrapool
, weightidx
);
2084 obstack_int32_grow_fast (atwc
.extrapool
, runp
->nwcs
- 1);
2085 for (i
= 1; i
< runp
->nwcs
; ++i
)
2086 obstack_int32_grow_fast (atwc
.extrapool
, runp
->wcs
[i
]);
2090 runp
= runp
->wcnext
;
2092 while (runp
!= NULL
);
2096 /* Include the C locale identity tables for _NL_COLLATE_COLLSEQMB and
2097 _NL_COLLATE_COLLSEQWC. */
2098 #include "C-collate-seq.c"
/* Write the compiled LC_COLLATE category of LOCALE to OUTPUT_PATH.
   The locale file starts with the number of rules.  When there is no
   collation data, or codepoint collation is selected, the remaining
   items are filled with fixed content (a zero hash size, the code set
   name of CHARMAP, the `collseqmb' and `collseqwc' tables, empty items
   otherwise) and written at once.  Otherwise the visible statements
   emit, in order:
   - the ruleset table, padded towards LOCFILE_ALIGN;
   - the 8-bit index table `tablemb' with the weight, extra and
     indirect pools built from the `mbheads' lists;
   - three empty items, then the same four tables for wide characters,
     built by add_to_tablewc through wchead_table_iterate;
   - a hash table of collation element names with their multibyte and
     wide character data;
   - the `mbseqorder' and `wcseqorder' tables and the code set name.
   The file is then written with write_locale_data and the three local
   obstacks are freed.  */
2101 collate_output (struct localedef_t
*locale
, const struct charmap_t
*charmap
,
2102 const char *output_path
)
2104 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
2105 const size_t nelems
= _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
);
2106 struct locale_file file
;
2108 int32_t tablemb
[256];
2109 struct obstack weightpool
;
2110 struct obstack extrapool
;
2111 struct obstack indirectpool
;
2112 struct section_list
*sect
;
2113 struct collidx_table tablewc
;
2115 uint32_t *elem_table
;
2117 struct element_t
*runp
;
2119 init_locale_data (&file
, nelems
);
2120 add_locale_uint32 (&file
, nrules
);
2122 /* If we have no LC_COLLATE data emit only the number of rules as zero. */
2123 if (collate
== NULL
|| collate
->codepoint_collation
)
2126 for (idx
= 1; idx
< nelems
; idx
++)
2128 /* The words have to be handled specially. */
2129 if (idx
== _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB
))
2130 add_locale_uint32 (&file
, 0);
2131 else if (idx
== _NL_ITEM_INDEX (_NL_COLLATE_CODESET
)
2133 /* A valid LC_COLLATE must have a code set name. */
2134 add_locale_string (&file
, charmap
->code_set_name
);
2135 else if (idx
== _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQMB
)
2137 add_locale_raw_data (&file
, collseqmb
, sizeof (collseqmb
));
2138 else if (idx
== _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQWC
)
2140 add_locale_uint32_array (&file
, collseqwc
,
2141 array_length (collseqwc
));
2143 add_locale_empty (&file
);
2145 write_locale_data (output_path
, LC_COLLATE
, "LC_COLLATE", &file
);
2149 obstack_init (&weightpool
);
2150 obstack_init (&extrapool
);
2151 obstack_init (&indirectpool
);
2153 /* Since we are using the sign of an integer to mark indirection the
2154 offsets in the arrays we are indirectly referring to must not be
2155 zero since -0 == 0. Therefore we add a bit of dummy content. */
2156 obstack_int32_grow (&extrapool
, 0);
2157 obstack_int32_grow (&indirectpool
, 0);
2159 /* Prepare the ruleset table. */
2160 for (sect
= collate
->sections
, i
= 0; sect
!= NULL
; sect
= sect
->next
)
2161 if (sect
->rules
!= NULL
&& sect
->ruleidx
== i
)
2165 obstack_make_room (&weightpool
, nrules
);
2167 for (j
= 0; j
< nrules
; ++j
)
2168 obstack_1grow_fast (&weightpool
, sect
->rules
[j
]);
2171 /* And align the output. */
2172 i
= (nrules
* i
) % LOCFILE_ALIGN
;
2175 obstack_1grow (&weightpool
, '\0');
2176 while (++i
< LOCFILE_ALIGN
);
2178 add_locale_raw_obstack (&file
, &weightpool
);
2180 /* Generate the 8-bit table. Walk through the lists of sequences
2181 starting with the same byte and add them one after the other to
2182 the table. In case we have more than one sequence starting with
2183 the same byte we have to use extra indirection.
2185 First add a record for the NUL byte. This entry will never be used
2186 so it does not matter. */
2189 /* Now insert the `UNDEFINED' value if it is used. Since this value
2190 will probably be used more than once it is good to store the
2191 weights only once. */
2192 if (collate
->undefined
.used_in_level
!= 0)
2193 output_weight (&weightpool
, collate
, &collate
->undefined
);
2195 for (ch
= 1; ch
< 256; ++ch
)
2196 if (collate
->mbheads
[ch
]->mbnext
== NULL
2197 && collate
->mbheads
[ch
]->nmbs
<= 1)
2199 tablemb
[ch
] = output_weight (&weightpool
, collate
,
2200 collate
->mbheads
[ch
]);
2204 /* The entries in the list are sorted by length and then
2205 alphabetically. This is the order in which we will add the
2206 elements to the collation table. This allows simply walking
2207 the table in sequence and stopping at the first matching
2208 entry. Since the longer sequences are coming first in the
2209 list they have the possibility to match first, just as it
2210 has to be. In the worst case we are walking to the end of
2211 the list where we put, if no singlebyte sequence is defined
2212 in the locale definition, the weights for UNDEFINED.
2214 To reduce the length of the search list we compress them a bit.
2215 This happens by collecting sequences of consecutive byte
2216 sequences in one entry (having a begin and end byte sequence)
2217 and add only one index into the weight table. We can find the
2218 consecutive entries since they are also consecutive in the list. */
2219 struct element_t
*runp
= collate
->mbheads
[ch
];
2220 struct element_t
*lastp
;
2222 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool
)));
2224 tablemb
[ch
] = -obstack_object_size (&extrapool
);
2228 /* Store the current index in the weight table. We know that
2229 the current position in the `extrapool' is aligned on a
2234 /* Find out whether this is a single entry or we have more than
2235 one consecutive entry. */
2236 if (runp
->mbnext
!= NULL
2237 && runp
->nmbs
== runp
->mbnext
->nmbs
2238 && memcmp (runp
->mbs
, runp
->mbnext
->mbs
, runp
->nmbs
- 1) == 0
2239 && (runp
->mbs
[runp
->nmbs
- 1]
2240 == runp
->mbnext
->mbs
[runp
->nmbs
- 1] + 1))
2243 struct element_t
*series_startp
= runp
;
2244 struct element_t
*curp
;
2246 /* Compute how much space we will need. */
2247 added
= LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
2248 + 2 * (runp
->nmbs
- 1));
2249 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool
)));
2250 obstack_make_room (&extrapool
, added
);
2252 /* More than one consecutive entry. We mark this by having
2253 a negative index into the indirect table. */
2254 obstack_int32_grow_fast (&extrapool
,
2255 -(obstack_object_size (&indirectpool
)
2256 / sizeof (int32_t)));
2258 /* Now search first the end of the series. */
2260 runp
= runp
->mbnext
;
2261 while (runp
->mbnext
!= NULL
2262 && runp
->nmbs
== runp
->mbnext
->nmbs
2263 && memcmp (runp
->mbs
, runp
->mbnext
->mbs
,
2264 runp
->nmbs
- 1) == 0
2265 && (runp
->mbs
[runp
->nmbs
- 1]
2266 == runp
->mbnext
->mbs
[runp
->nmbs
- 1] + 1));
2268 /* Now walk backward from here to the beginning. */
2271 assert (runp
->nmbs
<= 256);
2272 obstack_1grow_fast (&extrapool
, curp
->nmbs
- 1);
2273 for (i
= 1; i
< curp
->nmbs
; ++i
)
2274 obstack_1grow_fast (&extrapool
, curp
->mbs
[i
]);
2276 /* Now find the end of the consecutive sequence and
2277 add all the indices in the indirect pool. */
2280 weightidx
= output_weight (&weightpool
, collate
, curp
);
2281 obstack_int32_grow (&indirectpool
, weightidx
);
2283 curp
= curp
->mblast
;
2285 while (curp
!= series_startp
);
2287 /* Add the final weight. */
2288 weightidx
= output_weight (&weightpool
, collate
, curp
);
2289 obstack_int32_grow (&indirectpool
, weightidx
);
2291 /* And add the end byte sequence. Without length this
2293 for (i
= 1; i
< curp
->nmbs
; ++i
)
2294 obstack_1grow_fast (&extrapool
, curp
->mbs
[i
]);
2298 /* A single entry. Simply add the index and the length and
2299 string (except for the first character which is already
2303 /* Output the weight info. */
2304 weightidx
= output_weight (&weightpool
, collate
, runp
);
2306 added
= LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
2308 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool
)));
2309 obstack_make_room (&extrapool
, added
);
2311 obstack_int32_grow_fast (&extrapool
, weightidx
);
2312 assert (runp
->nmbs
<= 256);
2313 obstack_1grow_fast (&extrapool
, runp
->nmbs
- 1);
2315 for (i
= 1; i
< runp
->nmbs
; ++i
)
2316 obstack_1grow_fast (&extrapool
, runp
->mbs
[i
]);
2319 /* Add alignment bytes if necessary. */
2320 while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool
)))
2321 obstack_1grow_fast (&extrapool
, '\0');
2325 runp
= runp
->mbnext
;
2327 while (runp
!= NULL
);
2329 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool
)));
2331 /* If the final entry in the list is not a single character we
2332 add an UNDEFINED entry here. */
2333 if (lastp
->nmbs
!= 1)
2335 int added
= LOCFILE_ALIGN_UP (sizeof (int32_t) + 1 + 1);
2336 obstack_make_room (&extrapool
, added
);
2338 obstack_int32_grow_fast (&extrapool
, 0);
2339 /* XXX What rule? We just pick the first. */
2340 obstack_1grow_fast (&extrapool
, 0);
2341 /* Length is zero. */
2342 obstack_1grow_fast (&extrapool
, 0);
2344 /* Add alignment bytes if necessary. */
2345 while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool
)))
2346 obstack_1grow_fast (&extrapool
, '\0');
2350 /* Add padding to the tables if necessary. */
2351 while (!LOCFILE_ALIGNED_P (obstack_object_size (&weightpool
)))
2352 obstack_1grow (&weightpool
, 0);
2354 /* Now add the four tables. */
2355 add_locale_uint32_array (&file
, (const uint32_t *) tablemb
, 256);
2356 add_locale_raw_obstack (&file
, &weightpool
);
2357 add_locale_raw_obstack (&file
, &extrapool
);
2358 add_locale_raw_obstack (&file
, &indirectpool
);
2360 /* Now the same for the wide character table. We need to store some
2361 more information here. */
2362 add_locale_empty (&file
);
2363 add_locale_empty (&file
);
2364 add_locale_empty (&file
);
2366 /* Since we are using the sign of an integer to mark indirection the
2367 offsets in the arrays we are indirectly referring to must not be
2368 zero since -0 == 0. Therefore we add a bit of dummy content. */
2369 obstack_int32_grow (&extrapool
, 0);
2370 obstack_int32_grow (&indirectpool
, 0);
2372 /* Now insert the `UNDEFINED' value if it is used. Since this value
2373 will probably be used more than once it is good to store the
2374 weights only once. */
2375 if (output_weightwc (&weightpool
, collate
, &collate
->undefined
) != 0)
2378 /* Generate the table. Walk through the lists of sequences starting
2379 with the same wide character and add them one after the other to
2380 the table. In case we have more than one sequence starting with
2381 the same byte we have to use extra indirection. */
2384 collidx_table_init (&tablewc
);
2386 atwc
.weightpool
= &weightpool
;
2387 atwc
.extrapool
= &extrapool
;
2388 atwc
.indpool
= &indirectpool
;
2389 atwc
.collate
= collate
;
2390 atwc
.tablewc
= &tablewc
;
2392 wchead_table_iterate (&collate
->wcheads
, add_to_tablewc
);
2394 memset (&atwc
, 0, sizeof (atwc
));
2396 /* Now add the four tables. */
2397 add_locale_collidx_table (&file
, &tablewc
);
2398 add_locale_raw_obstack (&file
, &weightpool
);
2399 add_locale_raw_obstack (&file
, &extrapool
);
2400 add_locale_raw_obstack (&file
, &indirectpool
);
2402 /* Finally write the table with collation element names out. It is
2403 a hash table with a simple function which gets the name of the
2404 character as the input. One character might have many names. The
2405 value associated with the name is an index into the weight table
2406 where we are then interested in the first-level weight value.
2408 To determine how large the table should be we are counting the
2409 elements have to put in. Since we are using internal chaining
2410 using a secondary hash function we have to make the table a bit
2411 larger to avoid extremely long search times. We can achieve
2412 good results with a 40% larger table than there are entries. */
2414 runp
= collate
->start
;
2415 while (runp
!= NULL
)
2417 if (runp
->mbs
!= NULL
&& runp
->weights
!= NULL
&& !runp
->is_character
)
2418 /* Yep, the element really counts. */
2423 /* Add 50% and find the next prime number. */
2424 elem_size
= next_prime (elem_size
+ (elem_size
>> 1));
2426 /* Allocate the table. Each entry consists of two words: the hash
2427 value and an index in a secondary table which provides the index
2428 into the weight table and the string itself (so that a match can
2430 elem_table
= (uint32_t *) obstack_alloc (&extrapool
,
2431 elem_size
* 2 * sizeof (uint32_t));
2432 memset (elem_table
, '\0', elem_size
* 2 * sizeof (uint32_t));
2434 /* Now add the elements. */
2435 runp
= collate
->start
;
2436 while (runp
!= NULL
)
2438 if (runp
->mbs
!= NULL
&& runp
->weights
!= NULL
&& !runp
->is_character
)
2440 /* Compute the hash value of the name. */
2441 uint32_t namelen
= strlen (runp
->name
);
2442 uint32_t hash
= elem_hash (runp
->name
, namelen
);
2443 size_t idx
= hash
% elem_size
;
2445 size_t start_idx
= idx
;
2448 if (elem_table
[idx
* 2] != 0)
2450 /* The spot is already taken. Try iterating using the value
2451 from the secondary hashing function. */
2452 size_t iter
= hash
% (elem_size
- 2) + 1;
2457 if (idx
>= elem_size
)
2459 assert (idx
!= start_idx
);
2461 while (elem_table
[idx
* 2] != 0);
2463 /* This is the spot where we will insert the value. */
2464 elem_table
[idx
* 2] = hash
;
2465 elem_table
[idx
* 2 + 1] = obstack_object_size (&extrapool
);
2467 /* The string itself including length. */
2468 obstack_1grow (&extrapool
, namelen
);
2469 obstack_grow (&extrapool
, runp
->name
, namelen
);
2471 /* And the multibyte representation. */
2472 obstack_1grow (&extrapool
, runp
->nmbs
);
2473 obstack_grow (&extrapool
, runp
->mbs
, runp
->nmbs
);
2475 /* And align again to 32 bits. */
2476 if ((1 + namelen
+ 1 + runp
->nmbs
) % sizeof (int32_t) != 0)
2477 obstack_grow (&extrapool
, "\0\0",
2479 - ((1 + namelen
+ 1 + runp
->nmbs
)
2480 % sizeof (int32_t))));
2482 /* Now some 32-bit values: multibyte collation sequence,
2483 wide char string (including length), and wide char
2484 collation sequence. */
2485 obstack_int32_grow (&extrapool
, runp
->mbseqorder
);
2487 obstack_int32_grow (&extrapool
, runp
->nwcs
);
2488 obstack_grow (&extrapool
, runp
->wcs
,
2489 runp
->nwcs
* sizeof (uint32_t));
2490 maybe_swap_uint32_obstack (&extrapool
, runp
->nwcs
);
2492 obstack_int32_grow (&extrapool
, runp
->wcseqorder
);
2498 /* Prepare to write out this data. */
2499 add_locale_uint32 (&file
, elem_size
);
2500 add_locale_uint32_array (&file
, elem_table
, 2 * elem_size
);
2501 add_locale_raw_obstack (&file
, &extrapool
);
2502 add_locale_raw_data (&file
, collate
->mbseqorder
, 256);
2503 add_locale_collseq_table (&file
, &collate
->wcseqorder
);
2504 add_locale_string (&file
, charmap
->code_set_name
);
2505 write_locale_data (output_path
, LC_COLLATE
, "LC_COLLATE", &file
);
2507 obstack_free (&weightpool
, NULL
);
2508 obstack_free (&extrapool
, NULL
);
2509 obstack_free (&indirectpool
, NULL
);
/* Skip over the tokens of a conditional part of the LC_COLLATE
   definition that is not to be processed.  Tokens are read from LDFILE
   with lr_token and tested in this order:
   - `tok_eof' and `tok_end' are checked first;
   - a nested `ifdef'/`ifndef' is reported as an error and skipped up
     to its `endif' by a recursive call with TO_ENDIF == tok_endif;
   - `endif' ends the skipping, and when TO_ENDIF is zero so does
     `else'; the rest of that line is then ignored;
   - when TO_ENDIF is zero, `elifdef' and `elifndef' are also
     recognized, without consuming the rest of the line;
   - any other `else' is reported as "more than one 'else'".
   For other tokens the rest of the line is discarded.  COLLATE is only
   passed on to the recursive call.  The recursive call's result is
   assigned to an `enum token_t', so the function presumably returns
   the token that ended the skipping; the return statements are not
   visible here.  */
2514 skip_to (struct linereader
*ldfile
, struct locale_collate_t
*collate
,
2515 const struct charmap_t
*charmap
, int to_endif
)
2519 struct token
*now
= lr_token (ldfile
, charmap
, NULL
, NULL
, 0);
2520 enum token_t nowtok
= now
->tok
;
2522 if (nowtok
== tok_eof
|| nowtok
== tok_end
)
2525 if (nowtok
== tok_ifdef
|| nowtok
== tok_ifndef
)
2527 lr_error (ldfile
, _("%s: nested conditionals not supported"),
2529 nowtok
= skip_to (ldfile
, collate
, charmap
, tok_endif
);
2530 if (nowtok
== tok_eof
|| nowtok
== tok_end
)
2533 else if (nowtok
== tok_endif
|| (!to_endif
&& nowtok
== tok_else
))
2535 lr_ignore_rest (ldfile
, 1);
2538 else if (!to_endif
&& (nowtok
== tok_elifdef
|| nowtok
== tok_elifndef
))
2540 /* Do not read the rest of the line. */
2543 else if (nowtok
== tok_else
)
2545 lr_error (ldfile
, _("%s: more than one 'else'"), "LC_COLLATE");
2548 lr_ignore_rest (ldfile
, 0);
2554 collate_read (struct linereader
*ldfile
, struct localedef_t
*result
,
2555 const struct charmap_t
*charmap
, const char *repertoire_name
,
2558 struct repertoire_t
*repertoire
= NULL
;
2559 struct locale_collate_t
*collate
;
2561 struct token
*arg
= NULL
;
2562 enum token_t nowtok
;
2563 enum token_t was_ellipsis
= tok_none
;
2564 struct localedef_t
*copy_locale
= NULL
;
2567 1 - between `order-start' and `order-end'
2568 2 - after `order-end'
2569 3 - after `reorder-after', waiting for `reorder-end'
2570 4 - after `reorder-end'
2571 5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2572 6 - after `reorder-sections-end'
2576 /* Get the repertoire we have to use. */
2577 if (repertoire_name
!= NULL
)
2578 repertoire
= repertoire_read (repertoire_name
);
2580 /* The rest of the line containing `LC_COLLATE' must be free. */
2581 lr_ignore_rest (ldfile
, 1);
2587 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2590 while (nowtok
== tok_eol
);
2592 if (nowtok
!= tok_define
)
2596 lr_ignore_rest (ldfile
, 0);
2599 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2600 if (arg
->tok
!= tok_ident
)
2601 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2604 /* Simply add the new symbol. */
2605 struct name_list
*newsym
= xmalloc (sizeof (*newsym
)
2606 + arg
->val
.str
.lenmb
+ 1);
2607 memcpy (newsym
->str
, arg
->val
.str
.startmb
, arg
->val
.str
.lenmb
);
2608 newsym
->str
[arg
->val
.str
.lenmb
] = '\0';
2609 newsym
->next
= defined
;
2612 lr_ignore_rest (ldfile
, 1);
2617 if (nowtok
== tok_copy
)
2619 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2620 if (now
->tok
!= tok_string
)
2622 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2626 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2627 while (now
->tok
!= tok_eof
&& now
->tok
!= tok_end
);
2629 if (now
->tok
!= tok_eof
2630 || (now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
),
2631 now
->tok
== tok_eof
))
2632 lr_error (ldfile
, _("%s: premature end of file"), "LC_COLLATE");
2633 else if (now
->tok
!= tok_lc_collate
)
2635 lr_error (ldfile
, _("\
2636 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2637 lr_ignore_rest (ldfile
, 0);
2640 lr_ignore_rest (ldfile
, 1);
2645 if (! ignore_content
)
2647 /* Get the locale definition. */
2648 copy_locale
= load_locale (LC_COLLATE
, now
->val
.str
.startmb
,
2649 repertoire_name
, charmap
, NULL
);
2650 if ((copy_locale
->avail
& COLLATE_LOCALE
) == 0)
2652 /* Not yet loaded. So do it now. */
2653 if (locfile_read (copy_locale
, charmap
) != 0)
2657 if (copy_locale
->categories
[LC_COLLATE
].collate
== NULL
)
2661 lr_ignore_rest (ldfile
, 1);
2663 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2667 /* Prepare the data structures. */
2668 collate_startup (ldfile
, result
, copy_locale
, ignore_content
);
2669 collate
= result
->categories
[LC_COLLATE
].collate
;
2677 /* Of course we don't proceed beyond the end of file. */
2678 if (nowtok
== tok_eof
)
2681 /* Ignore empty lines. */
2682 if (nowtok
== tok_eol
)
2684 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2691 case tok_codepoint_collation
:
2692 collate
->codepoint_collation
= true;
2696 /* Allow copying other locales. */
2697 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2698 if (now
->tok
!= tok_string
)
2701 if (! ignore_content
)
2702 load_locale (LC_COLLATE
, now
->val
.str
.startmb
, repertoire_name
,
2705 lr_ignore_rest (ldfile
, 1);
2708 case tok_coll_weight_max
:
2709 /* Ignore the rest of the line if we don't need the input of
2713 lr_ignore_rest (ldfile
, 0);
2720 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2721 if (arg
->tok
!= tok_number
)
2723 if (collate
->col_weight_max
!= -1)
2724 lr_error (ldfile
, _("%s: duplicate definition of `%s'"),
2725 "LC_COLLATE", "col_weight_max");
2727 collate
->col_weight_max
= arg
->val
.num
;
2728 lr_ignore_rest (ldfile
, 1);
2731 case tok_section_symbol
:
2732 /* Ignore the rest of the line if we don't need the input of
2736 lr_ignore_rest (ldfile
, 0);
2743 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2744 if (arg
->tok
!= tok_bsymbol
)
2746 else if (!ignore_content
)
2748 /* Check whether this section is already known. */
2749 struct section_list
*known
= collate
->sections
;
2750 while (known
!= NULL
)
2752 if (strcmp (known
->name
, arg
->val
.str
.startmb
) == 0)
2754 known
= known
->next
;
2760 _("%s: duplicate declaration of section `%s'"),
2761 "LC_COLLATE", arg
->val
.str
.startmb
);
2762 free (arg
->val
.str
.startmb
);
2765 collate
->sections
= make_seclist_elem (collate
,
2766 arg
->val
.str
.startmb
,
2769 lr_ignore_rest (ldfile
, known
== NULL
);
2773 free (arg
->val
.str
.startmb
);
2774 lr_ignore_rest (ldfile
, 0);
2778 case tok_collating_element
:
2779 /* Ignore the rest of the line if we don't need the input of
2783 lr_ignore_rest (ldfile
, 0);
2787 if (state
!= 0 && state
!= 2)
2790 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2791 if (arg
->tok
!= tok_bsymbol
)
2795 const char *symbol
= arg
->val
.str
.startmb
;
2796 size_t symbol_len
= arg
->val
.str
.lenmb
;
2798 /* Next the `from' keyword. */
2799 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2800 if (arg
->tok
!= tok_from
)
2802 free ((char *) symbol
);
2806 ldfile
->return_widestr
= 1;
2807 ldfile
->translate_strings
= 1;
2809 /* Finally the string with the replacement. */
2810 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2812 ldfile
->return_widestr
= 0;
2813 ldfile
->translate_strings
= 0;
2815 if (arg
->tok
!= tok_string
)
2818 if (!ignore_content
&& symbol
!= NULL
)
2820 /* The name is already defined. */
2821 if (check_duplicate (ldfile
, collate
, charmap
,
2822 repertoire
, symbol
, symbol_len
))
2825 if (arg
->val
.str
.startmb
!= NULL
)
2826 insert_entry (&collate
->elem_table
, symbol
, symbol_len
,
2827 new_element (collate
,
2828 arg
->val
.str
.startmb
,
2829 arg
->val
.str
.lenmb
- 1,
2830 arg
->val
.str
.startwc
,
2831 symbol
, symbol_len
, 0));
2836 free ((char *) symbol
);
2837 free (arg
->val
.str
.startmb
);
2838 free (arg
->val
.str
.startwc
);
2840 lr_ignore_rest (ldfile
, 1);
2844 case tok_collating_symbol
:
2845 /* Ignore the rest of the line if we don't need the input of
2849 lr_ignore_rest (ldfile
, 0);
2853 if (state
!= 0 && state
!= 2)
2856 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2857 if (arg
->tok
!= tok_bsymbol
)
2861 char *symbol
= arg
->val
.str
.startmb
;
2862 size_t symbol_len
= arg
->val
.str
.lenmb
;
2863 char *endsymbol
= NULL
;
2864 size_t endsymbol_len
= 0;
2865 enum token_t ellipsis
= tok_none
;
2867 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2868 if (arg
->tok
== tok_ellipsis2
|| arg
->tok
== tok_ellipsis4
)
2870 ellipsis
= arg
->tok
;
2872 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
2874 if (arg
->tok
!= tok_bsymbol
)
2880 endsymbol
= arg
->val
.str
.startmb
;
2881 endsymbol_len
= arg
->val
.str
.lenmb
;
2883 lr_ignore_rest (ldfile
, 1);
2885 else if (arg
->tok
!= tok_eol
)
2891 if (!ignore_content
)
2894 || (ellipsis
!= tok_none
&& endsymbol
== NULL
))
2896 lr_error (ldfile
, _("\
2897 %s: unknown character in collating symbol name"),
2901 else if (ellipsis
== tok_none
)
2903 /* A single symbol, no ellipsis. */
2904 if (check_duplicate (ldfile
, collate
, charmap
,
2905 repertoire
, symbol
, symbol_len
))
2906 /* The name is already defined. */
2909 insert_entry (&collate
->sym_table
, symbol
, symbol_len
,
2910 new_symbol (collate
, symbol
, symbol_len
));
2912 else if (symbol_len
!= endsymbol_len
)
2916 _("invalid names for character range"));
2921 /* Oh my, we have to handle an ellipsis. First, as
2922 usual, determine the common prefix and then
2923 convert the rest into a range. */
2925 unsigned long int from
;
2926 unsigned long int to
;
2929 for (prefixlen
= 0; prefixlen
< symbol_len
; ++prefixlen
)
2930 if (symbol
[prefixlen
] != endsymbol
[prefixlen
])
2933 /* Convert the rest into numbers. */
2934 symbol
[symbol_len
] = '\0';
2935 from
= strtoul (&symbol
[prefixlen
], &endp
,
2936 ellipsis
== tok_ellipsis2
? 16 : 10);
2938 goto col_sym_inv_range
;
2940 endsymbol
[symbol_len
] = '\0';
2941 to
= strtoul (&endsymbol
[prefixlen
], &endp
,
2942 ellipsis
== tok_ellipsis2
? 16 : 10);
2944 goto col_sym_inv_range
;
2947 goto col_sym_inv_range
;
2949 /* Now loop over all entries. */
2954 symbuf
= (char *) obstack_alloc (&collate
->mempool
,
2957 /* Create the name. */
2959 ellipsis
== tok_ellipsis2
2960 ? "%.*s%.*lX" : "%.*s%.*lu",
2961 (int) prefixlen
, symbol
,
2962 (int) (symbol_len
- prefixlen
), from
);
2964 if (check_duplicate (ldfile
, collate
, charmap
,
2965 repertoire
, symbuf
, symbol_len
))
2966 /* The name is already defined. */
2969 insert_entry (&collate
->sym_table
, symbuf
,
2971 new_symbol (collate
, symbuf
,
2974 /* Increment the counter. */
2990 case tok_symbol_equivalence
:
2991 /* Ignore the rest of the line if we don't need the input of
2995 lr_ignore_rest (ldfile
, 0);
3002 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3003 if (arg
->tok
!= tok_bsymbol
)
3007 const char *newname
= arg
->val
.str
.startmb
;
3008 size_t newname_len
= arg
->val
.str
.lenmb
;
3009 const char *symname
;
3011 void *symval
; /* Actually struct symbol_t* */
3013 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3014 if (arg
->tok
!= tok_bsymbol
)
3016 free ((char *) newname
);
3020 symname
= arg
->val
.str
.startmb
;
3021 symname_len
= arg
->val
.str
.lenmb
;
3023 if (newname
== NULL
)
3025 lr_error (ldfile
, _("\
3026 %s: unknown character in equivalent definition name"),
3030 free ((char *) newname
);
3031 free ((char *) symname
);
3034 if (symname
== NULL
)
3036 lr_error (ldfile
, _("\
3037 %s: unknown character in equivalent definition value"),
3039 goto sym_equiv_free
;
3042 /* See whether the symbol name is already defined. */
3043 if (find_entry (&collate
->sym_table
, symname
, symname_len
,
3046 lr_error (ldfile
, _("\
3047 %s: unknown symbol `%s' in equivalent definition"),
3048 "LC_COLLATE", symname
);
3049 goto sym_equiv_free
;
3052 if (insert_entry (&collate
->sym_table
,
3053 newname
, newname_len
, symval
) < 0)
3055 lr_error (ldfile
, _("\
3056 error while adding equivalent collating symbol"));
3057 goto sym_equiv_free
;
3060 free ((char *) symname
);
3062 lr_ignore_rest (ldfile
, 1);
3066 /* Ignore the rest of the line if we don't need the input of
3070 lr_ignore_rest (ldfile
, 0);
3074 /* We get told about the scripts we know. */
3075 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3076 if (arg
->tok
!= tok_bsymbol
)
3080 struct section_list
*runp
= collate
->known_sections
;
3083 while (runp
!= NULL
)
3084 if (strncmp (runp
->name
, arg
->val
.str
.startmb
,
3085 arg
->val
.str
.lenmb
) == 0
3086 && runp
->name
[arg
->val
.str
.lenmb
] == '\0')
3089 runp
= runp
->def_next
;
3093 lr_error (ldfile
, _("duplicate definition of script `%s'"),
3095 lr_ignore_rest (ldfile
, 0);
3099 runp
= (struct section_list
*) xcalloc (1, sizeof (*runp
));
3100 name
= (char *) xmalloc (arg
->val
.str
.lenmb
+ 1);
3101 memcpy (name
, arg
->val
.str
.startmb
, arg
->val
.str
.lenmb
);
3102 name
[arg
->val
.str
.lenmb
] = '\0';
3105 runp
->def_next
= collate
->known_sections
;
3106 collate
->known_sections
= runp
;
3108 lr_ignore_rest (ldfile
, 1);
3111 case tok_order_start
:
3112 /* Ignore the rest of the line if we don't need the input of
3116 lr_ignore_rest (ldfile
, 0);
3120 if (state
!= 0 && state
!= 1 && state
!= 2)
3124 /* The 14652 draft does not specify whether all `order_start' lines
3125 must contain the same number of sort-rules, but 14651 does. So
3126 we require this here as well. */
3127 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3128 if (arg
->tok
== tok_bsymbol
)
3130 /* This had better be a section name. */
3131 struct section_list
*sp
= collate
->known_sections
;
3133 && (sp
->name
== NULL
3134 || strncmp (sp
->name
, arg
->val
.str
.startmb
,
3135 arg
->val
.str
.lenmb
) != 0
3136 || sp
->name
[arg
->val
.str
.lenmb
] != '\0'))
3141 lr_error (ldfile
, _("\
3142 %s: unknown section name `%.*s'"),
3143 "LC_COLLATE", (int) arg
->val
.str
.lenmb
,
3144 arg
->val
.str
.startmb
);
3145 /* We use the error section. */
3146 collate
->current_section
= &collate
->error_section
;
3148 if (collate
->error_section
.first
== NULL
)
3150 /* Insert &collate->error_section at the end of
3151 the collate->sections list. */
3152 if (collate
->sections
== NULL
)
3153 collate
->sections
= &collate
->error_section
;
3156 sp
= collate
->sections
;
3157 while (sp
->next
!= NULL
)
3160 sp
->next
= &collate
->error_section
;
3162 collate
->error_section
.next
= NULL
;
3167 /* One should not be allowed to open the same
3169 if (sp
->first
!= NULL
)
3170 lr_error (ldfile
, _("\
3171 %s: multiple order definitions for section `%s'"),
3172 "LC_COLLATE", sp
->name
);
3175 /* Insert sp in the collate->sections list,
3176 right after collate->current_section. */
3177 if (collate
->current_section
!= NULL
)
3179 sp
->next
= collate
->current_section
->next
;
3180 collate
->current_section
->next
= sp
;
3182 else if (collate
->sections
== NULL
)
3183 /* This is the first section to be defined. */
3184 collate
->sections
= sp
;
3186 collate
->current_section
= sp
;
3189 /* Next should come the end of the line or a semicolon. */
3190 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
3192 if (arg
->tok
== tok_eol
)
3196 /* This means we have exactly one rule: `forward'. */
3198 lr_error (ldfile
, _("\
3199 %s: invalid number of sorting rules"),
3203 sp
->rules
= obstack_alloc (&collate
->mempool
,
3204 (sizeof (enum coll_sort_rule
)
3206 for (cnt
= 0; cnt
< nrules
; ++cnt
)
3207 sp
->rules
[cnt
] = sort_forward
;
3213 /* Get the next token. */
3214 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
3220 /* There is no section symbol. Therefore we use the unnamed
3222 collate
->current_section
= &collate
->unnamed_section
;
3224 if (collate
->unnamed_section_defined
)
3225 lr_error (ldfile
, _("\
3226 %s: multiple order definitions for unnamed section"),
3230 /* Insert &collate->unnamed_section at the beginning of
3231 the collate->sections list. */
3232 collate
->unnamed_section
.next
= collate
->sections
;
3233 collate
->sections
= &collate
->unnamed_section
;
3234 collate
->unnamed_section_defined
= true;
3238 /* Now read the direction names. */
3239 read_directions (ldfile
, arg
, charmap
, repertoire
, result
);
3241 /* From now we need the strings untranslated. */
3242 ldfile
->translate_strings
= 0;
3246 /* Ignore the rest of the line if we don't need the input of
3250 lr_ignore_rest (ldfile
, 0);
3257 /* Handle ellipsis at end of list. */
3258 if (was_ellipsis
!= tok_none
)
3260 handle_ellipsis (ldfile
, NULL
, 0, was_ellipsis
, charmap
,
3261 repertoire
, result
);
3262 was_ellipsis
= tok_none
;
3266 lr_ignore_rest (ldfile
, 1);
3269 case tok_reorder_after
:
3270 /* Ignore the rest of the line if we don't need the input of
3274 lr_ignore_rest (ldfile
, 0);
3280 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
3284 /* Handle ellipsis at end of list. */
3285 if (was_ellipsis
!= tok_none
)
3287 handle_ellipsis (ldfile
, arg
->val
.str
.startmb
,
3288 arg
->val
.str
.lenmb
, was_ellipsis
, charmap
,
3289 repertoire
, result
);
3290 was_ellipsis
= tok_none
;
3293 else if (state
== 0 && copy_locale
== NULL
)
3295 else if (state
!= 0 && state
!= 2 && state
!= 3)
3299 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3300 if (arg
->tok
== tok_bsymbol
|| arg
->tok
== tok_ucs4
)
3302 /* Find this symbol in the sequence table. */
3306 struct element_t
*insp
;
3310 if (arg
->tok
== tok_bsymbol
)
3312 startmb
= arg
->val
.str
.startmb
;
3313 lenmb
= arg
->val
.str
.lenmb
;
3317 sprintf (ucsbuf
, "U%08X", arg
->val
.ucs4
);
3322 if (find_entry (&collate
->seq_table
, startmb
, lenmb
, &ptr
) == 0)
3323 /* Yes, the symbol exists. Simply point the cursor
3325 collate
->cursor
= (struct element_t
*) ptr
;
3328 struct symbol_t
*symbp
;
3331 if (find_entry (&collate
->sym_table
, startmb
, lenmb
,
3336 if (symbp
->order
->last
!= NULL
3337 || symbp
->order
->next
!= NULL
)
3338 collate
->cursor
= symbp
->order
;
3341 /* This is a collating symbol but its position
3342 is not yet defined. */
3343 lr_error (ldfile
, _("\
3344 %s: order for collating symbol %.*s not yet defined"),
3345 "LC_COLLATE", (int) lenmb
, startmb
);
3346 collate
->cursor
= NULL
;
3350 else if (find_entry (&collate
->elem_table
, startmb
, lenmb
,
3353 insp
= (struct element_t
*) ptr
;
3355 if (insp
->last
!= NULL
|| insp
->next
!= NULL
)
3356 collate
->cursor
= insp
;
3359 /* This is a collating element but its position
3360 is not yet defined. */
3361 lr_error (ldfile
, _("\
3362 %s: order for collating element %.*s not yet defined"),
3363 "LC_COLLATE", (int) lenmb
, startmb
);
3364 collate
->cursor
= NULL
;
3370 /* This is bad. The symbol after which we have to
3371 insert does not exist. */
3372 lr_error (ldfile
, _("\
3373 %s: cannot reorder after %.*s: symbol not known"),
3374 "LC_COLLATE", (int) lenmb
, startmb
);
3375 collate
->cursor
= NULL
;
3380 lr_ignore_rest (ldfile
, no_error
);
3383 /* This must not happen. */
3387 case tok_reorder_end
:
3388 /* Ignore the rest of the line if we don't need the input of
3396 lr_ignore_rest (ldfile
, 1);
3399 case tok_reorder_sections_after
:
3400 /* Ignore the rest of the line if we don't need the input of
3404 lr_ignore_rest (ldfile
, 0);
3410 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
3414 /* Handle ellipsis at end of list. */
3415 if (was_ellipsis
!= tok_none
)
3417 handle_ellipsis (ldfile
, NULL
, 0, was_ellipsis
, charmap
,
3418 repertoire
, result
);
3419 was_ellipsis
= tok_none
;
3422 else if (state
== 3)
3424 record_error (0, 0, _("\
3425 %s: missing `reorder-end' keyword"), "LC_COLLATE");
3428 else if (state
!= 2 && state
!= 4)
3432 /* Get the name of the sections we are adding after. */
3433 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3434 if (arg
->tok
== tok_bsymbol
)
3436 /* Now find a section with this name. */
3437 struct section_list
*runp
= collate
->sections
;
3439 while (runp
!= NULL
)
3441 if (runp
->name
!= NULL
3442 && strlen (runp
->name
) == arg
->val
.str
.lenmb
3443 && memcmp (runp
->name
, arg
->val
.str
.startmb
,
3444 arg
->val
.str
.lenmb
) == 0)
3451 collate
->current_section
= runp
;
3454 /* This is bad. The section after which we have to
3455 reorder does not exist. Therefore we cannot
3456 process the whole rest of this reorder
3458 lr_error (ldfile
, _("%s: section `%.*s' not known"),
3459 "LC_COLLATE", (int) arg
->val
.str
.lenmb
,
3460 arg
->val
.str
.startmb
);
3464 lr_ignore_rest (ldfile
, 0);
3466 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3468 while (now
->tok
== tok_reorder_sections_after
3469 || now
->tok
== tok_reorder_sections_end
3470 || now
->tok
== tok_end
);
3472 /* Process the token we just saw. */
3478 /* This must not happen. */
3482 case tok_reorder_sections_end
:
3483 /* Ignore the rest of the line if we don't need the input of
3491 lr_ignore_rest (ldfile
, 1);
3496 /* Ignore the rest of the line if we don't need the input of
3500 lr_ignore_rest (ldfile
, 0);
3504 if (state
!= 0 && state
!= 1 && state
!= 3 && state
!= 5)
3507 if ((state
== 0 || state
== 5) && nowtok
== tok_ucs4
)
3510 if (nowtok
== tok_ucs4
)
3512 snprintf (ucs4buf
, sizeof (ucs4buf
), "U%08X", now
->val
.ucs4
);
3516 else if (arg
!= NULL
)
3518 symstr
= arg
->val
.str
.startmb
;
3519 symlen
= arg
->val
.str
.lenmb
;
3523 lr_error (ldfile
, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3524 (int) ldfile
->token
.val
.str
.lenmb
,
3525 ldfile
->token
.val
.str
.startmb
);
3529 struct element_t
*seqp
;
3532 /* We are outside an `order_start' region. This means
3533 we must only accept definitions of values for
3534 collation symbols since these are purely abstract
3535 values and don't need directions associated. */
3538 if (find_entry (&collate
->seq_table
, symstr
, symlen
, &ptr
) == 0)
3542 /* It's already defined. First check whether this
3543 is really a collating symbol. */
3544 if (seqp
->is_character
)
3553 if (find_entry (&collate
->sym_table
, symstr
, symlen
,
3555 /* No collating symbol, it's an error. */
3558 /* Maybe this is the first time we define a symbol
3559 value and it is before the first actual section. */
3560 if (collate
->sections
== NULL
)
3561 collate
->sections
= collate
->current_section
=
3562 &collate
->symbol_section
;
3565 if (was_ellipsis
!= tok_none
)
3567 handle_ellipsis (ldfile
, symstr
, symlen
, was_ellipsis
,
3568 charmap
, repertoire
, result
);
3570 /* Remember that we processed the ellipsis. */
3571 was_ellipsis
= tok_none
;
3573 /* And don't add the value a second time. */
3577 else if (state
== 3)
3579 /* It is possible that we already have this collation sequence.
3580 In this case we move the entry. */
3584 /* If the symbol after which we have to insert was not found
3585 ignore all entries. */
3586 if (collate
->cursor
== NULL
)
3588 lr_ignore_rest (ldfile
, 0);
3592 if (find_entry (&collate
->seq_table
, symstr
, symlen
, &ptr
) == 0)
3594 seqp
= (struct element_t
*) ptr
;
3598 if (find_entry (&collate
->sym_table
, symstr
, symlen
, &sym
) == 0
3599 && (seqp
= ((struct symbol_t
*) sym
)->order
) != NULL
)
3602 if (find_entry (&collate
->elem_table
, symstr
, symlen
, &ptr
) == 0
3603 && (seqp
= (struct element_t
*) ptr
,
3604 seqp
->last
!= NULL
|| seqp
->next
!= NULL
3605 || (collate
->start
!= NULL
&& seqp
== collate
->start
)))
3608 /* Remove the entry from the old position. */
3609 if (seqp
->last
== NULL
)
3610 collate
->start
= seqp
->next
;
3612 seqp
->last
->next
= seqp
->next
;
3613 if (seqp
->next
!= NULL
)
3614 seqp
->next
->last
= seqp
->last
;
3616 /* We also have to check whether this entry is the
3617 first or last of a section. */
3618 if (seqp
->section
->first
== seqp
)
3620 if (seqp
->section
->first
== seqp
->section
->last
)
3621 /* This section has no content anymore. */
3622 seqp
->section
->first
= seqp
->section
->last
= NULL
;
3624 seqp
->section
->first
= seqp
->next
;
3626 else if (seqp
->section
->last
== seqp
)
3627 seqp
->section
->last
= seqp
->last
;
3629 /* Now insert it in the new place. */
3630 insert_weights (ldfile
, seqp
, charmap
, repertoire
, result
,
3635 /* Otherwise we just add a new entry. */
3637 else if (state
== 5)
3639 /* We are reordering sections. Find the named section. */
3640 struct section_list
*runp
= collate
->sections
;
3641 struct section_list
*prevp
= NULL
;
3643 while (runp
!= NULL
)
3645 if (runp
->name
!= NULL
3646 && strlen (runp
->name
) == symlen
3647 && memcmp (runp
->name
, symstr
, symlen
) == 0)
3656 lr_error (ldfile
, _("%s: section `%.*s' not known"),
3657 "LC_COLLATE", (int) symlen
, symstr
);
3658 lr_ignore_rest (ldfile
, 0);
3662 if (runp
!= collate
->current_section
)
3664 /* Remove the named section from the old place and
3665 insert it in the new one. */
3666 prevp
->next
= runp
->next
;
3668 runp
->next
= collate
->current_section
->next
;
3669 collate
->current_section
->next
= runp
;
3670 collate
->current_section
= runp
;
3673 /* Process the rest of the line which might change
3674 the collation rules. */
3675 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
3677 if (arg
->tok
!= tok_eof
&& arg
->tok
!= tok_eol
)
3678 read_directions (ldfile
, arg
, charmap
, repertoire
,
3683 else if (was_ellipsis
!= tok_none
)
3685 /* Using the information in the `ellipsis_weight'
3686 element and this and the last value we have to handle
3687 the ellipsis now. */
3688 assert (state
== 1);
3690 handle_ellipsis (ldfile
, symstr
, symlen
, was_ellipsis
, charmap
,
3691 repertoire
, result
);
3693 /* Remember that we processed the ellipsis. */
3694 was_ellipsis
= tok_none
;
3696 /* And don't add the value a second time. */
3700 /* Now insert in the new place. */
3701 insert_value (ldfile
, symstr
, symlen
, charmap
, repertoire
, result
);
3705 /* Ignore the rest of the line if we don't need the input of
3709 lr_ignore_rest (ldfile
, 0);
3716 if (was_ellipsis
!= tok_none
)
3719 _("%s: cannot have `%s' as end of ellipsis range"),
3720 "LC_COLLATE", "UNDEFINED");
3722 unlink_element (collate
);
3723 was_ellipsis
= tok_none
;
3726 /* See whether UNDEFINED already appeared somewhere. */
3727 if (collate
->undefined
.next
!= NULL
3728 || &collate
->undefined
== collate
->cursor
)
3731 _("%s: order for `%.*s' already defined at %s:%zu"),
3732 "LC_COLLATE", 9, "UNDEFINED",
3733 collate
->undefined
.file
,
3734 collate
->undefined
.line
);
3735 lr_ignore_rest (ldfile
, 0);
3738 /* Parse the weights. */
3739 insert_weights (ldfile
, &collate
->undefined
, charmap
,
3740 repertoire
, result
, tok_none
);
3743 case tok_ellipsis2
: /* symbolic hexadecimal ellipsis */
3744 case tok_ellipsis3
: /* absolute ellipsis */
3745 case tok_ellipsis4
: /* symbolic decimal ellipsis */
3746 /* This is the symbolic (decimal or hexadecimal) or absolute
3748 if (was_ellipsis
!= tok_none
)
3751 if (state
!= 0 && state
!= 1 && state
!= 3)
3754 was_ellipsis
= nowtok
;
3756 insert_weights (ldfile
, &collate
->ellipsis_weight
, charmap
,
3757 repertoire
, result
, nowtok
);
3762 /* Next we assume `LC_COLLATE'. */
3763 if (!ignore_content
)
3766 && copy_locale
== NULL
3767 && !collate
->codepoint_collation
)
3768 /* We must either see a copy statement or have
3769 ordering values, or codepoint_collation. */
3771 _("%s: empty category description not allowed"),
3773 else if (state
== 1)
3775 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
3778 /* Handle ellipsis at end of list. */
3779 if (was_ellipsis
!= tok_none
)
3781 handle_ellipsis (ldfile
, NULL
, 0, was_ellipsis
, charmap
,
3782 repertoire
, result
);
3783 was_ellipsis
= tok_none
;
3786 else if (state
== 3)
3787 record_error (0, 0, _("\
3788 %s: missing `reorder-end' keyword"), "LC_COLLATE");
3789 else if (state
== 5)
3790 record_error (0, 0, _("\
3791 %s: missing `reorder-sections-end' keyword"), "LC_COLLATE");
3793 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3794 if (arg
->tok
== tok_eof
)
3796 if (arg
->tok
== tok_eol
)
3797 lr_error (ldfile
, _("%s: incomplete `END' line"), "LC_COLLATE");
3798 else if (arg
->tok
!= tok_lc_collate
)
3799 lr_error (ldfile
, _("\
3800 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3801 lr_ignore_rest (ldfile
, arg
->tok
== tok_lc_collate
);
3807 lr_ignore_rest (ldfile
, 0);
3811 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3812 if (arg
->tok
!= tok_ident
)
3815 /* Simply add the new symbol. */
3816 struct name_list
*newsym
= xmalloc (sizeof (*newsym
)
3817 + arg
->val
.str
.lenmb
+ 1);
3818 memcpy (newsym
->str
, arg
->val
.str
.startmb
, arg
->val
.str
.lenmb
);
3819 newsym
->str
[arg
->val
.str
.lenmb
] = '\0';
3820 newsym
->next
= defined
;
3823 lr_ignore_rest (ldfile
, 1);
3829 lr_ignore_rest (ldfile
, 0);
3833 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3834 if (arg
->tok
!= tok_ident
)
3837 /* Remove _all_ occurrences of the symbol from the list. */
3838 struct name_list
*prevdef
= NULL
;
3839 struct name_list
*curdef
= defined
;
3840 while (curdef
!= NULL
)
3841 if (strncmp (arg
->val
.str
.startmb
, curdef
->str
,
3842 arg
->val
.str
.lenmb
) == 0
3843 && curdef
->str
[arg
->val
.str
.lenmb
] == '\0')
3845 if (prevdef
== NULL
)
3846 defined
= curdef
->next
;
3848 prevdef
->next
= curdef
->next
;
3850 struct name_list
*olddef
= curdef
;
3851 curdef
= curdef
->next
;
3858 curdef
= curdef
->next
;
3861 lr_ignore_rest (ldfile
, 1);
3868 lr_ignore_rest (ldfile
, 0);
3873 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3874 if (arg
->tok
!= tok_ident
)
3876 lr_ignore_rest (ldfile
, 1);
3878 if (collate
->else_action
== else_none
)
3881 while (curdef
!= NULL
)
3882 if (strncmp (arg
->val
.str
.startmb
, curdef
->str
,
3883 arg
->val
.str
.lenmb
) == 0
3884 && curdef
->str
[arg
->val
.str
.lenmb
] == '\0')
3887 curdef
= curdef
->next
;
3889 if ((nowtok
== tok_ifdef
&& curdef
!= NULL
)
3890 || (nowtok
== tok_ifndef
&& curdef
== NULL
))
3892 /* We have to use the if-branch. */
3893 collate
->else_action
= else_ignore
;
3897 /* We have to use the else-branch, if there is one. */
3898 nowtok
= skip_to (ldfile
, collate
, charmap
, 0);
3899 if (nowtok
== tok_else
)
3900 collate
->else_action
= else_seen
;
3901 else if (nowtok
== tok_elifdef
)
3906 else if (nowtok
== tok_elifndef
)
3908 nowtok
= tok_ifndef
;
3911 else if (nowtok
== tok_eof
)
3913 else if (nowtok
== tok_end
)
3919 /* XXX Should it really become necessary to support nested
3920 preprocessor handling we will push the state here. */
3921 lr_error (ldfile
, _("%s: nested conditionals not supported"),
3923 nowtok
= skip_to (ldfile
, collate
, charmap
, 1);
3924 if (nowtok
== tok_eof
)
3926 else if (nowtok
== tok_end
)
3936 lr_ignore_rest (ldfile
, 0);
3940 lr_ignore_rest (ldfile
, 1);
3942 if (collate
->else_action
== else_ignore
)
3944 /* Ignore everything until the endif. */
3945 nowtok
= skip_to (ldfile
, collate
, charmap
, 1);
3946 if (nowtok
== tok_eof
)
3948 else if (nowtok
== tok_end
)
3953 assert (collate
->else_action
== else_none
);
3954 lr_error (ldfile
, _("\
3955 %s: '%s' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE",
3956 nowtok
== tok_else
? "else"
3957 : nowtok
== tok_elifdef
? "elifdef" : "elifndef");
3964 lr_ignore_rest (ldfile
, 0);
3968 lr_ignore_rest (ldfile
, 1);
3970 if (collate
->else_action
!= else_ignore
3971 && collate
->else_action
!= else_seen
)
3972 lr_error (ldfile
, _("\
3973 %s: 'endif' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE");
3975 /* XXX If we support nested preprocessor directives we pop
3977 collate
->else_action
= else_none
;
3982 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
3985 /* Prepare for the next round. */
3986 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3991 /* When we come here we reached the end of the file. */
3992 lr_error (ldfile
, _("%s: premature end of file"), "LC_COLLATE");