1 /* Copyright (C) 1995-2019 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, see <https://www.gnu.org/licenses/>. */
26 #include <sys/param.h>
28 #include "localedef.h"
30 #include "localeinfo.h"
31 #include "linereader.h"
33 #include "elem-hash.h"
35 /* Uncomment the following line in the production version. */
36 /* #define NDEBUG 1 */
39 #define obstack_chunk_alloc malloc
40 #define obstack_chunk_free free
/* Append the 32-bit value DATA to the object currently being built in
   OBSTACK.  The size of the object so far must satisfy LOCFILE_ALIGNED_P
   (checked by the assert), and DATA is passed through maybe_swap_uint32
   before it is stored.  */
43 __attribute ((always_inline
))
44 obstack_int32_grow (struct obstack
*obstack
, int32_t data
)
46 assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack
)));
47 data
= maybe_swap_uint32 (data
);
/* The size comparison is a constant expression: obstack_int_grow is used
   when int32_t and int have the same size.
   NOTE(review): the line between the two calls is not visible in this
   excerpt; presumably obstack_grow is the `else' fallback -- confirm.  */
48 if (sizeof (int32_t) == sizeof (int))
49 obstack_int_grow (obstack
, data
);
51 obstack_grow (obstack
, &data
, sizeof (int32_t));
/* Variant of obstack_int32_grow that appends DATA with
   obstack_int_grow_fast when int32_t and int have the same size.  The
   same alignment assertion and maybe_swap_uint32 conversion are applied
   first.  */
55 __attribute ((always_inline
))
56 obstack_int32_grow_fast (struct obstack
*obstack
, int32_t data
)
58 assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack
)));
59 data
= maybe_swap_uint32 (data
);
60 if (sizeof (int32_t) == sizeof (int))
61 obstack_int_grow_fast (obstack
, data
);
/* Note that the other call visible here is plain obstack_grow, not a
   `_fast' variant.
   NOTE(review): the line separating the two calls is not visible in this
   excerpt; presumably this is the `else' branch -- confirm.  */
63 obstack_grow (obstack
, &data
, sizeof (int32_t));
66 /* Forward declaration. */
69 /* Data type for list of strings. */
72 /* Successor in the known_sections list. */
73 struct section_list
*def_next
;
74 /* Successor in the sections list. */
75 struct section_list
*next
;
76 /* Name of the section. */
78 /* First element of this section. */
79 struct element_t
*first
;
80 /* Last element of this section. */
81 struct element_t
*last
;
82 /* These are the rules for this section. */
83 enum coll_sort_rule
*rules
;
84 /* Index of the rule set in the appropriate section of the output file. */
92 /* Number of elements. */
98 /* Data type for collating element. */
110 /* The following is a bit mask whose bits are set if this element is
111 used in the appropriate level. Interesting for the singlebyte
114 XXX The type here restricts the number of levels to 32. It could
115 be changed if necessary but I doubt this is necessary. */
116 unsigned int used_in_level
;
118 struct element_list_t
*weights
;
120 /* Nonzero if this is a real character definition. */
123 /* Order of the character in the sequence. This information will
124 be used in range expressions. */
128 /* Where does the definition come from. */
132 /* Which section does this belong to. */
133 struct section_list
*section
;
135 /* Predecessor and successor in the order list. */
136 struct element_t
*last
;
137 struct element_t
*next
;
139 /* Next element in multibyte output list. */
140 struct element_t
*mbnext
;
141 struct element_t
*mblast
;
143 /* Next element in wide character output list. */
144 struct element_t
*wcnext
;
145 struct element_t
*wclast
;
148 /* Special element value. */
149 #define ELEMENT_ELLIPSIS2 ((struct element_t *) 1)
150 #define ELEMENT_ELLIPSIS3 ((struct element_t *) 2)
151 #define ELEMENT_ELLIPSIS4 ((struct element_t *) 3)
153 /* Data type for collating symbol. */
158 /* Point to place in the order list. */
159 struct element_t
*order
;
161 /* Where does the definition come from. */
166 /* Sparse table of struct element_t *. */
167 #define TABLE wchead_table
168 #define ELEMENT struct element_t *
171 #define NO_ADD_LOCALE
174 /* Sparse table of int32_t. */
175 #define TABLE collidx_table
176 #define ELEMENT int32_t
180 /* Sparse table of uint32_t. */
181 #define TABLE collseq_table
182 #define ELEMENT uint32_t
183 #define DEFAULT ~((uint32_t) 0)
187 /* Simple name list for the preprocessor. */
190 struct name_list
*next
;
195 /* The real definition of the struct for the LC_COLLATE locale. */
196 struct locale_collate_t
201 /* List of known scripts. */
202 struct section_list
*known_sections
;
203 /* List of used sections. */
204 struct section_list
*sections
;
205 /* Current section using definition. */
206 struct section_list
*current_section
;
207 /* There always can be an unnamed section. */
208 struct section_list unnamed_section
;
209 /* Flag whether the unnamed section has been defined. */
210 bool unnamed_section_defined
;
211 /* To make handling of errors easier we have another section. */
212 struct section_list error_section
;
213 /* Sometimes we are defining the values for collating symbols before
214 the first actual section. */
215 struct section_list symbol_section
;
217 /* Start of the order list. */
218 struct element_t
*start
;
220 /* The undefined element. */
221 struct element_t undefined
;
223 /* This is the cursor for `reorder_after' insertions. */
224 struct element_t
*cursor
;
226 /* This value is used when handling ellipsis. */
227 struct element_t ellipsis_weight
;
229 /* Known collating elements. */
230 hash_table elem_table
;
232 /* Known collating symbols. */
233 hash_table sym_table
;
235 /* Known collation sequences. */
236 hash_table seq_table
;
238 struct obstack mempool
;
240 /* The LC_COLLATE category is a bit special as it is sometimes possible
241 that the definitions from more than one input file contain information.
242 Therefore we keep all relevant input in a list. */
243 struct locale_collate_t
*next
;
245 /* Arrays with heads of the list for each of the leading bytes in
246 the multibyte sequences. */
247 struct element_t
*mbheads
[256];
249 /* Sparse table with the heads of the lists used for the wide
250 character output. */
251 struct wchead_table wcheads
;
253 /* The arrays with the collation sequence order. */
254 unsigned char mbseqorder
[256];
255 struct collseq_table wcseqorder
;
257 /* State of the preprocessor. */
268 /* We have a few global variables which are used for reading all
269 LC_COLLATE category descriptions in all files. */
270 static uint32_t nrules
;
272 /* List of defined preprocessor symbols. */
273 static struct name_list
*defined
;
276 /* We need UTF-8 encoding of numbers. */
/* Write the UTF-8 encoding of VAL into BUF.
   NOTE(review): only the multi-byte part of the function is visible in
   this excerpt; the handling of small values, the loop that fills the
   continuation bytes and the return value are on lines not shown.  */
278 __attribute ((always_inline
))
279 utf8_encode (char *buf
, int val
)
/* Look for the smallest STEP in 2..5 such that VAL has no bits set at or
   above bit position 5 * STEP + 1.  */
292 for (step
= 2; step
< 6; ++step
)
293 if ((val
& (~(uint32_t)0 << (5 * step
+ 1))) == 0)
/* Lead byte: ~0xff shifted right by STEP and truncated to one byte.  */
297 *buf
= (unsigned char) (~0xff >> step
);
/* Continuation byte: 0x80 combined with the low six bits of VAL.  */
301 buf
[step
] = 0x80 | (val
& 0x3f);
312 static struct section_list
*
313 make_seclist_elem (struct locale_collate_t
*collate
, const char *string
,
314 struct section_list
*next
)
316 struct section_list
*newp
;
318 newp
= (struct section_list
*) obstack_alloc (&collate
->mempool
,
329 static struct element_t
*
330 new_element (struct locale_collate_t
*collate
, const char *mbs
, size_t mbslen
,
331 const uint32_t *wcs
, const char *name
, size_t namelen
,
334 struct element_t
*newp
;
336 newp
= (struct element_t
*) obstack_alloc (&collate
->mempool
,
338 newp
->name
= name
== NULL
? NULL
: obstack_copy0 (&collate
->mempool
,
342 newp
->mbs
= obstack_copy0 (&collate
->mempool
, mbs
, mbslen
);
352 size_t nwcs
= wcslen ((wchar_t *) wcs
);
354 /* Handle <U0000> as a single character. */
357 obstack_grow (&collate
->mempool
, wcs
, nwcs
* sizeof (uint32_t));
358 obstack_grow (&collate
->mempool
, &zero
, sizeof (uint32_t));
359 newp
->wcs
= (uint32_t *) obstack_finish (&collate
->mempool
);
367 newp
->mborder
= NULL
;
369 newp
->used_in_level
= 0;
370 newp
->is_character
= is_character
;
372 /* Will be assigned later. XXX */
373 newp
->mbseqorder
= 0;
374 newp
->wcseqorder
= 0;
376 /* Will be allocated later. */
377 newp
->weights
= NULL
;
382 newp
->section
= collate
->current_section
;
/* Allocate a new collating symbol record from COLLATE's memory pool and
   give it a private copy of the first LEN bytes of NAME (obstack_copy0
   appends a terminating null byte).
   NOTE(review): the remaining field initialisation and the return
   statement are on lines not visible in this excerpt.  */
397 static struct symbol_t
*
398 new_symbol (struct locale_collate_t
*collate
, const char *name
, size_t len
)
400 struct symbol_t
*newp
;
402 newp
= (struct symbol_t
*) obstack_alloc (&collate
->mempool
, sizeof (*newp
));
404 newp
->name
= obstack_copy0 (&collate
->mempool
, name
, len
);
414 /* Test whether this name is already defined somewhere. */
416 check_duplicate (struct linereader
*ldfile
, struct locale_collate_t
*collate
,
417 const struct charmap_t
*charmap
,
418 struct repertoire_t
*repertoire
, const char *symbol
,
423 if (find_entry (&charmap
->char_table
, symbol
, symbol_len
, &ignore
) == 0)
425 lr_error (ldfile
, _("`%.*s' already defined in charmap"),
426 (int) symbol_len
, symbol
);
430 if (repertoire
!= NULL
431 && (find_entry (&repertoire
->char_table
, symbol
, symbol_len
, &ignore
)
434 lr_error (ldfile
, _("`%.*s' already defined in repertoire"),
435 (int) symbol_len
, symbol
);
439 if (find_entry (&collate
->sym_table
, symbol
, symbol_len
, &ignore
) == 0)
441 lr_error (ldfile
, _("`%.*s' already defined as collating symbol"),
442 (int) symbol_len
, symbol
);
446 if (find_entry (&collate
->elem_table
, symbol
, symbol_len
, &ignore
) == 0)
448 lr_error (ldfile
, _("`%.*s' already defined as collating element"),
449 (int) symbol_len
, symbol
);
457 /* Read the direction specification. */
459 read_directions (struct linereader
*ldfile
, struct token
*arg
,
460 const struct charmap_t
*charmap
,
461 struct repertoire_t
*repertoire
, struct localedef_t
*result
)
464 int max
= nrules
?: 10;
465 enum coll_sort_rule
*rules
= calloc (max
, sizeof (*rules
));
467 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
473 if (arg
->tok
== tok_forward
)
475 if (rules
[cnt
] & sort_backward
)
479 lr_error (ldfile
, _("\
480 %s: `forward' and `backward' are mutually excluding each other"),
485 else if (rules
[cnt
] & sort_forward
)
489 lr_error (ldfile
, _("\
490 %s: `%s' mentioned more than once in definition of weight %d"),
491 "LC_COLLATE", "forward", cnt
+ 1);
495 rules
[cnt
] |= sort_forward
;
499 else if (arg
->tok
== tok_backward
)
501 if (rules
[cnt
] & sort_forward
)
505 lr_error (ldfile
, _("\
506 %s: `forward' and `backward' are mutually excluding each other"),
511 else if (rules
[cnt
] & sort_backward
)
515 lr_error (ldfile
, _("\
516 %s: `%s' mentioned more than once in definition of weight %d"),
517 "LC_COLLATE", "backward", cnt
+ 1);
521 rules
[cnt
] |= sort_backward
;
525 else if (arg
->tok
== tok_position
)
527 if (rules
[cnt
] & sort_position
)
531 lr_error (ldfile
, _("\
532 %s: `%s' mentioned more than once in definition of weight %d"),
533 "LC_COLLATE", "position", cnt
+ 1);
537 rules
[cnt
] |= sort_position
;
543 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
545 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
|| arg
->tok
== tok_comma
546 || arg
->tok
== tok_semicolon
)
548 if (! valid
&& ! warned
)
550 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
554 /* See whether we have to increment the counter. */
555 if (arg
->tok
!= tok_comma
&& rules
[cnt
] != 0)
557 /* Add the default `forward' if we have seen only `position'. */
558 if (rules
[cnt
] == sort_position
)
559 rules
[cnt
] = sort_position
| sort_forward
;
564 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
)
565 /* End of line or file, so we exit the loop. */
570 /* See whether we have enough room in the array. */
574 rules
= (enum coll_sort_rule
*) xrealloc (rules
,
577 memset (&rules
[cnt
], '\0', (max
- cnt
) * sizeof (*rules
));
584 /* There must not be any more rules. */
587 lr_error (ldfile
, _("\
588 %s: too many rules; first entry only had %d"),
589 "LC_COLLATE", nrules
);
593 lr_ignore_rest (ldfile
, 0);
602 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
607 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
612 /* Now we know how many rules we have. */
614 rules
= (enum coll_sort_rule
*) xrealloc (rules
,
615 nrules
* sizeof (*rules
));
621 /* Not enough rules in this specification. */
623 lr_error (ldfile
, _("%s: not enough sorting rules"), "LC_COLLATE");
626 rules
[cnt
] = sort_forward
;
627 while (++cnt
< nrules
);
631 collate
->current_section
->rules
= rules
;
/* Return the element named by the LEN bytes at STR.  The sequence table
   of COLLATE is searched first.  On a miss, a collating symbol of that
   name gets a fresh element stored in its `order' member; a name that is
   neither a collating symbol nor a collating element gets a fresh
   element that is entered into the sequence table.
   NOTE(review): the declarations of `result' and `ptr' and several
   braces are on lines not visible in this excerpt.  */
635 static struct element_t
*
636 find_element (struct linereader
*ldfile
, struct locale_collate_t
*collate
,
637 const char *str
, size_t len
)
641 /* Search for the entries among the collation sequences already defined. */
642 if (find_entry (&collate
->seq_table
, str
, len
, &result
) != 0)
644 /* Nope, not defined yet. So we see whether it is a
648 if (find_entry (&collate
->sym_table
, str
, len
, &ptr
) == 0)
650 /* It's a collation symbol. */
651 struct symbol_t
*sym
= (struct symbol_t
*) ptr
;
655 result
= sym
->order
= new_element (collate
, NULL
, 0, NULL
,
658 else if (find_entry (&collate
->elem_table
, str
, len
, &result
) != 0)
660 /* It's also no collation element. So it is a character
661 element defined later. */
662 result
= new_element (collate
, NULL
, 0, NULL
, str
, len
, 1);
663 /* Insert it into the sequence table. */
664 insert_entry (&collate
->seq_table
, str
, len
, result
);
668 return (struct element_t
*) result
;
/* Remove the element COLLATE->cursor points to from the doubly linked
   order list (linked through the `last' and `next' members) and move
   the cursor to its predecessor.  */
673 unlink_element (struct locale_collate_t
*collate
)
/* The cursor is the start of the list: the element is asserted to have
   no neighbours and the cursor is simply cleared.  COLLATE->start is not
   modified in the lines shown here.  */
675 if (collate
->cursor
== collate
->start
)
677 assert (collate
->cursor
->next
== NULL
);
678 assert (collate
->cursor
->last
== NULL
);
679 collate
->cursor
= NULL
;
/* Splice the neighbours together, then step the cursor back.
   NOTE(review): the line separating this part from the case above is
   not visible in this excerpt; presumably it is the `else' branch --
   confirm.  */
683 if (collate
->cursor
->next
!= NULL
)
684 collate
->cursor
->next
->last
= collate
->cursor
->last
;
685 if (collate
->cursor
->last
!= NULL
)
686 collate
->cursor
->last
->next
= collate
->cursor
->next
;
687 collate
->cursor
= collate
->cursor
->last
;
693 insert_weights (struct linereader
*ldfile
, struct element_t
*elem
,
694 const struct charmap_t
*charmap
,
695 struct repertoire_t
*repertoire
, struct localedef_t
*result
,
696 enum token_t ellipsis
)
700 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
702 /* Initialize all the fields. */
703 elem
->file
= ldfile
->fname
;
704 elem
->line
= ldfile
->lineno
;
706 elem
->last
= collate
->cursor
;
707 elem
->next
= collate
->cursor
? collate
->cursor
->next
: NULL
;
708 if (collate
->cursor
!= NULL
&& collate
->cursor
->next
!= NULL
)
709 collate
->cursor
->next
->last
= elem
;
710 if (collate
->cursor
!= NULL
)
711 collate
->cursor
->next
= elem
;
712 if (collate
->start
== NULL
)
714 assert (collate
->cursor
== NULL
);
715 collate
->start
= elem
;
718 elem
->section
= collate
->current_section
;
720 if (collate
->current_section
->first
== NULL
)
721 collate
->current_section
->first
= elem
;
722 if (collate
->current_section
->last
== collate
->cursor
)
723 collate
->current_section
->last
= elem
;
725 collate
->cursor
= elem
;
727 elem
->weights
= (struct element_list_t
*)
728 obstack_alloc (&collate
->mempool
, nrules
* sizeof (struct element_list_t
));
729 memset (elem
->weights
, '\0', nrules
* sizeof (struct element_list_t
));
733 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
736 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
)
739 if (arg
->tok
== tok_ignore
)
741 /* The weight for this level has to be ignored. We use the
742 null pointer to indicate this. */
743 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
744 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
745 elem
->weights
[weight_cnt
].w
[0] = NULL
;
746 elem
->weights
[weight_cnt
].cnt
= 1;
748 else if (arg
->tok
== tok_bsymbol
|| arg
->tok
== tok_ucs4
)
751 struct element_t
*val
;
755 if (arg
->tok
== tok_bsymbol
)
757 symstr
= arg
->val
.str
.startmb
;
758 symlen
= arg
->val
.str
.lenmb
;
762 snprintf (ucs4str
, sizeof (ucs4str
), "U%08X", arg
->val
.ucs4
);
767 val
= find_element (ldfile
, collate
, symstr
, symlen
);
771 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
772 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
773 elem
->weights
[weight_cnt
].w
[0] = val
;
774 elem
->weights
[weight_cnt
].cnt
= 1;
776 else if (arg
->tok
== tok_string
)
778 /* Split the string up in the individual characters and put
779 the element definitions in the list. */
780 const char *cp
= arg
->val
.str
.startmb
;
782 struct element_t
*charelem
;
783 struct element_t
**weights
= NULL
;
788 lr_error (ldfile
, _("%s: empty weight string not allowed"),
790 lr_ignore_rest (ldfile
, 0);
798 /* Ahh, it's a bsymbol or an UCS4 value. If it's
799 the latter we have to unify the name. */
800 const char *startp
= ++cp
;
805 if (*cp
== ldfile
->escape_char
)
808 /* It's a syntax error. */
814 if (cp
- startp
== 5 && startp
[0] == 'U'
815 && isxdigit (startp
[1]) && isxdigit (startp
[2])
816 && isxdigit (startp
[3]) && isxdigit (startp
[4]))
818 unsigned int ucs4
= strtoul (startp
+ 1, NULL
, 16);
821 newstr
= (char *) xmalloc (10);
822 snprintf (newstr
, 10, "U%08X", ucs4
);
830 charelem
= find_element (ldfile
, collate
, startp
, len
);
835 /* People really shouldn't use characters directly in
836 the string. Especially since it's not really clear
837 what this means. We interpret all characters in the
838 string as if that would be bsymbols. Otherwise we
839 would have to match back to bsymbols somehow and this
840 is normally not what people expect. */
841 charelem
= find_element (ldfile
, collate
, cp
++, 1);
844 if (charelem
== NULL
)
846 /* We ignore the rest of the line. */
847 lr_ignore_rest (ldfile
, 0);
851 /* Add the pointer. */
854 struct element_t
**newp
;
856 newp
= (struct element_t
**)
857 alloca (max
* sizeof (struct element_t
*));
858 memcpy (newp
, weights
, cnt
* sizeof (struct element_t
*));
861 weights
[cnt
++] = charelem
;
865 /* Now store the information. */
866 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
867 obstack_alloc (&collate
->mempool
,
868 cnt
* sizeof (struct element_t
*));
869 memcpy (elem
->weights
[weight_cnt
].w
, weights
,
870 cnt
* sizeof (struct element_t
*));
871 elem
->weights
[weight_cnt
].cnt
= cnt
;
873 /* We don't need the string anymore. */
874 free (arg
->val
.str
.startmb
);
876 else if (ellipsis
!= tok_none
877 && (arg
->tok
== tok_ellipsis2
878 || arg
->tok
== tok_ellipsis3
879 || arg
->tok
== tok_ellipsis4
))
881 /* It must be the same ellipsis as used in the initial column. */
882 if (arg
->tok
!= ellipsis
)
883 lr_error (ldfile
, _("\
884 %s: weights must use the same ellipsis symbol as the name"),
887 /* The weight for this level will depend on the element
888 iterating over the range. Put a placeholder. */
889 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
890 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
891 elem
->weights
[weight_cnt
].w
[0] = ELEMENT_ELLIPSIS2
;
892 elem
->weights
[weight_cnt
].cnt
= 1;
897 /* It's a syntax error. */
898 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
899 lr_ignore_rest (ldfile
, 0);
903 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
904 /* This had better be the end of the line or a semicolon. */
905 if (arg
->tok
== tok_semicolon
)
906 /* OK, ignore this and read the next token. */
907 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
908 else if (arg
->tok
!= tok_eof
&& arg
->tok
!= tok_eol
)
910 /* It's a syntax error. */
911 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
912 lr_ignore_rest (ldfile
, 0);
916 while (++weight_cnt
< nrules
);
918 if (weight_cnt
< nrules
)
920 /* This means the rest of the line uses the current element as
924 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
925 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
926 if (ellipsis
== tok_none
)
927 elem
->weights
[weight_cnt
].w
[0] = elem
;
929 elem
->weights
[weight_cnt
].w
[0] = ELEMENT_ELLIPSIS2
;
930 elem
->weights
[weight_cnt
].cnt
= 1;
932 while (++weight_cnt
< nrules
);
936 if (arg
->tok
== tok_ignore
|| arg
->tok
== tok_bsymbol
)
938 /* Too many rule values. */
939 lr_error (ldfile
, _("%s: too many values"), "LC_COLLATE");
940 lr_ignore_rest (ldfile
, 0);
943 lr_ignore_rest (ldfile
, arg
->tok
!= tok_eol
&& arg
->tok
!= tok_eof
);
949 insert_value (struct linereader
*ldfile
, const char *symstr
, size_t symlen
,
950 const struct charmap_t
*charmap
, struct repertoire_t
*repertoire
,
951 struct localedef_t
*result
)
953 /* First find out what kind of symbol this is. */
956 struct element_t
*elem
= NULL
;
957 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
959 /* Try to find the character in the charmap. */
960 seq
= charmap_find_value (charmap
, symstr
, symlen
);
962 /* Determine the wide character. */
963 if (seq
== NULL
|| seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
965 wc
= repertoire_find_value (repertoire
, symstr
, symlen
);
972 if (wc
== ILLEGAL_CHAR_VALUE
&& seq
== NULL
)
974 /* It's no character, so look through the collation elements and
977 if (find_entry (&collate
->elem_table
, symstr
, symlen
, &ptr
) != 0)
980 struct symbol_t
*sym
= NULL
;
982 /* It's also no collation element. Therefore it's either a
983 collating symbol or it's a character which is not
984 supported by the character set. In the latter case we
985 simply create a dummy entry. */
986 if (find_entry (&collate
->sym_table
, symstr
, symlen
, &result
) == 0)
988 /* It's a collation symbol. */
989 sym
= (struct symbol_t
*) result
;
996 elem
= new_element (collate
, NULL
, 0, NULL
, symstr
, symlen
, 0);
1001 /* Enter a fake element in the sequence table. This
1002 won't cause anything in the output since there is
1003 no multibyte or wide character associated with
1005 insert_entry (&collate
->seq_table
, symstr
, symlen
, elem
);
1009 /* Copy the result back. */
1014 /* Otherwise the symbol stands for a character. */
1016 if (find_entry (&collate
->seq_table
, symstr
, symlen
, &ptr
) != 0)
1018 uint32_t wcs
[2] = { wc
, 0 };
1020 /* We have to allocate an entry. */
1021 elem
= new_element (collate
,
1022 seq
!= NULL
? (char *) seq
->bytes
: NULL
,
1023 seq
!= NULL
? seq
->nbytes
: 0,
1024 wc
== ILLEGAL_CHAR_VALUE
? NULL
: wcs
,
1027 /* And add it to the table. */
1028 if (insert_entry (&collate
->seq_table
, symstr
, symlen
, elem
) != 0)
1029 /* This cannot happen. */
1030 assert (! "Internal error");
1034 /* Copy the result back. */
1037 /* Maybe the character was used before the definition. In this case
1038 we have to insert the byte sequences now. */
1039 if (elem
->mbs
== NULL
&& seq
!= NULL
)
1041 elem
->mbs
= obstack_copy0 (&collate
->mempool
,
1042 seq
->bytes
, seq
->nbytes
);
1043 elem
->nmbs
= seq
->nbytes
;
1046 if (elem
->wcs
== NULL
&& wc
!= ILLEGAL_CHAR_VALUE
)
1048 uint32_t wcs
[2] = { wc
, 0 };
1050 elem
->wcs
= obstack_copy (&collate
->mempool
, wcs
, sizeof (wcs
));
1056 /* Test whether this element is not already in the list. */
1057 if (elem
->next
!= NULL
|| elem
== collate
->cursor
)
1059 lr_error (ldfile
, _("order for `%.*s' already defined at %s:%Zu"),
1060 (int) symlen
, symstr
, elem
->file
, elem
->line
);
1061 lr_ignore_rest (ldfile
, 0);
1065 insert_weights (ldfile
, elem
, charmap
, repertoire
, result
, tok_none
);
1072 handle_ellipsis (struct linereader
*ldfile
, const char *symstr
, size_t symlen
,
1073 enum token_t ellipsis
, const struct charmap_t
*charmap
,
1074 struct repertoire_t
*repertoire
,
1075 struct localedef_t
*result
)
1077 struct element_t
*startp
;
1078 struct element_t
*endp
;
1079 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
1081 /* Unlink the entry added for the ellipsis. */
1082 unlink_element (collate
);
1083 startp
= collate
->cursor
;
1085 /* Process and add the end-entry. */
1087 && insert_value (ldfile
, symstr
, symlen
, charmap
, repertoire
, result
))
1088 /* Something went wrong with inserting the to-value. This means
1089 we cannot process the ellipsis. */
1092 /* Reset the cursor. */
1093 collate
->cursor
= startp
;
1095 /* Now we have to handle many different situations:
1096 - we have to distinguish between the three different ellipsis forms
1097 - whether the ellipsis is at the beginning, in the middle, or at the end.
1099 endp
= collate
->cursor
->next
;
1100 assert (symstr
== NULL
|| endp
!= NULL
);
1102 /* XXX The following is probably very wrong since also collating symbols
1103 can appear in ranges. But do we want to, and can we, refine the test for that? */
1105 /* Both, the start and the end symbol, must stand for characters. */
1106 if ((startp
!= NULL
&& (startp
->name
== NULL
|| ! startp
->is_character
))
1107 || (endp
!= NULL
&& (endp
->name
== NULL
|| ! endp
->is_character
)))
1109 lr_error (ldfile
, _("\
1110 %s: the start and the end symbol of a range must stand for characters"),
1116 if (ellipsis
== tok_ellipsis3
)
1118 /* One requirement we make here: the length of the byte
1119 sequences for the first and end character must be the same.
1120 This is mainly to prevent unwanted effects and this is often
1121 not what is wanted. */
1122 size_t len
= (startp
->mbs
!= NULL
? startp
->nmbs
1123 : (endp
->mbs
!= NULL
? endp
->nmbs
: 0));
1124 char mbcnt
[len
+ 1];
1125 char mbend
[len
+ 1];
1127 /* Well, this should be caught somewhere else already. Just to
1129 assert (startp
== NULL
|| startp
->wcs
== NULL
|| startp
->wcs
[1] == 0);
1130 assert (endp
== NULL
|| endp
->wcs
== NULL
|| endp
->wcs
[1] == 0);
1132 if (startp
!= NULL
&& endp
!= NULL
1133 && startp
->mbs
!= NULL
&& endp
->mbs
!= NULL
1134 && startp
->nmbs
!= endp
->nmbs
)
1136 lr_error (ldfile
, _("\
1137 %s: byte sequences of first and last character must have the same length"),
1142 /* Determine whether we have to generate multibyte sequences. */
1143 if ((startp
== NULL
|| startp
->mbs
!= NULL
)
1144 && (endp
== NULL
|| endp
->mbs
!= NULL
))
1149 /* Prepare the beginning byte sequence. This is either from the
1150 beginning byte sequence or it is all nulls if it was an
1151 initial ellipsis. */
1152 if (startp
== NULL
|| startp
->mbs
== NULL
)
1153 memset (mbcnt
, '\0', len
);
1156 memcpy (mbcnt
, startp
->mbs
, len
);
1158 /* And increment it so that the value is the first one we will
1160 for (cnt
= len
- 1; cnt
>= 0; --cnt
)
1161 if (++mbcnt
[cnt
] != '\0')
1166 /* And the end sequence. */
1167 if (endp
== NULL
|| endp
->mbs
== NULL
)
1168 memset (mbend
, '\0', len
);
1170 memcpy (mbend
, endp
->mbs
, len
);
1173 /* Test whether we have a correct range. */
1174 ret
= memcmp (mbcnt
, mbend
, len
);
1178 lr_error (ldfile
, _("%s: byte sequence of first character of \
1179 range is not lower than that of the last character"), "LC_COLLATE");
1183 /* Generate the byte sequences data. */
1186 struct charseq
*seq
;
1188 /* Quite a bit of work ahead. We have to find the character
1189 definition for the byte sequence and then determine the
1190 wide character belonging to it. */
1191 seq
= charmap_find_symbol (charmap
, mbcnt
, len
);
1194 struct element_t
*elem
;
1197 /* I don't think this can ever happen. */
1198 assert (seq
->name
!= NULL
);
1199 namelen
= strlen (seq
->name
);
1201 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1202 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1205 /* Now we are ready to insert the new value in the
1206 sequence. Find out whether the element is
1209 if (find_entry (&collate
->seq_table
, seq
->name
, namelen
,
1212 uint32_t wcs
[2] = { seq
->ucs4
, 0 };
1214 /* We have to allocate an entry. */
1215 elem
= new_element (collate
, mbcnt
, len
,
1216 seq
->ucs4
== ILLEGAL_CHAR_VALUE
1217 ? NULL
: wcs
, seq
->name
,
1220 /* And add it to the table. */
1221 if (insert_entry (&collate
->seq_table
, seq
->name
,
1222 namelen
, elem
) != 0)
1223 /* This cannot happen. */
1224 assert (! "Internal error");
1227 /* Copy the result. */
1230 /* Test whether this element is not already in the list. */
1231 if (elem
->next
!= NULL
|| (collate
->cursor
!= NULL
1232 && elem
->next
== collate
->cursor
))
1234 lr_error (ldfile
, _("\
1235 order for `%.*s' already defined at %s:%Zu"),
1236 (int) namelen
, seq
->name
,
1237 elem
->file
, elem
->line
);
1241 /* Enqueue the new element. */
1242 elem
->last
= collate
->cursor
;
1243 if (collate
->cursor
== NULL
)
1247 elem
->next
= collate
->cursor
->next
;
1248 elem
->last
->next
= elem
;
1249 if (elem
->next
!= NULL
)
1250 elem
->next
->last
= elem
;
1252 if (collate
->start
== NULL
)
1254 assert (collate
->cursor
== NULL
);
1255 collate
->start
= elem
;
1257 collate
->cursor
= elem
;
1259 /* Add the weight value. We take them from the
1260 `ellipsis_weights' member of `collate'. */
1261 elem
->weights
= (struct element_list_t
*)
1262 obstack_alloc (&collate
->mempool
,
1263 nrules
* sizeof (struct element_list_t
));
1264 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1265 if (collate
->ellipsis_weight
.weights
[cnt
].cnt
== 1
1266 && (collate
->ellipsis_weight
.weights
[cnt
].w
[0]
1267 == ELEMENT_ELLIPSIS2
))
1269 elem
->weights
[cnt
].w
= (struct element_t
**)
1270 obstack_alloc (&collate
->mempool
,
1271 sizeof (struct element_t
*));
1272 elem
->weights
[cnt
].w
[0] = elem
;
1273 elem
->weights
[cnt
].cnt
= 1;
1277 /* Simply use the weight from `ellipsis_weight'. */
1278 elem
->weights
[cnt
].w
=
1279 collate
->ellipsis_weight
.weights
[cnt
].w
;
1280 elem
->weights
[cnt
].cnt
=
1281 collate
->ellipsis_weight
.weights
[cnt
].cnt
;
1285 /* Increment for the next round. */
1287 for (cnt
= len
- 1; cnt
>= 0; --cnt
)
1288 if (++mbcnt
[cnt
] != '\0')
1291 /* Find out whether this was all. */
1292 if (cnt
< 0 || memcmp (mbcnt
, mbend
, len
) >= 0)
1293 /* Yep, that's all. */
1300 /* For symbolic range we naturally must have a beginning and an
1301 end specified by the user. */
1303 lr_error (ldfile
, _("\
1304 %s: symbolic range ellipsis must not directly follow `order_start'"),
1306 else if (endp
== NULL
)
1307 lr_error (ldfile
, _("\
1308 %s: symbolic range ellipsis must not be directly followed by `order_end'"),
1312 /* Determine the range. To do so we have to determine the
1313 common prefix of both names and then the numeric
1314 values of both ends. */
1315 size_t lenfrom
= strlen (startp
->name
);
1316 size_t lento
= strlen (endp
->name
);
1317 char buf
[lento
+ 1];
1322 int base
= ellipsis
== tok_ellipsis2
? 16 : 10;
1324 if (lenfrom
!= lento
)
1327 lr_error (ldfile
, _("\
1328 `%s' and `%.*s' are not valid names for symbolic range"),
1329 startp
->name
, (int) lento
, endp
->name
);
1333 while (startp
->name
[preflen
] == endp
->name
[preflen
])
1334 if (startp
->name
[preflen
] == '\0')
1335 /* Nothing to be done. The start and end point are identical
1336 and while inserting the end point we have already given
1337 the user an error message. */
1343 from
= strtol (startp
->name
+ preflen
, &cp
, base
);
1344 if ((from
== UINT_MAX
&& errno
== ERANGE
) || *cp
!= '\0')
1348 to
= strtol (endp
->name
+ preflen
, &cp
, base
);
1349 if ((to
== UINT_MAX
&& errno
== ERANGE
) || *cp
!= '\0')
1352 /* Copy the prefix. */
1353 memcpy (buf
, startp
->name
, preflen
);
1355 /* Loop over all values. */
1356 for (++from
; from
< to
; ++from
)
1358 struct element_t
*elem
= NULL
;
1359 struct charseq
*seq
;
1363 /* Generate the name. */
1364 sprintf (buf
+ preflen
, base
== 10 ? "%0*ld" : "%0*lX",
1365 (int) (lenfrom
- preflen
), from
);
1367 /* Look whether this name is already defined. */
1369 if (find_entry (&collate
->seq_table
, buf
, symlen
, &ptr
) == 0)
1371 /* Copy back the result. */
1374 if (elem
->next
!= NULL
|| (collate
->cursor
!= NULL
1375 && elem
->next
== collate
->cursor
))
1377 lr_error (ldfile
, _("\
1378 %s: order for `%.*s' already defined at %s:%Zu"),
1379 "LC_COLLATE", (int) lenfrom
, buf
,
1380 elem
->file
, elem
->line
);
1384 if (elem
->name
== NULL
)
1386 lr_error (ldfile
, _("%s: `%s' must be a character"),
1392 if (elem
== NULL
|| (elem
->mbs
== NULL
&& elem
->wcs
== NULL
))
1394 /* Search for a character of this name. */
1395 seq
= charmap_find_value (charmap
, buf
, lenfrom
);
1396 if (seq
== NULL
|| seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1398 wc
= repertoire_find_value (repertoire
, buf
, lenfrom
);
1406 if (wc
== ILLEGAL_CHAR_VALUE
&& seq
== NULL
)
1407 /* We don't know anything about a character with this
1408 name. XXX Should we warn? */
1413 uint32_t wcs
[2] = { wc
, 0 };
1415 /* We have to allocate an entry. */
1416 elem
= new_element (collate
,
1418 ? (char *) seq
->bytes
: NULL
,
1419 seq
!= NULL
? seq
->nbytes
: 0,
1420 wc
== ILLEGAL_CHAR_VALUE
1421 ? NULL
: wcs
, buf
, lenfrom
, 1);
1425 /* Update the element. */
1428 elem
->mbs
= obstack_copy0 (&collate
->mempool
,
1429 seq
->bytes
, seq
->nbytes
);
1430 elem
->nmbs
= seq
->nbytes
;
1433 if (wc
!= ILLEGAL_CHAR_VALUE
)
1437 obstack_grow (&collate
->mempool
,
1438 &wc
, sizeof (uint32_t));
1439 obstack_grow (&collate
->mempool
,
1440 &zero
, sizeof (uint32_t));
1441 elem
->wcs
= obstack_finish (&collate
->mempool
);
1446 elem
->file
= ldfile
->fname
;
1447 elem
->line
= ldfile
->lineno
;
1448 elem
->section
= collate
->current_section
;
1451 /* Enqueue the new element. */
1452 elem
->last
= collate
->cursor
;
1453 elem
->next
= collate
->cursor
->next
;
1454 elem
->last
->next
= elem
;
1455 if (elem
->next
!= NULL
)
1456 elem
->next
->last
= elem
;
1457 collate
->cursor
= elem
;
1459 /* Now add the weights. They come from the `ellipsis_weights'
1460 member of `collate'. */
1461 elem
->weights
= (struct element_list_t
*)
1462 obstack_alloc (&collate
->mempool
,
1463 nrules
* sizeof (struct element_list_t
));
1464 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1465 if (collate
->ellipsis_weight
.weights
[cnt
].cnt
== 1
1466 && (collate
->ellipsis_weight
.weights
[cnt
].w
[0]
1467 == ELEMENT_ELLIPSIS2
))
1469 elem
->weights
[cnt
].w
= (struct element_t
**)
1470 obstack_alloc (&collate
->mempool
,
1471 sizeof (struct element_t
*));
1472 elem
->weights
[cnt
].w
[0] = elem
;
1473 elem
->weights
[cnt
].cnt
= 1;
1477 /* Simply use the weight from `ellipsis_weight'. */
1478 elem
->weights
[cnt
].w
=
1479 collate
->ellipsis_weight
.weights
[cnt
].w
;
1480 elem
->weights
[cnt
].cnt
=
1481 collate
->ellipsis_weight
.weights
[cnt
].cnt
;
/* Set up the LC_COLLATE data of LOCALE.
   The setup is guarded by: IGNORE_CONTENT is zero and
   LOCALE->categories[LC_COLLATE].collate is still NULL.  With
   COPY_LOCALE == NULL a zeroed locale_collate_t is obtained from xcalloc,
   its elem_table, sym_table and seq_table hashes and its mempool obstack
   are initialized, and col_weight_max is set to -1.  The other visible
   assignment makes LOCALE share COPY_LOCALE's collate pointer.
   LDFILE's translate_strings and return_widestr members are set to 0.
   NOTE(review): this listing lacks the brace-only and `else' lines, so the
   exact nesting of the statements has to be confirmed against the pristine
   file. */
1490 collate_startup (struct linereader
*ldfile
, struct localedef_t
*locale
,
1491 struct localedef_t
*copy_locale
, int ignore_content
)
1493 if (!ignore_content
&& locale
->categories
[LC_COLLATE
].collate
== NULL
)
1495 struct locale_collate_t
*collate
;
1497 if (copy_locale
== NULL
)
1499 collate
= locale
->categories
[LC_COLLATE
].collate
=
1500 (struct locale_collate_t
*)
1501 xcalloc (1, sizeof (struct locale_collate_t
));
1503 /* Init the various data structures. */
1504 init_hash (&collate
->elem_table
, 100);
1505 init_hash (&collate
->sym_table
, 100);
1506 init_hash (&collate
->seq_table
, 500);
1507 obstack_init (&collate
->mempool
);
1509 collate
->col_weight_max
= -1;
1512 /* Reuse the copy_locale's data structures. */
1513 collate
= locale
->categories
[LC_COLLATE
].collate
=
1514 copy_locale
->categories
[LC_COLLATE
].collate
;
1517 ldfile
->translate_strings
= 0;
1518 ldfile
->return_widestr
= 0;
/* Post-process the LC_COLLATE data of LOCALE.  Steps visible in this
   listing:
   - a warning is recorded when the category has no data at all;
   - for every level, the `sort_position' bit of each section that has
     rules is compared with that of collate->current_section and an
     error is recorded on a mismatch;
   - the element list starting at collate->start is walked; weights that
     refer to an element without weights are reported as undefined symbols
     and replaced by &collate->undefined, and the level bit is set in
     `used_in_level' of every element used as a weight;
   - mborder, wcorder, mbseqorder and wcseqorder values are handed out and
     the elements are linked into the `mbheads' lists (indexed by first
     byte) and into the `wcheads' table (keyed by first wide character);
     `mbheads' slots 1..255 that are still NULL get &collate->undefined;
   - the UNDEFINED element receives its own order values;
   - every section with rules gets a `ruleidx'; a section whose rules are
     byte-identical (memcmp) to those of an earlier section reuses that
     section's index.  At most 128 rule sets are supported (assert).
   CHARMAP is not referenced in the lines shown here.
   NOTE(review): brace-only lines, several declarations and some comment
   terminators are missing from this listing; confirm nesting against the
   pristine file. */
1523 collate_finish (struct localedef_t
*locale
, const struct charmap_t
*charmap
)
1525 /* Now is the time when we can assign the individual collation
1526 values for all the symbols. We have possibly different values
1527 for the wide- and the multibyte-character symbols. This is done
1528 since it might make a difference in the encoding if there is in
1529 some cases no multibyte-character but there are wide-characters.
1530 (The other way around it is not important since the encoded
1531 collation value in the wide-character case is 32 bits wide and
1532 therefore requires no encoding).
1534 The lowest collation value assigned is 2. Zero is reserved for
1535 the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1536 functions and 1 is used to separate the individual passes for the
1539 We also have to construct a list with all the bytes/words which
1540 can come first in a sequence, followed by all the elements which
1541 also start with this byte/word. The order is reverse which has
1542 among others the important effect that longer strings are located
1543 first in the list. This is required for the output data since
1544 the algorithm used in `strcoll' etc depends on this.
1546 The multibyte case is easy. We simply sort into an array with
1548 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1553 struct element_t
*runp
;
1555 int need_undefined
= 0;
1556 struct section_list
*sect
;
1558 int nr_wide_elems
= 0;
1560 if (collate
== NULL
)
1562 /* No data, no check. Issue a warning. */
1563 record_warning (_("No definition for %s category found"),
1568 /* If this assertion is hit change the type in `element_t'. */
1569 assert (nrules
<= sizeof (runp
->used_in_level
) * 8);
1571 /* Make sure that the `position' rule is used either in all sections
1573 for (i
= 0; i
< nrules
; ++i
)
1574 for (sect
= collate
->sections
; sect
!= NULL
; sect
= sect
->next
)
1575 if (sect
!= collate
->current_section
1576 && sect
->rules
!= NULL
1577 && ((sect
->rules
[i
] & sort_position
)
1578 != (collate
->current_section
->rules
[i
] & sort_position
)))
1580 record_error (0, 0, _("\
1581 %s: `position' must be used for a specific level in all sections or none"),
1586 /* Find out which elements are used at which level. At the same
1587 time we find out whether we have any undefined symbols. */
1588 runp
= collate
->start
;
1589 while (runp
!= NULL
)
1591 if (runp
->mbs
!= NULL
)
1593 for (i
= 0; i
< nrules
; ++i
)
1597 for (j
= 0; j
< runp
->weights
[i
].cnt
; ++j
)
1598 /* A NULL pointer as the weight means IGNORE. */
1599 if (runp
->weights
[i
].w
[j
] != NULL
)
1601 if (runp
->weights
[i
].w
[j
]->weights
== NULL
)
1603 record_error_at_line (0, 0, runp
->file
, runp
->line
,
1604 _("symbol `%s' not defined"),
1605 runp
->weights
[i
].w
[j
]->name
);
1608 runp
->weights
[i
].w
[j
] = &collate
->undefined
;
1611 /* Set the bit for the level. */
1612 runp
->weights
[i
].w
[j
]->used_in_level
|= 1 << i
;
1617 /* Up to the next entry. */
1621 /* Walk through the list of defined sequences and assign weights. Also
1622 create the data structure which will allow generating the single byte
1623 character based tables.
1625 Since at each time only the weights for each of the rules are
1626 only compared to other weights for this rule it is possible to
1627 assign more compact weight values than simply counting all
1628 weights in sequence. We can assign weights from 3, one for each
1629 rule individually and only for those elements, which are actually
1632 Why is this important? It is not for the wide char table. But
1633 it is for the singlebyte output since here larger numbers have to
1634 be encoded to make it possible to emit the value as a byte
1636 for (i
= 0; i
< nrules
; ++i
)
1641 runp
= collate
->start
;
1642 while (runp
!= NULL
)
1644 /* Determine the order. */
1645 if (runp
->used_in_level
!= 0)
1647 runp
->mborder
= (int *) obstack_alloc (&collate
->mempool
,
1648 nrules
* sizeof (int));
1650 for (i
= 0; i
< nrules
; ++i
)
1651 if ((runp
->used_in_level
& (1 << i
)) != 0)
1652 runp
->mborder
[i
] = mbact
[i
]++;
1654 runp
->mborder
[i
] = 0;
1657 if (runp
->mbs
!= NULL
)
1659 struct element_t
**eptr
;
1660 struct element_t
*lastp
= NULL
;
1662 /* Find the point where to insert in the list. */
1663 eptr
= &collate
->mbheads
[((unsigned char *) runp
->mbs
)[0]];
1664 while (*eptr
!= NULL
)
1666 if ((*eptr
)->nmbs
< runp
->nmbs
)
1669 if ((*eptr
)->nmbs
== runp
->nmbs
)
1671 int c
= memcmp ((*eptr
)->mbs
, runp
->mbs
, runp
->nmbs
);
1675 /* This should not happen. It means that we have
1676 two symbols with the same byte sequence. It is
1677 of course an error. */
1678 record_error_at_line (0, 0, (*eptr
)->file
,
1681 symbol `%s' has the same encoding as"), (*eptr
)->name
);
1683 record_error_at_line (0, 0, runp
->file
, runp
->line
,
1684 _("symbol `%s'"), runp
->name
);
1688 /* Insert it here. */
1692 /* To the next entry. */
1694 eptr
= &(*eptr
)->mbnext
;
1697 /* Set the pointers. */
1698 runp
->mbnext
= *eptr
;
1699 runp
->mblast
= lastp
;
1701 (*eptr
)->mblast
= runp
;
1707 if (runp
->used_in_level
)
1709 runp
->wcorder
= wcact
++;
1711 /* We take the opportunity to count the elements which have
1716 if (runp
->is_character
)
1718 if (runp
->nmbs
== 1)
1719 collate
->mbseqorder
[((unsigned char *) runp
->mbs
)[0]] = mbseqact
++;
1721 runp
->wcseqorder
= wcseqact
++;
1723 else if (runp
->mbs
!= NULL
&& runp
->weights
!= NULL
)
1724 /* This is for collation elements. */
1725 runp
->wcseqorder
= wcseqact
++;
1727 /* Up to the next entry. */
1731 /* Find out whether any of the `mbheads' entries is unset. In this
1732 case we use the UNDEFINED entry. */
1733 for (i
= 1; i
< 256; ++i
)
1734 if (collate
->mbheads
[i
] == NULL
)
1737 collate
->mbheads
[i
] = &collate
->undefined
;
1740 /* Now to the wide character case. */
1741 collate
->wcheads
.p
= 6;
1742 collate
->wcheads
.q
= 10;
1743 wchead_table_init (&collate
->wcheads
);
1745 collate
->wcseqorder
.p
= 6;
1746 collate
->wcseqorder
.q
= 10;
1747 collseq_table_init (&collate
->wcseqorder
);
1750 runp
= collate
->start
;
1751 while (runp
!= NULL
)
1753 if (runp
->wcs
!= NULL
)
1755 struct element_t
*e
;
1756 struct element_t
**eptr
;
1757 struct element_t
*lastp
;
1759 /* Insert the collation sequence value. */
1760 if (runp
->is_character
)
1761 collseq_table_add (&collate
->wcseqorder
, runp
->wcs
[0],
1764 /* Find the point where to insert in the list. */
1765 e
= wchead_table_get (&collate
->wcheads
, runp
->wcs
[0]);
1768 while (*eptr
!= NULL
)
1770 if ((*eptr
)->nwcs
< runp
->nwcs
)
1773 if ((*eptr
)->nwcs
== runp
->nwcs
)
1775 int c
= wmemcmp ((wchar_t *) (*eptr
)->wcs
,
1776 (wchar_t *) runp
->wcs
, runp
->nwcs
);
1780 /* This should not happen. It means that we have
1781 two symbols with the same byte sequence. It is
1782 of course an error. */
1783 record_error_at_line (0, 0, (*eptr
)->file
,
1786 symbol `%s' has the same encoding as"), (*eptr
)->name
);
1788 record_error_at_line (0, 0, runp
->file
, runp
->line
,
1789 _("symbol `%s'"), runp
->name
);
1793 /* Insert it here. */
1797 /* To the next entry. */
1799 eptr
= &(*eptr
)->wcnext
;
1802 /* Set the pointers. */
1803 runp
->wcnext
= *eptr
;
1804 runp
->wclast
= lastp
;
1806 (*eptr
)->wclast
= runp
;
1809 wchead_table_add (&collate
->wcheads
, runp
->wcs
[0], e
);
1814 /* Up to the next entry. */
1818 /* Now determine whether the UNDEFINED entry is needed and if yes,
1819 whether it was defined. */
1820 collate
->undefined
.used_in_level
= need_undefined
? ~0ul : 0;
1821 if (collate
->undefined
.file
== NULL
)
1825 /* This seems not to be enforced by recent standards. Don't
1826 emit an error, simply append UNDEFINED at the end. */
1827 collate
->undefined
.mborder
=
1828 (int *) obstack_alloc (&collate
->mempool
, nrules
* sizeof (int));
1830 for (i
= 0; i
< nrules
; ++i
)
1831 collate
->undefined
.mborder
[i
] = mbact
[i
]++;
1834 /* In any case we will need the definition for the wide character
1835 case. But we will not complain that it is missing since the
1836 specification strangely enough does not seem to account for
1838 collate
->undefined
.wcorder
= wcact
++;
1841 /* Finally, try to unify the rules for the sections. Whenever the rules
1842 for a section are the same as those for another section give the
1843 ruleset the same index. Since there are never many section we can
1844 use an O(n^2) algorithm here. */
1845 sect
= collate
->sections
;
1846 while (sect
!= NULL
&& sect
->rules
== NULL
)
1849 /* Bail out if we have no sections because of earlier errors. */
1852 record_error (EXIT_FAILURE
, 0, _("too many errors; giving up"));
1859 struct section_list
*osect
= collate
->sections
;
1861 while (osect
!= sect
)
1862 if (osect
->rules
!= NULL
1863 && memcmp (osect
->rules
, sect
->rules
,
1864 nrules
* sizeof (osect
->rules
[0])) == 0)
1867 osect
= osect
->next
;
1870 sect
->ruleidx
= ruleidx
++;
1872 sect
->ruleidx
= osect
->ruleidx
;
1877 while (sect
!= NULL
&& sect
->rules
== NULL
);
1879 while (sect
!= NULL
);
1880 /* We are currently not prepared for more than 128 rulesets. But this
1881 should never really be a problem. */
1882 assert (ruleidx
<= 128);
/* Append the multibyte weight record of ELEM to POOL.
   For each of the `nrules' levels one length byte is added, followed by
   the utf8_encode()d `mborder' values of all weights of that level whose
   pointer is not NULL (a NULL weight is an IGNORE entry and adds nothing).
   The value returned combines the object size POOL had before the record
   was added with the low seven bits of ELEM's section `ruleidx', placed
   at bit 24 and up.
   ELEM == &COLLATE->undefined is special-cased because its weights are
   already in the pool; the statement executed for that case is not part
   of this listing. */
1887 output_weight (struct obstack
*pool
, struct locale_collate_t
*collate
,
1888 struct element_t
*elem
)
1893 /* Optimize the use of UNDEFINED. */
1894 if (elem
== &collate
->undefined
)
1895 /* The weights are already inserted. */
1898 /* This byte can start exactly one collation element and this is
1899 a single byte. We can directly give the index to the weights. */
1900 retval
= obstack_object_size (pool
);
1902 /* Construct the weight. */
1903 for (cnt
= 0; cnt
< nrules
; ++cnt
)
/* Scratch space: seven bytes are reserved per weight of this level. */
1905 char buf
[elem
->weights
[cnt
].cnt
* 7];
1909 for (i
= 0; i
< elem
->weights
[cnt
].cnt
; ++i
)
1910 /* Encode the weight value. We do nothing for IGNORE entries. */
1911 if (elem
->weights
[cnt
].w
[i
] != NULL
)
1912 len
+= utf8_encode (&buf
[len
],
1913 elem
->weights
[cnt
].w
[i
]->mborder
[cnt
]);
1915 /* And add the buffer content. */
1916 obstack_1grow (pool
, len
);
1917 obstack_grow (pool
, buf
, len
);
1920 return retval
| ((elem
->section
->ruleidx
& 0x7f) << 24);
/* Wide character counterpart of output_weight: append the weight record
   of ELEM to POOL as 32-bit words.
   For each of the `nrules' levels the number J of non-NULL weights is
   added with obstack_int32_grow, followed by the `wcorder' values of
   those weights; maybe_swap_uint32_obstack is then applied to the J words
   just added.
   The value returned combines the size POOL had before the record was
   added, counted in int32_t units, with the low seven bits of ELEM's
   section `ruleidx', placed at bit 24 and up.
   ELEM == &COLLATE->undefined is special-cased because its weights are
   already in the pool; the statement executed for that case is not part
   of this listing. */
1925 output_weightwc (struct obstack
*pool
, struct locale_collate_t
*collate
,
1926 struct element_t
*elem
)
1931 /* Optimize the use of UNDEFINED. */
1932 if (elem
== &collate
->undefined
)
1933 /* The weights are already inserted. */
1936 /* This byte can start exactly one collation element and this is
1937 a single byte. We can directly give the index to the weights. */
1938 retval
= obstack_object_size (pool
) / sizeof (int32_t);
1940 /* Construct the weight. */
1941 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1943 int32_t buf
[elem
->weights
[cnt
].cnt
];
/* Collect the orders of the non-IGNORE weights; J counts them. */
1947 for (i
= 0, j
= 0; i
< elem
->weights
[cnt
].cnt
; ++i
)
1948 if (elem
->weights
[cnt
].w
[i
] != NULL
)
1949 buf
[j
++] = elem
->weights
[cnt
].w
[i
]->wcorder
;
1951 /* And add the buffer content. */
1952 obstack_int32_grow (pool
, j
);
1954 obstack_grow (pool
, buf
, j
* sizeof (int32_t));
1955 maybe_swap_uint32_obstack (pool
, j
);
1958 return retval
| ((elem
->section
->ruleidx
& 0x7f) << 24);
/* Members of the file-scope `atwc' state read by add_to_tablewc.
   collate_output stores the addresses of its local pools, the collate
   data and the wide character index table here before it calls
   wchead_table_iterate, and clears the whole object with memset
   afterwards. */
1961 /* If localedef is ever threaded, this would need to be __thread var. */
1964 struct obstack
*weightpool
;
1965 struct obstack
*extrapool
;
1966 struct obstack
*indpool
;
1967 struct locale_collate_t
*collate
;
1968 struct collidx_table
*tablewc
;
1971 static void add_to_tablewc (uint32_t ch
, struct element_t
*runp
);
/* Callback that collate_output passes to wchead_table_iterate: record the
   entry for wide character CH, whose list of collation elements starts at
   RUNP, in atwc.tablewc.
   If RUNP has no successor (wcnext == NULL) and is one wide character
   long, the index returned by output_weightwc is stored directly.
   Otherwise the negated current size of atwc.extrapool, counted in
   uint32_t units, is stored and the elements of the list are written to
   that pool.  A run of elements that differ only in their last wide
   character, each one greater by one than its successor's, is written as
   a single entry whose first word is a negative index into atwc.indpool,
   where the weight indices of the run are collected.  Any other element
   is written as its weight index, its length minus one and its wide
   characters from index 1 on.
   NOTE(review): brace-only lines, `do'/`else' keywords and some comment
   terminators are missing from this listing; confirm nesting against the
   pristine file. */
1974 add_to_tablewc (uint32_t ch
, struct element_t
*runp
)
1976 if (runp
->wcnext
== NULL
&& runp
->nwcs
== 1)
1978 int32_t weigthidx
= output_weightwc (atwc
.weightpool
, atwc
.collate
,
1980 collidx_table_add (atwc
.tablewc
, ch
, weigthidx
);
1984 /* As for the singlebyte table, we recognize sequences and
1987 collidx_table_add (atwc
.tablewc
, ch
,
1988 -(obstack_object_size (atwc
.extrapool
)
1989 / sizeof (uint32_t)));
1993 /* Store the current index in the weight table. We know that
1994 the current position in the `extrapool' is aligned on a
1999 /* Find out whether this is a single entry or we have more than
2000 one consecutive entry. */
2001 if (runp
->wcnext
!= NULL
2002 && runp
->nwcs
== runp
->wcnext
->nwcs
2003 && wmemcmp ((wchar_t *) runp
->wcs
,
2004 (wchar_t *)runp
->wcnext
->wcs
,
2005 runp
->nwcs
- 1) == 0
2006 && (runp
->wcs
[runp
->nwcs
- 1]
2007 == runp
->wcnext
->wcs
[runp
->nwcs
- 1] + 1))
2010 struct element_t
*series_startp
= runp
;
2011 struct element_t
*curp
;
2013 /* Now add first the initial byte sequence. */
2014 added
= (1 + 1 + 2 * (runp
->nwcs
- 1)) * sizeof (int32_t);
2015 if (sizeof (int32_t) == sizeof (int))
2016 obstack_make_room (atwc
.extrapool
, added
);
2018 /* More than one consecutive entry. We mark this by having
2019 a negative index into the indirect table. */
2020 obstack_int32_grow_fast (atwc
.extrapool
,
2021 -(obstack_object_size (atwc
.indpool
)
2022 / sizeof (int32_t)));
2023 obstack_int32_grow_fast (atwc
.extrapool
, runp
->nwcs
- 1);
2026 runp
= runp
->wcnext
;
2027 while (runp
->wcnext
!= NULL
2028 && runp
->nwcs
== runp
->wcnext
->nwcs
2029 && wmemcmp ((wchar_t *) runp
->wcs
,
2030 (wchar_t *)runp
->wcnext
->wcs
,
2031 runp
->nwcs
- 1) == 0
2032 && (runp
->wcs
[runp
->nwcs
- 1]
2033 == runp
->wcnext
->wcs
[runp
->nwcs
- 1] + 1));
2035 /* Now walk backward from here to the beginning. */
2038 for (i
= 1; i
< runp
->nwcs
; ++i
)
2039 obstack_int32_grow_fast (atwc
.extrapool
, curp
->wcs
[i
]);
2041 /* Now find the end of the consecutive sequence and
2042 add all the indices in the indirect pool. */
2045 weightidx
= output_weightwc (atwc
.weightpool
, atwc
.collate
,
2047 obstack_int32_grow (atwc
.indpool
, weightidx
);
2049 curp
= curp
->wclast
;
2051 while (curp
!= series_startp
);
2053 /* Add the final weight. */
2054 weightidx
= output_weightwc (atwc
.weightpool
, atwc
.collate
,
2056 obstack_int32_grow (atwc
.indpool
, weightidx
);
2058 /* And add the end byte sequence. Without length this
2060 for (i
= 1; i
< curp
->nwcs
; ++i
)
2061 obstack_int32_grow (atwc
.extrapool
, curp
->wcs
[i
]);
2065 /* A single entry. Simply add the index and the length and
2066 string (except for the first character which is already
2070 /* Output the weight info. */
2071 weightidx
= output_weightwc (atwc
.weightpool
, atwc
.collate
,
2074 assert (runp
->nwcs
> 0);
2075 added
= (1 + 1 + runp
->nwcs
- 1) * sizeof (int32_t);
2076 if (sizeof (int) == sizeof (int32_t))
2077 obstack_make_room (atwc
.extrapool
, added
);
2079 obstack_int32_grow_fast (atwc
.extrapool
, weightidx
);
2080 obstack_int32_grow_fast (atwc
.extrapool
, runp
->nwcs
- 1);
2081 for (i
= 1; i
< runp
->nwcs
; ++i
)
2082 obstack_int32_grow_fast (atwc
.extrapool
, runp
->wcs
[i
]);
2086 runp
= runp
->wcnext
;
2088 while (runp
!= NULL
);
/* Emit the LC_COLLATE category of LOCALE through write_locale_data, using
   OUTPUT_PATH as the destination argument.
   The first field is always `nrules'.  When the category has no data the
   remaining fields are written as empty entries, except for the
   _NL_COLLATE_SYMB_HASH_SIZEMB slot which is written as the number 0.
   Otherwise the fields are produced in this order:
   - the rule set table, padded to LOCFILE_ALIGN;
   - the multibyte data: `tablemb' (256 entries) together with the
     weight, extra and indirect pools.  A byte whose `mbheads' list is a
     single element of at most one byte stores its weight index directly;
     any other byte stores the negated offset of its search list inside
     the extra pool;
   - three empty fields;
   - the wide character data: the index table filled by add_to_tablewc
     via wchead_table_iterate, again with the three pools;
   - a hash table of the collation element names (elements that have a
     multibyte form and weights but are not characters), its size and the
     pool holding the records;
   - `mbseqorder', the `wcseqorder' table and CHARMAP's code set name.
   The three local obstacks are released with obstack_free at the end.
   NOTE(review): brace-only lines, several declarations and some comment
   terminators are missing from this listing; confirm nesting against the
   pristine file. */
2093 collate_output (struct localedef_t
*locale
, const struct charmap_t
*charmap
,
2094 const char *output_path
)
2096 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
2097 const size_t nelems
= _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
);
2098 struct locale_file file
;
2100 int32_t tablemb
[256];
2101 struct obstack weightpool
;
2102 struct obstack extrapool
;
2103 struct obstack indirectpool
;
2104 struct section_list
*sect
;
2105 struct collidx_table tablewc
;
2107 uint32_t *elem_table
;
2109 struct element_t
*runp
;
2111 init_locale_data (&file
, nelems
);
2112 add_locale_uint32 (&file
, nrules
);
2114 /* If we have no LC_COLLATE data emit only the number of rules as zero. */
2115 if (collate
== NULL
)
2118 for (idx
= 1; idx
< nelems
; idx
++)
2120 /* The words have to be handled specially. */
2121 if (idx
== _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB
))
2122 add_locale_uint32 (&file
, 0);
2124 add_locale_empty (&file
);
2126 write_locale_data (output_path
, LC_COLLATE
, "LC_COLLATE", &file
);
2130 obstack_init (&weightpool
);
2131 obstack_init (&extrapool
);
2132 obstack_init (&indirectpool
);
2134 /* Since we are using the sign of an integer to mark indirection the
2135 offsets in the arrays we are indirectly referring to must not be
2136 zero since -0 == 0. Therefore we add a bit of dummy content. */
2137 obstack_int32_grow (&extrapool
, 0);
2138 obstack_int32_grow (&indirectpool
, 0);
2140 /* Prepare the ruleset table. */
2141 for (sect
= collate
->sections
, i
= 0; sect
!= NULL
; sect
= sect
->next
)
2142 if (sect
->rules
!= NULL
&& sect
->ruleidx
== i
)
2146 obstack_make_room (&weightpool
, nrules
);
2148 for (j
= 0; j
< nrules
; ++j
)
2149 obstack_1grow_fast (&weightpool
, sect
->rules
[j
]);
2152 /* And align the output. */
2153 i
= (nrules
* i
) % LOCFILE_ALIGN
;
2156 obstack_1grow (&weightpool
, '\0');
2157 while (++i
< LOCFILE_ALIGN
);
2159 add_locale_raw_obstack (&file
, &weightpool
);
2161 /* Generate the 8-bit table. Walk through the lists of sequences
2162 starting with the same byte and add them one after the other to
2163 the table. In case we have more than one sequence starting with
2164 the same byte we have to use extra indirection.
2166 First add a record for the NUL byte. This entry will never be used
2167 so it does not matter. */
2170 /* Now insert the `UNDEFINED' value if it is used. Since this value
2171 will probably be used more than once it is good to store the
2172 weights only once. */
2173 if (collate
->undefined
.used_in_level
!= 0)
2174 output_weight (&weightpool
, collate
, &collate
->undefined
);
2176 for (ch
= 1; ch
< 256; ++ch
)
2177 if (collate
->mbheads
[ch
]->mbnext
== NULL
2178 && collate
->mbheads
[ch
]->nmbs
<= 1)
2180 tablemb
[ch
] = output_weight (&weightpool
, collate
,
2181 collate
->mbheads
[ch
]);
2185 /* The entries in the list are sorted by length and then
2186 alphabetically. This is the order in which we will add the
2187 elements to the collation table. This allows simply walking
2188 the table in sequence and stopping at the first matching
2189 entry. Since the longer sequences are coming first in the
2190 list they have the possibility to match first, just as it
2191 has to be. In the worst case we are walking to the end of
2192 the list where we put, if no singlebyte sequence is defined
2193 in the locale definition, the weights for UNDEFINED.
2195 To reduce the length of the search list we compress them a bit.
2196 This happens by collecting sequences of consecutive byte
2197 sequences in one entry (having a begin and end byte sequence)
2198 and add only one index into the weight table. We can find the
2199 consecutive entries since they are also consecutive in the list. */
2200 struct element_t
*runp
= collate
->mbheads
[ch
];
2201 struct element_t
*lastp
;
2203 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool
)));
2205 tablemb
[ch
] = -obstack_object_size (&extrapool
);
2209 /* Store the current index in the weight table. We know that
2210 the current position in the `extrapool' is aligned on a
2215 /* Find out whether this is a single entry or we have more than
2216 one consecutive entry. */
2217 if (runp
->mbnext
!= NULL
2218 && runp
->nmbs
== runp
->mbnext
->nmbs
2219 && memcmp (runp
->mbs
, runp
->mbnext
->mbs
, runp
->nmbs
- 1) == 0
2220 && (runp
->mbs
[runp
->nmbs
- 1]
2221 == runp
->mbnext
->mbs
[runp
->nmbs
- 1] + 1))
2224 struct element_t
*series_startp
= runp
;
2225 struct element_t
*curp
;
2227 /* Compute how much space we will need. */
2228 added
= LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
2229 + 2 * (runp
->nmbs
- 1));
2230 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool
)));
2231 obstack_make_room (&extrapool
, added
);
2233 /* More than one consecutive entry. We mark this by having
2234 a negative index into the indirect table. */
2235 obstack_int32_grow_fast (&extrapool
,
2236 -(obstack_object_size (&indirectpool
)
2237 / sizeof (int32_t)));
2239 /* Now search first the end of the series. */
2241 runp
= runp
->mbnext
;
2242 while (runp
->mbnext
!= NULL
2243 && runp
->nmbs
== runp
->mbnext
->nmbs
2244 && memcmp (runp
->mbs
, runp
->mbnext
->mbs
,
2245 runp
->nmbs
- 1) == 0
2246 && (runp
->mbs
[runp
->nmbs
- 1]
2247 == runp
->mbnext
->mbs
[runp
->nmbs
- 1] + 1));
2249 /* Now walk backward from here to the beginning. */
2252 assert (runp
->nmbs
<= 256);
2253 obstack_1grow_fast (&extrapool
, curp
->nmbs
- 1);
2254 for (i
= 1; i
< curp
->nmbs
; ++i
)
2255 obstack_1grow_fast (&extrapool
, curp
->mbs
[i
]);
2257 /* Now find the end of the consecutive sequence and
2258 add all the indices in the indirect pool. */
2261 weightidx
= output_weight (&weightpool
, collate
, curp
);
2262 obstack_int32_grow (&indirectpool
, weightidx
);
2264 curp
= curp
->mblast
;
2266 while (curp
!= series_startp
);
2268 /* Add the final weight. */
2269 weightidx
= output_weight (&weightpool
, collate
, curp
);
2270 obstack_int32_grow (&indirectpool
, weightidx
);
2272 /* And add the end byte sequence. Without length this
2274 for (i
= 1; i
< curp
->nmbs
; ++i
)
2275 obstack_1grow_fast (&extrapool
, curp
->mbs
[i
]);
2279 /* A single entry. Simply add the index and the length and
2280 string (except for the first character which is already
2284 /* Output the weight info. */
2285 weightidx
= output_weight (&weightpool
, collate
, runp
);
2287 added
= LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
2289 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool
)));
2290 obstack_make_room (&extrapool
, added
);
2292 obstack_int32_grow_fast (&extrapool
, weightidx
);
2293 assert (runp
->nmbs
<= 256);
2294 obstack_1grow_fast (&extrapool
, runp
->nmbs
- 1);
2296 for (i
= 1; i
< runp
->nmbs
; ++i
)
2297 obstack_1grow_fast (&extrapool
, runp
->mbs
[i
]);
2300 /* Add alignment bytes if necessary. */
2301 while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool
)))
2302 obstack_1grow_fast (&extrapool
, '\0');
2306 runp
= runp
->mbnext
;
2308 while (runp
!= NULL
);
2310 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool
)));
2312 /* If the final entry in the list is not a single character we
2313 add an UNDEFINED entry here. */
2314 if (lastp
->nmbs
!= 1)
2316 int added
= LOCFILE_ALIGN_UP (sizeof (int32_t) + 1 + 1);
2317 obstack_make_room (&extrapool
, added
);
2319 obstack_int32_grow_fast (&extrapool
, 0);
2320 /* XXX What rule? We just pick the first. */
2321 obstack_1grow_fast (&extrapool
, 0);
2322 /* Length is zero. */
2323 obstack_1grow_fast (&extrapool
, 0);
2325 /* Add alignment bytes if necessary. */
2326 while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool
)))
2327 obstack_1grow_fast (&extrapool
, '\0');
2331 /* Add padding to the tables if necessary. */
2332 while (!LOCFILE_ALIGNED_P (obstack_object_size (&weightpool
)))
2333 obstack_1grow (&weightpool
, 0);
2335 /* Now add the four tables. */
2336 add_locale_uint32_array (&file
, (const uint32_t *) tablemb
, 256);
2337 add_locale_raw_obstack (&file
, &weightpool
);
2338 add_locale_raw_obstack (&file
, &extrapool
);
2339 add_locale_raw_obstack (&file
, &indirectpool
);
2341 /* Now the same for the wide character table. We need to store some
2342 more information here. */
2343 add_locale_empty (&file
);
2344 add_locale_empty (&file
);
2345 add_locale_empty (&file
);
2347 /* Since we are using the sign of an integer to mark indirection the
2348 offsets in the arrays we are indirectly referring to must not be
2349 zero since -0 == 0. Therefore we add a bit of dummy content. */
2350 obstack_int32_grow (&extrapool
, 0);
2351 obstack_int32_grow (&indirectpool
, 0);
2353 /* Now insert the `UNDEFINED' value if it is used. Since this value
2354 will probably be used more than once it is good to store the
2355 weights only once. */
2356 if (output_weightwc (&weightpool
, collate
, &collate
->undefined
) != 0)
2359 /* Generate the table. Walk through the lists of sequences starting
2360 with the same wide character and add them one after the other to
2361 the table. In case we have more than one sequence starting with
2362 the same byte we have to use extra indirection. */
2365 collidx_table_init (&tablewc
);
2367 atwc
.weightpool
= &weightpool
;
2368 atwc
.extrapool
= &extrapool
;
2369 atwc
.indpool
= &indirectpool
;
2370 atwc
.collate
= collate
;
2371 atwc
.tablewc
= &tablewc
;
2373 wchead_table_iterate (&collate
->wcheads
, add_to_tablewc
);
2375 memset (&atwc
, 0, sizeof (atwc
));
2377 /* Now add the four tables. */
2378 add_locale_collidx_table (&file
, &tablewc
);
2379 add_locale_raw_obstack (&file
, &weightpool
);
2380 add_locale_raw_obstack (&file
, &extrapool
);
2381 add_locale_raw_obstack (&file
, &indirectpool
);
2383 /* Finally write the table with collation element names out. It is
2384 a hash table with a simple function which gets the name of the
2385 character as the input. One character might have many names. The
2386 value associated with the name is an index into the weight table
2387 where we are then interested in the first-level weight value.
2389 To determine how large the table should be we are counting the
2390 elements we have to put in. Since we are using internal chaining
2391 using a secondary hash function we have to make the table a bit
2392 larger to avoid extremely long search times. We can achieve
2393 good results with a 40% larger table than there are entries. */
2395 runp
= collate
->start
;
2396 while (runp
!= NULL
)
2398 if (runp
->mbs
!= NULL
&& runp
->weights
!= NULL
&& !runp
->is_character
)
2399 /* Yep, the element really counts. */
2404 /* Add 50% and find the next prime number. */
2405 elem_size
= next_prime (elem_size
+ (elem_size
>> 1));
2407 /* Allocate the table. Each entry consists of two words: the hash
2408 value and an index in a secondary table which provides the index
2409 into the weight table and the string itself (so that a match can
2411 elem_table
= (uint32_t *) obstack_alloc (&extrapool
,
2412 elem_size
* 2 * sizeof (uint32_t));
2413 memset (elem_table
, '\0', elem_size
* 2 * sizeof (uint32_t));
2415 /* Now add the elements. */
2416 runp
= collate
->start
;
2417 while (runp
!= NULL
)
2419 if (runp
->mbs
!= NULL
&& runp
->weights
!= NULL
&& !runp
->is_character
)
2421 /* Compute the hash value of the name. */
2422 uint32_t namelen
= strlen (runp
->name
);
2423 uint32_t hash
= elem_hash (runp
->name
, namelen
);
2424 size_t idx
= hash
% elem_size
;
2426 size_t start_idx
= idx
;
2429 if (elem_table
[idx
* 2] != 0)
2431 /* The spot is already taken. Try iterating using the value
2432 from the secondary hashing function. */
2433 size_t iter
= hash
% (elem_size
- 2) + 1;
2438 if (idx
>= elem_size
)
2440 assert (idx
!= start_idx
);
2442 while (elem_table
[idx
* 2] != 0);
2444 /* This is the spot where we will insert the value. */
2445 elem_table
[idx
* 2] = hash
;
2446 elem_table
[idx
* 2 + 1] = obstack_object_size (&extrapool
);
2448 /* The string itself including length. */
2449 obstack_1grow (&extrapool
, namelen
);
2450 obstack_grow (&extrapool
, runp
->name
, namelen
);
2452 /* And the multibyte representation. */
2453 obstack_1grow (&extrapool
, runp
->nmbs
);
2454 obstack_grow (&extrapool
, runp
->mbs
, runp
->nmbs
);
2456 /* And align again to 32 bits. */
2457 if ((1 + namelen
+ 1 + runp
->nmbs
) % sizeof (int32_t) != 0)
2458 obstack_grow (&extrapool
, "\0\0",
2460 - ((1 + namelen
+ 1 + runp
->nmbs
)
2461 % sizeof (int32_t))));
2463 /* Now some 32-bit values: multibyte collation sequence,
2464 wide char string (including length), and wide char
2465 collation sequence. */
2466 obstack_int32_grow (&extrapool
, runp
->mbseqorder
);
2468 obstack_int32_grow (&extrapool
, runp
->nwcs
);
2469 obstack_grow (&extrapool
, runp
->wcs
,
2470 runp
->nwcs
* sizeof (uint32_t));
2471 maybe_swap_uint32_obstack (&extrapool
, runp
->nwcs
);
2473 obstack_int32_grow (&extrapool
, runp
->wcseqorder
);
2479 /* Prepare to write out this data. */
2480 add_locale_uint32 (&file
, elem_size
);
2481 add_locale_uint32_array (&file
, elem_table
, 2 * elem_size
);
2482 add_locale_raw_obstack (&file
, &extrapool
);
2483 add_locale_raw_data (&file
, collate
->mbseqorder
, 256);
2484 add_locale_collseq_table (&file
, &collate
->wcseqorder
);
2485 add_locale_string (&file
, charmap
->code_set_name
);
2486 write_locale_data (output_path
, LC_COLLATE
, "LC_COLLATE", &file
);
2488 obstack_free (&weightpool
, NULL
);
2489 obstack_free (&extrapool
, NULL
);
2490 obstack_free (&indirectpool
, NULL
);
/* Read tokens from LDFILE with lr_token and classify each of them:
   - tok_eof and tok_end are tested first;
   - tok_ifdef/tok_ifndef produce the "nested conditionals not supported"
     error, after which the function calls itself with tok_endif as the
     TO_ENDIF argument and tests the result for tok_eof/tok_end;
   - tok_endif, and tok_else while TO_ENDIF is zero, are followed by
     lr_ignore_rest (ldfile, 1);
   - tok_elifdef/tok_elifndef while TO_ENDIF is zero leave the rest of the
     line unread;
   - any other tok_else (TO_ENDIF nonzero) produces the "more than one
     'else'" error.
   The recursive call shows that a token value is returned.
   NOTE(review): the loop header, the braces and the `return' statements
   are missing from this listing, so which branches return and which fall
   through to the final lr_ignore_rest (ldfile, 0) must be confirmed
   against the pristine file. */
2495 skip_to (struct linereader
*ldfile
, struct locale_collate_t
*collate
,
2496 const struct charmap_t
*charmap
, int to_endif
)
2500 struct token
*now
= lr_token (ldfile
, charmap
, NULL
, NULL
, 0);
2501 enum token_t nowtok
= now
->tok
;
2503 if (nowtok
== tok_eof
|| nowtok
== tok_end
)
2506 if (nowtok
== tok_ifdef
|| nowtok
== tok_ifndef
)
2508 lr_error (ldfile
, _("%s: nested conditionals not supported"),
2510 nowtok
= skip_to (ldfile
, collate
, charmap
, tok_endif
);
2511 if (nowtok
== tok_eof
|| nowtok
== tok_end
)
2514 else if (nowtok
== tok_endif
|| (!to_endif
&& nowtok
== tok_else
))
2516 lr_ignore_rest (ldfile
, 1);
2519 else if (!to_endif
&& (nowtok
== tok_elifdef
|| nowtok
== tok_elifndef
))
2521 /* Do not read the rest of the line. */
2524 else if (nowtok
== tok_else
)
2526 lr_error (ldfile
, _("%s: more than one 'else'"), "LC_COLLATE");
2529 lr_ignore_rest (ldfile
, 0);
2535 collate_read (struct linereader
*ldfile
, struct localedef_t
*result
,
2536 const struct charmap_t
*charmap
, const char *repertoire_name
,
2539 struct repertoire_t
*repertoire
= NULL
;
2540 struct locale_collate_t
*collate
;
2542 struct token
*arg
= NULL
;
2543 enum token_t nowtok
;
2544 enum token_t was_ellipsis
= tok_none
;
2545 struct localedef_t
*copy_locale
= NULL
;
2548 1 - between `order-start' and `order-end'
2549 2 - after `order-end'
2550 3 - after `reorder-after', waiting for `reorder-end'
2551 4 - after `reorder-end'
2552 5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2553 6 - after `reorder-sections-end'
2557 /* Get the repertoire we have to use. */
2558 if (repertoire_name
!= NULL
)
2559 repertoire
= repertoire_read (repertoire_name
);
2561 /* The rest of the line containing `LC_COLLATE' must be free. */
2562 lr_ignore_rest (ldfile
, 1);
2568 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2571 while (nowtok
== tok_eol
);
2573 if (nowtok
!= tok_define
)
2577 lr_ignore_rest (ldfile
, 0);
2580 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2581 if (arg
->tok
!= tok_ident
)
2582 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2585 /* Simply add the new symbol. */
2586 struct name_list
*newsym
= xmalloc (sizeof (*newsym
)
2587 + arg
->val
.str
.lenmb
+ 1);
2588 memcpy (newsym
->str
, arg
->val
.str
.startmb
, arg
->val
.str
.lenmb
);
2589 newsym
->str
[arg
->val
.str
.lenmb
] = '\0';
2590 newsym
->next
= defined
;
2593 lr_ignore_rest (ldfile
, 1);
2598 if (nowtok
== tok_copy
)
2600 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2601 if (now
->tok
!= tok_string
)
2603 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2607 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2608 while (now
->tok
!= tok_eof
&& now
->tok
!= tok_end
);
2610 if (now
->tok
!= tok_eof
2611 || (now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
),
2612 now
->tok
== tok_eof
))
2613 lr_error (ldfile
, _("%s: premature end of file"), "LC_COLLATE");
2614 else if (now
->tok
!= tok_lc_collate
)
2616 lr_error (ldfile
, _("\
2617 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2618 lr_ignore_rest (ldfile
, 0);
2621 lr_ignore_rest (ldfile
, 1);
2626 if (! ignore_content
)
2628 /* Get the locale definition. */
2629 copy_locale
= load_locale (LC_COLLATE
, now
->val
.str
.startmb
,
2630 repertoire_name
, charmap
, NULL
);
2631 if ((copy_locale
->avail
& COLLATE_LOCALE
) == 0)
2633 /* Not yet loaded. So do it now. */
2634 if (locfile_read (copy_locale
, charmap
) != 0)
2638 if (copy_locale
->categories
[LC_COLLATE
].collate
== NULL
)
2642 lr_ignore_rest (ldfile
, 1);
2644 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2648 /* Prepare the data structures. */
2649 collate_startup (ldfile
, result
, copy_locale
, ignore_content
);
2650 collate
= result
->categories
[LC_COLLATE
].collate
;
2658 /* Of course we don't proceed beyond the end of file. */
2659 if (nowtok
== tok_eof
)
2662 /* Ignore empty lines. */
2663 if (nowtok
== tok_eol
)
2665 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2673 /* Allow copying other locales. */
2674 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2675 if (now
->tok
!= tok_string
)
2678 if (! ignore_content
)
2679 load_locale (LC_COLLATE
, now
->val
.str
.startmb
, repertoire_name
,
2682 lr_ignore_rest (ldfile
, 1);
2685 case tok_coll_weight_max
:
2686 /* Ignore the rest of the line if we don't need the input of
2690 lr_ignore_rest (ldfile
, 0);
2697 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2698 if (arg
->tok
!= tok_number
)
2700 if (collate
->col_weight_max
!= -1)
2701 lr_error (ldfile
, _("%s: duplicate definition of `%s'"),
2702 "LC_COLLATE", "col_weight_max");
2704 collate
->col_weight_max
= arg
->val
.num
;
2705 lr_ignore_rest (ldfile
, 1);
2708 case tok_section_symbol
:
2709 /* Ignore the rest of the line if we don't need the input of
2713 lr_ignore_rest (ldfile
, 0);
2720 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2721 if (arg
->tok
!= tok_bsymbol
)
2723 else if (!ignore_content
)
2725 /* Check whether this section is already known. */
2726 struct section_list
*known
= collate
->sections
;
2727 while (known
!= NULL
)
2729 if (strcmp (known
->name
, arg
->val
.str
.startmb
) == 0)
2731 known
= known
->next
;
2737 _("%s: duplicate declaration of section `%s'"),
2738 "LC_COLLATE", arg
->val
.str
.startmb
);
2739 free (arg
->val
.str
.startmb
);
2742 collate
->sections
= make_seclist_elem (collate
,
2743 arg
->val
.str
.startmb
,
2746 lr_ignore_rest (ldfile
, known
== NULL
);
2750 free (arg
->val
.str
.startmb
);
2751 lr_ignore_rest (ldfile
, 0);
2755 case tok_collating_element
:
2756 /* Ignore the rest of the line if we don't need the input of
2760 lr_ignore_rest (ldfile
, 0);
2764 if (state
!= 0 && state
!= 2)
2767 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2768 if (arg
->tok
!= tok_bsymbol
)
2772 const char *symbol
= arg
->val
.str
.startmb
;
2773 size_t symbol_len
= arg
->val
.str
.lenmb
;
2775 /* Next the `from' keyword. */
2776 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2777 if (arg
->tok
!= tok_from
)
2779 free ((char *) symbol
);
2783 ldfile
->return_widestr
= 1;
2784 ldfile
->translate_strings
= 1;
2786 /* Finally the string with the replacement. */
2787 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2789 ldfile
->return_widestr
= 0;
2790 ldfile
->translate_strings
= 0;
2792 if (arg
->tok
!= tok_string
)
2795 if (!ignore_content
&& symbol
!= NULL
)
2797 /* The name is already defined. */
2798 if (check_duplicate (ldfile
, collate
, charmap
,
2799 repertoire
, symbol
, symbol_len
))
2802 if (arg
->val
.str
.startmb
!= NULL
)
2803 insert_entry (&collate
->elem_table
, symbol
, symbol_len
,
2804 new_element (collate
,
2805 arg
->val
.str
.startmb
,
2806 arg
->val
.str
.lenmb
- 1,
2807 arg
->val
.str
.startwc
,
2808 symbol
, symbol_len
, 0));
2813 free ((char *) symbol
);
2814 free (arg
->val
.str
.startmb
);
2815 free (arg
->val
.str
.startwc
);
2817 lr_ignore_rest (ldfile
, 1);
2821 case tok_collating_symbol
:
2822 /* Ignore the rest of the line if we don't need the input of
2826 lr_ignore_rest (ldfile
, 0);
2830 if (state
!= 0 && state
!= 2)
2833 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2834 if (arg
->tok
!= tok_bsymbol
)
2838 char *symbol
= arg
->val
.str
.startmb
;
2839 size_t symbol_len
= arg
->val
.str
.lenmb
;
2840 char *endsymbol
= NULL
;
2841 size_t endsymbol_len
= 0;
2842 enum token_t ellipsis
= tok_none
;
2844 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2845 if (arg
->tok
== tok_ellipsis2
|| arg
->tok
== tok_ellipsis4
)
2847 ellipsis
= arg
->tok
;
2849 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
2851 if (arg
->tok
!= tok_bsymbol
)
2857 endsymbol
= arg
->val
.str
.startmb
;
2858 endsymbol_len
= arg
->val
.str
.lenmb
;
2860 lr_ignore_rest (ldfile
, 1);
2862 else if (arg
->tok
!= tok_eol
)
2868 if (!ignore_content
)
2871 || (ellipsis
!= tok_none
&& endsymbol
== NULL
))
2873 lr_error (ldfile
, _("\
2874 %s: unknown character in collating symbol name"),
2878 else if (ellipsis
== tok_none
)
2880 /* A single symbol, no ellipsis. */
2881 if (check_duplicate (ldfile
, collate
, charmap
,
2882 repertoire
, symbol
, symbol_len
))
2883 /* The name is already defined. */
2886 insert_entry (&collate
->sym_table
, symbol
, symbol_len
,
2887 new_symbol (collate
, symbol
, symbol_len
));
2889 else if (symbol_len
!= endsymbol_len
)
2893 _("invalid names for character range"));
2898 /* Oh my, we have to handle an ellipsis. First, as
2899 usual, determine the common prefix and then
2900 convert the rest into a range. */
2902 unsigned long int from
;
2903 unsigned long int to
;
2906 for (prefixlen
= 0; prefixlen
< symbol_len
; ++prefixlen
)
2907 if (symbol
[prefixlen
] != endsymbol
[prefixlen
])
2910 /* Convert the rest into numbers. */
2911 symbol
[symbol_len
] = '\0';
2912 from
= strtoul (&symbol
[prefixlen
], &endp
,
2913 ellipsis
== tok_ellipsis2
? 16 : 10);
2915 goto col_sym_inv_range
;
2917 endsymbol
[symbol_len
] = '\0';
2918 to
= strtoul (&endsymbol
[prefixlen
], &endp
,
2919 ellipsis
== tok_ellipsis2
? 16 : 10);
2921 goto col_sym_inv_range
;
2924 goto col_sym_inv_range
;
2926 /* Now loop over all entries. */
2931 symbuf
= (char *) obstack_alloc (&collate
->mempool
,
2934 /* Create the name. */
2936 ellipsis
== tok_ellipsis2
2937 ? "%.*s%.*lX" : "%.*s%.*lu",
2938 (int) prefixlen
, symbol
,
2939 (int) (symbol_len
- prefixlen
), from
);
2941 if (check_duplicate (ldfile
, collate
, charmap
,
2942 repertoire
, symbuf
, symbol_len
))
2943 /* The name is already defined. */
2946 insert_entry (&collate
->sym_table
, symbuf
,
2948 new_symbol (collate
, symbuf
,
2951 /* Increment the counter. */
2967 case tok_symbol_equivalence
:
2968 /* Ignore the rest of the line if we don't need the input of
2972 lr_ignore_rest (ldfile
, 0);
2979 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2980 if (arg
->tok
!= tok_bsymbol
)
2984 const char *newname
= arg
->val
.str
.startmb
;
2985 size_t newname_len
= arg
->val
.str
.lenmb
;
2986 const char *symname
;
2988 void *symval
; /* Actually struct symbol_t* */
2990 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2991 if (arg
->tok
!= tok_bsymbol
)
2993 free ((char *) newname
);
2997 symname
= arg
->val
.str
.startmb
;
2998 symname_len
= arg
->val
.str
.lenmb
;
3000 if (newname
== NULL
)
3002 lr_error (ldfile
, _("\
3003 %s: unknown character in equivalent definition name"),
3007 free ((char *) newname
);
3008 free ((char *) symname
);
3011 if (symname
== NULL
)
3013 lr_error (ldfile
, _("\
3014 %s: unknown character in equivalent definition value"),
3016 goto sym_equiv_free
;
3019 /* See whether the symbol name is already defined. */
3020 if (find_entry (&collate
->sym_table
, symname
, symname_len
,
3023 lr_error (ldfile
, _("\
3024 %s: unknown symbol `%s' in equivalent definition"),
3025 "LC_COLLATE", symname
);
3026 goto sym_equiv_free
;
3029 if (insert_entry (&collate
->sym_table
,
3030 newname
, newname_len
, symval
) < 0)
3032 lr_error (ldfile
, _("\
3033 error while adding equivalent collating symbol"));
3034 goto sym_equiv_free
;
3037 free ((char *) symname
);
3039 lr_ignore_rest (ldfile
, 1);
3043 /* Ignore the rest of the line if we don't need the input of
3047 lr_ignore_rest (ldfile
, 0);
3051 /* We get told about the scripts we know. */
3052 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3053 if (arg
->tok
!= tok_bsymbol
)
3057 struct section_list
*runp
= collate
->known_sections
;
3060 while (runp
!= NULL
)
3061 if (strncmp (runp
->name
, arg
->val
.str
.startmb
,
3062 arg
->val
.str
.lenmb
) == 0
3063 && runp
->name
[arg
->val
.str
.lenmb
] == '\0')
3066 runp
= runp
->def_next
;
3070 lr_error (ldfile
, _("duplicate definition of script `%s'"),
3072 lr_ignore_rest (ldfile
, 0);
3076 runp
= (struct section_list
*) xcalloc (1, sizeof (*runp
));
3077 name
= (char *) xmalloc (arg
->val
.str
.lenmb
+ 1);
3078 memcpy (name
, arg
->val
.str
.startmb
, arg
->val
.str
.lenmb
);
3079 name
[arg
->val
.str
.lenmb
] = '\0';
3082 runp
->def_next
= collate
->known_sections
;
3083 collate
->known_sections
= runp
;
3085 lr_ignore_rest (ldfile
, 1);
3088 case tok_order_start
:
3089 /* Ignore the rest of the line if we don't need the input of
3093 lr_ignore_rest (ldfile
, 0);
3097 if (state
!= 0 && state
!= 1 && state
!= 2)
3101 /* The 14652 draft does not specify whether all `order_start' lines
3102 must contain the same number of sort-rules, but 14651 does. So
3103 we require this here as well. */
3104 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3105 if (arg
->tok
== tok_bsymbol
)
3107 /* This better should be a section name. */
3108 struct section_list
*sp
= collate
->known_sections
;
3110 && (sp
->name
== NULL
3111 || strncmp (sp
->name
, arg
->val
.str
.startmb
,
3112 arg
->val
.str
.lenmb
) != 0
3113 || sp
->name
[arg
->val
.str
.lenmb
] != '\0'))
3118 lr_error (ldfile
, _("\
3119 %s: unknown section name `%.*s'"),
3120 "LC_COLLATE", (int) arg
->val
.str
.lenmb
,
3121 arg
->val
.str
.startmb
);
3122 /* We use the error section. */
3123 collate
->current_section
= &collate
->error_section
;
3125 if (collate
->error_section
.first
== NULL
)
3127 /* Insert &collate->error_section at the end of
3128 the collate->sections list. */
3129 if (collate
->sections
== NULL
)
3130 collate
->sections
= &collate
->error_section
;
3133 sp
= collate
->sections
;
3134 while (sp
->next
!= NULL
)
3137 sp
->next
= &collate
->error_section
;
3139 collate
->error_section
.next
= NULL
;
3144 /* One should not be allowed to open the same
3146 if (sp
->first
!= NULL
)
3147 lr_error (ldfile
, _("\
3148 %s: multiple order definitions for section `%s'"),
3149 "LC_COLLATE", sp
->name
);
3152 /* Insert sp in the collate->sections list,
3153 right after collate->current_section. */
3154 if (collate
->current_section
!= NULL
)
3156 sp
->next
= collate
->current_section
->next
;
3157 collate
->current_section
->next
= sp
;
3159 else if (collate
->sections
== NULL
)
3160 /* This is the first section to be defined. */
3161 collate
->sections
= sp
;
3163 collate
->current_section
= sp
;
3166 /* Next should come the end of the line or a semicolon. */
3167 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
3169 if (arg
->tok
== tok_eol
)
3173 /* This means we have exactly one rule: `forward'. */
3175 lr_error (ldfile
, _("\
3176 %s: invalid number of sorting rules"),
3180 sp
->rules
= obstack_alloc (&collate
->mempool
,
3181 (sizeof (enum coll_sort_rule
)
3183 for (cnt
= 0; cnt
< nrules
; ++cnt
)
3184 sp
->rules
[cnt
] = sort_forward
;
3190 /* Get the next token. */
3191 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
3197 /* There is no section symbol. Therefore we use the unnamed
3199 collate
->current_section
= &collate
->unnamed_section
;
3201 if (collate
->unnamed_section_defined
)
3202 lr_error (ldfile
, _("\
3203 %s: multiple order definitions for unnamed section"),
3207 /* Insert &collate->unnamed_section at the beginning of
3208 the collate->sections list. */
3209 collate
->unnamed_section
.next
= collate
->sections
;
3210 collate
->sections
= &collate
->unnamed_section
;
3211 collate
->unnamed_section_defined
= true;
3215 /* Now read the direction names. */
3216 read_directions (ldfile
, arg
, charmap
, repertoire
, result
);
3218 /* From now we need the strings untranslated. */
3219 ldfile
->translate_strings
= 0;
3223 /* Ignore the rest of the line if we don't need the input of
3227 lr_ignore_rest (ldfile
, 0);
3234 /* Handle ellipsis at end of list. */
3235 if (was_ellipsis
!= tok_none
)
3237 handle_ellipsis (ldfile
, NULL
, 0, was_ellipsis
, charmap
,
3238 repertoire
, result
);
3239 was_ellipsis
= tok_none
;
3243 lr_ignore_rest (ldfile
, 1);
3246 case tok_reorder_after
:
3247 /* Ignore the rest of the line if we don't need the input of
3251 lr_ignore_rest (ldfile
, 0);
3257 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
3261 /* Handle ellipsis at end of list. */
3262 if (was_ellipsis
!= tok_none
)
3264 handle_ellipsis (ldfile
, arg
->val
.str
.startmb
,
3265 arg
->val
.str
.lenmb
, was_ellipsis
, charmap
,
3266 repertoire
, result
);
3267 was_ellipsis
= tok_none
;
3270 else if (state
== 0 && copy_locale
== NULL
)
3272 else if (state
!= 0 && state
!= 2 && state
!= 3)
3276 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3277 if (arg
->tok
== tok_bsymbol
|| arg
->tok
== tok_ucs4
)
3279 /* Find this symbol in the sequence table. */
3283 struct element_t
*insp
;
3287 if (arg
->tok
== tok_bsymbol
)
3289 startmb
= arg
->val
.str
.startmb
;
3290 lenmb
= arg
->val
.str
.lenmb
;
3294 sprintf (ucsbuf
, "U%08X", arg
->val
.ucs4
);
3299 if (find_entry (&collate
->seq_table
, startmb
, lenmb
, &ptr
) == 0)
3300 /* Yes, the symbol exists. Simply point the cursor
3302 collate
->cursor
= (struct element_t
*) ptr
;
3305 struct symbol_t
*symbp
;
3308 if (find_entry (&collate
->sym_table
, startmb
, lenmb
,
3313 if (symbp
->order
->last
!= NULL
3314 || symbp
->order
->next
!= NULL
)
3315 collate
->cursor
= symbp
->order
;
3318 /* This is a collating symbol but its position
3319 is not yet defined. */
3320 lr_error (ldfile
, _("\
3321 %s: order for collating symbol %.*s not yet defined"),
3322 "LC_COLLATE", (int) lenmb
, startmb
);
3323 collate
->cursor
= NULL
;
3327 else if (find_entry (&collate
->elem_table
, startmb
, lenmb
,
3330 insp
= (struct element_t
*) ptr
;
3332 if (insp
->last
!= NULL
|| insp
->next
!= NULL
)
3333 collate
->cursor
= insp
;
3336 /* This is a collating element but its position
3337 is not yet defined. */
3338 lr_error (ldfile
, _("\
3339 %s: order for collating element %.*s not yet defined"),
3340 "LC_COLLATE", (int) lenmb
, startmb
);
3341 collate
->cursor
= NULL
;
3347 /* This is bad. The symbol after which we have to
3348 insert does not exist. */
3349 lr_error (ldfile
, _("\
3350 %s: cannot reorder after %.*s: symbol not known"),
3351 "LC_COLLATE", (int) lenmb
, startmb
);
3352 collate
->cursor
= NULL
;
3357 lr_ignore_rest (ldfile
, no_error
);
3360 /* This must not happen. */
3364 case tok_reorder_end
:
3365 /* Ignore the rest of the line if we don't need the input of
3373 lr_ignore_rest (ldfile
, 1);
3376 case tok_reorder_sections_after
:
3377 /* Ignore the rest of the line if we don't need the input of
3381 lr_ignore_rest (ldfile
, 0);
3387 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
3391 /* Handle ellipsis at end of list. */
3392 if (was_ellipsis
!= tok_none
)
3394 handle_ellipsis (ldfile
, NULL
, 0, was_ellipsis
, charmap
,
3395 repertoire
, result
);
3396 was_ellipsis
= tok_none
;
3399 else if (state
== 3)
3401 record_error (0, 0, _("\
3402 %s: missing `reorder-end' keyword"), "LC_COLLATE");
3405 else if (state
!= 2 && state
!= 4)
3409 /* Get the name of the sections we are adding after. */
3410 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3411 if (arg
->tok
== tok_bsymbol
)
3413 /* Now find a section with this name. */
3414 struct section_list
*runp
= collate
->sections
;
3416 while (runp
!= NULL
)
3418 if (runp
->name
!= NULL
3419 && strlen (runp
->name
) == arg
->val
.str
.lenmb
3420 && memcmp (runp
->name
, arg
->val
.str
.startmb
,
3421 arg
->val
.str
.lenmb
) == 0)
3428 collate
->current_section
= runp
;
3431 /* This is bad. The section after which we have to
3432 reorder does not exist. Therefore we cannot
3433 process the whole rest of this reorder
3435 lr_error (ldfile
, _("%s: section `%.*s' not known"),
3436 "LC_COLLATE", (int) arg
->val
.str
.lenmb
,
3437 arg
->val
.str
.startmb
);
3441 lr_ignore_rest (ldfile
, 0);
3443 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3445 while (now
->tok
== tok_reorder_sections_after
3446 || now
->tok
== tok_reorder_sections_end
3447 || now
->tok
== tok_end
);
3449 /* Process the token we just saw. */
3455 /* This must not happen. */
3459 case tok_reorder_sections_end
:
3460 /* Ignore the rest of the line if we don't need the input of
3468 lr_ignore_rest (ldfile
, 1);
3473 /* Ignore the rest of the line if we don't need the input of
3477 lr_ignore_rest (ldfile
, 0);
3481 if (state
!= 0 && state
!= 1 && state
!= 3 && state
!= 5)
3484 if ((state
== 0 || state
== 5) && nowtok
== tok_ucs4
)
3487 if (nowtok
== tok_ucs4
)
3489 snprintf (ucs4buf
, sizeof (ucs4buf
), "U%08X", now
->val
.ucs4
);
3493 else if (arg
!= NULL
)
3495 symstr
= arg
->val
.str
.startmb
;
3496 symlen
= arg
->val
.str
.lenmb
;
3500 lr_error (ldfile
, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3501 (int) ldfile
->token
.val
.str
.lenmb
,
3502 ldfile
->token
.val
.str
.startmb
);
3506 struct element_t
*seqp
;
3509 /* We are outside an `order_start' region. This means
3510 we must only accept definitions of values for
3511 collation symbols since these are purely abstract
3512 values and don't need directions associated. */
3515 if (find_entry (&collate
->seq_table
, symstr
, symlen
, &ptr
) == 0)
3519 /* It's already defined. First check whether this
3520 is really a collating symbol. */
3521 if (seqp
->is_character
)
3530 if (find_entry (&collate
->sym_table
, symstr
, symlen
,
3532 /* No collating symbol, it's an error. */
3535 /* Maybe this is the first time we define a symbol
3536 value and it is before the first actual section. */
3537 if (collate
->sections
== NULL
)
3538 collate
->sections
= collate
->current_section
=
3539 &collate
->symbol_section
;
3542 if (was_ellipsis
!= tok_none
)
3544 handle_ellipsis (ldfile
, symstr
, symlen
, was_ellipsis
,
3545 charmap
, repertoire
, result
);
3547 /* Remember that we processed the ellipsis. */
3548 was_ellipsis
= tok_none
;
3550 /* And don't add the value a second time. */
3554 else if (state
== 3)
3556 /* It is possible that we already have this collation sequence.
3557 In this case we move the entry. */
3561 /* If the symbol after which we have to insert was not found
3562 ignore all entries. */
3563 if (collate
->cursor
== NULL
)
3565 lr_ignore_rest (ldfile
, 0);
3569 if (find_entry (&collate
->seq_table
, symstr
, symlen
, &ptr
) == 0)
3571 seqp
= (struct element_t
*) ptr
;
3575 if (find_entry (&collate
->sym_table
, symstr
, symlen
, &sym
) == 0
3576 && (seqp
= ((struct symbol_t
*) sym
)->order
) != NULL
)
3579 if (find_entry (&collate
->elem_table
, symstr
, symlen
, &ptr
) == 0
3580 && (seqp
= (struct element_t
*) ptr
,
3581 seqp
->last
!= NULL
|| seqp
->next
!= NULL
3582 || (collate
->start
!= NULL
&& seqp
== collate
->start
)))
3585 /* Remove the entry from the old position. */
3586 if (seqp
->last
== NULL
)
3587 collate
->start
= seqp
->next
;
3589 seqp
->last
->next
= seqp
->next
;
3590 if (seqp
->next
!= NULL
)
3591 seqp
->next
->last
= seqp
->last
;
3593 /* We also have to check whether this entry is the
3594 first or last of a section. */
3595 if (seqp
->section
->first
== seqp
)
3597 if (seqp
->section
->first
== seqp
->section
->last
)
3598 /* This section has no content anymore. */
3599 seqp
->section
->first
= seqp
->section
->last
= NULL
;
3601 seqp
->section
->first
= seqp
->next
;
3603 else if (seqp
->section
->last
== seqp
)
3604 seqp
->section
->last
= seqp
->last
;
3606 /* Now insert it in the new place. */
3607 insert_weights (ldfile
, seqp
, charmap
, repertoire
, result
,
3612 /* Otherwise we just add a new entry. */
3614 else if (state
== 5)
3616 /* We are reordering sections. Find the named section. */
3617 struct section_list
*runp
= collate
->sections
;
3618 struct section_list
*prevp
= NULL
;
3620 while (runp
!= NULL
)
3622 if (runp
->name
!= NULL
3623 && strlen (runp
->name
) == symlen
3624 && memcmp (runp
->name
, symstr
, symlen
) == 0)
3633 lr_error (ldfile
, _("%s: section `%.*s' not known"),
3634 "LC_COLLATE", (int) symlen
, symstr
);
3635 lr_ignore_rest (ldfile
, 0);
3639 if (runp
!= collate
->current_section
)
3641 /* Remove the named section from the old place and
3642 insert it in the new one. */
3643 prevp
->next
= runp
->next
;
3645 runp
->next
= collate
->current_section
->next
;
3646 collate
->current_section
->next
= runp
;
3647 collate
->current_section
= runp
;
3650 /* Process the rest of the line which might change
3651 the collation rules. */
3652 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
3654 if (arg
->tok
!= tok_eof
&& arg
->tok
!= tok_eol
)
3655 read_directions (ldfile
, arg
, charmap
, repertoire
,
3660 else if (was_ellipsis
!= tok_none
)
3662 /* Using the information in the `ellipsis_weight'
3663 element and this and the last value we have to handle
3664 the ellipsis now. */
3665 assert (state
== 1);
3667 handle_ellipsis (ldfile
, symstr
, symlen
, was_ellipsis
, charmap
,
3668 repertoire
, result
);
3670 /* Remember that we processed the ellipsis. */
3671 was_ellipsis
= tok_none
;
3673 /* And don't add the value a second time. */
3677 /* Now insert in the new place. */
3678 insert_value (ldfile
, symstr
, symlen
, charmap
, repertoire
, result
);
3682 /* Ignore the rest of the line if we don't need the input of
3686 lr_ignore_rest (ldfile
, 0);
3693 if (was_ellipsis
!= tok_none
)
3696 _("%s: cannot have `%s' as end of ellipsis range"),
3697 "LC_COLLATE", "UNDEFINED");
3699 unlink_element (collate
);
3700 was_ellipsis
= tok_none
;
3703 /* See whether UNDEFINED already appeared somewhere. */
3704 if (collate
->undefined
.next
!= NULL
3705 || &collate
->undefined
== collate
->cursor
)
3708 _("%s: order for `%.*s' already defined at %s:%Zu"),
3709 "LC_COLLATE", 9, "UNDEFINED",
3710 collate
->undefined
.file
,
3711 collate
->undefined
.line
);
3712 lr_ignore_rest (ldfile
, 0);
3715 /* Parse the weights. */
3716 insert_weights (ldfile
, &collate
->undefined
, charmap
,
3717 repertoire
, result
, tok_none
);
3720 case tok_ellipsis2
: /* symbolic hexadecimal ellipsis */
3721 case tok_ellipsis3
: /* absolute ellipsis */
3722 case tok_ellipsis4
: /* symbolic decimal ellipsis */
3723 /* This is the symbolic (decimal or hexadecimal) or absolute
3725 if (was_ellipsis
!= tok_none
)
3728 if (state
!= 0 && state
!= 1 && state
!= 3)
3731 was_ellipsis
= nowtok
;
3733 insert_weights (ldfile
, &collate
->ellipsis_weight
, charmap
,
3734 repertoire
, result
, nowtok
);
3739 /* Next we assume `LC_COLLATE'. */
3740 if (!ignore_content
)
3742 if (state
== 0 && copy_locale
== NULL
)
3743 /* We must either see a copy statement or have
3746 _("%s: empty category description not allowed"),
3748 else if (state
== 1)
3750 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
3753 /* Handle ellipsis at end of list. */
3754 if (was_ellipsis
!= tok_none
)
3756 handle_ellipsis (ldfile
, NULL
, 0, was_ellipsis
, charmap
,
3757 repertoire
, result
);
3758 was_ellipsis
= tok_none
;
3761 else if (state
== 3)
3762 record_error (0, 0, _("\
3763 %s: missing `reorder-end' keyword"), "LC_COLLATE");
3764 else if (state
== 5)
3765 record_error (0, 0, _("\
3766 %s: missing `reorder-sections-end' keyword"), "LC_COLLATE");
3768 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3769 if (arg
->tok
== tok_eof
)
3771 if (arg
->tok
== tok_eol
)
3772 lr_error (ldfile
, _("%s: incomplete `END' line"), "LC_COLLATE");
3773 else if (arg
->tok
!= tok_lc_collate
)
3774 lr_error (ldfile
, _("\
3775 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3776 lr_ignore_rest (ldfile
, arg
->tok
== tok_lc_collate
);
3782 lr_ignore_rest (ldfile
, 0);
3786 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3787 if (arg
->tok
!= tok_ident
)
3790 /* Simply add the new symbol. */
3791 struct name_list
*newsym
= xmalloc (sizeof (*newsym
)
3792 + arg
->val
.str
.lenmb
+ 1);
3793 memcpy (newsym
->str
, arg
->val
.str
.startmb
, arg
->val
.str
.lenmb
);
3794 newsym
->str
[arg
->val
.str
.lenmb
] = '\0';
3795 newsym
->next
= defined
;
3798 lr_ignore_rest (ldfile
, 1);
3804 lr_ignore_rest (ldfile
, 0);
3808 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3809 if (arg
->tok
!= tok_ident
)
3812 /* Remove _all_ occurrences of the symbol from the list. */
3813 struct name_list
*prevdef
= NULL
;
3814 struct name_list
*curdef
= defined
;
3815 while (curdef
!= NULL
)
3816 if (strncmp (arg
->val
.str
.startmb
, curdef
->str
,
3817 arg
->val
.str
.lenmb
) == 0
3818 && curdef
->str
[arg
->val
.str
.lenmb
] == '\0')
3820 if (prevdef
== NULL
)
3821 defined
= curdef
->next
;
3823 prevdef
->next
= curdef
->next
;
3825 struct name_list
*olddef
= curdef
;
3826 curdef
= curdef
->next
;
3833 curdef
= curdef
->next
;
3836 lr_ignore_rest (ldfile
, 1);
3843 lr_ignore_rest (ldfile
, 0);
3848 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3849 if (arg
->tok
!= tok_ident
)
3851 lr_ignore_rest (ldfile
, 1);
3853 if (collate
->else_action
== else_none
)
3856 while (curdef
!= NULL
)
3857 if (strncmp (arg
->val
.str
.startmb
, curdef
->str
,
3858 arg
->val
.str
.lenmb
) == 0
3859 && curdef
->str
[arg
->val
.str
.lenmb
] == '\0')
3862 curdef
= curdef
->next
;
3864 if ((nowtok
== tok_ifdef
&& curdef
!= NULL
)
3865 || (nowtok
== tok_ifndef
&& curdef
== NULL
))
3867 /* We have to use the if-branch. */
3868 collate
->else_action
= else_ignore
;
3872 /* We have to use the else-branch, if there is one. */
3873 nowtok
= skip_to (ldfile
, collate
, charmap
, 0);
3874 if (nowtok
== tok_else
)
3875 collate
->else_action
= else_seen
;
3876 else if (nowtok
== tok_elifdef
)
3881 else if (nowtok
== tok_elifndef
)
3883 nowtok
= tok_ifndef
;
3886 else if (nowtok
== tok_eof
)
3888 else if (nowtok
== tok_end
)
3894 /* XXX Should it really become necessary to support nested
3895 preprocessor handling we will push the state here. */
3896 lr_error (ldfile
, _("%s: nested conditionals not supported"),
3898 nowtok
= skip_to (ldfile
, collate
, charmap
, 1);
3899 if (nowtok
== tok_eof
)
3901 else if (nowtok
== tok_end
)
3911 lr_ignore_rest (ldfile
, 0);
3915 lr_ignore_rest (ldfile
, 1);
3917 if (collate
->else_action
== else_ignore
)
3919 /* Ignore everything until the endif. */
3920 nowtok
= skip_to (ldfile
, collate
, charmap
, 1);
3921 if (nowtok
== tok_eof
)
3923 else if (nowtok
== tok_end
)
3928 assert (collate
->else_action
== else_none
);
3929 lr_error (ldfile
, _("\
3930 %s: '%s' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE",
3931 nowtok
== tok_else
? "else"
3932 : nowtok
== tok_elifdef
? "elifdef" : "elifndef");
3939 lr_ignore_rest (ldfile
, 0);
3943 lr_ignore_rest (ldfile
, 1);
3945 if (collate
->else_action
!= else_ignore
3946 && collate
->else_action
!= else_seen
)
3947 lr_error (ldfile
, _("\
3948 %s: 'endif' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE");
3950 /* XXX If we support nested preprocessor directives we pop
3952 collate
->else_action
= else_none
;
3957 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
3960 /* Prepare for the next round. */
3961 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3966 /* When we come here we reached the end of the file. */
3967 lr_error (ldfile
, _("%s: premature end of file"), "LC_COLLATE");