1 /* Copyright (C) 1995-2014 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, see <http://www.gnu.org/licenses/>. */
27 #include <sys/param.h>
29 #include "localedef.h"
31 #include "localeinfo.h"
32 #include "linereader.h"
34 #include "elem-hash.h"
36 /* Uncomment the following line in the production version. */
37 /* #define NDEBUG 1 */
40 #define obstack_chunk_alloc malloc
41 #define obstack_chunk_free free
44 __attribute ((always_inline
))
45 obstack_int32_grow (struct obstack
*obstack
, int32_t data
)
47 assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack
)));
48 data
= maybe_swap_uint32 (data
);
49 if (sizeof (int32_t) == sizeof (int))
50 obstack_int_grow (obstack
, data
);
52 obstack_grow (obstack
, &data
, sizeof (int32_t));
56 __attribute ((always_inline
))
57 obstack_int32_grow_fast (struct obstack
*obstack
, int32_t data
)
59 assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack
)));
60 data
= maybe_swap_uint32 (data
);
61 if (sizeof (int32_t) == sizeof (int))
62 obstack_int_grow_fast (obstack
, data
);
64 obstack_grow (obstack
, &data
, sizeof (int32_t));
67 /* Forward declaration. */
70 /* Data type for list of strings. */
73 /* Successor in the known_sections list. */
74 struct section_list
*def_next
;
75 /* Successor in the sections list. */
76 struct section_list
*next
;
77 /* Name of the section. */
79 /* First element of this section. */
80 struct element_t
*first
;
81 /* Last element of this section. */
82 struct element_t
*last
;
83 /* These are the rules for this section. */
84 enum coll_sort_rule
*rules
;
85 /* Index of the rule set in the appropriate section of the output file. */
93 /* Number of elements. */
99 /* Data type for collating element. */
111 /* The following is a bit mask whose bits are set if this element is
112 used in the appropriate level. Interesting for the singlebyte
115 XXX The type here restricts the number of levels to 32. It could
116 be changed if necessary but I doubt this is necessary. */
117 unsigned int used_in_level
;
119 struct element_list_t
*weights
;
121 /* Nonzero if this is a real character definition. */
124 /* Order of the character in the sequence. This information will
125 be used in range expressions. */
129 /* Where does the definition come from. */
133 /* Which section does this belong to. */
134 struct section_list
*section
;
136 /* Predecessor and successor in the order list. */
137 struct element_t
*last
;
138 struct element_t
*next
;
140 /* Next element in multibyte output list. */
141 struct element_t
*mbnext
;
142 struct element_t
*mblast
;
144 /* Next element in wide character output list. */
145 struct element_t
*wcnext
;
146 struct element_t
*wclast
;
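149 /* Special element values: small integers cast to element pointers, used as ellipsis placeholders in weight lists instead of real elements. */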
150 #define ELEMENT_ELLIPSIS2 ((struct element_t *) 1)
151 #define ELEMENT_ELLIPSIS3 ((struct element_t *) 2)
152 #define ELEMENT_ELLIPSIS4 ((struct element_t *) 3)
154 /* Data type for collating symbol. */
159 /* Point to place in the order list. */
160 struct element_t
*order
;
162 /* Where does the definition come from. */
167 /* Sparse table of struct element_t *. */
168 #define TABLE wchead_table
169 #define ELEMENT struct element_t *
172 #define NO_ADD_LOCALE
175 /* Sparse table of int32_t. */
176 #define TABLE collidx_table
177 #define ELEMENT int32_t
181 /* Sparse table of uint32_t. */
182 #define TABLE collseq_table
183 #define ELEMENT uint32_t
184 #define DEFAULT ~((uint32_t) 0)
188 /* Simple name list for the preprocessor. */
191 struct name_list
*next
;
196 /* The real definition of the struct for the LC_COLLATE locale. */
197 struct locale_collate_t
202 /* List of known scripts. */
203 struct section_list
*known_sections
;
204 /* List of used sections. */
205 struct section_list
*sections
;
206 /* Current section using definition. */
207 struct section_list
*current_section
;
208 /* There always can be an unnamed section. */
209 struct section_list unnamed_section
;
210 /* Flag whether the unnamed section has been defined. */
211 bool unnamed_section_defined
;
212 /* To make handling of errors easier we have another section. */
213 struct section_list error_section
;
214 /* Sometimes we are defining the values for collating symbols before
215 the first actual section. */
216 struct section_list symbol_section
;
218 /* Start of the order list. */
219 struct element_t
*start
;
221 /* The undefined element. */
222 struct element_t undefined
;
224 /* This is the cursor for `reorder_after' insertions. */
225 struct element_t
*cursor
;
227 /* This value is used when handling ellipsis. */
228 struct element_t ellipsis_weight
;
230 /* Known collating elements. */
231 hash_table elem_table
;
233 /* Known collating symbols. */
234 hash_table sym_table
;
236 /* Known collation sequences. */
237 hash_table seq_table
;
239 struct obstack mempool
;
241 /* The LC_COLLATE category is a bit special as it is sometimes possible
242 that the definitions from more than one input file contain information.
243 Therefore we keep all relevant input in a list. */
244 struct locale_collate_t
*next
;
246 /* Arrays with heads of the list for each of the leading bytes in
247 the multibyte sequences. */
248 struct element_t
*mbheads
[256];
250 /* Sparse table with the heads of the lists for the wide character
251 sequences (the wide character counterpart of `mbheads'). */
252 struct wchead_table wcheads
;
254 /* The arrays with the collation sequence order. */
255 unsigned char mbseqorder
[256];
256 struct collseq_table wcseqorder
;
258 /* State of the preprocessor. */
269 /* We have a few global variables which are used for reading all
270 LC_COLLATE category descriptions in all files. */
271 static uint32_t nrules
;
273 /* List of defined preprocessor symbols. */
274 static struct name_list
*defined
;
277 /* We need UTF-8 encoding of numbers. */
279 __attribute ((always_inline
))
280 utf8_encode (char *buf
, int val
)
293 for (step
= 2; step
< 6; ++step
)
294 if ((val
& (~(uint32_t)0 << (5 * step
+ 1))) == 0)
298 *buf
= (unsigned char) (~0xff >> step
);
302 buf
[step
] = 0x80 | (val
& 0x3f);
313 static struct section_list
*
314 make_seclist_elem (struct locale_collate_t
*collate
, const char *string
,
315 struct section_list
*next
)
317 struct section_list
*newp
;
319 newp
= (struct section_list
*) obstack_alloc (&collate
->mempool
,
330 static struct element_t
*
331 new_element (struct locale_collate_t
*collate
, const char *mbs
, size_t mbslen
,
332 const uint32_t *wcs
, const char *name
, size_t namelen
,
335 struct element_t
*newp
;
337 newp
= (struct element_t
*) obstack_alloc (&collate
->mempool
,
339 newp
->name
= name
== NULL
? NULL
: obstack_copy0 (&collate
->mempool
,
343 newp
->mbs
= obstack_copy0 (&collate
->mempool
, mbs
, mbslen
);
353 size_t nwcs
= wcslen ((wchar_t *) wcs
);
355 /* Handle <U0000> as a single character. */
358 obstack_grow (&collate
->mempool
, wcs
, nwcs
* sizeof (uint32_t));
359 obstack_grow (&collate
->mempool
, &zero
, sizeof (uint32_t));
360 newp
->wcs
= (uint32_t *) obstack_finish (&collate
->mempool
);
368 newp
->mborder
= NULL
;
370 newp
->used_in_level
= 0;
371 newp
->is_character
= is_character
;
373 /* Will be assigned later. XXX */
374 newp
->mbseqorder
= 0;
375 newp
->wcseqorder
= 0;
377 /* Will be allocated later. */
378 newp
->weights
= NULL
;
383 newp
->section
= collate
->current_section
;
398 static struct symbol_t
*
399 new_symbol (struct locale_collate_t
*collate
, const char *name
, size_t len
)
401 struct symbol_t
*newp
;
403 newp
= (struct symbol_t
*) obstack_alloc (&collate
->mempool
, sizeof (*newp
));
405 newp
->name
= obstack_copy0 (&collate
->mempool
, name
, len
);
415 /* Test whether this name is already defined somewhere. */
417 check_duplicate (struct linereader
*ldfile
, struct locale_collate_t
*collate
,
418 const struct charmap_t
*charmap
,
419 struct repertoire_t
*repertoire
, const char *symbol
,
424 if (find_entry (&charmap
->char_table
, symbol
, symbol_len
, &ignore
) == 0)
426 lr_error (ldfile
, _("`%.*s' already defined in charmap"),
427 (int) symbol_len
, symbol
);
431 if (repertoire
!= NULL
432 && (find_entry (&repertoire
->char_table
, symbol
, symbol_len
, &ignore
)
435 lr_error (ldfile
, _("`%.*s' already defined in repertoire"),
436 (int) symbol_len
, symbol
);
440 if (find_entry (&collate
->sym_table
, symbol
, symbol_len
, &ignore
) == 0)
442 lr_error (ldfile
, _("`%.*s' already defined as collating symbol"),
443 (int) symbol_len
, symbol
);
447 if (find_entry (&collate
->elem_table
, symbol
, symbol_len
, &ignore
) == 0)
449 lr_error (ldfile
, _("`%.*s' already defined as collating element"),
450 (int) symbol_len
, symbol
);
458 /* Read the direction specification. */
460 read_directions (struct linereader
*ldfile
, struct token
*arg
,
461 const struct charmap_t
*charmap
,
462 struct repertoire_t
*repertoire
, struct localedef_t
*result
)
465 int max
= nrules
?: 10;
466 enum coll_sort_rule
*rules
= calloc (max
, sizeof (*rules
));
468 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
474 if (arg
->tok
== tok_forward
)
476 if (rules
[cnt
] & sort_backward
)
480 lr_error (ldfile
, _("\
481 %s: `forward' and `backward' are mutually excluding each other"),
486 else if (rules
[cnt
] & sort_forward
)
490 lr_error (ldfile
, _("\
491 %s: `%s' mentioned more than once in definition of weight %d"),
492 "LC_COLLATE", "forward", cnt
+ 1);
496 rules
[cnt
] |= sort_forward
;
500 else if (arg
->tok
== tok_backward
)
502 if (rules
[cnt
] & sort_forward
)
506 lr_error (ldfile
, _("\
507 %s: `forward' and `backward' are mutually excluding each other"),
512 else if (rules
[cnt
] & sort_backward
)
516 lr_error (ldfile
, _("\
517 %s: `%s' mentioned more than once in definition of weight %d"),
518 "LC_COLLATE", "backward", cnt
+ 1);
522 rules
[cnt
] |= sort_backward
;
526 else if (arg
->tok
== tok_position
)
528 if (rules
[cnt
] & sort_position
)
532 lr_error (ldfile
, _("\
533 %s: `%s' mentioned more than once in definition of weight %d"),
534 "LC_COLLATE", "position", cnt
+ 1);
538 rules
[cnt
] |= sort_position
;
544 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
546 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
|| arg
->tok
== tok_comma
547 || arg
->tok
== tok_semicolon
)
549 if (! valid
&& ! warned
)
551 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
555 /* See whether we have to increment the counter. */
556 if (arg
->tok
!= tok_comma
&& rules
[cnt
] != 0)
558 /* Add the default `forward' if we have seen only `position'. */
559 if (rules
[cnt
] == sort_position
)
560 rules
[cnt
] = sort_position
| sort_forward
;
565 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
)
566 /* End of line or file, so we exit the loop. */
571 /* See whether we have enough room in the array. */
575 rules
= (enum coll_sort_rule
*) xrealloc (rules
,
578 memset (&rules
[cnt
], '\0', (max
- cnt
) * sizeof (*rules
));
585 /* There must not be any more rule. */
588 lr_error (ldfile
, _("\
589 %s: too many rules; first entry only had %d"),
590 "LC_COLLATE", nrules
);
594 lr_ignore_rest (ldfile
, 0);
603 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
608 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
613 /* Now we know how many rules we have. */
615 rules
= (enum coll_sort_rule
*) xrealloc (rules
,
616 nrules
* sizeof (*rules
));
622 /* Not enough rules in this specification. */
624 lr_error (ldfile
, _("%s: not enough sorting rules"), "LC_COLLATE");
627 rules
[cnt
] = sort_forward
;
628 while (++cnt
< nrules
);
632 collate
->current_section
->rules
= rules
;
636 static struct element_t
*
637 find_element (struct linereader
*ldfile
, struct locale_collate_t
*collate
,
638 const char *str
, size_t len
)
642 /* Search for the entry among the collation sequences already defined. */
643 if (find_entry (&collate
->seq_table
, str
, len
, &result
) != 0)
645 /* Nope, not defined yet. So we see whether it is a
649 if (find_entry (&collate
->sym_table
, str
, len
, &ptr
) == 0)
651 /* It's a collation symbol. */
652 struct symbol_t
*sym
= (struct symbol_t
*) ptr
;
656 result
= sym
->order
= new_element (collate
, NULL
, 0, NULL
,
659 else if (find_entry (&collate
->elem_table
, str
, len
, &result
) != 0)
661 /* It's also no collation element. So it is a character
662 element defined later. */
663 result
= new_element (collate
, NULL
, 0, NULL
, str
, len
, 1);
664 /* Insert it into the sequence table. */
665 insert_entry (&collate
->seq_table
, str
, len
, result
);
669 return (struct element_t
*) result
;
674 unlink_element (struct locale_collate_t
*collate
)
676 if (collate
->cursor
== collate
->start
)
678 assert (collate
->cursor
->next
== NULL
);
679 assert (collate
->cursor
->last
== NULL
);
680 collate
->cursor
= NULL
;
684 if (collate
->cursor
->next
!= NULL
)
685 collate
->cursor
->next
->last
= collate
->cursor
->last
;
686 if (collate
->cursor
->last
!= NULL
)
687 collate
->cursor
->last
->next
= collate
->cursor
->next
;
688 collate
->cursor
= collate
->cursor
->last
;
694 insert_weights (struct linereader
*ldfile
, struct element_t
*elem
,
695 const struct charmap_t
*charmap
,
696 struct repertoire_t
*repertoire
, struct localedef_t
*result
,
697 enum token_t ellipsis
)
701 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
703 /* Initialize all the fields. */
704 elem
->file
= ldfile
->fname
;
705 elem
->line
= ldfile
->lineno
;
707 elem
->last
= collate
->cursor
;
708 elem
->next
= collate
->cursor
? collate
->cursor
->next
: NULL
;
709 if (collate
->cursor
!= NULL
&& collate
->cursor
->next
!= NULL
)
710 collate
->cursor
->next
->last
= elem
;
711 if (collate
->cursor
!= NULL
)
712 collate
->cursor
->next
= elem
;
713 if (collate
->start
== NULL
)
715 assert (collate
->cursor
== NULL
);
716 collate
->start
= elem
;
719 elem
->section
= collate
->current_section
;
721 if (collate
->current_section
->first
== NULL
)
722 collate
->current_section
->first
= elem
;
723 if (collate
->current_section
->last
== collate
->cursor
)
724 collate
->current_section
->last
= elem
;
726 collate
->cursor
= elem
;
728 elem
->weights
= (struct element_list_t
*)
729 obstack_alloc (&collate
->mempool
, nrules
* sizeof (struct element_list_t
));
730 memset (elem
->weights
, '\0', nrules
* sizeof (struct element_list_t
));
734 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
737 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
)
740 if (arg
->tok
== tok_ignore
)
742 /* The weight for this level has to be ignored. We use the
743 null pointer to indicate this. */
744 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
745 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
746 elem
->weights
[weight_cnt
].w
[0] = NULL
;
747 elem
->weights
[weight_cnt
].cnt
= 1;
749 else if (arg
->tok
== tok_bsymbol
|| arg
->tok
== tok_ucs4
)
752 struct element_t
*val
;
756 if (arg
->tok
== tok_bsymbol
)
758 symstr
= arg
->val
.str
.startmb
;
759 symlen
= arg
->val
.str
.lenmb
;
763 snprintf (ucs4str
, sizeof (ucs4str
), "U%08X", arg
->val
.ucs4
);
768 val
= find_element (ldfile
, collate
, symstr
, symlen
);
772 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
773 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
774 elem
->weights
[weight_cnt
].w
[0] = val
;
775 elem
->weights
[weight_cnt
].cnt
= 1;
777 else if (arg
->tok
== tok_string
)
779 /* Split the string up in the individual characters and put
780 the element definitions in the list. */
781 const char *cp
= arg
->val
.str
.startmb
;
783 struct element_t
*charelem
;
784 struct element_t
**weights
= NULL
;
789 lr_error (ldfile
, _("%s: empty weight string not allowed"),
791 lr_ignore_rest (ldfile
, 0);
799 /* Ahh, it's a bsymbol or an UCS4 value. If it's
800 the latter we have to unify the name. */
801 const char *startp
= ++cp
;
806 if (*cp
== ldfile
->escape_char
)
809 /* It's a syntax error. */
815 if (cp
- startp
== 5 && startp
[0] == 'U'
816 && isxdigit (startp
[1]) && isxdigit (startp
[2])
817 && isxdigit (startp
[3]) && isxdigit (startp
[4]))
819 unsigned int ucs4
= strtoul (startp
+ 1, NULL
, 16);
822 newstr
= (char *) xmalloc (10);
823 snprintf (newstr
, 10, "U%08X", ucs4
);
831 charelem
= find_element (ldfile
, collate
, startp
, len
);
836 /* People really shouldn't use characters directly in
837 the string. Especially since it's not really clear
838 what this means. We interpret all characters in the
839 string as if that would be bsymbols. Otherwise we
840 would have to match back to bsymbols somehow and this
841 is not what people normally expect. */
842 charelem
= find_element (ldfile
, collate
, cp
++, 1);
845 if (charelem
== NULL
)
847 /* We ignore the rest of the line. */
848 lr_ignore_rest (ldfile
, 0);
852 /* Add the pointer. */
855 struct element_t
**newp
;
857 newp
= (struct element_t
**)
858 alloca (max
* sizeof (struct element_t
*));
859 memcpy (newp
, weights
, cnt
* sizeof (struct element_t
*));
862 weights
[cnt
++] = charelem
;
866 /* Now store the information. */
867 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
868 obstack_alloc (&collate
->mempool
,
869 cnt
* sizeof (struct element_t
*));
870 memcpy (elem
->weights
[weight_cnt
].w
, weights
,
871 cnt
* sizeof (struct element_t
*));
872 elem
->weights
[weight_cnt
].cnt
= cnt
;
874 /* We don't need the string anymore. */
875 free (arg
->val
.str
.startmb
);
877 else if (ellipsis
!= tok_none
878 && (arg
->tok
== tok_ellipsis2
879 || arg
->tok
== tok_ellipsis3
880 || arg
->tok
== tok_ellipsis4
))
882 /* It must be the same ellipsis as used in the initial column. */
883 if (arg
->tok
!= ellipsis
)
884 lr_error (ldfile
, _("\
885 %s: weights must use the same ellipsis symbol as the name"),
888 /* The weight for this level will depend on the element
889 iterating over the range. Put a placeholder. */
890 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
891 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
892 elem
->weights
[weight_cnt
].w
[0] = ELEMENT_ELLIPSIS2
;
893 elem
->weights
[weight_cnt
].cnt
= 1;
898 /* It's a syntax error. */
899 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
900 lr_ignore_rest (ldfile
, 0);
904 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
905 /* This had better be the end of the line or a semicolon. */
906 if (arg
->tok
== tok_semicolon
)
907 /* OK, ignore this and read the next token. */
908 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
909 else if (arg
->tok
!= tok_eof
&& arg
->tok
!= tok_eol
)
911 /* It's a syntax error. */
912 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
913 lr_ignore_rest (ldfile
, 0);
917 while (++weight_cnt
< nrules
);
919 if (weight_cnt
< nrules
)
921 /* This means the rest of the line uses the current element as
925 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
926 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
927 if (ellipsis
== tok_none
)
928 elem
->weights
[weight_cnt
].w
[0] = elem
;
930 elem
->weights
[weight_cnt
].w
[0] = ELEMENT_ELLIPSIS2
;
931 elem
->weights
[weight_cnt
].cnt
= 1;
933 while (++weight_cnt
< nrules
);
937 if (arg
->tok
== tok_ignore
|| arg
->tok
== tok_bsymbol
)
939 /* Too many rule values. */
940 lr_error (ldfile
, _("%s: too many values"), "LC_COLLATE");
941 lr_ignore_rest (ldfile
, 0);
944 lr_ignore_rest (ldfile
, arg
->tok
!= tok_eol
&& arg
->tok
!= tok_eof
);
950 insert_value (struct linereader
*ldfile
, const char *symstr
, size_t symlen
,
951 const struct charmap_t
*charmap
, struct repertoire_t
*repertoire
,
952 struct localedef_t
*result
)
954 /* First find out what kind of symbol this is. */
957 struct element_t
*elem
= NULL
;
958 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
960 /* Try to find the character in the charmap. */
961 seq
= charmap_find_value (charmap
, symstr
, symlen
);
963 /* Determine the wide character. */
964 if (seq
== NULL
|| seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
966 wc
= repertoire_find_value (repertoire
, symstr
, symlen
);
973 if (wc
== ILLEGAL_CHAR_VALUE
&& seq
== NULL
)
975 /* It's no character, so look through the collation elements and
978 if (find_entry (&collate
->elem_table
, symstr
, symlen
, &ptr
) != 0)
981 struct symbol_t
*sym
= NULL
;
983 /* It's also no collation element. Therefore it's either a
984 collating symbol or it's a character which is not
985 supported by the character set. In the latter case we
986 simply create a dummy entry. */
987 if (find_entry (&collate
->sym_table
, symstr
, symlen
, &result
) == 0)
989 /* It's a collation symbol. */
990 sym
= (struct symbol_t
*) result
;
997 elem
= new_element (collate
, NULL
, 0, NULL
, symstr
, symlen
, 0);
1002 /* Enter a fake element in the sequence table. This
1003 won't cause anything in the output since there is
1004 no multibyte or wide character associated with
1006 insert_entry (&collate
->seq_table
, symstr
, symlen
, elem
);
1010 /* Copy the result back. */
1015 /* Otherwise the symbols stands for a character. */
1017 if (find_entry (&collate
->seq_table
, symstr
, symlen
, &ptr
) != 0)
1019 uint32_t wcs
[2] = { wc
, 0 };
1021 /* We have to allocate an entry. */
1022 elem
= new_element (collate
,
1023 seq
!= NULL
? (char *) seq
->bytes
: NULL
,
1024 seq
!= NULL
? seq
->nbytes
: 0,
1025 wc
== ILLEGAL_CHAR_VALUE
? NULL
: wcs
,
1028 /* And add it to the table. */
1029 if (insert_entry (&collate
->seq_table
, symstr
, symlen
, elem
) != 0)
1030 /* This cannot happen. */
1031 assert (! "Internal error");
1035 /* Copy the result back. */
1038 /* Maybe the character was used before the definition. In this case
1039 we have to insert the byte sequences now. */
1040 if (elem
->mbs
== NULL
&& seq
!= NULL
)
1042 elem
->mbs
= obstack_copy0 (&collate
->mempool
,
1043 seq
->bytes
, seq
->nbytes
);
1044 elem
->nmbs
= seq
->nbytes
;
1047 if (elem
->wcs
== NULL
&& wc
!= ILLEGAL_CHAR_VALUE
)
1049 uint32_t wcs
[2] = { wc
, 0 };
1051 elem
->wcs
= obstack_copy (&collate
->mempool
, wcs
, sizeof (wcs
));
1057 /* Test whether this element is not already in the list. */
1058 if (elem
->next
!= NULL
|| elem
== collate
->cursor
)
1060 lr_error (ldfile
, _("order for `%.*s' already defined at %s:%Zu"),
1061 (int) symlen
, symstr
, elem
->file
, elem
->line
);
1062 lr_ignore_rest (ldfile
, 0);
1066 insert_weights (ldfile
, elem
, charmap
, repertoire
, result
, tok_none
);
1073 handle_ellipsis (struct linereader
*ldfile
, const char *symstr
, size_t symlen
,
1074 enum token_t ellipsis
, const struct charmap_t
*charmap
,
1075 struct repertoire_t
*repertoire
,
1076 struct localedef_t
*result
)
1078 struct element_t
*startp
;
1079 struct element_t
*endp
;
1080 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
1082 /* Unlink the entry added for the ellipsis. */
1083 unlink_element (collate
);
1084 startp
= collate
->cursor
;
1086 /* Process and add the end-entry. */
1088 && insert_value (ldfile
, symstr
, symlen
, charmap
, repertoire
, result
))
1089 /* Something went wrong with inserting the to-value. This means
1090 we cannot process the ellipsis. */
1093 /* Reset the cursor. */
1094 collate
->cursor
= startp
;
1096 /* Now we have to handle many different situations:
1097 - we have to distinguish between the three different ellipsis forms
1098 - is the ellipsis at the beginning, in the middle, or at the end.
1100 endp
= collate
->cursor
->next
;
1101 assert (symstr
== NULL
|| endp
!= NULL
);
1103 /* XXX The following is probably very wrong since also collating symbols
1104 can appear in ranges. But do we want to (and can we) refine the test for that? */
1106 /* Both, the start and the end symbol, must stand for characters. */
1107 if ((startp
!= NULL
&& (startp
->name
== NULL
|| ! startp
->is_character
))
1108 || (endp
!= NULL
&& (endp
->name
== NULL
|| ! endp
->is_character
)))
1110 lr_error (ldfile
, _("\
1111 %s: the start and the end symbol of a range must stand for characters"),
1117 if (ellipsis
== tok_ellipsis3
)
1119 /* One requirement we make here: the length of the byte
1120 sequences for the first and end character must be the same.
1121 This is mainly to prevent unwanted effects and this is often
1122 not what is wanted. */
1123 size_t len
= (startp
->mbs
!= NULL
? startp
->nmbs
1124 : (endp
->mbs
!= NULL
? endp
->nmbs
: 0));
1125 char mbcnt
[len
+ 1];
1126 char mbend
[len
+ 1];
1128 /* Well, this should be caught somewhere else already. Just to
1130 assert (startp
== NULL
|| startp
->wcs
== NULL
|| startp
->wcs
[1] == 0);
1131 assert (endp
== NULL
|| endp
->wcs
== NULL
|| endp
->wcs
[1] == 0);
1133 if (startp
!= NULL
&& endp
!= NULL
1134 && startp
->mbs
!= NULL
&& endp
->mbs
!= NULL
1135 && startp
->nmbs
!= endp
->nmbs
)
1137 lr_error (ldfile
, _("\
1138 %s: byte sequences of first and last character must have the same length"),
1143 /* Determine whether we have to generate multibyte sequences. */
1144 if ((startp
== NULL
|| startp
->mbs
!= NULL
)
1145 && (endp
== NULL
|| endp
->mbs
!= NULL
))
1150 /* Prepare the beginning byte sequence. This is either from the
1151 beginning byte sequence or it is all nulls if it was an
1152 initial ellipsis. */
1153 if (startp
== NULL
|| startp
->mbs
== NULL
)
1154 memset (mbcnt
, '\0', len
);
1157 memcpy (mbcnt
, startp
->mbs
, len
);
1159 /* And increment it so that the value is the first one we will
1161 for (cnt
= len
- 1; cnt
>= 0; --cnt
)
1162 if (++mbcnt
[cnt
] != '\0')
1167 /* And the end sequence. */
1168 if (endp
== NULL
|| endp
->mbs
== NULL
)
1169 memset (mbend
, '\0', len
);
1171 memcpy (mbend
, endp
->mbs
, len
);
1174 /* Test whether we have a correct range. */
1175 ret
= memcmp (mbcnt
, mbend
, len
);
1179 lr_error (ldfile
, _("%s: byte sequence of first character of \
1180 range is not lower than that of the last character"), "LC_COLLATE");
1184 /* Generate the byte sequences data. */
1187 struct charseq
*seq
;
1189 /* Quite a bit of work ahead. We have to find the character
1190 definition for the byte sequence and then determine the
1191 wide character belonging to it. */
1192 seq
= charmap_find_symbol (charmap
, mbcnt
, len
);
1195 struct element_t
*elem
;
1198 /* I don't think this can ever happen. */
1199 assert (seq
->name
!= NULL
);
1200 namelen
= strlen (seq
->name
);
1202 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1203 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1206 /* Now we are ready to insert the new value in the
1207 sequence. Find out whether the element is
1210 if (find_entry (&collate
->seq_table
, seq
->name
, namelen
,
1213 uint32_t wcs
[2] = { seq
->ucs4
, 0 };
1215 /* We have to allocate an entry. */
1216 elem
= new_element (collate
, mbcnt
, len
,
1217 seq
->ucs4
== ILLEGAL_CHAR_VALUE
1218 ? NULL
: wcs
, seq
->name
,
1221 /* And add it to the table. */
1222 if (insert_entry (&collate
->seq_table
, seq
->name
,
1223 namelen
, elem
) != 0)
1224 /* This cannot happen. */
1225 assert (! "Internal error");
1228 /* Copy the result. */
1231 /* Test whether this element is not already in the list. */
1232 if (elem
->next
!= NULL
|| (collate
->cursor
!= NULL
1233 && elem
->next
== collate
->cursor
))
1235 lr_error (ldfile
, _("\
1236 order for `%.*s' already defined at %s:%Zu"),
1237 (int) namelen
, seq
->name
,
1238 elem
->file
, elem
->line
);
1242 /* Enqueue the new element. */
1243 elem
->last
= collate
->cursor
;
1244 if (collate
->cursor
== NULL
)
1248 elem
->next
= collate
->cursor
->next
;
1249 elem
->last
->next
= elem
;
1250 if (elem
->next
!= NULL
)
1251 elem
->next
->last
= elem
;
1253 if (collate
->start
== NULL
)
1255 assert (collate
->cursor
== NULL
);
1256 collate
->start
= elem
;
1258 collate
->cursor
= elem
;
1260 /* Add the weight value. We take them from the
1261 `ellipsis_weight' member of `collate'. */
1262 elem
->weights
= (struct element_list_t
*)
1263 obstack_alloc (&collate
->mempool
,
1264 nrules
* sizeof (struct element_list_t
));
1265 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1266 if (collate
->ellipsis_weight
.weights
[cnt
].cnt
== 1
1267 && (collate
->ellipsis_weight
.weights
[cnt
].w
[0]
1268 == ELEMENT_ELLIPSIS2
))
1270 elem
->weights
[cnt
].w
= (struct element_t
**)
1271 obstack_alloc (&collate
->mempool
,
1272 sizeof (struct element_t
*));
1273 elem
->weights
[cnt
].w
[0] = elem
;
1274 elem
->weights
[cnt
].cnt
= 1;
1278 /* Simply use the weight from `ellipsis_weight'. */
1279 elem
->weights
[cnt
].w
=
1280 collate
->ellipsis_weight
.weights
[cnt
].w
;
1281 elem
->weights
[cnt
].cnt
=
1282 collate
->ellipsis_weight
.weights
[cnt
].cnt
;
1286 /* Increment for the next round. */
1288 for (cnt
= len
- 1; cnt
>= 0; --cnt
)
1289 if (++mbcnt
[cnt
] != '\0')
1292 /* Find out whether this was all. */
1293 if (cnt
< 0 || memcmp (mbcnt
, mbend
, len
) >= 0)
1294 /* Yep, that's all. */
1301 /* For symbolic range we naturally must have a beginning and an
1302 end specified by the user. */
1304 lr_error (ldfile
, _("\
1305 %s: symbolic range ellipsis must not directly follow `order_start'"),
1307 else if (endp
== NULL
)
1308 lr_error (ldfile
, _("\
1309 %s: symbolic range ellipsis must not be directly followed by `order_end'"),
1313 /* Determine the range. To do so we have to determine the
1314 common prefix of the both names and then the numeric
1315 values of both ends. */
1316 size_t lenfrom
= strlen (startp
->name
);
1317 size_t lento
= strlen (endp
->name
);
1318 char buf
[lento
+ 1];
1323 int base
= ellipsis
== tok_ellipsis2
? 16 : 10;
1325 if (lenfrom
!= lento
)
1328 lr_error (ldfile
, _("\
1329 `%s' and `%.*s' are not valid names for symbolic range"),
1330 startp
->name
, (int) lento
, endp
->name
);
1334 while (startp
->name
[preflen
] == endp
->name
[preflen
])
1335 if (startp
->name
[preflen
] == '\0')
1336 /* Nothing to be done. The start and end point are identical
1337 and while inserting the end point we have already given
1338 the user an error message. */
1344 from
= strtol (startp
->name
+ preflen
, &cp
, base
);
1345 if ((from
== UINT_MAX
&& errno
== ERANGE
) || *cp
!= '\0')
1349 to
= strtol (endp
->name
+ preflen
, &cp
, base
);
1350 if ((to
== UINT_MAX
&& errno
== ERANGE
) || *cp
!= '\0')
1353 /* Copy the prefix. */
1354 memcpy (buf
, startp
->name
, preflen
);
1356 /* Loop over all values. */
1357 for (++from
; from
< to
; ++from
)
1359 struct element_t
*elem
= NULL
;
1360 struct charseq
*seq
;
1364 /* Generate the name. */
1365 sprintf (buf
+ preflen
, base
== 10 ? "%0*ld" : "%0*lX",
1366 (int) (lenfrom
- preflen
), from
);
1368 /* Look whether this name is already defined. */
1370 if (find_entry (&collate
->seq_table
, buf
, symlen
, &ptr
) == 0)
1372 /* Copy back the result. */
1375 if (elem
->next
!= NULL
|| (collate
->cursor
!= NULL
1376 && elem
->next
== collate
->cursor
))
1378 lr_error (ldfile
, _("\
1379 %s: order for `%.*s' already defined at %s:%Zu"),
1380 "LC_COLLATE", (int) lenfrom
, buf
,
1381 elem
->file
, elem
->line
);
1385 if (elem
->name
== NULL
)
1387 lr_error (ldfile
, _("%s: `%s' must be a character"),
1393 if (elem
== NULL
|| (elem
->mbs
== NULL
&& elem
->wcs
== NULL
))
1395 /* Search for a character of this name. */
1396 seq
= charmap_find_value (charmap
, buf
, lenfrom
);
1397 if (seq
== NULL
|| seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1399 wc
= repertoire_find_value (repertoire
, buf
, lenfrom
);
1407 if (wc
== ILLEGAL_CHAR_VALUE
&& seq
== NULL
)
1408 /* We don't know anything about a character with this
1409 name. XXX Should we warn? */
1414 uint32_t wcs
[2] = { wc
, 0 };
1416 /* We have to allocate an entry. */
1417 elem
= new_element (collate
,
1419 ? (char *) seq
->bytes
: NULL
,
1420 seq
!= NULL
? seq
->nbytes
: 0,
1421 wc
== ILLEGAL_CHAR_VALUE
1422 ? NULL
: wcs
, buf
, lenfrom
, 1);
1426 /* Update the element. */
1429 elem
->mbs
= obstack_copy0 (&collate
->mempool
,
1430 seq
->bytes
, seq
->nbytes
);
1431 elem
->nmbs
= seq
->nbytes
;
1434 if (wc
!= ILLEGAL_CHAR_VALUE
)
1438 obstack_grow (&collate
->mempool
,
1439 &wc
, sizeof (uint32_t));
1440 obstack_grow (&collate
->mempool
,
1441 &zero
, sizeof (uint32_t));
1442 elem
->wcs
= obstack_finish (&collate
->mempool
);
1447 elem
->file
= ldfile
->fname
;
1448 elem
->line
= ldfile
->lineno
;
1449 elem
->section
= collate
->current_section
;
1452 /* Enqueue the new element. */
1453 elem
->last
= collate
->cursor
;
1454 elem
->next
= collate
->cursor
->next
;
1455 elem
->last
->next
= elem
;
1456 if (elem
->next
!= NULL
)
1457 elem
->next
->last
= elem
;
1458 collate
->cursor
= elem
;
1460 /* Now add the weights. They come from the `ellipsis_weights'
1461 member of `collate'. */
1462 elem
->weights
= (struct element_list_t
*)
1463 obstack_alloc (&collate
->mempool
,
1464 nrules
* sizeof (struct element_list_t
));
1465 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1466 if (collate
->ellipsis_weight
.weights
[cnt
].cnt
== 1
1467 && (collate
->ellipsis_weight
.weights
[cnt
].w
[0]
1468 == ELEMENT_ELLIPSIS2
))
1470 elem
->weights
[cnt
].w
= (struct element_t
**)
1471 obstack_alloc (&collate
->mempool
,
1472 sizeof (struct element_t
*));
1473 elem
->weights
[cnt
].w
[0] = elem
;
1474 elem
->weights
[cnt
].cnt
= 1;
1478 /* Simply use the weight from `ellipsis_weight'. */
1479 elem
->weights
[cnt
].w
=
1480 collate
->ellipsis_weight
.weights
[cnt
].w
;
1481 elem
->weights
[cnt
].cnt
=
1482 collate
->ellipsis_weight
.weights
[cnt
].cnt
;
1491 collate_startup (struct linereader
*ldfile
, struct localedef_t
*locale
,
1492 struct localedef_t
*copy_locale
, int ignore_content
)
1494 if (!ignore_content
&& locale
->categories
[LC_COLLATE
].collate
== NULL
)
1496 struct locale_collate_t
*collate
;
1498 if (copy_locale
== NULL
)
1500 collate
= locale
->categories
[LC_COLLATE
].collate
=
1501 (struct locale_collate_t
*)
1502 xcalloc (1, sizeof (struct locale_collate_t
));
1504 /* Init the various data structures. */
1505 init_hash (&collate
->elem_table
, 100);
1506 init_hash (&collate
->sym_table
, 100);
1507 init_hash (&collate
->seq_table
, 500);
1508 obstack_init (&collate
->mempool
);
1510 collate
->col_weight_max
= -1;
1513 /* Reuse the copy_locale's data structures. */
1514 collate
= locale
->categories
[LC_COLLATE
].collate
=
1515 copy_locale
->categories
[LC_COLLATE
].collate
;
1518 ldfile
->translate_strings
= 0;
1519 ldfile
->return_widestr
= 0;
1524 collate_finish (struct localedef_t
*locale
, const struct charmap_t
*charmap
)
1526 /* Now is the time when we can assign the individual collation
1527 values for all the symbols. We have possibly different values
1528 for the wide- and the multibyte-character symbols. This is done
1529 since it might make a difference in the encoding if there is in
1530 some cases no multibyte-character but there are wide-characters.
1531 (The other way around it is not important since theencoded
1532 collation value in the wide-character case is 32 bits wide and
1533 therefore requires no encoding).
1535 The lowest collation value assigned is 2. Zero is reserved for
1536 the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1537 functions and 1 is used to separate the individual passes for the
1540 We also have to construct is list with all the bytes/words which
1541 can come first in a sequence, followed by all the elements which
1542 also start with this byte/word. The order is reverse which has
1543 among others the important effect that longer strings are located
1544 first in the list. This is required for the output data since
1545 the algorithm used in `strcoll' etc depends on this.
1547 The multibyte case is easy. We simply sort into an array with
1549 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1554 struct element_t
*runp
;
1556 int need_undefined
= 0;
1557 struct section_list
*sect
;
1559 int nr_wide_elems
= 0;
1561 if (collate
== NULL
)
1563 /* No data, no check. */
1565 WITH_CUR_LOCALE (error (0, 0, _("No definition for %s category found"),
1570 /* If this assertion is hit change the type in `element_t'. */
1571 assert (nrules
<= sizeof (runp
->used_in_level
) * 8);
1573 /* Make sure that the `position' rule is used either in all sections
1575 for (i
= 0; i
< nrules
; ++i
)
1576 for (sect
= collate
->sections
; sect
!= NULL
; sect
= sect
->next
)
1577 if (sect
!= collate
->current_section
1578 && sect
->rules
!= NULL
1579 && ((sect
->rules
[i
] & sort_position
)
1580 != (collate
->current_section
->rules
[i
] & sort_position
)))
1582 WITH_CUR_LOCALE (error (0, 0, _("\
1583 %s: `position' must be used for a specific level in all sections or none"),
1588 /* Find out which elements are used at which level. At the same
1589 time we find out whether we have any undefined symbols. */
1590 runp
= collate
->start
;
1591 while (runp
!= NULL
)
1593 if (runp
->mbs
!= NULL
)
1595 for (i
= 0; i
< nrules
; ++i
)
1599 for (j
= 0; j
< runp
->weights
[i
].cnt
; ++j
)
1600 /* A NULL pointer as the weight means IGNORE. */
1601 if (runp
->weights
[i
].w
[j
] != NULL
)
1603 if (runp
->weights
[i
].w
[j
]->weights
== NULL
)
1605 WITH_CUR_LOCALE (error_at_line (0, 0, runp
->file
,
1607 _("symbol `%s' not defined"),
1608 runp
->weights
[i
].w
[j
]->name
));
1611 runp
->weights
[i
].w
[j
] = &collate
->undefined
;
1614 /* Set the bit for the level. */
1615 runp
->weights
[i
].w
[j
]->used_in_level
|= 1 << i
;
1620 /* Up to the next entry. */
1624 /* Walk through the list of defined sequences and assign weights. Also
1625 create the data structure which will allow generating the single byte
1626 character based tables.
1628 Since at each time only the weights for each of the rules are
1629 only compared to other weights for this rule it is possible to
1630 assign more compact weight values than simply counting all
1631 weights in sequence. We can assign weights from 3, one for each
1632 rule individually and only for those elements, which are actually
1635 Why is this important? It is not for the wide char table. But
1636 it is for the singlebyte output since here larger numbers have to
1637 be encoded to make it possible to emit the value as a byte
1639 for (i
= 0; i
< nrules
; ++i
)
1644 runp
= collate
->start
;
1645 while (runp
!= NULL
)
1647 /* Determine the order. */
1648 if (runp
->used_in_level
!= 0)
1650 runp
->mborder
= (int *) obstack_alloc (&collate
->mempool
,
1651 nrules
* sizeof (int));
1653 for (i
= 0; i
< nrules
; ++i
)
1654 if ((runp
->used_in_level
& (1 << i
)) != 0)
1655 runp
->mborder
[i
] = mbact
[i
]++;
1657 runp
->mborder
[i
] = 0;
1660 if (runp
->mbs
!= NULL
)
1662 struct element_t
**eptr
;
1663 struct element_t
*lastp
= NULL
;
1665 /* Find the point where to insert in the list. */
1666 eptr
= &collate
->mbheads
[((unsigned char *) runp
->mbs
)[0]];
1667 while (*eptr
!= NULL
)
1669 if ((*eptr
)->nmbs
< runp
->nmbs
)
1672 if ((*eptr
)->nmbs
== runp
->nmbs
)
1674 int c
= memcmp ((*eptr
)->mbs
, runp
->mbs
, runp
->nmbs
);
1678 /* This should not happen. It means that we have
1679 to symbols with the same byte sequence. It is
1680 of course an error. */
1681 WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr
)->file
,
1684 symbol `%s' has the same encoding as"), (*eptr
)->name
);
1685 error_at_line (0, 0, runp
->file
,
1692 /* Insert it here. */
1696 /* To the next entry. */
1698 eptr
= &(*eptr
)->mbnext
;
1701 /* Set the pointers. */
1702 runp
->mbnext
= *eptr
;
1703 runp
->mblast
= lastp
;
1705 (*eptr
)->mblast
= runp
;
1711 if (runp
->used_in_level
)
1713 runp
->wcorder
= wcact
++;
1715 /* We take the opportunity to count the elements which have
1720 if (runp
->is_character
)
1722 if (runp
->nmbs
== 1)
1723 collate
->mbseqorder
[((unsigned char *) runp
->mbs
)[0]] = mbseqact
++;
1725 runp
->wcseqorder
= wcseqact
++;
1727 else if (runp
->mbs
!= NULL
&& runp
->weights
!= NULL
)
1728 /* This is for collation elements. */
1729 runp
->wcseqorder
= wcseqact
++;
1731 /* Up to the next entry. */
1735 /* Find out whether any of the `mbheads' entries is unset. In this
1736 case we use the UNDEFINED entry. */
1737 for (i
= 1; i
< 256; ++i
)
1738 if (collate
->mbheads
[i
] == NULL
)
1741 collate
->mbheads
[i
] = &collate
->undefined
;
1744 /* Now to the wide character case. */
1745 collate
->wcheads
.p
= 6;
1746 collate
->wcheads
.q
= 10;
1747 wchead_table_init (&collate
->wcheads
);
1749 collate
->wcseqorder
.p
= 6;
1750 collate
->wcseqorder
.q
= 10;
1751 collseq_table_init (&collate
->wcseqorder
);
1754 runp
= collate
->start
;
1755 while (runp
!= NULL
)
1757 if (runp
->wcs
!= NULL
)
1759 struct element_t
*e
;
1760 struct element_t
**eptr
;
1761 struct element_t
*lastp
;
1763 /* Insert the collation sequence value. */
1764 if (runp
->is_character
)
1765 collseq_table_add (&collate
->wcseqorder
, runp
->wcs
[0],
1768 /* Find the point where to insert in the list. */
1769 e
= wchead_table_get (&collate
->wcheads
, runp
->wcs
[0]);
1772 while (*eptr
!= NULL
)
1774 if ((*eptr
)->nwcs
< runp
->nwcs
)
1777 if ((*eptr
)->nwcs
== runp
->nwcs
)
1779 int c
= wmemcmp ((wchar_t *) (*eptr
)->wcs
,
1780 (wchar_t *) runp
->wcs
, runp
->nwcs
);
1784 /* This should not happen. It means that we have
1785 two symbols with the same byte sequence. It is
1786 of course an error. */
1787 WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr
)->file
,
1790 symbol `%s' has the same encoding as"), (*eptr
)->name
);
1791 error_at_line (0, 0, runp
->file
,
1798 /* Insert it here. */
1802 /* To the next entry. */
1804 eptr
= &(*eptr
)->wcnext
;
1807 /* Set the pointers. */
1808 runp
->wcnext
= *eptr
;
1809 runp
->wclast
= lastp
;
1811 (*eptr
)->wclast
= runp
;
1814 wchead_table_add (&collate
->wcheads
, runp
->wcs
[0], e
);
1819 /* Up to the next entry. */
1823 /* Now determine whether the UNDEFINED entry is needed and if yes,
1824 whether it was defined. */
1825 collate
->undefined
.used_in_level
= need_undefined
? ~0ul : 0;
1826 if (collate
->undefined
.file
== NULL
)
1830 /* This seems not to be enforced by recent standards. Don't
1831 emit an error, simply append UNDEFINED at the end. */
1833 WITH_CUR_LOCALE (error (0, 0, _("no definition of `UNDEFINED'")));
1835 /* Add UNDEFINED at the end. */
1836 collate
->undefined
.mborder
=
1837 (int *) obstack_alloc (&collate
->mempool
, nrules
* sizeof (int));
1839 for (i
= 0; i
< nrules
; ++i
)
1840 collate
->undefined
.mborder
[i
] = mbact
[i
]++;
1843 /* In any case we will need the definition for the wide character
1844 case. But we will not complain that it is missing since the
1845 specification strangely enough does not seem to account for
1847 collate
->undefined
.wcorder
= wcact
++;
1850 /* Finally, try to unify the rules for the sections. Whenever the rules
1851 for a section are the same as those for another section give the
1852 ruleset the same index. Since there are never many section we can
1853 use an O(n^2) algorithm here. */
1854 sect
= collate
->sections
;
1855 while (sect
!= NULL
&& sect
->rules
== NULL
)
1858 /* Bail out if we have no sections because of earlier errors. */
1861 WITH_CUR_LOCALE (error (EXIT_FAILURE
, 0,
1862 _("too many errors; giving up")));
1869 struct section_list
*osect
= collate
->sections
;
1871 while (osect
!= sect
)
1872 if (osect
->rules
!= NULL
1873 && memcmp (osect
->rules
, sect
->rules
,
1874 nrules
* sizeof (osect
->rules
[0])) == 0)
1877 osect
= osect
->next
;
1880 sect
->ruleidx
= ruleidx
++;
1882 sect
->ruleidx
= osect
->ruleidx
;
1887 while (sect
!= NULL
&& sect
->rules
== NULL
);
1889 while (sect
!= NULL
);
1890 /* We are currently not prepared for more than 128 rulesets. But this
1891 should never really be a problem. */
1892 assert (ruleidx
<= 128);
1897 output_weight (struct obstack
*pool
, struct locale_collate_t
*collate
,
1898 struct element_t
*elem
)
1903 /* Optimize the use of UNDEFINED. */
1904 if (elem
== &collate
->undefined
)
1905 /* The weights are already inserted. */
1908 /* This byte can start exactly one collation element and this is
1909 a single byte. We can directly give the index to the weights. */
1910 retval
= obstack_object_size (pool
);
1912 /* Construct the weight. */
1913 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1915 char buf
[elem
->weights
[cnt
].cnt
* 7];
1919 for (i
= 0; i
< elem
->weights
[cnt
].cnt
; ++i
)
1920 /* Encode the weight value. We do nothing for IGNORE entries. */
1921 if (elem
->weights
[cnt
].w
[i
] != NULL
)
1922 len
+= utf8_encode (&buf
[len
],
1923 elem
->weights
[cnt
].w
[i
]->mborder
[cnt
]);
1925 /* And add the buffer content. */
1926 obstack_1grow (pool
, len
);
1927 obstack_grow (pool
, buf
, len
);
1930 return retval
| ((elem
->section
->ruleidx
& 0x7f) << 24);
1935 output_weightwc (struct obstack
*pool
, struct locale_collate_t
*collate
,
1936 struct element_t
*elem
)
1941 /* Optimize the use of UNDEFINED. */
1942 if (elem
== &collate
->undefined
)
1943 /* The weights are already inserted. */
1946 /* This byte can start exactly one collation element and this is
1947 a single byte. We can directly give the index to the weights. */
1948 retval
= obstack_object_size (pool
) / sizeof (int32_t);
1950 /* Construct the weight. */
1951 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1953 int32_t buf
[elem
->weights
[cnt
].cnt
];
1957 for (i
= 0, j
= 0; i
< elem
->weights
[cnt
].cnt
; ++i
)
1958 if (elem
->weights
[cnt
].w
[i
] != NULL
)
1959 buf
[j
++] = elem
->weights
[cnt
].w
[i
]->wcorder
;
1961 /* And add the buffer content. */
1962 obstack_int32_grow (pool
, j
);
1964 obstack_grow (pool
, buf
, j
* sizeof (int32_t));
1965 maybe_swap_uint32_obstack (pool
, j
);
1968 return retval
| ((elem
->section
->ruleidx
& 0x7f) << 24);
1971 /* If localedef is every threaded, this would need to be __thread var. */
1974 struct obstack
*weightpool
;
1975 struct obstack
*extrapool
;
1976 struct obstack
*indpool
;
1977 struct locale_collate_t
*collate
;
1978 struct collidx_table
*tablewc
;
1981 static void add_to_tablewc (uint32_t ch
, struct element_t
*runp
);
1984 add_to_tablewc (uint32_t ch
, struct element_t
*runp
)
1986 if (runp
->wcnext
== NULL
&& runp
->nwcs
== 1)
1988 int32_t weigthidx
= output_weightwc (atwc
.weightpool
, atwc
.collate
,
1990 collidx_table_add (atwc
.tablewc
, ch
, weigthidx
);
1994 /* As for the singlebyte table, we recognize sequences and
1997 collidx_table_add (atwc
.tablewc
, ch
,
1998 -(obstack_object_size (atwc
.extrapool
)
1999 / sizeof (uint32_t)));
2003 /* Store the current index in the weight table. We know that
2004 the current position in the `extrapool' is aligned on a
2009 /* Find out wether this is a single entry or we have more than
2010 one consecutive entry. */
2011 if (runp
->wcnext
!= NULL
2012 && runp
->nwcs
== runp
->wcnext
->nwcs
2013 && wmemcmp ((wchar_t *) runp
->wcs
,
2014 (wchar_t *)runp
->wcnext
->wcs
,
2015 runp
->nwcs
- 1) == 0
2016 && (runp
->wcs
[runp
->nwcs
- 1]
2017 == runp
->wcnext
->wcs
[runp
->nwcs
- 1] + 1))
2020 struct element_t
*series_startp
= runp
;
2021 struct element_t
*curp
;
2023 /* Now add first the initial byte sequence. */
2024 added
= (1 + 1 + 2 * (runp
->nwcs
- 1)) * sizeof (int32_t);
2025 if (sizeof (int32_t) == sizeof (int))
2026 obstack_make_room (atwc
.extrapool
, added
);
2028 /* More than one consecutive entry. We mark this by having
2029 a negative index into the indirect table. */
2030 obstack_int32_grow_fast (atwc
.extrapool
,
2031 -(obstack_object_size (atwc
.indpool
)
2032 / sizeof (int32_t)));
2033 obstack_int32_grow_fast (atwc
.extrapool
, runp
->nwcs
- 1);
2036 runp
= runp
->wcnext
;
2037 while (runp
->wcnext
!= NULL
2038 && runp
->nwcs
== runp
->wcnext
->nwcs
2039 && wmemcmp ((wchar_t *) runp
->wcs
,
2040 (wchar_t *)runp
->wcnext
->wcs
,
2041 runp
->nwcs
- 1) == 0
2042 && (runp
->wcs
[runp
->nwcs
- 1]
2043 == runp
->wcnext
->wcs
[runp
->nwcs
- 1] + 1));
2045 /* Now walk backward from here to the beginning. */
2048 for (i
= 1; i
< runp
->nwcs
; ++i
)
2049 obstack_int32_grow_fast (atwc
.extrapool
, curp
->wcs
[i
]);
2051 /* Now find the end of the consecutive sequence and
2052 add all the indeces in the indirect pool. */
2055 weightidx
= output_weightwc (atwc
.weightpool
, atwc
.collate
,
2057 obstack_int32_grow (atwc
.indpool
, weightidx
);
2059 curp
= curp
->wclast
;
2061 while (curp
!= series_startp
);
2063 /* Add the final weight. */
2064 weightidx
= output_weightwc (atwc
.weightpool
, atwc
.collate
,
2066 obstack_int32_grow (atwc
.indpool
, weightidx
);
2068 /* And add the end byte sequence. Without length this
2070 for (i
= 1; i
< curp
->nwcs
; ++i
)
2071 obstack_int32_grow (atwc
.extrapool
, curp
->wcs
[i
]);
2075 /* A single entry. Simply add the index and the length and
2076 string (except for the first character which is already
2080 /* Output the weight info. */
2081 weightidx
= output_weightwc (atwc
.weightpool
, atwc
.collate
,
2084 assert (runp
->nwcs
> 0);
2085 added
= (1 + 1 + runp
->nwcs
- 1) * sizeof (int32_t);
2086 if (sizeof (int) == sizeof (int32_t))
2087 obstack_make_room (atwc
.extrapool
, added
);
2089 obstack_int32_grow_fast (atwc
.extrapool
, weightidx
);
2090 obstack_int32_grow_fast (atwc
.extrapool
, runp
->nwcs
- 1);
2091 for (i
= 1; i
< runp
->nwcs
; ++i
)
2092 obstack_int32_grow_fast (atwc
.extrapool
, runp
->wcs
[i
]);
2096 runp
= runp
->wcnext
;
2098 while (runp
!= NULL
);
2103 collate_output (struct localedef_t
*locale
, const struct charmap_t
*charmap
,
2104 const char *output_path
)
2106 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
2107 const size_t nelems
= _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
);
2108 struct locale_file file
;
2110 int32_t tablemb
[256];
2111 struct obstack weightpool
;
2112 struct obstack extrapool
;
2113 struct obstack indirectpool
;
2114 struct section_list
*sect
;
2115 struct collidx_table tablewc
;
2117 uint32_t *elem_table
;
2119 struct element_t
*runp
;
2121 init_locale_data (&file
, nelems
);
2122 add_locale_uint32 (&file
, nrules
);
2124 /* If we have no LC_COLLATE data emit only the number of rules as zero. */
2125 if (collate
== NULL
)
2128 for (idx
= 1; idx
< nelems
; idx
++)
2130 /* The words have to be handled specially. */
2131 if (idx
== _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB
))
2132 add_locale_uint32 (&file
, 0);
2134 add_locale_empty (&file
);
2136 write_locale_data (output_path
, LC_COLLATE
, "LC_COLLATE", &file
);
2140 obstack_init (&weightpool
);
2141 obstack_init (&extrapool
);
2142 obstack_init (&indirectpool
);
2144 /* Since we are using the sign of an integer to mark indirection the
2145 offsets in the arrays we are indirectly referring to must not be
2146 zero since -0 == 0. Therefore we add a bit of dummy content. */
2147 obstack_int32_grow (&extrapool
, 0);
2148 obstack_int32_grow (&indirectpool
, 0);
2150 /* Prepare the ruleset table. */
2151 for (sect
= collate
->sections
, i
= 0; sect
!= NULL
; sect
= sect
->next
)
2152 if (sect
->rules
!= NULL
&& sect
->ruleidx
== i
)
2156 obstack_make_room (&weightpool
, nrules
);
2158 for (j
= 0; j
< nrules
; ++j
)
2159 obstack_1grow_fast (&weightpool
, sect
->rules
[j
]);
2162 /* And align the output. */
2163 i
= (nrules
* i
) % LOCFILE_ALIGN
;
2166 obstack_1grow (&weightpool
, '\0');
2167 while (++i
< LOCFILE_ALIGN
);
2169 add_locale_raw_obstack (&file
, &weightpool
);
2171 /* Generate the 8-bit table. Walk through the lists of sequences
2172 starting with the same byte and add them one after the other to
2173 the table. In case we have more than one sequence starting with
2174 the same byte we have to use extra indirection.
2176 First add a record for the NUL byte. This entry will never be used
2177 so it does not matter. */
2180 /* Now insert the `UNDEFINED' value if it is used. Since this value
2181 will probably be used more than once it is good to store the
2182 weights only once. */
2183 if (collate
->undefined
.used_in_level
!= 0)
2184 output_weight (&weightpool
, collate
, &collate
->undefined
);
2186 for (ch
= 1; ch
< 256; ++ch
)
2187 if (collate
->mbheads
[ch
]->mbnext
== NULL
2188 && collate
->mbheads
[ch
]->nmbs
<= 1)
2190 tablemb
[ch
] = output_weight (&weightpool
, collate
,
2191 collate
->mbheads
[ch
]);
2195 /* The entries in the list are sorted by length and then
2196 alphabetically. This is the order in which we will add the
2197 elements to the collation table. This allows simply walking
2198 the table in sequence and stopping at the first matching
2199 entry. Since the longer sequences are coming first in the
2200 list they have the possibility to match first, just as it
2201 has to be. In the worst case we are walking to the end of
2202 the list where we put, if no singlebyte sequence is defined
2203 in the locale definition, the weights for UNDEFINED.
2205 To reduce the length of the search list we compress them a bit.
2206 This happens by collecting sequences of consecutive byte
2207 sequences in one entry (having and begin and end byte sequence)
2208 and add only one index into the weight table. We can find the
2209 consecutive entries since they are also consecutive in the list. */
2210 struct element_t
*runp
= collate
->mbheads
[ch
];
2211 struct element_t
*lastp
;
2213 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool
)));
2215 tablemb
[ch
] = -obstack_object_size (&extrapool
);
2219 /* Store the current index in the weight table. We know that
2220 the current position in the `extrapool' is aligned on a
2225 /* Find out wether this is a single entry or we have more than
2226 one consecutive entry. */
2227 if (runp
->mbnext
!= NULL
2228 && runp
->nmbs
== runp
->mbnext
->nmbs
2229 && memcmp (runp
->mbs
, runp
->mbnext
->mbs
, runp
->nmbs
- 1) == 0
2230 && (runp
->mbs
[runp
->nmbs
- 1]
2231 == runp
->mbnext
->mbs
[runp
->nmbs
- 1] + 1))
2234 struct element_t
*series_startp
= runp
;
2235 struct element_t
*curp
;
2237 /* Compute how much space we will need. */
2238 added
= LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
2239 + 2 * (runp
->nmbs
- 1));
2240 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool
)));
2241 obstack_make_room (&extrapool
, added
);
2243 /* More than one consecutive entry. We mark this by having
2244 a negative index into the indirect table. */
2245 obstack_int32_grow_fast (&extrapool
,
2246 -(obstack_object_size (&indirectpool
)
2247 / sizeof (int32_t)));
2249 /* Now search first the end of the series. */
2251 runp
= runp
->mbnext
;
2252 while (runp
->mbnext
!= NULL
2253 && runp
->nmbs
== runp
->mbnext
->nmbs
2254 && memcmp (runp
->mbs
, runp
->mbnext
->mbs
,
2255 runp
->nmbs
- 1) == 0
2256 && (runp
->mbs
[runp
->nmbs
- 1]
2257 == runp
->mbnext
->mbs
[runp
->nmbs
- 1] + 1));
2259 /* Now walk backward from here to the beginning. */
2262 assert (runp
->nmbs
<= 256);
2263 obstack_1grow_fast (&extrapool
, curp
->nmbs
- 1);
2264 for (i
= 1; i
< curp
->nmbs
; ++i
)
2265 obstack_1grow_fast (&extrapool
, curp
->mbs
[i
]);
2267 /* Now find the end of the consecutive sequence and
2268 add all the indeces in the indirect pool. */
2271 weightidx
= output_weight (&weightpool
, collate
, curp
);
2272 obstack_int32_grow (&indirectpool
, weightidx
);
2274 curp
= curp
->mblast
;
2276 while (curp
!= series_startp
);
2278 /* Add the final weight. */
2279 weightidx
= output_weight (&weightpool
, collate
, curp
);
2280 obstack_int32_grow (&indirectpool
, weightidx
);
2282 /* And add the end byte sequence. Without length this
2284 for (i
= 1; i
< curp
->nmbs
; ++i
)
2285 obstack_1grow_fast (&extrapool
, curp
->mbs
[i
]);
2289 /* A single entry. Simply add the index and the length and
2290 string (except for the first character which is already
2294 /* Output the weight info. */
2295 weightidx
= output_weight (&weightpool
, collate
, runp
);
2297 added
= LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
2299 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool
)));
2300 obstack_make_room (&extrapool
, added
);
2302 obstack_int32_grow_fast (&extrapool
, weightidx
);
2303 assert (runp
->nmbs
<= 256);
2304 obstack_1grow_fast (&extrapool
, runp
->nmbs
- 1);
2306 for (i
= 1; i
< runp
->nmbs
; ++i
)
2307 obstack_1grow_fast (&extrapool
, runp
->mbs
[i
]);
2310 /* Add alignment bytes if necessary. */
2311 while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool
)))
2312 obstack_1grow_fast (&extrapool
, '\0');
2316 runp
= runp
->mbnext
;
2318 while (runp
!= NULL
);
2320 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool
)));
2322 /* If the final entry in the list is not a single character we
2323 add an UNDEFINED entry here. */
2324 if (lastp
->nmbs
!= 1)
2326 int added
= LOCFILE_ALIGN_UP (sizeof (int32_t) + 1 + 1);
2327 obstack_make_room (&extrapool
, added
);
2329 obstack_int32_grow_fast (&extrapool
, 0);
2330 /* XXX What rule? We just pick the first. */
2331 obstack_1grow_fast (&extrapool
, 0);
2332 /* Length is zero. */
2333 obstack_1grow_fast (&extrapool
, 0);
2335 /* Add alignment bytes if necessary. */
2336 while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool
)))
2337 obstack_1grow_fast (&extrapool
, '\0');
2341 /* Add padding to the tables if necessary. */
2342 while (!LOCFILE_ALIGNED_P (obstack_object_size (&weightpool
)))
2343 obstack_1grow (&weightpool
, 0);
2345 /* Now add the four tables. */
2346 add_locale_uint32_array (&file
, (const uint32_t *) tablemb
, 256);
2347 add_locale_raw_obstack (&file
, &weightpool
);
2348 add_locale_raw_obstack (&file
, &extrapool
);
2349 add_locale_raw_obstack (&file
, &indirectpool
);
2351 /* Now the same for the wide character table. We need to store some
2352 more information here. */
2353 add_locale_empty (&file
);
2354 add_locale_empty (&file
);
2355 add_locale_empty (&file
);
2357 /* Since we are using the sign of an integer to mark indirection the
2358 offsets in the arrays we are indirectly referring to must not be
2359 zero since -0 == 0. Therefore we add a bit of dummy content. */
2360 obstack_int32_grow (&extrapool
, 0);
2361 obstack_int32_grow (&indirectpool
, 0);
2363 /* Now insert the `UNDEFINED' value if it is used. Since this value
2364 will probably be used more than once it is good to store the
2365 weights only once. */
2366 if (output_weightwc (&weightpool
, collate
, &collate
->undefined
) != 0)
2369 /* Generate the table. Walk through the lists of sequences starting
2370 with the same wide character and add them one after the other to
2371 the table. In case we have more than one sequence starting with
2372 the same byte we have to use extra indirection. */
2375 collidx_table_init (&tablewc
);
2377 atwc
.weightpool
= &weightpool
;
2378 atwc
.extrapool
= &extrapool
;
2379 atwc
.indpool
= &indirectpool
;
2380 atwc
.collate
= collate
;
2381 atwc
.tablewc
= &tablewc
;
2383 wchead_table_iterate (&collate
->wcheads
, add_to_tablewc
);
2385 memset (&atwc
, 0, sizeof (atwc
));
2387 /* Now add the four tables. */
2388 add_locale_collidx_table (&file
, &tablewc
);
2389 add_locale_raw_obstack (&file
, &weightpool
);
2390 add_locale_raw_obstack (&file
, &extrapool
);
2391 add_locale_raw_obstack (&file
, &indirectpool
);
2393 /* Finally write the table with collation element names out. It is
2394 a hash table with a simple function which gets the name of the
2395 character as the input. One character might have many names. The
2396 value associated with the name is an index into the weight table
2397 where we are then interested in the first-level weight value.
2399 To determine how large the table should be we are counting the
2400 elements have to put in. Since we are using internal chaining
2401 using a secondary hash function we have to make the table a bit
2402 larger to avoid extremely long search times. We can achieve
2403 good results with a 40% larger table than there are entries. */
2405 runp
= collate
->start
;
2406 while (runp
!= NULL
)
2408 if (runp
->mbs
!= NULL
&& runp
->weights
!= NULL
&& !runp
->is_character
)
2409 /* Yep, the element really counts. */
2414 /* Add 40% and find the next prime number. */
2415 elem_size
= next_prime (elem_size
* 1.4);
2417 /* Allocate the table. Each entry consists of two words: the hash
2418 value and an index in a secondary table which provides the index
2419 into the weight table and the string itself (so that a match can
2421 elem_table
= (uint32_t *) obstack_alloc (&extrapool
,
2422 elem_size
* 2 * sizeof (uint32_t));
2423 memset (elem_table
, '\0', elem_size
* 2 * sizeof (uint32_t));
2425 /* Now add the elements. */
2426 runp
= collate
->start
;
2427 while (runp
!= NULL
)
2429 if (runp
->mbs
!= NULL
&& runp
->weights
!= NULL
&& !runp
->is_character
)
2431 /* Compute the hash value of the name. */
2432 uint32_t namelen
= strlen (runp
->name
);
2433 uint32_t hash
= elem_hash (runp
->name
, namelen
);
2434 size_t idx
= hash
% elem_size
;
2436 size_t start_idx
= idx
;
2439 if (elem_table
[idx
* 2] != 0)
2441 /* The spot is already taken. Try iterating using the value
2442 from the secondary hashing function. */
2443 size_t iter
= hash
% (elem_size
- 2) + 1;
2448 if (idx
>= elem_size
)
2450 assert (idx
!= start_idx
);
2452 while (elem_table
[idx
* 2] != 0);
2454 /* This is the spot where we will insert the value. */
2455 elem_table
[idx
* 2] = hash
;
2456 elem_table
[idx
* 2 + 1] = obstack_object_size (&extrapool
);
2458 /* The string itself including length. */
2459 obstack_1grow (&extrapool
, namelen
);
2460 obstack_grow (&extrapool
, runp
->name
, namelen
);
2462 /* And the multibyte representation. */
2463 obstack_1grow (&extrapool
, runp
->nmbs
);
2464 obstack_grow (&extrapool
, runp
->mbs
, runp
->nmbs
);
2466 /* And align again to 32 bits. */
2467 if ((1 + namelen
+ 1 + runp
->nmbs
) % sizeof (int32_t) != 0)
2468 obstack_grow (&extrapool
, "\0\0",
2470 - ((1 + namelen
+ 1 + runp
->nmbs
)
2471 % sizeof (int32_t))));
2473 /* Now some 32-bit values: multibyte collation sequence,
2474 wide char string (including length), and wide char
2475 collation sequence. */
2476 obstack_int32_grow (&extrapool
, runp
->mbseqorder
);
2478 obstack_int32_grow (&extrapool
, runp
->nwcs
);
2479 obstack_grow (&extrapool
, runp
->wcs
,
2480 runp
->nwcs
* sizeof (uint32_t));
2481 maybe_swap_uint32_obstack (&extrapool
, runp
->nwcs
);
2483 obstack_int32_grow (&extrapool
, runp
->wcseqorder
);
2489 /* Prepare to write out this data. */
2490 add_locale_uint32 (&file
, elem_size
);
2491 add_locale_uint32_array (&file
, elem_table
, 2 * elem_size
);
2492 add_locale_raw_obstack (&file
, &extrapool
);
2493 add_locale_raw_data (&file
, collate
->mbseqorder
, 256);
2494 add_locale_collseq_table (&file
, &collate
->wcseqorder
);
2495 add_locale_string (&file
, charmap
->code_set_name
);
2496 write_locale_data (output_path
, LC_COLLATE
, "LC_COLLATE", &file
);
2498 obstack_free (&weightpool
, NULL
);
2499 obstack_free (&extrapool
, NULL
);
2500 obstack_free (&indirectpool
, NULL
);
2505 skip_to (struct linereader
*ldfile
, struct locale_collate_t
*collate
,
2506 const struct charmap_t
*charmap
, int to_endif
)
2510 struct token
*now
= lr_token (ldfile
, charmap
, NULL
, NULL
, 0);
2511 enum token_t nowtok
= now
->tok
;
2513 if (nowtok
== tok_eof
|| nowtok
== tok_end
)
2516 if (nowtok
== tok_ifdef
|| nowtok
== tok_ifndef
)
2518 lr_error (ldfile
, _("%s: nested conditionals not supported"),
2520 nowtok
= skip_to (ldfile
, collate
, charmap
, tok_endif
);
2521 if (nowtok
== tok_eof
|| nowtok
== tok_end
)
2524 else if (nowtok
== tok_endif
|| (!to_endif
&& nowtok
== tok_else
))
2526 lr_ignore_rest (ldfile
, 1);
2529 else if (!to_endif
&& (nowtok
== tok_elifdef
|| nowtok
== tok_elifndef
))
2531 /* Do not read the rest of the line. */
2534 else if (nowtok
== tok_else
)
2536 lr_error (ldfile
, _("%s: more than one 'else'"), "LC_COLLATE");
2539 lr_ignore_rest (ldfile
, 0);
2545 collate_read (struct linereader
*ldfile
, struct localedef_t
*result
,
2546 const struct charmap_t
*charmap
, const char *repertoire_name
,
2549 struct repertoire_t
*repertoire
= NULL
;
2550 struct locale_collate_t
*collate
;
2552 struct token
*arg
= NULL
;
2553 enum token_t nowtok
;
2554 enum token_t was_ellipsis
= tok_none
;
2555 struct localedef_t
*copy_locale
= NULL
;
2558 1 - between `order-start' and `order-end'
2559 2 - after `order-end'
2560 3 - after `reorder-after', waiting for `reorder-end'
2561 4 - after `reorder-end'
2562 5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2563 6 - after `reorder-sections-end'
2567 /* Get the repertoire we have to use. */
2568 if (repertoire_name
!= NULL
)
2569 repertoire
= repertoire_read (repertoire_name
);
2571 /* The rest of the line containing `LC_COLLATE' must be free. */
2572 lr_ignore_rest (ldfile
, 1);
2578 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2581 while (nowtok
== tok_eol
);
2583 if (nowtok
!= tok_define
)
2587 lr_ignore_rest (ldfile
, 0);
2590 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2591 if (arg
->tok
!= tok_ident
)
2592 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2595 /* Simply add the new symbol. */
2596 struct name_list
*newsym
= xmalloc (sizeof (*newsym
)
2597 + arg
->val
.str
.lenmb
+ 1);
2598 memcpy (newsym
->str
, arg
->val
.str
.startmb
, arg
->val
.str
.lenmb
);
2599 newsym
->str
[arg
->val
.str
.lenmb
] = '\0';
2600 newsym
->next
= defined
;
2603 lr_ignore_rest (ldfile
, 1);
2608 if (nowtok
== tok_copy
)
2610 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2611 if (now
->tok
!= tok_string
)
2613 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2617 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2618 while (now
->tok
!= tok_eof
&& now
->tok
!= tok_end
);
2620 if (now
->tok
!= tok_eof
2621 || (now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
),
2622 now
->tok
== tok_eof
))
2623 lr_error (ldfile
, _("%s: premature end of file"), "LC_COLLATE");
2624 else if (now
->tok
!= tok_lc_collate
)
2626 lr_error (ldfile
, _("\
2627 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2628 lr_ignore_rest (ldfile
, 0);
2631 lr_ignore_rest (ldfile
, 1);
2636 if (! ignore_content
)
2638 /* Get the locale definition. */
2639 copy_locale
= load_locale (LC_COLLATE
, now
->val
.str
.startmb
,
2640 repertoire_name
, charmap
, NULL
);
2641 if ((copy_locale
->avail
& COLLATE_LOCALE
) == 0)
2643 /* Not yet loaded. So do it now. */
2644 if (locfile_read (copy_locale
, charmap
) != 0)
2648 if (copy_locale
->categories
[LC_COLLATE
].collate
== NULL
)
2652 lr_ignore_rest (ldfile
, 1);
2654 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2658 /* Prepare the data structures. */
2659 collate_startup (ldfile
, result
, copy_locale
, ignore_content
);
2660 collate
= result
->categories
[LC_COLLATE
].collate
;
2668 /* Of course we don't proceed beyond the end of file. */
2669 if (nowtok
== tok_eof
)
2672 /* Ignore empty lines. */
2673 if (nowtok
== tok_eol
)
2675 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2683 /* Allow copying other locales. */
2684 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2685 if (now
->tok
!= tok_string
)
2688 if (! ignore_content
)
2689 load_locale (LC_COLLATE
, now
->val
.str
.startmb
, repertoire_name
,
2692 lr_ignore_rest (ldfile
, 1);
2695 case tok_coll_weight_max
:
2696 /* Ignore the rest of the line if we don't need the input of
2700 lr_ignore_rest (ldfile
, 0);
2707 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2708 if (arg
->tok
!= tok_number
)
2710 if (collate
->col_weight_max
!= -1)
2711 lr_error (ldfile
, _("%s: duplicate definition of `%s'"),
2712 "LC_COLLATE", "col_weight_max");
2714 collate
->col_weight_max
= arg
->val
.num
;
2715 lr_ignore_rest (ldfile
, 1);
2718 case tok_section_symbol
:
2719 /* Ignore the rest of the line if we don't need the input of
2723 lr_ignore_rest (ldfile
, 0);
2730 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2731 if (arg
->tok
!= tok_bsymbol
)
2733 else if (!ignore_content
)
2735 /* Check whether this section is already known. */
2736 struct section_list
*known
= collate
->sections
;
2737 while (known
!= NULL
)
2739 if (strcmp (known
->name
, arg
->val
.str
.startmb
) == 0)
2741 known
= known
->next
;
2747 _("%s: duplicate declaration of section `%s'"),
2748 "LC_COLLATE", arg
->val
.str
.startmb
);
2749 free (arg
->val
.str
.startmb
);
2752 collate
->sections
= make_seclist_elem (collate
,
2753 arg
->val
.str
.startmb
,
2756 lr_ignore_rest (ldfile
, known
== NULL
);
2760 free (arg
->val
.str
.startmb
);
2761 lr_ignore_rest (ldfile
, 0);
2765 case tok_collating_element
:
2766 /* Ignore the rest of the line if we don't need the input of
2770 lr_ignore_rest (ldfile
, 0);
2774 if (state
!= 0 && state
!= 2)
2777 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2778 if (arg
->tok
!= tok_bsymbol
)
2782 const char *symbol
= arg
->val
.str
.startmb
;
2783 size_t symbol_len
= arg
->val
.str
.lenmb
;
2785 /* Next the `from' keyword. */
2786 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2787 if (arg
->tok
!= tok_from
)
2789 free ((char *) symbol
);
2793 ldfile
->return_widestr
= 1;
2794 ldfile
->translate_strings
= 1;
2796 /* Finally the string with the replacement. */
2797 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2799 ldfile
->return_widestr
= 0;
2800 ldfile
->translate_strings
= 0;
2802 if (arg
->tok
!= tok_string
)
2805 if (!ignore_content
&& symbol
!= NULL
)
2807 /* The name is already defined. */
2808 if (check_duplicate (ldfile
, collate
, charmap
,
2809 repertoire
, symbol
, symbol_len
))
2812 if (arg
->val
.str
.startmb
!= NULL
)
2813 insert_entry (&collate
->elem_table
, symbol
, symbol_len
,
2814 new_element (collate
,
2815 arg
->val
.str
.startmb
,
2816 arg
->val
.str
.lenmb
- 1,
2817 arg
->val
.str
.startwc
,
2818 symbol
, symbol_len
, 0));
2823 free ((char *) symbol
);
2824 free (arg
->val
.str
.startmb
);
2825 free (arg
->val
.str
.startwc
);
2827 lr_ignore_rest (ldfile
, 1);
2831 case tok_collating_symbol
:
2832 /* Ignore the rest of the line if we don't need the input of
2836 lr_ignore_rest (ldfile
, 0);
2840 if (state
!= 0 && state
!= 2)
2843 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2844 if (arg
->tok
!= tok_bsymbol
)
2848 char *symbol
= arg
->val
.str
.startmb
;
2849 size_t symbol_len
= arg
->val
.str
.lenmb
;
2850 char *endsymbol
= NULL
;
2851 size_t endsymbol_len
= 0;
2852 enum token_t ellipsis
= tok_none
;
2854 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2855 if (arg
->tok
== tok_ellipsis2
|| arg
->tok
== tok_ellipsis4
)
2857 ellipsis
= arg
->tok
;
2859 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
2861 if (arg
->tok
!= tok_bsymbol
)
2867 endsymbol
= arg
->val
.str
.startmb
;
2868 endsymbol_len
= arg
->val
.str
.lenmb
;
2870 lr_ignore_rest (ldfile
, 1);
2872 else if (arg
->tok
!= tok_eol
)
2878 if (!ignore_content
)
2881 || (ellipsis
!= tok_none
&& endsymbol
== NULL
))
2883 lr_error (ldfile
, _("\
2884 %s: unknown character in collating symbol name"),
2888 else if (ellipsis
== tok_none
)
2890 /* A single symbol, no ellipsis. */
2891 if (check_duplicate (ldfile
, collate
, charmap
,
2892 repertoire
, symbol
, symbol_len
))
2893 /* The name is already defined. */
2896 insert_entry (&collate
->sym_table
, symbol
, symbol_len
,
2897 new_symbol (collate
, symbol
, symbol_len
));
2899 else if (symbol_len
!= endsymbol_len
)
2903 _("invalid names for character range"));
2908 /* Oh my, we have to handle an ellipsis. First, as
2909 usual, determine the common prefix and then
2910 convert the rest into a range. */
2912 unsigned long int from
;
2913 unsigned long int to
;
2916 for (prefixlen
= 0; prefixlen
< symbol_len
; ++prefixlen
)
2917 if (symbol
[prefixlen
] != endsymbol
[prefixlen
])
2920 /* Convert the rest into numbers. */
2921 symbol
[symbol_len
] = '\0';
2922 from
= strtoul (&symbol
[prefixlen
], &endp
,
2923 ellipsis
== tok_ellipsis2
? 16 : 10);
2925 goto col_sym_inv_range
;
2927 endsymbol
[symbol_len
] = '\0';
2928 to
= strtoul (&endsymbol
[prefixlen
], &endp
,
2929 ellipsis
== tok_ellipsis2
? 16 : 10);
2931 goto col_sym_inv_range
;
2934 goto col_sym_inv_range
;
2936 /* Now loop over all entries. */
2941 symbuf
= (char *) obstack_alloc (&collate
->mempool
,
2944 /* Create the name. */
2946 ellipsis
== tok_ellipsis2
2947 ? "%.*s%.*lX" : "%.*s%.*lu",
2948 (int) prefixlen
, symbol
,
2949 (int) (symbol_len
- prefixlen
), from
);
2951 if (check_duplicate (ldfile
, collate
, charmap
,
2952 repertoire
, symbuf
, symbol_len
))
2953 /* The name is already defined. */
2956 insert_entry (&collate
->sym_table
, symbuf
,
2958 new_symbol (collate
, symbuf
,
2961 /* Increment the counter. */
2977 case tok_symbol_equivalence
:
2978 /* Ignore the rest of the line if we don't need the input of
2982 lr_ignore_rest (ldfile
, 0);
2989 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2990 if (arg
->tok
!= tok_bsymbol
)
2994 const char *newname
= arg
->val
.str
.startmb
;
2995 size_t newname_len
= arg
->val
.str
.lenmb
;
2996 const char *symname
;
2998 void *symval
; /* Actually struct symbol_t* */
3000 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3001 if (arg
->tok
!= tok_bsymbol
)
3003 free ((char *) newname
);
3007 symname
= arg
->val
.str
.startmb
;
3008 symname_len
= arg
->val
.str
.lenmb
;
3010 if (newname
== NULL
)
3012 lr_error (ldfile
, _("\
3013 %s: unknown character in equivalent definition name"),
3017 free ((char *) newname
);
3018 free ((char *) symname
);
3021 if (symname
== NULL
)
3023 lr_error (ldfile
, _("\
3024 %s: unknown character in equivalent definition value"),
3026 goto sym_equiv_free
;
3029 /* See whether the symbol name is already defined. */
3030 if (find_entry (&collate
->sym_table
, symname
, symname_len
,
3033 lr_error (ldfile
, _("\
3034 %s: unknown symbol `%s' in equivalent definition"),
3035 "LC_COLLATE", symname
);
3036 goto sym_equiv_free
;
3039 if (insert_entry (&collate
->sym_table
,
3040 newname
, newname_len
, symval
) < 0)
3042 lr_error (ldfile
, _("\
3043 error while adding equivalent collating symbol"));
3044 goto sym_equiv_free
;
3047 free ((char *) symname
);
3049 lr_ignore_rest (ldfile
, 1);
3053 /* Ignore the rest of the line if we don't need the input of
3057 lr_ignore_rest (ldfile
, 0);
3061 /* We get told about the scripts we know. */
3062 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3063 if (arg
->tok
!= tok_bsymbol
)
3067 struct section_list
*runp
= collate
->known_sections
;
3070 while (runp
!= NULL
)
3071 if (strncmp (runp
->name
, arg
->val
.str
.startmb
,
3072 arg
->val
.str
.lenmb
) == 0
3073 && runp
->name
[arg
->val
.str
.lenmb
] == '\0')
3076 runp
= runp
->def_next
;
3080 lr_error (ldfile
, _("duplicate definition of script `%s'"),
3082 lr_ignore_rest (ldfile
, 0);
3086 runp
= (struct section_list
*) xcalloc (1, sizeof (*runp
));
3087 name
= (char *) xmalloc (arg
->val
.str
.lenmb
+ 1);
3088 memcpy (name
, arg
->val
.str
.startmb
, arg
->val
.str
.lenmb
);
3089 name
[arg
->val
.str
.lenmb
] = '\0';
3092 runp
->def_next
= collate
->known_sections
;
3093 collate
->known_sections
= runp
;
3095 lr_ignore_rest (ldfile
, 1);
3098 case tok_order_start
:
3099 /* Ignore the rest of the line if we don't need the input of
3103 lr_ignore_rest (ldfile
, 0);
3107 if (state
!= 0 && state
!= 1 && state
!= 2)
3111 /* The 14652 draft does not specify whether all `order_start' lines
3112 must contain the same number of sort-rules, but 14651 does. So
3113 we require this here as well. */
3114 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3115 if (arg
->tok
== tok_bsymbol
)
3117 /* This better should be a section name. */
3118 struct section_list
*sp
= collate
->known_sections
;
3120 && (sp
->name
== NULL
3121 || strncmp (sp
->name
, arg
->val
.str
.startmb
,
3122 arg
->val
.str
.lenmb
) != 0
3123 || sp
->name
[arg
->val
.str
.lenmb
] != '\0'))
3128 lr_error (ldfile
, _("\
3129 %s: unknown section name `%.*s'"),
3130 "LC_COLLATE", (int) arg
->val
.str
.lenmb
,
3131 arg
->val
.str
.startmb
);
3132 /* We use the error section. */
3133 collate
->current_section
= &collate
->error_section
;
3135 if (collate
->error_section
.first
== NULL
)
3137 /* Insert &collate->error_section at the end of
3138 the collate->sections list. */
3139 if (collate
->sections
== NULL
)
3140 collate
->sections
= &collate
->error_section
;
3143 sp
= collate
->sections
;
3144 while (sp
->next
!= NULL
)
3147 sp
->next
= &collate
->error_section
;
3149 collate
->error_section
.next
= NULL
;
3154 /* One should not be allowed to open the same
3156 if (sp
->first
!= NULL
)
3157 lr_error (ldfile
, _("\
3158 %s: multiple order definitions for section `%s'"),
3159 "LC_COLLATE", sp
->name
);
3162 /* Insert sp in the collate->sections list,
3163 right after collate->current_section. */
3164 if (collate
->current_section
!= NULL
)
3166 sp
->next
= collate
->current_section
->next
;
3167 collate
->current_section
->next
= sp
;
3169 else if (collate
->sections
== NULL
)
3170 /* This is the first section to be defined. */
3171 collate
->sections
= sp
;
3173 collate
->current_section
= sp
;
3176 /* Next should come the end of the line or a semicolon. */
3177 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
3179 if (arg
->tok
== tok_eol
)
3183 /* This means we have exactly one rule: `forward'. */
3185 lr_error (ldfile
, _("\
3186 %s: invalid number of sorting rules"),
3190 sp
->rules
= obstack_alloc (&collate
->mempool
,
3191 (sizeof (enum coll_sort_rule
)
3193 for (cnt
= 0; cnt
< nrules
; ++cnt
)
3194 sp
->rules
[cnt
] = sort_forward
;
3200 /* Get the next token. */
3201 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
3207 /* There is no section symbol. Therefore we use the unnamed
3209 collate
->current_section
= &collate
->unnamed_section
;
3211 if (collate
->unnamed_section_defined
)
3212 lr_error (ldfile
, _("\
3213 %s: multiple order definitions for unnamed section"),
3217 /* Insert &collate->unnamed_section at the beginning of
3218 the collate->sections list. */
3219 collate
->unnamed_section
.next
= collate
->sections
;
3220 collate
->sections
= &collate
->unnamed_section
;
3221 collate
->unnamed_section_defined
= true;
3225 /* Now read the direction names. */
3226 read_directions (ldfile
, arg
, charmap
, repertoire
, result
);
3228 /* From now we need the strings untranslated. */
3229 ldfile
->translate_strings
= 0;
3233 /* Ignore the rest of the line if we don't need the input of
3237 lr_ignore_rest (ldfile
, 0);
3244 /* Handle ellipsis at end of list. */
3245 if (was_ellipsis
!= tok_none
)
3247 handle_ellipsis (ldfile
, NULL
, 0, was_ellipsis
, charmap
,
3248 repertoire
, result
);
3249 was_ellipsis
= tok_none
;
3253 lr_ignore_rest (ldfile
, 1);
3256 case tok_reorder_after
:
3257 /* Ignore the rest of the line if we don't need the input of
3261 lr_ignore_rest (ldfile
, 0);
3267 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
3271 /* Handle ellipsis at end of list. */
3272 if (was_ellipsis
!= tok_none
)
3274 handle_ellipsis (ldfile
, arg
->val
.str
.startmb
,
3275 arg
->val
.str
.lenmb
, was_ellipsis
, charmap
,
3276 repertoire
, result
);
3277 was_ellipsis
= tok_none
;
3280 else if (state
== 0 && copy_locale
== NULL
)
3282 else if (state
!= 0 && state
!= 2 && state
!= 3)
3286 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3287 if (arg
->tok
== tok_bsymbol
|| arg
->tok
== tok_ucs4
)
3289 /* Find this symbol in the sequence table. */
3293 struct element_t
*insp
;
3297 if (arg
->tok
== tok_bsymbol
)
3299 startmb
= arg
->val
.str
.startmb
;
3300 lenmb
= arg
->val
.str
.lenmb
;
3304 sprintf (ucsbuf
, "U%08X", arg
->val
.ucs4
);
3309 if (find_entry (&collate
->seq_table
, startmb
, lenmb
, &ptr
) == 0)
3310 /* Yes, the symbol exists. Simply point the cursor
3312 collate
->cursor
= (struct element_t
*) ptr
;
3315 struct symbol_t
*symbp
;
3318 if (find_entry (&collate
->sym_table
, startmb
, lenmb
,
3323 if (symbp
->order
->last
!= NULL
3324 || symbp
->order
->next
!= NULL
)
3325 collate
->cursor
= symbp
->order
;
3328 /* This is a collating symbol but its position
3329 is not yet defined. */
3330 lr_error (ldfile
, _("\
3331 %s: order for collating symbol %.*s not yet defined"),
3332 "LC_COLLATE", (int) lenmb
, startmb
);
3333 collate
->cursor
= NULL
;
3337 else if (find_entry (&collate
->elem_table
, startmb
, lenmb
,
3340 insp
= (struct element_t
*) ptr
;
3342 if (insp
->last
!= NULL
|| insp
->next
!= NULL
)
3343 collate
->cursor
= insp
;
3346 /* This is a collating element but its position
3347 is not yet defined. */
3348 lr_error (ldfile
, _("\
3349 %s: order for collating element %.*s not yet defined"),
3350 "LC_COLLATE", (int) lenmb
, startmb
);
3351 collate
->cursor
= NULL
;
3357 /* This is bad. The symbol after which we have to
3358 insert does not exist. */
3359 lr_error (ldfile
, _("\
3360 %s: cannot reorder after %.*s: symbol not known"),
3361 "LC_COLLATE", (int) lenmb
, startmb
);
3362 collate
->cursor
= NULL
;
3367 lr_ignore_rest (ldfile
, no_error
);
3370 /* This must not happen. */
3374 case tok_reorder_end
:
3375 /* Ignore the rest of the line if we don't need the input of
3383 lr_ignore_rest (ldfile
, 1);
3386 case tok_reorder_sections_after
:
3387 /* Ignore the rest of the line if we don't need the input of
3391 lr_ignore_rest (ldfile
, 0);
3397 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
3401 /* Handle ellipsis at end of list. */
3402 if (was_ellipsis
!= tok_none
)
3404 handle_ellipsis (ldfile
, NULL
, 0, was_ellipsis
, charmap
,
3405 repertoire
, result
);
3406 was_ellipsis
= tok_none
;
3409 else if (state
== 3)
3411 WITH_CUR_LOCALE (error (0, 0, _("\
3412 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3415 else if (state
!= 2 && state
!= 4)
3419 /* Get the name of the sections we are adding after. */
3420 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3421 if (arg
->tok
== tok_bsymbol
)
3423 /* Now find a section with this name. */
3424 struct section_list
*runp
= collate
->sections
;
3426 while (runp
!= NULL
)
3428 if (runp
->name
!= NULL
3429 && strlen (runp
->name
) == arg
->val
.str
.lenmb
3430 && memcmp (runp
->name
, arg
->val
.str
.startmb
,
3431 arg
->val
.str
.lenmb
) == 0)
3438 collate
->current_section
= runp
;
3441 /* This is bad. The section after which we have to
3442 reorder does not exist. Therefore we cannot
3443 process the whole rest of this reorder
3445 lr_error (ldfile
, _("%s: section `%.*s' not known"),
3446 "LC_COLLATE", (int) arg
->val
.str
.lenmb
,
3447 arg
->val
.str
.startmb
);
3451 lr_ignore_rest (ldfile
, 0);
3453 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3455 while (now
->tok
== tok_reorder_sections_after
3456 || now
->tok
== tok_reorder_sections_end
3457 || now
->tok
== tok_end
);
3459 /* Process the token we just saw. */
3465 /* This must not happen. */
3469 case tok_reorder_sections_end
:
3470 /* Ignore the rest of the line if we don't need the input of
3478 lr_ignore_rest (ldfile
, 1);
3483 /* Ignore the rest of the line if we don't need the input of
3487 lr_ignore_rest (ldfile
, 0);
3491 if (state
!= 0 && state
!= 1 && state
!= 3 && state
!= 5)
3494 if ((state
== 0 || state
== 5) && nowtok
== tok_ucs4
)
3497 if (nowtok
== tok_ucs4
)
3499 snprintf (ucs4buf
, sizeof (ucs4buf
), "U%08X", now
->val
.ucs4
);
3503 else if (arg
!= NULL
)
3505 symstr
= arg
->val
.str
.startmb
;
3506 symlen
= arg
->val
.str
.lenmb
;
3510 lr_error (ldfile
, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3511 (int) ldfile
->token
.val
.str
.lenmb
,
3512 ldfile
->token
.val
.str
.startmb
);
3516 struct element_t
*seqp
;
3519 /* We are outside an `order_start' region. This means
3520 we must only accept definitions of values for
3521 collation symbols since these are purely abstract
3522 values and don't need directions associated. */
3525 if (find_entry (&collate
->seq_table
, symstr
, symlen
, &ptr
) == 0)
3529 /* It's already defined. First check whether this
3530 is really a collating symbol. */
3531 if (seqp
->is_character
)
3540 if (find_entry (&collate
->sym_table
, symstr
, symlen
,
3542 /* No collating symbol, it's an error. */
3545 /* Maybe this is the first time we define a symbol
3546 value and it is before the first actual section. */
3547 if (collate
->sections
== NULL
)
3548 collate
->sections
= collate
->current_section
=
3549 &collate
->symbol_section
;
3552 if (was_ellipsis
!= tok_none
)
3554 handle_ellipsis (ldfile
, symstr
, symlen
, was_ellipsis
,
3555 charmap
, repertoire
, result
);
3557 /* Remember that we processed the ellipsis. */
3558 was_ellipsis
= tok_none
;
3560 /* And don't add the value a second time. */
3564 else if (state
== 3)
3566 /* It is possible that we already have this collation sequence.
3567 In this case we move the entry. */
3571 /* If the symbol after which we have to insert was not found
3572 ignore all entries. */
3573 if (collate
->cursor
== NULL
)
3575 lr_ignore_rest (ldfile
, 0);
3579 if (find_entry (&collate
->seq_table
, symstr
, symlen
, &ptr
) == 0)
3581 seqp
= (struct element_t
*) ptr
;
3585 if (find_entry (&collate
->sym_table
, symstr
, symlen
, &sym
) == 0
3586 && (seqp
= ((struct symbol_t
*) sym
)->order
) != NULL
)
3589 if (find_entry (&collate
->elem_table
, symstr
, symlen
, &ptr
) == 0
3590 && (seqp
= (struct element_t
*) ptr
,
3591 seqp
->last
!= NULL
|| seqp
->next
!= NULL
3592 || (collate
->start
!= NULL
&& seqp
== collate
->start
)))
3595 /* Remove the entry from the old position. */
3596 if (seqp
->last
== NULL
)
3597 collate
->start
= seqp
->next
;
3599 seqp
->last
->next
= seqp
->next
;
3600 if (seqp
->next
!= NULL
)
3601 seqp
->next
->last
= seqp
->last
;
3603 /* We also have to check whether this entry is the
3604 first or last of a section. */
3605 if (seqp
->section
->first
== seqp
)
3607 if (seqp
->section
->first
== seqp
->section
->last
)
3608 /* This section has no content anymore. */
3609 seqp
->section
->first
= seqp
->section
->last
= NULL
;
3611 seqp
->section
->first
= seqp
->next
;
3613 else if (seqp
->section
->last
== seqp
)
3614 seqp
->section
->last
= seqp
->last
;
3616 /* Now insert it in the new place. */
3617 insert_weights (ldfile
, seqp
, charmap
, repertoire
, result
,
3622 /* Otherwise we just add a new entry. */
3624 else if (state
== 5)
3626 /* We are reordering sections. Find the named section. */
3627 struct section_list
*runp
= collate
->sections
;
3628 struct section_list
*prevp
= NULL
;
3630 while (runp
!= NULL
)
3632 if (runp
->name
!= NULL
3633 && strlen (runp
->name
) == symlen
3634 && memcmp (runp
->name
, symstr
, symlen
) == 0)
3643 lr_error (ldfile
, _("%s: section `%.*s' not known"),
3644 "LC_COLLATE", (int) symlen
, symstr
);
3645 lr_ignore_rest (ldfile
, 0);
3649 if (runp
!= collate
->current_section
)
3651 /* Remove the named section from the old place and
3652 insert it in the new one. */
3653 prevp
->next
= runp
->next
;
3655 runp
->next
= collate
->current_section
->next
;
3656 collate
->current_section
->next
= runp
;
3657 collate
->current_section
= runp
;
3660 /* Process the rest of the line which might change
3661 the collation rules. */
3662 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
3664 if (arg
->tok
!= tok_eof
&& arg
->tok
!= tok_eol
)
3665 read_directions (ldfile
, arg
, charmap
, repertoire
,
3670 else if (was_ellipsis
!= tok_none
)
3672 /* Using the information in the `ellipsis_weight'
3673 element and this and the last value we have to handle
3674 the ellipsis now. */
3675 assert (state
== 1);
3677 handle_ellipsis (ldfile
, symstr
, symlen
, was_ellipsis
, charmap
,
3678 repertoire
, result
);
3680 /* Remember that we processed the ellipsis. */
3681 was_ellipsis
= tok_none
;
3683 /* And don't add the value a second time. */
3687 /* Now insert in the new place. */
3688 insert_value (ldfile
, symstr
, symlen
, charmap
, repertoire
, result
);
3692 /* Ignore the rest of the line if we don't need the input of
3696 lr_ignore_rest (ldfile
, 0);
3703 if (was_ellipsis
!= tok_none
)
3706 _("%s: cannot have `%s' as end of ellipsis range"),
3707 "LC_COLLATE", "UNDEFINED");
3709 unlink_element (collate
);
3710 was_ellipsis
= tok_none
;
3713 /* See whether UNDEFINED already appeared somewhere. */
3714 if (collate
->undefined
.next
!= NULL
3715 || &collate
->undefined
== collate
->cursor
)
3718 _("%s: order for `%.*s' already defined at %s:%Zu"),
3719 "LC_COLLATE", 9, "UNDEFINED",
3720 collate
->undefined
.file
,
3721 collate
->undefined
.line
);
3722 lr_ignore_rest (ldfile
, 0);
3725 /* Parse the weights. */
3726 insert_weights (ldfile
, &collate
->undefined
, charmap
,
3727 repertoire
, result
, tok_none
);
3730 case tok_ellipsis2
: /* symbolic hexadecimal ellipsis */
3731 case tok_ellipsis3
: /* absolute ellipsis */
3732 case tok_ellipsis4
: /* symbolic decimal ellipsis */
3733 /* This is the symbolic (decimal or hexadecimal) or absolute
3735 if (was_ellipsis
!= tok_none
)
3738 if (state
!= 0 && state
!= 1 && state
!= 3)
3741 was_ellipsis
= nowtok
;
3743 insert_weights (ldfile
, &collate
->ellipsis_weight
, charmap
,
3744 repertoire
, result
, nowtok
);
3749 /* Next we assume `LC_COLLATE'. */
3750 if (!ignore_content
)
3752 if (state
== 0 && copy_locale
== NULL
)
3753 /* We must either see a copy statement or have
3756 _("%s: empty category description not allowed"),
3758 else if (state
== 1)
3760 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
3763 /* Handle ellipsis at end of list. */
3764 if (was_ellipsis
!= tok_none
)
3766 handle_ellipsis (ldfile
, NULL
, 0, was_ellipsis
, charmap
,
3767 repertoire
, result
);
3768 was_ellipsis
= tok_none
;
3771 else if (state
== 3)
3772 WITH_CUR_LOCALE (error (0, 0, _("\
3773 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3774 else if (state
== 5)
3775 WITH_CUR_LOCALE (error (0, 0, _("\
3776 %s: missing `reorder-sections-end' keyword"), "LC_COLLATE"));
3778 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3779 if (arg
->tok
== tok_eof
)
3781 if (arg
->tok
== tok_eol
)
3782 lr_error (ldfile
, _("%s: incomplete `END' line"), "LC_COLLATE");
3783 else if (arg
->tok
!= tok_lc_collate
)
3784 lr_error (ldfile
, _("\
3785 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3786 lr_ignore_rest (ldfile
, arg
->tok
== tok_lc_collate
);
3792 lr_ignore_rest (ldfile
, 0);
3796 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3797 if (arg
->tok
!= tok_ident
)
3800 /* Simply add the new symbol. */
3801 struct name_list
*newsym
= xmalloc (sizeof (*newsym
)
3802 + arg
->val
.str
.lenmb
+ 1);
3803 memcpy (newsym
->str
, arg
->val
.str
.startmb
, arg
->val
.str
.lenmb
);
3804 newsym
->str
[arg
->val
.str
.lenmb
] = '\0';
3805 newsym
->next
= defined
;
3808 lr_ignore_rest (ldfile
, 1);
3814 lr_ignore_rest (ldfile
, 0);
3818 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3819 if (arg
->tok
!= tok_ident
)
3822 /* Remove _all_ occurrences of the symbol from the list. */
3823 struct name_list
*prevdef
= NULL
;
3824 struct name_list
*curdef
= defined
;
3825 while (curdef
!= NULL
)
3826 if (strncmp (arg
->val
.str
.startmb
, curdef
->str
,
3827 arg
->val
.str
.lenmb
) == 0
3828 && curdef
->str
[arg
->val
.str
.lenmb
] == '\0')
3830 if (prevdef
== NULL
)
3831 defined
= curdef
->next
;
3833 prevdef
->next
= curdef
->next
;
3835 struct name_list
*olddef
= curdef
;
3836 curdef
= curdef
->next
;
3843 curdef
= curdef
->next
;
3846 lr_ignore_rest (ldfile
, 1);
3853 lr_ignore_rest (ldfile
, 0);
3858 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3859 if (arg
->tok
!= tok_ident
)
3861 lr_ignore_rest (ldfile
, 1);
3863 if (collate
->else_action
== else_none
)
3866 while (curdef
!= NULL
)
3867 if (strncmp (arg
->val
.str
.startmb
, curdef
->str
,
3868 arg
->val
.str
.lenmb
) == 0
3869 && curdef
->str
[arg
->val
.str
.lenmb
] == '\0')
3872 curdef
= curdef
->next
;
3874 if ((nowtok
== tok_ifdef
&& curdef
!= NULL
)
3875 || (nowtok
== tok_ifndef
&& curdef
== NULL
))
3877 /* We have to use the if-branch. */
3878 collate
->else_action
= else_ignore
;
3882 /* We have to use the else-branch, if there is one. */
3883 nowtok
= skip_to (ldfile
, collate
, charmap
, 0);
3884 if (nowtok
== tok_else
)
3885 collate
->else_action
= else_seen
;
3886 else if (nowtok
== tok_elifdef
)
3891 else if (nowtok
== tok_elifndef
)
3893 nowtok
= tok_ifndef
;
3896 else if (nowtok
== tok_eof
)
3898 else if (nowtok
== tok_end
)
3904 /* XXX Should it really become necessary to support nested
3905 preprocessor handling we will push the state here. */
3906 lr_error (ldfile
, _("%s: nested conditionals not supported"),
3908 nowtok
= skip_to (ldfile
, collate
, charmap
, 1);
3909 if (nowtok
== tok_eof
)
3911 else if (nowtok
== tok_end
)
3921 lr_ignore_rest (ldfile
, 0);
3925 lr_ignore_rest (ldfile
, 1);
3927 if (collate
->else_action
== else_ignore
)
3929 /* Ignore everything until the endif. */
3930 nowtok
= skip_to (ldfile
, collate
, charmap
, 1);
3931 if (nowtok
== tok_eof
)
3933 else if (nowtok
== tok_end
)
3938 assert (collate
->else_action
== else_none
);
3939 lr_error (ldfile
, _("\
3940 %s: '%s' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE",
3941 nowtok
== tok_else
? "else"
3942 : nowtok
== tok_elifdef
? "elifdef" : "elifndef");
3949 lr_ignore_rest (ldfile
, 0);
3953 lr_ignore_rest (ldfile
, 1);
3955 if (collate
->else_action
!= else_ignore
3956 && collate
->else_action
!= else_seen
)
3957 lr_error (ldfile
, _("\
3958 %s: 'endif' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE");
3960 /* XXX If we support nested preprocessor directives we pop
3962 collate
->else_action
= else_none
;
3967 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
3970 /* Prepare for the next round. */
3971 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3976 /* When we come here we reached the end of the file. */
3977 lr_error (ldfile
, _("%s: premature end of file"), "LC_COLLATE");