1 /* Copyright (C) 1995-2015 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, see <http://www.gnu.org/licenses/>. */
27 #include <sys/param.h>
29 #include "localedef.h"
31 #include "localeinfo.h"
32 #include "linereader.h"
34 #include "elem-hash.h"
35 #include "../localeinfo.h"
37 /* Uncomment the following line in the production version. */
38 /* #define NDEBUG 1 */
41 #define obstack_chunk_alloc malloc
42 #define obstack_chunk_free free
/* Append the 32-bit value DATA to the object currently being grown in
   OBSTACK.  The value is passed through maybe_swap_uint32 before it is
   stored.  The object size so far must satisfy LOCFILE_ALIGNED_P.  */
static inline void
__attribute ((always_inline))
obstack_int32_grow (struct obstack *obstack, int32_t data)
{
  int32_t stored;

  assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));
  stored = maybe_swap_uint32 (data);

  if (sizeof (int32_t) != sizeof (int))
    {
      /* No matching `int' primitive: copy the bytes generically.  */
      obstack_grow (obstack, &stored, sizeof (int32_t));
      return;
    }

  obstack_int_grow (obstack, stored);
}
/* Like obstack_int32_grow, but uses obstack_int_grow_fast when `int' is
   32 bits wide.  When the sizes differ it falls back to the ordinary
   obstack_grow.  The object size so far must satisfy LOCFILE_ALIGNED_P.  */
static inline void
__attribute ((always_inline))
obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
{
  int32_t stored;

  assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));
  stored = maybe_swap_uint32 (data);

  if (sizeof (int32_t) != sizeof (int))
    {
      /* No matching `int' primitive: copy the bytes generically.  */
      obstack_grow (obstack, &stored, sizeof (int32_t));
      return;
    }

  obstack_int_grow_fast (obstack, stored);
}
68 /* Forward declaration. */
71 /* Data type for list of strings. */
74 /* Successor in the known_sections list. */
75 struct section_list
*def_next
;
76 /* Successor in the sections list. */
77 struct section_list
*next
;
78 /* Name of the section. */
80 /* First element of this section. */
81 struct element_t
*first
;
82 /* Last element of this section. */
83 struct element_t
*last
;
84 /* These are the rules for this section. */
85 enum coll_sort_rule
*rules
;
86 /* Index of the rule set in the appropriate section of the output file. */
94 /* Number of elements. */
100 /* Data type for collating element. */
112 /* The following is a bit mask whose bits are set if this element is
113 used in the appropriate level. Interesting for the singlebyte
116 XXX The type here restricts the number of levels to 32. It could
117 be changed if necessary but I doubt this is necessary. */
118 unsigned int used_in_level
;
120 struct element_list_t
*weights
;
122 /* Nonzero if this is a real character definition. */
125 /* Order of the character in the sequence. This information will
126 be used in range expressions. */
130 /* Where does the definition come from. */
134 /* Which section does this belong to. */
135 struct section_list
*section
;
137 /* Predecessor and successor in the order list. */
138 struct element_t
*last
;
139 struct element_t
*next
;
141 /* Next element in multibyte output list. */
142 struct element_t
*mbnext
;
143 struct element_t
*mblast
;
145 /* Next element in wide character output list. */
146 struct element_t
*wcnext
;
147 struct element_t
*wclast
;
150 /* Special element value. */
151 #define ELEMENT_ELLIPSIS2 ((struct element_t *) 1)
152 #define ELEMENT_ELLIPSIS3 ((struct element_t *) 2)
153 #define ELEMENT_ELLIPSIS4 ((struct element_t *) 3)
155 /* Data type for collating symbol. */
160 /* Point to place in the order list. */
161 struct element_t
*order
;
163 /* Where does the definition come from. */
168 /* Sparse table of struct element_t *. */
169 #define TABLE wchead_table
170 #define ELEMENT struct element_t *
173 #define NO_ADD_LOCALE
176 /* Sparse table of int32_t. */
177 #define TABLE collidx_table
178 #define ELEMENT int32_t
182 /* Sparse table of uint32_t. */
183 #define TABLE collseq_table
184 #define ELEMENT uint32_t
185 #define DEFAULT ~((uint32_t) 0)
189 /* Simple name list for the preprocessor. */
192 struct name_list
*next
;
197 /* The real definition of the struct for the LC_COLLATE locale. */
198 struct locale_collate_t
203 /* List of known scripts. */
204 struct section_list
*known_sections
;
205 /* List of used sections. */
206 struct section_list
*sections
;
207 /* Current section using definition. */
208 struct section_list
*current_section
;
209 /* There always can be an unnamed section. */
210 struct section_list unnamed_section
;
211 /* Flag whether the unnamed section has been defined. */
212 bool unnamed_section_defined
;
213 /* To make handling of errors easier we have another section. */
214 struct section_list error_section
;
215 /* Sometimes we are defining the values for collating symbols before
216 the first actual section. */
217 struct section_list symbol_section
;
219 /* Start of the order list. */
220 struct element_t
*start
;
222 /* The undefined element. */
223 struct element_t undefined
;
225 /* This is the cursor for `reorder_after' insertions. */
226 struct element_t
*cursor
;
228 /* This value is used when handling ellipsis. */
229 struct element_t ellipsis_weight
;
231 /* Known collating elements. */
232 hash_table elem_table
;
234 /* Known collating symbols. */
235 hash_table sym_table
;
237 /* Known collation sequences. */
238 hash_table seq_table
;
240 struct obstack mempool
;
242 /* The LC_COLLATE category is a bit special as it is sometimes possible
243 that the definitions from more than one input file contain information.
244 Therefore we keep all relevant input in a list. */
245 struct locale_collate_t
*next
;
247 /* Arrays with heads of the list for each of the leading bytes in
248 the multibyte sequences. */
249 struct element_t
*mbheads
[256];
251 /* Sparse table with the heads of the list for each of the leading
252 wide characters in the wide character sequences. */
253 struct wchead_table wcheads
;
255 /* The arrays with the collation sequence order. */
256 unsigned char mbseqorder
[256];
257 struct collseq_table wcseqorder
;
259 /* State of the preprocessor. */
270 /* We have a few global variables which are used for reading all
271 LC_COLLATE category descriptions in all files. */
272 static uint32_t nrules
;
274 /* List of defined preprocessor symbols. */
275 static struct name_list
*defined
;
/* We need UTF-8 encoding of numbers.

   Store the UTF-8 style encoding of VAL in BUF and return the number of
   bytes written (1 up to 6).  No terminating NUL byte is stored, so BUF
   must provide room for up to six bytes.  Values below 0x80 are stored
   as one byte.  */
static inline int
__attribute ((always_inline))
utf8_encode (char *buf, int val)
{
  int retval;

  if (val < 0x80)
    {
      *buf = (char) val;
      retval = 1;
    }
  else
    {
      int step;

      /* A sequence of STEP bytes carries 5 * STEP + 1 payload bits.  Pick
         the shortest sequence which can hold VAL.  */
      for (step = 2; step < 6; ++step)
        if ((val & (~(uint32_t) 0 << (5 * step + 1))) == 0)
          break;
      retval = step;

      /* The lead byte starts with STEP one bits followed by a zero bit.
         Compute the marker with unsigned arithmetic; right-shifting the
         negative value ~0xff is implementation-defined.  */
      *buf = (char) (unsigned char) (0xff00u >> step);

      /* Fill in the continuation bytes from the last one backwards, six
         payload bits each.  */
      --step;
      do
        {
          buf[step] = (char) (0x80 | (val & 0x3f));
          val >>= 6;
        }
      while (--step > 0);

      /* The remaining high bits go into the lead byte.  */
      *buf |= (char) val;
    }

  return retval;
}
314 static struct section_list
*
315 make_seclist_elem (struct locale_collate_t
*collate
, const char *string
,
316 struct section_list
*next
)
318 struct section_list
*newp
;
320 newp
= (struct section_list
*) obstack_alloc (&collate
->mempool
,
331 static struct element_t
*
332 new_element (struct locale_collate_t
*collate
, const char *mbs
, size_t mbslen
,
333 const uint32_t *wcs
, const char *name
, size_t namelen
,
336 struct element_t
*newp
;
338 newp
= (struct element_t
*) obstack_alloc (&collate
->mempool
,
340 newp
->name
= name
== NULL
? NULL
: obstack_copy0 (&collate
->mempool
,
344 newp
->mbs
= obstack_copy0 (&collate
->mempool
, mbs
, mbslen
);
354 size_t nwcs
= wcslen ((wchar_t *) wcs
);
356 /* Handle <U0000> as a single character. */
359 obstack_grow (&collate
->mempool
, wcs
, nwcs
* sizeof (uint32_t));
360 obstack_grow (&collate
->mempool
, &zero
, sizeof (uint32_t));
361 newp
->wcs
= (uint32_t *) obstack_finish (&collate
->mempool
);
369 newp
->mborder
= NULL
;
371 newp
->used_in_level
= 0;
372 newp
->is_character
= is_character
;
374 /* Will be assigned later. XXX */
375 newp
->mbseqorder
= 0;
376 newp
->wcseqorder
= 0;
378 /* Will be allocated later. */
379 newp
->weights
= NULL
;
384 newp
->section
= collate
->current_section
;
399 static struct symbol_t
*
400 new_symbol (struct locale_collate_t
*collate
, const char *name
, size_t len
)
402 struct symbol_t
*newp
;
404 newp
= (struct symbol_t
*) obstack_alloc (&collate
->mempool
, sizeof (*newp
));
406 newp
->name
= obstack_copy0 (&collate
->mempool
, name
, len
);
416 /* Test whether this name is already defined somewhere. */
418 check_duplicate (struct linereader
*ldfile
, struct locale_collate_t
*collate
,
419 const struct charmap_t
*charmap
,
420 struct repertoire_t
*repertoire
, const char *symbol
,
425 if (find_entry (&charmap
->char_table
, symbol
, symbol_len
, &ignore
) == 0)
427 lr_error (ldfile
, _("`%.*s' already defined in charmap"),
428 (int) symbol_len
, symbol
);
432 if (repertoire
!= NULL
433 && (find_entry (&repertoire
->char_table
, symbol
, symbol_len
, &ignore
)
436 lr_error (ldfile
, _("`%.*s' already defined in repertoire"),
437 (int) symbol_len
, symbol
);
441 if (find_entry (&collate
->sym_table
, symbol
, symbol_len
, &ignore
) == 0)
443 lr_error (ldfile
, _("`%.*s' already defined as collating symbol"),
444 (int) symbol_len
, symbol
);
448 if (find_entry (&collate
->elem_table
, symbol
, symbol_len
, &ignore
) == 0)
450 lr_error (ldfile
, _("`%.*s' already defined as collating element"),
451 (int) symbol_len
, symbol
);
459 /* Read the direction specification. */
461 read_directions (struct linereader
*ldfile
, struct token
*arg
,
462 const struct charmap_t
*charmap
,
463 struct repertoire_t
*repertoire
, struct localedef_t
*result
)
466 int max
= nrules
?: 10;
467 enum coll_sort_rule
*rules
= calloc (max
, sizeof (*rules
));
469 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
475 if (arg
->tok
== tok_forward
)
477 if (rules
[cnt
] & sort_backward
)
481 lr_error (ldfile
, _("\
482 %s: `forward' and `backward' are mutually excluding each other"),
487 else if (rules
[cnt
] & sort_forward
)
491 lr_error (ldfile
, _("\
492 %s: `%s' mentioned more than once in definition of weight %d"),
493 "LC_COLLATE", "forward", cnt
+ 1);
497 rules
[cnt
] |= sort_forward
;
501 else if (arg
->tok
== tok_backward
)
503 if (rules
[cnt
] & sort_forward
)
507 lr_error (ldfile
, _("\
508 %s: `forward' and `backward' are mutually excluding each other"),
513 else if (rules
[cnt
] & sort_backward
)
517 lr_error (ldfile
, _("\
518 %s: `%s' mentioned more than once in definition of weight %d"),
519 "LC_COLLATE", "backward", cnt
+ 1);
523 rules
[cnt
] |= sort_backward
;
527 else if (arg
->tok
== tok_position
)
529 if (rules
[cnt
] & sort_position
)
533 lr_error (ldfile
, _("\
534 %s: `%s' mentioned more than once in definition of weight %d"),
535 "LC_COLLATE", "position", cnt
+ 1);
539 rules
[cnt
] |= sort_position
;
545 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
547 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
|| arg
->tok
== tok_comma
548 || arg
->tok
== tok_semicolon
)
550 if (! valid
&& ! warned
)
552 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
556 /* See whether we have to increment the counter. */
557 if (arg
->tok
!= tok_comma
&& rules
[cnt
] != 0)
559 /* Add the default `forward' if we have seen only `position'. */
560 if (rules
[cnt
] == sort_position
)
561 rules
[cnt
] = sort_position
| sort_forward
;
566 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
)
567 /* End of line or file, so we exit the loop. */
572 /* See whether we have enough room in the array. */
576 rules
= (enum coll_sort_rule
*) xrealloc (rules
,
579 memset (&rules
[cnt
], '\0', (max
- cnt
) * sizeof (*rules
));
586 /* There must not be any more rules. */
589 lr_error (ldfile
, _("\
590 %s: too many rules; first entry only had %d"),
591 "LC_COLLATE", nrules
);
595 lr_ignore_rest (ldfile
, 0);
604 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
609 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
614 /* Now we know how many rules we have. */
616 rules
= (enum coll_sort_rule
*) xrealloc (rules
,
617 nrules
* sizeof (*rules
));
623 /* Not enough rules in this specification. */
625 lr_error (ldfile
, _("%s: not enough sorting rules"), "LC_COLLATE");
628 rules
[cnt
] = sort_forward
;
629 while (++cnt
< nrules
);
633 collate
->current_section
->rules
= rules
;
637 static struct element_t
*
638 find_element (struct linereader
*ldfile
, struct locale_collate_t
*collate
,
639 const char *str
, size_t len
)
643 /* Search for the entries among the collation sequences already defined. */
644 if (find_entry (&collate
->seq_table
, str
, len
, &result
) != 0)
646 /* Nope, not defined yet. So we see whether it is a
650 if (find_entry (&collate
->sym_table
, str
, len
, &ptr
) == 0)
652 /* It's a collation symbol. */
653 struct symbol_t
*sym
= (struct symbol_t
*) ptr
;
657 result
= sym
->order
= new_element (collate
, NULL
, 0, NULL
,
660 else if (find_entry (&collate
->elem_table
, str
, len
, &result
) != 0)
662 /* It's also no collation element. So it is a character
663 element defined later. */
664 result
= new_element (collate
, NULL
, 0, NULL
, str
, len
, 1);
665 /* Insert it into the sequence table. */
666 insert_entry (&collate
->seq_table
, str
, len
, result
);
670 return (struct element_t
*) result
;
675 unlink_element (struct locale_collate_t
*collate
)
677 if (collate
->cursor
== collate
->start
)
679 assert (collate
->cursor
->next
== NULL
);
680 assert (collate
->cursor
->last
== NULL
);
681 collate
->cursor
= NULL
;
685 if (collate
->cursor
->next
!= NULL
)
686 collate
->cursor
->next
->last
= collate
->cursor
->last
;
687 if (collate
->cursor
->last
!= NULL
)
688 collate
->cursor
->last
->next
= collate
->cursor
->next
;
689 collate
->cursor
= collate
->cursor
->last
;
695 insert_weights (struct linereader
*ldfile
, struct element_t
*elem
,
696 const struct charmap_t
*charmap
,
697 struct repertoire_t
*repertoire
, struct localedef_t
*result
,
698 enum token_t ellipsis
)
702 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
704 /* Initialize all the fields. */
705 elem
->file
= ldfile
->fname
;
706 elem
->line
= ldfile
->lineno
;
708 elem
->last
= collate
->cursor
;
709 elem
->next
= collate
->cursor
? collate
->cursor
->next
: NULL
;
710 if (collate
->cursor
!= NULL
&& collate
->cursor
->next
!= NULL
)
711 collate
->cursor
->next
->last
= elem
;
712 if (collate
->cursor
!= NULL
)
713 collate
->cursor
->next
= elem
;
714 if (collate
->start
== NULL
)
716 assert (collate
->cursor
== NULL
);
717 collate
->start
= elem
;
720 elem
->section
= collate
->current_section
;
722 if (collate
->current_section
->first
== NULL
)
723 collate
->current_section
->first
= elem
;
724 if (collate
->current_section
->last
== collate
->cursor
)
725 collate
->current_section
->last
= elem
;
727 collate
->cursor
= elem
;
729 elem
->weights
= (struct element_list_t
*)
730 obstack_alloc (&collate
->mempool
, nrules
* sizeof (struct element_list_t
));
731 memset (elem
->weights
, '\0', nrules
* sizeof (struct element_list_t
));
735 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
738 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
)
741 if (arg
->tok
== tok_ignore
)
743 /* The weight for this level has to be ignored. We use the
744 null pointer to indicate this. */
745 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
746 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
747 elem
->weights
[weight_cnt
].w
[0] = NULL
;
748 elem
->weights
[weight_cnt
].cnt
= 1;
750 else if (arg
->tok
== tok_bsymbol
|| arg
->tok
== tok_ucs4
)
753 struct element_t
*val
;
757 if (arg
->tok
== tok_bsymbol
)
759 symstr
= arg
->val
.str
.startmb
;
760 symlen
= arg
->val
.str
.lenmb
;
764 snprintf (ucs4str
, sizeof (ucs4str
), "U%08X", arg
->val
.ucs4
);
769 val
= find_element (ldfile
, collate
, symstr
, symlen
);
773 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
774 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
775 elem
->weights
[weight_cnt
].w
[0] = val
;
776 elem
->weights
[weight_cnt
].cnt
= 1;
778 else if (arg
->tok
== tok_string
)
780 /* Split the string up in the individual characters and put
781 the element definitions in the list. */
782 const char *cp
= arg
->val
.str
.startmb
;
784 struct element_t
*charelem
;
785 struct element_t
**weights
= NULL
;
790 lr_error (ldfile
, _("%s: empty weight string not allowed"),
792 lr_ignore_rest (ldfile
, 0);
800 /* Ahh, it's a bsymbol or an UCS4 value. If it's
801 the latter we have to unify the name. */
802 const char *startp
= ++cp
;
807 if (*cp
== ldfile
->escape_char
)
810 /* It's a syntax error. */
816 if (cp
- startp
== 5 && startp
[0] == 'U'
817 && isxdigit (startp
[1]) && isxdigit (startp
[2])
818 && isxdigit (startp
[3]) && isxdigit (startp
[4]))
820 unsigned int ucs4
= strtoul (startp
+ 1, NULL
, 16);
823 newstr
= (char *) xmalloc (10);
824 snprintf (newstr
, 10, "U%08X", ucs4
);
832 charelem
= find_element (ldfile
, collate
, startp
, len
);
837 /* People really shouldn't use characters directly in
838 the string. Especially since it's not really clear
839 what this means. We interpret all characters in the
840 string as if they were bsymbols. Otherwise we
841 would have to match back to bsymbols somehow and this
842 is not what people normally expect. */
843 charelem
= find_element (ldfile
, collate
, cp
++, 1);
846 if (charelem
== NULL
)
848 /* We ignore the rest of the line. */
849 lr_ignore_rest (ldfile
, 0);
853 /* Add the pointer. */
856 struct element_t
**newp
;
858 newp
= (struct element_t
**)
859 alloca (max
* sizeof (struct element_t
*));
860 memcpy (newp
, weights
, cnt
* sizeof (struct element_t
*));
863 weights
[cnt
++] = charelem
;
867 /* Now store the information. */
868 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
869 obstack_alloc (&collate
->mempool
,
870 cnt
* sizeof (struct element_t
*));
871 memcpy (elem
->weights
[weight_cnt
].w
, weights
,
872 cnt
* sizeof (struct element_t
*));
873 elem
->weights
[weight_cnt
].cnt
= cnt
;
875 /* We don't need the string anymore. */
876 free (arg
->val
.str
.startmb
);
878 else if (ellipsis
!= tok_none
879 && (arg
->tok
== tok_ellipsis2
880 || arg
->tok
== tok_ellipsis3
881 || arg
->tok
== tok_ellipsis4
))
883 /* It must be the same ellipsis as used in the initial column. */
884 if (arg
->tok
!= ellipsis
)
885 lr_error (ldfile
, _("\
886 %s: weights must use the same ellipsis symbol as the name"),
889 /* The weight for this level will depend on the element
890 iterating over the range. Put a placeholder. */
891 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
892 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
893 elem
->weights
[weight_cnt
].w
[0] = ELEMENT_ELLIPSIS2
;
894 elem
->weights
[weight_cnt
].cnt
= 1;
899 /* It's a syntax error. */
900 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
901 lr_ignore_rest (ldfile
, 0);
905 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
906 /* This had better be the end of the line or a semicolon. */
907 if (arg
->tok
== tok_semicolon
)
908 /* OK, ignore this and read the next token. */
909 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
910 else if (arg
->tok
!= tok_eof
&& arg
->tok
!= tok_eol
)
912 /* It's a syntax error. */
913 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
914 lr_ignore_rest (ldfile
, 0);
918 while (++weight_cnt
< nrules
);
920 if (weight_cnt
< nrules
)
922 /* This means the rest of the line uses the current element as
926 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
927 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
928 if (ellipsis
== tok_none
)
929 elem
->weights
[weight_cnt
].w
[0] = elem
;
931 elem
->weights
[weight_cnt
].w
[0] = ELEMENT_ELLIPSIS2
;
932 elem
->weights
[weight_cnt
].cnt
= 1;
934 while (++weight_cnt
< nrules
);
938 if (arg
->tok
== tok_ignore
|| arg
->tok
== tok_bsymbol
)
940 /* Too many rule values. */
941 lr_error (ldfile
, _("%s: too many values"), "LC_COLLATE");
942 lr_ignore_rest (ldfile
, 0);
945 lr_ignore_rest (ldfile
, arg
->tok
!= tok_eol
&& arg
->tok
!= tok_eof
);
951 insert_value (struct linereader
*ldfile
, const char *symstr
, size_t symlen
,
952 const struct charmap_t
*charmap
, struct repertoire_t
*repertoire
,
953 struct localedef_t
*result
)
955 /* First find out what kind of symbol this is. */
958 struct element_t
*elem
= NULL
;
959 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
961 /* Try to find the character in the charmap. */
962 seq
= charmap_find_value (charmap
, symstr
, symlen
);
964 /* Determine the wide character. */
965 if (seq
== NULL
|| seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
967 wc
= repertoire_find_value (repertoire
, symstr
, symlen
);
974 if (wc
== ILLEGAL_CHAR_VALUE
&& seq
== NULL
)
976 /* It's no character, so look through the collation elements and
979 if (find_entry (&collate
->elem_table
, symstr
, symlen
, &ptr
) != 0)
982 struct symbol_t
*sym
= NULL
;
984 /* It's also no collation element. Therefore it's either a
985 collating symbol or it's a character which is not
986 supported by the character set. In the latter case we
987 simply create a dummy entry. */
988 if (find_entry (&collate
->sym_table
, symstr
, symlen
, &result
) == 0)
990 /* It's a collation symbol. */
991 sym
= (struct symbol_t
*) result
;
998 elem
= new_element (collate
, NULL
, 0, NULL
, symstr
, symlen
, 0);
1003 /* Enter a fake element in the sequence table. This
1004 won't cause anything in the output since there is
1005 no multibyte or wide character associated with
1007 insert_entry (&collate
->seq_table
, symstr
, symlen
, elem
);
1011 /* Copy the result back. */
1016 /* Otherwise the symbol stands for a character. */
1018 if (find_entry (&collate
->seq_table
, symstr
, symlen
, &ptr
) != 0)
1020 uint32_t wcs
[2] = { wc
, 0 };
1022 /* We have to allocate an entry. */
1023 elem
= new_element (collate
,
1024 seq
!= NULL
? (char *) seq
->bytes
: NULL
,
1025 seq
!= NULL
? seq
->nbytes
: 0,
1026 wc
== ILLEGAL_CHAR_VALUE
? NULL
: wcs
,
1029 /* And add it to the table. */
1030 if (insert_entry (&collate
->seq_table
, symstr
, symlen
, elem
) != 0)
1031 /* This cannot happen. */
1032 assert (! "Internal error");
1036 /* Copy the result back. */
1039 /* Maybe the character was used before the definition. In this case
1040 we have to insert the byte sequences now. */
1041 if (elem
->mbs
== NULL
&& seq
!= NULL
)
1043 elem
->mbs
= obstack_copy0 (&collate
->mempool
,
1044 seq
->bytes
, seq
->nbytes
);
1045 elem
->nmbs
= seq
->nbytes
;
1048 if (elem
->wcs
== NULL
&& wc
!= ILLEGAL_CHAR_VALUE
)
1050 uint32_t wcs
[2] = { wc
, 0 };
1052 elem
->wcs
= obstack_copy (&collate
->mempool
, wcs
, sizeof (wcs
));
1058 /* Test whether this element is not already in the list. */
1059 if (elem
->next
!= NULL
|| elem
== collate
->cursor
)
1061 lr_error (ldfile
, _("order for `%.*s' already defined at %s:%Zu"),
1062 (int) symlen
, symstr
, elem
->file
, elem
->line
);
1063 lr_ignore_rest (ldfile
, 0);
1067 insert_weights (ldfile
, elem
, charmap
, repertoire
, result
, tok_none
);
1074 handle_ellipsis (struct linereader
*ldfile
, const char *symstr
, size_t symlen
,
1075 enum token_t ellipsis
, const struct charmap_t
*charmap
,
1076 struct repertoire_t
*repertoire
,
1077 struct localedef_t
*result
)
1079 struct element_t
*startp
;
1080 struct element_t
*endp
;
1081 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
1083 /* Unlink the entry added for the ellipsis. */
1084 unlink_element (collate
);
1085 startp
= collate
->cursor
;
1087 /* Process and add the end-entry. */
1089 && insert_value (ldfile
, symstr
, symlen
, charmap
, repertoire
, result
))
1090 /* Something went wrong with inserting the to-value. This means
1091 we cannot process the ellipsis. */
1094 /* Reset the cursor. */
1095 collate
->cursor
= startp
;
1097 /* Now we have to handle many different situations:
1098 - we have to distinguish between the three different ellipsis forms
1099 - whether the ellipsis is at the beginning, in the middle, or at the end.
1101 endp
= collate
->cursor
->next
;
1102 assert (symstr
== NULL
|| endp
!= NULL
);
1104 /* XXX The following is probably very wrong since also collating symbols
1105 can appear in ranges. But do we want/can refine the test for that? */
1107 /* Both, the start and the end symbol, must stand for characters. */
1108 if ((startp
!= NULL
&& (startp
->name
== NULL
|| ! startp
->is_character
))
1109 || (endp
!= NULL
&& (endp
->name
== NULL
|| ! endp
->is_character
)))
1111 lr_error (ldfile
, _("\
1112 %s: the start and the end symbol of a range must stand for characters"),
1118 if (ellipsis
== tok_ellipsis3
)
1120 /* One requirement we make here: the length of the byte
1121 sequences for the first and end character must be the same.
1122 This is mainly to prevent unwanted effects and this is often
1123 not what is wanted. */
1124 size_t len
= (startp
->mbs
!= NULL
? startp
->nmbs
1125 : (endp
->mbs
!= NULL
? endp
->nmbs
: 0));
1126 char mbcnt
[len
+ 1];
1127 char mbend
[len
+ 1];
1129 /* Well, this should be caught somewhere else already. Just to
1131 assert (startp
== NULL
|| startp
->wcs
== NULL
|| startp
->wcs
[1] == 0);
1132 assert (endp
== NULL
|| endp
->wcs
== NULL
|| endp
->wcs
[1] == 0);
1134 if (startp
!= NULL
&& endp
!= NULL
1135 && startp
->mbs
!= NULL
&& endp
->mbs
!= NULL
1136 && startp
->nmbs
!= endp
->nmbs
)
1138 lr_error (ldfile
, _("\
1139 %s: byte sequences of first and last character must have the same length"),
1144 /* Determine whether we have to generate multibyte sequences. */
1145 if ((startp
== NULL
|| startp
->mbs
!= NULL
)
1146 && (endp
== NULL
|| endp
->mbs
!= NULL
))
1151 /* Prepare the beginning byte sequence. This is either from the
1152 beginning byte sequence or it is all nulls if it was an
1153 initial ellipsis. */
1154 if (startp
== NULL
|| startp
->mbs
== NULL
)
1155 memset (mbcnt
, '\0', len
);
1158 memcpy (mbcnt
, startp
->mbs
, len
);
1160 /* And increment it so that the value is the first one we will
1162 for (cnt
= len
- 1; cnt
>= 0; --cnt
)
1163 if (++mbcnt
[cnt
] != '\0')
1168 /* And the end sequence. */
1169 if (endp
== NULL
|| endp
->mbs
== NULL
)
1170 memset (mbend
, '\0', len
);
1172 memcpy (mbend
, endp
->mbs
, len
);
1175 /* Test whether we have a correct range. */
1176 ret
= memcmp (mbcnt
, mbend
, len
);
1180 lr_error (ldfile
, _("%s: byte sequence of first character of \
1181 range is not lower than that of the last character"), "LC_COLLATE");
1185 /* Generate the byte sequences data. */
1188 struct charseq
*seq
;
1190 /* Quite a bit of work ahead. We have to find the character
1191 definition for the byte sequence and then determine the
1192 wide character belonging to it. */
1193 seq
= charmap_find_symbol (charmap
, mbcnt
, len
);
1196 struct element_t
*elem
;
1199 /* I don't think this can ever happen. */
1200 assert (seq
->name
!= NULL
);
1201 namelen
= strlen (seq
->name
);
1203 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1204 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1207 /* Now we are ready to insert the new value in the
1208 sequence. Find out whether the element is
1211 if (find_entry (&collate
->seq_table
, seq
->name
, namelen
,
1214 uint32_t wcs
[2] = { seq
->ucs4
, 0 };
1216 /* We have to allocate an entry. */
1217 elem
= new_element (collate
, mbcnt
, len
,
1218 seq
->ucs4
== ILLEGAL_CHAR_VALUE
1219 ? NULL
: wcs
, seq
->name
,
1222 /* And add it to the table. */
1223 if (insert_entry (&collate
->seq_table
, seq
->name
,
1224 namelen
, elem
) != 0)
1225 /* This cannot happen. */
1226 assert (! "Internal error");
1229 /* Copy the result. */
1232 /* Test whether this element is not already in the list. */
1233 if (elem
->next
!= NULL
|| (collate
->cursor
!= NULL
1234 && elem
->next
== collate
->cursor
))
1236 lr_error (ldfile
, _("\
1237 order for `%.*s' already defined at %s:%Zu"),
1238 (int) namelen
, seq
->name
,
1239 elem
->file
, elem
->line
);
1243 /* Enqueue the new element. */
1244 elem
->last
= collate
->cursor
;
1245 if (collate
->cursor
== NULL
)
1249 elem
->next
= collate
->cursor
->next
;
1250 elem
->last
->next
= elem
;
1251 if (elem
->next
!= NULL
)
1252 elem
->next
->last
= elem
;
1254 if (collate
->start
== NULL
)
1256 assert (collate
->cursor
== NULL
);
1257 collate
->start
= elem
;
1259 collate
->cursor
= elem
;
1261 /* Add the weight values. We take them from the
1262 `ellipsis_weight' member of `collate'. */
1263 elem
->weights
= (struct element_list_t
*)
1264 obstack_alloc (&collate
->mempool
,
1265 nrules
* sizeof (struct element_list_t
));
1266 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1267 if (collate
->ellipsis_weight
.weights
[cnt
].cnt
== 1
1268 && (collate
->ellipsis_weight
.weights
[cnt
].w
[0]
1269 == ELEMENT_ELLIPSIS2
))
1271 elem
->weights
[cnt
].w
= (struct element_t
**)
1272 obstack_alloc (&collate
->mempool
,
1273 sizeof (struct element_t
*));
1274 elem
->weights
[cnt
].w
[0] = elem
;
1275 elem
->weights
[cnt
].cnt
= 1;
1279 /* Simply use the weight from `ellipsis_weight'. */
1280 elem
->weights
[cnt
].w
=
1281 collate
->ellipsis_weight
.weights
[cnt
].w
;
1282 elem
->weights
[cnt
].cnt
=
1283 collate
->ellipsis_weight
.weights
[cnt
].cnt
;
1287 /* Increment for the next round. */
1289 for (cnt
= len
- 1; cnt
>= 0; --cnt
)
1290 if (++mbcnt
[cnt
] != '\0')
1293 /* Find out whether this was all. */
1294 if (cnt
< 0 || memcmp (mbcnt
, mbend
, len
) >= 0)
1295 /* Yep, that's all. */
1302 /* For symbolic range we naturally must have a beginning and an
1303 end specified by the user. */
1305 lr_error (ldfile
, _("\
1306 %s: symbolic range ellipsis must not directly follow `order_start'"),
1308 else if (endp
== NULL
)
1309 lr_error (ldfile
, _("\
1310 %s: symbolic range ellipsis must not be directly followed by `order_end'"),
1314 /* Determine the range. To do so we have to determine the
1315 common prefix of both names and then the numeric
1316 values of both ends. */
1317 size_t lenfrom
= strlen (startp
->name
);
1318 size_t lento
= strlen (endp
->name
);
1319 char buf
[lento
+ 1];
1324 int base
= ellipsis
== tok_ellipsis2
? 16 : 10;
1326 if (lenfrom
!= lento
)
1329 lr_error (ldfile
, _("\
1330 `%s' and `%.*s' are not valid names for symbolic range"),
1331 startp
->name
, (int) lento
, endp
->name
);
1335 while (startp
->name
[preflen
] == endp
->name
[preflen
])
1336 if (startp
->name
[preflen
] == '\0')
1337 /* Nothing to be done. The start and end point are identical
1338 and while inserting the end point we have already given
1339 the user an error message. */
1345 from
= strtol (startp
->name
+ preflen
, &cp
, base
);
1346 if ((from
== UINT_MAX
&& errno
== ERANGE
) || *cp
!= '\0')
1350 to
= strtol (endp
->name
+ preflen
, &cp
, base
);
1351 if ((to
== UINT_MAX
&& errno
== ERANGE
) || *cp
!= '\0')
1354 /* Copy the prefix. */
1355 memcpy (buf
, startp
->name
, preflen
);
1357 /* Loop over all values. */
1358 for (++from
; from
< to
; ++from
)
1360 struct element_t
*elem
= NULL
;
1361 struct charseq
*seq
;
1365 /* Generate the name. */
1366 sprintf (buf
+ preflen
, base
== 10 ? "%0*ld" : "%0*lX",
1367 (int) (lenfrom
- preflen
), from
);
1369 /* Look whether this name is already defined. */
1371 if (find_entry (&collate
->seq_table
, buf
, symlen
, &ptr
) == 0)
1373 /* Copy back the result. */
1376 if (elem
->next
!= NULL
|| (collate
->cursor
!= NULL
1377 && elem
->next
== collate
->cursor
))
1379 lr_error (ldfile
, _("\
1380 %s: order for `%.*s' already defined at %s:%Zu"),
1381 "LC_COLLATE", (int) lenfrom
, buf
,
1382 elem
->file
, elem
->line
);
1386 if (elem
->name
== NULL
)
1388 lr_error (ldfile
, _("%s: `%s' must be a character"),
1394 if (elem
== NULL
|| (elem
->mbs
== NULL
&& elem
->wcs
== NULL
))
1396 /* Search for a character of this name. */
1397 seq
= charmap_find_value (charmap
, buf
, lenfrom
);
1398 if (seq
== NULL
|| seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1400 wc
= repertoire_find_value (repertoire
, buf
, lenfrom
);
1408 if (wc
== ILLEGAL_CHAR_VALUE
&& seq
== NULL
)
1409 /* We don't know anything about a character with this
1410 name. XXX Should we warn? */
1415 uint32_t wcs
[2] = { wc
, 0 };
1417 /* We have to allocate an entry. */
1418 elem
= new_element (collate
,
1420 ? (char *) seq
->bytes
: NULL
,
1421 seq
!= NULL
? seq
->nbytes
: 0,
1422 wc
== ILLEGAL_CHAR_VALUE
1423 ? NULL
: wcs
, buf
, lenfrom
, 1);
1427 /* Update the element. */
1430 elem
->mbs
= obstack_copy0 (&collate
->mempool
,
1431 seq
->bytes
, seq
->nbytes
);
1432 elem
->nmbs
= seq
->nbytes
;
1435 if (wc
!= ILLEGAL_CHAR_VALUE
)
1439 obstack_grow (&collate
->mempool
,
1440 &wc
, sizeof (uint32_t));
1441 obstack_grow (&collate
->mempool
,
1442 &zero
, sizeof (uint32_t));
1443 elem
->wcs
= obstack_finish (&collate
->mempool
);
1448 elem
->file
= ldfile
->fname
;
1449 elem
->line
= ldfile
->lineno
;
1450 elem
->section
= collate
->current_section
;
1453 /* Enqueue the new element. */
1454 elem
->last
= collate
->cursor
;
1455 elem
->next
= collate
->cursor
->next
;
1456 elem
->last
->next
= elem
;
1457 if (elem
->next
!= NULL
)
1458 elem
->next
->last
= elem
;
1459 collate
->cursor
= elem
;
1461 /* Now add the weights. They come from the `ellipsis_weights'
1462 member of `collate'. */
1463 elem
->weights
= (struct element_list_t
*)
1464 obstack_alloc (&collate
->mempool
,
1465 nrules
* sizeof (struct element_list_t
));
1466 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1467 if (collate
->ellipsis_weight
.weights
[cnt
].cnt
== 1
1468 && (collate
->ellipsis_weight
.weights
[cnt
].w
[0]
1469 == ELEMENT_ELLIPSIS2
))
1471 elem
->weights
[cnt
].w
= (struct element_t
**)
1472 obstack_alloc (&collate
->mempool
,
1473 sizeof (struct element_t
*));
1474 elem
->weights
[cnt
].w
[0] = elem
;
1475 elem
->weights
[cnt
].cnt
= 1;
1479 /* Simly use the weight from `ellipsis_weight'. */
1480 elem
->weights
[cnt
].w
=
1481 collate
->ellipsis_weight
.weights
[cnt
].w
;
1482 elem
->weights
[cnt
].cnt
=
1483 collate
->ellipsis_weight
.weights
[cnt
].cnt
;
1492 collate_startup (struct linereader
*ldfile
, struct localedef_t
*locale
,
1493 struct localedef_t
*copy_locale
, int ignore_content
)
1495 if (!ignore_content
&& locale
->categories
[LC_COLLATE
].collate
== NULL
)
1497 struct locale_collate_t
*collate
;
1499 if (copy_locale
== NULL
)
1501 collate
= locale
->categories
[LC_COLLATE
].collate
=
1502 (struct locale_collate_t
*)
1503 xcalloc (1, sizeof (struct locale_collate_t
));
1505 /* Init the various data structures. */
1506 init_hash (&collate
->elem_table
, 100);
1507 init_hash (&collate
->sym_table
, 100);
1508 init_hash (&collate
->seq_table
, 500);
1509 obstack_init (&collate
->mempool
);
1511 collate
->col_weight_max
= -1;
1514 /* Reuse the copy_locale's data structures. */
1515 collate
= locale
->categories
[LC_COLLATE
].collate
=
1516 copy_locale
->categories
[LC_COLLATE
].collate
;
1519 ldfile
->translate_strings
= 0;
1520 ldfile
->return_widestr
= 0;
1525 collate_finish (struct localedef_t
*locale
, const struct charmap_t
*charmap
)
1527 /* Now is the time when we can assign the individual collation
1528 values for all the symbols. We have possibly different values
1529 for the wide- and the multibyte-character symbols. This is done
1530 since it might make a difference in the encoding if there is in
1531 some cases no multibyte-character but there are wide-characters.
1532 (The other way around it is not important since theencoded
1533 collation value in the wide-character case is 32 bits wide and
1534 therefore requires no encoding).
1536 The lowest collation value assigned is 2. Zero is reserved for
1537 the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1538 functions and 1 is used to separate the individual passes for the
1541 We also have to construct is list with all the bytes/words which
1542 can come first in a sequence, followed by all the elements which
1543 also start with this byte/word. The order is reverse which has
1544 among others the important effect that longer strings are located
1545 first in the list. This is required for the output data since
1546 the algorithm used in `strcoll' etc depends on this.
1548 The multibyte case is easy. We simply sort into an array with
1550 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1555 struct element_t
*runp
;
1557 int need_undefined
= 0;
1558 struct section_list
*sect
;
1560 int nr_wide_elems
= 0;
1562 if (collate
== NULL
)
1564 /* No data, no check. */
1566 WITH_CUR_LOCALE (error (0, 0, _("No definition for %s category found"),
1571 /* If this assertion is hit change the type in `element_t'. */
1572 assert (nrules
<= sizeof (runp
->used_in_level
) * 8);
1574 /* Make sure that the `position' rule is used either in all sections
1576 for (i
= 0; i
< nrules
; ++i
)
1577 for (sect
= collate
->sections
; sect
!= NULL
; sect
= sect
->next
)
1578 if (sect
!= collate
->current_section
1579 && sect
->rules
!= NULL
1580 && ((sect
->rules
[i
] & sort_position
)
1581 != (collate
->current_section
->rules
[i
] & sort_position
)))
1583 WITH_CUR_LOCALE (error (0, 0, _("\
1584 %s: `position' must be used for a specific level in all sections or none"),
1589 /* Find out which elements are used at which level. At the same
1590 time we find out whether we have any undefined symbols. */
1591 runp
= collate
->start
;
1592 while (runp
!= NULL
)
1594 if (runp
->mbs
!= NULL
)
1596 for (i
= 0; i
< nrules
; ++i
)
1600 for (j
= 0; j
< runp
->weights
[i
].cnt
; ++j
)
1601 /* A NULL pointer as the weight means IGNORE. */
1602 if (runp
->weights
[i
].w
[j
] != NULL
)
1604 if (runp
->weights
[i
].w
[j
]->weights
== NULL
)
1606 WITH_CUR_LOCALE (error_at_line (0, 0, runp
->file
,
1608 _("symbol `%s' not defined"),
1609 runp
->weights
[i
].w
[j
]->name
));
1612 runp
->weights
[i
].w
[j
] = &collate
->undefined
;
1615 /* Set the bit for the level. */
1616 runp
->weights
[i
].w
[j
]->used_in_level
|= 1 << i
;
1621 /* Up to the next entry. */
1625 /* Walk through the list of defined sequences and assign weights. Also
1626 create the data structure which will allow generating the single byte
1627 character based tables.
1629 Since at each time only the weights for each of the rules are
1630 only compared to other weights for this rule it is possible to
1631 assign more compact weight values than simply counting all
1632 weights in sequence. We can assign weights from 3, one for each
1633 rule individually and only for those elements, which are actually
1636 Why is this important? It is not for the wide char table. But
1637 it is for the singlebyte output since here larger numbers have to
1638 be encoded to make it possible to emit the value as a byte
1640 for (i
= 0; i
< nrules
; ++i
)
1645 runp
= collate
->start
;
1646 while (runp
!= NULL
)
1648 /* Determine the order. */
1649 if (runp
->used_in_level
!= 0)
1651 runp
->mborder
= (int *) obstack_alloc (&collate
->mempool
,
1652 nrules
* sizeof (int));
1654 for (i
= 0; i
< nrules
; ++i
)
1655 if ((runp
->used_in_level
& (1 << i
)) != 0)
1656 runp
->mborder
[i
] = mbact
[i
]++;
1658 runp
->mborder
[i
] = 0;
1661 if (runp
->mbs
!= NULL
)
1663 struct element_t
**eptr
;
1664 struct element_t
*lastp
= NULL
;
1666 /* Find the point where to insert in the list. */
1667 eptr
= &collate
->mbheads
[((unsigned char *) runp
->mbs
)[0]];
1668 while (*eptr
!= NULL
)
1670 if ((*eptr
)->nmbs
< runp
->nmbs
)
1673 if ((*eptr
)->nmbs
== runp
->nmbs
)
1675 int c
= memcmp ((*eptr
)->mbs
, runp
->mbs
, runp
->nmbs
);
1679 /* This should not happen. It means that we have
1680 to symbols with the same byte sequence. It is
1681 of course an error. */
1682 WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr
)->file
,
1685 symbol `%s' has the same encoding as"), (*eptr
)->name
);
1686 error_at_line (0, 0, runp
->file
,
1693 /* Insert it here. */
1697 /* To the next entry. */
1699 eptr
= &(*eptr
)->mbnext
;
1702 /* Set the pointers. */
1703 runp
->mbnext
= *eptr
;
1704 runp
->mblast
= lastp
;
1706 (*eptr
)->mblast
= runp
;
1712 if (runp
->used_in_level
)
1714 runp
->wcorder
= wcact
++;
1716 /* We take the opportunity to count the elements which have
1721 if (runp
->is_character
)
1723 if (runp
->nmbs
== 1)
1724 collate
->mbseqorder
[((unsigned char *) runp
->mbs
)[0]] = mbseqact
++;
1726 runp
->wcseqorder
= wcseqact
++;
1728 else if (runp
->mbs
!= NULL
&& runp
->weights
!= NULL
)
1729 /* This is for collation elements. */
1730 runp
->wcseqorder
= wcseqact
++;
1732 /* Up to the next entry. */
1736 /* Find out whether any of the `mbheads' entries is unset. In this
1737 case we use the UNDEFINED entry. */
1738 for (i
= 1; i
< 256; ++i
)
1739 if (collate
->mbheads
[i
] == NULL
)
1742 collate
->mbheads
[i
] = &collate
->undefined
;
1745 /* Now to the wide character case. */
1746 collate
->wcheads
.p
= 6;
1747 collate
->wcheads
.q
= 10;
1748 wchead_table_init (&collate
->wcheads
);
1750 collate
->wcseqorder
.p
= 6;
1751 collate
->wcseqorder
.q
= 10;
1752 collseq_table_init (&collate
->wcseqorder
);
1755 runp
= collate
->start
;
1756 while (runp
!= NULL
)
1758 if (runp
->wcs
!= NULL
)
1760 struct element_t
*e
;
1761 struct element_t
**eptr
;
1762 struct element_t
*lastp
;
1764 /* Insert the collation sequence value. */
1765 if (runp
->is_character
)
1766 collseq_table_add (&collate
->wcseqorder
, runp
->wcs
[0],
1769 /* Find the point where to insert in the list. */
1770 e
= wchead_table_get (&collate
->wcheads
, runp
->wcs
[0]);
1773 while (*eptr
!= NULL
)
1775 if ((*eptr
)->nwcs
< runp
->nwcs
)
1778 if ((*eptr
)->nwcs
== runp
->nwcs
)
1780 int c
= wmemcmp ((wchar_t *) (*eptr
)->wcs
,
1781 (wchar_t *) runp
->wcs
, runp
->nwcs
);
1785 /* This should not happen. It means that we have
1786 two symbols with the same byte sequence. It is
1787 of course an error. */
1788 WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr
)->file
,
1791 symbol `%s' has the same encoding as"), (*eptr
)->name
);
1792 error_at_line (0, 0, runp
->file
,
1799 /* Insert it here. */
1803 /* To the next entry. */
1805 eptr
= &(*eptr
)->wcnext
;
1808 /* Set the pointers. */
1809 runp
->wcnext
= *eptr
;
1810 runp
->wclast
= lastp
;
1812 (*eptr
)->wclast
= runp
;
1815 wchead_table_add (&collate
->wcheads
, runp
->wcs
[0], e
);
1820 /* Up to the next entry. */
1824 /* Now determine whether the UNDEFINED entry is needed and if yes,
1825 whether it was defined. */
1826 collate
->undefined
.used_in_level
= need_undefined
? ~0ul : 0;
1827 if (collate
->undefined
.file
== NULL
)
1831 /* This seems not to be enforced by recent standards. Don't
1832 emit an error, simply append UNDEFINED at the end. */
1834 WITH_CUR_LOCALE (error (0, 0, _("no definition of `UNDEFINED'")));
1836 /* Add UNDEFINED at the end. */
1837 collate
->undefined
.mborder
=
1838 (int *) obstack_alloc (&collate
->mempool
, nrules
* sizeof (int));
1840 for (i
= 0; i
< nrules
; ++i
)
1841 collate
->undefined
.mborder
[i
] = mbact
[i
]++;
1844 /* In any case we will need the definition for the wide character
1845 case. But we will not complain that it is missing since the
1846 specification strangely enough does not seem to account for
1848 collate
->undefined
.wcorder
= wcact
++;
1851 /* Finally, try to unify the rules for the sections. Whenever the rules
1852 for a section are the same as those for another section give the
1853 ruleset the same index. Since there are never many section we can
1854 use an O(n^2) algorithm here. */
1855 sect
= collate
->sections
;
1856 while (sect
!= NULL
&& sect
->rules
== NULL
)
1859 /* Bail out if we have no sections because of earlier errors. */
1862 WITH_CUR_LOCALE (error (EXIT_FAILURE
, 0,
1863 _("too many errors; giving up")));
1870 struct section_list
*osect
= collate
->sections
;
1872 while (osect
!= sect
)
1873 if (osect
->rules
!= NULL
1874 && memcmp (osect
->rules
, sect
->rules
,
1875 nrules
* sizeof (osect
->rules
[0])) == 0)
1878 osect
= osect
->next
;
1881 sect
->ruleidx
= ruleidx
++;
1883 sect
->ruleidx
= osect
->ruleidx
;
1888 while (sect
!= NULL
&& sect
->rules
== NULL
);
1890 while (sect
!= NULL
);
1891 /* We are currently not prepared for more than 128 rulesets. But this
1892 should never really be a problem. */
1893 assert (ruleidx
<= 128);
1898 output_weight (struct obstack
*pool
, struct locale_collate_t
*collate
,
1899 struct element_t
*elem
)
1904 /* Optimize the use of UNDEFINED. */
1905 if (elem
== &collate
->undefined
)
1906 /* The weights are already inserted. */
1909 /* This byte can start exactly one collation element and this is
1910 a single byte. We can directly give the index to the weights. */
1911 retval
= obstack_object_size (pool
);
1913 /* Construct the weight. */
1914 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1916 char buf
[elem
->weights
[cnt
].cnt
* 7];
1920 for (i
= 0; i
< elem
->weights
[cnt
].cnt
; ++i
)
1921 /* Encode the weight value. We do nothing for IGNORE entries. */
1922 if (elem
->weights
[cnt
].w
[i
] != NULL
)
1923 len
+= utf8_encode (&buf
[len
],
1924 elem
->weights
[cnt
].w
[i
]->mborder
[cnt
]);
1926 /* And add the buffer content. */
1927 obstack_1grow (pool
, len
);
1928 obstack_grow (pool
, buf
, len
);
1931 return retval
| ((elem
->section
->ruleidx
& 0x7f) << 24);
1936 output_weightwc (struct obstack
*pool
, struct locale_collate_t
*collate
,
1937 struct element_t
*elem
)
1942 /* Optimize the use of UNDEFINED. */
1943 if (elem
== &collate
->undefined
)
1944 /* The weights are already inserted. */
1947 /* This byte can start exactly one collation element and this is
1948 a single byte. We can directly give the index to the weights. */
1949 retval
= obstack_object_size (pool
) / sizeof (int32_t);
1951 /* Construct the weight. */
1952 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1954 int32_t buf
[elem
->weights
[cnt
].cnt
];
1958 for (i
= 0, j
= 0; i
< elem
->weights
[cnt
].cnt
; ++i
)
1959 if (elem
->weights
[cnt
].w
[i
] != NULL
)
1960 buf
[j
++] = elem
->weights
[cnt
].w
[i
]->wcorder
;
1962 /* And add the buffer content. */
1963 obstack_int32_grow (pool
, j
);
1965 obstack_grow (pool
, buf
, j
* sizeof (int32_t));
1966 maybe_swap_uint32_obstack (pool
, j
);
1969 return retval
| ((elem
->section
->ruleidx
& 0x7f) << 24);
/* Context handed to `add_to_tablewc', which only receives the character
   and the element as arguments.  If localedef is ever threaded, this
   would need to be a __thread variable.  */
static struct
{
  struct obstack *weightpool;
  struct obstack *extrapool;
  struct obstack *indpool;
  struct locale_collate_t *collate;
  struct collidx_table *tablewc;
} atwc;

static void add_to_tablewc (uint32_t ch, struct element_t *runp);
1985 add_to_tablewc (uint32_t ch
, struct element_t
*runp
)
1987 if (runp
->wcnext
== NULL
&& runp
->nwcs
== 1)
1989 int32_t weigthidx
= output_weightwc (atwc
.weightpool
, atwc
.collate
,
1991 collidx_table_add (atwc
.tablewc
, ch
, weigthidx
);
1995 /* As for the singlebyte table, we recognize sequences and
1998 collidx_table_add (atwc
.tablewc
, ch
,
1999 -(obstack_object_size (atwc
.extrapool
)
2000 / sizeof (uint32_t)));
2004 /* Store the current index in the weight table. We know that
2005 the current position in the `extrapool' is aligned on a
2010 /* Find out wether this is a single entry or we have more than
2011 one consecutive entry. */
2012 if (runp
->wcnext
!= NULL
2013 && runp
->nwcs
== runp
->wcnext
->nwcs
2014 && wmemcmp ((wchar_t *) runp
->wcs
,
2015 (wchar_t *)runp
->wcnext
->wcs
,
2016 runp
->nwcs
- 1) == 0
2017 && (runp
->wcs
[runp
->nwcs
- 1]
2018 == runp
->wcnext
->wcs
[runp
->nwcs
- 1] + 1))
2021 struct element_t
*series_startp
= runp
;
2022 struct element_t
*curp
;
2024 /* Now add first the initial byte sequence. */
2025 added
= (1 + 1 + 2 * (runp
->nwcs
- 1)) * sizeof (int32_t);
2026 if (sizeof (int32_t) == sizeof (int))
2027 obstack_make_room (atwc
.extrapool
, added
);
2029 /* More than one consecutive entry. We mark this by having
2030 a negative index into the indirect table. */
2031 obstack_int32_grow_fast (atwc
.extrapool
,
2032 -(obstack_object_size (atwc
.indpool
)
2033 / sizeof (int32_t)));
2034 obstack_int32_grow_fast (atwc
.extrapool
, runp
->nwcs
- 1);
2037 runp
= runp
->wcnext
;
2038 while (runp
->wcnext
!= NULL
2039 && runp
->nwcs
== runp
->wcnext
->nwcs
2040 && wmemcmp ((wchar_t *) runp
->wcs
,
2041 (wchar_t *)runp
->wcnext
->wcs
,
2042 runp
->nwcs
- 1) == 0
2043 && (runp
->wcs
[runp
->nwcs
- 1]
2044 == runp
->wcnext
->wcs
[runp
->nwcs
- 1] + 1));
2046 /* Now walk backward from here to the beginning. */
2049 for (i
= 1; i
< runp
->nwcs
; ++i
)
2050 obstack_int32_grow_fast (atwc
.extrapool
, curp
->wcs
[i
]);
2052 /* Now find the end of the consecutive sequence and
2053 add all the indeces in the indirect pool. */
2056 weightidx
= output_weightwc (atwc
.weightpool
, atwc
.collate
,
2058 obstack_int32_grow (atwc
.indpool
, weightidx
);
2060 curp
= curp
->wclast
;
2062 while (curp
!= series_startp
);
2064 /* Add the final weight. */
2065 weightidx
= output_weightwc (atwc
.weightpool
, atwc
.collate
,
2067 obstack_int32_grow (atwc
.indpool
, weightidx
);
2069 /* And add the end byte sequence. Without length this
2071 for (i
= 1; i
< curp
->nwcs
; ++i
)
2072 obstack_int32_grow (atwc
.extrapool
, curp
->wcs
[i
]);
2076 /* A single entry. Simply add the index and the length and
2077 string (except for the first character which is already
2081 /* Output the weight info. */
2082 weightidx
= output_weightwc (atwc
.weightpool
, atwc
.collate
,
2085 assert (runp
->nwcs
> 0);
2086 added
= (1 + 1 + runp
->nwcs
- 1) * sizeof (int32_t);
2087 if (sizeof (int) == sizeof (int32_t))
2088 obstack_make_room (atwc
.extrapool
, added
);
2090 obstack_int32_grow_fast (atwc
.extrapool
, weightidx
);
2091 obstack_int32_grow_fast (atwc
.extrapool
, runp
->nwcs
- 1);
2092 for (i
= 1; i
< runp
->nwcs
; ++i
)
2093 obstack_int32_grow_fast (atwc
.extrapool
, runp
->wcs
[i
]);
2097 runp
= runp
->wcnext
;
2099 while (runp
!= NULL
);
2104 collate_output (struct localedef_t
*locale
, const struct charmap_t
*charmap
,
2105 const char *output_path
)
2107 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
2108 const size_t nelems
= _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
);
2109 struct locale_file file
;
2111 int32_t tablemb
[256];
2112 struct obstack weightpool
;
2113 struct obstack extrapool
;
2114 struct obstack indirectpool
;
2115 struct section_list
*sect
;
2116 struct collidx_table tablewc
;
2118 uint32_t *elem_table
;
2120 struct element_t
*runp
;
2122 init_locale_data (&file
, nelems
);
2123 add_locale_uint32 (&file
, nrules
);
2125 /* If we have no LC_COLLATE data emit only the number of rules as zero. */
2126 if (collate
== NULL
)
2129 for (idx
= 1; idx
< nelems
; idx
++)
2131 /* The words have to be handled specially. */
2132 if (idx
== _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB
))
2133 add_locale_uint32 (&file
, 0);
2134 else if (idx
== _NL_ITEM_INDEX (_NL_COLLATE_ENCODING_TYPE
))
2135 add_locale_uint32 (&file
, __cet_other
);
2137 add_locale_empty (&file
);
2139 write_locale_data (output_path
, LC_COLLATE
, "LC_COLLATE", &file
);
2143 obstack_init (&weightpool
);
2144 obstack_init (&extrapool
);
2145 obstack_init (&indirectpool
);
2147 /* Since we are using the sign of an integer to mark indirection the
2148 offsets in the arrays we are indirectly referring to must not be
2149 zero since -0 == 0. Therefore we add a bit of dummy content. */
2150 obstack_int32_grow (&extrapool
, 0);
2151 obstack_int32_grow (&indirectpool
, 0);
2153 /* Prepare the ruleset table. */
2154 for (sect
= collate
->sections
, i
= 0; sect
!= NULL
; sect
= sect
->next
)
2155 if (sect
->rules
!= NULL
&& sect
->ruleidx
== i
)
2159 obstack_make_room (&weightpool
, nrules
);
2161 for (j
= 0; j
< nrules
; ++j
)
2162 obstack_1grow_fast (&weightpool
, sect
->rules
[j
]);
2165 /* And align the output. */
2166 i
= (nrules
* i
) % LOCFILE_ALIGN
;
2169 obstack_1grow (&weightpool
, '\0');
2170 while (++i
< LOCFILE_ALIGN
);
2172 add_locale_raw_obstack (&file
, &weightpool
);
2174 /* Generate the 8-bit table. Walk through the lists of sequences
2175 starting with the same byte and add them one after the other to
2176 the table. In case we have more than one sequence starting with
2177 the same byte we have to use extra indirection.
2179 First add a record for the NUL byte. This entry will never be used
2180 so it does not matter. */
2183 /* Now insert the `UNDEFINED' value if it is used. Since this value
2184 will probably be used more than once it is good to store the
2185 weights only once. */
2186 if (collate
->undefined
.used_in_level
!= 0)
2187 output_weight (&weightpool
, collate
, &collate
->undefined
);
2189 for (ch
= 1; ch
< 256; ++ch
)
2190 if (collate
->mbheads
[ch
]->mbnext
== NULL
2191 && collate
->mbheads
[ch
]->nmbs
<= 1)
2193 tablemb
[ch
] = output_weight (&weightpool
, collate
,
2194 collate
->mbheads
[ch
]);
2198 /* The entries in the list are sorted by length and then
2199 alphabetically. This is the order in which we will add the
2200 elements to the collation table. This allows simply walking
2201 the table in sequence and stopping at the first matching
2202 entry. Since the longer sequences are coming first in the
2203 list they have the possibility to match first, just as it
2204 has to be. In the worst case we are walking to the end of
2205 the list where we put, if no singlebyte sequence is defined
2206 in the locale definition, the weights for UNDEFINED.
2208 To reduce the length of the search list we compress them a bit.
2209 This happens by collecting sequences of consecutive byte
2210 sequences in one entry (having and begin and end byte sequence)
2211 and add only one index into the weight table. We can find the
2212 consecutive entries since they are also consecutive in the list. */
2213 struct element_t
*runp
= collate
->mbheads
[ch
];
2214 struct element_t
*lastp
;
2216 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool
)));
2218 tablemb
[ch
] = -obstack_object_size (&extrapool
);
2222 /* Store the current index in the weight table. We know that
2223 the current position in the `extrapool' is aligned on a
2228 /* Find out wether this is a single entry or we have more than
2229 one consecutive entry. */
2230 if (runp
->mbnext
!= NULL
2231 && runp
->nmbs
== runp
->mbnext
->nmbs
2232 && memcmp (runp
->mbs
, runp
->mbnext
->mbs
, runp
->nmbs
- 1) == 0
2233 && (runp
->mbs
[runp
->nmbs
- 1]
2234 == runp
->mbnext
->mbs
[runp
->nmbs
- 1] + 1))
2237 struct element_t
*series_startp
= runp
;
2238 struct element_t
*curp
;
2240 /* Compute how much space we will need. */
2241 added
= LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
2242 + 2 * (runp
->nmbs
- 1));
2243 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool
)));
2244 obstack_make_room (&extrapool
, added
);
2246 /* More than one consecutive entry. We mark this by having
2247 a negative index into the indirect table. */
2248 obstack_int32_grow_fast (&extrapool
,
2249 -(obstack_object_size (&indirectpool
)
2250 / sizeof (int32_t)));
2252 /* Now search first the end of the series. */
2254 runp
= runp
->mbnext
;
2255 while (runp
->mbnext
!= NULL
2256 && runp
->nmbs
== runp
->mbnext
->nmbs
2257 && memcmp (runp
->mbs
, runp
->mbnext
->mbs
,
2258 runp
->nmbs
- 1) == 0
2259 && (runp
->mbs
[runp
->nmbs
- 1]
2260 == runp
->mbnext
->mbs
[runp
->nmbs
- 1] + 1));
2262 /* Now walk backward from here to the beginning. */
2265 assert (runp
->nmbs
<= 256);
2266 obstack_1grow_fast (&extrapool
, curp
->nmbs
- 1);
2267 for (i
= 1; i
< curp
->nmbs
; ++i
)
2268 obstack_1grow_fast (&extrapool
, curp
->mbs
[i
]);
2270 /* Now find the end of the consecutive sequence and
2271 add all the indeces in the indirect pool. */
2274 weightidx
= output_weight (&weightpool
, collate
, curp
);
2275 obstack_int32_grow (&indirectpool
, weightidx
);
2277 curp
= curp
->mblast
;
2279 while (curp
!= series_startp
);
2281 /* Add the final weight. */
2282 weightidx
= output_weight (&weightpool
, collate
, curp
);
2283 obstack_int32_grow (&indirectpool
, weightidx
);
2285 /* And add the end byte sequence. Without length this
2287 for (i
= 1; i
< curp
->nmbs
; ++i
)
2288 obstack_1grow_fast (&extrapool
, curp
->mbs
[i
]);
2292 /* A single entry. Simply add the index and the length and
2293 string (except for the first character which is already
2297 /* Output the weight info. */
2298 weightidx
= output_weight (&weightpool
, collate
, runp
);
2300 added
= LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
2302 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool
)));
2303 obstack_make_room (&extrapool
, added
);
2305 obstack_int32_grow_fast (&extrapool
, weightidx
);
2306 assert (runp
->nmbs
<= 256);
2307 obstack_1grow_fast (&extrapool
, runp
->nmbs
- 1);
2309 for (i
= 1; i
< runp
->nmbs
; ++i
)
2310 obstack_1grow_fast (&extrapool
, runp
->mbs
[i
]);
2313 /* Add alignment bytes if necessary. */
2314 while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool
)))
2315 obstack_1grow_fast (&extrapool
, '\0');
2319 runp
= runp
->mbnext
;
2321 while (runp
!= NULL
);
2323 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool
)));
2325 /* If the final entry in the list is not a single character we
2326 add an UNDEFINED entry here. */
2327 if (lastp
->nmbs
!= 1)
2329 int added
= LOCFILE_ALIGN_UP (sizeof (int32_t) + 1 + 1);
2330 obstack_make_room (&extrapool
, added
);
2332 obstack_int32_grow_fast (&extrapool
, 0);
2333 /* XXX What rule? We just pick the first. */
2334 obstack_1grow_fast (&extrapool
, 0);
2335 /* Length is zero. */
2336 obstack_1grow_fast (&extrapool
, 0);
2338 /* Add alignment bytes if necessary. */
2339 while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool
)))
2340 obstack_1grow_fast (&extrapool
, '\0');
2344 /* Add padding to the tables if necessary. */
2345 while (!LOCFILE_ALIGNED_P (obstack_object_size (&weightpool
)))
2346 obstack_1grow (&weightpool
, 0);
2348 /* Now add the four tables. */
2349 add_locale_uint32_array (&file
, (const uint32_t *) tablemb
, 256);
2350 add_locale_raw_obstack (&file
, &weightpool
);
2351 add_locale_raw_obstack (&file
, &extrapool
);
2352 add_locale_raw_obstack (&file
, &indirectpool
);
2354 /* Now the same for the wide character table. We need to store some
2355 more information here. */
2356 add_locale_empty (&file
);
2357 add_locale_empty (&file
);
2358 add_locale_empty (&file
);
2360 /* Since we are using the sign of an integer to mark indirection the
2361 offsets in the arrays we are indirectly referring to must not be
2362 zero since -0 == 0. Therefore we add a bit of dummy content. */
2363 obstack_int32_grow (&extrapool
, 0);
2364 obstack_int32_grow (&indirectpool
, 0);
2366 /* Now insert the `UNDEFINED' value if it is used. Since this value
2367 will probably be used more than once it is good to store the
2368 weights only once. */
2369 if (output_weightwc (&weightpool
, collate
, &collate
->undefined
) != 0)
2372 /* Generate the table. Walk through the lists of sequences starting
2373 with the same wide character and add them one after the other to
2374 the table. In case we have more than one sequence starting with
2375 the same byte we have to use extra indirection. */
2378 collidx_table_init (&tablewc
);
2380 atwc
.weightpool
= &weightpool
;
2381 atwc
.extrapool
= &extrapool
;
2382 atwc
.indpool
= &indirectpool
;
2383 atwc
.collate
= collate
;
2384 atwc
.tablewc
= &tablewc
;
2386 wchead_table_iterate (&collate
->wcheads
, add_to_tablewc
);
2388 memset (&atwc
, 0, sizeof (atwc
));
2390 /* Now add the four tables. */
2391 add_locale_collidx_table (&file
, &tablewc
);
2392 add_locale_raw_obstack (&file
, &weightpool
);
2393 add_locale_raw_obstack (&file
, &extrapool
);
2394 add_locale_raw_obstack (&file
, &indirectpool
);
2396 /* Finally write the table with collation element names out. It is
2397 a hash table with a simple function which gets the name of the
2398 character as the input. One character might have many names. The
2399 value associated with the name is an index into the weight table
2400 where we are then interested in the first-level weight value.
2402 To determine how large the table should be we are counting the
2403 elements have to put in. Since we are using internal chaining
2404 using a secondary hash function we have to make the table a bit
2405 larger to avoid extremely long search times. We can achieve
2406 good results with a 40% larger table than there are entries. */
2408 runp
= collate
->start
;
2409 while (runp
!= NULL
)
2411 if (runp
->mbs
!= NULL
&& runp
->weights
!= NULL
&& !runp
->is_character
)
2412 /* Yep, the element really counts. */
2417 /* Add 40% and find the next prime number. */
2418 elem_size
= next_prime (elem_size
* 1.4);
2420 /* Allocate the table. Each entry consists of two words: the hash
2421 value and an index in a secondary table which provides the index
2422 into the weight table and the string itself (so that a match can
2424 elem_table
= (uint32_t *) obstack_alloc (&extrapool
,
2425 elem_size
* 2 * sizeof (uint32_t));
2426 memset (elem_table
, '\0', elem_size
* 2 * sizeof (uint32_t));
2428 /* Now add the elements. */
2429 runp
= collate
->start
;
2430 while (runp
!= NULL
)
2432 if (runp
->mbs
!= NULL
&& runp
->weights
!= NULL
&& !runp
->is_character
)
2434 /* Compute the hash value of the name. */
2435 uint32_t namelen
= strlen (runp
->name
);
2436 uint32_t hash
= elem_hash (runp
->name
, namelen
);
2437 size_t idx
= hash
% elem_size
;
2439 size_t start_idx
= idx
;
2442 if (elem_table
[idx
* 2] != 0)
2444 /* The spot is already taken. Try iterating using the value
2445 from the secondary hashing function. */
2446 size_t iter
= hash
% (elem_size
- 2) + 1;
2451 if (idx
>= elem_size
)
2453 assert (idx
!= start_idx
);
2455 while (elem_table
[idx
* 2] != 0);
2457 /* This is the spot where we will insert the value. */
2458 elem_table
[idx
* 2] = hash
;
2459 elem_table
[idx
* 2 + 1] = obstack_object_size (&extrapool
);
2461 /* The string itself including length. */
2462 obstack_1grow (&extrapool
, namelen
);
2463 obstack_grow (&extrapool
, runp
->name
, namelen
);
2465 /* And the multibyte representation. */
2466 obstack_1grow (&extrapool
, runp
->nmbs
);
2467 obstack_grow (&extrapool
, runp
->mbs
, runp
->nmbs
);
2469 /* And align again to 32 bits. */
2470 if ((1 + namelen
+ 1 + runp
->nmbs
) % sizeof (int32_t) != 0)
2471 obstack_grow (&extrapool
, "\0\0",
2473 - ((1 + namelen
+ 1 + runp
->nmbs
)
2474 % sizeof (int32_t))));
2476 /* Now some 32-bit values: multibyte collation sequence,
2477 wide char string (including length), and wide char
2478 collation sequence. */
2479 obstack_int32_grow (&extrapool
, runp
->mbseqorder
);
2481 obstack_int32_grow (&extrapool
, runp
->nwcs
);
2482 obstack_grow (&extrapool
, runp
->wcs
,
2483 runp
->nwcs
* sizeof (uint32_t));
2484 maybe_swap_uint32_obstack (&extrapool
, runp
->nwcs
);
2486 obstack_int32_grow (&extrapool
, runp
->wcseqorder
);
2492 /* Prepare to write out this data. */
2493 add_locale_uint32 (&file
, elem_size
);
2494 add_locale_uint32_array (&file
, elem_table
, 2 * elem_size
);
2495 add_locale_raw_obstack (&file
, &extrapool
);
2496 add_locale_raw_data (&file
, collate
->mbseqorder
, 256);
2497 add_locale_collseq_table (&file
, &collate
->wcseqorder
);
2498 add_locale_string (&file
, charmap
->code_set_name
);
2499 if (strcmp (charmap
->code_set_name
, "UTF-8") == 0)
2500 add_locale_uint32 (&file
, __cet_utf8
);
2501 else if (charmap
->mb_cur_max
== 1)
2502 add_locale_uint32 (&file
, __cet_8bit
);
2504 add_locale_uint32 (&file
, __cet_other
);
2505 write_locale_data (output_path
, LC_COLLATE
, "LC_COLLATE", &file
);
2507 obstack_free (&weightpool
, NULL
);
2508 obstack_free (&extrapool
, NULL
);
2509 obstack_free (&indirectpool
, NULL
);
2514 skip_to (struct linereader
*ldfile
, struct locale_collate_t
*collate
,
2515 const struct charmap_t
*charmap
, int to_endif
)
2519 struct token
*now
= lr_token (ldfile
, charmap
, NULL
, NULL
, 0);
2520 enum token_t nowtok
= now
->tok
;
2522 if (nowtok
== tok_eof
|| nowtok
== tok_end
)
2525 if (nowtok
== tok_ifdef
|| nowtok
== tok_ifndef
)
2527 lr_error (ldfile
, _("%s: nested conditionals not supported"),
2529 nowtok
= skip_to (ldfile
, collate
, charmap
, tok_endif
);
2530 if (nowtok
== tok_eof
|| nowtok
== tok_end
)
2533 else if (nowtok
== tok_endif
|| (!to_endif
&& nowtok
== tok_else
))
2535 lr_ignore_rest (ldfile
, 1);
2538 else if (!to_endif
&& (nowtok
== tok_elifdef
|| nowtok
== tok_elifndef
))
2540 /* Do not read the rest of the line. */
2543 else if (nowtok
== tok_else
)
2545 lr_error (ldfile
, _("%s: more than one 'else'"), "LC_COLLATE");
2548 lr_ignore_rest (ldfile
, 0);
2554 collate_read (struct linereader
*ldfile
, struct localedef_t
*result
,
2555 const struct charmap_t
*charmap
, const char *repertoire_name
,
2558 struct repertoire_t
*repertoire
= NULL
;
2559 struct locale_collate_t
*collate
;
2561 struct token
*arg
= NULL
;
2562 enum token_t nowtok
;
2563 enum token_t was_ellipsis
= tok_none
;
2564 struct localedef_t
*copy_locale
= NULL
;
2567 1 - between `order-start' and `order-end'
2568 2 - after `order-end'
2569 3 - after `reorder-after', waiting for `reorder-end'
2570 4 - after `reorder-end'
2571 5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2572 6 - after `reorder-sections-end'
2576 /* Get the repertoire we have to use. */
2577 if (repertoire_name
!= NULL
)
2578 repertoire
= repertoire_read (repertoire_name
);
2580 /* The rest of the line containing `LC_COLLATE' must be free. */
2581 lr_ignore_rest (ldfile
, 1);
2587 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2590 while (nowtok
== tok_eol
);
2592 if (nowtok
!= tok_define
)
2596 lr_ignore_rest (ldfile
, 0);
2599 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2600 if (arg
->tok
!= tok_ident
)
2601 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2604 /* Simply add the new symbol. */
2605 struct name_list
*newsym
= xmalloc (sizeof (*newsym
)
2606 + arg
->val
.str
.lenmb
+ 1);
2607 memcpy (newsym
->str
, arg
->val
.str
.startmb
, arg
->val
.str
.lenmb
);
2608 newsym
->str
[arg
->val
.str
.lenmb
] = '\0';
2609 newsym
->next
= defined
;
2612 lr_ignore_rest (ldfile
, 1);
2617 if (nowtok
== tok_copy
)
2619 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2620 if (now
->tok
!= tok_string
)
2622 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2626 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2627 while (now
->tok
!= tok_eof
&& now
->tok
!= tok_end
);
2629 if (now
->tok
!= tok_eof
2630 || (now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
),
2631 now
->tok
== tok_eof
))
2632 lr_error (ldfile
, _("%s: premature end of file"), "LC_COLLATE");
2633 else if (now
->tok
!= tok_lc_collate
)
2635 lr_error (ldfile
, _("\
2636 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2637 lr_ignore_rest (ldfile
, 0);
2640 lr_ignore_rest (ldfile
, 1);
2645 if (! ignore_content
)
2647 /* Get the locale definition. */
2648 copy_locale
= load_locale (LC_COLLATE
, now
->val
.str
.startmb
,
2649 repertoire_name
, charmap
, NULL
);
2650 if ((copy_locale
->avail
& COLLATE_LOCALE
) == 0)
2652 /* Not yet loaded. So do it now. */
2653 if (locfile_read (copy_locale
, charmap
) != 0)
2657 if (copy_locale
->categories
[LC_COLLATE
].collate
== NULL
)
2661 lr_ignore_rest (ldfile
, 1);
2663 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2667 /* Prepare the data structures. */
2668 collate_startup (ldfile
, result
, copy_locale
, ignore_content
);
2669 collate
= result
->categories
[LC_COLLATE
].collate
;
2677 /* Of course we don't proceed beyond the end of file. */
2678 if (nowtok
== tok_eof
)
2681 /* Ignore empty lines. */
2682 if (nowtok
== tok_eol
)
2684 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2692 /* Allow copying other locales. */
2693 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2694 if (now
->tok
!= tok_string
)
2697 if (! ignore_content
)
2698 load_locale (LC_COLLATE
, now
->val
.str
.startmb
, repertoire_name
,
2701 lr_ignore_rest (ldfile
, 1);
2704 case tok_coll_weight_max
:
2705 /* Ignore the rest of the line if we don't need the input of
2709 lr_ignore_rest (ldfile
, 0);
2716 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2717 if (arg
->tok
!= tok_number
)
2719 if (collate
->col_weight_max
!= -1)
2720 lr_error (ldfile
, _("%s: duplicate definition of `%s'"),
2721 "LC_COLLATE", "col_weight_max");
2723 collate
->col_weight_max
= arg
->val
.num
;
2724 lr_ignore_rest (ldfile
, 1);
2727 case tok_section_symbol
:
2728 /* Ignore the rest of the line if we don't need the input of
2732 lr_ignore_rest (ldfile
, 0);
2739 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2740 if (arg
->tok
!= tok_bsymbol
)
2742 else if (!ignore_content
)
2744 /* Check whether this section is already known. */
2745 struct section_list
*known
= collate
->sections
;
2746 while (known
!= NULL
)
2748 if (strcmp (known
->name
, arg
->val
.str
.startmb
) == 0)
2750 known
= known
->next
;
2756 _("%s: duplicate declaration of section `%s'"),
2757 "LC_COLLATE", arg
->val
.str
.startmb
);
2758 free (arg
->val
.str
.startmb
);
2761 collate
->sections
= make_seclist_elem (collate
,
2762 arg
->val
.str
.startmb
,
2765 lr_ignore_rest (ldfile
, known
== NULL
);
2769 free (arg
->val
.str
.startmb
);
2770 lr_ignore_rest (ldfile
, 0);
2774 case tok_collating_element
:
2775 /* Ignore the rest of the line if we don't need the input of
2779 lr_ignore_rest (ldfile
, 0);
2783 if (state
!= 0 && state
!= 2)
2786 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2787 if (arg
->tok
!= tok_bsymbol
)
2791 const char *symbol
= arg
->val
.str
.startmb
;
2792 size_t symbol_len
= arg
->val
.str
.lenmb
;
2794 /* Next the `from' keyword. */
2795 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2796 if (arg
->tok
!= tok_from
)
2798 free ((char *) symbol
);
2802 ldfile
->return_widestr
= 1;
2803 ldfile
->translate_strings
= 1;
2805 /* Finally the string with the replacement. */
2806 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2808 ldfile
->return_widestr
= 0;
2809 ldfile
->translate_strings
= 0;
2811 if (arg
->tok
!= tok_string
)
2814 if (!ignore_content
&& symbol
!= NULL
)
2816 /* The name is already defined. */
2817 if (check_duplicate (ldfile
, collate
, charmap
,
2818 repertoire
, symbol
, symbol_len
))
2821 if (arg
->val
.str
.startmb
!= NULL
)
2822 insert_entry (&collate
->elem_table
, symbol
, symbol_len
,
2823 new_element (collate
,
2824 arg
->val
.str
.startmb
,
2825 arg
->val
.str
.lenmb
- 1,
2826 arg
->val
.str
.startwc
,
2827 symbol
, symbol_len
, 0));
2832 free ((char *) symbol
);
2833 free (arg
->val
.str
.startmb
);
2834 free (arg
->val
.str
.startwc
);
2836 lr_ignore_rest (ldfile
, 1);
2840 case tok_collating_symbol
:
2841 /* Ignore the rest of the line if we don't need the input of
2845 lr_ignore_rest (ldfile
, 0);
2849 if (state
!= 0 && state
!= 2)
2852 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2853 if (arg
->tok
!= tok_bsymbol
)
2857 char *symbol
= arg
->val
.str
.startmb
;
2858 size_t symbol_len
= arg
->val
.str
.lenmb
;
2859 char *endsymbol
= NULL
;
2860 size_t endsymbol_len
= 0;
2861 enum token_t ellipsis
= tok_none
;
2863 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2864 if (arg
->tok
== tok_ellipsis2
|| arg
->tok
== tok_ellipsis4
)
2866 ellipsis
= arg
->tok
;
2868 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
2870 if (arg
->tok
!= tok_bsymbol
)
2876 endsymbol
= arg
->val
.str
.startmb
;
2877 endsymbol_len
= arg
->val
.str
.lenmb
;
2879 lr_ignore_rest (ldfile
, 1);
2881 else if (arg
->tok
!= tok_eol
)
2887 if (!ignore_content
)
2890 || (ellipsis
!= tok_none
&& endsymbol
== NULL
))
2892 lr_error (ldfile
, _("\
2893 %s: unknown character in collating symbol name"),
2897 else if (ellipsis
== tok_none
)
2899 /* A single symbol, no ellipsis. */
2900 if (check_duplicate (ldfile
, collate
, charmap
,
2901 repertoire
, symbol
, symbol_len
))
2902 /* The name is already defined. */
2905 insert_entry (&collate
->sym_table
, symbol
, symbol_len
,
2906 new_symbol (collate
, symbol
, symbol_len
));
2908 else if (symbol_len
!= endsymbol_len
)
2912 _("invalid names for character range"));
2917 /* Oh my, we have to handle an ellipsis. First, as
2918 usual, determine the common prefix and then
2919 convert the rest into a range. */
2921 unsigned long int from
;
2922 unsigned long int to
;
2925 for (prefixlen
= 0; prefixlen
< symbol_len
; ++prefixlen
)
2926 if (symbol
[prefixlen
] != endsymbol
[prefixlen
])
2929 /* Convert the rest into numbers. */
2930 symbol
[symbol_len
] = '\0';
2931 from
= strtoul (&symbol
[prefixlen
], &endp
,
2932 ellipsis
== tok_ellipsis2
? 16 : 10);
2934 goto col_sym_inv_range
;
2936 endsymbol
[symbol_len
] = '\0';
2937 to
= strtoul (&endsymbol
[prefixlen
], &endp
,
2938 ellipsis
== tok_ellipsis2
? 16 : 10);
2940 goto col_sym_inv_range
;
2943 goto col_sym_inv_range
;
2945 /* Now loop over all entries. */
2950 symbuf
= (char *) obstack_alloc (&collate
->mempool
,
2953 /* Create the name. */
2955 ellipsis
== tok_ellipsis2
2956 ? "%.*s%.*lX" : "%.*s%.*lu",
2957 (int) prefixlen
, symbol
,
2958 (int) (symbol_len
- prefixlen
), from
);
2960 if (check_duplicate (ldfile
, collate
, charmap
,
2961 repertoire
, symbuf
, symbol_len
))
2962 /* The name is already defined. */
2965 insert_entry (&collate
->sym_table
, symbuf
,
2967 new_symbol (collate
, symbuf
,
2970 /* Increment the counter. */
2986 case tok_symbol_equivalence
:
2987 /* Ignore the rest of the line if we don't need the input of
2991 lr_ignore_rest (ldfile
, 0);
2998 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2999 if (arg
->tok
!= tok_bsymbol
)
3003 const char *newname
= arg
->val
.str
.startmb
;
3004 size_t newname_len
= arg
->val
.str
.lenmb
;
3005 const char *symname
;
3007 void *symval
; /* Actually struct symbol_t* */
3009 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3010 if (arg
->tok
!= tok_bsymbol
)
3012 free ((char *) newname
);
3016 symname
= arg
->val
.str
.startmb
;
3017 symname_len
= arg
->val
.str
.lenmb
;
3019 if (newname
== NULL
)
3021 lr_error (ldfile
, _("\
3022 %s: unknown character in equivalent definition name"),
3026 free ((char *) newname
);
3027 free ((char *) symname
);
3030 if (symname
== NULL
)
3032 lr_error (ldfile
, _("\
3033 %s: unknown character in equivalent definition value"),
3035 goto sym_equiv_free
;
3038 /* See whether the symbol name is already defined. */
3039 if (find_entry (&collate
->sym_table
, symname
, symname_len
,
3042 lr_error (ldfile
, _("\
3043 %s: unknown symbol `%s' in equivalent definition"),
3044 "LC_COLLATE", symname
);
3045 goto sym_equiv_free
;
3048 if (insert_entry (&collate
->sym_table
,
3049 newname
, newname_len
, symval
) < 0)
3051 lr_error (ldfile
, _("\
3052 error while adding equivalent collating symbol"));
3053 goto sym_equiv_free
;
3056 free ((char *) symname
);
3058 lr_ignore_rest (ldfile
, 1);
3062 /* Ignore the rest of the line if we don't need the input of
3066 lr_ignore_rest (ldfile
, 0);
3070 /* We get told about the scripts we know. */
3071 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3072 if (arg
->tok
!= tok_bsymbol
)
3076 struct section_list
*runp
= collate
->known_sections
;
3079 while (runp
!= NULL
)
3080 if (strncmp (runp
->name
, arg
->val
.str
.startmb
,
3081 arg
->val
.str
.lenmb
) == 0
3082 && runp
->name
[arg
->val
.str
.lenmb
] == '\0')
3085 runp
= runp
->def_next
;
3089 lr_error (ldfile
, _("duplicate definition of script `%s'"),
3091 lr_ignore_rest (ldfile
, 0);
3095 runp
= (struct section_list
*) xcalloc (1, sizeof (*runp
));
3096 name
= (char *) xmalloc (arg
->val
.str
.lenmb
+ 1);
3097 memcpy (name
, arg
->val
.str
.startmb
, arg
->val
.str
.lenmb
);
3098 name
[arg
->val
.str
.lenmb
] = '\0';
3101 runp
->def_next
= collate
->known_sections
;
3102 collate
->known_sections
= runp
;
3104 lr_ignore_rest (ldfile
, 1);
3107 case tok_order_start
:
3108 /* Ignore the rest of the line if we don't need the input of
3112 lr_ignore_rest (ldfile
, 0);
3116 if (state
!= 0 && state
!= 1 && state
!= 2)
3120 /* The 14652 draft does not specify whether all `order_start' lines
3121 must contain the same number of sort-rules, but 14651 does. So
3122 we require this here as well. */
3123 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3124 if (arg
->tok
== tok_bsymbol
)
3126 /* This better should be a section name. */
3127 struct section_list
*sp
= collate
->known_sections
;
3129 && (sp
->name
== NULL
3130 || strncmp (sp
->name
, arg
->val
.str
.startmb
,
3131 arg
->val
.str
.lenmb
) != 0
3132 || sp
->name
[arg
->val
.str
.lenmb
] != '\0'))
3137 lr_error (ldfile
, _("\
3138 %s: unknown section name `%.*s'"),
3139 "LC_COLLATE", (int) arg
->val
.str
.lenmb
,
3140 arg
->val
.str
.startmb
);
3141 /* We use the error section. */
3142 collate
->current_section
= &collate
->error_section
;
3144 if (collate
->error_section
.first
== NULL
)
3146 /* Insert &collate->error_section at the end of
3147 the collate->sections list. */
3148 if (collate
->sections
== NULL
)
3149 collate
->sections
= &collate
->error_section
;
3152 sp
= collate
->sections
;
3153 while (sp
->next
!= NULL
)
3156 sp
->next
= &collate
->error_section
;
3158 collate
->error_section
.next
= NULL
;
3163 /* One should not be allowed to open the same
3165 if (sp
->first
!= NULL
)
3166 lr_error (ldfile
, _("\
3167 %s: multiple order definitions for section `%s'"),
3168 "LC_COLLATE", sp
->name
);
3171 /* Insert sp in the collate->sections list,
3172 right after collate->current_section. */
3173 if (collate
->current_section
!= NULL
)
3175 sp
->next
= collate
->current_section
->next
;
3176 collate
->current_section
->next
= sp
;
3178 else if (collate
->sections
== NULL
)
3179 /* This is the first section to be defined. */
3180 collate
->sections
= sp
;
3182 collate
->current_section
= sp
;
3185 /* Next should come the end of the line or a semicolon. */
3186 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
3188 if (arg
->tok
== tok_eol
)
3192 /* This means we have exactly one rule: `forward'. */
3194 lr_error (ldfile
, _("\
3195 %s: invalid number of sorting rules"),
3199 sp
->rules
= obstack_alloc (&collate
->mempool
,
3200 (sizeof (enum coll_sort_rule
)
3202 for (cnt
= 0; cnt
< nrules
; ++cnt
)
3203 sp
->rules
[cnt
] = sort_forward
;
3209 /* Get the next token. */
3210 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
3216 /* There is no section symbol. Therefore we use the unnamed
3218 collate
->current_section
= &collate
->unnamed_section
;
3220 if (collate
->unnamed_section_defined
)
3221 lr_error (ldfile
, _("\
3222 %s: multiple order definitions for unnamed section"),
3226 /* Insert &collate->unnamed_section at the beginning of
3227 the collate->sections list. */
3228 collate
->unnamed_section
.next
= collate
->sections
;
3229 collate
->sections
= &collate
->unnamed_section
;
3230 collate
->unnamed_section_defined
= true;
3234 /* Now read the direction names. */
3235 read_directions (ldfile
, arg
, charmap
, repertoire
, result
);
3237 /* From now we need the strings untranslated. */
3238 ldfile
->translate_strings
= 0;
3242 /* Ignore the rest of the line if we don't need the input of
3246 lr_ignore_rest (ldfile
, 0);
3253 /* Handle ellipsis at end of list. */
3254 if (was_ellipsis
!= tok_none
)
3256 handle_ellipsis (ldfile
, NULL
, 0, was_ellipsis
, charmap
,
3257 repertoire
, result
);
3258 was_ellipsis
= tok_none
;
3262 lr_ignore_rest (ldfile
, 1);
3265 case tok_reorder_after
:
3266 /* Ignore the rest of the line if we don't need the input of
3270 lr_ignore_rest (ldfile
, 0);
3276 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
3280 /* Handle ellipsis at end of list. */
3281 if (was_ellipsis
!= tok_none
)
3283 handle_ellipsis (ldfile
, arg
->val
.str
.startmb
,
3284 arg
->val
.str
.lenmb
, was_ellipsis
, charmap
,
3285 repertoire
, result
);
3286 was_ellipsis
= tok_none
;
3289 else if (state
== 0 && copy_locale
== NULL
)
3291 else if (state
!= 0 && state
!= 2 && state
!= 3)
3295 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3296 if (arg
->tok
== tok_bsymbol
|| arg
->tok
== tok_ucs4
)
3298 /* Find this symbol in the sequence table. */
3302 struct element_t
*insp
;
3306 if (arg
->tok
== tok_bsymbol
)
3308 startmb
= arg
->val
.str
.startmb
;
3309 lenmb
= arg
->val
.str
.lenmb
;
3313 sprintf (ucsbuf
, "U%08X", arg
->val
.ucs4
);
3318 if (find_entry (&collate
->seq_table
, startmb
, lenmb
, &ptr
) == 0)
3319 /* Yes, the symbol exists. Simply point the cursor
3321 collate
->cursor
= (struct element_t
*) ptr
;
3324 struct symbol_t
*symbp
;
3327 if (find_entry (&collate
->sym_table
, startmb
, lenmb
,
3332 if (symbp
->order
->last
!= NULL
3333 || symbp
->order
->next
!= NULL
)
3334 collate
->cursor
= symbp
->order
;
3337 /* This is a collating symbol but its position
3338 is not yet defined. */
3339 lr_error (ldfile
, _("\
3340 %s: order for collating symbol %.*s not yet defined"),
3341 "LC_COLLATE", (int) lenmb
, startmb
);
3342 collate
->cursor
= NULL
;
3346 else if (find_entry (&collate
->elem_table
, startmb
, lenmb
,
3349 insp
= (struct element_t
*) ptr
;
3351 if (insp
->last
!= NULL
|| insp
->next
!= NULL
)
3352 collate
->cursor
= insp
;
3355 /* This is a collating element but its position
3356 is not yet defined. */
3357 lr_error (ldfile
, _("\
3358 %s: order for collating element %.*s not yet defined"),
3359 "LC_COLLATE", (int) lenmb
, startmb
);
3360 collate
->cursor
= NULL
;
3366 /* This is bad. The symbol after which we have to
3367 insert does not exist. */
3368 lr_error (ldfile
, _("\
3369 %s: cannot reorder after %.*s: symbol not known"),
3370 "LC_COLLATE", (int) lenmb
, startmb
);
3371 collate
->cursor
= NULL
;
3376 lr_ignore_rest (ldfile
, no_error
);
3379 /* This must not happen. */
3383 case tok_reorder_end
:
3384 /* Ignore the rest of the line if we don't need the input of
3392 lr_ignore_rest (ldfile
, 1);
3395 case tok_reorder_sections_after
:
3396 /* Ignore the rest of the line if we don't need the input of
3400 lr_ignore_rest (ldfile
, 0);
3406 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
3410 /* Handle ellipsis at end of list. */
3411 if (was_ellipsis
!= tok_none
)
3413 handle_ellipsis (ldfile
, NULL
, 0, was_ellipsis
, charmap
,
3414 repertoire
, result
);
3415 was_ellipsis
= tok_none
;
3418 else if (state
== 3)
3420 WITH_CUR_LOCALE (error (0, 0, _("\
3421 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3424 else if (state
!= 2 && state
!= 4)
3428 /* Get the name of the sections we are adding after. */
3429 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3430 if (arg
->tok
== tok_bsymbol
)
3432 /* Now find a section with this name. */
3433 struct section_list
*runp
= collate
->sections
;
3435 while (runp
!= NULL
)
3437 if (runp
->name
!= NULL
3438 && strlen (runp
->name
) == arg
->val
.str
.lenmb
3439 && memcmp (runp
->name
, arg
->val
.str
.startmb
,
3440 arg
->val
.str
.lenmb
) == 0)
3447 collate
->current_section
= runp
;
3450 /* This is bad. The section after which we have to
3451 reorder does not exist. Therefore we cannot
3452 process the whole rest of this reorder
3454 lr_error (ldfile
, _("%s: section `%.*s' not known"),
3455 "LC_COLLATE", (int) arg
->val
.str
.lenmb
,
3456 arg
->val
.str
.startmb
);
3460 lr_ignore_rest (ldfile
, 0);
3462 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3464 while (now
->tok
== tok_reorder_sections_after
3465 || now
->tok
== tok_reorder_sections_end
3466 || now
->tok
== tok_end
);
3468 /* Process the token we just saw. */
3474 /* This must not happen. */
3478 case tok_reorder_sections_end
:
3479 /* Ignore the rest of the line if we don't need the input of
3487 lr_ignore_rest (ldfile
, 1);
3492 /* Ignore the rest of the line if we don't need the input of
3496 lr_ignore_rest (ldfile
, 0);
3500 if (state
!= 0 && state
!= 1 && state
!= 3 && state
!= 5)
3503 if ((state
== 0 || state
== 5) && nowtok
== tok_ucs4
)
3506 if (nowtok
== tok_ucs4
)
3508 snprintf (ucs4buf
, sizeof (ucs4buf
), "U%08X", now
->val
.ucs4
);
3512 else if (arg
!= NULL
)
3514 symstr
= arg
->val
.str
.startmb
;
3515 symlen
= arg
->val
.str
.lenmb
;
3519 lr_error (ldfile
, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3520 (int) ldfile
->token
.val
.str
.lenmb
,
3521 ldfile
->token
.val
.str
.startmb
);
3525 struct element_t
*seqp
;
3528 /* We are outside an `order_start' region. This means
3529 we must only accept definitions of values for
3530 collation symbols since these are purely abstract
3531 values and don't need directions associated. */
3534 if (find_entry (&collate
->seq_table
, symstr
, symlen
, &ptr
) == 0)
3538 /* It's already defined. First check whether this
3539 is really a collating symbol. */
3540 if (seqp
->is_character
)
3549 if (find_entry (&collate
->sym_table
, symstr
, symlen
,
3551 /* No collating symbol, it's an error. */
3554 /* Maybe this is the first time we define a symbol
3555 value and it is before the first actual section. */
3556 if (collate
->sections
== NULL
)
3557 collate
->sections
= collate
->current_section
=
3558 &collate
->symbol_section
;
3561 if (was_ellipsis
!= tok_none
)
3563 handle_ellipsis (ldfile
, symstr
, symlen
, was_ellipsis
,
3564 charmap
, repertoire
, result
);
3566 /* Remember that we processed the ellipsis. */
3567 was_ellipsis
= tok_none
;
3569 /* And don't add the value a second time. */
3573 else if (state
== 3)
3575 /* It is possible that we already have this collation sequence.
3576 In this case we move the entry. */
3580 /* If the symbol after which we have to insert was not found
3581 ignore all entries. */
3582 if (collate
->cursor
== NULL
)
3584 lr_ignore_rest (ldfile
, 0);
3588 if (find_entry (&collate
->seq_table
, symstr
, symlen
, &ptr
) == 0)
3590 seqp
= (struct element_t
*) ptr
;
3594 if (find_entry (&collate
->sym_table
, symstr
, symlen
, &sym
) == 0
3595 && (seqp
= ((struct symbol_t
*) sym
)->order
) != NULL
)
3598 if (find_entry (&collate
->elem_table
, symstr
, symlen
, &ptr
) == 0
3599 && (seqp
= (struct element_t
*) ptr
,
3600 seqp
->last
!= NULL
|| seqp
->next
!= NULL
3601 || (collate
->start
!= NULL
&& seqp
== collate
->start
)))
3604 /* Remove the entry from the old position. */
3605 if (seqp
->last
== NULL
)
3606 collate
->start
= seqp
->next
;
3608 seqp
->last
->next
= seqp
->next
;
3609 if (seqp
->next
!= NULL
)
3610 seqp
->next
->last
= seqp
->last
;
3612 /* We also have to check whether this entry is the
3613 first or last of a section. */
3614 if (seqp
->section
->first
== seqp
)
3616 if (seqp
->section
->first
== seqp
->section
->last
)
3617 /* This section has no content anymore. */
3618 seqp
->section
->first
= seqp
->section
->last
= NULL
;
3620 seqp
->section
->first
= seqp
->next
;
3622 else if (seqp
->section
->last
== seqp
)
3623 seqp
->section
->last
= seqp
->last
;
3625 /* Now insert it in the new place. */
3626 insert_weights (ldfile
, seqp
, charmap
, repertoire
, result
,
3631 /* Otherwise we just add a new entry. */
3633 else if (state
== 5)
3635 /* We are reordering sections. Find the named section. */
3636 struct section_list
*runp
= collate
->sections
;
3637 struct section_list
*prevp
= NULL
;
3639 while (runp
!= NULL
)
3641 if (runp
->name
!= NULL
3642 && strlen (runp
->name
) == symlen
3643 && memcmp (runp
->name
, symstr
, symlen
) == 0)
3652 lr_error (ldfile
, _("%s: section `%.*s' not known"),
3653 "LC_COLLATE", (int) symlen
, symstr
);
3654 lr_ignore_rest (ldfile
, 0);
3658 if (runp
!= collate
->current_section
)
3660 /* Remove the named section from the old place and
3661 insert it in the new one. */
3662 prevp
->next
= runp
->next
;
3664 runp
->next
= collate
->current_section
->next
;
3665 collate
->current_section
->next
= runp
;
3666 collate
->current_section
= runp
;
3669 /* Process the rest of the line which might change
3670 the collation rules. */
3671 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
3673 if (arg
->tok
!= tok_eof
&& arg
->tok
!= tok_eol
)
3674 read_directions (ldfile
, arg
, charmap
, repertoire
,
3679 else if (was_ellipsis
!= tok_none
)
3681 /* Using the information in the `ellipsis_weight'
3682 element and this and the last value we have to handle
3683 the ellipsis now. */
3684 assert (state
== 1);
3686 handle_ellipsis (ldfile
, symstr
, symlen
, was_ellipsis
, charmap
,
3687 repertoire
, result
);
3689 /* Remember that we processed the ellipsis. */
3690 was_ellipsis
= tok_none
;
3692 /* And don't add the value a second time. */
3696 /* Now insert in the new place. */
3697 insert_value (ldfile
, symstr
, symlen
, charmap
, repertoire
, result
);
3701 /* Ignore the rest of the line if we don't need the input of
3705 lr_ignore_rest (ldfile
, 0);
3712 if (was_ellipsis
!= tok_none
)
3715 _("%s: cannot have `%s' as end of ellipsis range"),
3716 "LC_COLLATE", "UNDEFINED");
3718 unlink_element (collate
);
3719 was_ellipsis
= tok_none
;
3722 /* See whether UNDEFINED already appeared somewhere. */
3723 if (collate
->undefined
.next
!= NULL
3724 || &collate
->undefined
== collate
->cursor
)
3727 _("%s: order for `%.*s' already defined at %s:%Zu"),
3728 "LC_COLLATE", 9, "UNDEFINED",
3729 collate
->undefined
.file
,
3730 collate
->undefined
.line
);
3731 lr_ignore_rest (ldfile
, 0);
3734 /* Parse the weights. */
3735 insert_weights (ldfile
, &collate
->undefined
, charmap
,
3736 repertoire
, result
, tok_none
);
3739 case tok_ellipsis2
: /* symbolic hexadecimal ellipsis */
3740 case tok_ellipsis3
: /* absolute ellipsis */
3741 case tok_ellipsis4
: /* symbolic decimal ellipsis */
3742 /* This is the symbolic (decimal or hexadecimal) or absolute
3744 if (was_ellipsis
!= tok_none
)
3747 if (state
!= 0 && state
!= 1 && state
!= 3)
3750 was_ellipsis
= nowtok
;
3752 insert_weights (ldfile
, &collate
->ellipsis_weight
, charmap
,
3753 repertoire
, result
, nowtok
);
3758 /* Next we assume `LC_COLLATE'. */
3759 if (!ignore_content
)
3761 if (state
== 0 && copy_locale
== NULL
)
3762 /* We must either see a copy statement or have
3765 _("%s: empty category description not allowed"),
3767 else if (state
== 1)
3769 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
3772 /* Handle ellipsis at end of list. */
3773 if (was_ellipsis
!= tok_none
)
3775 handle_ellipsis (ldfile
, NULL
, 0, was_ellipsis
, charmap
,
3776 repertoire
, result
);
3777 was_ellipsis
= tok_none
;
3780 else if (state
== 3)
3781 WITH_CUR_LOCALE (error (0, 0, _("\
3782 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3783 else if (state
== 5)
3784 WITH_CUR_LOCALE (error (0, 0, _("\
3785 %s: missing `reorder-sections-end' keyword"), "LC_COLLATE"));
3787 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3788 if (arg
->tok
== tok_eof
)
3790 if (arg
->tok
== tok_eol
)
3791 lr_error (ldfile
, _("%s: incomplete `END' line"), "LC_COLLATE");
3792 else if (arg
->tok
!= tok_lc_collate
)
3793 lr_error (ldfile
, _("\
3794 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3795 lr_ignore_rest (ldfile
, arg
->tok
== tok_lc_collate
);
3801 lr_ignore_rest (ldfile
, 0);
3805 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3806 if (arg
->tok
!= tok_ident
)
3809 /* Simply add the new symbol. */
3810 struct name_list
*newsym
= xmalloc (sizeof (*newsym
)
3811 + arg
->val
.str
.lenmb
+ 1);
3812 memcpy (newsym
->str
, arg
->val
.str
.startmb
, arg
->val
.str
.lenmb
);
3813 newsym
->str
[arg
->val
.str
.lenmb
] = '\0';
3814 newsym
->next
= defined
;
3817 lr_ignore_rest (ldfile
, 1);
3823 lr_ignore_rest (ldfile
, 0);
3827 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3828 if (arg
->tok
!= tok_ident
)
3831 /* Remove _all_ occurrences of the symbol from the list. */
3832 struct name_list
*prevdef
= NULL
;
3833 struct name_list
*curdef
= defined
;
3834 while (curdef
!= NULL
)
3835 if (strncmp (arg
->val
.str
.startmb
, curdef
->str
,
3836 arg
->val
.str
.lenmb
) == 0
3837 && curdef
->str
[arg
->val
.str
.lenmb
] == '\0')
3839 if (prevdef
== NULL
)
3840 defined
= curdef
->next
;
3842 prevdef
->next
= curdef
->next
;
3844 struct name_list
*olddef
= curdef
;
3845 curdef
= curdef
->next
;
3852 curdef
= curdef
->next
;
3855 lr_ignore_rest (ldfile
, 1);
3862 lr_ignore_rest (ldfile
, 0);
3867 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3868 if (arg
->tok
!= tok_ident
)
3870 lr_ignore_rest (ldfile
, 1);
3872 if (collate
->else_action
== else_none
)
3875 while (curdef
!= NULL
)
3876 if (strncmp (arg
->val
.str
.startmb
, curdef
->str
,
3877 arg
->val
.str
.lenmb
) == 0
3878 && curdef
->str
[arg
->val
.str
.lenmb
] == '\0')
3881 curdef
= curdef
->next
;
3883 if ((nowtok
== tok_ifdef
&& curdef
!= NULL
)
3884 || (nowtok
== tok_ifndef
&& curdef
== NULL
))
3886 /* We have to use the if-branch. */
3887 collate
->else_action
= else_ignore
;
3891 /* We have to use the else-branch, if there is one. */
3892 nowtok
= skip_to (ldfile
, collate
, charmap
, 0);
3893 if (nowtok
== tok_else
)
3894 collate
->else_action
= else_seen
;
3895 else if (nowtok
== tok_elifdef
)
3900 else if (nowtok
== tok_elifndef
)
3902 nowtok
= tok_ifndef
;
3905 else if (nowtok
== tok_eof
)
3907 else if (nowtok
== tok_end
)
3913 /* XXX Should it really become necessary to support nested
3914 preprocessor handling we will push the state here. */
3915 lr_error (ldfile
, _("%s: nested conditionals not supported"),
3917 nowtok
= skip_to (ldfile
, collate
, charmap
, 1);
3918 if (nowtok
== tok_eof
)
3920 else if (nowtok
== tok_end
)
3930 lr_ignore_rest (ldfile
, 0);
3934 lr_ignore_rest (ldfile
, 1);
3936 if (collate
->else_action
== else_ignore
)
3938 /* Ignore everything until the endif. */
3939 nowtok
= skip_to (ldfile
, collate
, charmap
, 1);
3940 if (nowtok
== tok_eof
)
3942 else if (nowtok
== tok_end
)
3947 assert (collate
->else_action
== else_none
);
3948 lr_error (ldfile
, _("\
3949 %s: '%s' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE",
3950 nowtok
== tok_else
? "else"
3951 : nowtok
== tok_elifdef
? "elifdef" : "elifndef");
3958 lr_ignore_rest (ldfile
, 0);
3962 lr_ignore_rest (ldfile
, 1);
3964 if (collate
->else_action
!= else_ignore
3965 && collate
->else_action
!= else_seen
)
3966 lr_error (ldfile
, _("\
3967 %s: 'endif' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE");
3969 /* XXX If we support nested preprocessor directives we pop
3971 collate
->else_action
= else_none
;
3976 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
3979 /* Prepare for the next round. */
3980 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3985 /* When we come here we reached the end of the file. */
3986 lr_error (ldfile
, _("%s: premature end of file"), "LC_COLLATE");