1 /* Copyright (C) 1995-2003, 2005-2008, 2009 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
27 #include <sys/param.h>
29 #include "localedef.h"
31 #include "localeinfo.h"
32 #include "linereader.h"
34 #include "elem-hash.h"
36 /* Uncomment the following line in the production version. */
37 /* #define NDEBUG 1 */
40 #define obstack_chunk_alloc malloc
41 #define obstack_chunk_free free
44 __attribute ((always_inline
))
45 obstack_int32_grow (struct obstack
*obstack
, int32_t data
)
47 if (sizeof (int32_t) == sizeof (int))
48 obstack_int_grow (obstack
, data
);
50 obstack_grow (obstack
, &data
, sizeof (int32_t));
54 __attribute ((always_inline
))
55 obstack_int32_grow_fast (struct obstack
*obstack
, int32_t data
)
57 if (sizeof (int32_t) == sizeof (int))
58 obstack_int_grow_fast (obstack
, data
);
60 obstack_grow (obstack
, &data
, sizeof (int32_t));
63 /* Forward declaration. */
66 /* Data type for a list of sections. */
69 /* Successor in the known_sections list. */
70 struct section_list
*def_next
;
71 /* Successor in the sections list. */
72 struct section_list
*next
;
73 /* Name of the section. */
75 /* First element of this section. */
76 struct element_t
*first
;
77 /* Last element of this section. */
78 struct element_t
*last
;
79 /* These are the rules for this section. */
80 enum coll_sort_rule
*rules
;
81 /* Index of the rule set in the appropriate section of the output file. */
89 /* Number of elements. */
95 /* Data type for collating element. */
107 /* The following is a bit mask whose bits are set if this element is
108 used in the appropriate level. Interesting for the singlebyte weight computation.
111 XXX The type here restricts the number of levels to 32. It could
112 be changed if necessary but I doubt this is necessary. */
113 unsigned int used_in_level
;
115 struct element_list_t
*weights
;
117 /* Nonzero if this is a real character definition. */
120 /* Order of the character in the sequence. This information will
121 be used in range expressions. */
125 /* Where does the definition come from. */
129 /* Which section does this belong to. */
130 struct section_list
*section
;
132 /* Predecessor and successor in the order list. */
133 struct element_t
*last
;
134 struct element_t
*next
;
136 /* Next element in multibyte output list. */
137 struct element_t
*mbnext
;
138 struct element_t
*mblast
;
140 /* Next element in wide character output list. */
141 struct element_t
*wcnext
;
142 struct element_t
*wclast
;
145 /* Special element value. */
146 #define ELEMENT_ELLIPSIS2 ((struct element_t *) 1)
147 #define ELEMENT_ELLIPSIS3 ((struct element_t *) 2)
148 #define ELEMENT_ELLIPSIS4 ((struct element_t *) 3)
150 /* Data type for collating symbol. */
155 /* Point to place in the order list. */
156 struct element_t
*order
;
158 /* Where does the definition come from. */
163 /* Sparse table of struct element_t *. */
164 #define TABLE wchead_table
165 #define ELEMENT struct element_t *
171 /* Sparse table of int32_t. */
172 #define TABLE collidx_table
173 #define ELEMENT int32_t
177 /* Sparse table of uint32_t. */
178 #define TABLE collseq_table
179 #define ELEMENT uint32_t
180 #define DEFAULT ~((uint32_t) 0)
184 /* Simple name list for the preprocessor. */
187 struct name_list
*next
;
192 /* The real definition of the struct for the LC_COLLATE locale. */
193 struct locale_collate_t
198 /* List of known scripts. */
199 struct section_list
*known_sections
;
200 /* List of used sections. */
201 struct section_list
*sections
;
202 /* Current section using definition. */
203 struct section_list
*current_section
;
204 /* There always can be an unnamed section. */
205 struct section_list unnamed_section
;
206 /* Flag whether the unnamed section has been defined. */
207 bool unnamed_section_defined
;
208 /* To make handling of errors easier we have another section. */
209 struct section_list error_section
;
210 /* Sometimes we are defining the values for collating symbols before
211 the first actual section. */
212 struct section_list symbol_section
;
214 /* Start of the order list. */
215 struct element_t
*start
;
217 /* The undefined element. */
218 struct element_t undefined
;
220 /* This is the cursor for `reorder_after' insertions. */
221 struct element_t
*cursor
;
223 /* This value is used when handling ellipsis. */
224 struct element_t ellipsis_weight
;
226 /* Known collating elements. */
227 hash_table elem_table
;
229 /* Known collating symbols. */
230 hash_table sym_table
;
232 /* Known collation sequences. */
233 hash_table seq_table
;
235 struct obstack mempool
;
237 /* The LC_COLLATE category is a bit special as it is sometimes possible
238 that the definitions from more than one input file contain information.
239 Therefore we keep all relevant input in a list. */
240 struct locale_collate_t
*next
;
242 /* Arrays with heads of the list for each of the leading bytes in
243 the multibyte sequences. */
244 struct element_t
*mbheads
[256];
246 /* Sparse table with the heads of the lists for the wide character
247 sequences. */
248 struct wchead_table wcheads
;
250 /* The arrays with the collation sequence order. */
251 unsigned char mbseqorder
[256];
252 struct collseq_table wcseqorder
;
254 /* State of the preprocessor. */
265 /* We have a few global variables which are used for reading all
266 LC_COLLATE category descriptions in all files. */
267 static uint32_t nrules
;
269 /* List of defined preprocessor symbols. */
270 static struct name_list
*defined
;
273 /* We need UTF-8 encoding of numbers. */
275 __attribute ((always_inline
))
276 utf8_encode (char *buf
, int val
)
289 for (step
= 2; step
< 6; ++step
)
290 if ((val
& (~(uint32_t)0 << (5 * step
+ 1))) == 0)
294 *buf
= (unsigned char) (~0xff >> step
);
298 buf
[step
] = 0x80 | (val
& 0x3f);
309 static struct section_list
*
310 make_seclist_elem (struct locale_collate_t
*collate
, const char *string
,
311 struct section_list
*next
)
313 struct section_list
*newp
;
315 newp
= (struct section_list
*) obstack_alloc (&collate
->mempool
,
326 static struct element_t
*
327 new_element (struct locale_collate_t
*collate
, const char *mbs
, size_t mbslen
,
328 const uint32_t *wcs
, const char *name
, size_t namelen
,
331 struct element_t
*newp
;
333 newp
= (struct element_t
*) obstack_alloc (&collate
->mempool
,
335 newp
->name
= name
== NULL
? NULL
: obstack_copy0 (&collate
->mempool
,
339 newp
->mbs
= obstack_copy0 (&collate
->mempool
, mbs
, mbslen
);
349 size_t nwcs
= wcslen ((wchar_t *) wcs
);
351 obstack_grow (&collate
->mempool
, wcs
, nwcs
* sizeof (uint32_t));
352 obstack_grow (&collate
->mempool
, &zero
, sizeof (uint32_t));
353 newp
->wcs
= (uint32_t *) obstack_finish (&collate
->mempool
);
361 newp
->mborder
= NULL
;
363 newp
->used_in_level
= 0;
364 newp
->is_character
= is_character
;
366 /* Will be assigned later. XXX */
367 newp
->mbseqorder
= 0;
368 newp
->wcseqorder
= 0;
370 /* Will be allocated later. */
371 newp
->weights
= NULL
;
376 newp
->section
= collate
->current_section
;
391 static struct symbol_t
*
392 new_symbol (struct locale_collate_t
*collate
, const char *name
, size_t len
)
394 struct symbol_t
*newp
;
396 newp
= (struct symbol_t
*) obstack_alloc (&collate
->mempool
, sizeof (*newp
));
398 newp
->name
= obstack_copy0 (&collate
->mempool
, name
, len
);
408 /* Test whether this name is already defined somewhere. */
410 check_duplicate (struct linereader
*ldfile
, struct locale_collate_t
*collate
,
411 const struct charmap_t
*charmap
,
412 struct repertoire_t
*repertoire
, const char *symbol
,
417 if (find_entry (&charmap
->char_table
, symbol
, symbol_len
, &ignore
) == 0)
419 lr_error (ldfile
, _("`%.*s' already defined in charmap"),
420 (int) symbol_len
, symbol
);
424 if (repertoire
!= NULL
425 && (find_entry (&repertoire
->char_table
, symbol
, symbol_len
, &ignore
)
428 lr_error (ldfile
, _("`%.*s' already defined in repertoire"),
429 (int) symbol_len
, symbol
);
433 if (find_entry (&collate
->sym_table
, symbol
, symbol_len
, &ignore
) == 0)
435 lr_error (ldfile
, _("`%.*s' already defined as collating symbol"),
436 (int) symbol_len
, symbol
);
440 if (find_entry (&collate
->elem_table
, symbol
, symbol_len
, &ignore
) == 0)
442 lr_error (ldfile
, _("`%.*s' already defined as collating element"),
443 (int) symbol_len
, symbol
);
451 /* Read the direction specification. */
453 read_directions (struct linereader
*ldfile
, struct token
*arg
,
454 const struct charmap_t
*charmap
,
455 struct repertoire_t
*repertoire
, struct localedef_t
*result
)
458 int max
= nrules
?: 10;
459 enum coll_sort_rule
*rules
= calloc (max
, sizeof (*rules
));
461 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
467 if (arg
->tok
== tok_forward
)
469 if (rules
[cnt
] & sort_backward
)
473 lr_error (ldfile
, _("\
474 %s: `forward' and `backward' are mutually excluding each other"),
479 else if (rules
[cnt
] & sort_forward
)
483 lr_error (ldfile
, _("\
484 %s: `%s' mentioned more than once in definition of weight %d"),
485 "LC_COLLATE", "forward", cnt
+ 1);
489 rules
[cnt
] |= sort_forward
;
493 else if (arg
->tok
== tok_backward
)
495 if (rules
[cnt
] & sort_forward
)
499 lr_error (ldfile
, _("\
500 %s: `forward' and `backward' are mutually excluding each other"),
505 else if (rules
[cnt
] & sort_backward
)
509 lr_error (ldfile
, _("\
510 %s: `%s' mentioned more than once in definition of weight %d"),
511 "LC_COLLATE", "backward", cnt
+ 1);
515 rules
[cnt
] |= sort_backward
;
519 else if (arg
->tok
== tok_position
)
521 if (rules
[cnt
] & sort_position
)
525 lr_error (ldfile
, _("\
526 %s: `%s' mentioned more than once in definition of weight %d"),
527 "LC_COLLATE", "position", cnt
+ 1);
531 rules
[cnt
] |= sort_position
;
537 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
539 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
|| arg
->tok
== tok_comma
540 || arg
->tok
== tok_semicolon
)
542 if (! valid
&& ! warned
)
544 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
548 /* See whether we have to increment the counter. */
549 if (arg
->tok
!= tok_comma
&& rules
[cnt
] != 0)
551 /* Add the default `forward' if we have seen only `position'. */
552 if (rules
[cnt
] == sort_position
)
553 rules
[cnt
] = sort_position
| sort_forward
;
558 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
)
559 /* End of line or file, so we exit the loop. */
564 /* See whether we have enough room in the array. */
568 rules
= (enum coll_sort_rule
*) xrealloc (rules
,
571 memset (&rules
[cnt
], '\0', (max
- cnt
) * sizeof (*rules
));
578 /* There must not be any more rules. */
581 lr_error (ldfile
, _("\
582 %s: too many rules; first entry only had %d"),
583 "LC_COLLATE", nrules
);
587 lr_ignore_rest (ldfile
, 0);
596 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
601 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
606 /* Now we know how many rules we have. */
608 rules
= (enum coll_sort_rule
*) xrealloc (rules
,
609 nrules
* sizeof (*rules
));
615 /* Not enough rules in this specification. */
617 lr_error (ldfile
, _("%s: not enough sorting rules"), "LC_COLLATE");
620 rules
[cnt
] = sort_forward
;
621 while (++cnt
< nrules
);
625 collate
->current_section
->rules
= rules
;
629 static struct element_t
*
630 find_element (struct linereader
*ldfile
, struct locale_collate_t
*collate
,
631 const char *str
, size_t len
)
635 /* Search for the entries among the collation sequences already defined. */
636 if (find_entry (&collate
->seq_table
, str
, len
, &result
) != 0)
638 /* Nope, not defined yet. So we see whether it is a collation symbol. */
642 if (find_entry (&collate
->sym_table
, str
, len
, &ptr
) == 0)
644 /* It's a collation symbol. */
645 struct symbol_t
*sym
= (struct symbol_t
*) ptr
;
649 result
= sym
->order
= new_element (collate
, NULL
, 0, NULL
,
652 else if (find_entry (&collate
->elem_table
, str
, len
, &result
) != 0)
654 /* It's also no collation element. So it is a character
655 element defined later. */
656 result
= new_element (collate
, NULL
, 0, NULL
, str
, len
, 1);
657 /* Insert it into the sequence table. */
658 insert_entry (&collate
->seq_table
, str
, len
, result
);
662 return (struct element_t
*) result
;
667 unlink_element (struct locale_collate_t
*collate
)
669 if (collate
->cursor
== collate
->start
)
671 assert (collate
->cursor
->next
== NULL
);
672 assert (collate
->cursor
->last
== NULL
);
673 collate
->cursor
= NULL
;
677 if (collate
->cursor
->next
!= NULL
)
678 collate
->cursor
->next
->last
= collate
->cursor
->last
;
679 if (collate
->cursor
->last
!= NULL
)
680 collate
->cursor
->last
->next
= collate
->cursor
->next
;
681 collate
->cursor
= collate
->cursor
->last
;
687 insert_weights (struct linereader
*ldfile
, struct element_t
*elem
,
688 const struct charmap_t
*charmap
,
689 struct repertoire_t
*repertoire
, struct localedef_t
*result
,
690 enum token_t ellipsis
)
694 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
696 /* Initialize all the fields. */
697 elem
->file
= ldfile
->fname
;
698 elem
->line
= ldfile
->lineno
;
700 elem
->last
= collate
->cursor
;
701 elem
->next
= collate
->cursor
? collate
->cursor
->next
: NULL
;
702 if (collate
->cursor
!= NULL
&& collate
->cursor
->next
!= NULL
)
703 collate
->cursor
->next
->last
= elem
;
704 if (collate
->cursor
!= NULL
)
705 collate
->cursor
->next
= elem
;
706 if (collate
->start
== NULL
)
708 assert (collate
->cursor
== NULL
);
709 collate
->start
= elem
;
712 elem
->section
= collate
->current_section
;
714 if (collate
->current_section
->first
== NULL
)
715 collate
->current_section
->first
= elem
;
716 if (collate
->current_section
->last
== collate
->cursor
)
717 collate
->current_section
->last
= elem
;
719 collate
->cursor
= elem
;
721 elem
->weights
= (struct element_list_t
*)
722 obstack_alloc (&collate
->mempool
, nrules
* sizeof (struct element_list_t
));
723 memset (elem
->weights
, '\0', nrules
* sizeof (struct element_list_t
));
727 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
730 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
)
733 if (arg
->tok
== tok_ignore
)
735 /* The weight for this level has to be ignored. We use the
736 null pointer to indicate this. */
737 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
738 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
739 elem
->weights
[weight_cnt
].w
[0] = NULL
;
740 elem
->weights
[weight_cnt
].cnt
= 1;
742 else if (arg
->tok
== tok_bsymbol
|| arg
->tok
== tok_ucs4
)
745 struct element_t
*val
;
749 if (arg
->tok
== tok_bsymbol
)
751 symstr
= arg
->val
.str
.startmb
;
752 symlen
= arg
->val
.str
.lenmb
;
756 snprintf (ucs4str
, sizeof (ucs4str
), "U%08X", arg
->val
.ucs4
);
761 val
= find_element (ldfile
, collate
, symstr
, symlen
);
765 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
766 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
767 elem
->weights
[weight_cnt
].w
[0] = val
;
768 elem
->weights
[weight_cnt
].cnt
= 1;
770 else if (arg
->tok
== tok_string
)
772 /* Split the string up in the individual characters and put
773 the element definitions in the list. */
774 const char *cp
= arg
->val
.str
.startmb
;
776 struct element_t
*charelem
;
777 struct element_t
**weights
= NULL
;
782 lr_error (ldfile
, _("%s: empty weight string not allowed"),
784 lr_ignore_rest (ldfile
, 0);
792 /* Ahh, it's a bsymbol or a UCS4 value. If it's
793 the latter we have to unify the name. */
794 const char *startp
= ++cp
;
799 if (*cp
== ldfile
->escape_char
)
802 /* It's a syntax error. */
808 if (cp
- startp
== 5 && startp
[0] == 'U'
809 && isxdigit (startp
[1]) && isxdigit (startp
[2])
810 && isxdigit (startp
[3]) && isxdigit (startp
[4]))
812 unsigned int ucs4
= strtoul (startp
+ 1, NULL
, 16);
815 newstr
= (char *) xmalloc (10);
816 snprintf (newstr
, 10, "U%08X", ucs4
);
824 charelem
= find_element (ldfile
, collate
, startp
, len
);
829 /* People really shouldn't use characters directly in
830 the string, especially since it's not really clear
831 what this means. We interpret every character in the
832 string as if it were a bsymbol. Otherwise we
833 would have to match back to bsymbols somehow, which
834 is normally not what people expect. */
835 charelem
= find_element (ldfile
, collate
, cp
++, 1);
838 if (charelem
== NULL
)
840 /* We ignore the rest of the line. */
841 lr_ignore_rest (ldfile
, 0);
845 /* Add the pointer. */
848 struct element_t
**newp
;
850 newp
= (struct element_t
**)
851 alloca (max
* sizeof (struct element_t
*));
852 memcpy (newp
, weights
, cnt
* sizeof (struct element_t
*));
855 weights
[cnt
++] = charelem
;
859 /* Now store the information. */
860 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
861 obstack_alloc (&collate
->mempool
,
862 cnt
* sizeof (struct element_t
*));
863 memcpy (elem
->weights
[weight_cnt
].w
, weights
,
864 cnt
* sizeof (struct element_t
*));
865 elem
->weights
[weight_cnt
].cnt
= cnt
;
867 /* We don't need the string anymore. */
868 free (arg
->val
.str
.startmb
);
870 else if (ellipsis
!= tok_none
871 && (arg
->tok
== tok_ellipsis2
872 || arg
->tok
== tok_ellipsis3
873 || arg
->tok
== tok_ellipsis4
))
875 /* It must be the same ellipsis as used in the initial column. */
876 if (arg
->tok
!= ellipsis
)
877 lr_error (ldfile
, _("\
878 %s: weights must use the same ellipsis symbol as the name"),
881 /* The weight for this level will depend on the element
882 iterating over the range. Put a placeholder. */
883 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
884 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
885 elem
->weights
[weight_cnt
].w
[0] = ELEMENT_ELLIPSIS2
;
886 elem
->weights
[weight_cnt
].cnt
= 1;
891 /* It's a syntax error. */
892 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
893 lr_ignore_rest (ldfile
, 0);
897 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
898 /* This had better be the end of the line or a semicolon. */
899 if (arg
->tok
== tok_semicolon
)
900 /* OK, ignore this and read the next token. */
901 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
902 else if (arg
->tok
!= tok_eof
&& arg
->tok
!= tok_eol
)
904 /* It's a syntax error. */
905 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
906 lr_ignore_rest (ldfile
, 0);
910 while (++weight_cnt
< nrules
);
912 if (weight_cnt
< nrules
)
914 /* This means the rest of the line uses the current element as the weight. */
918 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
919 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
920 if (ellipsis
== tok_none
)
921 elem
->weights
[weight_cnt
].w
[0] = elem
;
923 elem
->weights
[weight_cnt
].w
[0] = ELEMENT_ELLIPSIS2
;
924 elem
->weights
[weight_cnt
].cnt
= 1;
926 while (++weight_cnt
< nrules
);
930 if (arg
->tok
== tok_ignore
|| arg
->tok
== tok_bsymbol
)
932 /* Too many rule values. */
933 lr_error (ldfile
, _("%s: too many values"), "LC_COLLATE");
934 lr_ignore_rest (ldfile
, 0);
937 lr_ignore_rest (ldfile
, arg
->tok
!= tok_eol
&& arg
->tok
!= tok_eof
);
943 insert_value (struct linereader
*ldfile
, const char *symstr
, size_t symlen
,
944 const struct charmap_t
*charmap
, struct repertoire_t
*repertoire
,
945 struct localedef_t
*result
)
947 /* First find out what kind of symbol this is. */
950 struct element_t
*elem
= NULL
;
951 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
953 /* Try to find the character in the charmap. */
954 seq
= charmap_find_value (charmap
, symstr
, symlen
);
956 /* Determine the wide character. */
957 if (seq
== NULL
|| seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
959 wc
= repertoire_find_value (repertoire
, symstr
, symlen
);
966 if (wc
== ILLEGAL_CHAR_VALUE
&& seq
== NULL
)
968 /* It's no character, so look through the collation elements and symbol list. */
971 if (find_entry (&collate
->elem_table
, symstr
, symlen
, &ptr
) != 0)
974 struct symbol_t
*sym
= NULL
;
976 /* It's also no collation element. Therefore it's either a
977 collating symbol or it's a character which is not
978 supported by the character set. In the latter case we
979 simply create a dummy entry. */
980 if (find_entry (&collate
->sym_table
, symstr
, symlen
, &result
) == 0)
982 /* It's a collation symbol. */
983 sym
= (struct symbol_t
*) result
;
990 elem
= new_element (collate
, NULL
, 0, NULL
, symstr
, symlen
, 0);
995 /* Enter a fake element in the sequence table. This
996 won't cause anything in the output since there is
997 no multibyte or wide character associated with it. */
999 insert_entry (&collate
->seq_table
, symstr
, symlen
, elem
);
1003 /* Copy the result back. */
1008 /* Otherwise the symbol stands for a character. */
1010 if (find_entry (&collate
->seq_table
, symstr
, symlen
, &ptr
) != 0)
1012 uint32_t wcs
[2] = { wc
, 0 };
1014 /* We have to allocate an entry. */
1015 elem
= new_element (collate
,
1016 seq
!= NULL
? (char *) seq
->bytes
: NULL
,
1017 seq
!= NULL
? seq
->nbytes
: 0,
1018 wc
== ILLEGAL_CHAR_VALUE
? NULL
: wcs
,
1021 /* And add it to the table. */
1022 if (insert_entry (&collate
->seq_table
, symstr
, symlen
, elem
) != 0)
1023 /* This cannot happen. */
1024 assert (! "Internal error");
1028 /* Copy the result back. */
1031 /* Maybe the character was used before the definition. In this case
1032 we have to insert the byte sequences now. */
1033 if (elem
->mbs
== NULL
&& seq
!= NULL
)
1035 elem
->mbs
= obstack_copy0 (&collate
->mempool
,
1036 seq
->bytes
, seq
->nbytes
);
1037 elem
->nmbs
= seq
->nbytes
;
1040 if (elem
->wcs
== NULL
&& wc
!= ILLEGAL_CHAR_VALUE
)
1042 uint32_t wcs
[2] = { wc
, 0 };
1044 elem
->wcs
= obstack_copy (&collate
->mempool
, wcs
, sizeof (wcs
));
1050 /* Test whether this element is not already in the list. */
1051 if (elem
->next
!= NULL
|| elem
== collate
->cursor
)
1053 lr_error (ldfile
, _("order for `%.*s' already defined at %s:%Zu"),
1054 (int) symlen
, symstr
, elem
->file
, elem
->line
);
1055 lr_ignore_rest (ldfile
, 0);
1059 insert_weights (ldfile
, elem
, charmap
, repertoire
, result
, tok_none
);
1066 handle_ellipsis (struct linereader
*ldfile
, const char *symstr
, size_t symlen
,
1067 enum token_t ellipsis
, const struct charmap_t
*charmap
,
1068 struct repertoire_t
*repertoire
,
1069 struct localedef_t
*result
)
1071 struct element_t
*startp
;
1072 struct element_t
*endp
;
1073 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
1075 /* Unlink the entry added for the ellipsis. */
1076 unlink_element (collate
);
1077 startp
= collate
->cursor
;
1079 /* Process and add the end-entry. */
1081 && insert_value (ldfile
, symstr
, symlen
, charmap
, repertoire
, result
))
1082 /* Something went wrong with inserting the to-value. This means
1083 we cannot process the ellipsis. */
1086 /* Reset the cursor. */
1087 collate
->cursor
= startp
;
1089 /* Now we have to handle many different situations:
1090 - we have to distinguish between the three different ellipsis forms
1091 - the ellipsis can be at the beginning, in the middle, or at the end. */
1093 endp
= collate
->cursor
->next
;
1094 assert (symstr
== NULL
|| endp
!= NULL
);
1096 /* XXX The following is probably very wrong since collating symbols
1097 can also appear in ranges. But do we want to, and can we, refine the test for that? */
1099 /* Both, the start and the end symbol, must stand for characters. */
1100 if ((startp
!= NULL
&& (startp
->name
== NULL
|| ! startp
->is_character
))
1101 || (endp
!= NULL
&& (endp
->name
== NULL
|| ! endp
->is_character
)))
1103 lr_error (ldfile
, _("\
1104 %s: the start and the end symbol of a range must stand for characters"),
1110 if (ellipsis
== tok_ellipsis3
)
1112 /* One requirement we make here: the length of the byte
1113 sequences for the first and end character must be the same.
1114 This is mainly to prevent unwanted effects and this is often
1115 not what is wanted. */
1116 size_t len
= (startp
->mbs
!= NULL
? startp
->nmbs
1117 : (endp
->mbs
!= NULL
? endp
->nmbs
: 0));
1118 char mbcnt
[len
+ 1];
1119 char mbend
[len
+ 1];
1121 /* Well, this should be caught somewhere else already. Just to make sure. */
1123 assert (startp
== NULL
|| startp
->wcs
== NULL
|| startp
->wcs
[1] == 0);
1124 assert (endp
== NULL
|| endp
->wcs
== NULL
|| endp
->wcs
[1] == 0);
1126 if (startp
!= NULL
&& endp
!= NULL
1127 && startp
->mbs
!= NULL
&& endp
->mbs
!= NULL
1128 && startp
->nmbs
!= endp
->nmbs
)
1130 lr_error (ldfile
, _("\
1131 %s: byte sequences of first and last character must have the same length"),
1136 /* Determine whether we have to generate multibyte sequences. */
1137 if ((startp
== NULL
|| startp
->mbs
!= NULL
)
1138 && (endp
== NULL
|| endp
->mbs
!= NULL
))
1143 /* Prepare the beginning byte sequence. This is either from the
1144 beginning byte sequence or it is all nulls if it was an
1145 initial ellipsis. */
1146 if (startp
== NULL
|| startp
->mbs
== NULL
)
1147 memset (mbcnt
, '\0', len
);
1150 memcpy (mbcnt
, startp
->mbs
, len
);
1152 /* And increment it so that the value is the first one we will try to insert. */
1154 for (cnt
= len
- 1; cnt
>= 0; --cnt
)
1155 if (++mbcnt
[cnt
] != '\0')
1160 /* And the end sequence. */
1161 if (endp
== NULL
|| endp
->mbs
== NULL
)
1162 memset (mbend
, '\0', len
);
1164 memcpy (mbend
, endp
->mbs
, len
);
1167 /* Test whether we have a correct range. */
1168 ret
= memcmp (mbcnt
, mbend
, len
);
1172 lr_error (ldfile
, _("%s: byte sequence of first character of \
1173 range is not lower than that of the last character"), "LC_COLLATE");
1177 /* Generate the byte sequences data. */
1180 struct charseq
*seq
;
1182 /* Quite a bit of work ahead. We have to find the character
1183 definition for the byte sequence and then determine the
1184 wide character belonging to it. */
1185 seq
= charmap_find_symbol (charmap
, mbcnt
, len
);
1188 struct element_t
*elem
;
1191 /* I don't think this can ever happen. */
1192 assert (seq
->name
!= NULL
);
1193 namelen
= strlen (seq
->name
);
1195 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1196 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1199 /* Now we are ready to insert the new value in the
1200 sequence. Find out whether the element is already known. */
1203 if (find_entry (&collate
->seq_table
, seq
->name
, namelen
,
1206 uint32_t wcs
[2] = { seq
->ucs4
, 0 };
1208 /* We have to allocate an entry. */
1209 elem
= new_element (collate
, mbcnt
, len
,
1210 seq
->ucs4
== ILLEGAL_CHAR_VALUE
1211 ? NULL
: wcs
, seq
->name
,
1214 /* And add it to the table. */
1215 if (insert_entry (&collate
->seq_table
, seq
->name
,
1216 namelen
, elem
) != 0)
1217 /* This cannot happen. */
1218 assert (! "Internal error");
1221 /* Copy the result. */
1224 /* Test whether this element is not already in the list. */
1225 if (elem
->next
!= NULL
|| (collate
->cursor
!= NULL
1226 && elem
->next
== collate
->cursor
))
1228 lr_error (ldfile
, _("\
1229 order for `%.*s' already defined at %s:%Zu"),
1230 (int) namelen
, seq
->name
,
1231 elem
->file
, elem
->line
);
1235 /* Enqueue the new element. */
1236 elem
->last
= collate
->cursor
;
1237 if (collate
->cursor
== NULL
)
1241 elem
->next
= collate
->cursor
->next
;
1242 elem
->last
->next
= elem
;
1243 if (elem
->next
!= NULL
)
1244 elem
->next
->last
= elem
;
1246 if (collate
->start
== NULL
)
1248 assert (collate
->cursor
== NULL
);
1249 collate
->start
= elem
;
1251 collate
->cursor
= elem
;
1253 /* Add the weight values. We take them from the
1254 `ellipsis_weight' member of `collate'. */
1255 elem
->weights
= (struct element_list_t
*)
1256 obstack_alloc (&collate
->mempool
,
1257 nrules
* sizeof (struct element_list_t
));
1258 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1259 if (collate
->ellipsis_weight
.weights
[cnt
].cnt
== 1
1260 && (collate
->ellipsis_weight
.weights
[cnt
].w
[0]
1261 == ELEMENT_ELLIPSIS2
))
1263 elem
->weights
[cnt
].w
= (struct element_t
**)
1264 obstack_alloc (&collate
->mempool
,
1265 sizeof (struct element_t
*));
1266 elem
->weights
[cnt
].w
[0] = elem
;
1267 elem
->weights
[cnt
].cnt
= 1;
1271 /* Simply use the weight from `ellipsis_weight'. */
1272 elem
->weights
[cnt
].w
=
1273 collate
->ellipsis_weight
.weights
[cnt
].w
;
1274 elem
->weights
[cnt
].cnt
=
1275 collate
->ellipsis_weight
.weights
[cnt
].cnt
;
1279 /* Increment for the next round. */
1281 for (cnt
= len
- 1; cnt
>= 0; --cnt
)
1282 if (++mbcnt
[cnt
] != '\0')
1285 /* Find out whether this was all. */
1286 if (cnt
< 0 || memcmp (mbcnt
, mbend
, len
) >= 0)
1287 /* Yep, that's all. */
1294 /* For symbolic range we naturally must have a beginning and an
1295 end specified by the user. */
1297 lr_error (ldfile
, _("\
1298 %s: symbolic range ellipsis must not directly follow `order_start'"),
1300 else if (endp
== NULL
)
1301 lr_error (ldfile
, _("\
1302 %s: symbolic range ellipsis must not be directly followed by `order_end'"),
1306 /* Determine the range. To do so we have to determine the
1307 common prefix of both names and then the numeric
1308 values of both ends. */
1309 size_t lenfrom
= strlen (startp
->name
);
1310 size_t lento
= strlen (endp
->name
);
1311 char buf
[lento
+ 1];
1316 int base
= ellipsis
== tok_ellipsis2
? 16 : 10;
1318 if (lenfrom
!= lento
)
1321 lr_error (ldfile
, _("\
1322 `%s' and `%.*s' are not valid names for symbolic range"),
1323 startp
->name
, (int) lento
, endp
->name
);
1327 while (startp
->name
[preflen
] == endp
->name
[preflen
])
1328 if (startp
->name
[preflen
] == '\0')
1329 /* Nothing to be done. The start and end point are identical
1330 and while inserting the end point we have already given
1331 the user an error message. */
1337 from
= strtol (startp
->name
+ preflen
, &cp
, base
);
1338 if ((from
== UINT_MAX
&& errno
== ERANGE
) || *cp
!= '\0')
1342 to
= strtol (endp
->name
+ preflen
, &cp
, base
);
1343 if ((to
== UINT_MAX
&& errno
== ERANGE
) || *cp
!= '\0')
1346 /* Copy the prefix. */
1347 memcpy (buf
, startp
->name
, preflen
);
1349 /* Loop over all values. */
1350 for (++from
; from
< to
; ++from
)
1352 struct element_t
*elem
= NULL
;
1353 struct charseq
*seq
;
1357 /* Generate the name. */
1358 sprintf (buf
+ preflen
, base
== 10 ? "%0*ld" : "%0*lX",
1359 (int) (lenfrom
- preflen
), from
);
1361 /* Look whether this name is already defined. */
1363 if (find_entry (&collate
->seq_table
, buf
, symlen
, &ptr
) == 0)
1365 /* Copy back the result. */
1368 if (elem
->next
!= NULL
|| (collate
->cursor
!= NULL
1369 && elem
->next
== collate
->cursor
))
1371 lr_error (ldfile
, _("\
1372 %s: order for `%.*s' already defined at %s:%Zu"),
1373 "LC_COLLATE", (int) lenfrom
, buf
,
1374 elem
->file
, elem
->line
);
1378 if (elem
->name
== NULL
)
1380 lr_error (ldfile
, _("%s: `%s' must be a character"),
1386 if (elem
== NULL
|| (elem
->mbs
== NULL
&& elem
->wcs
== NULL
))
1388 /* Search for a character of this name. */
1389 seq
= charmap_find_value (charmap
, buf
, lenfrom
);
1390 if (seq
== NULL
|| seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1392 wc
= repertoire_find_value (repertoire
, buf
, lenfrom
);
1400 if (wc
== ILLEGAL_CHAR_VALUE
&& seq
== NULL
)
1401 /* We don't know anything about a character with this
1402 name. XXX Should we warn? */
1407 uint32_t wcs
[2] = { wc
, 0 };
1409 /* We have to allocate an entry. */
1410 elem
= new_element (collate
,
1412 ? (char *) seq
->bytes
: NULL
,
1413 seq
!= NULL
? seq
->nbytes
: 0,
1414 wc
== ILLEGAL_CHAR_VALUE
1415 ? NULL
: wcs
, buf
, lenfrom
, 1);
1419 /* Update the element. */
1422 elem
->mbs
= obstack_copy0 (&collate
->mempool
,
1423 seq
->bytes
, seq
->nbytes
);
1424 elem
->nmbs
= seq
->nbytes
;
1427 if (wc
!= ILLEGAL_CHAR_VALUE
)
1431 obstack_grow (&collate
->mempool
,
1432 &wc
, sizeof (uint32_t));
1433 obstack_grow (&collate
->mempool
,
1434 &zero
, sizeof (uint32_t));
1435 elem
->wcs
= obstack_finish (&collate
->mempool
);
1440 elem
->file
= ldfile
->fname
;
1441 elem
->line
= ldfile
->lineno
;
1442 elem
->section
= collate
->current_section
;
1445 /* Enqueue the new element. */
1446 elem
->last
= collate
->cursor
;
1447 elem
->next
= collate
->cursor
->next
;
1448 elem
->last
->next
= elem
;
1449 if (elem
->next
!= NULL
)
1450 elem
->next
->last
= elem
;
1451 collate
->cursor
= elem
;
1453 /* Now add the weights. They come from the `ellipsis_weights'
1454 member of `collate'. */
1455 elem
->weights
= (struct element_list_t
*)
1456 obstack_alloc (&collate
->mempool
,
1457 nrules
* sizeof (struct element_list_t
));
1458 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1459 if (collate
->ellipsis_weight
.weights
[cnt
].cnt
== 1
1460 && (collate
->ellipsis_weight
.weights
[cnt
].w
[0]
1461 == ELEMENT_ELLIPSIS2
))
1463 elem
->weights
[cnt
].w
= (struct element_t
**)
1464 obstack_alloc (&collate
->mempool
,
1465 sizeof (struct element_t
*));
1466 elem
->weights
[cnt
].w
[0] = elem
;
1467 elem
->weights
[cnt
].cnt
= 1;
1471 /* Simply use the weight from `ellipsis_weight'. */
1472 elem
->weights
[cnt
].w
=
1473 collate
->ellipsis_weight
.weights
[cnt
].w
;
1474 elem
->weights
[cnt
].cnt
=
1475 collate
->ellipsis_weight
.weights
[cnt
].cnt
;
/* collate_startup: prepare the LC_COLLATE slot of LOCALE and reset two
   reader flags on LDFILE.

   LDFILE          line reader whose translate_strings and return_widestr
                   members are set to 0 by the last two statements.
   LOCALE          locale whose categories[LC_COLLATE].collate is filled in.
   COPY_LOCALE     when NULL a fresh locale_collate_t is allocated with
                   xcalloc; the second assignment to collate takes the
                   pointer from COPY_LOCALE instead, so both locales then
                   refer to the same object.
   IGNORE_CONTENT  when nonzero the collate slot is not touched.

   The slot is only set up while it is still NULL.  A fresh object gets
   its three hash tables (elem_table and sym_table sized 100, seq_table
   sized 500) and its mempool obstack initialized, and col_weight_max
   starts at -1.

   NOTE(review): this listing has lost lines (return type, braces and
   presumably the else that separates the allocate and reuse branches);
   confirm the exact nesting against the upstream file.  */
1484 collate_startup (struct linereader
*ldfile
, struct localedef_t
*locale
,
1485 struct localedef_t
*copy_locale
, int ignore_content
)
1487 if (!ignore_content
&& locale
->categories
[LC_COLLATE
].collate
== NULL
)
1489 struct locale_collate_t
*collate
;
1491 if (copy_locale
== NULL
)
1493 collate
= locale
->categories
[LC_COLLATE
].collate
=
1494 (struct locale_collate_t
*)
1495 xcalloc (1, sizeof (struct locale_collate_t
));
1497 /* Init the various data structures. */
1498 init_hash (&collate
->elem_table
, 100);
1499 init_hash (&collate
->sym_table
, 100);
1500 init_hash (&collate
->seq_table
, 500);
1501 obstack_init (&collate
->mempool
);
1503 collate
->col_weight_max
= -1;
1506 /* Reuse the copy_locale's data structures. */
1507 collate
= locale
->categories
[LC_COLLATE
].collate
=
1508 copy_locale
->categories
[LC_COLLATE
].collate
;
1511 ldfile
->translate_strings
= 0;
1512 ldfile
->return_widestr
= 0;
/* collate_finish: post-process the parsed LC_COLLATE data of LOCALE.

   NOTE(review): this listing has lost lines (the return type, braces and
   several statements that follow visible conditions), so the summary
   below covers only what the visible lines establish.

   Visible steps, in order:
   - With no collate data a diagnostic is issued (No definition for %s
     category found).
   - An assert checks that nrules fits into the bits of used_in_level.
   - Every section that has rules is compared with current_section on
     the sort_position bit of each level; a mismatch is reported.
   - First walk over collate->start: for elements that have mbs, a
     weight whose own weights pointer is NULL is reported as an
     undefined symbol and redirected to collate->undefined; bit i of
     used_in_level is set on weights referenced at level i.
   - Second walk: elements with used_in_level != 0 get an mborder array
     (mbact[i]++ for the levels they are used in, 0 otherwise).
     Elements with mbs are linked through mbnext/mblast into the list
     at collate->mbheads[first byte], positioned by comparing nmbs and
     then memcmp; identical byte sequences are reported.  The wcorder,
     mbseqorder and wcseqorder counters are handed out in this walk.
   - Unset mbheads[1..255] are pointed at collate->undefined.
   - The wcheads and wcseqorder tables are initialized with p = 6 and
     q = 10, then a third walk links elements with wcs through
     wcnext/wclast into the list found for wcs[0].
   - undefined.used_in_level becomes all ones when need_undefined is
     set, else 0.  When undefined.file is NULL the UNDEFINED entry gets
     mborder values (presumably only when need_undefined is set; the
     guarding line is not visible) and a wcorder.
   - Sections whose rules compare equal with memcmp share a ruleidx;
     an assert limits the number of distinct indices to 128.

   CHARMAP is not referenced in the visible lines.  */
1517 collate_finish (struct localedef_t
*locale
, const struct charmap_t
*charmap
)
1519 /* Now is the time when we can assign the individual collation
1520 values for all the symbols. We have possibly different values
1521 for the wide- and the multibyte-character symbols. This is done
1522 since it might make a difference in the encoding if there is in
1523 some cases no multibyte-character but there are wide-characters.
1524 (The other way around it is not important since the encoded
1525 collation value in the wide-character case is 32 bits wide and
1526 therefore requires no encoding).
1528 The lowest collation value assigned is 2. Zero is reserved for
1529 the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1530 functions and 1 is used to separate the individual passes for the
1533 We also have to construct a list with all the bytes/words which
1534 can come first in a sequence, followed by all the elements which
1535 also start with this byte/word. The order is reverse which has
1536 among others the important effect that longer strings are located
1537 first in the list. This is required for the output data since
1538 the algorithm used in `strcoll' etc depends on this.
1540 The multibyte case is easy. We simply sort into an array with
1542 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1547 struct element_t
*runp
;
1549 int need_undefined
= 0;
1550 struct section_list
*sect
;
1552 int nr_wide_elems
= 0;
1554 if (collate
== NULL
)
1556 /* No data, no check. */
1558 WITH_CUR_LOCALE (error (0, 0, _("No definition for %s category found"),
1563 /* If this assertion is hit change the type in `element_t'. */
1564 assert (nrules
<= sizeof (runp
->used_in_level
) * 8);
1566 /* Make sure that the `position' rule is used either in all sections
1568 for (i
= 0; i
< nrules
; ++i
)
1569 for (sect
= collate
->sections
; sect
!= NULL
; sect
= sect
->next
)
1570 if (sect
!= collate
->current_section
1571 && sect
->rules
!= NULL
1572 && ((sect
->rules
[i
] & sort_position
)
1573 != (collate
->current_section
->rules
[i
] & sort_position
)))
1575 WITH_CUR_LOCALE (error (0, 0, _("\
1576 %s: `position' must be used for a specific level in all sections or none"),
1581 /* Find out which elements are used at which level. At the same
1582 time we find out whether we have any undefined symbols. */
1583 runp
= collate
->start
;
1584 while (runp
!= NULL
)
1586 if (runp
->mbs
!= NULL
)
1588 for (i
= 0; i
< nrules
; ++i
)
1592 for (j
= 0; j
< runp
->weights
[i
].cnt
; ++j
)
1593 /* A NULL pointer as the weight means IGNORE. */
1594 if (runp
->weights
[i
].w
[j
] != NULL
)
1596 if (runp
->weights
[i
].w
[j
]->weights
== NULL
)
1598 WITH_CUR_LOCALE (error_at_line (0, 0, runp
->file
,
1600 _("symbol `%s' not defined"),
1601 runp
->weights
[i
].w
[j
]->name
));
1604 runp
->weights
[i
].w
[j
] = &collate
->undefined
;
1607 /* Set the bit for the level. */
1608 runp
->weights
[i
].w
[j
]->used_in_level
|= 1 << i
;
1613 /* Up to the next entry. */
1617 /* Walk through the list of defined sequences and assign weights. Also
1618 create the data structure which will allow generating the single byte
1619 character based tables.
1621 Since at each time only the weights for each of the rules are
1622 only compared to other weights for this rule it is possible to
1623 assign more compact weight values than simply counting all
1624 weights in sequence. We can assign weights from 3, one for each
1625 rule individually and only for those elements, which are actually
1628 Why is this important? It is not for the wide char table. But
1629 it is for the singlebyte output since here larger numbers have to
1630 be encoded to make it possible to emit the value as a byte
1632 for (i
= 0; i
< nrules
; ++i
)
1637 runp
= collate
->start
;
1638 while (runp
!= NULL
)
1640 /* Determine the order. */
1641 if (runp
->used_in_level
!= 0)
1643 runp
->mborder
= (int *) obstack_alloc (&collate
->mempool
,
1644 nrules
* sizeof (int));
1646 for (i
= 0; i
< nrules
; ++i
)
1647 if ((runp
->used_in_level
& (1 << i
)) != 0)
1648 runp
->mborder
[i
] = mbact
[i
]++;
1650 runp
->mborder
[i
] = 0;
1653 if (runp
->mbs
!= NULL
)
1655 struct element_t
**eptr
;
1656 struct element_t
*lastp
= NULL
;
1658 /* Find the point where to insert in the list. */
1659 eptr
= &collate
->mbheads
[((unsigned char *) runp
->mbs
)[0]];
1660 while (*eptr
!= NULL
)
1662 if ((*eptr
)->nmbs
< runp
->nmbs
)
1665 if ((*eptr
)->nmbs
== runp
->nmbs
)
1667 int c
= memcmp ((*eptr
)->mbs
, runp
->mbs
, runp
->nmbs
);
1671 /* This should not happen. It means that we have
1672 two symbols with the same byte sequence. It is
1673 of course an error. */
1674 WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr
)->file
,
1677 symbol `%s' has the same encoding as"), (*eptr
)->name
);
1678 error_at_line (0, 0, runp
->file
,
1685 /* Insert it here. */
1689 /* To the next entry. */
1691 eptr
= &(*eptr
)->mbnext
;
1694 /* Set the pointers. */
1695 runp
->mbnext
= *eptr
;
1696 runp
->mblast
= lastp
;
1698 (*eptr
)->mblast
= runp
;
1704 if (runp
->used_in_level
)
1706 runp
->wcorder
= wcact
++;
1708 /* We take the opportunity to count the elements which have
1713 if (runp
->is_character
)
1715 if (runp
->nmbs
== 1)
1716 collate
->mbseqorder
[((unsigned char *) runp
->mbs
)[0]] = mbseqact
++;
1718 runp
->wcseqorder
= wcseqact
++;
1720 else if (runp
->mbs
!= NULL
&& runp
->weights
!= NULL
)
1721 /* This is for collation elements. */
1722 runp
->wcseqorder
= wcseqact
++;
1724 /* Up to the next entry. */
1728 /* Find out whether any of the `mbheads' entries is unset. In this
1729 case we use the UNDEFINED entry. */
1730 for (i
= 1; i
< 256; ++i
)
1731 if (collate
->mbheads
[i
] == NULL
)
1734 collate
->mbheads
[i
] = &collate
->undefined
;
1737 /* Now to the wide character case. */
1738 collate
->wcheads
.p
= 6;
1739 collate
->wcheads
.q
= 10;
1740 wchead_table_init (&collate
->wcheads
);
1742 collate
->wcseqorder
.p
= 6;
1743 collate
->wcseqorder
.q
= 10;
1744 collseq_table_init (&collate
->wcseqorder
);
1747 runp
= collate
->start
;
1748 while (runp
!= NULL
)
1750 if (runp
->wcs
!= NULL
)
1752 struct element_t
*e
;
1753 struct element_t
**eptr
;
1754 struct element_t
*lastp
;
1756 /* Insert the collation sequence value. */
1757 if (runp
->is_character
)
1758 collseq_table_add (&collate
->wcseqorder
, runp
->wcs
[0],
1761 /* Find the point where to insert in the list. */
1762 e
= wchead_table_get (&collate
->wcheads
, runp
->wcs
[0]);
1765 while (*eptr
!= NULL
)
1767 if ((*eptr
)->nwcs
< runp
->nwcs
)
1770 if ((*eptr
)->nwcs
== runp
->nwcs
)
1772 int c
= wmemcmp ((wchar_t *) (*eptr
)->wcs
,
1773 (wchar_t *) runp
->wcs
, runp
->nwcs
);
1777 /* This should not happen. It means that we have
1778 two symbols with the same byte sequence. It is
1779 of course an error. */
1780 WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr
)->file
,
1783 symbol `%s' has the same encoding as"), (*eptr
)->name
);
1784 error_at_line (0, 0, runp
->file
,
1791 /* Insert it here. */
1795 /* To the next entry. */
1797 eptr
= &(*eptr
)->wcnext
;
1800 /* Set the pointers. */
1801 runp
->wcnext
= *eptr
;
1802 runp
->wclast
= lastp
;
1804 (*eptr
)->wclast
= runp
;
1807 wchead_table_add (&collate
->wcheads
, runp
->wcs
[0], e
);
1812 /* Up to the next entry. */
1816 collseq_table_finalize (&collate
->wcseqorder
);
1818 /* Now determine whether the UNDEFINED entry is needed and if yes,
1819 whether it was defined. */
1820 collate
->undefined
.used_in_level
= need_undefined
? ~0ul : 0;
1821 if (collate
->undefined
.file
== NULL
)
1825 /* This seems not to be enforced by recent standards. Don't
1826 emit an error, simply append UNDEFINED at the end. */
1828 WITH_CUR_LOCALE (error (0, 0, _("no definition of `UNDEFINED'")));
1830 /* Add UNDEFINED at the end. */
1831 collate
->undefined
.mborder
=
1832 (int *) obstack_alloc (&collate
->mempool
, nrules
* sizeof (int));
1834 for (i
= 0; i
< nrules
; ++i
)
1835 collate
->undefined
.mborder
[i
] = mbact
[i
]++;
1838 /* In any case we will need the definition for the wide character
1839 case. But we will not complain that it is missing since the
1840 specification strangely enough does not seem to account for
1842 collate
->undefined
.wcorder
= wcact
++;
1845 /* Finally, try to unify the rules for the sections. Whenever the rules
1846 for a section are the same as those for another section give the
1847 ruleset the same index. Since there are never many section we can
1848 use an O(n^2) algorithm here. */
1849 sect
= collate
->sections
;
1850 while (sect
!= NULL
&& sect
->rules
== NULL
)
1853 /* Bail out if we have no sections because of earlier errors. */
1856 WITH_CUR_LOCALE (error (EXIT_FAILURE
, 0,
1857 _("too many errors; giving up")));
1864 struct section_list
*osect
= collate
->sections
;
1866 while (osect
!= sect
)
1867 if (osect
->rules
!= NULL
1868 && memcmp (osect
->rules
, sect
->rules
,
1869 nrules
* sizeof (osect
->rules
[0])) == 0)
1872 osect
= osect
->next
;
1875 sect
->ruleidx
= ruleidx
++;
1877 sect
->ruleidx
= osect
->ruleidx
;
1882 while (sect
!= NULL
&& sect
->rules
== NULL
);
1884 while (sect
!= NULL
);
1885 /* We are currently not prepared for more than 128 rulesets. But this
1886 should never really be a problem. */
1887 assert (ruleidx
<= 128);
/* output_weight: append the multibyte weight record of ELEM to POOL and
   return a value that locates it.

   POOL     obstack that receives the record; its object size before any
            growth is the byte offset kept in retval.
   COLLATE  only used to recognize the UNDEFINED element, i.e. the case
            ELEM == &COLLATE->undefined, whose weights are already in
            the pool (the value returned for that case is on a line not
            visible in this listing; presumably 0 - TODO confirm).
   ELEM     element whose weights are written.

   For each of the nrules levels a scratch buffer of weights[cnt].cnt * 7
   bytes collects the utf8_encode output for the mborder[cnt] value of
   every non-NULL weight (NULL weights are IGNORE entries and add
   nothing).  The record for that level is one length byte followed by
   LEN encoded bytes.

   The result is the starting offset combined with the low seven bits of
   the ruleset index of the element section, placed at bit 24 and up.  */
1892 output_weight (struct obstack
*pool
, struct locale_collate_t
*collate
,
1893 struct element_t
*elem
)
1898 /* Optimize the use of UNDEFINED. */
1899 if (elem
== &collate
->undefined
)
1900 /* The weights are already inserted. */
1903 /* This byte can start exactly one collation element and this is
1904 a single byte. We can directly give the index to the weights. */
1905 retval
= obstack_object_size (pool
);
1907 /* Construct the weight. */
1908 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1910 char buf
[elem
->weights
[cnt
].cnt
* 7];
1914 for (i
= 0; i
< elem
->weights
[cnt
].cnt
; ++i
)
1915 /* Encode the weight value. We do nothing for IGNORE entries. */
1916 if (elem
->weights
[cnt
].w
[i
] != NULL
)
1917 len
+= utf8_encode (&buf
[len
],
1918 elem
->weights
[cnt
].w
[i
]->mborder
[cnt
]);
1920 /* And add the buffer content. */
1921 obstack_1grow (pool
, len
);
1922 obstack_grow (pool
, buf
, len
);
1925 return retval
| ((elem
->section
->ruleidx
& 0x7f) << 24);
/* output_weightwc: wide character counterpart of output_weight; append
   the weight record of ELEM to POOL as 32-bit words and return a value
   that locates it.

   POOL     obstack that receives the record; retval is its object size
            before growth divided by sizeof (int32_t), i.e. an index in
            words rather than bytes.
   COLLATE  only used to recognize the UNDEFINED element, i.e. the case
            ELEM == &COLLATE->undefined, whose weights are already in
            the pool (the value returned for that case is on a line not
            visible in this listing; presumably 0 - TODO confirm).
   ELEM     element whose weights are written.

   For each of the nrules levels the wcorder value of every non-NULL
   weight is gathered in a scratch array (NULL weights are skipped), then
   the count J and the J gathered words are appended to POOL.

   The result is the starting index combined with the low seven bits of
   the ruleset index of the element section, placed at bit 24 and up.  */
1930 output_weightwc (struct obstack
*pool
, struct locale_collate_t
*collate
,
1931 struct element_t
*elem
)
1936 /* Optimize the use of UNDEFINED. */
1937 if (elem
== &collate
->undefined
)
1938 /* The weights are already inserted. */
1941 /* This byte can start exactly one collation element and this is
1942 a single byte. We can directly give the index to the weights. */
1943 retval
= obstack_object_size (pool
) / sizeof (int32_t);
1945 /* Construct the weight. */
1946 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1948 int32_t buf
[elem
->weights
[cnt
].cnt
];
1952 for (i
= 0, j
= 0; i
< elem
->weights
[cnt
].cnt
; ++i
)
1953 if (elem
->weights
[cnt
].w
[i
] != NULL
)
1954 buf
[j
++] = elem
->weights
[cnt
].w
[i
]->wcorder
;
1956 /* And add the buffer content. */
1957 obstack_int32_grow (pool
, j
);
1959 obstack_grow (pool
, buf
, j
* sizeof (int32_t));
1962 return retval
| ((elem
->section
->ruleidx
& 0x7f) << 24);
/* Shared state for add_to_tablewc, which is handed to
   wchead_table_iterate as a callback and therefore receives only the
   wide character and its element list.  collate_output stores the
   addresses of its local pools and table here right before the
   iteration and clears the whole object with memset afterwards.

   Members as used by add_to_tablewc:
     weightpool  pool passed to output_weightwc.
     extrapool   pool that receives the per-character entry lists.
     indpool     pool that receives the weight indices of compressed
                 runs of consecutive entries.
     collate     collation data passed to output_weightwc.
     tablewc     table updated through collidx_table_add.

   NOTE(review): the lines that open and close this object definition
   are not visible in this listing.  */
1965 /* If localedef is ever threaded, this would need to be __thread var. */
1968 struct obstack
*weightpool
;
1969 struct obstack
*extrapool
;
1970 struct obstack
*indpool
;
1971 struct locale_collate_t
*collate
;
1972 struct collidx_table
*tablewc
;
1975 static void add_to_tablewc (uint32_t ch
, struct element_t
*runp
);
/* add_to_tablewc: emit the wide character table data for CH, whose list
   of collation elements starts at RUNP.  All output goes through the
   pointers stored in atwc.

   Simple case (RUNP has no successor and is one wide character long):
   the weights are written with output_weightwc and the returned index
   is stored for CH with collidx_table_add.

   Otherwise CH is mapped to the negated current size of extrapool,
   counted in uint32_t units, and the list is walked through wcnext:

   - An entry starts a run when its successor has the same nwcs, the
     same first nwcs - 1 wide characters, and a last wide character that
     is exactly one smaller.  For a run, extrapool receives the negated
     current indpool size in int32_t units, then nwcs - 1, then
     wcs[1..] of one end of the run and, without a length, wcs[1..] of
     the other end.  indpool receives one output_weightwc index per
     member, walking back through wclast to series_startp.
   - A single entry adds its weight index, nwcs - 1 and wcs[1..] to
     extrapool.

   The _fast grow calls rely on the preceding obstack_make_room, which
   is only issued when int32_t and int have the same size; in the other
   case obstack_int32_grow_fast falls back to obstack_grow.

   NOTE(review): this listing has lost lines (braces, do and else
   keywords, local declarations and some assignments such as the one
   that initializes curp); confirm details against the upstream file.  */
1978 add_to_tablewc (uint32_t ch
, struct element_t
*runp
)
1980 if (runp
->wcnext
== NULL
&& runp
->nwcs
== 1)
1982 int32_t weigthidx
= output_weightwc (atwc
.weightpool
, atwc
.collate
,
1984 collidx_table_add (atwc
.tablewc
, ch
, weigthidx
);
1988 /* As for the singlebyte table, we recognize sequences and
1990 struct element_t
*lastp
;
1992 collidx_table_add (atwc
.tablewc
, ch
,
1993 -(obstack_object_size (atwc
.extrapool
)
1994 / sizeof (uint32_t)));
1998 /* Store the current index in the weight table. We know that
1999 the current position in the `extrapool' is aligned on a
2004 /* Find out whether this is a single entry or we have more than
2005 one consecutive entry. */
2006 if (runp
->wcnext
!= NULL
2007 && runp
->nwcs
== runp
->wcnext
->nwcs
2008 && wmemcmp ((wchar_t *) runp
->wcs
,
2009 (wchar_t *)runp
->wcnext
->wcs
,
2010 runp
->nwcs
- 1) == 0
2011 && (runp
->wcs
[runp
->nwcs
- 1]
2012 == runp
->wcnext
->wcs
[runp
->nwcs
- 1] + 1))
2015 struct element_t
*series_startp
= runp
;
2016 struct element_t
*curp
;
2018 /* Now add first the initial byte sequence. */
2019 added
= (1 + 1 + 2 * (runp
->nwcs
- 1)) * sizeof (int32_t);
2020 if (sizeof (int32_t) == sizeof (int))
2021 obstack_make_room (atwc
.extrapool
, added
);
2023 /* More than one consecutive entry. We mark this by having
2024 a negative index into the indirect table. */
2025 obstack_int32_grow_fast (atwc
.extrapool
,
2026 -(obstack_object_size (atwc
.indpool
)
2027 / sizeof (int32_t)));
2028 obstack_int32_grow_fast (atwc
.extrapool
, runp
->nwcs
- 1);
2031 runp
= runp
->wcnext
;
2032 while (runp
->wcnext
!= NULL
2033 && runp
->nwcs
== runp
->wcnext
->nwcs
2034 && wmemcmp ((wchar_t *) runp
->wcs
,
2035 (wchar_t *)runp
->wcnext
->wcs
,
2036 runp
->nwcs
- 1) == 0
2037 && (runp
->wcs
[runp
->nwcs
- 1]
2038 == runp
->wcnext
->wcs
[runp
->nwcs
- 1] + 1));
2040 /* Now walk backward from here to the beginning. */
2043 for (i
= 1; i
< runp
->nwcs
; ++i
)
2044 obstack_int32_grow_fast (atwc
.extrapool
, curp
->wcs
[i
]);
2046 /* Now find the end of the consecutive sequence and
2047 add all the indices in the indirect pool. */
2050 weightidx
= output_weightwc (atwc
.weightpool
, atwc
.collate
,
2052 obstack_int32_grow (atwc
.indpool
, weightidx
);
2054 curp
= curp
->wclast
;
2056 while (curp
!= series_startp
);
2058 /* Add the final weight. */
2059 weightidx
= output_weightwc (atwc
.weightpool
, atwc
.collate
,
2061 obstack_int32_grow (atwc
.indpool
, weightidx
);
2063 /* And add the end byte sequence. Without length this
2065 for (i
= 1; i
< curp
->nwcs
; ++i
)
2066 obstack_int32_grow (atwc
.extrapool
, curp
->wcs
[i
]);
2070 /* A single entry. Simply add the index and the length and
2071 string (except for the first character which is already
2075 /* Output the weight info. */
2076 weightidx
= output_weightwc (atwc
.weightpool
, atwc
.collate
,
2079 added
= (1 + 1 + runp
->nwcs
- 1) * sizeof (int32_t);
2080 if (sizeof (int) == sizeof (int32_t))
2081 obstack_make_room (atwc
.extrapool
, added
);
2083 obstack_int32_grow_fast (atwc
.extrapool
, weightidx
);
2084 obstack_int32_grow_fast (atwc
.extrapool
, runp
->nwcs
- 1);
2085 for (i
= 1; i
< runp
->nwcs
; ++i
)
2086 obstack_int32_grow_fast (atwc
.extrapool
, runp
->wcs
[i
]);
2091 runp
= runp
->wcnext
;
2093 while (runp
!= NULL
);
2098 collate_output (struct localedef_t
*locale
, const struct charmap_t
*charmap
,
2099 const char *output_path
)
2101 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
2102 const size_t nelems
= _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
);
2103 struct iovec iov
[2 + nelems
];
2104 struct locale_file data
;
2105 uint32_t idx
[nelems
];
2108 int32_t tablemb
[256];
2109 struct obstack weightpool
;
2110 struct obstack extrapool
;
2111 struct obstack indirectpool
;
2112 struct section_list
*sect
;
2113 struct collidx_table tablewc
;
2115 uint32_t *elem_table
;
2117 struct element_t
*runp
;
2119 data
.magic
= LIMAGIC (LC_COLLATE
);
2121 iov
[0].iov_base
= (void *) &data
;
2122 iov
[0].iov_len
= sizeof (data
);
2124 iov
[1].iov_base
= (void *) idx
;
2125 iov
[1].iov_len
= sizeof (idx
);
2127 idx
[0] = iov
[0].iov_len
+ iov
[1].iov_len
;
2130 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_NRULES
));
2131 iov
[2 + cnt
].iov_base
= &nrules
;
2132 iov
[2 + cnt
].iov_len
= sizeof (uint32_t);
2133 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2136 /* If we have no LC_COLLATE data emit only the number of rules as zero. */
2137 if (collate
== NULL
)
2141 while (cnt
< _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
))
2143 /* The words have to be handled specially. */
2144 if (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB
))
2146 iov
[2 + cnt
].iov_base
= &dummy
;
2147 iov
[2 + cnt
].iov_len
= sizeof (int32_t);
2151 iov
[2 + cnt
].iov_base
= NULL
;
2152 iov
[2 + cnt
].iov_len
= 0;
2155 if (cnt
+ 1 < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
))
2156 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2160 assert (cnt
== _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
));
2162 write_locale_data (output_path
, LC_COLLATE
, "LC_COLLATE", 2 + cnt
, iov
);
2167 obstack_init (&weightpool
);
2168 obstack_init (&extrapool
);
2169 obstack_init (&indirectpool
);
2171 /* Since we are using the sign of an integer to mark indirection the
2172 offsets in the arrays we are indirectly referring to must not be
2173 zero since -0 == 0. Therefore we add a bit of dummy content. */
2174 obstack_int32_grow (&extrapool
, 0);
2175 obstack_int32_grow (&indirectpool
, 0);
2177 /* Prepare the ruleset table. */
2178 for (sect
= collate
->sections
, i
= 0; sect
!= NULL
; sect
= sect
->next
)
2179 if (sect
->rules
!= NULL
&& sect
->ruleidx
== i
)
2183 obstack_make_room (&weightpool
, nrules
);
2185 for (j
= 0; j
< nrules
; ++j
)
2186 obstack_1grow_fast (&weightpool
, sect
->rules
[j
]);
2189 /* And align the output. */
2190 i
= (nrules
* i
) % __alignof__ (int32_t);
2193 obstack_1grow (&weightpool
, '\0');
2194 while (++i
< __alignof__ (int32_t));
2196 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_RULESETS
));
2197 iov
[2 + cnt
].iov_len
= obstack_object_size (&weightpool
);
2198 iov
[2 + cnt
].iov_base
= obstack_finish (&weightpool
);
2199 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2202 /* Generate the 8-bit table. Walk through the lists of sequences
2203 starting with the same byte and add them one after the other to
2204 the table. In case we have more than one sequence starting with
2205 the same byte we have to use extra indirection.
2207 First add a record for the NUL byte. This entry will never be used
2208 so it does not matter. */
2211 /* Now insert the `UNDEFINED' value if it is used. Since this value
2212 will probably be used more than once it is good to store the
2213 weights only once. */
2214 if (collate
->undefined
.used_in_level
!= 0)
2215 output_weight (&weightpool
, collate
, &collate
->undefined
);
2217 for (ch
= 1; ch
< 256; ++ch
)
2218 if (collate
->mbheads
[ch
]->mbnext
== NULL
2219 && collate
->mbheads
[ch
]->nmbs
<= 1)
2221 tablemb
[ch
] = output_weight (&weightpool
, collate
,
2222 collate
->mbheads
[ch
]);
2226 /* The entries in the list are sorted by length and then
2227 alphabetically. This is the order in which we will add the
2228 elements to the collation table. This allows simply walking
2229 the table in sequence and stopping at the first matching
2230 entry. Since the longer sequences are coming first in the
2231 list they have the possibility to match first, just as it
2232 has to be. In the worst case we are walking to the end of
2233 the list where we put, if no singlebyte sequence is defined
2234 in the locale definition, the weights for UNDEFINED.
2236 To reduce the length of the search list we compress them a bit.
2237 This happens by collecting sequences of consecutive byte
2238 sequences in one entry (having a begin and end byte sequence)
2239 and add only one index into the weight table. We can find the
2240 consecutive entries since they are also consecutive in the list. */
2241 struct element_t
*runp
= collate
->mbheads
[ch
];
2242 struct element_t
*lastp
;
2244 assert ((obstack_object_size (&extrapool
)
2245 & (__alignof__ (int32_t) - 1)) == 0);
2247 tablemb
[ch
] = -obstack_object_size (&extrapool
);
2251 /* Store the current index in the weight table. We know that
2252 the current position in the `extrapool' is aligned on a
2257 /* Find out whether this is a single entry or we have more than
2258 one consecutive entry. */
2259 if (runp
->mbnext
!= NULL
2260 && runp
->nmbs
== runp
->mbnext
->nmbs
2261 && memcmp (runp
->mbs
, runp
->mbnext
->mbs
, runp
->nmbs
- 1) == 0
2262 && (runp
->mbs
[runp
->nmbs
- 1]
2263 == runp
->mbnext
->mbs
[runp
->nmbs
- 1] + 1))
2266 struct element_t
*series_startp
= runp
;
2267 struct element_t
*curp
;
2269 /* Compute how much space we will need. */
2270 added
= ((sizeof (int32_t) + 1 + 2 * (runp
->nmbs
- 1)
2271 + __alignof__ (int32_t) - 1)
2272 & ~(__alignof__ (int32_t) - 1));
2273 assert ((obstack_object_size (&extrapool
)
2274 & (__alignof__ (int32_t) - 1)) == 0);
2275 obstack_make_room (&extrapool
, added
);
2277 /* More than one consecutive entry. We mark this by having
2278 a negative index into the indirect table. */
2279 obstack_int32_grow_fast (&extrapool
,
2280 -(obstack_object_size (&indirectpool
)
2281 / sizeof (int32_t)));
2283 /* Now search first the end of the series. */
2285 runp
= runp
->mbnext
;
2286 while (runp
->mbnext
!= NULL
2287 && runp
->nmbs
== runp
->mbnext
->nmbs
2288 && memcmp (runp
->mbs
, runp
->mbnext
->mbs
,
2289 runp
->nmbs
- 1) == 0
2290 && (runp
->mbs
[runp
->nmbs
- 1]
2291 == runp
->mbnext
->mbs
[runp
->nmbs
- 1] + 1));
2293 /* Now walk backward from here to the beginning. */
2296 assert (runp
->nmbs
<= 256);
2297 obstack_1grow_fast (&extrapool
, curp
->nmbs
- 1);
2298 for (i
= 1; i
< curp
->nmbs
; ++i
)
2299 obstack_1grow_fast (&extrapool
, curp
->mbs
[i
]);
2301 /* Now find the end of the consecutive sequence and
2302 add all the indices in the indirect pool. */
2305 weightidx
= output_weight (&weightpool
, collate
, curp
);
2306 obstack_int32_grow (&indirectpool
, weightidx
);
2308 curp
= curp
->mblast
;
2310 while (curp
!= series_startp
);
2312 /* Add the final weight. */
2313 weightidx
= output_weight (&weightpool
, collate
, curp
);
2314 obstack_int32_grow (&indirectpool
, weightidx
);
2316 /* And add the end byte sequence. Without length this
2318 for (i
= 1; i
< curp
->nmbs
; ++i
)
2319 obstack_1grow_fast (&extrapool
, curp
->mbs
[i
]);
2323 /* A single entry. Simply add the index and the length and
2324 string (except for the first character which is already
2328 /* Output the weight info. */
2329 weightidx
= output_weight (&weightpool
, collate
, runp
);
2331 added
= ((sizeof (int32_t) + 1 + runp
->nmbs
- 1
2332 + __alignof__ (int32_t) - 1)
2333 & ~(__alignof__ (int32_t) - 1));
2334 assert ((obstack_object_size (&extrapool
)
2335 & (__alignof__ (int32_t) - 1)) == 0);
2336 obstack_make_room (&extrapool
, added
);
2338 obstack_int32_grow_fast (&extrapool
, weightidx
);
2339 assert (runp
->nmbs
<= 256);
2340 obstack_1grow_fast (&extrapool
, runp
->nmbs
- 1);
2342 for (i
= 1; i
< runp
->nmbs
; ++i
)
2343 obstack_1grow_fast (&extrapool
, runp
->mbs
[i
]);
2346 /* Add alignment bytes if necessary. */
2347 while ((obstack_object_size (&extrapool
)
2348 & (__alignof__ (int32_t) - 1)) != 0)
2349 obstack_1grow_fast (&extrapool
, '\0');
2353 runp
= runp
->mbnext
;
2355 while (runp
!= NULL
);
2357 assert ((obstack_object_size (&extrapool
)
2358 & (__alignof__ (int32_t) - 1)) == 0);
2360 /* If the final entry in the list is not a single character we
2361 add an UNDEFINED entry here. */
2362 if (lastp
->nmbs
!= 1)
2364 int added
= ((sizeof (int32_t) + 1 + 1 + __alignof__ (int32_t) - 1)
2365 & ~(__alignof__ (int32_t) - 1));
2366 obstack_make_room (&extrapool
, added
);
2368 obstack_int32_grow_fast (&extrapool
, 0);
2369 /* XXX What rule? We just pick the first. */
2370 obstack_1grow_fast (&extrapool
, 0);
2371 /* Length is zero. */
2372 obstack_1grow_fast (&extrapool
, 0);
2374 /* Add alignment bytes if necessary. */
2375 while ((obstack_object_size (&extrapool
)
2376 & (__alignof__ (int32_t) - 1)) != 0)
2377 obstack_1grow_fast (&extrapool
, '\0');
2381 /* Add padding to the tables if necessary. */
2382 while ((obstack_object_size (&weightpool
) & (__alignof__ (int32_t) - 1))
2384 obstack_1grow (&weightpool
, 0);
2386 /* Now add the four tables. */
2387 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_TABLEMB
));
2388 iov
[2 + cnt
].iov_base
= tablemb
;
2389 iov
[2 + cnt
].iov_len
= sizeof (tablemb
);
2390 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2391 assert ((iov
[2 + cnt
].iov_len
& (__alignof__ (int32_t) - 1)) == 0);
2394 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTMB
));
2395 iov
[2 + cnt
].iov_len
= obstack_object_size (&weightpool
);
2396 iov
[2 + cnt
].iov_base
= obstack_finish (&weightpool
);
2397 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2400 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_EXTRAMB
));
2401 iov
[2 + cnt
].iov_len
= obstack_object_size (&extrapool
);
2402 iov
[2 + cnt
].iov_base
= obstack_finish (&extrapool
);
2403 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2406 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTMB
));
2407 iov
[2 + cnt
].iov_len
= obstack_object_size (&indirectpool
);
2408 iov
[2 + cnt
].iov_base
= obstack_finish (&indirectpool
);
2409 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2410 assert ((iov
[2 + cnt
].iov_len
& (__alignof__ (int32_t) - 1)) == 0);
2414 /* Now the same for the wide character table. We need to store some
2415 more information here. */
2416 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_GAP1
));
2417 iov
[2 + cnt
].iov_base
= NULL
;
2418 iov
[2 + cnt
].iov_len
= 0;
2419 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2420 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2423 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_GAP2
));
2424 iov
[2 + cnt
].iov_base
= NULL
;
2425 iov
[2 + cnt
].iov_len
= 0;
2426 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2427 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2430 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_GAP3
));
2431 iov
[2 + cnt
].iov_base
= NULL
;
2432 iov
[2 + cnt
].iov_len
= 0;
2433 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2434 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2437 /* Since we are using the sign of an integer to mark indirection the
2438 offsets in the arrays we are indirectly referring to must not be
2439 zero since -0 == 0. Therefore we add a bit of dummy content. */
2440 obstack_int32_grow (&extrapool
, 0);
2441 obstack_int32_grow (&indirectpool
, 0);
2443 /* Now insert the `UNDEFINED' value if it is used. Since this value
2444 will probably be used more than once it is good to store the
2445 weights only once. */
2446 if (output_weightwc (&weightpool
, collate
, &collate
->undefined
) != 0)
2449 /* Generate the table. Walk through the lists of sequences starting
2450 with the same wide character and add them one after the other to
2451 the table. In case we have more than one sequence starting with
2452 the same byte we have to use extra indirection. */
2455 collidx_table_init (&tablewc
);
2457 atwc
.weightpool
= &weightpool
;
2458 atwc
.extrapool
= &extrapool
;
2459 atwc
.indpool
= &indirectpool
;
2460 atwc
.collate
= collate
;
2461 atwc
.tablewc
= &tablewc
;
2463 wchead_table_iterate (&collate
->wcheads
, add_to_tablewc
);
2465 memset (&atwc
, 0, sizeof (atwc
));
2467 collidx_table_finalize (&tablewc
);
2469 /* Now add the four tables. */
2470 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_TABLEWC
));
2471 iov
[2 + cnt
].iov_base
= tablewc
.result
;
2472 iov
[2 + cnt
].iov_len
= tablewc
.result_size
;
2473 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2474 assert (iov
[2 + cnt
].iov_len
% sizeof (int32_t) == 0);
2475 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2478 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTWC
));
2479 iov
[2 + cnt
].iov_len
= obstack_object_size (&weightpool
);
2480 iov
[2 + cnt
].iov_base
= obstack_finish (&weightpool
);
2481 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2482 assert (iov
[2 + cnt
].iov_len
% sizeof (int32_t) == 0);
2483 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2486 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_EXTRAWC
));
2487 iov
[2 + cnt
].iov_len
= obstack_object_size (&extrapool
);
2488 iov
[2 + cnt
].iov_base
= obstack_finish (&extrapool
);
2489 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2490 assert (iov
[2 + cnt
].iov_len
% sizeof (int32_t) == 0);
2491 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2494 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTWC
));
2495 iov
[2 + cnt
].iov_len
= obstack_object_size (&indirectpool
);
2496 iov
[2 + cnt
].iov_base
= obstack_finish (&indirectpool
);
2497 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2498 assert (iov
[2 + cnt
].iov_len
% sizeof (int32_t) == 0);
2499 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2503 /* Finally write the table with collation element names out. It is
2504 a hash table with a simple function which gets the name of the
2505 character as the input. One character might have many names. The
2506 value associated with the name is an index into the weight table
2507 where we are then interested in the first-level weight value.
2509 To determine how large the table should be we are counting the
2510 elements we have to put in. Since we are using internal chaining
2511 using a secondary hash function we have to make the table a bit
2512 larger to avoid extremely long search times. We can achieve
2513 good results with a 40% larger table than there are entries. */
2515 runp
= collate
->start
;
2516 while (runp
!= NULL
)
2518 if (runp
->mbs
!= NULL
&& runp
->weights
!= NULL
&& !runp
->is_character
)
2519 /* Yep, the element really counts. */
2524 /* Add 40% and find the next prime number. */
2525 elem_size
= next_prime (elem_size
* 1.4);
2527 /* Allocate the table. Each entry consists of two words: the hash
2528 value and an index in a secondary table which provides the index
2529 into the weight table and the string itself (so that a match can
2531 elem_table
= (uint32_t *) obstack_alloc (&extrapool
,
2532 elem_size
* 2 * sizeof (uint32_t));
2533 memset (elem_table
, '\0', elem_size
* 2 * sizeof (uint32_t));
2535 /* Now add the elements. */
2536 runp
= collate
->start
;
2537 while (runp
!= NULL
)
2539 if (runp
->mbs
!= NULL
&& runp
->weights
!= NULL
&& !runp
->is_character
)
2541 /* Compute the hash value of the name. */
2542 uint32_t namelen
= strlen (runp
->name
);
2543 uint32_t hash
= elem_hash (runp
->name
, namelen
);
2544 size_t idx
= hash
% elem_size
;
2546 size_t start_idx
= idx
;
2549 if (elem_table
[idx
* 2] != 0)
2551 /* The spot is already taken. Try iterating using the value
2552 from the secondary hashing function. */
2553 size_t iter
= hash
% (elem_size
- 2) + 1;
2558 if (idx
>= elem_size
)
2560 assert (idx
!= start_idx
);
2562 while (elem_table
[idx
* 2] != 0);
2564 /* This is the spot where we will insert the value. */
2565 elem_table
[idx
* 2] = hash
;
2566 elem_table
[idx
* 2 + 1] = obstack_object_size (&extrapool
);
2568 /* Now the string itself, including its length. */
2569 obstack_1grow (&extrapool
, namelen
);
2570 obstack_grow (&extrapool
, runp
->name
, namelen
);
2572 /* And the multibyte representation. */
2573 obstack_1grow (&extrapool
, runp
->nmbs
);
2574 obstack_grow (&extrapool
, runp
->mbs
, runp
->nmbs
);
2576 /* And align again to 32 bits. */
2577 if ((1 + namelen
+ 1 + runp
->nmbs
) % sizeof (int32_t) != 0)
2578 obstack_grow (&extrapool
, "\0\0",
2580 - ((1 + namelen
+ 1 + runp
->nmbs
)
2581 % sizeof (int32_t))));
2583 /* Now some 32-bit values: multibyte collation sequence,
2584 wide char string (including length), and wide char
2585 collation sequence. */
2586 obstack_int32_grow (&extrapool
, runp
->mbseqorder
);
2588 obstack_int32_grow (&extrapool
, runp
->nwcs
);
2589 obstack_grow (&extrapool
, runp
->wcs
,
2590 runp
->nwcs
* sizeof (uint32_t));
2592 obstack_int32_grow (&extrapool
, runp
->wcseqorder
);
2598 /* Prepare to write out this data. */
2599 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB
));
2600 iov
[2 + cnt
].iov_base
= &elem_size
;
2601 iov
[2 + cnt
].iov_len
= sizeof (int32_t);
2602 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2603 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2606 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_SYMB_TABLEMB
));
2607 iov
[2 + cnt
].iov_base
= elem_table
;
2608 iov
[2 + cnt
].iov_len
= elem_size
* 2 * sizeof (int32_t);
2609 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2610 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2613 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_SYMB_EXTRAMB
));
2614 iov
[2 + cnt
].iov_len
= obstack_object_size (&extrapool
);
2615 iov
[2 + cnt
].iov_base
= obstack_finish (&extrapool
);
2616 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2619 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQMB
));
2620 iov
[2 + cnt
].iov_base
= collate
->mbseqorder
;
2621 iov
[2 + cnt
].iov_len
= 256;
2622 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2625 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQWC
));
2626 iov
[2 + cnt
].iov_base
= collate
->wcseqorder
.result
;
2627 iov
[2 + cnt
].iov_len
= collate
->wcseqorder
.result_size
;
2628 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2629 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2632 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_CODESET
));
2633 iov
[2 + cnt
].iov_base
= (void *) charmap
->code_set_name
;
2634 iov
[2 + cnt
].iov_len
= strlen (iov
[2 + cnt
].iov_base
) + 1;
2637 assert (cnt
== _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
));
2639 write_locale_data (output_path
, LC_COLLATE
, "LC_COLLATE", 2 + cnt
, iov
);
2641 obstack_free (&weightpool
, NULL
);
2642 obstack_free (&extrapool
, NULL
);
2643 obstack_free (&indirectpool
, NULL
);
/* Read tokens from LDFILE and classify them; the name and the call
   sites suggest this is used while a conditional part of the locale
   definition is being skipped.  Tokens are fetched with lr_token,
   passing CHARMAP but no result locale and no repertoire.  In the
   code visible here COLLATE is only forwarded to the recursive call.
   TO_ENDIF is only ever tested for being zero or non-zero: when it
   is zero, `else', `elifdef' and `elifndef' are recognized as
   stopping points in addition to `endif'; when it is non-zero only
   `endif' is, and an `else' is reported as an error.
   NOTE(review): the return type, the braces, the enclosing loop and
   the return statements are not visible in this listing.  Callers
   assign the result to an `enum token_t' and compare it with token
   values, so presumably the token that stopped the scan is
   returned -- confirm against the complete file.  */
2648 skip_to (struct linereader
*ldfile
, struct locale_collate_t
*collate
,
2649 const struct charmap_t
*charmap
, int to_endif
)
/* Fetch the next token and remember its type.  */
2653 struct token
*now
= lr_token (ldfile
, charmap
, NULL
, NULL
, 0);
2654 enum token_t nowtok
= now
->tok
;
/* End of file or an `END' token.  NOTE(review): the statement
   controlled by this test is not visible here; presumably NOWTOK
   is returned to the caller -- confirm.  */
2656 if (nowtok
== tok_eof
|| nowtok
== tok_end
)
/* A conditional opened inside the region being scanned: report it
   as unsupported, then let a recursive call consume it.  */
2659 if (nowtok
== tok_ifdef
|| nowtok
== tok_ifndef
)
2661 lr_error (ldfile
, _("%s: nested conditionals not supported"),
/* The token value tok_endif is passed as the TO_ENDIF flag.
   NOTE(review): this presumably relies on tok_endif being
   non-zero -- verify against the token_t enumeration.  */
2663 nowtok
= skip_to (ldfile
, collate
, charmap
, tok_endif
);
/* The nested scan itself ran into end of file or `END'.
   NOTE(review): the controlled statement is not visible here.  */
2664 if (nowtok
== tok_eof
|| nowtok
== tok_end
)
/* `endif' always matches; `else' matches only when TO_ENDIF is
   zero.  The remainder of the line is consumed with the second
   argument of lr_ignore_rest set to 1.  */
2667 else if (nowtok
== tok_endif
|| (!to_endif
&& nowtok
== tok_else
))
2669 lr_ignore_rest (ldfile
, 1);
/* `elifdef'/`elifndef' are recognized only when TO_ENDIF is zero;
   no lr_ignore_rest call is visible in this branch, in line with
   the comment below.  */
2672 else if (!to_endif
&& (nowtok
== tok_elifdef
|| nowtok
== tok_elifndef
))
2674 /* Do not read the rest of the line. */
/* Because of the earlier branch, an `else' can reach this test only
   when TO_ENDIF is non-zero; it is diagnosed as an error.
   NOTE(review): the message reads "then" where "than" was probably
   meant; it is a translatable msgid, so it is left unchanged here.  */
2677 else if (nowtok
== tok_else
)
2679 lr_error (ldfile
, _("%s: more then one 'else'"), "LC_COLLATE");
/* Discard the rest of the current line, this time with the second
   argument of lr_ignore_rest set to 0.  */
2682 lr_ignore_rest (ldfile
, 0);
2688 collate_read (struct linereader
*ldfile
, struct localedef_t
*result
,
2689 const struct charmap_t
*charmap
, const char *repertoire_name
,
2692 struct repertoire_t
*repertoire
= NULL
;
2693 struct locale_collate_t
*collate
;
2695 struct token
*arg
= NULL
;
2696 enum token_t nowtok
;
2697 enum token_t was_ellipsis
= tok_none
;
2698 struct localedef_t
*copy_locale
= NULL
;
2701 1 - between `order-start' and `order-end'
2702 2 - after `order-end'
2703 3 - after `reorder-after', waiting for `reorder-end'
2704 4 - after `reorder-end'
2705 5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2706 6 - after `reorder-sections-end'
2710 /* Get the repertoire we have to use. */
2711 if (repertoire_name
!= NULL
)
2712 repertoire
= repertoire_read (repertoire_name
);
2714 /* The rest of the line containing `LC_COLLATE' must be free. */
2715 lr_ignore_rest (ldfile
, 1);
2721 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2724 while (nowtok
== tok_eol
);
2726 if (nowtok
!= tok_define
)
2730 lr_ignore_rest (ldfile
, 0);
2733 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2734 if (arg
->tok
!= tok_ident
)
2735 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2738 /* Simply add the new symbol. */
2739 struct name_list
*newsym
= xmalloc (sizeof (*newsym
)
2740 + arg
->val
.str
.lenmb
+ 1);
2741 memcpy (newsym
->str
, arg
->val
.str
.startmb
, arg
->val
.str
.lenmb
);
2742 newsym
->str
[arg
->val
.str
.lenmb
] = '\0';
2743 newsym
->next
= defined
;
2746 lr_ignore_rest (ldfile
, 1);
2751 if (nowtok
== tok_copy
)
2753 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2754 if (now
->tok
!= tok_string
)
2756 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2760 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2761 while (now
->tok
!= tok_eof
&& now
->tok
!= tok_end
);
2763 if (now
->tok
!= tok_eof
2764 || (now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
),
2765 now
->tok
== tok_eof
))
2766 lr_error (ldfile
, _("%s: premature end of file"), "LC_COLLATE");
2767 else if (now
->tok
!= tok_lc_collate
)
2769 lr_error (ldfile
, _("\
2770 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2771 lr_ignore_rest (ldfile
, 0);
2774 lr_ignore_rest (ldfile
, 1);
2779 if (! ignore_content
)
2781 /* Get the locale definition. */
2782 copy_locale
= load_locale (LC_COLLATE
, now
->val
.str
.startmb
,
2783 repertoire_name
, charmap
, NULL
);
2784 if ((copy_locale
->avail
& COLLATE_LOCALE
) == 0)
2786 /* Not yet loaded. So do it now. */
2787 if (locfile_read (copy_locale
, charmap
) != 0)
2791 if (copy_locale
->categories
[LC_COLLATE
].collate
== NULL
)
2795 lr_ignore_rest (ldfile
, 1);
2797 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2801 /* Prepare the data structures. */
2802 collate_startup (ldfile
, result
, copy_locale
, ignore_content
);
2803 collate
= result
->categories
[LC_COLLATE
].collate
;
2811 /* Of course we don't proceed beyond the end of file. */
2812 if (nowtok
== tok_eof
)
2815 /* Ignore empty lines. */
2816 if (nowtok
== tok_eol
)
2818 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2826 /* Allow copying other locales. */
2827 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2828 if (now
->tok
!= tok_string
)
2831 if (! ignore_content
)
2832 load_locale (LC_COLLATE
, now
->val
.str
.startmb
, repertoire_name
,
2835 lr_ignore_rest (ldfile
, 1);
2838 case tok_coll_weight_max
:
2839 /* Ignore the rest of the line if we don't need the input of
2843 lr_ignore_rest (ldfile
, 0);
2850 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2851 if (arg
->tok
!= tok_number
)
2853 if (collate
->col_weight_max
!= -1)
2854 lr_error (ldfile
, _("%s: duplicate definition of `%s'"),
2855 "LC_COLLATE", "col_weight_max");
2857 collate
->col_weight_max
= arg
->val
.num
;
2858 lr_ignore_rest (ldfile
, 1);
2861 case tok_section_symbol
:
2862 /* Ignore the rest of the line if we don't need the input of
2866 lr_ignore_rest (ldfile
, 0);
2873 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2874 if (arg
->tok
!= tok_bsymbol
)
2876 else if (!ignore_content
)
2878 /* Check whether this section is already known. */
2879 struct section_list
*known
= collate
->sections
;
2880 while (known
!= NULL
)
2882 if (strcmp (known
->name
, arg
->val
.str
.startmb
) == 0)
2884 known
= known
->next
;
2890 _("%s: duplicate declaration of section `%s'"),
2891 "LC_COLLATE", arg
->val
.str
.startmb
);
2892 free (arg
->val
.str
.startmb
);
2895 collate
->sections
= make_seclist_elem (collate
,
2896 arg
->val
.str
.startmb
,
2899 lr_ignore_rest (ldfile
, known
== NULL
);
2903 free (arg
->val
.str
.startmb
);
2904 lr_ignore_rest (ldfile
, 0);
2908 case tok_collating_element
:
2909 /* Ignore the rest of the line if we don't need the input of
2913 lr_ignore_rest (ldfile
, 0);
2917 if (state
!= 0 && state
!= 2)
2920 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2921 if (arg
->tok
!= tok_bsymbol
)
2925 const char *symbol
= arg
->val
.str
.startmb
;
2926 size_t symbol_len
= arg
->val
.str
.lenmb
;
2928 /* Next the `from' keyword. */
2929 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2930 if (arg
->tok
!= tok_from
)
2932 free ((char *) symbol
);
2936 ldfile
->return_widestr
= 1;
2937 ldfile
->translate_strings
= 1;
2939 /* Finally the string with the replacement. */
2940 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2942 ldfile
->return_widestr
= 0;
2943 ldfile
->translate_strings
= 0;
2945 if (arg
->tok
!= tok_string
)
2948 if (!ignore_content
&& symbol
!= NULL
)
2950 /* The name is already defined. */
2951 if (check_duplicate (ldfile
, collate
, charmap
,
2952 repertoire
, symbol
, symbol_len
))
2955 if (arg
->val
.str
.startmb
!= NULL
)
2956 insert_entry (&collate
->elem_table
, symbol
, symbol_len
,
2957 new_element (collate
,
2958 arg
->val
.str
.startmb
,
2959 arg
->val
.str
.lenmb
- 1,
2960 arg
->val
.str
.startwc
,
2961 symbol
, symbol_len
, 0));
2966 free ((char *) symbol
);
2967 free (arg
->val
.str
.startmb
);
2968 free (arg
->val
.str
.startwc
);
2970 lr_ignore_rest (ldfile
, 1);
2974 case tok_collating_symbol
:
2975 /* Ignore the rest of the line if we don't need the input of
2979 lr_ignore_rest (ldfile
, 0);
2983 if (state
!= 0 && state
!= 2)
2986 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2987 if (arg
->tok
!= tok_bsymbol
)
2991 char *symbol
= arg
->val
.str
.startmb
;
2992 size_t symbol_len
= arg
->val
.str
.lenmb
;
2993 char *endsymbol
= NULL
;
2994 size_t endsymbol_len
= 0;
2995 enum token_t ellipsis
= tok_none
;
2997 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2998 if (arg
->tok
== tok_ellipsis2
|| arg
->tok
== tok_ellipsis4
)
3000 ellipsis
= arg
->tok
;
3002 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
3004 if (arg
->tok
!= tok_bsymbol
)
3010 endsymbol
= arg
->val
.str
.startmb
;
3011 endsymbol_len
= arg
->val
.str
.lenmb
;
3013 lr_ignore_rest (ldfile
, 1);
3015 else if (arg
->tok
!= tok_eol
)
3021 if (!ignore_content
)
3024 || (ellipsis
!= tok_none
&& endsymbol
== NULL
))
3026 lr_error (ldfile
, _("\
3027 %s: unknown character in collating symbol name"),
3031 else if (ellipsis
== tok_none
)
3033 /* A single symbol, no ellipsis. */
3034 if (check_duplicate (ldfile
, collate
, charmap
,
3035 repertoire
, symbol
, symbol_len
))
3036 /* The name is already defined. */
3039 insert_entry (&collate
->sym_table
, symbol
, symbol_len
,
3040 new_symbol (collate
, symbol
, symbol_len
));
3042 else if (symbol_len
!= endsymbol_len
)
3046 _("invalid names for character range"));
3051 /* Oh my, we have to handle an ellipsis. First, as
3052 usual, determine the common prefix and then
3053 convert the rest into a range. */
3055 unsigned long int from
;
3056 unsigned long int to
;
3059 for (prefixlen
= 0; prefixlen
< symbol_len
; ++prefixlen
)
3060 if (symbol
[prefixlen
] != endsymbol
[prefixlen
])
3063 /* Convert the rest into numbers. */
3064 symbol
[symbol_len
] = '\0';
3065 from
= strtoul (&symbol
[prefixlen
], &endp
,
3066 ellipsis
== tok_ellipsis2
? 16 : 10);
3068 goto col_sym_inv_range
;
3070 endsymbol
[symbol_len
] = '\0';
3071 to
= strtoul (&endsymbol
[prefixlen
], &endp
,
3072 ellipsis
== tok_ellipsis2
? 16 : 10);
3074 goto col_sym_inv_range
;
3077 goto col_sym_inv_range
;
3079 /* Now loop over all entries. */
3084 symbuf
= (char *) obstack_alloc (&collate
->mempool
,
3087 /* Create the name. */
3089 ellipsis
== tok_ellipsis2
3090 ? "%.*s%.*lX" : "%.*s%.*lu",
3091 (int) prefixlen
, symbol
,
3092 (int) (symbol_len
- prefixlen
), from
);
3094 if (check_duplicate (ldfile
, collate
, charmap
,
3095 repertoire
, symbuf
, symbol_len
))
3096 /* The name is already defined. */
3099 insert_entry (&collate
->sym_table
, symbuf
,
3101 new_symbol (collate
, symbuf
,
3104 /* Increment the counter. */
3120 case tok_symbol_equivalence
:
3121 /* Ignore the rest of the line if we don't need the input of
3125 lr_ignore_rest (ldfile
, 0);
3132 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3133 if (arg
->tok
!= tok_bsymbol
)
3137 const char *newname
= arg
->val
.str
.startmb
;
3138 size_t newname_len
= arg
->val
.str
.lenmb
;
3139 const char *symname
;
3141 void *symval
; /* Actually struct symbol_t* */
3143 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3144 if (arg
->tok
!= tok_bsymbol
)
3146 free ((char *) newname
);
3150 symname
= arg
->val
.str
.startmb
;
3151 symname_len
= arg
->val
.str
.lenmb
;
3153 if (newname
== NULL
)
3155 lr_error (ldfile
, _("\
3156 %s: unknown character in equivalent definition name"),
3160 free ((char *) newname
);
3161 free ((char *) symname
);
3164 if (symname
== NULL
)
3166 lr_error (ldfile
, _("\
3167 %s: unknown character in equivalent definition value"),
3169 goto sym_equiv_free
;
3172 /* See whether the symbol name is already defined. */
3173 if (find_entry (&collate
->sym_table
, symname
, symname_len
,
3176 lr_error (ldfile
, _("\
3177 %s: unknown symbol `%s' in equivalent definition"),
3178 "LC_COLLATE", symname
);
3179 goto sym_equiv_free
;
3182 if (insert_entry (&collate
->sym_table
,
3183 newname
, newname_len
, symval
) < 0)
3185 lr_error (ldfile
, _("\
3186 error while adding equivalent collating symbol"));
3187 goto sym_equiv_free
;
3190 free ((char *) symname
);
3192 lr_ignore_rest (ldfile
, 1);
3196 /* Ignore the rest of the line if we don't need the input of
3200 lr_ignore_rest (ldfile
, 0);
3204 /* We get told about the scripts we know. */
3205 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3206 if (arg
->tok
!= tok_bsymbol
)
3210 struct section_list
*runp
= collate
->known_sections
;
3213 while (runp
!= NULL
)
3214 if (strncmp (runp
->name
, arg
->val
.str
.startmb
,
3215 arg
->val
.str
.lenmb
) == 0
3216 && runp
->name
[arg
->val
.str
.lenmb
] == '\0')
3219 runp
= runp
->def_next
;
3223 lr_error (ldfile
, _("duplicate definition of script `%s'"),
3225 lr_ignore_rest (ldfile
, 0);
3229 runp
= (struct section_list
*) xcalloc (1, sizeof (*runp
));
3230 name
= (char *) xmalloc (arg
->val
.str
.lenmb
+ 1);
3231 memcpy (name
, arg
->val
.str
.startmb
, arg
->val
.str
.lenmb
);
3232 name
[arg
->val
.str
.lenmb
] = '\0';
3235 runp
->def_next
= collate
->known_sections
;
3236 collate
->known_sections
= runp
;
3238 lr_ignore_rest (ldfile
, 1);
3241 case tok_order_start
:
3242 /* Ignore the rest of the line if we don't need the input of
3246 lr_ignore_rest (ldfile
, 0);
3250 if (state
!= 0 && state
!= 1 && state
!= 2)
3254 /* The 14652 draft does not specify whether all `order_start' lines
3255 must contain the same number of sort-rules, but 14651 does. So
3256 we require this here as well. */
3257 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3258 if (arg
->tok
== tok_bsymbol
)
3260 /* This better should be a section name. */
3261 struct section_list
*sp
= collate
->known_sections
;
3263 && (sp
->name
== NULL
3264 || strncmp (sp
->name
, arg
->val
.str
.startmb
,
3265 arg
->val
.str
.lenmb
) != 0
3266 || sp
->name
[arg
->val
.str
.lenmb
] != '\0'))
3271 lr_error (ldfile
, _("\
3272 %s: unknown section name `%.*s'"),
3273 "LC_COLLATE", (int) arg
->val
.str
.lenmb
,
3274 arg
->val
.str
.startmb
);
3275 /* We use the error section. */
3276 collate
->current_section
= &collate
->error_section
;
3278 if (collate
->error_section
.first
== NULL
)
3280 /* Insert &collate->error_section at the end of
3281 the collate->sections list. */
3282 if (collate
->sections
== NULL
)
3283 collate
->sections
= &collate
->error_section
;
3286 sp
= collate
->sections
;
3287 while (sp
->next
!= NULL
)
3290 sp
->next
= &collate
->error_section
;
3292 collate
->error_section
.next
= NULL
;
3297 /* One should not be allowed to open the same
3299 if (sp
->first
!= NULL
)
3300 lr_error (ldfile
, _("\
3301 %s: multiple order definitions for section `%s'"),
3302 "LC_COLLATE", sp
->name
);
3305 /* Insert sp in the collate->sections list,
3306 right after collate->current_section. */
3307 if (collate
->current_section
!= NULL
)
3309 sp
->next
= collate
->current_section
->next
;
3310 collate
->current_section
->next
= sp
;
3312 else if (collate
->sections
== NULL
)
3313 /* This is the first section to be defined. */
3314 collate
->sections
= sp
;
3316 collate
->current_section
= sp
;
3319 /* Next should come the end of the line or a semicolon. */
3320 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
3322 if (arg
->tok
== tok_eol
)
3326 /* This means we have exactly one rule: `forward'. */
3328 lr_error (ldfile
, _("\
3329 %s: invalid number of sorting rules"),
3333 sp
->rules
= obstack_alloc (&collate
->mempool
,
3334 (sizeof (enum coll_sort_rule
)
3336 for (cnt
= 0; cnt
< nrules
; ++cnt
)
3337 sp
->rules
[cnt
] = sort_forward
;
3343 /* Get the next token. */
3344 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
3350 /* There is no section symbol. Therefore we use the unnamed
3352 collate
->current_section
= &collate
->unnamed_section
;
3354 if (collate
->unnamed_section_defined
)
3355 lr_error (ldfile
, _("\
3356 %s: multiple order definitions for unnamed section"),
3360 /* Insert &collate->unnamed_section at the beginning of
3361 the collate->sections list. */
3362 collate
->unnamed_section
.next
= collate
->sections
;
3363 collate
->sections
= &collate
->unnamed_section
;
3364 collate
->unnamed_section_defined
= true;
3368 /* Now read the direction names. */
3369 read_directions (ldfile
, arg
, charmap
, repertoire
, result
);
3371 /* From now we need the strings untranslated. */
3372 ldfile
->translate_strings
= 0;
3376 /* Ignore the rest of the line if we don't need the input of
3380 lr_ignore_rest (ldfile
, 0);
3387 /* Handle ellipsis at end of list. */
3388 if (was_ellipsis
!= tok_none
)
3390 handle_ellipsis (ldfile
, NULL
, 0, was_ellipsis
, charmap
,
3391 repertoire
, result
);
3392 was_ellipsis
= tok_none
;
3396 lr_ignore_rest (ldfile
, 1);
3399 case tok_reorder_after
:
3400 /* Ignore the rest of the line if we don't need the input of
3404 lr_ignore_rest (ldfile
, 0);
3410 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
3414 /* Handle ellipsis at end of list. */
3415 if (was_ellipsis
!= tok_none
)
3417 handle_ellipsis (ldfile
, arg
->val
.str
.startmb
,
3418 arg
->val
.str
.lenmb
, was_ellipsis
, charmap
,
3419 repertoire
, result
);
3420 was_ellipsis
= tok_none
;
3423 else if (state
== 0 && copy_locale
== NULL
)
3425 else if (state
!= 0 && state
!= 2 && state
!= 3)
3429 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3430 if (arg
->tok
== tok_bsymbol
|| arg
->tok
== tok_ucs4
)
3432 /* Find this symbol in the sequence table. */
3436 struct element_t
*insp
;
3440 if (arg
->tok
== tok_bsymbol
)
3442 startmb
= arg
->val
.str
.startmb
;
3443 lenmb
= arg
->val
.str
.lenmb
;
3447 sprintf (ucsbuf
, "U%08X", arg
->val
.ucs4
);
3452 if (find_entry (&collate
->seq_table
, startmb
, lenmb
, &ptr
) == 0)
3453 /* Yes, the symbol exists. Simply point the cursor
3455 collate
->cursor
= (struct element_t
*) ptr
;
3458 struct symbol_t
*symbp
;
3461 if (find_entry (&collate
->sym_table
, startmb
, lenmb
,
3466 if (symbp
->order
->last
!= NULL
3467 || symbp
->order
->next
!= NULL
)
3468 collate
->cursor
= symbp
->order
;
3471 /* This is a collating symbol but its position
3472 is not yet defined. */
3473 lr_error (ldfile
, _("\
3474 %s: order for collating symbol %.*s not yet defined"),
3475 "LC_COLLATE", (int) lenmb
, startmb
);
3476 collate
->cursor
= NULL
;
3480 else if (find_entry (&collate
->elem_table
, startmb
, lenmb
,
3483 insp
= (struct element_t
*) ptr
;
3485 if (insp
->last
!= NULL
|| insp
->next
!= NULL
)
3486 collate
->cursor
= insp
;
3489 /* This is a collating element but its position
3490 is not yet defined. */
3491 lr_error (ldfile
, _("\
3492 %s: order for collating element %.*s not yet defined"),
3493 "LC_COLLATE", (int) lenmb
, startmb
);
3494 collate
->cursor
= NULL
;
3500 /* This is bad. The symbol after which we have to
3501 insert does not exist. */
3502 lr_error (ldfile
, _("\
3503 %s: cannot reorder after %.*s: symbol not known"),
3504 "LC_COLLATE", (int) lenmb
, startmb
);
3505 collate
->cursor
= NULL
;
3510 lr_ignore_rest (ldfile
, no_error
);
3513 /* This must not happen. */
3517 case tok_reorder_end
:
3518 /* Ignore the rest of the line if we don't need the input of
3526 lr_ignore_rest (ldfile
, 1);
3529 case tok_reorder_sections_after
:
3530 /* Ignore the rest of the line if we don't need the input of
3534 lr_ignore_rest (ldfile
, 0);
3540 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
3544 /* Handle ellipsis at end of list. */
3545 if (was_ellipsis
!= tok_none
)
3547 handle_ellipsis (ldfile
, NULL
, 0, was_ellipsis
, charmap
,
3548 repertoire
, result
);
3549 was_ellipsis
= tok_none
;
3552 else if (state
== 3)
3554 WITH_CUR_LOCALE (error (0, 0, _("\
3555 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3558 else if (state
!= 2 && state
!= 4)
3562 /* Get the name of the sections we are adding after. */
3563 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3564 if (arg
->tok
== tok_bsymbol
)
3566 /* Now find a section with this name. */
3567 struct section_list
*runp
= collate
->sections
;
3569 while (runp
!= NULL
)
3571 if (runp
->name
!= NULL
3572 && strlen (runp
->name
) == arg
->val
.str
.lenmb
3573 && memcmp (runp
->name
, arg
->val
.str
.startmb
,
3574 arg
->val
.str
.lenmb
) == 0)
3581 collate
->current_section
= runp
;
3584 /* This is bad. The section after which we have to
3585 reorder does not exist. Therefore we cannot
3586 process the whole rest of this reorder
3588 lr_error (ldfile
, _("%s: section `%.*s' not known"),
3589 "LC_COLLATE", (int) arg
->val
.str
.lenmb
,
3590 arg
->val
.str
.startmb
);
3594 lr_ignore_rest (ldfile
, 0);
3596 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3598 while (now
->tok
== tok_reorder_sections_after
3599 || now
->tok
== tok_reorder_sections_end
3600 || now
->tok
== tok_end
);
3602 /* Process the token we just saw. */
3608 /* This must not happen. */
3612 case tok_reorder_sections_end
:
3613 /* Ignore the rest of the line if we don't need the input of
3621 lr_ignore_rest (ldfile
, 1);
3626 /* Ignore the rest of the line if we don't need the input of
3630 lr_ignore_rest (ldfile
, 0);
3634 if (state
!= 0 && state
!= 1 && state
!= 3 && state
!= 5)
3637 if ((state
== 0 || state
== 5) && nowtok
== tok_ucs4
)
3640 if (nowtok
== tok_ucs4
)
3642 snprintf (ucs4buf
, sizeof (ucs4buf
), "U%08X", now
->val
.ucs4
);
3646 else if (arg
!= NULL
)
3648 symstr
= arg
->val
.str
.startmb
;
3649 symlen
= arg
->val
.str
.lenmb
;
3653 lr_error (ldfile
, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3654 (int) ldfile
->token
.val
.str
.lenmb
,
3655 ldfile
->token
.val
.str
.startmb
);
3659 struct element_t
*seqp
;
3662 /* We are outside an `order_start' region. This means
3663 we must only accept definitions of values for
3664 collation symbols since these are purely abstract
3665 values and don't need directions associated. */
3668 if (find_entry (&collate
->seq_table
, symstr
, symlen
, &ptr
) == 0)
3672 /* It's already defined. First check whether this
3673 is really a collating symbol. */
3674 if (seqp
->is_character
)
3683 if (find_entry (&collate
->sym_table
, symstr
, symlen
,
3685 /* No collating symbol, it's an error. */
3688 /* Maybe this is the first time we define a symbol
3689 value and it is before the first actual section. */
3690 if (collate
->sections
== NULL
)
3691 collate
->sections
= collate
->current_section
=
3692 &collate
->symbol_section
;
3695 if (was_ellipsis
!= tok_none
)
3697 handle_ellipsis (ldfile
, symstr
, symlen
, was_ellipsis
,
3698 charmap
, repertoire
, result
);
3700 /* Remember that we processed the ellipsis. */
3701 was_ellipsis
= tok_none
;
3703 /* And don't add the value a second time. */
3707 else if (state
== 3)
3709 /* It is possible that we already have this collation sequence.
3710 In this case we move the entry. */
3714 /* If the symbol after which we have to insert was not found
3715 ignore all entries. */
3716 if (collate
->cursor
== NULL
)
3718 lr_ignore_rest (ldfile
, 0);
3722 if (find_entry (&collate
->seq_table
, symstr
, symlen
, &ptr
) == 0)
3724 seqp
= (struct element_t
*) ptr
;
3728 if (find_entry (&collate
->sym_table
, symstr
, symlen
, &sym
) == 0
3729 && (seqp
= ((struct symbol_t
*) sym
)->order
) != NULL
)
3732 if (find_entry (&collate
->elem_table
, symstr
, symlen
, &ptr
) == 0
3733 && (seqp
= (struct element_t
*) ptr
,
3734 seqp
->last
!= NULL
|| seqp
->next
!= NULL
3735 || (collate
->start
!= NULL
&& seqp
== collate
->start
)))
3738 /* Remove the entry from the old position. */
3739 if (seqp
->last
== NULL
)
3740 collate
->start
= seqp
->next
;
3742 seqp
->last
->next
= seqp
->next
;
3743 if (seqp
->next
!= NULL
)
3744 seqp
->next
->last
= seqp
->last
;
3746 /* We also have to check whether this entry is the
3747 first or last of a section. */
3748 if (seqp
->section
->first
== seqp
)
3750 if (seqp
->section
->first
== seqp
->section
->last
)
3751 /* This section has no content anymore. */
3752 seqp
->section
->first
= seqp
->section
->last
= NULL
;
3754 seqp
->section
->first
= seqp
->next
;
3756 else if (seqp
->section
->last
== seqp
)
3757 seqp
->section
->last
= seqp
->last
;
3759 /* Now insert it in the new place. */
3760 insert_weights (ldfile
, seqp
, charmap
, repertoire
, result
,
3765 /* Otherwise we just add a new entry. */
3767 else if (state
== 5)
3769 /* We are reordering sections. Find the named section. */
3770 struct section_list
*runp
= collate
->sections
;
3771 struct section_list
*prevp
= NULL
;
3773 while (runp
!= NULL
)
3775 if (runp
->name
!= NULL
3776 && strlen (runp
->name
) == symlen
3777 && memcmp (runp
->name
, symstr
, symlen
) == 0)
3786 lr_error (ldfile
, _("%s: section `%.*s' not known"),
3787 "LC_COLLATE", (int) symlen
, symstr
);
3788 lr_ignore_rest (ldfile
, 0);
3792 if (runp
!= collate
->current_section
)
3794 /* Remove the named section from the old place and
3795 insert it in the new one. */
3796 prevp
->next
= runp
->next
;
3798 runp
->next
= collate
->current_section
->next
;
3799 collate
->current_section
->next
= runp
;
3800 collate
->current_section
= runp
;
3803 /* Process the rest of the line which might change
3804 the collation rules. */
3805 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
3807 if (arg
->tok
!= tok_eof
&& arg
->tok
!= tok_eol
)
3808 read_directions (ldfile
, arg
, charmap
, repertoire
,
3813 else if (was_ellipsis
!= tok_none
)
3815 /* Using the information in the `ellipsis_weight'
3816 element and this and the last value we have to handle
3817 the ellipsis now. */
3818 assert (state
== 1);
3820 handle_ellipsis (ldfile
, symstr
, symlen
, was_ellipsis
, charmap
,
3821 repertoire
, result
);
3823 /* Remember that we processed the ellipsis. */
3824 was_ellipsis
= tok_none
;
3826 /* And don't add the value a second time. */
3830 /* Now insert in the new place. */
3831 insert_value (ldfile
, symstr
, symlen
, charmap
, repertoire
, result
);
3835 /* Ignore the rest of the line if we don't need the input of
3839 lr_ignore_rest (ldfile
, 0);
3846 if (was_ellipsis
!= tok_none
)
3849 _("%s: cannot have `%s' as end of ellipsis range"),
3850 "LC_COLLATE", "UNDEFINED");
3852 unlink_element (collate
);
3853 was_ellipsis
= tok_none
;
3856 /* See whether UNDEFINED already appeared somewhere. */
3857 if (collate
->undefined
.next
!= NULL
3858 || &collate
->undefined
== collate
->cursor
)
3861 _("%s: order for `%.*s' already defined at %s:%Zu"),
3862 "LC_COLLATE", 9, "UNDEFINED",
3863 collate
->undefined
.file
,
3864 collate
->undefined
.line
);
3865 lr_ignore_rest (ldfile
, 0);
3868 /* Parse the weights. */
3869 insert_weights (ldfile
, &collate
->undefined
, charmap
,
3870 repertoire
, result
, tok_none
);
3873 case tok_ellipsis2
: /* symbolic hexadecimal ellipsis */
3874 case tok_ellipsis3
: /* absolute ellipsis */
3875 case tok_ellipsis4
: /* symbolic decimal ellipsis */
3876 /* This is the symbolic (decimal or hexadecimal) or absolute
3878 if (was_ellipsis
!= tok_none
)
3881 if (state
!= 0 && state
!= 1 && state
!= 3)
3884 was_ellipsis
= nowtok
;
3886 insert_weights (ldfile
, &collate
->ellipsis_weight
, charmap
,
3887 repertoire
, result
, nowtok
);
3892 /* Next we assume `LC_COLLATE'. */
3893 if (!ignore_content
)
3895 if (state
== 0 && copy_locale
== NULL
)
3896 /* We must either see a copy statement or have
3899 _("%s: empty category description not allowed"),
3901 else if (state
== 1)
3903 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
3906 /* Handle ellipsis at end of list. */
3907 if (was_ellipsis
!= tok_none
)
3909 handle_ellipsis (ldfile
, NULL
, 0, was_ellipsis
, charmap
,
3910 repertoire
, result
);
3911 was_ellipsis
= tok_none
;
3914 else if (state
== 3)
3915 WITH_CUR_LOCALE (error (0, 0, _("\
3916 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3917 else if (state
== 5)
3918 WITH_CUR_LOCALE (error (0, 0, _("\
3919 %s: missing `reorder-sections-end' keyword"), "LC_COLLATE"));
3921 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3922 if (arg
->tok
== tok_eof
)
3924 if (arg
->tok
== tok_eol
)
3925 lr_error (ldfile
, _("%s: incomplete `END' line"), "LC_COLLATE");
3926 else if (arg
->tok
!= tok_lc_collate
)
3927 lr_error (ldfile
, _("\
3928 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3929 lr_ignore_rest (ldfile
, arg
->tok
== tok_lc_collate
);
3935 lr_ignore_rest (ldfile
, 0);
3939 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3940 if (arg
->tok
!= tok_ident
)
3943 /* Simply add the new symbol. */
3944 struct name_list
*newsym
= xmalloc (sizeof (*newsym
)
3945 + arg
->val
.str
.lenmb
+ 1);
3946 memcpy (newsym
->str
, arg
->val
.str
.startmb
, arg
->val
.str
.lenmb
);
3947 newsym
->str
[arg
->val
.str
.lenmb
] = '\0';
3948 newsym
->next
= defined
;
3951 lr_ignore_rest (ldfile
, 1);
3957 lr_ignore_rest (ldfile
, 0);
3961 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3962 if (arg
->tok
!= tok_ident
)
3965 /* Remove _all_ occurrences of the symbol from the list. */
3966 struct name_list
*prevdef
= NULL
;
3967 struct name_list
*curdef
= defined
;
3968 while (curdef
!= NULL
)
3969 if (strncmp (arg
->val
.str
.startmb
, curdef
->str
,
3970 arg
->val
.str
.lenmb
) == 0
3971 && curdef
->str
[arg
->val
.str
.lenmb
] == '\0')
3973 if (prevdef
== NULL
)
3974 defined
= curdef
->next
;
3976 prevdef
->next
= curdef
->next
;
3978 struct name_list
*olddef
= curdef
;
3979 curdef
= curdef
->next
;
3986 curdef
= curdef
->next
;
3989 lr_ignore_rest (ldfile
, 1);
3996 lr_ignore_rest (ldfile
, 0);
4001 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
4002 if (arg
->tok
!= tok_ident
)
4004 lr_ignore_rest (ldfile
, 1);
4006 if (collate
->else_action
== else_none
)
4009 while (curdef
!= NULL
)
4010 if (strncmp (arg
->val
.str
.startmb
, curdef
->str
,
4011 arg
->val
.str
.lenmb
) == 0
4012 && curdef
->str
[arg
->val
.str
.lenmb
] == '\0')
4015 curdef
= curdef
->next
;
4017 if ((nowtok
== tok_ifdef
&& curdef
!= NULL
)
4018 || (nowtok
== tok_ifndef
&& curdef
== NULL
))
4020 /* We have to use the if-branch. */
4021 collate
->else_action
= else_ignore
;
4025 /* We have to use the else-branch, if there is one. */
4026 nowtok
= skip_to (ldfile
, collate
, charmap
, 0);
4027 if (nowtok
== tok_else
)
4028 collate
->else_action
= else_seen
;
4029 else if (nowtok
== tok_elifdef
)
4034 else if (nowtok
== tok_elifndef
)
4036 nowtok
= tok_ifndef
;
4039 else if (nowtok
== tok_eof
)
4041 else if (nowtok
== tok_end
)
4047 /* XXX Should it really become necessary to support nested
4048 preprocessor handling we will push the state here. */
4049 lr_error (ldfile
, _("%s: nested conditionals not supported"),
4051 nowtok
= skip_to (ldfile
, collate
, charmap
, 1);
4052 if (nowtok
== tok_eof
)
4054 else if (nowtok
== tok_end
)
4064 lr_ignore_rest (ldfile
, 0);
4068 lr_ignore_rest (ldfile
, 1);
4070 if (collate
->else_action
== else_ignore
)
4072 /* Ignore everything until the endif. */
4073 nowtok
= skip_to (ldfile
, collate
, charmap
, 1);
4074 if (nowtok
== tok_eof
)
4076 else if (nowtok
== tok_end
)
4081 assert (collate
->else_action
== else_none
);
4082 lr_error (ldfile
, _("\
4083 %s: '%s' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE",
4084 nowtok
== tok_else
? "else"
4085 : nowtok
== tok_elifdef
? "elifdef" : "elifndef");
4092 lr_ignore_rest (ldfile
, 0);
4096 lr_ignore_rest (ldfile
, 1);
4098 if (collate
->else_action
!= else_ignore
4099 && collate
->else_action
!= else_seen
)
4100 lr_error (ldfile
, _("\
4101 %s: 'endif' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE");
4103 /* XXX If we support nested preprocessor directives we pop the state here.  */
4105 collate
->else_action
= else_none
;
4110 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
4113 /* Prepare for the next round. */
4114 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
4119 /* When we come here we reached the end of the file. */
4120 lr_error (ldfile
, _("%s: premature end of file"), "LC_COLLATE");