1 /* Copyright (C) 1995-2003, 2005, 2006, 2007 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License version 2 as
7 published by the Free Software Foundation.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
26 #include <sys/param.h>
28 #include "localedef.h"
30 #include "localeinfo.h"
31 #include "linereader.h"
33 #include "elem-hash.h"
35 /* Uncomment the following line in the production version. */
36 /* #define NDEBUG 1 */
39 #define obstack_chunk_alloc malloc
40 #define obstack_chunk_free free
43 __attribute ((always_inline
))
44 obstack_int32_grow (struct obstack
*obstack
, int32_t data
)
46 if (sizeof (int32_t) == sizeof (int))
47 obstack_int_grow (obstack
, data
);
49 obstack_grow (obstack
, &data
, sizeof (int32_t));
53 __attribute ((always_inline
))
54 obstack_int32_grow_fast (struct obstack
*obstack
, int32_t data
)
56 if (sizeof (int32_t) == sizeof (int))
57 obstack_int_grow_fast (obstack
, data
);
59 obstack_grow (obstack
, &data
, sizeof (int32_t));
62 /* Forward declaration. */
65 /* Data type for list of strings. */
68 /* Successor in the known_sections list. */
69 struct section_list
*def_next
;
70 /* Successor in the sections list. */
71 struct section_list
*next
;
72 /* Name of the section. */
74 /* First element of this section. */
75 struct element_t
*first
;
76 /* Last element of this section. */
77 struct element_t
*last
;
78 /* These are the rules for this section. */
79 enum coll_sort_rule
*rules
;
80 /* Index of the rule set in the appropriate section of the output file. */
88 /* Number of elements. */
94 /* Data type for collating element. */
106 /* The following is a bit mask which bits are set if this element is
107 used in the appropriate level. Interesting for the singlebyte
110 XXX The type here restricts the number of levels to 32. It could
111 be changed if necessary but I doubt this is necessary. */
112 unsigned int used_in_level
;
114 struct element_list_t
*weights
;
116 /* Nonzero if this is a real character definition. */
119 /* Order of the character in the sequence. This information will
120 be used in range expressions. */
124 /* Where does the definition come from. */
128 /* Which section does this belong to. */
129 struct section_list
*section
;
131 /* Predecessor and successor in the order list. */
132 struct element_t
*last
;
133 struct element_t
*next
;
135 /* Next element in multibyte output list. */
136 struct element_t
*mbnext
;
137 struct element_t
*mblast
;
139 /* Next element in wide character output list. */
140 struct element_t
*wcnext
;
141 struct element_t
*wclast
;
144 /* Special element value. */
145 #define ELEMENT_ELLIPSIS2 ((struct element_t *) 1)
146 #define ELEMENT_ELLIPSIS3 ((struct element_t *) 2)
147 #define ELEMENT_ELLIPSIS4 ((struct element_t *) 3)
149 /* Data type for collating symbol. */
154 /* Point to place in the order list. */
155 struct element_t
*order
;
157 /* Where does the definition come from. */
162 /* Sparse table of struct element_t *. */
163 #define TABLE wchead_table
164 #define ELEMENT struct element_t *
170 /* Sparse table of int32_t. */
171 #define TABLE collidx_table
172 #define ELEMENT int32_t
176 /* Sparse table of uint32_t. */
177 #define TABLE collseq_table
178 #define ELEMENT uint32_t
179 #define DEFAULT ~((uint32_t) 0)
183 /* The real definition of the struct for the LC_COLLATE locale. */
184 struct locale_collate_t
189 /* List of known scripts. */
190 struct section_list
*known_sections
;
191 /* List of used sections. */
192 struct section_list
*sections
;
193 /* Current section using definition. */
194 struct section_list
*current_section
;
195 /* There always can be an unnamed section. */
196 struct section_list unnamed_section
;
197 /* To make handling of errors easier we have another section. */
198 struct section_list error_section
;
199 /* Sometimes we are defining the values for collating symbols before
200 the first actual section. */
201 struct section_list symbol_section
;
203 /* Start of the order list. */
204 struct element_t
*start
;
206 /* The undefined element. */
207 struct element_t undefined
;
209 /* This is the cursor for `reorder_after' insertions. */
210 struct element_t
*cursor
;
212 /* This value is used when handling ellipsis. */
213 struct element_t ellipsis_weight
;
215 /* Known collating elements. */
216 hash_table elem_table
;
218 /* Known collating symbols. */
219 hash_table sym_table
;
221 /* Known collation sequences. */
222 hash_table seq_table
;
224 struct obstack mempool
;
226 /* The LC_COLLATE category is a bit special as it is sometimes possible
227 that the definitions from more than one input file contain information.
228 Therefore we keep all relevant input in a list. */
229 struct locale_collate_t
*next
;
231 /* Arrays with heads of the list for each of the leading bytes in
232 the multibyte sequences. */
233 struct element_t
*mbheads
[256];
235 /* Table with the heads of the lists for the wide character
236 sequences. */
237 struct wchead_table wcheads
;
239 /* The arrays with the collation sequence order. */
240 unsigned char mbseqorder
[256];
241 struct collseq_table wcseqorder
;
245 /* We have a few global variables which are used for reading all
246 LC_COLLATE category descriptions in all files. */
247 static uint32_t nrules
;
250 /* We need UTF-8 encoding of numbers. */
252 __attribute ((always_inline
))
253 utf8_encode (char *buf
, int val
)
266 for (step
= 2; step
< 6; ++step
)
267 if ((val
& (~(uint32_t)0 << (5 * step
+ 1))) == 0)
271 *buf
= (unsigned char) (~0xff >> step
);
275 buf
[step
] = 0x80 | (val
& 0x3f);
286 static struct section_list
*
287 make_seclist_elem (struct locale_collate_t
*collate
, const char *string
,
288 struct section_list
*next
)
290 struct section_list
*newp
;
292 newp
= (struct section_list
*) obstack_alloc (&collate
->mempool
,
303 static struct element_t
*
304 new_element (struct locale_collate_t
*collate
, const char *mbs
, size_t mbslen
,
305 const uint32_t *wcs
, const char *name
, size_t namelen
,
308 struct element_t
*newp
;
310 newp
= (struct element_t
*) obstack_alloc (&collate
->mempool
,
312 newp
->name
= name
== NULL
? NULL
: obstack_copy0 (&collate
->mempool
,
316 newp
->mbs
= obstack_copy0 (&collate
->mempool
, mbs
, mbslen
);
326 size_t nwcs
= wcslen ((wchar_t *) wcs
);
328 obstack_grow (&collate
->mempool
, wcs
, nwcs
* sizeof (uint32_t));
329 obstack_grow (&collate
->mempool
, &zero
, sizeof (uint32_t));
330 newp
->wcs
= (uint32_t *) obstack_finish (&collate
->mempool
);
338 newp
->mborder
= NULL
;
340 newp
->used_in_level
= 0;
341 newp
->is_character
= is_character
;
343 /* Will be assigned later. XXX */
344 newp
->mbseqorder
= 0;
345 newp
->wcseqorder
= 0;
347 /* Will be allocated later. */
348 newp
->weights
= NULL
;
353 newp
->section
= collate
->current_section
;
368 static struct symbol_t
*
369 new_symbol (struct locale_collate_t
*collate
, const char *name
, size_t len
)
371 struct symbol_t
*newp
;
373 newp
= (struct symbol_t
*) obstack_alloc (&collate
->mempool
, sizeof (*newp
));
375 newp
->name
= obstack_copy0 (&collate
->mempool
, name
, len
);
385 /* Test whether this name is already defined somewhere. */
387 check_duplicate (struct linereader
*ldfile
, struct locale_collate_t
*collate
,
388 const struct charmap_t
*charmap
,
389 struct repertoire_t
*repertoire
, const char *symbol
,
394 if (find_entry (&charmap
->char_table
, symbol
, symbol_len
, &ignore
) == 0)
396 lr_error (ldfile
, _("`%.*s' already defined in charmap"),
397 (int) symbol_len
, symbol
);
401 if (repertoire
!= NULL
402 && (find_entry (&repertoire
->char_table
, symbol
, symbol_len
, &ignore
)
405 lr_error (ldfile
, _("`%.*s' already defined in repertoire"),
406 (int) symbol_len
, symbol
);
410 if (find_entry (&collate
->sym_table
, symbol
, symbol_len
, &ignore
) == 0)
412 lr_error (ldfile
, _("`%.*s' already defined as collating symbol"),
413 (int) symbol_len
, symbol
);
417 if (find_entry (&collate
->elem_table
, symbol
, symbol_len
, &ignore
) == 0)
419 lr_error (ldfile
, _("`%.*s' already defined as collating element"),
420 (int) symbol_len
, symbol
);
428 /* Read the direction specification. */
430 read_directions (struct linereader
*ldfile
, struct token
*arg
,
431 const struct charmap_t
*charmap
,
432 struct repertoire_t
*repertoire
, struct localedef_t
*result
)
435 int max
= nrules
?: 10;
436 enum coll_sort_rule
*rules
= calloc (max
, sizeof (*rules
));
438 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
444 if (arg
->tok
== tok_forward
)
446 if (rules
[cnt
] & sort_backward
)
450 lr_error (ldfile
, _("\
451 %s: `forward' and `backward' are mutually excluding each other"),
456 else if (rules
[cnt
] & sort_forward
)
460 lr_error (ldfile
, _("\
461 %s: `%s' mentioned more than once in definition of weight %d"),
462 "LC_COLLATE", "forward", cnt
+ 1);
466 rules
[cnt
] |= sort_forward
;
470 else if (arg
->tok
== tok_backward
)
472 if (rules
[cnt
] & sort_forward
)
476 lr_error (ldfile
, _("\
477 %s: `forward' and `backward' are mutually excluding each other"),
482 else if (rules
[cnt
] & sort_backward
)
486 lr_error (ldfile
, _("\
487 %s: `%s' mentioned more than once in definition of weight %d"),
488 "LC_COLLATE", "backward", cnt
+ 1);
492 rules
[cnt
] |= sort_backward
;
496 else if (arg
->tok
== tok_position
)
498 if (rules
[cnt
] & sort_position
)
502 lr_error (ldfile
, _("\
503 %s: `%s' mentioned more than once in definition of weight %d"),
504 "LC_COLLATE", "position", cnt
+ 1);
508 rules
[cnt
] |= sort_position
;
514 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
516 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
|| arg
->tok
== tok_comma
517 || arg
->tok
== tok_semicolon
)
519 if (! valid
&& ! warned
)
521 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
525 /* See whether we have to increment the counter. */
526 if (arg
->tok
!= tok_comma
&& rules
[cnt
] != 0)
528 /* Add the default `forward' if we have seen only `position'. */
529 if (rules
[cnt
] == sort_position
)
530 rules
[cnt
] = sort_position
| sort_forward
;
535 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
)
536 /* End of line or file, so we exit the loop. */
541 /* See whether we have enough room in the array. */
545 rules
= (enum coll_sort_rule
*) xrealloc (rules
,
548 memset (&rules
[cnt
], '\0', (max
- cnt
) * sizeof (*rules
));
555 /* There must not be any more rules. */
558 lr_error (ldfile
, _("\
559 %s: too many rules; first entry only had %d"),
560 "LC_COLLATE", nrules
);
564 lr_ignore_rest (ldfile
, 0);
573 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
578 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
583 /* Now we know how many rules we have. */
585 rules
= (enum coll_sort_rule
*) xrealloc (rules
,
586 nrules
* sizeof (*rules
));
592 /* Not enough rules in this specification. */
594 lr_error (ldfile
, _("%s: not enough sorting rules"), "LC_COLLATE");
597 rules
[cnt
] = sort_forward
;
598 while (++cnt
< nrules
);
602 collate
->current_section
->rules
= rules
;
606 static struct element_t
*
607 find_element (struct linereader
*ldfile
, struct locale_collate_t
*collate
,
608 const char *str
, size_t len
)
612 /* Search for the entries among the collation sequences already defined. */
613 if (find_entry (&collate
->seq_table
, str
, len
, &result
) != 0)
615 /* Nope, not defined yet. So we see whether it is a
619 if (find_entry (&collate
->sym_table
, str
, len
, &ptr
) == 0)
621 /* It's a collation symbol. */
622 struct symbol_t
*sym
= (struct symbol_t
*) ptr
;
626 result
= sym
->order
= new_element (collate
, NULL
, 0, NULL
,
629 else if (find_entry (&collate
->elem_table
, str
, len
, &result
) != 0)
631 /* It's also no collation element. So it is a character
632 element defined later. */
633 result
= new_element (collate
, NULL
, 0, NULL
, str
, len
, 1);
634 /* Insert it into the sequence table. */
635 insert_entry (&collate
->seq_table
, str
, len
, result
);
639 return (struct element_t
*) result
;
644 unlink_element (struct locale_collate_t
*collate
)
646 if (collate
->cursor
== collate
->start
)
648 assert (collate
->cursor
->next
== NULL
);
649 assert (collate
->cursor
->last
== NULL
);
650 collate
->cursor
= NULL
;
654 if (collate
->cursor
->next
!= NULL
)
655 collate
->cursor
->next
->last
= collate
->cursor
->last
;
656 if (collate
->cursor
->last
!= NULL
)
657 collate
->cursor
->last
->next
= collate
->cursor
->next
;
658 collate
->cursor
= collate
->cursor
->last
;
664 insert_weights (struct linereader
*ldfile
, struct element_t
*elem
,
665 const struct charmap_t
*charmap
,
666 struct repertoire_t
*repertoire
, struct localedef_t
*result
,
667 enum token_t ellipsis
)
671 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
673 /* Initialize all the fields. */
674 elem
->file
= ldfile
->fname
;
675 elem
->line
= ldfile
->lineno
;
677 elem
->last
= collate
->cursor
;
678 elem
->next
= collate
->cursor
? collate
->cursor
->next
: NULL
;
679 if (collate
->cursor
!= NULL
&& collate
->cursor
->next
!= NULL
)
680 collate
->cursor
->next
->last
= elem
;
681 if (collate
->cursor
!= NULL
)
682 collate
->cursor
->next
= elem
;
683 if (collate
->start
== NULL
)
685 assert (collate
->cursor
== NULL
);
686 collate
->start
= elem
;
689 elem
->section
= collate
->current_section
;
691 if (collate
->current_section
->first
== NULL
)
692 collate
->current_section
->first
= elem
;
693 if (collate
->current_section
->last
== collate
->cursor
)
694 collate
->current_section
->last
= elem
;
696 collate
->cursor
= elem
;
698 elem
->weights
= (struct element_list_t
*)
699 obstack_alloc (&collate
->mempool
, nrules
* sizeof (struct element_list_t
));
700 memset (elem
->weights
, '\0', nrules
* sizeof (struct element_list_t
));
704 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
707 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
)
710 if (arg
->tok
== tok_ignore
)
712 /* The weight for this level has to be ignored. We use the
713 null pointer to indicate this. */
714 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
715 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
716 elem
->weights
[weight_cnt
].w
[0] = NULL
;
717 elem
->weights
[weight_cnt
].cnt
= 1;
719 else if (arg
->tok
== tok_bsymbol
|| arg
->tok
== tok_ucs4
)
722 struct element_t
*val
;
726 if (arg
->tok
== tok_bsymbol
)
728 symstr
= arg
->val
.str
.startmb
;
729 symlen
= arg
->val
.str
.lenmb
;
733 snprintf (ucs4str
, sizeof (ucs4str
), "U%08X", arg
->val
.ucs4
);
738 val
= find_element (ldfile
, collate
, symstr
, symlen
);
742 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
743 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
744 elem
->weights
[weight_cnt
].w
[0] = val
;
745 elem
->weights
[weight_cnt
].cnt
= 1;
747 else if (arg
->tok
== tok_string
)
749 /* Split the string up in the individual characters and put
750 the element definitions in the list. */
751 const char *cp
= arg
->val
.str
.startmb
;
753 struct element_t
*charelem
;
754 struct element_t
**weights
= NULL
;
759 lr_error (ldfile
, _("%s: empty weight string not allowed"),
761 lr_ignore_rest (ldfile
, 0);
769 /* Ahh, it's a bsymbol or an UCS4 value. If it's
770 the latter we have to unify the name. */
771 const char *startp
= ++cp
;
776 if (*cp
== ldfile
->escape_char
)
779 /* It's a syntax error. */
785 if (cp
- startp
== 5 && startp
[0] == 'U'
786 && isxdigit (startp
[1]) && isxdigit (startp
[2])
787 && isxdigit (startp
[3]) && isxdigit (startp
[4]))
789 unsigned int ucs4
= strtoul (startp
+ 1, NULL
, 16);
792 newstr
= (char *) xmalloc (10);
793 snprintf (newstr
, 10, "U%08X", ucs4
);
801 charelem
= find_element (ldfile
, collate
, startp
, len
);
806 /* People really shouldn't use characters directly in
807 the string. Especially since it's not really clear
808 what this means. We interpret all characters in the
809 string as if that would be bsymbols. Otherwise we
810 would have to match back to bsymbols somehow and this
811 is not what people normally expect. */
812 charelem
= find_element (ldfile
, collate
, cp
++, 1);
815 if (charelem
== NULL
)
817 /* We ignore the rest of the line. */
818 lr_ignore_rest (ldfile
, 0);
822 /* Add the pointer. */
825 struct element_t
**newp
;
827 newp
= (struct element_t
**)
828 alloca (max
* sizeof (struct element_t
*));
829 memcpy (newp
, weights
, cnt
* sizeof (struct element_t
*));
832 weights
[cnt
++] = charelem
;
836 /* Now store the information. */
837 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
838 obstack_alloc (&collate
->mempool
,
839 cnt
* sizeof (struct element_t
*));
840 memcpy (elem
->weights
[weight_cnt
].w
, weights
,
841 cnt
* sizeof (struct element_t
*));
842 elem
->weights
[weight_cnt
].cnt
= cnt
;
844 /* We don't need the string anymore. */
845 free (arg
->val
.str
.startmb
);
847 else if (ellipsis
!= tok_none
848 && (arg
->tok
== tok_ellipsis2
849 || arg
->tok
== tok_ellipsis3
850 || arg
->tok
== tok_ellipsis4
))
852 /* It must be the same ellipsis as used in the initial column. */
853 if (arg
->tok
!= ellipsis
)
854 lr_error (ldfile
, _("\
855 %s: weights must use the same ellipsis symbol as the name"),
858 /* The weight for this level will depend on the element
859 iterating over the range. Put a placeholder. */
860 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
861 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
862 elem
->weights
[weight_cnt
].w
[0] = ELEMENT_ELLIPSIS2
;
863 elem
->weights
[weight_cnt
].cnt
= 1;
868 /* It's a syntax error. */
869 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
870 lr_ignore_rest (ldfile
, 0);
874 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
875 /* This better should be the end of the line or a semicolon. */
876 if (arg
->tok
== tok_semicolon
)
877 /* OK, ignore this and read the next token. */
878 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
879 else if (arg
->tok
!= tok_eof
&& arg
->tok
!= tok_eol
)
881 /* It's a syntax error. */
882 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
883 lr_ignore_rest (ldfile
, 0);
887 while (++weight_cnt
< nrules
);
889 if (weight_cnt
< nrules
)
891 /* This means the rest of the line uses the current element as
895 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
896 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
897 if (ellipsis
== tok_none
)
898 elem
->weights
[weight_cnt
].w
[0] = elem
;
900 elem
->weights
[weight_cnt
].w
[0] = ELEMENT_ELLIPSIS2
;
901 elem
->weights
[weight_cnt
].cnt
= 1;
903 while (++weight_cnt
< nrules
);
907 if (arg
->tok
== tok_ignore
|| arg
->tok
== tok_bsymbol
)
909 /* Too many rule values. */
910 lr_error (ldfile
, _("%s: too many values"), "LC_COLLATE");
911 lr_ignore_rest (ldfile
, 0);
914 lr_ignore_rest (ldfile
, arg
->tok
!= tok_eol
&& arg
->tok
!= tok_eof
);
920 insert_value (struct linereader
*ldfile
, const char *symstr
, size_t symlen
,
921 const struct charmap_t
*charmap
, struct repertoire_t
*repertoire
,
922 struct localedef_t
*result
)
924 /* First find out what kind of symbol this is. */
927 struct element_t
*elem
= NULL
;
928 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
930 /* Try to find the character in the charmap. */
931 seq
= charmap_find_value (charmap
, symstr
, symlen
);
933 /* Determine the wide character. */
934 if (seq
== NULL
|| seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
936 wc
= repertoire_find_value (repertoire
, symstr
, symlen
);
943 if (wc
== ILLEGAL_CHAR_VALUE
&& seq
== NULL
)
945 /* It's no character, so look through the collation elements and
948 if (find_entry (&collate
->elem_table
, symstr
, symlen
, &ptr
) != 0)
951 struct symbol_t
*sym
= NULL
;
953 /* It's also no collation element. Therefore it's either a
954 collating symbol or it's a character which is not
955 supported by the character set. In the latter case we
956 simply create a dummy entry. */
957 if (find_entry (&collate
->sym_table
, symstr
, symlen
, &result
) == 0)
959 /* It's a collation symbol. */
960 sym
= (struct symbol_t
*) result
;
967 elem
= new_element (collate
, NULL
, 0, NULL
, symstr
, symlen
, 0);
972 /* Enter a fake element in the sequence table. This
973 won't cause anything in the output since there is
974 no multibyte or wide character associated with
976 insert_entry (&collate
->seq_table
, symstr
, symlen
, elem
);
980 /* Copy the result back. */
985 /* Otherwise the symbol stands for a character. */
987 if (find_entry (&collate
->seq_table
, symstr
, symlen
, &ptr
) != 0)
989 uint32_t wcs
[2] = { wc
, 0 };
991 /* We have to allocate an entry. */
992 elem
= new_element (collate
, seq
!= NULL
? seq
->bytes
: NULL
,
993 seq
!= NULL
? seq
->nbytes
: 0,
994 wc
== ILLEGAL_CHAR_VALUE
? NULL
: wcs
,
997 /* And add it to the table. */
998 if (insert_entry (&collate
->seq_table
, symstr
, symlen
, elem
) != 0)
999 /* This cannot happen. */
1000 assert (! "Internal error");
1004 /* Copy the result back. */
1007 /* Maybe the character was used before the definition. In this case
1008 we have to insert the byte sequences now. */
1009 if (elem
->mbs
== NULL
&& seq
!= NULL
)
1011 elem
->mbs
= obstack_copy0 (&collate
->mempool
,
1012 seq
->bytes
, seq
->nbytes
);
1013 elem
->nmbs
= seq
->nbytes
;
1016 if (elem
->wcs
== NULL
&& wc
!= ILLEGAL_CHAR_VALUE
)
1018 uint32_t wcs
[2] = { wc
, 0 };
1020 elem
->wcs
= obstack_copy (&collate
->mempool
, wcs
, sizeof (wcs
));
1026 /* Test whether this element is not already in the list. */
1027 if (elem
->next
!= NULL
|| elem
== collate
->cursor
)
1029 lr_error (ldfile
, _("order for `%.*s' already defined at %s:%Zu"),
1030 (int) symlen
, symstr
, elem
->file
, elem
->line
);
1031 lr_ignore_rest (ldfile
, 0);
1035 insert_weights (ldfile
, elem
, charmap
, repertoire
, result
, tok_none
);
1042 handle_ellipsis (struct linereader
*ldfile
, const char *symstr
, size_t symlen
,
1043 enum token_t ellipsis
, const struct charmap_t
*charmap
,
1044 struct repertoire_t
*repertoire
,
1045 struct localedef_t
*result
)
1047 struct element_t
*startp
;
1048 struct element_t
*endp
;
1049 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
1051 /* Unlink the entry added for the ellipsis. */
1052 unlink_element (collate
);
1053 startp
= collate
->cursor
;
1055 /* Process and add the end-entry. */
1057 && insert_value (ldfile
, symstr
, symlen
, charmap
, repertoire
, result
))
1058 /* Something went wrong with inserting the to-value. This means
1059 we cannot process the ellipsis. */
1062 /* Reset the cursor. */
1063 collate
->cursor
= startp
;
1065 /* Now we have to handle many different situations:
1066 - we have to distinguish between the three different ellipsis forms
1067 - is the ellipsis at the beginning, in the middle, or at the end.
1069 endp
= collate
->cursor
->next
;
1070 assert (symstr
== NULL
|| endp
!= NULL
);
1072 /* XXX The following is probably very wrong since also collating symbols
1073 can appear in ranges. But do we want/can refine the test for that? */
1075 /* Both, the start and the end symbol, must stand for characters. */
1076 if ((startp
!= NULL
&& (startp
->name
== NULL
|| ! startp
->is_character
))
1077 || (endp
!= NULL
&& (endp
->name
== NULL
|| ! endp
->is_character
)))
1079 lr_error (ldfile
, _("\
1080 %s: the start and the end symbol of a range must stand for characters"),
1086 if (ellipsis
== tok_ellipsis3
)
1088 /* One requirement we make here: the length of the byte
1089 sequences for the first and end character must be the same.
1090 This is mainly to prevent unwanted effects and this is often
1091 not what is wanted. */
1092 size_t len
= (startp
->mbs
!= NULL
? startp
->nmbs
1093 : (endp
->mbs
!= NULL
? endp
->nmbs
: 0));
1094 char mbcnt
[len
+ 1];
1095 char mbend
[len
+ 1];
1097 /* Well, this should be caught somewhere else already. Just to
1099 assert (startp
== NULL
|| startp
->wcs
== NULL
|| startp
->wcs
[1] == 0);
1100 assert (endp
== NULL
|| endp
->wcs
== NULL
|| endp
->wcs
[1] == 0);
1102 if (startp
!= NULL
&& endp
!= NULL
1103 && startp
->mbs
!= NULL
&& endp
->mbs
!= NULL
1104 && startp
->nmbs
!= endp
->nmbs
)
1106 lr_error (ldfile
, _("\
1107 %s: byte sequences of first and last character must have the same length"),
1112 /* Determine whether we have to generate multibyte sequences. */
1113 if ((startp
== NULL
|| startp
->mbs
!= NULL
)
1114 && (endp
== NULL
|| endp
->mbs
!= NULL
))
1119 /* Prepare the beginning byte sequence. This is either from the
1120 beginning byte sequence or it is all nulls if it was an
1121 initial ellipsis. */
1122 if (startp
== NULL
|| startp
->mbs
== NULL
)
1123 memset (mbcnt
, '\0', len
);
1126 memcpy (mbcnt
, startp
->mbs
, len
);
1128 /* And increment it so that the value is the first one we will
1130 for (cnt
= len
- 1; cnt
>= 0; --cnt
)
1131 if (++mbcnt
[cnt
] != '\0')
1136 /* And the end sequence. */
1137 if (endp
== NULL
|| endp
->mbs
== NULL
)
1138 memset (mbend
, '\0', len
);
1140 memcpy (mbend
, endp
->mbs
, len
);
1143 /* Test whether we have a correct range. */
1144 ret
= memcmp (mbcnt
, mbend
, len
);
1148 lr_error (ldfile
, _("%s: byte sequence of first character of \
1149 range is not lower than that of the last character"), "LC_COLLATE");
1153 /* Generate the byte sequences data. */
1156 struct charseq
*seq
;
1158 /* Quite a bit of work ahead. We have to find the character
1159 definition for the byte sequence and then determine the
1160 wide character belonging to it. */
1161 seq
= charmap_find_symbol (charmap
, mbcnt
, len
);
1164 struct element_t
*elem
;
1167 /* I don't think this can ever happen. */
1168 assert (seq
->name
!= NULL
);
1169 namelen
= strlen (seq
->name
);
1171 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1172 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1175 /* Now we are ready to insert the new value in the
1176 sequence. Find out whether the element is
1179 if (find_entry (&collate
->seq_table
, seq
->name
, namelen
,
1182 uint32_t wcs
[2] = { seq
->ucs4
, 0 };
1184 /* We have to allocate an entry. */
1185 elem
= new_element (collate
, mbcnt
, len
,
1186 seq
->ucs4
== ILLEGAL_CHAR_VALUE
1187 ? NULL
: wcs
, seq
->name
,
1190 /* And add it to the table. */
1191 if (insert_entry (&collate
->seq_table
, seq
->name
,
1192 namelen
, elem
) != 0)
1193 /* This cannot happen. */
1194 assert (! "Internal error");
1197 /* Copy the result. */
1200 /* Test whether this element is not already in the list. */
1201 if (elem
->next
!= NULL
|| (collate
->cursor
!= NULL
1202 && elem
->next
== collate
->cursor
))
1204 lr_error (ldfile
, _("\
1205 order for `%.*s' already defined at %s:%Zu"),
1206 (int) namelen
, seq
->name
,
1207 elem
->file
, elem
->line
);
1211 /* Enqueue the new element. */
1212 elem
->last
= collate
->cursor
;
1213 if (collate
->cursor
== NULL
)
1217 elem
->next
= collate
->cursor
->next
;
1218 elem
->last
->next
= elem
;
1219 if (elem
->next
!= NULL
)
1220 elem
->next
->last
= elem
;
1222 if (collate
->start
== NULL
)
1224 assert (collate
->cursor
== NULL
);
1225 collate
->start
= elem
;
1227 collate
->cursor
= elem
;
1229 /* Add the weight value. We take them from the
1230 `ellipsis_weights' member of `collate'. */
1231 elem
->weights
= (struct element_list_t
*)
1232 obstack_alloc (&collate
->mempool
,
1233 nrules
* sizeof (struct element_list_t
));
1234 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1235 if (collate
->ellipsis_weight
.weights
[cnt
].cnt
== 1
1236 && (collate
->ellipsis_weight
.weights
[cnt
].w
[0]
1237 == ELEMENT_ELLIPSIS2
))
1239 elem
->weights
[cnt
].w
= (struct element_t
**)
1240 obstack_alloc (&collate
->mempool
,
1241 sizeof (struct element_t
*));
1242 elem
->weights
[cnt
].w
[0] = elem
;
1243 elem
->weights
[cnt
].cnt
= 1;
1247 /* Simply use the weight from `ellipsis_weight'. */
1248 elem
->weights
[cnt
].w
=
1249 collate
->ellipsis_weight
.weights
[cnt
].w
;
1250 elem
->weights
[cnt
].cnt
=
1251 collate
->ellipsis_weight
.weights
[cnt
].cnt
;
1255 /* Increment for the next round. */
1257 for (cnt
= len
- 1; cnt
>= 0; --cnt
)
1258 if (++mbcnt
[cnt
] != '\0')
1261 /* Find out whether this was all. */
1262 if (cnt
< 0 || memcmp (mbcnt
, mbend
, len
) >= 0)
1263 /* Yep, that's all. */
1270 /* For symbolic range we naturally must have a beginning and an
1271 end specified by the user. */
1273 lr_error (ldfile
, _("\
1274 %s: symbolic range ellipsis must not directly follow `order_start'"),
1276 else if (endp
== NULL
)
1277 lr_error (ldfile
, _("\
1278 %s: symbolic range ellipsis must not be directly followed by `order_end'"),
1282 /* Determine the range. To do so we have to determine the
1283 common prefix of the both names and then the numeric
1284 values of both ends. */
1285 size_t lenfrom
= strlen (startp
->name
);
1286 size_t lento
= strlen (endp
->name
);
1287 char buf
[lento
+ 1];
1292 int base
= ellipsis
== tok_ellipsis2
? 16 : 10;
1294 if (lenfrom
!= lento
)
1297 lr_error (ldfile
, _("\
1298 `%s' and `%.*s' are not valid names for symbolic range"),
1299 startp
->name
, (int) lento
, endp
->name
);
1303 while (startp
->name
[preflen
] == endp
->name
[preflen
])
1304 if (startp
->name
[preflen
] == '\0')
1305 /* Nothing to be done. The start and end point are identical
1306 and while inserting the end point we have already given
1307 the user an error message. */
1313 from
= strtol (startp
->name
+ preflen
, &cp
, base
);
1314 if ((from
== UINT_MAX
&& errno
== ERANGE
) || *cp
!= '\0')
1318 to
= strtol (endp
->name
+ preflen
, &cp
, base
);
1319 if ((to
== UINT_MAX
&& errno
== ERANGE
) || *cp
!= '\0')
1322 /* Copy the prefix. */
1323 memcpy (buf
, startp
->name
, preflen
);
1325 /* Loop over all values. */
1326 for (++from
; from
< to
; ++from
)
1328 struct element_t
*elem
= NULL
;
1329 struct charseq
*seq
;
1333 /* Generate the name. */
1334 sprintf (buf
+ preflen
, base
== 10 ? "%0*ld" : "%0*lX",
1335 (int) (lenfrom
- preflen
), from
);
1337 /* Look whether this name is already defined. */
1339 if (find_entry (&collate
->seq_table
, buf
, symlen
, &ptr
) == 0)
1341 /* Copy back the result. */
1344 if (elem
->next
!= NULL
|| (collate
->cursor
!= NULL
1345 && elem
->next
== collate
->cursor
))
1347 lr_error (ldfile
, _("\
1348 %s: order for `%.*s' already defined at %s:%Zu"),
1349 "LC_COLLATE", (int) lenfrom
, buf
,
1350 elem
->file
, elem
->line
);
1354 if (elem
->name
== NULL
)
1356 lr_error (ldfile
, _("%s: `%s' must be a character"),
1362 if (elem
== NULL
|| (elem
->mbs
== NULL
&& elem
->wcs
== NULL
))
1364 /* Search for a character of this name. */
1365 seq
= charmap_find_value (charmap
, buf
, lenfrom
);
1366 if (seq
== NULL
|| seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1368 wc
= repertoire_find_value (repertoire
, buf
, lenfrom
);
1376 if (wc
== ILLEGAL_CHAR_VALUE
&& seq
== NULL
)
1377 /* We don't know anything about a character with this
1378 name. XXX Should we warn? */
1383 uint32_t wcs
[2] = { wc
, 0 };
1385 /* We have to allocate an entry. */
1386 elem
= new_element (collate
,
1387 seq
!= NULL
? seq
->bytes
: NULL
,
1388 seq
!= NULL
? seq
->nbytes
: 0,
1389 wc
== ILLEGAL_CHAR_VALUE
1390 ? NULL
: wcs
, buf
, lenfrom
, 1);
1394 /* Update the element. */
1397 elem
->mbs
= obstack_copy0 (&collate
->mempool
,
1398 seq
->bytes
, seq
->nbytes
);
1399 elem
->nmbs
= seq
->nbytes
;
1402 if (wc
!= ILLEGAL_CHAR_VALUE
)
1406 obstack_grow (&collate
->mempool
,
1407 &wc
, sizeof (uint32_t));
1408 obstack_grow (&collate
->mempool
,
1409 &zero
, sizeof (uint32_t));
1410 elem
->wcs
= obstack_finish (&collate
->mempool
);
1415 elem
->file
= ldfile
->fname
;
1416 elem
->line
= ldfile
->lineno
;
1417 elem
->section
= collate
->current_section
;
1420 /* Enqueue the new element. */
1421 elem
->last
= collate
->cursor
;
1422 elem
->next
= collate
->cursor
->next
;
1423 elem
->last
->next
= elem
;
1424 if (elem
->next
!= NULL
)
1425 elem
->next
->last
= elem
;
1426 collate
->cursor
= elem
;
1428 /* Now add the weights. They come from the `ellipsis_weights'
1429 member of `collate'. */
1430 elem
->weights
= (struct element_list_t
*)
1431 obstack_alloc (&collate
->mempool
,
1432 nrules
* sizeof (struct element_list_t
));
1433 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1434 if (collate
->ellipsis_weight
.weights
[cnt
].cnt
== 1
1435 && (collate
->ellipsis_weight
.weights
[cnt
].w
[0]
1436 == ELEMENT_ELLIPSIS2
))
1438 elem
->weights
[cnt
].w
= (struct element_t
**)
1439 obstack_alloc (&collate
->mempool
,
1440 sizeof (struct element_t
*));
1441 elem
->weights
[cnt
].w
[0] = elem
;
1442 elem
->weights
[cnt
].cnt
= 1;
1446 /* Simply use the weight from `ellipsis_weight'. */
1447 elem
->weights
[cnt
].w
=
1448 collate
->ellipsis_weight
.weights
[cnt
].w
;
1449 elem
->weights
[cnt
].cnt
=
1450 collate
->ellipsis_weight
.weights
[cnt
].cnt
;
/* Prepare LOCALE for reading an LC_COLLATE definition from LDFILE.

   When IGNORE_CONTENT is zero and LOCALE has no collation data yet, the
   LC_COLLATE slot of LOCALE is filled in.  With COPY_LOCALE == NULL a
   zeroed `struct locale_collate_t' is obtained from xcalloc, its three
   hash tables (elem_table, sym_table, seq_table) and its obstack are
   initialized and `col_weight_max' is set to -1.  The later assignment
   makes LOCALE share the collation data of COPY_LOCALE instead.
   The `translate_strings' and `return_widestr' flags of LDFILE are set
   to zero.

   NOTE(review): this copy of the file has lost lines (return type,
   braces, the `else' that presumably separates the two assignments to
   `collate'); confirm the exact control flow against a pristine copy.  */
1459 collate_startup (struct linereader
*ldfile
, struct localedef_t
*locale
,
1460 struct localedef_t
*copy_locale
, int ignore_content
)
1462 if (!ignore_content
&& locale
->categories
[LC_COLLATE
].collate
== NULL
)
1464 struct locale_collate_t
*collate
;
1466 if (copy_locale
== NULL
)
1468 collate
= locale
->categories
[LC_COLLATE
].collate
=
1469 (struct locale_collate_t
*)
1470 xcalloc (1, sizeof (struct locale_collate_t
));
1472 /* Init the various data structures. */
1473 init_hash (&collate
->elem_table
, 100);
1474 init_hash (&collate
->sym_table
, 100);
1475 init_hash (&collate
->seq_table
, 500);
1476 obstack_init (&collate
->mempool
);
/* NOTE(review): -1 looks like a "not yet set" marker -- confirm.  */
1478 collate
->col_weight_max
= -1;
1481 /* Reuse the copy_locale's data structures. */
1482 collate
= locale
->categories
[LC_COLLATE
].collate
=
1483 copy_locale
->categories
[LC_COLLATE
].collate
;
1486 ldfile
->translate_strings
= 0;
1487 ldfile
->return_widestr
= 0;
1492 collate_finish (struct localedef_t
*locale
, const struct charmap_t
*charmap
)
1494 /* Now is the time when we can assign the individual collation
1495 values for all the symbols. We have possibly different values
1496 for the wide- and the multibyte-character symbols. This is done
1497 since it might make a difference in the encoding if there is in
1498 some cases no multibyte-character but there are wide-characters.
1499 (The other way around it is not important since the encoded
1500 collation value in the wide-character case is 32 bits wide and
1501 therefore requires no encoding).
1503 The lowest collation value assigned is 2. Zero is reserved for
1504 the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1505 functions and 1 is used to separate the individual passes for the
1508 We also have to construct a list with all the bytes/words which
1509 can come first in a sequence, followed by all the elements which
1510 also start with this byte/word. The order is reverse which has
1511 among others the important effect that longer strings are located
1512 first in the list. This is required for the output data since
1513 the algorithm used in `strcoll' etc depends on this.
1515 The multibyte case is easy. We simply sort into an array with
1517 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1522 struct element_t
*runp
;
1524 int need_undefined
= 0;
1525 struct section_list
*sect
;
1527 int nr_wide_elems
= 0;
1529 if (collate
== NULL
)
1531 /* No data, no check. */
1533 WITH_CUR_LOCALE (error (0, 0, _("No definition for %s category found"),
1538 /* If this assertion is hit change the type in `element_t'. */
1539 assert (nrules
<= sizeof (runp
->used_in_level
) * 8);
1541 /* Make sure that the `position' rule is used either in all sections
1543 for (i
= 0; i
< nrules
; ++i
)
1544 for (sect
= collate
->sections
; sect
!= NULL
; sect
= sect
->next
)
1545 if (sect
->rules
!= NULL
1546 && ((sect
->rules
[i
] & sort_position
)
1547 != (collate
->sections
->rules
[i
] & sort_position
)))
1549 WITH_CUR_LOCALE (error (0, 0, _("\
1550 %s: `position' must be used for a specific level in all sections or none"),
1555 /* Find out which elements are used at which level. At the same
1556 time we find out whether we have any undefined symbols. */
1557 runp
= collate
->start
;
1558 while (runp
!= NULL
)
1560 if (runp
->mbs
!= NULL
)
1562 for (i
= 0; i
< nrules
; ++i
)
1566 for (j
= 0; j
< runp
->weights
[i
].cnt
; ++j
)
1567 /* A NULL pointer as the weight means IGNORE. */
1568 if (runp
->weights
[i
].w
[j
] != NULL
)
1570 if (runp
->weights
[i
].w
[j
]->weights
== NULL
)
1572 WITH_CUR_LOCALE (error_at_line (0, 0, runp
->file
,
1574 _("symbol `%s' not defined"),
1575 runp
->weights
[i
].w
[j
]->name
));
1578 runp
->weights
[i
].w
[j
] = &collate
->undefined
;
1581 /* Set the bit for the level. */
1582 runp
->weights
[i
].w
[j
]->used_in_level
|= 1 << i
;
1587 /* Up to the next entry. */
1591 /* Walk through the list of defined sequences and assign weights. Also
1592 create the data structure which will allow generating the single byte
1593 character based tables.
1595 Since at each time only the weights for each of the rules are
1596 only compared to other weights for this rule it is possible to
1597 assign more compact weight values than simply counting all
1598 weights in sequence. We can assign weights from 3, one for each
1599 rule individually and only for those elements, which are actually
1602 Why is this important? It is not for the wide char table. But
1603 it is for the singlebyte output since here larger numbers have to
1604 be encoded to make it possible to emit the value as a byte
1606 for (i
= 0; i
< nrules
; ++i
)
1611 runp
= collate
->start
;
1612 while (runp
!= NULL
)
1614 /* Determine the order. */
1615 if (runp
->used_in_level
!= 0)
1617 runp
->mborder
= (int *) obstack_alloc (&collate
->mempool
,
1618 nrules
* sizeof (int));
1620 for (i
= 0; i
< nrules
; ++i
)
1621 if ((runp
->used_in_level
& (1 << i
)) != 0)
1622 runp
->mborder
[i
] = mbact
[i
]++;
1624 runp
->mborder
[i
] = 0;
1627 if (runp
->mbs
!= NULL
)
1629 struct element_t
**eptr
;
1630 struct element_t
*lastp
= NULL
;
1632 /* Find the point where to insert in the list. */
1633 eptr
= &collate
->mbheads
[((unsigned char *) runp
->mbs
)[0]];
1634 while (*eptr
!= NULL
)
1636 if ((*eptr
)->nmbs
< runp
->nmbs
)
1639 if ((*eptr
)->nmbs
== runp
->nmbs
)
1641 int c
= memcmp ((*eptr
)->mbs
, runp
->mbs
, runp
->nmbs
);
1645 /* This should not happen. It means that we have
1646 two symbols with the same byte sequence. It is
1647 of course an error. */
1648 WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr
)->file
,
1651 symbol `%s' has the same encoding as"), (*eptr
)->name
);
1652 error_at_line (0, 0, runp
->file
,
1659 /* Insert it here. */
1663 /* To the next entry. */
1665 eptr
= &(*eptr
)->mbnext
;
1668 /* Set the pointers. */
1669 runp
->mbnext
= *eptr
;
1670 runp
->mblast
= lastp
;
1672 (*eptr
)->mblast
= runp
;
1678 if (runp
->used_in_level
)
1680 runp
->wcorder
= wcact
++;
1682 /* We take the opportunity to count the elements which have
1687 if (runp
->is_character
)
1689 if (runp
->nmbs
== 1)
1690 collate
->mbseqorder
[((unsigned char *) runp
->mbs
)[0]] = mbseqact
++;
1692 runp
->wcseqorder
= wcseqact
++;
1694 else if (runp
->mbs
!= NULL
&& runp
->weights
!= NULL
)
1695 /* This is for collation elements. */
1696 runp
->wcseqorder
= wcseqact
++;
1698 /* Up to the next entry. */
1702 /* Find out whether any of the `mbheads' entries is unset. In this
1703 case we use the UNDEFINED entry. */
1704 for (i
= 1; i
< 256; ++i
)
1705 if (collate
->mbheads
[i
] == NULL
)
1708 collate
->mbheads
[i
] = &collate
->undefined
;
1711 /* Now to the wide character case. */
1712 collate
->wcheads
.p
= 6;
1713 collate
->wcheads
.q
= 10;
1714 wchead_table_init (&collate
->wcheads
);
1716 collate
->wcseqorder
.p
= 6;
1717 collate
->wcseqorder
.q
= 10;
1718 collseq_table_init (&collate
->wcseqorder
);
1721 runp
= collate
->start
;
1722 while (runp
!= NULL
)
1724 if (runp
->wcs
!= NULL
)
1726 struct element_t
*e
;
1727 struct element_t
**eptr
;
1728 struct element_t
*lastp
;
1730 /* Insert the collation sequence value. */
1731 if (runp
->is_character
)
1732 collseq_table_add (&collate
->wcseqorder
, runp
->wcs
[0],
1735 /* Find the point where to insert in the list. */
1736 e
= wchead_table_get (&collate
->wcheads
, runp
->wcs
[0]);
1739 while (*eptr
!= NULL
)
1741 if ((*eptr
)->nwcs
< runp
->nwcs
)
1744 if ((*eptr
)->nwcs
== runp
->nwcs
)
1746 int c
= wmemcmp ((wchar_t *) (*eptr
)->wcs
,
1747 (wchar_t *) runp
->wcs
, runp
->nwcs
);
1751 /* This should not happen. It means that we have
1752 two symbols with the same wide character sequence. It is
1753 of course an error. */
1754 WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr
)->file
,
1757 symbol `%s' has the same encoding as"), (*eptr
)->name
);
1758 error_at_line (0, 0, runp
->file
,
1765 /* Insert it here. */
1769 /* To the next entry. */
1771 eptr
= &(*eptr
)->wcnext
;
1774 /* Set the pointers. */
1775 runp
->wcnext
= *eptr
;
1776 runp
->wclast
= lastp
;
1778 (*eptr
)->wclast
= runp
;
1781 wchead_table_add (&collate
->wcheads
, runp
->wcs
[0], e
);
1786 /* Up to the next entry. */
1790 collseq_table_finalize (&collate
->wcseqorder
);
1792 /* Now determine whether the UNDEFINED entry is needed and if yes,
1793 whether it was defined. */
1794 collate
->undefined
.used_in_level
= need_undefined
? ~0ul : 0;
1795 if (collate
->undefined
.file
== NULL
)
1799 /* This seems not to be enforced by recent standards. Don't
1800 emit an error, simply append UNDEFINED at the end. */
1802 WITH_CUR_LOCALE (error (0, 0, _("no definition of `UNDEFINED'")));
1804 /* Add UNDEFINED at the end. */
1805 collate
->undefined
.mborder
=
1806 (int *) obstack_alloc (&collate
->mempool
, nrules
* sizeof (int));
1808 for (i
= 0; i
< nrules
; ++i
)
1809 collate
->undefined
.mborder
[i
] = mbact
[i
]++;
1812 /* In any case we will need the definition for the wide character
1813 case. But we will not complain that it is missing since the
1814 specification strangely enough does not seem to account for
1816 collate
->undefined
.wcorder
= wcact
++;
1819 /* Finally, try to unify the rules for the sections. Whenever the rules
1820 for a section are the same as those for another section give the
1821 ruleset the same index. Since there are never many sections we can
1822 use an O(n^2) algorithm here. */
1823 sect
= collate
->sections
;
1824 while (sect
!= NULL
&& sect
->rules
== NULL
)
1827 /* Bail out if we have no sections because of earlier errors. */
1830 WITH_CUR_LOCALE (error (EXIT_FAILURE
, 0,
1831 _("too many errors; giving up")));
1838 struct section_list
*osect
= collate
->sections
;
1840 while (osect
!= sect
)
1841 if (osect
->rules
!= NULL
1842 && memcmp (osect
->rules
, sect
->rules
, nrules
) == 0)
1845 osect
= osect
->next
;
1848 sect
->ruleidx
= ruleidx
++;
1850 sect
->ruleidx
= osect
->ruleidx
;
1855 while (sect
!= NULL
&& sect
->rules
== NULL
);
1857 while (sect
!= NULL
);
1858 /* We are currently not prepared for more than 128 rulesets. But this
1859 should never really be a problem. */
1860 assert (ruleidx
<= 128);
/* Append the multibyte weight record of ELEM to POOL.

   The size of the object currently growing in POOL is remembered first;
   it serves as the index of the new record.  Then, for each of the
   `nrules' levels, the `mborder' value for that level of every non-NULL
   weight of ELEM is run through utf8_encode into a scratch buffer, and
   one byte holding the encoded length followed by the encoded bytes is
   added to POOL.  NULL weights (IGNORE) contribute nothing.

   The value returned is the remembered index with the low seven bits of
   ELEM's section `ruleidx' placed in bits 24..30.

   NOTE(review): this copy of the file has lost lines (return type,
   braces, the declarations of `cnt', `retval', `len' and `i', and the
   statement executed when ELEM is the UNDEFINED entry); confirm those
   against a pristine copy.  */
1865 output_weight (struct obstack
*pool
, struct locale_collate_t
*collate
,
1866 struct element_t
*elem
)
1871 /* Optimize the use of UNDEFINED. */
1872 if (elem
== &collate
->undefined
)
1873 /* The weights are already inserted. */
1876 /* This byte can start exactly one collation element and this is
1877 a single byte. We can directly give the index to the weights. */
1878 retval
= obstack_object_size (pool
);
1880 /* Construct the weight. */
1881 for (cnt
= 0; cnt
< nrules
; ++cnt
)
/* Scratch space: seven bytes are reserved per weight of this level.  */
1883 char buf
[elem
->weights
[cnt
].cnt
* 7];
1887 for (i
= 0; i
< elem
->weights
[cnt
].cnt
; ++i
)
1888 /* Encode the weight value. We do nothing for IGNORE entries. */
1889 if (elem
->weights
[cnt
].w
[i
] != NULL
)
1890 len
+= utf8_encode (&buf
[len
],
1891 elem
->weights
[cnt
].w
[i
]->mborder
[cnt
]);
1893 /* And add the buffer content. */
/* The length is stored in a single byte ahead of the data.  */
1894 obstack_1grow (pool
, len
);
1895 obstack_grow (pool
, buf
, len
);
1898 return retval
| ((elem
->section
->ruleidx
& 0x7f) << 24);
/* Append the wide character weight record of ELEM to POOL.

   The index of the new record is the size of the object currently
   growing in POOL divided by sizeof (int32_t), i.e. it counts 32-bit
   words.  For each of the `nrules' levels the `wcorder' values of all
   non-NULL weights of ELEM are collected; their count is added to POOL
   as one 32-bit word, followed by the values themselves.  NULL weights
   (IGNORE) are skipped.

   The value returned is the word index with the low seven bits of
   ELEM's section `ruleidx' placed in bits 24..30.

   NOTE(review): this copy of the file has lost lines (return type,
   braces, the declarations of `cnt', `retval', `i' and `j', and the
   statement executed when ELEM is the UNDEFINED entry); confirm those
   against a pristine copy.  */
1903 output_weightwc (struct obstack
*pool
, struct locale_collate_t
*collate
,
1904 struct element_t
*elem
)
1909 /* Optimize the use of UNDEFINED. */
1910 if (elem
== &collate
->undefined
)
1911 /* The weights are already inserted. */
1914 /* This byte can start exactly one collation element and this is
1915 a single byte. We can directly give the index to the weights. */
1916 retval
= obstack_object_size (pool
) / sizeof (int32_t);
1918 /* Construct the weight. */
1919 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1921 int32_t buf
[elem
->weights
[cnt
].cnt
];
/* `j' counts the entries actually stored, `i' walks all weights.  */
1925 for (i
= 0, j
= 0; i
< elem
->weights
[cnt
].cnt
; ++i
)
1926 if (elem
->weights
[cnt
].w
[i
] != NULL
)
1927 buf
[j
++] = elem
->weights
[cnt
].w
[i
]->wcorder
;
1929 /* And add the buffer content. */
1930 obstack_int32_grow (pool
, j
);
1932 obstack_grow (pool
, buf
, j
* sizeof (int32_t));
1935 return retval
| ((elem
->section
->ruleidx
& 0x7f) << 24);
1940 collate_output (struct localedef_t
*locale
, const struct charmap_t
*charmap
,
1941 const char *output_path
)
1943 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1944 const size_t nelems
= _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
);
1945 struct iovec iov
[2 + nelems
];
1946 struct locale_file data
;
1947 uint32_t idx
[nelems
];
1950 int32_t tablemb
[256];
1951 struct obstack weightpool
;
1952 struct obstack extrapool
;
1953 struct obstack indirectpool
;
1954 struct section_list
*sect
;
1955 struct collidx_table tablewc
;
1957 uint32_t *elem_table
;
1959 struct element_t
*runp
;
1961 data
.magic
= LIMAGIC (LC_COLLATE
);
1963 iov
[0].iov_base
= (void *) &data
;
1964 iov
[0].iov_len
= sizeof (data
);
1966 iov
[1].iov_base
= (void *) idx
;
1967 iov
[1].iov_len
= sizeof (idx
);
1969 idx
[0] = iov
[0].iov_len
+ iov
[1].iov_len
;
1972 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_NRULES
));
1973 iov
[2 + cnt
].iov_base
= &nrules
;
1974 iov
[2 + cnt
].iov_len
= sizeof (uint32_t);
1975 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
1978 /* If we have no LC_COLLATE data emit only the number of rules as zero. */
1979 if (collate
== NULL
)
1983 while (cnt
< _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
))
1985 /* The words have to be handled specially. */
1986 if (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB
))
1988 iov
[2 + cnt
].iov_base
= &dummy
;
1989 iov
[2 + cnt
].iov_len
= sizeof (int32_t);
1993 iov
[2 + cnt
].iov_base
= NULL
;
1994 iov
[2 + cnt
].iov_len
= 0;
1997 if (cnt
+ 1 < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
))
1998 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2002 assert (cnt
== _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
));
2004 write_locale_data (output_path
, LC_COLLATE
, "LC_COLLATE", 2 + cnt
, iov
);
2009 obstack_init (&weightpool
);
2010 obstack_init (&extrapool
);
2011 obstack_init (&indirectpool
);
2013 /* Since we are using the sign of an integer to mark indirection the
2014 offsets in the arrays we are indirectly referring to must not be
2015 zero since -0 == 0. Therefore we add a bit of dummy content. */
2016 obstack_int32_grow (&extrapool
, 0);
2017 obstack_int32_grow (&indirectpool
, 0);
2019 /* Prepare the ruleset table. */
2020 for (sect
= collate
->sections
, i
= 0; sect
!= NULL
; sect
= sect
->next
)
2021 if (sect
->rules
!= NULL
&& sect
->ruleidx
== i
)
2025 obstack_make_room (&weightpool
, nrules
);
2027 for (j
= 0; j
< nrules
; ++j
)
2028 obstack_1grow_fast (&weightpool
, sect
->rules
[j
]);
2031 /* And align the output. */
2032 i
= (nrules
* i
) % __alignof__ (int32_t);
2035 obstack_1grow (&weightpool
, '\0');
2036 while (++i
< __alignof__ (int32_t));
2038 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_RULESETS
));
2039 iov
[2 + cnt
].iov_len
= obstack_object_size (&weightpool
);
2040 iov
[2 + cnt
].iov_base
= obstack_finish (&weightpool
);
2041 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2044 /* Generate the 8-bit table. Walk through the lists of sequences
2045 starting with the same byte and add them one after the other to
2046 the table. In case we have more than one sequence starting with
2047 the same byte we have to use extra indirection.
2049 First add a record for the NUL byte. This entry will never be used
2050 so it does not matter. */
2053 /* Now insert the `UNDEFINED' value if it is used. Since this value
2054 will probably be used more than once it is good to store the
2055 weights only once. */
2056 if (collate
->undefined
.used_in_level
!= 0)
2057 output_weight (&weightpool
, collate
, &collate
->undefined
);
2059 for (ch
= 1; ch
< 256; ++ch
)
2060 if (collate
->mbheads
[ch
]->mbnext
== NULL
2061 && collate
->mbheads
[ch
]->nmbs
<= 1)
2063 tablemb
[ch
] = output_weight (&weightpool
, collate
,
2064 collate
->mbheads
[ch
]);
2068 /* The entries in the list are sorted by length and then
2069 alphabetically. This is the order in which we will add the
2070 elements to the collation table. This allows simply walking
2071 the table in sequence and stopping at the first matching
2072 entry. Since the longer sequences are coming first in the
2073 list they have the possibility to match first, just as it
2074 has to be. In the worst case we are walking to the end of
2075 the list where we put, if no singlebyte sequence is defined
2076 in the locale definition, the weights for UNDEFINED.
2078 To reduce the length of the search list we compress them a bit.
2079 This happens by collecting sequences of consecutive byte
2080 sequences in one entry (having a begin and an end byte sequence)
2081 and add only one index into the weight table. We can find the
2082 consecutive entries since they are also consecutive in the list. */
2083 struct element_t
*runp
= collate
->mbheads
[ch
];
2084 struct element_t
*lastp
;
2086 assert ((obstack_object_size (&extrapool
)
2087 & (__alignof__ (int32_t) - 1)) == 0);
2089 tablemb
[ch
] = -obstack_object_size (&extrapool
);
2093 /* Store the current index in the weight table. We know that
2094 the current position in the `extrapool' is aligned on a
2099 /* Find out whether this is a single entry or we have more than
2100 one consecutive entry. */
2101 if (runp
->mbnext
!= NULL
2102 && runp
->nmbs
== runp
->mbnext
->nmbs
2103 && memcmp (runp
->mbs
, runp
->mbnext
->mbs
, runp
->nmbs
- 1) == 0
2104 && (runp
->mbs
[runp
->nmbs
- 1]
2105 == runp
->mbnext
->mbs
[runp
->nmbs
- 1] + 1))
2108 struct element_t
*series_startp
= runp
;
2109 struct element_t
*curp
;
2111 /* Compute how much space we will need. */
2112 added
= ((sizeof (int32_t) + 1 + 2 * (runp
->nmbs
- 1)
2113 + __alignof__ (int32_t) - 1)
2114 & ~(__alignof__ (int32_t) - 1));
2115 assert ((obstack_object_size (&extrapool
)
2116 & (__alignof__ (int32_t) - 1)) == 0);
2117 obstack_make_room (&extrapool
, added
);
2119 /* More than one consecutive entry. We mark this by having
2120 a negative index into the indirect table. */
2121 obstack_int32_grow_fast (&extrapool
,
2122 -(obstack_object_size (&indirectpool
)
2123 / sizeof (int32_t)));
2125 /* Now search first the end of the series. */
2127 runp
= runp
->mbnext
;
2128 while (runp
->mbnext
!= NULL
2129 && runp
->nmbs
== runp
->mbnext
->nmbs
2130 && memcmp (runp
->mbs
, runp
->mbnext
->mbs
,
2131 runp
->nmbs
- 1) == 0
2132 && (runp
->mbs
[runp
->nmbs
- 1]
2133 == runp
->mbnext
->mbs
[runp
->nmbs
- 1] + 1));
2135 /* Now walk backward from here to the beginning. */
2138 assert (runp
->nmbs
<= 256);
2139 obstack_1grow_fast (&extrapool
, curp
->nmbs
- 1);
2140 for (i
= 1; i
< curp
->nmbs
; ++i
)
2141 obstack_1grow_fast (&extrapool
, curp
->mbs
[i
]);
2143 /* Now find the end of the consecutive sequence and
2144 add all the indices in the indirect pool. */
2147 weightidx
= output_weight (&weightpool
, collate
, curp
);
2148 obstack_int32_grow (&indirectpool
, weightidx
);
2150 curp
= curp
->mblast
;
2152 while (curp
!= series_startp
);
2154 /* Add the final weight. */
2155 weightidx
= output_weight (&weightpool
, collate
, curp
);
2156 obstack_int32_grow (&indirectpool
, weightidx
);
2158 /* And add the end byte sequence. Without length this
2160 for (i
= 1; i
< curp
->nmbs
; ++i
)
2161 obstack_1grow_fast (&extrapool
, curp
->mbs
[i
]);
2165 /* A single entry. Simply add the index and the length and
2166 string (except for the first character which is already
2170 /* Output the weight info. */
2171 weightidx
= output_weight (&weightpool
, collate
, runp
);
2173 added
= ((sizeof (int32_t) + 1 + runp
->nmbs
- 1
2174 + __alignof__ (int32_t) - 1)
2175 & ~(__alignof__ (int32_t) - 1));
2176 assert ((obstack_object_size (&extrapool
)
2177 & (__alignof__ (int32_t) - 1)) == 0);
2178 obstack_make_room (&extrapool
, added
);
2180 obstack_int32_grow_fast (&extrapool
, weightidx
);
2181 assert (runp
->nmbs
<= 256);
2182 obstack_1grow_fast (&extrapool
, runp
->nmbs
- 1);
2184 for (i
= 1; i
< runp
->nmbs
; ++i
)
2185 obstack_1grow_fast (&extrapool
, runp
->mbs
[i
]);
2188 /* Add alignment bytes if necessary. */
2189 while ((obstack_object_size (&extrapool
)
2190 & (__alignof__ (int32_t) - 1)) != 0)
2191 obstack_1grow_fast (&extrapool
, '\0');
2195 runp
= runp
->mbnext
;
2197 while (runp
!= NULL
);
2199 assert ((obstack_object_size (&extrapool
)
2200 & (__alignof__ (int32_t) - 1)) == 0);
2202 /* If the final entry in the list is not a single character we
2203 add an UNDEFINED entry here. */
2204 if (lastp
->nmbs
!= 1)
2206 int added
= ((sizeof (int32_t) + 1 + 1 + __alignof__ (int32_t) - 1)
2207 & ~(__alignof__ (int32_t) - 1));
2208 obstack_make_room (&extrapool
, added
);
2210 obstack_int32_grow_fast (&extrapool
, 0);
2211 /* XXX What rule? We just pick the first. */
2212 obstack_1grow_fast (&extrapool
, 0);
2213 /* Length is zero. */
2214 obstack_1grow_fast (&extrapool
, 0);
2216 /* Add alignment bytes if necessary. */
2217 while ((obstack_object_size (&extrapool
)
2218 & (__alignof__ (int32_t) - 1)) != 0)
2219 obstack_1grow_fast (&extrapool
, '\0');
2223 /* Add padding to the tables if necessary. */
2224 while ((obstack_object_size (&weightpool
) & (__alignof__ (int32_t) - 1))
2226 obstack_1grow (&weightpool
, 0);
2228 /* Now add the four tables. */
2229 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_TABLEMB
));
2230 iov
[2 + cnt
].iov_base
= tablemb
;
2231 iov
[2 + cnt
].iov_len
= sizeof (tablemb
);
2232 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2233 assert ((iov
[2 + cnt
].iov_len
& (__alignof__ (int32_t) - 1)) == 0);
2236 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTMB
));
2237 iov
[2 + cnt
].iov_len
= obstack_object_size (&weightpool
);
2238 iov
[2 + cnt
].iov_base
= obstack_finish (&weightpool
);
2239 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2242 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_EXTRAMB
));
2243 iov
[2 + cnt
].iov_len
= obstack_object_size (&extrapool
);
2244 iov
[2 + cnt
].iov_base
= obstack_finish (&extrapool
);
2245 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2248 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTMB
));
2249 iov
[2 + cnt
].iov_len
= obstack_object_size (&indirectpool
);
2250 iov
[2 + cnt
].iov_base
= obstack_finish (&indirectpool
);
2251 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2252 assert ((iov
[2 + cnt
].iov_len
& (__alignof__ (int32_t) - 1)) == 0);
2256 /* Now the same for the wide character table. We need to store some
2257 more information here. */
2258 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_GAP1
));
2259 iov
[2 + cnt
].iov_base
= NULL
;
2260 iov
[2 + cnt
].iov_len
= 0;
2261 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2262 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2265 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_GAP2
));
2266 iov
[2 + cnt
].iov_base
= NULL
;
2267 iov
[2 + cnt
].iov_len
= 0;
2268 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2269 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2272 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_GAP3
));
2273 iov
[2 + cnt
].iov_base
= NULL
;
2274 iov
[2 + cnt
].iov_len
= 0;
2275 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2276 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2279 /* Since we are using the sign of an integer to mark indirection the
2280 offsets in the arrays we are indirectly referring to must not be
2281 zero since -0 == 0. Therefore we add a bit of dummy content. */
2282 obstack_int32_grow (&extrapool
, 0);
2283 obstack_int32_grow (&indirectpool
, 0);
2285 /* Now insert the `UNDEFINED' value if it is used. Since this value
2286 will probably be used more than once it is good to store the
2287 weights only once. */
2288 if (output_weightwc (&weightpool
, collate
, &collate
->undefined
) != 0)
2291 /* Generate the table. Walk through the lists of sequences starting
2292 with the same wide character and add them one after the other to
2293 the table. In case we have more than one sequence starting with
2294 the same byte we have to use extra indirection. */
2296 auto void add_to_tablewc (uint32_t ch
, struct element_t
*runp
);
2298 void add_to_tablewc (uint32_t ch
, struct element_t
*runp
)
2300 if (runp
->wcnext
== NULL
&& runp
->nwcs
== 1)
2302 int32_t weigthidx
= output_weightwc (&weightpool
, collate
, runp
);
2303 collidx_table_add (&tablewc
, ch
, weigthidx
);
2307 /* As for the singlebyte table, we recognize sequences and
2309 struct element_t
*lastp
;
2311 collidx_table_add (&tablewc
, ch
,
2312 -(obstack_object_size (&extrapool
) / sizeof (uint32_t)));
2316 /* Store the current index in the weight table. We know that
2317 the current position in the `extrapool' is aligned on a
2322 /* Find out whether this is a single entry or we have more than
2323 one consecutive entry. */
2324 if (runp
->wcnext
!= NULL
2325 && runp
->nwcs
== runp
->wcnext
->nwcs
2326 && wmemcmp ((wchar_t *) runp
->wcs
,
2327 (wchar_t *)runp
->wcnext
->wcs
,
2328 runp
->nwcs
- 1) == 0
2329 && (runp
->wcs
[runp
->nwcs
- 1]
2330 == runp
->wcnext
->wcs
[runp
->nwcs
- 1] + 1))
2333 struct element_t
*series_startp
= runp
;
2334 struct element_t
*curp
;
2336 /* Now add first the initial byte sequence. */
2337 added
= (1 + 1 + 2 * (runp
->nwcs
- 1)) * sizeof (int32_t);
2338 if (sizeof (int32_t) == sizeof (int))
2339 obstack_make_room (&extrapool
, added
);
2341 /* More than one consecutive entry. We mark this by having
2342 a negative index into the indirect table. */
2343 obstack_int32_grow_fast (&extrapool
,
2344 -(obstack_object_size (&indirectpool
)
2345 / sizeof (int32_t)));
2346 obstack_int32_grow_fast (&extrapool
, runp
->nwcs
- 1);
2349 runp
= runp
->wcnext
;
2350 while (runp
->wcnext
!= NULL
2351 && runp
->nwcs
== runp
->wcnext
->nwcs
2352 && wmemcmp ((wchar_t *) runp
->wcs
,
2353 (wchar_t *)runp
->wcnext
->wcs
,
2354 runp
->nwcs
- 1) == 0
2355 && (runp
->wcs
[runp
->nwcs
- 1]
2356 == runp
->wcnext
->wcs
[runp
->nwcs
- 1] + 1));
2358 /* Now walk backward from here to the beginning. */
2361 for (i
= 1; i
< runp
->nwcs
; ++i
)
2362 obstack_int32_grow_fast (&extrapool
, curp
->wcs
[i
]);
2364 /* Now find the end of the consecutive sequence and
2365 add all the indices in the indirect pool. */
2368 weightidx
= output_weightwc (&weightpool
, collate
,
2370 obstack_int32_grow (&indirectpool
, weightidx
);
2372 curp
= curp
->wclast
;
2374 while (curp
!= series_startp
);
2376 /* Add the final weight. */
2377 weightidx
= output_weightwc (&weightpool
, collate
, curp
);
2378 obstack_int32_grow (&indirectpool
, weightidx
);
2380 /* And add the end byte sequence. Without length this
2382 for (i
= 1; i
< curp
->nwcs
; ++i
)
2383 obstack_int32_grow (&extrapool
, curp
->wcs
[i
]);
2387 /* A single entry. Simply add the index and the length and
2388 string (except for the first character which is already
2392 /* Output the weight info. */
2393 weightidx
= output_weightwc (&weightpool
, collate
, runp
);
2395 added
= (1 + 1 + runp
->nwcs
- 1) * sizeof (int32_t);
2396 if (sizeof (int) == sizeof (int32_t))
2397 obstack_make_room (&extrapool
, added
);
2399 obstack_int32_grow_fast (&extrapool
, weightidx
);
2400 obstack_int32_grow_fast (&extrapool
, runp
->nwcs
- 1);
2401 for (i
= 1; i
< runp
->nwcs
; ++i
)
2402 obstack_int32_grow_fast (&extrapool
, runp
->wcs
[i
]);
2407 runp
= runp
->wcnext
;
2409 while (runp
!= NULL
);
2415 collidx_table_init (&tablewc
);
2417 wchead_table_iterate (&collate
->wcheads
, add_to_tablewc
);
2419 collidx_table_finalize (&tablewc
);
2422 /* Now add the four tables. */
2423 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_TABLEWC
));
2424 iov
[2 + cnt
].iov_base
= tablewc
.result
;
2425 iov
[2 + cnt
].iov_len
= tablewc
.result_size
;
2426 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2427 assert (iov
[2 + cnt
].iov_len
% sizeof (int32_t) == 0);
2428 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2431 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTWC
));
2432 iov
[2 + cnt
].iov_len
= obstack_object_size (&weightpool
);
2433 iov
[2 + cnt
].iov_base
= obstack_finish (&weightpool
);
2434 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2435 assert (iov
[2 + cnt
].iov_len
% sizeof (int32_t) == 0);
2436 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2439 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_EXTRAWC
));
2440 iov
[2 + cnt
].iov_len
= obstack_object_size (&extrapool
);
2441 iov
[2 + cnt
].iov_base
= obstack_finish (&extrapool
);
2442 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2443 assert (iov
[2 + cnt
].iov_len
% sizeof (int32_t) == 0);
2444 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2447 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTWC
));
2448 iov
[2 + cnt
].iov_len
= obstack_object_size (&indirectpool
);
2449 iov
[2 + cnt
].iov_base
= obstack_finish (&indirectpool
);
2450 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2451 assert (iov
[2 + cnt
].iov_len
% sizeof (int32_t) == 0);
2452 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2456 /* Finally write the table with collation element names out. It is
2457 a hash table with a simple function which gets the name of the
2458 character as the input. One character might have many names. The
2459 value associated with the name is an index into the weight table
2460 where we are then interested in the first-level weight value.
2462 To determine how large the table should be we are counting the
2463 elements we have to put in. Since we are using internal chaining
2464 using a secondary hash function we have to make the table a bit
2465 larger to avoid extremely long search times. We can achieve
2466 good results with a 40% larger table than there are entries. */
2468 runp
= collate
->start
;
2469 while (runp
!= NULL
)
2471 if (runp
->mbs
!= NULL
&& runp
->weights
!= NULL
&& !runp
->is_character
)
2472 /* Yep, the element really counts. */
2477 /* Add 40% and find the next prime number. */
2478 elem_size
= next_prime (elem_size
* 1.4);
2480 /* Allocate the table. Each entry consists of two words: the hash
2481 value and an index in a secondary table which provides the index
2482 into the weight table and the string itself (so that a match can
2484 elem_table
= (uint32_t *) obstack_alloc (&extrapool
,
2485 elem_size
* 2 * sizeof (uint32_t));
2486 memset (elem_table
, '\0', elem_size
* 2 * sizeof (uint32_t));
2488 /* Now add the elements. */
2489 runp
= collate
->start
;
2490 while (runp
!= NULL
)
2492 if (runp
->mbs
!= NULL
&& runp
->weights
!= NULL
&& !runp
->is_character
)
2494 /* Compute the hash value of the name. */
2495 uint32_t namelen
= strlen (runp
->name
);
2496 uint32_t hash
= elem_hash (runp
->name
, namelen
);
2497 size_t idx
= hash
% elem_size
;
2498 size_t start_idx
= idx
;
2500 if (elem_table
[idx
* 2] != 0)
2502 /* The spot is already taken. Try iterating using the value
2503 from the secondary hashing function. */
2504 size_t iter
= hash
% (elem_size
- 2) + 1;
2509 if (idx
>= elem_size
)
2511 assert (idx
!= start_idx
);
2513 while (elem_table
[idx
* 2] != 0);
2515 /* This is the spot where we will insert the value. */
2516 elem_table
[idx
* 2] = hash
;
2517 elem_table
[idx
* 2 + 1] = obstack_object_size (&extrapool
);
2519 /* Then the string itself including length. */
2520 obstack_1grow (&extrapool
, namelen
);
2521 obstack_grow (&extrapool
, runp
->name
, namelen
);
2523 /* And the multibyte representation. */
2524 obstack_1grow (&extrapool
, runp
->nmbs
);
2525 obstack_grow (&extrapool
, runp
->mbs
, runp
->nmbs
);
2527 /* And align again to 32 bits. */
2528 if ((1 + namelen
+ 1 + runp
->nmbs
) % sizeof (int32_t) != 0)
2529 obstack_grow (&extrapool
, "\0\0",
2531 - ((1 + namelen
+ 1 + runp
->nmbs
)
2532 % sizeof (int32_t))));
2534 /* Now some 32-bit values: multibyte collation sequence,
2535 wide char string (including length), and wide char
2536 collation sequence. */
2537 obstack_int32_grow (&extrapool
, runp
->mbseqorder
);
2539 obstack_int32_grow (&extrapool
, runp
->nwcs
);
2540 obstack_grow (&extrapool
, runp
->wcs
,
2541 runp
->nwcs
* sizeof (uint32_t));
2543 obstack_int32_grow (&extrapool
, runp
->wcseqorder
);
2549 /* Prepare to write out this data. */
2550 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB
));
2551 iov
[2 + cnt
].iov_base
= &elem_size
;
2552 iov
[2 + cnt
].iov_len
= sizeof (int32_t);
2553 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2554 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2557 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_SYMB_TABLEMB
));
2558 iov
[2 + cnt
].iov_base
= elem_table
;
2559 iov
[2 + cnt
].iov_len
= elem_size
* 2 * sizeof (int32_t);
2560 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2561 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2564 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_SYMB_EXTRAMB
));
2565 iov
[2 + cnt
].iov_len
= obstack_object_size (&extrapool
);
2566 iov
[2 + cnt
].iov_base
= obstack_finish (&extrapool
);
2567 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2570 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQMB
));
2571 iov
[2 + cnt
].iov_base
= collate
->mbseqorder
;
2572 iov
[2 + cnt
].iov_len
= 256;
2573 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2576 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQWC
));
2577 iov
[2 + cnt
].iov_base
= collate
->wcseqorder
.result
;
2578 iov
[2 + cnt
].iov_len
= collate
->wcseqorder
.result_size
;
2579 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2580 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2583 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_CODESET
));
2584 iov
[2 + cnt
].iov_base
= (void *) charmap
->code_set_name
;
2585 iov
[2 + cnt
].iov_len
= strlen (iov
[2 + cnt
].iov_base
) + 1;
2588 assert (cnt
== _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
));
2590 write_locale_data (output_path
, LC_COLLATE
, "LC_COLLATE", 2 + cnt
, iov
);
2592 obstack_free (&weightpool
, NULL
);
2593 obstack_free (&extrapool
, NULL
);
2594 obstack_free (&indirectpool
, NULL
);
2599 collate_read (struct linereader
*ldfile
, struct localedef_t
*result
,
2600 const struct charmap_t
*charmap
, const char *repertoire_name
,
2603 struct repertoire_t
*repertoire
= NULL
;
2604 struct locale_collate_t
*collate
;
2606 struct token
*arg
= NULL
;
2607 enum token_t nowtok
;
2608 enum token_t was_ellipsis
= tok_none
;
2609 struct localedef_t
*copy_locale
= NULL
;
2612 1 - between `order_start' and `order_end'
2613 2 - after `order_end'
2614 3 - after `reorder-after', waiting for `reorder-end'
2615 4 - after `reorder-end'
2616 5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2617 6 - after `reorder-sections-end'
2621 /* Get the repertoire we have to use. */
2622 if (repertoire_name
!= NULL
)
2623 repertoire
= repertoire_read (repertoire_name
);
2625 /* The rest of the line containing `LC_COLLATE' must be free. */
2626 lr_ignore_rest (ldfile
, 1);
2630 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2633 while (nowtok
== tok_eol
);
2635 if (nowtok
== tok_copy
)
2638 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2639 if (now
->tok
!= tok_string
)
2641 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2645 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2646 while (now
->tok
!= tok_eof
&& now
->tok
!= tok_end
);
2648 if (now
->tok
!= tok_eof
2649 || (now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
),
2650 now
->tok
== tok_eof
))
2651 lr_error (ldfile
, _("%s: premature end of file"), "LC_COLLATE");
2652 else if (now
->tok
!= tok_lc_collate
)
2654 lr_error (ldfile
, _("\
2655 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2656 lr_ignore_rest (ldfile
, 0);
2659 lr_ignore_rest (ldfile
, 1);
2664 if (! ignore_content
)
2666 /* Get the locale definition. */
2667 copy_locale
= load_locale (LC_COLLATE
, now
->val
.str
.startmb
,
2668 repertoire_name
, charmap
, NULL
);
2669 if ((copy_locale
->avail
& COLLATE_LOCALE
) == 0)
2671 /* Not yet loaded. So do it now. */
2672 if (locfile_read (copy_locale
, charmap
) != 0)
2676 if (copy_locale
->categories
[LC_COLLATE
].collate
== NULL
)
2680 lr_ignore_rest (ldfile
, 1);
2682 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2686 /* Prepare the data structures. */
2687 collate_startup (ldfile
, result
, copy_locale
, ignore_content
);
2688 collate
= result
->categories
[LC_COLLATE
].collate
;
2696 /* Of course we don't proceed beyond the end of file. */
2697 if (nowtok
== tok_eof
)
2700 /* Ignore empty lines. */
2701 if (nowtok
== tok_eol
)
2703 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2711 /* Allow copying other locales. */
2712 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2713 if (now
->tok
!= tok_string
)
2716 if (! ignore_content
)
2717 load_locale (LC_COLLATE
, now
->val
.str
.startmb
, repertoire_name
,
2720 lr_ignore_rest (ldfile
, 1);
2723 case tok_coll_weight_max
:
2724 /* Ignore the rest of the line if we don't need the input of
2728 lr_ignore_rest (ldfile
, 0);
2735 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2736 if (arg
->tok
!= tok_number
)
2738 if (collate
->col_weight_max
!= -1)
2739 lr_error (ldfile
, _("%s: duplicate definition of `%s'"),
2740 "LC_COLLATE", "col_weight_max");
2742 collate
->col_weight_max
= arg
->val
.num
;
2743 lr_ignore_rest (ldfile
, 1);
2746 case tok_section_symbol
:
2747 /* Ignore the rest of the line if we don't need the input of
2751 lr_ignore_rest (ldfile
, 0);
2758 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2759 if (arg
->tok
!= tok_bsymbol
)
2761 else if (!ignore_content
)
2763 /* Check whether this section is already known. */
2764 struct section_list
*known
= collate
->sections
;
2765 while (known
!= NULL
)
2767 if (strcmp (known
->name
, arg
->val
.str
.startmb
) == 0)
2769 known
= known
->next
;
2775 _("%s: duplicate declaration of section `%s'"),
2776 "LC_COLLATE", arg
->val
.str
.startmb
);
2777 free (arg
->val
.str
.startmb
);
2780 collate
->sections
= make_seclist_elem (collate
,
2781 arg
->val
.str
.startmb
,
2784 lr_ignore_rest (ldfile
, known
== NULL
);
2788 free (arg
->val
.str
.startmb
);
2789 lr_ignore_rest (ldfile
, 0);
2793 case tok_collating_element
:
2794 /* Ignore the rest of the line if we don't need the input of
2798 lr_ignore_rest (ldfile
, 0);
2802 if (state
!= 0 && state
!= 2)
2805 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2806 if (arg
->tok
!= tok_bsymbol
)
2810 const char *symbol
= arg
->val
.str
.startmb
;
2811 size_t symbol_len
= arg
->val
.str
.lenmb
;
2813 /* Next the `from' keyword. */
2814 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2815 if (arg
->tok
!= tok_from
)
2817 free ((char *) symbol
);
2821 ldfile
->return_widestr
= 1;
2822 ldfile
->translate_strings
= 1;
2824 /* Finally the string with the replacement. */
2825 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2827 ldfile
->return_widestr
= 0;
2828 ldfile
->translate_strings
= 0;
2830 if (arg
->tok
!= tok_string
)
2833 if (!ignore_content
&& symbol
!= NULL
)
2835 /* The name is already defined. */
2836 if (check_duplicate (ldfile
, collate
, charmap
,
2837 repertoire
, symbol
, symbol_len
))
2840 if (arg
->val
.str
.startmb
!= NULL
)
2841 insert_entry (&collate
->elem_table
, symbol
, symbol_len
,
2842 new_element (collate
,
2843 arg
->val
.str
.startmb
,
2844 arg
->val
.str
.lenmb
- 1,
2845 arg
->val
.str
.startwc
,
2846 symbol
, symbol_len
, 0));
2852 free ((char *) symbol
);
2853 if (arg
->val
.str
.startmb
!= NULL
)
2854 free (arg
->val
.str
.startmb
);
2855 if (arg
->val
.str
.startwc
!= NULL
)
2856 free (arg
->val
.str
.startwc
);
2858 lr_ignore_rest (ldfile
, 1);
2862 case tok_collating_symbol
:
2863 /* Ignore the rest of the line if we don't need the input of
2867 lr_ignore_rest (ldfile
, 0);
2871 if (state
!= 0 && state
!= 2)
2874 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2875 if (arg
->tok
!= tok_bsymbol
)
2879 char *symbol
= arg
->val
.str
.startmb
;
2880 size_t symbol_len
= arg
->val
.str
.lenmb
;
2881 char *endsymbol
= NULL
;
2882 size_t endsymbol_len
= 0;
2883 enum token_t ellipsis
= tok_none
;
2885 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2886 if (arg
->tok
== tok_ellipsis2
|| arg
->tok
== tok_ellipsis4
)
2888 ellipsis
= arg
->tok
;
2890 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
2892 if (arg
->tok
!= tok_bsymbol
)
2898 endsymbol
= arg
->val
.str
.startmb
;
2899 endsymbol_len
= arg
->val
.str
.lenmb
;
2901 lr_ignore_rest (ldfile
, 1);
2903 else if (arg
->tok
!= tok_eol
)
2909 if (!ignore_content
)
2912 || (ellipsis
!= tok_none
&& endsymbol
== NULL
))
2914 lr_error (ldfile
, _("\
2915 %s: unknown character in collating symbol name"),
2919 else if (ellipsis
== tok_none
)
2921 /* A single symbol, no ellipsis. */
2922 if (check_duplicate (ldfile
, collate
, charmap
,
2923 repertoire
, symbol
, symbol_len
))
2924 /* The name is already defined. */
2927 insert_entry (&collate
->sym_table
, symbol
, symbol_len
,
2928 new_symbol (collate
, symbol
, symbol_len
));
2930 else if (symbol_len
!= endsymbol_len
)
2934 _("invalid names for character range"));
2939 /* Oh my, we have to handle an ellipsis. First, as
2940 usual, determine the common prefix and then
2941 convert the rest into a range. */
2943 unsigned long int from
;
2944 unsigned long int to
;
2947 for (prefixlen
= 0; prefixlen
< symbol_len
; ++prefixlen
)
2948 if (symbol
[prefixlen
] != endsymbol
[prefixlen
])
2951 /* Convert the rest into numbers. */
2952 symbol
[symbol_len
] = '\0';
2953 from
= strtoul (&symbol
[prefixlen
], &endp
,
2954 ellipsis
== tok_ellipsis2
? 16 : 10);
2956 goto col_sym_inv_range
;
2958 endsymbol
[symbol_len
] = '\0';
2959 to
= strtoul (&endsymbol
[prefixlen
], &endp
,
2960 ellipsis
== tok_ellipsis2
? 16 : 10);
2962 goto col_sym_inv_range
;
2965 goto col_sym_inv_range
;
2967 /* Now loop over all entries. */
2972 symbuf
= (char *) obstack_alloc (&collate
->mempool
,
2975 /* Create the name. */
2977 ellipsis
== tok_ellipsis2
2978 ? "%.*s%.*lX" : "%.*s%.*lu",
2979 (int) prefixlen
, symbol
,
2980 (int) (symbol_len
- prefixlen
), from
);
2982 if (check_duplicate (ldfile
, collate
, charmap
,
2983 repertoire
, symbuf
, symbol_len
))
2984 /* The name is already defined. */
2987 insert_entry (&collate
->sym_table
, symbuf
,
2989 new_symbol (collate
, symbuf
,
2992 /* Increment the counter. */
3004 if (endsymbol
!= NULL
)
3010 case tok_symbol_equivalence
:
3011 /* Ignore the rest of the line if we don't need the input of
3015 lr_ignore_rest (ldfile
, 0);
3022 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3023 if (arg
->tok
!= tok_bsymbol
)
3027 const char *newname
= arg
->val
.str
.startmb
;
3028 size_t newname_len
= arg
->val
.str
.lenmb
;
3029 const char *symname
;
3031 void *symval
; /* Actually struct symbol_t* */
3033 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3034 if (arg
->tok
!= tok_bsymbol
)
3036 if (newname
!= NULL
)
3037 free ((char *) newname
);
3041 symname
= arg
->val
.str
.startmb
;
3042 symname_len
= arg
->val
.str
.lenmb
;
3044 if (newname
== NULL
)
3046 lr_error (ldfile
, _("\
3047 %s: unknown character in equivalent definition name"),
3051 if (newname
!= NULL
)
3052 free ((char *) newname
);
3053 if (symname
!= NULL
)
3054 free ((char *) symname
);
3057 if (symname
== NULL
)
3059 lr_error (ldfile
, _("\
3060 %s: unknown character in equivalent definition value"),
3062 goto sym_equiv_free
;
3065 /* See whether the symbol name is already defined. */
3066 if (find_entry (&collate
->sym_table
, symname
, symname_len
,
3069 lr_error (ldfile
, _("\
3070 %s: unknown symbol `%s' in equivalent definition"),
3071 "LC_COLLATE", symname
);
3072 goto sym_equiv_free
;
3075 if (insert_entry (&collate
->sym_table
,
3076 newname
, newname_len
, symval
) < 0)
3078 lr_error (ldfile
, _("\
3079 error while adding equivalent collating symbol"));
3080 goto sym_equiv_free
;
3083 free ((char *) symname
);
3085 lr_ignore_rest (ldfile
, 1);
3089 /* We get told about the scripts we know. */
3090 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3091 if (arg
->tok
!= tok_bsymbol
)
3095 struct section_list
*runp
= collate
->known_sections
;
3098 while (runp
!= NULL
)
3099 if (strncmp (runp
->name
, arg
->val
.str
.startmb
,
3100 arg
->val
.str
.lenmb
) == 0
3101 && runp
->name
[arg
->val
.str
.lenmb
] == '\0')
3104 runp
= runp
->def_next
;
3108 lr_error (ldfile
, _("duplicate definition of script `%s'"),
3110 lr_ignore_rest (ldfile
, 0);
3114 runp
= (struct section_list
*) xcalloc (1, sizeof (*runp
));
3115 name
= (char *) xmalloc (arg
->val
.str
.lenmb
+ 1);
3116 memcpy (name
, arg
->val
.str
.startmb
, arg
->val
.str
.lenmb
);
3117 name
[arg
->val
.str
.lenmb
] = '\0';
3120 runp
->def_next
= collate
->known_sections
;
3121 collate
->known_sections
= runp
;
3123 lr_ignore_rest (ldfile
, 1);
3126 case tok_order_start
:
3127 /* Ignore the rest of the line if we don't need the input of
3131 lr_ignore_rest (ldfile
, 0);
3135 if (state
!= 0 && state
!= 1 && state
!= 2)
3139 /* The 14652 draft does not specify whether all `order_start' lines
3140 must contain the same number of sort-rules, but 14651 does. So
3141 we require this here as well. */
3142 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3143 if (arg
->tok
== tok_bsymbol
)
3145 /* This had better be a section name. */
3146 struct section_list
*sp
= collate
->known_sections
;
3148 && (sp
->name
== NULL
3149 || strncmp (sp
->name
, arg
->val
.str
.startmb
,
3150 arg
->val
.str
.lenmb
) != 0
3151 || sp
->name
[arg
->val
.str
.lenmb
] != '\0'))
3156 lr_error (ldfile
, _("\
3157 %s: unknown section name `%.*s'"),
3158 "LC_COLLATE", (int) arg
->val
.str
.lenmb
,
3159 arg
->val
.str
.startmb
);
3160 /* We use the error section. */
3161 collate
->current_section
= &collate
->error_section
;
3163 if (collate
->error_section
.first
== NULL
)
3165 /* Insert &collate->error_section at the end of
3166 the collate->sections list. */
3167 if (collate
->sections
== NULL
)
3168 collate
->sections
= &collate
->error_section
;
3171 sp
= collate
->sections
;
3172 while (sp
->next
!= NULL
)
3175 sp
->next
= &collate
->error_section
;
3177 collate
->error_section
.next
= NULL
;
3182 /* One should not be allowed to open the same
3184 if (sp
->first
!= NULL
)
3185 lr_error (ldfile
, _("\
3186 %s: multiple order definitions for section `%s'"),
3187 "LC_COLLATE", sp
->name
);
3190 /* Insert sp in the collate->sections list,
3191 right after collate->current_section. */
3192 if (collate
->current_section
== NULL
)
3193 collate
->current_section
= sp
;
3196 sp
->next
= collate
->current_section
->next
;
3197 collate
->current_section
->next
= sp
;
3201 /* Next should come the end of the line or a semicolon. */
3202 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
3204 if (arg
->tok
== tok_eol
)
3208 /* This means we have exactly one rule: `forward'. */
3210 lr_error (ldfile
, _("\
3211 %s: invalid number of sorting rules"),
3215 sp
->rules
= obstack_alloc (&collate
->mempool
,
3216 (sizeof (enum coll_sort_rule
)
3218 for (cnt
= 0; cnt
< nrules
; ++cnt
)
3219 sp
->rules
[cnt
] = sort_forward
;
3225 /* Get the next token. */
3226 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
3232 /* There is no section symbol. Therefore we use the unnamed
3234 collate
->current_section
= &collate
->unnamed_section
;
3236 if (collate
->unnamed_section
.first
!= NULL
)
3237 lr_error (ldfile
, _("\
3238 %s: multiple order definitions for unnamed section"),
3242 /* Insert &collate->unnamed_section at the beginning of
3243 the collate->sections list. */
3244 collate
->unnamed_section
.next
= collate
->sections
;
3245 collate
->sections
= &collate
->unnamed_section
;
3249 /* Now read the direction names. */
3250 read_directions (ldfile
, arg
, charmap
, repertoire
, result
);
3252 /* From now we need the strings untranslated. */
3253 ldfile
->translate_strings
= 0;
3257 /* Ignore the rest of the line if we don't need the input of
3261 lr_ignore_rest (ldfile
, 0);
3268 /* Handle ellipsis at end of list. */
3269 if (was_ellipsis
!= tok_none
)
3271 handle_ellipsis (ldfile
, NULL
, 0, was_ellipsis
, charmap
,
3272 repertoire
, result
);
3273 was_ellipsis
= tok_none
;
3277 lr_ignore_rest (ldfile
, 1);
3280 case tok_reorder_after
:
3281 /* Ignore the rest of the line if we don't need the input of
3285 lr_ignore_rest (ldfile
, 0);
3291 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
3295 /* Handle ellipsis at end of list. */
3296 if (was_ellipsis
!= tok_none
)
3298 handle_ellipsis (ldfile
, arg
->val
.str
.startmb
,
3299 arg
->val
.str
.lenmb
, was_ellipsis
, charmap
,
3300 repertoire
, result
);
3301 was_ellipsis
= tok_none
;
3304 else if (state
!= 2 && state
!= 3)
3308 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3309 if (arg
->tok
== tok_bsymbol
|| arg
->tok
== tok_ucs4
)
3311 /* Find this symbol in the sequence table. */
3315 struct element_t
*insp
;
3319 if (arg
->tok
== tok_bsymbol
)
3321 startmb
= arg
->val
.str
.startmb
;
3322 lenmb
= arg
->val
.str
.lenmb
;
3326 sprintf (ucsbuf
, "U%08X", arg
->val
.ucs4
);
3331 if (find_entry (&collate
->seq_table
, startmb
, lenmb
, &ptr
) == 0)
3332 /* Yes, the symbol exists. Simply point the cursor
3334 collate
->cursor
= (struct element_t
*) ptr
;
3337 struct symbol_t
*symbp
;
3340 if (find_entry (&collate
->sym_table
, startmb
, lenmb
,
3345 if (symbp
->order
->last
!= NULL
3346 || symbp
->order
->next
!= NULL
)
3347 collate
->cursor
= symbp
->order
;
3350 /* This is a collating symbol but its position
3351 is not yet defined. */
3352 lr_error (ldfile
, _("\
3353 %s: order for collating symbol %.*s not yet defined"),
3354 "LC_COLLATE", (int) lenmb
, startmb
);
3355 collate
->cursor
= NULL
;
3359 else if (find_entry (&collate
->elem_table
, startmb
, lenmb
,
3362 insp
= (struct element_t
*) ptr
;
3364 if (insp
->last
!= NULL
|| insp
->next
!= NULL
)
3365 collate
->cursor
= insp
;
3368 /* This is a collating element but its position
3369 is not yet defined. */
3370 lr_error (ldfile
, _("\
3371 %s: order for collating element %.*s not yet defined"),
3372 "LC_COLLATE", (int) lenmb
, startmb
);
3373 collate
->cursor
= NULL
;
3379 /* This is bad. The symbol after which we have to
3380 insert does not exist. */
3381 lr_error (ldfile
, _("\
3382 %s: cannot reorder after %.*s: symbol not known"),
3383 "LC_COLLATE", (int) lenmb
, startmb
);
3384 collate
->cursor
= NULL
;
3389 lr_ignore_rest (ldfile
, no_error
);
3392 /* This must not happen. */
3396 case tok_reorder_end
:
3397 /* Ignore the rest of the line if we don't need the input of
3405 lr_ignore_rest (ldfile
, 1);
3408 case tok_reorder_sections_after
:
3409 /* Ignore the rest of the line if we don't need the input of
3413 lr_ignore_rest (ldfile
, 0);
3419 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
3423 /* Handle ellipsis at end of list. */
3424 if (was_ellipsis
!= tok_none
)
3426 handle_ellipsis (ldfile
, NULL
, 0, was_ellipsis
, charmap
,
3427 repertoire
, result
);
3428 was_ellipsis
= tok_none
;
3431 else if (state
== 3)
3433 WITH_CUR_LOCALE (error (0, 0, _("\
3434 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3437 else if (state
!= 2 && state
!= 4)
3441 /* Get the name of the sections we are adding after. */
3442 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3443 if (arg
->tok
== tok_bsymbol
)
3445 /* Now find a section with this name. */
3446 struct section_list
*runp
= collate
->sections
;
3448 while (runp
!= NULL
)
3450 if (runp
->name
!= NULL
3451 && strlen (runp
->name
) == arg
->val
.str
.lenmb
3452 && memcmp (runp
->name
, arg
->val
.str
.startmb
,
3453 arg
->val
.str
.lenmb
) == 0)
3460 collate
->current_section
= runp
;
3463 /* This is bad. The section after which we have to
3464 reorder does not exist. Therefore we cannot
3465 process the whole rest of this reorder
3467 lr_error (ldfile
, _("%s: section `%.*s' not known"),
3468 "LC_COLLATE", (int) arg
->val
.str
.lenmb
,
3469 arg
->val
.str
.startmb
);
3473 lr_ignore_rest (ldfile
, 0);
3475 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3477 while (now
->tok
== tok_reorder_sections_after
3478 || now
->tok
== tok_reorder_sections_end
3479 || now
->tok
== tok_end
);
3481 /* Process the token we just saw. */
3487 /* This must not happen. */
3491 case tok_reorder_sections_end
:
3492 /* Ignore the rest of the line if we don't need the input of
3500 lr_ignore_rest (ldfile
, 1);
3505 /* Ignore the rest of the line if we don't need the input of
3509 lr_ignore_rest (ldfile
, 0);
3513 if (state
!= 0 && state
!= 1 && state
!= 3 && state
!= 5)
3516 if ((state
== 0 || state
== 5) && nowtok
== tok_ucs4
)
3519 if (nowtok
== tok_ucs4
)
3521 snprintf (ucs4buf
, sizeof (ucs4buf
), "U%08X", now
->val
.ucs4
);
3525 else if (arg
!= NULL
)
3527 symstr
= arg
->val
.str
.startmb
;
3528 symlen
= arg
->val
.str
.lenmb
;
3532 lr_error (ldfile
, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3533 (int) ldfile
->token
.val
.str
.lenmb
,
3534 ldfile
->token
.val
.str
.startmb
);
3538 struct element_t
*seqp
;
3541 /* We are outside an `order_start' region. This means
3542 we must only accept definitions of values for
3543 collation symbols since these are purely abstract
3544 values and don't need directions associated. */
3547 if (find_entry (&collate
->seq_table
, symstr
, symlen
, &ptr
) == 0)
3551 /* It's already defined. First check whether this
3552 is really a collating symbol. */
3553 if (seqp
->is_character
)
3562 if (find_entry (&collate
->sym_table
, symstr
, symlen
,
3564 /* No collating symbol, it's an error. */
3567 /* Maybe this is the first time we define a symbol
3568 value and it is before the first actual section. */
3569 if (collate
->sections
== NULL
)
3570 collate
->sections
= collate
->current_section
=
3571 &collate
->symbol_section
;
3574 if (was_ellipsis
!= tok_none
)
3576 handle_ellipsis (ldfile
, symstr
, symlen
, was_ellipsis
,
3577 charmap
, repertoire
, result
);
3579 /* Remember that we processed the ellipsis. */
3580 was_ellipsis
= tok_none
;
3582 /* And don't add the value a second time. */
3586 else if (state
== 3)
3588 /* It is possible that we already have this collation sequence.
3589 In this case we move the entry. */
3593 /* If the symbol after which we have to insert was not found
3594 ignore all entries. */
3595 if (collate
->cursor
== NULL
)
3597 lr_ignore_rest (ldfile
, 0);
3601 if (find_entry (&collate
->seq_table
, symstr
, symlen
, &ptr
) == 0)
3603 seqp
= (struct element_t
*) ptr
;
3607 if (find_entry (&collate
->sym_table
, symstr
, symlen
, &sym
) == 0
3608 && (seqp
= ((struct symbol_t
*) sym
)->order
) != NULL
)
3611 if (find_entry (&collate
->elem_table
, symstr
, symlen
, &ptr
) == 0
3612 && (seqp
= (struct element_t
*) ptr
,
3613 seqp
->last
!= NULL
|| seqp
->next
!= NULL
3614 || (collate
->start
!= NULL
&& seqp
== collate
->start
)))
3617 /* Remove the entry from the old position. */
3618 if (seqp
->last
== NULL
)
3619 collate
->start
= seqp
->next
;
3621 seqp
->last
->next
= seqp
->next
;
3622 if (seqp
->next
!= NULL
)
3623 seqp
->next
->last
= seqp
->last
;
3625 /* We also have to check whether this entry is the
3626 first or last of a section. */
3627 if (seqp
->section
->first
== seqp
)
3629 if (seqp
->section
->first
== seqp
->section
->last
)
3630 /* This section has no content anymore. */
3631 seqp
->section
->first
= seqp
->section
->last
= NULL
;
3633 seqp
->section
->first
= seqp
->next
;
3635 else if (seqp
->section
->last
== seqp
)
3636 seqp
->section
->last
= seqp
->last
;
3638 /* Now insert it in the new place. */
3639 insert_weights (ldfile
, seqp
, charmap
, repertoire
, result
,
3644 /* Otherwise we just add a new entry. */
3646 else if (state
== 5)
3648 /* We are reordering sections. Find the named section. */
3649 struct section_list
*runp
= collate
->sections
;
3650 struct section_list
*prevp
= NULL
;
3652 while (runp
!= NULL
)
3654 if (runp
->name
!= NULL
3655 && strlen (runp
->name
) == symlen
3656 && memcmp (runp
->name
, symstr
, symlen
) == 0)
3665 lr_error (ldfile
, _("%s: section `%.*s' not known"),
3666 "LC_COLLATE", (int) symlen
, symstr
);
3667 lr_ignore_rest (ldfile
, 0);
3671 if (runp
!= collate
->current_section
)
3673 /* Remove the named section from the old place and
3674 insert it in the new one. */
3675 prevp
->next
= runp
->next
;
3677 runp
->next
= collate
->current_section
->next
;
3678 collate
->current_section
->next
= runp
;
3679 collate
->current_section
= runp
;
3682 /* Process the rest of the line which might change
3683 the collation rules. */
3684 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
3686 if (arg
->tok
!= tok_eof
&& arg
->tok
!= tok_eol
)
3687 read_directions (ldfile
, arg
, charmap
, repertoire
,
3692 else if (was_ellipsis
!= tok_none
)
3694 /* Using the information in the `ellipsis_weight'
3695 element and this and the last value we have to handle
3696 the ellipsis now. */
3697 assert (state
== 1);
3699 handle_ellipsis (ldfile
, symstr
, symlen
, was_ellipsis
, charmap
,
3700 repertoire
, result
);
3702 /* Remember that we processed the ellipsis. */
3703 was_ellipsis
= tok_none
;
3705 /* And don't add the value a second time. */
3709 /* Now insert in the new place. */
3710 insert_value (ldfile
, symstr
, symlen
, charmap
, repertoire
, result
);
3714 /* Ignore the rest of the line if we don't need the input of
3718 lr_ignore_rest (ldfile
, 0);
3725 if (was_ellipsis
!= tok_none
)
3728 _("%s: cannot have `%s' as end of ellipsis range"),
3729 "LC_COLLATE", "UNDEFINED");
3731 unlink_element (collate
);
3732 was_ellipsis
= tok_none
;
3735 /* See whether UNDEFINED already appeared somewhere. */
3736 if (collate
->undefined
.next
!= NULL
3737 || &collate
->undefined
== collate
->cursor
)
3740 _("%s: order for `%.*s' already defined at %s:%Zu"),
3741 "LC_COLLATE", 9, "UNDEFINED",
3742 collate
->undefined
.file
,
3743 collate
->undefined
.line
);
3744 lr_ignore_rest (ldfile
, 0);
3747 /* Parse the weights. */
3748 insert_weights (ldfile
, &collate
->undefined
, charmap
,
3749 repertoire
, result
, tok_none
);
3752 case tok_ellipsis2
: /* symbolic hexadecimal ellipsis */
3753 case tok_ellipsis3
: /* absolute ellipsis */
3754 case tok_ellipsis4
: /* symbolic decimal ellipsis */
3755 /* This is the symbolic (decimal or hexadecimal) or absolute
3757 if (was_ellipsis
!= tok_none
)
3760 if (state
!= 0 && state
!= 1 && state
!= 3)
3763 was_ellipsis
= nowtok
;
3765 insert_weights (ldfile
, &collate
->ellipsis_weight
, charmap
,
3766 repertoire
, result
, nowtok
);
3770 /* Next we assume `LC_COLLATE'. */
3771 if (!ignore_content
)
3774 /* We must either see a copy statement or have
3777 _("%s: empty category description not allowed"),
3779 else if (state
== 1)
3781 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
3784 /* Handle ellipsis at end of list. */
3785 if (was_ellipsis
!= tok_none
)
3787 handle_ellipsis (ldfile
, NULL
, 0, was_ellipsis
, charmap
,
3788 repertoire
, result
);
3789 was_ellipsis
= tok_none
;
3792 else if (state
== 3)
3793 WITH_CUR_LOCALE (error (0, 0, _("\
3794 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3795 else if (state
== 5)
3796 WITH_CUR_LOCALE (error (0, 0, _("\
3797 %s: missing `reorder-sections-end' keyword"), "LC_COLLATE"));
3799 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3800 if (arg
->tok
== tok_eof
)
3802 if (arg
->tok
== tok_eol
)
3803 lr_error (ldfile
, _("%s: incomplete `END' line"), "LC_COLLATE");
3804 else if (arg
->tok
!= tok_lc_collate
)
3805 lr_error (ldfile
, _("\
3806 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3807 lr_ignore_rest (ldfile
, arg
->tok
== tok_lc_collate
);
3812 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
3815 /* Prepare for the next round. */
3816 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3820 /* When we come here we reached the end of the file. */
3821 lr_error (ldfile
, _("%s: premature end of file"), "LC_COLLATE");