1 /* Copyright (C) 1995-2003, 2005, 2006, 2007 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
27 #include <sys/param.h>
29 #include "localedef.h"
31 #include "localeinfo.h"
32 #include "linereader.h"
34 #include "elem-hash.h"
36 /* Uncomment the following line in the production version. */
37 /* #define NDEBUG 1 */
40 #define obstack_chunk_alloc malloc
41 #define obstack_chunk_free free
/* Append the 32-bit value DATA to the object currently being grown in
   OBSTACK.  When int32_t and int have the same size the value is added
   with obstack_int_grow; the obstack_grow call copies the
   sizeof (int32_t) raw bytes of DATA instead.
   NOTE(review): the keyword separating the two calls is not visible in
   this excerpt -- presumably an `else'; confirm.  */
44 __attribute ((always_inline
))
45 obstack_int32_grow (struct obstack
*obstack
, int32_t data
)
47 if (sizeof (int32_t) == sizeof (int))
48 obstack_int_grow (obstack
, data
);
50 obstack_grow (obstack
, &data
, sizeof (int32_t));
/* Variant of obstack_int32_grow which uses obstack_int_grow_fast when
   int32_t and int have the same size.  Note that the second call
   visible here is the plain obstack_grow, not a `_fast' variant.
   NOTE(review): the `_fast' obstack macros are documented as not
   checking for available room, so callers presumably have to reserve
   space beforehand -- confirm against the obstack documentation.  */
54 __attribute ((always_inline
))
55 obstack_int32_grow_fast (struct obstack
*obstack
, int32_t data
)
57 if (sizeof (int32_t) == sizeof (int))
58 obstack_int_grow_fast (obstack
, data
);
60 obstack_grow (obstack
, &data
, sizeof (int32_t));
63 /* Forward declaration. */
66 /* Data type for list of strings. */
69 /* Successor in the known_sections list. */
70 struct section_list
*def_next
;
71 /* Successor in the sections list. */
72 struct section_list
*next
;
73 /* Name of the section. */
75 /* First element of this section. */
76 struct element_t
*first
;
77 /* Last element of this section. */
78 struct element_t
*last
;
79 /* These are the rules for this section. */
80 enum coll_sort_rule
*rules
;
81 /* Index of the rule set in the appropriate section of the output file. */
89 /* Number of elements. */
95 /* Data type for collating element. */
107 /* The following is a bit mask which bits are set if this element is
108 used in the appropriate level. Interesting for the singlebyte
111 XXX The type here restricts the number of levels to 32. It could
112 be changed if necessary but I doubt this is necessary. */
113 unsigned int used_in_level
;
115 struct element_list_t
*weights
;
117 /* Nonzero if this is a real character definition. */
120 /* Order of the character in the sequence. This information will
121 be used in range expressions. */
125 /* Where does the definition come from. */
129 /* Which section does this belong to. */
130 struct section_list
*section
;
132 /* Predecessor and successor in the order list. */
133 struct element_t
*last
;
134 struct element_t
*next
;
136 /* Next element in multibyte output list. */
137 struct element_t
*mbnext
;
138 struct element_t
*mblast
;
140 /* Next element in wide character output list. */
141 struct element_t
*wcnext
;
142 struct element_t
*wclast
;
145 /* Special element value. */
146 #define ELEMENT_ELLIPSIS2 ((struct element_t *) 1)
147 #define ELEMENT_ELLIPSIS3 ((struct element_t *) 2)
148 #define ELEMENT_ELLIPSIS4 ((struct element_t *) 3)
150 /* Data type for collating symbol. */
155 /* Point to place in the order list. */
156 struct element_t
*order
;
158 /* Where does the definition come from. */
163 /* Sparse table of struct element_t *. */
164 #define TABLE wchead_table
165 #define ELEMENT struct element_t *
171 /* Sparse table of int32_t. */
172 #define TABLE collidx_table
173 #define ELEMENT int32_t
177 /* Sparse table of uint32_t. */
178 #define TABLE collseq_table
179 #define ELEMENT uint32_t
180 #define DEFAULT ~((uint32_t) 0)
184 /* The real definition of the struct for the LC_COLLATE locale. */
185 struct locale_collate_t
190 /* List of known scripts. */
191 struct section_list
*known_sections
;
192 /* List of used sections. */
193 struct section_list
*sections
;
194 /* Current section using definition. */
195 struct section_list
*current_section
;
196 /* There always can be an unnamed section. */
197 struct section_list unnamed_section
;
198 /* To make handling of errors easier we have another section. */
199 struct section_list error_section
;
200 /* Sometimes we are defining the values for collating symbols before
201 the first actual section. */
202 struct section_list symbol_section
;
204 /* Start of the order list. */
205 struct element_t
*start
;
207 /* The undefined element. */
208 struct element_t undefined
;
210 /* This is the cursor for `reorder_after' insertions. */
211 struct element_t
*cursor
;
213 /* This value is used when handling ellipsis. */
214 struct element_t ellipsis_weight
;
216 /* Known collating elements. */
217 hash_table elem_table
;
219 /* Known collating symbols. */
220 hash_table sym_table
;
222 /* Known collation sequences. */
223 hash_table seq_table
;
225 struct obstack mempool
;
227 /* The LC_COLLATE category is a bit special as it is sometimes possible
228 that the definitions from more than one input file contain information.
229 Therefore we keep all relevant input in a list. */
230 struct locale_collate_t
*next
;
232 /* Arrays with heads of the list for each of the leading bytes in
233 the multibyte sequences. */
234 struct element_t
*mbheads
[256];
236 /* Sparse table (struct wchead_table) with the list heads for the wide
237 character sequences; the wide character counterpart of `mbheads'. */
238 struct wchead_table wcheads
;
240 /* The arrays with the collation sequence order. */
241 unsigned char mbseqorder
[256];
242 struct collseq_table wcseqorder
;
246 /* We have a few global variables which are used for reading all
247 LC_COLLATE category descriptions in all files. */
248 static uint32_t nrules
;
251 /* We need UTF-8 encoding of numbers. */
253 __attribute ((always_inline
))
254 utf8_encode (char *buf
, int val
)
267 for (step
= 2; step
< 6; ++step
)
268 if ((val
& (~(uint32_t)0 << (5 * step
+ 1))) == 0)
272 *buf
= (unsigned char) (~0xff >> step
);
276 buf
[step
] = 0x80 | (val
& 0x3f);
287 static struct section_list
*
288 make_seclist_elem (struct locale_collate_t
*collate
, const char *string
,
289 struct section_list
*next
)
291 struct section_list
*newp
;
293 newp
= (struct section_list
*) obstack_alloc (&collate
->mempool
,
304 static struct element_t
*
305 new_element (struct locale_collate_t
*collate
, const char *mbs
, size_t mbslen
,
306 const uint32_t *wcs
, const char *name
, size_t namelen
,
309 struct element_t
*newp
;
311 newp
= (struct element_t
*) obstack_alloc (&collate
->mempool
,
313 newp
->name
= name
== NULL
? NULL
: obstack_copy0 (&collate
->mempool
,
317 newp
->mbs
= obstack_copy0 (&collate
->mempool
, mbs
, mbslen
);
327 size_t nwcs
= wcslen ((wchar_t *) wcs
);
329 obstack_grow (&collate
->mempool
, wcs
, nwcs
* sizeof (uint32_t));
330 obstack_grow (&collate
->mempool
, &zero
, sizeof (uint32_t));
331 newp
->wcs
= (uint32_t *) obstack_finish (&collate
->mempool
);
339 newp
->mborder
= NULL
;
341 newp
->used_in_level
= 0;
342 newp
->is_character
= is_character
;
344 /* Will be assigned later. XXX */
345 newp
->mbseqorder
= 0;
346 newp
->wcseqorder
= 0;
348 /* Will be allocated later. */
349 newp
->weights
= NULL
;
354 newp
->section
= collate
->current_section
;
/* Create a new collating symbol record for NAME, which is LEN bytes long.
   Both the record and the copy of the name are allocated from the
   obstack `mempool' of COLLATE.
   NOTE(review): the remaining field initialisation and the return
   statement are not visible in this excerpt.  */
369 static struct symbol_t
*
370 new_symbol (struct locale_collate_t
*collate
, const char *name
, size_t len
)
372 struct symbol_t
*newp
;
374 newp
= (struct symbol_t
*) obstack_alloc (&collate
->mempool
, sizeof (*newp
));
/* obstack_copy0 copies LEN bytes and appends a terminating NUL byte
   (per the GNU obstack documentation).  */
376 newp
->name
= obstack_copy0 (&collate
->mempool
, name
, len
);
386 /* Test whether this name is already defined somewhere. */
/* SYMBOL is looked up, in this order, in the character table of CHARMAP,
   the character table of REPERTOIRE (only if REPERTOIRE is not NULL),
   the collating symbol table and the collating element table of COLLATE.
   A find_entry result of 0 is treated as "found" and reported through
   lr_error on LDFILE.  SYMBOL is always printed with a "%.*s" format and
   an explicit length.
   NOTE(review): the return type, the last parameter and the return
   statements are not visible in this excerpt.  */
388 check_duplicate (struct linereader
*ldfile
, struct locale_collate_t
*collate
,
389 const struct charmap_t
*charmap
,
390 struct repertoire_t
*repertoire
, const char *symbol
,
/* 1. Character names from the charmap.  */
395 if (find_entry (&charmap
->char_table
, symbol
, symbol_len
, &ignore
) == 0)
397 lr_error (ldfile
, _("`%.*s' already defined in charmap"),
398 (int) symbol_len
, symbol
);
/* 2. Character names from the repertoire map, if there is one.  */
402 if (repertoire
!= NULL
403 && (find_entry (&repertoire
->char_table
, symbol
, symbol_len
, &ignore
)
406 lr_error (ldfile
, _("`%.*s' already defined in repertoire"),
407 (int) symbol_len
, symbol
);
/* 3. Collating symbols defined so far.  */
411 if (find_entry (&collate
->sym_table
, symbol
, symbol_len
, &ignore
) == 0)
413 lr_error (ldfile
, _("`%.*s' already defined as collating symbol"),
414 (int) symbol_len
, symbol
);
/* 4. Collating elements defined so far.  */
418 if (find_entry (&collate
->elem_table
, symbol
, symbol_len
, &ignore
) == 0)
420 lr_error (ldfile
, _("`%.*s' already defined as collating element"),
421 (int) symbol_len
, symbol
);
429 /* Read the direction specification. */
431 read_directions (struct linereader
*ldfile
, struct token
*arg
,
432 const struct charmap_t
*charmap
,
433 struct repertoire_t
*repertoire
, struct localedef_t
*result
)
436 int max
= nrules
?: 10;
437 enum coll_sort_rule
*rules
= calloc (max
, sizeof (*rules
));
439 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
445 if (arg
->tok
== tok_forward
)
447 if (rules
[cnt
] & sort_backward
)
451 lr_error (ldfile
, _("\
452 %s: `forward' and `backward' are mutually excluding each other"),
457 else if (rules
[cnt
] & sort_forward
)
461 lr_error (ldfile
, _("\
462 %s: `%s' mentioned more than once in definition of weight %d"),
463 "LC_COLLATE", "forward", cnt
+ 1);
467 rules
[cnt
] |= sort_forward
;
471 else if (arg
->tok
== tok_backward
)
473 if (rules
[cnt
] & sort_forward
)
477 lr_error (ldfile
, _("\
478 %s: `forward' and `backward' are mutually excluding each other"),
483 else if (rules
[cnt
] & sort_backward
)
487 lr_error (ldfile
, _("\
488 %s: `%s' mentioned more than once in definition of weight %d"),
489 "LC_COLLATE", "backward", cnt
+ 1);
493 rules
[cnt
] |= sort_backward
;
497 else if (arg
->tok
== tok_position
)
499 if (rules
[cnt
] & sort_position
)
503 lr_error (ldfile
, _("\
504 %s: `%s' mentioned more than once in definition of weight %d"),
505 "LC_COLLATE", "position", cnt
+ 1);
509 rules
[cnt
] |= sort_position
;
515 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
517 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
|| arg
->tok
== tok_comma
518 || arg
->tok
== tok_semicolon
)
520 if (! valid
&& ! warned
)
522 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
526 /* See whether we have to increment the counter. */
527 if (arg
->tok
!= tok_comma
&& rules
[cnt
] != 0)
529 /* Add the default `forward' if we have seen only `position'. */
530 if (rules
[cnt
] == sort_position
)
531 rules
[cnt
] = sort_position
| sort_forward
;
536 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
)
537 /* End of line or file, so we exit the loop. */
542 /* See whether we have enough room in the array. */
546 rules
= (enum coll_sort_rule
*) xrealloc (rules
,
549 memset (&rules
[cnt
], '\0', (max
- cnt
) * sizeof (*rules
));
556 /* There must not be any more rules. */
559 lr_error (ldfile
, _("\
560 %s: too many rules; first entry only had %d"),
561 "LC_COLLATE", nrules
);
565 lr_ignore_rest (ldfile
, 0);
574 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
579 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
584 /* Now we know how many rules we have. */
586 rules
= (enum coll_sort_rule
*) xrealloc (rules
,
587 nrules
* sizeof (*rules
));
593 /* Not enough rules in this specification. */
595 lr_error (ldfile
, _("%s: not enough sorting rules"), "LC_COLLATE");
598 rules
[cnt
] = sort_forward
;
599 while (++cnt
< nrules
);
603 collate
->current_section
->rules
= rules
;
/* Return the element registered under the name STR (LEN bytes long).
   The lookup order visible here is: the sequence table, then the
   collating symbol table, then the collating element table of COLLATE.
   For a collating symbol the element is stored in the `order' member of
   the symbol.  If the name is found in none of the tables a new element
   is created for it and entered into the sequence table.
   NOTE(review): several lines of this function (local declarations and
   part of the collating symbol branch) are not visible in this excerpt.  */
607 static struct element_t
*
608 find_element (struct linereader
*ldfile
, struct locale_collate_t
*collate
,
609 const char *str
, size_t len
)
613 /* Search for the entries among the collation sequences already defined. */
614 if (find_entry (&collate
->seq_table
, str
, len
, &result
) != 0)
616 /* Nope, not defined yet. So we see whether it is a
620 if (find_entry (&collate
->sym_table
, str
, len
, &ptr
) == 0)
622 /* It's a collation symbol. */
623 struct symbol_t
*sym
= (struct symbol_t
*) ptr
;
627 result
= sym
->order
= new_element (collate
, NULL
, 0, NULL
,
630 else if (find_entry (&collate
->elem_table
, str
, len
, &result
) != 0)
632 /* It's also no collation element. So it is a character
633 element defined later. */
/* No byte sequence and no wide character string are known yet, hence
   the NULL/0 arguments.  NOTE(review): the trailing 1 is presumably the
   `is_character' flag of new_element -- confirm.  */
634 result
= new_element (collate
, NULL
, 0, NULL
, str
, len
, 1);
635 /* Insert it into the sequence table. */
636 insert_entry (&collate
->seq_table
, str
, len
, result
);
640 return (struct element_t
*) result
;
/* Remove the element the cursor of COLLATE points to from the doubly
   linked order list (the `last' and `next' members of struct element_t).
   Afterwards the cursor refers to the predecessor of the removed
   element, or is NULL if the removed element was the start of the list.
   NOTE(review): the lines separating the two cases are not visible in
   this excerpt -- presumably an `else' block; confirm.  */
645 unlink_element (struct locale_collate_t
*collate
)
647 if (collate
->cursor
== collate
->start
)
/* The start element is expected to be the only element in the list.
   `collate->start' itself is not modified in the lines visible here.  */
649 assert (collate
->cursor
->next
== NULL
);
650 assert (collate
->cursor
->last
== NULL
);
651 collate
->cursor
= NULL
;
/* Let the successor point back to the predecessor ...  */
655 if (collate
->cursor
->next
!= NULL
)
656 collate
->cursor
->next
->last
= collate
->cursor
->last
;
/* ... and the predecessor point forward to the successor.  */
657 if (collate
->cursor
->last
!= NULL
)
658 collate
->cursor
->last
->next
= collate
->cursor
->next
;
/* Move the cursor back to the predecessor.  */
659 collate
->cursor
= collate
->cursor
->last
;
665 insert_weights (struct linereader
*ldfile
, struct element_t
*elem
,
666 const struct charmap_t
*charmap
,
667 struct repertoire_t
*repertoire
, struct localedef_t
*result
,
668 enum token_t ellipsis
)
672 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
674 /* Initialize all the fields. */
675 elem
->file
= ldfile
->fname
;
676 elem
->line
= ldfile
->lineno
;
678 elem
->last
= collate
->cursor
;
679 elem
->next
= collate
->cursor
? collate
->cursor
->next
: NULL
;
680 if (collate
->cursor
!= NULL
&& collate
->cursor
->next
!= NULL
)
681 collate
->cursor
->next
->last
= elem
;
682 if (collate
->cursor
!= NULL
)
683 collate
->cursor
->next
= elem
;
684 if (collate
->start
== NULL
)
686 assert (collate
->cursor
== NULL
);
687 collate
->start
= elem
;
690 elem
->section
= collate
->current_section
;
692 if (collate
->current_section
->first
== NULL
)
693 collate
->current_section
->first
= elem
;
694 if (collate
->current_section
->last
== collate
->cursor
)
695 collate
->current_section
->last
= elem
;
697 collate
->cursor
= elem
;
699 elem
->weights
= (struct element_list_t
*)
700 obstack_alloc (&collate
->mempool
, nrules
* sizeof (struct element_list_t
));
701 memset (elem
->weights
, '\0', nrules
* sizeof (struct element_list_t
));
705 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
708 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
)
711 if (arg
->tok
== tok_ignore
)
713 /* The weight for this level has to be ignored. We use the
714 null pointer to indicate this. */
715 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
716 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
717 elem
->weights
[weight_cnt
].w
[0] = NULL
;
718 elem
->weights
[weight_cnt
].cnt
= 1;
720 else if (arg
->tok
== tok_bsymbol
|| arg
->tok
== tok_ucs4
)
723 struct element_t
*val
;
727 if (arg
->tok
== tok_bsymbol
)
729 symstr
= arg
->val
.str
.startmb
;
730 symlen
= arg
->val
.str
.lenmb
;
734 snprintf (ucs4str
, sizeof (ucs4str
), "U%08X", arg
->val
.ucs4
);
739 val
= find_element (ldfile
, collate
, symstr
, symlen
);
743 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
744 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
745 elem
->weights
[weight_cnt
].w
[0] = val
;
746 elem
->weights
[weight_cnt
].cnt
= 1;
748 else if (arg
->tok
== tok_string
)
750 /* Split the string up in the individual characters and put
751 the element definitions in the list. */
752 const char *cp
= arg
->val
.str
.startmb
;
754 struct element_t
*charelem
;
755 struct element_t
**weights
= NULL
;
760 lr_error (ldfile
, _("%s: empty weight string not allowed"),
762 lr_ignore_rest (ldfile
, 0);
770 /* Ahh, it's a bsymbol or an UCS4 value. If it's
771 the latter we have to unify the name. */
772 const char *startp
= ++cp
;
777 if (*cp
== ldfile
->escape_char
)
780 /* It's a syntax error. */
786 if (cp
- startp
== 5 && startp
[0] == 'U'
787 && isxdigit (startp
[1]) && isxdigit (startp
[2])
788 && isxdigit (startp
[3]) && isxdigit (startp
[4]))
790 unsigned int ucs4
= strtoul (startp
+ 1, NULL
, 16);
793 newstr
= (char *) xmalloc (10);
794 snprintf (newstr
, 10, "U%08X", ucs4
);
802 charelem
= find_element (ldfile
, collate
, startp
, len
);
807 /* People really shouldn't use characters directly in
808 the string. Especially since it's not really clear
809 what this means. We interpret all characters in the
810 string as if they were bsymbols. Otherwise we
811 would have to match back to bsymbols somehow and this
812 is not what people normally expect. */
813 charelem
= find_element (ldfile
, collate
, cp
++, 1);
816 if (charelem
== NULL
)
818 /* We ignore the rest of the line. */
819 lr_ignore_rest (ldfile
, 0);
823 /* Add the pointer. */
826 struct element_t
**newp
;
828 newp
= (struct element_t
**)
829 alloca (max
* sizeof (struct element_t
*));
830 memcpy (newp
, weights
, cnt
* sizeof (struct element_t
*));
833 weights
[cnt
++] = charelem
;
837 /* Now store the information. */
838 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
839 obstack_alloc (&collate
->mempool
,
840 cnt
* sizeof (struct element_t
*));
841 memcpy (elem
->weights
[weight_cnt
].w
, weights
,
842 cnt
* sizeof (struct element_t
*));
843 elem
->weights
[weight_cnt
].cnt
= cnt
;
845 /* We don't need the string anymore. */
846 free (arg
->val
.str
.startmb
);
848 else if (ellipsis
!= tok_none
849 && (arg
->tok
== tok_ellipsis2
850 || arg
->tok
== tok_ellipsis3
851 || arg
->tok
== tok_ellipsis4
))
853 /* It must be the same ellipsis as used in the initial column. */
854 if (arg
->tok
!= ellipsis
)
855 lr_error (ldfile
, _("\
856 %s: weights must use the same ellipsis symbol as the name"),
859 /* The weight for this level will depend on the element
860 iterating over the range. Put a placeholder. */
861 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
862 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
863 elem
->weights
[weight_cnt
].w
[0] = ELEMENT_ELLIPSIS2
;
864 elem
->weights
[weight_cnt
].cnt
= 1;
869 /* It's a syntax error. */
870 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
871 lr_ignore_rest (ldfile
, 0);
875 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
876 /* This better should be the end of the line or a semicolon. */
877 if (arg
->tok
== tok_semicolon
)
878 /* OK, ignore this and read the next token. */
879 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
880 else if (arg
->tok
!= tok_eof
&& arg
->tok
!= tok_eol
)
882 /* It's a syntax error. */
883 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
884 lr_ignore_rest (ldfile
, 0);
888 while (++weight_cnt
< nrules
);
890 if (weight_cnt
< nrules
)
892 /* This means the rest of the line uses the current element as
896 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
897 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
898 if (ellipsis
== tok_none
)
899 elem
->weights
[weight_cnt
].w
[0] = elem
;
901 elem
->weights
[weight_cnt
].w
[0] = ELEMENT_ELLIPSIS2
;
902 elem
->weights
[weight_cnt
].cnt
= 1;
904 while (++weight_cnt
< nrules
);
908 if (arg
->tok
== tok_ignore
|| arg
->tok
== tok_bsymbol
)
910 /* Too many rule values. */
911 lr_error (ldfile
, _("%s: too many values"), "LC_COLLATE");
912 lr_ignore_rest (ldfile
, 0);
915 lr_ignore_rest (ldfile
, arg
->tok
!= tok_eol
&& arg
->tok
!= tok_eof
);
921 insert_value (struct linereader
*ldfile
, const char *symstr
, size_t symlen
,
922 const struct charmap_t
*charmap
, struct repertoire_t
*repertoire
,
923 struct localedef_t
*result
)
925 /* First find out what kind of symbol this is. */
928 struct element_t
*elem
= NULL
;
929 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
931 /* Try to find the character in the charmap. */
932 seq
= charmap_find_value (charmap
, symstr
, symlen
);
934 /* Determine the wide character. */
935 if (seq
== NULL
|| seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
937 wc
= repertoire_find_value (repertoire
, symstr
, symlen
);
944 if (wc
== ILLEGAL_CHAR_VALUE
&& seq
== NULL
)
946 /* It's no character, so look through the collation elements and
949 if (find_entry (&collate
->elem_table
, symstr
, symlen
, &ptr
) != 0)
952 struct symbol_t
*sym
= NULL
;
954 /* It's also no collation element. Therefore it's either a
955 collating symbol or it's a character which is not
956 supported by the character set. In the latter case we
957 simply create a dummy entry. */
958 if (find_entry (&collate
->sym_table
, symstr
, symlen
, &result
) == 0)
960 /* It's a collation symbol. */
961 sym
= (struct symbol_t
*) result
;
968 elem
= new_element (collate
, NULL
, 0, NULL
, symstr
, symlen
, 0);
973 /* Enter a fake element in the sequence table. This
974 won't cause anything in the output since there is
975 no multibyte or wide character associated with
977 insert_entry (&collate
->seq_table
, symstr
, symlen
, elem
);
981 /* Copy the result back. */
986 /* Otherwise the symbols stands for a character. */
988 if (find_entry (&collate
->seq_table
, symstr
, symlen
, &ptr
) != 0)
990 uint32_t wcs
[2] = { wc
, 0 };
992 /* We have to allocate an entry. */
993 elem
= new_element (collate
,
994 seq
!= NULL
? (char *) seq
->bytes
: NULL
,
995 seq
!= NULL
? seq
->nbytes
: 0,
996 wc
== ILLEGAL_CHAR_VALUE
? NULL
: wcs
,
999 /* And add it to the table. */
1000 if (insert_entry (&collate
->seq_table
, symstr
, symlen
, elem
) != 0)
1001 /* This cannot happen. */
1002 assert (! "Internal error");
1006 /* Copy the result back. */
1009 /* Maybe the character was used before the definition. In this case
1010 we have to insert the byte sequences now. */
1011 if (elem
->mbs
== NULL
&& seq
!= NULL
)
1013 elem
->mbs
= obstack_copy0 (&collate
->mempool
,
1014 seq
->bytes
, seq
->nbytes
);
1015 elem
->nmbs
= seq
->nbytes
;
1018 if (elem
->wcs
== NULL
&& wc
!= ILLEGAL_CHAR_VALUE
)
1020 uint32_t wcs
[2] = { wc
, 0 };
1022 elem
->wcs
= obstack_copy (&collate
->mempool
, wcs
, sizeof (wcs
));
1028 /* Test whether this element is not already in the list. */
1029 if (elem
->next
!= NULL
|| elem
== collate
->cursor
)
1031 lr_error (ldfile
, _("order for `%.*s' already defined at %s:%Zu"),
1032 (int) symlen
, symstr
, elem
->file
, elem
->line
);
1033 lr_ignore_rest (ldfile
, 0);
1037 insert_weights (ldfile
, elem
, charmap
, repertoire
, result
, tok_none
);
1044 handle_ellipsis (struct linereader
*ldfile
, const char *symstr
, size_t symlen
,
1045 enum token_t ellipsis
, const struct charmap_t
*charmap
,
1046 struct repertoire_t
*repertoire
,
1047 struct localedef_t
*result
)
1049 struct element_t
*startp
;
1050 struct element_t
*endp
;
1051 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
1053 /* Unlink the entry added for the ellipsis. */
1054 unlink_element (collate
);
1055 startp
= collate
->cursor
;
1057 /* Process and add the end-entry. */
1059 && insert_value (ldfile
, symstr
, symlen
, charmap
, repertoire
, result
))
1060 /* Something went wrong with inserting the to-value. This means
1061 we cannot process the ellipsis. */
1064 /* Reset the cursor. */
1065 collate
->cursor
= startp
;
1067 /* Now we have to handle many different situations:
1068 - we have to distinguish between the three different ellipsis forms
1069 - the ellipsis may be at the beginning, in the middle, or at the end.
1071 endp
= collate
->cursor
->next
;
1072 assert (symstr
== NULL
|| endp
!= NULL
);
1074 /* XXX The following is probably very wrong since collating symbols can
1075 also appear in ranges. But do we want to (and can we) refine the test? */
1077 /* Both, the start and the end symbol, must stand for characters. */
1078 if ((startp
!= NULL
&& (startp
->name
== NULL
|| ! startp
->is_character
))
1079 || (endp
!= NULL
&& (endp
->name
== NULL
|| ! endp
->is_character
)))
1081 lr_error (ldfile
, _("\
1082 %s: the start and the end symbol of a range must stand for characters"),
1088 if (ellipsis
== tok_ellipsis3
)
1090 /* One requirement we make here: the length of the byte
1091 sequences for the first and end character must be the same.
1092 This is mainly to prevent unwanted effects and this is often
1093 not what is wanted. */
1094 size_t len
= (startp
->mbs
!= NULL
? startp
->nmbs
1095 : (endp
->mbs
!= NULL
? endp
->nmbs
: 0));
1096 char mbcnt
[len
+ 1];
1097 char mbend
[len
+ 1];
1099 /* Well, this should be caught somewhere else already. Just to
1101 assert (startp
== NULL
|| startp
->wcs
== NULL
|| startp
->wcs
[1] == 0);
1102 assert (endp
== NULL
|| endp
->wcs
== NULL
|| endp
->wcs
[1] == 0);
1104 if (startp
!= NULL
&& endp
!= NULL
1105 && startp
->mbs
!= NULL
&& endp
->mbs
!= NULL
1106 && startp
->nmbs
!= endp
->nmbs
)
1108 lr_error (ldfile
, _("\
1109 %s: byte sequences of first and last character must have the same length"),
1114 /* Determine whether we have to generate multibyte sequences. */
1115 if ((startp
== NULL
|| startp
->mbs
!= NULL
)
1116 && (endp
== NULL
|| endp
->mbs
!= NULL
))
1121 /* Prepare the beginning byte sequence. This is either from the
1122 beginning byte sequence or it is all nulls if it was an
1123 initial ellipsis. */
1124 if (startp
== NULL
|| startp
->mbs
== NULL
)
1125 memset (mbcnt
, '\0', len
);
1128 memcpy (mbcnt
, startp
->mbs
, len
);
1130 /* And increment it so that the value is the first one we will
1132 for (cnt
= len
- 1; cnt
>= 0; --cnt
)
1133 if (++mbcnt
[cnt
] != '\0')
1138 /* And the end sequence. */
1139 if (endp
== NULL
|| endp
->mbs
== NULL
)
1140 memset (mbend
, '\0', len
);
1142 memcpy (mbend
, endp
->mbs
, len
);
1145 /* Test whether we have a correct range. */
1146 ret
= memcmp (mbcnt
, mbend
, len
);
1150 lr_error (ldfile
, _("%s: byte sequence of first character of \
1151 range is not lower than that of the last character"), "LC_COLLATE");
1155 /* Generate the byte sequences data. */
1158 struct charseq
*seq
;
1160 /* Quite a bit of work ahead. We have to find the character
1161 definition for the byte sequence and then determine the
1162 wide character belonging to it. */
1163 seq
= charmap_find_symbol (charmap
, mbcnt
, len
);
1166 struct element_t
*elem
;
1169 /* I don't think this can ever happen. */
1170 assert (seq
->name
!= NULL
);
1171 namelen
= strlen (seq
->name
);
1173 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1174 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1177 /* Now we are ready to insert the new value in the
1178 sequence. Find out whether the element is
1181 if (find_entry (&collate
->seq_table
, seq
->name
, namelen
,
1184 uint32_t wcs
[2] = { seq
->ucs4
, 0 };
1186 /* We have to allocate an entry. */
1187 elem
= new_element (collate
, mbcnt
, len
,
1188 seq
->ucs4
== ILLEGAL_CHAR_VALUE
1189 ? NULL
: wcs
, seq
->name
,
1192 /* And add it to the table. */
1193 if (insert_entry (&collate
->seq_table
, seq
->name
,
1194 namelen
, elem
) != 0)
1195 /* This cannot happen. */
1196 assert (! "Internal error");
1199 /* Copy the result. */
1202 /* Test whether this element is not already in the list. */
1203 if (elem
->next
!= NULL
|| (collate
->cursor
!= NULL
1204 && elem
->next
== collate
->cursor
))
1206 lr_error (ldfile
, _("\
1207 order for `%.*s' already defined at %s:%Zu"),
1208 (int) namelen
, seq
->name
,
1209 elem
->file
, elem
->line
);
1213 /* Enqueue the new element. */
1214 elem
->last
= collate
->cursor
;
1215 if (collate
->cursor
== NULL
)
1219 elem
->next
= collate
->cursor
->next
;
1220 elem
->last
->next
= elem
;
1221 if (elem
->next
!= NULL
)
1222 elem
->next
->last
= elem
;
1224 if (collate
->start
== NULL
)
1226 assert (collate
->cursor
== NULL
);
1227 collate
->start
= elem
;
1229 collate
->cursor
= elem
;
1231 /* Add the weight values. We take them from the
1232 `ellipsis_weight' member of `collate'. */
1233 elem
->weights
= (struct element_list_t
*)
1234 obstack_alloc (&collate
->mempool
,
1235 nrules
* sizeof (struct element_list_t
));
1236 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1237 if (collate
->ellipsis_weight
.weights
[cnt
].cnt
== 1
1238 && (collate
->ellipsis_weight
.weights
[cnt
].w
[0]
1239 == ELEMENT_ELLIPSIS2
))
1241 elem
->weights
[cnt
].w
= (struct element_t
**)
1242 obstack_alloc (&collate
->mempool
,
1243 sizeof (struct element_t
*));
1244 elem
->weights
[cnt
].w
[0] = elem
;
1245 elem
->weights
[cnt
].cnt
= 1;
1249 /* Simply use the weight from `ellipsis_weight'. */
1250 elem
->weights
[cnt
].w
=
1251 collate
->ellipsis_weight
.weights
[cnt
].w
;
1252 elem
->weights
[cnt
].cnt
=
1253 collate
->ellipsis_weight
.weights
[cnt
].cnt
;
1257 /* Increment for the next round. */
1259 for (cnt
= len
- 1; cnt
>= 0; --cnt
)
1260 if (++mbcnt
[cnt
] != '\0')
1263 /* Find out whether this was all. */
1264 if (cnt
< 0 || memcmp (mbcnt
, mbend
, len
) >= 0)
1265 /* Yep, that's all. */
1272 /* For symbolic range we naturally must have a beginning and an
1273 end specified by the user. */
1275 lr_error (ldfile
, _("\
1276 %s: symbolic range ellipsis must not directly follow `order_start'"),
1278 else if (endp
== NULL
)
1279 lr_error (ldfile
, _("\
1280 %s: symbolic range ellipsis must not be directly followed by `order_end'"),
1284 /* Determine the range. To do so we have to determine the
1285 common prefix of both names and then the numeric
1286 values of both ends. */
1287 size_t lenfrom
= strlen (startp
->name
);
1288 size_t lento
= strlen (endp
->name
);
1289 char buf
[lento
+ 1];
1294 int base
= ellipsis
== tok_ellipsis2
? 16 : 10;
1296 if (lenfrom
!= lento
)
1299 lr_error (ldfile
, _("\
1300 `%s' and `%.*s' are not valid names for symbolic range"),
1301 startp
->name
, (int) lento
, endp
->name
);
1305 while (startp
->name
[preflen
] == endp
->name
[preflen
])
1306 if (startp
->name
[preflen
] == '\0')
1307 /* Nothing to be done. The start and end point are identical
1308 and while inserting the end point we have already given
1309 the user an error message. */
1315 from
= strtol (startp
->name
+ preflen
, &cp
, base
);
1316 if ((from
== UINT_MAX
&& errno
== ERANGE
) || *cp
!= '\0')
1320 to
= strtol (endp
->name
+ preflen
, &cp
, base
);
1321 if ((to
== UINT_MAX
&& errno
== ERANGE
) || *cp
!= '\0')
1324 /* Copy the prefix. */
1325 memcpy (buf
, startp
->name
, preflen
);
1327 /* Loop over all values. */
1328 for (++from
; from
< to
; ++from
)
1330 struct element_t
*elem
= NULL
;
1331 struct charseq
*seq
;
1335 /* Generate the name. */
1336 sprintf (buf
+ preflen
, base
== 10 ? "%0*ld" : "%0*lX",
1337 (int) (lenfrom
- preflen
), from
);
1339 /* Look whether this name is already defined. */
1341 if (find_entry (&collate
->seq_table
, buf
, symlen
, &ptr
) == 0)
1343 /* Copy back the result. */
1346 if (elem
->next
!= NULL
|| (collate
->cursor
!= NULL
1347 && elem
->next
== collate
->cursor
))
1349 lr_error (ldfile
, _("\
1350 %s: order for `%.*s' already defined at %s:%Zu"),
1351 "LC_COLLATE", (int) lenfrom
, buf
,
1352 elem
->file
, elem
->line
);
1356 if (elem
->name
== NULL
)
1358 lr_error (ldfile
, _("%s: `%s' must be a character"),
1364 if (elem
== NULL
|| (elem
->mbs
== NULL
&& elem
->wcs
== NULL
))
1366 /* Search for a character of this name. */
1367 seq
= charmap_find_value (charmap
, buf
, lenfrom
);
1368 if (seq
== NULL
|| seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1370 wc
= repertoire_find_value (repertoire
, buf
, lenfrom
);
1378 if (wc
== ILLEGAL_CHAR_VALUE
&& seq
== NULL
)
1379 /* We don't know anything about a character with this
1380 name. XXX Should we warn? */
1385 uint32_t wcs
[2] = { wc
, 0 };
1387 /* We have to allocate an entry. */
1388 elem
= new_element (collate
,
1390 ? (char *) seq
->bytes
: NULL
,
1391 seq
!= NULL
? seq
->nbytes
: 0,
1392 wc
== ILLEGAL_CHAR_VALUE
1393 ? NULL
: wcs
, buf
, lenfrom
, 1);
1397 /* Update the element. */
1400 elem
->mbs
= obstack_copy0 (&collate
->mempool
,
1401 seq
->bytes
, seq
->nbytes
);
1402 elem
->nmbs
= seq
->nbytes
;
1405 if (wc
!= ILLEGAL_CHAR_VALUE
)
1409 obstack_grow (&collate
->mempool
,
1410 &wc
, sizeof (uint32_t));
1411 obstack_grow (&collate
->mempool
,
1412 &zero
, sizeof (uint32_t));
1413 elem
->wcs
= obstack_finish (&collate
->mempool
);
1418 elem
->file
= ldfile
->fname
;
1419 elem
->line
= ldfile
->lineno
;
1420 elem
->section
= collate
->current_section
;
1423 /* Enqueue the new element. */
1424 elem
->last
= collate
->cursor
;
1425 elem
->next
= collate
->cursor
->next
;
1426 elem
->last
->next
= elem
;
1427 if (elem
->next
!= NULL
)
1428 elem
->next
->last
= elem
;
1429 collate
->cursor
= elem
;
1431 /* Now add the weights. They come from the `ellipsis_weight'
1432 member of `collate'. */
1433 elem
->weights
= (struct element_list_t
*)
1434 obstack_alloc (&collate
->mempool
,
1435 nrules
* sizeof (struct element_list_t
));
1436 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1437 if (collate
->ellipsis_weight
.weights
[cnt
].cnt
== 1
1438 && (collate
->ellipsis_weight
.weights
[cnt
].w
[0]
1439 == ELEMENT_ELLIPSIS2
))
1441 elem
->weights
[cnt
].w
= (struct element_t
**)
1442 obstack_alloc (&collate
->mempool
,
1443 sizeof (struct element_t
*));
1444 elem
->weights
[cnt
].w
[0] = elem
;
1445 elem
->weights
[cnt
].cnt
= 1;
1449 /* Simply use the weight from `ellipsis_weight'. */
1450 elem
->weights
[cnt
].w
=
1451 collate
->ellipsis_weight
.weights
[cnt
].w
;
1452 elem
->weights
[cnt
].cnt
=
1453 collate
->ellipsis_weight
.weights
[cnt
].cnt
;
/* Prepare LOCALE and LDFILE for reading an LC_COLLATE section.
   When IGNORE_CONTENT is zero and LOCALE has no collate data yet, the
   collate pointer of LOCALE is set up in one of two ways: with
   COPY_LOCALE == NULL a zero-filled struct locale_collate_t is
   allocated through xcalloc and its three hash tables and its obstack
   are initialised; the other visible assignment shares the collate
   structure of COPY_LOCALE.  The translate_strings and return_widestr
   fields of LDFILE are set to zero at the end.
   NOTE(review): block braces and the else keyword are not visible in
   this extract; confirm the exact nesting against the full file.  */
1462 collate_startup (struct linereader
*ldfile
, struct localedef_t
*locale
,
1463 struct localedef_t
*copy_locale
, int ignore_content
)
1465 if (!ignore_content
&& locale
->categories
[LC_COLLATE
].collate
== NULL
)
1467 struct locale_collate_t
*collate
;
1469 if (copy_locale
== NULL
)
/* Fresh data: the new structure is stored in LOCALE and kept in the
   local variable in one chained assignment.  */
1471 collate
= locale
->categories
[LC_COLLATE
].collate
=
1472 (struct locale_collate_t
*)
1473 xcalloc (1, sizeof (struct locale_collate_t
));
1475 /* Init the various data structures. */
1476 init_hash (&collate
->elem_table
, 100);
1477 init_hash (&collate
->sym_table
, 100);
1478 init_hash (&collate
->seq_table
, 500);
1479 obstack_init (&collate
->mempool
);
/* NOTE(review): -1 presumably means that no maximum has been set yet;
   confirm where col_weight_max is read.  */
1481 collate
->col_weight_max
= -1;
1484 /* Reuse the copy_locale's data structures. */
1485 collate
= locale
->categories
[LC_COLLATE
].collate
=
1486 copy_locale
->categories
[LC_COLLATE
].collate
;
/* Reset the two line reader flags.  */
1489 ldfile
->translate_strings
= 0;
1490 ldfile
->return_widestr
= 0;
/* Post-process the parsed LC_COLLATE data of LOCALE.
   Steps visible in this function:
   - report a missing category when LOCALE has no collate data;
   - assert that nrules fits into the bits of used_in_level;
   - check that the position rule is used at a given level either in
     all sections or in none;
   - record in used_in_level which elements serve as weights at which
     level, replacing weights that were never defined by the UNDEFINED
     element after reporting them;
   - hand out mborder, wcorder, mbseqorder and wcseqorder values and
     link the elements into the mbheads lists (indexed by first byte)
     and the wcheads table (indexed by first wide character), reporting
     elements that share an encoding;
   - point unset mbheads slots at UNDEFINED and give UNDEFINED its own
     order values;
   - give sections with identical rules the same ruleidx and assert
     that no more than 128 rulesets result.
   CHARMAP is not referenced in the lines shown here.
   NOTE(review): this extract lacks many original lines (braces, else
   branches, some declarations); read it next to the complete file.  */
1495 collate_finish (struct localedef_t
*locale
, const struct charmap_t
*charmap
)
1497 /* Now is the time when we can assign the individual collation
1498 values for all the symbols. We have possibly different values
1499 for the wide- and the multibyte-character symbols. This is done
1500 since it might make a difference in the encoding if there is in
1501 some cases no multibyte-character but there are wide-characters.
1502 (The other way around it is not important since the encoded
1503 collation value in the wide-character case is 32 bits wide and
1504 therefore requires no encoding).
1506 The lowest collation value assigned is 2. Zero is reserved for
1507 the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1508 functions and 1 is used to separate the individual passes for the
1511 We also have to construct a list with all the bytes/words which
1512 can come first in a sequence, followed by all the elements which
1513 also start with this byte/word. The order is reverse which has
1514 among others the important effect that longer strings are located
1515 first in the list. This is required for the output data since
1516 the algorithm used in `strcoll' etc depends on this.
1518 The multibyte case is easy. We simply sort into an array with
1520 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1525 struct element_t
*runp
;
1527 int need_undefined
= 0;
1528 struct section_list
*sect
;
1530 int nr_wide_elems
= 0;
1532 if (collate
== NULL
)
1534 /* No data, no check. */
1536 WITH_CUR_LOCALE (error (0, 0, _("No definition for %s category found"),
1541 /* If this assertion is hit change the type in `element_t'. */
1542 assert (nrules
<= sizeof (runp
->used_in_level
) * 8);
1544 /* Make sure that the `position' rule is used either in all sections
1546 for (i
= 0; i
< nrules
; ++i
)
1547 for (sect
= collate
->sections
; sect
!= NULL
; sect
= sect
->next
)
1548 if (sect
->rules
!= NULL
1549 && ((sect
->rules
[i
] & sort_position
)
1550 != (collate
->sections
->rules
[i
] & sort_position
)))
1552 WITH_CUR_LOCALE (error (0, 0, _("\
1553 %s: `position' must be used for a specific level in all sections or none"),
1558 /* Find out which elements are used at which level. At the same
1559 time we find out whether we have any undefined symbols. */
1560 runp
= collate
->start
;
1561 while (runp
!= NULL
)
1563 if (runp
->mbs
!= NULL
)
1565 for (i
= 0; i
< nrules
; ++i
)
1569 for (j
= 0; j
< runp
->weights
[i
].cnt
; ++j
)
1570 /* A NULL pointer as the weight means IGNORE. */
1571 if (runp
->weights
[i
].w
[j
] != NULL
)
1573 if (runp
->weights
[i
].w
[j
]->weights
== NULL
)
1575 WITH_CUR_LOCALE (error_at_line (0, 0, runp
->file
,
1577 _("symbol `%s' not defined"),
1578 runp
->weights
[i
].w
[j
]->name
));
1581 runp
->weights
[i
].w
[j
] = &collate
->undefined
;
1584 /* Set the bit for the level. */
1585 runp
->weights
[i
].w
[j
]->used_in_level
|= 1 << i
;
1590 /* Up to the next entry. */
1594 /* Walk through the list of defined sequences and assign weights. Also
1595 create the data structure which will allow generating the single byte
1596 character based tables.
1598 Since at each time only the weights for each of the rules are
1599 only compared to other weights for this rule it is possible to
1600 assign more compact weight values than simply counting all
1601 weights in sequence. We can assign weights from 3, one for each
1602 rule individually and only for those elements, which are actually
1605 Why is this important? It is not for the wide char table. But
1606 it is for the singlebyte output since here larger numbers have to
1607 be encoded to make it possible to emit the value as a byte
1609 for (i
= 0; i
< nrules
; ++i
)
1614 runp
= collate
->start
;
1615 while (runp
!= NULL
)
1617 /* Determine the order. */
1618 if (runp
->used_in_level
!= 0)
1620 runp
->mborder
= (int *) obstack_alloc (&collate
->mempool
,
1621 nrules
* sizeof (int));
1623 for (i
= 0; i
< nrules
; ++i
)
1624 if ((runp
->used_in_level
& (1 << i
)) != 0)
1625 runp
->mborder
[i
] = mbact
[i
]++;
1627 runp
->mborder
[i
] = 0;
1630 if (runp
->mbs
!= NULL
)
1632 struct element_t
**eptr
;
1633 struct element_t
*lastp
= NULL
;
1635 /* Find the point where to insert in the list. */
1636 eptr
= &collate
->mbheads
[((unsigned char *) runp
->mbs
)[0]];
1637 while (*eptr
!= NULL
)
1639 if ((*eptr
)->nmbs
< runp
->nmbs
)
1642 if ((*eptr
)->nmbs
== runp
->nmbs
)
1644 int c
= memcmp ((*eptr
)->mbs
, runp
->mbs
, runp
->nmbs
);
1648 /* This should not happen. It means that we have
1649 two symbols with the same byte sequence. It is
1650 of course an error. */
1651 WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr
)->file
,
1654 symbol `%s' has the same encoding as"), (*eptr
)->name
);
1655 error_at_line (0, 0, runp
->file
,
1662 /* Insert it here. */
1666 /* To the next entry. */
1668 eptr
= &(*eptr
)->mbnext
;
1671 /* Set the pointers. */
1672 runp
->mbnext
= *eptr
;
1673 runp
->mblast
= lastp
;
1675 (*eptr
)->mblast
= runp
;
1681 if (runp
->used_in_level
)
1683 runp
->wcorder
= wcact
++;
1685 /* We take the opportunity to count the elements which have
1690 if (runp
->is_character
)
1692 if (runp
->nmbs
== 1)
1693 collate
->mbseqorder
[((unsigned char *) runp
->mbs
)[0]] = mbseqact
++;
1695 runp
->wcseqorder
= wcseqact
++;
1697 else if (runp
->mbs
!= NULL
&& runp
->weights
!= NULL
)
1698 /* This is for collation elements. */
1699 runp
->wcseqorder
= wcseqact
++;
1701 /* Up to the next entry. */
1705 /* Find out whether any of the `mbheads' entries is unset. In this
1706 case we use the UNDEFINED entry. */
1707 for (i
= 1; i
< 256; ++i
)
1708 if (collate
->mbheads
[i
] == NULL
)
1711 collate
->mbheads
[i
] = &collate
->undefined
;
1714 /* Now to the wide character case. */
1715 collate
->wcheads
.p
= 6;
1716 collate
->wcheads
.q
= 10;
1717 wchead_table_init (&collate
->wcheads
);
1719 collate
->wcseqorder
.p
= 6;
1720 collate
->wcseqorder
.q
= 10;
1721 collseq_table_init (&collate
->wcseqorder
);
1724 runp
= collate
->start
;
1725 while (runp
!= NULL
)
1727 if (runp
->wcs
!= NULL
)
1729 struct element_t
*e
;
1730 struct element_t
**eptr
;
1731 struct element_t
*lastp
;
1733 /* Insert the collation sequence value. */
1734 if (runp
->is_character
)
1735 collseq_table_add (&collate
->wcseqorder
, runp
->wcs
[0],
1738 /* Find the point where to insert in the list. */
1739 e
= wchead_table_get (&collate
->wcheads
, runp
->wcs
[0]);
1742 while (*eptr
!= NULL
)
1744 if ((*eptr
)->nwcs
< runp
->nwcs
)
1747 if ((*eptr
)->nwcs
== runp
->nwcs
)
1749 int c
= wmemcmp ((wchar_t *) (*eptr
)->wcs
,
1750 (wchar_t *) runp
->wcs
, runp
->nwcs
);
1754 /* This should not happen. It means that we have
1755 two symbols with the same byte sequence. It is
1756 of course an error. */
1757 WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr
)->file
,
1760 symbol `%s' has the same encoding as"), (*eptr
)->name
);
1761 error_at_line (0, 0, runp
->file
,
1768 /* Insert it here. */
1772 /* To the next entry. */
1774 eptr
= &(*eptr
)->wcnext
;
1777 /* Set the pointers. */
1778 runp
->wcnext
= *eptr
;
1779 runp
->wclast
= lastp
;
1781 (*eptr
)->wclast
= runp
;
1784 wchead_table_add (&collate
->wcheads
, runp
->wcs
[0], e
);
1789 /* Up to the next entry. */
1793 collseq_table_finalize (&collate
->wcseqorder
);
1795 /* Now determine whether the UNDEFINED entry is needed and if yes,
1796 whether it was defined. */
1797 collate
->undefined
.used_in_level
= need_undefined
? ~0ul : 0;
1798 if (collate
->undefined
.file
== NULL
)
1802 /* This seems not to be enforced by recent standards. Don't
1803 emit an error, simply append UNDEFINED at the end. */
1805 WITH_CUR_LOCALE (error (0, 0, _("no definition of `UNDEFINED'")));
1807 /* Add UNDEFINED at the end. */
1808 collate
->undefined
.mborder
=
1809 (int *) obstack_alloc (&collate
->mempool
, nrules
* sizeof (int));
1811 for (i
= 0; i
< nrules
; ++i
)
1812 collate
->undefined
.mborder
[i
] = mbact
[i
]++;
1815 /* In any case we will need the definition for the wide character
1816 case. But we will not complain that it is missing since the
1817 specification strangely enough does not seem to account for
1819 collate
->undefined
.wcorder
= wcact
++;
1822 /* Finally, try to unify the rules for the sections. Whenever the rules
1823 for a section are the same as those for another section give the
1824 ruleset the same index. Since there are never many section we can
1825 use an O(n^2) algorithm here. */
1826 sect
= collate
->sections
;
1827 while (sect
!= NULL
&& sect
->rules
== NULL
)
1830 /* Bail out if we have no sections because of earlier errors. */
1833 WITH_CUR_LOCALE (error (EXIT_FAILURE
, 0,
1834 _("too many errors; giving up")));
1841 struct section_list
*osect
= collate
->sections
;
1843 while (osect
!= sect
)
1844 if (osect
->rules
!= NULL
1845 && memcmp (osect
->rules
, sect
->rules
,
1846 nrules
* sizeof (osect
->rules
[0])) == 0)
1849 osect
= osect
->next
;
1852 sect
->ruleidx
= ruleidx
++;
1854 sect
->ruleidx
= osect
->ruleidx
;
1859 while (sect
!= NULL
&& sect
->rules
== NULL
);
1861 while (sect
!= NULL
);
1862 /* We are currently not prepared for more than 128 rulesets. But this
1863 should never really be a problem. */
1864 assert (ruleidx
<= 128);
/* Append the multibyte weight data of ELEM to POOL and return its
   index.  For each of the nrules levels one length byte is written,
   followed by the utf8_encode output for the mborder value of every
   non-NULL weight of that level (NULL weights are IGNORE entries and
   contribute nothing).  The returned value is the object size of POOL
   on entry combined with (ruleidx & 0x7f) << 24 of the section of ELEM.
   ELEM equal to the UNDEFINED element of COLLATE is special-cased at
   the top; the statement executed on that path is not visible in this
   extract.  */
1869 output_weight (struct obstack
*pool
, struct locale_collate_t
*collate
,
1870 struct element_t
*elem
)
1875 /* Optimize the use of UNDEFINED. */
1876 if (elem
== &collate
->undefined
)
1877 /* The weights are already inserted. */
1880 /* This byte can start exactly one collation element and this is
1881 a single byte. We can directly give the index to the weights. */
1882 retval
= obstack_object_size (pool
);
1884 /* Construct the weight. */
1885 for (cnt
= 0; cnt
< nrules
; ++cnt
)
/* Scratch buffer: seven bytes are reserved per weight of this level.  */
1887 char buf
[elem
->weights
[cnt
].cnt
* 7];
1891 for (i
= 0; i
< elem
->weights
[cnt
].cnt
; ++i
)
1892 /* Encode the weight value. We do nothing for IGNORE entries. */
1893 if (elem
->weights
[cnt
].w
[i
] != NULL
)
1894 len
+= utf8_encode (&buf
[len
],
1895 elem
->weights
[cnt
].w
[i
]->mborder
[cnt
]);
1897 /* And add the buffer content. */
/* The length goes first, as a single byte, then the encoded data.  */
1898 obstack_1grow (pool
, len
);
1899 obstack_grow (pool
, buf
, len
);
/* Combine the index with the low seven bits of the section ruleidx,
   shifted to bit 24 and up.  */
1902 return retval
| ((elem
->section
->ruleidx
& 0x7f) << 24);
/* Wide character counterpart of output_weight: append the weights of
   ELEM to POOL as int32_t values and return the index.  For each of the
   nrules levels the number of non-NULL weights is stored, followed by
   the wcorder value of each of them.  The returned value is the object
   size of POOL on entry divided by sizeof (int32_t), combined with
   (ruleidx & 0x7f) << 24 of the section of ELEM.  ELEM equal to the
   UNDEFINED element of COLLATE is special-cased at the top; the
   statement executed on that path is not visible in this extract.  */
1907 output_weightwc (struct obstack
*pool
, struct locale_collate_t
*collate
,
1908 struct element_t
*elem
)
1913 /* Optimize the use of UNDEFINED. */
1914 if (elem
== &collate
->undefined
)
1915 /* The weights are already inserted. */
1918 /* This byte can start exactly one collation element and this is
1919 a single byte. We can directly give the index to the weights. */
1920 retval
= obstack_object_size (pool
) / sizeof (int32_t);
1922 /* Construct the weight. */
1923 for (cnt
= 0; cnt
< nrules
; ++cnt
)
/* One slot per weight of this level; only the first J slots get used.  */
1925 int32_t buf
[elem
->weights
[cnt
].cnt
];
/* I walks over all weights, J counts the ones that are not NULL.  */
1929 for (i
= 0, j
= 0; i
< elem
->weights
[cnt
].cnt
; ++i
)
1930 if (elem
->weights
[cnt
].w
[i
] != NULL
)
1931 buf
[j
++] = elem
->weights
[cnt
].w
[i
]->wcorder
;
1933 /* And add the buffer content. */
1934 obstack_int32_grow (pool
, j
);
1936 obstack_grow (pool
, buf
, j
* sizeof (int32_t));
/* Combine the index with the low seven bits of the section ruleidx,
   shifted to bit 24 and up.  */
1939 return retval
| ((elem
->section
->ruleidx
& 0x7f) << 24);
1942 /* If localedef is ever threaded, this would need to be __thread var. */
/* Context read by add_to_tablewc through the object named atwc.
   collate_output stores the addresses of its local pools and table
   here before calling wchead_table_iterate and clears the object with
   memset afterwards.  */
/* Pool handed to output_weightwc to receive the weights.  */
1945 struct obstack
*weightpool
;
/* Pool receiving the index, length and character records.  */
1946 struct obstack
*extrapool
;
/* Pool receiving the weight indices of runs of consecutive entries.  */
1947 struct obstack
*indpool
;
/* Collation data handed to output_weightwc.  */
1948 struct locale_collate_t
*collate
;
/* Index table filled through collidx_table_add.  */
1949 struct collidx_table
*tablewc
;
/* Callback passed to wchead_table_iterate by collate_output: enter the
   wide character CH, whose list of collation elements starts at RUNP,
   into the table atwc.tablewc.
   When RUNP has no successor and consists of one wide character, the
   value returned by output_weightwc is stored for CH directly.  In the
   other visible branch the negated current size of atwc.extrapool,
   counted in 32-bit words, is stored for CH and the list is walked
   through wcnext until it ends.  A run of elements of equal length
   that agree in all but the last character, each last character being
   one more than that of its successor, is written as one record: the
   negated offset into atwc.indpool, the length minus one and the
   remaining characters of both ends of the run, with the weight index
   of every member appended to atwc.indpool.  Any other element gets a
   record made of its weight index, its length minus one and its
   characters after the first.
   NOTE(review): this extract lacks braces, else and do keywords and
   some declarations; read it next to the complete file.  */
1952 static void add_to_tablewc (uint32_t ch
, struct element_t
*runp
);
1955 add_to_tablewc (uint32_t ch
, struct element_t
*runp
)
/* A single element of one wide character: no indirection needed.  */
1957 if (runp
->wcnext
== NULL
&& runp
->nwcs
== 1)
1959 int32_t weigthidx
= output_weightwc (atwc
.weightpool
, atwc
.collate
,
1961 collidx_table_add (atwc
.tablewc
, ch
, weigthidx
);
1965 /* As for the singlebyte table, we recognize sequences and
1967 struct element_t
*lastp
;
1969 collidx_table_add (atwc
.tablewc
, ch
,
1970 -(obstack_object_size (atwc
.extrapool
)
1971 / sizeof (uint32_t)));
1975 /* Store the current index in the weight table. We know that
1976 the current position in the `extrapool' is aligned on a
1981 /* Find out whether this is a single entry or we have more than
1982 one consecutive entry. */
1983 if (runp
->wcnext
!= NULL
1984 && runp
->nwcs
== runp
->wcnext
->nwcs
1985 && wmemcmp ((wchar_t *) runp
->wcs
,
1986 (wchar_t *)runp
->wcnext
->wcs
,
1987 runp
->nwcs
- 1) == 0
1988 && (runp
->wcs
[runp
->nwcs
- 1]
1989 == runp
->wcnext
->wcs
[runp
->nwcs
- 1] + 1))
1992 struct element_t
*series_startp
= runp
;
1993 struct element_t
*curp
;
1995 /* Now add first the initial byte sequence. */
1996 added
= (1 + 1 + 2 * (runp
->nwcs
- 1)) * sizeof (int32_t);
1997 if (sizeof (int32_t) == sizeof (int))
1998 obstack_make_room (atwc
.extrapool
, added
);
2000 /* More than one consecutive entry. We mark this by having
2001 a negative index into the indirect table. */
2002 obstack_int32_grow_fast (atwc
.extrapool
,
2003 -(obstack_object_size (atwc
.indpool
)
2004 / sizeof (int32_t)));
2005 obstack_int32_grow_fast (atwc
.extrapool
, runp
->nwcs
- 1);
2008 runp
= runp
->wcnext
;
2009 while (runp
->wcnext
!= NULL
2010 && runp
->nwcs
== runp
->wcnext
->nwcs
2011 && wmemcmp ((wchar_t *) runp
->wcs
,
2012 (wchar_t *)runp
->wcnext
->wcs
,
2013 runp
->nwcs
- 1) == 0
2014 && (runp
->wcs
[runp
->nwcs
- 1]
2015 == runp
->wcnext
->wcs
[runp
->nwcs
- 1] + 1));
2017 /* Now walk backward from here to the beginning. */
2020 for (i
= 1; i
< runp
->nwcs
; ++i
)
2021 obstack_int32_grow_fast (atwc
.extrapool
, curp
->wcs
[i
]);
2023 /* Now find the end of the consecutive sequence and
2024 add all the indices in the indirect pool. */
2027 weightidx
= output_weightwc (atwc
.weightpool
, atwc
.collate
,
2029 obstack_int32_grow (atwc
.indpool
, weightidx
);
2031 curp
= curp
->wclast
;
2033 while (curp
!= series_startp
);
2035 /* Add the final weight. */
2036 weightidx
= output_weightwc (atwc
.weightpool
, atwc
.collate
,
2038 obstack_int32_grow (atwc
.indpool
, weightidx
);
2040 /* And add the end byte sequence. Without length this
2042 for (i
= 1; i
< curp
->nwcs
; ++i
)
2043 obstack_int32_grow (atwc
.extrapool
, curp
->wcs
[i
]);
2047 /* A single entry. Simply add the index and the length and
2048 string (except for the first character which is already
2052 /* Output the weight info. */
2053 weightidx
= output_weightwc (atwc
.weightpool
, atwc
.collate
,
2056 added
= (1 + 1 + runp
->nwcs
- 1) * sizeof (int32_t);
2057 if (sizeof (int) == sizeof (int32_t))
2058 obstack_make_room (atwc
.extrapool
, added
);
2060 obstack_int32_grow_fast (atwc
.extrapool
, weightidx
);
2061 obstack_int32_grow_fast (atwc
.extrapool
, runp
->nwcs
- 1);
2062 for (i
= 1; i
< runp
->nwcs
; ++i
)
2063 obstack_int32_grow_fast (atwc
.extrapool
, runp
->wcs
[i
]);
2068 runp
= runp
->wcnext
;
2070 while (runp
!= NULL
);
2075 collate_output (struct localedef_t
*locale
, const struct charmap_t
*charmap
,
2076 const char *output_path
)
2078 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
2079 const size_t nelems
= _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
);
2080 struct iovec iov
[2 + nelems
];
2081 struct locale_file data
;
2082 uint32_t idx
[nelems
];
2085 int32_t tablemb
[256];
2086 struct obstack weightpool
;
2087 struct obstack extrapool
;
2088 struct obstack indirectpool
;
2089 struct section_list
*sect
;
2090 struct collidx_table tablewc
;
2092 uint32_t *elem_table
;
2094 struct element_t
*runp
;
2096 data
.magic
= LIMAGIC (LC_COLLATE
);
2098 iov
[0].iov_base
= (void *) &data
;
2099 iov
[0].iov_len
= sizeof (data
);
2101 iov
[1].iov_base
= (void *) idx
;
2102 iov
[1].iov_len
= sizeof (idx
);
2104 idx
[0] = iov
[0].iov_len
+ iov
[1].iov_len
;
2107 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_NRULES
));
2108 iov
[2 + cnt
].iov_base
= &nrules
;
2109 iov
[2 + cnt
].iov_len
= sizeof (uint32_t);
2110 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2113 /* If we have no LC_COLLATE data emit only the number of rules as zero. */
2114 if (collate
== NULL
)
2118 while (cnt
< _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
))
2120 /* The words have to be handled specially. */
2121 if (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB
))
2123 iov
[2 + cnt
].iov_base
= &dummy
;
2124 iov
[2 + cnt
].iov_len
= sizeof (int32_t);
2128 iov
[2 + cnt
].iov_base
= NULL
;
2129 iov
[2 + cnt
].iov_len
= 0;
2132 if (cnt
+ 1 < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
))
2133 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2137 assert (cnt
== _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
));
2139 write_locale_data (output_path
, LC_COLLATE
, "LC_COLLATE", 2 + cnt
, iov
);
2144 obstack_init (&weightpool
);
2145 obstack_init (&extrapool
);
2146 obstack_init (&indirectpool
);
2148 /* Since we are using the sign of an integer to mark indirection the
2149 offsets in the arrays we are indirectly referring to must not be
2150 zero since -0 == 0. Therefore we add a bit of dummy content. */
2151 obstack_int32_grow (&extrapool
, 0);
2152 obstack_int32_grow (&indirectpool
, 0);
2154 /* Prepare the ruleset table. */
2155 for (sect
= collate
->sections
, i
= 0; sect
!= NULL
; sect
= sect
->next
)
2156 if (sect
->rules
!= NULL
&& sect
->ruleidx
== i
)
2160 obstack_make_room (&weightpool
, nrules
);
2162 for (j
= 0; j
< nrules
; ++j
)
2163 obstack_1grow_fast (&weightpool
, sect
->rules
[j
]);
2166 /* And align the output. */
2167 i
= (nrules
* i
) % __alignof__ (int32_t);
2170 obstack_1grow (&weightpool
, '\0');
2171 while (++i
< __alignof__ (int32_t));
2173 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_RULESETS
));
2174 iov
[2 + cnt
].iov_len
= obstack_object_size (&weightpool
);
2175 iov
[2 + cnt
].iov_base
= obstack_finish (&weightpool
);
2176 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2179 /* Generate the 8-bit table. Walk through the lists of sequences
2180 starting with the same byte and add them one after the other to
2181 the table. In case we have more than one sequence starting with
2182 the same byte we have to use extra indirection.
2184 First add a record for the NUL byte. This entry will never be used
2185 so it does not matter. */
2188 /* Now insert the `UNDEFINED' value if it is used. Since this value
2189 will probably be used more than once it is good to store the
2190 weights only once. */
2191 if (collate
->undefined
.used_in_level
!= 0)
2192 output_weight (&weightpool
, collate
, &collate
->undefined
);
2194 for (ch
= 1; ch
< 256; ++ch
)
2195 if (collate
->mbheads
[ch
]->mbnext
== NULL
2196 && collate
->mbheads
[ch
]->nmbs
<= 1)
2198 tablemb
[ch
] = output_weight (&weightpool
, collate
,
2199 collate
->mbheads
[ch
]);
2203 /* The entries in the list are sorted by length and then
2204 alphabetically. This is the order in which we will add the
2205 elements to the collation table. This allows simply walking
2206 the table in sequence and stopping at the first matching
2207 entry. Since the longer sequences are coming first in the
2208 list they have the possibility to match first, just as it
2209 has to be. In the worst case we are walking to the end of
2210 the list where we put, if no singlebyte sequence is defined
2211 in the locale definition, the weights for UNDEFINED.
2213 To reduce the length of the search list we compress them a bit.
2214 This happens by collecting sequences of consecutive byte
2215 sequences in one entry (having a begin and end byte sequence)
2216 and add only one index into the weight table. We can find the
2217 consecutive entries since they are also consecutive in the list. */
2218 struct element_t
*runp
= collate
->mbheads
[ch
];
2219 struct element_t
*lastp
;
2221 assert ((obstack_object_size (&extrapool
)
2222 & (__alignof__ (int32_t) - 1)) == 0);
2224 tablemb
[ch
] = -obstack_object_size (&extrapool
);
2228 /* Store the current index in the weight table. We know that
2229 the current position in the `extrapool' is aligned on a
2234 /* Find out whether this is a single entry or we have more than
2235 one consecutive entry. */
2236 if (runp
->mbnext
!= NULL
2237 && runp
->nmbs
== runp
->mbnext
->nmbs
2238 && memcmp (runp
->mbs
, runp
->mbnext
->mbs
, runp
->nmbs
- 1) == 0
2239 && (runp
->mbs
[runp
->nmbs
- 1]
2240 == runp
->mbnext
->mbs
[runp
->nmbs
- 1] + 1))
2243 struct element_t
*series_startp
= runp
;
2244 struct element_t
*curp
;
2246 /* Compute how much space we will need. */
2247 added
= ((sizeof (int32_t) + 1 + 2 * (runp
->nmbs
- 1)
2248 + __alignof__ (int32_t) - 1)
2249 & ~(__alignof__ (int32_t) - 1));
2250 assert ((obstack_object_size (&extrapool
)
2251 & (__alignof__ (int32_t) - 1)) == 0);
2252 obstack_make_room (&extrapool
, added
);
2254 /* More than one consecutive entry. We mark this by having
2255 a negative index into the indirect table. */
2256 obstack_int32_grow_fast (&extrapool
,
2257 -(obstack_object_size (&indirectpool
)
2258 / sizeof (int32_t)));
2260 /* Now search first the end of the series. */
2262 runp
= runp
->mbnext
;
2263 while (runp
->mbnext
!= NULL
2264 && runp
->nmbs
== runp
->mbnext
->nmbs
2265 && memcmp (runp
->mbs
, runp
->mbnext
->mbs
,
2266 runp
->nmbs
- 1) == 0
2267 && (runp
->mbs
[runp
->nmbs
- 1]
2268 == runp
->mbnext
->mbs
[runp
->nmbs
- 1] + 1));
2270 /* Now walk backward from here to the beginning. */
2273 assert (runp
->nmbs
<= 256);
2274 obstack_1grow_fast (&extrapool
, curp
->nmbs
- 1);
2275 for (i
= 1; i
< curp
->nmbs
; ++i
)
2276 obstack_1grow_fast (&extrapool
, curp
->mbs
[i
]);
2278 /* Now find the end of the consecutive sequence and
2279 add all the indices in the indirect pool. */
2282 weightidx
= output_weight (&weightpool
, collate
, curp
);
2283 obstack_int32_grow (&indirectpool
, weightidx
);
2285 curp
= curp
->mblast
;
2287 while (curp
!= series_startp
);
2289 /* Add the final weight. */
2290 weightidx
= output_weight (&weightpool
, collate
, curp
);
2291 obstack_int32_grow (&indirectpool
, weightidx
);
2293 /* And add the end byte sequence. Without length this
2295 for (i
= 1; i
< curp
->nmbs
; ++i
)
2296 obstack_1grow_fast (&extrapool
, curp
->mbs
[i
]);
2300 /* A single entry. Simply add the index and the length and
2301 string (except for the first character which is already
2305 /* Output the weight info. */
2306 weightidx
= output_weight (&weightpool
, collate
, runp
);
2308 added
= ((sizeof (int32_t) + 1 + runp
->nmbs
- 1
2309 + __alignof__ (int32_t) - 1)
2310 & ~(__alignof__ (int32_t) - 1));
2311 assert ((obstack_object_size (&extrapool
)
2312 & (__alignof__ (int32_t) - 1)) == 0);
2313 obstack_make_room (&extrapool
, added
);
2315 obstack_int32_grow_fast (&extrapool
, weightidx
);
2316 assert (runp
->nmbs
<= 256);
2317 obstack_1grow_fast (&extrapool
, runp
->nmbs
- 1);
2319 for (i
= 1; i
< runp
->nmbs
; ++i
)
2320 obstack_1grow_fast (&extrapool
, runp
->mbs
[i
]);
2323 /* Add alignment bytes if necessary. */
2324 while ((obstack_object_size (&extrapool
)
2325 & (__alignof__ (int32_t) - 1)) != 0)
2326 obstack_1grow_fast (&extrapool
, '\0');
2330 runp
= runp
->mbnext
;
2332 while (runp
!= NULL
);
2334 assert ((obstack_object_size (&extrapool
)
2335 & (__alignof__ (int32_t) - 1)) == 0);
2337 /* If the final entry in the list is not a single character we
2338 add an UNDEFINED entry here. */
2339 if (lastp
->nmbs
!= 1)
2341 int added
= ((sizeof (int32_t) + 1 + 1 + __alignof__ (int32_t) - 1)
2342 & ~(__alignof__ (int32_t) - 1));
2343 obstack_make_room (&extrapool
, added
);
2345 obstack_int32_grow_fast (&extrapool
, 0);
2346 /* XXX What rule? We just pick the first. */
2347 obstack_1grow_fast (&extrapool
, 0);
2348 /* Length is zero. */
2349 obstack_1grow_fast (&extrapool
, 0);
2351 /* Add alignment bytes if necessary. */
2352 while ((obstack_object_size (&extrapool
)
2353 & (__alignof__ (int32_t) - 1)) != 0)
2354 obstack_1grow_fast (&extrapool
, '\0');
2358 /* Add padding to the tables if necessary. */
2359 while ((obstack_object_size (&weightpool
) & (__alignof__ (int32_t) - 1))
2361 obstack_1grow (&weightpool
, 0);
2363 /* Now add the four tables. */
2364 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_TABLEMB
));
2365 iov
[2 + cnt
].iov_base
= tablemb
;
2366 iov
[2 + cnt
].iov_len
= sizeof (tablemb
);
2367 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2368 assert ((iov
[2 + cnt
].iov_len
& (__alignof__ (int32_t) - 1)) == 0);
2371 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTMB
));
2372 iov
[2 + cnt
].iov_len
= obstack_object_size (&weightpool
);
2373 iov
[2 + cnt
].iov_base
= obstack_finish (&weightpool
);
2374 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2377 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_EXTRAMB
));
2378 iov
[2 + cnt
].iov_len
= obstack_object_size (&extrapool
);
2379 iov
[2 + cnt
].iov_base
= obstack_finish (&extrapool
);
2380 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2383 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTMB
));
2384 iov
[2 + cnt
].iov_len
= obstack_object_size (&indirectpool
);
2385 iov
[2 + cnt
].iov_base
= obstack_finish (&indirectpool
);
2386 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2387 assert ((iov
[2 + cnt
].iov_len
& (__alignof__ (int32_t) - 1)) == 0);
2391 /* Now the same for the wide character table. We need to store some
2392 more information here. */
2393 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_GAP1
));
2394 iov
[2 + cnt
].iov_base
= NULL
;
2395 iov
[2 + cnt
].iov_len
= 0;
2396 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2397 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2400 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_GAP2
));
2401 iov
[2 + cnt
].iov_base
= NULL
;
2402 iov
[2 + cnt
].iov_len
= 0;
2403 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2404 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2407 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_GAP3
));
2408 iov
[2 + cnt
].iov_base
= NULL
;
2409 iov
[2 + cnt
].iov_len
= 0;
2410 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2411 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2414 /* Since we are using the sign of an integer to mark indirection the
2415 offsets in the arrays we are indirectly referring to must not be
2416 zero since -0 == 0. Therefore we add a bit of dummy content. */
2417 obstack_int32_grow (&extrapool
, 0);
2418 obstack_int32_grow (&indirectpool
, 0);
2420 /* Now insert the `UNDEFINED' value if it is used. Since this value
2421 will probably be used more than once it is good to store the
2422 weights only once. */
2423 if (output_weightwc (&weightpool
, collate
, &collate
->undefined
) != 0)
2426 /* Generate the table. Walk through the lists of sequences starting
2427 with the same wide character and add them one after the other to
2428 the table. In case we have more than one sequence starting with
2429 the same byte we have to use extra indirection. */
2432 collidx_table_init (&tablewc
);
2434 atwc
.weightpool
= &weightpool
;
2435 atwc
.extrapool
= &extrapool
;
2436 atwc
.indpool
= &indirectpool
;
2437 atwc
.collate
= collate
;
2438 atwc
.tablewc
= &tablewc
;
2440 wchead_table_iterate (&collate
->wcheads
, add_to_tablewc
);
2442 memset (&atwc
, 0, sizeof (atwc
));
2444 collidx_table_finalize (&tablewc
);
2446 /* Now add the four tables. */
2447 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_TABLEWC
));
2448 iov
[2 + cnt
].iov_base
= tablewc
.result
;
2449 iov
[2 + cnt
].iov_len
= tablewc
.result_size
;
2450 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2451 assert (iov
[2 + cnt
].iov_len
% sizeof (int32_t) == 0);
2452 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2455 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTWC
));
2456 iov
[2 + cnt
].iov_len
= obstack_object_size (&weightpool
);
2457 iov
[2 + cnt
].iov_base
= obstack_finish (&weightpool
);
2458 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2459 assert (iov
[2 + cnt
].iov_len
% sizeof (int32_t) == 0);
2460 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2463 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_EXTRAWC
));
2464 iov
[2 + cnt
].iov_len
= obstack_object_size (&extrapool
);
2465 iov
[2 + cnt
].iov_base
= obstack_finish (&extrapool
);
2466 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2467 assert (iov
[2 + cnt
].iov_len
% sizeof (int32_t) == 0);
2468 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2471 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTWC
));
2472 iov
[2 + cnt
].iov_len
= obstack_object_size (&indirectpool
);
2473 iov
[2 + cnt
].iov_base
= obstack_finish (&indirectpool
);
2474 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2475 assert (iov
[2 + cnt
].iov_len
% sizeof (int32_t) == 0);
2476 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2480 /* Finally write the table with collation element names out. It is
2481 a hash table with a simple function which gets the name of the
2482 character as the input. One character might have many names. The
2483 value associated with the name is an index into the weight table
2484 where we are then interested in the first-level weight value.
2486 To determine how large the table should be we are counting the
2487 elements we have to put in. Since we are using internal chaining
2488 using a secondary hash function we have to make the table a bit
2489 larger to avoid extremely long search times. We can achieve
2490 good results with a 40% larger table than there are entries. */
2492 runp
= collate
->start
;
2493 while (runp
!= NULL
)
2495 if (runp
->mbs
!= NULL
&& runp
->weights
!= NULL
&& !runp
->is_character
)
2496 /* Yep, the element really counts. */
2501 /* Add 40% and find the next prime number. */
2502 elem_size
= next_prime (elem_size
* 1.4);
2504 /* Allocate the table. Each entry consists of two words: the hash
2505 value and an index in a secondary table which provides the index
2506 into the weight table and the string itself (so that a match can
2508 elem_table
= (uint32_t *) obstack_alloc (&extrapool
,
2509 elem_size
* 2 * sizeof (uint32_t));
2510 memset (elem_table
, '\0', elem_size
* 2 * sizeof (uint32_t));
2512 /* Now add the elements. */
2513 runp
= collate
->start
;
2514 while (runp
!= NULL
)
2516 if (runp
->mbs
!= NULL
&& runp
->weights
!= NULL
&& !runp
->is_character
)
2518 /* Compute the hash value of the name. */
2519 uint32_t namelen
= strlen (runp
->name
);
2520 uint32_t hash
= elem_hash (runp
->name
, namelen
);
2521 size_t idx
= hash
% elem_size
;
2523 size_t start_idx
= idx
;
2526 if (elem_table
[idx
* 2] != 0)
2528 /* The spot is already taken. Try iterating using the value
2529 from the secondary hashing function. */
2530 size_t iter
= hash
% (elem_size
- 2) + 1;
2535 if (idx
>= elem_size
)
2537 assert (idx
!= start_idx
);
2539 while (elem_table
[idx
* 2] != 0);
2541 /* This is the spot where we will insert the value. */
2542 elem_table
[idx
* 2] = hash
;
2543 elem_table
[idx
* 2 + 1] = obstack_object_size (&extrapool
);
2545 /* Then the string itself including length. */
2546 obstack_1grow (&extrapool
, namelen
);
2547 obstack_grow (&extrapool
, runp
->name
, namelen
);
2549 /* And the multibyte representation. */
2550 obstack_1grow (&extrapool
, runp
->nmbs
);
2551 obstack_grow (&extrapool
, runp
->mbs
, runp
->nmbs
);
2553 /* And align again to 32 bits. */
2554 if ((1 + namelen
+ 1 + runp
->nmbs
) % sizeof (int32_t) != 0)
2555 obstack_grow (&extrapool
, "\0\0",
2557 - ((1 + namelen
+ 1 + runp
->nmbs
)
2558 % sizeof (int32_t))));
2560 /* Now some 32-bit values: multibyte collation sequence,
2561 wide char string (including length), and wide char
2562 collation sequence. */
2563 obstack_int32_grow (&extrapool
, runp
->mbseqorder
);
2565 obstack_int32_grow (&extrapool
, runp
->nwcs
);
2566 obstack_grow (&extrapool
, runp
->wcs
,
2567 runp
->nwcs
* sizeof (uint32_t));
2569 obstack_int32_grow (&extrapool
, runp
->wcseqorder
);
2575 /* Prepare to write out this data. */
2576 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB
));
2577 iov
[2 + cnt
].iov_base
= &elem_size
;
2578 iov
[2 + cnt
].iov_len
= sizeof (int32_t);
2579 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2580 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2583 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_SYMB_TABLEMB
));
2584 iov
[2 + cnt
].iov_base
= elem_table
;
2585 iov
[2 + cnt
].iov_len
= elem_size
* 2 * sizeof (int32_t);
2586 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2587 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2590 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_SYMB_EXTRAMB
));
2591 iov
[2 + cnt
].iov_len
= obstack_object_size (&extrapool
);
2592 iov
[2 + cnt
].iov_base
= obstack_finish (&extrapool
);
2593 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2596 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQMB
));
2597 iov
[2 + cnt
].iov_base
= collate
->mbseqorder
;
2598 iov
[2 + cnt
].iov_len
= 256;
2599 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2602 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQWC
));
2603 iov
[2 + cnt
].iov_base
= collate
->wcseqorder
.result
;
2604 iov
[2 + cnt
].iov_len
= collate
->wcseqorder
.result_size
;
2605 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2606 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2609 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_CODESET
));
2610 iov
[2 + cnt
].iov_base
= (void *) charmap
->code_set_name
;
2611 iov
[2 + cnt
].iov_len
= strlen (iov
[2 + cnt
].iov_base
) + 1;
2614 assert (cnt
== _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
));
2616 write_locale_data (output_path
, LC_COLLATE
, "LC_COLLATE", 2 + cnt
, iov
);
2618 obstack_free (&weightpool
, NULL
);
2619 obstack_free (&extrapool
, NULL
);
2620 obstack_free (&indirectpool
, NULL
);
2625 collate_read (struct linereader
*ldfile
, struct localedef_t
*result
,
2626 const struct charmap_t
*charmap
, const char *repertoire_name
,
2629 struct repertoire_t
*repertoire
= NULL
;
2630 struct locale_collate_t
*collate
;
2632 struct token
*arg
= NULL
;
2633 enum token_t nowtok
;
2634 enum token_t was_ellipsis
= tok_none
;
2635 struct localedef_t
*copy_locale
= NULL
;
2638 1 - between `order-start' and `order-end'
2639 2 - after `order-end'
2640 3 - after `reorder-after', waiting for `reorder-end'
2641 4 - after `reorder-end'
2642 5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2643 6 - after `reorder-sections-end'
2647 /* Get the repertoire we have to use. */
2648 if (repertoire_name
!= NULL
)
2649 repertoire
= repertoire_read (repertoire_name
);
2651 /* The rest of the line containing `LC_COLLATE' must be free. */
2652 lr_ignore_rest (ldfile
, 1);
2656 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2659 while (nowtok
== tok_eol
);
2661 if (nowtok
== tok_copy
)
2664 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2665 if (now
->tok
!= tok_string
)
2667 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2671 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2672 while (now
->tok
!= tok_eof
&& now
->tok
!= tok_end
);
2674 if (now
->tok
!= tok_eof
2675 || (now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
),
2676 now
->tok
== tok_eof
))
2677 lr_error (ldfile
, _("%s: premature end of file"), "LC_COLLATE");
2678 else if (now
->tok
!= tok_lc_collate
)
2680 lr_error (ldfile
, _("\
2681 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2682 lr_ignore_rest (ldfile
, 0);
2685 lr_ignore_rest (ldfile
, 1);
2690 if (! ignore_content
)
2692 /* Get the locale definition. */
2693 copy_locale
= load_locale (LC_COLLATE
, now
->val
.str
.startmb
,
2694 repertoire_name
, charmap
, NULL
);
2695 if ((copy_locale
->avail
& COLLATE_LOCALE
) == 0)
2697 /* Not yet loaded. So do it now. */
2698 if (locfile_read (copy_locale
, charmap
) != 0)
2702 if (copy_locale
->categories
[LC_COLLATE
].collate
== NULL
)
2706 lr_ignore_rest (ldfile
, 1);
2708 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2712 /* Prepare the data structures. */
2713 collate_startup (ldfile
, result
, copy_locale
, ignore_content
);
2714 collate
= result
->categories
[LC_COLLATE
].collate
;
2722 /* Of course we don't proceed beyond the end of file. */
2723 if (nowtok
== tok_eof
)
2726 /* Ignore empty lines. */
2727 if (nowtok
== tok_eol
)
2729 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2737 /* Allow copying other locales. */
2738 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2739 if (now
->tok
!= tok_string
)
2742 if (! ignore_content
)
2743 load_locale (LC_COLLATE
, now
->val
.str
.startmb
, repertoire_name
,
2746 lr_ignore_rest (ldfile
, 1);
2749 case tok_coll_weight_max
:
2750 /* Ignore the rest of the line if we don't need the input of
2754 lr_ignore_rest (ldfile
, 0);
2761 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2762 if (arg
->tok
!= tok_number
)
2764 if (collate
->col_weight_max
!= -1)
2765 lr_error (ldfile
, _("%s: duplicate definition of `%s'"),
2766 "LC_COLLATE", "col_weight_max");
2768 collate
->col_weight_max
= arg
->val
.num
;
2769 lr_ignore_rest (ldfile
, 1);
2772 case tok_section_symbol
:
2773 /* Ignore the rest of the line if we don't need the input of
2777 lr_ignore_rest (ldfile
, 0);
2784 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2785 if (arg
->tok
!= tok_bsymbol
)
2787 else if (!ignore_content
)
2789 /* Check whether this section is already known. */
2790 struct section_list
*known
= collate
->sections
;
2791 while (known
!= NULL
)
2793 if (strcmp (known
->name
, arg
->val
.str
.startmb
) == 0)
2795 known
= known
->next
;
2801 _("%s: duplicate declaration of section `%s'"),
2802 "LC_COLLATE", arg
->val
.str
.startmb
);
2803 free (arg
->val
.str
.startmb
);
2806 collate
->sections
= make_seclist_elem (collate
,
2807 arg
->val
.str
.startmb
,
2810 lr_ignore_rest (ldfile
, known
== NULL
);
2814 free (arg
->val
.str
.startmb
);
2815 lr_ignore_rest (ldfile
, 0);
2819 case tok_collating_element
:
2820 /* Ignore the rest of the line if we don't need the input of
2824 lr_ignore_rest (ldfile
, 0);
2828 if (state
!= 0 && state
!= 2)
2831 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2832 if (arg
->tok
!= tok_bsymbol
)
2836 const char *symbol
= arg
->val
.str
.startmb
;
2837 size_t symbol_len
= arg
->val
.str
.lenmb
;
2839 /* Next the `from' keyword. */
2840 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2841 if (arg
->tok
!= tok_from
)
2843 free ((char *) symbol
);
2847 ldfile
->return_widestr
= 1;
2848 ldfile
->translate_strings
= 1;
2850 /* Finally the string with the replacement. */
2851 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2853 ldfile
->return_widestr
= 0;
2854 ldfile
->translate_strings
= 0;
2856 if (arg
->tok
!= tok_string
)
2859 if (!ignore_content
&& symbol
!= NULL
)
2861 /* The name is already defined. */
2862 if (check_duplicate (ldfile
, collate
, charmap
,
2863 repertoire
, symbol
, symbol_len
))
2866 if (arg
->val
.str
.startmb
!= NULL
)
2867 insert_entry (&collate
->elem_table
, symbol
, symbol_len
,
2868 new_element (collate
,
2869 arg
->val
.str
.startmb
,
2870 arg
->val
.str
.lenmb
- 1,
2871 arg
->val
.str
.startwc
,
2872 symbol
, symbol_len
, 0));
2878 free ((char *) symbol
);
2879 if (arg
->val
.str
.startmb
!= NULL
)
2880 free (arg
->val
.str
.startmb
);
2881 if (arg
->val
.str
.startwc
!= NULL
)
2882 free (arg
->val
.str
.startwc
);
2884 lr_ignore_rest (ldfile
, 1);
2888 case tok_collating_symbol
:
2889 /* Ignore the rest of the line if we don't need the input of
2893 lr_ignore_rest (ldfile
, 0);
2897 if (state
!= 0 && state
!= 2)
2900 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2901 if (arg
->tok
!= tok_bsymbol
)
2905 char *symbol
= arg
->val
.str
.startmb
;
2906 size_t symbol_len
= arg
->val
.str
.lenmb
;
2907 char *endsymbol
= NULL
;
2908 size_t endsymbol_len
= 0;
2909 enum token_t ellipsis
= tok_none
;
2911 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2912 if (arg
->tok
== tok_ellipsis2
|| arg
->tok
== tok_ellipsis4
)
2914 ellipsis
= arg
->tok
;
2916 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
2918 if (arg
->tok
!= tok_bsymbol
)
2924 endsymbol
= arg
->val
.str
.startmb
;
2925 endsymbol_len
= arg
->val
.str
.lenmb
;
2927 lr_ignore_rest (ldfile
, 1);
2929 else if (arg
->tok
!= tok_eol
)
2935 if (!ignore_content
)
2938 || (ellipsis
!= tok_none
&& endsymbol
== NULL
))
2940 lr_error (ldfile
, _("\
2941 %s: unknown character in collating symbol name"),
2945 else if (ellipsis
== tok_none
)
2947 /* A single symbol, no ellipsis. */
2948 if (check_duplicate (ldfile
, collate
, charmap
,
2949 repertoire
, symbol
, symbol_len
))
2950 /* The name is already defined. */
2953 insert_entry (&collate
->sym_table
, symbol
, symbol_len
,
2954 new_symbol (collate
, symbol
, symbol_len
));
2956 else if (symbol_len
!= endsymbol_len
)
2960 _("invalid names for character range"));
2965 /* Oh my, we have to handle an ellipsis. First, as
2966 usual, determine the common prefix and then
2967 convert the rest into a range. */
2969 unsigned long int from
;
2970 unsigned long int to
;
2973 for (prefixlen
= 0; prefixlen
< symbol_len
; ++prefixlen
)
2974 if (symbol
[prefixlen
] != endsymbol
[prefixlen
])
2977 /* Convert the rest into numbers. */
2978 symbol
[symbol_len
] = '\0';
2979 from
= strtoul (&symbol
[prefixlen
], &endp
,
2980 ellipsis
== tok_ellipsis2
? 16 : 10);
2982 goto col_sym_inv_range
;
2984 endsymbol
[symbol_len
] = '\0';
2985 to
= strtoul (&endsymbol
[prefixlen
], &endp
,
2986 ellipsis
== tok_ellipsis2
? 16 : 10);
2988 goto col_sym_inv_range
;
2991 goto col_sym_inv_range
;
2993 /* Now loop over all entries. */
2998 symbuf
= (char *) obstack_alloc (&collate
->mempool
,
3001 /* Create the name. */
3003 ellipsis
== tok_ellipsis2
3004 ? "%.*s%.*lX" : "%.*s%.*lu",
3005 (int) prefixlen
, symbol
,
3006 (int) (symbol_len
- prefixlen
), from
);
3008 if (check_duplicate (ldfile
, collate
, charmap
,
3009 repertoire
, symbuf
, symbol_len
))
3010 /* The name is already defined. */
3013 insert_entry (&collate
->sym_table
, symbuf
,
3015 new_symbol (collate
, symbuf
,
3018 /* Increment the counter. */
3030 if (endsymbol
!= NULL
)
3036 case tok_symbol_equivalence
:
3037 /* Ignore the rest of the line if we don't need the input of
3041 lr_ignore_rest (ldfile
, 0);
3048 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3049 if (arg
->tok
!= tok_bsymbol
)
3053 const char *newname
= arg
->val
.str
.startmb
;
3054 size_t newname_len
= arg
->val
.str
.lenmb
;
3055 const char *symname
;
3057 void *symval
; /* Actually struct symbol_t* */
3059 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3060 if (arg
->tok
!= tok_bsymbol
)
3062 if (newname
!= NULL
)
3063 free ((char *) newname
);
3067 symname
= arg
->val
.str
.startmb
;
3068 symname_len
= arg
->val
.str
.lenmb
;
3070 if (newname
== NULL
)
3072 lr_error (ldfile
, _("\
3073 %s: unknown character in equivalent definition name"),
3077 if (newname
!= NULL
)
3078 free ((char *) newname
);
3079 if (symname
!= NULL
)
3080 free ((char *) symname
);
3083 if (symname
== NULL
)
3085 lr_error (ldfile
, _("\
3086 %s: unknown character in equivalent definition value"),
3088 goto sym_equiv_free
;
3091 /* See whether the symbol name is already defined. */
3092 if (find_entry (&collate
->sym_table
, symname
, symname_len
,
3095 lr_error (ldfile
, _("\
3096 %s: unknown symbol `%s' in equivalent definition"),
3097 "LC_COLLATE", symname
);
3098 goto sym_equiv_free
;
3101 if (insert_entry (&collate
->sym_table
,
3102 newname
, newname_len
, symval
) < 0)
3104 lr_error (ldfile
, _("\
3105 error while adding equivalent collating symbol"));
3106 goto sym_equiv_free
;
3109 free ((char *) symname
);
3111 lr_ignore_rest (ldfile
, 1);
3115 /* We get told about the scripts we know. */
3116 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3117 if (arg
->tok
!= tok_bsymbol
)
3121 struct section_list
*runp
= collate
->known_sections
;
3124 while (runp
!= NULL
)
3125 if (strncmp (runp
->name
, arg
->val
.str
.startmb
,
3126 arg
->val
.str
.lenmb
) == 0
3127 && runp
->name
[arg
->val
.str
.lenmb
] == '\0')
3130 runp
= runp
->def_next
;
3134 lr_error (ldfile
, _("duplicate definition of script `%s'"),
3136 lr_ignore_rest (ldfile
, 0);
3140 runp
= (struct section_list
*) xcalloc (1, sizeof (*runp
));
3141 name
= (char *) xmalloc (arg
->val
.str
.lenmb
+ 1);
3142 memcpy (name
, arg
->val
.str
.startmb
, arg
->val
.str
.lenmb
);
3143 name
[arg
->val
.str
.lenmb
] = '\0';
3146 runp
->def_next
= collate
->known_sections
;
3147 collate
->known_sections
= runp
;
3149 lr_ignore_rest (ldfile
, 1);
3152 case tok_order_start
:
3153 /* Ignore the rest of the line if we don't need the input of
3157 lr_ignore_rest (ldfile
, 0);
3161 if (state
!= 0 && state
!= 1 && state
!= 2)
3165 /* The 14652 draft does not specify whether all `order_start' lines
3166 must contain the same number of sort-rules, but 14651 does. So
3167 we require this here as well. */
3168 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3169 if (arg
->tok
== tok_bsymbol
)
3171 /* This had better be a section name. */
3172 struct section_list
*sp
= collate
->known_sections
;
3174 && (sp
->name
== NULL
3175 || strncmp (sp
->name
, arg
->val
.str
.startmb
,
3176 arg
->val
.str
.lenmb
) != 0
3177 || sp
->name
[arg
->val
.str
.lenmb
] != '\0'))
3182 lr_error (ldfile
, _("\
3183 %s: unknown section name `%.*s'"),
3184 "LC_COLLATE", (int) arg
->val
.str
.lenmb
,
3185 arg
->val
.str
.startmb
);
3186 /* We use the error section. */
3187 collate
->current_section
= &collate
->error_section
;
3189 if (collate
->error_section
.first
== NULL
)
3191 /* Insert &collate->error_section at the end of
3192 the collate->sections list. */
3193 if (collate
->sections
== NULL
)
3194 collate
->sections
= &collate
->error_section
;
3197 sp
= collate
->sections
;
3198 while (sp
->next
!= NULL
)
3201 sp
->next
= &collate
->error_section
;
3203 collate
->error_section
.next
= NULL
;
3208 /* One should not be allowed to open the same
3210 if (sp
->first
!= NULL
)
3211 lr_error (ldfile
, _("\
3212 %s: multiple order definitions for section `%s'"),
3213 "LC_COLLATE", sp
->name
);
3216 /* Insert sp in the collate->sections list,
3217 right after collate->current_section. */
3218 if (collate
->current_section
== NULL
)
3219 collate
->current_section
= sp
;
3222 sp
->next
= collate
->current_section
->next
;
3223 collate
->current_section
->next
= sp
;
3227 /* Next should come the end of the line or a semicolon. */
3228 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
3230 if (arg
->tok
== tok_eol
)
3234 /* This means we have exactly one rule: `forward'. */
3236 lr_error (ldfile
, _("\
3237 %s: invalid number of sorting rules"),
3241 sp
->rules
= obstack_alloc (&collate
->mempool
,
3242 (sizeof (enum coll_sort_rule
)
3244 for (cnt
= 0; cnt
< nrules
; ++cnt
)
3245 sp
->rules
[cnt
] = sort_forward
;
3251 /* Get the next token. */
3252 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
3258 /* There is no section symbol. Therefore we use the unnamed
3260 collate
->current_section
= &collate
->unnamed_section
;
3262 if (collate
->unnamed_section
.first
!= NULL
)
3263 lr_error (ldfile
, _("\
3264 %s: multiple order definitions for unnamed section"),
3268 /* Insert &collate->unnamed_section at the beginning of
3269 the collate->sections list. */
3270 collate
->unnamed_section
.next
= collate
->sections
;
3271 collate
->sections
= &collate
->unnamed_section
;
3275 /* Now read the direction names. */
3276 read_directions (ldfile
, arg
, charmap
, repertoire
, result
);
3278 /* From now we need the strings untranslated. */
3279 ldfile
->translate_strings
= 0;
3283 /* Ignore the rest of the line if we don't need the input of
3287 lr_ignore_rest (ldfile
, 0);
3294 /* Handle ellipsis at end of list. */
3295 if (was_ellipsis
!= tok_none
)
3297 handle_ellipsis (ldfile
, NULL
, 0, was_ellipsis
, charmap
,
3298 repertoire
, result
);
3299 was_ellipsis
= tok_none
;
3303 lr_ignore_rest (ldfile
, 1);
3306 case tok_reorder_after
:
3307 /* Ignore the rest of the line if we don't need the input of
3311 lr_ignore_rest (ldfile
, 0);
3317 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
3321 /* Handle ellipsis at end of list. */
3322 if (was_ellipsis
!= tok_none
)
3324 handle_ellipsis (ldfile
, arg
->val
.str
.startmb
,
3325 arg
->val
.str
.lenmb
, was_ellipsis
, charmap
,
3326 repertoire
, result
);
3327 was_ellipsis
= tok_none
;
3330 else if (state
!= 2 && state
!= 3)
3334 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3335 if (arg
->tok
== tok_bsymbol
|| arg
->tok
== tok_ucs4
)
3337 /* Find this symbol in the sequence table. */
3341 struct element_t
*insp
;
3345 if (arg
->tok
== tok_bsymbol
)
3347 startmb
= arg
->val
.str
.startmb
;
3348 lenmb
= arg
->val
.str
.lenmb
;
3352 sprintf (ucsbuf
, "U%08X", arg
->val
.ucs4
);
3357 if (find_entry (&collate
->seq_table
, startmb
, lenmb
, &ptr
) == 0)
3358 /* Yes, the symbol exists. Simply point the cursor
3360 collate
->cursor
= (struct element_t
*) ptr
;
3363 struct symbol_t
*symbp
;
3366 if (find_entry (&collate
->sym_table
, startmb
, lenmb
,
3371 if (symbp
->order
->last
!= NULL
3372 || symbp
->order
->next
!= NULL
)
3373 collate
->cursor
= symbp
->order
;
3376 /* This is a collating symbol but its position
3377 is not yet defined. */
3378 lr_error (ldfile
, _("\
3379 %s: order for collating symbol %.*s not yet defined"),
3380 "LC_COLLATE", (int) lenmb
, startmb
);
3381 collate
->cursor
= NULL
;
3385 else if (find_entry (&collate
->elem_table
, startmb
, lenmb
,
3388 insp
= (struct element_t
*) ptr
;
3390 if (insp
->last
!= NULL
|| insp
->next
!= NULL
)
3391 collate
->cursor
= insp
;
3394 /* This is a collating element but its position
3395 is not yet defined. */
3396 lr_error (ldfile
, _("\
3397 %s: order for collating element %.*s not yet defined"),
3398 "LC_COLLATE", (int) lenmb
, startmb
);
3399 collate
->cursor
= NULL
;
3405 /* This is bad. The symbol after which we have to
3406 insert does not exist. */
3407 lr_error (ldfile
, _("\
3408 %s: cannot reorder after %.*s: symbol not known"),
3409 "LC_COLLATE", (int) lenmb
, startmb
);
3410 collate
->cursor
= NULL
;
3415 lr_ignore_rest (ldfile
, no_error
);
3418 /* This must not happen. */
3422 case tok_reorder_end
:
3423 /* Ignore the rest of the line if we don't need the input of
3431 lr_ignore_rest (ldfile
, 1);
3434 case tok_reorder_sections_after
:
3435 /* Ignore the rest of the line if we don't need the input of
3439 lr_ignore_rest (ldfile
, 0);
3445 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
3449 /* Handle ellipsis at end of list. */
3450 if (was_ellipsis
!= tok_none
)
3452 handle_ellipsis (ldfile
, NULL
, 0, was_ellipsis
, charmap
,
3453 repertoire
, result
);
3454 was_ellipsis
= tok_none
;
3457 else if (state
== 3)
3459 WITH_CUR_LOCALE (error (0, 0, _("\
3460 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3463 else if (state
!= 2 && state
!= 4)
3467 /* Get the name of the sections we are adding after. */
3468 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3469 if (arg
->tok
== tok_bsymbol
)
3471 /* Now find a section with this name. */
3472 struct section_list
*runp
= collate
->sections
;
3474 while (runp
!= NULL
)
3476 if (runp
->name
!= NULL
3477 && strlen (runp
->name
) == arg
->val
.str
.lenmb
3478 && memcmp (runp
->name
, arg
->val
.str
.startmb
,
3479 arg
->val
.str
.lenmb
) == 0)
3486 collate
->current_section
= runp
;
3489 /* This is bad. The section after which we have to
3490 reorder does not exist. Therefore we cannot
3491 process the whole rest of this reorder
3493 lr_error (ldfile
, _("%s: section `%.*s' not known"),
3494 "LC_COLLATE", (int) arg
->val
.str
.lenmb
,
3495 arg
->val
.str
.startmb
);
3499 lr_ignore_rest (ldfile
, 0);
3501 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3503 while (now
->tok
== tok_reorder_sections_after
3504 || now
->tok
== tok_reorder_sections_end
3505 || now
->tok
== tok_end
);
3507 /* Process the token we just saw. */
3513 /* This must not happen. */
3517 case tok_reorder_sections_end
:
3518 /* Ignore the rest of the line if we don't need the input of
3526 lr_ignore_rest (ldfile
, 1);
3531 /* Ignore the rest of the line if we don't need the input of
3535 lr_ignore_rest (ldfile
, 0);
3539 if (state
!= 0 && state
!= 1 && state
!= 3 && state
!= 5)
3542 if ((state
== 0 || state
== 5) && nowtok
== tok_ucs4
)
3545 if (nowtok
== tok_ucs4
)
3547 snprintf (ucs4buf
, sizeof (ucs4buf
), "U%08X", now
->val
.ucs4
);
3551 else if (arg
!= NULL
)
3553 symstr
= arg
->val
.str
.startmb
;
3554 symlen
= arg
->val
.str
.lenmb
;
3558 lr_error (ldfile
, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3559 (int) ldfile
->token
.val
.str
.lenmb
,
3560 ldfile
->token
.val
.str
.startmb
);
3564 struct element_t
*seqp
;
3567 /* We are outside an `order_start' region. This means
3568 we must only accept definitions of values for
3569 collation symbols since these are purely abstract
3570 values and don't need directions associated. */
3573 if (find_entry (&collate
->seq_table
, symstr
, symlen
, &ptr
) == 0)
3577 /* It's already defined. First check whether this
3578 is really a collating symbol. */
3579 if (seqp
->is_character
)
3588 if (find_entry (&collate
->sym_table
, symstr
, symlen
,
3590 /* No collating symbol, it's an error. */
3593 /* Maybe this is the first time we define a symbol
3594 value and it is before the first actual section. */
3595 if (collate
->sections
== NULL
)
3596 collate
->sections
= collate
->current_section
=
3597 &collate
->symbol_section
;
3600 if (was_ellipsis
!= tok_none
)
3602 handle_ellipsis (ldfile
, symstr
, symlen
, was_ellipsis
,
3603 charmap
, repertoire
, result
);
3605 /* Remember that we processed the ellipsis. */
3606 was_ellipsis
= tok_none
;
3608 /* And don't add the value a second time. */
3612 else if (state
== 3)
3614 /* It is possible that we already have this collation sequence.
3615 In this case we move the entry. */
3619 /* If the symbol after which we have to insert was not found
3620 ignore all entries. */
3621 if (collate
->cursor
== NULL
)
3623 lr_ignore_rest (ldfile
, 0);
3627 if (find_entry (&collate
->seq_table
, symstr
, symlen
, &ptr
) == 0)
3629 seqp
= (struct element_t
*) ptr
;
3633 if (find_entry (&collate
->sym_table
, symstr
, symlen
, &sym
) == 0
3634 && (seqp
= ((struct symbol_t
*) sym
)->order
) != NULL
)
3637 if (find_entry (&collate
->elem_table
, symstr
, symlen
, &ptr
) == 0
3638 && (seqp
= (struct element_t
*) ptr
,
3639 seqp
->last
!= NULL
|| seqp
->next
!= NULL
3640 || (collate
->start
!= NULL
&& seqp
== collate
->start
)))
3643 /* Remove the entry from the old position. */
3644 if (seqp
->last
== NULL
)
3645 collate
->start
= seqp
->next
;
3647 seqp
->last
->next
= seqp
->next
;
3648 if (seqp
->next
!= NULL
)
3649 seqp
->next
->last
= seqp
->last
;
3651 /* We also have to check whether this entry is the
3652 first or last of a section. */
3653 if (seqp
->section
->first
== seqp
)
3655 if (seqp
->section
->first
== seqp
->section
->last
)
3656 /* This section has no content anymore. */
3657 seqp
->section
->first
= seqp
->section
->last
= NULL
;
3659 seqp
->section
->first
= seqp
->next
;
3661 else if (seqp
->section
->last
== seqp
)
3662 seqp
->section
->last
= seqp
->last
;
3664 /* Now insert it in the new place. */
3665 insert_weights (ldfile
, seqp
, charmap
, repertoire
, result
,
3670 /* Otherwise we just add a new entry. */
3672 else if (state
== 5)
3674 /* We are reordering sections. Find the named section. */
3675 struct section_list
*runp
= collate
->sections
;
3676 struct section_list
*prevp
= NULL
;
3678 while (runp
!= NULL
)
3680 if (runp
->name
!= NULL
3681 && strlen (runp
->name
) == symlen
3682 && memcmp (runp
->name
, symstr
, symlen
) == 0)
3691 lr_error (ldfile
, _("%s: section `%.*s' not known"),
3692 "LC_COLLATE", (int) symlen
, symstr
);
3693 lr_ignore_rest (ldfile
, 0);
3697 if (runp
!= collate
->current_section
)
3699 /* Remove the named section from the old place and
3700 insert it in the new one. */
3701 prevp
->next
= runp
->next
;
3703 runp
->next
= collate
->current_section
->next
;
3704 collate
->current_section
->next
= runp
;
3705 collate
->current_section
= runp
;
3708 /* Process the rest of the line which might change
3709 the collation rules. */
3710 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
3712 if (arg
->tok
!= tok_eof
&& arg
->tok
!= tok_eol
)
3713 read_directions (ldfile
, arg
, charmap
, repertoire
,
3718 else if (was_ellipsis
!= tok_none
)
3720 /* Using the information in the `ellipsis_weight'
3721 element and this and the last value we have to handle
3722 the ellipsis now. */
3723 assert (state
== 1);
3725 handle_ellipsis (ldfile
, symstr
, symlen
, was_ellipsis
, charmap
,
3726 repertoire
, result
);
3728 /* Remember that we processed the ellipsis. */
3729 was_ellipsis
= tok_none
;
3731 /* And don't add the value a second time. */
3735 /* Now insert in the new place. */
3736 insert_value (ldfile
, symstr
, symlen
, charmap
, repertoire
, result
);
3740 /* Ignore the rest of the line if we don't need the input of
3744 lr_ignore_rest (ldfile
, 0);
3751 if (was_ellipsis
!= tok_none
)
3754 _("%s: cannot have `%s' as end of ellipsis range"),
3755 "LC_COLLATE", "UNDEFINED");
3757 unlink_element (collate
);
3758 was_ellipsis
= tok_none
;
3761 /* See whether UNDEFINED already appeared somewhere. */
3762 if (collate
->undefined
.next
!= NULL
3763 || &collate
->undefined
== collate
->cursor
)
3766 _("%s: order for `%.*s' already defined at %s:%Zu"),
3767 "LC_COLLATE", 9, "UNDEFINED",
3768 collate
->undefined
.file
,
3769 collate
->undefined
.line
);
3770 lr_ignore_rest (ldfile
, 0);
3773 /* Parse the weights. */
3774 insert_weights (ldfile
, &collate
->undefined
, charmap
,
3775 repertoire
, result
, tok_none
);
3778 case tok_ellipsis2
: /* symbolic hexadecimal ellipsis */
3779 case tok_ellipsis3
: /* absolute ellipsis */
3780 case tok_ellipsis4
: /* symbolic decimal ellipsis */
3781 /* This is the symbolic (decimal or hexadecimal) or absolute
3783 if (was_ellipsis
!= tok_none
)
3786 if (state
!= 0 && state
!= 1 && state
!= 3)
3789 was_ellipsis
= nowtok
;
3791 insert_weights (ldfile
, &collate
->ellipsis_weight
, charmap
,
3792 repertoire
, result
, nowtok
);
3796 /* Next we assume `LC_COLLATE'. */
3797 if (!ignore_content
)
3800 /* We must either see a copy statement or have
3803 _("%s: empty category description not allowed"),
3805 else if (state
== 1)
3807 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
3810 /* Handle ellipsis at end of list. */
3811 if (was_ellipsis
!= tok_none
)
3813 handle_ellipsis (ldfile
, NULL
, 0, was_ellipsis
, charmap
,
3814 repertoire
, result
);
3815 was_ellipsis
= tok_none
;
3818 else if (state
== 3)
3819 WITH_CUR_LOCALE (error (0, 0, _("\
3820 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3821 else if (state
== 5)
3822 WITH_CUR_LOCALE (error (0, 0, _("\
3823 %s: missing `reorder-sections-end' keyword"), "LC_COLLATE"));
3825 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3826 if (arg
->tok
== tok_eof
)
3828 if (arg
->tok
== tok_eol
)
3829 lr_error (ldfile
, _("%s: incomplete `END' line"), "LC_COLLATE");
3830 else if (arg
->tok
!= tok_lc_collate
)
3831 lr_error (ldfile
, _("\
3832 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3833 lr_ignore_rest (ldfile
, arg
->tok
== tok_lc_collate
);
3838 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
3841 /* Prepare for the next round. */
3842 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3846 /* When we come here we reached the end of the file. */
3847 lr_error (ldfile
, _("%s: premature end of file"), "LC_COLLATE");