1 /* Copyright (C) 1995-2003, 2005, 2006, 2007 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
27 #include <sys/param.h>
29 #include "localedef.h"
31 #include "localeinfo.h"
32 #include "linereader.h"
34 #include "elem-hash.h"
36 /* Uncomment the following line in the production version. */
37 /* #define NDEBUG 1 */
40 #define obstack_chunk_alloc malloc
41 #define obstack_chunk_free free
/* Append the 32-bit value DATA to the object currently being grown in
   OBSTACK.  Two calls are visible: obstack_int_grow, guarded by the test
   that int32_t and int have the same size, and a generic obstack_grow that
   copies sizeof (int32_t) bytes from DATA.  The second call is presumably
   the `else' branch; the line in between is not visible in this excerpt.  */
44 __attribute ((always_inline
))
45 obstack_int32_grow (struct obstack
*obstack
, int32_t data
)
/* The size comparison is a compile-time constant.  */
47 if (sizeof (int32_t) == sizeof (int))
48 obstack_int_grow (obstack
, data
);
/* Generic path: copy the raw bytes of DATA.  */
50 obstack_grow (obstack
, &data
, sizeof (int32_t));
/* Variant of obstack_int32_grow that uses obstack_int_grow_fast when
   int32_t and int have the same size.  Note that the other visible call is
   the ordinary obstack_grow, not a `_fast' variant; it is presumably the
   `else' branch (the line in between is not visible in this excerpt).  */
54 __attribute ((always_inline
))
55 obstack_int32_grow_fast (struct obstack
*obstack
, int32_t data
)
/* The size comparison is a compile-time constant.  */
57 if (sizeof (int32_t) == sizeof (int))
58 obstack_int_grow_fast (obstack
, data
);
/* Generic path: copy the raw bytes of DATA with the regular grow call.  */
60 obstack_grow (obstack
, &data
, sizeof (int32_t));
63 /* Forward declaration. */
66 /* Data type for list of strings. */
69 /* Successor in the known_sections list. */
70 struct section_list
*def_next
;
71 /* Successor in the sections list. */
72 struct section_list
*next
;
73 /* Name of the section. */
75 /* First element of this section. */
76 struct element_t
*first
;
77 /* Last element of this section. */
78 struct element_t
*last
;
79 /* These are the rules for this section. */
80 enum coll_sort_rule
*rules
;
81 /* Index of the rule set in the appropriate section of the output file. */
89 /* Number of elements. */
95 /* Data type for collating element. */
107 /* The following is a bit mask whose bits are set if this element is
108 used in the appropriate level. Interesting for the singlebyte
111 XXX The type here restricts the number of levels to 32. It could
112 be changed if necessary but I doubt this is necessary. */
113 unsigned int used_in_level
;
115 struct element_list_t
*weights
;
117 /* Nonzero if this is a real character definition. */
120 /* Order of the character in the sequence. This information will
121 be used in range expressions. */
125 /* Where does the definition come from. */
129 /* Which section does this belong to. */
130 struct section_list
*section
;
132 /* Predecessor and successor in the order list. */
133 struct element_t
*last
;
134 struct element_t
*next
;
136 /* Next element in multibyte output list. */
137 struct element_t
*mbnext
;
138 struct element_t
*mblast
;
140 /* Next element in wide character output list. */
141 struct element_t
*wcnext
;
142 struct element_t
*wclast
;
145 /* Special element value. */
146 #define ELEMENT_ELLIPSIS2 ((struct element_t *) 1)
147 #define ELEMENT_ELLIPSIS3 ((struct element_t *) 2)
148 #define ELEMENT_ELLIPSIS4 ((struct element_t *) 3)
150 /* Data type for collating symbol. */
155 /* Point to place in the order list. */
156 struct element_t
*order
;
158 /* Where does the definition come from. */
163 /* Sparse table of struct element_t *. */
164 #define TABLE wchead_table
165 #define ELEMENT struct element_t *
171 /* Sparse table of int32_t. */
172 #define TABLE collidx_table
173 #define ELEMENT int32_t
177 /* Sparse table of uint32_t. */
178 #define TABLE collseq_table
179 #define ELEMENT uint32_t
180 #define DEFAULT ~((uint32_t) 0)
184 /* The real definition of the struct for the LC_COLLATE locale. */
185 struct locale_collate_t
190 /* List of known scripts. */
191 struct section_list
*known_sections
;
192 /* List of used sections. */
193 struct section_list
*sections
;
194 /* Current section using definition. */
195 struct section_list
*current_section
;
196 /* There always can be an unnamed section. */
197 struct section_list unnamed_section
;
198 /* To make handling of errors easier we have another section. */
199 struct section_list error_section
;
200 /* Sometimes we are defining the values for collating symbols before
201 the first actual section. */
202 struct section_list symbol_section
;
204 /* Start of the order list. */
205 struct element_t
*start
;
207 /* The undefined element. */
208 struct element_t undefined
;
210 /* This is the cursor for `reorder_after' insertions. */
211 struct element_t
*cursor
;
213 /* This value is used when handling ellipsis. */
214 struct element_t ellipsis_weight
;
216 /* Known collating elements. */
217 hash_table elem_table
;
219 /* Known collating symbols. */
220 hash_table sym_table
;
222 /* Known collation sequences. */
223 hash_table seq_table
;
225 struct obstack mempool
;
227 /* The LC_COLLATE category is a bit special as it is sometimes possible
228 that the definitions from more than one input file contain information.
229 Therefore we keep all relevant input in a list. */
230 struct locale_collate_t
*next
;
232 /* Arrays with heads of the list for each of the leading bytes in
233 the multibyte sequences. */
234 struct element_t
*mbheads
[256];
236 /* Table with the heads of the lists for the wide character
237 sequences (presumably keyed by the leading wide character). */
238 struct wchead_table wcheads
;
240 /* The arrays with the collation sequence order. */
241 unsigned char mbseqorder
[256];
242 struct collseq_table wcseqorder
;
246 /* We have a few global variables which are used for reading all
247 LC_COLLATE category descriptions in all files. */
248 static uint32_t nrules
;
251 /* We need UTF-8 encoding of numbers. */
/* Writes an encoding of VAL into BUF.  Only part of the body is visible in
   this excerpt (the return value, the declaration of `step' and the handling
   of small values are not shown), so the notes below describe just the
   visible statements.  */
253 __attribute ((always_inline
))
254 utf8_encode (char *buf
, int val
)
/* Try the candidate values 2..5 for `step'.  */
267 for (step
= 2; step
< 6; ++step
)
/* True when VAL has no bit set at position 5 * step + 1 or above.  */
268 if ((val
& (~(uint32_t)0 << (5 * step
+ 1))) == 0)
/* First byte: the low byte of ~0xff shifted right by `step'
   (presumably the UTF-8 length marker -- confirm against the full source).  */
272 *buf
= (unsigned char) (~0xff >> step
);
/* Store a byte with the high bit set and the low six bits of VAL.  */
276 buf
[step
] = 0x80 | (val
& 0x3f);
287 static struct section_list
*
288 make_seclist_elem (struct locale_collate_t
*collate
, const char *string
,
289 struct section_list
*next
)
291 struct section_list
*newp
;
293 newp
= (struct section_list
*) obstack_alloc (&collate
->mempool
,
304 static struct element_t
*
305 new_element (struct locale_collate_t
*collate
, const char *mbs
, size_t mbslen
,
306 const uint32_t *wcs
, const char *name
, size_t namelen
,
309 struct element_t
*newp
;
311 newp
= (struct element_t
*) obstack_alloc (&collate
->mempool
,
313 newp
->name
= name
== NULL
? NULL
: obstack_copy0 (&collate
->mempool
,
317 newp
->mbs
= obstack_copy0 (&collate
->mempool
, mbs
, mbslen
);
327 size_t nwcs
= wcslen ((wchar_t *) wcs
);
329 obstack_grow (&collate
->mempool
, wcs
, nwcs
* sizeof (uint32_t));
330 obstack_grow (&collate
->mempool
, &zero
, sizeof (uint32_t));
331 newp
->wcs
= (uint32_t *) obstack_finish (&collate
->mempool
);
339 newp
->mborder
= NULL
;
341 newp
->used_in_level
= 0;
342 newp
->is_character
= is_character
;
344 /* Will be assigned later. XXX */
345 newp
->mbseqorder
= 0;
346 newp
->wcseqorder
= 0;
348 /* Will be allocated later. */
349 newp
->weights
= NULL
;
354 newp
->section
= collate
->current_section
;
/* Create a new collating symbol record.  The record itself and a copy of
   the LEN bytes at NAME are both allocated from COLLATE's `mempool'
   obstack.  Initialization of any further members and the return statement
   are not visible in this excerpt.  */
369 static struct symbol_t
*
370 new_symbol (struct locale_collate_t
*collate
, const char *name
, size_t len
)
372 struct symbol_t
*newp
;
/* Allocate the record from the obstack.  */
374 newp
= (struct symbol_t
*) obstack_alloc (&collate
->mempool
, sizeof (*newp
));
/* Keep a private copy of the name; obstack_copy0 appends a NUL byte.  */
376 newp
->name
= obstack_copy0 (&collate
->mempool
, name
, len
);
386 /* Test whether this name is already defined somewhere. */
/* SYMBOL (of length `symbol_len'; that parameter's declaration is not
   visible in this excerpt) is looked up in four tables in turn: the charmap's
   character table, the repertoire's character table (only if REPERTOIRE is
   not NULL), and COLLATE's collating symbol and collating element tables.
   Each successful lookup is reported through lr_error on LDFILE.  The
   function's return type and return statements are not visible here.  */
388 check_duplicate (struct linereader
*ldfile
, struct locale_collate_t
*collate
,
389 const struct charmap_t
*charmap
,
390 struct repertoire_t
*repertoire
, const char *symbol
,
/* A find_entry result of 0 is treated as "name found".  */
395 if (find_entry (&charmap
->char_table
, symbol
, symbol_len
, &ignore
) == 0)
397 lr_error (ldfile
, _("`%.*s' already defined in charmap"),
398 (int) symbol_len
, symbol
);
/* The repertoire is optional.  The comparison that completes this condition
   is not visible in this excerpt.  */
402 if (repertoire
!= NULL
403 && (find_entry (&repertoire
->char_table
, symbol
, symbol_len
, &ignore
)
406 lr_error (ldfile
, _("`%.*s' already defined in repertoire"),
407 (int) symbol_len
, symbol
);
/* Collating symbols defined so far.  */
411 if (find_entry (&collate
->sym_table
, symbol
, symbol_len
, &ignore
) == 0)
413 lr_error (ldfile
, _("`%.*s' already defined as collating symbol"),
414 (int) symbol_len
, symbol
);
/* Collating elements defined so far.  */
418 if (find_entry (&collate
->elem_table
, symbol
, symbol_len
, &ignore
) == 0)
420 lr_error (ldfile
, _("`%.*s' already defined as collating element"),
421 (int) symbol_len
, symbol
);
429 /* Read the direction specification. */
431 read_directions (struct linereader
*ldfile
, struct token
*arg
,
432 const struct charmap_t
*charmap
,
433 struct repertoire_t
*repertoire
, struct localedef_t
*result
)
436 int max
= nrules
?: 10;
437 enum coll_sort_rule
*rules
= calloc (max
, sizeof (*rules
));
439 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
445 if (arg
->tok
== tok_forward
)
447 if (rules
[cnt
] & sort_backward
)
451 lr_error (ldfile
, _("\
452 %s: `forward' and `backward' are mutually excluding each other"),
457 else if (rules
[cnt
] & sort_forward
)
461 lr_error (ldfile
, _("\
462 %s: `%s' mentioned more than once in definition of weight %d"),
463 "LC_COLLATE", "forward", cnt
+ 1);
467 rules
[cnt
] |= sort_forward
;
471 else if (arg
->tok
== tok_backward
)
473 if (rules
[cnt
] & sort_forward
)
477 lr_error (ldfile
, _("\
478 %s: `forward' and `backward' are mutually excluding each other"),
483 else if (rules
[cnt
] & sort_backward
)
487 lr_error (ldfile
, _("\
488 %s: `%s' mentioned more than once in definition of weight %d"),
489 "LC_COLLATE", "backward", cnt
+ 1);
493 rules
[cnt
] |= sort_backward
;
497 else if (arg
->tok
== tok_position
)
499 if (rules
[cnt
] & sort_position
)
503 lr_error (ldfile
, _("\
504 %s: `%s' mentioned more than once in definition of weight %d"),
505 "LC_COLLATE", "position", cnt
+ 1);
509 rules
[cnt
] |= sort_position
;
515 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
517 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
|| arg
->tok
== tok_comma
518 || arg
->tok
== tok_semicolon
)
520 if (! valid
&& ! warned
)
522 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
526 /* See whether we have to increment the counter. */
527 if (arg
->tok
!= tok_comma
&& rules
[cnt
] != 0)
529 /* Add the default `forward' if we have seen only `position'. */
530 if (rules
[cnt
] == sort_position
)
531 rules
[cnt
] = sort_position
| sort_forward
;
536 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
)
537 /* End of line or file, so we exit the loop. */
542 /* See whether we have enough room in the array. */
546 rules
= (enum coll_sort_rule
*) xrealloc (rules
,
549 memset (&rules
[cnt
], '\0', (max
- cnt
) * sizeof (*rules
));
556 /* There must not be any more rules. */
559 lr_error (ldfile
, _("\
560 %s: too many rules; first entry only had %d"),
561 "LC_COLLATE", nrules
);
565 lr_ignore_rest (ldfile
, 0);
574 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
579 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
584 /* Now we know how many rules we have. */
586 rules
= (enum coll_sort_rule
*) xrealloc (rules
,
587 nrules
* sizeof (*rules
));
593 /* Not enough rules in this specification. */
595 lr_error (ldfile
, _("%s: not enough sorting rules"), "LC_COLLATE");
598 rules
[cnt
] = sort_forward
;
599 while (++cnt
< nrules
);
603 collate
->current_section
->rules
= rules
;
607 static struct element_t
*
608 find_element (struct linereader
*ldfile
, struct locale_collate_t
*collate
,
609 const char *str
, size_t len
)
613 /* Search for the entries among the collation sequences already defined. */
614 if (find_entry (&collate
->seq_table
, str
, len
, &result
) != 0)
616 /* Nope, not defined yet. So we see whether it is a
620 if (find_entry (&collate
->sym_table
, str
, len
, &ptr
) == 0)
622 /* It's a collation symbol. */
623 struct symbol_t
*sym
= (struct symbol_t
*) ptr
;
627 result
= sym
->order
= new_element (collate
, NULL
, 0, NULL
,
630 else if (find_entry (&collate
->elem_table
, str
, len
, &result
) != 0)
632 /* It's also no collation element. So it is a character
633 element defined later. */
634 result
= new_element (collate
, NULL
, 0, NULL
, str
, len
, 1);
635 /* Insert it into the sequence table. */
636 insert_entry (&collate
->seq_table
, str
, len
, result
);
640 return (struct element_t
*) result
;
/* Remove the element COLLATE->cursor points to from the doubly linked order
   list (`last' is the predecessor link, `next' the successor link) and
   update the cursor.  The two groups of statements below are presumably the
   two branches of an if/else; the braces and the `else' are not visible in
   this excerpt.  */
645 unlink_element (struct locale_collate_t
*collate
)
/* Case 1: the cursor is at the start of the list.  */
647 if (collate
->cursor
== collate
->start
)
/* The element is expected to have no neighbours in this case.  */
649 assert (collate
->cursor
->next
== NULL
);
650 assert (collate
->cursor
->last
== NULL
);
651 collate
->cursor
= NULL
;
/* Case 2: make the successor (if any) point back to the predecessor ...  */
655 if (collate
->cursor
->next
!= NULL
)
656 collate
->cursor
->next
->last
= collate
->cursor
->last
;
/* ... and the predecessor (if any) point forward to the successor.  */
657 if (collate
->cursor
->last
!= NULL
)
658 collate
->cursor
->last
->next
= collate
->cursor
->next
;
/* The cursor moves back to the predecessor of the removed element.  */
659 collate
->cursor
= collate
->cursor
->last
;
665 insert_weights (struct linereader
*ldfile
, struct element_t
*elem
,
666 const struct charmap_t
*charmap
,
667 struct repertoire_t
*repertoire
, struct localedef_t
*result
,
668 enum token_t ellipsis
)
672 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
674 /* Initialize all the fields. */
675 elem
->file
= ldfile
->fname
;
676 elem
->line
= ldfile
->lineno
;
678 elem
->last
= collate
->cursor
;
679 elem
->next
= collate
->cursor
? collate
->cursor
->next
: NULL
;
680 if (collate
->cursor
!= NULL
&& collate
->cursor
->next
!= NULL
)
681 collate
->cursor
->next
->last
= elem
;
682 if (collate
->cursor
!= NULL
)
683 collate
->cursor
->next
= elem
;
684 if (collate
->start
== NULL
)
686 assert (collate
->cursor
== NULL
);
687 collate
->start
= elem
;
690 elem
->section
= collate
->current_section
;
692 if (collate
->current_section
->first
== NULL
)
693 collate
->current_section
->first
= elem
;
694 if (collate
->current_section
->last
== collate
->cursor
)
695 collate
->current_section
->last
= elem
;
697 collate
->cursor
= elem
;
699 elem
->weights
= (struct element_list_t
*)
700 obstack_alloc (&collate
->mempool
, nrules
* sizeof (struct element_list_t
));
701 memset (elem
->weights
, '\0', nrules
* sizeof (struct element_list_t
));
705 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
708 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
)
711 if (arg
->tok
== tok_ignore
)
713 /* The weight for this level has to be ignored. We use the
714 null pointer to indicate this. */
715 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
716 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
717 elem
->weights
[weight_cnt
].w
[0] = NULL
;
718 elem
->weights
[weight_cnt
].cnt
= 1;
720 else if (arg
->tok
== tok_bsymbol
|| arg
->tok
== tok_ucs4
)
723 struct element_t
*val
;
727 if (arg
->tok
== tok_bsymbol
)
729 symstr
= arg
->val
.str
.startmb
;
730 symlen
= arg
->val
.str
.lenmb
;
734 snprintf (ucs4str
, sizeof (ucs4str
), "U%08X", arg
->val
.ucs4
);
739 val
= find_element (ldfile
, collate
, symstr
, symlen
);
743 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
744 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
745 elem
->weights
[weight_cnt
].w
[0] = val
;
746 elem
->weights
[weight_cnt
].cnt
= 1;
748 else if (arg
->tok
== tok_string
)
750 /* Split the string up in the individual characters and put
751 the element definitions in the list. */
752 const char *cp
= arg
->val
.str
.startmb
;
754 struct element_t
*charelem
;
755 struct element_t
**weights
= NULL
;
760 lr_error (ldfile
, _("%s: empty weight string not allowed"),
762 lr_ignore_rest (ldfile
, 0);
770 /* Ahh, it's a bsymbol or a UCS4 value. If it's
771 the latter we have to unify the name. */
772 const char *startp
= ++cp
;
777 if (*cp
== ldfile
->escape_char
)
780 /* It's a syntax error. */
786 if (cp
- startp
== 5 && startp
[0] == 'U'
787 && isxdigit (startp
[1]) && isxdigit (startp
[2])
788 && isxdigit (startp
[3]) && isxdigit (startp
[4]))
790 unsigned int ucs4
= strtoul (startp
+ 1, NULL
, 16);
793 newstr
= (char *) xmalloc (10);
794 snprintf (newstr
, 10, "U%08X", ucs4
);
802 charelem
= find_element (ldfile
, collate
, startp
, len
);
807 /* People really shouldn't use characters directly in
808 the string. Especially since it's not really clear
809 what this means. We interpret all characters in the
810 string as if that would be bsymbols. Otherwise we
811 would have to match back to bsymbols somehow and this
812 is not what people normally expect. */
813 charelem
= find_element (ldfile
, collate
, cp
++, 1);
816 if (charelem
== NULL
)
818 /* We ignore the rest of the line. */
819 lr_ignore_rest (ldfile
, 0);
823 /* Add the pointer. */
826 struct element_t
**newp
;
828 newp
= (struct element_t
**)
829 alloca (max
* sizeof (struct element_t
*));
830 memcpy (newp
, weights
, cnt
* sizeof (struct element_t
*));
833 weights
[cnt
++] = charelem
;
837 /* Now store the information. */
838 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
839 obstack_alloc (&collate
->mempool
,
840 cnt
* sizeof (struct element_t
*));
841 memcpy (elem
->weights
[weight_cnt
].w
, weights
,
842 cnt
* sizeof (struct element_t
*));
843 elem
->weights
[weight_cnt
].cnt
= cnt
;
845 /* We don't need the string anymore. */
846 free (arg
->val
.str
.startmb
);
848 else if (ellipsis
!= tok_none
849 && (arg
->tok
== tok_ellipsis2
850 || arg
->tok
== tok_ellipsis3
851 || arg
->tok
== tok_ellipsis4
))
853 /* It must be the same ellipsis as used in the initial column. */
854 if (arg
->tok
!= ellipsis
)
855 lr_error (ldfile
, _("\
856 %s: weights must use the same ellipsis symbol as the name"),
859 /* The weight for this level will depend on the element
860 iterating over the range. Put a placeholder. */
861 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
862 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
863 elem
->weights
[weight_cnt
].w
[0] = ELEMENT_ELLIPSIS2
;
864 elem
->weights
[weight_cnt
].cnt
= 1;
869 /* It's a syntax error. */
870 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
871 lr_ignore_rest (ldfile
, 0);
875 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
876 /* This had better be the end of the line or a semicolon. */
877 if (arg
->tok
== tok_semicolon
)
878 /* OK, ignore this and read the next token. */
879 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
880 else if (arg
->tok
!= tok_eof
&& arg
->tok
!= tok_eol
)
882 /* It's a syntax error. */
883 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
884 lr_ignore_rest (ldfile
, 0);
888 while (++weight_cnt
< nrules
);
890 if (weight_cnt
< nrules
)
892 /* This means the rest of the line uses the current element as
896 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
897 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
898 if (ellipsis
== tok_none
)
899 elem
->weights
[weight_cnt
].w
[0] = elem
;
901 elem
->weights
[weight_cnt
].w
[0] = ELEMENT_ELLIPSIS2
;
902 elem
->weights
[weight_cnt
].cnt
= 1;
904 while (++weight_cnt
< nrules
);
908 if (arg
->tok
== tok_ignore
|| arg
->tok
== tok_bsymbol
)
910 /* Too many rule values. */
911 lr_error (ldfile
, _("%s: too many values"), "LC_COLLATE");
912 lr_ignore_rest (ldfile
, 0);
915 lr_ignore_rest (ldfile
, arg
->tok
!= tok_eol
&& arg
->tok
!= tok_eof
);
921 insert_value (struct linereader
*ldfile
, const char *symstr
, size_t symlen
,
922 const struct charmap_t
*charmap
, struct repertoire_t
*repertoire
,
923 struct localedef_t
*result
)
925 /* First find out what kind of symbol this is. */
928 struct element_t
*elem
= NULL
;
929 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
931 /* Try to find the character in the charmap. */
932 seq
= charmap_find_value (charmap
, symstr
, symlen
);
934 /* Determine the wide character. */
935 if (seq
== NULL
|| seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
937 wc
= repertoire_find_value (repertoire
, symstr
, symlen
);
944 if (wc
== ILLEGAL_CHAR_VALUE
&& seq
== NULL
)
946 /* It's no character, so look through the collation elements and
949 if (find_entry (&collate
->elem_table
, symstr
, symlen
, &ptr
) != 0)
952 struct symbol_t
*sym
= NULL
;
954 /* It's also no collation element. Therefore it's either a
955 collating symbol or it's a character which is not
956 supported by the character set. In the latter case we
957 simply create a dummy entry. */
958 if (find_entry (&collate
->sym_table
, symstr
, symlen
, &result
) == 0)
960 /* It's a collation symbol. */
961 sym
= (struct symbol_t
*) result
;
968 elem
= new_element (collate
, NULL
, 0, NULL
, symstr
, symlen
, 0);
973 /* Enter a fake element in the sequence table. This
974 won't cause anything in the output since there is
975 no multibyte or wide character associated with
977 insert_entry (&collate
->seq_table
, symstr
, symlen
, elem
);
981 /* Copy the result back. */
986 /* Otherwise the symbol stands for a character. */
988 if (find_entry (&collate
->seq_table
, symstr
, symlen
, &ptr
) != 0)
990 uint32_t wcs
[2] = { wc
, 0 };
992 /* We have to allocate an entry. */
993 elem
= new_element (collate
, seq
!= NULL
? seq
->bytes
: NULL
,
994 seq
!= NULL
? seq
->nbytes
: 0,
995 wc
== ILLEGAL_CHAR_VALUE
? NULL
: wcs
,
998 /* And add it to the table. */
999 if (insert_entry (&collate
->seq_table
, symstr
, symlen
, elem
) != 0)
1000 /* This cannot happen. */
1001 assert (! "Internal error");
1005 /* Copy the result back. */
1008 /* Maybe the character was used before the definition. In this case
1009 we have to insert the byte sequences now. */
1010 if (elem
->mbs
== NULL
&& seq
!= NULL
)
1012 elem
->mbs
= obstack_copy0 (&collate
->mempool
,
1013 seq
->bytes
, seq
->nbytes
);
1014 elem
->nmbs
= seq
->nbytes
;
1017 if (elem
->wcs
== NULL
&& wc
!= ILLEGAL_CHAR_VALUE
)
1019 uint32_t wcs
[2] = { wc
, 0 };
1021 elem
->wcs
= obstack_copy (&collate
->mempool
, wcs
, sizeof (wcs
));
1027 /* Test whether this element is not already in the list. */
1028 if (elem
->next
!= NULL
|| elem
== collate
->cursor
)
1030 lr_error (ldfile
, _("order for `%.*s' already defined at %s:%Zu"),
1031 (int) symlen
, symstr
, elem
->file
, elem
->line
);
1032 lr_ignore_rest (ldfile
, 0);
1036 insert_weights (ldfile
, elem
, charmap
, repertoire
, result
, tok_none
);
1043 handle_ellipsis (struct linereader
*ldfile
, const char *symstr
, size_t symlen
,
1044 enum token_t ellipsis
, const struct charmap_t
*charmap
,
1045 struct repertoire_t
*repertoire
,
1046 struct localedef_t
*result
)
1048 struct element_t
*startp
;
1049 struct element_t
*endp
;
1050 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
1052 /* Unlink the entry added for the ellipsis. */
1053 unlink_element (collate
);
1054 startp
= collate
->cursor
;
1056 /* Process and add the end-entry. */
1058 && insert_value (ldfile
, symstr
, symlen
, charmap
, repertoire
, result
))
1059 /* Something went wrong with inserting the to-value. This means
1060 we cannot process the ellipsis. */
1063 /* Reset the cursor. */
1064 collate
->cursor
= startp
;
1066 /* Now we have to handle many different situations:
1067 - we have to distinguish between the three different ellipsis forms
1068 - is the ellipsis at the beginning, in the middle, or at the end.
1070 endp
= collate
->cursor
->next
;
1071 assert (symstr
== NULL
|| endp
!= NULL
);
1073 /* XXX The following is probably very wrong since also collating symbols
1074 can appear in ranges. But do we want/can refine the test for that? */
1076 /* Both, the start and the end symbol, must stand for characters. */
1077 if ((startp
!= NULL
&& (startp
->name
== NULL
|| ! startp
->is_character
))
1078 || (endp
!= NULL
&& (endp
->name
== NULL
|| ! endp
->is_character
)))
1080 lr_error (ldfile
, _("\
1081 %s: the start and the end symbol of a range must stand for characters"),
1087 if (ellipsis
== tok_ellipsis3
)
1089 /* One requirement we make here: the length of the byte
1090 sequences for the first and end character must be the same.
1091 This is mainly to prevent unwanted effects and this is often
1092 not what is wanted. */
1093 size_t len
= (startp
->mbs
!= NULL
? startp
->nmbs
1094 : (endp
->mbs
!= NULL
? endp
->nmbs
: 0));
1095 char mbcnt
[len
+ 1];
1096 char mbend
[len
+ 1];
1098 /* Well, this should be caught somewhere else already. Just to
1100 assert (startp
== NULL
|| startp
->wcs
== NULL
|| startp
->wcs
[1] == 0);
1101 assert (endp
== NULL
|| endp
->wcs
== NULL
|| endp
->wcs
[1] == 0);
1103 if (startp
!= NULL
&& endp
!= NULL
1104 && startp
->mbs
!= NULL
&& endp
->mbs
!= NULL
1105 && startp
->nmbs
!= endp
->nmbs
)
1107 lr_error (ldfile
, _("\
1108 %s: byte sequences of first and last character must have the same length"),
1113 /* Determine whether we have to generate multibyte sequences. */
1114 if ((startp
== NULL
|| startp
->mbs
!= NULL
)
1115 && (endp
== NULL
|| endp
->mbs
!= NULL
))
1120 /* Prepare the beginning byte sequence. This is either from the
1121 beginning byte sequence or it is all nulls if it was an
1122 initial ellipsis. */
1123 if (startp
== NULL
|| startp
->mbs
== NULL
)
1124 memset (mbcnt
, '\0', len
);
1127 memcpy (mbcnt
, startp
->mbs
, len
);
1129 /* And increment it so that the value is the first one we will
1131 for (cnt
= len
- 1; cnt
>= 0; --cnt
)
1132 if (++mbcnt
[cnt
] != '\0')
1137 /* And the end sequence. */
1138 if (endp
== NULL
|| endp
->mbs
== NULL
)
1139 memset (mbend
, '\0', len
);
1141 memcpy (mbend
, endp
->mbs
, len
);
1144 /* Test whether we have a correct range. */
1145 ret
= memcmp (mbcnt
, mbend
, len
);
1149 lr_error (ldfile
, _("%s: byte sequence of first character of \
1150 range is not lower than that of the last character"), "LC_COLLATE");
1154 /* Generate the byte sequences data. */
1157 struct charseq
*seq
;
1159 /* Quite a bit of work ahead. We have to find the character
1160 definition for the byte sequence and then determine the
1161 wide character belonging to it. */
1162 seq
= charmap_find_symbol (charmap
, mbcnt
, len
);
1165 struct element_t
*elem
;
1168 /* I don't think this can ever happen. */
1169 assert (seq
->name
!= NULL
);
1170 namelen
= strlen (seq
->name
);
1172 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1173 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1176 /* Now we are ready to insert the new value in the
1177 sequence. Find out whether the element is
1180 if (find_entry (&collate
->seq_table
, seq
->name
, namelen
,
1183 uint32_t wcs
[2] = { seq
->ucs4
, 0 };
1185 /* We have to allocate an entry. */
1186 elem
= new_element (collate
, mbcnt
, len
,
1187 seq
->ucs4
== ILLEGAL_CHAR_VALUE
1188 ? NULL
: wcs
, seq
->name
,
1191 /* And add it to the table. */
1192 if (insert_entry (&collate
->seq_table
, seq
->name
,
1193 namelen
, elem
) != 0)
1194 /* This cannot happen. */
1195 assert (! "Internal error");
1198 /* Copy the result. */
1201 /* Test whether this element is not already in the list. */
1202 if (elem
->next
!= NULL
|| (collate
->cursor
!= NULL
1203 && elem
->next
== collate
->cursor
))
1205 lr_error (ldfile
, _("\
1206 order for `%.*s' already defined at %s:%Zu"),
1207 (int) namelen
, seq
->name
,
1208 elem
->file
, elem
->line
);
1212 /* Enqueue the new element. */
1213 elem
->last
= collate
->cursor
;
1214 if (collate
->cursor
== NULL
)
1218 elem
->next
= collate
->cursor
->next
;
1219 elem
->last
->next
= elem
;
1220 if (elem
->next
!= NULL
)
1221 elem
->next
->last
= elem
;
1223 if (collate
->start
== NULL
)
1225 assert (collate
->cursor
== NULL
);
1226 collate
->start
= elem
;
1228 collate
->cursor
= elem
;
1230 /* Add the weight value. We take them from the
1231 `ellipsis_weight' member of `collate'. */
1232 elem
->weights
= (struct element_list_t
*)
1233 obstack_alloc (&collate
->mempool
,
1234 nrules
* sizeof (struct element_list_t
));
1235 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1236 if (collate
->ellipsis_weight
.weights
[cnt
].cnt
== 1
1237 && (collate
->ellipsis_weight
.weights
[cnt
].w
[0]
1238 == ELEMENT_ELLIPSIS2
))
1240 elem
->weights
[cnt
].w
= (struct element_t
**)
1241 obstack_alloc (&collate
->mempool
,
1242 sizeof (struct element_t
*));
1243 elem
->weights
[cnt
].w
[0] = elem
;
1244 elem
->weights
[cnt
].cnt
= 1;
1248 /* Simply use the weight from `ellipsis_weight'. */
1249 elem
->weights
[cnt
].w
=
1250 collate
->ellipsis_weight
.weights
[cnt
].w
;
1251 elem
->weights
[cnt
].cnt
=
1252 collate
->ellipsis_weight
.weights
[cnt
].cnt
;
1256 /* Increment for the next round. */
1258 for (cnt
= len
- 1; cnt
>= 0; --cnt
)
1259 if (++mbcnt
[cnt
] != '\0')
1262 /* Find out whether this was all. */
1263 if (cnt
< 0 || memcmp (mbcnt
, mbend
, len
) >= 0)
1264 /* Yep, that's all. */
1271 /* For symbolic range we naturally must have a beginning and an
1272 end specified by the user. */
1274 lr_error (ldfile
, _("\
1275 %s: symbolic range ellipsis must not directly follow `order_start'"),
1277 else if (endp
== NULL
)
1278 lr_error (ldfile
, _("\
1279 %s: symbolic range ellipsis must not be directly followed by `order_end'"),
1283 /* Determine the range. To do so we have to determine the
1284 common prefix of both names and then the numeric
1285 values of both ends. */
1286 size_t lenfrom
= strlen (startp
->name
);
1287 size_t lento
= strlen (endp
->name
);
1288 char buf
[lento
+ 1];
1293 int base
= ellipsis
== tok_ellipsis2
? 16 : 10;
1295 if (lenfrom
!= lento
)
1298 lr_error (ldfile
, _("\
1299 `%s' and `%.*s' are not valid names for symbolic range"),
1300 startp
->name
, (int) lento
, endp
->name
);
1304 while (startp
->name
[preflen
] == endp
->name
[preflen
])
1305 if (startp
->name
[preflen
] == '\0')
1306 /* Nothing to be done. The start and end point are identical
1307 and while inserting the end point we have already given
1308 the user an error message. */
1314 from
= strtol (startp
->name
+ preflen
, &cp
, base
);
1315 if ((from
== UINT_MAX
&& errno
== ERANGE
) || *cp
!= '\0')
1319 to
= strtol (endp
->name
+ preflen
, &cp
, base
);
1320 if ((to
== UINT_MAX
&& errno
== ERANGE
) || *cp
!= '\0')
1323 /* Copy the prefix. */
1324 memcpy (buf
, startp
->name
, preflen
);
1326 /* Loop over all values. */
1327 for (++from
; from
< to
; ++from
)
1329 struct element_t
*elem
= NULL
;
1330 struct charseq
*seq
;
1334 /* Generate the name. */
1335 sprintf (buf
+ preflen
, base
== 10 ? "%0*ld" : "%0*lX",
1336 (int) (lenfrom
- preflen
), from
);
1338 /* Look whether this name is already defined. */
1340 if (find_entry (&collate
->seq_table
, buf
, symlen
, &ptr
) == 0)
1342 /* Copy back the result. */
1345 if (elem
->next
!= NULL
|| (collate
->cursor
!= NULL
1346 && elem
->next
== collate
->cursor
))
1348 lr_error (ldfile
, _("\
1349 %s: order for `%.*s' already defined at %s:%Zu"),
1350 "LC_COLLATE", (int) lenfrom
, buf
,
1351 elem
->file
, elem
->line
);
1355 if (elem
->name
== NULL
)
1357 lr_error (ldfile
, _("%s: `%s' must be a character"),
1363 if (elem
== NULL
|| (elem
->mbs
== NULL
&& elem
->wcs
== NULL
))
1365 /* Search for a character of this name. */
1366 seq
= charmap_find_value (charmap
, buf
, lenfrom
);
1367 if (seq
== NULL
|| seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1369 wc
= repertoire_find_value (repertoire
, buf
, lenfrom
);
1377 if (wc
== ILLEGAL_CHAR_VALUE
&& seq
== NULL
)
1378 /* We don't know anything about a character with this
1379 name. XXX Should we warn? */
1384 uint32_t wcs
[2] = { wc
, 0 };
1386 /* We have to allocate an entry. */
1387 elem
= new_element (collate
,
1388 seq
!= NULL
? seq
->bytes
: NULL
,
1389 seq
!= NULL
? seq
->nbytes
: 0,
1390 wc
== ILLEGAL_CHAR_VALUE
1391 ? NULL
: wcs
, buf
, lenfrom
, 1);
1395 /* Update the element. */
1398 elem
->mbs
= obstack_copy0 (&collate
->mempool
,
1399 seq
->bytes
, seq
->nbytes
);
1400 elem
->nmbs
= seq
->nbytes
;
1403 if (wc
!= ILLEGAL_CHAR_VALUE
)
1407 obstack_grow (&collate
->mempool
,
1408 &wc
, sizeof (uint32_t));
1409 obstack_grow (&collate
->mempool
,
1410 &zero
, sizeof (uint32_t));
1411 elem
->wcs
= obstack_finish (&collate
->mempool
);
1416 elem
->file
= ldfile
->fname
;
1417 elem
->line
= ldfile
->lineno
;
1418 elem
->section
= collate
->current_section
;
1421 /* Enqueue the new element. */
1422 elem
->last
= collate
->cursor
;
1423 elem
->next
= collate
->cursor
->next
;
1424 elem
->last
->next
= elem
;
1425 if (elem
->next
!= NULL
)
1426 elem
->next
->last
= elem
;
1427 collate
->cursor
= elem
;
1429 /* Now add the weights. They come from the `ellipsis_weights'
1430 member of `collate'. */
1431 elem
->weights
= (struct element_list_t
*)
1432 obstack_alloc (&collate
->mempool
,
1433 nrules
* sizeof (struct element_list_t
));
1434 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1435 if (collate
->ellipsis_weight
.weights
[cnt
].cnt
== 1
1436 && (collate
->ellipsis_weight
.weights
[cnt
].w
[0]
1437 == ELEMENT_ELLIPSIS2
))
1439 elem
->weights
[cnt
].w
= (struct element_t
**)
1440 obstack_alloc (&collate
->mempool
,
1441 sizeof (struct element_t
*));
1442 elem
->weights
[cnt
].w
[0] = elem
;
1443 elem
->weights
[cnt
].cnt
= 1;
1447 /* Simply use the weight from `ellipsis_weight'. */
1448 elem
->weights
[cnt
].w
=
1449 collate
->ellipsis_weight
.weights
[cnt
].w
;
1450 elem
->weights
[cnt
].cnt
=
1451 collate
->ellipsis_weight
.weights
[cnt
].cnt
;
/* Prepare the LC_COLLATE state of LOCALE before the category is read
   from LDFILE.  When IGNORE_CONTENT is zero and LOCALE has no collate
   structure yet, one is installed: with COPY_LOCALE == NULL a zeroed
   locale_collate_t is allocated with xcalloc and its three hash tables
   (elem_table, sym_table, seq_table) and its obstack are initialized;
   the other visible assignment shares the collate structure of
   COPY_LOCALE instead.  The translate_strings and return_widestr
   members of LDFILE are set to zero.  */
1460 collate_startup (struct linereader
*ldfile
, struct localedef_t
*locale
,
1461 struct localedef_t
*copy_locale
, int ignore_content
)
1463 if (!ignore_content
&& locale
->categories
[LC_COLLATE
].collate
== NULL
)
1465 struct locale_collate_t
*collate
;
1467 if (copy_locale
== NULL
)
/* Fresh structure; xcalloc hands back zero-filled memory.  */
1469 collate
= locale
->categories
[LC_COLLATE
].collate
=
1470 (struct locale_collate_t
*)
1471 xcalloc (1, sizeof (struct locale_collate_t
));
1473 /* Init the various data structures. */
1474 init_hash (&collate
->elem_table
, 100);
1475 init_hash (&collate
->sym_table
, 100);
1476 init_hash (&collate
->seq_table
, 500);
1477 obstack_init (&collate
->mempool
);
/* NOTE(review): -1 presumably means that no maximum collation weight
   has been recorded yet -- confirm against the users of
   col_weight_max.  */
1479 collate
->col_weight_max
= -1;
1482 /* Reuse the copy_locale's data structures. */
1483 collate
= locale
->categories
[LC_COLLATE
].collate
=
1484 copy_locale
->categories
[LC_COLLATE
].collate
;
/* Clear the translate_strings and return_widestr settings of the
   line reader.  */
1487 ldfile
->translate_strings
= 0;
1488 ldfile
->return_widestr
= 0;
/* Post-process the LC_COLLATE data of LOCALE once parsing is done.
   The steps visible in this function are, in order:
     - fetch locale->categories[LC_COLLATE].collate; a NULL pointer is
       reported with a "No definition for ... category" message;
     - assert that nrules fits into the bits of used_in_level;
     - check that the sort_position flag of every level agrees between
       all sections that have rules;
     - walk the list starting at collate->start, set the used_in_level
       bit of every element referenced as a weight, and replace weights
       that refer to an element without weights by &collate->undefined
       after reporting the symbol as not defined;
     - walk the list again to fill in mborder (from the per-level
       counters in mbact), wcorder, mbseqorder and wcseqorder, and to
       link every element that has a multibyte sequence into the
       collate->mbheads list selected by its first byte;
     - point every still empty mbheads slot from 1 to 255 at
       &collate->undefined;
     - build the wcheads and wcseqorder tables for the wide character
       sequences in the same manner;
     - give the UNDEFINED element its used_in_level, mborder and wcorder
       values;
     - assign ruleidx values to the sections, reusing the index of an
       earlier section with equal rules, and assert that at most 128
       indices were handed out.
   NOTE(review): the ruleset comparison near the end passes nrules as
   the byte count to memcmp; confirm that this matches the element size
   of the rules arrays.
   NOTE(review): no use of CHARMAP is visible in this part of the file.  */
1493 collate_finish (struct localedef_t
*locale
, const struct charmap_t
*charmap
)
1495 /* Now is the time when we can assign the individual collation
1496 values for all the symbols. We have possibly different values
1497 for the wide- and the multibyte-character symbols. This is done
1498 since it might make a difference in the encoding if there is in
1499 some cases no multibyte-character but there are wide-characters.
1500 (The other way around it is not important since the encoded
1501 collation value in the wide-character case is 32 bits wide and
1502 therefore requires no encoding).
1504 The lowest collation value assigned is 2. Zero is reserved for
1505 the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1506 functions and 1 is used to separate the individual passes for the
1509 We also have to construct a list with all the bytes/words which
1510 can come first in a sequence, followed by all the elements which
1511 also start with this byte/word. The order is reverse which has
1512 among others the important effect that longer strings are located
1513 first in the list. This is required for the output data since
1514 the algorithm used in `strcoll' etc depends on this.
1516 The multibyte case is easy. We simply sort into an array with
1518 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1523 struct element_t
*runp
;
1525 int need_undefined
= 0;
1526 struct section_list
*sect
;
1528 int nr_wide_elems
= 0;
1530 if (collate
== NULL
)
1532 /* No data, no check. */
1534 WITH_CUR_LOCALE (error (0, 0, _("No definition for %s category found"),
1539 /* If this assertion is hit change the type in `element_t'. */
1540 assert (nrules
<= sizeof (runp
->used_in_level
) * 8);
1542 /* Make sure that the `position' rule is used either in all sections
1544 for (i
= 0; i
< nrules
; ++i
)
1545 for (sect
= collate
->sections
; sect
!= NULL
; sect
= sect
->next
)
1546 if (sect
->rules
!= NULL
1547 && ((sect
->rules
[i
] & sort_position
)
1548 != (collate
->sections
->rules
[i
] & sort_position
)))
1550 WITH_CUR_LOCALE (error (0, 0, _("\
1551 %s: `position' must be used for a specific level in all sections or none"),
1556 /* Find out which elements are used at which level. At the same
1557 time we find out whether we have any undefined symbols. */
1558 runp
= collate
->start
;
1559 while (runp
!= NULL
)
1561 if (runp
->mbs
!= NULL
)
1563 for (i
= 0; i
< nrules
; ++i
)
1567 for (j
= 0; j
< runp
->weights
[i
].cnt
; ++j
)
1568 /* A NULL pointer as the weight means IGNORE. */
1569 if (runp
->weights
[i
].w
[j
] != NULL
)
1571 if (runp
->weights
[i
].w
[j
]->weights
== NULL
)
1573 WITH_CUR_LOCALE (error_at_line (0, 0, runp
->file
,
1575 _("symbol `%s' not defined"),
1576 runp
->weights
[i
].w
[j
]->name
));
/* Let the UNDEFINED element stand in for the unknown symbol.  */
1579 runp
->weights
[i
].w
[j
] = &collate
->undefined
;
1582 /* Set the bit for the level. */
1583 runp
->weights
[i
].w
[j
]->used_in_level
|= 1 << i
;
1588 /* Up to the next entry. */
1592 /* Walk through the list of defined sequences and assign weights. Also
1593 create the data structure which will allow generating the single byte
1594 character based tables.
1596 Since at each time only the weights for each of the rules are
1597 only compared to other weights for this rule it is possible to
1598 assign more compact weight values than simply counting all
1599 weights in sequence. We can assign weights from 3, one for each
1600 rule individually and only for those elements, which are actually
1603 Why is this important? It is not for the wide char table. But
1604 it is for the singlebyte output since here larger numbers have to
1605 be encoded to make it possible to emit the value as a byte
1607 for (i
= 0; i
< nrules
; ++i
)
1612 runp
= collate
->start
;
1613 while (runp
!= NULL
)
1615 /* Determine the order. */
1616 if (runp
->used_in_level
!= 0)
1618 runp
->mborder
= (int *) obstack_alloc (&collate
->mempool
,
1619 nrules
* sizeof (int));
1621 for (i
= 0; i
< nrules
; ++i
)
1622 if ((runp
->used_in_level
& (1 << i
)) != 0)
1623 runp
->mborder
[i
] = mbact
[i
]++;
1625 runp
->mborder
[i
] = 0;
1628 if (runp
->mbs
!= NULL
)
1630 struct element_t
**eptr
;
1631 struct element_t
*lastp
= NULL
;
1633 /* Find the point where to insert in the list. */
1634 eptr
= &collate
->mbheads
[((unsigned char *) runp
->mbs
)[0]];
1635 while (*eptr
!= NULL
)
1637 if ((*eptr
)->nmbs
< runp
->nmbs
)
1640 if ((*eptr
)->nmbs
== runp
->nmbs
)
1642 int c
= memcmp ((*eptr
)->mbs
, runp
->mbs
, runp
->nmbs
);
1646 /* This should not happen. It means that we have
1647 two symbols with the same byte sequence. It is
1648 of course an error. */
1649 WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr
)->file
,
1652 symbol `%s' has the same encoding as"), (*eptr
)->name
);
1653 error_at_line (0, 0, runp
->file
,
1660 /* Insert it here. */
1664 /* To the next entry. */
1666 eptr
= &(*eptr
)->mbnext
;
1669 /* Set the pointers. */
1670 runp
->mbnext
= *eptr
;
1671 runp
->mblast
= lastp
;
1673 (*eptr
)->mblast
= runp
;
1679 if (runp
->used_in_level
)
1681 runp
->wcorder
= wcact
++;
1683 /* We take the opportunity to count the elements which have
1688 if (runp
->is_character
)
1690 if (runp
->nmbs
== 1)
1691 collate
->mbseqorder
[((unsigned char *) runp
->mbs
)[0]] = mbseqact
++;
1693 runp
->wcseqorder
= wcseqact
++;
1695 else if (runp
->mbs
!= NULL
&& runp
->weights
!= NULL
)
1696 /* This is for collation elements. */
1697 runp
->wcseqorder
= wcseqact
++;
1699 /* Up to the next entry. */
1703 /* Find out whether any of the `mbheads' entries is unset. In this
1704 case we use the UNDEFINED entry. */
1705 for (i
= 1; i
< 256; ++i
)
1706 if (collate
->mbheads
[i
] == NULL
)
1709 collate
->mbheads
[i
] = &collate
->undefined
;
1712 /* Now to the wide character case. */
1713 collate
->wcheads
.p
= 6;
1714 collate
->wcheads
.q
= 10;
1715 wchead_table_init (&collate
->wcheads
);
1717 collate
->wcseqorder
.p
= 6;
1718 collate
->wcseqorder
.q
= 10;
1719 collseq_table_init (&collate
->wcseqorder
);
1722 runp
= collate
->start
;
1723 while (runp
!= NULL
)
1725 if (runp
->wcs
!= NULL
)
1727 struct element_t
*e
;
1728 struct element_t
**eptr
;
1729 struct element_t
*lastp
;
1731 /* Insert the collation sequence value. */
1732 if (runp
->is_character
)
1733 collseq_table_add (&collate
->wcseqorder
, runp
->wcs
[0],
1736 /* Find the point where to insert in the list. */
1737 e
= wchead_table_get (&collate
->wcheads
, runp
->wcs
[0]);
1740 while (*eptr
!= NULL
)
1742 if ((*eptr
)->nwcs
< runp
->nwcs
)
1745 if ((*eptr
)->nwcs
== runp
->nwcs
)
1747 int c
= wmemcmp ((wchar_t *) (*eptr
)->wcs
,
1748 (wchar_t *) runp
->wcs
, runp
->nwcs
);
1752 /* This should not happen. It means that we have
1753 two symbols with the same byte sequence. It is
1754 of course an error. */
1755 WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr
)->file
,
1758 symbol `%s' has the same encoding as"), (*eptr
)->name
);
1759 error_at_line (0, 0, runp
->file
,
1766 /* Insert it here. */
1770 /* To the next entry. */
1772 eptr
= &(*eptr
)->wcnext
;
1775 /* Set the pointers. */
1776 runp
->wcnext
= *eptr
;
1777 runp
->wclast
= lastp
;
1779 (*eptr
)->wclast
= runp
;
1782 wchead_table_add (&collate
->wcheads
, runp
->wcs
[0], e
);
1787 /* Up to the next entry. */
1791 collseq_table_finalize (&collate
->wcseqorder
);
1793 /* Now determine whether the UNDEFINED entry is needed and if yes,
1794 whether it was defined. */
1795 collate
->undefined
.used_in_level
= need_undefined
? ~0ul : 0;
1796 if (collate
->undefined
.file
== NULL
)
1800 /* This seems not to be enforced by recent standards. Don't
1801 emit an error, simply append UNDEFINED at the end. */
1803 WITH_CUR_LOCALE (error (0, 0, _("no definition of `UNDEFINED'")));
1805 /* Add UNDEFINED at the end. */
1806 collate
->undefined
.mborder
=
1807 (int *) obstack_alloc (&collate
->mempool
, nrules
* sizeof (int));
1809 for (i
= 0; i
< nrules
; ++i
)
1810 collate
->undefined
.mborder
[i
] = mbact
[i
]++;
1813 /* In any case we will need the definition for the wide character
1814 case. But we will not complain that it is missing since the
1815 specification strangely enough does not seem to account for
1817 collate
->undefined
.wcorder
= wcact
++;
1820 /* Finally, try to unify the rules for the sections. Whenever the rules
1821 for a section are the same as those for another section give the
1822 ruleset the same index. Since there are never many section we can
1823 use an O(n^2) algorithm here. */
1824 sect
= collate
->sections
;
1825 while (sect
!= NULL
&& sect
->rules
== NULL
)
1828 /* Bail out if we have no sections because of earlier errors. */
1831 WITH_CUR_LOCALE (error (EXIT_FAILURE
, 0,
1832 _("too many errors; giving up")));
1839 struct section_list
*osect
= collate
->sections
;
1841 while (osect
!= sect
)
1842 if (osect
->rules
!= NULL
1843 && memcmp (osect
->rules
, sect
->rules
, nrules
) == 0)
1846 osect
= osect
->next
;
1849 sect
->ruleidx
= ruleidx
++;
1851 sect
->ruleidx
= osect
->ruleidx
;
1856 while (sect
!= NULL
&& sect
->rules
== NULL
);
1858 while (sect
!= NULL
);
1859 /* We are currently not prepared for more than 128 rulesets. But this
1860 should never really be a problem. */
1861 assert (ruleidx
<= 128);
/* Append the multibyte weight record of ELEM to POOL and return a
   reference to it.  For each of the nrules levels one length byte is
   written, followed by the utf8_encode output for the mborder value of
   every non-NULL (that is, non-IGNORE) weight of that level.  On the
   regular path the result is the object size of POOL sampled before
   anything was added, OR-ed with the low seven bits of
   elem->section->ruleidx shifted left by 24.  ELEM equal to
   &collate->undefined is special-cased up front because its weights
   are already in the pool.  */
1866 output_weight (struct obstack
*pool
, struct locale_collate_t
*collate
,
1867 struct element_t
*elem
)
1872 /* Optimize the use of UNDEFINED. */
1873 if (elem
== &collate
->undefined
)
1874 /* The weights are already inserted. */
1877 /* This byte can start exactly one collation element and this is
1878 a single byte. We can directly give the index to the weights. */
1879 retval
= obstack_object_size (pool
);
1881 /* Construct the weight. */
1882 for (cnt
= 0; cnt
< nrules
; ++cnt
)
/* NOTE(review): the scratch buffer reserves 7 bytes per weight;
   presumably that is the longest sequence utf8_encode can emit --
   confirm.  */
1884 char buf
[elem
->weights
[cnt
].cnt
* 7];
1888 for (i
= 0; i
< elem
->weights
[cnt
].cnt
; ++i
)
1889 /* Encode the weight value. We do nothing for IGNORE entries. */
1890 if (elem
->weights
[cnt
].w
[i
] != NULL
)
1891 len
+= utf8_encode (&buf
[len
],
1892 elem
->weights
[cnt
].w
[i
]->mborder
[cnt
]);
1894 /* And add the buffer content. */
/* NOTE(review): the length goes out through obstack_1grow, i.e. as a
   single byte, so the encoded weights of one level are assumed to fit
   in 255 bytes -- TODO confirm.  */
1895 obstack_1grow (pool
, len
);
1896 obstack_grow (pool
, buf
, len
);
/* Bits 24 to 30 of the result carry the ruleset index of the section
   ELEM belongs to.  */
1899 return retval
| ((elem
->section
->ruleidx
& 0x7f) << 24);
/* Wide character counterpart of output_weight: append the weight
   record of ELEM to POOL as int32_t words and return a reference to
   it.  For each of the nrules levels the number of kept weights is
   written first, followed by the wcorder value of every non-NULL
   weight of that level.  On the regular path the result is the object
   size of POOL divided by sizeof (int32_t), sampled before anything
   was added, OR-ed with the low seven bits of elem->section->ruleidx
   shifted left by 24.  ELEM equal to &collate->undefined is
   special-cased up front because its weights are already in the
   pool.  */
1904 output_weightwc (struct obstack
*pool
, struct locale_collate_t
*collate
,
1905 struct element_t
*elem
)
1910 /* Optimize the use of UNDEFINED. */
1911 if (elem
== &collate
->undefined
)
1912 /* The weights are already inserted. */
1915 /* This byte can start exactly one collation element and this is
1916 a single byte. We can directly give the index to the weights. */
1917 retval
= obstack_object_size (pool
) / sizeof (int32_t);
1919 /* Construct the weight. */
1920 for (cnt
= 0; cnt
< nrules
; ++cnt
)
/* One slot per weight suffices: every kept weight contributes exactly
   one int32_t, namely its wcorder value.  */
1922 int32_t buf
[elem
->weights
[cnt
].cnt
];
/* J counts the entries actually stored; NULL weights are skipped.  */
1926 for (i
= 0, j
= 0; i
< elem
->weights
[cnt
].cnt
; ++i
)
1927 if (elem
->weights
[cnt
].w
[i
] != NULL
)
1928 buf
[j
++] = elem
->weights
[cnt
].w
[i
]->wcorder
;
1930 /* And add the buffer content. */
1931 obstack_int32_grow (pool
, j
);
1933 obstack_grow (pool
, buf
, j
* sizeof (int32_t));
/* Bits 24 to 30 of the result carry the ruleset index of the section
   ELEM belongs to.  */
1936 return retval
| ((elem
->section
->ruleidx
& 0x7f) << 24);
1941 collate_output (struct localedef_t
*locale
, const struct charmap_t
*charmap
,
1942 const char *output_path
)
1944 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1945 const size_t nelems
= _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
);
1946 struct iovec iov
[2 + nelems
];
1947 struct locale_file data
;
1948 uint32_t idx
[nelems
];
1951 int32_t tablemb
[256];
1952 struct obstack weightpool
;
1953 struct obstack extrapool
;
1954 struct obstack indirectpool
;
1955 struct section_list
*sect
;
1956 struct collidx_table tablewc
;
1958 uint32_t *elem_table
;
1960 struct element_t
*runp
;
1962 data
.magic
= LIMAGIC (LC_COLLATE
);
1964 iov
[0].iov_base
= (void *) &data
;
1965 iov
[0].iov_len
= sizeof (data
);
1967 iov
[1].iov_base
= (void *) idx
;
1968 iov
[1].iov_len
= sizeof (idx
);
1970 idx
[0] = iov
[0].iov_len
+ iov
[1].iov_len
;
1973 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_NRULES
));
1974 iov
[2 + cnt
].iov_base
= &nrules
;
1975 iov
[2 + cnt
].iov_len
= sizeof (uint32_t);
1976 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
1979 /* If we have no LC_COLLATE data emit only the number of rules as zero. */
1980 if (collate
== NULL
)
1984 while (cnt
< _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
))
1986 /* The words have to be handled specially. */
1987 if (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB
))
1989 iov
[2 + cnt
].iov_base
= &dummy
;
1990 iov
[2 + cnt
].iov_len
= sizeof (int32_t);
1994 iov
[2 + cnt
].iov_base
= NULL
;
1995 iov
[2 + cnt
].iov_len
= 0;
1998 if (cnt
+ 1 < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
))
1999 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2003 assert (cnt
== _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
));
2005 write_locale_data (output_path
, LC_COLLATE
, "LC_COLLATE", 2 + cnt
, iov
);
2010 obstack_init (&weightpool
);
2011 obstack_init (&extrapool
);
2012 obstack_init (&indirectpool
);
2014 /* Since we are using the sign of an integer to mark indirection the
2015 offsets in the arrays we are indirectly referring to must not be
2016 zero since -0 == 0. Therefore we add a bit of dummy content. */
2017 obstack_int32_grow (&extrapool
, 0);
2018 obstack_int32_grow (&indirectpool
, 0);
2020 /* Prepare the ruleset table. */
2021 for (sect
= collate
->sections
, i
= 0; sect
!= NULL
; sect
= sect
->next
)
2022 if (sect
->rules
!= NULL
&& sect
->ruleidx
== i
)
2026 obstack_make_room (&weightpool
, nrules
);
2028 for (j
= 0; j
< nrules
; ++j
)
2029 obstack_1grow_fast (&weightpool
, sect
->rules
[j
]);
2032 /* And align the output. */
2033 i
= (nrules
* i
) % __alignof__ (int32_t);
2036 obstack_1grow (&weightpool
, '\0');
2037 while (++i
< __alignof__ (int32_t));
2039 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_RULESETS
));
2040 iov
[2 + cnt
].iov_len
= obstack_object_size (&weightpool
);
2041 iov
[2 + cnt
].iov_base
= obstack_finish (&weightpool
);
2042 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2045 /* Generate the 8-bit table. Walk through the lists of sequences
2046 starting with the same byte and add them one after the other to
2047 the table. In case we have more than one sequence starting with
2048 the same byte we have to use extra indirection.
2050 First add a record for the NUL byte. This entry will never be used
2051 so it does not matter. */
2054 /* Now insert the `UNDEFINED' value if it is used. Since this value
2055 will probably be used more than once it is good to store the
2056 weights only once. */
2057 if (collate
->undefined
.used_in_level
!= 0)
2058 output_weight (&weightpool
, collate
, &collate
->undefined
);
2060 for (ch
= 1; ch
< 256; ++ch
)
2061 if (collate
->mbheads
[ch
]->mbnext
== NULL
2062 && collate
->mbheads
[ch
]->nmbs
<= 1)
2064 tablemb
[ch
] = output_weight (&weightpool
, collate
,
2065 collate
->mbheads
[ch
]);
2069 /* The entries in the list are sorted by length and then
2070 alphabetically. This is the order in which we will add the
2071 elements to the collation table. This allows simply walking
2072 the table in sequence and stopping at the first matching
2073 entry. Since the longer sequences are coming first in the
2074 list they have the possibility to match first, just as it
2075 has to be. In the worst case we are walking to the end of
2076 the list where we put, if no singlebyte sequence is defined
2077 in the locale definition, the weights for UNDEFINED.
2079 To reduce the length of the search list we compress them a bit.
2080 This happens by collecting sequences of consecutive byte
2081 sequences in one entry (having a begin and end byte sequence)
2082 and add only one index into the weight table. We can find the
2083 consecutive entries since they are also consecutive in the list. */
2084 struct element_t
*runp
= collate
->mbheads
[ch
];
2085 struct element_t
*lastp
;
2087 assert ((obstack_object_size (&extrapool
)
2088 & (__alignof__ (int32_t) - 1)) == 0);
2090 tablemb
[ch
] = -obstack_object_size (&extrapool
);
2094 /* Store the current index in the weight table. We know that
2095 the current position in the `extrapool' is aligned on a
2100 /* Find out whether this is a single entry or we have more than
2101 one consecutive entry. */
2102 if (runp
->mbnext
!= NULL
2103 && runp
->nmbs
== runp
->mbnext
->nmbs
2104 && memcmp (runp
->mbs
, runp
->mbnext
->mbs
, runp
->nmbs
- 1) == 0
2105 && (runp
->mbs
[runp
->nmbs
- 1]
2106 == runp
->mbnext
->mbs
[runp
->nmbs
- 1] + 1))
2109 struct element_t
*series_startp
= runp
;
2110 struct element_t
*curp
;
2112 /* Compute how much space we will need. */
2113 added
= ((sizeof (int32_t) + 1 + 2 * (runp
->nmbs
- 1)
2114 + __alignof__ (int32_t) - 1)
2115 & ~(__alignof__ (int32_t) - 1));
2116 assert ((obstack_object_size (&extrapool
)
2117 & (__alignof__ (int32_t) - 1)) == 0);
2118 obstack_make_room (&extrapool
, added
);
2120 /* More than one consecutive entry. We mark this by having
2121 a negative index into the indirect table. */
2122 obstack_int32_grow_fast (&extrapool
,
2123 -(obstack_object_size (&indirectpool
)
2124 / sizeof (int32_t)));
2126 /* Now search first the end of the series. */
2128 runp
= runp
->mbnext
;
2129 while (runp
->mbnext
!= NULL
2130 && runp
->nmbs
== runp
->mbnext
->nmbs
2131 && memcmp (runp
->mbs
, runp
->mbnext
->mbs
,
2132 runp
->nmbs
- 1) == 0
2133 && (runp
->mbs
[runp
->nmbs
- 1]
2134 == runp
->mbnext
->mbs
[runp
->nmbs
- 1] + 1));
2136 /* Now walk backward from here to the beginning. */
2139 assert (runp
->nmbs
<= 256);
2140 obstack_1grow_fast (&extrapool
, curp
->nmbs
- 1);
2141 for (i
= 1; i
< curp
->nmbs
; ++i
)
2142 obstack_1grow_fast (&extrapool
, curp
->mbs
[i
]);
2144 /* Now find the end of the consecutive sequence and
2145 add all the indices in the indirect pool. */
2148 weightidx
= output_weight (&weightpool
, collate
, curp
);
2149 obstack_int32_grow (&indirectpool
, weightidx
);
2151 curp
= curp
->mblast
;
2153 while (curp
!= series_startp
);
2155 /* Add the final weight. */
2156 weightidx
= output_weight (&weightpool
, collate
, curp
);
2157 obstack_int32_grow (&indirectpool
, weightidx
);
2159 /* And add the end byte sequence. Without length this
2161 for (i
= 1; i
< curp
->nmbs
; ++i
)
2162 obstack_1grow_fast (&extrapool
, curp
->mbs
[i
]);
2166 /* A single entry. Simply add the index and the length and
2167 string (except for the first character which is already
2171 /* Output the weight info. */
2172 weightidx
= output_weight (&weightpool
, collate
, runp
);
2174 added
= ((sizeof (int32_t) + 1 + runp
->nmbs
- 1
2175 + __alignof__ (int32_t) - 1)
2176 & ~(__alignof__ (int32_t) - 1));
2177 assert ((obstack_object_size (&extrapool
)
2178 & (__alignof__ (int32_t) - 1)) == 0);
2179 obstack_make_room (&extrapool
, added
);
2181 obstack_int32_grow_fast (&extrapool
, weightidx
);
2182 assert (runp
->nmbs
<= 256);
2183 obstack_1grow_fast (&extrapool
, runp
->nmbs
- 1);
2185 for (i
= 1; i
< runp
->nmbs
; ++i
)
2186 obstack_1grow_fast (&extrapool
, runp
->mbs
[i
]);
2189 /* Add alignment bytes if necessary. */
2190 while ((obstack_object_size (&extrapool
)
2191 & (__alignof__ (int32_t) - 1)) != 0)
2192 obstack_1grow_fast (&extrapool
, '\0');
2196 runp
= runp
->mbnext
;
2198 while (runp
!= NULL
);
2200 assert ((obstack_object_size (&extrapool
)
2201 & (__alignof__ (int32_t) - 1)) == 0);
2203 /* If the final entry in the list is not a single character we
2204 add an UNDEFINED entry here. */
2205 if (lastp
->nmbs
!= 1)
2207 int added
= ((sizeof (int32_t) + 1 + 1 + __alignof__ (int32_t) - 1)
2208 & ~(__alignof__ (int32_t) - 1));
2209 obstack_make_room (&extrapool
, added
);
2211 obstack_int32_grow_fast (&extrapool
, 0);
2212 /* XXX What rule? We just pick the first. */
2213 obstack_1grow_fast (&extrapool
, 0);
2214 /* Length is zero. */
2215 obstack_1grow_fast (&extrapool
, 0);
2217 /* Add alignment bytes if necessary. */
2218 while ((obstack_object_size (&extrapool
)
2219 & (__alignof__ (int32_t) - 1)) != 0)
2220 obstack_1grow_fast (&extrapool
, '\0');
2224 /* Add padding to the tables if necessary. */
2225 while ((obstack_object_size (&weightpool
) & (__alignof__ (int32_t) - 1))
2227 obstack_1grow (&weightpool
, 0);
2229 /* Now add the four tables. */
2230 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_TABLEMB
));
2231 iov
[2 + cnt
].iov_base
= tablemb
;
2232 iov
[2 + cnt
].iov_len
= sizeof (tablemb
);
2233 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2234 assert ((iov
[2 + cnt
].iov_len
& (__alignof__ (int32_t) - 1)) == 0);
2237 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTMB
));
2238 iov
[2 + cnt
].iov_len
= obstack_object_size (&weightpool
);
2239 iov
[2 + cnt
].iov_base
= obstack_finish (&weightpool
);
2240 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2243 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_EXTRAMB
));
2244 iov
[2 + cnt
].iov_len
= obstack_object_size (&extrapool
);
2245 iov
[2 + cnt
].iov_base
= obstack_finish (&extrapool
);
2246 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2249 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTMB
));
2250 iov
[2 + cnt
].iov_len
= obstack_object_size (&indirectpool
);
2251 iov
[2 + cnt
].iov_base
= obstack_finish (&indirectpool
);
2252 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2253 assert ((iov
[2 + cnt
].iov_len
& (__alignof__ (int32_t) - 1)) == 0);
2257 /* Now the same for the wide character table. We need to store some
2258 more information here. */
2259 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_GAP1
));
2260 iov
[2 + cnt
].iov_base
= NULL
;
2261 iov
[2 + cnt
].iov_len
= 0;
2262 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2263 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2266 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_GAP2
));
2267 iov
[2 + cnt
].iov_base
= NULL
;
2268 iov
[2 + cnt
].iov_len
= 0;
2269 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2270 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2273 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_GAP3
));
2274 iov
[2 + cnt
].iov_base
= NULL
;
2275 iov
[2 + cnt
].iov_len
= 0;
2276 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2277 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2280 /* Since we are using the sign of an integer to mark indirection the
2281 offsets in the arrays we are indirectly referring to must not be
2282 zero since -0 == 0. Therefore we add a bit of dummy content. */
2283 obstack_int32_grow (&extrapool
, 0);
2284 obstack_int32_grow (&indirectpool
, 0);
2286 /* Now insert the `UNDEFINED' value if it is used. Since this value
2287 will probably be used more than once it is good to store the
2288 weights only once. */
2289 if (output_weightwc (&weightpool
, collate
, &collate
->undefined
) != 0)
2292 /* Generate the table. Walk through the lists of sequences starting
2293 with the same wide character and add them one after the other to
2294 the table. In case we have more than one sequence starting with
2295 the same byte we have to use extra indirection. */
2297 auto void add_to_tablewc (uint32_t ch
, struct element_t
*runp
);
2299 void add_to_tablewc (uint32_t ch
, struct element_t
*runp
)
2301 if (runp
->wcnext
== NULL
&& runp
->nwcs
== 1)
2303 int32_t weigthidx
= output_weightwc (&weightpool
, collate
, runp
);
2304 collidx_table_add (&tablewc
, ch
, weigthidx
);
2308 /* As for the singlebyte table, we recognize sequences and
2310 struct element_t
*lastp
;
2312 collidx_table_add (&tablewc
, ch
,
2313 -(obstack_object_size (&extrapool
) / sizeof (uint32_t)));
2317 /* Store the current index in the weight table. We know that
2318 the current position in the `extrapool' is aligned on a
2323 /* Find out whether this is a single entry or we have more than
2324 one consecutive entry. */
2325 if (runp
->wcnext
!= NULL
2326 && runp
->nwcs
== runp
->wcnext
->nwcs
2327 && wmemcmp ((wchar_t *) runp
->wcs
,
2328 (wchar_t *)runp
->wcnext
->wcs
,
2329 runp
->nwcs
- 1) == 0
2330 && (runp
->wcs
[runp
->nwcs
- 1]
2331 == runp
->wcnext
->wcs
[runp
->nwcs
- 1] + 1))
2334 struct element_t
*series_startp
= runp
;
2335 struct element_t
*curp
;
2337 /* Now add first the initial byte sequence. */
2338 added
= (1 + 1 + 2 * (runp
->nwcs
- 1)) * sizeof (int32_t);
2339 if (sizeof (int32_t) == sizeof (int))
2340 obstack_make_room (&extrapool
, added
);
2342 /* More than one consecutive entry. We mark this by having
2343 a negative index into the indirect table. */
2344 obstack_int32_grow_fast (&extrapool
,
2345 -(obstack_object_size (&indirectpool
)
2346 / sizeof (int32_t)));
2347 obstack_int32_grow_fast (&extrapool
, runp
->nwcs
- 1);
2350 runp
= runp
->wcnext
;
2351 while (runp
->wcnext
!= NULL
2352 && runp
->nwcs
== runp
->wcnext
->nwcs
2353 && wmemcmp ((wchar_t *) runp
->wcs
,
2354 (wchar_t *)runp
->wcnext
->wcs
,
2355 runp
->nwcs
- 1) == 0
2356 && (runp
->wcs
[runp
->nwcs
- 1]
2357 == runp
->wcnext
->wcs
[runp
->nwcs
- 1] + 1));
2359 /* Now walk backward from here to the beginning. */
2362 for (i
= 1; i
< runp
->nwcs
; ++i
)
2363 obstack_int32_grow_fast (&extrapool
, curp
->wcs
[i
]);
2365 /* Now find the end of the consecutive sequence and
2366 add all the indices in the indirect pool. */
2369 weightidx
= output_weightwc (&weightpool
, collate
,
2371 obstack_int32_grow (&indirectpool
, weightidx
);
2373 curp
= curp
->wclast
;
2375 while (curp
!= series_startp
);
2377 /* Add the final weight. */
2378 weightidx
= output_weightwc (&weightpool
, collate
, curp
);
2379 obstack_int32_grow (&indirectpool
, weightidx
);
2381 /* And add the end byte sequence. Without length this
2383 for (i
= 1; i
< curp
->nwcs
; ++i
)
2384 obstack_int32_grow (&extrapool
, curp
->wcs
[i
]);
2388 /* A single entry. Simply add the index and the length and
2389 string (except for the first character which is already
2393 /* Output the weight info. */
2394 weightidx
= output_weightwc (&weightpool
, collate
, runp
);
2396 added
= (1 + 1 + runp
->nwcs
- 1) * sizeof (int32_t);
2397 if (sizeof (int) == sizeof (int32_t))
2398 obstack_make_room (&extrapool
, added
);
2400 obstack_int32_grow_fast (&extrapool
, weightidx
);
2401 obstack_int32_grow_fast (&extrapool
, runp
->nwcs
- 1);
2402 for (i
= 1; i
< runp
->nwcs
; ++i
)
2403 obstack_int32_grow_fast (&extrapool
, runp
->wcs
[i
]);
2408 runp
= runp
->wcnext
;
2410 while (runp
!= NULL
);
2416 collidx_table_init (&tablewc
);
2418 wchead_table_iterate (&collate
->wcheads
, add_to_tablewc
);
2420 collidx_table_finalize (&tablewc
);
2423 /* Now add the four tables. */
2424 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_TABLEWC
));
2425 iov
[2 + cnt
].iov_base
= tablewc
.result
;
2426 iov
[2 + cnt
].iov_len
= tablewc
.result_size
;
2427 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2428 assert (iov
[2 + cnt
].iov_len
% sizeof (int32_t) == 0);
2429 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2432 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTWC
));
2433 iov
[2 + cnt
].iov_len
= obstack_object_size (&weightpool
);
2434 iov
[2 + cnt
].iov_base
= obstack_finish (&weightpool
);
2435 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2436 assert (iov
[2 + cnt
].iov_len
% sizeof (int32_t) == 0);
2437 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2440 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_EXTRAWC
));
2441 iov
[2 + cnt
].iov_len
= obstack_object_size (&extrapool
);
2442 iov
[2 + cnt
].iov_base
= obstack_finish (&extrapool
);
2443 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2444 assert (iov
[2 + cnt
].iov_len
% sizeof (int32_t) == 0);
2445 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2448 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTWC
));
2449 iov
[2 + cnt
].iov_len
= obstack_object_size (&indirectpool
);
2450 iov
[2 + cnt
].iov_base
= obstack_finish (&indirectpool
);
2451 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2452 assert (iov
[2 + cnt
].iov_len
% sizeof (int32_t) == 0);
2453 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2457 /* Finally write the table with collation element names out. It is
2458 a hash table with a simple function which gets the name of the
2459 character as the input. One character might have many names. The
2460 value associated with the name is an index into the weight table
2461 where we are then interested in the first-level weight value.
2463 To determine how large the table should be we are counting the
2464 elements have to put in. Since we are using internal chaining
2465 using a secondary hash function we have to make the table a bit
2466 larger to avoid extremely long search times. We can achieve
2467 good results with a 40% larger table than there are entries. */
2469 runp
= collate
->start
;
2470 while (runp
!= NULL
)
2472 if (runp
->mbs
!= NULL
&& runp
->weights
!= NULL
&& !runp
->is_character
)
2473 /* Yep, the element really counts. */
2478 /* Add 40% and find the next prime number. */
2479 elem_size
= next_prime (elem_size
* 1.4);
2481 /* Allocate the table. Each entry consists of two words: the hash
2482 value and an index in a secondary table which provides the index
2483 into the weight table and the string itself (so that a match can
2485 elem_table
= (uint32_t *) obstack_alloc (&extrapool
,
2486 elem_size
* 2 * sizeof (uint32_t));
2487 memset (elem_table
, '\0', elem_size
* 2 * sizeof (uint32_t));
2489 /* Now add the elements. */
2490 runp
= collate
->start
;
2491 while (runp
!= NULL
)
2493 if (runp
->mbs
!= NULL
&& runp
->weights
!= NULL
&& !runp
->is_character
)
2495 /* Compute the hash value of the name. */
2496 uint32_t namelen
= strlen (runp
->name
);
2497 uint32_t hash
= elem_hash (runp
->name
, namelen
);
2498 size_t idx
= hash
% elem_size
;
2499 size_t start_idx
= idx
;
2501 if (elem_table
[idx
* 2] != 0)
2503 /* The spot is already taken. Try iterating using the value
2504 from the secondary hashing function. */
2505 size_t iter
= hash
% (elem_size
- 2) + 1;
2510 if (idx
>= elem_size
)
2512 assert (idx
!= start_idx
);
2514 while (elem_table
[idx
* 2] != 0);
2516 /* This is the spot where we will insert the value. */
2517 elem_table
[idx
* 2] = hash
;
2518 elem_table
[idx
* 2 + 1] = obstack_object_size (&extrapool
);
2520 /* Then the string itself including length. */
2521 obstack_1grow (&extrapool
, namelen
);
2522 obstack_grow (&extrapool
, runp
->name
, namelen
);
2524 /* And the multibyte representation. */
2525 obstack_1grow (&extrapool
, runp
->nmbs
);
2526 obstack_grow (&extrapool
, runp
->mbs
, runp
->nmbs
);
2528 /* And align again to 32 bits. */
2529 if ((1 + namelen
+ 1 + runp
->nmbs
) % sizeof (int32_t) != 0)
2530 obstack_grow (&extrapool
, "\0\0",
2532 - ((1 + namelen
+ 1 + runp
->nmbs
)
2533 % sizeof (int32_t))));
2535 /* Now some 32-bit values: multibyte collation sequence,
2536 wide char string (including length), and wide char
2537 collation sequence. */
2538 obstack_int32_grow (&extrapool
, runp
->mbseqorder
);
2540 obstack_int32_grow (&extrapool
, runp
->nwcs
);
2541 obstack_grow (&extrapool
, runp
->wcs
,
2542 runp
->nwcs
* sizeof (uint32_t));
2544 obstack_int32_grow (&extrapool
, runp
->wcseqorder
);
2550 /* Prepare to write out this data. */
2551 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB
));
2552 iov
[2 + cnt
].iov_base
= &elem_size
;
2553 iov
[2 + cnt
].iov_len
= sizeof (int32_t);
2554 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2555 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2558 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_SYMB_TABLEMB
));
2559 iov
[2 + cnt
].iov_base
= elem_table
;
2560 iov
[2 + cnt
].iov_len
= elem_size
* 2 * sizeof (int32_t);
2561 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2562 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2565 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_SYMB_EXTRAMB
));
2566 iov
[2 + cnt
].iov_len
= obstack_object_size (&extrapool
);
2567 iov
[2 + cnt
].iov_base
= obstack_finish (&extrapool
);
2568 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2571 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQMB
));
2572 iov
[2 + cnt
].iov_base
= collate
->mbseqorder
;
2573 iov
[2 + cnt
].iov_len
= 256;
2574 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2577 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQWC
));
2578 iov
[2 + cnt
].iov_base
= collate
->wcseqorder
.result
;
2579 iov
[2 + cnt
].iov_len
= collate
->wcseqorder
.result_size
;
2580 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2581 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2584 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_CODESET
));
2585 iov
[2 + cnt
].iov_base
= (void *) charmap
->code_set_name
;
2586 iov
[2 + cnt
].iov_len
= strlen (iov
[2 + cnt
].iov_base
) + 1;
2589 assert (cnt
== _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
));
2591 write_locale_data (output_path
, LC_COLLATE
, "LC_COLLATE", 2 + cnt
, iov
);
2593 obstack_free (&weightpool
, NULL
);
2594 obstack_free (&extrapool
, NULL
);
2595 obstack_free (&indirectpool
, NULL
);
2600 collate_read (struct linereader
*ldfile
, struct localedef_t
*result
,
2601 const struct charmap_t
*charmap
, const char *repertoire_name
,
2604 struct repertoire_t
*repertoire
= NULL
;
2605 struct locale_collate_t
*collate
;
2607 struct token
*arg
= NULL
;
2608 enum token_t nowtok
;
2609 enum token_t was_ellipsis
= tok_none
;
2610 struct localedef_t
*copy_locale
= NULL
;
2613 1 - between `order-start' and `order-end'
2614 2 - after `order-end'
2615 3 - after `reorder-after', waiting for `reorder-end'
2616 4 - after `reorder-end'
2617 5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2618 6 - after `reorder-sections-end'
2622 /* Get the repertoire we have to use. */
2623 if (repertoire_name
!= NULL
)
2624 repertoire
= repertoire_read (repertoire_name
);
2626 /* The rest of the line containing `LC_COLLATE' must be free. */
2627 lr_ignore_rest (ldfile
, 1);
2631 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2634 while (nowtok
== tok_eol
);
2636 if (nowtok
== tok_copy
)
2639 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2640 if (now
->tok
!= tok_string
)
2642 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2646 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2647 while (now
->tok
!= tok_eof
&& now
->tok
!= tok_end
);
2649 if (now
->tok
!= tok_eof
2650 || (now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
),
2651 now
->tok
== tok_eof
))
2652 lr_error (ldfile
, _("%s: premature end of file"), "LC_COLLATE");
2653 else if (now
->tok
!= tok_lc_collate
)
2655 lr_error (ldfile
, _("\
2656 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2657 lr_ignore_rest (ldfile
, 0);
2660 lr_ignore_rest (ldfile
, 1);
2665 if (! ignore_content
)
2667 /* Get the locale definition. */
2668 copy_locale
= load_locale (LC_COLLATE
, now
->val
.str
.startmb
,
2669 repertoire_name
, charmap
, NULL
);
2670 if ((copy_locale
->avail
& COLLATE_LOCALE
) == 0)
2672 /* Not yet loaded. So do it now. */
2673 if (locfile_read (copy_locale
, charmap
) != 0)
2677 if (copy_locale
->categories
[LC_COLLATE
].collate
== NULL
)
2681 lr_ignore_rest (ldfile
, 1);
2683 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2687 /* Prepare the data structures. */
2688 collate_startup (ldfile
, result
, copy_locale
, ignore_content
);
2689 collate
= result
->categories
[LC_COLLATE
].collate
;
2697 /* Of course we don't proceed beyond the end of file. */
2698 if (nowtok
== tok_eof
)
2701 /* Ignore empty lines. */
2702 if (nowtok
== tok_eol
)
2704 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2712 /* Allow copying other locales. */
2713 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2714 if (now
->tok
!= tok_string
)
2717 if (! ignore_content
)
2718 load_locale (LC_COLLATE
, now
->val
.str
.startmb
, repertoire_name
,
2721 lr_ignore_rest (ldfile
, 1);
2724 case tok_coll_weight_max
:
2725 /* Ignore the rest of the line if we don't need the input of
2729 lr_ignore_rest (ldfile
, 0);
2736 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2737 if (arg
->tok
!= tok_number
)
2739 if (collate
->col_weight_max
!= -1)
2740 lr_error (ldfile
, _("%s: duplicate definition of `%s'"),
2741 "LC_COLLATE", "col_weight_max");
2743 collate
->col_weight_max
= arg
->val
.num
;
2744 lr_ignore_rest (ldfile
, 1);
2747 case tok_section_symbol
:
2748 /* Ignore the rest of the line if we don't need the input of
2752 lr_ignore_rest (ldfile
, 0);
2759 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2760 if (arg
->tok
!= tok_bsymbol
)
2762 else if (!ignore_content
)
2764 /* Check whether this section is already known. */
2765 struct section_list
*known
= collate
->sections
;
2766 while (known
!= NULL
)
2768 if (strcmp (known
->name
, arg
->val
.str
.startmb
) == 0)
2770 known
= known
->next
;
2776 _("%s: duplicate declaration of section `%s'"),
2777 "LC_COLLATE", arg
->val
.str
.startmb
);
2778 free (arg
->val
.str
.startmb
);
2781 collate
->sections
= make_seclist_elem (collate
,
2782 arg
->val
.str
.startmb
,
2785 lr_ignore_rest (ldfile
, known
== NULL
);
2789 free (arg
->val
.str
.startmb
);
2790 lr_ignore_rest (ldfile
, 0);
2794 case tok_collating_element
:
2795 /* Ignore the rest of the line if we don't need the input of
2799 lr_ignore_rest (ldfile
, 0);
2803 if (state
!= 0 && state
!= 2)
2806 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2807 if (arg
->tok
!= tok_bsymbol
)
2811 const char *symbol
= arg
->val
.str
.startmb
;
2812 size_t symbol_len
= arg
->val
.str
.lenmb
;
2814 /* Next the `from' keyword. */
2815 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2816 if (arg
->tok
!= tok_from
)
2818 free ((char *) symbol
);
2822 ldfile
->return_widestr
= 1;
2823 ldfile
->translate_strings
= 1;
2825 /* Finally the string with the replacement. */
2826 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2828 ldfile
->return_widestr
= 0;
2829 ldfile
->translate_strings
= 0;
2831 if (arg
->tok
!= tok_string
)
2834 if (!ignore_content
&& symbol
!= NULL
)
2836 /* The name is already defined. */
2837 if (check_duplicate (ldfile
, collate
, charmap
,
2838 repertoire
, symbol
, symbol_len
))
2841 if (arg
->val
.str
.startmb
!= NULL
)
2842 insert_entry (&collate
->elem_table
, symbol
, symbol_len
,
2843 new_element (collate
,
2844 arg
->val
.str
.startmb
,
2845 arg
->val
.str
.lenmb
- 1,
2846 arg
->val
.str
.startwc
,
2847 symbol
, symbol_len
, 0));
2853 free ((char *) symbol
);
2854 if (arg
->val
.str
.startmb
!= NULL
)
2855 free (arg
->val
.str
.startmb
);
2856 if (arg
->val
.str
.startwc
!= NULL
)
2857 free (arg
->val
.str
.startwc
);
2859 lr_ignore_rest (ldfile
, 1);
2863 case tok_collating_symbol
:
2864 /* Ignore the rest of the line if we don't need the input of
2868 lr_ignore_rest (ldfile
, 0);
2872 if (state
!= 0 && state
!= 2)
2875 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2876 if (arg
->tok
!= tok_bsymbol
)
2880 char *symbol
= arg
->val
.str
.startmb
;
2881 size_t symbol_len
= arg
->val
.str
.lenmb
;
2882 char *endsymbol
= NULL
;
2883 size_t endsymbol_len
= 0;
2884 enum token_t ellipsis
= tok_none
;
2886 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2887 if (arg
->tok
== tok_ellipsis2
|| arg
->tok
== tok_ellipsis4
)
2889 ellipsis
= arg
->tok
;
2891 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
2893 if (arg
->tok
!= tok_bsymbol
)
2899 endsymbol
= arg
->val
.str
.startmb
;
2900 endsymbol_len
= arg
->val
.str
.lenmb
;
2902 lr_ignore_rest (ldfile
, 1);
2904 else if (arg
->tok
!= tok_eol
)
2910 if (!ignore_content
)
2913 || (ellipsis
!= tok_none
&& endsymbol
== NULL
))
2915 lr_error (ldfile
, _("\
2916 %s: unknown character in collating symbol name"),
2920 else if (ellipsis
== tok_none
)
2922 /* A single symbol, no ellipsis. */
2923 if (check_duplicate (ldfile
, collate
, charmap
,
2924 repertoire
, symbol
, symbol_len
))
2925 /* The name is already defined. */
2928 insert_entry (&collate
->sym_table
, symbol
, symbol_len
,
2929 new_symbol (collate
, symbol
, symbol_len
));
2931 else if (symbol_len
!= endsymbol_len
)
2935 _("invalid names for character range"));
2940 /* Oh my, we have to handle an ellipsis. First, as
2941 usual, determine the common prefix and then
2942 convert the rest into a range. */
2944 unsigned long int from
;
2945 unsigned long int to
;
2948 for (prefixlen
= 0; prefixlen
< symbol_len
; ++prefixlen
)
2949 if (symbol
[prefixlen
] != endsymbol
[prefixlen
])
2952 /* Convert the rest into numbers. */
2953 symbol
[symbol_len
] = '\0';
2954 from
= strtoul (&symbol
[prefixlen
], &endp
,
2955 ellipsis
== tok_ellipsis2
? 16 : 10);
2957 goto col_sym_inv_range
;
2959 endsymbol
[symbol_len
] = '\0';
2960 to
= strtoul (&endsymbol
[prefixlen
], &endp
,
2961 ellipsis
== tok_ellipsis2
? 16 : 10);
2963 goto col_sym_inv_range
;
2966 goto col_sym_inv_range
;
2968 /* Now loop over all entries. */
2973 symbuf
= (char *) obstack_alloc (&collate
->mempool
,
2976 /* Create the name. */
2978 ellipsis
== tok_ellipsis2
2979 ? "%.*s%.*lX" : "%.*s%.*lu",
2980 (int) prefixlen
, symbol
,
2981 (int) (symbol_len
- prefixlen
), from
);
2983 if (check_duplicate (ldfile
, collate
, charmap
,
2984 repertoire
, symbuf
, symbol_len
))
2985 /* The name is already defined. */
2988 insert_entry (&collate
->sym_table
, symbuf
,
2990 new_symbol (collate
, symbuf
,
2993 /* Increment the counter. */
3005 if (endsymbol
!= NULL
)
3011 case tok_symbol_equivalence
:
3012 /* Ignore the rest of the line if we don't need the input of
3016 lr_ignore_rest (ldfile
, 0);
3023 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3024 if (arg
->tok
!= tok_bsymbol
)
3028 const char *newname
= arg
->val
.str
.startmb
;
3029 size_t newname_len
= arg
->val
.str
.lenmb
;
3030 const char *symname
;
3032 void *symval
; /* Actually struct symbol_t* */
3034 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3035 if (arg
->tok
!= tok_bsymbol
)
3037 if (newname
!= NULL
)
3038 free ((char *) newname
);
3042 symname
= arg
->val
.str
.startmb
;
3043 symname_len
= arg
->val
.str
.lenmb
;
3045 if (newname
== NULL
)
3047 lr_error (ldfile
, _("\
3048 %s: unknown character in equivalent definition name"),
3052 if (newname
!= NULL
)
3053 free ((char *) newname
);
3054 if (symname
!= NULL
)
3055 free ((char *) symname
);
3058 if (symname
== NULL
)
3060 lr_error (ldfile
, _("\
3061 %s: unknown character in equivalent definition value"),
3063 goto sym_equiv_free
;
3066 /* See whether the symbol name is already defined. */
3067 if (find_entry (&collate
->sym_table
, symname
, symname_len
,
3070 lr_error (ldfile
, _("\
3071 %s: unknown symbol `%s' in equivalent definition"),
3072 "LC_COLLATE", symname
);
3073 goto sym_equiv_free
;
3076 if (insert_entry (&collate
->sym_table
,
3077 newname
, newname_len
, symval
) < 0)
3079 lr_error (ldfile
, _("\
3080 error while adding equivalent collating symbol"));
3081 goto sym_equiv_free
;
3084 free ((char *) symname
);
3086 lr_ignore_rest (ldfile
, 1);
3090 /* We get told about the scripts we know. */
3091 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3092 if (arg
->tok
!= tok_bsymbol
)
3096 struct section_list
*runp
= collate
->known_sections
;
3099 while (runp
!= NULL
)
3100 if (strncmp (runp
->name
, arg
->val
.str
.startmb
,
3101 arg
->val
.str
.lenmb
) == 0
3102 && runp
->name
[arg
->val
.str
.lenmb
] == '\0')
3105 runp
= runp
->def_next
;
3109 lr_error (ldfile
, _("duplicate definition of script `%s'"),
3111 lr_ignore_rest (ldfile
, 0);
3115 runp
= (struct section_list
*) xcalloc (1, sizeof (*runp
));
3116 name
= (char *) xmalloc (arg
->val
.str
.lenmb
+ 1);
3117 memcpy (name
, arg
->val
.str
.startmb
, arg
->val
.str
.lenmb
);
3118 name
[arg
->val
.str
.lenmb
] = '\0';
3121 runp
->def_next
= collate
->known_sections
;
3122 collate
->known_sections
= runp
;
3124 lr_ignore_rest (ldfile
, 1);
3127 case tok_order_start
:
3128 /* Ignore the rest of the line if we don't need the input of
3132 lr_ignore_rest (ldfile
, 0);
3136 if (state
!= 0 && state
!= 1 && state
!= 2)
3140 /* The 14652 draft does not specify whether all `order_start' lines
3141 must contain the same number of sort-rules, but 14651 does. So
3142 we require this here as well. */
3143 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3144 if (arg
->tok
== tok_bsymbol
)
3146 /* This had better be a section name. */
3147 struct section_list
*sp
= collate
->known_sections
;
3149 && (sp
->name
== NULL
3150 || strncmp (sp
->name
, arg
->val
.str
.startmb
,
3151 arg
->val
.str
.lenmb
) != 0
3152 || sp
->name
[arg
->val
.str
.lenmb
] != '\0'))
3157 lr_error (ldfile
, _("\
3158 %s: unknown section name `%.*s'"),
3159 "LC_COLLATE", (int) arg
->val
.str
.lenmb
,
3160 arg
->val
.str
.startmb
);
3161 /* We use the error section. */
3162 collate
->current_section
= &collate
->error_section
;
3164 if (collate
->error_section
.first
== NULL
)
3166 /* Insert &collate->error_section at the end of
3167 the collate->sections list. */
3168 if (collate
->sections
== NULL
)
3169 collate
->sections
= &collate
->error_section
;
3172 sp
= collate
->sections
;
3173 while (sp
->next
!= NULL
)
3176 sp
->next
= &collate
->error_section
;
3178 collate
->error_section
.next
= NULL
;
3183 /* One should not be allowed to open the same
3185 if (sp
->first
!= NULL
)
3186 lr_error (ldfile
, _("\
3187 %s: multiple order definitions for section `%s'"),
3188 "LC_COLLATE", sp
->name
);
3191 /* Insert sp in the collate->sections list,
3192 right after collate->current_section. */
3193 if (collate
->current_section
== NULL
)
3194 collate
->current_section
= sp
;
3197 sp
->next
= collate
->current_section
->next
;
3198 collate
->current_section
->next
= sp
;
3202 /* Next should come the end of the line or a semicolon. */
3203 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
3205 if (arg
->tok
== tok_eol
)
3209 /* This means we have exactly one rule: `forward'. */
3211 lr_error (ldfile
, _("\
3212 %s: invalid number of sorting rules"),
3216 sp
->rules
= obstack_alloc (&collate
->mempool
,
3217 (sizeof (enum coll_sort_rule
)
3219 for (cnt
= 0; cnt
< nrules
; ++cnt
)
3220 sp
->rules
[cnt
] = sort_forward
;
3226 /* Get the next token. */
3227 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
3233 /* There is no section symbol. Therefore we use the unnamed
3235 collate
->current_section
= &collate
->unnamed_section
;
3237 if (collate
->unnamed_section
.first
!= NULL
)
3238 lr_error (ldfile
, _("\
3239 %s: multiple order definitions for unnamed section"),
3243 /* Insert &collate->unnamed_section at the beginning of
3244 the collate->sections list. */
3245 collate
->unnamed_section
.next
= collate
->sections
;
3246 collate
->sections
= &collate
->unnamed_section
;
3250 /* Now read the direction names. */
3251 read_directions (ldfile
, arg
, charmap
, repertoire
, result
);
3253 /* From now we need the strings untranslated. */
3254 ldfile
->translate_strings
= 0;
3258 /* Ignore the rest of the line if we don't need the input of
3262 lr_ignore_rest (ldfile
, 0);
3269 /* Handle ellipsis at end of list. */
3270 if (was_ellipsis
!= tok_none
)
3272 handle_ellipsis (ldfile
, NULL
, 0, was_ellipsis
, charmap
,
3273 repertoire
, result
);
3274 was_ellipsis
= tok_none
;
3278 lr_ignore_rest (ldfile
, 1);
3281 case tok_reorder_after
:
3282 /* Ignore the rest of the line if we don't need the input of
3286 lr_ignore_rest (ldfile
, 0);
3292 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
3296 /* Handle ellipsis at end of list. */
3297 if (was_ellipsis
!= tok_none
)
3299 handle_ellipsis (ldfile
, arg
->val
.str
.startmb
,
3300 arg
->val
.str
.lenmb
, was_ellipsis
, charmap
,
3301 repertoire
, result
);
3302 was_ellipsis
= tok_none
;
3305 else if (state
!= 2 && state
!= 3)
3309 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3310 if (arg
->tok
== tok_bsymbol
|| arg
->tok
== tok_ucs4
)
3312 /* Find this symbol in the sequence table. */
3316 struct element_t
*insp
;
3320 if (arg
->tok
== tok_bsymbol
)
3322 startmb
= arg
->val
.str
.startmb
;
3323 lenmb
= arg
->val
.str
.lenmb
;
3327 sprintf (ucsbuf
, "U%08X", arg
->val
.ucs4
);
3332 if (find_entry (&collate
->seq_table
, startmb
, lenmb
, &ptr
) == 0)
3333 /* Yes, the symbol exists. Simply point the cursor
3335 collate
->cursor
= (struct element_t
*) ptr
;
3338 struct symbol_t
*symbp
;
3341 if (find_entry (&collate
->sym_table
, startmb
, lenmb
,
3346 if (symbp
->order
->last
!= NULL
3347 || symbp
->order
->next
!= NULL
)
3348 collate
->cursor
= symbp
->order
;
3351 /* This is a collating symbol but its position
3352 is not yet defined. */
3353 lr_error (ldfile
, _("\
3354 %s: order for collating symbol %.*s not yet defined"),
3355 "LC_COLLATE", (int) lenmb
, startmb
);
3356 collate
->cursor
= NULL
;
3360 else if (find_entry (&collate
->elem_table
, startmb
, lenmb
,
3363 insp
= (struct element_t
*) ptr
;
3365 if (insp
->last
!= NULL
|| insp
->next
!= NULL
)
3366 collate
->cursor
= insp
;
3369 /* This is a collating element but its position
3370 is not yet defined. */
3371 lr_error (ldfile
, _("\
3372 %s: order for collating element %.*s not yet defined"),
3373 "LC_COLLATE", (int) lenmb
, startmb
);
3374 collate
->cursor
= NULL
;
3380 /* This is bad. The symbol after which we have to
3381 insert does not exist. */
3382 lr_error (ldfile
, _("\
3383 %s: cannot reorder after %.*s: symbol not known"),
3384 "LC_COLLATE", (int) lenmb
, startmb
);
3385 collate
->cursor
= NULL
;
3390 lr_ignore_rest (ldfile
, no_error
);
3393 /* This must not happen. */
3397 case tok_reorder_end
:
3398 /* Ignore the rest of the line if we don't need the input of
3406 lr_ignore_rest (ldfile
, 1);
3409 case tok_reorder_sections_after
:
3410 /* Ignore the rest of the line if we don't need the input of
3414 lr_ignore_rest (ldfile
, 0);
3420 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
3424 /* Handle ellipsis at end of list. */
3425 if (was_ellipsis
!= tok_none
)
3427 handle_ellipsis (ldfile
, NULL
, 0, was_ellipsis
, charmap
,
3428 repertoire
, result
);
3429 was_ellipsis
= tok_none
;
3432 else if (state
== 3)
3434 WITH_CUR_LOCALE (error (0, 0, _("\
3435 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3438 else if (state
!= 2 && state
!= 4)
3442 /* Get the name of the sections we are adding after. */
3443 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3444 if (arg
->tok
== tok_bsymbol
)
3446 /* Now find a section with this name. */
3447 struct section_list
*runp
= collate
->sections
;
3449 while (runp
!= NULL
)
3451 if (runp
->name
!= NULL
3452 && strlen (runp
->name
) == arg
->val
.str
.lenmb
3453 && memcmp (runp
->name
, arg
->val
.str
.startmb
,
3454 arg
->val
.str
.lenmb
) == 0)
3461 collate
->current_section
= runp
;
3464 /* This is bad. The section after which we have to
3465 reorder does not exist. Therefore we cannot
3466 process the whole rest of this reorder
3468 lr_error (ldfile
, _("%s: section `%.*s' not known"),
3469 "LC_COLLATE", (int) arg
->val
.str
.lenmb
,
3470 arg
->val
.str
.startmb
);
3474 lr_ignore_rest (ldfile
, 0);
3476 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3478 while (now
->tok
== tok_reorder_sections_after
3479 || now
->tok
== tok_reorder_sections_end
3480 || now
->tok
== tok_end
);
3482 /* Process the token we just saw. */
3488 /* This must not happen. */
3492 case tok_reorder_sections_end
:
3493 /* Ignore the rest of the line if we don't need the input of
3501 lr_ignore_rest (ldfile
, 1);
3506 /* Ignore the rest of the line if we don't need the input of
3510 lr_ignore_rest (ldfile
, 0);
3514 if (state
!= 0 && state
!= 1 && state
!= 3 && state
!= 5)
3517 if ((state
== 0 || state
== 5) && nowtok
== tok_ucs4
)
3520 if (nowtok
== tok_ucs4
)
3522 snprintf (ucs4buf
, sizeof (ucs4buf
), "U%08X", now
->val
.ucs4
);
3526 else if (arg
!= NULL
)
3528 symstr
= arg
->val
.str
.startmb
;
3529 symlen
= arg
->val
.str
.lenmb
;
3533 lr_error (ldfile
, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3534 (int) ldfile
->token
.val
.str
.lenmb
,
3535 ldfile
->token
.val
.str
.startmb
);
3539 struct element_t
*seqp
;
3542 /* We are outside an `order_start' region. This means
3543 we must only accept definitions of values for
3544 collation symbols since these are purely abstract
3545 values and don't need directions associated. */
3548 if (find_entry (&collate
->seq_table
, symstr
, symlen
, &ptr
) == 0)
3552 /* It's already defined. First check whether this
3553 is really a collating symbol. */
3554 if (seqp
->is_character
)
3563 if (find_entry (&collate
->sym_table
, symstr
, symlen
,
3565 /* No collating symbol, it's an error. */
3568 /* Maybe this is the first time we define a symbol
3569 value and it is before the first actual section. */
3570 if (collate
->sections
== NULL
)
3571 collate
->sections
= collate
->current_section
=
3572 &collate
->symbol_section
;
3575 if (was_ellipsis
!= tok_none
)
3577 handle_ellipsis (ldfile
, symstr
, symlen
, was_ellipsis
,
3578 charmap
, repertoire
, result
);
3580 /* Remember that we processed the ellipsis. */
3581 was_ellipsis
= tok_none
;
3583 /* And don't add the value a second time. */
3587 else if (state
== 3)
3589 /* It is possible that we already have this collation sequence.
3590 In this case we move the entry. */
3594 /* If the symbol after which we have to insert was not found
3595 ignore all entries. */
3596 if (collate
->cursor
== NULL
)
3598 lr_ignore_rest (ldfile
, 0);
3602 if (find_entry (&collate
->seq_table
, symstr
, symlen
, &ptr
) == 0)
3604 seqp
= (struct element_t
*) ptr
;
3608 if (find_entry (&collate
->sym_table
, symstr
, symlen
, &sym
) == 0
3609 && (seqp
= ((struct symbol_t
*) sym
)->order
) != NULL
)
3612 if (find_entry (&collate
->elem_table
, symstr
, symlen
, &ptr
) == 0
3613 && (seqp
= (struct element_t
*) ptr
,
3614 seqp
->last
!= NULL
|| seqp
->next
!= NULL
3615 || (collate
->start
!= NULL
&& seqp
== collate
->start
)))
3618 /* Remove the entry from the old position. */
3619 if (seqp
->last
== NULL
)
3620 collate
->start
= seqp
->next
;
3622 seqp
->last
->next
= seqp
->next
;
3623 if (seqp
->next
!= NULL
)
3624 seqp
->next
->last
= seqp
->last
;
3626 /* We also have to check whether this entry is the
3627 first or last of a section. */
3628 if (seqp
->section
->first
== seqp
)
3630 if (seqp
->section
->first
== seqp
->section
->last
)
3631 /* This section has no content anymore. */
3632 seqp
->section
->first
= seqp
->section
->last
= NULL
;
3634 seqp
->section
->first
= seqp
->next
;
3636 else if (seqp
->section
->last
== seqp
)
3637 seqp
->section
->last
= seqp
->last
;
3639 /* Now insert it in the new place. */
3640 insert_weights (ldfile
, seqp
, charmap
, repertoire
, result
,
3645 /* Otherwise we just add a new entry. */
3647 else if (state
== 5)
3649 /* We are reordering sections. Find the named section. */
3650 struct section_list
*runp
= collate
->sections
;
3651 struct section_list
*prevp
= NULL
;
3653 while (runp
!= NULL
)
3655 if (runp
->name
!= NULL
3656 && strlen (runp
->name
) == symlen
3657 && memcmp (runp
->name
, symstr
, symlen
) == 0)
3666 lr_error (ldfile
, _("%s: section `%.*s' not known"),
3667 "LC_COLLATE", (int) symlen
, symstr
);
3668 lr_ignore_rest (ldfile
, 0);
3672 if (runp
!= collate
->current_section
)
3674 /* Remove the named section from the old place and
3675 insert it in the new one. */
3676 prevp
->next
= runp
->next
;
3678 runp
->next
= collate
->current_section
->next
;
3679 collate
->current_section
->next
= runp
;
3680 collate
->current_section
= runp
;
3683 /* Process the rest of the line which might change
3684 the collation rules. */
3685 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
3687 if (arg
->tok
!= tok_eof
&& arg
->tok
!= tok_eol
)
3688 read_directions (ldfile
, arg
, charmap
, repertoire
,
3693 else if (was_ellipsis
!= tok_none
)
3695 /* Using the information in the `ellipsis_weight'
3696 element and this and the last value we have to handle
3697 the ellipsis now. */
3698 assert (state
== 1);
3700 handle_ellipsis (ldfile
, symstr
, symlen
, was_ellipsis
, charmap
,
3701 repertoire
, result
);
3703 /* Remember that we processed the ellipsis. */
3704 was_ellipsis
= tok_none
;
3706 /* And don't add the value a second time. */
3710 /* Now insert in the new place. */
3711 insert_value (ldfile
, symstr
, symlen
, charmap
, repertoire
, result
);
3715 /* Ignore the rest of the line if we don't need the input of
3719 lr_ignore_rest (ldfile
, 0);
3726 if (was_ellipsis
!= tok_none
)
3729 _("%s: cannot have `%s' as end of ellipsis range"),
3730 "LC_COLLATE", "UNDEFINED");
3732 unlink_element (collate
);
3733 was_ellipsis
= tok_none
;
3736 /* See whether UNDEFINED already appeared somewhere. */
3737 if (collate
->undefined
.next
!= NULL
3738 || &collate
->undefined
== collate
->cursor
)
3741 _("%s: order for `%.*s' already defined at %s:%Zu"),
3742 "LC_COLLATE", 9, "UNDEFINED",
3743 collate
->undefined
.file
,
3744 collate
->undefined
.line
);
3745 lr_ignore_rest (ldfile
, 0);
3748 /* Parse the weights. */
3749 insert_weights (ldfile
, &collate
->undefined
, charmap
,
3750 repertoire
, result
, tok_none
);
3753 case tok_ellipsis2
: /* symbolic hexadecimal ellipsis */
3754 case tok_ellipsis3
: /* absolute ellipsis */
3755 case tok_ellipsis4
: /* symbolic decimal ellipsis */
3756 /* This is the symbolic (decimal or hexadecimal) or absolute
3758 if (was_ellipsis
!= tok_none
)
3761 if (state
!= 0 && state
!= 1 && state
!= 3)
3764 was_ellipsis
= nowtok
;
3766 insert_weights (ldfile
, &collate
->ellipsis_weight
, charmap
,
3767 repertoire
, result
, nowtok
);
3771 /* Next we assume `LC_COLLATE'. */
3772 if (!ignore_content
)
3775 /* We must either see a copy statement or have
3778 _("%s: empty category description not allowed"),
3780 else if (state
== 1)
3782 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
3785 /* Handle ellipsis at end of list. */
3786 if (was_ellipsis
!= tok_none
)
3788 handle_ellipsis (ldfile
, NULL
, 0, was_ellipsis
, charmap
,
3789 repertoire
, result
);
3790 was_ellipsis
= tok_none
;
3793 else if (state
== 3)
3794 WITH_CUR_LOCALE (error (0, 0, _("\
3795 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3796 else if (state
== 5)
3797 WITH_CUR_LOCALE (error (0, 0, _("\
3798 %s: missing `reorder-sections-end' keyword"), "LC_COLLATE"));
3800 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3801 if (arg
->tok
== tok_eof
)
3803 if (arg
->tok
== tok_eol
)
3804 lr_error (ldfile
, _("%s: incomplete `END' line"), "LC_COLLATE");
3805 else if (arg
->tok
!= tok_lc_collate
)
3806 lr_error (ldfile
, _("\
3807 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3808 lr_ignore_rest (ldfile
, arg
->tok
== tok_lc_collate
);
3813 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
3816 /* Prepare for the next round. */
3817 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3821 /* When we come here we reached the end of the file. */
3822 lr_error (ldfile
, _("%s: premature end of file"), "LC_COLLATE");