2.12.90-11
[glibc.git] / locale / programs / ld-collate.c
blob11bd7eacad78340318bdcb03aabfcaf420024267
1 /* Copyright (C) 1995-2003, 2005-2008, 2009 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
23 #include <errno.h>
24 #include <error.h>
25 #include <stdlib.h>
26 #include <wchar.h>
27 #include <sys/param.h>
29 #include "localedef.h"
30 #include "charmap.h"
31 #include "localeinfo.h"
32 #include "linereader.h"
33 #include "locfile.h"
34 #include "elem-hash.h"
36 /* Uncomment the following line in the production version. */
37 /* #define NDEBUG 1 */
38 #include <assert.h>
40 #define obstack_chunk_alloc malloc
41 #define obstack_chunk_free free
/* Append the 32-bit value DATA to the object currently being grown in
   OBSTACK.  */
static inline void
__attribute ((always_inline))
obstack_int32_grow (struct obstack *obstack, int32_t data)
{
  if (sizeof (int) != sizeof (int32_t))
    {
      /* `int' has a different width than int32_t, so the int-based
         obstack macro cannot be used; copy the raw bytes instead.  */
      obstack_grow (obstack, &data, sizeof (int32_t));
      return;
    }

  obstack_int_grow (obstack, data);
}
/* Like obstack_int32_grow, but uses obstack_int_grow_fast when `int' is
   32 bits wide.  NOTE(review): the fast obstack macros presumably expect
   the caller to have reserved room beforehand -- confirm against the
   obstack documentation.  */
static inline void
__attribute ((always_inline))
obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
{
  if (sizeof (int) != sizeof (int32_t))
    {
      /* No int-sized fast path is available; fall back to the regular
         byte-wise growth.  */
      obstack_grow (obstack, &data, sizeof (int32_t));
      return;
    }

  obstack_int_grow_fast (obstack, data);
}
63 /* Forward declaration. */
64 struct element_t;
/* Data type for the list of sections.  Every node is a member of up to
   two singly linked lists, chained through DEF_NEXT and NEXT.  */
struct section_list
{
  /* Successor in the known_sections list.  */
  struct section_list *def_next;
  /* Successor in the sections list.  */
  struct section_list *next;
  /* Name of the section.  */
  const char *name;
  /* First element of this section.  */
  struct element_t *first;
  /* Last element of this section.  */
  struct element_t *last;
  /* These are the rules for this section.  */
  enum coll_sort_rule *rules;
  /* Index of the rule set in the appropriate section of the output file.  */
  int ruleidx;
};
85 struct element_t;
/* Counted array of element pointers; used for the weights of one level.  */
struct element_list_t
{
  /* Number of elements.  */
  int cnt;

  /* The CNT element pointers.  */
  struct element_t **w;
};
/* Data type for collating element.  */
struct element_t
{
  const char *name;

  /* Multibyte and wide character representation together with their
     lengths.  */
  const char *mbs;
  size_t nmbs;
  const uint32_t *wcs;
  size_t nwcs;
  int *mborder;
  int wcorder;

  /* The following is a bit mask whose bits are set if this element is
     used in the appropriate level.  Interesting for the singlebyte
     weight computation.

     XXX The type here restricts the number of levels to 32.  It could
     be changed if necessary but I doubt this is necessary.  */
  unsigned int used_in_level;

  /* One weight list per level.  */
  struct element_list_t *weights;

  /* Nonzero if this is a real character definition.  */
  int is_character;

  /* Order of the character in the sequence.  This information will
     be used in range expressions.  */
  int mbseqorder;
  int wcseqorder;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;

  /* Which section does this belong to.  */
  struct section_list *section;

  /* Predecessor and successor in the order list.  */
  struct element_t *last;
  struct element_t *next;

  /* Next element in multibyte output list.  */
  struct element_t *mbnext;
  struct element_t *mblast;

  /* Next element in wide character output list.  */
  struct element_t *wcnext;
  struct element_t *wclast;
};
/* Special element values.  These small integers cast to element pointers
   serve as placeholders in weight lists; they must never be
   dereferenced.  */
#define ELEMENT_ELLIPSIS2	((struct element_t *) 1)
#define ELEMENT_ELLIPSIS3	((struct element_t *) 2)
#define ELEMENT_ELLIPSIS4	((struct element_t *) 3)
/* Data type for collating symbol.  */
struct symbol_t
{
  const char *name;

  /* Point to place in the order list.  */
  struct element_t *order;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;
};
163 /* Sparse table of struct element_t *. */
164 #define TABLE wchead_table
165 #define ELEMENT struct element_t *
166 #define DEFAULT NULL
167 #define ITERATE
168 #define NO_FINALIZE
169 #include "3level.h"
171 /* Sparse table of int32_t. */
172 #define TABLE collidx_table
173 #define ELEMENT int32_t
174 #define DEFAULT 0
175 #include "3level.h"
177 /* Sparse table of uint32_t. */
178 #define TABLE collseq_table
179 #define ELEMENT uint32_t
180 #define DEFAULT ~((uint32_t) 0)
181 #include "3level.h"
/* Simple name list for the preprocessor.  The string is stored inline
   directly behind the link pointer, so a node has to be allocated with
   room for the name and its terminating NUL.  */
struct name_list
{
  struct name_list *next;
  /* C99 flexible array member (formerly the GNU zero-length array).  */
  char str[];
};
192 /* The real definition of the struct for the LC_COLLATE locale. */
193 struct locale_collate_t
195 int col_weight_max;
196 int cur_weight_max;
198 /* List of known scripts. */
199 struct section_list *known_sections;
200 /* List of used sections. */
201 struct section_list *sections;
202 /* Current section using definition. */
203 struct section_list *current_section;
204 /* There always can be an unnamed section. */
205 struct section_list unnamed_section;
206 /* Flag whether the unnamed section has been defined. */
207 bool unnamed_section_defined;
208 /* To make handling of errors easier we have another section. */
209 struct section_list error_section;
210 /* Sometimes we are defining the values for collating symbols before
211 the first actual section. */
212 struct section_list symbol_section;
214 /* Start of the order list. */
215 struct element_t *start;
217 /* The undefined element. */
218 struct element_t undefined;
220 /* This is the cursor for `reorder_after' insertions. */
221 struct element_t *cursor;
223 /* This value is used when handling ellipsis. */
224 struct element_t ellipsis_weight;
226 /* Known collating elements. */
227 hash_table elem_table;
229 /* Known collating symbols. */
230 hash_table sym_table;
232 /* Known collation sequences. */
233 hash_table seq_table;
235 struct obstack mempool;
237 /* The LC_COLLATE category is a bit special as it is sometimes possible
238 that the definitions from more than one input file contains information.
239 Therefore we keep all relevant input in a list. */
240 struct locale_collate_t *next;
242 /* Arrays with heads of the list for each of the leading bytes in
243 the multibyte sequences. */
244 struct element_t *mbheads[256];
246 /* Arrays with heads of the list for each of the leading bytes in
247 the multibyte sequences. */
248 struct wchead_table wcheads;
250 /* The arrays with the collation sequence order. */
251 unsigned char mbseqorder[256];
252 struct collseq_table wcseqorder;
254 /* State of the preprocessor. */
255 enum
257 else_none = 0,
258 else_ignore,
259 else_seen
261 else_action;
/* We have a few global variables which are used for reading all
   LC_COLLATE category descriptions in all files.  */

/* Number of sorting rules (levels); zero until the first direction
   specification has been read.  */
static uint32_t nrules;

/* List of defined preprocessor symbols.  */
static struct name_list *defined;
/* We need UTF-8 encoding of numbers.

   Store the encoding of VAL in BUF and return the number of bytes
   written (1 up to 6).  BUF must have room for that many bytes; no
   terminating NUL is written.  Values below 0x80 (negative ones
   included) are stored as the single byte `(char) VAL'.  */
static inline int
__attribute ((always_inline))
utf8_encode (char *buf, int val)
{
  int retval;

  if (val < 0x80)
    {
      *buf = (char) val;
      retval = 1;
    }
  else
    {
      /* Do all bit fiddling on an unsigned copy so that no signed value
         is ever shifted.  */
      uint32_t rest = (uint32_t) val;
      unsigned char lead;
      int step;

      /* A sequence of STEP bytes has room for 5 * STEP + 1 payload
         bits; pick the shortest one which fits.  */
      for (step = 2; step < 6; ++step)
        if ((rest & (~(uint32_t) 0 << (5 * step + 1))) == 0)
          break;
      retval = step;

      /* The lead byte starts with STEP one bits followed by a zero bit.  */
      lead = (unsigned char) (0xff00u >> step);

      /* Fill in the continuation bytes from the back, six bits each.  */
      while (--step > 0)
        {
          buf[step] = (char) (0x80 | (rest & 0x3f));
          rest >>= 6;
        }

      /* Whatever is left goes into the low bits of the lead byte.  */
      buf[0] = (char) (lead | rest);
    }

  return retval;
}
309 static struct section_list *
310 make_seclist_elem (struct locale_collate_t *collate, const char *string,
311 struct section_list *next)
313 struct section_list *newp;
315 newp = (struct section_list *) obstack_alloc (&collate->mempool,
316 sizeof (*newp));
317 newp->next = next;
318 newp->name = string;
319 newp->first = NULL;
320 newp->last = NULL;
322 return newp;
326 static struct element_t *
327 new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
328 const uint32_t *wcs, const char *name, size_t namelen,
329 int is_character)
331 struct element_t *newp;
333 newp = (struct element_t *) obstack_alloc (&collate->mempool,
334 sizeof (*newp));
335 newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
336 name, namelen);
337 if (mbs != NULL)
339 newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
340 newp->nmbs = mbslen;
342 else
344 newp->mbs = NULL;
345 newp->nmbs = 0;
347 if (wcs != NULL)
349 size_t nwcs = wcslen ((wchar_t *) wcs);
350 uint32_t zero = 0;
351 obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
352 obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
353 newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
354 newp->nwcs = nwcs;
356 else
358 newp->wcs = NULL;
359 newp->nwcs = 0;
361 newp->mborder = NULL;
362 newp->wcorder = 0;
363 newp->used_in_level = 0;
364 newp->is_character = is_character;
366 /* Will be assigned later. XXX */
367 newp->mbseqorder = 0;
368 newp->wcseqorder = 0;
370 /* Will be allocated later. */
371 newp->weights = NULL;
373 newp->file = NULL;
374 newp->line = 0;
376 newp->section = collate->current_section;
378 newp->last = NULL;
379 newp->next = NULL;
381 newp->mbnext = NULL;
382 newp->mblast = NULL;
384 newp->wcnext = NULL;
385 newp->wclast = NULL;
387 return newp;
391 static struct symbol_t *
392 new_symbol (struct locale_collate_t *collate, const char *name, size_t len)
394 struct symbol_t *newp;
396 newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp));
398 newp->name = obstack_copy0 (&collate->mempool, name, len);
399 newp->order = NULL;
401 newp->file = NULL;
402 newp->line = 0;
404 return newp;
408 /* Test whether this name is already defined somewhere. */
409 static int
410 check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
411 const struct charmap_t *charmap,
412 struct repertoire_t *repertoire, const char *symbol,
413 size_t symbol_len)
415 void *ignore = NULL;
417 if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
419 lr_error (ldfile, _("`%.*s' already defined in charmap"),
420 (int) symbol_len, symbol);
421 return 1;
424 if (repertoire != NULL
425 && (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore)
426 == 0))
428 lr_error (ldfile, _("`%.*s' already defined in repertoire"),
429 (int) symbol_len, symbol);
430 return 1;
433 if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
435 lr_error (ldfile, _("`%.*s' already defined as collating symbol"),
436 (int) symbol_len, symbol);
437 return 1;
440 if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
442 lr_error (ldfile, _("`%.*s' already defined as collating element"),
443 (int) symbol_len, symbol);
444 return 1;
447 return 0;
451 /* Read the direction specification. */
452 static void
453 read_directions (struct linereader *ldfile, struct token *arg,
454 const struct charmap_t *charmap,
455 struct repertoire_t *repertoire, struct localedef_t *result)
457 int cnt = 0;
458 int max = nrules ?: 10;
459 enum coll_sort_rule *rules = calloc (max, sizeof (*rules));
460 int warned = 0;
461 struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
463 while (1)
465 int valid = 0;
467 if (arg->tok == tok_forward)
469 if (rules[cnt] & sort_backward)
471 if (! warned)
473 lr_error (ldfile, _("\
474 %s: `forward' and `backward' are mutually excluding each other"),
475 "LC_COLLATE");
476 warned = 1;
479 else if (rules[cnt] & sort_forward)
481 if (! warned)
483 lr_error (ldfile, _("\
484 %s: `%s' mentioned more than once in definition of weight %d"),
485 "LC_COLLATE", "forward", cnt + 1);
488 else
489 rules[cnt] |= sort_forward;
491 valid = 1;
493 else if (arg->tok == tok_backward)
495 if (rules[cnt] & sort_forward)
497 if (! warned)
499 lr_error (ldfile, _("\
500 %s: `forward' and `backward' are mutually excluding each other"),
501 "LC_COLLATE");
502 warned = 1;
505 else if (rules[cnt] & sort_backward)
507 if (! warned)
509 lr_error (ldfile, _("\
510 %s: `%s' mentioned more than once in definition of weight %d"),
511 "LC_COLLATE", "backward", cnt + 1);
514 else
515 rules[cnt] |= sort_backward;
517 valid = 1;
519 else if (arg->tok == tok_position)
521 if (rules[cnt] & sort_position)
523 if (! warned)
525 lr_error (ldfile, _("\
526 %s: `%s' mentioned more than once in definition of weight %d"),
527 "LC_COLLATE", "position", cnt + 1);
530 else
531 rules[cnt] |= sort_position;
533 valid = 1;
536 if (valid)
537 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
539 if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
540 || arg->tok == tok_semicolon)
542 if (! valid && ! warned)
544 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
545 warned = 1;
548 /* See whether we have to increment the counter. */
549 if (arg->tok != tok_comma && rules[cnt] != 0)
551 /* Add the default `forward' if we have seen only `position'. */
552 if (rules[cnt] == sort_position)
553 rules[cnt] = sort_position | sort_forward;
555 ++cnt;
558 if (arg->tok == tok_eof || arg->tok == tok_eol)
559 /* End of line or file, so we exit the loop. */
560 break;
562 if (nrules == 0)
564 /* See whether we have enough room in the array. */
565 if (cnt == max)
567 max += 10;
568 rules = (enum coll_sort_rule *) xrealloc (rules,
570 * sizeof (*rules));
571 memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
574 else
576 if (cnt == nrules)
578 /* There must not be any more rule. */
579 if (! warned)
581 lr_error (ldfile, _("\
582 %s: too many rules; first entry only had %d"),
583 "LC_COLLATE", nrules);
584 warned = 1;
587 lr_ignore_rest (ldfile, 0);
588 break;
592 else
594 if (! warned)
596 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
597 warned = 1;
601 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
604 if (nrules == 0)
606 /* Now we know how many rules we have. */
607 nrules = cnt;
608 rules = (enum coll_sort_rule *) xrealloc (rules,
609 nrules * sizeof (*rules));
611 else
613 if (cnt < nrules)
615 /* Not enough rules in this specification. */
616 if (! warned)
617 lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
620 rules[cnt] = sort_forward;
621 while (++cnt < nrules);
625 collate->current_section->rules = rules;
629 static struct element_t *
630 find_element (struct linereader *ldfile, struct locale_collate_t *collate,
631 const char *str, size_t len)
633 void *result = NULL;
635 /* Search for the entries among the collation sequences already define. */
636 if (find_entry (&collate->seq_table, str, len, &result) != 0)
638 /* Nope, not define yet. So we see whether it is a
639 collation symbol. */
640 void *ptr;
642 if (find_entry (&collate->sym_table, str, len, &ptr) == 0)
644 /* It's a collation symbol. */
645 struct symbol_t *sym = (struct symbol_t *) ptr;
646 result = sym->order;
648 if (result == NULL)
649 result = sym->order = new_element (collate, NULL, 0, NULL,
650 NULL, 0, 0);
652 else if (find_entry (&collate->elem_table, str, len, &result) != 0)
654 /* It's also no collation element. So it is a character
655 element defined later. */
656 result = new_element (collate, NULL, 0, NULL, str, len, 1);
657 /* Insert it into the sequence table. */
658 insert_entry (&collate->seq_table, str, len, result);
662 return (struct element_t *) result;
666 static void
667 unlink_element (struct locale_collate_t *collate)
669 if (collate->cursor == collate->start)
671 assert (collate->cursor->next == NULL);
672 assert (collate->cursor->last == NULL);
673 collate->cursor = NULL;
675 else
677 if (collate->cursor->next != NULL)
678 collate->cursor->next->last = collate->cursor->last;
679 if (collate->cursor->last != NULL)
680 collate->cursor->last->next = collate->cursor->next;
681 collate->cursor = collate->cursor->last;
686 static void
687 insert_weights (struct linereader *ldfile, struct element_t *elem,
688 const struct charmap_t *charmap,
689 struct repertoire_t *repertoire, struct localedef_t *result,
690 enum token_t ellipsis)
692 int weight_cnt;
693 struct token *arg;
694 struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
696 /* Initialize all the fields. */
697 elem->file = ldfile->fname;
698 elem->line = ldfile->lineno;
700 elem->last = collate->cursor;
701 elem->next = collate->cursor ? collate->cursor->next : NULL;
702 if (collate->cursor != NULL && collate->cursor->next != NULL)
703 collate->cursor->next->last = elem;
704 if (collate->cursor != NULL)
705 collate->cursor->next = elem;
706 if (collate->start == NULL)
708 assert (collate->cursor == NULL);
709 collate->start = elem;
712 elem->section = collate->current_section;
714 if (collate->current_section->first == NULL)
715 collate->current_section->first = elem;
716 if (collate->current_section->last == collate->cursor)
717 collate->current_section->last = elem;
719 collate->cursor = elem;
721 elem->weights = (struct element_list_t *)
722 obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
723 memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
725 weight_cnt = 0;
727 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
730 if (arg->tok == tok_eof || arg->tok == tok_eol)
731 break;
733 if (arg->tok == tok_ignore)
735 /* The weight for this level has to be ignored. We use the
736 null pointer to indicate this. */
737 elem->weights[weight_cnt].w = (struct element_t **)
738 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
739 elem->weights[weight_cnt].w[0] = NULL;
740 elem->weights[weight_cnt].cnt = 1;
742 else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
744 char ucs4str[10];
745 struct element_t *val;
746 char *symstr;
747 size_t symlen;
749 if (arg->tok == tok_bsymbol)
751 symstr = arg->val.str.startmb;
752 symlen = arg->val.str.lenmb;
754 else
756 snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
757 symstr = ucs4str;
758 symlen = 9;
761 val = find_element (ldfile, collate, symstr, symlen);
762 if (val == NULL)
763 break;
765 elem->weights[weight_cnt].w = (struct element_t **)
766 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
767 elem->weights[weight_cnt].w[0] = val;
768 elem->weights[weight_cnt].cnt = 1;
770 else if (arg->tok == tok_string)
772 /* Split the string up in the individual characters and put
773 the element definitions in the list. */
774 const char *cp = arg->val.str.startmb;
775 int cnt = 0;
776 struct element_t *charelem;
777 struct element_t **weights = NULL;
778 int max = 0;
780 if (*cp == '\0')
782 lr_error (ldfile, _("%s: empty weight string not allowed"),
783 "LC_COLLATE");
784 lr_ignore_rest (ldfile, 0);
785 break;
790 if (*cp == '<')
792 /* Ahh, it's a bsymbol or an UCS4 value. If it's
793 the latter we have to unify the name. */
794 const char *startp = ++cp;
795 size_t len;
797 while (*cp != '>')
799 if (*cp == ldfile->escape_char)
800 ++cp;
801 if (*cp == '\0')
802 /* It's a syntax error. */
803 goto syntax;
805 ++cp;
808 if (cp - startp == 5 && startp[0] == 'U'
809 && isxdigit (startp[1]) && isxdigit (startp[2])
810 && isxdigit (startp[3]) && isxdigit (startp[4]))
812 unsigned int ucs4 = strtoul (startp + 1, NULL, 16);
813 char *newstr;
815 newstr = (char *) xmalloc (10);
816 snprintf (newstr, 10, "U%08X", ucs4);
817 startp = newstr;
819 len = 9;
821 else
822 len = cp - startp;
824 charelem = find_element (ldfile, collate, startp, len);
825 ++cp;
827 else
829 /* People really shouldn't use characters directly in
830 the string. Especially since it's not really clear
831 what this means. We interpret all characters in the
832 string as if that would be bsymbols. Otherwise we
833 would have to match back to bsymbols somehow and this
834 is normally not what people normally expect. */
835 charelem = find_element (ldfile, collate, cp++, 1);
838 if (charelem == NULL)
840 /* We ignore the rest of the line. */
841 lr_ignore_rest (ldfile, 0);
842 break;
845 /* Add the pointer. */
846 if (cnt >= max)
848 struct element_t **newp;
849 max += 10;
850 newp = (struct element_t **)
851 alloca (max * sizeof (struct element_t *));
852 memcpy (newp, weights, cnt * sizeof (struct element_t *));
853 weights = newp;
855 weights[cnt++] = charelem;
857 while (*cp != '\0');
859 /* Now store the information. */
860 elem->weights[weight_cnt].w = (struct element_t **)
861 obstack_alloc (&collate->mempool,
862 cnt * sizeof (struct element_t *));
863 memcpy (elem->weights[weight_cnt].w, weights,
864 cnt * sizeof (struct element_t *));
865 elem->weights[weight_cnt].cnt = cnt;
867 /* We don't need the string anymore. */
868 free (arg->val.str.startmb);
870 else if (ellipsis != tok_none
871 && (arg->tok == tok_ellipsis2
872 || arg->tok == tok_ellipsis3
873 || arg->tok == tok_ellipsis4))
875 /* It must be the same ellipsis as used in the initial column. */
876 if (arg->tok != ellipsis)
877 lr_error (ldfile, _("\
878 %s: weights must use the same ellipsis symbol as the name"),
879 "LC_COLLATE");
881 /* The weight for this level will depend on the element
882 iterating over the range. Put a placeholder. */
883 elem->weights[weight_cnt].w = (struct element_t **)
884 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
885 elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
886 elem->weights[weight_cnt].cnt = 1;
888 else
890 syntax:
891 /* It's a syntax error. */
892 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
893 lr_ignore_rest (ldfile, 0);
894 break;
897 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
898 /* This better should be the end of the line or a semicolon. */
899 if (arg->tok == tok_semicolon)
900 /* OK, ignore this and read the next token. */
901 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
902 else if (arg->tok != tok_eof && arg->tok != tok_eol)
904 /* It's a syntax error. */
905 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
906 lr_ignore_rest (ldfile, 0);
907 break;
910 while (++weight_cnt < nrules);
912 if (weight_cnt < nrules)
914 /* This means the rest of the line uses the current element as
915 the weight. */
918 elem->weights[weight_cnt].w = (struct element_t **)
919 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
920 if (ellipsis == tok_none)
921 elem->weights[weight_cnt].w[0] = elem;
922 else
923 elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
924 elem->weights[weight_cnt].cnt = 1;
926 while (++weight_cnt < nrules);
928 else
930 if (arg->tok == tok_ignore || arg->tok == tok_bsymbol)
932 /* Too many rule values. */
933 lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
934 lr_ignore_rest (ldfile, 0);
936 else
937 lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
942 static int
943 insert_value (struct linereader *ldfile, const char *symstr, size_t symlen,
944 const struct charmap_t *charmap, struct repertoire_t *repertoire,
945 struct localedef_t *result)
947 /* First find out what kind of symbol this is. */
948 struct charseq *seq;
949 uint32_t wc;
950 struct element_t *elem = NULL;
951 struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
953 /* Try to find the character in the charmap. */
954 seq = charmap_find_value (charmap, symstr, symlen);
956 /* Determine the wide character. */
957 if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
959 wc = repertoire_find_value (repertoire, symstr, symlen);
960 if (seq != NULL)
961 seq->ucs4 = wc;
963 else
964 wc = seq->ucs4;
966 if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
968 /* It's no character, so look through the collation elements and
969 symbol list. */
970 void *ptr = elem;
971 if (find_entry (&collate->elem_table, symstr, symlen, &ptr) != 0)
973 void *result;
974 struct symbol_t *sym = NULL;
976 /* It's also collation element. Therefore it's either a
977 collating symbol or it's a character which is not
978 supported by the character set. In the later case we
979 simply create a dummy entry. */
980 if (find_entry (&collate->sym_table, symstr, symlen, &result) == 0)
982 /* It's a collation symbol. */
983 sym = (struct symbol_t *) result;
985 elem = sym->order;
988 if (elem == NULL)
990 elem = new_element (collate, NULL, 0, NULL, symstr, symlen, 0);
992 if (sym != NULL)
993 sym->order = elem;
994 else
995 /* Enter a fake element in the sequence table. This
996 won't cause anything in the output since there is
997 no multibyte or wide character associated with
998 it. */
999 insert_entry (&collate->seq_table, symstr, symlen, elem);
1002 else
1003 /* Copy the result back. */
1004 elem = ptr;
1006 else
1008 /* Otherwise the symbols stands for a character. */
1009 void *ptr = elem;
1010 if (find_entry (&collate->seq_table, symstr, symlen, &ptr) != 0)
1012 uint32_t wcs[2] = { wc, 0 };
1014 /* We have to allocate an entry. */
1015 elem = new_element (collate,
1016 seq != NULL ? (char *) seq->bytes : NULL,
1017 seq != NULL ? seq->nbytes : 0,
1018 wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
1019 symstr, symlen, 1);
1021 /* And add it to the table. */
1022 if (insert_entry (&collate->seq_table, symstr, symlen, elem) != 0)
1023 /* This cannot happen. */
1024 assert (! "Internal error");
1026 else
1028 /* Copy the result back. */
1029 elem = ptr;
1031 /* Maybe the character was used before the definition. In this case
1032 we have to insert the byte sequences now. */
1033 if (elem->mbs == NULL && seq != NULL)
1035 elem->mbs = obstack_copy0 (&collate->mempool,
1036 seq->bytes, seq->nbytes);
1037 elem->nmbs = seq->nbytes;
1040 if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
1042 uint32_t wcs[2] = { wc, 0 };
1044 elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
1045 elem->nwcs = 1;
1050 /* Test whether this element is not already in the list. */
1051 if (elem->next != NULL || elem == collate->cursor)
1053 lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
1054 (int) symlen, symstr, elem->file, elem->line);
1055 lr_ignore_rest (ldfile, 0);
1056 return 1;
1059 insert_weights (ldfile, elem, charmap, repertoire, result, tok_none);
1061 return 0;
1065 static void
1066 handle_ellipsis (struct linereader *ldfile, const char *symstr, size_t symlen,
1067 enum token_t ellipsis, const struct charmap_t *charmap,
1068 struct repertoire_t *repertoire,
1069 struct localedef_t *result)
1071 struct element_t *startp;
1072 struct element_t *endp;
1073 struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
1075 /* Unlink the entry added for the ellipsis. */
1076 unlink_element (collate);
1077 startp = collate->cursor;
1079 /* Process and add the end-entry. */
1080 if (symstr != NULL
1081 && insert_value (ldfile, symstr, symlen, charmap, repertoire, result))
1082 /* Something went wrong with inserting the to-value. This means
1083 we cannot process the ellipsis. */
1084 return;
1086 /* Reset the cursor. */
1087 collate->cursor = startp;
1089 /* Now we have to handle many different situations:
1090 - we have to distinguish between the three different ellipsis forms
1091 - the is the ellipsis at the beginning, in the middle, or at the end.
1093 endp = collate->cursor->next;
1094 assert (symstr == NULL || endp != NULL);
1096 /* XXX The following is probably very wrong since also collating symbols
1097 can appear in ranges. But do we want/can refine the test for that? */
1098 #if 0
1099 /* Both, the start and the end symbol, must stand for characters. */
1100 if ((startp != NULL && (startp->name == NULL || ! startp->is_character))
1101 || (endp != NULL && (endp->name == NULL|| ! endp->is_character)))
1103 lr_error (ldfile, _("\
1104 %s: the start and the end symbol of a range must stand for characters"),
1105 "LC_COLLATE");
1106 return;
1108 #endif
1110 if (ellipsis == tok_ellipsis3)
1112 /* One requirement we make here: the length of the byte
1113 sequences for the first and end character must be the same.
1114 This is mainly to prevent unwanted effects and this is often
1115 not what is wanted. */
1116 size_t len = (startp->mbs != NULL ? startp->nmbs
1117 : (endp->mbs != NULL ? endp->nmbs : 0));
1118 char mbcnt[len + 1];
1119 char mbend[len + 1];
1121 /* Well, this should be caught somewhere else already. Just to
1122 make sure. */
1123 assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0);
1124 assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0);
1126 if (startp != NULL && endp != NULL
1127 && startp->mbs != NULL && endp->mbs != NULL
1128 && startp->nmbs != endp->nmbs)
1130 lr_error (ldfile, _("\
1131 %s: byte sequences of first and last character must have the same length"),
1132 "LC_COLLATE");
1133 return;
1136 /* Determine whether we have to generate multibyte sequences. */
1137 if ((startp == NULL || startp->mbs != NULL)
1138 && (endp == NULL || endp->mbs != NULL))
1140 int cnt;
1141 int ret;
1143 /* Prepare the beginning byte sequence. This is either from the
1144 beginning byte sequence or it is all nulls if it was an
1145 initial ellipsis. */
1146 if (startp == NULL || startp->mbs == NULL)
1147 memset (mbcnt, '\0', len);
1148 else
1150 memcpy (mbcnt, startp->mbs, len);
1152 /* And increment it so that the value is the first one we will
1153 try to insert. */
1154 for (cnt = len - 1; cnt >= 0; --cnt)
1155 if (++mbcnt[cnt] != '\0')
1156 break;
1158 mbcnt[len] = '\0';
1160 /* And the end sequence. */
1161 if (endp == NULL || endp->mbs == NULL)
1162 memset (mbend, '\0', len);
1163 else
1164 memcpy (mbend, endp->mbs, len);
1165 mbend[len] = '\0';
1167 /* Test whether we have a correct range. */
1168 ret = memcmp (mbcnt, mbend, len);
1169 if (ret >= 0)
1171 if (ret > 0)
1172 lr_error (ldfile, _("%s: byte sequence of first character of \
1173 range is not lower than that of the last character"), "LC_COLLATE");
1174 return;
1177 /* Generate the byte sequences data. */
1178 while (1)
1180 struct charseq *seq;
1182 /* Quite a bit of work ahead. We have to find the character
1183 definition for the byte sequence and then determine the
1184 wide character belonging to it. */
1185 seq = charmap_find_symbol (charmap, mbcnt, len);
1186 if (seq != NULL)
1188 struct element_t *elem;
1189 size_t namelen;
1191 /* I don't think this can ever happen. */
1192 assert (seq->name != NULL);
1193 namelen = strlen (seq->name);
1195 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1196 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1197 namelen);
1199 /* Now we are ready to insert the new value in the
1200 sequence. Find out whether the element is
1201 already known. */
1202 void *ptr;
1203 if (find_entry (&collate->seq_table, seq->name, namelen,
1204 &ptr) != 0)
1206 uint32_t wcs[2] = { seq->ucs4, 0 };
1208 /* We have to allocate an entry. */
1209 elem = new_element (collate, mbcnt, len,
1210 seq->ucs4 == ILLEGAL_CHAR_VALUE
1211 ? NULL : wcs, seq->name,
1212 namelen, 1);
1214 /* And add it to the table. */
1215 if (insert_entry (&collate->seq_table, seq->name,
1216 namelen, elem) != 0)
1217 /* This cannot happen. */
1218 assert (! "Internal error");
1220 else
1221 /* Copy the result. */
1222 elem = ptr;
1224 /* Test whether this element is not already in the list. */
1225 if (elem->next != NULL || (collate->cursor != NULL
1226 && elem->next == collate->cursor))
1228 lr_error (ldfile, _("\
1229 order for `%.*s' already defined at %s:%Zu"),
1230 (int) namelen, seq->name,
1231 elem->file, elem->line);
1232 goto increment;
1235 /* Enqueue the new element. */
1236 elem->last = collate->cursor;
1237 if (collate->cursor == NULL)
1238 elem->next = NULL;
1239 else
1241 elem->next = collate->cursor->next;
1242 elem->last->next = elem;
1243 if (elem->next != NULL)
1244 elem->next->last = elem;
1246 if (collate->start == NULL)
1248 assert (collate->cursor == NULL);
1249 collate->start = elem;
1251 collate->cursor = elem;
1253 /* Add the weight value. We take them from the
1254 `ellipsis_weights' member of `collate'. */
1255 elem->weights = (struct element_list_t *)
1256 obstack_alloc (&collate->mempool,
1257 nrules * sizeof (struct element_list_t));
1258 for (cnt = 0; cnt < nrules; ++cnt)
1259 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1260 && (collate->ellipsis_weight.weights[cnt].w[0]
1261 == ELEMENT_ELLIPSIS2))
1263 elem->weights[cnt].w = (struct element_t **)
1264 obstack_alloc (&collate->mempool,
1265 sizeof (struct element_t *));
1266 elem->weights[cnt].w[0] = elem;
1267 elem->weights[cnt].cnt = 1;
1269 else
1271 /* Simply use the weight from `ellipsis_weight'. */
1272 elem->weights[cnt].w =
1273 collate->ellipsis_weight.weights[cnt].w;
1274 elem->weights[cnt].cnt =
1275 collate->ellipsis_weight.weights[cnt].cnt;
1279 /* Increment for the next round. */
1280 increment:
1281 for (cnt = len - 1; cnt >= 0; --cnt)
1282 if (++mbcnt[cnt] != '\0')
1283 break;
1285 /* Find out whether this was all. */
1286 if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0)
1287 /* Yep, that's all. */
1288 break;
1292 else
1294 /* For symbolic range we naturally must have a beginning and an
1295 end specified by the user. */
1296 if (startp == NULL)
1297 lr_error (ldfile, _("\
1298 %s: symbolic range ellipsis must not directly follow `order_start'"),
1299 "LC_COLLATE");
1300 else if (endp == NULL)
1301 lr_error (ldfile, _("\
1302 %s: symbolic range ellipsis must not be directly followed by `order_end'"),
1303 "LC_COLLATE");
1304 else
1306 /* Determine the range. To do so we have to determine the
1307 common prefix of the both names and then the numeric
1308 values of both ends. */
1309 size_t lenfrom = strlen (startp->name);
1310 size_t lento = strlen (endp->name);
1311 char buf[lento + 1];
1312 int preflen = 0;
1313 long int from;
1314 long int to;
1315 char *cp;
1316 int base = ellipsis == tok_ellipsis2 ? 16 : 10;
1318 if (lenfrom != lento)
1320 invalid_range:
1321 lr_error (ldfile, _("\
1322 `%s' and `%.*s' are not valid names for symbolic range"),
1323 startp->name, (int) lento, endp->name);
1324 return;
1327 while (startp->name[preflen] == endp->name[preflen])
1328 if (startp->name[preflen] == '\0')
1329 /* Nothing to be done. The start and end point are identical
1330 and while inserting the end point we have already given
1331 the user an error message. */
1332 return;
1333 else
1334 ++preflen;
1336 errno = 0;
1337 from = strtol (startp->name + preflen, &cp, base);
1338 if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0')
1339 goto invalid_range;
1341 errno = 0;
1342 to = strtol (endp->name + preflen, &cp, base);
1343 if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0')
1344 goto invalid_range;
1346 /* Copy the prefix. */
1347 memcpy (buf, startp->name, preflen);
1349 /* Loop over all values. */
1350 for (++from; from < to; ++from)
1352 struct element_t *elem = NULL;
1353 struct charseq *seq;
1354 uint32_t wc;
1355 int cnt;
1357 /* Generate the name. */
1358 sprintf (buf + preflen, base == 10 ? "%0*ld" : "%0*lX",
1359 (int) (lenfrom - preflen), from);
1361 /* Look whether this name is already defined. */
1362 void *ptr;
1363 if (find_entry (&collate->seq_table, buf, symlen, &ptr) == 0)
1365 /* Copy back the result. */
1366 elem = ptr;
1368 if (elem->next != NULL || (collate->cursor != NULL
1369 && elem->next == collate->cursor))
1371 lr_error (ldfile, _("\
1372 %s: order for `%.*s' already defined at %s:%Zu"),
1373 "LC_COLLATE", (int) lenfrom, buf,
1374 elem->file, elem->line);
1375 continue;
1378 if (elem->name == NULL)
1380 lr_error (ldfile, _("%s: `%s' must be a character"),
1381 "LC_COLLATE", buf);
1382 continue;
1386 if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL))
1388 /* Search for a character of this name. */
1389 seq = charmap_find_value (charmap, buf, lenfrom);
1390 if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1392 wc = repertoire_find_value (repertoire, buf, lenfrom);
1394 if (seq != NULL)
1395 seq->ucs4 = wc;
1397 else
1398 wc = seq->ucs4;
1400 if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1401 /* We don't know anything about a character with this
1402 name. XXX Should we warn? */
1403 continue;
1405 if (elem == NULL)
1407 uint32_t wcs[2] = { wc, 0 };
1409 /* We have to allocate an entry. */
1410 elem = new_element (collate,
1411 seq != NULL
1412 ? (char *) seq->bytes : NULL,
1413 seq != NULL ? seq->nbytes : 0,
1414 wc == ILLEGAL_CHAR_VALUE
1415 ? NULL : wcs, buf, lenfrom, 1);
1417 else
1419 /* Update the element. */
1420 if (seq != NULL)
1422 elem->mbs = obstack_copy0 (&collate->mempool,
1423 seq->bytes, seq->nbytes);
1424 elem->nmbs = seq->nbytes;
1427 if (wc != ILLEGAL_CHAR_VALUE)
1429 uint32_t zero = 0;
1431 obstack_grow (&collate->mempool,
1432 &wc, sizeof (uint32_t));
1433 obstack_grow (&collate->mempool,
1434 &zero, sizeof (uint32_t));
1435 elem->wcs = obstack_finish (&collate->mempool);
1436 elem->nwcs = 1;
1440 elem->file = ldfile->fname;
1441 elem->line = ldfile->lineno;
1442 elem->section = collate->current_section;
1445 /* Enqueue the new element. */
1446 elem->last = collate->cursor;
1447 elem->next = collate->cursor->next;
1448 elem->last->next = elem;
1449 if (elem->next != NULL)
1450 elem->next->last = elem;
1451 collate->cursor = elem;
1453 /* Now add the weights. They come from the `ellipsis_weights'
1454 member of `collate'. */
1455 elem->weights = (struct element_list_t *)
1456 obstack_alloc (&collate->mempool,
1457 nrules * sizeof (struct element_list_t));
1458 for (cnt = 0; cnt < nrules; ++cnt)
1459 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1460 && (collate->ellipsis_weight.weights[cnt].w[0]
1461 == ELEMENT_ELLIPSIS2))
1463 elem->weights[cnt].w = (struct element_t **)
1464 obstack_alloc (&collate->mempool,
1465 sizeof (struct element_t *));
1466 elem->weights[cnt].w[0] = elem;
1467 elem->weights[cnt].cnt = 1;
1469 else
1471 /* Simply use the weight from `ellipsis_weight'. */
1472 elem->weights[cnt].w =
1473 collate->ellipsis_weight.weights[cnt].w;
1474 elem->weights[cnt].cnt =
1475 collate->ellipsis_weight.weights[cnt].cnt;
1483 static void
1484 collate_startup (struct linereader *ldfile, struct localedef_t *locale,
1485 struct localedef_t *copy_locale, int ignore_content)
1487 if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
1489 struct locale_collate_t *collate;
1491 if (copy_locale == NULL)
1493 collate = locale->categories[LC_COLLATE].collate =
1494 (struct locale_collate_t *)
1495 xcalloc (1, sizeof (struct locale_collate_t));
1497 /* Init the various data structures. */
1498 init_hash (&collate->elem_table, 100);
1499 init_hash (&collate->sym_table, 100);
1500 init_hash (&collate->seq_table, 500);
1501 obstack_init (&collate->mempool);
1503 collate->col_weight_max = -1;
1505 else
1506 /* Reuse the copy_locale's data structures. */
1507 collate = locale->categories[LC_COLLATE].collate =
1508 copy_locale->categories[LC_COLLATE].collate;
1511 ldfile->translate_strings = 0;
1512 ldfile->return_widestr = 0;
1516 void
1517 collate_finish (struct localedef_t *locale, const struct charmap_t *charmap)
1519 /* Now is the time when we can assign the individual collation
1520 values for all the symbols. We have possibly different values
1521 for the wide- and the multibyte-character symbols. This is done
1522 since it might make a difference in the encoding if there is in
1523 some cases no multibyte-character but there are wide-characters.
1524 (The other way around it is not important since theencoded
1525 collation value in the wide-character case is 32 bits wide and
1526 therefore requires no encoding).
1528 The lowest collation value assigned is 2. Zero is reserved for
1529 the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1530 functions and 1 is used to separate the individual passes for the
1531 different rules.
1533 We also have to construct is list with all the bytes/words which
1534 can come first in a sequence, followed by all the elements which
1535 also start with this byte/word. The order is reverse which has
1536 among others the important effect that longer strings are located
1537 first in the list. This is required for the output data since
1538 the algorithm used in `strcoll' etc depends on this.
1540 The multibyte case is easy. We simply sort into an array with
1541 256 elements. */
1542 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1543 int mbact[nrules];
1544 int wcact;
1545 int mbseqact;
1546 int wcseqact;
1547 struct element_t *runp;
1548 int i;
1549 int need_undefined = 0;
1550 struct section_list *sect;
1551 int ruleidx;
1552 int nr_wide_elems = 0;
1554 if (collate == NULL)
1556 /* No data, no check. */
1557 if (! be_quiet)
1558 WITH_CUR_LOCALE (error (0, 0, _("No definition for %s category found"),
1559 "LC_COLLATE"));
1560 return;
1563 /* If this assertion is hit change the type in `element_t'. */
1564 assert (nrules <= sizeof (runp->used_in_level) * 8);
1566 /* Make sure that the `position' rule is used either in all sections
1567 or in none. */
1568 for (i = 0; i < nrules; ++i)
1569 for (sect = collate->sections; sect != NULL; sect = sect->next)
1570 if (sect != collate->current_section
1571 && sect->rules != NULL
1572 && ((sect->rules[i] & sort_position)
1573 != (collate->current_section->rules[i] & sort_position)))
1575 WITH_CUR_LOCALE (error (0, 0, _("\
1576 %s: `position' must be used for a specific level in all sections or none"),
1577 "LC_COLLATE"));
1578 break;
1581 /* Find out which elements are used at which level. At the same
1582 time we find out whether we have any undefined symbols. */
1583 runp = collate->start;
1584 while (runp != NULL)
1586 if (runp->mbs != NULL)
1588 for (i = 0; i < nrules; ++i)
1590 int j;
1592 for (j = 0; j < runp->weights[i].cnt; ++j)
1593 /* A NULL pointer as the weight means IGNORE. */
1594 if (runp->weights[i].w[j] != NULL)
1596 if (runp->weights[i].w[j]->weights == NULL)
1598 WITH_CUR_LOCALE (error_at_line (0, 0, runp->file,
1599 runp->line,
1600 _("symbol `%s' not defined"),
1601 runp->weights[i].w[j]->name));
1603 need_undefined = 1;
1604 runp->weights[i].w[j] = &collate->undefined;
1606 else
1607 /* Set the bit for the level. */
1608 runp->weights[i].w[j]->used_in_level |= 1 << i;
1613 /* Up to the next entry. */
1614 runp = runp->next;
1617 /* Walk through the list of defined sequences and assign weights. Also
1618 create the data structure which will allow generating the single byte
1619 character based tables.
1621 Since at each time only the weights for each of the rules are
1622 only compared to other weights for this rule it is possible to
1623 assign more compact weight values than simply counting all
1624 weights in sequence. We can assign weights from 3, one for each
1625 rule individually and only for those elements, which are actually
1626 used for this rule.
1628 Why is this important? It is not for the wide char table. But
1629 it is for the singlebyte output since here larger numbers have to
1630 be encoded to make it possible to emit the value as a byte
1631 string. */
1632 for (i = 0; i < nrules; ++i)
1633 mbact[i] = 2;
1634 wcact = 2;
1635 mbseqact = 0;
1636 wcseqact = 0;
1637 runp = collate->start;
1638 while (runp != NULL)
1640 /* Determine the order. */
1641 if (runp->used_in_level != 0)
1643 runp->mborder = (int *) obstack_alloc (&collate->mempool,
1644 nrules * sizeof (int));
1646 for (i = 0; i < nrules; ++i)
1647 if ((runp->used_in_level & (1 << i)) != 0)
1648 runp->mborder[i] = mbact[i]++;
1649 else
1650 runp->mborder[i] = 0;
1653 if (runp->mbs != NULL)
1655 struct element_t **eptr;
1656 struct element_t *lastp = NULL;
1658 /* Find the point where to insert in the list. */
1659 eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
1660 while (*eptr != NULL)
1662 if ((*eptr)->nmbs < runp->nmbs)
1663 break;
1665 if ((*eptr)->nmbs == runp->nmbs)
1667 int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
1669 if (c == 0)
1671 /* This should not happen. It means that we have
1672 to symbols with the same byte sequence. It is
1673 of course an error. */
1674 WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
1675 (*eptr)->line,
1676 _("\
1677 symbol `%s' has the same encoding as"), (*eptr)->name);
1678 error_at_line (0, 0, runp->file,
1679 runp->line,
1680 _("symbol `%s'"),
1681 runp->name));
1682 goto dont_insert;
1684 else if (c < 0)
1685 /* Insert it here. */
1686 break;
1689 /* To the next entry. */
1690 lastp = *eptr;
1691 eptr = &(*eptr)->mbnext;
1694 /* Set the pointers. */
1695 runp->mbnext = *eptr;
1696 runp->mblast = lastp;
1697 if (*eptr != NULL)
1698 (*eptr)->mblast = runp;
1699 *eptr = runp;
1700 dont_insert:
1704 if (runp->used_in_level)
1706 runp->wcorder = wcact++;
1708 /* We take the opportunity to count the elements which have
1709 wide characters. */
1710 ++nr_wide_elems;
1713 if (runp->is_character)
1715 if (runp->nmbs == 1)
1716 collate->mbseqorder[((unsigned char *) runp->mbs)[0]] = mbseqact++;
1718 runp->wcseqorder = wcseqact++;
1720 else if (runp->mbs != NULL && runp->weights != NULL)
1721 /* This is for collation elements. */
1722 runp->wcseqorder = wcseqact++;
1724 /* Up to the next entry. */
1725 runp = runp->next;
1728 /* Find out whether any of the `mbheads' entries is unset. In this
1729 case we use the UNDEFINED entry. */
1730 for (i = 1; i < 256; ++i)
1731 if (collate->mbheads[i] == NULL)
1733 need_undefined = 1;
1734 collate->mbheads[i] = &collate->undefined;
1737 /* Now to the wide character case. */
1738 collate->wcheads.p = 6;
1739 collate->wcheads.q = 10;
1740 wchead_table_init (&collate->wcheads);
1742 collate->wcseqorder.p = 6;
1743 collate->wcseqorder.q = 10;
1744 collseq_table_init (&collate->wcseqorder);
1746 /* Start adding. */
1747 runp = collate->start;
1748 while (runp != NULL)
1750 if (runp->wcs != NULL)
1752 struct element_t *e;
1753 struct element_t **eptr;
1754 struct element_t *lastp;
1756 /* Insert the collation sequence value. */
1757 if (runp->is_character)
1758 collseq_table_add (&collate->wcseqorder, runp->wcs[0],
1759 runp->wcseqorder);
1761 /* Find the point where to insert in the list. */
1762 e = wchead_table_get (&collate->wcheads, runp->wcs[0]);
1763 eptr = &e;
1764 lastp = NULL;
1765 while (*eptr != NULL)
1767 if ((*eptr)->nwcs < runp->nwcs)
1768 break;
1770 if ((*eptr)->nwcs == runp->nwcs)
1772 int c = wmemcmp ((wchar_t *) (*eptr)->wcs,
1773 (wchar_t *) runp->wcs, runp->nwcs);
1775 if (c == 0)
1777 /* This should not happen. It means that we have
1778 two symbols with the same byte sequence. It is
1779 of course an error. */
1780 WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
1781 (*eptr)->line,
1782 _("\
1783 symbol `%s' has the same encoding as"), (*eptr)->name);
1784 error_at_line (0, 0, runp->file,
1785 runp->line,
1786 _("symbol `%s'"),
1787 runp->name));
1788 goto dont_insertwc;
1790 else if (c < 0)
1791 /* Insert it here. */
1792 break;
1795 /* To the next entry. */
1796 lastp = *eptr;
1797 eptr = &(*eptr)->wcnext;
1800 /* Set the pointers. */
1801 runp->wcnext = *eptr;
1802 runp->wclast = lastp;
1803 if (*eptr != NULL)
1804 (*eptr)->wclast = runp;
1805 *eptr = runp;
1806 if (eptr == &e)
1807 wchead_table_add (&collate->wcheads, runp->wcs[0], e);
1808 dont_insertwc:
1812 /* Up to the next entry. */
1813 runp = runp->next;
1816 collseq_table_finalize (&collate->wcseqorder);
1818 /* Now determine whether the UNDEFINED entry is needed and if yes,
1819 whether it was defined. */
1820 collate->undefined.used_in_level = need_undefined ? ~0ul : 0;
1821 if (collate->undefined.file == NULL)
1823 if (need_undefined)
1825 /* This seems not to be enforced by recent standards. Don't
1826 emit an error, simply append UNDEFINED at the end. */
1827 if (0)
1828 WITH_CUR_LOCALE (error (0, 0, _("no definition of `UNDEFINED'")));
1830 /* Add UNDEFINED at the end. */
1831 collate->undefined.mborder =
1832 (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int));
1834 for (i = 0; i < nrules; ++i)
1835 collate->undefined.mborder[i] = mbact[i]++;
1838 /* In any case we will need the definition for the wide character
1839 case. But we will not complain that it is missing since the
1840 specification strangely enough does not seem to account for
1841 this. */
1842 collate->undefined.wcorder = wcact++;
1845 /* Finally, try to unify the rules for the sections. Whenever the rules
1846 for a section are the same as those for another section give the
1847 ruleset the same index. Since there are never many section we can
1848 use an O(n^2) algorithm here. */
1849 sect = collate->sections;
1850 while (sect != NULL && sect->rules == NULL)
1851 sect = sect->next;
1853 /* Bail out if we have no sections because of earlier errors. */
1854 if (sect == NULL)
1856 WITH_CUR_LOCALE (error (EXIT_FAILURE, 0,
1857 _("too many errors; giving up")));
1858 return;
1861 ruleidx = 0;
1864 struct section_list *osect = collate->sections;
1866 while (osect != sect)
1867 if (osect->rules != NULL
1868 && memcmp (osect->rules, sect->rules,
1869 nrules * sizeof (osect->rules[0])) == 0)
1870 break;
1871 else
1872 osect = osect->next;
1874 if (osect == sect)
1875 sect->ruleidx = ruleidx++;
1876 else
1877 sect->ruleidx = osect->ruleidx;
1879 /* Next section. */
1881 sect = sect->next;
1882 while (sect != NULL && sect->rules == NULL);
1884 while (sect != NULL);
1885 /* We are currently not prepared for more than 128 rulesets. But this
1886 should never really be a problem. */
1887 assert (ruleidx <= 128);
1891 static int32_t
1892 output_weight (struct obstack *pool, struct locale_collate_t *collate,
1893 struct element_t *elem)
1895 size_t cnt;
1896 int32_t retval;
1898 /* Optimize the use of UNDEFINED. */
1899 if (elem == &collate->undefined)
1900 /* The weights are already inserted. */
1901 return 0;
1903 /* This byte can start exactly one collation element and this is
1904 a single byte. We can directly give the index to the weights. */
1905 retval = obstack_object_size (pool);
1907 /* Construct the weight. */
1908 for (cnt = 0; cnt < nrules; ++cnt)
1910 char buf[elem->weights[cnt].cnt * 7];
1911 int len = 0;
1912 int i;
1914 for (i = 0; i < elem->weights[cnt].cnt; ++i)
1915 /* Encode the weight value. We do nothing for IGNORE entries. */
1916 if (elem->weights[cnt].w[i] != NULL)
1917 len += utf8_encode (&buf[len],
1918 elem->weights[cnt].w[i]->mborder[cnt]);
1920 /* And add the buffer content. */
1921 obstack_1grow (pool, len);
1922 obstack_grow (pool, buf, len);
1925 return retval | ((elem->section->ruleidx & 0x7f) << 24);
1929 static int32_t
1930 output_weightwc (struct obstack *pool, struct locale_collate_t *collate,
1931 struct element_t *elem)
1933 size_t cnt;
1934 int32_t retval;
1936 /* Optimize the use of UNDEFINED. */
1937 if (elem == &collate->undefined)
1938 /* The weights are already inserted. */
1939 return 0;
1941 /* This byte can start exactly one collation element and this is
1942 a single byte. We can directly give the index to the weights. */
1943 retval = obstack_object_size (pool) / sizeof (int32_t);
1945 /* Construct the weight. */
1946 for (cnt = 0; cnt < nrules; ++cnt)
1948 int32_t buf[elem->weights[cnt].cnt];
1949 int i;
1950 int32_t j;
1952 for (i = 0, j = 0; i < elem->weights[cnt].cnt; ++i)
1953 if (elem->weights[cnt].w[i] != NULL)
1954 buf[j++] = elem->weights[cnt].w[i]->wcorder;
1956 /* And add the buffer content. */
1957 obstack_int32_grow (pool, j);
1959 obstack_grow (pool, buf, j * sizeof (int32_t));
1962 return retval | ((elem->section->ruleidx & 0x7f) << 24);
/* Context shared with `add_to_tablewc', which receives only the
   character and the element as arguments.
   If localedef is ever threaded, this would need to be __thread var.  */
static struct
{
  /* Pool handed to `output_weightwc' for the weights.  */
  struct obstack *weightpool;
  /* Pool receiving the entries for characters which start more than
     one sequence.  */
  struct obstack *extrapool;
  /* Pool receiving the weight indices of compressed series.  */
  struct obstack *indpool;
  /* Collation data handed to `output_weightwc'.  */
  struct locale_collate_t *collate;
  /* Table the per-character index is added to.  */
  struct collidx_table *tablewc;
} atwc;

static void add_to_tablewc (uint32_t ch, struct element_t *runp);
1977 static void
1978 add_to_tablewc (uint32_t ch, struct element_t *runp)
1980 if (runp->wcnext == NULL && runp->nwcs == 1)
1982 int32_t weigthidx = output_weightwc (atwc.weightpool, atwc.collate,
1983 runp);
1984 collidx_table_add (atwc.tablewc, ch, weigthidx);
1986 else
1988 /* As for the singlebyte table, we recognize sequences and
1989 compress them. */
1990 struct element_t *lastp;
1992 collidx_table_add (atwc.tablewc, ch,
1993 -(obstack_object_size (atwc.extrapool)
1994 / sizeof (uint32_t)));
1998 /* Store the current index in the weight table. We know that
1999 the current position in the `extrapool' is aligned on a
2000 32-bit address. */
2001 int32_t weightidx;
2002 int added;
2004 /* Find out wether this is a single entry or we have more than
2005 one consecutive entry. */
2006 if (runp->wcnext != NULL
2007 && runp->nwcs == runp->wcnext->nwcs
2008 && wmemcmp ((wchar_t *) runp->wcs,
2009 (wchar_t *)runp->wcnext->wcs,
2010 runp->nwcs - 1) == 0
2011 && (runp->wcs[runp->nwcs - 1]
2012 == runp->wcnext->wcs[runp->nwcs - 1] + 1))
2014 int i;
2015 struct element_t *series_startp = runp;
2016 struct element_t *curp;
2018 /* Now add first the initial byte sequence. */
2019 added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t);
2020 if (sizeof (int32_t) == sizeof (int))
2021 obstack_make_room (atwc.extrapool, added);
2023 /* More than one consecutive entry. We mark this by having
2024 a negative index into the indirect table. */
2025 obstack_int32_grow_fast (atwc.extrapool,
2026 -(obstack_object_size (atwc.indpool)
2027 / sizeof (int32_t)));
2028 obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2031 runp = runp->wcnext;
2032 while (runp->wcnext != NULL
2033 && runp->nwcs == runp->wcnext->nwcs
2034 && wmemcmp ((wchar_t *) runp->wcs,
2035 (wchar_t *)runp->wcnext->wcs,
2036 runp->nwcs - 1) == 0
2037 && (runp->wcs[runp->nwcs - 1]
2038 == runp->wcnext->wcs[runp->nwcs - 1] + 1));
2040 /* Now walk backward from here to the beginning. */
2041 curp = runp;
2043 for (i = 1; i < runp->nwcs; ++i)
2044 obstack_int32_grow_fast (atwc.extrapool, curp->wcs[i]);
2046 /* Now find the end of the consecutive sequence and
2047 add all the indeces in the indirect pool. */
2050 weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2051 curp);
2052 obstack_int32_grow (atwc.indpool, weightidx);
2054 curp = curp->wclast;
2056 while (curp != series_startp);
2058 /* Add the final weight. */
2059 weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2060 curp);
2061 obstack_int32_grow (atwc.indpool, weightidx);
2063 /* And add the end byte sequence. Without length this
2064 time. */
2065 for (i = 1; i < curp->nwcs; ++i)
2066 obstack_int32_grow (atwc.extrapool, curp->wcs[i]);
2068 else
2070 /* A single entry. Simply add the index and the length and
2071 string (except for the first character which is already
2072 tested for). */
2073 int i;
2075 /* Output the weight info. */
2076 weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2077 runp);
2079 added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t);
2080 if (sizeof (int) == sizeof (int32_t))
2081 obstack_make_room (atwc.extrapool, added);
2083 obstack_int32_grow_fast (atwc.extrapool, weightidx);
2084 obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2085 for (i = 1; i < runp->nwcs; ++i)
2086 obstack_int32_grow_fast (atwc.extrapool, runp->wcs[i]);
2089 /* Next entry. */
2090 lastp = runp;
2091 runp = runp->wcnext;
2093 while (runp != NULL);
2097 void
2098 collate_output (struct localedef_t *locale, const struct charmap_t *charmap,
2099 const char *output_path)
2101 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
2102 const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
2103 struct iovec iov[2 + nelems];
2104 struct locale_file data;
2105 uint32_t idx[nelems];
2106 size_t cnt;
2107 size_t ch;
2108 int32_t tablemb[256];
2109 struct obstack weightpool;
2110 struct obstack extrapool;
2111 struct obstack indirectpool;
2112 struct section_list *sect;
2113 struct collidx_table tablewc;
2114 uint32_t elem_size;
2115 uint32_t *elem_table;
2116 int i;
2117 struct element_t *runp;
2119 data.magic = LIMAGIC (LC_COLLATE);
2120 data.n = nelems;
2121 iov[0].iov_base = (void *) &data;
2122 iov[0].iov_len = sizeof (data);
2124 iov[1].iov_base = (void *) idx;
2125 iov[1].iov_len = sizeof (idx);
2127 idx[0] = iov[0].iov_len + iov[1].iov_len;
2128 cnt = 0;
2130 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_NRULES));
2131 iov[2 + cnt].iov_base = &nrules;
2132 iov[2 + cnt].iov_len = sizeof (uint32_t);
2133 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2134 ++cnt;
2136 /* If we have no LC_COLLATE data emit only the number of rules as zero. */
2137 if (collate == NULL)
2139 int32_t dummy = 0;
2141 while (cnt < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
2143 /* The words have to be handled specially. */
2144 if (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
2146 iov[2 + cnt].iov_base = &dummy;
2147 iov[2 + cnt].iov_len = sizeof (int32_t);
2149 else
2151 iov[2 + cnt].iov_base = NULL;
2152 iov[2 + cnt].iov_len = 0;
2155 if (cnt + 1 < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
2156 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2157 ++cnt;
2160 assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
2162 write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", 2 + cnt, iov);
2164 return;
2167 obstack_init (&weightpool);
2168 obstack_init (&extrapool);
2169 obstack_init (&indirectpool);
2171 /* Since we are using the sign of an integer to mark indirection the
2172 offsets in the arrays we are indirectly referring to must not be
2173 zero since -0 == 0. Therefore we add a bit of dummy content. */
2174 obstack_int32_grow (&extrapool, 0);
2175 obstack_int32_grow (&indirectpool, 0);
2177 /* Prepare the ruleset table. */
2178 for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
2179 if (sect->rules != NULL && sect->ruleidx == i)
2181 int j;
2183 obstack_make_room (&weightpool, nrules);
2185 for (j = 0; j < nrules; ++j)
2186 obstack_1grow_fast (&weightpool, sect->rules[j]);
2187 ++i;
2189 /* And align the output. */
2190 i = (nrules * i) % __alignof__ (int32_t);
2191 if (i > 0)
2193 obstack_1grow (&weightpool, '\0');
2194 while (++i < __alignof__ (int32_t));
2196 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_RULESETS));
2197 iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2198 iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2199 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2200 ++cnt;
2202 /* Generate the 8-bit table. Walk through the lists of sequences
2203 starting with the same byte and add them one after the other to
2204 the table. In case we have more than one sequence starting with
2205 the same byte we have to use extra indirection.
2207 First add a record for the NUL byte. This entry will never be used
2208 so it does not matter. */
2209 tablemb[0] = 0;
2211 /* Now insert the `UNDEFINED' value if it is used. Since this value
2212 will probably be used more than once it is good to store the
2213 weights only once. */
2214 if (collate->undefined.used_in_level != 0)
2215 output_weight (&weightpool, collate, &collate->undefined);
2217 for (ch = 1; ch < 256; ++ch)
2218 if (collate->mbheads[ch]->mbnext == NULL
2219 && collate->mbheads[ch]->nmbs <= 1)
2221 tablemb[ch] = output_weight (&weightpool, collate,
2222 collate->mbheads[ch]);
2224 else
2226 /* The entries in the list are sorted by length and then
2227 alphabetically. This is the order in which we will add the
2228 elements to the collation table. This allows simply walking
2229 the table in sequence and stopping at the first matching
2230 entry. Since the longer sequences are coming first in the
2231 list they have the possibility to match first, just as it
2232 has to be. In the worst case we are walking to the end of
2233 the list where we put, if no singlebyte sequence is defined
2234 in the locale definition, the weights for UNDEFINED.
2236 To reduce the length of the search list we compress them a bit.
2237 This happens by collecting sequences of consecutive byte
2238 sequences in one entry (having and begin and end byte sequence)
2239 and add only one index into the weight table. We can find the
2240 consecutive entries since they are also consecutive in the list. */
2241 struct element_t *runp = collate->mbheads[ch];
2242 struct element_t *lastp;
2244 assert ((obstack_object_size (&extrapool)
2245 & (__alignof__ (int32_t) - 1)) == 0);
2247 tablemb[ch] = -obstack_object_size (&extrapool);
2251 /* Store the current index in the weight table. We know that
2252 the current position in the `extrapool' is aligned on a
2253 32-bit address. */
2254 int32_t weightidx;
2255 int added;
2257 /* Find out whether this is a single entry or we have more than
2258 one consecutive entry. */
2259 if (runp->mbnext != NULL
2260 && runp->nmbs == runp->mbnext->nmbs
2261 && memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0
2262 && (runp->mbs[runp->nmbs - 1]
2263 == runp->mbnext->mbs[runp->nmbs - 1] + 1))
2265 int i;
2266 struct element_t *series_startp = runp;
2267 struct element_t *curp;
2269 /* Compute how much space we will need. */
2270 added = ((sizeof (int32_t) + 1 + 2 * (runp->nmbs - 1)
2271 + __alignof__ (int32_t) - 1)
2272 & ~(__alignof__ (int32_t) - 1));
2273 assert ((obstack_object_size (&extrapool)
2274 & (__alignof__ (int32_t) - 1)) == 0);
2275 obstack_make_room (&extrapool, added);
2277 /* More than one consecutive entry. We mark this by having
2278 a negative index into the indirect table. */
2279 obstack_int32_grow_fast (&extrapool,
2280 -(obstack_object_size (&indirectpool)
2281 / sizeof (int32_t)));
2283 /* Now search first the end of the series. */
2285 runp = runp->mbnext;
2286 while (runp->mbnext != NULL
2287 && runp->nmbs == runp->mbnext->nmbs
2288 && memcmp (runp->mbs, runp->mbnext->mbs,
2289 runp->nmbs - 1) == 0
2290 && (runp->mbs[runp->nmbs - 1]
2291 == runp->mbnext->mbs[runp->nmbs - 1] + 1));
2293 /* Now walk backward from here to the beginning. */
2294 curp = runp;
2296 assert (runp->nmbs <= 256);
2297 obstack_1grow_fast (&extrapool, curp->nmbs - 1);
2298 for (i = 1; i < curp->nmbs; ++i)
2299 obstack_1grow_fast (&extrapool, curp->mbs[i]);
2301 /* Now find the end of the consecutive sequence and
2302 add all the indices in the indirect pool. */
2305 weightidx = output_weight (&weightpool, collate, curp);
2306 obstack_int32_grow (&indirectpool, weightidx);
2308 curp = curp->mblast;
2310 while (curp != series_startp);
2312 /* Add the final weight. */
2313 weightidx = output_weight (&weightpool, collate, curp);
2314 obstack_int32_grow (&indirectpool, weightidx);
2316 /* And add the end byte sequence. Without length this
2317 time. */
2318 for (i = 1; i < curp->nmbs; ++i)
2319 obstack_1grow_fast (&extrapool, curp->mbs[i]);
2321 else
2323 /* A single entry. Simply add the index and the length and
2324 string (except for the first character which is already
2325 tested for). */
2326 int i;
2328 /* Output the weight info. */
2329 weightidx = output_weight (&weightpool, collate, runp);
2331 added = ((sizeof (int32_t) + 1 + runp->nmbs - 1
2332 + __alignof__ (int32_t) - 1)
2333 & ~(__alignof__ (int32_t) - 1));
2334 assert ((obstack_object_size (&extrapool)
2335 & (__alignof__ (int32_t) - 1)) == 0);
2336 obstack_make_room (&extrapool, added);
2338 obstack_int32_grow_fast (&extrapool, weightidx);
2339 assert (runp->nmbs <= 256);
2340 obstack_1grow_fast (&extrapool, runp->nmbs - 1);
2342 for (i = 1; i < runp->nmbs; ++i)
2343 obstack_1grow_fast (&extrapool, runp->mbs[i]);
2346 /* Add alignment bytes if necessary. */
2347 while ((obstack_object_size (&extrapool)
2348 & (__alignof__ (int32_t) - 1)) != 0)
2349 obstack_1grow_fast (&extrapool, '\0');
2351 /* Next entry. */
2352 lastp = runp;
2353 runp = runp->mbnext;
2355 while (runp != NULL);
2357 assert ((obstack_object_size (&extrapool)
2358 & (__alignof__ (int32_t) - 1)) == 0);
2360 /* If the final entry in the list is not a single character we
2361 add an UNDEFINED entry here. */
2362 if (lastp->nmbs != 1)
2364 int added = ((sizeof (int32_t) + 1 + 1 + __alignof__ (int32_t) - 1)
2365 & ~(__alignof__ (int32_t) - 1));
2366 obstack_make_room (&extrapool, added);
2368 obstack_int32_grow_fast (&extrapool, 0);
2369 /* XXX What rule? We just pick the first. */
2370 obstack_1grow_fast (&extrapool, 0);
2371 /* Length is zero. */
2372 obstack_1grow_fast (&extrapool, 0);
2374 /* Add alignment bytes if necessary. */
2375 while ((obstack_object_size (&extrapool)
2376 & (__alignof__ (int32_t) - 1)) != 0)
2377 obstack_1grow_fast (&extrapool, '\0');
2381 /* Add padding to the tables if necessary. */
2382 while ((obstack_object_size (&weightpool) & (__alignof__ (int32_t) - 1))
2383 != 0)
2384 obstack_1grow (&weightpool, 0);
2386 /* Now add the four tables. */
2387 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEMB));
2388 iov[2 + cnt].iov_base = tablemb;
2389 iov[2 + cnt].iov_len = sizeof (tablemb);
2390 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2391 assert ((iov[2 + cnt].iov_len & (__alignof__ (int32_t) - 1)) == 0);
2392 ++cnt;
2394 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTMB));
2395 iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2396 iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2397 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2398 ++cnt;
2400 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAMB));
2401 iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2402 iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2403 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2404 ++cnt;
2406 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTMB));
2407 iov[2 + cnt].iov_len = obstack_object_size (&indirectpool);
2408 iov[2 + cnt].iov_base = obstack_finish (&indirectpool);
2409 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2410 assert ((iov[2 + cnt].iov_len & (__alignof__ (int32_t) - 1)) == 0);
2411 ++cnt;
2414 /* Now the same for the wide character table. We need to store some
2415 more information here. */
2416 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP1));
2417 iov[2 + cnt].iov_base = NULL;
2418 iov[2 + cnt].iov_len = 0;
2419 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2420 assert (idx[cnt] % __alignof__ (int32_t) == 0);
2421 ++cnt;
2423 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP2));
2424 iov[2 + cnt].iov_base = NULL;
2425 iov[2 + cnt].iov_len = 0;
2426 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2427 assert (idx[cnt] % __alignof__ (int32_t) == 0);
2428 ++cnt;
2430 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP3));
2431 iov[2 + cnt].iov_base = NULL;
2432 iov[2 + cnt].iov_len = 0;
2433 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2434 assert (idx[cnt] % __alignof__ (int32_t) == 0);
2435 ++cnt;
2437 /* Since we are using the sign of an integer to mark indirection the
2438 offsets in the arrays we are indirectly referring to must not be
2439 zero since -0 == 0. Therefore we add a bit of dummy content. */
2440 obstack_int32_grow (&extrapool, 0);
2441 obstack_int32_grow (&indirectpool, 0);
2443 /* Now insert the `UNDEFINED' value if it is used. Since this value
2444 will probably be used more than once it is good to store the
2445 weights only once. */
2446 if (output_weightwc (&weightpool, collate, &collate->undefined) != 0)
2447 abort ();
2449 /* Generate the table. Walk through the lists of sequences starting
2450 with the same wide character and add them one after the other to
2451 the table. In case we have more than one sequence starting with
2452 the same byte we have to use extra indirection. */
2453 tablewc.p = 6;
2454 tablewc.q = 10;
2455 collidx_table_init (&tablewc);
2457 atwc.weightpool = &weightpool;
2458 atwc.extrapool = &extrapool;
2459 atwc.indpool = &indirectpool;
2460 atwc.collate = collate;
2461 atwc.tablewc = &tablewc;
2463 wchead_table_iterate (&collate->wcheads, add_to_tablewc);
2465 memset (&atwc, 0, sizeof (atwc));
2467 collidx_table_finalize (&tablewc);
2469 /* Now add the four tables. */
2470 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEWC));
2471 iov[2 + cnt].iov_base = tablewc.result;
2472 iov[2 + cnt].iov_len = tablewc.result_size;
2473 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2474 assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2475 assert (idx[cnt] % __alignof__ (int32_t) == 0);
2476 ++cnt;
2478 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTWC));
2479 iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2480 iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2481 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2482 assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2483 assert (idx[cnt] % __alignof__ (int32_t) == 0);
2484 ++cnt;
2486 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAWC));
2487 iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2488 iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2489 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2490 assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2491 assert (idx[cnt] % __alignof__ (int32_t) == 0);
2492 ++cnt;
2494 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTWC));
2495 iov[2 + cnt].iov_len = obstack_object_size (&indirectpool);
2496 iov[2 + cnt].iov_base = obstack_finish (&indirectpool);
2497 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2498 assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2499 assert (idx[cnt] % __alignof__ (int32_t) == 0);
2500 ++cnt;
2503 /* Finally write the table with collation element names out. It is
2504 a hash table with a simple function which gets the name of the
2505 character as the input. One character might have many names. The
2506 value associated with the name is an index into the weight table
2507 where we are then interested in the first-level weight value.
2509 To determine how large the table should be we are counting the
2510 elements we have to put in. Since we are using internal chaining
2511 using a secondary hash function we have to make the table a bit
2512 larger to avoid extremely long search times. We can achieve
2513 good results with a 40% larger table than there are entries. */
2514 elem_size = 0;
2515 runp = collate->start;
2516 while (runp != NULL)
2518 if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2519 /* Yep, the element really counts. */
2520 ++elem_size;
2522 runp = runp->next;
2524 /* Add 40% and find the next prime number. */
2525 elem_size = next_prime (elem_size * 1.4);
2527 /* Allocate the table. Each entry consists of two words: the hash
2528 value and an index in a secondary table which provides the index
2529 into the weight table and the string itself (so that a match can
2530 be determined). */
2531 elem_table = (uint32_t *) obstack_alloc (&extrapool,
2532 elem_size * 2 * sizeof (uint32_t));
2533 memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t));
2535 /* Now add the elements. */
2536 runp = collate->start;
2537 while (runp != NULL)
2539 if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2541 /* Compute the hash value of the name. */
2542 uint32_t namelen = strlen (runp->name);
2543 uint32_t hash = elem_hash (runp->name, namelen);
2544 size_t idx = hash % elem_size;
2545 #ifndef NDEBUG
2546 size_t start_idx = idx;
2547 #endif
2549 if (elem_table[idx * 2] != 0)
2551 /* The spot is already taken. Try iterating using the value
2552 from the secondary hashing function. */
2553 size_t iter = hash % (elem_size - 2) + 1;
2557 idx += iter;
2558 if (idx >= elem_size)
2559 idx -= elem_size;
2560 assert (idx != start_idx);
2562 while (elem_table[idx * 2] != 0);
2564 /* This is the spot where we will insert the value. */
2565 elem_table[idx * 2] = hash;
2566 elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);
2568 /* Then the string itself including length. */
2569 obstack_1grow (&extrapool, namelen);
2570 obstack_grow (&extrapool, runp->name, namelen);
2572 /* And the multibyte representation. */
2573 obstack_1grow (&extrapool, runp->nmbs);
2574 obstack_grow (&extrapool, runp->mbs, runp->nmbs);
2576 /* And align again to 32 bits. */
2577 if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0)
2578 obstack_grow (&extrapool, "\0\0",
2579 (sizeof (int32_t)
2580 - ((1 + namelen + 1 + runp->nmbs)
2581 % sizeof (int32_t))));
2583 /* Now some 32-bit values: multibyte collation sequence,
2584 wide char string (including length), and wide char
2585 collation sequence. */
2586 obstack_int32_grow (&extrapool, runp->mbseqorder);
2588 obstack_int32_grow (&extrapool, runp->nwcs);
2589 obstack_grow (&extrapool, runp->wcs,
2590 runp->nwcs * sizeof (uint32_t));
2592 obstack_int32_grow (&extrapool, runp->wcseqorder);
2595 runp = runp->next;
2598 /* Prepare to write out this data. */
2599 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB));
2600 iov[2 + cnt].iov_base = &elem_size;
2601 iov[2 + cnt].iov_len = sizeof (int32_t);
2602 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2603 assert (idx[cnt] % __alignof__ (int32_t) == 0);
2604 ++cnt;
2606 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_TABLEMB));
2607 iov[2 + cnt].iov_base = elem_table;
2608 iov[2 + cnt].iov_len = elem_size * 2 * sizeof (int32_t);
2609 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2610 assert (idx[cnt] % __alignof__ (int32_t) == 0);
2611 ++cnt;
2613 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_EXTRAMB));
2614 iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2615 iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2616 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2617 ++cnt;
2619 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQMB));
2620 iov[2 + cnt].iov_base = collate->mbseqorder;
2621 iov[2 + cnt].iov_len = 256;
2622 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2623 ++cnt;
2625 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQWC));
2626 iov[2 + cnt].iov_base = collate->wcseqorder.result;
2627 iov[2 + cnt].iov_len = collate->wcseqorder.result_size;
2628 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2629 assert (idx[cnt] % __alignof__ (int32_t) == 0);
2630 ++cnt;
2632 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_CODESET));
2633 iov[2 + cnt].iov_base = (void *) charmap->code_set_name;
2634 iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1;
2635 ++cnt;
2637 assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
2639 write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", 2 + cnt, iov);
2641 obstack_free (&weightpool, NULL);
2642 obstack_free (&extrapool, NULL);
2643 obstack_free (&indirectpool, NULL);
2647 static enum token_t
2648 skip_to (struct linereader *ldfile, struct locale_collate_t *collate,
2649 const struct charmap_t *charmap, int to_endif)
2651 while (1)
2653 struct token *now = lr_token (ldfile, charmap, NULL, NULL, 0);
2654 enum token_t nowtok = now->tok;
2656 if (nowtok == tok_eof || nowtok == tok_end)
2657 return nowtok;
2659 if (nowtok == tok_ifdef || nowtok == tok_ifndef)
2661 lr_error (ldfile, _("%s: nested conditionals not supported"),
2662 "LC_COLLATE");
2663 nowtok = skip_to (ldfile, collate, charmap, tok_endif);
2664 if (nowtok == tok_eof || nowtok == tok_end)
2665 return nowtok;
2667 else if (nowtok == tok_endif || (!to_endif && nowtok == tok_else))
2669 lr_ignore_rest (ldfile, 1);
2670 return nowtok;
2672 else if (!to_endif && (nowtok == tok_elifdef || nowtok == tok_elifndef))
2674 /* Do not read the rest of the line. */
2675 return nowtok;
2677 else if (nowtok == tok_else)
2679 lr_error (ldfile, _("%s: more then one 'else'"), "LC_COLLATE");
2682 lr_ignore_rest (ldfile, 0);
2687 void
2688 collate_read (struct linereader *ldfile, struct localedef_t *result,
2689 const struct charmap_t *charmap, const char *repertoire_name,
2690 int ignore_content)
2692 struct repertoire_t *repertoire = NULL;
2693 struct locale_collate_t *collate;
2694 struct token *now;
2695 struct token *arg = NULL;
2696 enum token_t nowtok;
2697 enum token_t was_ellipsis = tok_none;
2698 struct localedef_t *copy_locale = NULL;
2699 /* Parsing state:
2700 0 - start
2701 1 - between `order-start' and `order-end'
2702 2 - after `order-end'
2703 3 - after `reorder-after', waiting for `reorder-end'
2704 4 - after `reorder-end'
2705 5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2706 6 - after `reorder-sections-end'
2708 int state = 0;
2710 /* Get the repertoire we have to use. */
2711 if (repertoire_name != NULL)
2712 repertoire = repertoire_read (repertoire_name);
2714 /* The rest of the line containing `LC_COLLATE' must be free. */
2715 lr_ignore_rest (ldfile, 1);
2717 while (1)
2721 now = lr_token (ldfile, charmap, result, NULL, verbose);
2722 nowtok = now->tok;
2724 while (nowtok == tok_eol);
2726 if (nowtok != tok_define)
2727 break;
2729 if (ignore_content)
2730 lr_ignore_rest (ldfile, 0);
2731 else
2733 arg = lr_token (ldfile, charmap, result, NULL, verbose);
2734 if (arg->tok != tok_ident)
2735 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2736 else
2738 /* Simply add the new symbol. */
2739 struct name_list *newsym = xmalloc (sizeof (*newsym)
2740 + arg->val.str.lenmb + 1);
2741 memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
2742 newsym->str[arg->val.str.lenmb] = '\0';
2743 newsym->next = defined;
2744 defined = newsym;
2746 lr_ignore_rest (ldfile, 1);
2751 if (nowtok == tok_copy)
2753 now = lr_token (ldfile, charmap, result, NULL, verbose);
2754 if (now->tok != tok_string)
2756 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2758 skip_category:
2760 now = lr_token (ldfile, charmap, result, NULL, verbose);
2761 while (now->tok != tok_eof && now->tok != tok_end);
2763 if (now->tok != tok_eof
2764 || (now = lr_token (ldfile, charmap, result, NULL, verbose),
2765 now->tok == tok_eof))
2766 lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
2767 else if (now->tok != tok_lc_collate)
2769 lr_error (ldfile, _("\
2770 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2771 lr_ignore_rest (ldfile, 0);
2773 else
2774 lr_ignore_rest (ldfile, 1);
2776 return;
2779 if (! ignore_content)
2781 /* Get the locale definition. */
2782 copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
2783 repertoire_name, charmap, NULL);
2784 if ((copy_locale->avail & COLLATE_LOCALE) == 0)
2786 /* Not yet loaded. So do it now. */
2787 if (locfile_read (copy_locale, charmap) != 0)
2788 goto skip_category;
2791 if (copy_locale->categories[LC_COLLATE].collate == NULL)
2792 return;
2795 lr_ignore_rest (ldfile, 1);
2797 now = lr_token (ldfile, charmap, result, NULL, verbose);
2798 nowtok = now->tok;
2801 /* Prepare the data structures. */
2802 collate_startup (ldfile, result, copy_locale, ignore_content);
2803 collate = result->categories[LC_COLLATE].collate;
2805 while (1)
2807 char ucs4buf[10];
2808 char *symstr;
2809 size_t symlen;
2811 /* Of course we don't proceed beyond the end of file. */
2812 if (nowtok == tok_eof)
2813 break;
2815 /* Ignore empty lines. */
2816 if (nowtok == tok_eol)
2818 now = lr_token (ldfile, charmap, result, NULL, verbose);
2819 nowtok = now->tok;
2820 continue;
2823 switch (nowtok)
2825 case tok_copy:
2826 /* Allow copying other locales. */
2827 now = lr_token (ldfile, charmap, result, NULL, verbose);
2828 if (now->tok != tok_string)
2829 goto err_label;
2831 if (! ignore_content)
2832 load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
2833 charmap, result);
2835 lr_ignore_rest (ldfile, 1);
2836 break;
2838 case tok_coll_weight_max:
2839 /* Ignore the rest of the line if we don't need the input of
2840 this line. */
2841 if (ignore_content)
2843 lr_ignore_rest (ldfile, 0);
2844 break;
2847 if (state != 0)
2848 goto err_label;
2850 arg = lr_token (ldfile, charmap, result, NULL, verbose);
2851 if (arg->tok != tok_number)
2852 goto err_label;
2853 if (collate->col_weight_max != -1)
2854 lr_error (ldfile, _("%s: duplicate definition of `%s'"),
2855 "LC_COLLATE", "col_weight_max");
2856 else
2857 collate->col_weight_max = arg->val.num;
2858 lr_ignore_rest (ldfile, 1);
2859 break;
2861 case tok_section_symbol:
2862 /* Ignore the rest of the line if we don't need the input of
2863 this line. */
2864 if (ignore_content)
2866 lr_ignore_rest (ldfile, 0);
2867 break;
2870 if (state != 0)
2871 goto err_label;
2873 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2874 if (arg->tok != tok_bsymbol)
2875 goto err_label;
2876 else if (!ignore_content)
2878 /* Check whether this section is already known. */
2879 struct section_list *known = collate->sections;
2880 while (known != NULL)
2882 if (strcmp (known->name, arg->val.str.startmb) == 0)
2883 break;
2884 known = known->next;
2887 if (known != NULL)
2889 lr_error (ldfile,
2890 _("%s: duplicate declaration of section `%s'"),
2891 "LC_COLLATE", arg->val.str.startmb);
2892 free (arg->val.str.startmb);
2894 else
2895 collate->sections = make_seclist_elem (collate,
2896 arg->val.str.startmb,
2897 collate->sections);
2899 lr_ignore_rest (ldfile, known == NULL);
2901 else
2903 free (arg->val.str.startmb);
2904 lr_ignore_rest (ldfile, 0);
2906 break;
2908 case tok_collating_element:
2909 /* Ignore the rest of the line if we don't need the input of
2910 this line. */
2911 if (ignore_content)
2913 lr_ignore_rest (ldfile, 0);
2914 break;
2917 if (state != 0 && state != 2)
2918 goto err_label;
2920 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2921 if (arg->tok != tok_bsymbol)
2922 goto err_label;
2923 else
2925 const char *symbol = arg->val.str.startmb;
2926 size_t symbol_len = arg->val.str.lenmb;
2928 /* Next the `from' keyword. */
2929 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2930 if (arg->tok != tok_from)
2932 free ((char *) symbol);
2933 goto err_label;
2936 ldfile->return_widestr = 1;
2937 ldfile->translate_strings = 1;
2939 /* Finally the string with the replacement. */
2940 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2942 ldfile->return_widestr = 0;
2943 ldfile->translate_strings = 0;
2945 if (arg->tok != tok_string)
2946 goto err_label;
2948 if (!ignore_content && symbol != NULL)
2950 /* The name is already defined. */
2951 if (check_duplicate (ldfile, collate, charmap,
2952 repertoire, symbol, symbol_len))
2953 goto col_elem_free;
2955 if (arg->val.str.startmb != NULL)
2956 insert_entry (&collate->elem_table, symbol, symbol_len,
2957 new_element (collate,
2958 arg->val.str.startmb,
2959 arg->val.str.lenmb - 1,
2960 arg->val.str.startwc,
2961 symbol, symbol_len, 0));
2963 else
2965 col_elem_free:
2966 free ((char *) symbol);
2967 free (arg->val.str.startmb);
2968 free (arg->val.str.startwc);
2970 lr_ignore_rest (ldfile, 1);
2972 break;
2974 case tok_collating_symbol:
2975 /* Ignore the rest of the line if we don't need the input of
2976 this line. */
2977 if (ignore_content)
2979 lr_ignore_rest (ldfile, 0);
2980 break;
2983 if (state != 0 && state != 2)
2984 goto err_label;
2986 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2987 if (arg->tok != tok_bsymbol)
2988 goto err_label;
2989 else
2991 char *symbol = arg->val.str.startmb;
2992 size_t symbol_len = arg->val.str.lenmb;
2993 char *endsymbol = NULL;
2994 size_t endsymbol_len = 0;
2995 enum token_t ellipsis = tok_none;
2997 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2998 if (arg->tok == tok_ellipsis2 || arg->tok == tok_ellipsis4)
3000 ellipsis = arg->tok;
3002 arg = lr_token (ldfile, charmap, result, repertoire,
3003 verbose);
3004 if (arg->tok != tok_bsymbol)
3006 free (symbol);
3007 goto err_label;
3010 endsymbol = arg->val.str.startmb;
3011 endsymbol_len = arg->val.str.lenmb;
3013 lr_ignore_rest (ldfile, 1);
3015 else if (arg->tok != tok_eol)
3017 free (symbol);
3018 goto err_label;
3021 if (!ignore_content)
3023 if (symbol == NULL
3024 || (ellipsis != tok_none && endsymbol == NULL))
3026 lr_error (ldfile, _("\
3027 %s: unknown character in collating symbol name"),
3028 "LC_COLLATE");
3029 goto col_sym_free;
3031 else if (ellipsis == tok_none)
3033 /* A single symbol, no ellipsis. */
3034 if (check_duplicate (ldfile, collate, charmap,
3035 repertoire, symbol, symbol_len))
3036 /* The name is already defined. */
3037 goto col_sym_free;
3039 insert_entry (&collate->sym_table, symbol, symbol_len,
3040 new_symbol (collate, symbol, symbol_len));
3042 else if (symbol_len != endsymbol_len)
3044 col_sym_inv_range:
3045 lr_error (ldfile,
3046 _("invalid names for character range"));
3047 goto col_sym_free;
3049 else
3051 /* Oh my, we have to handle an ellipsis. First, as
3052 usual, determine the common prefix and then
3053 convert the rest into a range. */
3054 size_t prefixlen;
3055 unsigned long int from;
3056 unsigned long int to;
3057 char *endp;
3059 for (prefixlen = 0; prefixlen < symbol_len; ++prefixlen)
3060 if (symbol[prefixlen] != endsymbol[prefixlen])
3061 break;
3063 /* Convert the rest into numbers. */
3064 symbol[symbol_len] = '\0';
3065 from = strtoul (&symbol[prefixlen], &endp,
3066 ellipsis == tok_ellipsis2 ? 16 : 10);
3067 if (*endp != '\0')
3068 goto col_sym_inv_range;
3070 endsymbol[symbol_len] = '\0';
3071 to = strtoul (&endsymbol[prefixlen], &endp,
3072 ellipsis == tok_ellipsis2 ? 16 : 10);
3073 if (*endp != '\0')
3074 goto col_sym_inv_range;
3076 if (from > to)
3077 goto col_sym_inv_range;
3079 /* Now loop over all entries. */
3080 while (from <= to)
3082 char *symbuf;
3084 symbuf = (char *) obstack_alloc (&collate->mempool,
3085 symbol_len + 1);
3087 /* Create the name. */
3088 sprintf (symbuf,
3089 ellipsis == tok_ellipsis2
3090 ? "%.*s%.*lX" : "%.*s%.*lu",
3091 (int) prefixlen, symbol,
3092 (int) (symbol_len - prefixlen), from);
3094 if (check_duplicate (ldfile, collate, charmap,
3095 repertoire, symbuf, symbol_len))
3096 /* The name is already defined. */
3097 goto col_sym_free;
3099 insert_entry (&collate->sym_table, symbuf,
3100 symbol_len,
3101 new_symbol (collate, symbuf,
3102 symbol_len));
3104 /* Increment the counter. */
3105 ++from;
3108 goto col_sym_free;
3111 else
3113 col_sym_free:
3114 free (symbol);
3115 free (endsymbol);
3118 break;
3120 case tok_symbol_equivalence:
3121 /* Ignore the rest of the line if we don't need the input of
3122 this line. */
3123 if (ignore_content)
3125 lr_ignore_rest (ldfile, 0);
3126 break;
3129 if (state != 0)
3130 goto err_label;
3132 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3133 if (arg->tok != tok_bsymbol)
3134 goto err_label;
3135 else
3137 const char *newname = arg->val.str.startmb;
3138 size_t newname_len = arg->val.str.lenmb;
3139 const char *symname;
3140 size_t symname_len;
3141 void *symval; /* Actually struct symbol_t* */
3143 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3144 if (arg->tok != tok_bsymbol)
3146 free ((char *) newname);
3147 goto err_label;
3150 symname = arg->val.str.startmb;
3151 symname_len = arg->val.str.lenmb;
3153 if (newname == NULL)
3155 lr_error (ldfile, _("\
3156 %s: unknown character in equivalent definition name"),
3157 "LC_COLLATE");
3159 sym_equiv_free:
3160 free ((char *) newname);
3161 free ((char *) symname);
3162 break;
3164 if (symname == NULL)
3166 lr_error (ldfile, _("\
3167 %s: unknown character in equivalent definition value"),
3168 "LC_COLLATE");
3169 goto sym_equiv_free;
3172 /* See whether the symbol name is already defined. */
3173 if (find_entry (&collate->sym_table, symname, symname_len,
3174 &symval) != 0)
3176 lr_error (ldfile, _("\
3177 %s: unknown symbol `%s' in equivalent definition"),
3178 "LC_COLLATE", symname);
3179 goto sym_equiv_free;
3182 if (insert_entry (&collate->sym_table,
3183 newname, newname_len, symval) < 0)
3185 lr_error (ldfile, _("\
3186 error while adding equivalent collating symbol"));
3187 goto sym_equiv_free;
3190 free ((char *) symname);
3192 lr_ignore_rest (ldfile, 1);
3193 break;
3195 case tok_script:
3196 /* Ignore the rest of the line if we don't need the input of
3197 this line. */
3198 if (ignore_content)
3200 lr_ignore_rest (ldfile, 0);
3201 break;
3204 /* We get told about the scripts we know. */
3205 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3206 if (arg->tok != tok_bsymbol)
3207 goto err_label;
3208 else
3210 struct section_list *runp = collate->known_sections;
3211 char *name;
3213 while (runp != NULL)
3214 if (strncmp (runp->name, arg->val.str.startmb,
3215 arg->val.str.lenmb) == 0
3216 && runp->name[arg->val.str.lenmb] == '\0')
3217 break;
3218 else
3219 runp = runp->def_next;
3221 if (runp != NULL)
3223 lr_error (ldfile, _("duplicate definition of script `%s'"),
3224 runp->name);
3225 lr_ignore_rest (ldfile, 0);
3226 break;
3229 runp = (struct section_list *) xcalloc (1, sizeof (*runp));
3230 name = (char *) xmalloc (arg->val.str.lenmb + 1);
3231 memcpy (name, arg->val.str.startmb, arg->val.str.lenmb);
3232 name[arg->val.str.lenmb] = '\0';
3233 runp->name = name;
3235 runp->def_next = collate->known_sections;
3236 collate->known_sections = runp;
3238 lr_ignore_rest (ldfile, 1);
3239 break;
3241 case tok_order_start:
3242 /* Ignore the rest of the line if we don't need the input of
3243 this line. */
3244 if (ignore_content)
3246 lr_ignore_rest (ldfile, 0);
3247 break;
3250 if (state != 0 && state != 1 && state != 2)
3251 goto err_label;
3252 state = 1;
3254 /* The 14652 draft does not specify whether all `order_start' lines
3255 must contain the same number of sort-rules, but 14651 does. So
3256 we require this here as well. */
3257 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3258 if (arg->tok == tok_bsymbol)
3260 /* This better should be a section name. */
3261 struct section_list *sp = collate->known_sections;
3262 while (sp != NULL
3263 && (sp->name == NULL
3264 || strncmp (sp->name, arg->val.str.startmb,
3265 arg->val.str.lenmb) != 0
3266 || sp->name[arg->val.str.lenmb] != '\0'))
3267 sp = sp->def_next;
3269 if (sp == NULL)
3271 lr_error (ldfile, _("\
3272 %s: unknown section name `%.*s'"),
3273 "LC_COLLATE", (int) arg->val.str.lenmb,
3274 arg->val.str.startmb);
3275 /* We use the error section. */
3276 collate->current_section = &collate->error_section;
3278 if (collate->error_section.first == NULL)
3280 /* Insert &collate->error_section at the end of
3281 the collate->sections list. */
3282 if (collate->sections == NULL)
3283 collate->sections = &collate->error_section;
3284 else
3286 sp = collate->sections;
3287 while (sp->next != NULL)
3288 sp = sp->next;
3290 sp->next = &collate->error_section;
3292 collate->error_section.next = NULL;
3295 else
3297 /* One should not be allowed to open the same
3298 section twice. */
3299 if (sp->first != NULL)
3300 lr_error (ldfile, _("\
3301 %s: multiple order definitions for section `%s'"),
3302 "LC_COLLATE", sp->name);
3303 else
3305 /* Insert sp in the collate->sections list,
3306 right after collate->current_section. */
3307 if (collate->current_section != NULL)
3309 sp->next = collate->current_section->next;
3310 collate->current_section->next = sp;
3312 else if (collate->sections == NULL)
3313 /* This is the first section to be defined. */
3314 collate->sections = sp;
3316 collate->current_section = sp;
3319 /* Next should come the end of the line or a semicolon. */
3320 arg = lr_token (ldfile, charmap, result, repertoire,
3321 verbose);
3322 if (arg->tok == tok_eol)
3324 uint32_t cnt;
3326 /* This means we have exactly one rule: `forward'. */
3327 if (nrules > 1)
3328 lr_error (ldfile, _("\
3329 %s: invalid number of sorting rules"),
3330 "LC_COLLATE");
3331 else
3332 nrules = 1;
3333 sp->rules = obstack_alloc (&collate->mempool,
3334 (sizeof (enum coll_sort_rule)
3335 * nrules));
3336 for (cnt = 0; cnt < nrules; ++cnt)
3337 sp->rules[cnt] = sort_forward;
3339 /* Next line. */
3340 break;
3343 /* Get the next token. */
3344 arg = lr_token (ldfile, charmap, result, repertoire,
3345 verbose);
3348 else
3350 /* There is no section symbol. Therefore we use the unnamed
3351 section. */
3352 collate->current_section = &collate->unnamed_section;
3354 if (collate->unnamed_section_defined)
3355 lr_error (ldfile, _("\
3356 %s: multiple order definitions for unnamed section"),
3357 "LC_COLLATE");
3358 else
3360 /* Insert &collate->unnamed_section at the beginning of
3361 the collate->sections list. */
3362 collate->unnamed_section.next = collate->sections;
3363 collate->sections = &collate->unnamed_section;
3364 collate->unnamed_section_defined = true;
3368 /* Now read the direction names. */
3369 read_directions (ldfile, arg, charmap, repertoire, result);
3371 /* From now we need the strings untranslated. */
3372 ldfile->translate_strings = 0;
3373 break;
3375 case tok_order_end:
3376 /* Ignore the rest of the line if we don't need the input of
3377 this line. */
3378 if (ignore_content)
3380 lr_ignore_rest (ldfile, 0);
3381 break;
3384 if (state != 1)
3385 goto err_label;
3387 /* Handle ellipsis at end of list. */
3388 if (was_ellipsis != tok_none)
3390 handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3391 repertoire, result);
3392 was_ellipsis = tok_none;
3395 state = 2;
3396 lr_ignore_rest (ldfile, 1);
3397 break;
3399 case tok_reorder_after:
3400 /* Ignore the rest of the line if we don't need the input of
3401 this line. */
3402 if (ignore_content)
3404 lr_ignore_rest (ldfile, 0);
3405 break;
3408 if (state == 1)
3410 lr_error (ldfile, _("%s: missing `order_end' keyword"),
3411 "LC_COLLATE");
3412 state = 2;
3414 /* Handle ellipsis at end of list. */
3415 if (was_ellipsis != tok_none)
3417 handle_ellipsis (ldfile, arg->val.str.startmb,
3418 arg->val.str.lenmb, was_ellipsis, charmap,
3419 repertoire, result);
3420 was_ellipsis = tok_none;
3423 else if (state == 0 && copy_locale == NULL)
3424 goto err_label;
3425 else if (state != 0 && state != 2 && state != 3)
3426 goto err_label;
3427 state = 3;
3429 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3430 if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
3432 /* Find this symbol in the sequence table. */
3433 char ucsbuf[10];
3434 char *startmb;
3435 size_t lenmb;
3436 struct element_t *insp;
3437 int no_error = 1;
3438 void *ptr;
3440 if (arg->tok == tok_bsymbol)
3442 startmb = arg->val.str.startmb;
3443 lenmb = arg->val.str.lenmb;
3445 else
3447 sprintf (ucsbuf, "U%08X", arg->val.ucs4);
3448 startmb = ucsbuf;
3449 lenmb = 9;
3452 if (find_entry (&collate->seq_table, startmb, lenmb, &ptr) == 0)
3453 /* Yes, the symbol exists. Simply point the cursor
3454 to it. */
3455 collate->cursor = (struct element_t *) ptr;
3456 else
3458 struct symbol_t *symbp;
3459 void *ptr;
3461 if (find_entry (&collate->sym_table, startmb, lenmb,
3462 &ptr) == 0)
3464 symbp = ptr;
3466 if (symbp->order->last != NULL
3467 || symbp->order->next != NULL)
3468 collate->cursor = symbp->order;
3469 else
3471 /* This is a collating symbol but its position
3472 is not yet defined. */
3473 lr_error (ldfile, _("\
3474 %s: order for collating symbol %.*s not yet defined"),
3475 "LC_COLLATE", (int) lenmb, startmb);
3476 collate->cursor = NULL;
3477 no_error = 0;
3480 else if (find_entry (&collate->elem_table, startmb, lenmb,
3481 &ptr) == 0)
3483 insp = (struct element_t *) ptr;
3485 if (insp->last != NULL || insp->next != NULL)
3486 collate->cursor = insp;
3487 else
3489 /* This is a collating element but its position
3490 is not yet defined. */
3491 lr_error (ldfile, _("\
3492 %s: order for collating element %.*s not yet defined"),
3493 "LC_COLLATE", (int) lenmb, startmb);
3494 collate->cursor = NULL;
3495 no_error = 0;
3498 else
3500 /* This is bad. The symbol after which we have to
3501 insert does not exist. */
3502 lr_error (ldfile, _("\
3503 %s: cannot reorder after %.*s: symbol not known"),
3504 "LC_COLLATE", (int) lenmb, startmb);
3505 collate->cursor = NULL;
3506 no_error = 0;
3510 lr_ignore_rest (ldfile, no_error);
3512 else
3513 /* This must not happen. */
3514 goto err_label;
3515 break;
3517 case tok_reorder_end:
3518 /* Ignore the rest of the line if we don't need the input of
3519 this line. */
3520 if (ignore_content)
3521 break;
3523 if (state != 3)
3524 goto err_label;
3525 state = 4;
3526 lr_ignore_rest (ldfile, 1);
3527 break;
3529 case tok_reorder_sections_after:
3530 /* Ignore the rest of the line if we don't need the input of
3531 this line. */
3532 if (ignore_content)
3534 lr_ignore_rest (ldfile, 0);
3535 break;
3538 if (state == 1)
3540 lr_error (ldfile, _("%s: missing `order_end' keyword"),
3541 "LC_COLLATE");
3542 state = 2;
3544 /* Handle ellipsis at end of list. */
3545 if (was_ellipsis != tok_none)
3547 handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3548 repertoire, result);
3549 was_ellipsis = tok_none;
3552 else if (state == 3)
3554 WITH_CUR_LOCALE (error (0, 0, _("\
3555 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3556 state = 4;
3558 else if (state != 2 && state != 4)
3559 goto err_label;
3560 state = 5;
3562 /* Get the name of the sections we are adding after. */
3563 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3564 if (arg->tok == tok_bsymbol)
3566 /* Now find a section with this name. */
3567 struct section_list *runp = collate->sections;
3569 while (runp != NULL)
3571 if (runp->name != NULL
3572 && strlen (runp->name) == arg->val.str.lenmb
3573 && memcmp (runp->name, arg->val.str.startmb,
3574 arg->val.str.lenmb) == 0)
3575 break;
3577 runp = runp->next;
3580 if (runp != NULL)
3581 collate->current_section = runp;
3582 else
3584 /* This is bad. The section after which we have to
3585 reorder does not exist. Therefore we cannot
3586 process the whole rest of this reorder
3587 specification. */
3588 lr_error (ldfile, _("%s: section `%.*s' not known"),
3589 "LC_COLLATE", (int) arg->val.str.lenmb,
3590 arg->val.str.startmb);
3594 lr_ignore_rest (ldfile, 0);
3596 now = lr_token (ldfile, charmap, result, NULL, verbose);
3598 while (now->tok == tok_reorder_sections_after
3599 || now->tok == tok_reorder_sections_end
3600 || now->tok == tok_end);
3602 /* Process the token we just saw. */
3603 nowtok = now->tok;
3604 continue;
3607 else
3608 /* This must not happen. */
3609 goto err_label;
3610 break;
3612 case tok_reorder_sections_end:
3613 /* Ignore the rest of the line if we don't need the input of
3614 this line. */
3615 if (ignore_content)
3616 break;
3618 if (state != 5)
3619 goto err_label;
3620 state = 6;
3621 lr_ignore_rest (ldfile, 1);
3622 break;
3624 case tok_bsymbol:
3625 case tok_ucs4:
3626 /* Ignore the rest of the line if we don't need the input of
3627 this line. */
3628 if (ignore_content)
3630 lr_ignore_rest (ldfile, 0);
3631 break;
3634 if (state != 0 && state != 1 && state != 3 && state != 5)
3635 goto err_label;
3637 if ((state == 0 || state == 5) && nowtok == tok_ucs4)
3638 goto err_label;
3640 if (nowtok == tok_ucs4)
3642 snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4);
3643 symstr = ucs4buf;
3644 symlen = 9;
3646 else if (arg != NULL)
3648 symstr = arg->val.str.startmb;
3649 symlen = arg->val.str.lenmb;
3651 else
3653 lr_error (ldfile, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3654 (int) ldfile->token.val.str.lenmb,
3655 ldfile->token.val.str.startmb);
3656 break;
3659 struct element_t *seqp;
3660 if (state == 0)
3662 /* We are outside an `order_start' region. This means
3663 we must only accept definitions of values for
3664 collation symbols since these are purely abstract
3665 values and don't need directions associated. */
3666 void *ptr;
3668 if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3670 seqp = ptr;
3672 /* It's already defined. First check whether this
3673 is really a collating symbol. */
3674 if (seqp->is_character)
3675 goto err_label;
3677 goto move_entry;
3679 else
3681 void *result;
3683 if (find_entry (&collate->sym_table, symstr, symlen,
3684 &result) != 0)
3685 /* No collating symbol, it's an error. */
3686 goto err_label;
3688 /* Maybe this is the first time we define a symbol
3689 value and it is before the first actual section. */
3690 if (collate->sections == NULL)
3691 collate->sections = collate->current_section =
3692 &collate->symbol_section;
3695 if (was_ellipsis != tok_none)
3697 handle_ellipsis (ldfile, symstr, symlen, was_ellipsis,
3698 charmap, repertoire, result);
3700 /* Remember that we processed the ellipsis. */
3701 was_ellipsis = tok_none;
3703 /* And don't add the value a second time. */
3704 break;
3707 else if (state == 3)
3709 /* It is possible that we already have this collation sequence.
3710 In this case we move the entry. */
3711 void *sym;
3712 void *ptr;
3714 /* If the symbol after which we have to insert was not found
3715 ignore all entries. */
3716 if (collate->cursor == NULL)
3718 lr_ignore_rest (ldfile, 0);
3719 break;
3722 if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3724 seqp = (struct element_t *) ptr;
3725 goto move_entry;
3728 if (find_entry (&collate->sym_table, symstr, symlen, &sym) == 0
3729 && (seqp = ((struct symbol_t *) sym)->order) != NULL)
3730 goto move_entry;
3732 if (find_entry (&collate->elem_table, symstr, symlen, &ptr) == 0
3733 && (seqp = (struct element_t *) ptr,
3734 seqp->last != NULL || seqp->next != NULL
3735 || (collate->start != NULL && seqp == collate->start)))
3737 move_entry:
3738 /* Remove the entry from the old position. */
3739 if (seqp->last == NULL)
3740 collate->start = seqp->next;
3741 else
3742 seqp->last->next = seqp->next;
3743 if (seqp->next != NULL)
3744 seqp->next->last = seqp->last;
3746 /* We also have to check whether this entry is the
3747 first or last of a section. */
3748 if (seqp->section->first == seqp)
3750 if (seqp->section->first == seqp->section->last)
3751 /* This section has no content anymore. */
3752 seqp->section->first = seqp->section->last = NULL;
3753 else
3754 seqp->section->first = seqp->next;
3756 else if (seqp->section->last == seqp)
3757 seqp->section->last = seqp->last;
3759 /* Now insert it in the new place. */
3760 insert_weights (ldfile, seqp, charmap, repertoire, result,
3761 tok_none);
3762 break;
3765 /* Otherwise we just add a new entry. */
3767 else if (state == 5)
3769 /* We are reordering sections. Find the named section. */
3770 struct section_list *runp = collate->sections;
3771 struct section_list *prevp = NULL;
3773 while (runp != NULL)
3775 if (runp->name != NULL
3776 && strlen (runp->name) == symlen
3777 && memcmp (runp->name, symstr, symlen) == 0)
3778 break;
3780 prevp = runp;
3781 runp = runp->next;
3784 if (runp == NULL)
3786 lr_error (ldfile, _("%s: section `%.*s' not known"),
3787 "LC_COLLATE", (int) symlen, symstr);
3788 lr_ignore_rest (ldfile, 0);
3790 else
3792 if (runp != collate->current_section)
3794 /* Remove the named section from the old place and
3795 insert it in the new one. */
3796 prevp->next = runp->next;
3798 runp->next = collate->current_section->next;
3799 collate->current_section->next = runp;
3800 collate->current_section = runp;
3803 /* Process the rest of the line which might change
3804 the collation rules. */
3805 arg = lr_token (ldfile, charmap, result, repertoire,
3806 verbose);
3807 if (arg->tok != tok_eof && arg->tok != tok_eol)
3808 read_directions (ldfile, arg, charmap, repertoire,
3809 result);
3811 break;
3813 else if (was_ellipsis != tok_none)
3815 /* Using the information in the `ellipsis_weight'
3816 element and this and the last value we have to handle
3817 the ellipsis now. */
3818 assert (state == 1);
3820 handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap,
3821 repertoire, result);
3823 /* Remember that we processed the ellipsis. */
3824 was_ellipsis = tok_none;
3826 /* And don't add the value a second time. */
3827 break;
3830 /* Now insert in the new place. */
3831 insert_value (ldfile, symstr, symlen, charmap, repertoire, result);
3832 break;
3834 case tok_undefined:
3835 /* Ignore the rest of the line if we don't need the input of
3836 this line. */
3837 if (ignore_content)
3839 lr_ignore_rest (ldfile, 0);
3840 break;
3843 if (state != 1)
3844 goto err_label;
3846 if (was_ellipsis != tok_none)
3848 lr_error (ldfile,
3849 _("%s: cannot have `%s' as end of ellipsis range"),
3850 "LC_COLLATE", "UNDEFINED");
3852 unlink_element (collate);
3853 was_ellipsis = tok_none;
3856 /* See whether UNDEFINED already appeared somewhere. */
3857 if (collate->undefined.next != NULL
3858 || &collate->undefined == collate->cursor)
3860 lr_error (ldfile,
3861 _("%s: order for `%.*s' already defined at %s:%Zu"),
3862 "LC_COLLATE", 9, "UNDEFINED",
3863 collate->undefined.file,
3864 collate->undefined.line);
3865 lr_ignore_rest (ldfile, 0);
3867 else
3868 /* Parse the weights. */
3869 insert_weights (ldfile, &collate->undefined, charmap,
3870 repertoire, result, tok_none);
3871 break;
3873 case tok_ellipsis2: /* symbolic hexadecimal ellipsis */
3874 case tok_ellipsis3: /* absolute ellipsis */
3875 case tok_ellipsis4: /* symbolic decimal ellipsis */
3876 /* This is the symbolic (decimal or hexadecimal) or absolute
3877 ellipsis. */
3878 if (was_ellipsis != tok_none)
3879 goto err_label;
3881 if (state != 0 && state != 1 && state != 3)
3882 goto err_label;
3884 was_ellipsis = nowtok;
3886 insert_weights (ldfile, &collate->ellipsis_weight, charmap,
3887 repertoire, result, nowtok);
3888 break;
3890 case tok_end:
3891 seen_end:
3892 /* Next we assume `LC_COLLATE'. */
3893 if (!ignore_content)
3895 if (state == 0 && copy_locale == NULL)
3896 /* We must either see a copy statement or have
3897 ordering values. */
3898 lr_error (ldfile,
3899 _("%s: empty category description not allowed"),
3900 "LC_COLLATE");
3901 else if (state == 1)
3903 lr_error (ldfile, _("%s: missing `order_end' keyword"),
3904 "LC_COLLATE");
3906 /* Handle ellipsis at end of list. */
3907 if (was_ellipsis != tok_none)
3909 handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3910 repertoire, result);
3911 was_ellipsis = tok_none;
3914 else if (state == 3)
3915 WITH_CUR_LOCALE (error (0, 0, _("\
3916 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3917 else if (state == 5)
3918 WITH_CUR_LOCALE (error (0, 0, _("\
3919 %s: missing `reorder-sections-end' keyword"), "LC_COLLATE"));
3921 arg = lr_token (ldfile, charmap, result, NULL, verbose);
3922 if (arg->tok == tok_eof)
3923 break;
3924 if (arg->tok == tok_eol)
3925 lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
3926 else if (arg->tok != tok_lc_collate)
3927 lr_error (ldfile, _("\
3928 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3929 lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
3930 return;
3932 case tok_define:
3933 if (ignore_content)
3935 lr_ignore_rest (ldfile, 0);
3936 break;
3939 arg = lr_token (ldfile, charmap, result, NULL, verbose);
3940 if (arg->tok != tok_ident)
3941 goto err_label;
3943 /* Simply add the new symbol. */
3944 struct name_list *newsym = xmalloc (sizeof (*newsym)
3945 + arg->val.str.lenmb + 1);
3946 memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
3947 newsym->str[arg->val.str.lenmb] = '\0';
3948 newsym->next = defined;
3949 defined = newsym;
3951 lr_ignore_rest (ldfile, 1);
3952 break;
3954 case tok_undef:
3955 if (ignore_content)
3957 lr_ignore_rest (ldfile, 0);
3958 break;
3961 arg = lr_token (ldfile, charmap, result, NULL, verbose);
3962 if (arg->tok != tok_ident)
3963 goto err_label;
3965 /* Remove _all_ occurrences of the symbol from the list. */
3966 struct name_list *prevdef = NULL;
3967 struct name_list *curdef = defined;
3968 while (curdef != NULL)
3969 if (strncmp (arg->val.str.startmb, curdef->str,
3970 arg->val.str.lenmb) == 0
3971 && curdef->str[arg->val.str.lenmb] == '\0')
3973 if (prevdef == NULL)
3974 defined = curdef->next;
3975 else
3976 prevdef->next = curdef->next;
3978 struct name_list *olddef = curdef;
3979 curdef = curdef->next;
3981 free (olddef);
3983 else
3985 prevdef = curdef;
3986 curdef = curdef->next;
3989 lr_ignore_rest (ldfile, 1);
3990 break;
3992 case tok_ifdef:
3993 case tok_ifndef:
3994 if (ignore_content)
3996 lr_ignore_rest (ldfile, 0);
3997 break;
4000 found_ifdef:
4001 arg = lr_token (ldfile, charmap, result, NULL, verbose);
4002 if (arg->tok != tok_ident)
4003 goto err_label;
4004 lr_ignore_rest (ldfile, 1);
4006 if (collate->else_action == else_none)
4008 curdef = defined;
4009 while (curdef != NULL)
4010 if (strncmp (arg->val.str.startmb, curdef->str,
4011 arg->val.str.lenmb) == 0
4012 && curdef->str[arg->val.str.lenmb] == '\0')
4013 break;
4014 else
4015 curdef = curdef->next;
4017 if ((nowtok == tok_ifdef && curdef != NULL)
4018 || (nowtok == tok_ifndef && curdef == NULL))
4020 /* We have to use the if-branch. */
4021 collate->else_action = else_ignore;
4023 else
4025 /* We have to use the else-branch, if there is one. */
4026 nowtok = skip_to (ldfile, collate, charmap, 0);
4027 if (nowtok == tok_else)
4028 collate->else_action = else_seen;
4029 else if (nowtok == tok_elifdef)
4031 nowtok = tok_ifdef;
4032 goto found_ifdef;
4034 else if (nowtok == tok_elifndef)
4036 nowtok = tok_ifndef;
4037 goto found_ifdef;
4039 else if (nowtok == tok_eof)
4040 goto seen_eof;
4041 else if (nowtok == tok_end)
4042 goto seen_end;
4045 else
4047 /* XXX Should it really become necessary to support nested
4048 preprocessor handling we will push the state here. */
4049 lr_error (ldfile, _("%s: nested conditionals not supported"),
4050 "LC_COLLATE");
4051 nowtok = skip_to (ldfile, collate, charmap, 1);
4052 if (nowtok == tok_eof)
4053 goto seen_eof;
4054 else if (nowtok == tok_end)
4055 goto seen_end;
4057 break;
4059 case tok_elifdef:
4060 case tok_elifndef:
4061 case tok_else:
4062 if (ignore_content)
4064 lr_ignore_rest (ldfile, 0);
4065 break;
4068 lr_ignore_rest (ldfile, 1);
4070 if (collate->else_action == else_ignore)
4072 /* Ignore everything until the endif. */
4073 nowtok = skip_to (ldfile, collate, charmap, 1);
4074 if (nowtok == tok_eof)
4075 goto seen_eof;
4076 else if (nowtok == tok_end)
4077 goto seen_end;
4079 else
4081 assert (collate->else_action == else_none);
4082 lr_error (ldfile, _("\
4083 %s: '%s' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE",
4084 nowtok == tok_else ? "else"
4085 : nowtok == tok_elifdef ? "elifdef" : "elifndef");
4087 break;
4089 case tok_endif:
4090 if (ignore_content)
4092 lr_ignore_rest (ldfile, 0);
4093 break;
4096 lr_ignore_rest (ldfile, 1);
4098 if (collate->else_action != else_ignore
4099 && collate->else_action != else_seen)
4100 lr_error (ldfile, _("\
4101 %s: 'endif' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE");
4103 /* XXX If we support nested preprocessor directives we pop
4104 the state here. */
4105 collate->else_action = else_none;
4106 break;
4108 default:
4109 err_label:
4110 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
4113 /* Prepare for the next round. */
4114 now = lr_token (ldfile, charmap, result, NULL, verbose);
4115 nowtok = now->tok;
4118 seen_eof:
4119 /* When we come here we reached the end of the file. */
4120 lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");