Replace FSF snail mail address with URLs.
[glibc.git] / locale / programs / ld-collate.c
blobb4d395fa25268f8f91ccbf565f904b5ef7e76abb
1 /* Copyright (C) 1995-2003, 2005-2008, 2009, 2011 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, see <http://www.gnu.org/licenses/>. */
18 #ifdef HAVE_CONFIG_H
19 # include <config.h>
20 #endif
22 #include <errno.h>
23 #include <error.h>
24 #include <stdlib.h>
25 #include <wchar.h>
26 #include <sys/param.h>
28 #include "localedef.h"
29 #include "charmap.h"
30 #include "localeinfo.h"
31 #include "linereader.h"
32 #include "locfile.h"
33 #include "elem-hash.h"
35 /* Uncomment the following line in the production version. */
36 /* #define NDEBUG 1 */
37 #include <assert.h>
39 #define obstack_chunk_alloc malloc
40 #define obstack_chunk_free free
/* Append the 32-bit value DATA to the object currently being grown in
   OBSTACK.  When `int' is exactly as wide as `int32_t' the int-sized
   obstack primitive is used, otherwise the bytes of DATA are copied.  */
static inline void
__attribute ((always_inline))
obstack_int32_grow (struct obstack *obstack, int32_t data)
{
  if (sizeof (int32_t) == sizeof (int))
    obstack_int_grow (obstack, data);
  else
    obstack_grow (obstack, &data, sizeof (int32_t));
}
/* Variant of obstack_int32_grow which uses obstack_int_grow_fast when
   `int' is exactly as wide as `int32_t'.  NOTE(review): the `_fast'
   obstack primitives presumably expect the caller to have reserved room
   beforehand -- confirm against the callers.  When the sizes differ the
   ordinary obstack_grow is used instead.  */
static inline void
__attribute ((always_inline))
obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
{
  if (sizeof (int32_t) == sizeof (int))
    obstack_int_grow_fast (obstack, data);
  else
    obstack_grow (obstack, &data, sizeof (int32_t));
}
/* Forward declaration.  */
struct element_t;

/* Data type for the list of sections.  Each node describes one named
   section together with the range of elements it holds and the sorting
   rules which apply to it.  */
struct section_list
{
  /* Successor in the known_sections list.  */
  struct section_list *def_next;
  /* Successor in the sections list.  */
  struct section_list *next;
  /* Name of the section.  */
  const char *name;
  /* First element of this section.  */
  struct element_t *first;
  /* Last element of this section.  */
  struct element_t *last;
  /* These are the rules for this section.  */
  enum coll_sort_rule *rules;
  /* Index of the rule set in the appropriate section of the output file.  */
  int ruleidx;
};
struct element_t;

/* Counted array of element pointers; used for the weights of one level.  */
struct element_list_t
{
  /* Number of elements.  */
  int cnt;

  /* The CNT element pointers.  */
  struct element_t **w;
};
/* Data type for collating element.  */
struct element_t
{
  /* Symbolic name, or NULL for anonymous elements.  */
  const char *name;

  /* Multibyte sequence and its length in bytes (NULL/0 if unknown).  */
  const char *mbs;
  size_t nmbs;
  /* Wide character sequence and its length (NULL/0 if unknown).  */
  const uint32_t *wcs;
  size_t nwcs;
  int *mborder;
  int wcorder;

  /* The following is a bit mask which bits are set if this element is
     used in the appropriate level.  Interesting for the singlebyte
     weight computation.

     XXX The type here restricts the number of levels to 32.  It could
     be changed if necessary but I doubt this is necessary.  */
  unsigned int used_in_level;

  /* Array with one weight list per level; allocated when the element
     is placed in the order list.  */
  struct element_list_t *weights;

  /* Nonzero if this is a real character definition.  */
  int is_character;

  /* Order of the character in the sequence.  This information will
     be used in range expressions.  */
  int mbseqorder;
  int wcseqorder;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;

  /* Which section does this belong to.  */
  struct section_list *section;

  /* Predecessor and successor in the order list.  */
  struct element_t *last;
  struct element_t *next;

  /* Next element in multibyte output list.  */
  struct element_t *mbnext;
  struct element_t *mblast;

  /* Next element in wide character output list.  */
  struct element_t *wcnext;
  struct element_t *wclast;
};
/* Special element values.  These small integers cast to pointers are
   stored in weight lists as placeholders in place of real element
   pointers; they must never be dereferenced.  */
#define ELEMENT_ELLIPSIS2	((struct element_t *) 1)
#define ELEMENT_ELLIPSIS3	((struct element_t *) 2)
#define ELEMENT_ELLIPSIS4	((struct element_t *) 3)
/* Data type for collating symbol.  */
struct symbol_t
{
  /* Name of the symbol.  */
  const char *name;

  /* Point to place in the order list.  */
  struct element_t *order;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;
};
162 /* Sparse table of struct element_t *. */
163 #define TABLE wchead_table
164 #define ELEMENT struct element_t *
165 #define DEFAULT NULL
166 #define ITERATE
167 #define NO_FINALIZE
168 #include "3level.h"
170 /* Sparse table of int32_t. */
171 #define TABLE collidx_table
172 #define ELEMENT int32_t
173 #define DEFAULT 0
174 #include "3level.h"
176 /* Sparse table of uint32_t. */
177 #define TABLE collseq_table
178 #define ELEMENT uint32_t
179 #define DEFAULT ~((uint32_t) 0)
180 #include "3level.h"
/* Simple name list for the preprocessor.  The string is stored directly
   behind the link pointer, so a node must be allocated with room for
   the name in addition to sizeof (struct name_list).  */
struct name_list
{
  struct name_list *next;
  /* C99 flexible array member (was the GNU zero-length array form).  */
  char str[];
};
191 /* The real definition of the struct for the LC_COLLATE locale. */
192 struct locale_collate_t
194 int col_weight_max;
195 int cur_weight_max;
197 /* List of known scripts. */
198 struct section_list *known_sections;
199 /* List of used sections. */
200 struct section_list *sections;
201 /* Current section using definition. */
202 struct section_list *current_section;
203 /* There always can be an unnamed section. */
204 struct section_list unnamed_section;
205 /* Flag whether the unnamed section has been defined. */
206 bool unnamed_section_defined;
207 /* To make handling of errors easier we have another section. */
208 struct section_list error_section;
209 /* Sometimes we are defining the values for collating symbols before
210 the first actual section. */
211 struct section_list symbol_section;
213 /* Start of the order list. */
214 struct element_t *start;
216 /* The undefined element. */
217 struct element_t undefined;
219 /* This is the cursor for `reorder_after' insertions. */
220 struct element_t *cursor;
222 /* This value is used when handling ellipsis. */
223 struct element_t ellipsis_weight;
225 /* Known collating elements. */
226 hash_table elem_table;
228 /* Known collating symbols. */
229 hash_table sym_table;
231 /* Known collation sequences. */
232 hash_table seq_table;
234 struct obstack mempool;
236 /* The LC_COLLATE category is a bit special as it is sometimes possible
237 that the definitions from more than one input file contains information.
238 Therefore we keep all relevant input in a list. */
239 struct locale_collate_t *next;
241 /* Arrays with heads of the list for each of the leading bytes in
242 the multibyte sequences. */
243 struct element_t *mbheads[256];
245 /* Arrays with heads of the list for each of the leading bytes in
246 the multibyte sequences. */
247 struct wchead_table wcheads;
249 /* The arrays with the collation sequence order. */
250 unsigned char mbseqorder[256];
251 struct collseq_table wcseqorder;
253 /* State of the preprocessor. */
254 enum
256 else_none = 0,
257 else_ignore,
258 else_seen
260 else_action;
/* We have a few global variables which are used for reading all
   LC_COLLATE category descriptions in all files.  */

/* Number of sorting rules (levels).  Zero until the first direction
   specification has been read by read_directions.  */
static uint32_t nrules;

/* List of defined preprocessor symbols.  */
static struct name_list *defined;
/* We need UTF-8 encoding of numbers.

   Store the UTF-8 style encoding of VAL in BUF and return the number of
   bytes written (1 to 6).  No terminating NUL byte is written, so BUF
   must provide room for up to six bytes.  Every VAL below 0x80 --
   including negative values -- is stored as the single byte (char) VAL.  */
static inline int
__attribute ((always_inline))
utf8_encode (char *buf, int val)
{
  int retval;

  if (val < 0x80)
    {
      *buf++ = (char) val;
      retval = 1;
    }
  else
    {
      int step;

      /* A sequence of STEP bytes carries 5 * STEP + 1 payload bits.
	 Find the shortest sequence which can hold VAL.  */
      for (step = 2; step < 6; ++step)
	if ((val & (~(uint32_t) 0 << (5 * step + 1))) == 0)
	  break;
      retval = step;

      /* Lead byte marker: the STEP topmost bits set.  The shift is done
	 on an unsigned value; right-shifting the negative `~0xff' would
	 be implementation-defined.  */
      *buf = (unsigned char) (0xff00u >> step);
      --step;
      /* Fill in the continuation bytes from the last one backwards,
	 six payload bits each.  */
      do
	{
	  buf[step] = 0x80 | (val & 0x3f);
	  val >>= 6;
	}
      while (--step > 0);
      /* What remains of VAL fits into the free bits of the lead byte.  */
      *buf |= val;
    }

  return retval;
}
308 static struct section_list *
309 make_seclist_elem (struct locale_collate_t *collate, const char *string,
310 struct section_list *next)
312 struct section_list *newp;
314 newp = (struct section_list *) obstack_alloc (&collate->mempool,
315 sizeof (*newp));
316 newp->next = next;
317 newp->name = string;
318 newp->first = NULL;
319 newp->last = NULL;
321 return newp;
325 static struct element_t *
326 new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
327 const uint32_t *wcs, const char *name, size_t namelen,
328 int is_character)
330 struct element_t *newp;
332 newp = (struct element_t *) obstack_alloc (&collate->mempool,
333 sizeof (*newp));
334 newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
335 name, namelen);
336 if (mbs != NULL)
338 newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
339 newp->nmbs = mbslen;
341 else
343 newp->mbs = NULL;
344 newp->nmbs = 0;
346 if (wcs != NULL)
348 size_t nwcs = wcslen ((wchar_t *) wcs);
349 uint32_t zero = 0;
350 obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
351 obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
352 newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
353 newp->nwcs = nwcs;
355 else
357 newp->wcs = NULL;
358 newp->nwcs = 0;
360 newp->mborder = NULL;
361 newp->wcorder = 0;
362 newp->used_in_level = 0;
363 newp->is_character = is_character;
365 /* Will be assigned later. XXX */
366 newp->mbseqorder = 0;
367 newp->wcseqorder = 0;
369 /* Will be allocated later. */
370 newp->weights = NULL;
372 newp->file = NULL;
373 newp->line = 0;
375 newp->section = collate->current_section;
377 newp->last = NULL;
378 newp->next = NULL;
380 newp->mbnext = NULL;
381 newp->mblast = NULL;
383 newp->wcnext = NULL;
384 newp->wclast = NULL;
386 return newp;
390 static struct symbol_t *
391 new_symbol (struct locale_collate_t *collate, const char *name, size_t len)
393 struct symbol_t *newp;
395 newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp));
397 newp->name = obstack_copy0 (&collate->mempool, name, len);
398 newp->order = NULL;
400 newp->file = NULL;
401 newp->line = 0;
403 return newp;
407 /* Test whether this name is already defined somewhere. */
408 static int
409 check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
410 const struct charmap_t *charmap,
411 struct repertoire_t *repertoire, const char *symbol,
412 size_t symbol_len)
414 void *ignore = NULL;
416 if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
418 lr_error (ldfile, _("`%.*s' already defined in charmap"),
419 (int) symbol_len, symbol);
420 return 1;
423 if (repertoire != NULL
424 && (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore)
425 == 0))
427 lr_error (ldfile, _("`%.*s' already defined in repertoire"),
428 (int) symbol_len, symbol);
429 return 1;
432 if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
434 lr_error (ldfile, _("`%.*s' already defined as collating symbol"),
435 (int) symbol_len, symbol);
436 return 1;
439 if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
441 lr_error (ldfile, _("`%.*s' already defined as collating element"),
442 (int) symbol_len, symbol);
443 return 1;
446 return 0;
450 /* Read the direction specification. */
451 static void
452 read_directions (struct linereader *ldfile, struct token *arg,
453 const struct charmap_t *charmap,
454 struct repertoire_t *repertoire, struct localedef_t *result)
456 int cnt = 0;
457 int max = nrules ?: 10;
458 enum coll_sort_rule *rules = calloc (max, sizeof (*rules));
459 int warned = 0;
460 struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
462 while (1)
464 int valid = 0;
466 if (arg->tok == tok_forward)
468 if (rules[cnt] & sort_backward)
470 if (! warned)
472 lr_error (ldfile, _("\
473 %s: `forward' and `backward' are mutually excluding each other"),
474 "LC_COLLATE");
475 warned = 1;
478 else if (rules[cnt] & sort_forward)
480 if (! warned)
482 lr_error (ldfile, _("\
483 %s: `%s' mentioned more than once in definition of weight %d"),
484 "LC_COLLATE", "forward", cnt + 1);
487 else
488 rules[cnt] |= sort_forward;
490 valid = 1;
492 else if (arg->tok == tok_backward)
494 if (rules[cnt] & sort_forward)
496 if (! warned)
498 lr_error (ldfile, _("\
499 %s: `forward' and `backward' are mutually excluding each other"),
500 "LC_COLLATE");
501 warned = 1;
504 else if (rules[cnt] & sort_backward)
506 if (! warned)
508 lr_error (ldfile, _("\
509 %s: `%s' mentioned more than once in definition of weight %d"),
510 "LC_COLLATE", "backward", cnt + 1);
513 else
514 rules[cnt] |= sort_backward;
516 valid = 1;
518 else if (arg->tok == tok_position)
520 if (rules[cnt] & sort_position)
522 if (! warned)
524 lr_error (ldfile, _("\
525 %s: `%s' mentioned more than once in definition of weight %d"),
526 "LC_COLLATE", "position", cnt + 1);
529 else
530 rules[cnt] |= sort_position;
532 valid = 1;
535 if (valid)
536 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
538 if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
539 || arg->tok == tok_semicolon)
541 if (! valid && ! warned)
543 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
544 warned = 1;
547 /* See whether we have to increment the counter. */
548 if (arg->tok != tok_comma && rules[cnt] != 0)
550 /* Add the default `forward' if we have seen only `position'. */
551 if (rules[cnt] == sort_position)
552 rules[cnt] = sort_position | sort_forward;
554 ++cnt;
557 if (arg->tok == tok_eof || arg->tok == tok_eol)
558 /* End of line or file, so we exit the loop. */
559 break;
561 if (nrules == 0)
563 /* See whether we have enough room in the array. */
564 if (cnt == max)
566 max += 10;
567 rules = (enum coll_sort_rule *) xrealloc (rules,
569 * sizeof (*rules));
570 memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
573 else
575 if (cnt == nrules)
577 /* There must not be any more rule. */
578 if (! warned)
580 lr_error (ldfile, _("\
581 %s: too many rules; first entry only had %d"),
582 "LC_COLLATE", nrules);
583 warned = 1;
586 lr_ignore_rest (ldfile, 0);
587 break;
591 else
593 if (! warned)
595 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
596 warned = 1;
600 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
603 if (nrules == 0)
605 /* Now we know how many rules we have. */
606 nrules = cnt;
607 rules = (enum coll_sort_rule *) xrealloc (rules,
608 nrules * sizeof (*rules));
610 else
612 if (cnt < nrules)
614 /* Not enough rules in this specification. */
615 if (! warned)
616 lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
619 rules[cnt] = sort_forward;
620 while (++cnt < nrules);
624 collate->current_section->rules = rules;
628 static struct element_t *
629 find_element (struct linereader *ldfile, struct locale_collate_t *collate,
630 const char *str, size_t len)
632 void *result = NULL;
634 /* Search for the entries among the collation sequences already define. */
635 if (find_entry (&collate->seq_table, str, len, &result) != 0)
637 /* Nope, not define yet. So we see whether it is a
638 collation symbol. */
639 void *ptr;
641 if (find_entry (&collate->sym_table, str, len, &ptr) == 0)
643 /* It's a collation symbol. */
644 struct symbol_t *sym = (struct symbol_t *) ptr;
645 result = sym->order;
647 if (result == NULL)
648 result = sym->order = new_element (collate, NULL, 0, NULL,
649 NULL, 0, 0);
651 else if (find_entry (&collate->elem_table, str, len, &result) != 0)
653 /* It's also no collation element. So it is a character
654 element defined later. */
655 result = new_element (collate, NULL, 0, NULL, str, len, 1);
656 /* Insert it into the sequence table. */
657 insert_entry (&collate->seq_table, str, len, result);
661 return (struct element_t *) result;
665 static void
666 unlink_element (struct locale_collate_t *collate)
668 if (collate->cursor == collate->start)
670 assert (collate->cursor->next == NULL);
671 assert (collate->cursor->last == NULL);
672 collate->cursor = NULL;
674 else
676 if (collate->cursor->next != NULL)
677 collate->cursor->next->last = collate->cursor->last;
678 if (collate->cursor->last != NULL)
679 collate->cursor->last->next = collate->cursor->next;
680 collate->cursor = collate->cursor->last;
685 static void
686 insert_weights (struct linereader *ldfile, struct element_t *elem,
687 const struct charmap_t *charmap,
688 struct repertoire_t *repertoire, struct localedef_t *result,
689 enum token_t ellipsis)
691 int weight_cnt;
692 struct token *arg;
693 struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
695 /* Initialize all the fields. */
696 elem->file = ldfile->fname;
697 elem->line = ldfile->lineno;
699 elem->last = collate->cursor;
700 elem->next = collate->cursor ? collate->cursor->next : NULL;
701 if (collate->cursor != NULL && collate->cursor->next != NULL)
702 collate->cursor->next->last = elem;
703 if (collate->cursor != NULL)
704 collate->cursor->next = elem;
705 if (collate->start == NULL)
707 assert (collate->cursor == NULL);
708 collate->start = elem;
711 elem->section = collate->current_section;
713 if (collate->current_section->first == NULL)
714 collate->current_section->first = elem;
715 if (collate->current_section->last == collate->cursor)
716 collate->current_section->last = elem;
718 collate->cursor = elem;
720 elem->weights = (struct element_list_t *)
721 obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
722 memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
724 weight_cnt = 0;
726 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
729 if (arg->tok == tok_eof || arg->tok == tok_eol)
730 break;
732 if (arg->tok == tok_ignore)
734 /* The weight for this level has to be ignored. We use the
735 null pointer to indicate this. */
736 elem->weights[weight_cnt].w = (struct element_t **)
737 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
738 elem->weights[weight_cnt].w[0] = NULL;
739 elem->weights[weight_cnt].cnt = 1;
741 else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
743 char ucs4str[10];
744 struct element_t *val;
745 char *symstr;
746 size_t symlen;
748 if (arg->tok == tok_bsymbol)
750 symstr = arg->val.str.startmb;
751 symlen = arg->val.str.lenmb;
753 else
755 snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
756 symstr = ucs4str;
757 symlen = 9;
760 val = find_element (ldfile, collate, symstr, symlen);
761 if (val == NULL)
762 break;
764 elem->weights[weight_cnt].w = (struct element_t **)
765 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
766 elem->weights[weight_cnt].w[0] = val;
767 elem->weights[weight_cnt].cnt = 1;
769 else if (arg->tok == tok_string)
771 /* Split the string up in the individual characters and put
772 the element definitions in the list. */
773 const char *cp = arg->val.str.startmb;
774 int cnt = 0;
775 struct element_t *charelem;
776 struct element_t **weights = NULL;
777 int max = 0;
779 if (*cp == '\0')
781 lr_error (ldfile, _("%s: empty weight string not allowed"),
782 "LC_COLLATE");
783 lr_ignore_rest (ldfile, 0);
784 break;
789 if (*cp == '<')
791 /* Ahh, it's a bsymbol or an UCS4 value. If it's
792 the latter we have to unify the name. */
793 const char *startp = ++cp;
794 size_t len;
796 while (*cp != '>')
798 if (*cp == ldfile->escape_char)
799 ++cp;
800 if (*cp == '\0')
801 /* It's a syntax error. */
802 goto syntax;
804 ++cp;
807 if (cp - startp == 5 && startp[0] == 'U'
808 && isxdigit (startp[1]) && isxdigit (startp[2])
809 && isxdigit (startp[3]) && isxdigit (startp[4]))
811 unsigned int ucs4 = strtoul (startp + 1, NULL, 16);
812 char *newstr;
814 newstr = (char *) xmalloc (10);
815 snprintf (newstr, 10, "U%08X", ucs4);
816 startp = newstr;
818 len = 9;
820 else
821 len = cp - startp;
823 charelem = find_element (ldfile, collate, startp, len);
824 ++cp;
826 else
828 /* People really shouldn't use characters directly in
829 the string. Especially since it's not really clear
830 what this means. We interpret all characters in the
831 string as if that would be bsymbols. Otherwise we
832 would have to match back to bsymbols somehow and this
833 is normally not what people normally expect. */
834 charelem = find_element (ldfile, collate, cp++, 1);
837 if (charelem == NULL)
839 /* We ignore the rest of the line. */
840 lr_ignore_rest (ldfile, 0);
841 break;
844 /* Add the pointer. */
845 if (cnt >= max)
847 struct element_t **newp;
848 max += 10;
849 newp = (struct element_t **)
850 alloca (max * sizeof (struct element_t *));
851 memcpy (newp, weights, cnt * sizeof (struct element_t *));
852 weights = newp;
854 weights[cnt++] = charelem;
856 while (*cp != '\0');
858 /* Now store the information. */
859 elem->weights[weight_cnt].w = (struct element_t **)
860 obstack_alloc (&collate->mempool,
861 cnt * sizeof (struct element_t *));
862 memcpy (elem->weights[weight_cnt].w, weights,
863 cnt * sizeof (struct element_t *));
864 elem->weights[weight_cnt].cnt = cnt;
866 /* We don't need the string anymore. */
867 free (arg->val.str.startmb);
869 else if (ellipsis != tok_none
870 && (arg->tok == tok_ellipsis2
871 || arg->tok == tok_ellipsis3
872 || arg->tok == tok_ellipsis4))
874 /* It must be the same ellipsis as used in the initial column. */
875 if (arg->tok != ellipsis)
876 lr_error (ldfile, _("\
877 %s: weights must use the same ellipsis symbol as the name"),
878 "LC_COLLATE");
880 /* The weight for this level will depend on the element
881 iterating over the range. Put a placeholder. */
882 elem->weights[weight_cnt].w = (struct element_t **)
883 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
884 elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
885 elem->weights[weight_cnt].cnt = 1;
887 else
889 syntax:
890 /* It's a syntax error. */
891 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
892 lr_ignore_rest (ldfile, 0);
893 break;
896 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
897 /* This better should be the end of the line or a semicolon. */
898 if (arg->tok == tok_semicolon)
899 /* OK, ignore this and read the next token. */
900 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
901 else if (arg->tok != tok_eof && arg->tok != tok_eol)
903 /* It's a syntax error. */
904 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
905 lr_ignore_rest (ldfile, 0);
906 break;
909 while (++weight_cnt < nrules);
911 if (weight_cnt < nrules)
913 /* This means the rest of the line uses the current element as
914 the weight. */
917 elem->weights[weight_cnt].w = (struct element_t **)
918 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
919 if (ellipsis == tok_none)
920 elem->weights[weight_cnt].w[0] = elem;
921 else
922 elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
923 elem->weights[weight_cnt].cnt = 1;
925 while (++weight_cnt < nrules);
927 else
929 if (arg->tok == tok_ignore || arg->tok == tok_bsymbol)
931 /* Too many rule values. */
932 lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
933 lr_ignore_rest (ldfile, 0);
935 else
936 lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
941 static int
942 insert_value (struct linereader *ldfile, const char *symstr, size_t symlen,
943 const struct charmap_t *charmap, struct repertoire_t *repertoire,
944 struct localedef_t *result)
946 /* First find out what kind of symbol this is. */
947 struct charseq *seq;
948 uint32_t wc;
949 struct element_t *elem = NULL;
950 struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
952 /* Try to find the character in the charmap. */
953 seq = charmap_find_value (charmap, symstr, symlen);
955 /* Determine the wide character. */
956 if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
958 wc = repertoire_find_value (repertoire, symstr, symlen);
959 if (seq != NULL)
960 seq->ucs4 = wc;
962 else
963 wc = seq->ucs4;
965 if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
967 /* It's no character, so look through the collation elements and
968 symbol list. */
969 void *ptr = elem;
970 if (find_entry (&collate->elem_table, symstr, symlen, &ptr) != 0)
972 void *result;
973 struct symbol_t *sym = NULL;
975 /* It's also collation element. Therefore it's either a
976 collating symbol or it's a character which is not
977 supported by the character set. In the later case we
978 simply create a dummy entry. */
979 if (find_entry (&collate->sym_table, symstr, symlen, &result) == 0)
981 /* It's a collation symbol. */
982 sym = (struct symbol_t *) result;
984 elem = sym->order;
987 if (elem == NULL)
989 elem = new_element (collate, NULL, 0, NULL, symstr, symlen, 0);
991 if (sym != NULL)
992 sym->order = elem;
993 else
994 /* Enter a fake element in the sequence table. This
995 won't cause anything in the output since there is
996 no multibyte or wide character associated with
997 it. */
998 insert_entry (&collate->seq_table, symstr, symlen, elem);
1001 else
1002 /* Copy the result back. */
1003 elem = ptr;
1005 else
1007 /* Otherwise the symbols stands for a character. */
1008 void *ptr = elem;
1009 if (find_entry (&collate->seq_table, symstr, symlen, &ptr) != 0)
1011 uint32_t wcs[2] = { wc, 0 };
1013 /* We have to allocate an entry. */
1014 elem = new_element (collate,
1015 seq != NULL ? (char *) seq->bytes : NULL,
1016 seq != NULL ? seq->nbytes : 0,
1017 wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
1018 symstr, symlen, 1);
1020 /* And add it to the table. */
1021 if (insert_entry (&collate->seq_table, symstr, symlen, elem) != 0)
1022 /* This cannot happen. */
1023 assert (! "Internal error");
1025 else
1027 /* Copy the result back. */
1028 elem = ptr;
1030 /* Maybe the character was used before the definition. In this case
1031 we have to insert the byte sequences now. */
1032 if (elem->mbs == NULL && seq != NULL)
1034 elem->mbs = obstack_copy0 (&collate->mempool,
1035 seq->bytes, seq->nbytes);
1036 elem->nmbs = seq->nbytes;
1039 if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
1041 uint32_t wcs[2] = { wc, 0 };
1043 elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
1044 elem->nwcs = 1;
1049 /* Test whether this element is not already in the list. */
1050 if (elem->next != NULL || elem == collate->cursor)
1052 lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
1053 (int) symlen, symstr, elem->file, elem->line);
1054 lr_ignore_rest (ldfile, 0);
1055 return 1;
1058 insert_weights (ldfile, elem, charmap, repertoire, result, tok_none);
1060 return 0;
1064 static void
1065 handle_ellipsis (struct linereader *ldfile, const char *symstr, size_t symlen,
1066 enum token_t ellipsis, const struct charmap_t *charmap,
1067 struct repertoire_t *repertoire,
1068 struct localedef_t *result)
1070 struct element_t *startp;
1071 struct element_t *endp;
1072 struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
1074 /* Unlink the entry added for the ellipsis. */
1075 unlink_element (collate);
1076 startp = collate->cursor;
1078 /* Process and add the end-entry. */
1079 if (symstr != NULL
1080 && insert_value (ldfile, symstr, symlen, charmap, repertoire, result))
1081 /* Something went wrong with inserting the to-value. This means
1082 we cannot process the ellipsis. */
1083 return;
1085 /* Reset the cursor. */
1086 collate->cursor = startp;
1088 /* Now we have to handle many different situations:
1089 - we have to distinguish between the three different ellipsis forms
1090 - the is the ellipsis at the beginning, in the middle, or at the end.
1092 endp = collate->cursor->next;
1093 assert (symstr == NULL || endp != NULL);
1095 /* XXX The following is probably very wrong since also collating symbols
1096 can appear in ranges. But do we want/can refine the test for that? */
1097 #if 0
1098 /* Both, the start and the end symbol, must stand for characters. */
1099 if ((startp != NULL && (startp->name == NULL || ! startp->is_character))
1100 || (endp != NULL && (endp->name == NULL|| ! endp->is_character)))
1102 lr_error (ldfile, _("\
1103 %s: the start and the end symbol of a range must stand for characters"),
1104 "LC_COLLATE");
1105 return;
1107 #endif
1109 if (ellipsis == tok_ellipsis3)
1111 /* One requirement we make here: the length of the byte
1112 sequences for the first and end character must be the same.
1113 This is mainly to prevent unwanted effects and this is often
1114 not what is wanted. */
1115 size_t len = (startp->mbs != NULL ? startp->nmbs
1116 : (endp->mbs != NULL ? endp->nmbs : 0));
1117 char mbcnt[len + 1];
1118 char mbend[len + 1];
1120 /* Well, this should be caught somewhere else already. Just to
1121 make sure. */
1122 assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0);
1123 assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0);
1125 if (startp != NULL && endp != NULL
1126 && startp->mbs != NULL && endp->mbs != NULL
1127 && startp->nmbs != endp->nmbs)
1129 lr_error (ldfile, _("\
1130 %s: byte sequences of first and last character must have the same length"),
1131 "LC_COLLATE");
1132 return;
1135 /* Determine whether we have to generate multibyte sequences. */
1136 if ((startp == NULL || startp->mbs != NULL)
1137 && (endp == NULL || endp->mbs != NULL))
1139 int cnt;
1140 int ret;
1142 /* Prepare the beginning byte sequence. This is either from the
1143 beginning byte sequence or it is all nulls if it was an
1144 initial ellipsis. */
1145 if (startp == NULL || startp->mbs == NULL)
1146 memset (mbcnt, '\0', len);
1147 else
1149 memcpy (mbcnt, startp->mbs, len);
1151 /* And increment it so that the value is the first one we will
1152 try to insert. */
1153 for (cnt = len - 1; cnt >= 0; --cnt)
1154 if (++mbcnt[cnt] != '\0')
1155 break;
1157 mbcnt[len] = '\0';
1159 /* And the end sequence. */
1160 if (endp == NULL || endp->mbs == NULL)
1161 memset (mbend, '\0', len);
1162 else
1163 memcpy (mbend, endp->mbs, len);
1164 mbend[len] = '\0';
1166 /* Test whether we have a correct range. */
1167 ret = memcmp (mbcnt, mbend, len);
1168 if (ret >= 0)
1170 if (ret > 0)
1171 lr_error (ldfile, _("%s: byte sequence of first character of \
1172 range is not lower than that of the last character"), "LC_COLLATE");
1173 return;
1176 /* Generate the byte sequences data. */
1177 while (1)
1179 struct charseq *seq;
1181 /* Quite a bit of work ahead. We have to find the character
1182 definition for the byte sequence and then determine the
1183 wide character belonging to it. */
1184 seq = charmap_find_symbol (charmap, mbcnt, len);
1185 if (seq != NULL)
1187 struct element_t *elem;
1188 size_t namelen;
1190 /* I don't think this can ever happen. */
1191 assert (seq->name != NULL);
1192 namelen = strlen (seq->name);
1194 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1195 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1196 namelen);
1198 /* Now we are ready to insert the new value in the
1199 sequence. Find out whether the element is
1200 already known. */
1201 void *ptr;
1202 if (find_entry (&collate->seq_table, seq->name, namelen,
1203 &ptr) != 0)
1205 uint32_t wcs[2] = { seq->ucs4, 0 };
1207 /* We have to allocate an entry. */
1208 elem = new_element (collate, mbcnt, len,
1209 seq->ucs4 == ILLEGAL_CHAR_VALUE
1210 ? NULL : wcs, seq->name,
1211 namelen, 1);
1213 /* And add it to the table. */
1214 if (insert_entry (&collate->seq_table, seq->name,
1215 namelen, elem) != 0)
1216 /* This cannot happen. */
1217 assert (! "Internal error");
1219 else
1220 /* Copy the result. */
1221 elem = ptr;
1223 /* Test whether this element is not already in the list. */
1224 if (elem->next != NULL || (collate->cursor != NULL
1225 && elem->next == collate->cursor))
1227 lr_error (ldfile, _("\
1228 order for `%.*s' already defined at %s:%Zu"),
1229 (int) namelen, seq->name,
1230 elem->file, elem->line);
1231 goto increment;
1234 /* Enqueue the new element. */
1235 elem->last = collate->cursor;
1236 if (collate->cursor == NULL)
1237 elem->next = NULL;
1238 else
1240 elem->next = collate->cursor->next;
1241 elem->last->next = elem;
1242 if (elem->next != NULL)
1243 elem->next->last = elem;
1245 if (collate->start == NULL)
1247 assert (collate->cursor == NULL);
1248 collate->start = elem;
1250 collate->cursor = elem;
1252 /* Add the weight value. We take them from the
1253 `ellipsis_weights' member of `collate'. */
1254 elem->weights = (struct element_list_t *)
1255 obstack_alloc (&collate->mempool,
1256 nrules * sizeof (struct element_list_t));
1257 for (cnt = 0; cnt < nrules; ++cnt)
1258 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1259 && (collate->ellipsis_weight.weights[cnt].w[0]
1260 == ELEMENT_ELLIPSIS2))
1262 elem->weights[cnt].w = (struct element_t **)
1263 obstack_alloc (&collate->mempool,
1264 sizeof (struct element_t *));
1265 elem->weights[cnt].w[0] = elem;
1266 elem->weights[cnt].cnt = 1;
1268 else
1270 /* Simply use the weight from `ellipsis_weight'. */
1271 elem->weights[cnt].w =
1272 collate->ellipsis_weight.weights[cnt].w;
1273 elem->weights[cnt].cnt =
1274 collate->ellipsis_weight.weights[cnt].cnt;
1278 /* Increment for the next round. */
1279 increment:
1280 for (cnt = len - 1; cnt >= 0; --cnt)
1281 if (++mbcnt[cnt] != '\0')
1282 break;
1284 /* Find out whether this was all. */
1285 if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0)
1286 /* Yep, that's all. */
1287 break;
1291 else
1293 /* For symbolic range we naturally must have a beginning and an
1294 end specified by the user. */
1295 if (startp == NULL)
1296 lr_error (ldfile, _("\
1297 %s: symbolic range ellipsis must not directly follow `order_start'"),
1298 "LC_COLLATE");
1299 else if (endp == NULL)
1300 lr_error (ldfile, _("\
1301 %s: symbolic range ellipsis must not be directly followed by `order_end'"),
1302 "LC_COLLATE");
1303 else
1305 /* Determine the range. To do so we have to determine the
1306 common prefix of the both names and then the numeric
1307 values of both ends. */
1308 size_t lenfrom = strlen (startp->name);
1309 size_t lento = strlen (endp->name);
1310 char buf[lento + 1];
1311 int preflen = 0;
1312 long int from;
1313 long int to;
1314 char *cp;
1315 int base = ellipsis == tok_ellipsis2 ? 16 : 10;
1317 if (lenfrom != lento)
1319 invalid_range:
1320 lr_error (ldfile, _("\
1321 `%s' and `%.*s' are not valid names for symbolic range"),
1322 startp->name, (int) lento, endp->name);
1323 return;
1326 while (startp->name[preflen] == endp->name[preflen])
1327 if (startp->name[preflen] == '\0')
1328 /* Nothing to be done. The start and end point are identical
1329 and while inserting the end point we have already given
1330 the user an error message. */
1331 return;
1332 else
1333 ++preflen;
1335 errno = 0;
1336 from = strtol (startp->name + preflen, &cp, base);
1337 if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0')
1338 goto invalid_range;
1340 errno = 0;
1341 to = strtol (endp->name + preflen, &cp, base);
1342 if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0')
1343 goto invalid_range;
1345 /* Copy the prefix. */
1346 memcpy (buf, startp->name, preflen);
1348 /* Loop over all values. */
1349 for (++from; from < to; ++from)
1351 struct element_t *elem = NULL;
1352 struct charseq *seq;
1353 uint32_t wc;
1354 int cnt;
1356 /* Generate the name. */
1357 sprintf (buf + preflen, base == 10 ? "%0*ld" : "%0*lX",
1358 (int) (lenfrom - preflen), from);
1360 /* Look whether this name is already defined. */
1361 void *ptr;
1362 if (find_entry (&collate->seq_table, buf, symlen, &ptr) == 0)
1364 /* Copy back the result. */
1365 elem = ptr;
1367 if (elem->next != NULL || (collate->cursor != NULL
1368 && elem->next == collate->cursor))
1370 lr_error (ldfile, _("\
1371 %s: order for `%.*s' already defined at %s:%Zu"),
1372 "LC_COLLATE", (int) lenfrom, buf,
1373 elem->file, elem->line);
1374 continue;
1377 if (elem->name == NULL)
1379 lr_error (ldfile, _("%s: `%s' must be a character"),
1380 "LC_COLLATE", buf);
1381 continue;
1385 if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL))
1387 /* Search for a character of this name. */
1388 seq = charmap_find_value (charmap, buf, lenfrom);
1389 if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1391 wc = repertoire_find_value (repertoire, buf, lenfrom);
1393 if (seq != NULL)
1394 seq->ucs4 = wc;
1396 else
1397 wc = seq->ucs4;
1399 if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1400 /* We don't know anything about a character with this
1401 name. XXX Should we warn? */
1402 continue;
1404 if (elem == NULL)
1406 uint32_t wcs[2] = { wc, 0 };
1408 /* We have to allocate an entry. */
1409 elem = new_element (collate,
1410 seq != NULL
1411 ? (char *) seq->bytes : NULL,
1412 seq != NULL ? seq->nbytes : 0,
1413 wc == ILLEGAL_CHAR_VALUE
1414 ? NULL : wcs, buf, lenfrom, 1);
1416 else
1418 /* Update the element. */
1419 if (seq != NULL)
1421 elem->mbs = obstack_copy0 (&collate->mempool,
1422 seq->bytes, seq->nbytes);
1423 elem->nmbs = seq->nbytes;
1426 if (wc != ILLEGAL_CHAR_VALUE)
1428 uint32_t zero = 0;
1430 obstack_grow (&collate->mempool,
1431 &wc, sizeof (uint32_t));
1432 obstack_grow (&collate->mempool,
1433 &zero, sizeof (uint32_t));
1434 elem->wcs = obstack_finish (&collate->mempool);
1435 elem->nwcs = 1;
1439 elem->file = ldfile->fname;
1440 elem->line = ldfile->lineno;
1441 elem->section = collate->current_section;
1444 /* Enqueue the new element. */
1445 elem->last = collate->cursor;
1446 elem->next = collate->cursor->next;
1447 elem->last->next = elem;
1448 if (elem->next != NULL)
1449 elem->next->last = elem;
1450 collate->cursor = elem;
1452 /* Now add the weights. They come from the `ellipsis_weights'
1453 member of `collate'. */
1454 elem->weights = (struct element_list_t *)
1455 obstack_alloc (&collate->mempool,
1456 nrules * sizeof (struct element_list_t));
1457 for (cnt = 0; cnt < nrules; ++cnt)
1458 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1459 && (collate->ellipsis_weight.weights[cnt].w[0]
1460 == ELEMENT_ELLIPSIS2))
1462 elem->weights[cnt].w = (struct element_t **)
1463 obstack_alloc (&collate->mempool,
1464 sizeof (struct element_t *));
1465 elem->weights[cnt].w[0] = elem;
1466 elem->weights[cnt].cnt = 1;
1468 else
1470 /* Simply use the weight from `ellipsis_weight'. */
1471 elem->weights[cnt].w =
1472 collate->ellipsis_weight.weights[cnt].w;
1473 elem->weights[cnt].cnt =
1474 collate->ellipsis_weight.weights[cnt].cnt;
1482 static void
1483 collate_startup (struct linereader *ldfile, struct localedef_t *locale,
1484 struct localedef_t *copy_locale, int ignore_content)
1486 if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
1488 struct locale_collate_t *collate;
1490 if (copy_locale == NULL)
1492 collate = locale->categories[LC_COLLATE].collate =
1493 (struct locale_collate_t *)
1494 xcalloc (1, sizeof (struct locale_collate_t));
1496 /* Init the various data structures. */
1497 init_hash (&collate->elem_table, 100);
1498 init_hash (&collate->sym_table, 100);
1499 init_hash (&collate->seq_table, 500);
1500 obstack_init (&collate->mempool);
1502 collate->col_weight_max = -1;
1504 else
1505 /* Reuse the copy_locale's data structures. */
1506 collate = locale->categories[LC_COLLATE].collate =
1507 copy_locale->categories[LC_COLLATE].collate;
1510 ldfile->translate_strings = 0;
1511 ldfile->return_widestr = 0;
1515 void
1516 collate_finish (struct localedef_t *locale, const struct charmap_t *charmap)
1518 /* Now is the time when we can assign the individual collation
1519 values for all the symbols. We have possibly different values
1520 for the wide- and the multibyte-character symbols. This is done
1521 since it might make a difference in the encoding if there is in
1522 some cases no multibyte-character but there are wide-characters.
1523 (The other way around it is not important since theencoded
1524 collation value in the wide-character case is 32 bits wide and
1525 therefore requires no encoding).
1527 The lowest collation value assigned is 2. Zero is reserved for
1528 the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1529 functions and 1 is used to separate the individual passes for the
1530 different rules.
1532 We also have to construct is list with all the bytes/words which
1533 can come first in a sequence, followed by all the elements which
1534 also start with this byte/word. The order is reverse which has
1535 among others the important effect that longer strings are located
1536 first in the list. This is required for the output data since
1537 the algorithm used in `strcoll' etc depends on this.
1539 The multibyte case is easy. We simply sort into an array with
1540 256 elements. */
1541 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1542 int mbact[nrules];
1543 int wcact;
1544 int mbseqact;
1545 int wcseqact;
1546 struct element_t *runp;
1547 int i;
1548 int need_undefined = 0;
1549 struct section_list *sect;
1550 int ruleidx;
1551 int nr_wide_elems = 0;
1553 if (collate == NULL)
1555 /* No data, no check. */
1556 if (! be_quiet)
1557 WITH_CUR_LOCALE (error (0, 0, _("No definition for %s category found"),
1558 "LC_COLLATE"));
1559 return;
1562 /* If this assertion is hit change the type in `element_t'. */
1563 assert (nrules <= sizeof (runp->used_in_level) * 8);
1565 /* Make sure that the `position' rule is used either in all sections
1566 or in none. */
1567 for (i = 0; i < nrules; ++i)
1568 for (sect = collate->sections; sect != NULL; sect = sect->next)
1569 if (sect != collate->current_section
1570 && sect->rules != NULL
1571 && ((sect->rules[i] & sort_position)
1572 != (collate->current_section->rules[i] & sort_position)))
1574 WITH_CUR_LOCALE (error (0, 0, _("\
1575 %s: `position' must be used for a specific level in all sections or none"),
1576 "LC_COLLATE"));
1577 break;
1580 /* Find out which elements are used at which level. At the same
1581 time we find out whether we have any undefined symbols. */
1582 runp = collate->start;
1583 while (runp != NULL)
1585 if (runp->mbs != NULL)
1587 for (i = 0; i < nrules; ++i)
1589 int j;
1591 for (j = 0; j < runp->weights[i].cnt; ++j)
1592 /* A NULL pointer as the weight means IGNORE. */
1593 if (runp->weights[i].w[j] != NULL)
1595 if (runp->weights[i].w[j]->weights == NULL)
1597 WITH_CUR_LOCALE (error_at_line (0, 0, runp->file,
1598 runp->line,
1599 _("symbol `%s' not defined"),
1600 runp->weights[i].w[j]->name));
1602 need_undefined = 1;
1603 runp->weights[i].w[j] = &collate->undefined;
1605 else
1606 /* Set the bit for the level. */
1607 runp->weights[i].w[j]->used_in_level |= 1 << i;
1612 /* Up to the next entry. */
1613 runp = runp->next;
1616 /* Walk through the list of defined sequences and assign weights. Also
1617 create the data structure which will allow generating the single byte
1618 character based tables.
1620 Since at each time only the weights for each of the rules are
1621 only compared to other weights for this rule it is possible to
1622 assign more compact weight values than simply counting all
1623 weights in sequence. We can assign weights from 3, one for each
1624 rule individually and only for those elements, which are actually
1625 used for this rule.
1627 Why is this important? It is not for the wide char table. But
1628 it is for the singlebyte output since here larger numbers have to
1629 be encoded to make it possible to emit the value as a byte
1630 string. */
1631 for (i = 0; i < nrules; ++i)
1632 mbact[i] = 2;
1633 wcact = 2;
1634 mbseqact = 0;
1635 wcseqact = 0;
1636 runp = collate->start;
1637 while (runp != NULL)
1639 /* Determine the order. */
1640 if (runp->used_in_level != 0)
1642 runp->mborder = (int *) obstack_alloc (&collate->mempool,
1643 nrules * sizeof (int));
1645 for (i = 0; i < nrules; ++i)
1646 if ((runp->used_in_level & (1 << i)) != 0)
1647 runp->mborder[i] = mbact[i]++;
1648 else
1649 runp->mborder[i] = 0;
1652 if (runp->mbs != NULL)
1654 struct element_t **eptr;
1655 struct element_t *lastp = NULL;
1657 /* Find the point where to insert in the list. */
1658 eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
1659 while (*eptr != NULL)
1661 if ((*eptr)->nmbs < runp->nmbs)
1662 break;
1664 if ((*eptr)->nmbs == runp->nmbs)
1666 int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
1668 if (c == 0)
1670 /* This should not happen. It means that we have
1671 to symbols with the same byte sequence. It is
1672 of course an error. */
1673 WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
1674 (*eptr)->line,
1675 _("\
1676 symbol `%s' has the same encoding as"), (*eptr)->name);
1677 error_at_line (0, 0, runp->file,
1678 runp->line,
1679 _("symbol `%s'"),
1680 runp->name));
1681 goto dont_insert;
1683 else if (c < 0)
1684 /* Insert it here. */
1685 break;
1688 /* To the next entry. */
1689 lastp = *eptr;
1690 eptr = &(*eptr)->mbnext;
1693 /* Set the pointers. */
1694 runp->mbnext = *eptr;
1695 runp->mblast = lastp;
1696 if (*eptr != NULL)
1697 (*eptr)->mblast = runp;
1698 *eptr = runp;
1699 dont_insert:
1703 if (runp->used_in_level)
1705 runp->wcorder = wcact++;
1707 /* We take the opportunity to count the elements which have
1708 wide characters. */
1709 ++nr_wide_elems;
1712 if (runp->is_character)
1714 if (runp->nmbs == 1)
1715 collate->mbseqorder[((unsigned char *) runp->mbs)[0]] = mbseqact++;
1717 runp->wcseqorder = wcseqact++;
1719 else if (runp->mbs != NULL && runp->weights != NULL)
1720 /* This is for collation elements. */
1721 runp->wcseqorder = wcseqact++;
1723 /* Up to the next entry. */
1724 runp = runp->next;
1727 /* Find out whether any of the `mbheads' entries is unset. In this
1728 case we use the UNDEFINED entry. */
1729 for (i = 1; i < 256; ++i)
1730 if (collate->mbheads[i] == NULL)
1732 need_undefined = 1;
1733 collate->mbheads[i] = &collate->undefined;
1736 /* Now to the wide character case. */
1737 collate->wcheads.p = 6;
1738 collate->wcheads.q = 10;
1739 wchead_table_init (&collate->wcheads);
1741 collate->wcseqorder.p = 6;
1742 collate->wcseqorder.q = 10;
1743 collseq_table_init (&collate->wcseqorder);
1745 /* Start adding. */
1746 runp = collate->start;
1747 while (runp != NULL)
1749 if (runp->wcs != NULL)
1751 struct element_t *e;
1752 struct element_t **eptr;
1753 struct element_t *lastp;
1755 /* Insert the collation sequence value. */
1756 if (runp->is_character)
1757 collseq_table_add (&collate->wcseqorder, runp->wcs[0],
1758 runp->wcseqorder);
1760 /* Find the point where to insert in the list. */
1761 e = wchead_table_get (&collate->wcheads, runp->wcs[0]);
1762 eptr = &e;
1763 lastp = NULL;
1764 while (*eptr != NULL)
1766 if ((*eptr)->nwcs < runp->nwcs)
1767 break;
1769 if ((*eptr)->nwcs == runp->nwcs)
1771 int c = wmemcmp ((wchar_t *) (*eptr)->wcs,
1772 (wchar_t *) runp->wcs, runp->nwcs);
1774 if (c == 0)
1776 /* This should not happen. It means that we have
1777 two symbols with the same byte sequence. It is
1778 of course an error. */
1779 WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
1780 (*eptr)->line,
1781 _("\
1782 symbol `%s' has the same encoding as"), (*eptr)->name);
1783 error_at_line (0, 0, runp->file,
1784 runp->line,
1785 _("symbol `%s'"),
1786 runp->name));
1787 goto dont_insertwc;
1789 else if (c < 0)
1790 /* Insert it here. */
1791 break;
1794 /* To the next entry. */
1795 lastp = *eptr;
1796 eptr = &(*eptr)->wcnext;
1799 /* Set the pointers. */
1800 runp->wcnext = *eptr;
1801 runp->wclast = lastp;
1802 if (*eptr != NULL)
1803 (*eptr)->wclast = runp;
1804 *eptr = runp;
1805 if (eptr == &e)
1806 wchead_table_add (&collate->wcheads, runp->wcs[0], e);
1807 dont_insertwc:
1811 /* Up to the next entry. */
1812 runp = runp->next;
1815 collseq_table_finalize (&collate->wcseqorder);
1817 /* Now determine whether the UNDEFINED entry is needed and if yes,
1818 whether it was defined. */
1819 collate->undefined.used_in_level = need_undefined ? ~0ul : 0;
1820 if (collate->undefined.file == NULL)
1822 if (need_undefined)
1824 /* This seems not to be enforced by recent standards. Don't
1825 emit an error, simply append UNDEFINED at the end. */
1826 if (0)
1827 WITH_CUR_LOCALE (error (0, 0, _("no definition of `UNDEFINED'")));
1829 /* Add UNDEFINED at the end. */
1830 collate->undefined.mborder =
1831 (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int));
1833 for (i = 0; i < nrules; ++i)
1834 collate->undefined.mborder[i] = mbact[i]++;
1837 /* In any case we will need the definition for the wide character
1838 case. But we will not complain that it is missing since the
1839 specification strangely enough does not seem to account for
1840 this. */
1841 collate->undefined.wcorder = wcact++;
1844 /* Finally, try to unify the rules for the sections. Whenever the rules
1845 for a section are the same as those for another section give the
1846 ruleset the same index. Since there are never many section we can
1847 use an O(n^2) algorithm here. */
1848 sect = collate->sections;
1849 while (sect != NULL && sect->rules == NULL)
1850 sect = sect->next;
1852 /* Bail out if we have no sections because of earlier errors. */
1853 if (sect == NULL)
1855 WITH_CUR_LOCALE (error (EXIT_FAILURE, 0,
1856 _("too many errors; giving up")));
1857 return;
1860 ruleidx = 0;
1863 struct section_list *osect = collate->sections;
1865 while (osect != sect)
1866 if (osect->rules != NULL
1867 && memcmp (osect->rules, sect->rules,
1868 nrules * sizeof (osect->rules[0])) == 0)
1869 break;
1870 else
1871 osect = osect->next;
1873 if (osect == sect)
1874 sect->ruleidx = ruleidx++;
1875 else
1876 sect->ruleidx = osect->ruleidx;
1878 /* Next section. */
1880 sect = sect->next;
1881 while (sect != NULL && sect->rules == NULL);
1883 while (sect != NULL);
1884 /* We are currently not prepared for more than 128 rulesets. But this
1885 should never really be a problem. */
1886 assert (ruleidx <= 128);
1890 static int32_t
1891 output_weight (struct obstack *pool, struct locale_collate_t *collate,
1892 struct element_t *elem)
1894 size_t cnt;
1895 int32_t retval;
1897 /* Optimize the use of UNDEFINED. */
1898 if (elem == &collate->undefined)
1899 /* The weights are already inserted. */
1900 return 0;
1902 /* This byte can start exactly one collation element and this is
1903 a single byte. We can directly give the index to the weights. */
1904 retval = obstack_object_size (pool);
1906 /* Construct the weight. */
1907 for (cnt = 0; cnt < nrules; ++cnt)
1909 char buf[elem->weights[cnt].cnt * 7];
1910 int len = 0;
1911 int i;
1913 for (i = 0; i < elem->weights[cnt].cnt; ++i)
1914 /* Encode the weight value. We do nothing for IGNORE entries. */
1915 if (elem->weights[cnt].w[i] != NULL)
1916 len += utf8_encode (&buf[len],
1917 elem->weights[cnt].w[i]->mborder[cnt]);
1919 /* And add the buffer content. */
1920 obstack_1grow (pool, len);
1921 obstack_grow (pool, buf, len);
1924 return retval | ((elem->section->ruleidx & 0x7f) << 24);
1928 static int32_t
1929 output_weightwc (struct obstack *pool, struct locale_collate_t *collate,
1930 struct element_t *elem)
1932 size_t cnt;
1933 int32_t retval;
1935 /* Optimize the use of UNDEFINED. */
1936 if (elem == &collate->undefined)
1937 /* The weights are already inserted. */
1938 return 0;
1940 /* This byte can start exactly one collation element and this is
1941 a single byte. We can directly give the index to the weights. */
1942 retval = obstack_object_size (pool) / sizeof (int32_t);
1944 /* Construct the weight. */
1945 for (cnt = 0; cnt < nrules; ++cnt)
1947 int32_t buf[elem->weights[cnt].cnt];
1948 int i;
1949 int32_t j;
1951 for (i = 0, j = 0; i < elem->weights[cnt].cnt; ++i)
1952 if (elem->weights[cnt].w[i] != NULL)
1953 buf[j++] = elem->weights[cnt].w[i]->wcorder;
1955 /* And add the buffer content. */
1956 obstack_int32_grow (pool, j);
1958 obstack_grow (pool, buf, j * sizeof (int32_t));
1961 return retval | ((elem->section->ruleidx & 0x7f) << 24);
/* State shared with `add_to_tablewc', which only receives the character
   and the element as arguments.  If localedef is ever threaded, this
   would need to be a __thread variable.  */
static struct
{
  /* Pool handed to `output_weightwc' to receive the weights.  */
  struct obstack *weightpool;
  /* Pool receiving the records for characters with several or longer
     sequences.  */
  struct obstack *extrapool;
  /* Pool receiving the weight indices of series of consecutive
     sequences.  */
  struct obstack *indpool;
  /* Collation data handed to `output_weightwc'.  */
  struct locale_collate_t *collate;
  /* Table which maps a wide character to its index.  */
  struct collidx_table *tablewc;
} atwc;
1974 static void add_to_tablewc (uint32_t ch, struct element_t *runp);
1976 static void
1977 add_to_tablewc (uint32_t ch, struct element_t *runp)
1979 if (runp->wcnext == NULL && runp->nwcs == 1)
1981 int32_t weigthidx = output_weightwc (atwc.weightpool, atwc.collate,
1982 runp);
1983 collidx_table_add (atwc.tablewc, ch, weigthidx);
1985 else
1987 /* As for the singlebyte table, we recognize sequences and
1988 compress them. */
1990 collidx_table_add (atwc.tablewc, ch,
1991 -(obstack_object_size (atwc.extrapool)
1992 / sizeof (uint32_t)));
1996 /* Store the current index in the weight table. We know that
1997 the current position in the `extrapool' is aligned on a
1998 32-bit address. */
1999 int32_t weightidx;
2000 int added;
2002 /* Find out wether this is a single entry or we have more than
2003 one consecutive entry. */
2004 if (runp->wcnext != NULL
2005 && runp->nwcs == runp->wcnext->nwcs
2006 && wmemcmp ((wchar_t *) runp->wcs,
2007 (wchar_t *)runp->wcnext->wcs,
2008 runp->nwcs - 1) == 0
2009 && (runp->wcs[runp->nwcs - 1]
2010 == runp->wcnext->wcs[runp->nwcs - 1] + 1))
2012 int i;
2013 struct element_t *series_startp = runp;
2014 struct element_t *curp;
2016 /* Now add first the initial byte sequence. */
2017 added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t);
2018 if (sizeof (int32_t) == sizeof (int))
2019 obstack_make_room (atwc.extrapool, added);
2021 /* More than one consecutive entry. We mark this by having
2022 a negative index into the indirect table. */
2023 obstack_int32_grow_fast (atwc.extrapool,
2024 -(obstack_object_size (atwc.indpool)
2025 / sizeof (int32_t)));
2026 obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2029 runp = runp->wcnext;
2030 while (runp->wcnext != NULL
2031 && runp->nwcs == runp->wcnext->nwcs
2032 && wmemcmp ((wchar_t *) runp->wcs,
2033 (wchar_t *)runp->wcnext->wcs,
2034 runp->nwcs - 1) == 0
2035 && (runp->wcs[runp->nwcs - 1]
2036 == runp->wcnext->wcs[runp->nwcs - 1] + 1));
2038 /* Now walk backward from here to the beginning. */
2039 curp = runp;
2041 for (i = 1; i < runp->nwcs; ++i)
2042 obstack_int32_grow_fast (atwc.extrapool, curp->wcs[i]);
2044 /* Now find the end of the consecutive sequence and
2045 add all the indeces in the indirect pool. */
2048 weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2049 curp);
2050 obstack_int32_grow (atwc.indpool, weightidx);
2052 curp = curp->wclast;
2054 while (curp != series_startp);
2056 /* Add the final weight. */
2057 weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2058 curp);
2059 obstack_int32_grow (atwc.indpool, weightidx);
2061 /* And add the end byte sequence. Without length this
2062 time. */
2063 for (i = 1; i < curp->nwcs; ++i)
2064 obstack_int32_grow (atwc.extrapool, curp->wcs[i]);
2066 else
2068 /* A single entry. Simply add the index and the length and
2069 string (except for the first character which is already
2070 tested for). */
2071 int i;
2073 /* Output the weight info. */
2074 weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2075 runp);
2077 added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t);
2078 if (sizeof (int) == sizeof (int32_t))
2079 obstack_make_room (atwc.extrapool, added);
2081 obstack_int32_grow_fast (atwc.extrapool, weightidx);
2082 obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2083 for (i = 1; i < runp->nwcs; ++i)
2084 obstack_int32_grow_fast (atwc.extrapool, runp->wcs[i]);
2087 /* Next entry. */
2088 runp = runp->wcnext;
2090 while (runp != NULL);
2094 void
2095 collate_output (struct localedef_t *locale, const struct charmap_t *charmap,
2096 const char *output_path)
2098 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
2099 const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
2100 struct iovec iov[2 + nelems];
2101 struct locale_file data;
2102 uint32_t idx[nelems];
2103 size_t cnt;
2104 size_t ch;
2105 int32_t tablemb[256];
2106 struct obstack weightpool;
2107 struct obstack extrapool;
2108 struct obstack indirectpool;
2109 struct section_list *sect;
2110 struct collidx_table tablewc;
2111 uint32_t elem_size;
2112 uint32_t *elem_table;
2113 int i;
2114 struct element_t *runp;
2116 data.magic = LIMAGIC (LC_COLLATE);
2117 data.n = nelems;
2118 iov[0].iov_base = (void *) &data;
2119 iov[0].iov_len = sizeof (data);
2121 iov[1].iov_base = (void *) idx;
2122 iov[1].iov_len = sizeof (idx);
2124 idx[0] = iov[0].iov_len + iov[1].iov_len;
2125 cnt = 0;
2127 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_NRULES));
2128 iov[2 + cnt].iov_base = &nrules;
2129 iov[2 + cnt].iov_len = sizeof (uint32_t);
2130 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2131 ++cnt;
2133 /* If we have no LC_COLLATE data emit only the number of rules as zero. */
2134 if (collate == NULL)
2136 int32_t dummy = 0;
2138 while (cnt < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
2140 /* The words have to be handled specially. */
2141 if (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
2143 iov[2 + cnt].iov_base = &dummy;
2144 iov[2 + cnt].iov_len = sizeof (int32_t);
2146 else
2148 iov[2 + cnt].iov_base = NULL;
2149 iov[2 + cnt].iov_len = 0;
2152 if (cnt + 1 < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
2153 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2154 ++cnt;
2157 assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
2159 write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", 2 + cnt, iov);
2161 return;
2164 obstack_init (&weightpool);
2165 obstack_init (&extrapool);
2166 obstack_init (&indirectpool);
2168 /* Since we are using the sign of an integer to mark indirection the
2169 offsets in the arrays we are indirectly referring to must not be
2170 zero since -0 == 0. Therefore we add a bit of dummy content. */
2171 obstack_int32_grow (&extrapool, 0);
2172 obstack_int32_grow (&indirectpool, 0);
2174 /* Prepare the ruleset table. */
2175 for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
2176 if (sect->rules != NULL && sect->ruleidx == i)
2178 int j;
2180 obstack_make_room (&weightpool, nrules);
2182 for (j = 0; j < nrules; ++j)
2183 obstack_1grow_fast (&weightpool, sect->rules[j]);
2184 ++i;
2186 /* And align the output. */
2187 i = (nrules * i) % __alignof__ (int32_t);
2188 if (i > 0)
2190 obstack_1grow (&weightpool, '\0');
2191 while (++i < __alignof__ (int32_t));
2193 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_RULESETS));
2194 iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2195 iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2196 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2197 ++cnt;
2199 /* Generate the 8-bit table. Walk through the lists of sequences
2200 starting with the same byte and add them one after the other to
2201 the table. In case we have more than one sequence starting with
2202 the same byte we have to use extra indirection.
2204 First add a record for the NUL byte. This entry will never be used
2205 so it does not matter. */
2206 tablemb[0] = 0;
2208 /* Now insert the `UNDEFINED' value if it is used. Since this value
2209 will probably be used more than once it is good to store the
2210 weights only once. */
2211 if (collate->undefined.used_in_level != 0)
2212 output_weight (&weightpool, collate, &collate->undefined);
2214 for (ch = 1; ch < 256; ++ch)
2215 if (collate->mbheads[ch]->mbnext == NULL
2216 && collate->mbheads[ch]->nmbs <= 1)
2218 tablemb[ch] = output_weight (&weightpool, collate,
2219 collate->mbheads[ch]);
2221 else
2223 /* The entries in the list are sorted by length and then
2224 alphabetically. This is the order in which we will add the
2225 elements to the collation table. This allows simply walking
2226 the table in sequence and stopping at the first matching
2227 entry. Since the longer sequences are coming first in the
2228 list they have the possibility to match first, just as it
2229 has to be. In the worst case we are walking to the end of
2230 the list where we put, if no singlebyte sequence is defined
2231 in the locale definition, the weights for UNDEFINED.
2233 To reduce the length of the search list we compress them a bit.
2234 This happens by collecting sequences of consecutive byte
2235 sequences in one entry (having a begin and an end byte sequence)
2236 and add only one index into the weight table. We can find the
2237 consecutive entries since they are also consecutive in the list. */
2238 struct element_t *runp = collate->mbheads[ch];
2239 struct element_t *lastp;
2241 assert ((obstack_object_size (&extrapool)
2242 & (__alignof__ (int32_t) - 1)) == 0);
2244 tablemb[ch] = -obstack_object_size (&extrapool);
2248 /* Store the current index in the weight table. We know that
2249 the current position in the `extrapool' is aligned on a
2250 32-bit address. */
2251 int32_t weightidx;
2252 int added;
2254 /* Find out whether this is a single entry or we have more than
2255 one consecutive entry. */
2256 if (runp->mbnext != NULL
2257 && runp->nmbs == runp->mbnext->nmbs
2258 && memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0
2259 && (runp->mbs[runp->nmbs - 1]
2260 == runp->mbnext->mbs[runp->nmbs - 1] + 1))
2262 int i;
2263 struct element_t *series_startp = runp;
2264 struct element_t *curp;
2266 /* Compute how much space we will need. */
2267 added = ((sizeof (int32_t) + 1 + 2 * (runp->nmbs - 1)
2268 + __alignof__ (int32_t) - 1)
2269 & ~(__alignof__ (int32_t) - 1));
2270 assert ((obstack_object_size (&extrapool)
2271 & (__alignof__ (int32_t) - 1)) == 0);
2272 obstack_make_room (&extrapool, added);
2274 /* More than one consecutive entry. We mark this by having
2275 a negative index into the indirect table. */
2276 obstack_int32_grow_fast (&extrapool,
2277 -(obstack_object_size (&indirectpool)
2278 / sizeof (int32_t)));
2280 /* Now search first the end of the series. */
2282 runp = runp->mbnext;
2283 while (runp->mbnext != NULL
2284 && runp->nmbs == runp->mbnext->nmbs
2285 && memcmp (runp->mbs, runp->mbnext->mbs,
2286 runp->nmbs - 1) == 0
2287 && (runp->mbs[runp->nmbs - 1]
2288 == runp->mbnext->mbs[runp->nmbs - 1] + 1));
2290 /* Now walk backward from here to the beginning. */
2291 curp = runp;
2293 assert (runp->nmbs <= 256);
2294 obstack_1grow_fast (&extrapool, curp->nmbs - 1);
2295 for (i = 1; i < curp->nmbs; ++i)
2296 obstack_1grow_fast (&extrapool, curp->mbs[i]);
2298 /* Now find the end of the consecutive sequence and
2299 add all the indices in the indirect pool. */
2302 weightidx = output_weight (&weightpool, collate, curp);
2303 obstack_int32_grow (&indirectpool, weightidx);
2305 curp = curp->mblast;
2307 while (curp != series_startp);
2309 /* Add the final weight. */
2310 weightidx = output_weight (&weightpool, collate, curp);
2311 obstack_int32_grow (&indirectpool, weightidx);
2313 /* And add the end byte sequence. Without length this
2314 time. */
2315 for (i = 1; i < curp->nmbs; ++i)
2316 obstack_1grow_fast (&extrapool, curp->mbs[i]);
2318 else
2320 /* A single entry. Simply add the index and the length and
2321 string (except for the first character which is already
2322 tested for). */
2323 int i;
2325 /* Output the weight info. */
2326 weightidx = output_weight (&weightpool, collate, runp);
2328 added = ((sizeof (int32_t) + 1 + runp->nmbs - 1
2329 + __alignof__ (int32_t) - 1)
2330 & ~(__alignof__ (int32_t) - 1));
2331 assert ((obstack_object_size (&extrapool)
2332 & (__alignof__ (int32_t) - 1)) == 0);
2333 obstack_make_room (&extrapool, added);
2335 obstack_int32_grow_fast (&extrapool, weightidx);
2336 assert (runp->nmbs <= 256);
2337 obstack_1grow_fast (&extrapool, runp->nmbs - 1);
2339 for (i = 1; i < runp->nmbs; ++i)
2340 obstack_1grow_fast (&extrapool, runp->mbs[i]);
2343 /* Add alignment bytes if necessary. */
2344 while ((obstack_object_size (&extrapool)
2345 & (__alignof__ (int32_t) - 1)) != 0)
2346 obstack_1grow_fast (&extrapool, '\0');
2348 /* Next entry. */
2349 lastp = runp;
2350 runp = runp->mbnext;
2352 while (runp != NULL);
2354 assert ((obstack_object_size (&extrapool)
2355 & (__alignof__ (int32_t) - 1)) == 0);
2357 /* If the final entry in the list is not a single character we
2358 add an UNDEFINED entry here. */
2359 if (lastp->nmbs != 1)
2361 int added = ((sizeof (int32_t) + 1 + 1 + __alignof__ (int32_t) - 1)
2362 & ~(__alignof__ (int32_t) - 1));
2363 obstack_make_room (&extrapool, added);
2365 obstack_int32_grow_fast (&extrapool, 0);
2366 /* XXX What rule? We just pick the first. */
2367 obstack_1grow_fast (&extrapool, 0);
2368 /* Length is zero. */
2369 obstack_1grow_fast (&extrapool, 0);
2371 /* Add alignment bytes if necessary. */
2372 while ((obstack_object_size (&extrapool)
2373 & (__alignof__ (int32_t) - 1)) != 0)
2374 obstack_1grow_fast (&extrapool, '\0');
2378 /* Add padding to the tables if necessary. */
2379 while ((obstack_object_size (&weightpool) & (__alignof__ (int32_t) - 1))
2380 != 0)
2381 obstack_1grow (&weightpool, 0);
2383 /* Now add the four tables. */
2384 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEMB));
2385 iov[2 + cnt].iov_base = tablemb;
2386 iov[2 + cnt].iov_len = sizeof (tablemb);
2387 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2388 assert ((iov[2 + cnt].iov_len & (__alignof__ (int32_t) - 1)) == 0);
2389 ++cnt;
2391 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTMB));
2392 iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2393 iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2394 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2395 ++cnt;
2397 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAMB));
2398 iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2399 iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2400 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2401 ++cnt;
2403 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTMB));
2404 iov[2 + cnt].iov_len = obstack_object_size (&indirectpool);
2405 iov[2 + cnt].iov_base = obstack_finish (&indirectpool);
2406 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2407 assert ((iov[2 + cnt].iov_len & (__alignof__ (int32_t) - 1)) == 0);
2408 ++cnt;
2411 /* Now the same for the wide character table. We need to store some
2412 more information here. */
2413 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP1));
2414 iov[2 + cnt].iov_base = NULL;
2415 iov[2 + cnt].iov_len = 0;
2416 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2417 assert (idx[cnt] % __alignof__ (int32_t) == 0);
2418 ++cnt;
2420 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP2));
2421 iov[2 + cnt].iov_base = NULL;
2422 iov[2 + cnt].iov_len = 0;
2423 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2424 assert (idx[cnt] % __alignof__ (int32_t) == 0);
2425 ++cnt;
2427 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP3));
2428 iov[2 + cnt].iov_base = NULL;
2429 iov[2 + cnt].iov_len = 0;
2430 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2431 assert (idx[cnt] % __alignof__ (int32_t) == 0);
2432 ++cnt;
2434 /* Since we are using the sign of an integer to mark indirection the
2435 offsets in the arrays we are indirectly referring to must not be
2436 zero since -0 == 0. Therefore we add a bit of dummy content. */
2437 obstack_int32_grow (&extrapool, 0);
2438 obstack_int32_grow (&indirectpool, 0);
2440 /* Now insert the `UNDEFINED' value if it is used. Since this value
2441 will probably be used more than once it is good to store the
2442 weights only once. */
2443 if (output_weightwc (&weightpool, collate, &collate->undefined) != 0)
2444 abort ();
2446 /* Generate the table. Walk through the lists of sequences starting
2447 with the same wide character and add them one after the other to
2448 the table. In case we have more than one sequence starting with
2449 the same byte we have to use extra indirection. */
2450 tablewc.p = 6;
2451 tablewc.q = 10;
2452 collidx_table_init (&tablewc);
2454 atwc.weightpool = &weightpool;
2455 atwc.extrapool = &extrapool;
2456 atwc.indpool = &indirectpool;
2457 atwc.collate = collate;
2458 atwc.tablewc = &tablewc;
2460 wchead_table_iterate (&collate->wcheads, add_to_tablewc);
2462 memset (&atwc, 0, sizeof (atwc));
2464 collidx_table_finalize (&tablewc);
2466 /* Now add the four tables. */
2467 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEWC));
2468 iov[2 + cnt].iov_base = tablewc.result;
2469 iov[2 + cnt].iov_len = tablewc.result_size;
2470 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2471 assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2472 assert (idx[cnt] % __alignof__ (int32_t) == 0);
2473 ++cnt;
2475 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTWC));
2476 iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2477 iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2478 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2479 assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2480 assert (idx[cnt] % __alignof__ (int32_t) == 0);
2481 ++cnt;
2483 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAWC));
2484 iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2485 iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2486 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2487 assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2488 assert (idx[cnt] % __alignof__ (int32_t) == 0);
2489 ++cnt;
2491 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTWC));
2492 iov[2 + cnt].iov_len = obstack_object_size (&indirectpool);
2493 iov[2 + cnt].iov_base = obstack_finish (&indirectpool);
2494 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2495 assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2496 assert (idx[cnt] % __alignof__ (int32_t) == 0);
2497 ++cnt;
2500 /* Finally write the table with collation element names out. It is
2501 a hash table with a simple function which gets the name of the
2502 character as the input. One character might have many names. The
2503 value associated with the name is an index into the weight table
2504 where we are then interested in the first-level weight value.
2506 To determine how large the table should be we are counting the
2507 elements we have to put in. Since we are using internal chaining
2508 using a secondary hash function we have to make the table a bit
2509 larger to avoid extremely long search times. We can achieve
2510 good results with a 40% larger table than there are entries. */
2511 elem_size = 0;
2512 runp = collate->start;
2513 while (runp != NULL)
2515 if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2516 /* Yep, the element really counts. */
2517 ++elem_size;
2519 runp = runp->next;
2521 /* Add 40% and find the next prime number. */
2522 elem_size = next_prime (elem_size * 1.4);
2524 /* Allocate the table. Each entry consists of two words: the hash
2525 value and an index in a secondary table which provides the index
2526 into the weight table and the string itself (so that a match can
2527 be determined). */
2528 elem_table = (uint32_t *) obstack_alloc (&extrapool,
2529 elem_size * 2 * sizeof (uint32_t));
2530 memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t));
2532 /* Now add the elements. */
2533 runp = collate->start;
2534 while (runp != NULL)
2536 if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2538 /* Compute the hash value of the name. */
2539 uint32_t namelen = strlen (runp->name);
2540 uint32_t hash = elem_hash (runp->name, namelen);
2541 size_t idx = hash % elem_size;
2542 #ifndef NDEBUG
2543 size_t start_idx = idx;
2544 #endif
2546 if (elem_table[idx * 2] != 0)
2548 /* The spot is already taken. Try iterating using the value
2549 from the secondary hashing function. */
2550 size_t iter = hash % (elem_size - 2) + 1;
2554 idx += iter;
2555 if (idx >= elem_size)
2556 idx -= elem_size;
2557 assert (idx != start_idx);
2559 while (elem_table[idx * 2] != 0);
2561 /* This is the spot where we will insert the value. */
2562 elem_table[idx * 2] = hash;
2563 elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);
2565 /* The string itself including length. */
2566 obstack_1grow (&extrapool, namelen);
2567 obstack_grow (&extrapool, runp->name, namelen);
2569 /* And the multibyte representation. */
2570 obstack_1grow (&extrapool, runp->nmbs);
2571 obstack_grow (&extrapool, runp->mbs, runp->nmbs);
2573 /* And align again to 32 bits. */
2574 if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0)
2575 obstack_grow (&extrapool, "\0\0",
2576 (sizeof (int32_t)
2577 - ((1 + namelen + 1 + runp->nmbs)
2578 % sizeof (int32_t))));
2580 /* Now some 32-bit values: multibyte collation sequence,
2581 wide char string (including length), and wide char
2582 collation sequence. */
2583 obstack_int32_grow (&extrapool, runp->mbseqorder);
2585 obstack_int32_grow (&extrapool, runp->nwcs);
2586 obstack_grow (&extrapool, runp->wcs,
2587 runp->nwcs * sizeof (uint32_t));
2589 obstack_int32_grow (&extrapool, runp->wcseqorder);
2592 runp = runp->next;
2595 /* Prepare to write out this data. */
2596 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB));
2597 iov[2 + cnt].iov_base = &elem_size;
2598 iov[2 + cnt].iov_len = sizeof (int32_t);
2599 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2600 assert (idx[cnt] % __alignof__ (int32_t) == 0);
2601 ++cnt;
2603 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_TABLEMB));
2604 iov[2 + cnt].iov_base = elem_table;
2605 iov[2 + cnt].iov_len = elem_size * 2 * sizeof (int32_t);
2606 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2607 assert (idx[cnt] % __alignof__ (int32_t) == 0);
2608 ++cnt;
2610 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_EXTRAMB));
2611 iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2612 iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2613 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2614 ++cnt;
2616 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQMB));
2617 iov[2 + cnt].iov_base = collate->mbseqorder;
2618 iov[2 + cnt].iov_len = 256;
2619 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2620 ++cnt;
2622 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQWC));
2623 iov[2 + cnt].iov_base = collate->wcseqorder.result;
2624 iov[2 + cnt].iov_len = collate->wcseqorder.result_size;
2625 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2626 assert (idx[cnt] % __alignof__ (int32_t) == 0);
2627 ++cnt;
2629 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_CODESET));
2630 iov[2 + cnt].iov_base = (void *) charmap->code_set_name;
2631 iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1;
2632 ++cnt;
2634 assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
2636 write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", 2 + cnt, iov);
2638 obstack_free (&weightpool, NULL);
2639 obstack_free (&extrapool, NULL);
2640 obstack_free (&indirectpool, NULL);
2644 static enum token_t
2645 skip_to (struct linereader *ldfile, struct locale_collate_t *collate,
2646 const struct charmap_t *charmap, int to_endif)
2648 while (1)
2650 struct token *now = lr_token (ldfile, charmap, NULL, NULL, 0);
2651 enum token_t nowtok = now->tok;
2653 if (nowtok == tok_eof || nowtok == tok_end)
2654 return nowtok;
2656 if (nowtok == tok_ifdef || nowtok == tok_ifndef)
2658 lr_error (ldfile, _("%s: nested conditionals not supported"),
2659 "LC_COLLATE");
2660 nowtok = skip_to (ldfile, collate, charmap, tok_endif);
2661 if (nowtok == tok_eof || nowtok == tok_end)
2662 return nowtok;
2664 else if (nowtok == tok_endif || (!to_endif && nowtok == tok_else))
2666 lr_ignore_rest (ldfile, 1);
2667 return nowtok;
2669 else if (!to_endif && (nowtok == tok_elifdef || nowtok == tok_elifndef))
2671 /* Do not read the rest of the line. */
2672 return nowtok;
2674 else if (nowtok == tok_else)
2676 lr_error (ldfile, _("%s: more then one 'else'"), "LC_COLLATE");
2679 lr_ignore_rest (ldfile, 0);
2684 void
2685 collate_read (struct linereader *ldfile, struct localedef_t *result,
2686 const struct charmap_t *charmap, const char *repertoire_name,
2687 int ignore_content)
2689 struct repertoire_t *repertoire = NULL;
2690 struct locale_collate_t *collate;
2691 struct token *now;
2692 struct token *arg = NULL;
2693 enum token_t nowtok;
2694 enum token_t was_ellipsis = tok_none;
2695 struct localedef_t *copy_locale = NULL;
2696 /* Parsing state:
2697 0 - start
2698 1 - between `order-start' and `order-end'
2699 2 - after `order-end'
2700 3 - after `reorder-after', waiting for `reorder-end'
2701 4 - after `reorder-end'
2702 5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2703 6 - after `reorder-sections-end'
2705 int state = 0;
2707 /* Get the repertoire we have to use. */
2708 if (repertoire_name != NULL)
2709 repertoire = repertoire_read (repertoire_name);
2711 /* The rest of the line containing `LC_COLLATE' must be free. */
2712 lr_ignore_rest (ldfile, 1);
2714 while (1)
2718 now = lr_token (ldfile, charmap, result, NULL, verbose);
2719 nowtok = now->tok;
2721 while (nowtok == tok_eol);
2723 if (nowtok != tok_define)
2724 break;
2726 if (ignore_content)
2727 lr_ignore_rest (ldfile, 0);
2728 else
2730 arg = lr_token (ldfile, charmap, result, NULL, verbose);
2731 if (arg->tok != tok_ident)
2732 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2733 else
2735 /* Simply add the new symbol. */
2736 struct name_list *newsym = xmalloc (sizeof (*newsym)
2737 + arg->val.str.lenmb + 1);
2738 memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
2739 newsym->str[arg->val.str.lenmb] = '\0';
2740 newsym->next = defined;
2741 defined = newsym;
2743 lr_ignore_rest (ldfile, 1);
2748 if (nowtok == tok_copy)
2750 now = lr_token (ldfile, charmap, result, NULL, verbose);
2751 if (now->tok != tok_string)
2753 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2755 skip_category:
2757 now = lr_token (ldfile, charmap, result, NULL, verbose);
2758 while (now->tok != tok_eof && now->tok != tok_end);
2760 if (now->tok != tok_eof
2761 || (now = lr_token (ldfile, charmap, result, NULL, verbose),
2762 now->tok == tok_eof))
2763 lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
2764 else if (now->tok != tok_lc_collate)
2766 lr_error (ldfile, _("\
2767 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2768 lr_ignore_rest (ldfile, 0);
2770 else
2771 lr_ignore_rest (ldfile, 1);
2773 return;
2776 if (! ignore_content)
2778 /* Get the locale definition. */
2779 copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
2780 repertoire_name, charmap, NULL);
2781 if ((copy_locale->avail & COLLATE_LOCALE) == 0)
2783 /* Not yet loaded. So do it now. */
2784 if (locfile_read (copy_locale, charmap) != 0)
2785 goto skip_category;
2788 if (copy_locale->categories[LC_COLLATE].collate == NULL)
2789 return;
2792 lr_ignore_rest (ldfile, 1);
2794 now = lr_token (ldfile, charmap, result, NULL, verbose);
2795 nowtok = now->tok;
2798 /* Prepare the data structures. */
2799 collate_startup (ldfile, result, copy_locale, ignore_content);
2800 collate = result->categories[LC_COLLATE].collate;
2802 while (1)
2804 char ucs4buf[10];
2805 char *symstr;
2806 size_t symlen;
2808 /* Of course we don't proceed beyond the end of file. */
2809 if (nowtok == tok_eof)
2810 break;
2812 /* Ignore empty lines. */
2813 if (nowtok == tok_eol)
2815 now = lr_token (ldfile, charmap, result, NULL, verbose);
2816 nowtok = now->tok;
2817 continue;
2820 switch (nowtok)
2822 case tok_copy:
2823 /* Allow copying other locales. */
2824 now = lr_token (ldfile, charmap, result, NULL, verbose);
2825 if (now->tok != tok_string)
2826 goto err_label;
2828 if (! ignore_content)
2829 load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
2830 charmap, result);
2832 lr_ignore_rest (ldfile, 1);
2833 break;
2835 case tok_coll_weight_max:
2836 /* Ignore the rest of the line if we don't need the input of
2837 this line. */
2838 if (ignore_content)
2840 lr_ignore_rest (ldfile, 0);
2841 break;
2844 if (state != 0)
2845 goto err_label;
2847 arg = lr_token (ldfile, charmap, result, NULL, verbose);
2848 if (arg->tok != tok_number)
2849 goto err_label;
2850 if (collate->col_weight_max != -1)
2851 lr_error (ldfile, _("%s: duplicate definition of `%s'"),
2852 "LC_COLLATE", "col_weight_max");
2853 else
2854 collate->col_weight_max = arg->val.num;
2855 lr_ignore_rest (ldfile, 1);
2856 break;
2858 case tok_section_symbol:
2859 /* Ignore the rest of the line if we don't need the input of
2860 this line. */
2861 if (ignore_content)
2863 lr_ignore_rest (ldfile, 0);
2864 break;
2867 if (state != 0)
2868 goto err_label;
2870 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2871 if (arg->tok != tok_bsymbol)
2872 goto err_label;
2873 else if (!ignore_content)
2875 /* Check whether this section is already known. */
2876 struct section_list *known = collate->sections;
2877 while (known != NULL)
2879 if (strcmp (known->name, arg->val.str.startmb) == 0)
2880 break;
2881 known = known->next;
2884 if (known != NULL)
2886 lr_error (ldfile,
2887 _("%s: duplicate declaration of section `%s'"),
2888 "LC_COLLATE", arg->val.str.startmb);
2889 free (arg->val.str.startmb);
2891 else
2892 collate->sections = make_seclist_elem (collate,
2893 arg->val.str.startmb,
2894 collate->sections);
2896 lr_ignore_rest (ldfile, known == NULL);
2898 else
2900 free (arg->val.str.startmb);
2901 lr_ignore_rest (ldfile, 0);
2903 break;
2905 case tok_collating_element:
2906 /* Ignore the rest of the line if we don't need the input of
2907 this line. */
2908 if (ignore_content)
2910 lr_ignore_rest (ldfile, 0);
2911 break;
2914 if (state != 0 && state != 2)
2915 goto err_label;
2917 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2918 if (arg->tok != tok_bsymbol)
2919 goto err_label;
2920 else
2922 const char *symbol = arg->val.str.startmb;
2923 size_t symbol_len = arg->val.str.lenmb;
2925 /* Next the `from' keyword. */
2926 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2927 if (arg->tok != tok_from)
2929 free ((char *) symbol);
2930 goto err_label;
2933 ldfile->return_widestr = 1;
2934 ldfile->translate_strings = 1;
2936 /* Finally the string with the replacement. */
2937 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2939 ldfile->return_widestr = 0;
2940 ldfile->translate_strings = 0;
2942 if (arg->tok != tok_string)
2943 goto err_label;
2945 if (!ignore_content && symbol != NULL)
2947 /* The name is already defined. */
2948 if (check_duplicate (ldfile, collate, charmap,
2949 repertoire, symbol, symbol_len))
2950 goto col_elem_free;
2952 if (arg->val.str.startmb != NULL)
2953 insert_entry (&collate->elem_table, symbol, symbol_len,
2954 new_element (collate,
2955 arg->val.str.startmb,
2956 arg->val.str.lenmb - 1,
2957 arg->val.str.startwc,
2958 symbol, symbol_len, 0));
2960 else
2962 col_elem_free:
2963 free ((char *) symbol);
2964 free (arg->val.str.startmb);
2965 free (arg->val.str.startwc);
2967 lr_ignore_rest (ldfile, 1);
2969 break;
2971 case tok_collating_symbol:
2972 /* Ignore the rest of the line if we don't need the input of
2973 this line. */
2974 if (ignore_content)
2976 lr_ignore_rest (ldfile, 0);
2977 break;
2980 if (state != 0 && state != 2)
2981 goto err_label;
2983 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2984 if (arg->tok != tok_bsymbol)
2985 goto err_label;
2986 else
2988 char *symbol = arg->val.str.startmb;
2989 size_t symbol_len = arg->val.str.lenmb;
2990 char *endsymbol = NULL;
2991 size_t endsymbol_len = 0;
2992 enum token_t ellipsis = tok_none;
2994 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2995 if (arg->tok == tok_ellipsis2 || arg->tok == tok_ellipsis4)
2997 ellipsis = arg->tok;
2999 arg = lr_token (ldfile, charmap, result, repertoire,
3000 verbose);
3001 if (arg->tok != tok_bsymbol)
3003 free (symbol);
3004 goto err_label;
3007 endsymbol = arg->val.str.startmb;
3008 endsymbol_len = arg->val.str.lenmb;
3010 lr_ignore_rest (ldfile, 1);
3012 else if (arg->tok != tok_eol)
3014 free (symbol);
3015 goto err_label;
3018 if (!ignore_content)
3020 if (symbol == NULL
3021 || (ellipsis != tok_none && endsymbol == NULL))
3023 lr_error (ldfile, _("\
3024 %s: unknown character in collating symbol name"),
3025 "LC_COLLATE");
3026 goto col_sym_free;
3028 else if (ellipsis == tok_none)
3030 /* A single symbol, no ellipsis. */
3031 if (check_duplicate (ldfile, collate, charmap,
3032 repertoire, symbol, symbol_len))
3033 /* The name is already defined. */
3034 goto col_sym_free;
3036 insert_entry (&collate->sym_table, symbol, symbol_len,
3037 new_symbol (collate, symbol, symbol_len));
3039 else if (symbol_len != endsymbol_len)
3041 col_sym_inv_range:
3042 lr_error (ldfile,
3043 _("invalid names for character range"));
3044 goto col_sym_free;
3046 else
3048 /* Oh my, we have to handle an ellipsis. First, as
3049 usual, determine the common prefix and then
3050 convert the rest into a range. */
3051 size_t prefixlen;
3052 unsigned long int from;
3053 unsigned long int to;
3054 char *endp;
3056 for (prefixlen = 0; prefixlen < symbol_len; ++prefixlen)
3057 if (symbol[prefixlen] != endsymbol[prefixlen])
3058 break;
3060 /* Convert the rest into numbers. */
3061 symbol[symbol_len] = '\0';
3062 from = strtoul (&symbol[prefixlen], &endp,
3063 ellipsis == tok_ellipsis2 ? 16 : 10);
3064 if (*endp != '\0')
3065 goto col_sym_inv_range;
3067 endsymbol[symbol_len] = '\0';
3068 to = strtoul (&endsymbol[prefixlen], &endp,
3069 ellipsis == tok_ellipsis2 ? 16 : 10);
3070 if (*endp != '\0')
3071 goto col_sym_inv_range;
3073 if (from > to)
3074 goto col_sym_inv_range;
3076 /* Now loop over all entries. */
3077 while (from <= to)
3079 char *symbuf;
3081 symbuf = (char *) obstack_alloc (&collate->mempool,
3082 symbol_len + 1);
3084 /* Create the name. */
3085 sprintf (symbuf,
3086 ellipsis == tok_ellipsis2
3087 ? "%.*s%.*lX" : "%.*s%.*lu",
3088 (int) prefixlen, symbol,
3089 (int) (symbol_len - prefixlen), from);
3091 if (check_duplicate (ldfile, collate, charmap,
3092 repertoire, symbuf, symbol_len))
3093 /* The name is already defined. */
3094 goto col_sym_free;
3096 insert_entry (&collate->sym_table, symbuf,
3097 symbol_len,
3098 new_symbol (collate, symbuf,
3099 symbol_len));
3101 /* Increment the counter. */
3102 ++from;
3105 goto col_sym_free;
3108 else
3110 col_sym_free:
3111 free (symbol);
3112 free (endsymbol);
3115 break;
3117 case tok_symbol_equivalence:
3118 /* Ignore the rest of the line if we don't need the input of
3119 this line. */
3120 if (ignore_content)
3122 lr_ignore_rest (ldfile, 0);
3123 break;
3126 if (state != 0)
3127 goto err_label;
3129 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3130 if (arg->tok != tok_bsymbol)
3131 goto err_label;
3132 else
3134 const char *newname = arg->val.str.startmb;
3135 size_t newname_len = arg->val.str.lenmb;
3136 const char *symname;
3137 size_t symname_len;
3138 void *symval; /* Actually struct symbol_t* */
3140 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3141 if (arg->tok != tok_bsymbol)
3143 free ((char *) newname);
3144 goto err_label;
3147 symname = arg->val.str.startmb;
3148 symname_len = arg->val.str.lenmb;
3150 if (newname == NULL)
3152 lr_error (ldfile, _("\
3153 %s: unknown character in equivalent definition name"),
3154 "LC_COLLATE");
3156 sym_equiv_free:
3157 free ((char *) newname);
3158 free ((char *) symname);
3159 break;
3161 if (symname == NULL)
3163 lr_error (ldfile, _("\
3164 %s: unknown character in equivalent definition value"),
3165 "LC_COLLATE");
3166 goto sym_equiv_free;
3169 /* See whether the symbol name is already defined. */
3170 if (find_entry (&collate->sym_table, symname, symname_len,
3171 &symval) != 0)
3173 lr_error (ldfile, _("\
3174 %s: unknown symbol `%s' in equivalent definition"),
3175 "LC_COLLATE", symname);
3176 goto sym_equiv_free;
3179 if (insert_entry (&collate->sym_table,
3180 newname, newname_len, symval) < 0)
3182 lr_error (ldfile, _("\
3183 error while adding equivalent collating symbol"));
3184 goto sym_equiv_free;
3187 free ((char *) symname);
3189 lr_ignore_rest (ldfile, 1);
3190 break;
3192 case tok_script:
3193 /* Ignore the rest of the line if we don't need the input of
3194 this line. */
3195 if (ignore_content)
3197 lr_ignore_rest (ldfile, 0);
3198 break;
3201 /* We get told about the scripts we know. */
3202 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3203 if (arg->tok != tok_bsymbol)
3204 goto err_label;
3205 else
3207 struct section_list *runp = collate->known_sections;
3208 char *name;
3210 while (runp != NULL)
3211 if (strncmp (runp->name, arg->val.str.startmb,
3212 arg->val.str.lenmb) == 0
3213 && runp->name[arg->val.str.lenmb] == '\0')
3214 break;
3215 else
3216 runp = runp->def_next;
3218 if (runp != NULL)
3220 lr_error (ldfile, _("duplicate definition of script `%s'"),
3221 runp->name);
3222 lr_ignore_rest (ldfile, 0);
3223 break;
3226 runp = (struct section_list *) xcalloc (1, sizeof (*runp));
3227 name = (char *) xmalloc (arg->val.str.lenmb + 1);
3228 memcpy (name, arg->val.str.startmb, arg->val.str.lenmb);
3229 name[arg->val.str.lenmb] = '\0';
3230 runp->name = name;
3232 runp->def_next = collate->known_sections;
3233 collate->known_sections = runp;
3235 lr_ignore_rest (ldfile, 1);
3236 break;
3238 case tok_order_start:
3239 /* Ignore the rest of the line if we don't need the input of
3240 this line. */
3241 if (ignore_content)
3243 lr_ignore_rest (ldfile, 0);
3244 break;
3247 if (state != 0 && state != 1 && state != 2)
3248 goto err_label;
3249 state = 1;
3251 /* The 14652 draft does not specify whether all `order_start' lines
3252 must contain the same number of sort-rules, but 14651 does. So
3253 we require this here as well. */
3254 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3255 if (arg->tok == tok_bsymbol)
3257 /* This better should be a section name. */
3258 struct section_list *sp = collate->known_sections;
3259 while (sp != NULL
3260 && (sp->name == NULL
3261 || strncmp (sp->name, arg->val.str.startmb,
3262 arg->val.str.lenmb) != 0
3263 || sp->name[arg->val.str.lenmb] != '\0'))
3264 sp = sp->def_next;
3266 if (sp == NULL)
3268 lr_error (ldfile, _("\
3269 %s: unknown section name `%.*s'"),
3270 "LC_COLLATE", (int) arg->val.str.lenmb,
3271 arg->val.str.startmb);
3272 /* We use the error section. */
3273 collate->current_section = &collate->error_section;
3275 if (collate->error_section.first == NULL)
3277 /* Insert &collate->error_section at the end of
3278 the collate->sections list. */
3279 if (collate->sections == NULL)
3280 collate->sections = &collate->error_section;
3281 else
3283 sp = collate->sections;
3284 while (sp->next != NULL)
3285 sp = sp->next;
3287 sp->next = &collate->error_section;
3289 collate->error_section.next = NULL;
3292 else
3294 /* One should not be allowed to open the same
3295 section twice. */
3296 if (sp->first != NULL)
3297 lr_error (ldfile, _("\
3298 %s: multiple order definitions for section `%s'"),
3299 "LC_COLLATE", sp->name);
3300 else
3302 /* Insert sp in the collate->sections list,
3303 right after collate->current_section. */
3304 if (collate->current_section != NULL)
3306 sp->next = collate->current_section->next;
3307 collate->current_section->next = sp;
3309 else if (collate->sections == NULL)
3310 /* This is the first section to be defined. */
3311 collate->sections = sp;
3313 collate->current_section = sp;
3316 /* Next should come the end of the line or a semicolon. */
3317 arg = lr_token (ldfile, charmap, result, repertoire,
3318 verbose);
3319 if (arg->tok == tok_eol)
3321 uint32_t cnt;
3323 /* This means we have exactly one rule: `forward'. */
3324 if (nrules > 1)
3325 lr_error (ldfile, _("\
3326 %s: invalid number of sorting rules"),
3327 "LC_COLLATE");
3328 else
3329 nrules = 1;
3330 sp->rules = obstack_alloc (&collate->mempool,
3331 (sizeof (enum coll_sort_rule)
3332 * nrules));
3333 for (cnt = 0; cnt < nrules; ++cnt)
3334 sp->rules[cnt] = sort_forward;
3336 /* Next line. */
3337 break;
3340 /* Get the next token. */
3341 arg = lr_token (ldfile, charmap, result, repertoire,
3342 verbose);
3345 else
3347 /* There is no section symbol. Therefore we use the unnamed
3348 section. */
3349 collate->current_section = &collate->unnamed_section;
3351 if (collate->unnamed_section_defined)
3352 lr_error (ldfile, _("\
3353 %s: multiple order definitions for unnamed section"),
3354 "LC_COLLATE");
3355 else
3357 /* Insert &collate->unnamed_section at the beginning of
3358 the collate->sections list. */
3359 collate->unnamed_section.next = collate->sections;
3360 collate->sections = &collate->unnamed_section;
3361 collate->unnamed_section_defined = true;
3365 /* Now read the direction names. */
3366 read_directions (ldfile, arg, charmap, repertoire, result);
3368 /* From now we need the strings untranslated. */
3369 ldfile->translate_strings = 0;
3370 break;
3372 case tok_order_end:
3373 /* Ignore the rest of the line if we don't need the input of
3374 this line. */
3375 if (ignore_content)
3377 lr_ignore_rest (ldfile, 0);
3378 break;
3381 if (state != 1)
3382 goto err_label;
3384 /* Handle ellipsis at end of list. */
3385 if (was_ellipsis != tok_none)
3387 handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3388 repertoire, result);
3389 was_ellipsis = tok_none;
3392 state = 2;
3393 lr_ignore_rest (ldfile, 1);
3394 break;
3396 case tok_reorder_after:
3397 /* Ignore the rest of the line if we don't need the input of
3398 this line. */
3399 if (ignore_content)
3401 lr_ignore_rest (ldfile, 0);
3402 break;
3405 if (state == 1)
3407 lr_error (ldfile, _("%s: missing `order_end' keyword"),
3408 "LC_COLLATE");
3409 state = 2;
3411 /* Handle ellipsis at end of list. */
3412 if (was_ellipsis != tok_none)
3414 handle_ellipsis (ldfile, arg->val.str.startmb,
3415 arg->val.str.lenmb, was_ellipsis, charmap,
3416 repertoire, result);
3417 was_ellipsis = tok_none;
3420 else if (state == 0 && copy_locale == NULL)
3421 goto err_label;
3422 else if (state != 0 && state != 2 && state != 3)
3423 goto err_label;
3424 state = 3;
3426 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3427 if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
3429 /* Find this symbol in the sequence table. */
3430 char ucsbuf[10];
3431 char *startmb;
3432 size_t lenmb;
3433 struct element_t *insp;
3434 int no_error = 1;
3435 void *ptr;
3437 if (arg->tok == tok_bsymbol)
3439 startmb = arg->val.str.startmb;
3440 lenmb = arg->val.str.lenmb;
3442 else
3444 sprintf (ucsbuf, "U%08X", arg->val.ucs4);
3445 startmb = ucsbuf;
3446 lenmb = 9;
3449 if (find_entry (&collate->seq_table, startmb, lenmb, &ptr) == 0)
3450 /* Yes, the symbol exists. Simply point the cursor
3451 to it. */
3452 collate->cursor = (struct element_t *) ptr;
3453 else
3455 struct symbol_t *symbp;
3456 void *ptr;
3458 if (find_entry (&collate->sym_table, startmb, lenmb,
3459 &ptr) == 0)
3461 symbp = ptr;
3463 if (symbp->order->last != NULL
3464 || symbp->order->next != NULL)
3465 collate->cursor = symbp->order;
3466 else
3468 /* This is a collating symbol but its position
3469 is not yet defined. */
3470 lr_error (ldfile, _("\
3471 %s: order for collating symbol %.*s not yet defined"),
3472 "LC_COLLATE", (int) lenmb, startmb);
3473 collate->cursor = NULL;
3474 no_error = 0;
3477 else if (find_entry (&collate->elem_table, startmb, lenmb,
3478 &ptr) == 0)
3480 insp = (struct element_t *) ptr;
3482 if (insp->last != NULL || insp->next != NULL)
3483 collate->cursor = insp;
3484 else
3486 /* This is a collating element but its position
3487 is not yet defined. */
3488 lr_error (ldfile, _("\
3489 %s: order for collating element %.*s not yet defined"),
3490 "LC_COLLATE", (int) lenmb, startmb);
3491 collate->cursor = NULL;
3492 no_error = 0;
3495 else
3497 /* This is bad. The symbol after which we have to
3498 insert does not exist. */
3499 lr_error (ldfile, _("\
3500 %s: cannot reorder after %.*s: symbol not known"),
3501 "LC_COLLATE", (int) lenmb, startmb);
3502 collate->cursor = NULL;
3503 no_error = 0;
3507 lr_ignore_rest (ldfile, no_error);
3509 else
3510 /* This must not happen. */
3511 goto err_label;
3512 break;
3514 case tok_reorder_end:
3515 /* Ignore the rest of the line if we don't need the input of
3516 this line. */
3517 if (ignore_content)
3518 break;
3520 if (state != 3)
3521 goto err_label;
3522 state = 4;
3523 lr_ignore_rest (ldfile, 1);
3524 break;
3526 case tok_reorder_sections_after:
3527 /* Ignore the rest of the line if we don't need the input of
3528 this line. */
3529 if (ignore_content)
3531 lr_ignore_rest (ldfile, 0);
3532 break;
3535 if (state == 1)
3537 lr_error (ldfile, _("%s: missing `order_end' keyword"),
3538 "LC_COLLATE");
3539 state = 2;
3541 /* Handle ellipsis at end of list. */
3542 if (was_ellipsis != tok_none)
3544 handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3545 repertoire, result);
3546 was_ellipsis = tok_none;
3549 else if (state == 3)
3551 WITH_CUR_LOCALE (error (0, 0, _("\
3552 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3553 state = 4;
3555 else if (state != 2 && state != 4)
3556 goto err_label;
3557 state = 5;
3559 /* Get the name of the sections we are adding after. */
3560 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3561 if (arg->tok == tok_bsymbol)
3563 /* Now find a section with this name. */
3564 struct section_list *runp = collate->sections;
3566 while (runp != NULL)
3568 if (runp->name != NULL
3569 && strlen (runp->name) == arg->val.str.lenmb
3570 && memcmp (runp->name, arg->val.str.startmb,
3571 arg->val.str.lenmb) == 0)
3572 break;
3574 runp = runp->next;
3577 if (runp != NULL)
3578 collate->current_section = runp;
3579 else
3581 /* This is bad. The section after which we have to
3582 reorder does not exist. Therefore we cannot
3583 process the whole rest of this reorder
3584 specification. */
3585 lr_error (ldfile, _("%s: section `%.*s' not known"),
3586 "LC_COLLATE", (int) arg->val.str.lenmb,
3587 arg->val.str.startmb);
3591 lr_ignore_rest (ldfile, 0);
3593 now = lr_token (ldfile, charmap, result, NULL, verbose);
3595 while (now->tok == tok_reorder_sections_after
3596 || now->tok == tok_reorder_sections_end
3597 || now->tok == tok_end);
3599 /* Process the token we just saw. */
3600 nowtok = now->tok;
3601 continue;
3604 else
3605 /* This must not happen. */
3606 goto err_label;
3607 break;
3609 case tok_reorder_sections_end:
3610 /* Ignore the rest of the line if we don't need the input of
3611 this line. */
3612 if (ignore_content)
3613 break;
3615 if (state != 5)
3616 goto err_label;
3617 state = 6;
3618 lr_ignore_rest (ldfile, 1);
3619 break;
3621 case tok_bsymbol:
3622 case tok_ucs4:
3623 /* Ignore the rest of the line if we don't need the input of
3624 this line. */
3625 if (ignore_content)
3627 lr_ignore_rest (ldfile, 0);
3628 break;
3631 if (state != 0 && state != 1 && state != 3 && state != 5)
3632 goto err_label;
3634 if ((state == 0 || state == 5) && nowtok == tok_ucs4)
3635 goto err_label;
3637 if (nowtok == tok_ucs4)
3639 snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4);
3640 symstr = ucs4buf;
3641 symlen = 9;
3643 else if (arg != NULL)
3645 symstr = arg->val.str.startmb;
3646 symlen = arg->val.str.lenmb;
3648 else
3650 lr_error (ldfile, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3651 (int) ldfile->token.val.str.lenmb,
3652 ldfile->token.val.str.startmb);
3653 break;
3656 struct element_t *seqp;
3657 if (state == 0)
3659 /* We are outside an `order_start' region. This means
3660 we must only accept definitions of values for
3661 collation symbols since these are purely abstract
3662 values and don't need directions associated. */
3663 void *ptr;
3665 if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3667 seqp = ptr;
3669 /* It's already defined. First check whether this
3670 is really a collating symbol. */
3671 if (seqp->is_character)
3672 goto err_label;
3674 goto move_entry;
3676 else
3678 void *result;
3680 if (find_entry (&collate->sym_table, symstr, symlen,
3681 &result) != 0)
3682 /* No collating symbol, it's an error. */
3683 goto err_label;
3685 /* Maybe this is the first time we define a symbol
3686 value and it is before the first actual section. */
3687 if (collate->sections == NULL)
3688 collate->sections = collate->current_section =
3689 &collate->symbol_section;
3692 if (was_ellipsis != tok_none)
3694 handle_ellipsis (ldfile, symstr, symlen, was_ellipsis,
3695 charmap, repertoire, result);
3697 /* Remember that we processed the ellipsis. */
3698 was_ellipsis = tok_none;
3700 /* And don't add the value a second time. */
3701 break;
3704 else if (state == 3)
3706 /* It is possible that we already have this collation sequence.
3707 In this case we move the entry. */
3708 void *sym;
3709 void *ptr;
3711 /* If the symbol after which we have to insert was not found
3712 ignore all entries. */
3713 if (collate->cursor == NULL)
3715 lr_ignore_rest (ldfile, 0);
3716 break;
3719 if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3721 seqp = (struct element_t *) ptr;
3722 goto move_entry;
3725 if (find_entry (&collate->sym_table, symstr, symlen, &sym) == 0
3726 && (seqp = ((struct symbol_t *) sym)->order) != NULL)
3727 goto move_entry;
3729 if (find_entry (&collate->elem_table, symstr, symlen, &ptr) == 0
3730 && (seqp = (struct element_t *) ptr,
3731 seqp->last != NULL || seqp->next != NULL
3732 || (collate->start != NULL && seqp == collate->start)))
3734 move_entry:
3735 /* Remove the entry from the old position. */
3736 if (seqp->last == NULL)
3737 collate->start = seqp->next;
3738 else
3739 seqp->last->next = seqp->next;
3740 if (seqp->next != NULL)
3741 seqp->next->last = seqp->last;
3743 /* We also have to check whether this entry is the
3744 first or last of a section. */
3745 if (seqp->section->first == seqp)
3747 if (seqp->section->first == seqp->section->last)
3748 /* This section has no content anymore. */
3749 seqp->section->first = seqp->section->last = NULL;
3750 else
3751 seqp->section->first = seqp->next;
3753 else if (seqp->section->last == seqp)
3754 seqp->section->last = seqp->last;
3756 /* Now insert it in the new place. */
3757 insert_weights (ldfile, seqp, charmap, repertoire, result,
3758 tok_none);
3759 break;
3762 /* Otherwise we just add a new entry. */
3764 else if (state == 5)
3766 /* We are reordering sections. Find the named section. */
3767 struct section_list *runp = collate->sections;
3768 struct section_list *prevp = NULL;
3770 while (runp != NULL)
3772 if (runp->name != NULL
3773 && strlen (runp->name) == symlen
3774 && memcmp (runp->name, symstr, symlen) == 0)
3775 break;
3777 prevp = runp;
3778 runp = runp->next;
3781 if (runp == NULL)
3783 lr_error (ldfile, _("%s: section `%.*s' not known"),
3784 "LC_COLLATE", (int) symlen, symstr);
3785 lr_ignore_rest (ldfile, 0);
3787 else
3789 if (runp != collate->current_section)
3791 /* Remove the named section from the old place and
3792 insert it in the new one. */
3793 prevp->next = runp->next;
3795 runp->next = collate->current_section->next;
3796 collate->current_section->next = runp;
3797 collate->current_section = runp;
3800 /* Process the rest of the line which might change
3801 the collation rules. */
3802 arg = lr_token (ldfile, charmap, result, repertoire,
3803 verbose);
3804 if (arg->tok != tok_eof && arg->tok != tok_eol)
3805 read_directions (ldfile, arg, charmap, repertoire,
3806 result);
3808 break;
3810 else if (was_ellipsis != tok_none)
3812 /* Using the information in the `ellipsis_weight'
3813 element and this and the last value we have to handle
3814 the ellipsis now. */
3815 assert (state == 1);
3817 handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap,
3818 repertoire, result);
3820 /* Remember that we processed the ellipsis. */
3821 was_ellipsis = tok_none;
3823 /* And don't add the value a second time. */
3824 break;
3827 /* Now insert in the new place. */
3828 insert_value (ldfile, symstr, symlen, charmap, repertoire, result);
3829 break;
3831 case tok_undefined:
3832 /* Ignore the rest of the line if we don't need the input of
3833 this line. */
3834 if (ignore_content)
3836 lr_ignore_rest (ldfile, 0);
3837 break;
3840 if (state != 1)
3841 goto err_label;
3843 if (was_ellipsis != tok_none)
3845 lr_error (ldfile,
3846 _("%s: cannot have `%s' as end of ellipsis range"),
3847 "LC_COLLATE", "UNDEFINED");
3849 unlink_element (collate);
3850 was_ellipsis = tok_none;
3853 /* See whether UNDEFINED already appeared somewhere. */
3854 if (collate->undefined.next != NULL
3855 || &collate->undefined == collate->cursor)
3857 lr_error (ldfile,
3858 _("%s: order for `%.*s' already defined at %s:%Zu"),
3859 "LC_COLLATE", 9, "UNDEFINED",
3860 collate->undefined.file,
3861 collate->undefined.line);
3862 lr_ignore_rest (ldfile, 0);
3864 else
3865 /* Parse the weights. */
3866 insert_weights (ldfile, &collate->undefined, charmap,
3867 repertoire, result, tok_none);
3868 break;
3870 case tok_ellipsis2: /* symbolic hexadecimal ellipsis */
3871 case tok_ellipsis3: /* absolute ellipsis */
3872 case tok_ellipsis4: /* symbolic decimal ellipsis */
3873 /* This is the symbolic (decimal or hexadecimal) or absolute
3874 ellipsis. */
3875 if (was_ellipsis != tok_none)
3876 goto err_label;
3878 if (state != 0 && state != 1 && state != 3)
3879 goto err_label;
3881 was_ellipsis = nowtok;
3883 insert_weights (ldfile, &collate->ellipsis_weight, charmap,
3884 repertoire, result, nowtok);
3885 break;
3887 case tok_end:
3888 seen_end:
3889 /* Next we assume `LC_COLLATE'. */
3890 if (!ignore_content)
3892 if (state == 0 && copy_locale == NULL)
3893 /* We must either see a copy statement or have
3894 ordering values. */
3895 lr_error (ldfile,
3896 _("%s: empty category description not allowed"),
3897 "LC_COLLATE");
3898 else if (state == 1)
3900 lr_error (ldfile, _("%s: missing `order_end' keyword"),
3901 "LC_COLLATE");
3903 /* Handle ellipsis at end of list. */
3904 if (was_ellipsis != tok_none)
3906 handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3907 repertoire, result);
3908 was_ellipsis = tok_none;
3911 else if (state == 3)
3912 WITH_CUR_LOCALE (error (0, 0, _("\
3913 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3914 else if (state == 5)
3915 WITH_CUR_LOCALE (error (0, 0, _("\
3916 %s: missing `reorder-sections-end' keyword"), "LC_COLLATE"));
3918 arg = lr_token (ldfile, charmap, result, NULL, verbose);
3919 if (arg->tok == tok_eof)
3920 break;
3921 if (arg->tok == tok_eol)
3922 lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
3923 else if (arg->tok != tok_lc_collate)
3924 lr_error (ldfile, _("\
3925 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3926 lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
3927 return;
3929 case tok_define:
3930 if (ignore_content)
3932 lr_ignore_rest (ldfile, 0);
3933 break;
3936 arg = lr_token (ldfile, charmap, result, NULL, verbose);
3937 if (arg->tok != tok_ident)
3938 goto err_label;
3940 /* Simply add the new symbol. */
3941 struct name_list *newsym = xmalloc (sizeof (*newsym)
3942 + arg->val.str.lenmb + 1);
3943 memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
3944 newsym->str[arg->val.str.lenmb] = '\0';
3945 newsym->next = defined;
3946 defined = newsym;
3948 lr_ignore_rest (ldfile, 1);
3949 break;
3951 case tok_undef:
3952 if (ignore_content)
3954 lr_ignore_rest (ldfile, 0);
3955 break;
3958 arg = lr_token (ldfile, charmap, result, NULL, verbose);
3959 if (arg->tok != tok_ident)
3960 goto err_label;
3962 /* Remove _all_ occurrences of the symbol from the list. */
3963 struct name_list *prevdef = NULL;
3964 struct name_list *curdef = defined;
3965 while (curdef != NULL)
3966 if (strncmp (arg->val.str.startmb, curdef->str,
3967 arg->val.str.lenmb) == 0
3968 && curdef->str[arg->val.str.lenmb] == '\0')
3970 if (prevdef == NULL)
3971 defined = curdef->next;
3972 else
3973 prevdef->next = curdef->next;
3975 struct name_list *olddef = curdef;
3976 curdef = curdef->next;
3978 free (olddef);
3980 else
3982 prevdef = curdef;
3983 curdef = curdef->next;
3986 lr_ignore_rest (ldfile, 1);
3987 break;
3989 case tok_ifdef:
3990 case tok_ifndef:
3991 if (ignore_content)
3993 lr_ignore_rest (ldfile, 0);
3994 break;
3997 found_ifdef:
3998 arg = lr_token (ldfile, charmap, result, NULL, verbose);
3999 if (arg->tok != tok_ident)
4000 goto err_label;
4001 lr_ignore_rest (ldfile, 1);
4003 if (collate->else_action == else_none)
4005 curdef = defined;
4006 while (curdef != NULL)
4007 if (strncmp (arg->val.str.startmb, curdef->str,
4008 arg->val.str.lenmb) == 0
4009 && curdef->str[arg->val.str.lenmb] == '\0')
4010 break;
4011 else
4012 curdef = curdef->next;
4014 if ((nowtok == tok_ifdef && curdef != NULL)
4015 || (nowtok == tok_ifndef && curdef == NULL))
4017 /* We have to use the if-branch. */
4018 collate->else_action = else_ignore;
4020 else
4022 /* We have to use the else-branch, if there is one. */
4023 nowtok = skip_to (ldfile, collate, charmap, 0);
4024 if (nowtok == tok_else)
4025 collate->else_action = else_seen;
4026 else if (nowtok == tok_elifdef)
4028 nowtok = tok_ifdef;
4029 goto found_ifdef;
4031 else if (nowtok == tok_elifndef)
4033 nowtok = tok_ifndef;
4034 goto found_ifdef;
4036 else if (nowtok == tok_eof)
4037 goto seen_eof;
4038 else if (nowtok == tok_end)
4039 goto seen_end;
4042 else
4044 /* XXX Should it really become necessary to support nested
4045 preprocessor handling we will push the state here. */
4046 lr_error (ldfile, _("%s: nested conditionals not supported"),
4047 "LC_COLLATE");
4048 nowtok = skip_to (ldfile, collate, charmap, 1);
4049 if (nowtok == tok_eof)
4050 goto seen_eof;
4051 else if (nowtok == tok_end)
4052 goto seen_end;
4054 break;
4056 case tok_elifdef:
4057 case tok_elifndef:
4058 case tok_else:
4059 if (ignore_content)
4061 lr_ignore_rest (ldfile, 0);
4062 break;
4065 lr_ignore_rest (ldfile, 1);
4067 if (collate->else_action == else_ignore)
4069 /* Ignore everything until the endif. */
4070 nowtok = skip_to (ldfile, collate, charmap, 1);
4071 if (nowtok == tok_eof)
4072 goto seen_eof;
4073 else if (nowtok == tok_end)
4074 goto seen_end;
4076 else
4078 assert (collate->else_action == else_none);
4079 lr_error (ldfile, _("\
4080 %s: '%s' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE",
4081 nowtok == tok_else ? "else"
4082 : nowtok == tok_elifdef ? "elifdef" : "elifndef");
4084 break;
4086 case tok_endif:
4087 if (ignore_content)
4089 lr_ignore_rest (ldfile, 0);
4090 break;
4093 lr_ignore_rest (ldfile, 1);
4095 if (collate->else_action != else_ignore
4096 && collate->else_action != else_seen)
4097 lr_error (ldfile, _("\
4098 %s: 'endif' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE");
4100 /* XXX If we support nested preprocessor directives we pop
4101 the state here. */
4102 collate->else_action = else_none;
4103 break;
4105 default:
4106 err_label:
4107 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
4110 /* Prepare for the next round. */
4111 now = lr_token (ldfile, charmap, result, NULL, verbose);
4112 nowtok = now->tok;
4115 seen_eof:
4116 /* When we come here we reached the end of the file. */
4117 lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");