2.9
[glibc/nacl-glibc.git] / locale / programs / ld-collate.c
blobbf50e77aab2691a6765d558961f8fe12e39478d6
1 /* Copyright (C) 1995-2003, 2005-2007, 2008 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
23 #include <errno.h>
24 #include <error.h>
25 #include <stdlib.h>
26 #include <wchar.h>
27 #include <sys/param.h>
29 #include "localedef.h"
30 #include "charmap.h"
31 #include "localeinfo.h"
32 #include "linereader.h"
33 #include "locfile.h"
34 #include "elem-hash.h"
36 /* Uncomment the following line in the production version. */
37 /* #define NDEBUG 1 */
38 #include <assert.h>
/* Allocation primitives used by the obstack macros (obstack_init and
   friends) in this file.  */
#define obstack_chunk_alloc malloc
#define obstack_chunk_free free
/* Append the 32-bit value DATA to the object currently being grown in
   OBSTACK.  When `int' has the same size as `int32_t' the dedicated
   obstack_int_grow primitive is used, otherwise the bytes of DATA are
   appended with the generic obstack_grow.  */
static inline void
__attribute ((always_inline))
obstack_int32_grow (struct obstack *obstack, int32_t data)
{
  if (sizeof (int32_t) == sizeof (int))
    obstack_int_grow (obstack, data);
  else
    obstack_grow (obstack, &data, sizeof (int32_t));
}
/* Like obstack_int32_grow, but uses the obstack_int_grow_fast primitive
   when `int' has the same size as `int32_t'.  If the sizes differ the
   value is appended with the ordinary obstack_grow instead.  */
static inline void
__attribute ((always_inline))
obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
{
  if (sizeof (int32_t) == sizeof (int))
    obstack_int_grow_fast (obstack, data);
  else
    obstack_grow (obstack, &data, sizeof (int32_t));
}
/* Forward declaration.  */
struct element_t;

/* Data type for list of strings.  Each node describes one section of
   the collation definition and is chained into two separate lists.  */
struct section_list
{
  /* Successor in the known_sections list.  */
  struct section_list *def_next;
  /* Successor in the sections list.  */
  struct section_list *next;
  /* Name of the section.  */
  const char *name;
  /* First element of this section.  */
  struct element_t *first;
  /* Last element of this section.  */
  struct element_t *last;
  /* These are the rules for this section.  */
  enum coll_sort_rule *rules;
  /* Index of the rule set in the appropriate section of the output file.  */
  int ruleidx;
};
struct element_t;

/* A counted array of element pointers; used for the weights of an
   element on one level.  */
struct element_list_t
{
  /* Number of elements.  */
  int cnt;

  /* The CNT element pointers.  */
  struct element_t **w;
};
/* Data type for collating element.  */
struct element_t
{
  /* Symbolic name; may be NULL.  */
  const char *name;

  /* Multibyte and wide character representation together with their
     lengths (counted without the terminating zero).  */
  const char *mbs;
  size_t nmbs;
  const uint32_t *wcs;
  size_t nwcs;
  int *mborder;
  int wcorder;

  /* The following is a bit mask which bits are set if this element is
     used in the appropriate level.  Interesting for the singlebyte
     weight computation.

     XXX The type here restricts the number of levels to 32.  It could
     be changed if necessary but I doubt this is necessary.  */
  unsigned int used_in_level;

  /* One weight list per sorting rule.  */
  struct element_list_t *weights;

  /* Nonzero if this is a real character definition.  */
  int is_character;

  /* Order of the character in the sequence.  This information will
     be used in range expressions.  */
  int mbseqorder;
  int wcseqorder;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;

  /* Which section does this belong to.  */
  struct section_list *section;

  /* Predecessor and successor in the order list.  */
  struct element_t *last;
  struct element_t *next;

  /* Next element in multibyte output list.  */
  struct element_t *mbnext;
  struct element_t *mblast;

  /* Next element in wide character output list.  */
  struct element_t *wcnext;
  struct element_t *wclast;
};
/* Special element value.  These are sentinel pointers which stand for
   the ellipsis forms; they must never be dereferenced.  */
#define ELEMENT_ELLIPSIS2 ((struct element_t *) 1)
#define ELEMENT_ELLIPSIS3 ((struct element_t *) 2)
#define ELEMENT_ELLIPSIS4 ((struct element_t *) 3)
/* Data type for collating symbol.  */
struct symbol_t
{
  /* Name of the symbol.  */
  const char *name;

  /* Point to place in the order list.  */
  struct element_t *order;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;
};
163 /* Sparse table of struct element_t *. */
164 #define TABLE wchead_table
165 #define ELEMENT struct element_t *
166 #define DEFAULT NULL
167 #define ITERATE
168 #define NO_FINALIZE
169 #include "3level.h"
171 /* Sparse table of int32_t. */
172 #define TABLE collidx_table
173 #define ELEMENT int32_t
174 #define DEFAULT 0
175 #include "3level.h"
177 /* Sparse table of uint32_t. */
178 #define TABLE collseq_table
179 #define ELEMENT uint32_t
180 #define DEFAULT ~((uint32_t) 0)
181 #include "3level.h"
/* Simple name list for the preprocessor.  The string is stored
   directly behind the link pointer, so a node has to be allocated with
   room for the name plus its terminating NUL byte.  */
struct name_list
{
  struct name_list *next;
  /* C99 flexible array member instead of the GNU zero-length array.  */
  char str[];
};
192 /* The real definition of the struct for the LC_COLLATE locale. */
193 struct locale_collate_t
195 int col_weight_max;
196 int cur_weight_max;
198 /* List of known scripts. */
199 struct section_list *known_sections;
200 /* List of used sections. */
201 struct section_list *sections;
202 /* Current section using definition. */
203 struct section_list *current_section;
204 /* There always can be an unnamed section. */
205 struct section_list unnamed_section;
206 /* To make handling of errors easier we have another section. */
207 struct section_list error_section;
208 /* Sometimes we are defining the values for collating symbols before
209 the first actual section. */
210 struct section_list symbol_section;
212 /* Start of the order list. */
213 struct element_t *start;
215 /* The undefined element. */
216 struct element_t undefined;
218 /* This is the cursor for `reorder_after' insertions. */
219 struct element_t *cursor;
221 /* This value is used when handling ellipsis. */
222 struct element_t ellipsis_weight;
224 /* Known collating elements. */
225 hash_table elem_table;
227 /* Known collating symbols. */
228 hash_table sym_table;
230 /* Known collation sequences. */
231 hash_table seq_table;
233 struct obstack mempool;
235 /* The LC_COLLATE category is a bit special as it is sometimes possible
236 that the definitions from more than one input file contains information.
237 Therefore we keep all relevant input in a list. */
238 struct locale_collate_t *next;
240 /* Arrays with heads of the list for each of the leading bytes in
241 the multibyte sequences. */
242 struct element_t *mbheads[256];
244 /* Arrays with heads of the list for each of the leading bytes in
245 the multibyte sequences. */
246 struct wchead_table wcheads;
248 /* The arrays with the collation sequence order. */
249 unsigned char mbseqorder[256];
250 struct collseq_table wcseqorder;
252 /* State of the preprocessor. */
253 enum
255 else_none = 0,
256 else_ignore,
257 else_seen
259 else_action;
/* We have a few global variables which are used for reading all
   LC_COLLATE category descriptions in all files.  */

/* Number of sorting rules (levels).  Zero until the first direction
   specification has been read.  */
static uint32_t nrules;

/* List of defined preprocessor symbols.  */
static struct name_list *defined;
/* We need UTF-8 encoding of numbers.

   Store the UTF-8 style encoding of VAL in BUF and return the number
   of bytes written (1 to 6).  No terminating NUL byte is written, so
   BUF must provide room for up to six bytes.  Every value below 0x80,
   which includes negative values, is stored as a single byte.  */
static inline int
__attribute ((always_inline))
utf8_encode (char *buf, int val)
{
  int retval;

  if (val < 0x80)
    {
      *buf = (char) val;
      retval = 1;
    }
  else
    {
      int step;

      /* A sequence of STEP bytes carries 5 * STEP + 1 payload bits.
         Pick the shortest sequence which can hold VAL.  */
      for (step = 2; step < 6; ++step)
        if ((val & (~(uint32_t) 0 << (5 * step + 1))) == 0)
          break;
      retval = step;

      /* The lead byte starts with STEP one-bits followed by a zero bit.
         Shifting an unsigned constant avoids the implementation-defined
         right shift of a negative value.  */
      *buf = (unsigned char) (0xff00u >> step);
      --step;
      /* Fill the continuation bytes from the end, six bits at a time.  */
      do
        {
          buf[step] = 0x80 | (val & 0x3f);
          val >>= 6;
        }
      while (--step > 0);
      /* What is left of VAL goes into the lead byte.  */
      *buf |= val;
    }

  return retval;
}
307 static struct section_list *
308 make_seclist_elem (struct locale_collate_t *collate, const char *string,
309 struct section_list *next)
311 struct section_list *newp;
313 newp = (struct section_list *) obstack_alloc (&collate->mempool,
314 sizeof (*newp));
315 newp->next = next;
316 newp->name = string;
317 newp->first = NULL;
318 newp->last = NULL;
320 return newp;
324 static struct element_t *
325 new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
326 const uint32_t *wcs, const char *name, size_t namelen,
327 int is_character)
329 struct element_t *newp;
331 newp = (struct element_t *) obstack_alloc (&collate->mempool,
332 sizeof (*newp));
333 newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
334 name, namelen);
335 if (mbs != NULL)
337 newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
338 newp->nmbs = mbslen;
340 else
342 newp->mbs = NULL;
343 newp->nmbs = 0;
345 if (wcs != NULL)
347 size_t nwcs = wcslen ((wchar_t *) wcs);
348 uint32_t zero = 0;
349 obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
350 obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
351 newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
352 newp->nwcs = nwcs;
354 else
356 newp->wcs = NULL;
357 newp->nwcs = 0;
359 newp->mborder = NULL;
360 newp->wcorder = 0;
361 newp->used_in_level = 0;
362 newp->is_character = is_character;
364 /* Will be assigned later. XXX */
365 newp->mbseqorder = 0;
366 newp->wcseqorder = 0;
368 /* Will be allocated later. */
369 newp->weights = NULL;
371 newp->file = NULL;
372 newp->line = 0;
374 newp->section = collate->current_section;
376 newp->last = NULL;
377 newp->next = NULL;
379 newp->mbnext = NULL;
380 newp->mblast = NULL;
382 newp->wcnext = NULL;
383 newp->wclast = NULL;
385 return newp;
389 static struct symbol_t *
390 new_symbol (struct locale_collate_t *collate, const char *name, size_t len)
392 struct symbol_t *newp;
394 newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp));
396 newp->name = obstack_copy0 (&collate->mempool, name, len);
397 newp->order = NULL;
399 newp->file = NULL;
400 newp->line = 0;
402 return newp;
406 /* Test whether this name is already defined somewhere. */
407 static int
408 check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
409 const struct charmap_t *charmap,
410 struct repertoire_t *repertoire, const char *symbol,
411 size_t symbol_len)
413 void *ignore = NULL;
415 if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
417 lr_error (ldfile, _("`%.*s' already defined in charmap"),
418 (int) symbol_len, symbol);
419 return 1;
422 if (repertoire != NULL
423 && (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore)
424 == 0))
426 lr_error (ldfile, _("`%.*s' already defined in repertoire"),
427 (int) symbol_len, symbol);
428 return 1;
431 if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
433 lr_error (ldfile, _("`%.*s' already defined as collating symbol"),
434 (int) symbol_len, symbol);
435 return 1;
438 if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
440 lr_error (ldfile, _("`%.*s' already defined as collating element"),
441 (int) symbol_len, symbol);
442 return 1;
445 return 0;
449 /* Read the direction specification. */
450 static void
451 read_directions (struct linereader *ldfile, struct token *arg,
452 const struct charmap_t *charmap,
453 struct repertoire_t *repertoire, struct localedef_t *result)
455 int cnt = 0;
456 int max = nrules ?: 10;
457 enum coll_sort_rule *rules = calloc (max, sizeof (*rules));
458 int warned = 0;
459 struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
461 while (1)
463 int valid = 0;
465 if (arg->tok == tok_forward)
467 if (rules[cnt] & sort_backward)
469 if (! warned)
471 lr_error (ldfile, _("\
472 %s: `forward' and `backward' are mutually excluding each other"),
473 "LC_COLLATE");
474 warned = 1;
477 else if (rules[cnt] & sort_forward)
479 if (! warned)
481 lr_error (ldfile, _("\
482 %s: `%s' mentioned more than once in definition of weight %d"),
483 "LC_COLLATE", "forward", cnt + 1);
486 else
487 rules[cnt] |= sort_forward;
489 valid = 1;
491 else if (arg->tok == tok_backward)
493 if (rules[cnt] & sort_forward)
495 if (! warned)
497 lr_error (ldfile, _("\
498 %s: `forward' and `backward' are mutually excluding each other"),
499 "LC_COLLATE");
500 warned = 1;
503 else if (rules[cnt] & sort_backward)
505 if (! warned)
507 lr_error (ldfile, _("\
508 %s: `%s' mentioned more than once in definition of weight %d"),
509 "LC_COLLATE", "backward", cnt + 1);
512 else
513 rules[cnt] |= sort_backward;
515 valid = 1;
517 else if (arg->tok == tok_position)
519 if (rules[cnt] & sort_position)
521 if (! warned)
523 lr_error (ldfile, _("\
524 %s: `%s' mentioned more than once in definition of weight %d"),
525 "LC_COLLATE", "position", cnt + 1);
528 else
529 rules[cnt] |= sort_position;
531 valid = 1;
534 if (valid)
535 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
537 if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
538 || arg->tok == tok_semicolon)
540 if (! valid && ! warned)
542 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
543 warned = 1;
546 /* See whether we have to increment the counter. */
547 if (arg->tok != tok_comma && rules[cnt] != 0)
549 /* Add the default `forward' if we have seen only `position'. */
550 if (rules[cnt] == sort_position)
551 rules[cnt] = sort_position | sort_forward;
553 ++cnt;
556 if (arg->tok == tok_eof || arg->tok == tok_eol)
557 /* End of line or file, so we exit the loop. */
558 break;
560 if (nrules == 0)
562 /* See whether we have enough room in the array. */
563 if (cnt == max)
565 max += 10;
566 rules = (enum coll_sort_rule *) xrealloc (rules,
568 * sizeof (*rules));
569 memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
572 else
574 if (cnt == nrules)
576 /* There must not be any more rule. */
577 if (! warned)
579 lr_error (ldfile, _("\
580 %s: too many rules; first entry only had %d"),
581 "LC_COLLATE", nrules);
582 warned = 1;
585 lr_ignore_rest (ldfile, 0);
586 break;
590 else
592 if (! warned)
594 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
595 warned = 1;
599 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
602 if (nrules == 0)
604 /* Now we know how many rules we have. */
605 nrules = cnt;
606 rules = (enum coll_sort_rule *) xrealloc (rules,
607 nrules * sizeof (*rules));
609 else
611 if (cnt < nrules)
613 /* Not enough rules in this specification. */
614 if (! warned)
615 lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
618 rules[cnt] = sort_forward;
619 while (++cnt < nrules);
623 collate->current_section->rules = rules;
627 static struct element_t *
628 find_element (struct linereader *ldfile, struct locale_collate_t *collate,
629 const char *str, size_t len)
631 void *result = NULL;
633 /* Search for the entries among the collation sequences already define. */
634 if (find_entry (&collate->seq_table, str, len, &result) != 0)
636 /* Nope, not define yet. So we see whether it is a
637 collation symbol. */
638 void *ptr;
640 if (find_entry (&collate->sym_table, str, len, &ptr) == 0)
642 /* It's a collation symbol. */
643 struct symbol_t *sym = (struct symbol_t *) ptr;
644 result = sym->order;
646 if (result == NULL)
647 result = sym->order = new_element (collate, NULL, 0, NULL,
648 NULL, 0, 0);
650 else if (find_entry (&collate->elem_table, str, len, &result) != 0)
652 /* It's also no collation element. So it is a character
653 element defined later. */
654 result = new_element (collate, NULL, 0, NULL, str, len, 1);
655 /* Insert it into the sequence table. */
656 insert_entry (&collate->seq_table, str, len, result);
660 return (struct element_t *) result;
664 static void
665 unlink_element (struct locale_collate_t *collate)
667 if (collate->cursor == collate->start)
669 assert (collate->cursor->next == NULL);
670 assert (collate->cursor->last == NULL);
671 collate->cursor = NULL;
673 else
675 if (collate->cursor->next != NULL)
676 collate->cursor->next->last = collate->cursor->last;
677 if (collate->cursor->last != NULL)
678 collate->cursor->last->next = collate->cursor->next;
679 collate->cursor = collate->cursor->last;
684 static void
685 insert_weights (struct linereader *ldfile, struct element_t *elem,
686 const struct charmap_t *charmap,
687 struct repertoire_t *repertoire, struct localedef_t *result,
688 enum token_t ellipsis)
690 int weight_cnt;
691 struct token *arg;
692 struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
694 /* Initialize all the fields. */
695 elem->file = ldfile->fname;
696 elem->line = ldfile->lineno;
698 elem->last = collate->cursor;
699 elem->next = collate->cursor ? collate->cursor->next : NULL;
700 if (collate->cursor != NULL && collate->cursor->next != NULL)
701 collate->cursor->next->last = elem;
702 if (collate->cursor != NULL)
703 collate->cursor->next = elem;
704 if (collate->start == NULL)
706 assert (collate->cursor == NULL);
707 collate->start = elem;
710 elem->section = collate->current_section;
712 if (collate->current_section->first == NULL)
713 collate->current_section->first = elem;
714 if (collate->current_section->last == collate->cursor)
715 collate->current_section->last = elem;
717 collate->cursor = elem;
719 elem->weights = (struct element_list_t *)
720 obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
721 memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
723 weight_cnt = 0;
725 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
728 if (arg->tok == tok_eof || arg->tok == tok_eol)
729 break;
731 if (arg->tok == tok_ignore)
733 /* The weight for this level has to be ignored. We use the
734 null pointer to indicate this. */
735 elem->weights[weight_cnt].w = (struct element_t **)
736 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
737 elem->weights[weight_cnt].w[0] = NULL;
738 elem->weights[weight_cnt].cnt = 1;
740 else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
742 char ucs4str[10];
743 struct element_t *val;
744 char *symstr;
745 size_t symlen;
747 if (arg->tok == tok_bsymbol)
749 symstr = arg->val.str.startmb;
750 symlen = arg->val.str.lenmb;
752 else
754 snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
755 symstr = ucs4str;
756 symlen = 9;
759 val = find_element (ldfile, collate, symstr, symlen);
760 if (val == NULL)
761 break;
763 elem->weights[weight_cnt].w = (struct element_t **)
764 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
765 elem->weights[weight_cnt].w[0] = val;
766 elem->weights[weight_cnt].cnt = 1;
768 else if (arg->tok == tok_string)
770 /* Split the string up in the individual characters and put
771 the element definitions in the list. */
772 const char *cp = arg->val.str.startmb;
773 int cnt = 0;
774 struct element_t *charelem;
775 struct element_t **weights = NULL;
776 int max = 0;
778 if (*cp == '\0')
780 lr_error (ldfile, _("%s: empty weight string not allowed"),
781 "LC_COLLATE");
782 lr_ignore_rest (ldfile, 0);
783 break;
788 if (*cp == '<')
790 /* Ahh, it's a bsymbol or an UCS4 value. If it's
791 the latter we have to unify the name. */
792 const char *startp = ++cp;
793 size_t len;
795 while (*cp != '>')
797 if (*cp == ldfile->escape_char)
798 ++cp;
799 if (*cp == '\0')
800 /* It's a syntax error. */
801 goto syntax;
803 ++cp;
806 if (cp - startp == 5 && startp[0] == 'U'
807 && isxdigit (startp[1]) && isxdigit (startp[2])
808 && isxdigit (startp[3]) && isxdigit (startp[4]))
810 unsigned int ucs4 = strtoul (startp + 1, NULL, 16);
811 char *newstr;
813 newstr = (char *) xmalloc (10);
814 snprintf (newstr, 10, "U%08X", ucs4);
815 startp = newstr;
817 len = 9;
819 else
820 len = cp - startp;
822 charelem = find_element (ldfile, collate, startp, len);
823 ++cp;
825 else
827 /* People really shouldn't use characters directly in
828 the string. Especially since it's not really clear
829 what this means. We interpret all characters in the
830 string as if that would be bsymbols. Otherwise we
831 would have to match back to bsymbols somehow and this
832 is normally not what people normally expect. */
833 charelem = find_element (ldfile, collate, cp++, 1);
836 if (charelem == NULL)
838 /* We ignore the rest of the line. */
839 lr_ignore_rest (ldfile, 0);
840 break;
843 /* Add the pointer. */
844 if (cnt >= max)
846 struct element_t **newp;
847 max += 10;
848 newp = (struct element_t **)
849 alloca (max * sizeof (struct element_t *));
850 memcpy (newp, weights, cnt * sizeof (struct element_t *));
851 weights = newp;
853 weights[cnt++] = charelem;
855 while (*cp != '\0');
857 /* Now store the information. */
858 elem->weights[weight_cnt].w = (struct element_t **)
859 obstack_alloc (&collate->mempool,
860 cnt * sizeof (struct element_t *));
861 memcpy (elem->weights[weight_cnt].w, weights,
862 cnt * sizeof (struct element_t *));
863 elem->weights[weight_cnt].cnt = cnt;
865 /* We don't need the string anymore. */
866 free (arg->val.str.startmb);
868 else if (ellipsis != tok_none
869 && (arg->tok == tok_ellipsis2
870 || arg->tok == tok_ellipsis3
871 || arg->tok == tok_ellipsis4))
873 /* It must be the same ellipsis as used in the initial column. */
874 if (arg->tok != ellipsis)
875 lr_error (ldfile, _("\
876 %s: weights must use the same ellipsis symbol as the name"),
877 "LC_COLLATE");
879 /* The weight for this level will depend on the element
880 iterating over the range. Put a placeholder. */
881 elem->weights[weight_cnt].w = (struct element_t **)
882 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
883 elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
884 elem->weights[weight_cnt].cnt = 1;
886 else
888 syntax:
889 /* It's a syntax error. */
890 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
891 lr_ignore_rest (ldfile, 0);
892 break;
895 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
896 /* This better should be the end of the line or a semicolon. */
897 if (arg->tok == tok_semicolon)
898 /* OK, ignore this and read the next token. */
899 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
900 else if (arg->tok != tok_eof && arg->tok != tok_eol)
902 /* It's a syntax error. */
903 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
904 lr_ignore_rest (ldfile, 0);
905 break;
908 while (++weight_cnt < nrules);
910 if (weight_cnt < nrules)
912 /* This means the rest of the line uses the current element as
913 the weight. */
916 elem->weights[weight_cnt].w = (struct element_t **)
917 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
918 if (ellipsis == tok_none)
919 elem->weights[weight_cnt].w[0] = elem;
920 else
921 elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
922 elem->weights[weight_cnt].cnt = 1;
924 while (++weight_cnt < nrules);
926 else
928 if (arg->tok == tok_ignore || arg->tok == tok_bsymbol)
930 /* Too many rule values. */
931 lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
932 lr_ignore_rest (ldfile, 0);
934 else
935 lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
940 static int
941 insert_value (struct linereader *ldfile, const char *symstr, size_t symlen,
942 const struct charmap_t *charmap, struct repertoire_t *repertoire,
943 struct localedef_t *result)
945 /* First find out what kind of symbol this is. */
946 struct charseq *seq;
947 uint32_t wc;
948 struct element_t *elem = NULL;
949 struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
951 /* Try to find the character in the charmap. */
952 seq = charmap_find_value (charmap, symstr, symlen);
954 /* Determine the wide character. */
955 if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
957 wc = repertoire_find_value (repertoire, symstr, symlen);
958 if (seq != NULL)
959 seq->ucs4 = wc;
961 else
962 wc = seq->ucs4;
964 if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
966 /* It's no character, so look through the collation elements and
967 symbol list. */
968 void *ptr = elem;
969 if (find_entry (&collate->elem_table, symstr, symlen, &ptr) != 0)
971 void *result;
972 struct symbol_t *sym = NULL;
974 /* It's also collation element. Therefore it's either a
975 collating symbol or it's a character which is not
976 supported by the character set. In the later case we
977 simply create a dummy entry. */
978 if (find_entry (&collate->sym_table, symstr, symlen, &result) == 0)
980 /* It's a collation symbol. */
981 sym = (struct symbol_t *) result;
983 elem = sym->order;
986 if (elem == NULL)
988 elem = new_element (collate, NULL, 0, NULL, symstr, symlen, 0);
990 if (sym != NULL)
991 sym->order = elem;
992 else
993 /* Enter a fake element in the sequence table. This
994 won't cause anything in the output since there is
995 no multibyte or wide character associated with
996 it. */
997 insert_entry (&collate->seq_table, symstr, symlen, elem);
1000 else
1001 /* Copy the result back. */
1002 elem = ptr;
1004 else
1006 /* Otherwise the symbols stands for a character. */
1007 void *ptr = elem;
1008 if (find_entry (&collate->seq_table, symstr, symlen, &ptr) != 0)
1010 uint32_t wcs[2] = { wc, 0 };
1012 /* We have to allocate an entry. */
1013 elem = new_element (collate,
1014 seq != NULL ? (char *) seq->bytes : NULL,
1015 seq != NULL ? seq->nbytes : 0,
1016 wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
1017 symstr, symlen, 1);
1019 /* And add it to the table. */
1020 if (insert_entry (&collate->seq_table, symstr, symlen, elem) != 0)
1021 /* This cannot happen. */
1022 assert (! "Internal error");
1024 else
1026 /* Copy the result back. */
1027 elem = ptr;
1029 /* Maybe the character was used before the definition. In this case
1030 we have to insert the byte sequences now. */
1031 if (elem->mbs == NULL && seq != NULL)
1033 elem->mbs = obstack_copy0 (&collate->mempool,
1034 seq->bytes, seq->nbytes);
1035 elem->nmbs = seq->nbytes;
1038 if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
1040 uint32_t wcs[2] = { wc, 0 };
1042 elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
1043 elem->nwcs = 1;
1048 /* Test whether this element is not already in the list. */
1049 if (elem->next != NULL || elem == collate->cursor)
1051 lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
1052 (int) symlen, symstr, elem->file, elem->line);
1053 lr_ignore_rest (ldfile, 0);
1054 return 1;
1057 insert_weights (ldfile, elem, charmap, repertoire, result, tok_none);
1059 return 0;
1063 static void
1064 handle_ellipsis (struct linereader *ldfile, const char *symstr, size_t symlen,
1065 enum token_t ellipsis, const struct charmap_t *charmap,
1066 struct repertoire_t *repertoire,
1067 struct localedef_t *result)
1069 struct element_t *startp;
1070 struct element_t *endp;
1071 struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
1073 /* Unlink the entry added for the ellipsis. */
1074 unlink_element (collate);
1075 startp = collate->cursor;
1077 /* Process and add the end-entry. */
1078 if (symstr != NULL
1079 && insert_value (ldfile, symstr, symlen, charmap, repertoire, result))
1080 /* Something went wrong with inserting the to-value. This means
1081 we cannot process the ellipsis. */
1082 return;
1084 /* Reset the cursor. */
1085 collate->cursor = startp;
1087 /* Now we have to handle many different situations:
1088 - we have to distinguish between the three different ellipsis forms
1089 - the is the ellipsis at the beginning, in the middle, or at the end.
1091 endp = collate->cursor->next;
1092 assert (symstr == NULL || endp != NULL);
1094 /* XXX The following is probably very wrong since also collating symbols
1095 can appear in ranges. But do we want/can refine the test for that? */
1096 #if 0
1097 /* Both, the start and the end symbol, must stand for characters. */
1098 if ((startp != NULL && (startp->name == NULL || ! startp->is_character))
1099 || (endp != NULL && (endp->name == NULL|| ! endp->is_character)))
1101 lr_error (ldfile, _("\
1102 %s: the start and the end symbol of a range must stand for characters"),
1103 "LC_COLLATE");
1104 return;
1106 #endif
1108 if (ellipsis == tok_ellipsis3)
1110 /* One requirement we make here: the length of the byte
1111 sequences for the first and end character must be the same.
1112 This is mainly to prevent unwanted effects and this is often
1113 not what is wanted. */
1114 size_t len = (startp->mbs != NULL ? startp->nmbs
1115 : (endp->mbs != NULL ? endp->nmbs : 0));
1116 char mbcnt[len + 1];
1117 char mbend[len + 1];
1119 /* Well, this should be caught somewhere else already. Just to
1120 make sure. */
1121 assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0);
1122 assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0);
1124 if (startp != NULL && endp != NULL
1125 && startp->mbs != NULL && endp->mbs != NULL
1126 && startp->nmbs != endp->nmbs)
1128 lr_error (ldfile, _("\
1129 %s: byte sequences of first and last character must have the same length"),
1130 "LC_COLLATE");
1131 return;
1134 /* Determine whether we have to generate multibyte sequences. */
1135 if ((startp == NULL || startp->mbs != NULL)
1136 && (endp == NULL || endp->mbs != NULL))
1138 int cnt;
1139 int ret;
1141 /* Prepare the beginning byte sequence. This is either from the
1142 beginning byte sequence or it is all nulls if it was an
1143 initial ellipsis. */
1144 if (startp == NULL || startp->mbs == NULL)
1145 memset (mbcnt, '\0', len);
1146 else
1148 memcpy (mbcnt, startp->mbs, len);
1150 /* And increment it so that the value is the first one we will
1151 try to insert. */
1152 for (cnt = len - 1; cnt >= 0; --cnt)
1153 if (++mbcnt[cnt] != '\0')
1154 break;
1156 mbcnt[len] = '\0';
1158 /* And the end sequence. */
1159 if (endp == NULL || endp->mbs == NULL)
1160 memset (mbend, '\0', len);
1161 else
1162 memcpy (mbend, endp->mbs, len);
1163 mbend[len] = '\0';
1165 /* Test whether we have a correct range. */
1166 ret = memcmp (mbcnt, mbend, len);
1167 if (ret >= 0)
1169 if (ret > 0)
1170 lr_error (ldfile, _("%s: byte sequence of first character of \
1171 range is not lower than that of the last character"), "LC_COLLATE");
1172 return;
1175 /* Generate the byte sequences data. */
1176 while (1)
1178 struct charseq *seq;
1180 /* Quite a bit of work ahead. We have to find the character
1181 definition for the byte sequence and then determine the
1182 wide character belonging to it. */
1183 seq = charmap_find_symbol (charmap, mbcnt, len);
1184 if (seq != NULL)
1186 struct element_t *elem;
1187 size_t namelen;
1189 /* I don't think this can ever happen. */
1190 assert (seq->name != NULL);
1191 namelen = strlen (seq->name);
1193 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1194 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1195 namelen);
1197 /* Now we are ready to insert the new value in the
1198 sequence. Find out whether the element is
1199 already known. */
1200 void *ptr;
1201 if (find_entry (&collate->seq_table, seq->name, namelen,
1202 &ptr) != 0)
1204 uint32_t wcs[2] = { seq->ucs4, 0 };
1206 /* We have to allocate an entry. */
1207 elem = new_element (collate, mbcnt, len,
1208 seq->ucs4 == ILLEGAL_CHAR_VALUE
1209 ? NULL : wcs, seq->name,
1210 namelen, 1);
1212 /* And add it to the table. */
1213 if (insert_entry (&collate->seq_table, seq->name,
1214 namelen, elem) != 0)
1215 /* This cannot happen. */
1216 assert (! "Internal error");
1218 else
1219 /* Copy the result. */
1220 elem = ptr;
1222 /* Test whether this element is not already in the list. */
1223 if (elem->next != NULL || (collate->cursor != NULL
1224 && elem->next == collate->cursor))
1226 lr_error (ldfile, _("\
1227 order for `%.*s' already defined at %s:%Zu"),
1228 (int) namelen, seq->name,
1229 elem->file, elem->line);
1230 goto increment;
1233 /* Enqueue the new element. */
1234 elem->last = collate->cursor;
1235 if (collate->cursor == NULL)
1236 elem->next = NULL;
1237 else
1239 elem->next = collate->cursor->next;
1240 elem->last->next = elem;
1241 if (elem->next != NULL)
1242 elem->next->last = elem;
1244 if (collate->start == NULL)
1246 assert (collate->cursor == NULL);
1247 collate->start = elem;
1249 collate->cursor = elem;
1251 /* Add the weight value. We take them from the
1252 `ellipsis_weights' member of `collate'. */
1253 elem->weights = (struct element_list_t *)
1254 obstack_alloc (&collate->mempool,
1255 nrules * sizeof (struct element_list_t));
1256 for (cnt = 0; cnt < nrules; ++cnt)
1257 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1258 && (collate->ellipsis_weight.weights[cnt].w[0]
1259 == ELEMENT_ELLIPSIS2))
1261 elem->weights[cnt].w = (struct element_t **)
1262 obstack_alloc (&collate->mempool,
1263 sizeof (struct element_t *));
1264 elem->weights[cnt].w[0] = elem;
1265 elem->weights[cnt].cnt = 1;
1267 else
1269 /* Simply use the weight from `ellipsis_weight'. */
1270 elem->weights[cnt].w =
1271 collate->ellipsis_weight.weights[cnt].w;
1272 elem->weights[cnt].cnt =
1273 collate->ellipsis_weight.weights[cnt].cnt;
1277 /* Increment for the next round. */
1278 increment:
1279 for (cnt = len - 1; cnt >= 0; --cnt)
1280 if (++mbcnt[cnt] != '\0')
1281 break;
1283 /* Find out whether this was all. */
1284 if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0)
1285 /* Yep, that's all. */
1286 break;
1290 else
1292 /* For symbolic range we naturally must have a beginning and an
1293 end specified by the user. */
1294 if (startp == NULL)
1295 lr_error (ldfile, _("\
1296 %s: symbolic range ellipsis must not directly follow `order_start'"),
1297 "LC_COLLATE");
1298 else if (endp == NULL)
1299 lr_error (ldfile, _("\
1300 %s: symbolic range ellipsis must not be directly followed by `order_end'"),
1301 "LC_COLLATE");
1302 else
1304 /* Determine the range. To do so we have to determine the
1305 common prefix of the both names and then the numeric
1306 values of both ends. */
1307 size_t lenfrom = strlen (startp->name);
1308 size_t lento = strlen (endp->name);
1309 char buf[lento + 1];
1310 int preflen = 0;
1311 long int from;
1312 long int to;
1313 char *cp;
1314 int base = ellipsis == tok_ellipsis2 ? 16 : 10;
1316 if (lenfrom != lento)
1318 invalid_range:
1319 lr_error (ldfile, _("\
1320 `%s' and `%.*s' are not valid names for symbolic range"),
1321 startp->name, (int) lento, endp->name);
1322 return;
1325 while (startp->name[preflen] == endp->name[preflen])
1326 if (startp->name[preflen] == '\0')
1327 /* Nothing to be done. The start and end point are identical
1328 and while inserting the end point we have already given
1329 the user an error message. */
1330 return;
1331 else
1332 ++preflen;
1334 errno = 0;
1335 from = strtol (startp->name + preflen, &cp, base);
1336 if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0')
1337 goto invalid_range;
1339 errno = 0;
1340 to = strtol (endp->name + preflen, &cp, base);
1341 if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0')
1342 goto invalid_range;
1344 /* Copy the prefix. */
1345 memcpy (buf, startp->name, preflen);
1347 /* Loop over all values. */
1348 for (++from; from < to; ++from)
1350 struct element_t *elem = NULL;
1351 struct charseq *seq;
1352 uint32_t wc;
1353 int cnt;
1355 /* Generate the name. */
1356 sprintf (buf + preflen, base == 10 ? "%0*ld" : "%0*lX",
1357 (int) (lenfrom - preflen), from);
1359 /* Look whether this name is already defined. */
1360 void *ptr;
1361 if (find_entry (&collate->seq_table, buf, symlen, &ptr) == 0)
1363 /* Copy back the result. */
1364 elem = ptr;
1366 if (elem->next != NULL || (collate->cursor != NULL
1367 && elem->next == collate->cursor))
1369 lr_error (ldfile, _("\
1370 %s: order for `%.*s' already defined at %s:%Zu"),
1371 "LC_COLLATE", (int) lenfrom, buf,
1372 elem->file, elem->line);
1373 continue;
1376 if (elem->name == NULL)
1378 lr_error (ldfile, _("%s: `%s' must be a character"),
1379 "LC_COLLATE", buf);
1380 continue;
1384 if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL))
1386 /* Search for a character of this name. */
1387 seq = charmap_find_value (charmap, buf, lenfrom);
1388 if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1390 wc = repertoire_find_value (repertoire, buf, lenfrom);
1392 if (seq != NULL)
1393 seq->ucs4 = wc;
1395 else
1396 wc = seq->ucs4;
1398 if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1399 /* We don't know anything about a character with this
1400 name. XXX Should we warn? */
1401 continue;
1403 if (elem == NULL)
1405 uint32_t wcs[2] = { wc, 0 };
1407 /* We have to allocate an entry. */
1408 elem = new_element (collate,
1409 seq != NULL
1410 ? (char *) seq->bytes : NULL,
1411 seq != NULL ? seq->nbytes : 0,
1412 wc == ILLEGAL_CHAR_VALUE
1413 ? NULL : wcs, buf, lenfrom, 1);
1415 else
1417 /* Update the element. */
1418 if (seq != NULL)
1420 elem->mbs = obstack_copy0 (&collate->mempool,
1421 seq->bytes, seq->nbytes);
1422 elem->nmbs = seq->nbytes;
1425 if (wc != ILLEGAL_CHAR_VALUE)
1427 uint32_t zero = 0;
1429 obstack_grow (&collate->mempool,
1430 &wc, sizeof (uint32_t));
1431 obstack_grow (&collate->mempool,
1432 &zero, sizeof (uint32_t));
1433 elem->wcs = obstack_finish (&collate->mempool);
1434 elem->nwcs = 1;
1438 elem->file = ldfile->fname;
1439 elem->line = ldfile->lineno;
1440 elem->section = collate->current_section;
1443 /* Enqueue the new element. */
1444 elem->last = collate->cursor;
1445 elem->next = collate->cursor->next;
1446 elem->last->next = elem;
1447 if (elem->next != NULL)
1448 elem->next->last = elem;
1449 collate->cursor = elem;
1451 /* Now add the weights. They come from the `ellipsis_weights'
1452 member of `collate'. */
1453 elem->weights = (struct element_list_t *)
1454 obstack_alloc (&collate->mempool,
1455 nrules * sizeof (struct element_list_t));
1456 for (cnt = 0; cnt < nrules; ++cnt)
1457 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1458 && (collate->ellipsis_weight.weights[cnt].w[0]
1459 == ELEMENT_ELLIPSIS2))
1461 elem->weights[cnt].w = (struct element_t **)
1462 obstack_alloc (&collate->mempool,
1463 sizeof (struct element_t *));
1464 elem->weights[cnt].w[0] = elem;
1465 elem->weights[cnt].cnt = 1;
1467 else
1469 /* Simly use the weight from `ellipsis_weight'. */
1470 elem->weights[cnt].w =
1471 collate->ellipsis_weight.weights[cnt].w;
1472 elem->weights[cnt].cnt =
1473 collate->ellipsis_weight.weights[cnt].cnt;
1481 static void
1482 collate_startup (struct linereader *ldfile, struct localedef_t *locale,
1483 struct localedef_t *copy_locale, int ignore_content)
1485 if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
1487 struct locale_collate_t *collate;
1489 if (copy_locale == NULL)
1491 collate = locale->categories[LC_COLLATE].collate =
1492 (struct locale_collate_t *)
1493 xcalloc (1, sizeof (struct locale_collate_t));
1495 /* Init the various data structures. */
1496 init_hash (&collate->elem_table, 100);
1497 init_hash (&collate->sym_table, 100);
1498 init_hash (&collate->seq_table, 500);
1499 obstack_init (&collate->mempool);
1501 collate->col_weight_max = -1;
1503 else
1504 /* Reuse the copy_locale's data structures. */
1505 collate = locale->categories[LC_COLLATE].collate =
1506 copy_locale->categories[LC_COLLATE].collate;
1509 ldfile->translate_strings = 0;
1510 ldfile->return_widestr = 0;
1514 void
1515 collate_finish (struct localedef_t *locale, const struct charmap_t *charmap)
1517 /* Now is the time when we can assign the individual collation
1518 values for all the symbols. We have possibly different values
1519 for the wide- and the multibyte-character symbols. This is done
1520 since it might make a difference in the encoding if there is in
1521 some cases no multibyte-character but there are wide-characters.
1522 (The other way around it is not important since theencoded
1523 collation value in the wide-character case is 32 bits wide and
1524 therefore requires no encoding).
1526 The lowest collation value assigned is 2. Zero is reserved for
1527 the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1528 functions and 1 is used to separate the individual passes for the
1529 different rules.
1531 We also have to construct is list with all the bytes/words which
1532 can come first in a sequence, followed by all the elements which
1533 also start with this byte/word. The order is reverse which has
1534 among others the important effect that longer strings are located
1535 first in the list. This is required for the output data since
1536 the algorithm used in `strcoll' etc depends on this.
1538 The multibyte case is easy. We simply sort into an array with
1539 256 elements. */
1540 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1541 int mbact[nrules];
1542 int wcact;
1543 int mbseqact;
1544 int wcseqact;
1545 struct element_t *runp;
1546 int i;
1547 int need_undefined = 0;
1548 struct section_list *sect;
1549 int ruleidx;
1550 int nr_wide_elems = 0;
1552 if (collate == NULL)
1554 /* No data, no check. */
1555 if (! be_quiet)
1556 WITH_CUR_LOCALE (error (0, 0, _("No definition for %s category found"),
1557 "LC_COLLATE"));
1558 return;
1561 /* If this assertion is hit change the type in `element_t'. */
1562 assert (nrules <= sizeof (runp->used_in_level) * 8);
1564 /* Make sure that the `position' rule is used either in all sections
1565 or in none. */
1566 for (i = 0; i < nrules; ++i)
1567 for (sect = collate->sections; sect != NULL; sect = sect->next)
1568 if (sect != collate->current_section
1569 && sect->rules != NULL
1570 && ((sect->rules[i] & sort_position)
1571 != (collate->current_section->rules[i] & sort_position)))
1573 WITH_CUR_LOCALE (error (0, 0, _("\
1574 %s: `position' must be used for a specific level in all sections or none"),
1575 "LC_COLLATE"));
1576 break;
1579 /* Find out which elements are used at which level. At the same
1580 time we find out whether we have any undefined symbols. */
1581 runp = collate->start;
1582 while (runp != NULL)
1584 if (runp->mbs != NULL)
1586 for (i = 0; i < nrules; ++i)
1588 int j;
1590 for (j = 0; j < runp->weights[i].cnt; ++j)
1591 /* A NULL pointer as the weight means IGNORE. */
1592 if (runp->weights[i].w[j] != NULL)
1594 if (runp->weights[i].w[j]->weights == NULL)
1596 WITH_CUR_LOCALE (error_at_line (0, 0, runp->file,
1597 runp->line,
1598 _("symbol `%s' not defined"),
1599 runp->weights[i].w[j]->name));
1601 need_undefined = 1;
1602 runp->weights[i].w[j] = &collate->undefined;
1604 else
1605 /* Set the bit for the level. */
1606 runp->weights[i].w[j]->used_in_level |= 1 << i;
1611 /* Up to the next entry. */
1612 runp = runp->next;
1615 /* Walk through the list of defined sequences and assign weights. Also
1616 create the data structure which will allow generating the single byte
1617 character based tables.
1619 Since at each time only the weights for each of the rules are
1620 only compared to other weights for this rule it is possible to
1621 assign more compact weight values than simply counting all
1622 weights in sequence. We can assign weights from 3, one for each
1623 rule individually and only for those elements, which are actually
1624 used for this rule.
1626 Why is this important? It is not for the wide char table. But
1627 it is for the singlebyte output since here larger numbers have to
1628 be encoded to make it possible to emit the value as a byte
1629 string. */
1630 for (i = 0; i < nrules; ++i)
1631 mbact[i] = 2;
1632 wcact = 2;
1633 mbseqact = 0;
1634 wcseqact = 0;
1635 runp = collate->start;
1636 while (runp != NULL)
1638 /* Determine the order. */
1639 if (runp->used_in_level != 0)
1641 runp->mborder = (int *) obstack_alloc (&collate->mempool,
1642 nrules * sizeof (int));
1644 for (i = 0; i < nrules; ++i)
1645 if ((runp->used_in_level & (1 << i)) != 0)
1646 runp->mborder[i] = mbact[i]++;
1647 else
1648 runp->mborder[i] = 0;
1651 if (runp->mbs != NULL)
1653 struct element_t **eptr;
1654 struct element_t *lastp = NULL;
1656 /* Find the point where to insert in the list. */
1657 eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
1658 while (*eptr != NULL)
1660 if ((*eptr)->nmbs < runp->nmbs)
1661 break;
1663 if ((*eptr)->nmbs == runp->nmbs)
1665 int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
1667 if (c == 0)
1669 /* This should not happen. It means that we have
1670 to symbols with the same byte sequence. It is
1671 of course an error. */
1672 WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
1673 (*eptr)->line,
1674 _("\
1675 symbol `%s' has the same encoding as"), (*eptr)->name);
1676 error_at_line (0, 0, runp->file,
1677 runp->line,
1678 _("symbol `%s'"),
1679 runp->name));
1680 goto dont_insert;
1682 else if (c < 0)
1683 /* Insert it here. */
1684 break;
1687 /* To the next entry. */
1688 lastp = *eptr;
1689 eptr = &(*eptr)->mbnext;
1692 /* Set the pointers. */
1693 runp->mbnext = *eptr;
1694 runp->mblast = lastp;
1695 if (*eptr != NULL)
1696 (*eptr)->mblast = runp;
1697 *eptr = runp;
1698 dont_insert:
1702 if (runp->used_in_level)
1704 runp->wcorder = wcact++;
1706 /* We take the opportunity to count the elements which have
1707 wide characters. */
1708 ++nr_wide_elems;
1711 if (runp->is_character)
1713 if (runp->nmbs == 1)
1714 collate->mbseqorder[((unsigned char *) runp->mbs)[0]] = mbseqact++;
1716 runp->wcseqorder = wcseqact++;
1718 else if (runp->mbs != NULL && runp->weights != NULL)
1719 /* This is for collation elements. */
1720 runp->wcseqorder = wcseqact++;
1722 /* Up to the next entry. */
1723 runp = runp->next;
1726 /* Find out whether any of the `mbheads' entries is unset. In this
1727 case we use the UNDEFINED entry. */
1728 for (i = 1; i < 256; ++i)
1729 if (collate->mbheads[i] == NULL)
1731 need_undefined = 1;
1732 collate->mbheads[i] = &collate->undefined;
1735 /* Now to the wide character case. */
1736 collate->wcheads.p = 6;
1737 collate->wcheads.q = 10;
1738 wchead_table_init (&collate->wcheads);
1740 collate->wcseqorder.p = 6;
1741 collate->wcseqorder.q = 10;
1742 collseq_table_init (&collate->wcseqorder);
1744 /* Start adding. */
1745 runp = collate->start;
1746 while (runp != NULL)
1748 if (runp->wcs != NULL)
1750 struct element_t *e;
1751 struct element_t **eptr;
1752 struct element_t *lastp;
1754 /* Insert the collation sequence value. */
1755 if (runp->is_character)
1756 collseq_table_add (&collate->wcseqorder, runp->wcs[0],
1757 runp->wcseqorder);
1759 /* Find the point where to insert in the list. */
1760 e = wchead_table_get (&collate->wcheads, runp->wcs[0]);
1761 eptr = &e;
1762 lastp = NULL;
1763 while (*eptr != NULL)
1765 if ((*eptr)->nwcs < runp->nwcs)
1766 break;
1768 if ((*eptr)->nwcs == runp->nwcs)
1770 int c = wmemcmp ((wchar_t *) (*eptr)->wcs,
1771 (wchar_t *) runp->wcs, runp->nwcs);
1773 if (c == 0)
1775 /* This should not happen. It means that we have
1776 two symbols with the same byte sequence. It is
1777 of course an error. */
1778 WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
1779 (*eptr)->line,
1780 _("\
1781 symbol `%s' has the same encoding as"), (*eptr)->name);
1782 error_at_line (0, 0, runp->file,
1783 runp->line,
1784 _("symbol `%s'"),
1785 runp->name));
1786 goto dont_insertwc;
1788 else if (c < 0)
1789 /* Insert it here. */
1790 break;
1793 /* To the next entry. */
1794 lastp = *eptr;
1795 eptr = &(*eptr)->wcnext;
1798 /* Set the pointers. */
1799 runp->wcnext = *eptr;
1800 runp->wclast = lastp;
1801 if (*eptr != NULL)
1802 (*eptr)->wclast = runp;
1803 *eptr = runp;
1804 if (eptr == &e)
1805 wchead_table_add (&collate->wcheads, runp->wcs[0], e);
1806 dont_insertwc:
1810 /* Up to the next entry. */
1811 runp = runp->next;
1814 collseq_table_finalize (&collate->wcseqorder);
1816 /* Now determine whether the UNDEFINED entry is needed and if yes,
1817 whether it was defined. */
1818 collate->undefined.used_in_level = need_undefined ? ~0ul : 0;
1819 if (collate->undefined.file == NULL)
1821 if (need_undefined)
1823 /* This seems not to be enforced by recent standards. Don't
1824 emit an error, simply append UNDEFINED at the end. */
1825 if (0)
1826 WITH_CUR_LOCALE (error (0, 0, _("no definition of `UNDEFINED'")));
1828 /* Add UNDEFINED at the end. */
1829 collate->undefined.mborder =
1830 (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int));
1832 for (i = 0; i < nrules; ++i)
1833 collate->undefined.mborder[i] = mbact[i]++;
1836 /* In any case we will need the definition for the wide character
1837 case. But we will not complain that it is missing since the
1838 specification strangely enough does not seem to account for
1839 this. */
1840 collate->undefined.wcorder = wcact++;
1843 /* Finally, try to unify the rules for the sections. Whenever the rules
1844 for a section are the same as those for another section give the
1845 ruleset the same index. Since there are never many section we can
1846 use an O(n^2) algorithm here. */
1847 sect = collate->sections;
1848 while (sect != NULL && sect->rules == NULL)
1849 sect = sect->next;
1851 /* Bail out if we have no sections because of earlier errors. */
1852 if (sect == NULL)
1854 WITH_CUR_LOCALE (error (EXIT_FAILURE, 0,
1855 _("too many errors; giving up")));
1856 return;
1859 ruleidx = 0;
1862 struct section_list *osect = collate->sections;
1864 while (osect != sect)
1865 if (osect->rules != NULL
1866 && memcmp (osect->rules, sect->rules,
1867 nrules * sizeof (osect->rules[0])) == 0)
1868 break;
1869 else
1870 osect = osect->next;
1872 if (osect == sect)
1873 sect->ruleidx = ruleidx++;
1874 else
1875 sect->ruleidx = osect->ruleidx;
1877 /* Next section. */
1879 sect = sect->next;
1880 while (sect != NULL && sect->rules == NULL);
1882 while (sect != NULL);
1883 /* We are currently not prepared for more than 128 rulesets. But this
1884 should never really be a problem. */
1885 assert (ruleidx <= 128);
1889 static int32_t
1890 output_weight (struct obstack *pool, struct locale_collate_t *collate,
1891 struct element_t *elem)
1893 size_t cnt;
1894 int32_t retval;
1896 /* Optimize the use of UNDEFINED. */
1897 if (elem == &collate->undefined)
1898 /* The weights are already inserted. */
1899 return 0;
1901 /* This byte can start exactly one collation element and this is
1902 a single byte. We can directly give the index to the weights. */
1903 retval = obstack_object_size (pool);
1905 /* Construct the weight. */
1906 for (cnt = 0; cnt < nrules; ++cnt)
1908 char buf[elem->weights[cnt].cnt * 7];
1909 int len = 0;
1910 int i;
1912 for (i = 0; i < elem->weights[cnt].cnt; ++i)
1913 /* Encode the weight value. We do nothing for IGNORE entries. */
1914 if (elem->weights[cnt].w[i] != NULL)
1915 len += utf8_encode (&buf[len],
1916 elem->weights[cnt].w[i]->mborder[cnt]);
1918 /* And add the buffer content. */
1919 obstack_1grow (pool, len);
1920 obstack_grow (pool, buf, len);
1923 return retval | ((elem->section->ruleidx & 0x7f) << 24);
1927 static int32_t
1928 output_weightwc (struct obstack *pool, struct locale_collate_t *collate,
1929 struct element_t *elem)
1931 size_t cnt;
1932 int32_t retval;
1934 /* Optimize the use of UNDEFINED. */
1935 if (elem == &collate->undefined)
1936 /* The weights are already inserted. */
1937 return 0;
1939 /* This byte can start exactly one collation element and this is
1940 a single byte. We can directly give the index to the weights. */
1941 retval = obstack_object_size (pool) / sizeof (int32_t);
1943 /* Construct the weight. */
1944 for (cnt = 0; cnt < nrules; ++cnt)
1946 int32_t buf[elem->weights[cnt].cnt];
1947 int i;
1948 int32_t j;
1950 for (i = 0, j = 0; i < elem->weights[cnt].cnt; ++i)
1951 if (elem->weights[cnt].w[i] != NULL)
1952 buf[j++] = elem->weights[cnt].w[i]->wcorder;
1954 /* And add the buffer content. */
1955 obstack_int32_grow (pool, j);
1957 obstack_grow (pool, buf, j * sizeof (int32_t));
1960 return retval | ((elem->section->ruleidx & 0x7f) << 24);
1963 /* If localedef is every threaded, this would need to be __thread var. */
1964 static struct
1966 struct obstack *weightpool;
1967 struct obstack *extrapool;
1968 struct obstack *indpool;
1969 struct locale_collate_t *collate;
1970 struct collidx_table *tablewc;
1971 } atwc;
1973 static void add_to_tablewc (uint32_t ch, struct element_t *runp);
1975 static void
1976 add_to_tablewc (uint32_t ch, struct element_t *runp)
1978 if (runp->wcnext == NULL && runp->nwcs == 1)
1980 int32_t weigthidx = output_weightwc (atwc.weightpool, atwc.collate,
1981 runp);
1982 collidx_table_add (atwc.tablewc, ch, weigthidx);
1984 else
1986 /* As for the singlebyte table, we recognize sequences and
1987 compress them. */
1988 struct element_t *lastp;
1990 collidx_table_add (atwc.tablewc, ch,
1991 -(obstack_object_size (atwc.extrapool)
1992 / sizeof (uint32_t)));
1996 /* Store the current index in the weight table. We know that
1997 the current position in the `extrapool' is aligned on a
1998 32-bit address. */
1999 int32_t weightidx;
2000 int added;
2002 /* Find out wether this is a single entry or we have more than
2003 one consecutive entry. */
2004 if (runp->wcnext != NULL
2005 && runp->nwcs == runp->wcnext->nwcs
2006 && wmemcmp ((wchar_t *) runp->wcs,
2007 (wchar_t *)runp->wcnext->wcs,
2008 runp->nwcs - 1) == 0
2009 && (runp->wcs[runp->nwcs - 1]
2010 == runp->wcnext->wcs[runp->nwcs - 1] + 1))
2012 int i;
2013 struct element_t *series_startp = runp;
2014 struct element_t *curp;
2016 /* Now add first the initial byte sequence. */
2017 added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t);
2018 if (sizeof (int32_t) == sizeof (int))
2019 obstack_make_room (atwc.extrapool, added);
2021 /* More than one consecutive entry. We mark this by having
2022 a negative index into the indirect table. */
2023 obstack_int32_grow_fast (atwc.extrapool,
2024 -(obstack_object_size (atwc.indpool)
2025 / sizeof (int32_t)));
2026 obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2029 runp = runp->wcnext;
2030 while (runp->wcnext != NULL
2031 && runp->nwcs == runp->wcnext->nwcs
2032 && wmemcmp ((wchar_t *) runp->wcs,
2033 (wchar_t *)runp->wcnext->wcs,
2034 runp->nwcs - 1) == 0
2035 && (runp->wcs[runp->nwcs - 1]
2036 == runp->wcnext->wcs[runp->nwcs - 1] + 1));
2038 /* Now walk backward from here to the beginning. */
2039 curp = runp;
2041 for (i = 1; i < runp->nwcs; ++i)
2042 obstack_int32_grow_fast (atwc.extrapool, curp->wcs[i]);
2044 /* Now find the end of the consecutive sequence and
2045 add all the indeces in the indirect pool. */
2048 weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2049 curp);
2050 obstack_int32_grow (atwc.indpool, weightidx);
2052 curp = curp->wclast;
2054 while (curp != series_startp);
2056 /* Add the final weight. */
2057 weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2058 curp);
2059 obstack_int32_grow (atwc.indpool, weightidx);
2061 /* And add the end byte sequence. Without length this
2062 time. */
2063 for (i = 1; i < curp->nwcs; ++i)
2064 obstack_int32_grow (atwc.extrapool, curp->wcs[i]);
2066 else
2068 /* A single entry. Simply add the index and the length and
2069 string (except for the first character which is already
2070 tested for). */
2071 int i;
2073 /* Output the weight info. */
2074 weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2075 runp);
2077 added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t);
2078 if (sizeof (int) == sizeof (int32_t))
2079 obstack_make_room (atwc.extrapool, added);
2081 obstack_int32_grow_fast (atwc.extrapool, weightidx);
2082 obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2083 for (i = 1; i < runp->nwcs; ++i)
2084 obstack_int32_grow_fast (atwc.extrapool, runp->wcs[i]);
2087 /* Next entry. */
2088 lastp = runp;
2089 runp = runp->wcnext;
2091 while (runp != NULL);
2095 void
2096 collate_output (struct localedef_t *locale, const struct charmap_t *charmap,
2097 const char *output_path)
2099 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
2100 const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
2101 struct iovec iov[2 + nelems];
2102 struct locale_file data;
2103 uint32_t idx[nelems];
2104 size_t cnt;
2105 size_t ch;
2106 int32_t tablemb[256];
2107 struct obstack weightpool;
2108 struct obstack extrapool;
2109 struct obstack indirectpool;
2110 struct section_list *sect;
2111 struct collidx_table tablewc;
2112 uint32_t elem_size;
2113 uint32_t *elem_table;
2114 int i;
2115 struct element_t *runp;
2117 data.magic = LIMAGIC (LC_COLLATE);
2118 data.n = nelems;
2119 iov[0].iov_base = (void *) &data;
2120 iov[0].iov_len = sizeof (data);
2122 iov[1].iov_base = (void *) idx;
2123 iov[1].iov_len = sizeof (idx);
2125 idx[0] = iov[0].iov_len + iov[1].iov_len;
2126 cnt = 0;
2128 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_NRULES));
2129 iov[2 + cnt].iov_base = &nrules;
2130 iov[2 + cnt].iov_len = sizeof (uint32_t);
2131 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2132 ++cnt;
2134 /* If we have no LC_COLLATE data emit only the number of rules as zero. */
2135 if (collate == NULL)
2137 int32_t dummy = 0;
2139 while (cnt < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
2141 /* The words have to be handled specially. */
2142 if (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
2144 iov[2 + cnt].iov_base = &dummy;
2145 iov[2 + cnt].iov_len = sizeof (int32_t);
2147 else
2149 iov[2 + cnt].iov_base = NULL;
2150 iov[2 + cnt].iov_len = 0;
2153 if (cnt + 1 < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
2154 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2155 ++cnt;
2158 assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
2160 write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", 2 + cnt, iov);
2162 return;
2165 obstack_init (&weightpool);
2166 obstack_init (&extrapool);
2167 obstack_init (&indirectpool);
2169 /* Since we are using the sign of an integer to mark indirection the
2170 offsets in the arrays we are indirectly referring to must not be
2171 zero since -0 == 0. Therefore we add a bit of dummy content. */
2172 obstack_int32_grow (&extrapool, 0);
2173 obstack_int32_grow (&indirectpool, 0);
2175 /* Prepare the ruleset table. */
2176 for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
2177 if (sect->rules != NULL && sect->ruleidx == i)
2179 int j;
2181 obstack_make_room (&weightpool, nrules);
2183 for (j = 0; j < nrules; ++j)
2184 obstack_1grow_fast (&weightpool, sect->rules[j]);
2185 ++i;
2187 /* And align the output. */
2188 i = (nrules * i) % __alignof__ (int32_t);
2189 if (i > 0)
2191 obstack_1grow (&weightpool, '\0');
2192 while (++i < __alignof__ (int32_t));
2194 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_RULESETS));
2195 iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2196 iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2197 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2198 ++cnt;
2200 /* Generate the 8-bit table. Walk through the lists of sequences
2201 starting with the same byte and add them one after the other to
2202 the table. In case we have more than one sequence starting with
2203 the same byte we have to use extra indirection.
2205 First add a record for the NUL byte. This entry will never be used
2206 so it does not matter. */
2207 tablemb[0] = 0;
2209 /* Now insert the `UNDEFINED' value if it is used. Since this value
2210 will probably be used more than once it is good to store the
2211 weights only once. */
2212 if (collate->undefined.used_in_level != 0)
2213 output_weight (&weightpool, collate, &collate->undefined);
2215 for (ch = 1; ch < 256; ++ch)
2216 if (collate->mbheads[ch]->mbnext == NULL
2217 && collate->mbheads[ch]->nmbs <= 1)
2219 tablemb[ch] = output_weight (&weightpool, collate,
2220 collate->mbheads[ch]);
2222 else
2224 /* The entries in the list are sorted by length and then
2225 alphabetically. This is the order in which we will add the
2226 elements to the collation table. This allows simply walking
2227 the table in sequence and stopping at the first matching
2228 entry. Since the longer sequences are coming first in the
2229 list they have the possibility to match first, just as it
2230 has to be. In the worst case we are walking to the end of
2231 the list where we put, if no singlebyte sequence is defined
2232 in the locale definition, the weights for UNDEFINED.
2234 To reduce the length of the search list we compress them a bit.
2235 This happens by collecting sequences of consecutive byte
2236 sequences in one entry (having and begin and end byte sequence)
2237 and add only one index into the weight table. We can find the
2238 consecutive entries since they are also consecutive in the list. */
2239 struct element_t *runp = collate->mbheads[ch];
2240 struct element_t *lastp;
2242 assert ((obstack_object_size (&extrapool)
2243 & (__alignof__ (int32_t) - 1)) == 0);
2245 tablemb[ch] = -obstack_object_size (&extrapool);
2249 /* Store the current index in the weight table. We know that
2250 the current position in the `extrapool' is aligned on a
2251 32-bit address. */
2252 int32_t weightidx;
2253 int added;
2255 /* Find out wether this is a single entry or we have more than
2256 one consecutive entry. */
2257 if (runp->mbnext != NULL
2258 && runp->nmbs == runp->mbnext->nmbs
2259 && memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0
2260 && (runp->mbs[runp->nmbs - 1]
2261 == runp->mbnext->mbs[runp->nmbs - 1] + 1))
2263 int i;
2264 struct element_t *series_startp = runp;
2265 struct element_t *curp;
2267 /* Compute how much space we will need. */
2268 added = ((sizeof (int32_t) + 1 + 2 * (runp->nmbs - 1)
2269 + __alignof__ (int32_t) - 1)
2270 & ~(__alignof__ (int32_t) - 1));
2271 assert ((obstack_object_size (&extrapool)
2272 & (__alignof__ (int32_t) - 1)) == 0);
2273 obstack_make_room (&extrapool, added);
2275 /* More than one consecutive entry. We mark this by having
2276 a negative index into the indirect table. */
2277 obstack_int32_grow_fast (&extrapool,
2278 -(obstack_object_size (&indirectpool)
2279 / sizeof (int32_t)));
2281 /* Now search first the end of the series. */
2283 runp = runp->mbnext;
2284 while (runp->mbnext != NULL
2285 && runp->nmbs == runp->mbnext->nmbs
2286 && memcmp (runp->mbs, runp->mbnext->mbs,
2287 runp->nmbs - 1) == 0
2288 && (runp->mbs[runp->nmbs - 1]
2289 == runp->mbnext->mbs[runp->nmbs - 1] + 1));
2291 /* Now walk backward from here to the beginning. */
2292 curp = runp;
2294 assert (runp->nmbs <= 256);
2295 obstack_1grow_fast (&extrapool, curp->nmbs - 1);
2296 for (i = 1; i < curp->nmbs; ++i)
2297 obstack_1grow_fast (&extrapool, curp->mbs[i]);
2299 /* Now find the end of the consecutive sequence and
2300 add all the indeces in the indirect pool. */
2303 weightidx = output_weight (&weightpool, collate, curp);
2304 obstack_int32_grow (&indirectpool, weightidx);
2306 curp = curp->mblast;
2308 while (curp != series_startp);
2310 /* Add the final weight. */
2311 weightidx = output_weight (&weightpool, collate, curp);
2312 obstack_int32_grow (&indirectpool, weightidx);
2314 /* And add the end byte sequence. Without length this
2315 time. */
2316 for (i = 1; i < curp->nmbs; ++i)
2317 obstack_1grow_fast (&extrapool, curp->mbs[i]);
2319 else
2321 /* A single entry. Simply add the index and the length and
2322 string (except for the first character which is already
2323 tested for). */
2324 int i;
2326 /* Output the weight info. */
2327 weightidx = output_weight (&weightpool, collate, runp);
2329 added = ((sizeof (int32_t) + 1 + runp->nmbs - 1
2330 + __alignof__ (int32_t) - 1)
2331 & ~(__alignof__ (int32_t) - 1));
2332 assert ((obstack_object_size (&extrapool)
2333 & (__alignof__ (int32_t) - 1)) == 0);
2334 obstack_make_room (&extrapool, added);
2336 obstack_int32_grow_fast (&extrapool, weightidx);
2337 assert (runp->nmbs <= 256);
2338 obstack_1grow_fast (&extrapool, runp->nmbs - 1);
2340 for (i = 1; i < runp->nmbs; ++i)
2341 obstack_1grow_fast (&extrapool, runp->mbs[i]);
2344 /* Add alignment bytes if necessary. */
2345 while ((obstack_object_size (&extrapool)
2346 & (__alignof__ (int32_t) - 1)) != 0)
2347 obstack_1grow_fast (&extrapool, '\0');
2349 /* Next entry. */
2350 lastp = runp;
2351 runp = runp->mbnext;
2353 while (runp != NULL);
2355 assert ((obstack_object_size (&extrapool)
2356 & (__alignof__ (int32_t) - 1)) == 0);
2358 /* If the final entry in the list is not a single character we
2359 add an UNDEFINED entry here. */
2360 if (lastp->nmbs != 1)
2362 int added = ((sizeof (int32_t) + 1 + 1 + __alignof__ (int32_t) - 1)
2363 & ~(__alignof__ (int32_t) - 1));
2364 obstack_make_room (&extrapool, added);
2366 obstack_int32_grow_fast (&extrapool, 0);
2367 /* XXX What rule? We just pick the first. */
2368 obstack_1grow_fast (&extrapool, 0);
2369 /* Length is zero. */
2370 obstack_1grow_fast (&extrapool, 0);
2372 /* Add alignment bytes if necessary. */
2373 while ((obstack_object_size (&extrapool)
2374 & (__alignof__ (int32_t) - 1)) != 0)
2375 obstack_1grow_fast (&extrapool, '\0');
2379 /* Add padding to the tables if necessary. */
2380 while ((obstack_object_size (&weightpool) & (__alignof__ (int32_t) - 1))
2381 != 0)
2382 obstack_1grow (&weightpool, 0);
2384 /* Now add the four tables. */
2385 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEMB));
2386 iov[2 + cnt].iov_base = tablemb;
2387 iov[2 + cnt].iov_len = sizeof (tablemb);
2388 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2389 assert ((iov[2 + cnt].iov_len & (__alignof__ (int32_t) - 1)) == 0);
2390 ++cnt;
2392 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTMB));
2393 iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2394 iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2395 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2396 ++cnt;
2398 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAMB));
2399 iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2400 iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2401 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2402 ++cnt;
2404 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTMB));
2405 iov[2 + cnt].iov_len = obstack_object_size (&indirectpool);
2406 iov[2 + cnt].iov_base = obstack_finish (&indirectpool);
2407 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2408 assert ((iov[2 + cnt].iov_len & (__alignof__ (int32_t) - 1)) == 0);
2409 ++cnt;
2412 /* Now the same for the wide character table. We need to store some
2413 more information here. */
2414 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP1));
2415 iov[2 + cnt].iov_base = NULL;
2416 iov[2 + cnt].iov_len = 0;
2417 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2418 assert (idx[cnt] % __alignof__ (int32_t) == 0);
2419 ++cnt;
2421 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP2));
2422 iov[2 + cnt].iov_base = NULL;
2423 iov[2 + cnt].iov_len = 0;
2424 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2425 assert (idx[cnt] % __alignof__ (int32_t) == 0);
2426 ++cnt;
2428 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP3));
2429 iov[2 + cnt].iov_base = NULL;
2430 iov[2 + cnt].iov_len = 0;
2431 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2432 assert (idx[cnt] % __alignof__ (int32_t) == 0);
2433 ++cnt;
2435 /* Since we are using the sign of an integer to mark indirection the
2436 offsets in the arrays we are indirectly referring to must not be
2437 zero since -0 == 0. Therefore we add a bit of dummy content. */
2438 obstack_int32_grow (&extrapool, 0);
2439 obstack_int32_grow (&indirectpool, 0);
2441 /* Now insert the `UNDEFINED' value if it is used. Since this value
2442 will probably be used more than once it is good to store the
2443 weights only once. */
2444 if (output_weightwc (&weightpool, collate, &collate->undefined) != 0)
2445 abort ();
2447 /* Generate the table. Walk through the lists of sequences starting
2448 with the same wide character and add them one after the other to
2449 the table. In case we have more than one sequence starting with
2450 the same byte we have to use extra indirection. */
2451 tablewc.p = 6;
2452 tablewc.q = 10;
2453 collidx_table_init (&tablewc);
2455 atwc.weightpool = &weightpool;
2456 atwc.extrapool = &extrapool;
2457 atwc.indpool = &indirectpool;
2458 atwc.collate = collate;
2459 atwc.tablewc = &tablewc;
2461 wchead_table_iterate (&collate->wcheads, add_to_tablewc);
2463 memset (&atwc, 0, sizeof (atwc));
2465 collidx_table_finalize (&tablewc);
2467 /* Now add the four tables. */
2468 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEWC));
2469 iov[2 + cnt].iov_base = tablewc.result;
2470 iov[2 + cnt].iov_len = tablewc.result_size;
2471 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2472 assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2473 assert (idx[cnt] % __alignof__ (int32_t) == 0);
2474 ++cnt;
2476 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTWC));
2477 iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2478 iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2479 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2480 assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2481 assert (idx[cnt] % __alignof__ (int32_t) == 0);
2482 ++cnt;
2484 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAWC));
2485 iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2486 iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2487 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2488 assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2489 assert (idx[cnt] % __alignof__ (int32_t) == 0);
2490 ++cnt;
2492 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTWC));
2493 iov[2 + cnt].iov_len = obstack_object_size (&indirectpool);
2494 iov[2 + cnt].iov_base = obstack_finish (&indirectpool);
2495 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2496 assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2497 assert (idx[cnt] % __alignof__ (int32_t) == 0);
2498 ++cnt;
2501 /* Finally write the table with collation element names out. It is
2502 a hash table with a simple function which gets the name of the
2503 character as the input. One character might have many names. The
2504 value associated with the name is an index into the weight table
2505 where we are then interested in the first-level weight value.
2507 To determine how large the table should be we are counting the
2508 elements we have to put in. Since we are using internal chaining
2509 using a secondary hash function we have to make the table a bit
2510 larger to avoid extremely long search times. We can achieve
2511 good results with a 40% larger table than there are entries. */
2512 elem_size = 0;
2513 runp = collate->start;
2514 while (runp != NULL)
2516 if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2517 /* Yep, the element really counts. */
2518 ++elem_size;
2520 runp = runp->next;
2522 /* Add 40% and find the next prime number. */
2523 elem_size = next_prime (elem_size * 1.4);
2525 /* Allocate the table. Each entry consists of two words: the hash
2526 value and an index in a secondary table which provides the index
2527 into the weight table and the string itself (so that a match can
2528 be determined). */
2529 elem_table = (uint32_t *) obstack_alloc (&extrapool,
2530 elem_size * 2 * sizeof (uint32_t));
2531 memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t));
2533 /* Now add the elements. */
2534 runp = collate->start;
2535 while (runp != NULL)
2537 if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2539 /* Compute the hash value of the name. */
2540 uint32_t namelen = strlen (runp->name);
2541 uint32_t hash = elem_hash (runp->name, namelen);
2542 size_t idx = hash % elem_size;
2543 #ifndef NDEBUG
2544 size_t start_idx = idx;
2545 #endif
2547 if (elem_table[idx * 2] != 0)
2549 /* The spot is already taken. Try iterating using the value
2550 from the secondary hashing function. */
2551 size_t iter = hash % (elem_size - 2) + 1;
2555 idx += iter;
2556 if (idx >= elem_size)
2557 idx -= elem_size;
2558 assert (idx != start_idx);
2560 while (elem_table[idx * 2] != 0);
2562 /* This is the spot where we will insert the value. */
2563 elem_table[idx * 2] = hash;
2564 elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);
2566 /* The string itself including length. */
2567 obstack_1grow (&extrapool, namelen);
2568 obstack_grow (&extrapool, runp->name, namelen);
2570 /* And the multibyte representation. */
2571 obstack_1grow (&extrapool, runp->nmbs);
2572 obstack_grow (&extrapool, runp->mbs, runp->nmbs);
2574 /* And align again to 32 bits. */
2575 if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0)
2576 obstack_grow (&extrapool, "\0\0",
2577 (sizeof (int32_t)
2578 - ((1 + namelen + 1 + runp->nmbs)
2579 % sizeof (int32_t))));
2581 /* Now some 32-bit values: multibyte collation sequence,
2582 wide char string (including length), and wide char
2583 collation sequence. */
2584 obstack_int32_grow (&extrapool, runp->mbseqorder);
2586 obstack_int32_grow (&extrapool, runp->nwcs);
2587 obstack_grow (&extrapool, runp->wcs,
2588 runp->nwcs * sizeof (uint32_t));
2590 obstack_int32_grow (&extrapool, runp->wcseqorder);
2593 runp = runp->next;
2596 /* Prepare to write out this data. */
2597 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB));
2598 iov[2 + cnt].iov_base = &elem_size;
2599 iov[2 + cnt].iov_len = sizeof (int32_t);
2600 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2601 assert (idx[cnt] % __alignof__ (int32_t) == 0);
2602 ++cnt;
2604 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_TABLEMB));
2605 iov[2 + cnt].iov_base = elem_table;
2606 iov[2 + cnt].iov_len = elem_size * 2 * sizeof (int32_t);
2607 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2608 assert (idx[cnt] % __alignof__ (int32_t) == 0);
2609 ++cnt;
2611 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_EXTRAMB));
2612 iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2613 iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2614 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2615 ++cnt;
2617 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQMB));
2618 iov[2 + cnt].iov_base = collate->mbseqorder;
2619 iov[2 + cnt].iov_len = 256;
2620 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2621 ++cnt;
2623 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQWC));
2624 iov[2 + cnt].iov_base = collate->wcseqorder.result;
2625 iov[2 + cnt].iov_len = collate->wcseqorder.result_size;
2626 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2627 assert (idx[cnt] % __alignof__ (int32_t) == 0);
2628 ++cnt;
2630 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_CODESET));
2631 iov[2 + cnt].iov_base = (void *) charmap->code_set_name;
2632 iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1;
2633 ++cnt;
2635 assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
2637 write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", 2 + cnt, iov);
2639 obstack_free (&weightpool, NULL);
2640 obstack_free (&extrapool, NULL);
2641 obstack_free (&indirectpool, NULL);
2645 static enum token_t
2646 skip_to (struct linereader *ldfile, struct locale_collate_t *collate,
2647 const struct charmap_t *charmap, int to_endif)
2649 while (1)
2651 struct token *now = lr_token (ldfile, charmap, NULL, NULL, 0);
2652 enum token_t nowtok = now->tok;
2654 if (nowtok == tok_eof || nowtok == tok_end)
2655 return nowtok;
2657 if (nowtok == tok_ifdef || nowtok == tok_ifndef)
2659 lr_error (ldfile, _("%s: nested conditionals not supported"),
2660 "LC_COLLATE");
2661 nowtok = skip_to (ldfile, collate, charmap, tok_endif);
2662 if (nowtok == tok_eof || nowtok == tok_end)
2663 return nowtok;
2665 else if (nowtok == tok_endif || (!to_endif && nowtok == tok_else))
2667 lr_ignore_rest (ldfile, 1);
2668 return nowtok;
2670 else if (!to_endif && (nowtok == tok_elifdef || nowtok == tok_elifndef))
2672 /* Do not read the rest of the line. */
2673 return nowtok;
2675 else if (nowtok == tok_else)
2677 lr_error (ldfile, _("%s: more then one 'else'"), "LC_COLLATE");
2680 lr_ignore_rest (ldfile, 0);
2685 void
2686 collate_read (struct linereader *ldfile, struct localedef_t *result,
2687 const struct charmap_t *charmap, const char *repertoire_name,
2688 int ignore_content)
2690 struct repertoire_t *repertoire = NULL;
2691 struct locale_collate_t *collate;
2692 struct token *now;
2693 struct token *arg = NULL;
2694 enum token_t nowtok;
2695 enum token_t was_ellipsis = tok_none;
2696 struct localedef_t *copy_locale = NULL;
2697 /* Parsing state:
2698 0 - start
2699 1 - between `order-start' and `order-end'
2700 2 - after `order-end'
2701 3 - after `reorder-after', waiting for `reorder-end'
2702 4 - after `reorder-end'
2703 5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2704 6 - after `reorder-sections-end'
2706 int state = 0;
2708 /* Get the repertoire we have to use. */
2709 if (repertoire_name != NULL)
2710 repertoire = repertoire_read (repertoire_name);
2712 /* The rest of the line containing `LC_COLLATE' must be free. */
2713 lr_ignore_rest (ldfile, 1);
2715 while (1)
2719 now = lr_token (ldfile, charmap, result, NULL, verbose);
2720 nowtok = now->tok;
2722 while (nowtok == tok_eol);
2724 if (nowtok != tok_define)
2725 break;
2727 if (ignore_content)
2728 lr_ignore_rest (ldfile, 0);
2729 else
2731 arg = lr_token (ldfile, charmap, result, NULL, verbose);
2732 if (arg->tok != tok_ident)
2733 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2734 else
2736 /* Simply add the new symbol. */
2737 struct name_list *newsym = xmalloc (sizeof (*newsym)
2738 + arg->val.str.lenmb + 1);
2739 memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
2740 newsym->str[arg->val.str.lenmb] = '\0';
2741 newsym->next = defined;
2742 defined = newsym;
2744 lr_ignore_rest (ldfile, 1);
2749 if (nowtok == tok_copy)
2751 now = lr_token (ldfile, charmap, result, NULL, verbose);
2752 if (now->tok != tok_string)
2754 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2756 skip_category:
2758 now = lr_token (ldfile, charmap, result, NULL, verbose);
2759 while (now->tok != tok_eof && now->tok != tok_end);
2761 if (now->tok != tok_eof
2762 || (now = lr_token (ldfile, charmap, result, NULL, verbose),
2763 now->tok == tok_eof))
2764 lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
2765 else if (now->tok != tok_lc_collate)
2767 lr_error (ldfile, _("\
2768 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2769 lr_ignore_rest (ldfile, 0);
2771 else
2772 lr_ignore_rest (ldfile, 1);
2774 return;
2777 if (! ignore_content)
2779 /* Get the locale definition. */
2780 copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
2781 repertoire_name, charmap, NULL);
2782 if ((copy_locale->avail & COLLATE_LOCALE) == 0)
2784 /* Not yet loaded. So do it now. */
2785 if (locfile_read (copy_locale, charmap) != 0)
2786 goto skip_category;
2789 if (copy_locale->categories[LC_COLLATE].collate == NULL)
2790 return;
2793 lr_ignore_rest (ldfile, 1);
2795 now = lr_token (ldfile, charmap, result, NULL, verbose);
2796 nowtok = now->tok;
2799 /* Prepare the data structures. */
2800 collate_startup (ldfile, result, copy_locale, ignore_content);
2801 collate = result->categories[LC_COLLATE].collate;
2803 while (1)
2805 char ucs4buf[10];
2806 char *symstr;
2807 size_t symlen;
2809 /* Of course we don't proceed beyond the end of file. */
2810 if (nowtok == tok_eof)
2811 break;
2813 /* Ignore empty lines. */
2814 if (nowtok == tok_eol)
2816 now = lr_token (ldfile, charmap, result, NULL, verbose);
2817 nowtok = now->tok;
2818 continue;
2821 switch (nowtok)
2823 case tok_copy:
2824 /* Allow copying other locales. */
2825 now = lr_token (ldfile, charmap, result, NULL, verbose);
2826 if (now->tok != tok_string)
2827 goto err_label;
2829 if (! ignore_content)
2830 load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
2831 charmap, result);
2833 lr_ignore_rest (ldfile, 1);
2834 break;
2836 case tok_coll_weight_max:
2837 /* Ignore the rest of the line if we don't need the input of
2838 this line. */
2839 if (ignore_content)
2841 lr_ignore_rest (ldfile, 0);
2842 break;
2845 if (state != 0)
2846 goto err_label;
2848 arg = lr_token (ldfile, charmap, result, NULL, verbose);
2849 if (arg->tok != tok_number)
2850 goto err_label;
2851 if (collate->col_weight_max != -1)
2852 lr_error (ldfile, _("%s: duplicate definition of `%s'"),
2853 "LC_COLLATE", "col_weight_max");
2854 else
2855 collate->col_weight_max = arg->val.num;
2856 lr_ignore_rest (ldfile, 1);
2857 break;
2859 case tok_section_symbol:
2860 /* Ignore the rest of the line if we don't need the input of
2861 this line. */
2862 if (ignore_content)
2864 lr_ignore_rest (ldfile, 0);
2865 break;
2868 if (state != 0)
2869 goto err_label;
2871 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2872 if (arg->tok != tok_bsymbol)
2873 goto err_label;
2874 else if (!ignore_content)
2876 /* Check whether this section is already known. */
2877 struct section_list *known = collate->sections;
2878 while (known != NULL)
2880 if (strcmp (known->name, arg->val.str.startmb) == 0)
2881 break;
2882 known = known->next;
2885 if (known != NULL)
2887 lr_error (ldfile,
2888 _("%s: duplicate declaration of section `%s'"),
2889 "LC_COLLATE", arg->val.str.startmb);
2890 free (arg->val.str.startmb);
2892 else
2893 collate->sections = make_seclist_elem (collate,
2894 arg->val.str.startmb,
2895 collate->sections);
2897 lr_ignore_rest (ldfile, known == NULL);
2899 else
2901 free (arg->val.str.startmb);
2902 lr_ignore_rest (ldfile, 0);
2904 break;
2906 case tok_collating_element:
2907 /* Ignore the rest of the line if we don't need the input of
2908 this line. */
2909 if (ignore_content)
2911 lr_ignore_rest (ldfile, 0);
2912 break;
2915 if (state != 0 && state != 2)
2916 goto err_label;
2918 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2919 if (arg->tok != tok_bsymbol)
2920 goto err_label;
2921 else
2923 const char *symbol = arg->val.str.startmb;
2924 size_t symbol_len = arg->val.str.lenmb;
2926 /* Next the `from' keyword. */
2927 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2928 if (arg->tok != tok_from)
2930 free ((char *) symbol);
2931 goto err_label;
2934 ldfile->return_widestr = 1;
2935 ldfile->translate_strings = 1;
2937 /* Finally the string with the replacement. */
2938 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2940 ldfile->return_widestr = 0;
2941 ldfile->translate_strings = 0;
2943 if (arg->tok != tok_string)
2944 goto err_label;
2946 if (!ignore_content && symbol != NULL)
2948 /* The name is already defined. */
2949 if (check_duplicate (ldfile, collate, charmap,
2950 repertoire, symbol, symbol_len))
2951 goto col_elem_free;
2953 if (arg->val.str.startmb != NULL)
2954 insert_entry (&collate->elem_table, symbol, symbol_len,
2955 new_element (collate,
2956 arg->val.str.startmb,
2957 arg->val.str.lenmb - 1,
2958 arg->val.str.startwc,
2959 symbol, symbol_len, 0));
2961 else
2963 col_elem_free:
2964 free ((char *) symbol);
2965 free (arg->val.str.startmb);
2966 free (arg->val.str.startwc);
2968 lr_ignore_rest (ldfile, 1);
2970 break;
2972 case tok_collating_symbol:
2973 /* Ignore the rest of the line if we don't need the input of
2974 this line. */
2975 if (ignore_content)
2977 lr_ignore_rest (ldfile, 0);
2978 break;
2981 if (state != 0 && state != 2)
2982 goto err_label;
2984 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2985 if (arg->tok != tok_bsymbol)
2986 goto err_label;
2987 else
2989 char *symbol = arg->val.str.startmb;
2990 size_t symbol_len = arg->val.str.lenmb;
2991 char *endsymbol = NULL;
2992 size_t endsymbol_len = 0;
2993 enum token_t ellipsis = tok_none;
2995 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2996 if (arg->tok == tok_ellipsis2 || arg->tok == tok_ellipsis4)
2998 ellipsis = arg->tok;
3000 arg = lr_token (ldfile, charmap, result, repertoire,
3001 verbose);
3002 if (arg->tok != tok_bsymbol)
3004 free (symbol);
3005 goto err_label;
3008 endsymbol = arg->val.str.startmb;
3009 endsymbol_len = arg->val.str.lenmb;
3011 lr_ignore_rest (ldfile, 1);
3013 else if (arg->tok != tok_eol)
3015 free (symbol);
3016 goto err_label;
3019 if (!ignore_content)
3021 if (symbol == NULL
3022 || (ellipsis != tok_none && endsymbol == NULL))
3024 lr_error (ldfile, _("\
3025 %s: unknown character in collating symbol name"),
3026 "LC_COLLATE");
3027 goto col_sym_free;
3029 else if (ellipsis == tok_none)
3031 /* A single symbol, no ellipsis. */
3032 if (check_duplicate (ldfile, collate, charmap,
3033 repertoire, symbol, symbol_len))
3034 /* The name is already defined. */
3035 goto col_sym_free;
3037 insert_entry (&collate->sym_table, symbol, symbol_len,
3038 new_symbol (collate, symbol, symbol_len));
3040 else if (symbol_len != endsymbol_len)
3042 col_sym_inv_range:
3043 lr_error (ldfile,
3044 _("invalid names for character range"));
3045 goto col_sym_free;
3047 else
3049 /* Oh my, we have to handle an ellipsis. First, as
3050 usual, determine the common prefix and then
3051 convert the rest into a range. */
3052 size_t prefixlen;
3053 unsigned long int from;
3054 unsigned long int to;
3055 char *endp;
3057 for (prefixlen = 0; prefixlen < symbol_len; ++prefixlen)
3058 if (symbol[prefixlen] != endsymbol[prefixlen])
3059 break;
3061 /* Convert the rest into numbers. */
3062 symbol[symbol_len] = '\0';
3063 from = strtoul (&symbol[prefixlen], &endp,
3064 ellipsis == tok_ellipsis2 ? 16 : 10);
3065 if (*endp != '\0')
3066 goto col_sym_inv_range;
3068 endsymbol[symbol_len] = '\0';
3069 to = strtoul (&endsymbol[prefixlen], &endp,
3070 ellipsis == tok_ellipsis2 ? 16 : 10);
3071 if (*endp != '\0')
3072 goto col_sym_inv_range;
3074 if (from > to)
3075 goto col_sym_inv_range;
3077 /* Now loop over all entries. */
3078 while (from <= to)
3080 char *symbuf;
3082 symbuf = (char *) obstack_alloc (&collate->mempool,
3083 symbol_len + 1);
3085 /* Create the name. */
3086 sprintf (symbuf,
3087 ellipsis == tok_ellipsis2
3088 ? "%.*s%.*lX" : "%.*s%.*lu",
3089 (int) prefixlen, symbol,
3090 (int) (symbol_len - prefixlen), from);
3092 if (check_duplicate (ldfile, collate, charmap,
3093 repertoire, symbuf, symbol_len))
3094 /* The name is already defined. */
3095 goto col_sym_free;
3097 insert_entry (&collate->sym_table, symbuf,
3098 symbol_len,
3099 new_symbol (collate, symbuf,
3100 symbol_len));
3102 /* Increment the counter. */
3103 ++from;
3106 goto col_sym_free;
3109 else
3111 col_sym_free:
3112 free (symbol);
3113 free (endsymbol);
3116 break;
3118 case tok_symbol_equivalence:
3119 /* Ignore the rest of the line if we don't need the input of
3120 this line. */
3121 if (ignore_content)
3123 lr_ignore_rest (ldfile, 0);
3124 break;
3127 if (state != 0)
3128 goto err_label;
3130 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3131 if (arg->tok != tok_bsymbol)
3132 goto err_label;
3133 else
3135 const char *newname = arg->val.str.startmb;
3136 size_t newname_len = arg->val.str.lenmb;
3137 const char *symname;
3138 size_t symname_len;
3139 void *symval; /* Actually struct symbol_t* */
3141 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3142 if (arg->tok != tok_bsymbol)
3144 free ((char *) newname);
3145 goto err_label;
3148 symname = arg->val.str.startmb;
3149 symname_len = arg->val.str.lenmb;
3151 if (newname == NULL)
3153 lr_error (ldfile, _("\
3154 %s: unknown character in equivalent definition name"),
3155 "LC_COLLATE");
3157 sym_equiv_free:
3158 free ((char *) newname);
3159 free ((char *) symname);
3160 break;
3162 if (symname == NULL)
3164 lr_error (ldfile, _("\
3165 %s: unknown character in equivalent definition value"),
3166 "LC_COLLATE");
3167 goto sym_equiv_free;
3170 /* See whether the symbol name is already defined. */
3171 if (find_entry (&collate->sym_table, symname, symname_len,
3172 &symval) != 0)
3174 lr_error (ldfile, _("\
3175 %s: unknown symbol `%s' in equivalent definition"),
3176 "LC_COLLATE", symname);
3177 goto sym_equiv_free;
3180 if (insert_entry (&collate->sym_table,
3181 newname, newname_len, symval) < 0)
3183 lr_error (ldfile, _("\
3184 error while adding equivalent collating symbol"));
3185 goto sym_equiv_free;
3188 free ((char *) symname);
3190 lr_ignore_rest (ldfile, 1);
3191 break;
3193 case tok_script:
3194 /* Ignore the rest of the line if we don't need the input of
3195 this line. */
3196 if (ignore_content)
3198 lr_ignore_rest (ldfile, 0);
3199 break;
3202 /* We get told about the scripts we know. */
3203 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3204 if (arg->tok != tok_bsymbol)
3205 goto err_label;
3206 else
3208 struct section_list *runp = collate->known_sections;
3209 char *name;
3211 while (runp != NULL)
3212 if (strncmp (runp->name, arg->val.str.startmb,
3213 arg->val.str.lenmb) == 0
3214 && runp->name[arg->val.str.lenmb] == '\0')
3215 break;
3216 else
3217 runp = runp->def_next;
3219 if (runp != NULL)
3221 lr_error (ldfile, _("duplicate definition of script `%s'"),
3222 runp->name);
3223 lr_ignore_rest (ldfile, 0);
3224 break;
3227 runp = (struct section_list *) xcalloc (1, sizeof (*runp));
3228 name = (char *) xmalloc (arg->val.str.lenmb + 1);
3229 memcpy (name, arg->val.str.startmb, arg->val.str.lenmb);
3230 name[arg->val.str.lenmb] = '\0';
3231 runp->name = name;
3233 runp->def_next = collate->known_sections;
3234 collate->known_sections = runp;
3236 lr_ignore_rest (ldfile, 1);
3237 break;
3239 case tok_order_start:
3240 /* Ignore the rest of the line if we don't need the input of
3241 this line. */
3242 if (ignore_content)
3244 lr_ignore_rest (ldfile, 0);
3245 break;
3248 if (state != 0 && state != 1 && state != 2)
3249 goto err_label;
3250 state = 1;
3252 /* The 14652 draft does not specify whether all `order_start' lines
3253 must contain the same number of sort-rules, but 14651 does. So
3254 we require this here as well. */
3255 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3256 if (arg->tok == tok_bsymbol)
3258 /* This better should be a section name. */
3259 struct section_list *sp = collate->known_sections;
3260 while (sp != NULL
3261 && (sp->name == NULL
3262 || strncmp (sp->name, arg->val.str.startmb,
3263 arg->val.str.lenmb) != 0
3264 || sp->name[arg->val.str.lenmb] != '\0'))
3265 sp = sp->def_next;
3267 if (sp == NULL)
3269 lr_error (ldfile, _("\
3270 %s: unknown section name `%.*s'"),
3271 "LC_COLLATE", (int) arg->val.str.lenmb,
3272 arg->val.str.startmb);
3273 /* We use the error section. */
3274 collate->current_section = &collate->error_section;
3276 if (collate->error_section.first == NULL)
3278 /* Insert &collate->error_section at the end of
3279 the collate->sections list. */
3280 if (collate->sections == NULL)
3281 collate->sections = &collate->error_section;
3282 else
3284 sp = collate->sections;
3285 while (sp->next != NULL)
3286 sp = sp->next;
3288 sp->next = &collate->error_section;
3290 collate->error_section.next = NULL;
3293 else
3295 /* One should not be allowed to open the same
3296 section twice. */
3297 if (sp->first != NULL)
3298 lr_error (ldfile, _("\
3299 %s: multiple order definitions for section `%s'"),
3300 "LC_COLLATE", sp->name);
3301 else
3303 /* Insert sp in the collate->sections list,
3304 right after collate->current_section. */
3305 if (collate->current_section != NULL)
3307 sp->next = collate->current_section->next;
3308 collate->current_section->next = sp;
3310 else if (collate->sections == NULL)
3311 /* This is the first section to be defined. */
3312 collate->sections = sp;
3314 collate->current_section = sp;
3317 /* Next should come the end of the line or a semicolon. */
3318 arg = lr_token (ldfile, charmap, result, repertoire,
3319 verbose);
3320 if (arg->tok == tok_eol)
3322 uint32_t cnt;
3324 /* This means we have exactly one rule: `forward'. */
3325 if (nrules > 1)
3326 lr_error (ldfile, _("\
3327 %s: invalid number of sorting rules"),
3328 "LC_COLLATE");
3329 else
3330 nrules = 1;
3331 sp->rules = obstack_alloc (&collate->mempool,
3332 (sizeof (enum coll_sort_rule)
3333 * nrules));
3334 for (cnt = 0; cnt < nrules; ++cnt)
3335 sp->rules[cnt] = sort_forward;
3337 /* Next line. */
3338 break;
3341 /* Get the next token. */
3342 arg = lr_token (ldfile, charmap, result, repertoire,
3343 verbose);
3346 else
3348 /* There is no section symbol. Therefore we use the unnamed
3349 section. */
3350 collate->current_section = &collate->unnamed_section;
3352 if (collate->unnamed_section.first != NULL)
3353 lr_error (ldfile, _("\
3354 %s: multiple order definitions for unnamed section"),
3355 "LC_COLLATE");
3356 else
3358 /* Insert &collate->unnamed_section at the beginning of
3359 the collate->sections list. */
3360 collate->unnamed_section.next = collate->sections;
3361 collate->sections = &collate->unnamed_section;
3365 /* Now read the direction names. */
3366 read_directions (ldfile, arg, charmap, repertoire, result);
3368 /* From now we need the strings untranslated. */
3369 ldfile->translate_strings = 0;
3370 break;
3372 case tok_order_end:
3373 /* Ignore the rest of the line if we don't need the input of
3374 this line. */
3375 if (ignore_content)
3377 lr_ignore_rest (ldfile, 0);
3378 break;
3381 if (state != 1)
3382 goto err_label;
3384 /* Handle ellipsis at end of list. */
3385 if (was_ellipsis != tok_none)
3387 handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3388 repertoire, result);
3389 was_ellipsis = tok_none;
3392 state = 2;
3393 lr_ignore_rest (ldfile, 1);
3394 break;
3396 case tok_reorder_after:
3397 /* Ignore the rest of the line if we don't need the input of
3398 this line. */
3399 if (ignore_content)
3401 lr_ignore_rest (ldfile, 0);
3402 break;
3405 if (state == 1)
3407 lr_error (ldfile, _("%s: missing `order_end' keyword"),
3408 "LC_COLLATE");
3409 state = 2;
3411 /* Handle ellipsis at end of list. */
3412 if (was_ellipsis != tok_none)
3414 handle_ellipsis (ldfile, arg->val.str.startmb,
3415 arg->val.str.lenmb, was_ellipsis, charmap,
3416 repertoire, result);
3417 was_ellipsis = tok_none;
3420 else if (state == 0 && copy_locale == NULL)
3421 goto err_label;
3422 else if (state != 0 && state != 2 && state != 3)
3423 goto err_label;
3424 state = 3;
3426 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3427 if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
3429 /* Find this symbol in the sequence table. */
3430 char ucsbuf[10];
3431 char *startmb;
3432 size_t lenmb;
3433 struct element_t *insp;
3434 int no_error = 1;
3435 void *ptr;
3437 if (arg->tok == tok_bsymbol)
3439 startmb = arg->val.str.startmb;
3440 lenmb = arg->val.str.lenmb;
3442 else
3444 sprintf (ucsbuf, "U%08X", arg->val.ucs4);
3445 startmb = ucsbuf;
3446 lenmb = 9;
3449 if (find_entry (&collate->seq_table, startmb, lenmb, &ptr) == 0)
3450 /* Yes, the symbol exists. Simply point the cursor
3451 to it. */
3452 collate->cursor = (struct element_t *) ptr;
3453 else
3455 struct symbol_t *symbp;
3456 void *ptr;
3458 if (find_entry (&collate->sym_table, startmb, lenmb,
3459 &ptr) == 0)
3461 symbp = ptr;
3463 if (symbp->order->last != NULL
3464 || symbp->order->next != NULL)
3465 collate->cursor = symbp->order;
3466 else
3468 /* This is a collating symbol but its position
3469 is not yet defined. */
3470 lr_error (ldfile, _("\
3471 %s: order for collating symbol %.*s not yet defined"),
3472 "LC_COLLATE", (int) lenmb, startmb);
3473 collate->cursor = NULL;
3474 no_error = 0;
3477 else if (find_entry (&collate->elem_table, startmb, lenmb,
3478 &ptr) == 0)
3480 insp = (struct element_t *) ptr;
3482 if (insp->last != NULL || insp->next != NULL)
3483 collate->cursor = insp;
3484 else
3486 /* This is a collating element but its position
3487 is not yet defined. */
3488 lr_error (ldfile, _("\
3489 %s: order for collating element %.*s not yet defined"),
3490 "LC_COLLATE", (int) lenmb, startmb);
3491 collate->cursor = NULL;
3492 no_error = 0;
3495 else
3497 /* This is bad. The symbol after which we have to
3498 insert does not exist. */
3499 lr_error (ldfile, _("\
3500 %s: cannot reorder after %.*s: symbol not known"),
3501 "LC_COLLATE", (int) lenmb, startmb);
3502 collate->cursor = NULL;
3503 no_error = 0;
3507 lr_ignore_rest (ldfile, no_error);
3509 else
3510 /* This must not happen. */
3511 goto err_label;
3512 break;
3514 case tok_reorder_end:
3515 /* Ignore the rest of the line if we don't need the input of
3516 this line. */
3517 if (ignore_content)
3518 break;
3520 if (state != 3)
3521 goto err_label;
3522 state = 4;
3523 lr_ignore_rest (ldfile, 1);
3524 break;
3526 case tok_reorder_sections_after:
3527 /* Ignore the rest of the line if we don't need the input of
3528 this line. */
3529 if (ignore_content)
3531 lr_ignore_rest (ldfile, 0);
3532 break;
3535 if (state == 1)
3537 lr_error (ldfile, _("%s: missing `order_end' keyword"),
3538 "LC_COLLATE");
3539 state = 2;
3541 /* Handle ellipsis at end of list. */
3542 if (was_ellipsis != tok_none)
3544 handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3545 repertoire, result);
3546 was_ellipsis = tok_none;
3549 else if (state == 3)
3551 WITH_CUR_LOCALE (error (0, 0, _("\
3552 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3553 state = 4;
3555 else if (state != 2 && state != 4)
3556 goto err_label;
3557 state = 5;
3559 /* Get the name of the sections we are adding after. */
3560 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3561 if (arg->tok == tok_bsymbol)
3563 /* Now find a section with this name. */
3564 struct section_list *runp = collate->sections;
3566 while (runp != NULL)
3568 if (runp->name != NULL
3569 && strlen (runp->name) == arg->val.str.lenmb
3570 && memcmp (runp->name, arg->val.str.startmb,
3571 arg->val.str.lenmb) == 0)
3572 break;
3574 runp = runp->next;
3577 if (runp != NULL)
3578 collate->current_section = runp;
3579 else
3581 /* This is bad. The section after which we have to
3582 reorder does not exist. Therefore we cannot
3583 process the whole rest of this reorder
3584 specification. */
3585 lr_error (ldfile, _("%s: section `%.*s' not known"),
3586 "LC_COLLATE", (int) arg->val.str.lenmb,
3587 arg->val.str.startmb);
3591 lr_ignore_rest (ldfile, 0);
3593 now = lr_token (ldfile, charmap, result, NULL, verbose);
3595 while (now->tok == tok_reorder_sections_after
3596 || now->tok == tok_reorder_sections_end
3597 || now->tok == tok_end);
3599 /* Process the token we just saw. */
3600 nowtok = now->tok;
3601 continue;
3604 else
3605 /* This must not happen. */
3606 goto err_label;
3607 break;
3609 case tok_reorder_sections_end:
3610 /* Ignore the rest of the line if we don't need the input of
3611 this line. */
3612 if (ignore_content)
3613 break;
3615 if (state != 5)
3616 goto err_label;
3617 state = 6;
3618 lr_ignore_rest (ldfile, 1);
3619 break;
3621 case tok_bsymbol:
3622 case tok_ucs4:
3623 /* Ignore the rest of the line if we don't need the input of
3624 this line. */
3625 if (ignore_content)
3627 lr_ignore_rest (ldfile, 0);
3628 break;
3631 if (state != 0 && state != 1 && state != 3 && state != 5)
3632 goto err_label;
3634 if ((state == 0 || state == 5) && nowtok == tok_ucs4)
3635 goto err_label;
3637 if (nowtok == tok_ucs4)
3639 snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4);
3640 symstr = ucs4buf;
3641 symlen = 9;
3643 else if (arg != NULL)
3645 symstr = arg->val.str.startmb;
3646 symlen = arg->val.str.lenmb;
3648 else
3650 lr_error (ldfile, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3651 (int) ldfile->token.val.str.lenmb,
3652 ldfile->token.val.str.startmb);
3653 break;
3656 struct element_t *seqp;
3657 if (state == 0)
3659 /* We are outside an `order_start' region. This means
3660 we must only accept definitions of values for
3661 collation symbols since these are purely abstract
3662 values and don't need directions associated. */
3663 void *ptr;
3665 if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3667 seqp = ptr;
3669 /* It's already defined. First check whether this
3670 is really a collating symbol. */
3671 if (seqp->is_character)
3672 goto err_label;
3674 goto move_entry;
3676 else
3678 void *result;
3680 if (find_entry (&collate->sym_table, symstr, symlen,
3681 &result) != 0)
3682 /* No collating symbol, it's an error. */
3683 goto err_label;
3685 /* Maybe this is the first time we define a symbol
3686 value and it is before the first actual section. */
3687 if (collate->sections == NULL)
3688 collate->sections = collate->current_section =
3689 &collate->symbol_section;
3692 if (was_ellipsis != tok_none)
3694 handle_ellipsis (ldfile, symstr, symlen, was_ellipsis,
3695 charmap, repertoire, result);
3697 /* Remember that we processed the ellipsis. */
3698 was_ellipsis = tok_none;
3700 /* And don't add the value a second time. */
3701 break;
3704 else if (state == 3)
3706 /* It is possible that we already have this collation sequence.
3707 In this case we move the entry. */
3708 void *sym;
3709 void *ptr;
3711 /* If the symbol after which we have to insert was not found
3712 ignore all entries. */
3713 if (collate->cursor == NULL)
3715 lr_ignore_rest (ldfile, 0);
3716 break;
3719 if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3721 seqp = (struct element_t *) ptr;
3722 goto move_entry;
3725 if (find_entry (&collate->sym_table, symstr, symlen, &sym) == 0
3726 && (seqp = ((struct symbol_t *) sym)->order) != NULL)
3727 goto move_entry;
3729 if (find_entry (&collate->elem_table, symstr, symlen, &ptr) == 0
3730 && (seqp = (struct element_t *) ptr,
3731 seqp->last != NULL || seqp->next != NULL
3732 || (collate->start != NULL && seqp == collate->start)))
3734 move_entry:
3735 /* Remove the entry from the old position. */
3736 if (seqp->last == NULL)
3737 collate->start = seqp->next;
3738 else
3739 seqp->last->next = seqp->next;
3740 if (seqp->next != NULL)
3741 seqp->next->last = seqp->last;
3743 /* We also have to check whether this entry is the
3744 first or last of a section. */
3745 if (seqp->section->first == seqp)
3747 if (seqp->section->first == seqp->section->last)
3748 /* This section has no content anymore. */
3749 seqp->section->first = seqp->section->last = NULL;
3750 else
3751 seqp->section->first = seqp->next;
3753 else if (seqp->section->last == seqp)
3754 seqp->section->last = seqp->last;
3756 /* Now insert it in the new place. */
3757 insert_weights (ldfile, seqp, charmap, repertoire, result,
3758 tok_none);
3759 break;
3762 /* Otherwise we just add a new entry. */
3764 else if (state == 5)
3766 /* We are reordering sections. Find the named section. */
3767 struct section_list *runp = collate->sections;
3768 struct section_list *prevp = NULL;
3770 while (runp != NULL)
3772 if (runp->name != NULL
3773 && strlen (runp->name) == symlen
3774 && memcmp (runp->name, symstr, symlen) == 0)
3775 break;
3777 prevp = runp;
3778 runp = runp->next;
3781 if (runp == NULL)
3783 lr_error (ldfile, _("%s: section `%.*s' not known"),
3784 "LC_COLLATE", (int) symlen, symstr);
3785 lr_ignore_rest (ldfile, 0);
3787 else
3789 if (runp != collate->current_section)
3791 /* Remove the named section from the old place and
3792 insert it in the new one. */
3793 prevp->next = runp->next;
3795 runp->next = collate->current_section->next;
3796 collate->current_section->next = runp;
3797 collate->current_section = runp;
3800 /* Process the rest of the line which might change
3801 the collation rules. */
3802 arg = lr_token (ldfile, charmap, result, repertoire,
3803 verbose);
3804 if (arg->tok != tok_eof && arg->tok != tok_eol)
3805 read_directions (ldfile, arg, charmap, repertoire,
3806 result);
3808 break;
3810 else if (was_ellipsis != tok_none)
3812 /* Using the information in the `ellipsis_weight'
3813 element and this and the last value we have to handle
3814 the ellipsis now. */
3815 assert (state == 1);
3817 handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap,
3818 repertoire, result);
3820 /* Remember that we processed the ellipsis. */
3821 was_ellipsis = tok_none;
3823 /* And don't add the value a second time. */
3824 break;
3827 /* Now insert in the new place. */
3828 insert_value (ldfile, symstr, symlen, charmap, repertoire, result);
3829 break;
3831 case tok_undefined:
3832 /* Ignore the rest of the line if we don't need the input of
3833 this line. */
3834 if (ignore_content)
3836 lr_ignore_rest (ldfile, 0);
3837 break;
3840 if (state != 1)
3841 goto err_label;
3843 if (was_ellipsis != tok_none)
3845 lr_error (ldfile,
3846 _("%s: cannot have `%s' as end of ellipsis range"),
3847 "LC_COLLATE", "UNDEFINED");
3849 unlink_element (collate);
3850 was_ellipsis = tok_none;
3853 /* See whether UNDEFINED already appeared somewhere. */
3854 if (collate->undefined.next != NULL
3855 || &collate->undefined == collate->cursor)
3857 lr_error (ldfile,
3858 _("%s: order for `%.*s' already defined at %s:%Zu"),
3859 "LC_COLLATE", 9, "UNDEFINED",
3860 collate->undefined.file,
3861 collate->undefined.line);
3862 lr_ignore_rest (ldfile, 0);
3864 else
3865 /* Parse the weights. */
3866 insert_weights (ldfile, &collate->undefined, charmap,
3867 repertoire, result, tok_none);
3868 break;
3870 case tok_ellipsis2: /* symbolic hexadecimal ellipsis */
3871 case tok_ellipsis3: /* absolute ellipsis */
3872 case tok_ellipsis4: /* symbolic decimal ellipsis */
3873 /* This is the symbolic (decimal or hexadecimal) or absolute
3874 ellipsis. */
3875 if (was_ellipsis != tok_none)
3876 goto err_label;
3878 if (state != 0 && state != 1 && state != 3)
3879 goto err_label;
3881 was_ellipsis = nowtok;
3883 insert_weights (ldfile, &collate->ellipsis_weight, charmap,
3884 repertoire, result, nowtok);
3885 break;
3887 case tok_end:
3888 seen_end:
3889 /* Next we assume `LC_COLLATE'. */
3890 if (!ignore_content)
3892 if (state == 0 && copy_locale == NULL)
3893 /* We must either see a copy statement or have
3894 ordering values. */
3895 lr_error (ldfile,
3896 _("%s: empty category description not allowed"),
3897 "LC_COLLATE");
3898 else if (state == 1)
3900 lr_error (ldfile, _("%s: missing `order_end' keyword"),
3901 "LC_COLLATE");
3903 /* Handle ellipsis at end of list. */
3904 if (was_ellipsis != tok_none)
3906 handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3907 repertoire, result);
3908 was_ellipsis = tok_none;
3911 else if (state == 3)
3912 WITH_CUR_LOCALE (error (0, 0, _("\
3913 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3914 else if (state == 5)
3915 WITH_CUR_LOCALE (error (0, 0, _("\
3916 %s: missing `reorder-sections-end' keyword"), "LC_COLLATE"));
3918 arg = lr_token (ldfile, charmap, result, NULL, verbose);
3919 if (arg->tok == tok_eof)
3920 break;
3921 if (arg->tok == tok_eol)
3922 lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
3923 else if (arg->tok != tok_lc_collate)
3924 lr_error (ldfile, _("\
3925 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3926 lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
3927 return;
3929 case tok_define:
3930 if (ignore_content)
3932 lr_ignore_rest (ldfile, 0);
3933 break;
3936 arg = lr_token (ldfile, charmap, result, NULL, verbose);
3937 if (arg->tok != tok_ident)
3938 goto err_label;
3940 /* Simply add the new symbol. */
3941 struct name_list *newsym = xmalloc (sizeof (*newsym)
3942 + arg->val.str.lenmb + 1);
3943 memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
3944 newsym->str[arg->val.str.lenmb] = '\0';
3945 newsym->next = defined;
3946 defined = newsym;
3948 lr_ignore_rest (ldfile, 1);
3949 break;
3951 case tok_undef:
3952 if (ignore_content)
3954 lr_ignore_rest (ldfile, 0);
3955 break;
3958 arg = lr_token (ldfile, charmap, result, NULL, verbose);
3959 if (arg->tok != tok_ident)
3960 goto err_label;
3962 /* Remove _all_ occurrences of the symbol from the list. */
3963 struct name_list *prevdef = NULL;
3964 struct name_list *curdef = defined;
3965 while (curdef != NULL)
3966 if (strncmp (arg->val.str.startmb, curdef->str,
3967 arg->val.str.lenmb) == 0
3968 && curdef->str[arg->val.str.lenmb] == '\0')
3970 if (prevdef == NULL)
3971 defined = curdef->next;
3972 else
3973 prevdef->next = curdef->next;
3975 struct name_list *olddef = curdef;
3976 curdef = curdef->next;
3978 free (olddef);
3980 else
3982 prevdef = curdef;
3983 curdef = curdef->next;
3986 lr_ignore_rest (ldfile, 1);
3987 break;
3989 case tok_ifdef:
3990 case tok_ifndef:
3991 if (ignore_content)
3993 lr_ignore_rest (ldfile, 0);
3994 break;
3997 found_ifdef:
3998 arg = lr_token (ldfile, charmap, result, NULL, verbose);
3999 if (arg->tok != tok_ident)
4000 goto err_label;
4001 lr_ignore_rest (ldfile, 1);
4003 if (collate->else_action == else_none)
4005 curdef = defined;
4006 while (curdef != NULL)
4007 if (strncmp (arg->val.str.startmb, curdef->str,
4008 arg->val.str.lenmb) == 0
4009 && curdef->str[arg->val.str.lenmb] == '\0')
4010 break;
4011 else
4012 curdef = curdef->next;
4014 if ((nowtok == tok_ifdef && curdef != NULL)
4015 || (nowtok == tok_ifndef && curdef == NULL))
4017 /* We have to use the if-branch. */
4018 collate->else_action = else_ignore;
4020 else
4022 /* We have to use the else-branch, if there is one. */
4023 nowtok = skip_to (ldfile, collate, charmap, 0);
4024 if (nowtok == tok_else)
4025 collate->else_action = else_seen;
4026 else if (nowtok == tok_elifdef)
4028 nowtok = tok_ifdef;
4029 goto found_ifdef;
4031 else if (nowtok == tok_elifndef)
4033 nowtok = tok_ifndef;
4034 goto found_ifdef;
4036 else if (nowtok == tok_eof)
4037 goto seen_eof;
4038 else if (nowtok == tok_end)
4039 goto seen_end;
4042 else
4044 /* XXX Should it really become necessary to support nested
4045 preprocessor handling we will push the state here. */
4046 lr_error (ldfile, _("%s: nested conditionals not supported"),
4047 "LC_COLLATE");
4048 nowtok = skip_to (ldfile, collate, charmap, 1);
4049 if (nowtok == tok_eof)
4050 goto seen_eof;
4051 else if (nowtok == tok_end)
4052 goto seen_end;
4054 break;
4056 case tok_elifdef:
4057 case tok_elifndef:
4058 case tok_else:
4059 if (ignore_content)
4061 lr_ignore_rest (ldfile, 0);
4062 break;
4065 lr_ignore_rest (ldfile, 1);
4067 if (collate->else_action == else_ignore)
4069 /* Ignore everything until the endif. */
4070 nowtok = skip_to (ldfile, collate, charmap, 1);
4071 if (nowtok == tok_eof)
4072 goto seen_eof;
4073 else if (nowtok == tok_end)
4074 goto seen_end;
4076 else
4078 assert (collate->else_action == else_none);
4079 lr_error (ldfile, _("\
4080 %s: '%s' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE",
4081 nowtok == tok_else ? "else"
4082 : nowtok == tok_elifdef ? "elifdef" : "elifndef");
4084 break;
4086 case tok_endif:
4087 if (ignore_content)
4089 lr_ignore_rest (ldfile, 0);
4090 break;
4093 lr_ignore_rest (ldfile, 1);
4095 if (collate->else_action != else_ignore
4096 && collate->else_action != else_seen)
4097 lr_error (ldfile, _("\
4098 %s: 'endif' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE");
4100 /* XXX If we support nested preprocessor directives we pop
4101 the state here. */
4102 collate->else_action = else_none;
4103 break;
4105 default:
4106 err_label:
4107 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
4110 /* Prepare for the next round. */
4111 now = lr_token (ldfile, charmap, result, NULL, verbose);
4112 nowtok = now->tok;
4115 seen_eof:
4116 /* When we come here we reached the end of the file. */
4117 lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");