(insert_value): Correct order of arguments for lr_error call.
[glibc.git] / locale / programs / ld-collate.c
blob0d6e1138270eb438b7fb6420204dc27d3b6681b0
1 /* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
24 #include <error.h>
25 #include <stdlib.h>
27 #include "charmap.h"
28 #include "localeinfo.h"
29 #include "linereader.h"
30 #include "locfile.h"
31 #include "localedef.h"
33 /* Uncomment the following line in the production version. */
34 /* #define NDEBUG 1 */
35 #include <assert.h>
37 #define obstack_chunk_alloc malloc
38 #define obstack_chunk_free free
40 /* Forward declaration. */
41 struct element_t;
/* Description of one section of the collation order.  The sections of
   a locale form a singly linked list.  */
struct section_list
{
  /* Link to the following section in the list.  */
  struct section_list *next;
  /* The name the section was given.  */
  const char *name;
  /* Head of the elements belonging to the section.  */
  struct element_t *first;
  /* Tail of the elements belonging to the section.  */
  struct element_t *last;
  /* The sorting rules which apply to the section.  */
  enum coll_sort_rule *rules;
};
/* One entry of the collation order list.  */
struct element_t
{
  /* Multibyte and wide character strings handed to new_element.  */
  const char *mbs;
  const uint32_t *wcs;
  int order;

  /* Array with one pointer per sorting rule, filled by insert_value.  */
  struct element_t **weights;

  /* File and line of the definition, used in diagnostics.  */
  const char *file;
  size_t line;

  /* The section this element is part of.  */
  struct section_list *section;

  /* Neighbours in the doubly linked order list.  */
  struct element_t *last;
  struct element_t *next;
};
/* Record kept for every collating symbol.  */
struct symbol_t
{
  /* Position of the symbol in the order list; a null pointer as long as
     the symbol has not been given a place.  */
  struct element_t *order;

  /* File and line of the definition.  */
  const char *file;
  size_t line;
};
90 /* The real definition of the struct for the LC_COLLATE locale. */
91 struct locale_collate_t
93 int col_weight_max;
94 int cur_weight_max;
96 /* List of known scripts. */
97 struct section_list *sections;
98 /* Current section using definition. */
99 struct section_list *current_section;
100 /* There always can be an unnamed section. */
101 struct section_list unnamed_section;
102 /* To make handling of errors easier we have another section. */
103 struct section_list error_section;
105 /* Number of sorting rules given in order_start line. */
106 uint32_t nrules;
108 /* Start of the order list. */
109 struct element_t *start;
111 /* The undefined element. */
112 struct element_t undefined;
114 /* This is the cursor for `reorder_after' insertions. */
115 struct element_t *cursor;
117 /* Remember whether last weight was an ellipsis. */
118 int was_ellipsis;
120 /* Known collating elements. */
121 hash_table elem_table;
123 /* Known collating symbols. */
124 hash_table sym_table;
126 /* Known collation sequences. */
127 hash_table seq_table;
129 struct obstack mempool;
131 /* The LC_COLLATE category is a bit special as it is sometimes possible
132 that the definitions from more than one input file contains information.
133 Therefore we keep all relevant input in a list. */
134 struct locale_collate_t *next;
138 /* We have a few global variables which are used for reading all
139 LC_COLLATE category descriptions in all files. */
/* Number of sorting rules every `order_start' line must provide.  It is
   zero until read_directions has parsed the first rule list and is also
   used by insert_value to size the weight array of each element.
   NOTE(review): struct locale_collate_t has a member of the same name;
   confirm which of the two is meant to be authoritative.  */
140 static int nrules;
143 static struct section_list *
144 make_seclist_elem (struct locale_collate_t *collate, const char *string,
145 struct section_list *next)
147 struct section_list *newp;
149 newp = (struct section_list *) obstack_alloc (&collate->mempool,
150 sizeof (*newp));
151 newp->next = next;
152 newp->name = string;
153 newp->first = NULL;
155 return newp;
159 static struct element_t *
160 new_element (struct locale_collate_t *collate, const char *mbs,
161 const uint32_t *wcs)
163 struct element_t *newp;
165 newp = (struct element_t *) obstack_alloc (&collate->mempool,
166 sizeof (*newp));
167 newp->mbs = mbs;
168 newp->wcs = wcs;
169 newp->order = 0;
171 newp->file = NULL;
172 newp->line = 0;
174 newp->section = NULL;
176 newp->last = NULL;
177 newp->next = NULL;
179 return newp;
183 static struct symbol_t *
184 new_symbol (struct locale_collate_t *collate)
186 struct symbol_t *newp;
188 newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp));
190 newp->order = NULL;
192 newp->file = NULL;
193 newp->line = 0;
195 return newp;
199 /* Test whether this name is already defined somewhere. */
200 static int
201 check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
202 struct charmap_t *charmap, struct repertoire_t *repertoire,
203 const char *symbol, size_t symbol_len)
205 void *ignore = NULL;
207 if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
209 lr_error (ldfile, _("`%s' already defined in charmap"), symbol);
210 return 1;
213 if (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore) == 0)
215 lr_error (ldfile, _("`%s' already defined in repertoire"), symbol);
216 return 1;
219 if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
221 lr_error (ldfile, _("`%s' already defined as collating symbol"), symbol);
222 return 1;
225 if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
227 lr_error (ldfile, _("`%s' already defined as collating element"),
228 symbol);
229 return 1;
232 return 0;
236 /* Read the direction specification. */
237 static void
238 read_directions (struct linereader *ldfile, struct token *arg,
239 struct charmap_t *charmap, struct repertoire_t *repertoire,
240 struct locale_collate_t *collate)
242 int cnt = 0;
243 int max = nrules ?: 10;
244 enum coll_sort_rule *rules = calloc (max, sizeof (*rules));
245 int warned = 0;
247 while (1)
249 int valid = 0;
251 if (arg->tok == tok_forward)
253 if (rules[cnt] & sort_backward)
255 if (! warned)
257 lr_error (ldfile, _("\
258 %s: `forward' and `backward' are mutually excluding each other"),
259 "LC_COLLATE");
260 warned = 1;
263 else if (rules[cnt] & sort_forward)
265 if (! warned)
267 lr_error (ldfile, _("\
268 %s: `%s' mentioned twice in definition of weight %d"),
269 "LC_COLLATE", "forward", cnt + 1);
272 else
273 rules[cnt] |= sort_forward;
275 valid = 1;
277 else if (arg->tok == tok_backward)
279 if (rules[cnt] & sort_forward)
281 if (! warned)
283 lr_error (ldfile, _("\
284 %s: `forward' and `backward' are mutually excluding each other"),
285 "LC_COLLATE");
286 warned = 1;
289 else if (rules[cnt] & sort_backward)
291 if (! warned)
293 lr_error (ldfile, _("\
294 %s: `%s' mentioned twice in definition of weight %d"),
295 "LC_COLLATE", "backward", cnt + 1);
298 else
299 rules[cnt] |= sort_backward;
301 valid = 1;
303 else if (arg->tok == tok_position)
305 if (rules[cnt] & sort_position)
307 if (! warned)
309 lr_error (ldfile, _("\
310 %s: `%s' mentioned twice in definition of weight %d in category `%s'"),
311 "LC_COLLATE", "position", cnt + 1);
314 else
315 rules[cnt] |= sort_position;
317 valid = 1;
320 if (valid)
321 arg = lr_token (ldfile, charmap, repertoire);
323 if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
324 || arg->tok == tok_semicolon)
326 if (! valid && ! warned)
328 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
329 warned = 1;
332 /* See whether we have to increment the counter. */
333 if (arg->tok != tok_comma && rules[cnt] != 0)
334 ++cnt;
336 if (arg->tok == tok_eof || arg->tok == tok_eol)
337 /* End of line or file, so we exit the loop. */
338 break;
340 if (nrules == 0)
342 /* See whether we have enough room in the array. */
343 if (cnt == max)
345 max += 10;
346 rules = (enum coll_sort_rule *) xrealloc (rules,
348 * sizeof (*rules));
349 memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
352 else
354 if (cnt == nrules)
356 /* There must not be any more rule. */
357 if (! warned)
359 lr_error (ldfile, _("\
360 %s: too many rules; first entry only had %d"),
361 "LC_COLLATE", nrules);
362 warned = 1;
365 lr_ignore_rest (ldfile, 0);
366 break;
370 else
372 if (! warned)
374 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
375 warned = 1;
379 arg = lr_token (ldfile, charmap, repertoire);
382 if (nrules == 0)
384 /* Now we know how many rules we have. */
385 nrules = cnt;
386 rules = (enum coll_sort_rule *) xrealloc (rules,
387 nrules * sizeof (*rules));
389 else
391 if (cnt < nrules)
393 /* Not enough rules in this specification. */
394 if (! warned)
395 lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
398 rules[cnt] = sort_forward;
399 while (++cnt < nrules);
403 collate->current_section->rules = rules;
407 static void
408 insert_value (struct linereader *ldfile, struct token *arg,
409 struct charmap_t *charmap, struct repertoire_t *repertoire,
410 struct locale_collate_t *collate)
412 /* First find out what kind of symbol this is. */
413 struct charseq *seq;
414 uint32_t wc;
415 struct element_t *elem = NULL;
416 int weight_cnt;
418 /* First determine the wide character. There must be such a value,
419 otherwise we ignore it (if it is no collatio symbol or element). */
420 wc = repertoire_find_value (repertoire, arg->val.str.startmb,
421 arg->val.str.lenmb);
423 /* Try to find the character in the charmap. */
424 seq = charmap_find_value (charmap, arg->val.str.startmb, arg->val.str.lenmb);
426 if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
428 /* It's no character, so look through the collation elements and
429 symbol list. */
430 void *result;
432 if (find_entry (&collate->sym_table, arg->val.str.startmb,
433 arg->val.str.lenmb, &result) == 0)
435 /* It's a collation symbol. */
436 struct symbol_t *sym = (struct symbol_t *) result;
437 elem = sym->order;
439 if (elem == NULL)
440 elem = sym->order = new_element (collate, arg->val.str.startmb,
441 arg->val.str.startwc);
443 else if (find_entry (&collate->elem_table, arg->val.str.startmb,
444 arg->val.str.lenmb, (void **) &elem) != 0)
445 /* It's also no collation element. Therefore ignore it. */
446 return;
448 else
450 /* Otherwise the symbols stands for an character. Make sure it is
451 not already in the table. */
455 if (elem == NULL)
456 /* XXX HACK HACK HACK */
457 return;
459 /* Test whether this element is not already in the list. */
460 if (elem->next != NULL || elem->next == collate->cursor)
462 lr_error (ldfile, _("order for `%.*s' already defined at %s:%Z"),
463 arg->val.str.lenmb, arg->val.str.startmb,
464 elem->file, elem->line);
465 return;
468 /* Initialize all the fields. */
469 elem->file = ldfile->fname;
470 elem->line = ldfile->lineno;
471 elem->last = collate->cursor;
472 elem->next = collate->cursor ? collate->cursor->next : NULL;
473 elem->weights = (struct element_t **)
474 obstack_alloc (&collate->mempool, nrules * sizeof (struct element_t *));
475 memset (elem->weights, '\0', nrules * sizeof (struct element_t *));
477 if (collate->current_section->first == NULL)
478 collate->current_section->first = elem;
479 if (collate->current_section->last == collate->cursor)
480 collate->current_section->last = elem;
482 collate->cursor = elem;
484 /* Now read the rest of the line. */
485 ldfile->return_widestr = 1;
487 weight_cnt = 0;
490 arg = lr_token (ldfile, charmap, repertoire);
492 if (arg->tok == tok_eof || arg->tok == tok_eol)
494 /* This means the rest of the line uses the current element
495 as the weight. */
497 elem->weights[weight_cnt] = elem;
498 while (++weight_cnt < nrules);
500 return;
503 if (arg->tok == tok_ignore)
505 /* The weight for this level has to be ignored. We use the
506 null pointer to indicate this. */
508 else if (arg->tok == tok_bsymbol)
513 while (++weight_cnt < nrules);
515 lr_ignore_rest (ldfile, weight_cnt == nrules);
519 static void
520 collate_startup (struct linereader *ldfile, struct localedef_t *locale,
521 int ignore_content)
523 if (!ignore_content)
525 struct locale_collate_t *collate;
527 collate = locale->categories[LC_COLLATE].collate =
528 (struct locale_collate_t *) xcalloc (1,
529 sizeof (struct locale_collate_t));
531 /* Init the various data structures. */
532 init_hash (&collate->elem_table, 100);
533 init_hash (&collate->sym_table, 100);
534 init_hash (&collate->seq_table, 500);
535 obstack_init (&collate->mempool);
537 collate->col_weight_max = -1;
540 ldfile->translate_strings = 1;
541 ldfile->return_widestr = 0;
/* Finish the LC_COLLATE data of LOCALE after reading.  Nothing is done
   here so far.  */
void
collate_finish (struct localedef_t *locale, struct charmap_t *charmap)
{
}
/* Write the LC_COLLATE data of LOCALE below OUTPUT_PATH.  Nothing is
   written here so far.  */
void
collate_output (struct localedef_t *locale, struct charmap_t *charmap,
                const char *output_path)
{
}
558 void
559 collate_read (struct linereader *ldfile, struct localedef_t *result,
560 struct charmap_t *charmap, const char *repertoire_name,
561 int ignore_content)
563 struct repertoire_t *repertoire = NULL;
564 struct locale_collate_t *collate;
565 struct token *now;
566 struct token *arg;
567 enum token_t nowtok;
568 int state = 0;
569 int was_ellipsis = 0;
571 /* Get the repertoire we have to use. */
572 if (repertoire_name != NULL)
573 repertoire = repertoire_read (repertoire_name);
575 /* The rest of the line containing `LC_COLLATE' must be free. */
576 lr_ignore_rest (ldfile, 1);
580 now = lr_token (ldfile, charmap, NULL);
581 nowtok = now->tok;
583 while (nowtok == tok_eol);
585 if (nowtok == tok_copy)
587 state = 2;
588 now = lr_token (ldfile, charmap, NULL);
589 if (now->tok != tok_string)
590 goto err_label;
591 /* XXX Use the name */
592 lr_ignore_rest (ldfile, 1);
594 now = lr_token (ldfile, charmap, NULL);
595 nowtok = now->tok;
598 /* Prepare the data structures. */
599 collate_startup (ldfile, result, ignore_content);
600 collate = result->categories[LC_COLLATE].collate;
602 while (1)
604 /* Of course we don't proceed beyond the end of file. */
605 if (nowtok == tok_eof)
606 break;
608 /* Ingore empty lines. */
609 if (nowtok == tok_eol)
611 now = lr_token (ldfile, charmap, NULL);
612 nowtok = now->tok;
613 continue;
616 switch (nowtok)
618 case tok_coll_weight_max:
619 /* Ignore the rest of the line if we don't need the input of
620 this line. */
621 if (ignore_content)
623 lr_ignore_rest (ldfile, 0);
624 break;
627 if (state != 0)
628 goto err_label;
630 arg = lr_token (ldfile, charmap, NULL);
631 if (arg->tok != tok_number)
632 goto err_label;
633 if (collate->col_weight_max != -1)
634 lr_error (ldfile, _("%s: duplicate definition of `%s'"),
635 "LC_COLLATE", "col_weight_max");
636 else
637 collate->col_weight_max = arg->val.num;
638 lr_ignore_rest (ldfile, 1);
639 break;
641 case tok_section_symbol:
642 /* Ignore the rest of the line if we don't need the input of
643 this line. */
644 if (ignore_content)
646 lr_ignore_rest (ldfile, 0);
647 break;
650 if (state != 0)
651 goto err_label;
653 arg = lr_token (ldfile, charmap, repertoire);
654 if (arg->tok != tok_bsymbol)
655 goto err_label;
656 else if (!ignore_content)
658 /* Check whether this section is already known. */
659 struct section_list *known = collate->sections;
660 while (known != NULL)
661 if (strcmp (known->name, arg->val.str.startmb) == 0)
662 break;
664 if (known != NULL)
666 lr_error (ldfile,
667 _("%s: duplicate declaration of section `%s'"),
668 "LC_COLLATE", arg->val.str.startmb);
669 free (arg->val.str.startmb);
671 else
672 collate->sections = make_seclist_elem (collate,
673 arg->val.str.startmb,
674 collate->sections);
676 lr_ignore_rest (ldfile, known == NULL);
678 else
680 free (arg->val.str.startmb);
681 lr_ignore_rest (ldfile, 0);
683 break;
685 case tok_collating_element:
686 /* Ignore the rest of the line if we don't need the input of
687 this line. */
688 if (ignore_content)
690 lr_ignore_rest (ldfile, 0);
691 break;
694 if (state != 0)
695 goto err_label;
697 arg = lr_token (ldfile, charmap, repertoire);
698 if (arg->tok != tok_bsymbol)
699 goto err_label;
700 else
702 const char *symbol = arg->val.str.startmb;
703 size_t symbol_len = arg->val.str.lenmb;
705 /* Next the `from' keyword. */
706 arg = lr_token (ldfile, charmap, repertoire);
707 if (arg->tok != tok_from)
709 free ((char *) symbol);
710 goto err_label;
713 ldfile->return_widestr = 1;
715 /* Finally the string with the replacement. */
716 arg = lr_token (ldfile, charmap, repertoire);
717 ldfile->return_widestr = 0;
718 if (arg->tok != tok_string)
719 goto err_label;
721 if (!ignore_content)
723 if (symbol == NULL)
724 lr_error (ldfile, _("\
725 %s: unknown character in collating element name"),
726 "LC_COLLATE");
727 if (arg->val.str.startmb == NULL)
728 lr_error (ldfile, _("\
729 %s: unknown character in collating element definition"),
730 "LC_COLLATE");
731 if (arg->val.str.startwc == NULL)
732 lr_error (ldfile, _("\
733 %s: unknown wide character in collating element definition"),
734 "LC_COLLATE");
735 else if (arg->val.str.lenwc < 2)
736 lr_error (ldfile, _("\
737 %s: substitution string in collating element definition must have at least two characters"),
738 "LC_COLLATE");
740 if (symbol != NULL)
742 /* The name is already defined. */
743 if (check_duplicate (ldfile, collate, charmap,
744 repertoire, symbol, symbol_len))
745 goto col_elem_free;
747 if (insert_entry (&collate->elem_table,
748 symbol, symbol_len,
749 new_element (collate,
750 arg->val.str.startmb,
751 arg->val.str.startwc))
752 < 0)
753 lr_error (ldfile, _("\
754 error while adding collating element"));
756 else
757 goto col_elem_free;
759 else
761 col_elem_free:
762 if (symbol != NULL)
763 free ((char *) symbol);
764 if (arg->val.str.startmb != NULL)
765 free (arg->val.str.startmb);
766 if (arg->val.str.startwc != NULL)
767 free (arg->val.str.startwc);
769 lr_ignore_rest (ldfile, 1);
771 break;
773 case tok_collating_symbol:
774 /* Ignore the rest of the line if we don't need the input of
775 this line. */
776 if (ignore_content)
778 lr_ignore_rest (ldfile, 0);
779 break;
782 if (state != 0)
783 goto err_label;
785 arg = lr_token (ldfile, charmap, repertoire);
786 if (arg->tok != tok_bsymbol)
787 goto err_label;
788 else
790 const char *symbol = arg->val.str.startmb;
791 size_t symbol_len = arg->val.str.lenmb;
793 if (!ignore_content)
795 if (symbol == NULL)
796 lr_error (ldfile, _("\
797 %s: unknown character in collating symbol name"),
798 "LC_COLLATE");
799 else
801 /* The name is already defined. */
802 if (check_duplicate (ldfile, collate, charmap,
803 repertoire, symbol, symbol_len))
804 goto col_sym_free;
806 if (insert_entry (&collate->sym_table,
807 symbol, symbol_len,
808 new_symbol (collate)) < 0)
809 lr_error (ldfile, _("\
810 error while adding collating symbol"));
813 else
815 col_sym_free:
816 if (symbol != NULL)
817 free ((char *) symbol);
819 lr_ignore_rest (ldfile, 1);
821 break;
823 case tok_symbol_equivalence:
824 /* Ignore the rest of the line if we don't need the input of
825 this line. */
826 if (ignore_content)
828 lr_ignore_rest (ldfile, 0);
829 break;
832 if (state != 0)
833 goto err_label;
835 arg = lr_token (ldfile, charmap, repertoire);
836 if (arg->tok != tok_bsymbol)
837 goto err_label;
838 else
840 const char *newname = arg->val.str.startmb;
841 size_t newname_len = arg->val.str.lenmb;
842 const char *symname;
843 size_t symname_len;
844 struct symbol_t *symval;
846 arg = lr_token (ldfile, charmap, repertoire);
847 if (arg->tok != tok_bsymbol)
849 if (newname != NULL)
850 free ((char *) newname);
851 goto err_label;
854 symname = arg->val.str.startmb;
855 symname_len = arg->val.str.lenmb;
857 if (!ignore_content)
859 if (newname == NULL)
861 lr_error (ldfile, _("\
862 %s: unknown character in equivalent definition name"),
863 "LC_COLLATE");
864 goto sym_equiv_free;
866 if (symname == NULL)
868 lr_error (ldfile, _("\
869 %s: unknown character in equivalent definition value"),
870 "LC_COLLATE");
871 goto sym_equiv_free;
873 /* The name is already defined. */
874 if (check_duplicate (ldfile, collate, charmap,
875 repertoire, symname, symname_len))
876 goto col_sym_free;
878 /* See whether the symbol name is already defined. */
879 if (find_entry (&collate->sym_table, symname, symname_len,
880 (void **) &symval) != 0)
882 lr_error (ldfile, _("\
883 %s: unknown symbol `%s' in equivalent definition"),
884 "LC_COLLATE", symname);
885 goto col_sym_free;
888 if (insert_entry (&collate->sym_table,
889 newname, newname_len, symval) < 0)
891 lr_error (ldfile, _("\
892 error while adding equivalent collating symbol"));
893 goto sym_equiv_free;
896 free ((char *) symname);
898 else
900 sym_equiv_free:
901 if (newname != NULL)
902 free ((char *) newname);
903 if (symname != NULL)
904 free ((char *) symname);
906 lr_ignore_rest (ldfile, 1);
908 break;
910 case tok_order_start:
911 /* Ignore the rest of the line if we don't need the input of
912 this line. */
913 if (ignore_content)
915 lr_ignore_rest (ldfile, 0);
916 break;
919 if (state != 0 && state != 1)
920 goto err_label;
921 state = 1;
923 /* The 14652 draft does not specify whether all `order_start' lines
924 must contain the same number of sort-rules, but 14651 does. So
925 we require this here as well. */
926 arg = lr_token (ldfile, charmap, repertoire);
927 if (arg->tok == tok_bsymbol)
929 /* This better should be a section name. */
930 struct section_list *sp = collate->sections;
931 while (sp != NULL
932 && strcmp (sp->name, arg->val.str.startmb) != 0)
933 sp = sp->next;
935 if (sp == NULL)
937 lr_error (ldfile, _("\
938 %s: unknown section name `%s'"),
939 "LC_COLLATE", arg->val.str.startmb);
940 /* We use the error section. */
941 collate->current_section = &collate->error_section;
943 else
945 /* Remember this section. */
946 collate->current_section = sp;
948 /* One should not be allowed to open the same
949 section twice. */
950 if (sp->first != NULL)
951 lr_error (ldfile, _("\
952 %s: multiple order definitions for section `%s'"),
953 "LC_COLLATE", sp->name);
955 /* Next should come the end of the line or a semicolon. */
956 arg = lr_token (ldfile, charmap, repertoire);
957 if (arg->tok == tok_eol)
959 uint32_t cnt;
961 /* This means we have exactly one rule: `forward'. */
962 if (collate->nrules > 1)
963 lr_error (ldfile, _("\
964 %s: invalid number of sorting rules"),
965 "LC_COLLATE");
966 else
967 collate->nrules = 1;
968 sp->rules = obstack_alloc (&collate->mempool,
969 (sizeof (enum coll_sort_rule)
970 * collate->nrules));
971 for (cnt = 0; cnt < collate->nrules; ++cnt)
972 sp->rules[cnt] = sort_forward;
974 /* Next line. */
975 break;
978 /* Get the next token. */
979 arg = lr_token (ldfile, charmap, repertoire);
982 else
984 /* There is no section symbol. Therefore we use the unnamed
985 section. */
986 collate->current_section = &collate->unnamed_section;
988 if (collate->unnamed_section.first != NULL)
989 lr_error (ldfile, _("\
990 %s: multiple order definitions for unnamed section"),
991 "LC_COLLATE");
994 /* Now read the direction names. */
995 read_directions (ldfile, arg, charmap, repertoire, collate);
996 break;
998 case tok_order_end:
999 /* Ignore the rest of the line if we don't need the input of
1000 this line. */
1001 if (ignore_content)
1003 lr_ignore_rest (ldfile, 0);
1004 break;
1007 if (state != 1)
1008 goto err_label;
1009 state = 2;
1010 lr_ignore_rest (ldfile, 1);
1011 break;
1013 case tok_reorder_after:
1014 /* Ignore the rest of the line if we don't need the input of
1015 this line. */
1016 if (ignore_content)
1018 lr_ignore_rest (ldfile, 0);
1019 break;
1022 if (state != 2 && state != 3)
1023 goto err_label;
1024 state = 3;
1025 /* XXX get symbol */
1026 break;
1028 case tok_reorder_end:
1029 /* Ignore the rest of the line if we don't need the input of
1030 this line. */
1031 if (ignore_content)
1032 break;
1034 if (state != 3)
1035 goto err_label;
1036 state = 4;
1037 lr_ignore_rest (ldfile, 1);
1038 break;
1040 case tok_bsymbol:
1041 /* Ignore the rest of the line if we don't need the input of
1042 this line. */
1043 if (ignore_content)
1045 lr_ignore_rest (ldfile, 0);
1046 break;
1049 if (state != 1 && state != 3)
1050 goto err_label;
1052 if (state == 3)
1054 /* It is possible that we already have this collation sequence.
1055 In this case we move the entry. */
1056 struct element_t *seqp;
1058 if (find_entry (&collate->seq_table, arg->val.str.startmb,
1059 arg->val.str.lenmb, (void **) &seqp) == 0)
1061 /* Remove the entry from the old position. */
1062 if (seqp->last == NULL)
1063 collate->start = seqp->next;
1064 else
1065 seqp->last->next = seqp->next;
1066 if (seqp->next != NULL)
1067 seqp->next->last = seqp->last;
1069 /* We also have to check whether this entry is the
1070 first or last of a section. */
1071 if (seqp->section->first == seqp)
1073 if (seqp->section->first == seqp->section->last)
1074 /* This setion has no content anymore. */
1075 seqp->section->first = seqp->section->last = NULL;
1076 else
1077 seqp->section->first = seqp->next;
1079 else if (seqp->section->last == seqp)
1080 seqp->section->last = seqp->last;
1082 seqp->last = seqp->next = NULL;
1086 /* Now insert in the new place. */
1087 insert_value (ldfile, arg, charmap, repertoire, collate);
1088 break;
1090 case tok_undefined:
1091 /* Ignore the rest of the line if we don't need the input of
1092 this line. */
1093 if (ignore_content)
1095 lr_ignore_rest (ldfile, 0);
1096 break;
1099 if (state != 1)
1100 goto err_label;
1101 /* XXX handle UNDEFINED weight */
1102 break;
1104 case tok_ellipsis3:
1105 /* Ignore the rest of the line if we don't need the input of
1106 this line. */
1107 if (ignore_content)
1109 lr_ignore_rest (ldfile, 0);
1110 break;
1113 if (state != 1 && state != 3)
1114 goto err_label;
1116 was_ellipsis = 1;
1117 /* XXX Read the remainder of the line and remember what are
1118 the weights. */
1119 break;
1121 case tok_end:
1122 /* Next we assume `LC_COLLATE'. */
1123 if (!ignore_content)
1125 if (state == 0)
1126 /* We must either see a copy statement or have
1127 ordering values. */
1128 lr_error (ldfile,
1129 _("%s: empty category description not allowed"),
1130 "LC_COLLATE");
1131 else if (state == 1)
1132 lr_error (ldfile, _("%s: missing `order_end' keyword"),
1133 "LC_COLLATE");
1134 else if (state == 3)
1135 error (0, 0, _("%s: missing `reorder-end' keyword"),
1136 "LC_COLLATE");
1138 arg = lr_token (ldfile, charmap, NULL);
1139 if (arg->tok == tok_eof)
1140 break;
1141 if (arg->tok == tok_eol)
1142 lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
1143 else if (arg->tok != tok_lc_collate)
1144 lr_error (ldfile, _("\
1145 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
1146 lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
1147 return;
1149 default:
1150 err_label:
1151 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
1154 /* Prepare for the next round. */
1155 now = lr_token (ldfile, charmap, NULL);
1156 nowtok = now->tok;
1159 /* When we come here we reached the end of the file. */
1160 lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
1164 #if 0
/* The kinds of symbols which can be defined.  */
enum coll_symbol
{
  undefined,
  ellipsis,
  character,
  element,
  symbol
};
/* Record of a reference to the symbol TOKEN which is resolved later;
   the records form a singly linked list.  */
typedef struct patch_t
{
  /* File and line where the reference was found.  */
  const char *fname;
  size_t lineno;
  /* The name which was referenced.  */
  const char *token;
  /* Where the value has to be stored.  */
  union
  {
    unsigned int *pos;
    size_t idx;
  } where;
  struct patch_t *next;
} patch_t;
/* A collation element with its weight and its ordering data; elements
   can be chained through NEXT.  */
typedef struct element_t
{
  /* Multibyte and wide character form of the name.  */
  const char *namemb;
  const uint32_t *namewc;
  unsigned int this_weight;

  struct element_t *next;

  /* ORDERING_LEN describes the array ORDERING.  */
  unsigned int *ordering;
  size_t ordering_len;
} element_t;
1204 /* The real definition of the struct for the LC_COLLATE locale. */
1205 struct locale_collate_t
1207 /* Collate symbol table. Simple mapping to number. */
1208 hash_table symbols;
1210 /* The collation elements. */
1211 hash_table elements;
1212 struct obstack element_mem;
1214 /* The result tables. */
1215 hash_table resultmb;
1216 hash_table resultwc;
1218 /* Sorting rules given in order_start line. */
1219 uint32_t nrules;
1220 enum coll_sort_rule *rules;
1222 /* Used while recognizing symbol composed of multiple tokens
1223 (collating-element). */
1224 const char *combine_token;
1225 size_t combine_token_len;
1227 /* How many sorting order specifications so far. */
1228 unsigned int order_cnt;
1230 /* Was lastline ellipsis? */
1231 int was_ellipsis;
1232 /* Value of last entry if was character. */
1233 uint32_t last_char;
1234 /* Current element. */
1235 element_t *current_element;
1236 /* What kind of symbol is current element. */
1237 enum coll_symbol kind;
1239 /* Patch lists. */
1240 patch_t *current_patch;
1241 patch_t *all_patches;
1243 /* Room for the UNDEFINED information. */
1244 element_t undefined;
1245 unsigned int undefined_len;
1247 /* Script information. */
1248 const char **scripts;
1249 unsigned int nscripts;
1253 /* Be verbose? Defined in localedef.c. */
1254 extern int verbose;
1258 #define obstack_chunk_alloc malloc
1259 #define obstack_chunk_free free
1262 /* Prototypes for local functions. */
1263 static void collate_startup (struct linereader *ldfile,
1264 struct localedef_t *locale,
1265 struct charmap_t *charmap, int ignore_content);
/* Old implementation, disabled by the enclosing `#if 0'.  Allocates the
   collation description of LOCALE, sets up its hash tables, the element
   obstack and the rule array, and resets the parsing state kept in the
   description and in LDFILE.  The parameters CHARSET and IGNORE_CONTENT
   are not used in the body.
   NOTE(review): the body refers to `collate->result' and
   `collate->nrules_max', which the struct definition in this disabled
   region does not declare (it has `resultmb' and `resultwc'); this code
   would need updating before it could be enabled again.  */
1268 static void
1269 collate_startup (struct linereader *ldfile, struct localedef_t *locale,
1270 struct charmap_t *charset, int ignore_content)
1272 struct locale_collate_t *collate;
1274 /* Allocate the needed room. */
1275 locale->categories[LC_COLLATE].collate = collate =
1276 (struct locale_collate_t *) xmalloc (sizeof (struct locale_collate_t));
1278 /* Allocate hash table for collating elements. */
/* A nonzero result of init_hash is treated as failure and ends the
   program through error with status 4.  */
1279 if (init_hash (&collate->elements, 512))
1280 error (4, 0, _("memory exhausted"));
1281 collate->combine_token = NULL;
1282 obstack_init (&collate->element_mem);
/* The following table is the one for the collating symbols, although
   the original comment says elements.  */
1284 /* Allocate hash table for collating elements. */
1285 if (init_hash (&collate->symbols, 64))
1286 error (4, 0, _("memory exhausted"));
1288 /* Allocate hash table for result. */
1289 if (init_hash (&collate->result, 512))
1290 error (4, 0, _("memory exhausted"));
/* Start without rules but with room for ten of them.  */
1292 collate->nrules = 0;
1293 collate->nrules_max = 10;
1294 collate->rules
1295 = (enum coll_sort_rule *) xmalloc (collate->nrules_max
1296 * sizeof (enum coll_sort_rule));
1298 collate->order_cnt = 1; /* The smallest weight is 2. */
1300 collate->was_ellipsis = 0;
1301 collate->last_char = L'\0'; /* 0 because leading ellipsis is allowed. */
1303 collate->all_patches = NULL;
1305 /* This tells us no UNDEFINED entry was found until now. */
1306 memset (&collate->undefined, '\0', sizeof (collate->undefined));
1308 ldfile->translate_strings = 0;
1309 ldfile->return_widestr = 0;
/* Old implementation, disabled by the enclosing `#if 0'.  First the
   forward references recorded in the patch list of LOCALE's collation
   description are resolved; then, if no UNDEFINED entry was given,
   default ordering data is generated for it; finally the length of the
   UNDEFINED record is computed.  REPERTOIRE is not used in the body.
   NOTE(review): the body refers to `collate->result', `runp->name' and
   `be_quiet'; none of them is declared in the visible part of this
   disabled region (the element type has `namemb' and `namewc').  */
1313 void
1314 collate_finish (struct localedef_t *locale, struct charset_t *charset,
1315 struct repertoire_t *repertoire)
1317 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1318 patch_t *patch;
1319 size_t cnt;
1321 /* Patch the constructed table so that forward references are
1322 correctly filled. */
1323 for (patch = collate->all_patches; patch != NULL; patch = patch->next)
1325 uint32_t wch;
1326 size_t toklen = strlen (patch->token);
1327 void *ptmp;
1328 unsigned int value = 0;
/* The token is tried as a character of the charset first, then as a
   collating element and finally as a collating symbol.  */
1330 wch = charset_find_value (&charset->char_table, patch->token, toklen);
1331 if (wch != ILLEGAL_CHAR_VALUE)
1333 element_t *runp;
1335 if (find_entry (&collate->result, &wch, sizeof (uint32_t),
1336 (void *) &runp) < 0)
1337 runp = NULL;
/* Walk the chain of entries found for this character until the one
   whose name is exactly the single character WCH.  */
1338 for (; runp != NULL; runp = runp->next)
1339 if (runp->name[0] == wch && runp->name[1] == L'\0')
1340 break;
1342 value = runp == NULL ? 0 : runp->this_weight;
1344 else if (find_entry (&collate->elements, patch->token, toklen, &ptmp)
1345 >= 0)
1347 value = ((element_t *) ptmp)->this_weight;
1349 else if (find_entry (&collate->symbols, patch->token, toklen, &ptmp)
1350 >= 0)
/* For a collating symbol the stored pointer itself is the value.  */
1352 value = (unsigned long int) ptmp;
1354 else
1355 value = 0;
/* Zero means no weight was found; the place to patch is left alone.  */
1357 if (value == 0)
1359 if (!be_quiet)
1360 error_at_line (0, 0, patch->fname, patch->lineno,
1361 _("no weight defined for symbol `%s'"),
1362 patch->token);
1364 else
1365 *patch->where.pos = value;
1368 /* If no definition for UNDEFINED is given, all characters in the
1369 given charset must be specified. */
1370 if (collate->undefined.ordering == NULL)
1372 /**************************************************************\
1373 |* XXX We should test whether really an unspecified character *|
1374 |* exists before giving the message. *|
1375 \**************************************************************/
1376 uint32_t weight;
1378 if (!be_quiet)
1379 error (0, 0, _("no definition of `UNDEFINED'"));
1381 collate->undefined.ordering_len = collate->nrules;
1382 weight = ++collate->order_cnt;
/* The generated ordering data consists of NRULES values 1 followed by
   NRULES copies of the newly assigned weight.  */
1384 for (cnt = 0; cnt < collate->nrules; ++cnt)
1386 uint32_t one = 1;
1387 obstack_grow (&collate->element_mem, &one, sizeof (one));
1390 for (cnt = 0; cnt < collate->nrules; ++cnt)
1391 obstack_grow (&collate->element_mem, &weight, sizeof (weight));
1393 collate->undefined.ordering = obstack_finish (&collate->element_mem);
/* Two words for the name plus, per rule, one word and as many words as
   the corresponding leading ordering entry says.  */
1396 collate->undefined_len = 2; /* For the name: 1 x uint32_t + L'\0'. */
1397 for (cnt = 0; cnt < collate->nrules; ++cnt)
1398 collate->undefined_len += 1 + collate->undefined.ordering[cnt];
1403 void
1404 collate_output (struct localedef_t *locale, struct charset_t *charset,
1405 struct repertoire_t *repertoire, const char *output_path)
1407 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1408 uint32_t table_size, table_best, level_best, sum_best;
1409 void *last;
1410 element_t *pelem;
1411 uint32_t *name;
1412 size_t len;
1413 const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
1414 struct iovec iov[2 + nelems];
1415 struct locale_file data;
1416 uint32_t idx[nelems];
1417 struct obstack non_simple;
1418 struct obstack string_pool;
1419 size_t cnt, entry_size;
1420 uint32_t undefined_offset = UINT_MAX;
1421 uint32_t *table, *extra, *table2, *extra2;
1422 size_t extra_len;
1423 uint32_t element_hash_tab_size;
1424 uint32_t *element_hash_tab;
1425 uint32_t *element_hash_tab_ob;
1426 uint32_t element_string_pool_size;
1427 char *element_string_pool;
1428 uint32_t element_value_size;
1429 uint32_t *element_value;
1430 uint32_t *element_value_ob;
1431 uint32_t symbols_hash_tab_size;
1432 uint32_t *symbols_hash_tab;
1433 uint32_t *symbols_hash_tab_ob;
1434 uint32_t symbols_string_pool_size;
1435 char *symbols_string_pool;
1436 uint32_t symbols_class_size;
1437 uint32_t *symbols_class;
1438 uint32_t *symbols_class_ob;
1439 hash_table *hash_tab;
1440 unsigned int dummy_weights[collate->nrules + 1];
1442 sum_best = UINT_MAX;
1443 table_best = 0xffff;
1444 level_best = 0xffff;
1446 /* Compute table size. */
1447 if (!be_quiet)
1448 fputs (_("\
1449 Computing table size for collation information might take a while..."),
1450 stderr);
1451 for (table_size = 256; table_size < sum_best; ++table_size)
1453 size_t hits[table_size];
1454 unsigned int worst = 1;
1455 size_t cnt;
1457 last = NULL;
1459 for (cnt = 0; cnt < 256; ++cnt)
1460 hits[cnt] = 1;
1461 memset (&hits[256], '\0', sizeof (hits) - 256 * sizeof (size_t));
1463 while (iterate_table (&collate->result, &last, (const void **) &name,
1464 &len, (void **) &pelem) >= 0)
1465 if (pelem->ordering != NULL && pelem->name[0] > 0xff)
1466 if (++hits[(unsigned int) pelem->name[0] % table_size] > worst)
1468 worst = hits[(unsigned int) pelem->name[0] % table_size];
1469 if (table_size * worst > sum_best)
1470 break;
1473 if (table_size * worst < sum_best)
1475 sum_best = table_size * worst;
1476 table_best = table_size;
1477 level_best = worst;
1480 assert (table_best != 0xffff || level_best != 0xffff);
1481 if (!be_quiet)
1482 fputs (_(" done\n"), stderr);
1484 obstack_init (&non_simple);
1485 obstack_init (&string_pool);
1487 data.magic = LIMAGIC (LC_COLLATE);
1488 data.n = nelems;
1489 iov[0].iov_base = (void *) &data;
1490 iov[0].iov_len = sizeof (data);
1492 iov[1].iov_base = (void *) idx;
1493 iov[1].iov_len = sizeof (idx);
1495 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES)].iov_base = &collate->nrules;
1496 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES)].iov_len = sizeof (uint32_t);
1498 table = (uint32_t *) alloca (collate->nrules * sizeof (uint32_t));
1499 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES)].iov_base = table;
1500 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES)].iov_len
1501 = collate->nrules * sizeof (uint32_t);
1502 /* Another trick here. Describing the collation method needs only a
1503 few bits (3, to be exact). But the binary file should be
1504 accessible by machines with both endianesses and so we store both
1505 forms in the same word. */
1506 for (cnt = 0; cnt < collate->nrules; ++cnt)
1507 table[cnt] = collate->rules[cnt] | bswap_32 (collate->rules[cnt]);
1509 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].iov_base = &table_best;
1510 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].iov_len = sizeof (uint32_t);
1512 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].iov_base = &level_best;
1513 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].iov_len
1514 = sizeof (uint32_t);
1516 entry_size = 1 + MAX (collate->nrules, 2);
1518 table = (uint32_t *) alloca (table_best * level_best * entry_size
1519 * sizeof (table[0]));
1520 memset (table, '\0', table_best * level_best * entry_size
1521 * sizeof (table[0]));
1524 /* Macros for inserting in output table. */
1525 #define ADD_VALUE(expr) \
1526 do { \
1527 uint32_t to_write = (uint32_t) expr; \
1528 obstack_grow (&non_simple, &to_write, sizeof (to_write)); \
1529 } while (0)
1531 #define ADD_ELEMENT(pelem, len) \
1532 do { \
1533 size_t cnt, idx; \
1535 ADD_VALUE (len); \
1537 wlen = wcslen (pelem->name); \
1538 obstack_grow (&non_simple, pelem->name, (wlen + 1) * sizeof (uint32_t)); \
1540 idx = collate->nrules; \
1541 for (cnt = 0; cnt < collate->nrules; ++cnt) \
1543 size_t disp; \
1545 ADD_VALUE (pelem->ordering[cnt]); \
1546 for (disp = 0; disp < pelem->ordering[cnt]; ++disp) \
1547 ADD_VALUE (pelem->ordering[idx++]); \
1549 } while (0)
1551 #define ADD_FORWARD(pelem) \
1552 do { \
1553 /* We leave a reference in the main table and put all \
1554 information in the table for the extended entries. */ \
1555 element_t *runp; \
1556 element_t *has_simple = NULL; \
1557 size_t wlen; \
1559 table[(level * table_best + slot) * entry_size + 1] \
1560 = FORWARD_CHAR; \
1561 table[(level * table_best + slot) * entry_size + 2] \
1562 = obstack_object_size (&non_simple) / sizeof (uint32_t); \
1564 /* Here we have to construct the non-simple table entry. First \
1565 compute the total length of this entry. */ \
1566 for (runp = (pelem); runp != NULL; runp = runp->next) \
1567 if (runp->ordering != NULL) \
1569 uint32_t value; \
1570 size_t cnt; \
1572 value = 1 + wcslen (runp->name) + 1; \
1574 for (cnt = 0; cnt < collate->nrules; ++cnt) \
1575 /* We have to take care for entries without ordering \
1576 information. While reading them they get inserted in the \
1577 table and later not removed when something goes wrong with \
1578 reading its weights. */ \
1579 value += 1 + runp->ordering[cnt]; \
1581 if (runp->name[1] == L'\0') \
1582 has_simple = runp; \
1584 ADD_ELEMENT (runp, value); \
1587 if (has_simple == NULL) \
1589 size_t idx, cnt; \
1591 ADD_VALUE (collate->undefined_len + 1); \
1593 /* Add the name. */ \
1594 ADD_VALUE ((pelem)->name[0]); \
1595 ADD_VALUE (0); \
1597 idx = collate->nrules; \
1598 for (cnt = 0; cnt < collate->nrules; ++cnt) \
1600 size_t disp; \
1602 ADD_VALUE (collate->undefined.ordering[cnt]); \
1603 for (disp = 0; disp < collate->undefined.ordering[cnt]; ++disp) \
1605 if ((uint32_t) collate->undefined.ordering[idx] \
1606 == ELLIPSIS_CHAR) \
1607 ADD_VALUE ((pelem)->name[0]); \
1608 else \
1609 ADD_VALUE (collate->undefined.ordering[idx++]); \
1610 ++idx; \
1614 } while (0)
1618 /* Fill the table now. First we look for all the characters which
1619 fit into one single byte. This speeds up the 8-bit string
1620 functions. */
1621 last = NULL;
1622 while (iterate_table (&collate->result, &last, (const void **) &name,
1623 &len, (void **) &pelem) >= 0)
1624 if (pelem->name[0] <= 0xff)
1626 /* We have a single byte name. Now we must distinguish
1627 between entries in simple form (i.e., only one value per
1628 weight and no collation element starting with the same
1629 character) and those which are not. */
1630 size_t slot = ((size_t) pelem->name[0]);
1631 const size_t level = 0;
1633 table[slot * entry_size] = pelem->name[0];
1635 if (pelem->name[1] == L'\0' && pelem->next == NULL
1636 && pelem->ordering_len == collate->nrules)
1638 /* Yes, we have a simple one. Lucky us. */
1639 size_t cnt;
1641 for (cnt = 0; cnt < collate->nrules; ++cnt)
1642 table[slot * entry_size + 1 + cnt]
1643 = pelem->ordering[collate->nrules + cnt];
1645 else
1646 ADD_FORWARD (pelem);
1649 /* Now check for missing single byte entries. If one exist we fill
1650 with the UNDEFINED entry. */
1651 for (cnt = 0; cnt < 256; ++cnt)
1652 /* The first weight is never 0 for existing entries. */
1653 if (table[cnt * entry_size + 1] == 0)
1655 /* We have to fill in the information from the UNDEFINED
1656 entry. */
1657 table[cnt * entry_size] = (uint32_t) cnt;
1659 if (collate->undefined.ordering_len == collate->nrules)
1661 size_t inner;
1663 for (inner = 0; inner < collate->nrules; ++inner)
1664 if ((uint32_t)collate->undefined.ordering[collate->nrules
1665 + inner]
1666 == ELLIPSIS_CHAR)
1667 table[cnt * entry_size + 1 + inner] = cnt;
1668 else
1669 table[cnt * entry_size + 1 + inner]
1670 = collate->undefined.ordering[collate->nrules + inner];
1672 else
1674 if (undefined_offset != UINT_MAX)
1676 table[cnt * entry_size + 1] = FORWARD_CHAR;
1677 table[cnt * entry_size + 2] = undefined_offset;
1679 else
1681 const size_t slot = cnt;
1682 const size_t level = 0;
1684 ADD_FORWARD (&collate->undefined);
1685 undefined_offset = table[cnt * entry_size + 2];
1690 /* Now we are ready for inserting the whole rest. */
1691 last = NULL;
1692 while (iterate_table (&collate->result, &last, (const void **) &name,
1693 &len, (void **) &pelem) >= 0)
1694 if (pelem->name[0] > 0xff)
1696 /* Find the position. */
1697 size_t slot = ((size_t) pelem->name[0]) % table_best;
1698 size_t level = 0;
1700 while (table[(level * table_best + slot) * entry_size + 1] != 0)
1701 ++level;
1702 assert (level < level_best);
1704 if (pelem->name[1] == L'\0' && pelem->next == NULL
1705 && pelem->ordering_len == collate->nrules)
1707 /* Again a simple entry. */
1708 size_t inner;
1710 for (inner = 0; inner < collate->nrules; ++inner)
1711 table[(level * table_best + slot) * entry_size + 1 + inner]
1712 = pelem->ordering[collate->nrules + inner];
1714 else
1715 ADD_FORWARD (pelem);
1718 /* Add the UNDEFINED entry. */
1720 /* Here we have to construct the non-simple table entry. */
1721 size_t idx, cnt;
1723 undefined_offset = obstack_object_size (&non_simple);
1725 idx = collate->nrules;
1726 for (cnt = 0; cnt < collate->nrules; ++cnt)
1728 size_t disp;
1730 ADD_VALUE (collate->undefined.ordering[cnt]);
1731 for (disp = 0; disp < collate->undefined.ordering[cnt]; ++disp)
1732 ADD_VALUE (collate->undefined.ordering[idx++]);
1736 /* Finish the extra block. */
1737 extra_len = obstack_object_size (&non_simple);
1738 extra = (uint32_t *) obstack_finish (&non_simple);
1739 assert ((extra_len % sizeof (uint32_t)) == 0);
1741 /* Now we have to build the two array for the other byte ordering. */
1742 table2 = (uint32_t *) alloca (table_best * level_best * entry_size
1743 * sizeof (table[0]));
1744 extra2 = (uint32_t *) alloca (extra_len);
1746 for (cnt = 0; cnt < table_best * level_best * entry_size; ++cnt)
1747 table2[cnt] = bswap_32 (table[cnt]);
1749 for (cnt = 0; cnt < extra_len / sizeof (uint32_t); ++cnt)
1750 extra2[cnt] = bswap_32 (extra2[cnt]);
1752 /* We need a simple hashing table to get a collation-element->chars
1753 mapping. We again use internal hashing using a secondary hashing
1754 function.
1756 Each string has an associate hashing value V, computed by a
1757 fixed function. To locate the string we use open addressing with
1758 double hashing. The first index will be V % M, where M is the
1759 size of the hashing table. If no entry is found, iterating with
1760 a second, independent hashing function takes place. This second
1761 value will be 1 + V % (M - 2). The approximate number of probes
1762 will be
1764 for unsuccessful search: (1 - N / M) ^ -1
1765 for successful search: - (N / M) ^ -1 * ln (1 - N / M)
1767 where N is the number of keys.
1769 If we now choose M to be the next prime bigger than 4 / 3 * N,
1770 we get the values 4 and 1.85 resp. Because unsuccessful searches
1771 are unlikely this is a good value. Formulas: [Knuth, The Art of
1772 Computer Programming, Volume 3, Sorting and Searching, 1973,
1773 Addison Wesley] */
1774 if (collate->elements.filled == 0)
1776 /* We don't need any element table since there are no collating
1777 elements. */
1778 element_hash_tab_size = 0;
1779 element_hash_tab = NULL;
1780 element_hash_tab_ob = NULL;
1781 element_string_pool_size = 0;
1782 element_string_pool = NULL;
1783 element_value_size = 0;
1784 element_value = NULL;
1785 element_value_ob = NULL;
1787 else
1789 void *ptr; /* Running pointer. */
1790 const char *key; /* Key for current bucket. */
1791 size_t keylen; /* Length of key data. */
1792 const element_t *data; /* Data, i.e., the character sequence. */
1794 element_hash_tab_size = next_prime ((collate->elements.filled * 4) / 3);
1795 if (element_hash_tab_size < 7)
1796 /* We need a minimum to make the following code work. */
1797 element_hash_tab_size = 7;
1799 element_hash_tab = obstack_alloc (&non_simple, (2 * element_hash_tab_size
1800 * sizeof (uint32_t)));
1801 memset (element_hash_tab, '\377', (2 * element_hash_tab_size
1802 * sizeof (uint32_t)));
1804 ptr = NULL;
1805 while (iterate_table (&collate->elements, &ptr, (const void **) &key,
1806 &keylen, (void **) &data) == 0)
1808 size_t hash_val = hash_string (key, keylen);
1809 size_t idx = hash_val % element_hash_tab_size;
1811 if (element_hash_tab[2 * idx] != (~((uint32_t) 0)))
1813 /* We need the second hashing function. */
1814 size_t c = 1 + (hash_val % (element_hash_tab_size - 2));
1817 if (idx >= element_hash_tab_size - c)
1818 idx -= element_hash_tab_size - c;
1819 else
1820 idx += c;
1821 while (element_hash_tab[2 * idx] != (~((uint32_t) 0)));
1824 element_hash_tab[2 * idx] = obstack_object_size (&non_simple);
1825 element_hash_tab[2 * idx + 1] = (obstack_object_size (&string_pool)
1826 / sizeof (uint32_t));
1828 obstack_grow0 (&non_simple, key, keylen);
1829 obstack_grow (&string_pool, data->name,
1830 (wcslen (data->name) + 1) * sizeof (uint32_t));
1833 if (obstack_object_size (&non_simple) % 4 != 0)
1834 obstack_blank (&non_simple,
1835 4 - (obstack_object_size (&non_simple) % 4));
1836 element_string_pool_size = obstack_object_size (&non_simple);
1837 element_string_pool = obstack_finish (&non_simple);
1839 element_value_size = obstack_object_size (&string_pool);
1840 element_value = obstack_finish (&string_pool);
1842 /* Create the tables for the other byte order. */
1843 element_hash_tab_ob = obstack_alloc (&non_simple,
1844 (2 * element_hash_tab_size
1845 * sizeof (uint32_t)));
1846 for (cnt = 0; cnt < 2 * element_hash_tab_size; ++cnt)
1847 element_hash_tab_ob[cnt] = bswap_U32 (element_hash_tab[cnt]);
1849 element_value_ob = obstack_alloc (&string_pool, element_value_size);
1850 for (cnt = 0; cnt < element_value_size / 4; ++cnt)
1851 element_value_ob[cnt] = bswap_32 (element_value[cnt]);
1854 /* Store collation elements as map to collation class. There are
1855 three kinds of symbols:
1856 - simple characters
1857 - collation elements
1858 - collation symbols
1859 We need to make a table which lets the user to access the primary
1860 weight based on the symbol string. */
1861 symbols_hash_tab_size = next_prime ((4 * (charset->char_table.filled
1862 + collate->elements.filled
1863 + collate->symbols.filled)) / 3);
1864 symbols_hash_tab = obstack_alloc (&non_simple, (2 * symbols_hash_tab_size
1865 * sizeof (uint32_t)));
1866 memset (symbols_hash_tab, '\377', (2 * symbols_hash_tab_size
1867 * sizeof (uint32_t)));
1869 /* Now fill the array. First the symbols from the character set,
1870 then the collation elements and last the collation symbols. */
1871 hash_tab = &charset->char_table;
1872 while (1)
1874 void *ptr; /* Running pointer. */
1875 const char *key; /* Key for current bucket. */
1876 size_t keylen; /* Length of key data. */
1877 void *data; /* Data. */
1879 ptr = NULL;
1880 while (iterate_table (hash_tab, &ptr, (const void **) &key,
1881 &keylen, (void **) &data) == 0)
1883 size_t hash_val;
1884 size_t idx;
1885 uint32_t word;
1886 unsigned int *weights;
1888 if (hash_tab == &charset->char_table
1889 || hash_tab == &collate->elements)
1891 element_t *lastp, *firstp;
1892 uint32_t dummy_name[2];
1893 const uint32_t *name;
1894 size_t name_len;
1896 if (hash_tab == &charset->char_table)
1898 dummy_name[0] = (uint32_t) ((unsigned long int) data);
1899 dummy_name[1] = L'\0';
1900 name = dummy_name;
1901 name_len = sizeof (uint32_t);
1903 else
1905 element_t *elemp = (element_t *) data;
1906 name = elemp->name;
1907 name_len = wcslen (name) * sizeof (uint32_t);
1910 /* First check whether this character is used at all. */
1911 if (find_entry (&collate->result, name, name_len,
1912 (void *) &firstp) < 0)
1913 /* The symbol is not directly mentioned in the collation.
1914 I.e., we use the value for UNDEFINED. */
1915 lastp = &collate->undefined;
1916 else
1918 /* The entry for the simple character is always found at
1919 the end. */
1920 lastp = firstp;
1921 while (lastp->next != NULL && wcscmp (name, lastp->name))
1922 lastp = lastp->next;
1925 weights = lastp->ordering;
1927 else
1929 dummy_weights[0] = 1;
1930 dummy_weights[collate->nrules]
1931 = (unsigned int) ((unsigned long int) data);
1933 weights = dummy_weights;
1936 /* In LASTP->ordering we now have the collation class.
1937 Determine the place in the hashing table next. */
1938 hash_val = hash_string (key, keylen);
1939 idx = hash_val % symbols_hash_tab_size;
1941 if (symbols_hash_tab[2 * idx] != (~((uint32_t) 0)))
1943 /* We need the second hashing function. */
1944 size_t c = 1 + (hash_val % (symbols_hash_tab_size - 2));
1947 if (idx >= symbols_hash_tab_size - c)
1948 idx -= symbols_hash_tab_size - c;
1949 else
1950 idx += c;
1951 while (symbols_hash_tab[2 * idx] != (~((uint32_t) 0)));
1954 symbols_hash_tab[2 * idx] = obstack_object_size (&string_pool);
1955 symbols_hash_tab[2 * idx + 1] = (obstack_object_size (&non_simple)
1956 / sizeof (uint32_t));
1958 obstack_grow0 (&string_pool, key, keylen);
1959 /* Adding the first weight looks complicated. We have to deal
1960 with the kind it is stored and with the fact that original
1961 form uses `unsigned int's while we need `uint32_t' here. */
1962 word = weights[0];
1963 obstack_grow (&non_simple, &word, sizeof (uint32_t));
1964 for (cnt = 0; cnt < weights[0]; ++cnt)
1966 word = weights[collate->nrules + cnt];
1967 obstack_grow (&non_simple, &word, sizeof (uint32_t));
1971 if (hash_tab == &charset->char_table)
1972 hash_tab = &collate->elements;
1973 else if (hash_tab == &collate->elements)
1974 hash_tab = &collate->symbols;
1975 else
1976 break;
1979 /* Now we have the complete tables. */
1980 if (obstack_object_size (&string_pool) % 4 != 0)
1981 obstack_blank (&non_simple, 4 - (obstack_object_size (&string_pool) % 4));
1982 symbols_string_pool_size = obstack_object_size (&string_pool);
1983 symbols_string_pool = obstack_finish (&string_pool);
1985 symbols_class_size = obstack_object_size (&non_simple);
1986 symbols_class = obstack_finish (&non_simple);
1988 /* Generate tables with other byte order. */
1989 symbols_hash_tab_ob = obstack_alloc (&non_simple, (2 * symbols_hash_tab_size
1990 * sizeof (uint32_t)));
1991 for (cnt = 0; cnt < 2 * symbols_hash_tab_size; ++cnt)
1992 symbols_hash_tab_ob[cnt] = bswap_32 (symbols_hash_tab[cnt]);
1994 symbols_class_ob = obstack_alloc (&non_simple, symbols_class_size);
1995 for (cnt = 0; cnt < symbols_class_size / 4; ++cnt)
1996 symbols_class_ob[cnt] = bswap_32 (symbols_class[cnt]);
1999 /* Store table addresses and lengths. */
2000 #if __BYTE_ORDER == __BIG_ENDIAN
2001 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_base = table;
2002 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_len
2003 = table_best * level_best * entry_size * sizeof (table[0]);
2005 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_base = table2;
2006 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_len
2007 = table_best * level_best * entry_size * sizeof (table[0]);
2009 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_base = extra;
2010 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_len = extra_len;
2012 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_base = extra2;
2013 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_len = extra_len;
2014 #else
2015 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_base = table2;
2016 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_len
2017 = table_best * level_best * entry_size * sizeof (table[0]);
2019 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_base = table;
2020 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_len
2021 = table_best * level_best * entry_size * sizeof (table[0]);
2023 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_base = extra2;
2024 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_len = extra_len;
2026 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_base = extra;
2027 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_len = extra_len;
2028 #endif
2030 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED)].iov_base = &undefined_offset;
2031 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED)].iov_len = sizeof (uint32_t);
2034 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_SIZE)].iov_base
2035 = &element_hash_tab_size;
2036 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_SIZE)].iov_len
2037 = sizeof (uint32_t);
2039 #if __BYTE_ORDER == __BIG_ENDIAN
2040 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB)].iov_base
2041 = element_hash_tab;
2042 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB)].iov_len
2043 = 2 * element_hash_tab_size * sizeof (uint32_t);
2045 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL)].iov_base
2046 = element_hash_tab_ob;
2047 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL)].iov_len
2048 = 2 * element_hash_tab_size * sizeof (uint32_t);
2049 #else
2050 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL)].iov_base
2051 = element_hash_tab;
2052 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL)].iov_len
2053 = 2 * element_hash_tab_size * sizeof (uint32_t);
2055 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB)].iov_base
2056 = element_hash_tab_ob;
2057 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB)].iov_len
2058 = 2 * element_hash_tab_size * sizeof (uint32_t);
2059 #endif
2061 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_STR_POOL)].iov_base
2062 = element_string_pool;
2063 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_STR_POOL)].iov_len
2064 = element_string_pool_size;
2066 #if __BYTE_ORDER == __BIG_ENDIAN
2067 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB)].iov_base
2068 = element_value;
2069 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB)].iov_len
2070 = element_value_size;
2072 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL)].iov_base
2073 = element_value_ob;
2074 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL)].iov_len
2075 = element_value_size;
2076 #else
2077 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL)].iov_base
2078 = element_value;
2079 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL)].iov_len
2080 = element_value_size;
2082 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB)].iov_base
2083 = element_value_ob;
2084 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB)].iov_len
2085 = element_value_size;
2086 #endif
2088 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZE)].iov_base
2089 = &symbols_hash_tab_size;
2090 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZE)].iov_len
2091 = sizeof (uint32_t);
2093 #if __BYTE_ORDER == __BIG_ENDIAN
2094 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB)].iov_base
2095 = symbols_hash_tab;
2096 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB)].iov_len
2097 = 2 * symbols_hash_tab_size * sizeof (uint32_t);
2099 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL)].iov_base
2100 = symbols_hash_tab_ob;
2101 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL)].iov_len
2102 = 2 * symbols_hash_tab_size * sizeof (uint32_t);
2103 #else
2104 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL)].iov_base
2105 = symbols_hash_tab;
2106 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL)].iov_len
2107 = 2 * symbols_hash_tab_size * sizeof (uint32_t);
2109 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB)].iov_base
2110 = symbols_hash_tab_ob;
2111 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB)].iov_len
2112 = 2 * symbols_hash_tab_size * sizeof (uint32_t);
2113 #endif
2115 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_STR_POOL)].iov_base
2116 = symbols_string_pool;
2117 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_STR_POOL)].iov_len
2118 = symbols_string_pool_size;
2120 #if __BYTE_ORDER == __BIG_ENDIAN
2121 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB)].iov_base
2122 = symbols_class;
2123 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB)].iov_len
2124 = symbols_class_size;
2126 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL)].iov_base
2127 = symbols_class_ob;
2128 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL)].iov_len
2129 = symbols_class_size;
2130 #else
2131 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL)].iov_base
2132 = symbols_class;
2133 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL)].iov_len
2134 = symbols_class_size;
2136 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB)].iov_base
2137 = symbols_class_ob;
2138 iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB)].iov_len
2139 = symbols_class_size;
2140 #endif
2142 /* Update idx array. */
2143 idx[0] = iov[0].iov_len + iov[1].iov_len;
2144 for (cnt = 1; cnt < nelems; ++cnt)
2145 idx[cnt] = idx[cnt - 1] + iov[1 + cnt].iov_len;
2147 write_locale_data (output_path, "LC_COLLATE", 2 + nelems, iov);
2149 obstack_free (&non_simple, NULL);
2150 obstack_free (&string_pool, NULL);
2154 static int
2155 collate_element_to (struct linereader *ldfile,
2156 struct locale_collate_t *collate,
2157 struct token *code, struct charmap_t *charmap,
2158 struct repertoire_t *repertoire)
2160 struct charseq *seq;
2161 uint32_t value;
2162 void *not_used;
2164 seq = charmap_find_value (charmap, code->val.str.start, code->val.str.len);
2165 if (seq != NULL)
2167 lr_error (ldfile, _("symbol for multicharacter collating element "
2168 "`%.*s' duplicates symbolic name in charmap"),
2169 (int) code->val.str.len, code->val.str.start);
2170 return 1;
2173 value = repertoire_find_value (repertoire, code->val.str.start,
2174 code->val.str.len);
2175 if (value != ILLEGAL_CHAR_VALUE)
2177 lr_error (ldfile, _("symbol for multicharacter collating element "
2178 "`%.*s' duplicates symbolic name in repertoire"),
2179 (int) code->val.str.len, code->val.str.start);
2180 return 1;
2183 if (find_entry (&collate->elements, code->val.str.start, code->val.str.len,
2184 &not_used) >= 0)
2186 lr_error (ldfile, _("symbol for multicharacter collating element "
2187 "`%.*s' duplicates other element definition"),
2188 (int) code->val.str.len, code->val.str.start);
2189 return 1;
2192 if (find_entry (&collate->elements, code->val.str.start, code->val.str.len,
2193 &not_used) >= 0)
2195 lr_error (ldfile, _("symbol for multicharacter collating element "
2196 "`%.*s' duplicates symbol definition"),
2197 (int) code->val.str.len, code->val.str.start);
2198 return 1;
2201 return 0;
2205 static void
2206 collate_element_from (struct linereader *ldfile,
2207 struct locale_collate_t *collate,
2208 const char *to_str, struct token *code,
2209 struct charmap_t *charmap,
2210 struct repertoire_t *repertoire)
2212 element_t *elemp, *runp;
2214 /* CODE is a string. */
2215 elemp = (element_t *) obstack_alloc (&collate->element_mem,
2216 sizeof (element_t));
2218 /* We have to translate the string. It may contain <...> character
2219 names. */
2220 elemp->namemb = code->val.str.startmb;
2221 elemp->namewc = code->val.str.startwc;
2222 elemp->this_weight = 0;
2223 elemp->ordering = NULL;
2224 elemp->ordering_len = 0;
2226 if (elemp->namemb == NULL && elemp->namewc == NULL)
2228 /* The string contains characters which are not in the charmap nor
2229 in the repertoire. Ignore the string. */
2230 if (verbose)
2231 lr_error (ldfile, _("\
2232 `from' string in collation element declaration contains unknown character"));
2233 return;
2236 /* The entries in the linked lists of RESULT are sorting in
2237 descending order. The order is important for the `strcoll' and
2238 `wcscoll' functions. */
2239 if (find_entry (&collate->resultwc, elemp->namewc, sizeof (uint32_t),
2240 (void *) &runp) >= 0)
2242 /* We already have an entry with this key. Check whether it is
2243 identical. */
2244 element_t *prevp = NULL;
2245 int cmpres;
2249 cmpres = wcscmp (elemp->namewc, runp->namewc);
2250 if (cmpres <= 0)
2251 break;
2252 prevp = runp;
2254 while ((runp = runp->next) != NULL);
2256 if (cmpres == 0)
2257 lr_error (ldfile, _("\
2258 duplicate collating element definition (repertoire)"));
2259 else
2261 elemp->next = runp;
2262 if (prevp == NULL)
2264 if (set_entry (&collate->resultwc, elemp->namewc,
2265 sizeof (uint32_t), elemp) < 0)
2266 error (EXIT_FAILURE, 0, _("\
2267 error while inserting collation element into hash table"));
2269 else
2270 prevp->next = elemp;
2273 else
2275 elemp->next = NULL;
2276 if (insert_entry (&collate->resultwc, elemp->namewc, sizeof (uint32_t),
2277 elemp) < 0)
2278 error (EXIT_FAILURE, errno, _("error while inserting to hash table"));
2281 /* Now also insert the element definition in the multibyte table. */
2282 if (find_entry (&collate->resultmb, elemp->namemb, 1, (void *) &runp) >= 0)
2284 /* We already have an entry with this key. Check whether it is
2285 identical. */
2286 element_t *prevp = NULL;
2287 int cmpres;
2291 cmpres = strcmp (elemp->namemb, runp->namemb);
2292 if (cmpres <= 0)
2293 break;
2294 prevp = runp;
2296 while ((runp = runp->next) != NULL);
2298 if (cmpres == 0)
2299 lr_error (ldfile, _("\
2300 duplicate collating element definition (charmap)"));
2301 else
2303 elemp->next = runp;
2304 if (prevp == NULL)
2306 if (set_entry (&collate->resultmb, elemp->namemb, 1, elemp) < 0)
2307 error (EXIT_FAILURE, 0, _("\
2308 error while inserting collation element into hash table"));
2310 else
2311 prevp->next = elemp;
2314 else
2316 elemp->next = NULL;
2317 if (insert_entry (&collate->resultmb, elemp->namemb, 1, elemp) < 0)
2318 error (EXIT_FAILURE, errno, _("error while inserting to hash table"));
2321 /* Finally install the mapping from the `to'-name to the `from'-name. */
2322 if (insert_entry (&collate->elements, to_str, strlen (to_str),
2323 (void *) elemp) < 0)
2324 lr_error (ldfile, _("cannot insert new collating symbol definition: %s"),
2325 strerror (errno));
2329 static void
2330 collate_symbol (struct linereader *ldfile, struct locale_collate_t *collate,
2331 struct token *code, struct charmap_t *charmap,
2332 struct repertoire_t *repertoire)
2334 uint32_t value;
2335 struct charseq *seq;
2336 void *not_used;
2338 seq = charset_find_value (charmap, code->val.str.start, code->val.str.len);
2339 if (seq != NULL)
2341 lr_error (ldfile, _("symbol for multicharacter collating element "
2342 "`%.*s' duplicates symbolic name in charmap"),
2343 (int) code->val.str.len, code->val.str.start);
2344 return;
2347 value = repertoire (repertoire, code->val.str.start, code->val.str.len);
2348 if (value != ILLEGAL_CHAR_VALUE)
2350 lr_error (ldfile, _("symbol for multicharacter collating element "
2351 "`%.*s' duplicates symbolic name in repertoire"),
2352 (int) code->val.str.len, code->val.str.start);
2353 return;
2356 if (find_entry (&collate->elements, code->val.str.start, code->val.str.len,
2357 &not_used) >= 0)
2359 lr_error (ldfile, _("symbol for multicharacter collating element "
2360 "`%.*s' duplicates element definition"),
2361 (int) code->val.str.len, code->val.str.start);
2362 return;
2365 if (find_entry (&collate->symbols, code->val.str.start, code->val.str.len,
2366 &not_used) >= 0)
2368 lr_error (ldfile, _("symbol for multicharacter collating element "
2369 "`%.*s' duplicates other symbol definition"),
2370 (int) code->val.str.len, code->val.str.start);
2371 return;
2374 if (insert_entry (&collate->symbols, code->val.str.start, code->val.str.len,
2375 (void *) 0) < 0)
2376 lr_error (ldfile, _("cannot insert new collating symbol definition: %s"),
2377 strerror (errno));
2381 void
2382 collate_new_order (struct linereader *ldfile, struct localedef_t *locale,
2383 enum coll_sort_rule sort_rule)
2385 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
2387 if (collate->nrules >= collate->nrules_max)
2389 collate->nrules_max *= 2;
2390 collate->rules
2391 = (enum coll_sort_rule *) xrealloc (collate->rules,
2392 collate->nrules_max
2393 * sizeof (enum coll_sort_rule));
2396 collate->rules[collate->nrules++] = sort_rule;
2400 void
2401 collate_build_arrays (struct linereader *ldfile, struct localedef_t *locale)
2403 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
2405 collate->rules
2406 = (enum coll_sort_rule *) xrealloc (collate->rules,
2407 collate->nrules
2408 * sizeof (enum coll_sort_rule));
2410 /* Allocate arrays for temporary weights. */
2411 collate->weight_cnt = (int *) xmalloc (collate->nrules * sizeof (int));
2413 /* Choose arbitrary start value for table size. */
2414 collate->nweight_max = 5 * collate->nrules;
2415 collate->weight = (int *) xmalloc (collate->nweight_max * sizeof (int));
2420 collate_order_elem (struct linereader *ldfile, struct localedef_t *locale,
2421 struct token *code, struct charset_t *charset)
2423 const uint32_t zero = L'\0';
2424 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
2425 int result = 0;
2426 uint32_t value;
2427 void *tmp;
2428 unsigned int i;
2430 switch (code->tok)
2432 case tok_bsymbol:
2433 /* We have a string to find in one of the three hashing tables. */
2434 value = charset_find_value (&charset->char_table, code->val.str.start,
2435 code->val.str.len);
2436 if (value != ILLEGAL_CHAR_VALUE)
2438 element_t *lastp, *firstp;
2440 collate->kind = character;
2442 if (find_entry (&collate->result, &value, sizeof (uint32_t),
2443 (void *) &firstp) < 0)
2444 firstp = lastp = NULL;
2445 else
2447 /* The entry for the simple character is always found at
2448 the end. */
2449 lastp = firstp;
2450 while (lastp->next != NULL)
2451 lastp = lastp->next;
2453 if (lastp->name[0] == value && lastp->name[1] == L'\0')
2455 lr_error (ldfile,
2456 _("duplicate definition for character `%.*s'"),
2457 (int) code->val.str.len, code->val.str.start);
2458 lr_ignore_rest (ldfile, 0);
2459 result = -1;
2460 break;
2464 collate->current_element
2465 = (element_t *) obstack_alloc (&collate->element_mem,
2466 sizeof (element_t));
2468 obstack_grow (&collate->element_mem, &value, sizeof (value));
2469 obstack_grow (&collate->element_mem, &zero, sizeof (zero));
2471 collate->current_element->name =
2472 (const uint32_t *) obstack_finish (&collate->element_mem);
2474 collate->current_element->this_weight = ++collate->order_cnt;
2476 collate->current_element->next = NULL;
2478 if (firstp == NULL)
2480 if (insert_entry (&collate->result, &value, sizeof (uint32_t),
2481 (void *) collate->current_element) < 0)
2483 lr_error (ldfile, _("cannot insert collation element `%.*s'"),
2484 (int) code->val.str.len, code->val.str.start);
2485 exit (4);
2488 else
2489 lastp->next = collate->current_element;
2491 else if (find_entry (&collate->elements, code->val.str.start,
2492 code->val.str.len, &tmp) >= 0)
2494 collate->current_element = (element_t *) tmp;
2496 if (collate->current_element->this_weight != 0)
2498 lr_error (ldfile, _("\
2499 collation element `%.*s' appears more than once: ignore line"),
2500 (int) code->val.str.len, code->val.str.start);
2501 lr_ignore_rest (ldfile, 0);
2502 result = -1;
2503 break;
2506 collate->kind = element;
2507 collate->current_element->this_weight = ++collate->order_cnt;
2509 else if (find_entry (&collate->symbols, code->val.str.start,
2510 code->val.str.len, &tmp) >= 0)
2512 unsigned int order = ++collate->order_cnt;
2514 if ((unsigned long int) tmp != 0ul)
2516 lr_error (ldfile, _("\
2517 collation symbol `%.*s' appears more than once: ignore line"),
2518 (int) code->val.str.len, code->val.str.start);
2519 lr_ignore_rest (ldfile, 0);
2520 result = -1;
2521 break;
2524 collate->kind = symbol;
2526 if (set_entry (&collate->symbols, code->val.str.start,
2527 code->val.str.len, (void *) order) < 0)
2529 lr_error (ldfile, _("cannot process order specification"));
2530 exit (4);
2533 else
2535 if (verbose)
2536 lr_error (ldfile, _("unknown symbol `%.*s': line ignored"),
2537 (int) code->val.str.len, code->val.str.start);
2538 lr_ignore_rest (ldfile, 0);
2540 result = -1;
2542 break;
2544 case tok_undefined:
2545 collate->kind = undefined;
2546 collate->current_element = &collate->undefined;
2547 break;
2549 case tok_ellipsis:
2550 if (collate->was_ellipsis)
2552 lr_error (ldfile, _("\
2553 two lines in a row containing `...' are not allowed"));
2554 result = -1;
2556 else if (collate->kind != character)
2558 /* An ellipsis requires the previous line to be an
2559 character definition. */
2560 lr_error (ldfile, _("\
2561 line before ellipsis does not contain definition for character constant"));
2562 lr_ignore_rest (ldfile, 0);
2563 result = -1;
2565 else
2566 collate->kind = ellipsis;
2567 break;
2569 default:
2570 assert (! "illegal token in `collate_order_elem'");
2573 /* Now it's time to handle the ellipsis in the previous line. We do
2574 this only when the last line contained an definition for a
2575 character, the current line also defines an character, the
2576 character code for the later is bigger than the former. */
2577 if (collate->was_ellipsis)
2579 if (collate->kind != character)
2581 lr_error (ldfile, _("\
2582 line after ellipsis must contain character definition"));
2583 lr_ignore_rest (ldfile, 0);
2584 result = -1;
2586 else if (collate->last_char > value)
2588 lr_error (ldfile, _("end point of ellipsis range is bigger then start"));
2589 lr_ignore_rest (ldfile, 0);
2590 result = -1;
2592 else
2594 /* We can fill the arrays with the information we need. */
2595 uint32_t name[2];
2596 unsigned int *data;
2597 size_t *ptr;
2598 size_t cnt;
2600 name[0] = collate->last_char + 1;
2601 name[1] = L'\0';
2603 data = (unsigned int *) alloca ((collate->nrules + collate->nweight)
2604 * sizeof (unsigned int));
2605 ptr = (size_t *) alloca (collate->nrules * sizeof (size_t));
2607 /* Prepare data. Because the characters covered by an
2608 ellipsis all have equal values we prepare the data once
2609 and only change the variable number (if there are any).
2610 PTR[...] will point to the entries which will have to be
2611 fixed during the output loop. */
2612 for (cnt = 0; cnt < collate->nrules; ++cnt)
2614 data[cnt] = collate->weight_cnt[cnt];
2615 ptr[cnt] = (cnt == 0
2616 ? collate->nweight
2617 : ptr[cnt - 1] + collate->weight_cnt[cnt - 1]);
2620 for (cnt = 0; cnt < collate->nweight; ++cnt)
2621 data[collate->nrules + cnt] = collate->weight[cnt];
2623 for (cnt = 0; cnt < collate->nrules; ++cnt)
2624 if ((uint32_t) data[ptr[cnt]] != ELLIPSIS_CHAR)
2625 ptr[cnt] = 0;
2627 while (name[0] <= value)
2629 element_t *pelem;
2631 pelem = (element_t *) obstack_alloc (&collate->element_mem,
2632 sizeof (element_t));
2633 pelem->name
2634 = (const uint32_t *) obstack_copy (&collate->element_mem,
2635 name, 2 * sizeof (uint32_t));
2636 pelem->this_weight = ++collate->order_cnt;
2638 pelem->ordering_len = collate->nweight;
2639 pelem->ordering
2640 = (unsigned int *) obstack_copy (&collate->element_mem, data,
2641 (collate->nrules
2642 + pelem->ordering_len)
2643 * sizeof (unsigned int));
2645 /* `...' weights need to be adjusted. */
2646 for (cnt = 0; cnt < collate->nrules; ++cnt)
2647 if (ptr[cnt] != 0)
2648 pelem->ordering[ptr[cnt]] = pelem->this_weight;
2650 /* Insert new entry into result table. */
2651 if (find_entry (&collate->result, name, sizeof (uint32_t),
2652 (void *) &pelem->next) >= 0)
2654 if (set_entry (&collate->result, name, sizeof (uint32_t),
2655 (void *) pelem) < 0)
2656 error (4, 0, _("cannot insert into result table"));
2658 else
2660 pelem->next = NULL;
2661 if (insert_entry (&collate->result, name, sizeof (uint32_t),
2662 (void *) pelem) < 0)
2663 error (4, 0, _("cannot insert into result table"));
2666 /* Increment counter. */
2667 ++name[0];
2672 /* Reset counters for weights. */
2673 collate->weight_idx = 0;
2674 collate->nweight = 0;
2675 for (i = 0; i < collate->nrules; ++i)
2676 collate->weight_cnt[i] = 0;
2677 collate->current_patch = NULL;
2679 return result;
2684 collate_weight_bsymbol (struct linereader *ldfile, struct localedef_t *locale,
2685 struct token *code, struct charset_t *charset)
2687 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
2688 unsigned int here_weight;
2689 uint32_t value;
2690 void *tmp;
2692 assert (code->tok == tok_bsymbol);
2694 value = charset_find_value (&charset->char_table, code->val.str.start,
2695 code->val.str.len);
2696 if (value != ILLEGAL_CHAR_VALUE)
2698 element_t *runp;
2700 if (find_entry (&collate->result, &value, sizeof (uint32_t),
2701 (void *)&runp) < 0)
2702 runp = NULL;
2704 while (runp != NULL
2705 && (runp->name[0] != value || runp->name[1] != L'\0'))
2706 runp = runp->next;
2708 here_weight = runp == NULL ? 0 : runp->this_weight;
2710 else if (find_entry (&collate->elements, code->val.str.start,
2711 code->val.str.len, &tmp) >= 0)
2713 element_t *runp = (element_t *) tmp;
2715 here_weight = runp->this_weight;
2717 else if (find_entry (&collate->symbols, code->val.str.start,
2718 code->val.str.len, &tmp) >= 0)
2720 here_weight = (unsigned int) tmp;
2722 else
2724 if (verbose)
2725 lr_error (ldfile, _("unknown symbol `%.*s': line ignored"),
2726 (int) code->val.str.len, code->val.str.start);
2727 lr_ignore_rest (ldfile, 0);
2728 return -1;
2731 /* When we currently work on a collation symbol we do not expect any
2732 weight. */
2733 if (collate->kind == symbol)
2735 lr_error (ldfile, _("\
2736 specification of sorting weight for collation symbol does not make sense"));
2737 lr_ignore_rest (ldfile, 0);
2738 return -1;
2741 /* Add to the current collection of weights. */
2742 if (collate->nweight >= collate->nweight_max)
2744 collate->nweight_max *= 2;
2745 collate->weight = (unsigned int *) xrealloc (collate->weight,
2746 collate->nweight_max);
2749 /* If the weight is currently not known, we remember to patch the
2750 resulting tables. */
2751 if (here_weight == 0)
2753 patch_t *newp;
2755 newp = (patch_t *) obstack_alloc (&collate->element_mem,
2756 sizeof (patch_t));
2757 newp->fname = ldfile->fname;
2758 newp->lineno = ldfile->lineno;
2759 newp->token = (const char *) obstack_copy0 (&collate->element_mem,
2760 code->val.str.start,
2761 code->val.str.len);
2762 newp->where.idx = collate->nweight++;
2763 newp->next = collate->current_patch;
2764 collate->current_patch = newp;
2766 else
2767 collate->weight[collate->nweight++] = here_weight;
2768 ++collate->weight_cnt[collate->weight_idx];
2770 return 0;
2775 collate_next_weight (struct linereader *ldfile, struct localedef_t *locale)
2777 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
2779 if (collate->kind == symbol)
2781 lr_error (ldfile, _("\
2782 specification of sorting weight for collation symbol does not make sense"));
2783 lr_ignore_rest (ldfile, 0);
2784 return -1;
2787 ++collate->weight_idx;
2788 if (collate->weight_idx >= collate->nrules)
2790 lr_error (ldfile, _("too many weights"));
2791 lr_ignore_rest (ldfile, 0);
2792 return -1;
2795 return 0;
2800 collate_simple_weight (struct linereader *ldfile, struct localedef_t *locale,
2801 struct token *code, struct charset_t *charset)
2803 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
2804 unsigned int value = 0;
2806 /* There current tokens can be `IGNORE', `...', or a string. */
2807 switch (code->tok)
2809 case tok_ignore:
2810 /* This token is allowed in all situations. */
2811 value = IGNORE_CHAR;
2812 break;
2814 case tok_ellipsis:
2815 /* The ellipsis is only allowed for the `...' or `UNDEFINED'
2816 entry. */
2817 if (collate->kind != ellipsis && collate->kind != undefined)
2819 lr_error (ldfile, _("\
2820 `...' must only be used in `...' and `UNDEFINED' entries"));
2821 lr_ignore_rest (ldfile, 0);
2822 return -1;
2824 value = ELLIPSIS_CHAR;
2825 break;
2827 case tok_string:
2828 /* This can become difficult. We have to get the weights which
2829 correspond to the single wide chars in the string. But some
2830 of the `chars' might not be real characters, but collation
2831 elements or symbols. And so the string decoder might have
2832 signaled errors. The string at this point is not translated.
2833 I.e., all <...> sequences are still there. */
2835 char *runp = code->val.str.start;
2836 void *tmp;
2838 while (*runp != '\0')
2840 char *startp = (char *) runp;
2841 char *putp = (char *) runp;
2842 uint32_t wch;
2844 /* Lookup weight for char and store it. */
2845 if (*runp == '<')
2847 while (*++runp != '\0' && *runp != '>')
2849 if (*runp == ldfile->escape_char)
2850 if (*++runp == '\0')
2852 lr_error (ldfile, _("unterminated weight name"));
2853 lr_ignore_rest (ldfile, 0);
2854 return -1;
2856 *putp++ = *runp;
2858 if (*runp == '>')
2859 ++runp;
2861 if (putp == startp)
2863 lr_error (ldfile, _("empty weight name: line ignored"));
2864 lr_ignore_rest (ldfile, 0);
2865 return -1;
2868 wch = charset_find_value (&charset->char_table, startp,
2869 putp - startp);
2870 if (wch != ILLEGAL_CHAR_VALUE)
2872 element_t *pelem;
2874 if (find_entry (&collate->result, &wch, sizeof (uint32_t),
2875 (void *)&pelem) < 0)
2876 pelem = NULL;
2878 while (pelem != NULL
2879 && (pelem->name[0] != wch
2880 || pelem->name[1] != L'\0'))
2881 pelem = pelem->next;
2883 value = pelem == NULL ? 0 : pelem->this_weight;
2885 else if (find_entry (&collate->elements, startp, putp - startp,
2886 &tmp) >= 0)
2888 element_t *pelem = (element_t *) tmp;
2890 value = pelem->this_weight;
2892 else if (find_entry (&collate->symbols, startp, putp - startp,
2893 &tmp) >= 0)
2895 value = (unsigned int) tmp;
2897 else
2899 if (verbose)
2900 lr_error (ldfile, _("unknown symbol `%.*s': line ignored"),
2901 (int) (putp - startp), startp);
2902 lr_ignore_rest (ldfile, 0);
2903 return -1;
2906 else
2908 element_t *wp;
2909 uint32_t wch;
2911 if (*runp == ldfile->escape_char)
2913 static const char digits[] = "0123456789abcdef";
2914 const char *dp;
2915 int base;
2917 ++runp;
2918 if (tolower (*runp) == 'x')
2920 ++runp;
2921 base = 16;
2923 else if (tolower (*runp) == 'd')
2925 ++runp;
2926 base = 10;
2928 else
2929 base = 8;
2931 dp = strchr (digits, tolower (*runp));
2932 if (dp == NULL || (dp - digits) >= base)
2934 illegal_char:
2935 lr_error (ldfile, _("\
2936 illegal character constant in string"));
2937 lr_ignore_rest (ldfile, 0);
2938 return -1;
2940 wch = dp - digits;
2941 ++runp;
2943 dp = strchr (digits, tolower (*runp));
2944 if (dp == NULL || (dp - digits) >= base)
2945 goto illegal_char;
2946 wch *= base;
2947 wch += dp - digits;
2948 ++runp;
2950 if (base != 16)
2952 dp = strchr (digits, tolower (*runp));
2953 if (dp != NULL && (dp - digits < base))
2955 wch *= base;
2956 wch += dp - digits;
2957 ++runp;
2961 else
2962 wch = (uint32_t) *runp++;
2964 /* Lookup the weight for WCH. */
2965 if (find_entry (&collate->result, &wch, sizeof (wch),
2966 (void *)&wp) < 0)
2967 wp = NULL;
2969 while (wp != NULL
2970 && (wp->name[0] != wch || wp->name[1] != L'\0'))
2971 wp = wp->next;
2973 value = wp == NULL ? 0 : wp->this_weight;
2975 /* To get the correct name for the error message. */
2976 putp = runp;
2978 /**************************************************\
2979 |* I know here is something wrong. Characters in *|
2980 |* the string which are not in the <...> form *|
2981 |* cannot be declared forward for now!!! *|
2982 \**************************************************/
2985 /* Store in weight array. */
2986 if (collate->nweight >= collate->nweight_max)
2988 collate->nweight_max *= 2;
2989 collate->weight
2990 = (unsigned int *) xrealloc (collate->weight,
2991 collate->nweight_max);
2994 if (value == 0)
2996 patch_t *newp;
2998 newp = (patch_t *) obstack_alloc (&collate->element_mem,
2999 sizeof (patch_t));
3000 newp->fname = ldfile->fname;
3001 newp->lineno = ldfile->lineno;
3002 newp->token
3003 = (const char *) obstack_copy0 (&collate->element_mem,
3004 startp, putp - startp);
3005 newp->where.idx = collate->nweight++;
3006 newp->next = collate->current_patch;
3007 collate->current_patch = newp;
3009 else
3010 collate->weight[collate->nweight++] = value;
3011 ++collate->weight_cnt[collate->weight_idx];
3014 return 0;
3016 default:
3017 assert (! "should not happen");
3021 if (collate->nweight >= collate->nweight_max)
3023 collate->nweight_max *= 2;
3024 collate->weight = (unsigned int *) xrealloc (collate->weight,
3025 collate->nweight_max);
3028 collate->weight[collate->nweight++] = value;
3029 ++collate->weight_cnt[collate->weight_idx];
3031 return 0;
3035 void
3036 collate_end_weight (struct linereader *ldfile, struct localedef_t *locale)
3038 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
3039 element_t *pelem = collate->current_element;
3041 if (collate->kind == symbol)
3043 /* We don't have to do anything. */
3044 collate->was_ellipsis = 0;
3045 return;
3048 if (collate->kind == ellipsis)
3050 /* Before the next line is processed the ellipsis is handled. */
3051 collate->was_ellipsis = 1;
3052 return;
3055 assert (collate->kind == character || collate->kind == element
3056 || collate->kind == undefined);
3058 /* Fill in the missing weights. */
3059 while (++collate->weight_idx < collate->nrules)
3061 collate->weight[collate->nweight++] = pelem->this_weight;
3062 ++collate->weight_cnt[collate->weight_idx];
3065 /* Now we know how many ordering weights the current
3066 character/element has. Allocate room in the element structure
3067 and copy information. */
3068 pelem->ordering_len = collate->nweight;
3070 /* First we write an array with the number of values for each
3071 weight. */
3072 obstack_grow (&collate->element_mem, collate->weight_cnt,
3073 collate->nrules * sizeof (unsigned int));
3075 /* Now the weights itselves. */
3076 obstack_grow (&collate->element_mem, collate->weight,
3077 collate->nweight * sizeof (unsigned int));
3079 /* Get result. */
3080 pelem->ordering = obstack_finish (&collate->element_mem);
3082 /* Now we handle the "patches". */
3083 while (collate->current_patch != NULL)
3085 patch_t *this_patch;
3087 this_patch = collate->current_patch;
3089 this_patch->where.pos = &pelem->ordering[collate->nrules
3090 + this_patch->where.idx];
3092 collate->current_patch = this_patch->next;
3093 this_patch->next = collate->all_patches;
3094 collate->all_patches = this_patch;
3097 /* Set information for next round. */
3098 collate->was_ellipsis = 0;
3099 if (collate->kind != undefined)
3100 collate->last_char = pelem->name[0];
3104 /* The parser for the LC_CTYPE section of the locale definition. */
3105 void
3106 read_lc_collate (struct linereader *ldfile, struct localedef_t *result,
3107 struct charmap_t *charmap, struct repertoire_t *repertoire,
3108 int ignore_content)
3110 struct locale_collate_t *collate;
3111 int did_copy = 0;
3112 const char *save_str;
3114 /* The rest of the line containing `LC_COLLATE' must be free. */
3115 lr_ignore_rest (ldfile, 1);
3117 now = lr_token (ldfile, charmap, NULL);
3118 nowtok = now->tok;
3120 /* If we see `copy' now we are almost done. */
3121 if (nowtok == tok_copy)
3123 handle_copy (ldfile, charmap, repertoire, result, tok_lc_collate,
3124 LC_COLLATE, "LC_COLLATE", ignore_content);
3125 did_copy = 1;
3128 /* Prepare the data structures. */
3129 collate_startup (ldfile, result, charmap, ignore_content);
3130 collate = result->categories[LC_COLLATE].collate;
3132 while (1)
3134 /* Of course we don't proceed beyond the end of file. */
3135 if (nowtok == tok_eof)
3136 break;
3138 /* Ignore empty lines. */
3139 if (nowtok == tok_eol)
3141 now = lr_token (ldfile, charmap, NULL);
3142 nowtok = now->tok;
3143 continue;
3146 switch (nowtok)
3148 case tok_coll_weight_max:
3149 if (did_copy)
3150 goto err_label;
3151 /* The rest of the line must be a single integer value. */
3152 now = lr_token (ldfile, charmap, NULL);
3153 if (now->tok != tok_number)
3154 goto err_label;
3155 /* We simply forget about the value we just read, the implementation
3156 has no fixed limits. */
3157 lr_ignore_rest (ldfile, 1);
3158 break;
3160 case tok_script:
3161 if (did_copy)
3162 goto err_label;
3163 /* We expect the name of the script in brackets. */
3164 now = lr_token (ldfile, charmap, NULL);
3165 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
3166 goto err_label;
3167 if (now->tok != tok_bsymbol)
3169 lr_error (ldfile, _("\
3170 script name `%s' must not duplicate any known name"),
3171 tok->val.str.startmb);
3172 lr_ignore_rest (ldfile, 0);
3173 break;
3175 collate->scripts = xmalloc (collate->scripts,
3176 (collate->nscripts
3177 * sizeof (const char *)));
3178 collate->scripts[collate->nscripts++] = tok->val.str.startmb;
3179 lr_ignore_rest (ldfile, 1);
3180 break;
3182 case tok_collating_element:
3183 if (did_copy)
3184 goto err_label;
3185 /* Get the first argument, a symbol in brackets. */
3186 now = lr_token (ldfile, charmap, NULL);
3187 if (now->tok != tok_bsymbol)
3188 goto err_label;
3189 /* Test it. */
3190 if (collate_element_to (ldfile, collate, now, charmap, repertoire))
3192 /* An error occurred. */
3193 lr_ignore_rest (ldfile, 0);
3194 break;
3196 save_str = tok->val.str.startmb;
3197 /* Next comes `from'. */
3198 now = lr_token (ldfile, charmap, NULL);
3199 if (now->tok != tok_from)
3200 goto err_label;
3201 /* Now comes a string. */
3202 now = lr_token (ldfile, charmap, repertoire);
3203 if (now->tok != tok_string)
3204 goto err_label;
3205 collate_element_from (ldfile, collate, save_str, now, charmap,
3206 repertoire);
3207 /* The rest of the line should be empty. */
3208 lr_ignore_rest (ldfile, 1);
3209 break;
3211 case tok_collating_symbol:
3212 if (did_copy)
3213 goto err_label;
3214 /* Get the argument, a single symbol in brackets. */
3215 now = lr_token (ldfile, charmap, NULL);
3216 if (now->tok != tok_bsymbol)
3217 goto err_label;
3218 collate_symbol (ldfile, collate, now, charmap, repertoire);
3219 break;
3221 case tok_order_start:
3222 if (did_copy)
3223 goto err_label;
3225 /* We expect now a scripting symbol or start right away
3226 with the order keywords. Or we have no argument at all
3227 in which means `forward'. */
3228 now = lr_token (ldfile, charmap, NULL);
3229 if (now->tok == tok_eol)
3231 static enum coll_sort_rule default_rule = sort_forward;
3232 /* Use a single `forward' rule. */
3233 collate->nrules = 1;
3234 collate->rules = &default_rule;
3236 else
3238 /* XXX We don't recognize the ISO 14651 extensions yet. */
3239 uint32_t nrules = 0;
3240 uint32_t nrules_max = 32;
3241 enum coll_sort_rule *rules = alloca (nrules_max
3242 * sizeof (*rules));
3243 int saw_semicolon = 0;
3245 memset (rules, '\0', nrules_max * sizeof (*rules));
3248 if (now->tok != tok_forward && now->tok != tok_backward
3249 && now->tok != tok_position)
3250 goto err_label;
3252 if (saw_semicolon)
3254 if (nrules == nrules_max)
3256 newp = alloca (nrules_max * 2 * sizeof (*rules));
3257 rules = memcpy (newp, rules,
3258 nrules_max * sizeof (*rules));
3259 memset (&rules[nrules_max], '\0',
3260 nrules_max * sizeof (*rules));
3261 nrules_max *= 2;
3263 ++nrules;
3266 switch (now->tok)
3268 case tok_forward:
3269 if ((rules[nrules] & sort_backward) != 0)
3271 lr_error (ldfile, _("\
3272 `forward' and `backward' order exclude each other"));
3273 lr_ignore_rest (ldfile, 0);
3274 goto error_sort;
3276 rules[nrules] |= sort_forward;
3277 break;
3278 case tok_backward:
3279 if ((rules[nrules] & sort_forward) != 0)
3281 lr_error (ldfile, _("\
3282 `forward' and `backward' order exclude each other"));
3283 lr_ignore_rest (ldfile, 0);
3284 goto error_sort;
3286 rules[nrules] |= sort_backward;
3287 break;
3288 case tok_position:
3289 rules[nrules] |= tok_position;
3290 break;
3293 /* Get the next token. This is either the end of the line,
3294 a comma or a semicolon. */
3295 now = lr_token (ldfile, charmap, NULL);
3296 if (now->tok == tok_comma || now->tok == tok_semicolon)
3298 saw_semicolon = now->tok == tok_semicolon;
3299 now = lr_token (ldfile, charmap, NULL);
3302 while (now->tok != tok_eol || now->tok != tok_eof);
3304 error_sort:
3305 collate->nrules = nrules;
3306 collate->rules = memcpy (xmalloc (nrules * sizeof (*rules)),
3307 rules, nrules * sizeof (*rules));
3310 /* Now read the rules. */
3311 read_rules (ldfile, collate, charmap, repertoire);
3312 break;
3314 case tok_reorder_after:
3315 break;
3317 case tok_reorder_script_after:
3318 break;
3320 default:
3321 err_label:
3322 if (now->tok != tok_eof)
3323 SYNTAX_ERROR (_("syntax error in %s locale definition"),
3324 "LC_COLLATE");
3327 /* Prepare for the next round. */
3328 now = lr_token (ldfile, charmap, NULL);
3329 nowtok = now->tok;
3332 /* When we come here we reached the end of the file. */
3333 lr_error (ldfile, _("premature end of file while reading category `%s'"),
3334 "LC_COLLATE");
3337 #endif