1 /* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
28 #include "localeinfo.h"
29 #include "linereader.h"
31 #include "localedef.h"
33 /* Uncomment the following line in the production version. */
34 /* #define NDEBUG 1 */
37 #define obstack_chunk_alloc malloc
38 #define obstack_chunk_free free
40 /* Forward declaration. */
43 /* Data type for list of strings. */
46 struct section_list
*next
;
47 /* Name of the section. */
49 /* First element of this section. */
50 struct element_t
*first
;
51 /* Last element of this section. */
52 struct element_t
*last
;
53 /* These are the rules for this section. */
54 enum coll_sort_rule
*rules
;
57 /* Data type for collating element. */
64 struct element_t
**weights
;
66 /* Where does the definition come from. */
70 /* Which section does this belong to. */
71 struct section_list
*section
;
73 /* Predecessor and successor in the order list. */
74 struct element_t
*last
;
75 struct element_t
*next
;
78 /* Data type for collating symbol. */
81 /* Point to place in the order list. */
82 struct element_t
*order
;
84 /* Where does the definition come from. */
90 /* The real definition of the struct for the LC_COLLATE locale. */
91 struct locale_collate_t
96 /* List of known scripts. */
97 struct section_list
*sections
;
98 /* Current section using definition. */
99 struct section_list
*current_section
;
100 /* There always can be an unnamed section. */
101 struct section_list unnamed_section
;
102 /* To make handling of errors easier we have another section. */
103 struct section_list error_section
;
105 /* Number of sorting rules given in order_start line. */
108 /* Start of the order list. */
109 struct element_t
*start
;
111 /* The undefined element. */
112 struct element_t undefined
;
114 /* This is the cursor for `reorder_after' insertions. */
115 struct element_t
*cursor
;
117 /* Remember whether last weight was an ellipsis. */
120 /* Known collating elements. */
121 hash_table elem_table
;
123 /* Known collating symbols. */
124 hash_table sym_table
;
126 /* Known collation sequences. */
127 hash_table seq_table
;
129 struct obstack mempool
;
131 /* The LC_COLLATE category is a bit special as it is sometimes possible
132 that the definitions from more than one input file contain information.
133 Therefore we keep all relevant input in a list. */
134 struct locale_collate_t
*next
;
138 /* We have a few global variables which are used for reading all
139 LC_COLLATE category descriptions in all files. */
143 static struct section_list
*
144 make_seclist_elem (struct locale_collate_t
*collate
, const char *string
,
145 struct section_list
*next
)
147 struct section_list
*newp
;
149 newp
= (struct section_list
*) obstack_alloc (&collate
->mempool
,
159 static struct element_t
*
160 new_element (struct locale_collate_t
*collate
, const char *mbs
,
163 struct element_t
*newp
;
165 newp
= (struct element_t
*) obstack_alloc (&collate
->mempool
,
174 newp
->section
= NULL
;
183 static struct symbol_t
*
184 new_symbol (struct locale_collate_t
*collate
)
186 struct symbol_t
*newp
;
188 newp
= (struct symbol_t
*) obstack_alloc (&collate
->mempool
, sizeof (*newp
));
199 /* Test whether this name is already defined somewhere. */
201 check_duplicate (struct linereader
*ldfile
, struct locale_collate_t
*collate
,
202 struct charmap_t
*charmap
, struct repertoire_t
*repertoire
,
203 const char *symbol
, size_t symbol_len
)
207 if (find_entry (&charmap
->char_table
, symbol
, symbol_len
, &ignore
) == 0)
209 lr_error (ldfile
, _("`%s' already defined in charmap"), symbol
);
213 if (find_entry (&repertoire
->char_table
, symbol
, symbol_len
, &ignore
) == 0)
215 lr_error (ldfile
, _("`%s' already defined in repertoire"), symbol
);
219 if (find_entry (&collate
->sym_table
, symbol
, symbol_len
, &ignore
) == 0)
221 lr_error (ldfile
, _("`%s' already defined as collating symbol"), symbol
);
225 if (find_entry (&collate
->elem_table
, symbol
, symbol_len
, &ignore
) == 0)
227 lr_error (ldfile
, _("`%s' already defined as collating element"),
236 /* Read the direction specification. */
238 read_directions (struct linereader
*ldfile
, struct token
*arg
,
239 struct charmap_t
*charmap
, struct repertoire_t
*repertoire
,
240 struct locale_collate_t
*collate
)
243 int max
= nrules
?: 10;
244 enum coll_sort_rule
*rules
= calloc (max
, sizeof (*rules
));
251 if (arg
->tok
== tok_forward
)
253 if (rules
[cnt
] & sort_backward
)
257 lr_error (ldfile
, _("\
258 %s: `forward' and `backward' are mutually excluding each other"),
263 else if (rules
[cnt
] & sort_forward
)
267 lr_error (ldfile
, _("\
268 %s: `%s' mentioned twice in definition of weight %d"),
269 "LC_COLLATE", "forward", cnt
+ 1);
273 rules
[cnt
] |= sort_forward
;
277 else if (arg
->tok
== tok_backward
)
279 if (rules
[cnt
] & sort_forward
)
283 lr_error (ldfile
, _("\
284 %s: `forward' and `backward' are mutually excluding each other"),
289 else if (rules
[cnt
] & sort_backward
)
293 lr_error (ldfile
, _("\
294 %s: `%s' mentioned twice in definition of weight %d"),
295 "LC_COLLATE", "backward", cnt
+ 1);
299 rules
[cnt
] |= sort_backward
;
303 else if (arg
->tok
== tok_position
)
305 if (rules
[cnt
] & sort_position
)
309 lr_error (ldfile
, _("\
310 %s: `%s' mentioned twice in definition of weight %d in category `%s'"),
311 "LC_COLLATE", "position", cnt
+ 1);
315 rules
[cnt
] |= sort_position
;
321 arg
= lr_token (ldfile
, charmap
, repertoire
);
323 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
|| arg
->tok
== tok_comma
324 || arg
->tok
== tok_semicolon
)
326 if (! valid
&& ! warned
)
328 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
332 /* See whether we have to increment the counter. */
333 if (arg
->tok
!= tok_comma
&& rules
[cnt
] != 0)
336 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
)
337 /* End of line or file, so we exit the loop. */
342 /* See whether we have enough room in the array. */
346 rules
= (enum coll_sort_rule
*) xrealloc (rules
,
349 memset (&rules
[cnt
], '\0', (max
- cnt
) * sizeof (*rules
));
356 /* There must not be any more rules. */
359 lr_error (ldfile
, _("\
360 %s: too many rules; first entry only had %d"),
361 "LC_COLLATE", nrules
);
365 lr_ignore_rest (ldfile
, 0);
374 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
379 arg
= lr_token (ldfile
, charmap
, repertoire
);
384 /* Now we know how many rules we have. */
386 rules
= (enum coll_sort_rule
*) xrealloc (rules
,
387 nrules
* sizeof (*rules
));
393 /* Not enough rules in this specification. */
395 lr_error (ldfile
, _("%s: not enough sorting rules"), "LC_COLLATE");
398 rules
[cnt
] = sort_forward
;
399 while (++cnt
< nrules
);
403 collate
->current_section
->rules
= rules
;
408 insert_value (struct linereader
*ldfile
, struct token
*arg
,
409 struct charmap_t
*charmap
, struct repertoire_t
*repertoire
,
410 struct locale_collate_t
*collate
)
412 /* First find out what kind of symbol this is. */
415 struct element_t
*elem
= NULL
;
418 /* First determine the wide character. There must be such a value,
419 otherwise we ignore it (if it is no collation symbol or element). */
420 wc
= repertoire_find_value (repertoire
, arg
->val
.str
.startmb
,
423 /* Try to find the character in the charmap. */
424 seq
= charmap_find_value (charmap
, arg
->val
.str
.startmb
, arg
->val
.str
.lenmb
);
426 if (wc
== ILLEGAL_CHAR_VALUE
&& seq
== NULL
)
428 /* It's no character, so look through the collation elements and
432 if (find_entry (&collate
->sym_table
, arg
->val
.str
.startmb
,
433 arg
->val
.str
.lenmb
, &result
) == 0)
435 /* It's a collation symbol. */
436 struct symbol_t
*sym
= (struct symbol_t
*) result
;
440 elem
= sym
->order
= new_element (collate
, arg
->val
.str
.startmb
,
441 arg
->val
.str
.startwc
);
443 else if (find_entry (&collate
->elem_table
, arg
->val
.str
.startmb
,
444 arg
->val
.str
.lenmb
, (void **) &elem
) != 0)
445 /* It's also no collation element. Therefore ignore it. */
450 /* Otherwise the symbol stands for a character. Make sure it is
451 not already in the table. */
456 /* XXX HACK HACK HACK */
459 /* Test whether this element is not already in the list. */
460 if (elem
->next
!= NULL
|| elem
->next
== collate
->cursor
)
462 lr_error (ldfile
, _("order for `%.*s' already defined at %s:%Z"),
463 arg
->val
.str
.lenmb
, arg
->val
.str
.startmb
,
464 elem
->file
, elem
->line
);
468 /* Initialize all the fields. */
469 elem
->file
= ldfile
->fname
;
470 elem
->line
= ldfile
->lineno
;
471 elem
->last
= collate
->cursor
;
472 elem
->next
= collate
->cursor
? collate
->cursor
->next
: NULL
;
473 elem
->weights
= (struct element_t
**)
474 obstack_alloc (&collate
->mempool
, nrules
* sizeof (struct element_t
*));
475 memset (elem
->weights
, '\0', nrules
* sizeof (struct element_t
*));
477 if (collate
->current_section
->first
== NULL
)
478 collate
->current_section
->first
= elem
;
479 if (collate
->current_section
->last
== collate
->cursor
)
480 collate
->current_section
->last
= elem
;
482 collate
->cursor
= elem
;
484 /* Now read the rest of the line. */
485 ldfile
->return_widestr
= 1;
490 arg
= lr_token (ldfile
, charmap
, repertoire
);
492 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
)
494 /* This means the rest of the line uses the current element
497 elem
->weights
[weight_cnt
] = elem
;
498 while (++weight_cnt
< nrules
);
503 if (arg
->tok
== tok_ignore
)
505 /* The weight for this level has to be ignored. We use the
506 null pointer to indicate this. */
508 else if (arg
->tok
== tok_bsymbol
)
513 while (++weight_cnt
< nrules
);
515 lr_ignore_rest (ldfile
, weight_cnt
== nrules
);
520 collate_startup (struct linereader
*ldfile
, struct localedef_t
*locale
,
525 struct locale_collate_t
*collate
;
527 collate
= locale
->categories
[LC_COLLATE
].collate
=
528 (struct locale_collate_t
*) xcalloc (1,
529 sizeof (struct locale_collate_t
));
531 /* Init the various data structures. */
532 init_hash (&collate
->elem_table
, 100);
533 init_hash (&collate
->sym_table
, 100);
534 init_hash (&collate
->seq_table
, 500);
535 obstack_init (&collate
->mempool
);
537 collate
->col_weight_max
= -1;
540 ldfile
->translate_strings
= 1;
541 ldfile
->return_widestr
= 0;
546 collate_finish (struct localedef_t
*locale
, struct charmap_t
*charmap
)
552 collate_output (struct localedef_t
*locale
, struct charmap_t
*charmap
,
553 const char *output_path
)
559 collate_read (struct linereader
*ldfile
, struct localedef_t
*result
,
560 struct charmap_t
*charmap
, const char *repertoire_name
,
563 struct repertoire_t
*repertoire
= NULL
;
564 struct locale_collate_t
*collate
;
569 int was_ellipsis
= 0;
571 /* Get the repertoire we have to use. */
572 if (repertoire_name
!= NULL
)
573 repertoire
= repertoire_read (repertoire_name
);
575 /* The rest of the line containing `LC_COLLATE' must be free. */
576 lr_ignore_rest (ldfile
, 1);
580 now
= lr_token (ldfile
, charmap
, NULL
);
583 while (nowtok
== tok_eol
);
585 if (nowtok
== tok_copy
)
588 now
= lr_token (ldfile
, charmap
, NULL
);
589 if (now
->tok
!= tok_string
)
591 /* XXX Use the name */
592 lr_ignore_rest (ldfile
, 1);
594 now
= lr_token (ldfile
, charmap
, NULL
);
598 /* Prepare the data structures. */
599 collate_startup (ldfile
, result
, ignore_content
);
600 collate
= result
->categories
[LC_COLLATE
].collate
;
604 /* Of course we don't proceed beyond the end of file. */
605 if (nowtok
== tok_eof
)
608 /* Ignore empty lines. */
609 if (nowtok
== tok_eol
)
611 now
= lr_token (ldfile
, charmap
, NULL
);
618 case tok_coll_weight_max
:
619 /* Ignore the rest of the line if we don't need the input of
623 lr_ignore_rest (ldfile
, 0);
630 arg
= lr_token (ldfile
, charmap
, NULL
);
631 if (arg
->tok
!= tok_number
)
633 if (collate
->col_weight_max
!= -1)
634 lr_error (ldfile
, _("%s: duplicate definition of `%s'"),
635 "LC_COLLATE", "col_weight_max");
637 collate
->col_weight_max
= arg
->val
.num
;
638 lr_ignore_rest (ldfile
, 1);
641 case tok_section_symbol
:
642 /* Ignore the rest of the line if we don't need the input of
646 lr_ignore_rest (ldfile
, 0);
653 arg
= lr_token (ldfile
, charmap
, repertoire
);
654 if (arg
->tok
!= tok_bsymbol
)
656 else if (!ignore_content
)
658 /* Check whether this section is already known. */
659 struct section_list
*known
= collate
->sections
;
660 while (known
!= NULL
)
661 if (strcmp (known
->name
, arg
->val
.str
.startmb
) == 0)
667 _("%s: duplicate declaration of section `%s'"),
668 "LC_COLLATE", arg
->val
.str
.startmb
);
669 free (arg
->val
.str
.startmb
);
672 collate
->sections
= make_seclist_elem (collate
,
673 arg
->val
.str
.startmb
,
676 lr_ignore_rest (ldfile
, known
== NULL
);
680 free (arg
->val
.str
.startmb
);
681 lr_ignore_rest (ldfile
, 0);
685 case tok_collating_element
:
686 /* Ignore the rest of the line if we don't need the input of
690 lr_ignore_rest (ldfile
, 0);
697 arg
= lr_token (ldfile
, charmap
, repertoire
);
698 if (arg
->tok
!= tok_bsymbol
)
702 const char *symbol
= arg
->val
.str
.startmb
;
703 size_t symbol_len
= arg
->val
.str
.lenmb
;
705 /* Next the `from' keyword. */
706 arg
= lr_token (ldfile
, charmap
, repertoire
);
707 if (arg
->tok
!= tok_from
)
709 free ((char *) symbol
);
713 ldfile
->return_widestr
= 1;
715 /* Finally the string with the replacement. */
716 arg
= lr_token (ldfile
, charmap
, repertoire
);
717 ldfile
->return_widestr
= 0;
718 if (arg
->tok
!= tok_string
)
724 lr_error (ldfile
, _("\
725 %s: unknown character in collating element name"),
727 if (arg
->val
.str
.startmb
== NULL
)
728 lr_error (ldfile
, _("\
729 %s: unknown character in collating element definition"),
731 if (arg
->val
.str
.startwc
== NULL
)
732 lr_error (ldfile
, _("\
733 %s: unknown wide character in collating element definition"),
735 else if (arg
->val
.str
.lenwc
< 2)
736 lr_error (ldfile
, _("\
737 %s: substitution string in collating element definition must have at least two characters"),
742 /* The name is already defined. */
743 if (check_duplicate (ldfile
, collate
, charmap
,
744 repertoire
, symbol
, symbol_len
))
747 if (insert_entry (&collate
->elem_table
,
749 new_element (collate
,
750 arg
->val
.str
.startmb
,
751 arg
->val
.str
.startwc
))
753 lr_error (ldfile
, _("\
754 error while adding collating element"));
763 free ((char *) symbol
);
764 if (arg
->val
.str
.startmb
!= NULL
)
765 free (arg
->val
.str
.startmb
);
766 if (arg
->val
.str
.startwc
!= NULL
)
767 free (arg
->val
.str
.startwc
);
769 lr_ignore_rest (ldfile
, 1);
773 case tok_collating_symbol
:
774 /* Ignore the rest of the line if we don't need the input of
778 lr_ignore_rest (ldfile
, 0);
785 arg
= lr_token (ldfile
, charmap
, repertoire
);
786 if (arg
->tok
!= tok_bsymbol
)
790 const char *symbol
= arg
->val
.str
.startmb
;
791 size_t symbol_len
= arg
->val
.str
.lenmb
;
796 lr_error (ldfile
, _("\
797 %s: unknown character in collating symbol name"),
801 /* The name is already defined. */
802 if (check_duplicate (ldfile
, collate
, charmap
,
803 repertoire
, symbol
, symbol_len
))
806 if (insert_entry (&collate
->sym_table
,
808 new_symbol (collate
)) < 0)
809 lr_error (ldfile
, _("\
810 error while adding collating symbol"));
817 free ((char *) symbol
);
819 lr_ignore_rest (ldfile
, 1);
823 case tok_symbol_equivalence
:
824 /* Ignore the rest of the line if we don't need the input of
828 lr_ignore_rest (ldfile
, 0);
835 arg
= lr_token (ldfile
, charmap
, repertoire
);
836 if (arg
->tok
!= tok_bsymbol
)
840 const char *newname
= arg
->val
.str
.startmb
;
841 size_t newname_len
= arg
->val
.str
.lenmb
;
844 struct symbol_t
*symval
;
846 arg
= lr_token (ldfile
, charmap
, repertoire
);
847 if (arg
->tok
!= tok_bsymbol
)
850 free ((char *) newname
);
854 symname
= arg
->val
.str
.startmb
;
855 symname_len
= arg
->val
.str
.lenmb
;
861 lr_error (ldfile
, _("\
862 %s: unknown character in equivalent definition name"),
868 lr_error (ldfile
, _("\
869 %s: unknown character in equivalent definition value"),
873 /* The name is already defined. */
874 if (check_duplicate (ldfile
, collate
, charmap
,
875 repertoire
, symname
, symname_len
))
878 /* See whether the symbol name is already defined. */
879 if (find_entry (&collate
->sym_table
, symname
, symname_len
,
880 (void **) &symval
) != 0)
882 lr_error (ldfile
, _("\
883 %s: unknown symbol `%s' in equivalent definition"),
884 "LC_COLLATE", symname
);
888 if (insert_entry (&collate
->sym_table
,
889 newname
, newname_len
, symval
) < 0)
891 lr_error (ldfile
, _("\
892 error while adding equivalent collating symbol"));
896 free ((char *) symname
);
902 free ((char *) newname
);
904 free ((char *) symname
);
906 lr_ignore_rest (ldfile
, 1);
910 case tok_order_start
:
911 /* Ignore the rest of the line if we don't need the input of
915 lr_ignore_rest (ldfile
, 0);
919 if (state
!= 0 && state
!= 1)
923 /* The 14652 draft does not specify whether all `order_start' lines
924 must contain the same number of sort-rules, but 14651 does. So
925 we require this here as well. */
926 arg
= lr_token (ldfile
, charmap
, repertoire
);
927 if (arg
->tok
== tok_bsymbol
)
929 /* This better should be a section name. */
930 struct section_list
*sp
= collate
->sections
;
932 && strcmp (sp
->name
, arg
->val
.str
.startmb
) != 0)
937 lr_error (ldfile
, _("\
938 %s: unknown section name `%s'"),
939 "LC_COLLATE", arg
->val
.str
.startmb
);
940 /* We use the error section. */
941 collate
->current_section
= &collate
->error_section
;
945 /* Remember this section. */
946 collate
->current_section
= sp
;
948 /* One should not be allowed to open the same
950 if (sp
->first
!= NULL
)
951 lr_error (ldfile
, _("\
952 %s: multiple order definitions for section `%s'"),
953 "LC_COLLATE", sp
->name
);
955 /* Next should come the end of the line or a semicolon. */
956 arg
= lr_token (ldfile
, charmap
, repertoire
);
957 if (arg
->tok
== tok_eol
)
961 /* This means we have exactly one rule: `forward'. */
962 if (collate
->nrules
> 1)
963 lr_error (ldfile
, _("\
964 %s: invalid number of sorting rules"),
968 sp
->rules
= obstack_alloc (&collate
->mempool
,
969 (sizeof (enum coll_sort_rule
)
971 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
972 sp
->rules
[cnt
] = sort_forward
;
978 /* Get the next token. */
979 arg
= lr_token (ldfile
, charmap
, repertoire
);
984 /* There is no section symbol. Therefore we use the unnamed
986 collate
->current_section
= &collate
->unnamed_section
;
988 if (collate
->unnamed_section
.first
!= NULL
)
989 lr_error (ldfile
, _("\
990 %s: multiple order definitions for unnamed section"),
994 /* Now read the direction names. */
995 read_directions (ldfile
, arg
, charmap
, repertoire
, collate
);
999 /* Ignore the rest of the line if we don't need the input of
1003 lr_ignore_rest (ldfile
, 0);
1010 lr_ignore_rest (ldfile
, 1);
1013 case tok_reorder_after
:
1014 /* Ignore the rest of the line if we don't need the input of
1018 lr_ignore_rest (ldfile
, 0);
1022 if (state
!= 2 && state
!= 3)
1025 /* XXX get symbol */
1028 case tok_reorder_end
:
1029 /* Ignore the rest of the line if we don't need the input of
1037 lr_ignore_rest (ldfile
, 1);
1041 /* Ignore the rest of the line if we don't need the input of
1045 lr_ignore_rest (ldfile
, 0);
1049 if (state
!= 1 && state
!= 3)
1054 /* It is possible that we already have this collation sequence.
1055 In this case we move the entry. */
1056 struct element_t
*seqp
;
1058 if (find_entry (&collate
->seq_table
, arg
->val
.str
.startmb
,
1059 arg
->val
.str
.lenmb
, (void **) &seqp
) == 0)
1061 /* Remove the entry from the old position. */
1062 if (seqp
->last
== NULL
)
1063 collate
->start
= seqp
->next
;
1065 seqp
->last
->next
= seqp
->next
;
1066 if (seqp
->next
!= NULL
)
1067 seqp
->next
->last
= seqp
->last
;
1069 /* We also have to check whether this entry is the
1070 first or last of a section. */
1071 if (seqp
->section
->first
== seqp
)
1073 if (seqp
->section
->first
== seqp
->section
->last
)
1074 /* This section has no content anymore. */
1075 seqp
->section
->first
= seqp
->section
->last
= NULL
;
1077 seqp
->section
->first
= seqp
->next
;
1079 else if (seqp
->section
->last
== seqp
)
1080 seqp
->section
->last
= seqp
->last
;
1082 seqp
->last
= seqp
->next
= NULL
;
1086 /* Now insert in the new place. */
1087 insert_value (ldfile
, arg
, charmap
, repertoire
, collate
);
1091 /* Ignore the rest of the line if we don't need the input of
1095 lr_ignore_rest (ldfile
, 0);
1101 /* XXX handle UNDEFINED weight */
1105 /* Ignore the rest of the line if we don't need the input of
1109 lr_ignore_rest (ldfile
, 0);
1113 if (state
!= 1 && state
!= 3)
1117 /* XXX Read the remainder of the line and remember what are
1122 /* Next we assume `LC_COLLATE'. */
1123 if (!ignore_content
)
1126 /* We must either see a copy statement or have
1129 _("%s: empty category description not allowed"),
1131 else if (state
== 1)
1132 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
1134 else if (state
== 3)
1135 error (0, 0, _("%s: missing `reorder-end' keyword"),
1138 arg
= lr_token (ldfile
, charmap
, NULL
);
1139 if (arg
->tok
== tok_eof
)
1141 if (arg
->tok
== tok_eol
)
1142 lr_error (ldfile
, _("%s: incomplete `END' line"), "LC_COLLATE");
1143 else if (arg
->tok
!= tok_lc_collate
)
1144 lr_error (ldfile
, _("\
1145 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
1146 lr_ignore_rest (ldfile
, arg
->tok
== tok_lc_collate
);
1151 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
1154 /* Prepare for the next round. */
1155 now
= lr_token (ldfile
, charmap
, NULL
);
1159 /* When we come here we reached the end of the file. */
1160 lr_error (ldfile
, _("%s: premature end of file"), "LC_COLLATE");
1166 /* What kind of symbols get defined? */
1177 typedef struct patch_t
1187 struct patch_t
*next
;
1191 typedef struct element_t
1194 const uint32_t *namewc
;
1195 unsigned int this_weight
;
1197 struct element_t
*next
;
1199 unsigned int *ordering
;
1200 size_t ordering_len
;
1204 /* The real definition of the struct for the LC_COLLATE locale. */
1205 struct locale_collate_t
1207 /* Collate symbol table. Simple mapping to number. */
1210 /* The collation elements. */
1211 hash_table elements
;
1212 struct obstack element_mem
;
1214 /* The result tables. */
1215 hash_table resultmb
;
1216 hash_table resultwc
;
1218 /* Sorting rules given in order_start line. */
1220 enum coll_sort_rule
*rules
;
1222 /* Used while recognizing symbol composed of multiple tokens
1223 (collating-element). */
1224 const char *combine_token
;
1225 size_t combine_token_len
;
1227 /* How many sorting order specifications so far. */
1228 unsigned int order_cnt
;
1230 /* Was lastline ellipsis? */
1232 /* Value of last entry if was character. */
1234 /* Current element. */
1235 element_t
*current_element
;
1236 /* What kind of symbol is current element. */
1237 enum coll_symbol kind
;
1240 patch_t
*current_patch
;
1241 patch_t
*all_patches
;
1243 /* Room for the UNDEFINED information. */
1244 element_t undefined
;
1245 unsigned int undefined_len
;
1247 /* Script information. */
1248 const char **scripts
;
1249 unsigned int nscripts
;
1253 /* Be verbose? Defined in localedef.c. */
1258 #define obstack_chunk_alloc malloc
1259 #define obstack_chunk_free free
1262 /* Prototypes for local functions. */
1263 static void collate_startup (struct linereader
*ldfile
,
1264 struct localedef_t
*locale
,
1265 struct charmap_t
*charmap
, int ignore_content
);
1269 collate_startup (struct linereader
*ldfile
, struct localedef_t
*locale
,
1270 struct charmap_t
*charset
, int ignore_content
)
1272 struct locale_collate_t
*collate
;
1274 /* Allocate the needed room. */
1275 locale
->categories
[LC_COLLATE
].collate
= collate
=
1276 (struct locale_collate_t
*) xmalloc (sizeof (struct locale_collate_t
));
1278 /* Allocate hash table for collating elements. */
1279 if (init_hash (&collate
->elements
, 512))
1280 error (4, 0, _("memory exhausted"));
1281 collate
->combine_token
= NULL
;
1282 obstack_init (&collate
->element_mem
);
1284 /* Allocate hash table for collating symbols. */
1285 if (init_hash (&collate
->symbols
, 64))
1286 error (4, 0, _("memory exhausted"));
1288 /* Allocate hash table for result. */
1289 if (init_hash (&collate
->result
, 512))
1290 error (4, 0, _("memory exhausted"));
1292 collate
->nrules
= 0;
1293 collate
->nrules_max
= 10;
1295 = (enum coll_sort_rule
*) xmalloc (collate
->nrules_max
1296 * sizeof (enum coll_sort_rule
));
1298 collate
->order_cnt
= 1; /* The smallest weight is 2. */
1300 collate
->was_ellipsis
= 0;
1301 collate
->last_char
= L
'\0'; /* 0 because leading ellipsis is allowed. */
1303 collate
->all_patches
= NULL
;
1305 /* This tells us no UNDEFINED entry was found until now. */
1306 memset (&collate
->undefined
, '\0', sizeof (collate
->undefined
));
1308 ldfile
->translate_strings
= 0;
1309 ldfile
->return_widestr
= 0;
1314 collate_finish (struct localedef_t
*locale
, struct charset_t
*charset
,
1315 struct repertoire_t
*repertoire
)
1317 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1321 /* Patch the constructed table so that forward references are
1322 correctly filled. */
1323 for (patch
= collate
->all_patches
; patch
!= NULL
; patch
= patch
->next
)
1326 size_t toklen
= strlen (patch
->token
);
1328 unsigned int value
= 0;
1330 wch
= charset_find_value (&charset
->char_table
, patch
->token
, toklen
);
1331 if (wch
!= ILLEGAL_CHAR_VALUE
)
1335 if (find_entry (&collate
->result
, &wch
, sizeof (uint32_t),
1336 (void *) &runp
) < 0)
1338 for (; runp
!= NULL
; runp
= runp
->next
)
1339 if (runp
->name
[0] == wch
&& runp
->name
[1] == L
'\0')
1342 value
= runp
== NULL
? 0 : runp
->this_weight
;
1344 else if (find_entry (&collate
->elements
, patch
->token
, toklen
, &ptmp
)
1347 value
= ((element_t
*) ptmp
)->this_weight
;
1349 else if (find_entry (&collate
->symbols
, patch
->token
, toklen
, &ptmp
)
1352 value
= (unsigned long int) ptmp
;
1360 error_at_line (0, 0, patch
->fname
, patch
->lineno
,
1361 _("no weight defined for symbol `%s'"),
1365 *patch
->where
.pos
= value
;
1368 /* If no definition for UNDEFINED is given, all characters in the
1369 given charset must be specified. */
1370 if (collate
->undefined
.ordering
== NULL
)
1372 /**************************************************************\
1373 |* XXX We should test whether really an unspecified character *|
1374 |* exists before giving the message. *|
1375 \**************************************************************/
1379 error (0, 0, _("no definition of `UNDEFINED'"));
1381 collate
->undefined
.ordering_len
= collate
->nrules
;
1382 weight
= ++collate
->order_cnt
;
1384 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
1387 obstack_grow (&collate
->element_mem
, &one
, sizeof (one
));
1390 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
1391 obstack_grow (&collate
->element_mem
, &weight
, sizeof (weight
));
1393 collate
->undefined
.ordering
= obstack_finish (&collate
->element_mem
);
1396 collate
->undefined_len
= 2; /* For the name: 1 x uint32_t + L'\0'. */
1397 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
1398 collate
->undefined_len
+= 1 + collate
->undefined
.ordering
[cnt
];
1404 collate_output (struct localedef_t
*locale
, struct charset_t
*charset
,
1405 struct repertoire_t
*repertoire
, const char *output_path
)
1407 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1408 uint32_t table_size
, table_best
, level_best
, sum_best
;
1413 const size_t nelems
= _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
);
1414 struct iovec iov
[2 + nelems
];
1415 struct locale_file data
;
1416 uint32_t idx
[nelems
];
1417 struct obstack non_simple
;
1418 struct obstack string_pool
;
1419 size_t cnt
, entry_size
;
1420 uint32_t undefined_offset
= UINT_MAX
;
1421 uint32_t *table
, *extra
, *table2
, *extra2
;
1423 uint32_t element_hash_tab_size
;
1424 uint32_t *element_hash_tab
;
1425 uint32_t *element_hash_tab_ob
;
1426 uint32_t element_string_pool_size
;
1427 char *element_string_pool
;
1428 uint32_t element_value_size
;
1429 uint32_t *element_value
;
1430 uint32_t *element_value_ob
;
1431 uint32_t symbols_hash_tab_size
;
1432 uint32_t *symbols_hash_tab
;
1433 uint32_t *symbols_hash_tab_ob
;
1434 uint32_t symbols_string_pool_size
;
1435 char *symbols_string_pool
;
1436 uint32_t symbols_class_size
;
1437 uint32_t *symbols_class
;
1438 uint32_t *symbols_class_ob
;
1439 hash_table
*hash_tab
;
1440 unsigned int dummy_weights
[collate
->nrules
+ 1];
1442 sum_best
= UINT_MAX
;
1443 table_best
= 0xffff;
1444 level_best
= 0xffff;
1446 /* Compute table size. */
1449 Computing table size for collation information might take a while..."),
1451 for (table_size
= 256; table_size
< sum_best
; ++table_size
)
1453 size_t hits
[table_size
];
1454 unsigned int worst
= 1;
1459 for (cnt
= 0; cnt
< 256; ++cnt
)
1461 memset (&hits
[256], '\0', sizeof (hits
) - 256 * sizeof (size_t));
1463 while (iterate_table (&collate
->result
, &last
, (const void **) &name
,
1464 &len
, (void **) &pelem
) >= 0)
1465 if (pelem
->ordering
!= NULL
&& pelem
->name
[0] > 0xff)
1466 if (++hits
[(unsigned int) pelem
->name
[0] % table_size
] > worst
)
1468 worst
= hits
[(unsigned int) pelem
->name
[0] % table_size
];
1469 if (table_size
* worst
> sum_best
)
1473 if (table_size
* worst
< sum_best
)
1475 sum_best
= table_size
* worst
;
1476 table_best
= table_size
;
1480 assert (table_best
!= 0xffff || level_best
!= 0xffff);
1482 fputs (_(" done\n"), stderr
);
1484 obstack_init (&non_simple
);
1485 obstack_init (&string_pool
);
1487 data
.magic
= LIMAGIC (LC_COLLATE
);
1489 iov
[0].iov_base
= (void *) &data
;
1490 iov
[0].iov_len
= sizeof (data
);
1492 iov
[1].iov_base
= (void *) idx
;
1493 iov
[1].iov_len
= sizeof (idx
);
1495 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES
)].iov_base
= &collate
->nrules
;
1496 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES
)].iov_len
= sizeof (uint32_t);
1498 table
= (uint32_t *) alloca (collate
->nrules
* sizeof (uint32_t));
1499 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES
)].iov_base
= table
;
1500 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES
)].iov_len
1501 = collate
->nrules
* sizeof (uint32_t);
1502 /* Another trick here. Describing the collation method needs only a
1503 few bits (3, to be exact). But the binary file should be
1504 accessible by machines with both endiannesses and so we store both
1505 forms in the same word. */
1506 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
1507 table
[cnt
] = collate
->rules
[cnt
] | bswap_32 (collate
->rules
[cnt
]);
1509 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE
)].iov_base
= &table_best
;
1510 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE
)].iov_len
= sizeof (uint32_t);
1512 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS
)].iov_base
= &level_best
;
1513 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS
)].iov_len
1514 = sizeof (uint32_t);
1516 entry_size
= 1 + MAX (collate
->nrules
, 2);
1518 table
= (uint32_t *) alloca (table_best
* level_best
* entry_size
1519 * sizeof (table
[0]));
1520 memset (table
, '\0', table_best
* level_best
* entry_size
1521 * sizeof (table
[0]));
1524 /* Macros for inserting in output table. */
1525 #define ADD_VALUE(expr) \
1527 uint32_t to_write = (uint32_t) expr; \
1528 obstack_grow (&non_simple, &to_write, sizeof (to_write)); \
1531 #define ADD_ELEMENT(pelem, len) \
1537 wlen = wcslen (pelem->name); \
1538 obstack_grow (&non_simple, pelem->name, (wlen + 1) * sizeof (uint32_t)); \
1540 idx = collate->nrules; \
1541 for (cnt = 0; cnt < collate->nrules; ++cnt) \
1545 ADD_VALUE (pelem->ordering[cnt]); \
1546 for (disp = 0; disp < pelem->ordering[cnt]; ++disp) \
1547 ADD_VALUE (pelem->ordering[idx++]); \
1551 #define ADD_FORWARD(pelem) \
1553 /* We leave a reference in the main table and put all \
1554 information in the table for the extended entries. */ \
1556 element_t *has_simple = NULL; \
1559 table[(level * table_best + slot) * entry_size + 1] \
1561 table[(level * table_best + slot) * entry_size + 2] \
1562 = obstack_object_size (&non_simple) / sizeof (uint32_t); \
1564 /* Here we have to construct the non-simple table entry. First \
1565 compute the total length of this entry. */ \
1566 for (runp = (pelem); runp != NULL; runp = runp->next) \
1567 if (runp->ordering != NULL) \
1572 value = 1 + wcslen (runp->name) + 1; \
1574 for (cnt = 0; cnt < collate->nrules; ++cnt) \
1575 /* We have to take care for entries without ordering \
1576 information. While reading them they get inserted in the \
1577 table and later not removed when something goes wrong with \
1578 reading its weights. */ \
1579 value += 1 + runp->ordering[cnt]; \
1581 if (runp->name[1] == L'\0') \
1582 has_simple = runp; \
1584 ADD_ELEMENT (runp, value); \
1587 if (has_simple == NULL) \
1591 ADD_VALUE (collate->undefined_len + 1); \
1593 /* Add the name. */ \
1594 ADD_VALUE ((pelem)->name[0]); \
1597 idx = collate->nrules; \
1598 for (cnt = 0; cnt < collate->nrules; ++cnt) \
1602 ADD_VALUE (collate->undefined.ordering[cnt]); \
1603 for (disp = 0; disp < collate->undefined.ordering[cnt]; ++disp) \
1605 if ((uint32_t) collate->undefined.ordering[idx] \
1607 ADD_VALUE ((pelem)->name[0]); \
1609 ADD_VALUE (collate->undefined.ordering[idx++]); \
1618 /* Fill the table now. First we look for all the characters which
1619 fit into one single byte. This speeds up the 8-bit string
1622 while (iterate_table (&collate
->result
, &last
, (const void **) &name
,
1623 &len
, (void **) &pelem
) >= 0)
1624 if (pelem
->name
[0] <= 0xff)
1626 /* We have a single byte name. Now we must distinguish
1627 between entries in simple form (i.e., only one value per
1628 weight and no collation element starting with the same
1629 character) and those which are not. */
1630 size_t slot
= ((size_t) pelem
->name
[0]);
1631 const size_t level
= 0;
1633 table
[slot
* entry_size
] = pelem
->name
[0];
1635 if (pelem
->name
[1] == L
'\0' && pelem
->next
== NULL
1636 && pelem
->ordering_len
== collate
->nrules
)
1638 /* Yes, we have a simple one. Lucky us. */
1641 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
1642 table
[slot
* entry_size
+ 1 + cnt
]
1643 = pelem
->ordering
[collate
->nrules
+ cnt
];
1646 ADD_FORWARD (pelem
);
1649 /* Now check for missing single byte entries. If one exist we fill
1650 with the UNDEFINED entry. */
1651 for (cnt
= 0; cnt
< 256; ++cnt
)
1652 /* The first weight is never 0 for existing entries. */
1653 if (table
[cnt
* entry_size
+ 1] == 0)
1655 /* We have to fill in the information from the UNDEFINED
1657 table
[cnt
* entry_size
] = (uint32_t) cnt
;
1659 if (collate
->undefined
.ordering_len
== collate
->nrules
)
1663 for (inner
= 0; inner
< collate
->nrules
; ++inner
)
1664 if ((uint32_t)collate
->undefined
.ordering
[collate
->nrules
1667 table
[cnt
* entry_size
+ 1 + inner
] = cnt
;
1669 table
[cnt
* entry_size
+ 1 + inner
]
1670 = collate
->undefined
.ordering
[collate
->nrules
+ inner
];
1674 if (undefined_offset
!= UINT_MAX
)
1676 table
[cnt
* entry_size
+ 1] = FORWARD_CHAR
;
1677 table
[cnt
* entry_size
+ 2] = undefined_offset
;
1681 const size_t slot
= cnt
;
1682 const size_t level
= 0;
1684 ADD_FORWARD (&collate
->undefined
);
1685 undefined_offset
= table
[cnt
* entry_size
+ 2];
1690 /* Now we are ready for inserting the whole rest. */
1692 while (iterate_table (&collate
->result
, &last
, (const void **) &name
,
1693 &len
, (void **) &pelem
) >= 0)
1694 if (pelem
->name
[0] > 0xff)
1696 /* Find the position. */
1697 size_t slot
= ((size_t) pelem
->name
[0]) % table_best
;
1700 while (table
[(level
* table_best
+ slot
) * entry_size
+ 1] != 0)
1702 assert (level
< level_best
);
1704 if (pelem
->name
[1] == L
'\0' && pelem
->next
== NULL
1705 && pelem
->ordering_len
== collate
->nrules
)
1707 /* Again a simple entry. */
1710 for (inner
= 0; inner
< collate
->nrules
; ++inner
)
1711 table
[(level
* table_best
+ slot
) * entry_size
+ 1 + inner
]
1712 = pelem
->ordering
[collate
->nrules
+ inner
];
1715 ADD_FORWARD (pelem
);
1718 /* Add the UNDEFINED entry. */
1720 /* Here we have to construct the non-simple table entry. */
1723 undefined_offset
= obstack_object_size (&non_simple
);
1725 idx
= collate
->nrules
;
1726 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
1730 ADD_VALUE (collate
->undefined
.ordering
[cnt
]);
1731 for (disp
= 0; disp
< collate
->undefined
.ordering
[cnt
]; ++disp
)
1732 ADD_VALUE (collate
->undefined
.ordering
[idx
++]);
1736 /* Finish the extra block. */
1737 extra_len
= obstack_object_size (&non_simple
);
1738 extra
= (uint32_t *) obstack_finish (&non_simple
);
1739 assert ((extra_len
% sizeof (uint32_t)) == 0);
1741 /* Now we have to build the two array for the other byte ordering. */
1742 table2
= (uint32_t *) alloca (table_best
* level_best
* entry_size
1743 * sizeof (table
[0]));
1744 extra2
= (uint32_t *) alloca (extra_len
);
1746 for (cnt
= 0; cnt
< table_best
* level_best
* entry_size
; ++cnt
)
1747 table2
[cnt
] = bswap_32 (table
[cnt
]);
1749 for (cnt
= 0; cnt
< extra_len
/ sizeof (uint32_t); ++cnt
)
1750 extra2
[cnt
] = bswap_32 (extra2
[cnt
]);
1752 /* We need a simple hashing table to get a collation-element->chars
1753 mapping. We again use internal hashing using a secondary hashing
1756 Each string has an associate hashing value V, computed by a
1757 fixed function. To locate the string we use open addressing with
1758 double hashing. The first index will be V % M, where M is the
1759 size of the hashing table. If no entry is found, iterating with
1760 a second, independent hashing function takes place. This second
1761 value will be 1 + V % (M - 2). The approximate number of probes
1764 for unsuccessful search: (1 - N / M) ^ -1
1765 for successful search: - (N / M) ^ -1 * ln (1 - N / M)
1767 where N is the number of keys.
1769 If we now choose M to be the next prime bigger than 4 / 3 * N,
1770 we get the values 4 and 1.85 resp. Because unsuccessful searches
1771 are unlikely this is a good value. Formulas: [Knuth, The Art of
1772 Computer Programming, Volume 3, Sorting and Searching, 1973,
1774 if (collate
->elements
.filled
== 0)
1776 /* We don't need any element table since there are no collating
1778 element_hash_tab_size
= 0;
1779 element_hash_tab
= NULL
;
1780 element_hash_tab_ob
= NULL
;
1781 element_string_pool_size
= 0;
1782 element_string_pool
= NULL
;
1783 element_value_size
= 0;
1784 element_value
= NULL
;
1785 element_value_ob
= NULL
;
1789 void *ptr
; /* Running pointer. */
1790 const char *key
; /* Key for current bucket. */
1791 size_t keylen
; /* Length of key data. */
1792 const element_t
*data
; /* Data, i.e., the character sequence. */
1794 element_hash_tab_size
= next_prime ((collate
->elements
.filled
* 4) / 3);
1795 if (element_hash_tab_size
< 7)
1796 /* We need a minimum to make the following code work. */
1797 element_hash_tab_size
= 7;
1799 element_hash_tab
= obstack_alloc (&non_simple
, (2 * element_hash_tab_size
1800 * sizeof (uint32_t)));
1801 memset (element_hash_tab
, '\377', (2 * element_hash_tab_size
1802 * sizeof (uint32_t)));
1805 while (iterate_table (&collate
->elements
, &ptr
, (const void **) &key
,
1806 &keylen
, (void **) &data
) == 0)
1808 size_t hash_val
= hash_string (key
, keylen
);
1809 size_t idx
= hash_val
% element_hash_tab_size
;
1811 if (element_hash_tab
[2 * idx
] != (~((uint32_t) 0)))
1813 /* We need the second hashing function. */
1814 size_t c
= 1 + (hash_val
% (element_hash_tab_size
- 2));
1817 if (idx
>= element_hash_tab_size
- c
)
1818 idx
-= element_hash_tab_size
- c
;
1821 while (element_hash_tab
[2 * idx
] != (~((uint32_t) 0)));
1824 element_hash_tab
[2 * idx
] = obstack_object_size (&non_simple
);
1825 element_hash_tab
[2 * idx
+ 1] = (obstack_object_size (&string_pool
)
1826 / sizeof (uint32_t));
1828 obstack_grow0 (&non_simple
, key
, keylen
);
1829 obstack_grow (&string_pool
, data
->name
,
1830 (wcslen (data
->name
) + 1) * sizeof (uint32_t));
1833 if (obstack_object_size (&non_simple
) % 4 != 0)
1834 obstack_blank (&non_simple
,
1835 4 - (obstack_object_size (&non_simple
) % 4));
1836 element_string_pool_size
= obstack_object_size (&non_simple
);
1837 element_string_pool
= obstack_finish (&non_simple
);
1839 element_value_size
= obstack_object_size (&string_pool
);
1840 element_value
= obstack_finish (&string_pool
);
1842 /* Create the tables for the other byte order. */
1843 element_hash_tab_ob
= obstack_alloc (&non_simple
,
1844 (2 * element_hash_tab_size
1845 * sizeof (uint32_t)));
1846 for (cnt
= 0; cnt
< 2 * element_hash_tab_size
; ++cnt
)
1847 element_hash_tab_ob
[cnt
] = bswap_U32 (element_hash_tab
[cnt
]);
1849 element_value_ob
= obstack_alloc (&string_pool
, element_value_size
);
1850 for (cnt
= 0; cnt
< element_value_size
/ 4; ++cnt
)
1851 element_value_ob
[cnt
] = bswap_32 (element_value
[cnt
]);
1854 /* Store collation elements as map to collation class. There are
1855 three kinds of symbols:
1857 - collation elements
1859 We need to make a table which lets the user to access the primary
1860 weight based on the symbol string. */
1861 symbols_hash_tab_size
= next_prime ((4 * (charset
->char_table
.filled
1862 + collate
->elements
.filled
1863 + collate
->symbols
.filled
)) / 3);
1864 symbols_hash_tab
= obstack_alloc (&non_simple
, (2 * symbols_hash_tab_size
1865 * sizeof (uint32_t)));
1866 memset (symbols_hash_tab
, '\377', (2 * symbols_hash_tab_size
1867 * sizeof (uint32_t)));
1869 /* Now fill the array. First the symbols from the character set,
1870 then the collation elements and last the collation symbols. */
1871 hash_tab
= &charset
->char_table
;
1874 void *ptr
; /* Running pointer. */
1875 const char *key
; /* Key for current bucket. */
1876 size_t keylen
; /* Length of key data. */
1877 void *data
; /* Data. */
1880 while (iterate_table (hash_tab
, &ptr
, (const void **) &key
,
1881 &keylen
, (void **) &data
) == 0)
1886 unsigned int *weights
;
1888 if (hash_tab
== &charset
->char_table
1889 || hash_tab
== &collate
->elements
)
1891 element_t
*lastp
, *firstp
;
1892 uint32_t dummy_name
[2];
1893 const uint32_t *name
;
1896 if (hash_tab
== &charset
->char_table
)
1898 dummy_name
[0] = (uint32_t) ((unsigned long int) data
);
1899 dummy_name
[1] = L
'\0';
1901 name_len
= sizeof (uint32_t);
1905 element_t
*elemp
= (element_t
*) data
;
1907 name_len
= wcslen (name
) * sizeof (uint32_t);
1910 /* First check whether this character is used at all. */
1911 if (find_entry (&collate
->result
, name
, name_len
,
1912 (void *) &firstp
) < 0)
1913 /* The symbol is not directly mentioned in the collation.
1914 I.e., we use the value for UNDEFINED. */
1915 lastp
= &collate
->undefined
;
1918 /* The entry for the simple character is always found at
1921 while (lastp
->next
!= NULL
&& wcscmp (name
, lastp
->name
))
1922 lastp
= lastp
->next
;
1925 weights
= lastp
->ordering
;
1929 dummy_weights
[0] = 1;
1930 dummy_weights
[collate
->nrules
]
1931 = (unsigned int) ((unsigned long int) data
);
1933 weights
= dummy_weights
;
1936 /* In LASTP->ordering we now have the collation class.
1937 Determine the place in the hashing table next. */
1938 hash_val
= hash_string (key
, keylen
);
1939 idx
= hash_val
% symbols_hash_tab_size
;
1941 if (symbols_hash_tab
[2 * idx
] != (~((uint32_t) 0)))
1943 /* We need the second hashing function. */
1944 size_t c
= 1 + (hash_val
% (symbols_hash_tab_size
- 2));
1947 if (idx
>= symbols_hash_tab_size
- c
)
1948 idx
-= symbols_hash_tab_size
- c
;
1951 while (symbols_hash_tab
[2 * idx
] != (~((uint32_t) 0)));
1954 symbols_hash_tab
[2 * idx
] = obstack_object_size (&string_pool
);
1955 symbols_hash_tab
[2 * idx
+ 1] = (obstack_object_size (&non_simple
)
1956 / sizeof (uint32_t));
1958 obstack_grow0 (&string_pool
, key
, keylen
);
1959 /* Adding the first weight looks complicated. We have to deal
1960 with the kind it is stored and with the fact that original
1961 form uses `unsigned int's while we need `uint32_t' here. */
1963 obstack_grow (&non_simple
, &word
, sizeof (uint32_t));
1964 for (cnt
= 0; cnt
< weights
[0]; ++cnt
)
1966 word
= weights
[collate
->nrules
+ cnt
];
1967 obstack_grow (&non_simple
, &word
, sizeof (uint32_t));
1971 if (hash_tab
== &charset
->char_table
)
1972 hash_tab
= &collate
->elements
;
1973 else if (hash_tab
== &collate
->elements
)
1974 hash_tab
= &collate
->symbols
;
1979 /* Now we have the complete tables. */
1980 if (obstack_object_size (&string_pool
) % 4 != 0)
1981 obstack_blank (&non_simple
, 4 - (obstack_object_size (&string_pool
) % 4));
1982 symbols_string_pool_size
= obstack_object_size (&string_pool
);
1983 symbols_string_pool
= obstack_finish (&string_pool
);
1985 symbols_class_size
= obstack_object_size (&non_simple
);
1986 symbols_class
= obstack_finish (&non_simple
);
1988 /* Generate tables with other byte order. */
1989 symbols_hash_tab_ob
= obstack_alloc (&non_simple
, (2 * symbols_hash_tab_size
1990 * sizeof (uint32_t)));
1991 for (cnt
= 0; cnt
< 2 * symbols_hash_tab_size
; ++cnt
)
1992 symbols_hash_tab_ob
[cnt
] = bswap_32 (symbols_hash_tab
[cnt
]);
1994 symbols_class_ob
= obstack_alloc (&non_simple
, symbols_class_size
);
1995 for (cnt
= 0; cnt
< symbols_class_size
/ 4; ++cnt
)
1996 symbols_class_ob
[cnt
] = bswap_32 (symbols_class
[cnt
]);
1999 /* Store table addresses and lengths. */
2000 #if __BYTE_ORDER == __BIG_ENDIAN
2001 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB
)].iov_base
= table
;
2002 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB
)].iov_len
2003 = table_best
* level_best
* entry_size
* sizeof (table
[0]);
2005 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL
)].iov_base
= table2
;
2006 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL
)].iov_len
2007 = table_best
* level_best
* entry_size
* sizeof (table
[0]);
2009 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB
)].iov_base
= extra
;
2010 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB
)].iov_len
= extra_len
;
2012 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL
)].iov_base
= extra2
;
2013 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL
)].iov_len
= extra_len
;
2015 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB
)].iov_base
= table2
;
2016 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB
)].iov_len
2017 = table_best
* level_best
* entry_size
* sizeof (table
[0]);
2019 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL
)].iov_base
= table
;
2020 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL
)].iov_len
2021 = table_best
* level_best
* entry_size
* sizeof (table
[0]);
2023 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB
)].iov_base
= extra2
;
2024 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB
)].iov_len
= extra_len
;
2026 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL
)].iov_base
= extra
;
2027 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL
)].iov_len
= extra_len
;
2030 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED
)].iov_base
= &undefined_offset
;
2031 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED
)].iov_len
= sizeof (uint32_t);
2034 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_SIZE
)].iov_base
2035 = &element_hash_tab_size
;
2036 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_SIZE
)].iov_len
2037 = sizeof (uint32_t);
2039 #if __BYTE_ORDER == __BIG_ENDIAN
2040 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB
)].iov_base
2042 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB
)].iov_len
2043 = 2 * element_hash_tab_size
* sizeof (uint32_t);
2045 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL
)].iov_base
2046 = element_hash_tab_ob
;
2047 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL
)].iov_len
2048 = 2 * element_hash_tab_size
* sizeof (uint32_t);
2050 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL
)].iov_base
2052 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL
)].iov_len
2053 = 2 * element_hash_tab_size
* sizeof (uint32_t);
2055 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB
)].iov_base
2056 = element_hash_tab_ob
;
2057 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB
)].iov_len
2058 = 2 * element_hash_tab_size
* sizeof (uint32_t);
2061 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_STR_POOL
)].iov_base
2062 = element_string_pool
;
2063 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_STR_POOL
)].iov_len
2064 = element_string_pool_size
;
2066 #if __BYTE_ORDER == __BIG_ENDIAN
2067 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB
)].iov_base
2069 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB
)].iov_len
2070 = element_value_size
;
2072 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL
)].iov_base
2074 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL
)].iov_len
2075 = element_value_size
;
2077 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL
)].iov_base
2079 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL
)].iov_len
2080 = element_value_size
;
2082 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB
)].iov_base
2084 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB
)].iov_len
2085 = element_value_size
;
2088 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZE
)].iov_base
2089 = &symbols_hash_tab_size
;
2090 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZE
)].iov_len
2091 = sizeof (uint32_t);
2093 #if __BYTE_ORDER == __BIG_ENDIAN
2094 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB
)].iov_base
2096 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB
)].iov_len
2097 = 2 * symbols_hash_tab_size
* sizeof (uint32_t);
2099 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL
)].iov_base
2100 = symbols_hash_tab_ob
;
2101 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL
)].iov_len
2102 = 2 * symbols_hash_tab_size
* sizeof (uint32_t);
2104 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL
)].iov_base
2106 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL
)].iov_len
2107 = 2 * symbols_hash_tab_size
* sizeof (uint32_t);
2109 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB
)].iov_base
2110 = symbols_hash_tab_ob
;
2111 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB
)].iov_len
2112 = 2 * symbols_hash_tab_size
* sizeof (uint32_t);
2115 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_STR_POOL
)].iov_base
2116 = symbols_string_pool
;
2117 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_STR_POOL
)].iov_len
2118 = symbols_string_pool_size
;
2120 #if __BYTE_ORDER == __BIG_ENDIAN
2121 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB
)].iov_base
2123 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB
)].iov_len
2124 = symbols_class_size
;
2126 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL
)].iov_base
2128 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL
)].iov_len
2129 = symbols_class_size
;
2131 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL
)].iov_base
2133 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL
)].iov_len
2134 = symbols_class_size
;
2136 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB
)].iov_base
2138 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB
)].iov_len
2139 = symbols_class_size
;
2142 /* Update idx array. */
2143 idx
[0] = iov
[0].iov_len
+ iov
[1].iov_len
;
2144 for (cnt
= 1; cnt
< nelems
; ++cnt
)
2145 idx
[cnt
] = idx
[cnt
- 1] + iov
[1 + cnt
].iov_len
;
2147 write_locale_data (output_path
, "LC_COLLATE", 2 + nelems
, iov
);
2149 obstack_free (&non_simple
, NULL
);
2150 obstack_free (&string_pool
, NULL
);
/* Check the symbolic name given for a multicharacter collating
   element.  The name in CODE->val.str is looked up in CHARMAP, in
   REPERTOIRE and in COLLATE->elements, and every lookup is followed
   by an lr_error call on LDFILE that reports a clash with the
   respective name space.
   NOTE(review): the lines holding the return type, the braces, some
   declarations and the conditions guarding several of the reports
   are not visible in this view; confirm the exact control flow
   against the full file.  */
2155 collate_element_to (struct linereader
*ldfile
,
2156 struct locale_collate_t
*collate
,
2157 struct token
*code
, struct charmap_t
*charmap
,
2158 struct repertoire_t
*repertoire
)
2160 struct charseq
*seq
;
/* Look the name up among the entries of CHARMAP.  */
2164 seq
= charmap_find_value (charmap
, code
->val
.str
.start
, code
->val
.str
.len
);
2167 lr_error (ldfile
, _("symbol for multicharacter collating element "
2168 "`%.*s' duplicates symbolic name in charmap"),
2169 (int) code
->val
.str
.len
, code
->val
.str
.start
);
/* Look the name up in REPERTOIRE; any result other than
   ILLEGAL_CHAR_VALUE is reported as a clash.  */
2173 value
= repertoire_find_value (repertoire
, code
->val
.str
.start
,
2175 if (value
!= ILLEGAL_CHAR_VALUE
)
2177 lr_error (ldfile
, _("symbol for multicharacter collating element "
2178 "`%.*s' duplicates symbolic name in repertoire"),
2179 (int) code
->val
.str
.len
, code
->val
.str
.start
);
/* Probe COLLATE->elements for an entry stored under the same name.  */
2183 if (find_entry (&collate
->elements
, code
->val
.str
.start
, code
->val
.str
.len
,
2186 lr_error (ldfile
, _("symbol for multicharacter collating element "
2187 "`%.*s' duplicates other element definition"),
2188 (int) code
->val
.str
.len
, code
->val
.str
.start
);
/* NOTE(review): this lookup probes COLLATE->elements a second time
   although the message below speaks of a symbol definition; confirm
   whether COLLATE->symbols was intended here.  */
2192 if (find_entry (&collate
->elements
, code
->val
.str
.start
, code
->val
.str
.len
,
2195 lr_error (ldfile
, _("symbol for multicharacter collating element "
2196 "`%.*s' duplicates symbol definition"),
2197 (int) code
->val
.str
.len
, code
->val
.str
.start
);
/* Create the element_t for a collating element whose character
   sequence is given by CODE and whose symbolic name is TO_STR.
   The element is allocated from COLLATE->element_mem, takes its
   multibyte and wide names from CODE->val.str.startmb and
   CODE->val.str.startwc, and is entered into COLLATE->resultwc,
   COLLATE->resultmb and finally, under TO_STR, into
   COLLATE->elements.  Problems are reported through lr_error on
   LDFILE; failing hash table insertions end in error with
   EXIT_FAILURE.  CHARMAP and REPERTOIRE are not referenced in the
   visible lines.
   NOTE(review): the return type, braces, several declarations and
   the terminators of three comments are not visible in this view, so
   parts of the code below read as comment text; confirm against the
   full file.  */
2206 collate_element_from (struct linereader
*ldfile
,
2207 struct locale_collate_t
*collate
,
2208 const char *to_str
, struct token
*code
,
2209 struct charmap_t
*charmap
,
2210 struct repertoire_t
*repertoire
)
2212 element_t
*elemp
, *runp
;
2214 /* CODE is a string. */
/* The storage for the new element comes from the element_mem
   obstack of COLLATE.  */
2215 elemp
= (element_t
*) obstack_alloc (&collate
->element_mem
,
2216 sizeof (element_t
));
2218 /* We have to translate the string. It may contain <...> character
2220 elemp
->namemb
= code
->val
.str
.startmb
;
2221 elemp
->namewc
= code
->val
.str
.startwc
;
2222 elemp
->this_weight
= 0;
2223 elemp
->ordering
= NULL
;
2224 elemp
->ordering_len
= 0;
2226 if (elemp
->namemb
== NULL
&& elemp
->namewc
== NULL
)
2228 /* The string contains characters which are not in the charmap nor
2229 in the repertoire. Ignore the string. */
/* Neither form of the name is available: report this on LDFILE.  */
2231 lr_error (ldfile
, _("\
2232 `from' string in collation element declaration contains unknown character"));
2236 /* The entries in the linked lists of RESULT are sorted in
2237 descending order. The order is important for the `strcoll' and
2238 `wcscoll' functions. */
/* The wide character table is probed with the first
   sizeof (uint32_t) bytes of NAMEWC as key; an existing chain is
   returned in RUNP.  */
2239 if (find_entry (&collate
->resultwc
, elemp
->namewc
, sizeof (uint32_t),
2240 (void *) &runp
) >= 0)
2242 /* We already have an entry with this key. Check whether it is
2244 element_t
*prevp
= NULL
;
2249 cmpres
= wcscmp (elemp
->namewc
, runp
->namewc
);
2254 while ((runp
= runp
->next
) != NULL
);
2257 lr_error (ldfile
, _("\
2258 duplicate collating element definition (repertoire)"));
2264 if (set_entry (&collate
->resultwc
, elemp
->namewc
,
2265 sizeof (uint32_t), elemp
) < 0)
2266 error (EXIT_FAILURE
, 0, _("\
2267 error while inserting collation element into hash table"));
2270 prevp
->next
= elemp
;
2276 if (insert_entry (&collate
->resultwc
, elemp
->namewc
, sizeof (uint32_t),
2278 error (EXIT_FAILURE
, errno
, _("error while inserting to hash table"));
2281 /* Now also insert the element definition in the multibyte table. */
/* The multibyte table is probed with a key length of 1 on NAMEMB.  */
2282 if (find_entry (&collate
->resultmb
, elemp
->namemb
, 1, (void *) &runp
) >= 0)
2284 /* We already have an entry with this key. Check whether it is
2286 element_t
*prevp
= NULL
;
2291 cmpres
= strcmp (elemp
->namemb
, runp
->namemb
);
2296 while ((runp
= runp
->next
) != NULL
);
2299 lr_error (ldfile
, _("\
2300 duplicate collating element definition (charmap)"));
2306 if (set_entry (&collate
->resultmb
, elemp
->namemb
, 1, elemp
) < 0)
2307 error (EXIT_FAILURE
, 0, _("\
2308 error while inserting collation element into hash table"));
2311 prevp
->next
= elemp
;
2317 if (insert_entry (&collate
->resultmb
, elemp
->namemb
, 1, elemp
) < 0)
2318 error (EXIT_FAILURE
, errno
, _("error while inserting to hash table"));
2321 /* Finally install the mapping from the `to'-name to the `from'-name. */
/* TO_STR with its strlen is the key and the new element the value.
   NOTE(review): the remaining arguments of the lr_error call are not
   visible in this view.  */
2322 if (insert_entry (&collate
->elements
, to_str
, strlen (to_str
),
2323 (void *) elemp
) < 0)
2324 lr_error (ldfile
, _("cannot insert new collating symbol definition: %s"),
/* Register the name in CODE->val.str in COLLATE->symbols.  Before
   the insert_entry call the name is looked up in CHARMAP, in
   REPERTOIRE, in COLLATE->elements and in COLLATE->symbols, and
   every lookup is followed by an lr_error call on LDFILE that
   reports a clash with the respective name space.
   NOTE(review): the return type, braces, some declarations, the
   conditions guarding several reports and the trailing arguments of
   the find_entry, insert_entry and last lr_error calls are not
   visible in this view; confirm against the full file.  */
2330 collate_symbol (struct linereader
*ldfile
, struct locale_collate_t
*collate
,
2331 struct token
*code
, struct charmap_t
*charmap
,
2332 struct repertoire_t
*repertoire
)
2335 struct charseq
*seq
;
/* NOTE(review): CHARMAP is handed to charset_find_value here while
   collate_element_to calls charmap_find_value for the same lookup;
   confirm which function is intended.  */
2338 seq
= charset_find_value (charmap
, code
->val
.str
.start
, code
->val
.str
.len
);
2341 lr_error (ldfile
, _("symbol for multicharacter collating element "
2342 "`%.*s' duplicates symbolic name in charmap"),
2343 (int) code
->val
.str
.len
, code
->val
.str
.start
);
/* NOTE(review): the callee named here is the parameter REPERTOIRE
   itself; collate_element_to calls repertoire_find_value at the
   corresponding place, which looks like the intended function.  */
2347 value
= repertoire (repertoire
, code
->val
.str
.start
, code
->val
.str
.len
);
2348 if (value
!= ILLEGAL_CHAR_VALUE
)
2350 lr_error (ldfile
, _("symbol for multicharacter collating element "
2351 "`%.*s' duplicates symbolic name in repertoire"),
2352 (int) code
->val
.str
.len
, code
->val
.str
.start
);
/* Probe COLLATE->elements for an entry stored under the same name.  */
2356 if (find_entry (&collate
->elements
, code
->val
.str
.start
, code
->val
.str
.len
,
2359 lr_error (ldfile
, _("symbol for multicharacter collating element "
2360 "`%.*s' duplicates element definition"),
2361 (int) code
->val
.str
.len
, code
->val
.str
.start
);
/* Probe COLLATE->symbols for an entry stored under the same name.  */
2365 if (find_entry (&collate
->symbols
, code
->val
.str
.start
, code
->val
.str
.len
,
2368 lr_error (ldfile
, _("symbol for multicharacter collating element "
2369 "`%.*s' duplicates other symbol definition"),
2370 (int) code
->val
.str
.len
, code
->val
.str
.start
);
/* Enter the name into COLLATE->symbols; the stored value is not
   visible in this view.  */
2374 if (insert_entry (&collate
->symbols
, code
->val
.str
.start
, code
->val
.str
.len
,
2376 lr_error (ldfile
, _("cannot insert new collating symbol definition: %s"),
/* Record SORT_RULE as one more entry of the rules array in the
   LC_COLLATE data of LOCALE.  Once NRULES has reached NRULES_MAX the
   recorded capacity is doubled and xrealloc is called on the array.
   LDFILE is not referenced in the visible lines.
   NOTE(review): the lines holding the return type, the braces, the
   target of the xrealloc assignment and its element count are not
   visible in this view.  */
2382 collate_new_order (struct linereader
*ldfile
, struct localedef_t
*locale
,
2383 enum coll_sort_rule sort_rule
)
2385 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
/* Grow when every recorded slot is in use.
   NOTE(review): doubling leaves a capacity of zero at zero; confirm
   that nrules_max starts out positive.  */
2387 if (collate
->nrules
>= collate
->nrules_max
)
2389 collate
->nrules_max
*= 2;
2391 = (enum coll_sort_rule
*) xrealloc (collate
->rules
,
2393 * sizeof (enum coll_sort_rule
));
/* Store the rule in the next free slot and advance NRULES.  */
2396 collate
->rules
[collate
->nrules
++] = sort_rule
;
/* Set up the rule and weight arrays in the LC_COLLATE data of
   LOCALE: xrealloc is called on the rules array, weight_cnt receives
   one int per rule, nweight_max is set to five times NRULES and
   weight receives room for that many ints.  LDFILE is not referenced
   in the visible lines.
   NOTE(review): the lines holding the return type, the braces, the
   target of the xrealloc assignment and its element count are not
   visible in this view.  */
2401 collate_build_arrays (struct linereader
*ldfile
, struct localedef_t
*locale
)
2403 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
/* Resize the rules array; the new element count is on a line that
   is not visible here.  */
2406 = (enum coll_sort_rule
*) xrealloc (collate
->rules
,
2408 * sizeof (enum coll_sort_rule
));
2410 /* Allocate the array of temporary weight counters: one int for
   each of the NRULES rules. */
2411 collate
->weight_cnt
= (int *) xmalloc (collate
->nrules
* sizeof (int));
2413 /* Choose arbitrary start value for table size: room for five
   weights per rule. */
2414 collate
->nweight_max
= 5 * collate
->nrules
;
2415 collate
->weight
= (int *) xmalloc (collate
->nweight_max
* sizeof (int));
/* Handle the symbol or token that opens one line of the collation
   order.  The visible code resolves CODE->val.str against
   CHARSET->char_table, COLLATE->elements and COLLATE->symbols in
   turn, records what was found in COLLATE->kind and, for characters
   and elements, in COLLATE->current_element, and hands out the next
   COLLATE->order_cnt value.  It also covers the undefined entry and
   ellipsis lines, expands a pending ellipsis into one element per
   character code, and finally clears the per-line weight counters.
   Problems are reported through lr_error on LDFILE, usually followed
   by lr_ignore_rest.
   NOTE(review): many lines (return type, braces, the dispatch on the
   token kind, several declarations and the terminator of one
   comment) are not visible in this view, so a long stretch of the
   code below reads as comment text; confirm the control flow against
   the full file.  */
2420 collate_order_elem (struct linereader
*ldfile
, struct localedef_t
*locale
,
2421 struct token
*code
, struct charset_t
*charset
)
2423 const uint32_t zero
= L
'\0';
2424 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
2433 /* We have a string to find in one of the three hashing tables. */
2434 value
= charset_find_value (&charset
->char_table
, code
->val
.str
.start
,
2436 if (value
!= ILLEGAL_CHAR_VALUE
)
/* The lookup in CHARSET->char_table produced a value, so the line
   is treated as one for a character.  */
2438 element_t
*lastp
, *firstp
;
2440 collate
->kind
= character
;
2442 if (find_entry (&collate
->result
, &value
, sizeof (uint32_t),
2443 (void *) &firstp
) < 0)
2444 firstp
= lastp
= NULL
;
2447 /* The entry for the simple character is always found at
2450 while (lastp
->next
!= NULL
)
2451 lastp
= lastp
->next
;
2453 if (lastp
->name
[0] == value
&& lastp
->name
[1] == L
'\0')
2456 _("duplicate definition for character `%.*s'"),
2457 (int) code
->val
.str
.len
, code
->val
.str
.start
);
2458 lr_ignore_rest (ldfile
, 0);
2464 collate
->current_element
2465 = (element_t
*) obstack_alloc (&collate
->element_mem
,
2466 sizeof (element_t
));
2468 obstack_grow (&collate
->element_mem
, &value
, sizeof (value
));
2469 obstack_grow (&collate
->element_mem
, &zero
, sizeof (zero
));
2471 collate
->current_element
->name
=
2472 (const uint32_t *) obstack_finish (&collate
->element_mem
);
2474 collate
->current_element
->this_weight
= ++collate
->order_cnt
;
2476 collate
->current_element
->next
= NULL
;
2480 if (insert_entry (&collate
->result
, &value
, sizeof (uint32_t),
2481 (void *) collate
->current_element
) < 0)
2483 lr_error (ldfile
, _("cannot insert collation element `%.*s'"),
2484 (int) code
->val
.str
.len
, code
->val
.str
.start
);
2489 lastp
->next
= collate
->current_element
;
2491 else if (find_entry (&collate
->elements
, code
->val
.str
.start
,
2492 code
->val
.str
.len
, &tmp
) >= 0)
2494 collate
->current_element
= (element_t
*) tmp
;
2496 if (collate
->current_element
->this_weight
!= 0)
2498 lr_error (ldfile
, _("\
2499 collation element `%.*s' appears more than once: ignore line"),
2500 (int) code
->val
.str
.len
, code
->val
.str
.start
);
2501 lr_ignore_rest (ldfile
, 0);
2506 collate
->kind
= element
;
2507 collate
->current_element
->this_weight
= ++collate
->order_cnt
;
2509 else if (find_entry (&collate
->symbols
, code
->val
.str
.start
,
2510 code
->val
.str
.len
, &tmp
) >= 0)
2512 unsigned int order
= ++collate
->order_cnt
;
2514 if ((unsigned long int) tmp
!= 0ul)
2516 lr_error (ldfile
, _("\
2517 collation symbol `%.*s' appears more than once: ignore line"),
2518 (int) code
->val
.str
.len
, code
->val
.str
.start
);
2519 lr_ignore_rest (ldfile
, 0);
2524 collate
->kind
= symbol
;
2526 if (set_entry (&collate
->symbols
, code
->val
.str
.start
,
2527 code
->val
.str
.len
, (void *) order
) < 0)
2529 lr_error (ldfile
, _("cannot process order specification"));
2536 lr_error (ldfile
, _("unknown symbol `%.*s': line ignored"),
2537 (int) code
->val
.str
.len
, code
->val
.str
.start
);
2538 lr_ignore_rest (ldfile
, 0);
2545 collate
->kind
= undefined
;
2546 collate
->current_element
= &collate
->undefined
;
2550 if (collate
->was_ellipsis
)
2552 lr_error (ldfile
, _("\
2553 two lines in a row containing `...' are not allowed"));
2556 else if (collate
->kind
!= character
)
2558 /* An ellipsis requires the previous line to be an
2559 character definition. */
2560 lr_error (ldfile
, _("\
2561 line before ellipsis does not contain definition for character constant"));
2562 lr_ignore_rest (ldfile
, 0);
2566 collate
->kind
= ellipsis
;
2570 assert (! "illegal token in `collate_order_elem'");
2573 /* Now it's time to handle the ellipsis in the previous line. We do
2574 this only when the last line contained a definition for a
2575 character, the current line also defines a character, the
2576 character code for the latter is bigger than the former. */
2577 if (collate
->was_ellipsis
)
2579 if (collate
->kind
!= character
)
2581 lr_error (ldfile
, _("\
2582 line after ellipsis must contain character definition"));
2583 lr_ignore_rest (ldfile
, 0);
/* NOTE(review): this branch is taken when the character before the
   ellipsis has a larger code than VALUE, while the message wording
   below reads as the opposite; confirm the intended text.  */
2586 else if (collate
->last_char
> value
)
2588 lr_error (ldfile
, _("end point of ellipsis range is bigger then start"));
2589 lr_ignore_rest (ldfile
, 0);
2594 /* We can fill the arrays with the information we need. */
/* The expansion starts one past COLLATE->last_char.  */
2600 name
[0] = collate
->last_char
+ 1;
/* DATA holds NRULES counts followed by NWEIGHT weights; PTR holds
   one index per rule.  Both live on the stack via alloca.  */
2603 data
= (unsigned int *) alloca ((collate
->nrules
+ collate
->nweight
)
2604 * sizeof (unsigned int));
2605 ptr
= (size_t *) alloca (collate
->nrules
* sizeof (size_t));
2607 /* Prepare data. Because the characters covered by an
2608 ellipsis all have equal values we prepare the data once
2609 and only change the variable number (if there are any).
2610 PTR[...] will point to the entries which will have to be
2611 fixed during the output loop. */
2612 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
2614 data
[cnt
] = collate
->weight_cnt
[cnt
];
2615 ptr
[cnt
] = (cnt
== 0
2617 : ptr
[cnt
- 1] + collate
->weight_cnt
[cnt
- 1]);
2620 for (cnt
= 0; cnt
< collate
->nweight
; ++cnt
)
2621 data
[collate
->nrules
+ cnt
] = collate
->weight
[cnt
];
2623 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
2624 if ((uint32_t) data
[ptr
[cnt
]] != ELLIPSIS_CHAR
)
/* One element is created for every character code from NAME[0] up
   to and including VALUE.  */
2627 while (name
[0] <= value
)
2631 pelem
= (element_t
*) obstack_alloc (&collate
->element_mem
,
2632 sizeof (element_t
));
2634 = (const uint32_t *) obstack_copy (&collate
->element_mem
,
2635 name
, 2 * sizeof (uint32_t));
2636 pelem
->this_weight
= ++collate
->order_cnt
;
2638 pelem
->ordering_len
= collate
->nweight
;
2640 = (unsigned int *) obstack_copy (&collate
->element_mem
, data
,
2642 + pelem
->ordering_len
)
2643 * sizeof (unsigned int));
2645 /* `...' weights need to be adjusted. */
2646 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
2648 pelem
->ordering
[ptr
[cnt
]] = pelem
->this_weight
;
2650 /* Insert new entry into result table. */
/* An entry already stored under this key is fetched into
   PELEM->next before the key is bound to PELEM with set_entry; the
   other call uses insert_entry.  Either failure ends in error with
   status 4.  NOTE(review): the else separating the two calls is not
   visible in this view.  */
2651 if (find_entry (&collate
->result
, name
, sizeof (uint32_t),
2652 (void *) &pelem
->next
) >= 0)
2654 if (set_entry (&collate
->result
, name
, sizeof (uint32_t),
2655 (void *) pelem
) < 0)
2656 error (4, 0, _("cannot insert into result table"));
2661 if (insert_entry (&collate
->result
, name
, sizeof (uint32_t),
2662 (void *) pelem
) < 0)
2663 error (4, 0, _("cannot insert into result table"));
2666 /* Increment counter. */
2672 /* Reset counters for weights. */
2673 collate
->weight_idx
= 0;
2674 collate
->nweight
= 0;
2675 for (i
= 0; i
< collate
->nrules
; ++i
)
2676 collate
->weight_cnt
[i
] = 0;
2677 collate
->current_patch
= NULL
;
2684 collate_weight_bsymbol (struct linereader
*ldfile
, struct localedef_t
*locale
,
2685 struct token
*code
, struct charset_t
*charset
)
2687 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
2688 unsigned int here_weight
;
2692 assert (code
->tok
== tok_bsymbol
);
2694 value
= charset_find_value (&charset
->char_table
, code
->val
.str
.start
,
2696 if (value
!= ILLEGAL_CHAR_VALUE
)
2700 if (find_entry (&collate
->result
, &value
, sizeof (uint32_t),
2705 && (runp
->name
[0] != value
|| runp
->name
[1] != L
'\0'))
2708 here_weight
= runp
== NULL
? 0 : runp
->this_weight
;
2710 else if (find_entry (&collate
->elements
, code
->val
.str
.start
,
2711 code
->val
.str
.len
, &tmp
) >= 0)
2713 element_t
*runp
= (element_t
*) tmp
;
2715 here_weight
= runp
->this_weight
;
2717 else if (find_entry (&collate
->symbols
, code
->val
.str
.start
,
2718 code
->val
.str
.len
, &tmp
) >= 0)
2720 here_weight
= (unsigned int) tmp
;
2725 lr_error (ldfile
, _("unknown symbol `%.*s': line ignored"),
2726 (int) code
->val
.str
.len
, code
->val
.str
.start
);
2727 lr_ignore_rest (ldfile
, 0);
2731 /* When we currently work on a collation symbol we do not expect any
2733 if (collate
->kind
== symbol
)
2735 lr_error (ldfile
, _("\
2736 specification of sorting weight for collation symbol does not make sense"));
2737 lr_ignore_rest (ldfile
, 0);
2741 /* Add to the current collection of weights. */
2742 if (collate
->nweight
>= collate
->nweight_max
)
2744 collate
->nweight_max
*= 2;
2745 collate
->weight
= (unsigned int *) xrealloc (collate
->weight
,
2746 collate
->nweight_max
);
2749 /* If the weight is currently not known, we remember to patch the
2750 resulting tables. */
2751 if (here_weight
== 0)
2755 newp
= (patch_t
*) obstack_alloc (&collate
->element_mem
,
2757 newp
->fname
= ldfile
->fname
;
2758 newp
->lineno
= ldfile
->lineno
;
2759 newp
->token
= (const char *) obstack_copy0 (&collate
->element_mem
,
2760 code
->val
.str
.start
,
2762 newp
->where
.idx
= collate
->nweight
++;
2763 newp
->next
= collate
->current_patch
;
2764 collate
->current_patch
= newp
;
2767 collate
->weight
[collate
->nweight
++] = here_weight
;
2768 ++collate
->weight_cnt
[collate
->weight_idx
];
2775 collate_next_weight (struct linereader
*ldfile
, struct localedef_t
*locale
)
2777 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
2779 if (collate
->kind
== symbol
)
2781 lr_error (ldfile
, _("\
2782 specification of sorting weight for collation symbol does not make sense"));
2783 lr_ignore_rest (ldfile
, 0);
2787 ++collate
->weight_idx
;
2788 if (collate
->weight_idx
>= collate
->nrules
)
2790 lr_error (ldfile
, _("too many weights"));
2791 lr_ignore_rest (ldfile
, 0);
2800 collate_simple_weight (struct linereader
*ldfile
, struct localedef_t
*locale
,
2801 struct token
*code
, struct charset_t
*charset
)
2803 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
2804 unsigned int value
= 0;
2806 /* There current tokens can be `IGNORE', `...', or a string. */
2810 /* This token is allowed in all situations. */
2811 value
= IGNORE_CHAR
;
2815 /* The ellipsis is only allowed for the `...' or `UNDEFINED'
2817 if (collate
->kind
!= ellipsis
&& collate
->kind
!= undefined
)
2819 lr_error (ldfile
, _("\
2820 `...' must only be used in `...' and `UNDEFINED' entries"));
2821 lr_ignore_rest (ldfile
, 0);
2824 value
= ELLIPSIS_CHAR
;
2828 /* This can become difficult. We have to get the weights which
2829 correspond to the single wide chars in the string. But some
2830 of the `chars' might not be real characters, but collation
2831 elements or symbols. And so the string decoder might have
2832 signaled errors. The string at this point is not translated.
2833 I.e., all <...> sequences are still there. */
2835 char *runp
= code
->val
.str
.start
;
2838 while (*runp
!= '\0')
2840 char *startp
= (char *) runp
;
2841 char *putp
= (char *) runp
;
2844 /* Lookup weight for char and store it. */
2847 while (*++runp
!= '\0' && *runp
!= '>')
2849 if (*runp
== ldfile
->escape_char
)
2850 if (*++runp
== '\0')
2852 lr_error (ldfile
, _("unterminated weight name"));
2853 lr_ignore_rest (ldfile
, 0);
2863 lr_error (ldfile
, _("empty weight name: line ignored"));
2864 lr_ignore_rest (ldfile
, 0);
2868 wch
= charset_find_value (&charset
->char_table
, startp
,
2870 if (wch
!= ILLEGAL_CHAR_VALUE
)
2874 if (find_entry (&collate
->result
, &wch
, sizeof (uint32_t),
2875 (void *)&pelem
) < 0)
2878 while (pelem
!= NULL
2879 && (pelem
->name
[0] != wch
2880 || pelem
->name
[1] != L
'\0'))
2881 pelem
= pelem
->next
;
2883 value
= pelem
== NULL
? 0 : pelem
->this_weight
;
2885 else if (find_entry (&collate
->elements
, startp
, putp
- startp
,
2888 element_t
*pelem
= (element_t
*) tmp
;
2890 value
= pelem
->this_weight
;
2892 else if (find_entry (&collate
->symbols
, startp
, putp
- startp
,
2895 value
= (unsigned int) tmp
;
2900 lr_error (ldfile
, _("unknown symbol `%.*s': line ignored"),
2901 (int) (putp
- startp
), startp
);
2902 lr_ignore_rest (ldfile
, 0);
2911 if (*runp
== ldfile
->escape_char
)
2913 static const char digits
[] = "0123456789abcdef";
2918 if (tolower (*runp
) == 'x')
2923 else if (tolower (*runp
) == 'd')
2931 dp
= strchr (digits
, tolower (*runp
));
2932 if (dp
== NULL
|| (dp
- digits
) >= base
)
2935 lr_error (ldfile
, _("\
2936 illegal character constant in string"));
2937 lr_ignore_rest (ldfile
, 0);
2943 dp
= strchr (digits
, tolower (*runp
));
2944 if (dp
== NULL
|| (dp
- digits
) >= base
)
2952 dp
= strchr (digits
, tolower (*runp
));
2953 if (dp
!= NULL
&& (dp
- digits
< base
))
2962 wch
= (uint32_t) *runp
++;
2964 /* Lookup the weight for WCH. */
2965 if (find_entry (&collate
->result
, &wch
, sizeof (wch
),
2970 && (wp
->name
[0] != wch
|| wp
->name
[1] != L
'\0'))
2973 value
= wp
== NULL
? 0 : wp
->this_weight
;
2975 /* To get the correct name for the error message. */
2978 /**************************************************\
2979 |* I know here is something wrong. Characters in *|
2980 |* the string which are not in the <...> form *|
2981 |* cannot be declared forward for now!!! *|
2982 \**************************************************/
2985 /* Store in weight array. */
2986 if (collate
->nweight
>= collate
->nweight_max
)
2988 collate
->nweight_max
*= 2;
2990 = (unsigned int *) xrealloc (collate
->weight
,
2991 collate
->nweight_max
);
2998 newp
= (patch_t
*) obstack_alloc (&collate
->element_mem
,
3000 newp
->fname
= ldfile
->fname
;
3001 newp
->lineno
= ldfile
->lineno
;
3003 = (const char *) obstack_copy0 (&collate
->element_mem
,
3004 startp
, putp
- startp
);
3005 newp
->where
.idx
= collate
->nweight
++;
3006 newp
->next
= collate
->current_patch
;
3007 collate
->current_patch
= newp
;
3010 collate
->weight
[collate
->nweight
++] = value
;
3011 ++collate
->weight_cnt
[collate
->weight_idx
];
3017 assert (! "should not happen");
3021 if (collate
->nweight
>= collate
->nweight_max
)
3023 collate
->nweight_max
*= 2;
3024 collate
->weight
= (unsigned int *) xrealloc (collate
->weight
,
3025 collate
->nweight_max
);
3028 collate
->weight
[collate
->nweight
++] = value
;
3029 ++collate
->weight_cnt
[collate
->weight_idx
];
3036 collate_end_weight (struct linereader
*ldfile
, struct localedef_t
*locale
)
3038 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
3039 element_t
*pelem
= collate
->current_element
;
3041 if (collate
->kind
== symbol
)
3043 /* We don't have to do anything. */
3044 collate
->was_ellipsis
= 0;
3048 if (collate
->kind
== ellipsis
)
3050 /* Before the next line is processed the ellipsis is handled. */
3051 collate
->was_ellipsis
= 1;
3055 assert (collate
->kind
== character
|| collate
->kind
== element
3056 || collate
->kind
== undefined
);
3058 /* Fill in the missing weights. */
3059 while (++collate
->weight_idx
< collate
->nrules
)
3061 collate
->weight
[collate
->nweight
++] = pelem
->this_weight
;
3062 ++collate
->weight_cnt
[collate
->weight_idx
];
3065 /* Now we know how many ordering weights the current
3066 character/element has. Allocate room in the element structure
3067 and copy information. */
3068 pelem
->ordering_len
= collate
->nweight
;
3070 /* First we write an array with the number of values for each
3072 obstack_grow (&collate
->element_mem
, collate
->weight_cnt
,
3073 collate
->nrules
* sizeof (unsigned int));
3075 /* Now the weights itselves. */
3076 obstack_grow (&collate
->element_mem
, collate
->weight
,
3077 collate
->nweight
* sizeof (unsigned int));
3080 pelem
->ordering
= obstack_finish (&collate
->element_mem
);
3082 /* Now we handle the "patches". */
3083 while (collate
->current_patch
!= NULL
)
3085 patch_t
*this_patch
;
3087 this_patch
= collate
->current_patch
;
3089 this_patch
->where
.pos
= &pelem
->ordering
[collate
->nrules
3090 + this_patch
->where
.idx
];
3092 collate
->current_patch
= this_patch
->next
;
3093 this_patch
->next
= collate
->all_patches
;
3094 collate
->all_patches
= this_patch
;
3097 /* Set information for next round. */
3098 collate
->was_ellipsis
= 0;
3099 if (collate
->kind
!= undefined
)
3100 collate
->last_char
= pelem
->name
[0];
3104 /* The parser for the LC_COLLATE section of the locale definition. */
3106 read_lc_collate (struct linereader
*ldfile
, struct localedef_t
*result
,
3107 struct charmap_t
*charmap
, struct repertoire_t
*repertoire
,
3110 struct locale_collate_t
*collate
;
3112 const char *save_str
;
3114 /* The rest of the line containing `LC_COLLATE' must be free. */
3115 lr_ignore_rest (ldfile
, 1);
3117 now
= lr_token (ldfile
, charmap
, NULL
);
3120 /* If we see `copy' now we are almost done. */
3121 if (nowtok
== tok_copy
)
3123 handle_copy (ldfile
, charmap
, repertoire
, result
, tok_lc_collate
,
3124 LC_COLLATE
, "LC_COLLATE", ignore_content
);
3128 /* Prepare the data structures. */
3129 collate_startup (ldfile
, result
, charmap
, ignore_content
);
3130 collate
= result
->categories
[LC_COLLATE
].collate
;
3134 /* Of course we don't proceed beyond the end of file. */
3135 if (nowtok
== tok_eof
)
3138 /* Ignore empty lines. */
3139 if (nowtok
== tok_eol
)
3141 now
= lr_token (ldfile
, charmap
, NULL
);
3148 case tok_coll_weight_max
:
3151 /* The rest of the line must be a single integer value. */
3152 now
= lr_token (ldfile
, charmap
, NULL
);
3153 if (now
->tok
!= tok_number
)
3155 /* We simply forget about the value we just read, the implementation
3156 has no fixed limits. */
3157 lr_ignore_rest (ldfile
, 1);
3163 /* We expect the name of the script in brackets. */
3164 now
= lr_token (ldfile
, charmap
, NULL
);
3165 if (now
->tok
!= tok_bsymbol
&& now
->tok
!= tok_ucs4
)
3167 if (now
->tok
!= tok_bsymbol
)
3169 lr_error (ldfile
, _("\
3170 script name `%s' must not duplicate any known name"),
3171 tok
->val
.str
.startmb
);
3172 lr_ignore_rest (ldfile
, 0);
3175 collate
->scripts
= xmalloc (collate
->scripts
,
3177 * sizeof (const char *)));
3178 collate
->scripts
[collate
->nscripts
++] = tok
->val
.str
.startmb
;
3179 lr_ignore_rest (ldfile
, 1);
3182 case tok_collating_element
:
3185 /* Get the first argument, a symbol in brackets. */
3186 now
= lr_token (ldfile
, charmap
, NULL
);
3187 if (now
->tok
!= tok_bsymbol
)
3190 if (collate_element_to (ldfile
, collate
, now
, charmap
, repertoire
))
3192 /* An error occurred. */
3193 lr_ignore_rest (ldfile
, 0);
3196 save_str
= tok
->val
.str
.startmb
;
3197 /* Next comes `from'. */
3198 now
= lr_token (ldfile
, charmap
, NULL
);
3199 if (now
->tok
!= tok_from
)
3201 /* Now comes a string. */
3202 now
= lr_token (ldfile
, charmap
, repertoire
);
3203 if (now
->tok
!= tok_string
)
3205 collate_element_from (ldfile
, collate
, save_str
, now
, charmap
,
3207 /* The rest of the line should be empty. */
3208 lr_ignore_rest (ldfile
, 1);
3211 case tok_collating_symbol
:
3214 /* Get the argument, a single symbol in brackets. */
3215 now
= lr_token (ldfile
, charmap
, NULL
);
3216 if (now
->tok
!= tok_bsymbol
)
3218 collate_symbol (ldfile
, collate
, now
, charmap
, repertoire
);
3221 case tok_order_start
:
3225 /* We expect now a scripting symbol or start right away
3226 with the order keywords. Or we have no argument at all
3227 in which means `forward'. */
3228 now
= lr_token (ldfile
, charmap
, NULL
);
3229 if (now
->tok
== tok_eol
)
3231 static enum coll_sort_rule default_rule
= sort_forward
;
3232 /* Use a single `forward' rule. */
3233 collate
->nrules
= 1;
3234 collate
->rules
= &default_rule
;
3238 /* XXX We don't recognize the ISO 14651 extensions yet. */
3239 uint32_t nrules
= 0;
3240 uint32_t nrules_max
= 32;
3241 enum coll_sort_rule
*rules
= alloca (nrules_max
3243 int saw_semicolon
= 0;
3245 memset (rules
, '\0', nrules_max
* sizeof (*rules
));
3248 if (now
->tok
!= tok_forward
&& now
->tok
!= tok_backward
3249 && now
->tok
!= tok_position
)
3254 if (nrules
== nrules_max
)
3256 newp
= alloca (nrules_max
* 2 * sizeof (*rules
));
3257 rules
= memcpy (newp
, rules
,
3258 nrules_max
* sizeof (*rules
));
3259 memset (&rules
[nrules_max
], '\0',
3260 nrules_max
* sizeof (*rules
));
3269 if ((rules
[nrules
] & sort_backward
) != 0)
3271 lr_error (ldfile
, _("\
3272 `forward' and `backward' order exclude each other"));
3273 lr_ignore_rest (ldfile
, 0);
3276 rules
[nrules
] |= sort_forward
;
3279 if ((rules
[nrules
] & sort_forward
) != 0)
3281 lr_error (ldfile
, _("\
3282 `forward' and `backward' order exclude each other"));
3283 lr_ignore_rest (ldfile
, 0);
3286 rules
[nrules
] |= sort_backward
;
3289 rules
[nrules
] |= tok_position
;
3293 /* Get the next token. This is either the end of the line,
3294 a comma or a semicolon. */
3295 now
= lr_token (ldfile
, charmap
, NULL
);
3296 if (now
->tok
== tok_comma
|| now
->tok
== tok_semicolon
)
3298 saw_semicolon
= now
->tok
== tok_semicolon
;
3299 now
= lr_token (ldfile
, charmap
, NULL
);
3302 while (now
->tok
!= tok_eol
|| now
->tok
!= tok_eof
);
3305 collate
->nrules
= nrules
;
3306 collate
->rules
= memcpy (xmalloc (nrules
* sizeof (*rules
)),
3307 rules
, nrules
* sizeof (*rules
));
3310 /* Now read the rules. */
3311 read_rules (ldfile
, collate
, charmap
, repertoire
);
3314 case tok_reorder_after
:
3317 case tok_reorder_script_after
:
3322 if (now
->tok
!= tok_eof
)
3323 SYNTAX_ERROR (_("syntax error in %s locale definition"),
3327 /* Prepare for the next round. */
3328 now
= lr_token (ldfile
, charmap
, NULL
);
3332 /* When we come here we reached the end of the file. */
3333 lr_error (ldfile
, _("premature end of file while reading category `%s'"),