1 /* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
33 #include "localeinfo.h"
35 #include "simple-hash.h"
36 #include "stringtrans.h"
37 #include "strlen-hash.h"
39 /* Uncomment the following line in the production version. */
40 /* #define NDEBUG 1 */
44 #define MAX(a, b) ((a) > (b) ? (a) : (b))
47 (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))
50 /* What kind of symbols get defined? */
61 typedef struct patch_t
75 typedef struct element_t
78 unsigned int this_weight
;
80 struct element_t
*next
;
82 unsigned int *ordering
;
87 /* The real definition of the struct for the LC_COLLATE locale. */
88 struct locale_collate_t
90 /* Collate symbol table. Simple mapping to number. */
93 /* The collation elements. */
95 struct obstack element_mem
;
97 /* The result table. */
100 /* Sorting rules given in order_start line. */
102 u_int32_t nrules_max
;
103 enum coll_sort_rule
*rules
;
105 /* Used while recognizing symbol composed of multiple tokens
106 (collating-element). */
107 const char *combine_token
;
108 size_t combine_token_len
;
110 /* How many sorting order specifications so far. */
111 unsigned int order_cnt
;
113 /* Was lastline ellipsis? */
115 /* Value of last entry if was character. */
117 /* Current element. */
118 element_t
*current_element
;
119 /* What kind of symbol is current element. */
120 enum coll_symbol kind
;
122 /* While collecting the weights we need some temporary space. */
123 unsigned int current_order
;
125 unsigned int weight_idx
;
126 unsigned int *weight
;
131 patch_t
*current_patch
;
132 patch_t
*all_patches
;
134 /* Room for the UNDEFINED information. */
136 unsigned int undefined_len
;
140 /* Be verbose? Defined in localedef.c. */
144 void *xmalloc (size_t __n
);
145 void *xrealloc (void *__p
, size_t __n
);
148 #define obstack_chunk_alloc malloc
149 #define obstack_chunk_free free
153 collate_startup (struct linereader
*lr
, struct localedef_t
*locale
,
154 struct charset_t
*charset
)
156 struct locale_collate_t
*collate
;
158 /* We have a definition for LC_COLLATE. */
159 copy_posix
.mask
&= ~(1 << LC_COLLATE
);
161 /* It is important that we always use UCS4 encoding for strings now. */
162 encoding_method
= ENC_UCS4
;
164 /* Allocate the needed room. */
165 locale
->categories
[LC_COLLATE
].collate
= collate
=
166 (struct locale_collate_t
*) xmalloc (sizeof (struct locale_collate_t
));
168 /* Allocate hash table for collating elements. */
169 if (init_hash (&collate
->elements
, 512))
170 error (4, 0, _("memory exhausted"));
171 collate
->combine_token
= NULL
;
172 obstack_init (&collate
->element_mem
);
174 /* Allocate hash table for collating elements. */
175 if (init_hash (&collate
->symbols
, 64))
176 error (4, 0, _("memory exhausted"));
178 /* Allocate hash table for result. */
179 if (init_hash (&collate
->result
, 512))
180 error (4, 0, _("memory exhausted"));
183 collate
->nrules_max
= 10;
185 = (enum coll_sort_rule
*) xmalloc (collate
->nrules_max
186 * sizeof (enum coll_sort_rule
));
188 collate
->order_cnt
= 1; /* The smallest weight is 2. */
190 collate
->was_ellipsis
= 0;
191 collate
->last_char
= L
'\0'; /* 0 because leading ellipsis is allowed. */
193 collate
->all_patches
= NULL
;
195 /* This tells us no UNDEFINED entry was found until now. */
196 memset (&collate
->undefined
, '\0', sizeof (collate
->undefined
));
198 lr
->translate_strings
= 0;
203 collate_finish (struct localedef_t
*locale
, struct charset_t
*charset
)
205 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
209 /* Patch the constructed table so that forward references are
211 for (patch
= collate
->all_patches
; patch
!= NULL
; patch
= patch
->next
)
214 size_t toklen
= strlen (patch
->token
);
216 unsigned int value
= 0;
218 wch
= charset_find_value (&charset
->char_table
, patch
->token
, toklen
);
219 if (wch
!= ILLEGAL_CHAR_VALUE
)
223 if (find_entry (&collate
->result
, &wch
, sizeof (wchar_t),
226 for (; runp
!= NULL
; runp
= runp
->next
)
227 if (runp
->name
[0] == wch
&& runp
->name
[1] == L
'\0')
230 value
= runp
== NULL
? 0 : runp
->this_weight
;
232 else if (find_entry (&collate
->elements
, patch
->token
, toklen
, &ptmp
)
235 value
= ((element_t
*) ptmp
)->this_weight
;
237 else if (find_entry (&collate
->symbols
, patch
->token
, toklen
, &ptmp
)
240 value
= (unsigned long int) ptmp
;
248 error_at_line (0, 0, patch
->fname
, patch
->lineno
,
249 _("no weight defined for symbol `%s'"),
253 *patch
->where
.pos
= value
;
256 /* If no definition for UNDEFINED is given, all characters in the
257 given charset must be specified. */
258 if (collate
->undefined
.ordering
== NULL
)
260 /**************************************************************\
261 |* XXX We should test whether really an unspecified character *|
262 |* exists before giving the message. *|
263 \**************************************************************/
266 if (/* XXX Remove the 0 & */ 0 && !be_quiet
)
267 error (0, 0, _("no definition of `UNDEFINED'"));
269 collate
->undefined
.ordering_len
= collate
->nrules
;
270 weight
= ++collate
->order_cnt
;
272 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
275 obstack_grow (&collate
->element_mem
, &one
, sizeof (one
));
278 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
279 obstack_grow (&collate
->element_mem
, &weight
, sizeof (weight
));
281 collate
->undefined
.ordering
= obstack_finish (&collate
->element_mem
);
284 collate
->undefined_len
= 2; /* For the name: 1 x wchar_t + L'\0'. */
285 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
286 collate
->undefined_len
+= 1 + collate
->undefined
.ordering
[cnt
];
292 collate_output (struct localedef_t
*locale
, struct charset_t
*charset
,
293 const char *output_path
)
295 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
296 u_int32_t table_size
, table_best
, level_best
, sum_best
;
301 const size_t nelems
= _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
);
302 struct iovec iov
[2 + nelems
];
303 struct locale_file data
;
304 u_int32_t idx
[nelems
];
305 struct obstack non_simple
;
306 struct obstack string_pool
;
307 size_t cnt
, entry_size
;
308 u_int32_t undefined_offset
= UINT_MAX
;
309 u_int32_t
*table
, *extra
, *table2
, *extra2
;
311 u_int32_t element_hash_tab_size
;
312 u_int32_t
*element_hash_tab
;
313 u_int32_t
*element_hash_tab_ob
;
314 u_int32_t element_string_pool_size
;
315 char *element_string_pool
;
316 u_int32_t element_value_size
;
317 wchar_t *element_value
;
318 wchar_t *element_value_ob
;
319 u_int32_t symbols_hash_tab_size
;
320 u_int32_t
*symbols_hash_tab
;
321 u_int32_t
*symbols_hash_tab_ob
;
322 u_int32_t symbols_string_pool_size
;
323 char *symbols_string_pool
;
324 u_int32_t symbols_class_size
;
325 u_int32_t
*symbols_class
;
326 u_int32_t
*symbols_class_ob
;
327 hash_table
*hash_tab
;
328 unsigned int dummy_weights
[collate
->nrules
+ 1];
334 /* Compute table size. */
337 Computing table size for collation information might take a while..."),
339 for (table_size
= 256; table_size
< sum_best
; ++table_size
)
341 size_t hits
[table_size
];
342 unsigned int worst
= 1;
347 for (cnt
= 0; cnt
< 256; ++cnt
)
349 memset (&hits
[256], '\0', sizeof (hits
) - 256 * sizeof (size_t));
351 while (iterate_table (&collate
->result
, &last
, (const void **) &name
,
352 &len
, (void **) &pelem
) >= 0)
353 if (pelem
->ordering
!= NULL
&& pelem
->name
[0] > 0xff)
354 if (++hits
[(unsigned int) pelem
->name
[0] % table_size
] > worst
)
356 worst
= hits
[(unsigned int) pelem
->name
[0] % table_size
];
357 if (table_size
* worst
> sum_best
)
361 if (table_size
* worst
< sum_best
)
363 sum_best
= table_size
* worst
;
364 table_best
= table_size
;
368 assert (table_best
!= 0xffff || level_best
!= 0xffff);
370 fputs (_(" done\n"), stderr
);
372 obstack_init (&non_simple
);
373 obstack_init (&string_pool
);
375 data
.magic
= LIMAGIC (LC_COLLATE
);
377 iov
[0].iov_base
= (void *) &data
;
378 iov
[0].iov_len
= sizeof (data
);
380 iov
[1].iov_base
= (void *) idx
;
381 iov
[1].iov_len
= sizeof (idx
);
383 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES
)].iov_base
= &collate
->nrules
;
384 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES
)].iov_len
= sizeof (u_int32_t
);
386 table
= (u_int32_t
*) alloca (collate
->nrules
* sizeof (u_int32_t
));
387 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES
)].iov_base
= table
;
388 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES
)].iov_len
389 = collate
->nrules
* sizeof (u_int32_t
);
390 /* Another trick here. Describing the collation method needs only a
391 few bits (3, to be exact). But the binary file should be
392 accessible by machines with both endianesses and so we store both
393 forms in the same word. */
394 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
395 table
[cnt
] = collate
->rules
[cnt
] | SWAPU32 (collate
->rules
[cnt
]);
397 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE
)].iov_base
= &table_best
;
398 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE
)].iov_len
= sizeof (u_int32_t
);
400 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS
)].iov_base
= &level_best
;
401 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS
)].iov_len
402 = sizeof (u_int32_t
);
404 entry_size
= 1 + MAX (collate
->nrules
, 2);
406 table
= (u_int32_t
*) alloca (table_best
* level_best
* entry_size
407 * sizeof (table
[0]));
408 memset (table
, '\0', table_best
* level_best
* entry_size
409 * sizeof (table
[0]));
412 /* Macros for inserting in output table. */
413 #define ADD_VALUE(expr) \
415 u_int32_t to_write = (u_int32_t) expr; \
416 obstack_grow (&non_simple, &to_write, sizeof (to_write)); \
419 #define ADD_ELEMENT(pelem, len) \
425 wlen = wcslen (pelem->name); \
426 obstack_grow (&non_simple, pelem->name, (wlen + 1) * sizeof (u_int32_t)); \
428 idx = collate->nrules; \
429 for (cnt = 0; cnt < collate->nrules; ++cnt) \
433 ADD_VALUE (pelem->ordering[cnt]); \
434 for (disp = 0; disp < pelem->ordering[cnt]; ++disp) \
435 ADD_VALUE (pelem->ordering[idx++]); \
439 #define ADD_FORWARD(pelem) \
441 /* We leave a reference in the main table and put all \
442 information in the table for the extended entries. */ \
444 element_t *has_simple = NULL; \
447 table[(level * table_best + slot) * entry_size + 1] \
449 table[(level * table_best + slot) * entry_size + 2] \
450 = obstack_object_size (&non_simple) / sizeof (u_int32_t); \
452 /* Here we have to construct the non-simple table entry. First \
453 compute the total length of this entry. */ \
454 for (runp = (pelem); runp != NULL; runp = runp->next) \
455 if (runp->ordering != NULL) \
460 value = 1 + wcslen (runp->name) + 1; \
462 for (cnt = 0; cnt < collate->nrules; ++cnt) \
463 /* We have to take care for entries without ordering \
464 information. While reading them they get inserted in the \
465 table and later not removed when something goes wrong with \
466 reading its weights. */ \
467 value += 1 + runp->ordering[cnt]; \
469 if (runp->name[1] == L'\0') \
472 ADD_ELEMENT (runp, value); \
475 if (has_simple == NULL) \
479 ADD_VALUE (collate->undefined_len + 1); \
481 /* Add the name. */ \
482 ADD_VALUE ((pelem)->name[0]); \
485 idx = collate->nrules; \
486 for (cnt = 0; cnt < collate->nrules; ++cnt) \
490 ADD_VALUE (collate->undefined.ordering[cnt]); \
491 for (disp = 0; disp < collate->undefined.ordering[cnt]; ++disp) \
493 if ((wchar_t) collate->undefined.ordering[idx] \
495 ADD_VALUE ((pelem)->name[0]); \
497 ADD_VALUE (collate->undefined.ordering[idx++]); \
506 /* Fill the table now. First we look for all the characters which
507 fit into one single byte. This speeds up the 8-bit string
510 while (iterate_table (&collate
->result
, &last
, (const void **) &name
,
511 &len
, (void **) &pelem
) >= 0)
512 if (pelem
->name
[0] <= 0xff)
514 /* We have a single byte name. Now we must distinguish
515 between entries in simple form (i.e., only one value per
516 weight and no collation element starting with the same
517 character) and those which are not. */
518 size_t slot
= ((size_t) pelem
->name
[0]);
519 const size_t level
= 0;
521 table
[slot
* entry_size
] = pelem
->name
[0];
523 if (pelem
->name
[1] == L
'\0' && pelem
->next
== NULL
524 && pelem
->ordering_len
== collate
->nrules
)
526 /* Yes, we have a simple one. Lucky us. */
529 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
530 table
[slot
* entry_size
+ 1 + cnt
]
531 = pelem
->ordering
[collate
->nrules
+ cnt
];
537 /* Now check for missing single byte entries. If one exist we fill
538 with the UNDEFINED entry. */
539 for (cnt
= 0; cnt
< 256; ++cnt
)
540 /* The first weight is never 0 for existing entries. */
541 if (table
[cnt
* entry_size
+ 1] == 0)
543 /* We have to fill in the information from the UNDEFINED
545 table
[cnt
* entry_size
] = (u_int32_t
) cnt
;
547 if (collate
->undefined
.ordering_len
== collate
->nrules
)
551 for (inner
= 0; inner
< collate
->nrules
; ++inner
)
552 if ((wchar_t)collate
->undefined
.ordering
[collate
->nrules
+ inner
]
554 table
[cnt
* entry_size
+ 1 + inner
] = cnt
;
556 table
[cnt
* entry_size
+ 1 + inner
]
557 = collate
->undefined
.ordering
[collate
->nrules
+ inner
];
561 if (undefined_offset
!= UINT_MAX
)
563 table
[cnt
* entry_size
+ 1] = FORWARD_CHAR
;
564 table
[cnt
* entry_size
+ 2] = undefined_offset
;
568 const size_t slot
= cnt
;
569 const size_t level
= 0;
571 ADD_FORWARD (&collate
->undefined
);
572 undefined_offset
= table
[cnt
* entry_size
+ 2];
577 /* Now we are ready for inserting the whole rest. */
579 while (iterate_table (&collate
->result
, &last
, (const void **) &name
,
580 &len
, (void **) &pelem
) >= 0)
581 if (pelem
->name
[0] > 0xff)
583 /* Find the position. */
584 size_t slot
= ((size_t) pelem
->name
[0]) % table_best
;
587 while (table
[(level
* table_best
+ slot
) * entry_size
+ 1] != 0)
589 assert (level
< level_best
);
591 if (pelem
->name
[1] == L
'\0' && pelem
->next
== NULL
592 && pelem
->ordering_len
== collate
->nrules
)
594 /* Again a simple entry. */
597 for (inner
= 0; inner
< collate
->nrules
; ++inner
)
598 table
[(level
* table_best
+ slot
) * entry_size
+ 1 + inner
]
599 = pelem
->ordering
[collate
->nrules
+ inner
];
605 /* Add the UNDEFINED entry. */
607 /* Here we have to construct the non-simple table entry. */
610 undefined_offset
= obstack_object_size (&non_simple
);
612 idx
= collate
->nrules
;
613 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
617 ADD_VALUE (collate
->undefined
.ordering
[cnt
]);
618 for (disp
= 0; disp
< collate
->undefined
.ordering
[cnt
]; ++disp
)
619 ADD_VALUE (collate
->undefined
.ordering
[idx
++]);
623 /* Finish the extra block. */
624 extra_len
= obstack_object_size (&non_simple
);
625 extra
= (u_int32_t
*) obstack_finish (&non_simple
);
626 assert ((extra_len
% sizeof (u_int32_t
)) == 0);
628 /* Now we have to build the two array for the other byte ordering. */
629 table2
= (u_int32_t
*) alloca (table_best
* level_best
* entry_size
630 * sizeof (table
[0]));
631 extra2
= (u_int32_t
*) alloca (extra_len
);
633 for (cnt
= 0; cnt
< table_best
* level_best
* entry_size
; ++cnt
)
634 table2
[cnt
] = SWAPU32 (table
[cnt
]);
636 for (cnt
= 0; cnt
< extra_len
/ sizeof (u_int32_t
); ++cnt
)
637 extra2
[cnt
] = SWAPU32 (extra2
[cnt
]);
639 /* We need a simple hashing table to get a collation-element->chars
640 mapping. We again use internal hashing using a secondary hashing
643 Each string has an associate hashing value V, computed by a
644 fixed function. To locate the string we use open addressing with
645 double hashing. The first index will be V % M, where M is the
646 size of the hashing table. If no entry is found, iterating with
647 a second, independent hashing function takes place. This second
648 value will be 1 + V % (M - 2). The approximate number of probes
651 for unsuccessful search: (1 - N / M) ^ -1
652 for successful search: - (N / M) ^ -1 * ln (1 - N / M)
654 where N is the number of keys.
656 If we now choose M to be the next prime bigger than 4 / 3 * N,
657 we get the values 4 and 1.85 resp. Because unsuccessful searches
658 are unlikely this is a good value. Formulas: [Knuth, The Art of
659 Computer Programming, Volume 3, Sorting and Searching, 1973,
661 if (collate
->elements
.filled
== 0)
663 /* We don't need any element table since there are no collating
665 element_hash_tab_size
= 0;
666 element_hash_tab
= NULL
;
667 element_hash_tab_ob
= NULL
;
668 element_string_pool_size
= 0;
669 element_string_pool
= NULL
;
670 element_value_size
= 0;
671 element_value
= NULL
;
672 element_value_ob
= NULL
;
676 void *ptr
; /* Running pointer. */
677 const char *key
; /* Key for current bucket. */
678 size_t keylen
; /* Length of key data. */
679 const element_t
*data
; /* Data, i.e., the character sequence. */
681 element_hash_tab_size
= next_prime ((collate
->elements
.filled
* 4) / 3);
682 if (element_hash_tab_size
< 7)
683 /* We need a minimum to make the following code work. */
684 element_hash_tab_size
= 7;
686 element_hash_tab
= obstack_alloc (&non_simple
, (2 * element_hash_tab_size
687 * sizeof (u_int32_t
)));
688 memset (element_hash_tab
, '\377', (2 * element_hash_tab_size
689 * sizeof (u_int32_t
)));
692 while (iterate_table (&collate
->elements
, &ptr
, (const void **) &key
,
693 &keylen
, (void **) &data
) == 0)
695 size_t hash_val
= hash_string (key
, keylen
);
696 size_t idx
= hash_val
% element_hash_tab_size
;
698 if (element_hash_tab
[2 * idx
] != (~((u_int32_t
) 0)))
700 /* We need the second hashing function. */
701 size_t c
= 1 + (hash_val
% (element_hash_tab_size
- 2));
704 if (idx
>= element_hash_tab_size
- c
)
705 idx
-= element_hash_tab_size
- c
;
708 while (element_hash_tab
[2 * idx
] != (~((u_int32_t
) 0)));
711 element_hash_tab
[2 * idx
] = obstack_object_size (&non_simple
);
712 element_hash_tab
[2 * idx
+ 1] = (obstack_object_size (&string_pool
)
715 obstack_grow0 (&non_simple
, key
, keylen
);
716 obstack_grow (&string_pool
, data
->name
,
717 (wcslen (data
->name
) + 1) * sizeof (wchar_t));
720 if (obstack_object_size (&non_simple
) % 4 != 0)
721 obstack_blank (&non_simple
,
722 4 - (obstack_object_size (&non_simple
) % 4));
723 element_string_pool_size
= obstack_object_size (&non_simple
);
724 element_string_pool
= obstack_finish (&non_simple
);
726 element_value_size
= obstack_object_size (&string_pool
);
727 element_value
= obstack_finish (&string_pool
);
729 /* Create the tables for the other byte order. */
730 element_hash_tab_ob
= obstack_alloc (&non_simple
,
731 (2 * element_hash_tab_size
732 * sizeof (u_int32_t
)));
733 for (cnt
= 0; cnt
< 2 * element_hash_tab_size
; ++cnt
)
734 element_hash_tab_ob
[cnt
] = SWAPU32 (element_hash_tab
[cnt
]);
736 element_value_ob
= obstack_alloc (&string_pool
, element_value_size
);
737 if (sizeof (wchar_t) != 4)
739 fputs ("sizeof (wchar_t) != 4 currently not handled", stderr
);
742 for (cnt
= 0; cnt
< element_value_size
/ 4; ++cnt
)
743 element_value_ob
[cnt
] = SWAPU32 (element_value
[cnt
]);
746 /* Store collation elements as map to collation class. There are
747 three kinds of symbols:
751 We need to make a table which lets the user to access the primary
752 weight based on the symbol string. */
753 symbols_hash_tab_size
= next_prime ((4 * (charset
->char_table
.filled
754 + collate
->elements
.filled
755 + collate
->symbols
.filled
)) / 3);
756 symbols_hash_tab
= obstack_alloc (&non_simple
, (2 * symbols_hash_tab_size
757 * sizeof (u_int32_t
)));
758 memset (symbols_hash_tab
, '\377', (2 * symbols_hash_tab_size
759 * sizeof (u_int32_t
)));
761 /* Now fill the array. First the symbols from the character set,
762 then the collation elements and last the collation symbols. */
763 hash_tab
= &charset
->char_table
;
766 void *ptr
; /* Running pointer. */
767 const char *key
; /* Key for current bucket. */
768 size_t keylen
; /* Length of key data. */
769 void *data
; /* Data. */
772 while (iterate_table (hash_tab
, &ptr
, (const void **) &key
,
773 &keylen
, (void **) &data
) == 0)
778 unsigned int *weights
;
780 if (hash_tab
== &charset
->char_table
781 || hash_tab
== &collate
->elements
)
783 element_t
*lastp
, *firstp
;
784 wchar_t dummy_name
[2];
788 if (hash_tab
== &charset
->char_table
)
790 dummy_name
[0] = (wchar_t) ((unsigned long int) data
);
791 dummy_name
[1] = L
'\0';
793 name_len
= sizeof (wchar_t);
797 element_t
*elemp
= (element_t
*) data
;
799 name_len
= wcslen (name
) * sizeof (wchar_t);
802 /* First check whether this character is used at all. */
803 if (find_entry (&collate
->result
, name
, name_len
,
804 (void *) &firstp
) < 0)
805 /* The symbol is not directly mentioned in the collation.
806 I.e., we use the value for UNDEFINED. */
807 lastp
= &collate
->undefined
;
810 /* The entry for the simple character is always found at
813 while (lastp
->next
!= NULL
&& wcscmp (name
, lastp
->name
))
815 if (lastp
->ordering
== NULL
)
816 lastp
= &collate
->undefined
;
819 weights
= lastp
->ordering
;
823 dummy_weights
[0] = 1;
824 dummy_weights
[collate
->nrules
]
825 = (unsigned int) ((unsigned long int) data
);
827 weights
= dummy_weights
;
830 /* In LASTP->ordering we now have the collation class.
831 Determine the place in the hashing table next. */
832 hash_val
= hash_string (key
, keylen
);
833 idx
= hash_val
% symbols_hash_tab_size
;
835 if (symbols_hash_tab
[2 * idx
] != (~((u_int32_t
) 0)))
837 /* We need the second hashing function. */
838 size_t c
= 1 + (hash_val
% (symbols_hash_tab_size
- 2));
841 if (idx
>= symbols_hash_tab_size
- c
)
842 idx
-= symbols_hash_tab_size
- c
;
845 while (symbols_hash_tab
[2 * idx
] != (~((u_int32_t
) 0)));
848 symbols_hash_tab
[2 * idx
] = obstack_object_size (&string_pool
);
849 symbols_hash_tab
[2 * idx
+ 1] = (obstack_object_size (&non_simple
)
850 / sizeof (u_int32_t
));
852 obstack_grow0 (&string_pool
, key
, keylen
);
853 /* Adding the first weight looks complicated. We have to deal
854 with the kind it is stored and with the fact that original
855 form uses `unsigned int's while we need `u_int32_t' here. */
857 obstack_grow (&non_simple
, &word
, sizeof (u_int32_t
));
858 for (cnt
= 0; cnt
< weights
[0]; ++cnt
)
860 word
= weights
[collate
->nrules
+ cnt
];
861 obstack_grow (&non_simple
, &word
, sizeof (u_int32_t
));
865 if (hash_tab
== &charset
->char_table
)
866 hash_tab
= &collate
->elements
;
867 else if (hash_tab
== &collate
->elements
)
868 hash_tab
= &collate
->symbols
;
873 /* Now we have the complete tables. */
874 if (obstack_object_size (&string_pool
) % 4 != 0)
875 obstack_blank (&non_simple
, 4 - (obstack_object_size (&string_pool
) % 4));
876 symbols_string_pool_size
= obstack_object_size (&string_pool
);
877 symbols_string_pool
= obstack_finish (&string_pool
);
879 symbols_class_size
= obstack_object_size (&non_simple
);
880 symbols_class
= obstack_finish (&non_simple
);
882 /* Generate tables with other byte order. */
883 symbols_hash_tab_ob
= obstack_alloc (&non_simple
, (2 * symbols_hash_tab_size
884 * sizeof (u_int32_t
)));
885 for (cnt
= 0; cnt
< 2 * symbols_hash_tab_size
; ++cnt
)
886 symbols_hash_tab_ob
[cnt
] = SWAPU32 (symbols_hash_tab
[cnt
]);
888 symbols_class_ob
= obstack_alloc (&non_simple
, symbols_class_size
);
889 for (cnt
= 0; cnt
< symbols_class_size
/ 4; ++cnt
)
890 symbols_class_ob
[cnt
] = SWAPU32 (symbols_class
[cnt
]);
893 /* Store table addresses and lengths. */
894 #if __BYTE_ORDER == __BIG_ENDIAN
895 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB
)].iov_base
= table
;
896 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB
)].iov_len
897 = table_best
* level_best
* entry_size
* sizeof (table
[0]);
899 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL
)].iov_base
= table2
;
900 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL
)].iov_len
901 = table_best
* level_best
* entry_size
* sizeof (table
[0]);
903 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB
)].iov_base
= extra
;
904 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB
)].iov_len
= extra_len
;
906 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL
)].iov_base
= extra2
;
907 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL
)].iov_len
= extra_len
;
909 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB
)].iov_base
= table2
;
910 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB
)].iov_len
911 = table_best
* level_best
* entry_size
* sizeof (table
[0]);
913 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL
)].iov_base
= table
;
914 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL
)].iov_len
915 = table_best
* level_best
* entry_size
* sizeof (table
[0]);
917 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB
)].iov_base
= extra2
;
918 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB
)].iov_len
= extra_len
;
920 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL
)].iov_base
= extra
;
921 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL
)].iov_len
= extra_len
;
924 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED
)].iov_base
= &undefined_offset
;
925 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED
)].iov_len
= sizeof (u_int32_t
);
928 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_SIZE
)].iov_base
929 = &element_hash_tab_size
;
930 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_SIZE
)].iov_len
931 = sizeof (u_int32_t
);
933 #if __BYTE_ORDER == __BIG_ENDIAN
934 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB
)].iov_base
936 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB
)].iov_len
937 = 2 * element_hash_tab_size
* sizeof (u_int32_t
);
939 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL
)].iov_base
940 = element_hash_tab_ob
;
941 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL
)].iov_len
942 = 2 * element_hash_tab_size
* sizeof (u_int32_t
);
944 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL
)].iov_base
946 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL
)].iov_len
947 = 2 * element_hash_tab_size
* sizeof (u_int32_t
);
949 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB
)].iov_base
950 = element_hash_tab_ob
;
951 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB
)].iov_len
952 = 2 * element_hash_tab_size
* sizeof (u_int32_t
);
955 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_STR_POOL
)].iov_base
956 = element_string_pool
;
957 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_STR_POOL
)].iov_len
958 = element_string_pool_size
;
960 #if __BYTE_ORDER == __BIG_ENDIAN
961 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB
)].iov_base
963 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB
)].iov_len
964 = element_value_size
;
966 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL
)].iov_base
968 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL
)].iov_len
969 = element_value_size
;
971 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL
)].iov_base
973 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL
)].iov_len
974 = element_value_size
;
976 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB
)].iov_base
978 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB
)].iov_len
979 = element_value_size
;
982 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZE
)].iov_base
983 = &symbols_hash_tab_size
;
984 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZE
)].iov_len
985 = sizeof (u_int32_t
);
987 #if __BYTE_ORDER == __BIG_ENDIAN
988 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB
)].iov_base
990 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB
)].iov_len
991 = 2 * symbols_hash_tab_size
* sizeof (u_int32_t
);
993 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL
)].iov_base
994 = symbols_hash_tab_ob
;
995 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL
)].iov_len
996 = 2 * symbols_hash_tab_size
* sizeof (u_int32_t
);
998 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL
)].iov_base
1000 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL
)].iov_len
1001 = 2 * symbols_hash_tab_size
* sizeof (u_int32_t
);
1003 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB
)].iov_base
1004 = symbols_hash_tab_ob
;
1005 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB
)].iov_len
1006 = 2 * symbols_hash_tab_size
* sizeof (u_int32_t
);
1009 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_STR_POOL
)].iov_base
1010 = symbols_string_pool
;
1011 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_STR_POOL
)].iov_len
1012 = symbols_string_pool_size
;
1014 #if __BYTE_ORDER == __BIG_ENDIAN
1015 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB
)].iov_base
1017 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB
)].iov_len
1018 = symbols_class_size
;
1020 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL
)].iov_base
1022 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL
)].iov_len
1023 = symbols_class_size
;
1025 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL
)].iov_base
1027 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL
)].iov_len
1028 = symbols_class_size
;
1030 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB
)].iov_base
1032 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB
)].iov_len
1033 = symbols_class_size
;
1036 /* Update idx array. */
1037 idx
[0] = iov
[0].iov_len
+ iov
[1].iov_len
;
1038 for (cnt
= 1; cnt
< nelems
; ++cnt
)
1039 idx
[cnt
] = idx
[cnt
- 1] + iov
[1 + cnt
].iov_len
;
1041 write_locale_data (output_path
, "LC_COLLATE", 2 + nelems
, iov
);
1043 obstack_free (&non_simple
, NULL
);
1044 obstack_free (&string_pool
, NULL
);
1049 collate_element_to (struct linereader
*lr
, struct localedef_t
*locale
,
1050 struct token
*code
, struct charset_t
*charset
)
1052 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1056 if (collate
->combine_token
!= NULL
)
1058 free ((void *) collate
->combine_token
);
1059 collate
->combine_token
= NULL
;
1062 value
= charset_find_value (&charset
->char_table
, code
->val
.str
.start
,
1064 if ((wchar_t) value
!= ILLEGAL_CHAR_VALUE
)
1066 lr_error (lr
, _("symbol for multicharacter collating element "
1067 "`%.*s' duplicates symbolic name in charset"),
1068 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1072 if (find_entry (&collate
->elements
, code
->val
.str
.start
, code
->val
.str
.len
,
1075 lr_error (lr
, _("symbol for multicharacter collating element "
1076 "`%.*s' duplicates element definition"),
1077 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1081 if (find_entry (&collate
->elements
, code
->val
.str
.start
, code
->val
.str
.len
,
1084 lr_error (lr
, _("symbol for multicharacter collating element "
1085 "`%.*s' duplicates symbol definition"),
1086 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1090 collate
->combine_token
= code
->val
.str
.start
;
1091 collate
->combine_token_len
= code
->val
.str
.len
;
1096 collate_element_from (struct linereader
*lr
, struct localedef_t
*locale
,
1097 struct token
*code
, struct charset_t
*charset
)
1099 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1100 element_t
*elemp
, *runp
;
1102 /* CODE is a string. */
1103 elemp
= (element_t
*) obstack_alloc (&collate
->element_mem
,
1104 sizeof (element_t
));
1106 /* We have to translate the string. It may contain <...> character
1108 elemp
->name
= (wchar_t *) translate_string (code
->val
.str
.start
, charset
);
1109 elemp
->this_weight
= 0;
1110 elemp
->ordering
= NULL
;
1111 elemp
->ordering_len
= 0;
1113 free (code
->val
.str
.start
);
1115 if (elemp
->name
== NULL
)
1117 /* At least one character in the string is not defined. We simply
1121 `from' string in collation element declaration contains unknown character"));
1125 if (elemp
->name
[0] == L
'\0' || elemp
->name
[1] == L
'\0')
1127 lr_error (lr
, _("illegal collation element"));
1131 /* The entries in the linked lists of RESULT are sorting in
1132 descending order. The order is important for the `strcoll' and
1133 `wcscoll' functions. */
1134 if (find_entry (&collate
->result
, elemp
->name
, sizeof (wchar_t),
1135 (void *) &runp
) >= 0)
1137 /* We already have an entry with this key. Check whether it is
1139 element_t
*prevp
= NULL
;
1144 cmpres
= wcscmp (elemp
->name
, runp
->name
);
1149 while ((runp
= runp
->next
) != NULL
);
1152 lr_error (lr
, _("duplicate collating element definition"));
1158 if (set_entry (&collate
->result
, elemp
->name
, sizeof (wchar_t),
1160 error (EXIT_FAILURE
, 0, _("\
1161 error while inserting collation element into hash table"));
1164 prevp
->next
= elemp
;
1170 if (insert_entry (&collate
->result
, elemp
->name
, sizeof (wchar_t), elemp
)
1172 error (EXIT_FAILURE
, errno
, _("error while inserting to hash table"));
1175 if (insert_entry (&collate
->elements
, collate
->combine_token
,
1176 collate
->combine_token_len
, (void *) elemp
) < 0)
1177 lr_error (lr
, _("cannot insert new collating symbol definition: %s"),
1183 collate_symbol (struct linereader
*lr
, struct localedef_t
*locale
,
1184 struct token
*code
, struct charset_t
*charset
)
1186 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1190 value
= charset_find_value (&charset
->char_table
, code
->val
.str
.start
,
1192 if (value
!= ILLEGAL_CHAR_VALUE
)
1194 lr_error (lr
, _("symbol for multicharacter collating element "
1195 "`%.*s' duplicates symbolic name in charset"),
1196 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1200 if (find_entry (&collate
->elements
, code
->val
.str
.start
, code
->val
.str
.len
,
1203 lr_error (lr
, _("symbol for multicharacter collating element "
1204 "`%.*s' duplicates element definition"),
1205 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1209 if (find_entry (&collate
->symbols
, code
->val
.str
.start
, code
->val
.str
.len
,
1212 lr_error (lr
, _("symbol for multicharacter collating element "
1213 "`%.*s' duplicates other symbol definition"),
1214 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1218 if (insert_entry (&collate
->symbols
, code
->val
.str
.start
, code
->val
.str
.len
,
1220 lr_error (lr
, _("cannot insert new collating symbol definition: %s"),
1226 collate_new_order (struct linereader
*lr
, struct localedef_t
*locale
,
1227 enum coll_sort_rule sort_rule
)
1229 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1231 if (collate
->nrules
>= collate
->nrules_max
)
1233 collate
->nrules_max
*= 2;
1235 = (enum coll_sort_rule
*) xrealloc (collate
->rules
,
1237 * sizeof (enum coll_sort_rule
));
1240 collate
->rules
[collate
->nrules
++] = sort_rule
;
1245 collate_build_arrays (struct linereader
*lr
, struct localedef_t
*locale
)
1247 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1250 = (enum coll_sort_rule
*) xrealloc (collate
->rules
,
1252 * sizeof (enum coll_sort_rule
));
1254 /* Allocate arrays for temporary weights. */
1255 collate
->weight_cnt
= (int *) xmalloc (collate
->nrules
* sizeof (int));
1257 /* Choose arbitrary start value for table size. */
1258 collate
->nweight_max
= 5 * collate
->nrules
;
1259 collate
->weight
= (int *) xmalloc (collate
->nweight_max
* sizeof (int));
1264 collate_order_elem (struct linereader
*lr
, struct localedef_t
*locale
,
1265 struct token
*code
, struct charset_t
*charset
)
1267 const wchar_t zero
= L
'\0';
1268 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1277 /* We have a string to find in one of the three hashing tables. */
1278 value
= charset_find_value (&charset
->char_table
, code
->val
.str
.start
,
1280 if (value
!= ILLEGAL_CHAR_VALUE
)
1282 element_t
*lastp
, *firstp
;
1284 collate
->kind
= character
;
1286 if (find_entry (&collate
->result
, &value
, sizeof (wchar_t),
1287 (void *) &firstp
) < 0)
1288 firstp
= lastp
= NULL
;
1291 /* The entry for the simple character is always found at
1294 while (lastp
->next
!= NULL
)
1295 lastp
= lastp
->next
;
1297 if (lastp
->name
[0] == value
&& lastp
->name
[1] == L
'\0')
1299 lr_error (lr
, _("duplicate definition for character `%.*s'"),
1300 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1301 lr_ignore_rest (lr
, 0);
1307 collate
->current_element
1308 = (element_t
*) obstack_alloc (&collate
->element_mem
,
1309 sizeof (element_t
));
1311 obstack_grow (&collate
->element_mem
, &value
, sizeof (value
));
1312 obstack_grow (&collate
->element_mem
, &zero
, sizeof (zero
));
1314 collate
->current_element
->name
=
1315 (const wchar_t *) obstack_finish (&collate
->element_mem
);
1317 collate
->current_element
->this_weight
= ++collate
->order_cnt
;
1319 collate
->current_element
->next
= NULL
;
1323 if (insert_entry (&collate
->result
, &value
, sizeof (wchar_t),
1324 (void *) collate
->current_element
) < 0)
1326 lr_error (lr
, _("cannot insert collation element `%.*s'"),
1327 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1332 lastp
->next
= collate
->current_element
;
1334 else if (find_entry (&collate
->elements
, code
->val
.str
.start
,
1335 code
->val
.str
.len
, &tmp
) >= 0)
1337 collate
->current_element
= (element_t
*) tmp
;
1339 if (collate
->current_element
->this_weight
!= 0)
1342 collation element `%.*s' appears more than once: ignore line"),
1343 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1344 lr_ignore_rest (lr
, 0);
1349 collate
->kind
= element
;
1350 collate
->current_element
->this_weight
= ++collate
->order_cnt
;
1352 else if (find_entry (&collate
->symbols
, code
->val
.str
.start
,
1353 code
->val
.str
.len
, &tmp
) >= 0)
1355 unsigned int order
= ++collate
->order_cnt
;
1357 if ((unsigned long int) tmp
!= 0ul)
1360 collation symbol `%.*s' appears more than once: ignore line"),
1361 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1362 lr_ignore_rest (lr
, 0);
1367 collate
->kind
= symbol
;
1369 if (set_entry (&collate
->symbols
, code
->val
.str
.start
,
1370 code
->val
.str
.len
, (void *) order
) < 0)
1372 lr_error (lr
, _("cannot process order specification"));
1379 lr_error (lr
, _("unknown symbol `%.*s': line ignored"),
1380 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1381 lr_ignore_rest (lr
, 0);
1388 collate
->kind
= undefined
;
1389 collate
->current_element
= &collate
->undefined
;
1393 if (collate
->was_ellipsis
)
1396 two lines in a row containing `...' are not allowed"));
1399 else if (collate
->kind
!= character
)
1401 /* An ellipsis requires the previous line to be an
1402 character definition. */
1404 line before ellipsis does not contain definition for character constant"));
1405 lr_ignore_rest (lr
, 0);
1409 collate
->kind
= ellipsis
;
1413 assert (! "illegal token in `collate_order_elem'");
1416 /* Now it's time to handle the ellipsis in the previous line. We do
1417 this only when the last line contained an definition for a
1418 character, the current line also defines an character, the
1419 character code for the later is bigger than the former. */
1420 if (collate
->was_ellipsis
)
1422 if (collate
->kind
!= character
)
1425 line after ellipsis must contain character definition"));
1426 lr_ignore_rest (lr
, 0);
1429 else if (collate
->last_char
> value
)
1431 lr_error (lr
, _("end point of ellipsis range is bigger then start"));
1432 lr_ignore_rest (lr
, 0);
1437 /* We can fill the arrays with the information we need. */
1443 name
[0] = collate
->last_char
+ 1;
1446 data
= (unsigned int *) alloca ((collate
->nrules
+ collate
->nweight
)
1447 * sizeof (unsigned int));
1448 ptr
= (size_t *) alloca (collate
->nrules
* sizeof (size_t));
1450 if (data
== NULL
|| ptr
== NULL
)
1451 error (4, 0, _("memory exhausted"));
1453 /* Prepare data. Because the characters covered by an
1454 ellipsis all have equal values we prepare the data once
1455 and only change the variable number (if there are any).
1456 PTR[...] will point to the entries which will have to be
1457 fixed during the output loop. */
1458 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
1460 data
[cnt
] = collate
->weight_cnt
[cnt
];
1461 ptr
[cnt
] = (cnt
== 0
1463 : ptr
[cnt
- 1] + collate
->weight_cnt
[cnt
- 1]);
1466 for (cnt
= 0; cnt
< collate
->nweight
; ++cnt
)
1467 data
[collate
->nrules
+ cnt
] = collate
->weight
[cnt
];
1469 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
1470 if ((wchar_t) data
[ptr
[cnt
]] != ELLIPSIS_CHAR
)
1473 while (name
[0] <= value
)
1477 pelem
= (element_t
*) obstack_alloc (&collate
->element_mem
,
1478 sizeof (element_t
));
1480 error (4, 0, _("memory exhausted"));
1483 = (const wchar_t *) obstack_copy (&collate
->element_mem
,
1484 name
, 2 * sizeof (wchar_t));
1485 pelem
->this_weight
= ++collate
->order_cnt
;
1487 pelem
->ordering_len
= collate
->nweight
;
1489 = (unsigned int *) obstack_copy (&collate
->element_mem
, data
,
1491 + pelem
->ordering_len
)
1492 * sizeof (unsigned int));
1494 /* `...' weights need to be adjusted. */
1495 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
1497 pelem
->ordering
[ptr
[cnt
]] = pelem
->this_weight
;
1499 /* Insert new entry into result table. */
1500 if (find_entry (&collate
->result
, name
, sizeof (wchar_t),
1501 (void *) &pelem
->next
) >= 0)
1503 if (set_entry (&collate
->result
, name
, sizeof (wchar_t),
1504 (void *) pelem
) < 0)
1505 error (4, 0, _("cannot insert into result table"));
1510 if (insert_entry (&collate
->result
, name
, sizeof (wchar_t),
1511 (void *) pelem
) < 0)
1512 error (4, 0, _("cannot insert into result table"));
1515 /* Increment counter. */
1521 /* Reset counters for weights. */
1522 collate
->weight_idx
= 0;
1523 collate
->nweight
= 0;
1524 for (i
= 0; i
< collate
->nrules
; ++i
)
1525 collate
->weight_cnt
[i
] = 0;
1526 collate
->current_patch
= NULL
;
1533 collate_weight_bsymbol (struct linereader
*lr
, struct localedef_t
*locale
,
1534 struct token
*code
, struct charset_t
*charset
)
1536 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1537 unsigned int here_weight
;
1541 assert (code
->tok
== tok_bsymbol
);
1543 value
= charset_find_value (&charset
->char_table
, code
->val
.str
.start
,
1545 if (value
!= ILLEGAL_CHAR_VALUE
)
1549 if (find_entry (&collate
->result
, &value
, sizeof (wchar_t),
1554 && (runp
->name
[0] != value
|| runp
->name
[1] != L
'\0'))
1557 here_weight
= runp
== NULL
? 0 : runp
->this_weight
;
1559 else if (find_entry (&collate
->elements
, code
->val
.str
.start
,
1560 code
->val
.str
.len
, &tmp
) >= 0)
1562 element_t
*runp
= (element_t
*) tmp
;
1564 here_weight
= runp
->this_weight
;
1566 else if (find_entry (&collate
->symbols
, code
->val
.str
.start
,
1567 code
->val
.str
.len
, &tmp
) >= 0)
1569 here_weight
= (unsigned int) tmp
;
1574 lr_error (lr
, _("unknown symbol `%.*s': line ignored"),
1575 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1576 lr_ignore_rest (lr
, 0);
1580 /* When we currently work on a collation symbol we do not expect any
1582 if (collate
->kind
== symbol
)
1585 specification of sorting weight for collation symbol does not make sense"));
1586 lr_ignore_rest (lr
, 0);
1590 /* Add to the current collection of weights. */
1591 if (collate
->nweight
>= collate
->nweight_max
)
1593 collate
->nweight_max
*= 2;
1594 collate
->weight
= (unsigned int *) xrealloc (collate
->weight
,
1595 collate
->nweight_max
);
1598 /* If the weight is currently not known, we remember to patch the
1599 resulting tables. */
1600 if (here_weight
== 0)
1604 newp
= (patch_t
*) obstack_alloc (&collate
->element_mem
,
1606 newp
->fname
= lr
->fname
;
1607 newp
->lineno
= lr
->lineno
;
1608 newp
->token
= (const char *) obstack_copy0 (&collate
->element_mem
,
1609 code
->val
.str
.start
,
1611 newp
->where
.idx
= collate
->nweight
++;
1612 newp
->next
= collate
->current_patch
;
1613 collate
->current_patch
= newp
;
1616 collate
->weight
[collate
->nweight
++] = here_weight
;
1617 ++collate
->weight_cnt
[collate
->weight_idx
];
1624 collate_next_weight (struct linereader
*lr
, struct localedef_t
*locale
)
1626 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1628 if (collate
->kind
== symbol
)
1631 specification of sorting weight for collation symbol does not make sense"));
1632 lr_ignore_rest (lr
, 0);
1636 ++collate
->weight_idx
;
1637 if (collate
->weight_idx
>= collate
->nrules
)
1639 lr_error (lr
, _("too many weights"));
1640 lr_ignore_rest (lr
, 0);
1649 collate_simple_weight (struct linereader
*lr
, struct localedef_t
*locale
,
1650 struct token
*code
, struct charset_t
*charset
)
1652 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1653 unsigned int value
= 0;
1655 /* There current tokens can be `IGNORE', `...', or a string. */
1659 /* This token is allowed in all situations. */
1660 value
= IGNORE_CHAR
;
1664 /* The ellipsis is only allowed for the `...' or `UNDEFINED'
1666 if (collate
->kind
!= ellipsis
&& collate
->kind
!= undefined
)
1669 `...' must only be used in `...' and `UNDEFINED' entries"));
1670 lr_ignore_rest (lr
, 0);
1673 value
= ELLIPSIS_CHAR
;
1677 /* This can become difficult. We have to get the weights which
1678 correspond to the single wide chars in the string. But some
1679 of the `chars' might not be real characters, but collation
1680 elements or symbols. And so the string decoder might have
1681 signaled errors. The string at this point is not translated.
1682 I.e., all <...> sequences are still there. */
1684 char *runp
= code
->val
.str
.start
;
1687 while (*runp
!= '\0')
1689 char *startp
= (char *) runp
;
1690 char *putp
= (char *) runp
;
1693 /* Lookup weight for char and store it. */
1696 while (*++runp
!= '\0' && *runp
!= '>')
1698 if (*runp
== lr
->escape_char
)
1699 if (*++runp
== '\0')
1701 lr_error (lr
, _("unterminated weight name"));
1702 lr_ignore_rest (lr
, 0);
1712 lr_error (lr
, _("empty weight name: line ignored"));
1713 lr_ignore_rest (lr
, 0);
1717 wch
= charset_find_value (&charset
->char_table
, startp
,
1719 if (wch
!= ILLEGAL_CHAR_VALUE
)
1723 if (find_entry (&collate
->result
, &wch
, sizeof (wchar_t),
1724 (void *)&pelem
) < 0)
1727 while (pelem
!= NULL
1728 && (pelem
->name
[0] != wch
1729 || pelem
->name
[1] != L
'\0'))
1730 pelem
= pelem
->next
;
1732 value
= pelem
== NULL
? 0 : pelem
->this_weight
;
1734 else if (find_entry (&collate
->elements
, startp
, putp
- startp
,
1737 element_t
*pelem
= (element_t
*) tmp
;
1739 value
= pelem
->this_weight
;
1741 else if (find_entry (&collate
->symbols
, startp
, putp
- startp
,
1744 value
= (unsigned int) tmp
;
1749 lr_error (lr
, _("unknown symbol `%.*s': line ignored"),
1750 (int) (putp
- startp
), startp
);
1751 lr_ignore_rest (lr
, 0);
1760 if (*runp
== lr
->escape_char
)
1762 static const char digits
[] = "0123456789abcdef";
1767 if (_tolower (*runp
) == 'x')
1772 else if (_tolower (*runp
) == 'd')
1780 dp
= strchr (digits
, _tolower (*runp
));
1781 if (dp
== NULL
|| (dp
- digits
) >= base
)
1785 illegal character constant in string"));
1786 lr_ignore_rest (lr
, 0);
1792 dp
= strchr (digits
, _tolower (*runp
));
1793 if (dp
== NULL
|| (dp
- digits
) >= base
)
1801 dp
= strchr (digits
, _tolower (*runp
));
1802 if (dp
!= NULL
&& (dp
- digits
< base
))
1811 wch
= (wchar_t) *runp
++;
1813 /* Lookup the weight for WCH. */
1814 if (find_entry (&collate
->result
, &wch
, sizeof (wch
),
1819 && (wp
->name
[0] != wch
|| wp
->name
[1] != L
'\0'))
1822 value
= wp
== NULL
? 0 : wp
->this_weight
;
1824 /* To get the correct name for the error message. */
1827 /**************************************************\
1828 |* I know here is something wrong. Characters in *|
1829 |* the string which are not in the <...> form *|
1830 |* cannot be declared forward for now!!! *|
1831 \**************************************************/
1834 /* Store in weight array. */
1835 if (collate
->nweight
>= collate
->nweight_max
)
1837 collate
->nweight_max
*= 2;
1839 = (unsigned int *) xrealloc (collate
->weight
,
1840 collate
->nweight_max
);
1847 newp
= (patch_t
*) obstack_alloc (&collate
->element_mem
,
1849 newp
->fname
= lr
->fname
;
1850 newp
->lineno
= lr
->lineno
;
1852 = (const char *) obstack_copy0 (&collate
->element_mem
,
1853 startp
, putp
- startp
);
1854 newp
->where
.idx
= collate
->nweight
++;
1855 newp
->next
= collate
->current_patch
;
1856 collate
->current_patch
= newp
;
1859 collate
->weight
[collate
->nweight
++] = value
;
1860 ++collate
->weight_cnt
[collate
->weight_idx
];
1866 assert (! "should not happen");
1870 if (collate
->nweight
>= collate
->nweight_max
)
1872 collate
->nweight_max
*= 2;
1873 collate
->weight
= (unsigned int *) xrealloc (collate
->weight
,
1874 collate
->nweight_max
);
1877 collate
->weight
[collate
->nweight
++] = value
;
1878 ++collate
->weight_cnt
[collate
->weight_idx
];
1885 collate_end_weight (struct linereader
*lr
, struct localedef_t
*locale
)
1887 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1888 element_t
*pelem
= collate
->current_element
;
1890 if (collate
->kind
== symbol
)
1892 /* We don't have to do anything. */
1893 collate
->was_ellipsis
= 0;
1897 if (collate
->kind
== ellipsis
)
1899 /* Before the next line is processed the ellipsis is handled. */
1900 collate
->was_ellipsis
= 1;
1904 assert (collate
->kind
== character
|| collate
->kind
== element
1905 || collate
->kind
== undefined
);
1907 /* Fill in the missing weights. */
1908 while (++collate
->weight_idx
< collate
->nrules
)
1910 collate
->weight
[collate
->nweight
++] = pelem
->this_weight
;
1911 ++collate
->weight_cnt
[collate
->weight_idx
];
1914 /* Now we know how many ordering weights the current
1915 character/element has. Allocate room in the element structure
1916 and copy information. */
1917 pelem
->ordering_len
= collate
->nweight
;
1919 /* First we write an array with the number of values for each
1921 obstack_grow (&collate
->element_mem
, collate
->weight_cnt
,
1922 collate
->nrules
* sizeof (unsigned int));
1924 /* Now the weights itselves. */
1925 obstack_grow (&collate
->element_mem
, collate
->weight
,
1926 collate
->nweight
* sizeof (unsigned int));
1929 pelem
->ordering
= obstack_finish (&collate
->element_mem
);
1931 /* Now we handle the "patches". */
1932 while (collate
->current_patch
!= NULL
)
1934 patch_t
*this_patch
;
1936 this_patch
= collate
->current_patch
;
1938 this_patch
->where
.pos
= &pelem
->ordering
[collate
->nrules
1939 + this_patch
->where
.idx
];
1941 collate
->current_patch
= this_patch
->next
;
1942 this_patch
->next
= collate
->all_patches
;
1943 collate
->all_patches
= this_patch
;
1946 /* Set information for next round. */
1947 collate
->was_ellipsis
= 0;
1948 if (collate
->kind
!= undefined
)
1949 collate
->last_char
= pelem
->name
[0];