1 /* Copyright (C) 1995, 1996 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If
17 not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
33 #include "localeinfo.h"
35 #include "simple-hash.h"
36 #include "stringtrans.h"
37 #include "strlen-hash.h"
39 /* Uncomment the following line in the production version. */
40 /* #define NDEBUG 1 */
44 #define MAX(a, b) ((a) > (b) ? (a) : (b))
47 (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))
50 /* What kind of symbols get defined? */
61 typedef struct patch_t
75 typedef struct element_t
78 unsigned int this_weight
;
80 struct element_t
*next
;
82 unsigned int *ordering
;
87 /* The real definition of the struct for the LC_COLLATE locale. */
88 struct locale_collate_t
90 /* Collate symbol table. Simple mapping to number. */
93 /* The collation elements. */
95 struct obstack element_mem
;
97 /* The result table. */
100 /* Sorting rules given in order_start line. */
102 u_int32_t nrules_max
;
103 enum coll_sort_rule
*rules
;
105 /* Used while recognizing symbol composed of multiple tokens
106 (collating-element). */
107 const char *combine_token
;
108 size_t combine_token_len
;
110 /* How many sorting order specifications so far. */
111 unsigned int order_cnt
;
113 /* Was lastline ellipsis? */
115 /* Value of last entry if was character. */
117 /* Current element. */
118 element_t
*current_element
;
119 /* What kind of symbol is current element. */
120 enum coll_symbol kind
;
122 /* While collecting the weigths we need some temporary space. */
123 unsigned int current_order
;
125 unsigned int weight_idx
;
126 unsigned int *weight
;
131 patch_t
*current_patch
;
132 patch_t
*all_patches
;
134 /* Room for the UNDEFINED information. */
136 unsigned int undefined_len
;
140 /* Be verbose? Defined in localedef.c. */
144 void *xmalloc (size_t __n
);
145 void *xrealloc (void *__p
, size_t __n
);
148 #define obstack_chunk_alloc xmalloc
149 #define obstack_chunk_free free
153 collate_startup (struct linereader
*lr
, struct localedef_t
*locale
,
154 struct charset_t
*charset
)
156 struct locale_collate_t
*collate
;
158 /* It is important that we always use UCS4 encoding for strings now. */
159 encoding_method
= ENC_UCS4
;
161 /* Allocate the needed room. */
162 locale
->categories
[LC_COLLATE
].collate
= collate
=
163 (struct locale_collate_t
*) xmalloc (sizeof (struct locale_collate_t
));
165 /* Allocate hash table for collating elements. */
166 if (init_hash (&collate
->elements
, 512))
167 error (4, 0, _("memory exhausted"));
168 collate
->combine_token
= NULL
;
169 obstack_init (&collate
->element_mem
);
171 /* Allocate hash table for collating elements. */
172 if (init_hash (&collate
->symbols
, 64))
173 error (4, 0, _("memory exhausted"));
175 /* Allocate hash table for result. */
176 if (init_hash (&collate
->result
, 512))
177 error (4, 0, _("memory exhausted"));
180 collate
->nrules_max
= 10;
182 = (enum coll_sort_rule
*) xmalloc (collate
->nrules_max
183 * sizeof (enum coll_sort_rule
));
185 collate
->order_cnt
= 1; /* The smallest weight is 2. */
187 collate
->was_ellipsis
= 0;
188 collate
->last_char
= L
'\0'; /* 0 because leading ellipsis is allowed. */
190 collate
->all_patches
= NULL
;
192 /* This tells us no UNDEFINED entry was found until now. */
193 collate
->undefined
.this_weight
= 0;
195 lr
->translate_strings
= 0;
200 collate_finish (struct localedef_t
*locale
, struct charset_t
*charset
)
202 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
206 /* Patch the constructed table so that forward references are
208 for (patch
= collate
->all_patches
; patch
!= NULL
; patch
= patch
->next
)
211 size_t toklen
= strlen (patch
->token
);
213 unsigned int value
= 0;
215 wch
= charset_find_value (charset
, patch
->token
, toklen
);
216 if (wch
!= ILLEGAL_CHAR_VALUE
)
220 if (find_entry (&collate
->result
, &wch
, sizeof (wchar_t),
223 for (; runp
!= NULL
; runp
= runp
->next
)
224 if (runp
->name
[0] == wch
&& runp
->name
[1] == L
'\0')
227 value
= runp
== NULL
? 0 : runp
->this_weight
;
229 else if (find_entry (&collate
->elements
, patch
->token
, toklen
, &ptmp
)
232 value
= ((element_t
*) ptmp
)->this_weight
;
234 else if (find_entry (&collate
->symbols
, patch
->token
, toklen
, &ptmp
)
237 value
= (unsigned long int) ptmp
;
243 error_at_line (0, 0, patch
->fname
, patch
->lineno
,
244 _("no weight defined for symbol `%s'"), patch
->token
);
246 *patch
->where
.pos
= value
;
249 /* If no definition for UNDEFINED is given, all characters in the
250 given charset must be specified. */
251 if (collate
->undefined
.ordering
== NULL
)
253 /**************************************************************\
254 |* XXX We should test whether really an unspecified character *|
255 |* exists before giving the message. *|
256 \**************************************************************/
259 error (0, 0, _("no definition of `UNDEFINED'"));
261 collate
->undefined
.ordering_len
= collate
->nrules
;
262 weight
= ++collate
->order_cnt
;
264 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
267 obstack_grow (&collate
->element_mem
, &one
, sizeof (one
));
270 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
271 obstack_grow (&collate
->element_mem
, &weight
, sizeof (weight
));
273 collate
->undefined
.ordering
= obstack_finish (&collate
->element_mem
);
276 collate
->undefined_len
= 2; /* For the name: 1 x wchar_t + L'\0'. */
277 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
278 collate
->undefined_len
+= 1 + collate
->undefined
.ordering
[cnt
];
284 collate_output (struct localedef_t
*locale
, struct charset_t
*charset
,
285 const char *output_path
)
287 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
288 u_int32_t table_size
, table_best
, level_best
, sum_best
;
293 const size_t nelems
= _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
);
294 struct iovec iov
[2 + nelems
];
295 struct locale_file data
;
296 u_int32_t idx
[nelems
];
297 struct obstack non_simple
;
298 struct obstack string_pool
;
299 size_t cnt
, entry_size
;
300 u_int32_t undefined_offset
= UINT_MAX
;
301 u_int32_t
*table
, *extra
, *table2
, *extra2
;
303 u_int32_t element_hash_tab_size
;
304 u_int32_t
*element_hash_tab
;
305 u_int32_t
*element_hash_tab_ob
;
306 u_int32_t element_string_pool_size
;
307 char *element_string_pool
;
308 u_int32_t element_value_size
;
309 wchar_t *element_value
;
310 wchar_t *element_value_ob
;
311 u_int32_t symbols_hash_tab_size
;
312 u_int32_t
*symbols_hash_tab
;
313 u_int32_t
*symbols_hash_tab_ob
;
314 u_int32_t symbols_string_pool_size
;
315 char *symbols_string_pool
;
316 u_int32_t symbols_class_size
;
317 u_int32_t
*symbols_class
;
318 u_int32_t
*symbols_class_ob
;
319 hash_table
*hash_tab
;
320 unsigned int dummy_weights
[collate
->nrules
+ 1];
326 /* Compute table size. */
328 Computing table size for collation information might take a while..."),
330 for (table_size
= 256; table_size
< sum_best
; ++table_size
)
332 size_t hits
[table_size
];
333 unsigned int worst
= 1;
338 for (cnt
= 0; cnt
< 256; ++cnt
)
340 memset (&hits
[256], '\0', sizeof (hits
) - 256 * sizeof (size_t));
342 while (iterate_table (&collate
->result
, &last
, (const void **) &name
,
343 &len
, (void **) &pelem
) >= 0)
344 if (pelem
->ordering
!= NULL
&& pelem
->name
[0] > 0xff)
345 if (++hits
[(unsigned int) pelem
->name
[0] % table_size
] > worst
)
347 worst
= hits
[(unsigned int) pelem
->name
[0] % table_size
];
348 if (table_size
* worst
> sum_best
)
352 if (table_size
* worst
< sum_best
)
354 sum_best
= table_size
* worst
;
355 table_best
= table_size
;
359 assert (table_best
!= 0xffff || level_best
!= 0xffff);
360 fputs (_(" done\n"), stderr
);
362 obstack_init (&non_simple
);
363 obstack_init (&string_pool
);
365 data
.magic
= LIMAGIC (LC_COLLATE
);
367 iov
[0].iov_base
= (void *) &data
;
368 iov
[0].iov_len
= sizeof (data
);
370 iov
[1].iov_base
= (void *) idx
;
371 iov
[1].iov_len
= sizeof (idx
);
373 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES
)].iov_base
= &collate
->nrules
;
374 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES
)].iov_len
= sizeof (u_int32_t
);
376 table
= (u_int32_t
*) alloca (collate
->nrules
* sizeof (u_int32_t
));
377 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES
)].iov_base
= table
;
378 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES
)].iov_len
379 = collate
->nrules
* sizeof (u_int32_t
);
380 /* Another trick here. Describing the collation method needs only a
381 few bits (3, to be exact). But the binary file should be
382 accessible by maschines with both endianesses and so we store both
383 information in the same word. */
384 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
385 table
[cnt
] = collate
->rules
[cnt
] | SWAPU32 (collate
->rules
[cnt
]);
387 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE
)].iov_base
= &table_best
;
388 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE
)].iov_len
= sizeof (u_int32_t
);
390 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS
)].iov_base
= &level_best
;
391 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS
)].iov_len
392 = sizeof (u_int32_t
);
394 entry_size
= 1 + MAX (collate
->nrules
, 2);
396 table
= (u_int32_t
*) alloca (table_best
* level_best
* entry_size
397 * sizeof (table
[0]));
398 memset (table
, '\0', table_best
* level_best
* entry_size
399 * sizeof (table
[0]));
402 /* Macros for inserting in output table. */
403 #define ADD_VALUE(expr) \
405 u_int32_t to_write = (u_int32_t) expr; \
406 obstack_grow (&non_simple, &to_write, sizeof (to_write)); \
409 #define ADD_ELEMENT(pelem, len) \
415 wlen = wcslen (pelem->name); \
416 obstack_grow (&non_simple, pelem->name, (wlen + 1) * sizeof (u_int32_t)); \
418 idx = collate->nrules; \
419 for (cnt = 0; cnt < collate->nrules; ++cnt) \
423 ADD_VALUE (pelem->ordering[cnt]); \
424 for (disp = 0; disp < pelem->ordering[cnt]; ++disp) \
425 ADD_VALUE (pelem->ordering[idx++]); \
429 #define ADD_FORWARD(pelem) \
431 /* We leave a reference in the main table and put all \
432 information in the table for the extended entries. */ \
434 element_t *has_simple = NULL; \
437 table[(level * table_best + slot) * entry_size + 1] \
439 table[(level * table_best + slot) * entry_size + 2] \
440 = obstack_object_size (&non_simple) / sizeof (u_int32_t); \
442 /* Here we have to construct the non-simple table entry. First \
443 compute the total length of this entry. */ \
444 for (runp = (pelem); runp != NULL; runp = runp->next) \
445 if (runp->ordering != NULL) \
450 value = 1 + wcslen (runp->name) + 1; \
452 for (cnt = 0; cnt < collate->nrules; ++cnt) \
453 /* We have to take care for entries without ordering \
454 information. While reading them they get inserted in the \
455 table and later not removed when something goes wrong with \
456 reading its weights. */ \
458 value += 1 + runp->ordering[cnt]; \
460 if (runp->name[1] == L'\0') \
464 ADD_ELEMENT (runp, value); \
467 if (has_simple == NULL) \
471 ADD_VALUE (collate->undefined_len + 1); \
473 /* Add the name. */ \
474 ADD_VALUE ((pelem)->name[0]); \
477 idx = collate->nrules; \
478 for (cnt = 0; cnt < collate->nrules; ++cnt) \
482 ADD_VALUE (collate->undefined.ordering[cnt]); \
483 for (disp = 0; disp < collate->undefined.ordering[cnt]; ++disp) \
485 if ((wchar_t) collate->undefined.ordering[idx] \
487 ADD_VALUE ((pelem)->name[0]); \
489 ADD_VALUE (collate->undefined.ordering[idx++]); \
498 /* Fill the table now. First we look for all the characters which
499 fit into one single byte. This speeds up the 8-bit string
502 while (iterate_table (&collate
->result
, &last
, (const void **) &name
,
503 &len
, (void **) &pelem
) >= 0)
504 if (pelem
->name
[0] <= 0xff)
506 /* We have a single byte name. Now we must distinguish
507 between entries in simple form (i.e., only one value per
508 weight and no collation element starting with the same
509 character) and those which are not. */
510 size_t slot
= ((size_t) pelem
->name
[0]);
511 const size_t level
= 0;
513 table
[slot
* entry_size
] = pelem
->name
[0];
515 if (pelem
->name
[1] == L
'\0' && pelem
->next
== NULL
516 && pelem
->ordering_len
== collate
->nrules
)
518 /* Yes, we have a simple one. Lucky us. */
521 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
522 table
[slot
* entry_size
+ 1 + cnt
]
523 = pelem
->ordering
[collate
->nrules
+ cnt
];
529 /* Now check for missing single byte entries. If one exist we fill
530 with the UNDEFINED entry. */
531 for (cnt
= 0; cnt
< 256; ++cnt
)
532 /* The first weight is never 0 for existing entries. */
533 if (table
[cnt
* entry_size
+ 1] == 0)
535 /* We have to fill in the information from the UNDEFINED
537 table
[cnt
* entry_size
] = (u_int32_t
) cnt
;
539 if (collate
->undefined
.ordering_len
== collate
->nrules
)
543 for (inner
= 0; inner
< collate
->nrules
; ++inner
)
544 if ((wchar_t)collate
->undefined
.ordering
[collate
->nrules
+ inner
]
546 table
[cnt
* entry_size
+ 1 + inner
] = cnt
;
548 table
[cnt
* entry_size
+ 1 + inner
]
549 = collate
->undefined
.ordering
[collate
->nrules
+ inner
];
553 if (undefined_offset
!= UINT_MAX
)
555 table
[cnt
* entry_size
+ 1] = FORWARD_CHAR
;
556 table
[cnt
* entry_size
+ 2] = undefined_offset
;
560 const size_t slot
= cnt
;
561 const size_t level
= 0;
563 ADD_FORWARD (&collate
->undefined
);
564 undefined_offset
= table
[cnt
* entry_size
+ 2];
569 /* Now we are ready for inserting the whole rest. */
571 while (iterate_table (&collate
->result
, &last
, (const void **) &name
,
572 &len
, (void **) &pelem
) >= 0)
573 if (pelem
->name
[0] > 0xff)
575 /* Find the position. */
576 size_t slot
= ((size_t) pelem
->name
[0]) % table_best
;
579 while (table
[(level
* table_best
+ slot
) * entry_size
+ 1] != 0)
581 assert (level
< level_best
);
583 if (pelem
->name
[1] == L
'\0' && pelem
->next
== NULL
584 && pelem
->ordering_len
== collate
->nrules
)
586 /* Again a simple entry. */
589 for (inner
= 0; inner
< collate
->nrules
; ++inner
)
590 table
[(level
* table_best
+ slot
) * entry_size
+ 1 + inner
]
591 = pelem
->ordering
[collate
->nrules
+ inner
];
597 /* Add the UNDEFINED entry. */
599 /* Here we have to construct the non-simple table entry. */
602 undefined_offset
= obstack_object_size (&non_simple
);
604 idx
= collate
->nrules
;
605 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
609 ADD_VALUE (collate
->undefined
.ordering
[cnt
]);
610 for (disp
= 0; disp
< collate
->undefined
.ordering
[cnt
]; ++disp
)
611 ADD_VALUE (collate
->undefined
.ordering
[idx
++]);
615 /* Finish the extra block. */
616 extra_len
= obstack_object_size (&non_simple
);
617 extra
= (u_int32_t
*) obstack_finish (&non_simple
);
618 assert ((extra_len
% sizeof (u_int32_t
)) == 0);
620 /* Now we have to build the two array for the other byte ordering. */
621 table2
= (u_int32_t
*) alloca (table_best
* level_best
* entry_size
622 * sizeof (table
[0]));
623 extra2
= (u_int32_t
*) alloca (extra_len
);
625 for (cnt
= 0; cnt
< table_best
* level_best
* entry_size
; ++cnt
)
626 table2
[cnt
] = SWAPU32 (table
[cnt
]);
628 for (cnt
= 0; cnt
< extra_len
/ sizeof (u_int32_t
); ++cnt
)
629 extra2
[cnt
] = SWAPU32 (extra2
[cnt
]);
631 /* We need a simple hashing table to get a collation-element->chars
632 mapping. We again use internal hasing using a secondary hashing
635 Each string has an associate hashing value V, computed by a
636 fixed function. To locate the string we use open addressing with
637 double hashing. The first index will be V % M, where M is the
638 size of the hashing table. If no entry is found, iterating with
639 a second, independent hashing function takes place. This second
640 value will be 1 + V % (M - 2). The approximate number of probes
643 for unsuccessful search: (1 - N / M) ^ -1
644 for successful search: - (N / M) ^ -1 * ln (1 - N / M)
646 where N is the number of keys.
648 If we now choose M to be the next prime bigger than 4 / 3 * N,
649 we get the values 4 and 1.85 resp. Because unsuccesful searches
650 are unlikely this is a good value. Formulas: [Knuth, The Art of
651 Computer Programming, Volume 3, Sorting and Searching, 1973,
653 if (collate
->elements
.filled
== 0)
655 /* We don't need any element table since there are no collating
657 element_hash_tab_size
= 0;
658 element_hash_tab
= NULL
;
659 element_hash_tab_ob
= NULL
;
660 element_string_pool_size
= 0;
661 element_string_pool
= NULL
;
662 element_value_size
= 0;
663 element_value
= NULL
;
664 element_value_ob
= NULL
;
668 void *ptr
; /* Running pointer. */
669 const char *key
; /* Key for current bucket. */
670 size_t keylen
; /* Length of key data. */
671 const element_t
*data
; /* Data, i.e., the character sequence. */
673 element_hash_tab_size
= next_prime ((collate
->elements
.filled
* 4) / 3);
674 if (element_hash_tab_size
< 7)
675 /* We need a minimum to make the following code work. */
676 element_hash_tab_size
= 7;
678 element_hash_tab
= obstack_alloc (&non_simple
, (2 * element_hash_tab_size
679 * sizeof (u_int32_t
)));
680 memset (element_hash_tab
, '\377', (2 * element_hash_tab_size
681 * sizeof (u_int32_t
)));
684 while (iterate_table (&collate
->elements
, &ptr
, (const void **) &key
,
685 &keylen
, (void **) &data
) == 0)
687 size_t hash_val
= hash_string (key
, keylen
);
688 size_t idx
= hash_val
% element_hash_tab_size
;
690 if (element_hash_tab
[2 * idx
] != (~((u_int32_t
) 0)))
692 /* We need the second hashing function. */
693 size_t c
= 1 + (hash_val
% (element_hash_tab_size
- 2));
696 if (idx
>= element_hash_tab_size
- c
)
697 idx
-= element_hash_tab_size
- c
;
700 while (element_hash_tab
[2 * idx
] != (~((u_int32_t
) 0)));
703 element_hash_tab
[2 * idx
] = obstack_object_size (&non_simple
);
704 element_hash_tab
[2 * idx
+ 1] = (obstack_object_size (&string_pool
)
707 obstack_grow0 (&non_simple
, key
, keylen
);
708 obstack_grow (&string_pool
, data
->name
,
709 (wcslen (data
->name
) + 1) * sizeof (wchar_t));
712 if (obstack_object_size (&non_simple
) % 4 != 0)
713 obstack_blank (&non_simple
,
714 4 - (obstack_object_size (&non_simple
) % 4));
715 element_string_pool_size
= obstack_object_size (&non_simple
);
716 element_string_pool
= obstack_finish (&non_simple
);
718 element_value_size
= obstack_object_size (&string_pool
);
719 element_value
= obstack_finish (&string_pool
);
721 /* Create the tables for the other byte order. */
722 element_hash_tab_ob
= obstack_alloc (&non_simple
,
723 (2 * element_hash_tab_size
724 * sizeof (u_int32_t
)));
725 for (cnt
= 0; cnt
< 2 * element_hash_tab_size
; ++cnt
)
726 element_hash_tab_ob
[cnt
] = SWAPU32 (element_hash_tab
[cnt
]);
728 element_value_ob
= obstack_alloc (&string_pool
, element_value_size
);
729 if (sizeof (wchar_t) != 4)
731 fputs ("sizeof (wchar_t) != 4 currently not handled", stderr
);
734 for (cnt
= 0; cnt
< element_value_size
/ 4; ++cnt
)
735 element_value_ob
[cnt
] = SWAPU32 (element_value
[cnt
]);
738 /* Store collation elements as map to collation class. There are
739 three kinds of symbols:
743 We need to make a table which lets the user to access the primary
744 weight based on the symbol string. */
745 symbols_hash_tab_size
= next_prime ((4 * (charset
->char_table
.filled
746 + collate
->elements
.filled
747 + collate
->symbols
.filled
)) / 3);
748 symbols_hash_tab
= obstack_alloc (&non_simple
, (2 * symbols_hash_tab_size
749 * sizeof (u_int32_t
)));
750 memset (symbols_hash_tab
, '\377', (2 * symbols_hash_tab_size
751 * sizeof (u_int32_t
)));
753 /* Now fill the array. First the symbols from the character set,
754 then the collation elements and last the collation symbols. */
755 hash_tab
= &charset
->char_table
;
758 void *ptr
; /* Running pointer. */
759 const char *key
; /* Key for current bucket. */
760 size_t keylen
; /* Length of key data. */
761 void *data
; /* Data. */
764 while (iterate_table (hash_tab
, &ptr
, (const void **) &key
,
765 &keylen
, (void **) &data
) == 0)
770 unsigned int *weights
;
772 if (hash_tab
== &charset
->char_table
773 || hash_tab
== &collate
->elements
)
775 element_t
*lastp
, *firstp
;
776 wchar_t dummy_name
[2];
780 if (hash_tab
== &charset
->char_table
)
782 dummy_name
[0] = (wchar_t) ((unsigned long int) data
);
783 dummy_name
[1] = L
'\0';
785 name_len
= sizeof (wchar_t);
789 element_t
*elemp
= (element_t
*) data
;
791 name_len
= wcslen (name
) * sizeof (wchar_t);
794 /* First check whether this character is used at all. */
795 if (find_entry (&collate
->result
, name
, name_len
,
796 (void *) &firstp
) < 0)
797 /* The symbol is not directly mentioned in the collation.
798 I.e., we use the value for UNDEFINED. */
799 lastp
= &collate
->undefined
;
802 /* The entry for the simple character is always found at
805 while (lastp
->next
!= NULL
&& wcscmp (name
, lastp
->name
))
809 weights
= lastp
->ordering
;
813 dummy_weights
[0] = 1;
814 dummy_weights
[collate
->nrules
]
815 = (unsigned int) ((unsigned long int) data
);
817 weights
= dummy_weights
;
820 /* In LASTP->ordering we now have the collation class.
821 Determine the place in the hashing table next. */
822 hash_val
= hash_string (key
, keylen
);
823 idx
= hash_val
% symbols_hash_tab_size
;
825 if (symbols_hash_tab
[2 * idx
] != (~((u_int32_t
) 0)))
827 /* We need the second hashing function. */
828 size_t c
= 1 + (hash_val
% (symbols_hash_tab_size
- 2));
831 if (idx
>= symbols_hash_tab_size
- c
)
832 idx
-= symbols_hash_tab_size
- c
;
835 while (symbols_hash_tab
[2 * idx
] != (~((u_int32_t
) 0)));
838 symbols_hash_tab
[2 * idx
] = obstack_object_size (&string_pool
);
839 symbols_hash_tab
[2 * idx
+ 1] = (obstack_object_size (&non_simple
)
840 / sizeof (u_int32_t
));
842 obstack_grow0 (&string_pool
, key
, keylen
);
843 /* Adding the first weight looks complicated. We have to deal
844 with the kind it is stored and with the fact that original
845 form uses `unsigned int's while we need `u_int32_t' here. */
847 obstack_grow (&non_simple
, &word
, sizeof (u_int32_t
));
848 for (cnt
= 0; cnt
< weights
[0]; ++cnt
)
850 word
= weights
[collate
->nrules
+ cnt
];
851 obstack_grow (&non_simple
, &word
, sizeof (u_int32_t
));
855 if (hash_tab
== &charset
->char_table
)
856 hash_tab
= &collate
->elements
;
857 else if (hash_tab
== &collate
->elements
)
858 hash_tab
= &collate
->symbols
;
863 /* Now we have the complete tables. */
864 if (obstack_object_size (&string_pool
) % 4 != 0)
865 obstack_blank (&non_simple
, 4 - (obstack_object_size (&string_pool
) % 4));
866 symbols_string_pool_size
= obstack_object_size (&string_pool
);
867 symbols_string_pool
= obstack_finish (&string_pool
);
869 symbols_class_size
= obstack_object_size (&non_simple
);
870 symbols_class
= obstack_finish (&non_simple
);
872 /* Generate tables with other byte order. */
873 symbols_hash_tab_ob
= obstack_alloc (&non_simple
, (2 * symbols_hash_tab_size
874 * sizeof (u_int32_t
)));
875 for (cnt
= 0; cnt
< 2 * symbols_hash_tab_size
; ++cnt
)
876 symbols_hash_tab_ob
[cnt
] = SWAPU32 (symbols_hash_tab
[cnt
]);
878 symbols_class_ob
= obstack_alloc (&non_simple
, symbols_class_size
);
879 for (cnt
= 0; cnt
< symbols_class_size
/ 4; ++cnt
)
880 symbols_class_ob
[cnt
] = SWAPU32 (symbols_class
[cnt
]);
883 /* Store table adresses and lengths. */
884 #if __BYTE_ORDER == __BIG_ENDIAN
885 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB
)].iov_base
= table
;
886 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB
)].iov_len
887 = table_best
* level_best
* entry_size
* sizeof (table
[0]);
889 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL
)].iov_base
= table2
;
890 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL
)].iov_len
891 = table_best
* level_best
* entry_size
* sizeof (table
[0]);
893 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB
)].iov_base
= extra
;
894 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB
)].iov_len
= extra_len
;
896 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL
)].iov_base
= extra2
;
897 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL
)].iov_len
= extra_len
;
899 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB
)].iov_base
= table2
;
900 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB
)].iov_len
901 = table_best
* level_best
* entry_size
* sizeof (table
[0]);
903 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL
)].iov_base
= table
;
904 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL
)].iov_len
905 = table_best
* level_best
* entry_size
* sizeof (table
[0]);
907 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB
)].iov_base
= extra2
;
908 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB
)].iov_len
= extra_len
;
910 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL
)].iov_base
= extra
;
911 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL
)].iov_len
= extra_len
;
914 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED
)].iov_base
= &undefined_offset
;
915 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED
)].iov_len
= sizeof (u_int32_t
);
918 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_SIZE
)].iov_base
919 = &element_hash_tab_size
;
920 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_SIZE
)].iov_len
921 = sizeof (u_int32_t
);
923 #if __BYTE_ORDER == __BIG_ENDIAN
924 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB
)].iov_base
926 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB
)].iov_len
927 = 2 * element_hash_tab_size
* sizeof (u_int32_t
);
929 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL
)].iov_base
930 = element_hash_tab_ob
;
931 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL
)].iov_len
932 = 2 * element_hash_tab_size
* sizeof (u_int32_t
);
934 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL
)].iov_base
936 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL
)].iov_len
937 = 2 * element_hash_tab_size
* sizeof (u_int32_t
);
939 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB
)].iov_base
940 = element_hash_tab_ob
;
941 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB
)].iov_len
942 = 2 * element_hash_tab_size
* sizeof (u_int32_t
);
945 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_STR_POOL
)].iov_base
946 = element_string_pool
;
947 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_STR_POOL
)].iov_len
948 = element_string_pool_size
;
950 #if __BYTE_ORDER == __BIG_ENDIAN
951 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB
)].iov_base
953 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB
)].iov_len
954 = element_value_size
;
956 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL
)].iov_base
958 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL
)].iov_len
959 = element_value_size
;
961 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL
)].iov_base
963 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL
)].iov_len
964 = element_value_size
;
966 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB
)].iov_base
968 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB
)].iov_len
969 = element_value_size
;
972 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZE
)].iov_base
973 = &symbols_hash_tab_size
;
974 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZE
)].iov_len
975 = sizeof (u_int32_t
);
977 #if __BYTE_ORDER == __BIG_ENDIAN
978 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB
)].iov_base
980 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB
)].iov_len
981 = 2 * symbols_hash_tab_size
* sizeof (u_int32_t
);
983 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL
)].iov_base
984 = symbols_hash_tab_ob
;
985 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL
)].iov_len
986 = 2 * symbols_hash_tab_size
* sizeof (u_int32_t
);
988 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL
)].iov_base
990 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL
)].iov_len
991 = 2 * symbols_hash_tab_size
* sizeof (u_int32_t
);
993 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB
)].iov_base
994 = symbols_hash_tab_ob
;
995 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB
)].iov_len
996 = 2 * symbols_hash_tab_size
* sizeof (u_int32_t
);
999 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_STR_POOL
)].iov_base
1000 = symbols_string_pool
;
1001 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_STR_POOL
)].iov_len
1002 = symbols_string_pool_size
;
1004 #if __BYTE_ORDER == __BIG_ENDIAN
1005 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB
)].iov_base
1007 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB
)].iov_len
1008 = symbols_class_size
;
1010 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL
)].iov_base
1012 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL
)].iov_len
1013 = symbols_class_size
;
1015 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL
)].iov_base
1017 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL
)].iov_len
1018 = symbols_class_size
;
1020 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB
)].iov_base
1022 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB
)].iov_len
1023 = symbols_class_size
;
1026 /* Update idx array. */
1027 idx
[0] = iov
[0].iov_len
+ iov
[1].iov_len
;
1028 for (cnt
= 1; cnt
< nelems
; ++cnt
)
1029 idx
[cnt
] = idx
[cnt
- 1] + iov
[1 + cnt
].iov_len
;
1031 write_locale_data (output_path
, "LC_COLLATE", 2 + nelems
, iov
);
1033 obstack_free (&non_simple
, NULL
);
1034 obstack_free (&string_pool
, NULL
);
1039 collate_element_to (struct linereader
*lr
, struct localedef_t
*locale
,
1040 struct token
*code
, struct charset_t
*charset
)
1042 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1046 if (collate
->combine_token
!= NULL
)
1048 free ((void *) collate
->combine_token
);
1049 collate
->combine_token
= NULL
;
1052 value
= charset_find_value (charset
, code
->val
.str
.start
, code
->val
.str
.len
);
1053 if ((wchar_t) value
!= ILLEGAL_CHAR_VALUE
)
1055 lr_error (lr
, _("symbol for multicharacter collating element "
1056 "`%.*s' duplicates symbolic name in charset"),
1057 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1061 if (find_entry (&collate
->elements
, code
->val
.str
.start
, code
->val
.str
.len
,
1064 lr_error (lr
, _("symbol for multicharacter collating element "
1065 "`%.*s' duplicates other element definition"),
1066 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1070 if (find_entry (&collate
->elements
, code
->val
.str
.start
, code
->val
.str
.len
,
1073 lr_error (lr
, _("symbol for multicharacter collating element "
1074 "`%.*s' duplicates symbol definition"),
1075 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1079 collate
->combine_token
= code
->val
.str
.start
;
1080 collate
->combine_token_len
= code
->val
.str
.len
;
1085 collate_element_from (struct linereader
*lr
, struct localedef_t
*locale
,
1086 struct token
*code
, struct charset_t
*charset
)
1088 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1089 element_t
*elemp
, *runp
;
1091 /* CODE is a string. */
1092 elemp
= (element_t
*) obstack_alloc (&collate
->element_mem
,
1093 sizeof (element_t
));
1095 /* We have to translate the string. It may contain <...> character
1097 elemp
->name
= (wchar_t *) translate_string (code
->val
.str
.start
, charset
);
1098 elemp
->this_weight
= 0;
1099 elemp
->ordering
= NULL
;
1100 elemp
->ordering_len
= 0;
1102 free (code
->val
.str
.start
);
1104 if (elemp
->name
== NULL
)
1106 /* At least one character in the string is not defined. We simply
1110 `from' string in collation element declaration contains unknown character"));
1114 if (elemp
->name
[0] == L
'\0' || elemp
->name
[1] == L
'\0')
1116 lr_error (lr
, _("illegal collation element"));
1120 /* The entries in the linked lists of RESULT are sorting in
1121 descending order. The order is important for the `strcoll' and
1122 `wcscoll' functions. */
1123 if (find_entry (&collate
->result
, elemp
->name
, sizeof (wchar_t),
1124 (void *) &runp
) >= 0)
1126 /* We already have an entry with this key. Check whether it is
1128 element_t
*prevp
= NULL
;
1133 cmpres
= wcscmp (elemp
->name
, runp
->name
);
1138 while ((runp
= runp
->next
) != NULL
);
1141 lr_error (lr
, _("duplicate collating element definition"));
1147 if (set_entry (&collate
->result
, elemp
->name
, sizeof (wchar_t),
1149 error (EXIT_FAILURE
, 0, _("\
1150 error while inserting collation element into hash table"));
1153 prevp
->next
= elemp
;
1159 if (insert_entry (&collate
->result
, elemp
->name
, sizeof (wchar_t), elemp
)
1161 error (EXIT_FAILURE
, errno
, _("error while inserting to hash table"));
1164 if (insert_entry (&collate
->elements
, collate
->combine_token
,
1165 collate
->combine_token_len
, (void *) elemp
) < 0)
1166 lr_error (lr
, _("cannot insert new collating symbol definition: %s"),
1172 collate_symbol (struct linereader
*lr
, struct localedef_t
*locale
,
1173 struct token
*code
, struct charset_t
*charset
)
1175 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1179 value
= charset_find_value (charset
, code
->val
.str
.start
, code
->val
.str
.len
);
1180 if (value
!= ILLEGAL_CHAR_VALUE
)
1182 lr_error (lr
, _("symbol for multicharacter collating element "
1183 "`%.*s' duplicates symbolic name in charset"),
1184 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1188 if (find_entry (&collate
->elements
, code
->val
.str
.start
, code
->val
.str
.len
,
1191 lr_error (lr
, _("symbol for multicharacter collating element "
1192 "`%.*s' duplicates element definition"),
1193 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1197 if (find_entry (&collate
->symbols
, code
->val
.str
.start
, code
->val
.str
.len
,
1200 lr_error (lr
, _("symbol for multicharacter collating element "
1201 "`%.*s' duplicates other symbol definition"),
1202 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1206 if (insert_entry (&collate
->symbols
, code
->val
.str
.start
, code
->val
.str
.len
,
1208 lr_error (lr
, _("cannot insert new collating symbol definition: %s"),
1214 collate_new_order (struct linereader
*lr
, struct localedef_t
*locale
,
1215 enum coll_sort_rule sort_rule
)
1217 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1219 if (collate
->nrules
>= collate
->nrules_max
)
1221 collate
->nrules_max
*= 2;
1223 = (enum coll_sort_rule
*) xrealloc (collate
->rules
,
1225 * sizeof (enum coll_sort_rule
));
1228 collate
->rules
[collate
->nrules
++] = sort_rule
;
1233 collate_build_arrays (struct linereader
*lr
, struct localedef_t
*locale
)
1235 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1238 = (enum coll_sort_rule
*) xrealloc (collate
->rules
,
1240 * sizeof (enum coll_sort_rule
));
1242 /* Allocate arrays for temporary weights. */
1243 collate
->weight_cnt
= (int *) xmalloc (collate
->nrules
* sizeof (int));
1245 /* Choose arbitrary start value for table size. */
1246 collate
->nweight_max
= 5 * collate
->nrules
;
1247 collate
->weight
= (int *) xmalloc (collate
->nweight_max
* sizeof (int));
1252 collate_order_elem (struct linereader
*lr
, struct localedef_t
*locale
,
1253 struct token
*code
, struct charset_t
*charset
)
1255 const wchar_t zero
= L
'\0';
1256 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1265 /* We have a string to find in one of the three hashing tables. */
1266 value
= charset_find_value (charset
, code
->val
.str
.start
,
1268 if (value
!= ILLEGAL_CHAR_VALUE
)
1270 element_t
*lastp
, *firstp
;
1272 collate
->kind
= character
;
1274 if (find_entry (&collate
->result
, &value
, sizeof (wchar_t),
1275 (void *) &firstp
) < 0)
1276 firstp
= lastp
= NULL
;
1279 /* The entry for the simple character is always found at
1282 while (lastp
->next
!= NULL
)
1283 lastp
= lastp
->next
;
1285 if (lastp
->name
[0] == value
&& lastp
->name
[1] == L
'\0')
1287 lr_error (lr
, _("duplicate definition for character `%.*s'"),
1288 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1289 lr_ignore_rest (lr
, 0);
1295 collate
->current_element
1296 = (element_t
*) obstack_alloc (&collate
->element_mem
,
1297 sizeof (element_t
));
1299 obstack_grow (&collate
->element_mem
, &value
, sizeof (value
));
1300 obstack_grow (&collate
->element_mem
, &zero
, sizeof (zero
));
1302 collate
->current_element
->name
=
1303 (const wchar_t *) obstack_finish (&collate
->element_mem
);
1305 collate
->current_element
->this_weight
= ++collate
->order_cnt
;
1307 collate
->current_element
->next
= NULL
;
1311 if (insert_entry (&collate
->result
, &value
, sizeof (wchar_t),
1312 (void *) collate
->current_element
) < 0)
1314 lr_error (lr
, _("cannot insert collation element `%.*s'"),
1315 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1320 lastp
->next
= collate
->current_element
;
1322 else if (find_entry (&collate
->elements
, code
->val
.str
.start
,
1323 code
->val
.str
.len
, &tmp
) >= 0)
1325 collate
->current_element
= (element_t
*) tmp
;
1327 if (collate
->current_element
->this_weight
!= 0)
1330 collation element `%.*s' appears more than once: ignore line"),
1331 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1332 lr_ignore_rest (lr
, 0);
1337 collate
->kind
= element
;
1338 collate
->current_element
->this_weight
= ++collate
->order_cnt
;
1340 else if (find_entry (&collate
->symbols
, code
->val
.str
.start
,
1341 code
->val
.str
.len
, &tmp
) >= 0)
1343 unsigned int order
= ++collate
->order_cnt
;
1345 if ((unsigned long int) tmp
!= 0ul)
1348 collation symbol `%.*s' appears more than once: ignore line"),
1349 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1350 lr_ignore_rest (lr
, 0);
1355 collate
->kind
= symbol
;
1357 if (set_entry (&collate
->symbols
, code
->val
.str
.start
,
1358 code
->val
.str
.len
, (void *) order
) < 0)
1360 lr_error (lr
, _("cannot process order specification"));
1367 lr_error (lr
, _("unknown symbol `%.*s': line ignored"),
1368 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1369 lr_ignore_rest (lr
, 0);
1376 collate
->kind
= undefined
;
1377 collate
->current_element
= &collate
->undefined
;
1381 if (collate
->was_ellipsis
)
1384 two lines in a row containing `...' are not allowed"));
1387 else if (collate
->kind
!= character
)
1389 /* An ellipsis requires the previous line to be an
1390 character definition. */
1392 line before ellipsis does not contain definition for character constant"));
1393 lr_ignore_rest (lr
, 0);
1397 collate
->kind
= ellipsis
;
1401 assert (! "illegal token in `collate_order_elem'");
1404 /* Now it's time to handle the ellipsis in the previous line. We do
1405 this only when the last line contained an definition for a
1406 character, the current line also defines an character, the
1407 character code for the later is bigger than the former. */
1408 if (collate
->was_ellipsis
)
1410 if (collate
->kind
!= character
)
1413 line after ellipsis must contain character definition"));
1414 lr_ignore_rest (lr
, 0);
1417 else if (collate
->last_char
> value
)
1419 lr_error (lr
, _("end point of ellipsis range is bigger then start"));
1420 lr_ignore_rest (lr
, 0);
1425 /* We can fill the arrays with the information we need. */
1431 name
[0] = collate
->last_char
+ 1;
1434 data
= (unsigned int *) alloca ((collate
->nrules
+ collate
->nweight
)
1435 * sizeof (unsigned int));
1436 ptr
= (size_t *) alloca (collate
->nrules
* sizeof (size_t));
1438 if (data
== NULL
|| ptr
== NULL
)
1439 error (4, 0, _("memory exhausted"));
1441 /* Prepare data. Because the characters covered by an
1442 ellipsis all have equal values we prepare the data once
1443 and only change the variable number (if there are any).
1444 PTR[...] will point to the entries which will have to be
1445 fixed during the output loop. */
1446 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
1448 data
[cnt
] = collate
->weight_cnt
[cnt
];
1449 ptr
[cnt
] = (cnt
== 0
1451 : ptr
[cnt
- 1] + collate
->weight_cnt
[cnt
- 1]);
1454 for (cnt
= 0; cnt
< collate
->nweight
; ++cnt
)
1455 data
[collate
->nrules
+ cnt
] = collate
->weight
[cnt
];
1457 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
1458 if ((wchar_t) data
[ptr
[cnt
]] != ELLIPSIS_CHAR
)
1461 while (name
[0] <= value
)
1465 pelem
= (element_t
*) obstack_alloc (&collate
->element_mem
,
1466 sizeof (element_t
));
1468 error (4, 0, _("memory exhausted"));
1471 = (const wchar_t *) obstack_copy (&collate
->element_mem
,
1472 name
, 2 * sizeof (wchar_t));
1473 pelem
->this_weight
= ++collate
->order_cnt
;
1475 pelem
->ordering_len
= collate
->nweight
;
1477 = (unsigned int *) obstack_copy (&collate
->element_mem
, data
,
1479 * pelem
->ordering_len
)
1480 * sizeof (unsigned int));
1482 /* `...' weights need to be adjusted. */
1483 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
1485 pelem
->ordering
[ptr
[cnt
]] = pelem
->this_weight
;
1487 /* Insert new entry into result table. */
1488 if (find_entry (&collate
->result
, name
, sizeof (wchar_t),
1489 (void *) &pelem
->next
) >= 0)
1491 if (set_entry (&collate
->result
, name
, sizeof (wchar_t),
1492 (void *) pelem
->next
) < 0)
1493 error (4, 0, _("cannot insert into result table"));
1496 if (insert_entry (&collate
->result
, name
, sizeof (wchar_t),
1497 (void *) pelem
->next
) < 0)
1498 error (4, 0, _("cannot insert into result table"));
1500 /* Increment counter. */
1506 /* Reset counters for weights. */
1507 collate
->weight_idx
= 0;
1508 collate
->nweight
= 0;
1509 for (i
= 0; i
< collate
->nrules
; ++i
)
1510 collate
->weight_cnt
[i
] = 0;
1511 collate
->current_patch
= NULL
;
1518 collate_weight_bsymbol (struct linereader
*lr
, struct localedef_t
*locale
,
1519 struct token
*code
, struct charset_t
*charset
)
1521 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1522 unsigned int here_weight
;
1526 assert (code
->tok
== tok_bsymbol
);
1528 value
= charset_find_value (charset
, code
->val
.str
.start
, code
->val
.str
.len
);
1529 if (value
!= ILLEGAL_CHAR_VALUE
)
1533 if (find_entry (&collate
->result
, &value
, sizeof (wchar_t),
1538 && (runp
->name
[0] != value
|| runp
->name
[1] != L
'\0'))
1541 here_weight
= runp
== NULL
? 0 : runp
->this_weight
;
1543 else if (find_entry (&collate
->elements
, code
->val
.str
.start
,
1544 code
->val
.str
.len
, &tmp
) >= 0)
1546 element_t
*runp
= (element_t
*) tmp
;
1548 here_weight
= runp
->this_weight
;
1550 else if (find_entry (&collate
->symbols
, code
->val
.str
.start
,
1551 code
->val
.str
.len
, &tmp
) >= 0)
1553 here_weight
= (unsigned int) tmp
;
1558 lr_error (lr
, _("unknown symbol `%.*s': line ignored"),
1559 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1560 lr_ignore_rest (lr
, 0);
1564 /* When we currently work on a collation symbol we do not expect any
1566 if (collate
->kind
== symbol
)
1569 specification of sorting weight for collation symbol does not make sense"));
1570 lr_ignore_rest (lr
, 0);
1574 /* Add to the current collection of weights. */
1575 if (collate
->nweight
>= collate
->nweight_max
)
1577 collate
->nweight_max
*= 2;
1578 collate
->weight
= (unsigned int *) xrealloc (collate
->weight
,
1579 collate
->nweight_max
);
1582 /* If the weight is currently not known, we remember to patch the
1583 resulting tables. */
1584 if (here_weight
== 0)
1588 newp
= (patch_t
*) obstack_alloc (&collate
->element_mem
,
1590 newp
->fname
= lr
->fname
;
1591 newp
->lineno
= lr
->lineno
;
1592 newp
->token
= (const char *) obstack_copy0 (&collate
->element_mem
,
1593 code
->val
.str
.start
,
1595 newp
->where
.idx
= collate
->nweight
++;
1596 newp
->next
= collate
->current_patch
;
1597 collate
->current_patch
= newp
;
1600 collate
->weight
[collate
->nweight
++] = here_weight
;
1601 ++collate
->weight_cnt
[collate
->weight_idx
];
1608 collate_next_weight (struct linereader
*lr
, struct localedef_t
*locale
)
1610 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1612 if (collate
->kind
== symbol
)
1615 specification of sorting weight for collation symbol does not make sense"));
1616 lr_ignore_rest (lr
, 0);
1620 ++collate
->weight_idx
;
1621 if (collate
->weight_idx
>= collate
->nrules
)
1623 lr_error (lr
, _("too many weights"));
1624 lr_ignore_rest (lr
, 0);
1633 collate_simple_weight (struct linereader
*lr
, struct localedef_t
*locale
,
1634 struct token
*code
, struct charset_t
*charset
)
1636 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1637 unsigned int value
= 0;
1639 /* There current tokens can be `IGNORE', `...', or a string. */
1643 /* This token is allowed in all situations. */
1644 value
= IGNORE_CHAR
;
1648 /* The ellipsis is only allowed for the `...' or `UNDEFINED'
1650 if (collate
->kind
!= ellipsis
&& collate
->kind
!= undefined
)
1653 `...' must only be used in `...' and `UNDEFINED' entries"));
1654 lr_ignore_rest (lr
, 0);
1657 value
= ELLIPSIS_CHAR
;
1661 /* This can become difficult. We have to get the weights which
1662 correspind the the single wide chars in the string. But some
1663 of the `chars' might not be real characters, but collation
1664 elements or symbols. And so the string decoder might have
1665 signaled errors. The string at this point is not translated.
1666 I.e., all <...> sequences are still there. */
1668 char *runp
= code
->val
.str
.start
;
1671 while (*runp
!= '\0')
1673 char *startp
= (char *) runp
;
1674 char *putp
= (char *) runp
;
1677 /* Lookup weight for char and store it. */
1680 while (*++runp
!= '\0' && *runp
!= '>')
1682 if (*runp
== lr
->escape_char
)
1683 if (*++runp
== '\0')
1685 lr_error (lr
, _("unterminated weight name"));
1686 lr_ignore_rest (lr
, 0);
1696 lr_error (lr
, _("empty weight name: line ignored"));
1697 lr_ignore_rest (lr
, 0);
1701 wch
= charset_find_value (charset
, startp
, putp
- startp
);
1702 if (wch
!= ILLEGAL_CHAR_VALUE
)
1706 if (find_entry (&collate
->result
, &wch
, sizeof (wchar_t),
1707 (void *)&pelem
) < 0)
1710 while (pelem
!= NULL
1711 && (pelem
->name
[0] != wch
1712 || pelem
->name
[1] != L
'\0'))
1713 pelem
= pelem
->next
;
1715 value
= pelem
== NULL
? 0 : pelem
->this_weight
;
1717 else if (find_entry (&collate
->elements
, startp
, putp
- startp
,
1720 element_t
*pelem
= (element_t
*) tmp
;
1722 value
= pelem
->this_weight
;
1724 else if (find_entry (&collate
->symbols
, startp
, putp
- startp
,
1727 value
= (unsigned int) tmp
;
1732 lr_error (lr
, _("unknown symbol `%.*s': line ignored"),
1733 (int) (putp
- startp
), startp
);
1734 lr_ignore_rest (lr
, 0);
1743 if (*runp
== lr
->escape_char
)
1745 static const char digits
[] = "0123456789abcdef";
1750 if (tolower (*runp
) == 'x')
1755 else if (tolower (*runp
) == 'd')
1763 dp
= strchr (digits
, tolower (*runp
));
1764 if (dp
== NULL
|| (dp
- digits
) >= base
)
1768 illegal character constant in string"));
1769 lr_ignore_rest (lr
, 0);
1775 dp
= strchr (digits
, tolower (*runp
));
1776 if (dp
== NULL
|| (dp
- digits
) >= base
)
1784 dp
= strchr (digits
, tolower (*runp
));
1785 if (dp
!= NULL
&& (dp
- digits
< base
))
1794 wch
= (wchar_t) *runp
++;
1796 /* Lookup the weight for WCH. */
1797 if (find_entry (&collate
->result
, &wch
, sizeof (wch
),
1802 && (wp
->name
[0] != wch
|| wp
->name
[1] != L
'\0'))
1805 value
= wp
== NULL
? 0 : wp
->this_weight
;
1807 /* To get the correct name for the error message. */
1810 /**************************************************\
1811 |* I know here is something wrong. Characters in *|
1812 |* the string which are not in the <...> form *|
1813 |* cannot be declared forward for now!!! *|
1814 \**************************************************/
1817 /* Store in weight array. */
1818 if (collate
->nweight
>= collate
->nweight_max
)
1820 collate
->nweight_max
*= 2;
1822 = (unsigned int *) xrealloc (collate
->weight
,
1823 collate
->nweight_max
);
1830 newp
= (patch_t
*) obstack_alloc (&collate
->element_mem
,
1832 newp
->fname
= lr
->fname
;
1833 newp
->lineno
= lr
->lineno
;
1835 = (const char *) obstack_copy0 (&collate
->element_mem
,
1836 startp
, putp
- startp
);
1837 newp
->where
.idx
= collate
->nweight
++;
1838 newp
->next
= collate
->current_patch
;
1839 collate
->current_patch
= newp
;
1842 collate
->weight
[collate
->nweight
++] = value
;
1843 ++collate
->weight_cnt
[collate
->weight_idx
];
1849 assert (! "should not happen");
1853 if (collate
->nweight
>= collate
->nweight_max
)
1855 collate
->nweight_max
*= 2;
1856 collate
->weight
= (unsigned int *) xrealloc (collate
->weight
,
1857 collate
->nweight_max
);
1860 collate
->weight
[collate
->nweight
++] = value
;
1861 ++collate
->weight_cnt
[collate
->weight_idx
];
1868 collate_end_weight (struct linereader
*lr
, struct localedef_t
*locale
)
1870 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1871 element_t
*pelem
= collate
->current_element
;
1873 if (collate
->kind
== symbol
)
1875 /* We don't have to do anything. */
1876 collate
->was_ellipsis
= 0;
1880 if (collate
->kind
== ellipsis
)
1882 /* Before the next line is processed the ellipsis is handled. */
1883 collate
->was_ellipsis
= 1;
1887 assert (collate
->kind
== character
|| collate
->kind
== element
1888 || collate
->kind
== undefined
);
1890 /* Fill in the missing weights. */
1891 while (++collate
->weight_idx
< collate
->nrules
)
1893 collate
->weight
[collate
->nweight
++] = pelem
->this_weight
;
1894 ++collate
->weight_cnt
[collate
->weight_idx
];
1897 /* Now we know how many ordering weights the current
1898 character/element has. Allocate room in the element structure
1899 and copy information. */
1900 pelem
->ordering_len
= collate
->nweight
;
1902 /* First we write an array with the number of values for each
1904 obstack_grow (&collate
->element_mem
, collate
->weight_cnt
,
1905 collate
->nrules
* sizeof (unsigned int));
1907 /* Now the weights itselves. */
1908 obstack_grow (&collate
->element_mem
, collate
->weight
,
1909 collate
->nweight
* sizeof (unsigned int));
1912 pelem
->ordering
= obstack_finish (&collate
->element_mem
);
1914 /* Now we handle the "patches". */
1915 while (collate
->current_patch
!= NULL
)
1917 patch_t
*this_patch
;
1919 this_patch
= collate
->current_patch
;
1921 this_patch
->where
.pos
= &pelem
->ordering
[collate
->nrules
1922 + this_patch
->where
.idx
];
1924 collate
->current_patch
= this_patch
->next
;
1925 this_patch
->next
= collate
->all_patches
;
1926 collate
->all_patches
= this_patch
;
1929 /* Set information for next round. */
1930 collate
->was_ellipsis
= 0;
1931 if (collate
->kind
!= undefined
)
1932 collate
->last_char
= pelem
->name
[0];