1 /* Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
33 #include "localeinfo.h"
35 #include "simple-hash.h"
36 #include "stringtrans.h"
37 #include "strlen-hash.h"
39 /* Uncomment the following line in the production version. */
40 /* #define NDEBUG 1 */
44 #define MAX(a, b) ((a) > (b) ? (a) : (b))
47 (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))
50 /* What kind of symbols get defined? */
61 typedef struct patch_t
75 typedef struct element_t
78 unsigned int this_weight
;
80 struct element_t
*next
;
82 unsigned int *ordering
;
87 /* The real definition of the struct for the LC_COLLATE locale. */
88 struct locale_collate_t
90 /* Collate symbol table. Simple mapping to number. */
93 /* The collation elements. */
95 struct obstack element_mem
;
97 /* The result table. */
100 /* Sorting rules given in order_start line. */
102 u_int32_t nrules_max
;
103 enum coll_sort_rule
*rules
;
105 /* Used while recognizing symbol composed of multiple tokens
106 (collating-element). */
107 const char *combine_token
;
108 size_t combine_token_len
;
110 /* How many sorting order specifications so far. */
111 unsigned int order_cnt
;
113 /* Was lastline ellipsis? */
115 /* Value of last entry if was character. */
117 /* Current element. */
118 element_t
*current_element
;
119 /* What kind of symbol is current element. */
120 enum coll_symbol kind
;
122 /* While collecting the weights we need some temporary space. */
123 unsigned int current_order
;
125 unsigned int weight_idx
;
126 unsigned int *weight
;
131 patch_t
*current_patch
;
132 patch_t
*all_patches
;
134 /* Room for the UNDEFINED information. */
136 unsigned int undefined_len
;
140 /* Be verbose? Defined in localedef.c. */
144 void *xmalloc (size_t __n
);
145 void *xrealloc (void *__p
, size_t __n
);
148 #define obstack_chunk_alloc malloc
149 #define obstack_chunk_free free
153 collate_startup (struct linereader
*lr
, struct localedef_t
*locale
,
154 struct charset_t
*charset
)
156 struct locale_collate_t
*collate
;
158 /* It is important that we always use UCS4 encoding for strings now. */
159 encoding_method
= ENC_UCS4
;
161 /* Allocate the needed room. */
162 locale
->categories
[LC_COLLATE
].collate
= collate
=
163 (struct locale_collate_t
*) xmalloc (sizeof (struct locale_collate_t
));
165 /* Allocate hash table for collating elements. */
166 if (init_hash (&collate
->elements
, 512))
167 error (4, 0, _("memory exhausted"));
168 collate
->combine_token
= NULL
;
169 obstack_init (&collate
->element_mem
);
171 /* Allocate hash table for collating elements. */
172 if (init_hash (&collate
->symbols
, 64))
173 error (4, 0, _("memory exhausted"));
175 /* Allocate hash table for result. */
176 if (init_hash (&collate
->result
, 512))
177 error (4, 0, _("memory exhausted"));
180 collate
->nrules_max
= 10;
182 = (enum coll_sort_rule
*) xmalloc (collate
->nrules_max
183 * sizeof (enum coll_sort_rule
));
185 collate
->order_cnt
= 1; /* The smallest weight is 2. */
187 collate
->was_ellipsis
= 0;
188 collate
->last_char
= L
'\0'; /* 0 because leading ellipsis is allowed. */
190 collate
->all_patches
= NULL
;
192 /* This tells us no UNDEFINED entry was found until now. */
193 memset (&collate
->undefined
, '\0', sizeof (collate
->undefined
));
195 lr
->translate_strings
= 0;
200 collate_finish (struct localedef_t
*locale
, struct charset_t
*charset
)
202 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
206 /* Patch the constructed table so that forward references are
208 for (patch
= collate
->all_patches
; patch
!= NULL
; patch
= patch
->next
)
211 size_t toklen
= strlen (patch
->token
);
213 unsigned int value
= 0;
215 wch
= charset_find_value (&charset
->char_table
, patch
->token
, toklen
);
216 if (wch
!= ILLEGAL_CHAR_VALUE
)
220 if (find_entry (&collate
->result
, &wch
, sizeof (wchar_t),
223 for (; runp
!= NULL
; runp
= runp
->next
)
224 if (runp
->name
[0] == wch
&& runp
->name
[1] == L
'\0')
227 value
= runp
== NULL
? 0 : runp
->this_weight
;
229 else if (find_entry (&collate
->elements
, patch
->token
, toklen
, &ptmp
)
232 value
= ((element_t
*) ptmp
)->this_weight
;
234 else if (find_entry (&collate
->symbols
, patch
->token
, toklen
, &ptmp
)
237 value
= (unsigned long int) ptmp
;
245 error_at_line (0, 0, patch
->fname
, patch
->lineno
,
246 _("no weight defined for symbol `%s'"),
250 *patch
->where
.pos
= value
;
253 /* If no definition for UNDEFINED is given, all characters in the
254 given charset must be specified. */
255 if (collate
->undefined
.ordering
== NULL
)
257 /**************************************************************\
258 |* XXX We should test whether really an unspecified character *|
259 |* exists before giving the message. *|
260 \**************************************************************/
264 error (0, 0, _("no definition of `UNDEFINED'"));
266 collate
->undefined
.ordering_len
= collate
->nrules
;
267 weight
= ++collate
->order_cnt
;
269 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
272 obstack_grow (&collate
->element_mem
, &one
, sizeof (one
));
275 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
276 obstack_grow (&collate
->element_mem
, &weight
, sizeof (weight
));
278 collate
->undefined
.ordering
= obstack_finish (&collate
->element_mem
);
281 collate
->undefined_len
= 2; /* For the name: 1 x wchar_t + L'\0'. */
282 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
283 collate
->undefined_len
+= 1 + collate
->undefined
.ordering
[cnt
];
289 collate_output (struct localedef_t
*locale
, struct charset_t
*charset
,
290 const char *output_path
)
292 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
293 u_int32_t table_size
, table_best
, level_best
, sum_best
;
298 const size_t nelems
= _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
);
299 struct iovec iov
[2 + nelems
];
300 struct locale_file data
;
301 u_int32_t idx
[nelems
];
302 struct obstack non_simple
;
303 struct obstack string_pool
;
304 size_t cnt
, entry_size
;
305 u_int32_t undefined_offset
= UINT_MAX
;
306 u_int32_t
*table
, *extra
, *table2
, *extra2
;
308 u_int32_t element_hash_tab_size
;
309 u_int32_t
*element_hash_tab
;
310 u_int32_t
*element_hash_tab_ob
;
311 u_int32_t element_string_pool_size
;
312 char *element_string_pool
;
313 u_int32_t element_value_size
;
314 wchar_t *element_value
;
315 wchar_t *element_value_ob
;
316 u_int32_t symbols_hash_tab_size
;
317 u_int32_t
*symbols_hash_tab
;
318 u_int32_t
*symbols_hash_tab_ob
;
319 u_int32_t symbols_string_pool_size
;
320 char *symbols_string_pool
;
321 u_int32_t symbols_class_size
;
322 u_int32_t
*symbols_class
;
323 u_int32_t
*symbols_class_ob
;
324 hash_table
*hash_tab
;
325 unsigned int dummy_weights
[collate
->nrules
+ 1];
331 /* Compute table size. */
334 Computing table size for collation information might take a while..."),
336 for (table_size
= 256; table_size
< sum_best
; ++table_size
)
338 size_t hits
[table_size
];
339 unsigned int worst
= 1;
344 for (cnt
= 0; cnt
< 256; ++cnt
)
346 memset (&hits
[256], '\0', sizeof (hits
) - 256 * sizeof (size_t));
348 while (iterate_table (&collate
->result
, &last
, (const void **) &name
,
349 &len
, (void **) &pelem
) >= 0)
350 if (pelem
->ordering
!= NULL
&& pelem
->name
[0] > 0xff)
351 if (++hits
[(unsigned int) pelem
->name
[0] % table_size
] > worst
)
353 worst
= hits
[(unsigned int) pelem
->name
[0] % table_size
];
354 if (table_size
* worst
> sum_best
)
358 if (table_size
* worst
< sum_best
)
360 sum_best
= table_size
* worst
;
361 table_best
= table_size
;
365 assert (table_best
!= 0xffff || level_best
!= 0xffff);
367 fputs (_(" done\n"), stderr
);
369 obstack_init (&non_simple
);
370 obstack_init (&string_pool
);
372 data
.magic
= LIMAGIC (LC_COLLATE
);
374 iov
[0].iov_base
= (void *) &data
;
375 iov
[0].iov_len
= sizeof (data
);
377 iov
[1].iov_base
= (void *) idx
;
378 iov
[1].iov_len
= sizeof (idx
);
380 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES
)].iov_base
= &collate
->nrules
;
381 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES
)].iov_len
= sizeof (u_int32_t
);
383 table
= (u_int32_t
*) alloca (collate
->nrules
* sizeof (u_int32_t
));
384 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES
)].iov_base
= table
;
385 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES
)].iov_len
386 = collate
->nrules
* sizeof (u_int32_t
);
387 /* Another trick here. Describing the collation method needs only a
388 few bits (3, to be exact). But the binary file should be
389 accessible by machines with both endianesses and so we store both
390 forms in the same word. */
391 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
392 table
[cnt
] = collate
->rules
[cnt
] | SWAPU32 (collate
->rules
[cnt
]);
394 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE
)].iov_base
= &table_best
;
395 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE
)].iov_len
= sizeof (u_int32_t
);
397 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS
)].iov_base
= &level_best
;
398 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS
)].iov_len
399 = sizeof (u_int32_t
);
401 entry_size
= 1 + MAX (collate
->nrules
, 2);
403 table
= (u_int32_t
*) alloca (table_best
* level_best
* entry_size
404 * sizeof (table
[0]));
405 memset (table
, '\0', table_best
* level_best
* entry_size
406 * sizeof (table
[0]));
409 /* Macros for inserting in output table. */
410 #define ADD_VALUE(expr) \
412 u_int32_t to_write = (u_int32_t) expr; \
413 obstack_grow (&non_simple, &to_write, sizeof (to_write)); \
416 #define ADD_ELEMENT(pelem, len) \
422 wlen = wcslen (pelem->name); \
423 obstack_grow (&non_simple, pelem->name, (wlen + 1) * sizeof (u_int32_t)); \
425 idx = collate->nrules; \
426 for (cnt = 0; cnt < collate->nrules; ++cnt) \
430 ADD_VALUE (pelem->ordering[cnt]); \
431 for (disp = 0; disp < pelem->ordering[cnt]; ++disp) \
432 ADD_VALUE (pelem->ordering[idx++]); \
436 #define ADD_FORWARD(pelem) \
438 /* We leave a reference in the main table and put all \
439 information in the table for the extended entries. */ \
441 element_t *has_simple = NULL; \
444 table[(level * table_best + slot) * entry_size + 1] \
446 table[(level * table_best + slot) * entry_size + 2] \
447 = obstack_object_size (&non_simple) / sizeof (u_int32_t); \
449 /* Here we have to construct the non-simple table entry. First \
450 compute the total length of this entry. */ \
451 for (runp = (pelem); runp != NULL; runp = runp->next) \
452 if (runp->ordering != NULL) \
457 value = 1 + wcslen (runp->name) + 1; \
459 for (cnt = 0; cnt < collate->nrules; ++cnt) \
460 /* We have to take care for entries without ordering \
461 information. While reading them they get inserted in the \
462 table and later not removed when something goes wrong with \
463 reading its weights. */ \
464 value += 1 + runp->ordering[cnt]; \
466 if (runp->name[1] == L'\0') \
469 ADD_ELEMENT (runp, value); \
472 if (has_simple == NULL) \
476 ADD_VALUE (collate->undefined_len + 1); \
478 /* Add the name. */ \
479 ADD_VALUE ((pelem)->name[0]); \
482 idx = collate->nrules; \
483 for (cnt = 0; cnt < collate->nrules; ++cnt) \
487 ADD_VALUE (collate->undefined.ordering[cnt]); \
488 for (disp = 0; disp < collate->undefined.ordering[cnt]; ++disp) \
490 if ((wchar_t) collate->undefined.ordering[idx] \
492 ADD_VALUE ((pelem)->name[0]); \
494 ADD_VALUE (collate->undefined.ordering[idx++]); \
503 /* Fill the table now. First we look for all the characters which
504 fit into one single byte. This speeds up the 8-bit string
507 while (iterate_table (&collate
->result
, &last
, (const void **) &name
,
508 &len
, (void **) &pelem
) >= 0)
509 if (pelem
->name
[0] <= 0xff)
511 /* We have a single byte name. Now we must distinguish
512 between entries in simple form (i.e., only one value per
513 weight and no collation element starting with the same
514 character) and those which are not. */
515 size_t slot
= ((size_t) pelem
->name
[0]);
516 const size_t level
= 0;
518 table
[slot
* entry_size
] = pelem
->name
[0];
520 if (pelem
->name
[1] == L
'\0' && pelem
->next
== NULL
521 && pelem
->ordering_len
== collate
->nrules
)
523 /* Yes, we have a simple one. Lucky us. */
526 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
527 table
[slot
* entry_size
+ 1 + cnt
]
528 = pelem
->ordering
[collate
->nrules
+ cnt
];
534 /* Now check for missing single byte entries. If one exist we fill
535 with the UNDEFINED entry. */
536 for (cnt
= 0; cnt
< 256; ++cnt
)
537 /* The first weight is never 0 for existing entries. */
538 if (table
[cnt
* entry_size
+ 1] == 0)
540 /* We have to fill in the information from the UNDEFINED
542 table
[cnt
* entry_size
] = (u_int32_t
) cnt
;
544 if (collate
->undefined
.ordering_len
== collate
->nrules
)
548 for (inner
= 0; inner
< collate
->nrules
; ++inner
)
549 if ((wchar_t)collate
->undefined
.ordering
[collate
->nrules
+ inner
]
551 table
[cnt
* entry_size
+ 1 + inner
] = cnt
;
553 table
[cnt
* entry_size
+ 1 + inner
]
554 = collate
->undefined
.ordering
[collate
->nrules
+ inner
];
558 if (undefined_offset
!= UINT_MAX
)
560 table
[cnt
* entry_size
+ 1] = FORWARD_CHAR
;
561 table
[cnt
* entry_size
+ 2] = undefined_offset
;
565 const size_t slot
= cnt
;
566 const size_t level
= 0;
568 ADD_FORWARD (&collate
->undefined
);
569 undefined_offset
= table
[cnt
* entry_size
+ 2];
574 /* Now we are ready for inserting the whole rest. */
576 while (iterate_table (&collate
->result
, &last
, (const void **) &name
,
577 &len
, (void **) &pelem
) >= 0)
578 if (pelem
->name
[0] > 0xff)
580 /* Find the position. */
581 size_t slot
= ((size_t) pelem
->name
[0]) % table_best
;
584 while (table
[(level
* table_best
+ slot
) * entry_size
+ 1] != 0)
586 assert (level
< level_best
);
588 if (pelem
->name
[1] == L
'\0' && pelem
->next
== NULL
589 && pelem
->ordering_len
== collate
->nrules
)
591 /* Again a simple entry. */
594 for (inner
= 0; inner
< collate
->nrules
; ++inner
)
595 table
[(level
* table_best
+ slot
) * entry_size
+ 1 + inner
]
596 = pelem
->ordering
[collate
->nrules
+ inner
];
602 /* Add the UNDEFINED entry. */
604 /* Here we have to construct the non-simple table entry. */
607 undefined_offset
= obstack_object_size (&non_simple
);
609 idx
= collate
->nrules
;
610 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
614 ADD_VALUE (collate
->undefined
.ordering
[cnt
]);
615 for (disp
= 0; disp
< collate
->undefined
.ordering
[cnt
]; ++disp
)
616 ADD_VALUE (collate
->undefined
.ordering
[idx
++]);
620 /* Finish the extra block. */
621 extra_len
= obstack_object_size (&non_simple
);
622 extra
= (u_int32_t
*) obstack_finish (&non_simple
);
623 assert ((extra_len
% sizeof (u_int32_t
)) == 0);
625 /* Now we have to build the two array for the other byte ordering. */
626 table2
= (u_int32_t
*) alloca (table_best
* level_best
* entry_size
627 * sizeof (table
[0]));
628 extra2
= (u_int32_t
*) alloca (extra_len
);
630 for (cnt
= 0; cnt
< table_best
* level_best
* entry_size
; ++cnt
)
631 table2
[cnt
] = SWAPU32 (table
[cnt
]);
633 for (cnt
= 0; cnt
< extra_len
/ sizeof (u_int32_t
); ++cnt
)
634 extra2
[cnt
] = SWAPU32 (extra2
[cnt
]);
636 /* We need a simple hashing table to get a collation-element->chars
637 mapping. We again use internal hashing using a secondary hashing
640 Each string has an associate hashing value V, computed by a
641 fixed function. To locate the string we use open addressing with
642 double hashing. The first index will be V % M, where M is the
643 size of the hashing table. If no entry is found, iterating with
644 a second, independent hashing function takes place. This second
645 value will be 1 + V % (M - 2). The approximate number of probes
648 for unsuccessful search: (1 - N / M) ^ -1
649 for successful search: - (N / M) ^ -1 * ln (1 - N / M)
651 where N is the number of keys.
653 If we now choose M to be the next prime bigger than 4 / 3 * N,
654 we get the values 4 and 1.85 resp. Because unsuccessful searches
655 are unlikely this is a good value. Formulas: [Knuth, The Art of
656 Computer Programming, Volume 3, Sorting and Searching, 1973,
658 if (collate
->elements
.filled
== 0)
660 /* We don't need any element table since there are no collating
662 element_hash_tab_size
= 0;
663 element_hash_tab
= NULL
;
664 element_hash_tab_ob
= NULL
;
665 element_string_pool_size
= 0;
666 element_string_pool
= NULL
;
667 element_value_size
= 0;
668 element_value
= NULL
;
669 element_value_ob
= NULL
;
673 void *ptr
; /* Running pointer. */
674 const char *key
; /* Key for current bucket. */
675 size_t keylen
; /* Length of key data. */
676 const element_t
*data
; /* Data, i.e., the character sequence. */
678 element_hash_tab_size
= next_prime ((collate
->elements
.filled
* 4) / 3);
679 if (element_hash_tab_size
< 7)
680 /* We need a minimum to make the following code work. */
681 element_hash_tab_size
= 7;
683 element_hash_tab
= obstack_alloc (&non_simple
, (2 * element_hash_tab_size
684 * sizeof (u_int32_t
)));
685 memset (element_hash_tab
, '\377', (2 * element_hash_tab_size
686 * sizeof (u_int32_t
)));
689 while (iterate_table (&collate
->elements
, &ptr
, (const void **) &key
,
690 &keylen
, (void **) &data
) == 0)
692 size_t hash_val
= hash_string (key
, keylen
);
693 size_t idx
= hash_val
% element_hash_tab_size
;
695 if (element_hash_tab
[2 * idx
] != (~((u_int32_t
) 0)))
697 /* We need the second hashing function. */
698 size_t c
= 1 + (hash_val
% (element_hash_tab_size
- 2));
701 if (idx
>= element_hash_tab_size
- c
)
702 idx
-= element_hash_tab_size
- c
;
705 while (element_hash_tab
[2 * idx
] != (~((u_int32_t
) 0)));
708 element_hash_tab
[2 * idx
] = obstack_object_size (&non_simple
);
709 element_hash_tab
[2 * idx
+ 1] = (obstack_object_size (&string_pool
)
712 obstack_grow0 (&non_simple
, key
, keylen
);
713 obstack_grow (&string_pool
, data
->name
,
714 (wcslen (data
->name
) + 1) * sizeof (wchar_t));
717 if (obstack_object_size (&non_simple
) % 4 != 0)
718 obstack_blank (&non_simple
,
719 4 - (obstack_object_size (&non_simple
) % 4));
720 element_string_pool_size
= obstack_object_size (&non_simple
);
721 element_string_pool
= obstack_finish (&non_simple
);
723 element_value_size
= obstack_object_size (&string_pool
);
724 element_value
= obstack_finish (&string_pool
);
726 /* Create the tables for the other byte order. */
727 element_hash_tab_ob
= obstack_alloc (&non_simple
,
728 (2 * element_hash_tab_size
729 * sizeof (u_int32_t
)));
730 for (cnt
= 0; cnt
< 2 * element_hash_tab_size
; ++cnt
)
731 element_hash_tab_ob
[cnt
] = SWAPU32 (element_hash_tab
[cnt
]);
733 element_value_ob
= obstack_alloc (&string_pool
, element_value_size
);
734 if (sizeof (wchar_t) != 4)
736 fputs ("sizeof (wchar_t) != 4 currently not handled", stderr
);
739 for (cnt
= 0; cnt
< element_value_size
/ 4; ++cnt
)
740 element_value_ob
[cnt
] = SWAPU32 (element_value
[cnt
]);
743 /* Store collation elements as map to collation class. There are
744 three kinds of symbols:
748 We need to make a table which lets the user to access the primary
749 weight based on the symbol string. */
750 symbols_hash_tab_size
= next_prime ((4 * (charset
->char_table
.filled
751 + collate
->elements
.filled
752 + collate
->symbols
.filled
)) / 3);
753 symbols_hash_tab
= obstack_alloc (&non_simple
, (2 * symbols_hash_tab_size
754 * sizeof (u_int32_t
)));
755 memset (symbols_hash_tab
, '\377', (2 * symbols_hash_tab_size
756 * sizeof (u_int32_t
)));
758 /* Now fill the array. First the symbols from the character set,
759 then the collation elements and last the collation symbols. */
760 hash_tab
= &charset
->char_table
;
763 void *ptr
; /* Running pointer. */
764 const char *key
; /* Key for current bucket. */
765 size_t keylen
; /* Length of key data. */
766 void *data
; /* Data. */
769 while (iterate_table (hash_tab
, &ptr
, (const void **) &key
,
770 &keylen
, (void **) &data
) == 0)
775 unsigned int *weights
;
777 if (hash_tab
== &charset
->char_table
778 || hash_tab
== &collate
->elements
)
780 element_t
*lastp
, *firstp
;
781 wchar_t dummy_name
[2];
785 if (hash_tab
== &charset
->char_table
)
787 dummy_name
[0] = (wchar_t) ((unsigned long int) data
);
788 dummy_name
[1] = L
'\0';
790 name_len
= sizeof (wchar_t);
794 element_t
*elemp
= (element_t
*) data
;
796 name_len
= wcslen (name
) * sizeof (wchar_t);
799 /* First check whether this character is used at all. */
800 if (find_entry (&collate
->result
, name
, name_len
,
801 (void *) &firstp
) < 0)
802 /* The symbol is not directly mentioned in the collation.
803 I.e., we use the value for UNDEFINED. */
804 lastp
= &collate
->undefined
;
807 /* The entry for the simple character is always found at
810 while (lastp
->next
!= NULL
&& wcscmp (name
, lastp
->name
))
814 weights
= lastp
->ordering
;
818 dummy_weights
[0] = 1;
819 dummy_weights
[collate
->nrules
]
820 = (unsigned int) ((unsigned long int) data
);
822 weights
= dummy_weights
;
825 /* In LASTP->ordering we now have the collation class.
826 Determine the place in the hashing table next. */
827 hash_val
= hash_string (key
, keylen
);
828 idx
= hash_val
% symbols_hash_tab_size
;
830 if (symbols_hash_tab
[2 * idx
] != (~((u_int32_t
) 0)))
832 /* We need the second hashing function. */
833 size_t c
= 1 + (hash_val
% (symbols_hash_tab_size
- 2));
836 if (idx
>= symbols_hash_tab_size
- c
)
837 idx
-= symbols_hash_tab_size
- c
;
840 while (symbols_hash_tab
[2 * idx
] != (~((u_int32_t
) 0)));
843 symbols_hash_tab
[2 * idx
] = obstack_object_size (&string_pool
);
844 symbols_hash_tab
[2 * idx
+ 1] = (obstack_object_size (&non_simple
)
845 / sizeof (u_int32_t
));
847 obstack_grow0 (&string_pool
, key
, keylen
);
848 /* Adding the first weight looks complicated. We have to deal
849 with the kind it is stored and with the fact that original
850 form uses `unsigned int's while we need `u_int32_t' here. */
852 obstack_grow (&non_simple
, &word
, sizeof (u_int32_t
));
853 for (cnt
= 0; cnt
< weights
[0]; ++cnt
)
855 word
= weights
[collate
->nrules
+ cnt
];
856 obstack_grow (&non_simple
, &word
, sizeof (u_int32_t
));
860 if (hash_tab
== &charset
->char_table
)
861 hash_tab
= &collate
->elements
;
862 else if (hash_tab
== &collate
->elements
)
863 hash_tab
= &collate
->symbols
;
868 /* Now we have the complete tables. */
869 if (obstack_object_size (&string_pool
) % 4 != 0)
870 obstack_blank (&non_simple
, 4 - (obstack_object_size (&string_pool
) % 4));
871 symbols_string_pool_size
= obstack_object_size (&string_pool
);
872 symbols_string_pool
= obstack_finish (&string_pool
);
874 symbols_class_size
= obstack_object_size (&non_simple
);
875 symbols_class
= obstack_finish (&non_simple
);
877 /* Generate tables with other byte order. */
878 symbols_hash_tab_ob
= obstack_alloc (&non_simple
, (2 * symbols_hash_tab_size
879 * sizeof (u_int32_t
)));
880 for (cnt
= 0; cnt
< 2 * symbols_hash_tab_size
; ++cnt
)
881 symbols_hash_tab_ob
[cnt
] = SWAPU32 (symbols_hash_tab
[cnt
]);
883 symbols_class_ob
= obstack_alloc (&non_simple
, symbols_class_size
);
884 for (cnt
= 0; cnt
< symbols_class_size
/ 4; ++cnt
)
885 symbols_class_ob
[cnt
] = SWAPU32 (symbols_class
[cnt
]);
888 /* Store table addresses and lengths. */
889 #if __BYTE_ORDER == __BIG_ENDIAN
890 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB
)].iov_base
= table
;
891 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB
)].iov_len
892 = table_best
* level_best
* entry_size
* sizeof (table
[0]);
894 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL
)].iov_base
= table2
;
895 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL
)].iov_len
896 = table_best
* level_best
* entry_size
* sizeof (table
[0]);
898 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB
)].iov_base
= extra
;
899 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB
)].iov_len
= extra_len
;
901 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL
)].iov_base
= extra2
;
902 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL
)].iov_len
= extra_len
;
904 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB
)].iov_base
= table2
;
905 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB
)].iov_len
906 = table_best
* level_best
* entry_size
* sizeof (table
[0]);
908 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL
)].iov_base
= table
;
909 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL
)].iov_len
910 = table_best
* level_best
* entry_size
* sizeof (table
[0]);
912 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB
)].iov_base
= extra2
;
913 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB
)].iov_len
= extra_len
;
915 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL
)].iov_base
= extra
;
916 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL
)].iov_len
= extra_len
;
919 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED
)].iov_base
= &undefined_offset
;
920 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED
)].iov_len
= sizeof (u_int32_t
);
923 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_SIZE
)].iov_base
924 = &element_hash_tab_size
;
925 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_SIZE
)].iov_len
926 = sizeof (u_int32_t
);
928 #if __BYTE_ORDER == __BIG_ENDIAN
929 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB
)].iov_base
931 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB
)].iov_len
932 = 2 * element_hash_tab_size
* sizeof (u_int32_t
);
934 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL
)].iov_base
935 = element_hash_tab_ob
;
936 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL
)].iov_len
937 = 2 * element_hash_tab_size
* sizeof (u_int32_t
);
939 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL
)].iov_base
941 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL
)].iov_len
942 = 2 * element_hash_tab_size
* sizeof (u_int32_t
);
944 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB
)].iov_base
945 = element_hash_tab_ob
;
946 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB
)].iov_len
947 = 2 * element_hash_tab_size
* sizeof (u_int32_t
);
950 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_STR_POOL
)].iov_base
951 = element_string_pool
;
952 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_STR_POOL
)].iov_len
953 = element_string_pool_size
;
955 #if __BYTE_ORDER == __BIG_ENDIAN
956 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB
)].iov_base
958 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB
)].iov_len
959 = element_value_size
;
961 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL
)].iov_base
963 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL
)].iov_len
964 = element_value_size
;
966 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL
)].iov_base
968 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL
)].iov_len
969 = element_value_size
;
971 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB
)].iov_base
973 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB
)].iov_len
974 = element_value_size
;
977 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZE
)].iov_base
978 = &symbols_hash_tab_size
;
979 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZE
)].iov_len
980 = sizeof (u_int32_t
);
982 #if __BYTE_ORDER == __BIG_ENDIAN
983 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB
)].iov_base
985 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB
)].iov_len
986 = 2 * symbols_hash_tab_size
* sizeof (u_int32_t
);
988 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL
)].iov_base
989 = symbols_hash_tab_ob
;
990 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL
)].iov_len
991 = 2 * symbols_hash_tab_size
* sizeof (u_int32_t
);
993 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL
)].iov_base
995 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL
)].iov_len
996 = 2 * symbols_hash_tab_size
* sizeof (u_int32_t
);
998 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB
)].iov_base
999 = symbols_hash_tab_ob
;
1000 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB
)].iov_len
1001 = 2 * symbols_hash_tab_size
* sizeof (u_int32_t
);
1004 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_STR_POOL
)].iov_base
1005 = symbols_string_pool
;
1006 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_STR_POOL
)].iov_len
1007 = symbols_string_pool_size
;
1009 #if __BYTE_ORDER == __BIG_ENDIAN
1010 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB
)].iov_base
1012 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB
)].iov_len
1013 = symbols_class_size
;
1015 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL
)].iov_base
1017 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL
)].iov_len
1018 = symbols_class_size
;
1020 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL
)].iov_base
1022 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL
)].iov_len
1023 = symbols_class_size
;
1025 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB
)].iov_base
1027 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB
)].iov_len
1028 = symbols_class_size
;
1031 /* Update idx array. */
1032 idx
[0] = iov
[0].iov_len
+ iov
[1].iov_len
;
1033 for (cnt
= 1; cnt
< nelems
; ++cnt
)
1034 idx
[cnt
] = idx
[cnt
- 1] + iov
[1 + cnt
].iov_len
;
1036 write_locale_data (output_path
, "LC_COLLATE", 2 + nelems
, iov
);
1038 obstack_free (&non_simple
, NULL
);
1039 obstack_free (&string_pool
, NULL
);
1044 collate_element_to (struct linereader
*lr
, struct localedef_t
*locale
,
1045 struct token
*code
, struct charset_t
*charset
)
1047 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1051 if (collate
->combine_token
!= NULL
)
1053 free ((void *) collate
->combine_token
);
1054 collate
->combine_token
= NULL
;
1057 value
= charset_find_value (&charset
->char_table
, code
->val
.str
.start
,
1059 if ((wchar_t) value
!= ILLEGAL_CHAR_VALUE
)
1061 lr_error (lr
, _("symbol for multicharacter collating element "
1062 "`%.*s' duplicates symbolic name in charset"),
1063 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1067 if (find_entry (&collate
->elements
, code
->val
.str
.start
, code
->val
.str
.len
,
1070 lr_error (lr
, _("symbol for multicharacter collating element "
1071 "`%.*s' duplicates other element definition"),
1072 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1076 if (find_entry (&collate
->elements
, code
->val
.str
.start
, code
->val
.str
.len
,
1079 lr_error (lr
, _("symbol for multicharacter collating element "
1080 "`%.*s' duplicates symbol definition"),
1081 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1085 collate
->combine_token
= code
->val
.str
.start
;
1086 collate
->combine_token_len
= code
->val
.str
.len
;
1091 collate_element_from (struct linereader
*lr
, struct localedef_t
*locale
,
1092 struct token
*code
, struct charset_t
*charset
)
1094 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1095 element_t
*elemp
, *runp
;
1097 /* CODE is a string. */
1098 elemp
= (element_t
*) obstack_alloc (&collate
->element_mem
,
1099 sizeof (element_t
));
1101 /* We have to translate the string. It may contain <...> character
1103 elemp
->name
= (wchar_t *) translate_string (code
->val
.str
.start
, charset
);
1104 elemp
->this_weight
= 0;
1105 elemp
->ordering
= NULL
;
1106 elemp
->ordering_len
= 0;
1108 free (code
->val
.str
.start
);
1110 if (elemp
->name
== NULL
)
1112 /* At least one character in the string is not defined. We simply
1116 `from' string in collation element declaration contains unknown character"));
1120 if (elemp
->name
[0] == L
'\0' || elemp
->name
[1] == L
'\0')
1122 lr_error (lr
, _("illegal collation element"));
1126 /* The entries in the linked lists of RESULT are sorting in
1127 descending order. The order is important for the `strcoll' and
1128 `wcscoll' functions. */
1129 if (find_entry (&collate
->result
, elemp
->name
, sizeof (wchar_t),
1130 (void *) &runp
) >= 0)
1132 /* We already have an entry with this key. Check whether it is
1134 element_t
*prevp
= NULL
;
1139 cmpres
= wcscmp (elemp
->name
, runp
->name
);
1144 while ((runp
= runp
->next
) != NULL
);
1147 lr_error (lr
, _("duplicate collating element definition"));
1153 if (set_entry (&collate
->result
, elemp
->name
, sizeof (wchar_t),
1155 error (EXIT_FAILURE
, 0, _("\
1156 error while inserting collation element into hash table"));
1159 prevp
->next
= elemp
;
1165 if (insert_entry (&collate
->result
, elemp
->name
, sizeof (wchar_t), elemp
)
1167 error (EXIT_FAILURE
, errno
, _("error while inserting to hash table"));
1170 if (insert_entry (&collate
->elements
, collate
->combine_token
,
1171 collate
->combine_token_len
, (void *) elemp
) < 0)
1172 lr_error (lr
, _("cannot insert new collating symbol definition: %s"),
1178 collate_symbol (struct linereader
*lr
, struct localedef_t
*locale
,
1179 struct token
*code
, struct charset_t
*charset
)
1181 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1185 value
= charset_find_value (&charset
->char_table
, code
->val
.str
.start
,
1187 if (value
!= ILLEGAL_CHAR_VALUE
)
1189 lr_error (lr
, _("symbol for multicharacter collating element "
1190 "`%.*s' duplicates symbolic name in charset"),
1191 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1195 if (find_entry (&collate
->elements
, code
->val
.str
.start
, code
->val
.str
.len
,
1198 lr_error (lr
, _("symbol for multicharacter collating element "
1199 "`%.*s' duplicates element definition"),
1200 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1204 if (find_entry (&collate
->symbols
, code
->val
.str
.start
, code
->val
.str
.len
,
1207 lr_error (lr
, _("symbol for multicharacter collating element "
1208 "`%.*s' duplicates other symbol definition"),
1209 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1213 if (insert_entry (&collate
->symbols
, code
->val
.str
.start
, code
->val
.str
.len
,
1215 lr_error (lr
, _("cannot insert new collating symbol definition: %s"),
1221 collate_new_order (struct linereader
*lr
, struct localedef_t
*locale
,
1222 enum coll_sort_rule sort_rule
)
1224 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1226 if (collate
->nrules
>= collate
->nrules_max
)
1228 collate
->nrules_max
*= 2;
1230 = (enum coll_sort_rule
*) xrealloc (collate
->rules
,
1232 * sizeof (enum coll_sort_rule
));
1235 collate
->rules
[collate
->nrules
++] = sort_rule
;
1240 collate_build_arrays (struct linereader
*lr
, struct localedef_t
*locale
)
1242 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1245 = (enum coll_sort_rule
*) xrealloc (collate
->rules
,
1247 * sizeof (enum coll_sort_rule
));
1249 /* Allocate arrays for temporary weights. */
1250 collate
->weight_cnt
= (int *) xmalloc (collate
->nrules
* sizeof (int));
1252 /* Choose arbitrary start value for table size. */
1253 collate
->nweight_max
= 5 * collate
->nrules
;
1254 collate
->weight
= (int *) xmalloc (collate
->nweight_max
* sizeof (int));
1259 collate_order_elem (struct linereader
*lr
, struct localedef_t
*locale
,
1260 struct token
*code
, struct charset_t
*charset
)
1262 const wchar_t zero
= L
'\0';
1263 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1272 /* We have a string to find in one of the three hashing tables. */
1273 value
= charset_find_value (&charset
->char_table
, code
->val
.str
.start
,
1275 if (value
!= ILLEGAL_CHAR_VALUE
)
1277 element_t
*lastp
, *firstp
;
1279 collate
->kind
= character
;
1281 if (find_entry (&collate
->result
, &value
, sizeof (wchar_t),
1282 (void *) &firstp
) < 0)
1283 firstp
= lastp
= NULL
;
1286 /* The entry for the simple character is always found at
1289 while (lastp
->next
!= NULL
)
1290 lastp
= lastp
->next
;
1292 if (lastp
->name
[0] == value
&& lastp
->name
[1] == L
'\0')
1294 lr_error (lr
, _("duplicate definition for character `%.*s'"),
1295 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1296 lr_ignore_rest (lr
, 0);
1302 collate
->current_element
1303 = (element_t
*) obstack_alloc (&collate
->element_mem
,
1304 sizeof (element_t
));
1306 obstack_grow (&collate
->element_mem
, &value
, sizeof (value
));
1307 obstack_grow (&collate
->element_mem
, &zero
, sizeof (zero
));
1309 collate
->current_element
->name
=
1310 (const wchar_t *) obstack_finish (&collate
->element_mem
);
1312 collate
->current_element
->this_weight
= ++collate
->order_cnt
;
1314 collate
->current_element
->next
= NULL
;
1318 if (insert_entry (&collate
->result
, &value
, sizeof (wchar_t),
1319 (void *) collate
->current_element
) < 0)
1321 lr_error (lr
, _("cannot insert collation element `%.*s'"),
1322 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1327 lastp
->next
= collate
->current_element
;
1329 else if (find_entry (&collate
->elements
, code
->val
.str
.start
,
1330 code
->val
.str
.len
, &tmp
) >= 0)
1332 collate
->current_element
= (element_t
*) tmp
;
1334 if (collate
->current_element
->this_weight
!= 0)
1337 collation element `%.*s' appears more than once: ignore line"),
1338 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1339 lr_ignore_rest (lr
, 0);
1344 collate
->kind
= element
;
1345 collate
->current_element
->this_weight
= ++collate
->order_cnt
;
1347 else if (find_entry (&collate
->symbols
, code
->val
.str
.start
,
1348 code
->val
.str
.len
, &tmp
) >= 0)
1350 unsigned int order
= ++collate
->order_cnt
;
1352 if ((unsigned long int) tmp
!= 0ul)
1355 collation symbol `%.*s' appears more than once: ignore line"),
1356 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1357 lr_ignore_rest (lr
, 0);
1362 collate
->kind
= symbol
;
1364 if (set_entry (&collate
->symbols
, code
->val
.str
.start
,
1365 code
->val
.str
.len
, (void *) order
) < 0)
1367 lr_error (lr
, _("cannot process order specification"));
1374 lr_error (lr
, _("unknown symbol `%.*s': line ignored"),
1375 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1376 lr_ignore_rest (lr
, 0);
1383 collate
->kind
= undefined
;
1384 collate
->current_element
= &collate
->undefined
;
1388 if (collate
->was_ellipsis
)
1391 two lines in a row containing `...' are not allowed"));
1394 else if (collate
->kind
!= character
)
1396 /* An ellipsis requires the previous line to be an
1397 character definition. */
1399 line before ellipsis does not contain definition for character constant"));
1400 lr_ignore_rest (lr
, 0);
1404 collate
->kind
= ellipsis
;
1408 assert (! "illegal token in `collate_order_elem'");
1411 /* Now it's time to handle the ellipsis in the previous line. We do
1412 this only when the last line contained an definition for a
1413 character, the current line also defines an character, the
1414 character code for the later is bigger than the former. */
1415 if (collate
->was_ellipsis
)
1417 if (collate
->kind
!= character
)
1420 line after ellipsis must contain character definition"));
1421 lr_ignore_rest (lr
, 0);
1424 else if (collate
->last_char
> value
)
1426 lr_error (lr
, _("end point of ellipsis range is bigger then start"));
1427 lr_ignore_rest (lr
, 0);
1432 /* We can fill the arrays with the information we need. */
1438 name
[0] = collate
->last_char
+ 1;
1441 data
= (unsigned int *) alloca ((collate
->nrules
+ collate
->nweight
)
1442 * sizeof (unsigned int));
1443 ptr
= (size_t *) alloca (collate
->nrules
* sizeof (size_t));
1445 if (data
== NULL
|| ptr
== NULL
)
1446 error (4, 0, _("memory exhausted"));
1448 /* Prepare data. Because the characters covered by an
1449 ellipsis all have equal values we prepare the data once
1450 and only change the variable number (if there are any).
1451 PTR[...] will point to the entries which will have to be
1452 fixed during the output loop. */
1453 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
1455 data
[cnt
] = collate
->weight_cnt
[cnt
];
1456 ptr
[cnt
] = (cnt
== 0
1458 : ptr
[cnt
- 1] + collate
->weight_cnt
[cnt
- 1]);
1461 for (cnt
= 0; cnt
< collate
->nweight
; ++cnt
)
1462 data
[collate
->nrules
+ cnt
] = collate
->weight
[cnt
];
1464 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
1465 if ((wchar_t) data
[ptr
[cnt
]] != ELLIPSIS_CHAR
)
1468 while (name
[0] <= value
)
1472 pelem
= (element_t
*) obstack_alloc (&collate
->element_mem
,
1473 sizeof (element_t
));
1475 error (4, 0, _("memory exhausted"));
1478 = (const wchar_t *) obstack_copy (&collate
->element_mem
,
1479 name
, 2 * sizeof (wchar_t));
1480 pelem
->this_weight
= ++collate
->order_cnt
;
1482 pelem
->ordering_len
= collate
->nweight
;
1484 = (unsigned int *) obstack_copy (&collate
->element_mem
, data
,
1486 + pelem
->ordering_len
)
1487 * sizeof (unsigned int));
1489 /* `...' weights need to be adjusted. */
1490 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
1492 pelem
->ordering
[ptr
[cnt
]] = pelem
->this_weight
;
1494 /* Insert new entry into result table. */
1495 if (find_entry (&collate
->result
, name
, sizeof (wchar_t),
1496 (void *) &pelem
->next
) >= 0)
1498 if (set_entry (&collate
->result
, name
, sizeof (wchar_t),
1499 (void *) pelem
) < 0)
1500 error (4, 0, _("cannot insert into result table"));
1505 if (insert_entry (&collate
->result
, name
, sizeof (wchar_t),
1506 (void *) pelem
) < 0)
1507 error (4, 0, _("cannot insert into result table"));
1510 /* Increment counter. */
1516 /* Reset counters for weights. */
1517 collate
->weight_idx
= 0;
1518 collate
->nweight
= 0;
1519 for (i
= 0; i
< collate
->nrules
; ++i
)
1520 collate
->weight_cnt
[i
] = 0;
1521 collate
->current_patch
= NULL
;
1528 collate_weight_bsymbol (struct linereader
*lr
, struct localedef_t
*locale
,
1529 struct token
*code
, struct charset_t
*charset
)
1531 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1532 unsigned int here_weight
;
1536 assert (code
->tok
== tok_bsymbol
);
1538 value
= charset_find_value (&charset
->char_table
, code
->val
.str
.start
,
1540 if (value
!= ILLEGAL_CHAR_VALUE
)
1544 if (find_entry (&collate
->result
, &value
, sizeof (wchar_t),
1549 && (runp
->name
[0] != value
|| runp
->name
[1] != L
'\0'))
1552 here_weight
= runp
== NULL
? 0 : runp
->this_weight
;
1554 else if (find_entry (&collate
->elements
, code
->val
.str
.start
,
1555 code
->val
.str
.len
, &tmp
) >= 0)
1557 element_t
*runp
= (element_t
*) tmp
;
1559 here_weight
= runp
->this_weight
;
1561 else if (find_entry (&collate
->symbols
, code
->val
.str
.start
,
1562 code
->val
.str
.len
, &tmp
) >= 0)
1564 here_weight
= (unsigned int) tmp
;
1569 lr_error (lr
, _("unknown symbol `%.*s': line ignored"),
1570 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1571 lr_ignore_rest (lr
, 0);
1575 /* When we currently work on a collation symbol we do not expect any
1577 if (collate
->kind
== symbol
)
1580 specification of sorting weight for collation symbol does not make sense"));
1581 lr_ignore_rest (lr
, 0);
1585 /* Add to the current collection of weights. */
1586 if (collate
->nweight
>= collate
->nweight_max
)
1588 collate
->nweight_max
*= 2;
1589 collate
->weight
= (unsigned int *) xrealloc (collate
->weight
,
1590 collate
->nweight_max
);
1593 /* If the weight is currently not known, we remember to patch the
1594 resulting tables. */
1595 if (here_weight
== 0)
1599 newp
= (patch_t
*) obstack_alloc (&collate
->element_mem
,
1601 newp
->fname
= lr
->fname
;
1602 newp
->lineno
= lr
->lineno
;
1603 newp
->token
= (const char *) obstack_copy0 (&collate
->element_mem
,
1604 code
->val
.str
.start
,
1606 newp
->where
.idx
= collate
->nweight
++;
1607 newp
->next
= collate
->current_patch
;
1608 collate
->current_patch
= newp
;
1611 collate
->weight
[collate
->nweight
++] = here_weight
;
1612 ++collate
->weight_cnt
[collate
->weight_idx
];
1619 collate_next_weight (struct linereader
*lr
, struct localedef_t
*locale
)
1621 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1623 if (collate
->kind
== symbol
)
1626 specification of sorting weight for collation symbol does not make sense"));
1627 lr_ignore_rest (lr
, 0);
1631 ++collate
->weight_idx
;
1632 if (collate
->weight_idx
>= collate
->nrules
)
1634 lr_error (lr
, _("too many weights"));
1635 lr_ignore_rest (lr
, 0);
1644 collate_simple_weight (struct linereader
*lr
, struct localedef_t
*locale
,
1645 struct token
*code
, struct charset_t
*charset
)
1647 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1648 unsigned int value
= 0;
1650 /* There current tokens can be `IGNORE', `...', or a string. */
1654 /* This token is allowed in all situations. */
1655 value
= IGNORE_CHAR
;
1659 /* The ellipsis is only allowed for the `...' or `UNDEFINED'
1661 if (collate
->kind
!= ellipsis
&& collate
->kind
!= undefined
)
1664 `...' must only be used in `...' and `UNDEFINED' entries"));
1665 lr_ignore_rest (lr
, 0);
1668 value
= ELLIPSIS_CHAR
;
1672 /* This can become difficult. We have to get the weights which
1673 correspond to the single wide chars in the string. But some
1674 of the `chars' might not be real characters, but collation
1675 elements or symbols. And so the string decoder might have
1676 signaled errors. The string at this point is not translated.
1677 I.e., all <...> sequences are still there. */
1679 char *runp
= code
->val
.str
.start
;
1682 while (*runp
!= '\0')
1684 char *startp
= (char *) runp
;
1685 char *putp
= (char *) runp
;
1688 /* Lookup weight for char and store it. */
1691 while (*++runp
!= '\0' && *runp
!= '>')
1693 if (*runp
== lr
->escape_char
)
1694 if (*++runp
== '\0')
1696 lr_error (lr
, _("unterminated weight name"));
1697 lr_ignore_rest (lr
, 0);
1707 lr_error (lr
, _("empty weight name: line ignored"));
1708 lr_ignore_rest (lr
, 0);
1712 wch
= charset_find_value (&charset
->char_table
, startp
,
1714 if (wch
!= ILLEGAL_CHAR_VALUE
)
1718 if (find_entry (&collate
->result
, &wch
, sizeof (wchar_t),
1719 (void *)&pelem
) < 0)
1722 while (pelem
!= NULL
1723 && (pelem
->name
[0] != wch
1724 || pelem
->name
[1] != L
'\0'))
1725 pelem
= pelem
->next
;
1727 value
= pelem
== NULL
? 0 : pelem
->this_weight
;
1729 else if (find_entry (&collate
->elements
, startp
, putp
- startp
,
1732 element_t
*pelem
= (element_t
*) tmp
;
1734 value
= pelem
->this_weight
;
1736 else if (find_entry (&collate
->symbols
, startp
, putp
- startp
,
1739 value
= (unsigned int) tmp
;
1744 lr_error (lr
, _("unknown symbol `%.*s': line ignored"),
1745 (int) (putp
- startp
), startp
);
1746 lr_ignore_rest (lr
, 0);
1755 if (*runp
== lr
->escape_char
)
1757 static const char digits
[] = "0123456789abcdef";
1762 if (tolower (*runp
) == 'x')
1767 else if (tolower (*runp
) == 'd')
1775 dp
= strchr (digits
, tolower (*runp
));
1776 if (dp
== NULL
|| (dp
- digits
) >= base
)
1780 illegal character constant in string"));
1781 lr_ignore_rest (lr
, 0);
1787 dp
= strchr (digits
, tolower (*runp
));
1788 if (dp
== NULL
|| (dp
- digits
) >= base
)
1796 dp
= strchr (digits
, tolower (*runp
));
1797 if (dp
!= NULL
&& (dp
- digits
< base
))
1806 wch
= (wchar_t) *runp
++;
1808 /* Lookup the weight for WCH. */
1809 if (find_entry (&collate
->result
, &wch
, sizeof (wch
),
1814 && (wp
->name
[0] != wch
|| wp
->name
[1] != L
'\0'))
1817 value
= wp
== NULL
? 0 : wp
->this_weight
;
1819 /* To get the correct name for the error message. */
1822 /**************************************************\
1823 |* I know here is something wrong. Characters in *|
1824 |* the string which are not in the <...> form *|
1825 |* cannot be declared forward for now!!! *|
1826 \**************************************************/
1829 /* Store in weight array. */
1830 if (collate
->nweight
>= collate
->nweight_max
)
1832 collate
->nweight_max
*= 2;
1834 = (unsigned int *) xrealloc (collate
->weight
,
1835 collate
->nweight_max
);
1842 newp
= (patch_t
*) obstack_alloc (&collate
->element_mem
,
1844 newp
->fname
= lr
->fname
;
1845 newp
->lineno
= lr
->lineno
;
1847 = (const char *) obstack_copy0 (&collate
->element_mem
,
1848 startp
, putp
- startp
);
1849 newp
->where
.idx
= collate
->nweight
++;
1850 newp
->next
= collate
->current_patch
;
1851 collate
->current_patch
= newp
;
1854 collate
->weight
[collate
->nweight
++] = value
;
1855 ++collate
->weight_cnt
[collate
->weight_idx
];
1861 assert (! "should not happen");
1865 if (collate
->nweight
>= collate
->nweight_max
)
1867 collate
->nweight_max
*= 2;
1868 collate
->weight
= (unsigned int *) xrealloc (collate
->weight
,
1869 collate
->nweight_max
);
1872 collate
->weight
[collate
->nweight
++] = value
;
1873 ++collate
->weight_cnt
[collate
->weight_idx
];
1880 collate_end_weight (struct linereader
*lr
, struct localedef_t
*locale
)
1882 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1883 element_t
*pelem
= collate
->current_element
;
1885 if (collate
->kind
== symbol
)
1887 /* We don't have to do anything. */
1888 collate
->was_ellipsis
= 0;
1892 if (collate
->kind
== ellipsis
)
1894 /* Before the next line is processed the ellipsis is handled. */
1895 collate
->was_ellipsis
= 1;
1899 assert (collate
->kind
== character
|| collate
->kind
== element
1900 || collate
->kind
== undefined
);
1902 /* Fill in the missing weights. */
1903 while (++collate
->weight_idx
< collate
->nrules
)
1905 collate
->weight
[collate
->nweight
++] = pelem
->this_weight
;
1906 ++collate
->weight_cnt
[collate
->weight_idx
];
1909 /* Now we know how many ordering weights the current
1910 character/element has. Allocate room in the element structure
1911 and copy information. */
1912 pelem
->ordering_len
= collate
->nweight
;
1914 /* First we write an array with the number of values for each
1916 obstack_grow (&collate
->element_mem
, collate
->weight_cnt
,
1917 collate
->nrules
* sizeof (unsigned int));
1919 /* Now the weights itselves. */
1920 obstack_grow (&collate
->element_mem
, collate
->weight
,
1921 collate
->nweight
* sizeof (unsigned int));
1924 pelem
->ordering
= obstack_finish (&collate
->element_mem
);
1926 /* Now we handle the "patches". */
1927 while (collate
->current_patch
!= NULL
)
1929 patch_t
*this_patch
;
1931 this_patch
= collate
->current_patch
;
1933 this_patch
->where
.pos
= &pelem
->ordering
[collate
->nrules
1934 + this_patch
->where
.idx
];
1936 collate
->current_patch
= this_patch
->next
;
1937 this_patch
->next
= collate
->all_patches
;
1938 collate
->all_patches
= this_patch
;
1941 /* Set information for next round. */
1942 collate
->was_ellipsis
= 0;
1943 if (collate
->kind
!= undefined
)
1944 collate
->last_char
= pelem
->name
[0];