1 /* Copyright (C) 1995, 1996 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If
17 not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
33 #include "localeinfo.h"
35 #include "simple-hash.h"
36 #include "stringtrans.h"
37 #include "strlen-hash.h"
39 /* Uncomment the following line in the production version. */
40 /* #define NDEBUG 1 */
44 #define MAX(a, b) ((a) > (b) ? (a) : (b))
47 (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))
50 /* What kind of symbols get defined? */
61 typedef struct patch_t
75 typedef struct element_t
78 unsigned int this_weight
;
80 struct element_t
*next
;
82 unsigned int *ordering
;
87 /* The real definition of the struct for the LC_COLLATE locale. */
88 struct locale_collate_t
90 /* Collate symbol table. Simple mapping to number. */
93 /* The collation elements. */
95 struct obstack element_mem
;
97 /* The result table. */
100 /* Sorting rules given in order_start line. */
103 enum coll_sort_rule
*rules
;
105 /* Used while recognizing symbol composed of multiple tokens
106 (collating-element). */
107 const char *combine_token
;
108 size_t combine_token_len
;
110 /* How many sorting order specifications so far. */
111 unsigned int order_cnt
;
113 /* Was lastline ellipsis? */
115 /* Value of last entry if was character. */
117 /* Current element. */
118 element_t
*current_element
;
119 /* What kind of symbol is current element. */
120 enum coll_symbol kind
;
122 /* While collecting the weigths we need some temporary space. */
123 unsigned int current_order
;
126 unsigned int *weight
;
131 patch_t
*current_patch
;
132 patch_t
*all_patches
;
134 /* Room for the UNDEFINED information. */
136 unsigned int undefined_len
;
140 /* Be verbose? Defined in localedef.c. */
144 void *xmalloc (size_t __n
);
145 void *xrealloc (void *__p
, size_t __n
);
148 #define obstack_chunk_alloc xmalloc
149 #define obstack_chunk_free free
153 collate_startup (struct linereader
*lr
, struct localedef_t
*locale
,
154 struct charset_t
*charset
)
156 struct locale_collate_t
*collate
;
158 /* It is important that we always use UCS4 encoding for strings now. */
159 encoding_method
= ENC_UCS4
;
161 /* Allocate the needed room. */
162 locale
->categories
[LC_COLLATE
].collate
= collate
=
163 (struct locale_collate_t
*) xmalloc (sizeof (struct locale_collate_t
));
165 /* Allocate hash table for collating elements. */
166 if (init_hash (&collate
->elements
, 512))
167 error (4, 0, _("memory exhausted"));
168 collate
->combine_token
= NULL
;
169 obstack_init (&collate
->element_mem
);
171 /* Allocate hash table for collating elements. */
172 if (init_hash (&collate
->symbols
, 64))
173 error (4, 0, _("memory exhausted"));
175 /* Allocate hash table for result. */
176 if (init_hash (&collate
->result
, 512))
177 error (4, 0, _("memory exhausted"));
180 collate
->nrules_max
= 10;
182 = (enum coll_sort_rule
*) xmalloc (collate
->nrules_max
183 * sizeof (enum coll_sort_rule
));
185 collate
->order_cnt
= 1; /* The smallest weight is 2. */
187 collate
->was_ellipsis
= 0;
188 collate
->last_char
= L
'\0'; /* 0 because leading ellipsis is allowed. */
190 collate
->all_patches
= NULL
;
192 /* This tells us no UNDEFINED entry was found until now. */
193 collate
->undefined
.this_weight
= 0;
195 lr
->translate_strings
= 0;
200 collate_finish (struct localedef_t
*locale
, struct charset_t
*charset
)
202 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
206 /* Patch the constructed table so that forward references are
208 for (patch
= collate
->all_patches
; patch
!= NULL
; patch
= patch
->next
)
211 size_t toklen
= strlen (patch
->token
);
213 unsigned int value
= 0;
215 wch
= charset_find_value (charset
, patch
->token
, toklen
);
216 if (wch
!= ILLEGAL_CHAR_VALUE
)
220 if (find_entry (&collate
->result
, &wch
, sizeof (wchar_t),
223 for (; runp
!= NULL
; runp
= runp
->next
)
224 if (runp
->name
[0] == wch
&& runp
->name
[1] == L
'\0')
227 value
= runp
== NULL
? 0 : runp
->this_weight
;
229 else if (find_entry (&collate
->elements
, patch
->token
, toklen
, &ptmp
)
232 value
= ((element_t
*) ptmp
)->this_weight
;
234 else if (find_entry (&collate
->symbols
, patch
->token
, toklen
, &ptmp
)
237 value
= (unsigned long int) ptmp
;
243 error_at_line (0, 0, patch
->fname
, patch
->lineno
,
244 _("no weight defined for symbol `%s'"), patch
->token
);
246 *patch
->where
.pos
= value
;
249 /* If no definition for UNDEFINED is given, all characters in the
250 given charset must be specified. */
251 if (collate
->undefined
.ordering
== NULL
)
253 /**************************************************************\
254 |* XXX We should test whether really an unspecified character *|
255 |* exists before giving the message. *|
256 \**************************************************************/
259 error (0, 0, _("no definition of `UNDEFINED'"));
261 collate
->undefined
.ordering_len
= collate
->nrules
;
262 weight
= ++collate
->order_cnt
;
264 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
267 obstack_grow (&collate
->element_mem
, &one
, sizeof (one
));
270 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
271 obstack_grow (&collate
->element_mem
, &weight
, sizeof (weight
));
273 collate
->undefined
.ordering
= obstack_finish (&collate
->element_mem
);
276 collate
->undefined_len
= 2; /* For the name: 1 x wchar_t + L'\0'. */
277 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
278 collate
->undefined_len
+= 1 + collate
->undefined
.ordering
[cnt
];
284 collate_output (struct localedef_t
*locale
, struct charset_t
*charset
,
285 const char *output_path
)
287 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
288 u_int32_t table_size
, table_best
, level_best
, sum_best
;
293 const size_t nelems
= _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
);
294 struct iovec iov
[2 + nelems
];
295 struct locale_file data
;
296 u_int32_t idx
[nelems
];
297 struct obstack non_simple
;
298 struct obstack string_pool
;
299 size_t cnt
, entry_size
;
300 u_int32_t undefined_offset
= UINT_MAX
;
301 u_int32_t
*table
, *extra
, *table2
, *extra2
;
303 u_int32_t element_hash_tab_size
;
304 u_int32_t
*element_hash_tab
;
305 u_int32_t
*element_hash_tab_ob
;
306 u_int32_t element_string_pool_size
;
307 char *element_string_pool
;
308 u_int32_t element_value_size
;
309 wchar_t *element_value
;
310 wchar_t *element_value_ob
;
311 u_int32_t symbols_hash_tab_size
;
312 u_int32_t
*symbols_hash_tab
;
313 u_int32_t
*symbols_hash_tab_ob
;
314 u_int32_t symbols_string_pool_size
;
315 char *symbols_string_pool
;
316 u_int32_t symbols_class_size
;
317 u_int32_t
*symbols_class
;
318 u_int32_t
*symbols_class_ob
;
319 hash_table
*hash_tab
;
320 unsigned int dummy_weights
[collate
->nrules
+ 1];
326 /* Compute table size. */
328 Computing table size for collation information might take a while..."),
330 for (table_size
= 256; table_size
< sum_best
; ++table_size
)
332 size_t hits
[table_size
];
333 unsigned int worst
= 1;
338 for (cnt
= 0; cnt
< 256; ++cnt
)
340 memset (&hits
[256], '\0', sizeof (hits
) - 256 * sizeof (size_t));
342 while (iterate_table (&collate
->result
, &last
, (const void **) &name
,
343 &len
, (void **) &pelem
) >= 0)
344 if (pelem
->ordering
!= NULL
&& pelem
->name
[0] > 0xff)
345 if (++hits
[(unsigned int) pelem
->name
[0] % table_size
] > worst
)
347 worst
= hits
[(unsigned int) pelem
->name
[0] % table_size
];
348 if (table_size
* worst
> sum_best
)
352 if (table_size
* worst
< sum_best
)
354 sum_best
= table_size
* worst
;
355 table_best
= table_size
;
359 assert (table_best
!= 0xffff || level_best
!= 0xffff);
360 fputs (_(" done\n"), stderr
);
362 obstack_init (&non_simple
);
363 obstack_init (&string_pool
);
365 data
.magic
= LIMAGIC (LC_COLLATE
);
367 iov
[0].iov_base
= (void *) &data
;
368 iov
[0].iov_len
= sizeof (data
);
370 iov
[1].iov_base
= (void *) idx
;
371 iov
[1].iov_len
= sizeof (idx
);
373 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES
)].iov_base
= &collate
->nrules
;
374 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES
)].iov_len
= sizeof (u_int32_t
);
376 table
= (u_int32_t
*) alloca (collate
->nrules
* sizeof (u_int32_t
));
377 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES
)].iov_base
= table
;
378 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES
)].iov_len
379 = collate
->nrules
* sizeof (u_int32_t
);
380 /* Another trick here. Describing the collation method needs only a
381 few bits (3, to be exact). But the binary file should be
382 accessible by maschines with both endianesses and so we store both
383 information in the same word. */
384 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
385 table
[cnt
] = collate
->rules
[cnt
] | SWAPU32 (collate
->rules
[cnt
]);
387 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE
)].iov_base
= &table_best
;
388 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE
)].iov_len
= sizeof (u_int32_t
);
390 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS
)].iov_base
= &level_best
;
391 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS
)].iov_len
392 = sizeof (u_int32_t
);
394 entry_size
= 1 + MAX (collate
->nrules
, 2);
396 table
= (u_int32_t
*) alloca (table_best
* level_best
* entry_size
397 * sizeof (table
[0]));
398 memset (table
, '\0', table_best
* level_best
* entry_size
399 * sizeof (table
[0]));
402 /* Macros for inserting in output table. */
403 #define ADD_VALUE(expr) \
405 u_int32_t to_write = (u_int32_t) expr; \
406 obstack_grow (&non_simple, &to_write, sizeof (to_write)); \
409 #define ADD_ELEMENT(pelem, len) \
415 wlen = wcslen (pelem->name); \
416 obstack_grow (&non_simple, pelem->name, (wlen + 1) * sizeof (u_int32_t)); \
418 idx = collate->nrules; \
419 for (cnt = 0; cnt < collate->nrules; ++cnt) \
423 ADD_VALUE (pelem->ordering[cnt]); \
424 for (disp = 0; disp < pelem->ordering[cnt]; ++disp) \
425 ADD_VALUE (pelem->ordering[idx++]); \
429 #define ADD_FORWARD(pelem) \
431 /* We leave a reference in the main table and put all \
432 information in the table for the extended entries. */ \
434 element_t *has_simple = NULL; \
437 table[(level * table_best + slot) * entry_size + 1] \
439 table[(level * table_best + slot) * entry_size + 2] \
440 = obstack_object_size (&non_simple) / sizeof (u_int32_t); \
442 /* Here we have to construct the non-simple table entry. First \
443 compute the total length of this entry. */ \
444 for (runp = (pelem); runp != NULL; runp = runp->next) \
445 if (runp->ordering != NULL) \
450 value = 1 + wcslen (runp->name) + 1; \
452 for (cnt = 0; cnt < collate->nrules; ++cnt) \
453 /* We have to take care for entries without ordering \
454 information. While reading them they get inserted in the \
455 table and later not removed when something goes wrong with \
456 reading its weights. */ \
458 value += 1 + runp->ordering[cnt]; \
460 if (runp->name[1] == L'\0') \
464 ADD_ELEMENT (runp, value); \
467 if (has_simple == NULL) \
471 ADD_VALUE (collate->undefined_len + 1); \
473 /* Add the name. */ \
474 ADD_VALUE ((pelem)->name[0]); \
477 idx = collate->nrules; \
478 for (cnt = 0; cnt < collate->nrules; ++cnt) \
482 ADD_VALUE (collate->undefined.ordering[cnt]); \
483 for (disp = 0; disp < collate->undefined.ordering[cnt]; ++disp) \
485 if (collate->undefined.ordering[idx] == ELLIPSIS_CHAR) \
486 ADD_VALUE ((pelem)->name[0]); \
488 ADD_VALUE (collate->undefined.ordering[idx++]); \
497 /* Fill the table now. First we look for all the characters which
498 fit into one single byte. This speeds up the 8-bit string
501 while (iterate_table (&collate
->result
, &last
, (const void **) &name
,
502 &len
, (void **) &pelem
) >= 0)
503 if (pelem
->name
[0] <= 0xff)
505 /* We have a single byte name. Now we must distinguish
506 between entries in simple form (i.e., only one value per
507 weight and no collation element starting with the same
508 character) and those which are not. */
509 size_t slot
= ((size_t) pelem
->name
[0]);
510 const size_t level
= 0;
512 table
[slot
* entry_size
] = pelem
->name
[0];
514 if (pelem
->name
[1] == L
'\0' && pelem
->next
== NULL
515 && pelem
->ordering_len
== collate
->nrules
)
517 /* Yes, we have a simple one. Lucky us. */
520 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
521 table
[slot
* entry_size
+ 1 + cnt
]
522 = pelem
->ordering
[collate
->nrules
+ cnt
];
528 /* Now check for missing single byte entries. If one exist we fill
529 with the UNDEFINED entry. */
530 for (cnt
= 0; cnt
< 256; ++cnt
)
531 /* The first weight is never 0 for existing entries. */
532 if (table
[cnt
* entry_size
+ 1] == 0)
534 /* We have to fill in the information from the UNDEFINED
536 table
[cnt
* entry_size
] = (u_int32_t
) cnt
;
538 if (collate
->undefined
.ordering_len
== collate
->nrules
)
542 for (inner
= 0; inner
< collate
->nrules
; ++inner
)
543 if (collate
->undefined
.ordering
[collate
->nrules
+ inner
]
545 table
[cnt
* entry_size
+ 1 + inner
] = cnt
;
547 table
[cnt
* entry_size
+ 1 + inner
]
548 = collate
->undefined
.ordering
[collate
->nrules
+ inner
];
552 if (undefined_offset
!= UINT_MAX
)
554 table
[cnt
* entry_size
+ 1] = FORWARD_CHAR
;
555 table
[cnt
* entry_size
+ 2] = undefined_offset
;
559 const size_t slot
= cnt
;
560 const size_t level
= 0;
562 ADD_FORWARD (&collate
->undefined
);
563 undefined_offset
= table
[cnt
* entry_size
+ 2];
568 /* Now we are ready for inserting the whole rest. */
570 while (iterate_table (&collate
->result
, &last
, (const void **) &name
,
571 &len
, (void **) &pelem
) >= 0)
572 if (pelem
->name
[0] > 0xff)
574 /* Find the position. */
575 size_t slot
= ((size_t) pelem
->name
[0]) % table_best
;
578 while (table
[(level
* table_best
+ slot
) * entry_size
+ 1] != 0)
580 assert (level
< level_best
);
582 if (pelem
->name
[1] == L
'\0' && pelem
->next
== NULL
583 && pelem
->ordering_len
== collate
->nrules
)
585 /* Again a simple entry. */
588 for (inner
= 0; inner
< collate
->nrules
; ++inner
)
589 table
[(level
* table_best
+ slot
) * entry_size
+ 1 + inner
]
590 = pelem
->ordering
[collate
->nrules
+ inner
];
596 /* Add the UNDEFINED entry. */
598 /* Here we have to construct the non-simple table entry. */
601 undefined_offset
= obstack_object_size (&non_simple
);
603 idx
= collate
->nrules
;
604 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
608 ADD_VALUE (collate
->undefined
.ordering
[cnt
]);
609 for (disp
= 0; disp
< collate
->undefined
.ordering
[cnt
]; ++disp
)
610 ADD_VALUE (collate
->undefined
.ordering
[idx
++]);
614 /* Finish the extra block. */
615 extra_len
= obstack_object_size (&non_simple
);
616 extra
= (u_int32_t
*) obstack_finish (&non_simple
);
617 assert ((extra_len
% sizeof (u_int32_t
)) == 0);
619 /* Now we have to build the two array for the other byte ordering. */
620 table2
= (u_int32_t
*) alloca (table_best
* level_best
* entry_size
621 * sizeof (table
[0]));
622 extra2
= (u_int32_t
*) alloca (extra_len
);
624 for (cnt
= 0; cnt
< table_best
* level_best
* entry_size
; ++cnt
)
625 table2
[cnt
] = SWAPU32 (table
[cnt
]);
627 for (cnt
= 0; cnt
< extra_len
/ sizeof (u_int32_t
); ++cnt
)
628 extra2
[cnt
] = SWAPU32 (extra2
[cnt
]);
630 /* We need a simple hashing table to get a collation-element->chars
631 mapping. We again use internal hasing using a secondary hashing
634 Each string has an associate hashing value V, computed by a
635 fixed function. To locate the string we use open addressing with
636 double hashing. The first index will be V % M, where M is the
637 size of the hashing table. If no entry is found, iterating with
638 a second, independent hashing function takes place. This second
639 value will be 1 + V % (M - 2). The approximate number of probes
642 for unsuccessful search: (1 - N / M) ^ -1
643 for successful search: - (N / M) ^ -1 * ln (1 - N / M)
645 where N is the number of keys.
647 If we now choose M to be the next prime bigger than 4 / 3 * N,
648 we get the values 4 and 1.85 resp. Because unsuccesful searches
649 are unlikely this is a good value. Formulas: [Knuth, The Art of
650 Computer Programming, Volume 3, Sorting and Searching, 1973,
652 if (collate
->elements
.filled
== 0)
654 /* We don't need any element table since there are no collating
656 element_hash_tab_size
= 0;
657 element_hash_tab
= NULL
;
658 element_hash_tab_ob
= NULL
;
659 element_string_pool_size
= 0;
660 element_string_pool
= NULL
;
661 element_value_size
= 0;
662 element_value
= NULL
;
663 element_value_ob
= NULL
;
667 void *ptr
; /* Running pointer. */
668 const char *key
; /* Key for current bucket. */
669 size_t keylen
; /* Length of key data. */
670 const element_t
*data
; /* Data, i.e., the character sequence. */
672 element_hash_tab_size
= next_prime ((collate
->elements
.filled
* 4) / 3);
673 if (element_hash_tab_size
< 7)
674 /* We need a minimum to make the following code work. */
675 element_hash_tab_size
= 7;
677 element_hash_tab
= obstack_alloc (&non_simple
, (2 * element_hash_tab_size
678 * sizeof (u_int32_t
)));
679 memset (element_hash_tab
, '\377', (2 * element_hash_tab_size
680 * sizeof (u_int32_t
)));
683 while (iterate_table (&collate
->elements
, &ptr
, (const void **) &key
,
684 &keylen
, (void **) &data
) == 0)
686 size_t hash_val
= hash_string (key
, keylen
);
687 size_t idx
= hash_val
% element_hash_tab_size
;
689 if (element_hash_tab
[2 * idx
] != (~((u_int32_t
) 0)))
691 /* We need the second hashing function. */
692 size_t c
= 1 + (hash_val
% (element_hash_tab_size
- 2));
695 if (idx
>= element_hash_tab_size
- c
)
696 idx
-= element_hash_tab_size
- c
;
699 while (element_hash_tab
[2 * idx
] != (~((u_int32_t
) 0)));
702 element_hash_tab
[2 * idx
] = obstack_object_size (&non_simple
);
703 element_hash_tab
[2 * idx
+ 1] = (obstack_object_size (&string_pool
)
706 obstack_grow0 (&non_simple
, key
, keylen
);
707 obstack_grow (&string_pool
, data
->name
,
708 (wcslen (data
->name
) + 1) * sizeof (wchar_t));
711 if (obstack_object_size (&non_simple
) % 4 != 0)
712 obstack_blank (&non_simple
,
713 4 - (obstack_object_size (&non_simple
) % 4));
714 element_string_pool_size
= obstack_object_size (&non_simple
);
715 element_string_pool
= obstack_finish (&non_simple
);
717 element_value_size
= obstack_object_size (&string_pool
);
718 element_value
= obstack_finish (&string_pool
);
720 /* Create the tables for the other byte order. */
721 element_hash_tab_ob
= obstack_alloc (&non_simple
,
722 (2 * element_hash_tab_size
723 * sizeof (u_int32_t
)));
724 for (cnt
= 0; cnt
< 2 * element_hash_tab_size
; ++cnt
)
725 element_hash_tab_ob
[cnt
] = SWAPU32 (element_hash_tab
[cnt
]);
727 element_value_ob
= obstack_alloc (&string_pool
, element_value_size
);
728 if (sizeof (wchar_t) != 4)
730 fputs ("sizeof (wchar_t) != 4 currently not handled", stderr
);
733 for (cnt
= 0; cnt
< element_value_size
/ 4; ++cnt
)
734 element_value_ob
[cnt
] = SWAPU32 (element_value
[cnt
]);
737 /* Store collation elements as map to collation class. There are
738 three kinds of symbols:
742 We need to make a table which lets the user to access the primary
743 weight based on the symbol string. */
744 symbols_hash_tab_size
= next_prime ((4 * (charset
->char_table
.filled
745 + collate
->elements
.filled
746 + collate
->symbols
.filled
)) / 3);
747 symbols_hash_tab
= obstack_alloc (&non_simple
, (2 * symbols_hash_tab_size
748 * sizeof (u_int32_t
)));
749 memset (symbols_hash_tab
, '\377', (2 * symbols_hash_tab_size
750 * sizeof (u_int32_t
)));
752 /* Now fill the array. First the symbols from the character set,
753 then the collation elements and last the collation symbols. */
754 hash_tab
= &charset
->char_table
;
757 void *ptr
; /* Running pointer. */
758 const char *key
; /* Key for current bucket. */
759 size_t keylen
; /* Length of key data. */
760 void *data
; /* Data. */
763 while (iterate_table (hash_tab
, &ptr
, (const void **) &key
,
764 &keylen
, (void **) &data
) == 0)
769 unsigned int *weights
;
771 if (hash_tab
== &charset
->char_table
772 || hash_tab
== &collate
->elements
)
774 element_t
*lastp
, *firstp
;
775 wchar_t dummy_name
[2];
779 if (hash_tab
== &charset
->char_table
)
781 dummy_name
[0] = (wchar_t) ((unsigned long int) data
);
782 dummy_name
[1] = L
'\0';
784 name_len
= sizeof (wchar_t);
788 element_t
*elemp
= (element_t
*) data
;
790 name_len
= wcslen (name
) * sizeof (wchar_t);
793 /* First check whether this character is used at all. */
794 if (find_entry (&collate
->result
, name
, name_len
,
795 (void *) &firstp
) < 0)
796 /* The symbol is not directly mentioned in the collation.
797 I.e., we use the value for UNDEFINED. */
798 lastp
= &collate
->undefined
;
801 /* The entry for the simple character is always found at
804 while (lastp
->next
!= NULL
&& wcscmp (name
, lastp
->name
))
808 weights
= lastp
->ordering
;
812 dummy_weights
[0] = 1;
813 dummy_weights
[collate
->nrules
]
814 = (unsigned int) ((unsigned long int) data
);
816 weights
= dummy_weights
;
819 /* In LASTP->ordering we now have the collation class.
820 Determine the place in the hashing table next. */
821 hash_val
= hash_string (key
, keylen
);
822 idx
= hash_val
% symbols_hash_tab_size
;
824 if (symbols_hash_tab
[2 * idx
] != (~((u_int32_t
) 0)))
826 /* We need the second hashing function. */
827 size_t c
= 1 + (hash_val
% (symbols_hash_tab_size
- 2));
830 if (idx
>= symbols_hash_tab_size
- c
)
831 idx
-= symbols_hash_tab_size
- c
;
834 while (symbols_hash_tab
[2 * idx
] != (~((u_int32_t
) 0)));
837 symbols_hash_tab
[2 * idx
] = obstack_object_size (&string_pool
);
838 symbols_hash_tab
[2 * idx
+ 1] = (obstack_object_size (&non_simple
)
839 / sizeof (u_int32_t
));
841 obstack_grow0 (&string_pool
, key
, keylen
);
842 /* Adding the first weight looks complicated. We have to deal
843 with the kind it is stored and with the fact that original
844 form uses `unsigned int's while we need `u_int32_t' here. */
846 obstack_grow (&non_simple
, &word
, sizeof (u_int32_t
));
847 for (cnt
= 0; cnt
< weights
[0]; ++cnt
)
849 word
= weights
[collate
->nrules
+ cnt
];
850 obstack_grow (&non_simple
, &word
, sizeof (u_int32_t
));
854 if (hash_tab
== &charset
->char_table
)
855 hash_tab
= &collate
->elements
;
856 else if (hash_tab
== &collate
->elements
)
857 hash_tab
= &collate
->symbols
;
862 /* Now we have the complete tables. */
863 if (obstack_object_size (&string_pool
) % 4 != 0)
864 obstack_blank (&non_simple
, 4 - (obstack_object_size (&string_pool
) % 4));
865 symbols_string_pool_size
= obstack_object_size (&string_pool
);
866 symbols_string_pool
= obstack_finish (&string_pool
);
868 symbols_class_size
= obstack_object_size (&non_simple
);
869 symbols_class
= obstack_finish (&non_simple
);
871 /* Generate tables with other byte order. */
872 symbols_hash_tab_ob
= obstack_alloc (&non_simple
, (2 * symbols_hash_tab_size
873 * sizeof (u_int32_t
)));
874 for (cnt
= 0; cnt
< 2 * symbols_hash_tab_size
; ++cnt
)
875 symbols_hash_tab_ob
[cnt
] = SWAPU32 (symbols_hash_tab
[cnt
]);
877 symbols_class_ob
= obstack_alloc (&non_simple
, symbols_class_size
);
878 for (cnt
= 0; cnt
< symbols_class_size
/ 4; ++cnt
)
879 symbols_class_ob
[cnt
] = SWAPU32 (symbols_class
[cnt
]);
882 /* Store table adresses and lengths. */
883 #if __BYTE_ORDER == __BIG_ENDIAN
884 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB
)].iov_base
= table
;
885 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB
)].iov_len
886 = table_best
* level_best
* entry_size
* sizeof (table
[0]);
888 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL
)].iov_base
= table2
;
889 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL
)].iov_len
890 = table_best
* level_best
* entry_size
* sizeof (table
[0]);
892 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB
)].iov_base
= extra
;
893 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB
)].iov_len
= extra_len
;
895 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL
)].iov_base
= extra2
;
896 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL
)].iov_len
= extra_len
;
898 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB
)].iov_base
= table2
;
899 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB
)].iov_len
900 = table_best
* level_best
* entry_size
* sizeof (table
[0]);
902 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL
)].iov_base
= table
;
903 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL
)].iov_len
904 = table_best
* level_best
* entry_size
* sizeof (table
[0]);
906 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB
)].iov_base
= extra2
;
907 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB
)].iov_len
= extra_len
;
909 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL
)].iov_base
= extra
;
910 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL
)].iov_len
= extra_len
;
913 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED
)].iov_base
= &undefined_offset
;
914 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED
)].iov_len
= sizeof (u_int32_t
);
917 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_SIZE
)].iov_base
918 = &element_hash_tab_size
;
919 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_SIZE
)].iov_len
920 = sizeof (u_int32_t
);
922 #if __BYTE_ORDER == __BIG_ENDIAN
923 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB
)].iov_base
925 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB
)].iov_len
926 = 2 * element_hash_tab_size
* sizeof (u_int32_t
);
928 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL
)].iov_base
929 = element_hash_tab_ob
;
930 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL
)].iov_len
931 = 2 * element_hash_tab_size
* sizeof (u_int32_t
);
933 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL
)].iov_base
935 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL
)].iov_len
936 = 2 * element_hash_tab_size
* sizeof (u_int32_t
);
938 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB
)].iov_base
939 = element_hash_tab_ob
;
940 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB
)].iov_len
941 = 2 * element_hash_tab_size
* sizeof (u_int32_t
);
944 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_STR_POOL
)].iov_base
945 = element_string_pool
;
946 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_STR_POOL
)].iov_len
947 = element_string_pool_size
;
949 #if __BYTE_ORDER == __BIG_ENDIAN
950 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB
)].iov_base
952 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB
)].iov_len
953 = element_value_size
;
955 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL
)].iov_base
957 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL
)].iov_len
958 = element_value_size
;
960 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL
)].iov_base
962 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL
)].iov_len
963 = element_value_size
;
965 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB
)].iov_base
967 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB
)].iov_len
968 = element_value_size
;
971 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZE
)].iov_base
972 = &symbols_hash_tab_size
;
973 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZE
)].iov_len
974 = sizeof (u_int32_t
);
976 #if __BYTE_ORDER == __BIG_ENDIAN
977 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB
)].iov_base
979 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB
)].iov_len
980 = 2 * symbols_hash_tab_size
* sizeof (u_int32_t
);
982 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL
)].iov_base
983 = symbols_hash_tab_ob
;
984 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL
)].iov_len
985 = 2 * symbols_hash_tab_size
* sizeof (u_int32_t
);
987 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL
)].iov_base
989 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL
)].iov_len
990 = 2 * symbols_hash_tab_size
* sizeof (u_int32_t
);
992 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB
)].iov_base
993 = symbols_hash_tab_ob
;
994 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB
)].iov_len
995 = 2 * symbols_hash_tab_size
* sizeof (u_int32_t
);
998 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_STR_POOL
)].iov_base
999 = symbols_string_pool
;
1000 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_STR_POOL
)].iov_len
1001 = symbols_string_pool_size
;
1003 #if __BYTE_ORDER == __BIG_ENDIAN
1004 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB
)].iov_base
1006 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB
)].iov_len
1007 = symbols_class_size
;
1009 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL
)].iov_base
1011 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL
)].iov_len
1012 = symbols_class_size
;
1014 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL
)].iov_base
1016 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL
)].iov_len
1017 = symbols_class_size
;
1019 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB
)].iov_base
1021 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB
)].iov_len
1022 = symbols_class_size
;
1025 /* Update idx array. */
1026 idx
[0] = iov
[0].iov_len
+ iov
[1].iov_len
;
1027 for (cnt
= 1; cnt
< nelems
; ++cnt
)
1028 idx
[cnt
] = idx
[cnt
- 1] + iov
[1 + cnt
].iov_len
;
1030 write_locale_data (output_path
, "LC_COLLATE", 2 + nelems
, iov
);
1032 obstack_free (&non_simple
, NULL
);
1033 obstack_free (&string_pool
, NULL
);
1038 collate_element_to (struct linereader
*lr
, struct localedef_t
*locale
,
1039 struct token
*code
, struct charset_t
*charset
)
1041 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1045 if (collate
->combine_token
!= NULL
)
1047 free ((void *) collate
->combine_token
);
1048 collate
->combine_token
= NULL
;
1051 value
= charset_find_value (charset
, code
->val
.str
.start
, code
->val
.str
.len
);
1052 if (value
!= ILLEGAL_CHAR_VALUE
)
1054 lr_error (lr
, _("symbol for multicharacter collating element "
1055 "`%.*s' duplicates symbolic name in charset"),
1056 code
->val
.str
.len
, code
->val
.str
.start
);
1060 if (find_entry (&collate
->elements
, code
->val
.str
.start
, code
->val
.str
.len
,
1063 lr_error (lr
, _("symbol for multicharacter collating element "
1064 "`%.*s' duplicates other element definition"),
1065 code
->val
.str
.len
, code
->val
.str
.start
);
1069 if (find_entry (&collate
->elements
, code
->val
.str
.start
, code
->val
.str
.len
,
1072 lr_error (lr
, _("symbol for multicharacter collating element "
1073 "`%.*s' duplicates symbol definition"),
1074 code
->val
.str
.len
, code
->val
.str
.start
);
1078 collate
->combine_token
= code
->val
.str
.start
;
1079 collate
->combine_token_len
= code
->val
.str
.len
;
1084 collate_element_from (struct linereader
*lr
, struct localedef_t
*locale
,
1085 struct token
*code
, struct charset_t
*charset
)
1087 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1088 element_t
*elemp
, *runp
;
1090 /* CODE is a string. */
1091 elemp
= (element_t
*) obstack_alloc (&collate
->element_mem
,
1092 sizeof (element_t
));
1094 /* We have to translate the string. It may contain <...> character
1096 elemp
->name
= (wchar_t *) translate_string (code
->val
.str
.start
, charset
);
1097 elemp
->this_weight
= 0;
1098 elemp
->ordering
= NULL
;
1099 elemp
->ordering_len
= 0;
1101 free (code
->val
.str
.start
);
1103 if (elemp
->name
== NULL
)
1105 /* At least one character in the string is not defined. We simply
1109 `from' string in collation element declaration contains unknown character"));
1113 if (elemp
->name
[0] == L
'\0' || elemp
->name
[1] == L
'\0')
1115 lr_error (lr
, _("illegal collation element"));
1119 /* The entries in the linked lists of RESULT are sorting in
1120 descending order. The order is important for the `strcoll' and
1121 `wcscoll' functions. */
1122 if (find_entry (&collate
->result
, elemp
->name
, sizeof (wchar_t),
1123 (void *) &runp
) >= 0)
1125 /* We already have an entry with this key. Check whether it is
1127 element_t
*prevp
= NULL
;
1132 cmpres
= wcscmp (elemp
->name
, runp
->name
);
1137 while ((runp
= runp
->next
) != NULL
);
1140 lr_error (lr
, _("duplicate collating element definition"));
1146 if (set_entry (&collate
->result
, elemp
->name
, sizeof (wchar_t),
1148 error (EXIT_FAILURE
, 0, _("\
1149 error while inserting collation element into hash table"));
1152 prevp
->next
= elemp
;
1158 if (insert_entry (&collate
->result
, elemp
->name
, sizeof (wchar_t), elemp
)
1160 error (EXIT_FAILURE
, errno
, _("error while inserting to hash table"));
1163 if (insert_entry (&collate
->elements
, collate
->combine_token
,
1164 collate
->combine_token_len
, (void *) elemp
) < 0)
1165 lr_error (lr
, _("cannot insert new collating symbol definition: %s"),
1171 collate_symbol (struct linereader
*lr
, struct localedef_t
*locale
,
1172 struct token
*code
, struct charset_t
*charset
)
1174 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1178 value
= charset_find_value (charset
, code
->val
.str
.start
, code
->val
.str
.len
);
1179 if (value
!= ILLEGAL_CHAR_VALUE
)
1181 lr_error (lr
, _("symbol for multicharacter collating element "
1182 "`%.*s' duplicates symbolic name in charset"),
1183 code
->val
.str
.len
, code
->val
.str
.start
);
1187 if (find_entry (&collate
->elements
, code
->val
.str
.start
, code
->val
.str
.len
,
1190 lr_error (lr
, _("symbol for multicharacter collating element "
1191 "`%.*s' duplicates element definition"),
1192 code
->val
.str
.len
, code
->val
.str
.start
);
1196 if (find_entry (&collate
->symbols
, code
->val
.str
.start
, code
->val
.str
.len
,
1199 lr_error (lr
, _("symbol for multicharacter collating element "
1200 "`%.*s' duplicates other symbol definition"),
1201 code
->val
.str
.len
, code
->val
.str
.start
);
1205 if (insert_entry (&collate
->symbols
, code
->val
.str
.start
, code
->val
.str
.len
,
1207 lr_error (lr
, _("cannot insert new collating symbol definition: %s"),
1213 collate_new_order (struct linereader
*lr
, struct localedef_t
*locale
,
1214 enum coll_sort_rule sort_rule
)
1216 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1218 if (collate
->nrules
>= collate
->nrules_max
)
1220 collate
->nrules_max
*= 2;
1222 = (enum coll_sort_rule
*) xrealloc (collate
->rules
,
1224 * sizeof (enum coll_sort_rule
));
1227 collate
->rules
[collate
->nrules
++] = sort_rule
;
1232 collate_build_arrays (struct linereader
*lr
, struct localedef_t
*locale
)
1234 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1237 = (enum coll_sort_rule
*) xrealloc (collate
->rules
,
1239 * sizeof (enum coll_sort_rule
));
1241 /* Allocate arrays for temporary weights. */
1242 collate
->weight_cnt
= (int *) xmalloc (collate
->nrules
* sizeof (int));
1244 /* Choose arbitrary start value for table size. */
1245 collate
->nweight_max
= 5 * collate
->nrules
;
1246 collate
->weight
= (int *) xmalloc (collate
->nweight_max
* sizeof (int));
1251 collate_order_elem (struct linereader
*lr
, struct localedef_t
*locale
,
1252 struct token
*code
, struct charset_t
*charset
)
1254 const wchar_t zero
= L
'\0';
1255 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1264 /* We have a string to find in one of the three hashing tables. */
1265 value
= charset_find_value (charset
, code
->val
.str
.start
,
1267 if (value
!= ILLEGAL_CHAR_VALUE
)
1269 element_t
*lastp
, *firstp
;
1271 collate
->kind
= character
;
1273 if (find_entry (&collate
->result
, &value
, sizeof (wchar_t),
1274 (void *) &firstp
) < 0)
1275 firstp
= lastp
= NULL
;
1278 /* The entry for the simple character is always found at
1281 while (lastp
->next
!= NULL
)
1282 lastp
= lastp
->next
;
1284 if (lastp
->name
[0] == value
&& lastp
->name
[1] == L
'\0')
1286 lr_error (lr
, _("duplicate definition for character `%.*s'"),
1287 code
->val
.str
.len
, code
->val
.str
.start
);
1288 lr_ignore_rest (lr
, 0);
1294 collate
->current_element
1295 = (element_t
*) obstack_alloc (&collate
->element_mem
,
1296 sizeof (element_t
));
1298 obstack_grow (&collate
->element_mem
, &value
, sizeof (value
));
1299 obstack_grow (&collate
->element_mem
, &zero
, sizeof (zero
));
1301 collate
->current_element
->name
=
1302 (const wchar_t *) obstack_finish (&collate
->element_mem
);
1304 collate
->current_element
->this_weight
= ++collate
->order_cnt
;
1306 collate
->current_element
->next
= NULL
;
1310 if (insert_entry (&collate
->result
, &value
, sizeof (wchar_t),
1311 (void *) collate
->current_element
) < 0)
1313 lr_error (lr
, _("cannot insert collation element `%.*s'"),
1314 code
->val
.str
.len
, code
->val
.str
.start
);
1319 lastp
->next
= collate
->current_element
;
1321 else if (find_entry (&collate
->elements
, code
->val
.str
.start
,
1322 code
->val
.str
.len
, &tmp
) >= 0)
1324 collate
->current_element
= (element_t
*) tmp
;
1326 if (collate
->current_element
->this_weight
!= 0)
1329 collation element `%.*s' appears more than once: ignore line"),
1330 code
->val
.str
.len
, code
->val
.str
.start
);
1331 lr_ignore_rest (lr
, 0);
1336 collate
->kind
= element
;
1337 collate
->current_element
->this_weight
= ++collate
->order_cnt
;
1339 else if (find_entry (&collate
->symbols
, code
->val
.str
.start
,
1340 code
->val
.str
.len
, &tmp
) >= 0)
1342 unsigned int order
= ++collate
->order_cnt
;
1344 if ((unsigned long int) tmp
!= 0ul)
1347 collation symbol `.*s' appears more than once: ignore line"),
1348 code
->val
.str
.len
, code
->val
.str
.start
);
1349 lr_ignore_rest (lr
, 0);
1354 collate
->kind
= symbol
;
1356 if (set_entry (&collate
->symbols
, code
->val
.str
.start
,
1357 code
->val
.str
.len
, (void *) order
) < 0)
1359 lr_error (lr
, _("cannot process order specification"));
1366 lr_error (lr
, _("unknown symbol `%.*s': line ignored"),
1367 code
->val
.str
.len
, code
->val
.str
.start
);
1368 lr_ignore_rest (lr
, 0);
1375 collate
->kind
= undefined
;
1376 collate
->current_element
= &collate
->undefined
;
1380 if (collate
->was_ellipsis
)
1383 two lines in a row containing `...' are not allowed"));
1386 else if (collate
->kind
!= character
)
1388 /* An ellipsis requires the previous line to be an
1389 character definition. */
1391 line before ellipsis does not contain definition for character constant"));
1392 lr_ignore_rest (lr
, 0);
1396 collate
->kind
= ellipsis
;
1400 assert (! "illegal token in `collate_order_elem'");
1403 /* Now it's time to handle the ellipsis in the previous line. We do
1404 this only when the last line contained an definition for a
1405 character, the current line also defines an character, the
1406 character code for the later is bigger than the former. */
1407 if (collate
->was_ellipsis
)
1409 if (collate
->kind
!= character
)
1412 line after ellipsis must contain character definition"));
1413 lr_ignore_rest (lr
, 0);
1416 else if (collate
->last_char
> value
)
1418 lr_error (lr
, _("end point of ellipsis range is bigger then start"));
1419 lr_ignore_rest (lr
, 0);
1424 /* We can fill the arrays with the information we need. */
1430 name
[0] = collate
->last_char
+ 1;
1433 data
= (unsigned int *) alloca ((collate
->nrules
+ collate
->nweight
)
1434 * sizeof (unsigned int));
1435 ptr
= (size_t *) alloca (collate
->nrules
* sizeof (size_t));
1437 if (data
== NULL
|| ptr
== NULL
)
1438 error (4, 0, _("memory exhausted"));
1440 /* Prepare data. Because the characters covered by an
1441 ellipsis all have equal values we prepare the data once
1442 and only change the variable number (if there are any).
1443 PTR[...] will point to the entries which will have to be
1444 fixed during the output loop. */
1445 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
1447 data
[cnt
] = collate
->weight_cnt
[cnt
];
1448 ptr
[cnt
] = (cnt
== 0
1450 : ptr
[cnt
- 1] + collate
->weight_cnt
[cnt
- 1]);
1453 for (cnt
= 0; cnt
< collate
->nweight
; ++cnt
)
1454 data
[collate
->nrules
+ cnt
] = collate
->weight
[cnt
];
1456 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
1457 if (data
[ptr
[cnt
]] != ELLIPSIS_CHAR
)
1460 while (name
[0] <= value
)
1464 pelem
= (element_t
*) obstack_alloc (&collate
->element_mem
,
1465 sizeof (element_t
));
1467 error (4, 0, _("memory exhausted"));
1470 = (const wchar_t *) obstack_copy (&collate
->element_mem
,
1471 name
, 2 * sizeof (wchar_t));
1472 pelem
->this_weight
= ++collate
->order_cnt
;
1474 pelem
->ordering_len
= collate
->nweight
;
1476 = (unsigned int *) obstack_copy (&collate
->element_mem
, data
,
1478 * pelem
->ordering_len
)
1479 * sizeof (unsigned int));
1481 /* `...' weights need to be adjusted. */
1482 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
1484 pelem
->ordering
[ptr
[cnt
]] = pelem
->this_weight
;
1486 /* Insert new entry into result table. */
1487 if (find_entry (&collate
->result
, name
, sizeof (wchar_t),
1488 (void *) &pelem
->next
) >= 0)
1490 if (set_entry (&collate
->result
, name
, sizeof (wchar_t),
1491 (void *) pelem
->next
) < 0)
1492 error (4, 0, _("cannot insert into result table"));
1495 if (insert_entry (&collate
->result
, name
, sizeof (wchar_t),
1496 (void *) pelem
->next
) < 0)
1497 error (4, 0, _("cannot insert into result table"));
1499 /* Increment counter. */
1505 /* Reset counters for weights. */
1506 collate
->weight_idx
= 0;
1507 collate
->nweight
= 0;
1508 for (i
= 0; i
< collate
->nrules
; ++i
)
1509 collate
->weight_cnt
[i
] = 0;
1510 collate
->current_patch
= NULL
;
1517 collate_weight_bsymbol (struct linereader
*lr
, struct localedef_t
*locale
,
1518 struct token
*code
, struct charset_t
*charset
)
1520 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1521 unsigned int here_weight
;
1525 assert (code
->tok
== tok_bsymbol
);
1527 value
= charset_find_value (charset
, code
->val
.str
.start
, code
->val
.str
.len
);
1528 if (value
!= ILLEGAL_CHAR_VALUE
)
1532 if (find_entry (&collate
->result
, &value
, sizeof (wchar_t),
1537 && (runp
->name
[0] != value
|| runp
->name
[1] != L
'\0'))
1540 here_weight
= runp
== NULL
? 0 : runp
->this_weight
;
1542 else if (find_entry (&collate
->elements
, code
->val
.str
.start
,
1543 code
->val
.str
.len
, &tmp
) >= 0)
1545 element_t
*runp
= (element_t
*) tmp
;
1547 here_weight
= runp
->this_weight
;
1549 else if (find_entry (&collate
->symbols
, code
->val
.str
.start
,
1550 code
->val
.str
.len
, &tmp
) >= 0)
1552 here_weight
= (unsigned int) tmp
;
1557 lr_error (lr
, _("unknown symbol `%.*s': line ignored"),
1558 code
->val
.str
.len
, code
->val
.str
.start
);
1559 lr_ignore_rest (lr
, 0);
1563 /* When we currently work on a collation symbol we do not expect any
1565 if (collate
->kind
== symbol
)
1568 specification of sorting weight for collation symbol does not make sense"));
1569 lr_ignore_rest (lr
, 0);
1573 /* Add to the current collection of weights. */
1574 if (collate
->nweight
>= collate
->nweight_max
)
1576 collate
->nweight_max
*= 2;
1577 collate
->weight
= (unsigned int *) xrealloc (collate
->weight
,
1578 collate
->nweight_max
);
1581 /* If the weight is currently not known, we remember to patch the
1582 resulting tables. */
1583 if (here_weight
== 0)
1587 newp
= (patch_t
*) obstack_alloc (&collate
->element_mem
,
1589 newp
->fname
= lr
->fname
;
1590 newp
->lineno
= lr
->lineno
;
1591 newp
->token
= (const char *) obstack_copy0 (&collate
->element_mem
,
1592 code
->val
.str
.start
,
1594 newp
->where
.idx
= collate
->nweight
++;
1595 newp
->next
= collate
->current_patch
;
1596 collate
->current_patch
= newp
;
1599 collate
->weight
[collate
->nweight
++] = here_weight
;
1600 ++collate
->weight_cnt
[collate
->weight_idx
];
1607 collate_next_weight (struct linereader
*lr
, struct localedef_t
*locale
)
1609 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1611 if (collate
->kind
== symbol
)
1614 specification of sorting weight for collation symbol does not make sense"));
1615 lr_ignore_rest (lr
, 0);
1619 ++collate
->weight_idx
;
1620 if (collate
->weight_idx
>= collate
->nrules
)
1622 lr_error (lr
, _("too many weights"));
1623 lr_ignore_rest (lr
, 0);
1632 collate_simple_weight (struct linereader
*lr
, struct localedef_t
*locale
,
1633 struct token
*code
, struct charset_t
*charset
)
1635 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1636 unsigned int value
= 0;
1638 /* There current tokens can be `IGNORE', `...', or a string. */
1642 /* This token is allowed in all situations. */
1643 value
= IGNORE_CHAR
;
1647 /* The ellipsis is only allowed for the `...' or `UNDEFINED'
1649 if (collate
->kind
!= ellipsis
&& collate
->kind
!= undefined
)
1652 `...' must only be used in `...' and `UNDEFINED' entries"));
1653 lr_ignore_rest (lr
, 0);
1656 value
= ELLIPSIS_CHAR
;
1660 /* This can become difficult. We have to get the weights which
1661 correspind the the single wide chars in the string. But some
1662 of the `chars' might not be real characters, but collation
1663 elements or symbols. And so the string decoder might have
1664 signaled errors. The string at this point is not translated.
1665 I.e., all <...> sequences are still there. */
1667 char *runp
= code
->val
.str
.start
;
1670 while (*runp
!= '\0')
1672 char *startp
= (char *) runp
;
1673 char *putp
= (char *) runp
;
1676 /* Lookup weight for char and store it. */
1679 while (*++runp
!= '\0' && *runp
!= '>')
1681 if (*runp
== lr
->escape_char
)
1682 if (*++runp
== '\0')
1684 lr_error (lr
, _("unterminated weight name"));
1685 lr_ignore_rest (lr
, 0);
1695 lr_error (lr
, _("empty weight name: line ignored"));
1696 lr_ignore_rest (lr
, 0);
1700 wch
= charset_find_value (charset
, startp
, putp
- startp
);
1701 if (wch
!= ILLEGAL_CHAR_VALUE
)
1705 if (find_entry (&collate
->result
, &wch
, sizeof (wchar_t),
1706 (void *)&pelem
) < 0)
1709 while (pelem
!= NULL
1710 && (pelem
->name
[0] != wch
1711 || pelem
->name
[1] != L
'\0'))
1712 pelem
= pelem
->next
;
1714 value
= pelem
== NULL
? 0 : pelem
->this_weight
;
1716 else if (find_entry (&collate
->elements
, startp
, putp
- startp
,
1719 element_t
*pelem
= (element_t
*) tmp
;
1721 value
= pelem
->this_weight
;
1723 else if (find_entry (&collate
->symbols
, startp
, putp
- startp
,
1726 value
= (unsigned int) tmp
;
1731 lr_error (lr
, _("unknown symbol `%.*s': line ignored"),
1732 putp
- startp
, startp
);
1733 lr_ignore_rest (lr
, 0);
1742 if (*runp
== lr
->escape_char
)
1744 static char digits
[] = "0123456789abcdef";
1749 if (tolower (*runp
) == 'x')
1754 else if (tolower (*runp
) == 'd')
1762 dp
= strchr (digits
, tolower (*runp
));
1763 if (dp
== NULL
|| (dp
- digits
) >= base
)
1767 illegal character constant in string"));
1768 lr_ignore_rest (lr
, 0);
1774 dp
= strchr (digits
, tolower (*runp
));
1775 if (dp
== NULL
|| (dp
- digits
) >= base
)
1783 dp
= strchr (digits
, tolower (*runp
));
1784 if (dp
!= NULL
&& (dp
- digits
< base
))
1793 wch
= (wchar_t) *runp
++;
1795 /* Lookup the weight for WCH. */
1796 if (find_entry (&collate
->result
, &wch
, sizeof (wch
),
1801 && (wp
->name
[0] != wch
|| wp
->name
[1] != L
'\0'))
1804 value
= wp
== NULL
? 0 : wp
->this_weight
;
1806 /* To get the correct name for the error message. */
1809 /**************************************************\
1810 |* I know here is something wrong. Characters in *|
1811 |* the string which are not in the <...> form *|
1812 |* cannot be declared forward for now!!! *|
1813 \**************************************************/
1816 /* Store in weight array. */
1817 if (collate
->nweight
>= collate
->nweight_max
)
1819 collate
->nweight_max
*= 2;
1821 = (unsigned int *) xrealloc (collate
->weight
,
1822 collate
->nweight_max
);
1829 newp
= (patch_t
*) obstack_alloc (&collate
->element_mem
,
1831 newp
->fname
= lr
->fname
;
1832 newp
->lineno
= lr
->lineno
;
1834 = (const char *) obstack_copy0 (&collate
->element_mem
,
1835 startp
, putp
- startp
);
1836 newp
->where
.idx
= collate
->nweight
++;
1837 newp
->next
= collate
->current_patch
;
1838 collate
->current_patch
= newp
;
1841 collate
->weight
[collate
->nweight
++] = value
;
1842 ++collate
->weight_cnt
[collate
->weight_idx
];
1848 assert (! "should not happen");
1852 if (collate
->nweight
>= collate
->nweight_max
)
1854 collate
->nweight_max
*= 2;
1855 collate
->weight
= (unsigned int *) xrealloc (collate
->weight
,
1856 collate
->nweight_max
);
1859 collate
->weight
[collate
->nweight
++] = value
;
1860 ++collate
->weight_cnt
[collate
->weight_idx
];
1867 collate_end_weight (struct linereader
*lr
, struct localedef_t
*locale
)
1869 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1870 element_t
*pelem
= collate
->current_element
;
1872 if (collate
->kind
== symbol
)
1874 /* We don't have to do anything. */
1875 collate
->was_ellipsis
= 0;
1879 if (collate
->kind
== ellipsis
)
1881 /* Before the next line is processed the ellipsis is handled. */
1882 collate
->was_ellipsis
= 1;
1886 assert (collate
->kind
== character
|| collate
->kind
== element
1887 || collate
->kind
== undefined
);
1889 /* Fill in the missing weights. */
1890 while (++collate
->weight_idx
< collate
->nrules
)
1892 collate
->weight
[collate
->nweight
++] = pelem
->this_weight
;
1893 ++collate
->weight_cnt
[collate
->weight_idx
];
1896 /* Now we know how many ordering weights the current
1897 character/element has. Allocate room in the element structure
1898 and copy information. */
1899 pelem
->ordering_len
= collate
->nweight
;
1901 /* First we write an array with the number of values for each
1903 obstack_grow (&collate
->element_mem
, collate
->weight_cnt
,
1904 collate
->nrules
* sizeof (unsigned int));
1906 /* Now the weights itselves. */
1907 obstack_grow (&collate
->element_mem
, collate
->weight
,
1908 collate
->nweight
* sizeof (unsigned int));
1911 pelem
->ordering
= obstack_finish (&collate
->element_mem
);
1913 /* Now we handle the "patches". */
1914 while (collate
->current_patch
!= NULL
)
1916 patch_t
*this_patch
;
1918 this_patch
= collate
->current_patch
;
1920 this_patch
->where
.pos
= &pelem
->ordering
[collate
->nrules
1921 + this_patch
->where
.idx
];
1923 collate
->current_patch
= this_patch
->next
;
1924 this_patch
->next
= collate
->all_patches
;
1925 collate
->all_patches
= this_patch
;
1928 /* Set information for next round. */
1929 collate
->was_ellipsis
= 0;
1930 if (collate
->kind
!= undefined
)
1931 collate
->last_char
= pelem
->name
[0];