* sysdeps/unix/sysv/linux/m68k/semtimedop.S: New file.
[glibc/pb-stable.git] / string / strcoll.c
blob26072018ac41321ad674f1f3954a6bd539f7d7d1
1 /* Copyright (C) 1995,96,97,98,99,2000,2001,2002 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Written by Ulrich Drepper <drepper@cygnus.com>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18 02111-1307 USA. */
20 #include <assert.h>
21 #include <langinfo.h>
22 #include <locale.h>
23 #include <stddef.h>
24 #include <stdint.h>
25 #include <stdlib.h>
26 #include <string.h>
28 #ifndef STRING_TYPE
29 # define STRING_TYPE char
30 # define USTRING_TYPE unsigned char
31 # ifdef USE_IN_EXTENDED_LOCALE_MODEL
32 # define STRCOLL __strcoll_l
33 # else
34 # define STRCOLL strcoll
35 # endif
36 # define STRCMP strcmp
37 # define STRLEN strlen
38 # define WEIGHT_H "../locale/weight.h"
39 # define SUFFIX MB
40 # define L(arg) arg
41 #endif
43 #define CONCAT(a,b) CONCAT1(a,b)
44 #define CONCAT1(a,b) a##b
46 #include "../locale/localeinfo.h"
48 #ifndef USE_IN_EXTENDED_LOCALE_MODEL
49 int
50 STRCOLL (s1, s2)
51 const STRING_TYPE *s1;
52 const STRING_TYPE *s2;
53 #else
54 int
55 STRCOLL (s1, s2, l)
56 const STRING_TYPE *s1;
57 const STRING_TYPE *s2;
58 __locale_t l;
59 #endif
61 #ifdef USE_IN_EXTENDED_LOCALE_MODEL
62 struct locale_data *current = l->__locales[LC_COLLATE];
63 uint_fast32_t nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word;
64 #else
65 uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
66 #endif
67 /* We don't assign the following values right away since it might be
68 unnecessary in case there are no rules. */
69 const unsigned char *rulesets;
70 const int32_t *table;
71 const USTRING_TYPE *weights;
72 const USTRING_TYPE *extra;
73 const int32_t *indirect;
74 uint_fast32_t pass;
75 int result = 0;
76 const USTRING_TYPE *us1;
77 const USTRING_TYPE *us2;
78 size_t s1len;
79 size_t s2len;
80 int32_t *idx1arr;
81 int32_t *idx2arr;
82 unsigned char *rule1arr;
83 unsigned char *rule2arr;
84 size_t idx1max;
85 size_t idx2max;
86 size_t idx1cnt;
87 size_t idx2cnt;
88 size_t idx1now;
89 size_t idx2now;
90 size_t backw1_stop;
91 size_t backw2_stop;
92 size_t backw1;
93 size_t backw2;
94 int val1;
95 int val2;
96 int position;
97 int seq1len;
98 int seq2len;
99 int use_malloc;
101 #include WEIGHT_H
103 if (nrules == 0)
104 return STRCMP (s1, s2);
106 #ifdef USE_IN_EXTENDED_LOCALE_MODEL
107 rulesets = (const unsigned char *)
108 current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string;
109 table = (const int32_t *)
110 current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string;
111 weights = (const USTRING_TYPE *)
112 current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string;
113 extra = (const USTRING_TYPE *)
114 current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string;
115 indirect = (const int32_t *)
116 current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string;
117 #else
118 rulesets = (const unsigned char *)
119 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_RULESETS);
120 table = (const int32_t *)
121 _NL_CURRENT (LC_COLLATE, CONCAT(_NL_COLLATE_TABLE,SUFFIX));
122 weights = (const USTRING_TYPE *)
123 _NL_CURRENT (LC_COLLATE, CONCAT(_NL_COLLATE_WEIGHT,SUFFIX));
124 extra = (const USTRING_TYPE *)
125 _NL_CURRENT (LC_COLLATE, CONCAT(_NL_COLLATE_EXTRA,SUFFIX));
126 indirect = (const int32_t *)
127 _NL_CURRENT (LC_COLLATE, CONCAT(_NL_COLLATE_INDIRECT,SUFFIX));
128 #endif
129 use_malloc = 0;
131 assert (((uintptr_t) table) % __alignof__ (table[0]) == 0);
132 assert (((uintptr_t) weights) % __alignof__ (weights[0]) == 0);
133 assert (((uintptr_t) extra) % __alignof__ (extra[0]) == 0);
134 assert (((uintptr_t) indirect) % __alignof__ (indirect[0]) == 0);
136 /* We need this a few times. */
137 s1len = STRLEN (s1);
138 s2len = STRLEN (s2);
140 /* Catch empty strings. */
141 if (__builtin_expect (s1len == 0, 0) || __builtin_expect (s2len == 0, 0))
142 return (s1len != 0) - (s2len != 0);
144 /* We need the elements of the strings as unsigned values since they
145 are used as indeces. */
146 us1 = (const USTRING_TYPE *) s1;
147 us2 = (const USTRING_TYPE *) s2;
149 /* Perform the first pass over the string and while doing this find
150 and store the weights for each character. Since we want this to
151 be as fast as possible we are using `alloca' to store the temporary
152 values. But since there is no limit on the length of the string
153 we have to use `malloc' if the string is too long. We should be
154 very conservative here.
156 Please note that the localedef programs makes sure that `position'
157 is not used at the first level. */
158 if (! __libc_use_alloca (s1len + s2len))
160 idx1arr = (int32_t *) malloc ((s1len + s2len) * (sizeof (int32_t) + 1));
161 idx2arr = &idx1arr[s1len];
162 rule1arr = (unsigned char *) &idx2arr[s2len];
163 rule2arr = &rule1arr[s1len];
165 if (idx1arr == NULL)
166 /* No memory. Well, go with the stack then.
168 XXX Once this implementation is stable we will handle this
169 differently. Instead of precomputing the indeces we will
170 do this in time. This means, though, that this happens for
171 every pass again. */
172 goto try_stack;
173 use_malloc = 1;
175 else
177 try_stack:
178 idx1arr = (int32_t *) alloca (s1len * sizeof (int32_t));
179 idx2arr = (int32_t *) alloca (s2len * sizeof (int32_t));
180 rule1arr = (unsigned char *) alloca (s1len);
181 rule2arr = (unsigned char *) alloca (s2len);
184 idx1cnt = 0;
185 idx2cnt = 0;
186 idx1max = 0;
187 idx2max = 0;
188 idx1now = 0;
189 idx2now = 0;
190 backw1_stop = ~0ul;
191 backw2_stop = ~0ul;
192 backw1 = ~0ul;
193 backw2 = ~0ul;
194 seq1len = 0;
195 seq2len = 0;
196 position = rulesets[0] & sort_position;
197 while (1)
199 val1 = 0;
200 val2 = 0;
202 /* Get the next non-IGNOREd element for string `s1'. */
203 if (seq1len == 0)
206 ++val1;
208 if (backw1_stop != ~0ul)
210 /* The is something pushed. */
211 if (backw1 == backw1_stop)
213 /* The last pushed character was handled. Continue
214 with forward characters. */
215 if (idx1cnt < idx1max)
216 idx1now = idx1cnt;
217 else
218 /* Nothing anymore. The backward sequence ended with
219 the last sequence in the string. Note that seq1len
220 is still zero. */
221 break;
223 else
224 idx1now = --backw1;
226 else
228 backw1_stop = idx1max;
230 while (*us1 != L('\0'))
232 int32_t tmp = findidx (&us1);
233 rule1arr[idx1max] = tmp >> 24;
234 idx1arr[idx1max] = tmp & 0xffffff;
235 idx1cnt = idx1max++;
237 if ((rulesets[rule1arr[idx1cnt] * nrules]
238 & sort_backward) == 0)
239 /* No more backward characters to push. */
240 break;
241 ++idx1cnt;
244 if (backw1_stop >= idx1cnt)
246 /* No sequence at all or just one. */
247 if (idx1cnt == idx1max || backw1_stop > idx1cnt)
248 /* Note that seq1len is still zero. */
249 break;
251 backw1_stop = ~0ul;
252 idx1now = idx1cnt;
254 else
255 /* We pushed backward sequences. */
256 idx1now = backw1 = idx1cnt - 1;
259 while ((seq1len = weights[idx1arr[idx1now]++]) == 0);
261 /* And the same for string `s2'. */
262 if (seq2len == 0)
265 ++val2;
267 if (backw2_stop != ~0ul)
269 /* The is something pushed. */
270 if (backw2 == backw2_stop)
272 /* The last pushed character was handled. Continue
273 with forward characters. */
274 if (idx2cnt < idx2max)
275 idx2now = idx2cnt;
276 else
277 /* Nothing anymore. The backward sequence ended with
278 the last sequence in the string. Note that seq2len
279 is still zero. */
280 break;
282 else
283 idx2now = --backw2;
285 else
287 backw2_stop = idx2max;
289 while (*us2 != L('\0'))
291 int32_t tmp = findidx (&us2);
292 rule2arr[idx2max] = tmp >> 24;
293 idx2arr[idx2max] = tmp & 0xffffff;
294 idx2cnt = idx2max++;
296 if ((rulesets[rule2arr[idx2cnt] * nrules]
297 & sort_backward) == 0)
298 /* No more backward characters to push. */
299 break;
300 ++idx2cnt;
303 if (backw2_stop >= idx2cnt)
305 /* No sequence at all or just one. */
306 if (idx2cnt == idx2max || backw2_stop > idx2cnt)
307 /* Note that seq1len is still zero. */
308 break;
310 backw2_stop = ~0ul;
311 idx2now = idx2cnt;
313 else
314 /* We pushed backward sequences. */
315 idx2now = backw2 = idx2cnt - 1;
318 while ((seq2len = weights[idx2arr[idx2now]++]) == 0);
320 /* See whether any or both strings are empty. */
321 if (seq1len == 0 || seq2len == 0)
323 if (seq1len == seq2len)
324 /* Both ended. So far so good, both strings are equal at the
325 first level. */
326 break;
328 /* This means one string is shorter than the other. Find out
329 which one and return an appropriate value. */
330 result = seq1len == 0 ? -1 : 1;
331 goto free_and_return;
334 /* Test for position if necessary. */
335 if (position && val1 != val2)
337 result = val1 - val2;
338 goto free_and_return;
341 /* Compare the two sequences. */
344 if (weights[idx1arr[idx1now]] != weights[idx2arr[idx2now]])
346 /* The sequences differ. */
347 result = weights[idx1arr[idx1now]] - weights[idx2arr[idx2now]];
348 goto free_and_return;
351 /* Increment the offsets. */
352 ++idx1arr[idx1now];
353 ++idx2arr[idx2now];
355 --seq1len;
356 --seq2len;
358 while (seq1len > 0 && seq2len > 0);
360 if (position && seq1len != seq2len)
362 result = seq1len - seq2len;
363 goto free_and_return;
367 /* Now the remaining passes over the weights. We now use the
368 indeces we found before. */
369 for (pass = 1; pass < nrules; ++pass)
371 /* We assume that if a rule has defined `position' in one section
372 this is true for all of them. */
373 idx1cnt = 0;
374 idx2cnt = 0;
375 backw1_stop = ~0ul;
376 backw2_stop = ~0ul;
377 backw1 = ~0ul;
378 backw2 = ~0ul;
379 position = rulesets[rule1arr[0] * nrules + pass] & sort_position;
381 while (1)
383 val1 = 0;
384 val2 = 0;
386 /* Get the next non-IGNOREd element for string `s1'. */
387 if (seq1len == 0)
390 ++val1;
392 if (backw1_stop != ~0ul)
394 /* The is something pushed. */
395 if (backw1 == backw1_stop)
397 /* The last pushed character was handled. Continue
398 with forward characters. */
399 if (idx1cnt < idx1max)
400 idx1now = idx1cnt;
401 else
403 /* Nothing anymore. The backward sequence
404 ended with the last sequence in the string. */
405 idx1now = ~0ul;
406 break;
409 else
410 idx1now = --backw1;
412 else
414 backw1_stop = idx1cnt;
416 while (idx1cnt < idx1max)
418 if ((rulesets[rule1arr[idx1cnt] * nrules + pass]
419 & sort_backward) == 0)
420 /* No more backward characters to push. */
421 break;
422 ++idx1cnt;
425 if (backw1_stop == idx1cnt)
427 /* No sequence at all or just one. */
428 if (idx1cnt == idx1max)
429 /* Note that seq1len is still zero. */
430 break;
432 backw1_stop = ~0ul;
433 idx1now = idx1cnt++;
435 else
436 /* We pushed backward sequences. */
437 idx1now = backw1 = idx1cnt - 1;
440 while ((seq1len = weights[idx1arr[idx1now]++]) == 0);
442 /* And the same for string `s2'. */
443 if (seq2len == 0)
446 ++val2;
448 if (backw2_stop != ~0ul)
450 /* The is something pushed. */
451 if (backw2 == backw2_stop)
453 /* The last pushed character was handled. Continue
454 with forward characters. */
455 if (idx2cnt < idx2max)
456 idx2now = idx2cnt;
457 else
459 /* Nothing anymore. The backward sequence
460 ended with the last sequence in the string. */
461 idx2now = ~0ul;
462 break;
465 else
466 idx2now = --backw2;
468 else
470 backw2_stop = idx2cnt;
472 while (idx2cnt < idx2max)
474 if ((rulesets[rule2arr[idx2cnt] * nrules + pass]
475 & sort_backward) == 0)
476 /* No more backward characters to push. */
477 break;
478 ++idx2cnt;
481 if (backw2_stop == idx2cnt)
483 /* No sequence at all or just one. */
484 if (idx2cnt == idx2max)
485 /* Note that seq2len is still zero. */
486 break;
488 backw2_stop = ~0ul;
489 idx2now = idx2cnt++;
491 else
492 /* We pushed backward sequences. */
493 idx2now = backw2 = idx2cnt - 1;
496 while ((seq2len = weights[idx2arr[idx2now]++]) == 0);
498 /* See whether any or both strings are empty. */
499 if (seq1len == 0 || seq2len == 0)
501 if (seq1len == seq2len)
502 /* Both ended. So far so good, both strings are equal
503 at this level. */
504 break;
506 /* This means one string is shorter than the other. Find out
507 which one and return an appropriate value. */
508 result = seq1len == 0 ? -1 : 1;
509 goto free_and_return;
512 /* Test for position if necessary. */
513 if (position && val1 != val2)
515 result = val1 - val2;
516 goto free_and_return;
519 /* Compare the two sequences. */
522 if (weights[idx1arr[idx1now]] != weights[idx2arr[idx2now]])
524 /* The sequences differ. */
525 result = (weights[idx1arr[idx1now]]
526 - weights[idx2arr[idx2now]]);
527 goto free_and_return;
530 /* Increment the offsets. */
531 ++idx1arr[idx1now];
532 ++idx2arr[idx2now];
534 --seq1len;
535 --seq2len;
537 while (seq1len > 0 && seq2len > 0);
539 if (position && seq1len != seq2len)
541 result = seq1len - seq2len;
542 goto free_and_return;
547 /* Free the memory if needed. */
548 free_and_return:
549 if (use_malloc)
550 free (idx1arr);
552 return result;
554 #if !defined WIDE_CHAR_VERSION && !defined USE_IN_EXTENDED_LOCALE_MODEL
555 libc_hidden_def (strcoll)
556 #endif