Small optimizations to cancellable x86-64 code.
[glibc/pb-stable.git] / string / strcoll_l.c
blob8bd84b10aa27484c038a71e55cd4a9aae2cb8139
1 /* Copyright (C) 1995,96,97,2002, 2004, 2007 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Written by Ulrich Drepper <drepper@gnu.org>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18 02111-1307 USA. */
21 #include <assert.h>
22 #include <langinfo.h>
23 #include <locale.h>
24 #include <stddef.h>
25 #include <stdint.h>
26 #include <stdlib.h>
27 #include <string.h>
29 #ifndef STRING_TYPE
30 # define STRING_TYPE char
31 # define USTRING_TYPE unsigned char
32 # define STRCOLL __strcoll_l
33 # define STRCMP strcmp
34 # define STRLEN strlen
35 # define WEIGHT_H "../locale/weight.h"
36 # define SUFFIX MB
37 # define L(arg) arg
38 #endif
40 #define CONCAT(a,b) CONCAT1(a,b)
41 #define CONCAT1(a,b) a##b
43 #include "../locale/localeinfo.h"
45 int
46 STRCOLL (s1, s2, l)
47 const STRING_TYPE *s1;
48 const STRING_TYPE *s2;
49 __locale_t l;
51 struct locale_data *current = l->__locales[LC_COLLATE];
52 uint_fast32_t nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word;
53 /* We don't assign the following values right away since it might be
54 unnecessary in case there are no rules. */
55 const unsigned char *rulesets;
56 const int32_t *table;
57 const USTRING_TYPE *weights;
58 const USTRING_TYPE *extra;
59 const int32_t *indirect;
60 uint_fast32_t pass;
61 int result = 0;
62 const USTRING_TYPE *us1;
63 const USTRING_TYPE *us2;
64 size_t s1len;
65 size_t s2len;
66 int32_t *idx1arr;
67 int32_t *idx2arr;
68 unsigned char *rule1arr;
69 unsigned char *rule2arr;
70 size_t idx1max;
71 size_t idx2max;
72 size_t idx1cnt;
73 size_t idx2cnt;
74 size_t idx1now;
75 size_t idx2now;
76 size_t backw1_stop;
77 size_t backw2_stop;
78 size_t backw1;
79 size_t backw2;
80 int val1;
81 int val2;
82 int position;
83 int seq1len;
84 int seq2len;
85 int use_malloc;
87 #include WEIGHT_H
89 if (nrules == 0)
90 return STRCMP (s1, s2);
92 rulesets = (const unsigned char *)
93 current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string;
94 table = (const int32_t *)
95 current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string;
96 weights = (const USTRING_TYPE *)
97 current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string;
98 extra = (const USTRING_TYPE *)
99 current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string;
100 indirect = (const int32_t *)
101 current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string;
102 use_malloc = 0;
104 assert (((uintptr_t) table) % __alignof__ (table[0]) == 0);
105 assert (((uintptr_t) weights) % __alignof__ (weights[0]) == 0);
106 assert (((uintptr_t) extra) % __alignof__ (extra[0]) == 0);
107 assert (((uintptr_t) indirect) % __alignof__ (indirect[0]) == 0);
109 /* We need this a few times. */
110 s1len = STRLEN (s1);
111 s2len = STRLEN (s2);
113 /* Catch empty strings. */
114 if (__builtin_expect (s1len == 0, 0) || __builtin_expect (s2len == 0, 0))
115 return (s1len != 0) - (s2len != 0);
117 /* We need the elements of the strings as unsigned values since they
118 are used as indeces. */
119 us1 = (const USTRING_TYPE *) s1;
120 us2 = (const USTRING_TYPE *) s2;
122 /* Perform the first pass over the string and while doing this find
123 and store the weights for each character. Since we want this to
124 be as fast as possible we are using `alloca' to store the temporary
125 values. But since there is no limit on the length of the string
126 we have to use `malloc' if the string is too long. We should be
127 very conservative here.
129 Please note that the localedef programs makes sure that `position'
130 is not used at the first level. */
131 if (! __libc_use_alloca ((s1len + s2len) * (sizeof (int32_t) + 1)))
133 idx1arr = (int32_t *) malloc ((s1len + s2len) * (sizeof (int32_t) + 1));
134 idx2arr = &idx1arr[s1len];
135 rule1arr = (unsigned char *) &idx2arr[s2len];
136 rule2arr = &rule1arr[s1len];
138 if (idx1arr == NULL)
139 /* No memory. Well, go with the stack then.
141 XXX Once this implementation is stable we will handle this
142 differently. Instead of precomputing the indeces we will
143 do this in time. This means, though, that this happens for
144 every pass again. */
145 goto try_stack;
146 use_malloc = 1;
148 else
150 try_stack:
151 idx1arr = (int32_t *) alloca (s1len * sizeof (int32_t));
152 idx2arr = (int32_t *) alloca (s2len * sizeof (int32_t));
153 rule1arr = (unsigned char *) alloca (s1len);
154 rule2arr = (unsigned char *) alloca (s2len);
157 idx1cnt = 0;
158 idx2cnt = 0;
159 idx1max = 0;
160 idx2max = 0;
161 idx1now = 0;
162 idx2now = 0;
163 backw1_stop = ~0ul;
164 backw2_stop = ~0ul;
165 backw1 = ~0ul;
166 backw2 = ~0ul;
167 seq1len = 0;
168 seq2len = 0;
169 position = rulesets[0] & sort_position;
170 while (1)
172 val1 = 0;
173 val2 = 0;
175 /* Get the next non-IGNOREd element for string `s1'. */
176 if (seq1len == 0)
179 ++val1;
181 if (backw1_stop != ~0ul)
183 /* The is something pushed. */
184 if (backw1 == backw1_stop)
186 /* The last pushed character was handled. Continue
187 with forward characters. */
188 if (idx1cnt < idx1max)
190 idx1now = idx1cnt;
191 backw1_stop = ~0ul;
193 else
194 /* Nothing anymore. The backward sequence ended with
195 the last sequence in the string. Note that seq1len
196 is still zero. */
197 break;
199 else
200 idx1now = --backw1;
202 else
204 backw1_stop = idx1max;
206 while (*us1 != L('\0'))
208 int32_t tmp = findidx (&us1);
209 rule1arr[idx1max] = tmp >> 24;
210 idx1arr[idx1max] = tmp & 0xffffff;
211 idx1cnt = idx1max++;
213 if ((rulesets[rule1arr[idx1cnt] * nrules]
214 & sort_backward) == 0)
215 /* No more backward characters to push. */
216 break;
217 ++idx1cnt;
220 if (backw1_stop >= idx1cnt)
222 /* No sequence at all or just one. */
223 if (idx1cnt == idx1max || backw1_stop > idx1cnt)
224 /* Note that seq1len is still zero. */
225 break;
227 backw1_stop = ~0ul;
228 idx1now = idx1cnt;
230 else
231 /* We pushed backward sequences. */
232 idx1now = backw1 = idx1cnt - 1;
235 while ((seq1len = weights[idx1arr[idx1now]++]) == 0);
237 /* And the same for string `s2'. */
238 if (seq2len == 0)
241 ++val2;
243 if (backw2_stop != ~0ul)
245 /* The is something pushed. */
246 if (backw2 == backw2_stop)
248 /* The last pushed character was handled. Continue
249 with forward characters. */
250 if (idx2cnt < idx2max)
252 idx2now = idx2cnt;
253 backw2_stop = ~0ul;
255 else
256 /* Nothing anymore. The backward sequence ended with
257 the last sequence in the string. Note that seq2len
258 is still zero. */
259 break;
261 else
262 idx2now = --backw2;
264 else
266 backw2_stop = idx2max;
268 while (*us2 != L('\0'))
270 int32_t tmp = findidx (&us2);
271 rule2arr[idx2max] = tmp >> 24;
272 idx2arr[idx2max] = tmp & 0xffffff;
273 idx2cnt = idx2max++;
275 if ((rulesets[rule2arr[idx2cnt] * nrules]
276 & sort_backward) == 0)
277 /* No more backward characters to push. */
278 break;
279 ++idx2cnt;
282 if (backw2_stop >= idx2cnt)
284 /* No sequence at all or just one. */
285 if (idx2cnt == idx2max || backw2_stop > idx2cnt)
286 /* Note that seq1len is still zero. */
287 break;
289 backw2_stop = ~0ul;
290 idx2now = idx2cnt;
292 else
293 /* We pushed backward sequences. */
294 idx2now = backw2 = idx2cnt - 1;
297 while ((seq2len = weights[idx2arr[idx2now]++]) == 0);
299 /* See whether any or both strings are empty. */
300 if (seq1len == 0 || seq2len == 0)
302 if (seq1len == seq2len)
303 /* Both ended. So far so good, both strings are equal at the
304 first level. */
305 break;
307 /* This means one string is shorter than the other. Find out
308 which one and return an appropriate value. */
309 result = seq1len == 0 ? -1 : 1;
310 goto free_and_return;
313 /* Test for position if necessary. */
314 if (position && val1 != val2)
316 result = val1 - val2;
317 goto free_and_return;
320 /* Compare the two sequences. */
323 if (weights[idx1arr[idx1now]] != weights[idx2arr[idx2now]])
325 /* The sequences differ. */
326 result = weights[idx1arr[idx1now]] - weights[idx2arr[idx2now]];
327 goto free_and_return;
330 /* Increment the offsets. */
331 ++idx1arr[idx1now];
332 ++idx2arr[idx2now];
334 --seq1len;
335 --seq2len;
337 while (seq1len > 0 && seq2len > 0);
339 if (position && seq1len != seq2len)
341 result = seq1len - seq2len;
342 goto free_and_return;
346 /* Now the remaining passes over the weights. We now use the
347 indeces we found before. */
348 for (pass = 1; pass < nrules; ++pass)
350 /* We assume that if a rule has defined `position' in one section
351 this is true for all of them. */
352 idx1cnt = 0;
353 idx2cnt = 0;
354 backw1_stop = ~0ul;
355 backw2_stop = ~0ul;
356 backw1 = ~0ul;
357 backw2 = ~0ul;
358 position = rulesets[rule1arr[0] * nrules + pass] & sort_position;
360 while (1)
362 val1 = 0;
363 val2 = 0;
365 /* Get the next non-IGNOREd element for string `s1'. */
366 if (seq1len == 0)
369 ++val1;
371 if (backw1_stop != ~0ul)
373 /* The is something pushed. */
374 if (backw1 == backw1_stop)
376 /* The last pushed character was handled. Continue
377 with forward characters. */
378 if (idx1cnt < idx1max)
380 idx1now = idx1cnt;
381 backw1_stop = ~0ul;
383 else
385 /* Nothing anymore. The backward sequence
386 ended with the last sequence in the string. */
387 idx1now = ~0ul;
388 break;
391 else
392 idx1now = --backw1;
394 else
396 backw1_stop = idx1cnt;
398 while (idx1cnt < idx1max)
400 if ((rulesets[rule1arr[idx1cnt] * nrules + pass]
401 & sort_backward) == 0)
402 /* No more backward characters to push. */
403 break;
404 ++idx1cnt;
407 if (backw1_stop == idx1cnt)
409 /* No sequence at all or just one. */
410 if (idx1cnt == idx1max)
411 /* Note that seq1len is still zero. */
412 break;
414 backw1_stop = ~0ul;
415 idx1now = idx1cnt++;
417 else
418 /* We pushed backward sequences. */
419 idx1now = backw1 = idx1cnt - 1;
422 while ((seq1len = weights[idx1arr[idx1now]++]) == 0);
424 /* And the same for string `s2'. */
425 if (seq2len == 0)
428 ++val2;
430 if (backw2_stop != ~0ul)
432 /* The is something pushed. */
433 if (backw2 == backw2_stop)
435 /* The last pushed character was handled. Continue
436 with forward characters. */
437 if (idx2cnt < idx2max)
439 idx2now = idx2cnt;
440 backw2_stop = ~0ul;
442 else
444 /* Nothing anymore. The backward sequence
445 ended with the last sequence in the string. */
446 idx2now = ~0ul;
447 break;
450 else
451 idx2now = --backw2;
453 else
455 backw2_stop = idx2cnt;
457 while (idx2cnt < idx2max)
459 if ((rulesets[rule2arr[idx2cnt] * nrules + pass]
460 & sort_backward) == 0)
461 /* No more backward characters to push. */
462 break;
463 ++idx2cnt;
466 if (backw2_stop == idx2cnt)
468 /* No sequence at all or just one. */
469 if (idx2cnt == idx2max)
470 /* Note that seq2len is still zero. */
471 break;
473 backw2_stop = ~0ul;
474 idx2now = idx2cnt++;
476 else
477 /* We pushed backward sequences. */
478 idx2now = backw2 = idx2cnt - 1;
481 while ((seq2len = weights[idx2arr[idx2now]++]) == 0);
483 /* See whether any or both strings are empty. */
484 if (seq1len == 0 || seq2len == 0)
486 if (seq1len == seq2len)
487 /* Both ended. So far so good, both strings are equal
488 at this level. */
489 break;
491 /* This means one string is shorter than the other. Find out
492 which one and return an appropriate value. */
493 result = seq1len == 0 ? -1 : 1;
494 goto free_and_return;
497 /* Test for position if necessary. */
498 if (position && val1 != val2)
500 result = val1 - val2;
501 goto free_and_return;
504 /* Compare the two sequences. */
507 if (weights[idx1arr[idx1now]] != weights[idx2arr[idx2now]])
509 /* The sequences differ. */
510 result = (weights[idx1arr[idx1now]]
511 - weights[idx2arr[idx2now]]);
512 goto free_and_return;
515 /* Increment the offsets. */
516 ++idx1arr[idx1now];
517 ++idx2arr[idx2now];
519 --seq1len;
520 --seq2len;
522 while (seq1len > 0 && seq2len > 0);
524 if (position && seq1len != seq2len)
526 result = seq1len - seq2len;
527 goto free_and_return;
532 /* Free the memory if needed. */
533 free_and_return:
534 if (use_malloc)
535 free (idx1arr);
537 return result;
539 libc_hidden_def (STRCOLL)
541 #ifndef WIDE_CHAR_VERSION
542 weak_alias (__strcoll_l, strcoll_l)
543 #endif