1 /* Copyright (C) 1995-2013 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Written by Ulrich Drepper <drepper@gnu.org>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
29 # define STRING_TYPE char
30 # define USTRING_TYPE unsigned char
31 # define STRCOLL __strcoll_l
32 # define STRCMP strcmp
33 # define STRLEN strlen
34 # define WEIGHT_H "../locale/weight.h"
39 #define CONCAT(a,b) CONCAT1(a,b)
40 #define CONCAT1(a,b) a##b
42 #include "../locale/localeinfo.h"
46 const STRING_TYPE
*s1
;
47 const STRING_TYPE
*s2
;
50 struct __locale_data
*current
= l
->__locales
[LC_COLLATE
];
51 uint_fast32_t nrules
= current
->values
[_NL_ITEM_INDEX (_NL_COLLATE_NRULES
)].word
;
52 /* We don't assign the following values right away since it might be
53 unnecessary in case there are no rules. */
54 const unsigned char *rulesets
;
56 const USTRING_TYPE
*weights
;
57 const USTRING_TYPE
*extra
;
58 const int32_t *indirect
;
61 const USTRING_TYPE
*us1
;
62 const USTRING_TYPE
*us2
;
67 unsigned char *rule1arr
;
68 unsigned char *rule2arr
;
89 return STRCMP (s1
, s2
);
91 rulesets
= (const unsigned char *)
92 current
->values
[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS
)].string
;
93 table
= (const int32_t *)
94 current
->values
[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE
,SUFFIX
))].string
;
95 weights
= (const USTRING_TYPE
*)
96 current
->values
[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT
,SUFFIX
))].string
;
97 extra
= (const USTRING_TYPE
*)
98 current
->values
[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA
,SUFFIX
))].string
;
99 indirect
= (const int32_t *)
100 current
->values
[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT
,SUFFIX
))].string
;
103 assert (((uintptr_t) table
) % __alignof__ (table
[0]) == 0);
104 assert (((uintptr_t) weights
) % __alignof__ (weights
[0]) == 0);
105 assert (((uintptr_t) extra
) % __alignof__ (extra
[0]) == 0);
106 assert (((uintptr_t) indirect
) % __alignof__ (indirect
[0]) == 0);
108 /* We need this a few times. */
112 /* Catch empty strings. */
113 if (__builtin_expect (s1len
== 0, 0) || __builtin_expect (s2len
== 0, 0))
114 return (s1len
!= 0) - (s2len
!= 0);
116 /* We need the elements of the strings as unsigned values since they
117 are used as indeces. */
118 us1
= (const USTRING_TYPE
*) s1
;
119 us2
= (const USTRING_TYPE
*) s2
;
121 /* Perform the first pass over the string and while doing this find
122 and store the weights for each character. Since we want this to
123 be as fast as possible we are using `alloca' to store the temporary
124 values. But since there is no limit on the length of the string
125 we have to use `malloc' if the string is too long. We should be
126 very conservative here.
128 Please note that the localedef programs makes sure that `position'
129 is not used at the first level. */
130 if (! __libc_use_alloca ((s1len
+ s2len
) * (sizeof (int32_t) + 1)))
132 idx1arr
= (int32_t *) malloc ((s1len
+ s2len
) * (sizeof (int32_t) + 1));
133 idx2arr
= &idx1arr
[s1len
];
134 rule1arr
= (unsigned char *) &idx2arr
[s2len
];
135 rule2arr
= &rule1arr
[s1len
];
138 /* No memory. Well, go with the stack then.
140 XXX Once this implementation is stable we will handle this
141 differently. Instead of precomputing the indeces we will
142 do this in time. This means, though, that this happens for
150 idx1arr
= (int32_t *) alloca (s1len
* sizeof (int32_t));
151 idx2arr
= (int32_t *) alloca (s2len
* sizeof (int32_t));
152 rule1arr
= (unsigned char *) alloca (s1len
);
153 rule2arr
= (unsigned char *) alloca (s2len
);
168 position
= rulesets
[0] & sort_position
;
174 /* Get the next non-IGNOREd element for string `s1'. */
180 if (backw1_stop
!= ~0ul)
182 /* The is something pushed. */
183 if (backw1
== backw1_stop
)
185 /* The last pushed character was handled. Continue
186 with forward characters. */
187 if (idx1cnt
< idx1max
)
193 /* Nothing anymore. The backward sequence ended with
194 the last sequence in the string. Note that seq1len
203 backw1_stop
= idx1max
;
205 while (*us1
!= L('\0'))
207 int32_t tmp
= findidx (&us1
, -1);
208 rule1arr
[idx1max
] = tmp
>> 24;
209 idx1arr
[idx1max
] = tmp
& 0xffffff;
212 if ((rulesets
[rule1arr
[idx1cnt
] * nrules
]
213 & sort_backward
) == 0)
214 /* No more backward characters to push. */
219 if (backw1_stop
>= idx1cnt
)
221 /* No sequence at all or just one. */
222 if (idx1cnt
== idx1max
|| backw1_stop
> idx1cnt
)
223 /* Note that seq1len is still zero. */
230 /* We pushed backward sequences. */
231 idx1now
= backw1
= idx1cnt
- 1;
234 while ((seq1len
= weights
[idx1arr
[idx1now
]++]) == 0);
236 /* And the same for string `s2'. */
242 if (backw2_stop
!= ~0ul)
244 /* The is something pushed. */
245 if (backw2
== backw2_stop
)
247 /* The last pushed character was handled. Continue
248 with forward characters. */
249 if (idx2cnt
< idx2max
)
255 /* Nothing anymore. The backward sequence ended with
256 the last sequence in the string. Note that seq2len
265 backw2_stop
= idx2max
;
267 while (*us2
!= L('\0'))
269 int32_t tmp
= findidx (&us2
, -1);
270 rule2arr
[idx2max
] = tmp
>> 24;
271 idx2arr
[idx2max
] = tmp
& 0xffffff;
274 if ((rulesets
[rule2arr
[idx2cnt
] * nrules
]
275 & sort_backward
) == 0)
276 /* No more backward characters to push. */
281 if (backw2_stop
>= idx2cnt
)
283 /* No sequence at all or just one. */
284 if (idx2cnt
== idx2max
|| backw2_stop
> idx2cnt
)
285 /* Note that seq1len is still zero. */
292 /* We pushed backward sequences. */
293 idx2now
= backw2
= idx2cnt
- 1;
296 while ((seq2len
= weights
[idx2arr
[idx2now
]++]) == 0);
298 /* See whether any or both strings are empty. */
299 if (seq1len
== 0 || seq2len
== 0)
301 if (seq1len
== seq2len
)
302 /* Both ended. So far so good, both strings are equal at the
306 /* This means one string is shorter than the other. Find out
307 which one and return an appropriate value. */
308 result
= seq1len
== 0 ? -1 : 1;
309 goto free_and_return
;
312 /* Test for position if necessary. */
313 if (position
&& val1
!= val2
)
315 result
= val1
- val2
;
316 goto free_and_return
;
319 /* Compare the two sequences. */
322 if (weights
[idx1arr
[idx1now
]] != weights
[idx2arr
[idx2now
]])
324 /* The sequences differ. */
325 result
= weights
[idx1arr
[idx1now
]] - weights
[idx2arr
[idx2now
]];
326 goto free_and_return
;
329 /* Increment the offsets. */
336 while (seq1len
> 0 && seq2len
> 0);
338 if (position
&& seq1len
!= seq2len
)
340 result
= seq1len
- seq2len
;
341 goto free_and_return
;
345 /* Now the remaining passes over the weights. We now use the
346 indeces we found before. */
347 for (pass
= 1; pass
< nrules
; ++pass
)
349 /* We assume that if a rule has defined `position' in one section
350 this is true for all of them. */
357 position
= rulesets
[rule1arr
[0] * nrules
+ pass
] & sort_position
;
364 /* Get the next non-IGNOREd element for string `s1'. */
370 if (backw1_stop
!= ~0ul)
372 /* The is something pushed. */
373 if (backw1
== backw1_stop
)
375 /* The last pushed character was handled. Continue
376 with forward characters. */
377 if (idx1cnt
< idx1max
)
384 /* Nothing anymore. The backward sequence
385 ended with the last sequence in the string. */
395 backw1_stop
= idx1cnt
;
397 while (idx1cnt
< idx1max
)
399 if ((rulesets
[rule1arr
[idx1cnt
] * nrules
+ pass
]
400 & sort_backward
) == 0)
401 /* No more backward characters to push. */
406 if (backw1_stop
== idx1cnt
)
408 /* No sequence at all or just one. */
409 if (idx1cnt
== idx1max
)
410 /* Note that seq1len is still zero. */
417 /* We pushed backward sequences. */
418 idx1now
= backw1
= idx1cnt
- 1;
421 while ((seq1len
= weights
[idx1arr
[idx1now
]++]) == 0);
423 /* And the same for string `s2'. */
429 if (backw2_stop
!= ~0ul)
431 /* The is something pushed. */
432 if (backw2
== backw2_stop
)
434 /* The last pushed character was handled. Continue
435 with forward characters. */
436 if (idx2cnt
< idx2max
)
443 /* Nothing anymore. The backward sequence
444 ended with the last sequence in the string. */
454 backw2_stop
= idx2cnt
;
456 while (idx2cnt
< idx2max
)
458 if ((rulesets
[rule2arr
[idx2cnt
] * nrules
+ pass
]
459 & sort_backward
) == 0)
460 /* No more backward characters to push. */
465 if (backw2_stop
== idx2cnt
)
467 /* No sequence at all or just one. */
468 if (idx2cnt
== idx2max
)
469 /* Note that seq2len is still zero. */
476 /* We pushed backward sequences. */
477 idx2now
= backw2
= idx2cnt
- 1;
480 while ((seq2len
= weights
[idx2arr
[idx2now
]++]) == 0);
482 /* See whether any or both strings are empty. */
483 if (seq1len
== 0 || seq2len
== 0)
485 if (seq1len
== seq2len
)
486 /* Both ended. So far so good, both strings are equal
490 /* This means one string is shorter than the other. Find out
491 which one and return an appropriate value. */
492 result
= seq1len
== 0 ? -1 : 1;
493 goto free_and_return
;
496 /* Test for position if necessary. */
497 if (position
&& val1
!= val2
)
499 result
= val1
- val2
;
500 goto free_and_return
;
503 /* Compare the two sequences. */
506 if (weights
[idx1arr
[idx1now
]] != weights
[idx2arr
[idx2now
]])
508 /* The sequences differ. */
509 result
= (weights
[idx1arr
[idx1now
]]
510 - weights
[idx2arr
[idx2now
]]);
511 goto free_and_return
;
514 /* Increment the offsets. */
521 while (seq1len
> 0 && seq2len
> 0);
523 if (position
&& seq1len
!= seq2len
)
525 result
= seq1len
- seq2len
;
526 goto free_and_return
;
531 /* Free the memory if needed. */
538 libc_hidden_def (STRCOLL
)
540 #ifndef WIDE_CHAR_VERSION
541 weak_alias (__strcoll_l
, strcoll_l
)