1 /* Copyright (C) 1995,96,97,2002, 2004, 2007 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Written by Ulrich Drepper <drepper@gnu.org>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
30 # define STRING_TYPE char
31 # define USTRING_TYPE unsigned char
32 # define STRCOLL __strcoll_l
33 # define STRCMP strcmp
34 # define STRLEN strlen
35 # define WEIGHT_H "../locale/weight.h"
40 #define CONCAT(a,b) CONCAT1(a,b)
41 #define CONCAT1(a,b) a##b
43 #include "../locale/localeinfo.h"
47 const STRING_TYPE
*s1
;
48 const STRING_TYPE
*s2
;
51 struct locale_data
*current
= l
->__locales
[LC_COLLATE
];
52 uint_fast32_t nrules
= current
->values
[_NL_ITEM_INDEX (_NL_COLLATE_NRULES
)].word
;
53 /* We don't assign the following values right away since it might be
54 unnecessary in case there are no rules. */
55 const unsigned char *rulesets
;
57 const USTRING_TYPE
*weights
;
58 const USTRING_TYPE
*extra
;
59 const int32_t *indirect
;
62 const USTRING_TYPE
*us1
;
63 const USTRING_TYPE
*us2
;
68 unsigned char *rule1arr
;
69 unsigned char *rule2arr
;
90 return STRCMP (s1
, s2
);
92 rulesets
= (const unsigned char *)
93 current
->values
[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS
)].string
;
94 table
= (const int32_t *)
95 current
->values
[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE
,SUFFIX
))].string
;
96 weights
= (const USTRING_TYPE
*)
97 current
->values
[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT
,SUFFIX
))].string
;
98 extra
= (const USTRING_TYPE
*)
99 current
->values
[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA
,SUFFIX
))].string
;
100 indirect
= (const int32_t *)
101 current
->values
[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT
,SUFFIX
))].string
;
104 assert (((uintptr_t) table
) % __alignof__ (table
[0]) == 0);
105 assert (((uintptr_t) weights
) % __alignof__ (weights
[0]) == 0);
106 assert (((uintptr_t) extra
) % __alignof__ (extra
[0]) == 0);
107 assert (((uintptr_t) indirect
) % __alignof__ (indirect
[0]) == 0);
109 /* We need this a few times. */
113 /* Catch empty strings. */
114 if (__builtin_expect (s1len
== 0, 0) || __builtin_expect (s2len
== 0, 0))
115 return (s1len
!= 0) - (s2len
!= 0);
117 /* We need the elements of the strings as unsigned values since they
118 are used as indeces. */
119 us1
= (const USTRING_TYPE
*) s1
;
120 us2
= (const USTRING_TYPE
*) s2
;
122 /* Perform the first pass over the string and while doing this find
123 and store the weights for each character. Since we want this to
124 be as fast as possible we are using `alloca' to store the temporary
125 values. But since there is no limit on the length of the string
126 we have to use `malloc' if the string is too long. We should be
127 very conservative here.
129 Please note that the localedef programs makes sure that `position'
130 is not used at the first level. */
131 if (! __libc_use_alloca ((s1len
+ s2len
) * (sizeof (int32_t) + 1)))
133 idx1arr
= (int32_t *) malloc ((s1len
+ s2len
) * (sizeof (int32_t) + 1));
134 idx2arr
= &idx1arr
[s1len
];
135 rule1arr
= (unsigned char *) &idx2arr
[s2len
];
136 rule2arr
= &rule1arr
[s1len
];
139 /* No memory. Well, go with the stack then.
141 XXX Once this implementation is stable we will handle this
142 differently. Instead of precomputing the indeces we will
143 do this in time. This means, though, that this happens for
151 idx1arr
= (int32_t *) alloca (s1len
* sizeof (int32_t));
152 idx2arr
= (int32_t *) alloca (s2len
* sizeof (int32_t));
153 rule1arr
= (unsigned char *) alloca (s1len
);
154 rule2arr
= (unsigned char *) alloca (s2len
);
169 position
= rulesets
[0] & sort_position
;
175 /* Get the next non-IGNOREd element for string `s1'. */
181 if (backw1_stop
!= ~0ul)
183 /* The is something pushed. */
184 if (backw1
== backw1_stop
)
186 /* The last pushed character was handled. Continue
187 with forward characters. */
188 if (idx1cnt
< idx1max
)
194 /* Nothing anymore. The backward sequence ended with
195 the last sequence in the string. Note that seq1len
204 backw1_stop
= idx1max
;
206 while (*us1
!= L('\0'))
208 int32_t tmp
= findidx (&us1
);
209 rule1arr
[idx1max
] = tmp
>> 24;
210 idx1arr
[idx1max
] = tmp
& 0xffffff;
213 if ((rulesets
[rule1arr
[idx1cnt
] * nrules
]
214 & sort_backward
) == 0)
215 /* No more backward characters to push. */
220 if (backw1_stop
>= idx1cnt
)
222 /* No sequence at all or just one. */
223 if (idx1cnt
== idx1max
|| backw1_stop
> idx1cnt
)
224 /* Note that seq1len is still zero. */
231 /* We pushed backward sequences. */
232 idx1now
= backw1
= idx1cnt
- 1;
235 while ((seq1len
= weights
[idx1arr
[idx1now
]++]) == 0);
237 /* And the same for string `s2'. */
243 if (backw2_stop
!= ~0ul)
245 /* The is something pushed. */
246 if (backw2
== backw2_stop
)
248 /* The last pushed character was handled. Continue
249 with forward characters. */
250 if (idx2cnt
< idx2max
)
256 /* Nothing anymore. The backward sequence ended with
257 the last sequence in the string. Note that seq2len
266 backw2_stop
= idx2max
;
268 while (*us2
!= L('\0'))
270 int32_t tmp
= findidx (&us2
);
271 rule2arr
[idx2max
] = tmp
>> 24;
272 idx2arr
[idx2max
] = tmp
& 0xffffff;
275 if ((rulesets
[rule2arr
[idx2cnt
] * nrules
]
276 & sort_backward
) == 0)
277 /* No more backward characters to push. */
282 if (backw2_stop
>= idx2cnt
)
284 /* No sequence at all or just one. */
285 if (idx2cnt
== idx2max
|| backw2_stop
> idx2cnt
)
286 /* Note that seq1len is still zero. */
293 /* We pushed backward sequences. */
294 idx2now
= backw2
= idx2cnt
- 1;
297 while ((seq2len
= weights
[idx2arr
[idx2now
]++]) == 0);
299 /* See whether any or both strings are empty. */
300 if (seq1len
== 0 || seq2len
== 0)
302 if (seq1len
== seq2len
)
303 /* Both ended. So far so good, both strings are equal at the
307 /* This means one string is shorter than the other. Find out
308 which one and return an appropriate value. */
309 result
= seq1len
== 0 ? -1 : 1;
310 goto free_and_return
;
313 /* Test for position if necessary. */
314 if (position
&& val1
!= val2
)
316 result
= val1
- val2
;
317 goto free_and_return
;
320 /* Compare the two sequences. */
323 if (weights
[idx1arr
[idx1now
]] != weights
[idx2arr
[idx2now
]])
325 /* The sequences differ. */
326 result
= weights
[idx1arr
[idx1now
]] - weights
[idx2arr
[idx2now
]];
327 goto free_and_return
;
330 /* Increment the offsets. */
337 while (seq1len
> 0 && seq2len
> 0);
339 if (position
&& seq1len
!= seq2len
)
341 result
= seq1len
- seq2len
;
342 goto free_and_return
;
346 /* Now the remaining passes over the weights. We now use the
347 indeces we found before. */
348 for (pass
= 1; pass
< nrules
; ++pass
)
350 /* We assume that if a rule has defined `position' in one section
351 this is true for all of them. */
358 position
= rulesets
[rule1arr
[0] * nrules
+ pass
] & sort_position
;
365 /* Get the next non-IGNOREd element for string `s1'. */
371 if (backw1_stop
!= ~0ul)
373 /* The is something pushed. */
374 if (backw1
== backw1_stop
)
376 /* The last pushed character was handled. Continue
377 with forward characters. */
378 if (idx1cnt
< idx1max
)
385 /* Nothing anymore. The backward sequence
386 ended with the last sequence in the string. */
396 backw1_stop
= idx1cnt
;
398 while (idx1cnt
< idx1max
)
400 if ((rulesets
[rule1arr
[idx1cnt
] * nrules
+ pass
]
401 & sort_backward
) == 0)
402 /* No more backward characters to push. */
407 if (backw1_stop
== idx1cnt
)
409 /* No sequence at all or just one. */
410 if (idx1cnt
== idx1max
)
411 /* Note that seq1len is still zero. */
418 /* We pushed backward sequences. */
419 idx1now
= backw1
= idx1cnt
- 1;
422 while ((seq1len
= weights
[idx1arr
[idx1now
]++]) == 0);
424 /* And the same for string `s2'. */
430 if (backw2_stop
!= ~0ul)
432 /* The is something pushed. */
433 if (backw2
== backw2_stop
)
435 /* The last pushed character was handled. Continue
436 with forward characters. */
437 if (idx2cnt
< idx2max
)
444 /* Nothing anymore. The backward sequence
445 ended with the last sequence in the string. */
455 backw2_stop
= idx2cnt
;
457 while (idx2cnt
< idx2max
)
459 if ((rulesets
[rule2arr
[idx2cnt
] * nrules
+ pass
]
460 & sort_backward
) == 0)
461 /* No more backward characters to push. */
466 if (backw2_stop
== idx2cnt
)
468 /* No sequence at all or just one. */
469 if (idx2cnt
== idx2max
)
470 /* Note that seq2len is still zero. */
477 /* We pushed backward sequences. */
478 idx2now
= backw2
= idx2cnt
- 1;
481 while ((seq2len
= weights
[idx2arr
[idx2now
]++]) == 0);
483 /* See whether any or both strings are empty. */
484 if (seq1len
== 0 || seq2len
== 0)
486 if (seq1len
== seq2len
)
487 /* Both ended. So far so good, both strings are equal
491 /* This means one string is shorter than the other. Find out
492 which one and return an appropriate value. */
493 result
= seq1len
== 0 ? -1 : 1;
494 goto free_and_return
;
497 /* Test for position if necessary. */
498 if (position
&& val1
!= val2
)
500 result
= val1
- val2
;
501 goto free_and_return
;
504 /* Compare the two sequences. */
507 if (weights
[idx1arr
[idx1now
]] != weights
[idx2arr
[idx2now
]])
509 /* The sequences differ. */
510 result
= (weights
[idx1arr
[idx1now
]]
511 - weights
[idx2arr
[idx2now
]]);
512 goto free_and_return
;
515 /* Increment the offsets. */
522 while (seq1len
> 0 && seq2len
> 0);
524 if (position
&& seq1len
!= seq2len
)
526 result
= seq1len
- seq2len
;
527 goto free_and_return
;
532 /* Free the memory if needed. */
539 libc_hidden_def (STRCOLL
)
541 #ifndef WIDE_CHAR_VERSION
542 weak_alias (__strcoll_l
, strcoll_l
)