1 /* Copyright (C) 1995,96,97,2002, 2004 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Written by Ulrich Drepper <drepper@gnu.org>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
30 # define STRING_TYPE char
31 # define USTRING_TYPE unsigned char
32 # define STRCOLL __strcoll_l
33 # define STRCMP strcmp
34 # define STRLEN strlen
35 # define WEIGHT_H "../locale/weight.h"
40 #define CONCAT(a,b) CONCAT1(a,b)
41 #define CONCAT1(a,b) a##b
43 #include "../locale/localeinfo.h"
47 const STRING_TYPE
*s1
;
48 const STRING_TYPE
*s2
;
51 struct locale_data
*current
= l
->__locales
[LC_COLLATE
];
52 uint_fast32_t nrules
= current
->values
[_NL_ITEM_INDEX (_NL_COLLATE_NRULES
)].word
;
53 /* We don't assign the following values right away since it might be
54 unnecessary in case there are no rules. */
55 const unsigned char *rulesets
;
57 const USTRING_TYPE
*weights
;
58 const USTRING_TYPE
*extra
;
59 const int32_t *indirect
;
62 const USTRING_TYPE
*us1
;
63 const USTRING_TYPE
*us2
;
68 unsigned char *rule1arr
;
69 unsigned char *rule2arr
;
90 return STRCMP (s1
, s2
);
92 rulesets
= (const unsigned char *)
93 current
->values
[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS
)].string
;
94 table
= (const int32_t *)
95 current
->values
[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE
,SUFFIX
))].string
;
96 weights
= (const USTRING_TYPE
*)
97 current
->values
[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT
,SUFFIX
))].string
;
98 extra
= (const USTRING_TYPE
*)
99 current
->values
[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA
,SUFFIX
))].string
;
100 indirect
= (const int32_t *)
101 current
->values
[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT
,SUFFIX
))].string
;
104 assert (((uintptr_t) table
) % __alignof__ (table
[0]) == 0);
105 assert (((uintptr_t) weights
) % __alignof__ (weights
[0]) == 0);
106 assert (((uintptr_t) extra
) % __alignof__ (extra
[0]) == 0);
107 assert (((uintptr_t) indirect
) % __alignof__ (indirect
[0]) == 0);
109 /* We need this a few times. */
113 /* Catch empty strings. */
114 if (__builtin_expect (s1len
== 0, 0) || __builtin_expect (s2len
== 0, 0))
115 return (s1len
!= 0) - (s2len
!= 0);
117 /* We need the elements of the strings as unsigned values since they
118 are used as indeces. */
119 us1
= (const USTRING_TYPE
*) s1
;
120 us2
= (const USTRING_TYPE
*) s2
;
122 /* Perform the first pass over the string and while doing this find
123 and store the weights for each character. Since we want this to
124 be as fast as possible we are using `alloca' to store the temporary
125 values. But since there is no limit on the length of the string
126 we have to use `malloc' if the string is too long. We should be
127 very conservative here.
129 Please note that the localedef programs makes sure that `position'
130 is not used at the first level. */
131 if (! __libc_use_alloca (s1len
+ s2len
))
133 idx1arr
= (int32_t *) malloc ((s1len
+ s2len
) * (sizeof (int32_t) + 1));
134 idx2arr
= &idx1arr
[s1len
];
135 rule1arr
= (unsigned char *) &idx2arr
[s2len
];
136 rule2arr
= &rule1arr
[s1len
];
139 /* No memory. Well, go with the stack then.
141 XXX Once this implementation is stable we will handle this
142 differently. Instead of precomputing the indeces we will
143 do this in time. This means, though, that this happens for
151 idx1arr
= (int32_t *) alloca (s1len
* sizeof (int32_t));
152 idx2arr
= (int32_t *) alloca (s2len
* sizeof (int32_t));
153 rule1arr
= (unsigned char *) alloca (s1len
);
154 rule2arr
= (unsigned char *) alloca (s2len
);
169 position
= rulesets
[0] & sort_position
;
175 /* Get the next non-IGNOREd element for string `s1'. */
181 if (backw1_stop
!= ~0ul)
183 /* The is something pushed. */
184 if (backw1
== backw1_stop
)
186 /* The last pushed character was handled. Continue
187 with forward characters. */
188 if (idx1cnt
< idx1max
)
191 /* Nothing anymore. The backward sequence ended with
192 the last sequence in the string. Note that seq1len
201 backw1_stop
= idx1max
;
203 while (*us1
!= L('\0'))
205 int32_t tmp
= findidx (&us1
);
206 rule1arr
[idx1max
] = tmp
>> 24;
207 idx1arr
[idx1max
] = tmp
& 0xffffff;
210 if ((rulesets
[rule1arr
[idx1cnt
] * nrules
]
211 & sort_backward
) == 0)
212 /* No more backward characters to push. */
217 if (backw1_stop
>= idx1cnt
)
219 /* No sequence at all or just one. */
220 if (idx1cnt
== idx1max
|| backw1_stop
> idx1cnt
)
221 /* Note that seq1len is still zero. */
228 /* We pushed backward sequences. */
229 idx1now
= backw1
= idx1cnt
- 1;
232 while ((seq1len
= weights
[idx1arr
[idx1now
]++]) == 0);
234 /* And the same for string `s2'. */
240 if (backw2_stop
!= ~0ul)
242 /* The is something pushed. */
243 if (backw2
== backw2_stop
)
245 /* The last pushed character was handled. Continue
246 with forward characters. */
247 if (idx2cnt
< idx2max
)
250 /* Nothing anymore. The backward sequence ended with
251 the last sequence in the string. Note that seq2len
260 backw2_stop
= idx2max
;
262 while (*us2
!= L('\0'))
264 int32_t tmp
= findidx (&us2
);
265 rule2arr
[idx2max
] = tmp
>> 24;
266 idx2arr
[idx2max
] = tmp
& 0xffffff;
269 if ((rulesets
[rule2arr
[idx2cnt
] * nrules
]
270 & sort_backward
) == 0)
271 /* No more backward characters to push. */
276 if (backw2_stop
>= idx2cnt
)
278 /* No sequence at all or just one. */
279 if (idx2cnt
== idx2max
|| backw2_stop
> idx2cnt
)
280 /* Note that seq1len is still zero. */
287 /* We pushed backward sequences. */
288 idx2now
= backw2
= idx2cnt
- 1;
291 while ((seq2len
= weights
[idx2arr
[idx2now
]++]) == 0);
293 /* See whether any or both strings are empty. */
294 if (seq1len
== 0 || seq2len
== 0)
296 if (seq1len
== seq2len
)
297 /* Both ended. So far so good, both strings are equal at the
301 /* This means one string is shorter than the other. Find out
302 which one and return an appropriate value. */
303 result
= seq1len
== 0 ? -1 : 1;
304 goto free_and_return
;
307 /* Test for position if necessary. */
308 if (position
&& val1
!= val2
)
310 result
= val1
- val2
;
311 goto free_and_return
;
314 /* Compare the two sequences. */
317 if (weights
[idx1arr
[idx1now
]] != weights
[idx2arr
[idx2now
]])
319 /* The sequences differ. */
320 result
= weights
[idx1arr
[idx1now
]] - weights
[idx2arr
[idx2now
]];
321 goto free_and_return
;
324 /* Increment the offsets. */
331 while (seq1len
> 0 && seq2len
> 0);
333 if (position
&& seq1len
!= seq2len
)
335 result
= seq1len
- seq2len
;
336 goto free_and_return
;
340 /* Now the remaining passes over the weights. We now use the
341 indeces we found before. */
342 for (pass
= 1; pass
< nrules
; ++pass
)
344 /* We assume that if a rule has defined `position' in one section
345 this is true for all of them. */
352 position
= rulesets
[rule1arr
[0] * nrules
+ pass
] & sort_position
;
359 /* Get the next non-IGNOREd element for string `s1'. */
365 if (backw1_stop
!= ~0ul)
367 /* The is something pushed. */
368 if (backw1
== backw1_stop
)
370 /* The last pushed character was handled. Continue
371 with forward characters. */
372 if (idx1cnt
< idx1max
)
376 /* Nothing anymore. The backward sequence
377 ended with the last sequence in the string. */
387 backw1_stop
= idx1cnt
;
389 while (idx1cnt
< idx1max
)
391 if ((rulesets
[rule1arr
[idx1cnt
] * nrules
+ pass
]
392 & sort_backward
) == 0)
393 /* No more backward characters to push. */
398 if (backw1_stop
== idx1cnt
)
400 /* No sequence at all or just one. */
401 if (idx1cnt
== idx1max
)
402 /* Note that seq1len is still zero. */
409 /* We pushed backward sequences. */
410 idx1now
= backw1
= idx1cnt
- 1;
413 while ((seq1len
= weights
[idx1arr
[idx1now
]++]) == 0);
415 /* And the same for string `s2'. */
421 if (backw2_stop
!= ~0ul)
423 /* The is something pushed. */
424 if (backw2
== backw2_stop
)
426 /* The last pushed character was handled. Continue
427 with forward characters. */
428 if (idx2cnt
< idx2max
)
432 /* Nothing anymore. The backward sequence
433 ended with the last sequence in the string. */
443 backw2_stop
= idx2cnt
;
445 while (idx2cnt
< idx2max
)
447 if ((rulesets
[rule2arr
[idx2cnt
] * nrules
+ pass
]
448 & sort_backward
) == 0)
449 /* No more backward characters to push. */
454 if (backw2_stop
== idx2cnt
)
456 /* No sequence at all or just one. */
457 if (idx2cnt
== idx2max
)
458 /* Note that seq2len is still zero. */
465 /* We pushed backward sequences. */
466 idx2now
= backw2
= idx2cnt
- 1;
469 while ((seq2len
= weights
[idx2arr
[idx2now
]++]) == 0);
471 /* See whether any or both strings are empty. */
472 if (seq1len
== 0 || seq2len
== 0)
474 if (seq1len
== seq2len
)
475 /* Both ended. So far so good, both strings are equal
479 /* This means one string is shorter than the other. Find out
480 which one and return an appropriate value. */
481 result
= seq1len
== 0 ? -1 : 1;
482 goto free_and_return
;
485 /* Test for position if necessary. */
486 if (position
&& val1
!= val2
)
488 result
= val1
- val2
;
489 goto free_and_return
;
492 /* Compare the two sequences. */
495 if (weights
[idx1arr
[idx1now
]] != weights
[idx2arr
[idx2now
]])
497 /* The sequences differ. */
498 result
= (weights
[idx1arr
[idx1now
]]
499 - weights
[idx2arr
[idx2now
]]);
500 goto free_and_return
;
503 /* Increment the offsets. */
510 while (seq1len
> 0 && seq2len
> 0);
512 if (position
&& seq1len
!= seq2len
)
514 result
= seq1len
- seq2len
;
515 goto free_and_return
;
520 /* Free the memory if needed. */
527 libc_hidden_def (STRCOLL
)
529 #ifndef WIDE_CHAR_VERSION
530 weak_alias (__strcoll_l
, strcoll_l
)