1 /* Copyright (C) 1995,96,97,98,99,2000,2001,2002 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Written by Ulrich Drepper <drepper@cygnus.com>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29 # define STRING_TYPE char
30 # define USTRING_TYPE unsigned char
31 # ifdef USE_IN_EXTENDED_LOCALE_MODEL
32 # define STRCOLL __strcoll_l
34 # define STRCOLL strcoll
36 # define STRCMP strcmp
37 # define STRLEN strlen
38 # define WEIGHT_H "../locale/weight.h"
43 #define CONCAT(a,b) CONCAT1(a,b)
44 #define CONCAT1(a,b) a##b
46 #include "../locale/localeinfo.h"
48 #ifndef USE_IN_EXTENDED_LOCALE_MODEL
51 const STRING_TYPE
*s1
;
52 const STRING_TYPE
*s2
;
56 const STRING_TYPE
*s1
;
57 const STRING_TYPE
*s2
;
61 #ifdef USE_IN_EXTENDED_LOCALE_MODEL
62 struct locale_data
*current
= l
->__locales
[LC_COLLATE
];
63 uint_fast32_t nrules
= current
->values
[_NL_ITEM_INDEX (_NL_COLLATE_NRULES
)].word
;
65 uint_fast32_t nrules
= _NL_CURRENT_WORD (LC_COLLATE
, _NL_COLLATE_NRULES
);
67 /* We don't assign the following values right away since it might be
68 unnecessary in case there are no rules. */
69 const unsigned char *rulesets
;
71 const USTRING_TYPE
*weights
;
72 const USTRING_TYPE
*extra
;
73 const int32_t *indirect
;
76 const USTRING_TYPE
*us1
;
77 const USTRING_TYPE
*us2
;
82 unsigned char *rule1arr
;
83 unsigned char *rule2arr
;
104 return STRCMP (s1
, s2
);
106 #ifdef USE_IN_EXTENDED_LOCALE_MODEL
107 rulesets
= (const unsigned char *)
108 current
->values
[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS
)].string
;
109 table
= (const int32_t *)
110 current
->values
[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE
,SUFFIX
))].string
;
111 weights
= (const USTRING_TYPE
*)
112 current
->values
[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT
,SUFFIX
))].string
;
113 extra
= (const USTRING_TYPE
*)
114 current
->values
[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA
,SUFFIX
))].string
;
115 indirect
= (const int32_t *)
116 current
->values
[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT
,SUFFIX
))].string
;
118 rulesets
= (const unsigned char *)
119 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_RULESETS
);
120 table
= (const int32_t *)
121 _NL_CURRENT (LC_COLLATE
, CONCAT(_NL_COLLATE_TABLE
,SUFFIX
));
122 weights
= (const USTRING_TYPE
*)
123 _NL_CURRENT (LC_COLLATE
, CONCAT(_NL_COLLATE_WEIGHT
,SUFFIX
));
124 extra
= (const USTRING_TYPE
*)
125 _NL_CURRENT (LC_COLLATE
, CONCAT(_NL_COLLATE_EXTRA
,SUFFIX
));
126 indirect
= (const int32_t *)
127 _NL_CURRENT (LC_COLLATE
, CONCAT(_NL_COLLATE_INDIRECT
,SUFFIX
));
131 assert (((uintptr_t) table
) % __alignof__ (table
[0]) == 0);
132 assert (((uintptr_t) weights
) % __alignof__ (weights
[0]) == 0);
133 assert (((uintptr_t) extra
) % __alignof__ (extra
[0]) == 0);
134 assert (((uintptr_t) indirect
) % __alignof__ (indirect
[0]) == 0);
136 /* We need this a few times. */
140 /* Catch empty strings. */
141 if (__builtin_expect (s1len
== 0, 0) || __builtin_expect (s2len
== 0, 0))
142 return (s1len
!= 0) - (s2len
!= 0);
144 /* We need the elements of the strings as unsigned values since they
145 are used as indeces. */
146 us1
= (const USTRING_TYPE
*) s1
;
147 us2
= (const USTRING_TYPE
*) s2
;
149 /* Perform the first pass over the string and while doing this find
150 and store the weights for each character. Since we want this to
151 be as fast as possible we are using `alloca' to store the temporary
152 values. But since there is no limit on the length of the string
153 we have to use `malloc' if the string is too long. We should be
154 very conservative here.
156 Please note that the localedef programs makes sure that `position'
157 is not used at the first level. */
158 if (! __libc_use_alloca (s1len
+ s2len
))
160 idx1arr
= (int32_t *) malloc ((s1len
+ s2len
) * (sizeof (int32_t) + 1));
161 idx2arr
= &idx1arr
[s1len
];
162 rule1arr
= (unsigned char *) &idx2arr
[s2len
];
163 rule2arr
= &rule1arr
[s1len
];
166 /* No memory. Well, go with the stack then.
168 XXX Once this implementation is stable we will handle this
169 differently. Instead of precomputing the indeces we will
170 do this in time. This means, though, that this happens for
178 idx1arr
= (int32_t *) alloca (s1len
* sizeof (int32_t));
179 idx2arr
= (int32_t *) alloca (s2len
* sizeof (int32_t));
180 rule1arr
= (unsigned char *) alloca (s1len
);
181 rule2arr
= (unsigned char *) alloca (s2len
);
196 position
= rulesets
[0] & sort_position
;
202 /* Get the next non-IGNOREd element for string `s1'. */
208 if (backw1_stop
!= ~0ul)
210 /* The is something pushed. */
211 if (backw1
== backw1_stop
)
213 /* The last pushed character was handled. Continue
214 with forward characters. */
215 if (idx1cnt
< idx1max
)
218 /* Nothing anymore. The backward sequence ended with
219 the last sequence in the string. Note that seq1len
228 backw1_stop
= idx1max
;
230 while (*us1
!= L('\0'))
232 int32_t tmp
= findidx (&us1
);
233 rule1arr
[idx1max
] = tmp
>> 24;
234 idx1arr
[idx1max
] = tmp
& 0xffffff;
237 if ((rulesets
[rule1arr
[idx1cnt
] * nrules
]
238 & sort_backward
) == 0)
239 /* No more backward characters to push. */
244 if (backw1_stop
>= idx1cnt
)
246 /* No sequence at all or just one. */
247 if (idx1cnt
== idx1max
|| backw1_stop
> idx1cnt
)
248 /* Note that seq1len is still zero. */
255 /* We pushed backward sequences. */
256 idx1now
= backw1
= idx1cnt
- 1;
259 while ((seq1len
= weights
[idx1arr
[idx1now
]++]) == 0);
261 /* And the same for string `s2'. */
267 if (backw2_stop
!= ~0ul)
269 /* The is something pushed. */
270 if (backw2
== backw2_stop
)
272 /* The last pushed character was handled. Continue
273 with forward characters. */
274 if (idx2cnt
< idx2max
)
277 /* Nothing anymore. The backward sequence ended with
278 the last sequence in the string. Note that seq2len
287 backw2_stop
= idx2max
;
289 while (*us2
!= L('\0'))
291 int32_t tmp
= findidx (&us2
);
292 rule2arr
[idx2max
] = tmp
>> 24;
293 idx2arr
[idx2max
] = tmp
& 0xffffff;
296 if ((rulesets
[rule2arr
[idx2cnt
] * nrules
]
297 & sort_backward
) == 0)
298 /* No more backward characters to push. */
303 if (backw2_stop
>= idx2cnt
)
305 /* No sequence at all or just one. */
306 if (idx2cnt
== idx2max
|| backw2_stop
> idx2cnt
)
307 /* Note that seq1len is still zero. */
314 /* We pushed backward sequences. */
315 idx2now
= backw2
= idx2cnt
- 1;
318 while ((seq2len
= weights
[idx2arr
[idx2now
]++]) == 0);
320 /* See whether any or both strings are empty. */
321 if (seq1len
== 0 || seq2len
== 0)
323 if (seq1len
== seq2len
)
324 /* Both ended. So far so good, both strings are equal at the
328 /* This means one string is shorter than the other. Find out
329 which one and return an appropriate value. */
330 result
= seq1len
== 0 ? -1 : 1;
331 goto free_and_return
;
334 /* Test for position if necessary. */
335 if (position
&& val1
!= val2
)
337 result
= val1
- val2
;
338 goto free_and_return
;
341 /* Compare the two sequences. */
344 if (weights
[idx1arr
[idx1now
]] != weights
[idx2arr
[idx2now
]])
346 /* The sequences differ. */
347 result
= weights
[idx1arr
[idx1now
]] - weights
[idx2arr
[idx2now
]];
348 goto free_and_return
;
351 /* Increment the offsets. */
358 while (seq1len
> 0 && seq2len
> 0);
360 if (position
&& seq1len
!= seq2len
)
362 result
= seq1len
- seq2len
;
363 goto free_and_return
;
367 /* Now the remaining passes over the weights. We now use the
368 indeces we found before. */
369 for (pass
= 1; pass
< nrules
; ++pass
)
371 /* We assume that if a rule has defined `position' in one section
372 this is true for all of them. */
379 position
= rulesets
[rule1arr
[0] * nrules
+ pass
] & sort_position
;
386 /* Get the next non-IGNOREd element for string `s1'. */
392 if (backw1_stop
!= ~0ul)
394 /* The is something pushed. */
395 if (backw1
== backw1_stop
)
397 /* The last pushed character was handled. Continue
398 with forward characters. */
399 if (idx1cnt
< idx1max
)
403 /* Nothing anymore. The backward sequence
404 ended with the last sequence in the string. */
414 backw1_stop
= idx1cnt
;
416 while (idx1cnt
< idx1max
)
418 if ((rulesets
[rule1arr
[idx1cnt
] * nrules
+ pass
]
419 & sort_backward
) == 0)
420 /* No more backward characters to push. */
425 if (backw1_stop
== idx1cnt
)
427 /* No sequence at all or just one. */
428 if (idx1cnt
== idx1max
)
429 /* Note that seq1len is still zero. */
436 /* We pushed backward sequences. */
437 idx1now
= backw1
= idx1cnt
- 1;
440 while ((seq1len
= weights
[idx1arr
[idx1now
]++]) == 0);
442 /* And the same for string `s2'. */
448 if (backw2_stop
!= ~0ul)
450 /* The is something pushed. */
451 if (backw2
== backw2_stop
)
453 /* The last pushed character was handled. Continue
454 with forward characters. */
455 if (idx2cnt
< idx2max
)
459 /* Nothing anymore. The backward sequence
460 ended with the last sequence in the string. */
470 backw2_stop
= idx2cnt
;
472 while (idx2cnt
< idx2max
)
474 if ((rulesets
[rule2arr
[idx2cnt
] * nrules
+ pass
]
475 & sort_backward
) == 0)
476 /* No more backward characters to push. */
481 if (backw2_stop
== idx2cnt
)
483 /* No sequence at all or just one. */
484 if (idx2cnt
== idx2max
)
485 /* Note that seq2len is still zero. */
492 /* We pushed backward sequences. */
493 idx2now
= backw2
= idx2cnt
- 1;
496 while ((seq2len
= weights
[idx2arr
[idx2now
]++]) == 0);
498 /* See whether any or both strings are empty. */
499 if (seq1len
== 0 || seq2len
== 0)
501 if (seq1len
== seq2len
)
502 /* Both ended. So far so good, both strings are equal
506 /* This means one string is shorter than the other. Find out
507 which one and return an appropriate value. */
508 result
= seq1len
== 0 ? -1 : 1;
509 goto free_and_return
;
512 /* Test for position if necessary. */
513 if (position
&& val1
!= val2
)
515 result
= val1
- val2
;
516 goto free_and_return
;
519 /* Compare the two sequences. */
522 if (weights
[idx1arr
[idx1now
]] != weights
[idx2arr
[idx2now
]])
524 /* The sequences differ. */
525 result
= (weights
[idx1arr
[idx1now
]]
526 - weights
[idx2arr
[idx2now
]]);
527 goto free_and_return
;
530 /* Increment the offsets. */
537 while (seq1len
> 0 && seq2len
> 0);
539 if (position
&& seq1len
!= seq2len
)
541 result
= seq1len
- seq2len
;
542 goto free_and_return
;
547 /* Free the memory if needed. */
554 #if !defined WIDE_CHAR_VERSION && !defined USE_IN_EXTENDED_LOCALE_MODEL
555 libc_hidden_def (strcoll
)