1 /* Copyright (C) 1995,96,97,98,99,2000,2001 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Written by Ulrich Drepper <drepper@cygnus.com>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
29 # define STRING_TYPE char
30 # define USTRING_TYPE unsigned char
31 # ifdef USE_IN_EXTENDED_LOCALE_MODEL
32 # define STRCOLL __strcoll_l
34 # define STRCOLL strcoll
36 # define STRCMP strcmp
37 # define STRLEN strlen
38 # define WEIGHT_H "../locale/weight.h"
43 #define CONCAT(a,b) CONCAT1(a,b)
44 #define CONCAT1(a,b) a##b
46 #include "../locale/localeinfo.h"
48 #ifndef USE_IN_EXTENDED_LOCALE_MODEL
51 const STRING_TYPE
*s1
;
52 const STRING_TYPE
*s2
;
56 const STRING_TYPE
*s1
;
57 const STRING_TYPE
*s2
;
61 #ifdef USE_IN_EXTENDED_LOCALE_MODEL
62 struct locale_data
*current
= l
->__locales
[LC_COLLATE
];
63 uint_fast32_t nrules
= *((const uint32_t *) current
->values
[_NL_ITEM_INDEX (_NL_COLLATE_NRULES
)].string
);
65 uint_fast32_t nrules
= _NL_CURRENT_WORD (LC_COLLATE
, _NL_COLLATE_NRULES
);
67 /* We don't assign the following values right away since it might be
68 unnecessary in case there are no rules. */
69 const unsigned char *rulesets
;
71 const USTRING_TYPE
*weights
;
72 const USTRING_TYPE
*extra
;
73 const int32_t *indirect
;
76 const USTRING_TYPE
*us1
;
77 const USTRING_TYPE
*us2
;
82 unsigned char *rule1arr
;
83 unsigned char *rule2arr
;
104 return STRCMP (s1
, s2
);
106 #ifdef USE_IN_EXTENDED_LOCALE_MODEL
107 rulesets
= (const unsigned char *)
108 current
->values
[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS
)].string
;
109 table
= (const int32_t *)
110 current
->values
[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE
,SUFFIX
))].string
;
111 weights
= (const USTRING_TYPE
*)
112 current
->values
[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT
,SUFFIX
))].string
;
113 extra
= (const USTRING_TYPE
*)
114 current
->values
[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA
,SUFFIX
))].string
;
115 indirect
= (const int32_t *)
116 current
->values
[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT
,SUFFIX
))].string
;
118 rulesets
= (const unsigned char *)
119 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_RULESETS
);
120 table
= (const int32_t *)
121 _NL_CURRENT (LC_COLLATE
, CONCAT(_NL_COLLATE_TABLE
,SUFFIX
));
122 weights
= (const USTRING_TYPE
*)
123 _NL_CURRENT (LC_COLLATE
, CONCAT(_NL_COLLATE_WEIGHT
,SUFFIX
));
124 extra
= (const USTRING_TYPE
*)
125 _NL_CURRENT (LC_COLLATE
, CONCAT(_NL_COLLATE_EXTRA
,SUFFIX
));
126 indirect
= (const int32_t *)
127 _NL_CURRENT (LC_COLLATE
, CONCAT(_NL_COLLATE_INDIRECT
,SUFFIX
));
131 assert (((uintptr_t) table
) % __alignof__ (table
[0]) == 0);
132 assert (((uintptr_t) weights
) % __alignof__ (weights
[0]) == 0);
133 assert (((uintptr_t) extra
) % __alignof__ (extra
[0]) == 0);
134 assert (((uintptr_t) indirect
) % __alignof__ (indirect
[0]) == 0);
136 /* We need this a few times. */
140 /* We need the elements of the strings as unsigned values since they
141 are used as indeces. */
142 us1
= (const USTRING_TYPE
*) s1
;
143 us2
= (const USTRING_TYPE
*) s2
;
145 /* Perform the first pass over the string and while doing this find
146 and store the weights for each character. Since we want this to
147 be as fast as possible we are using `alloca' to store the temporary
148 values. But since there is no limit on the length of the string
149 we have to use `malloc' if the string is too long. We should be
150 very conservative here.
152 Please note that the localedef programs makes sure that `position'
153 is not used at the first level. */
154 if (s1len
+ s2len
>= 16384)
156 idx1arr
= (int32_t *) malloc ((s1len
+ s2len
) * (sizeof (int32_t) + 1));
157 idx2arr
= &idx1arr
[s2len
];
158 rule1arr
= (unsigned char *) &idx2arr
[s2len
];
159 rule2arr
= &rule1arr
[s1len
];
162 /* No memory. Well, go with the stack then.
164 XXX Once this implementation is stable we will handle this
165 differently. Instead of precomputing the indeces we will
166 do this in time. This means, though, that this happens for
174 idx1arr
= (int32_t *) alloca (s1len
* sizeof (int32_t));
175 idx2arr
= (int32_t *) alloca (s2len
* sizeof (int32_t));
176 rule1arr
= (unsigned char *) alloca (s2len
);
177 rule2arr
= (unsigned char *) alloca (s2len
);
192 position
= rulesets
[0] & sort_position
;
198 /* Get the next non-IGNOREd element for string `s1'. */
204 if (backw1_stop
!= ~0ul)
206 /* The is something pushed. */
207 if (backw1
== backw1_stop
)
209 /* The last pushed character was handled. Continue
210 with forward characters. */
211 if (idx1cnt
< idx1max
)
214 /* Nothing anymore. The backward sequence ended with
215 the last sequence in the string. Note that seq1len
224 backw1_stop
= idx1max
;
226 while (*us1
!= L('\0'))
228 int32_t tmp
= findidx (&us1
);
229 rule1arr
[idx1max
] = tmp
>> 24;
230 idx1arr
[idx1max
] = tmp
& 0xffffff;
233 if ((rulesets
[rule1arr
[idx1cnt
] * nrules
]
234 & sort_backward
) == 0)
235 /* No more backward characters to push. */
240 if (backw1_stop
>= idx1cnt
)
242 /* No sequence at all or just one. */
243 if (idx1cnt
== idx1max
|| backw1_stop
> idx1cnt
)
244 /* Note that seq1len is still zero. */
251 /* We pushed backward sequences. */
252 idx1now
= backw1
= idx1cnt
- 1;
255 while ((seq1len
= weights
[idx1arr
[idx1now
]++]) == 0);
257 /* And the same for string `s2'. */
263 if (backw2_stop
!= ~0ul)
265 /* The is something pushed. */
266 if (backw2
== backw2_stop
)
268 /* The last pushed character was handled. Continue
269 with forward characters. */
270 if (idx2cnt
< idx2max
)
273 /* Nothing anymore. The backward sequence ended with
274 the last sequence in the string. Note that seq2len
283 backw2_stop
= idx2max
;
285 while (*us2
!= L('\0'))
287 int32_t tmp
= findidx (&us2
);
288 rule2arr
[idx2max
] = tmp
>> 24;
289 idx2arr
[idx2max
] = tmp
& 0xffffff;
292 if ((rulesets
[rule2arr
[idx2cnt
] * nrules
]
293 & sort_backward
) == 0)
294 /* No more backward characters to push. */
299 if (backw2_stop
>= idx2cnt
)
301 /* No sequence at all or just one. */
302 if (idx2cnt
== idx2max
|| backw2_stop
> idx2cnt
)
303 /* Note that seq1len is still zero. */
310 /* We pushed backward sequences. */
311 idx2now
= backw2
= idx2cnt
- 1;
314 while ((seq2len
= weights
[idx2arr
[idx2now
]++]) == 0);
316 /* See whether any or both strings are empty. */
317 if (seq1len
== 0 || seq2len
== 0)
319 if (seq1len
== seq2len
)
320 /* Both ended. So far so good, both strings are equal at the
324 /* This means one string is shorter than the other. Find out
325 which one and return an appropriate value. */
326 result
= seq1len
== 0 ? -1 : 1;
327 goto free_and_return
;
330 /* Test for position if necessary. */
331 if (position
&& val1
!= val2
)
333 result
= val1
- val2
;
334 goto free_and_return
;
337 /* Compare the two sequences. */
340 if (weights
[idx1arr
[idx1now
]] != weights
[idx2arr
[idx2now
]])
342 /* The sequences differ. */
343 result
= weights
[idx1arr
[idx1now
]] - weights
[idx2arr
[idx2now
]];
344 goto free_and_return
;
347 /* Increment the offsets. */
354 while (seq1len
> 0 && seq2len
> 0);
356 if (position
&& seq1len
!= seq2len
)
358 result
= seq1len
- seq2len
;
359 goto free_and_return
;
363 /* Now the remaining passes over the weights. We now use the
364 indeces we found before. */
365 for (pass
= 1; pass
< nrules
; ++pass
)
367 /* We assume that if a rule has defined `position' in one section
368 this is true for all of them. */
375 position
= rulesets
[rule1arr
[0] * nrules
+ pass
] & sort_position
;
382 /* Get the next non-IGNOREd element for string `s1'. */
388 if (backw1_stop
!= ~0ul)
390 /* The is something pushed. */
391 if (backw1
== backw1_stop
)
393 /* The last pushed character was handled. Continue
394 with forward characters. */
395 if (idx1cnt
< idx1max
)
399 /* Nothing anymore. The backward sequence
400 ended with the last sequence in the string. */
410 backw1_stop
= idx1cnt
;
412 while (idx1cnt
< idx1max
)
414 if ((rulesets
[rule1arr
[idx1cnt
] * nrules
+ pass
]
415 & sort_backward
) == 0)
416 /* No more backward characters to push. */
421 if (backw1_stop
== idx1cnt
)
423 /* No sequence at all or just one. */
424 if (idx1cnt
== idx1max
)
425 /* Note that seq2len is still zero. */
432 /* We pushed backward sequences. */
433 idx1now
= backw1
= idx1cnt
- 1;
436 while ((seq1len
= weights
[idx1arr
[idx1now
]++]) == 0);
438 /* And the same for string `s2'. */
444 if (backw2_stop
!= ~0ul)
446 /* The is something pushed. */
447 if (backw2
== backw2_stop
)
449 /* The last pushed character was handled. Continue
450 with forward characters. */
451 if (idx2cnt
< idx2max
)
455 /* Nothing anymore. The backward sequence
456 ended with the last sequence in the string. */
466 backw2_stop
= idx2cnt
;
468 while (idx2cnt
< idx2max
)
470 if ((rulesets
[rule2arr
[idx2cnt
] * nrules
+ pass
]
471 & sort_backward
) == 0)
472 /* No more backward characters to push. */
477 if (backw2_stop
== idx2cnt
)
479 /* No sequence at all or just one. */
480 if (idx2cnt
== idx2max
)
481 /* Note that seq2len is still zero. */
488 /* We pushed backward sequences. */
489 idx2now
= backw2
= idx2cnt
- 1;
492 while ((seq2len
= weights
[idx2arr
[idx2now
]++]) == 0);
494 /* See whether any or both strings are empty. */
495 if (seq1len
== 0 || seq2len
== 0)
497 if (seq1len
== seq2len
)
498 /* Both ended. So far so good, both strings are equal
502 /* This means one string is shorter than the other. Find out
503 which one and return an appropriate value. */
504 result
= seq1len
== 0 ? -1 : 1;
505 goto free_and_return
;
508 /* Test for position if necessary. */
509 if (position
&& val1
!= val2
)
511 result
= val1
- val2
;
512 goto free_and_return
;
515 /* Compare the two sequences. */
518 if (weights
[idx1arr
[idx1now
]] != weights
[idx2arr
[idx2now
]])
520 /* The sequences differ. */
521 result
= (weights
[idx1arr
[idx1now
]]
522 - weights
[idx2arr
[idx2now
]]);
523 goto free_and_return
;
526 /* Increment the offsets. */
533 while (seq1len
> 0 && seq2len
> 0);
535 if (position
&& seq1len
!= seq2len
)
537 result
= seq1len
- seq2len
;
538 goto free_and_return
;
543 /* Free the memory if needed. */