1 /* Copyright (C) 1995, 96, 97, 98, 99, 2000 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Written by Ulrich Drepper <drepper@cygnus.com>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
28 # define STRING_TYPE char
29 # define USTRING_TYPE unsigned char
30 # ifdef USE_IN_EXTENDED_LOCALE_MODEL
31 # define STRCOLL __strcoll_l
33 # define STRCOLL strcoll
35 # define STRCMP strcmp
36 # define STRLEN strlen
37 # define WEIGHT_H "../locale/weight.h"
42 #define CONCAT(a,b) CONCAT1(a,b)
43 #define CONCAT1(a,b) a##b
45 #include "../locale/localeinfo.h"
47 #ifndef USE_IN_EXTENDED_LOCALE_MODEL
50 const STRING_TYPE
*s1
;
51 const STRING_TYPE
*s2
;
55 const STRING_TYPE
*s1
;
56 const STRING_TYPE
*s2
;
60 #ifdef USE_IN_EXTENDED_LOCALE_MODEL
61 struct locale_data
*current
= l
->__locales
[LC_COLLATE
];
62 uint_fast32_t nrules
= *((uint32_t *) current
->values
[_NL_ITEM_INDEX (_NL_COLLATE_NRULES
)].string
);
64 uint_fast32_t nrules
= _NL_CURRENT_WORD (LC_COLLATE
, _NL_COLLATE_NRULES
);
66 /* We don't assign the following values right away since it might be
67 unnecessary in case there are no rules. */
68 const unsigned char *rulesets
;
70 const USTRING_TYPE
*weights
;
71 const USTRING_TYPE
*extra
;
72 const int32_t *indirect
;
75 const USTRING_TYPE
*us1
;
76 const USTRING_TYPE
*us2
;
81 unsigned char *rule1arr
;
82 unsigned char *rule2arr
;
103 return STRCMP (s1
, s2
);
105 #ifdef USE_IN_EXTENDED_LOCALE_MODEL
106 rulesets
= (const unsigned char *)
107 current
->values
[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS
)].string
;
108 table
= (const int32_t *)
109 current
->values
[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE
,SUFFIX
))].string
;
110 weights
= (const USTRING_TYPE
*)
111 current
->values
[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT
,SUFFIX
))].string
;
112 extra
= (const USTRING_TYPE
*)
113 current
->values
[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA
,SUFFIX
))].string
;
114 indirect
= (const int32_t *)
115 current
->values
[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT
,SUFFIX
))].string
;
117 rulesets
= (const unsigned char *)
118 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_RULESETS
);
119 table
= (const int32_t *)
120 _NL_CURRENT (LC_COLLATE
, CONCAT(_NL_COLLATE_TABLE
,SUFFIX
));
121 weights
= (const USTRING_TYPE
*)
122 _NL_CURRENT (LC_COLLATE
, CONCAT(_NL_COLLATE_WEIGHT
,SUFFIX
));
123 extra
= (const USTRING_TYPE
*)
124 _NL_CURRENT (LC_COLLATE
, CONCAT(_NL_COLLATE_EXTRA
,SUFFIX
));
125 indirect
= (const int32_t *)
126 _NL_CURRENT (LC_COLLATE
, CONCAT(_NL_COLLATE_INDIRECT
,SUFFIX
));
130 assert (((uintptr_t) table
) % __alignof__ (table
[0]) == 0);
131 assert (((uintptr_t) weights
) % __alignof__ (weights
[0]) == 0);
132 assert (((uintptr_t) extra
) % __alignof__ (extra
[0]) == 0);
133 assert (((uintptr_t) indirect
) % __alignof__ (indirect
[0]) == 0);
135 /* We need this a few times. */
139 /* We need the elements of the strings as unsigned values since they
140 are used as indeces. */
141 us1
= (const USTRING_TYPE
*) s1
;
142 us2
= (const USTRING_TYPE
*) s2
;
144 /* Perform the first pass over the string and while doing this find
145 and store the weights for each character. Since we want this to
146 be as fast as possible we are using `alloca' to store the temporary
147 values. But since there is no limit on the length of the string
148 we have to use `malloc' if the string is too long. We should be
149 very conservative here.
151 Please note that the localedef programs makes sure that `position'
152 is not used at the first level. */
153 if (s1len
+ s2len
>= 16384)
155 idx1arr
= (int32_t *) malloc ((s1len
+ s2len
) * (sizeof (int32_t) + 1));
156 idx2arr
= &idx1arr
[s2len
];
157 rule1arr
= (unsigned char *) &idx2arr
[s2len
];
158 rule2arr
= &rule1arr
[s1len
];
161 /* No memory. Well, go with the stack then.
163 XXX Once this implementation is stable we will handle this
164 differently. Instead of precomputing the indeces we will
165 do this in time. This means, though, that this happens for
173 idx1arr
= (int32_t *) alloca (s1len
* sizeof (int32_t));
174 idx2arr
= (int32_t *) alloca (s2len
* sizeof (int32_t));
175 rule1arr
= (unsigned char *) alloca (s2len
);
176 rule2arr
= (unsigned char *) alloca (s2len
);
191 position
= rulesets
[0] & sort_position
;
197 /* Get the next non-IGNOREd element for string `s1'. */
203 if (backw1_stop
!= ~0ul)
205 /* The is something pushed. */
206 if (backw1
== backw1_stop
)
208 /* The last pushed character was handled. Continue
209 with forward characters. */
210 if (idx1cnt
< idx1max
)
213 /* Nothing anymore. The backward sequence ended with
214 the last sequence in the string. Note that seq1len
223 backw1_stop
= idx1max
;
225 while (*us1
!= L('\0'))
227 int32_t tmp
= findidx (&us1
);
228 rule1arr
[idx1max
] = tmp
>> 24;
229 idx1arr
[idx1max
] = tmp
& 0xffffff;
232 if ((rulesets
[rule1arr
[idx1cnt
] * nrules
]
233 & sort_backward
) == 0)
234 /* No more backward characters to push. */
239 if (backw1_stop
>= idx1cnt
)
241 /* No sequence at all or just one. */
242 if (idx1cnt
== idx1max
|| backw1_stop
> idx1cnt
)
243 /* Note that seq1len is still zero. */
250 /* We pushed backward sequences. */
251 idx1now
= backw1
= idx1cnt
- 1;
254 while ((seq1len
= weights
[idx1arr
[idx1now
]++]) == 0);
256 /* And the same for string `s2'. */
262 if (backw2_stop
!= ~0ul)
264 /* The is something pushed. */
265 if (backw2
== backw2_stop
)
267 /* The last pushed character was handled. Continue
268 with forward characters. */
269 if (idx2cnt
< idx2max
)
272 /* Nothing anymore. The backward sequence ended with
273 the last sequence in the string. Note that seq2len
282 backw2_stop
= idx2max
;
284 while (*us2
!= L('\0'))
286 int32_t tmp
= findidx (&us2
);
287 rule2arr
[idx2max
] = tmp
>> 24;
288 idx2arr
[idx2max
] = tmp
& 0xffffff;
291 if ((rulesets
[rule2arr
[idx2cnt
] * nrules
]
292 & sort_backward
) == 0)
293 /* No more backward characters to push. */
298 if (backw2_stop
>= idx2cnt
)
300 /* No sequence at all or just one. */
301 if (idx2cnt
== idx2max
|| backw2_stop
> idx2cnt
)
302 /* Note that seq1len is still zero. */
309 /* We pushed backward sequences. */
310 idx2now
= backw2
= idx2cnt
- 1;
313 while ((seq2len
= weights
[idx2arr
[idx2now
]++]) == 0);
315 /* See whether any or both strings are empty. */
316 if (seq1len
== 0 || seq2len
== 0)
318 if (seq1len
== seq2len
)
319 /* Both ended. So far so good, both strings are equal at the
323 /* This means one string is shorter than the other. Find out
324 which one and return an appropriate value. */
325 result
= seq1len
== 0 ? -1 : 1;
326 goto free_and_return
;
329 /* Test for position if necessary. */
330 if (position
&& val1
!= val2
)
332 result
= val1
- val2
;
333 goto free_and_return
;
336 /* Compare the two sequences. */
339 if (weights
[idx1arr
[idx1now
]] != weights
[idx2arr
[idx2now
]])
341 /* The sequences differ. */
342 result
= weights
[idx1arr
[idx1now
]] - weights
[idx2arr
[idx2now
]];
343 goto free_and_return
;
346 /* Increment the offsets. */
353 while (seq1len
> 0 && seq2len
> 0);
355 if (position
&& seq1len
!= seq2len
)
357 result
= seq1len
- seq2len
;
358 goto free_and_return
;
362 /* Now the remaining passes over the weights. We now use the
363 indeces we found before. */
364 for (pass
= 1; pass
< nrules
; ++pass
)
366 /* We assume that if a rule has defined `position' in one section
367 this is true for all of them. */
374 position
= rulesets
[rule1arr
[0] * nrules
+ pass
] & sort_position
;
381 /* Get the next non-IGNOREd element for string `s1'. */
387 if (backw1_stop
!= ~0ul)
389 /* The is something pushed. */
390 if (backw1
== backw1_stop
)
392 /* The last pushed character was handled. Continue
393 with forward characters. */
394 if (idx1cnt
< idx1max
)
398 /* Nothing anymore. The backward sequence
399 ended with the last sequence in the string. */
409 backw1_stop
= idx1cnt
;
411 while (idx1cnt
< idx1max
)
413 if ((rulesets
[rule1arr
[idx1cnt
] * nrules
+ pass
]
414 & sort_backward
) == 0)
415 /* No more backward characters to push. */
420 if (backw1_stop
== idx1cnt
)
422 /* No sequence at all or just one. */
423 if (idx1cnt
== idx1max
)
424 /* Note that seq2len is still zero. */
431 /* We pushed backward sequences. */
432 idx1now
= backw1
= idx1cnt
- 1;
435 while ((seq1len
= weights
[idx1arr
[idx1now
]++]) == 0);
437 /* And the same for string `s2'. */
443 if (backw2_stop
!= ~0ul)
445 /* The is something pushed. */
446 if (backw2
== backw2_stop
)
448 /* The last pushed character was handled. Continue
449 with forward characters. */
450 if (idx2cnt
< idx2max
)
454 /* Nothing anymore. The backward sequence
455 ended with the last sequence in the string. */
465 backw2_stop
= idx2cnt
;
467 while (idx2cnt
< idx2max
)
469 if ((rulesets
[rule2arr
[idx2cnt
] * nrules
+ pass
]
470 & sort_backward
) == 0)
471 /* No more backward characters to push. */
476 if (backw2_stop
== idx2cnt
)
478 /* No sequence at all or just one. */
479 if (idx2cnt
== idx2max
)
480 /* Note that seq2len is still zero. */
487 /* We pushed backward sequences. */
488 idx2now
= backw2
= idx2cnt
- 1;
491 while ((seq2len
= weights
[idx2arr
[idx2now
]++]) == 0);
493 /* See whether any or both strings are empty. */
494 if (seq1len
== 0 || seq2len
== 0)
496 if (seq1len
== seq2len
)
497 /* Both ended. So far so good, both strings are equal
501 /* This means one string is shorter than the other. Find out
502 which one and return an appropriate value. */
503 result
= seq1len
== 0 ? -1 : 1;
504 goto free_and_return
;
507 /* Test for position if necessary. */
508 if (position
&& val1
!= val2
)
510 result
= val1
- val2
;
511 goto free_and_return
;
514 /* Compare the two sequences. */
517 if (weights
[idx1arr
[idx1now
]] != weights
[idx2arr
[idx2now
]])
519 /* The sequences differ. */
520 result
= (weights
[idx1arr
[idx1now
]]
521 - weights
[idx2arr
[idx2now
]]);
522 goto free_and_return
;
525 /* Increment the offsets. */
532 while (seq1len
> 0 && seq2len
> 0);
534 if (position
&& seq1len
!= seq2len
)
536 result
= seq1len
- seq2len
;
537 goto free_and_return
;
542 /* Free the memory if needed. */