1 /* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Written by Ulrich Drepper <drepper@cygnus.com>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
28 # define STRING_TYPE char
29 # define USTRING_TYPE unsigned char
30 # ifdef USE_IN_EXTENDED_LOCALE_MODEL
31 # define STRCOLL __strcoll_l
33 # define STRCOLL strcoll
35 # define STRCMP strcmp
36 # define STRLEN strlen
37 # define WEIGHT_H "../locale/weight.h"
42 #define CONCAT(a,b) CONCAT1(a,b)
43 #define CONCAT1(a,b) a##b
45 #include "../locale/localeinfo.h"
47 #ifndef USE_IN_EXTENDED_LOCALE_MODEL
50 const STRING_TYPE
*s1
;
51 const STRING_TYPE
*s2
;
55 const STRING_TYPE
*s1
;
56 const STRING_TYPE
*s2
;
60 #ifdef USE_IN_EXTENDED_LOCALE_MODEL
61 struct locale_data
*current
= l
->__locales
[LC_COLLATE
];
62 uint_fast32_t nrules
= *((uint32_t *) current
->values
[_NL_ITEM_INDEX (_NL_COLLATE_NRULES
)].string
);
64 uint_fast32_t nrules
= _NL_CURRENT_WORD (LC_COLLATE
, _NL_COLLATE_NRULES
);
66 /* We don't assign the following values right away since it might be
67 unnecessary in case there are no rules. */
68 const unsigned char *rulesets
;
70 const USTRING_TYPE
*weights
;
71 const USTRING_TYPE
*extra
;
72 const int32_t *indirect
;
75 const USTRING_TYPE
*us1
;
76 const USTRING_TYPE
*us2
;
81 unsigned char *rule1arr
;
82 unsigned char *rule2arr
;
99 #ifdef WIDE_CHAR_VERSION
108 return STRCMP (s1
, s2
);
110 #ifdef USE_IN_EXTENDED_LOCALE_MODEL
111 rulesets
= (const unsigned char *)
112 current
->values
[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS
)].string
;
113 table
= (const int32_t *)
114 current
->values
[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE
,SUFFIX
))].string
;
115 weights
= (const USTRING_TYPE
*)
116 current
->values
[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT
,SUFFIX
))].string
;
117 extra
= (const USTRING_TYPE
*)
118 current
->values
[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA
,SUFFIX
))].string
;
119 indirect
= (const int32_t *)
120 current
->values
[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT
,SUFFIX
))].string
;
121 # ifdef WIDE_CHAR_VERSION
122 names
= (const wint_t *)
123 current
->values
[_NL_ITEM_INDEX (_NL_COLLATE_NAMES
)].string
;
124 size
= current
->values
[_NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE
)].word
;
125 layers
= current
->values
[_NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS
)].word
;
128 rulesets
= (const unsigned char *)
129 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_RULESETS
);
130 table
= (const int32_t *)
131 _NL_CURRENT (LC_COLLATE
, CONCAT(_NL_COLLATE_TABLE
,SUFFIX
));
132 weights
= (const USTRING_TYPE
*)
133 _NL_CURRENT (LC_COLLATE
, CONCAT(_NL_COLLATE_WEIGHT
,SUFFIX
));
134 extra
= (const USTRING_TYPE
*)
135 _NL_CURRENT (LC_COLLATE
, CONCAT(_NL_COLLATE_EXTRA
,SUFFIX
));
136 indirect
= (const int32_t *)
137 _NL_CURRENT (LC_COLLATE
, CONCAT(_NL_COLLATE_INDIRECT
,SUFFIX
));
138 # ifdef WIDE_CHAR_VERSION
139 names
= (const wint_t *) _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_NAMES
);
140 size
= _NL_CURRENT_WORD (LC_COLLATE
, _NL_COLLATE_HASH_SIZE
);
141 layers
= _NL_CURRENT_WORD (LC_COLLATE
, _NL_COLLATE_HASH_LAYERS
);
146 /* We need this a few times. */
150 /* We need the elements of the strings as unsigned values since they
151 are used as indeces. */
152 us1
= (const USTRING_TYPE
*) s1
;
153 us2
= (const USTRING_TYPE
*) s2
;
155 /* Perform the first pass over the string and while doing this find
156 and store the weights for each character. Since we want this to
157 be as fast as possible we are using `alloca' to store the temporary
158 values. But since there is no limit on the length of the string
159 we have to use `malloc' if the string is too long. We should be
160 very conservative here.
162 Please note that the localedef programs makes sure that `position'
163 is not used at the first level. */
164 if (s1len
+ s2len
>= 16384)
166 idx1arr
= (int32_t *) malloc ((s1len
+ s2len
) * (sizeof (int32_t) + 1));
167 idx2arr
= &idx1arr
[s2len
];
168 rule1arr
= (unsigned char *) &idx2arr
[s2len
];
169 rule2arr
= &rule1arr
[s1len
];
172 /* No memory. Well, go with the stack then.
174 XXX Once this implementation is stable we will handle this
175 differently. Instead of precomputing the indeces we will
176 do this in time. This means, though, that this happens for
184 idx1arr
= (int32_t *) alloca (s1len
* sizeof (int32_t));
185 idx2arr
= (int32_t *) alloca (s2len
* sizeof (int32_t));
186 rule1arr
= (unsigned char *) alloca (s2len
);
187 rule2arr
= (unsigned char *) alloca (s2len
);
202 position
= rulesets
[0] & sort_position
;
208 /* Get the next non-IGNOREd element for string `s1'. */
214 if (backw1_stop
!= ~0ul)
216 /* The is something pushed. */
217 if (backw1
== backw1_stop
)
219 /* The last pushed character was handled. Continue
220 with forward characters. */
221 if (idx1cnt
< idx1max
)
224 /* Nothing anymore. The backward sequence ended with
225 the last sequence in the string. Note that seq1len
234 backw1_stop
= idx1max
;
236 while (*us1
!= L('\0'))
238 int32_t tmp
= findidx (&us1
);
239 rule1arr
[idx1max
] = tmp
>> 24;
240 idx1arr
[idx1max
] = tmp
& 0xffffff;
243 if ((rulesets
[rule1arr
[idx1cnt
] * nrules
]
244 & sort_backward
) == 0)
245 /* No more backward characters to push. */
250 if (backw1_stop
>= idx1cnt
)
252 /* No sequence at all or just one. */
253 if (idx1cnt
== idx1max
|| backw1_stop
> idx1cnt
)
254 /* Note that seq1len is still zero. */
261 /* We pushed backward sequences. */
262 idx1now
= backw1
= idx1cnt
- 1;
265 while ((seq1len
= weights
[idx1arr
[idx1now
]++]) == 0);
267 /* And the same for string `s2'. */
273 if (backw2_stop
!= ~0ul)
275 /* The is something pushed. */
276 if (backw2
== backw2_stop
)
278 /* The last pushed character was handled. Continue
279 with forward characters. */
280 if (idx2cnt
< idx2max
)
283 /* Nothing anymore. The backward sequence ended with
284 the last sequence in the string. Note that seq2len
293 backw2_stop
= idx2max
;
295 while (*us2
!= L('\0'))
297 int32_t tmp
= findidx (&us2
);
298 rule2arr
[idx2max
] = tmp
>> 24;
299 idx2arr
[idx2max
] = tmp
& 0xffffff;
302 if ((rulesets
[rule2arr
[idx2cnt
] * nrules
]
303 & sort_backward
) == 0)
304 /* No more backward characters to push. */
309 if (backw2_stop
>= idx2cnt
)
311 /* No sequence at all or just one. */
312 if (idx2cnt
== idx2max
|| backw2_stop
> idx2cnt
)
313 /* Note that seq1len is still zero. */
320 /* We pushed backward sequences. */
321 idx2now
= backw2
= idx2cnt
- 1;
324 while ((seq2len
= weights
[idx2arr
[idx2now
]++]) == 0);
326 /* See whether any or both strings are empty. */
327 if (seq1len
== 0 || seq2len
== 0)
329 if (seq1len
== seq2len
)
330 /* Both ended. So far so good, both strings are equal at the
334 /* This means one string is shorter than the other. Find out
335 which one and return an appropriate value. */
336 result
= seq1len
== 0 ? -1 : 1;
337 goto free_and_return
;
340 /* Test for position if necessary. */
341 if (position
&& val1
!= val2
)
343 result
= val1
- val2
;
344 goto free_and_return
;
347 /* Compare the two sequences. */
350 if (weights
[idx1arr
[idx1now
]] != weights
[idx2arr
[idx2now
]])
352 /* The sequences differ. */
353 result
= weights
[idx1arr
[idx1now
]] - weights
[idx2arr
[idx2now
]];
354 goto free_and_return
;
357 /* Increment the offsets. */
364 while (seq1len
> 0 && seq2len
> 0);
366 if (position
&& seq1len
!= seq2len
)
368 result
= seq1len
- seq2len
;
369 goto free_and_return
;
373 /* Now the remaining passes over the weights. We now use the
374 indeces we found before. */
375 for (pass
= 1; pass
< nrules
; ++pass
)
377 /* We assume that if a rule has defined `position' in one section
378 this is true for all of them. */
385 position
= rulesets
[rule1arr
[0] * nrules
+ pass
] & sort_position
;
392 /* Get the next non-IGNOREd element for string `s1'. */
398 if (backw1_stop
!= ~0ul)
400 /* The is something pushed. */
401 if (backw1
== backw1_stop
)
403 /* The last pushed character was handled. Continue
404 with forward characters. */
405 if (idx1cnt
< idx1max
)
409 /* Nothing anymore. The backward sequence
410 ended with the last sequence in the string. */
420 backw1_stop
= idx1cnt
;
422 while (idx1cnt
< idx1max
)
424 if ((rulesets
[rule1arr
[idx1cnt
] * nrules
+ pass
]
425 & sort_backward
) == 0)
426 /* No more backward characters to push. */
431 if (backw1_stop
== idx1cnt
)
433 /* No sequence at all or just one. */
434 if (idx1cnt
== idx1max
)
435 /* Note that seq2len is still zero. */
442 /* We pushed backward sequences. */
443 idx1now
= backw1
= idx1cnt
- 1;
446 while ((seq1len
= weights
[idx1arr
[idx1now
]++]) == 0);
448 /* And the same for string `s2'. */
454 if (backw2_stop
!= ~0ul)
456 /* The is something pushed. */
457 if (backw2
== backw2_stop
)
459 /* The last pushed character was handled. Continue
460 with forward characters. */
461 if (idx2cnt
< idx2max
)
465 /* Nothing anymore. The backward sequence
466 ended with the last sequence in the string. */
476 backw2_stop
= idx2cnt
;
478 while (idx2cnt
< idx2max
)
480 if ((rulesets
[rule2arr
[idx2cnt
] * nrules
+ pass
]
481 & sort_backward
) == 0)
482 /* No more backward characters to push. */
487 if (backw2_stop
== idx2cnt
)
489 /* No sequence at all or just one. */
490 if (idx2cnt
== idx2max
)
491 /* Note that seq2len is still zero. */
498 /* We pushed backward sequences. */
499 idx2now
= backw2
= idx2cnt
- 1;
502 while ((seq2len
= weights
[idx2arr
[idx2now
]++]) == 0);
504 /* See whether any or both strings are empty. */
505 if (seq1len
== 0 || seq2len
== 0)
507 if (seq1len
== seq2len
)
508 /* Both ended. So far so good, both strings are equal
512 /* This means one string is shorter than the other. Find out
513 which one and return an appropriate value. */
514 result
= seq1len
== 0 ? -1 : 1;
515 goto free_and_return
;
518 /* Test for position if necessary. */
519 if (position
&& val1
!= val2
)
521 result
= val1
- val2
;
522 goto free_and_return
;
525 /* Compare the two sequences. */
528 if (weights
[idx1arr
[idx1now
]] != weights
[idx2arr
[idx2now
]])
530 /* The sequences differ. */
531 result
= (weights
[idx1arr
[idx1now
]]
532 - weights
[idx2arr
[idx2now
]]);
533 goto free_and_return
;
536 /* Increment the offsets. */
543 while (seq1len
> 0 && seq2len
> 0);
545 if (position
&& seq1len
!= seq2len
)
547 result
= seq1len
- seq2len
;
548 goto free_and_return
;
553 /* Free the memory if needed. */