1 /* Copyright (C) 1995-2014 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Written by Ulrich Drepper <drepper@gnu.org>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
26 #include <sys/param.h>
29 # define STRING_TYPE char
30 # define USTRING_TYPE unsigned char
31 # define STRCOLL __strcoll_l
32 # define STRCMP strcmp
33 # define STRLEN strlen
34 # define WEIGHT_H "../locale/weight.h"
39 #define CONCAT(a,b) CONCAT1(a,b)
40 #define CONCAT1(a,b) a##b
42 #include "../locale/localeinfo.h"
45 /* Track status while looking for sequences in a string. */
48 int len
; /* Length of the current sequence. */
49 size_t val
; /* Position of the sequence relative to the
50 previous non-ignored sequence. */
51 size_t idxnow
; /* Current index in sequences. */
52 size_t idxmax
; /* Maximum index in sequences. */
53 size_t idxcnt
; /* Current count of indices. */
54 size_t backw
; /* Current Backward sequence index. */
55 size_t backw_stop
; /* Index where the backward sequences stop. */
56 const USTRING_TYPE
*us
; /* The string. */
57 unsigned char rule
; /* Saved rule for the first sequence. */
58 int32_t idx
; /* Index to weight of the current sequence. */
59 int32_t save_idx
; /* Save looked up index of a forward
60 sequence after the last backward
62 const USTRING_TYPE
*back_us
; /* Beginning of the backward sequence. */
65 /* Get next sequence. Traverse the string as required. */
67 get_next_seq (coll_seq
*seq
, int nrules
, const unsigned char *rulesets
,
68 const USTRING_TYPE
*weights
, const int32_t *table
,
69 const USTRING_TYPE
*extra
, const int32_t *indirect
,
72 size_t val
= seq
->val
= 0;
74 size_t backw_stop
= seq
->backw_stop
;
75 size_t backw
= seq
->backw
;
76 size_t idxcnt
= seq
->idxcnt
;
77 size_t idxmax
= seq
->idxmax
;
78 int32_t idx
= seq
->idx
;
79 const USTRING_TYPE
*us
= seq
->us
;
84 if (backw_stop
!= ~0ul)
86 /* There is something pushed. */
87 if (backw
== backw_stop
)
89 /* The last pushed character was handled. Continue
90 with forward characters. */
98 /* Nothing anymore. The backward sequence ended with
99 the last sequence in the string. Note that len is
107 /* XXX Traverse BACKW sequences from the beginning of
108 BACKW_STOP to get the next sequence. Is ther a quicker way
110 size_t i
= backw_stop
;
114 int32_t tmp
= findidx (table
, indirect
, extra
, &us
, -1);
115 idx
= tmp
& 0xffffff;
125 int32_t prev_idx
= idx
;
127 while (*us
!= L('\0'))
129 int32_t tmp
= findidx (table
, indirect
, extra
, &us
, -1);
130 unsigned char rule
= tmp
>> 24;
132 idx
= tmp
& 0xffffff;
135 /* Save the rule for the first sequence. */
136 if (__glibc_unlikely (idxcnt
== 0))
139 if ((rulesets
[rule
* nrules
+ pass
]
140 & sort_backward
) == 0)
141 /* No more backward characters to push. */
146 if (backw_stop
>= idxcnt
)
148 /* No sequence at all or just one. */
149 if (idxcnt
== idxmax
|| backw_stop
> idxcnt
)
150 /* Note that len is still zero. */
157 /* We pushed backward sequences. If the stream ended with the
158 backward sequence, then we process the last sequence we
159 found. Otherwise we process the sequence before the last
160 one since the last one was a forward sequence. */
161 seq
->back_us
= seq
->us
;
170 if (backw
> backw_stop
)
175 len
= weights
[idx
++];
176 /* Skip over indices of previous levels. */
177 for (int i
= 0; i
< pass
; i
++)
185 /* Update the structure. */
188 seq
->backw_stop
= backw_stop
;
190 seq
->idxcnt
= idxcnt
;
191 seq
->idxmax
= idxmax
;
196 /* Compare two sequences. */
198 do_compare (coll_seq
*seq1
, coll_seq
*seq2
, int position
,
199 const USTRING_TYPE
*weights
)
201 int seq1len
= seq1
->len
;
202 int seq2len
= seq2
->len
;
203 size_t val1
= seq1
->val
;
204 size_t val2
= seq2
->val
;
205 int idx1
= seq1
->idx
;
206 int idx2
= seq2
->idx
;
209 /* Test for position if necessary. */
210 if (position
&& val1
!= val2
)
212 result
= val1
> val2
? 1 : -1;
216 /* Compare the two sequences. */
219 if (weights
[idx1
] != weights
[idx2
])
221 /* The sequences differ. */
222 result
= weights
[idx1
] - weights
[idx2
];
226 /* Increment the offsets. */
233 while (seq1len
> 0 && seq2len
> 0);
235 if (position
&& seq1len
!= seq2len
)
236 result
= seq1len
- seq2len
;
247 STRCOLL (const STRING_TYPE
*s1
, const STRING_TYPE
*s2
, __locale_t l
)
249 struct __locale_data
*current
= l
->__locales
[LC_COLLATE
];
250 uint_fast32_t nrules
= current
->values
[_NL_ITEM_INDEX (_NL_COLLATE_NRULES
)].word
;
251 /* We don't assign the following values right away since it might be
252 unnecessary in case there are no rules. */
253 const unsigned char *rulesets
;
254 const int32_t *table
;
255 const USTRING_TYPE
*weights
;
256 const USTRING_TYPE
*extra
;
257 const int32_t *indirect
;
260 return STRCMP (s1
, s2
);
262 /* Catch empty strings. */
263 if (__glibc_unlikely (*s1
== '\0') || __glibc_unlikely (*s2
== '\0'))
264 return (*s1
!= '\0') - (*s2
!= '\0');
266 rulesets
= (const unsigned char *)
267 current
->values
[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS
)].string
;
268 table
= (const int32_t *)
269 current
->values
[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE
,SUFFIX
))].string
;
270 weights
= (const USTRING_TYPE
*)
271 current
->values
[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT
,SUFFIX
))].string
;
272 extra
= (const USTRING_TYPE
*)
273 current
->values
[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA
,SUFFIX
))].string
;
274 indirect
= (const int32_t *)
275 current
->values
[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT
,SUFFIX
))].string
;
277 assert (((uintptr_t) table
) % __alignof__ (table
[0]) == 0);
278 assert (((uintptr_t) weights
) % __alignof__ (weights
[0]) == 0);
279 assert (((uintptr_t) extra
) % __alignof__ (extra
[0]) == 0);
280 assert (((uintptr_t) indirect
) % __alignof__ (indirect
[0]) == 0);
282 int result
= 0, rule
= 0;
285 memset (&seq1
, 0, sizeof (seq1
));
288 for (int pass
= 0; pass
< nrules
; ++pass
)
293 seq1
.backw_stop
= ~0ul;
296 seq2
.backw_stop
= ~0ul;
299 /* We need the elements of the strings as unsigned values since they
300 are used as indices. */
301 seq1
.us
= (const USTRING_TYPE
*) s1
;
302 seq2
.us
= (const USTRING_TYPE
*) s2
;
304 /* We assume that if a rule has defined `position' in one section
305 this is true for all of them. Please note that the localedef programs
306 makes sure that `position' is not used at the first level. */
308 int position
= rulesets
[rule
* nrules
+ pass
] & sort_position
;
312 get_next_seq (&seq1
, nrules
, rulesets
, weights
, table
,
313 extra
, indirect
, pass
);
314 get_next_seq (&seq2
, nrules
, rulesets
, weights
, table
,
315 extra
, indirect
, pass
);
316 /* See whether any or both strings are empty. */
317 if (seq1
.len
== 0 || seq2
.len
== 0)
319 if (seq1
.len
== seq2
.len
)
321 /* Both strings ended and are equal at this level. Do a
322 byte-level comparison to ensure that we don't waste time
323 going through multiple passes for totally equal strings
324 before proceeding to subsequent passes. */
325 if (pass
== 0 && STRCMP (s1
, s2
) == 0)
331 /* This means one string is shorter than the other. Find out
332 which one and return an appropriate value. */
333 return seq1
.len
== 0 ? -1 : 1;
336 result
= do_compare (&seq1
, &seq2
, position
, weights
);
346 libc_hidden_def (STRCOLL
)
348 #ifndef WIDE_CHAR_VERSION
349 weak_alias (__strcoll_l
, strcoll_l
)