Update copyright dates with scripts/update-copyrights.
[glibc.git] / string / strcoll_l.c
blob85422bdd8700265b3284bb39f5e865cc56ce0617
1 /* Copyright (C) 1995-2015 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Written by Ulrich Drepper <drepper@gnu.org>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
20 #include <assert.h>
21 #include <langinfo.h>
22 #include <locale.h>
23 #include <stddef.h>
24 #include <stdint.h>
25 #include <string.h>
26 #include <sys/param.h>
28 #ifndef STRING_TYPE
29 # define STRING_TYPE char
30 # define USTRING_TYPE unsigned char
31 # define STRCOLL __strcoll_l
32 # define STRCMP strcmp
33 # define STRLEN strlen
34 # define WEIGHT_H "../locale/weight.h"
35 # define SUFFIX MB
36 # define L(arg) arg
37 #endif
39 #define CONCAT(a,b) CONCAT1(a,b)
40 #define CONCAT1(a,b) a##b
42 #include "../locale/localeinfo.h"
43 #include WEIGHT_H
45 /* Track status while looking for sequences in a string. */
46 typedef struct
48 int len; /* Length of the current sequence. */
49 size_t val; /* Position of the sequence relative to the
50 previous non-ignored sequence. */
51 size_t idxnow; /* Current index in sequences. */
52 size_t idxmax; /* Maximum index in sequences. */
53 size_t idxcnt; /* Current count of indices. */
54 size_t backw; /* Current Backward sequence index. */
55 size_t backw_stop; /* Index where the backward sequences stop. */
56 const USTRING_TYPE *us; /* The string. */
57 unsigned char rule; /* Saved rule for the first sequence. */
58 int32_t idx; /* Index to weight of the current sequence. */
59 int32_t save_idx; /* Save looked up index of a forward
60 sequence after the last backward
61 sequence. */
62 const USTRING_TYPE *back_us; /* Beginning of the backward sequence. */
63 } coll_seq;
65 /* Get next sequence. Traverse the string as required. */
66 static __always_inline void
67 get_next_seq (coll_seq *seq, int nrules, const unsigned char *rulesets,
68 const USTRING_TYPE *weights, const int32_t *table,
69 const USTRING_TYPE *extra, const int32_t *indirect,
70 int pass)
72 size_t val = seq->val = 0;
73 int len = seq->len;
74 size_t backw_stop = seq->backw_stop;
75 size_t backw = seq->backw;
76 size_t idxcnt = seq->idxcnt;
77 size_t idxmax = seq->idxmax;
78 int32_t idx = seq->idx;
79 const USTRING_TYPE *us = seq->us;
81 while (len == 0)
83 ++val;
84 if (backw_stop != ~0ul)
86 /* There is something pushed. */
87 if (backw == backw_stop)
89 /* The last pushed character was handled. Continue
90 with forward characters. */
91 if (idxcnt < idxmax)
93 idx = seq->save_idx;
94 backw_stop = ~0ul;
96 else
98 /* Nothing anymore. The backward sequence ended with
99 the last sequence in the string. Note that len is
100 still zero. */
101 idx = 0;
102 break;
105 else
107 /* XXX Traverse BACKW sequences from the beginning of
108 BACKW_STOP to get the next sequence. Is ther a quicker way
109 to do this? */
110 size_t i = backw_stop;
111 us = seq->back_us;
112 while (i < backw)
114 int32_t tmp = findidx (table, indirect, extra, &us, -1);
115 idx = tmp & 0xffffff;
116 i++;
118 --backw;
119 us = seq->us;
122 else
124 backw_stop = idxmax;
125 int32_t prev_idx = idx;
127 while (*us != L('\0'))
129 int32_t tmp = findidx (table, indirect, extra, &us, -1);
130 unsigned char rule = tmp >> 24;
131 prev_idx = idx;
132 idx = tmp & 0xffffff;
133 idxcnt = idxmax++;
135 /* Save the rule for the first sequence. */
136 if (__glibc_unlikely (idxcnt == 0))
137 seq->rule = rule;
139 if ((rulesets[rule * nrules + pass]
140 & sort_backward) == 0)
141 /* No more backward characters to push. */
142 break;
143 ++idxcnt;
146 if (backw_stop >= idxcnt)
148 /* No sequence at all or just one. */
149 if (idxcnt == idxmax || backw_stop > idxcnt)
150 /* Note that len is still zero. */
151 break;
153 backw_stop = ~0ul;
155 else
157 /* We pushed backward sequences. If the stream ended with the
158 backward sequence, then we process the last sequence we
159 found. Otherwise we process the sequence before the last
160 one since the last one was a forward sequence. */
161 seq->back_us = seq->us;
162 seq->us = us;
163 backw = idxcnt;
164 if (idxmax > idxcnt)
166 backw--;
167 seq->save_idx = idx;
168 idx = prev_idx;
170 if (backw > backw_stop)
171 backw--;
175 len = weights[idx++];
176 /* Skip over indices of previous levels. */
177 for (int i = 0; i < pass; i++)
179 idx += len;
180 len = weights[idx];
181 idx++;
185 /* Update the structure. */
186 seq->val = val;
187 seq->len = len;
188 seq->backw_stop = backw_stop;
189 seq->backw = backw;
190 seq->idxcnt = idxcnt;
191 seq->idxmax = idxmax;
192 seq->us = us;
193 seq->idx = idx;
196 /* Compare two sequences. */
197 static __always_inline int
198 do_compare (coll_seq *seq1, coll_seq *seq2, int position,
199 const USTRING_TYPE *weights)
201 int seq1len = seq1->len;
202 int seq2len = seq2->len;
203 size_t val1 = seq1->val;
204 size_t val2 = seq2->val;
205 int idx1 = seq1->idx;
206 int idx2 = seq2->idx;
207 int result = 0;
209 /* Test for position if necessary. */
210 if (position && val1 != val2)
212 result = val1 > val2 ? 1 : -1;
213 goto out;
216 /* Compare the two sequences. */
219 if (weights[idx1] != weights[idx2])
221 /* The sequences differ. */
222 result = weights[idx1] - weights[idx2];
223 goto out;
226 /* Increment the offsets. */
227 ++idx1;
228 ++idx2;
230 --seq1len;
231 --seq2len;
233 while (seq1len > 0 && seq2len > 0);
235 if (position && seq1len != seq2len)
236 result = seq1len - seq2len;
238 out:
239 seq1->len = seq1len;
240 seq2->len = seq2len;
241 seq1->idx = idx1;
242 seq2->idx = idx2;
243 return result;
247 STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l)
249 struct __locale_data *current = l->__locales[LC_COLLATE];
250 uint_fast32_t nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word;
251 /* We don't assign the following values right away since it might be
252 unnecessary in case there are no rules. */
253 const unsigned char *rulesets;
254 const int32_t *table;
255 const USTRING_TYPE *weights;
256 const USTRING_TYPE *extra;
257 const int32_t *indirect;
259 if (nrules == 0)
260 return STRCMP (s1, s2);
262 /* Catch empty strings. */
263 if (__glibc_unlikely (*s1 == '\0') || __glibc_unlikely (*s2 == '\0'))
264 return (*s1 != '\0') - (*s2 != '\0');
266 rulesets = (const unsigned char *)
267 current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string;
268 table = (const int32_t *)
269 current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string;
270 weights = (const USTRING_TYPE *)
271 current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string;
272 extra = (const USTRING_TYPE *)
273 current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string;
274 indirect = (const int32_t *)
275 current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string;
277 assert (((uintptr_t) table) % __alignof__ (table[0]) == 0);
278 assert (((uintptr_t) weights) % __alignof__ (weights[0]) == 0);
279 assert (((uintptr_t) extra) % __alignof__ (extra[0]) == 0);
280 assert (((uintptr_t) indirect) % __alignof__ (indirect[0]) == 0);
282 int result = 0, rule = 0;
284 coll_seq seq1, seq2;
285 memset (&seq1, 0, sizeof (seq1));
286 seq2 = seq1;
288 for (int pass = 0; pass < nrules; ++pass)
290 seq1.idxcnt = 0;
291 seq1.idx = 0;
292 seq2.idx = 0;
293 seq1.backw_stop = ~0ul;
294 seq1.backw = ~0ul;
295 seq2.idxcnt = 0;
296 seq2.backw_stop = ~0ul;
297 seq2.backw = ~0ul;
299 /* We need the elements of the strings as unsigned values since they
300 are used as indices. */
301 seq1.us = (const USTRING_TYPE *) s1;
302 seq2.us = (const USTRING_TYPE *) s2;
304 /* We assume that if a rule has defined `position' in one section
305 this is true for all of them. Please note that the localedef programs
306 makes sure that `position' is not used at the first level. */
308 int position = rulesets[rule * nrules + pass] & sort_position;
310 while (1)
312 get_next_seq (&seq1, nrules, rulesets, weights, table,
313 extra, indirect, pass);
314 get_next_seq (&seq2, nrules, rulesets, weights, table,
315 extra, indirect, pass);
316 /* See whether any or both strings are empty. */
317 if (seq1.len == 0 || seq2.len == 0)
319 if (seq1.len == seq2.len)
321 /* Both strings ended and are equal at this level. Do a
322 byte-level comparison to ensure that we don't waste time
323 going through multiple passes for totally equal strings
324 before proceeding to subsequent passes. */
325 if (pass == 0 && STRCMP (s1, s2) == 0)
326 return result;
327 else
328 break;
331 /* This means one string is shorter than the other. Find out
332 which one and return an appropriate value. */
333 return seq1.len == 0 ? -1 : 1;
336 result = do_compare (&seq1, &seq2, position, weights);
337 if (result != 0)
338 return result;
341 rule = seq1.rule;
344 return result;
346 libc_hidden_def (STRCOLL)
348 #ifndef WIDE_CHAR_VERSION
349 weak_alias (__strcoll_l, strcoll_l)
350 #endif