Cleanup: add missing #include's
[glibc.git] / string / strcoll_l.c
blob658d5b9b906a23edcd775f85998b06b5516bf1c0
1 /* Copyright (C) 1995-2015 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Written by Ulrich Drepper <drepper@gnu.org>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
20 #include <assert.h>
21 #include <langinfo.h>
22 #include <locale.h>
23 #include <stddef.h>
24 #include <stdint.h>
25 #include <string.h>
26 #include <sys/param.h>
28 #ifndef STRING_TYPE
29 # define STRING_TYPE char
30 # define USTRING_TYPE unsigned char
31 # define STRCOLL __strcoll_l
32 # define STRCMP strcmp
33 # define WEIGHT_H "../locale/weight.h"
34 # define SUFFIX MB
35 # define L(arg) arg
36 #endif
38 #define CONCAT(a,b) CONCAT1(a,b)
39 #define CONCAT1(a,b) a##b
41 #include "../locale/localeinfo.h"
42 #include WEIGHT_H
44 /* Track status while looking for sequences in a string. */
45 typedef struct
47 int len; /* Length of the current sequence. */
48 size_t val; /* Position of the sequence relative to the
49 previous non-ignored sequence. */
50 size_t idxnow; /* Current index in sequences. */
51 size_t idxmax; /* Maximum index in sequences. */
52 size_t idxcnt; /* Current count of indices. */
53 size_t backw; /* Current Backward sequence index. */
54 size_t backw_stop; /* Index where the backward sequences stop. */
55 const USTRING_TYPE *us; /* The string. */
56 unsigned char rule; /* Saved rule for the first sequence. */
57 int32_t idx; /* Index to weight of the current sequence. */
58 int32_t save_idx; /* Save looked up index of a forward
59 sequence after the last backward
60 sequence. */
61 const USTRING_TYPE *back_us; /* Beginning of the backward sequence. */
62 } coll_seq;
64 /* Get next sequence. Traverse the string as required. */
65 static __always_inline void
66 get_next_seq (coll_seq *seq, int nrules, const unsigned char *rulesets,
67 const USTRING_TYPE *weights, const int32_t *table,
68 const USTRING_TYPE *extra, const int32_t *indirect,
69 int pass)
71 size_t val = seq->val = 0;
72 int len = seq->len;
73 size_t backw_stop = seq->backw_stop;
74 size_t backw = seq->backw;
75 size_t idxcnt = seq->idxcnt;
76 size_t idxmax = seq->idxmax;
77 int32_t idx = seq->idx;
78 const USTRING_TYPE *us = seq->us;
80 while (len == 0)
82 ++val;
83 if (backw_stop != ~0ul)
85 /* There is something pushed. */
86 if (backw == backw_stop)
88 /* The last pushed character was handled. Continue
89 with forward characters. */
90 if (idxcnt < idxmax)
92 idx = seq->save_idx;
93 backw_stop = ~0ul;
95 else
97 /* Nothing anymore. The backward sequence ended with
98 the last sequence in the string. Note that len is
99 still zero. */
100 idx = 0;
101 break;
104 else
106 /* XXX Traverse BACKW sequences from the beginning of
107 BACKW_STOP to get the next sequence. Is ther a quicker way
108 to do this? */
109 size_t i = backw_stop;
110 us = seq->back_us;
111 while (i < backw)
113 int32_t tmp = findidx (table, indirect, extra, &us, -1);
114 idx = tmp & 0xffffff;
115 i++;
117 --backw;
118 us = seq->us;
121 else
123 backw_stop = idxmax;
124 int32_t prev_idx = idx;
126 while (*us != L('\0'))
128 int32_t tmp = findidx (table, indirect, extra, &us, -1);
129 unsigned char rule = tmp >> 24;
130 prev_idx = idx;
131 idx = tmp & 0xffffff;
132 idxcnt = idxmax++;
134 /* Save the rule for the first sequence. */
135 if (__glibc_unlikely (idxcnt == 0))
136 seq->rule = rule;
138 if ((rulesets[rule * nrules + pass]
139 & sort_backward) == 0)
140 /* No more backward characters to push. */
141 break;
142 ++idxcnt;
145 if (backw_stop >= idxcnt)
147 /* No sequence at all or just one. */
148 if (idxcnt == idxmax || backw_stop > idxcnt)
149 /* Note that len is still zero. */
150 break;
152 backw_stop = ~0ul;
154 else
156 /* We pushed backward sequences. If the stream ended with the
157 backward sequence, then we process the last sequence we
158 found. Otherwise we process the sequence before the last
159 one since the last one was a forward sequence. */
160 seq->back_us = seq->us;
161 seq->us = us;
162 backw = idxcnt;
163 if (idxmax > idxcnt)
165 backw--;
166 seq->save_idx = idx;
167 idx = prev_idx;
169 if (backw > backw_stop)
170 backw--;
174 len = weights[idx++];
175 /* Skip over indices of previous levels. */
176 for (int i = 0; i < pass; i++)
178 idx += len;
179 len = weights[idx];
180 idx++;
184 /* Update the structure. */
185 seq->val = val;
186 seq->len = len;
187 seq->backw_stop = backw_stop;
188 seq->backw = backw;
189 seq->idxcnt = idxcnt;
190 seq->idxmax = idxmax;
191 seq->us = us;
192 seq->idx = idx;
195 /* Compare two sequences. */
196 static __always_inline int
197 do_compare (coll_seq *seq1, coll_seq *seq2, int position,
198 const USTRING_TYPE *weights)
200 int seq1len = seq1->len;
201 int seq2len = seq2->len;
202 size_t val1 = seq1->val;
203 size_t val2 = seq2->val;
204 int idx1 = seq1->idx;
205 int idx2 = seq2->idx;
206 int result = 0;
208 /* Test for position if necessary. */
209 if (position && val1 != val2)
211 result = val1 > val2 ? 1 : -1;
212 goto out;
215 /* Compare the two sequences. */
218 if (weights[idx1] != weights[idx2])
220 /* The sequences differ. */
221 result = weights[idx1] - weights[idx2];
222 goto out;
225 /* Increment the offsets. */
226 ++idx1;
227 ++idx2;
229 --seq1len;
230 --seq2len;
232 while (seq1len > 0 && seq2len > 0);
234 if (position && seq1len != seq2len)
235 result = seq1len - seq2len;
237 out:
238 seq1->len = seq1len;
239 seq2->len = seq2len;
240 seq1->idx = idx1;
241 seq2->idx = idx2;
242 return result;
246 STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l)
248 struct __locale_data *current = l->__locales[LC_COLLATE];
249 uint_fast32_t nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word;
250 /* We don't assign the following values right away since it might be
251 unnecessary in case there are no rules. */
252 const unsigned char *rulesets;
253 const int32_t *table;
254 const USTRING_TYPE *weights;
255 const USTRING_TYPE *extra;
256 const int32_t *indirect;
258 if (nrules == 0)
259 return STRCMP (s1, s2);
261 /* Catch empty strings. */
262 if (__glibc_unlikely (*s1 == '\0') || __glibc_unlikely (*s2 == '\0'))
263 return (*s1 != '\0') - (*s2 != '\0');
265 rulesets = (const unsigned char *)
266 current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string;
267 table = (const int32_t *)
268 current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string;
269 weights = (const USTRING_TYPE *)
270 current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string;
271 extra = (const USTRING_TYPE *)
272 current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string;
273 indirect = (const int32_t *)
274 current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string;
276 assert (((uintptr_t) table) % __alignof__ (table[0]) == 0);
277 assert (((uintptr_t) weights) % __alignof__ (weights[0]) == 0);
278 assert (((uintptr_t) extra) % __alignof__ (extra[0]) == 0);
279 assert (((uintptr_t) indirect) % __alignof__ (indirect[0]) == 0);
281 int result = 0, rule = 0;
283 coll_seq seq1, seq2;
284 memset (&seq1, 0, sizeof (seq1));
285 seq2 = seq1;
287 for (int pass = 0; pass < nrules; ++pass)
289 seq1.idxcnt = 0;
290 seq1.idx = 0;
291 seq2.idx = 0;
292 seq1.backw_stop = ~0ul;
293 seq1.backw = ~0ul;
294 seq2.idxcnt = 0;
295 seq2.backw_stop = ~0ul;
296 seq2.backw = ~0ul;
298 /* We need the elements of the strings as unsigned values since they
299 are used as indices. */
300 seq1.us = (const USTRING_TYPE *) s1;
301 seq2.us = (const USTRING_TYPE *) s2;
303 /* We assume that if a rule has defined `position' in one section
304 this is true for all of them. Please note that the localedef programs
305 makes sure that `position' is not used at the first level. */
307 int position = rulesets[rule * nrules + pass] & sort_position;
309 while (1)
311 get_next_seq (&seq1, nrules, rulesets, weights, table,
312 extra, indirect, pass);
313 get_next_seq (&seq2, nrules, rulesets, weights, table,
314 extra, indirect, pass);
315 /* See whether any or both strings are empty. */
316 if (seq1.len == 0 || seq2.len == 0)
318 if (seq1.len == seq2.len)
320 /* Both strings ended and are equal at this level. Do a
321 byte-level comparison to ensure that we don't waste time
322 going through multiple passes for totally equal strings
323 before proceeding to subsequent passes. */
324 if (pass == 0 && STRCMP (s1, s2) == 0)
325 return result;
326 else
327 break;
330 /* This means one string is shorter than the other. Find out
331 which one and return an appropriate value. */
332 return seq1.len == 0 ? -1 : 1;
335 result = do_compare (&seq1, &seq2, position, weights);
336 if (result != 0)
337 return result;
340 rule = seq1.rule;
343 return result;
345 libc_hidden_def (STRCOLL)
347 #ifndef WIDE_CHAR_VERSION
348 weak_alias (__strcoll_l, strcoll_l)
349 #endif