tst-fanotify: skip when we get back EPERM
[glibc.git] / string / strcoll_l.c
blob50ed84d281caea029673243cb99c43166cd0048f
1 /* Copyright (C) 1995-2013 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Written by Ulrich Drepper <drepper@gnu.org>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
20 #include <assert.h>
21 #include <langinfo.h>
22 #include <locale.h>
23 #include <stddef.h>
24 #include <stdint.h>
25 #include <stdlib.h>
26 #include <string.h>
28 #ifndef STRING_TYPE
29 # define STRING_TYPE char
30 # define USTRING_TYPE unsigned char
31 # define STRCOLL __strcoll_l
32 # define STRCMP strcmp
33 # define STRLEN strlen
34 # define WEIGHT_H "../locale/weight.h"
35 # define SUFFIX MB
36 # define L(arg) arg
37 #endif
39 #define CONCAT(a,b) CONCAT1(a,b)
40 #define CONCAT1(a,b) a##b
42 #include "../locale/localeinfo.h"
44 /* Track status while looking for sequences in a string. */
45 typedef struct
47 int len; /* Length of the current sequence. */
48 int val; /* Position of the sequence relative to the
49 previous non-ignored sequence. */
50 size_t idxnow; /* Current index in sequences. */
51 size_t idxmax; /* Maximum index in sequences. */
52 size_t idxcnt; /* Current count of indices. */
53 size_t backw; /* Current Backward sequence index. */
54 size_t backw_stop; /* Index where the backward sequences stop. */
55 const USTRING_TYPE *us; /* The string. */
56 int32_t *idxarr; /* Array to cache weight indices. */
57 unsigned char *rulearr; /* Array to cache rules. */
58 } coll_seq;
60 /* Get next sequence. The weight indices are cached, so we don't need to
61 traverse the string. */
62 static void
63 get_next_seq_cached (coll_seq *seq, int nrules, int pass,
64 const unsigned char *rulesets,
65 const USTRING_TYPE *weights)
67 int val = seq->val = 0;
68 int len = seq->len;
69 size_t backw_stop = seq->backw_stop;
70 size_t backw = seq->backw;
71 size_t idxcnt = seq->idxcnt;
72 size_t idxmax = seq->idxmax;
73 size_t idxnow = seq->idxnow;
74 unsigned char *rulearr = seq->rulearr;
75 int32_t *idxarr = seq->idxarr;
77 while (len == 0)
79 ++val;
80 if (backw_stop != ~0ul)
82 /* There is something pushed. */
83 if (backw == backw_stop)
85 /* The last pushed character was handled. Continue
86 with forward characters. */
87 if (idxcnt < idxmax)
89 idxnow = idxcnt;
90 backw_stop = ~0ul;
92 else
94 /* Nothing any more. The backward sequence
95 ended with the last sequence in the string. */
96 idxnow = ~0ul;
97 break;
100 else
101 idxnow = --backw;
103 else
105 backw_stop = idxcnt;
107 while (idxcnt < idxmax)
109 if ((rulesets[rulearr[idxcnt] * nrules + pass]
110 & sort_backward) == 0)
111 /* No more backward characters to push. */
112 break;
113 ++idxcnt;
116 if (backw_stop == idxcnt)
118 /* No sequence at all or just one. */
119 if (idxcnt == idxmax)
120 /* Note that LEN is still zero. */
121 break;
123 backw_stop = ~0ul;
124 idxnow = idxcnt++;
126 else
127 /* We pushed backward sequences. */
128 idxnow = backw = idxcnt - 1;
130 len = weights[idxarr[idxnow]++];
133 /* Update the structure. */
134 seq->val = val;
135 seq->len = len;
136 seq->backw_stop = backw_stop;
137 seq->backw = backw;
138 seq->idxcnt = idxcnt;
139 seq->idxnow = idxnow;
142 /* Get next sequence. Traverse the string as required. */
143 static void
144 get_next_seq (coll_seq *seq, int nrules, const unsigned char *rulesets,
145 const USTRING_TYPE *weights, const int32_t *table,
146 const USTRING_TYPE *extra, const int32_t *indirect)
148 #include WEIGHT_H
149 int val = seq->val = 0;
150 int len = seq->len;
151 size_t backw_stop = seq->backw_stop;
152 size_t backw = seq->backw;
153 size_t idxcnt = seq->idxcnt;
154 size_t idxmax = seq->idxmax;
155 size_t idxnow = seq->idxnow;
156 unsigned char *rulearr = seq->rulearr;
157 int32_t *idxarr = seq->idxarr;
158 const USTRING_TYPE *us = seq->us;
160 while (len == 0)
162 ++val;
163 if (backw_stop != ~0ul)
165 /* The is something pushed. */
166 if (backw == backw_stop)
168 /* The last pushed character was handled. Continue
169 with forward characters. */
170 if (idxcnt < idxmax)
172 idxnow = idxcnt;
173 backw_stop = ~0ul;
175 else
176 /* Nothing any more. The backward sequence ended with
177 the last sequence in the string. Note that LEN
178 is still zero. */
179 break;
181 else
182 idxnow = --backw;
184 else
186 backw_stop = idxmax;
188 while (*us != L('\0'))
190 int32_t tmp = findidx (&us, -1);
191 rulearr[idxmax] = tmp >> 24;
192 idxarr[idxmax] = tmp & 0xffffff;
193 idxcnt = idxmax++;
195 if ((rulesets[rulearr[idxcnt] * nrules]
196 & sort_backward) == 0)
197 /* No more backward characters to push. */
198 break;
199 ++idxcnt;
202 if (backw_stop >= idxcnt)
204 /* No sequence at all or just one. */
205 if (idxcnt == idxmax || backw_stop > idxcnt)
206 /* Note that LEN is still zero. */
207 break;
209 backw_stop = ~0ul;
210 idxnow = idxcnt;
212 else
213 /* We pushed backward sequences. */
214 idxnow = backw = idxcnt - 1;
216 len = weights[idxarr[idxnow]++];
219 /* Update the structure. */
220 seq->val = val;
221 seq->len = len;
222 seq->backw_stop = backw_stop;
223 seq->backw = backw;
224 seq->idxcnt = idxcnt;
225 seq->idxmax = idxmax;
226 seq->idxnow = idxnow;
227 seq->us = us;
230 /* Compare two sequences. */
231 static int
232 do_compare (coll_seq *seq1, coll_seq *seq2, int position,
233 const USTRING_TYPE *weights)
235 int seq1len = seq1->len;
236 int seq2len = seq2->len;
237 int val1 = seq1->val;
238 int val2 = seq2->val;
239 int32_t *idx1arr = seq1->idxarr;
240 int32_t *idx2arr = seq2->idxarr;
241 int idx1now = seq1->idxnow;
242 int idx2now = seq2->idxnow;
243 int result = 0;
245 /* Test for position if necessary. */
246 if (position && val1 != val2)
248 result = val1 - val2;
249 goto out;
252 /* Compare the two sequences. */
255 if (weights[idx1arr[idx1now]] != weights[idx2arr[idx2now]])
257 /* The sequences differ. */
258 result = weights[idx1arr[idx1now]] - weights[idx2arr[idx2now]];
259 goto out;
262 /* Increment the offsets. */
263 ++idx1arr[idx1now];
264 ++idx2arr[idx2now];
266 --seq1len;
267 --seq2len;
269 while (seq1len > 0 && seq2len > 0);
271 if (position && seq1len != seq2len)
272 result = seq1len - seq2len;
274 out:
275 seq1->len = seq1len;
276 seq2->len = seq2len;
277 return result;
281 STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l)
283 struct __locale_data *current = l->__locales[LC_COLLATE];
284 uint_fast32_t nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word;
285 /* We don't assign the following values right away since it might be
286 unnecessary in case there are no rules. */
287 const unsigned char *rulesets;
288 const int32_t *table;
289 const USTRING_TYPE *weights;
290 const USTRING_TYPE *extra;
291 const int32_t *indirect;
293 if (nrules == 0)
294 return STRCMP (s1, s2);
296 rulesets = (const unsigned char *)
297 current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string;
298 table = (const int32_t *)
299 current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string;
300 weights = (const USTRING_TYPE *)
301 current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string;
302 extra = (const USTRING_TYPE *)
303 current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string;
304 indirect = (const int32_t *)
305 current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string;
307 assert (((uintptr_t) table) % __alignof__ (table[0]) == 0);
308 assert (((uintptr_t) weights) % __alignof__ (weights[0]) == 0);
309 assert (((uintptr_t) extra) % __alignof__ (extra[0]) == 0);
310 assert (((uintptr_t) indirect) % __alignof__ (indirect[0]) == 0);
312 /* We need this a few times. */
313 size_t s1len = STRLEN (s1);
314 size_t s2len = STRLEN (s2);
316 /* Catch empty strings. */
317 if (__glibc_unlikely (s1len == 0) || __glibc_unlikely (s2len == 0))
318 return (s1len != 0) - (s2len != 0);
320 /* Perform the first pass over the string and while doing this find
321 and store the weights for each character. Since we want this to
322 be as fast as possible we are using `alloca' to store the temporary
323 values. But since there is no limit on the length of the string
324 we have to use `malloc' if the string is too long. We should be
325 very conservative here.
327 Please note that the localedef programs makes sure that `position'
328 is not used at the first level. */
330 coll_seq seq1, seq2;
331 bool use_malloc = false;
332 int result = 0;
334 memset (&seq1, 0, sizeof (seq1));
335 seq2 = seq1;
337 /* We need the elements of the strings as unsigned values since they
338 are used as indices. */
339 seq1.us = (const USTRING_TYPE *) s1;
340 seq2.us = (const USTRING_TYPE *) s2;
342 if (! __libc_use_alloca ((s1len + s2len) * (sizeof (int32_t) + 1)))
344 seq1.idxarr = (int32_t *) malloc ((s1len + s2len) * (sizeof (int32_t) + 1));
345 seq2.idxarr = &seq1.idxarr[s1len];
346 seq1.rulearr = (unsigned char *) &seq2.idxarr[s2len];
347 seq2.rulearr = &seq1.rulearr[s1len];
349 if (seq1.idxarr == NULL)
350 /* No memory. Well, go with the stack then.
352 XXX Once this implementation is stable we will handle this
353 differently. Instead of precomputing the indices we will
354 do this in time. This means, though, that this happens for
355 every pass again. */
356 goto try_stack;
357 use_malloc = true;
359 else
361 try_stack:
362 seq1.idxarr = (int32_t *) alloca (s1len * sizeof (int32_t));
363 seq2.idxarr = (int32_t *) alloca (s2len * sizeof (int32_t));
364 seq1.rulearr = (unsigned char *) alloca (s1len);
365 seq2.rulearr = (unsigned char *) alloca (s2len);
368 seq1.rulearr[0] = 0;
370 /* Cache values in the first pass and if needed, use them in subsequent
371 passes. */
372 for (int pass = 0; pass < nrules; ++pass)
374 seq1.idxcnt = 0;
375 seq1.backw_stop = ~0ul;
376 seq1.backw = ~0ul;
377 seq2.idxcnt = 0;
378 seq2.backw_stop = ~0ul;
379 seq2.backw = ~0ul;
381 /* We assume that if a rule has defined `position' in one section
382 this is true for all of them. */
383 int position = rulesets[seq1.rulearr[0] * nrules + pass] & sort_position;
385 while (1)
387 if (pass == 0)
389 get_next_seq (&seq1, nrules, rulesets, weights, table, extra,
390 indirect);
391 get_next_seq (&seq2, nrules, rulesets, weights, table, extra,
392 indirect);
394 else
396 get_next_seq_cached (&seq1, nrules, pass, rulesets, weights);
397 get_next_seq_cached (&seq2, nrules, pass, rulesets, weights);
400 /* See whether any or both strings are empty. */
401 if (seq1.len == 0 || seq2.len == 0)
403 if (seq1.len == seq2.len)
404 /* Both ended. So far so good, both strings are equal
405 at this level. */
406 break;
408 /* This means one string is shorter than the other. Find out
409 which one and return an appropriate value. */
410 result = seq1.len == 0 ? -1 : 1;
411 goto free_and_return;
414 result = do_compare (&seq1, &seq2, position, weights);
415 if (result != 0)
416 goto free_and_return;
420 /* Free the memory if needed. */
421 free_and_return:
422 if (use_malloc)
423 free (seq1.idxarr);
425 return result;
427 libc_hidden_def (STRCOLL)
429 #ifndef WIDE_CHAR_VERSION
430 weak_alias (__strcoll_l, strcoll_l)
431 #endif