Add bug 18604 to the correct section
[glibc.git] / posix / fnmatch_loop.c
blob8d4049d6dcb0ed26076387f6ee7827d1a436aaab
1 /* Copyright (C) 1991-2015 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <http://www.gnu.org/licenses/>. */
18 #include <stdint.h>
/* Resume point recorded by FCT when it is called with a non-NULL ENDS
   argument and reaches a `*' wildcard that is not the start of an
   extended-match group: FCT fills the structure in and returns zero, and
   the caller's `*' handling continues from the recorded positions.  */
20 struct STRUCT
/* Pattern position of the `*' that stopped the nested match (FCT stores
   p - 1, i.e. the star itself).  */
22 const CHAR *pattern;
/* String position that had been reached when that `*' was seen.  */
23 const CHAR *string;
/* Value of the leading-period state at that point.  */
24 int no_leading_period;
27 /* Match STRING against the filename pattern PATTERN, returning zero if
28 it matches, nonzero if not. */
/* STRING_END is the end of the text to match.  ENDS, when non-NULL,
   receives the resume point if a `*' is reached (see struct STRUCT).
   ALLOCA_USED is handed on unchanged to EXT and to recursive calls.  */
29 static int FCT (const CHAR *pattern, const CHAR *string,
30 const CHAR *string_end, int no_leading_period, int flags,
31 struct STRUCT *ends, size_t alloca_used)
32 internal_function;
/* Handle the extended-match group whose operator character is OPT and
   whose opening `(' PATTERN points at.  Yields 0 on a match, FNM_NOMATCH
   on a mismatch, -1 when the group is malformed and -2 when a heap
   allocation fails.  */
33 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
34 const CHAR *string_end, int no_leading_period, int flags,
35 size_t alloca_used)
36 internal_function;
/* Return the position just after the `)' that closes the group starting
   at PATTERNP, or PATTERNP itself if the group is not terminated.  */
37 static const CHAR *END (const CHAR *patternp) internal_function;
39 static int
40 internal_function
41 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
42 int no_leading_period, int flags, struct STRUCT *ends, size_t alloca_used)
44 const CHAR *p = pattern, *n = string;
45 UCHAR c;
46 #ifdef _LIBC
47 # if WIDE_CHAR_VERSION
48 const char *collseq = (const char *)
49 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
50 # else
51 const UCHAR *collseq = (const UCHAR *)
52 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
53 # endif
54 #endif
56 while ((c = *p++) != L('\0'))
58 int new_no_leading_period = 0;
59 c = FOLD (c);
61 switch (c)
63 case L('?'):
64 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
66 int res = EXT (c, p, n, string_end, no_leading_period,
67 flags, alloca_used);
68 if (res != -1)
69 return res;
72 if (n == string_end)
73 return FNM_NOMATCH;
74 else if (*n == L('/') && (flags & FNM_FILE_NAME))
75 return FNM_NOMATCH;
76 else if (*n == L('.') && no_leading_period)
77 return FNM_NOMATCH;
78 break;
80 case L('\\'):
81 if (!(flags & FNM_NOESCAPE))
83 c = *p++;
84 if (c == L('\0'))
85 /* Trailing \ loses. */
86 return FNM_NOMATCH;
87 c = FOLD (c);
89 if (n == string_end || FOLD ((UCHAR) *n) != c)
90 return FNM_NOMATCH;
91 break;
93 case L('*'):
94 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
96 int res = EXT (c, p, n, string_end, no_leading_period,
97 flags, alloca_used);
98 if (res != -1)
99 return res;
101 else if (ends != NULL)
103 ends->pattern = p - 1;
104 ends->string = n;
105 ends->no_leading_period = no_leading_period;
106 return 0;
109 if (n != string_end && *n == L('.') && no_leading_period)
110 return FNM_NOMATCH;
112 for (c = *p++; c == L('?') || c == L('*'); c = *p++)
114 if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
116 const CHAR *endp = END (p);
117 if (endp != p)
119 /* This is a pattern. Skip over it. */
120 p = endp;
121 continue;
125 if (c == L('?'))
127 /* A ? needs to match one character. */
128 if (n == string_end)
129 /* There isn't another character; no match. */
130 return FNM_NOMATCH;
131 else if (*n == L('/')
132 && __builtin_expect (flags & FNM_FILE_NAME, 0))
133 /* A slash does not match a wildcard under
134 FNM_FILE_NAME. */
135 return FNM_NOMATCH;
136 else
137 /* One character of the string is consumed in matching
138 this ? wildcard, so *??? won't match if there are
139 less than three characters. */
140 ++n;
144 if (c == L('\0'))
145 /* The wildcard(s) is/are the last element of the pattern.
146 If the name is a file name and contains another slash
147 this means it cannot match, unless the FNM_LEADING_DIR
148 flag is set. */
150 int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
152 if (flags & FNM_FILE_NAME)
154 if (flags & FNM_LEADING_DIR)
155 result = 0;
156 else
158 if (MEMCHR (n, L('/'), string_end - n) == NULL)
159 result = 0;
163 return result;
165 else
167 const CHAR *endp;
168 struct STRUCT end;
170 end.pattern = NULL;
171 endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
172 string_end - n);
173 if (endp == NULL)
174 endp = string_end;
176 if (c == L('[')
177 || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
178 && (c == L('@') || c == L('+') || c == L('!'))
179 && *p == L('(')))
181 int flags2 = ((flags & FNM_FILE_NAME)
182 ? flags : (flags & ~FNM_PERIOD));
184 for (--p; n < endp; ++n, no_leading_period = 0)
185 if (FCT (p, n, string_end, no_leading_period, flags2,
186 &end, alloca_used) == 0)
187 goto found;
189 else if (c == L('/') && (flags & FNM_FILE_NAME))
191 while (n < string_end && *n != L('/'))
192 ++n;
193 if (n < string_end && *n == L('/')
194 && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags,
195 NULL, alloca_used) == 0))
196 return 0;
198 else
200 int flags2 = ((flags & FNM_FILE_NAME)
201 ? flags : (flags & ~FNM_PERIOD));
203 if (c == L('\\') && !(flags & FNM_NOESCAPE))
204 c = *p;
205 c = FOLD (c);
206 for (--p; n < endp; ++n, no_leading_period = 0)
207 if (FOLD ((UCHAR) *n) == c
208 && (FCT (p, n, string_end, no_leading_period, flags2,
209 &end, alloca_used) == 0))
211 found:
212 if (end.pattern == NULL)
213 return 0;
214 break;
216 if (end.pattern != NULL)
218 p = end.pattern;
219 n = end.string;
220 no_leading_period = end.no_leading_period;
221 continue;
226 /* If we come here no match is possible with the wildcard. */
227 return FNM_NOMATCH;
229 case L('['):
231 /* Nonzero if the sense of the character class is inverted. */
232 const CHAR *p_init = p;
233 const CHAR *n_init = n;
234 int not;
235 CHAR cold;
236 UCHAR fn;
238 if (posixly_correct == 0)
239 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
241 if (n == string_end)
242 return FNM_NOMATCH;
244 if (*n == L('.') && no_leading_period)
245 return FNM_NOMATCH;
247 if (*n == L('/') && (flags & FNM_FILE_NAME))
248 /* `/' cannot be matched. */
249 return FNM_NOMATCH;
251 not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
252 if (not)
253 ++p;
255 fn = FOLD ((UCHAR) *n);
257 c = *p++;
258 for (;;)
260 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
262 if (*p == L('\0'))
263 return FNM_NOMATCH;
264 c = FOLD ((UCHAR) *p);
265 ++p;
267 goto normal_bracket;
269 else if (c == L('[') && *p == L(':'))
271 /* Leave room for the null. */
272 CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
273 size_t c1 = 0;
274 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
275 wctype_t wt;
276 #endif
277 const CHAR *startp = p;
279 for (;;)
281 if (c1 == CHAR_CLASS_MAX_LENGTH)
282 /* The name is too long and therefore the pattern
283 is ill-formed. */
284 return FNM_NOMATCH;
286 c = *++p;
287 if (c == L(':') && p[1] == L(']'))
289 p += 2;
290 break;
292 if (c < L('a') || c >= L('z'))
294 /* This cannot possibly be a character class name.
295 Match it as a normal range. */
296 p = startp;
297 c = L('[');
298 goto normal_bracket;
300 str[c1++] = c;
302 str[c1] = L('\0');
304 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
305 wt = IS_CHAR_CLASS (str);
306 if (wt == 0)
307 /* Invalid character class name. */
308 return FNM_NOMATCH;
310 # if defined _LIBC && ! WIDE_CHAR_VERSION
311 /* The following code is glibc specific but does
312 there a good job in speeding up the code since
313 we can avoid the btowc() call. */
314 if (_ISCTYPE ((UCHAR) *n, wt))
315 goto matched;
316 # else
317 if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
318 goto matched;
319 # endif
320 #else
321 if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
322 || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
323 || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
324 || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
325 || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
326 || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
327 || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
328 || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
329 || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
330 || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
331 || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
332 || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
333 goto matched;
334 #endif
335 c = *p++;
337 #ifdef _LIBC
338 else if (c == L('[') && *p == L('='))
340 /* It's important that STR be a scalar variable rather
341 than a one-element array, because GCC (at least 4.9.2
342 -O2 on x86-64) can be confused by the array and
343 diagnose a "used initialized" in a dead branch in the
344 findidx function. */
345 UCHAR str;
346 uint32_t nrules =
347 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
348 const CHAR *startp = p;
350 c = *++p;
351 if (c == L('\0'))
353 p = startp;
354 c = L('[');
355 goto normal_bracket;
357 str = c;
359 c = *++p;
360 if (c != L('=') || p[1] != L(']'))
362 p = startp;
363 c = L('[');
364 goto normal_bracket;
366 p += 2;
368 if (nrules == 0)
370 if ((UCHAR) *n == str)
371 goto matched;
373 else
375 const int32_t *table;
376 # if WIDE_CHAR_VERSION
377 const int32_t *weights;
378 const wint_t *extra;
379 # else
380 const unsigned char *weights;
381 const unsigned char *extra;
382 # endif
383 const int32_t *indirect;
384 int32_t idx;
385 const UCHAR *cp = (const UCHAR *) &str;
387 # if WIDE_CHAR_VERSION
388 table = (const int32_t *)
389 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
390 weights = (const int32_t *)
391 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
392 extra = (const wint_t *)
393 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
394 indirect = (const int32_t *)
395 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
396 # else
397 table = (const int32_t *)
398 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
399 weights = (const unsigned char *)
400 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
401 extra = (const unsigned char *)
402 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
403 indirect = (const int32_t *)
404 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
405 # endif
407 idx = FINDIDX (table, indirect, extra, &cp, 1);
408 if (idx != 0)
410 /* We found a table entry. Now see whether the
411 character we are currently at has the same
412 equivalance class value. */
413 int len = weights[idx & 0xffffff];
414 int32_t idx2;
415 const UCHAR *np = (const UCHAR *) n;
417 idx2 = FINDIDX (table, indirect, extra,
418 &np, string_end - n);
419 if (idx2 != 0
420 && (idx >> 24) == (idx2 >> 24)
421 && len == weights[idx2 & 0xffffff])
423 int cnt = 0;
425 idx &= 0xffffff;
426 idx2 &= 0xffffff;
428 while (cnt < len
429 && (weights[idx + 1 + cnt]
430 == weights[idx2 + 1 + cnt]))
431 ++cnt;
433 if (cnt == len)
434 goto matched;
439 c = *p++;
441 #endif
442 else if (c == L('\0'))
444 /* [ unterminated, treat as normal character. */
445 p = p_init;
446 n = n_init;
447 c = L('[');
448 goto normal_match;
450 else
452 int is_range = 0;
454 #ifdef _LIBC
455 int is_seqval = 0;
457 if (c == L('[') && *p == L('.'))
459 uint32_t nrules =
460 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
461 const CHAR *startp = p;
462 size_t c1 = 0;
464 while (1)
466 c = *++p;
467 if (c == L('.') && p[1] == L(']'))
469 p += 2;
470 break;
472 if (c == '\0')
473 return FNM_NOMATCH;
474 ++c1;
477 /* We have to handling the symbols differently in
478 ranges since then the collation sequence is
479 important. */
480 is_range = *p == L('-') && p[1] != L('\0');
482 if (nrules == 0)
484 /* There are no names defined in the collation
485 data. Therefore we only accept the trivial
486 names consisting of the character itself. */
487 if (c1 != 1)
488 return FNM_NOMATCH;
490 if (!is_range && *n == startp[1])
491 goto matched;
493 cold = startp[1];
494 c = *p++;
496 else
498 int32_t table_size;
499 const int32_t *symb_table;
500 # if WIDE_CHAR_VERSION
501 char str[c1];
502 unsigned int strcnt;
503 # else
504 # define str (startp + 1)
505 # endif
506 const unsigned char *extra;
507 int32_t idx;
508 int32_t elem;
509 int32_t second;
510 int32_t hash;
512 # if WIDE_CHAR_VERSION
513 /* We have to convert the name to a single-byte
514 string. This is possible since the names
515 consist of ASCII characters and the internal
516 representation is UCS4. */
517 for (strcnt = 0; strcnt < c1; ++strcnt)
518 str[strcnt] = startp[1 + strcnt];
519 #endif
521 table_size =
522 _NL_CURRENT_WORD (LC_COLLATE,
523 _NL_COLLATE_SYMB_HASH_SIZEMB);
524 symb_table = (const int32_t *)
525 _NL_CURRENT (LC_COLLATE,
526 _NL_COLLATE_SYMB_TABLEMB);
527 extra = (const unsigned char *)
528 _NL_CURRENT (LC_COLLATE,
529 _NL_COLLATE_SYMB_EXTRAMB);
531 /* Locate the character in the hashing table. */
532 hash = elem_hash (str, c1);
534 idx = 0;
535 elem = hash % table_size;
536 if (symb_table[2 * elem] != 0)
538 second = hash % (table_size - 2) + 1;
542 /* First compare the hashing value. */
543 if (symb_table[2 * elem] == hash
544 && (c1
545 == extra[symb_table[2 * elem + 1]])
546 && memcmp (str,
547 &extra[symb_table[2 * elem
548 + 1]
549 + 1], c1) == 0)
551 /* Yep, this is the entry. */
552 idx = symb_table[2 * elem + 1];
553 idx += 1 + extra[idx];
554 break;
557 /* Next entry. */
558 elem += second;
560 while (symb_table[2 * elem] != 0);
563 if (symb_table[2 * elem] != 0)
565 /* Compare the byte sequence but only if
566 this is not part of a range. */
567 # if WIDE_CHAR_VERSION
568 int32_t *wextra;
570 idx += 1 + extra[idx];
571 /* Adjust for the alignment. */
572 idx = (idx + 3) & ~3;
574 wextra = (int32_t *) &extra[idx + 4];
575 # endif
577 if (! is_range)
579 # if WIDE_CHAR_VERSION
580 for (c1 = 0;
581 (int32_t) c1 < wextra[idx];
582 ++c1)
583 if (n[c1] != wextra[1 + c1])
584 break;
586 if ((int32_t) c1 == wextra[idx])
587 goto matched;
588 # else
589 for (c1 = 0; c1 < extra[idx]; ++c1)
590 if (n[c1] != extra[1 + c1])
591 break;
593 if (c1 == extra[idx])
594 goto matched;
595 # endif
598 /* Get the collation sequence value. */
599 is_seqval = 1;
600 # if WIDE_CHAR_VERSION
601 cold = wextra[1 + wextra[idx]];
602 # else
603 /* Adjust for the alignment. */
604 idx += 1 + extra[idx];
605 idx = (idx + 3) & ~4;
606 cold = *((int32_t *) &extra[idx]);
607 # endif
609 c = *p++;
611 else if (c1 == 1)
613 /* No valid character. Match it as a
614 single byte. */
615 if (!is_range && *n == str[0])
616 goto matched;
618 cold = str[0];
619 c = *p++;
621 else
622 return FNM_NOMATCH;
625 else
626 # undef str
627 #endif
629 c = FOLD (c);
630 normal_bracket:
632 /* We have to handling the symbols differently in
633 ranges since then the collation sequence is
634 important. */
635 is_range = (*p == L('-') && p[1] != L('\0')
636 && p[1] != L(']'));
638 if (!is_range && c == fn)
639 goto matched;
641 /* This is needed if we goto normal_bracket; from
642 outside of is_seqval's scope. */
643 is_seqval = 0;
644 cold = c;
645 c = *p++;
648 if (c == L('-') && *p != L(']'))
650 #if _LIBC
651 /* We have to find the collation sequence
652 value for C. Collation sequence is nothing
653 we can regularly access. The sequence
654 value is defined by the order in which the
655 definitions of the collation values for the
656 various characters appear in the source
657 file. A strange concept, nowhere
658 documented. */
659 uint32_t fcollseq;
660 uint32_t lcollseq;
661 UCHAR cend = *p++;
663 # if WIDE_CHAR_VERSION
664 /* Search in the `names' array for the characters. */
665 fcollseq = __collseq_table_lookup (collseq, fn);
666 if (fcollseq == ~((uint32_t) 0))
667 /* XXX We don't know anything about the character
668 we are supposed to match. This means we are
669 failing. */
670 goto range_not_matched;
672 if (is_seqval)
673 lcollseq = cold;
674 else
675 lcollseq = __collseq_table_lookup (collseq, cold);
676 # else
677 fcollseq = collseq[fn];
678 lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
679 # endif
681 is_seqval = 0;
682 if (cend == L('[') && *p == L('.'))
684 uint32_t nrules =
685 _NL_CURRENT_WORD (LC_COLLATE,
686 _NL_COLLATE_NRULES);
687 const CHAR *startp = p;
688 size_t c1 = 0;
690 while (1)
692 c = *++p;
693 if (c == L('.') && p[1] == L(']'))
695 p += 2;
696 break;
698 if (c == '\0')
699 return FNM_NOMATCH;
700 ++c1;
703 if (nrules == 0)
705 /* There are no names defined in the
706 collation data. Therefore we only
707 accept the trivial names consisting
708 of the character itself. */
709 if (c1 != 1)
710 return FNM_NOMATCH;
712 cend = startp[1];
714 else
716 int32_t table_size;
717 const int32_t *symb_table;
718 # if WIDE_CHAR_VERSION
719 char str[c1];
720 unsigned int strcnt;
721 # else
722 # define str (startp + 1)
723 # endif
724 const unsigned char *extra;
725 int32_t idx;
726 int32_t elem;
727 int32_t second;
728 int32_t hash;
730 # if WIDE_CHAR_VERSION
731 /* We have to convert the name to a single-byte
732 string. This is possible since the names
733 consist of ASCII characters and the internal
734 representation is UCS4. */
735 for (strcnt = 0; strcnt < c1; ++strcnt)
736 str[strcnt] = startp[1 + strcnt];
737 # endif
739 table_size =
740 _NL_CURRENT_WORD (LC_COLLATE,
741 _NL_COLLATE_SYMB_HASH_SIZEMB);
742 symb_table = (const int32_t *)
743 _NL_CURRENT (LC_COLLATE,
744 _NL_COLLATE_SYMB_TABLEMB);
745 extra = (const unsigned char *)
746 _NL_CURRENT (LC_COLLATE,
747 _NL_COLLATE_SYMB_EXTRAMB);
749 /* Locate the character in the hashing
750 table. */
751 hash = elem_hash (str, c1);
753 idx = 0;
754 elem = hash % table_size;
755 if (symb_table[2 * elem] != 0)
757 second = hash % (table_size - 2) + 1;
761 /* First compare the hashing value. */
762 if (symb_table[2 * elem] == hash
763 && (c1
764 == extra[symb_table[2 * elem + 1]])
765 && memcmp (str,
766 &extra[symb_table[2 * elem + 1]
767 + 1], c1) == 0)
769 /* Yep, this is the entry. */
770 idx = symb_table[2 * elem + 1];
771 idx += 1 + extra[idx];
772 break;
775 /* Next entry. */
776 elem += second;
778 while (symb_table[2 * elem] != 0);
781 if (symb_table[2 * elem] != 0)
783 /* Compare the byte sequence but only if
784 this is not part of a range. */
785 # if WIDE_CHAR_VERSION
786 int32_t *wextra;
788 idx += 1 + extra[idx];
789 /* Adjust for the alignment. */
790 idx = (idx + 3) & ~4;
792 wextra = (int32_t *) &extra[idx + 4];
793 # endif
794 /* Get the collation sequence value. */
795 is_seqval = 1;
796 # if WIDE_CHAR_VERSION
797 cend = wextra[1 + wextra[idx]];
798 # else
799 /* Adjust for the alignment. */
800 idx += 1 + extra[idx];
801 idx = (idx + 3) & ~4;
802 cend = *((int32_t *) &extra[idx]);
803 # endif
805 else if (symb_table[2 * elem] != 0 && c1 == 1)
807 cend = str[0];
808 c = *p++;
810 else
811 return FNM_NOMATCH;
813 # undef str
815 else
817 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
818 cend = *p++;
819 if (cend == L('\0'))
820 return FNM_NOMATCH;
821 cend = FOLD (cend);
824 /* XXX It is not entirely clear to me how to handle
825 characters which are not mentioned in the
826 collation specification. */
827 if (
828 # if WIDE_CHAR_VERSION
829 lcollseq == 0xffffffff ||
830 # endif
831 lcollseq <= fcollseq)
833 /* We have to look at the upper bound. */
834 uint32_t hcollseq;
836 if (is_seqval)
837 hcollseq = cend;
838 else
840 # if WIDE_CHAR_VERSION
841 hcollseq =
842 __collseq_table_lookup (collseq, cend);
843 if (hcollseq == ~((uint32_t) 0))
845 /* Hum, no information about the upper
846 bound. The matching succeeds if the
847 lower bound is matched exactly. */
848 if (lcollseq != fcollseq)
849 goto range_not_matched;
851 goto matched;
853 # else
854 hcollseq = collseq[cend];
855 # endif
858 if (lcollseq <= hcollseq && fcollseq <= hcollseq)
859 goto matched;
861 # if WIDE_CHAR_VERSION
862 range_not_matched:
863 # endif
864 #else
865 /* We use a boring value comparison of the character
866 values. This is better than comparing using
867 `strcoll' since the latter would have surprising
868 and sometimes fatal consequences. */
869 UCHAR cend = *p++;
871 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
872 cend = *p++;
873 if (cend == L('\0'))
874 return FNM_NOMATCH;
876 /* It is a range. */
877 if (cold <= fn && fn <= cend)
878 goto matched;
879 #endif
881 c = *p++;
885 if (c == L(']'))
886 break;
889 if (!not)
890 return FNM_NOMATCH;
891 break;
893 matched:
894 /* Skip the rest of the [...] that already matched. */
895 while ((c = *p++) != L (']'))
897 if (c == L('\0'))
898 /* [... (unterminated) loses. */
899 return FNM_NOMATCH;
901 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
903 if (*p == L('\0'))
904 return FNM_NOMATCH;
905 /* XXX 1003.2d11 is unclear if this is right. */
906 ++p;
908 else if (c == L('[') && *p == L(':'))
910 int c1 = 0;
911 const CHAR *startp = p;
913 while (1)
915 c = *++p;
916 if (++c1 == CHAR_CLASS_MAX_LENGTH)
917 return FNM_NOMATCH;
919 if (*p == L(':') && p[1] == L(']'))
920 break;
922 if (c < L('a') || c >= L('z'))
924 p = startp - 2;
925 break;
928 p += 2;
930 else if (c == L('[') && *p == L('='))
932 c = *++p;
933 if (c == L('\0'))
934 return FNM_NOMATCH;
935 c = *++p;
936 if (c != L('=') || p[1] != L(']'))
937 return FNM_NOMATCH;
938 p += 2;
940 else if (c == L('[') && *p == L('.'))
942 while (1)
944 c = *++p;
945 if (c == L('\0'))
946 return FNM_NOMATCH;
948 if (c == L('.') && p[1] == L(']'))
949 break;
951 p += 2;
954 if (not)
955 return FNM_NOMATCH;
957 break;
959 case L('+'):
960 case L('@'):
961 case L('!'):
962 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
964 int res = EXT (c, p, n, string_end, no_leading_period, flags,
965 alloca_used);
966 if (res != -1)
967 return res;
969 goto normal_match;
971 case L('/'):
972 if (NO_LEADING_PERIOD (flags))
974 if (n == string_end || c != (UCHAR) *n)
975 return FNM_NOMATCH;
977 new_no_leading_period = 1;
978 break;
980 /* FALLTHROUGH */
981 default:
982 normal_match:
983 if (n == string_end || c != FOLD ((UCHAR) *n))
984 return FNM_NOMATCH;
987 no_leading_period = new_no_leading_period;
988 ++n;
991 if (n == string_end)
992 return 0;
994 if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
995 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
996 return 0;
998 return FNM_NOMATCH;
1002 static const CHAR *
1003 internal_function
1004 END (const CHAR *pattern)
1006 const CHAR *p = pattern;
1008 while (1)
1009 if (*++p == L('\0'))
1010 /* This is an invalid pattern. */
1011 return pattern;
1012 else if (*p == L('['))
1014 /* Handle brackets special. */
1015 if (posixly_correct == 0)
1016 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1018 /* Skip the not sign. We have to recognize it because of a possibly
1019 following ']'. */
1020 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1021 ++p;
1022 /* A leading ']' is recognized as such. */
1023 if (*p == L(']'))
1024 ++p;
1025 /* Skip over all characters of the list. */
1026 while (*p != L(']'))
1027 if (*p++ == L('\0'))
1028 /* This is no valid pattern. */
1029 return pattern;
1031 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1032 || *p == L('!')) && p[1] == L('('))
1034 p = END (p + 1);
1035 if (*p == L('\0'))
1036 /* This is an invalid pattern. */
1037 return pattern;
1039 else if (*p == L(')'))
1040 break;
1042 return p + 1;
1046 static int
1047 internal_function
1048 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1049 int no_leading_period, int flags, size_t alloca_used)
1051 const CHAR *startp;
1052 int level;
1053 struct patternlist
1055 struct patternlist *next;
1056 CHAR malloced;
1057 CHAR str[0];
1058 } *list = NULL;
1059 struct patternlist **lastp = &list;
1060 size_t pattern_len = STRLEN (pattern);
1061 int any_malloced = 0;
1062 const CHAR *p;
1063 const CHAR *rs;
1064 int retval = 0;
1066 /* Parse the pattern. Store the individual parts in the list. */
1067 level = 0;
1068 for (startp = p = pattern + 1; level >= 0; ++p)
1069 if (*p == L('\0'))
1071 /* This is an invalid pattern. */
1072 retval = -1;
1073 goto out;
1075 else if (*p == L('['))
1077 /* Handle brackets special. */
1078 if (posixly_correct == 0)
1079 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1081 /* Skip the not sign. We have to recognize it because of a possibly
1082 following ']'. */
1083 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1084 ++p;
1085 /* A leading ']' is recognized as such. */
1086 if (*p == L(']'))
1087 ++p;
1088 /* Skip over all characters of the list. */
1089 while (*p != L(']'))
1090 if (*p++ == L('\0'))
1092 /* This is no valid pattern. */
1093 retval = -1;
1094 goto out;
1097 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1098 || *p == L('!')) && p[1] == L('('))
1099 /* Remember the nesting level. */
1100 ++level;
1101 else if (*p == L(')'))
1103 if (level-- == 0)
1105 /* This means we found the end of the pattern. */
1106 #define NEW_PATTERN \
1107 struct patternlist *newp; \
1108 size_t slen = (opt == L('?') || opt == L('@') \
1109 ? pattern_len : (p - startp + 1)); \
1110 slen = sizeof (struct patternlist) + (slen * sizeof (CHAR)); \
1111 int malloced = ! __libc_use_alloca (alloca_used + slen); \
1112 if (__builtin_expect (malloced, 0)) \
1114 newp = malloc (slen); \
1115 if (newp == NULL) \
1117 retval = -2; \
1118 goto out; \
1120 any_malloced = 1; \
1122 else \
1123 newp = alloca_account (slen, alloca_used); \
1124 newp->next = NULL; \
1125 newp->malloced = malloced; \
1126 *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0'); \
1127 *lastp = newp; \
1128 lastp = &newp->next
1129 NEW_PATTERN;
1132 else if (*p == L('|'))
1134 if (level == 0)
1136 NEW_PATTERN;
1137 startp = p + 1;
1140 assert (list != NULL);
1141 assert (p[-1] == L(')'));
1142 #undef NEW_PATTERN
1144 switch (opt)
1146 case L('*'):
1147 if (FCT (p, string, string_end, no_leading_period, flags, NULL,
1148 alloca_used) == 0)
1149 goto success;
1150 /* FALLTHROUGH */
1152 case L('+'):
1155 for (rs = string; rs <= string_end; ++rs)
1156 /* First match the prefix with the current pattern with the
1157 current pattern. */
1158 if (FCT (list->str, string, rs, no_leading_period,
1159 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1160 NULL, alloca_used) == 0
1161 /* This was successful. Now match the rest with the rest
1162 of the pattern. */
1163 && (FCT (p, rs, string_end,
1164 rs == string
1165 ? no_leading_period
1166 : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1167 flags & FNM_FILE_NAME
1168 ? flags : flags & ~FNM_PERIOD, NULL, alloca_used) == 0
1169 /* This didn't work. Try the whole pattern. */
1170 || (rs != string
1171 && FCT (pattern - 1, rs, string_end,
1172 rs == string
1173 ? no_leading_period
1174 : (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
1175 ? 1 : 0),
1176 flags & FNM_FILE_NAME
1177 ? flags : flags & ~FNM_PERIOD, NULL,
1178 alloca_used) == 0)))
1179 /* It worked. Signal success. */
1180 goto success;
1182 while ((list = list->next) != NULL);
1184 /* None of the patterns lead to a match. */
1185 retval = FNM_NOMATCH;
1186 break;
1188 case L('?'):
1189 if (FCT (p, string, string_end, no_leading_period, flags, NULL,
1190 alloca_used) == 0)
1191 goto success;
1192 /* FALLTHROUGH */
1194 case L('@'):
1196 /* I cannot believe it but `strcat' is actually acceptable
1197 here. Match the entire string with the prefix from the
1198 pattern list and the rest of the pattern following the
1199 pattern list. */
1200 if (FCT (STRCAT (list->str, p), string, string_end,
1201 no_leading_period,
1202 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1203 NULL, alloca_used) == 0)
1204 /* It worked. Signal success. */
1205 goto success;
1206 while ((list = list->next) != NULL);
1208 /* None of the patterns lead to a match. */
1209 retval = FNM_NOMATCH;
1210 break;
1212 case L('!'):
1213 for (rs = string; rs <= string_end; ++rs)
1215 struct patternlist *runp;
1217 for (runp = list; runp != NULL; runp = runp->next)
1218 if (FCT (runp->str, string, rs, no_leading_period,
1219 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1220 NULL, alloca_used) == 0)
1221 break;
1223 /* If none of the patterns matched see whether the rest does. */
1224 if (runp == NULL
1225 && (FCT (p, rs, string_end,
1226 rs == string
1227 ? no_leading_period
1228 : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1229 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1230 NULL, alloca_used) == 0))
1231 /* This is successful. */
1232 goto success;
1235 /* None of the patterns together with the rest of the pattern
1236 lead to a match. */
1237 retval = FNM_NOMATCH;
1238 break;
1240 default:
1241 assert (! "Invalid extended matching operator");
1242 retval = -1;
1243 break;
1246 success:
1247 out:
1248 if (any_malloced)
1249 while (list != NULL)
1251 struct patternlist *old = list;
1252 list = list->next;
1253 if (old->malloced)
1254 free (old);
1257 return retval;
/* Remove the macros that parameterize this file (character types, function
   names and helper operations).  Presumably this lets the including file
   define a different set and include this template again -- confirm
   against the includer.  */
1261 #undef FOLD
1262 #undef CHAR
1263 #undef UCHAR
1264 #undef INT
1265 #undef FCT
1266 #undef EXT
1267 #undef END
1268 #undef STRUCT
1269 #undef MEMPCPY
1270 #undef MEMCHR
1271 #undef STRCOLL
1272 #undef STRLEN
1273 #undef STRCAT
1274 #undef L
1275 #undef BTOWC
1276 #undef WIDE_CHAR_VERSION
1277 #undef FINDIDX