Don't read past end of pattern in fnmatch (BZ #17062)
[glibc.git] / posix / fnmatch_loop.c
blob544769b4f64ff0a42cefc1aacf88a009f8df9b14
1 /* Copyright (C) 1991-2014 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <http://www.gnu.org/licenses/>. */
18 #include <stdint.h>
20 struct STRUCT
22 const CHAR *pattern;
23 const CHAR *string;
24 int no_leading_period;
27 /* Match STRING against the filename pattern PATTERN, returning zero if
28 it matches, nonzero if not. */
29 static int FCT (const CHAR *pattern, const CHAR *string,
30 const CHAR *string_end, int no_leading_period, int flags,
31 struct STRUCT *ends, size_t alloca_used)
32 internal_function;
33 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
34 const CHAR *string_end, int no_leading_period, int flags,
35 size_t alloca_used)
36 internal_function;
37 static const CHAR *END (const CHAR *patternp) internal_function;
39 static int
40 internal_function
41 FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
42 const CHAR *pattern;
43 const CHAR *string;
44 const CHAR *string_end;
45 int no_leading_period;
46 int flags;
47 struct STRUCT *ends;
48 size_t alloca_used;
50 const CHAR *p = pattern, *n = string;
51 UCHAR c;
52 #ifdef _LIBC
53 # if WIDE_CHAR_VERSION
54 const char *collseq = (const char *)
55 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
56 # else
57 const UCHAR *collseq = (const UCHAR *)
58 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
59 # endif
60 #endif
62 while ((c = *p++) != L('\0'))
64 int new_no_leading_period = 0;
65 c = FOLD (c);
67 switch (c)
69 case L('?'):
70 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
72 int res = EXT (c, p, n, string_end, no_leading_period,
73 flags, alloca_used);
74 if (res != -1)
75 return res;
78 if (n == string_end)
79 return FNM_NOMATCH;
80 else if (*n == L('/') && (flags & FNM_FILE_NAME))
81 return FNM_NOMATCH;
82 else if (*n == L('.') && no_leading_period)
83 return FNM_NOMATCH;
84 break;
86 case L('\\'):
87 if (!(flags & FNM_NOESCAPE))
89 c = *p++;
90 if (c == L('\0'))
91 /* Trailing \ loses. */
92 return FNM_NOMATCH;
93 c = FOLD (c);
95 if (n == string_end || FOLD ((UCHAR) *n) != c)
96 return FNM_NOMATCH;
97 break;
99 case L('*'):
100 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
102 int res = EXT (c, p, n, string_end, no_leading_period,
103 flags, alloca_used);
104 if (res != -1)
105 return res;
107 else if (ends != NULL)
109 ends->pattern = p - 1;
110 ends->string = n;
111 ends->no_leading_period = no_leading_period;
112 return 0;
115 if (n != string_end && *n == L('.') && no_leading_period)
116 return FNM_NOMATCH;
118 for (c = *p++; c == L('?') || c == L('*'); c = *p++)
120 if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
122 const CHAR *endp = END (p);
123 if (endp != p)
125 /* This is a pattern. Skip over it. */
126 p = endp;
127 continue;
131 if (c == L('?'))
133 /* A ? needs to match one character. */
134 if (n == string_end)
135 /* There isn't another character; no match. */
136 return FNM_NOMATCH;
137 else if (*n == L('/')
138 && __builtin_expect (flags & FNM_FILE_NAME, 0))
139 /* A slash does not match a wildcard under
140 FNM_FILE_NAME. */
141 return FNM_NOMATCH;
142 else
143 /* One character of the string is consumed in matching
144 this ? wildcard, so *??? won't match if there are
145 less than three characters. */
146 ++n;
150 if (c == L('\0'))
151 /* The wildcard(s) is/are the last element of the pattern.
152 If the name is a file name and contains another slash
153 this means it cannot match, unless the FNM_LEADING_DIR
154 flag is set. */
156 int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
158 if (flags & FNM_FILE_NAME)
160 if (flags & FNM_LEADING_DIR)
161 result = 0;
162 else
164 if (MEMCHR (n, L('/'), string_end - n) == NULL)
165 result = 0;
169 return result;
171 else
173 const CHAR *endp;
174 struct STRUCT end;
176 end.pattern = NULL;
177 endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
178 string_end - n);
179 if (endp == NULL)
180 endp = string_end;
182 if (c == L('[')
183 || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
184 && (c == L('@') || c == L('+') || c == L('!'))
185 && *p == L('(')))
187 int flags2 = ((flags & FNM_FILE_NAME)
188 ? flags : (flags & ~FNM_PERIOD));
190 for (--p; n < endp; ++n, no_leading_period = 0)
191 if (FCT (p, n, string_end, no_leading_period, flags2,
192 &end, alloca_used) == 0)
193 goto found;
195 else if (c == L('/') && (flags & FNM_FILE_NAME))
197 while (n < string_end && *n != L('/'))
198 ++n;
199 if (n < string_end && *n == L('/')
200 && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags,
201 NULL, alloca_used) == 0))
202 return 0;
204 else
206 int flags2 = ((flags & FNM_FILE_NAME)
207 ? flags : (flags & ~FNM_PERIOD));
209 if (c == L('\\') && !(flags & FNM_NOESCAPE))
210 c = *p;
211 c = FOLD (c);
212 for (--p; n < endp; ++n, no_leading_period = 0)
213 if (FOLD ((UCHAR) *n) == c
214 && (FCT (p, n, string_end, no_leading_period, flags2,
215 &end, alloca_used) == 0))
217 found:
218 if (end.pattern == NULL)
219 return 0;
220 break;
222 if (end.pattern != NULL)
224 p = end.pattern;
225 n = end.string;
226 no_leading_period = end.no_leading_period;
227 continue;
232 /* If we come here no match is possible with the wildcard. */
233 return FNM_NOMATCH;
235 case L('['):
237 /* Nonzero if the sense of the character class is inverted. */
238 const CHAR *p_init = p;
239 const CHAR *n_init = n;
240 int not;
241 CHAR cold;
242 UCHAR fn;
244 if (posixly_correct == 0)
245 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
247 if (n == string_end)
248 return FNM_NOMATCH;
250 if (*n == L('.') && no_leading_period)
251 return FNM_NOMATCH;
253 if (*n == L('/') && (flags & FNM_FILE_NAME))
254 /* `/' cannot be matched. */
255 return FNM_NOMATCH;
257 not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
258 if (not)
259 ++p;
261 fn = FOLD ((UCHAR) *n);
263 c = *p++;
264 for (;;)
266 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
268 if (*p == L('\0'))
269 return FNM_NOMATCH;
270 c = FOLD ((UCHAR) *p);
271 ++p;
273 goto normal_bracket;
275 else if (c == L('[') && *p == L(':'))
277 /* Leave room for the null. */
278 CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
279 size_t c1 = 0;
280 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
281 wctype_t wt;
282 #endif
283 const CHAR *startp = p;
285 for (;;)
287 if (c1 == CHAR_CLASS_MAX_LENGTH)
288 /* The name is too long and therefore the pattern
289 is ill-formed. */
290 return FNM_NOMATCH;
292 c = *++p;
293 if (c == L(':') && p[1] == L(']'))
295 p += 2;
296 break;
298 if (c < L('a') || c >= L('z'))
300 /* This cannot possibly be a character class name.
301 Match it as a normal range. */
302 p = startp;
303 c = L('[');
304 goto normal_bracket;
306 str[c1++] = c;
308 str[c1] = L('\0');
310 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
311 wt = IS_CHAR_CLASS (str);
312 if (wt == 0)
313 /* Invalid character class name. */
314 return FNM_NOMATCH;
316 # if defined _LIBC && ! WIDE_CHAR_VERSION
317 /* The following code is glibc specific but does
318 there a good job in speeding up the code since
319 we can avoid the btowc() call. */
320 if (_ISCTYPE ((UCHAR) *n, wt))
321 goto matched;
322 # else
323 if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
324 goto matched;
325 # endif
326 #else
327 if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
328 || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
329 || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
330 || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
331 || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
332 || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
333 || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
334 || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
335 || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
336 || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
337 || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
338 || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
339 goto matched;
340 #endif
341 c = *p++;
343 #ifdef _LIBC
344 else if (c == L('[') && *p == L('='))
346 UCHAR str[1];
347 uint32_t nrules =
348 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
349 const CHAR *startp = p;
351 c = *++p;
352 if (c == L('\0'))
354 p = startp;
355 c = L('[');
356 goto normal_bracket;
358 str[0] = c;
360 c = *++p;
361 if (c != L('=') || p[1] != L(']'))
363 p = startp;
364 c = L('[');
365 goto normal_bracket;
367 p += 2;
369 if (nrules == 0)
371 if ((UCHAR) *n == str[0])
372 goto matched;
374 else
376 const int32_t *table;
377 # if WIDE_CHAR_VERSION
378 const int32_t *weights;
379 const int32_t *extra;
380 # else
381 const unsigned char *weights;
382 const unsigned char *extra;
383 # endif
384 const int32_t *indirect;
385 int32_t idx;
386 const UCHAR *cp = (const UCHAR *) str;
388 /* This #include defines a local function! */
389 # if WIDE_CHAR_VERSION
390 # include <locale/weightwc.h>
391 # else
392 # include <locale/weight.h>
393 # endif
395 # if WIDE_CHAR_VERSION
396 table = (const int32_t *)
397 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
398 weights = (const int32_t *)
399 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
400 extra = (const int32_t *)
401 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
402 indirect = (const int32_t *)
403 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
404 # else
405 table = (const int32_t *)
406 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
407 weights = (const unsigned char *)
408 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
409 extra = (const unsigned char *)
410 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
411 indirect = (const int32_t *)
412 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
413 # endif
415 idx = findidx (&cp, 1);
416 if (idx != 0)
418 /* We found a table entry. Now see whether the
419 character we are currently at has the same
420 equivalance class value. */
421 int len = weights[idx & 0xffffff];
422 int32_t idx2;
423 const UCHAR *np = (const UCHAR *) n;
425 idx2 = findidx (&np, string_end - n);
426 if (idx2 != 0
427 && (idx >> 24) == (idx2 >> 24)
428 && len == weights[idx2 & 0xffffff])
430 int cnt = 0;
432 idx &= 0xffffff;
433 idx2 &= 0xffffff;
435 while (cnt < len
436 && (weights[idx + 1 + cnt]
437 == weights[idx2 + 1 + cnt]))
438 ++cnt;
440 if (cnt == len)
441 goto matched;
446 c = *p++;
448 #endif
449 else if (c == L('\0'))
451 /* [ unterminated, treat as normal character. */
452 p = p_init;
453 n = n_init;
454 c = L('[');
455 goto normal_match;
457 else
459 int is_range = 0;
461 #ifdef _LIBC
462 int is_seqval = 0;
464 if (c == L('[') && *p == L('.'))
466 uint32_t nrules =
467 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
468 const CHAR *startp = p;
469 size_t c1 = 0;
471 while (1)
473 c = *++p;
474 if (c == L('.') && p[1] == L(']'))
476 p += 2;
477 break;
479 if (c == '\0')
480 return FNM_NOMATCH;
481 ++c1;
484 /* We have to handling the symbols differently in
485 ranges since then the collation sequence is
486 important. */
487 is_range = *p == L('-') && p[1] != L('\0');
489 if (nrules == 0)
491 /* There are no names defined in the collation
492 data. Therefore we only accept the trivial
493 names consisting of the character itself. */
494 if (c1 != 1)
495 return FNM_NOMATCH;
497 if (!is_range && *n == startp[1])
498 goto matched;
500 cold = startp[1];
501 c = *p++;
503 else
505 int32_t table_size;
506 const int32_t *symb_table;
507 # ifdef WIDE_CHAR_VERSION
508 char str[c1];
509 unsigned int strcnt;
510 # else
511 # define str (startp + 1)
512 # endif
513 const unsigned char *extra;
514 int32_t idx;
515 int32_t elem;
516 int32_t second;
517 int32_t hash;
519 # ifdef WIDE_CHAR_VERSION
520 /* We have to convert the name to a single-byte
521 string. This is possible since the names
522 consist of ASCII characters and the internal
523 representation is UCS4. */
524 for (strcnt = 0; strcnt < c1; ++strcnt)
525 str[strcnt] = startp[1 + strcnt];
526 #endif
528 table_size =
529 _NL_CURRENT_WORD (LC_COLLATE,
530 _NL_COLLATE_SYMB_HASH_SIZEMB);
531 symb_table = (const int32_t *)
532 _NL_CURRENT (LC_COLLATE,
533 _NL_COLLATE_SYMB_TABLEMB);
534 extra = (const unsigned char *)
535 _NL_CURRENT (LC_COLLATE,
536 _NL_COLLATE_SYMB_EXTRAMB);
538 /* Locate the character in the hashing table. */
539 hash = elem_hash (str, c1);
541 idx = 0;
542 elem = hash % table_size;
543 if (symb_table[2 * elem] != 0)
545 second = hash % (table_size - 2) + 1;
549 /* First compare the hashing value. */
550 if (symb_table[2 * elem] == hash
551 && (c1
552 == extra[symb_table[2 * elem + 1]])
553 && memcmp (str,
554 &extra[symb_table[2 * elem
555 + 1]
556 + 1], c1) == 0)
558 /* Yep, this is the entry. */
559 idx = symb_table[2 * elem + 1];
560 idx += 1 + extra[idx];
561 break;
564 /* Next entry. */
565 elem += second;
567 while (symb_table[2 * elem] != 0);
570 if (symb_table[2 * elem] != 0)
572 /* Compare the byte sequence but only if
573 this is not part of a range. */
574 # ifdef WIDE_CHAR_VERSION
575 int32_t *wextra;
577 idx += 1 + extra[idx];
578 /* Adjust for the alignment. */
579 idx = (idx + 3) & ~3;
581 wextra = (int32_t *) &extra[idx + 4];
582 # endif
584 if (! is_range)
586 # ifdef WIDE_CHAR_VERSION
587 for (c1 = 0;
588 (int32_t) c1 < wextra[idx];
589 ++c1)
590 if (n[c1] != wextra[1 + c1])
591 break;
593 if ((int32_t) c1 == wextra[idx])
594 goto matched;
595 # else
596 for (c1 = 0; c1 < extra[idx]; ++c1)
597 if (n[c1] != extra[1 + c1])
598 break;
600 if (c1 == extra[idx])
601 goto matched;
602 # endif
605 /* Get the collation sequence value. */
606 is_seqval = 1;
607 # ifdef WIDE_CHAR_VERSION
608 cold = wextra[1 + wextra[idx]];
609 # else
610 /* Adjust for the alignment. */
611 idx += 1 + extra[idx];
612 idx = (idx + 3) & ~4;
613 cold = *((int32_t *) &extra[idx]);
614 # endif
616 c = *p++;
618 else if (c1 == 1)
620 /* No valid character. Match it as a
621 single byte. */
622 if (!is_range && *n == str[0])
623 goto matched;
625 cold = str[0];
626 c = *p++;
628 else
629 return FNM_NOMATCH;
632 else
633 # undef str
634 #endif
636 c = FOLD (c);
637 normal_bracket:
639 /* We have to handling the symbols differently in
640 ranges since then the collation sequence is
641 important. */
642 is_range = (*p == L('-') && p[1] != L('\0')
643 && p[1] != L(']'));
645 if (!is_range && c == fn)
646 goto matched;
648 /* This is needed if we goto normal_bracket; from
649 outside of is_seqval's scope. */
650 is_seqval = 0;
651 cold = c;
652 c = *p++;
655 if (c == L('-') && *p != L(']'))
657 #if _LIBC
658 /* We have to find the collation sequence
659 value for C. Collation sequence is nothing
660 we can regularly access. The sequence
661 value is defined by the order in which the
662 definitions of the collation values for the
663 various characters appear in the source
664 file. A strange concept, nowhere
665 documented. */
666 uint32_t fcollseq;
667 uint32_t lcollseq;
668 UCHAR cend = *p++;
670 # ifdef WIDE_CHAR_VERSION
671 /* Search in the `names' array for the characters. */
672 fcollseq = __collseq_table_lookup (collseq, fn);
673 if (fcollseq == ~((uint32_t) 0))
674 /* XXX We don't know anything about the character
675 we are supposed to match. This means we are
676 failing. */
677 goto range_not_matched;
679 if (is_seqval)
680 lcollseq = cold;
681 else
682 lcollseq = __collseq_table_lookup (collseq, cold);
683 # else
684 fcollseq = collseq[fn];
685 lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
686 # endif
688 is_seqval = 0;
689 if (cend == L('[') && *p == L('.'))
691 uint32_t nrules =
692 _NL_CURRENT_WORD (LC_COLLATE,
693 _NL_COLLATE_NRULES);
694 const CHAR *startp = p;
695 size_t c1 = 0;
697 while (1)
699 c = *++p;
700 if (c == L('.') && p[1] == L(']'))
702 p += 2;
703 break;
705 if (c == '\0')
706 return FNM_NOMATCH;
707 ++c1;
710 if (nrules == 0)
712 /* There are no names defined in the
713 collation data. Therefore we only
714 accept the trivial names consisting
715 of the character itself. */
716 if (c1 != 1)
717 return FNM_NOMATCH;
719 cend = startp[1];
721 else
723 int32_t table_size;
724 const int32_t *symb_table;
725 # ifdef WIDE_CHAR_VERSION
726 char str[c1];
727 unsigned int strcnt;
728 # else
729 # define str (startp + 1)
730 # endif
731 const unsigned char *extra;
732 int32_t idx;
733 int32_t elem;
734 int32_t second;
735 int32_t hash;
737 # ifdef WIDE_CHAR_VERSION
738 /* We have to convert the name to a single-byte
739 string. This is possible since the names
740 consist of ASCII characters and the internal
741 representation is UCS4. */
742 for (strcnt = 0; strcnt < c1; ++strcnt)
743 str[strcnt] = startp[1 + strcnt];
744 # endif
746 table_size =
747 _NL_CURRENT_WORD (LC_COLLATE,
748 _NL_COLLATE_SYMB_HASH_SIZEMB);
749 symb_table = (const int32_t *)
750 _NL_CURRENT (LC_COLLATE,
751 _NL_COLLATE_SYMB_TABLEMB);
752 extra = (const unsigned char *)
753 _NL_CURRENT (LC_COLLATE,
754 _NL_COLLATE_SYMB_EXTRAMB);
756 /* Locate the character in the hashing
757 table. */
758 hash = elem_hash (str, c1);
760 idx = 0;
761 elem = hash % table_size;
762 if (symb_table[2 * elem] != 0)
764 second = hash % (table_size - 2) + 1;
768 /* First compare the hashing value. */
769 if (symb_table[2 * elem] == hash
770 && (c1
771 == extra[symb_table[2 * elem + 1]])
772 && memcmp (str,
773 &extra[symb_table[2 * elem + 1]
774 + 1], c1) == 0)
776 /* Yep, this is the entry. */
777 idx = symb_table[2 * elem + 1];
778 idx += 1 + extra[idx];
779 break;
782 /* Next entry. */
783 elem += second;
785 while (symb_table[2 * elem] != 0);
788 if (symb_table[2 * elem] != 0)
790 /* Compare the byte sequence but only if
791 this is not part of a range. */
792 # ifdef WIDE_CHAR_VERSION
793 int32_t *wextra;
795 idx += 1 + extra[idx];
796 /* Adjust for the alignment. */
797 idx = (idx + 3) & ~4;
799 wextra = (int32_t *) &extra[idx + 4];
800 # endif
801 /* Get the collation sequence value. */
802 is_seqval = 1;
803 # ifdef WIDE_CHAR_VERSION
804 cend = wextra[1 + wextra[idx]];
805 # else
806 /* Adjust for the alignment. */
807 idx += 1 + extra[idx];
808 idx = (idx + 3) & ~4;
809 cend = *((int32_t *) &extra[idx]);
810 # endif
812 else if (symb_table[2 * elem] != 0 && c1 == 1)
814 cend = str[0];
815 c = *p++;
817 else
818 return FNM_NOMATCH;
820 # undef str
822 else
824 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
825 cend = *p++;
826 if (cend == L('\0'))
827 return FNM_NOMATCH;
828 cend = FOLD (cend);
831 /* XXX It is not entirely clear to me how to handle
832 characters which are not mentioned in the
833 collation specification. */
834 if (
835 # ifdef WIDE_CHAR_VERSION
836 lcollseq == 0xffffffff ||
837 # endif
838 lcollseq <= fcollseq)
840 /* We have to look at the upper bound. */
841 uint32_t hcollseq;
843 if (is_seqval)
844 hcollseq = cend;
845 else
847 # ifdef WIDE_CHAR_VERSION
848 hcollseq =
849 __collseq_table_lookup (collseq, cend);
850 if (hcollseq == ~((uint32_t) 0))
852 /* Hum, no information about the upper
853 bound. The matching succeeds if the
854 lower bound is matched exactly. */
855 if (lcollseq != fcollseq)
856 goto range_not_matched;
858 goto matched;
860 # else
861 hcollseq = collseq[cend];
862 # endif
865 if (lcollseq <= hcollseq && fcollseq <= hcollseq)
866 goto matched;
868 # ifdef WIDE_CHAR_VERSION
869 range_not_matched:
870 # endif
871 #else
872 /* We use a boring value comparison of the character
873 values. This is better than comparing using
874 `strcoll' since the latter would have surprising
875 and sometimes fatal consequences. */
876 UCHAR cend = *p++;
878 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
879 cend = *p++;
880 if (cend == L('\0'))
881 return FNM_NOMATCH;
883 /* It is a range. */
884 if (cold <= fn && fn <= cend)
885 goto matched;
886 #endif
888 c = *p++;
892 if (c == L(']'))
893 break;
896 if (!not)
897 return FNM_NOMATCH;
898 break;
900 matched:
901 /* Skip the rest of the [...] that already matched. */
902 while ((c = *p++) != L (']'))
904 if (c == L('\0'))
905 /* [... (unterminated) loses. */
906 return FNM_NOMATCH;
908 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
910 if (*p == L('\0'))
911 return FNM_NOMATCH;
912 /* XXX 1003.2d11 is unclear if this is right. */
913 ++p;
915 else if (c == L('[') && *p == L(':'))
917 int c1 = 0;
918 const CHAR *startp = p;
920 while (1)
922 c = *++p;
923 if (++c1 == CHAR_CLASS_MAX_LENGTH)
924 return FNM_NOMATCH;
926 if (*p == L(':') && p[1] == L(']'))
927 break;
929 if (c < L('a') || c >= L('z'))
931 p = startp - 2;
932 break;
935 p += 2;
937 else if (c == L('[') && *p == L('='))
939 c = *++p;
940 if (c == L('\0'))
941 return FNM_NOMATCH;
942 c = *++p;
943 if (c != L('=') || p[1] != L(']'))
944 return FNM_NOMATCH;
945 p += 2;
947 else if (c == L('[') && *p == L('.'))
949 ++p;
950 while (1)
952 c = *++p;
953 if (c == '\0')
954 return FNM_NOMATCH;
956 if (*p == L('.') && p[1] == L(']'))
957 break;
959 p += 2;
962 if (not)
963 return FNM_NOMATCH;
965 break;
967 case L('+'):
968 case L('@'):
969 case L('!'):
970 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
972 int res = EXT (c, p, n, string_end, no_leading_period, flags,
973 alloca_used);
974 if (res != -1)
975 return res;
977 goto normal_match;
979 case L('/'):
980 if (NO_LEADING_PERIOD (flags))
982 if (n == string_end || c != (UCHAR) *n)
983 return FNM_NOMATCH;
985 new_no_leading_period = 1;
986 break;
988 /* FALLTHROUGH */
989 default:
990 normal_match:
991 if (n == string_end || c != FOLD ((UCHAR) *n))
992 return FNM_NOMATCH;
995 no_leading_period = new_no_leading_period;
996 ++n;
999 if (n == string_end)
1000 return 0;
1002 if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
1003 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
1004 return 0;
1006 return FNM_NOMATCH;
1010 static const CHAR *
1011 internal_function
1012 END (const CHAR *pattern)
1014 const CHAR *p = pattern;
1016 while (1)
1017 if (*++p == L('\0'))
1018 /* This is an invalid pattern. */
1019 return pattern;
1020 else if (*p == L('['))
1022 /* Handle brackets special. */
1023 if (posixly_correct == 0)
1024 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1026 /* Skip the not sign. We have to recognize it because of a possibly
1027 following ']'. */
1028 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1029 ++p;
1030 /* A leading ']' is recognized as such. */
1031 if (*p == L(']'))
1032 ++p;
1033 /* Skip over all characters of the list. */
1034 while (*p != L(']'))
1035 if (*p++ == L('\0'))
1036 /* This is no valid pattern. */
1037 return pattern;
1039 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1040 || *p == L('!')) && p[1] == L('('))
1041 p = END (p + 1);
1042 else if (*p == L(')'))
1043 break;
1045 return p + 1;
1049 static int
1050 internal_function
1051 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1052 int no_leading_period, int flags, size_t alloca_used)
1054 const CHAR *startp;
1055 int level;
1056 struct patternlist
1058 struct patternlist *next;
1059 CHAR malloced;
1060 CHAR str[0];
1061 } *list = NULL;
1062 struct patternlist **lastp = &list;
1063 size_t pattern_len = STRLEN (pattern);
1064 int any_malloced = 0;
1065 const CHAR *p;
1066 const CHAR *rs;
1067 int retval = 0;
1069 /* Parse the pattern. Store the individual parts in the list. */
1070 level = 0;
1071 for (startp = p = pattern + 1; level >= 0; ++p)
1072 if (*p == L('\0'))
1074 /* This is an invalid pattern. */
1075 retval = -1;
1076 goto out;
1078 else if (*p == L('['))
1080 /* Handle brackets special. */
1081 if (posixly_correct == 0)
1082 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1084 /* Skip the not sign. We have to recognize it because of a possibly
1085 following ']'. */
1086 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1087 ++p;
1088 /* A leading ']' is recognized as such. */
1089 if (*p == L(']'))
1090 ++p;
1091 /* Skip over all characters of the list. */
1092 while (*p != L(']'))
1093 if (*p++ == L('\0'))
1095 /* This is no valid pattern. */
1096 retval = -1;
1097 goto out;
1100 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1101 || *p == L('!')) && p[1] == L('('))
1102 /* Remember the nesting level. */
1103 ++level;
1104 else if (*p == L(')'))
1106 if (level-- == 0)
1108 /* This means we found the end of the pattern. */
1109 #define NEW_PATTERN \
1110 struct patternlist *newp; \
1111 size_t slen = (opt == L('?') || opt == L('@') \
1112 ? pattern_len : (p - startp + 1)); \
1113 slen = sizeof (struct patternlist) + (slen * sizeof (CHAR)); \
1114 int malloced = ! __libc_use_alloca (alloca_used + slen); \
1115 if (__builtin_expect (malloced, 0)) \
1117 newp = malloc (slen); \
1118 if (newp == NULL) \
1120 retval = -2; \
1121 goto out; \
1123 any_malloced = 1; \
1125 else \
1126 newp = alloca_account (slen, alloca_used); \
1127 newp->next = NULL; \
1128 newp->malloced = malloced; \
1129 *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0'); \
1130 *lastp = newp; \
1131 lastp = &newp->next
1132 NEW_PATTERN;
1135 else if (*p == L('|'))
1137 if (level == 0)
1139 NEW_PATTERN;
1140 startp = p + 1;
1143 assert (list != NULL);
1144 assert (p[-1] == L(')'));
1145 #undef NEW_PATTERN
1147 switch (opt)
1149 case L('*'):
1150 if (FCT (p, string, string_end, no_leading_period, flags, NULL,
1151 alloca_used) == 0)
1152 goto success;
1153 /* FALLTHROUGH */
1155 case L('+'):
1158 for (rs = string; rs <= string_end; ++rs)
1159 /* First match the prefix with the current pattern with the
1160 current pattern. */
1161 if (FCT (list->str, string, rs, no_leading_period,
1162 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1163 NULL, alloca_used) == 0
1164 /* This was successful. Now match the rest with the rest
1165 of the pattern. */
1166 && (FCT (p, rs, string_end,
1167 rs == string
1168 ? no_leading_period
1169 : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1170 flags & FNM_FILE_NAME
1171 ? flags : flags & ~FNM_PERIOD, NULL, alloca_used) == 0
1172 /* This didn't work. Try the whole pattern. */
1173 || (rs != string
1174 && FCT (pattern - 1, rs, string_end,
1175 rs == string
1176 ? no_leading_period
1177 : (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
1178 ? 1 : 0),
1179 flags & FNM_FILE_NAME
1180 ? flags : flags & ~FNM_PERIOD, NULL,
1181 alloca_used) == 0)))
1182 /* It worked. Signal success. */
1183 goto success;
1185 while ((list = list->next) != NULL);
1187 /* None of the patterns lead to a match. */
1188 retval = FNM_NOMATCH;
1189 break;
1191 case L('?'):
1192 if (FCT (p, string, string_end, no_leading_period, flags, NULL,
1193 alloca_used) == 0)
1194 goto success;
1195 /* FALLTHROUGH */
1197 case L('@'):
1199 /* I cannot believe it but `strcat' is actually acceptable
1200 here. Match the entire string with the prefix from the
1201 pattern list and the rest of the pattern following the
1202 pattern list. */
1203 if (FCT (STRCAT (list->str, p), string, string_end,
1204 no_leading_period,
1205 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1206 NULL, alloca_used) == 0)
1207 /* It worked. Signal success. */
1208 goto success;
1209 while ((list = list->next) != NULL);
1211 /* None of the patterns lead to a match. */
1212 retval = FNM_NOMATCH;
1213 break;
1215 case L('!'):
1216 for (rs = string; rs <= string_end; ++rs)
1218 struct patternlist *runp;
1220 for (runp = list; runp != NULL; runp = runp->next)
1221 if (FCT (runp->str, string, rs, no_leading_period,
1222 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1223 NULL, alloca_used) == 0)
1224 break;
1226 /* If none of the patterns matched see whether the rest does. */
1227 if (runp == NULL
1228 && (FCT (p, rs, string_end,
1229 rs == string
1230 ? no_leading_period
1231 : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1232 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1233 NULL, alloca_used) == 0))
1234 /* This is successful. */
1235 goto success;
1238 /* None of the patterns together with the rest of the pattern
1239 lead to a match. */
1240 retval = FNM_NOMATCH;
1241 break;
1243 default:
1244 assert (! "Invalid extended matching operator");
1245 retval = -1;
1246 break;
1249 success:
1250 out:
1251 if (any_malloced)
1252 while (list != NULL)
1254 struct patternlist *old = list;
1255 list = list->next;
1256 if (old->malloced)
1257 free (old);
1260 return retval;
1264 #undef FOLD
1265 #undef CHAR
1266 #undef UCHAR
1267 #undef INT
1268 #undef FCT
1269 #undef EXT
1270 #undef END
1271 #undef STRUCT
1272 #undef MEMPCPY
1273 #undef MEMCHR
1274 #undef STRCOLL
1275 #undef STRLEN
1276 #undef STRCAT
1277 #undef L
1278 #undef BTOWC