Fix BZ 18036 buffer overflow (read past end of buffer) in internal_fnmatch
[glibc.git] / posix / fnmatch_loop.c
blobf46c9dfedb6e95cfcce1f21934894ebd049b2fe7
1 /* Copyright (C) 1991-2015 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <http://www.gnu.org/licenses/>. */
18 #include <stdint.h>
20 struct STRUCT
22 const CHAR *pattern;
23 const CHAR *string;
24 int no_leading_period;
27 /* Match STRING against the filename pattern PATTERN, returning zero if
28 it matches, nonzero if not. */
29 static int FCT (const CHAR *pattern, const CHAR *string,
30 const CHAR *string_end, int no_leading_period, int flags,
31 struct STRUCT *ends, size_t alloca_used)
32 internal_function;
33 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
34 const CHAR *string_end, int no_leading_period, int flags,
35 size_t alloca_used)
36 internal_function;
37 static const CHAR *END (const CHAR *patternp) internal_function;
39 static int
40 internal_function
41 FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
42 const CHAR *pattern;
43 const CHAR *string;
44 const CHAR *string_end;
45 int no_leading_period;
46 int flags;
47 struct STRUCT *ends;
48 size_t alloca_used;
50 const CHAR *p = pattern, *n = string;
51 UCHAR c;
52 #ifdef _LIBC
53 # if WIDE_CHAR_VERSION
54 const char *collseq = (const char *)
55 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
56 # else
57 const UCHAR *collseq = (const UCHAR *)
58 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
59 # endif
60 #endif
62 while ((c = *p++) != L('\0'))
64 int new_no_leading_period = 0;
65 c = FOLD (c);
67 switch (c)
69 case L('?'):
70 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
72 int res = EXT (c, p, n, string_end, no_leading_period,
73 flags, alloca_used);
74 if (res != -1)
75 return res;
78 if (n == string_end)
79 return FNM_NOMATCH;
80 else if (*n == L('/') && (flags & FNM_FILE_NAME))
81 return FNM_NOMATCH;
82 else if (*n == L('.') && no_leading_period)
83 return FNM_NOMATCH;
84 break;
86 case L('\\'):
87 if (!(flags & FNM_NOESCAPE))
89 c = *p++;
90 if (c == L('\0'))
91 /* Trailing \ loses. */
92 return FNM_NOMATCH;
93 c = FOLD (c);
95 if (n == string_end || FOLD ((UCHAR) *n) != c)
96 return FNM_NOMATCH;
97 break;
99 case L('*'):
100 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
102 int res = EXT (c, p, n, string_end, no_leading_period,
103 flags, alloca_used);
104 if (res != -1)
105 return res;
107 else if (ends != NULL)
109 ends->pattern = p - 1;
110 ends->string = n;
111 ends->no_leading_period = no_leading_period;
112 return 0;
115 if (n != string_end && *n == L('.') && no_leading_period)
116 return FNM_NOMATCH;
118 for (c = *p++; c == L('?') || c == L('*'); c = *p++)
120 if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
122 const CHAR *endp = END (p);
123 if (endp != p)
125 /* This is a pattern. Skip over it. */
126 p = endp;
127 continue;
131 if (c == L('?'))
133 /* A ? needs to match one character. */
134 if (n == string_end)
135 /* There isn't another character; no match. */
136 return FNM_NOMATCH;
137 else if (*n == L('/')
138 && __builtin_expect (flags & FNM_FILE_NAME, 0))
139 /* A slash does not match a wildcard under
140 FNM_FILE_NAME. */
141 return FNM_NOMATCH;
142 else
143 /* One character of the string is consumed in matching
144 this ? wildcard, so *??? won't match if there are
145 less than three characters. */
146 ++n;
150 if (c == L('\0'))
151 /* The wildcard(s) is/are the last element of the pattern.
152 If the name is a file name and contains another slash
153 this means it cannot match, unless the FNM_LEADING_DIR
154 flag is set. */
156 int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
158 if (flags & FNM_FILE_NAME)
160 if (flags & FNM_LEADING_DIR)
161 result = 0;
162 else
164 if (MEMCHR (n, L('/'), string_end - n) == NULL)
165 result = 0;
169 return result;
171 else
173 const CHAR *endp;
174 struct STRUCT end;
176 end.pattern = NULL;
177 endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
178 string_end - n);
179 if (endp == NULL)
180 endp = string_end;
182 if (c == L('[')
183 || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
184 && (c == L('@') || c == L('+') || c == L('!'))
185 && *p == L('(')))
187 int flags2 = ((flags & FNM_FILE_NAME)
188 ? flags : (flags & ~FNM_PERIOD));
190 for (--p; n < endp; ++n, no_leading_period = 0)
191 if (FCT (p, n, string_end, no_leading_period, flags2,
192 &end, alloca_used) == 0)
193 goto found;
195 else if (c == L('/') && (flags & FNM_FILE_NAME))
197 while (n < string_end && *n != L('/'))
198 ++n;
199 if (n < string_end && *n == L('/')
200 && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags,
201 NULL, alloca_used) == 0))
202 return 0;
204 else
206 int flags2 = ((flags & FNM_FILE_NAME)
207 ? flags : (flags & ~FNM_PERIOD));
209 if (c == L('\\') && !(flags & FNM_NOESCAPE))
210 c = *p;
211 c = FOLD (c);
212 for (--p; n < endp; ++n, no_leading_period = 0)
213 if (FOLD ((UCHAR) *n) == c
214 && (FCT (p, n, string_end, no_leading_period, flags2,
215 &end, alloca_used) == 0))
217 found:
218 if (end.pattern == NULL)
219 return 0;
220 break;
222 if (end.pattern != NULL)
224 p = end.pattern;
225 n = end.string;
226 no_leading_period = end.no_leading_period;
227 continue;
232 /* If we come here no match is possible with the wildcard. */
233 return FNM_NOMATCH;
235 case L('['):
237 /* Nonzero if the sense of the character class is inverted. */
238 const CHAR *p_init = p;
239 const CHAR *n_init = n;
240 int not;
241 CHAR cold;
242 UCHAR fn;
244 if (posixly_correct == 0)
245 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
247 if (n == string_end)
248 return FNM_NOMATCH;
250 if (*n == L('.') && no_leading_period)
251 return FNM_NOMATCH;
253 if (*n == L('/') && (flags & FNM_FILE_NAME))
254 /* `/' cannot be matched. */
255 return FNM_NOMATCH;
257 not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
258 if (not)
259 ++p;
261 fn = FOLD ((UCHAR) *n);
263 c = *p++;
264 for (;;)
266 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
268 if (*p == L('\0'))
269 return FNM_NOMATCH;
270 c = FOLD ((UCHAR) *p);
271 ++p;
273 goto normal_bracket;
275 else if (c == L('[') && *p == L(':'))
277 /* Leave room for the null. */
278 CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
279 size_t c1 = 0;
280 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
281 wctype_t wt;
282 #endif
283 const CHAR *startp = p;
285 for (;;)
287 if (c1 == CHAR_CLASS_MAX_LENGTH)
288 /* The name is too long and therefore the pattern
289 is ill-formed. */
290 return FNM_NOMATCH;
292 c = *++p;
293 if (c == L(':') && p[1] == L(']'))
295 p += 2;
296 break;
298 if (c < L('a') || c >= L('z'))
300 /* This cannot possibly be a character class name.
301 Match it as a normal range. */
302 p = startp;
303 c = L('[');
304 goto normal_bracket;
306 str[c1++] = c;
308 str[c1] = L('\0');
310 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
311 wt = IS_CHAR_CLASS (str);
312 if (wt == 0)
313 /* Invalid character class name. */
314 return FNM_NOMATCH;
316 # if defined _LIBC && ! WIDE_CHAR_VERSION
317 /* The following code is glibc specific but does
318 there a good job in speeding up the code since
319 we can avoid the btowc() call. */
320 if (_ISCTYPE ((UCHAR) *n, wt))
321 goto matched;
322 # else
323 if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
324 goto matched;
325 # endif
326 #else
327 if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
328 || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
329 || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
330 || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
331 || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
332 || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
333 || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
334 || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
335 || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
336 || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
337 || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
338 || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
339 goto matched;
340 #endif
341 c = *p++;
343 #ifdef _LIBC
344 else if (c == L('[') && *p == L('='))
346 /* It's important that STR be a scalar variable rather
347 than a one-element array, because GCC (at least 4.9.2
348 -O2 on x86-64) can be confused by the array and
349 diagnose a "used initialized" in a dead branch in the
350 findidx function. */
351 UCHAR str;
352 uint32_t nrules =
353 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
354 const CHAR *startp = p;
356 c = *++p;
357 if (c == L('\0'))
359 p = startp;
360 c = L('[');
361 goto normal_bracket;
363 str = c;
365 c = *++p;
366 if (c != L('=') || p[1] != L(']'))
368 p = startp;
369 c = L('[');
370 goto normal_bracket;
372 p += 2;
374 if (nrules == 0)
376 if ((UCHAR) *n == str)
377 goto matched;
379 else
381 const int32_t *table;
382 # if WIDE_CHAR_VERSION
383 const int32_t *weights;
384 const wint_t *extra;
385 # else
386 const unsigned char *weights;
387 const unsigned char *extra;
388 # endif
389 const int32_t *indirect;
390 int32_t idx;
391 const UCHAR *cp = (const UCHAR *) &str;
393 # if WIDE_CHAR_VERSION
394 table = (const int32_t *)
395 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
396 weights = (const int32_t *)
397 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
398 extra = (const wint_t *)
399 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
400 indirect = (const int32_t *)
401 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
402 # else
403 table = (const int32_t *)
404 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
405 weights = (const unsigned char *)
406 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
407 extra = (const unsigned char *)
408 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
409 indirect = (const int32_t *)
410 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
411 # endif
413 idx = FINDIDX (table, indirect, extra, &cp, 1);
414 if (idx != 0)
416 /* We found a table entry. Now see whether the
417 character we are currently at has the same
418 equivalance class value. */
419 int len = weights[idx & 0xffffff];
420 int32_t idx2;
421 const UCHAR *np = (const UCHAR *) n;
423 idx2 = FINDIDX (table, indirect, extra,
424 &np, string_end - n);
425 if (idx2 != 0
426 && (idx >> 24) == (idx2 >> 24)
427 && len == weights[idx2 & 0xffffff])
429 int cnt = 0;
431 idx &= 0xffffff;
432 idx2 &= 0xffffff;
434 while (cnt < len
435 && (weights[idx + 1 + cnt]
436 == weights[idx2 + 1 + cnt]))
437 ++cnt;
439 if (cnt == len)
440 goto matched;
445 c = *p++;
447 #endif
448 else if (c == L('\0'))
450 /* [ unterminated, treat as normal character. */
451 p = p_init;
452 n = n_init;
453 c = L('[');
454 goto normal_match;
456 else
458 int is_range = 0;
460 #ifdef _LIBC
461 int is_seqval = 0;
463 if (c == L('[') && *p == L('.'))
465 uint32_t nrules =
466 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
467 const CHAR *startp = p;
468 size_t c1 = 0;
470 while (1)
472 c = *++p;
473 if (c == L('.') && p[1] == L(']'))
475 p += 2;
476 break;
478 if (c == '\0')
479 return FNM_NOMATCH;
480 ++c1;
483 /* We have to handling the symbols differently in
484 ranges since then the collation sequence is
485 important. */
486 is_range = *p == L('-') && p[1] != L('\0');
488 if (nrules == 0)
490 /* There are no names defined in the collation
491 data. Therefore we only accept the trivial
492 names consisting of the character itself. */
493 if (c1 != 1)
494 return FNM_NOMATCH;
496 if (!is_range && *n == startp[1])
497 goto matched;
499 cold = startp[1];
500 c = *p++;
502 else
504 int32_t table_size;
505 const int32_t *symb_table;
506 # if WIDE_CHAR_VERSION
507 char str[c1];
508 unsigned int strcnt;
509 # else
510 # define str (startp + 1)
511 # endif
512 const unsigned char *extra;
513 int32_t idx;
514 int32_t elem;
515 int32_t second;
516 int32_t hash;
518 # if WIDE_CHAR_VERSION
519 /* We have to convert the name to a single-byte
520 string. This is possible since the names
521 consist of ASCII characters and the internal
522 representation is UCS4. */
523 for (strcnt = 0; strcnt < c1; ++strcnt)
524 str[strcnt] = startp[1 + strcnt];
525 #endif
527 table_size =
528 _NL_CURRENT_WORD (LC_COLLATE,
529 _NL_COLLATE_SYMB_HASH_SIZEMB);
530 symb_table = (const int32_t *)
531 _NL_CURRENT (LC_COLLATE,
532 _NL_COLLATE_SYMB_TABLEMB);
533 extra = (const unsigned char *)
534 _NL_CURRENT (LC_COLLATE,
535 _NL_COLLATE_SYMB_EXTRAMB);
537 /* Locate the character in the hashing table. */
538 hash = elem_hash (str, c1);
540 idx = 0;
541 elem = hash % table_size;
542 if (symb_table[2 * elem] != 0)
544 second = hash % (table_size - 2) + 1;
548 /* First compare the hashing value. */
549 if (symb_table[2 * elem] == hash
550 && (c1
551 == extra[symb_table[2 * elem + 1]])
552 && memcmp (str,
553 &extra[symb_table[2 * elem
554 + 1]
555 + 1], c1) == 0)
557 /* Yep, this is the entry. */
558 idx = symb_table[2 * elem + 1];
559 idx += 1 + extra[idx];
560 break;
563 /* Next entry. */
564 elem += second;
566 while (symb_table[2 * elem] != 0);
569 if (symb_table[2 * elem] != 0)
571 /* Compare the byte sequence but only if
572 this is not part of a range. */
573 # if WIDE_CHAR_VERSION
574 int32_t *wextra;
576 idx += 1 + extra[idx];
577 /* Adjust for the alignment. */
578 idx = (idx + 3) & ~3;
580 wextra = (int32_t *) &extra[idx + 4];
581 # endif
583 if (! is_range)
585 # if WIDE_CHAR_VERSION
586 for (c1 = 0;
587 (int32_t) c1 < wextra[idx];
588 ++c1)
589 if (n[c1] != wextra[1 + c1])
590 break;
592 if ((int32_t) c1 == wextra[idx])
593 goto matched;
594 # else
595 for (c1 = 0; c1 < extra[idx]; ++c1)
596 if (n[c1] != extra[1 + c1])
597 break;
599 if (c1 == extra[idx])
600 goto matched;
601 # endif
604 /* Get the collation sequence value. */
605 is_seqval = 1;
606 # if WIDE_CHAR_VERSION
607 cold = wextra[1 + wextra[idx]];
608 # else
609 /* Adjust for the alignment. */
610 idx += 1 + extra[idx];
611 idx = (idx + 3) & ~4;
612 cold = *((int32_t *) &extra[idx]);
613 # endif
615 c = *p++;
617 else if (c1 == 1)
619 /* No valid character. Match it as a
620 single byte. */
621 if (!is_range && *n == str[0])
622 goto matched;
624 cold = str[0];
625 c = *p++;
627 else
628 return FNM_NOMATCH;
631 else
632 # undef str
633 #endif
635 c = FOLD (c);
636 normal_bracket:
638 /* We have to handling the symbols differently in
639 ranges since then the collation sequence is
640 important. */
641 is_range = (*p == L('-') && p[1] != L('\0')
642 && p[1] != L(']'));
644 if (!is_range && c == fn)
645 goto matched;
647 /* This is needed if we goto normal_bracket; from
648 outside of is_seqval's scope. */
649 is_seqval = 0;
650 cold = c;
651 c = *p++;
654 if (c == L('-') && *p != L(']'))
656 #if _LIBC
657 /* We have to find the collation sequence
658 value for C. Collation sequence is nothing
659 we can regularly access. The sequence
660 value is defined by the order in which the
661 definitions of the collation values for the
662 various characters appear in the source
663 file. A strange concept, nowhere
664 documented. */
665 uint32_t fcollseq;
666 uint32_t lcollseq;
667 UCHAR cend = *p++;
669 # if WIDE_CHAR_VERSION
670 /* Search in the `names' array for the characters. */
671 fcollseq = __collseq_table_lookup (collseq, fn);
672 if (fcollseq == ~((uint32_t) 0))
673 /* XXX We don't know anything about the character
674 we are supposed to match. This means we are
675 failing. */
676 goto range_not_matched;
678 if (is_seqval)
679 lcollseq = cold;
680 else
681 lcollseq = __collseq_table_lookup (collseq, cold);
682 # else
683 fcollseq = collseq[fn];
684 lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
685 # endif
687 is_seqval = 0;
688 if (cend == L('[') && *p == L('.'))
690 uint32_t nrules =
691 _NL_CURRENT_WORD (LC_COLLATE,
692 _NL_COLLATE_NRULES);
693 const CHAR *startp = p;
694 size_t c1 = 0;
696 while (1)
698 c = *++p;
699 if (c == L('.') && p[1] == L(']'))
701 p += 2;
702 break;
704 if (c == '\0')
705 return FNM_NOMATCH;
706 ++c1;
709 if (nrules == 0)
711 /* There are no names defined in the
712 collation data. Therefore we only
713 accept the trivial names consisting
714 of the character itself. */
715 if (c1 != 1)
716 return FNM_NOMATCH;
718 cend = startp[1];
720 else
722 int32_t table_size;
723 const int32_t *symb_table;
724 # if WIDE_CHAR_VERSION
725 char str[c1];
726 unsigned int strcnt;
727 # else
728 # define str (startp + 1)
729 # endif
730 const unsigned char *extra;
731 int32_t idx;
732 int32_t elem;
733 int32_t second;
734 int32_t hash;
736 # if WIDE_CHAR_VERSION
737 /* We have to convert the name to a single-byte
738 string. This is possible since the names
739 consist of ASCII characters and the internal
740 representation is UCS4. */
741 for (strcnt = 0; strcnt < c1; ++strcnt)
742 str[strcnt] = startp[1 + strcnt];
743 # endif
745 table_size =
746 _NL_CURRENT_WORD (LC_COLLATE,
747 _NL_COLLATE_SYMB_HASH_SIZEMB);
748 symb_table = (const int32_t *)
749 _NL_CURRENT (LC_COLLATE,
750 _NL_COLLATE_SYMB_TABLEMB);
751 extra = (const unsigned char *)
752 _NL_CURRENT (LC_COLLATE,
753 _NL_COLLATE_SYMB_EXTRAMB);
755 /* Locate the character in the hashing
756 table. */
757 hash = elem_hash (str, c1);
759 idx = 0;
760 elem = hash % table_size;
761 if (symb_table[2 * elem] != 0)
763 second = hash % (table_size - 2) + 1;
767 /* First compare the hashing value. */
768 if (symb_table[2 * elem] == hash
769 && (c1
770 == extra[symb_table[2 * elem + 1]])
771 && memcmp (str,
772 &extra[symb_table[2 * elem + 1]
773 + 1], c1) == 0)
775 /* Yep, this is the entry. */
776 idx = symb_table[2 * elem + 1];
777 idx += 1 + extra[idx];
778 break;
781 /* Next entry. */
782 elem += second;
784 while (symb_table[2 * elem] != 0);
787 if (symb_table[2 * elem] != 0)
789 /* Compare the byte sequence but only if
790 this is not part of a range. */
791 # if WIDE_CHAR_VERSION
792 int32_t *wextra;
794 idx += 1 + extra[idx];
795 /* Adjust for the alignment. */
796 idx = (idx + 3) & ~4;
798 wextra = (int32_t *) &extra[idx + 4];
799 # endif
800 /* Get the collation sequence value. */
801 is_seqval = 1;
802 # if WIDE_CHAR_VERSION
803 cend = wextra[1 + wextra[idx]];
804 # else
805 /* Adjust for the alignment. */
806 idx += 1 + extra[idx];
807 idx = (idx + 3) & ~4;
808 cend = *((int32_t *) &extra[idx]);
809 # endif
811 else if (symb_table[2 * elem] != 0 && c1 == 1)
813 cend = str[0];
814 c = *p++;
816 else
817 return FNM_NOMATCH;
819 # undef str
821 else
823 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
824 cend = *p++;
825 if (cend == L('\0'))
826 return FNM_NOMATCH;
827 cend = FOLD (cend);
830 /* XXX It is not entirely clear to me how to handle
831 characters which are not mentioned in the
832 collation specification. */
833 if (
834 # if WIDE_CHAR_VERSION
835 lcollseq == 0xffffffff ||
836 # endif
837 lcollseq <= fcollseq)
839 /* We have to look at the upper bound. */
840 uint32_t hcollseq;
842 if (is_seqval)
843 hcollseq = cend;
844 else
846 # if WIDE_CHAR_VERSION
847 hcollseq =
848 __collseq_table_lookup (collseq, cend);
849 if (hcollseq == ~((uint32_t) 0))
851 /* Hum, no information about the upper
852 bound. The matching succeeds if the
853 lower bound is matched exactly. */
854 if (lcollseq != fcollseq)
855 goto range_not_matched;
857 goto matched;
859 # else
860 hcollseq = collseq[cend];
861 # endif
864 if (lcollseq <= hcollseq && fcollseq <= hcollseq)
865 goto matched;
867 # if WIDE_CHAR_VERSION
868 range_not_matched:
869 # endif
870 #else
871 /* We use a boring value comparison of the character
872 values. This is better than comparing using
873 `strcoll' since the latter would have surprising
874 and sometimes fatal consequences. */
875 UCHAR cend = *p++;
877 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
878 cend = *p++;
879 if (cend == L('\0'))
880 return FNM_NOMATCH;
882 /* It is a range. */
883 if (cold <= fn && fn <= cend)
884 goto matched;
885 #endif
887 c = *p++;
891 if (c == L(']'))
892 break;
895 if (!not)
896 return FNM_NOMATCH;
897 break;
899 matched:
900 /* Skip the rest of the [...] that already matched. */
901 while ((c = *p++) != L (']'))
903 if (c == L('\0'))
904 /* [... (unterminated) loses. */
905 return FNM_NOMATCH;
907 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
909 if (*p == L('\0'))
910 return FNM_NOMATCH;
911 /* XXX 1003.2d11 is unclear if this is right. */
912 ++p;
914 else if (c == L('[') && *p == L(':'))
916 int c1 = 0;
917 const CHAR *startp = p;
919 while (1)
921 c = *++p;
922 if (++c1 == CHAR_CLASS_MAX_LENGTH)
923 return FNM_NOMATCH;
925 if (*p == L(':') && p[1] == L(']'))
926 break;
928 if (c < L('a') || c >= L('z'))
930 p = startp - 2;
931 break;
934 p += 2;
936 else if (c == L('[') && *p == L('='))
938 c = *++p;
939 if (c == L('\0'))
940 return FNM_NOMATCH;
941 c = *++p;
942 if (c != L('=') || p[1] != L(']'))
943 return FNM_NOMATCH;
944 p += 2;
946 else if (c == L('[') && *p == L('.'))
948 while (1)
950 c = *++p;
951 if (c == L('\0'))
952 return FNM_NOMATCH;
954 if (c == L('.') && p[1] == L(']'))
955 break;
957 p += 2;
960 if (not)
961 return FNM_NOMATCH;
963 break;
965 case L('+'):
966 case L('@'):
967 case L('!'):
968 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
970 int res = EXT (c, p, n, string_end, no_leading_period, flags,
971 alloca_used);
972 if (res != -1)
973 return res;
975 goto normal_match;
977 case L('/'):
978 if (NO_LEADING_PERIOD (flags))
980 if (n == string_end || c != (UCHAR) *n)
981 return FNM_NOMATCH;
983 new_no_leading_period = 1;
984 break;
986 /* FALLTHROUGH */
987 default:
988 normal_match:
989 if (n == string_end || c != FOLD ((UCHAR) *n))
990 return FNM_NOMATCH;
993 no_leading_period = new_no_leading_period;
994 ++n;
997 if (n == string_end)
998 return 0;
1000 if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
1001 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
1002 return 0;
1004 return FNM_NOMATCH;
1008 static const CHAR *
1009 internal_function
1010 END (const CHAR *pattern)
1012 const CHAR *p = pattern;
1014 while (1)
1015 if (*++p == L('\0'))
1016 /* This is an invalid pattern. */
1017 return pattern;
1018 else if (*p == L('['))
1020 /* Handle brackets special. */
1021 if (posixly_correct == 0)
1022 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1024 /* Skip the not sign. We have to recognize it because of a possibly
1025 following ']'. */
1026 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1027 ++p;
1028 /* A leading ']' is recognized as such. */
1029 if (*p == L(']'))
1030 ++p;
1031 /* Skip over all characters of the list. */
1032 while (*p != L(']'))
1033 if (*p++ == L('\0'))
1034 /* This is no valid pattern. */
1035 return pattern;
1037 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1038 || *p == L('!')) && p[1] == L('('))
1040 p = END (p + 1);
1041 if (*p == L('\0'))
1042 /* This is an invalid pattern. */
1043 return pattern;
1045 else if (*p == L(')'))
1046 break;
1048 return p + 1;
1052 static int
1053 internal_function
1054 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1055 int no_leading_period, int flags, size_t alloca_used)
1057 const CHAR *startp;
1058 int level;
1059 struct patternlist
1061 struct patternlist *next;
1062 CHAR malloced;
1063 CHAR str[0];
1064 } *list = NULL;
1065 struct patternlist **lastp = &list;
1066 size_t pattern_len = STRLEN (pattern);
1067 int any_malloced = 0;
1068 const CHAR *p;
1069 const CHAR *rs;
1070 int retval = 0;
1072 /* Parse the pattern. Store the individual parts in the list. */
1073 level = 0;
1074 for (startp = p = pattern + 1; level >= 0; ++p)
1075 if (*p == L('\0'))
1077 /* This is an invalid pattern. */
1078 retval = -1;
1079 goto out;
1081 else if (*p == L('['))
1083 /* Handle brackets special. */
1084 if (posixly_correct == 0)
1085 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1087 /* Skip the not sign. We have to recognize it because of a possibly
1088 following ']'. */
1089 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1090 ++p;
1091 /* A leading ']' is recognized as such. */
1092 if (*p == L(']'))
1093 ++p;
1094 /* Skip over all characters of the list. */
1095 while (*p != L(']'))
1096 if (*p++ == L('\0'))
1098 /* This is no valid pattern. */
1099 retval = -1;
1100 goto out;
1103 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1104 || *p == L('!')) && p[1] == L('('))
1105 /* Remember the nesting level. */
1106 ++level;
1107 else if (*p == L(')'))
1109 if (level-- == 0)
1111 /* This means we found the end of the pattern. */
1112 #define NEW_PATTERN \
1113 struct patternlist *newp; \
1114 size_t slen = (opt == L('?') || opt == L('@') \
1115 ? pattern_len : (p - startp + 1)); \
1116 slen = sizeof (struct patternlist) + (slen * sizeof (CHAR)); \
1117 int malloced = ! __libc_use_alloca (alloca_used + slen); \
1118 if (__builtin_expect (malloced, 0)) \
1120 newp = malloc (slen); \
1121 if (newp == NULL) \
1123 retval = -2; \
1124 goto out; \
1126 any_malloced = 1; \
1128 else \
1129 newp = alloca_account (slen, alloca_used); \
1130 newp->next = NULL; \
1131 newp->malloced = malloced; \
1132 *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0'); \
1133 *lastp = newp; \
1134 lastp = &newp->next
1135 NEW_PATTERN;
1138 else if (*p == L('|'))
1140 if (level == 0)
1142 NEW_PATTERN;
1143 startp = p + 1;
1146 assert (list != NULL);
1147 assert (p[-1] == L(')'));
1148 #undef NEW_PATTERN
1150 switch (opt)
1152 case L('*'):
1153 if (FCT (p, string, string_end, no_leading_period, flags, NULL,
1154 alloca_used) == 0)
1155 goto success;
1156 /* FALLTHROUGH */
1158 case L('+'):
1161 for (rs = string; rs <= string_end; ++rs)
1162 /* First match the prefix with the current pattern with the
1163 current pattern. */
1164 if (FCT (list->str, string, rs, no_leading_period,
1165 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1166 NULL, alloca_used) == 0
1167 /* This was successful. Now match the rest with the rest
1168 of the pattern. */
1169 && (FCT (p, rs, string_end,
1170 rs == string
1171 ? no_leading_period
1172 : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1173 flags & FNM_FILE_NAME
1174 ? flags : flags & ~FNM_PERIOD, NULL, alloca_used) == 0
1175 /* This didn't work. Try the whole pattern. */
1176 || (rs != string
1177 && FCT (pattern - 1, rs, string_end,
1178 rs == string
1179 ? no_leading_period
1180 : (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
1181 ? 1 : 0),
1182 flags & FNM_FILE_NAME
1183 ? flags : flags & ~FNM_PERIOD, NULL,
1184 alloca_used) == 0)))
1185 /* It worked. Signal success. */
1186 goto success;
1188 while ((list = list->next) != NULL);
1190 /* None of the patterns lead to a match. */
1191 retval = FNM_NOMATCH;
1192 break;
1194 case L('?'):
1195 if (FCT (p, string, string_end, no_leading_period, flags, NULL,
1196 alloca_used) == 0)
1197 goto success;
1198 /* FALLTHROUGH */
1200 case L('@'):
1202 /* I cannot believe it but `strcat' is actually acceptable
1203 here. Match the entire string with the prefix from the
1204 pattern list and the rest of the pattern following the
1205 pattern list. */
1206 if (FCT (STRCAT (list->str, p), string, string_end,
1207 no_leading_period,
1208 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1209 NULL, alloca_used) == 0)
1210 /* It worked. Signal success. */
1211 goto success;
1212 while ((list = list->next) != NULL);
1214 /* None of the patterns lead to a match. */
1215 retval = FNM_NOMATCH;
1216 break;
1218 case L('!'):
1219 for (rs = string; rs <= string_end; ++rs)
1221 struct patternlist *runp;
1223 for (runp = list; runp != NULL; runp = runp->next)
1224 if (FCT (runp->str, string, rs, no_leading_period,
1225 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1226 NULL, alloca_used) == 0)
1227 break;
1229 /* If none of the patterns matched see whether the rest does. */
1230 if (runp == NULL
1231 && (FCT (p, rs, string_end,
1232 rs == string
1233 ? no_leading_period
1234 : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1235 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1236 NULL, alloca_used) == 0))
1237 /* This is successful. */
1238 goto success;
1241 /* None of the patterns together with the rest of the pattern
1242 lead to a match. */
1243 retval = FNM_NOMATCH;
1244 break;
1246 default:
1247 assert (! "Invalid extended matching operator");
1248 retval = -1;
1249 break;
1252 success:
1253 out:
1254 if (any_malloced)
1255 while (list != NULL)
1257 struct patternlist *old = list;
1258 list = list->next;
1259 if (old->malloced)
1260 free (old);
1263 return retval;
1267 #undef FOLD
1268 #undef CHAR
1269 #undef UCHAR
1270 #undef INT
1271 #undef FCT
1272 #undef EXT
1273 #undef END
1274 #undef STRUCT
1275 #undef MEMPCPY
1276 #undef MEMCHR
1277 #undef STRCOLL
1278 #undef STRLEN
1279 #undef STRCAT
1280 #undef L
1281 #undef BTOWC
1282 #undef WIDE_CHAR_VERSION
1283 #undef FINDIDX