benchtests: Improve benchtests for strstr
[glibc.git] / posix / fnmatch_loop.c
blob6994a09ab52feeac384ab036bb5bbc9de36293c7
1 /* Copyright (C) 1991-2024 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <https://www.gnu.org/licenses/>. */
18 #ifdef _LIBC
19 # include <stdint.h>
20 #endif
/* Resume point filled in by FCT when it is called with a non-null ENDS
   argument and reaches a '*': the caller continues matching from the
   recorded pattern position, string position and leading-period state.  */
22 struct STRUCT
/* Pattern position of the '*' at which matching stopped.  */
24 const CHAR *pattern;
/* String position reached when matching stopped.  */
25 const CHAR *string;
/* Leading-period state in effect at that point.  */
26 bool no_leading_period;
29 /* Match STRING against the file name pattern PATTERN, returning zero if
30 it matches, nonzero if not. */
31 static int FCT (const CHAR *pattern, const CHAR *string,
32 const CHAR *string_end, bool no_leading_period, int flags,
33 struct STRUCT *ends);
/* Match STRING against the extended-match group that follows operator
   character OPT; PATTERN points at the group's '('.  */
34 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
35 const CHAR *string_end, bool no_leading_period, int flags);
/* Return the position just past the ')' closing the group that starts at
   PATTERNP, or PATTERNP itself if the group is not terminated.  */
36 static const CHAR *END (const CHAR *patternp);
/* Match the string [STRING, STRING_END) against PATTERN.

   NO_LEADING_PERIOD says whether a '.' at the current string position is
   protected from wildcards and bracket expressions; FLAGS are the FNM_*
   flags.  If ENDS is non-null, matching stops at the first '*' that is not
   taken by EXT: the current pattern position, string position and
   leading-period state are stored in *ENDS and 0 is returned so the
   caller can resume from there.

   Returns 0 on a match and FNM_NOMATCH otherwise; a result other than -1
   from EXT is passed through unchanged.  */
38 static int
39 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
40 bool no_leading_period, int flags, struct STRUCT *ends)
42 const CHAR *p = pattern, *n = string;
43 UCHAR c;
/* Collation sequence table of the current locale; consulted below when a
   bracket expression contains a range.  */
44 #ifdef _LIBC
45 # if WIDE_CHAR_VERSION
46 const char *collseq = (const char *)
47 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
48 # else
49 const UCHAR *collseq = (const UCHAR *)
50 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
51 # endif
52 #endif
/* Consume the pattern one element at a time.  Cases that leave the switch
   with 'break' have matched exactly one string character, which the common
   tail after the switch steps over.  */
54 while ((c = *p++) != L_('\0'))
/* Only the '/' case sets this; it becomes NO_LEADING_PERIOD for the next
   string character in the common tail.  */
56 bool new_no_leading_period = false;
57 c = FOLD (c);
59 switch (c)
61 case L_('?'):
/* "?(" may open an extended-match group.  EXT returns -1 when the text is
   not a well-formed group, in which case '?' is handled as the ordinary
   single-character wildcard below.  */
62 if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(')
64 int res = EXT (c, p, n, string_end, no_leading_period, flags);
65 if (res != -1)
66 return res;
/* Ordinary '?': there must be a character to match, and it may be neither
   a '/' under FNM_FILE_NAME nor a protected leading '.'.  */
69 if (n == string_end)
70 return FNM_NOMATCH;
71 else if (*n == L_('/') && (flags & FNM_FILE_NAME))
72 return FNM_NOMATCH;
73 else if (*n == L_('.') && no_leading_period)
74 return FNM_NOMATCH;
75 break;
77 case L_('\\'):
/* Unless FNM_NOESCAPE is set, the backslash quotes the next pattern
   character, which must then match literally.  */
78 if (!(flags & FNM_NOESCAPE))
80 c = *p++;
81 if (c == L_('\0'))
82 /* Trailing \ loses. */
83 return FNM_NOMATCH;
84 c = FOLD (c);
86 if (n == string_end || FOLD ((UCHAR) *n) != c)
87 return FNM_NOMATCH;
88 break;
90 case L_('*'):
91 if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(')
93 int res = EXT (c, p, n, string_end, no_leading_period, flags);
94 if (res != -1)
95 return res;
/* Called from an enclosing '*' search: do not expand this '*' here, just
   report where matching stopped and let the caller take over.  */
97 else if (ends != NULL)
99 ends->pattern = p - 1;
100 ends->string = n;
101 ends->no_leading_period = no_leading_period;
102 return 0;
/* A protected leading '.' cannot be matched by '*'.  */
105 if (n != string_end && *n == L_('.') && no_leading_period)
106 return FNM_NOMATCH;
/* Collapse the run of '?' and '*' that follows: each '?' consumes one
   string character, further '*' add nothing, and a well-formed "?(...)"
   or "*(...)" group is skipped as a whole.  */
108 for (c = *p++; c == L_('?') || c == L_('*'); c = *p++)
110 if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0)
112 const CHAR *endp = END (p);
113 if (endp != p)
115 /* This is a pattern. Skip over it. */
116 p = endp;
117 continue;
121 if (c == L_('?'))
123 /* A ? needs to match one character. */
124 if (n == string_end)
125 /* There isn't another character; no match. */
126 return FNM_NOMATCH;
127 else if (*n == L_('/')
128 && __glibc_unlikely (flags & FNM_FILE_NAME))
129 /* A slash does not match a wildcard under
130 FNM_FILE_NAME. */
131 return FNM_NOMATCH;
132 else
133 /* One character of the string is consumed in matching
134 this ? wildcard, so *??? won't match if there are
135 less than three characters. */
136 ++n;
140 if (c == L_('\0'))
141 /* The wildcard(s) is/are the last element of the pattern.
142 If the name is a file name and contains another slash
143 this means it cannot match, unless the FNM_LEADING_DIR
144 flag is set. */
146 int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
148 if (flags & FNM_FILE_NAME)
150 if (flags & FNM_LEADING_DIR)
151 result = 0;
152 else
154 if (MEMCHR (n, L_('/'), string_end - n) == NULL)
155 result = 0;
159 return result;
161 else
163 const CHAR *endp;
164 struct STRUCT end;
/* end.pattern stays NULL unless a recursive call below stops at a later
   '*' and records a resume point.  */
166 end.pattern = NULL;
/* ENDP limits how far this '*' may reach: up to the next '/' under
   FNM_FILE_NAME, otherwise up to the end of the string.  */
167 endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'),
168 string_end - n);
169 if (endp == NULL)
170 endp = string_end;
172 if (c == L_('[')
173 || (__glibc_unlikely (flags & FNM_EXTMATCH)
174 && (c == L_('@') || c == L_('+') || c == L_('!'))
175 && *p == L_('(')))
177 int flags2 = ((flags & FNM_FILE_NAME)
178 ? flags : (flags & ~FNM_PERIOD));
/* Try every start position in turn.  P is backed up first so that each
   recursive call re-reads the element held in C.  */
180 for (--p; n < endp; ++n, no_leading_period = false)
181 if (FCT (p, n, string_end, no_leading_period, flags2,
182 &end) == 0)
183 goto found;
/* '*' followed by '/' under FNM_FILE_NAME: skip to the next '/' in the
   string and match the rest of the pattern after it.  */
185 else if (c == L_('/') && (flags & FNM_FILE_NAME))
187 while (n < string_end && *n != L_('/'))
188 ++n;
189 if (n < string_end && *n == L_('/')
190 && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags,
191 NULL) == 0))
192 return 0;
194 else
196 int flags2 = ((flags & FNM_FILE_NAME)
197 ? flags : (flags & ~FNM_PERIOD));
/* If the next element is an escaped character, compare against the
   character after the backslash.  Only string positions where that
   literal matches are tried recursively.  */
199 if (c == L_('\\') && !(flags & FNM_NOESCAPE))
200 c = *p;
201 c = FOLD (c);
202 for (--p; n < endp; ++n, no_leading_period = false)
203 if (FOLD ((UCHAR) *n) == c
204 && (FCT (p, n, string_end, no_leading_period, flags2,
205 &end) == 0))
207 found:
208 if (end.pattern == NULL)
209 return 0;
210 break;
/* The recursive call stopped at another '*': continue the main loop from
   the position it recorded.  */
212 if (end.pattern != NULL)
214 p = end.pattern;
215 n = end.string;
216 no_leading_period = end.no_leading_period;
217 continue;
222 /* If we come here no match is possible with the wildcard. */
223 return FNM_NOMATCH;
/* Bracket expression.  */
225 case L_('['):
227 /* Nonzero if the sense of the character class is inverted. */
228 const CHAR *p_init = p;
229 const CHAR *n_init = n;
230 bool not;
231 CHAR cold;
232 UCHAR fn;
/* posixly_correct caches the environment lookup: 0 means not yet
   determined, 1 means POSIXLY_CORRECT is set, -1 means it is not.  */
234 if (posixly_correct == 0)
235 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
237 if (n == string_end)
238 return FNM_NOMATCH;
240 if (*n == L_('.') && no_leading_period)
241 return FNM_NOMATCH;
243 if (*n == L_('/') && (flags & FNM_FILE_NAME))
244 /* '/' cannot be matched. */
245 return FNM_NOMATCH;
/* '!' always negates the set; '^' does so only when POSIXLY_CORRECT is
   not set.  */
247 not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^')));
248 if (not)
249 ++p;
/* FN is the (folded) string character the set is tested against.  */
251 fn = FOLD ((UCHAR) *n);
253 c = *p++;
/* Walk the bracket contents one element at a time; C holds the first
   character of the current element.  */
254 for (;;)
256 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
258 if (*p == L_('\0'))
259 return FNM_NOMATCH;
260 c = FOLD ((UCHAR) *p);
261 ++p;
263 goto normal_bracket;
/* "[:name:]" character class.  */
265 else if (c == L_('[') && *p == L_(':'))
267 /* Leave room for the null. */
268 CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
269 size_t c1 = 0;
270 wctype_t wt;
271 const CHAR *startp = p;
273 for (;;)
275 if (c1 == CHAR_CLASS_MAX_LENGTH)
276 /* The name is too long and therefore the pattern
277 is ill-formed. */
278 return FNM_NOMATCH;
280 c = *++p;
281 if (c == L_(':') && p[1] == L_(']'))
283 p += 2;
284 break;
/* NOTE(review): the upper bound is exclusive, so a name containing 'z'
   itself is also sent down the normal-bracket path -- confirm that this
   is intended.  */
286 if (c < L_('a') || c >= L_('z'))
288 /* This cannot possibly be a character class name.
289 Match it as a normal range. */
290 p = startp;
291 c = L_('[');
292 goto normal_bracket;
294 str[c1++] = c;
296 str[c1] = L_('\0');
298 wt = IS_CHAR_CLASS (str);
299 if (wt == 0)
300 /* Invalid character class name. */
301 return FNM_NOMATCH;
303 #if defined _LIBC && ! WIDE_CHAR_VERSION
304 /* The following code is glibc specific but does
305 there a good job in speeding up the code since
306 we can avoid the btowc() call. */
307 if (_ISCTYPE ((UCHAR) *n, wt))
308 goto matched;
309 #else
310 if (iswctype (BTOWC ((UCHAR) *n), wt))
311 goto matched;
312 #endif
313 c = *p++;
315 #ifdef _LIBC
/* "[=c=]" equivalence class.  */
316 else if (c == L_('[') && *p == L_('='))
318 /* It's important that STR be a scalar variable rather
319 than a one-element array, because GCC (at least 4.9.2
320 -O2 on x86-64) can be confused by the array and
321 diagnose a "used uninitialized" in a dead branch in the
322 findidx function. */
323 UCHAR str;
324 uint32_t nrules =
325 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
326 const CHAR *startp = p;
328 c = *++p;
329 if (c == L_('\0'))
331 p = startp;
332 c = L_('[');
333 goto normal_bracket;
335 str = c;
337 c = *++p;
338 if (c != L_('=') || p[1] != L_(']'))
340 p = startp;
341 c = L_('[');
342 goto normal_bracket;
344 p += 2;
/* Without collation rules the class matches only the character itself.  */
346 if (nrules == 0)
348 if ((UCHAR) *n == str)
349 goto matched;
351 else
353 const int32_t *table;
354 # if WIDE_CHAR_VERSION
355 const int32_t *weights;
356 const wint_t *extra;
357 # else
358 const unsigned char *weights;
359 const unsigned char *extra;
360 # endif
361 const int32_t *indirect;
362 int32_t idx;
363 const UCHAR *cp = (const UCHAR *) &str;
365 # if WIDE_CHAR_VERSION
366 table = (const int32_t *)
367 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
368 weights = (const int32_t *)
369 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
370 extra = (const wint_t *)
371 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
372 indirect = (const int32_t *)
373 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
374 # else
375 table = (const int32_t *)
376 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
377 weights = (const unsigned char *)
378 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
379 extra = (const unsigned char *)
380 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
381 indirect = (const int32_t *)
382 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
383 # endif
385 idx = FINDIDX (table, indirect, extra, &cp, 1);
386 if (idx != 0)
388 /* We found a table entry. Now see whether the
389 character we are currently at has the same
390 equivalence class value. */
391 int len = weights[idx & 0xffffff];
392 int32_t idx2;
393 const UCHAR *np = (const UCHAR *) n;
395 idx2 = FINDIDX (table, indirect, extra,
396 &np, string_end - n);
397 if (idx2 != 0
398 && (idx >> 24) == (idx2 >> 24)
399 && len == weights[idx2 & 0xffffff])
401 int cnt = 0;
403 idx &= 0xffffff;
404 idx2 &= 0xffffff;
406 while (cnt < len
407 && (weights[idx + 1 + cnt]
408 == weights[idx2 + 1 + cnt]))
409 ++cnt;
411 if (cnt == len)
412 goto matched;
417 c = *p++;
419 #endif
420 else if (c == L_('\0'))
422 /* [ unterminated, treat as normal character. */
423 p = p_init;
424 n = n_init;
425 c = L_('[');
426 goto normal_match;
428 else
430 bool is_range = false;
432 #ifdef _LIBC
433 bool is_seqval = false;
/* "[.name.]" collating symbol.  C1 counts the characters of the name.  */
435 if (c == L_('[') && *p == L_('.'))
437 uint32_t nrules =
438 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
439 const CHAR *startp = p;
440 size_t c1 = 0;
442 while (1)
444 c = *++p;
445 if (c == L_('.') && p[1] == L_(']'))
447 p += 2;
448 break;
450 if (c == '\0')
451 return FNM_NOMATCH;
452 ++c1;
455 /* We have to handle the symbols differently in
456 ranges since then the collation sequence is
457 important. */
458 is_range = *p == L_('-') && p[1] != L_('\0');
460 if (nrules == 0)
462 /* There are no names defined in the collation
463 data. Therefore we only accept the trivial
464 names consisting of the character itself. */
465 if (c1 != 1)
466 return FNM_NOMATCH;
468 if (!is_range && *n == startp[1])
469 goto matched;
471 cold = startp[1];
472 c = *p++;
474 else
476 int32_t table_size;
477 const int32_t *symb_table;
478 const unsigned char *extra;
479 int32_t idx;
480 int32_t elem;
481 # if WIDE_CHAR_VERSION
482 CHAR *wextra;
483 # endif
485 table_size =
486 _NL_CURRENT_WORD (LC_COLLATE,
487 _NL_COLLATE_SYMB_HASH_SIZEMB);
488 symb_table = (const int32_t *)
489 _NL_CURRENT (LC_COLLATE,
490 _NL_COLLATE_SYMB_TABLEMB);
491 extra = (const unsigned char *)
492 _NL_CURRENT (LC_COLLATE,
493 _NL_COLLATE_SYMB_EXTRAMB);
/* Scan every used slot of the symbol table for an entry whose sequence
   equals the name found between "[." and ".]".  */
495 for (elem = 0; elem < table_size; elem++)
496 if (symb_table[2 * elem] != 0)
498 idx = symb_table[2 * elem + 1];
499 /* Skip the name of collating element. */
500 idx += 1 + extra[idx];
501 # if WIDE_CHAR_VERSION
502 /* Skip the byte sequence of the
503 collating element. */
504 idx += 1 + extra[idx];
505 /* Adjust for the alignment. */
506 idx = (idx + 3) & ~3;
508 wextra = (CHAR *) &extra[idx + 4];
510 if (/* Compare the length of the sequence. */
511 c1 == wextra[0]
512 /* Compare the wide char sequence. */
513 && (__wmemcmp (startp + 1, &wextra[1],
515 == 0))
516 /* Yep, this is the entry. */
517 break;
518 # else
519 if (/* Compare the length of the sequence. */
520 c1 == extra[idx]
521 /* Compare the byte sequence. */
522 && memcmp (startp + 1,
523 &extra[idx + 1], c1) == 0)
524 /* Yep, this is the entry. */
525 break;
526 # endif
529 if (elem < table_size)
531 /* Compare the byte sequence but only if
532 this is not part of a range. */
534 /* The compiler might warn that idx may be
535 used uninitialized, however it will be
536 reached iff elem < table_size which means
537 that it was properly set in the loop
538 above. */
539 DIAG_PUSH_NEEDS_COMMENT;
540 DIAG_IGNORE_Os_NEEDS_COMMENT (8, "-Wmaybe-uninitialized");
541 if (! is_range
543 # if WIDE_CHAR_VERSION
544 && __wmemcmp (n, &wextra[1], c1) == 0
545 # else
546 && memcmp (n, &extra[idx + 1], c1) == 0
547 # endif
550 n += c1 - 1;
551 goto matched;
553 DIAG_POP_NEEDS_COMMENT;
555 /* Get the collation sequence value. */
556 is_seqval = true;
557 # if WIDE_CHAR_VERSION
558 /* The compile might warn that wextra may be
559 used uninitialized and similar to 'idx'
560 above it will be properly set by the loop.
562 DIAG_PUSH_NEEDS_COMMENT;
563 DIAG_IGNORE_Os_NEEDS_COMMENT (8, "-Wmaybe-uninitialized");
564 cold = wextra[1 + wextra[0]];
565 DIAG_POP_NEEDS_COMMENT;
566 # else
567 idx += 1 + extra[idx];
568 /* Adjust for the alignment. */
569 idx = (idx + 3) & ~3;
570 cold = *((int32_t *) &extra[idx]);
571 # endif
573 c = *p++;
575 else if (c1 == 1)
577 /* No valid character. Match it as a
578 single byte. */
579 if (!is_range && *n == startp[1])
580 goto matched;
582 cold = startp[1];
583 c = *p++;
585 else
586 return FNM_NOMATCH;
589 else
590 #endif
592 c = FOLD (c);
593 normal_bracket:
595 /* We have to handle the symbols differently in
596 ranges since then the collation sequence is
597 important. */
598 is_range = (*p == L_('-') && p[1] != L_('\0')
599 && p[1] != L_(']'));
601 if (!is_range && c == fn)
602 goto matched;
604 #if _LIBC
605 /* This is needed if we goto normal_bracket; from
606 outside of is_seqval's scope. */
607 is_seqval = false;
608 #endif
609 cold = c;
610 c = *p++;
/* Range "lo-hi".  COLD holds the lower bound: a character, or already a
   collation sequence value when is_seqval is set.  */
613 if (c == L_('-') && *p != L_(']'))
615 #if _LIBC
616 /* We have to find the collation sequence
617 value for C. Collation sequence is nothing
618 we can regularly access. The sequence
619 value is defined by the order in which the
620 definitions of the collation values for the
621 various characters appear in the source
622 file. A strange concept, nowhere
623 documented. */
624 uint32_t fcollseq;
625 uint32_t lcollseq;
626 UCHAR cend = *p++;
628 # if WIDE_CHAR_VERSION
629 /* Search in the 'names' array for the characters. */
630 fcollseq = __collseq_table_lookup (collseq, fn);
631 if (fcollseq == ~((uint32_t) 0))
632 /* XXX We don't know anything about the character
633 we are supposed to match. This means we are
634 failing. */
635 goto range_not_matched;
637 if (is_seqval)
638 lcollseq = cold;
639 else
640 lcollseq = __collseq_table_lookup (collseq, cold);
641 # else
642 fcollseq = collseq[fn];
643 lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
644 # endif
646 is_seqval = false;
647 if (cend == L_('[') && *p == L_('.'))
649 uint32_t nrules =
650 _NL_CURRENT_WORD (LC_COLLATE,
651 _NL_COLLATE_NRULES);
652 const CHAR *startp = p;
653 size_t c1 = 0;
655 while (1)
657 c = *++p;
658 if (c == L_('.') && p[1] == L_(']'))
660 p += 2;
661 break;
663 if (c == '\0')
664 return FNM_NOMATCH;
665 ++c1;
668 if (nrules == 0)
670 /* There are no names defined in the
671 collation data. Therefore we only
672 accept the trivial names consisting
673 of the character itself. */
674 if (c1 != 1)
675 return FNM_NOMATCH;
677 cend = startp[1];
679 else
681 int32_t table_size;
682 const int32_t *symb_table;
683 const unsigned char *extra;
684 int32_t idx;
685 int32_t elem;
686 # if WIDE_CHAR_VERSION
687 CHAR *wextra;
688 # endif
690 table_size =
691 _NL_CURRENT_WORD (LC_COLLATE,
692 _NL_COLLATE_SYMB_HASH_SIZEMB);
693 symb_table = (const int32_t *)
694 _NL_CURRENT (LC_COLLATE,
695 _NL_COLLATE_SYMB_TABLEMB);
696 extra = (const unsigned char *)
697 _NL_CURRENT (LC_COLLATE,
698 _NL_COLLATE_SYMB_EXTRAMB);
700 for (elem = 0; elem < table_size; elem++)
701 if (symb_table[2 * elem] != 0)
703 idx = symb_table[2 * elem + 1];
704 /* Skip the name of collating
705 element. */
706 idx += 1 + extra[idx];
707 # if WIDE_CHAR_VERSION
708 /* Skip the byte sequence of the
709 collating element. */
710 idx += 1 + extra[idx];
711 /* Adjust for the alignment. */
712 idx = (idx + 3) & ~3;
714 wextra = (CHAR *) &extra[idx + 4];
716 if (/* Compare the length of the
717 sequence. */
718 c1 == wextra[0]
719 /* Compare the wide char sequence. */
720 && (__wmemcmp (startp + 1,
721 &wextra[1], c1)
722 == 0))
723 /* Yep, this is the entry. */
724 break;
725 # else
726 if (/* Compare the length of the
727 sequence. */
728 c1 == extra[idx]
729 /* Compare the byte sequence. */
730 && memcmp (startp + 1,
731 &extra[idx + 1], c1) == 0)
732 /* Yep, this is the entry. */
733 break;
734 # endif
737 if (elem < table_size)
739 /* Get the collation sequence value. */
740 is_seqval = true;
741 # if WIDE_CHAR_VERSION
742 /* The compiler might warn that wextra may
743 be used uninitialized, however it will
744 be reached iff elem < table_size which
745 means that it was properly set in the
746 loop above. */
747 DIAG_PUSH_NEEDS_COMMENT;
748 DIAG_IGNORE_Os_NEEDS_COMMENT (8, "-Wmaybe-uninitialized");
749 cend = wextra[1 + wextra[0]];
750 DIAG_POP_NEEDS_COMMENT;
751 # else
752 /* The compiler might warn that idx may
753 be used uninitialized and similar to
754 wextra above it will be properly set by
755 the loop. */
756 DIAG_PUSH_NEEDS_COMMENT;
757 DIAG_IGNORE_Os_NEEDS_COMMENT (8, "-Wmaybe-uninitialized");
758 idx += 1 + extra[idx];
759 DIAG_POP_NEEDS_COMMENT;
760 /* Adjust for the alignment. */
761 idx = (idx + 3) & ~3;
762 cend = *((int32_t *) &extra[idx]);
763 # endif
765 else if (c1 == 1)
767 cend = startp[1];
768 c = *p++;
770 else
771 return FNM_NOMATCH;
774 else
776 if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
777 cend = *p++;
778 if (cend == L_('\0'))
779 return FNM_NOMATCH;
780 cend = FOLD (cend);
783 /* XXX It is not entirely clear to me how to handle
784 characters which are not mentioned in the
785 collation specification. */
786 if (
787 # if WIDE_CHAR_VERSION
788 lcollseq == 0xffffffff ||
789 # endif
790 lcollseq <= fcollseq)
792 /* We have to look at the upper bound. */
793 uint32_t hcollseq;
795 if (is_seqval)
796 hcollseq = cend;
797 else
799 # if WIDE_CHAR_VERSION
800 hcollseq =
801 __collseq_table_lookup (collseq, cend);
802 if (hcollseq == ~((uint32_t) 0))
804 /* Hum, no information about the upper
805 bound. The matching succeeds if the
806 lower bound is matched exactly. */
807 if (lcollseq != fcollseq)
808 goto range_not_matched;
810 goto matched;
812 # else
813 hcollseq = collseq[cend];
814 # endif
/* The lower bound was checked above; the range matches when FN's sequence
   value does not exceed the upper bound and the bounds are not reversed.  */
817 if (lcollseq <= hcollseq && fcollseq <= hcollseq)
818 goto matched;
820 # if WIDE_CHAR_VERSION
821 range_not_matched:
822 # endif
823 #else
824 /* We use a boring value comparison of the character
825 values. This is better than comparing using
826 'strcoll' since the latter would have surprising
827 and sometimes fatal consequences. */
828 UCHAR cend = *p++;
830 if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
831 cend = *p++;
832 if (cend == L_('\0'))
833 return FNM_NOMATCH;
835 /* It is a range. */
836 if ((UCHAR) cold <= fn && fn <= cend)
837 goto matched;
838 #endif
840 c = *p++;
/* A ']' here ends the bracket expression with no element matched.  */
844 if (c == L_(']'))
845 break;
/* Nothing in the set matched: that is a failure unless the set is
   negated.  */
848 if (!not)
849 return FNM_NOMATCH;
850 break;
852 matched:
853 /* Skip the rest of the [...] that already matched. */
854 while ((c = *p++) != L_(']'))
856 if (c == L_('\0'))
858 /* [ unterminated, treat as normal character. */
859 p = p_init;
860 n = n_init;
861 c = L_('[');
862 goto normal_match;
865 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
867 if (*p == L_('\0'))
868 return FNM_NOMATCH;
869 /* XXX 1003.2d11 is unclear if this is right. */
870 ++p;
872 else if (c == L_('[') && *p == L_(':'))
874 int c1 = 0;
875 const CHAR *startp = p;
877 while (1)
879 c = *++p;
880 if (++c1 == CHAR_CLASS_MAX_LENGTH)
881 return FNM_NOMATCH;
883 if (*p == L_(':') && p[1] == L_(']'))
884 break;
/* NOTE(review): same exclusive upper bound as in the class-name parser
   above, so 'z' also ends the name scan here -- confirm that this is
   intended.  */
886 if (c < L_('a') || c >= L_('z'))
888 p = startp - 2;
889 break;
892 p += 2;
894 else if (c == L_('[') && *p == L_('='))
896 c = *++p;
897 if (c == L_('\0'))
898 return FNM_NOMATCH;
899 c = *++p;
900 if (c != L_('=') || p[1] != L_(']'))
901 return FNM_NOMATCH;
902 p += 2;
904 else if (c == L_('[') && *p == L_('.'))
906 while (1)
908 c = *++p;
909 if (c == L_('\0'))
910 return FNM_NOMATCH;
912 if (c == L_('.') && p[1] == L_(']'))
913 break;
915 p += 2;
/* An element matched: that is a failure when the set is negated.  */
918 if (not)
919 return FNM_NOMATCH;
921 break;
/* Extended-match operators.  They are ordinary characters unless followed
   by '(' under FNM_EXTMATCH, and also when EXT reports -1 for the group.  */
923 case L_('+'):
924 case L_('@'):
925 case L_('!'):
926 if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(')
928 int res = EXT (c, p, n, string_end, no_leading_period, flags);
929 if (res != -1)
930 return res;
932 goto normal_match;
934 case L_('/'):
/* When NO_LEADING_PERIOD (flags) holds, a matched '/' makes the next
   string character count as a leading one again.  */
935 if (NO_LEADING_PERIOD (flags))
937 if (n == string_end || c != (UCHAR) *n)
938 return FNM_NOMATCH;
940 new_no_leading_period = true;
941 break;
943 FALLTHROUGH;
944 default:
945 normal_match:
946 if (n == string_end || c != FOLD ((UCHAR) *n))
947 return FNM_NOMATCH;
/* Common tail: the element matched one string character; step over it and
   install the leading-period state for the next one.  */
950 no_leading_period = new_no_leading_period;
951 ++n;
/* The pattern is exhausted: it matches only if the string is too.  */
954 if (n == string_end)
955 return 0;
957 if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/'))
958 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
959 return 0;
961 return FNM_NOMATCH;
965 static const CHAR *
966 END (const CHAR *pattern)
968 const CHAR *p = pattern;
970 while (1)
971 if (*++p == L_('\0'))
972 /* This is an invalid pattern. */
973 return pattern;
974 else if (*p == L_('['))
976 /* Handle brackets special. */
977 if (posixly_correct == 0)
978 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
980 /* Skip the not sign. We have to recognize it because of a possibly
981 following ']'. */
982 if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
983 ++p;
984 /* A leading ']' is recognized as such. */
985 if (*p == L_(']'))
986 ++p;
987 /* Skip over all characters of the list. */
988 while (*p != L_(']'))
989 if (*p++ == L_('\0'))
990 /* This is no valid pattern. */
991 return pattern;
993 else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
994 || *p == L_('!')) && p[1] == L_('('))
996 p = END (p + 1);
997 if (*p == L_('\0'))
998 /* This is an invalid pattern. */
999 return pattern;
1001 else if (*p == L_(')'))
1002 break;
1004 return p + 1;
/* Name prefix for the dynamic array type that EXT uses to hold the
   alternatives of an extended-match group.
   NOTE(review): the wide build gets the name without the 'w' and the
   narrow build the one with it.  The names look swapped; the visible code
   only reaches them through PATTERN_PREFIX -- confirm that nothing else
   refers to them directly.  */
1007 #if WIDE_CHAR_VERSION
1008 # define PATTERN_PREFIX pattern_list
1009 #else
1010 # define PATTERN_PREFIX wpattern_list
1011 #endif
/* Two-level paste so that PATTERN_PREFIX is expanded before ## is
   applied.  */
1013 #define PASTE(a,b) PASTE1(a,b)
1014 #define PASTE1(a,b) a##b
/* Parameters for the dynarray skeleton included below: the array type is
   struct PATTERN_PREFIX, its elements are CHAR * strings, its functions
   are named PATTERN_PREFIX followed by '_' and the operation, and the
   element cleanup hook frees the string an element points to.  */
1016 #define DYNARRAY_STRUCT PATTERN_PREFIX
1017 #define DYNARRAY_ELEMENT_FREE(ptr) free (*ptr)
1018 #define DYNARRAY_ELEMENT CHAR *
1019 #define DYNARRAY_PREFIX PASTE(PATTERN_PREFIX,_)
1020 #define DYNARRAY_INITIAL_SIZE 8
1021 #include <malloc/dynarray-skeleton.c>
1023 static int
1024 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1025 bool no_leading_period, int flags)
1027 const CHAR *startp;
1028 ptrdiff_t level;
1029 struct PATTERN_PREFIX list;
1030 size_t pattern_len = STRLEN (pattern);
1031 size_t pattern_i = 0;
1032 const CHAR *p;
1033 const CHAR *rs;
1034 int retval = 0;
1036 PASTE (PATTERN_PREFIX, _init) (&list);
1038 /* Parse the pattern. Store the individual parts in the list. */
1039 level = 0;
1040 for (startp = p = pattern + 1; level >= 0; ++p)
1041 if (*p == L_('\0'))
1043 /* This is an invalid pattern. */
1044 retval = -1;
1045 goto out;
1047 else if (*p == L_('['))
1049 /* Handle brackets special. */
1050 if (posixly_correct == 0)
1051 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1053 /* Skip the not sign. We have to recognize it because of a possibly
1054 following ']'. */
1055 if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1056 ++p;
1057 /* A leading ']' is recognized as such. */
1058 if (*p == L_(']'))
1059 ++p;
1060 /* Skip over all characters of the list. */
1061 while (*p != L_(']'))
1062 if (*p++ == L_('\0'))
1064 /* This is no valid pattern. */
1065 retval = -1;
1066 goto out;
1069 else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1070 || *p == L_('!')) && p[1] == L_('('))
1071 /* Remember the nesting level. */
1072 ++level;
1073 else if (*p == L_(')') || *p == L_('|'))
1075 if (level == 0)
1077 size_t slen = opt == L_('?') || opt == L_('@')
1078 ? pattern_len : p - startp + 1;
1079 CHAR *newp = malloc (slen * sizeof (CHAR));
1080 if (newp != NULL)
1082 *((CHAR *) MEMPCPY (newp, startp, p - startp)) = L_('\0');
1083 PASTE (PATTERN_PREFIX,_add) (&list, newp);
1085 if (newp == NULL || PASTE (PATTERN_PREFIX, _has_failed) (&list))
1087 retval = -2;
1088 goto out;
1091 if (*p == L_('|'))
1092 startp = p + 1;
1094 if (*p == L_(')'))
1095 level--;
1097 assert (p[-1] == L_(')'));
1099 switch (opt)
1101 case L_('*'):
1102 if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
1103 goto success;
1104 FALLTHROUGH;
1105 case L_('+'):
1106 for (; pattern_i < PASTE (PATTERN_PREFIX, _size)(&list); pattern_i++)
1108 for (rs = string; rs <= string_end; ++rs)
1109 /* First match the prefix with the current pattern with the
1110 current pattern. */
1111 if (FCT (*PASTE (PATTERN_PREFIX, _at) (&list, pattern_i), string,
1112 rs, no_leading_period,
1113 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1114 NULL) == 0
1115 /* This was successful. Now match the rest with the rest
1116 of the pattern. */
1117 && (FCT (p, rs, string_end,
1118 rs == string
1119 ? no_leading_period
1120 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1121 flags & FNM_FILE_NAME
1122 ? flags : flags & ~FNM_PERIOD, NULL) == 0
1123 /* This didn't work. Try the whole pattern. */
1124 || (rs != string
1125 && FCT (pattern - 1, rs, string_end,
1126 rs == string
1127 ? no_leading_period
1128 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1129 flags & FNM_FILE_NAME
1130 ? flags : flags & ~FNM_PERIOD, NULL) == 0)))
1131 /* It worked. Signal success. */
1132 goto success;
1135 /* None of the patterns lead to a match. */
1136 retval = FNM_NOMATCH;
1137 break;
1139 case L_('?'):
1140 if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
1141 goto success;
1142 FALLTHROUGH;
1143 case L_('@'):
1144 for (; pattern_i < PASTE (PATTERN_PREFIX, _size) (&list); pattern_i++)
1146 /* I cannot believe it but `strcat' is actually acceptable
1147 here. Match the entire string with the prefix from the
1148 pattern list and the rest of the pattern following the
1149 pattern list. */
1150 if (FCT (STRCAT (*PASTE (PATTERN_PREFIX, _at) (&list, pattern_i), p),
1151 string, string_end, no_leading_period,
1152 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1153 NULL) == 0)
1154 /* It worked. Signal success. */
1155 goto success;
1158 /* None of the patterns lead to a match. */
1159 retval = FNM_NOMATCH;
1160 break;
1162 case L_('!'):
1163 for (rs = string; rs <= string_end; ++rs)
1165 size_t runp_i;
1167 for (runp_i = pattern_i;
1168 runp_i != PASTE (PATTERN_PREFIX, _size) (&list);
1169 runp_i++)
1171 if (FCT (*PASTE (PATTERN_PREFIX, _at) (&list, runp_i), string, rs,
1172 no_leading_period,
1173 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1174 NULL) == 0)
1175 break;
1178 /* If none of the patterns matched see whether the rest does. */
1179 if (runp_i == PASTE (PATTERN_PREFIX, _size) (&list)
1180 && (FCT (p, rs, string_end,
1181 rs == string
1182 ? no_leading_period
1183 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1184 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1185 NULL) == 0))
1186 /* This is successful. */
1187 goto success;
1190 /* None of the patterns together with the rest of the pattern
1191 lead to a match. */
1192 retval = FNM_NOMATCH;
1193 break;
1195 default:
1196 assert (! "Invalid extended matching operator");
1197 retval = -1;
1198 break;
1201 success:
1202 out:
1203 PASTE (PATTERN_PREFIX, _free) (&list);
1205 return retval;
/* Drop the helper macros defined in this file.  */
1208 #undef PATTERN_PREFIX
1209 #undef PASTE
1210 #undef PASTE1
/* Drop the parameter macros this file is written against; presumably the
   including file supplies them and may define them differently before
   including this file again (e.g. with another WIDE_CHAR_VERSION) --
   confirm against the includer.  */
1212 #undef FOLD
1213 #undef CHAR
1214 #undef UCHAR
1215 #undef INT
1216 #undef FCT
1217 #undef EXT
1218 #undef END
1219 #undef STRUCT
1220 #undef MEMPCPY
1221 #undef MEMCHR
1222 #undef STRLEN
1223 #undef STRCAT
1224 #undef L_
1225 #undef BTOWC
1226 #undef WIDE_CHAR_VERSION
1227 #undef FINDIDX