<machine/stdint.h>: Check __cplusplus first.
[dragonfly.git] / contrib / diffutils / lib / fnmatch_loop.c
blobf57cd63f9ab36e3e26f5545a75349db1fc46fb13
1 /* Copyright (C) 1991-1993, 1996-2006, 2009-2013 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, see <http://www.gnu.org/licenses/>. */
17 /* Match STRING against the file name pattern PATTERN, returning zero if
18 it matches, nonzero if not. */
19 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
20 const CHAR *string_end, bool no_leading_period, int flags)
21 internal_function;
22 static const CHAR *END (const CHAR *patternp) internal_function;
24 static int
25 internal_function
26 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
27 bool no_leading_period, int flags)
29 register const CHAR *p = pattern, *n = string;
30 register UCHAR c;
31 #ifdef _LIBC
32 # if WIDE_CHAR_VERSION
33 const char *collseq = (const char *)
34 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
35 # else
36 const UCHAR *collseq = (const UCHAR *)
37 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
38 # endif
39 #endif
41 while ((c = *p++) != L_('\0'))
43 bool new_no_leading_period = false;
44 c = FOLD (c);
46 switch (c)
48 case L_('?'):
49 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
51 int res;
53 res = EXT (c, p, n, string_end, no_leading_period,
54 flags);
55 if (res != -1)
56 return res;
59 if (n == string_end)
60 return FNM_NOMATCH;
61 else if (*n == L_('/') && (flags & FNM_FILE_NAME))
62 return FNM_NOMATCH;
63 else if (*n == L_('.') && no_leading_period)
64 return FNM_NOMATCH;
65 break;
67 case L_('\\'):
68 if (!(flags & FNM_NOESCAPE))
70 c = *p++;
71 if (c == L_('\0'))
72 /* Trailing \ loses. */
73 return FNM_NOMATCH;
74 c = FOLD (c);
76 if (n == string_end || FOLD ((UCHAR) *n) != c)
77 return FNM_NOMATCH;
78 break;
80 case L_('*'):
81 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
83 int res;
85 res = EXT (c, p, n, string_end, no_leading_period,
86 flags);
87 if (res != -1)
88 return res;
91 if (n != string_end && *n == L_('.') && no_leading_period)
92 return FNM_NOMATCH;
94 for (c = *p++; c == L_('?') || c == L_('*'); c = *p++)
96 if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0)
98 const CHAR *endp = END (p);
99 if (endp != p)
101 /* This is a pattern. Skip over it. */
102 p = endp;
103 continue;
107 if (c == L_('?'))
109 /* A ? needs to match one character. */
110 if (n == string_end)
111 /* There isn't another character; no match. */
112 return FNM_NOMATCH;
113 else if (*n == L_('/')
114 && __builtin_expect (flags & FNM_FILE_NAME, 0))
115 /* A slash does not match a wildcard under
116 FNM_FILE_NAME. */
117 return FNM_NOMATCH;
118 else
119 /* One character of the string is consumed in matching
120 this ? wildcard, so *??? won't match if there are
121 less than three characters. */
122 ++n;
126 if (c == L_('\0'))
127 /* The wildcard(s) is/are the last element of the pattern.
128 If the name is a file name and contains another slash
129 this means it cannot match, unless the FNM_LEADING_DIR
130 flag is set. */
132 int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
134 if (flags & FNM_FILE_NAME)
136 if (flags & FNM_LEADING_DIR)
137 result = 0;
138 else
140 if (MEMCHR (n, L_('/'), string_end - n) == NULL)
141 result = 0;
145 return result;
147 else
149 const CHAR *endp;
151 endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'),
152 string_end - n);
153 if (endp == NULL)
154 endp = string_end;
156 if (c == L_('[')
157 || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
158 && (c == L_('@') || c == L_('+') || c == L_('!'))
159 && *p == L_('(')))
161 int flags2 = ((flags & FNM_FILE_NAME)
162 ? flags : (flags & ~FNM_PERIOD));
163 bool no_leading_period2 = no_leading_period;
165 for (--p; n < endp; ++n, no_leading_period2 = false)
166 if (FCT (p, n, string_end, no_leading_period2, flags2)
167 == 0)
168 return 0;
170 else if (c == L_('/') && (flags & FNM_FILE_NAME))
172 while (n < string_end && *n != L_('/'))
173 ++n;
174 if (n < string_end && *n == L_('/')
175 && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
176 == 0))
177 return 0;
179 else
181 int flags2 = ((flags & FNM_FILE_NAME)
182 ? flags : (flags & ~FNM_PERIOD));
183 int no_leading_period2 = no_leading_period;
185 if (c == L_('\\') && !(flags & FNM_NOESCAPE))
186 c = *p;
187 c = FOLD (c);
188 for (--p; n < endp; ++n, no_leading_period2 = false)
189 if (FOLD ((UCHAR) *n) == c
190 && (FCT (p, n, string_end, no_leading_period2, flags2)
191 == 0))
192 return 0;
196 /* If we come here no match is possible with the wildcard. */
197 return FNM_NOMATCH;
199 case L_('['):
201 /* Nonzero if the sense of the character class is inverted. */
202 const CHAR *p_init = p;
203 const CHAR *n_init = n;
204 register bool not;
205 CHAR cold;
206 UCHAR fn;
208 if (posixly_correct == 0)
209 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
211 if (n == string_end)
212 return FNM_NOMATCH;
214 if (*n == L_('.') && no_leading_period)
215 return FNM_NOMATCH;
217 if (*n == L_('/') && (flags & FNM_FILE_NAME))
218 /* '/' cannot be matched. */
219 return FNM_NOMATCH;
221 not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^')));
222 if (not)
223 ++p;
225 fn = FOLD ((UCHAR) *n);
227 c = *p++;
228 for (;;)
230 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
232 if (*p == L_('\0'))
233 return FNM_NOMATCH;
234 c = FOLD ((UCHAR) *p);
235 ++p;
237 goto normal_bracket;
239 else if (c == L_('[') && *p == L_(':'))
241 /* Leave room for the null. */
242 CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
243 size_t c1 = 0;
244 #if defined _LIBC || WIDE_CHAR_SUPPORT
245 wctype_t wt;
246 #endif
247 const CHAR *startp = p;
249 for (;;)
251 if (c1 == CHAR_CLASS_MAX_LENGTH)
252 /* The name is too long and therefore the pattern
253 is ill-formed. */
254 return FNM_NOMATCH;
256 c = *++p;
257 if (c == L_(':') && p[1] == L_(']'))
259 p += 2;
260 break;
262 if (c < L_('a') || c >= L_('z'))
264 /* This cannot possibly be a character class name.
265 Match it as a normal range. */
266 p = startp;
267 c = L_('[');
268 goto normal_bracket;
270 str[c1++] = c;
272 str[c1] = L_('\0');
274 #if defined _LIBC || WIDE_CHAR_SUPPORT
275 wt = IS_CHAR_CLASS (str);
276 if (wt == 0)
277 /* Invalid character class name. */
278 return FNM_NOMATCH;
280 # if defined _LIBC && ! WIDE_CHAR_VERSION
281 /* The following code is glibc specific but does
282 there a good job in speeding up the code since
283 we can avoid the btowc() call. */
284 if (_ISCTYPE ((UCHAR) *n, wt))
285 goto matched;
286 # else
287 if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
288 goto matched;
289 # endif
290 #else
291 if ((STREQ (str, L_("alnum")) && isalnum ((UCHAR) *n))
292 || (STREQ (str, L_("alpha")) && isalpha ((UCHAR) *n))
293 || (STREQ (str, L_("blank")) && isblank ((UCHAR) *n))
294 || (STREQ (str, L_("cntrl")) && iscntrl ((UCHAR) *n))
295 || (STREQ (str, L_("digit")) && isdigit ((UCHAR) *n))
296 || (STREQ (str, L_("graph")) && isgraph ((UCHAR) *n))
297 || (STREQ (str, L_("lower")) && islower ((UCHAR) *n))
298 || (STREQ (str, L_("print")) && isprint ((UCHAR) *n))
299 || (STREQ (str, L_("punct")) && ispunct ((UCHAR) *n))
300 || (STREQ (str, L_("space")) && isspace ((UCHAR) *n))
301 || (STREQ (str, L_("upper")) && isupper ((UCHAR) *n))
302 || (STREQ (str, L_("xdigit")) && isxdigit ((UCHAR) *n)))
303 goto matched;
304 #endif
305 c = *p++;
307 #ifdef _LIBC
308 else if (c == L_('[') && *p == L_('='))
310 UCHAR str[1];
311 uint32_t nrules =
312 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
313 const CHAR *startp = p;
315 c = *++p;
316 if (c == L_('\0'))
318 p = startp;
319 c = L_('[');
320 goto normal_bracket;
322 str[0] = c;
324 c = *++p;
325 if (c != L_('=') || p[1] != L_(']'))
327 p = startp;
328 c = L_('[');
329 goto normal_bracket;
331 p += 2;
333 if (nrules == 0)
335 if ((UCHAR) *n == str[0])
336 goto matched;
338 else
340 const int32_t *table;
341 # if WIDE_CHAR_VERSION
342 const int32_t *weights;
343 const int32_t *extra;
344 # else
345 const unsigned char *weights;
346 const unsigned char *extra;
347 # endif
348 const int32_t *indirect;
349 int32_t idx;
350 const UCHAR *cp = (const UCHAR *) str;
352 /* This #include defines a local function! */
353 # if WIDE_CHAR_VERSION
354 # include <locale/weightwc.h>
355 # else
356 # include <locale/weight.h>
357 # endif
359 # if WIDE_CHAR_VERSION
360 table = (const int32_t *)
361 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
362 weights = (const int32_t *)
363 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
364 extra = (const int32_t *)
365 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
366 indirect = (const int32_t *)
367 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
368 # else
369 table = (const int32_t *)
370 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
371 weights = (const unsigned char *)
372 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
373 extra = (const unsigned char *)
374 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
375 indirect = (const int32_t *)
376 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
377 # endif
379 idx = findidx (&cp);
380 if (idx != 0)
382 /* We found a table entry. Now see whether the
383 character we are currently at has the same
384 equivalence class value. */
385 int len = weights[idx & 0xffffff];
386 int32_t idx2;
387 const UCHAR *np = (const UCHAR *) n;
389 idx2 = findidx (&np);
390 if (idx2 != 0
391 && (idx >> 24) == (idx2 >> 24)
392 && len == weights[idx2 & 0xffffff])
394 int cnt = 0;
396 idx &= 0xffffff;
397 idx2 &= 0xffffff;
399 while (cnt < len
400 && (weights[idx + 1 + cnt]
401 == weights[idx2 + 1 + cnt]))
402 ++cnt;
404 if (cnt == len)
405 goto matched;
410 c = *p++;
412 #endif
413 else if (c == L_('\0'))
415 /* [ unterminated, treat as normal character. */
416 p = p_init;
417 n = n_init;
418 c = L_('[');
419 goto normal_match;
421 else
423 bool is_range = false;
425 #ifdef _LIBC
426 bool is_seqval = false;
428 if (c == L_('[') && *p == L_('.'))
430 uint32_t nrules =
431 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
432 const CHAR *startp = p;
433 size_t c1 = 0;
435 while (1)
437 c = *++p;
438 if (c == L_('.') && p[1] == L_(']'))
440 p += 2;
441 break;
443 if (c == '\0')
444 return FNM_NOMATCH;
445 ++c1;
448 /* We have to handling the symbols differently in
449 ranges since then the collation sequence is
450 important. */
451 is_range = *p == L_('-') && p[1] != L_('\0');
453 if (nrules == 0)
455 /* There are no names defined in the collation
456 data. Therefore we only accept the trivial
457 names consisting of the character itself. */
458 if (c1 != 1)
459 return FNM_NOMATCH;
461 if (!is_range && *n == startp[1])
462 goto matched;
464 cold = startp[1];
465 c = *p++;
467 else
469 int32_t table_size;
470 const int32_t *symb_table;
471 # ifdef WIDE_CHAR_VERSION
472 char str[c1];
473 size_t strcnt;
474 # else
475 # define str (startp + 1)
476 # endif
477 const unsigned char *extra;
478 int32_t idx;
479 int32_t elem;
480 int32_t second;
481 int32_t hash;
483 # ifdef WIDE_CHAR_VERSION
484 /* We have to convert the name to a single-byte
485 string. This is possible since the names
486 consist of ASCII characters and the internal
487 representation is UCS4. */
488 for (strcnt = 0; strcnt < c1; ++strcnt)
489 str[strcnt] = startp[1 + strcnt];
490 # endif
492 table_size =
493 _NL_CURRENT_WORD (LC_COLLATE,
494 _NL_COLLATE_SYMB_HASH_SIZEMB);
495 symb_table = (const int32_t *)
496 _NL_CURRENT (LC_COLLATE,
497 _NL_COLLATE_SYMB_TABLEMB);
498 extra = (const unsigned char *)
499 _NL_CURRENT (LC_COLLATE,
500 _NL_COLLATE_SYMB_EXTRAMB);
502 /* Locate the character in the hashing table. */
503 hash = elem_hash (str, c1);
505 idx = 0;
506 elem = hash % table_size;
507 if (symb_table[2 * elem] != 0)
509 second = hash % (table_size - 2) + 1;
513 /* First compare the hashing value. */
514 if (symb_table[2 * elem] == hash
515 && (c1
516 == extra[symb_table[2 * elem + 1]])
517 && memcmp (str,
518 &extra[symb_table[2 * elem
519 + 1]
520 + 1], c1) == 0)
522 /* Yep, this is the entry. */
523 idx = symb_table[2 * elem + 1];
524 idx += 1 + extra[idx];
525 break;
528 /* Next entry. */
529 elem += second;
531 while (symb_table[2 * elem] != 0);
534 if (symb_table[2 * elem] != 0)
536 /* Compare the byte sequence but only if
537 this is not part of a range. */
538 # ifdef WIDE_CHAR_VERSION
539 int32_t *wextra;
541 idx += 1 + extra[idx];
542 /* Adjust for the alignment. */
543 idx = (idx + 3) & ~3;
545 wextra = (int32_t *) &extra[idx + 4];
546 # endif
548 if (! is_range)
550 # ifdef WIDE_CHAR_VERSION
551 for (c1 = 0;
552 (int32_t) c1 < wextra[idx];
553 ++c1)
554 if (n[c1] != wextra[1 + c1])
555 break;
557 if ((int32_t) c1 == wextra[idx])
558 goto matched;
559 # else
560 for (c1 = 0; c1 < extra[idx]; ++c1)
561 if (n[c1] != extra[1 + c1])
562 break;
564 if (c1 == extra[idx])
565 goto matched;
566 # endif
569 /* Get the collation sequence value. */
570 is_seqval = true;
571 # ifdef WIDE_CHAR_VERSION
572 cold = wextra[1 + wextra[idx]];
573 # else
574 /* Adjust for the alignment. */
575 idx += 1 + extra[idx];
576 idx = (idx + 3) & ~4;
577 cold = *((int32_t *) &extra[idx]);
578 # endif
580 c = *p++;
582 else if (c1 == 1)
584 /* No valid character. Match it as a
585 single byte. */
586 if (!is_range && *n == str[0])
587 goto matched;
589 cold = str[0];
590 c = *p++;
592 else
593 return FNM_NOMATCH;
596 else
597 # undef str
598 #endif
600 c = FOLD (c);
601 normal_bracket:
603 /* We have to handling the symbols differently in
604 ranges since then the collation sequence is
605 important. */
606 is_range = (*p == L_('-') && p[1] != L_('\0')
607 && p[1] != L_(']'));
609 if (!is_range && c == fn)
610 goto matched;
612 #if _LIBC
613 /* This is needed if we goto normal_bracket; from
614 outside of is_seqval's scope. */
615 is_seqval = false;
616 #endif
618 cold = c;
619 c = *p++;
622 if (c == L_('-') && *p != L_(']'))
624 #if _LIBC
625 /* We have to find the collation sequence
626 value for C. Collation sequence is nothing
627 we can regularly access. The sequence
628 value is defined by the order in which the
629 definitions of the collation values for the
630 various characters appear in the source
631 file. A strange concept, nowhere
632 documented. */
633 uint32_t fcollseq;
634 uint32_t lcollseq;
635 UCHAR cend = *p++;
637 # ifdef WIDE_CHAR_VERSION
638 /* Search in the 'names' array for the characters. */
639 fcollseq = __collseq_table_lookup (collseq, fn);
640 if (fcollseq == ~((uint32_t) 0))
641 /* XXX We don't know anything about the character
642 we are supposed to match. This means we are
643 failing. */
644 goto range_not_matched;
646 if (is_seqval)
647 lcollseq = cold;
648 else
649 lcollseq = __collseq_table_lookup (collseq, cold);
650 # else
651 fcollseq = collseq[fn];
652 lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
653 # endif
655 is_seqval = false;
656 if (cend == L_('[') && *p == L_('.'))
658 uint32_t nrules =
659 _NL_CURRENT_WORD (LC_COLLATE,
660 _NL_COLLATE_NRULES);
661 const CHAR *startp = p;
662 size_t c1 = 0;
664 while (1)
666 c = *++p;
667 if (c == L_('.') && p[1] == L_(']'))
669 p += 2;
670 break;
672 if (c == '\0')
673 return FNM_NOMATCH;
674 ++c1;
677 if (nrules == 0)
679 /* There are no names defined in the
680 collation data. Therefore we only
681 accept the trivial names consisting
682 of the character itself. */
683 if (c1 != 1)
684 return FNM_NOMATCH;
686 cend = startp[1];
688 else
690 int32_t table_size;
691 const int32_t *symb_table;
692 # ifdef WIDE_CHAR_VERSION
693 char str[c1];
694 size_t strcnt;
695 # else
696 # define str (startp + 1)
697 # endif
698 const unsigned char *extra;
699 int32_t idx;
700 int32_t elem;
701 int32_t second;
702 int32_t hash;
704 # ifdef WIDE_CHAR_VERSION
705 /* We have to convert the name to a single-byte
706 string. This is possible since the names
707 consist of ASCII characters and the internal
708 representation is UCS4. */
709 for (strcnt = 0; strcnt < c1; ++strcnt)
710 str[strcnt] = startp[1 + strcnt];
711 # endif
713 table_size =
714 _NL_CURRENT_WORD (LC_COLLATE,
715 _NL_COLLATE_SYMB_HASH_SIZEMB);
716 symb_table = (const int32_t *)
717 _NL_CURRENT (LC_COLLATE,
718 _NL_COLLATE_SYMB_TABLEMB);
719 extra = (const unsigned char *)
720 _NL_CURRENT (LC_COLLATE,
721 _NL_COLLATE_SYMB_EXTRAMB);
723 /* Locate the character in the hashing
724 table. */
725 hash = elem_hash (str, c1);
727 idx = 0;
728 elem = hash % table_size;
729 if (symb_table[2 * elem] != 0)
731 second = hash % (table_size - 2) + 1;
735 /* First compare the hashing value. */
736 if (symb_table[2 * elem] == hash
737 && (c1
738 == extra[symb_table[2 * elem + 1]])
739 && memcmp (str,
740 &extra[symb_table[2 * elem + 1]
741 + 1], c1) == 0)
743 /* Yep, this is the entry. */
744 idx = symb_table[2 * elem + 1];
745 idx += 1 + extra[idx];
746 break;
749 /* Next entry. */
750 elem += second;
752 while (symb_table[2 * elem] != 0);
755 if (symb_table[2 * elem] != 0)
757 /* Compare the byte sequence but only if
758 this is not part of a range. */
759 # ifdef WIDE_CHAR_VERSION
760 int32_t *wextra;
762 idx += 1 + extra[idx];
763 /* Adjust for the alignment. */
764 idx = (idx + 3) & ~4;
766 wextra = (int32_t *) &extra[idx + 4];
767 # endif
768 /* Get the collation sequence value. */
769 is_seqval = true;
770 # ifdef WIDE_CHAR_VERSION
771 cend = wextra[1 + wextra[idx]];
772 # else
773 /* Adjust for the alignment. */
774 idx += 1 + extra[idx];
775 idx = (idx + 3) & ~4;
776 cend = *((int32_t *) &extra[idx]);
777 # endif
779 else if (symb_table[2 * elem] != 0 && c1 == 1)
781 cend = str[0];
782 c = *p++;
784 else
785 return FNM_NOMATCH;
787 # undef str
789 else
791 if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
792 cend = *p++;
793 if (cend == L_('\0'))
794 return FNM_NOMATCH;
795 cend = FOLD (cend);
798 /* XXX It is not entirely clear to me how to handle
799 characters which are not mentioned in the
800 collation specification. */
801 if (
802 # ifdef WIDE_CHAR_VERSION
803 lcollseq == 0xffffffff ||
804 # endif
805 lcollseq <= fcollseq)
807 /* We have to look at the upper bound. */
808 uint32_t hcollseq;
810 if (is_seqval)
811 hcollseq = cend;
812 else
814 # ifdef WIDE_CHAR_VERSION
815 hcollseq =
816 __collseq_table_lookup (collseq, cend);
817 if (hcollseq == ~((uint32_t) 0))
819 /* Hum, no information about the upper
820 bound. The matching succeeds if the
821 lower bound is matched exactly. */
822 if (lcollseq != fcollseq)
823 goto range_not_matched;
825 goto matched;
827 # else
828 hcollseq = collseq[cend];
829 # endif
832 if (lcollseq <= hcollseq && fcollseq <= hcollseq)
833 goto matched;
835 # ifdef WIDE_CHAR_VERSION
836 range_not_matched:
837 # endif
838 #else
839 /* We use a boring value comparison of the character
840 values. This is better than comparing using
841 'strcoll' since the latter would have surprising
842 and sometimes fatal consequences. */
843 UCHAR cend = *p++;
845 if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
846 cend = *p++;
847 if (cend == L_('\0'))
848 return FNM_NOMATCH;
850 /* It is a range. */
851 if (cold <= fn && fn <= cend)
852 goto matched;
853 #endif
855 c = *p++;
859 if (c == L_(']'))
860 break;
863 if (!not)
864 return FNM_NOMATCH;
865 break;
867 matched:
868 /* Skip the rest of the [...] that already matched. */
871 ignore_next:
872 c = *p++;
874 if (c == L_('\0'))
875 /* [... (unterminated) loses. */
876 return FNM_NOMATCH;
878 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
880 if (*p == L_('\0'))
881 return FNM_NOMATCH;
882 /* XXX 1003.2d11 is unclear if this is right. */
883 ++p;
885 else if (c == L_('[') && *p == L_(':'))
887 int c1 = 0;
888 const CHAR *startp = p;
890 while (1)
892 c = *++p;
893 if (++c1 == CHAR_CLASS_MAX_LENGTH)
894 return FNM_NOMATCH;
896 if (*p == L_(':') && p[1] == L_(']'))
897 break;
899 if (c < L_('a') || c >= L_('z'))
901 p = startp;
902 goto ignore_next;
905 p += 2;
906 c = *p++;
908 else if (c == L_('[') && *p == L_('='))
910 c = *++p;
911 if (c == L_('\0'))
912 return FNM_NOMATCH;
913 c = *++p;
914 if (c != L_('=') || p[1] != L_(']'))
915 return FNM_NOMATCH;
916 p += 2;
917 c = *p++;
919 else if (c == L_('[') && *p == L_('.'))
921 ++p;
922 while (1)
924 c = *++p;
925 if (c == '\0')
926 return FNM_NOMATCH;
928 if (*p == L_('.') && p[1] == L_(']'))
929 break;
931 p += 2;
932 c = *p++;
935 while (c != L_(']'));
936 if (not)
937 return FNM_NOMATCH;
939 break;
941 case L_('+'):
942 case L_('@'):
943 case L_('!'):
944 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
946 int res;
948 res = EXT (c, p, n, string_end, no_leading_period, flags);
949 if (res != -1)
950 return res;
952 goto normal_match;
954 case L_('/'):
955 if (NO_LEADING_PERIOD (flags))
957 if (n == string_end || c != (UCHAR) *n)
958 return FNM_NOMATCH;
960 new_no_leading_period = true;
961 break;
963 /* FALLTHROUGH */
964 default:
965 normal_match:
966 if (n == string_end || c != FOLD ((UCHAR) *n))
967 return FNM_NOMATCH;
970 no_leading_period = new_no_leading_period;
971 ++n;
974 if (n == string_end)
975 return 0;
977 if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/'))
978 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
979 return 0;
981 return FNM_NOMATCH;
985 static const CHAR *
986 internal_function
987 END (const CHAR *pattern)
989 const CHAR *p = pattern;
991 while (1)
992 if (*++p == L_('\0'))
993 /* This is an invalid pattern. */
994 return pattern;
995 else if (*p == L_('['))
997 /* Handle brackets special. */
998 if (posixly_correct == 0)
999 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1001 /* Skip the not sign. We have to recognize it because of a possibly
1002 following ']'. */
1003 if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1004 ++p;
1005 /* A leading ']' is recognized as such. */
1006 if (*p == L_(']'))
1007 ++p;
1008 /* Skip over all characters of the list. */
1009 while (*p != L_(']'))
1010 if (*p++ == L_('\0'))
1011 /* This is no valid pattern. */
1012 return pattern;
1014 else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1015 || *p == L_('!')) && p[1] == L_('('))
1016 p = END (p + 1);
1017 else if (*p == L_(')'))
1018 break;
1020 return p + 1;
1024 static int
1025 internal_function
1026 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1027 bool no_leading_period, int flags)
1029 const CHAR *startp;
1030 size_t level;
1031 struct patternlist
1033 struct patternlist *next;
1034 CHAR str[1];
1035 } *list = NULL;
1036 struct patternlist **lastp = &list;
1037 size_t pattern_len = STRLEN (pattern);
1038 const CHAR *p;
1039 const CHAR *rs;
1040 enum { ALLOCA_LIMIT = 8000 };
1042 /* Parse the pattern. Store the individual parts in the list. */
1043 level = 0;
1044 for (startp = p = pattern + 1; ; ++p)
1045 if (*p == L_('\0'))
1046 /* This is an invalid pattern. */
1047 return -1;
1048 else if (*p == L_('['))
1050 /* Handle brackets special. */
1051 if (posixly_correct == 0)
1052 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1054 /* Skip the not sign. We have to recognize it because of a possibly
1055 following ']'. */
1056 if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1057 ++p;
1058 /* A leading ']' is recognized as such. */
1059 if (*p == L_(']'))
1060 ++p;
1061 /* Skip over all characters of the list. */
1062 while (*p != L_(']'))
1063 if (*p++ == L_('\0'))
1064 /* This is no valid pattern. */
1065 return -1;
1067 else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1068 || *p == L_('!')) && p[1] == L_('('))
1069 /* Remember the nesting level. */
1070 ++level;
1071 else if (*p == L_(')'))
1073 if (level-- == 0)
1075 /* This means we found the end of the pattern. */
1076 #define NEW_PATTERN \
1077 struct patternlist *newp; \
1078 size_t plen; \
1079 size_t plensize; \
1080 size_t newpsize; \
1082 plen = (opt == L_('?') || opt == L_('@') \
1083 ? pattern_len \
1084 : p - startp + 1UL); \
1085 plensize = plen * sizeof (CHAR); \
1086 newpsize = offsetof (struct patternlist, str) + plensize; \
1087 if ((size_t) -1 / sizeof (CHAR) < plen \
1088 || newpsize < offsetof (struct patternlist, str) \
1089 || ALLOCA_LIMIT <= newpsize) \
1090 return -1; \
1091 newp = (struct patternlist *) alloca (newpsize); \
1092 *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L_('\0'); \
1093 newp->next = NULL; \
1094 *lastp = newp; \
1095 lastp = &newp->next
1096 NEW_PATTERN;
1097 break;
1100 else if (*p == L_('|'))
1102 if (level == 0)
1104 NEW_PATTERN;
1105 startp = p + 1;
1108 assert (list != NULL);
1109 assert (p[-1] == L_(')'));
1110 #undef NEW_PATTERN
1112 switch (opt)
1114 case L_('*'):
1115 if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1116 return 0;
1117 /* FALLTHROUGH */
1119 case L_('+'):
1122 for (rs = string; rs <= string_end; ++rs)
1123 /* First match the prefix with the current pattern with the
1124 current pattern. */
1125 if (FCT (list->str, string, rs, no_leading_period,
1126 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1127 /* This was successful. Now match the rest with the rest
1128 of the pattern. */
1129 && (FCT (p, rs, string_end,
1130 rs == string
1131 ? no_leading_period
1132 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1133 flags & FNM_FILE_NAME
1134 ? flags : flags & ~FNM_PERIOD) == 0
1135 /* This didn't work. Try the whole pattern. */
1136 || (rs != string
1137 && FCT (pattern - 1, rs, string_end,
1138 rs == string
1139 ? no_leading_period
1140 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1141 flags & FNM_FILE_NAME
1142 ? flags : flags & ~FNM_PERIOD) == 0)))
1143 /* It worked. Signal success. */
1144 return 0;
1146 while ((list = list->next) != NULL);
1148 /* None of the patterns lead to a match. */
1149 return FNM_NOMATCH;
1151 case L_('?'):
1152 if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1153 return 0;
1154 /* FALLTHROUGH */
1156 case L_('@'):
1158 /* I cannot believe it but 'strcat' is actually acceptable
1159 here. Match the entire string with the prefix from the
1160 pattern list and the rest of the pattern following the
1161 pattern list. */
1162 if (FCT (STRCAT (list->str, p), string, string_end,
1163 no_leading_period,
1164 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1165 /* It worked. Signal success. */
1166 return 0;
1167 while ((list = list->next) != NULL);
1169 /* None of the patterns lead to a match. */
1170 return FNM_NOMATCH;
1172 case L_('!'):
1173 for (rs = string; rs <= string_end; ++rs)
1175 struct patternlist *runp;
1177 for (runp = list; runp != NULL; runp = runp->next)
1178 if (FCT (runp->str, string, rs, no_leading_period,
1179 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1180 break;
1182 /* If none of the patterns matched see whether the rest does. */
1183 if (runp == NULL
1184 && (FCT (p, rs, string_end,
1185 rs == string
1186 ? no_leading_period
1187 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1188 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1189 == 0))
1190 /* This is successful. */
1191 return 0;
1194 /* None of the patterns together with the rest of the pattern
1195 lead to a match. */
1196 return FNM_NOMATCH;
1198 default:
1199 assert (! "Invalid extended matching operator");
1200 break;
1203 return -1;
/* Remove the template parameters so that the including file can define
   them afresh and include this file again for another character type.  */

/* Character types and literal/conversion helpers.  */
#undef CHAR
#undef UCHAR
#undef INT
#undef L_
#undef FOLD
#undef BTOWC

/* Names of the functions defined in this file.  */
#undef FCT
#undef EXT
#undef END

/* String and memory primitives.  */
#undef STRLEN
#undef STRCAT
#undef MEMPCPY
#undef MEMCHR