* intl/dcigettext.c (DCIGETTEXT): Avoid some code duplication.
[glibc.git] / posix / fnmatch_loop.c
blob67c0ee4abb7a92969d3d6b11899ebdaf102a3fea
/* Copyright (C) 1991-1993,1996-2001,2003-2005,2007
   Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */
20 struct STRUCT
22 const CHAR *pattern;
23 const CHAR *string;
24 int no_leading_period;
27 /* Match STRING against the filename pattern PATTERN, returning zero if
28 it matches, nonzero if not. */
29 static int FCT (const CHAR *pattern, const CHAR *string,
30 const CHAR *string_end, int no_leading_period, int flags,
31 struct STRUCT *ends)
32 internal_function;
33 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
34 const CHAR *string_end, int no_leading_period, int flags)
35 internal_function;
36 static const CHAR *END (const CHAR *patternp) internal_function;
38 static int
39 internal_function
40 FCT (pattern, string, string_end, no_leading_period, flags, ends)
41 const CHAR *pattern;
42 const CHAR *string;
43 const CHAR *string_end;
44 int no_leading_period;
45 int flags;
46 struct STRUCT *ends;
48 register const CHAR *p = pattern, *n = string;
49 register UCHAR c;
50 #ifdef _LIBC
51 # if WIDE_CHAR_VERSION
52 const char *collseq = (const char *)
53 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
54 # else
55 const UCHAR *collseq = (const UCHAR *)
56 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
57 # endif
58 #endif
60 while ((c = *p++) != L('\0'))
62 int new_no_leading_period = 0;
63 c = FOLD (c);
65 switch (c)
67 case L('?'):
68 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
70 int res;
72 res = EXT (c, p, n, string_end, no_leading_period,
73 flags);
74 if (res != -1)
75 return res;
78 if (n == string_end)
79 return FNM_NOMATCH;
80 else if (*n == L('/') && (flags & FNM_FILE_NAME))
81 return FNM_NOMATCH;
82 else if (*n == L('.') && no_leading_period)
83 return FNM_NOMATCH;
84 break;
86 case L('\\'):
87 if (!(flags & FNM_NOESCAPE))
89 c = *p++;
90 if (c == L('\0'))
91 /* Trailing \ loses. */
92 return FNM_NOMATCH;
93 c = FOLD (c);
95 if (n == string_end || FOLD ((UCHAR) *n) != c)
96 return FNM_NOMATCH;
97 break;
99 case L('*'):
100 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
102 int res;
104 res = EXT (c, p, n, string_end, no_leading_period,
105 flags);
106 if (res != -1)
107 return res;
109 else if (ends != NULL)
111 ends->pattern = p - 1;
112 ends->string = n;
113 ends->no_leading_period = no_leading_period;
114 return 0;
117 if (n != string_end && *n == L('.') && no_leading_period)
118 return FNM_NOMATCH;
120 for (c = *p++; c == L('?') || c == L('*'); c = *p++)
122 if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
124 const CHAR *endp = END (p);
125 if (endp != p)
127 /* This is a pattern. Skip over it. */
128 p = endp;
129 continue;
133 if (c == L('?'))
135 /* A ? needs to match one character. */
136 if (n == string_end)
137 /* There isn't another character; no match. */
138 return FNM_NOMATCH;
139 else if (*n == L('/')
140 && __builtin_expect (flags & FNM_FILE_NAME, 0))
141 /* A slash does not match a wildcard under
142 FNM_FILE_NAME. */
143 return FNM_NOMATCH;
144 else
145 /* One character of the string is consumed in matching
146 this ? wildcard, so *??? won't match if there are
147 less than three characters. */
148 ++n;
152 if (c == L('\0'))
153 /* The wildcard(s) is/are the last element of the pattern.
154 If the name is a file name and contains another slash
155 this means it cannot match, unless the FNM_LEADING_DIR
156 flag is set. */
158 int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
160 if (flags & FNM_FILE_NAME)
162 if (flags & FNM_LEADING_DIR)
163 result = 0;
164 else
166 if (MEMCHR (n, L('/'), string_end - n) == NULL)
167 result = 0;
171 return result;
173 else
175 const CHAR *endp;
176 struct STRUCT end;
178 end.pattern = NULL;
179 endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
180 string_end - n);
181 if (endp == NULL)
182 endp = string_end;
184 if (c == L('[')
185 || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
186 && (c == L('@') || c == L('+') || c == L('!'))
187 && *p == L('(')))
189 int flags2 = ((flags & FNM_FILE_NAME)
190 ? flags : (flags & ~FNM_PERIOD));
192 for (--p; n < endp; ++n, no_leading_period = 0)
193 if (FCT (p, n, string_end, no_leading_period, flags2,
194 &end) == 0)
195 goto found;
197 else if (c == L('/') && (flags & FNM_FILE_NAME))
199 while (n < string_end && *n != L('/'))
200 ++n;
201 if (n < string_end && *n == L('/')
202 && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags,
203 NULL) == 0))
204 return 0;
206 else
208 int flags2 = ((flags & FNM_FILE_NAME)
209 ? flags : (flags & ~FNM_PERIOD));
211 if (c == L('\\') && !(flags & FNM_NOESCAPE))
212 c = *p;
213 c = FOLD (c);
214 for (--p; n < endp; ++n, no_leading_period = 0)
215 if (FOLD ((UCHAR) *n) == c
216 && (FCT (p, n, string_end, no_leading_period, flags2,
217 &end) == 0))
219 found:
220 if (end.pattern == NULL)
221 return 0;
222 break;
224 if (end.pattern != NULL)
226 p = end.pattern;
227 n = end.string;
228 no_leading_period = end.no_leading_period;
229 continue;
234 /* If we come here no match is possible with the wildcard. */
235 return FNM_NOMATCH;
237 case L('['):
239 /* Nonzero if the sense of the character class is inverted. */
240 register int not;
241 CHAR cold;
242 UCHAR fn;
244 if (posixly_correct == 0)
245 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
247 if (n == string_end)
248 return FNM_NOMATCH;
250 if (*n == L('.') && no_leading_period)
251 return FNM_NOMATCH;
253 if (*n == L('/') && (flags & FNM_FILE_NAME))
254 /* `/' cannot be matched. */
255 return FNM_NOMATCH;
257 not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
258 if (not)
259 ++p;
261 fn = FOLD ((UCHAR) *n);
263 c = *p++;
264 for (;;)
266 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
268 if (*p == L('\0'))
269 return FNM_NOMATCH;
270 c = FOLD ((UCHAR) *p);
271 ++p;
273 goto normal_bracket;
275 else if (c == L('[') && *p == L(':'))
277 /* Leave room for the null. */
278 CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
279 size_t c1 = 0;
280 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
281 wctype_t wt;
282 #endif
283 const CHAR *startp = p;
285 for (;;)
287 if (c1 == CHAR_CLASS_MAX_LENGTH)
288 /* The name is too long and therefore the pattern
289 is ill-formed. */
290 return FNM_NOMATCH;
292 c = *++p;
293 if (c == L(':') && p[1] == L(']'))
295 p += 2;
296 break;
298 if (c < L('a') || c >= L('z'))
300 /* This cannot possibly be a character class name.
301 Match it as a normal range. */
302 p = startp;
303 c = L('[');
304 goto normal_bracket;
306 str[c1++] = c;
308 str[c1] = L('\0');
310 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
311 wt = IS_CHAR_CLASS (str);
312 if (wt == 0)
313 /* Invalid character class name. */
314 return FNM_NOMATCH;
316 # if defined _LIBC && ! WIDE_CHAR_VERSION
317 /* The following code is glibc specific but does
318 there a good job in speeding up the code since
319 we can avoid the btowc() call. */
320 if (_ISCTYPE ((UCHAR) *n, wt))
321 goto matched;
322 # else
323 if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
324 goto matched;
325 # endif
326 #else
327 if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
328 || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
329 || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
330 || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
331 || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
332 || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
333 || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
334 || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
335 || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
336 || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
337 || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
338 || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
339 goto matched;
340 #endif
341 c = *p++;
343 #ifdef _LIBC
344 else if (c == L('[') && *p == L('='))
346 UCHAR str[1];
347 uint32_t nrules =
348 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
349 const CHAR *startp = p;
351 c = *++p;
352 if (c == L('\0'))
354 p = startp;
355 c = L('[');
356 goto normal_bracket;
358 str[0] = c;
360 c = *++p;
361 if (c != L('=') || p[1] != L(']'))
363 p = startp;
364 c = L('[');
365 goto normal_bracket;
367 p += 2;
369 if (nrules == 0)
371 if ((UCHAR) *n == str[0])
372 goto matched;
374 else
376 const int32_t *table;
377 # if WIDE_CHAR_VERSION
378 const int32_t *weights;
379 const int32_t *extra;
380 # else
381 const unsigned char *weights;
382 const unsigned char *extra;
383 # endif
384 const int32_t *indirect;
385 int32_t idx;
386 const UCHAR *cp = (const UCHAR *) str;
388 /* This #include defines a local function! */
389 # if WIDE_CHAR_VERSION
390 # include <locale/weightwc.h>
391 # else
392 # include <locale/weight.h>
393 # endif
395 # if WIDE_CHAR_VERSION
396 table = (const int32_t *)
397 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
398 weights = (const int32_t *)
399 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
400 extra = (const int32_t *)
401 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
402 indirect = (const int32_t *)
403 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
404 # else
405 table = (const int32_t *)
406 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
407 weights = (const unsigned char *)
408 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
409 extra = (const unsigned char *)
410 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
411 indirect = (const int32_t *)
412 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
413 # endif
415 idx = findidx (&cp);
416 if (idx != 0)
418 /* We found a table entry. Now see whether the
419 character we are currently at has the same
420 equivalance class value. */
421 int len = weights[idx & 0xffffff];
422 int32_t idx2;
423 const UCHAR *np = (const UCHAR *) n;
425 idx2 = findidx (&np);
426 if (idx2 != 0
427 && (idx >> 24) == (idx2 >> 24)
428 && len == weights[idx2 & 0xffffff])
430 int cnt = 0;
432 idx &= 0xffffff;
433 idx2 &= 0xffffff;
435 while (cnt < len
436 && (weights[idx + 1 + cnt]
437 == weights[idx2 + 1 + cnt]))
438 ++cnt;
440 if (cnt == len)
441 goto matched;
446 c = *p++;
448 #endif
449 else if (c == L('\0'))
450 /* [ (unterminated) loses. */
451 return FNM_NOMATCH;
452 else
454 int is_range = 0;
456 #ifdef _LIBC
457 int is_seqval = 0;
459 if (c == L('[') && *p == L('.'))
461 uint32_t nrules =
462 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
463 const CHAR *startp = p;
464 size_t c1 = 0;
466 while (1)
468 c = *++p;
469 if (c == L('.') && p[1] == L(']'))
471 p += 2;
472 break;
474 if (c == '\0')
475 return FNM_NOMATCH;
476 ++c1;
479 /* We have to handling the symbols differently in
480 ranges since then the collation sequence is
481 important. */
482 is_range = *p == L('-') && p[1] != L('\0');
484 if (nrules == 0)
486 /* There are no names defined in the collation
487 data. Therefore we only accept the trivial
488 names consisting of the character itself. */
489 if (c1 != 1)
490 return FNM_NOMATCH;
492 if (!is_range && *n == startp[1])
493 goto matched;
495 cold = startp[1];
496 c = *p++;
498 else
500 int32_t table_size;
501 const int32_t *symb_table;
502 # ifdef WIDE_CHAR_VERSION
503 char str[c1];
504 unsigned int strcnt;
505 # else
506 # define str (startp + 1)
507 # endif
508 const unsigned char *extra;
509 int32_t idx;
510 int32_t elem;
511 int32_t second;
512 int32_t hash;
514 # ifdef WIDE_CHAR_VERSION
515 /* We have to convert the name to a single-byte
516 string. This is possible since the names
517 consist of ASCII characters and the internal
518 representation is UCS4. */
519 for (strcnt = 0; strcnt < c1; ++strcnt)
520 str[strcnt] = startp[1 + strcnt];
521 #endif
523 table_size =
524 _NL_CURRENT_WORD (LC_COLLATE,
525 _NL_COLLATE_SYMB_HASH_SIZEMB);
526 symb_table = (const int32_t *)
527 _NL_CURRENT (LC_COLLATE,
528 _NL_COLLATE_SYMB_TABLEMB);
529 extra = (const unsigned char *)
530 _NL_CURRENT (LC_COLLATE,
531 _NL_COLLATE_SYMB_EXTRAMB);
533 /* Locate the character in the hashing table. */
534 hash = elem_hash (str, c1);
536 idx = 0;
537 elem = hash % table_size;
538 if (symb_table[2 * elem] != 0)
540 second = hash % (table_size - 2) + 1;
544 /* First compare the hashing value. */
545 if (symb_table[2 * elem] == hash
546 && (c1
547 == extra[symb_table[2 * elem + 1]])
548 && memcmp (str,
549 &extra[symb_table[2 * elem
550 + 1]
551 + 1], c1) == 0)
553 /* Yep, this is the entry. */
554 idx = symb_table[2 * elem + 1];
555 idx += 1 + extra[idx];
556 break;
559 /* Next entry. */
560 elem += second;
562 while (symb_table[2 * elem] != 0);
565 if (symb_table[2 * elem] != 0)
567 /* Compare the byte sequence but only if
568 this is not part of a range. */
569 # ifdef WIDE_CHAR_VERSION
570 int32_t *wextra;
572 idx += 1 + extra[idx];
573 /* Adjust for the alignment. */
574 idx = (idx + 3) & ~3;
576 wextra = (int32_t *) &extra[idx + 4];
577 # endif
579 if (! is_range)
581 # ifdef WIDE_CHAR_VERSION
582 for (c1 = 0;
583 (int32_t) c1 < wextra[idx];
584 ++c1)
585 if (n[c1] != wextra[1 + c1])
586 break;
588 if ((int32_t) c1 == wextra[idx])
589 goto matched;
590 # else
591 for (c1 = 0; c1 < extra[idx]; ++c1)
592 if (n[c1] != extra[1 + c1])
593 break;
595 if (c1 == extra[idx])
596 goto matched;
597 # endif
600 /* Get the collation sequence value. */
601 is_seqval = 1;
602 # ifdef WIDE_CHAR_VERSION
603 cold = wextra[1 + wextra[idx]];
604 # else
605 /* Adjust for the alignment. */
606 idx += 1 + extra[idx];
607 idx = (idx + 3) & ~4;
608 cold = *((int32_t *) &extra[idx]);
609 # endif
611 c = *p++;
613 else if (c1 == 1)
615 /* No valid character. Match it as a
616 single byte. */
617 if (!is_range && *n == str[0])
618 goto matched;
620 cold = str[0];
621 c = *p++;
623 else
624 return FNM_NOMATCH;
627 else
628 # undef str
629 #endif
631 c = FOLD (c);
632 normal_bracket:
634 /* We have to handling the symbols differently in
635 ranges since then the collation sequence is
636 important. */
637 is_range = (*p == L('-') && p[1] != L('\0')
638 && p[1] != L(']'));
640 if (!is_range && c == fn)
641 goto matched;
643 /* This is needed if we goto normal_bracket; from
644 outside of is_seqval's scope. */
645 is_seqval = 0;
646 cold = c;
647 c = *p++;
650 if (c == L('-') && *p != L(']'))
652 #if _LIBC
653 /* We have to find the collation sequence
654 value for C. Collation sequence is nothing
655 we can regularly access. The sequence
656 value is defined by the order in which the
657 definitions of the collation values for the
658 various characters appear in the source
659 file. A strange concept, nowhere
660 documented. */
661 uint32_t fcollseq;
662 uint32_t lcollseq;
663 UCHAR cend = *p++;
665 # ifdef WIDE_CHAR_VERSION
666 /* Search in the `names' array for the characters. */
667 fcollseq = __collseq_table_lookup (collseq, fn);
668 if (fcollseq == ~((uint32_t) 0))
669 /* XXX We don't know anything about the character
670 we are supposed to match. This means we are
671 failing. */
672 goto range_not_matched;
674 if (is_seqval)
675 lcollseq = cold;
676 else
677 lcollseq = __collseq_table_lookup (collseq, cold);
678 # else
679 fcollseq = collseq[fn];
680 lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
681 # endif
683 is_seqval = 0;
684 if (cend == L('[') && *p == L('.'))
686 uint32_t nrules =
687 _NL_CURRENT_WORD (LC_COLLATE,
688 _NL_COLLATE_NRULES);
689 const CHAR *startp = p;
690 size_t c1 = 0;
692 while (1)
694 c = *++p;
695 if (c == L('.') && p[1] == L(']'))
697 p += 2;
698 break;
700 if (c == '\0')
701 return FNM_NOMATCH;
702 ++c1;
705 if (nrules == 0)
707 /* There are no names defined in the
708 collation data. Therefore we only
709 accept the trivial names consisting
710 of the character itself. */
711 if (c1 != 1)
712 return FNM_NOMATCH;
714 cend = startp[1];
716 else
718 int32_t table_size;
719 const int32_t *symb_table;
720 # ifdef WIDE_CHAR_VERSION
721 char str[c1];
722 unsigned int strcnt;
723 # else
724 # define str (startp + 1)
725 # endif
726 const unsigned char *extra;
727 int32_t idx;
728 int32_t elem;
729 int32_t second;
730 int32_t hash;
732 # ifdef WIDE_CHAR_VERSION
733 /* We have to convert the name to a single-byte
734 string. This is possible since the names
735 consist of ASCII characters and the internal
736 representation is UCS4. */
737 for (strcnt = 0; strcnt < c1; ++strcnt)
738 str[strcnt] = startp[1 + strcnt];
739 # endif
741 table_size =
742 _NL_CURRENT_WORD (LC_COLLATE,
743 _NL_COLLATE_SYMB_HASH_SIZEMB);
744 symb_table = (const int32_t *)
745 _NL_CURRENT (LC_COLLATE,
746 _NL_COLLATE_SYMB_TABLEMB);
747 extra = (const unsigned char *)
748 _NL_CURRENT (LC_COLLATE,
749 _NL_COLLATE_SYMB_EXTRAMB);
751 /* Locate the character in the hashing
752 table. */
753 hash = elem_hash (str, c1);
755 idx = 0;
756 elem = hash % table_size;
757 if (symb_table[2 * elem] != 0)
759 second = hash % (table_size - 2) + 1;
763 /* First compare the hashing value. */
764 if (symb_table[2 * elem] == hash
765 && (c1
766 == extra[symb_table[2 * elem + 1]])
767 && memcmp (str,
768 &extra[symb_table[2 * elem + 1]
769 + 1], c1) == 0)
771 /* Yep, this is the entry. */
772 idx = symb_table[2 * elem + 1];
773 idx += 1 + extra[idx];
774 break;
777 /* Next entry. */
778 elem += second;
780 while (symb_table[2 * elem] != 0);
783 if (symb_table[2 * elem] != 0)
785 /* Compare the byte sequence but only if
786 this is not part of a range. */
787 # ifdef WIDE_CHAR_VERSION
788 int32_t *wextra;
790 idx += 1 + extra[idx];
791 /* Adjust for the alignment. */
792 idx = (idx + 3) & ~4;
794 wextra = (int32_t *) &extra[idx + 4];
795 # endif
796 /* Get the collation sequence value. */
797 is_seqval = 1;
798 # ifdef WIDE_CHAR_VERSION
799 cend = wextra[1 + wextra[idx]];
800 # else
801 /* Adjust for the alignment. */
802 idx += 1 + extra[idx];
803 idx = (idx + 3) & ~4;
804 cend = *((int32_t *) &extra[idx]);
805 # endif
807 else if (symb_table[2 * elem] != 0 && c1 == 1)
809 cend = str[0];
810 c = *p++;
812 else
813 return FNM_NOMATCH;
815 # undef str
817 else
819 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
820 cend = *p++;
821 if (cend == L('\0'))
822 return FNM_NOMATCH;
823 cend = FOLD (cend);
826 /* XXX It is not entirely clear to me how to handle
827 characters which are not mentioned in the
828 collation specification. */
829 if (
830 # ifdef WIDE_CHAR_VERSION
831 lcollseq == 0xffffffff ||
832 # endif
833 lcollseq <= fcollseq)
835 /* We have to look at the upper bound. */
836 uint32_t hcollseq;
838 if (is_seqval)
839 hcollseq = cend;
840 else
842 # ifdef WIDE_CHAR_VERSION
843 hcollseq =
844 __collseq_table_lookup (collseq, cend);
845 if (hcollseq == ~((uint32_t) 0))
847 /* Hum, no information about the upper
848 bound. The matching succeeds if the
849 lower bound is matched exactly. */
850 if (lcollseq != fcollseq)
851 goto range_not_matched;
853 goto matched;
855 # else
856 hcollseq = collseq[cend];
857 # endif
860 if (lcollseq <= hcollseq && fcollseq <= hcollseq)
861 goto matched;
863 # ifdef WIDE_CHAR_VERSION
864 range_not_matched:
865 # endif
866 #else
867 /* We use a boring value comparison of the character
868 values. This is better than comparing using
869 `strcoll' since the latter would have surprising
870 and sometimes fatal consequences. */
871 UCHAR cend = *p++;
873 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
874 cend = *p++;
875 if (cend == L('\0'))
876 return FNM_NOMATCH;
878 /* It is a range. */
879 if (cold <= fn && fn <= cend)
880 goto matched;
881 #endif
883 c = *p++;
887 if (c == L(']'))
888 break;
891 if (!not)
892 return FNM_NOMATCH;
893 break;
895 matched:
896 /* Skip the rest of the [...] that already matched. */
899 ignore_next:
900 c = *p++;
902 if (c == L('\0'))
903 /* [... (unterminated) loses. */
904 return FNM_NOMATCH;
906 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
908 if (*p == L('\0'))
909 return FNM_NOMATCH;
910 /* XXX 1003.2d11 is unclear if this is right. */
911 ++p;
913 else if (c == L('[') && *p == L(':'))
915 int c1 = 0;
916 const CHAR *startp = p;
918 while (1)
920 c = *++p;
921 if (++c1 == CHAR_CLASS_MAX_LENGTH)
922 return FNM_NOMATCH;
924 if (*p == L(':') && p[1] == L(']'))
925 break;
927 if (c < L('a') || c >= L('z'))
929 p = startp;
930 goto ignore_next;
933 p += 2;
934 c = *p++;
936 else if (c == L('[') && *p == L('='))
938 c = *++p;
939 if (c == L('\0'))
940 return FNM_NOMATCH;
941 c = *++p;
942 if (c != L('=') || p[1] != L(']'))
943 return FNM_NOMATCH;
944 p += 2;
945 c = *p++;
947 else if (c == L('[') && *p == L('.'))
949 ++p;
950 while (1)
952 c = *++p;
953 if (c == '\0')
954 return FNM_NOMATCH;
956 if (*p == L('.') && p[1] == L(']'))
957 break;
959 p += 2;
960 c = *p++;
963 while (c != L(']'));
964 if (not)
965 return FNM_NOMATCH;
967 break;
969 case L('+'):
970 case L('@'):
971 case L('!'):
972 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
974 int res;
976 res = EXT (c, p, n, string_end, no_leading_period, flags);
977 if (res != -1)
978 return res;
980 goto normal_match;
982 case L('/'):
983 if (NO_LEADING_PERIOD (flags))
985 if (n == string_end || c != (UCHAR) *n)
986 return FNM_NOMATCH;
988 new_no_leading_period = 1;
989 break;
991 /* FALLTHROUGH */
992 default:
993 normal_match:
994 if (n == string_end || c != FOLD ((UCHAR) *n))
995 return FNM_NOMATCH;
998 no_leading_period = new_no_leading_period;
999 ++n;
1002 if (n == string_end)
1003 return 0;
1005 if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
1006 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
1007 return 0;
1009 return FNM_NOMATCH;
1013 static const CHAR *
1014 internal_function
1015 END (const CHAR *pattern)
1017 const CHAR *p = pattern;
1019 while (1)
1020 if (*++p == L('\0'))
1021 /* This is an invalid pattern. */
1022 return pattern;
1023 else if (*p == L('['))
1025 /* Handle brackets special. */
1026 if (posixly_correct == 0)
1027 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1029 /* Skip the not sign. We have to recognize it because of a possibly
1030 following ']'. */
1031 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1032 ++p;
1033 /* A leading ']' is recognized as such. */
1034 if (*p == L(']'))
1035 ++p;
1036 /* Skip over all characters of the list. */
1037 while (*p != L(']'))
1038 if (*p++ == L('\0'))
1039 /* This is no valid pattern. */
1040 return pattern;
1042 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1043 || *p == L('!')) && p[1] == L('('))
1044 p = END (p + 1);
1045 else if (*p == L(')'))
1046 break;
1048 return p + 1;
1052 static int
1053 internal_function
1054 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1055 int no_leading_period, int flags)
1057 const CHAR *startp;
1058 int level;
1059 struct patternlist
1061 struct patternlist *next;
1062 CHAR str[0];
1063 } *list = NULL;
1064 struct patternlist **lastp = &list;
1065 size_t pattern_len = STRLEN (pattern);
1066 const CHAR *p;
1067 const CHAR *rs;
1069 /* Parse the pattern. Store the individual parts in the list. */
1070 level = 0;
1071 for (startp = p = pattern + 1; level >= 0; ++p)
1072 if (*p == L('\0'))
1073 /* This is an invalid pattern. */
1074 return -1;
1075 else if (*p == L('['))
1077 /* Handle brackets special. */
1078 if (posixly_correct == 0)
1079 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1081 /* Skip the not sign. We have to recognize it because of a possibly
1082 following ']'. */
1083 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1084 ++p;
1085 /* A leading ']' is recognized as such. */
1086 if (*p == L(']'))
1087 ++p;
1088 /* Skip over all characters of the list. */
1089 while (*p != L(']'))
1090 if (*p++ == L('\0'))
1091 /* This is no valid pattern. */
1092 return -1;
1094 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1095 || *p == L('!')) && p[1] == L('('))
1096 /* Remember the nesting level. */
1097 ++level;
1098 else if (*p == L(')'))
1100 if (level-- == 0)
1102 /* This means we found the end of the pattern. */
1103 #define NEW_PATTERN \
1104 struct patternlist *newp; \
1106 if (opt == L('?') || opt == L('@')) \
1107 newp = alloca (sizeof (struct patternlist) \
1108 + (pattern_len * sizeof (CHAR))); \
1109 else \
1110 newp = alloca (sizeof (struct patternlist) \
1111 + ((p - startp + 1) * sizeof (CHAR))); \
1112 *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0'); \
1113 newp->next = NULL; \
1114 *lastp = newp; \
1115 lastp = &newp->next
1116 NEW_PATTERN;
1119 else if (*p == L('|'))
1121 if (level == 0)
1123 NEW_PATTERN;
1124 startp = p + 1;
1127 assert (list != NULL);
1128 assert (p[-1] == L(')'));
1129 #undef NEW_PATTERN
1131 switch (opt)
1133 case L('*'):
1134 if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
1135 return 0;
1136 /* FALLTHROUGH */
1138 case L('+'):
1141 for (rs = string; rs <= string_end; ++rs)
1142 /* First match the prefix with the current pattern with the
1143 current pattern. */
1144 if (FCT (list->str, string, rs, no_leading_period,
1145 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1146 NULL) == 0
1147 /* This was successful. Now match the rest with the rest
1148 of the pattern. */
1149 && (FCT (p, rs, string_end,
1150 rs == string
1151 ? no_leading_period
1152 : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1153 flags & FNM_FILE_NAME
1154 ? flags : flags & ~FNM_PERIOD, NULL) == 0
1155 /* This didn't work. Try the whole pattern. */
1156 || (rs != string
1157 && FCT (pattern - 1, rs, string_end,
1158 rs == string
1159 ? no_leading_period
1160 : (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
1161 ? 1 : 0),
1162 flags & FNM_FILE_NAME
1163 ? flags : flags & ~FNM_PERIOD, NULL) == 0)))
1164 /* It worked. Signal success. */
1165 return 0;
1167 while ((list = list->next) != NULL);
1169 /* None of the patterns lead to a match. */
1170 return FNM_NOMATCH;
1172 case L('?'):
1173 if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
1174 return 0;
1175 /* FALLTHROUGH */
1177 case L('@'):
1179 /* I cannot believe it but `strcat' is actually acceptable
1180 here. Match the entire string with the prefix from the
1181 pattern list and the rest of the pattern following the
1182 pattern list. */
1183 if (FCT (STRCAT (list->str, p), string, string_end,
1184 no_leading_period,
1185 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1186 NULL) == 0)
1187 /* It worked. Signal success. */
1188 return 0;
1189 while ((list = list->next) != NULL);
1191 /* None of the patterns lead to a match. */
1192 return FNM_NOMATCH;
1194 case L('!'):
1195 for (rs = string; rs <= string_end; ++rs)
1197 struct patternlist *runp;
1199 for (runp = list; runp != NULL; runp = runp->next)
1200 if (FCT (runp->str, string, rs, no_leading_period,
1201 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1202 NULL) == 0)
1203 break;
1205 /* If none of the patterns matched see whether the rest does. */
1206 if (runp == NULL
1207 && (FCT (p, rs, string_end,
1208 rs == string
1209 ? no_leading_period
1210 : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1211 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1212 NULL) == 0))
1213 /* This is successful. */
1214 return 0;
1217 /* None of the patterns together with the rest of the pattern
1218 lead to a match. */
1219 return FNM_NOMATCH;
1221 default:
1222 assert (! "Invalid extended matching operator");
1223 break;
1226 return -1;
/* Drop the template parameters so that this file can be included again
   with a different set of definitions.  */
#undef FOLD
#undef CHAR
#undef UCHAR
#undef INT
#undef FCT
#undef EXT
#undef END
#undef STRUCT
#undef MEMPCPY
#undef MEMCHR
#undef STRCOLL
#undef STRLEN
#undef STRCAT
#undef L
#undef BTOWC