1 /* Copyright (C) 1991-1993, 1996-2000, 2001 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 This library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Library General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 This library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
14 You should have received a copy of the GNU Library General Public
15 License along with this library; see the file COPYING.LIB. If not,
16 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 Boston, MA 02111-1307, USA. */
19 /* Match STRING against the filename pattern PATTERN, returning zero if
20 it matches, nonzero if not. */
21 static int FCT (const CHAR
*pattern
, const CHAR
*string
,
22 int no_leading_period
, int flags
) internal_function
;
26 FCT (pattern
, string
, no_leading_period
, flags
)
29 int no_leading_period
;
32 register const CHAR
*p
= pattern
, *n
= string
;
35 # if WIDE_CHAR_VERSION
36 const char *collseq
= (const char *)
37 _NL_CURRENT(LC_COLLATE
, _NL_COLLATE_COLLSEQWC
);
39 const UCHAR
*collseq
= (const UCHAR
*)
40 _NL_CURRENT(LC_COLLATE
, _NL_COLLATE_COLLSEQMB
);
44 while ((c
= *p
++) != L('\0'))
53 else if (*n
== L('/') && (flags
& FNM_FILE_NAME
))
55 else if (*n
== L('.') && no_leading_period
57 || (n
[-1] == L('/') && (flags
& FNM_FILE_NAME
))))
62 if (!(flags
& FNM_NOESCAPE
))
66 /* Trailing \ loses. */
70 if (FOLD ((UCHAR
) *n
) != c
)
75 if (*n
== L('.') && no_leading_period
77 || (n
[-1] == L('/') && (flags
& FNM_FILE_NAME
))))
80 for (c
= *p
++; c
== L('?') || c
== L('*'); c
= *p
++)
82 if (*n
== L('/') && (flags
& FNM_FILE_NAME
))
83 /* A slash does not match a wildcard under FNM_FILE_NAME. */
87 /* A ? needs to match one character. */
89 /* There isn't another character; no match. */
92 /* One character of the string is consumed in matching
93 this ? wildcard, so *??? won't match if there are
94 fewer than three characters. */
100 /* The wildcard(s) is/are the last element of the pattern.
101 If the name is a file name and contains another slash
102 this means it cannot match, unless the FNM_LEADING_DIR
105 int result
= (flags
& FNM_FILE_NAME
) == 0 ? 0 : FNM_NOMATCH
;
107 if (flags
& FNM_FILE_NAME
)
109 if (flags
& FNM_LEADING_DIR
)
113 if (STRCHR (n
, L('/')) == NULL
)
124 endp
= STRCHRNUL (n
, (flags
& FNM_FILE_NAME
) ? L('/') : L('\0'));
128 int flags2
= ((flags
& FNM_FILE_NAME
)
129 ? flags
: (flags
& ~FNM_PERIOD
));
131 for (--p
; n
< endp
; ++n
)
132 if (FCT (p
, n
, (no_leading_period
135 && (flags
& FNM_FILE_NAME
)))),
139 else if (c
== L('/') && (flags
& FNM_FILE_NAME
))
141 while (*n
!= L('\0') && *n
!= L('/'))
144 && (FCT (p
, n
+ 1, flags
& FNM_PERIOD
, flags
) == 0))
149 int flags2
= ((flags
& FNM_FILE_NAME
)
150 ? flags
: (flags
& ~FNM_PERIOD
));
152 if (c
== L('\\') && !(flags
& FNM_NOESCAPE
))
155 for (--p
; n
< endp
; ++n
)
156 if (FOLD ((UCHAR
) *n
) == c
157 && (FCT (p
, n
, (no_leading_period
160 && (flags
& FNM_FILE_NAME
)))),
166 /* If we come here no match is possible with the wildcard. */
171 static int posixly_correct
;
172 /* Nonzero if the sense of the character class is inverted. */
176 if (posixly_correct
== 0)
177 posixly_correct
= getenv ("POSIXLY_CORRECT") != NULL
? 1 : -1;
182 if (*n
== L('.') && no_leading_period
184 || (n
[-1] == L('/') && (flags
& FNM_FILE_NAME
))))
187 if (*n
== L('/') && (flags
& FNM_FILE_NAME
))
188 /* `/' cannot be matched. */
191 not = (*p
== L('!') || (posixly_correct
< 0 && *p
== L('^')));
198 UCHAR fn
= FOLD ((UCHAR
) *n
);
200 if (!(flags
& FNM_NOESCAPE
) && c
== L('\\'))
204 c
= FOLD ((UCHAR
) *p
);
210 else if (c
== L('[') && *p
== L(':'))
212 /* Leave room for the null. */
213 CHAR str
[CHAR_CLASS_MAX_LENGTH
+ 1];
215 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
218 const CHAR
*startp
= p
;
222 if (c1
== CHAR_CLASS_MAX_LENGTH
)
223 /* The name is too long and therefore the pattern
228 if (c
== L(':') && p
[1] == L(']'))
233 if (c
< L('a') || c
>= L('z'))
235 /* This cannot possibly be a character class name.
236 Match it as a normal range. */
245 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
246 wt
= IS_CHAR_CLASS (str
);
248 /* Invalid character class name. */
251 # if defined _LIBC && ! WIDE_CHAR_VERSION
252 /* The following code is glibc specific but does
253 a good job there of speeding up the code since
254 we can avoid the btowc() call. */
255 if (_ISCTYPE ((UCHAR
) *n
, wt
))
258 if (ISWCTYPE (BTOWC ((UCHAR
) *n
), wt
))
262 if ((STREQ (str
, L("alnum")) && ISALNUM ((UCHAR
) *n
))
263 || (STREQ (str
, L("alpha")) && ISALPHA ((UCHAR
) *n
))
264 || (STREQ (str
, L("blank")) && ISBLANK ((UCHAR
) *n
))
265 || (STREQ (str
, L("cntrl")) && ISCNTRL ((UCHAR
) *n
))
266 || (STREQ (str
, L("digit")) && ISDIGIT ((UCHAR
) *n
))
267 || (STREQ (str
, L("graph")) && ISGRAPH ((UCHAR
) *n
))
268 || (STREQ (str
, L("lower")) && ISLOWER ((UCHAR
) *n
))
269 || (STREQ (str
, L("print")) && ISPRINT ((UCHAR
) *n
))
270 || (STREQ (str
, L("punct")) && ISPUNCT ((UCHAR
) *n
))
271 || (STREQ (str
, L("space")) && ISSPACE ((UCHAR
) *n
))
272 || (STREQ (str
, L("upper")) && ISUPPER ((UCHAR
) *n
))
273 || (STREQ (str
, L("xdigit")) && ISXDIGIT ((UCHAR
) *n
)))
279 else if (c
== L('[') && *p
== L('='))
283 _NL_CURRENT_WORD (LC_COLLATE
, _NL_COLLATE_NRULES
);
284 const CHAR
*startp
= p
;
296 if (c
!= L('=') || p
[1] != L(']'))
306 if ((UCHAR
) *n
== str
[0])
311 const int32_t *table
;
312 # if WIDE_CHAR_VERSION
313 const int32_t *weights
;
314 const int32_t *extra
;
316 const unsigned char *weights
;
317 const unsigned char *extra
;
319 const int32_t *indirect
;
321 const UCHAR
*cp
= (const UCHAR
*) str
;
323 /* This #include defines a local function! */
324 # if WIDE_CHAR_VERSION
325 # include <locale/weightwc.h>
327 # include <locale/weight.h>
330 # if WIDE_CHAR_VERSION
331 table
= (const int32_t *)
332 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_TABLEWC
);
333 weights
= (const int32_t *)
334 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_WEIGHTWC
);
335 extra
= (const int32_t *)
336 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_EXTRAWC
);
337 indirect
= (const int32_t *)
338 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_INDIRECTWC
);
340 table
= (const int32_t *)
341 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_TABLEMB
);
342 weights
= (const unsigned char *)
343 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_WEIGHTMB
);
344 extra
= (const unsigned char *)
345 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_EXTRAMB
);
346 indirect
= (const int32_t *)
347 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_INDIRECTMB
);
353 /* We found a table entry. Now see whether the
354 character we are currently at has the same
355 equivalence class value. */
356 int len
= weights
[idx
];
358 const UCHAR
*np
= (const UCHAR
*) n
;
360 idx2
= findidx (&np
);
361 if (idx2
!= 0 && len
== weights
[idx2
])
366 && (weights
[idx
+ 1 + cnt
]
367 == weights
[idx2
+ 1 + cnt
]))
379 else if (c
== L('\0'))
380 /* [ (unterminated) loses. */
389 if (c
== L('[') && *p
== L('.'))
392 _NL_CURRENT_WORD (LC_COLLATE
, _NL_COLLATE_NRULES
);
393 const CHAR
*startp
= p
;
399 if (c
== L('.') && p
[1] == L(']'))
409 /* We have to handle the symbols differently in
410 ranges since then the collation sequence is
412 is_range
= *p
== L('-') && p
[1] != L('\0');
416 /* There are no names defined in the collation
417 data. Therefore we only accept the trivial
418 names consisting of the character itself. */
422 if (!is_range
&& *n
== startp
[1])
431 const int32_t *symb_table
;
432 # ifdef WIDE_CHAR_VERSION
436 # define str (startp + 1)
438 const unsigned char *extra
;
444 # ifdef WIDE_CHAR_VERSION
445 /* We have to convert the name to a single-byte
446 string. This is possible since the names
447 consist of ASCII characters and the internal
448 representation is UCS4. */
449 for (strcnt
= 0; strcnt
< c1
; ++strcnt
)
450 str
[strcnt
] = startp
[1 + strcnt
];
454 _NL_CURRENT_WORD (LC_COLLATE
,
455 _NL_COLLATE_SYMB_HASH_SIZEMB
);
456 symb_table
= (const int32_t *)
457 _NL_CURRENT (LC_COLLATE
,
458 _NL_COLLATE_SYMB_TABLEMB
);
459 extra
= (const unsigned char *)
460 _NL_CURRENT (LC_COLLATE
,
461 _NL_COLLATE_SYMB_EXTRAMB
);
463 /* Locate the character in the hashing table. */
464 hash
= elem_hash (str
, c1
);
467 elem
= hash
% table_size
;
468 second
= hash
% (table_size
- 2);
469 while (symb_table
[2 * elem
] != 0)
471 /* First compare the hashing value. */
472 if (symb_table
[2 * elem
] == hash
473 && c1
== extra
[symb_table
[2 * elem
+ 1]]
475 &extra
[symb_table
[2 * elem
+ 1]
478 /* Yep, this is the entry. */
479 idx
= symb_table
[2 * elem
+ 1];
480 idx
+= 1 + extra
[idx
];
488 if (symb_table
[2 * elem
] != 0)
490 /* Compare the byte sequence but only if
491 this is not part of a range. */
492 # ifdef WIDE_CHAR_VERSION
495 idx
+= 1 + extra
[idx
];
496 /* Adjust for the alignment. */
497 idx
= (idx
+ 3) & ~4;
499 wextra
= (int32_t *) &extra
[idx
+ 4];
504 # ifdef WIDE_CHAR_VERSION
505 for (c1
= 0; c1
< wextra
[idx
]; ++c1
)
506 if (n
[c1
] != wextra
[1 + c1
])
509 if (c1
== wextra
[idx
])
512 for (c1
= 0; c1
< extra
[idx
]; ++c1
)
513 if (n
[c1
] != extra
[1 + c1
])
516 if (c1
== extra
[idx
])
521 /* Get the collation sequence value. */
523 # ifdef WIDE_CHAR_VERSION
524 cold
= wextra
[1 + wextra
[idx
]];
526 /* Adjust for the alignment. */
527 idx
+= 1 + extra
[idx
];
528 idx
= (idx
+ 3) & ~4;
529 cold
= *((int32_t *) &extra
[idx
]);
536 /* No valid character. Match it as a
538 if (!is_range
&& *n
== str
[0])
555 /* We have to handle the symbols differently in
556 ranges since then the collation sequence is
558 is_range
= *p
== L('-') && p
[1] != L('\0');
560 if (!is_range
&& c
== fn
)
567 if (c
== L('-') && *p
!= L(']'))
570 /* We have to find the collation sequence
571 value for C. Collation sequence is nothing
572 we can regularly access. The sequence
573 value is defined by the order in which the
574 definitions of the collation values for the
575 various characters appear in the source
576 file. A strange concept, nowhere
582 # ifdef WIDE_CHAR_VERSION
583 /* Search in the `names' array for the characters. */
584 fcollseq
= collseq_table_lookup (collseq
, fn
);
585 if (fcollseq
== ~((uint32_t) 0))
586 /* XXX We don't know anything about the character
587 we are supposed to match. This means we are
589 goto range_not_matched
;
594 lcollseq
= collseq_table_lookup (collseq
, cold
);
596 fcollseq
= collseq
[fn
];
597 lcollseq
= is_seqval
? cold
: collseq
[(UCHAR
) cold
];
601 if (cend
== L('[') && *p
== L('.'))
604 _NL_CURRENT_WORD (LC_COLLATE
,
606 const CHAR
*startp
= p
;
612 if (c
== L('.') && p
[1] == L(']'))
624 /* There are no names defined in the
625 collation data. Therefore we only
626 accept the trivial names consisting
627 of the character itself. */
636 const int32_t *symb_table
;
637 # ifdef WIDE_CHAR_VERSION
641 # define str (startp + 1)
643 const unsigned char *extra
;
649 # ifdef WIDE_CHAR_VERSION
650 /* We have to convert the name to a single-byte
651 string. This is possible since the names
652 consist of ASCII characters and the internal
653 representation is UCS4. */
654 for (strcnt
= 0; strcnt
< c1
; ++strcnt
)
655 str
[strcnt
] = startp
[1 + strcnt
];
659 _NL_CURRENT_WORD (LC_COLLATE
,
660 _NL_COLLATE_SYMB_HASH_SIZEMB
);
661 symb_table
= (const int32_t *)
662 _NL_CURRENT (LC_COLLATE
,
663 _NL_COLLATE_SYMB_TABLEMB
);
664 extra
= (const unsigned char *)
665 _NL_CURRENT (LC_COLLATE
,
666 _NL_COLLATE_SYMB_EXTRAMB
);
668 /* Locate the character in the hashing
670 hash
= elem_hash (str
, c1
);
673 elem
= hash
% table_size
;
674 second
= hash
% (table_size
- 2);
675 while (symb_table
[2 * elem
] != 0)
677 /* First compare the hashing value. */
678 if (symb_table
[2 * elem
] == hash
680 == extra
[symb_table
[2 * elem
+ 1]])
682 &extra
[symb_table
[2 * elem
+ 1]
685 /* Yep, this is the entry. */
686 idx
= symb_table
[2 * elem
+ 1];
687 idx
+= 1 + extra
[idx
];
695 if (symb_table
[2 * elem
] != 0)
697 /* Compare the byte sequence but only if
698 this is not part of a range. */
699 # ifdef WIDE_CHAR_VERSION
702 idx
+= 1 + extra
[idx
];
703 /* Adjust for the alignment. */
704 idx
= (idx
+ 3) & ~4;
706 wextra
= (int32_t *) &extra
[idx
+ 4];
708 /* Get the collation sequence value. */
710 # ifdef WIDE_CHAR_VERSION
711 cend
= wextra
[1 + wextra
[idx
]];
713 /* Adjust for the alignment. */
714 idx
+= 1 + extra
[idx
];
715 idx
= (idx
+ 3) & ~4;
716 cend
= *((int32_t *) &extra
[idx
]);
719 else if (symb_table
[2 * elem
] != 0 && c1
== 1)
731 if (!(flags
& FNM_NOESCAPE
) && cend
== L('\\'))
738 /* XXX It is not entirely clear to me how to handle
739 characters which are not mentioned in the
740 collation specification. */
742 # ifdef WIDE_CHAR_VERSION
743 lcollseq
== 0xffffffff ||
745 lcollseq
<= fcollseq
)
747 /* We have to look at the upper bound. */
754 # ifdef WIDE_CHAR_VERSION
756 collseq_table_lookup (collseq
, cend
);
757 if (hcollseq
== ~((uint32_t) 0))
759 /* Hum, no information about the upper
760 bound. The matching succeeds if the
761 lower bound is matched exactly. */
762 if (lcollseq
!= fcollseq
)
763 goto range_not_matched
;
768 hcollseq
= collseq
[cend
];
772 if (lcollseq
<= hcollseq
&& fcollseq
<= hcollseq
)
775 # ifdef WIDE_CHAR_VERSION
779 /* We use a boring value comparison of the character
780 values. This is better than comparing using
781 `strcoll' since the latter would have surprising
782 and sometimes fatal consequences. */
785 if (!(flags
& FNM_NOESCAPE
) && cend
== L('\\'))
791 if (cold
<= fn
&& fn
<= c
)
808 /* Skip the rest of the [...] that already matched. */
815 /* [... (unterminated) loses. */
818 if (!(flags
& FNM_NOESCAPE
) && c
== L('\\'))
822 /* XXX 1003.2d11 is unclear if this is right. */
825 else if (c
== L('[') && *p
== L(':'))
828 const CHAR
*startp
= p
;
833 if (++c1
== CHAR_CLASS_MAX_LENGTH
)
836 if (*p
== L(':') && p
[1] == L(']'))
839 if (c
< L('a') || c
>= L('z'))
848 else if (c
== L('[') && *p
== L('='))
854 if (c
!= L('=') || p
[1] != L(']'))
859 else if (c
== L('[') && *p
== L('.'))
868 if (*p
== L('.') && p
[1] == L(']'))
882 if (c
!= FOLD ((UCHAR
) *n
))
892 if ((flags
& FNM_LEADING_DIR
) && *n
== L('/'))
893 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */