1 /* Copyright (C) 1991-1993, 1996-1999, 2000 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 This library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Library General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 This library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
14 You should have received a copy of the GNU Library General Public
15 License along with this library; see the file COPYING.LIB. If not,
16 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 Boston, MA 02111-1307, USA. */
19 /* Match STRING against the filename pattern PATTERN, returning zero if
20 it matches, nonzero if not. */
21 static int FCT (const CHAR
*pattern
, const CHAR
*string
,
22 int no_leading_period
, int flags
) internal_function
;
26 FCT (pattern
, string
, no_leading_period
, flags
)
29 int no_leading_period
;
32 register const CHAR
*p
= pattern
, *n
= string
;
35 const UCHAR
*collseq
= (const UCHAR
*)
36 _NL_CURRENT(LC_COLLATE
, CONCAT(_NL_COLLATE_COLLSEQ
,SUFFIX
));
37 # ifdef WIDE_CHAR_VERSION
38 const wint_t *names
= (const wint_t *)
39 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_NAMES
);
40 size_t size
= _NL_CURRENT_WORD (LC_COLLATE
, _NL_COLLATE_HASH_SIZE
);
41 size_t layers
= _NL_CURRENT_WORD (LC_COLLATE
, _NL_COLLATE_HASH_LAYERS
);
45 while ((c
= *p
++) != L('\0'))
54 else if (*n
== L('/') && (flags
& FNM_FILE_NAME
))
56 else if (*n
== L('.') && no_leading_period
58 || (n
[-1] == L('/') && (flags
& FNM_FILE_NAME
))))
63 if (!(flags
& FNM_NOESCAPE
))
67 /* Trailing \ loses. */
71 if (FOLD ((UCHAR
) *n
) != c
)
76 if (*n
== L('.') && no_leading_period
78 || (n
[-1] == L('/') && (flags
& FNM_FILE_NAME
))))
81 for (c
= *p
++; c
== L('?') || c
== L('*'); c
= *p
++)
83 if (*n
== L('/') && (flags
& FNM_FILE_NAME
))
84 /* A slash does not match a wildcard under FNM_FILE_NAME. */
88 /* A ? needs to match one character. */
90 /* There isn't another character; no match. */
93 /* One character of the string is consumed in matching
94 this ? wildcard, so *??? won't match if there are
95 less than three characters. */
101 /* The wildcard(s) is/are the last element of the pattern.
102 If the name is a file name and contains another slash
103 this does mean it cannot match. If the FNM_LEADING_DIR
104 flag is set and exactly one slash is following, we have
107 int result
= (flags
& FNM_FILE_NAME
) == 0 ? 0 : FNM_NOMATCH
;
109 if (flags
& FNM_FILE_NAME
)
111 const CHAR
*slashp
= STRCHR (n
, L('/'));
113 if (flags
& FNM_LEADING_DIR
)
116 && STRCHR (slashp
+ 1, L('/')) == NULL
)
132 endp
= STRCHRNUL (n
, (flags
& FNM_FILE_NAME
) ? L('/') : L('\0'));
136 int flags2
= ((flags
& FNM_FILE_NAME
)
137 ? flags
: (flags
& ~FNM_PERIOD
));
139 for (--p
; n
< endp
; ++n
)
140 if (FCT (p
, n
, (no_leading_period
143 && (flags
& FNM_FILE_NAME
)))),
147 else if (c
== L('/') && (flags
& FNM_FILE_NAME
))
149 while (*n
!= L('\0') && *n
!= L('/'))
152 && (FCT (p
, n
+ 1, flags
& FNM_PERIOD
, flags
) == 0))
157 int flags2
= ((flags
& FNM_FILE_NAME
)
158 ? flags
: (flags
& ~FNM_PERIOD
));
160 if (c
== L('\\') && !(flags
& FNM_NOESCAPE
))
163 for (--p
; n
< endp
; ++n
)
164 if (FOLD ((UCHAR
) *n
) == c
165 && (FCT (p
, n
, (no_leading_period
168 && (flags
& FNM_FILE_NAME
)))),
174 /* If we come here no match is possible with the wildcard. */
179 static int posixly_correct
;
180 /* Nonzero if the sense of the character class is inverted. */
184 if (posixly_correct
== 0)
185 posixly_correct
= getenv ("POSIXLY_CORRECT") != NULL
? 1 : -1;
190 if (*n
== L('.') && no_leading_period
192 || (n
[-1] == L('/') && (flags
& FNM_FILE_NAME
))))
195 if (*n
== L('/') && (flags
& FNM_FILE_NAME
))
196 /* `/' cannot be matched. */
199 not = (*p
== L('!') || (posixly_correct
< 0 && *p
== L('^')));
206 UCHAR fn
= FOLD ((UCHAR
) *n
);
208 if (!(flags
& FNM_NOESCAPE
) && c
== L('\\'))
212 c
= FOLD ((UCHAR
) *p
);
218 else if (c
== L('[') && *p
== L(':'))
220 /* Leave room for the null. */
221 CHAR str
[CHAR_CLASS_MAX_LENGTH
+ 1];
223 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
226 const CHAR
*startp
= p
;
230 if (c1
== CHAR_CLASS_MAX_LENGTH
)
231 /* The name is too long and therefore the pattern
236 if (c
== L(':') && p
[1] == L(']'))
241 if (c
< L('a') || c
>= L('z'))
243 /* This cannot possibly be a character class name.
244 Match it as a normal range. */
253 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
254 wt
= IS_CHAR_CLASS (str
);
256 /* Invalid character class name. */
259 /* The following code is glibc specific but does
260 there a good job in speeding up the code since
261 we can avoid the btowc() call. The
262 IS_CHAR_CLASS call will return a bit mask for
263 the 32-bit table. We have to convert it to a
264 bitmask for the __ctype_b table. This has to
265 be done based on the byteorder as can be seen
266 below. In any case we will fall back on the
267 code using btowc() if the class is not one of
268 the standard classes. */
269 # if defined _LIBC && ! WIDE_CHAR_VERSION
270 # if __BYTE_ORDER == __LITTLE_ENDIAN
271 if ((wt
& 0xf0ffff) == 0)
274 if ((__ctype_b
[(UCHAR
) *n
] & wt
) != 0)
280 if ((__ctype_b
[(UCHAR
) *n
] & wt
) != 0)
286 if (ISWCTYPE (BTOWC ((UCHAR
) *n
), wt
))
289 if ((STREQ (str
, L("alnum")) && ISALNUM ((UCHAR
) *n
))
290 || (STREQ (str
, L("alpha")) && ISALPHA ((UCHAR
) *n
))
291 || (STREQ (str
, L("blank")) && ISBLANK ((UCHAR
) *n
))
292 || (STREQ (str
, L("cntrl")) && ISCNTRL ((UCHAR
) *n
))
293 || (STREQ (str
, L("digit")) && ISDIGIT ((UCHAR
) *n
))
294 || (STREQ (str
, L("graph")) && ISGRAPH ((UCHAR
) *n
))
295 || (STREQ (str
, L("lower")) && ISLOWER ((UCHAR
) *n
))
296 || (STREQ (str
, L("print")) && ISPRINT ((UCHAR
) *n
))
297 || (STREQ (str
, L("punct")) && ISPUNCT ((UCHAR
) *n
))
298 || (STREQ (str
, L("space")) && ISSPACE ((UCHAR
) *n
))
299 || (STREQ (str
, L("upper")) && ISUPPER ((UCHAR
) *n
))
300 || (STREQ (str
, L("xdigit")) && ISXDIGIT ((UCHAR
) *n
)))
306 else if (c
== L('[') && *p
== L('='))
310 _NL_CURRENT_WORD (LC_COLLATE
, _NL_COLLATE_NRULES
);
311 const CHAR
*startp
= p
;
323 if (c
!= L('=') || p
[1] != L(']'))
333 if ((UCHAR
) *n
== str
[0])
338 const int32_t *table
;
339 # if WIDE_CHAR_VERSION
340 const int32_t *weights
;
341 const int32_t *extra
;
343 const unsigned char *weights
;
344 const unsigned char *extra
;
346 const int32_t *indirect
;
348 const UCHAR
*cp
= (const UCHAR
*) str
;
350 /* This #include defines a local function! */
351 # if WIDE_CHAR_VERSION
352 # include <locale/weightwc.h>
354 # include <locale/weight.h>
357 # if WIDE_CHAR_VERSION
358 table
= (const int32_t *)
359 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_TABLEWC
);
360 weights
= (const int32_t *)
361 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_WEIGHTWC
);
362 extra
= (const int32_t *)
363 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_EXTRAWC
);
364 indirect
= (const int32_t *)
365 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_INDIRECTWC
);
367 table
= (const int32_t *)
368 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_TABLEMB
);
369 weights
= (const unsigned char *)
370 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_WEIGHTMB
);
371 extra
= (const unsigned char *)
372 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_EXTRAMB
);
373 indirect
= (const int32_t *)
374 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_INDIRECTMB
);
380 /* We found a table entry. Now see whether the
381 character we are currently at has the same
382 equivalence class value. */
383 # if !WIDE_CHAR_VERSION
384 int len
= weights
[idx
];
387 const UCHAR
*np
= (const UCHAR
*) n
;
389 idx2
= findidx (&np
);
390 # if WIDE_CHAR_VERSION
391 if (idx2
!= 0 && weights
[idx
] == weights
[idx2
])
394 if (idx2
!= 0 && len
== weights
[idx2
])
399 && (weights
[idx
+ 1 + cnt
]
400 == weights
[idx2
+ 1 + cnt
]))
413 else if (c
== L('\0'))
414 /* [ (unterminated) loses. */
422 if (c
== L('[') && *p
== L('.'))
425 _NL_CURRENT_WORD (LC_COLLATE
, _NL_COLLATE_NRULES
);
426 const CHAR
*startp
= p
;
432 if (c
== L('.') && p
[1] == L(']'))
442 /* We have to handle the symbols differently in
443 ranges since then the collation sequence is
445 is_range
= *p
== L('-') && p
[1] != L('\0');
449 /* There are no names defined in the collation
450 data. Therefore we only accept the trivial
451 names consisting of the character itself. */
455 if (!is_range
&& *n
== startp
[1])
464 const int32_t *symb_table
;
465 # ifdef WIDE_CHAR_VERSION
469 # define str (startp + 1)
471 const unsigned char *extra
;
477 # ifdef WIDE_CHAR_VERSION
478 /* We have to convert the name to a single-byte
479 string. This is possible since the names
480 consist of ASCII characters and the internal
481 representation is UCS4. */
482 for (strcnt
= 0; strcnt
< c1
; ++strcnt
)
483 str
[strcnt
] = startp
[1 + strcnt
];
487 _NL_CURRENT_WORD (LC_COLLATE
,
488 _NL_COLLATE_SYMB_HASH_SIZEMB
);
489 symb_table
= (const int32_t *)
490 _NL_CURRENT (LC_COLLATE
,
491 _NL_COLLATE_SYMB_TABLEMB
);
492 extra
= (const unsigned char *)
493 _NL_CURRENT (LC_COLLATE
,
494 _NL_COLLATE_SYMB_EXTRAMB
);
496 /* Locate the character in the hashing table. */
497 hash
= elem_hash (str
, c1
);
500 elem
= hash
% table_size
;
501 second
= hash
% (table_size
- 2);
502 while (symb_table
[2 * elem
] != 0)
504 /* First compare the hashing value. */
505 if (symb_table
[2 * elem
] == hash
506 && c1
== extra
[symb_table
[2 * elem
+ 1]]
508 &extra
[symb_table
[2 * elem
+ 1]
511 /* Yep, this is the entry. */
512 idx
= symb_table
[2 * elem
+ 1];
513 idx
+= 1 + extra
[idx
];
521 if (symb_table
[2 * elem
] != 0)
523 /* Compare the byte sequence but only if
524 this is not part of a range. */
525 # ifdef WIDE_CHAR_VERSION
528 idx
+= 1 + extra
[idx
];
529 /* Adjust for the alignment. */
530 idx
= (idx
+ 3) & ~4;
532 wextra
= (int32_t *) &extra
[idx
+ 4];
537 # ifdef WIDE_CHAR_VERSION
538 for (c1
= 0; c1
< wextra
[idx
]; ++c1
)
539 if (n
[c1
] != wextra
[1 + c1
])
542 if (c1
== wextra
[idx
])
545 for (c1
= 0; c1
< extra
[idx
]; ++c1
)
546 if (n
[c1
] != extra
[1 + c1
])
549 if (c1
== extra
[idx
])
554 /* Get the collation sequence value. */
556 # ifdef WIDE_CHAR_VERSION
557 cold
= wextra
[1 + wextra
[idx
]];
559 /* Adjust for the alignment. */
560 idx
+= 1 + extra
[idx
];
561 idx
= (idx
+ 3) & ~4;
562 cold
= *((int32_t *) &extra
[idx
]);
567 else if (symb_table
[2 * elem
] != 0 && c1
== 1)
569 /* No valid character. Match it as a
571 if (!is_range
&& *n
== str
[0])
588 /* We have to handle the symbols differently in
589 ranges since then the collation sequence is
591 is_range
= *p
== L('-') && p
[1] != L('\0');
593 if (!is_range
&& c
== fn
)
600 if (c
== L('-') && *p
!= L(']'))
603 /* We have to find the collation sequence
604 value for C. Collation sequence is nothing
605 we can regularly access. The sequence
606 value is defined by the order in which the
607 definitions of the collation values for the
608 various characters appear in the source
609 file. A strange concept, nowhere
614 # ifdef WIDE_CHAR_VERSION
619 # ifdef WIDE_CHAR_VERSION
620 /* Search in the `names' array for the characters. */
623 while (names
[idx
] != fn
)
626 /* XXX We don't know anything about
627 the character we are supposed to
628 match. This means we are failing. */
629 goto range_not_matched
;
633 fcollseq
= collseq
[idx
];
641 while (names
[idx
] != cold
)
651 lcollseq
= idx
== -1 ? 0xffffffff : collseq
[idx
];
654 fcollseq
= collseq
[fn
];
655 lcollseq
= is_seqval
? cold
: collseq
[(UCHAR
) cold
];
659 if (cend
== L('[') && *p
== L('.'))
662 _NL_CURRENT_WORD (LC_COLLATE
,
664 const CHAR
*startp
= p
;
670 if (c
== L('.') && p
[1] == L(']'))
682 /* There are no names defined in the
683 collation data. Therefore we only
684 accept the trivial names consisting
685 of the character itself. */
694 const int32_t *symb_table
;
695 # ifdef WIDE_CHAR_VERSION
699 # define str (startp + 1)
701 const unsigned char *extra
;
707 # ifdef WIDE_CHAR_VERSION
708 /* We have to convert the name to a single-byte
709 string. This is possible since the names
710 consist of ASCII characters and the internal
711 representation is UCS4. */
712 for (strcnt
= 0; strcnt
< c1
; ++strcnt
)
713 str
[strcnt
] = startp
[1 + strcnt
];
717 _NL_CURRENT_WORD (LC_COLLATE
,
718 _NL_COLLATE_SYMB_HASH_SIZEMB
);
719 symb_table
= (const int32_t *)
720 _NL_CURRENT (LC_COLLATE
,
721 _NL_COLLATE_SYMB_TABLEMB
);
722 extra
= (const unsigned char *)
723 _NL_CURRENT (LC_COLLATE
,
724 _NL_COLLATE_SYMB_EXTRAMB
);
726 /* Locate the character in the hashing
728 hash
= elem_hash (str
, c1
);
731 elem
= hash
% table_size
;
732 second
= hash
% (table_size
- 2);
733 while (symb_table
[2 * elem
] != 0)
735 /* First compare the hashing value. */
736 if (symb_table
[2 * elem
] == hash
738 == extra
[symb_table
[2 * elem
+ 1]])
740 &extra
[symb_table
[2 * elem
+ 1]
743 /* Yep, this is the entry. */
744 idx
= symb_table
[2 * elem
+ 1];
745 idx
+= 1 + extra
[idx
];
753 if (symb_table
[2 * elem
] != 0)
755 /* Compare the byte sequence but only if
756 this is not part of a range. */
757 # ifdef WIDE_CHAR_VERSION
760 idx
+= 1 + extra
[idx
];
761 /* Adjust for the alignment. */
762 idx
= (idx
+ 3) & ~4;
764 wextra
= (int32_t *) &extra
[idx
+ 4];
766 /* Get the collation sequence value. */
768 # ifdef WIDE_CHAR_VERSION
769 cend
= wextra
[1 + wextra
[idx
]];
771 /* Adjust for the alignment. */
772 idx
+= 1 + extra
[idx
];
773 idx
= (idx
+ 3) & ~4;
774 cend
= *((int32_t *) &extra
[idx
]);
777 else if (symb_table
[2 * elem
] != 0 && c1
== 1)
789 if (!(flags
& FNM_NOESCAPE
) && cend
== L('\\'))
796 /* XXX It is not entirely clear to me how to handle
797 characters which are not mentioned in the
798 collation specification. */
800 # ifdef WIDE_CHAR_VERSION
801 lcollseq
== 0xffffffff ||
803 lcollseq
<= fcollseq
)
805 /* We have to look at the upper bound. */
812 # ifdef WIDE_CHAR_VERSION
815 while (names
[idx
] != cend
)
819 /* Hum, no information about the upper
820 bound. The matching succeeds if the
821 lower bound is matched exactly. */
822 if (idx
== -1 && lcollseq
!= fcollseq
)
823 goto range_not_matched
;
828 hcollseq
= collseq
[idx
];
830 hcollseq
= collseq
[cend
];
834 if (lcollseq
<= hcollseq
&& fcollseq
<= hcollseq
)
837 # ifdef WIDE_CHAR_VERSION
841 /* We use a boring value comparison of the character
842 values. This is better than comparing using
843 `strcoll' since the latter would have surprising
844 and sometimes fatal consequences. */
847 if (!(flags
& FNM_NOESCAPE
) && cend
== L('\\'))
853 if (cold
<= fc
&& fc
<= c
)
870 /* Skip the rest of the [...] that already matched. */
877 /* [... (unterminated) loses. */
880 if (!(flags
& FNM_NOESCAPE
) && c
== L('\\'))
884 /* XXX 1003.2d11 is unclear if this is right. */
887 else if (c
== L('[') && *p
== L(':'))
890 const CHAR
*startp
= p
;
895 if (++c1
== CHAR_CLASS_MAX_LENGTH
)
898 if (*p
== L(':') && p
[1] == L(']'))
901 if (c
< L('a') || c
>= L('z'))
910 else if (c
== L('[') && *p
== L('='))
916 if (c
!= L('=') || p
[1] != L(']'))
921 else if (c
== L('[') && *p
== L('.'))
930 if (*p
== L('.') && p
[1] == L(']'))
944 if (c
!= FOLD ((UCHAR
) *n
))
954 if ((flags
& FNM_LEADING_DIR
) && *n
== L('/'))
955 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */