2 * An implementation of what I call the "Sea of Stars" algorithm for
3 * POSIX fnmatch(). The basic idea is that we factor the pattern into
4 * a head component (which we match first and can reject without ever
5 * measuring the length of the string), an optional tail component
6 * (which only exists if the pattern contains at least one star), and
7 * an optional "sea of stars", a set of star-separated components
8 * between the head and tail. After the head and tail matches have
9 * been removed from the input string, the components in the "sea of
10 * stars" are matched sequentially by searching for their first
11 * occurrence past the end of the previous match.
13 * - Rich Felker, April 2012
21 #include "locale_impl.h"
24 #define UNMATCHABLE -2
29 static int str_next(const char *str
, size_t n
, size_t *step
)
37 int k
= mbtowc(&wc
, str
, n
);
49 static int pat_next(const char *pat
, size_t m
, size_t *step
, int flags
)
57 if (pat
[0]=='\\' && pat
[1] && !(flags
& FNM_NOESCAPE
)) {
65 if (k
<m
) if (pat
[k
] == '^' || pat
[k
] == '!') k
++;
66 if (k
<m
) if (pat
[k
] == ']') k
++;
67 for (; k
<m
&& pat
[k
] && pat
[k
]!=']'; k
++) {
68 if (k
+1<m
&& pat
[k
+1] && pat
[k
]=='[' && (pat
[k
+1]==':' || pat
[k
+1]=='.' || pat
[k
+1]=='=')) {
71 if (k
<m
&& pat
[k
]) k
++;
72 while (k
<m
&& pat
[k
] && (pat
[k
-1]!=z
|| pat
[k
]!=']')) k
++;
73 if (k
==m
|| !pat
[k
]) break;
76 if (k
==m
|| !pat
[k
]) {
90 int k
= mbtowc(&wc
, pat
, m
);
101 static int casefold(int k
)
104 return c
== k
? towlower(k
) : c
;
107 static int match_bracket(const char *p
, int k
, int kfold
)
112 if (*p
=='^' || *p
=='!') {
117 if (k
==']') return !inv
;
119 } else if (*p
=='-') {
120 if (k
=='-') return !inv
;
124 for (; *p
!= ']'; p
++) {
125 if (p
[0]=='-' && p
[1]!=']') {
127 int l
= mbtowc(&wc2
, p
+1, 4);
130 if ((unsigned)k
-wc
<= wc2
-wc
||
131 (unsigned)kfold
-wc
<= wc2
-wc
)
136 if (p
[0]=='[' && (p
[1]==':' || p
[1]=='.' || p
[1]=='=')) {
137 const char *p0
= p
+2;
140 while (p
[-1]!=z
|| p
[0]!=']') p
++;
141 if (z
== ':' && p
-1-p0
< 16) {
143 memcpy(buf
, p0
, p
-1-p0
);
145 if (iswctype(k
, wctype(buf
)) ||
146 iswctype(kfold
, wctype(buf
)))
152 wc
= (unsigned char)*p
;
154 int l
= mbtowc(&wc
, p
, 4);
158 if (wc
==k
|| wc
==kfold
) return !inv
;
163 static int fnmatch_internal(const char *pat
, size_t m
, const char *str
, size_t n
, int flags
)
165 const char *p
, *ptail
, *endpat
;
166 const char *s
, *stail
, *endstr
;
167 size_t pinc
, sinc
, tailcnt
=0;
170 if (flags
& FNM_PERIOD
) {
171 if (*str
== '.' && *pat
!= '.')
175 switch ((c
= pat_next(pat
, m
, &pinc
, flags
))) {
183 k
= str_next(str
, n
, &sinc
);
185 return (c
==END
) ? 0 : FNM_NOMATCH
;
188 kfold
= flags
& FNM_CASEFOLD
? casefold(k
) : k
;
190 if (!match_bracket(pat
, k
, kfold
))
192 } else if (c
!= QUESTION
&& k
!= c
&& kfold
!= c
) {
202 /* Compute real pat length if it was initially unknown/-1 */
206 /* Find the last * in pat and count chars needed after it */
207 for (p
=ptail
=pat
; p
<endpat
; p
+=pinc
) {
208 switch (pat_next(p
, endpat
-p
, &pinc
, flags
)) {
221 /* Past this point we need not check for UNMATCHABLE in pat,
222 * because all of pat has already been parsed once. */
224 /* Compute real str length if it was initially unknown/-1 */
227 if (n
< tailcnt
) return FNM_NOMATCH
;
229 /* Find the final tailcnt chars of str, accounting for UTF-8.
230 * On illegal sequences we may get it wrong, but in that case
231 * we necessarily have a matching failure anyway. */
232 for (s
=endstr
; s
>str
&& tailcnt
; tailcnt
--) {
233 if (s
[-1] < 128U || MB_CUR_MAX
==1) s
--;
234 else while ((unsigned char)*--s
-0x80U
<0x40 && s
>str
);
236 if (tailcnt
) return FNM_NOMATCH
;
239 /* Check that the pat and str tails match */
242 c
= pat_next(p
, endpat
-p
, &pinc
, flags
);
244 if ((k
= str_next(s
, endstr
-s
, &sinc
)) <= 0) {
245 if (c
!= END
) return FNM_NOMATCH
;
249 kfold
= flags
& FNM_CASEFOLD
? casefold(k
) : k
;
251 if (!match_bracket(p
-pinc
, k
, kfold
))
253 } else if (c
!= QUESTION
&& k
!= c
&& kfold
!= c
) {
258 /* We're all done with the tails now, so throw them out */
262 /* Match pattern components until there are none left */
267 c
= pat_next(p
, endpat
-p
, &pinc
, flags
);
269 /* Encountering * completes/commits a component */
275 k
= str_next(s
, endstr
-s
, &sinc
);
278 kfold
= flags
& FNM_CASEFOLD
? casefold(k
) : k
;
280 if (!match_bracket(p
-pinc
, k
, kfold
))
282 } else if (c
!= QUESTION
&& k
!= c
&& kfold
!= c
) {
287 if (c
== STAR
) continue;
288 /* If we failed, advance str, by 1 char if it's a valid
289 * char, or past all invalid bytes otherwise. */
290 k
= str_next(str
, endstr
-str
, &sinc
);
291 if (k
> 0) str
+= sinc
;
292 else for (str
++; str_next(str
, endstr
-str
, &sinc
)<0; str
++);
298 int fnmatch(const char *pat
, const char *str
, int flags
)
303 if (flags
& FNM_PATHNAME
) for (;;) {
304 for (s
=str
; *s
&& *s
!='/'; s
++);
305 for (p
=pat
; (c
=pat_next(p
, -1, &inc
, flags
))!=END
&& c
!='/'; p
+=inc
);
306 if (c
!=*s
&& (!*s
|| !(flags
& FNM_LEADING_DIR
)))
308 if (fnmatch_internal(pat
, p
-pat
, str
, s
-str
, flags
))
313 } else if (flags
& FNM_LEADING_DIR
) {
314 for (s
=str
; *s
; s
++) {
315 if (*s
!= '/') continue;
316 if (!fnmatch_internal(pat
, -1, str
, s
-str
, flags
))
320 return fnmatch_internal(pat
, -1, str
, -1, flags
);