riscv32: add arch headers
[musl.git] / src / regex / fnmatch.c
blob978fff884239b27b2256ee28a7bdf3bec022c310
1 /*
2 * An implementation of what I call the "Sea of Stars" algorithm for
3 * POSIX fnmatch(). The basic idea is that we factor the pattern into
4 * a head component (which we match first and can reject without ever
5 * measuring the length of the string), an optional tail component
6 * (which only exists if the pattern contains at least one star), and
7 * an optional "sea of stars", a set of star-separated components
8 * between the head and tail. After the head and tail matches have
9 * been removed from the input string, the components in the "sea of
10 * stars" are matched sequentially by searching for their first
11 * occurrence past the end of the previous match.
13 * - Rich Felker, April 2012
16 #include <string.h>
17 #include <fnmatch.h>
18 #include <stdlib.h>
19 #include <wchar.h>
20 #include <wctype.h>
21 #include "locale_impl.h"
23 #define END 0
24 #define UNMATCHABLE -2
25 #define BRACKET -3
26 #define QUESTION -4
27 #define STAR -5
29 static int str_next(const char *str, size_t n, size_t *step)
31 if (!n) {
32 *step = 0;
33 return 0;
35 if (str[0] >= 128U) {
36 wchar_t wc;
37 int k = mbtowc(&wc, str, n);
38 if (k<0) {
39 *step = 1;
40 return -1;
42 *step = k;
43 return wc;
45 *step = 1;
46 return str[0];
49 static int pat_next(const char *pat, size_t m, size_t *step, int flags)
51 int esc = 0;
52 if (!m || !*pat) {
53 *step = 0;
54 return END;
56 *step = 1;
57 if (pat[0]=='\\' && pat[1] && !(flags & FNM_NOESCAPE)) {
58 *step = 2;
59 pat++;
60 esc = 1;
61 goto escaped;
63 if (pat[0]=='[') {
64 size_t k = 1;
65 if (k<m) if (pat[k] == '^' || pat[k] == '!') k++;
66 if (k<m) if (pat[k] == ']') k++;
67 for (; k<m && pat[k] && pat[k]!=']'; k++) {
68 if (k+1<m && pat[k+1] && pat[k]=='[' && (pat[k+1]==':' || pat[k+1]=='.' || pat[k+1]=='=')) {
69 int z = pat[k+1];
70 k+=2;
71 if (k<m && pat[k]) k++;
72 while (k<m && pat[k] && (pat[k-1]!=z || pat[k]!=']')) k++;
73 if (k==m || !pat[k]) break;
76 if (k==m || !pat[k]) {
77 *step = 1;
78 return '[';
80 *step = k+1;
81 return BRACKET;
83 if (pat[0] == '*')
84 return STAR;
85 if (pat[0] == '?')
86 return QUESTION;
87 escaped:
88 if (pat[0] >= 128U) {
89 wchar_t wc;
90 int k = mbtowc(&wc, pat, m);
91 if (k<0) {
92 *step = 0;
93 return UNMATCHABLE;
95 *step = k + esc;
96 return wc;
98 return pat[0];
101 static int casefold(int k)
103 int c = towupper(k);
104 return c == k ? towlower(k) : c;
107 static int match_bracket(const char *p, int k, int kfold)
109 wchar_t wc;
110 int inv = 0;
111 p++;
112 if (*p=='^' || *p=='!') {
113 inv = 1;
114 p++;
116 if (*p==']') {
117 if (k==']') return !inv;
118 p++;
119 } else if (*p=='-') {
120 if (k=='-') return !inv;
121 p++;
123 wc = p[-1];
124 for (; *p != ']'; p++) {
125 if (p[0]=='-' && p[1]!=']') {
126 wchar_t wc2;
127 int l = mbtowc(&wc2, p+1, 4);
128 if (l < 0) return 0;
129 if (wc <= wc2)
130 if ((unsigned)k-wc <= wc2-wc ||
131 (unsigned)kfold-wc <= wc2-wc)
132 return !inv;
133 p += l-1;
134 continue;
136 if (p[0]=='[' && (p[1]==':' || p[1]=='.' || p[1]=='=')) {
137 const char *p0 = p+2;
138 int z = p[1];
139 p+=3;
140 while (p[-1]!=z || p[0]!=']') p++;
141 if (z == ':' && p-1-p0 < 16) {
142 char buf[16];
143 memcpy(buf, p0, p-1-p0);
144 buf[p-1-p0] = 0;
145 if (iswctype(k, wctype(buf)) ||
146 iswctype(kfold, wctype(buf)))
147 return !inv;
149 continue;
151 if (*p < 128U) {
152 wc = (unsigned char)*p;
153 } else {
154 int l = mbtowc(&wc, p, 4);
155 if (l < 0) return 0;
156 p += l-1;
158 if (wc==k || wc==kfold) return !inv;
160 return inv;
163 static int fnmatch_internal(const char *pat, size_t m, const char *str, size_t n, int flags)
165 const char *p, *ptail, *endpat;
166 const char *s, *stail, *endstr;
167 size_t pinc, sinc, tailcnt=0;
168 int c, k, kfold;
170 if (flags & FNM_PERIOD) {
171 if (*str == '.' && *pat != '.')
172 return FNM_NOMATCH;
174 for (;;) {
175 switch ((c = pat_next(pat, m, &pinc, flags))) {
176 case UNMATCHABLE:
177 return FNM_NOMATCH;
178 case STAR:
179 pat++;
180 m--;
181 break;
182 default:
183 k = str_next(str, n, &sinc);
184 if (k <= 0)
185 return (c==END) ? 0 : FNM_NOMATCH;
186 str += sinc;
187 n -= sinc;
188 kfold = flags & FNM_CASEFOLD ? casefold(k) : k;
189 if (c == BRACKET) {
190 if (!match_bracket(pat, k, kfold))
191 return FNM_NOMATCH;
192 } else if (c != QUESTION && k != c && kfold != c) {
193 return FNM_NOMATCH;
195 pat+=pinc;
196 m-=pinc;
197 continue;
199 break;
202 /* Compute real pat length if it was initially unknown/-1 */
203 m = strnlen(pat, m);
204 endpat = pat + m;
206 /* Find the last * in pat and count chars needed after it */
207 for (p=ptail=pat; p<endpat; p+=pinc) {
208 switch (pat_next(p, endpat-p, &pinc, flags)) {
209 case UNMATCHABLE:
210 return FNM_NOMATCH;
211 case STAR:
212 tailcnt=0;
213 ptail = p+1;
214 break;
215 default:
216 tailcnt++;
217 break;
221 /* Past this point we need not check for UNMATCHABLE in pat,
222 * because all of pat has already been parsed once. */
224 /* Compute real str length if it was initially unknown/-1 */
225 n = strnlen(str, n);
226 endstr = str + n;
227 if (n < tailcnt) return FNM_NOMATCH;
229 /* Find the final tailcnt chars of str, accounting for UTF-8.
230 * On illegal sequences we may get it wrong, but in that case
231 * we necessarily have a matching failure anyway. */
232 for (s=endstr; s>str && tailcnt; tailcnt--) {
233 if (s[-1] < 128U || MB_CUR_MAX==1) s--;
234 else while ((unsigned char)*--s-0x80U<0x40 && s>str);
236 if (tailcnt) return FNM_NOMATCH;
237 stail = s;
239 /* Check that the pat and str tails match */
240 p = ptail;
241 for (;;) {
242 c = pat_next(p, endpat-p, &pinc, flags);
243 p += pinc;
244 if ((k = str_next(s, endstr-s, &sinc)) <= 0) {
245 if (c != END) return FNM_NOMATCH;
246 break;
248 s += sinc;
249 kfold = flags & FNM_CASEFOLD ? casefold(k) : k;
250 if (c == BRACKET) {
251 if (!match_bracket(p-pinc, k, kfold))
252 return FNM_NOMATCH;
253 } else if (c != QUESTION && k != c && kfold != c) {
254 return FNM_NOMATCH;
258 /* We're all done with the tails now, so throw them out */
259 endstr = stail;
260 endpat = ptail;
262 /* Match pattern components until there are none left */
263 while (pat<endpat) {
264 p = pat;
265 s = str;
266 for (;;) {
267 c = pat_next(p, endpat-p, &pinc, flags);
268 p += pinc;
269 /* Encountering * completes/commits a component */
270 if (c == STAR) {
271 pat = p;
272 str = s;
273 break;
275 k = str_next(s, endstr-s, &sinc);
276 if (!k)
277 return FNM_NOMATCH;
278 kfold = flags & FNM_CASEFOLD ? casefold(k) : k;
279 if (c == BRACKET) {
280 if (!match_bracket(p-pinc, k, kfold))
281 break;
282 } else if (c != QUESTION && k != c && kfold != c) {
283 break;
285 s += sinc;
287 if (c == STAR) continue;
288 /* If we failed, advance str, by 1 char if it's a valid
289 * char, or past all invalid bytes otherwise. */
290 k = str_next(str, endstr-str, &sinc);
291 if (k > 0) str += sinc;
292 else for (str++; str_next(str, endstr-str, &sinc)<0; str++);
295 return 0;
298 int fnmatch(const char *pat, const char *str, int flags)
300 const char *s, *p;
301 size_t inc;
302 int c;
303 if (flags & FNM_PATHNAME) for (;;) {
304 for (s=str; *s && *s!='/'; s++);
305 for (p=pat; (c=pat_next(p, -1, &inc, flags))!=END && c!='/'; p+=inc);
306 if (c!=*s && (!*s || !(flags & FNM_LEADING_DIR)))
307 return FNM_NOMATCH;
308 if (fnmatch_internal(pat, p-pat, str, s-str, flags))
309 return FNM_NOMATCH;
310 if (!c) return 0;
311 str = s+1;
312 pat = p+inc;
313 } else if (flags & FNM_LEADING_DIR) {
314 for (s=str; *s; s++) {
315 if (*s != '/') continue;
316 if (!fnmatch_internal(pat, -1, str, s-str, flags))
317 return 0;
320 return fnmatch_internal(pat, -1, str, -1, flags);