rewrite core of the glob implementation for correctness & optimization
[musl.git] / src / regex / glob.c
blob 751b69668a2c4232b03bc9fde927cd341f2c27ce
1 #define _BSD_SOURCE
2 #include <glob.h>
3 #include <fnmatch.h>
4 #include <sys/stat.h>
5 #include <dirent.h>
6 #include <limits.h>
7 #include <string.h>
8 #include <stdlib.h>
9 #include <errno.h>
10 #include <stddef.h>
/* One glob result, kept as a node of a singly linked list. The pathname
 * is stored inline after the link so that node and string share a single
 * allocation. */
struct match
{
	/* Following node; NULL on the last node of the list. */
	struct match *next;
	/* Pathname bytes, allocated together with the node. */
	char name[];
};
18 static int append(struct match **tail, const char *name, size_t len, int mark)
20 struct match *new = malloc(sizeof(struct match) + len + 2);
21 if (!new) return -1;
22 (*tail)->next = new;
23 new->next = NULL;
24 memcpy(new->name, name, len+1);
25 if (mark && len && name[len-1]!='/') {
26 new->name[len] = '/';
27 new->name[len+1] = 0;
29 *tail = new;
30 return 0;
/* Recursive worker for glob(): expand the remaining pattern pat beneath
 * the path accumulated so far in buf, appending each match to the list.
 *
 * buf     - working path buffer; all writes are bounded by PATH_MAX
 * pos     - number of bytes of buf already occupied by the path prefix
 * type    - d_type of the entry named by buf as passed by the caller
 *           (from readdir), or 0 when not known
 * pat     - remaining pattern; must be writable, since a '/' in it is
 *           temporarily replaced by NUL while matching one component
 * flags   - GLOB_* flags
 * errfunc - error callback; called unconditionally, so must be non-NULL
 * tail    - address of the pointer to the last node of the match list
 *
 * Returns 0 on success (including when nothing matched), GLOB_NOSPACE if
 * a match node could not be allocated, or GLOB_ABORTED if an error was
 * hit and either errfunc returned nonzero or GLOB_ERR is set. */
static int do_glob(char *buf, size_t pos, int type, char *pat, int flags, int (*errfunc)(const char *path, int err), struct match **tail)
{
	/* If GLOB_MARK is unused, we don't care about type. */
	if (!type && !(flags & GLOB_MARK)) type = DT_REG;

	/* Special-case the remaining pattern being all slashes, in
	 * which case we can use caller-passed type if it's a dir. */
	if (*pat && type!=DT_DIR) type = 0;
	while (pos+1 < PATH_MAX && *pat=='/') buf[pos++] = *pat++;

	/* Consume maximal [escaped-]literal prefix of pattern, copying
	 * and un-escaping it to the running buffer as we go. */
	ptrdiff_t i=0, j=0;
	int in_bracket = 0, overflow = 0;
	for (; pat[i]!='*' && pat[i]!='?' && (!in_bracket || pat[i]!=']'); i++) {
		if (!pat[i]) {
			/* Whole remaining pattern was literal. */
			if (overflow) return 0;
			pat += i;
			pos += j;
			i = j = 0;
			break;
		} else if (pat[i] == '[') {
			in_bracket = 1;
		} else if (pat[i] == '/') {
			/* A full literal component was consumed: commit it.
			 * i and j are set to -1 so that, after the store
			 * below and the loop increment, both restart at 0
			 * relative to the advanced pat/pos. */
			if (overflow) return 0;
			in_bracket = 0;
			pat += i+1;
			i = -1;
			pos += j+1;
			j = -1;
		} else if (pat[i] == '\\' && !(flags & GLOB_NOESCAPE)) {
			/* Backslashes inside a bracket are (at least by
			 * our interpretation) non-special, so if next
			 * char is ']' we have a complete expression. */
			if (in_bracket && pat[i+1]==']') break;
			/* Unpaired final backslash never matches. */
			if (!pat[i+1] || pat[i+1]=='/') return 0;
			i++;
		}
		/* Only store a character if it fits in the buffer, but if
		 * a potential bracket expression is open, the overflow
		 * must be remembered and handled later only if the bracket
		 * is unterminated (and thereby a literal), so as not to
		 * disallow long bracket expressions with short matches. */
		if (pos+(j+1) < PATH_MAX) {
			buf[pos+j++] = pat[i];
		} else if (in_bracket) {
			overflow = 1;
		} else {
			return 0;
		}
		/* If we consume any new components, the caller-passed type
		 * or dummy type from above is no longer valid. */
		type = 0;
	}
	buf[pos] = 0;
	if (!*pat) {
		/* If we consumed any components above, or if GLOB_MARK is
		 * requested and we don't yet know if the match is a dir,
		 * we must call stat to confirm the file exists and/or
		 * determine its type. */
		struct stat st;
		if ((flags & GLOB_MARK) && type==DT_LNK) type = 0;
		if (!type && stat(buf, &st)) {
			if (errno!=ENOENT && (errfunc(buf, errno) || (flags & GLOB_ERR)))
				return GLOB_ABORTED;
			return 0;
		}
		if (!type && S_ISDIR(st.st_mode)) type = DT_DIR;
		if (append(tail, buf, pos, (flags & GLOB_MARK) && type==DT_DIR))
			return GLOB_NOSPACE;
		return 0;
	}

	/* pat now begins with a component containing a wildcard; p2 marks
	 * the end of that component, or is NULL if it is the last one. */
	char *p2 = strchr(pat, '/');
	DIR *dir = opendir(pos ? buf : ".");
	if (!dir) {
		if (errfunc(buf, errno) || (flags & GLOB_ERR))
			return GLOB_ABORTED;
		return 0;
	}
	int old_errno = errno;
	struct dirent *de;
	/* errno is cleared before each readdir so that a NULL return can be
	 * told apart as end-of-directory (errno still 0) or failure. */
	while (errno=0, de=readdir(dir)) {
		/* Quickly skip non-directories when there's pattern left. */
		if (p2 && de->d_type && de->d_type!=DT_DIR && de->d_type!=DT_LNK)
			continue;

		size_t l = strlen(de->d_name);
		if (l >= PATH_MAX-pos) continue;

		/* Cut the pattern at the component boundary for fnmatch. */
		if (p2) *p2 = 0;

		int fnm_flags= ((flags & GLOB_NOESCAPE) ? FNM_NOESCAPE : 0)
			| ((!(flags & GLOB_PERIOD)) ? FNM_PERIOD : 0);

		if (fnmatch(pat, de->d_name, fnm_flags))
			continue;

		/* With GLOB_PERIOD, don't allow matching . or .. unless
		 * fnmatch would match them with FNM_PERIOD rules in effect. */
		if (p2 && (flags & GLOB_PERIOD) && de->d_name[0]=='.'
		    && (!de->d_name[1] || de->d_name[1]=='.' && !de->d_name[2])
		    && fnmatch(pat, de->d_name, fnm_flags | FNM_PERIOD))
			continue;

		memcpy(buf+pos, de->d_name, l+1);
		if (p2) *p2 = '/';
		int r = do_glob(buf, pos+l, de->d_type, p2 ? p2 : "", flags, errfunc, tail);
		if (r) {
			closedir(dir);
			return r;
		}
	}
	int readerr = errno;
	/* A 'continue' above may have left the separator overwritten. */
	if (p2) *p2 = '/';
	closedir(dir);
	/* Report the error captured right after readdir returned NULL;
	 * closedir may have changed errno since then. */
	if (readerr && (errfunc(buf, readerr) || (flags & GLOB_ERR)))
		return GLOB_ABORTED;
	errno = old_errno;
	return 0;
}
/* Default error callback, installed by glob() when the caller passes a
 * null errfunc: ignores both arguments and always returns 0. */
static int ignore_err(const char *path, int err)
{
	(void)path;
	(void)err;
	return 0;
}
160 static void freelist(struct match *head)
162 struct match *match, *next;
163 for (match=head->next; match; match=next) {
164 next = match->next;
165 free(match);
/* qsort comparator for an array of C strings: a and b each point to a
 * char pointer element, and the strings are ordered with strcmp. */
static int sort(const void *a, const void *b)
{
	const char *const *lhs = a;
	const char *const *rhs = b;
	return strcmp(*lhs, *rhs);
}
174 int glob(const char *restrict pat, int flags, int (*errfunc)(const char *path, int err), glob_t *restrict g)
176 struct match head = { .next = NULL }, *tail = &head;
177 size_t cnt, i;
178 size_t offs = (flags & GLOB_DOOFFS) ? g->gl_offs : 0;
179 int error = 0;
180 char buf[PATH_MAX];
182 if (!errfunc) errfunc = ignore_err;
184 if (!(flags & GLOB_APPEND)) {
185 g->gl_offs = offs;
186 g->gl_pathc = 0;
187 g->gl_pathv = NULL;
190 if (*pat) {
191 char *p = strdup(pat);
192 if (!p) return GLOB_NOSPACE;
193 buf[0] = 0;
194 error = do_glob(buf, 0, 0, p, flags, errfunc, &tail);
195 free(p);
198 if (error == GLOB_NOSPACE) {
199 freelist(&head);
200 return error;
203 for (cnt=0, tail=head.next; tail; tail=tail->next, cnt++);
204 if (!cnt) {
205 if (flags & GLOB_NOCHECK) {
206 tail = &head;
207 if (append(&tail, pat, strlen(pat), 0))
208 return GLOB_NOSPACE;
209 cnt++;
210 } else
211 return GLOB_NOMATCH;
214 if (flags & GLOB_APPEND) {
215 char **pathv = realloc(g->gl_pathv, (offs + g->gl_pathc + cnt + 1) * sizeof(char *));
216 if (!pathv) {
217 freelist(&head);
218 return GLOB_NOSPACE;
220 g->gl_pathv = pathv;
221 offs += g->gl_pathc;
222 } else {
223 g->gl_pathv = malloc((offs + cnt + 1) * sizeof(char *));
224 if (!g->gl_pathv) {
225 freelist(&head);
226 return GLOB_NOSPACE;
228 for (i=0; i<offs; i++)
229 g->gl_pathv[i] = NULL;
231 for (i=0, tail=head.next; i<cnt; tail=tail->next, i++)
232 g->gl_pathv[offs + i] = tail->name;
233 g->gl_pathv[offs + i] = NULL;
234 g->gl_pathc += cnt;
236 if (!(flags & GLOB_NOSORT))
237 qsort(g->gl_pathv+offs, cnt, sizeof(char *), sort);
239 return error;
242 void globfree(glob_t *g)
244 size_t i;
245 for (i=0; i<g->gl_pathc; i++)
246 free(g->gl_pathv[g->gl_offs + i] - offsetof(struct match, name));
247 free(g->gl_pathv);
248 g->gl_pathc = 0;
249 g->gl_pathv = NULL;
/* Provide glob64 and globfree64 as additional names for glob and
 * globfree. NOTE(review): weak_alias is defined outside this file view;
 * presumably it emits a weak symbol alias -- confirm against its
 * definition. */
weak_alias(glob, glob64);
weak_alias(globfree, globfree64);