2 * Copyright (c) 1989 The Regents of the University of California.
5 * This code is derived from software contributed to Berkeley by
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 #if defined(LIBC_SCCS) && !defined(lint)
33 static char sccsid
[] = "@(#)glob.c 5.12 (Berkeley) 6/24/91";
34 #endif /* LIBC_SCCS and not lint */
36 * Glob: the interface is a superset of the one defined in POSIX 1003.2,
39 * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
41 * Optional extra services, controlled by flags not defined by POSIX:
44 * Escaping convention: \ inhibits any special meaning the following
45 * character might have (except \ at end of string is retained).
47 * Set in gl_flags if pattern contained a globbing character.
49 * Use ^ instead of ! for "not".
51 * Number of matches in the current invocation of glob.
55 #pragma warning(disable:4244)
56 #endif /* WINNT_NATIVE */
64 #define mblen(_s,_n) mbrlen((_s),(_n),NULL)
75 #define S_ISDIR(a) (((a) & S_IFMT) == S_IFDIR)
78 #if !defined(S_ISLNK) && defined(S_IFLNK)
79 #define S_ISLNK(a) (((a) & S_IFMT) == S_IFLNK)
82 #if !defined(S_ISLNK) && !defined(lstat)
86 typedef unsigned short Char
;
88 static int glob1 (Char
*, glob_t
*, int);
89 static int glob2 (struct strbuf
*, const Char
*, glob_t
*, int);
90 static int glob3 (struct strbuf
*, const Char
*, const Char
*,
91 const Char
*, glob_t
*, int);
92 static void globextend (const char *, glob_t
*);
93 static int match (const char *, const Char
*, const Char
*,
95 static int compare (const void *, const void *);
96 static DIR *Opendir (const char *);
98 static int Lstat (const char *, struct stat
*);
100 static int Stat (const char *, struct stat
*sb
);
101 static Char
*Strchr (Char
*, int);
103 static void qprintf (const char *, const Char
*);
119 #define UNDERSCORE '_'
121 #define M_META 0x8000
122 #define M_PROTECT 0x4000
123 #define M_MASK 0xffff
124 #define M_ASCII 0x00ff
126 #define LCHAR(c) ((c)&M_ASCII)
127 #define META(c) ((c)|M_META)
128 #define M_ALL META('*')
129 #define M_END META(']')
130 #define M_NOT META('!')
131 #define M_ALTNOT META('^')
132 #define M_ONE META('?')
133 #define M_RNG META('-')
134 #define M_SET META('[')
135 #define ismeta(c) (((c)&M_META) != 0)
138 globcharcoll(__Char c1
, __Char c2
, int cs
)
140 #if defined(NLS) && defined(LC_COLLATE) && defined(HAVE_STRCOLL)
141 # if defined(WIDE_STRINGS)
142 wchar_t s1
[2], s2
[2];
150 /* This should not be here, but I'd rather leave it in than engage in
151 an LC_COLLATE flamewar about a shell I don't use... */
152 if (iswlower(c1
) && iswupper(c2
))
154 if (iswupper(c1
) && iswlower(c2
))
159 s1
[1] = s2
[1] = '\0';
160 return wcscoll(s1
, s2
);
161 # else /* not WIDE_STRINGS */
167 * From kevin lyda <kevin@suberic.net>:
168 * strcoll does not guarantee case sorting, so we pre-process now:
171 c1
= islower(c1
) ? c1
: tolower(c1
);
172 c2
= islower(c2
) ? c2
: tolower(c2
);
174 if (islower(c1
) && isupper(c2
))
176 if (isupper(c1
) && islower(c2
))
181 s1
[1] = s2
[1] = '\0';
182 return strcoll(s1
, s2
);
190 * Need to dodge two kernel bugs:
191 * opendir("") != opendir(".")
192 * NAMEI_BUG: on plain files trailing slashes are ignored in some kernels.
193 * POSIX specifies that they should be ignored in directories.
197 Opendir(const char *str
)
199 #if defined(hpux) || defined(__hpux)
204 return (opendir("."));
205 #if defined(hpux) || defined(__hpux)
207 * Opendir on some device files hangs, so avoid it
209 if (stat(str
, &st
) == -1 || !S_ISDIR(st
.st_mode
))
217 Lstat(const char *fn
, struct stat
*sb
)
223 if (*fn
!= 0 && strend(fn
)[-1] == '/' && !S_ISDIR(sb
->st_mode
))
225 # endif /* NAMEI_BUG */
233 Stat(const char *fn
, struct stat
*sb
)
239 if (*fn
!= 0 && strend(fn
)[-1] == '/' && !S_ISDIR(sb
->st_mode
))
241 #endif /* NAMEI_BUG */
246 Strchr(Char
*str
, int ch
)
257 qprintf(const char *pre
, const Char
*s
)
263 xprintf("%c", *p
& 0xff);
264 xprintf("\n%s", pre
);
266 xprintf("%c", *p
& M_PROTECT
? '"' : ' ');
267 xprintf("\n%s", pre
);
269 xprintf("%c", *p
& M_META
? '_' : ' ');
275 compare(const void *p
, const void *q
)
277 #if defined(NLS) && defined(HAVE_STRCOLL)
278 return (strcoll(*(char *const *) p
, *(char *const *) q
));
280 return (strcmp(*(char *const *) p
, *(char *const *) q
));
281 #endif /* NLS && HAVE_STRCOLL */
285 * The main glob() routine: compiles the pattern (optionally processing
286 * quotes), calls glob1() to do the real pattern matching, and finally
287 * sorts the list (unless unsorted operation is requested). Returns 0
288 * if things went well, nonzero if errors occurred. It is not an error
289 * to find no matches.
292 glob(const char *pattern
, int flags
, int (*errfunc
) (const char *, int),
296 Char
*bufnext
, m_not
;
297 const unsigned char *patnext
;
299 Char
*qpatnext
, *patbuf
;
302 patnext
= (const unsigned char *) pattern
;
303 if (!(flags
& GLOB_APPEND
)) {
305 pglob
->gl_pathv
= NULL
;
306 if (!(flags
& GLOB_DOOFFS
))
309 pglob
->gl_flags
= flags
& ~GLOB_MAGCHAR
;
310 pglob
->gl_errfunc
= errfunc
;
311 oldpathc
= pglob
->gl_pathc
;
312 pglob
->gl_matchc
= 0;
314 if (pglob
->gl_flags
& GLOB_ALTNOT
) {
323 patbuf
= xmalloc((strlen(pattern
) + 1) * sizeof(*patbuf
));
326 no_match
= *patnext
== not;
330 if (flags
& GLOB_QUOTE
) {
331 /* Protect the quoted characters */
332 while ((c
= *patnext
++) != EOS
) {
336 len
= mblen((const char *)(patnext
- 1), MB_LEN_MAX
);
338 TCSH_IGNORE(mblen(NULL
, 0));
340 *bufnext
++ = (Char
) c
;
342 *bufnext
++ = (Char
) (*patnext
++ | M_PROTECT
);
344 #endif /* WIDE_STRINGS */
346 if ((c
= *patnext
++) == EOS
) {
350 *bufnext
++ = (Char
) (c
| M_PROTECT
);
353 *bufnext
++ = (Char
) c
;
357 while ((c
= *patnext
++) != EOS
)
358 *bufnext
++ = (Char
) c
;
363 while ((c
= *qpatnext
++) != EOS
) {
369 if (*qpatnext
== EOS
||
370 Strchr(qpatnext
+ 1, RBRACKET
) == NULL
) {
371 *bufnext
++ = LBRACKET
;
376 pglob
->gl_flags
|= GLOB_MAGCHAR
;
382 *bufnext
++ = LCHAR(c
);
383 if (*qpatnext
== RANGE
&&
384 (c
= qpatnext
[1]) != RBRACKET
) {
386 *bufnext
++ = LCHAR(c
);
389 } while ((c
= *qpatnext
++) != RBRACKET
);
393 pglob
->gl_flags
|= GLOB_MAGCHAR
;
397 pglob
->gl_flags
|= GLOB_MAGCHAR
;
398 /* collapse adjacent stars to one [or three if globstar],
399 * to avoid exponential behavior
401 if (bufnext
== patbuf
|| bufnext
[-1] != M_ALL
||
402 ((flags
& GLOB_STAR
) != 0 &&
403 (bufnext
- 1 == patbuf
|| bufnext
[-2] != M_ALL
||
404 bufnext
- 2 == patbuf
|| bufnext
[-3] != M_ALL
)))
408 *bufnext
++ = LCHAR(c
);
414 qprintf("patbuf=", patbuf
);
417 if ((err
= glob1(patbuf
, pglob
, no_match
)) != 0) {
423 * If there was no match we are going to append the pattern
424 * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
425 * and the pattern did not contain any magic characters
426 * GLOB_NOMAGIC is there just for compatibility with csh.
428 if (pglob
->gl_pathc
== oldpathc
&&
429 ((flags
& GLOB_NOCHECK
) ||
430 ((flags
& GLOB_NOMAGIC
) && !(pglob
->gl_flags
& GLOB_MAGCHAR
)))) {
431 if (!(flags
& GLOB_QUOTE
))
432 globextend(pattern
, pglob
);
437 /* copy pattern, interpreting quotes */
438 copy
= xmalloc(strlen(pattern
) + 1);
441 while (*src
!= EOS
) {
442 /* Don't interpret quotes. The spec does not say we should do */
450 globextend(copy
, pglob
);
456 else if (!(flags
& GLOB_NOSORT
) && (pglob
->gl_pathc
!= oldpathc
))
457 qsort(pglob
->gl_pathv
+ pglob
->gl_offs
+ oldpathc
,
458 pglob
->gl_pathc
- oldpathc
, sizeof(char *), compare
);
464 glob1(Char
*pattern
, glob_t
*pglob
, int no_match
)
466 struct strbuf pathbuf
= strbuf_INIT
;
470 * a null pathname is invalid -- POSIX 1003.1 sect. 2.4.
474 err
= glob2(&pathbuf
, pattern
, pglob
, no_match
);
480 * functions glob2 and glob3 are mutually recursive; there is one level
481 * of recursion for each segment in the pattern that contains one or
482 * more meta characters.
485 glob2(struct strbuf
*pathbuf
, const Char
*pattern
, glob_t
*pglob
, int no_match
)
493 * loop over pattern segments until end of pattern or until segment with
494 * meta character found.
498 if (*pattern
== EOS
) { /* end of pattern? */
499 strbuf_terminate(pathbuf
);
501 if (Lstat(pathbuf
->s
, &sbuf
))
504 if (((pglob
->gl_flags
& GLOB_MARK
) &&
505 pathbuf
->s
[pathbuf
->len
- 1] != SEP
) &&
506 (S_ISDIR(sbuf
.st_mode
)
508 || (S_ISLNK(sbuf
.st_mode
) &&
509 (Stat(pathbuf
->s
, &sbuf
) == 0) &&
510 S_ISDIR(sbuf
.st_mode
))
513 strbuf_append1(pathbuf
, SEP
);
514 strbuf_terminate(pathbuf
);
517 globextend(pathbuf
->s
, pglob
);
521 /* find end of next segment, tentatively copy to pathbuf */
523 orig_len
= pathbuf
->len
;
524 while (*p
!= EOS
&& *p
!= SEP
) {
527 strbuf_append1(pathbuf
, *p
++);
530 if (!anymeta
) { /* no expansion, do next segment */
532 while (*pattern
== SEP
)
533 strbuf_append1(pathbuf
, *pattern
++);
535 else { /* need expansion, recurse */
536 pathbuf
->len
= orig_len
;
537 return (glob3(pathbuf
, pattern
, p
, pattern
, pglob
, no_match
));
544 One_Char_mbtowc(__Char
*pwc
, const Char
*s
, size_t n
)
547 char buf
[MB_LEN_MAX
], *p
;
552 while (p
< buf
+ n
&& (*p
++ = LCHAR(*s
++)) != 0)
554 return one_mbtowc(pwc
, buf
, n
);
563 glob3(struct strbuf
*pathbuf
, const Char
*pattern
, const Char
*restpattern
,
564 const Char
*pglobstar
, glob_t
*pglob
, int no_match
)
570 Char m_not
= (pglob
->gl_flags
& GLOB_ALTNOT
) ? M_ALTNOT
: M_NOT
;
573 int chase_symlinks
= 0;
574 const Char
*termstar
= NULL
;
576 strbuf_terminate(pathbuf
);
577 orig_len
= pathbuf
->len
;
580 while (pglobstar
< restpattern
) {
582 size_t width
= One_Char_mbtowc(&wc
, pglobstar
, MB_LEN_MAX
);
583 if ((pglobstar
[0] & M_MASK
) == M_ALL
&&
584 (pglobstar
[width
] & M_MASK
) == M_ALL
) {
586 chase_symlinks
= (pglobstar
[2 * width
] & M_MASK
) == M_ALL
;
587 termstar
= pglobstar
+ (2 + chase_symlinks
) * width
;
594 err
= pglobstar
==pattern
&& termstar
==restpattern
?
595 *restpattern
== EOS
?
596 glob2(pathbuf
, restpattern
- 1, pglob
, no_match
) :
597 glob2(pathbuf
, restpattern
+ 1, pglob
, no_match
) :
598 glob3(pathbuf
, pattern
, restpattern
, termstar
, pglob
, no_match
);
601 pathbuf
->len
= orig_len
;
602 strbuf_terminate(pathbuf
);
605 if (*pathbuf
->s
&& (Lstat(pathbuf
->s
, &sbuf
) || !S_ISDIR(sbuf
.st_mode
)
607 && ((globstar
&& !chase_symlinks
) || !S_ISLNK(sbuf
.st_mode
))
612 if (!(dirp
= Opendir(pathbuf
->s
))) {
613 /* todo: don't call for ENOENT or ENOTDIR? */
614 if ((pglob
->gl_errfunc
&& (*pglob
->gl_errfunc
) (pathbuf
->s
, errno
)) ||
615 (pglob
->gl_flags
& GLOB_ERR
))
621 /* search directory for matching names */
622 while ((dp
= readdir(dirp
)) != NULL
) {
623 /* initial DOT must be matched literally */
624 if (dp
->d_name
[0] == DOT
&& *pattern
!= DOT
)
625 if (!(pglob
->gl_flags
& GLOB_DOT
) || !dp
->d_name
[1] ||
626 (dp
->d_name
[1] == DOT
&& !dp
->d_name
[2]))
627 continue; /*unless globdot and not . or .. */
628 pathbuf
->len
= orig_len
;
629 strbuf_append(pathbuf
, dp
->d_name
);
630 strbuf_terminate(pathbuf
);
634 if (!chase_symlinks
&&
635 (Lstat(pathbuf
->s
, &sbuf
) || S_ISLNK(sbuf
.st_mode
)))
638 if (match(pathbuf
->s
+ orig_len
, pattern
, termstar
,
639 (int)m_not
) == no_match
)
641 strbuf_append1(pathbuf
, SEP
);
642 strbuf_terminate(pathbuf
);
643 if ((err
= glob2(pathbuf
, pglobstar
, pglob
, no_match
)) != 0)
646 if (match(pathbuf
->s
+ orig_len
, pattern
, restpattern
,
647 (int) m_not
) == no_match
)
649 if ((err
= glob2(pathbuf
, restpattern
, pglob
, no_match
)) != 0)
653 /* todo: check error from readdir? */
660 * Extend the gl_pathv member of a glob_t structure to accommodate a new item,
661 * add the new item, and update gl_pathc.
663 * This assumes the BSD realloc, which only copies the block when its size
664 * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
667 * Return 0 if new item added, error code if memory couldn't be allocated.
669 * Invariant of the glob_t structure:
670 * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
671 * gl_pathv points to (gl_offs + gl_pathc + 1) items.
674 globextend(const char *path
, glob_t
*pglob
)
680 newsize
= sizeof(*pathv
) * (2 + pglob
->gl_pathc
+ pglob
->gl_offs
);
681 pathv
= xrealloc(pglob
->gl_pathv
, newsize
);
683 if (pglob
->gl_pathv
== NULL
&& pglob
->gl_offs
> 0) {
684 /* first time around -- clear initial gl_offs items */
685 pathv
+= pglob
->gl_offs
;
686 for (i
= pglob
->gl_offs
; --i
>= 0;)
689 pglob
->gl_pathv
= pathv
;
691 pathv
[pglob
->gl_offs
+ pglob
->gl_pathc
++] = strsave(path
);
692 pathv
[pglob
->gl_offs
+ pglob
->gl_pathc
] = NULL
;
696 * pattern matching function for filenames.
699 match(const char *name
, const Char
*pat
, const Char
*patend
, int m_not
)
701 int ok
, negate_range
;
703 const char *nameNext
, *nameStart
, *nameEnd
;
707 nameStart
= nameNext
= name
;
710 while (pat
< patend
|| *name
) {
714 c
= *pat
; /* Only for M_MASK bits */
718 pwk
= One_Char_mbtowc(&wc
, pat
, MB_LEN_MAX
);
719 lwk
= one_mbtowc(&wk
, name
, MB_LEN_MAX
);
720 switch (c
& M_MASK
) {
722 while ((*(pat
+ pwk
) & M_MASK
) == M_ALL
) {
724 pwk
= One_Char_mbtowc(&wc
, pat
, MB_LEN_MAX
);
727 nameNext
= name
+ lwk
;
741 pwk
= One_Char_mbtowc(&wc
, pat
, MB_LEN_MAX
);
743 if ((negate_range
= ((*pat
& M_MASK
) == m_not
)) != 0) {
745 pwk
= One_Char_mbtowc(&wc
, pat
, MB_LEN_MAX
);
748 while ((*pat
& M_MASK
) != M_END
) {
749 if ((*pat
& M_MASK
) == M_RNG
) {
753 pwk
= One_Char_mbtowc(&wc2
, pat
, MB_LEN_MAX
);
754 if (globcharcoll(wc1
, wk
, 0) <= 0 &&
755 globcharcoll(wk
, wc2
, 0) <= 0)
761 pwk
= One_Char_mbtowc(&wc
, pat
, MB_LEN_MAX
);
764 pwk
= One_Char_mbtowc(&wc
, pat
, MB_LEN_MAX
);
765 if (ok
== negate_range
)
769 if (*name
== EOS
|| samecase(wk
) != samecase(wc
))
775 if (nameNext
!= nameStart
776 && (nameEnd
== NULL
|| nameNext
<= nameEnd
)) {
786 /* free allocated data belonging to a glob_t structure */
788 globfree(glob_t
*pglob
)
793 if (pglob
->gl_pathv
!= NULL
) {
794 pp
= pglob
->gl_pathv
+ pglob
->gl_offs
;
795 for (i
= pglob
->gl_pathc
; i
--; ++pp
)
797 xfree(*pp
), *pp
= NULL
;
798 xfree(pglob
->gl_pathv
), pglob
->gl_pathv
= NULL
;