2 * sh.glob.c: Regular expression expansion
5 * Copyright (c) 1980, 1991 The Regents of the University of California.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 #define G_NONE 0 /* No globbing needed */
42 #define G_GLOB 1 /* string contains *?[] characters */
43 #define G_CSH 2 /* string contains ~`{ characters */
45 #define GLOBSPACE 100 /* Alloc increment */
55 * Globbing is now done in two stages. In the first pass we expand the
56 * csh globbing idioms ~`{ and then, if needed, we proceed with the
57 * normal globbing of ?*[
59 * Csh type globbing is handled in globexpand() and the rest is
60 * handled in glob() which is part of the 4.4BSD libc.
63 static Char
*globtilde (Char
*);
64 static Char
*handleone (Char
*, Char
**, int);
65 static Char
**libglob (Char
**);
66 static Char
**globexpand (Char
**, int);
67 static int globbrace (const Char
*, Char
***);
68 static void expbrace (Char
***, Char
***, int);
69 static void pword (struct blk_buf
*, struct Strbuf
*);
70 static void backeval (struct blk_buf
*, struct Strbuf
*, Char
*,
75 Char
*name
, *u
, *home
, *res
;
82 for (s
++; *s
&& *s
!= '/' && *s
!= ':'; s
++)
85 name
= Strnsave(u
+ 1, s
- (u
+ 1));
86 cleanup_push(name
, xfree
);
89 if (adrof(STRnonomatch
)) {
94 stderror(ERR_UNKUSER
, short2str(name
));
99 if (home
[0] == '/' && home
[1] == '\0' && s
[0] == '/')
102 res
= Strspl(home
, s
);
108 /* Returns a newly allocated string, old or NULL */
117 * kfk - 17 Jan 1984 - stack hack allows user to get at arbitrary dir names
118 * in stack. PWP: let =foobar pass through (for X windows)
120 if (old
[1] == '-' && (old
[2] == '\0' || old
[2] == '/')) {
122 const Char
*olddir
= varval (STRowd
);
124 if (olddir
&& *olddir
&&
125 !dcwd
->di_next
->di_name
&& !dcwd
->di_prev
->di_name
)
126 return Strspl(olddir
, &old
[2]);
130 else if (Isdigit(old
[1])) {
133 for (b
= &old
[2]; Isdigit(*b
); b
++)
134 dig
= dig
* 10 + (*b
- '0');
135 if (*b
!= '\0' && *b
!= '/')
136 /* =<number>foobar */
146 return Strspl(dir
, b
);
/*
 * globbrace: expand the first brace group found in s.
 *
 * s  - word to expand; the scan for LBRC below has no end-of-string test,
 *      so s must contain one (expbrace calls this only after Strchr has
 *      found a left brace, presumably the same character as LBRC).
 * bl - receives the block built by bb_finish() from the expanded strings.
 *
 * expbrace treats a negative return value as an error and a non-negative
 * one as a count.
 * NOTE(review): this listing is an excerpt; statements between the
 * numbered lines, including every return, are not shown.
 */
150 globbrace(const Char
*s
, Char
***bl
)
152 struct Strbuf gbuf
= Strbuf_INIT
;
153 struct blk_buf bb
= BLK_BUF_INIT
;
155 const Char
*p
, *pm
, *pe
, *pl
;
158 /* copy part up to the brace */
159 for (p
= s
; *p
!= LBRC
; p
++)
163 /* check for balanced braces */
164 for (i
= 0, pe
= ++p
; *pe
; pe
++)
166 /* Ignore everything between [] */
167 for (++pe
; *pe
!= RBRK
&& *pe
!= EOS
; pe
++)
172 else if (*pe
== LBRC
)
174 else if (*pe
== RBRC
) {
/* Tested after the balance scan: a non-zero i or reaching the end of the string. */
180 if (i
!= 0 || *pe
== '\0')
/* Seed gbuf with the first prefix_len characters of s; they are reused for each string built below. */
183 Strbuf_appendn(&gbuf
, s
, prefix_len
);
/* Walk the brace body from p up to and including pe. */
185 for (i
= 0, pl
= pm
= p
; pm
<= pe
; pm
++)
188 for (++pm
; *pm
!= RBRK
&& *pm
!= EOS
; pm
++)
/* Rewind gbuf to the prefix, then add the piece pl..pm and everything after pe. */
209 gbuf
.len
= prefix_len
;
210 Strbuf_appendn(&gbuf
, pl
, pm
- pl
);
211 Strbuf_append(&gbuf
, pe
+ 1);
212 Strbuf_terminate(&gbuf
);
/* Store a copy made by Strsave, since gbuf is rewound and reused above. */
213 bb_append(&bb
, Strsave(gbuf
.s
));
/* Publish the collected strings through bl. */
220 *bl
= bb_finish(&bb
);
/*
 * expbrace: brace-expand the words of a NULL-terminated vector in place.
 *
 * nvp  - address of the vector; it is rewritten whenever xrealloc() moves
 *        the storage, so that registered cleanups keep working.
 * elp  - not referenced in the lines visible here (Gnmatch passes NULL).
 * size - capacity of the vector in elements; it is compared against the
 *        end of the used part and grown on demand.
 *
 * NOTE(review): this listing is an excerpt; statements between the
 * numbered lines are not shown.
 */
227 expbrace(Char
***nvp
, Char
***elp
, int size
)
229 Char
**vl
, **el
, **nv
, *s
;
/* el is placed blklen(vl) entries past vl. */
235 el
= vl
+ blklen(vl
);
237 for (s
= *vl
; s
; s
= *++vl
) {
240 /* leave {} untouched for find */
241 if (s
[0] == '{' && (s
[1] == '\0' || (s
[1] == '}' && s
[2] == '\0')))
243 if (Strchr(s
, '{') != NULL
) {
/* A negative result is reported as ERR_MISSING with the negated value as its argument. */
247 if ((len
= globbrace(s
, &bl
)) < 0)
248 stderror(ERR_MISSING
, -len
);
/* Grow when len more entries would reach the end of the allocation: by the shortfall or by GLOBSPACE, whichever is larger. */
255 if (&el
[len
] >= &nv
[size
]) {
257 l
= &el
[len
] - &nv
[size
];
258 size
+= GLOBSPACE
> l
? GLOBSPACE
: l
;
261 nv
= xrealloc(nv
, size
* sizeof(Char
*));
262 *nvp
= nv
; /* To keep cleanups working */
/* Walk backwards from el down to vp (loop body not shown; presumably it shifts the tail to make room). */
275 for (bp
= el
; bp
!= vp
; bp
--)
/* Copy the entries of bl, starting with the second one, into the vector at vp. */
285 for (bp
= bl
+ 1; *bp
; *vp
++ = *bp
++)
/*
 * globexpand: apply the csh-specific expansions to the word vector v and
 * collect the results in a growable vector reached through fnv.
 *
 * v      - NULL-terminated vector of input words.
 * noglob - passed in by globone and globall from the noglob variable; how
 *          it is consulted is not visible in this excerpt.
 *
 * Steps visible here:
 *   1. words containing a backquote are expanded with dobackp(); the
 *      vector grows in GLOBSPACE steps whenever vl reaches its end;
 *   2. braces are expanded with expbrace();
 *   -  globequal() is applied; a NULL result raises ERR_DEEP unless the
 *      nonomatch variable is set;
 *   4. when symlinks == SYM_EXPAND each word is replaced by
 *      dnormalize(s, 1).
 *
 * The holder fnv is registered with blk_indirect_cleanup.
 * NOTE(review): this listing is an excerpt; statements between the
 * numbered lines, including the return, are not shown.
 */
296 globexpand(Char
**v
, int noglob
)
299 Char
***fnv
, **vl
, **el
;
300 int size
= GLOBSPACE
;
/* fnv holds a single Char ** slot, so size the allocation from the pointee. */
303 fnv
= xmalloc(sizeof(*fnv));
304 *fnv
= vl
= xmalloc(sizeof(Char
*) * size
);
306 cleanup_push(fnv
, blk_indirect_cleanup
);
309 * Step 1: expand backquotes.
311 while ((s
= *v
++) != NULL
) {
312 if (Strchr(s
, '`')) {
316 expanded
= dobackp(s
, 0);
317 for (i
= 0; expanded
[i
] != NULL
; i
++) {
/* Vector full: reallocate and point vl at the first new slot. */
319 if (vl
== &(*fnv
)[size
]) {
321 *fnv
= xrealloc(*fnv
, size
* sizeof(Char
*));
322 vl
= &(*fnv
)[size
- GLOBSPACE
];
/* Same growth step, repeated at a second place in the loop. */
329 if (vl
== &(*fnv
)[size
]) {
331 *fnv
= xrealloc(*fnv
, size
* sizeof(Char
*));
332 vl
= &(*fnv
)[size
- GLOBSPACE
];
342 * Step 2: expand braces
345 expbrace(fnv
, &el
, size
);
352 for (s
= *vl
; s
; s
= *++vl
)
359 if ((ns
= globequal(s
)) == NULL
) {
360 if (!adrof(STRnonomatch
))
361 stderror(ERR_DEEP
); /* Error */
364 /* Expansion succeeded */
375 * Step 4: expand .. if the variable symlinks==expand is set
377 if (symlinks
== SYM_EXPAND
) {
378 for (s
= *vl
; s
; s
= *++vl
) {
379 *vl
= dnormalize(s
, 1);
/*
 * handleone: reduce the match list vl to the single string that is stored
 * in str.
 *
 * str    - original word; used as the name in the error message and then
 *          reused as the result variable.
 * vl     - NULL-terminated list of matches.
 * action - selects one of the behaviours below (which value selects which
 *          is not visible in this excerpt).
 *
 * Visible behaviours: report ERR_NAME | ERR_AMBIG; build a newly allocated
 * string from all entries of vl with every character masked by TRIM; or
 * take a copy of the first entry made with strip() and Strsave().
 * NOTE(review): this listing is an excerpt; statements between the
 * numbered lines are not shown.
 */
393 handleone(Char
*str
, Char
**vl
, int action
)
400 setname(short2str(str
));
402 stderror(ERR_NAME
| ERR_AMBIG
);
/* First pass: chars grows by one per entry (loop body not shown; presumably it also adds each length). */
406 for (t
= vl
; (p
= *t
++) != NULL
; chars
++)
408 str
= xmalloc(chars
* sizeof(Char
));
/* Second pass copies the characters; the allocation above has already used chars. */
409 for (t
= vl
, strp
= str
; (p
= *t
++) != NULL
; chars
++) {
411 *strp
++ = *p
++ & TRIM
;
418 str
= Strsave(strip(*vl
));
430 int gflgs
= GLOB_QUOTE
| GLOB_NOMAGIC
| GLOB_ALTNOT
;
433 int nonomatch
= adrof(STRnonomatch
) != 0, magic
= 0, match
= 0;
435 if (adrof(STRglobdot
))
438 if (adrof(STRglobstar
))
449 gflgs
|= GLOB_NOCHECK
;
452 ptr
= short2qstr(*vl
);
453 switch (glob(ptr
, gflgs
, 0, &globv
)) {
457 stderror(ERR_NAME
| ERR_GLOB
);
466 if (globv
.gl_flags
& GLOB_MAGCHAR
) {
467 match
|= (globv
.gl_matchc
!= 0);
470 gflgs
|= GLOB_APPEND
;
473 vl
= (globv
.gl_pathc
== 0 || (magic
&& !match
&& !nonomatch
)) ?
474 NULL
: blk2short(globv
.gl_pathv
);
/*
 * globone: expand the single word str and return one string.
 *
 * str    - word to expand.
 * action - forwarded to handleone() when the result list is not empty.
 *
 * Visible outcomes: a copy of str made with Strsave() and passed through
 * strip(); a copy of STRNULL when the result list exists but is empty;
 * otherwise whatever handleone() returns.  ERR_NAME | ERR_NOMATCH is
 * reported under the name str (guarding condition not shown).
 * NOTE(review): this listing is an excerpt; statements between the
 * numbered lines are not shown.
 */
480 globone(Char
*str
, int action
)
482 Char
*v
[2], **vl
, **vo
;
485 noglob
= adrof(STRnoglob
) != 0;
490 return (strip(Strsave(str
)));
494 * Expand back-quote, tilde and brace
496 vo
= globexpand(v
, noglob
);
/* Tested after globexpand: noglob is set or G_GLOB is absent from gflg. */
497 if (noglob
|| (gflg
& G_GLOB
) == 0) {
501 cleanup_push(vo
, blk_cleanup
);
/* Same test on the branch that did not call globexpand: return a stripped copy. */
503 else if (noglob
|| (gflg
& G_GLOB
) == 0)
504 return (strip(Strsave(str
)));
516 setname(short2str(str
));
517 stderror(ERR_NAME
| ERR_NOMATCH
);
/* A list that exists but has no entries yields a copy of STRNULL. */
520 if (vl
&& vl
[0] == NULL
) {
523 return (Strsave(STRNULL
));
526 return (handleone(str
, vl
, action
));
/*
 * globall: expand the word vector v as directed by the bits in gflg.
 *
 * Visible steps: noglob is set from adrof(STRnoglob); vl and vo both start
 * from the result of globexpand(v, noglob) or of saveblk(v); when noglob
 * is clear and G_GLOB is present in gflg, vo is registered with
 * blk_cleanup.
 * NOTE(review): this listing is an excerpt; the conditions choosing
 * between the two starting values, the remaining work and the return are
 * not shown.
 */
536 globall(Char
**v
, int gflg
)
544 noglob
= adrof(STRnoglob
) != 0;
548 * Expand back-quote, tilde and brace
550 vl
= vo
= globexpand(v
, noglob
);
552 vl
= vo
= saveblk(v
);
554 if (!noglob
&& (gflg
& G_GLOB
)) {
555 cleanup_push(vo
, blk_cleanup
);
/*
 * glob_all_or_error: expand v with globall(v, gflag) and report
 * ERR_NAME | ERR_NOMATCH.
 * NOTE(review): this listing is an excerpt; the origin of gflag, the
 * condition guarding the error and the return statement are not shown.
 */
568 glob_all_or_error(Char
**v
)
574 v
= globall(v
, gflag
);
576 stderror(ERR_NAME
| ERR_NOMATCH
);
/*
 * rscan: walk the NULL-terminated vector t, taking each string in turn.
 *
 * t - vector of strings to visit.
 * f - callback taking one Char; the loop body that would invoke it is not
 *     visible in this excerpt (presumably it is called for every
 *     character of every string -- TODO confirm).
 */
585 rscan(Char
**t
, void (*f
) (Char
))
589 while ((p
= *t
++) != NULL
)
599 while ((p
= *t
++) != NULL
)
601 #if INVALID_BYTE != 0
602 if ((*p
& INVALID_BYTE
) != INVALID_BYTE
) /* *p < INVALID_BYTE */
616 while ((p
= *t
++) != NULL
) {
617 if (*p
== '~' || *p
== '=')
619 else if (*p
== '{' &&
620 (p
[1] == '\0' || (p
[1] == '}' && p
[2] == '\0')))
627 * We do want to expand echo `echo '*'`, so we don't
628 * use this piece of code anymore.
631 while (*p
&& *p
!= '`')
633 if (*p
) /* Quoted chars */
638 if (!*p
) /* The matching ` */
646 else if (symlinks
== SYM_EXPAND
&&
647 p
[1] && ISDOTDOT(p
) && (p
== *(t
-1) || *(p
-1) == '/') )
656 * Command substitute cp. If literal, then this is a substitution from a
657 * << redirection, and so we should not crunch blanks and tabs, separating
658 * words only at newlines.
/*
 * dobackp: expand backquoted command text in cp and return the resulting
 * block of words.
 *
 * cp      - input word.
 * literal - forwarded unchanged to backeval().
 *
 * Visible flow: text before a backquote is appended to the current word;
 * the text up to the next backquote is copied with Strnsave() and handed
 * to backeval(); ERR_UNMATCHED is reported for the backquote character
 * (guarding condition not shown).  Both buffers and the copy ep are
 * registered with cleanup_push().  The result is what bb_finish() returns.
 * NOTE(review): this listing is an excerpt; statements between the
 * numbered lines are not shown.
 */
661 dobackp(Char
*cp
, int literal
)
663 struct Strbuf word
= Strbuf_INIT
;
664 struct blk_buf bb
= BLK_BUF_INIT
;
667 cleanup_push(&bb
, bb_cleanup
);
668 cleanup_push(&word
, Strbuf_cleanup
);
/* Find the opening backquote, or the end of the string. */
670 for (lp
= cp
; *lp
!= '\0' && *lp
!= '`'; lp
++)
672 Strbuf_appendn(&word
, cp
, lp
- cp
);
/* Find the closing backquote, or the end of the string. */
676 for (rp
= lp
; *rp
&& *rp
!= '`'; rp
++)
685 stderror(ERR_UNMATCHED
, '`');
/* ep is a copy of the text between lp and rp; xfree is registered to release it. */
687 ep
= Strnsave(lp
, rp
- lp
);
688 cleanup_push(ep
, xfree
);
689 backeval(&bb
, &word
, ep
, literal
);
697 return bb_finish(&bb
);
/*
 * backeval: run the command text cp as a command substitution and split
 * its output into words.
 *
 * bb      - block buffer handed in by dobackp; not referenced in the lines
 *           visible here (presumably completed words are added to it).
 * word    - word being accumulated; output characters are added with
 *           Strbuf_append1().
 * cp      - command text to run.
 * literal - when set, a word is formed at each break even if it is empty,
 *           so that blank lines are not lost.
 *
 * A child is forked through pfork(); it moves pvec[1] onto descriptor 1
 * and SHDIAG onto descriptor 2, parses the command with lex() and
 * syntax(), ignores SIGTSTP, SIGTTIN and SIGTTOU, and runs the tree with
 * F_BACKQ|F_NOFORK.  The parent reads the output from pvec[0] with
 * wide_read().
 *
 * NOTE(review): this listing is an excerpt; statements between the
 * numbered lines are not shown.
 */
702 backeval(struct blk_buf
*bb
, struct Strbuf
*word
, Char
*cp
, int literal
)
706 struct command faket
;
709 Char
*fakecom
[2], ibuf
[BUFSIZE
];
/* Skip leading characters that carry the QUOTE bit; quoted is set when that run reaches the terminator. */
714 for (ip
= cp
; (*ip
& QUOTE
) != 0; ip
++)
716 quoted
= *ip
== '\0';
719 faket
.t_dtyp
= NODE_COMMAND
;
720 faket
.t_dflg
= F_BACKQ
;
724 faket
.t_dcom
= fakecom
;
725 fakecom
[0] = STRfakecom1
;
729 * We do the psave job to temporarily change the current job so that the
730 * following fork is considered a separate job. This is so that when
731 * backquotes are used in a builtin function that calls glob the "current
732 * job" is not corrupted. We only need one level of pushed jobs as long as
733 * we are sure to fork here.
736 cleanup_push(&faket
, psavejob_cleanup
); /* faket is only a marker */
739 * It would be nicer if we could integrate this redirection more with the
740 * routines in sh.sem.c by doing a fake execute on a builtin function that
744 cleanup_push(&pvec
[0], open_cleanup
);
745 cleanup_push(&pvec
[1], open_cleanup
);
746 if (pfork(&faket
, -1) == 0) {
752 (void) dmove(pvec
[1], 1);
753 (void) dmove(SHDIAG
, 2);
757 for (arginp
= cp
; *cp
; cp
++) {
759 if (is_set(STRcsubstnonl
) && (*cp
== '\n' || *cp
== '\r'))
764 * In the child ``forget'' everything about current aliases or
772 omark
= cleanup_push_mark();
775 struct wordent paraml1
;
784 cleanup_pop_mark(omark
);
/* paraml1 heads the lexed word list and doubles as the cleanup marker released by cleanup_until(&paraml1) below. */
794 (void) lex(&paraml1
);
795 cleanup_push(&paraml1
, lex_cleanup
);
799 t
= syntax(paraml1
.next
, &paraml1
, 0);
800 cleanup_push(t
, syntax_cleanup
);
801 /* The F_BACKQ flag must be set so the job output is correct if
802 * printexitvalue is set. If it's not set, the job output
803 * will have "Exit N" appended where N is the exit status. */
805 t
->t_dflg
= F_BACKQ
|F_NOFORK
;
809 signal(SIGTSTP
, SIG_IGN
);
812 signal(SIGTTIN
, SIG_IGN
);
815 signal(SIGTTOU
, SIG_IGN
);
817 execute(t
, -1, NULL
, NULL
, TRUE
);
819 cleanup_until(&paraml1
);
822 cleanup_until(&pvec
[1]);
831 icnt
= wide_read(pvec
[0], ibuf
, BUFSIZE
, 0);
841 #if defined(WINNT_NATIVE) || defined(__CYGWIN__)
844 #endif /* WINNT_NATIVE || __CYGWIN__ */
847 * Continue around the loop one more time, so that we can eat
848 * the last newline without terminating this word.
853 if (!quoted
&& (c
== ' ' || c
== '\t'))
856 if (c
== '\\' || quoted
)
858 Strbuf_append1(word
, c
);
861 * Unless at end-of-file, we will form a new word here if there were
862 * characters in the word, or in any case when we take text literally.
863 * If we didn't make empty words here when literal was set then we
864 * would lose blank lines.
866 if (c
!= 0 && (cnt
|| literal
))
871 cleanup_until(&pvec
[0]);
873 cleanup_until(&faket
); /* psavejob_cleanup(); */
/*
 * pword: close off the word collected in word.
 *
 * Strbuf_finish() yields the finished string s.  What is then done with s
 * and bb is not visible in this excerpt (presumably s is appended to bb
 * -- TODO confirm).
 */
877 pword(struct blk_buf
*bb
, struct Strbuf
*word
)
881 s
= Strbuf_finish(word
);
/*
 * Gmatch: match string against pattern by calling Gnmatch() with a NULL
 * endstr; the result of Gnmatch() is returned unchanged.
 */
887 Gmatch(const Char
*string
, const Char
*pattern
)
889 return Gnmatch(string
, pattern
, NULL
);
/*
 * Gnmatch: match string against pattern after brace expansion.
 *
 * string  - text to test.
 * pattern - glob pattern; a leading ^ is tested for (its handling is not
 *           shown; presumably it inverts gpol -- TODO confirm).
 * endstr  - NULL when called from Gmatch; its use is not visible in this
 *           excerpt.
 *
 * A copy of the pattern is placed in a one-entry block of GLOBSPACE slots
 * and expanded with expbrace().  One loop counts only exact matches
 * (t_pmatch() returning 2); the other accepts any non-zero result.  The
 * function returns non-zero when gres equals gpol.
 * NOTE(review): this listing is an excerpt; statements between the
 * numbered lines are not shown.
 */
893 Gnmatch(const Char
*string
, const Char
*pattern
, const Char
**endstr
)
896 const Char
*tstring
= string
;
897 int gpol
= 1, gres
= 0;
899 if (*pattern
== '^') {
/* fblk holds a single Char ** slot, so size the allocation from the pointee. */
904 fblk
= xmalloc(sizeof(*fblk));
905 *fblk
= xmalloc(GLOBSPACE
* sizeof(Char
*));
906 (*fblk
)[0] = Strsave(pattern
);
909 cleanup_push(fblk
, blk_indirect_cleanup
);
910 expbrace(fblk
, NULL
, GLOBSPACE
);
913 /* Exact matches only */
914 for (p
= *fblk
; *p
; p
++)
915 gres
|= t_pmatch(string
, *p
, &tstring
, 1) == 2 ? 1 : 0;
919 /* partial matches */
920 end
= Strend(string
);
921 for (p
= *fblk
; *p
; p
++)
922 if (t_pmatch(string
, *p
, &tstring
, 1) != 0) {
931 return(gres
== gpol
);
935 * Return 2 on exact match,
936 * Return 1 on substring match.
937 * Return 0 on no match.
938 * *estr will point to the end of the longest exact or substring match.
941 t_pmatch(const Char
*string
, const Char
*pattern
, const Char
**estr
, int cs
)
943 Char stringc
, patternc
, rangec
;
944 int match
, negate_range
;
945 const Char
*pestr
, *nstring
;
947 for (nstring
= string
;; string
= nstring
) {
948 stringc
= *nstring
++ & TRIM
;
949 patternc
= *pattern
++ & TRIM
;
953 return (stringc
== '\0' ? 2 : 1);
960 *estr
= Strend(string
);
966 switch(t_pmatch(string
, pattern
, estr
, cs
)) {
970 pestr
= *estr
;/*FIXME: does not guarantee longest match */
975 abort(); /* Cannot happen */
977 stringc
= *string
++ & TRIM
;
991 if ((negate_range
= (*pattern
== '^')) != 0)
993 while ((rangec
= *pattern
++ & TRIM
) != '\0') {
998 if (*pattern
== '-' && pattern
[1] != ']') {
1001 rangec2
= *pattern
++ & TRIM
;
1002 match
= (globcharcoll(stringc
, rangec2
, 0) <= 0 &&
1003 globcharcoll(rangec
, stringc
, 0) <= 0);
1006 match
= (stringc
== rangec
);
1009 stderror(ERR_NAME
| ERR_MISSING
, ']');
1010 if ((!match
) && (stringc
== '\0'))
1012 if (match
== negate_range
)
1016 if (cs
? patternc
!= stringc
1017 : Tolower(patternc
) != Tolower(stringc
))