1 /* $Header: /src/pub/tcsh/sh.glob.c,v 3.54 2002/07/04 19:28:29 christos Exp $ */
3 * sh.glob.c: Regular expression expansion
6 * Copyright (c) 1980, 1991 The Regents of the University of California.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 RCSID("$Id: sh.glob.c,v 3.54 2002/07/04 19:28:29 christos Exp $")
43 static int pargsiz
, gargsiz
;
48 #define G_NONE 0 /* No globbing needed */
49 #define G_GLOB 1 /* string contains *?[] characters */
50 #define G_CSH 2 /* string contains ~`{ characters */
52 #define GLOBSPACE 100 /* Alloc increment */
53 #define LONGBSIZE 10240 /* Backquote expansion buffer size */
68 * globbing is now done in two stages. In the first pass we expand
69 * csh globbing idioms ~`{ and then we proceed doing the normal
70 * globbing if needed ?*[
72 * Csh type globbing is handled in globexpand() and the rest is
73 * handled in glob() which is part of the 4.4BSD libc.
76 static Char
*globtilde
__P((Char
**, Char
*));
77 static Char
*handleone
__P((Char
*, Char
**, int));
78 static Char
**libglob
__P((Char
**));
79 static Char
**globexpand
__P((Char
**));
80 static int globbrace
__P((Char
*, Char
*, Char
***));
81 static void expbrace
__P((Char
***, Char
***, int));
82 static int pmatch
__P((Char
*, Char
*, Char
**));
83 static void pword
__P((int));
84 static void psave
__P((int));
85 static void backeval
__P((Char
*, bool));
91 Char gbuf
[BUFSIZE
], *gstart
, *b
, *u
, *e
;
99 for (b
= gstart
, e
= &gbuf
[BUFSIZE
- 1];
100 *s
&& *s
!= '/' && *s
!= ':' && b
< e
;
104 if (gethdir(gstart
)) {
105 if (adrof(STRnonomatch
))
109 stderror(ERR_UNKUSER
, short2str(gstart
));
111 stderror(ERR_NOHOME
);
113 b
= &gstart
[Strlen(gstart
)];
115 slash
= gstart
[0] == '/' && gstart
[1] == '\0';
123 if (slash
&& gstart
[1] == '/')
126 return (Strsave(gstart
));
137 * kfk - 17 Jan 1984 - stack hack allows user to get at arbitrary dir names
138 * in stack. PWP: let =foobar pass through (for X windows)
140 if (old
[1] == '-' && (old
[2] == '\0' || old
[2] == '/')) {
145 else if (Isdigit(old
[1])) {
148 for (b
= &old
[2]; Isdigit(*b
); b
++)
149 dig
= dig
* 10 + (*b
- '0');
150 if (*b
!= '\0' && *b
!= '/')
151 /* =<number>foobar */
158 if (!getstakd(new, dig
))
161 /* Copy the rest of the string */
162 for (d
= &new[Strlen(new)];
163 d
< &new[BUFSIZE
- 1] && (*d
++ = *b
++) != '\0';)
175 Char
*pm
, *pe
, *lm
, *pl
;
178 int size
= GLOBSPACE
;
180 nv
= vl
= (Char
**) xmalloc((size_t) (sizeof(Char
*) * size
));
184 /* copy part up to the brace */
185 for (lm
= gbuf
, p
= s
; *p
!= LBRC
; *lm
++ = *p
++)
188 /* check for balanced braces */
189 for (i
= 0, pe
= ++p
; *pe
; pe
++)
191 if (Ismbyte1(*pe
) && *(pe
+ 1) != EOS
)
194 #endif /* DSPMBYTE */
196 /* Ignore everything between [] */
197 for (++pe
; *pe
!= RBRK
&& *pe
!= EOS
; pe
++)
199 if (Ismbyte1(*pe
) && *(pe
+ 1) != EOS
)
202 #endif /* DSPMBYTE */
209 else if (*pe
== LBRC
)
211 else if (*pe
== RBRC
) {
217 if (i
!= 0 || *pe
== '\0') {
222 for (i
= 0, pl
= pm
= p
; pm
<= pe
; pm
++)
224 if (Ismbyte1(*pm
) && pm
+ 1 <= pe
)
227 #endif /* DSPMBYTE */
230 for (++pm
; *pm
!= RBRK
&& *pm
!= EOS
; pm
++)
232 if (Ismbyte1(*pm
) && *(pm
+ 1) != EOS
)
235 #endif /* DSPMBYTE */
259 (void) Strcpy(lm
, pl
);
260 (void) Strcat(gbuf
, pe
+ 1);
262 *vl
++ = Strsave(gbuf
);
265 if (vl
== &nv
[size
]) {
267 nv
= (Char
**) xrealloc((ptr_t
) nv
,
268 (size_t) (size
* sizeof(Char
*)));
269 vl
= &nv
[size
- GLOBSPACE
];
283 expbrace(nvp
, elp
, size
)
287 Char
**vl
, **el
, **nv
, *s
;
293 for (el
= vl
; *el
; el
++)
296 for (s
= *vl
; s
; s
= *++vl
) {
300 /* leave {} untouched for find */
301 if (s
[0] == '{' && (s
[1] == '\0' || (s
[1] == '}' && s
[2] == '\0')))
303 if ((b
= Strchr(s
, '{')) != NULL
) {
307 #if defined (DSPMBYTE)
308 if (b
!= s
&& Ismbyte2(*b
) && Ismbyte1(*(b
-1))) {
309 /* The "{" is the 2nd byte of a MB character */
312 #endif /* DSPMBYTE */
313 if ((len
= globbrace(s
, b
, &bl
)) < 0) {
315 stderror(ERR_MISSING
, -len
);
323 if (&el
[len
] >= &nv
[size
]) {
325 l
= (int) (&el
[len
] - &nv
[size
]);
326 size
+= GLOBSPACE
> l
? GLOBSPACE
: l
;
329 nv
= (Char
**) xrealloc((ptr_t
) nv
,
330 (size_t) (size
* sizeof(Char
*)));
343 for (bp
= el
; bp
!= vp
; bp
--)
353 for (bp
= bl
+ 1; *bp
; *vp
++ = *bp
++)
369 Char
**nv
, **vl
, **el
;
370 int size
= GLOBSPACE
;
373 nv
= vl
= (Char
**) xmalloc((size_t) (sizeof(Char
*) * size
));
377 * Step 1: expand backquotes.
379 while ((s
= *v
++) != '\0') {
380 if (Strchr(s
, '`')) {
383 (void) dobackp(s
, 0);
384 for (i
= 0; i
< pargc
; i
++) {
386 if (vl
== &nv
[size
]) {
388 nv
= (Char
**) xrealloc((ptr_t
) nv
,
389 (size_t) (size
* sizeof(Char
*)));
390 vl
= &nv
[size
- GLOBSPACE
];
393 xfree((ptr_t
) pargv
);
398 if (vl
== &nv
[size
]) {
400 nv
= (Char
**) xrealloc((ptr_t
) nv
,
401 (size_t) (size
* sizeof(Char
*)));
402 vl
= &nv
[size
- GLOBSPACE
];
412 * Step 2: expand braces
415 expbrace(&nv
, &el
, size
);
422 for (s
= *vl
; s
; s
= *++vl
)
424 Char gp
[BUFSIZE
], *ns
;
426 *vl
= globtilde(nv
, s
);
429 if ((ns
= globequal(gp
, s
)) == NULL
) {
430 if (!adrof(STRnonomatch
)) {
437 /* Expansion succeeded */
448 * Step 4: expand .. if the variable symlinks==expand is set
450 if (symlinks
== SYM_EXPAND
) {
451 for (s
= *vl
; s
; s
= *++vl
) {
452 *vl
= dnormalize(s
, 1);
462 handleone(str
, vl
, action
)
473 setname(short2str(str
));
475 stderror(ERR_NAME
| ERR_AMBIG
);
479 for (t
= vlp
; (p
= *t
++) != '\0'; chars
++)
482 str
= (Char
*)xmalloc((size_t)(chars
* sizeof(Char
)));
483 for (t
= vlp
, strp
= str
; (p
= *t
++) != '\0'; chars
++) {
485 *strp
++ = *p
++ & TRIM
;
492 str
= Strsave(strip(*vlp
));
505 int gflgs
= GLOB_QUOTE
| GLOB_NOMAGIC
| GLOB_ALTNOT
;
508 int nonomatch
= adrof(STRnonomatch
) != 0, magic
= 0, match
= 0;
518 gflgs
|= GLOB_NOCHECK
;
521 ptr
= short2qstr(*vl
);
522 switch (glob(ptr
, gflgs
, 0, &globv
)) {
526 stderror(ERR_NAME
| ERR_GLOB
);
535 if (globv
.gl_flags
& GLOB_MAGCHAR
) {
536 match
|= (globv
.gl_matchc
!= 0);
539 gflgs
|= GLOB_APPEND
;
542 vl
= (globv
.gl_pathc
== 0 || (magic
&& !match
&& !nonomatch
)) ?
543 NULL
: blk2short(globv
.gl_pathv
);
554 Char
*v
[2], **vl
, **vo
;
557 noglob
= adrof(STRnoglob
) != 0;
564 return (strip(Strsave(str
)));
568 * Expand back-quote, tilde and brace
571 if (noglob
|| (gflg
& G_GLOB
) == 0) {
574 return (Strsave(STRNULL
));
577 return (handleone(str
, vo
, action
));
585 else if (noglob
|| (gflg
& G_GLOB
) == 0)
586 return (strip(Strsave(str
)));
591 if ((gflg
& G_CSH
) && vl
!= vo
)
594 setname(short2str(str
));
595 stderror(ERR_NAME
| ERR_NOMATCH
);
599 return (Strsave(STRNULL
));
602 return (handleone(str
, vl
, action
));
619 gargc
= blklen(gargv
);
623 noglob
= adrof(STRnoglob
) != 0;
627 * Expand back-quote, tilde and brace
629 vl
= vo
= globexpand(v
);
631 vl
= vo
= saveblk(v
);
633 if (!noglob
&& (gflg
& G_GLOB
)) {
641 gargc
= vl
? blklen(vl
) : 0;
649 gargv
= (Char
**) xmalloc((size_t) (sizeof(Char
*) * gargsiz
));
657 void (*f
) __P((int));
661 while ((p
= *t
++) != '\0')
672 while ((p
= *t
++) != '\0')
681 register Char
*p
, *c
;
683 while ((p
= *t
++) != '\0') {
684 if (*p
== '~' || *p
== '=')
686 else if (*p
== '{' &&
687 (p
[1] == '\0' || (p
[1] == '}' && p
[2] == '\0')))
690 * The following line used to be *(c = p++), but hp broke their
691 * optimizer in 9.01, so we break the assignment into two pieces
692 * The careful reader here will note that *most* compiler workarounds
693 * in tcsh are either for apollo/DomainOS or hpux. Is it a coincidence?
695 while ( *(c
= p
) != '\0') {
701 * We do want to expand echo `echo '*'`, so we don't
702 * use this piece of code anymore.
704 while (*p
&& *p
!= '`')
706 if (*p
) /* Quoted chars */
711 if (*p
) /* The matching ` */
721 else if (symlinks
== SYM_EXPAND
&&
722 *p
&& ISDOTDOT(c
) && (c
== *(t
-1) || *(c
-1) == '/') )
729 * Command substitute cp. If literal, then this is a substitution from a
730 * << redirection, and so we should not crunch blanks and tabs, separating
731 * words only at newlines.
738 register Char
*lp
, *rp
;
739 Char
*ep
, word
[LONGBSIZE
];
748 pargv
= (Char
**) xmalloc((size_t) (sizeof(Char
*) * pargsiz
));
750 pargcp
= pargs
= word
;
752 pnleft
= LONGBSIZE
- 4;
754 #if defined(DSPMBYTE)
755 for (lp
= cp
;; lp
++) { /* } */
757 (lp
-1 < cp
|| !Ismbyte2(*lp
) || !Ismbyte1(*(lp
-1)))) {
761 for (lp
= cp
; *lp
!= '`'; lp
++) {
762 #endif /* DSPMBYTE */
771 for (rp
= lp
; *rp
&& *rp
!= '`'; rp
++)
778 oops
: stderror(ERR_UNMATCHED
, '`');
781 backeval(ep
, literal
);
788 backeval(cp
, literal
)
792 register int icnt
, c
;
794 struct command faket
;
797 Char
*fakecom
[2], ibuf
[BUFSIZE
];
802 quoted
= (literal
|| (cp
[0] & QUOTE
)) ? QUOTE
: 0;
803 faket
.t_dtyp
= NODE_COMMAND
;
804 faket
.t_dflg
= F_BACKQ
;
808 faket
.t_dcom
= fakecom
;
809 fakecom
[0] = STRfakecom1
;
813 * We do the psave job to temporarily change the current job so that the
814 * following fork is considered a separate job. This is so that when
815 * backquotes are used in a builtin function that calls glob the "current
816 * job" is not corrupted. We only need one level of pushed jobs as long as
817 * we are sure to fork here.
822 * It would be nicer if we could integrate this redirection more with the
823 * routines in sh.sem.c by doing a fake execute on a builtin function that
827 if (pfork(&faket
, -1) == 0) {
830 (void) close(pvec
[0]);
831 (void) dmove(pvec
[1], 1);
832 (void) dmove(SHDIAG
, 2);
836 * Bugfix for nested backquotes by Michael Greim <greim@sbsvax.UUCP>,
837 * posted to comp.bugs.4bsd 12 Sep. 1989.
839 if (pargv
) /* mg, 21.dec.88 */
840 blkfree(pargv
), pargv
= 0, pargsiz
= 0;
843 for (arginp
= cp
; *cp
; cp
++) {
845 if (*cp
== '\n' || *cp
== '\r')
850 * In the child ``forget'' everything about current aliases or
861 t
= syntax(paraml
.next
, &paraml
, 0);
865 t
->t_dflg
|= F_NOFORK
;
867 (void) sigignore(SIGTSTP
);
870 (void) sigignore(SIGTTIN
);
873 (void) sigignore(SIGTTOU
);
875 execute(t
, -1, NULL
, NULL
, TRUE
);
879 (void) close(pvec
[1]);
891 icnt
= read(pvec
[0], tibuf
, BUFSIZE
);
892 while (icnt
== -1 && errno
== EINTR
);
897 for (i
= 0; i
< icnt
; i
++)
898 ip
[i
] = (unsigned char) tibuf
[i
];
909 #endif /* WINNT_NATIVE */
912 * Continue around the loop one more time, so that we can eat
913 * the last newline without terminating this word.
918 if (!quoted
&& (c
== ' ' || c
== '\t'))
924 * Unless at end-of-file, we will form a new word here if there were
925 * characters in the word, or in any case when we take text literally.
926 * If we didn't make empty words here when literal was set then we
927 * would lose blank lines.
929 if (c
!= -1 && (cnt
|| literal
))
933 (void) close(pvec
[0]);
943 stderror(ERR_WTOOLONG
);
944 *pargcp
++ = (Char
) c
;
952 if (pargc
== pargsiz
- 1) {
953 pargsiz
+= GLOBSPACE
;
954 pargv
= (Char
**) xrealloc((ptr_t
) pargv
,
955 (size_t) (pargsiz
* sizeof(Char
*)));
957 pargv
[pargc
++] = Strsave(pargs
);
964 Gmatch(string
, pattern
)
965 Char
*string
, *pattern
;
967 return Gnmatch(string
, pattern
, NULL
);
971 Gnmatch(string
, pattern
, endstr
)
972 Char
*string
, *pattern
, **endstr
;
974 Char
**blk
, **p
, *tstring
= string
;
975 int gpol
= 1, gres
= 0;
977 if (*pattern
== '^') {
982 blk
= (Char
**) xmalloc((size_t) (GLOBSPACE
* sizeof(Char
*)));
983 blk
[0] = Strsave(pattern
);
986 expbrace(&blk
, NULL
, GLOBSPACE
);
989 /* Exact matches only */
990 for (p
= blk
; *p
; p
++)
991 gres
|= pmatch(string
, *p
, &tstring
) == 2 ? 1 : 0;
993 /* partial matches */
994 int minc
= 0x7fffffff;
995 for (p
= blk
; *p
; p
++)
996 if (pmatch(string
, *p
, &tstring
) != 0) {
997 int t
= (int) (tstring
- string
);
999 if (minc
== -1 || minc
> t
)
1002 *endstr
= string
+ minc
;
1006 return(gres
== gpol
);
1010 * Return 2 on exact match,
1011 * Return 1 on substring match.
1012 * Return 0 on no match.
1013 * *estr will point to the end of the longest exact or substring match.
1016 pmatch(string
, pattern
, estr
)
1017 register Char
*string
, *pattern
, **estr
;
1019 register Char stringc
, patternc
;
1020 int match
, negate_range
;
1021 Char rangec
, *oestr
, *pestr
;
1024 stringc
= *string
& TRIM
;
1026 * apollo compiler bug: switch (patternc = *pattern++) dies
1028 patternc
= *pattern
++;
1032 return (stringc
== 0 ? 2 : 1);
1040 while (*string
) string
++;
1048 switch(pmatch(string
, pattern
, estr
)) {
1057 abort(); /* Cannot happen */
1074 if ((negate_range
= (*pattern
== '^')) != 0)
1076 while ((rangec
= *pattern
++) != '\0') {
1081 if (rangec
== '-' && *(pattern
-2) != '[' && *pattern
!= ']') {
1082 match
= (globcharcoll(stringc
, *pattern
& TRIM
) <= 0 &&
1083 globcharcoll(*(pattern
-2) & TRIM
, stringc
) <= 0);
1087 match
= (stringc
== (rangec
& TRIM
));
1090 stderror(ERR_NAME
| ERR_MISSING
, ']');
1091 if (match
== negate_range
)
1096 if ((patternc
& TRIM
) != stringc
)
1108 register Char
*p
, *q
;
1115 n
= (int) ((p
- s1
) + (q
- s2
) - 1);
1116 if (++gargc
>= gargsiz
) {
1117 gargsiz
+= GLOBSPACE
;
1118 gargv
= (Char
**) xrealloc((ptr_t
) gargv
,
1119 (size_t) (gargsiz
* sizeof(Char
*)));
1122 p
= gargv
[gargc
- 1] = (Char
*) xmalloc((size_t) (n
* sizeof(Char
)));
1123 for (q
= s1
; (*p
++ = *q
++) != '\0';)
1125 for (p
--, q
= s2
; (*p
++ = *q
++) != '\0';)
1129 #if defined(FILEC) && defined(TIOCSTI)
1132 register Char
**a
, **b
;
1134 if (!a
) /* check for NULL */
1139 if (!*a
) /* check for NULL */
1140 return (*b
? 1 : 0);
1144 return (int) collate(*a
, *b
);