2 * shell parser (C version)
9 int start_token
; /* token that began nesting (eg, FOR) */
10 int start_line
; /* line nesting began on */
13 static void yyparse(void);
14 static struct op
*pipeline(int);
15 static struct op
*andor(void);
16 static struct op
*c_list(int);
17 static struct ioword
*synio(int);
18 static void musthave(int, int);
19 static struct op
*nested(int, int, int);
20 static struct op
*get_command(int);
21 static struct op
*dogroup(void);
22 static struct op
*thenpart(void);
23 static struct op
*elsepart(void);
24 static struct op
*caselist(void);
25 static struct op
*casepart(int);
26 static struct op
*function_body(char *, int);
27 static char ** wordlist(void);
28 static struct op
*block(int, struct op
*, struct op
*, char **);
29 static struct op
*newtp(int);
30 static void syntaxerr(const char *) __attribute__((__noreturn__
));
31 static void nesting_push(struct nesting_state
*, int);
32 static void nesting_pop(struct nesting_state
*);
33 static int assign_command(char *);
34 static int inalias(struct source
*);
35 static int dbtestp_isa(Test_env
*, Test_meta
);
36 static const char *dbtestp_getopnd(Test_env
*, Test_op
, int);
37 static int dbtestp_eval(Test_env
*, Test_op
, const char *, const char *, int);
38 static void dbtestp_error(Test_env
*, int, const char *);
40 static struct op
*outtree
; /* yyparse output */
41 static struct nesting_state nesting
; /* \n changed to ; */
43 static int reject
; /* token(cf) gets symbol again */
44 static int symbol
; /* yylex value */
46 #define REJECT (reject = 1)
47 #define ACCEPT (reject = 0)
49 ((reject) ? (ACCEPT, symbol) : (symbol = yylex(cf)))
51 ((reject) ? (symbol) : (REJECT, symbol = yylex(cf)))
60 outtree
= c_list(source
->type
== SSTRING
);
62 if (c
== 0 && !outtree
)
63 outtree
= newtp(TEOF
);
64 else if (c
!= '\n' && c
!= 0)
65 syntaxerr((char *) 0);
71 struct op
*t
, *p
, *tl
= NULL
;
75 while (token(0) == '|') {
76 if ((p
= get_command(CONTIN
)) == NULL
)
77 syntaxerr((char *) 0);
79 t
= tl
= block(TPIPE
, t
, p
, NOWORDS
);
81 tl
= tl
->right
= block(TPIPE
, tl
->right
, p
, NOWORDS
);
96 while ((c
= token(0)) == LOGAND
|| c
== LOGOR
) {
97 if ((p
= pipeline(CONTIN
)) == NULL
)
98 syntaxerr((char *) 0);
99 t
= block(c
== LOGAND
? TAND
: TOR
, t
, p
, NOWORDS
);
109 struct op
*t
= NULL
, *p
, *tl
= NULL
;
115 /* Token has always been read/rejected at this point, so
116 * we don't worry about what flags to pass token()
120 if (c
== '\n' && (multi
|| inalias(source
))) {
121 if (!p
) /* ignore blank lines */
125 else if (c
== '&' || c
== COPROC
)
126 p
= block(c
== '&' ? TASYNC
: TCOPROC
,
127 p
, NOBLOCK
, NOWORDS
);
133 t
= tl
= block(TLIST
, t
, p
, NOWORDS
);
135 tl
= tl
->right
= block(TLIST
, tl
->right
, p
, NOWORDS
);
143 static struct ioword
*
149 if (tpeek(cf
) != REDIR
)
153 ishere
= (iop
->flag
&IOTYPE
) == IOHERE
;
154 musthave(LWORD
, ishere
? HEREDELIM
: 0);
156 iop
->delim
= yylval
.cp
;
157 if (*ident
!= 0) /* unquoted */
159 if (herep
>= &heres
[HERES
])
160 yyerror("too many <<'s\n");
163 iop
->name
= yylval
.cp
;
168 musthave(int c
, int cf
)
170 if ((token(cf
)) != c
)
171 syntaxerr((char *) 0);
175 nested(int type
, int smark
, int emark
)
178 struct nesting_state old_nesting
;
180 nesting_push(&old_nesting
, smark
);
182 musthave(emark
, KEYWORD
|ALIAS
);
183 nesting_pop(&old_nesting
);
184 return (block(type
, t
, NOBLOCK
, NOWORDS
));
191 int c
, iopn
= 0, syniocf
;
192 struct ioword
*iop
, **iops
;
194 struct nesting_state old_nesting
;
196 iops
= (struct ioword
**) alloc(sizeofN(struct ioword
*, NUFILE
+1),
201 syniocf
= KEYWORD
|ALIAS
;
202 switch (c
= token(cf
|KEYWORD
|ALIAS
|VARASN
)) {
205 afree((void*) iops
, ATEMP
);
208 return NULL
; /* empty line */
213 syniocf
&= ~(KEYWORD
|ALIAS
);
215 t
->lineno
= source
->line
;
217 cf
= (t
->u
.evalflags
? ARRAYVAR
: 0)
218 | (XPsize(args
) == 0 ? ALIAS
|VARASN
: CMDWORD
);
222 yyerror("too many redirections\n");
223 iops
[iopn
++] = synio(cf
);
228 /* the iopn == 0 and XPsize(vars) == 0 are
229 * dubious but at&t ksh acts this way
231 if (iopn
== 0 && XPsize(vars
) == 0
233 && assign_command(ident
))
234 t
->u
.evalflags
= DOVACHECK
;
235 if ((XPsize(args
) == 0 || Flag(FKEYWORD
))
236 && is_wdvarassign(yylval
.cp
))
237 XPput(vars
, yylval
.cp
);
239 XPput(args
, yylval
.cp
);
243 /* Check for "> foo (echo hi)", which at&t ksh
244 * allows (not POSIX, but not disallowed)
247 if (XPsize(args
) == 0 && XPsize(vars
) == 0) {
251 /* Must be a function */
252 if (iopn
!= 0 || XPsize(args
) != 1
253 || XPsize(vars
) != 0)
254 syntaxerr((char *) 0);
258 t
= function_body(XPptrv(args
)[0], false);
270 t
= nested(TPAREN
, '(', ')');
274 t
= nested(TBRACE
, '{', '}');
279 static const char let_cmd
[] = { CHAR
, 'l', CHAR
, 'e',
281 /* Leave KEYWORD in syniocf (allow if (( 1 )) then ...) */
283 t
->lineno
= source
->line
;
285 XPput(args
, wdcopy(let_cmd
, ATEMP
));
286 musthave(LWORD
,LETEXPR
);
287 XPput(args
, yylval
.cp
);
291 case DBRACKET
: /* [[ .. ]] */
292 /* Leave KEYWORD in syniocf (allow if [[ -n 1 ]] then ...) */
293 t
= newtp(TDBRACKET
);
298 te
.flags
= TEF_DBRACKET
;
300 te
.isa
= dbtestp_isa
;
301 te
.getopnd
= dbtestp_getopnd
;
302 te
.eval
= dbtestp_eval
;
303 te
.error
= dbtestp_error
;
311 t
= newtp((c
== FOR
) ? TFOR
: TSELECT
);
312 musthave(LWORD
, ARRAYVAR
);
313 if (!is_wdvarname(yylval
.cp
, true))
314 yyerror("%s: bad identifier\n",
315 c
== FOR
? "for" : "select");
316 t
->str
= str_save(ident
, ATEMP
);
317 nesting_push(&old_nesting
, c
);
318 t
->vars
= wordlist();
320 nesting_pop(&old_nesting
);
325 nesting_push(&old_nesting
, c
);
326 t
= newtp((c
== WHILE
) ? TWHILE
: TUNTIL
);
327 t
->left
= c_list(true);
328 t
->right
= dogroup();
329 nesting_pop(&old_nesting
);
336 nesting_push(&old_nesting
, c
);
337 t
->left
= caselist();
338 nesting_pop(&old_nesting
);
342 nesting_push(&old_nesting
, c
);
344 t
->left
= c_list(true);
345 t
->right
= thenpart();
346 musthave(FI
, KEYWORD
|ALIAS
);
347 nesting_pop(&old_nesting
);
351 syniocf
&= ~(KEYWORD
|ALIAS
);
353 if (t
== (struct op
*) 0)
354 syntaxerr((char *) 0);
355 t
= block(TBANG
, NOBLOCK
, t
, NOWORDS
);
359 syniocf
&= ~(KEYWORD
|ALIAS
);
361 t
= block(TTIME
, t
, NOBLOCK
, NOWORDS
);
366 t
= function_body(yylval
.cp
, true);
370 while ((iop
= synio(syniocf
)) != NULL
) {
372 yyerror("too many redirections\n");
377 afree((void*) iops
, ATEMP
);
381 iops
= (struct ioword
**) aresize((void*) iops
,
382 sizeofN(struct ioword
*, iopn
), ATEMP
);
386 if (t
->type
== TCOM
|| t
->type
== TDBRACKET
) {
388 t
->args
= (char **) XPclose(args
);
390 t
->vars
= (char **) XPclose(vars
);
405 c
= token(CONTIN
|KEYWORD
|ALIAS
);
406 /* A {...} can be used instead of do...done for for/select loops
407 * but not for while/until loops - we don't need to check if it
408 * is a while loop because it would have been parsed as part of
409 * the conditional command list...
416 syntaxerr((char *) 0);
418 musthave(c
, KEYWORD
|ALIAS
);
427 musthave(THEN
, KEYWORD
|ALIAS
);
429 t
->left
= c_list(true);
431 syntaxerr((char *) 0);
432 t
->right
= elsepart();
441 switch (token(KEYWORD
|ALIAS
|VARASN
)) {
443 if ((t
= c_list(true)) == NULL
)
444 syntaxerr((char *) 0);
449 t
->left
= c_list(true);
450 t
->right
= thenpart();
465 c
= token(CONTIN
|KEYWORD
|ALIAS
);
466 /* A {...} can be used instead of in...esac for case statements */
472 syntaxerr((char *) 0);
474 while ((tpeek(CONTIN
|KEYWORD
|ESACONLY
)) != c
) { /* no ALIAS here */
475 struct op
*tc
= casepart(c
);
477 t
= tl
= tc
, tl
->right
= NULL
;
479 tl
->right
= tc
, tl
= tc
;
481 musthave(c
, KEYWORD
|ALIAS
);
494 c
= token(CONTIN
|KEYWORD
); /* no ALIAS here */
499 XPput(ptns
, yylval
.cp
);
500 } while ((c
= token(0)) == '|');
503 t
->vars
= (char **) XPclose(ptns
);
506 t
->left
= c_list(true);
507 /* Note: Posix requires the ;; */
508 if ((tpeek(CONTIN
|KEYWORD
|ALIAS
)) != endtok
)
509 musthave(BREAK
, CONTIN
|KEYWORD
|ALIAS
);
514 function_body(char *name
,
515 int ksh_func
) /* function foo { ... } vs foo() { .. } */
521 sname
= wdstrip(name
);
522 /* Check for valid characters in name. posix and ksh93 say only
523 * allow [a-zA-Z_0-9] but this allows more as old pdksh's have
524 * allowed more (the following were never allowed:
525 * nul space nl tab $ ' " \ ` ( ) & | ; = < >
526 * C_QUOTE covers all but = and adds # [ ? *)
528 for (p
= sname
; *p
; p
++)
529 if (ctype(*p
, C_QUOTE
) || *p
== '=')
530 yyerror("%s: invalid function name\n", sname
);
534 t
->u
.ksh_func
= ksh_func
;
535 t
->lineno
= source
->line
;
537 /* Note that POSIX allows only compound statements after foo(), sh and
538 * at&t ksh allow any command, go with the latter since it shouldn't
539 * break anything. However, for function foo, at&t ksh only accepts
543 musthave('{', CONTIN
|KEYWORD
|ALIAS
); /* } */
547 old_func_parse
= e
->flags
& EF_FUNC_PARSE
;
548 e
->flags
|= EF_FUNC_PARSE
;
549 if ((t
->left
= get_command(CONTIN
)) == (struct op
*) 0) {
551 * Probably something like foo() followed by eof or ;.
552 * This is accepted by sh and ksh88.
553 * To make "typeset -f foo" work reliably (so its output can
554 * be used as input), we pretend there is a colon here.
556 t
->left
= newtp(TCOM
);
557 t
->left
->args
= (char **) alloc(sizeof(char *) * 2, ATEMP
);
558 t
->left
->args
[0] = alloc(sizeof(char) * 3, ATEMP
);
559 t
->left
->args
[0][0] = CHAR
;
560 t
->left
->args
[0][1] = ':';
561 t
->left
->args
[0][2] = EOS
;
562 t
->left
->args
[1] = (char *) 0;
563 t
->left
->vars
= (char **) alloc(sizeof(char *), ATEMP
);
564 t
->left
->vars
[0] = (char *) 0;
568 e
->flags
&= ~EF_FUNC_PARSE
;
580 /* Posix does not do alias expansion here... */
581 if ((c
= token(CONTIN
|KEYWORD
|ALIAS
)) != IN
) {
582 if (c
!= ';') /* non-POSIX, but at&t ksh accepts a ; here */
586 while ((c
= token(0)) == LWORD
)
587 XPput(args
, yylval
.cp
);
588 if (c
!= '\n' && c
!= ';')
589 syntaxerr((char *) 0);
590 if (XPsize(args
) == 0) {
595 return (char **) XPclose(args
);
600 * supporting functions
604 block(int type
, struct op
*t1
, struct op
*t2
, char **wp
)
615 const struct tokeninfo
{
622 { "then", THEN
, true },
623 { "else", ELSE
, true },
624 { "elif", ELIF
, true },
626 { "case", CASE
, true },
627 { "esac", ESAC
, true },
628 { "for", FOR
, true },
629 { "select", SELECT
, true },
630 { "while", WHILE
, true },
631 { "until", UNTIL
, true },
633 { "done", DONE
, true },
635 { "function", FUNCTION
, true },
636 { "time", TIME
, true },
640 { "[[", DBRACKET
, true },
641 /* Lexical tokens (0[EOF], LWORD and REDIR handled specially) */
642 { "&&", LOGAND
, false },
643 { "||", LOGOR
, false },
644 { ";;", BREAK
, false },
645 { "((", MDPAREN
, false },
646 { "|&", COPROC
, false },
647 /* and some special cases... */
648 { "newline", '\n', false },
655 struct tokeninfo
const *tt
;
658 ktinit(&keywords
, APERM
, 32); /* must be 2^n (currently 20 keywords) */
659 for (tt
= tokentab
; tt
->name
; tt
++) {
661 p
= ktenter(&keywords
, tt
->name
, hash(tt
->name
));
662 p
->flag
|= DEFINED
|ISSET
;
670 syntaxerr(const char *what
)
672 char redir
[6]; /* 2<<- is the longest redirection, I think */
674 struct tokeninfo
const *tt
;
684 if (nesting
.start_token
) {
685 c
= nesting
.start_token
;
686 source
->errline
= nesting
.start_line
;
690 /* don't quote the EOF */
691 yyerror("syntax error: unexpected EOF\n");
695 s
= snptreef((char *) 0, 32, "%S", yylval
.cp
);
699 s
= snptreef(redir
, sizeof(redir
), "%R", yylval
.iop
);
703 for (tt
= tokentab
; tt
->name
; tt
++)
709 if (c
> 0 && c
< 256) {
713 shf_snprintf(redir
, sizeof(redir
),
718 yyerror("syntax error: `%s' %s\n", s
, what
);
722 nesting_push(struct nesting_state
*save
, int tok
)
725 nesting
.start_token
= tok
;
726 nesting
.start_line
= source
->line
;
730 nesting_pop(struct nesting_state
*saved
)
740 t
= (struct op
*) alloc(sizeof(*t
), ATEMP
);
743 t
->args
= t
->vars
= NULL
;
745 t
->left
= t
->right
= NULL
;
753 nesting
.start_token
= 0;
754 nesting
.start_line
= 0;
761 /* This kludge exists to take care of sh/at&t ksh oddity in which
762 * the arguments of alias/export/readonly/typeset have no field
763 * splitting, file globbing, or (normal) tilde expansion done.
764 * at&t ksh seems to do something similar to this since
765 * $ touch a=a; typeset a=[ab]; echo "$a"
767 * $ x=typeset; $x a=[ab]; echo "$a"
772 assign_command(char *s
)
774 if (Flag(FPOSIX
) || !*s
)
776 return (strcmp(s
, "alias") == 0) ||
777 (strcmp(s
, "export") == 0) ||
778 (strcmp(s
, "readonly") == 0) ||
779 (strcmp(s
, "typeset") == 0);
782 /* Check if we are in the middle of reading an alias */
784 inalias(struct source
*s
)
786 for (; s
&& s
->type
== SALIAS
; s
= s
->next
)
787 if (!(s
->flags
& SF_ALIASEND
))
792 /* Order important - indexed by Test_meta values
793 * Note that ||, &&, ( and ) can't appear as unquoted strings
794 * in normal shell input, so these can be interpreted unambiguously
795 * in the evaluation pass.
797 static const char dbtest_or
[] = { CHAR
, '|', CHAR
, '|', EOS
};
798 static const char dbtest_and
[] = { CHAR
, '&', CHAR
, '&', EOS
};
799 static const char dbtest_not
[] = { CHAR
, '!', EOS
};
800 static const char dbtest_oparen
[] = { CHAR
, '(', EOS
};
801 static const char dbtest_cparen
[] = { CHAR
, ')', EOS
};
802 const char *const dbtest_tokens
[] = {
803 dbtest_or
, dbtest_and
, dbtest_not
,
804 dbtest_oparen
, dbtest_cparen
806 const char db_close
[] = { CHAR
, ']', CHAR
, ']', EOS
};
807 const char db_lthan
[] = { CHAR
, '<', EOS
};
808 const char db_gthan
[] = { CHAR
, '>', EOS
};
810 /* Test if the current token is a whatever. Accepts the current token if
811 * it is. Returns 0 if it is not, non-zero if it is (in the case of
812 * TM_UNOP and TM_BINOP, the returned value is a Test_op).
815 dbtestp_isa(Test_env
*te
, Test_meta meta
)
817 int c
= tpeek(ARRAYVAR
| (meta
== TM_BINOP
? 0 : CONTIN
));
819 char *save
= (char *) 0;
823 uqword
= c
== LWORD
&& *ident
;
827 else if (meta
== TM_AND
)
829 else if (meta
== TM_NOT
)
830 ret
= uqword
&& strcmp(yylval
.cp
, dbtest_tokens
[(int) TM_NOT
]) == 0;
831 else if (meta
== TM_OPAREN
)
832 ret
= c
== '(' /*)*/;
833 else if (meta
== TM_CPAREN
)
834 ret
= c
== /*(*/ ')';
835 else if (meta
== TM_UNOP
|| meta
== TM_BINOP
) {
836 if (meta
== TM_BINOP
&& c
== REDIR
837 && (yylval
.iop
->flag
== IOREAD
838 || yylval
.iop
->flag
== IOWRITE
))
841 save
= wdcopy(yylval
.iop
->flag
== IOREAD
?
842 db_lthan
: db_gthan
, ATEMP
);
843 } else if (uqword
&& (ret
= (int) test_isop(te
, meta
, ident
)))
845 } else /* meta == TM_END */
846 ret
= uqword
&& strcmp(yylval
.cp
, db_close
) == 0;
849 if (meta
!= TM_END
) {
851 save
= wdcopy(dbtest_tokens
[(int) meta
], ATEMP
);
852 XPput(*te
->pos
.av
, save
);
859 dbtestp_getopnd(Test_env
*te
, Test_op op
, int do_eval
)
861 int c
= tpeek(ARRAYVAR
);
864 return (const char *) 0;
867 XPput(*te
->pos
.av
, yylval
.cp
);
873 dbtestp_eval(Test_env
*te
, Test_op op
, const char *opnd1
, const char *opnd2
,
880 dbtestp_error(Test_env
*te
, int offset
, const char *msg
)
882 te
->flags
|= TEF_ERROR
;
886 /* Kludgy to say the least... */
888 yylval
.cp
= *(XPptrv(*te
->pos
.av
) + XPsize(*te
->pos
.av
)