1 /* $OpenBSD: syn.c,v 1.39 2018/04/24 08:25:16 kn Exp $ */
4 * shell parser (C version)
12 struct nesting_state
{
13 int start_token
; /* token that began nesting (e.g., FOR) */
14 int start_line
; /* line nesting began on */
17 static void yyparse(void);
18 static struct op
*pipeline(int);
19 static struct op
*andor(void);
20 static struct op
*c_list(int);
21 static struct ioword
*synio(int);
22 static void musthave(int, int);
23 static struct op
*nested(int, int, int);
24 static struct op
*get_command(int);
25 static struct op
*dogroup(void);
26 static struct op
*thenpart(void);
27 static struct op
*elsepart(void);
28 static struct op
*caselist(void);
29 static struct op
*casepart(int);
30 static struct op
*function_body(char *, int);
31 static char ** wordlist(void);
32 static struct op
*block(int, struct op
*, struct op
*, char **);
33 static struct op
*newtp(int);
34 static void syntaxerr(const char *) __attribute__((__noreturn__
));
35 static void nesting_push(struct nesting_state
*, int);
36 static void nesting_pop(struct nesting_state
*);
37 static int assign_command(char *);
38 static int inalias(struct source
*);
39 static int dbtestp_isa(Test_env
*, Test_meta
);
40 static const char *dbtestp_getopnd(Test_env
*, Test_op
, int);
41 static int dbtestp_eval(Test_env
*, Test_op
, const char *, const char *,
43 static void dbtestp_error(Test_env
*, int, const char *);
45 static struct op
*outtree
; /* yyparse output */
47 static struct nesting_state nesting
; /* \n changed to ; */
49 static int reject
; /* token(cf) gets symbol again */
50 static int symbol
; /* yylex value */
53 ((reject) ? (reject = false, symbol) : (symbol = yylex(cf)))
55 ((reject) ? (symbol) : (reject = true, symbol = yylex(cf)))
64 outtree
= c_list(source
->type
== SSTRING
);
66 if (c
== 0 && !outtree
)
67 outtree
= newtp(TEOF
);
68 else if (c
!= '\n' && c
!= 0)
75 struct op
*t
, *p
, *tl
= NULL
;
79 while (token(0) == '|') {
80 if ((p
= get_command(CONTIN
)) == NULL
)
83 t
= tl
= block(TPIPE
, t
, p
, NULL
);
85 tl
= tl
->right
= block(TPIPE
, tl
->right
, p
, NULL
);
100 while ((c
= token(0)) == LOGAND
|| c
== LOGOR
) {
101 if ((p
= pipeline(CONTIN
)) == NULL
)
103 t
= block(c
== LOGAND
? TAND
: TOR
, t
, p
, NULL
);
113 struct op
*t
= NULL
, *p
, *tl
= NULL
;
119 /* Token has always been read/rejected at this point, so
120 * we don't worry about what flags to pass token()
124 if (c
== '\n' && (multi
|| inalias(source
))) {
125 if (!p
) /* ignore blank lines */
129 else if (c
== '&' || c
== COPROC
)
130 p
= block(c
== '&' ? TASYNC
: TCOPROC
,
137 t
= tl
= block(TLIST
, t
, p
, NULL
);
139 tl
= tl
->right
= block(TLIST
, tl
->right
, p
, NULL
);
147 static struct ioword
*
153 if (tpeek(cf
) != REDIR
)
157 ishere
= (iop
->flag
&IOTYPE
) == IOHERE
;
158 musthave(LWORD
, ishere
? HEREDELIM
: 0);
160 iop
->delim
= yylval
.cp
;
161 if (*ident
!= 0) /* unquoted */
163 if (herep
>= &heres
[HERES
])
164 yyerror("too many <<'s\n");
167 iop
->name
= yylval
.cp
;
172 musthave(int c
, int cf
)
174 if ((token(cf
)) != c
)
179 nested(int type
, int smark
, int emark
)
182 struct nesting_state old_nesting
;
184 nesting_push(&old_nesting
, smark
);
186 musthave(emark
, KEYWORD
|ALIAS
);
187 nesting_pop(&old_nesting
);
188 return (block(type
, t
, NULL
, NULL
));
195 int c
, iopn
= 0, syniocf
;
196 struct ioword
*iop
, **iops
;
198 struct nesting_state old_nesting
;
200 iops
= areallocarray(NULL
, NUFILE
+ 1,
201 sizeof(struct ioword
*), ATEMP
);
205 syniocf
= KEYWORD
|ALIAS
;
206 switch (c
= token(cf
|KEYWORD
|ALIAS
|VARASN
)) {
212 return NULL
; /* empty line */
217 syniocf
&= ~(KEYWORD
|ALIAS
);
219 t
->lineno
= source
->line
;
221 cf
= (t
->u
.evalflags
? ARRAYVAR
: 0) |
222 (XPsize(args
) == 0 ? ALIAS
|VARASN
: CMDWORD
);
226 yyerror("too many redirections\n");
227 iops
[iopn
++] = synio(cf
);
232 /* the iopn == 0 and XPsize(vars) == 0 are
233 * dubious but at&t ksh acts this way
235 if (iopn
== 0 && XPsize(vars
) == 0 &&
237 assign_command(ident
))
238 t
->u
.evalflags
= DOVACHECK
;
239 if ((XPsize(args
) == 0 || Flag(FKEYWORD
)) &&
240 is_wdvarassign(yylval
.cp
))
241 XPput(vars
, yylval
.cp
);
243 XPput(args
, yylval
.cp
);
247 /* Check for "> foo (echo hi)", which at&t ksh
248 * allows (not POSIX, but not disallowed)
251 if (XPsize(args
) == 0 && XPsize(vars
) == 0) {
255 /* Must be a function */
256 if (iopn
!= 0 || XPsize(args
) != 1 ||
262 t
= function_body(XPptrv(args
)[0], false);
274 t
= nested(TPAREN
, '(', ')');
278 t
= nested(TBRACE
, '{', '}');
283 static const char let_cmd
[] = {
284 CHAR
, 'l', CHAR
, 'e',
287 /* Leave KEYWORD in syniocf (allow if (( 1 )) then ...) */
289 t
->lineno
= source
->line
;
291 XPput(args
, wdcopy(let_cmd
, ATEMP
));
292 musthave(LWORD
,LETEXPR
);
293 XPput(args
, yylval
.cp
);
297 case DBRACKET
: /* [[ .. ]] */
298 /* Leave KEYWORD in syniocf (allow if [[ -n 1 ]] then ...) */
299 t
= newtp(TDBRACKET
);
304 te
.flags
= TEF_DBRACKET
;
306 te
.isa
= dbtestp_isa
;
307 te
.getopnd
= dbtestp_getopnd
;
308 te
.eval
= dbtestp_eval
;
309 te
.error
= dbtestp_error
;
317 t
= newtp((c
== FOR
) ? TFOR
: TSELECT
);
318 musthave(LWORD
, ARRAYVAR
);
319 if (!is_wdvarname(yylval
.cp
, true))
320 yyerror("%s: bad identifier\n",
321 c
== FOR
? "for" : "select");
322 t
->str
= str_save(ident
, ATEMP
);
323 nesting_push(&old_nesting
, c
);
324 t
->vars
= wordlist();
326 nesting_pop(&old_nesting
);
331 nesting_push(&old_nesting
, c
);
332 t
= newtp((c
== WHILE
) ? TWHILE
: TUNTIL
);
333 t
->left
= c_list(true);
334 t
->right
= dogroup();
335 nesting_pop(&old_nesting
);
342 nesting_push(&old_nesting
, c
);
343 t
->left
= caselist();
344 nesting_pop(&old_nesting
);
348 nesting_push(&old_nesting
, c
);
350 t
->left
= c_list(true);
351 t
->right
= thenpart();
352 musthave(FI
, KEYWORD
|ALIAS
);
353 nesting_pop(&old_nesting
);
357 syniocf
&= ~(KEYWORD
|ALIAS
);
361 t
= block(TBANG
, NULL
, t
, NULL
);
365 syniocf
&= ~(KEYWORD
|ALIAS
);
369 t
->str
= str_save(t
->str
, ATEMP
);
371 t
->str
= alloc(2, ATEMP
);
372 t
->str
[0] = '\0'; /* TF_* flags */
376 t
= block(TTIME
, t
, NULL
, NULL
);
381 t
= function_body(yylval
.cp
, true);
385 while ((iop
= synio(syniocf
)) != NULL
) {
387 yyerror("too many redirections\n");
396 iops
= areallocarray(iops
, iopn
,
397 sizeof(struct ioword
*), ATEMP
);
401 if (t
->type
== TCOM
|| t
->type
== TDBRACKET
) {
403 t
->args
= (char **) XPclose(args
);
405 t
->vars
= (char **) XPclose(vars
);
420 c
= token(CONTIN
|KEYWORD
|ALIAS
);
421 /* A {...} can be used instead of do...done for for/select loops
422 * but not for while/until loops - we don't need to check if it
423 * is a while loop because it would have been parsed as part of
424 * the conditional command list...
433 musthave(c
, KEYWORD
|ALIAS
);
442 musthave(THEN
, KEYWORD
|ALIAS
);
444 t
->left
= c_list(true);
447 t
->right
= elsepart();
456 switch (token(KEYWORD
|ALIAS
|VARASN
)) {
458 if ((t
= c_list(true)) == NULL
)
464 t
->left
= c_list(true);
465 t
->right
= thenpart();
480 c
= token(CONTIN
|KEYWORD
|ALIAS
);
481 /* A {...} can be used instead of in...esac for case statements */
489 while ((tpeek(CONTIN
|KEYWORD
|ESACONLY
)) != c
) { /* no ALIAS here */
490 struct op
*tc
= casepart(c
);
492 t
= tl
= tc
, tl
->right
= NULL
;
494 tl
->right
= tc
, tl
= tc
;
496 musthave(c
, KEYWORD
|ALIAS
);
509 c
= token(CONTIN
|KEYWORD
); /* no ALIAS here */
514 XPput(ptns
, yylval
.cp
);
515 } while ((c
= token(0)) == '|');
518 t
->vars
= (char **) XPclose(ptns
);
521 t
->left
= c_list(true);
522 /* Note: Posix requires the ;; */
523 if ((tpeek(CONTIN
|KEYWORD
|ALIAS
)) != endtok
)
524 musthave(BREAK
, CONTIN
|KEYWORD
|ALIAS
);
529 function_body(char *name
,
530 int ksh_func
) /* function foo { ... } vs foo() { .. } */
536 sname
= wdstrip(name
);
537 /* Check for valid characters in name. posix and ksh93 say only
538 * allow [a-zA-Z_0-9] but this allows more as old pdksh's have
539 * allowed more (the following were never allowed:
540 * nul space nl tab $ ' " \ ` ( ) & | ; = < >
541 * C_QUOTE covers all but = and adds # [ ? *)
543 for (p
= sname
; *p
; p
++)
544 if (ctype(*p
, C_QUOTE
) || *p
== '=')
545 yyerror("%s: invalid function name\n", sname
);
549 t
->u
.ksh_func
= ksh_func
;
550 t
->lineno
= source
->line
;
552 /* Note that POSIX allows only compound statements after foo(), sh and
553 * at&t ksh allow any command, go with the latter since it shouldn't
554 * break anything. However, for function foo, at&t ksh only accepts
558 musthave('{', CONTIN
|KEYWORD
|ALIAS
); /* } */
562 old_func_parse
= genv
->flags
& EF_FUNC_PARSE
;
563 genv
->flags
|= EF_FUNC_PARSE
;
564 if ((t
->left
= get_command(CONTIN
)) == NULL
) {
566 * Probably something like foo() followed by eof or ;.
567 * This is accepted by sh and ksh88.
568 * To make "typeset -f foo" work reliably (so its output can
569 * be used as input), we pretend there is a colon here.
571 t
->left
= newtp(TCOM
);
572 t
->left
->args
= areallocarray(NULL
, 2, sizeof(char *), ATEMP
);
573 t
->left
->args
[0] = alloc(3, ATEMP
);
574 t
->left
->args
[0][0] = CHAR
;
575 t
->left
->args
[0][1] = ':';
576 t
->left
->args
[0][2] = EOS
;
577 t
->left
->args
[1] = NULL
;
578 t
->left
->vars
= alloc(sizeof(char *), ATEMP
);
579 t
->left
->vars
[0] = NULL
;
583 genv
->flags
&= ~EF_FUNC_PARSE
;
595 /* Posix does not do alias expansion here... */
596 if ((c
= token(CONTIN
|KEYWORD
|ALIAS
)) != IN
) {
597 if (c
!= ';') /* non-POSIX, but at&t ksh accepts a ; here */
601 while ((c
= token(0)) == LWORD
)
602 XPput(args
, yylval
.cp
);
603 if (c
!= '\n' && c
!= ';')
606 return (char **) XPclose(args
);
610 * supporting functions
614 block(int type
, struct op
*t1
, struct op
*t2
, char **wp
)
625 const struct tokeninfo
{
632 { "then", THEN
, true },
633 { "else", ELSE
, true },
634 { "elif", ELIF
, true },
636 { "case", CASE
, true },
637 { "esac", ESAC
, true },
638 { "for", FOR
, true },
639 { "select", SELECT
, true },
640 { "while", WHILE
, true },
641 { "until", UNTIL
, true },
643 { "done", DONE
, true },
645 { "function", FUNCTION
, true },
646 { "time", TIME
, true },
650 { "[[", DBRACKET
, true },
651 /* Lexical tokens (0[EOF], LWORD and REDIR handled specially) */
652 { "&&", LOGAND
, false },
653 { "||", LOGOR
, false },
654 { ";;", BREAK
, false },
655 { "((", MDPAREN
, false },
656 { "|&", COPROC
, false },
657 /* and some special cases... */
658 { "newline", '\n', false },
665 struct tokeninfo
const *tt
;
668 ktinit(&keywords
, APERM
, 32); /* must be 2^n (currently 20 keywords) */
669 for (tt
= tokentab
; tt
->name
; tt
++) {
671 p
= ktenter(&keywords
, tt
->name
, hash(tt
->name
));
672 p
->flag
|= DEFINED
|ISSET
;
680 syntaxerr(const char *what
)
682 char redir
[6]; /* 2<<- is the longest redirection, I think */
684 struct tokeninfo
const *tt
;
694 if (nesting
.start_token
) {
695 c
= nesting
.start_token
;
696 source
->errline
= nesting
.start_line
;
700 /* don't quote the EOF */
701 yyerror("syntax error: unexpected EOF\n");
705 s
= snptreef(NULL
, 32, "%S", yylval
.cp
);
709 s
= snptreef(redir
, sizeof(redir
), "%R", yylval
.iop
);
713 for (tt
= tokentab
; tt
->name
; tt
++)
719 if (c
> 0 && c
< 256) {
723 shf_snprintf(redir
, sizeof(redir
),
728 yyerror("syntax error: `%s' %s\n", s
, what
);
732 nesting_push(struct nesting_state
*save
, int tok
)
735 nesting
.start_token
= tok
;
736 nesting
.start_line
= source
->line
;
740 nesting_pop(struct nesting_state
*saved
)
750 t
= alloc(sizeof(*t
), ATEMP
);
753 t
->args
= t
->vars
= NULL
;
755 t
->left
= t
->right
= NULL
;
763 nesting
.start_token
= 0;
764 nesting
.start_line
= 0;
771 /* This kludge exists to take care of sh/at&t ksh oddity in which
772 * the arguments of alias/export/readonly/typeset have no field
773 * splitting, file globbing, or (normal) tilde expansion done.
774 * at&t ksh seems to do something similar to this since
775 * $ touch a=a; typeset a=[ab]; echo "$a"
777 * $ x=typeset; $x a=[ab]; echo "$a"
782 assign_command(char *s
)
784 if (Flag(FPOSIX
) || !*s
)
786 return (strcmp(s
, "alias") == 0) ||
787 (strcmp(s
, "export") == 0) ||
788 (strcmp(s
, "readonly") == 0) ||
789 (strcmp(s
, "typeset") == 0);
792 /* Check if we are in the middle of reading an alias */
794 inalias(struct source
*s
)
796 for (; s
&& s
->type
== SALIAS
; s
= s
->next
)
797 if (!(s
->flags
& SF_ALIASEND
))
803 /* Order important - indexed by Test_meta values
804 * Note that ||, &&, ( and ) can't appear as unquoted strings
805 * in normal shell input, so these can be interpreted unambiguously
806 * in the evaluation pass.
808 static const char dbtest_or
[] = { CHAR
, '|', CHAR
, '|', EOS
};
809 static const char dbtest_and
[] = { CHAR
, '&', CHAR
, '&', EOS
};
810 static const char dbtest_not
[] = { CHAR
, '!', EOS
};
811 static const char dbtest_oparen
[] = { CHAR
, '(', EOS
};
812 static const char dbtest_cparen
[] = { CHAR
, ')', EOS
};
813 const char *const dbtest_tokens
[] = {
814 dbtest_or
, dbtest_and
, dbtest_not
,
815 dbtest_oparen
, dbtest_cparen
817 const char db_close
[] = { CHAR
, ']', CHAR
, ']', EOS
};
818 const char db_lthan
[] = { CHAR
, '<', EOS
};
819 const char db_gthan
[] = { CHAR
, '>', EOS
};
821 /* Test if the current token is a whatever. Accepts the current token if
822 * it is. Returns 0 if it is not, non-zero if it is (in the case of
823 * TM_UNOP and TM_BINOP, the returned value is a Test_op).
826 dbtestp_isa(Test_env
*te
, Test_meta meta
)
828 int c
= tpeek(ARRAYVAR
| (meta
== TM_BINOP
? 0 : CONTIN
));
834 uqword
= c
== LWORD
&& *ident
;
838 else if (meta
== TM_AND
)
840 else if (meta
== TM_NOT
)
841 ret
= uqword
&& strcmp(yylval
.cp
, dbtest_tokens
[(int) TM_NOT
]) == 0;
842 else if (meta
== TM_OPAREN
)
843 ret
= c
== '(' /*)*/;
844 else if (meta
== TM_CPAREN
)
845 ret
= c
== /*(*/ ')';
846 else if (meta
== TM_UNOP
|| meta
== TM_BINOP
) {
847 if (meta
== TM_BINOP
&& c
== REDIR
&&
848 (yylval
.iop
->flag
== IOREAD
|| yylval
.iop
->flag
== IOWRITE
)) {
850 save
= wdcopy(yylval
.iop
->flag
== IOREAD
?
851 db_lthan
: db_gthan
, ATEMP
);
852 } else if (uqword
&& (ret
= (int) test_isop(te
, meta
, ident
)))
854 } else /* meta == TM_END */
855 ret
= uqword
&& strcmp(yylval
.cp
, db_close
) == 0;
858 if (meta
!= TM_END
) {
860 save
= wdcopy(dbtest_tokens
[(int) meta
], ATEMP
);
861 XPput(*te
->pos
.av
, save
);
868 dbtestp_getopnd(Test_env
*te
, Test_op op
, int do_eval
)
870 int c
= tpeek(ARRAYVAR
);
876 XPput(*te
->pos
.av
, yylval
.cp
);
882 dbtestp_eval(Test_env
*te
, Test_op op
, const char *opnd1
, const char *opnd2
,
889 dbtestp_error(Test_env
*te
, int offset
, const char *msg
)
891 te
->flags
|= TEF_ERROR
;
895 /* Kludgy to say the least... */
897 yylval
.cp
= *(XPptrv(*te
->pos
.av
) + XPsize(*te
->pos
.av
) +