parser: Add VSBIT to ensure subtype is never zero
[dash.git] / src / parser.c
bloba552c477e99cd979d0f4838b974dcf3162fbc82d
1 /*-
2 * Copyright (c) 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 * Copyright (c) 1997-2005
5 * Herbert Xu <herbert@gondor.apana.org.au>. All rights reserved.
7 * This code is derived from software contributed to Berkeley by
8 * Kenneth Almquist.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
35 #if HAVE_ALLOCA_H
36 #include <alloca.h>
37 #endif
39 #include <stdlib.h>
41 #include "shell.h"
42 #include "parser.h"
43 #include "nodes.h"
44 #include "expand.h" /* defines rmescapes() */
45 #include "exec.h" /* defines find_builtin() */
46 #include "syntax.h"
47 #include "options.h"
48 #include "input.h"
49 #include "output.h"
50 #include "var.h"
51 #include "error.h"
52 #include "memalloc.h"
53 #include "mystring.h"
54 #include "alias.h"
55 #include "show.h"
56 #include "builtins.h"
57 #include "system.h"
58 #ifndef SMALL
59 #include "myhistedit.h"
60 #endif
63 * Shell command parser.
66 /* values returned by readtoken */
67 #include "token_vars.h"
/*
 * Sentinel end-of-file marker used by expandstr() to get here-doc like
 * behaviour.  It is only meaningful as an address to compare against (see
 * realeofmark()); it does not point at a string.  The replacement list is
 * fully parenthesized so that it remains a single operand wherever the
 * macro is expanded.
 */
#define FAKEEOFMARK ((char *)1)
/*
 * One pending here-document.  An entry is allocated when a "<<" operator is
 * parsed, appended to heredoclist by parsefname(), and its body is read
 * later by parseheredoc().
 */
struct heredoc {
	struct heredoc *next;	/* next here document in list */
	union node *here;		/* redirection node */
	char *eofmark;		/* string indicating end of input */
	int striptabs;		/* if set, strip leading tabs */
};
/*
 * One level of lexer state used by readtoken1().  Levels form a doubly
 * linked stack: synstack_push() links a new top in front of the current
 * one, synstack_pop() steps back to the enclosing level.
 */
struct synstack {
	const char *syntax;	/* syntax table in effect; indexed by input char */
	struct synstack *prev;	/* entry pushed on top of this one, kept for reuse */
	struct synstack *next;	/* enclosing (older) entry */
	int innerdq;		/* toggled by '"' seen while varnest is non-zero */
	int varpushed;		/* non-zero if this level was pushed for a ${...} */
	int dblquote;		/* non-zero while inside double-quote context */
	int varnest;		/* levels of variables expansion */
	int parenlevel;		/* levels of parens in arithmetic */
	int dqvarnest;		/* levels of variables expansion within double quotes */
};
/* Parser state shared between the tokenizer and the grammar routines. */
struct heredoc *heredoclist;	/* list of here documents to read */
int doprompt;			/* if set, prompt the user */
int needprompt;			/* true if interactive and at start of line */
int lasttoken;			/* last token read */
int tokpushback;		/* non-zero: xxreadtoken() returns lasttoken again */
char *wordtext;			/* text of last word returned by readtoken */
int checkkwd;			/* CHK* flags consumed by the next readtoken() */
struct nodelist *backquotelist;	/* command substitutions found in last word */
union node *redirnode;		/* redirection node built for the last TREDIR */
struct heredoc *heredoc;	/* here-document record for the last "<<" */
int quoteflag;			/* set if (part of) last token was quoted */
110 STATIC union node *list(int);
111 STATIC union node *andor(void);
112 STATIC union node *pipeline(void);
113 STATIC union node *command(void);
114 STATIC union node *simplecmd(void);
115 STATIC union node *makename(void);
116 STATIC void parsefname(void);
117 STATIC void parseheredoc(void);
118 STATIC int readtoken(void);
119 STATIC int xxreadtoken(void);
120 STATIC int pgetc_eatbnl();
121 STATIC int readtoken1(int, char const *, char *, int);
122 STATIC void synexpect(int) __attribute__((__noreturn__));
123 STATIC void synerror(const char *) __attribute__((__noreturn__));
124 STATIC void setprompt(int);
/*
 * Report whether the word p has the form of an assignment: a non-empty
 * variable name immediately followed by '='.  Returns 1 if so, else 0.
 */
int isassignment(const char *p)
{
	const char *name_end = endofname(p);

	return name_end != p && *name_end == '=';
}
135 static inline int realeofmark(const char *eofmark)
137 return eofmark && eofmark != FAKEEOFMARK;
142 * Read and parse a command. Returns NEOF on end of file. (NULL is a
143 * valid parse tree indicating a blank line.)
146 union node *
147 parsecmd(int interact)
149 tokpushback = 0;
150 checkkwd = 0;
151 heredoclist = 0;
152 doprompt = interact;
153 if (doprompt)
154 setprompt(doprompt);
155 needprompt = 0;
156 return list(1);
160 STATIC union node *
161 list(int nlflag)
163 int chknl = nlflag & 1 ? 0 : CHKNL;
164 union node *n1, *n2, *n3;
165 int tok;
167 n1 = NULL;
168 for (;;) {
169 checkkwd = chknl | CHKKWD | CHKALIAS;
170 tok = readtoken();
171 switch (tok) {
172 case TNL:
173 parseheredoc();
174 return n1;
176 case TEOF:
177 if (!n1 && !chknl)
178 n1 = NEOF;
179 out_eof:
180 parseheredoc();
181 tokpushback++;
182 lasttoken = TEOF;
183 return n1;
186 tokpushback++;
187 if (nlflag == 2 && tokendlist[tok])
188 return n1;
189 nlflag |= 2;
191 n2 = andor();
192 tok = readtoken();
193 if (tok == TBACKGND) {
194 if (n2->type == NPIPE) {
195 n2->npipe.backgnd = 1;
196 } else {
197 if (n2->type != NREDIR) {
198 n3 = stalloc(sizeof(struct nredir));
199 n3->nredir.n = n2;
200 n3->nredir.redirect = NULL;
201 n2 = n3;
203 n2->type = NBACKGND;
206 if (n1 == NULL) {
207 n1 = n2;
209 else {
210 n3 = (union node *)stalloc(sizeof (struct nbinary));
211 n3->type = NSEMI;
212 n3->nbinary.ch1 = n1;
213 n3->nbinary.ch2 = n2;
214 n1 = n3;
216 switch (tok) {
217 case TEOF:
218 goto out_eof;
219 case TNL:
220 tokpushback++;
221 /* fall through */
222 case TBACKGND:
223 case TSEMI:
224 break;
225 default:
226 if (!chknl)
227 synexpect(-1);
228 tokpushback++;
229 return n1;
236 STATIC union node *
237 andor(void)
239 union node *n1, *n2, *n3;
240 int t;
242 n1 = pipeline();
243 for (;;) {
244 if ((t = readtoken()) == TAND) {
245 t = NAND;
246 } else if (t == TOR) {
247 t = NOR;
248 } else {
249 tokpushback++;
250 return n1;
252 checkkwd = CHKNL | CHKKWD | CHKALIAS;
253 n2 = pipeline();
254 n3 = (union node *)stalloc(sizeof (struct nbinary));
255 n3->type = t;
256 n3->nbinary.ch1 = n1;
257 n3->nbinary.ch2 = n2;
258 n1 = n3;
264 STATIC union node *
265 pipeline(void)
267 union node *n1, *n2, *pipenode;
268 struct nodelist *lp, *prev;
269 int negate;
271 negate = 0;
272 TRACE(("pipeline: entered\n"));
273 if (readtoken() == TNOT) {
274 negate = !negate;
275 checkkwd = CHKKWD | CHKALIAS;
276 } else
277 tokpushback++;
278 n1 = command();
279 if (readtoken() == TPIPE) {
280 pipenode = (union node *)stalloc(sizeof (struct npipe));
281 pipenode->type = NPIPE;
282 pipenode->npipe.backgnd = 0;
283 lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
284 pipenode->npipe.cmdlist = lp;
285 lp->n = n1;
286 do {
287 prev = lp;
288 lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
289 checkkwd = CHKNL | CHKKWD | CHKALIAS;
290 lp->n = command();
291 prev->next = lp;
292 } while (readtoken() == TPIPE);
293 lp->next = NULL;
294 n1 = pipenode;
296 tokpushback++;
297 if (negate) {
298 n2 = (union node *)stalloc(sizeof (struct nnot));
299 n2->type = NNOT;
300 n2->nnot.com = n1;
301 return n2;
302 } else
303 return n1;
/*
 * Parse one command.
 *
 * Compound commands (if, while/until, for, case, subshell, brace group) are
 * parsed here; a command starting with a word or a redirection is handed to
 * simplecmd().  Any other leading token is a syntax error.
 *
 * After a compound command, trailing redirections are collected.  If there
 * are any, they are attached to the node directly when it is an NSUBSHELL,
 * otherwise the command is wrapped in a new NREDIR node.
 *
 * Returns the node for the parsed command.
 */
STATIC union node *
command(void)
{
	union node *n1, *n2;
	union node *ap, **app;
	union node *cp, **cpp;
	union node *redir, **rpp;
	union node **rpp2;
	int t;
	int savelinno;

	redir = NULL;
	rpp2 = &redir;

	/* Line on which the command starts, recorded in the nodes below. */
	savelinno = plinno;

	switch (readtoken()) {
	default:
		synexpect(-1);
		/* NOTREACHED */
	case TIF:
		n1 = (union node *)stalloc(sizeof (struct nif));
		n1->type = NIF;
		n1->nif.test = list(0);
		if (readtoken() != TTHEN)
			synexpect(TTHEN);
		n1->nif.ifpart = list(0);
		n2 = n1;
		/* Each "elif" becomes a nested NIF in the else part. */
		while (readtoken() == TELIF) {
			n2->nif.elsepart = (union node *)stalloc(sizeof (struct nif));
			n2 = n2->nif.elsepart;
			n2->type = NIF;
			n2->nif.test = list(0);
			if (readtoken() != TTHEN)
				synexpect(TTHEN);
			n2->nif.ifpart = list(0);
		}
		if (lasttoken == TELSE)
			n2->nif.elsepart = list(0);
		else {
			n2->nif.elsepart = NULL;
			tokpushback++;
		}
		t = TFI;
		break;
	case TWHILE:
	case TUNTIL: {
		int got;
		n1 = (union node *)stalloc(sizeof (struct nbinary));
		n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL;
		n1->nbinary.ch1 = list(0);
		if ((got=readtoken()) != TDO) {
TRACE(("expecting DO got %s %s\n", tokname[got], got == TWORD ? wordtext : ""));
			synexpect(TDO);
		}
		n1->nbinary.ch2 = list(0);
		t = TDONE;
		break;
	}
	case TFOR:
		if (readtoken() != TWORD || quoteflag || ! goodname(wordtext))
			synerror("Bad for loop variable");
		n1 = (union node *)stalloc(sizeof (struct nfor));
		n1->type = NFOR;
		n1->nfor.linno = savelinno;
		n1->nfor.var = wordtext;
		checkkwd = CHKNL | CHKKWD | CHKALIAS;
		if (readtoken() == TIN) {
			app = &ap;
			while (readtoken() == TWORD) {
				n2 = (union node *)stalloc(sizeof (struct narg));
				n2->type = NARG;
				n2->narg.text = wordtext;
				n2->narg.backquote = backquotelist;
				*app = n2;
				app = &n2->narg.next;
			}
			*app = NULL;
			n1->nfor.args = ap;
			if (lasttoken != TNL && lasttoken != TSEMI)
				synexpect(-1);
		} else {
			/* No "in" clause: iterate over dolatstr instead. */
			n2 = (union node *)stalloc(sizeof (struct narg));
			n2->type = NARG;
			n2->narg.text = (char *)dolatstr;
			n2->narg.backquote = NULL;
			n2->narg.next = NULL;
			n1->nfor.args = n2;
			/*
			 * Newline or semicolon here is optional (but note
			 * that the original Bourne shell only allowed NL).
			 */
			if (lasttoken != TSEMI)
				tokpushback++;
		}
		checkkwd = CHKNL | CHKKWD | CHKALIAS;
		if (readtoken() != TDO)
			synexpect(TDO);
		n1->nfor.body = list(0);
		t = TDONE;
		break;
	case TCASE:
		n1 = (union node *)stalloc(sizeof (struct ncase));
		n1->type = NCASE;
		n1->ncase.linno = savelinno;
		if (readtoken() != TWORD)
			synexpect(TWORD);
		n1->ncase.expr = n2 = (union node *)stalloc(sizeof (struct narg));
		n2->type = NARG;
		n2->narg.text = wordtext;
		n2->narg.backquote = backquotelist;
		n2->narg.next = NULL;
		checkkwd = CHKNL | CHKKWD | CHKALIAS;
		if (readtoken() != TIN)
			synexpect(TIN);
		cpp = &n1->ncase.cases;
next_case:
		/* CHKALIAS is deliberately absent while reading patterns. */
		checkkwd = CHKNL | CHKKWD;
		t = readtoken();
		while(t != TESAC) {
			/* Optional '(' before the pattern list. */
			if (lasttoken == TLP)
				readtoken();
			*cpp = cp = (union node *)stalloc(sizeof (struct nclist));
			cp->type = NCLIST;
			app = &cp->nclist.pattern;
			for (;;) {
				*app = ap = (union node *)stalloc(sizeof (struct narg));
				ap->type = NARG;
				ap->narg.text = wordtext;
				ap->narg.backquote = backquotelist;
				if (readtoken() != TPIPE)
					break;
				app = &ap->narg.next;
				readtoken();
			}
			ap->narg.next = NULL;
			if (lasttoken != TRP)
				synexpect(TRP);
			cp->nclist.body = list(2);

			cpp = &cp->nclist.next;

			checkkwd = CHKNL | CHKKWD;
			if ((t = readtoken()) != TESAC) {
				if (t != TENDCASE)
					synexpect(TENDCASE);
				else
					goto next_case;
			}
		}
		*cpp = NULL;
		/* "esac" has been consumed already; skip the terminator check. */
		goto redir;
	case TLP:
		n1 = (union node *)stalloc(sizeof (struct nredir));
		n1->type = NSUBSHELL;
		n1->nredir.linno = savelinno;
		n1->nredir.n = list(0);
		n1->nredir.redirect = NULL;
		t = TRP;
		break;
	case TBEGIN:
		n1 = list(0);
		t = TEND;
		break;
	case TWORD:
	case TREDIR:
		tokpushback++;
		return simplecmd();
	}

	/* t is the token that must close the compound command. */
	if (readtoken() != t)
		synexpect(t);

redir:
	/* Now check for redirection which may follow command */
	checkkwd = CHKKWD | CHKALIAS;
	rpp = rpp2;
	while (readtoken() == TREDIR) {
		*rpp = n2 = redirnode;
		rpp = &n2->nfile.next;
		parsefname();
	}
	tokpushback++;
	*rpp = NULL;
	if (redir) {
		if (n1->type != NSUBSHELL) {
			n2 = (union node *)stalloc(sizeof (struct nredir));
			n2->type = NREDIR;
			n2->nredir.linno = savelinno;
			n2->nredir.n = n1;
			n1 = n2;
		}
		n1->nredir.redirect = redir;
	}

	return n1;
}
507 STATIC union node *
508 simplecmd(void) {
509 union node *args, **app;
510 union node *n = NULL;
511 union node *vars, **vpp;
512 union node **rpp, *redir;
513 int savecheckkwd;
514 int savelinno;
516 args = NULL;
517 app = &args;
518 vars = NULL;
519 vpp = &vars;
520 redir = NULL;
521 rpp = &redir;
523 savecheckkwd = CHKALIAS;
524 savelinno = plinno;
525 for (;;) {
526 checkkwd = savecheckkwd;
527 switch (readtoken()) {
528 case TWORD:
529 n = (union node *)stalloc(sizeof (struct narg));
530 n->type = NARG;
531 n->narg.text = wordtext;
532 n->narg.backquote = backquotelist;
533 if (savecheckkwd && isassignment(wordtext)) {
534 *vpp = n;
535 vpp = &n->narg.next;
536 } else {
537 *app = n;
538 app = &n->narg.next;
539 savecheckkwd = 0;
541 break;
542 case TREDIR:
543 *rpp = n = redirnode;
544 rpp = &n->nfile.next;
545 parsefname(); /* read name of redirection file */
546 break;
547 case TLP:
548 if (
549 args && app == &args->narg.next &&
550 !vars && !redir
552 struct builtincmd *bcmd;
553 const char *name;
555 /* We have a function */
556 if (readtoken() != TRP)
557 synexpect(TRP);
558 name = n->narg.text;
559 if (
560 !goodname(name) || (
561 (bcmd = find_builtin(name)) &&
562 bcmd->flags & BUILTIN_SPECIAL
565 synerror("Bad function name");
566 n->type = NDEFUN;
567 checkkwd = CHKNL | CHKKWD | CHKALIAS;
568 n->ndefun.text = n->narg.text;
569 n->ndefun.linno = plinno;
570 n->ndefun.body = command();
571 return n;
573 /* fall through */
574 default:
575 tokpushback++;
576 goto out;
579 out:
580 *app = NULL;
581 *vpp = NULL;
582 *rpp = NULL;
583 n = (union node *)stalloc(sizeof (struct ncmd));
584 n->type = NCMD;
585 n->ncmd.linno = savelinno;
586 n->ncmd.args = args;
587 n->ncmd.assign = vars;
588 n->ncmd.redirect = redir;
589 return n;
592 STATIC union node *
593 makename(void)
595 union node *n;
597 n = (union node *)stalloc(sizeof (struct narg));
598 n->type = NARG;
599 n->narg.next = NULL;
600 n->narg.text = wordtext;
601 n->narg.backquote = backquotelist;
602 return n;
605 void fixredir(union node *n, const char *text, int err)
607 TRACE(("Fix redir %s %d\n", text, err));
608 if (!err)
609 n->ndup.vname = NULL;
611 if (is_digit(text[0]) && text[1] == '\0')
612 n->ndup.dupfd = digit_val(text[0]);
613 else if (text[0] == '-' && text[1] == '\0')
614 n->ndup.dupfd = -1;
615 else {
617 if (err)
618 synerror("Bad fd number");
619 else
620 n->ndup.vname = makename();
625 STATIC void
626 parsefname(void)
628 union node *n = redirnode;
630 if (n->type == NHERE)
631 checkkwd = CHKEOFMARK;
632 if (readtoken() != TWORD)
633 synexpect(-1);
634 if (n->type == NHERE) {
635 struct heredoc *here = heredoc;
636 struct heredoc *p;
638 if (quoteflag == 0)
639 n->type = NXHERE;
640 TRACE(("Here document %d\n", n->type));
641 rmescapes(wordtext);
642 here->eofmark = wordtext;
643 here->next = NULL;
644 if (heredoclist == NULL)
645 heredoclist = here;
646 else {
647 for (p = heredoclist ; p->next ; p = p->next);
648 p->next = here;
650 } else if (n->type == NTOFD || n->type == NFROMFD) {
651 fixredir(n, wordtext, 0);
652 } else {
653 n->nfile.fname = makename();
659 * Input any here documents.
662 STATIC void
663 parseheredoc(void)
665 struct heredoc *here;
666 union node *n;
668 here = heredoclist;
669 heredoclist = 0;
671 while (here) {
672 if (needprompt) {
673 setprompt(2);
675 if (here->here->type == NHERE)
676 readtoken1(pgetc(), SQSYNTAX, here->eofmark, here->striptabs);
677 else
678 readtoken1(pgetc_eatbnl(), DQSYNTAX, here->eofmark, here->striptabs);
679 n = (union node *)stalloc(sizeof (struct narg));
680 n->narg.type = NARG;
681 n->narg.next = NULL;
682 n->narg.text = wordtext;
683 n->narg.backquote = backquotelist;
684 here->here->nhere.doc = n;
685 here = here->next;
689 STATIC int
690 readtoken(void)
692 int t;
693 int kwd = checkkwd;
694 #ifdef DEBUG
695 int alreadyseen = tokpushback;
696 #endif
698 top:
699 t = xxreadtoken();
702 * eat newlines
704 if (kwd & CHKNL) {
705 while (t == TNL) {
706 parseheredoc();
707 checkkwd = 0;
708 t = xxreadtoken();
712 kwd |= checkkwd;
713 checkkwd = 0;
715 if (t != TWORD || quoteflag) {
716 goto out;
720 * check for keywords
722 if (kwd & CHKKWD) {
723 const char *const *pp;
725 if ((pp = findkwd(wordtext))) {
726 lasttoken = t = pp - parsekwd + KWDOFFSET;
727 TRACE(("keyword %s recognized\n", tokname[t]));
728 goto out;
732 if (kwd & CHKALIAS) {
733 struct alias *ap;
734 if ((ap = lookupalias(wordtext, 1)) != NULL) {
735 if (*ap->val) {
736 pushstring(ap->val, ap);
738 goto top;
741 out:
742 #ifdef DEBUG
743 if (!alreadyseen)
744 TRACE(("token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
745 else
746 TRACE(("reread token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
747 #endif
748 return (t);
751 static void nlprompt(void)
753 plinno++;
754 if (doprompt)
755 setprompt(2);
758 static void nlnoprompt(void)
760 plinno++;
761 needprompt = doprompt;
766 * Read the next input token.
767 * If the token is a word, we set backquotelist to the list of cmds in
768 * backquotes. We set quoteflag to true if any part of the word was
769 * quoted.
770 * If the token is TREDIR, then we set redirnode to a structure containing
771 * the redirection.
773 * [Change comment: here documents and internal procedures]
774 * [Readtoken shouldn't have any arguments. Perhaps we should make the
775 * word parsing code into a separate routine. In this case, readtoken
776 * doesn't need to have any internal procedures, but parseword does.
777 * We could also make parseoperator in essence the main routine, and
778 * have parseword (readtoken1?) handle both words and redirection.]
781 #define RETURN(token) return lasttoken = token
783 STATIC int
784 xxreadtoken(void)
786 int c;
788 if (tokpushback) {
789 tokpushback = 0;
790 return lasttoken;
792 if (needprompt) {
793 setprompt(2);
795 for (;;) { /* until token or start of word found */
796 c = pgetc_eatbnl();
797 switch (c) {
798 case ' ': case '\t':
799 continue;
800 case '#':
801 while ((c = pgetc()) != '\n' && c != PEOF);
802 pungetc();
803 continue;
804 case '\n':
805 nlnoprompt();
806 RETURN(TNL);
807 case PEOF:
808 RETURN(TEOF);
809 case '&':
810 if (pgetc_eatbnl() == '&')
811 RETURN(TAND);
812 pungetc();
813 RETURN(TBACKGND);
814 case '|':
815 if (pgetc_eatbnl() == '|')
816 RETURN(TOR);
817 pungetc();
818 RETURN(TPIPE);
819 case ';':
820 if (pgetc_eatbnl() == ';')
821 RETURN(TENDCASE);
822 pungetc();
823 RETURN(TSEMI);
824 case '(':
825 RETURN(TLP);
826 case ')':
827 RETURN(TRP);
829 break;
831 return readtoken1(c, BASESYNTAX, (char *)NULL, 0);
832 #undef RETURN
/*
 * Read one character, silently discarding backslash-newline pairs.
 * A backslash not followed by a newline is returned as '\\' with the
 * following character pushed back.
 */
static int pgetc_eatbnl(void)
{
	int c;

	for (;;) {
		c = pgetc();
		if (c != '\\')
			return c;

		if (pgetc() != '\n') {
			pungetc();
			return c;
		}

		/* Line continuation: account for the newline and go on. */
		nlprompt();
	}
}
851 static int pgetc_top(struct synstack *stack)
853 return stack->syntax == SQSYNTAX ? pgetc() : pgetc_eatbnl();
856 static void synstack_push(struct synstack **stack, struct synstack *next,
857 const char *syntax)
859 memset(next, 0, sizeof(*next));
860 next->syntax = syntax;
861 next->next = *stack;
862 (*stack)->prev = next;
863 *stack = next;
866 static void synstack_pop(struct synstack **stack)
868 *stack = (*stack)->next;
/*
 * If eofmark is NULL, read a word or a redirection symbol.  If eofmark
 * is not NULL, read a here document.  In the latter case, eofmark is the
 * word which marks the end of the document and striptabs is true if
 * leading tabs should be stripped from the document.  The argument firstc
 * is the first character of the input token or document.
 *
 * Returns TREDIR (with redirnode set) when a redirection operator was
 * read, otherwise TWORD with wordtext, quoteflag and backquotelist set.
 * The returned token is also stored in lasttoken.
 *
 * Because C does not have internal subroutines, I have simulated them
 * using goto's to implement the subroutine linkage.  The following macros
 * will run code that appears at the end of readtoken1.
 */

#define CHECKEND()	{goto checkend; checkend_return:;}
#define PARSEREDIR()	{goto parseredir; parseredir_return:;}
#define PARSESUB()	{goto parsesub; parsesub_return:;}
#define PARSEBACKQOLD()	{oldstyle = 1; goto parsebackq; parsebackq_oldreturn:;}
#define PARSEBACKQNEW()	{oldstyle = 0; goto parsebackq; parsebackq_newreturn:;}
#define	PARSEARITH()	{goto parsearith; parsearith_return:;}

STATIC int
readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
{
	int c = firstc;
	char *out;
	size_t len;
	struct nodelist *bqlist;
	int quotef;
	int oldstyle;
	/* syntax stack */
	struct synstack synbase = { .syntax = syntax };
	struct synstack *synstack = &synbase;

	if (syntax == DQSYNTAX)
		synstack->dblquote = 1;
	quotef = 0;
	bqlist = NULL;

	STARTSTACKSTR(out);
	loop: {	/* for each line, until end of word */
#if ATTY
		if (c == '\034' && doprompt
		 && attyset() && ! equal(termval(), "emacs")) {
			attyline();
			if (synstack->syntax == BASESYNTAX)
				return readtoken();
			c = pgetc_top(synstack);
			goto loop;
		}
#endif
		CHECKEND();	/* set c to PEOF if at end of here document */
		for (;;) {	/* until end of line or end of word */
			CHECKSTRSPACE(4, out);	/* permit 4 calls to USTPUTC */
			/* Dispatch on the class of c in the current syntax table. */
			switch(synstack->syntax[c]) {
			case CNL:	/* '\n' */
				if (synstack->syntax == BASESYNTAX &&
				    !synstack->varnest)
					goto endword;	/* exit outer loop */
				USTPUTC(c, out);
				nlprompt();
				c = pgetc_top(synstack);
				goto loop;		/* continue outer loop */
			case CWORD:
				USTPUTC(c, out);
				break;
			case CCTL:
				if ((!eofmark) | synstack->dblquote |
				    synstack->varnest)
					USTPUTC(CTLESC, out);
				USTPUTC(c, out);
				break;
			/* backslash */
			case CBACK:
				c = pgetc();
				if (c == PEOF) {
					USTPUTC(CTLESC, out);
					USTPUTC('\\', out);
					pungetc();
				} else {
					/*
					 * In double-quote context the backslash is
					 * kept literally unless it precedes one of
					 * the characters tested below.
					 */
					if (
						synstack->dblquote &&
						c != '\\' && c != '`' &&
						c != '$' && (
							c != '"' ||
							(eofmark != NULL &&
							 !synstack->varnest)
						) && (
							c != '}' ||
							!synstack->varnest
						)
					) {
						USTPUTC(CTLESC, out);
						USTPUTC('\\', out);
					}
					USTPUTC(CTLESC, out);
					USTPUTC(c, out);
					quotef++;
				}
				break;
			case CSQUOTE:
				synstack->syntax = SQSYNTAX;
quotemark:
				if (eofmark == NULL) {
					USTPUTC(CTLQUOTEMARK, out);
				}
				break;
			case CDQUOTE:
				synstack->syntax = DQSYNTAX;
				synstack->dblquote = 1;
toggledq:
				if (synstack->varnest)
					synstack->innerdq ^= 1;
				goto quotemark;
			case CENDQUOTE:
				/* Quote characters are ordinary text in a here document. */
				if (eofmark && !synstack->varnest) {
					USTPUTC(c, out);
					break;
				}

				if (synstack->dqvarnest == 0) {
					synstack->syntax = BASESYNTAX;
					synstack->dblquote = 0;
				}

				quotef++;

				if (c == '"')
					goto toggledq;

				goto quotemark;
			case CVAR:	/* '$' */
				PARSESUB();		/* parse substitution */
				break;
			case CENDVAR:	/* '}' */
				if (!synstack->innerdq &&
				    synstack->varnest > 0) {
					if (!--synstack->varnest &&
					    synstack->varpushed)
						synstack_pop(&synstack);
					else if (synstack->dqvarnest > 0)
						synstack->dqvarnest--;
					USTPUTC(CTLENDVAR, out);
				} else {
					USTPUTC(c, out);
				}
				break;
			case CLP:	/* '(' in arithmetic */
				synstack->parenlevel++;
				USTPUTC(c, out);
				break;
			case CRP:	/* ')' in arithmetic */
				if (synstack->parenlevel > 0) {
					USTPUTC(c, out);
					--synstack->parenlevel;
				} else {
					if (pgetc_eatbnl() == ')') {
						USTPUTC(CTLENDARI, out);
						synstack_pop(&synstack);
					} else {
						/*
						 * unbalanced parens
						 *  (don't 2nd guess - no error)
						 */
						pungetc();
						USTPUTC(')', out);
					}
				}
				break;
			case CBQUOTE:	/* '`' */
				/* While reading a here-doc end marker, '`' is literal. */
				if (checkkwd & CHKEOFMARK) {
					USTPUTC('`', out);
					break;
				}

				PARSEBACKQOLD();
				break;
			case CEOF:
				goto endword;		/* exit outer loop */
			default:
				if (synstack->varnest == 0)
					goto endword;	/* exit outer loop */
				USTPUTC(c, out);
			}
			c = pgetc_top(synstack);
		}
	}
endword:
	if (synstack->syntax == ARISYNTAX)
		synerror("Missing '))'");
	if (synstack->syntax != BASESYNTAX && eofmark == NULL)
		synerror("Unterminated quoted string");
	if (synstack->varnest != 0) {
		/* { */
		synerror("Missing '}'");
	}
	USTPUTC('\0', out);
	len = out - (char *)stackblock();
	out = stackblock();
	if (eofmark == NULL) {
		/*
		 * An unquoted word that is empty or a single digit and is
		 * followed by '<' or '>' is the fd prefix of a redirection.
		 */
		if ((c == '>' || c == '<')
		 && quotef == 0
		 && len <= 2
		 && (*out == '\0' || is_digit(*out))) {
			PARSEREDIR();
			return lasttoken = TREDIR;
		} else {
			pungetc();
		}
	}
	quoteflag = quotef;
	backquotelist = bqlist;
	grabstackblock(len);
	wordtext = out;
	return lasttoken = TWORD;
/* end of readtoken routine */



/*
 * Check to see whether we are at the end of the here document.  When this
 * is called, c is set to the first character of the next input line.  If
 * we are at the end of the here document, this routine sets the c to PEOF.
 */

checkend: {
	if (realeofmark(eofmark)) {
		int markloc;
		char *p;

		if (striptabs) {
			while (c == '\t')
				c = pgetc();
		}

		/*
		 * Characters compared against the marker are also appended
		 * to the output string, starting at offset markloc, so they
		 * can be replayed if the line turns out not to be the marker.
		 */
		markloc = out - (char *)stackblock();
		for (p = eofmark; STPUTC(c, out), *p; p++) {
			if (c != *p)
				goto more_heredoc;

			c = pgetc();
		}

		if (c == '\n' || c == PEOF) {
			c = PEOF;
			nlnoprompt();
		} else {
			int len;

more_heredoc:
			/*
			 * Not the marker: resume with the first character of
			 * the line and push the rest that was read ahead back
			 * as input.
			 */
			p = (char *)stackblock() + markloc + 1;
			len = out - p;

			if (len) {
				len -= c < 0;
				c = p[-1];

				if (len) {
					char *str;

					str = alloca(len + 1);
					*(char *)mempcpy(str, p, len) = 0;

					pushstring(str, NULL);
				}
			}
		}

		/* Discard everything appended since markloc. */
		STADJUST((char *)stackblock() + markloc - out, out);
	}
	goto checkend_return;
}


/*
 * Parse a redirection operator.  The variable "out" points to a string
 * specifying the fd to be redirected.  The variable "c" contains the
 * first character of the redirection operator.
 */

parseredir: {
	char fd = *out;
	union node *np;

	np = (union node *)stalloc(sizeof (struct nfile));
	if (c == '>') {
		np->nfile.fd = 1;
		c = pgetc_eatbnl();
		if (c == '>')
			np->type = NAPPEND;
		else if (c == '|')
			np->type = NCLOBBER;
		else if (c == '&')
			np->type = NTOFD;
		else {
			np->type = NTO;
			pungetc();
		}
	} else {	/* c == '<' */
		np->nfile.fd = 0;
		switch (c = pgetc_eatbnl()) {
		case '<':
			/* Re-allocate when a here node has a different size. */
			if (sizeof (struct nfile) != sizeof (struct nhere)) {
				np = (union node *)stalloc(sizeof (struct nhere));
				np->nfile.fd = 0;
			}
			np->type = NHERE;
			heredoc = (struct heredoc *)stalloc(sizeof (struct heredoc));
			heredoc->here = np;
			if ((c = pgetc_eatbnl()) == '-') {
				heredoc->striptabs = 1;
			} else {
				heredoc->striptabs = 0;
				pungetc();
			}
			break;

		case '&':
			np->type = NFROMFD;
			break;

		case '>':
			np->type = NFROMTO;
			break;

		default:
			np->type = NFROM;
			pungetc();
			break;
		}
	}
	/* An explicit fd digit overrides the operator's default. */
	if (fd != '\0')
		np->nfile.fd = digit_val(fd);
	redirnode = np;
	goto parseredir_return;
}


/*
 * Parse a substitution.  At this point, we have read the dollar sign
 * and nothing else.
 */

parsesub: {
	int subtype;
	int typeloc;
	char *p;
	static const char types[] = "}-+?=";

	c = pgetc_eatbnl();
	if (
		(checkkwd & CHKEOFMARK) ||
		(c != '(' && c != '{' && !is_name(c) && !is_special(c))
	) {
		/* Not the start of a substitution: '$' is literal. */
		USTPUTC('$', out);
		pungetc();
	} else if (c == '(') {	/* $(command) or $((arith)) */
		if (pgetc_eatbnl() == '(') {
			PARSEARITH();
		} else {
			pungetc();
			PARSEBACKQNEW();
		}
	} else {
		const char *newsyn = synstack->syntax;

		USTPUTC(CTLVAR, out);
		/* Reserve one byte for the subtype; it is filled in below. */
		typeloc = out - (char *)stackblock();
		STADJUST(1, out);
		subtype = VSNORMAL;
		if (likely(c == '{')) {
			c = pgetc_eatbnl();
			subtype = 0;
		}
varname:
		if (is_name(c)) {
			do {
				STPUTC(c, out);
				c = pgetc_eatbnl();
			} while (is_in_name(c));
		} else if (is_digit(c)) {
			do {
				STPUTC(c, out);
				c = pgetc_eatbnl();
			} while ((subtype <= 0 || subtype >= VSLENGTH) &&
				 is_digit(c));
		} else if (c != '}') {
			int cc = c;

			c = pgetc_eatbnl();

			if (!subtype && cc == '#') {
				subtype = VSLENGTH;

				/*
				 * NOTE(review): isalnum() requires an
				 * unsigned-char value or EOF; confirm c
				 * cannot be a negative char value here.
				 */
				if (c == '_' || isalnum(c))
					goto varname;

				cc = c;
				c = pgetc_eatbnl();
				if (cc == '}' || c != '}') {
					pungetc();
					subtype = 0;
					c = cc;
					cc = '#';
				}
			}

			if (!is_special(cc)) {
				if (subtype == VSLENGTH)
					subtype = 0;
				goto badsub;
			}

			USTPUTC(cc, out);
		} else
			goto badsub;

		if (subtype == 0) {
			int cc = c;

			switch (c) {
			case ':':
				subtype = VSNUL;
				c = pgetc_eatbnl();
				/*FALLTHROUGH*/
			default:
				p = strchr(types, c);
				if (p == NULL)
					break;
				subtype |= p - types + VSNORMAL;
				break;
			case '%':
			case '#':
				subtype = c == '#' ? VSTRIMLEFT :
						     VSTRIMRIGHT;
				c = pgetc_eatbnl();
				/* A doubled operator selects the next subtype. */
				if (c == cc)
					subtype++;
				else
					pungetc();

				newsyn = BASESYNTAX;
				break;
			}
		} else {
			if (subtype == VSLENGTH && c != '}')
				subtype = 0;
badsub:
			pungetc();
		}

		if (newsyn == ARISYNTAX)
			newsyn = DQSYNTAX;

		if ((newsyn != synstack->syntax || synstack->innerdq) &&
		    subtype != VSNORMAL) {
			/* Reuse a previously pushed entry when one exists. */
			synstack_push(&synstack,
				      synstack->prev ?:
				      alloca(sizeof(*synstack)),
				      newsyn);

			synstack->varpushed++;
			synstack->dblquote = newsyn != BASESYNTAX;
		}

		/* Store the subtype byte with VSBIT set in the reserved slot. */
		*((char *)stackblock() + typeloc) = subtype | VSBIT;
		if (subtype != VSNORMAL) {
			synstack->varnest++;
			if (synstack->dblquote)
				synstack->dqvarnest++;
		}
		STPUTC('=', out);
	}
	goto parsesub_return;
}


/*
 * Called to parse command substitutions.  oldstyle is non-zero for
 * `...` and zero for $(...).  The parsed command is appended to bqlist,
 * and the savelen bytes already accumulated in the output string are
 * saved and restored around the nested parse.
 */

parsebackq: {
	struct nodelist **nlpp;
	union node *n;
	char *str;
	size_t savelen;
	struct heredoc *saveheredoclist;
	int uninitialized_var(saveprompt);

	str = NULL;
	savelen = out - (char *)stackblock();
	if (savelen > 0) {
		str = alloca(savelen);
		memcpy(str, stackblock(), savelen);
	}
	if (oldstyle) {
		/* We must read until the closing backquote, giving special
		   treatment to some slashes, and then push the string and
		   reread it as input, interpreting it normally.  */
		char *pout;
		int pc;
		size_t psavelen;
		char *pstr;


		STARTSTACKSTR(pout);
		for (;;) {
			if (needprompt) {
				setprompt(2);
			}
			switch (pc = pgetc_eatbnl()) {
			case '`':
				goto done;

			case '\\':
				pc = pgetc();
				if (pc != '\\' && pc != '`' && pc != '$'
				    && (!synstack->dblquote || pc != '"'))
					STPUTC('\\', pout);
				break;

			case PEOF:
				synerror("EOF in backquote substitution");

			case '\n':
				nlnoprompt();
				break;

			default:
				break;
			}
			STPUTC(pc, pout);
		}
done:
		STPUTC('\0', pout);
		psavelen = pout - (char *)stackblock();
		if (psavelen > 0) {
			pstr = grabstackstr(pout);
			setinputstring(pstr);
		}
	}
	/* Append a new entry at the end of bqlist. */
	nlpp = &bqlist;
	while (*nlpp)
		nlpp = &(*nlpp)->next;
	*nlpp = (struct nodelist *)stalloc(sizeof (struct nodelist));
	(*nlpp)->next = NULL;

	/* The nested command gets its own here-document list. */
	saveheredoclist = heredoclist;
	heredoclist = NULL;

	if (oldstyle) {
		saveprompt = doprompt;
		doprompt = 0;
	}

	n = list(2);

	if (oldstyle)
		doprompt = saveprompt;
	else {
		if (readtoken() != TRP)
			synexpect(TRP);
		setinputstring(nullstr);
	}

	parseheredoc();
	heredoclist = saveheredoclist;

	(*nlpp)->n = n;
	/* Start reading from old file again. */
	popfile();
	/* Ignore any pushed back tokens left from the backquote parsing. */
	if (oldstyle)
		tokpushback = 0;
	/* Restore the partial word saved before the nested parse. */
	out = growstackto(savelen + 1);
	if (str) {
		memcpy(out, str, savelen);
		STADJUST(savelen, out);
	}
	USTPUTC(CTLBACKQ, out);
	if (oldstyle)
		goto parsebackq_oldreturn;
	else
		goto parsebackq_newreturn;
}

/*
 * Parse an arithmetic expansion (indicate start of one and set state)
 */
parsearith: {

	synstack_push(&synstack,
		      synstack->prev ?: alloca(sizeof(*synstack)),
		      ARISYNTAX);
	synstack->dblquote = 1;
	USTPUTC(CTLARI, out);
	goto parsearith_return;
}

} /* end of readtoken */
1477 #ifdef mkinit
1478 INCLUDE "parser.h"
1479 #endif
/*
 * Return a pointer to the first character after the legal variable name
 * at the start of name (a letter or underscore followed by zero or more
 * letters, underscores, and digits).  If name does not begin with a name
 * character, name itself is returned.
 */
char *
endofname(const char *name)
{
	char *end = (char *) name;

	if (! is_name(*end))
		return end;

	for (end++; *end != '\0'; end++) {
		if (! is_in_name(*end))
			break;
	}
	return end;
}
1504 * Called when an unexpected token is read during the parse. The argument
1505 * is the token that is expected, or -1 if more than one type of token can
1506 * occur at this point.
1509 STATIC void
1510 synexpect(int token)
1512 char msg[64];
1514 if (token >= 0) {
1515 fmtstr(msg, 64, "%s unexpected (expecting %s)",
1516 tokname[lasttoken], tokname[token]);
1517 } else {
1518 fmtstr(msg, 64, "%s unexpected", tokname[lasttoken]);
1520 synerror(msg);
1521 /* NOTREACHED */
/*
 * Raise a syntax error with the given message.  The current parser line
 * number is stored in errlinno before sh_error() is called.  Declared
 * noreturn in the prototype above.
 */
STATIC void
synerror(const char *msg)
{
	errlinno = plinno;
	sh_error("Syntax error: %s", msg);
	/* NOTREACHED */
}
1533 STATIC void
1534 setprompt(int which)
1536 struct stackmark smark;
1537 int show;
1539 needprompt = 0;
1540 whichprompt = which;
1542 #ifdef SMALL
1543 show = 1;
1544 #else
1545 show = !el;
1546 #endif
1547 if (show) {
1548 pushstackmark(&smark, stackblocksize());
1549 out2str(getprompt(NULL));
1550 popstackmark(&smark);
/*
 * Expand the string ps as if it were the body of a here-document (read
 * with double-quote syntax and the FAKEEOFMARK sentinel) and return the
 * expanded text.
 *
 * The expansion runs under a local error handler: if an error is raised
 * via the jump buffer, the original string ps is returned instead.  The
 * here-document list, prompting flag and input file stack are saved on
 * entry and restored on every path.
 *
 * On success the result is stackblock().
 */
const char *
expandstr(const char *ps)
{
	struct parsefile *file_stop;
	struct jmploc *volatile savehandler;
	struct heredoc *saveheredoclist;
	const char *result;
	int saveprompt;
	struct jmploc jmploc;
	union node n;
	int err;

	/* Remember the input stack depth so it can be unwound at the end. */
	file_stop = parsefile;

	/* XXX Fix (char *) cast. */
	setinputstring((char *)ps);

	saveheredoclist = heredoclist;
	heredoclist = NULL;
	saveprompt = doprompt;
	doprompt = 0;
	/* Fallback result if the expansion fails. */
	result = ps;
	savehandler = handler;
	if (unlikely(err = setjmp(jmploc.loc)))
		goto out;
	handler = &jmploc;

	readtoken1(pgetc_eatbnl(), DQSYNTAX, FAKEEOFMARK, 0);

	n.narg.type = NARG;
	n.narg.next = NULL;
	n.narg.text = wordtext;
	n.narg.backquote = backquotelist;

	expandarg(&n, NULL, EXP_QUOTED);
	result = stackblock();

out:
	restore_handler_expandarg(savehandler, err);

	doprompt = saveprompt;
	unwindfiles(file_stop);
	heredoclist = saveheredoclist;

	return result;
}
1602 * called by editline -- any expansions to the prompt
1603 * should be added here.
1605 const char *
1606 getprompt(void *unused)
1608 const char *prompt;
1610 switch (whichprompt) {
1611 default:
1612 #ifdef DEBUG
1613 return "<internal prompt error>";
1614 #endif
1615 case 0:
1616 return nullstr;
1617 case 1:
1618 prompt = ps1val();
1619 break;
1620 case 2:
1621 prompt = ps2val();
1622 break;
1625 return expandstr(prompt);
1628 const char *const *
1629 findkwd(const char *s)
1631 return findstring(
1632 s, parsekwd, sizeof(parsekwd) / sizeof(const char *)