[PARSER] Catch variable length expansions on non-existant specials
[dash.git] / src / parser.c
blob382658e7ea9c21dd6a41eee4d766e716a3fcb190
1 /*-
2 * Copyright (c) 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 * Copyright (c) 1997-2005
5 * Herbert Xu <herbert@gondor.apana.org.au>. All rights reserved.
7 * This code is derived from software contributed to Berkeley by
8 * Kenneth Almquist.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
35 #if HAVE_ALLOCA_H
36 #include <alloca.h>
37 #endif
39 #include <stdlib.h>
41 #include "shell.h"
42 #include "parser.h"
43 #include "nodes.h"
44 #include "expand.h" /* defines rmescapes() */
45 #include "exec.h" /* defines find_builtin() */
46 #include "syntax.h"
47 #include "options.h"
48 #include "input.h"
49 #include "output.h"
50 #include "var.h"
51 #include "error.h"
52 #include "memalloc.h"
53 #include "mystring.h"
54 #include "alias.h"
55 #include "show.h"
56 #include "builtins.h"
57 #include "system.h"
58 #ifndef SMALL
59 #include "myhistedit.h"
60 #endif
63 * Shell command parser.
66 /* values returned by readtoken */
67 #include "token_vars.h"
71 /* Used by expandstr to get here-doc like behaviour: a non-NULL eofmark that realeofmark() rejects, so readtoken1() never searches for a terminator line. */
72 #define FAKEEOFMARK (char *)1
/* One pending here document: appended to heredoclist by parsefname() and read in by parseheredoc(). */
76 struct heredoc {
77 struct heredoc *next; /* next here document in list */
78 union node *here; /* redirection node */
79 char *eofmark; /* string indicating end of input */
80 int striptabs; /* if set, strip leading tabs */
/* Parser state shared by the tokenizer and the grammar routines in this file. */
85 struct heredoc *heredoclist; /* list of here documents to read */
86 int doprompt; /* if set, prompt the user */
87 int needprompt; /* true if interactive and at start of line */
88 int lasttoken; /* last token read */
89 int tokpushback; /* last token pushed back */
90 char *wordtext; /* text of last word returned by readtoken */
91 int checkkwd; /* CHK* flags for the next readtoken(); readtoken() resets it to 0 */
92 struct nodelist *backquotelist; /* command substitutions collected by readtoken1() for the last word */
93 union node *redirnode; /* redirection node built by readtoken1() when it returns TREDIR */
94 struct heredoc *heredoc; /* here-document record allocated when a "<<" operator is parsed */
95 int quoteflag; /* set if (part of) last token was quoted */
/* Forward declarations of the file-local parsing routines defined below. */
98 STATIC union node *list(int);
99 STATIC union node *andor(void);
100 STATIC union node *pipeline(void);
101 STATIC union node *command(void);
102 STATIC union node *simplecmd(void);
103 STATIC union node *makename(void);
104 STATIC void parsefname(void);
105 STATIC void parseheredoc(void);
106 STATIC int peektoken(void);
107 STATIC int readtoken(void);
108 STATIC int xxreadtoken(void);
109 STATIC int readtoken1(int, char const *, char *, int);
/* The two error reporters never return to the caller. */
110 STATIC void synexpect(int) __attribute__((__noreturn__));
111 STATIC void synerror(const char *) __attribute__((__noreturn__));
112 STATIC void setprompt(int);
/*
 * Return non-zero when p begins with a valid variable name that is
 * immediately followed by '=', i.e. when the word is an assignment.
 */
static inline int
isassignment(const char *p)
{
	const char *name_end = endofname(p);

	return name_end != p && *name_end == '=';
}
124 static inline int realeofmark(const char *eofmark)
126 return eofmark && eofmark != FAKEEOFMARK;
131 * Read and parse a command. Returns NEOF on end of file. (NULL is a
132 * valid parse tree indicating a blank line.)
135 union node *
136 parsecmd(int interact)
138 tokpushback = 0;
139 checkkwd = 0;
140 heredoclist = 0;
141 doprompt = interact;
142 if (doprompt)
143 setprompt(doprompt);
144 needprompt = 0;
145 return list(1);
149 STATIC union node *
150 list(int nlflag)
152 union node *n1, *n2, *n3;
153 int tok;
155 n1 = NULL;
156 for (;;) {
157 switch (peektoken()) {
158 case TNL:
159 if (!(nlflag & 1))
160 break;
161 parseheredoc();
162 return n1;
164 case TEOF:
165 if (!n1 && (nlflag & 1))
166 n1 = NEOF;
167 parseheredoc();
168 return n1;
171 checkkwd = CHKNL | CHKKWD | CHKALIAS;
172 if (nlflag == 2 && tokendlist[peektoken()])
173 return n1;
174 nlflag |= 2;
176 n2 = andor();
177 tok = readtoken();
178 if (tok == TBACKGND) {
179 if (n2->type == NPIPE) {
180 n2->npipe.backgnd = 1;
181 } else {
182 if (n2->type != NREDIR) {
183 n3 = stalloc(sizeof(struct nredir));
184 n3->nredir.n = n2;
185 n3->nredir.redirect = NULL;
186 n2 = n3;
188 n2->type = NBACKGND;
191 if (n1 == NULL) {
192 n1 = n2;
194 else {
195 n3 = (union node *)stalloc(sizeof (struct nbinary));
196 n3->type = NSEMI;
197 n3->nbinary.ch1 = n1;
198 n3->nbinary.ch2 = n2;
199 n1 = n3;
201 switch (tok) {
202 case TNL:
203 case TEOF:
204 tokpushback++;
205 /* fall through */
206 case TBACKGND:
207 case TSEMI:
208 break;
209 default:
210 if ((nlflag & 1))
211 synexpect(-1);
212 tokpushback++;
213 return n1;
220 STATIC union node *
221 andor(void)
223 union node *n1, *n2, *n3;
224 int t;
226 n1 = pipeline();
227 for (;;) {
228 if ((t = readtoken()) == TAND) {
229 t = NAND;
230 } else if (t == TOR) {
231 t = NOR;
232 } else {
233 tokpushback++;
234 return n1;
236 checkkwd = CHKNL | CHKKWD | CHKALIAS;
237 n2 = pipeline();
238 n3 = (union node *)stalloc(sizeof (struct nbinary));
239 n3->type = t;
240 n3->nbinary.ch1 = n1;
241 n3->nbinary.ch2 = n2;
242 n1 = n3;
248 STATIC union node *
249 pipeline(void)
251 union node *n1, *n2, *pipenode;
252 struct nodelist *lp, *prev;
253 int negate;
255 negate = 0;
256 TRACE(("pipeline: entered\n"));
257 if (readtoken() == TNOT) {
258 negate = !negate;
259 checkkwd = CHKKWD | CHKALIAS;
260 } else
261 tokpushback++;
262 n1 = command();
263 if (readtoken() == TPIPE) {
264 pipenode = (union node *)stalloc(sizeof (struct npipe));
265 pipenode->type = NPIPE;
266 pipenode->npipe.backgnd = 0;
267 lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
268 pipenode->npipe.cmdlist = lp;
269 lp->n = n1;
270 do {
271 prev = lp;
272 lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
273 checkkwd = CHKNL | CHKKWD | CHKALIAS;
274 lp->n = command();
275 prev->next = lp;
276 } while (readtoken() == TPIPE);
277 lp->next = NULL;
278 n1 = pipenode;
280 tokpushback++;
281 if (negate) {
282 n2 = (union node *)stalloc(sizeof (struct nnot));
283 n2->type = NNOT;
284 n2->nnot.com = n1;
285 return n2;
286 } else
287 return n1;
292 STATIC union node *
293 command(void)
295 union node *n1, *n2;
296 union node *ap, **app;
297 union node *cp, **cpp;
298 union node *redir, **rpp;
299 union node **rpp2;
300 int t;
301 int savelinno;
303 redir = NULL;
304 rpp2 = &redir;
306 savelinno = plinno;
308 switch (readtoken()) {
309 default:
310 synexpect(-1);
311 /* NOTREACHED */
312 case TIF:
313 n1 = (union node *)stalloc(sizeof (struct nif));
314 n1->type = NIF;
315 n1->nif.test = list(0);
316 if (readtoken() != TTHEN)
317 synexpect(TTHEN);
318 n1->nif.ifpart = list(0);
319 n2 = n1;
320 while (readtoken() == TELIF) {
321 n2->nif.elsepart = (union node *)stalloc(sizeof (struct nif));
322 n2 = n2->nif.elsepart;
323 n2->type = NIF;
324 n2->nif.test = list(0);
325 if (readtoken() != TTHEN)
326 synexpect(TTHEN);
327 n2->nif.ifpart = list(0);
329 if (lasttoken == TELSE)
330 n2->nif.elsepart = list(0);
331 else {
332 n2->nif.elsepart = NULL;
333 tokpushback++;
335 t = TFI;
336 break;
337 case TWHILE:
338 case TUNTIL: {
339 int got;
340 n1 = (union node *)stalloc(sizeof (struct nbinary));
341 n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL;
342 n1->nbinary.ch1 = list(0);
343 if ((got=readtoken()) != TDO) {
344 TRACE(("expecting DO got %s %s\n", tokname[got], got == TWORD ? wordtext : ""));
345 synexpect(TDO);
347 n1->nbinary.ch2 = list(0);
348 t = TDONE;
349 break;
351 case TFOR:
352 if (readtoken() != TWORD || quoteflag || ! goodname(wordtext))
353 synerror("Bad for loop variable");
354 n1 = (union node *)stalloc(sizeof (struct nfor));
355 n1->type = NFOR;
356 n1->nfor.linno = savelinno;
357 n1->nfor.var = wordtext;
358 checkkwd = CHKNL | CHKKWD | CHKALIAS;
359 if (readtoken() == TIN) {
360 app = &ap;
361 while (readtoken() == TWORD) {
362 n2 = (union node *)stalloc(sizeof (struct narg));
363 n2->type = NARG;
364 n2->narg.text = wordtext;
365 n2->narg.backquote = backquotelist;
366 *app = n2;
367 app = &n2->narg.next;
369 *app = NULL;
370 n1->nfor.args = ap;
371 if (lasttoken != TNL && lasttoken != TSEMI)
372 synexpect(-1);
373 } else {
374 n2 = (union node *)stalloc(sizeof (struct narg));
375 n2->type = NARG;
376 n2->narg.text = (char *)dolatstr;
377 n2->narg.backquote = NULL;
378 n2->narg.next = NULL;
379 n1->nfor.args = n2;
381 * Newline or semicolon here is optional (but note
382 * that the original Bourne shell only allowed NL).
384 if (lasttoken != TSEMI)
385 tokpushback++;
387 checkkwd = CHKNL | CHKKWD | CHKALIAS;
388 if (readtoken() != TDO)
389 synexpect(TDO);
390 n1->nfor.body = list(0);
391 t = TDONE;
392 break;
393 case TCASE:
394 n1 = (union node *)stalloc(sizeof (struct ncase));
395 n1->type = NCASE;
396 n1->ncase.linno = savelinno;
397 if (readtoken() != TWORD)
398 synexpect(TWORD);
399 n1->ncase.expr = n2 = (union node *)stalloc(sizeof (struct narg));
400 n2->type = NARG;
401 n2->narg.text = wordtext;
402 n2->narg.backquote = backquotelist;
403 n2->narg.next = NULL;
404 checkkwd = CHKNL | CHKKWD | CHKALIAS;
405 if (readtoken() != TIN)
406 synexpect(TIN);
407 cpp = &n1->ncase.cases;
408 next_case:
409 checkkwd = CHKNL | CHKKWD;
410 t = readtoken();
411 while(t != TESAC) {
412 if (lasttoken == TLP)
413 readtoken();
414 *cpp = cp = (union node *)stalloc(sizeof (struct nclist));
415 cp->type = NCLIST;
416 app = &cp->nclist.pattern;
417 for (;;) {
418 *app = ap = (union node *)stalloc(sizeof (struct narg));
419 ap->type = NARG;
420 ap->narg.text = wordtext;
421 ap->narg.backquote = backquotelist;
422 if (readtoken() != TPIPE)
423 break;
424 app = &ap->narg.next;
425 readtoken();
427 ap->narg.next = NULL;
428 if (lasttoken != TRP)
429 synexpect(TRP);
430 cp->nclist.body = list(2);
432 cpp = &cp->nclist.next;
434 checkkwd = CHKNL | CHKKWD;
435 if ((t = readtoken()) != TESAC) {
436 if (t != TENDCASE)
437 synexpect(TENDCASE);
438 else
439 goto next_case;
442 *cpp = NULL;
443 goto redir;
444 case TLP:
445 n1 = (union node *)stalloc(sizeof (struct nredir));
446 n1->type = NSUBSHELL;
447 n1->nredir.linno = savelinno;
448 n1->nredir.n = list(0);
449 n1->nredir.redirect = NULL;
450 t = TRP;
451 break;
452 case TBEGIN:
453 n1 = list(0);
454 t = TEND;
455 break;
456 case TWORD:
457 case TREDIR:
458 tokpushback++;
459 return simplecmd();
462 if (readtoken() != t)
463 synexpect(t);
465 redir:
466 /* Now check for redirection which may follow command */
467 checkkwd = CHKKWD | CHKALIAS;
468 rpp = rpp2;
469 while (readtoken() == TREDIR) {
470 *rpp = n2 = redirnode;
471 rpp = &n2->nfile.next;
472 parsefname();
474 tokpushback++;
475 *rpp = NULL;
476 if (redir) {
477 if (n1->type != NSUBSHELL) {
478 n2 = (union node *)stalloc(sizeof (struct nredir));
479 n2->type = NREDIR;
480 n2->nredir.linno = savelinno;
481 n2->nredir.n = n1;
482 n1 = n2;
484 n1->nredir.redirect = redir;
487 return n1;
491 STATIC union node *
492 simplecmd(void) {
493 union node *args, **app;
494 union node *n = NULL;
495 union node *vars, **vpp;
496 union node **rpp, *redir;
497 int savecheckkwd;
498 int savelinno;
500 args = NULL;
501 app = &args;
502 vars = NULL;
503 vpp = &vars;
504 redir = NULL;
505 rpp = &redir;
507 savecheckkwd = CHKALIAS;
508 savelinno = plinno;
509 for (;;) {
510 checkkwd = savecheckkwd;
511 switch (readtoken()) {
512 case TWORD:
513 n = (union node *)stalloc(sizeof (struct narg));
514 n->type = NARG;
515 n->narg.text = wordtext;
516 n->narg.backquote = backquotelist;
517 if (savecheckkwd && isassignment(wordtext)) {
518 *vpp = n;
519 vpp = &n->narg.next;
520 } else {
521 *app = n;
522 app = &n->narg.next;
523 savecheckkwd = 0;
525 break;
526 case TREDIR:
527 *rpp = n = redirnode;
528 rpp = &n->nfile.next;
529 parsefname(); /* read name of redirection file */
530 break;
531 case TLP:
532 if (
533 args && app == &args->narg.next &&
534 !vars && !redir
536 struct builtincmd *bcmd;
537 const char *name;
539 /* We have a function */
540 if (readtoken() != TRP)
541 synexpect(TRP);
542 name = n->narg.text;
543 if (
544 !goodname(name) || (
545 (bcmd = find_builtin(name)) &&
546 bcmd->flags & BUILTIN_SPECIAL
549 synerror("Bad function name");
550 n->type = NDEFUN;
551 checkkwd = CHKNL | CHKKWD | CHKALIAS;
552 n->ndefun.text = n->narg.text;
553 n->ndefun.linno = plinno;
554 n->ndefun.body = command();
555 return n;
557 /* fall through */
558 default:
559 tokpushback++;
560 goto out;
563 out:
564 *app = NULL;
565 *vpp = NULL;
566 *rpp = NULL;
567 n = (union node *)stalloc(sizeof (struct ncmd));
568 n->type = NCMD;
569 n->ncmd.linno = savelinno;
570 n->ncmd.args = args;
571 n->ncmd.assign = vars;
572 n->ncmd.redirect = redir;
573 return n;
576 STATIC union node *
577 makename(void)
579 union node *n;
581 n = (union node *)stalloc(sizeof (struct narg));
582 n->type = NARG;
583 n->narg.next = NULL;
584 n->narg.text = wordtext;
585 n->narg.backquote = backquotelist;
586 return n;
589 void fixredir(union node *n, const char *text, int err)
591 TRACE(("Fix redir %s %d\n", text, err));
592 if (!err)
593 n->ndup.vname = NULL;
595 if (is_digit(text[0]) && text[1] == '\0')
596 n->ndup.dupfd = digit_val(text[0]);
597 else if (text[0] == '-' && text[1] == '\0')
598 n->ndup.dupfd = -1;
599 else {
601 if (err)
602 synerror("Bad fd number");
603 else
604 n->ndup.vname = makename();
609 STATIC void
610 parsefname(void)
612 union node *n = redirnode;
614 if (n->type == NHERE)
615 checkkwd = CHKEOFMARK;
616 if (readtoken() != TWORD)
617 synexpect(-1);
618 if (n->type == NHERE) {
619 struct heredoc *here = heredoc;
620 struct heredoc *p;
622 if (quoteflag == 0)
623 n->type = NXHERE;
624 TRACE(("Here document %d\n", n->type));
625 rmescapes(wordtext);
626 here->eofmark = wordtext;
627 here->next = NULL;
628 if (heredoclist == NULL)
629 heredoclist = here;
630 else {
631 for (p = heredoclist ; p->next ; p = p->next);
632 p->next = here;
634 } else if (n->type == NTOFD || n->type == NFROMFD) {
635 fixredir(n, wordtext, 0);
636 } else {
637 n->nfile.fname = makename();
643 * Input any here documents.
646 STATIC void
647 parseheredoc(void)
649 struct heredoc *here;
650 union node *n;
652 here = heredoclist;
653 heredoclist = 0;
655 while (here) {
656 if (needprompt) {
657 setprompt(2);
659 readtoken1(pgetc(), here->here->type == NHERE? SQSYNTAX : DQSYNTAX,
660 here->eofmark, here->striptabs);
661 n = (union node *)stalloc(sizeof (struct narg));
662 n->narg.type = NARG;
663 n->narg.next = NULL;
664 n->narg.text = wordtext;
665 n->narg.backquote = backquotelist;
666 here->here->nhere.doc = n;
667 here = here->next;
671 STATIC int
672 peektoken(void)
674 int t;
676 t = readtoken();
677 tokpushback++;
678 return (t);
681 STATIC int
682 readtoken(void)
684 int t;
685 int kwd = checkkwd;
686 #ifdef DEBUG
687 int alreadyseen = tokpushback;
688 #endif
690 top:
691 t = xxreadtoken();
694 * eat newlines
696 if (kwd & CHKNL) {
697 while (t == TNL) {
698 parseheredoc();
699 t = xxreadtoken();
703 if (t != TWORD || quoteflag) {
704 goto out;
708 * check for keywords
710 if (kwd & CHKKWD) {
711 const char *const *pp;
713 if ((pp = findkwd(wordtext))) {
714 lasttoken = t = pp - parsekwd + KWDOFFSET;
715 TRACE(("keyword %s recognized\n", tokname[t]));
716 goto out;
720 if (checkkwd & CHKALIAS) {
721 struct alias *ap;
722 if ((ap = lookupalias(wordtext, 1)) != NULL) {
723 if (*ap->val) {
724 pushstring(ap->val, ap);
726 goto top;
729 out:
730 checkkwd = 0;
731 #ifdef DEBUG
732 if (!alreadyseen)
733 TRACE(("token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
734 else
735 TRACE(("reread token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
736 #endif
737 return (t);
740 static void nlprompt(void)
742 plinno++;
743 if (doprompt)
744 setprompt(2);
747 static void nlnoprompt(void)
749 plinno++;
750 needprompt = doprompt;
755 * Read the next input token.
756 * If the token is a word, we set backquotelist to the list of cmds in
757 * backquotes. We set quoteflag to true if any part of the word was
758 * quoted.
759 * If the token is TREDIR, then we set redirnode to a structure containing
760 * the redirection.
762 * [Change comment: here documents and internal procedures]
763 * [Readtoken shouldn't have any arguments. Perhaps we should make the
764 * word parsing code into a separate routine. In this case, readtoken
765 * doesn't need to have any internal procedures, but parseword does.
766 * We could also make parseoperator in essence the main routine, and
767 * have parseword (readtoken1?) handle both words and redirection.]
770 #define RETURN(token) return lasttoken = token
772 STATIC int
773 xxreadtoken(void)
775 int c;
777 if (tokpushback) {
778 tokpushback = 0;
779 return lasttoken;
781 if (needprompt) {
782 setprompt(2);
784 for (;;) { /* until token or start of word found */
785 c = pgetc();
786 switch (c) {
787 case ' ': case '\t':
788 case PEOA:
789 continue;
790 case '#':
791 while ((c = pgetc()) != '\n' && c != PEOF);
792 pungetc();
793 continue;
794 case '\\':
795 if (pgetc() == '\n') {
796 nlprompt();
797 continue;
799 pungetc();
800 goto breakloop;
801 case '\n':
802 nlnoprompt();
803 RETURN(TNL);
804 case PEOF:
805 RETURN(TEOF);
806 case '&':
807 if (pgetc() == '&')
808 RETURN(TAND);
809 pungetc();
810 RETURN(TBACKGND);
811 case '|':
812 if (pgetc() == '|')
813 RETURN(TOR);
814 pungetc();
815 RETURN(TPIPE);
816 case ';':
817 if (pgetc() == ';')
818 RETURN(TENDCASE);
819 pungetc();
820 RETURN(TSEMI);
821 case '(':
822 RETURN(TLP);
823 case ')':
824 RETURN(TRP);
825 default:
826 goto breakloop;
829 breakloop:
830 return readtoken1(c, BASESYNTAX, (char *)NULL, 0);
831 #undef RETURN
/*
 * Read one character, silently dropping any backslash-newline pairs in
 * front of it.  A backslash that is not followed by a newline is
 * returned as is, with the following character pushed back.
 */
static int pgetc_eatbnl(void)
{
	int c;

	for (;;) {
		c = pgetc();
		if (c != '\\')
			break;
		if (pgetc() != '\n') {
			pungetc();
			break;
		}
		nlprompt();
	}

	return c;
}
853 * If eofmark is NULL, read a word or a redirection symbol. If eofmark
854 * is not NULL, read a here document. In the latter case, eofmark is the
855 * word which marks the end of the document and striptabs is true if
856 * leading tabs should be stripped from the document. The argument firstc
857 * is the first character of the input token or document.
859 * Because C does not have internal subroutines, I have simulated them
860 * using goto's to implement the subroutine linkage. The following macros
861 * will run code that appears at the end of readtoken1.
864 #define CHECKEND() {goto checkend; checkend_return:;}
865 #define PARSEREDIR() {goto parseredir; parseredir_return:;}
866 #define PARSESUB() {goto parsesub; parsesub_return:;}
867 #define PARSEBACKQOLD() {oldstyle = 1; goto parsebackq; parsebackq_oldreturn:;}
868 #define PARSEBACKQNEW() {oldstyle = 0; goto parsebackq; parsebackq_newreturn:;}
869 #define PARSEARITH() {goto parsearith; parsearith_return:;}
871 STATIC int
872 readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
874 int c = firstc;
875 char *out;
876 size_t len;
877 struct nodelist *bqlist;
878 int quotef;
879 int dblquote;
880 int varnest; /* levels of variables expansion */
881 int arinest; /* levels of arithmetic expansion */
882 int parenlevel; /* levels of parens in arithmetic */
883 int dqvarnest; /* levels of variables expansion within double quotes */
884 int oldstyle;
885 /* syntax before arithmetic */
886 char const *uninitialized_var(prevsyntax);
888 dblquote = 0;
889 if (syntax == DQSYNTAX)
890 dblquote = 1;
891 quotef = 0;
892 bqlist = NULL;
893 varnest = 0;
894 arinest = 0;
895 parenlevel = 0;
896 dqvarnest = 0;
898 STARTSTACKSTR(out);
899 loop: { /* for each line, until end of word */
900 #if ATTY
901 if (c == '\034' && doprompt
902 && attyset() && ! equal(termval(), "emacs")) {
903 attyline();
904 if (syntax == BASESYNTAX)
905 return readtoken();
906 c = pgetc();
907 goto loop;
909 #endif
910 CHECKEND(); /* set c to PEOF if at end of here document */
911 for (;;) { /* until end of line or end of word */
912 CHECKSTRSPACE(4, out); /* permit 4 calls to USTPUTC */
913 switch(syntax[c]) {
914 case CNL: /* '\n' */
915 if (syntax == BASESYNTAX)
916 goto endword; /* exit outer loop */
917 USTPUTC(c, out);
918 nlprompt();
919 c = pgetc();
920 goto loop; /* continue outer loop */
921 case CWORD:
922 USTPUTC(c, out);
923 break;
924 case CCTL:
925 if (eofmark == NULL || dblquote)
926 USTPUTC(CTLESC, out);
927 USTPUTC(c, out);
928 break;
929 /* backslash */
930 case CBACK:
931 c = pgetc2();
932 if (c == PEOF) {
933 USTPUTC(CTLESC, out);
934 USTPUTC('\\', out);
935 pungetc();
936 } else if (c == '\n') {
937 nlprompt();
938 } else {
939 if (
940 dblquote &&
941 c != '\\' && c != '`' &&
942 c != '$' && (
943 c != '"' ||
944 eofmark != NULL
947 USTPUTC('\\', out);
949 USTPUTC(CTLESC, out);
950 USTPUTC(c, out);
951 quotef++;
953 break;
954 case CSQUOTE:
955 syntax = SQSYNTAX;
956 quotemark:
957 if (eofmark == NULL) {
958 USTPUTC(CTLQUOTEMARK, out);
960 break;
961 case CDQUOTE:
962 syntax = DQSYNTAX;
963 dblquote = 1;
964 goto quotemark;
965 case CENDQUOTE:
966 if (eofmark && !varnest)
967 USTPUTC(c, out);
968 else {
969 if (dqvarnest == 0) {
970 syntax = BASESYNTAX;
971 dblquote = 0;
973 quotef++;
974 goto quotemark;
976 break;
977 case CVAR: /* '$' */
978 PARSESUB(); /* parse substitution */
979 break;
980 case CENDVAR: /* '}' */
981 if (varnest > 0) {
982 varnest--;
983 if (dqvarnest > 0) {
984 dqvarnest--;
986 USTPUTC(CTLENDVAR, out);
987 } else {
988 USTPUTC(c, out);
990 break;
991 case CLP: /* '(' in arithmetic */
992 parenlevel++;
993 USTPUTC(c, out);
994 break;
995 case CRP: /* ')' in arithmetic */
996 if (parenlevel > 0) {
997 USTPUTC(c, out);
998 --parenlevel;
999 } else {
1000 if (pgetc() == ')') {
1001 USTPUTC(CTLENDARI, out);
1002 if (!--arinest)
1003 syntax = prevsyntax;
1004 } else {
1006 * unbalanced parens
1007 * (don't 2nd guess - no error)
1009 pungetc();
1010 USTPUTC(')', out);
1013 break;
1014 case CBQUOTE: /* '`' */
1015 PARSEBACKQOLD();
1016 break;
1017 case CEOF:
1018 goto endword; /* exit outer loop */
1019 case CIGN:
1020 break;
1021 default:
1022 if (varnest == 0)
1023 goto endword; /* exit outer loop */
1024 if (c != PEOA) {
1025 USTPUTC(c, out);
1028 c = pgetc();
1031 endword:
1032 if (syntax == ARISYNTAX)
1033 synerror("Missing '))'");
1034 if (syntax != BASESYNTAX && eofmark == NULL)
1035 synerror("Unterminated quoted string");
1036 if (varnest != 0) {
1037 /* { */
1038 synerror("Missing '}'");
1040 USTPUTC('\0', out);
1041 len = out - (char *)stackblock();
1042 out = stackblock();
1043 if (eofmark == NULL) {
1044 if ((c == '>' || c == '<')
1045 && quotef == 0
1046 && len <= 2
1047 && (*out == '\0' || is_digit(*out))) {
1048 PARSEREDIR();
1049 return lasttoken = TREDIR;
1050 } else {
1051 pungetc();
1054 quoteflag = quotef;
1055 backquotelist = bqlist;
1056 grabstackblock(len);
1057 wordtext = out;
1058 return lasttoken = TWORD;
1059 /* end of readtoken routine */
1064 * Check to see whether we are at the end of the here document. When this
1065 * is called, c is set to the first character of the next input line. If
1066 * we are at the end of the here document, this routine sets the c to PEOF.
1069 checkend: {
1070 if (realeofmark(eofmark)) {
1071 int markloc;
1072 char *p;
1074 if (c == PEOA) {
1075 c = pgetc2();
1077 if (striptabs) {
1078 while (c == '\t') {
1079 c = pgetc2();
1083 markloc = out - (char *)stackblock();
1084 for (p = eofmark; STPUTC(c, out), *p; p++) {
1085 if (c != *p)
1086 goto more_heredoc;
1088 c = pgetc2();
1091 if (c == '\n' || c == PEOF) {
1092 c = PEOF;
1093 nlnoprompt();
1094 } else {
1095 int len;
1097 more_heredoc:
1098 p = (char *)stackblock() + markloc + 1;
1099 len = out - p;
1101 if (len) {
1102 len -= c < 0;
1103 c = p[-1];
1105 if (len) {
1106 char *str;
1108 str = alloca(len + 1);
1109 *(char *)mempcpy(str, p, len) = 0;
1111 pushstring(str, NULL);
1116 STADJUST((char *)stackblock() + markloc - out, out);
1118 goto checkend_return;
1123 * Parse a redirection operator. The variable "out" points to a string
1124 * specifying the fd to be redirected. The variable "c" contains the
1125 * first character of the redirection operator.
1128 parseredir: {
1129 char fd = *out;
1130 union node *np;
1132 np = (union node *)stalloc(sizeof (struct nfile));
1133 if (c == '>') {
1134 np->nfile.fd = 1;
1135 c = pgetc();
1136 if (c == '>')
1137 np->type = NAPPEND;
1138 else if (c == '|')
1139 np->type = NCLOBBER;
1140 else if (c == '&')
1141 np->type = NTOFD;
1142 else {
1143 np->type = NTO;
1144 pungetc();
1146 } else { /* c == '<' */
1147 np->nfile.fd = 0;
1148 switch (c = pgetc()) {
1149 case '<':
1150 if (sizeof (struct nfile) != sizeof (struct nhere)) {
1151 np = (union node *)stalloc(sizeof (struct nhere));
1152 np->nfile.fd = 0;
1154 np->type = NHERE;
1155 heredoc = (struct heredoc *)stalloc(sizeof (struct heredoc));
1156 heredoc->here = np;
1157 if ((c = pgetc()) == '-') {
1158 heredoc->striptabs = 1;
1159 } else {
1160 heredoc->striptabs = 0;
1161 pungetc();
1163 break;
1165 case '&':
1166 np->type = NFROMFD;
1167 break;
1169 case '>':
1170 np->type = NFROMTO;
1171 break;
1173 default:
1174 np->type = NFROM;
1175 pungetc();
1176 break;
1179 if (fd != '\0')
1180 np->nfile.fd = digit_val(fd);
1181 redirnode = np;
1182 goto parseredir_return;
1187 * Parse a substitution. At this point, we have read the dollar sign
1188 * and nothing else.
1191 parsesub: {
1192 int subtype;
1193 int typeloc;
1194 char *p;
1195 static const char types[] = "}-+?=";
1197 c = pgetc_eatbnl();
1198 if (
1199 (checkkwd & CHKEOFMARK) ||
1200 c <= PEOA ||
1201 (c != '(' && c != '{' && !is_name(c) && !is_special(c))
1203 USTPUTC('$', out);
1204 pungetc();
1205 } else if (c == '(') { /* $(command) or $((arith)) */
1206 if (pgetc_eatbnl() == '(') {
1207 PARSEARITH();
1208 } else {
1209 pungetc();
1210 PARSEBACKQNEW();
1212 } else {
1213 USTPUTC(CTLVAR, out);
1214 typeloc = out - (char *)stackblock();
1215 STADJUST(1, out);
1216 subtype = VSNORMAL;
1217 if (likely(c == '{')) {
1218 c = pgetc_eatbnl();
1219 subtype = 0;
1221 varname:
1222 if (is_name(c)) {
1223 do {
1224 STPUTC(c, out);
1225 c = pgetc_eatbnl();
1226 } while (is_in_name(c));
1227 } else if (is_digit(c)) {
1228 do {
1229 STPUTC(c, out);
1230 c = pgetc_eatbnl();
1231 } while (is_digit(c));
1232 } else {
1233 int cc = c;
1235 c = pgetc_eatbnl();
1237 if (!subtype && cc == '#') {
1238 subtype = VSLENGTH;
1240 if (c == '_' || isalnum(c))
1241 goto varname;
1243 cc = c;
1244 c = pgetc_eatbnl();
1245 if (cc == '}' || c != '}') {
1246 pungetc();
1247 subtype = 0;
1248 c = cc;
1249 cc = '#';
1253 if (!is_special(cc)) {
1254 if (subtype == VSLENGTH)
1255 subtype = 0;
1256 goto badsub;
1259 USTPUTC(cc, out);
1262 if (subtype == 0) {
1263 switch (c) {
1264 case ':':
1265 subtype = VSNUL;
1266 c = pgetc_eatbnl();
1267 /*FALLTHROUGH*/
1268 default:
1269 p = strchr(types, c);
1270 if (p == NULL)
1271 break;
1272 subtype |= p - types + VSNORMAL;
1273 break;
1274 case '%':
1275 case '#':
1277 int cc = c;
1278 subtype = c == '#' ? VSTRIMLEFT :
1279 VSTRIMRIGHT;
1280 c = pgetc_eatbnl();
1281 if (c == cc)
1282 subtype++;
1283 else
1284 pungetc();
1285 break;
1288 } else {
1289 badsub:
1290 pungetc();
1292 *((char *)stackblock() + typeloc) = subtype;
1293 if (subtype != VSNORMAL) {
1294 varnest++;
1295 if (dblquote)
1296 dqvarnest++;
1298 STPUTC('=', out);
1300 goto parsesub_return;
1305 * Called to parse command substitutions. Newstyle is set if the command
1306 * is enclosed inside $(...); nlpp is a pointer to the head of the linked
1307 * list of commands (passed by reference), and savelen is the number of
1308 * characters on the top of the stack which must be preserved.
1311 parsebackq: {
1312 struct nodelist **nlpp;
1313 union node *n;
1314 char *str;
1315 size_t savelen;
1316 int uninitialized_var(saveprompt);
1318 str = NULL;
1319 savelen = out - (char *)stackblock();
1320 if (savelen > 0) {
1321 str = alloca(savelen);
1322 memcpy(str, stackblock(), savelen);
1324 if (oldstyle) {
1325 /* We must read until the closing backquote, giving special
1326 treatment to some slashes, and then push the string and
1327 reread it as input, interpreting it normally. */
1328 char *pout;
1329 int pc;
1330 size_t psavelen;
1331 char *pstr;
1334 STARTSTACKSTR(pout);
1335 for (;;) {
1336 if (needprompt) {
1337 setprompt(2);
1339 switch (pc = pgetc()) {
1340 case '`':
1341 goto done;
1343 case '\\':
1344 if ((pc = pgetc()) == '\n') {
1345 nlprompt();
1347 * If eating a newline, avoid putting
1348 * the newline into the new character
1349 * stream (via the STPUTC after the
1350 * switch).
1352 continue;
1354 if (pc != '\\' && pc != '`' && pc != '$'
1355 && (!dblquote || pc != '"'))
1356 STPUTC('\\', pout);
1357 if (pc > PEOA) {
1358 break;
1360 /* fall through */
1362 case PEOF:
1363 case PEOA:
1364 synerror("EOF in backquote substitution");
1366 case '\n':
1367 nlnoprompt();
1368 break;
1370 default:
1371 break;
1373 STPUTC(pc, pout);
1375 done:
1376 STPUTC('\0', pout);
1377 psavelen = pout - (char *)stackblock();
1378 if (psavelen > 0) {
1379 pstr = grabstackstr(pout);
1380 setinputstring(pstr);
1383 nlpp = &bqlist;
1384 while (*nlpp)
1385 nlpp = &(*nlpp)->next;
1386 *nlpp = (struct nodelist *)stalloc(sizeof (struct nodelist));
1387 (*nlpp)->next = NULL;
1389 if (oldstyle) {
1390 saveprompt = doprompt;
1391 doprompt = 0;
1394 n = list(2);
1396 if (oldstyle)
1397 doprompt = saveprompt;
1398 else {
1399 if (readtoken() != TRP)
1400 synexpect(TRP);
1403 (*nlpp)->n = n;
1404 if (oldstyle) {
1406 * Start reading from old file again, ignoring any pushed back
1407 * tokens left from the backquote parsing
1409 popfile();
1410 tokpushback = 0;
1412 while (stackblocksize() <= savelen)
1413 growstackblock();
1414 STARTSTACKSTR(out);
1415 if (str) {
1416 memcpy(out, str, savelen);
1417 STADJUST(savelen, out);
1419 USTPUTC(CTLBACKQ, out);
1420 if (oldstyle)
1421 goto parsebackq_oldreturn;
1422 else
1423 goto parsebackq_newreturn;
1427 * Parse an arithmetic expansion (indicate start of one and set state)
1429 parsearith: {
1431 if (++arinest == 1) {
1432 prevsyntax = syntax;
1433 syntax = ARISYNTAX;
1435 USTPUTC(CTLARI, out);
1436 goto parsearith_return;
1439 } /* end of readtoken */
1443 #ifdef mkinit
1444 INCLUDE "parser.h"
1445 #endif
/*
 * Return a pointer to the first character after the legal variable name
 * (a letter or underscore followed by zero or more letters, underscores,
 * and digits) at the start of name.  If name does not start with such a
 * name, name itself is returned.
 */
char *
endofname(const char *name)
{
	char *p = (char *) name;

	if (is_name(*p)) {
		do {
			p++;
		} while (*p && is_in_name(*p));
	}
	return p;
}
1470 * Called when an unexpected token is read during the parse. The argument
1471 * is the token that is expected, or -1 if more than one type of token can
1472 * occur at this point.
1475 STATIC void
1476 synexpect(int token)
1478 char msg[64];
1480 if (token >= 0) {
1481 fmtstr(msg, 64, "%s unexpected (expecting %s)",
1482 tokname[lasttoken], tokname[token]);
1483 } else {
1484 fmtstr(msg, 64, "%s unexpected", tokname[lasttoken]);
1486 synerror(msg);
1487 /* NOTREACHED */
1491 STATIC void
1492 synerror(const char *msg)
1494 errlinno = plinno;
1495 sh_error("Syntax error: %s", msg);
1496 /* NOTREACHED */
1499 STATIC void
1500 setprompt(int which)
1502 struct stackmark smark;
1503 int show;
1505 needprompt = 0;
1506 whichprompt = which;
1508 #ifdef SMALL
1509 show = 1;
1510 #else
1511 show = !el;
1512 #endif
1513 if (show) {
1514 pushstackmark(&smark, stackblocksize());
1515 out2str(getprompt(NULL));
1516 popstackmark(&smark);
1520 const char *
1521 expandstr(const char *ps)
1523 union node n;
1524 int saveprompt;
1526 /* XXX Fix (char *) cast. */
1527 setinputstring((char *)ps);
1529 saveprompt = doprompt;
1530 doprompt = 0;
1532 readtoken1(pgetc(), DQSYNTAX, FAKEEOFMARK, 0);
1534 doprompt = saveprompt;
1536 popfile();
1538 n.narg.type = NARG;
1539 n.narg.next = NULL;
1540 n.narg.text = wordtext;
1541 n.narg.backquote = backquotelist;
1543 expandarg(&n, NULL, EXP_QUOTED);
1544 return stackblock();
1548 * called by editline -- any expansions to the prompt
1549 * should be added here.
1551 const char *
1552 getprompt(void *unused)
1554 const char *prompt;
1556 switch (whichprompt) {
1557 default:
1558 #ifdef DEBUG
1559 return "<internal prompt error>";
1560 #endif
1561 case 0:
1562 return nullstr;
1563 case 1:
1564 prompt = ps1val();
1565 break;
1566 case 2:
1567 prompt = ps2val();
1568 break;
1571 return expandstr(prompt);
1574 const char *const *
1575 findkwd(const char *s)
1577 return findstring(
1578 s, parsekwd, sizeof(parsekwd) / sizeof(const char *)