Sync usage with man page.
[netbsd-mini2440.git] / bin / sh / parser.c
blob924bcc83409be714d99a2c4e88233e3646113982
1 /* $NetBSD: parser.c,v 1.73 2008/11/08 00:14:05 christos Exp $ */
3 /*-
4 * Copyright (c) 1991, 1993
5 * The Regents of the University of California. All rights reserved.
7 * This code is derived from software contributed to Berkeley by
8 * Kenneth Almquist.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
35 #include <sys/cdefs.h>
36 #ifndef lint
37 #if 0
38 static char sccsid[] = "@(#)parser.c 8.7 (Berkeley) 5/16/95";
39 #else
40 __RCSID("$NetBSD: parser.c,v 1.73 2008/11/08 00:14:05 christos Exp $");
41 #endif
42 #endif /* not lint */
44 #include <stdlib.h>
46 #include "shell.h"
47 #include "parser.h"
48 #include "nodes.h"
49 #include "expand.h" /* defines rmescapes() */
50 #include "eval.h" /* defines commandname */
51 #include "redir.h" /* defines copyfd() */
52 #include "syntax.h"
53 #include "options.h"
54 #include "input.h"
55 #include "output.h"
56 #include "var.h"
57 #include "error.h"
58 #include "memalloc.h"
59 #include "mystring.h"
60 #include "alias.h"
61 #include "show.h"
62 #ifndef SMALL
63 #include "myhistedit.h"
64 #endif
/*
 * Shell command parser.
 */
70 #define EOFMARKLEN 79
72 /* values returned by readtoken */
73 #include "token.h"
75 #define OPENBRACE '{'
76 #define CLOSEBRACE '}'
/*
 * One pending here document.  Entries are chained through `next' on
 * heredoclist until parseheredoc() reads their bodies.
 */
struct heredoc {
	struct heredoc	*next;		/* following here document, or NULL */
	union node	*here;		/* the redirection node it belongs to */
	char		*eofmark;	/* text that terminates the document */
	int		 striptabs;	/* nonzero: drop leading tabs (<<-) */
};
88 static int noalias = 0; /* when set, don't handle aliases */
89 struct heredoc *heredoclist; /* list of here documents to read */
90 int parsebackquote; /* nonzero if we are inside backquotes */
91 int doprompt; /* if set, prompt the user */
92 int needprompt; /* true if interactive and at start of line */
93 int lasttoken; /* last token read */
94 MKINIT int tokpushback; /* last token pushed back */
95 char *wordtext; /* text of last word returned by readtoken */
96 MKINIT int checkkwd; /* 1 == check for kwds, 2 == also eat newlines */
97 struct nodelist *backquotelist;
98 union node *redirnode;
99 struct heredoc *heredoc;
100 int quoteflag; /* set if (part of) last token was quoted */
101 int startlinno; /* line # where last token started */
104 STATIC union node *list(int, int);
105 STATIC union node *andor(void);
106 STATIC union node *pipeline(void);
107 STATIC union node *command(void);
108 STATIC union node *simplecmd(union node **, union node *);
109 STATIC union node *makename(void);
110 STATIC void parsefname(void);
111 STATIC void parseheredoc(void);
112 STATIC int peektoken(void);
113 STATIC int readtoken(void);
114 STATIC int xxreadtoken(void);
115 STATIC int readtoken1(int, char const *, char *, int);
116 STATIC int noexpand(char *);
117 STATIC void synexpect(int) __dead;
118 STATIC void synerror(const char *) __dead;
119 STATIC void setprompt(int);
123 * Read and parse a command. Returns NEOF on end of file. (NULL is a
124 * valid parse tree indicating a blank line.)
127 union node *
128 parsecmd(int interact)
130 int t;
132 tokpushback = 0;
133 doprompt = interact;
134 if (doprompt)
135 setprompt(1);
136 else
137 setprompt(0);
138 needprompt = 0;
139 t = readtoken();
140 if (t == TEOF)
141 return NEOF;
142 if (t == TNL)
143 return NULL;
144 tokpushback++;
145 return list(1, 0);
149 STATIC union node *
150 list(int nlflag, int erflag)
152 union node *n1, *n2, *n3;
153 int tok;
154 TRACE(("list: entered\n"));
156 checkkwd = 2;
157 if (nlflag == 0 && tokendlist[peektoken()])
158 return NULL;
159 n1 = NULL;
160 for (;;) {
161 n2 = andor();
162 tok = readtoken();
163 if (tok == TBACKGND) {
164 if (n2->type == NCMD || n2->type == NPIPE) {
165 n2->ncmd.backgnd = 1;
166 } else if (n2->type == NREDIR) {
167 n2->type = NBACKGND;
168 } else {
169 n3 = (union node *)stalloc(sizeof (struct nredir));
170 n3->type = NBACKGND;
171 n3->nredir.n = n2;
172 n3->nredir.redirect = NULL;
173 n2 = n3;
176 if (n1 == NULL) {
177 n1 = n2;
179 else {
180 n3 = (union node *)stalloc(sizeof (struct nbinary));
181 n3->type = NSEMI;
182 n3->nbinary.ch1 = n1;
183 n3->nbinary.ch2 = n2;
184 n1 = n3;
186 switch (tok) {
187 case TBACKGND:
188 case TSEMI:
189 tok = readtoken();
190 /* fall through */
191 case TNL:
192 if (tok == TNL) {
193 parseheredoc();
194 if (nlflag)
195 return n1;
196 } else {
197 tokpushback++;
199 checkkwd = 2;
200 if (tokendlist[peektoken()])
201 return n1;
202 break;
203 case TEOF:
204 if (heredoclist)
205 parseheredoc();
206 else
207 pungetc(); /* push back EOF on input */
208 return n1;
209 default:
210 if (nlflag || erflag)
211 synexpect(-1);
212 tokpushback++;
213 return n1;
220 STATIC union node *
221 andor(void)
223 union node *n1, *n2, *n3;
224 int t;
226 TRACE(("andor: entered\n"));
227 n1 = pipeline();
228 for (;;) {
229 if ((t = readtoken()) == TAND) {
230 t = NAND;
231 } else if (t == TOR) {
232 t = NOR;
233 } else {
234 tokpushback++;
235 return n1;
237 n2 = pipeline();
238 n3 = (union node *)stalloc(sizeof (struct nbinary));
239 n3->type = t;
240 n3->nbinary.ch1 = n1;
241 n3->nbinary.ch2 = n2;
242 n1 = n3;
248 STATIC union node *
249 pipeline(void)
251 union node *n1, *n2, *pipenode;
252 struct nodelist *lp, *prev;
253 int negate;
255 TRACE(("pipeline: entered\n"));
257 negate = 0;
258 checkkwd = 2;
259 while (readtoken() == TNOT) {
260 TRACE(("pipeline: TNOT recognized\n"));
261 negate = !negate;
263 tokpushback++;
264 n1 = command();
265 if (readtoken() == TPIPE) {
266 pipenode = (union node *)stalloc(sizeof (struct npipe));
267 pipenode->type = NPIPE;
268 pipenode->npipe.backgnd = 0;
269 lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
270 pipenode->npipe.cmdlist = lp;
271 lp->n = n1;
272 do {
273 prev = lp;
274 lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
275 lp->n = command();
276 prev->next = lp;
277 } while (readtoken() == TPIPE);
278 lp->next = NULL;
279 n1 = pipenode;
281 tokpushback++;
282 if (negate) {
283 TRACE(("negate pipeline\n"));
284 n2 = (union node *)stalloc(sizeof (struct nnot));
285 n2->type = NNOT;
286 n2->nnot.com = n1;
287 return n2;
288 } else
289 return n1;
294 STATIC union node *
295 command(void)
297 union node *n1, *n2;
298 union node *ap, **app;
299 union node *cp, **cpp;
300 union node *redir, **rpp;
301 int t, negate = 0;
303 TRACE(("command: entered\n"));
305 checkkwd = 2;
306 redir = NULL;
307 n1 = NULL;
308 rpp = &redir;
310 /* Check for redirection which may precede command */
311 while (readtoken() == TREDIR) {
312 *rpp = n2 = redirnode;
313 rpp = &n2->nfile.next;
314 parsefname();
316 tokpushback++;
318 while (readtoken() == TNOT) {
319 TRACE(("command: TNOT recognized\n"));
320 negate = !negate;
322 tokpushback++;
324 switch (readtoken()) {
325 case TIF:
326 n1 = (union node *)stalloc(sizeof (struct nif));
327 n1->type = NIF;
328 n1->nif.test = list(0, 0);
329 if (readtoken() != TTHEN)
330 synexpect(TTHEN);
331 n1->nif.ifpart = list(0, 0);
332 n2 = n1;
333 while (readtoken() == TELIF) {
334 n2->nif.elsepart = (union node *)stalloc(sizeof (struct nif));
335 n2 = n2->nif.elsepart;
336 n2->type = NIF;
337 n2->nif.test = list(0, 0);
338 if (readtoken() != TTHEN)
339 synexpect(TTHEN);
340 n2->nif.ifpart = list(0, 0);
342 if (lasttoken == TELSE)
343 n2->nif.elsepart = list(0, 0);
344 else {
345 n2->nif.elsepart = NULL;
346 tokpushback++;
348 if (readtoken() != TFI)
349 synexpect(TFI);
350 checkkwd = 1;
351 break;
352 case TWHILE:
353 case TUNTIL: {
354 int got;
355 n1 = (union node *)stalloc(sizeof (struct nbinary));
356 n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL;
357 n1->nbinary.ch1 = list(0, 0);
358 if ((got=readtoken()) != TDO) {
359 TRACE(("expecting DO got %s %s\n", tokname[got], got == TWORD ? wordtext : ""));
360 synexpect(TDO);
362 n1->nbinary.ch2 = list(0, 0);
363 if (readtoken() != TDONE)
364 synexpect(TDONE);
365 checkkwd = 1;
366 break;
368 case TFOR:
369 if (readtoken() != TWORD || quoteflag || ! goodname(wordtext))
370 synerror("Bad for loop variable");
371 n1 = (union node *)stalloc(sizeof (struct nfor));
372 n1->type = NFOR;
373 n1->nfor.var = wordtext;
374 if (readtoken() == TWORD && ! quoteflag && equal(wordtext, "in")) {
375 app = &ap;
376 while (readtoken() == TWORD) {
377 n2 = (union node *)stalloc(sizeof (struct narg));
378 n2->type = NARG;
379 n2->narg.text = wordtext;
380 n2->narg.backquote = backquotelist;
381 *app = n2;
382 app = &n2->narg.next;
384 *app = NULL;
385 n1->nfor.args = ap;
386 if (lasttoken != TNL && lasttoken != TSEMI)
387 synexpect(-1);
388 } else {
389 static char argvars[5] = {CTLVAR, VSNORMAL|VSQUOTE,
390 '@', '=', '\0'};
391 n2 = (union node *)stalloc(sizeof (struct narg));
392 n2->type = NARG;
393 n2->narg.text = argvars;
394 n2->narg.backquote = NULL;
395 n2->narg.next = NULL;
396 n1->nfor.args = n2;
398 * Newline or semicolon here is optional (but note
399 * that the original Bourne shell only allowed NL).
401 if (lasttoken != TNL && lasttoken != TSEMI)
402 tokpushback++;
404 checkkwd = 2;
405 if ((t = readtoken()) == TDO)
406 t = TDONE;
407 else if (t == TBEGIN)
408 t = TEND;
409 else
410 synexpect(-1);
411 n1->nfor.body = list(0, 0);
412 if (readtoken() != t)
413 synexpect(t);
414 checkkwd = 1;
415 break;
416 case TCASE:
417 n1 = (union node *)stalloc(sizeof (struct ncase));
418 n1->type = NCASE;
419 if (readtoken() != TWORD)
420 synexpect(TWORD);
421 n1->ncase.expr = n2 = (union node *)stalloc(sizeof (struct narg));
422 n2->type = NARG;
423 n2->narg.text = wordtext;
424 n2->narg.backquote = backquotelist;
425 n2->narg.next = NULL;
426 while (readtoken() == TNL);
427 if (lasttoken != TWORD || ! equal(wordtext, "in"))
428 synerror("expecting \"in\"");
429 cpp = &n1->ncase.cases;
430 noalias = 1;
431 checkkwd = 2, readtoken();
432 do {
433 *cpp = cp = (union node *)stalloc(sizeof (struct nclist));
434 if (lasttoken == TLP)
435 readtoken();
436 cp->type = NCLIST;
437 app = &cp->nclist.pattern;
438 for (;;) {
439 *app = ap = (union node *)stalloc(sizeof (struct narg));
440 ap->type = NARG;
441 ap->narg.text = wordtext;
442 ap->narg.backquote = backquotelist;
443 if (checkkwd = 2, readtoken() != TPIPE)
444 break;
445 app = &ap->narg.next;
446 readtoken();
448 ap->narg.next = NULL;
449 noalias = 0;
450 if (lasttoken != TRP) {
451 synexpect(TRP);
453 cp->nclist.body = list(0, 0);
455 checkkwd = 2;
456 if ((t = readtoken()) != TESAC) {
457 if (t != TENDCASE) {
458 noalias = 0;
459 synexpect(TENDCASE);
460 } else {
461 noalias = 1;
462 checkkwd = 2;
463 readtoken();
466 cpp = &cp->nclist.next;
467 } while(lasttoken != TESAC);
468 noalias = 0;
469 *cpp = NULL;
470 checkkwd = 1;
471 break;
472 case TLP:
473 n1 = (union node *)stalloc(sizeof (struct nredir));
474 n1->type = NSUBSHELL;
475 n1->nredir.n = list(0, 0);
476 n1->nredir.redirect = NULL;
477 if (readtoken() != TRP)
478 synexpect(TRP);
479 checkkwd = 1;
480 break;
481 case TBEGIN:
482 n1 = list(0, 0);
483 if (readtoken() != TEND)
484 synexpect(TEND);
485 checkkwd = 1;
486 break;
487 /* Handle an empty command like other simple commands. */
488 case TSEMI:
490 * An empty command before a ; doesn't make much sense, and
491 * should certainly be disallowed in the case of `if ;'.
493 if (!redir)
494 synexpect(-1);
495 case TAND:
496 case TOR:
497 case TNL:
498 case TEOF:
499 case TWORD:
500 case TRP:
501 tokpushback++;
502 n1 = simplecmd(rpp, redir);
503 goto checkneg;
504 default:
505 synexpect(-1);
506 /* NOTREACHED */
509 /* Now check for redirection which may follow command */
510 while (readtoken() == TREDIR) {
511 *rpp = n2 = redirnode;
512 rpp = &n2->nfile.next;
513 parsefname();
515 tokpushback++;
516 *rpp = NULL;
517 if (redir) {
518 if (n1->type != NSUBSHELL) {
519 n2 = (union node *)stalloc(sizeof (struct nredir));
520 n2->type = NREDIR;
521 n2->nredir.n = n1;
522 n1 = n2;
524 n1->nredir.redirect = redir;
527 checkneg:
528 if (negate) {
529 TRACE(("negate command\n"));
530 n2 = (union node *)stalloc(sizeof (struct nnot));
531 n2->type = NNOT;
532 n2->nnot.com = n1;
533 return n2;
535 else
536 return n1;
540 STATIC union node *
541 simplecmd(union node **rpp, union node *redir)
543 union node *args, **app;
544 union node **orig_rpp = rpp;
545 union node *n = NULL, *n2;
546 int negate = 0;
548 /* If we don't have any redirections already, then we must reset */
549 /* rpp to be the address of the local redir variable. */
550 if (redir == 0)
551 rpp = &redir;
553 args = NULL;
554 app = &args;
556 * We save the incoming value, because we need this for shell
557 * functions. There can not be a redirect or an argument between
558 * the function name and the open parenthesis.
560 orig_rpp = rpp;
562 while (readtoken() == TNOT) {
563 TRACE(("simplcmd: TNOT recognized\n"));
564 negate = !negate;
566 tokpushback++;
568 for (;;) {
569 if (readtoken() == TWORD) {
570 n = (union node *)stalloc(sizeof (struct narg));
571 n->type = NARG;
572 n->narg.text = wordtext;
573 n->narg.backquote = backquotelist;
574 *app = n;
575 app = &n->narg.next;
576 } else if (lasttoken == TREDIR) {
577 *rpp = n = redirnode;
578 rpp = &n->nfile.next;
579 parsefname(); /* read name of redirection file */
580 } else if (lasttoken == TLP && app == &args->narg.next
581 && rpp == orig_rpp) {
582 /* We have a function */
583 if (readtoken() != TRP)
584 synexpect(TRP);
585 rmescapes(n->narg.text);
586 if (!goodname(n->narg.text))
587 synerror("Bad function name");
588 n->type = NDEFUN;
589 n->narg.next = command();
590 goto checkneg;
591 } else {
592 tokpushback++;
593 break;
596 *app = NULL;
597 *rpp = NULL;
598 n = (union node *)stalloc(sizeof (struct ncmd));
599 n->type = NCMD;
600 n->ncmd.backgnd = 0;
601 n->ncmd.args = args;
602 n->ncmd.redirect = redir;
604 checkneg:
605 if (negate) {
606 TRACE(("negate simplecmd\n"));
607 n2 = (union node *)stalloc(sizeof (struct nnot));
608 n2->type = NNOT;
609 n2->nnot.com = n;
610 return n2;
612 else
613 return n;
616 STATIC union node *
617 makename(void)
619 union node *n;
621 n = (union node *)stalloc(sizeof (struct narg));
622 n->type = NARG;
623 n->narg.next = NULL;
624 n->narg.text = wordtext;
625 n->narg.backquote = backquotelist;
626 return n;
629 void fixredir(union node *n, const char *text, int err)
631 TRACE(("Fix redir %s %d\n", text, err));
632 if (!err)
633 n->ndup.vname = NULL;
635 if (is_digit(text[0]) && text[1] == '\0')
636 n->ndup.dupfd = digit_val(text[0]);
637 else if (text[0] == '-' && text[1] == '\0')
638 n->ndup.dupfd = -1;
639 else {
641 if (err)
642 synerror("Bad fd number");
643 else
644 n->ndup.vname = makename();
649 STATIC void
650 parsefname(void)
652 union node *n = redirnode;
654 if (readtoken() != TWORD)
655 synexpect(-1);
656 if (n->type == NHERE) {
657 struct heredoc *here = heredoc;
658 struct heredoc *p;
659 int i;
661 if (quoteflag == 0)
662 n->type = NXHERE;
663 TRACE(("Here document %d\n", n->type));
664 if (here->striptabs) {
665 while (*wordtext == '\t')
666 wordtext++;
668 if (! noexpand(wordtext) || (i = strlen(wordtext)) == 0 || i > EOFMARKLEN)
669 synerror("Illegal eof marker for << redirection");
670 rmescapes(wordtext);
671 here->eofmark = wordtext;
672 here->next = NULL;
673 if (heredoclist == NULL)
674 heredoclist = here;
675 else {
676 for (p = heredoclist ; p->next ; p = p->next);
677 p->next = here;
679 } else if (n->type == NTOFD || n->type == NFROMFD) {
680 fixredir(n, wordtext, 0);
681 } else {
682 n->nfile.fname = makename();
688 * Input any here documents.
691 STATIC void
692 parseheredoc(void)
694 struct heredoc *here;
695 union node *n;
697 while (heredoclist) {
698 here = heredoclist;
699 heredoclist = here->next;
700 if (needprompt) {
701 setprompt(2);
702 needprompt = 0;
704 readtoken1(pgetc(), here->here->type == NHERE? SQSYNTAX : DQSYNTAX,
705 here->eofmark, here->striptabs);
706 n = (union node *)stalloc(sizeof (struct narg));
707 n->narg.type = NARG;
708 n->narg.next = NULL;
709 n->narg.text = wordtext;
710 n->narg.backquote = backquotelist;
711 here->here->nhere.doc = n;
715 STATIC int
716 peektoken(void)
718 int t;
720 t = readtoken();
721 tokpushback++;
722 return (t);
725 STATIC int
726 readtoken(void)
728 int t;
729 int savecheckkwd = checkkwd;
730 #ifdef DEBUG
731 int alreadyseen = tokpushback;
732 #endif
733 struct alias *ap;
735 top:
736 t = xxreadtoken();
738 if (checkkwd) {
740 * eat newlines
742 if (checkkwd == 2) {
743 checkkwd = 0;
744 while (t == TNL) {
745 parseheredoc();
746 t = xxreadtoken();
748 } else
749 checkkwd = 0;
751 * check for keywords and aliases
753 if (t == TWORD && !quoteflag)
755 const char *const *pp;
757 for (pp = parsekwd; *pp; pp++) {
758 if (**pp == *wordtext && equal(*pp, wordtext))
760 lasttoken = t = pp -
761 parsekwd + KWDOFFSET;
762 TRACE(("keyword %s recognized\n", tokname[t]));
763 goto out;
766 if(!noalias &&
767 (ap = lookupalias(wordtext, 1)) != NULL) {
768 pushstring(ap->val, strlen(ap->val), ap);
769 checkkwd = savecheckkwd;
770 goto top;
773 out:
774 checkkwd = (t == TNOT) ? savecheckkwd : 0;
776 TRACE(("%stoken %s %s\n", alreadyseen ? "reread " : "", tokname[t], t == TWORD ? wordtext : ""));
777 return (t);
782 * Read the next input token.
783 * If the token is a word, we set backquotelist to the list of cmds in
784 * backquotes. We set quoteflag to true if any part of the word was
785 * quoted.
786 * If the token is TREDIR, then we set redirnode to a structure containing
787 * the redirection.
788 * In all cases, the variable startlinno is set to the number of the line
789 * on which the token starts.
791 * [Change comment: here documents and internal procedures]
792 * [Readtoken shouldn't have any arguments. Perhaps we should make the
793 * word parsing code into a separate routine. In this case, readtoken
794 * doesn't need to have any internal procedures, but parseword does.
795 * We could also make parseoperator in essence the main routine, and
796 * have parseword (readtoken1?) handle both words and redirection.]
799 #define RETURN(token) return lasttoken = token
801 STATIC int
802 xxreadtoken(void)
804 int c;
806 if (tokpushback) {
807 tokpushback = 0;
808 return lasttoken;
810 if (needprompt) {
811 setprompt(2);
812 needprompt = 0;
814 startlinno = plinno;
815 for (;;) { /* until token or start of word found */
816 c = pgetc_macro();
817 if (c == ' ' || c == '\t')
818 continue; /* quick check for white space first */
819 switch (c) {
820 case ' ': case '\t':
821 continue;
822 case '#':
823 while ((c = pgetc()) != '\n' && c != PEOF);
824 pungetc();
825 continue;
826 case '\\':
827 if (pgetc() == '\n') {
828 startlinno = ++plinno;
829 if (doprompt)
830 setprompt(2);
831 else
832 setprompt(0);
833 continue;
835 pungetc();
836 goto breakloop;
837 case '\n':
838 plinno++;
839 needprompt = doprompt;
840 RETURN(TNL);
841 case PEOF:
842 RETURN(TEOF);
843 case '&':
844 if (pgetc() == '&')
845 RETURN(TAND);
846 pungetc();
847 RETURN(TBACKGND);
848 case '|':
849 if (pgetc() == '|')
850 RETURN(TOR);
851 pungetc();
852 RETURN(TPIPE);
853 case ';':
854 if (pgetc() == ';')
855 RETURN(TENDCASE);
856 pungetc();
857 RETURN(TSEMI);
858 case '(':
859 RETURN(TLP);
860 case ')':
861 RETURN(TRP);
862 default:
863 goto breakloop;
866 breakloop:
867 return readtoken1(c, BASESYNTAX, (char *)NULL, 0);
868 #undef RETURN
/*
 * If eofmark is NULL, read a word or a redirection symbol.  If eofmark
 * is not NULL, read a here document.  In the latter case, eofmark is the
 * word which marks the end of the document and striptabs is true if
 * leading tabs should be stripped from the document.  The argument firstc
 * is the first character of the input token or document.
 *
 * Because C does not have internal subroutines, I have simulated them
 * using goto's to implement the subroutine linkage.  The following macros
 * will run code that appears at the end of readtoken1.
 */
885 #define CHECKEND() {goto checkend; checkend_return:;}
886 #define PARSEREDIR() {goto parseredir; parseredir_return:;}
887 #define PARSESUB() {goto parsesub; parsesub_return:;}
888 #define PARSEBACKQOLD() {oldstyle = 1; goto parsebackq; parsebackq_oldreturn:;}
889 #define PARSEBACKQNEW() {oldstyle = 0; goto parsebackq; parsebackq_newreturn:;}
890 #define PARSEARITH() {goto parsearith; parsearith_return:;}
/*
 * Keep track of nested doublequotes in dblquote and dblquotep.
 * We use dblquote for the first 32 levels, and we expand to a malloc'ed
 * region for levels above that.  Usually we never need to malloc.
 * This code assumes that an int is 32 bits.  We don't use uint32_t,
 * because the rest of the code does not.
 *
 * The macros operate on the variables varnest, dblquote and dblquotep
 * of the enclosing function.  Bits are built with unsigned shifts so
 * that level 31 does not shift into the sign bit of an int, and the
 * two statement macros are wrapped in do { } while (0) so that each
 * behaves as a single statement inside an unbraced if/else.
 */
#define ISDBLQUOTE() \
	(((varnest < 32) ? \
	    ((unsigned int)dblquote & (1U << varnest)) : \
	    ((unsigned int)dblquotep[(varnest / 32) - 1] & \
		(1U << (varnest % 32)))) != 0)

#define SETDBLQUOTE() \
	do { \
		if (varnest < 32) \
			dblquote |= (int)(1U << varnest); \
		else \
			dblquotep[(varnest / 32) - 1] |= \
			    (int)(1U << (varnest % 32)); \
	} while (/*CONSTCOND*/ 0)

#define CLRDBLQUOTE() \
	do { \
		if (varnest < 32) \
			dblquote &= (int)~(1U << varnest); \
		else \
			dblquotep[(varnest / 32) - 1] &= \
			    (int)~(1U << (varnest % 32)); \
	} while (/*CONSTCOND*/ 0)
914 STATIC int
915 readtoken1(int firstc, char const *syn, char *eofmark, int striptabs)
917 char const * volatile syntax = syn;
918 int c = firstc;
919 char * volatile out;
920 int len;
921 char line[EOFMARKLEN + 1];
922 struct nodelist *bqlist;
923 volatile int quotef;
924 int * volatile dblquotep = NULL;
925 volatile size_t maxnest = 32;
926 volatile int dblquote;
927 volatile size_t varnest; /* levels of variables expansion */
928 volatile int arinest; /* levels of arithmetic expansion */
929 volatile int parenlevel; /* levels of parens in arithmetic */
930 volatile int oldstyle;
931 char const * volatile prevsyntax; /* syntax before arithmetic */
932 #ifdef __GNUC__
933 prevsyntax = NULL; /* XXX gcc4 */
934 #endif
936 startlinno = plinno;
937 dblquote = 0;
938 varnest = 0;
939 if (syntax == DQSYNTAX) {
940 SETDBLQUOTE();
942 quotef = 0;
943 bqlist = NULL;
944 arinest = 0;
945 parenlevel = 0;
947 STARTSTACKSTR(out);
948 loop: { /* for each line, until end of word */
949 #if ATTY
950 if (c == '\034' && doprompt
951 && attyset() && ! equal(termval(), "emacs")) {
952 attyline();
953 if (syntax == BASESYNTAX)
954 return readtoken();
955 c = pgetc();
956 goto loop;
958 #endif
959 CHECKEND(); /* set c to PEOF if at end of here document */
960 for (;;) { /* until end of line or end of word */
961 CHECKSTRSPACE(4, out); /* permit 4 calls to USTPUTC */
962 switch(syntax[c]) {
963 case CNL: /* '\n' */
964 if (syntax == BASESYNTAX)
965 goto endword; /* exit outer loop */
966 USTPUTC(c, out);
967 plinno++;
968 if (doprompt)
969 setprompt(2);
970 else
971 setprompt(0);
972 c = pgetc();
973 goto loop; /* continue outer loop */
974 case CWORD:
975 USTPUTC(c, out);
976 break;
977 case CCTL:
978 if (eofmark == NULL || ISDBLQUOTE())
979 USTPUTC(CTLESC, out);
980 USTPUTC(c, out);
981 break;
982 case CBACK: /* backslash */
983 c = pgetc();
984 if (c == PEOF) {
985 USTPUTC('\\', out);
986 pungetc();
987 break;
989 if (c == '\n') {
990 if (doprompt)
991 setprompt(2);
992 else
993 setprompt(0);
994 break;
996 quotef = 1;
997 if (ISDBLQUOTE() && c != '\\' &&
998 c != '`' && c != '$' &&
999 (c != '"' || eofmark != NULL))
1000 USTPUTC('\\', out);
1001 if (SQSYNTAX[c] == CCTL)
1002 USTPUTC(CTLESC, out);
1003 else if (eofmark == NULL) {
1004 USTPUTC(CTLQUOTEMARK, out);
1005 USTPUTC(c, out);
1006 if (varnest != 0)
1007 USTPUTC(CTLQUOTEEND, out);
1008 break;
1010 USTPUTC(c, out);
1011 break;
1012 case CSQUOTE:
1013 if (syntax != SQSYNTAX) {
1014 if (eofmark == NULL)
1015 USTPUTC(CTLQUOTEMARK, out);
1016 quotef = 1;
1017 syntax = SQSYNTAX;
1018 break;
1020 if (eofmark != NULL && arinest == 0 &&
1021 varnest == 0) {
1022 /* Ignore inside quoted here document */
1023 USTPUTC(c, out);
1024 break;
1026 /* End of single quotes... */
1027 if (arinest)
1028 syntax = ARISYNTAX;
1029 else {
1030 syntax = BASESYNTAX;
1031 if (varnest != 0)
1032 USTPUTC(CTLQUOTEEND, out);
1034 break;
1035 case CDQUOTE:
1036 if (eofmark != NULL && arinest == 0 &&
1037 varnest == 0) {
1038 /* Ignore inside here document */
1039 USTPUTC(c, out);
1040 break;
1042 quotef = 1;
1043 if (arinest) {
1044 if (ISDBLQUOTE()) {
1045 syntax = ARISYNTAX;
1046 CLRDBLQUOTE();
1047 } else {
1048 syntax = DQSYNTAX;
1049 SETDBLQUOTE();
1050 USTPUTC(CTLQUOTEMARK, out);
1052 break;
1054 if (eofmark != NULL)
1055 break;
1056 if (ISDBLQUOTE()) {
1057 if (varnest != 0)
1058 USTPUTC(CTLQUOTEEND, out);
1059 syntax = BASESYNTAX;
1060 CLRDBLQUOTE();
1061 } else {
1062 syntax = DQSYNTAX;
1063 SETDBLQUOTE();
1064 USTPUTC(CTLQUOTEMARK, out);
1066 break;
1067 case CVAR: /* '$' */
1068 PARSESUB(); /* parse substitution */
1069 break;
1070 case CENDVAR: /* CLOSEBRACE */
1071 if (varnest > 0 && !ISDBLQUOTE()) {
1072 varnest--;
1073 USTPUTC(CTLENDVAR, out);
1074 } else {
1075 USTPUTC(c, out);
1077 break;
1078 case CLP: /* '(' in arithmetic */
1079 parenlevel++;
1080 USTPUTC(c, out);
1081 break;
1082 case CRP: /* ')' in arithmetic */
1083 if (parenlevel > 0) {
1084 USTPUTC(c, out);
1085 --parenlevel;
1086 } else {
1087 if (pgetc() == ')') {
1088 if (--arinest == 0) {
1089 USTPUTC(CTLENDARI, out);
1090 syntax = prevsyntax;
1091 if (syntax == DQSYNTAX)
1092 SETDBLQUOTE();
1093 else
1094 CLRDBLQUOTE();
1095 } else
1096 USTPUTC(')', out);
1097 } else {
1099 * unbalanced parens
1100 * (don't 2nd guess - no error)
1102 pungetc();
1103 USTPUTC(')', out);
1106 break;
1107 case CBQUOTE: /* '`' */
1108 PARSEBACKQOLD();
1109 break;
1110 case CEOF:
1111 goto endword; /* exit outer loop */
1112 default:
1113 if (varnest == 0)
1114 goto endword; /* exit outer loop */
1115 USTPUTC(c, out);
1117 c = pgetc_macro();
1120 endword:
1121 if (syntax == ARISYNTAX)
1122 synerror("Missing '))'");
1123 if (syntax != BASESYNTAX && /* ! parsebackquote && */ eofmark == NULL)
1124 synerror("Unterminated quoted string");
1125 if (varnest != 0) {
1126 startlinno = plinno;
1127 /* { */
1128 synerror("Missing '}'");
1130 USTPUTC('\0', out);
1131 len = out - stackblock();
1132 out = stackblock();
1133 if (eofmark == NULL) {
1134 if ((c == '>' || c == '<')
1135 && quotef == 0
1136 && len <= 2
1137 && (*out == '\0' || is_digit(*out))) {
1138 PARSEREDIR();
1139 return lasttoken = TREDIR;
1140 } else {
1141 pungetc();
1144 quoteflag = quotef;
1145 backquotelist = bqlist;
1146 grabstackblock(len);
1147 wordtext = out;
1148 if (dblquotep != NULL)
1149 ckfree(dblquotep);
1150 return lasttoken = TWORD;
1151 /* end of readtoken routine */
1156 * Check to see whether we are at the end of the here document. When this
1157 * is called, c is set to the first character of the next input line. If
1158 * we are at the end of the here document, this routine sets the c to PEOF.
1161 checkend: {
1162 if (eofmark) {
1163 if (striptabs) {
1164 while (c == '\t')
1165 c = pgetc();
1167 if (c == *eofmark) {
1168 if (pfgets(line, sizeof line) != NULL) {
1169 char *p, *q;
1171 p = line;
1172 for (q = eofmark + 1 ; *q && *p == *q ; p++, q++);
1173 if ((*p == '\0' || *p == '\n') && *q == '\0') {
1174 c = PEOF;
1175 plinno++;
1176 needprompt = doprompt;
1177 } else {
1178 pushstring(line, strlen(line), NULL);
1183 goto checkend_return;
1188 * Parse a redirection operator. The variable "out" points to a string
1189 * specifying the fd to be redirected. The variable "c" contains the
1190 * first character of the redirection operator.
1193 parseredir: {
1194 char fd = *out;
1195 union node *np;
1197 np = (union node *)stalloc(sizeof (struct nfile));
1198 if (c == '>') {
1199 np->nfile.fd = 1;
1200 c = pgetc();
1201 if (c == '>')
1202 np->type = NAPPEND;
1203 else if (c == '|')
1204 np->type = NCLOBBER;
1205 else if (c == '&')
1206 np->type = NTOFD;
1207 else {
1208 np->type = NTO;
1209 pungetc();
1211 } else { /* c == '<' */
1212 np->nfile.fd = 0;
1213 switch (c = pgetc()) {
1214 case '<':
1215 if (sizeof (struct nfile) != sizeof (struct nhere)) {
1216 np = (union node *)stalloc(sizeof (struct nhere));
1217 np->nfile.fd = 0;
1219 np->type = NHERE;
1220 heredoc = (struct heredoc *)stalloc(sizeof (struct heredoc));
1221 heredoc->here = np;
1222 if ((c = pgetc()) == '-') {
1223 heredoc->striptabs = 1;
1224 } else {
1225 heredoc->striptabs = 0;
1226 pungetc();
1228 break;
1230 case '&':
1231 np->type = NFROMFD;
1232 break;
1234 case '>':
1235 np->type = NFROMTO;
1236 break;
1238 default:
1239 np->type = NFROM;
1240 pungetc();
1241 break;
1244 if (fd != '\0')
1245 np->nfile.fd = digit_val(fd);
1246 redirnode = np;
1247 goto parseredir_return;
1252 * Parse a substitution. At this point, we have read the dollar sign
1253 * and nothing else.
1256 parsesub: {
1257 int subtype;
1258 int typeloc;
1259 int flags;
1260 char *p;
1261 static const char types[] = "}-+?=";
1263 c = pgetc();
1264 if (c != '(' && c != OPENBRACE && !is_name(c) && !is_special(c)) {
1265 USTPUTC('$', out);
1266 pungetc();
1267 } else if (c == '(') { /* $(command) or $((arith)) */
1268 if (pgetc() == '(') {
1269 PARSEARITH();
1270 } else {
1271 pungetc();
1272 PARSEBACKQNEW();
1274 } else {
1275 USTPUTC(CTLVAR, out);
1276 typeloc = out - stackblock();
1277 USTPUTC(VSNORMAL, out);
1278 subtype = VSNORMAL;
1279 if (c == OPENBRACE) {
1280 c = pgetc();
1281 if (c == '#') {
1282 if ((c = pgetc()) == CLOSEBRACE)
1283 c = '#';
1284 else
1285 subtype = VSLENGTH;
1287 else
1288 subtype = 0;
1290 if (is_name(c)) {
1291 do {
1292 STPUTC(c, out);
1293 c = pgetc();
1294 } while (is_in_name(c));
1295 } else if (is_digit(c)) {
1296 do {
1297 USTPUTC(c, out);
1298 c = pgetc();
1299 } while (is_digit(c));
1301 else if (is_special(c)) {
1302 USTPUTC(c, out);
1303 c = pgetc();
1305 else
1306 badsub: synerror("Bad substitution");
1308 STPUTC('=', out);
1309 flags = 0;
1310 if (subtype == 0) {
1311 switch (c) {
1312 case ':':
1313 flags = VSNUL;
1314 c = pgetc();
1315 /*FALLTHROUGH*/
1316 default:
1317 p = strchr(types, c);
1318 if (p == NULL)
1319 goto badsub;
1320 subtype = p - types + VSNORMAL;
1321 break;
1322 case '%':
1323 case '#':
1325 int cc = c;
1326 subtype = c == '#' ? VSTRIMLEFT :
1327 VSTRIMRIGHT;
1328 c = pgetc();
1329 if (c == cc)
1330 subtype++;
1331 else
1332 pungetc();
1333 break;
1336 } else {
1337 pungetc();
1339 if (ISDBLQUOTE() || arinest)
1340 flags |= VSQUOTE;
1341 *(stackblock() + typeloc) = subtype | flags;
1342 if (subtype != VSNORMAL) {
1343 varnest++;
1344 if (varnest >= maxnest) {
1345 dblquotep = ckrealloc(dblquotep, maxnest / 8);
1346 dblquotep[(maxnest / 32) - 1] = 0;
1347 maxnest += 32;
1351 goto parsesub_return;
1356 * Called to parse command substitutions. oldstyle is nonzero when the
1357 * command is enclosed in backquotes and zero when it is enclosed in $(...).
1358 * The parsed command is appended to the tail of the bqlist node list; the
1359 * savelen bytes already on the stack string are saved and restored around it.
1362 parsebackq: {
1363 struct nodelist **nlpp;
1364 int savepbq;
1365 union node *n;
1366 char *volatile str;
1367 struct jmploc jmploc;
1368 struct jmploc *volatile savehandler;
1369 int savelen;
1370 int saveprompt;
1372 savepbq = parsebackquote;
1373 if (setjmp(jmploc.loc)) {
1374 if (str)
1375 ckfree(str);
1376 parsebackquote = 0;
1377 handler = savehandler;
1378 longjmp(handler->loc, 1);
1380 INTOFF;
1381 str = NULL;
1382 savelen = out - stackblock();
1383 if (savelen > 0) {
1384 str = ckmalloc(savelen);
1385 memcpy(str, stackblock(), savelen);
1387 savehandler = handler;
1388 handler = &jmploc;
1389 INTON;
1390 if (oldstyle) {
1391 /* We must read until the closing backquote, giving special
1392 treatment to some slashes, and then push the string and
1393 reread it as input, interpreting it normally. */
1394 char *pout;
1395 int pc;
1396 int psavelen;
1397 char *pstr;
1400 STARTSTACKSTR(pout);
1401 for (;;) {
1402 if (needprompt) {
1403 setprompt(2);
1404 needprompt = 0;
1406 switch (pc = pgetc()) {
1407 case '`':
1408 goto done;
1410 case '\\':
1411 if ((pc = pgetc()) == '\n') {
1412 plinno++;
1413 if (doprompt)
1414 setprompt(2);
1415 else
1416 setprompt(0);
1418 * If eating a newline, avoid putting
1419 * the newline into the new character
1420 * stream (via the STPUTC after the
1421 * switch).
1423 continue;
1425 if (pc != '\\' && pc != '`' && pc != '$'
1426 && (!ISDBLQUOTE() || pc != '"'))
1427 STPUTC('\\', pout);
1428 break;
1430 case '\n':
1431 plinno++;
1432 needprompt = doprompt;
1433 break;
1435 case PEOF:
1436 startlinno = plinno;
1437 synerror("EOF in backquote substitution");
1438 break;
1440 default:
1441 break;
1443 STPUTC(pc, pout);
1445 done:
1446 STPUTC('\0', pout);
1447 psavelen = pout - stackblock();
1448 if (psavelen > 0) {
1449 pstr = grabstackstr(pout);
1450 setinputstring(pstr, 1);
1453 nlpp = &bqlist;
1454 while (*nlpp)
1455 nlpp = &(*nlpp)->next;
1456 *nlpp = (struct nodelist *)stalloc(sizeof (struct nodelist));
1457 (*nlpp)->next = NULL;
1458 parsebackquote = oldstyle;
1460 if (oldstyle) {
1461 saveprompt = doprompt;
1462 doprompt = 0;
1463 } else
1464 saveprompt = 0;
1466 n = list(0, oldstyle);
1468 if (oldstyle)
1469 doprompt = saveprompt;
1470 else {
1471 if (readtoken() != TRP)
1472 synexpect(TRP);
1475 (*nlpp)->n = n;
1476 if (oldstyle) {
1478 * Start reading from old file again, ignoring any pushed back
1479 * tokens left from the backquote parsing
1481 popfile();
1482 tokpushback = 0;
1484 while (stackblocksize() <= savelen)
1485 growstackblock();
1486 STARTSTACKSTR(out);
1487 if (str) {
1488 memcpy(out, str, savelen);
1489 STADJUST(savelen, out);
1490 INTOFF;
1491 ckfree(str);
1492 str = NULL;
1493 INTON;
1495 parsebackquote = savepbq;
1496 handler = savehandler;
1497 if (arinest || ISDBLQUOTE())
1498 USTPUTC(CTLBACKQ | CTLQUOTE, out);
1499 else
1500 USTPUTC(CTLBACKQ, out);
1501 if (oldstyle)
1502 goto parsebackq_oldreturn;
1503 else
1504 goto parsebackq_newreturn;
1508 * Parse an arithmetic expansion (indicate start of one and set state)
1510 parsearith: {
1512 if (++arinest == 1) {
1513 prevsyntax = syntax;
1514 syntax = ARISYNTAX;
1515 USTPUTC(CTLARI, out);
1516 if (ISDBLQUOTE())
1517 USTPUTC('"',out);
1518 else
1519 USTPUTC(' ',out);
1520 } else {
1522 * we collapse embedded arithmetic expansion to
1523 * parenthesis, which should be equivalent
1525 USTPUTC('(', out);
1527 goto parsearith_return;
1530 } /* end of readtoken */
/*
 * Parser state cleared on reset: any pushed-back token is discarded and
 * keyword checking is switched off.
 * NOTE(review): this block is only visible when `mkinit' is defined, so it
 * is presumably harvested by the mkinit tool rather than compiled here --
 * confirm against the build.
 */
#ifdef mkinit
RESET {
	checkkwd = 0;
	tokpushback = 0;
}
#endif
1542 * Returns true if the text contains nothing to expand (no dollar signs
1543 * or backquotes).
1546 STATIC int
1547 noexpand(char *text)
1549 char *p;
1550 char c;
1552 p = text;
1553 while ((c = *p++) != '\0') {
1554 if (c == CTLQUOTEMARK)
1555 continue;
1556 if (c == CTLESC)
1557 p++;
1558 else if (BASESYNTAX[(int)c] == CCTL)
1559 return 0;
1561 return 1;
/*
 * Return true if the argument is a legal variable name: the first
 * character must satisfy is_name() and every following character, up to
 * the terminating NUL, must satisfy is_in_name() (per the original note,
 * a letter or underscore followed by letters, underscores, and digits).
 *
 * Returns 1 for a legal name, 0 otherwise (including the empty string,
 * provided is_name('\0') is false -- TODO confirm).
 */
int
goodname(char *name)
{
	char *cp = name;

	if (!is_name(*cp))
		return 0;
	for (cp++; *cp != '\0'; cp++) {
		if (!is_in_name(*cp))
			return 0;
	}
	return 1;
}
1587 * Called when an unexpected token is read during the parse. The argument
1588 * is the token that is expected, or -1 if more than one type of token can
1589 * occur at this point.
1592 STATIC void
1593 synexpect(int token)
1595 char msg[64];
1597 if (token >= 0) {
1598 fmtstr(msg, 64, "%s unexpected (expecting %s)",
1599 tokname[lasttoken], tokname[token]);
1600 } else {
1601 fmtstr(msg, 64, "%s unexpected", tokname[lasttoken]);
1603 synerror(msg);
1604 /* NOTREACHED */
1608 STATIC void
1609 synerror(const char *msg)
1611 if (commandname)
1612 outfmt(&errout, "%s: %d: ", commandname, startlinno);
1613 else
1614 outfmt(&errout, "%s: ", getprogname());
1615 outfmt(&errout, "Syntax error: %s\n", msg);
1616 error((char *)NULL);
1617 /* NOTREACHED */
1620 STATIC void
1621 setprompt(int which)
1623 whichprompt = which;
1625 #ifndef SMALL
1626 if (!el)
1627 #endif
1628 out2str(getprompt(NULL));
1632 * called by editline -- any expansions to the prompt
1633 * should be added here.
1635 const char *
1636 getprompt(void *unused)
1638 switch (whichprompt) {
1639 case 0:
1640 return "";
1641 case 1:
1642 return ps1val();
1643 case 2:
1644 return ps2val();
1645 default:
1646 return "<internal prompt error>";