MFC:
[dragonfly.git] / bin / sh / parser.c
blob22a25d182949b3f9f80e6fd7d4ce2142a1a01f37
1 /*-
2 * Copyright (c) 1991, 1993
3 * The Regents of the University of California. All rights reserved.
5 * This code is derived from software contributed to Berkeley by
6 * Kenneth Almquist.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
36 * @(#)parser.c 8.7 (Berkeley) 5/16/95
37 * $FreeBSD: src/bin/sh/parser.c,v 1.58 2006/11/05 18:36:05 stefanf Exp $
38 * $DragonFly: src/bin/sh/parser.c,v 1.12 2007/01/18 17:03:18 corecode Exp $
41 #include <stdlib.h>
42 #include <unistd.h>
44 #include "shell.h"
45 #include "parser.h"
46 #include "nodes.h"
47 #include "expand.h" /* defines rmescapes() */
48 #include "syntax.h"
49 #include "options.h"
50 #include "input.h"
51 #include "output.h"
52 #include "var.h"
53 #include "error.h"
54 #include "memalloc.h"
55 #include "mystring.h"
56 #include "alias.h"
57 #include "show.h"
58 #include "eval.h"
59 #ifndef NO_HISTORY
60 #include "myhistedit.h"
61 #endif
64 * Shell command parser.
67 #define EOFMARKLEN 79
68 #define PROMPTLEN 128
70 /* values returned by readtoken */
71 #include "token.h"
/*
 * A here document whose body still has to be read.  The record is
 * allocated when a "<<" operator is lexed, queued on heredoclist once
 * its end marker is known, and consumed by parseheredoc().
 */
struct heredoc {
	struct heredoc *next;	/* next here document in list */
	union node *here;	/* redirection node */
	char *eofmark;		/* string indicating end of input */
	int striptabs;		/* if set, strip leading tabs */
};
84 STATIC struct heredoc *heredoclist; /* list of here documents to read */
85 STATIC int parsebackquote; /* nonzero if we are inside backquotes */
86 STATIC int doprompt; /* if set, prompt the user */
87 STATIC int needprompt; /* true if interactive and at start of line */
88 STATIC int lasttoken; /* last token read */
89 MKINIT int tokpushback; /* last token pushed back */
90 STATIC char *wordtext; /* text of last word returned by readtoken */
92 * 1 == check for kwds
93 * 2 == also eat newlines
94 * 3 == check for TNOT
96 MKINIT int checkkwd;
97 STATIC struct nodelist *backquotelist;
98 STATIC union node *redirnode;
99 STATIC struct heredoc *heredoc;
100 STATIC int quoteflag; /* set if (part of) last token was quoted */
101 STATIC int startlinno; /* line # where last token started */
103 /* XXX When 'noaliases' is set to one, no alias expansion takes place. */
104 static int noaliases = 0;
107 STATIC union node *list(int);
108 STATIC union node *andor(void);
109 STATIC union node *pipeline(void);
110 STATIC union node *command(void);
111 STATIC union node *simplecmd(union node **, union node *);
112 STATIC union node *makename(void);
113 STATIC void parsefname(void);
114 STATIC void parseheredoc(void);
115 STATIC int peektoken(void);
116 STATIC int readtoken(void);
117 STATIC int xxreadtoken(void);
118 STATIC int readtoken1(int, char const *, char *, int);
119 STATIC int noexpand(char *);
120 STATIC void synexpect(int);
121 STATIC void synerror(const char *);
122 STATIC void setprompt(int);
126 * Read and parse a command. Returns NEOF on end of file. (NULL is a
127 * valid parse tree indicating a blank line.)
130 union node *
131 parsecmd(int interact)
133 int t;
135 tokpushback = 0;
136 doprompt = interact;
137 if (doprompt)
138 setprompt(1);
139 else
140 setprompt(0);
141 needprompt = 0;
142 t = readtoken();
143 if (t == TEOF)
144 return NEOF;
145 if (t == TNL)
146 return NULL;
147 tokpushback++;
148 return list(1);
152 STATIC union node *
153 list(int nlflag)
155 union node *n1, *n2, *n3;
156 int tok;
158 checkkwd = 2;
159 if (nlflag == 0 && tokendlist[peektoken()])
160 return NULL;
161 n1 = NULL;
162 for (;;) {
163 n2 = andor();
164 tok = readtoken();
165 if (tok == TBACKGND) {
166 if (n2->type == NCMD || n2->type == NPIPE) {
167 n2->ncmd.backgnd = 1;
168 } else if (n2->type == NREDIR) {
169 n2->type = NBACKGND;
170 } else {
171 n3 = (union node *)stalloc(sizeof (struct nredir));
172 n3->type = NBACKGND;
173 n3->nredir.n = n2;
174 n3->nredir.redirect = NULL;
175 n2 = n3;
178 if (n1 == NULL) {
179 n1 = n2;
181 else {
182 n3 = (union node *)stalloc(sizeof (struct nbinary));
183 n3->type = NSEMI;
184 n3->nbinary.ch1 = n1;
185 n3->nbinary.ch2 = n2;
186 n1 = n3;
188 switch (tok) {
189 case TBACKGND:
190 case TSEMI:
191 tok = readtoken();
192 /* FALLTHROUGH */
193 case TNL:
194 if (tok == TNL) {
195 parseheredoc();
196 if (nlflag)
197 return n1;
198 } else {
199 tokpushback++;
201 checkkwd = 2;
202 if (tokendlist[peektoken()])
203 return n1;
204 break;
205 case TEOF:
206 if (heredoclist)
207 parseheredoc();
208 else
209 pungetc(); /* push back EOF on input */
210 return n1;
211 default:
212 if (nlflag)
213 synexpect(-1);
214 tokpushback++;
215 return n1;
222 STATIC union node *
223 andor(void)
225 union node *n1, *n2, *n3;
226 int t;
228 n1 = pipeline();
229 for (;;) {
230 if ((t = readtoken()) == TAND) {
231 t = NAND;
232 } else if (t == TOR) {
233 t = NOR;
234 } else {
235 tokpushback++;
236 return n1;
238 n2 = pipeline();
239 n3 = (union node *)stalloc(sizeof (struct nbinary));
240 n3->type = t;
241 n3->nbinary.ch1 = n1;
242 n3->nbinary.ch2 = n2;
243 n1 = n3;
249 STATIC union node *
250 pipeline(void)
252 union node *n1, *n2, *pipenode;
253 struct nodelist *lp, *prev;
254 int negate;
256 negate = 0;
257 TRACE(("pipeline: entered\n"));
259 checkkwd = 3;
260 while (readtoken() == TNOT)
261 negate = !negate;
262 tokpushback++;
264 n1 = command();
265 if (readtoken() == TPIPE) {
266 pipenode = (union node *)stalloc(sizeof (struct npipe));
267 pipenode->type = NPIPE;
268 pipenode->npipe.backgnd = 0;
269 lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
270 pipenode->npipe.cmdlist = lp;
271 lp->n = n1;
272 do {
273 int innernegate = 0;
275 checkkwd = 3;
276 while (readtoken() == TNOT)
277 innernegate = !innernegate;
278 tokpushback++;
280 prev = lp;
281 lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
282 lp->n = command();
283 prev->next = lp;
285 if (innernegate) {
286 n2 = (union node *)stalloc(sizeof (struct nnot));
287 n2->type = NNOT;
288 n2->nnot.com = lp->n;
289 lp->n = n2;
291 } while (readtoken() == TPIPE);
292 lp->next = NULL;
293 n1 = pipenode;
295 tokpushback++;
296 if (negate) {
297 n2 = (union node *)stalloc(sizeof (struct nnot));
298 n2->type = NNOT;
299 n2->nnot.com = n1;
300 return n2;
301 } else
302 return n1;
307 STATIC union node *
308 command(void)
310 union node *n1, *n2;
311 union node *ap, **app;
312 union node *cp, **cpp;
313 union node *redir, **rpp;
314 int t;
316 checkkwd = 2;
317 redir = NULL;
318 n1 = NULL;
319 rpp = &redir;
321 /* Check for redirection which may precede command */
322 while (readtoken() == TREDIR) {
323 *rpp = n2 = redirnode;
324 rpp = &n2->nfile.next;
325 parsefname();
327 tokpushback++;
329 switch (readtoken()) {
330 case TIF:
331 n1 = (union node *)stalloc(sizeof (struct nif));
332 n1->type = NIF;
333 if ((n1->nif.test = list(0)) == NULL)
334 synexpect(-1);
335 if (readtoken() != TTHEN)
336 synexpect(TTHEN);
337 n1->nif.ifpart = list(0);
338 n2 = n1;
339 while (readtoken() == TELIF) {
340 n2->nif.elsepart = (union node *)stalloc(sizeof (struct nif));
341 n2 = n2->nif.elsepart;
342 n2->type = NIF;
343 if ((n2->nif.test = list(0)) == NULL)
344 synexpect(-1);
345 if (readtoken() != TTHEN)
346 synexpect(TTHEN);
347 n2->nif.ifpart = list(0);
349 if (lasttoken == TELSE)
350 n2->nif.elsepart = list(0);
351 else {
352 n2->nif.elsepart = NULL;
353 tokpushback++;
355 if (readtoken() != TFI)
356 synexpect(TFI);
357 checkkwd = 1;
358 break;
359 case TWHILE:
360 case TUNTIL: {
361 int got;
362 n1 = (union node *)stalloc(sizeof (struct nbinary));
363 n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL;
364 if ((n1->nbinary.ch1 = list(0)) == NULL)
365 synexpect(-1);
366 if ((got=readtoken()) != TDO) {
367 TRACE(("expecting DO got %s %s\n", tokname[got], got == TWORD ? wordtext : ""));
368 synexpect(TDO);
370 n1->nbinary.ch2 = list(0);
371 if (readtoken() != TDONE)
372 synexpect(TDONE);
373 checkkwd = 1;
374 break;
376 case TFOR:
377 if (readtoken() != TWORD || quoteflag || ! goodname(wordtext))
378 synerror("Bad for loop variable");
379 n1 = (union node *)stalloc(sizeof (struct nfor));
380 n1->type = NFOR;
381 n1->nfor.var = wordtext;
382 if (readtoken() == TWORD && ! quoteflag && equal(wordtext, "in")) {
383 app = &ap;
384 while (readtoken() == TWORD) {
385 n2 = (union node *)stalloc(sizeof (struct narg));
386 n2->type = NARG;
387 n2->narg.text = wordtext;
388 n2->narg.backquote = backquotelist;
389 *app = n2;
390 app = &n2->narg.next;
392 *app = NULL;
393 n1->nfor.args = ap;
394 if (lasttoken != TNL && lasttoken != TSEMI)
395 synexpect(-1);
396 } else {
397 static char argvars[5] = {
398 CTLVAR, VSNORMAL|VSQUOTE, '@', '=', '\0'
400 n2 = (union node *)stalloc(sizeof (struct narg));
401 n2->type = NARG;
402 n2->narg.text = argvars;
403 n2->narg.backquote = NULL;
404 n2->narg.next = NULL;
405 n1->nfor.args = n2;
407 * Newline or semicolon here is optional (but note
408 * that the original Bourne shell only allowed NL).
410 if (lasttoken != TNL && lasttoken != TSEMI)
411 tokpushback++;
413 checkkwd = 2;
414 if ((t = readtoken()) == TDO)
415 t = TDONE;
416 else if (t == TBEGIN)
417 t = TEND;
418 else
419 synexpect(-1);
420 n1->nfor.body = list(0);
421 if (readtoken() != t)
422 synexpect(t);
423 checkkwd = 1;
424 break;
425 case TCASE:
426 n1 = (union node *)stalloc(sizeof (struct ncase));
427 n1->type = NCASE;
428 if (readtoken() != TWORD)
429 synexpect(TWORD);
430 n1->ncase.expr = n2 = (union node *)stalloc(sizeof (struct narg));
431 n2->type = NARG;
432 n2->narg.text = wordtext;
433 n2->narg.backquote = backquotelist;
434 n2->narg.next = NULL;
435 while (readtoken() == TNL);
436 if (lasttoken != TWORD || ! equal(wordtext, "in"))
437 synerror("expecting \"in\"");
438 cpp = &n1->ncase.cases;
439 noaliases = 1; /* turn off alias expansion */
440 checkkwd = 2, readtoken();
441 while (lasttoken != TESAC) {
442 *cpp = cp = (union node *)stalloc(sizeof (struct nclist));
443 cp->type = NCLIST;
444 app = &cp->nclist.pattern;
445 if (lasttoken == TLP)
446 readtoken();
447 for (;;) {
448 *app = ap = (union node *)stalloc(sizeof (struct narg));
449 ap->type = NARG;
450 ap->narg.text = wordtext;
451 ap->narg.backquote = backquotelist;
452 if (checkkwd = 2, readtoken() != TPIPE)
453 break;
454 app = &ap->narg.next;
455 readtoken();
457 ap->narg.next = NULL;
458 if (lasttoken != TRP)
459 noaliases = 0, synexpect(TRP);
460 cp->nclist.body = list(0);
462 checkkwd = 2;
463 if ((t = readtoken()) != TESAC) {
464 if (t != TENDCASE)
465 noaliases = 0, synexpect(TENDCASE);
466 else
467 checkkwd = 2, readtoken();
469 cpp = &cp->nclist.next;
471 noaliases = 0; /* reset alias expansion */
472 *cpp = NULL;
473 checkkwd = 1;
474 break;
475 case TLP:
476 n1 = (union node *)stalloc(sizeof (struct nredir));
477 n1->type = NSUBSHELL;
478 n1->nredir.n = list(0);
479 n1->nredir.redirect = NULL;
480 if (readtoken() != TRP)
481 synexpect(TRP);
482 checkkwd = 1;
483 break;
484 case TBEGIN:
485 n1 = list(0);
486 if (readtoken() != TEND)
487 synexpect(TEND);
488 checkkwd = 1;
489 break;
490 /* Handle an empty command like other simple commands. */
491 case TSEMI:
492 case TAND:
493 case TOR:
495 * An empty command before a ; doesn't make much sense, and
496 * should certainly be disallowed in the case of `if ;'.
498 if (!redir)
499 synexpect(-1);
500 case TNL:
501 case TEOF:
502 case TWORD:
503 case TRP:
504 tokpushback++;
505 return simplecmd(rpp, redir);
506 default:
507 synexpect(-1);
510 /* Now check for redirection which may follow command */
511 while (readtoken() == TREDIR) {
512 *rpp = n2 = redirnode;
513 rpp = &n2->nfile.next;
514 parsefname();
516 tokpushback++;
517 *rpp = NULL;
518 if (redir) {
519 if (n1->type != NSUBSHELL) {
520 n2 = (union node *)stalloc(sizeof (struct nredir));
521 n2->type = NREDIR;
522 n2->nredir.n = n1;
523 n1 = n2;
525 n1->nredir.redirect = redir;
527 return n1;
531 STATIC union node *
532 simplecmd(union node **rpp, union node *redir)
534 union node *args, **app;
535 union node **orig_rpp = rpp;
536 union node *n = NULL;
538 /* If we don't have any redirections already, then we must reset */
539 /* rpp to be the address of the local redir variable. */
540 if (redir == 0)
541 rpp = &redir;
543 args = NULL;
544 app = &args;
546 * We save the incoming value, because we need this for shell
547 * functions. There can not be a redirect or an argument between
548 * the function name and the open parenthesis.
550 orig_rpp = rpp;
552 for (;;) {
553 if (readtoken() == TWORD) {
554 n = (union node *)stalloc(sizeof (struct narg));
555 n->type = NARG;
556 n->narg.text = wordtext;
557 n->narg.backquote = backquotelist;
558 *app = n;
559 app = &n->narg.next;
560 } else if (lasttoken == TREDIR) {
561 *rpp = n = redirnode;
562 rpp = &n->nfile.next;
563 parsefname(); /* read name of redirection file */
564 } else if (lasttoken == TLP && app == &args->narg.next
565 && rpp == orig_rpp) {
566 /* We have a function */
567 if (readtoken() != TRP)
568 synexpect(TRP);
569 #ifdef notdef
570 if (! goodname(n->narg.text))
571 synerror("Bad function name");
572 #endif
573 n->type = NDEFUN;
574 n->narg.next = command();
575 return n;
576 } else {
577 tokpushback++;
578 break;
581 *app = NULL;
582 *rpp = NULL;
583 n = (union node *)stalloc(sizeof (struct ncmd));
584 n->type = NCMD;
585 n->ncmd.backgnd = 0;
586 n->ncmd.args = args;
587 n->ncmd.redirect = redir;
588 return n;
591 STATIC union node *
592 makename(void)
594 union node *n;
596 n = (union node *)stalloc(sizeof (struct narg));
597 n->type = NARG;
598 n->narg.next = NULL;
599 n->narg.text = wordtext;
600 n->narg.backquote = backquotelist;
601 return n;
604 void
605 fixredir(union node *n, const char *text, int err)
607 TRACE(("Fix redir %s %d\n", text, err));
608 if (!err)
609 n->ndup.vname = NULL;
611 if (is_digit(text[0]) && text[1] == '\0')
612 n->ndup.dupfd = digit_val(text[0]);
613 else if (text[0] == '-' && text[1] == '\0')
614 n->ndup.dupfd = -1;
615 else {
617 if (err)
618 synerror("Bad fd number");
619 else
620 n->ndup.vname = makename();
625 STATIC void
626 parsefname(void)
628 union node *n = redirnode;
630 if (readtoken() != TWORD)
631 synexpect(-1);
632 if (n->type == NHERE) {
633 struct heredoc *here = heredoc;
634 struct heredoc *p;
635 int i;
637 if (quoteflag == 0)
638 n->type = NXHERE;
639 TRACE(("Here document %d\n", n->type));
640 if (here->striptabs) {
641 while (*wordtext == '\t')
642 wordtext++;
644 if (! noexpand(wordtext) || (i = strlen(wordtext)) == 0 || i > EOFMARKLEN)
645 synerror("Illegal eof marker for << redirection");
646 rmescapes(wordtext);
647 here->eofmark = wordtext;
648 here->next = NULL;
649 if (heredoclist == NULL)
650 heredoclist = here;
651 else {
652 for (p = heredoclist ; p->next ; p = p->next);
653 p->next = here;
655 } else if (n->type == NTOFD || n->type == NFROMFD) {
656 fixredir(n, wordtext, 0);
657 } else {
658 n->nfile.fname = makename();
664 * Input any here documents.
667 STATIC void
668 parseheredoc(void)
670 struct heredoc *here;
671 union node *n;
673 while (heredoclist) {
674 here = heredoclist;
675 heredoclist = here->next;
676 if (needprompt) {
677 setprompt(2);
678 needprompt = 0;
680 readtoken1(pgetc(), here->here->type == NHERE? SQSYNTAX : DQSYNTAX,
681 here->eofmark, here->striptabs);
682 n = (union node *)stalloc(sizeof (struct narg));
683 n->narg.type = NARG;
684 n->narg.next = NULL;
685 n->narg.text = wordtext;
686 n->narg.backquote = backquotelist;
687 here->here->nhere.doc = n;
691 STATIC int
692 peektoken(void)
694 int t;
696 t = readtoken();
697 tokpushback++;
698 return (t);
701 STATIC int
702 readtoken(void)
704 int t;
705 int savecheckkwd = checkkwd;
706 struct alias *ap;
707 #ifdef DEBUG
708 int alreadyseen = tokpushback;
709 #endif
711 top:
712 t = xxreadtoken();
714 if (checkkwd) {
716 * eat newlines
718 if (checkkwd > 1) {
719 checkkwd = 0;
720 while (t == TNL) {
721 parseheredoc();
722 t = xxreadtoken();
724 } else
725 checkkwd = 0;
727 * check for keywords and aliases
729 if (t == TWORD && !quoteflag)
731 const char * const *pp;
733 for (pp = parsekwd; *pp; pp++) {
734 if (**pp == *wordtext && equal(*pp, wordtext))
736 lasttoken = t = pp - parsekwd + KWDOFFSET;
737 TRACE(("keyword %s recognized\n", tokname[t]));
738 goto out;
741 if (noaliases == 0 &&
742 (ap = lookupalias(wordtext, 1)) != NULL) {
743 pushstring(ap->val, strlen(ap->val), ap);
744 checkkwd = savecheckkwd;
745 goto top;
748 out:
749 checkkwd = 0;
751 #ifdef DEBUG
752 if (!alreadyseen)
753 TRACE(("token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
754 else
755 TRACE(("reread token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
756 #endif
757 return (t);
762 * Read the next input token.
763 * If the token is a word, we set backquotelist to the list of cmds in
764 * backquotes. We set quoteflag to true if any part of the word was
765 * quoted.
766 * If the token is TREDIR, then we set redirnode to a structure containing
767 * the redirection.
768 * In all cases, the variable startlinno is set to the number of the line
769 * on which the token starts.
771 * [Change comment: here documents and internal procedures]
772 * [Readtoken shouldn't have any arguments. Perhaps we should make the
773 * word parsing code into a separate routine. In this case, readtoken
774 * doesn't need to have any internal procedures, but parseword does.
775 * We could also make parseoperator in essence the main routine, and
776 * have parseword (readtoken1?) handle both words and redirection.]
779 #define RETURN(token) return lasttoken = token
781 STATIC int
782 xxreadtoken(void)
784 int c;
786 if (tokpushback) {
787 tokpushback = 0;
788 return lasttoken;
790 if (needprompt) {
791 setprompt(2);
792 needprompt = 0;
794 startlinno = plinno;
795 for (;;) { /* until token or start of word found */
796 c = pgetc_macro();
797 if (c == ' ' || c == '\t')
798 continue; /* quick check for white space first */
799 switch (c) {
800 case ' ': case '\t':
801 continue;
802 case '#':
803 while ((c = pgetc()) != '\n' && c != PEOF);
804 pungetc();
805 continue;
806 case '\\':
807 if (pgetc() == '\n') {
808 startlinno = ++plinno;
809 if (doprompt)
810 setprompt(2);
811 else
812 setprompt(0);
813 continue;
815 pungetc();
816 goto breakloop;
817 case '\n':
818 plinno++;
819 needprompt = doprompt;
820 RETURN(TNL);
821 case PEOF:
822 RETURN(TEOF);
823 case '&':
824 if (pgetc() == '&')
825 RETURN(TAND);
826 pungetc();
827 RETURN(TBACKGND);
828 case '|':
829 if (pgetc() == '|')
830 RETURN(TOR);
831 pungetc();
832 RETURN(TPIPE);
833 case ';':
834 if (pgetc() == ';')
835 RETURN(TENDCASE);
836 pungetc();
837 RETURN(TSEMI);
838 case '(':
839 RETURN(TLP);
840 case ')':
841 RETURN(TRP);
842 case '!':
843 if (checkkwd == 3)
844 RETURN(TNOT);
845 /* else FALLTHROUGH */
846 default:
847 goto breakloop;
850 breakloop:
851 return readtoken1(c, BASESYNTAX, (char *)NULL, 0);
852 #undef RETURN
/*
 * readtoken1: read one word, one redirection operator, or (when eofmark
 * is not NULL) the body of a here document, starting with the character
 * firstc and using the syntax table syn.
 *
 * Returns TREDIR after storing the redirection in redirnode, or TWORD
 * after setting wordtext, quoteflag and backquotelist.  lasttoken is
 * updated in both cases.
 */
858 * If eofmark is NULL, read a word or a redirection symbol. If eofmark
859 * is not NULL, read a here document. In the latter case, eofmark is the
860 * word which marks the end of the document and striptabs is true if
861 * leading tabs should be stripped from the document. The argument firstc
862 * is the first character of the input token or document.
864 * Because C does not have internal subroutines, I have simulated them
865 * using goto's to implement the subroutine linkage. The following macros
866 * will run code that appears at the end of readtoken1.
/*
 * Each macro below jumps to a labelled section further down; that
 * section ends with a goto back to the *_return label defined here.
 */
869 #define CHECKEND() {goto checkend; checkend_return:;}
870 #define PARSEREDIR() {goto parseredir; parseredir_return:;}
871 #define PARSESUB() {goto parsesub; parsesub_return:;}
872 #define PARSEBACKQOLD() {oldstyle = 1; goto parsebackq; parsebackq_oldreturn:;}
873 #define PARSEBACKQNEW() {oldstyle = 0; goto parsebackq; parsebackq_newreturn:;}
874 #define PARSEARITH() {goto parsearith; parsearith_return:;}
876 STATIC int
877 readtoken1(int firstc, char const *syn, char *eofmark, int striptabs)
879 int c = firstc;
880 char const * volatile syntax = syn;
881 char * volatile out;
882 int len;
883 char line[EOFMARKLEN + 1];
884 struct nodelist *bqlist;
885 volatile int quotef;
886 volatile int dblquote;
887 volatile int varnest; /* levels of variables expansion */
888 volatile int arinest; /* levels of arithmetic expansion */
889 volatile int parenlevel; /* levels of parens in arithmetic */
890 volatile int oldstyle;
891 char const * volatile prevsyntax = NULL; /* syntax before arithmetic */
892 int synentry;
894 startlinno = plinno;
895 dblquote = 0;
896 if (syntax == DQSYNTAX)
897 dblquote = 1;
898 quotef = 0;
899 bqlist = NULL;
900 varnest = 0;
901 arinest = 0;
902 parenlevel = 0;
904 STARTSTACKSTR(out);
905 loop: { /* for each line, until end of word */
906 CHECKEND(); /* set c to PEOF if at end of here document */
907 for (;;) { /* until end of line or end of word */
908 CHECKSTRSPACE(3, out); /* permit 3 calls to USTPUTC */
/* Classify the character according to the syntax table currently in force. */
910 synentry = syntax[c];
912 switch(synentry) {
913 case CNL: /* '\n' */
914 if (syntax == BASESYNTAX)
915 goto endword; /* exit outer loop */
916 USTPUTC(c, out);
917 plinno++;
918 if (doprompt)
919 setprompt(2);
920 else
921 setprompt(0);
922 c = pgetc();
923 goto loop; /* continue outer loop */
924 case CWORD:
925 USTPUTC(c, out);
926 break;
927 case CCTL:
928 if (eofmark == NULL || dblquote)
929 USTPUTC(CTLESC, out);
930 USTPUTC(c, out);
931 break;
932 case CBACK: /* backslash */
933 c = pgetc();
934 if (c == PEOF) {
935 USTPUTC('\\', out);
936 pungetc();
937 } else if (c == '\n') {
/* Backslash-newline: nothing is stored, only the line count and prompt change. */
938 plinno++;
939 if (doprompt)
940 setprompt(2);
941 else
942 setprompt(0);
943 } else {
944 if (dblquote && c != '\\' &&
945 c != '`' && c != '$' &&
946 (c != '"' || eofmark != NULL))
947 USTPUTC('\\', out);
948 if (SQSYNTAX[c] == CCTL)
949 USTPUTC(CTLESC, out);
950 else if (eofmark == NULL)
951 USTPUTC(CTLQUOTEMARK, out);
952 USTPUTC(c, out);
953 quotef++;
955 break;
956 case CSQUOTE:
957 if (eofmark == NULL)
958 USTPUTC(CTLQUOTEMARK, out);
959 syntax = SQSYNTAX;
960 break;
961 case CDQUOTE:
962 if (eofmark == NULL)
963 USTPUTC(CTLQUOTEMARK, out);
964 syntax = DQSYNTAX;
965 dblquote = 1;
966 break;
967 case CENDQUOTE:
/* In a here document, outside ${...} and $((...)), a closing quote character is stored literally. */
968 if (eofmark != NULL && arinest == 0 &&
969 varnest == 0) {
970 USTPUTC(c, out);
971 } else {
972 if (arinest) {
973 syntax = ARISYNTAX;
974 dblquote = 0;
975 } else if (eofmark == NULL) {
976 syntax = BASESYNTAX;
977 dblquote = 0;
979 quotef++;
981 break;
982 case CVAR: /* '$' */
983 PARSESUB(); /* parse substitution */
984 break;
985 case CENDVAR: /* '}' */
986 if (varnest > 0) {
987 varnest--;
988 USTPUTC(CTLENDVAR, out);
989 } else {
990 USTPUTC(c, out);
992 break;
993 case CLP: /* '(' in arithmetic */
994 parenlevel++;
995 USTPUTC(c, out);
996 break;
997 case CRP: /* ')' in arithmetic */
998 if (parenlevel > 0) {
999 USTPUTC(c, out);
1000 --parenlevel;
1001 } else {
1002 if (pgetc() == ')') {
/* "))" closes one arithmetic level; the outermost one restores the syntax saved in prevsyntax. */
1003 if (--arinest == 0) {
1004 USTPUTC(CTLENDARI, out);
1005 syntax = prevsyntax;
1006 if (syntax == DQSYNTAX)
1007 dblquote = 1;
1008 else
1009 dblquote = 0;
1010 } else
1011 USTPUTC(')', out);
1012 } else {
1014 * unbalanced parens
1015 * (don't 2nd guess - no error)
1017 pungetc();
1018 USTPUTC(')', out);
1021 break;
1022 case CBQUOTE: /* '`' */
1023 PARSEBACKQOLD();
1024 break;
1025 case CEOF:
1026 goto endword; /* exit outer loop */
1027 default:
1028 if (varnest == 0)
1029 goto endword; /* exit outer loop */
1030 USTPUTC(c, out);
1032 c = pgetc_macro();
1035 endword:
/* The word is complete: report constructs that were left open. */
1036 if (syntax == ARISYNTAX)
1037 synerror("Missing '))'");
1038 if (syntax != BASESYNTAX && ! parsebackquote && eofmark == NULL)
1039 synerror("Unterminated quoted string");
1040 if (varnest != 0) {
1041 startlinno = plinno;
1042 synerror("Missing '}'");
1044 USTPUTC('\0', out);
1045 len = out - stackblock();
1046 out = stackblock();
1047 if (eofmark == NULL) {
/*
 * len counts the terminating NUL, so this matches an unquoted '<' or
 * '>' preceded by nothing or by a single digit.
 */
1048 if ((c == '>' || c == '<')
1049 && quotef == 0
1050 && len <= 2
1051 && (*out == '\0' || is_digit(*out))) {
1052 PARSEREDIR();
1053 return lasttoken = TREDIR;
1054 } else {
1055 pungetc();
1058 quoteflag = quotef;
1059 backquotelist = bqlist;
1060 grabstackblock(len);
1061 wordtext = out;
1062 return lasttoken = TWORD;
1063 /* end of readtoken routine */
1068 * Check to see whether we are at the end of the here document. When this
1069 * is called, c is set to the first character of the next input line. If
1070 * we are at the end of the here document, this routine sets the c to PEOF.
1073 checkend: {
1074 if (eofmark) {
1075 if (striptabs) {
1076 while (c == '\t')
1077 c = pgetc();
1079 if (c == *eofmark) {
1080 if (pfgets(line, sizeof line) != NULL) {
1081 char *p, *q;
/* c already matched eofmark[0]; compare the rest of the line with the rest of the marker. */
1083 p = line;
1084 for (q = eofmark + 1 ; *q && *p == *q ; p++, q++);
1085 if (*p == '\n' && *q == '\0') {
1086 c = PEOF;
1087 plinno++;
1088 needprompt = doprompt;
1089 } else {
/* Not the end marker: give the line back to the input so it is read as document text. */
1090 pushstring(line, strlen(line), NULL);
1095 goto checkend_return;
1100 * Parse a redirection operator. The variable "out" points to a string
1101 * specifying the fd to be redirected. The variable "c" contains the
1102 * first character of the redirection operator.
1105 parseredir: {
1106 char fd = *out;
1107 union node *np;
1109 np = (union node *)stalloc(sizeof (struct nfile));
1110 if (c == '>') {
1111 np->nfile.fd = 1;
1112 c = pgetc();
1113 if (c == '>')
1114 np->type = NAPPEND;
1115 else if (c == '&')
1116 np->type = NTOFD;
1117 else if (c == '|')
1118 np->type = NCLOBBER;
1119 else {
1120 np->type = NTO;
1121 pungetc();
1123 } else { /* c == '<' */
1124 np->nfile.fd = 0;
1125 c = pgetc();
1126 if (c == '<') {
/* A here-document node is allocated afresh when struct nhere and struct nfile differ in size. */
1127 if (sizeof (struct nfile) != sizeof (struct nhere)) {
1128 np = (union node *)stalloc(sizeof (struct nhere));
1129 np->nfile.fd = 0;
1131 np->type = NHERE;
1132 heredoc = (struct heredoc *)stalloc(sizeof (struct heredoc));
1133 heredoc->here = np;
1134 if ((c = pgetc()) == '-') {
1135 heredoc->striptabs = 1;
1136 } else {
1137 heredoc->striptabs = 0;
1138 pungetc();
1140 } else if (c == '&')
1141 np->type = NFROMFD;
1142 else if (c == '>')
1143 np->type = NFROMTO;
1144 else {
1145 np->type = NFROM;
1146 pungetc();
/* An explicit digit in front of the operator overrides the default descriptor. */
1149 if (fd != '\0')
1150 np->nfile.fd = digit_val(fd);
1151 redirnode = np;
1152 goto parseredir_return;
1157 * Parse a substitution. At this point, we have read the dollar sign
1158 * and nothing else.
1161 parsesub: {
1162 int subtype;
1163 int typeloc;
1164 int flags;
1165 char *p;
1166 static const char types[] = "}-+?=";
1167 int bracketed_name = 0; /* used to handle ${[0-9]*} variables */
1169 c = pgetc();
1170 if (c != '(' && c != '{' && (is_eof(c) || !is_name(c)) &&
1171 !is_special(c)) {
/* '$' not followed by anything that can start a substitution is kept as a literal. */
1172 USTPUTC('$', out);
1173 pungetc();
1174 } else if (c == '(') { /* $(command) or $((arith)) */
1175 if (pgetc() == '(') {
1176 PARSEARITH();
1177 } else {
1178 pungetc();
1179 PARSEBACKQNEW();
1181 } else {
1182 USTPUTC(CTLVAR, out);
/* Remember where the type byte sits; it is overwritten below once the subtype and flags are known. */
1183 typeloc = out - stackblock();
1184 USTPUTC(VSNORMAL, out);
1185 subtype = VSNORMAL;
1186 if (c == '{') {
1187 bracketed_name = 1;
1188 c = pgetc();
1189 if (c == '#') {
1190 if ((c = pgetc()) == '}')
1191 c = '#';
1192 else
1193 subtype = VSLENGTH;
1195 else
1196 subtype = 0;
1198 if (!is_eof(c) && is_name(c)) {
1199 do {
1200 STPUTC(c, out);
1201 c = pgetc();
1202 } while (!is_eof(c) && is_in_name(c));
1203 } else if (is_digit(c)) {
1204 if (bracketed_name) {
1205 do {
1206 STPUTC(c, out);
1207 c = pgetc();
1208 } while (is_digit(c));
1209 } else {
1210 STPUTC(c, out);
1211 c = pgetc();
1213 } else {
1214 if (! is_special(c)) {
1215 subtype = VSERROR;
1216 if (c == '}')
1217 pungetc();
1218 else
1219 USTPUTC(c, out);
1220 } else {
1221 USTPUTC(c, out);
1222 c = pgetc();
1225 flags = 0;
1226 if (subtype == 0) {
1227 switch (c) {
1228 case ':':
1229 flags = VSNUL;
1230 c = pgetc();
1231 /*FALLTHROUGH*/
1232 default:
1233 p = strchr(types, c);
1234 if (p == NULL) {
1235 if (flags == VSNUL)
1236 STPUTC(':', out);
1237 STPUTC(c, out);
1238 subtype = VSERROR;
1239 } else
1240 subtype = p - types + VSNORMAL;
1241 break;
1242 case '%':
1243 case '#':
1245 int cc = c;
1246 subtype = c == '#' ? VSTRIMLEFT :
1247 VSTRIMRIGHT;
1248 c = pgetc();
/* A doubled operator character ("##" or "%%") selects the next subtype value. */
1249 if (c == cc)
1250 subtype++;
1251 else
1252 pungetc();
1253 break;
1256 } else if (subtype != VSERROR) {
1257 pungetc();
1259 STPUTC('=', out);
1260 if (subtype != VSLENGTH && (dblquote || arinest))
1261 flags |= VSQUOTE;
1262 *(stackblock() + typeloc) = subtype | flags;
1263 if (subtype != VSNORMAL)
1264 varnest++;
1266 goto parsesub_return;
1271 * Called to parse command substitutions. Newstyle is set if the command
1272 * is enclosed inside $(...); nlpp is a pointer to the head of the linked
1273 * list of commands (passed by reference), and savelen is the number of
1274 * characters on the top of the stack which must be preserved.
1277 parsebackq: {
1278 struct nodelist **nlpp;
1279 int savepbq;
1280 union node *n;
1281 char *volatile str;
1282 struct jmploc jmploc;
1283 struct jmploc *volatile savehandler;
1284 int savelen;
1285 volatile int saveprompt;
1287 savepbq = parsebackquote;
/*
 * Error path: entered through longjmp once jmploc has been installed as
 * handler below.  It frees the saved copy of the partial word, restores
 * the previous handler and passes the error on to it.
 */
1288 if (setjmp(jmploc.loc)) {
1289 if (str)
1290 ckfree(str);
1291 parsebackquote = 0;
1292 handler = savehandler;
1293 longjmp(handler->loc, 1);
1295 INTOFF;
/* Save the part of the word built so far; it is copied back after the nested parse. */
1296 str = NULL;
1297 savelen = out - stackblock();
1298 if (savelen > 0) {
1299 str = ckmalloc(savelen);
1300 memcpy(str, stackblock(), savelen);
1302 savehandler = handler;
1303 handler = &jmploc;
1304 INTON;
1305 if (oldstyle) {
1306 /* We must read until the closing backquote, giving special
1307 treatment to some slashes, and then push the string and
1308 reread it as input, interpreting it normally. */
1309 char *pout;
1310 int pc;
1311 int psavelen;
1312 char *pstr;
1315 STARTSTACKSTR(pout);
1316 for (;;) {
1317 if (needprompt) {
1318 setprompt(2);
1319 needprompt = 0;
1321 switch (pc = pgetc()) {
1322 case '`':
1323 goto done;
1325 case '\\':
1326 if ((pc = pgetc()) == '\n') {
1327 plinno++;
1328 if (doprompt)
1329 setprompt(2);
1330 else
1331 setprompt(0);
1333 * If eating a newline, avoid putting
1334 * the newline into the new character
1335 * stream (via the STPUTC after the
1336 * switch).
1338 continue;
1340 if (pc != '\\' && pc != '`' && pc != '$'
1341 && (!dblquote || pc != '"'))
1342 STPUTC('\\', pout);
1343 break;
1345 case '\n':
1346 plinno++;
1347 needprompt = doprompt;
1348 break;
1350 case PEOF:
1351 startlinno = plinno;
1352 synerror("EOF in backquote substitution");
1353 break;
1355 default:
1356 break;
1358 STPUTC(pc, pout);
1360 done:
1361 STPUTC('\0', pout);
1362 psavelen = pout - stackblock();
1363 if (psavelen > 0) {
1364 pstr = ckmalloc(psavelen);
1365 memcpy(pstr, stackblock(), psavelen);
1366 setinputstring(pstr, 1);
/* Append a new entry at the tail of this word's list of command substitutions. */
1369 nlpp = &bqlist;
1370 while (*nlpp)
1371 nlpp = &(*nlpp)->next;
1372 *nlpp = (struct nodelist *)stalloc(sizeof (struct nodelist));
1373 (*nlpp)->next = NULL;
1374 parsebackquote = oldstyle;
1376 if (oldstyle) {
1377 saveprompt = doprompt;
1378 doprompt = 0;
1381 n = list(0);
1383 if (oldstyle)
1384 doprompt = saveprompt;
1385 else {
1386 if (readtoken() != TRP)
1387 synexpect(TRP);
1390 (*nlpp)->n = n;
1391 if (oldstyle) {
1393 * Start reading from old file again, ignoring any pushed back
1394 * tokens left from the backquote parsing
1396 popfile();
1397 tokpushback = 0;
/* Make the stack block large enough again, then copy the saved partial word back into it. */
1399 while (stackblocksize() <= savelen)
1400 growstackblock();
1401 STARTSTACKSTR(out);
1402 if (str) {
1403 memcpy(out, str, savelen);
1404 STADJUST(savelen, out);
1405 INTOFF;
1406 ckfree(str);
1407 str = NULL;
1408 INTON;
1410 parsebackquote = savepbq;
1411 handler = savehandler;
1412 if (arinest || dblquote)
1413 USTPUTC(CTLBACKQ | CTLQUOTE, out);
1414 else
1415 USTPUTC(CTLBACKQ, out);
1416 if (oldstyle)
1417 goto parsebackq_oldreturn;
1418 else
1419 goto parsebackq_newreturn;
1423 * Parse an arithmetic expansion (indicate start of one and set state)
1425 parsearith: {
1427 if (++arinest == 1) {
1428 prevsyntax = syntax;
1429 syntax = ARISYNTAX;
1430 USTPUTC(CTLARI, out);
/* The character after CTLARI records whether the expansion began inside double quotes. */
1431 if (dblquote)
1432 USTPUTC('"',out);
1433 else
1434 USTPUTC(' ',out);
1435 } else {
1437 * we collapse embedded arithmetic expansion to
1438 * parenthesis, which should be equivalent
1440 USTPUTC('(', out);
1442 goto parsearith_return;
1445 } /* end of readtoken */
#ifdef mkinit
/*
 * Parser state reset fragment, only visible when `mkinit` is defined
 * (presumably consumed by the mkinit generator rather than compiled here
 * -- confirm against the build).  Clears the keyword-check flag and drops
 * any pushed-back token.
 */
RESET {
	checkkwd = 0;
	tokpushback = 0;
}
#endif
1457 * Returns true if the text contains nothing to expand (no dollar signs
1458 * or backquotes).
1461 STATIC int
1462 noexpand(char *text)
1464 char *p;
1465 char c;
1467 p = text;
1468 while ((c = *p++) != '\0') {
1469 if ( c == CTLQUOTEMARK)
1470 continue;
1471 if (c == CTLESC)
1472 p++;
1473 else if (BASESYNTAX[(int)c] == CCTL)
1474 return 0;
1476 return 1;
/*
 * Check whether NAME is a legal variable name: the first character must
 * satisfy is_name() (a letter or underscore) and every following
 * character must satisfy is_in_name() (letters, underscores, digits).
 *
 * Returns 1 for a legal name, 0 otherwise (including the empty string,
 * whose first character fails is_name()).
 */
int
goodname(char *name)
{
	char *cp = name;

	if (!is_name(*cp))
		return 0;
	for (cp++; *cp != '\0'; cp++) {
		if (!is_in_name(*cp))
			return 0;
	}
	return 1;
}
1502 * Called when an unexpected token is read during the parse. The argument
1503 * is the token that is expected, or -1 if more than one type of token can
1504 * occur at this point.
1507 STATIC void
1508 synexpect(int token)
1510 char msg[64];
1512 if (token >= 0) {
1513 fmtstr(msg, 64, "%s unexpected (expecting %s)",
1514 tokname[lasttoken], tokname[token]);
1515 } else {
1516 fmtstr(msg, 64, "%s unexpected", tokname[lasttoken]);
1518 synerror(msg);
1522 STATIC void
1523 synerror(const char *msg)
1525 if (commandname)
1526 outfmt(&errout, "%s: %d: ", commandname, startlinno);
1527 outfmt(&errout, "Syntax error: %s\n", msg);
1528 error((char *)NULL);
1531 STATIC void
1532 setprompt(int which)
1534 whichprompt = which;
1536 #ifndef NO_HISTORY
1537 if (!el)
1538 #endif
1539 out2str(getprompt(NULL));
1543 * called by editline -- any expansions to the prompt
1544 * should be added here.
1546 const char *
1547 getprompt(void *unused __unused)
1549 static char ps[PROMPTLEN];
1550 const char *fmt;
1551 int i, j, trim;
1554 * Select prompt format.
1556 switch (whichprompt) {
1557 case 0:
1558 fmt = "";
1559 break;
1560 case 1:
1561 fmt = ps1val();
1562 break;
1563 case 2:
1564 fmt = ps2val();
1565 break;
1566 default:
1567 return "<internal prompt error>";
1571 * Format prompt string.
1573 for (i = 0; (i < 127) && (*fmt != '\0'); i++, fmt++)
1574 if (*fmt == '\\')
1575 switch (*++fmt) {
1578 * Hostname.
1580 * \h specifies just the local hostname,
1581 * \H specifies fully-qualified hostname.
1583 case 'h':
1584 case 'H':
1585 ps[i] = '\0';
1586 gethostname(&ps[i], PROMPTLEN - i);
1587 /* Skip to end of hostname. */
1588 trim = (*fmt == 'h') ? '.' : '\0';
1589 while ((ps[i+1] != '\0') && (ps[i+1] != trim))
1590 i++;
1591 break;
1594 * Working directory.
1596 * \W specifies just the final component,
1597 * \w specifies the entire path.
1599 case 'W':
1600 case 'w':
1601 ps[i] = '\0';
1602 getcwd(&ps[i], PROMPTLEN - i);
1603 if (*fmt == 'W') {
1604 /* Final path component only. */
1605 trim = 1;
1606 for (j = i; ps[j] != '\0'; j++)
1607 if (ps[j] == '/')
1608 trim = j + 1;
1609 memmove(&ps[i], &ps[trim],
1610 j - trim + 1);
1612 /* Skip to end of path. */
1613 while (ps[i + 1] != '\0')
1614 i++;
1615 break;
1618 * Superuser status.
1620 * '$' for normal users, '#' for root.
1622 case '$':
1623 ps[i] = (geteuid() != 0) ? '$' : '#';
1624 break;
1627 * A literal \.
1629 case '\\':
1630 ps[i] = '\\';
1631 break;
1634 * Emit unrecognized formats verbatim.
1636 default:
1637 ps[i++] = '\\';
1638 ps[i] = *fmt;
1639 break;
1641 else
1642 ps[i] = *fmt;
1643 ps[i] = '\0';
1644 return (ps);