/*
 * changelog for 0.9.1
 * [posh.git] / lex.c
 * blob e34abbf9b0e3e247128a611674dbdbfc036c74b9
 *
 * lexical analysis and source input
 */
#include "sh.h"
#include <ctype.h>
/*
 * states while lexing word
 *
 * These are the values stored in Lex_state.ls_state and in yylex()'s
 * local `state' variable.
 */
#define SBASE		0	/* outside any lexical constructs */
#define SWORD		1	/* implicit quoting for substitute() */
#define SSQUOTE		2	/* inside '' */
#define SDQUOTE		3	/* inside "" */
#define SBRACE		4	/* inside ${} */
#define SCSPAREN	5	/* inside $() */
#define SBQUOTE		6	/* inside `` */
#define SASPAREN	7	/* inside $(( )) */
#define SHEREDELIM	8	/* parsing <<,<<- delimiter */
#define SHEREDQUOTE	9	/* parsing " in <<,<<- delimiter */
#define STBRACE		10	/* parsing ${..[#%]..} */
#define SQBRACE		11	/* inside "${}" */
/* Structure to keep track of the lexing state and the various pieces of info
 * needed for each particular state.
 *
 * ls_state holds one of the S* state values; ls_info carries the per-state
 * data.  The ls_* accessor macros defined inside the union give short names
 * for the union members.  Element 0 of every state block uses ls_info.base
 * as a link to the previous block (see push_state_()/pop_state_()).
 */
typedef struct lex_state Lex_state;
struct lex_state {
	int ls_state;
	union {
		/* $(...) */
		struct scsparen_info {
			int nparen;	/* count open parenthesis */
			int csstate;	/* XXX remove */
#define ls_scsparen ls_info.u_scsparen
		} u_scsparen;

		/* $((...)) */
		struct sasparen_info {
			int nparen;	/* count open parenthesis */
			int start;	/* marks start of $(( in output str */
#define ls_sasparen ls_info.u_sasparen
		} u_sasparen;

		/* ((...)) */
		struct sletparen_info {
			int nparen;	/* count open parenthesis */
#define ls_sletparen ls_info.u_sletparen
		} u_sletparen;

		/* `...` */
		struct sbquote_info {
			int indquotes;	/* true if in double quotes: "`...`" */
#define ls_sbquote ls_info.u_sbquote
		} u_sbquote;

		/* =(...) */
		struct sletarray_info {
			int nparen;	/* count open parentheses */
#define ls_sletarray ls_info.u_sletarray
		} u_sletarray;

		/* ADELIM */
		struct sadelim_info {
			unsigned char nparen;	/* count open parentheses */
#define SADELIM_BASH	0
#define SADELIM_MAKE	1
			unsigned char style;
			unsigned char delimiter;
			unsigned char num;
			unsigned char flags;	/* ofs. into sadelim_flags[] */
#define ls_sadelim ls_info.u_sadelim
		} u_sadelim;

		Lex_state *base;	/* used to point to next state block */
	} ls_info;
};
79 typedef struct State_info State_info;
80 struct State_info {
81 Lex_state *base;
82 Lex_state *end;
85 static void readhere(struct ioword *);
86 static int getsc__(void);
87 static void getsc_line(Source *);
88 static int getsc_bn(void);
89 static char *get_brace_var(XString *, char *);
90 static int arraysub(char **);
91 static const char *ungetsc(int);
92 static void gethere(void);
93 static Lex_state *push_state_(State_info *, Lex_state *);
94 static Lex_state *pop_state_(State_info *, Lex_state *);
96 static int backslash_skip;
97 static int ignore_backslash_newline;
/* optimized getsc_bn(): take the next byte straight from the source buffer
 * unless it is NUL or a backslash, a backslash is pending, or this is the
 * first read of the source (SF_FIRST); otherwise call getsc_bn().
 */
#define getsc()		(*source->str != '\0' && *source->str != '\\' \
			 && !backslash_skip && !(source->flags & SF_FIRST) \
			 ? *source->str++ : getsc_bn())
/* optimised getsc__(): same idea, without the backslash handling */
#define getsc_()	((*source->str != '\0') && !(source->flags & SF_FIRST) \
			 ? *source->str++ : getsc__())

/* number of Lex_state elements per state block */
#define STATE_BSIZE	32

/* Enter lexer state (s).  Expects `statep', `state_info' and `state' to be
 * in scope; chains a new block via push_state_() when the current one is
 * full.
 */
#define PUSH_STATE(s)	do { \
				if (++statep == state_info.end) \
					statep = push_state_(&state_info, statep); \
				state = statep->ls_state = (s); \
			} while (0)

/* Leave the current lexer state; releases the block via pop_state_() when
 * stepping back onto its link slot.
 */
#define POP_STATE()	do { \
				if (--statep == state_info.base) \
					statep = pop_state_(&state_info, statep); \
				state = statep->ls_state; \
			} while (0)
122 * Lexical analyzer
124 * tokens are not regular expressions, they are LL(1).
125 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
126 * hence the state stack.
130 yylex(int cf)
132 Lex_state states[STATE_BSIZE], *statep;
133 State_info state_info;
134 int c, state;
135 XString ws; /* expandable output word */
136 char *wp; /* output word pointer */
137 char *sp, *dp;
138 int c2;
140 Again:
141 states[0].ls_state = -1;
142 states[0].ls_info.base = NULL;
143 statep = &states[1];
144 state_info.base = states;
145 state_info.end = &states[STATE_BSIZE];
147 Xinit(ws, wp, 64, ATEMP);
149 backslash_skip = 0;
150 ignore_backslash_newline = 0;
152 if (cf&ONEWORD)
153 state = SWORD;
154 else { /* normal lexing */
155 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
156 while ((c = getsc()) == ' ' || c == '\t')
158 if (c == '#') {
159 ignore_backslash_newline++;
160 while ((c = getsc()) != '\0' && c != '\n')
162 ignore_backslash_newline--;
164 ungetsc(c);
166 if (source->flags & SF_ALIAS) { /* trailing ' ' in alias definition */
167 source->flags &= ~SF_ALIAS;
170 /* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */
171 statep->ls_state = state;
173 /* collect non-special or quoted characters to form word */
174 while (!((c = getsc()) == 0
175 || ((state == SBASE || state == SHEREDELIM)
176 && ctype(c, C_LEX1))))
178 Xcheck(ws, wp);
179 switch (state) {
180 case SBASE:
181 if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
182 *wp = EOS; /* temporary */
183 if (is_wdvarname(Xstring(ws, wp), FALSE)) {
184 char *p, *tmp;
186 if (arraysub(&tmp)) {
187 *wp++ = CHAR;
188 *wp++ = c;
189 for (p = tmp; *p; ) {
190 Xcheck(ws, wp);
191 *wp++ = CHAR;
192 *wp++ = *p++;
194 afree(tmp, ATEMP);
195 break;
196 } else {
197 Source *s;
199 s = pushs(SREREAD,
200 source->areap);
201 s->start = s->str =
202 s->u.freeme = tmp;
203 s->next = source;
204 source = s;
207 *wp++ = CHAR;
208 *wp++ = c;
209 break;
211 /* fall through.. */
212 Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */
213 switch (c) {
214 case '\\':
215 getsc_qchar:
216 c = getsc();
217 if (c) /* trailing \ is lost */
218 *wp++ = QCHAR, *wp++ = c;
219 break;
220 case '\'':
221 open_ssquote:
222 *wp++ = OQUOTE;
223 ignore_backslash_newline++;
224 PUSH_STATE(SSQUOTE);
225 break;
226 case '"':
227 open_sdquote:
228 *wp++ = OQUOTE;
229 PUSH_STATE(SDQUOTE);
230 break;
231 default:
232 goto Subst;
234 break;
236 Subst:
237 switch (c) {
238 case '\\':
239 c = getsc();
240 switch (c) {
241 case '"':
242 if ((cf & HEREDOC))
243 goto heredocquote;
244 /* FALLTHROUGH */
245 case '\\':
246 case '$': case '`':
247 *wp++ = QCHAR, *wp++ = c;
248 break;
249 default:
250 heredocquote:
251 Xcheck(ws, wp);
252 if (c) { /* trailing \ is lost */
253 *wp++ = CHAR, *wp++ = '\\';
254 *wp++ = CHAR, *wp++ = c;
256 break;
258 break;
259 case '$':
260 subst_dollar:
261 c = getsc();
262 if (c == '(') /*)*/ {
263 c = getsc();
264 if (c == '(') /*)*/ {
265 PUSH_STATE(SASPAREN);
266 statep->ls_sasparen.nparen = 2;
267 statep->ls_sasparen.start =
268 Xsavepos(ws, wp);
269 *wp++ = EXPRSUB;
270 } else {
271 ungetsc(c);
272 PUSH_STATE(SCSPAREN);
273 statep->ls_scsparen.nparen = 1;
274 statep->ls_scsparen.csstate = 0;
275 *wp++ = COMSUB;
277 } else if (c == '{') /*}*/ {
278 *wp++ = OSUBST;
279 *wp++ = '{'; /*}*/
280 wp = get_brace_var(&ws, wp);
281 c = getsc();
282 /* If this is a trim operation,
283 * treat (,|,) specially in STBRACE.
285 if (c == '#' || c == '%') {
286 ungetsc(c);
287 PUSH_STATE(STBRACE);
288 } else {
289 ungetsc(c);
290 if (state == SDQUOTE)
291 PUSH_STATE(SQBRACE);
292 else
293 PUSH_STATE(SBRACE);
295 } else if (isalpha(c) || c=='_') {
296 *wp++ = OSUBST;
297 *wp++ = 'X';
298 do {
299 Xcheck(ws, wp);
300 *wp++ = c;
301 c = getsc();
302 } while (isalnum(c) || c=='_');
303 *wp++ = '\0';
304 *wp++ = CSUBST;
305 *wp++ = 'X';
306 ungetsc(c);
307 } else if (isdigit(c) || ctype(c, C_VAR1)) {
309 Xcheck(ws, wp);
310 *wp++ = OSUBST;
311 *wp++ = 'X';
312 *wp++ = c;
313 *wp++ = '\0';
314 *wp++ = CSUBST;
315 *wp++ = 'X';
316 } else {
317 *wp++ = CHAR, *wp++ = '$';
318 ungetsc(c);
320 break;
321 case '`':
322 subst_gravis:
323 PUSH_STATE(SBQUOTE);
324 *wp++ = COMSUB;
325 statep->ls_sbquote.indquotes = 0;
326 Lex_state *s = statep;
327 Lex_state *base = state_info.base;
328 while (1) {
329 for (; s != base; s--) {
330 if (s->ls_state == SDQUOTE) {
331 statep->ls_sbquote.indquotes = 1;
332 break;
335 if (s != base)
336 break;
337 if (!(s = s->ls_info.base))
338 break;
339 base = s-- - STATE_BSIZE;
341 break;
342 default:
343 store_char:
344 *wp++ = CHAR, *wp++ = c;
346 break;
348 case SSQUOTE:
349 if (c == '\'') {
350 POP_STATE();
351 *wp++ = CQUOTE;
352 ignore_backslash_newline--;
353 } else
354 *wp++ = QCHAR, *wp++ = c;
355 break;
357 case SDQUOTE:
358 if (c == '"') {
359 POP_STATE();
360 *wp++ = CQUOTE;
361 } else
362 goto Subst;
363 break;
365 case SCSPAREN: /* $( .. ) */
366 /* todo: deal with $(...) quoting properly
367 * kludge to partly fake quoting inside $(..): doesn't
368 * really work because nested $(..) or ${..} inside
369 * double quotes aren't dealt with.
371 switch (statep->ls_scsparen.csstate) {
372 case 0: /* normal */
373 switch (c) {
374 case '(':
375 statep->ls_scsparen.nparen++;
376 break;
377 case ')':
378 statep->ls_scsparen.nparen--;
379 break;
380 case '\\':
381 statep->ls_scsparen.csstate = 1;
382 break;
383 case '"':
384 statep->ls_scsparen.csstate = 2;
385 break;
386 case '\'':
387 statep->ls_scsparen.csstate = 4;
388 ignore_backslash_newline++;
389 break;
391 break;
393 case 1: /* backslash in normal mode */
394 case 3: /* backslash in double quotes */
395 --statep->ls_scsparen.csstate;
396 break;
398 case 2: /* double quotes */
399 if (c == '"')
400 statep->ls_scsparen.csstate = 0;
401 else if (c == '\\')
402 statep->ls_scsparen.csstate = 3;
403 break;
405 case 4: /* single quotes */
406 if (c == '\'') {
407 statep->ls_scsparen.csstate = 0;
408 ignore_backslash_newline--;
410 break;
412 if (statep->ls_scsparen.nparen == 0) {
413 POP_STATE();
414 *wp++ = 0; /* end of COMSUB */
415 } else
416 *wp++ = c;
417 break;
419 case SASPAREN: /* $(( .. )) */
420 /* todo: deal with $((...); (...)) properly */
421 /* XXX should nest using existing state machine
422 * (embed "..", $(...), etc.) */
423 if (c == '(')
424 statep->ls_sasparen.nparen++;
425 else if (c == ')') {
426 statep->ls_sasparen.nparen--;
427 if (statep->ls_sasparen.nparen == 1) {
428 /*(*/
429 if ((c2 = getsc()) == ')') {
430 POP_STATE();
431 *wp++ = 0; /* end of EXPRSUB */
432 break;
433 } else {
434 char *s;
436 ungetsc(c2);
437 /* mismatched parenthesis -
438 * assume we were really
439 * parsing a $(..) expression
441 s = Xrestpos(ws, wp,
442 statep->ls_sasparen.start);
443 memmove(s + 1, s, wp - s);
444 *s++ = COMSUB;
445 *s = '('; /*)*/
446 wp++;
447 statep->ls_scsparen.nparen = 1;
448 statep->ls_scsparen.csstate = 0;
449 state = statep->ls_state =
450 SCSPAREN;
454 *wp++ = c;
455 break;
457 case SBRACE:
458 if (c == '\'')
459 goto open_ssquote;
460 /* FALLTHROUGH */
461 case SQBRACE:
462 if (c == '"')
463 goto open_sdquote;
464 else if (c == '\\')
465 goto getsc_qchar;
466 else if (c == '$')
467 goto subst_dollar;
468 else if (c == '`')
469 goto subst_gravis;
470 else if (c != /*{*/ '}')
471 goto store_char;
472 POP_STATE();
473 *wp++ = CSUBST;
474 *wp++ = /*{*/ '}';
475 break;
477 case STBRACE:
478 /* Same as SBRACE, except (,|,) treated specially */
479 /*{*/
480 if (c == '}') {
481 POP_STATE();
482 *wp++ = CSUBST;
483 *wp++ = /*{*/ '}';
484 } else
485 goto Sbase2;
486 break;
488 case SBQUOTE:
489 if (c == '`') {
490 *wp++ = 0;
491 POP_STATE();
492 } else if (c == '\\') {
493 switch (c = getsc()) {
494 case '\\':
495 case '$': case '`':
496 *wp++ = c;
497 break;
498 case '"':
499 if (statep->ls_sbquote.indquotes) {
500 *wp++ = c;
501 break;
503 /* fall through.. */
504 default:
505 if (c) { /* trailing \ is lost */
506 *wp++ = '\\';
507 *wp++ = c;
509 break;
511 } else
512 *wp++ = c;
513 break;
515 case SWORD: /* ONEWORD */
516 goto Subst;
518 #ifdef KSH
519 case SLETPAREN: /* LETEXPR: (( ... )) */
520 /*(*/
521 if (c == ')') {
522 if (statep->ls_sletparen.nparen > 0)
523 --statep->ls_sletparen.nparen;
524 /*(*/
525 else if ((c2 = getsc()) == ')') {
526 c = 0;
527 *wp++ = CQUOTE;
528 goto Done;
529 } else
530 ungetsc(c2);
531 } else if (c == '(')
532 /* parenthesis inside quotes and backslashes
533 * are lost, but at&t ksh doesn't count them
534 * either
536 ++statep->ls_sletparen.nparen;
537 goto Sbase2;
538 #endif /* KSH */
540 case SHEREDELIM: /* <<,<<- delimiter */
541 /* XXX chuck this state (and the next) - use
542 * the existing states ($ and \`..` should be
543 * stripped of their specialness after the
544 * fact).
546 /* here delimiters need a special case since
547 * $ and `..` are not to be treated specially
549 if (c == '\\') {
550 c = getsc();
551 if (c) { /* trailing \ is lost */
552 *wp++ = QCHAR;
553 *wp++ = c;
555 } else if (c == '\'') {
556 PUSH_STATE(SSQUOTE);
557 *wp++ = OQUOTE;
558 ignore_backslash_newline++;
559 } else if (c == '"') {
560 state = statep->ls_state = SHEREDQUOTE;
561 *wp++ = OQUOTE;
562 } else {
563 *wp++ = CHAR;
564 *wp++ = c;
566 break;
568 case SHEREDQUOTE: /* " in <<,<<- delimiter */
569 if (c == '"') {
570 *wp++ = CQUOTE;
571 state = statep->ls_state = SHEREDELIM;
572 } else {
573 if (c == '\\') {
574 switch (c = getsc()) {
575 case '\\': case '"':
576 case '$': case '`':
577 break;
578 default:
579 if (c) { /* trailing \ lost */
580 *wp++ = CHAR;
581 *wp++ = '\\';
583 break;
586 *wp++ = CHAR;
587 *wp++ = c;
589 break;
593 Done:
594 Xcheck(ws, wp);
595 if (statep != &states[1])
596 /* XXX figure out what is missing */
597 yyerror("no closing quote\n");
599 /* This done to avoid tests for SHEREDELIM wherever SBASE tested */
600 if (state == SHEREDELIM)
601 state = SBASE;
603 dp = Xstring(ws, wp);
604 if ((c == '<' || c == '>') && state == SBASE
605 && ((c2 = Xlength(ws, wp)) == 0
606 || (c2 == 2 && dp[0] == CHAR && isdigit(dp[1]))))
608 struct ioword *iop =
609 (struct ioword *) alloc(sizeof(*iop), ATEMP);
611 if (c2 == 2)
612 iop->unit = dp[1] - '0';
613 else
614 iop->unit = c == '>'; /* 0 for <, 1 for > */
616 c2 = getsc();
617 /* <<, >>, <> are ok, >< is not */
618 if (c == c2 || (c == '<' && c2 == '>')) {
619 iop->flag = c == c2 ?
620 (c == '>' ? IOCAT : IOHERE) : IORDWR;
621 if (iop->flag == IOHERE) {
622 if ((c2 = getsc()) == '-')
623 iop->flag |= IOSKIP;
624 else
625 ungetsc(c2);
627 } else if (c2 == '&')
628 iop->flag = IODUP | (c == '<' ? IORDUP : 0);
629 else {
630 iop->flag = c == '>' ? IOWRITE : IOREAD;
631 if (c == '>' && c2 == '|')
632 iop->flag |= IOCLOB;
633 else
634 ungetsc(c2);
637 iop->name = NULL;
638 iop->delim = NULL;
639 iop->heredoc = NULL;
640 Xfree(ws, wp); /* free word */
641 yylval.iop = iop;
642 return (REDIR);
645 if (wp == dp && state == SBASE) {
646 Xfree(ws, wp); /* free word */
647 /* no word, process LEX1 character */
648 switch (c) {
649 default:
650 return c;
652 case '|':
653 case '&':
654 case ';':
655 if ((c2 = getsc()) == c)
656 c = (c == ';') ? BREAK :
657 (c == '|') ? LOGOR :
658 (c == '&') ? LOGAND :
659 YYERRCODE;
660 #ifdef KSH
661 else if (c == '|' && c2 == '&')
662 c = COPROC;
663 #endif /* KSH */
664 else
665 ungetsc(c2);
666 return c;
668 case '\n':
669 gethere();
670 if (cf & CONTIN)
671 goto Again;
672 return c;
674 case '(': /*)*/
675 #ifdef KSH
676 if ((c2 = getsc()) == '(') /*)*/
677 /* XXX need to handle ((...); (...)) */
678 c = MDPAREN;
679 else
680 ungetsc(c2);
681 #endif /* KSH */
682 return c;
683 /*(*/
684 case ')':
685 return c;
689 *wp++ = EOS; /* terminate word */
690 yylval.cp = Xclose(ws, wp);
691 if (state == SWORD) /* ONEWORD? */
692 return (LWORD);
693 ungetsc(c); /* unget terminator */
695 /* copy word to unprefixed string ident */
696 sp = yylval.cp;
697 dp = ident;
698 if ((cf & HEREDELIM) && (sp[1] == '<'))
699 while (dp < ident+IDENT) {
700 if ((c = *sp++) == CHAR)
701 *dp++ = *sp++;
702 else if ((c != OQUOTE) && (c != CQUOTE))
703 break;
705 else
706 while (dp < ident+IDENT && (c = *sp++) == CHAR)
707 *dp++ = *sp++;
708 /* Make sure the ident array stays '\0' padded */
709 memset(dp, 0, (ident+IDENT) - dp + 1);
710 if (c != EOS)
711 *ident = '\0'; /* word is not unquoted */
713 if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) {
714 struct tbl *p;
716 /* { */
717 if ((cf & KEYWORD) && (p = transitional_tsearch(&keywords.root, ident))
718 && (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}'))
720 afree(yylval.cp, ATEMP);
721 return p->val.i;
723 if ((cf & ALIAS) && (p = transitional_tsearch(&aliases.root, ident))
724 && (p->flag & ISSET))
726 Source *s;
728 for (s = source; s->type == SALIAS; s = s->next)
729 if (s->u.tblp == p)
730 return LWORD;
731 /* push alias expansion */
732 s = pushs(SALIAS, source->areap);
733 s->start = s->str = p->val.s;
734 s->u.tblp = p;
735 s->next = source;
736 source = s;
737 afree(yylval.cp, ATEMP);
738 goto Again;
742 return (LWORD);
745 static void
746 gethere(void)
748 struct ioword **p;
750 for (p = heres; p < herep; p++)
751 readhere(*p);
752 herep = heres;
756 * read "<<word" text into temp file
759 static void
760 readhere(struct ioword *iop)
762 int c;
763 char *volatile eof;
764 char *eofp;
765 int skiptabs;
766 XString xs;
767 char *xp;
768 int xpos;
770 eof = evalstr(iop->delim, 0);
772 if (!(iop->flag & IOEVAL))
773 ignore_backslash_newline++;
775 Xinit(xs, xp, 256, ATEMP);
777 for (;;) {
778 eofp = eof;
779 skiptabs = iop->flag & IOSKIP;
780 xpos = Xsavepos(xs, xp);
781 while ((c = getsc()) != 0) {
782 if (skiptabs) {
783 if (c == '\t')
784 continue;
785 skiptabs = 0;
787 if (c != *eofp)
788 break;
789 Xcheck(xs, xp);
790 Xput(xs, xp, c);
791 eofp++;
793 /* Allow EOF here so commands with out trailing newlines
794 * will work (eg, ksh -c '...', $(...), etc).
796 if (*eofp == '\0' && (c == 0 || c == '\n')) {
797 xp = Xrestpos(xs, xp, xpos);
798 break;
800 ungetsc(c);
801 while ((c = getsc()) != '\n') {
802 if (c == 0)
803 yyerror("here document `%s' unclosed\n", eof);
804 Xcheck(xs, xp);
805 Xput(xs, xp, c);
807 Xcheck(xs, xp);
808 Xput(xs, xp, c);
810 Xput(xs, xp, '\0');
811 iop->heredoc = Xclose(xs, xp);
813 if (!(iop->flag & IOEVAL))
814 ignore_backslash_newline--;
817 void
818 yyerror(const char *fmt, ...)
820 va_list va;
822 /* pop aliases and re-reads */
823 while (source->type == SALIAS || source->type == SREREAD)
824 source = source->next;
825 source->str = null; /* zap pending input */
827 error_prefix(TRUE);
828 SH_VA_START(va, fmt);
829 shf_vfprintf(shl_out, fmt, va);
830 va_end(va);
831 errorf(null);
835 * input for yylex with alias expansion
838 Source *
839 pushs(int type, Area *areap)
841 Source *s;
843 s = alloc(sizeof(Source), areap);
844 s->type = type;
845 s->str = null;
846 s->start = NULL;
847 s->line = 0;
848 s->errline = 0;
849 s->file = NULL;
850 s->flags = 0;
851 s->next = NULL;
852 s->areap = areap;
853 if (type == SFILE || type == SSTDIN) {
854 char *dummy;
855 Xinit(s->xs, dummy, 256, s->areap);
856 } else
857 memset(&s->xs, 0, sizeof(s->xs));
858 return (s);
861 static int
862 getsc__()
864 Source *s = source;
865 int c;
867 getsc_again:
868 while ((c = *s->str++) == 0) {
869 s->str = NULL; /* return 0 for EOF by default */
870 switch (s->type) {
871 case SEOF:
872 s->str = null;
873 return 0;
875 case SSTDIN:
876 case SFILE:
877 getsc_line(s);
878 break;
880 case SWSTR:
881 break;
883 case SSTRING:
884 break;
886 case SWORDS:
887 s->start = s->str = *s->u.strv++;
888 s->type = SWORDSEP;
889 break;
891 case SWORDSEP:
892 if (*s->u.strv == NULL) {
893 s->start = s->str = "\n";
894 s->type = SEOF;
895 } else {
896 s->start = s->str = " ";
897 s->type = SWORDS;
899 break;
901 case SALIAS:
902 if (s->flags & SF_ALIASEND) {
903 /* pass on an unused SF_ALIAS flag */
904 source = s->next;
905 source->flags |= s->flags & SF_ALIAS;
906 s = source;
907 } else if (*s->u.tblp->val.s
908 && isspace(strchr(s->u.tblp->val.s, 0)[-1]))
910 source = s = s->next; /* pop source stack */
911 /* Note that this alias ended with a space,
912 * enabling alias expansion on the following
913 * word.
915 s->flags |= SF_ALIAS;
916 } else {
917 /* At this point, we need to keep the current
918 * alias in the source list so recursive
919 * aliases can be detected and we also need
920 * to return the next character. Do this
921 * by temporarily popping the alias to get
922 * the next character and then put it back
923 * in the source list with the SF_ALIASEND
924 * flag set.
926 source = s->next; /* pop source stack */
927 source->flags |= s->flags & SF_ALIAS;
928 c = getsc__();
929 if (c) {
930 s->flags |= SF_ALIASEND;
931 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
932 s->start = s->str = s->ugbuf;
933 s->next = source;
934 source = s;
935 } else {
936 s = source;
937 /* avoid reading eof twice */
938 s->str = NULL;
939 break;
942 continue;
944 case SREREAD:
945 if (s->start != s->ugbuf) /* yuck */
946 afree(s->u.freeme, ATEMP);
947 source = s = s->next;
948 continue;
950 if (s->str == NULL) {
951 s->type = SEOF;
952 s->start = s->str = null;
953 return '\0';
955 if (s->flags & SF_ECHO) {
956 shf_puts(s->str, shl_out);
957 shf_flush(shl_out);
960 /* check for UTF-8 byte order mark */
961 if (s->flags & SF_FIRST) {
962 s->flags &= ~SF_FIRST;
963 if (((unsigned char)c == 0xEF) &&
964 (((const unsigned char *)(s->str))[0] == 0xBB) &&
965 (((const unsigned char *)(s->str))[1] == 0xBF)) {
966 s->str += 2;
967 /* UTFMODE = 1; */
968 goto getsc_again;
971 return (c);
974 static void
975 getsc_line(Source *s)
977 char *xp = Xstring(s->xs, xp);
978 int interactive = Flag(FTALKING) && s->type == SSTDIN;
979 /* int have_tty = interactive && (s->flags & SF_TTY); */
981 /* Done here to ensure nothing odd happens when a timeout occurs */
982 XcheckN(s->xs, xp, LINE);
983 *xp = '\0';
984 s->start = s->str = xp;
986 #ifdef KSH
987 if (have_tty && ksh_tmout) {
988 ksh_tmout_state = TMOUT_READING;
989 alarm(ksh_tmout);
991 #endif /* KSH */
992 #ifdef EDIT
993 if (have_tty && (0
994 # ifdef VI
995 || Flag(FVI)
996 # endif /* VI */
997 # ifdef EMACS
998 || Flag(FEMACS) || Flag(FGMACS)
999 # endif /* EMACS */
1002 int nread;
1004 nread = x_read(xp, LINE);
1005 if (nread < 0) /* read error */
1006 nread = 0;
1007 xp[nread] = '\0';
1008 xp += nread;
1009 } else
1010 #endif /* EDIT */
1012 if (interactive) {
1013 pprompt(prompt, 0);
1014 } else
1015 s->line++;
1017 while (1) {
1018 char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1020 if (!p && shf_error(s->u.shf)
1021 && shf_errno(s->u.shf) == EINTR)
1023 shf_clearerr(s->u.shf);
1024 if (trap)
1025 runtraps(0);
1026 continue;
1028 if (!p || (xp = p, xp[-1] == '\n'))
1029 break;
1030 /* double buffer size */
1031 xp++; /* move past null so doubling works... */
1032 XcheckN(s->xs, xp, Xlength(s->xs, xp));
1033 xp--; /* ...and move back again */
1035 /* flush any unwanted input so other programs/builtins
1036 * can read it. Not very optimal, but less error prone
1037 * than flushing else where, dealing with redirections,
1038 * etc..
1039 * todo: reduce size of shf buffer (~128?) if SSTDIN
1041 if (s->type == SSTDIN)
1042 shf_flush(s->u.shf);
1044 /* XXX: temporary kludge to restore source after a
1045 * trap may have been executed.
1047 source = s;
1048 #ifdef KSH
1049 if (have_tty && ksh_tmout) {
1050 ksh_tmout_state = TMOUT_EXECUTING;
1051 alarm(0);
1053 #endif /* KSH */
1054 s->start = s->str = Xstring(s->xs, xp);
1055 strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1056 /* Note: if input is all nulls, this is not eof */
1057 if (Xlength(s->xs, xp) == 0) { /* EOF */
1058 if (s->type == SFILE)
1059 shf_fdclose(s->u.shf);
1060 s->str = NULL;
1061 } else if (interactive) {
1062 #ifdef HISTORY
1063 char *p = Xstring(s->xs, xp);
1064 if (cur_prompt == PS1)
1065 while (*p && ctype(*p, C_IFS) && ctype(*p, C_IFSWS))
1066 p++;
1067 if (*p) {
1068 # ifdef EASY_HISTORY
1069 if (cur_prompt == PS2)
1070 histappend(Xstring(s->xs, xp), 1);
1071 else
1072 # endif /* EASY_HISTORY */
1074 s->line++;
1075 histsave(s->line, s->str, 1);
1078 #endif /* HISTORY */
1080 if (interactive)
1081 set_prompt(PS2, NULL);
1084 void
1085 set_prompt(int to, Source UNUSED(*s))
1087 cur_prompt = to;
1089 switch (to) {
1090 case PS1: /* command */
1091 #ifdef KSH
1092 /* Substitute ! and !! here, before substitutions are done
1093 * so ! in expanded variables are not expanded.
1094 * NOTE: this is not what at&t ksh does (it does it after
1095 * substitutions, POSIX doesn't say which is to be done.
1098 struct shf *shf;
1099 char *ps1;
1100 Area *saved_atemp;
1102 ps1 = str_val(global("PS1"));
1103 shf = shf_sopen((char *) 0, strlen(ps1) * 2,
1104 SHF_WR | SHF_DYNAMIC, (struct shf *) 0);
1105 while (*ps1) {
1106 if (*ps1 != '!' || *++ps1 == '!')
1107 shf_putchar(*ps1++, shf);
1108 else
1109 shf_fprintf(shf, "%d",
1110 s ? s->line + 1 : 0);
1112 ps1 = shf_sclose(shf);
1113 saved_atemp = ATEMP;
1114 newenv(E_ERRH);
1115 if (sigsetjmp(e->jbuf, 0)) {
1116 prompt = safe_prompt;
1117 /* Don't print an error - assume it has already
1118 * been printed. Reason is we may have forked
1119 * to run a command and the child may be
1120 * unwinding its stack through this code as it
1121 * exits.
1123 } else
1124 prompt = str_save(substitute(ps1, 0),
1125 saved_atemp);
1126 quitenv();
1128 #else /* KSH */
1129 prompt = str_val(global("PS1"));
1130 #endif /* KSH */
1131 break;
1133 case PS2: /* command continuation */
1134 prompt = str_val(global("PS2"));
1135 break;
1139 /* See also related routine, promptlen() in edit.c */
1140 void
1141 pprompt(cp, ntruncate)
1142 const char *cp;
1143 int ntruncate;
1145 #if 0
1146 char nbuf[32];
1147 int c;
1149 while (*cp != 0) {
1150 if (*cp != '!')
1151 c = *cp++;
1152 else if (*++cp == '!')
1153 c = *cp++;
1154 else {
1155 int len;
1156 char *p;
1158 shf_snprintf(p = nbuf, sizeof(nbuf), "%d",
1159 source->line + 1);
1160 len = strlen(nbuf);
1161 if (ntruncate) {
1162 if (ntruncate >= len) {
1163 ntruncate -= len;
1164 continue;
1166 p += ntruncate;
1167 len -= ntruncate;
1168 ntruncate = 0;
1170 shf_write(p, len, shl_out);
1171 continue;
1173 if (ntruncate)
1174 --ntruncate;
1175 else
1176 shf_putc(c, shl_out);
1178 #endif /* 0 */
1179 shf_puts(cp + ntruncate, shl_out);
1180 shf_flush(shl_out);
1183 /* Read the variable part of a ${...} expression (ie, up to but not including
1184 * the :[-+?=#%] or close-brace.
1186 static char *
1187 get_brace_var(XString *wsp, char *wp)
1189 enum parse_state {
1190 PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1191 PS_NUMBER, PS_VAR1, PS_END
1193 state;
1194 char c;
1196 state = PS_INITIAL;
1197 while (1) {
1198 c = getsc();
1199 /* State machine to figure out where the variable part ends. */
1200 switch (state) {
1201 case PS_INITIAL:
1202 if (c == '#') {
1203 state = PS_SAW_HASH;
1204 break;
1206 /* fall through.. */
1207 case PS_SAW_HASH:
1208 if (isalpha(c) || c=='_')
1209 state = PS_IDENT;
1210 else if (isdigit(c))
1211 state = PS_NUMBER;
1212 else if (ctype(c, C_VAR1))
1213 state = PS_VAR1;
1214 else
1215 state = PS_END;
1216 break;
1217 case PS_IDENT:
1218 if (!isalnum(c) && c!='_') {
1219 state = PS_END;
1220 if (c == '[') {
1221 char *tmp, *p;
1223 if (!arraysub(&tmp))
1224 yyerror("missing ]\n");
1225 *wp++ = c;
1226 for (p = tmp; *p; ) {
1227 Xcheck(*wsp, wp);
1228 *wp++ = *p++;
1230 afree(tmp, ATEMP);
1231 c = getsc(); /* the ] */
1234 break;
1235 case PS_NUMBER:
1236 if (!isdigit(c))
1237 state = PS_END;
1238 break;
1239 case PS_VAR1:
1240 state = PS_END;
1241 break;
1242 case PS_END: /* keep gcc happy */
1243 break;
1245 if (state == PS_END) {
1246 *wp++ = '\0'; /* end of variable part */
1247 ungetsc(c);
1248 break;
1250 Xcheck(*wsp, wp);
1251 *wp++ = c;
1253 return wp;
1257 * Save an array subscript - returns true if matching bracket found, false
1258 * if eof or newline was found.
1259 * (Returned string double null terminated)
1261 static int
1262 arraysub(char **strp)
1264 XString ws;
1265 char *wp;
1266 char c;
1267 int depth = 1; /* we are just past the initial [ */
1269 Xinit(ws, wp, 32, ATEMP);
1271 do {
1272 c = getsc();
1273 Xcheck(ws, wp);
1274 *wp++ = c;
1275 if (c == '[')
1276 depth++;
1277 else if (c == ']')
1278 depth--;
1279 } while (depth > 0 && c && c != '\n');
1281 *wp++ = '\0';
1282 *strp = Xclose(ws, wp);
1284 return depth == 0 ? 1 : 0;
1287 /* Unget a char: handles case when we are already at the start of the buffer */
1288 static const char *
1289 ungetsc(int c)
1291 if (backslash_skip)
1292 backslash_skip--;
1293 /* Don't unget eof... */
1294 if (source->str == null && c == '\0')
1295 return source->str;
1296 if (source->str > source->start)
1297 source->str--;
1298 else {
1299 Source *s;
1301 s = pushs(SREREAD, source->areap);
1302 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1303 s->start = s->str = s->ugbuf;
1304 s->next = source;
1305 source = s;
1307 return source->str;
1311 /* Called to get a char that isn't a \newline sequence. */
1312 static int
1313 getsc_bn(void)
1315 int c, c2;
1317 if (ignore_backslash_newline)
1318 return getsc_();
1320 if (backslash_skip == 1) {
1321 backslash_skip = 2;
1322 return getsc_();
1325 backslash_skip = 0;
1327 while (1) {
1328 c = getsc_();
1329 if (c == '\\') {
1330 if ((c2 = getsc_()) == '\n')
1331 /* ignore the \newline; get the next char... */
1332 continue;
1333 ungetsc(c2);
1334 backslash_skip = 1;
1336 return c;
1340 static Lex_state *
1341 push_state_(State_info *si, Lex_state *old_end)
1343 Lex_state *new = alloc(sizeof(Lex_state) * STATE_BSIZE, ATEMP);
1345 new[0].ls_info.base = old_end;
1346 si->base = &new[0];
1347 si->end = &new[STATE_BSIZE];
1348 return &new[1];
1351 static Lex_state *
1352 pop_state_(State_info *si, Lex_state *old_end)
1354 Lex_state *old_base = si->base;
1356 si->base = old_end->ls_info.base - STATE_BSIZE;
1357 si->end = old_end->ls_info.base;
1359 afree(old_base, ATEMP);
1361 return si->base + STATE_BSIZE - 1;