Don't crash when board size is changed.
[AROS-Contrib.git] / gnu / abc-shell / lex.c
blob083be031c703cd088b87872a450bd19ac6b37644
1 /*
2 * lexical analysis and source input
3 */
5 #include <libgen.h>
6 #include <ctype.h>
7 #include <time.h>
8 #include "sh.h"
/* Structure to keep track of the lexing state and the various pieces of info
 * needed for each particular state.
 *
 * ls_state holds the state code; ls_info holds the per-state data.  Only the
 * union member that matches ls_state is meaningful.  The ls_* macros are
 * shorthands for reaching a union member through a Lex_state.
 */
typedef struct lex_state Lex_state;
struct lex_state {
	int ls_state;
	union {
		/* $(...) */
		struct scsparen_info {
			int nparen;	/* count open parenthesis */
			int csstate;	/* XXX remove */
#define ls_scsparen ls_info.u_scsparen
		} u_scsparen;

		/* $((...)) */
		struct sasparen_info {
			int nparen;	/* count open parenthesis */
			int start;	/* marks start of $(( in output str */
#define ls_sasparen ls_info.u_sasparen
		} u_sasparen;

		/* ((...)) */
		struct sletparen_info {
			int nparen;	/* count open parenthesis */
#define ls_sletparen ls_info.u_sletparen
		} u_sletparen;

		/* `...` */
		struct sbquote_info {
			int indquotes;	/* true if in double quotes: "`...`" */
#define ls_sbquote ls_info.u_sbquote
		} u_sbquote;

		Lex_state *base;	/* used to point to next state block */
	} ls_info;
};
47 typedef struct State_info State_info;
48 struct State_info {
49 Lex_state *base;
50 Lex_state *end;
54 static void readhere(struct ioword *);
55 static int getsc__(void);
56 static void getsc_line(Source *);
57 static int getsc_bn(void);
58 static char *get_brace_var(XString *, char *);
59 static int arraysub(char **);
60 static const char *ungetsc(int);
61 static void gethere(void);
62 static Lex_state *push_state_(State_info *, Lex_state *);
63 static Lex_state *pop_state_(State_info *, Lex_state *);
64 static char *special_prompt_expand(char *);
66 static int backslash_skip;
67 static int ignore_backslash_newline;
69 /* optimized getsc_bn() */
70 #define getsc() (*source->str != '\0' && *source->str != '\\' \
71 && !backslash_skip ? *source->str++ : getsc_bn())
72 /* optimized getsc__() */
73 #define getsc_() ((*source->str != '\0') ? *source->str++ : getsc__())
75 #define STATE_BSIZE 32
77 #define PUSH_STATE(s) do { \
78 if (++statep == state_info.end) \
79 statep = push_state_(&state_info, statep); \
80 state = statep->ls_state = (s); \
81 } while (0)
83 #define POP_STATE() do { \
84 if (--statep == state_info.base) \
85 statep = pop_state_(&state_info, statep); \
86 state = statep->ls_state; \
87 } while (0)
/*
 * Lexical analyzer
 *
 * tokens are not regular expressions, they are LL(1).
 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
 * hence the state stack.
 */
/* yylex(cf): return the next token of the current input source.
 *
 * Characters are read with getsc() and collected into the expandable word
 * ws (write pointer wp).  Nested constructs are tracked on a stack of
 * Lex_state entries that starts in the local array states[] and is extended
 * through PUSH_STATE()/POP_STATE().
 *
 * cf is a set of flag bits; this function tests ONEWORD, LETEXPR, HEREDELIM,
 * VARASN, ARRAYVAR, CMDWORD, CONTIN, KEYWORD, ESACONLY and ALIAS.
 *
 * Returns REDIR (yylval.iop set) for a redirection operator, the character
 * itself or one of BREAK/LOGOR/LOGAND/COPROC/MDPAREN for an operator, the
 * keyword's value when the word is found in the keyword table, and LWORD
 * (yylval.cp set) for any other word.  An alias match pushes the alias text
 * as a new source and restarts at Again.
 *
 * NOTE(review): the listing below still carries the numbers and the missing
 * brace-only lines of the web extraction it came from - restore from the
 * repository before compiling.
 */
99 int
100 yylex(int cf)
102 Lex_state states[STATE_BSIZE], *statep;
103 State_info state_info;
104 int c, state;
105 XString ws; /* expandable output word */
106 char *wp; /* output word pointer */
107 char *sp, *dp;
108 int c2;
109 int last_terminal_was_bracket;
112 Again:
/* entry 0 is a sentinel (state -1, no previous block); lexing starts in entry 1 */
113 states[0].ls_state = -1;
114 states[0].ls_info.base = (Lex_state *) 0;
115 statep = &states[1];
116 state_info.base = states;
117 state_info.end = &states[STATE_BSIZE];
119 Xinit(ws, wp, 64, ATEMP);
121 backslash_skip = 0;
122 ignore_backslash_newline = 0;
/* pick the initial lexer state from the caller's flags */
124 if (cf&ONEWORD)
125 state = SWORD;
126 else if (cf&LETEXPR) {
127 *wp++ = OQUOTE; /* enclose arguments in (double) quotes */
128 state = SLETPAREN;
129 statep->ls_sletparen.nparen = 0;
131 else { /* normal lexing */
132 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
133 while ((c = getsc()) == ' ' || c == '\t')
135 if (c == '#') {
136 ignore_backslash_newline++;
137 while ((c = getsc()) != '\0' && c != '\n')
139 ignore_backslash_newline--;
141 ungetsc(c);
143 if (source->flags & SF_ALIAS) { /* trailing ' ' in alias definition */
144 source->flags &= ~SF_ALIAS;
145 /* In POSIX mode, a trailing space only counts if we are
146 * parsing a simple command
148 if (!Flag(FPOSIX) || (cf & CMDWORD))
149 cf |= ALIAS;
152 /* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */
153 statep->ls_state = state;
155 /* collect non-special or quoted characters to form word */
156 while (!((c = getsc()) == 0 ||
157 ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1))))
159 Xcheck(ws, wp);
160 switch (state) {
161 case SBASE:
162 if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
163 *wp = EOS; /* temporary */
164 if (is_wdvarname(Xstring(ws, wp), false))
166 char *p, *tmp;
168 if (arraysub(&tmp)) {
169 *wp++ = CHAR;
170 *wp++ = c;
171 for (p = tmp; *p; ) {
172 Xcheck(ws, wp);
173 *wp++ = CHAR;
174 *wp++ = *p++;
176 afree(tmp, ATEMP);
177 break;
178 } else {
179 Source *s;
181 s = pushs(SREREAD,
182 source->areap);
183 s->start = s->str
184 = s->u.freeme = tmp;
185 s->next = source;
186 source = s;
189 *wp++ = CHAR;
190 *wp++ = c;
191 break;
193 /* FALLTHROUGH */
194 Sbase1: /* includes *(...|...) pattern (*+?@!) */
195 if (c == '*' || c == '@' || c == '+' || c == '?' ||
196 c == '!')
198 c2 = getsc();
199 if (c2 == '(' /*)*/ ) {
200 *wp++ = OPAT;
201 *wp++ = c;
202 PUSH_STATE(SPATTERN);
203 break;
205 ungetsc(c2);
207 /* FALLTHROUGH */
208 Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */
209 switch (c) {
210 case '\\':
211 c = getsc();
212 if (c) /* trailing \ is lost */
213 *wp++ = QCHAR, *wp++ = c;
214 break;
215 case '\'':
216 *wp++ = OQUOTE;
217 ignore_backslash_newline++;
218 PUSH_STATE(SSQUOTE);
219 break;
220 case '"':
221 *wp++ = OQUOTE;
222 PUSH_STATE(SDQUOTE);
223 break;
224 default:
225 goto Subst;
227 break;
229 Subst:
230 switch (c) {
231 case '\\':
232 c = getsc();
233 switch (c) {
234 case '"': case '\\':
235 case '$': case '`':
236 *wp++ = QCHAR, *wp++ = c;
237 break;
238 default:
239 Xcheck(ws, wp);
240 if (c) { /* trailing \ is lost */
241 *wp++ = CHAR, *wp++ = '\\';
242 *wp++ = CHAR, *wp++ = c;
244 break;
246 break;
247 case '$':
248 c = getsc();
249 if (c == '(') /*)*/ {
250 c = getsc();
251 if (c == '(') /*)*/ {
252 PUSH_STATE(SASPAREN);
253 statep->ls_sasparen.nparen = 2;
254 statep->ls_sasparen.start =
255 Xsavepos(ws, wp);
256 *wp++ = EXPRSUB;
257 } else {
258 ungetsc(c);
259 PUSH_STATE(SCSPAREN);
260 statep->ls_scsparen.nparen = 1;
261 statep->ls_scsparen.csstate = 0;
262 *wp++ = COMSUB;
264 } else if (c == '{') /*}*/ {
265 *wp++ = OSUBST;
266 *wp++ = '{'; /*}*/
267 wp = get_brace_var(&ws, wp);
268 c = getsc();
269 /* allow :# and :% (ksh88 compat) */
270 if (c == ':') {
271 *wp++ = CHAR, *wp++ = c;
272 c = getsc();
274 /* If this is a trim operation,
275 * treat (,|,) specially in STBRACE.
277 if (c == '#' || c == '%') {
278 ungetsc(c);
279 PUSH_STATE(STBRACE);
280 } else {
281 ungetsc(c);
282 PUSH_STATE(SBRACE);
284 } else if (ctype(c, C_ALPHA)) {
285 *wp++ = OSUBST;
286 *wp++ = 'X';
287 do {
288 Xcheck(ws, wp);
289 *wp++ = c;
290 c = getsc();
291 } while (ctype(c, C_ALPHA|C_DIGIT));
292 *wp++ = '\0';
293 *wp++ = CSUBST;
294 *wp++ = 'X';
295 ungetsc(c);
296 } else if (ctype(c, C_DIGIT|C_VAR1)) {
297 Xcheck(ws, wp);
298 *wp++ = OSUBST;
299 *wp++ = 'X';
300 *wp++ = c;
301 *wp++ = '\0';
302 *wp++ = CSUBST;
303 *wp++ = 'X';
304 } else {
305 *wp++ = CHAR, *wp++ = '$';
306 ungetsc(c);
308 break;
309 case '`':
310 PUSH_STATE(SBQUOTE);
311 *wp++ = COMSUB;
312 /* Need to know if we are inside double quotes
313 * since sh/at&t-ksh translate the \" to " in
314 * "`..\"..`".
315 * This is not done in posix mode (section
316 * 3.2.3, Double Quotes: "The backquote shall
317 * retain its special meaning introducing the
318 * other form of command substitution (see
319 * 3.6.3). The portion of the quoted string
320 * from the initial backquote and the
321 * characters up to the next backquote that
322 * is not preceded by a backslash (having
323 * escape characters removed) defines that
324 * command whose output replaces `...` when
325 * the word is expanded."
326 * Section 3.6.3, Command Substitution:
327 * "Within the backquoted style of command
328 * substitution, backslash shall retain its
329 * literal meaning, except when followed by
330 * $ ` \.").
332 statep->ls_sbquote.indquotes = 0;
333 if (!Flag(FPOSIX)) {
334 Lex_state *s = statep;
335 Lex_state *base = state_info.base;
336 while (1) {
337 for (; s != base; s--) {
338 if (s->ls_state == SDQUOTE) {
339 statep->ls_sbquote.indquotes = 1;
340 break;
343 if (s != base)
344 break;
345 if (!(s = s->ls_info.base))
346 break;
347 base = s-- - STATE_BSIZE;
350 break;
351 default:
352 *wp++ = CHAR, *wp++ = c;
354 break;
356 case SSQUOTE:
357 if (c == '\'') {
358 POP_STATE();
359 *wp++ = CQUOTE;
360 ignore_backslash_newline--;
361 } else
362 *wp++ = QCHAR, *wp++ = c;
363 break;
365 case SDQUOTE:
366 if (c == '"') {
367 POP_STATE();
368 *wp++ = CQUOTE;
369 } else
370 goto Subst;
371 break;
373 case SCSPAREN: /* $( .. ) */
374 /* todo: deal with $(...) quoting properly
375 * kludge to partly fake quoting inside $(..): doesn't
376 * really work because nested $(..) or ${..} inside
377 * double quotes aren't dealt with.
379 switch (statep->ls_scsparen.csstate) {
380 case 0: /* normal */
381 switch (c) {
382 case '(':
383 statep->ls_scsparen.nparen++;
384 break;
385 case ')':
386 statep->ls_scsparen.nparen--;
387 break;
388 case '\\':
389 statep->ls_scsparen.csstate = 1;
390 break;
391 case '"':
392 statep->ls_scsparen.csstate = 2;
393 break;
394 case '\'':
395 statep->ls_scsparen.csstate = 4;
396 ignore_backslash_newline++;
397 break;
399 break;
401 case 1: /* backslash in normal mode */
402 case 3: /* backslash in double quotes */
403 --statep->ls_scsparen.csstate;
404 break;
406 case 2: /* double quotes */
407 if (c == '"')
408 statep->ls_scsparen.csstate = 0;
409 else if (c == '\\')
410 statep->ls_scsparen.csstate = 3;
411 break;
413 case 4: /* single quotes */
414 if (c == '\'') {
415 statep->ls_scsparen.csstate = 0;
416 ignore_backslash_newline--;
418 break;
420 if (statep->ls_scsparen.nparen == 0) {
421 POP_STATE();
422 *wp++ = 0; /* end of COMSUB */
423 } else
424 *wp++ = c;
425 break;
427 case SASPAREN: /* $(( .. )) */
428 /* todo: deal with $((...); (...)) properly */
429 /* XXX should nest using existing state machine
430 * (embed "..", $(...), etc.) */
431 if (c == '(')
432 statep->ls_sasparen.nparen++;
433 else if (c == ')') {
434 statep->ls_sasparen.nparen--;
435 if (statep->ls_sasparen.nparen == 1) {
436 /*(*/
437 if ((c2 = getsc()) == ')') {
438 POP_STATE();
439 *wp++ = 0; /* end of EXPRSUB */
440 break;
441 } else {
442 char *s;
444 ungetsc(c2);
445 /* mismatched parenthesis -
446 * assume we were really
447 * parsing a $(..) expression
449 s = Xrestpos(ws, wp,
450 statep->ls_sasparen.start);
451 memmove(s + 1, s, wp - s);
452 *s++ = COMSUB;
453 *s = '('; /*)*/
454 wp++;
455 statep->ls_scsparen.nparen = 1;
456 statep->ls_scsparen.csstate = 0;
457 state = statep->ls_state
458 = SCSPAREN;
463 *wp++ = c;
464 break;
466 case SBRACE:
467 /*{*/
468 if (c == '}') {
469 POP_STATE();
470 *wp++ = CSUBST;
471 *wp++ = /*{*/ '}';
472 } else
473 goto Sbase1;
474 break;
476 case STBRACE:
477 /* Same as SBRACE, except (,|,) treated specially */
478 /*{*/
479 if (c == '}') {
480 POP_STATE();
481 *wp++ = CSUBST;
482 *wp++ = /*{*/ '}';
483 } else if (c == '|') {
484 *wp++ = SPAT;
485 } else if (c == '(') {
486 *wp++ = OPAT;
487 *wp++ = ' '; /* simile for @ */
488 PUSH_STATE(SPATTERN);
489 } else
490 goto Sbase1;
491 break;
493 case SBQUOTE:
494 if (c == '`') {
495 *wp++ = 0;
496 POP_STATE();
497 } else if (c == '\\') {
498 switch (c = getsc()) {
499 case '\\':
500 case '$': case '`':
501 *wp++ = c;
502 break;
503 case '"':
504 if (statep->ls_sbquote.indquotes) {
505 *wp++ = c;
506 break;
508 /* FALLTHROUGH */
509 default:
510 if (c) { /* trailing \ is lost */
511 *wp++ = '\\';
512 *wp++ = c;
514 break;
516 } else
517 *wp++ = c;
518 break;
520 case SWORD: /* ONEWORD */
521 goto Subst;
523 case SLETPAREN: /* LETEXPR: (( ... )) */
524 /*(*/
525 if (c == ')') {
526 if (statep->ls_sletparen.nparen > 0)
527 --statep->ls_sletparen.nparen;
528 /*(*/
529 else if ((c2 = getsc()) == ')') {
530 c = 0;
531 *wp++ = CQUOTE;
532 goto Done;
533 } else
534 ungetsc(c2);
535 } else if (c == '(')
536 /* parenthesis inside quotes and backslashes
537 * are lost, but at&t ksh doesn't count them
538 * either
540 ++statep->ls_sletparen.nparen;
541 goto Sbase2;
543 case SHEREDELIM: /* <<,<<- delimiter */
544 /* XXX chuck this state (and the next) - use
545 * the existing states ($ and \`..` should be
546 * stripped of their specialness after the
547 * fact).
549 /* here delimiters need a special case since
550 * $ and `..` are not to be treated specially
552 if (c == '\\') {
553 c = getsc();
554 if (c) { /* trailing \ is lost */
555 *wp++ = QCHAR;
556 *wp++ = c;
558 } else if (c == '\'') {
559 PUSH_STATE(SSQUOTE);
560 *wp++ = OQUOTE;
561 ignore_backslash_newline++;
562 } else if (c == '"') {
563 state = statep->ls_state = SHEREDQUOTE;
564 *wp++ = OQUOTE;
565 } else {
566 *wp++ = CHAR;
567 *wp++ = c;
569 break;
571 case SHEREDQUOTE: /* " in <<,<<- delimiter */
572 if (c == '"') {
573 *wp++ = CQUOTE;
574 state = statep->ls_state = SHEREDELIM;
575 } else {
576 if (c == '\\') {
577 switch (c = getsc()) {
578 case '\\': case '"':
579 case '$': case '`':
580 break;
581 default:
582 if (c) { /* trailing \ lost */
583 *wp++ = CHAR;
584 *wp++ = '\\';
586 break;
589 *wp++ = CHAR;
590 *wp++ = c;
592 break;
594 case SPATTERN: /* in *(...|...) pattern (*+?@!) */
595 if ( /*(*/ c == ')') {
596 *wp++ = CPAT;
597 POP_STATE();
598 } else if (c == '|') {
599 *wp++ = SPAT;
600 } else if (c == '(') {
601 *wp++ = OPAT;
602 *wp++ = ' '; /* simile for @ */
603 PUSH_STATE(SPATTERN);
604 } else
605 goto Sbase1;
606 break;
/* collection finished: c is the character that ended it (0 at end of input
 * or after the closing "))" of a let expression) */
609 Done:
610 Xcheck(ws, wp);
611 if (statep != &states[1])
612 /* XXX figure out what is missing */
613 yyerror("no closing quote\n");
615 /* This done to avoid tests for SHEREDELIM wherever SBASE tested */
616 if (state == SHEREDELIM)
617 state = SBASE;
/* redirection: '<' or '>' with no word so far, or with a one-digit word
 * that becomes the unit number */
619 dp = Xstring(ws, wp);
620 if ((c == '<' || c == '>') && state == SBASE
621 && ((c2 = Xlength(ws, wp)) == 0
622 || (c2 == 2 && dp[0] == CHAR && digit(dp[1]))))
624 struct ioword *iop =
625 (struct ioword *) alloc(sizeof(*iop), ATEMP);
627 if (c2 == 2)
628 iop->unit = dp[1] - '0';
629 else
630 iop->unit = c == '>'; /* 0 for <, 1 for > */
632 c2 = getsc();
633 /* <<, >>, <> are ok, >< is not */
634 if (c == c2 || (c == '<' && c2 == '>')) {
635 iop->flag = c == c2 ?
636 (c == '>' ? IOCAT : IOHERE) : IORDWR;
637 if (iop->flag == IOHERE) {
638 if ((c2 = getsc()) == '-')
639 iop->flag |= IOSKIP;
640 else
641 ungetsc(c2);
643 } else if (c2 == '&')
644 iop->flag = IODUP | (c == '<' ? IORDUP : 0);
645 else {
646 iop->flag = c == '>' ? IOWRITE : IOREAD;
647 if (c == '>' && c2 == '|')
648 iop->flag |= IOCLOB;
649 else
650 ungetsc(c2);
653 iop->name = (char *) 0;
654 iop->delim = (char *) 0;
655 iop->heredoc = (char *) 0;
656 Xfree(ws, wp); /* free word */
657 yylval.iop = iop;
658 return REDIR;
661 if (wp == dp && state == SBASE) {
662 Xfree(ws, wp); /* free word */
663 /* no word, process LEX1 character */
664 switch (c) {
665 default:
666 return c;
668 case '|':
669 case '&':
670 case ';':
671 if ((c2 = getsc()) == c)
672 c = (c == ';') ? BREAK :
673 (c == '|') ? LOGOR :
674 (c == '&') ? LOGAND :
675 YYERRCODE;
676 else if (c == '|' && c2 == '&')
677 c = COPROC;
678 else
679 ungetsc(c2);
680 return c;
682 case '\n':
683 gethere();
684 if (cf & CONTIN)
685 goto Again;
686 return c;
688 case '(': /*)*/
689 if (!Flag(FSH)) {
690 if ((c2 = getsc()) == '(') /*)*/
691 /* XXX need to handle ((...); (...)) */
692 c = MDPAREN;
693 else
694 ungetsc(c2);
696 return c;
697 /*(*/
698 case ')':
699 return c;
703 *wp++ = EOS; /* terminate word */
704 yylval.cp = Xclose(ws, wp);
705 if (state == SWORD || state == SLETPAREN) /* ONEWORD? */
706 return LWORD;
708 last_terminal_was_bracket = c == '(';
709 ungetsc(c); /* unget terminator */
711 /* copy word to unprefixed string ident */
712 for (sp = yylval.cp, dp = ident; dp < ident+IDENT && (c = *sp++) == CHAR; )
713 *dp++ = *sp++;
714 /* Make sure the ident array stays '\0' paded */
715 memset(dp, 0, (ident+IDENT) - dp + 1);
716 if (c != EOS)
717 *ident = '\0'; /* word is not unquoted */
719 if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) {
720 struct tbl *p;
721 int h = hash(ident);
723 /* { */
724 if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h))
725 && (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}'))
727 afree(yylval.cp, ATEMP);
728 return p->val.i;
730 if ((cf & ALIAS) && (p = ktsearch(aliases, ident, h))
731 && (p->flag & ISSET))
734 if (last_terminal_was_bracket) {
735 /* The token is probably part of function's definition,
736 * and is should not be aliased. Moreover we remove the alias
737 * so it won't clash with the function name
738 * robert@debian.org, Feb 26th, 2005
740 ktdelete(p);
741 } else {
742 Source *s;
744 for (s = source; s->type == SALIAS; s = s->next)
745 if (s->u.tblp == p)
746 return LWORD;
747 /* push alias expansion */
748 s = pushs(SALIAS, source->areap);
749 s->start = s->str = p->val.s;
750 s->u.tblp = p;
751 s->next = source;
752 source = s;
753 afree(yylval.cp, ATEMP);
754 goto Again;
759 return LWORD;
762 static void
763 gethere(void)
765 struct ioword **p;
767 for (p = heres; p < herep; p++)
768 readhere(*p);
769 herep = heres;
773 * read "<<word" text into temp file
776 static void
777 readhere(struct ioword *iop)
779 int c;
780 char *volatile eof;
781 char *eofp;
782 int skiptabs;
783 XString xs;
784 char *xp;
785 int xpos;
787 eof = evalstr(iop->delim, 0);
789 if (!(iop->flag & IOEVAL))
790 ignore_backslash_newline++;
792 Xinit(xs, xp, 256, ATEMP);
794 for (;;) {
795 eofp = eof;
796 skiptabs = iop->flag & IOSKIP;
797 xpos = Xsavepos(xs, xp);
798 while ((c = getsc()) != 0) {
799 if (skiptabs) {
800 if (c == '\t')
801 continue;
802 skiptabs = 0;
804 if (c != *eofp)
805 break;
806 Xcheck(xs, xp);
807 Xput(xs, xp, c);
808 eofp++;
810 /* Allow EOF here so commands with out trailing newlines
811 * will work (eg, ksh -c '...', $(...), etc).
813 if (*eofp == '\0' && (c == 0 || c == '\n')) {
814 xp = Xrestpos(xs, xp, xpos);
815 break;
817 ungetsc(c);
818 while ((c = getsc()) != '\n') {
819 if (c == 0)
820 yyerror("here document `%s' unclosed\n", eof);
821 Xcheck(xs, xp);
822 Xput(xs, xp, c);
824 Xcheck(xs, xp);
825 Xput(xs, xp, c);
827 Xput(xs, xp, '\0');
828 iop->heredoc = Xclose(xs, xp);
830 if (!(iop->flag & IOEVAL))
831 ignore_backslash_newline--;
834 void
835 yyerror(const char *fmt, ...)
837 va_list va;
839 /* pop aliases and re-reads */
840 while (source->type == SALIAS || source->type == SREREAD)
841 source = source->next;
842 source->str = null; /* zap pending input */
844 error_prefix(true);
845 va_start(va, fmt);
846 shf_vfprintf(shl_out, fmt, va);
847 va_end(va);
848 errorf(null);
852 * input for yylex with alias expansion
855 Source *
856 pushs(int type, Area *areap)
858 Source *s;
860 s = (Source *) alloc(sizeof(Source), areap);
861 s->type = type;
862 s->str = null;
863 s->start = NULL;
864 s->line = 0;
865 s->cmd_offset = 0;
866 s->errline = 0;
867 s->file = NULL;
868 s->flags = 0;
869 s->next = NULL;
870 s->areap = areap;
871 if (type == SFILE || type == SSTDIN) {
872 char *dummy;
873 Xinit(s->xs, dummy, 256, s->areap);
874 (void)dummy; // Unused
875 } else
876 memset(&s->xs, 0, sizeof(s->xs));
877 return s;
880 static int
881 getsc__(void)
883 Source *s = source;
884 int c;
886 while ((c = *s->str++) == 0) {
887 s->str = NULL; /* return 0 for EOF by default */
888 switch (s->type) {
889 case SEOF:
890 s->str = null;
891 return 0;
893 case SSTDIN:
894 case SFILE:
895 getsc_line(s);
896 break;
898 case SWSTR:
899 break;
901 case SSTRING:
902 break;
904 case SWORDS:
905 s->start = s->str = *s->u.strv++;
906 s->type = SWORDSEP;
907 break;
909 case SWORDSEP:
910 if (*s->u.strv == NULL) {
911 s->start = s->str = newline;
912 s->type = SEOF;
913 } else {
914 s->start = s->str = space;
915 s->type = SWORDS;
917 break;
919 case SALIAS:
920 if (s->flags & SF_ALIASEND) {
921 /* pass on an unused SF_ALIAS flag */
922 source = s->next;
923 source->flags |= s->flags & SF_ALIAS;
924 s = source;
925 } else if (*s->u.tblp->val.s
926 && isspace(strchr(s->u.tblp->val.s, 0)[-1]))
928 source = s = s->next; /* pop source stack */
929 /* Note that this alias ended with a space,
930 * enabling alias expansion on the following
931 * word.
933 s->flags |= SF_ALIAS;
934 } else {
935 /* At this point, we need to keep the current
936 * alias in the source list so recursive
937 * aliases can be detected and we also need
938 * to return the next character. Do this
939 * by temporarily popping the alias to get
940 * the next character and then put it back
941 * in the source list with the SF_ALIASEND
942 * flag set.
944 source = s->next; /* pop source stack */
945 source->flags |= s->flags & SF_ALIAS;
946 c = getsc__();
947 if (c) {
948 s->flags |= SF_ALIASEND;
949 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
950 s->start = s->str = s->ugbuf;
951 s->next = source;
952 source = s;
953 } else {
954 s = source;
955 /* avoid reading eof twice */
956 s->str = NULL;
957 break;
960 continue;
962 case SREREAD:
963 if (s->start != s->ugbuf) /* yuck */
964 afree(s->u.freeme, ATEMP);
965 source = s = s->next;
966 continue;
968 if (s->str == NULL) {
969 s->type = SEOF;
970 s->start = s->str = null;
971 return '\0';
973 if (s->flags & SF_ECHO) {
974 shf_puts(s->str, shl_out);
975 shf_flush(shl_out);
978 return c;
981 static void
982 getsc_line(Source *s)
984 char *xp = Xstring(s->xs, xp);
985 int interactive = Flag(FTALKING) && s->type == SSTDIN;
986 int have_tty = interactive && (s->flags & SF_TTY);
988 /* Done here to ensure nothing odd happens when a timeout occurs */
989 XcheckN(s->xs, xp, LINE);
990 *xp = '\0';
991 s->start = s->str = xp;
993 if (have_tty && ksh_tmout) {
994 ksh_tmout_state = TMOUT_READING;
995 alarm(ksh_tmout);
998 if (interactive) {
999 pprompt(prompt, 0);
1000 } else
1001 s->line++;
1003 while (1) {
1004 char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1006 if (!p && shf_error(s->u.shf)
1007 && shf_errno(s->u.shf) == EINTR)
1009 shf_clearerr(s->u.shf);
1010 if (trap)
1011 runtraps(0);
1012 continue;
1014 if (!p || (xp = p, xp[-1] == '\n'))
1015 break;
1016 /* double buffer size */
1017 xp++; /* move past null so doubling works... */
1018 XcheckN(s->xs, xp, Xlength(s->xs, xp));
1019 xp--; /* ...and move back again */
1021 /* flush any unwanted input so other programs/builtins
1022 * can read it. Not very optimal, but less error prone
1023 * than flushing else where, dealing with redirections,
1024 * etc..
1025 * todo: reduce size of shf buffer (~128?) if SSTDIN
1027 if (s->type == SSTDIN)
1028 shf_flush(s->u.shf);
1030 /* XXX: temporary kludge to restore source after a
1031 * trap may have been executed.
1033 source = s;
1034 if (have_tty && ksh_tmout)
1036 ksh_tmout_state = TMOUT_EXECUTING;
1037 alarm(0);
1039 s->start = s->str = Xstring(s->xs, xp);
1040 strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1041 /* Note: if input is all nulls, this is not eof */
1042 if (Xlength(s->xs, xp) == 0) { /* EOF */
1043 if (s->type == SFILE)
1044 shf_fdclose(s->u.shf);
1045 s->str = NULL;
1046 } else if (interactive) {
1047 #ifdef HISTORY
1048 char *p = Xstring(s->xs, xp);
1049 if (cur_prompt == PS1)
1050 while (*p && ctype(*p, C_IFS) && ctype(*p, C_IFSWS))
1051 p++;
1052 if (*p) {
1053 s->line++;
1054 histsave(s->line, s->str, 1);
1056 #endif /* HISTORY */
1058 if (interactive)
1059 set_prompt(PS2, (Source *) 0);
/* Rewrite every backslash-dollar pair in str to backslash-'p', in place,
 * and return str.  The scan resumes after each rewritten pair.
 */
static char *
special_prompt_expand(char *str)
{
	char *p = str;

	while ((p = strstr(p, "\\$")) != NULL) {
		p[1] = 'p';
		p += 2;
	}
	return str;
}
1073 void
1074 set_prompt(int to, Source *s)
1076 char *ps1;
1077 Area *saved_atemp;
1078 cur_prompt = to;
1080 switch (to) {
1081 case PS1: /* command */
1082 ps1 = str_save(str_val(global("PS1")), ATEMP);
1083 saved_atemp = ATEMP; /* ps1 is freed by substitute() */
1084 newenv(E_ERRH);
1085 if (ksh_sigsetjmp(e->jbuf, 0)) {
1086 prompt = safe_prompt;
1087 /* Don't print an error - assume it has already
1088 * been printed. Reason is we may have forked
1089 * to run a command and the child may be
1090 * unwinding its stack through this code as it
1091 * exits.
1093 } else {
1094 /* expand \$ before other substitutions are done */
1095 char *tmp = special_prompt_expand(ps1);
1096 prompt = str_save(substitute(tmp, 0), saved_atemp);
1098 quitenv(NULL);
1099 break;
1101 case PS2: /* command continuation */
1102 prompt = str_val(global("PS2"));
1103 break;
#if __AROS__
/* Stand-in for gethostname(): stores the fixed name "aros" in name,
 * truncated to fit len bytes and always NUL terminated.
 * Returns 0 on success, -1 if there is no buffer to write to.
 */
static int gethostname(char *name, size_t len)
{
	static const char host[] = "aros";
	size_t i;

	if (name == NULL || len == 0)
		return -1;
	/* copy into the caller's buffer; assigning to the parameter itself
	 * would leave that buffer untouched */
	for (i = 0; i + 1 < len && host[i] != '\0'; i++)
		name[i] = host[i];
	name[i] = '\0';
	return 0;
}
#endif
1115 static int
1116 dopprompt(const char *sp, int ntruncate, const char **spp, int doprint)
1118 char strbuf[1024], tmpbuf[1024], *p, *str, nbuf[32], delimiter = '\0';
1119 int len, c, n, totlen = 0, indelimit = 0, counting = 1, delimitthis;
1120 const char *cp = sp;
1121 struct tm *tm;
1122 time_t t;
1124 if (*cp && cp[1] == '\r') {
1125 delimiter = *cp;
1126 cp += 2;
1129 while (*cp != 0) {
1130 delimitthis = 0;
1131 if (indelimit && *cp != delimiter)
1133 else if (*cp == '\n' || *cp == '\r') {
1134 totlen = 0;
1135 sp = cp + 1;
1136 } else if (*cp == '\t') {
1137 if (counting)
1138 totlen = (totlen | 7) + 1;
1139 } else if (*cp == delimiter) {
1140 indelimit = !indelimit;
1141 delimitthis = 1;
1144 if (*cp == '\\') {
1145 cp++;
1146 if (!*cp)
1147 break;
1148 if (Flag(FSH))
1149 snprintf(strbuf, sizeof strbuf, "\\%c", *cp);
1150 else switch (*cp) {
1151 case 'a': /* '\' 'a' bell */
1152 strbuf[0] = '\007';
1153 strbuf[1] = '\0';
1154 break;
1155 case 'd': /* '\' 'd' Dow Mon DD */
1156 time(&t);
1157 tm = localtime(&t);
1158 strftime(strbuf, sizeof strbuf, "%a %b %d", tm);
1159 break;
1160 case 'D': /* '\' 'D' '{' strftime format '}' */
1161 p = strchr(cp + 2, '}');
1162 if (cp[1] != '{' || p == NULL) {
1163 snprintf(strbuf, sizeof strbuf,
1164 "\\%c", *cp);
1165 break;
1167 strlcpy(tmpbuf, cp + 2, sizeof tmpbuf);
1168 p = strchr(tmpbuf, '}');
1169 if (p)
1170 *p = '\0';
1171 time(&t);
1172 tm = localtime(&t);
1173 strftime(strbuf, sizeof strbuf, tmpbuf, tm);
1174 cp = strchr(cp + 2, '}');
1175 break;
1176 case 'e': /* '\' 'e' escape */
1177 strbuf[0] = '\033';
1178 strbuf[1] = '\0';
1179 break;
1180 case 'h': /* '\' 'h' shortened hostname */
1181 gethostname(strbuf, sizeof strbuf);
1182 p = strchr(strbuf, '.');
1183 if (p)
1184 *p = '\0';
1185 break;
1186 case 'H': /* '\' 'H' full hostname */
1187 gethostname(strbuf, sizeof strbuf);
1188 break;
1189 case 'j': /* '\' 'j' number of jobs */
1190 snprintf(strbuf, sizeof strbuf, "%d",
1191 j_njobs());
1192 break;
1193 case 'l': /* '\' 'l' basename of tty */
1194 #if !defined (__amigaos4__) && !defined(__AROS__)
1195 p = ttyname(0);
1196 #else
1197 p = strdup("CONSOLE:");
1198 #endif
1199 if (p)
1200 p = basename(p);
1201 if (p)
1202 strlcpy(strbuf, p, sizeof strbuf);
1203 break;
1204 case 'n': /* '\' 'n' newline */
1205 strbuf[0] = '\n';
1206 strbuf[1] = '\0';
1207 totlen = 0; /* reset for prompt re-print */
1208 sp = cp + 1;
1209 break;
1210 case 'p': /* '\' '$' $ or # */
1211 strbuf[0] = ksheuid ? '$' : '#';
1212 strbuf[1] = '\0';
1213 break;
1214 case 'r': /* '\' 'r' return */
1215 strbuf[0] = '\r';
1216 strbuf[1] = '\0';
1217 totlen = 0; /* reset for prompt re-print */
1218 sp = cp + 1;
1219 break;
1220 case 's': /* '\' 's' basename $0 */
1221 strlcpy(strbuf, kshname, sizeof strbuf);
1222 break;
1223 case 't': /* '\' 't' 24 hour HH:MM:SS */
1224 time(&t);
1225 tm = localtime(&t);
1226 strftime(strbuf, sizeof strbuf, "%T", tm);
1227 break;
1228 case 'T': /* '\' 'T' 12 hour HH:MM:SS */
1229 time(&t);
1230 tm = localtime(&t);
1231 strftime(strbuf, sizeof strbuf, "%l:%M:%S", tm);
1232 break;
1233 case '@': /* '\' '@' 12 hour am/pm format */
1234 time(&t);
1235 tm = localtime(&t);
1236 strftime(strbuf, sizeof strbuf, "%r", tm);
1237 break;
1238 case 'A': /* '\' 'A' 24 hour HH:MM */
1239 time(&t);
1240 tm = localtime(&t);
1241 strftime(strbuf, sizeof strbuf, "%R", tm);
1242 break;
1243 case 'v': /* '\' 'v' version (short) */
1244 p = strchr(ksh_version, ' ');
1245 if (p)
1246 p = strchr(p + 1, ' ');
1247 if (p) {
1248 p++;
1249 strlcpy(strbuf, p, sizeof strbuf);
1250 p = strchr(strbuf, ' ');
1251 if (p)
1252 *p = '\0';
1254 break;
1255 case 'V': /* '\' 'V' version (long) */
1256 strlcpy(strbuf, ksh_version, sizeof strbuf);
1257 break;
1258 case 'w': /* '\' 'w' cwd */
1259 p = str_val(global("PWD"));
1260 n = strlen(str_val(global("HOME")));
1261 if (strcmp(p, "/") == 0) {
1262 strlcpy(strbuf, p, sizeof strbuf);
1263 } else if (strcmp(p, str_val(global("HOME"))) == 0) {
1264 strbuf[0] = '~';
1265 strbuf[1] = '\0';
1266 } else if (strncmp(p, str_val(global("HOME")), n)
1267 == 0 && p[n] == '/') {
1268 snprintf(strbuf, sizeof strbuf, "~/%s",
1269 str_val(global("PWD")) + n + 1);
1270 } else
1271 strlcpy(strbuf, p, sizeof strbuf);
1272 break;
1273 case 'W': /* '\' 'W' basename(cwd) */
1274 p = str_val(global("PWD"));
1275 strlcpy(strbuf, basename(p), sizeof strbuf);
1276 break;
1277 case '!': /* '\' '!' history line number */
1278 snprintf(strbuf, sizeof strbuf, "%d",
1279 source->line + 1);
1280 break;
1281 case '#': /* '\' '#' command line number */
1282 snprintf(strbuf, sizeof strbuf, "%d",
1283 source->line - source->cmd_offset + 1);
1284 break;
1285 case '0': /* '\' '#' '#' ' #' octal numeric handling */
1286 case '1':
1287 case '2':
1288 case '3':
1289 case '4':
1290 case '5':
1291 case '6':
1292 case '7':
1293 if ((cp[1] > '7' || cp[1] < '0') ||
1294 (cp[2] > '7' || cp[2] < '0')) {
1295 snprintf(strbuf, sizeof strbuf,
1296 "\\%c", *cp);
1297 break;
1299 n = cp[0] * 8 * 8 + cp[1] * 8 + cp[2];
1300 snprintf(strbuf, sizeof strbuf, "%c", n);
1301 cp += 2;
1302 break;
1303 case '\\': /* '\' '\' */
1304 strbuf[0] = '\\';
1305 strbuf[1] = '\0';
1306 break;
1307 case '[': /* '\' '[' .... stop counting */
1308 strbuf[0] = '\0';
1309 counting = 0;
1310 break;
1311 case ']': /* '\' ']' restart counting */
1312 strbuf[0] = '\0';
1313 counting = 1;
1314 break;
1316 default:
1317 snprintf(strbuf, sizeof strbuf, "\\%c", *cp);
1318 break;
1320 cp++;
1322 str = strbuf;
1323 len = strlen(str);
1324 if (ntruncate) {
1325 if (ntruncate >= len) {
1326 ntruncate -= len;
1327 continue;
1329 str += ntruncate;
1330 len -= ntruncate;
1331 ntruncate = 0;
1333 if (doprint)
1334 shf_write(str, len, shl_out);
1335 if (counting && !indelimit && !delimitthis)
1336 totlen += len;
1337 continue;
1338 } else if (*cp != '!')
1339 c = *cp++;
1340 else if (*++cp == '!')
1341 c = *cp++;
1342 else {
1343 char *p;
1345 shf_snprintf(p = nbuf, sizeof(nbuf), "%d",
1346 source->line + 1);
1347 len = strlen(nbuf);
1348 if (ntruncate) {
1349 if (ntruncate >= len) {
1350 ntruncate -= len;
1351 continue;
1353 p += ntruncate;
1354 len -= ntruncate;
1355 ntruncate = 0;
1357 if (doprint)
1358 shf_write(p, len, shl_out);
1359 if (counting && !indelimit && !delimitthis)
1360 totlen += len;
1361 continue;
1363 if (counting && ntruncate)
1364 --ntruncate;
1365 else if (doprint) {
1366 shf_putc(c, shl_out);
1368 if (counting && !indelimit && !delimitthis)
1369 totlen++;
1371 if (doprint)
1372 shf_flush(shl_out);
1373 if (spp)
1374 *spp = sp;
1375 return (totlen);
1378 void
1379 pprompt(const char *cp, int ntruncate)
1381 dopprompt(cp, ntruncate, NULL, 1);
/* Return the counted length of the last line of the prompt cp without
 * printing anything.  If spp is not NULL it receives the pointer that
 * dopprompt() reports for the start of that line.
 */
int
promptlen(const char *cp, const char **spp)
{
	return dopprompt(cp, 0, spp, 0);
}
1390 /* Read the variable part of a ${...} expression (ie, up to but not including
1391 * the :[-+?=#%] or close-brace.
1393 static char *
1394 get_brace_var(XString *wsp, char *wp)
1396 enum parse_state {
1397 PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1398 PS_NUMBER, PS_VAR1, PS_END
1400 state;
1401 char c;
1403 state = PS_INITIAL;
1404 while (1) {
1405 c = getsc();
1406 /* State machine to figure out where the variable part ends. */
1407 switch (state) {
1408 case PS_INITIAL:
1409 if (c == '#') {
1410 state = PS_SAW_HASH;
1411 break;
1413 /* FALLTHROUGH */
1414 case PS_SAW_HASH:
1415 if (letter(c))
1416 state = PS_IDENT;
1417 else if (digit(c))
1418 state = PS_NUMBER;
1419 else if (ctype(c, C_VAR1))
1420 state = PS_VAR1;
1421 else
1422 state = PS_END;
1423 break;
1424 case PS_IDENT:
1425 if (!letnum(c)) {
1426 state = PS_END;
1427 if (c == '[') {
1428 char *tmp, *p;
1430 if (!arraysub(&tmp))
1431 yyerror("missing ]\n");
1432 *wp++ = c;
1433 for (p = tmp; *p; ) {
1434 Xcheck(*wsp, wp);
1435 *wp++ = *p++;
1437 afree(tmp, ATEMP);
1438 c = getsc(); /* the ] */
1441 break;
1442 case PS_NUMBER:
1443 if (!digit(c))
1444 state = PS_END;
1445 break;
1446 case PS_VAR1:
1447 state = PS_END;
1448 break;
1449 case PS_END: /* keep gcc happy */
1450 break;
1452 if (state == PS_END) {
1453 *wp++ = '\0'; /* end of variable part */
1454 ungetsc(c);
1455 break;
1457 Xcheck(*wsp, wp);
1458 *wp++ = c;
1460 return wp;
1464 * Save an array subscript - returns true if matching bracket found, false
1465 * if eof or newline was found.
1466 * (Returned string double null terminated)
1468 static int
1469 arraysub(char **strp)
1471 XString ws;
1472 char *wp;
1473 char c;
1474 int depth = 1; /* we are just past the initial [ */
1476 Xinit(ws, wp, 32, ATEMP);
1478 do {
1479 c = getsc();
1480 Xcheck(ws, wp);
1481 *wp++ = c;
1482 if (c == '[')
1483 depth++;
1484 else if (c == ']')
1485 depth--;
1486 } while (depth > 0 && c && c != '\n');
1488 *wp++ = '\0';
1489 *strp = Xclose(ws, wp);
1491 return depth == 0 ? 1 : 0;
1494 /* Unget a char: handles case when we are already at the start of the buffer */
1495 static const char *
1496 ungetsc(int c)
1498 if (backslash_skip)
1499 backslash_skip--;
1500 /* Don't unget eof... */
1501 if (source->str == null && c == '\0')
1502 return source->str;
1503 if (source->str > source->start)
1504 source->str--;
1505 else {
1506 Source *s;
1508 s = pushs(SREREAD, source->areap);
1509 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1510 s->start = s->str = s->ugbuf;
1511 s->next = source;
1512 source = s;
1514 return source->str;
1518 /* Called to get a char that isn't a \newline sequence. */
1519 static int
1520 getsc_bn(void)
1522 int c, c2;
1524 if (ignore_backslash_newline)
1525 return getsc_();
1527 if (backslash_skip == 1) {
1528 backslash_skip = 2;
1529 return getsc_();
1532 backslash_skip = 0;
1534 while (1) {
1535 c = getsc_();
1536 if (c == '\\') {
1537 if ((c2 = getsc_()) == '\n')
1538 /* ignore the \newline; get the next char... */
1539 continue;
1540 ungetsc(c2);
1541 backslash_skip = 1;
1543 return c;
1547 static Lex_state *
1548 push_state_(State_info *si, Lex_state *old_end)
1550 Lex_state *new = alloc(sizeof(Lex_state) * STATE_BSIZE, ATEMP);
1552 new[0].ls_info.base = old_end;
1553 si->base = &new[0];
1554 si->end = &new[STATE_BSIZE];
1555 return &new[1];
1558 static Lex_state *
1559 pop_state_(State_info *si, Lex_state *old_end)
1561 Lex_state *old_base = si->base;
1563 si->base = old_end->ls_info.base - STATE_BSIZE;
1564 si->end = old_end->ls_info.base;
1566 afree(old_base, ATEMP);
1568 return si->base + STATE_BSIZE - 1;;