2 * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
4 * This file is part of Jam - see jam.c for Copyright information.
7 * scan.c - the jam yacc scanner
9 * 12/26/93 (seiwald) - bump buf in yylex to 10240 - yuk.
10 * 09/16/94 (seiwald) - check for overflows, unmatched {}'s, etc.
11 * Also handle tokens abutting EOF by remembering
12 * to return EOF no matter how many times yylex()
14 * 02/11/95 (seiwald) - honor only punctuation keywords if SCAN_PUNCT.
15 * 07/27/95 (seiwald) - Include jamgram.h after scan.h, so that YYSTYPE is
16 * defined before Linux's yacc tries to redefine it.
17 * 01/10/01 (seiwald) - \ can now escape any whitespace char
18 * 11/04/02 (seiwald) - const-ing for string literals
39 static const keyword_t keywords
[] = {
40 #include "jamgramtab.h"
/*
 * include_t - state of one source being scanned
 *
 * Entries are chained through 'next'; the file-scope 'incp' pointer
 * refers to the entry currently being read. A source is either an
 * in-memory list of strings ('strings') or the contents of a file
 * loaded into 'fcontents'.
 */
typedef struct include_s {
  struct include_s *next; /* next serial include file */
  const char *string;     /* pointer into current line */
  char **strings;         /* for yyfparse() -- text to parse */
  FILE *file;             /* for yyfparse() -- file being read */
  const char *fname;      /* for yyfparse() -- file name */
  int line;               /* line counter for error messages */
  int pos;                /* position for error messages */
  int back_count;         /* # of yyunget()ed chars */
  char back_chars[2];     /* buffer for yyunget()ed chars */
  char *fcontents;        /* for yyfparse() -- file contents */
  int prevwasn;           /* !0: increment line and reset to 0 */
} include_t;
59 static include_t
*incp
= NULL
; /* current file; head of chain */
62 static int scan_mode
= SCAN_NORMAL
;
63 /*static int any_errors = 0;*/
65 static const char *symdump (const token_t
*s
);
69 static const char *mnames
[] = {
78 * Set parser mode: normal, string, or keyword
82 if (DEBUG_SCAN
&& scan_mode
!= n
) printf("**MODE TRANSITION: %s --> %s\n", mnames
[scan_mode
], mnames
[n
]);
88 void yyerror (const token_t
*tk
, const char *s
) {
89 printf("ERROR(%d:%d) '%s': %s\n", tk
->line
, tk
->pos
, tk
->file
, s
);
90 exit(EXITBAD
); /* exit now */
94 static void yywarning_ex (const char *s
) {
95 printf("WARNING(%d:%d) '%s': %s\n", incp
->line
, incp
->pos
, incp
->fname
, s
);
99 void yyfparse (const char *s
) {
100 include_t
*i
= (include_t
*)malloc(sizeof(*i
));
101 /* push this onto the incp chain */
105 //i->fname = strdup(s);
106 i
->fname
= newstr(s
);
114 /* if the filename is "::Jambase", it means use the internal jambase */
115 if (strcmp(s
, "::Jambase") == 0) {
117 i
->strings
= jambase
;
123 * yychar() - read new line and return first character
125 * fabricates a continuous stream of characters across include files, returning EOF at the bitter end
127 static int yychar (void) {
129 if (!incp
) return EOF
;
130 /* once we start reading from the input stream, we reset the
131 * include insertion point so that the next include file becomes
132 * the head of the list */
133 if (i
->back_count
) return i
->back_chars
[--i
->back_count
];
134 /* if there is more data in this line, return it */
135 if (i
->prevwasn
) { i
->prevwasn
= 0; ++i
->line
; i
->pos
= 0; }
139 if (*i
->string
== '\n') i
->prevwasn
= 1;
142 /* if we're reading from an internal string list, go to the next string */
144 if (!*i
->strings
) goto next
;
145 i
->string
= *(i
->strings
++);
148 /* if necessary, open the file and get file contents */
152 if ((f
= fopen(i
->fname
, "rb")) == NULL
) perror(i
->fname
);
154 if (fseek(f
, 0, SEEK_END
) < 0) perror(i
->fname
);
155 if ((fsize
= ftell(f
)) < 0) perror(i
->fname
);
156 if (fseek(f
, 0, SEEK_SET
) < 0) perror(i
->fname
);
157 i
->fcontents
= calloc(fsize
+1, 1);
158 if (fsize
> 0 && fread(i
->fcontents
, fsize
, 1, f
) != 1) perror(i
->fname
);
159 fclose(f
); /* don't need to hold it opened */
160 i
->string
= i
->fcontents
;
164 /* this include is done */
165 /* free it up and return EOF so yyparse() returns to parse_file() */
167 /* close file, free name */
168 if (i
->fcontents
!= NULL
) free(i
->fcontents
);
169 //if (i->fname != NULL) free(i->fname);
176 * yyunget() - back up one character
/* Push character c back onto the current include (incp) so that it can be read again. */
178 static inline void yyunget (int c
) {
/* back_chars has room for two characters only; a third pushback is treated as a fatal internal error */
180 if (incp
->back_count
>= 2) { fprintf(stderr
, "yyunget: too much!\n"); abort(); }
/* store c in the next free slot and bump the count (yychar() reads slots back in reverse order) */
181 incp
->back_chars
[incp
->back_count
++] = c
;
186 /* eat white space */
187 static int skip_spaces (int c
) {
189 /* skip past white space */
190 while (c
!= EOF
&& isspace(c
)) {
191 yylval
.line
= incp
->line
;
192 yylval
.pos
= incp
->pos
;
195 /* not a comment? swallow up comment line */
197 while ((c
= yychar()) != EOF
&& c
!= '\n') ;
/*
 * digit() - numeric value of character c in the given base
 *
 * '0'..'9' map to 0..9; letters map case-insensitively to 10..35
 * ('a'/'A' is 10, 'z'/'Z' is 35).
 *
 * Returns the digit value, or -1 when c is EOF, is not a digit or
 * letter, or its value is not smaller than base.
 */
static int digit (int c, int base) {
  int value;
  if (c == EOF) return -1;
  if (c >= '0' && c <= '9') value = c-'0';
  else if (c >= 'a' && c <= 'z') value = c-'a'+10;
  else if (c >= 'A' && c <= 'Z') value = c-'A'+10;
  else return -1;
  /* a valid character can still be out of range for this base (e.g. '8' in octal) */
  if (value >= base) return -1;
  return value;
}
213 /* textlen includes trailing zero */
214 static void remove_indent (char *text
, int textlen
, int indent
) {
217 char *eol
= strchr(text
, '\n');
218 if (eol
== NULL
) eol
= text
+textlen
-1;
219 if (eol
-text
>= indent
) {
222 memmove(text
, text
+indent
, textlen
);
225 textlen
-= eol
+1-text
;
232 static inline const keyword_t
*find_keyword (const char *nbuf
, size_t nblen
) {
234 for (const keyword_t
*k
= keywords
; k
->word
; ++k
) if (strncmp(k
->word
, nbuf
, nblen
) == 0 && k
->word
[nblen
] == 0) return k
;
241 * yylex() - set yylval to current token; return its type
244 #define PUSH_CHAR(_c) do { \
245 if (sbused+1 >= sbsize) { \
246 int newsz = ((sbused+1)|0x7ff)+1; \
247 char *nb = realloc(sbuf, newsz); \
248 if (nb == NULL) { fprintf(stderr, "FATAL: out of memory!\n"); abort(); } \
252 sbuf[sbused++] = (_c); \
255 static char *sbuf
= NULL
;
256 static int sbsize
= 0;
260 /* "$(" already scanned and pushed */
261 /* return char after ")" */
262 int scan_varaccess (void) {
263 int c
= yychar(), qch
= 0, oc
;
264 if (c
== EOF
) return c
;
265 /* scan variable name */
266 while (c
!= EOF
&& c
!= '[' && c
!= ':') {
270 if (oc
== ')') return c
;
271 if (oc
== '$' && c
== '(') { PUSH_CHAR(c
); c
= scan_varaccess(); }
273 if (c
== EOF
) return c
;
274 /* scan indexing; 'c' is not pushed */
276 while (c
!= EOF
&& c
!= ']') {
280 if (oc
== ')') return c
;
281 if (oc
== '$' && c
== '(') { PUSH_CHAR(c
); c
= scan_varaccess(); }
283 /* find either selector or ')' */
284 while (c
!= EOF
&& c
!= ':') {
288 if (oc
== ')') return c
;
289 if (oc
== '$' && c
== '(') { PUSH_CHAR(c
); c
= scan_varaccess(); }
291 if (c
== EOF
) return c
;
293 /* scan selectors; 'c' is not pushed */
295 if (qch
!= '\'' && c
== '\\') {
298 if ((c
= yychar()) == EOF
) break;
306 if (!qch
&& (oc
== '"' || oc
== '\'')) { qch
= oc
; continue; }
307 if (!qch
&& oc
== ')') return c
;
308 if (qch
!= '\'' && oc
== '$' && c
== '(') {
310 c
= scan_varaccess();
313 if (qch
&& oc
== qch
) {
314 if (!(qch
== '\'' && c
== '\'')) qch
= 0;
328 yylval
.strlit
= 0; /* expand this string */
329 yylval
.line
= incp
->line
;
330 yylval
.pos
= incp
->pos
;
331 yylval
.file
= incp
->fname
;
332 /* get first character (whitespace or of token) */
334 if (scan_mode
== SCAN_STRING
) {
335 /* if scanning for a string (action's {}'s), look for the closing brace */
336 /* we handle matching braces, if they match! */
337 int nest
= 1, indent
= -1, cind
, bol
;
338 /* skip spaces and newline */
339 while (c
!= EOF
&& c
!= '\n' && isspace(c
)) c
= yychar();
340 if (c
== '\n') c
= yychar();
341 /* collect string, calculate indent */
345 if (c
== '{') ++nest
;
346 else if (c
== '}' && !--nest
) break;
347 /* indent calculation */
356 if (indent
< 0 || cind
< indent
) indent
= cind
;
362 /* we ate the ending brace -- regurgitate it */
363 if (c
!= EOF
) yyunget(c
);
364 /* check obvious errors */
365 if (nest
) { yyerror(&yylval
, "unmatched {} in action block"); goto eof
; }
366 /* remove trailing newlines and spaces, add one newline */
367 while (sbused
> 0 && isspace(sbuf
[sbused
-1])) --sbused
;
371 //fprintf(stderr, "=== %d ===\n%s===\n", indent, sbuf);
372 remove_indent(sbuf
, sbused
, indent
);
373 //fprintf(stderr, "--- %d ---\n%s---\n", indent, sbuf);
375 yylval
.type
= T_STRING
;
376 yylval
.string
= newstr(sbuf
);
379 int keyword
= 0, qch
= 0;
382 /* c now contains the first character of a token */
383 if (c
== EOF
) goto eof
;
384 /* special thingy: single-quoted string */
386 for (c
= yychar(); c
!= EOF
; c
= yychar()) {
388 /* check for special case: "''" */
389 if ((c
= yychar()) != '\'') {
390 if (c
!= EOF
&& !isspace(c
)) yyunget(c
);
398 yylval
.strlit
= 1; /* don't expand this string */
399 yylval
.string
= newstr(sbuf
);
403 keyword
= (scan_mode
== SCAN_NORMAL
&& isalpha(c
)) || (scan_mode
== SCAN_PUNCT
&& !isalnum(c
)); /* maybe */
404 //if (DEBUG_SCAN) printf("mode: %d; char: '%c'; keyword: %d\n", scan_mode, c, keyword);
405 /* look for white space to delimit word */
406 /* \ protects next character */
407 for (; c
!= EOF
; c
= yychar()) {
408 /* check if this is var access */
412 if ((c
= yychar()) == EOF
) break;
415 c
= scan_varaccess();
419 if (isalnum(c
) || c
== '_' || c
== '-' || c
== '<' || c
== '>') yywarning_ex("\"$x\" -- maybe you want \"$(x\" instead?");
421 /* check for some common bugs */
422 if (!qch
&& c
== '(') {
425 if (nc
== '$') yywarning_ex("\"($\" -- maybe you want \"$(\" instead?");
426 if (((sbused
> 0 && !isalnum(sbuf
[sbused
-1])) || (sbused
== 0)) &&
427 (isalnum(nc
) || nc
== '_' || nc
== '-' || nc
== '<' || nc
== '>')) yywarning_ex("\"(x\" -- maybe you want \"$(x\" instead?");
429 /* 'c' is not pushed yet */
430 if (!qch
&& scan_mode
== SCAN_PUNCT
) {
431 /* we are in a list; only the following keywords are possible */
432 if (strchr("{}[];", c
) != NULL
) {
441 if (!qch
&& (isspace(c
) || c
== '\'')) break;
442 if (!qch
&& scan_mode
== SCAN_NORMAL
&& c
!= '"' && c
!= '\'' && !isalnum(c
)) {
443 /* check if this char (and possibly next) forms non-alnum token */
445 if ((c
= yychar()) != EOF
) {
446 /* try 2-char tokens */
448 if ((kw
= find_keyword(sbuf
+sbused
-2, 2)) != NULL
) {
451 yylval
.type
= kw
->type
;
452 yylval
.string
= kw
->word
; /* used by symdump */
455 yywarning_ex("non-alpha token without whitespace");
456 /* return this 2 chars */
457 yyunget(sbuf
[--sbused
]);
458 yyunget(sbuf
[--sbused
]);
462 /* return one char back */
466 /* try 1-char token */
467 if (sbused
> 1 && sbuf
[sbused
-1] == '=' && isalnum(sbuf
[sbused
-2])) goto skipkwone
;
468 if (sbused
== 1 && sbuf
[sbused
-1] == '!') {
471 if (isalnum(nc
) || nc
== '-' || nc
== '_') goto skipkwone
;
473 if ((kw
= find_keyword(sbuf
+sbused
-1, 1)) != NULL
) {
476 yylval
.type
= kw
->type
;
477 yylval
.string
= kw
->word
; /* used by symdump */
480 if (strchr("{}[];", sbuf
[sbused
-1]) == NULL
) yywarning_ex("non-alpha token without whitespace");
481 /* return this char */
482 yyunget(sbuf
[--sbused
]);
487 /* pop this char and process it as usual */
490 /* check for quoting */
491 if (qch
&& c
== qch
) {
495 if (!qch
&& c
== '"') {
503 if ((c
= yychar()) == EOF
) break;
507 case 'a': PUSH_CHAR('\a'); break;
508 case 'b': PUSH_CHAR('\b'); break;
509 case 'e': PUSH_CHAR('\x1b'); break;
510 case 'f': PUSH_CHAR('\f'); break;
511 case 'n': PUSH_CHAR('\n'); break;
512 case 'r': PUSH_CHAR('\r'); break;
513 case 't': PUSH_CHAR('\t'); break;
514 case 'v': PUSH_CHAR('\v'); break;
517 if ((c
= yychar()) == EOF
) { yyerror(&yylval
, "invalid hex escape in quoted string"); goto eof
; }
518 if ((n
= digit(c
, 16)) < 0) { yyerror(&yylval
, "invalid hex escape in quoted string"); goto eof
; }
520 if ((c
= yychar()) != EOF
) {
521 int d
= digit(c
, 16);
522 if (d
< 0) yyunget(c
); else n
= (n
*16)+d
;
524 if (n
== 0) { yyerror(&yylval
, "invalid hex escape in quoted string"); goto eof
; }
527 //TODO: add '\uXXXX'?
529 if (isalnum(c
)) { yyerror(&yylval
, "invalid escape in quoted string"); goto eof
; }
540 if (scan_mode
== SCAN_NORMAL
) {
541 if (keyword
&& !isalpha(c
)) keyword
= 0;
542 } else if (scan_mode
== SCAN_PUNCT
) {
543 if (keyword
&& isalnum(c
)) keyword
= 0;
547 /* we looked ahead a character -- back up */
548 /* don't return spaces, they will be skipped on next call anyway */
549 if (c
!= EOF
&& !isspace(c
)) yyunget(c
);
550 /* check obvious errors */
551 if (qch
) { yyerror(&yylval
, "unmatched \" in string"); goto eof
; }
553 /*if (DEBUG_SCAN) printf("keyword: %d; str='%s' (%d)\n", keyword, sbuf, sbused);*/
554 /* scan token table */
556 if (keyword
&& sbused
> 0) {
558 if ((kw
= find_keyword(sbuf
, sbused
-1)) != NULL
) {
559 yylval
.type
= kw
->type
;
560 yylval
.string
= kw
->word
; /* used by symdump */
563 if (yylval
.type
== T_ARG
) yylval
.string
= newstr(sbuf
);
566 if (DEBUG_SCAN
) printf("scan %s\n", symdump(&yylval
));
569 yylval
.type
= 0; /* 0 is EOF for lemon */
576 static const char *symdump (const token_t
*s
) {
577 static char *buf
= NULL
;
578 static int bufsz
= 0;
580 if (s
->type
== EOF
) return "EOF";
581 nsz
= strlen(s
->string
)+128;
583 char *nb
= realloc(buf
, nsz
);
584 if (nb
== NULL
) { fprintf(stderr
, "FATAL: out of memory!\n"); abort(); }
589 case 0: sprintf(buf
, "unknown symbol <%s>", s
->string
); break;
590 case T_ARG
: sprintf(buf
, "argument <%s>", s
->string
); break;
591 case T_STRING
: sprintf(buf
, "string \"%s\"", s
->string
); break;
592 default: sprintf(buf
, "keyword `%s`", s
->string
); break;