option.c: fixed warnings
[k8jam.git] / src / scan.c
blobf53bd717cd3347d777959a4d146e732b64c94bfe
1 /* coded by Ketmar // Vampire Avalon (psyc://ketmar.no-ip.org/~Ketmar)
2 * Understanding is not required. Only obedience.
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, version 3 of the License ONLY.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 * scan.c - the jam yacc scanner
19 #include "jam.h"
20 #include "lists.h"
21 #include "parse.h"
22 #include "scan.h"
23 #include "jamgram.h"
24 #include "jambase.h"
25 #include "newstr.h"
26 #include "dstrings.h"
29 token_t yylval;
32 typedef struct {
33 const char *word;
34 int type;
35 } keyword_t;
38 static const keyword_t keywords[] = {
39 #include "jamgramtab.h"
40 {0,0}
/* one input source on the include chain read by yychar() */
typedef struct include_s {
  struct include_s *next; /* source that becomes current once this one is exhausted */
  const char *string; /* read cursor inside the text currently being scanned */
  char **strings; /* NULL-terminated list of in-memory strings (built-in Jambase), or NULL */
  FILE *file; /* set once the file has been loaded; only tested against NULL afterwards */
  const char *fname; /* file name, used for opening and for diagnostics */
  int line; /* line counter for error messages */
  int pos; /* position counter for error messages */
  char *fcontents; /* heap copy of the whole file; freed when the source is popped */
  int prevwasn; /* !0: the last char was '\n', so bump 'line' and reset 'pos' on the next read */
} include_t;
58 static include_t *incp = NULL; /* current file; head of chain */
60 /* hack to stop segfaulting when last string contains ';' without space before it */
61 static int s_back_count = 0; /* # of yyunget()ed chars */
62 static char s_back_chars[2]; /* buffer for yyunget()ed chars */
65 static int scan_mode = SCAN_NORMAL;
66 /*static int any_errors = 0;*/
68 static const char *symdump (const token_t *s);
71 #ifndef NDEBUG
72 static const char *mnames[] = {
73 "SCAN_NORMAL",
74 "SCAN_STRING",
75 "SCAN_PUNCT",
77 #endif
81 * Set parser mode: normal, string, or keyword
83 void yymode (int n) {
84 #ifndef NDEBUG
85 if (DEBUG_SCAN && scan_mode != n) printf("**MODE TRANSITION: %s --> %s\n", mnames[scan_mode], mnames[n]);
86 #endif
87 scan_mode = n;
91 void yyerror (const token_t *tk, const char *s) {
92 printf("ERROR(%d:%d) '%s': %s\n", tk->line, tk->pos, tk->file, s);
93 exit(EXITBAD); /* exit now */
97 static void yywarning_ex (const char *s) {
98 printf("WARNING(%d:%d) '%s': %s\n", incp->line, incp->pos, incp->fname, s);
102 void yyfparse (const char *s) {
103 include_t *i = (include_t *)malloc(sizeof(*i));
104 /* push this onto the incp chain */
105 i->string = "";
106 i->strings = NULL;
107 i->file = NULL;
108 //i->fname = strdup(s);
109 i->fname = newstr(s);
110 i->line = 0;
111 i->pos = 0;
112 i->next = incp;
113 //i->back_count = 0;
114 i->fcontents = NULL;
115 i->prevwasn = 1;
116 incp = i;
117 /* if the filename is "::Jambase", it means use the internal jambase */
118 if (strcmp(s, "::Jambase") == 0) {
119 jambase_unpack();
120 i->strings = jambase;
126 * yychar() - read new line and return first character
128 * fabricates a continuous stream of characters across include files, returning EOF at the bitter end
130 static int yychar (void) {
131 include_t *i = incp;
132 if (s_back_count) {
133 //fprintf(stderr, "GET unget: %d\n", s_back_chars[s_back_count-1]);
134 return s_back_chars[--s_back_count];
136 if (!incp) return EOF;
137 /* once we start reading from the input stream, we reset the
138 * include insertion point so that the next include file becomes
139 * the head of the list */
140 /* if there is more data in this line, return it */
141 if (i->prevwasn) { i->prevwasn = 0; ++i->line; i->pos = 0; }
142 again:
143 ++i->pos;
144 if (*i->string) {
145 if (*i->string == '\n') i->prevwasn = 1;
146 return *i->string++;
148 /* if we're reading from an internal string list, go to the next string */
149 if (i->strings) {
150 if (!*i->strings) goto next;
151 i->string = *(i->strings++);
152 return *i->string++;
154 /* if necessary, open the file and get file contents */
155 if (!i->file) {
156 FILE *f;
157 long fsize;
158 if ((f = fopen(i->fname, "rb")) == NULL) perror(i->fname);
159 i->file = f;
160 if (fseek(f, 0, SEEK_END) < 0) perror(i->fname);
161 if ((fsize = ftell(f)) < 0) perror(i->fname);
162 if (fseek(f, 0, SEEK_SET) < 0) perror(i->fname);
163 if (fsize > 1024*1024*64) {
164 fprintf(stderr, "FATAL: input file (%s) too big!\n", i->fname);
165 abort();
167 i->fcontents = calloc((unsigned)fsize+1, 1);
168 if (fsize > 0 && fread(i->fcontents, fsize, 1, f) != 1) perror(i->fname);
169 fclose(f); /* don't need to hold it opened */
170 i->string = i->fcontents;
171 goto again;
173 next:
174 /* this include is done */
175 /* free it up and return EOF so yyparse() returns to parse_file() */
176 incp = i->next;
177 /* close file, free name */
178 if (i->fcontents != NULL) free(i->fcontents);
179 //if (i->fname != NULL) free(i->fname);
180 free(i);
181 return EOF;
186 * yychar() - back up one character
188 static inline void yyunget (int c) {
189 if (c != EOF) {
190 if (s_back_count >= 2) { fprintf(stderr, "yyunget: too much!\n"); abort(); }
191 s_back_chars[s_back_count++] = c;
192 //fprintf(stderr, "UNGET: %d\n", c);
197 /* eat white space */
198 static int skip_spaces (int c) {
199 for (;;) {
200 /* skip past white space */
201 while (c != EOF && isspace(c)) {
202 yylval.line = incp->line;
203 yylval.pos = incp->pos;
204 c = yychar();
206 /* not a comment? swallow up comment line */
207 if (c != '#') break;
208 while ((c = yychar()) != EOF && c != '\n') ;
210 return c;
/*
 * digit() - value of char 'c' as a digit in the given base
 *
 * '0'..'9' give 0..9, letters (either case) give 10..35.
 * returns -1 for EOF, for non-digit chars and for digits not below 'base'.
 */
static int digit (int c, int base) {
  int value;
  if (c >= '0' && c <= '9') value = c-'0';
  else if (c >= 'a' && c <= 'z') value = c-'a'+10;
  else if (c >= 'A' && c <= 'Z') value = c-'A'+10;
  else return -1;
  return (value < base ? value : -1);
}
/*
 * remove_indent() - drop the first 'indent' chars of every line, in place
 *
 * text is a zero-terminated buffer with '\n' separated lines;
 * textlen is its length and includes the trailing zero.
 * lines shorter than 'indent' are left untouched.
 * nothing is done when indent is not positive.
 */
static void remove_indent (char *text, int textlen, int indent) {
  char *line = text;
  int left = textlen; /* bytes from 'line' to the end of buffer, zero included */
  if (indent <= 0) return;
  while (*line != 0) {
    int linelen;
    char *end = strchr(line, '\n');
    /* the last line has no '\n': it ends at the trailing zero */
    if (end == NULL) end = line+left-1;
    linelen = (int)(end-line);
    if (linelen >= indent) {
      /* shift the rest of the buffer over the indent */
      left -= indent;
      linelen -= indent;
      memmove(line, line+indent, left);
    }
    if (line[linelen] == 0) return;
    /* step over the line and its '\n' */
    left -= linelen+1;
    line += linelen+1;
  }
}
243 static inline const keyword_t *find_keyword (const char *nbuf, size_t nblen) {
244 if (nblen > 0) {
245 for (const keyword_t *k = keywords; k->word; ++k) if (strncmp(k->word, nbuf, nblen) == 0 && k->word[nblen] == 0) return k;
247 return NULL;
/*
 * yylex() - set yylval to current token; return its type
 */

/* token text buffer shared by yylex() and scan_varaccess() */
static char *sbuf = NULL; /* heap buffer, grown on demand */
static int sbsize = 0; /* allocated size of sbuf */
static int sbused; /* number of chars stored in sbuf */


/*
 * append one char to sbuf.
 * the buffer is enlarged (to the next multiple of 0x800 bytes) whenever
 * fewer than two free bytes are left; aborts when out of memory.
 */
#define PUSH_CHAR(_c) do { \
  if (sbused+1 >= sbsize) { \
    int grown_ = ((sbused+1)|0x7ff)+1; \
    char *fresh_ = realloc(sbuf, grown_); \
    if (fresh_ == NULL) { fprintf(stderr, "FATAL: out of memory!\n"); abort(); } \
    sbuf = fresh_; \
    sbsize = grown_; \
  } \
  sbuf[sbused++] = (_c); \
} while (0)
int scan_varaccess (void);


/*
 * copy chars to sbuf, starting with 'c', until EOF, stop1 or stop2 is met
 * (the stop char is not copied).
 * a nested "$(" is scanned recursively.
 * if a ')' was copied, *closed is set and the char after it is returned;
 * otherwise *closed is cleared and the stop char (or EOF) is returned.
 */
static int scan_varaccess_part (int c, int stop1, int stop2, int *closed) {
  *closed = 0;
  while (c != EOF && c != stop1 && c != stop2) {
    const int prev = c;
    PUSH_CHAR(prev);
    c = yychar();
    if (prev == ')') { *closed = 1; break; }
    if (prev == '$' && c == '(') { PUSH_CHAR(c); c = scan_varaccess(); }
  }
  return c;
}


/*
 * scan_varaccess() - copy the rest of a variable access to sbuf
 *
 * "$(" already scanned and pushed
 * return char after ")" (or EOF if the input ended first)
 */
int scan_varaccess (void) {
  int closed, quote = 0;
  int c = yychar();
  if (c == EOF) return c;
  /* scan variable name */
  c = scan_varaccess_part(c, '[', ':', &closed);
  if (closed || c == EOF) return c;
  /* scan indexing; 'c' is not pushed */
  if (c == '[') {
    c = scan_varaccess_part(c, ']', ']', &closed);
    if (closed) return c;
    /* find either selector or ')' */
    c = scan_varaccess_part(c, ':', ':', &closed);
    if (closed || c == EOF) return c;
  }
  /* scan selectors; 'c' is not pushed */
  while (c != EOF) {
    int prev;
    if (quote != '\'' && c == '\\') {
      /* screening: copy the backslash and the char after it verbatim */
      PUSH_CHAR(c);
      c = yychar();
      if (c == EOF) break;
      PUSH_CHAR(c);
      c = yychar();
      continue;
    }
    prev = c;
    PUSH_CHAR(prev);
    c = yychar();
    if (!quote) {
      /* a quote char opens a quoted part; ')' ends the access */
      if (prev == '"' || prev == '\'') { quote = prev; continue; }
      if (prev == ')') return c;
    }
    /* nested access (not expanded inside single quotes) */
    if (quote != '\'' && prev == '$' && c == '(') {
      PUSH_CHAR(c);
      c = scan_varaccess();
      continue;
    }
    /* the closing quote ends the quoted part, unless it is a single quote followed by another one */
    if (quote && prev == quote && !(quote == '\'' && c == '\'')) quote = 0;
  }
  return c;
}
333 int yylex (void) {
334 const keyword_t *kw;
335 int c;
336 sbused = 0;
337 yylval.strlit = 0;
338 if (!incp) goto eof;
339 yylval.strlit = 0; /* expand this string */
340 yylval.line = incp->line;
341 yylval.pos = incp->pos;
342 yylval.file = incp->fname;
343 /* get first character (whitespace or of token) */
344 c = yychar();
345 if (scan_mode == SCAN_STRING) {
346 /* if scanning for a string (action's {}'s), look for the closing brace */
347 /* we handle matching braces, if they match! */
348 int nest = 1, indent = -1, cind, bol;
349 /* skip spaces and newline */
350 while (c != EOF && c != '\n' && isspace(c)) c = yychar();
351 if (c == '\n') c = yychar();
352 /* collect string, caclucate indent */
353 cind = 0;
354 bol = 1;
355 while (c != EOF) {
356 if (c == '{') ++nest;
357 else if (c == '}' && !--nest) break;
358 /* indent calculation */
359 if (c == '\n') {
360 cind = 0;
361 bol = 1;
362 } else if (bol) {
363 if (isspace(c)) {
364 ++cind;
365 } else {
366 bol = 0;
367 if (indent < 0 || cind < indent) indent = cind;
370 PUSH_CHAR(c);
371 c = yychar();
373 /* we ate the ending brace -- regurgitate it */
374 if (c != EOF) yyunget(c);
375 /* check obvious errors */
376 if (nest) { yyerror(&yylval, "unmatched {} in action block"); goto eof; }
377 /* remove trailing newlines and spaces, add one newline */
378 while (sbused > 0 && isspace(sbuf[sbused-1])) --sbused;
379 PUSH_CHAR('\n');
380 PUSH_CHAR(0);
381 if (indent > 0) {
382 //fprintf(stderr, "=== %d ===\n%s===\n", indent, sbuf);
383 remove_indent(sbuf, sbused, indent);
384 //fprintf(stderr, "--- %d ---\n%s---\n", indent, sbuf);
386 yylval.type = T_STRING;
387 yylval.string = newstr(sbuf);
388 yymode(SCAN_NORMAL);
389 } else {
390 int keyword = 0, qch = 0;
391 int n;
392 c = skip_spaces(c);
393 /* c now contains the first character of a token */
394 if (c == EOF) goto eof;
395 /* special thingy: single-quoted string */
396 if (c == '\'') {
397 for (c = yychar(); c != EOF; c = yychar()) {
398 if (c == '\'') {
399 /* check for special case: "''" */
400 if ((c = yychar()) != '\'') {
401 if (c != EOF && !isspace(c)) yyunget(c);
402 break;
405 PUSH_CHAR(c);
407 PUSH_CHAR(0);
408 yylval.type = T_ARG;
409 yylval.strlit = 1; /* don't expand this string */
410 yylval.string = newstr(sbuf);
411 goto lexret;
413 /* 'normal' mode */
414 keyword = (scan_mode == SCAN_NORMAL && isalpha(c)) || (scan_mode == SCAN_PUNCT && !isalnum(c)); /* maybe */
415 //if (DEBUG_SCAN) printf("mode: %d; char: '%c'; keyword: %d\n", scan_mode, c, keyword);
416 /* look for white space to delimit word */
417 /* \ protects next character */
418 for (; c != EOF; c = yychar()) {
419 /* check if this is var access */
420 if (c == '$') {
421 keyword = 0;
422 PUSH_CHAR(c);
423 if ((c = yychar()) == EOF) break;
424 if (c == '(') {
425 PUSH_CHAR(c);
426 c = scan_varaccess();
427 yyunget(c);
428 continue;
430 if (!qch) {
431 if (isalnum(c) || c == '_' || c == '-' || c == '<' || c == '>') yywarning_ex("\"$x\" -- maybe you want \"$(x\" instead?");
434 /* check for some common bugs */
435 if (!qch && c == '(') {
436 int nc = yychar();
437 yyunget(nc);
438 if (nc == '$') yywarning_ex("\"($\" -- maybe you want \"$(\" instead?");
439 if (((sbused > 0 && !isalnum(sbuf[sbused-1])) || (sbused == 0)) &&
440 (isalnum(nc) || nc == '_' || nc == '-' || nc == '<' || nc == '>')) yywarning_ex("\"(x\" -- maybe you want \"$(x\" instead?");
442 /* 'c' is not pushed yet */
443 if (!qch && scan_mode == SCAN_PUNCT) {
444 /* we are in list, the only possible keywords follows */
445 if (strchr("{}[];", c) != NULL) {
446 if (sbused == 0) {
447 keyword = 1;
448 PUSH_CHAR(c);
449 c = ' ';
451 break;
454 if (!qch && (isspace(c) || c == '\'')) break;
455 if (!qch && scan_mode == SCAN_NORMAL && c != '"' && c != '\'' && !isalnum(c)) {
456 /* check if this char (and possibly next) forms non-alnum token */
457 PUSH_CHAR(c);
458 if ((c = yychar()) != EOF) {
459 /* try 2-char tokens */
460 PUSH_CHAR(c);
461 if ((kw = find_keyword(sbuf+sbused-2, 2)) != NULL) {
462 if (sbused == 2) {
463 /* wow! token! */
464 yylval.type = kw->type;
465 yylval.string = kw->word; /* used by symdump */
466 goto lexret;
468 yywarning_ex("non-alpha token without whitespace");
469 /* return this 2 chars */
470 yyunget(sbuf[--sbused]);
471 yyunget(sbuf[--sbused]);
472 c = ' ';
473 break;
475 /* return one char back */
476 --sbused;
477 yyunget(c);
479 /* try 1-char token */
480 if (sbused > 1 && sbuf[sbused-1] == '=' && isalnum(sbuf[sbused-2])) goto skipkwone;
481 if (sbused == 1 && sbuf[sbused-1] == '!') {
482 int nc = yychar();
483 yyunget(nc);
484 if (isalnum(nc) || nc == '-' || nc == '_') goto skipkwone;
486 if ((kw = find_keyword(sbuf+sbused-1, 1)) != NULL) {
487 if (sbused == 1) {
488 /* wow! token! */
489 yylval.type = kw->type;
490 yylval.string = kw->word; /* used by symdump */
491 goto lexret;
493 if (strchr("{}[];", sbuf[sbused-1]) == NULL) yywarning_ex("non-alpha token without whitespace");
494 /* return this char */
495 yyunget(sbuf[--sbused]);
496 c = ' ';
497 break;
499 skipkwone:
500 /* pop this char and process it as usual */
501 c = sbuf[--sbused];
503 /* check for quoting */
504 if (qch && c == qch) {
505 qch = 0;
506 continue;
508 if (!qch && c == '"') {
509 keyword = 0;
510 qch = c;
511 continue;
513 /* screened char? */
514 if (c == '\\') {
515 keyword = 0;
516 if ((c = yychar()) == EOF) break;
517 if (qch) {
518 /* in string */
519 switch (c) {
520 case 'a': PUSH_CHAR('\a'); break;
521 case 'b': PUSH_CHAR('\b'); break;
522 case 'e': PUSH_CHAR('\x1b'); break;
523 case 'f': PUSH_CHAR('\f'); break;
524 case 'n': PUSH_CHAR('\n'); break;
525 case 'r': PUSH_CHAR('\r'); break;
526 case 't': PUSH_CHAR('\t'); break;
527 case 'v': PUSH_CHAR('\v'); break;
528 case 'x':
529 // first digit
530 if ((c = yychar()) == EOF) { yyerror(&yylval, "invalid hex escape in quoted string"); goto eof; }
531 if ((n = digit(c, 16)) < 0) { yyerror(&yylval, "invalid hex escape in quoted string"); goto eof; }
532 // second digit
533 if ((c = yychar()) != EOF) {
534 int d = digit(c, 16);
535 if (d < 0) yyunget(c); else n = (n*16)+d;
537 if (n == 0) { yyerror(&yylval, "invalid hex escape in quoted string"); goto eof; }
538 PUSH_CHAR(n);
539 break;
540 //TODO: add '\uXXXX'?
541 default:
542 if (isalnum(c)) { yyerror(&yylval, "invalid escape in quoted string"); goto eof; }
543 PUSH_CHAR(c);
544 break;
546 } else {
547 /* not in string */
548 PUSH_CHAR(c);
550 continue;
552 /* normal char */
553 if (scan_mode == SCAN_NORMAL) {
554 if (keyword && !isalpha(c)) keyword = 0;
555 } else if (scan_mode == SCAN_PUNCT) {
556 if (keyword && isalnum(c)) keyword = 0;
558 PUSH_CHAR(c);
560 /* we looked ahead a character -- back up */
561 /* don't return spaces, they will be skipped on next call anyway */
562 if (c != EOF && !isspace(c)) yyunget(c);
563 /* check obvious errors */
564 if (qch) { yyerror(&yylval, "unmatched \" in string"); goto eof; }
565 PUSH_CHAR(0);
566 /*if (DEBUG_SCAN) printf("keyword: %d; str='%s' (%d)\n", keyword, sbuf, sbused);*/
567 /* scan token table */
568 yylval.type = T_ARG;
569 if (keyword && sbused > 0) {
570 /* find token */
571 if ((kw = find_keyword(sbuf, sbused-1)) != NULL) {
572 yylval.type = kw->type;
573 yylval.string = kw->word; /* used by symdump */
576 if (yylval.type == T_ARG) yylval.string = newstr(sbuf);
578 lexret:
579 if (DEBUG_SCAN) printf("scan %s\n", symdump(&yylval));
580 return yylval.type;
581 eof:
582 yylval.type = 0; /* 0 is EOF for lemon */
583 return yylval.type;
586 #undef PUSH_CHAR
589 static const char *symdump (const token_t *s) {
590 static char *buf = NULL;
591 static int bufsz = 0;
592 int nsz;
593 if (s->type == EOF) return "EOF";
594 nsz = strlen(s->string)+128;
595 if (nsz > bufsz) {
596 char *nb = realloc(buf, nsz);
597 if (nb == NULL) { fprintf(stderr, "FATAL: out of memory!\n"); abort(); }
598 buf = nb;
599 bufsz = nsz;
601 switch (s->type) {
602 case 0: sprintf(buf, "unknown symbol <%s>", s->string); break;
603 case T_ARG: sprintf(buf, "argument <%s>", s->string); break;
604 case T_STRING: sprintf(buf, "string \"%s\"", s->string); break;
605 default: sprintf(buf, "keyword `%s`", s->string); break;
607 return buf;