experimental GDC dependencies scanner (turned off by default)
[k8jam.git] / src / scan.c
blob6e2cdb15f3e7896a68580e63244fd60776c4fa45
1 /* coded by Ketmar // Vampire Avalon (psyc://ketmar.no-ip.org/~Ketmar)
2 * Understanding is not required. Only obedience.
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 * scan.c - the jam yacc scanner
20 #include "jam.h"
21 #include "lists.h"
22 #include "parse.h"
23 #include "scan.h"
24 #include "jamgram.h"
25 #include "jambase.h"
26 #include "newstr.h"
27 #include "dstrings.h"
/* the current token: yylex() fills it in and returns its type field */
30 token_t yylval;
/* one keyword table entry: the keyword spelling and the token type reported for it */
33 typedef struct {
34 const char *word;
35 int type;
36 } keyword_t;
/* keyword table: the entries come from jamgramtab.h; the trailing {0,0} entry
 * ends the table (find_keyword() stops at the first entry whose word is NULL) */
39 static const keyword_t keywords[] = {
40 #include "jamgramtab.h"
41 {0,0}
/* one entry of the include chain: a file (or an internal list of strings) being scanned */
45 typedef struct include_s {
46 struct include_s *next; /* the include that was current before this one; becomes incp again when this one is exhausted */
47 const char *string; /* read cursor: yychar() returns *string++ until it reaches the NUL */
48 char **strings; /* NULL-terminated list of strings to scan instead of a file (set to jambase for "::Jambase") */
49 FILE *file; /* set when the file has been read; the stream is closed right away, so this is only tested against NULL */
50 const char *fname; /* file name as passed to yyfparse(), stored via newstr() */
51 int line; /* line counter for error messages */
52 int pos; /* position for error messages */
53 //int back_count; /* # of yyunget()ed chars */
54 //char back_chars[2]; /* buffer for yyunget()ed chars */
55 char *fcontents; /* whole file contents, allocated in yychar() and freed when the include is done */
56 int prevwasn; /* !0: the next yychar() increments line, resets pos to 0 and clears this flag */
57 } include_t;
59 static include_t *incp = NULL; /* current file; head of chain */
61 /* hack to stop segfaulting when last string contains ';' without space before it */
/* the unget buffer lives at file scope rather than inside include_t, so chars
 * pushed back with yyunget() stay available after yychar() has freed the include */
62 static int s_back_count = 0; /* # of yyunget()ed chars */
63 static char s_back_chars[2]; /* buffer for yyunget()ed chars */
/* current scanner mode; changed only through yymode() */
66 static int scan_mode = SCAN_NORMAL;
67 /*static int any_errors = 0;*/
/* forward declaration: formats a token for the DEBUG_SCAN trace printed by yylex() */
69 static const char *symdump (const token_t *s);
/* mode names for the debug trace in yymode(), indexed by the scan mode value
 * (assumes SCAN_NORMAL, SCAN_STRING and SCAN_PUNCT are 0, 1 and 2 -- TODO confirm in scan.h) */
72 #ifndef NDEBUG
73 static const char *mnames[] = {
74 "SCAN_NORMAL",
75 "SCAN_STRING",
76 "SCAN_PUNCT",
78 #endif
82 * Set parser mode: normal, string, or keyword
84 void yymode (int n) {
85 #ifndef NDEBUG
86 if (DEBUG_SCAN && scan_mode != n) printf("**MODE TRANSITION: %s --> %s\n", mnames[scan_mode], mnames[n]);
87 #endif
88 scan_mode = n;
92 void yyerror (const token_t *tk, const char *s) {
93 printf("ERROR(%d:%d) '%s': %s\n", tk->line, tk->pos, tk->file, s);
94 exit(EXITBAD); /* exit now */
98 static void yywarning_ex (const char *s) {
99 printf("WARNING(%d:%d) '%s': %s\n", incp->line, incp->pos, incp->fname, s);
103 void yyfparse (const char *s) {
104 include_t *i = (include_t *)malloc(sizeof(*i));
105 /* push this onto the incp chain */
106 i->string = "";
107 i->strings = NULL;
108 i->file = NULL;
109 //i->fname = strdup(s);
110 i->fname = newstr(s);
111 i->line = 0;
112 i->pos = 0;
113 i->next = incp;
114 //i->back_count = 0;
115 i->fcontents = NULL;
116 i->prevwasn = 1;
117 incp = i;
118 /* if the filename is "::Jambase", it means use the internal jambase */
119 if (strcmp(s, "::Jambase") == 0) {
120 jambase_unpack();
121 i->strings = jambase;
127 * yychar() - read new line and return first character
129 * fabricates a continuous stream of characters across include files, returning EOF at the bitter end
131 static int yychar (void) {
132 include_t *i = incp;
133 if (s_back_count) {
134 //fprintf(stderr, "GET unget: %d\n", s_back_chars[s_back_count-1]);
135 return s_back_chars[--s_back_count];
137 if (!incp) return EOF;
138 /* once we start reading from the input stream, we reset the
139 * include insertion point so that the next include file becomes
140 * the head of the list */
141 /* if there is more data in this line, return it */
142 if (i->prevwasn) { i->prevwasn = 0; ++i->line; i->pos = 0; }
143 again:
144 ++i->pos;
145 if (*i->string) {
146 if (*i->string == '\n') i->prevwasn = 1;
147 return *i->string++;
149 /* if we're reading from an internal string list, go to the next string */
150 if (i->strings) {
151 if (!*i->strings) goto next;
152 i->string = *(i->strings++);
153 return *i->string++;
155 /* if necessary, open the file and get file contents */
156 if (!i->file) {
157 FILE *f;
158 long fsize;
159 if ((f = fopen(i->fname, "rb")) == NULL) perror(i->fname);
160 i->file = f;
161 if (fseek(f, 0, SEEK_END) < 0) perror(i->fname);
162 if ((fsize = ftell(f)) < 0) perror(i->fname);
163 if (fseek(f, 0, SEEK_SET) < 0) perror(i->fname);
164 i->fcontents = calloc(fsize+1, 1);
165 if (fsize > 0 && fread(i->fcontents, fsize, 1, f) != 1) perror(i->fname);
166 fclose(f); /* don't need to hold it opened */
167 i->string = i->fcontents;
168 goto again;
170 next:
171 /* this include is done */
172 /* free it up and return EOF so yyparse() returns to parse_file() */
173 incp = i->next;
174 /* close file, free name */
175 if (i->fcontents != NULL) free(i->fcontents);
176 //if (i->fname != NULL) free(i->fname);
177 free(i);
178 return EOF;
183 * yychar() - back up one character
185 static inline void yyunget (int c) {
186 if (c != EOF) {
187 if (s_back_count >= 2) { fprintf(stderr, "yyunget: too much!\n"); abort(); }
188 s_back_chars[s_back_count++] = c;
189 //fprintf(stderr, "UNGET: %d\n", c);
194 /* eat white space */
195 static int skip_spaces (int c) {
196 for (;;) {
197 /* skip past white space */
198 while (c != EOF && isspace(c)) {
199 yylval.line = incp->line;
200 yylval.pos = incp->pos;
201 c = yychar();
203 /* not a comment? swallow up comment line */
204 if (c != '#') break;
205 while ((c = yychar()) != EOF && c != '\n') ;
207 return c;
/*
 * digit() - convert a character to its numeric value in the given base
 *
 * '0'..'9' map to 0..9 and letters (either case) map to 10..35.
 * Returns -1 for EOF, for any other character, and for a value that is
 * not smaller than base.
 */
static int digit (int c, int base) {
  int value;
  if (c >= '0' && c <= '9') {
    value = c-'0';
  } else if (c >= 'a' && c <= 'z') {
    value = c-'a'+10;
  } else if (c >= 'A' && c <= 'Z') {
    value = c-'A'+10;
  } else {
    return -1;
  }
  return (value < base ? value : -1);
}
/*
 * remove_indent() - strip the first `indent` chars from every line of text, in place
 *
 * textlen is the length of text including the trailing zero.  Lines shorter
 * than indent are left untouched.  Nothing happens when indent is not
 * positive.
 */
static void remove_indent (char *text, int textlen, int indent) {
  char *line = text;
  int remaining = textlen; /* bytes from `line` to the end, trailing zero included */
  if (indent <= 0) return;
  while (*line != '\0') {
    char *end = strchr(line, '\n');
    /* the last line has no newline: use the terminating zero as its end */
    if (end == NULL) end = line+remaining-1;
    if (end-line >= indent) {
      /* shift the rest of the text (with the trailing zero) over the indent */
      remaining -= indent;
      end -= indent;
      memmove(line, line+indent, remaining);
    }
    if (*end == '\0') break;
    remaining -= end+1-line;
    line = end+1;
  }
}
240 static inline const keyword_t *find_keyword (const char *nbuf, size_t nblen) {
241 if (nblen > 0) {
242 for (const keyword_t *k = keywords; k->word; ++k) if (strncmp(k->word, nbuf, nblen) == 0 && k->word[nblen] == 0) return k;
244 return NULL;
249 * yylex() - set yylval to current token; return its type
/*
 * PUSH_CHAR() - append one char to the shared token buffer (sbuf, sbused, sbsize)
 *
 * The buffer is grown with realloc() when fewer than two free bytes are
 * left; the new size is the smallest multiple of 0x800 greater than
 * sbused+1.  The program aborts if the allocation fails.
 * The macro is a do { ... } while (0) statement and evaluates _c once.
 */
252 #define PUSH_CHAR(_c) do { \
253 if (sbused+1 >= sbsize) { \
254 int newsz = ((sbused+1)|0x7ff)+1; \
255 char *nb = realloc(sbuf, newsz); \
256 if (nb == NULL) { fprintf(stderr, "FATAL: out of memory!\n"); abort(); } \
257 sbuf = nb; \
258 sbsize = newsz; \
260 sbuf[sbused++] = (_c); \
261 } while (0)
/* token buffer: kept allocated between yylex() calls, which only reset sbused */
263 static char *sbuf = NULL;
/* allocated size of sbuf, in bytes */
264 static int sbsize = 0;
/* number of chars currently stored in sbuf */
265 static int sbused;
/*
 * scan_varaccess() - copy the rest of a "$(...)" variable access into the token buffer
 *
 * Every char read is appended with PUSH_CHAR, up to and including the
 * closing ')'.  The text is scanned in three stages: the variable name,
 * an optional "[...]" index, and the part that starts at ':'.  A nested
 * "$(" is handled by a recursive call at every stage (in the last stage
 * only outside single quotes).
 * Returns the char that follows the closing ')', or EOF if the input ends first.
 */
268 /* "$(" already scanned and pushed */
269 /* return char after ")" */
270 int scan_varaccess (void) {
271 int c = yychar(), qch = 0, oc;
272 if (c == EOF) return c;
273 /* scan variable name */
274 while (c != EOF && c != '[' && c != ':') {
275 PUSH_CHAR(c);
276 oc = c;
277 c = yychar();
/* oc is the char just pushed, c is the lookahead */
278 if (oc == ')') return c;
279 if (oc == '$' && c == '(') { PUSH_CHAR(c); c = scan_varaccess(); }
281 if (c == EOF) return c;
282 /* scan indexing; 'c' is not pushed */
283 if (c == '[') {
284 while (c != EOF && c != ']') {
285 PUSH_CHAR(c);
286 oc = c;
287 c = yychar();
288 if (oc == ')') return c;
289 if (oc == '$' && c == '(') { PUSH_CHAR(c); c = scan_varaccess(); }
291 /* find either selector or ')' */
292 while (c != EOF && c != ':') {
293 PUSH_CHAR(c);
294 oc = c;
295 c = yychar();
296 if (oc == ')') return c;
297 if (oc == '$' && c == '(') { PUSH_CHAR(c); c = scan_varaccess(); }
299 if (c == EOF) return c;
301 /* scan selectors; 'c' is not pushed */
/* qch holds the currently open quote char (0 when outside quotes);
 * a ')' inside quotes does not end the access */
302 while (c != EOF) {
303 if (qch != '\'' && c == '\\') {
304 /* screening */
/* the backslash and the char after it are both copied as they are */
305 PUSH_CHAR(c);
306 if ((c = yychar()) == EOF) break;
307 PUSH_CHAR(c);
308 c = yychar();
309 continue;
311 PUSH_CHAR(c);
312 oc = c;
313 c = yychar();
314 if (!qch && (oc == '"' || oc == '\'')) { qch = oc; continue; }
315 if (!qch && oc == ')') return c;
316 if (qch != '\'' && oc == '$' && c == '(') {
317 PUSH_CHAR(c);
318 c = scan_varaccess();
319 continue;
321 if (qch && oc == qch) {
/* the quote is closed unless it is a single quote directly followed by another one */
322 if (!(qch == '\'' && c == '\'')) qch = 0;
323 continue;
326 return c;
/*
 * yylex() - scan the next token into yylval and return its type
 *
 * Resets the token buffer, stores the current include's line, pos and file
 * name in yylval, and then:
 *  - in SCAN_STRING mode collects everything up to the matching '}' as one
 *    T_STRING (trailing white space replaced by a single newline, common
 *    indent removed) and switches the mode back to SCAN_NORMAL;
 *  - otherwise skips white space and '#' comments and scans one word: a
 *    single-quoted literal (T_ARG with strlit set), a keyword from the
 *    keyword table, or a plain T_ARG.
 * Returns 0 at the end of the current include, or when there is no include at all.
 */
330 int yylex (void) {
331 const keyword_t *kw;
332 int c;
333 sbused = 0;
334 yylval.strlit = 0;
335 if (!incp) goto eof;
336 yylval.strlit = 0; /* expand this string */
337 yylval.line = incp->line;
338 yylval.pos = incp->pos;
339 yylval.file = incp->fname;
340 /* get first character (whitespace or of token) */
341 c = yychar();
342 if (scan_mode == SCAN_STRING) {
343 /* if scanning for a string (action's {}'s), look for the closing brace */
344 /* we handle matching braces, if they match! */
/* nest: brace depth; indent: smallest indent seen so far (-1: none yet);
 * cind: indent of the current line; bol: still inside the leading white space of a line */
345 int nest = 1, indent = -1, cind, bol;
346 /* skip spaces and newline */
347 while (c != EOF && c != '\n' && isspace(c)) c = yychar();
348 if (c == '\n') c = yychar();
349 /* collect string, caclucate indent */
350 cind = 0;
351 bol = 1;
352 while (c != EOF) {
353 if (c == '{') ++nest;
354 else if (c == '}' && !--nest) break;
355 /* indent calculation */
356 if (c == '\n') {
357 cind = 0;
358 bol = 1;
359 } else if (bol) {
360 if (isspace(c)) {
361 ++cind;
362 } else {
/* first non-space char of the line: lines made only of white space never lower the indent */
363 bol = 0;
364 if (indent < 0 || cind < indent) indent = cind;
367 PUSH_CHAR(c);
368 c = yychar();
370 /* we ate the ending brace -- regurgitate it */
371 if (c != EOF) yyunget(c);
372 /* check obvious errors */
/* yyerror() calls exit(), so the goto after it is never reached */
373 if (nest) { yyerror(&yylval, "unmatched {} in action block"); goto eof; }
374 /* remove trailing newlines and spaces, add one newline */
375 while (sbused > 0 && isspace(sbuf[sbused-1])) --sbused;
376 PUSH_CHAR('\n');
377 PUSH_CHAR(0);
378 if (indent > 0) {
379 //fprintf(stderr, "=== %d ===\n%s===\n", indent, sbuf);
/* sbused counts the terminating zero here, as remove_indent() expects */
380 remove_indent(sbuf, sbused, indent);
381 //fprintf(stderr, "--- %d ---\n%s---\n", indent, sbuf);
383 yylval.type = T_STRING;
384 yylval.string = newstr(sbuf);
385 yymode(SCAN_NORMAL);
386 } else {
/* keyword: the word may still be a keyword; qch: currently open quote char (0: none) */
387 int keyword = 0, qch = 0;
388 int n;
389 c = skip_spaces(c);
390 /* c now contains the first character of a token */
391 if (c == EOF) goto eof;
392 /* special thingy: single-quoted string */
393 if (c == '\'') {
394 for (c = yychar(); c != EOF; c = yychar()) {
395 if (c == '\'') {
396 /* check for special case: "''" */
/* a doubled quote stands for one literal quote char; any other char ends the string */
397 if ((c = yychar()) != '\'') {
398 if (c != EOF && !isspace(c)) yyunget(c);
399 break;
402 PUSH_CHAR(c);
404 PUSH_CHAR(0);
405 yylval.type = T_ARG;
406 yylval.strlit = 1; /* don't expand this string */
407 yylval.string = newstr(sbuf);
408 goto lexret;
410 /* 'normal' mode */
/* a word can be a keyword only if it starts with a letter (SCAN_NORMAL) or with
 * a non-alphanumeric char (SCAN_PUNCT); later chars may clear the flag again */
411 keyword = (scan_mode == SCAN_NORMAL && isalpha(c)) || (scan_mode == SCAN_PUNCT && !isalnum(c)); /* maybe */
412 //if (DEBUG_SCAN) printf("mode: %d; char: '%c'; keyword: %d\n", scan_mode, c, keyword);
413 /* look for white space to delimit word */
414 /* \ protects next character */
415 for (; c != EOF; c = yychar()) {
416 /* check if this is var access */
417 if (c == '$') {
418 keyword = 0;
419 PUSH_CHAR(c);
420 if ((c = yychar()) == EOF) break;
421 if (c == '(') {
422 PUSH_CHAR(c);
423 c = scan_varaccess();
/* the char after ')' goes back so that the for-step reads it again */
424 yyunget(c);
425 continue;
427 if (!qch) {
428 if (isalnum(c) || c == '_' || c == '-' || c == '<' || c == '>') yywarning_ex("\"$x\" -- maybe you want \"$(x\" instead?");
431 /* check for some common bugs */
432 if (!qch && c == '(') {
/* peek at the next char without consuming it */
433 int nc = yychar();
434 yyunget(nc);
435 if (nc == '$') yywarning_ex("\"($\" -- maybe you want \"$(\" instead?");
436 if (((sbused > 0 && !isalnum(sbuf[sbused-1])) || (sbused == 0)) &&
437 (isalnum(nc) || nc == '_' || nc == '-' || nc == '<' || nc == '>')) yywarning_ex("\"(x\" -- maybe you want \"$(x\" instead?");
439 /* 'c' is not pushed yet */
440 if (!qch && scan_mode == SCAN_PUNCT) {
441 /* we are in list, the only possible keywords follows */
442 if (strchr("{}[];", c) != NULL) {
443 if (sbused == 0) {
/* the punctuation char is the whole token; c becomes a space so that nothing is pushed back below */
444 keyword = 1;
445 PUSH_CHAR(c);
446 c = ' ';
448 break;
451 if (!qch && (isspace(c) || c == '\'')) break;
452 if (!qch && scan_mode == SCAN_NORMAL && c != '"' && c != '\'' && !isalnum(c)) {
453 /* check if this char (and possibly next) forms non-alnum token */
454 PUSH_CHAR(c);
455 if ((c = yychar()) != EOF) {
456 /* try 2-char tokens */
457 PUSH_CHAR(c);
458 if ((kw = find_keyword(sbuf+sbused-2, 2)) != NULL) {
459 if (sbused == 2) {
460 /* wow! token! */
461 yylval.type = kw->type;
462 yylval.string = kw->word; /* used by symdump */
463 goto lexret;
/* the token is glued to a preceding word: end the word here and rescan both chars on the next call */
465 yywarning_ex("non-alpha token without whitespace");
466 /* return this 2 chars */
467 yyunget(sbuf[--sbused]);
468 yyunget(sbuf[--sbused]);
469 c = ' ';
470 break;
472 /* return one char back */
473 --sbused;
474 yyunget(c);
476 /* try 1-char token */
/* '=' directly after an alphanumeric char stays part of the word */
477 if (sbused > 1 && sbuf[sbused-1] == '=' && isalnum(sbuf[sbused-2])) goto skipkwone;
/* a leading '!' directly followed by an alphanumeric char, '-' or '_' stays part of the word */
478 if (sbused == 1 && sbuf[sbused-1] == '!') {
479 int nc = yychar();
480 yyunget(nc);
481 if (isalnum(nc) || nc == '-' || nc == '_') goto skipkwone;
483 if ((kw = find_keyword(sbuf+sbused-1, 1)) != NULL) {
484 if (sbused == 1) {
485 /* wow! token! */
486 yylval.type = kw->type;
487 yylval.string = kw->word; /* used by symdump */
488 goto lexret;
490 if (strchr("{}[];", sbuf[sbused-1]) == NULL) yywarning_ex("non-alpha token without whitespace");
491 /* return this char */
492 yyunget(sbuf[--sbused]);
493 c = ' ';
494 break;
496 skipkwone:
497 /* pop this char and process it as usual */
498 c = sbuf[--sbused];
500 /* check for quoting */
/* the double quotes themselves are not stored in the token */
501 if (qch && c == qch) {
502 qch = 0;
503 continue;
505 if (!qch && c == '"') {
506 keyword = 0;
507 qch = c;
508 continue;
510 /* screened char? */
511 if (c == '\\') {
512 keyword = 0;
513 if ((c = yychar()) == EOF) break;
514 if (qch) {
515 /* in string */
516 switch (c) {
517 case 'a': PUSH_CHAR('\a'); break;
518 case 'b': PUSH_CHAR('\b'); break;
519 case 'e': PUSH_CHAR('\x1b'); break;
520 case 'f': PUSH_CHAR('\f'); break;
521 case 'n': PUSH_CHAR('\n'); break;
522 case 'r': PUSH_CHAR('\r'); break;
523 case 't': PUSH_CHAR('\t'); break;
524 case 'v': PUSH_CHAR('\v'); break;
525 case 'x':
/* one or two hex digits; a char that is not a hex digit in second place is pushed back */
526 // first digit
527 if ((c = yychar()) == EOF) { yyerror(&yylval, "invalid hex escape in quoted string"); goto eof; }
528 if ((n = digit(c, 16)) < 0) { yyerror(&yylval, "invalid hex escape in quoted string"); goto eof; }
529 // second digit
530 if ((c = yychar()) != EOF) {
531 int d = digit(c, 16);
532 if (d < 0) yyunget(c); else n = (n*16)+d;
/* an escape with the value zero is rejected */
534 if (n == 0) { yyerror(&yylval, "invalid hex escape in quoted string"); goto eof; }
535 PUSH_CHAR(n);
536 break;
537 //TODO: add '\uXXXX'?
538 default:
/* unknown alphanumeric escapes are errors; any other escaped char stands for itself */
539 if (isalnum(c)) { yyerror(&yylval, "invalid escape in quoted string"); goto eof; }
540 PUSH_CHAR(c);
541 break;
543 } else {
544 /* not in string */
545 PUSH_CHAR(c);
547 continue;
549 /* normal char */
550 if (scan_mode == SCAN_NORMAL) {
551 if (keyword && !isalpha(c)) keyword = 0;
552 } else if (scan_mode == SCAN_PUNCT) {
553 if (keyword && isalnum(c)) keyword = 0;
555 PUSH_CHAR(c);
557 /* we looked ahead a character -- back up */
558 /* don't return spaces, they will be skipped on next call anyway */
559 if (c != EOF && !isspace(c)) yyunget(c);
560 /* check obvious errors */
561 if (qch) { yyerror(&yylval, "unmatched \" in string"); goto eof; }
562 PUSH_CHAR(0);
563 /*if (DEBUG_SCAN) printf("keyword: %d; str='%s' (%d)\n", keyword, sbuf, sbused);*/
564 /* scan token table */
565 yylval.type = T_ARG;
566 if (keyword && sbused > 0) {
567 /* find token */
/* sbused-1: the terminating zero pushed above is not part of the word */
568 if ((kw = find_keyword(sbuf, sbused-1)) != NULL) {
569 yylval.type = kw->type;
570 yylval.string = kw->word; /* used by symdump */
573 if (yylval.type == T_ARG) yylval.string = newstr(sbuf);
575 lexret:
576 if (DEBUG_SCAN) printf("scan %s\n", symdump(&yylval));
577 return yylval.type;
578 eof:
579 yylval.type = 0; /* 0 is EOF for lemon */
580 return yylval.type;
583 #undef PUSH_CHAR
586 static const char *symdump (const token_t *s) {
587 static char *buf = NULL;
588 static int bufsz = 0;
589 int nsz;
590 if (s->type == EOF) return "EOF";
591 nsz = strlen(s->string)+128;
592 if (nsz > bufsz) {
593 char *nb = realloc(buf, nsz);
594 if (nb == NULL) { fprintf(stderr, "FATAL: out of memory!\n"); abort(); }
595 buf = nb;
596 bufsz = nsz;
598 switch (s->type) {
599 case 0: sprintf(buf, "unknown symbol <%s>", s->string); break;
600 case T_ARG: sprintf(buf, "argument <%s>", s->string); break;
601 case T_STRING: sprintf(buf, "string \"%s\"", s->string); break;
602 default: sprintf(buf, "keyword `%s`", s->string); break;
604 return buf;