added "C++FLAGS.all" (and "OBJCFLAGS.all"); "CFLAGS.all" is still in effect
[k8jam.git] / src / scan.c
blob4f02a0487a0601adcde9b01f9d006fa403ca2643
1 /* coded by Ketmar // Invisible Vector (psyc://ketmar.no-ip.org/~Ketmar)
2 * Understanding is not required. Only obedience.
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, version 3 of the License ONLY.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 * scan.c - the jam yacc scanner
19 #include "jam.h"
20 #include "lists.h"
21 #include "parse.h"
22 #include "scan.h"
23 #include "jamgram.h"
24 #include "jambase.h"
25 #include "newstr.h"
26 #include "dstrings.h"
/* the current token; yylex() fills in its fields before returning the token type */
29 token_t yylval;
/* one keyword table entry: the keyword text and the token type yylex() returns for it */
32 typedef struct {
33 const char *word;
34 int type;
35 } keyword_t;
/* keyword table: the entries are pulled in from jamgramtab.h; the final {0,0} entry
 * terminates the table (find_keyword() stops at the first entry with a NULL word) */
38 static const keyword_t keywords[] = {
39 #include "jamgramtab.h"
40 {0,0}
/* state of one input source (a file or an internal string list);
 * yyfparse() pushes a new entry at the head of the chain (incp) and
 * yychar() unlinks and frees the head entry once it is exhausted */
44 typedef struct include_s {
45 struct include_s *next; /* next serial include file */
46 const char *string; /* pointer into current line */
47 char **strings; /* for yyfparse() -- text to parse */
48 FILE *file; /* for yyfparse() -- file being read */
49 const char *fname; /* for yyfparse() -- file name */
50 int line; /* line counter for error messages */
51 int pos; /* position for error messages */
52 //int back_count; /* # of yyunget()ed chars */
53 //char back_chars[2]; /* buffer for yyunget()ed chars */
54 char *fcontents; /* for yyfparse() -- file contents */
55 int prevwasn; /* !0: increment line and reset to 0 */
56 } include_t;
58 static include_t *incp = NULL; /* current file; head of chain */
/* the push-back buffer is shared by all include entries:
 * yyunget() appends to it and yychar() returns the most recently pushed char first */
60 /* hack to stop segfaulting when last string contains ';' without space before it */
61 static int s_back_count = 0; /* # of yyunget()ed chars */
62 static char s_back_chars[2]; /* buffer for yyunget()ed chars */
/* current scanner mode (one of the SCAN_xxx values); changed only through yymode() */
65 static int scan_mode = SCAN_NORMAL;
66 /*static int any_errors = 0;*/
/* formats a token for the DEBUG_SCAN trace printed by yylex(); defined at the end of the file */
68 static const char *symdump (const token_t *s);
/* mode names for the debug trace in yymode(); the array is indexed with the mode value itself
 * NOTE(review): the order presumably matches the numeric values of the SCAN_xxx constants -- confirm against scan.h */
71 #ifndef NDEBUG
72 static const char *mnames[] = {
73 "SCAN_NORMAL",
74 "SCAN_STRING",
75 "SCAN_PUNCT",
77 #endif
81 * Set parser mode: normal, string, or keyword
83 void yymode (int n) {
84 #ifndef NDEBUG
85 if (DEBUG_SCAN && scan_mode != n) printf("**MODE TRANSITION: %s --> %s\n", mnames[scan_mode], mnames[n]);
86 #endif
87 scan_mode = n;
91 void yyerror (const token_t *tk, const char *s) {
92 printf("ERROR(%d:%d) '%s': %s\n", tk->line, tk->pos, tk->file, s);
93 exit(EXITBAD); /* exit now */
97 static void yywarning_ex (const char *s) {
98 printf("WARNING(%d:%d) '%s': %s\n", incp->line, incp->pos, incp->fname, s);
102 void yyfparse (const char *s) {
103 include_t *i = (include_t *)malloc(sizeof(*i));
104 /* push this onto the incp chain */
105 i->string = "";
106 i->strings = NULL;
107 i->file = NULL;
108 //i->fname = strdup(s);
109 i->fname = newstr(s);
110 i->line = 0;
111 i->pos = 0;
112 i->next = incp;
113 //i->back_count = 0;
114 i->fcontents = NULL;
115 i->prevwasn = 1;
116 incp = i;
117 /* if the filename is "::Jambase", it means use the internal jambase */
118 if (strcmp(s, "::Jambase") == 0) {
119 jambase_unpack();
120 i->strings = jambase;
126 * yychar() - read new line and return first character
128 * fabricates a continuous stream of characters across include files, returning EOF at the bitter end
130 static int yychar (void) {
131 include_t *i = incp;
132 if (s_back_count) {
133 //fprintf(stderr, "GET unget: %d\n", s_back_chars[s_back_count-1]);
134 return s_back_chars[--s_back_count];
136 if (!incp) return EOF;
137 /* once we start reading from the input stream, we reset the
138 * include insertion point so that the next include file becomes
139 * the head of the list */
140 /* if there is more data in this line, return it */
141 if (i->prevwasn) { i->prevwasn = 0; ++i->line; i->pos = 0; }
142 again:
143 ++i->pos;
144 if (*i->string) {
145 #if 0
146 if (i->fcontents != NULL) {
147 fprintf(stderr, "...: (%d:%d) pos=%u; <%s>\n", i->line, i->pos, (int)(ptrdiff_t)(i->string-i->fcontents), i->fname);
149 #endif
150 if (*i->string == '\n') i->prevwasn = 1;
151 return *i->string++;
153 /* if we're reading from an internal string list, go to the next string */
154 if (i->strings) {
155 if (!*i->strings) goto next;
156 i->string = *(i->strings++);
157 return *i->string++;
159 /* if necessary, open the file and get file contents */
160 if (!i->file) {
161 FILE *f;
162 long fsize;
163 #if 0
164 fprintf(stderr, "OPENING: <%s>\n", i->fname);
165 #endif
166 if ((f = fopen(i->fname, "rb")) == NULL) perror(i->fname);
167 i->file = f;
168 if (fseek(f, 0, SEEK_END) < 0) perror(i->fname);
169 if ((fsize = ftell(f)) < 0) perror(i->fname);
170 if (fseek(f, 0, SEEK_SET) < 0) perror(i->fname);
171 uint32_t xalsz;
172 if (fsize > 1024*1024*64) {
173 fprintf(stderr, "FATAL: input file (%s) too big!\n", i->fname);
174 abort();
176 xalsz = (uint32_t)fsize;
177 // fuck off, g-shit-cc!
178 if (xalsz > 1024*1024*64) {
179 fprintf(stderr, "FATAL: input file (%s) too big!\n", i->fname);
180 abort();
182 i->fcontents = calloc(1, xalsz+2U);
183 if (fsize > 0 && fread(i->fcontents, xalsz, 1, f) != 1) perror(i->fname);
184 fclose(f); /* don't need to hold it open */
185 /*k8: hack, because i don't understand why it doesn't work sometimes */
186 i->fcontents[xalsz] = '\n';
187 i->fcontents[xalsz+1] = 0;
188 i->string = i->fcontents;
189 goto again;
191 next:
192 /* this include is done */
193 /* free it up and return EOF so yyparse() returns to parse_file() */
194 incp = i->next;
195 /* close file, free name */
196 if (i->fcontents != NULL) {
197 #if 0
198 fprintf(stderr, "DONE-INC: <%s>\n", i->fname);
199 #endif
200 free(i->fcontents);
202 //if (i->fname != NULL) free(i->fname);
203 free(i);
204 return EOF;
209 * yychar() - back up one character
211 static inline void yyunget (int c) {
212 if (c != EOF) {
213 if (s_back_count >= 2) { fprintf(stderr, "yyunget: too much!\n"); abort(); }
214 s_back_chars[s_back_count++] = c;
215 //fprintf(stderr, "UNGET: %d\n", c);
220 /* eat white space */
221 static int skip_spaces (int c) {
222 for (;;) {
223 /* skip past white space */
224 while (c != EOF && isspace(c)) {
225 yylval.line = incp->line;
226 yylval.pos = incp->pos;
227 c = yychar();
229 /* not a comment? swallow up comment line */
230 if (c != '#') break;
231 while ((c = yychar()) != EOF && c != '\n') ;
233 return c;
/*
 * digit() - convert character 'c' to its numeric value in the given base
 *
 * '0'..'9' map to 0..9, letters (either case) map to 10..35.
 * returns -1 for EOF, for characters that are not digits or letters, and for
 * values that do not fit into 'base'. assumes an ASCII-compatible character set.
 */
static int digit (int c, int base) {
  int value;
  if (c >= '0' && c <= '9') value = c-'0';
  else if (c >= 'A' && c <= 'Z') value = c-'A'+10;
  else if (c >= 'a' && c <= 'z') value = c-'a'+10;
  else return -1;
  return (value < base ? value : -1);
}
/*
 * remove_indent() - drop the first 'indent' characters of every line, in place
 *
 * 'text' is a zero-terminated buffer and 'textlen' is its length including
 * the trailing zero. lines shorter than 'indent' (empty lines, for example)
 * are left untouched. nothing is done when 'indent' is not positive.
 */
static void remove_indent (char *text, int textlen, int indent) {
  char *line = text;
  int remaining = textlen; /* bytes from 'line' up to and including the trailing zero */
  if (indent <= 0) return;
  while (*line != 0) {
    /* end of the current line: its newline, or the trailing zero for the last line */
    char *nl = strchr(line, '\n');
    char *end = (nl != NULL ? nl : line+remaining-1);
    if (end-line >= indent) {
      /* shift the rest of the buffer (trailing zero included) over the indent */
      remaining -= indent;
      end -= indent;
      memmove(line, line+indent, remaining);
    }
    if (*end == 0) break;
    remaining -= (int)(end+1-line);
    line = end+1;
  }
}
266 static inline const keyword_t *find_keyword (const char *nbuf, size_t nblen) {
267 if (nblen > 0) {
268 for (const keyword_t *k = keywords; k->word; ++k) if (strncmp(k->word, nbuf, nblen) == 0 && k->word[nblen] == 0) return k;
270 return NULL;
275 * yylex() - set yylval to current token; return its type
/* PUSH_CHAR(_c): append one character to the token buffer 'sbuf'.
 * the buffer is grown with realloc() when sbused+1 reaches sbsize; the new size is
 * rounded up to the next multiple of 2048 bytes. aborts on allocation failure.
 * uses the file-level variables sbuf, sbsize and sbused directly. */
278 #define PUSH_CHAR(_c) do { \
279 if (sbused+1 >= sbsize) { \
280 int newsz = ((sbused+1)|0x7ff)+1; \
281 char *nb = realloc(sbuf, newsz); \
282 if (nb == NULL) { fprintf(stderr, "FATAL: out of memory!\n"); abort(); } \
283 sbuf = nb; \
284 sbsize = newsz; \
286 sbuf[sbused++] = (_c); \
287 } while (0)
/* token text buffer shared by yylex() and scan_varaccess():
 * sbuf is the storage, sbsize its allocated size, sbused the number of bytes in use
 * (yylex() resets sbused to 0 at the start of every token) */
289 static char *sbuf = NULL;
290 static int sbsize = 0;
291 static int sbused;
/*
 * scan_varaccess() - copy the rest of a "$(...)" variable reference into the token buffer
 *
 * the text is copied up to and including the closing ')'; a nested "$(" is
 * handled by a recursive call. the three phases below scan the variable name,
 * the optional "[...]" part and the selectors after ':'.
 * returns EOF if the input ends before the closing ')'.
 */
294 /* "$(" already scanned and pushed */
295 /* return char after ")" */
296 int scan_varaccess (void) {
297 int c = yychar(), qch = 0, oc;
298 if (c == EOF) return c;
299 /* scan variable name */
/* in each loop below 'oc' is the char just pushed and 'c' is the look-ahead char */
300 while (c != EOF && c != '[' && c != ':') {
301 PUSH_CHAR(c);
302 oc = c;
303 c = yychar();
304 if (oc == ')') return c;
305 if (oc == '$' && c == '(') { PUSH_CHAR(c); c = scan_varaccess(); }
307 if (c == EOF) return c;
308 /* scan indexing; 'c' is not pushed */
309 if (c == '[') {
310 while (c != EOF && c != ']') {
311 PUSH_CHAR(c);
312 oc = c;
313 c = yychar();
314 if (oc == ')') return c;
315 if (oc == '$' && c == '(') { PUSH_CHAR(c); c = scan_varaccess(); }
317 /* find either selector or ')' */
318 while (c != EOF && c != ':') {
319 PUSH_CHAR(c);
320 oc = c;
321 c = yychar();
322 if (oc == ')') return c;
323 if (oc == '$' && c == '(') { PUSH_CHAR(c); c = scan_varaccess(); }
325 if (c == EOF) return c;
327 /* scan selectors; 'c' is not pushed */
/* 'qch' is the quote char we are inside of (0: not quoted); a backslash escapes
 * the next char everywhere except inside single quotes, and ')' ends the
 * reference only outside of quotes */
328 while (c != EOF) {
329 if (qch != '\'' && c == '\\') {
330 /* screening */
331 PUSH_CHAR(c);
332 if ((c = yychar()) == EOF) break;
333 PUSH_CHAR(c);
334 c = yychar();
335 continue;
337 PUSH_CHAR(c);
338 oc = c;
339 c = yychar();
/* opening quote */
340 if (!qch && (oc == '"' || oc == '\'')) { qch = oc; continue; }
341 if (!qch && oc == ')') return c;
/* nested variable access is not recognized inside single quotes */
342 if (qch != '\'' && oc == '$' && c == '(') {
343 PUSH_CHAR(c);
344 c = scan_varaccess();
345 continue;
/* closing quote; a single quote directly followed by another single quote does not close the string */
/* NOTE(review): the second quote of such a pair is examined again on the next iteration
 * and closes the string unless yet another quote follows -- confirm that this is intended */
347 if (qch && oc == qch) {
348 if (!(qch == '\'' && c == '\'')) qch = 0;
349 continue;
352 return c;
/*
 * yylex() - scan the next token into yylval and return its type
 *
 * returns 0 when there is no more input. in SCAN_STRING mode everything up to
 * the matching '}' is returned as one T_STRING (with the common indent removed)
 * and the mode is switched back to SCAN_NORMAL. in the other modes either a
 * single-quoted literal (T_ARG with strlit set) or a word is scanned; a word
 * that is found in the keyword table gets the keyword's type, anything else is T_ARG.
 * errors are reported with yyerror(), which terminates the process.
 */
356 int yylex (void) {
357 const keyword_t *kw;
358 int c;
359 sbused = 0;
360 yylval.strlit = 0;
361 if (!incp) goto eof;
362 yylval.strlit = 0; /* expand this string */
363 yylval.line = incp->line;
364 yylval.pos = incp->pos;
365 yylval.file = incp->fname;
366 /* get first character (whitespace or of token) */
367 c = yychar();
368 if (scan_mode == SCAN_STRING) {
369 /* if scanning for a string (action's {}'s), look for the closing brace */
370 /* we handle matching braces, if they match! */
371 int nest = 1, indent = -1, cind, bol;
372 /* skip spaces and newline */
373 while (c != EOF && c != '\n' && isspace(c)) c = yychar();
374 if (c == '\n') c = yychar();
375 /* collect string, calculate indent */
/* 'cind' counts the leading white space chars of the current line, 'bol' is set while
 * we are still inside that leading white space, 'indent' is the smallest 'cind' seen
 * on a line that has a non-space char (-1: no such line yet) */
376 cind = 0;
377 bol = 1;
378 while (c != EOF) {
379 if (c == '{') ++nest;
380 else if (c == '}' && !--nest) break;
381 /* indent calculation */
382 if (c == '\n') {
383 cind = 0;
384 bol = 1;
385 } else if (bol) {
386 if (isspace(c)) {
387 ++cind;
388 } else {
389 bol = 0;
390 if (indent < 0 || cind < indent) indent = cind;
393 PUSH_CHAR(c);
394 c = yychar();
396 /* we ate the ending brace -- regurgitate it */
397 if (c != EOF) yyunget(c);
398 /* check obvious errors */
399 if (nest) { yyerror(&yylval, "unmatched {} in action block"); goto eof; }
400 /* remove trailing newlines and spaces, add one newline */
401 while (sbused > 0 && isspace(sbuf[sbused-1])) --sbused;
402 PUSH_CHAR('\n');
403 PUSH_CHAR(0);
/* sbused now includes the trailing zero, as remove_indent() expects */
404 if (indent > 0) {
405 //fprintf(stderr, "=== %d ===\n%s===\n", indent, sbuf);
406 remove_indent(sbuf, sbused, indent);
407 //fprintf(stderr, "--- %d ---\n%s---\n", indent, sbuf);
409 yylval.type = T_STRING;
410 yylval.string = newstr(sbuf);
411 yymode(SCAN_NORMAL);
412 } else {
/* 'keyword' is set while the text scanned so far may still be a keyword,
 * 'qch' is the quote char we are inside of (0: not quoted) */
413 int keyword = 0, qch = 0;
414 int n;
415 c = skip_spaces(c);
416 /* c now contains the first character of a token */
417 if (c == EOF) goto eof;
418 /* special thingy: single-quoted string */
419 if (c == '\'') {
420 for (c = yychar(); c != EOF; c = yychar()) {
421 if (c == '\'') {
422 /* check for special case: "''" */
/* a doubled quote stands for one literal quote char; a single one ends the string */
423 if ((c = yychar()) != '\'') {
424 if (c != EOF && !isspace(c)) yyunget(c);
425 break;
428 PUSH_CHAR(c);
430 PUSH_CHAR(0);
431 yylval.type = T_ARG;
432 yylval.strlit = 1; /* don't expand this string */
433 yylval.string = newstr(sbuf);
434 goto lexret;
436 /* 'normal' mode */
/* a keyword candidate starts with a letter in SCAN_NORMAL mode and with a non-alphanumeric char in SCAN_PUNCT mode */
437 keyword = (scan_mode == SCAN_NORMAL && isalpha(c)) || (scan_mode == SCAN_PUNCT && !isalnum(c)); /* maybe */
438 //if (DEBUG_SCAN) printf("mode: %d; char: '%c'; keyword: %d\n", scan_mode, c, keyword);
439 /* look for white space to delimit word */
440 /* \ protects next character */
441 for (; c != EOF; c = yychar()) {
442 /* check if this is var access */
443 if (c == '$') {
444 keyword = 0;
445 PUSH_CHAR(c);
446 if ((c = yychar()) == EOF) break;
447 if (c == '(') {
448 PUSH_CHAR(c);
449 c = scan_varaccess();
/* scan_varaccess() returned the char after ')': push it back so the for loop reads it again */
450 yyunget(c);
451 continue;
453 if (!qch) {
454 if (isalnum(c) || c == '_' || c == '-' || c == '<' || c == '>') yywarning_ex("\"$x\" -- maybe you want \"$(x\" instead?");
457 /* check for some common bugs */
458 if (!qch && c == '(') {
/* peek at the next char without consuming it */
459 int nc = yychar();
460 yyunget(nc);
461 if (nc == '$') yywarning_ex("\"($\" -- maybe you want \"$(\" instead?");
462 if (((sbused > 0 && !isalnum(sbuf[sbused-1])) || (sbused == 0)) &&
463 (isalnum(nc) || nc == '_' || nc == '-' || nc == '<' || nc == '>')) yywarning_ex("\"(x\" -- maybe you want \"$(x\" instead?");
465 /* 'c' is not pushed yet */
466 if (!qch && scan_mode == SCAN_PUNCT) {
467 /* we are in list, the only possible keywords follows */
/* at the very start of a token the char becomes the token itself;
 * otherwise it ends the current token and is pushed back after the loop */
468 if (strchr("{}[];", c) != NULL) {
469 if (sbused == 0) {
470 keyword = 1;
471 PUSH_CHAR(c);
472 c = ' ';
474 break;
477 if (!qch && (isspace(c) || c == '\'')) break;
478 if (!qch && scan_mode == SCAN_NORMAL && c != '"' && c != '\'' && !isalnum(c)) {
479 /* check if this char (and possibly next) forms non-alnum token */
480 PUSH_CHAR(c);
481 if ((c = yychar()) != EOF) {
482 /* try 2-char tokens */
483 PUSH_CHAR(c);
484 if ((kw = find_keyword(sbuf+sbused-2, 2)) != NULL) {
485 if (sbused == 2) {
486 /* wow! token! */
487 yylval.type = kw->type;
488 yylval.string = kw->word; /* used by symdump */
489 goto lexret;
/* the operator is glued to the preceding text: end the current token here and rescan the operator on the next call */
491 yywarning_ex("non-alpha token without whitespace");
492 /* return this 2 chars */
493 yyunget(sbuf[--sbused]);
494 yyunget(sbuf[--sbused]);
495 c = ' ';
496 break;
498 /* return one char back */
499 --sbused;
500 yyunget(c);
502 /* try 1-char token */
/* '=' right after an alphanumeric char is kept as a part of the word */
503 if (sbused > 1 && sbuf[sbused-1] == '=' && isalnum(sbuf[sbused-2])) goto skipkwone;
/* '!' at the start of a word that continues with an alphanumeric char, '-' or '_' is kept as a part of the word */
504 if (sbused == 1 && sbuf[sbused-1] == '!') {
505 int nc = yychar();
506 yyunget(nc);
507 if (isalnum(nc) || nc == '-' || nc == '_') goto skipkwone;
509 if ((kw = find_keyword(sbuf+sbused-1, 1)) != NULL) {
510 if (sbused == 1) {
511 /* wow! token! */
512 yylval.type = kw->type;
513 yylval.string = kw->word; /* used by symdump */
514 goto lexret;
516 if (strchr("{}[];", sbuf[sbused-1]) == NULL) yywarning_ex("non-alpha token without whitespace");
517 /* return this char */
518 yyunget(sbuf[--sbused]);
519 c = ' ';
520 break;
522 skipkwone:
523 /* pop this char and process it as usual */
524 c = sbuf[--sbused];
526 /* check for quoting */
/* the double quote chars themselves are not stored in the token */
527 if (qch && c == qch) {
528 qch = 0;
529 continue;
531 if (!qch && c == '"') {
532 keyword = 0;
533 qch = c;
534 continue;
536 /* screened char? */
537 if (c == '\\') {
538 keyword = 0;
539 if ((c = yychar()) == EOF) break;
540 if (qch) {
541 /* in string */
542 switch (c) {
543 case 'a': PUSH_CHAR('\a'); break;
544 case 'b': PUSH_CHAR('\b'); break;
545 case 'e': PUSH_CHAR('\x1b'); break;
546 case 'f': PUSH_CHAR('\f'); break;
547 case 'n': PUSH_CHAR('\n'); break;
548 case 'r': PUSH_CHAR('\r'); break;
549 case 't': PUSH_CHAR('\t'); break;
550 case 'v': PUSH_CHAR('\v'); break;
551 case 'x':
/* one or two hex digits; a value of zero is rejected */
552 // first digit
553 if ((c = yychar()) == EOF) { yyerror(&yylval, "invalid hex escape in quoted string"); goto eof; }
554 if ((n = digit(c, 16)) < 0) { yyerror(&yylval, "invalid hex escape in quoted string"); goto eof; }
555 // second digit
556 if ((c = yychar()) != EOF) {
557 int d = digit(c, 16);
558 if (d < 0) yyunget(c); else n = (n*16)+d;
560 if (n == 0) { yyerror(&yylval, "invalid hex escape in quoted string"); goto eof; }
561 PUSH_CHAR(n);
562 break;
563 //TODO: add '\uXXXX'?
564 default:
/* any other letter or digit is an error; any other char stands for itself */
565 if (isalnum(c)) { yyerror(&yylval, "invalid escape in quoted string"); goto eof; }
566 PUSH_CHAR(c);
567 break;
569 } else {
570 /* not in string */
571 PUSH_CHAR(c);
573 continue;
575 /* normal char */
/* drop the keyword candidate as soon as a char that cannot be a part of a keyword in this mode shows up */
576 if (scan_mode == SCAN_NORMAL) {
577 if (keyword && !isalpha(c)) keyword = 0;
578 } else if (scan_mode == SCAN_PUNCT) {
579 if (keyword && isalnum(c)) keyword = 0;
581 PUSH_CHAR(c);
583 /* we looked ahead a character -- back up */
584 /* don't return spaces, they will be skipped on next call anyway */
585 if (c != EOF && !isspace(c)) yyunget(c);
586 /* check obvious errors */
587 if (qch) { yyerror(&yylval, "unmatched \" in string"); goto eof; }
588 PUSH_CHAR(0);
589 /*if (DEBUG_SCAN) printf("keyword: %d; str='%s' (%d)\n", keyword, sbuf, sbused);*/
590 /* scan token table */
591 yylval.type = T_ARG;
592 if (keyword && sbused > 0) {
593 /* find token */
/* sbused-1: the trailing zero is not a part of the word */
594 if ((kw = find_keyword(sbuf, sbused-1)) != NULL) {
595 yylval.type = kw->type;
596 yylval.string = kw->word; /* used by symdump */
599 if (yylval.type == T_ARG) yylval.string = newstr(sbuf);
601 lexret:
602 if (DEBUG_SCAN) printf("scan %s\n", symdump(&yylval));
603 return yylval.type;
604 eof:
605 yylval.type = 0; /* 0 is EOF for lemon */
606 return yylval.type;
609 #undef PUSH_CHAR
612 static const char *symdump (const token_t *s) {
613 static char *buf = NULL;
614 static int bufsz = 0;
615 int nsz;
616 if (s->type == EOF) return "EOF";
617 nsz = strlen(s->string)+128;
618 if (nsz > bufsz) {
619 char *nb = realloc(buf, nsz);
620 if (nb == NULL) { fprintf(stderr, "FATAL: out of memory!\n"); abort(); }
621 buf = nb;
622 bufsz = nsz;
624 switch (s->type) {
625 case 0: sprintf(buf, "unknown symbol <%s>", s->string); break;
626 case T_ARG: sprintf(buf, "argument <%s>", s->string); break;
627 case T_STRING: sprintf(buf, "string \"%s\"", s->string); break;
628 default: sprintf(buf, "keyword `%s`", s->string); break;
630 return buf;