1 /* coded by Ketmar // Invisible Vector (psyc://ketmar.no-ip.org/~Ketmar)
2 * Understanding is not required. Only obedience.
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, version 3 of the License ONLY.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 * scan.c - the jam yacc scanner
38 static const keyword_t keywords
[] = {
39 #include "jamgramtab.h"
/*
 * One input source on the include chain (see incp).
 *
 * A source is either a NULL-terminated list of strings (`strings`) or a
 * file whose whole contents are loaded into `fcontents`; in both cases
 * `string` points at the next character to hand out.
 * The yyunget() buffer is NOT kept here: it lives in the file-scope
 * s_back_count/s_back_chars so it survives the end of an include.
 */
typedef struct include_s {
  struct include_s *next; /* next serial include file */
  const char *string;     /* pointer into current line */
  char **strings;         /* for yyfparse() -- text to parse */
  FILE *file;             /* for yyfparse() -- file being read */
  const char *fname;      /* for yyfparse() -- file name */
  int line;               /* line counter for error messages */
  int pos;                /* position for error messages */
  char *fcontents;        /* for yyfparse() -- file contents */
  int prevwasn;           /* !0: previous char was '\n'; bump line and reset pos on next read */
} include_t;
58 static include_t
*incp
= NULL
; /* current file; head of chain */
60 /* hack to stop segfaulting when last string contains ';' without space before it */
61 static int s_back_count
= 0; /* # of yyunget()ed chars */
62 static char s_back_chars
[2]; /* buffer for yyunget()ed chars */
65 static int scan_mode
= SCAN_NORMAL
;
66 /*static int any_errors = 0;*/
68 static const char *symdump (const token_t
*s
);
72 static const char *mnames
[] = {
81 * Set parser mode: normal, string, or keyword
85 if (DEBUG_SCAN
&& scan_mode
!= n
) printf("**MODE TRANSITION: %s --> %s\n", mnames
[scan_mode
], mnames
[n
]);
91 void yyerror (const token_t
*tk
, const char *s
) {
92 printf("ERROR(%d:%d) '%s': %s\n", tk
->line
, tk
->pos
, tk
->file
, s
);
93 exit(EXITBAD
); /* exit now */
97 static void yywarning_ex (const char *s
) {
98 printf("WARNING(%d:%d) '%s': %s\n", incp
->line
, incp
->pos
, incp
->fname
, s
);
102 void yyfparse (const char *s
) {
103 include_t
*i
= (include_t
*)malloc(sizeof(*i
));
104 /* push this onto the incp chain */
108 //i->fname = strdup(s);
109 i
->fname
= newstr(s
);
117 /* if the filename is "::Jambase", it means use the internal jambase */
118 if (strcmp(s
, "::Jambase") == 0) {
120 i
->strings
= jambase
;
126 * yychar() - read new line and return first character
128 * fabricates a continuous stream of characters across include files, returning EOF at the bitter end
130 static int yychar (void) {
133 //fprintf(stderr, "GET unget: %d\n", s_back_chars[s_back_count-1]);
134 return s_back_chars
[--s_back_count
];
136 if (!incp
) return EOF
;
137 /* once we start reading from the input stream, we reset the
138 * include insertion point so that the next include file becomes
139 * the head of the list */
140 /* if there is more data in this line, return it */
141 if (i
->prevwasn
) { i
->prevwasn
= 0; ++i
->line
; i
->pos
= 0; }
146 if (i
->fcontents
!= NULL
) {
147 fprintf(stderr
, "...: (%d:%d) pos=%u; <%s>\n", i
->line
, i
->pos
, (int)(ptrdiff_t)(i
->string
-i
->fcontents
), i
->fname
);
150 if (*i
->string
== '\n') i
->prevwasn
= 1;
153 /* if we're reading from an internal string list, go to the next string */
155 if (!*i
->strings
) goto next
;
156 i
->string
= *(i
->strings
++);
159 /* if necessary, open the file and get file contents */
164 fprintf(stderr
, "OPENING: <%s>\n", i
->fname
);
166 if ((f
= fopen(i
->fname
, "rb")) == NULL
) perror(i
->fname
);
168 if (fseek(f
, 0, SEEK_END
) < 0) perror(i
->fname
);
169 if ((fsize
= ftell(f
)) < 0) perror(i
->fname
);
170 if (fseek(f
, 0, SEEK_SET
) < 0) perror(i
->fname
);
172 if (fsize
> 1024*1024*64) {
173 fprintf(stderr
, "FATAL: input file (%s) too big!\n", i
->fname
);
176 xalsz
= (uint32_t)fsize
;
177 // redundant re-check on the 32-bit size; presumably only here to silence a GCC warning -- TODO confirm
178 if (xalsz
> 1024*1024*64) {
179 fprintf(stderr
, "FATAL: input file (%s) too big!\n", i
->fname
);
182 i
->fcontents
= calloc(1, xalsz
+2U);
183 if (fsize
> 0 && fread(i
->fcontents
, xalsz
, 1, f
) != 1) perror(i
->fname
);
184 fclose(f
); /* don't need to hold it open */
185 /*k8: hack, because i don't understand why it doesn't work sometimes */
186 i
->fcontents
[xalsz
] = '\n';
187 i
->fcontents
[xalsz
+1] = 0;
188 i
->string
= i
->fcontents
;
192 /* this include is done */
193 /* free it up and return EOF so yyparse() returns to parse_file() */
195 /* close file, free name */
196 if (i
->fcontents
!= NULL
) {
198 fprintf(stderr
, "DONE-INC: <%s>\n", i
->fname
);
202 //if (i->fname != NULL) free(i->fname);
209 * yyunget() - back up one character
211 static inline void yyunget (int c
) {
213 if (s_back_count
>= 2) { fprintf(stderr
, "yyunget: too much!\n"); abort(); }
214 s_back_chars
[s_back_count
++] = c
;
215 //fprintf(stderr, "UNGET: %d\n", c);
220 /* eat white space */
221 static int skip_spaces (int c
) {
223 /* skip past white space */
224 while (c
!= EOF
&& isspace(c
)) {
225 yylval
.line
= incp
->line
;
226 yylval
.pos
= incp
->pos
;
229 /* not a comment? swallow up comment line */
231 while ((c
= yychar()) != EOF
&& c
!= '\n') ;
/*
 * digit() - numeric value of a digit character in the given base
 *
 * c:    character code (EOF is accepted and rejected like any non-digit)
 * base: radix; a digit whose value is >= base is rejected
 *
 * '0'..'9' map to 0..9; letters map to 10..35 regardless of case.
 * Returns the digit value, or -1 if c is not a valid digit for base.
 */
static int digit (int c, int base) {
  int value;
  if (c >= '0' && c <= '9') value = c-'0';
  else if (c >= 'a' && c <= 'z') value = c-'a'+10;
  else if (c >= 'A' && c <= 'Z') value = c-'A'+10;
  else return -1; /* EOF and everything that is not alphanumeric */
  return (value < base ? value : -1);
}
247 /* textlen includes trailing zero */
248 static void remove_indent (char *text
, int textlen
, int indent
) {
251 char *eol
= strchr(text
, '\n');
252 if (eol
== NULL
) eol
= text
+textlen
-1;
253 if (eol
-text
>= indent
) {
256 memmove(text
, text
+indent
, textlen
);
259 textlen
-= eol
+1-text
;
266 static inline const keyword_t
*find_keyword (const char *nbuf
, size_t nblen
) {
268 for (const keyword_t
*k
= keywords
; k
->word
; ++k
) if (strncmp(k
->word
, nbuf
, nblen
) == 0 && k
->word
[nblen
] == 0) return k
;
275 * yylex() - set yylval to current token; return its type
278 #define PUSH_CHAR(_c) do { \
279 if (sbused+1 >= sbsize) { \
280 int newsz = ((sbused+1)|0x7ff)+1; \
281 char *nb = realloc(sbuf, newsz); \
282 if (nb == NULL) { fprintf(stderr, "FATAL: out of memory!\n"); abort(); } \
286 sbuf[sbused++] = (_c); \
289 static char *sbuf
= NULL
;
290 static int sbsize
= 0;
294 /* "$(" already scanned and pushed */
295 /* return char after ")" */
296 int scan_varaccess (void) {
297 int c
= yychar(), qch
= 0, oc
;
298 if (c
== EOF
) return c
;
299 /* scan variable name */
300 while (c
!= EOF
&& c
!= '[' && c
!= ':') {
304 if (oc
== ')') return c
;
305 if (oc
== '$' && c
== '(') { PUSH_CHAR(c
); c
= scan_varaccess(); }
307 if (c
== EOF
) return c
;
308 /* scan indexing; 'c' is not pushed */
310 while (c
!= EOF
&& c
!= ']') {
314 if (oc
== ')') return c
;
315 if (oc
== '$' && c
== '(') { PUSH_CHAR(c
); c
= scan_varaccess(); }
317 /* find either selector or ')' */
318 while (c
!= EOF
&& c
!= ':') {
322 if (oc
== ')') return c
;
323 if (oc
== '$' && c
== '(') { PUSH_CHAR(c
); c
= scan_varaccess(); }
325 if (c
== EOF
) return c
;
327 /* scan selectors; 'c' is not pushed */
329 if (qch
!= '\'' && c
== '\\') {
332 if ((c
= yychar()) == EOF
) break;
340 if (!qch
&& (oc
== '"' || oc
== '\'')) { qch
= oc
; continue; }
341 if (!qch
&& oc
== ')') return c
;
342 if (qch
!= '\'' && oc
== '$' && c
== '(') {
344 c
= scan_varaccess();
347 if (qch
&& oc
== qch
) {
348 if (!(qch
== '\'' && c
== '\'')) qch
= 0;
362 yylval
.strlit
= 0; /* expand this string */
363 yylval
.line
= incp
->line
;
364 yylval
.pos
= incp
->pos
;
365 yylval
.file
= incp
->fname
;
366 /* get first character (whitespace or of token) */
368 if (scan_mode
== SCAN_STRING
) {
369 /* if scanning for a string (action's {}'s), look for the closing brace */
370 /* we handle matching braces, if they match! */
371 int nest
= 1, indent
= -1, cind
, bol
;
372 /* skip spaces and newline */
373 while (c
!= EOF
&& c
!= '\n' && isspace(c
)) c
= yychar();
374 if (c
== '\n') c
= yychar();
375 /* collect string, calculate indent */
379 if (c
== '{') ++nest
;
380 else if (c
== '}' && !--nest
) break;
381 /* indent calculation */
390 if (indent
< 0 || cind
< indent
) indent
= cind
;
396 /* we ate the ending brace -- regurgitate it */
397 if (c
!= EOF
) yyunget(c
);
398 /* check obvious errors */
399 if (nest
) { yyerror(&yylval
, "unmatched {} in action block"); goto eof
; }
400 /* remove trailing newlines and spaces, add one newline */
401 while (sbused
> 0 && isspace(sbuf
[sbused
-1])) --sbused
;
405 //fprintf(stderr, "=== %d ===\n%s===\n", indent, sbuf);
406 remove_indent(sbuf
, sbused
, indent
);
407 //fprintf(stderr, "--- %d ---\n%s---\n", indent, sbuf);
409 yylval
.type
= T_STRING
;
410 yylval
.string
= newstr(sbuf
);
413 int keyword
= 0, qch
= 0;
416 /* c now contains the first character of a token */
417 if (c
== EOF
) goto eof
;
418 /* special thingy: single-quoted string */
420 for (c
= yychar(); c
!= EOF
; c
= yychar()) {
422 /* check for special case: "''" */
423 if ((c
= yychar()) != '\'') {
424 if (c
!= EOF
&& !isspace(c
)) yyunget(c
);
432 yylval
.strlit
= 1; /* don't expand this string */
433 yylval
.string
= newstr(sbuf
);
437 keyword
= (scan_mode
== SCAN_NORMAL
&& isalpha(c
)) || (scan_mode
== SCAN_PUNCT
&& !isalnum(c
)); /* maybe */
438 //if (DEBUG_SCAN) printf("mode: %d; char: '%c'; keyword: %d\n", scan_mode, c, keyword);
439 /* look for white space to delimit word */
440 /* \ protects next character */
441 for (; c
!= EOF
; c
= yychar()) {
442 /* check if this is var access */
446 if ((c
= yychar()) == EOF
) break;
449 c
= scan_varaccess();
454 if (isalnum(c
) || c
== '_' || c
== '-' || c
== '<' || c
== '>') yywarning_ex("\"$x\" -- maybe you want \"$(x\" instead?");
457 /* check for some common bugs */
458 if (!qch
&& c
== '(') {
461 if (nc
== '$') yywarning_ex("\"($\" -- maybe you want \"$(\" instead?");
462 if (((sbused
> 0 && !isalnum(sbuf
[sbused
-1])) || (sbused
== 0)) &&
463 (isalnum(nc
) || nc
== '_' || nc
== '-' || nc
== '<' || nc
== '>')) yywarning_ex("\"(x\" -- maybe you want \"$(x\" instead?");
465 /* 'c' is not pushed yet */
466 if (!qch
&& scan_mode
== SCAN_PUNCT
) {
467 /* we are in a list; only the following keywords are possible */
468 if (strchr("{}[];", c
) != NULL
) {
477 if (!qch
&& (isspace(c
) || c
== '\'')) break;
478 if (!qch
&& scan_mode
== SCAN_NORMAL
&& c
!= '"' && c
!= '\'' && !isalnum(c
)) {
479 /* check if this char (and possibly next) forms non-alnum token */
481 if ((c
= yychar()) != EOF
) {
482 /* try 2-char tokens */
484 if ((kw
= find_keyword(sbuf
+sbused
-2, 2)) != NULL
) {
487 yylval
.type
= kw
->type
;
488 yylval
.string
= kw
->word
; /* used by symdump */
491 yywarning_ex("non-alpha token without whitespace");
492 /* return these 2 chars */
493 yyunget(sbuf
[--sbused
]);
494 yyunget(sbuf
[--sbused
]);
498 /* return one char back */
502 /* try 1-char token */
503 if (sbused
> 1 && sbuf
[sbused
-1] == '=' && isalnum(sbuf
[sbused
-2])) goto skipkwone
;
504 if (sbused
== 1 && sbuf
[sbused
-1] == '!') {
507 if (isalnum(nc
) || nc
== '-' || nc
== '_') goto skipkwone
;
509 if ((kw
= find_keyword(sbuf
+sbused
-1, 1)) != NULL
) {
512 yylval
.type
= kw
->type
;
513 yylval
.string
= kw
->word
; /* used by symdump */
516 if (strchr("{}[];", sbuf
[sbused
-1]) == NULL
) yywarning_ex("non-alpha token without whitespace");
517 /* return this char */
518 yyunget(sbuf
[--sbused
]);
523 /* pop this char and process it as usual */
526 /* check for quoting */
527 if (qch
&& c
== qch
) {
531 if (!qch
&& c
== '"') {
539 if ((c
= yychar()) == EOF
) break;
543 case 'a': PUSH_CHAR('\a'); break;
544 case 'b': PUSH_CHAR('\b'); break;
545 case 'e': PUSH_CHAR('\x1b'); break;
546 case 'f': PUSH_CHAR('\f'); break;
547 case 'n': PUSH_CHAR('\n'); break;
548 case 'r': PUSH_CHAR('\r'); break;
549 case 't': PUSH_CHAR('\t'); break;
550 case 'v': PUSH_CHAR('\v'); break;
553 if ((c
= yychar()) == EOF
) { yyerror(&yylval
, "invalid hex escape in quoted string"); goto eof
; }
554 if ((n
= digit(c
, 16)) < 0) { yyerror(&yylval
, "invalid hex escape in quoted string"); goto eof
; }
556 if ((c
= yychar()) != EOF
) {
557 int d
= digit(c
, 16);
558 if (d
< 0) yyunget(c
); else n
= (n
*16)+d
;
560 if (n
== 0) { yyerror(&yylval
, "invalid hex escape in quoted string"); goto eof
; }
563 //TODO: add '\uXXXX'?
565 if (isalnum(c
)) { yyerror(&yylval
, "invalid escape in quoted string"); goto eof
; }
576 if (scan_mode
== SCAN_NORMAL
) {
577 if (keyword
&& !isalpha(c
)) keyword
= 0;
578 } else if (scan_mode
== SCAN_PUNCT
) {
579 if (keyword
&& isalnum(c
)) keyword
= 0;
583 /* we looked ahead a character -- back up */
584 /* don't return spaces, they will be skipped on next call anyway */
585 if (c
!= EOF
&& !isspace(c
)) yyunget(c
);
586 /* check obvious errors */
587 if (qch
) { yyerror(&yylval
, "unmatched \" in string"); goto eof
; }
589 /*if (DEBUG_SCAN) printf("keyword: %d; str='%s' (%d)\n", keyword, sbuf, sbused);*/
590 /* scan token table */
592 if (keyword
&& sbused
> 0) {
594 if ((kw
= find_keyword(sbuf
, sbused
-1)) != NULL
) {
595 yylval
.type
= kw
->type
;
596 yylval
.string
= kw
->word
; /* used by symdump */
599 if (yylval
.type
== T_ARG
) yylval
.string
= newstr(sbuf
);
602 if (DEBUG_SCAN
) printf("scan %s\n", symdump(&yylval
));
605 yylval
.type
= 0; /* 0 is EOF for lemon */
612 static const char *symdump (const token_t
*s
) {
613 static char *buf
= NULL
;
614 static int bufsz
= 0;
616 if (s
->type
== EOF
) return "EOF";
617 nsz
= strlen(s
->string
)+128;
619 char *nb
= realloc(buf
, nsz
);
620 if (nb
== NULL
) { fprintf(stderr
, "FATAL: out of memory!\n"); abort(); }
625 case 0: sprintf(buf
, "unknown symbol <%s>", s
->string
); break;
626 case T_ARG
: sprintf(buf
, "argument <%s>", s
->string
); break;
627 case T_STRING
: sprintf(buf
, "string \"%s\"", s
->string
); break;
628 default: sprintf(buf
, "keyword `%s`", s
->string
); break;