1 /* coded by Ketmar // Vampire Avalon (psyc://ketmar.no-ip.org/~Ketmar)
2 * Understanding is not required. Only obedience.
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 * scan.c - the jam yacc scanner
39 static const keyword_t keywords
[] = {
40 #include "jamgramtab.h"
45 typedef struct include_s
{
46 struct include_s
*next
; /* next serial include file */
47 const char *string
; /* pointer into current line */
48 char **strings
; /* for yyfparse() -- text to parse */
49 FILE *file
; /* for yyfparse() -- file being read */
50 const char *fname
; /* for yyfparse() -- file name */
51 int line
; /* line counter for error messages */
52 int pos
; /* position for error messages */
53 //int back_count; /* # of yyunget()ed chars */
54 //char back_chars[2]; /* buffer for yyunget()ed chars */
55 char *fcontents
; /* for yyfparse() -- file contents */
56 int prevwasn
; /* !0: increment line and reset to 0 */
59 static include_t
*incp
= NULL
; /* current file; head of chain */
61 /* hack to stop segfaulting when last string contains ';' without space before it */
62 static int s_back_count
= 0; /* # of yyunget()ed chars */
63 static char s_back_chars
[2]; /* buffer for yyunget()ed chars */
66 static int scan_mode
= SCAN_NORMAL
;
67 /*static int any_errors = 0;*/
69 static const char *symdump (const token_t
*s
);
73 static const char *mnames
[] = {
82 * Set parser mode: normal, string, or keyword
86 if (DEBUG_SCAN
&& scan_mode
!= n
) printf("**MODE TRANSITION: %s --> %s\n", mnames
[scan_mode
], mnames
[n
]);
92 void yyerror (const token_t
*tk
, const char *s
) {
93 printf("ERROR(%d:%d) '%s': %s\n", tk
->line
, tk
->pos
, tk
->file
, s
);
94 exit(EXITBAD
); /* exit now */
98 static void yywarning_ex (const char *s
) {
99 printf("WARNING(%d:%d) '%s': %s\n", incp
->line
, incp
->pos
, incp
->fname
, s
);
103 void yyfparse (const char *s
) {
104 include_t
*i
= (include_t
*)malloc(sizeof(*i
));
105 /* push this onto the incp chain */
109 //i->fname = strdup(s);
110 i
->fname
= newstr(s
);
118 /* if the filename is "::Jambase", it means use the internal jambase */
119 if (strcmp(s
, "::Jambase") == 0) {
121 i
->strings
= jambase
;
127 * yychar() - read new line and return first character
129 * fabricates a continuous stream of characters across include files, returning EOF at the bitter end
131 static int yychar (void) {
134 //fprintf(stderr, "GET unget: %d\n", s_back_chars[s_back_count-1]);
135 return s_back_chars
[--s_back_count
];
137 if (!incp
) return EOF
;
138 /* once we start reading from the input stream, we reset the
139 * include insertion point so that the next include file becomes
140 * the head of the list */
141 /* if there is more data in this line, return it */
142 if (i
->prevwasn
) { i
->prevwasn
= 0; ++i
->line
; i
->pos
= 0; }
146 if (*i
->string
== '\n') i
->prevwasn
= 1;
149 /* if we're reading from an internal string list, go to the next string */
151 if (!*i
->strings
) goto next
;
152 i
->string
= *(i
->strings
++);
155 /* if necessary, open the file and get file contents */
159 if ((f
= fopen(i
->fname
, "rb")) == NULL
) perror(i
->fname
);
161 if (fseek(f
, 0, SEEK_END
) < 0) perror(i
->fname
);
162 if ((fsize
= ftell(f
)) < 0) perror(i
->fname
);
163 if (fseek(f
, 0, SEEK_SET
) < 0) perror(i
->fname
);
164 i
->fcontents
= calloc(fsize
+1, 1);
165 if (fsize
> 0 && fread(i
->fcontents
, fsize
, 1, f
) != 1) perror(i
->fname
);
166 fclose(f
); /* don't need to hold it opened */
167 i
->string
= i
->fcontents
;
171 /* this include is done */
172 /* free it up and return EOF so yyparse() returns to parse_file() */
174 /* close file, free name */
175 if (i
->fcontents
!= NULL
) free(i
->fcontents
);
176 //if (i->fname != NULL) free(i->fname);
183 * yyunget() - back up one character
185 static inline void yyunget (int c
) {
187 if (s_back_count
>= 2) { fprintf(stderr
, "yyunget: too much!\n"); abort(); }
188 s_back_chars
[s_back_count
++] = c
;
189 //fprintf(stderr, "UNGET: %d\n", c);
194 /* eat white space */
195 static int skip_spaces (int c
) {
197 /* skip past white space */
198 while (c
!= EOF
&& isspace(c
)) {
199 yylval
.line
= incp
->line
;
200 yylval
.pos
= incp
->pos
;
203 /* not a comment? swallow up comment line */
205 while ((c
= yychar()) != EOF
&& c
!= '\n') ;
/*
 * digit() - convert a character to its numeric value in a given base
 *
 * c:    character code (may be EOF)
 * base: numeric base; '0'..'9' map to 0..9, letters map to 10.. and are
 *       accepted in either case
 *
 * Returns the digit value (always < base), or -1 when `c` is EOF, is not
 * a digit/letter, or its value does not fit in `base`.
 */
static int digit (int c, int base) {
  int value;
  if (c == EOF) return -1;
  if (c >= '0' && c <= '9') {
    value = c-'0';
  } else if (c >= 'A' && c <= 'Z') {
    value = c-'A'+10;
  } else if (c >= 'a' && c <= 'z') {
    value = c-'a'+10;
  } else {
    return -1; /* not a digit in any base */
  }
  /* a digit is valid only if it is smaller than the base */
  return (value < base ? value : -1);
}
221 /* textlen includes trailing zero */
222 static void remove_indent (char *text
, int textlen
, int indent
) {
225 char *eol
= strchr(text
, '\n');
226 if (eol
== NULL
) eol
= text
+textlen
-1;
227 if (eol
-text
>= indent
) {
230 memmove(text
, text
+indent
, textlen
);
233 textlen
-= eol
+1-text
;
240 static inline const keyword_t
*find_keyword (const char *nbuf
, size_t nblen
) {
242 for (const keyword_t
*k
= keywords
; k
->word
; ++k
) if (strncmp(k
->word
, nbuf
, nblen
) == 0 && k
->word
[nblen
] == 0) return k
;
249 * yylex() - set yylval to current token; return its type
252 #define PUSH_CHAR(_c) do { \
253 if (sbused+1 >= sbsize) { \
254 int newsz = ((sbused+1)|0x7ff)+1; \
255 char *nb = realloc(sbuf, newsz); \
256 if (nb == NULL) { fprintf(stderr, "FATAL: out of memory!\n"); abort(); } \
260 sbuf[sbused++] = (_c); \
263 static char *sbuf
= NULL
;
264 static int sbsize
= 0;
268 /* "$(" already scanned and pushed */
269 /* return char after ")" */
270 int scan_varaccess (void) {
271 int c
= yychar(), qch
= 0, oc
;
272 if (c
== EOF
) return c
;
273 /* scan variable name */
274 while (c
!= EOF
&& c
!= '[' && c
!= ':') {
278 if (oc
== ')') return c
;
279 if (oc
== '$' && c
== '(') { PUSH_CHAR(c
); c
= scan_varaccess(); }
281 if (c
== EOF
) return c
;
282 /* scan indexing; 'c' is not pushed */
284 while (c
!= EOF
&& c
!= ']') {
288 if (oc
== ')') return c
;
289 if (oc
== '$' && c
== '(') { PUSH_CHAR(c
); c
= scan_varaccess(); }
291 /* find either selector or ')' */
292 while (c
!= EOF
&& c
!= ':') {
296 if (oc
== ')') return c
;
297 if (oc
== '$' && c
== '(') { PUSH_CHAR(c
); c
= scan_varaccess(); }
299 if (c
== EOF
) return c
;
301 /* scan selectors; 'c' is not pushed */
303 if (qch
!= '\'' && c
== '\\') {
306 if ((c
= yychar()) == EOF
) break;
314 if (!qch
&& (oc
== '"' || oc
== '\'')) { qch
= oc
; continue; }
315 if (!qch
&& oc
== ')') return c
;
316 if (qch
!= '\'' && oc
== '$' && c
== '(') {
318 c
= scan_varaccess();
321 if (qch
&& oc
== qch
) {
322 if (!(qch
== '\'' && c
== '\'')) qch
= 0;
336 yylval
.strlit
= 0; /* expand this string */
337 yylval
.line
= incp
->line
;
338 yylval
.pos
= incp
->pos
;
339 yylval
.file
= incp
->fname
;
340 /* get first character (whitespace or of token) */
342 if (scan_mode
== SCAN_STRING
) {
343 /* if scanning for a string (action's {}'s), look for the closing brace */
344 /* we handle matching braces, if they match! */
345 int nest
= 1, indent
= -1, cind
, bol
;
346 /* skip spaces and newline */
347 while (c
!= EOF
&& c
!= '\n' && isspace(c
)) c
= yychar();
348 if (c
== '\n') c
= yychar();
349 /* collect string, calculate indent */
353 if (c
== '{') ++nest
;
354 else if (c
== '}' && !--nest
) break;
355 /* indent calculation */
364 if (indent
< 0 || cind
< indent
) indent
= cind
;
370 /* we ate the ending brace -- regurgitate it */
371 if (c
!= EOF
) yyunget(c
);
372 /* check obvious errors */
373 if (nest
) { yyerror(&yylval
, "unmatched {} in action block"); goto eof
; }
374 /* remove trailing newlines and spaces, add one newline */
375 while (sbused
> 0 && isspace(sbuf
[sbused
-1])) --sbused
;
379 //fprintf(stderr, "=== %d ===\n%s===\n", indent, sbuf);
380 remove_indent(sbuf
, sbused
, indent
);
381 //fprintf(stderr, "--- %d ---\n%s---\n", indent, sbuf);
383 yylval
.type
= T_STRING
;
384 yylval
.string
= newstr(sbuf
);
387 int keyword
= 0, qch
= 0;
390 /* c now contains the first character of a token */
391 if (c
== EOF
) goto eof
;
392 /* special thingy: single-quoted string */
394 for (c
= yychar(); c
!= EOF
; c
= yychar()) {
396 /* check for special case: "''" */
397 if ((c
= yychar()) != '\'') {
398 if (c
!= EOF
&& !isspace(c
)) yyunget(c
);
406 yylval
.strlit
= 1; /* don't expand this string */
407 yylval
.string
= newstr(sbuf
);
411 keyword
= (scan_mode
== SCAN_NORMAL
&& isalpha(c
)) || (scan_mode
== SCAN_PUNCT
&& !isalnum(c
)); /* maybe */
412 //if (DEBUG_SCAN) printf("mode: %d; char: '%c'; keyword: %d\n", scan_mode, c, keyword);
413 /* look for white space to delimit word */
414 /* \ protects next character */
415 for (; c
!= EOF
; c
= yychar()) {
416 /* check if this is var access */
420 if ((c
= yychar()) == EOF
) break;
423 c
= scan_varaccess();
428 if (isalnum(c
) || c
== '_' || c
== '-' || c
== '<' || c
== '>') yywarning_ex("\"$x\" -- maybe you want \"$(x\" instead?");
431 /* check for some common bugs */
432 if (!qch
&& c
== '(') {
435 if (nc
== '$') yywarning_ex("\"($\" -- maybe you want \"$(\" instead?");
436 if (((sbused
> 0 && !isalnum(sbuf
[sbused
-1])) || (sbused
== 0)) &&
437 (isalnum(nc
) || nc
== '_' || nc
== '-' || nc
== '<' || nc
== '>')) yywarning_ex("\"(x\" -- maybe you want \"$(x\" instead?");
439 /* 'c' is not pushed yet */
440 if (!qch
&& scan_mode
== SCAN_PUNCT
) {
441 /* we are in a list; only the following keywords are possible */
442 if (strchr("{}[];", c
) != NULL
) {
451 if (!qch
&& (isspace(c
) || c
== '\'')) break;
452 if (!qch
&& scan_mode
== SCAN_NORMAL
&& c
!= '"' && c
!= '\'' && !isalnum(c
)) {
453 /* check if this char (and possibly next) forms non-alnum token */
455 if ((c
= yychar()) != EOF
) {
456 /* try 2-char tokens */
458 if ((kw
= find_keyword(sbuf
+sbused
-2, 2)) != NULL
) {
461 yylval
.type
= kw
->type
;
462 yylval
.string
= kw
->word
; /* used by symdump */
465 yywarning_ex("non-alpha token without whitespace");
466 /* return these 2 chars */
467 yyunget(sbuf
[--sbused
]);
468 yyunget(sbuf
[--sbused
]);
472 /* return one char back */
476 /* try 1-char token */
477 if (sbused
> 1 && sbuf
[sbused
-1] == '=' && isalnum(sbuf
[sbused
-2])) goto skipkwone
;
478 if (sbused
== 1 && sbuf
[sbused
-1] == '!') {
481 if (isalnum(nc
) || nc
== '-' || nc
== '_') goto skipkwone
;
483 if ((kw
= find_keyword(sbuf
+sbused
-1, 1)) != NULL
) {
486 yylval
.type
= kw
->type
;
487 yylval
.string
= kw
->word
; /* used by symdump */
490 if (strchr("{}[];", sbuf
[sbused
-1]) == NULL
) yywarning_ex("non-alpha token without whitespace");
491 /* return this char */
492 yyunget(sbuf
[--sbused
]);
497 /* pop this char and process it as usual */
500 /* check for quoting */
501 if (qch
&& c
== qch
) {
505 if (!qch
&& c
== '"') {
513 if ((c
= yychar()) == EOF
) break;
517 case 'a': PUSH_CHAR('\a'); break;
518 case 'b': PUSH_CHAR('\b'); break;
519 case 'e': PUSH_CHAR('\x1b'); break;
520 case 'f': PUSH_CHAR('\f'); break;
521 case 'n': PUSH_CHAR('\n'); break;
522 case 'r': PUSH_CHAR('\r'); break;
523 case 't': PUSH_CHAR('\t'); break;
524 case 'v': PUSH_CHAR('\v'); break;
527 if ((c
= yychar()) == EOF
) { yyerror(&yylval
, "invalid hex escape in quoted string"); goto eof
; }
528 if ((n
= digit(c
, 16)) < 0) { yyerror(&yylval
, "invalid hex escape in quoted string"); goto eof
; }
530 if ((c
= yychar()) != EOF
) {
531 int d
= digit(c
, 16);
532 if (d
< 0) yyunget(c
); else n
= (n
*16)+d
;
534 if (n
== 0) { yyerror(&yylval
, "invalid hex escape in quoted string"); goto eof
; }
537 //TODO: add '\uXXXX'?
539 if (isalnum(c
)) { yyerror(&yylval
, "invalid escape in quoted string"); goto eof
; }
550 if (scan_mode
== SCAN_NORMAL
) {
551 if (keyword
&& !isalpha(c
)) keyword
= 0;
552 } else if (scan_mode
== SCAN_PUNCT
) {
553 if (keyword
&& isalnum(c
)) keyword
= 0;
557 /* we looked ahead a character -- back up */
558 /* don't return spaces, they will be skipped on next call anyway */
559 if (c
!= EOF
&& !isspace(c
)) yyunget(c
);
560 /* check obvious errors */
561 if (qch
) { yyerror(&yylval
, "unmatched \" in string"); goto eof
; }
563 /*if (DEBUG_SCAN) printf("keyword: %d; str='%s' (%d)\n", keyword, sbuf, sbused);*/
564 /* scan token table */
566 if (keyword
&& sbused
> 0) {
568 if ((kw
= find_keyword(sbuf
, sbused
-1)) != NULL
) {
569 yylval
.type
= kw
->type
;
570 yylval
.string
= kw
->word
; /* used by symdump */
573 if (yylval
.type
== T_ARG
) yylval
.string
= newstr(sbuf
);
576 if (DEBUG_SCAN
) printf("scan %s\n", symdump(&yylval
));
579 yylval
.type
= 0; /* 0 is EOF for lemon */
586 static const char *symdump (const token_t
*s
) {
587 static char *buf
= NULL
;
588 static int bufsz
= 0;
590 if (s
->type
== EOF
) return "EOF";
591 nsz
= strlen(s
->string
)+128;
593 char *nb
= realloc(buf
, nsz
);
594 if (nb
== NULL
) { fprintf(stderr
, "FATAL: out of memory!\n"); abort(); }
599 case 0: sprintf(buf
, "unknown symbol <%s>", s
->string
); break;
600 case T_ARG
: sprintf(buf
, "argument <%s>", s
->string
); break;
601 case T_STRING
: sprintf(buf
, "string \"%s\"", s
->string
); break;
602 default: sprintf(buf
, "keyword `%s`", s
->string
); break;