2 * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
4 * This file is part of Jam - see jam.c for Copyright information.
7 * scan.c - the jam yacc scanner
9 * 12/26/93 (seiwald) - bump buf in yylex to 10240 - yuk.
10 * 09/16/94 (seiwald) - check for overflows, unmatched {}'s, etc.
11 * Also handle tokens abutting EOF by remembering
12 * to return EOF no matter how many times yylex()
14 * 02/11/95 (seiwald) - honor only punctuation keywords if SCAN_PUNCT.
15 * 07/27/95 (seiwald) - Include jamgram.h after scan.h, so that YYSTYPE is
16 * defined before Linux's yacc tries to redefine it.
17 * 01/10/01 (seiwald) - \ can now escape any whitespace char
18 * 11/04/02 (seiwald) - const-ing for string literals
30 const struct keyword
{
34 #include "jamgramtab.h"
40 struct include
*next
; /* next serial include file */
41 const char *string
; /* pointer into current line */
42 char **strings
; /* for yyfparse() -- text to parse */
43 FILE *file
; /* for yyfparse() -- file being read */
44 const char *fname
; /* for yyfparse() -- file name */
45 int line
; /* line counter for error messages */
46 char buf
[512]; /* for yyfparse() -- line buffer */
49 static struct include
*incp
= 0; /* current file; head of chain */
51 static int scan_mode
= SCAN_NORMAL
;
52 static int any_errors
= 0;
53 static char *symdump (YYSTYPE
*s
);
55 /* no single token can be larger */
56 #define BIGGEST_TOKEN (10240)
60 static const char *mnames
[] = {
70 * Set parser mode: normal, string, or keyword
73 if (n
== SCAN_PUNCT
&& scan_mode
== SCAN_BEFORE_STRING
) n
= SCAN_PUNCT_BS
;
74 else if (n
== SCAN_NORMAL
&& scan_mode
== SCAN_PUNCT_BS
) n
= SCAN_BEFORE_STRING
;
77 if (scan_mode
!= n
) printf("**MODE TRANSITION: %s --> %s\n", mnames
[scan_mode
], mnames
[n
]);
84 void yyerror (const char *s
) {
85 if (incp
) printf("%s: line %d: ", incp
->fname
, incp
->line
);
86 printf("%s at %s\n", s
, symdump(&yylval
));
91 int yyanyerrors (void) {
92 return (any_errors
!= 0);
96 void yyfparse (const char *s
) {
97 struct include
*i
= (struct include
*)malloc(sizeof(*i
));
98 /* push this onto the incp chain */
102 i
->fname
= copystr(s
);
106 /* if the filename is "::Jambase", it means use the internal jambase */
107 if (strcmp(s
, "::Jambase") == 0) {
109 i
->strings
= jambase
;
115 * yyline() - read new line and return first character
117 * fabricates a continuous stream of characters across include files, returning EOF at the bitter end
120 struct include
*i
= incp
;
121 if (!incp
) return EOF
;
122 /* once we start reading from the input stream, we reset the
123 * include insertion point so that the next include file becomes
124 * the head of the list */
125 /* if there is more data in this line, return it */
126 if (*i
->string
) return *i
->string
++;
127 /* if we're reading from an internal string list, go to the next string */
129 if (!*i
->strings
) goto next
;
131 i
->string
= *(i
->strings
++);
134 /* if necessary, open the file */
137 if (strcmp(i
->fname
, "-") && !(f
= fopen(i
->fname
, "r"))) perror(i
->fname
);
140 /* if there's another line in this file, start it */
141 if (i
->file
&& fgets(i
->buf
, sizeof(i
->buf
), i
->file
)) {
147 /* this include is done */
148 /* free it up and return EOF so yyparse() returns to parse_file() */
150 /* close file, free name */
151 if (i
->file
&& i
->file
!= stdin
) fclose(i
->file
);
159 * yylex() - set yylval to current token; return its type
161 * Macros to move things along:
163 * yychar() - return and advance character; invalid after EOF
164 * yyprev() - back up one character; invalid before yychar()
166 * yychar() returns a continuous stream of characters, until it hits
167 * the EOF of the current include file.
169 #define yychar() (*incp->string ? *incp->string++ : yyline())
170 #define yyprev() (incp->string--)
173 /* eat white space */
174 static int skip_spaces (int c
) {
176 /* skip past white space */
177 while (c
!= EOF
&& isspace(c
)) c
= yychar();
178 /* not a comment? swallow up comment line */
180 while ((c
= yychar()) != EOF
&& c
!= '\n') ;
186 static int digit (int c
, int base
) {
187 if (c
== EOF
) return -1;
188 if (c
>= 'a' && c
<= 'z') c
-= 32;
189 if (c
< '0' || (c
> '9' && c
< 'A') || c
> 'Z') return -1;
190 if ((c
-= '0') > 9) c
-= 7;
191 if (c
>= base
) return -1;
197 static char buf
[BIGGEST_TOKEN
];
198 const struct keyword
*k
;
202 /* get first character (whitespace or of token) */
204 if (scan_mode
== SCAN_STRING
) {
206 /* if scanning for a string (action's {}'s), look for the closing brace */
207 /* we handle matching braces, if they match! */
209 /* skip spaces and newline */
211 while (c != EOF && c != '\n' && isspace(c)) c = yychar();
212 if (c == '\n') c = yychar();
214 /*while (c != EOF && isspace(c)) c = yychar();*/
217 while (c
!= EOF
&& b
< buf
+sizeof(buf
)) {
218 if (c
== '{') ++nest
;
219 if (c
== '}' && !--nest
) break;
221 kStringPushBack(&s
, c
);
224 /* we ate the ending brace -- regurgitate it */
225 if (c
!= EOF
) yyprev();
226 /* check obvious errors */
227 /* if (b == buf+sizeof(buf)) { yyerror("action block too big"); goto eof; } */
228 if (nest
) { kStringFree(&s
); yyerror("unmatched {} in action block"); goto eof
; }
230 /* remove trailing newlines and spaces, add one newline */
231 /*strcpy(buf+nest, "\n");*/
233 nest = kStringLen(&s);
234 while (nest > 0 && isspace(kStringCStr(&s)[nest-1])) kStringPopBack(&s);
235 kStringAppendCStr(&s, "\n");
237 yylval
.type
= T_STRING
;
238 yylval
.string
= newstr(kStringCStr(&s
));
240 /*fprintf(stderr, "::: [%s]\n", yylval.string);*/
243 int in_quote
= 0, not_keyword
= 0, was_not_alnum
= 0;
246 /* c now points to the first character of a token */
247 if (c
== EOF
) goto eof
;
248 /* while scanning the word, disqualify it for (expensive)
249 * keyword lookup when we can: $anything, "anything", \anything */
250 not_keyword
= (c
== '$');
251 if (strchr("{}[];", c
)) {
256 /* only ':abc' is good, ':*' is not */
259 if (c
== EOF
|| isspace(c
) || isalnum(c
) || c
== '$' || c
== '_') goto lexdoneback
;
261 /* look for white space to delimit word */
262 /* "'s get stripped but preserve white space */
263 /* \ protects next character */
264 for (; c
!= EOF
&& b
< buf
+sizeof(buf
) && (in_quote
|| !isspace(c
)); c
= yychar()) {
267 in_quote
= !in_quote
;
272 /* k8: allow specials to work as delimiters */
273 if (strchr("{};", c
)) break;
275 if (strchr("[]", c
)) break; /* only in keywords; to allow things like $(a[2]) */
276 if (!was_not_alnum
&& c
== ':') break; /* '*:' is not good */
281 if (!isalnum(c
)) was_not_alnum
= 1;
286 if ((c
= yychar()) == EOF
) break;
290 case 't': *b
++ = '\t'; break;
291 case 'n': *b
++ = '\n'; break;
292 case 'r': *b
++ = '\r'; break;
293 case 'v': *b
++ = '\v'; break;
294 case 'b': *b
++ = '\b'; break;
295 case 'a': *b
++ = '\a'; break;
296 case 'f': *b
++ = '\f'; break;
297 case 'e': *b
++ = '\x1b'; break;
299 c
= yychar(); // first digit
301 if (n
< 0) { yyerror("invalid hex escape in quoted string"); goto eof
; }
302 c
= yychar(); // second digit
304 if (d
< 0) { if (c
!= EOF
) yyprev(); } else n
= (n
*16)+d
;
305 if (n
== 0) { yyerror("invalid hex escape in quoted string"); goto eof
; }
308 //TODO: add '\uXXXX'?
310 if (isalnum(c
)) { yyerror("invalid escape in quoted string"); goto eof
; }
319 /* we looked ahead a character -- back up */
321 if (c
!= EOF
) yyprev();
323 /* check obvious errors */
324 if (b
== buf
+sizeof(buf
)) { yyerror("string too big"); goto eof
; }
325 if (in_quote
) { yyerror("unmatched \" in string"); goto eof
; }
327 /* scan token table */
328 /* don't scan if it's obviously not a keyword or if it's */
329 /* an alphabetic when we were looking for punctuation */
331 if (!not_keyword
&& !((scan_mode
== SCAN_PUNCT
|| scan_mode
== SCAN_PUNCT_BS
) && isalnum(*buf
))) {
333 for (k
= keywords
; k
->word
; ++k
) {
334 if (strcmp(k
->word
, buf
) == 0) {
335 yylval
.type
= k
->type
;
336 yylval
.string
= k
->word
; /* used by symdump */
341 if (yylval
.type
== T_ARG
) {
342 yylval
.string
= newstr(buf
);
343 if (strcmp(buf
, "on") == 0) {
344 printf("\"ON\": not_keyword=%d; scan_mode=%s\n", not_keyword
, mnames
[scan_mode
]);
348 if (DEBUG_SCAN
) printf("scan %s\n", symdump(&yylval
));
356 static char *symdump (YYSTYPE
*s
) {
357 static char buf
[BIGGEST_TOKEN
+20];
359 case EOF
: snprintf(buf
, sizeof(buf
), "EOF"); break;
360 case 0: snprintf(buf
, sizeof(buf
), "unknown symbol %s", s
->string
); break;
361 case T_ARG
: snprintf(buf
, sizeof(buf
), "argument %s", s
->string
); break;
362 case T_STRING
: snprintf(buf
, sizeof(buf
), "string \"%s\"", s
->string
); break;
363 default: snprintf(buf
, sizeof(buf
), "keyword %s", s
->string
); break;
369 void yystatetrans (int tk
) {
370 if (scan_mode
== SCAN_BEFORE_STRING
) {
372 case T_LBRACE_t
: yymode(SCAN_STRING
); break;
376 case T_ACTIONS_t
: yymode(SCAN_BEFORE_STRING
); break;
377 case T_LBRACKET_t
: yymode(SCAN_NORMAL
); break;