2 * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
4 * This file is part of Jam - see jam.c for Copyright information.
7 * scan.c - the jam yacc scanner
9 * 12/26/93 (seiwald) - bump buf in yylex to 10240 - yuk.
10 * 09/16/94 (seiwald) - check for overflows, unmatched {}'s, etc.
11 * Also handle tokens abutting EOF by remembering
12 * to return EOF no matter how many times yylex()
14 * 02/11/95 (seiwald) - honor only punctuation keywords if SCAN_PUNCT.
15 * 07/27/95 (seiwald) - Include jamgram.h after scan.h, so that YYSTYPE is
16 * defined before Linux's yacc tries to redefine it.
17 * 01/10/01 (seiwald) - \ can now escape any whitespace char
18 * 11/04/02 (seiwald) - const-ing for string literals
29 const struct keyword
{
33 #include "jamgramtab.h"
39 struct include
*next
; /* next serial include file */
40 const char *string
; /* pointer into current line */
41 char **strings
; /* for yyfparse() -- text to parse */
42 FILE *file
; /* for yyfparse() -- file being read */
43 const char *fname
; /* for yyfparse() -- file name */
44 int line
; /* line counter for error messages */
45 char buf
[512]; /* for yyfparse() -- line buffer */
48 static struct include
*incp
= 0; /* current file; head of chain */
/* current scan mode; the scanner compares it against SCAN_STRING and SCAN_PUNCT */
50 static int scanmode
= SCAN_NORMAL
;
/* error indicator; yyanyerrors() reports whether it is nonzero */
51 static int anyerrors
= 0;
/* builds the token description printed by yyerror() and by the DEBUG_SCAN trace */
52 static char *symdump (YYSTYPE
*s
);
54 /* no single token can be larger */
/* used as the size of the scanner's token buffer and, plus 20 bytes, of symdump()'s buffer */
55 #define BIGGEST_TOKEN (10240)
59 * Set parser mode: normal, string, or keyword
/*
 * yyerror() - print a parse error message on stdout
 *
 * When an include is active (incp is non-null) the message is prefixed
 * with that include's file name and line number.  The text s is then
 * printed followed by " at " and a description of the current token,
 * obtained from symdump(&yylval).
 */
66 void yyerror (const char *s
) {
67 if (incp
) printf("%s: line %d: ", incp
->fname
, incp
->line
);
68 printf("%s at %s\n", s
, symdump(&yylval
));
/* yyanyerrors() - return 1 if the file-scope anyerrors value is nonzero, 0 otherwise */
73 int yyanyerrors (void) {
74 return anyerrors
!= 0;
/*
 * yyfparse() - set up a new include record for the source named s
 *
 * Allocates a struct include with malloc(), stores the result of
 * copystr(s) as its file name, and, for the special name "::Jambase",
 * points its strings member at the built-in jambase text instead of a file.
 */
78 void yyfparse (const char *s
) {
/* NOTE(review): no NULL check of the malloc() result is visible before i is dereferenced - confirm */
79 struct include
*i
= (struct include
*)malloc(sizeof(*i
));
80 /* push this onto the incp chain */
84 i
->fname
= copystr(s
);
88 /* if the filename is "::Jambase", it means use the internal jambase */
89 if (!strcmp(s
, "::Jambase")) i
->strings
= jambase
;
94 * yyline() - read new line and return first character
96 * fabricates a continuous stream of characters across include files, returning EOF at the bitter end
99 struct include
*i
= incp
;
101 if (!incp
) return EOF
;
102 /* once we start reading from the input stream, we reset the
103 * include insertion point so that the next include file becomes
104 * the head of the list */
105 /* if there is more data in this line, return it */
106 if (*i
->string
) return *i
->string
++;
107 /* if we're reading from an internal string list, go to the next string */
109 if (!*i
->strings
) goto next
;
111 i
->string
= *(i
->strings
++);
114 /* if necessary, open the file */
117 if (strcmp(i
->fname
, "-") && !(f
= fopen(i
->fname
, "r"))) perror(i
->fname
);
120 /* if there's another line in this file, start it */
121 if (i
->file
&& fgets(i
->buf
, sizeof(i
->buf
), i
->file
)) {
127 /* this include is done */
128 /* free it up and return EOF so yyparse() returns to parse_file() */
130 /* close file, free name */
131 if (i
->file
&& i
->file
!= stdin
) fclose(i
->file
);
139 * yylex() - set yylval to current token; return its type
141 * Macros to move things along:
143 * yychar() - return and advance character; invalid after EOF
144 * yyprev() - back up one character; invalid before yychar()
146 * yychar() returns a continuous stream of characters, until it hits
147 * the EOF of the current include file.
/* yields the next character of the current line and advances past it; once the
 * line is used up (pointer at the terminating NUL) it yields yyline()'s result.
 * incp is dereferenced unconditionally, so it must be non-null when this is used. */
149 #define yychar() (*incp->string ? *incp->string++ : yyline())
/* moves the current-line pointer back by one character; also requires non-null incp */
150 #define yyprev() (incp->string--)
153 /* eat white space */
/* c is the most recently read character (or EOF); further input is pulled with yychar() */
154 static int skipSpaces (int c
) {
156 /* skip past white space */
/* stops at EOF or at the first character for which isspace() is false */
157 while (c
!= EOF
&& isspace(c
)) c
= yychar();
158 /* not a comment? swallow up comment line */
/* discard characters up to and including the next newline, or until EOF */
160 while ((c
= yychar()) != EOF
&& c
!= '\n') ;
/*
 * digit() - classify character c as a digit for the given base
 *
 * Returns -1 for EOF, for any character that is not an ASCII decimal
 * digit or letter, and when the value compared against base is not
 * below it.
 * NOTE(review): presumably c is converted to its numeric digit value
 * before the base comparison and that value is returned on success -
 * confirm against the full body.
 */
166 static int digit (int c
, int base
) {
167 if (c
== EOF
) return -1;
/* fold lowercase letters to uppercase (ASCII: 'a' - 'A' == 32) */
168 if (c
>= 'a' && c
<= 'z') c
-= 32;
/* reject anything outside '0'..'9' and 'A'..'Z' */
169 if (c
< '0' || (c
> '9' && c
< 'A') || c
> 'Z') return -1;
/* reject values that are not valid in this base */
172 if (c
>= base
) return -1;
179 char buf
[BIGGEST_TOKEN
];
183 /* get first character (whitespace or of token) */
185 if (scanmode
== SCAN_STRING
) {
186 /* if scanning for a string (action's {}'s), look for the closing brace */
187 /* we handle matching braces, if they match! */
190 while (c
!= EOF
&& b
< buf
+sizeof(buf
)) {
191 if (c
== '{') ++nest
;
192 if (c
== '}' && !--nest
) break;
196 /* we ate the ending brace -- regurgitate it */
197 if (c
!= EOF
) yyprev();
198 /* check obvious errors */
199 if (b
== buf
+sizeof(buf
)) { yyerror("action block too big"); goto eof
; }
200 if (nest
) { yyerror("unmatched {} in action block"); goto eof
; }
202 yylval
.type
= STRING
;
203 yylval
.string
= newstr(buf
);
206 const struct keyword
*k
;
207 int inquote
= 0, notkeyword
= 0, n
, d
, wasNotAlNum
= 0;
210 /* c now holds the first character of a token */
211 if (c
== EOF
) goto eof
;
212 //printf(":'%c'\n", c);
214 if (!isalpha(c
) && c
!= '$' && c
!= '_' && c
!= '"' && c
!= '\'') {
215 const struct keyword
*kgood
= NULL
;
216 /* special chars are delimiters */
220 for (k
= keywords
; k
->word
!= NULL
; ++k
) if (!isalpha(k
->word
[0]) && strcmp(buf
, k
->word
) == 0) break;
221 if (k
->word
!= NULL
) {
230 if (c
!= EOF
) --b
; /* remove last char from token buffer */
233 printf("![%s]\n", buf
);
237 /* bad luck, try it another way */
239 /* while scanning the word, disqualify it for (expensive)
240 * keyword lookup when we can: $anything, "anything", \anything */
241 notkeyword
= (c
== '$');
242 if (c
== '{' || c
== '}' || c
== ';' || c
== '[' || c
== ']') {
247 /* only ':abc' is good, ':*' is not */
249 if (c
== EOF
|| isspace(c
) || isalnum(c
) || c
== '$' || c
== '_') {
255 /* look for white space to delimit word */
256 /* "'s get stripped but preserve white space */
257 /* \ protects next character */
258 while (c
!= EOF
&& b
< buf
+sizeof(buf
) && (inquote
|| !isspace(c
))) {
263 } else if (!inquote
&& (c
== '{' || c
== '}' || c
== ';')) {
264 /* k8: allow specials to work as delimiters */
266 } else if (!inquote
&& !notkeyword
&& (c
== '[' || c
== ']')) {
267 /* k8: allow specials to work as delimiters */
269 } else if (!inquote
&& !notkeyword
&& !wasNotAlNum
&& c
== ':') {
270 /* k8: allow specials to work as delimiters; '*:' is not good */
271 /**b = 0; printf("***OUT [%s]! %d\n", buf, incp?incp->line:0);*/
273 } else if (c
!= '\\') {
275 if (!isalnum(c
)) wasNotAlNum
= 1;
277 } else if ((c
= yychar()) != EOF
) {
282 case 't': *b
++ = '\t'; break;
283 case 'n': *b
++ = '\n'; break;
284 case 'r': *b
++ = '\r'; break;
285 case 'v': *b
++ = '\v'; break;
286 case 'b': *b
++ = '\b'; break;
287 case 'a': *b
++ = '\a'; break;
288 case 'f': *b
++ = '\f'; break;
289 case 'e': *b
++ = '\x1b'; break;
291 c
= yychar(); // first digit
293 if (n
< 0) { yyerror("invalid hex escape in quoted string"); goto eof
; }
294 c
= yychar(); // second digit
296 if (d
< 0) { if (c
!= EOF
) yyprev(); } else n
= (n
*16)+d
;
297 if (n
== 0) { yyerror("invalid hex escape in quoted string"); goto eof
; }
300 default: *b
++ = c
; break;
312 /* we looked ahead a character - back up */
314 if (c
!= EOF
) yyprev();
316 /* check obvious errors */
317 if (b
== buf
+sizeof(buf
)) { yyerror("string too big"); goto eof
; }
318 if (inquote
) { yyerror("unmatched \" in string"); goto eof
; }
319 /* scan token table */
320 /* don't scan if it's obviously not a keyword or if it's */
321 /* an alphabetic when we're looking for punctuation */
324 if (!notkeyword
&& !(isalpha(*buf
) && scanmode
== SCAN_PUNCT
)) {
325 for (k
= keywords
; k
->word
; ++k
) {
326 if (*buf
== *k
->word
&& strcmp(k
->word
, buf
) == 0) {
327 yylval
.type
= k
->type
;
328 yylval
.string
= k
->word
; /* used by symdump */
333 if (yylval
.type
== ARG
) yylval
.string
= newstr(buf
);
335 if (DEBUG_SCAN
) printf("scan %s\n", symdump(&yylval
));
/*
 * symdump() - format a human-readable description of token *s
 *
 * The text is written with sprintf() into a function-local static
 * buffer, so each call overwrites the text produced by the previous one.
 */
343 static char *symdump (YYSTYPE
*s
) {
/* the extra 20 bytes leave room for the fixed label text placed before the token string */
344 static char buf
[BIGGEST_TOKEN
+20];
/* one label per token kind; kinds not listed explicitly are reported as keywords */
347 case EOF
: sprintf(buf
, "EOF"); break;
348 case 0: sprintf(buf
, "unknown symbol %s", s
->string
); break;
349 case ARG
: sprintf(buf
, "argument %s", s
->string
); break;
350 case STRING
: sprintf(buf
, "string \"%s\"", s
->string
); break;
351 default: sprintf(buf
, "keyword %s", s
->string
); break;