scan.c: cosmetix
[k8jam.git] / src / scan.c
blob0251752e9222fdc3bc1c74e6f79d710da0d21f5e
1 /*
2 * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
4 * This file is part of Jam - see jam.c for Copyright information.
5 */
6 /*
7 * scan.c - the jam yacc scanner
9 * 12/26/93 (seiwald) - bump buf in yylex to 10240 - yuk.
10 * 09/16/94 (seiwald) - check for overflows, unmatched {}'s, etc.
11 * Also handle tokens abutting EOF by remembering
12 * to return EOF no matter how many times yylex()
13 * reinvokes yyline().
14 * 02/11/95 (seiwald) - honor only punctuation keywords if SCAN_PUNCT.
15 * 07/27/95 (seiwald) - Include jamgram.h after scan.h, so that YYSTYPE is
16 * defined before Linux's yacc tries to redefine it.
17 * 01/10/01 (seiwald) - \ can now escape any whitespace char
18 * 11/04/02 (seiwald) - const-ing for string literals
20 #include "jam.h"
21 #include "lists.h"
22 #include "parse.h"
23 #include "scan.h"
24 #include "jamgram.h"
25 #include "jambase.h"
26 #include "newstr.h"
29 const struct keyword {
30 const char *word;
31 int type;
32 } keywords[] = {
33 #include "jamgramtab.h"
34 {0,0}
/*
 * struct include - one input source on the scanner's chain
 *
 * yyfparse() pushes a new entry at the head of the chain and yyline()
 * pops and frees it once its input is exhausted.
 */
struct include {
  struct include *next; /* next serial include file */
  const char *string;   /* pointer into current line */
  char **strings;       /* for yyfparse() -- text to parse */
  FILE *file;           /* for yyfparse() -- file being read */
  const char *fname;    /* for yyfparse() -- file name */
  int line;             /* line counter for error messages */
  char buf[512];        /* for yyfparse() -- line buffer */
};
48 static struct include *incp = 0; /* current file; head of chain */
50 static int scan_mode = SCAN_NORMAL;
51 static int any_errors = 0;
52 static char *symdump (YYSTYPE *s);
54 /* no single token can be larger */
55 #define BIGGEST_TOKEN (10240)
59 * Set parser mode: normal, string, or keyword
61 void yymode (int n) {
62 scan_mode = n;
66 void yyerror (const char *s) {
67 if (incp) printf("%s: line %d: ", incp->fname, incp->line);
68 printf("%s at %s\n", s, symdump(&yylval));
69 ++any_errors;
73 int yyanyerrors (void) {
74 return (any_errors != 0);
78 void yyfparse (const char *s) {
79 struct include *i = (struct include *)malloc(sizeof(*i));
80 /* push this onto the incp chain */
81 i->string = "";
82 i->strings = 0;
83 i->file = 0;
84 i->fname = copystr(s);
85 i->line = 0;
86 i->next = incp;
87 incp = i;
88 /* if the filename is "::Jambase", it means use the internal jambase */
89 if (strcmp(s, "::Jambase") == 0) {
90 jambaseUnpack();
91 i->strings = jambase;
97 * yyline() - read new line and return first character
99 * fabricates a continuous stream of characters across include files, returning EOF at the bitter end
101 int yyline (void) {
102 struct include *i = incp;
103 if (!incp) return EOF;
104 /* once we start reading from the input stream, we reset the
105 * include insertion point so that the next include file becomes
106 * the head of the list */
107 /* if there is more data in this line, return it */
108 if (*i->string) return *i->string++;
109 /* if we're reading from an internal string list, go to the next string */
110 if (i->strings) {
111 if (!*i->strings) goto next;
112 ++i->line;
113 i->string = *(i->strings++);
114 return *i->string++;
116 /* if necessary, open the file */
117 if (!i->file) {
118 FILE *f = stdin;
119 if (strcmp(i->fname, "-") && !(f = fopen(i->fname, "r"))) perror(i->fname);
120 i->file = f;
122 /* if there's another line in this file, start it */
123 if (i->file && fgets(i->buf, sizeof(i->buf), i->file)) {
124 ++i->line;
125 i->string = i->buf;
126 return *i->string++;
128 next:
129 /* this include is done */
130 /* free it up and return EOF so yyparse() returns to parse_file() */
131 incp = i->next;
132 /* close file, free name */
133 if (i->file && i->file != stdin) fclose(i->file);
134 freestr(i->fname);
135 free(i);
136 return EOF;
/*
 * Macros that move yylex() along the input:
 *
 *  yychar() - return and advance character; invalid after EOF
 *  yyprev() - back up one character; invalid before yychar()
 *
 * yychar() returns a continuous stream of characters, until it hits
 * the EOF of the current include file.
 *
 * Both macros dereference incp, so they must only be used while an input
 * source is active. yychar() yields unsigned char values (or EOF from
 * yyline()), so a high-bit byte is never sign-extended into a negative
 * number or into EOF itself.
 */
#define yychar()  (*incp->string ? (unsigned char)*incp->string++ : yyline())
#define yyprev()  (incp->string--)
/*
 * skip_spaces() - eat white space and '#' comments
 *
 * `c` is the character already read by the caller. Returns the first
 * character that is neither white space nor part of a comment, or EOF.
 */
static int skip_spaces (int c) {
  for (;;) {
    /* skip past white space; the cast keeps isspace() defined for bytes
     * that arrive as negative values */
    while (c != EOF && isspace((unsigned char)c)) c = yychar();
    /* not a comment? then we are done */
    if (c != '#') break;
    /* swallow up the comment line, then look for more white space */
    while ((c = yychar()) != EOF && c != '\n') ;
  }
  return c;
}
/*
 * digit() - value of character `c` as a digit in the given `base`
 *
 * '0'..'9' map to 0..9 and letters of either case map to 10..35.
 * Returns -1 when `c` is not such a character (this includes EOF) or when
 * its value is not below `base`.
 */
static int digit (int c, int base) {
  int value;
  if (c >= '0' && c <= '9') value = c-'0';
  else if (c >= 'a' && c <= 'z') value = c-'a'+10;
  else if (c >= 'A' && c <= 'Z') value = c-'A'+10;
  else return -1;
  return (value < base ? value : -1);
}
178 int yylex (void) {
179 static char buf[BIGGEST_TOKEN];
180 int c;
181 if (!incp) goto eof;
182 /* get first character (whitespace or of token) */
183 c = yychar();
184 if (scan_mode == SCAN_STRING) {
185 /* if scanning for a string (action's {}'s), look for the closing brace */
186 /* we handle matching braces, if they match! */
187 char *b = buf;
188 int nest = 1;
189 while (c != EOF && b < buf+sizeof(buf)) {
190 if (c == '{') ++nest;
191 if (c == '}' && !--nest) break;
192 *b++ = c;
193 c = yychar();
195 /* we ate the ending brace -- regurgitate it */
196 if (c != EOF) yyprev();
197 /* check obvious errors */
198 if (b == buf+sizeof(buf)) { yyerror("action block too big"); goto eof; }
199 if (nest) { yyerror("unmatched {} in action block"); goto eof; }
200 *b = 0;
201 yylval.type = STRING;
202 yylval.string = newstr(buf);
203 } else {
204 char *b = buf;
205 const struct keyword *k;
206 int inquote = 0, notkeyword = 0, n, d, wasNotAlNum = 0;
207 c = skip_spaces(c);
208 /* c now points to the first character of a token */
209 if (c == EOF) goto eof;
210 //printf(":'%c'\n", c);
211 #if 0
212 if (!isalpha(c) && c != '$' && c != '_' && c != '"' && c != '\'') {
213 const struct keyword *kgood = NULL;
214 /* special chars are delimiters */
215 while (c != EOF) {
216 *b++ = c;
217 *b = 0;
218 for (k = keywords; k->word != NULL; ++k) if (!isalpha(k->word[0]) && strcmp(buf, k->word) == 0) break;
219 if (k->word != NULL) {
220 /* good keyword */
221 kgood = k;
222 c = yychar();
223 continue;
225 /* bad keyword */
226 break;
228 if (c != EOF) --b; /* remove last char from token buffer */
229 if (kgood != NULL) {
230 /* ok, we got it */
231 printf("![%s]\n", buf);
232 goto lexdoneback;
235 /* bad luck, try it another way */
236 #else
237 /* while scanning the word, disqualify it for (expensive)
238 * keyword lookup when we can: $anything, "anything", \anything */
239 notkeyword = (c == '$');
240 if (c == '{' || c == '}' || c == ';' || c == '[' || c == ']') {
241 *b++ = c;
242 goto lexdone;
244 if (c == ':') {
245 /* only ':abc' is good, ':*' is not */
246 c = yychar();
247 if (c == EOF || isspace(c) || isalnum(c) || c == '$' || c == '_') {
248 *b++ = ':';
249 goto lexdoneback;
252 #endif
253 /* look for white space to delimit word */
254 /* "'s get stripped but preserve white space */
255 /* \ protects next character */
256 while (c != EOF && b < buf+sizeof(buf) && (inquote || !isspace(c))) {
257 if (c == '"') {
258 /* begin or end " */
259 inquote = !inquote;
260 notkeyword = 1;
261 } else if (!inquote && (c == '{' || c == '}' || c == ';')) {
262 /* k8: allow specials to work as delimiters */
263 break;
264 } else if (!inquote && !notkeyword && (c == '[' || c == ']')) {
265 /* k8: allow specials to work as delimiters */
266 break;
267 } else if (!inquote && !notkeyword && !wasNotAlNum && c == ':') {
268 /* k8: allow specials to work as delimiters; '*:' is not good */
269 /**b = 0; printf("***OUT [%s]! %d\n", buf, incp?incp->line:0);*/
270 break;
271 } else if (c != '\\') {
272 /* normal char */
273 if (!isalnum(c)) wasNotAlNum = 1;
274 *b++ = c;
275 } else if ((c = yychar()) != EOF) {
276 /* \c */
277 wasNotAlNum = 1;
278 if (inquote) {
279 switch (c) {
280 case 't': *b++ = '\t'; break;
281 case 'n': *b++ = '\n'; break;
282 case 'r': *b++ = '\r'; break;
283 case 'v': *b++ = '\v'; break;
284 case 'b': *b++ = '\b'; break;
285 case 'a': *b++ = '\a'; break;
286 case 'f': *b++ = '\f'; break;
287 case 'e': *b++ = '\x1b'; break;
288 case 'x':
289 c = yychar(); // first digit
290 n = digit(c, 16);
291 if (n < 0) { yyerror("invalid hex escape in quoted string"); goto eof; }
292 c = yychar(); // second digit
293 d = digit(c, 16);
294 if (d < 0) { if (c != EOF) yyprev(); } else n = (n*16)+d;
295 if (n == 0) { yyerror("invalid hex escape in quoted string"); goto eof; }
296 *b++ = n;
297 break;
298 default: *b++ = c; break;
300 } else {
301 *b++ = c;
303 notkeyword = 1;
304 } else {
305 /* \EOF */
306 break;
308 c = yychar();
310 /* we looked ahead a character - back up */
311 lexdoneback:
312 if (c != EOF) yyprev();
313 lexdone:
314 /* check obvious errors */
315 if (b == buf+sizeof(buf)) { yyerror("string too big"); goto eof; }
316 if (inquote) { yyerror("unmatched \" in string"); goto eof; }
317 /* scan token table */
318 /* don't scan if it's obviously not a keyword or if its */
319 /* an alphabetic when were looking for punctuation */
320 *b = 0;
321 yylval.type = ARG;
322 if (!notkeyword && !(isalpha(*buf) && scan_mode == SCAN_PUNCT)) {
323 for (k = keywords; k->word; ++k) {
324 if (*buf == *k->word && strcmp(k->word, buf) == 0) {
325 yylval.type = k->type;
326 yylval.string = k->word; /* used by symdump */
327 break;
331 if (yylval.type == ARG) yylval.string = newstr(buf);
333 if (DEBUG_SCAN) printf("scan %s\n", symdump(&yylval));
334 return yylval.type;
335 eof:
336 yylval.type = EOF;
337 return yylval.type;
341 static char *symdump (YYSTYPE *s) {
342 static char buf[BIGGEST_TOKEN+20];
343 switch (s->type) {
344 case EOF: snprintf(buf, sizeof(buf), "EOF"); break;
345 case 0: snprintf(buf, sizeof(buf), "unknown symbol %s", s->string); break;
346 case ARG: snprintf(buf, sizeof(buf), "argument %s", s->string); break;
347 case STRING: snprintf(buf, sizeof(buf), "string \"%s\"", s->string); break;
348 default: snprintf(buf, sizeof(buf), "keyword %s", s->string); break;
350 return buf;