bumped version to 2.5.6; release
[k8jam.git] / scan.c
blob71dafda702a3ecfde80ba6e7809981bdab06a979
1 /*
2 * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
4 * This file is part of Jam - see jam.c for Copyright information.
5 */
7 /*
8 * scan.c - the jam yacc scanner
10 * 12/26/93 (seiwald) - bump buf in yylex to 10240 - yuk.
11 * 09/16/94 (seiwald) - check for overflows, unmatched {}'s, etc.
12 * Also handle tokens abutting EOF by remembering
13 * to return EOF no matter how many times yylex()
14 * reinvokes yyline().
15 * 02/11/95 (seiwald) - honor only punctuation keywords if SCAN_PUNCT.
16 * 07/27/95 (seiwald) - Include jamgram.h after scan.h, so that YYSTYPE is
17 * defined before Linux's yacc tries to redefine it.
18 * 01/10/01 (seiwald) - \ can now escape any whitespace char
19 * 11/04/02 (seiwald) - const-ing for string literals
22 # include "jam.h"
23 # include "lists.h"
24 # include "parse.h"
25 # include "scan.h"
26 # include "jamgram.h"
27 # include "jambase.h"
28 # include "newstr.h"
/* Keyword table: pairs each reserved word's spelling with its token type. */
/* The entries themselves are pulled in from jamgramtab.h. */
30 struct keyword {
31 const char *word; /* keyword spelling; 0 marks the end of the table */
32 int type; /* token type yylex() stores in yylval.type on a match */
33 } keywords[] = {
34 # include "jamgramtab.h"
35 { 0, 0 } /* terminator: yylex() stops scanning when word is 0 */
/* One entry in the chain of input sources being scanned. */
/* yyfparse() pushes entries; yyline() reads from the head and pops it at EOF. */
38 struct include {
39 struct include *next; /* next entry in the chain (the previous head) */
40 const char *string; /* read cursor into the current line; "" until a line is loaded */
41 char **strings; /* internal text to scan (set to jambase for "+"); a null entry ends it */
42 FILE *file; /* stream being read; opened by yyline() on first use, 0 until then */
43 const char *fname; /* copystr() copy of the source name; released with freestr() */
44 int line; /* line counter for error messages */
45 char buf[512]; /* line buffer filled by fgets() in yyline() */
/* Scanner state shared by the functions in this file. */
48 static struct include *incp = 0; /* current input source; head of the include chain */
50 static int scanmode = SCAN_NORMAL; /* mode set through yymode() and consulted by yylex() */
51 static int anyerrors = 0; /* count of yyerror() calls; see yyanyerrors() */
52 static char *symdump (YYSTYPE *s); /* formats a token for error and debug output */
54 # define BIGGEST_TOKEN 10240 /* no single token can be larger */
58 * Set parser mode: normal, string, or keyword
60 void yymode (int n) {
61 scanmode = n;
65 void yyerror (const char *s) {
66 if (incp) printf("%s: line %d: ", incp->fname, incp->line);
67 printf("%s at %s\n", s, symdump(&yylval));
68 ++anyerrors;
72 int yyanyerrors (void) {
73 return anyerrors != 0;
77 void yyfparse (const char *s) {
78 struct include *i = (struct include *)malloc(sizeof(*i));
79 /* Push this onto the incp chain. */
80 i->string = "";
81 i->strings = 0;
82 i->file = 0;
83 i->fname = copystr(s);
84 i->line = 0;
85 i->next = incp;
86 incp = i;
87 /* If the filename is "+", it means use the internal jambase. */
88 if (!strcmp(s, "+")) i->strings = jambase;
93 * yyline() - read new line and return first character
95 * Fabricates a continuous stream of characters across include files,
96 * returning EOF at the bitter end.
98 int yyline (void) {
99 struct include *i = incp;
101 if (!incp) return EOF;
102 /* Once we start reading from the input stream, we reset the */
103 /* include insertion point so that the next include file becomes */
104 /* the head of the list. */
106 /* If there is more data in this line, return it. */
107 if (*i->string) return *i->string++;
108 /* If we're reading from an internal string list, go to the */
109 /* next string. */
110 if (i->strings) {
111 if(!*i->strings) goto next;
112 i->line++;
113 i->string = *(i->strings++);
114 return *i->string++;
116 /* If necessary, open the file */
117 if (!i->file) {
118 FILE *f = stdin;
119 if (strcmp(i->fname, "-") && !(f = fopen(i->fname, "r"))) perror(i->fname);
120 i->file = f;
122 /* If there's another line in this file, start it. */
123 if (i->file && fgets(i->buf, sizeof(i->buf), i->file)) {
124 i->line++;
125 i->string = i->buf;
126 return *i->string++;
128 next:
129 /* This include is done. */
130 /* Free it up and return EOF so yyparse() returns to parse_file(). */
131 incp = i->next;
132 /* Close file, free name */
133 if (i->file && i->file != stdin) fclose(i->file);
134 freestr(i->fname);
135 free((char *)i);
136 return EOF;
141 * yylex() - set yylval to current token; return its type
143 * Macros to move things along:
145 * yychar() - return and advance character; invalid after EOF
146 * yyprev() - back up one character; invalid before yychar()
148 * yychar() returns a continuous stream of characters, until it hits
149 * the EOF of the current include file.
152 # define yychar() (*incp->string ? *incp->string++ : yyline())
153 # define yyprev() (incp->string--)
155 int yylex (void) {
156 int c;
157 char buf[BIGGEST_TOKEN];
158 char *b = buf;
160 if (!incp) goto eof;
161 /* Get first character (whitespace or of token) */
162 c = yychar();
163 if (scanmode == SCAN_STRING) {
164 /* If scanning for a string (action's {}'s), look for the */
165 /* closing brace. We handle matching braces, if they match! */
166 int nest = 1;
168 while (c != EOF && b < buf+sizeof(buf)) {
169 if (c == '{') nest++;
170 if (c == '}' && !--nest) break;
171 *b++ = c;
172 c = yychar();
174 /* We ate the ending brace -- regurgitate it. */
175 if (c != EOF) yyprev();
176 /* Check obvious errors. */
177 if (b == buf+sizeof(buf)) {
178 yyerror("action block too big");
179 goto eof;
181 if (nest) {
182 yyerror("unmatched {} in action block");
183 goto eof;
185 *b = 0;
186 yylval.type = STRING;
187 yylval.string = newstr(buf);
188 } else {
189 char *b = buf;
190 struct keyword *k;
191 int inquote = 0;
192 int notkeyword;
193 /* Eat white space */
194 for (;;) {
195 /* Skip past white space */
196 while (c != EOF && isspace(c)) c = yychar();
197 /* Not a comment? Swallow up comment line. */
198 if (c != '#') break;
199 while ((c = yychar()) != EOF && c != '\n') ;
201 /* c now points to the first character of a token. */
202 if (c == EOF) goto eof;
203 /* While scanning the word, disqualify it for (expensive) */
204 /* keyword lookup when we can: $anything, "anything", \anything */
205 notkeyword = c == '$';
206 /* look for white space to delimit word */
207 /* "'s get stripped but preserve white space */
208 /* \ protects next character */
209 while (c != EOF && b < buf+sizeof(buf) && (inquote || !isspace(c))) {
210 if (c == '"') {
211 /* begin or end " */
212 inquote = !inquote;
213 notkeyword = 1;
214 } else if (c != '\\') {
215 /* normal char */
216 *b++ = c;
217 } else if ((c = yychar()) != EOF) {
218 /* \c */
219 *b++ = c;
220 notkeyword = 1;
221 } else {
222 /* \EOF */
223 break;
225 c = yychar();
227 /* Check obvious errors. */
228 if (b == buf+sizeof(buf)) {
229 yyerror("string too big");
230 goto eof;
232 if (inquote) {
233 yyerror("unmatched \" in string");
234 goto eof;
236 /* We looked ahead a character - back up. */
237 if (c != EOF) yyprev();
238 /* scan token table */
239 /* don't scan if it's obviously not a keyword or if its */
240 /* an alphabetic when were looking for punctuation */
241 *b = 0;
242 yylval.type = ARG;
243 if (!notkeyword && !(isalpha(*buf) && scanmode == SCAN_PUNCT)) {
244 for (k = keywords; k->word; k++) {
245 if (*buf == *k->word && !strcmp(k->word, buf)) {
246 yylval.type = k->type;
247 yylval.string = k->word; /* used by symdump */
248 break;
252 if (yylval.type == ARG) yylval.string = newstr(buf);
254 if (DEBUG_SCAN) printf("scan %s\n", symdump(&yylval));
255 return yylval.type;
256 eof:
257 yylval.type = EOF;
258 return yylval.type;
262 static char *symdump (YYSTYPE *s) {
263 static char buf[BIGGEST_TOKEN+20];
265 switch (s->type) {
266 case EOF:
267 sprintf(buf, "EOF");
268 break;
269 case 0:
270 sprintf(buf, "unknown symbol %s", s->string);
271 break;
272 case ARG:
273 sprintf(buf, "argument %s", s->string);
274 break;
275 case STRING:
276 sprintf(buf, "string \"%s\"", s->string);
277 break;
278 default:
279 sprintf(buf, "keyword %s", s->string);
280 break;
282 return buf;