Jambase unix: default compilers changed to gcc and g++; OPTIM is empty
[k8jam.git] / scan.c
blob986e2952e724e7129198ab8b9d345435de2b0500
/*
 * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
 *
 * This file is part of Jam - see jam.c for Copyright information.
 */

/*
 * scan.c - the jam yacc scanner
 *
 * 12/26/93 (seiwald) - bump buf in yylex to 10240 - yuk.
 * 09/16/94 (seiwald) - check for overflows, unmatched {}'s, etc.
 *                      Also handle tokens abutting EOF by remembering
 *                      to return EOF no matter how many times yylex()
 *                      reinvokes yyline().
 * 02/11/95 (seiwald) - honor only punctuation keywords if SCAN_PUNCT.
 * 07/27/95 (seiwald) - Include jamgram.h after scan.h, so that YYSTYPE is
 *                      defined before Linux's yacc tries to redefine it.
 * 01/10/01 (seiwald) - \ can now escape any whitespace char
 * 11/04/02 (seiwald) - const-ing for string literals
 */

22 # include "jam.h"
23 # include "lists.h"
24 # include "parse.h"
25 # include "scan.h"
26 # include "jamgram.h"
27 # include "jambase.h"
28 # include "newstr.h"
30 struct keyword {
31 const char *word;
32 int type;
33 } keywords[] = {
34 # include "jamgramtab.h"
35 { 0, 0 }
36 } ;
/*
 * struct include - one entry in the chain of inputs being scanned
 *
 * yyfparse() pushes a new entry at the head of the chain; yyline()
 * pops and frees it once its input is exhausted.
 */

struct include {
    struct include *next;   /* next serial include file */
    const char *string;     /* pointer into current line */
    char **strings;         /* for yyfparse() -- text to parse */
    FILE *file;             /* for yyfparse() -- file being read */
    const char *fname;      /* for yyfparse() -- file name */
    int line;               /* line counter for error messages */
    char buf[ 512 ];        /* for yyfparse() -- line buffer */
} ;
48 static struct include *incp = 0; /* current file; head of chain */
50 static int scanmode = SCAN_NORMAL;
51 static int anyerrors = 0;
52 static char *symdump( YYSTYPE *s );
54 # define BIGGEST_TOKEN 10240 /* no single token can be larger */
57 * Set parser mode: normal, string, or keyword
60 void
61 yymode( int n )
63 scanmode = n;
66 void
67 yyerror( const char *s )
69 if( incp )
70 printf( "%s: line %d: ", incp->fname, incp->line );
72 printf( "%s at %s\n", s, symdump( &yylval ) );
74 ++anyerrors;
77 int
78 yyanyerrors()
80 return anyerrors != 0;
83 void
84 yyfparse( const char *s )
86 struct include *i = (struct include *)malloc( sizeof( *i ) );
88 /* Push this onto the incp chain. */
90 i->string = "";
91 i->strings = 0;
92 i->file = 0;
93 i->fname = copystr( s );
94 i->line = 0;
95 i->next = incp;
96 incp = i;
98 /* If the filename is "+", it means use the internal jambase. */
100 if( !strcmp( s, "+" ) )
101 i->strings = jambase;
105 * yyline() - read new line and return first character
107 * Fabricates a continuous stream of characters across include files,
108 * returning EOF at the bitter end.
112 yyline()
114 struct include *i = incp;
116 if( !incp )
117 return EOF;
119 /* Once we start reading from the input stream, we reset the */
120 /* include insertion point so that the next include file becomes */
121 /* the head of the list. */
123 /* If there is more data in this line, return it. */
125 if( *i->string )
126 return *i->string++;
128 /* If we're reading from an internal string list, go to the */
129 /* next string. */
131 if( i->strings )
133 if( !*i->strings )
134 goto next;
136 i->line++;
137 i->string = *(i->strings++);
138 return *i->string++;
141 /* If necessary, open the file */
143 if( !i->file )
145 FILE *f = stdin;
147 if( strcmp( i->fname, "-" ) && !( f = fopen( i->fname, "r" ) ) )
148 perror( i->fname );
150 i->file = f;
153 /* If there's another line in this file, start it. */
155 if( i->file && fgets( i->buf, sizeof( i->buf ), i->file ) )
157 i->line++;
158 i->string = i->buf;
159 return *i->string++;
162 next:
163 /* This include is done. */
164 /* Free it up and return EOF so yyparse() returns to parse_file(). */
166 incp = i->next;
168 /* Close file, free name */
170 if( i->file && i->file != stdin )
171 fclose( i->file );
172 freestr( i->fname );
173 free( (char *)i );
175 return EOF;
179 * yylex() - set yylval to current token; return its type
181 * Macros to move things along:
183 * yychar() - return and advance character; invalid after EOF
184 * yyprev() - back up one character; invalid before yychar()
186 * yychar() returns a continuous stream of characters, until it hits
187 * the EOF of the current include file.
190 # define yychar() ( *incp->string ? *incp->string++ : yyline() )
191 # define yyprev() ( incp->string-- )
194 yylex()
196 int c;
197 char buf[BIGGEST_TOKEN];
198 char *b = buf;
200 if( !incp )
201 goto eof;
203 /* Get first character (whitespace or of token) */
205 c = yychar();
207 if( scanmode == SCAN_STRING )
209 /* If scanning for a string (action's {}'s), look for the */
210 /* closing brace. We handle matching braces, if they match! */
212 int nest = 1;
214 while( c != EOF && b < buf + sizeof( buf ) )
216 if( c == '{' )
217 nest++;
219 if( c == '}' && !--nest )
220 break;
222 *b++ = c;
224 c = yychar();
227 /* We ate the ending brace -- regurgitate it. */
229 if( c != EOF )
230 yyprev();
232 /* Check obvious errors. */
234 if( b == buf + sizeof( buf ) )
236 yyerror( "action block too big" );
237 goto eof;
240 if( nest )
242 yyerror( "unmatched {} in action block" );
243 goto eof;
246 *b = 0;
247 yylval.type = STRING;
248 yylval.string = newstr( buf );
251 else
253 char *b = buf;
254 struct keyword *k;
255 int inquote = 0;
256 int notkeyword;
258 /* Eat white space */
260 for( ;; )
262 /* Skip past white space */
264 while( c != EOF && isspace( c ) )
265 c = yychar();
267 /* Not a comment? Swallow up comment line. */
269 if( c != '#' )
270 break;
271 while( ( c = yychar() ) != EOF && c != '\n' )
275 /* c now points to the first character of a token. */
277 if( c == EOF )
278 goto eof;
280 /* While scanning the word, disqualify it for (expensive) */
281 /* keyword lookup when we can: $anything, "anything", \anything */
283 notkeyword = c == '$';
285 /* look for white space to delimit word */
286 /* "'s get stripped but preserve white space */
287 /* \ protects next character */
289 while(
290 c != EOF &&
291 b < buf + sizeof( buf ) &&
292 ( inquote || !isspace( c ) ) )
294 if( c == '"' )
296 /* begin or end " */
297 inquote = !inquote;
298 notkeyword = 1;
300 else if( c != '\\' )
302 /* normal char */
303 *b++ = c;
305 else if( ( c = yychar()) != EOF )
307 /* \c */
308 *b++ = c;
309 notkeyword = 1;
311 else
313 /* \EOF */
314 break;
317 c = yychar();
320 /* Check obvious errors. */
322 if( b == buf + sizeof( buf ) )
324 yyerror( "string too big" );
325 goto eof;
328 if( inquote )
330 yyerror( "unmatched \" in string" );
331 goto eof;
334 /* We looked ahead a character - back up. */
336 if( c != EOF )
337 yyprev();
339 /* scan token table */
340 /* don't scan if it's obviously not a keyword or if its */
341 /* an alphabetic when were looking for punctuation */
343 *b = 0;
344 yylval.type = ARG;
346 if( !notkeyword && !( isalpha( *buf ) && scanmode == SCAN_PUNCT ) )
348 for( k = keywords; k->word; k++ )
349 if( *buf == *k->word && !strcmp( k->word, buf ) )
351 yylval.type = k->type;
352 yylval.string = k->word; /* used by symdump */
353 break;
357 if( yylval.type == ARG )
358 yylval.string = newstr( buf );
361 if( DEBUG_SCAN )
362 printf( "scan %s\n", symdump( &yylval ) );
364 return yylval.type;
366 eof:
367 yylval.type = EOF;
368 return yylval.type;
371 static char *
372 symdump( YYSTYPE *s )
374 static char buf[ BIGGEST_TOKEN + 20 ];
376 switch( s->type )
378 case EOF:
379 sprintf( buf, "EOF" );
380 break;
381 case 0:
382 sprintf( buf, "unknown symbol %s", s->string );
383 break;
384 case ARG:
385 sprintf( buf, "argument %s", s->string );
386 break;
387 case STRING:
388 sprintf( buf, "string \"%s\"", s->string );
389 break;
390 default:
391 sprintf( buf, "keyword %s", s->string );
392 break;
394 return buf;