initialize repository
[xorcyst.git] / scanner.l
blob0ae8ad45f1f446cd04550f1acbc45ae4ed5a5939
1 %{
2 /*
3  * $Id: scanner.l,v 1.15 2007/11/11 22:35:46 khansen Exp $
4  * $Log: scanner.l,v $
5  * Revision 1.15  2007/11/11 22:35:46  khansen
6  * compile on mac
7  *
8  * Revision 1.14  2007/08/19 11:19:47  khansen
9  * --case-insensitive option
10  *
11  * Revision 1.13  2007/08/12 18:58:49  khansen
12  * ability to generate pure 6502 binary
13  *
14  * Revision 1.12  2007/08/11 01:25:39  khansen
15  * includepaths support (-I option)
16  *
17  * Revision 1.11  2007/08/10 20:21:36  khansen
18  * *** empty log message ***
19  *
20  * Revision 1.10  2007/07/22 13:34:16  khansen
21  * convert tabs to whitespaces
22  *
23  * Revision 1.9  2005/01/09 11:20:31  kenth
24  * xorcyst 1.4.5
25  * BLT, BGE
26  *
27  * Revision 1.8  2004/12/19 19:59:08  kenth
28  * xorcyst 1.4.0
29  *
30  * Revision 1.7  2004/12/16 13:22:08  kenth
31  * added DEFINE parsing
32  *
33  * Revision 1.6  2004/12/14 01:50:33  kenth
34  * xorcyst 1.3.0
35  *
36  * Revision 1.5  2004/12/11 02:12:32  kenth
37  * xorcyst 1.2.0
38  *
39  * Revision 1.4  2004/12/10 22:43:53  kenth
40  * removed FILE_PATH recognition: ".db <a, >b" is erroneously parsed otherwise
41  *
42  * Revision 1.3  2004/12/09 11:15:28  kenth
43  * bugfix: close file handle
44  * added: "WARNING", "ERROR" recognition
45  *
46  * Revision 1.2  2004/12/06 05:06:29  kenth
47  * xorcyst 1.1.0
48  *
49  * Revision 1.1  2004/06/30 07:56:59  kenth
50  * Initial revision
51  *
52  */
/* Non-zero when the ( ) and [ ] tokens are swapped by the bracket rules;
   set by yybegin(). */
53 int yyparswap;
/* Non-zero when maybe_ignorecase() upper-cases identifiers and labels;
   set by yybegin(). */
54 int yyignorecase;
/* Forward declarations of helpers that are defined after the rules. */
55 void yyerror(const char *);   /* See below */
56 char *strip_label();    /* See below */
57 const char *maybe_ignorecase(char *);
58 #include "loc.h"
59 #include "astnode.h"
60 #include "parser.h"
61 #include "xasm.h"
62 #include <string.h>
63 #include <ctype.h>
64 /* Each time we match a string, move the end cursor to its end. */
65 #define YY_USER_ACTION  yylloc.last_column += yyleng;
/* Named patterns referenced by the rules below.
   NOTE(review): every letter class lists upper-case letters only; presumably
   the scanner is generated case-insensitively (e.g. flex -i) -- confirm
   against the build. */
67 whitespace      [ \t]+
/* A comment runs from ';' to the end of the line (newline not included). */
68 comment         ";"[^\n]*
/* A string may not contain a newline or a double quote. */
69 string_literal      \"[^\n"]*\"
70 binary_literal      [0-1]+
71 octal_literal       0[0-7]*
72 decimal_literal     [1-9][0-9]*
/* hex_literal is used after a "0X" or "$" prefix; hex_literal2 and
   hex_literal3 are the forms used with an "H" suffix and must begin with
   a decimal digit. */
73 hex_literal     [0-9A-F]+
74 hex_literal2        [0-9][0-9A-F]*
75 hex_literal3        0[A-F][0-9A-F]*
/* Identifiers are at most 255 characters long and do not start with a digit;
   local ids (written after "@@") may start with a digit. */
76 identifier      [A-Z_][A-Z0-9_]{0,254}
77 local_id        [0-9A-Z_]{1,255}
/* Directives may be written with or without a leading '.'. */
78 pragma_prefix       "."?
/* Labels are anchored at the start of a line, after optional blanks. */
79 label_prefix        ^[ \t]*
82 /* At each yylex invocation, mark the current position as the
83     start of the next token.  */
84 LOCATION_STEP (yylloc);
    /* Whitespace and ';' comments produce no token; only the location is stepped. */
86 {whitespace}        { LOCATION_STEP (yylloc); }
87 {comment}       { LOCATION_STEP (yylloc); }
    /* Integer literals, all returned as INTEGER_LITERAL with the value in
       yylval.integer:
         base 2   "%" prefix or "B" suffix
         base 8   leading "0"
         base 10  leading non-zero digit
         base 16  "0X" or "$" prefix, or "H" suffix
       Prefixes are skipped before calling strtol(); for suffixed forms strtol()
       stops converting at the suffix letter. */
88 "%"{binary_literal} { yylval.integer = strtol(&yytext[1], NULL, 2); return(INTEGER_LITERAL); }
89 {binary_literal}"B" { yylval.integer = strtol(yytext, NULL, 2); return(INTEGER_LITERAL); }
90 {octal_literal}     { yylval.integer = strtol(yytext, NULL, 8); return(INTEGER_LITERAL); }
91 {decimal_literal}   { yylval.integer = strtol(yytext, NULL, 10); return(INTEGER_LITERAL); }
92 "0X"{hex_literal}   { yylval.integer = strtol(&yytext[2], NULL, 16); return(INTEGER_LITERAL); }
93 "$"{hex_literal}    { yylval.integer = strtol(&yytext[1], NULL, 16); return(INTEGER_LITERAL); }
94 {hex_literal3}"H"   { yylval.integer = strtol(yytext, NULL, 16); return(INTEGER_LITERAL); }
95 {hex_literal2}"H"   { yylval.integer = strtol(yytext, NULL, 16); return(INTEGER_LITERAL); }
    /* Character literal 'c': the value is the code of the single character
       between the quotes.
       NOTE(review): yytext[1] is a plain char, so a byte >= 0x80 may yield a
       negative value where char is signed -- confirm whether the parser cares. */
96 "'"."'"         { yylval.integer = yytext[1]; return(INTEGER_LITERAL); }
    /* Instruction mnemonics: each returns MNEMONIC with the specific
       instruction stored in yylval.mnemonic. BLT and BGE are alternative
       spellings that yield the same values as BCC and BCS respectively. */
97 "ADC"           { yylval.mnemonic = ADC_MNEMONIC; return(MNEMONIC); }
98 "AND"           { yylval.mnemonic = AND_MNEMONIC; return(MNEMONIC); }
99 "ASL"           { yylval.mnemonic = ASL_MNEMONIC; return(MNEMONIC); }
100 "BCC"           { yylval.mnemonic = BCC_MNEMONIC; return(MNEMONIC); }
101 "BCS"           { yylval.mnemonic = BCS_MNEMONIC; return(MNEMONIC); }
102 "BLT"           { yylval.mnemonic = BCC_MNEMONIC; return(MNEMONIC); }
103 "BGE"           { yylval.mnemonic = BCS_MNEMONIC; return(MNEMONIC); }
104 "BEQ"           { yylval.mnemonic = BEQ_MNEMONIC; return(MNEMONIC); }
105 "BIT"           { yylval.mnemonic = BIT_MNEMONIC; return(MNEMONIC); }
106 "BMI"           { yylval.mnemonic = BMI_MNEMONIC; return(MNEMONIC); }
107 "BNE"           { yylval.mnemonic = BNE_MNEMONIC; return(MNEMONIC); }
108 "BPL"           { yylval.mnemonic = BPL_MNEMONIC; return(MNEMONIC); }
109 "BRK"           { yylval.mnemonic = BRK_MNEMONIC; return(MNEMONIC); }
110 "BVC"           { yylval.mnemonic = BVC_MNEMONIC; return(MNEMONIC); }
111 "BVS"           { yylval.mnemonic = BVS_MNEMONIC; return(MNEMONIC); }
112 "CLC"           { yylval.mnemonic = CLC_MNEMONIC; return(MNEMONIC); }
113 "CLD"           { yylval.mnemonic = CLD_MNEMONIC; return(MNEMONIC); }
114 "CLI"           { yylval.mnemonic = CLI_MNEMONIC; return(MNEMONIC); }
115 "CLV"           { yylval.mnemonic = CLV_MNEMONIC; return(MNEMONIC); }
116 "CMP"           { yylval.mnemonic = CMP_MNEMONIC; return(MNEMONIC); }
117 "CPX"           { yylval.mnemonic = CPX_MNEMONIC; return(MNEMONIC); }
118 "CPY"           { yylval.mnemonic = CPY_MNEMONIC; return(MNEMONIC); }
119 "DEC"           { yylval.mnemonic = DEC_MNEMONIC; return(MNEMONIC); }
120 "DEX"           { yylval.mnemonic = DEX_MNEMONIC; return(MNEMONIC); }
121 "DEY"           { yylval.mnemonic = DEY_MNEMONIC; return(MNEMONIC); }
122 "EOR"           { yylval.mnemonic = EOR_MNEMONIC; return(MNEMONIC); }
123 "INC"           { yylval.mnemonic = INC_MNEMONIC; return(MNEMONIC); }
124 "INX"           { yylval.mnemonic = INX_MNEMONIC; return(MNEMONIC); }
125 "INY"           { yylval.mnemonic = INY_MNEMONIC; return(MNEMONIC); }
126 "JMP"           { yylval.mnemonic = JMP_MNEMONIC; return(MNEMONIC); }
127 "JSR"           { yylval.mnemonic = JSR_MNEMONIC; return(MNEMONIC); }
128 "LDA"           { yylval.mnemonic = LDA_MNEMONIC; return(MNEMONIC); }
129 "LDX"           { yylval.mnemonic = LDX_MNEMONIC; return(MNEMONIC); }
130 "LDY"           { yylval.mnemonic = LDY_MNEMONIC; return(MNEMONIC); }
131 "LSR"           { yylval.mnemonic = LSR_MNEMONIC; return(MNEMONIC); }
132 "NOP"           { yylval.mnemonic = NOP_MNEMONIC; return(MNEMONIC); }
133 "ORA"           { yylval.mnemonic = ORA_MNEMONIC; return(MNEMONIC); }
134 "PHA"           { yylval.mnemonic = PHA_MNEMONIC; return(MNEMONIC); }
135 "PHP"           { yylval.mnemonic = PHP_MNEMONIC; return(MNEMONIC); }
136 "PLA"           { yylval.mnemonic = PLA_MNEMONIC; return(MNEMONIC); }
137 "PLP"           { yylval.mnemonic = PLP_MNEMONIC; return(MNEMONIC); }
138 "ROL"           { yylval.mnemonic = ROL_MNEMONIC; return(MNEMONIC); }
139 "ROR"           { yylval.mnemonic = ROR_MNEMONIC; return(MNEMONIC); }
140 "RTI"           { yylval.mnemonic = RTI_MNEMONIC; return(MNEMONIC); }
141 "RTS"           { yylval.mnemonic = RTS_MNEMONIC; return(MNEMONIC); }
142 "SBC"           { yylval.mnemonic = SBC_MNEMONIC; return(MNEMONIC); }
143 "SEC"           { yylval.mnemonic = SEC_MNEMONIC; return(MNEMONIC); }
144 "SED"           { yylval.mnemonic = SED_MNEMONIC; return(MNEMONIC); }
145 "SEI"           { yylval.mnemonic = SEI_MNEMONIC; return(MNEMONIC); }
146 "STA"           { yylval.mnemonic = STA_MNEMONIC; return(MNEMONIC); }
147 "STX"           { yylval.mnemonic = STX_MNEMONIC; return(MNEMONIC); }
148 "STY"           { yylval.mnemonic = STY_MNEMONIC; return(MNEMONIC); }
149 "TAX"           { yylval.mnemonic = TAX_MNEMONIC; return(MNEMONIC); }
150 "TAY"           { yylval.mnemonic = TAY_MNEMONIC; return(MNEMONIC); }
151 "TSX"           { yylval.mnemonic = TSX_MNEMONIC; return(MNEMONIC); }
152 "TXA"           { yylval.mnemonic = TXA_MNEMONIC; return(MNEMONIC); }
153 "TXS"           { yylval.mnemonic = TXS_MNEMONIC; return(MNEMONIC); }
154 "TYA"           { yylval.mnemonic = TYA_MNEMONIC; return(MNEMONIC); }
    /* The single letters X, Y and A are returned as their own character codes. */
155 "X"         { return('X'); }
156 "Y"         { return('Y'); }
157 "A"         { return('A'); }
    /* Keyword operators. */
158 "MASK"          { return(MASK); }
159 "SIZEOF"        { return(SIZEOF); }
    /* Directives. Each may be written with or without a leading '.'
       (pragma_prefix). Several spellings share one token:
         DATA/DATASEG -> DATASEG      CODE/CODESEG   -> CODESEG
         INCLUDE/INCSRC -> INCSRC     TAG/TYPE       -> TAG
         PAD/DSB -> DSB               DB/BYTE        -> BYTE
         CHAR/ASC -> CHAR             DW/WORD        -> WORD
         DD/DWORD -> DWORD
       Where one directive is a prefix of another (IF/IFDEF, END/ENDM, ...)
       the scanner's longest-match rule selects the longer one. */
160 {pragma_prefix}"DATA"   { return(DATASEG); }
161 {pragma_prefix}"CODE"   { return(CODESEG); }
162 {pragma_prefix}"DATASEG" { return(DATASEG); }
163 {pragma_prefix}"CODESEG" { return(CODESEG); }
164 {pragma_prefix}"IF" { return(IF); }
165 {pragma_prefix}"IFDEF"  { return(IFDEF); }
166 {pragma_prefix}"IFNDEF" { return(IFNDEF); }
167 {pragma_prefix}"ELSE"   { return(ELSE); }
168 {pragma_prefix}"ELIF"   { return(ELIF); }
169 {pragma_prefix}"ENDIF"  { return(ENDIF); }
170 {pragma_prefix}"MACRO"  { return(MACRO); }
171 {pragma_prefix}"END"    { return(END); }
172 {pragma_prefix}"ENDE"   { return(ENDE); }
173 {pragma_prefix}"ENDM"   { return(ENDM); }
174 {pragma_prefix}"ENDP"   { return(ENDP); }
175 {pragma_prefix}"ENDS"   { return(ENDS); }
176 {pragma_prefix}"EQU"    { return(EQU); }
177 {pragma_prefix}"DEFINE" { return(DEFINE); }
178 {pragma_prefix}"INCLUDE" { return(INCSRC); }
179 {pragma_prefix}"INCSRC" { return(INCSRC); }
180 {pragma_prefix}"INCBIN" { return(INCBIN); }
181 {pragma_prefix}"ALIGN"  { return(ALIGN); }
182 {pragma_prefix}"PUBLIC" { return(PUBLIC); }
183 {pragma_prefix}"EXTRN"  { return(EXTRN); }
184 {pragma_prefix}"ZEROPAGE" { return(ZEROPAGE); }
185 {pragma_prefix}"CHARMAP" { return(CHARMAP); }
186 {pragma_prefix}"STRUC"  { return(STRUC); }
187 {pragma_prefix}"UNION"  { return(UNION); }
188 {pragma_prefix}"RECORD" { return(RECORD); }
189 {pragma_prefix}"ENUM"   { return(ENUM); }
190 {pragma_prefix}"PROC"   { return(PROC); }
191 {pragma_prefix}"REPT"   { return(REPT); }
192 {pragma_prefix}"TAG"    { return(TAG); }
193 {pragma_prefix}"TYPE"   { return(TAG); }
194 {pragma_prefix}"LABEL"  { return(_LABEL_); }
195 {pragma_prefix}"MESSAGE" { return(MESSAGE); }
196 {pragma_prefix}"WARNING" { return(WARNING); }
197 {pragma_prefix}"ERROR"  { return(ERROR); }
198 {pragma_prefix}"WHILE"  { return(WHILE); }
199 {pragma_prefix}"PAD"    { return(DSB); }
    /* Data definition and storage reservation directives. */
200 {pragma_prefix}"DB" { return(BYTE); }
201 {pragma_prefix}"BYTE"   { return(BYTE); }
202 {pragma_prefix}"CHAR"   { return(CHAR); }
203 {pragma_prefix}"ASC"    { return(CHAR); }
204 {pragma_prefix}"DW" { return(WORD); }
205 {pragma_prefix}"WORD"   { return(WORD); }
206 {pragma_prefix}"DD" { return(DWORD); }
207 {pragma_prefix}"DWORD"  { return(DWORD); }
208 {pragma_prefix}"DSB"    { return(DSB); }
209 {pragma_prefix}"DSW"    { return(DSW); }
210 {pragma_prefix}"DSD"    { return(DSD); }
211 {pragma_prefix}"ORG"    { return(ORG); }
    /* Labels: an identifier, or "@@" plus a local id, followed by ':' at the
       start of a line (optionally after blanks). strip_label() removes the
       blanks and the ':' in place, so the value points into yytext. */
212 {label_prefix}{identifier}":" { yylval.label = maybe_ignorecase(strip_label()); return(LABEL); }
213 {label_prefix}"@@"{local_id}":" { yylval.label = maybe_ignorecase(strip_label()); return(LOCAL_LABEL); }
    /* References to local and ordinary identifiers; the value is yytext itself,
       upper-cased in place when yyignorecase is set. */
214 "@@"{local_id}      { yylval.ident = maybe_ignorecase(yytext); return(LOCAL_ID); }
215 {identifier}        { yylval.ident = maybe_ignorecase(yytext); return(IDENTIFIER); }
    /* String literal: the closing quote is overwritten with NUL and the value
       points just past the opening quote, i.e. into yytext. */
216 {string_literal}    { yytext[yyleng-1] = '\0'; yylval.string = &yytext[1]; return(STRING_LITERAL); }
    /* Runs of two to eight '+' or '-' characters are branch targets; the run
       itself (yytext) is the value. */
217 "+"{2,8}        { yylval.ident = yytext; return(FORWARD_BRANCH); }
218 "-"{2,8}        { yylval.ident = yytext; return(BACKWARD_BRANCH); }
    /* Punctuation and operators. Single characters are returned as their own
       character codes; multi-character operators have named tokens. */
219 "::"            { return(SCOPE_OP); }
220 "."         { return('.'); }
    /* A newline is itself a token; the location's line count is advanced first. */
221 \n          { LOCATION_LINES (yylloc, yyleng); LOCATION_STEP (yylloc); return('\n'); }
222 ","         { return(','); }
223 ":"         { return(':'); }
224 "#"         { return('#'); }
225 "="         { return('='); }
    /* When yyparswap is non-zero, parentheses are reported as brackets and
       brackets as parentheses. */
226 "("         { return( yyparswap ? '[' : '(' ); }
227 ")"         { return( yyparswap ? ']' : ')' ); }
228 "["         { return( yyparswap ? '(' : '[' ); }
229 "]"         { return( yyparswap ? ')' : ']'); }
230 "{"         { return('{'); }
231 "}"         { return('}'); }
232 "&"         { return('&'); }
233 "!"         { return('!'); }
234 "~"         { return('~'); }
235 "-"         { return('-'); }
236 "+"         { return('+'); }
237 "*"         { return('*'); }
238 "/"         { return('/'); }
239 "%"         { return('%'); }
240 "<"         { return('<'); }
241 ">"         { return('>'); }
242 "^"         { return('^'); }
243 "|"         { return('|'); }
    /* Two-character shift and comparison operators. */
244 ">>"                    { return(SHR_OP); }
245 "<<"                    { return(SHL_OP); }
246 "<="                    { return(LE_OP); }
247 ">="                    { return(GE_OP); }
248 "=="                    { return(EQ_OP); }
249 "!="                    { return(NE_OP); }
250 "$"         { return('$'); }
251 "@"         { return('@'); }
    /* Catch-all: any other character is reported through yyerror() and
       skipped; no token is returned for it. */
252 .           { yyerror("Skipping invalid character(s)"); LOCATION_STEP (yylloc); }
255  * Describes a file stack record.
256  * It holds name, location and buffer state for it.
257  */
258 typedef struct tag_file_stack_rec {
259     const char *name; /* Name of the file */
260     YY_BUFFER_STATE buf;    /* flex input buffer */
261     YYLTYPE loc;    /* Current position in file */
262     FILE *fp;   /* File handle */
263 } file_stack_rec;
265 /* Max. nesting depth */
266 #define FILE_STACK_SIZE 32
268 /* Stack of files */
269 static file_stack_rec file_stack[FILE_STACK_SIZE];
271 /* File stack pointer */
272 static int file_stack_ptr;
274 /* Macro to access top of file stack */
275 #define FTOS() file_stack[file_stack_ptr]
278  * Resets the location described by yylloc.
279  */
280 void yyresetloc(void)
282     LOCATION_RESET(yylloc);
286  * Function called by lexer upon EOF(yyin).
287  */
288 int yywrap()
290     /* Discard of buffer that has been scanned */
291     yy_delete_buffer(YY_CURRENT_BUFFER);
292     /* Close current file */
293     fclose(FTOS().fp);
294     /* Resume scanning of previous buffer, if any */
295     if (file_stack_ptr > 0) {
296         /* Pop stack */
297         file_stack_ptr--;
298         /* Restore location */
299         yylloc = FTOS().loc;
300         /* Resume scanning from this buffer */
301         yy_switch_to_buffer(FTOS().buf);
302         /* Not end of all input. */
303         return(0);
304     }
305     else {
306         /* We have reached end of all input. */
307         return(1);
308     }
312  * Opens a file and resumes scanning from there.
313  * The current file's status is pushed on stack.
314  * @param filename Name of new file to start scanning
315  * @return error code (0, 1 or 2)
316  */
317 int yypushandrestart(const char *filename)
319     FILE *fp;
320     /* Check for stack overflow */
321     if (file_stack_ptr == FILE_STACK_SIZE-1) {
322         /* Error, stack overflow */
323         return 2;
324     }
325     /* Attempt to open file */
326     fp = fopen(filename, "rt");
327     if (!fp && (filename[0] != '/')) {
328         /* Try search paths */
329         int i;
330         for (i = 0; i < xasm_args.include_path_count; ++i) {
331             const char *include_path = xasm_args.include_paths[i];
332             char *path = (char *)malloc(
333                 strlen(include_path) + strlen("/") + strlen(filename) + 1);
334             strcpy(path, include_path);
335             strcat(path, "/");
336             strcat(path, filename);
337             fp = fopen(path, "rt");
338             free(path);
339             if (fp)
340                 break;
341         }
342     }
343     if (fp) {
344         /* Save position in current file */
345         FTOS().loc = yylloc;
346         /* Push the new file */
347         file_stack_ptr++;
348         FTOS().name = filename;
349         FTOS().buf = yy_create_buffer(fp, YY_BUF_SIZE);
350         FTOS().fp = fp;
351         /* Reset file location */
352         yyresetloc();
353         /* Resume scanning from the new buffer */
354         yy_switch_to_buffer(FTOS().buf);
355         /* Success */
356         return 0;
357     }
358     else {
359         /* Error, couldn't open file */
360         return 1;
361     }
365  * Initializes the lexer, tells it to start tokenizing from the given file.
366  * @param filename Name of file where scanning is initiated
367  * @param swap_parens Use ( ) for indirection if 1, [ ] otherwise
368  * @return 1 if success, 0 otherwise
369  */
370 int yybegin(const char *filename, int swap_parens, int ignore_case)
372     FILE *fp;
373     yyparswap = swap_parens;
374     yyignorecase = ignore_case;
375     /* Attempt to open the given file */
376     fp = fopen(filename, "rt");
377     if (fp) {
378         /* Init stack */
379         file_stack_ptr = 0;
380         FTOS().name = filename;
381         FTOS().buf = yy_create_buffer(fp, YY_BUF_SIZE);
382         FTOS().fp = fp;
383         /* Reset file location */
384         yyresetloc();
385         /* Use the new buffer */
386         yy_switch_to_buffer(FTOS().buf);
387         /* Success */
388         return 1;
389     }
390     else {
391         /* Couldn't open file */
392         return 0;
393     }
397  * Gets the name of the file that is currently being tokenized.
398  */
399 const char *yy_current_filename()
401     return FTOS().name;
405  * Called by the parser to report an error during parsing.
406  * @param s Error message
407  */
408 void yyerror(const char *s) {
409     int i;
410     /* Print line of form: file:pos:message */
411     fprintf(stderr, "%s:", yy_current_filename());
412     LOCATION_PRINT(stderr, yylloc);
413     fprintf(stderr, ": %s\n", s);
414     /* Print include-trace */
415     for (i=file_stack_ptr-1; i>=0; i--) {
416         fprintf(stderr, "(%s:", file_stack[i].name);
417         LOCATION_PRINT(stderr, file_stack[i].loc);
418         fprintf(stderr, ")\n");
419     }
423  * Call when yytext is a label to remove whitespace and punctuation.
424  */
425 char *strip_label()
427     int start;
428     /* Kill the ':' at the end */
429     yytext[strlen(yytext)-1] = 0;
430     /* Skip the whitespace, if any */
431     start = 0;
432     while ((yytext[start] == ' ') || (yytext[start] == '\t')) start++;
433     return &yytext[start];
/**
 * Converts a NUL-terminated string to upper case in place.
 *
 * @param str String to convert; its characters are overwritten
 * @return str
 */
static char *__strupr(char *str)
{
    char *p;

    for (p = str; *p != '\0'; ++p) {
        /* toupper() is only defined for values representable as unsigned
           char (or EOF), so a possibly negative char must be converted first */
        *p = (char)toupper((unsigned char)*p);
    }
    return str;
}
446 const char *maybe_ignorecase(char *str)
448     if (yyignorecase)
449         __strupr(str);
450     return str;