5 * Copyright (C) 2007, The Perl Foundation.
9 * This pre-processor handles the macro layer of the
10 * PIR language. The following constructs are filtered
29 #define YY_NO_UNISTD_H
30 #include "macroparser.h"
32 #define YY_EXTRA_TYPE struct lexer_state *
35 extern macro_def *find_macro(constant_table *table, char *name); /* defined outside this file; the rules below treat a NULL result as "no such macro" and otherwise read ->body or pass the macro_def on */
36 extern char *concat(char *str1, char *str2); /* defined outside this file; presumably joins str1 and str2 -- not called in the visible part of this scanner, TODO confirm who owns the result */
42 The C89 standard does not define a strdup() in the C library,
43 so we define our own. Identifiers that begin with "str" followed by a
44 lowercase letter are reserved for future library use, so it is named
45 dupstr, which is what it does: duplicate a string.
49 dupstr(char const * const source) {
50 char *newstring = (char *)calloc(strlen(source) + 1, sizeof (char));
52 strcpy(newstring, source);
58 See dupstr, except that this version takes the number of characters to be
59 copied. Easy for copying a string except the quotes.
63 dupstrn(char const * const source, size_t num_chars) {
64 char *newstring = (char *)calloc(num_chars + 1, sizeof (char));
66 /* only copy num_chars characters */
67 strncpy(newstring, source, num_chars);
73 Update location information. Handling of heredocs is buggy w.r.t. line no.
78 update_location(char *scanned_string, lexer_state *lexer) {
79 char *iter = scanned_string;
80 while (*iter != '\0') {
89 Use these on a big file with macros, and check out whether there are unused rules.
91 int ctr[YY_NUM_RULES]; /* one counter per scanner rule; YY_USER_ACTION increments ctr[yy_act] each time a rule matches */
92 int num_rules = YY_NUM_RULES; /* number of entries in ctr */
95 /* after a rule is matched, execute this block of code to keep track of the line no. */
96 #define YY_USER_ACTION { ++ctr[yy_act]; \
97 update_location(yytext, yyget_extra(yyscanner)); \
102 static int is_pir_directive(char *name);
109 %option never-interactive
113 %option prefix="macro"
115 %option noyy_top_state
116 %option outfile="macrolexer.c"
117 %option header-file="macrolexer.h"
137 ALNUM {ALPHA}|{DIGIT}
139 IDENT {ALPHA}{ALNUM}*
142 HEX 0[xX][0-9A-Fa-f]+
149 BIGINT {SIGN}?{DIGITS}"L"
150 FLOATNUM {SIGN}?(({DIGITS}{DOT}{DIGIT}*|{DOT}{DIGITS})([eE]{SIGN}?{DIGITS})?|{DIGITS}[eE]{SIGN}?{DIGITS})
153 DQ_STRING \"(\\.|[^"\\\n])*\"
154 SQ_STRING \'[^'\n]*\'
155 Q_STRING {SQ_STRING}|{DQ_STRING}
157 NEWLINE {EOL}({WS}|{EOL})*
159 REG "$"?[SNIP]{DIGITS}
165 <EXPAND>{NEWLINE} { /**/
166 yy_pop_state(yyscanner);
167 yylval->sval = dupstr("\n");
171 <MACROPAR>{NEWLINE} { /* after macro header start the macro body */
172 yy_push_state(MACROBODY, yyscanner);
173 yylval->sval = dupstr("\n");
177 <MACROBODY>{NEWLINE} { /* return newlines in macro body as any tokens */
178 yylval->sval = dupstr("\n");
182 <CONSTDEF>{NEWLINE} { yy_pop_state(yyscanner);
187 <LINE>{NEWLINE} { yy_pop_state(yyscanner);
192 <BRACEDARGS>{NEWLINE} { /* newline inside a braced macro argument: pass it on as ordinary
                                  * text. The value is heap-copied with dupstr(), as in every other
                                  * rule, so that sval never points at a string literal.
                                  */
                               yylval->sval = dupstr("\n");
                               return TK_ANY;
                             }
196 yy_push_state(LINE, yyscanner);
201 yy_push_state(INCLUDE, yyscanner);
205 ".macro_const" { /* */
206 yy_push_state(CONSTDEF, yyscanner);
207 return TK_MACRO_CONST;
211 yy_push_state(MACROPAR, yyscanner);
215 <MACROBODY>".macro_label" { /* */
216 return TK_MACRO_LABEL;
219 <MACROBODY>"$"{IDENT}":" { /* unique label declaration using macro parameter */
220 yylval->sval = dupstr(yytext);
224 <MACROBODY>{IDENT}":" { /* unique label declaration, add special marker. */
225 yylval->sval = (char *)calloc(yyleng + 1 + 1, sizeof (char));
226 /* stick a special marker "@" so we can recognize this as a label that
229 strncpy(yylval->sval, yytext, yyleng - 1);
230 strcpy(yylval->sval + yyleng - 1, "@:");
234 <MACROBODY>".$"{IDENT} { /* referring to a declared label or local id. */
235 yylval->sval = dupstr(yytext );
239 <MACROBODY>".macro_local" { /* unique local declaration */
240 yy_push_state(MACROLOCAL, yyscanner);
241 return TK_MACRO_LOCAL;
244 <MACROLOCAL>"int" { yylval->sval = dupstr("int");
248 <MACROLOCAL>"num" { yylval->sval = dupstr("num");
252 <MACROLOCAL>"pmc" { yylval->sval = dupstr("pmc");
255 <MACROLOCAL>"string" { yylval->sval = dupstr("string");
259 <MACROLOCAL>"$"{IDENT} { /* unique local id declaration */
260 yylval->sval = dupstr(yytext);
261 yy_pop_state(yyscanner);
265 <MACROLOCAL>{IDENT} { /* unique local id declaration -- non-parameter */
266 yylval->sval = (char *)calloc(yyleng + 1 + 1, sizeof (char));
267 strcpy(yylval->sval, yytext);
268 /* add special marker that this token needs munging later */
269 yylval->sval[yyleng] = '@';
270 yy_pop_state(yyscanner);
274 <MACROBODY>".endm" { /* when reading ".endm" while scanning macro body, go back to previous state */
276 yy_pop_state(yyscanner);
277 yy_pop_state(yyscanner);
282 <MACROPAR>"(" { /* recognize "(" when scanning macro parameter list */
286 <MACROPAR>"," { /* recognize "," when scanning macro parameter list */
290 <MACROPAR>")" { /* recognize ")" when scanning a macro parameter list */
295 <EXPAND>"(" { /* recognize "(" when expanding a macro */
296 yy_push_state(EXPARGS, yyscanner);
301 <EXPAND>[^)] { /* anything except a ")" in this state means a constant expansion, which
302 * has no arguments. This works, because if we did see a '(', the
303 * state EXPARGS is activated.
305 yylval->sval = dupstr(yytext);
306 yy_pop_state(yyscanner);
312 yylval->sval = dupstr(yytext);
321 yy_pop_state(yyscanner);
325 <EXPARGS>"{" { /* braced argument */
326 yy_push_state(BRACEDARGS, yyscanner);
330 <BRACEDARGS>[\ ] { /* a single blank inside a braced argument is kept and returned as text */
                               char *blank = dupstr(" ");
                               yylval->sval = blank;
                               return TK_ANY;
                             }
333 yylval->sval = dupstr(yytext);
338 yy_pop_state(yyscanner);
342 <EXPARGS>{IDENT} { /* variable argument of macro expansion */
343 yylval->sval = dupstr(yytext);
347 <EXPARGS>"."{IDENT} { /* .foo(.bar) */
348 lexer_state *lexer = yyget_extra(yyscanner);
349 macro_def *macro = find_macro(lexer->globaldefinitions, dupstr(yytext + 1));
352 yylval->sval = macro->body;
356 fprintf(stderr, "Error: cannot find expansion for %s\n", yytext + 1);
363 <EXPARGS>{REG} { /* register can be an argument for macro expansion. */
364 yylval->sval = dupstr(yytext);
368 <CONSTDEF,MACROPAR>{IDENT} { /* constant or macro parameter ID */
369 yylval->sval = dupstr(yytext);
373 "."{IDENT} { /* directive, constant, macro or method-call */
374 if (is_pir_directive(yytext)) {
375 yylval->sval = dupstr(yytext);
378 else { /* not a directive */
379 lexer_state *lexer = yyget_extra(yyscanner);
380 macro_def *macro = find_macro(lexer->globaldefinitions, yytext + 1);
381 if (macro != NULL) { /* it's a macro! */
382 /* only go into EXPAND state if it's a macro/constant */
383 yy_push_state(EXPAND, yyscanner);
384 yylval->mval = macro; /* pass the pointer to the macro_def */
387 else { /* it's not a macro, just a method-call, but if it was supposed
388 to be a macro expansion, we will only see the error in the pir
391 yylval->sval = dupstr(yytext);
397 "$"{IDENT}":" { /* */
398 lexer_state *lexer = yyget_extra(yyscanner);
399 macro_def *macro = find_macro(lexer->globaldefinitions, /* skip "$" and ":" */
400 dupstrn(yytext + 1, yyleng - 2));
402 if (macro != NULL) { /* found the parameter */
403 yylval->sval = macro->body; /* expand the macro-parameter */
404 return TK_LABEL_EXPANSION;
407 fprintf(stderr, "Error: cannot find expansion for '%s'\n", yytext);
412 {IDENT}"@:" { /* this label was declared unique by ".macro_label foo:" but was munged so we
413 can recognize it now, and return a signal to the parser it needs to
414 munge it into a unique ID.
416 yylval->sval = dupstrn(yytext, yyleng - 2);
417 return TK_UNIQUE_LABEL;
420 {IDENT}"@" { /* this local was declared unique by ".macro_local", but was munged so we can
423 yylval->sval = dupstrn(yytext, yyleng - 1);
424 return TK_UNIQUE_LOCAL;
427 ".$"{IDENT} { /* expanding a declared variable (local or label) */
428 lexer_state *lexer = yyget_extra(yyscanner);
429 macro_def *macro = find_macro(lexer->globaldefinitions, yytext + 2);
431 if (macro != NULL) { /**/
432 yylval->sval = macro->body;
433 return TK_VAR_EXPANSION;
436 fprintf(stderr, "Error: cannot find expansion for symbol '%s'\n", yytext);
442 lexer_state *lexer = yyget_extra(yyscanner);
443 macro_def *macro = find_macro(lexer->globaldefinitions, yytext + 1);
444 if (macro != NULL) { /* */
445 yylval->sval = macro->body;
446 return TK_VAR_EXPANSION;
449 fprintf(stderr, "Error: cannot find expansion for parameter '%s'\n", yytext + 1);
456 "."({Q_STRING}|{REG}) {/* treat ".'foo'" or ".$P0" as 1 token to prevent the tokens are separated
457 * by a space; they clearly belong to each other if they were written
458 * together (without a space). This is a method-call, like foo .'bar'().
460 yylval->sval = dupstr(yytext);
464 <LINE>"," { /* in the LINE state a comma is returned as the bare ',' character token; sval is not set */ return ','; }
468 <LINE>{DIGITS} { /* */
469 yylval->sval = dupstr(yytext);
475 <LINE>{Q_STRING} { /* */
476 yylval->sval = dupstr(yytext);
480 <*>"," { /* in all other cases (than the above), treat a comma just as any token. */
481 yylval->sval = dupstr(yytext);
486 <CONSTDEF>{REG} { /**/
487 yylval->sval = dupstr(yytext);
490 <CONSTDEF>{Q_STRING} { yylval->sval = dupstr(yytext);
494 <CONSTDEF>{SIGN}?{DIGITS} { yylval->sval = dupstr(yytext);
498 <CONSTDEF>({HEX}|{BIN}|{OCT}) { yylval->sval = dupstr(yytext);
502 <CONSTDEF>{FLOATNUM} { yylval->sval = dupstr(yytext);
506 <INCLUDE>{Q_STRING} { /* a quoted string is needed for an .include or the value of a
507 * macro constant. After this token, leave the current state.
509 yylval->sval = dupstr(yytext);
510 yy_pop_state(yyscanner);
514 <EXPARGS>{Q_STRING} { /* quoted strings argument for macro expansion */
515 yylval->sval = dupstr(yytext);
519 <EXPARGS>{SIGN}?{DIGITS} { yylval->sval = dupstr(yytext);
522 <EXPARGS>{HEX} { yylval->sval = dupstr(yytext);
525 <EXPARGS>{BIN} { yylval->sval = dupstr(yytext);
528 <EXPARGS>{OCT} { yylval->sval = dupstr(yytext);
531 <EXPARGS>{FLOATNUM} { yylval->sval = dupstr(yytext);
535 <INITIAL,MACROBODY>{REG} { /* register */
536 yylval->sval = dupstr(yytext);
540 <INITIAL,MACROBODY>{Q_STRING} { /* quoted string */
541 yylval->sval = dupstr(yytext);
545 <INITIAL,MACROBODY>{IDENT} { /* identifier */
546 yylval->sval = dupstr(yytext);
552 <INITIAL,MACROBODY>":"{IDENT} { /* flag */
553 yylval->sval = dupstr(yytext);
557 <MACROBODY>"."{IDENT} { /* expansions in a macro body; ignore for now. */
558 yylval->sval = dupstr(yytext);
562 <INITIAL,MACROBODY>{FLOATNUM} { yylval->sval = dupstr(yytext);
566 <INITIAL,MACROBODY>{SIGN}?{DIGITS} { yylval->sval = dupstr(yytext);
570 <INITIAL,MACROBODY>({HEX}|{BIN}|{OCT}) { yylval->sval = dupstr(yytext);
577 <INITIAL,MACROBODY>{IDENT}":" { /* normal label */
578 yylval->sval = dupstr(yytext);
582 <*>"=="|"!="|"<="|"=>"|">="|">>"|"<<"|">>>" { yylval->sval = dupstr(yytext);
586 <*>"+="|"-="|"*="|"/="|".="|">>="|"<<="|">>>="|"//=" { yylval->sval = dupstr(yytext);
590 <*>"%="|"|="|"&="|"**="|"~="|"||"|"&&"|"~~"|"//" { yylval->sval = dupstr(yytext);
594 <<EOF>> { /* when end of file or end of string buffer, stop scanning. */
599 <*>{WS} { /* skip whitespace */ }
601 <*>{NEWLINE} { /* in all other states, return newline as the newline token */
602 yylval->sval = dupstr("\n");
606 <*>. { /* just return any single character token we didn't match before. */
607 yylval->sval = dupstr(yytext);
620 =item C<is_pir_directive>
622 Returns a non-zero value if the specified name is a PIR directive.
626 is_pir_directive(char *name) {
628 /* maybe make this a hash or at least a binary search.
629 * Or, make these "special" macros, and have them expand
630 * to their own spelling. This would remove the need
631 * for special code, such as this.
633 static char * const directives[] = {
659 NULL /* needed to easily write loops on this array */
662 /* iter is a pointer to constant "char *" (strings). */
663 char * const *iter = directives;
665 while (*iter != NULL) {
666 if (strcmp(*iter, name) == 0) {
686 * c-file-style: "parrot"
688 * vim: expandtab shiftwidth=4: