tagged release 0.6.4
[parrot.git] / compilers / pirc / macro / macro.l
blobb968a1842529c841545b434b8a6856e54bbe1804
1 %{
3 /*
4  * $Id$
5  * Copyright (C) 2007, The Perl Foundation.
6  */
8 /*
9  * This pre-processor handles the macro layer of the
10  * PIR language. The following constructs are filtered
11  * out and handled:
12  *
13  * .include
14  * .macro
15  * .macro_local
16  * .macro_label
17  * .macro_const
18  *
19  */
22 #include <stdio.h>
23 #include <string.h>
24 #include <assert.h>
25 #include <stdlib.h>
26 #include "macro.h"
27 #include "lexer.h"
29 #define  YY_NO_UNISTD_H
30 #include "macroparser.h"
32 #define YY_EXTRA_TYPE  struct lexer_state *
35 extern macro_def *find_macro(constant_table *table, char *name);
36 extern char *concat(char *str1, char *str2);
42 The C89 standard does not define a strdup() in the C library,
43 so define our own strdup. Function names beginning with "str"
44 are reserved (I think), so make it dupstr, as that is what it
45 does: duplicate a string.
48 char *
49 dupstr(char const * const source) {
50     char *newstring = (char *)calloc(strlen(source) + 1, sizeof (char));
51     assert(newstring);
52     strcpy(newstring, source);
53     return newstring;
58 See dupstr, except that this version takes the number of characters to be
59 copied. Easy for copying a string except the quotes.
62 char *
63 dupstrn(char const * const source, size_t num_chars) {
64     char *newstring = (char *)calloc(num_chars + 1, sizeof (char));
65     assert(newstring);
66     /* only copy num_chars characters */
67     strncpy(newstring, source, num_chars);
68     return newstring;
73 Update location information. Handling of heredocs is buggy w.r.t. line no.
74 Fix this routine.
77 static void
78 update_location(char *scanned_string, lexer_state *lexer) {
79     char *iter = scanned_string;
80     while (*iter != '\0') {
81         if (*iter == '\n')
82             lexer->line++;
83         iter++;
84     }
89 Use these on a big file with macros, and check out whether there are unused rules.
91 int ctr[YY_NUM_RULES];
92 int num_rules = YY_NUM_RULES;
95 /* after a rule is matched, execute this block of code to keep track of the line no. */
96 #define YY_USER_ACTION   { ++ctr[yy_act];             \
97                            update_location(yytext, yyget_extra(yyscanner)); \
98                          }
102 static int is_pir_directive(char *name);
106 %option reentrant
107 %option noyywrap
108 %option bison-bridge
109 %option never-interactive
110 %option nounput
111 %option stack
112 %option debug
113 %option prefix="macro"
114 %option warn
115 %option noyy_top_state
116 %option outfile="macrolexer.c"
117 %option header-file="macrolexer.h"
120 %x MACRODEF
121 %x CONSTDEF
122 %x INCLUDE
123 %x MACROPAR
124 %x MACROBODY
125 %x EXPAND
126 %x LINE
127 %x MACROLOCAL
128 %x BRACEDARGS
129 %s EXPARGS
134 ALPHA          [a-zA-Z@_]
135 DIGIT          [0-9]
136 DIGITS         {DIGIT}+
137 ALNUM          {ALPHA}|{DIGIT}
139 IDENT          {ALPHA}{ALNUM}*
141 DOT            [.]
142 HEX            0[xX][0-9A-Fa-f]+
143 OCT            0[oO][0-7]+
144 BIN            0[bB][01]+
145 WS             [\t\f\r\x1a ]
146 EOL            \r?\n
148 SIGN           [-+]
149 BIGINT         {SIGN}?{DIGITS}"L"
150 FLOATNUM       {SIGN}?(({DIGITS}{DOT}{DIGIT}*|{DOT}{DIGITS})([eE]{SIGN}?{DIGITS})?|{DIGITS}[eE]{SIGN}?{DIGITS})
153 DQ_STRING       \"(\\.|[^"\\\n])*\"
154 SQ_STRING       \'[^'\n]*\'
155 Q_STRING       {SQ_STRING}|{DQ_STRING}
157 NEWLINE        {EOL}({WS}|{EOL})*
159 REG            "$"?[SNIP]{DIGITS}
165 <EXPAND>{NEWLINE}   { /**/
166                       yy_pop_state(yyscanner);
167                       yylval->sval = dupstr("\n");
168                       return TK_NL;
169                     }
171 <MACROPAR>{NEWLINE} { /* after macro header start the macro body */
172                       yy_push_state(MACROBODY, yyscanner);
173                       yylval->sval = dupstr("\n");
174                       return TK_NL;
175                     }
177 <MACROBODY>{NEWLINE} { /* return newlines in macro body as any tokens */
178                        yylval->sval = dupstr("\n");
179                        return TK_ANY;
180                      }
182 <CONSTDEF>{NEWLINE} { yy_pop_state(yyscanner);
183                       yylval->sval = "\n";
184                       return TK_NL;
185                     }
187 <LINE>{NEWLINE}     { yy_pop_state(yyscanner);
188                       yylval->sval = "\n";
189                       return TK_NL;
190                     }
192 <BRACEDARGS>{NEWLINE} { yylval->sval = "\n"; return TK_ANY; }
195 ".line"             { /* */
196                       yy_push_state(LINE, yyscanner);
197                       return TK_LINE;
198                     }
200 ".include"          { /* */
201                       yy_push_state(INCLUDE, yyscanner);
202                       return TK_INCLUDE;
203                     }
205 ".macro_const"      { /* */
206                       yy_push_state(CONSTDEF, yyscanner);
207                       return TK_MACRO_CONST;
208                     }
210 ".macro"            { /* */
211                       yy_push_state(MACROPAR, yyscanner);
212                       return TK_MACRO;
213                     }
215 <MACROBODY>".macro_label" { /* */
216                             return TK_MACRO_LABEL;
217                           }
219 <MACROBODY>"$"{IDENT}":"  { /* unique label declaration using macro parameter */
220                             yylval->sval = dupstr(yytext);
221                             return TK_LABEL_ID;
222                           }
224 <MACROBODY>{IDENT}":"     { /* unique label declaration, add special marker. */
225                             yylval->sval = (char *)calloc(yyleng + 1 + 1, sizeof (char));
226                             /* stick a special marker "@" so we can recognize this as a label that
227                              * must be munged.
228                              */
229                             strncpy(yylval->sval, yytext, yyleng - 1);
230                             strcpy(yylval->sval + yyleng - 1, "@:");
231                             return TK_LABEL_ID;
232                           }
234 <MACROBODY>".$"{IDENT}    { /* referring to a declared label or local id. */
235                             yylval->sval = dupstr(yytext );
236                             return TK_ANY;
237                           }
239 <MACROBODY>".macro_local" { /* unique local declaration */
240                             yy_push_state(MACROLOCAL, yyscanner);
241                             return TK_MACRO_LOCAL;
242                           }
244 <MACROLOCAL>"int"         { yylval->sval = dupstr("int");
245                             return TK_INT;
246                           }
248 <MACROLOCAL>"num"         { yylval->sval = dupstr("num");
249                             return TK_NUM;
250                           }
252 <MACROLOCAL>"pmc"         { yylval->sval = dupstr("pmc");
253                             return TK_PMC;
254                           }
255 <MACROLOCAL>"string"      { yylval->sval = dupstr("string");
256                             return TK_STRING;
257                           }
259 <MACROLOCAL>"$"{IDENT}     { /* unique local id declaration */
260                             yylval->sval = dupstr(yytext);
261                             yy_pop_state(yyscanner);
262                             return TK_LOCAL_ID;
263                           }
265 <MACROLOCAL>{IDENT}       { /* unique local id declaration -- non-parameter */
266                             yylval->sval = (char *)calloc(yyleng + 1 + 1, sizeof (char));
267                             strcpy(yylval->sval, yytext);
268                             /* add special marker that this token needs munging later */
269                             yylval->sval[yyleng] = '@';
270                             yy_pop_state(yyscanner);
271                             return TK_LOCAL_ID;
272                           }
274 <MACROBODY>".endm" { /* when reading ".endm" while scanning macro body, go back to previous state */
276                      yy_pop_state(yyscanner);
277                      yy_pop_state(yyscanner);
279                      return TK_ENDM;
280                    }
282 <MACROPAR>"("      { /* recognize "(" when scanning macro parameter list */
283                      return '(';
284                    }
286 <MACROPAR>","      { /* recognize "," when scanning macro parameter list */
287                      return ',';
288                    }
290 <MACROPAR>")"      { /* recognize ")" when scanning a macro parameter list */
291                      return ')';
292                    }
295 <EXPAND>"("        { /* recognize "(" when expanding a macro */
296                      yy_push_state(EXPARGS, yyscanner);
297                      return '(';
298                    }
301 <EXPAND>[^)]       { /* anything except a ")" in this state means a constant expansion, which
302                       * has no arguments. This works, because if we did see a '(', the
303                       * state EXPARGS is activated.
304                       */
305                      yylval->sval = dupstr(yytext);
306                      yy_pop_state(yyscanner);
307                      return TK_ANY;
308                    }
311 <INITIAL>"("      { /* */
312                     yylval->sval = dupstr(yytext);
313                     return TK_ANY;
314                   }
316 <EXPARGS>","       { /* */
317                      return ',';
318                    }
320 <EXPARGS>")"       { /* */
321                      yy_pop_state(yyscanner);
322                      return ')';
323                    }
325 <EXPARGS>"{"       { /* braced argument */
326                      yy_push_state(BRACEDARGS, yyscanner);
327                      return '{';
328                    }
330 <BRACEDARGS>[\ ]   { yylval->sval = dupstr(" "); return TK_ANY; }
332 <BRACEDARGS>[^}]   {
333                      yylval->sval = dupstr(yytext);
334                      return TK_ANY;
335                    }
337 <BRACEDARGS>"}"    {
338                      yy_pop_state(yyscanner);
339                      return '}';
340                    }
342 <EXPARGS>{IDENT}   { /* variable argument of macro expansion */
343                      yylval->sval = dupstr(yytext);
344                      return TK_IDENT;
345                    }
347 <EXPARGS>"."{IDENT} { /* .foo(.bar) */
348                       lexer_state *lexer = yyget_extra(yyscanner);
349                       macro_def *macro = find_macro(lexer->globaldefinitions, dupstr(yytext + 1));
351                       if (macro != NULL) {
352                          yylval->sval = macro->body;
353                          return TK_IDENT;
354                       }
355                       else {
356                          fprintf(stderr, "Error: cannot find expansion for %s\n", yytext + 1);
357                          lexer->errors++;
358                       }
359                     }
363 <EXPARGS>{REG}     { /* register can be an argument for macro expansion. */
364                      yylval->sval = dupstr(yytext);
365                      return TK_IDENT;
366                    }
368 <CONSTDEF,MACROPAR>{IDENT}   { /* constant or macro parameter ID */
369                                yylval->sval = dupstr(yytext);
370                                return TK_IDENT;
371                              }
373 "."{IDENT}          { /* directive, constant, macro or method-call */
374                       if (is_pir_directive(yytext)) {
375                           yylval->sval = dupstr(yytext);
376                           return TK_ANY;
377                       }
378                       else { /* not a directive */
379                           lexer_state *lexer = yyget_extra(yyscanner);
380                           macro_def *macro = find_macro(lexer->globaldefinitions, yytext + 1);
381                           if (macro != NULL) { /* it's a macro! */
382                               /* only go into EXPAND state if it's a macro/constant */
383                               yy_push_state(EXPAND, yyscanner);
384                               yylval->mval = macro; /* pass the pointer to the macro_def */
385                               return TK_DOT_IDENT;
386                           }
387                           else { /* it's not a macro, just a method-call, but if it was supposed
388                                     to be a macro expansion, we will only see the error in the pir
389                                     compiler.
390                                   */
391                               yylval->sval = dupstr(yytext);
392                               return TK_ANY;
393                           }
394                       }
395                    }
397 "$"{IDENT}":"      { /* */
398                      lexer_state *lexer = yyget_extra(yyscanner);
399                      macro_def *macro = find_macro(lexer->globaldefinitions, /* skip "$" and ":" */
400                                                    dupstrn(yytext + 1, yyleng - 2));
402                      if (macro != NULL) { /* found the parameter */
403                         yylval->sval = macro->body; /* expand the macro-parameter */
404                         return TK_LABEL_EXPANSION;
405                      }
406                      else {
407                         fprintf(stderr, "Error: cannot find expansion for '%s'\n", yytext);
408                         lexer->errors++;
409                      }
410                    }
412 {IDENT}"@:"        { /* this label was declared unique by ".macro_label foo:" but was munged so we
413                         can recognize it now, and return a signal to the parser it needs to
414                         munge it into a unique ID.
415                       */
416                      yylval->sval = dupstrn(yytext, yyleng - 2);
417                      return TK_UNIQUE_LABEL;
418                    }
420 {IDENT}"@"         { /* this local was declared unique by ".macro_local", but was munged so we can
421                         recognize it now.
422                      */
423                      yylval->sval = dupstrn(yytext, yyleng - 1);
424                      return TK_UNIQUE_LOCAL;
425                    }
427 ".$"{IDENT}      {   /* expanding a declared variable (local or label) */
428                      lexer_state *lexer = yyget_extra(yyscanner);
429                      macro_def *macro = find_macro(lexer->globaldefinitions, yytext + 2);
431                      if (macro != NULL) { /**/
432                         yylval->sval = macro->body;
433                         return TK_VAR_EXPANSION;
434                      }
435                      else {
436                         fprintf(stderr, "Error: cannot find expansion for symbol '%s'\n", yytext);
437                         lexer->errors++;
438                      }
439                  }
441 "$"{IDENT}      {    /* */
442                      lexer_state *lexer = yyget_extra(yyscanner);
443                      macro_def *macro = find_macro(lexer->globaldefinitions, yytext + 1);
444                      if (macro != NULL) { /*  */
445                         yylval->sval = macro->body;
446                         return TK_VAR_EXPANSION;
447                      }
448                      else {
449                         fprintf(stderr, "Error: cannot find expansion for parameter '%s'\n", yytext + 1);
450                         lexer->errors++;
451                      }
454                    }
456 "."({Q_STRING}|{REG}) {/* treat ".'foo'" or ".$P0" as 1 token to prevent the tokens are separated
457                         * by a space; they clearly belong to each other if they were written
458                         * together (without a space). This is a method-call, life foo .'bar'().
459                         */
460                        yylval->sval = dupstr(yytext);
461                        return TK_ANY;
462                       }
464 <LINE>","       { return ','; }
468 <LINE>{DIGITS}           { /*  */
469                            yylval->sval = dupstr(yytext);
470                            return TK_INTC;
471                          }
475 <LINE>{Q_STRING}      { /* */
476                         yylval->sval = dupstr(yytext);
477                         return TK_STRINGC;
478                       }
480 <*>","                { /* in all other cases (than the above), treat a comma just as any token. */
481                         yylval->sval = dupstr(yytext);
482                         return TK_ANY;
483                       }
486 <CONSTDEF>{REG}      { /**/
487                        yylval->sval = dupstr(yytext);
488                        return TK_IDENT;
489                      }
490 <CONSTDEF>{Q_STRING} { yylval->sval = dupstr(yytext);
491                        return TK_STRINGC;
492                      }
494 <CONSTDEF>{SIGN}?{DIGITS} { yylval->sval = dupstr(yytext);
495                             return TK_INTC;
496                           }
498 <CONSTDEF>({HEX}|{BIN}|{OCT}) { yylval->sval = dupstr(yytext);
499                                 return TK_INTC;
500                               }
502 <CONSTDEF>{FLOATNUM}      { yylval->sval = dupstr(yytext);
503                             return TK_NUMC;
504                           }
506 <INCLUDE>{Q_STRING}       { /* a quoted string is needed for an .include or the value of a
507                              * macro constant. After this token, leave the current state.
508                              */
509                              yylval->sval = dupstr(yytext);
510                              yy_pop_state(yyscanner);
511                              return TK_STRINGC;
512                           }
514 <EXPARGS>{Q_STRING}       { /* quoted strings argument for macro expansion */
515                             yylval->sval = dupstr(yytext);
516                             return TK_STRINGC;
517                           }
519 <EXPARGS>{SIGN}?{DIGITS}  { yylval->sval = dupstr(yytext);
520                             return TK_INTC;
521                           }
522 <EXPARGS>{HEX}            { yylval->sval = dupstr(yytext);
523                             return TK_INTC;
524                           }
525 <EXPARGS>{BIN}            { yylval->sval = dupstr(yytext);
526                             return TK_INTC;
527                           }
528 <EXPARGS>{OCT}            { yylval->sval = dupstr(yytext);
529                             return TK_INTC;
530                           }
531 <EXPARGS>{FLOATNUM}       { yylval->sval = dupstr(yytext);
532                             return TK_NUMC;
533                           }
535 <INITIAL,MACROBODY>{REG}         { /* register */
536                                    yylval->sval = dupstr(yytext);
537                                    return TK_ANY;
538                                  }
540 <INITIAL,MACROBODY>{Q_STRING}    { /* quoted string */
541                                    yylval->sval = dupstr(yytext);
542                                    return TK_ANY;
543                                  }
545 <INITIAL,MACROBODY>{IDENT}       { /* identifier */
546                                    yylval->sval = dupstr(yytext);
547                                    return TK_ANY;
548                                  }
552 <INITIAL,MACROBODY>":"{IDENT}    { /* flag */
553                                    yylval->sval = dupstr(yytext);
554                                    return TK_ANY;
555                                  }
557 <MACROBODY>"."{IDENT}            { /* expansions in a macro body; ignore for now. */
558                                    yylval->sval = dupstr(yytext);
559                                    return TK_ANY;
560                                  }
562 <INITIAL,MACROBODY>{FLOATNUM}    { yylval->sval = dupstr(yytext);
563                                    return TK_ANY;
564                                  }
566 <INITIAL,MACROBODY>{SIGN}?{DIGITS} { yylval->sval = dupstr(yytext);
567                                      return TK_ANY;
568                                    }
570 <INITIAL,MACROBODY>({HEX}|{BIN}|{OCT}) { yylval->sval = dupstr(yytext);
571                                          return TK_ANY;
572                                        }
577 <INITIAL,MACROBODY>{IDENT}":"    { /* normal label */
578                                    yylval->sval = dupstr(yytext);
579                                    return TK_ANY;
580                                  }
582 <*>"=="|"!="|"<="|"=>"|">="|">>"|"<<"|">>>"             { yylval->sval = dupstr(yytext);
583                                                           return TK_ANY;
584                                                         }
586 <*>"+="|"-="|"*="|"/="|".="|">>="|"<<="|">>>="|"//="    { yylval->sval = dupstr(yytext);
587                                                           return TK_ANY;
588                                                         }
590 <*>"%="|"|="|"&="|"**="|"~="|"||"|"&&"|"~~"|"//"        { yylval->sval = dupstr(yytext);
591                                                           return TK_ANY;
592                                                         }
594 <<EOF>>             { /* when end of file or end of string buffer, stop scanning. */
595                       yyterminate();
596                     }
599 <*>{WS}             { /* skip whitespace */ }
601 <*>{NEWLINE}        { /* in all other states, return newline as the newline token */
602                       yylval->sval = dupstr("\n");
603                       return TK_NL;
604                     }
606 <*>.                   { /* just return any single character token we didn't match before. */
607                          yylval->sval = dupstr(yytext);
608                          return TK_ANY;
609                        }
616 =head1 FUNCTIONS
618 =over 4
620 =item C<is_pir_directive>
622 Returns a non-zero value if the specified name is a PIR directive.
625 static int
626 is_pir_directive(char *name) {
628     /* maybe make this a hash or at least a binary search.
629      * Or, make these "special" macros, and have them expand
630      * to their own spelling. This would remove the need
631      * for special code, such as this.
632      */
633     static char * const directives[] = {
634         ".arg",
635         ".begin_call",
636         ".begin_return",
637         ".begin_yield",
638         ".call",
639         ".const",
640         ".end",
641         ".end_call",
642         ".end_return",
643         ".end_yield",
644         ".get_results",
645         ".globalconst",
646         ".HLL",
647         ".HLL_map",
648         ".invocant",
649         ".lex",
650         ".loadlib",
651         ".local",
652         ".meth_call",
653         ".namespace",
654         ".nci_call",
655         ".param",
656         ".return",
657         ".sub",
658         ".yield",
659         NULL /* needed to easily write loops on this array */
660     };
662     /* iter is a pointer to constant "char *" (strings). */
663     char * const *iter = directives;
665     while (*iter != NULL) {
666         if (strcmp(*iter, name) == 0) {
667             return 1;
668         }
669         iter++;
670     }
671     return 0;
676 =back
678 =cut
685  * Local variables:
686  *   c-file-style: "parrot"
687  * End:
688  * vim: expandtab shiftwidth=4:
689  */