tagged release 0.7.1
[parrot.git] / compilers / pirc / heredoc / hdocprep.l
blobc8530b8fa7a041855dc72052de12648de1d7e2d7
1 %{
3 /*
4  * $Id$
5  * Copyright (C) 2007, The Perl Foundation.
6  */
8 /*
9  * This pre-processor processes all heredoc strings into
10  * normal strings. Newline characters are escaped.
11  * POD comments and line comments are filtered out.
12  *
13  */
15 /* TODO:
16    - there's a weirdness in the SAVELINE state; we need to duplicate
17      the yytext string twice, otherwise things go wrong badly.
19    - it might be interesting to improve memory handling to make things
20      a bit more efficient.
22  */
24 #include <stdio.h>
25 #include <string.h>
26 #include <assert.h>
27 #include <stdlib.h>
29 /* keep Microsoft Visual Studio compiler happy */
30 #define YY_NO_UNISTD_H
32 /* declare the prototype of yylex */
33 #define YY_DECL int yylex( yyscan_t yyscanner)
36 extern char *yyget_text(yyscan_t yyscanner);
37 extern void  yyset_in(FILE *fp, yyscan_t yyscanner);
38 extern int   yylex_destroy(yyscan_t yyscanner);
39 extern int   yylex(yyscan_t yyscanner);
42 /* all globals are collected in this structure which
43  * is set in yyscan_t's "extra" field, available through
44  * yy{get,set}_extra() function.
45  */
46 typedef struct global_state {
47     char *heredoc;           /* heredoc string buffer */
48     char *linebuffer;        /* buffer to save the 'rest of the line' before scanning a heredoc */
49     char *delimiter;         /* buffer to save the delimiter of the heredoc string being scanned */
50     char *filename;          /* name of the file being scanned */
51     YY_BUFFER_STATE file_buffer;   /* needed to store the ref. to the file when scanning a string buffer */
53 } global_state;
55 /* accessor methods for setting and getting the lexer_state */
56 #define YY_EXTRA_TYPE  struct global_state *
58 extern YY_EXTRA_TYPE  yyget_extra(yyscan_t scanner);
59 extern void yyset_extra(YY_EXTRA_TYPE lexer , yyscan_t scanner);
61 #define output stdout
62 /* macro to chop off the last character, typically a newline character,
63  * but can also be something else
64  */
65 #define chop_yytext()   (yytext[--yyleng] = '\0')
70 =head1 FUNCTIONS
72 =over 4
74 =item C<lex_error>
76 Emit an error message.
78 =cut
81 static void
82 lex_error(char const * const message, int lineno, global_state *state) {
83     fprintf(stderr, "\nHeredoc pre-processor error in '%s' (line %d): %s\n",
84             state->filename, lineno, message);
90 =item C<dupstr>
The C89 standard does not define a strdup() in the C library,
so we define our own. Function names beginning with "str" followed
by a lowercase letter are reserved for future library use, so ours
is called dupstr, as that is what it does: duplicate a string.
97 =cut
/* Return a newly allocated copy of the NUL-terminated string source.
 * The caller owns the copy and releases it with free().
 *
 * source must not be NULL. If the allocation fails, a message is written
 * to stderr and the program is aborted; the check is made explicitly so
 * that it is still performed when assertions are compiled out (NDEBUG).
 */
char *
dupstr(char * const source) {
    size_t const size = strlen(source) + 1;    /* room for the terminator */
    char * const copy = (char *)malloc(size);

    if (copy == NULL) {
        fprintf(stderr, "dupstr: out of memory\n");
        abort();
    }

    /* size includes the terminating NUL, so the copy is a complete string */
    memcpy(copy, source, size);
    return copy;
}
109 void
110 printrules(void) {
111     extern int ctr[];
112     int i;
113     for (i = 0; i < YY_NUM_RULES; i++)
114         fprintf(output, "#rule %d was used %d times\n", i, ctr[i]);
120 =item C<init_global_state>
122 create and initialize a global_state structure, containing all 'global'
123 variables that are needed during the scanning.
125 =cut
128 static global_state *
129 init_global_state(char * const filename) {
130     global_state *state = (global_state *)malloc(sizeof (global_state));
131     assert(state != NULL);
132     state->filename    = filename;
133     state->heredoc     = NULL;
134     state->linebuffer  = dupstr("");
135     state->delimiter   = NULL;
136     state->file_buffer = NULL;
137     return state;
142 =item C<destroy_global_state>
144 free all memory of the global state structure.
146 =cut
149 static void
150 destroy_global_state(global_state *state) {
151     if (state->linebuffer)
152         free(state->linebuffer);
153     if (state->heredoc)
154         free(state->heredoc);
156     free(state);
157     state = NULL;
163 =item C<main>
165 Entry point of the heredoc pre-processor.
167 =cut
171 main(int argc, char *argv[]) {
172     FILE *fp = NULL;
173     yyscan_t yyscanner;
174     global_state *state = NULL;
176     /* check for proper usage */
177     if (argc < 2) {
178         fprintf(stderr, "Usage: %s <file>\n", argv[0]);
179         exit(EXIT_FAILURE);
180     }
182     /* open the file */
183     fp = fopen(argv[1], "r");
185     if (fp == NULL) {
186         fprintf(stderr, "error opening file '%s'\n", argv[1]);
187         exit(EXIT_FAILURE);
188     }
190     /* initialize a yyscan_t object */
191     yylex_init(&yyscanner);
192     /* set the scanner to a string buffer and go parse */
193     yyset_in(fp, yyscanner);
195     state = init_global_state(argv[1]);
197     yyset_extra(state, yyscanner);
199     /* the lexer never returns anything, only call it once.
200      * Don't give a YYSTYPE object.
201      */
202     yylex(yyscanner);
205     printrules();
207     /* clean up after playing */
208     yylex_destroy(yyscanner);
209     destroy_global_state(state);
211     return 0;
217 =back
219 =cut
226 using this we can check how often each rule is executed.
227 XXX this should be removed at some point (globals!)
231 int ctr[YY_NUM_RULES];
233 int num_rules = YY_NUM_RULES;
235 #define YY_USER_ACTION    do { ++ctr[yy_act]; } while (0);
240 %option reentrant
241 %option noyywrap
242 %option nounput
244 %option never-interactive
246 %option stack
247 %option debug
248 %option warn
249 %option noyy_top_state
250 %option outfile="hdocprep.c"
251 %option prefix="hd_pre"
252 %option yylineno
254 %pointer
256 %x POD
257 %x HEREDOC
258 %x HEREDOC2
259 %x SAVELINE
260 %x SAVELINE2
261 %x SCANSTRING
263 ALPHA          [a-zA-Z@_]
264 DIGIT          [0-9]
265 DIGITS         {DIGIT}+
266 ALNUM          {ALPHA}|{DIGIT}
267 IDENT          {ALPHA}{ALNUM}*
268 WS             [\t\f\r\x1a ]
269 EOL            \r?\n
270 DQ_STRING       \"(\\.|[^"\\\n])*\"
271 SQ_STRING       \'[^'\n]*\'
272 Q_STRING       {SQ_STRING}|{DQ_STRING}
273 NEWLINE        {EOL}({WS}|{EOL})*
274 LINECOMMENT    [#].*{EOL}
<*>{LINECOMMENT}       { /* line comments produce no output, in any state */ }

<INITIAL>^"=".*{EOL}   { /* a line that starts with an equals sign opens a POD block */
                         yy_push_state(POD, yyscanner);
                       }

<POD>^"=cut".*{EOL}    { /* the cut directive closes the POD block: go back to the
                          * state that was active before it
                          */
                         yy_pop_state(yyscanner);
                       }

<POD>.*{EOL}           { /* every other line of a POD block is discarded */ }

<POD><<EOF>>           { /* the input ended while a POD block was still open */
                         global_state *state = yyget_extra(yyscanner);

                         lex_error("POD comment not closed!", yylineno, state);
                         yyterminate();
                       }
<SCANSTRING>{EOL}      { /* line ends in the saved line produce no output */ }

<SCANSTRING>.          { /* copy any other character of the saved line to the output */
                         fputs(yytext, output);
                       }

<SCANSTRING><<EOF>>    { /* The saved line has been scanned completely. Resume reading
                          * from the buffer that was remembered in file_buffer when the
                          * scanner switched to the saved line.
                          */
                         global_state *state = yyget_extra(yyscanner);

                         assert(state->file_buffer);
                         yy_switch_to_buffer(state->file_buffer, yyscanner);

                         /* file_buffer is only meaningful while a string is being
                          * scanned, so it is cleared once the switch back is done
                          */
                         state->file_buffer = NULL;

                         BEGIN(INITIAL);

                         fprintf(output, "\n    setline %d\n", yylineno);
                       }
<SCANSTRING>"<<"{Q_STRING} { /* Second or later heredoc argument on a line. The match
                              * consists of two angle brackets, an opening quote, the
                              * delimiter and a closing quote, so the delimiter starts
                              * at offset 3 and is 4 characters shorter than the match.
                              */
                             global_state *state     = yyget_extra(yyscanner);
                             int const     delim_len = yyleng - 4;

                             /* calloc zero-fills, so the copy below stays terminated */
                             state->delimiter = (char *)calloc(delim_len + 1, sizeof (char));
                             assert(state->delimiter);
                             strncpy(state->delimiter, yytext + 3, delim_len);

                             /* start with an empty heredoc string */
                             state->heredoc = dupstr("");
                             BEGIN(SAVELINE2);
                           }

<INITIAL>"<<"{Q_STRING} { /* First heredoc argument on a line. The delimiter is taken
                           * from the match in the same way: skip the two angle
                           * brackets and the opening quote, drop the closing quote.
                           */
                         global_state *state     = yyget_extra(yyscanner);
                         int const     delim_len = yyleng - 4;

                         /* calloc zero-fills, so the copy below stays terminated */
                         state->delimiter = (char *)calloc(delim_len + 1, sizeof (char));
                         assert(state->delimiter);
                         strncpy(state->delimiter, yytext + 3, delim_len);

                         /* start with an empty heredoc string */
                         state->heredoc = dupstr("");
                         BEGIN(SAVELINE);
                        }
<SAVELINE>.*{EOL}      { /* This state is used after the first heredoc delimiter argument
                          * on a line. Save the rest of the line, including its line
                          * end, and go scan the heredoc.
                          *
                          * NOTE(review): earlier versions duplicated yytext twice and
                          * never freed the old line buffer because doing so made
                          * things go wrong. The likely cause was the unchecked append
                          * in the HEREDOC rule, which wrote past the end of the
                          * heredoc buffer into neighbouring allocations. That append
                          * is bounded now, so a single copy is made and the previous
                          * line buffer is released.
                          */
                         global_state *state = yyget_extra(yyscanner);
                         char         *rest;

                         assert(state->linebuffer != NULL);

                         /* copy first, release afterwards */
                         rest = dupstr(yytext);
                         free(state->linebuffer);
                         state->linebuffer = rest;

                         BEGIN(HEREDOC);
                       }

<SAVELINE2>.*{EOL}     { /* This state is used after the 2nd and later heredoc delimiter
                          * arguments. Save the rest of the line and go scan the heredoc
                          * string. First, though, switch back to the file, because this
                          * state is activated while a string is being scanned.
                          */
                         global_state *state = yyget_extra(yyscanner);
                         char         *rest  = dupstr(yytext);

                         /* the previously saved line is no longer needed; flex
                          * documents yy_scan_string() as scanning its own copy
                          */
                         free(state->linebuffer);
                         state->linebuffer = rest;

                         yy_switch_to_buffer(state->file_buffer, yyscanner);
                         BEGIN(HEREDOC);
                       }

<HEREDOC>{EOL}         { /* A line end inside the heredoc. It is appended to the heredoc
                          * string in escaped form, as a backslash followed by an n.
                          */
                         global_state *state = yyget_extra(yyscanner);
                         size_t        len   = strlen(state->heredoc);
                         /* two characters for the escape, one for the terminator */
                         char         *grown = (char *)realloc(state->heredoc, len + 2 + 1);

                         if (grown == NULL) {
                             /* the old buffer is still valid and stays owned by state */
                             lex_error("out of memory while reading heredoc string",
                                       yylineno, state);
                             yyterminate();
                         }

                         strcpy(grown + len, "\\n");
                         state->heredoc = grown;
                       }

<HEREDOC>.*        { /* One line of heredoc text, without its line end. It is either
                      * the delimiter, which ends the heredoc, or more contents.
                      */
                         global_state *state = yyget_extra(yyscanner);

                         /* on windows remove the carriage return character */
                         if (yyleng > 0 && yytext[yyleng - 1] == '\r') {
                            chop_yytext();
                         }

                         if (strcmp(yytext, state->delimiter) == 0) {
                            /* the heredoc is complete: emit it as a quoted string */
                            fprintf(output, "\"%s\"", state->heredoc);

                            /* free the delimiter memory */
                            free(state->delimiter);
                            state->delimiter = NULL;

                            assert(state->heredoc != NULL);
                            free(state->heredoc);

                            state->heredoc = dupstr("");

                            /* save the current buffer, because we go scan the
                             * rest of the line that was saved in the SAVELINE
                             * or SAVELINE2 state.
                             */
                            state->file_buffer = YY_CURRENT_BUFFER;
                            BEGIN(SCANSTRING);
                            assert(state->linebuffer != NULL);
                            yy_scan_string(state->linebuffer, yyscanner);
                         }
                         else {
                            /* Append this line to the heredoc string. The buffer is
                             * enlarged first so that the append stays inside it.
                             */
                            size_t  have  = strlen(state->heredoc);
                            size_t  extra = strlen(yytext);
                            char   *grown = (char *)realloc(state->heredoc, have + extra + 1);

                            if (grown == NULL) {
                                /* the old buffer is still valid and stays owned by state */
                                lex_error("out of memory while reading heredoc string",
                                          yylineno, state);
                                yyterminate();
                            }

                            /* extra + 1 copies the terminator as well */
                            memcpy(grown + have, yytext, extra + 1);
                            state->heredoc = grown;
                         }
                       }

<HEREDOC><<EOF>>       { /* End of file while reading a heredoc string. This is bad. */
                         global_state *state = yyget_extra(yyscanner);
                         fprintf(stderr,
                                 "\nError: end of file while reading heredoc string '%s'\n",
                                 state->delimiter);
                         yyterminate();
                       }
<<EOF>>                { /* the input is exhausted: stop scanning */
                         yyterminate();
                       }

<INITIAL>{EOL}+        { /* a run of line ends is written as one single newline */
                         fputc('\n', output);
                       }

<*>{EOL}               { /* line ends not handled by an earlier rule produce no output */ }

<INITIAL>.             { /* any other character is copied to the output unchanged */
                         fputs(yytext, output);
                       }
459  * Local variables:
460  *   c-file-style: "parrot"
461  * End:
462  * vim: expandtab shiftwidth=4:
463  */