[t][TT #1610] Add tests for Parrot_compile_string
[parrot.git] / compilers / pirc / src / hdocprep.l
blob86f7d8376885c09af848ee540f4ae212353cd75c
1 %top{
3 /* ex: set ro ft=c:
4  * !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!!
5  *
6  * This file is generated automatically by the Parrot build process
7  * from the file compilers/pirc/src/hdocprep.l.
8  *
9  * Any changes made here will be lost!
10  *
13 /* HEADERIZER HFILE: none */
14 /* HEADERIZER STOP */
16 #ifndef __STDC_VERSION__
17 #  define __STDC_VERSION__ 0
18 #endif
20 #ifndef YY_NO_UNISTD_H
21 #  define YY_NO_UNISTD_H
22 #endif
30  * $Id$
31  * Copyright (C) 2007-2009, Parrot Foundation.
32  */
35  * This pre-processor processes all heredoc strings into
36  * normal strings. Newline characters are escaped.
37  * POD comments and line comments are filtered out.
38  *
39  * This pre-processor also handles the C<.include> directive.
40  * This is necessary, as all files being compiled as a side-effect
41  * must have their heredocs flattened.
42  */
45 #include <stdio.h>
46 #include <string.h>
47 #include <stdlib.h>
48 #include <stdarg.h>
49 #include "parrot/parrot.h"
50 #include "parrot/embed.h"
51 #include "pirheredoc.h"
54 /* don't bother to generate and include the header file;
55  * we don't need it, really. Just some prototypes declarations needed.
56  */
57 int   yylex_init(yyscan_t * yyscanner);
58 int   yyget_column(yyscan_t yyscanner);
59 void  yyset_column(int col, yyscan_t yyscanner);
60 char *yyget_text(yyscan_t yyscanner);
61 void  yyset_in(FILE *fp, yyscan_t yyscanner);
62 int   yylex_destroy(yyscan_t yyscanner);
63 int   yylex(yyscan_t yyscanner);
64 int   yyget_lineno(yyscan_t yyscanner);
68 /* disable some warnings that can't be fixed, as the code is generated. */
69 #ifdef _WIN32
70 #  pragma warning (disable:4018)
71 #  pragma warning (disable:4244)
72 #endif
74 /* declare the prototype of yylex */
75 #define YY_DECL int yylex(yyscan_t yyscanner)
78 /* all globals are collected in this structure which
79  * is set in yyscan_t's "extra" field, available through
80  * yy{get,set}_extra() function.
81  */
82 typedef struct global_state {
83     int             errors;
84     char           *heredoc;       /* heredoc string buffer */
85     char           *linebuffer;    /* buffer to save the 'rest of the line'
86                                       before scanning a heredoc */
87     char           *delimiter;     /* buffer to save the delimiter of the
88                                       heredoc string being scanned */
89     char           *filename;      /* name of the file being scanned */
90     YY_BUFFER_STATE file_buffer;   /* needed to store the ref. to the file
91                                       when scanning a string buffer */
93     FILE           *outfile;        /* output file; or STDOUT if no file is specified */
95     PARROT_INTERP;
97 } global_state;
99 /* accessor methods for setting and getting the lexer_state */
100 #define YY_EXTRA_TYPE  struct global_state *
102 extern YY_EXTRA_TYPE  yyget_extra(yyscan_t scanner);
103 extern void           yyset_extra(YY_EXTRA_TYPE lexer , yyscan_t scanner);
109 =head1 FUNCTIONS
111 =over 4
113 =item C<static void
114 lex_error(yyscan_t yyscanner, char const * const message, ...)>
116 Emit an error message, increment error counter.
118 =cut
121 static void
122 lex_error(yyscan_t yyscanner, NOTNULL(char const * const message), ...) {
123     global_state * const state  = yyget_extra(yyscanner);
124     int                  lineno = yyget_lineno(yyscanner);
125     va_list              arg_ptr;
127     fprintf(stderr, "\nHeredoc pre-processor error in '%s' (line %d): ", state->filename, lineno);
129     va_start(arg_ptr, message);
130     vfprintf(stderr, message, arg_ptr);
131     va_end(arg_ptr);
133     puts("");
135     ++state->errors;
143 =item C<static global_state *
144 init_global_state(PARROT_INTERP, char * const filename, FILE *outfile)>
146 Create and initialize a global_state structure, containing all 'global'
147 variables that are needed during the scanning.
149 =cut
152 PARROT_MALLOC
153 PARROT_WARN_UNUSED_RESULT
154 PARROT_CANNOT_RETURN_NULL
155 static global_state *
156 init_global_state(PARROT_INTERP, NOTNULL(char * const filename), NOTNULL(FILE *outfile)) {
157     global_state *state = (global_state *)mem_sys_allocate(sizeof (global_state));
158     state->filename     = filename;
159     state->heredoc      = NULL;
160     state->linebuffer   = NULL;
161     state->delimiter    = NULL;
162     state->file_buffer  = NULL;
163     state->errors       = 0;
164     state->outfile      = outfile;
165     state->interp       = interp;
167     return state;
172 =item C<static void
173 destroy_global_state(global_state * state)>
175 Free all memory of the global state structure.
177 =cut
180 static void
181 destroy_global_state(NOTNULL(global_state *state)) {
182     if (state->linebuffer)
183         mem_sys_free(state->linebuffer);
184     if (state->heredoc)
185         mem_sys_free(state->heredoc);
187     mem_sys_free(state);
188     state = NULL;
194 =item C<void
195 process_heredocs(PARROT_INTERP, char * const filename, FILE *outfile)>
197 Scan the file C<filename> for heredoc strings, and write the I<normalized> heredoc
198 strings to the file C<outfile>. The scan session uses a fresh C<yyscan_t>
199 object, so any nested (recursive, in a way) calls of this function are handled
200 fine, as each invocation has its own state. After the file C<filename> is processed,
201 all resources are released.
203 =cut
206 void
207 process_heredocs(PARROT_INTERP, NOTNULL(char * const filename), NOTNULL(FILE *outfile)) {
208     yyscan_t      yyscanner;
209     global_state *state = NULL;
210     FILE         *fp;
212     /* open the file */
213     fp = fopen(filename, "r");
215     if (fp == NULL) {
216         fprintf(stderr, "heredoc preprocessor: error opening file '%s'\n", filename);
217         exit(EXIT_FAILURE);
218     }
220     /* initialize a yyscan_t object */
221     yylex_init(&yyscanner);
222     /* set the scanner to a string buffer and go parse */
223     yyset_in(fp, yyscanner);
225     state = init_global_state(interp, filename, outfile);
227     yyset_extra(state, yyscanner);
229     /* the lexer never returns anything, only call it once. Don't give a YYSTYPE object. */
230     yylex(yyscanner);
232     destroy_global_state(state);
234     /* clean up after playing */
235     yylex_destroy(yyscanner);
241 =back
243 =cut
252 %option reentrant
253 %option noyywrap
254 %option nounput
256 %option never-interactive
258 %option stack
259 %option debug
260 %option warn
261 %option noyy_top_state
262 %option outfile="hdocprep.c"
263 %option yylineno
265 %pointer
267 %x POD
268 %x INCLUDE
269 %x SAVE_REST_OF_LINE
270 %x SAVE_REST_AGAIN
271 %x HEREDOC_STRING
272 %x SCAN_STRING
275 WS             [\t\f\r\x1a ]
276 EOL            \r?\n
277 DQ_STRING       \"(\\.|[^"\\\n])*\"
278 SQ_STRING       \'[^'\n]*\'
279 Q_STRING       {SQ_STRING}|{DQ_STRING}
280 LINECOMMENT    [#].*{EOL}?
284 <*>{LINECOMMENT}        { /* ignore line comments */ }
286 <INITIAL>^"=cut"{EOL}   { /* ignore a "=cut" if it's not in a POD comment */ }
288 <INITIAL>^"=".*{EOL}    { yy_push_state(POD, yyscanner); }
291 <POD>^"=cut".*{EOL}     { /* end of POD comment */
292                           yy_pop_state(yyscanner);
293                         }
295 <POD>.*{EOL}            { /* ignore pod comments */ }
297 <POD><<EOF>>            { /* we're scanning a POD comment, but encountered end-of-file. */
298                           lex_error(yyscanner, "POD comment not closed!");
299                           yyterminate();
300                         }
302 <INITIAL>{EOL}          { fprintf(yyget_extra(yyscanner)->outfile, "\n"); }
<INITIAL>"<<"{Q_STRING} { /* start of a heredoc: <<"FOO" or <<'FOO' */
    global_state * const state    = yyget_extra(yyscanner);
    /* the match is '<<', a quote, the name, and a quote: 4 extra characters */
    size_t const         name_len = (size_t)(yyleng - 4);

    /* the buffer is zero-filled and one byte longer than the name, which
     * is what terminates the copied delimiter.
     */
    state->delimiter = (char *)mem_sys_allocate_zeroed((name_len + 1) * sizeof (char));
    strncpy(state->delimiter, yytext + 3, name_len);

    /* the heredoc text starts out as a single escaped newline */
    state->heredoc = (char *)mem_sys_allocate_zeroed(3 * sizeof (char));
    strcpy(state->heredoc, "\\n");

    /* whatever follows the marker on this line is kept for later */
    BEGIN(SAVE_REST_OF_LINE);
}
<SAVE_REST_OF_LINE>.*{EOL}  { /* everything after the heredoc marker, up to the end of line */
    global_state * const state = yyget_extra(yyscanner);
    /* room for the matched text, one added newline and the terminator */
    size_t const         room  = (size_t)(yyleng + 2);

    state->linebuffer = (char *)mem_sys_allocate_zeroed(room * sizeof (char));
    sprintf(state->linebuffer, "%s\n", yytext);

    /* the lines that follow are the heredoc's contents */
    BEGIN(HEREDOC_STRING);
}
<SAVE_REST_AGAIN>.*{EOL}    { /* match the rest of the line */
    global_state * const state = yyget_extra(yyscanner);

    /* This state is only entered from SCAN_STRING, so the text being
     * scanned is the string buffer created from the old line buffer.
     * The old line buffer is replaced by what is left of the line.
     */
    PARROT_ASSERT(state->linebuffer != NULL);
    mem_sys_free(state->linebuffer);

    state->linebuffer = (char *)mem_sys_allocate_zeroed((yyleng + 2)
                                                        * sizeof (char));

    sprintf(state->linebuffer, "%s\n", yytext);

    /* yytext has been copied, so the string buffer is no longer needed.
     * Delete it before going back to the file; it is never switched to
     * again and would otherwise be leaked.
     */
    yy_delete_buffer(YY_CURRENT_BUFFER, yyscanner);
    yy_switch_to_buffer(state->file_buffer, yyscanner);

    BEGIN(HEREDOC_STRING);
}
<HEREDOC_STRING>{EOL}       { /* an empty line inside the heredoc */
    global_state * const state    = yyget_extra(yyscanner);
    size_t const         old_len  = strlen(state->heredoc);

    /* old text + the two characters of an escaped newline + terminator */
    char * const         extended = (char *)mem_sys_allocate_zeroed((old_len + 2 + 1)
                                                                    * sizeof (char));

    sprintf(extended, "%s\\n", state->heredoc);

    /* the longer copy replaces the old heredoc text */
    mem_sys_free(state->heredoc);
    state->heredoc = extended;
}
<HEREDOC_STRING>.*{EOL}    { /* a non-empty line inside the heredoc */
    global_state * const state = yyget_extra(yyscanner);

    /* cut the line terminator off yytext: at the '\r' of a "\r\n" pair,
     * otherwise at the final '\n'.
     */
    char * const eol = (yytext[yyleng - 2] == '\r') ? &yytext[yyleng - 2]
                                                    : &yytext[yyleng - 1];
    *eol = '\0';

    if (strcmp(state->delimiter, yytext) != 0) {
        /* not the delimiter: append the line and an escaped newline */
        size_t const old_len  = strlen(state->heredoc);
        char * const extended = (char *)mem_sys_allocate((old_len + yyleng + 1 + 2)
                                                         * sizeof (char));

        sprintf(extended, "%s%s\\n", state->heredoc, yytext);

        mem_sys_free(state->heredoc);
        state->heredoc = extended;
    }
    else {
        /* the closing delimiter: it has served its purpose */
        mem_sys_free(state->delimiter);
        state->delimiter = NULL;

        /* remember the file buffer, so it can be restored once the
         * saved rest of the line has been scanned.
         */
        state->file_buffer = YY_CURRENT_BUFFER;

        /* emit the flattened heredoc as an ordinary quoted string */
        fprintf(state->outfile, "\"%s\"", state->heredoc);

        mem_sys_free(state->heredoc);
        state->heredoc = NULL;

        /* continue with the text that followed the heredoc marker */
        BEGIN(SCAN_STRING);

        yy_scan_string(state->linebuffer, yyscanner);
    }
}
422 <HEREDOC_STRING><<EOF>>     { /* end of file while reading heredoc */
423                               lex_error(yyscanner, "runaway heredoc string");
424                               yyterminate();
425                             }
<SCAN_STRING>"<<"{Q_STRING} { /* another heredoc marker on the saved line */
    global_state * const state    = yyget_extra(yyscanner);
    /* <<"FOO": only 'F','O','O' are kept, i.e. the match minus 4 characters */
    size_t const         name_len = (size_t)(yyleng - 4);

    PARROT_ASSERT(state->delimiter == NULL);

    /* strncpy() copies the name only; the terminating NUL comes from the
     * zero-filled buffer, which is one byte longer than the name.
     */
    state->delimiter = (char *)mem_sys_allocate_zeroed((name_len + 1) * sizeof (char));
    strncpy(state->delimiter, yytext + 3, name_len);

    /* an empty heredoc has at least a newline */
    state->heredoc = (char *)mem_sys_allocate_zeroed(3 * sizeof (char));
    strcpy(state->heredoc, "\\n");

    BEGIN(SAVE_REST_AGAIN);
}
446 <SCAN_STRING>{EOL}          { /* do nothing */ }
448 <SCAN_STRING>.              { fprintf(yyget_extra(yyscanner)->outfile, "%s", yytext); }
<SCAN_STRING><<EOF>>        { /* the saved rest of the line has been scanned completely */
    global_state * const state = yyget_extra(yyscanner);

    /* The current buffer is the one created by yy_scan_string() for the
     * line buffer. It is never switched to again, so delete it before
     * returning to the file; otherwise it is leaked.
     */
    yy_delete_buffer(YY_CURRENT_BUFFER, yyscanner);

    /* switch back to file; */
    yy_switch_to_buffer(state->file_buffer, yyscanner);

    /* done with scanning the rest of the line; free its memory */
    mem_sys_free(state->linebuffer);

    /* and make sure there's no stray pointers */
    state->linebuffer  = NULL;
    state->file_buffer = NULL;

    /* make sure the PIR parser will report the right line numbers,
     * in case of errors; as the heredoc string, which was X lines,
     * is now flattened into 1 line, the line number must be adjusted.
     */
    fprintf(state->outfile, "\n.line %d\n", yyget_lineno(yyscanner));

    BEGIN(INITIAL);
}
474 <INITIAL><<EOF>>            { /* end of file */
475                               yyterminate();
476                             }
478 <INITIAL>".include"         { /* .include directives must be handled here */
479                               yy_push_state(INCLUDE, yyscanner);
480                             }
482 <INCLUDE>{WS}               { /* skip whitespace */ }
<INCLUDE>{Q_STRING}         { /* the quoted name of the file to include */
    global_state * const state = yyget_extra(yyscanner);
    /* the name without its two surrounding quotes */
    size_t const         name_len = (size_t)(yyleng - 2);
    char                *filename;
    char                *fullpath;

    /* zero-filled and one byte longer than the name, hence terminated */
    filename = (char *)mem_sys_allocate_zeroed((name_len + 1) * sizeof (char));
    strncpy(filename, yytext + 1, name_len);

    fullpath = Parrot_locate_runtime_file(state->interp, filename,
                                          PARROT_RUNTIME_FT_INCLUDE);

    /* The file was not located (a hint to run pirc from the parrot root
     * directory used to be printed here); for now, just fall back to the
     * name as written.
     */
    if (fullpath == NULL)
        fullpath = filename;

    /* make sure that any error messages that occur in the .included
     * file will report the right line number and file name.
     */
    fprintf(state->outfile, ".line 1\n");
    fprintf(state->outfile, ".file %s\n", yytext); /* is quoted */

    process_heredocs(state->interp, fullpath, state->outfile);

    /* restore the location information; the "\n" that ends the .include
     * directive has not been counted yet, hence the + 1.
     */
    fprintf(state->outfile, ".line %d\n", yyget_lineno(yyscanner) + 1);
    fprintf(state->outfile, ".file '%s'\n", state->filename);

    /* when the fallback was taken both names are the same pointer,
     * which must be freed only once.
     */
    if (fullpath != filename)
        mem_sys_free(filename);

    mem_sys_free(fullpath);
}
527 <INCLUDE>{EOL}              { /* after .include "foo.pir", go back to the state we were in */
528                               yy_pop_state(yyscanner);
529                             }
531 <INCLUDE>.                  { lex_error(yyscanner, "wrong scanner state\n"); }
533 <INITIAL>.                  { fprintf(yyget_extra(yyscanner)->outfile, "%s", yytext); }
539  * Local variables:
540  *   c-file-style: "parrot"
541  * End:
542  * vim: expandtab shiftwidth=4:
543  */