5 * Copyright (C) 2007, The Perl Foundation.
9 * This pre-processor processes all heredoc strings into
10 * normal strings. Newline characters are escaped.
11 * POD comments and line comments are filtered out.
16 - there's an oddity in the SAVELINE state; we need to duplicate
17 the yytext string twice, otherwise things go badly wrong.
19 - it might be interesting to improve memory handling to make things
29 /* keep Microsoft Visual Studio compiler happy */
30 #define YY_NO_UNISTD_H
32 /* declare the prototype of yylex */
33 #define YY_DECL int yylex( yyscan_t yyscanner)
36 extern char *yyget_text(yyscan_t yyscanner);
37 extern void yyset_in(FILE *fp, yyscan_t yyscanner);
38 extern int yylex_destroy(yyscan_t yyscanner);
39 extern int yylex(yyscan_t yyscanner);
42 /* all globals are collected in this structure, which
43 * is stored in yyscan_t's "extra" field, available through
44 * the yy{get,set}_extra() functions.
46 typedef struct global_state {
47 char *heredoc; /* heredoc string buffer */
48 char *linebuffer; /* buffer to save the 'rest of the line' before scanning a heredoc */
49 char *delimiter; /* buffer to save the delimiter of the heredoc string being scanned */
50 char *filename; /* name of the file being scanned */
51 YY_BUFFER_STATE file_buffer; /* needed to store the ref. to the file when scanning a string buffer */
55 /* accessor functions for setting and getting the global_state */
56 #define YY_EXTRA_TYPE struct global_state *
58 extern YY_EXTRA_TYPE yyget_extra(yyscan_t scanner);
59 extern void yyset_extra(YY_EXTRA_TYPE lexer , yyscan_t scanner);
62 /* macro to chop off the last character, typically a newline character,
63 * but can also be something else
65 #define chop_yytext() (yytext[--yyleng] = '\0')
76 Emit an error message.
82 lex_error(char const * const message, int lineno, global_state *state) {
83 fprintf(stderr, "\nHeredoc pre-processor error in '%s' (line %d): %s\n",
84 state->filename, lineno, message);
92 The C89 standard does not define strdup() in the C library,
93 so define our own. Function names beginning with "str" and a
94 lowercase letter are reserved, so call it dupstr, as that is
95 what it does: duplicate a string.
101 dupstr(char * const source) {
102 char *newstring = (char *)calloc(strlen(source) + 1, sizeof (char));
103 assert(newstring != NULL);
104 strcpy(newstring, source);
113 for (i = 0; i < YY_NUM_RULES; i++)
114 fprintf(output, "#rule %d was used %d times\n", i, ctr[i]);
120 =item C<init_global_state>
122 create and initialize a global_state structure, containing all 'global'
123 variables that are needed during the scanning.
128 static global_state *
129 init_global_state(char * const filename) {
130 global_state *state = (global_state *)malloc(sizeof (global_state));
131 assert(state != NULL);
132 state->filename = filename;
133 state->heredoc = NULL;
134 state->linebuffer = dupstr("");
135 state->delimiter = NULL;
136 state->file_buffer = NULL;
142 =item C<destroy_global_state>
144 free all memory of the global state structure.
150 destroy_global_state(global_state *state) {
151 if (state->linebuffer)
152 free(state->linebuffer);
154 free(state->heredoc);
165 Entry point of the heredoc pre-processor.
171 main(int argc, char *argv[]) {
174 global_state *state = NULL;
176 /* check for proper usage */
178 fprintf(stderr, "Usage: %s <file>\n", argv[0]);
183 fp = fopen(argv[1], "r");
186 fprintf(stderr, "error opening file '%s'\n", argv[1]);
190 /* initialize a yyscan_t object */
191 yylex_init(&yyscanner);
192 /* set the scanner's input to the opened file and go parse */
193 yyset_in(fp, yyscanner);
195 state = init_global_state(argv[1]);
197 yyset_extra(state, yyscanner);
199 /* the lexer never returns anything, only call it once.
200 * Don't give a YYSTYPE object.
207 /* clean up after playing */
208 yylex_destroy(yyscanner);
209 destroy_global_state(state);
226 using this we can check how often each rule is executed.
227 XXX this should be removed at some point (globals!)
231 int ctr[YY_NUM_RULES];
233 int num_rules = YY_NUM_RULES;
235 #define YY_USER_ACTION do { ++ctr[yy_act]; } while (0);
244 %option never-interactive
249 %option noyy_top_state
250 %option outfile="hdocprep.c"
251 %option prefix="hd_pre"
266 ALNUM {ALPHA}|{DIGIT}
267 IDENT {ALPHA}{ALNUM}*
270 DQ_STRING \"(\\.|[^"\\\n])*\"
271 SQ_STRING \'[^'\n]*\'
272 Q_STRING {SQ_STRING}|{DQ_STRING}
273 NEWLINE {EOL}({WS}|{EOL})*
274 LINECOMMENT [#].*{EOL}
278 <*>{LINECOMMENT} { /* ignore line comments */ }
280 <INITIAL>^"=".*{EOL} { yy_push_state(POD, yyscanner); }
283 <POD>^"=cut".*{EOL} { /* end of POD comment */
284 yy_pop_state(yyscanner);
287 <POD>.*{EOL} { /* ignore pod comments */ }
289 <POD><<EOF>> { /* we're scanning a POD comment, but encountered end-of-file. */
290 lex_error("POD comment not closed!", yylineno, yyget_extra(yyscanner));
294 <SCANSTRING>{EOL} { /* don't do anything */ }
296 <SCANSTRING>. { /* echo everything when scanning the string. */
297 fprintf(output, "%s", yytext);
300 <SCANSTRING><<EOF>> { /* end of saved string */
301 global_state *state = yyget_extra(yyscanner);
302 assert(state->file_buffer);
303 yy_switch_to_buffer(state->file_buffer, yyscanner);
305 /* clear the temp. variable; file_buffer is only used to temporarily
306 * store a reference to the current buffer when we switch from file
307 * to string scanning; after finishing scanning the string (which
308 * is now, as we just scanned <<EOF>>), we switch back to the file
311 state->file_buffer = NULL;
315 fprintf(output, "\n setline %d\n", yylineno);
319 <SCANSTRING>"<<"{Q_STRING} { /* 2nd and later heredoc argument */
320 global_state *state = yyget_extra(yyscanner);
321 state->delimiter = (char *)calloc(yyleng - 4 + 1, sizeof (char));
322 assert(state->delimiter);
323 strncpy(state->delimiter, yytext + 3, yyleng - 4);
325 state->heredoc = dupstr("");
329 <INITIAL>"<<"{Q_STRING} { /* only copy the string after "<<'" and skip the last quote too */
330 global_state *state = yyget_extra(yyscanner);
331 /* allocate storage for the delimiter, skip the << and quote characters. */
332 state->delimiter = (char *)calloc(yyleng - 4 + 1, sizeof (char));
333 assert(state->delimiter);
334 strncpy(state->delimiter, yytext + 3, yyleng - 4);
335 state->heredoc = dupstr("");
339 <SAVELINE>.*{EOL} { /* this state is used when reading the first heredoc delimiter
340 * argument. Save the rest of the line and go scan the heredoc.
342 global_state *state = yyget_extra(yyscanner);
346 assert(state->linebuffer != NULL);
347 /* this does not work:
348 free(state->linebuffer);
352 /* somehow, if we don't duplicate the string twice,
353 * things don't work. Unclear to me why this is.
355 temp = dupstr(yytext);
356 state->linebuffer = dupstr(temp);
357 assert(strcmp(temp,state->linebuffer)==0 && strcmp(temp,yytext)==0);
362 <SAVELINE2>.*{EOL} { /* this state is used when reading the 2nd and later heredoc
363 delimiter arguments. Save the rest of the line and go scan
364 the heredoc string. First, though, switch back to the file,
365 because <SAVELINE2> state is activated when reading a string.
367 global_state *state = yyget_extra(yyscanner);
369 state->linebuffer = dupstr(yytext);
371 yy_switch_to_buffer(state->file_buffer, yyscanner);
375 <HEREDOC>{EOL} { /* Scan a newline character, append this to the heredoc, but
378 global_state *state = yyget_extra(yyscanner);
379 int len = strlen(state->heredoc);
380 char *temp = (char *)calloc(len + 1 + 2, sizeof (char));
381 assert(temp != NULL);
382 strcpy(temp, state->heredoc);
384 state->heredoc = temp;
386 assert(state->heredoc != NULL);
388 /* translate "\n" to a "\" and "n" character */
390 state->heredoc[len] = '\\';
391 state->heredoc[len + 1] = 'n';
392 state->heredoc[len + 2] = '\0';
394 strcpy(state->heredoc + len, "\\n");
397 <HEREDOC>.* { /* scan heredoc string contents */
399 global_state *state = yyget_extra(yyscanner);
400 /* on windows remove the '\r' character */
401 if (yytext[yyleng - 1] == '\r') {
/* the scanned line equals the saved delimiter: print the collected
 * heredoc text and reset the per-heredoc buffers
 */
405 if (strcmp(yytext, state->delimiter) == 0) {
/* the heredoc text is written out wrapped in double quotes */
407 fprintf(output, "\"%s\"", state->heredoc);
408 /* free the delimiter memory */
409 free(state->delimiter);
410 state->delimiter = NULL;
412 assert(state->heredoc != NULL);
413 free(state->heredoc);
/* replace the freed buffer with a fresh empty string */
415 state->heredoc = dupstr("");
417 /* save the current buffer, because we go scan the
418 * rest of the string that was saved in <SAVELINE(2)>.
420 state->file_buffer = YY_CURRENT_BUFFER;
422 assert(state->linebuffer != NULL);
423 yy_scan_string(state->linebuffer, yyscanner);
426 /* save this heredoc string line */
/* NOTE(review): strcat() appends into state->heredoc without growing it.
 * The only allocations for that buffer visible in this file are
 * dupstr("") and calloc(len + 1 + 2), so confirm there is room for
 * yytext or reallocate before appending; otherwise this writes past
 * the end of the buffer. No free() of thisline is visible here either,
 * so the copy looks like it is leaked on every line - TODO confirm.
 */
427 char *thisline = dupstr(yytext);
428 state->heredoc = strcat(state->heredoc, thisline);
432 <HEREDOC><<EOF>> { /* End of file while reading a heredoc string. This is bad. */
433 global_state *state = yyget_extra(yyscanner);
435 "\nError: end of file while reading heredoc string '%s'\n",
441 <<EOF>> { /* end of file */
445 <INITIAL>{EOL}+ { /* we only want to print a single newline instead of all newlines. */
446 fprintf(output, "\n");
449 <*>{EOL} { /* do nothing. */ }
451 <INITIAL>. { /* just echo everything else */
452 fprintf(output, "%s", yytext);
460 * c-file-style: "parrot"
462 * vim: expandtab shiftwidth=4: