2 * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca>
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30 #include <sys/types.h>
45 #define S_IRUSR _S_IREAD
46 #define S_IWUSR _S_IWRITE
54 /* Parameters and output. */
69 using std::streamsize
;
71 /* Controls minimization. The defaults below are the same values that
 * processArgs() pairs with the -k and -l flags; processArgs() overwrites them
 * when one of the minimization options is given. */
72 MinimizeLevel minimizeLevel
= MinimizePartition2
;
73 MinimizeOpt minimizeOpt
= MinimizeMostOps
;
75 /* Graphviz dot file generation. */
/* machineSpec and machineName hold the -S and -M arguments and stay null until
 * given; processArgs() reports an error when either is given twice. */
76 char *machineSpec
= 0, *machineName
= 0;
/* NOTE(review): machineSpecFound is neither read nor written in the code shown
 * here; presumably it is used elsewhere. */
77 bool machineSpecFound
= false;
/* Cleared by processArgs() together with forwarding -d to the frontend. */
78 bool wantDupsRemoved
= true;
/* Set by processArgs() together with forwarding -s to the frontend. */
80 bool printStatistics
= false;
/* NOTE(review): presumably set by the -x and -V options respectively; the
 * assignments are not visible here. */
81 bool frontendOnly
= false;
82 bool generateDot
= false;
/* Argument lists filled in by processArgs(). frontendArgs and backendArgs are
 * completed and handed to the child processes by execFrontend() and
 * execBackend(); includePaths collects the -I directories. */
84 ArgsVector frontendArgs
;
85 ArgsVector backendArgs
;
86 ArgsVector includePaths
;
88 /* Print a summary of the options. The help text is a run of adjacent string
 * literals, one per printed line, each ending in a newline. Two spelling
 * mistakes in the user-visible text are corrected here: "included and imported"
 * and "line directives". */
92 "usage: ragel [options] file\n"
94 " -h, -H, -?, --help Print this usage and exit\n"
95 " -v, --version Print version information and exit\n"
96 " -o <file> Write output to <file>\n"
97 " -s Print some statistics on stderr\n"
98 " -d Do not remove duplicates from action lists\n"
99 " -I <dir> Add <dir> to the list of directories to search\n"
100 " for included and imported files\n"
101 "error reporting format:\n"
102 " --error-format=gnu file:line:column: message (default)\n"
103 " --error-format=msvc file(line,column): message\n"
104 "fsm minimization:\n"
105 " -n Do not perform minimization\n"
106 " -m Minimize at the end of the compilation\n"
107 " -l Minimize after most operations (default)\n"
108 " -e Minimize after every operation\n"
110 " -x Run the frontend only: emit XML intermediate format\n"
111 " -V Generate a dot file for Graphviz\n"
112 " -p Display printable characters on labels\n"
113 " -S <spec> FSM specification to output (for rlgen-dot)\n"
114 " -M <machine> Machine definition/instantiation to output (for rlgen-dot)\n"
116 " -C The host language is C, C++, Obj-C or Obj-C++ (default)\n"
117 " -D The host language is D\n"
118 " -J The host language is Java\n"
119 " -R The host language is Ruby\n"
120 " -A The host language is C#\n"
121 "line directives: (C/D/C# only)\n"
122 " -L Inhibit writing of #line directives\n"
123 "code style: (C/Ruby/C# only)\n"
124 " -T0 Table driven FSM (default)\n"
125 " -T1 Faster table driven FSM\n"
126 " -F0 Flat table driven FSM\n"
127 " -F1 Faster flat table-driven FSM\n"
128 "code style: (C/C# only)\n"
129 " -G0 Goto-driven FSM\n"
130 " -G1 Faster goto-driven FSM\n"
131 "code style: (C only)\n"
132 " -G2 Really fast goto-driven FSM\n"
133 " -P<N> N-Way Split really fast goto-driven FSM\n"
139 /* Print version information and exit. */
/* VERSION and PUBDATE sit directly next to string literals, so both must
 * expand to string literals for the banner to be concatenated. */
142 cout
<< "Ragel State Machine Compiler version " VERSION
<< " " PUBDATE
<< endl
<<
143 "Copyright (c) 2001-2007 by Adrian Thurston" << endl
;
147 /* Error reporting format. Defaults to the GNU style; processArgs() switches
 * it for --error-format=gnu or --error-format=msvc, and the InputLoc stream
 * operator consults it when printing a location. */
148 ErrorFormat errorFormat
= ErrorFormatGNU
;
/* Build an InputLoc from a file name, a line and a column using brace
 * initialisation. NOTE(review): this assumes InputLoc declares its members in
 * the order fileName, line, col; confirm against the struct definition. */
150 InputLoc
makeInputLoc( const char *fileName
, int line
, int col
)
152 InputLoc loc
= { fileName
, line
, col
};
/* Stream an InputLoc to out in the currently selected errorFormat. A null
 * file name trips the assert. */
156 ostream
&operator<<( ostream
&out
, const InputLoc
&loc
)
158 assert( loc
.fileName
!= 0 );
159 switch ( errorFormat
) {
160 case ErrorFormatMSVC
:
/* MSVC style: file name, an opening parenthesis and the line, then a comma
 * and the column. */
161 out
<< loc
.fileName
<< "(" << loc
.line
;
163 out
<< "," << loc
.col
;
/* Colon-separated style: file name, line and column. NOTE(review): presumably
 * the GNU/default branch; its case label is not visible here. */
168 out
<< loc
.fileName
<< ":" << loc
.line
;
170 out
<< ":" << loc
.col
;
176 /* Total error count. frontend() and main() test whether it is above zero
 * before continuing, and frontend() returns whether it is non-zero. */
177 int gblErrorCount
= 0;
179 /* Print the opening to a warning in the input, then return the error ostream. */
/* The opening written to cerr is the location, formatted by the InputLoc
 * stream operator, followed by the warning tag. */
180 ostream
&warning( const InputLoc
&loc
)
182 cerr
<< loc
<< ": warning: ";
186 /* Print the opening to a program error, then return the error stream. */
/* PROGNAME sits directly next to a string literal, so it must itself expand to
 * a string literal for the two to be concatenated into the prefix. */
190 cerr
<< PROGNAME
": ";
/* Overload of error() that takes the input location the message refers to.
 * NOTE(review): the body is not visible here. */
194 ostream
&error( const InputLoc
&loc
)
/* Write path to out. The loop visits every character of path up to the
 * terminating NUL. NOTE(review): the per-character handling is not visible
 * here; going by the name it presumably escapes characters that are special
 * inside the quoted path of a line directive. */
201 void escapeLineDirectivePath( std::ostream
&out
, char *path
)
203 for ( char *pc
= path
; *pc
!= 0; pc
++ ) {
/* Parse the command line.
 *
 * argc, argv: the program arguments.
 * inputFileName: receives the single non-option argument.
 * outputFileName: receives the output file name option argument.
 *
 * Options that matter to the child processes are also re-emitted into
 * frontendArgs or backendArgs. Every problem is reported through error(). */
211 void processArgs( int argc
, char **argv
, char *&inputFileName
, char *&outputFileName
)
/* NOTE(review): in the option string a colon after a letter presumably marks
 * an option that takes an argument; confirm against ParamCheck. */
213 ParamCheck
pc("xo:dnmleabjkS:M:I:CDJRAvHh?-:sT:F:G:P:LpV", argc
, argv
);
215 while ( pc
.check() ) {
216 switch ( pc
.state
) {
217 case ParamCheck::match
:
218 switch ( pc
.parameter
) {
/* Output file name: an empty name and a second name are both reported. */
229 if ( *pc
.paramArg
== 0 )
230 error() << "a zero length output file name was given" << endl
;
231 else if ( outputFileName
!= 0 )
232 error() << "more than one output file name was given" << endl
;
234 /* Ok, remember the output file name. */
235 outputFileName
= pc
.paramArg
;
239 /* Keep duplicate actions: turn off duplicate removal and forward -d. */
241 wantDupsRemoved
= false;
242 frontendArgs
.append( "-d" );
245 /* Minimization, mostly hidden options. */
/* Each setting below is mirrored to the frontend with the matching flag. */
247 minimizeOpt
= MinimizeNone
;
248 frontendArgs
.append( "-n" );
251 minimizeOpt
= MinimizeEnd
;
252 frontendArgs
.append( "-m" );
255 minimizeOpt
= MinimizeMostOps
;
256 frontendArgs
.append( "-l" );
259 minimizeOpt
= MinimizeEveryOp
;
260 frontendArgs
.append( "-e" );
/* Minimization level selection, forwarded as -a, -b, -j or -k. */
263 minimizeLevel
= MinimizeApprox
;
264 frontendArgs
.append( "-a" );
267 minimizeLevel
= MinimizeStable
;
268 frontendArgs
.append( "-b" );
271 minimizeLevel
= MinimizePartition1
;
272 frontendArgs
.append( "-j" );
275 minimizeLevel
= MinimizePartition2
;
276 frontendArgs
.append( "-k" );
/* -S: an empty argument and a repeated option are both reported. */
281 if ( *pc
.paramArg
== 0 )
282 error() << "please specify an argument to -S" << endl
;
283 else if ( machineSpec
!= 0 )
284 error() << "more than one -S argument was given" << endl
;
286 /* Ok, remember the path to the machine to generate. */
287 machineSpec
= pc
.paramArg
;
288 frontendArgs
.append( "-S" );
289 frontendArgs
.append( pc
.paramArg
);
/* -M: an empty argument and a repeated option are both reported. */
295 if ( *pc
.paramArg
== 0 )
296 error() << "please specify an argument to -M" << endl
;
297 else if ( machineName
!= 0 )
298 error() << "more than one -M argument was given" << endl
;
300 /* Ok, remember the machine name to generate. */
301 machineName
= pc
.paramArg
;
302 frontendArgs
.append( "-M" );
303 frontendArgs
.append( pc
.paramArg
);
/* -I: include directory, recorded in includePaths and forwarded. */
308 if ( *pc
.paramArg
== 0 )
309 error() << "please specify an argument to -I" << endl
;
311 includePaths
.append( pc
.paramArg
);
312 frontendArgs
.append( "-I" );
313 frontendArgs
.append( pc
.paramArg
);
317 /* Host language types. */
/* hostLang is pointed at the chosen language object and the flag is
 * forwarded to the frontend. */
319 hostLang
= &hostLangC
;
320 frontendArgs
.append( "-C" );
323 hostLang
= &hostLangD
;
324 frontendArgs
.append( "-D" );
327 hostLang
= &hostLangJava
;
328 frontendArgs
.append( "-J" );
331 hostLang
= &hostLangRuby
;
332 frontendArgs
.append( "-R" );
335 hostLang
= &hostLangCSharp
;
336 frontendArgs
.append( "-A" );
339 /* Version and help. */
343 case 'H': case 'h': case '?':
/* Statistics: remember the request and forward -s. */
347 printStatistics
= true;
348 frontendArgs
.append( "-s" );
/* Long options. NOTE(review): eq is compared against the bare values gnu and
 * msvc below, so it must point past the equals sign (and be non-null) by then;
 * the code that adjusts it is not visible here. */
351 char *eq
= strchr( pc
.paramArg
, '=' );
356 if ( strcmp( pc
.paramArg
, "help" ) == 0 )
358 else if ( strcmp( pc
.paramArg
, "version" ) == 0 )
360 else if ( strcmp( pc
.paramArg
, "error-format" ) == 0 ) {
362 error() << "expecting '=value' for error-format" << endl
;
363 else if ( strcmp( eq
, "gnu" ) == 0 ) {
364 errorFormat
= ErrorFormatGNU
;
365 frontendArgs
.append( "--error-format=gnu" );
367 else if ( strcmp( eq
, "msvc" ) == 0 ) {
368 errorFormat
= ErrorFormatMSVC
;
369 frontendArgs
.append( "--error-format=msvc" );
372 error() << "invalid value for error-format" << endl
;
/* The rbx long option is handed to the backend unchanged. */
375 else if ( strcmp( pc
.paramArg
, "rbx" ) == 0 )
376 backendArgs
.append( "--rbx" );
378 error() << "--" << pc
.paramArg
<<
379 " is an invalid argument" << endl
;
384 /* Passthrough args. */
/* The option letter and its argument are appended to backendArgs as two
 * separate entries. */
386 backendArgs
.append( "-T" );
387 backendArgs
.append( pc
.paramArg
);
390 backendArgs
.append( "-F" );
391 backendArgs
.append( pc
.paramArg
);
394 backendArgs
.append( "-G" );
395 backendArgs
.append( pc
.paramArg
);
398 backendArgs
.append( "-P" );
399 backendArgs
.append( pc
.paramArg
);
/* -p and -L take no argument. */
402 backendArgs
.append( "-p" );
405 backendArgs
.append( "-L" );
/* Unknown option letter. */
410 case ParamCheck::invalid
:
411 error() << "-" << pc
.parameter
<< " is an invalid argument" << endl
;
414 case ParamCheck::noparam
:
415 /* It is interpreted as an input file. */
416 if ( *pc
.curArg
== 0 )
417 error() << "a zero length input file name was given" << endl
;
418 else if ( inputFileName
!= 0 )
419 error() << "more than one input file name was given" << endl
;
421 /* OK, Remember the filename. */
422 inputFileName
= pc
.curArg
;
/* Run the frontend in this process: scan inputFileName and write the machines.
 *
 * inputFileName: file to read; must not be null (asserted).
 * outputFileName: file to write, or null.
 * Returns non-zero when gblErrorCount is above zero. */
429 int frontend( char *inputFileName
, char *outputFileName
)
431 /* Open the input file for reading. */
432 assert( inputFileName
!= 0 );
433 ifstream
*inFile
= new ifstream( inputFileName
);
434 istream
*inStream
= inFile
;
/* NOTE(review): endp is presumably a manipulator that ends the message and
 * terminates the program; confirm at its definition. */
435 if ( ! inFile
->is_open() )
436 error() << "could not open " << inputFileName
<< " for reading" << endp
;
438 /* Used for just a few things. */
439 std::ostringstream hostData
;
/* The host wrapper element is only opened when no -S or -M machine was
 * requested. */
441 if ( machineSpec
== 0 && machineName
== 0 )
442 hostData
<< "<host line=\"1\" col=\"1\">";
444 Scanner
scanner( inputFileName
, *inStream
, hostData
, 0, 0, 0, false );
447 /* Finished, final check for errors.. */
448 if ( gblErrorCount
> 0 )
451 /* Now send EOF to all parsers. */
452 terminateAllParsers();
454 /* Finished, final check for errors.. */
455 if ( gblErrorCount
> 0 )
/* Close the host wrapper element under the same condition it was opened. */
458 if ( machineSpec
== 0 && machineName
== 0 )
459 hostData
<< "</host>\n";
461 if ( gblErrorCount
> 0 )
/* NOTE(review): outputFile starts out null and the visible code only assigns
 * it when an output file name was given; confirm that the lines not shown
 * supply a fallback stream before it is dereferenced below. */
464 ostream
*outputFile
= 0;
465 if ( outputFileName
!= 0 )
466 outputFile
= new ofstream( outputFileName
);
470 /* Write the machines, then the surrounding code. */
471 writeMachines( *outputFile
, hostData
.str(), inputFileName
);
473 /* Close the intermediate file. */
474 if ( outputFileName
!= 0 )
477 return gblErrorCount
> 0;
/* Build the intermediate file name template in the directory of baseFileName.
 * Without a slash in baseFileName the result is the bare template; otherwise
 * it is the directory prefix followed by the template. The buffer comes from
 * new[]. NOTE(review): no matching delete[] is visible here, so presumably the
 * caller takes ownership. */
480 char *makeIntermedTemplate( char *baseFileName
)
483 const char *templ
= "ragel-XXXXXX.xml";
484 char *lastSlash
= strrchr( baseFileName
, '/' );
485 if ( lastSlash
== 0 ) {
486 result
= new char[strlen(templ
)+1];
487 strcpy( result
, templ
);
/* baseLen counts everything up to and including the last slash. */
490 int baseLen
= lastSlash
- baseFileName
+ 1;
491 result
= new char[baseLen
+ strlen(templ
) + 1];
492 memcpy( result
, baseFileName
, baseLen
);
493 strcpy( result
+baseLen
, templ
);
/* Create a uniquely named intermediate file next to the output file (or next
 * to the input file when no output file was given). Up to 20 random names are
 * tried; an access-denied failure and running out of tries are both reported
 * through error(). */
498 char *openIntermed( char *inputFileName
, char *outputFileName
)
503 /* Which filename do we use as the base? */
504 char *baseFileName
= outputFileName
!= 0 ? outputFileName
: inputFileName
;
506 /* The template for the intermediate file name. */
507 char *intermedFileName
= makeIntermedTemplate( baseFileName
);
509 /* Randomize the name and try to open. */
510 char fnChars
[] = "abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
/* The last X minus five is the first of the six X placeholders. */
511 char *firstX
= strrchr( intermedFileName
, 'X' ) - 5;
512 for ( int tries
= 0; tries
< 20; tries
++ ) {
513 /* Choose a random name. */
/* NOTE(review): fnChars holds 62 characters but the modulus is 52, so the
 * upper-case letters after P are never picked; confirm whether intended. */
514 for ( int x
= 0; x
< 6; x
++ )
515 firstX
[x
] = fnChars
[rand() % 52];
517 /* Try to open the file. */
/* O_EXCL together with O_CREAT makes the open fail if the name already
 * exists; the file is created readable and writable by the owner only. */
518 int fd
= ::open( intermedFileName
, O_WRONLY
|O_EXCL
|O_CREAT
, S_IRUSR
|S_IWUSR
);
521 /* Success. Close the file immediately and return the name for use
522 * by the child processes. */
524 result
= intermedFileName
;
528 if ( errno
== EACCES
) {
529 error() << "failed to open temp file " << intermedFileName
<<
530 ", access denied" << endp
;
535 error() << "abnormal error: cannot find unique name for temp file" << endp
;
/* Exit helper that takes the intermediate file name and the exit status.
 * main() uses it for every exit once the intermediate file exists.
 * NOTE(review): the body is not visible here. */
541 void cleanExit( char *intermed
, int status
)
549 /* If any forward slash is found in argv0 then it is assumed that the path is
550 * explicit and the path to the backend executable should be derived from
551 * that. We check that location and also go up one then inside a directory of
552 * the same name in case we are executing from the source tree. If no forward
553 * slash is found it is assumed the file is being run from the installed
554 * location. The PREFIX supplied during configuration is used. */
/* argv0: the program path as invoked. progName: executable name to look for.
 * The result array has room for three entries: up to two candidate paths and
 * the null terminator that is stored last. */
555 char **makePathChecksUnix( const char *argv0
, const char *progName
)
557 char **result
= new char*[3];
558 const char *lastSlash
= strrchr( argv0
, '/' );
561 if ( lastSlash
!= 0 ) {
/* Copy argv0 and cut it off right after the last slash.
 * NOTE(review): no free() of this copy is visible here. */
562 char *path
= strdup( argv0
);
563 int givenPathLen
= (lastSlash
- argv0
) + 1;
564 path
[givenPathLen
] = 0;
/* First candidate: same directory. The extra 1 is the terminating NUL. */
566 int progNameLen
= strlen(progName
);
567 int length
= givenPathLen
+ progNameLen
+ 1;
568 char *check
= new char[length
];
569 sprintf( check
, "%s%s", path
, progName
);
570 result
[numChecks
++] = check
;
/* Second candidate: up one level, then a directory named like the program.
 * The constants are 3 for the dot-dot-slash, 1 for the slash and 1 for the
 * terminating NUL. */
572 length
= givenPathLen
+ 3 + progNameLen
+ 1 + progNameLen
+ 1;
573 check
= new char[length
];
574 sprintf( check
, "%s../%s/%s", path
, progName
, progName
);
575 result
[numChecks
++] = check
;
/* Installed location: PREFIX, the five characters of the bin directory
 * component, the program name and the terminating NUL. */
578 int prefixLen
= strlen(PREFIX
);
579 int progNameLen
= strlen(progName
);
580 int length
= prefixLen
+ 5 + progNameLen
+ 1;
581 char *check
= new char[length
];
583 sprintf( check
, PREFIX
"/bin/%s", progName
);
584 result
[numChecks
++] = check
;
587 result
[numChecks
] = 0;
/* Run progName as a child process and wait for it.
 *
 * progName: name used in error messages.
 * pathChecks: null-terminated list of candidate executable paths.
 * args: argument vector handed to execv.
 * intermed: intermediate file name, passed on to cleanExit() on failure.
 *
 * Any failure, including a non-zero child exit status, ends in cleanExit(). */
592 void forkAndExec( const char *progName
, char **pathChecks
,
593 ArgsVector
&args
, char *intermed
)
597 /* Error, no child created. */
598 error() << "failed to fork for " << progName
<< endl
;
599 cleanExit( intermed
, 1 );
601 else if ( pid
== 0 ) {
/* Child: execv only returns when it fails, so the error below is reached
 * only after the candidates have failed. */
603 while ( *pathChecks
!= 0 ) {
604 /* Execv does not modify argv, it just uses the const form that is
605 * compatible with the most code. Ours not included. */
606 execv( *pathChecks
, (char *const*) args
.data
);
609 error() << "failed to exec " << progName
<< endl
;
610 cleanExit( intermed
, 1 );
613 /* Parent process, wait for the child. */
617 /* What happened with the child. */
618 if ( ! WIFEXITED( status
) ) {
619 error() << progName
<< " did not exit normally" << endl
;
620 cleanExit( intermed
, 1 );
/* Propagate a non-zero child exit status as our own. */
623 if ( WEXITSTATUS(status
) != 0 )
624 cleanExit( intermed
, WEXITSTATUS(status
) );
629 /* GetModuleFileNameEx is used to find out where the current process's
630 * binary is. That location is searched first. If that fails then we go up one
631 * directory and look for the executable inside a directory of the same name
632 * in case we are executing from the source tree.
// progName: executable name to look for.
// The result array receives two candidate paths followed by a null terminator.
634 char **makePathChecksWin( const char *progName
)
637 char *imageFileName
= new char[len
];
638 HANDLE h
= GetCurrentProcess();
// NOTE(review): len is both the buffer size passed in and the returned length.
// If the call returns the full size, the terminator write below lands one past
// the end of the buffer; confirm against the GetModuleFileNameEx documentation.
639 len
= GetModuleFileNameEx( h
, NULL
, imageFileName
, len
);
640 imageFileName
[len
] = 0;
642 char **result
= new char*[3];
643 const char *lastSlash
= strrchr( imageFileName
, '\\' );
646 assert( lastSlash
!= 0 );
// Copy the image path and cut it off right after the last backslash.
647 char *path
= strdup( imageFileName
);
648 int givenPathLen
= (lastSlash
- imageFileName
) + 1;
649 path
[givenPathLen
] = 0;
// First candidate: same directory as the running binary.
651 int progNameLen
= strlen(progName
);
652 int length
= givenPathLen
+ progNameLen
+ 1;
653 char *check
= new char[length
];
654 sprintf( check
, "%s%s", path
, progName
);
655 result
[numChecks
++] = check
;
// Second candidate: up one level, then a directory named like the program.
657 length
= givenPathLen
+ 3 + progNameLen
+ 1 + progNameLen
+ 1;
658 check
= new char[length
];
659 sprintf( check
, "%s..\\%s\\%s", path
, progName
, progName
);
660 result
[numChecks
++] = check
;
662 result
[numChecks
] = 0;
// Run progName with _spawnv in wait mode, trying each candidate path in turn.
//
// progName: name used in error messages.
// pathChecks: null-terminated list of candidate executable paths.
// args: argument vector handed to _spawnv.
// intermed: intermediate file name, passed on to cleanExit() on failure.
666 void spawn( const char *progName
, char **pathChecks
,
667 ArgsVector
&args
, char *intermed
)
670 while ( *pathChecks
!= 0 ) {
671 //cerr << "trying to execute " << *pathChecks << endl;
672 result
= _spawnv( _P_WAIT
, *pathChecks
, args
.data
);
// Only a file-not-found failure is a reason to look at the next candidate.
// NOTE(review): the statement controlled by this test is not visible here.
673 if ( result
>= 0 || errno
!= ENOENT
)
679 error() << "failed to spawn " << progName
<< endl
;
680 cleanExit( intermed
, 1 );
684 cleanExit( intermed
, 1 );
// Complete frontendArgs and run the frontend as a child process.
//
// argv0: our own invocation path, used to locate the executable.
// inputFileName: source file handed to the frontend.
// intermed: intermediate file the frontend writes to.
689 void execFrontend( const char *argv0
, char *inputFileName
, char *intermed
)
691 /* The frontend program name. */
692 const char *progName
= "ragel";
// Final vector: program name, -x, the options collected by processArgs(),
// -o with the intermediate file, the input file, and a null terminator.
694 frontendArgs
.insert( 0, progName
);
695 frontendArgs
.insert( 1, "-x" );
696 frontendArgs
.append( "-o" );
697 frontendArgs
.append( intermed
);
698 frontendArgs
.append( inputFileName
);
699 frontendArgs
.append( 0 );
// Two platform variants follow; the preprocessor lines that select between
// them are not visible here.
702 char **pathChecks
= makePathChecksUnix( argv0
, progName
);
703 forkAndExec( progName
, pathChecks
, frontendArgs
, intermed
);
705 char **pathChecks
= makePathChecksWin( progName
);
706 spawn( progName
, pathChecks
, frontendArgs
, intermed
);
/* Pick the backend program, complete backendArgs and run it as a child
 * process.
 *
 * argv0: our own invocation path, used to locate the executable.
 * intermed: intermediate file the backend reads.
 * outputFileName: final output file, or null for none. */
710 void execBackend( const char *argv0
, char *intermed
, char *outputFileName
)
712 /* Locate the backend program */
713 const char *progName
= 0;
/* NOTE(review): the dot generator is presumably chosen when generateDot is
 * set; the condition is not visible here. */
715 progName
= "rlgen-dot";
/* Otherwise the program follows the host language. */
717 switch ( hostLang
->lang
) {
720 progName
= "rlgen-cd";
723 progName
= "rlgen-java";
726 progName
= "rlgen-ruby";
728 case HostLang::CSharp
:
729 progName
= "rlgen-csharp";
/* Final vector: program name, the options collected by processArgs(),
 * optionally -o with the output file, the intermediate file, and a null
 * terminator. */
733 backendArgs
.insert( 0, progName
);
734 if ( outputFileName
!= 0 ) {
735 backendArgs
.append( "-o" );
736 backendArgs
.append( outputFileName
);
738 backendArgs
.append( intermed
);
739 backendArgs
.append( 0 );
/* Two platform variants follow; the preprocessor lines that select between
 * them are not visible here. */
742 char **pathChecks
= makePathChecksUnix( argv0
, progName
);
743 forkAndExec( progName
, pathChecks
, backendArgs
, intermed
);
745 char **pathChecks
= makePathChecksWin( progName
);
746 spawn( progName
, pathChecks
, backendArgs
, intermed
);
750 /* Main, process args and call yyparse to start scanning input. */
/* Flow visible here: parse the arguments, validate them, then either run the
 * frontend in this process or create an intermediate file and run the frontend
 * and backend as child processes. NOTE(review): the condition that selects
 * between the two paths is not visible here. */
751 int main(int argc
, char **argv
)
753 char *inputFileName
= 0;
754 char *outputFileName
= 0;
756 processArgs( argc
, argv
, inputFileName
, outputFileName
);
758 /* If -M or -S are given and we're not generating a dot file then invoke
759 * the frontend. These options are not useful with code generators. */
760 if ( machineName
!= 0 || machineSpec
!= 0 ) {
765 /* Require an input file. If we use standard in then we won't have a file
766 * name on which to base the output. */
767 if ( inputFileName
== 0 )
768 error() << "no input file given" << endl
;
770 /* Bail on argument processing errors. */
771 if ( gblErrorCount
> 0 )
774 /* Make sure we are not writing to the same file as the input file. */
/* The comparison is on the names as given, character by character. */
775 if ( inputFileName
!= 0 && outputFileName
!= 0 &&
776 strcmp( inputFileName
, outputFileName
) == 0 )
778 error() << "output file \"" << outputFileName
<<
779 "\" is the same as the input file" << endp
;
/* In-process frontend path: its result becomes the exit status. */
783 return frontend( inputFileName
, outputFileName
);
785 char *intermed
= openIntermed( inputFileName
, outputFileName
);
787 /* From here on in the cleanExit function should be used to exit. */
789 /* Run the frontend, then the backend processes. */
790 execFrontend( argv
[0], inputFileName
, intermed
);
791 execBackend( argv
[0], intermed
, outputFileName
);
793 /* Clean up the intermediate. */
794 cleanExit( intermed
, 0 );