Notes on version 6.3.
[ragel.git] / ragel / main.cpp
blob4b2a2a12aeb41dc937b8ac3043bd299602da70a0
1 /*
2 * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca>
3 */
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #include <stdlib.h>
23 #include <string.h>
24 #include <stdio.h>
25 #include <iostream>
26 #include <fstream>
27 #include <unistd.h>
28 #include <sstream>
29 #include <unistd.h>
30 #include <sys/types.h>
31 #include <sys/stat.h>
32 #include <fcntl.h>
33 #include <errno.h>
35 #ifndef _WIN32
36 #include <sys/wait.h>
37 #else
38 #include <windows.h>
39 #include <psapi.h>
40 #include <time.h>
41 #include <io.h>
42 #include <process.h>
44 #if _MSC_VER
45 #define S_IRUSR _S_IREAD
46 #define S_IWUSR _S_IWRITE
47 #endif
48 #endif
50 /* Parsing. */
51 #include "ragel.h"
52 #include "rlscan.h"
54 /* Parameters and output. */
55 #include "pcheck.h"
56 #include "vector.h"
57 #include "version.h"
58 #include "common.h"
60 using std::istream;
61 using std::ostream;
62 using std::ifstream;
63 using std::ofstream;
64 using std::cin;
65 using std::cout;
66 using std::cerr;
67 using std::endl;
68 using std::ios;
69 using std::streamsize;
71 /* Controls minimization. */
72 MinimizeLevel minimizeLevel = MinimizePartition2;
73 MinimizeOpt minimizeOpt = MinimizeMostOps;
75 /* Graphviz dot file generation. */
76 char *machineSpec = 0, *machineName = 0;
77 bool machineSpecFound = false;
78 bool wantDupsRemoved = true;
80 bool printStatistics = false;
81 bool frontendOnly = false;
82 bool generateDot = false;
84 ArgsVector frontendArgs;
85 ArgsVector backendArgs;
86 ArgsVector includePaths;
88 /* Print a summary of the options. */
89 void usage()
91 cout <<
92 "usage: ragel [options] file\n"
93 "general:\n"
94 " -h, -H, -?, --help Print this usage and exit\n"
95 " -v, --version Print version information and exit\n"
96 " -o <file> Write output to <file>\n"
97 " -s Print some statistics on stderr\n"
98 " -d Do not remove duplicates from action lists\n"
99 " -I <dir> Add <dir> to the list of directories to search\n"
100 " for included an imported files\n"
101 "error reporting format:\n"
102 " --error-format=gnu file:line:column: message (default)\n"
103 " --error-format=msvc file(line,column): message\n"
104 "fsm minimization:\n"
105 " -n Do not perform minimization\n"
106 " -m Minimize at the end of the compilation\n"
107 " -l Minimize after most operations (default)\n"
108 " -e Minimize after every operation\n"
109 "visualization:\n"
110 " -x Run the frontend only: emit XML intermediate format\n"
111 " -V Generate a dot file for Graphviz\n"
112 " -p Display printable characters on labels\n"
113 " -S <spec> FSM specification to output (for rlgen-dot)\n"
114 " -M <machine> Machine definition/instantiation to output (for rlgen-dot)\n"
115 "host language:\n"
116 " -C The host language is C, C++, Obj-C or Obj-C++ (default)\n"
117 " -D The host language is D\n"
118 " -J The host language is Java\n"
119 " -R The host language is Ruby\n"
120 " -A The host language is C#\n"
121 "line direcives: (C/D/C# only)\n"
122 " -L Inhibit writing of #line directives\n"
123 "code style: (C/Ruby/C# only)\n"
124 " -T0 Table driven FSM (default)\n"
125 " -T1 Faster table driven FSM\n"
126 " -F0 Flat table driven FSM\n"
127 " -F1 Faster flat table-driven FSM\n"
128 "code style: (C/C# only)\n"
129 " -G0 Goto-driven FSM\n"
130 " -G1 Faster goto-driven FSM\n"
131 "code style: (C only)\n"
132 " -G2 Really fast goto-driven FSM\n"
133 " -P<N> N-Way Split really fast goto-driven FSM\n"
136 exit(0);
139 /* Print version information and exit. */
140 void version()
142 cout << "Ragel State Machine Compiler version " VERSION << " " PUBDATE << endl <<
143 "Copyright (c) 2001-2007 by Adrian Thurston" << endl;
144 exit(0);
147 /* Error reporting format. */
148 ErrorFormat errorFormat = ErrorFormatGNU;
150 InputLoc makeInputLoc( const char *fileName, int line, int col)
152 InputLoc loc = { fileName, line, col };
153 return loc;
156 ostream &operator<<( ostream &out, const InputLoc &loc )
158 assert( loc.fileName != 0 );
159 switch ( errorFormat ) {
160 case ErrorFormatMSVC:
161 out << loc.fileName << "(" << loc.line;
162 if ( loc.col )
163 out << "," << loc.col;
164 out << ")";
165 break;
167 default:
168 out << loc.fileName << ":" << loc.line;
169 if ( loc.col )
170 out << ":" << loc.col;
171 break;
173 return out;
176 /* Total error count. */
177 int gblErrorCount = 0;
179 /* Print the opening to a warning in the input, then return the error ostream. */
180 ostream &warning( const InputLoc &loc )
182 cerr << loc << ": warning: ";
183 return cerr;
186 /* Print the opening to a program error, then return the error stream. */
187 ostream &error()
189 gblErrorCount += 1;
190 cerr << PROGNAME ": ";
191 return cerr;
194 ostream &error( const InputLoc &loc )
196 gblErrorCount += 1;
197 cerr << loc << ": ";
198 return cerr;
/* Write path to out with every backslash doubled; all other characters are
 * copied through unchanged. */
void escapeLineDirectivePath( std::ostream &out, char *path )
{
	char *cursor = path;
	while ( *cursor != 0 ) {
		const char ch = *cursor++;
		if ( ch == '\\' )
			out << "\\\\";
		else
			out << ch;
	}
}
211 void processArgs( int argc, char **argv, char *&inputFileName, char *&outputFileName )
213 ParamCheck pc("xo:dnmleabjkS:M:I:CDJRAvHh?-:sT:F:G:P:LpV", argc, argv);
215 while ( pc.check() ) {
216 switch ( pc.state ) {
217 case ParamCheck::match:
218 switch ( pc.parameter ) {
219 case 'V':
220 generateDot = true;
221 break;
223 case 'x':
224 frontendOnly = true;
225 break;
227 /* Output. */
228 case 'o':
229 if ( *pc.paramArg == 0 )
230 error() << "a zero length output file name was given" << endl;
231 else if ( outputFileName != 0 )
232 error() << "more than one output file name was given" << endl;
233 else {
234 /* Ok, remember the output file name. */
235 outputFileName = pc.paramArg;
237 break;
239 /* Minimization, mostly hidden options. */
240 case 'd':
241 wantDupsRemoved = false;
242 frontendArgs.append( "-d" );
243 break;
245 /* Minimization, mostly hidden options. */
246 case 'n':
247 minimizeOpt = MinimizeNone;
248 frontendArgs.append( "-n" );
249 break;
250 case 'm':
251 minimizeOpt = MinimizeEnd;
252 frontendArgs.append( "-m" );
253 break;
254 case 'l':
255 minimizeOpt = MinimizeMostOps;
256 frontendArgs.append( "-l" );
257 break;
258 case 'e':
259 minimizeOpt = MinimizeEveryOp;
260 frontendArgs.append( "-e" );
261 break;
262 case 'a':
263 minimizeLevel = MinimizeApprox;
264 frontendArgs.append( "-a" );
265 break;
266 case 'b':
267 minimizeLevel = MinimizeStable;
268 frontendArgs.append( "-b" );
269 break;
270 case 'j':
271 minimizeLevel = MinimizePartition1;
272 frontendArgs.append( "-j" );
273 break;
274 case 'k':
275 minimizeLevel = MinimizePartition2;
276 frontendArgs.append( "-k" );
277 break;
279 /* Machine spec. */
280 case 'S':
281 if ( *pc.paramArg == 0 )
282 error() << "please specify an argument to -S" << endl;
283 else if ( machineSpec != 0 )
284 error() << "more than one -S argument was given" << endl;
285 else {
286 /* Ok, remember the path to the machine to generate. */
287 machineSpec = pc.paramArg;
288 frontendArgs.append( "-S" );
289 frontendArgs.append( pc.paramArg );
291 break;
293 /* Machine path. */
294 case 'M':
295 if ( *pc.paramArg == 0 )
296 error() << "please specify an argument to -M" << endl;
297 else if ( machineName != 0 )
298 error() << "more than one -M argument was given" << endl;
299 else {
300 /* Ok, remember the machine name to generate. */
301 machineName = pc.paramArg;
302 frontendArgs.append( "-M" );
303 frontendArgs.append( pc.paramArg );
305 break;
307 case 'I':
308 if ( *pc.paramArg == 0 )
309 error() << "please specify an argument to -I" << endl;
310 else {
311 includePaths.append( pc.paramArg );
312 frontendArgs.append( "-I" );
313 frontendArgs.append( pc.paramArg );
315 break;
317 /* Host language types. */
318 case 'C':
319 hostLang = &hostLangC;
320 frontendArgs.append( "-C" );
321 break;
322 case 'D':
323 hostLang = &hostLangD;
324 frontendArgs.append( "-D" );
325 break;
326 case 'J':
327 hostLang = &hostLangJava;
328 frontendArgs.append( "-J" );
329 break;
330 case 'R':
331 hostLang = &hostLangRuby;
332 frontendArgs.append( "-R" );
333 break;
334 case 'A':
335 hostLang = &hostLangCSharp;
336 frontendArgs.append( "-A" );
337 break;
339 /* Version and help. */
340 case 'v':
341 version();
342 break;
343 case 'H': case 'h': case '?':
344 usage();
345 break;
346 case 's':
347 printStatistics = true;
348 frontendArgs.append( "-s" );
349 break;
350 case '-': {
351 char *eq = strchr( pc.paramArg, '=' );
353 if ( eq != 0 )
354 *eq++ = 0;
356 if ( strcmp( pc.paramArg, "help" ) == 0 )
357 usage();
358 else if ( strcmp( pc.paramArg, "version" ) == 0 )
359 version();
360 else if ( strcmp( pc.paramArg, "error-format" ) == 0 ) {
361 if ( eq == 0 )
362 error() << "expecting '=value' for error-format" << endl;
363 else if ( strcmp( eq, "gnu" ) == 0 ) {
364 errorFormat = ErrorFormatGNU;
365 frontendArgs.append( "--error-format=gnu" );
367 else if ( strcmp( eq, "msvc" ) == 0 ) {
368 errorFormat = ErrorFormatMSVC;
369 frontendArgs.append( "--error-format=msvc" );
371 else {
372 error() << "invalid value for error-format" << endl;
375 else if ( strcmp( pc.paramArg, "rbx" ) == 0 )
376 backendArgs.append( "--rbx" );
377 else {
378 error() << "--" << pc.paramArg <<
379 " is an invalid argument" << endl;
381 break;
384 /* Passthrough args. */
385 case 'T':
386 backendArgs.append( "-T" );
387 backendArgs.append( pc.paramArg );
388 break;
389 case 'F':
390 backendArgs.append( "-F" );
391 backendArgs.append( pc.paramArg );
392 break;
393 case 'G':
394 backendArgs.append( "-G" );
395 backendArgs.append( pc.paramArg );
396 break;
397 case 'P':
398 backendArgs.append( "-P" );
399 backendArgs.append( pc.paramArg );
400 break;
401 case 'p':
402 backendArgs.append( "-p" );
403 break;
404 case 'L':
405 backendArgs.append( "-L" );
406 break;
408 break;
410 case ParamCheck::invalid:
411 error() << "-" << pc.parameter << " is an invalid argument" << endl;
412 break;
414 case ParamCheck::noparam:
415 /* It is interpreted as an input file. */
416 if ( *pc.curArg == 0 )
417 error() << "a zero length input file name was given" << endl;
418 else if ( inputFileName != 0 )
419 error() << "more than one input file name was given" << endl;
420 else {
421 /* OK, Remember the filename. */
422 inputFileName = pc.curArg;
424 break;
429 int frontend( char *inputFileName, char *outputFileName )
431 /* Open the input file for reading. */
432 assert( inputFileName != 0 );
433 ifstream *inFile = new ifstream( inputFileName );
434 istream *inStream = inFile;
435 if ( ! inFile->is_open() )
436 error() << "could not open " << inputFileName << " for reading" << endp;
438 /* Used for just a few things. */
439 std::ostringstream hostData;
441 if ( machineSpec == 0 && machineName == 0 )
442 hostData << "<host line=\"1\" col=\"1\">";
444 Scanner scanner( inputFileName, *inStream, hostData, 0, 0, 0, false );
445 scanner.do_scan();
447 /* Finished, final check for errors.. */
448 if ( gblErrorCount > 0 )
449 return 1;
451 /* Now send EOF to all parsers. */
452 terminateAllParsers();
454 /* Finished, final check for errors.. */
455 if ( gblErrorCount > 0 )
456 return 1;
458 if ( machineSpec == 0 && machineName == 0 )
459 hostData << "</host>\n";
461 if ( gblErrorCount > 0 )
462 return 1;
464 ostream *outputFile = 0;
465 if ( outputFileName != 0 )
466 outputFile = new ofstream( outputFileName );
467 else
468 outputFile = &cout;
470 /* Write the machines, then the surrounding code. */
471 writeMachines( *outputFile, hostData.str(), inputFileName );
473 /* Close the intermediate file. */
474 if ( outputFileName != 0 )
475 delete outputFile;
477 return gblErrorCount > 0;
/* Build the name template for the intermediate file. The result is the
 * directory part of baseFileName (everything up to and including the last
 * '/', or nothing when there is no '/') followed by "ragel-XXXXXX.xml". The
 * returned buffer is allocated with new[]. */
char *makeIntermedTemplate( char *baseFileName )
{
	const char *templ = "ragel-XXXXXX.xml";

	/* Number of leading characters of baseFileName to keep. */
	int dirLen = 0;
	char *lastSlash = strrchr( baseFileName, '/' );
	if ( lastSlash != 0 )
		dirLen = lastSlash - baseFileName + 1;

	char *result = new char[dirLen + strlen(templ) + 1];
	memcpy( result, baseFileName, dirLen );
	strcpy( result + dirLen, templ );
	return result;
}
498 char *openIntermed( char *inputFileName, char *outputFileName )
500 srand(time(0));
501 char *result = 0;
503 /* Which filename do we use as the base? */
504 char *baseFileName = outputFileName != 0 ? outputFileName : inputFileName;
506 /* The template for the intermediate file name. */
507 char *intermedFileName = makeIntermedTemplate( baseFileName );
509 /* Randomize the name and try to open. */
510 char fnChars[] = "abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
511 char *firstX = strrchr( intermedFileName, 'X' ) - 5;
512 for ( int tries = 0; tries < 20; tries++ ) {
513 /* Choose a random name. */
514 for ( int x = 0; x < 6; x++ )
515 firstX[x] = fnChars[rand() % 52];
517 /* Try to open the file. */
518 int fd = ::open( intermedFileName, O_WRONLY|O_EXCL|O_CREAT, S_IRUSR|S_IWUSR );
520 if ( fd > 0 ) {
521 /* Success. Close the file immediately and return the name for use
522 * by the child processes. */
523 ::close( fd );
524 result = intermedFileName;
525 break;
528 if ( errno == EACCES ) {
529 error() << "failed to open temp file " << intermedFileName <<
530 ", access denied" << endp;
534 if ( result == 0 )
535 error() << "abnormal error: cannot find unique name for temp file" << endp;
537 return result;
/* Delete the intermediate file, then end the program with the given status.
 * Never returns. */
void cleanExit( char *intermed, int status )
{
	::unlink( intermed );
	::exit( status );
}
547 #ifndef _WIN32
549 /* If any forward slash is found in argv0 then it is assumed that the path is
550 * explicit and the path to the backend executable should be derived from
551 * that. Whe check that location and also go up one then inside a directory of
552 * the same name in case we are executing from the source tree. If no forward
553 * slash is found it is assumed the file is being run from the installed
554 * location. The PREFIX supplied during configuration is used. */
555 char **makePathChecksUnix( const char *argv0, const char *progName )
557 char **result = new char*[3];
558 const char *lastSlash = strrchr( argv0, '/' );
559 int numChecks = 0;
561 if ( lastSlash != 0 ) {
562 char *path = strdup( argv0 );
563 int givenPathLen = (lastSlash - argv0) + 1;
564 path[givenPathLen] = 0;
566 int progNameLen = strlen(progName);
567 int length = givenPathLen + progNameLen + 1;
568 char *check = new char[length];
569 sprintf( check, "%s%s", path, progName );
570 result[numChecks++] = check;
572 length = givenPathLen + 3 + progNameLen + 1 + progNameLen + 1;
573 check = new char[length];
574 sprintf( check, "%s../%s/%s", path, progName, progName );
575 result[numChecks++] = check;
577 else {
578 int prefixLen = strlen(PREFIX);
579 int progNameLen = strlen(progName);
580 int length = prefixLen + 5 + progNameLen + 1;
581 char *check = new char[length];
583 sprintf( check, PREFIX "/bin/%s", progName );
584 result[numChecks++] = check;
587 result[numChecks] = 0;
588 return result;
592 void forkAndExec( const char *progName, char **pathChecks,
593 ArgsVector &args, char *intermed )
595 pid_t pid = fork();
596 if ( pid < 0 ) {
597 /* Error, no child created. */
598 error() << "failed to fork for " << progName << endl;
599 cleanExit( intermed, 1 );
601 else if ( pid == 0 ) {
602 /* child */
603 while ( *pathChecks != 0 ) {
604 /* Execv does not modify argv, it just uses the const form that is
605 * compatible with the most code. Ours not included. */
606 execv( *pathChecks, (char *const*) args.data );
607 pathChecks += 1;
609 error() << "failed to exec " << progName << endl;
610 cleanExit( intermed, 1 );
613 /* Parent process, wait for the child. */
614 int status;
615 wait( &status );
617 /* What happened with the child. */
618 if ( ! WIFEXITED( status ) ) {
619 error() << progName << " did not exit normally" << endl;
620 cleanExit( intermed, 1 );
623 if ( WEXITSTATUS(status) != 0 )
624 cleanExit( intermed, WEXITSTATUS(status) );
627 #else
629 /* GetModuleFileNameEx is used to find out where the the current process's
630 * binary is. That location is searched first. If that fails then we go up one
631 * directory and look for the executable inside a directory of the same name
632 * in case we are executing from the source tree.
633 * */
634 char **makePathChecksWin( const char *progName )
636 int len = 1024;
637 char *imageFileName = new char[len];
638 HANDLE h = GetCurrentProcess();
639 len = GetModuleFileNameEx( h, NULL, imageFileName, len );
640 imageFileName[len] = 0;
642 char **result = new char*[3];
643 const char *lastSlash = strrchr( imageFileName, '\\' );
644 int numChecks = 0;
646 assert( lastSlash != 0 );
647 char *path = strdup( imageFileName );
648 int givenPathLen = (lastSlash - imageFileName) + 1;
649 path[givenPathLen] = 0;
651 int progNameLen = strlen(progName);
652 int length = givenPathLen + progNameLen + 1;
653 char *check = new char[length];
654 sprintf( check, "%s%s", path, progName );
655 result[numChecks++] = check;
657 length = givenPathLen + 3 + progNameLen + 1 + progNameLen + 1;
658 check = new char[length];
659 sprintf( check, "%s..\\%s\\%s", path, progName, progName );
660 result[numChecks++] = check;
662 result[numChecks] = 0;
663 return result;
666 void spawn( const char *progName, char **pathChecks,
667 ArgsVector &args, char *intermed )
669 int result = 0;
670 while ( *pathChecks != 0 ) {
671 //cerr << "trying to execute " << *pathChecks << endl;
672 result = _spawnv( _P_WAIT, *pathChecks, args.data );
673 if ( result >= 0 || errno != ENOENT )
674 break;
675 pathChecks += 1;
678 if ( result < 0 ) {
679 error() << "failed to spawn " << progName << endl;
680 cleanExit( intermed, 1 );
683 if ( result > 0 )
684 cleanExit( intermed, 1 );
687 #endif
689 void execFrontend( const char *argv0, char *inputFileName, char *intermed )
691 /* The frontend program name. */
692 const char *progName = "ragel";
694 frontendArgs.insert( 0, progName );
695 frontendArgs.insert( 1, "-x" );
696 frontendArgs.append( "-o" );
697 frontendArgs.append( intermed );
698 frontendArgs.append( inputFileName );
699 frontendArgs.append( 0 );
701 #ifndef _WIN32
702 char **pathChecks = makePathChecksUnix( argv0, progName );
703 forkAndExec( progName, pathChecks, frontendArgs, intermed );
704 #else
705 char **pathChecks = makePathChecksWin( progName );
706 spawn( progName, pathChecks, frontendArgs, intermed );
707 #endif
710 void execBackend( const char *argv0, char *intermed, char *outputFileName )
712 /* Locate the backend program */
713 const char *progName = 0;
714 if ( generateDot )
715 progName = "rlgen-dot";
716 else {
717 switch ( hostLang->lang ) {
718 case HostLang::C:
719 case HostLang::D:
720 progName = "rlgen-cd";
721 break;
722 case HostLang::Java:
723 progName = "rlgen-java";
724 break;
725 case HostLang::Ruby:
726 progName = "rlgen-ruby";
727 break;
728 case HostLang::CSharp:
729 progName = "rlgen-csharp";
733 backendArgs.insert( 0, progName );
734 if ( outputFileName != 0 ) {
735 backendArgs.append( "-o" );
736 backendArgs.append( outputFileName );
738 backendArgs.append( intermed );
739 backendArgs.append( 0 );
741 #ifndef _WIN32
742 char **pathChecks = makePathChecksUnix( argv0, progName );
743 forkAndExec( progName, pathChecks, backendArgs, intermed );
744 #else
745 char **pathChecks = makePathChecksWin( progName );
746 spawn( progName, pathChecks, backendArgs, intermed );
747 #endif
750 /* Main, process args and call yyparse to start scanning input. */
751 int main(int argc, char **argv)
753 char *inputFileName = 0;
754 char *outputFileName = 0;
756 processArgs( argc, argv, inputFileName, outputFileName );
758 /* If -M or -S are given and we're not generating a dot file then invoke
759 * the frontend. These options are not useful with code generators. */
760 if ( machineName != 0 || machineSpec != 0 ) {
761 if ( !generateDot )
762 frontendOnly = true;
765 /* Require an input file. If we use standard in then we won't have a file
766 * name on which to base the output. */
767 if ( inputFileName == 0 )
768 error() << "no input file given" << endl;
770 /* Bail on argument processing errors. */
771 if ( gblErrorCount > 0 )
772 exit(1);
774 /* Make sure we are not writing to the same file as the input file. */
775 if ( inputFileName != 0 && outputFileName != 0 &&
776 strcmp( inputFileName, outputFileName ) == 0 )
778 error() << "output file \"" << outputFileName <<
779 "\" is the same as the input file" << endp;
782 if ( frontendOnly )
783 return frontend( inputFileName, outputFileName );
785 char *intermed = openIntermed( inputFileName, outputFileName );
787 /* From here on in the cleanExit function should be used to exit. */
789 /* Run the frontend, then the backend processes. */
790 execFrontend( argv[0], inputFileName, intermed );
791 execBackend( argv[0], intermed, outputFileName );
793 /* Clean up the intermediate. */
794 cleanExit( intermed, 0 );
796 return 0;