2 --[[--------------------------------------------------------------------
4 Gazelle: a system for building fast, reusable parsers
8 The top-level file for compiling an input grammar (written in a
9 human-readable text format) into a compiled grammar in Bitcode.
11 Copyright (c) 2007 Joshua Haberman. See LICENSE for details.
13 --------------------------------------------------------------------]]--
15 -- Include functionality for all parts of the compiler.
17 -- parsing of the input grammar file
18 require "bootstrap/rtn"
24 -- lookahead calculation
26 require "intfa_combine"
34 Gazelle grammar compiler (v0.2-prerelease)
35 gzlc [options] input-file
38 -h, --help you're looking at it.
40 -o <file> output filename. Default is input filename
41 with extension replaced with .gzc
43 -v, --verbose dump information about compilation process and
46 --version dump Gazelle version
49 version = "Gazelle v0.2-prerelease"
56 while argnum <= #arg do
58 if a == "-h" or a == "--help" then
63 output_filename = arg[argnum]
64 if output_filename == nil then
65 io.stderr:write("gzlc: argument -o must be followed by a file name\n")  -- use io.stderr: standard Lua defines no global `stderr`; this matches the "no input file" error path
68 elseif a == "-v" or a == "--verbose" then
70 elseif a == "--version" then
74 if input_filename then
75 io.stderr:write("gzlc: only one input file may be specified\n")  -- use io.stderr: standard Lua defines no global `stderr`; this matches the "no input file" error path
78 input_filename = arg[argnum]
83 if input_filename == nil then
84 io.stderr:write("gzlc: no input file\n")
88 if output_filename == nil then
89 output_file = input_filename:gsub("%.[^%.]*$", "") .. ".gzc"  -- default output name: drop the final ".ext" from the input name and append ".gzc". NOTE(review): this assigns output_file while the guard tests output_filename -- confirm which name later code reads
92 function print_verbose(str)
98 function write_verbose(str)
104 print_verbose(version)
107 -- We need to generate and emit RTNs, GLAs, and IntFAs. We work from the
108 -- top down: RTNs are generated from parsing the grammar, GLAs are
109 -- calculated from the RTNs by LL lookahead routines, and finally
110 -- IntFAs are generated from the RTNs and GLAs.
112 -- open and parse the grammar file
114 print_verbose(string.format("Opening input file '%s'...", input_filename))
115 input_file = io.open(input_filename, "r")  -- io.open returns nil (plus an error message) when the file cannot be opened
116 grm_str = input_file and input_file:read("*a")  -- read the whole file only when the open succeeded, so a failed open reaches the error check that follows instead of raising on a nil handle
117 if not input_file then
118 io.stderr:write(string.format("gzlc: couldn't open input file '%s'\n", input_filename))  -- use io.stderr (standard Lua has no global `stderr`); newline added to match the other gzlc error messages
121 print_verbose("Parsing grammar...")
122 grammar = parse_grammar(CharStream:new(grm_str))  -- wrap the text read from the input file in a CharStream and parse it; the result is kept in the global `grammar` that the following steps operate on
124 -- make the RTNs in the grammar deterministic and minimal
125 write_verbose("Converting RTN NFAs to DFAs...")
126 grammar:determinize_rtns()  -- return value is not captured; NOTE(review): presumably rewrites the RTNs held by `grammar` in place -- confirm
127 write_verbose("Minimizing RTN DFAs...")
128 grammar:minimize_rtns()  -- likewise called only for its effect on `grammar`
130 -- Generate GLAs by doing lookahead calculations.
131 -- This annotates every nontrivial state in the grammar with a GLA.
133 print_verbose(string.format("Doing LL(k) lookahead calculations (capped at k=%d)", k))  -- reports the value of k that is passed to compute_lookahead on the next line
134 compute_lookahead(grammar, k)  -- return value is not captured; per the comment above, this step annotates states of `grammar` with GLAs
137 -- we now have everything figured out at the RTN level. Now we just need
138 -- to figure out how many IntFAs to generate, which terminals each one
139 -- should handle, and generate/determinize/minimize those IntFAs.
140 -- print_verbose("Combining lexer IntFAs...")
141 -- intfas = intfa_combine(attributes.terminals, grammar)