Fixes for command-line handling in gzlc.
[gazelle.git] / compiler / bytecode.lua
blob11f771b641ee37a20025d34e8be0575314daaf84
1 --[[--------------------------------------------------------------------
3 Gazelle: a system for building fast, reusable parsers
5 bytecode.lua
7 Code that takes the final optimized parsing structures and emits them
8 to bytecode (in Bitcode format).
10 Copyright (c) 2007-2008 Joshua Haberman. See LICENSE for details.
12 --------------------------------------------------------------------]]--
14 require "bc"
16 -- See FILEFORMAT for details about what these constants mean
17 BC_INTFAS = 8
18 BC_INTFA = 9
19 BC_STRINGS = 10
20 BC_RTNS = 11
21 BC_RTN = 12
22 BC_GLAS = 13
23 BC_GLA = 14
25 BC_INTFA_STATE = 0
26 BC_INTFA_FINAL_STATE = 1
27 BC_INTFA_TRANSITION = 2
28 BC_INTFA_TRANSITION_RANGE = 3
30 BC_STRING = 0
32 BC_RTN_INFO = 0
33 BC_RTN_STATE_WITH_INTFA = 2
34 BC_RTN_STATE_WITH_GLA = 3
35 BC_RTN_TRIVIAL_STATE = 4
36 BC_RTN_TRANSITION_TERMINAL = 5
37 BC_RTN_TRANSITION_NONTERM = 6
39 BC_GLA_STATE = 0
40 BC_GLA_FINAL_STATE = 1
41 BC_GLA_TRANSITION = 2
43 if not print_verbose then
44 function print_verbose(str)
45 print(str)
46 end
47 end
49 function write_bytecode(grammar, outfilename)
50 -- write Bitcode header
51 bc_file = bc.File:new(outfilename, "GH")
52 abbrevs = define_abbrevs(bc_file)
54 -- Obtain linearized representations of all the DFAs from the Grammar object.
55 local strings = grammar:get_strings()
56 local rtns = grammar:get_flattened_rtn_list()
57 local glas = grammar:get_flattened_gla_list()
58 local intfas = grammar.master_intfas
60 -- emit the strings
61 print_verbose(string.format("Writing %d strings...", strings:count()))
62 bc_file:enter_subblock(BC_STRINGS)
63 for string in each(strings) do
64 bc_file:write_abbreviated_record(abbrevs.bc_string, string)
65 end
66 bc_file:end_subblock(BC_STRINGS)
68 -- emit the intfas
69 print_verbose(string.format("Writing %d IntFAs...", intfas:count()))
70 bc_file:enter_subblock(BC_INTFAS)
71 for intfa in each(intfas) do
72 emit_intfa(intfa, strings, bc_file, abbrevs)
73 end
74 bc_file:end_subblock(BC_INTFAS)
76 -- emit the GLAs
77 print_verbose(string.format("Writing %d GLAs...", glas:count()))
78 bc_file:enter_subblock(BC_GLAS)
79 for gla in each(glas) do
80 emit_gla(gla, strings, rtns, intfas, bc_file, abbrevs)
81 end
82 bc_file:end_subblock(BC_GLAS)
84 -- emit the RTNs
85 bc_file:enter_subblock(BC_RTNS)
86 print_verbose(string.format("Writing %d RTNs...", rtns:count()))
87 for name, rtn in each(rtns) do
88 emit_rtn(name, rtn, rtns, glas, intfas, strings, bc_file, abbrevs)
89 end
90 bc_file:end_subblock(BC_RTNS)
92 end
95 function emit_intfa(intfa, strings, bc_file, abbrevs)
96 bc_file:enter_subblock(BC_INTFA)
98 local intfa_state_offsets = {}
99 local intfa_transitions = {}
101 -- order the states such that the start state is emitted first
102 local states = intfa:states()
103 states:remove(intfa.start)
104 states = states:to_array()
105 table.insert(states, 1, intfa.start)
107 -- do a first pass over the states that records their order and builds
108 -- each state's list of transitions.
109 local state_transitions = {}
110 for i, state in ipairs(states) do
111 intfa_state_offsets[state] = i - 1
113 state_transitions[state] = {}
114 for edge_val, target_state, properties in state:transitions() do
115 for range in edge_val:each_range() do
116 table.insert(state_transitions[state], {range, target_state})
120 -- sort the transitions into a stable order, to make the output
121 -- more deterministic.
122 table.sort(state_transitions[state], function (a, b) return a[1].low < b[1].low end)
124 -- add this state's transitions to the global list of transitions for the IntFA
125 for t in each(state_transitions[state])
126 do table.insert(intfa_transitions, t)
130 print_verbose(string.format(" %d states, %d transitions", #states, #intfa_transitions))
132 -- emit the states
133 for state in each(states) do
134 if state.final then
135 bc_file:write_abbreviated_record(abbrevs.bc_intfa_final_state,
136 #state_transitions[state],
137 strings:offset_of(state.final))
138 else
139 bc_file:write_abbreviated_record(abbrevs.bc_intfa_state, #state_transitions[state])
143 -- emit the transitions
144 for transition in each(intfa_transitions) do
145 local range, target_state = unpack(transition)
146 target_state_offset = intfa_state_offsets[target_state]
147 if range.low == range.high then
148 bc_file:write_abbreviated_record(abbrevs.bc_intfa_transition, range.low, target_state_offset)
149 else
150 local high = range.high
151 if high == math.huge then high = 255 end -- temporary ASCII-specific hack
152 bc_file:write_abbreviated_record(abbrevs.bc_intfa_transition_range, range.low, high, target_state_offset)
156 bc_file:end_subblock(BC_INTFA)
159 function emit_gla(gla, strings, rtns, intfas, bc_file, abbrevs)
160 bc_file:enter_subblock(BC_GLA)
162 local states = OrderedSet:new()
163 states:add(gla.start)
164 for state in each(gla:states()) do
165 if state ~= gla.start then
166 states:add(state)
170 -- emit states
171 for state in each(states) do
172 if state.final then
173 -- figure out the offset of the RTN transition this GLA final state implies.
174 local ordered_rtn = rtns:get(gla.rtn_state.rtn.name)
175 local transitions = ordered_rtn.transitions[gla.rtn_state]
176 local transition_offset = nil
177 if state.final[1] == 0 and state.final[2] == 0 then
178 transition_offset = 0
179 else
180 for i=1,#transitions do
181 if transitions[i][1] == state.final[1] and transitions[i][2] == state.final[2] then
182 transition_offset = i
183 break
188 if transition_offset == nil then
189 error("GLA final state indicated a state that was not found in the RTN state.")
191 bc_file:write_abbreviated_record(abbrevs.bc_gla_final_state, transition_offset)
192 else
193 bc_file:write_abbreviated_record(abbrevs.bc_gla_state,
194 intfas:offset_of(state.intfa),
195 state:num_transitions())
199 -- emit transitions
200 for state in each(states) do
201 for edge_val, dest_state in state:transitions() do
202 local edge_num = 0
203 if edge_val ~= fa.eof then
204 edge_num = strings:offset_of(edge_val)
206 bc_file:write_abbreviated_record(abbrevs.bc_gla_transition,
207 edge_num,
208 states:offset_of(dest_state))
212 bc_file:end_subblock(BC_GLA)
215 function emit_rtn(name, rtn, rtns, glas, intfas, strings, bc_file, abbrevs)
216 -- emit RTN name
217 bc_file:enter_subblock(BC_RTN)
218 bc_file:write_abbreviated_record(abbrevs.bc_rtn_info, strings:offset_of(name), rtn.slot_count)
220 -- emit states
221 for state in each(rtn.states) do
222 local is_final
223 if state.final then
224 is_final = 1
225 else
226 is_final = 0
229 if state.gla then
230 bc_file:write_abbreviated_record(abbrevs.bc_rtn_state_with_gla,
231 #rtn.transitions[state],
232 is_final,
233 glas:offset_of(state.gla))
234 elseif state.intfa then
235 bc_file:write_abbreviated_record(abbrevs.bc_rtn_state_with_intfa,
236 #rtn.transitions[state],
237 is_final,
238 intfas:offset_of(state.intfa))
239 else
240 bc_file:write_abbreviated_record(abbrevs.bc_rtn_trivial_state,
241 #rtn.transitions[state],
242 is_final)
246 -- emit transitions
247 for state in each(rtn.states) do
248 for transition in each(rtn.transitions[state]) do
249 local edge_val, dest_state, properties = unpack(transition)
250 if fa.is_nonterm(edge_val) then
251 bc_file:write_abbreviated_record(abbrevs.bc_rtn_transition_nonterm,
252 rtns:offset_of_key(edge_val.name),
253 rtn.states:offset_of(dest_state),
254 strings:offset_of(properties.name),
255 properties.slotnum-1)
256 else
257 bc_file:write_abbreviated_record(abbrevs.bc_rtn_transition_terminal,
258 strings:offset_of(edge_val),
259 rtn.states:offset_of(dest_state),
260 strings:offset_of(properties.name),
261 properties.slotnum-1)
266 bc_file:end_subblock(BC_RTN)
270 function define_abbrevs(bc_file)
271 abbrevs = {}
273 -- Enter a BLOCKINFO record to define abbreviations for all our records.
274 -- See FILEFORMAT for a description of what all the record types mean.
275 bc_file:enter_subblock(bc.BLOCKINFO)
277 -- IntFA abbreviations
278 bc_file:write_unabbreviated_record(bc.SETBID, BC_INTFA)
280 abbrevs.bc_intfa_final_state = bc_file:define_abbreviation(4,
281 bc.LiteralOp:new(BC_INTFA_FINAL_STATE),
282 bc.VBROp:new(5),
283 bc.VBROp:new(5))
285 abbrevs.bc_intfa_state = bc_file:define_abbreviation(5,
286 bc.LiteralOp:new(BC_INTFA_STATE),
287 bc.VBROp:new(5))
289 abbrevs.bc_intfa_transition = bc_file:define_abbreviation(6,
290 bc.LiteralOp:new(BC_INTFA_TRANSITION),
291 bc.VBROp:new(8),
292 bc.VBROp:new(6))
294 abbrevs.bc_intfa_transition_range = bc_file:define_abbreviation(7,
295 bc.LiteralOp:new(BC_INTFA_TRANSITION_RANGE),
296 bc.VBROp:new(8),
297 bc.VBROp:new(8),
298 bc.VBROp:new(6))
300 -- Strings abbreviations
301 bc_file:write_unabbreviated_record(bc.SETBID, BC_STRINGS)
303 abbrevs.bc_string = bc_file:define_abbreviation(4,
304 bc.LiteralOp:new(BC_STRING),
305 bc.ArrayOp:new(bc.FixedOp:new(7)))
307 -- RTN abbreviations
308 bc_file:write_unabbreviated_record(bc.SETBID, BC_RTN)
310 abbrevs.bc_rtn_info = bc_file:define_abbreviation(4,
311 bc.LiteralOp:new(BC_RTN_INFO),
312 bc.VBROp:new(6),
313 bc.VBROp:new(4))
315 abbrevs.bc_rtn_state_with_intfa = bc_file:define_abbreviation(5,
316 bc.LiteralOp:new(BC_RTN_STATE_WITH_INTFA),
317 bc.VBROp:new(4),
318 bc.FixedOp:new(1),
319 bc.VBROp:new(4))
321 abbrevs.bc_rtn_state_with_gla = bc_file:define_abbreviation(6,
322 bc.LiteralOp:new(BC_RTN_STATE_WITH_GLA),
323 bc.VBROp:new(4),
324 bc.FixedOp:new(1),
325 bc.VBROp:new(4))
327 abbrevs.bc_rtn_trivial_state = bc_file:define_abbreviation(7,
328 bc.LiteralOp:new(BC_RTN_TRIVIAL_STATE),
329 bc.FixedOp:new(1),
330 bc.FixedOp:new(1))
332 abbrevs.bc_rtn_transition_terminal = bc_file:define_abbreviation(8,
333 bc.LiteralOp:new(BC_RTN_TRANSITION_TERMINAL),
334 bc.VBROp:new(6),
335 bc.VBROp:new(5),
336 bc.VBROp:new(5),
337 bc.VBROp:new(4))
339 abbrevs.bc_rtn_transition_nonterm = bc_file:define_abbreviation(9,
340 bc.LiteralOp:new(BC_RTN_TRANSITION_NONTERM),
341 bc.VBROp:new(6),
342 bc.VBROp:new(5),
343 bc.VBROp:new(5),
344 bc.VBROp:new(4))
346 -- GLA abbreviations
347 bc_file:write_unabbreviated_record(bc.SETBID, BC_GLA)
349 abbrevs.bc_gla_state = bc_file:define_abbreviation(4,
350 bc.LiteralOp:new(BC_GLA_STATE),
351 bc.VBROp:new(4),
352 bc.VBROp:new(4))
354 abbrevs.bc_gla_final_state = bc_file:define_abbreviation(5,
355 bc.LiteralOp:new(BC_GLA_FINAL_STATE),
356 bc.VBROp:new(4))
358 abbrevs.bc_gla_transition = bc_file:define_abbreviation(6,
359 bc.LiteralOp:new(BC_GLA_TRANSITION),
360 bc.VBROp:new(4),
361 bc.VBROp:new(4))
363 bc_file:end_subblock(bc.BLOCKINFO)
365 return abbrevs
368 -- vim:et:sts=2:sw=2