1 --[[--------------------------------------------------------------------
3 Gazelle: a system for building fast, reusable parsers
7 Code that takes the final optimized parsing structures and emits them
8 to bytecode (in Bitcode format).
10 Copyright (c) 2007-2008 Joshua Haberman. See LICENSE for details.
12 --------------------------------------------------------------------]]--
16 -- See FILEFORMAT for details about what these constants mean
26 BC_INTFA_FINAL_STATE
= 1
27 BC_INTFA_TRANSITION
= 2
28 BC_INTFA_TRANSITION_RANGE
= 3
33 BC_RTN_STATE_WITH_INTFA
= 2
34 BC_RTN_STATE_WITH_GLA
= 3
35 BC_RTN_TRIVIAL_STATE
= 4
36 BC_RTN_TRANSITION_TERMINAL
= 5
37 BC_RTN_TRANSITION_NONTERM
= 6
40 BC_GLA_FINAL_STATE
= 1
43 if not print_verbose
then
44 function print_verbose(str
)
49 function write_bytecode(grammar
, outfilename
)
50 -- write Bitcode header
51 bc_file
= bc
.File
:new(outfilename
, "GH")
52 abbrevs
= define_abbrevs(bc_file
)
54 -- Obtain linearized representations of all the DFAs from the Grammar object.
55 local strings
= grammar
:get_strings()
56 local rtns
= grammar
:get_flattened_rtn_list()
57 local glas
= grammar
:get_flattened_gla_list()
58 local intfas
= grammar
.master_intfas
61 print_verbose(string.format("Writing %d strings...", strings
:count()))
62 bc_file
:enter_subblock(BC_STRINGS
)
63 for string in each(strings
) do
64 bc_file
:write_abbreviated_record(abbrevs
.bc_string
, string)
66 bc_file
:end_subblock(BC_STRINGS
)
69 print_verbose(string.format("Writing %d IntFAs...", intfas
:count()))
70 bc_file
:enter_subblock(BC_INTFAS
)
71 for intfa
in each(intfas
) do
72 emit_intfa(intfa
, strings
, bc_file
, abbrevs
)
74 bc_file
:end_subblock(BC_INTFAS
)
77 print_verbose(string.format("Writing %d GLAs...", glas
:count()))
78 bc_file
:enter_subblock(BC_GLAS
)
79 for gla
in each(glas
) do
80 emit_gla(gla
, strings
, rtns
, intfas
, bc_file
, abbrevs
)
82 bc_file
:end_subblock(BC_GLAS
)
85 bc_file
:enter_subblock(BC_RTNS
)
86 print_verbose(string.format("Writing %d RTNs...", rtns
:count()))
87 for name
, rtn
in each(rtns
) do
88 emit_rtn(name
, rtn
, rtns
, glas
, intfas
, strings
, bc_file
, abbrevs
)
90 bc_file
:end_subblock(BC_RTNS
)
95 function emit_intfa(intfa
, strings
, bc_file
, abbrevs
)
96 bc_file
:enter_subblock(BC_INTFA
)
98 local intfa_state_offsets
= {}
99 local intfa_transitions
= {}
101 -- order the states such that the start state is emitted first
102 local states
= intfa
:states()
103 states
:remove(intfa
.start
)
104 states
= states
:to_array()
105 table.insert(states
, 1, intfa
.start
)
107 -- do a first pass over the states that records their order and builds
108 -- each state's list of transitions.
109 local state_transitions
= {}
110 for i
, state
in ipairs(states
) do
111 intfa_state_offsets
[state
] = i
- 1
113 state_transitions
[state
] = {}
114 for edge_val
, target_state
, properties
in state
:transitions() do
115 for range
in edge_val
:each_range() do
116 table.insert(state_transitions
[state
], {range
, target_state
})
120 -- sort the transitions into a stable order, to make the output
121 -- more deterministic.
122 table.sort(state_transitions
[state
], function (a
, b
) return a
[1].low
< b
[1].low
end)
124 -- add this state's transitions to the global list of transitions for the IntFA
125 for t
in each(state_transitions
[state
])
126 do table.insert(intfa_transitions
, t
)
130 print_verbose(string.format(" %d states, %d transitions", #states
, #intfa_transitions
))
133 for state
in each(states
) do
135 bc_file
:write_abbreviated_record(abbrevs
.bc_intfa_final_state
,
136 #state_transitions
[state
],
137 strings
:offset_of(state
.final
))
139 bc_file
:write_abbreviated_record(abbrevs
.bc_intfa_state
, #state_transitions
[state
])
143 -- emit the transitions
144 for transition
in each(intfa_transitions
) do
145 local range
, target_state
= unpack(transition
)
146 target_state_offset
= intfa_state_offsets
[target_state
]
147 if range
.low
== range
.high
then
148 bc_file
:write_abbreviated_record(abbrevs
.bc_intfa_transition
, range
.low
, target_state_offset
)
150 local high
= range
.high
151 if high
== math
.huge
then high
= 255 end -- temporary ASCII-specific hack
152 bc_file
:write_abbreviated_record(abbrevs
.bc_intfa_transition_range
, range
.low
, high
, target_state_offset
)
156 bc_file
:end_subblock(BC_INTFA
)
159 function emit_gla(gla
, strings
, rtns
, intfas
, bc_file
, abbrevs
)
160 bc_file
:enter_subblock(BC_GLA
)
162 local states
= OrderedSet
:new()
163 states
:add(gla
.start
)
164 for state
in each(gla
:states()) do
165 if state
~= gla
.start
then
171 for state
in each(states
) do
173 -- figure out the offset of the RTN transition this GLA final state implies.
174 local ordered_rtn
= rtns
:get(gla
.rtn_state
.rtn
.name
)
175 local transitions
= ordered_rtn
.transitions
[gla
.rtn_state
]
176 local transition_offset
= nil
177 if state
.final
[1] == 0 and state
.final
[2] == 0 then
178 transition_offset
= 0
180 for i
=1,#transitions
do
181 if transitions
[i
][1] == state
.final
[1] and transitions
[i
][2] == state
.final
[2] then
182 transition_offset
= i
188 if transition_offset
== nil then
189 error("GLA final state indicated a state that was not found in the RTN state.")
191 bc_file
:write_abbreviated_record(abbrevs
.bc_gla_final_state
, transition_offset
)
193 bc_file
:write_abbreviated_record(abbrevs
.bc_gla_state
,
194 intfas
:offset_of(state
.intfa
),
195 state
:num_transitions())
200 for state
in each(states
) do
201 for edge_val
, dest_state
in state
:transitions() do
203 if edge_val
~= fa
.eof
then
204 edge_num
= strings
:offset_of(edge_val
)
206 bc_file
:write_abbreviated_record(abbrevs
.bc_gla_transition
,
208 states
:offset_of(dest_state
))
212 bc_file
:end_subblock(BC_GLA
)
215 function emit_rtn(name
, rtn
, rtns
, glas
, intfas
, strings
, bc_file
, abbrevs
)
217 bc_file
:enter_subblock(BC_RTN
)
218 bc_file
:write_abbreviated_record(abbrevs
.bc_rtn_info
, strings
:offset_of(name
), rtn
.slot_count
)
221 for state
in each(rtn
.states
) do
230 bc_file
:write_abbreviated_record(abbrevs
.bc_rtn_state_with_gla
,
231 #rtn
.transitions
[state
],
233 glas
:offset_of(state
.gla
))
234 elseif state
.intfa
then
235 bc_file
:write_abbreviated_record(abbrevs
.bc_rtn_state_with_intfa
,
236 #rtn
.transitions
[state
],
238 intfas
:offset_of(state
.intfa
))
240 bc_file
:write_abbreviated_record(abbrevs
.bc_rtn_trivial_state
,
241 #rtn
.transitions
[state
],
247 for state
in each(rtn
.states
) do
248 for transition
in each(rtn
.transitions
[state
]) do
249 local edge_val
, dest_state
, properties
= unpack(transition
)
250 if fa
.is_nonterm(edge_val
) then
251 bc_file
:write_abbreviated_record(abbrevs
.bc_rtn_transition_nonterm
,
252 rtns
:offset_of_key(edge_val
.name
),
253 rtn
.states
:offset_of(dest_state
),
254 strings
:offset_of(properties
.name
),
255 properties
.slotnum
-1)
257 bc_file
:write_abbreviated_record(abbrevs
.bc_rtn_transition_terminal
,
258 strings
:offset_of(edge_val
),
259 rtn
.states
:offset_of(dest_state
),
260 strings
:offset_of(properties
.name
),
261 properties
.slotnum
-1)
266 bc_file
:end_subblock(BC_RTN
)
270 function define_abbrevs(bc_file
)
273 -- Enter a BLOCKINFO record to define abbreviations for all our records.
274 -- See FILEFORMAT for a description of what all the record types mean.
275 bc_file
:enter_subblock(bc
.BLOCKINFO
)
277 -- IntFA abbreviations
278 bc_file
:write_unabbreviated_record(bc
.SETBID
, BC_INTFA
)
280 abbrevs
.bc_intfa_final_state
= bc_file
:define_abbreviation(4,
281 bc
.LiteralOp
:new(BC_INTFA_FINAL_STATE
),
285 abbrevs
.bc_intfa_state
= bc_file
:define_abbreviation(5,
286 bc
.LiteralOp
:new(BC_INTFA_STATE
),
289 abbrevs
.bc_intfa_transition
= bc_file
:define_abbreviation(6,
290 bc
.LiteralOp
:new(BC_INTFA_TRANSITION
),
294 abbrevs
.bc_intfa_transition_range
= bc_file
:define_abbreviation(7,
295 bc
.LiteralOp
:new(BC_INTFA_TRANSITION_RANGE
),
300 -- Strings abbreviations
301 bc_file
:write_unabbreviated_record(bc
.SETBID
, BC_STRINGS
)
303 abbrevs
.bc_string
= bc_file
:define_abbreviation(4,
304 bc
.LiteralOp
:new(BC_STRING
),
305 bc
.ArrayOp
:new(bc
.FixedOp
:new(7)))
308 bc_file
:write_unabbreviated_record(bc
.SETBID
, BC_RTN
)
310 abbrevs
.bc_rtn_info
= bc_file
:define_abbreviation(4,
311 bc
.LiteralOp
:new(BC_RTN_INFO
),
315 abbrevs
.bc_rtn_state_with_intfa
= bc_file
:define_abbreviation(5,
316 bc
.LiteralOp
:new(BC_RTN_STATE_WITH_INTFA
),
321 abbrevs
.bc_rtn_state_with_gla
= bc_file
:define_abbreviation(6,
322 bc
.LiteralOp
:new(BC_RTN_STATE_WITH_GLA
),
327 abbrevs
.bc_rtn_trivial_state
= bc_file
:define_abbreviation(7,
328 bc
.LiteralOp
:new(BC_RTN_TRIVIAL_STATE
),
332 abbrevs
.bc_rtn_transition_terminal
= bc_file
:define_abbreviation(8,
333 bc
.LiteralOp
:new(BC_RTN_TRANSITION_TERMINAL
),
339 abbrevs
.bc_rtn_transition_nonterm
= bc_file
:define_abbreviation(9,
340 bc
.LiteralOp
:new(BC_RTN_TRANSITION_NONTERM
),
347 bc_file
:write_unabbreviated_record(bc
.SETBID
, BC_GLA
)
349 abbrevs
.bc_gla_state
= bc_file
:define_abbreviation(4,
350 bc
.LiteralOp
:new(BC_GLA_STATE
),
354 abbrevs
.bc_gla_final_state
= bc_file
:define_abbreviation(5,
355 bc
.LiteralOp
:new(BC_GLA_FINAL_STATE
),
358 abbrevs
.bc_gla_transition
= bc_file
:define_abbreviation(6,
359 bc
.LiteralOp
:new(BC_GLA_TRANSITION
),
363 bc_file
:end_subblock(bc
.BLOCKINFO
)