1 --[[--------------------------------------------------------------------
3 Gazelle: a system for building fast, reusable parsers
7 Code that takes the final optimized parsing structures and emits them
8 to bytecode (in Bitcode format).
10 Copyright (c) 2007 Joshua Haberman. See LICENSE for details.
12 --------------------------------------------------------------------]]--
16 -- See FILEFORMAT for details about what these constants mean
26 BC_INTFA_FINAL_STATE
= 1
27 BC_INTFA_TRANSITION
= 2
28 BC_INTFA_TRANSITION_RANGE
= 3
34 BC_RTN_STATE_WITH_GLA
= 4
35 BC_RTN_TRIVIAL_FINAL_STATE
= 6
36 BC_RTN_TRANSITION_TERMINAL
= 2
37 BC_RTN_TRANSITION_NONTERM
= 3
41 BC_GLA_FINAL_STATE
= 1
45 function write_bytecode(grammar
, outfilename
)
46 -- write Bitcode header
47 bc_file
= bc
.File
:new(outfilename
, "GH")
48 abbrevs
= define_abbrevs(bc_file
)
50 print(string.format("Writing grammar to disk..."))
52 -- Obtain linearized representations of all the DFAs from the Grammar object.
53 local strings
= grammar
:get_strings()
54 local rtns
= grammar
:get_flattened_rtn_list()
55 local glas
= grammar
:get_flattened_gla_list()
56 local intfas
= grammar
.master_intfas
59 print(string.format("Writing %d strings...", strings
:count()))
60 bc_file
:enter_subblock(BC_STRINGS
)
61 for string in each(strings
) do
62 bc_file
:write_abbreviated_record(abbrevs
.bc_string
, string)
64 bc_file
:end_subblock(BC_STRINGS
)
67 print(string.format("Writing %d IntFAs...", intfas
:count()))
68 bc_file
:enter_subblock(BC_INTFAS
)
69 for intfa
in each(intfas
) do
70 emit_intfa(intfa
, strings
, bc_file
, abbrevs
)
72 bc_file
:end_subblock(BC_INTFAS
)
75 print(string.format("Writing %d GLAs...", glas
:count()))
76 bc_file
:enter_subblock(BC_GLAS
)
77 for gla
in each(glas
) do
78 emit_gla(gla
, bc_file
, abbrevs
)
80 bc_file
:end_subblock(BC_GLAS
)
83 bc_file
:enter_subblock(BC_RTNS
)
84 print(string.format("Writing %d RTNs...", rtns
:count()))
85 for name
, rtn
in each(rtns
) do
86 emit_rtn(name
, rtn
, rtns
, glas
, intfas
, strings
, bc_file
, abbrevs
)
88 bc_file
:end_subblock(BC_RTNS
)
93 function emit_intfa(intfa
, strings
, bc_file
, abbrevs
)
94 bc_file
:enter_subblock(BC_INTFA
)
96 local intfa_state_offsets
= {}
97 local intfa_transitions
= {}
99 -- order the states such that the start state is emitted first
100 local states
= intfa
:states()
101 states
:remove(intfa
.start
)
102 states
= states
:to_array()
103 table.insert(states
, 1, intfa
.start
)
105 -- do a first pass over the states that records their order and builds
106 -- each state's list of transitions.
107 local state_transitions
= {}
108 for i
, state
in ipairs(states
) do
109 intfa_state_offsets
[state
] = i
- 1
111 state_transitions
[state
] = {}
112 for edge_val
, target_state
, properties
in state
:transitions() do
113 for range
in edge_val
:each_range() do
114 table.insert(state_transitions
[state
], {range
, target_state
})
118 -- sort the transitions into a stable order, to make the output
119 -- more deterministic.
120 table.sort(state_transitions
[state
], function (a
, b
) return a
[1].low
< b
[1].low
end)
122 -- add this state's transitions to the global list of transitions for the IntFA
123 for t
in each(state_transitions
[state
])
124 do table.insert(intfa_transitions
, t
)
128 print(string.format(" %d states, %d transitions", #states
, #intfa_transitions
))
131 for state
in each(states
) do
133 bc_file
:write_abbreviated_record(abbrevs
.bc_intfa_final_state
,
134 #state_transitions
[state
],
135 strings
:offset_of(state
.final
))
137 bc_file
:write_abbreviated_record(abbrevs
.bc_intfa_state
, #state_transitions
[state
])
141 -- emit the transitions
142 for transition
in each(intfa_transitions
) do
143 local range
, target_state
= unpack(transition
)
144 target_state_offset
= intfa_state_offsets
[target_state
]
145 if range
.low
== range
.high
then
146 bc_file
:write_abbreviated_record(abbrevs
.bc_intfa_transition
, range
.low
, target_state_offset
)
148 if range
.high
== math
.huge
then range
.high
= 255 end -- temporary ASCII-specific hack
149 bc_file
:write_abbreviated_record(abbrevs
.bc_intfa_transition_range
, range
.low
, range
.high
, target_state_offset
)
153 bc_file
:end_subblock(BC_INTFA
)
156 function emit_gla(gla
, strings
, bc_file
, abbrevs
)
157 bc_file
:enter_subblock(BC_GLA
)
159 local states
= OrderedSet
:new()
160 states
:add(gla
.start
)
161 for state
in each(gla
:states()) do
162 if state
~= gla
.start
then
167 for state
in each(states
) do
170 bc_file
:end_subblock(BC_GLA
)
173 function emit_rtn(name
, rtn
, rtns
, glas
, intfas
, strings
, bc_file
, abbrevs
)
174 -- emit RTN name and ignore info
175 bc_file
:enter_subblock(BC_RTN
)
176 bc_file
:write_abbreviated_record(abbrevs
.bc_rtn_info
, strings
:offset_of(name
), rtn
.slot_count
)
179 for ign_terminal
in each(rtn
.ignore
) do
180 bc_file
:write_abbreviated_record(abbrevs
.bc_rtn_ignore
, strings
:offset_of(ign_terminal
))
185 for state
in each(rtn
.states
) do
194 bc_file
:write_abbreviated_record(abbrevs
.bc_rtn_state_with_gla
,
195 #rtn
.transitions
[state
],
196 glas
:offset_of(state
.gla
),
198 elseif state
.intfa
then
199 bc_file
:write_abbreviated_record(abbrevs
.bc_rtn_state
,
200 #rtn
.transitions
[state
],
201 intfas
:offset_of(state
.intfa
),
204 bc_file
:write_abbreviated_record(abbrevs
.bc_rtn_trivial_final_state
)
209 for state
in each(rtn
.states
) do
210 for transition
in each(rtn
.transitions
[state
]) do
211 local edge_val
, dest_state
, properties
= unpack(transition
)
212 if fa
.is_nonterm(edge_val
) then
213 bc_file
:write_abbreviated_record(abbrevs
.bc_rtn_transition_nonterm
,
214 rtns
:offset_of_key(edge_val
.name
),
215 rtn
.states
:offset_of(dest_state
),
216 strings
:offset_of(properties
.name
),
217 properties
.slotnum
-1)
219 bc_file
:write_abbreviated_record(abbrevs
.bc_rtn_transition_terminal
,
220 strings
:offset_of(edge_val
),
221 rtn
.states
:offset_of(dest_state
),
222 strings
:offset_of(properties
.name
),
223 properties
.slotnum
-1)
228 bc_file
:end_subblock(BC_RTN
)
232 function define_abbrevs(bc_file
)
235 -- Enter a BLOCKINFO record to define abbreviations for all our records.
236 -- See FILEFORMAT for a description of what all the record types mean.
237 bc_file
:enter_subblock(bc
.BLOCKINFO
)
239 -- IntFA abbreviations
240 bc_file
:write_unabbreviated_record(bc
.SETBID
, BC_INTFA
)
242 abbrevs
.bc_intfa_final_state
= bc_file
:define_abbreviation(4,
243 bc
.LiteralOp
:new(BC_INTFA_FINAL_STATE
),
247 abbrevs
.bc_intfa_state
= bc_file
:define_abbreviation(5,
248 bc
.LiteralOp
:new(BC_INTFA_STATE
),
251 abbrevs
.bc_intfa_transition
= bc_file
:define_abbreviation(6,
252 bc
.LiteralOp
:new(BC_INTFA_TRANSITION
),
256 abbrevs
.bc_intfa_transition_range
= bc_file
:define_abbreviation(7,
257 bc
.LiteralOp
:new(BC_INTFA_TRANSITION_RANGE
),
262 -- Strings abbreviations
263 bc_file
:write_unabbreviated_record(bc
.SETBID
, BC_STRINGS
)
265 abbrevs
.bc_string
= bc_file
:define_abbreviation(4,
266 bc
.LiteralOp
:new(BC_STRING
),
267 bc
.ArrayOp
:new(bc
.FixedOp
:new(7)))
270 bc_file
:write_unabbreviated_record(bc
.SETBID
, BC_RTN
)
272 abbrevs
.bc_rtn_info
= bc_file
:define_abbreviation(4,
273 bc
.LiteralOp
:new(BC_RTN_INFO
),
277 abbrevs
.bc_rtn_state
= bc_file
:define_abbreviation(5,
278 bc
.LiteralOp
:new(BC_RTN_STATE
),
283 abbrevs
.bc_rtn_state_with_gla
= bc_file
:define_abbreviation(6,
284 bc
.LiteralOp
:new(BC_RTN_STATE_WITH_GLA
),
289 abbrevs
.bc_rtn_trivial_final_state
= bc_file
:define_abbreviation(7,
290 bc
.LiteralOp
:new(BC_RTN_TRIVIAL_FINAL_STATE
))
292 abbrevs
.bc_rtn_transition_terminal
= bc_file
:define_abbreviation(8,
293 bc
.LiteralOp
:new(BC_RTN_TRANSITION_TERMINAL
),
299 abbrevs
.bc_rtn_transition_nonterm
= bc_file
:define_abbreviation(9,
300 bc
.LiteralOp
:new(BC_RTN_TRANSITION_NONTERM
),
306 abbrevs
.bc_rtn_ignore
= bc_file
:define_abbreviation(10,
307 bc
.LiteralOp
:new(BC_RTN_IGNORE
),
311 bc_file
:write_unabbreviated_record(bc
.SETBID
, BC_GLA
)
313 abbrevs
.bc_gla_state
= bc_file
:define_abbreviation(4,
314 bc
.LiteralOp
:new(BC_GLA_STATE
),
318 abbrevs
.bc_final_state
= bc_file
:define_abbreviation(5,
319 bc
.LiteralOp
:new(BC_GLA_FINAL_STATE
),
321 bc
.ArrayOp
:new(bc
.VBROp
:new(4)))
323 abbrevs
.bc_gla_transition
= bc_file
:define_abbreviation(6,
324 bc
.LiteralOp
:new(BC_GLA_TRANSITION
),
328 bc_file
:end_subblock(bc
.BLOCKINFO
)