2 require
"bootstrap/rtn"
6 --print(serialize(attributes.ignore))
8 function child_edges(edge
, stack
, grammar
, decisions
, terminals
)
9 if type(edge
) == "table" and edge
.class
== fa
.NonTerm
then
10 local child_edges
= {}
11 for edge_val
in grammar
[edge
.name
].start
:transitions() do
12 table.insert(child_edges
, edge_val
)
17 if type(edge
) == "table" then
18 str_or_regex
= edge
.properties
.string
23 table.insert(terminals
, str_or_regex
)
25 local decision_stack
= stack
:to_array()
26 if #decision_stack
> 1 then
27 decisions
[str_or_regex
] = stack
:to_array()
32 -- require "sketches/regex_debug"
33 -- require "sketches/pp"
35 TerminalTransition
= {name
="TerminalTransition", order
=1}
36 NontermTransition
= {name
="NontermTransition", order
=2}
37 Decision
= {name
="Decision", order
=3}
39 function load_grammar(file
)
40 -- First read grammar file
42 local grm
= io
.open(file
, "r")
43 local grm_str
= grm
:read("*a")
45 local grammar
, attributes
= parse_grammar(CharStream
:new(grm_str
))
47 -- First, determine what terminals (if any) conflict with each other.
48 -- In this context, "conflict" means that a string of characters can
49 -- be interpreted as one or more terminals.
53 for name
, terminal
in pairs(attributes
.terminals
) do
54 if type(terminal
) == "string" then
55 terminal
= fa
.IntFA
:new
{string=terminal
}
57 table.insert(nfas
, {terminal
, name
})
59 local uber_dfa
= nfas_to_dfa(nfas
, true)
60 for state
in each(uber_dfa
:states()) do
61 if type(state
.final
) == "table" then -- more than one terminal ended in this state
62 for term1
in each(state
.final
) do
63 for term2
in each(state
.final
) do
64 if term1
~= term2
then
65 conflicts
[term1
] = conflicts
[term1
] or Set
:new()
66 conflicts
[term1
]:add(term2
)
74 -- For each state in the grammar, create (or reuse) a DFA to run
75 -- when we hit that state.
78 function has_conflicts(conflicts
, dfa
, terminals
)
79 for term
in each(terminals
) do
80 if conflicts
[term
] then
81 for conflict
in each(conflicts
[term
]) do
82 if dfa
:contains(conflict
) then
90 for nonterm
, rtn
in pairs(grammar
) do
93 for state
in each(rtn
:states()) do
95 function my_child_edges(edge
, stack
)
96 return child_edges(edge
, stack
, grammar
, state
.decisions
, terminals
)
100 if state
:num_transitions() > 0 then
101 for edge_val
, target_state
in state
:transitions() do
102 depth_first_traversal(edge_val
, my_child_edges
)
105 -- print("Inside " .. nonterm .. ", state=" .. tostring(state) .. "...")
106 -- print(serialize(decisions))
108 -- We now have a list of terminals we want to find when we are in this RTN
109 -- state. Now get a DFA that will match all of them, either by creating
110 -- a new DFA or by finding an existing one that will work (without conflicting
111 -- with any of our terminals).
112 local found_dfa
= false
113 for i
, dfa
in ipairs(dfas
) do
114 -- will this dfa do? it will if none of our terminals conflict with any of the
115 -- existing terminals in this dfa.
116 -- (we can probably compute this faster by pre-computing equivalence classes)
117 if not has_conflicts(conflicts
, dfa
, terminals
) then
123 if found_dfa
== false then
125 table.insert(dfas
, new_dfa
)
129 -- add all the terminals for this state to the dfa we found
130 for term
in each(terminals
) do
131 dfas
[found_dfa
]:add(term
)
134 state
.dfa
= found_dfa
140 for dfa
in each(dfas
) do
142 for term
in each(dfa
) do
143 local target
= attributes
.terminals
[term
]
144 if type(target
) == "string" then
145 target
= fa
.IntFA
:new
{string=target
}
147 table.insert(nfas
, {target
, term
})
149 local real_dfa
= hopcroft_minimize(nfas_to_dfa(nfas
))
150 table.insert(real_dfas
, real_dfa
)
153 return grammar
, attributes
, real_dfas
156 function write_grammar(infilename
, outfilename
)
157 local grammar
, attributes
, intfas
= load_grammar(infilename
)
159 -- write Bitcode header
160 bc_file
= bc
.File
:new(outfilename
, "GH")
162 -- Enter a BLOCKINFO record to define abbreviations for all our records.
163 -- See FILEFORMAT for a description of what all the record types mean.
164 bc_file
:enter_subblock(bc
.BLOCKINFO
)
166 -- IntFA abbreviations
167 bc_file
:write_unabbreviated_record(bc
.SETBID
, BC_INTFA
)
169 bc_intfa_final_state
= bc_file
:define_abbreviation(4,
170 bc
.LiteralOp
:new(BC_INTFA_FINAL_STATE
),
174 bc_intfa_state
= bc_file
:define_abbreviation(5,
175 bc
.LiteralOp
:new(BC_INTFA_STATE
),
178 bc_intfa_transition
= bc_file
:define_abbreviation(6,
179 bc
.LiteralOp
:new(BC_INTFA_TRANSITION
),
183 bc_intfa_transition_range
= bc_file
:define_abbreviation(7,
184 bc
.LiteralOp
:new(BC_INTFA_TRANSITION_RANGE
),
189 -- Strings abbreviations
190 bc_file
:write_unabbreviated_record(bc
.SETBID
, BC_STRINGS
)
192 bc_string
= bc_file
:define_abbreviation(4,
193 bc
.LiteralOp
:new(BC_STRING
),
194 bc
.ArrayOp
:new(bc
.FixedOp
:new(7)))
197 bc_file
:write_unabbreviated_record(bc
.SETBID
, BC_RTN
)
199 bc_rtn_info
= bc_file
:define_abbreviation(4,
200 bc
.LiteralOp
:new(BC_RTN_INFO
),
204 bc_rtn_state
= bc_file
:define_abbreviation(5,
205 bc
.LiteralOp
:new(BC_RTN_STATE
),
210 bc_rtn_transition_terminal
= bc_file
:define_abbreviation(6,
211 bc
.LiteralOp
:new(BC_RTN_TRANSITION_TERMINAL
),
217 bc_rtn_transition_nonterm
= bc_file
:define_abbreviation(7,
218 bc
.LiteralOp
:new(BC_RTN_TRANSITION_NONTERM
),
224 bc_rtn_ignore
= bc_file
:define_abbreviation(8,
225 bc
.LiteralOp
:new(BC_RTN_IGNORE
),
228 bc_rtn_decision
= bc_file
:define_abbreviation(9,
229 bc
.LiteralOp
:new(BC_RTN_DECISION
),
231 bc
.ArrayOp
:new(bc
.VBROp
:new(4)))
233 bc_file
:end_subblock(bc
.BLOCKINFO
)
235 print(string.format("Writing grammar to disk..."))
238 local string_offsets
= {}
240 -- gather a list of all the strings from intfas
241 for intfa
in each(intfas
) do
242 for state
in each(intfa
:states()) do
243 if state
.final
and not string_offsets
[state
.final
] then
244 string_offsets
[state
.final
] = #strings
245 table.insert(strings
, state
.final
)
250 -- build an ordered list of RTNs and gather the strings from them
251 local rtns
= {{attributes
.start
, grammar
[attributes
.start
]}}
252 local rtns_offsets
= {}
253 rtns_offsets
[grammar
[attributes
.start]]
= 0
254 for name
, rtn
in pairs(grammar
) do
255 if name
~= attributes
.start
then
256 rtns_offsets
[rtn
] = #rtns
257 if not string_offsets
[name
] then
258 string_offsets
[name
] = #strings
259 table.insert(strings
, name
)
262 table.insert(rtns
, {name
, rtn
})
264 for rtn_state
in each(rtn
:states()) do
265 for edge_val
, target_state
, properties
in rtn_state
:transitions() do
266 if properties
and not string_offsets
[properties
.name
] then
267 string_offsets
[properties
.name
] = #strings
268 table.insert(strings
, properties
.name
)
276 print(string.format("Writing %d strings...", #strings
))
277 bc_file
:enter_subblock(BC_STRINGS
)
278 for string in each(strings
) do
279 bc_file
:write_abbreviated_record(bc_string
, string)
281 bc_file
:end_subblock(BC_STRINGS
)
284 print(string.format("Writing %d IntFAs...", #intfas
))
285 bc_file
:enter_subblock(BC_INTFAS
)
286 for intfa
in each(intfas
) do
287 bc_file
:enter_subblock(BC_INTFA
)
288 local intfa_state_offsets
= {}
289 local intfa_transitions
= {}
291 -- make sure the start state is emitted first
292 local states
= intfa
:states()
293 states
:remove(intfa
.start
)
294 states
= states
:to_array()
295 table.insert(states
, 1, intfa
.start
)
296 for i
, state
in ipairs(states
) do
297 intfa_state_offsets
[state
] = i
- 1
298 local this_state_transitions
= {}
299 for edge_val
, target_state
, properties
in state
:transitions() do
300 for range
in edge_val
:each_range() do
301 table.insert(this_state_transitions
, {range
, target_state
})
304 table.sort(this_state_transitions
, function (a
, b
) return a
[1].low
< b
[1].low
end)
305 for t
in each(this_state_transitions
) do table.insert(intfa_transitions
, t
) end
307 bc_file
:write_abbreviated_record(bc_intfa_final_state
, #this_state_transitions
, string_offsets
[state
.final
])
309 bc_file
:write_abbreviated_record(bc_intfa_state
, #this_state_transitions
)
313 print(string.format(" %d states, %d transitions", #states
, #intfa_transitions
))
315 for transition
in each(intfa_transitions
) do
316 local range
, target_state
= unpack(transition
)
317 target_state_offset
= intfa_state_offsets
[target_state
]
318 if range
.low
== range
.high
then
319 bc_file
:write_abbreviated_record(bc_intfa_transition
, range
.low
, target_state_offset
)
321 if range
.high
== math
.huge
then range
.high
= 255 end -- temporary ASCII-specific hack
322 bc_file
:write_abbreviated_record(bc_intfa_transition_range
, range
.low
, range
.high
, target_state_offset
)
326 bc_file
:end_subblock(BC_INTFA
)
328 bc_file
:end_subblock(BC_INTFAS
)
330 -- create a sorted list of transitions out of each RTN state, for all RTNs
331 local rtn_state_transitions
= {}
332 for name_rtn_pair
in each(rtns
) do
333 local name
, rtn
= unpack(name_rtn_pair
)
334 for rtn_state
in each(rtn
:states()) do
335 local this_state_transitions
= {}
336 for edge_val
, target_state
, properties
in rtn_state
:transitions() do
337 if type(edge_val
) == "table" and edge_val
.class
== fa
.NonTerm
then
338 table.insert(this_state_transitions
, {NontermTransition
, rtn_state
, {edge_val
, target_state
, properties
}})
340 table.insert(this_state_transitions
, {TerminalTransition
, rtn_state
, {edge_val
, target_state
, properties
}})
343 for terminal
, stack
in pairs(rtn_state
.decisions
) do
344 table.insert(this_state_transitions
, {Decision
, rtn_state
, {terminal
, stack
}})
347 table.sort(this_state_transitions
, function (a
, b
) return a
[1].order
< b
[1].order
end)
348 rtn_state_transitions
[rtn_state
] = this_state_transitions
353 bc_file
:enter_subblock(BC_RTNS
)
354 print(string.format("Writing %d RTNs...", #rtns
))
355 for name_rtn_pair
in each(rtns
) do
356 local name
, rtn
= unpack(name_rtn_pair
)
357 bc_file
:enter_subblock(BC_RTN
)
358 bc_file
:write_abbreviated_record(bc_rtn_info
, string_offsets
[name
], attributes
.slot_counts
[name
]-1)
360 if attributes
.ignore
[name
] then
361 for ign_terminal
in each(attributes
.ignore
[name
]) do
362 bc_file
:write_abbreviated_record(bc_rtn_ignore
, string_offsets
[ign_terminal
])
366 local rtn_states
= {}
367 local rtn_state_offsets
= {}
368 local rtn_transitions
= {}
370 -- make sure the start state is emitted first
371 local states
= rtn
:states()
372 states
:remove(rtn
.start
)
373 states
= states
:to_array()
374 table.insert(states
, 1, rtn
.start
)
377 for i
, rtn_state
in pairs(states
) do
378 rtn_state_offsets
[rtn_state
] = i
- 1
380 if rtn_state
.final
then is_final
= 1 end
381 -- states don't have an associated DFA if there are no outgoing transitions
382 rtn_state
.dfa
= rtn_state
.dfa
or 1
383 bc_file
:write_abbreviated_record(bc_rtn_state
, #rtn_state_transitions
[rtn_state
], rtn_state
.dfa
- 1, is_final
)
384 for t
in each(rtn_state_transitions
[rtn_state
]) do
385 table.insert(rtn_transitions
, t
)
389 print(string.format(" %s: %d states, %d transitions", name
, #states
, #rtn_transitions
))
392 for transition
in each(rtn_transitions
) do
393 local transition_type
, src_state
, data
= unpack(transition
)
394 if transition_type
== Decision
then
395 local terminal
, stack
= unpack(data
)
397 for i
, step
in ipairs(stack
) do
398 --print(string.format("Trying to find %s, %d transitions to consider", serialize(step), #rtn_state_transitions[src_state]))
399 for j
, transition2
in ipairs(rtn_state_transitions
[src_state
]) do
400 local transition_type2
, src_state2
, data2
= unpack(transition2
)
401 local edge_val
, target_state
, properties
= unpack(data2
)
402 --print(string.format("Is it %s?", serialize(edge_val)))
403 if edge_val
== step
then
404 --print("Transition found!")
405 stack_str
= stack_str
.. string.char(j
-1)
407 src_state
= grammar
[edge_val
.name
].start
411 if j
== #rtn_state_transitions
[src_state
] then
412 error(string.format("Transition not found! Step=%s", step
))
416 -- print(serialize(stack))
417 bc_file
:write_abbreviated_record(bc_rtn_decision
, string_offsets
[terminal
], stack_str
)
419 local edge_val
, target_state
, properties
= unpack(data
)
420 target_state_offset
= rtn_state_offsets
[target_state
]
421 if transition_type
== TerminalTransition
then
422 bc_file
:write_abbreviated_record(bc_rtn_transition_terminal
, string_offsets
[edge_val
],
423 rtn_state_offsets
[target_state
], string_offsets
[properties
.name
],
424 properties
.slotnum
-1)
426 bc_file
:write_abbreviated_record(bc_rtn_transition_nonterm
, rtns_offsets
[grammar
[edge_val
.name]]
,
427 rtn_state_offsets
[target_state
], string_offsets
[properties
.name
],
428 properties
.slotnum
-1)
433 bc_file
:end_subblock(BC_RTN
)
435 bc_file
:end_subblock(BC_RTNS
)
438 write_grammar(arg
[1], arg
[2])