Decisions are now fully emitted into output bitcode.
[gazelle.git] / nfa_to_dfa.lua
blob268adae67f1f09706fe0c1c90c6711dd5cec295b
1 --[[--------------------------------------------------------------------
3 nfa_to_dfa.lua
5 Translate a set of NFAs into a DFA that can recognize any of the
6 constituent strings. This is how we build a lexer that looks for
7 all candidate tokens simultaneously.
9 Copyright (c) 2007 Joshua Haberman. See LICENSE for details.
11 --------------------------------------------------------------------]]--
13 require "data_structures"
14 require "misc"
16 function epsilon_closure(state)
17 return breadth_first_traversal(state, function(s) return s:transitions_for(fa.e, "ANY") end)
18 end
20 -- we as input an array of NFAs, one for each token we want to match simultaneously,
21 -- and strings describing each token. So:
22 -- { {nfa1, "token string 1"},
23 -- {nfa2, "token string 2", etc} }
24 function nfas_to_dfa(nfa_string_pairs, ambiguous_ok)
25 ambiguous_ok = ambiguous_ok or false
26 -- First we need to mark all final states and capture groups with the token string
27 local nfas = {}
29 for i, nfa_string_pair in ipairs(nfa_string_pairs) do
30 local nfa, token_string = unpack(nfa_string_pair)
31 table.insert(nfas, nfa)
33 -- Mark the nfa fragment's final state as the final state for this *token*
34 nfa.final.final = token_string
35 end
37 -- Now combine all the nfas with alternation
38 local final_nfa = nfa_construct.alt(nfas)
39 return nfa_to_dfa(final_nfa, ambiguous_ok)
40 end
42 function new_dfa_state(nfa, nfa_states, ambiguous_ok)
43 local dfa_state = nfa:new_state()
45 -- If this is a final state for one or more of the nfas, make it an
46 -- (appropriately labeled) final state for the dfa
47 for nfa_state in nfa_states:each() do
48 if nfa_state.final then
49 if dfa_state.final and dfa_state.final ~= nfa_state.final then
50 if ambiguous_ok then
51 if type(dfa_state.final) ~= "table" then dfa_state.final = Set:new({dfa_state.final}) end
52 dfa_state.final:add(nfa_state.final)
53 else
54 error("Ambiguous finality not supported yet!! (" .. tostring(dfa_state.final) .. " and " .. tostring(nfa_state.final .. ")"))
55 end
56 else
57 dfa_state.final = nfa_state.final
58 end
59 end
60 end
62 return dfa_state
63 end
65 function nfa_to_dfa(nfa, ambiguous_ok)
66 -- The sets of NFA states we need to process for outgoing transitions
67 local first_nfa_states = epsilon_closure(nfa.start)
68 local queue = Queue:new(first_nfa_states)
70 local dfa = nfa:new_graph{start = new_dfa_state(nfa, first_nfa_states, ambiguous_ok)}
71 -- The DFA states we create from sets of NFA states
72 local dfa_states = {[first_nfa_states:hash_key()] = dfa.start}
74 while not queue:isempty() do
75 local nfa_states = queue:dequeue()
76 local dfa_state = dfa_states[nfa_states:hash_key()]
78 -- Generate a list of symbols that transition out of this set of NFA states.
79 -- We prefer this to iterating over the entire symbol space because it's
80 -- vastly more efficient in the case of a large symbol space (eg. Unicode)
81 local symbol_tuples = nfa:get_outgoing_edge_values(nfa_states)
83 -- For each output symbol, generate the list of destination NFA states that
84 -- recognizing this symbol could put you in (including epsilon transitions).
85 for symbol_tuple in each(symbol_tuples) do
86 local symbol, properties = unpack(symbol_tuple)
87 local dest_nfa_states = Set:new()
88 for nfa_state in nfa_states:each() do
89 -- equivalence classes dictate that this character represents what will
90 -- happen to ALL characters in the set
91 local target_states = nfa_state:transitions_for(symbol, properties)
93 if target_states then
94 for target_state in each(target_states) do
95 dest_nfa_states:add(target_state)
96 dest_nfa_states:add_collection(epsilon_closure(target_state):to_array())
97 end
98 end
99 end
101 -- this is necessary because (at the moment) get_outgoing_edge_values will generate
102 -- combinations of symbol/properties that don't *actually* transtion anywhere
103 -- TODO: get rid of that shortcoming
104 if not dest_nfa_states:isempty() then
106 -- create a DFA state for this set of NFA states, if one does not
107 -- already exist.
108 local dest_dfa_state = dfa_states[dest_nfa_states:hash_key()]
109 if dest_dfa_state == nil then
110 dest_dfa_state = new_dfa_state(nfa, dest_nfa_states, ambiguous_ok)
111 dfa_states[dest_nfa_states:hash_key()] = dest_dfa_state
112 queue:enqueue(dest_nfa_states)
115 -- create a transition from the current DFA state into the new one
116 dfa_state:add_transition(symbol, dest_dfa_state, properties)
121 return dfa