1 --[[--------------------------------------------------------------------
5 Translate a set of NFAs into a DFA that can recognize any of the
6 constituent strings. This is how we build a lexer that looks for
7 all candidate tokens simultaneously.
9 Copyright (c) 2007 Joshua Haberman. See LICENSE for details.
11 --------------------------------------------------------------------]]--
13 require
"data_structures"
16 function epsilon_closure(state
)
17 return breadth_first_traversal(state
, function(s
) return s
:transitions_for(fa
.e
, "ANY") end)
20 -- we as input an array of NFAs, one for each token we want to match simultaneously,
21 -- and strings describing each token. So:
22 -- { {nfa1, "token string 1"},
23 -- {nfa2, "token string 2", etc} }
24 function nfas_to_dfa(nfa_string_pairs
, ambiguous_ok
)
25 ambiguous_ok
= ambiguous_ok
or false
26 -- First we need to mark all final states and capture groups with the token string
29 for i
, nfa_string_pair
in ipairs(nfa_string_pairs
) do
30 local nfa
, token_string
= unpack(nfa_string_pair
)
31 table.insert(nfas
, nfa
)
33 -- Mark the nfa fragment's final state as the final state for this *token*
34 nfa
.final
.final
= token_string
37 -- Now combine all the nfas with alternation
38 local final_nfa
= nfa_construct
.alt(nfas
)
39 return nfa_to_dfa(final_nfa
, ambiguous_ok
)
42 function new_dfa_state(nfa
, nfa_states
, ambiguous_ok
)
43 local dfa_state
= nfa
:new_state()
45 -- If this is a final state for one or more of the nfas, make it an
46 -- (appropriately labeled) final state for the dfa
47 for nfa_state
in nfa_states
:each() do
48 if nfa_state
.final
then
49 if dfa_state
.final
and dfa_state
.final
~= nfa_state
.final
then
51 if type(dfa_state
.final
) ~= "table" then dfa_state
.final
= Set
:new({dfa_state
.final
}) end
52 dfa_state
.final
:add(nfa_state
.final
)
54 error("Ambiguous finality not supported yet!! (" .. tostring(dfa_state
.final
) .. " and " .. tostring(nfa_state
.final
.. ")"))
57 dfa_state
.final
= nfa_state
.final
65 function nfa_to_dfa(nfa
, ambiguous_ok
)
66 -- The sets of NFA states we need to process for outgoing transitions
67 local first_nfa_states
= epsilon_closure(nfa
.start
)
68 local queue
= Queue
:new(first_nfa_states
)
70 local dfa
= nfa
:new_graph
{start
= new_dfa_state(nfa
, first_nfa_states
, ambiguous_ok
)}
71 -- The DFA states we create from sets of NFA states
72 local dfa_states
= {[first_nfa_states
:hash_key()] = dfa
.start
}
74 while not queue
:isempty() do
75 local nfa_states
= queue
:dequeue()
76 local dfa_state
= dfa_states
[nfa_states
:hash_key()]
78 -- Generate a list of symbols that transition out of this set of NFA states.
79 -- We prefer this to iterating over the entire symbol space because it's
80 -- vastly more efficient in the case of a large symbol space (eg. Unicode)
81 local symbol_tuples
= nfa
:get_outgoing_edge_values(nfa_states
)
83 -- For each output symbol, generate the list of destination NFA states that
84 -- recognizing this symbol could put you in (including epsilon transitions).
85 for symbol_tuple
in each(symbol_tuples
) do
86 local symbol
, properties
= unpack(symbol_tuple
)
87 local dest_nfa_states
= Set
:new()
88 for nfa_state
in nfa_states
:each() do
89 -- equivalence classes dictate that this character represents what will
90 -- happen to ALL characters in the set
91 local target_states
= nfa_state
:transitions_for(symbol
, properties
)
94 for target_state
in each(target_states
) do
95 dest_nfa_states
:add(target_state
)
96 dest_nfa_states
:add_collection(epsilon_closure(target_state
):to_array())
101 -- this is necessary because (at the moment) get_outgoing_edge_values will generate
102 -- combinations of symbol/properties that don't *actually* transtion anywhere
103 -- TODO: get rid of that shortcoming
104 if not dest_nfa_states
:isempty() then
106 -- create a DFA state for this set of NFA states, if one does not
108 local dest_dfa_state
= dfa_states
[dest_nfa_states
:hash_key()]
109 if dest_dfa_state
== nil then
110 dest_dfa_state
= new_dfa_state(nfa
, dest_nfa_states
, ambiguous_ok
)
111 dfa_states
[dest_nfa_states
:hash_key()] = dest_dfa_state
112 queue
:enqueue(dest_nfa_states
)
115 -- create a transition from the current DFA state into the new one
116 dfa_state
:add_transition(symbol
, dest_dfa_state
, properties
)