1 --[[--------------------------------------------------------------------
3 Gazelle: a system for building fast, reusable parsers
7 Once the lookahead has been calculated, we know what terminal(s)
8 each RTN/GLA state is expecting to see. We use this information to
9 build IntFAs that recognize any possible valid token that could
10 occur at this point in the input. We combine and reuse DFAs as much
11 as possible -- only when two terminals conflict is it necessary to
14 Copyright (c) 2007 Joshua Haberman. See LICENSE for details.
16 --------------------------------------------------------------------]]--
18 -- Determine what terminals (if any) conflict with each other.
19 -- In this context, "conflict" means that a string of characters can
20 -- be interpreted as one or more terminals.
21 function analyze_conflicts(terminals
)
22 -- We detect conflicts by combining all the NFAs into a single DFA.
23 -- We then observe what states are final to more than one terminal.
26 for name
, terminal
in pairs(terminals
) do
27 if type(terminal
) == "string" then
28 terminal
= fa
.IntFA
:new
{string=terminal
}
30 table.insert(nfas
, {terminal
, name
})
33 local uber_dfa
= nfas_to_dfa(nfas
, true)
34 for state
in each(uber_dfa
:states()) do
35 if type(state
.final
) == "table" then -- more than one terminal ended in this state
36 for term1
in each(state
.final
) do
37 for term2
in each(state
.final
) do
38 if term1
~= term2
then
39 conflicts
[term1
] = conflicts
[term1
] or Set
:new()
40 conflicts
[term1
]:add(term2
)
50 function has_conflicts(conflicts
, term_set1
, term_set2
)
51 for term1
in each(term_set1
) do
52 if conflicts
[term1
] then
53 for conflict
in each(conflicts
[term1
]) do
54 if term_set2
:contains(conflict
) then
55 return true, term1
, conflict
62 function create_or_reuse_termset_for(terminals
, conflicts
, termsets
, nonterm
)
63 if has_conflicts(conflicts
, terminals
, terminals
) then
64 local has_conflict
, c1
, c2
= has_conflicts(conflicts
, terminals
, terminals
)
65 error(string.format("Can't build DFA inside %s, because terminals %s and %s conflict",
69 local found_termset
= false
70 for i
, termset
in ipairs(termsets
) do
71 -- will this termset do? it will if none of our terminals conflict with any of the
72 -- existing terminals in this set.
73 -- (we can probably compute this faster by pre-computing equivalence classes)
74 if not has_conflicts(conflicts
, termset
, terminals
) then
80 if found_termset
== false then
81 local new_termset
= Set
:new()
82 table.insert(termsets
, new_termset
)
83 found_termset
= #termsets
86 -- add all the terminals for this phase of lookahead to the termset we found
87 for term
in each(terminals
) do
88 termsets
[found_termset
]:add(term
)
94 function intfa_combine(all_terminals
, state_term_pairs
)
95 local conflicts
= analyze_conflicts(all_terminals
)
97 -- For each state in the grammar, create (or reuse) a DFA to run
98 -- when we hit that state.
100 local intfa_nums
= {}
101 for state_term_pair
in each(state_term_pairs
) do
102 local state
, terms
= unpack(state_term_pair
)
103 intfa_nums
[state
] = create_or_reuse_termset_for(terms
, conflicts
, termsets
)
106 local dfas
= OrderedSet
:new()
107 for termset
in each(termsets
) do
109 for term
in each(termset
) do
110 local target
= all_terminals
[term
]
111 if type(target
) == "string" then
112 target
= fa
.IntFA
:new
{string=target
}
114 table.insert(nfas
, {target
, term
})
116 local dfa
= hopcroft_minimize(nfas_to_dfa(nfas
))
117 dfa
.termset
= termset
121 for state
, intfa_num
in pairs(intfa_nums
) do
122 state
.intfa
= dfas
:element_at(intfa_num
)
125 dfas
:sort(function (a
, b
) return b
.termset
:count() < a
.termset
:count() end)