compiler/intfa_combine.lua

   1 --[[--------------------------------------------------------------------
   2
   3   Gazelle: a system for building fast, reusable parsers
   4
   5   intfa_combine.lua
   6
   7   Once the lookahead has been calculated, we know what terminal(s)
   8   each RTN/GLA state is expecting to see.  We use this information to
   9   build IntFAs that recognize any possible valid token that could
  10   occur at this point in the input.  We combine and reuse DFAs as much
  11   as possible -- only when two terminals conflict is it necessary to
  12   use different DFAs.
  13
  14   Copyright (c) 2007 Joshua Haberman.  See LICENSE for details.
  15
  16 --------------------------------------------------------------------]]--
  17
  18 -- Determine what terminals (if any) conflict with each other.
  19 -- In this context, "conflict" means that a string of characters can
  20 -- be interpreted as one or more terminals.
  21 function analyze_conflicts(terminals)
  22   -- We detect conflicts by combining all the NFAs into a single DFA.
  23   -- We then observe what states are final to more than one terminal.
  24   local conflicts = {}
  25   local nfas = {}
  26   for name, terminal in pairs(terminals) do
  27     if type(terminal) == "string" then
  28       terminal = fa.IntFA:new{string=terminal}
  29     end
  30     table.insert(nfas, {terminal, name})
  31   end
  32
  33   local uber_dfa = nfas_to_dfa(nfas, true)
  34   for state in each(uber_dfa:states()) do
  35     if type(state.final) == "table" then  -- more than one terminal ended in this state
  36       for term1 in each(state.final) do
  37         for term2 in each(state.final) do
  38           if term1 ~= term2 then
  39             conflicts[term1] = conflicts[term1] or Set:new()
  40             conflicts[term1]:add(term2)
  41           end
  42         end
  43       end
  44     end
  45   end
  46
  47   return conflicts
  48 end
  49
  50 function has_conflicts(conflicts, term_set1, term_set2)
  51   for term1 in each(term_set1) do
  52     if conflicts[term1] then
  53       for conflict in each(conflicts[term1]) do
  54         if term_set2:contains(conflict) then
  55           return true, term1, conflict
  56         end
  57       end
  58     end
  59   end
  60 end
  61
  62 function create_or_reuse_termset_for(terminals, conflicts, termsets, nonterm)
  63   if has_conflicts(conflicts, terminals, terminals) then
  64     local has_conflict, c1, c2 = has_conflicts(conflicts, terminals, terminals)
  65     error(string.format("Can't build DFA inside %s, because terminals %s and %s conflict",
  66                         nonterm, c1, c2))
  67   end
  68
  69   local found_termset = false
  70   for i, termset in ipairs(termsets) do
  71     -- will this termset do?  it will if none of our terminals conflict with any of the
  72     -- existing terminals in this set.
  73     -- (we can probably compute this faster by pre-computing equivalence classes)
  74     if not has_conflicts(conflicts, termset, terminals) then
  75       found_termset = i
  76       break
  77     end
  78   end
  79
  80   if found_termset == false then
  81     local new_termset = Set:new()
  82     table.insert(termsets, new_termset)
  83     found_termset = #termsets
  84   end
  85
  86   -- add all the terminals for this phase of lookahead to the termset we found
  87   for term in each(terminals) do
  88     termsets[found_termset]:add(term)
  89   end
  90
  91   return found_termset
  92 end
  93
  94 function intfa_combine(all_terminals, state_term_pairs)
  95   local conflicts = analyze_conflicts(all_terminals)
  96
  97   -- For each state in the grammar, create (or reuse) a DFA to run
  98   -- when we hit that state.
  99   local termsets = {}
 100   local intfa_nums = {}
 101   for state_term_pair in each(state_term_pairs) do
 102     local state, terms = unpack(state_term_pair)
 103     intfa_nums[state] = create_or_reuse_termset_for(terms, conflicts, termsets)
 104   end
 105
 106   local dfas = OrderedSet:new()
 107   for termset in each(termsets) do
 108     local nfas = {}
 109     for term in each(termset) do
 110       local target = all_terminals[term]
 111       if type(target) == "string" then
 112         target = fa.IntFA:new{string=target}
 113       end
 114       table.insert(nfas, {target, term})
 115     end
 116     local dfa = hopcroft_minimize(nfas_to_dfa(nfas))
 117     dfa.termset = termset
 118     dfas:add(dfa)
 119   end
 120
 121   for state, intfa_num in pairs(intfa_nums) do
 122     state.intfa = dfas:element_at(intfa_num)
 123   end
 124
 125   dfas:sort(function (a, b) return b.termset:count() < a.termset:count() end)
 126
 127   return dfas
 128 end
 129
 130 -- vim:et:sts=2:sw=2