A first pass at subparsers (whitespace ignoring).
[gazelle.git] / sketches / full-ll.lua
blobb954cc3fbd0233b042ff05a1913e84f4194b55e0
1 --[[--------------------------------------------------------------------
3 Editor's note: at one point I was toying around with this code that,
4 when checking for ambiguity in compiler/ll.lua, would be able to detect
5 cases that were full-LL but not strong-LL. But things got really
6 complicated really fast, and since I don't actually care about
7 full-LL at the moment, I ripped this code out. Here it lies.
9 --------------------------------------------------------------------]]--
11 function Path:return_stack(max)
12 local stack = {}
13 for action_arg_pair in each(self.path) do
14 local action, arg = unpack(action_arg_pair)
15 if action == "return" and arg then
16 if not max or #stack < max then
17 table.insert(stack, arg)
18 end
19 end
20 end
21 return stack
22 end
25 -- The following code was in check_for_ambiguity
26 -- If all paths involved "returns" from rules where no stack context was
27 -- available (meaning we had to use follow states), and k is the minimum
28 -- number of such returns that all stacks have done, then the rules to
29 -- which all paths have "returned" for their first k returns must be
30 -- identical for the paths to be considered ambiguous. Because otherwise
31 -- the paths are not ambiguous -- they would be distinguished by their
32 -- contexts.
34 -- If this is the case (their first k returns differ) and the two paths
35 -- predict the same alternative, we are fine -- building the GLA continues.
36 -- But if they predict *different* alternatives, this indicates that the
37 -- grammar is full-LL and not strong-LL.
39 local min_k = math.huge
40 local all_same_k = true
41 local return_stacks = Set:new()
42 for state_path in each(rtn_states[signature]) do
43 if min_k ~= math.huge and min_k ~= #state_path:return_stack() then
44 all_same_k = false
45 end
46 min_k = math.min(min_k, #state_path:return_stack())
47 end
49 -- Note that the next loop will trivially fail if min_k == 0 (ie. none of
50 -- this "return stack" funny business going on at all -- a signature clash
51 -- is a signature clash, which indicates an ambiguity), as a special case
52 -- of solving the more general problem.
53 local k_length_return_stacks = {}
54 for state_path in each(rtn_states[signature]) do
55 local unique_return_path = get_unique_table_for(state_path:return_stack(min_k))
56 if k_length_return_stacks[unique_return_path] then
57 local err
58 if all_same_k then
59 -- This is the one case where we can truly detect that the path was
60 -- a true ambiguity in the grammar.
61 err = "Ambiguous grammar for paths " .. serialize(state_path.path) ..
62 " and " .. serialize(k_length_return_stacks[unique_return_path].path)
63 error(err)
64 else
65 if not get_unique_predicted_alternative(rtn_states[signature]) then
66 -- This grammar is definitely not Strong-LL(k), and is *probably not
67 -- full-LL(k) either (but don't quote me on that).
68 err = "Gazelle cannot handle this grammar -- it is not Strong-LL(k). " ..
69 "The problem is that when generating lookahead for state %d in rule " ..
70 "%s, there were two paths through the grammar that consumed the same " ..
71 "series of terminals and ended in the same state, but predicted different " ..
72 "alternatives. This could be an ambiguity in the grammar, " ..
73 "or it could just be a non-ambiguous language that lies outside LL. " ..
74 "The paths are: " .. serialize(state_path.path) ..
75 " and " .. serialize(k_length_return_stacks[unique_return_path].path)
76 error(err)
77 end
78 end
79 end
80 k_length_return_stacks[unique_return_path] = state_path
81 end
83 -- Well if we got here it means that the paths weren't truly ambiguous
84 -- (their return stacks differed), but we're not out of the woods yet.
85 -- We have to ensure that they all predict the same alternative, or else
86 -- this grammar is full-LL but not strong-LL.
87 if not get_unique_predicted_alternative(rtn_states[signature]) then
88 error("Gazelle cannot handle this grammar: it is full-LL but not strong-LL")
89 end
90 else
91 rtn_states[signature] = Set:new()
92 rtn_states[signature]:add(path)
93 end
95 -- the following code was in get_follow_states_and_paths
96 -- Now turn the flat list of follow states into follow *paths*, by depth-first
97 -- searching the paths of follow states, bounding the search when we find cycles.
98 -- We only add states with a terminal transition out of them, and for each such
99 -- state we also include the return stack of such a path.
100 local follow_paths = {}
101 for name, rtn in each(grammar.rtns) do
102 follow_paths[rtn] = {}
104 function children_func(state, stack)
105 if stack:count() > 1 and state:has_terminal_transition() then
106 local stack_array = stack:to_array()
107 table.remove(stack_array, 1) -- don't care about the fake state at the bottom
108 table.insert(follow_paths[rtn], stack_array)
111 local children = {}
112 if state.final then
113 for follow_state in each(follow_states[state.rtn]) do
114 if not stack:contains(follow_state) then
115 table.insert(children, follow_state)
119 return children
122 -- seed with a fake state that is final and has this rtn
123 depth_first_traversal({final=true, rtn=rtn}, children_func, true)