Lookahead improvements: EOF handling and better ambiguity detection.
[gazelle.git] / tests / test_ll.lua
blobf3223c0ed4984029f514c6db93d7d1b933acba88
2 require "luaunit"
3 require "ll"
4 require "fa_algorithms"
5 require "bootstrap/rtn"
6 require "pp"
8 function find_state(rule, slotnum)
9 if slotnum == 0 then
10 return rule.start
11 end
13 for s in each(rule:states()) do
14 for edge_val, dest_state, properties in s:transitions() do
15 if properties.slotnum == slotnum then
16 return s
17 end
18 end
19 end
21 error(string.format("No such slotnum (%d) for rule %s", slotnum, rule_str))
22 end
24 function get_target_for_slotnum(state, slotnum)
25 for edge_val, dest_state, properties in state:transitions() do
26 if properties.slotnum == slotnum then
27 return {edge_val, dest_state}
28 end
29 end
30 error(string.format("Slotnum %d not found for state %s", slotnum, serialize(state, 4, " ")))
31 end
33 function parse_gla(str, rtn_state)
34 local stream = CharStream:new(str)
35 stream:ignore("whitespace")
36 local gla = fa.GLA:new()
37 gla.rtn_state = rtn_state
38 local states = {[1]=gla.start}
39 while not stream:eof() do
40 local statenum = tonumber(stream:consume_pattern("%d+"))
41 local state = states[statenum]
42 if not state then
43 error(string.format("GLA refers to state %d before it is used as a target",
44 statenum))
45 end
46 while stream:lookahead(1) ~= ";" do
47 stream:consume("-")
48 local term = stream:consume_pattern("%w+")
49 stream:consume("->")
50 local dest_state_num = tonumber(stream:consume_pattern("%d+"))
51 states[dest_state_num] = states[dest_state_num] or fa.GLAState:new()
52 local dest_state = states[dest_state_num]
53 if term == "EOF" then
54 state:add_transition(fa.eof, dest_state)
55 else
56 state:add_transition(term, dest_state)
57 end
58 if stream:lookahead(1) == "(" then
59 stream:consume("(")
60 local final_state_slotnum = tonumber(stream:consume_pattern("%d+"))
61 stream:consume(")")
62 dest_state.final = get_target_for_slotnum(rtn_state, final_state_slotnum)
63 end
64 state = dest_state
65 end
66 stream:consume(";")
67 end
69 return gla
70 end
72 function assert_lookahead(grammar_str, rule_str, slotnum, expected_gla_str)
73 grammar = parse_grammar(CharStream:new(grammar_str))
74 grammar:determinize_rtns()
75 grammar:minimize_rtns()
77 local rule = grammar.rtns:get(rule_str)
78 state = find_state(rule, slotnum)
79 expected_gla = parse_gla(expected_gla_str, state)
81 compute_lookahead(grammar)
83 if not fa_isequal(expected_gla, state.gla) then
84 local bad = io.open("bad.dot", "w")
85 bad:write("digraph untitled {\n")
86 bad:write(state.gla:to_dot())
87 bad:write("}")
88 bad:close()
90 local good = io.open("good.dot", "w")
91 good:write("digraph untitled {\n")
92 good:write(expected_gla:to_dot())
93 good:write("}")
94 good:close()
96 os.execute("dot -Tpng -o good.png good.dot")
97 os.execute("dot -Tpng -o bad.png bad.dot")
99 error("GLAs were not equal: expected and actual are " ..
100 "in good.png and bad.png, respectively")
104 TestSimple = {}
105 function TestSimple:test1()
106 assert_lookahead(
108 s -> a | "X";
109 a -> "Y";
111 "s", 0,
113 1 -Y-> 2(1);
114 1 -X-> 3(2);
119 function TestSimple:test_multiple_recursions()
120 assert_lookahead(
122 s -> a | "X";
123 a -> b;
124 b -> "Y";
126 "s", 0,
128 1 -Y-> 2(1);
129 1 -X-> 3(2);
134 TestEpsilon = {}
135 function TestEpsilon:test1()
136 assert_lookahead(
138 s -> a "Z" | "X";
139 a -> "Y"?;
141 "s", 0,
143 1 -Y-> 2(1);
144 1 -Z-> 2;
145 1 -X-> 3(3);
150 function TestEpsilon:test2()
151 assert_lookahead(
153 s -> a | "X";
154 a -> "Y"? "Z" ;
156 "s", 0,
158 1 -Y-> 2(1);
159 1 -Z-> 2;
160 1 -X-> 3(2);
165 function TestEpsilon:test3()
166 assert_lookahead(
168 s -> a "Q" | "X";
169 a -> "Y"? "Z"? ;
171 "s", 0,
173 1 -Y-> 2(1);
174 1 -Z-> 2;
175 1 -Q-> 2;
176 1 -X-> 3(3);
181 function TestEpsilon:test4()
182 assert_lookahead(
184 s -> a "Q" | "X";
185 a -> b "Y"?;
186 b -> "Z"?;
188 "s", 0,
190 1 -Y-> 2(1);
191 1 -Z-> 2;
192 1 -Q-> 2;
193 1 -X-> 3(3);
198 function TestEpsilon:test5()
199 assert_lookahead(
201 s -> a "Q" | "X";
202 a -> b? "Y"?;
203 b -> "Z";
205 "s", 0,
207 1 -Y-> 2(1);
208 1 -Z-> 2;
209 1 -Q-> 2;
210 1 -X-> 3(3);
215 --[=[
216 TODO: add this test when GLAs that tell RTNs to return are supported.
217 function TestEpsilon:test5()
218 assert_lookahead(
220 s -> a "X";
221 a -> "Y"* "Z";
223 "a", 1,
225 1 -Y-> 2(1);
226 1 -X-> 3(0);
232 TestMultipleNonterms = {}
233 function TestMultipleNonterms:test1()
234 assert_lookahead(
236 s -> a | b | c;
237 a -> "X";
238 b -> "Y";
239 c -> "Z";
241 "s", 0,
243 1 -X-> 2(1);
244 1 -Y-> 3(2);
245 1 -Z-> 4(3);
250 function TestMultipleNonterms:test22()
251 assert_lookahead(
253 s -> (a | b | c)? d;
254 a -> "X";
255 b -> "Y";
256 c -> "Z";
257 d -> "Q";
259 "s", 0,
261 1 -X-> 2(1);
262 1 -Y-> 3(2);
263 1 -Z-> 4(3);
264 1 -Q-> 5(4);
269 TestLL2 = {}
270 function TestLL2:test1()
271 assert_lookahead(
273 s -> "X" "Y" | "X" "Z";
275 "s", 0,
277 1 -X-> 2;
278 2 -Y-> 3(1);
279 2 -Z-> 4(3);
284 function TestLL2:test2()
285 assert_lookahead(
287 s -> a "Y" | a "Z";
288 a -> "X";
290 "s", 0,
292 1 -X-> 2;
293 2 -Y-> 3(1);
294 2 -Z-> 4(3);
299 function TestLL2:test3()
300 assert_lookahead(
302 s -> a "Y" | a "Z";
303 a -> "X" | "Q";
305 "s", 0,
307 1 -X-> 2;
308 1 -Q-> 2;
309 2 -Y-> 3(1);
310 2 -Z-> 4(3);
315 function TestLL2:test3()
316 assert_lookahead(
318 s -> "X"? "Y" | "X" "Z";
320 "s", 0,
322 1 -Y-> 2(2);
323 1 -X-> 3;
324 3 -Y-> 4(1);
325 3 -Z-> 5(3);
330 TestLL3 = {}
331 function TestLL3:test1()
332 assert_lookahead(
334 s -> a "X" | a "Y";
335 a -> ("P" | "Q") ("P" | "Q");
337 "s", 0,
339 1 -P-> 2 -P-> 3;
340 1 -Q-> 2 -Q-> 3;
341 2 -Q-> 3;
342 2 -P-> 3;
343 3 -X-> 4(1);
344 3 -Y-> 5(3);
349 function TestLL3:test2()
350 assert_lookahead(
352 s -> a "X" | a "Y";
353 a -> ("P" | "Q")? ("P" | "Q")?;
355 "s", 0,
357 1 -X-> 2(1);
358 1 -Y-> 3(3);
359 1 -P-> 4 -P-> 5;
360 4 -Q-> 5;
361 1 -Q-> 4 -Q-> 5;
362 4 -P-> 5;
363 4 -X-> 2;
364 4 -Y-> 3;
365 5 -X-> 2;
366 5 -Y-> 3;
371 TestEOF = {}
372 function TestEOF:test1()
373 assert_lookahead(
375 s -> "A" | "A" "B";
377 "s", 0,
379 1 -A-> 2;
380 2 -B-> 3(2);
381 2 -EOF-> 4(1);
386 function TestEOF:test2()
387 -- this is the example used by Terence Parr in his discussion of ANTLR 3.0's
388 -- lookahead analysis: http://www.antlr.org/blog/antlr3/lookahead.tml
389 assert_lookahead(
391 s -> a "A" | a "B";
392 a -> "A"?;
394 "s", 0,
396 1 -A-> 2;
397 1 -B-> 3(3);
398 2 -B-> 3(3);
399 2 -A-> 4(1);
400 2 -EOF-> 4(1);
405 -- -- Test lookahead that we can only compute correctly if we apply the
406 -- -- tail-recursion optimization.
407 -- TestTailRecursion = {}
408 -- function TestTailRecursion:test1()
409 -- assert_lookahead(
410 -- [[
411 -- s -> a "X" | a "Y";
412 -- a -> ("Z" a)?;
413 -- ]],
414 -- "s", 0,
415 -- [[
416 -- 1 -Z-> 1;
417 -- 1 -X-> 2(1);
418 -- 1 -Y-> 3(3);
419 -- ]]
420 -- )
421 -- end
423 TestFollow = {}
424 function TestFollow:test1()
425 assert_lookahead(
427 s -> a "X";
428 a -> "Y" "Y" | "Y";
430 "a", 0,
432 1 -Y-> 2;
433 2 -Y-> 3(1);
434 2 -X-> 4(3);
439 -- Won't work until lookahead can instruct an RTN to return.
440 -- function TestFollow:test1()
441 -- assert_lookahead(
442 -- [[
443 -- s -> a "X";
444 -- a -> "Y"?;
445 -- ]],
446 -- "a", 0,
447 -- [[
448 -- 1 -Y-> 2(1);
449 -- 1 -X-> 3(0);
450 -- ]]
451 -- )
452 -- end
454 LuaUnit:run()