Add more flexibility in how whitespace is specified.
[gazelle.git] / tests / test_ll.lua
blobafe8f1d89b8e2819920ed21b9b8cdcd33a658251
1 --[[--------------------------------------------------------------------
3 Gazelle: a system for building fast, reusable parsers
5 tests/test_ll.lua
7 Tests for the LL(*) lookahead generation. This is the among the
8 most complicated things the complier does, and it has a lot of edge
9 cases and failure modes, so this file is complicated and subtle.
11 Copyright (c) 2008 Joshua Haberman. See LICENSE for details.
13 --------------------------------------------------------------------]]--
15 require "luaunit"
16 require "ll"
17 require "fa_algorithms"
18 require "bootstrap/rtn"
19 require "pp"
21 function find_state(rule, slotnum)
22 if slotnum == 0 then
23 return rule.start
24 end
26 for s in each(rule:states()) do
27 for edge_val, dest_state, properties in s:transitions() do
28 if properties.slotnum == slotnum then
29 return s
30 end
31 end
32 end
34 error(string.format("No such slotnum (%d) for rule %s", slotnum, rule_str))
35 end
37 function get_target_for_slotnum(state, slotnum)
38 if slotnum == 0 then
39 return {0, 0}
40 else
41 for edge_val, dest_state, properties in state:transitions() do
42 if properties.slotnum == slotnum then
43 return {edge_val, dest_state}
44 end
45 end
46 end
47 error(string.format("Slotnum %d not found for state %s", slotnum, serialize(state, 6, " ")))
48 end
50 function parse_gla(str, rtn_state)
51 local stream = CharStream:new(str)
52 stream:ignore("whitespace")
53 local gla = fa.GLA:new()
54 gla.rtn_state = rtn_state
55 local states = {[1]=gla.start}
56 while not stream:eof() do
57 local statenum = tonumber(stream:consume_pattern("%d+"))
58 local state = states[statenum]
59 if not state then
60 error(string.format("GLA refers to state %d before it is used as a target",
61 statenum))
62 end
63 while stream:lookahead(1) ~= ";" do
64 stream:consume("-")
65 local term = stream:consume_pattern("%w+")
66 stream:consume("->")
67 local dest_state_num = tonumber(stream:consume_pattern("%d+"))
68 states[dest_state_num] = states[dest_state_num] or fa.GLAState:new()
69 local dest_state = states[dest_state_num]
70 if term == "EOF" then
71 state:add_transition(fa.eof, dest_state)
72 else
73 state:add_transition(term, dest_state)
74 end
75 if stream:lookahead(1) == "(" then
76 stream:consume("(")
77 local final_state_slotnum = tonumber(stream:consume_pattern("%d+"))
78 stream:consume(")")
79 dest_state.final = get_target_for_slotnum(rtn_state, final_state_slotnum)
80 end
81 state = dest_state
82 end
83 stream:consume(";")
84 end
86 return gla
87 end
89 function assert_lookahead(grammar_str, rule_str, slotnum, expected_gla_str, k)
90 local grammar = parse_grammar(CharStream:new(grammar_str))
91 grammar:assign_priorities()
92 grammar:determinize_rtns()
93 grammar:minimize_rtns()
95 local rule = grammar.rtns:get(rule_str)
96 local state = find_state(rule, slotnum)
97 local expected_gla = parse_gla(expected_gla_str, state)
99 compute_lookahead(grammar, k)
101 if not fa_isequal(expected_gla, state.gla) then
102 local bad = io.open("bad.dot", "w")
103 bad:write("digraph untitled {\n")
104 bad:write(state.gla:to_dot())
105 bad:write("}")
106 bad:close()
108 local good = io.open("good.dot", "w")
109 good:write("digraph untitled {\n")
110 good:write(expected_gla:to_dot())
111 good:write("}")
112 good:close()
114 os.execute("dot -Tpng -o good.png good.dot")
115 os.execute("dot -Tpng -o bad.png bad.dot")
117 error("GLAs were not equal: expected and actual are " ..
118 "in good.png and bad.png, respectively")
122 TestSimple = {}
123 function TestSimple:test1()
124 assert_lookahead(
126 s -> a | "X";
127 a -> "Y";
129 "s", 0,
131 1 -Y-> 2(1);
132 1 -X-> 3(2);
137 function TestSimple:test_multiple_recursions()
138 assert_lookahead(
140 s -> a | "X";
141 a -> b;
142 b -> "Y";
144 "s", 0,
146 1 -Y-> 2(1);
147 1 -X-> 3(2);
152 TestEpsilon = {}
153 function TestEpsilon:test1()
154 assert_lookahead(
156 s -> a "Z" | "X";
157 a -> "Y"?;
159 "s", 0,
161 1 -Y-> 2(1);
162 1 -Z-> 2;
163 1 -X-> 3(3);
168 function TestEpsilon:test2()
169 assert_lookahead(
171 s -> a | "X";
172 a -> "Y"? "Z" ;
174 "s", 0,
176 1 -Y-> 2(1);
177 1 -Z-> 2;
178 1 -X-> 3(2);
183 function TestEpsilon:test3()
184 assert_lookahead(
186 s -> a "Q" | "X";
187 a -> "Y"? "Z"? ;
189 "s", 0,
191 1 -Y-> 2(1);
192 1 -Z-> 2;
193 1 -Q-> 2;
194 1 -X-> 3(3);
199 function TestEpsilon:test4()
200 assert_lookahead(
202 s -> a "Q" | "X";
203 a -> b "Y"?;
204 b -> "Z"?;
206 "s", 0,
208 1 -Y-> 2(1);
209 1 -Z-> 2;
210 1 -Q-> 2;
211 1 -X-> 3(3);
216 TestMultipleNonterms = {}
217 function TestMultipleNonterms:test1()
218 assert_lookahead(
220 s -> a | b | c;
221 a -> "X";
222 b -> "Y";
223 c -> "Z";
225 "s", 0,
227 1 -X-> 2(1);
228 1 -Y-> 3(2);
229 1 -Z-> 4(3);
234 function TestMultipleNonterms:test22()
235 assert_lookahead(
237 s -> (a | b | c)? d;
238 a -> "X";
239 b -> "Y";
240 c -> "Z";
241 d -> "Q";
243 "s", 0,
245 1 -X-> 2(1);
246 1 -Y-> 3(2);
247 1 -Z-> 4(3);
248 1 -Q-> 5(4);
253 TestLL1 = {}
254 function TestLL1:test1()
255 assert_lookahead(
257 s -> "X" s?;
259 "s", 2,
261 1 -X-> 2(2);
262 1 -EOF-> 3(0);
267 TestLL2 = {}
268 function TestLL2:test1()
269 assert_lookahead(
271 s -> "X" "Y" | "X" "Z";
273 "s", 0,
275 1 -X-> 2;
276 2 -Y-> 3(1);
277 2 -Z-> 4(3);
282 function TestLL2:test2()
283 assert_lookahead(
285 s -> a "Y" | a "Z";
286 a -> "X";
288 "s", 0,
290 1 -X-> 2;
291 2 -Y-> 3(1);
292 2 -Z-> 4(3);
297 function TestLL2:test3()
298 assert_lookahead(
300 s -> a "Y" | a "Z";
301 a -> "X" | "Q";
303 "s", 0,
305 1 -X-> 2;
306 1 -Q-> 2;
307 2 -Y-> 3(1);
308 2 -Z-> 4(3);
313 function TestLL2:test3()
314 assert_lookahead(
316 s -> "X"? "Y" | "X" "Z";
318 "s", 0,
320 1 -Y-> 2(2);
321 1 -X-> 3;
322 3 -Y-> 4(1);
323 3 -Z-> 5(3);
328 function TestLL2:test3()
329 assert_lookahead(
331 s -> "X" s | "X" "Y";
333 "s", 0,
335 1 -X-> 2 -X-> 3(1);
336 2 -Y-> 4(3);
341 function TestLL2:test4()
342 assert_lookahead(
344 a -> b "X";
345 b -> c*;
346 c -> "X";
348 "b", 0,
350 1 -X-> 2 -X-> 3(1);
351 2 -EOF-> 4(0);
356 function TestLL2:test5()
357 assert_lookahead(
359 s -> "X"+ | "X" "Y";
361 "s", 0,
363 1 -X-> 2 -X-> 3(1);
364 2 -EOF-> 3;
365 2 -Y-> 4(2);
370 function TestLL2:test6()
371 assert_lookahead(
373 s -> a? "X" "X";
374 a -> "X" "Y" | "X" "Z";
376 "s", 0,
378 1 -X-> 2 -X-> 3(2);
379 2 -Y-> 4(1);
380 2 -Z-> 4;
385 TestLL3 = {}
386 function TestLL3:test1()
387 assert_lookahead(
389 s -> a "X" | a "Y";
390 a -> ("P" | "Q") ("P" | "Q");
392 "s", 0,
394 1 -P-> 2 -P-> 3;
395 1 -Q-> 2 -Q-> 3;
396 2 -Q-> 3;
397 2 -P-> 3;
398 3 -X-> 4(1);
399 3 -Y-> 5(3);
404 -- This is equivalent to the grammar on page 271 of The Definitive ANTLR Reference.
405 function TestLL3:test3()
406 assert_lookahead(
408 s -> "X" s "Y" | "X" "X" "Z";
410 "s", 0,
412 1 -X-> 2 -X-> 3;
413 3 -X-> 4(1);
414 3 -Z-> 5(4);
419 function TestLL3:test3()
420 assert_lookahead(
422 s -> a a "Y" | a a "Z";
423 a -> "X";
425 "s", 0,
427 1 -X-> 2 -X-> 3 -Y-> 4(1);
428 3 -Z-> 5(4);
433 TestEOF = {}
434 function TestEOF:test1()
435 assert_lookahead(
437 s -> "A" | "A" "B";
439 "s", 0,
441 1 -A-> 2;
442 2 -B-> 3(2);
443 2 -EOF-> 4(1);
448 function TestEOF:test2()
449 -- this is the example used by Terence Parr in his discussion of ANTLR 3.0's
450 -- lookahead analysis: http://www.antlr.org/blog/antlr3/lookahead.tml
451 assert_lookahead(
453 s -> a "A" | a "B";
454 a -> "A"?;
456 "s", 0,
458 1 -A-> 2;
459 1 -B-> 3(3);
460 2 -B-> 3(3);
461 2 -A-> 4(1);
462 2 -EOF-> 4(1);
467 function TestEOF:test3()
468 assert_lookahead(
470 s -> "A" a;
471 a -> "A"?;
473 "a", 0,
475 1 -A-> 2(1);
476 1 -EOF-> 3(0);
481 -- This is really a "follow" test
482 function TestEOF:test4()
483 assert_lookahead(
485 s -> "X" a "Y";
486 a -> b;
487 b -> "X"?;
489 "b", 0,
491 1 -X-> 2(1);
492 1 -Y-> 3(0);
498 TestLLStar = {}
499 function TestLLStar:test1()
500 assert_lookahead(
502 s -> a "X" | a "Y";
503 a -> "Z"*;
505 "s", 0,
507 1 -Z-> 1;
508 1 -X-> 2(1);
509 1 -Y-> 3(3);
514 -- Test lookahead that we can only compute correctly if we apply the
515 -- tail-recursion optimization.
516 function TestLLStar:test2()
517 assert_lookahead(
519 s -> a "X" | a "Y";
520 a -> ("Z" a)?;
522 "s", 0,
524 1 -Z-> 1;
525 1 -X-> 2(1);
526 1 -Y-> 3(3);
531 TestFollow = {}
532 function TestFollow:test1()
533 assert_lookahead(
535 s -> a "X";
536 a -> "Y" "Y" | "Y";
538 "a", 0,
540 1 -Y-> 2;
541 2 -Y-> 3(1);
542 2 -X-> 4(3);
547 function TestFollow:test2()
548 assert_lookahead(
550 s -> a "X";
551 a -> "Y"?;
553 "a", 0,
555 1 -Y-> 2(1);
556 1 -X-> 3(0);
561 function assert_fails_with_error(grammar_str, error_string)
562 local grammar = parse_grammar(CharStream:new(grammar_str))
563 grammar:assign_priorities()
564 grammar:determinize_rtns()
565 grammar:minimize_rtns()
567 local success, message = pcall(compute_lookahead, grammar)
568 if success then
569 error("Failed to fail!")
570 elseif not message:find(error_string) then
571 error("Failed with wrong message! Message was supposed to contain "
572 .. error_string .. ", but it was: " .. message)
576 function assert_left_recursive(grammar_str)
577 assert_fails_with_error(grammar_str, "it is left%-recursive")
580 function assert_nonregular(grammar_str)
581 assert_fails_with_error(grammar_str, "one lookahead language was nonregular, others were not all fixed")
584 function assert_ambiguous(grammar_str)
585 assert_fails_with_error(grammar_str, "Ambiguous grammar")
588 function assert_no_nonrecursive_alt(grammar_str)
589 assert_fails_with_error(grammar_str, "no non%-recursive alternative")
592 function assert_not_ll(grammar_str)
593 assert_fails_with_error(grammar_str, "It is not Strong%-LL or full%-LL")
596 function assert_never_taken(grammar_str)
597 assert_fails_with_error(grammar_str, "will never be taken")
600 function assert_resolution_not_supported(grammar_str)
601 assert_fails_with_error(grammar_str, "cannot support this resolution of the ambiguity")
604 TestDetectNonLLStar = {}
605 function TestDetectNonLLStar:test_left_recursive()
606 assert_left_recursive(
608 s -> s? "X";
613 function TestDetectNonLLStar:test_left_recursive2()
614 assert_left_recursive(
616 s -> a | "X";
617 a -> s | "Y";
622 function TestDetectNonLLStar:test_left_recursive3()
623 assert_left_recursive(
625 s -> (s "X")?;
630 -- Hmm, technically this test is broken because this is left-recursion
631 -- that is getting incorrectly reported as ambiguity instead. But
632 -- fixing this one isn't a high priority right now.
633 -- function TestDetectNonLLStar:test_left_recursive4()
634 -- assert_left_recursive(
635 -- [[
636 -- s -> a b?;
637 -- a -> "X"?;
638 -- b -> s;
639 -- ]]
640 -- )
641 -- end
643 function TestDetectNonLLStar:test_nonregular()
644 assert_nonregular(
646 s -> e "%" | e "!";
647 e -> "(" e ")" | "ID";
652 function TestDetectNonLLStar:test_fails_heuristic_but_is_ll()
653 assert_nonregular(
655 s -> "X"* "Y" "Y" "Z"| "X" c;
656 c -> "Y" c "Y" | "Q";
659 assert_lookahead(
661 s -> "X"* "Y" "Y" "Z"| "X" c;
662 c -> "Y" c "Y" | "Q";
664 "s", 0,
666 1 -Y-> 2(2);
667 1 -X-> 3 -Y-> 4 -Y-> 5 -Y-> 6(5);
668 3 -Q-> 6;
669 4 -Q-> 6;
670 5 -Q-> 6;
671 5 -Z-> 7(1);
672 3 -X-> 7;
678 function TestDetectNonLLStar:test_not_full_ll_1()
679 assert_not_ll(
681 s -> a a;
682 a -> b;
683 b -> "X"*;
688 function TestDetectNonLLStar:test_not_full_ll_1()
689 assert_not_ll(
691 s -> ("X" s "X")?;
696 function TestDetectNonLLStar:test_not_full_ll_2()
697 assert_not_ll(
699 s -> "if" e "then" s ("else" s)? | e;
700 e -> "5";
706 TestAmbiguity = {}
707 function TestAmbiguity:test1()
708 assert_ambiguous(
710 a -> b | c;
711 b -> c;
712 c -> "X";
717 function TestAmbiguity:test2()
718 assert_ambiguous(
720 a -> b c "Y";
721 b -> "X" ?;
722 c -> "X" ?;
727 function TestAmbiguity:test3()
728 assert_ambiguous(
730 a -> (b | c) "Y";
731 b -> "X";
732 c -> "X";
737 function TestAmbiguity:test4()
738 assert_ambiguous(
740 s -> a "X"?;
741 a -> "X"?;
746 function TestAmbiguity:test5()
747 assert_ambiguous(
749 s -> a "X"*;
750 a -> "X"*;
755 function TestAmbiguity:test6()
756 assert_ambiguous(
758 s -> a? a?;
759 a -> "X";
764 function TestAmbiguity:test7()
765 assert_ambiguous(
767 s -> "X" | "X";
772 function TestAmbiguity:test8()
773 assert_ambiguous(
775 s -> "X"? | "X";
780 function TestAmbiguity:test9()
781 assert_ambiguous(
783 s -> "X"? | "X"?;
788 function TestAmbiguity:test10()
789 assert_ambiguous(
791 s -> a b;
792 a -> "X"*;
793 b -> "X"*;
798 function TestAmbiguity:test11()
799 assert_ambiguous(
801 s -> a*;
802 a -> "X"?;
807 function TestAmbiguity:test12()
808 assert_ambiguous(
810 s -> a*;
811 a -> "X"*;
816 -- These tests are currently failing because I have not implemented this
817 -- check yet!
818 TestNoNonRecursiveAlt = {}
819 function TestNoNonRecursiveAlt:test1()
820 assert_no_nonrecursive_alt(
822 a -> a;
827 function TestNoNonRecursiveAlt:test2()
828 assert_no_nonrecursive_alt(
830 a -> "X" a;
835 function TestNoNonRecursiveAlt:test3()
836 assert_no_nonrecursive_alt(
838 a -> "X" b;
839 b -> a;
844 function TestNoNonRecursiveAlt:test4()
845 assert_no_nonrecursive_alt(
847 s -> a b;
848 a -> "X"?;
849 b -> s;
854 function TestNoNonRecursiveAlt:test4()
855 assert_no_nonrecursive_alt(
857 a -> "X" b;
858 b -> "X" a;
863 TestAmbiguityResolution = {}
864 function TestAmbiguityResolution:test1()
865 assert_lookahead(
867 s -> "X" "Y" / "X"+ "Y";
869 "s", 0,
871 1 -X-> 2 -Y-> 3(1);
872 2 -X-> 4(3);
877 function TestAmbiguityResolution:test2()
878 assert_lookahead(
880 s -> "if" e "then" s ("else" s)?+ | e;
881 e -> "5";
883 "s", 5,
885 1 -else-> 2(5);
886 1 -EOF-> 3(0);
891 function TestAmbiguityResolution:test_never_taken1()
892 assert_never_taken(
894 s -> "X" "Y" / "X" "Y";
899 -- This test is currently failing -- it's throwing the wrong error message.
900 -- Instead of warning you that one transition is never taken, it just says
901 -- that Gazelle can't support this resolution of the ambiguity.
902 function TestAmbiguityResolution:test_never_taken2()
903 assert_never_taken(
905 s -> a b / "X" "Y";
906 a -> "X";
907 b -> "Y";
912 function TestAmbiguityResolution:test_never_taken3()
913 assert_never_taken(
915 s -> "X"* "Y" / "X" "Y";
920 function TestAmbiguityResolution:test_resolution_not_supported1()
921 assert_resolution_not_supported(
923 s -> "S" / "S" s "S";
928 function TestDetectNonLLStar:test_resolution_not_supported2()
929 assert_resolution_not_supported(
931 s -> "if" e "then" s ("else" s)?- | e;
932 e -> "5";
938 LuaUnit:run(unpack(arg))