2to3 (compiles, not tested)
[tag_parser.git] / demo / NLTKTagParserDemo.py
blob958dbef337062ab870a4d2af1ced5f87a3b981ee
1 # This Python file uses the following encoding: utf-8
2 '''
3 Created on Jun 14, 2011
5 @author: mjacob
6 '''
7 from mjacob.nltk.grammar.TreeAdjoiningGrammar import TreeAdjoiningGrammar
8 from mjacob.nltk.parse.tag.TagChartParser import TagChartParser
9 from itertools import chain
10 import yaml
# --- Example 1: the non-context-free language L4 ---------------------------
# Load the grammar from its YAML description and build a chart parser for it.
l4_grammar = TreeAdjoiningGrammar(filename='../tests/integration/tag_chart/L4-trinary/grammar.yaml')
l4_parser = TagChartParser(l4_grammar)

print("Parsing non-context-free languages")
print()
print("EXAMPLE 1: L4 {aⁿbⁿcⁿdⁿ : n∈ℕ}")
print("see https://secure.wikimedia.org/wikipedia/en/wiki/Context-sensitive_grammar")
print(" %s" % (l4_grammar,))

# List the productions selected with is_auxiliary=False first, then those
# selected with is_auxiliary=True; each group is sorted on its own.
l4_non_auxiliary = sorted(l4_grammar.productions(is_auxiliary=False))
l4_auxiliary = sorted(l4_grammar.productions(is_auxiliary=True))
for production in chain(l4_non_auxiliary, l4_auxiliary):
    print(" %s" % (production,))
print()

# Parse a^n b^n c^n d^n for n = 3 and n = 4, one token per character.
for n in (3, 4):
    string = "".join(letter * n for letter in "abcd")
    tokens = tuple(string)
    print(" parsing '%s': " % (string,))
    for parse in l4_parser.nbest_parse(tokens):
        print(" %s" % (parse.pprint(margin=100000),))

# NOTE(review): the original indentation was lost in extraction; these two
# separator lines are assumed to follow the loop rather than sit inside it.
print()
print()
# --- Example 2: cross-serial dependencies ----------------------------------
# Load the grammar from its YAML description and build a chart parser for it.
xs_grammar = TreeAdjoiningGrammar(filename='../tests/integration/tag_chart/cross_serial_2/grammar.yaml')
xs_parser = TagChartParser(xs_grammar)

print("EXAMPLE 2: Cross serial dependencies")
print("see http://www.let.rug.nl/~vannoord/papers/acl94/node5.html")
print(" %s" % (xs_grammar))

# List the productions selected with is_auxiliary=False first, then those
# selected with is_auxiliary=True; each group is sorted on its own.
for production in chain(sorted(xs_grammar.productions(is_auxiliary=False)), sorted(xs_grammar.productions(is_auxiliary=True))):
    print(" %s" % (production))
print()

# Read the example sentences. The `with` block closes the file handle, and
# safe_load is used because yaml.load() without an explicit Loader is
# rejected by current PyYAML releases (and was unsafe before that).
with open('../tests/integration/tag_chart/cross_serial_2/tests.yaml') as examples_file:
    examples = yaml.safe_load(examples_file.read())

# Each example is a sentence whose tokens are separated by single spaces.
for string in examples:
    tokens = string.split(' ')
    print(" parsing '%s': " % (string,))
    for parse in xs_parser.nbest_parse(tokens):
        print(" %s" % (parse.pprint(margin=100000)))

# NOTE(review): the original indentation was lost in extraction; these two
# separator lines are assumed to follow the loop rather than sit inside it.
print()
print()
# --- Example 3: an ambiguous sentence --------------------------------------
# Load the grammar from its YAML description and build a chart parser for it.
elephant_grammar = TreeAdjoiningGrammar(filename='../tests/integration/tag_chart/elephant/grammar.yaml')
elephant_parser = TagChartParser(elephant_grammar)
print("Example 3: An ambiguous sentence")
print(" %s" % (elephant_grammar))

# List the productions selected with is_auxiliary=False first, then those
# selected with is_auxiliary=True; each group is sorted on its own.
for production in chain(sorted(elephant_grammar.productions(is_auxiliary=False)), sorted(elephant_grammar.productions(is_auxiliary=True))):
    print(" %s" % (production))
print()

# Read the example sentences. The `with` block closes the file handle, and
# safe_load is used because yaml.load() without an explicit Loader is
# rejected by current PyYAML releases (and was unsafe before that).
with open('../tests/integration/tag_chart/elephant/tests.yaml') as examples_file:
    examples = yaml.safe_load(examples_file.read())

# Each example is a sentence whose tokens are separated by single spaces;
# every parse returned by the parser is printed.
for string in examples:
    tokens = string.split(' ')
    print(" parsing '%s': " % (string,))
    for parse in elephant_parser.nbest_parse(tokens):
        print(" %s" % (parse.pprint(margin=100000)))

# NOTE(review): the original indentation was lost in extraction; these two
# separator lines are assumed to follow the loop rather than sit inside it.
print()
print()
# --- Performance results ----------------------------------------------------
# Echo the stored performance report verbatim.
print("Performance testing results")
with open('../tests/performance/PERFORMANCE_RESULTS.txt') as results_file:
    report = results_file.read()
    print(report)