2to3 (compiles, not tested)
[tag_parser.git] / tests / stand_alone / test_tag_chart_parser.py
blob b2f26edf5793ede8d88e69c711924598ff2d12d6
1 # This Python file uses the following encoding: utf-8
2 '''
3 Created on May 30, 2011
5 @author: mjacob
6 '''
7 import nltk
8 from mjacob.nltk.grammar.TreeAdjoiningGrammar import TreeAdjoiningGrammar
9 from mjacob.nltk.parse.tag.earley.rules import TAG_EARLEY_STRATEGY
10 from mjacob.nltk.parse.tag.TagChartParser import TagChartParser
# Stand-alone smoke run of TagChartParser: choose one scenario through VAR,
# build its grammar, chart-parse a single token sequence, then print the
# chart followed by the parses found for the grammar's start symbol.
#
# VAR selects the scenario:
#   0 -> TreeAdjoiningGrammar built from the ATIS CFG; input is sentences[0]
#   1 -> grammar from degenerate_1/grammar.yaml; input is the token 'John'
#   2 -> grammar from cross_serial_2/grammar.yaml; input is the
#        space-separated sentence given below
VAR = 1

# Fail fast on an unknown scenario. Without this check neither if/elif chain
# below matches, `grammar` is never bound, and the script dies later with an
# unrelated NameError.
if VAR not in (0, 1, 2):
    raise ValueError("unsupported VAR: {!r} (expected 0, 1 or 2)".format(VAR))

# Loaded for every scenario, although only scenario 0 reads `sentences`.
raw_data = nltk.data.load('grammars/large_grammars/atis_sentences.txt', format='raw')
# Keep element 0 of each extracted entry (presumably the token list -- confirm
# against the NLTK docs) and truncate to the first entry only.
sentences = [x[0] for x in nltk.parse.util.extract_test_sentences(raw_data)][:1]

# NOTE(review): the YAML paths are relative; presumably they resolve against
# the current working directory, so run this from the script's own directory.
if VAR == 0:
    grammar = TreeAdjoiningGrammar(cfg=nltk.data.load('grammars/large_grammars/atis.cfg'))
elif VAR == 1:
    grammar = TreeAdjoiningGrammar("../integration/tag_chart/degenerate_1/grammar.yaml")
elif VAR == 2:
    grammar = TreeAdjoiningGrammar("../integration/tag_chart/cross_serial_2/grammar.yaml")

parser = TagChartParser(grammar)

# Input for the selected scenario; each call receives a list of tokens.
if VAR == 0:
    chart = parser.chart_parse(sentences[0])
elif VAR == 1:
    chart = parser.chart_parse(['John'])
elif VAR == 2:
    chart = parser.chart_parse("Jan Piet Marie de kinderen zag helpen leren zwemmen".split(' '))

# Dump the chart, a visual separator, then the parses for the start symbol.
print(chart)
print("\n\n\n")
print(chart.parses(grammar.start().symbol(), TAG_EARLEY_STRATEGY))