# This Python file uses the following encoding: utf-8
"""
Created on May 30, 2011
"""
from mjacob.nltk.grammar.TreeAdjoiningGrammar import TreeAdjoiningGrammar
from mjacob.nltk.parse.tag.earley.rules import TAG_EARLEY_STRATEGY
from mjacob.nltk.parse.tag.TagChartParser import TagChartParser

# Driver script: build a TreeAdjoiningGrammar from a grammar file, parse one
# tokenized sentence with TagChartParser, then print whether the chart accepts
# the sentence and the parses it contains.
#
# This script used to also, in the same run:
#   * build a grammar from NLTK's 'grammars/large_grammars/atis.cfg'
#     (passed as cfg=...) and from
#     "../integration/tag_chart/degenerate_1/grammar.yaml";
#   * parse the first sentence extracted from
#     'grammars/large_grammars/atis_sentences.txt' and the sentence ['John'].
# Each of those results was overwritten before it was ever read, so only the
# last grammar and the last sentence influenced the output. They are the only
# ones kept here; swap the two constants below to try another input.

# NOTE(review): relative path -- presumably resolved against the directory the
# script is launched from; confirm before running from elsewhere.
GRAMMAR_PATH = "../integration/tag_chart/cross_serial_2/grammar.yaml"

# Whitespace-separated tokens of the sentence to parse.
SENTENCE = "Jan Piet Marie de kinderen zag helpen leren zwemmen"

print("creating grammar")
grammar = TreeAdjoiningGrammar(GRAMMAR_PATH)

print("creating parser")
parser = TagChartParser(grammar)
print("parser created")

chart = parser.chart_parse(SENTENCE.split(' '))

# The message previously read "accpeted"; the spelling is corrected here.
print("sentence accepted: %s" % (chart.accept(grammar.start(), TAG_EARLEY_STRATEGY)))
print(chart.parses(grammar.start(), TAG_EARLEY_STRATEGY))