1 # This Python file uses the following encoding: utf-8
Created on May 17, 2011

Compare the performance of various parsers using the ATIS grammar
(extracted from a treebank of the DARPA ATIS3; see
http://www.informatics.sussex.ac.uk/research/groups/nlp/carroll/cfg-resources/).
The grammar is context free, *NOT* CNF (nodes w/ 10 children), with 5517 productions, 549 non-terminals, and 925 terminals.
Average sentence length: 11.408163265306122
Note that there are slight differences in the way that my parsers and the NLTK
parsers are invoked. Due to a design oversight, my parsers cannot feasibly
compute the parse trees of complex sentences with large grammars, so instead
I merely ask whether they recognize a string as valid. As the NLTK parsers
do not have a similar method, I cannot compare them directly.
18 In any event, my parsers cannot even recognize sentences from this grammar
19 in a reasonable length of time.
24 from ParsePerformanceTester
import ParsePerformanceTester
, NLTK_BEST
, MY_PARSER
# Load the ATIS grammar object from the NLTK data package.  It is not
# referenced again in the visible part of this script, but the load itself is
# kept because it is a side effect of running the file.
grammar = nltk.data.load('grammars/large_grammars/atis.cfg')

# Source text of the same loading expression; this string (not the loaded
# object) is what gets passed to ParsePerformanceTester further down.
grammar_string = "nltk.data.load('grammars/large_grammars/atis.cfg')"

# Fetch the ATIS test-sentence file, requesting the 'raw' format from the
# NLTK loader.
raw_data = nltk.data.load(
    'grammars/large_grammars/atis_sentences.txt',
    format='raw',
)

# Keep only the first element of every extracted entry
# (presumably the tokenised sentence -- confirm against the NLTK docs).
test_entries = nltk.parse.util.extract_test_sentences(raw_data)
sentences = [entry[0] for entry in test_entries]
# Build the performance tester; the grammar is handed over as the source text
# of the loading expression (grammar_string) rather than as the loaded object.
# NOTE(review): the remaining arguments and the closing parenthesis of this
# call are not present in the visible source -- restore them from the original
# file before running.
32 tester
= ParsePerformanceTester(grammar_string
,
def _item_at(mapping, position):
    """Return the ``(key, value)`` pair found at ``position`` in ``mapping``.

    Pairs are taken in the mapping's own iteration order.
    """
    pairs = list(mapping.items())
    return pairs[position]


def _timed_parse(import_spec):
    """Run the tester's ``parse`` method for ``import_spec``, scaled by 1000.

    The scaling presumably converts seconds to milliseconds -- confirm
    against ``ParsePerformanceTester.run``.
    """
    return 1000 * tester.run(import_spec, method="parse")


# Reference measurement: first entry of NLTK_BEST.
parser, parser_import = _item_at(NLTK_BEST, 0)
nltk_time = _timed_parse(parser_import)

# First entry of MY_PARSER.
parser, parser_import = _item_at(MY_PARSER, 0)
my_time1 = _timed_parse(parser_import)

# Second entry of MY_PARSER.
parser, parser_import = _item_at(MY_PARSER, 1)
my_time2 = _timed_parse(parser_import)

# Header row: the two MY_PARSER keys label the columns for my_time1/my_time2.
column_labels = (_item_at(MY_PARSER, 0)[0], _item_at(MY_PARSER, 1)[0])
print(" sentence length NLTK %s ratio %s ratio2" % column_labels)

# Data row: each of my timings is followed by its ratio to the NLTK timing.
mean_length = tester.average_sentence_length()
report_row = (
    mean_length,
    nltk_time,
    my_time1,
    my_time1 / nltk_time,
    my_time2,
    my_time2 / nltk_time,
)
print(" %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f" % report_row)