# This Python file uses the following encoding: utf-8
"""
Created on Apr 29, 2011

Compute some statistics of my parsers and some nltk parsers.
"""
# NOTE(review): os, nltk and yaml are used below but their imports are not in
# the visible source; they are imported here so the script is self-contained.
import os

import nltk
import yaml

from ParsePerformanceTester import ParsePerformanceTester, NLTK_BEST, MY_PARSER
from mjacob.algorithms.generate_random import generate_random_sentence

# The grammar object is used to generate random sentences; the same load
# expression is also kept as a string because that string (not the object) is
# what gets passed to ParsePerformanceTester.
grammar = nltk.data.load('grammars/sample_grammars/toy.cfg')
grammar_string = "nltk.data.load('grammars/sample_grammars/toy.cfg')"

# NOTE(review): the number of sentences generated per tree depth was not
# recoverable from the visible source (the tail of the list comprehension is
# missing). 100 is a placeholder -- TODO confirm the intended sample size.
SENTENCES_PER_DEPTH = 100

for tree_depth in range(4, 8):
    # Sentences are cached on disk per depth so repeated runs time the parsers
    # against the same inputs.
    sentence_file = 'trivial_sentences_%s.yaml' % (tree_depth,)
    if os.path.exists(sentence_file):
        # safe_load: a bare yaml.load() without a Loader can construct
        # arbitrary objects and is rejected by PyYAML >= 6. The cache holds
        # plain lists of strings, which safe_load reads.
        # NOTE(review): cache files written by a Python 2 run may carry
        # python-specific tags that safe_load refuses; regenerate if so.
        with open(sentence_file) as cached:
            sentences = yaml.safe_load(cached)
    else:
        sentences = [generate_random_sentence(grammar, tree_depth).split(' ')
                     for _ in range(SENTENCES_PER_DEPTH)]
        with open(sentence_file, 'w') as cached:
            yaml.dump(sentences, cached)

    # NOTE(review): the constructor call is truncated in the visible source
    # after `grammar_string,`. Passing `sentences` as the next argument is an
    # assumption -- confirm against ParsePerformanceTester's signature and
    # restore any further arguments.
    tester = ParsePerformanceTester(grammar_string, sentences)

    print("%s %s" % (tree_depth, tester.average_sentence_length()))

    # tester.run() results are scaled by 1000 before printing (presumably
    # seconds -> milliseconds; verify against ParsePerformanceTester.run).
    nltk_time = None
    for parser, parser_import in sorted(NLTK_BEST.items()):
        nltk_time = 1000 * tester.run(parser_import, method="parse")
        print(" %s %.2f" % (parser, nltk_time))

    my_time = None
    for parser, parser_import in sorted(MY_PARSER.items()):
        my_time = 1000 * tester.run(parser_import, method="parse")
        print(" %s %.2f" % (parser, my_time))

    # The ratio compares the last-listed parser of each table (in sorted key
    # order), as in the original. It is skipped when either table is empty or
    # the NLTK time is zero, instead of raising NameError/ZeroDivisionError.
    if my_time is not None and nltk_time:
        print(" ratio: %s" % (my_time / nltk_time))