2to3 (compiles, not tested)
[tag_parser.git] / tests / performance / test_small_grammar.py
blob75f84e0b45606d6f45537eef3332f840411c10f7
1 # This Python file uses the following encoding: utf-8
2 '''
3 Created on Apr 29, 2011
5 compute some statistics of my parsers and some nltk parsers
6 using the toy grammar.
8 @author: mjacob
9 '''
10 from ParsePerformanceTester import ParsePerformanceTester, NLTK_BEST, MY_PARSER
11 import nltk
12 from mjacob.algorithms.generate_random import generate_random_sentence
13 import yaml
14 import os
# The toy grammar, loaded through nltk.data; it drives random sentence
# generation below.
grammar = nltk.data.load('grammars/sample_grammars/toy.cfg')
# The same load call as source text: ParsePerformanceTester is given this
# string instead of the loaded grammar object.
grammar_string = "nltk.data.load('grammars/sample_grammars/toy.cfg')"

for tree_depth in range(4, 8):
    # One cache file of sentences per tree depth, in the working directory.
    sentence_file = 'trivial_sentences_%s.yaml' % (tree_depth,)

    sentences = None
    if os.path.exists(sentence_file):
        # safe_load is sufficient for the plain lists of strings stored here;
        # yaml.load without an explicit Loader is deprecated and is rejected
        # by PyYAML >= 6.  The `with` block guarantees the handle is closed.
        with open(sentence_file) as cached:
            sentences = yaml.safe_load(cached)

    if not sentences:
        # No cache, or an empty one (e.g. left by an interrupted earlier run):
        # generate 100 random sentences, each split into a list of tokens,
        # and store them for later runs.
        sentences = [generate_random_sentence(grammar, tree_depth).split(' ')
                     for _ in range(100)]
        # Closing the file via `with` ensures the dump is flushed to disk.
        with open(sentence_file, 'w') as cached:
            yaml.dump(sentences, cached)

    tester = ParsePerformanceTester(grammar_string, sentences)

    print("%s %s" % (tree_depth, tester.average_sentence_length()))

    # Timings are scaled by 1000 before printing
    # (presumably seconds -> milliseconds; confirm against tester.run).
    for parser, parser_import in sorted(NLTK_BEST.items()):
        nltk_time = 1000*tester.run(parser_import, method="parse")
        print(" %s %.2f" % (parser, nltk_time))
    for parser, parser_import in sorted(MY_PARSER.items()):
        my_time = 1000*tester.run(parser_import, method="parse")
        print(" %s %.2f" % (parser, my_time))

    # NOTE: the ratio uses only the last timing from each loop above, i.e.
    # the alphabetically last entry of MY_PARSER against that of NLTK_BEST.
    print(" ratio: %s" % (my_time/nltk_time))