a bit better, but prefix-valid is still slower. why did i bother?
[tag_parser.git] / tests / performance / ParsePerformanceTester.py
blob3e000f57c875fa13762fe7166131d0e26c50ecaa
1 # This Python file uses the following encoding: utf-8
2 '''
3 Created on May 17, 2011
5 @author: mjacob
6 '''
7 from timeit import Timer
# Display label -> import statement for each NLTK chart parser to benchmark.
# The statements deliberately have no alias: SETUP appends " as Parser" so
# that every candidate is bound to the same name inside the timed snippet.
NLTK_PARSERS = {
    "NLTK's ChartParser": "from nltk import ChartParser",
    "NLTK's LeftCornerChartParser": "from nltk import LeftCornerChartParser",
    "NLTK's BottomUpChartParser": "from nltk import BottomUpChartParser",
    "NLTK's BottomUpLeftCornerChartParser": "from nltk import BottomUpLeftCornerChartParser",
    "NLTK's TopDownChartParser": "from nltk import TopDownChartParser",

    "NLTK's IncrementalLeftCornerChartParser": "from nltk.parse.earleychart import IncrementalLeftCornerChartParser",
    "NLTK's IncrementalChartParser": "from nltk.parse.earleychart import IncrementalChartParser",
    "NLTK's EarleyChartParser": "from nltk.parse.earleychart import EarleyChartParser",
    "NLTK's IncrementalBottomUpChartParser": "from nltk.parse.earleychart import IncrementalBottomUpChartParser",
    "NLTK's IncrementalBottomUpLeftCornerChartParser": "from nltk.parse.earleychart import IncrementalBottomUpLeftCornerChartParser",
    "NLTK's IncrementalTopDownChartParser": "from nltk.parse.earleychart import IncrementalTopDownChartParser",
}
# Display label -> import statement for the TAG chart parsers from the
# mjacob.nltk package, in the same un-aliased form as NLTK_PARSERS.
MY_PARSER = {
    "TAG ChartParser": "from mjacob.nltk.parse.tag.TagChartParser import TagChartParser",
    "PV TAG ChartParser": "from mjacob.nltk.parse.tag.TagChartParser import PrefixValidTagChartParser",
}
# Single-entry subset of the NLTK parsers, in the same label -> import form.
# NOTE(review): the name suggests this was the fastest NLTK parser in earlier
# runs; nothing in this file establishes that -- confirm before relying on it.
NLTK_BEST = {
    "NLTK's IncrementalLeftCornerChartParser": "from nltk.parse.earleychart import IncrementalLeftCornerChartParser",
}
# Source template for timeit's setup phase. It takes three %s fields, in
# order: the parser import statement (completed here with " as Parser"), the
# grammar and the sentences. The fields are pasted in as text, so whatever is
# supplied must render (via str()) as valid Python source.
SETUP = """
import nltk
%s as Parser

grammar = %s
sentences = %s
parser = Parser(grammar)
"""
# Source template for the timed statement: call the chosen parser method
# (the single %s field) once for every sentence prepared by SETUP.
TEST = """
for sentence in sentences:
    parser.%s(sentence)
"""
class ParsePerformanceTester(object):
    """Measure the average time a parser needs per sentence, using ``timeit``.

    The grammar and the sentences are not handed to the parser as objects:
    they are interpolated with ``%s`` into the ``SETUP`` source template, so
    their ``str()`` form has to be valid Python source.
    """

    def __init__(self, grammar, sentences, number_of_iterations=3):
        """Store the benchmark inputs.

        :param grammar: value substituted for ``grammar = %s`` in ``SETUP``.
        :param sentences: sized collection of sentences; substituted for
            ``sentences = %s`` in ``SETUP`` and used for the averages.
        :param number_of_iterations: how many times ``timeit`` repeats the
            whole pass over the sentences.
        """
        self.__sentences = sentences
        self.__grammar = grammar
        self.__setup = SETUP
        self.__number_of_iterations = number_of_iterations

    def average_sentence_length(self):
        """Return the mean ``len()`` of the sentences as a float.

        An empty collection yields ``0.0`` instead of raising
        ``ZeroDivisionError``.
        """
        sentence_count = len(self.__sentences)
        if sentence_count == 0:
            return 0.0
        total_length = sum(len(sent) for sent in self.__sentences)
        # float() keeps true division under Python 2 as well.
        return total_length / float(sentence_count)

    def run(self, parser_import, method="parse"):
        """Time ``parser.<method>(sentence)`` over every sentence.

        :param parser_import: import statement without an alias (for example
            a value of ``NLTK_PARSERS``); ``SETUP`` appends ``" as Parser"``.
        :param method: name of the parser method to call on each sentence.
        :returns: total time reported by ``timeit`` divided by
            ``number_of_iterations * len(sentences)``.
        :raises ZeroDivisionError: if there are no sentences or the iteration
            count is zero, since a per-sentence time is then undefined.
        """
        sentence_count = len(self.__sentences)
        total_calls = self.__number_of_iterations * sentence_count
        if total_calls == 0:
            # Fail before paying for setup and timing; the exception type is
            # the one the final division would have raised anyway.
            raise ZeroDivisionError(
                "cannot compute a per-sentence time with %d iteration(s) "
                "and %d sentence(s)"
                % (self.__number_of_iterations, sentence_count))

        setup_source = self.__setup % (parser_import,
                                       self.__grammar,
                                       self.__sentences)
        timer = Timer(TEST % (method,), setup_source)
        elapsed = timer.timeit(number=self.__number_of_iterations)
        return elapsed / total_calls