# This Python file uses the following encoding: utf-8
# Created on May 17, 2011
7 from timeit
import Timer
10 "NLTK's ChartParser": "from nltk import ChartParser",
11 "NLTK's LeftCornerChartParser": "from nltk import LeftCornerChartParser",
12 "NLTK's BottomUpChartParser":"from nltk import BottomUpChartParser",
13 "NLTK's BottomUpLeftCornerChartParser":"from nltk import BottomUpLeftCornerChartParser",
14 "NLTK's TopDownChartParser":"from nltk import TopDownChartParser",
16 "NLTK's IncrementalLeftCornerChartParser": "from nltk.parse.earleychart import IncrementalLeftCornerChartParser",
17 "NLTK's IncrementalChartParser": "from nltk.parse.earleychart import IncrementalChartParser",
18 "NLTK's EarleyChartParser": "from nltk.parse.earleychart import EarleyChartParser",
19 "NLTK's IncrementalBottomUpChartParser": "from nltk.parse.earleychart import IncrementalBottomUpChartParser",
20 "NLTK's IncrementalBottomUpLeftCornerChartParser": "from nltk.parse.earleychart import IncrementalBottomUpLeftCornerChartParser",
21 "NLTK's IncrementalTopDownChartParser": "from nltk.parse.earleychart import IncrementalTopDownChartParser",
# Maps a human-readable parser name to the import statement (kept as source
# text) that brings the corresponding TAG chart parser class into scope.
# NOTE(review): the import strings are presumably substituted into the timeit
# setup code by ParsePerformanceTester.run() -- confirm against the caller.
MY_PARSER = {
    "TAG ChartParser": "from mjacob.nltk.parse.tag.TagChartParser import TagChartParser",
    "PV TAG ChartParser": "from mjacob.nltk.parse.tag.TagChartParser import PrefixValidTagChartParser",
}
# Single-entry mapping from an NLTK parser's display name to the import
# statement (kept as source text) for that parser class.
# NOTE(review): judging by the name, this is the NLTK parser picked as the
# baseline for comparison -- confirm with the benchmark driver.
NLTK_BEST = {
    "NLTK's IncrementalLeftCornerChartParser": "from nltk.parse.earleychart import IncrementalLeftCornerChartParser",
}
34 parser = Parser(grammar)
38 for sentence in sentences:
class ParsePerformanceTester(object):
    """Class to use in performance testing parsers.

    Holds the grammar, the sentences to parse and the number of timing
    iterations that the benchmark methods of this class work with.
    """

    def __init__(self, grammar, sentences, number_of_iterations=3):
        """Store the benchmark inputs on the instance.

        :param grammar: the grammar to benchmark with; stored unchanged.
        :param sentences: the sentences to parse; must support ``len()`` and
            iteration, and each sentence must itself support ``len()``.
        :param number_of_iterations: how many times the timed statement is
            repeated (defaults to 3).
        """
        self.__sentences = sentences
        self.__grammar = grammar
        # NOTE(review): run() also reads self.__setup, but no assignment to it
        # is visible among the constructor's statements -- confirm where it is
        # initialised before relying on run().
        self.__number_of_iterations = number_of_iterations

    def average_sentence_length(self):
        """Return the mean length of the stored sentences as a float.

        :raises ZeroDivisionError: if no sentences were supplied.
        """
        total_length = sum(len(sent) for sent in self.__sentences)
        # Multiply by 1.0 so the division is a true division even where "/"
        # would otherwise truncate two integers.
        return total_length / (1.0 * len(self.__sentences))
53 def run(self
, parser_import
, method
="parse"):
54 test
= TEST
% (method
)
56 self
.__setup
% (parser_import
,
60 result_time
= timer
.timeit(number
=self
.__number
_of
_iterations
)
61 return result_time
/(self
.__number
_of
_iterations
*len(self
.__sentences
))