2to3 (compiles, not tested)
[tag_parser.git] / tests / integration / tag_chart / run_integration_tests.py
blob f045d1dbd0976ed16cbee170031920d0a8a42f5e
1 # This Python file uses the following encoding: utf-8
2 # -*- coding: UTF-8 -*-
3 """ integration tests - does a specific parser puke on any of these TAGs? """
4 import argparse
5 import yaml
6 import os
7 import sys
8 from nltk.tree import Tree
9 import re
10 import logging
11 from nltk.grammar import Nonterminal
# Configure the root logger once at import time: records at WARN level and
# above are written to stdout (rather than the default stderr) so that they
# interleave with the test report printed by this script.
logging.basicConfig(level=logging.WARN, stream=sys.stdout)
def get_class(module_name, class_name):
    """Import ``module_name`` and return its attribute named ``class_name``.

    Despite the name, any module-level attribute can be fetched this way
    (this script also uses it to load a parsing strategy object).

    Raises:
        ImportError: if ``module_name`` cannot be imported.
        AttributeError: if the module has no attribute ``class_name``.
    """
    # A non-empty fromlist makes __import__ return the named (sub)module
    # itself instead of the top-level package.
    module = __import__(module_name, fromlist=[class_name])
    return getattr(module, class_name)
# Absolute path of the directory that contains this script.
BASEDIR = os.path.dirname(os.path.abspath(__file__))
class ParseTests(object):
    """Run full tests of the TAG parsers.

    Given a grammar, some sentences, and their expected parses, is the
    output of a specified TAG parser correct?
    """

    # Splits "package.module.Name" into ("package.module", "Name").
    _QUALIFIED_NAME = re.compile(r'(.*)\.(\w+)')

    def __init__(self, args):
        """Keep the parsed command-line arguments.

        ``args`` must expose the attributes ``tests``, ``grammar``,
        ``parser``, ``strategy`` and ``accept_only``.
        """
        self.__args = args

    @classmethod
    def _split_qualified_name(cls, qualified_name):
        """Split a dotted name into a ``(module_name, attribute_name)`` tuple.

        Raises:
            ValueError: if ``qualified_name`` contains no ``module.attr`` part.
        """
        match = cls._QUALIFIED_NAME.match(qualified_name)
        if match is None:
            raise ValueError(
                "expected a dotted name like 'package.module.Name', got %r"
                % (qualified_name,))
        return match.groups()

    @staticmethod
    def _tokenize(sentence):
        """Split a sentence on single spaces; the empty sentence has no tokens."""
        if sentence == '':
            return []
        return sentence.split(' ')

    @staticmethod
    def _format_tree(tree):
        """Render ``tree`` as a string with a very wide margin."""
        # NLTK 3 renamed the string-returning pprint() to pformat(); fall
        # back to pprint() for tree types that predate the rename.
        render = getattr(tree, "pformat", None) or tree.pprint
        return render(margin=10000)

    def _all_tests(self):
        """Assume the test dirs are all the dirs inside the directory
        containing this script; return their names in sorted order."""
        return sorted(
            name for name in os.listdir(BASEDIR)
            if os.path.isdir(os.path.join(BASEDIR, name)))

    def read_tests(self, test_path):
        """Load ``tests.yaml`` from ``test_path``.

        Returns a list of ``(sentence, expected_parse)`` pairs; an empty
        file yields an empty list.
        """
        with open(os.path.join(test_path, "tests.yaml")) as handle:
            # NOTE(review): switched from yaml.load() without a Loader (an
            # error on PyYAML >= 6) to safe_load(); this assumes the fixtures
            # use only plain YAML types -- confirm against the test data.
            data = yaml.safe_load(handle)
        return list((data or {}).items())

    def run_tests(self):
        """Run the selected grammar tests and print a report.

        Returns:
            0 if every test passed, 1 if at least one failed.
        """
        if self.__args.tests:
            tests = self.__args.tests.split(',')
        else:
            tests = self._all_tests()

        grammar_class = get_class(*self._split_qualified_name(self.__args.grammar))
        parser_class = get_class(*self._split_qualified_name(self.__args.parser))
        strategy_module_name, strategy_name = self._split_qualified_name(self.__args.strategy)
        strategy_obj = get_class(strategy_module_name, strategy_name)

        print("testing w/ parser %s, strategy %s" % (parser_class, strategy_name))
        print("%s tests" % (len(tests)))

        total_passed = 0
        total_failed = 0
        for test in tests:
            passes, failures = self._run_grammar_tests(
                test, grammar_class, parser_class, strategy_obj)
            total_passed += passes
            total_failed += len(failures)

        if total_failed:
            print("TOTAL: %s tests passed, %s failed" % (total_passed, total_failed))
            return 1
        print("TOTAL: %s tests passed" % (total_passed))
        return 0

    def _run_grammar_tests(self, test, grammar_class, parser_class, strategy_obj):
        """Run every sentence of one grammar directory.

        Returns a ``(passes, failures)`` tuple: the number of passing
        sentences and the list of failure messages.
        """
        # Resolve against BASEDIR, the same root _all_tests() enumerates.
        test_path = os.path.join(BASEDIR, test)
        grammar = grammar_class(os.path.join(test_path, "grammar.yaml"))
        parser = parser_class(grammar, strategy=strategy_obj)

        print("starting to test grammar '%s':" % (test,))
        failures = []
        passes = 0
        for sentence, trueparse in self.read_tests(test_path):
            if not trueparse:
                raise ValueError("there's something wrong w/ your parse, hombre (%s)" % (sentence,))

            if isinstance(trueparse, list):  # multiple possible
                passed = self._parse_all(parser, sentence, trueparse, failures)
            else:
                passed = self._parse_one(parser, sentence, trueparse, failures)

            if passed:
                passes += 1

        if failures:
            print(" %s tests passed, %s failed" % (passes, len(failures)))
            for failure in failures:
                print(" " + failure)
        else:
            print(" %s tests passed" % (passes))
        return passes, failures

    def _parse_all(self, parser, sentence, trueparses, failures):
        """Check that the parser yields exactly the expected set of parses.

        Appends a message to ``failures`` and returns False on any mismatch;
        returns True otherwise. With ``accept_only`` set, only acceptance of
        the sentence is checked.
        """
        tokens = self._tokenize(sentence)

        accepted = parser.accept(tokens)
        trueparses = set(self._convert_true_parse(trueparse) for trueparse in trueparses)

        if not accepted:
            failures.append('parser did not accept \'%s\'' % (sentence,))
            return False
        if self.__args.accept_only:
            return True

        try:
            parses = parser.nbest_parse(tokens)
        except BaseException:
            # Deliberately broad: name the offending sentence, then re-raise.
            print(sentence)
            raise

        # Materialise so that iterators are handled as well as lists.
        parses = list(parses) if parses is not None else []
        if not parses:
            failures.append('parsing failed: \'%s\'' % (sentence,))
            return False
        if set(parses) != trueparses:
            failures.append("parses not equal (expected '%s', got '%s')" % (
                [self._format_tree(tree) for tree in trueparses],
                [self._format_tree(tree) for tree in parses]))
            return False
        return True

    def _convert_true_parse(self, trueparse):
        """Turn a bracketed parse string into a frozen tree whose node
        labels are ``Nonterminal`` objects."""
        # NLTK 3 parses strings with Tree.fromstring(); older releases
        # accepted the string directly in the constructor.
        if hasattr(Tree, "fromstring"):
            tree = Tree.fromstring(trueparse)
        else:
            tree = Tree(trueparse)
        for pos in tree.treepositions():
            subtree = tree[pos]
            if isinstance(subtree, Tree):
                # NLTK 3 replaced the .node attribute with label()/set_label().
                if hasattr(subtree, "set_label"):
                    subtree.set_label(Nonterminal(subtree.label()))
                else:
                    subtree.node = Nonterminal(subtree.node)
        return tree.freeze()

    def _parse_one(self, parser, sentence, trueparse, failures):
        """Check that the parser's single parse equals the expected one.

        Appends a message to ``failures`` and returns False on any mismatch;
        returns True otherwise. With ``accept_only`` set, only acceptance of
        the sentence is checked.
        """
        tokens = self._tokenize(sentence)

        accepted = parser.accept(tokens)
        trueparse = self._convert_true_parse(trueparse)
        if not accepted:
            failures.append('parser did not accept \'%s\'' % (sentence,))
            return False
        if self.__args.accept_only:
            return True

        try:
            parse = parser.parse(tokens)
        except BaseException:
            # Deliberately broad: name the offending sentence, then re-raise.
            print(sentence)
            raise

        if parse is None:
            failures.append('parsing failed: \'%s\'' % (sentence,))
            return False
        if parse != trueparse:
            failures.append("parses not equal (expected '%s', got '%s')" % (
                self._format_tree(trueparse), self._format_tree(parse)))
            return False
        return True
def parse_arguments():
    """Build the command-line interface and parse ``sys.argv``.

    Returns the argparse namespace with the attributes ``parser``,
    ``grammar``, ``strategy``, ``tests`` and ``accept_only``.
    """
    cli = argparse.ArgumentParser(description='integration tests for TAG parsers')

    # Positional: dotted path of the parser class under test.
    cli.add_argument('parser', metavar='parser class',
                     help='a tag parser class')

    # Optional overrides; each destination name follows from its long flag.
    cli.add_argument(
        '-g', '--grammar',
        default='mjacob.nltk.grammar.TreeAdjoiningGrammar.TreeAdjoiningGrammar',
        help='a tag grammar class (must be able to read yaml)',
    )
    cli.add_argument(
        '-s', '--strategy',
        default='mjacob.nltk.parse.tag.earley.rules.TAG_EARLEY_STRATEGY',
        help='the strategy used to parse',
    )
    cli.add_argument('-t', '--tests',
                     help='only test the specified grammars')
    cli.add_argument('--accept-only', action="store_true")

    namespace = cli.parse_args()
    return namespace
def main():
    """Parse the command line, run the tests, and exit with their status.

    The process exit code is whatever ``ParseTests.run_tests`` returns.
    """
    args = parse_arguments()
    test_runner = ParseTests(args)
    exit_code = test_runner.run_tests()
    # Use sys.exit() rather than the bare exit() helper: the latter is
    # injected by the site module for interactive use and is not guaranteed
    # to exist (e.g. under ``python -S``).
    sys.exit(exit_code)


if __name__ == "__main__":
    main()