1 # This Python file uses the following encoding: utf-8
2 # -*- coding: UTF-8 -*-
3 """ integration tests - does a specific parser puke on any of these TAGs? """
8 from nltk
.tree
import Tree
11 from nltk
.grammar
import Nonterminal
# Route log records of WARNING severity and above to standard output.
logging.basicConfig(stream=sys.stdout, level=logging.WARNING)
def get_class(module_name, class_name):
    """Import ``module_name`` and return its attribute named ``class_name``.

    Despite the name, any module-level attribute can be fetched this way
    (a class, a function, a constant, ...).

    :param module_name: dotted path of the module to import
    :param class_name: name of the attribute to look up in that module
    :raises ImportError: if the module cannot be imported
    :raises AttributeError: if the module has no such attribute
    """
    # A non-empty ``fromlist`` makes __import__ return the module named by
    # the full dotted path instead of the top-level package.
    module = __import__(module_name, fromlist=[class_name])
    # getattr() follows the normal attribute protocol (including a
    # module-level __getattr__), unlike a direct __getattribute__ call.
    return getattr(module, class_name)
# Absolute path of the directory that contains this script.
BASEDIR = os.path.dirname(os.path.abspath(__file__))
19 class ParseTests(object):
20 """this class is used to run full tests of the TAG parsers.
22 given a grammar, some sentences, and their expected parses, is the
23 output of a specified TAG parser correct?
26 def __init__(self
, args
):
30 """assume the test dirs are all the dirs inside the directory
31 containing this script """
32 return [x
for x
in os
.listdir(BASEDIR
) if os
.path
.isdir(os
.path
.join(BASEDIR
, x
))]
def read_tests(self, test_path):
    """Load the test cases stored in ``tests.yaml`` inside ``test_path``.

    :param test_path: directory that contains a ``tests.yaml`` file
    :returns: list of ``(key, value)`` pairs taken from the mapping parsed
        out of the YAML document
    """
    tests_file = os.path.join(test_path, "tests.yaml")
    # Use a context manager so the handle is closed even when reading fails
    # (the handle was previously left for the garbage collector to close).
    with open(tests_file) as handle:
        document = handle.read()
    # NOTE(review): yaml.load may construct arbitrary Python objects; this is
    # only acceptable for trusted fixture files. Consider yaml.safe_load if
    # the fixtures contain plain data only -- confirm before switching.
    return list(yaml.load(document).items())
40 tests
= self
.__args
.tests
.split(',')
42 tests
= self
._all
_tests
()
44 grammar_module_name
, grammar_class_name
= re
.match('(.*)\.(\w+)', self
.__args
.grammar
).groups()
45 grammar_class
= get_class(grammar_module_name
, grammar_class_name
)
46 parser_module_name
, parser_class_name
= re
.match('(.*)\.(\w+)', self
.__args
.parser
).groups()
47 parser_class
= get_class(parser_module_name
, parser_class_name
)
48 strategy_module_name
, strategy_name
= re
.match('(.*)\.(\w+)', self
.__args
.strategy
).groups()
49 strategy_obj
= get_class(strategy_module_name
, strategy_name
)
51 print("testing w/ parser %s, strategy %s" % (parser_class
, strategy_name
))
52 print("%s tests" % (len(tests
)))
57 test_path
= os
.path
.join(sys
.path
[0], test
)
58 grammar
= grammar_class(os
.path
.join(test_path
, "grammar.yaml"))
59 parser
= parser_class(grammar
, strategy
=strategy_obj
)
61 print("starting to test grammar '%s':" % (test
))
64 for sentence
, trueparse
in self
.read_tests(test_path
):
66 raise Exception("there's something wrong w/ your parse, hombre (%s)" % (sentence
))
68 if type(trueparse
) is list: # multiple possible
69 passed
= self
._parse
_all
(parser
, sentence
, trueparse
, failures
)
71 passed
= self
._parse
_one
(parser
, sentence
, trueparse
, failures
)
72 #passed = self._parse_all(parser, sentence, [trueparse], failures)
78 print(" %s tests passed, %s failed" % (passes
, len(failures
)))
79 for failure
in failures
:
82 print(" %s tests passed" % (passes
))
83 total_passed
+= passes
84 total_failed
+= len(failures
)
86 print("TOTAL: %s tests passed, %s failed" % (total_passed
, total_failed
))
89 print("TOTAL: %s tests passed" % (total_passed
))
92 def _parse_all(self
, parser
, sentence
, trueparses
, failures
):
96 tokens
= sentence
.split(' ')
98 accepted
= parser
.accept(tokens
)
99 trueparses
= set((self
._convert
_true
_parse
(trueparse
) for trueparse
in trueparses
))
102 failures
.append('parser did not accept \'%s\'' % (sentence
))
105 elif not self
.__args
.accept_only
:
107 parses
= parser
.nbest_parse(tokens
)
111 if parses
is None or len(parses
) == 0:
112 failures
.append('parsing failed: \'%s\'' % (sentence
))
114 elif set(parses
) != trueparses
:
115 failures
.append("parses not equal (expected '%s', got '%s')" % ([tree
.pprint(margin
=10000) for tree
in trueparses
], [tree
.pprint(margin
=10000) for tree
in parses
]))
122 def _convert_true_parse(self
, trueparse
):
123 tree
= Tree(trueparse
)
124 for pos
in tree
.treepositions():
126 if isinstance(subtree
, Tree
):
127 subtree
.node
= Nonterminal(subtree
.node
)
131 def _parse_one(self
, parser
, sentence
, trueparse
, failures
):
135 tokens
= sentence
.split(' ')
137 accepted
= parser
.accept(tokens
)
138 trueparse
= self
._convert
_true
_parse
(trueparse
)
140 failures
.append('parser did not accept \'%s\'' % (sentence
))
143 elif not self
.__args
.accept_only
:
145 parse
= parser
.parse(tokens
)
150 failures
.append('parsing failed: \'%s\'' % (sentence
))
152 elif parse
!= trueparse
:
153 failures
.append("parses not equal (expected '%s', got '%s')" % (trueparse
.pprint(margin
=10000), parse
.pprint(margin
=10000)))
def parse_arguments():
    """Parse the command line of the integration-test runner.

    One positional argument (``parser``) is required; the grammar class,
    parse strategy, test selection and accept-only switch are optional.

    :returns: the ``argparse.Namespace`` produced from ``sys.argv``
    """
    cli = argparse.ArgumentParser(description='integration tests for TAG parsers')

    # Required positional: the parser class to exercise.
    cli.add_argument('parser', metavar='parser class', type=str,
                     help='a tag parser class')

    # Optional switches, registered in the order they appear in --help.
    option_specs = (
        (('-g', '--grammar'), {
            'dest': 'grammar',
            'default': 'mjacob.nltk.grammar.TreeAdjoiningGrammar.TreeAdjoiningGrammar',
            'help': 'a tag grammar class (must be able to read yaml)',
        }),
        (('-s', '--strategy'), {
            'dest': 'strategy',
            'default': 'mjacob.nltk.parse.tag.earley.rules.TAG_EARLEY_STRATEGY',
            'help': 'the strategy used to parse',
        }),
        (('-t', '--tests'), {
            'dest': 'tests',
            'help': 'only test the specified grammars',
        }),
        (('--accept-only',), {
            'dest': 'accept_only',
            'default': False,
            'action': "store_true",
        }),
    )
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)

    return cli.parse_args()
176 args
= parse_arguments()
177 test_runner
= ParseTests(args
)
178 exit_code
= test_runner
.run_tests()
181 if __name__
== "__main__":