From b15d3c67f81ab5a9ec29065813781b9b911a02a9 Mon Sep 17 00:00:00 2001 From: Micah Jacob Date: Thu, 11 Aug 2011 16:45:04 -0400 Subject: [PATCH] 2to3 (compiles, not tested) --- .pydevproject | 4 +- demo/NLTKTagParserDemo.py | 60 +++++++++++----------- src/mjacob/algorithms/deduction_iterator.py | 3 +- src/mjacob/algorithms/generate_random.py | 8 +-- src/mjacob/annotations/memoized.py | 20 ++++---- src/mjacob/collections/FrozenIndex.py | 21 ++++---- src/mjacob/nltk/grammar/TagNonterminal.py | 6 +-- src/mjacob/nltk/grammar/TagProduction.py | 8 +-- src/mjacob/nltk/grammar/TreeAdjoiningGrammar.py | 5 +- src/mjacob/nltk/parse/tag/AbstractTagChartRule.py | 2 +- src/mjacob/nltk/parse/tag/TagChart.py | 12 ++--- src/mjacob/nltk/parse/tag/TreeBuilderI.py | 4 +- src/mjacob/nltk/parse/tag/earley/TagEdge.py | 15 ++++-- .../parse/tag/prefix_valid_earley/PVTagEdge.py | 11 ++-- src/mjacob/nltk/parse/tag/strategies.py | 5 +- .../integration/tag_chart/run_integration_tests.py | 26 +++++----- tests/integration/tag_chart/test_tag_with_cfg.py | 28 +++++----- tests/performance/dedution_iterators.py | 12 ++--- tests/performance/test_large_grammar.py | 10 ++-- tests/performance/test_medium_grammar.py | 18 +++---- tests/performance/test_small_grammar.py | 12 ++--- tests/stand_alone/counting.py | 6 +-- tests/stand_alone/cross_serial.py | 2 +- tests/stand_alone/elephant.py | 2 +- tests/stand_alone/empty.py | 2 +- tests/stand_alone/prefix_valid_testing.py | 2 +- tests/stand_alone/prefix_valid_testing2.py | 2 +- tests/stand_alone/test7.py | 2 +- tests/stand_alone/test_tag_chart_parser.py | 6 +-- tests/stand_alone/test_tag_chart_parser2.py | 14 ++--- tests/stand_alone/trap.py | 2 +- 31 files changed, 168 insertions(+), 162 deletions(-) diff --git a/.pydevproject b/.pydevproject index a9791e1..723f296 100644 --- a/.pydevproject +++ b/.pydevproject @@ -2,8 +2,8 @@ -python 2.7 -python 2.7 +/Library/Frameworks/Python.framework/Versions/3.2/bin/python3.2 +python 3.0 /NLTKTagParser/src /NLTKTagParser/tests/integration diff --git a/demo/NLTKTagParserDemo.py b/demo/NLTKTagParserDemo.py index 7711e74..958dbef 100644 --- a/demo/NLTKTagParserDemo.py +++ b/demo/NLTKTagParserDemo.py @@ -13,61 +13,61 @@ import yaml l4_grammar = TreeAdjoiningGrammar(filename='../tests/integration/tag_chart/L4-trinary/grammar.yaml') l4_parser = TagChartParser(l4_grammar) -print "Parsing non-context-free languages" -print -print "EXAMPLE 1: L4 {aⁿbⁿcⁿdⁿ : n∈ℕ}" -print "see https://secure.wikimedia.org/wikipedia/en/wiki/Context-sensitive_grammar" -print " %s" % (l4_grammar) +print("Parsing non-context-free languages") +print() +print("EXAMPLE 1: L4 {aⁿbⁿcⁿdⁿ : n∈ℕ}") +print("see https://secure.wikimedia.org/wikipedia/en/wiki/Context-sensitive_grammar") +print(" %s" % (l4_grammar)) for production in chain(sorted(l4_grammar.productions(is_auxiliary=False)), sorted(l4_grammar.productions(is_auxiliary=True))): - print " %s" % (production) -print + print(" %s" % (production)) +print() for n in 3,4: string = "a"*n + "b"*n + "c"*n + "d"*n tokens = tuple(c for c in string) - print " parsing '%s': " % (string,) + print(" parsing '%s': " % (string,)) for parse in l4_parser.nbest_parse(tokens): - print " %s" % (parse.pprint(margin=100000)) + print(" %s" % (parse.pprint(margin=100000))) -print -print +print() +print() xs_grammar = TreeAdjoiningGrammar(filename='../tests/integration/tag_chart/cross_serial_2/grammar.yaml') xs_parser = TagChartParser(xs_grammar) -print "EXAMPLE 2: Cross serial dependencies" -print "see 
http://www.let.rug.nl/~vannoord/papers/acl94/node5.html" -print " %s" % (xs_grammar) +print("EXAMPLE 2: Cross serial dependencies") +print("see http://www.let.rug.nl/~vannoord/papers/acl94/node5.html") +print(" %s" % (xs_grammar)) for production in chain(sorted(xs_grammar.productions(is_auxiliary=False)), sorted(xs_grammar.productions(is_auxiliary=True))): - print " %s" % (production) -print + print(" %s" % (production)) +print() examples = yaml.load(open('../tests/integration/tag_chart/cross_serial_2/tests.yaml').read()) for string in examples: tokens = string.split(' ') - print " parsing '%s': " % (string,) + print(" parsing '%s': " % (string,)) for parse in xs_parser.nbest_parse(tokens): - print " %s" % (parse.pprint(margin=100000)) + print(" %s" % (parse.pprint(margin=100000))) -print -print +print() +print() elephant_grammar = TreeAdjoiningGrammar(filename='../tests/integration/tag_chart/elephant/grammar.yaml') elephant_parser = TagChartParser(elephant_grammar) -print "Example 3: An ambiguous sentence" -print " %s" % (elephant_grammar) +print("Example 3: An ambiguous sentence") +print(" %s" % (elephant_grammar)) for production in chain(sorted(elephant_grammar.productions(is_auxiliary=False)), sorted(elephant_grammar.productions(is_auxiliary=True))): - print " %s" % (production) -print + print(" %s" % (production)) +print() examples = yaml.load(open('../tests/integration/tag_chart/elephant/tests.yaml').read()) for string in examples: tokens = string.split(' ') - print " parsing '%s': " % (string,) + print(" parsing '%s': " % (string,)) for parse in elephant_parser.nbest_parse(tokens): - print " %s" % (parse.pprint(margin=100000)) + print(" %s" % (parse.pprint(margin=100000))) -print -print +print() +print() -print "Performance testing results" +print("Performance testing results") with open('../tests/performance/PERFORMANCE_RESULTS.txt') as file: - print file.read() \ No newline at end of file + print(file.read()) \ No newline at end of file diff --git a/src/mjacob/algorithms/deduction_iterator.py b/src/mjacob/algorithms/deduction_iterator.py index 4663e50..1673a79 100644 --- a/src/mjacob/algorithms/deduction_iterator.py +++ b/src/mjacob/algorithms/deduction_iterator.py @@ -13,6 +13,7 @@ the number of DAGs as sub-graphs of a directed hypergraph. 
@author: mjacob ''' +from functools import reduce def deductions(result, get_anteceding_rules, get_antecedents=lambda x: x, @@ -118,6 +119,6 @@ if __name__ == "__main__": #start, A = 1, {1: ((2,3,),), 2: ((3,), (4,),), 3: ((2,), (2,4,), (4,),), 4: ((),)} #start, A = 12, {0: ((),), 1: ((2,),), 2: ((10, 11),), 3: ((),), 4: ((5, 6),), 5: ((8, 9),), 6: ((2, 18),), 7: ((5, 1),), 8: ((),), 9: ((),), 10: ((),), 11: ((),), 12: ((13, 6),), 13: ((3,),), 14: ((13,),), 15: ((5,),), 16: ((),), 17: ((13, 1),), 18: ((37,),), 19: ((),), 20: ((21,),), 21: ((),), 22: ((20,),), 23: ((24,),), 24: ((),), 25: ((23,),), 26: ((),), 27: ((),), 28: ((),), 29: ((30, 31),), 30: ((),), 31: ((),), 32: ((29,),), 33: ((30, 34),), 34: ((),), 35: ((10, 16),), 36: ((35,),), 37: ((30, 0),), 38: ((35, 32),), 39: ((20, 32),), 40: ((33,),)} for deduction in deductions(start, A.__getitem__): - print "%s," % (deduction,) + print("%s," % (deduction,)) #print ", ".join("%s -> %s" % (antecedents, consequent) for consequent, antecedents in reversed(deduction)) \ No newline at end of file diff --git a/src/mjacob/algorithms/generate_random.py b/src/mjacob/algorithms/generate_random.py index 722e922..4c55b89 100644 --- a/src/mjacob/algorithms/generate_random.py +++ b/src/mjacob/algorithms/generate_random.py @@ -50,13 +50,13 @@ if __name__ == "__main__": p = EarleyChartParser(g) s = "un mesa con el amiga sobre los hombre vio una hombre sobre las mesa con una mesa con las noticia sobre el hombre" parse = p.nbest_parse(s.split(' ')) - print len(set([str(tree) for tree in parse])) + print(len(set([str(tree) for tree in parse]))) exit(0) - for i in xrange(100): + for i in range(100): sent = generate_random_sentence(g) tokens = sent.split(' ') #if len(tokens) > 15: # continue - print sent - print len(p.nbest_parse(tokens)) \ No newline at end of file + print(sent) + print(len(p.nbest_parse(tokens))) \ No newline at end of file diff --git a/src/mjacob/annotations/memoized.py b/src/mjacob/annotations/memoized.py index 971ebed..0fb5e8c 100644 --- a/src/mjacob/annotations/memoized.py +++ b/src/mjacob/annotations/memoized.py @@ -13,7 +13,7 @@ def Memoize(func): value = func(selv) def f2(): return value - setattr(selv, func.func_name, f2) + setattr(selv, func.__name__, f2) return value return f1 @@ -25,16 +25,16 @@ if __name__ == "__main__": @Memoize def thing(self): - print "computing %s" % (self.__v) + print("computing %s" % (self.__v)) return 100*self.__v a = foobar(1) b = foobar(2) - print "starting" - print a.thing - print a.thing() - print a.thing - print a.thing() - print a.thing - print b.thing() - print b.thing() \ No newline at end of file + print("starting") + print(a.thing) + print(a.thing()) + print(a.thing) + print(a.thing()) + print(a.thing) + print(b.thing()) + print(b.thing()) \ No newline at end of file diff --git a/src/mjacob/collections/FrozenIndex.py b/src/mjacob/collections/FrozenIndex.py index 6923d97..fb02a04 100644 --- a/src/mjacob/collections/FrozenIndex.py +++ b/src/mjacob/collections/FrozenIndex.py @@ -21,7 +21,7 @@ class FrozenIndex(object): for k in d: d[k] = frozenset(d[k]) self.__dict = d - self.__values = frozenset(self.values()) + self.__values = frozenset(list(self.values())) self.__hash = hash(tuple(items)) def __contains__(self, k): @@ -34,19 +34,19 @@ class FrozenIndex(object): return FrozenIndex.EMPTY def __iter__(self): - return self.itervalues() + return iter(self.values()) def __len__(self): - return len(self.values()) + return len(list(self.values())) def get(self, k): - if self.__dict.has_key(k): + if k 
in self.__dict: return self.__dict.get(k) else: return FrozenIndex.EMPTY def has_key(self, k): - return self.__dict.has_key(k) + return k in self.__dict def items(self): return tuple(((k,v) for k in self.__dict for v in self.__dict[k])) @@ -55,23 +55,20 @@ class FrozenIndex(object): return (((k,v) for k in self.__dict for v in self.__dict[k])) def iterkeys(self): - return self.__dict.iterkeys() + return iter(self.__dict.keys()) def itervalues(self): - return chain(*self.__dict.values()) + return chain(*list(self.__dict.values())) def keys(self): - return self.__dict.keys() + return list(self.__dict.keys()) def values(self): - return tuple(chain(*self.__dict.values())) + return tuple(chain(*list(self.__dict.values()))) def hash(self): return self.__hash - def __cmp__(self, othr): - return cmp(self.__dict, othr.__dict) - def __eq__(self, othr): return self.__dict == othr.__dict diff --git a/src/mjacob/nltk/grammar/TagNonterminal.py b/src/mjacob/nltk/grammar/TagNonterminal.py index d20ab2f..242f70d 100644 --- a/src/mjacob/nltk/grammar/TagNonterminal.py +++ b/src/mjacob/nltk/grammar/TagNonterminal.py @@ -16,10 +16,10 @@ class TagNonterminal(Nonterminal): NO_ADJUNCTION = 'N' OBLIGATORY_ADJUNCTION = 'O' IS_FOOT = '*' - NODE_TAG = re.compile(u'(\%s)?(\w+)(?:\.([%s%s]))?$' % (IS_FOOT, NO_ADJUNCTION, OBLIGATORY_ADJUNCTION)) + NODE_TAG = re.compile('(\%s)?(\w+)(?:\.([%s%s]))?$' % (IS_FOOT, NO_ADJUNCTION, OBLIGATORY_ADJUNCTION)) def __init__(self, string): - if not isinstance(string, basestring): + if not isinstance(string, str): raise InvalidTagNonterminalFormatException("unexpected type: %s %s" % (type(string), string)) m = re.match(TagNonterminal.NODE_TAG, string) if not m: @@ -53,7 +53,7 @@ class TagNonterminal(Nonterminal): return "%s%s%s" % (f, self.symbol(), r) def __repr__(self): - return unicode(self) + return str(self) def __eq__(self, other): return (type(self) is type(other) diff --git a/src/mjacob/nltk/grammar/TagProduction.py b/src/mjacob/nltk/grammar/TagProduction.py index 6fc5125..353abfc 100644 --- a/src/mjacob/nltk/grammar/TagProduction.py +++ b/src/mjacob/nltk/grammar/TagProduction.py @@ -42,7 +42,7 @@ from mjacob.collections.OrderedFrozenSet import OrderedFrozenSet from mjacob.annotations.memoized import Memoize from mjacob.nltk.grammar.TagNonterminal import TagNonterminal -IS_LEAF = re.compile(u"(?:'(.*)')$") +IS_LEAF = re.compile("(?:'(.*)')$") class TagProduction(ImmutableTree): """A """ @@ -247,7 +247,7 @@ class TagProduction(ImmutableTree): subtree.node = TagNonterminal(subtree.node) for leaf, position in ((tree[tree.leaf_treeposition(i)], tree.leaf_treeposition(i)) - for i in xrange(len(tree.leaves()))): + for i in range(len(tree.leaves()))): m = re.match(IS_LEAF, leaf) @@ -268,7 +268,7 @@ class TagProduction(ImmutableTree): for subtree in tree.subtrees(): if isinstance(subtree, Tree): subtree.node = Nonterminal(subtree.node.symbol()) - for i in xrange(len(subtree)): + for i in range(len(subtree)): subsubtree = subtree[i] if isinstance(subsubtree, Nonterminal): subtree[i] = Nonterminal(subsubtree.symbol()) @@ -284,7 +284,7 @@ class TagProduction(ImmutableTree): if type(item) is Nonterminal: return item.symbol() else: - return u"'%s'" % (item) + return "'%s'" % (item) return cls._convert_tree(Tree(convert_item(rule.lhs()), [convert_item(item) diff --git a/src/mjacob/nltk/grammar/TreeAdjoiningGrammar.py b/src/mjacob/nltk/grammar/TreeAdjoiningGrammar.py index d844590..1975de1 100644 --- a/src/mjacob/nltk/grammar/TreeAdjoiningGrammar.py +++ 
b/src/mjacob/nltk/grammar/TreeAdjoiningGrammar.py @@ -13,6 +13,7 @@ from collections import deque from mjacob.annotations.memoized import Memoize from mjacob.nltk.grammar.TagProduction import TagProduction from mjacob.nltk.grammar.TagNonterminal import TagNonterminal +from functools import reduce START = 'start' PRODUCTIONS = 'productions' @@ -73,7 +74,7 @@ A Tree Adjoining Grammar (TAG) is a tuple G = ⟨N, T, I, A, S, f,,OA,,, f,,SA,, """ - if len(filter(lambda x: x is not None, (grammar, cfg, filename, productions))) != 1: + if len([x for x in (grammar, cfg, filename, productions) if x is not None]) != 1: raise ValueError("exactly 1 of grammar, cfg or filename must be specified") if productions is None: @@ -245,7 +246,7 @@ A Tree Adjoining Grammar (TAG) is a tuple G = ⟨N, T, I, A, S, f,,OA,,, f,,SA,, raise InvalidGrammarException("there are no starting trees in the given grammar") def _filter_production(self, production, **filters): - for filter, value in filters.items(): + for filter, value in list(filters.items()): if getattr(production, filter)() != value: return False return True diff --git a/src/mjacob/nltk/parse/tag/AbstractTagChartRule.py b/src/mjacob/nltk/parse/tag/AbstractTagChartRule.py index d6c8fa1..2ffae54 100644 --- a/src/mjacob/nltk/parse/tag/AbstractTagChartRule.py +++ b/src/mjacob/nltk/parse/tag/AbstractTagChartRule.py @@ -18,7 +18,7 @@ class AbstractTagChartRule(AbstractChartRule): now i'm pretty sure it's only called once for every edge/rule combo so that would be a complete waste """ - for key, value in self.application_filter(chart, grammar).items(): + for key, value in list(self.application_filter(chart, grammar).items()): if not hasattr(edge, key): raise ValueError("unexpected key %s" % (key)) if getattr(edge, key)() != value: diff --git a/src/mjacob/nltk/parse/tag/TagChart.py b/src/mjacob/nltk/parse/tag/TagChart.py index 5c4e5b6..fa4d816 100644 --- a/src/mjacob/nltk/parse/tag/TagChart.py +++ b/src/mjacob/nltk/parse/tag/TagChart.py @@ -57,7 +57,7 @@ class TagChart(object): self.__tokens = tuple(tokens) self.__num_leaves = len(self.__tokens) self.__leaf_indices = dict((leaf, tuple(i - for i in xrange(len(tokens)) + for i in range(len(tokens)) if tokens[i] == leaf)) for leaf in frozenset(tokens)) @@ -85,9 +85,9 @@ class TagChart(object): return "\n".join(map(str, [type(self), " tokens: %s" % (self.__tokens,), " leaves: %s" % (self.__num_leaves,), - " indices: %s" % ("\n ".join(str(y) for y in self.__leaf_indices.items())), + " indices: %s" % ("\n ".join(str(y) for y in list(self.__leaf_indices.items()))), " edges: %s" % ("\n ".join(str(y) for y in self.__edges)), - " indexes: %s" % ("\n ".join(str(y) for y in self.__indexes.items())), + " indexes: %s" % ("\n ".join(str(y) for y in list(self.__indexes.items()))), ])) #//////////////////////////////////////////////////////////// @@ -171,7 +171,7 @@ class TagChart(object): if restrictions=={}: return iter(self.__edges) # Find the index corresponding to the given restrictions. - restr_keys = restrictions.keys() + restr_keys = list(restrictions.keys()) restr_keys.sort() restr_keys = tuple(restr_keys) @@ -190,7 +190,7 @@ class TagChart(object): # Make sure it's a valid index. for key in restr_keys: if not hasattr(self.__edge_type, key): - raise ValueError, 'Bad restriction: %s' % key + raise ValueError('Bad restriction: %s' % key) # Create the index. index = self.__indexes[restr_keys] = {} @@ -205,7 +205,7 @@ class TagChart(object): A helper function for L{insert}, which registers the new edge with all existing indexes. 
""" - for (restr_keys, index) in self.__indexes.items(): + for (restr_keys, index) in list(self.__indexes.items()): vals = tuple(getattr(edge, key)() for key in restr_keys) index.setdefault(vals, []).append(edge) diff --git a/src/mjacob/nltk/parse/tag/TreeBuilderI.py b/src/mjacob/nltk/parse/tag/TreeBuilderI.py index 533dfdd..99c9152 100644 --- a/src/mjacob/nltk/parse/tag/TreeBuilderI.py +++ b/src/mjacob/nltk/parse/tag/TreeBuilderI.py @@ -6,9 +6,7 @@ Created on Jun 16, 2011 ''' from abc import abstractmethod, ABCMeta -class TreeBuilderI(object): - __metaclass__ = ABCMeta - +class TreeBuilderI(object, metaclass=ABCMeta): def __init__(self, chart, type, *edges): self.__chart = chart self.__type = type diff --git a/src/mjacob/nltk/parse/tag/earley/TagEdge.py b/src/mjacob/nltk/parse/tag/earley/TagEdge.py index 3049bca..3ef59b5 100644 --- a/src/mjacob/nltk/parse/tag/earley/TagEdge.py +++ b/src/mjacob/nltk/parse/tag/earley/TagEdge.py @@ -10,6 +10,7 @@ from nltk.tree import Tree from nltk.grammar import Nonterminal from mjacob.annotations.memoized import Memoize from mjacob.nltk.grammar.TagNonterminal import TagNonterminal +from operator import lt, eq """dot positions: """ LA = "LA" @@ -221,7 +222,7 @@ class TagEdge(object): def terminal(self): """True iff production[treeposition] is a terminal element""" node = self._node() - if isinstance(node, basestring): + if isinstance(node, str): return node else: return None @@ -254,7 +255,7 @@ class TagEdge(object): True iff production[treeposition] is a terminal """ node = self._node() - return isinstance(node, basestring) + return isinstance(node, str) def is_epsilon(self): """ @@ -358,9 +359,13 @@ class TagEdge(object): def __repr__(self): return '[Edge: %s]' % (self) - def __cmp__(self, othr): - return cmp((self.start(), self.gapstart(), self.gapend(), self.end(), self.__production, self.__treeposition, self.__dotposition, self.__has_adjoined), - (othr.start(), othr.gapstart(), othr.gapend(), othr.end(), othr.__production, othr.__treeposition, othr.__dotposition, othr.__has_adjoined)) + def __lt__(self, othr): + return lt((self.start(), self.gapstart(), self.gapend(), self.end(), self.__production, self.__treeposition, self.__dotposition, self.__has_adjoined), + (othr.start(), othr.gapstart(), othr.gapend(), othr.end(), othr.__production, othr.__treeposition, othr.__dotposition, othr.__has_adjoined)) + + def __eq__(self, othr): + return eq((self.start(), self.gapstart(), self.gapend(), self.end(), self.__production, self.__treeposition, self.__dotposition, self.__has_adjoined), + (othr.start(), othr.gapstart(), othr.gapend(), othr.end(), othr.__production, othr.__treeposition, othr.__dotposition, othr.__has_adjoined)) def __hash__(self): return hash((self.__production, self.__treeposition, self.__span, self.__gapspan, self.__dotposition, self.__has_adjoined)) diff --git a/src/mjacob/nltk/parse/tag/prefix_valid_earley/PVTagEdge.py b/src/mjacob/nltk/parse/tag/prefix_valid_earley/PVTagEdge.py index 4a1f1a8..464153d 100644 --- a/src/mjacob/nltk/parse/tag/prefix_valid_earley/PVTagEdge.py +++ b/src/mjacob/nltk/parse/tag/prefix_valid_earley/PVTagEdge.py @@ -7,6 +7,7 @@ Based on nltk's EdgeI @author: mjacob ''' from mjacob.nltk.parse.tag.earley.TagEdge import TagEdge +from operator import lt, eq """the symbol for an index whose value doesn't matter is ~""" CARENT="~" @@ -40,9 +41,13 @@ class PVTagEdge(TagEdge): def __repr__(self): return '[Edge: %s]' % (self) - def __cmp__(self, othr): - return cmp((self.__treestart, self.start(), self.gapstart(), 
self.gapend(), self.end(), self.production(), self.treeposition(), self.dotposition(), self.has_adjoined()), - (othr.__treestart, othr.start(), othr.gapstart(), othr.gapend(), othr.end(), othr.production(), othr.treeposition(), othr.dotposition(), othr.has_adjoined())) + def __lt__(self, othr): + return lt((self.__treestart, self.start(), self.gapstart(), self.gapend(), self.end(), self.production(), self.treeposition(), self.dotposition(), self.has_adjoined()), + (othr.__treestart, othr.start(), othr.gapstart(), othr.gapend(), othr.end(), othr.production(), othr.treeposition(), othr.dotposition(), othr.has_adjoined())) + + def __eq__(self, othr): + return eq((self.__treestart, self.start(), self.gapstart(), self.gapend(), self.end(), self.production(), self.treeposition(), self.dotposition(), self.has_adjoined()), + (othr.__treestart, othr.start(), othr.gapstart(), othr.gapend(), othr.end(), othr.production(), othr.treeposition(), othr.dotposition(), othr.has_adjoined())) def __hash__(self): return hash((self.__treestart, self.production(), self.treeposition(), self.span(), self.gap(), self.dotposition(), self.has_adjoined())) diff --git a/src/mjacob/nltk/parse/tag/strategies.py b/src/mjacob/nltk/parse/tag/strategies.py index cd0dfac..f77c337 100644 --- a/src/mjacob/nltk/parse/tag/strategies.py +++ b/src/mjacob/nltk/parse/tag/strategies.py @@ -6,9 +6,8 @@ Created on May 30, 2011 ''' from abc import abstractmethod, ABCMeta -class Strategy(tuple): +class Strategy(tuple, metaclass=ABCMeta): """represents a ChartParser strategy""" - __metaclass__ = ABCMeta def __new__(cls, edge_class, *rules): return tuple.__new__(cls, rules) @@ -25,7 +24,7 @@ class Strategy(tuple): def goal_found(self, chart, symbol): try: - self.goal_edges(chart, symbol).next() + next(self.goal_edges(chart, symbol)) return True except StopIteration: return False \ No newline at end of file diff --git a/tests/integration/tag_chart/run_integration_tests.py b/tests/integration/tag_chart/run_integration_tests.py index 82cb4b9..f045d1d 100644 --- a/tests/integration/tag_chart/run_integration_tests.py +++ b/tests/integration/tag_chart/run_integration_tests.py @@ -29,11 +29,11 @@ class ParseTests(object): def _all_tests(self): """assume the test dirs are all the dirs inside the directory containing this script """ - return filter(lambda x: os.path.isdir(os.path.join(BASEDIR, x)), os.listdir(BASEDIR)) + return [x for x in os.listdir(BASEDIR) if os.path.isdir(os.path.join(BASEDIR, x))] def read_tests(self, test_path): - return yaml.load(open(os.path.join(test_path, - "tests.yaml")).read()).items() + return list(yaml.load(open(os.path.join(test_path, + "tests.yaml")).read()).items()) def run_tests(self): if self.__args.tests: @@ -48,8 +48,8 @@ class ParseTests(object): strategy_module_name, strategy_name = re.match('(.*)\.(\w+)', self.__args.strategy).groups() strategy_obj = get_class(strategy_module_name, strategy_name) - print "testing w/ parser %s, strategy %s" % (parser_class, strategy_name) - print "%s tests" % (len(tests)) + print("testing w/ parser %s, strategy %s" % (parser_class, strategy_name)) + print("%s tests" % (len(tests))) total_passed = 0 total_failed = 0 @@ -58,7 +58,7 @@ class ParseTests(object): grammar = grammar_class(os.path.join(test_path, "grammar.yaml")) parser = parser_class(grammar, strategy=strategy_obj) - print "starting to test grammar '%s':" % (test) + print("starting to test grammar '%s':" % (test)) failures = [] passes = 0 for sentence, trueparse in self.read_tests(test_path): @@ -75,18 +75,18 @@ 
class ParseTests(object): passes += 1 if failures: - print u" %s tests passed, %s failed" % (passes, len(failures)) + print(" %s tests passed, %s failed" % (passes, len(failures))) for failure in failures: - print u" " + failure + print(" " + failure) else: - print u" %s tests passed" % (passes) + print(" %s tests passed" % (passes)) total_passed += passes total_failed += len(failures) if total_failed: - print "TOTAL: %s tests passed, %s failed" % (total_passed, total_failed) + print("TOTAL: %s tests passed, %s failed" % (total_passed, total_failed)) return 1 else: - print "TOTAL: %s tests passed" % (total_passed) + print("TOTAL: %s tests passed" % (total_passed)) return 0 def _parse_all(self, parser, sentence, trueparses, failures): @@ -106,7 +106,7 @@ class ParseTests(object): try: parses = parser.nbest_parse(tokens) except: - print sentence + print(sentence) raise if parses is None or len(parses) == 0: failures.append('parsing failed: \'%s\'' % (sentence)) @@ -144,7 +144,7 @@ class ParseTests(object): try: parse = parser.parse(tokens) except: - print sentence + print(sentence) raise if parse is None: failures.append('parsing failed: \'%s\'' % (sentence)) diff --git a/tests/integration/tag_chart/test_tag_with_cfg.py b/tests/integration/tag_chart/test_tag_with_cfg.py index f34c1d4..a2d70ec 100644 --- a/tests/integration/tag_chart/test_tag_with_cfg.py +++ b/tests/integration/tag_chart/test_tag_with_cfg.py @@ -39,19 +39,19 @@ class ParseTests(object): errors = [] err = False - for i in xrange(n): - print i, + for i in range(n): + print(i, end=' ') sent = generate_random_sentence(grammar) - print len(errors), - print sent, + print(len(errors), end=' ') + print(sent, end=' ') tokens = sent.split(' ') good_parses = set(base_parser.nbest_parse(tokens, tree_class=ImmutableTree)) - print len(good_parses), + print(len(good_parses), end=' ') if len(good_parses) > 5: - print "skipping!" 
+ print("skipping!") continue found_parses = set(parser.nbest_parse(tokens)) - print len(found_parses) + print(len(found_parses)) if found_parses != good_parses: if len(found_parses) != len(good_parses): @@ -59,20 +59,20 @@ class ParseTests(object): else: errors.append("different parses found for \"%s\" (%s)" % (sent, len(good_parses))) if not err and len(good_parses) == 1: - print "GOOD:" - print "\n".join(repr(x) for x in good_parses) - print "FOUND:" - print "\n".join(repr(x) for x in found_parses) + print("GOOD:") + print("\n".join(repr(x) for x in good_parses)) + print("FOUND:") + print("\n".join(repr(x) for x in found_parses)) err = True if errors: - print "%s errors (out of %s tests)" % (len(errors), n) - print "\n".join(errors) + print("%s errors (out of %s tests)" % (len(errors), n)) + print("\n".join(errors)) return 1 else: - print "%s tests passed" % (n) + print("%s tests passed" % (n)) def parse_arguments(): parser = argparse.ArgumentParser(description='integration tests for CFG parsers') diff --git a/tests/performance/dedution_iterators.py b/tests/performance/dedution_iterators.py index 09fa563..4eecb83 100644 --- a/tests/performance/dedution_iterators.py +++ b/tests/performance/dedution_iterators.py @@ -16,14 +16,14 @@ from mjacob.algorithms.deduction_iterator import deductions # 1 2 8 98 3728 go = "for ded in deductions(start, A.__getitem__):pass" -for n in xrange(1,8): +for n in range(1,8): m = 0#12241244 A = dict((i, tuple(g - for g in chain(*[combinations(xrange(n), m) - for m in xrange(n+1)]) # xrange(4) + for g in chain(*[combinations(range(n), m) + for m in range(n+1)]) # xrange(4) if i not in g)) - for i in xrange(n)) + for i in range(n)) #A = dict((i, tuple((j,k) for j,k in product(xrange(n+1), xrange(n+1,n+n+2)) if j is not i and k is not i)) for i in xrange(n+n+1) if i != n) #print "\n".join(str(x) for x in sorted(A.items())) @@ -35,7 +35,7 @@ for n in xrange(1,8): #print u"; ".join(u"%s -> %s" % (",".join(str(x[j]) for j in b), x[a]) for a,b in d) m += 1 - print "%s %s" % (n, m) + print("%s %s" % (n, m)) for d in ("deduction_iterator", "deduction_iterator3"): continue prep = """ @@ -47,4 +47,4 @@ A = %s """ % (d, n, A) timer = timeit.Timer(go, prep) - print "%s %s (%s): %.2f ms/pass" % (d, n, m, 1000 * timer.timeit(number=1)) + print("%s %s (%s): %.2f ms/pass" % (d, n, m, 1000 * timer.timeit(number=1))) diff --git a/tests/performance/test_large_grammar.py b/tests/performance/test_large_grammar.py index 12bbfc2..5fdf6b1 100644 --- a/tests/performance/test_large_grammar.py +++ b/tests/performance/test_large_grammar.py @@ -32,12 +32,12 @@ sentences = [x[0] for x in nltk.parse.util.extract_test_sentences(raw_data)] tester = ParsePerformanceTester(grammar_string, sentences) -parser, parser_import = NLTK_BEST.items()[0] +parser, parser_import = list(NLTK_BEST.items())[0] nltk_time = 1000*tester.run(parser_import, method="parse") -parser, parser_import = MY_PARSER.items()[0] +parser, parser_import = list(MY_PARSER.items())[0] my_time1 = 1000*tester.run(parser_import, method="parse") -parser, parser_import = MY_PARSER.items()[1] +parser, parser_import = list(MY_PARSER.items())[1] my_time2 = 1000*tester.run(parser_import, method="parse") -print " sentence length NLTK %s ratio %s ratio2" % (MY_PARSER.items()[0][0], MY_PARSER.items()[1][0]) -print " %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f" % (tester.average_sentence_length(), nltk_time, my_time1, my_time1/nltk_time, my_time2, my_time2/nltk_time) +print(" sentence length NLTK %s ratio %s ratio2" % (list(MY_PARSER.items())[0][0], 
list(MY_PARSER.items())[1][0])) +print(" %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f" % (tester.average_sentence_length(), nltk_time, my_time1, my_time1/nltk_time, my_time2, my_time2/nltk_time)) diff --git a/tests/performance/test_medium_grammar.py b/tests/performance/test_medium_grammar.py index d73144f..2d50e2e 100644 --- a/tests/performance/test_medium_grammar.py +++ b/tests/performance/test_medium_grammar.py @@ -37,18 +37,18 @@ grammar_string = """nltk.parse_cfg('''%s ''')""" % (g) gr = TreeAdjoiningGrammar(cfg=grammar) -print gr -print "nonterminals: %s" % (len(gr.nonterminals())) -print "terminals: %s" % (len(gr.terminals())) +print(gr) +print("nonterminals: %s" % (len(gr.nonterminals()))) +print("terminals: %s" % (len(gr.terminals()))) -print " tree depth sentence length NLTK %s ratio %s ratio2" % (MY_PARSER.items()[0][0], MY_PARSER.items()[1][0]) -for tree_depth in xrange(4,11): +print(" tree depth sentence length NLTK %s ratio %s ratio2" % (list(MY_PARSER.items())[0][0], list(MY_PARSER.items())[1][0])) +for tree_depth in range(4,11): sentence_file = 'medium_sentences_%s.yaml' % (tree_depth,) if os.path.exists(sentence_file): sentences = yaml.load(open(sentence_file)) else: sentences = [generate_random_sentence(grammar, tree_depth).split(' ') - for i in xrange(100)] + for i in range(100)] yaml.dump(sentences, open(sentence_file, 'w')) tester = ParsePerformanceTester(grammar_string, @@ -56,9 +56,9 @@ for tree_depth in xrange(4,11): for parser, parser_import in sorted(NLTK_BEST.items()): nltk_time = 1000*tester.run(parser_import, method="parse") - parser, parser_import = MY_PARSER.items()[0] + parser, parser_import = list(MY_PARSER.items())[0] my_time1 = 1000*tester.run(parser_import, method="parse") - parser, parser_import = MY_PARSER.items()[1] + parser, parser_import = list(MY_PARSER.items())[1] my_time2 = 1000*tester.run(parser_import, method="parse") - print " %2i %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f" % (tree_depth, tester.average_sentence_length(), nltk_time, my_time1, my_time1/nltk_time, my_time2, my_time2/nltk_time) + print(" %2i %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f" % (tree_depth, tester.average_sentence_length(), nltk_time, my_time1, my_time1/nltk_time, my_time2, my_time2/nltk_time)) diff --git a/tests/performance/test_small_grammar.py b/tests/performance/test_small_grammar.py index 7827a00..75f84e0 100644 --- a/tests/performance/test_small_grammar.py +++ b/tests/performance/test_small_grammar.py @@ -18,24 +18,24 @@ grammar_string = "nltk.data.load('grammars/sample_grammars/toy.cfg')" -for tree_depth in xrange(4,8): +for tree_depth in range(4,8): sentence_file = 'trivial_sentences_%s.yaml' % (tree_depth,) if os.path.exists(sentence_file): sentences = yaml.load(open(sentence_file)) else: sentences = [generate_random_sentence(grammar, tree_depth).split(' ') - for i in xrange(100)] + for i in range(100)] yaml.dump(sentences, open(sentence_file, 'w')) tester = ParsePerformanceTester(grammar_string, sentences) - print "%s %s" % (tree_depth, tester.average_sentence_length()) + print("%s %s" % (tree_depth, tester.average_sentence_length())) for parser, parser_import in sorted(NLTK_BEST.items()): nltk_time = 1000*tester.run(parser_import, method="parse") - print " %s %.2f" % (parser, nltk_time) + print(" %s %.2f" % (parser, nltk_time)) for parser, parser_import in sorted(MY_PARSER.items()): my_time = 1000*tester.run(parser_import, method="parse") - print " %s %.2f" % (parser, my_time) + print(" %s %.2f" % (parser, my_time)) - print " ratio: %s" % (my_time/nltk_time) + print(" ratio: %s" % 
(my_time/nltk_time)) diff --git a/tests/stand_alone/counting.py b/tests/stand_alone/counting.py index 90d8e7c..246587a 100644 --- a/tests/stand_alone/counting.py +++ b/tests/stand_alone/counting.py @@ -14,6 +14,6 @@ parser = TagChartParser(grammar) for s in ["john died completely"]: tokens = s.split(' ') chart = parser.chart_parse(tokens) - print " %s" % ("\n ".join(str(x) for x in chart.parses(grammar.start().symbol(), TAG_EARLEY_STRATEGY))) - print parser.nbest_parse(tokens) - print parser.parse(tokens) \ No newline at end of file + print(" %s" % ("\n ".join(str(x) for x in chart.parses(grammar.start().symbol(), TAG_EARLEY_STRATEGY)))) + print(parser.nbest_parse(tokens)) + print(parser.parse(tokens)) \ No newline at end of file diff --git a/tests/stand_alone/cross_serial.py b/tests/stand_alone/cross_serial.py index 9617126..406f79c 100644 --- a/tests/stand_alone/cross_serial.py +++ b/tests/stand_alone/cross_serial.py @@ -23,6 +23,6 @@ for s in ["Jan Piet Marie de kinderen zag helpen leren zwemmen"]: # print " %s" % (z,) # i += 1 - print " %s" % ("\n ".join(tree.pprint(margin=10000) for tree in chart.parses(grammar.start().symbol(), TAG_EARLEY_STRATEGY))) + print(" %s" % ("\n ".join(tree.pprint(margin=10000) for tree in chart.parses(grammar.start().symbol(), TAG_EARLEY_STRATEGY)))) #print parser.nbest_parse(tokens) #print parser.parse(tokens) \ No newline at end of file diff --git a/tests/stand_alone/elephant.py b/tests/stand_alone/elephant.py index f0a5298..626c216 100644 --- a/tests/stand_alone/elephant.py +++ b/tests/stand_alone/elephant.py @@ -23,6 +23,6 @@ for s in ["I shot an elephant in my pajamas"]: # print " %s" % (z,) # i += 1 - print " %s" % ("\n ".join(tree.pprint(margin=10000) for tree in chart.parses(grammar.start().symbol(), TAG_PREFIX_VALID_EARLEY_STRATEGY))) + print(" %s" % ("\n ".join(tree.pprint(margin=10000) for tree in chart.parses(grammar.start().symbol(), TAG_PREFIX_VALID_EARLEY_STRATEGY)))) #print parser.nbest_parse(tokens) #print parser.parse(tokens) \ No newline at end of file diff --git a/tests/stand_alone/empty.py b/tests/stand_alone/empty.py index 3c69925..ad8848f 100644 --- a/tests/stand_alone/empty.py +++ b/tests/stand_alone/empty.py @@ -14,6 +14,6 @@ parser = TagChartParser(grammar, strategy=TAG_PREFIX_VALID_EARLEY_STRATEGY) for s in [""]: tokens = [] chart = parser.chart_parse(tokens) - print " %s" % ("\n ".join(str(x) for x in chart.parses(grammar.start().symbol(), TAG_PREFIX_VALID_EARLEY_STRATEGY))) + print(" %s" % ("\n ".join(str(x) for x in chart.parses(grammar.start().symbol(), TAG_PREFIX_VALID_EARLEY_STRATEGY)))) #print parser.nbest_parse(tokens) #print parser.parse(tokens) \ No newline at end of file diff --git a/tests/stand_alone/prefix_valid_testing.py b/tests/stand_alone/prefix_valid_testing.py index e49843d..5832db5 100644 --- a/tests/stand_alone/prefix_valid_testing.py +++ b/tests/stand_alone/prefix_valid_testing.py @@ -14,6 +14,6 @@ parser = TagChartParser(grammar, strategy=TAG_PREFIX_VALID_EARLEY_STRATEGY) for s in ["a c"]: tokens = s.split(' ') chart = parser.chart_parse(tokens) - print " %s" % ("\n ".join(str(x) for x in chart.parses(grammar.start().symbol(), TAG_PREFIX_VALID_EARLEY_STRATEGY))) + print(" %s" % ("\n ".join(str(x) for x in chart.parses(grammar.start().symbol(), TAG_PREFIX_VALID_EARLEY_STRATEGY)))) #print parser.nbest_parse(tokens) #print parser.parse(tokens) \ No newline at end of file diff --git a/tests/stand_alone/prefix_valid_testing2.py b/tests/stand_alone/prefix_valid_testing2.py index 9406594..d5a4822 100644 --- 
a/tests/stand_alone/prefix_valid_testing2.py
+++ b/tests/stand_alone/prefix_valid_testing2.py
@@ -14,6 +14,6 @@ parser = TagChartParser(grammar, strategy=TAG_PREFIX_VALID_EARLEY_STRATEGY)
 for s in ["a a a a"]:
     tokens = s.split(' ')
     chart = parser.chart_parse(tokens)
-    print " %s" % ("\n ".join(str(x) for x in chart.parses(grammar.start().symbol(), TAG_PREFIX_VALID_EARLEY_STRATEGY)))
+    print(" %s" % ("\n ".join(str(x) for x in chart.parses(grammar.start().symbol(), TAG_PREFIX_VALID_EARLEY_STRATEGY))))
 #print parser.nbest_parse(tokens)
 #print parser.parse(tokens)
\ No newline at end of file
diff --git a/tests/stand_alone/test7.py b/tests/stand_alone/test7.py
index 9fb289e..1c01793 100644
--- a/tests/stand_alone/test7.py
+++ b/tests/stand_alone/test7.py
@@ -10,4 +10,4 @@ from mjacob.collections.FrozenIndex import FrozenIndex
 x = FrozenIndex(((1,1),(1,2),(2,3),(2,4)))

 for y in x:
-    print y
\ No newline at end of file
+    print(y)
\ No newline at end of file
diff --git a/tests/stand_alone/test_tag_chart_parser.py b/tests/stand_alone/test_tag_chart_parser.py
index 4dde6f3..b2f26ed 100644
--- a/tests/stand_alone/test_tag_chart_parser.py
+++ b/tests/stand_alone/test_tag_chart_parser.py
@@ -30,6 +30,6 @@ elif VAR == 1:
 elif VAR == 2:
     chart = parser.chart_parse("Jan Piet Marie de kinderen zag helpen leren zwemmen".split(' '))

-print chart
-print "\n\n\n"
-print chart.parses(grammar.start().symbol(), TAG_EARLEY_STRATEGY)
\ No newline at end of file
+print(chart)
+print("\n\n\n")
+print(chart.parses(grammar.start().symbol(), TAG_EARLEY_STRATEGY))
\ No newline at end of file
diff --git a/tests/stand_alone/test_tag_chart_parser2.py b/tests/stand_alone/test_tag_chart_parser2.py
index b480422..99d51ff 100644
--- a/tests/stand_alone/test_tag_chart_parser2.py
+++ b/tests/stand_alone/test_tag_chart_parser2.py
@@ -11,11 +11,11 @@ from mjacob.nltk.parse.tag.TagChartParser import TagChartParser

 VAR = 1

-print "starting"
+print("starting")
 raw_data = nltk.data.load('grammars/large_grammars/atis_sentences.txt', format='raw')
 sentences = [x[0] for x in nltk.parse.util.extract_test_sentences(raw_data)][:1]

-print "creating grammar"
+print("creating grammar")
 if VAR == 0:
     grammar = TreeAdjoiningGrammar(cfg=nltk.data.load('grammars/large_grammars/atis.cfg'))
 elif VAR == 1:
@@ -23,9 +23,9 @@ elif VAR == 1:
 elif VAR == 2:
     grammar = TreeAdjoiningGrammar("../integration/tag_chart/cross_serial_2/grammar.yaml")

-print "creating parser"
+print("creating parser")
 parser = TagChartParser(grammar)
-print "parser created"
+print("parser created")

 if VAR == 0:
     chart = parser.chart_parse(sentences[0])
@@ -34,7 +34,7 @@ elif VAR == 1:
 elif VAR == 2:
     chart = parser.chart_parse("Jan Piet Marie de kinderen zag helpen leren zwemmen".split(' '))

-print chart
+print(chart)

-print "sentence accpeted: %s" % (chart.accept(grammar.start(), TAG_EARLEY_STRATEGY))
-print chart.parses(grammar.start(), TAG_EARLEY_STRATEGY)
\ No newline at end of file
+print("sentence accepted: %s" % (chart.accept(grammar.start(), TAG_EARLEY_STRATEGY)))
+print(chart.parses(grammar.start(), TAG_EARLEY_STRATEGY))
\ No newline at end of file
diff --git a/tests/stand_alone/trap.py b/tests/stand_alone/trap.py
index 72d6da0..a295a01 100644
--- a/tests/stand_alone/trap.py
+++ b/tests/stand_alone/trap.py
@@ -20,4 +20,4 @@ parser = TagChartParser(grammar)
 for s in ["b"]:
     tokens = s.split(' ')
     chart = parser.chart_parse(tokens)
-    print chart.num_parses(grammar.start().symbol(), TAG_EARLEY_STRATEGY)
+    print(chart.num_parses(grammar.start().symbol(), TAG_EARLEY_STRATEGY))
-- 
2.11.4.GIT
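
A note on the __cmp__ removals in this patch: 2to3 has no fixer for __cmp__,
so the conversion above replaces it by hand with __lt__ and __eq__ that
compare key tuples via operator.lt/operator.eq. That is enough for sorted()
and the equality checks this codebase performs, but <=, >, and >= on TagEdge
or PVTagEdge would raise TypeError under Python 3. A minimal sketch of the
alternative, using a hypothetical edge-like class (the names below are
illustrative, not taken from this repository): functools.total_ordering,
available since Python 3.2 (the interpreter this project now targets),
derives the remaining operators from the same two methods.

    from functools import total_ordering

    @total_ordering
    class Edge:
        """Toy stand-in for an edge; ordered by a key tuple."""
        def __init__(self, start, end):
            self.start = start
            self.end = end

        def _key(self):
            # Mirrors the old cmp(tuple_a, tuple_b) ordering.
            return (self.start, self.end)

        def __eq__(self, other):
            return self._key() == other._key()

        def __lt__(self, other):
            return self._key() < other._key()

        def __hash__(self):
            # Python 3 sets __hash__ to None when a class defines __eq__,
            # so restore it explicitly for use in sets and as dict keys.
            return hash(self._key())

    assert Edge(0, 1) <= Edge(0, 2)  # <= is derived by total_ordering
    assert sorted([Edge(2, 3), Edge(1, 5)])[0] == Edge(1, 5)

TagEdge and PVTagEdge keep explicit __hash__ definitions, so they stay
hashable. The FrozenIndex hunk, by contrast, removes __cmp__ and keeps
__eq__ while only a non-dunder hash() method is visible in the diff; if no
__hash__ exists elsewhere in that class, its instances become unhashable
under Python 3, which is worth checking given the "compiles, not tested"
caveat in the subject line.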
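
A second note, on the mechanical list(...) wrappers: 2to3 conservatively
wraps .keys(), .values(), and .items() in list() because Python 3 returns
views rather than lists. Where the result is only iterated once and the dict
is not mutated inside the loop (e.g. "for (restr_keys, index) in
list(self.__indexes.items())" in TagChart.py), the copy can be dropped. The
sketch below uses a stand-in dictionary, not the chart's real state, to show
the tightened idioms.

    indexes = {("start",): {}, ("start", "end"): {}}

    # Iterate the view directly; no copy is needed unless keys are
    # added or removed inside the loop body.
    for restr_keys, index in indexes.items():
        print(restr_keys, index)

    # sorted() accepts any iterable, so it replaces the three-step
    # keys()/sort()/tuple() sequence used in TagChart.select().
    restr_keys = tuple(sorted(indexes))
    print(restr_keys)

The copies that index into the result, such as list(MY_PARSER.items())[0] in
the performance tests, do still need list().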