2to3 (compiles, not tested)
[tag_parser.git] / src / mjacob / nltk / parse / tag / earley / TagEdge.py
blob3ef59b50c43da58187417035467bec0a7e6ad384
1 # This Python file uses the following encoding: utf-8
2 '''
3 Created on May 18, 2011
5 Based on nltk's EdgeI
7 @author: mjacob
8 '''
9 from nltk.tree import Tree
10 from nltk.grammar import Nonterminal
11 from mjacob.annotations.memoized import Memoize
12 from mjacob.nltk.grammar.TagNonterminal import TagNonterminal
13 from operator import lt, eq
15 """dot positions: """
16 LA = "LA"
17 LB = "LB"
18 RA = "RA"
19 RB = "RB"
21 class TagEdge(object):
22 """
23 A hypothesis about the structure of part of a sentence.
24 Each edge records the fact that a structure is (partially)
25 consistent with the sentence. An edge contains:
27 - A X{span}, indicating what part of the sentence is
28 consistent with the hypothesized structure.
30 - A X{gap}, indicating a hypothesis about the possible
31 contents of a foot node
33 - A X{treeposition}, indicating which portion of a
34 TagProduction we are analyzing
36 - A X{dotposition}, indicating analysis of a treeposition
37 w.r.t. adjunction
39 - X{has_adjoined}, which indicates whether adjunction has already occurred
40 at the indicated node
42 There are two kinds of edge:
44 - C{TreeEdges<TagTreeEdge>} record which trees have been found to
45 be (partially) consistent with the text.
47 - C{LeafEdges<TagLeafEdge>} record the tokens occur in the text.
49 The C{TagEdgeI} interface provides a common interface to both types
50 of edge, allowing chart parsers to treat them in a uniform manner.
51 """
53 def __init__(self, production, treeposition, span, gapspan, dotposition, has_adjoined):
54 self.__production = production
55 self.__treeposition = treeposition
56 self.__span = span
57 self.__gapspan = gapspan
58 self.__dotposition = dotposition
59 self.__has_adjoined = has_adjoined
61 def is_auxiliary(self):
62 """
63 True iff the edge's production is auxiliary
64 """
65 return self.__production.is_auxiliary()
67 def is_initial(self):
68 """
69 True iff the edge's production is initial
70 """
71 return self.__production.is_initial()
73 #////////////////////////////////////////////////////////////
74 # Span
75 #////////////////////////////////////////////////////////////
77 def span(self):
78 """
79 @return: A tuple C{(s,e)}, where C{subtokens[s:e]} is the
80 portion of the sentence that is consistent with this
81 edge's structure.
82 @rtype: C{(int, int)}
83 """
84 return self.__span
86 def start(self):
87 """
88 @return: The start index of this edge's span.
89 @rtype: C{int}
90 """
91 return self.__span[0]
93 def end(self):
94 """
95 @return: The end index of this edge's span.
96 @rtype: C{int}
97 """
98 return self.__span[1]
100 def length(self):
102 @return: The length of this edge's span.
103 @rtype: C{int}
105 return self.__span[1] - self.__span[0]
107 def has_gap(self):
109 True iff there is a gap (foot of an auxiliary node) with unfilled content
111 return self.__gapspan is not None
113 def gap(self):
115 @return: A tuple C{(s,e)}, where C{subtokens[s:e]} represents
116 a potential gap in the structure due to an auxiliary foot node
117 @rtype: C{(int, int)}
119 return self.__gapspan
121 def gapstart(self):
123 the start of the gap
125 if self.__gapspan is not None:
126 return self.__gapspan[0]
127 else:
128 return None
130 def gapend(self):
132 the end of the gap
134 if self.__gapspan is not None:
135 return self.__gapspan[1]
136 else:
137 return None
139 def gaplength(self):
141 the length of the gap
143 if self.__gapspan is not None:
144 return self.__gapspan[1] - self.__gapspan[0]
146 #////////////////////////////////////////////////////////////
147 # Left Hand Side
148 #////////////////////////////////////////////////////////////
150 def production(self):
151 """the production associated with this edge"""
152 return self.__production
154 def productionid(self):
156 the id of the production associated with this edge.
157 this is a useful optimization for selecting edges by production
158 without having to do a full tree comparison.
160 return id(self.__production)
162 @Memoize
163 def _node(self):
164 """the node of the production selected by the edge's treeposition"""
165 return self.__production.get_node(self.__treeposition)
167 @Memoize
168 def _subtree(self):
169 """the subtree of the production selected by the edge's treeposition"""
170 return self.__production[self.__treeposition]
172 def has_adjoined(self):
174 True iff the edge represents an adjunction of the production @ treeposition
176 return self.__has_adjoined
178 def can_adjoin(self):
180 True iff adjunction is permissible at production[treeposition]
181 substitution (and foot?) positions can never be adjunction positions.
183 subtree = self._subtree()
184 return isinstance(subtree, Tree) and not subtree.node.no_adjunction()
186 def adjoinable_symbol(self):
187 """if adjunction is permissible at production[treeposition], return
188 the symbol which may be adjoined. otherwise, return None"""
189 node = self._node()
190 if (isinstance(node, TagNonterminal) and not node.no_adjunction()):
191 return node.symbol()
192 else:
193 return None
195 def must_adjoin(self):
196 """True iff adjunction is obligatory at production[treeposition]"""
197 node = self._node()
198 return isinstance(node, TagNonterminal) and node.obligatory_adjunction()
200 def root_symbol(self):
202 @return: This edge's root, which specifies what kind
203 of structure is hypothesized by this edge.
204 @see: L{TagTreeEdge} and L{TagLeafEdge} for a description of
205 the root values for each edge type.
207 return self.__production.node.symbol()
209 def symbol(self):
211 @return: This edge's node, which specifies what kind
212 of structure is hypothesized by this edge.
213 @see: L{TagTreeEdge} and L{TagLeafEdge} for a description of
214 the root values for each edge type.
216 node = self._node()
217 if isinstance(node, Nonterminal):
218 return node.symbol()
219 else:
220 return None
222 def terminal(self):
223 """True iff production[treeposition] is a terminal element"""
224 node = self._node()
225 if isinstance(node, str):
226 return node
227 else:
228 return None
230 def text(self):
231 """if production[treeposition] is non-terminal, the symbol of the non-terminal.
232 otherwise, the text of the terminal"""
233 node =self._node()
234 if isinstance(node, Nonterminal):
235 return node.symbol()
236 else:
237 return node
239 def is_substitution_site(self):
241 True iff substitution is allowed at production[treeposition]
243 subtree = self._subtree()
244 return isinstance(subtree, TagNonterminal) and not subtree.is_foot()
246 def is_foot(self):
248 True iff production[treeposition] is the foot of an auxiliary
250 node = self._node()
251 return isinstance(node, TagNonterminal) and node.is_foot()
253 def is_terminal(self):
255 True iff production[treeposition] is a terminal
257 node = self._node()
258 return isinstance(node, str)
260 def is_epsilon(self):
262 True iff production[treeposition] is an internal, non-terminal node with no children.
264 subtree = self._subtree()
265 return isinstance(subtree, Tree) and len(subtree) == 0
267 def is_nonterminal(self):
269 True iff production[treeposition] is non-terminal
271 node = self._node()
272 return isinstance(node, Nonterminal)
274 #////////////////////////////////////////////////////////////
275 # Right Hand Side
276 #////////////////////////////////////////////////////////////
278 def treeposition(self):
280 @return: This edge's tree position, which indicates how much of
281 the hypothesized structure is consistent with the
282 sentence. In particular, C{self.production[treeposition]} is consistent
283 with C{subtokens[self.start():self.end()]}.
284 @rtype: C{list(int)}
286 return self.__treeposition
288 def dotposition(self):
290 returns the dot status of the edge
292 return self.__dotposition
294 def is_dot_left(self):
296 returns true if the dot is on the left of the node
298 return self.__dotposition in (LA, LB)
300 def is_dot_right(self):
302 returns True iff the dot is on the right of the node
304 return self.__dotposition in (RA, RB)
306 def is_dot_above(self):
308 returns True iff the dot is above the node
310 return self.__dotposition in (LA, RA)
312 def is_dot_below(self):
314 returns True iff the dot is below the node
316 return self.__dotposition in (LB, RB)
318 @Memoize
319 def has_sibling(self):
321 return True iff there is a node to the right of treeposition
323 pos = self.__treeposition
324 return (pos != ()
325 and len(self.__production[pos[:-1]]) > pos[-1]+1
328 def has_children(self):
329 """returns True iff treeposition has children"""
330 subtree = self._subtree()
331 return isinstance(subtree, Tree) and len(subtree) > 0
333 def is_root(self):
334 """returns True iff treeposition is the root of the tree"""
335 return self.__treeposition == ()
337 def child_nodes(self):
338 """returns a tuple of the nodes below production[treeposition]"""
339 subtree = self._subtree()
340 if isinstance(subtree, Tree):
341 return tuple(subsubtree.node.symbol() for subsubtree in subtree)
342 else:
343 return ()
345 #////////////////////////////////////////////////////////////
346 # Comparisons
347 #////////////////////////////////////////////////////////////
348 def __str__(self):
349 return '[%s:%s:%s:%s] %r %s[%s] %s %s' % (self.__span[0],
350 self.__gapspan is None and "-" or self.__gapspan[0],
351 self.__gapspan is None and "-" or self.__gapspan[1],
352 self.__span[1],
353 self.text(),
354 self.__production,
355 self.__treeposition,
356 self.__dotposition,
357 self.__has_adjoined)
359 def __repr__(self):
360 return '[Edge: %s]' % (self)
362 def __lt__(self, othr):
363 return lt((self.start(), self.gapstart(), self.gapend(), self.end(), self.__production, self.__treeposition, self.__dotposition, self.__has_adjoined),
364 (othr.start(), othr.gapstart(), othr.gapend(), othr.end(), othr.__production, othr.__treeposition, othr.__dotposition, othr.__has_adjoined))
366 def __eq__(self, othr):
367 return eq((self.start(), self.gapstart(), self.gapend(), self.end(), self.__production, self.__treeposition, self.__dotposition, self.__has_adjoined),
368 (othr.start(), othr.gapstart(), othr.gapend(), othr.end(), othr.__production, othr.__treeposition, othr.__dotposition, othr.__has_adjoined))
370 def __hash__(self):
371 return hash((self.__production, self.__treeposition, self.__span, self.__gapspan, self.__dotposition, self.__has_adjoined))