1 # This Python file uses the following encoding: utf-8
3 Created on May 18, 2011
9 from nltk
.tree
import Tree
10 from nltk
.grammar
import Nonterminal
11 from mjacob
.annotations
.memoized
import Memoize
12 from mjacob
.nltk
.grammar
.TagNonterminal
import TagNonterminal
13 from operator
import lt
, eq
21 class TagEdge(object):
23 A hypothesis about the structure of part of a sentence.
24 Each edge records the fact that a structure is (partially)
25 consistent with the sentence. An edge contains:
27 - A X{span}, indicating what part of the sentence is
28 consistent with the hypothesized structure.
30 - A X{gap}, indicating a hypothesis about the possible
31 contents of a foot node
33 - A X{treeposition}, indicating which portion of a
34 TagProduction we are analyzing
36 - A X{dotposition}, indicating analysis of a treeposition
39 - X{has_adjoined}, which indicates whether adjunction has already occurred
42 There are two kinds of edge:
44 - C{TreeEdges<TagTreeEdge>} record which trees have been found to
45 be (partially) consistent with the text.
47 - C{LeafEdges<TagLeafEdge>} record the tokens occur in the text.
49 The C{TagEdgeI} interface provides a common interface to both types
50 of edge, allowing chart parsers to treat them in a uniform manner.
53 def __init__(self
, production
, treeposition
, span
, gapspan
, dotposition
, has_adjoined
):
54 self
.__production
= production
55 self
.__treeposition
= treeposition
57 self
.__gapspan
= gapspan
58 self
.__dotposition
= dotposition
59 self
.__has
_adjoined
= has_adjoined
61 def is_auxiliary(self
):
63 True iff the edge's production is auxiliary
65 return self
.__production
.is_auxiliary()
69 True iff the edge's production is initial
71 return self
.__production
.is_initial()
73 #////////////////////////////////////////////////////////////
75 #////////////////////////////////////////////////////////////
79 @return: A tuple C{(s,e)}, where C{subtokens[s:e]} is the
80 portion of the sentence that is consistent with this
88 @return: The start index of this edge's span.
95 @return: The end index of this edge's span.
102 @return: The length of this edge's span.
105 return self
.__span
[1] - self
.__span
[0]
109 True iff there is a gap (foot of an auxiliary node) with unfilled content
111 return self
.__gapspan
is not None
115 @return: A tuple C{(s,e)}, where C{subtokens[s:e]} represents
116 a potential gap in the structure due to an auxiliary foot node
117 @rtype: C{(int, int)}
119 return self
.__gapspan
125 if self
.__gapspan
is not None:
126 return self
.__gapspan
[0]
134 if self
.__gapspan
is not None:
135 return self
.__gapspan
[1]
141 the length of the gap
143 if self
.__gapspan
is not None:
144 return self
.__gapspan
[1] - self
.__gapspan
[0]
146 #////////////////////////////////////////////////////////////
148 #////////////////////////////////////////////////////////////
150 def production(self
):
151 """the production associated with this edge"""
152 return self
.__production
154 def productionid(self
):
156 the id of the production associated with this edge.
157 this is a useful optimization for selecting edges by production
158 without having to do a full tree comparison.
160 return id(self
.__production
)
164 """the node of the production selected by the edge's treeposition"""
165 return self
.__production
.get_node(self
.__treeposition
)
169 """the subtree of the production selected by the edge's treeposition"""
170 return self
.__production
[self
.__treeposition
]
172 def has_adjoined(self
):
174 True iff the edge represents an adjunction of the production @ treeposition
176 return self
.__has
_adjoined
178 def can_adjoin(self
):
180 True iff adjunction is permissible at production[treeposition]
181 substitution (and foot?) positions can never be adjunction positions.
183 subtree
= self
._subtree
()
184 return isinstance(subtree
, Tree
) and not subtree
.node
.no_adjunction()
186 def adjoinable_symbol(self
):
187 """if adjunction is permissible at production[treeposition], return
188 the symbol which may be adjoined. otherwise, return None"""
190 if (isinstance(node
, TagNonterminal
) and not node
.no_adjunction()):
195 def must_adjoin(self
):
196 """True iff adjunction is obligatory at production[treeposition]"""
198 return isinstance(node
, TagNonterminal
) and node
.obligatory_adjunction()
200 def root_symbol(self
):
202 @return: This edge's root, which specifies what kind
203 of structure is hypothesized by this edge.
204 @see: L{TagTreeEdge} and L{TagLeafEdge} for a description of
205 the root values for each edge type.
207 return self
.__production
.node
.symbol()
211 @return: This edge's node, which specifies what kind
212 of structure is hypothesized by this edge.
213 @see: L{TagTreeEdge} and L{TagLeafEdge} for a description of
214 the root values for each edge type.
217 if isinstance(node
, Nonterminal
):
223 """True iff production[treeposition] is a terminal element"""
225 if isinstance(node
, str):
231 """if production[treeposition] is non-terminal, the symbol of the non-terminal.
232 otherwise, the text of the terminal"""
234 if isinstance(node
, Nonterminal
):
239 def is_substitution_site(self
):
241 True iff substitution is allowed at production[treeposition]
243 subtree
= self
._subtree
()
244 return isinstance(subtree
, TagNonterminal
) and not subtree
.is_foot()
248 True iff production[treeposition] is the foot of an auxiliary
251 return isinstance(node
, TagNonterminal
) and node
.is_foot()
253 def is_terminal(self
):
255 True iff production[treeposition] is a terminal
258 return isinstance(node
, str)
260 def is_epsilon(self
):
262 True iff production[treeposition] is an internal, non-terminal node with no children.
264 subtree
= self
._subtree
()
265 return isinstance(subtree
, Tree
) and len(subtree
) == 0
267 def is_nonterminal(self
):
269 True iff production[treeposition] is non-terminal
272 return isinstance(node
, Nonterminal
)
274 #////////////////////////////////////////////////////////////
276 #////////////////////////////////////////////////////////////
278 def treeposition(self
):
280 @return: This edge's tree position, which indicates how much of
281 the hypothesized structure is consistent with the
282 sentence. In particular, C{self.production[treeposition]} is consistent
283 with C{subtokens[self.start():self.end()]}.
286 return self
.__treeposition
288 def dotposition(self
):
290 returns the dot status of the edge
292 return self
.__dotposition
294 def is_dot_left(self
):
296 returns true if the dot is on the left of the node
298 return self
.__dotposition
in (LA
, LB
)
300 def is_dot_right(self
):
302 returns True iff the dot is on the right of the node
304 return self
.__dotposition
in (RA
, RB
)
306 def is_dot_above(self
):
308 returns True iff the dot is above the node
310 return self
.__dotposition
in (LA
, RA
)
312 def is_dot_below(self
):
314 returns True iff the dot is below the node
316 return self
.__dotposition
in (LB
, RB
)
319 def has_sibling(self
):
321 return True iff there is a node to the right of treeposition
323 pos
= self
.__treeposition
325 and len(self
.__production
[pos
[:-1]]) > pos
[-1]+1
328 def has_children(self
):
329 """returns True iff treeposition has children"""
330 subtree
= self
._subtree
()
331 return isinstance(subtree
, Tree
) and len(subtree
) > 0
334 """returns True iff treeposition is the root of the tree"""
335 return self
.__treeposition
== ()
337 def child_nodes(self
):
338 """returns a tuple of the nodes below production[treeposition]"""
339 subtree
= self
._subtree
()
340 if isinstance(subtree
, Tree
):
341 return tuple(subsubtree
.node
.symbol() for subsubtree
in subtree
)
345 #////////////////////////////////////////////////////////////
347 #////////////////////////////////////////////////////////////
349 return '[%s:%s:%s:%s] %r %s[%s] %s %s' % (self
.__span
[0],
350 self
.__gapspan
is None and "-" or self
.__gapspan
[0],
351 self
.__gapspan
is None and "-" or self
.__gapspan
[1],
360 return '[Edge: %s]' % (self
)
362 def __lt__(self
, othr
):
363 return lt((self
.start(), self
.gapstart(), self
.gapend(), self
.end(), self
.__production
, self
.__treeposition
, self
.__dotposition
, self
.__has
_adjoined
),
364 (othr
.start(), othr
.gapstart(), othr
.gapend(), othr
.end(), othr
.__production
, othr
.__treeposition
, othr
.__dotposition
, othr
.__has
_adjoined
))
366 def __eq__(self
, othr
):
367 return eq((self
.start(), self
.gapstart(), self
.gapend(), self
.end(), self
.__production
, self
.__treeposition
, self
.__dotposition
, self
.__has
_adjoined
),
368 (othr
.start(), othr
.gapstart(), othr
.gapend(), othr
.end(), othr
.__production
, othr
.__treeposition
, othr
.__dotposition
, othr
.__has
_adjoined
))
371 return hash((self
.__production
, self
.__treeposition
, self
.__span
, self
.__gapspan
, self
.__dotposition
, self
.__has
_adjoined
))