1 # This Python file uses the following encoding: utf-8
3 Created on May 18, 2011
5 This is a hacked version of what's in NLTK.
7 it's available under the Apache 2.0 license:
8 http://www.apache.org/licenses/LICENSE-2.0
10 # Copyright (C) 2001-2011 NLTK Project
11 # Author: Edward Loper <edloper@gradient.cis.upenn.edu>
12 # Steven Bird <sb@csse.unimelb.edu.au>
13 # Jean Mark Gawron <gawron@mail.sdsu.edu>
14 # Peter Ljunglöf <peter.ljunglof@heatherleaf.se>
15 # URL: <http://www.nltk.org/>
16 # For license information, see LICENSE.TXT
20 from nltk
.tree
import Tree
22 class TagChart(object):
24 A blackboard for hypotheses about the syntactic constituents of a
25 sentence. A chart contains a set of edges, and each edge encodes
26 a single hypothesis about the structure of some portion of the
29 The L{select} method can be used to select a specific collection
30 of edges. For example C{chart.select(is_complete=True, start=0)}
31 yields all complete edges whose start indices are 0. To ensure
32 the efficiency of these selection operations, C{Chart} dynamically
33 creates and maintains an index for each set of attributes that
34 have been selected on.
36 In order to reconstruct the trees that are represented by an edge,
37 the chart associates each edge with a set of child pointer lists.
38 A X{child pointer list} is a list of the edges that license an
39 edge's right-hand side.
41 @ivar _tokens: The sentence that the chart covers.
42 @ivar _num_leaves: The number of tokens.
43 @ivar _edges: A list of the edges in the chart
44 @ivar _indexes: A dictionary mapping tuples of edge attributes
45 to indices, where each index maps the corresponding edge
46 attribute values to lists of edges.
48 def __init__(self
, tokens
, edge_type
):
50 Construct a new chart. The chart is initialized with the
51 leaf edges corresponding to the terminal leaves.
54 @param tokens: The sentence that this chart will be used to parse.
56 # Record the sentence token and the sentence length.
57 self
.__tokens
= tuple(tokens
)
58 self
.__num
_leaves
= len(self
.__tokens
)
59 self
.__leaf
_indices
= dict((leaf
, tuple(i
60 for i
in range(len(tokens
))
61 if tokens
[i
] == leaf
))
62 for leaf
in frozenset(tokens
))
64 self
.__edge
_type
=edge_type
66 # Initialise the chart.
73 # A list of edges contained in this chart.
76 # The set of child pointer lists associated with each edge.
77 self
.__edge
_treebuilders
= {}
78 self
.__cached
_trees
= {}
80 # Indexes mapping attribute values to lists of edges
85 return "\n".join(map(str, [type(self
),
86 " tokens: %s" % (self
.__tokens
,),
87 " leaves: %s" % (self
.__num
_leaves
,),
88 " indices: %s" % ("\n ".join(str(y
) for y
in list(self
.__leaf
_indices
.items()))),
89 " edges: %s" % ("\n ".join(str(y
) for y
in self
.__edges
)),
90 " indexes: %s" % ("\n ".join(str(y
) for y
in list(self
.__indexes
.items()))),
93 #////////////////////////////////////////////////////////////
95 #////////////////////////////////////////////////////////////
99 @return: The number of words in this chart's sentence.
102 return self
.__num
_leaves
104 def leaf(self
, index
):
106 @return: The leaf value of the word at the given index.
109 return self
.__tokens
[index
]
113 @return: A list of the leaf values of each word in the
115 @rtype: C{list} of C{string}
119 def leaf_indices(self
, leaf
):
121 @return: A list of indices at which @param leaf appears in the sentence
122 @rtype: C{tuple} of C{int}
124 return self
.__leaf
_indices
.get(leaf
, ())
126 #////////////////////////////////////////////////////////////
128 #////////////////////////////////////////////////////////////
132 @return: A list of all edges in this chart. New edges
133 that are added to the chart after the call to edges()
134 will be contained in this list, but you shouldn't rely on that.
135 @rtype: C{list} of L{EdgeI}
136 @see: L{iteredges}, L{select}
142 @return: An iterator over the edges in this chart. It is
143 I{not} guaranteed that new edges which are added to the
144 chart before the iterator is exhausted will also be
146 @rtype: C{iter} of L{EdgeI}
147 @see: L{edges}, L{select}
149 return iter(self
.__edges
)
151 # Iterating over the chart yields its edges.
156 @return: The number of edges contained in this chart.
159 return len(self
.__edges
)
161 def select(self
, **restrictions
):
163 @return: An iterator over the edges in this chart. Any
164 new edges that are added to the chart before the iterator
165 is exahusted will also be generated. C{restrictions}
166 can be used to restrict the set of edges that will be
168 @rtype: C{iter} of L{EdgeI}
170 # If there are no restrictions, then return all edges.
171 if restrictions
=={}: return iter(self
.__edges
)
173 # Find the index corresponding to the given restrictions.
174 restr_keys
= list(restrictions
.keys())
176 restr_keys
= tuple(restr_keys
)
178 # If it doesn't exist, then create it.
179 if restr_keys
not in self
.__indexes
:
180 self
._add
_index
(restr_keys
)
182 vals
= tuple(restrictions
[key
] for key
in restr_keys
)
183 return iter(self
.__indexes
[restr_keys
].get(vals
, []))
185 def _add_index(self
, restr_keys
):
187 A helper function for L{select}, which creates a new index for
188 a given set of attributes (aka restriction keys).
190 # Make sure it's a valid index.
191 for key
in restr_keys
:
192 if not hasattr(self
.__edge
_type
, key
):
193 raise ValueError('Bad restriction: %s' % key
)
196 index
= self
.__indexes
[restr_keys
] = {}
198 # Add all existing edges to the index.
199 for edge
in self
.__edges
:
200 vals
= tuple(getattr(edge
, key
)() for key
in restr_keys
)
201 index
.setdefault(vals
, []).append(edge
)
203 def _register_with_indexes(self
, edge
):
205 A helper function for L{insert}, which registers the new
206 edge with all existing indexes.
208 for (restr_keys
, index
) in list(self
.__indexes
.items()):
209 vals
= tuple(getattr(edge
, key
)() for key
in restr_keys
)
210 index
.setdefault(vals
, []).append(edge
)
212 #////////////////////////////////////////////////////////////
214 #////////////////////////////////////////////////////////////
216 def insert(self
, edge
, treebuilder
):
218 Add a new edge to the chart.
221 @param edge: The new edge
223 @return: True if this operation modified the chart. In
224 particular, return true iff the chart did not already
229 if edge
not in self
.__edge
_treebuilders
:
230 #print " new edge: %s" % (edge)
231 # Add it to the list of edges.
232 self
._append
_edge
(edge
)
233 # Register with indexes.
234 self
._register
_with
_indexes
(edge
)
235 self
.__edge
_treebuilders
[edge
] = set((treebuilder
,))
239 #print " duplicate edge: %s" % (edge)
240 self
.__edge
_treebuilders
[edge
].add(treebuilder
)
243 def get_trees(self
, edge
, working_on
=set()):
245 find the trees which can be built out of the specified edge.
247 the parameter "working_on" should not be specified by the user, as it is
248 an internal mechanism that is used to avoid loops in the recursion.
250 if edge
in self
.__cached
_trees
:
251 return self
.__cached
_trees
[edge
]
255 treebuilders
= self
.__edge
_treebuilders
[edge
]
256 if len(treebuilders
) == 1:
258 for treebuilder
in treebuilders
:
259 trees
= treebuilder
.build(working_on
)
262 for treebuilder
in treebuilders
:
263 trees
.extend(treebuilder
.build(working_on
))
264 trees
= frozenset(trees
)
265 self
.__cached
_trees
[edge
] = trees
266 working_on
.remove(edge
)
269 def _append_edge(self
, edge
):
270 self
.__edges
.append(edge
)
272 #////////////////////////////////////////////////////////////
273 # Tree extraction & child pointer lists
274 #////////////////////////////////////////////////////////////
276 def accept(self
, root
, strategy
):
277 """returns True if there's a goal item in the chart"""
278 return strategy
.goal_found(self
, root
)
280 """parses(self.__grammar.start(), self.__strategy, tree_class=tree_class)"""
281 def parses(self
, root
, strategy
, tree_class
=Tree
):
283 for goal_edge
in strategy
.goal_edges(self
, root
):
284 trees
.update(self
.get_trees(goal_edge
))