Add a way to specify the MimeMapper mappings to its constructor directly.
[cvs2svn.git] / cvs2svn_lib / changeset_graph.py
blob64ebf2ce8eeacc32f9d2dc3b2b477c3fe1f37d47
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2006-2008 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """The changeset dependency graph."""
20 from cvs2svn_lib.log import Log
21 from cvs2svn_lib.changeset import RevisionChangeset
22 from cvs2svn_lib.changeset import OrderedChangeset
23 from cvs2svn_lib.changeset import BranchChangeset
24 from cvs2svn_lib.changeset import TagChangeset
27 class CycleInGraphException(Exception):
28 def __init__(self, cycle):
29 Exception.__init__(
30 self,
31 'Cycle found in graph: %s'
32 % ' -> '.join(map(str, cycle + [cycle[0]])))
35 class NoPredNodeInGraphException(Exception):
36 def __init__(self, node):
37 Exception.__init__(self, 'Node %s has no predecessors' % (node,))
40 class _NoPredNodes:
41 """Manage changesets that are to be processed.
43 Output the changesets in order by time and changeset type.
45 The implementation of this class is crude: as changesets are added,
46 they are appended to a list. When one is needed, the list is sorted
47 in reverse order and then the last changeset in the list is
48 returned. To reduce the number of sorts that are needed, the class
49 keeps track of whether the list is currently sorted.
51 All this repeated sorting is wasteful and unnecessary. We should
52 instead use a heap to output the changeset order, which would
53 require O(lg N) work per add()/get() rather than O(1) and O(N lg N)
54 as in the current implementation [1]. But: (1) the lame interface
55 of heapq doesn't allow an arbitrary compare function, so we would
56 have to store extra information in the array elements; (2) in
57 practice, the number of items in the list at any time is only a tiny
58 fraction of the total number of changesets; and (3) testing showed
59 that the heapq implementation is no faster than this one (perhaps
60 because of the increased memory usage).
62 [1] According to Objects/listsort.txt in the Python source code, the
63 Python list-sorting code is heavily optimized for arrays that have
64 runs of already-sorted elements, so the current cost of get() is
65 probably closer to O(N) than O(N lg N)."""
67 def __init__(self, changeset_db):
68 self.changeset_db = changeset_db
69 # A list [(node, changeset,)] of nodes with no predecessors:
70 self._nodes = []
71 self._sorted = True
73 def __len__(self):
74 return len(self._nodes)
76 @staticmethod
77 def _compare((node_1, changeset_1), (node_2, changeset_2)):
78 """Define a (reverse) ordering on self._nodes."""
80 return cmp(node_2.time_range, node_1.time_range) \
81 or cmp(changeset_2, changeset_1)
83 def add(self, node):
84 self._nodes.append( (node, self.changeset_db[node.id],) )
85 self._sorted = False
87 def get(self):
88 """Return (node, changeset,) of the smallest node.
90 'Smallest' is defined by self._compare()."""
92 if not self._sorted:
93 self._nodes.sort(self._compare)
94 self._sorted = True
95 return self._nodes.pop()
98 class ChangesetGraph(object):
99 """A graph of changesets and their dependencies."""
101 def __init__(self, changeset_db, cvs_item_to_changeset_id):
102 self._changeset_db = changeset_db
103 self._cvs_item_to_changeset_id = cvs_item_to_changeset_id
104 # A map { id : ChangesetGraphNode }
105 self.nodes = {}
107 def close(self):
108 self._cvs_item_to_changeset_id.close()
109 self._cvs_item_to_changeset_id = None
110 self._changeset_db.close()
111 self._changeset_db = None
113 def add_changeset(self, changeset):
114 """Add CHANGESET to this graph.
116 Determine and record any dependencies to changesets that are
117 already in the graph. This method does not affect the databases."""
119 node = changeset.create_graph_node(self._cvs_item_to_changeset_id)
121 # Now tie the node into our graph. If a changeset referenced by
122 # node is already in our graph, then add the backwards connection
123 # from the other node to the new one. If not, then delete the
124 # changeset from node.
126 for pred_id in list(node.pred_ids):
127 pred_node = self.nodes.get(pred_id)
128 if pred_node is not None:
129 pred_node.succ_ids.add(node.id)
130 else:
131 node.pred_ids.remove(pred_id)
133 for succ_id in list(node.succ_ids):
134 succ_node = self.nodes.get(succ_id)
135 if succ_node is not None:
136 succ_node.pred_ids.add(node.id)
137 else:
138 node.succ_ids.remove(succ_id)
140 self.nodes[node.id] = node
142 def store_changeset(self, changeset):
143 for cvs_item_id in changeset.cvs_item_ids:
144 self._cvs_item_to_changeset_id[cvs_item_id] = changeset.id
145 self._changeset_db.store(changeset)
147 def add_new_changeset(self, changeset):
148 """Add the new CHANGESET to the graph and also to the databases."""
150 if Log().is_on(Log.DEBUG):
151 Log().debug('Adding changeset %r' % (changeset,))
153 self.add_changeset(changeset)
154 self.store_changeset(changeset)
156 def delete_changeset(self, changeset):
157 """Remove CHANGESET from the graph and also from the databases.
159 In fact, we don't remove CHANGESET from
160 self._cvs_item_to_changeset_id, because in practice the CVSItems
161 in CHANGESET are always added again as part of a new CHANGESET,
162 which will cause the old values to be overwritten."""
164 if Log().is_on(Log.DEBUG):
165 Log().debug('Removing changeset %r' % (changeset,))
167 del self[changeset.id]
168 del self._changeset_db[changeset.id]
170 def __nonzero__(self):
171 """Instances are considered True iff they contain any nodes."""
173 return bool(self.nodes)
175 def __contains__(self, id):
176 """Return True if the specified ID is contained in this graph."""
178 return id in self.nodes
180 def __getitem__(self, id):
181 return self.nodes[id]
183 def get(self, id):
184 return self.nodes.get(id)
186 def __delitem__(self, id):
187 """Remove the node corresponding to ID.
189 Also remove references to it from other nodes. This method does
190 not change pred_ids or succ_ids of the node being deleted, nor
191 does it affect the databases."""
193 node = self[id]
195 for succ_id in node.succ_ids:
196 succ = self[succ_id]
197 succ.pred_ids.remove(node.id)
199 for pred_id in node.pred_ids:
200 pred = self[pred_id]
201 pred.succ_ids.remove(node.id)
203 del self.nodes[node.id]
205 def keys(self):
206 return self.nodes.keys()
208 def __iter__(self):
209 return self.nodes.itervalues()
211 def _get_path(self, reachable_changesets, starting_node_id, ending_node_id):
212 """Return the shortest path from ENDING_NODE_ID to STARTING_NODE_ID.
214 Find a path from ENDING_NODE_ID to STARTING_NODE_ID in
215 REACHABLE_CHANGESETS, where STARTING_NODE_ID is the id of a
216 changeset that depends on the changeset with ENDING_NODE_ID. (See
217 the comment in search_for_path() for a description of the format
218 of REACHABLE_CHANGESETS.)
220 Return a list of changesets, where the 0th one has ENDING_NODE_ID
221 and the last one has STARTING_NODE_ID. If there is no such path
222 described in in REACHABLE_CHANGESETS, return None."""
224 if ending_node_id not in reachable_changesets:
225 return None
227 path = [self._changeset_db[ending_node_id]]
228 id = reachable_changesets[ending_node_id][1]
229 while id != starting_node_id:
230 path.append(self._changeset_db[id])
231 id = reachable_changesets[id][1]
232 path.append(self._changeset_db[starting_node_id])
233 return path
235 def search_for_path(self, starting_node_id, stop_set):
236 """Search for paths to prerequisites of STARTING_NODE_ID.
238 Try to find the shortest dependency path that causes the changeset
239 with STARTING_NODE_ID to depend (directly or indirectly) on one of
240 the changesets whose ids are contained in STOP_SET.
242 We consider direct and indirect dependencies in the sense that the
243 changeset can be reached by following a chain of predecessor nodes.
245 When one of the changeset_ids in STOP_SET is found, terminate the
246 search and return the path from that changeset_id to
247 STARTING_NODE_ID. If no path is found to a node in STOP_SET,
248 return None."""
250 # A map {node_id : (steps, next_node_id)} where NODE_ID can be
251 # reached from STARTING_NODE_ID in STEPS steps, and NEXT_NODE_ID
252 # is the id of the previous node in the path. STARTING_NODE_ID is
253 # only included as a key if there is a loop leading back to it.
254 reachable_changesets = {}
256 # A list of (node_id, steps) that still have to be investigated,
257 # and STEPS is the number of steps to get to NODE_ID.
258 open_nodes = [(starting_node_id, 0)]
259 # A breadth-first search:
260 while open_nodes:
261 (id, steps) = open_nodes.pop(0)
262 steps += 1
263 node = self[id]
264 for pred_id in node.pred_ids:
265 # Since the search is breadth-first, we only have to set steps
266 # that don't already exist.
267 if pred_id not in reachable_changesets:
268 reachable_changesets[pred_id] = (steps, id)
269 open_nodes.append((pred_id, steps))
271 # See if we can stop now:
272 if pred_id in stop_set:
273 return self._get_path(
274 reachable_changesets, starting_node_id, pred_id
277 return None
279 def consume_nopred_nodes(self):
280 """Remove and yield changesets in dependency order.
282 Each iteration, this generator yields a (changeset, time_range)
283 tuple for the oldest changeset in the graph that doesn't have any
284 predecessor nodes (i.e., it is ready to be committed). This is
285 continued until there are no more nodes without predecessors
286 (either because the graph has been emptied, or because of cycles
287 in the graph).
289 Among the changesets that are ready to be processed, the earliest
290 one (according to the sorting of the TimeRange class) is yielded
291 each time. (This is the order in which the changesets should be
292 committed.)
294 The graph should not be otherwise altered while this generator is
295 running."""
297 # Find a list of (node,changeset,) where the node has no
298 # predecessors:
299 nopred_nodes = _NoPredNodes(self._changeset_db)
300 for node in self.nodes.itervalues():
301 if not node.pred_ids:
302 nopred_nodes.add(node)
304 while nopred_nodes:
305 (node, changeset,) = nopred_nodes.get()
306 del self[node.id]
307 # See if any successors are now ready for extraction:
308 for succ_id in node.succ_ids:
309 succ = self[succ_id]
310 if not succ.pred_ids:
311 nopred_nodes.add(succ)
312 yield (changeset, node.time_range)
314 def find_cycle(self, starting_node_id):
315 """Find a cycle in the dependency graph and return it.
317 Use STARTING_NODE_ID as the place to start looking. This routine
318 must only be called after all nopred_nodes have been removed.
319 Return the list of changesets that are involved in the cycle
320 (ordered such that cycle[n-1] is a predecessor of cycle[n] and
321 cycle[-1] is a predecessor of cycle[0])."""
323 # Since there are no nopred nodes in the graph, all nodes in the
324 # graph must either be involved in a cycle or depend (directly or
325 # indirectly) on nodes that are in a cycle.
327 # Pick an arbitrary node:
328 node = self[starting_node_id]
330 seen_nodes = [node]
332 # Follow it backwards until a node is seen a second time; then we
333 # have our cycle.
334 while True:
335 # Pick an arbitrary predecessor of node. It must exist, because
336 # there are no nopred nodes:
337 try:
338 node_id = node.pred_ids.__iter__().next()
339 except StopIteration:
340 raise NoPredNodeInGraphException(node)
341 node = self[node_id]
342 try:
343 i = seen_nodes.index(node)
344 except ValueError:
345 seen_nodes.append(node)
346 else:
347 seen_nodes = seen_nodes[i:]
348 seen_nodes.reverse()
349 return [self._changeset_db[node.id] for node in seen_nodes]
351 def consume_graph(self, cycle_breaker=None):
352 """Remove and yield changesets from this graph in dependency order.
354 Each iteration, this generator yields a (changeset, time_range)
355 tuple for the oldest changeset in the graph that doesn't have any
356 predecessor nodes. If CYCLE_BREAKER is specified, then call
357 CYCLE_BREAKER(cycle) whenever a cycle is encountered, where cycle
358 is the list of changesets that are involved in the cycle (ordered
359 such that cycle[n-1] is a predecessor of cycle[n] and cycle[-1] is
360 a predecessor of cycle[0]). CYCLE_BREAKER should break the cycle
361 in place then return.
363 If a cycle is found and CYCLE_BREAKER was not specified, raise
364 CycleInGraphException."""
366 while True:
367 for (changeset, time_range) in self.consume_nopred_nodes():
368 yield (changeset, time_range)
370 # If there are any nodes left in the graph, then there must be
371 # at least one cycle. Find a cycle and process it.
373 # This might raise StopIteration, but that indicates that the
374 # graph has been fully consumed, so we just let the exception
375 # escape.
376 start_node_id = self.nodes.iterkeys().next()
378 cycle = self.find_cycle(start_node_id)
380 if cycle_breaker is not None:
381 cycle_breaker(cycle)
382 else:
383 raise CycleInGraphException(cycle)
385 def __repr__(self):
386 """For convenience only. The format is subject to change at any time."""
388 if self.nodes:
389 return 'ChangesetGraph:\n%s' \
390 % ''.join([' %r\n' % node for node in self])
391 else:
392 return 'ChangesetGraph:\n EMPTY\n'
394 node_colors = {
395 RevisionChangeset : 'lightgreen',
396 OrderedChangeset : 'cyan',
397 BranchChangeset : 'orange',
398 TagChangeset : 'yellow',
401 def output_coarse_dot(self, f):
402 """Output the graph in DOT format to file-like object f.
404 Such a file can be rendered into a visual representation of the
405 graph using tools like graphviz. Include only changesets in the
406 graph, and the dependencies between changesets."""
408 f.write('digraph G {\n')
409 for node in self:
410 f.write(
411 ' C%x [style=filled, fillcolor=%s];\n' % (
412 node.id,
413 self.node_colors[self._changeset_db[node.id].__class__],
416 f.write('\n')
418 for node in self:
419 for succ_id in node.succ_ids:
420 f.write(' C%x -> C%x\n' % (node.id, succ_id,))
421 f.write('\n')
423 f.write('}\n')
425 def output_fine_dot(self, f):
426 """Output the graph in DOT format to file-like object f.
428 Such a file can be rendered into a visual representation of the
429 graph using tools like graphviz. Include all CVSItems and the
430 CVSItem-CVSItem dependencies in the graph. Group the CVSItems
431 into clusters by changeset."""
433 f.write('digraph G {\n')
434 for node in self:
435 f.write(' subgraph cluster_%x {\n' % (node.id,))
436 f.write(' label = "C%x";\n' % (node.id,))
437 changeset = self._changeset_db[node.id]
438 for item_id in changeset.cvs_item_ids:
439 f.write(' I%x;\n' % (item_id,))
440 f.write(' style=filled;\n')
441 f.write(
442 ' fillcolor=%s;\n'
443 % (self.node_colors[self._changeset_db[node.id].__class__],))
444 f.write(' }\n\n')
446 for node in self:
447 changeset = self._changeset_db[node.id]
448 for cvs_item in changeset.iter_cvs_items():
449 for succ_id in cvs_item.get_succ_ids():
450 f.write(' I%x -> I%x;\n' % (cvs_item.id, succ_id,))
452 f.write('\n')
454 f.write('}\n')