cvs2git: Make the --blobfile argument optional.
[cvs2svn.git] / cvs2svn_lib / changeset_graph.py
blob0bf2196ad02f37055ef150dde59a117e69750a82
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2006-2008 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """The changeset dependency graph."""
20 import heapq
22 from cvs2svn_lib.log import logger
23 from cvs2svn_lib.changeset import RevisionChangeset
24 from cvs2svn_lib.changeset import OrderedChangeset
25 from cvs2svn_lib.changeset import BranchChangeset
26 from cvs2svn_lib.changeset import TagChangeset
29 class CycleInGraphException(Exception):
30 def __init__(self, cycle):
31 Exception.__init__(
32 self,
33 'Cycle found in graph: %s'
34 % ' -> '.join(map(str, cycle + [cycle[0]])))
37 class NoPredNodeInGraphException(Exception):
38 def __init__(self, node):
39 Exception.__init__(self, 'Node %s has no predecessors' % (node,))
42 class _NoPredNodes(object):
43 """Manage changesets that are ready to be processed.
45 Output the changesets in order by time and changeset type."""
47 def __init__(self, changeset_db, initial_nodes):
48 """Initialize.
50 INITIAL_NODES is an iterable over node to add to this object on
51 initialization."""
53 self.changeset_db = changeset_db
55 # A heapified list of (node.time_range, changeset, node) tuples
56 # that have no predecessors. These tuples sort in the desired
57 # commit order:
58 self._nodes = [
59 (node.time_range, self.changeset_db[node.id], node)
60 for node in initial_nodes
62 heapq.heapify(self._nodes)
64 def __len__(self):
65 return len(self._nodes)
67 def add(self, node):
68 node = (node.time_range, self.changeset_db[node.id], node)
69 heapq.heappush(self._nodes, node)
71 def get(self):
72 """Return (node, changeset,) of the next node to be committed.
74 'Smallest' is defined by the ordering of the tuples in
75 self._nodes; namely, the changeset with the earliest time_range,
76 with ties broken by comparing the changesets themselves."""
78 (time_range, changeset, node) = heapq.heappop(self._nodes)
79 return (node, changeset)
82 class ChangesetGraph(object):
83 """A graph of changesets and their dependencies."""
85 def __init__(self, changeset_db, cvs_item_to_changeset_id):
86 self._changeset_db = changeset_db
87 self._cvs_item_to_changeset_id = cvs_item_to_changeset_id
88 # A map { id : ChangesetGraphNode }
89 self.nodes = {}
91 def close(self):
92 self._cvs_item_to_changeset_id.close()
93 self._cvs_item_to_changeset_id = None
94 self._changeset_db.close()
95 self._changeset_db = None
97 def add_changeset(self, changeset):
98 """Add CHANGESET to this graph.
100 Determine and record any dependencies to changesets that are
101 already in the graph. This method does not affect the databases."""
103 node = changeset.create_graph_node(self._cvs_item_to_changeset_id)
105 # Now tie the node into our graph. If a changeset referenced by
106 # node is already in our graph, then add the backwards connection
107 # from the other node to the new one. If not, then delete the
108 # changeset from node.
110 for pred_id in list(node.pred_ids):
111 pred_node = self.nodes.get(pred_id)
112 if pred_node is not None:
113 pred_node.succ_ids.add(node.id)
114 else:
115 node.pred_ids.remove(pred_id)
117 for succ_id in list(node.succ_ids):
118 succ_node = self.nodes.get(succ_id)
119 if succ_node is not None:
120 succ_node.pred_ids.add(node.id)
121 else:
122 node.succ_ids.remove(succ_id)
124 self.nodes[node.id] = node
126 def store_changeset(self, changeset):
127 for cvs_item_id in changeset.cvs_item_ids:
128 self._cvs_item_to_changeset_id[cvs_item_id] = changeset.id
129 self._changeset_db.store(changeset)
131 def add_new_changeset(self, changeset):
132 """Add the new CHANGESET to the graph and also to the databases."""
134 if logger.is_on(logger.DEBUG):
135 logger.debug('Adding changeset %r' % (changeset,))
137 self.add_changeset(changeset)
138 self.store_changeset(changeset)
140 def delete_changeset(self, changeset):
141 """Remove CHANGESET from the graph and also from the databases.
143 In fact, we don't remove CHANGESET from
144 self._cvs_item_to_changeset_id, because in practice the CVSItems
145 in CHANGESET are always added again as part of a new CHANGESET,
146 which will cause the old values to be overwritten."""
148 if logger.is_on(logger.DEBUG):
149 logger.debug('Removing changeset %r' % (changeset,))
151 del self[changeset.id]
152 del self._changeset_db[changeset.id]
154 def __nonzero__(self):
155 """Instances are considered True iff they contain any nodes."""
157 return bool(self.nodes)
159 def __contains__(self, id):
160 """Return True if the specified ID is contained in this graph."""
162 return id in self.nodes
164 def __getitem__(self, id):
165 return self.nodes[id]
167 def get(self, id):
168 return self.nodes.get(id)
170 def __delitem__(self, id):
171 """Remove the node corresponding to ID.
173 Also remove references to it from other nodes. This method does
174 not change pred_ids or succ_ids of the node being deleted, nor
175 does it affect the databases."""
177 node = self[id]
179 for succ_id in node.succ_ids:
180 succ = self[succ_id]
181 succ.pred_ids.remove(node.id)
183 for pred_id in node.pred_ids:
184 pred = self[pred_id]
185 pred.succ_ids.remove(node.id)
187 del self.nodes[node.id]
189 def keys(self):
190 return self.nodes.keys()
192 def __iter__(self):
193 return self.nodes.itervalues()
195 def _get_path(self, reachable_changesets, starting_node_id, ending_node_id):
196 """Return the shortest path from ENDING_NODE_ID to STARTING_NODE_ID.
198 Find a path from ENDING_NODE_ID to STARTING_NODE_ID in
199 REACHABLE_CHANGESETS, where STARTING_NODE_ID is the id of a
200 changeset that depends on the changeset with ENDING_NODE_ID. (See
201 the comment in search_for_path() for a description of the format
202 of REACHABLE_CHANGESETS.)
204 Return a list of changesets, where the 0th one has ENDING_NODE_ID
205 and the last one has STARTING_NODE_ID. If there is no such path
206 described in in REACHABLE_CHANGESETS, return None."""
208 if ending_node_id not in reachable_changesets:
209 return None
211 path = [self._changeset_db[ending_node_id]]
212 id = reachable_changesets[ending_node_id][1]
213 while id != starting_node_id:
214 path.append(self._changeset_db[id])
215 id = reachable_changesets[id][1]
216 path.append(self._changeset_db[starting_node_id])
217 return path
219 def search_for_path(self, starting_node_id, stop_set):
220 """Search for paths to prerequisites of STARTING_NODE_ID.
222 Try to find the shortest dependency path that causes the changeset
223 with STARTING_NODE_ID to depend (directly or indirectly) on one of
224 the changesets whose ids are contained in STOP_SET.
226 We consider direct and indirect dependencies in the sense that the
227 changeset can be reached by following a chain of predecessor nodes.
229 When one of the changeset_ids in STOP_SET is found, terminate the
230 search and return the path from that changeset_id to
231 STARTING_NODE_ID. If no path is found to a node in STOP_SET,
232 return None."""
234 # A map {node_id : (steps, next_node_id)} where NODE_ID can be
235 # reached from STARTING_NODE_ID in STEPS steps, and NEXT_NODE_ID
236 # is the id of the previous node in the path. STARTING_NODE_ID is
237 # only included as a key if there is a loop leading back to it.
238 reachable_changesets = {}
240 # A list of (node_id, steps) that still have to be investigated,
241 # and STEPS is the number of steps to get to NODE_ID.
242 open_nodes = [(starting_node_id, 0)]
243 # A breadth-first search:
244 while open_nodes:
245 (id, steps) = open_nodes.pop(0)
246 steps += 1
247 node = self[id]
248 for pred_id in node.pred_ids:
249 # Since the search is breadth-first, we only have to set steps
250 # that don't already exist.
251 if pred_id not in reachable_changesets:
252 reachable_changesets[pred_id] = (steps, id)
253 open_nodes.append((pred_id, steps))
255 # See if we can stop now:
256 if pred_id in stop_set:
257 return self._get_path(
258 reachable_changesets, starting_node_id, pred_id
261 return None
263 def consume_nopred_nodes(self):
264 """Remove and yield changesets in dependency order.
266 Each iteration, this generator yields a (changeset, time_range)
267 tuple for the oldest changeset in the graph that doesn't have any
268 predecessor nodes (i.e., it is ready to be committed). This is
269 continued until there are no more nodes without predecessors
270 (either because the graph has been emptied, or because of cycles
271 in the graph).
273 Among the changesets that are ready to be processed, the earliest
274 one (according to the sorting of the TimeRange class) is yielded
275 each time. (This is the order in which the changesets should be
276 committed.)
278 The graph should not be otherwise altered while this generator is
279 running."""
281 # Find a list of (node,changeset,) where the node has no
282 # predecessors:
283 nopred_nodes = _NoPredNodes(
284 self._changeset_db,
286 node
287 for node in self.nodes.itervalues()
288 if not node.pred_ids
292 while nopred_nodes:
293 (node, changeset,) = nopred_nodes.get()
294 del self[node.id]
295 # See if any successors are now ready for extraction:
296 for succ_id in node.succ_ids:
297 succ = self[succ_id]
298 if not succ.pred_ids:
299 nopred_nodes.add(succ)
300 yield (changeset, node.time_range)
302 def find_cycle(self, starting_node_id):
303 """Find a cycle in the dependency graph and return it.
305 Use STARTING_NODE_ID as the place to start looking. This routine
306 must only be called after all nopred_nodes have been removed.
307 Return the list of changesets that are involved in the cycle
308 (ordered such that cycle[n-1] is a predecessor of cycle[n] and
309 cycle[-1] is a predecessor of cycle[0])."""
311 # Since there are no nopred nodes in the graph, all nodes in the
312 # graph must either be involved in a cycle or depend (directly or
313 # indirectly) on nodes that are in a cycle.
315 # Pick an arbitrary node:
316 node = self[starting_node_id]
318 seen_nodes = [node]
320 # Follow it backwards until a node is seen a second time; then we
321 # have our cycle.
322 while True:
323 # Pick an arbitrary predecessor of node. It must exist, because
324 # there are no nopred nodes:
325 try:
326 node_id = node.pred_ids.__iter__().next()
327 except StopIteration:
328 raise NoPredNodeInGraphException(node)
329 node = self[node_id]
330 try:
331 i = seen_nodes.index(node)
332 except ValueError:
333 seen_nodes.append(node)
334 else:
335 seen_nodes = seen_nodes[i:]
336 seen_nodes.reverse()
337 return [self._changeset_db[node.id] for node in seen_nodes]
339 def consume_graph(self, cycle_breaker=None):
340 """Remove and yield changesets from this graph in dependency order.
342 Each iteration, this generator yields a (changeset, time_range)
343 tuple for the oldest changeset in the graph that doesn't have any
344 predecessor nodes. If CYCLE_BREAKER is specified, then call
345 CYCLE_BREAKER(cycle) whenever a cycle is encountered, where cycle
346 is the list of changesets that are involved in the cycle (ordered
347 such that cycle[n-1] is a predecessor of cycle[n] and cycle[-1] is
348 a predecessor of cycle[0]). CYCLE_BREAKER should break the cycle
349 in place then return.
351 If a cycle is found and CYCLE_BREAKER was not specified, raise
352 CycleInGraphException."""
354 while True:
355 for (changeset, time_range) in self.consume_nopred_nodes():
356 yield (changeset, time_range)
358 # If there are any nodes left in the graph, then there must be
359 # at least one cycle. Find a cycle and process it.
361 # This might raise StopIteration, but that indicates that the
362 # graph has been fully consumed, so we just let the exception
363 # escape.
364 start_node_id = self.nodes.iterkeys().next()
366 cycle = self.find_cycle(start_node_id)
368 if cycle_breaker is not None:
369 cycle_breaker(cycle)
370 else:
371 raise CycleInGraphException(cycle)
373 def __repr__(self):
374 """For convenience only. The format is subject to change at any time."""
376 if self.nodes:
377 return 'ChangesetGraph:\n%s' \
378 % ''.join([' %r\n' % node for node in self])
379 else:
380 return 'ChangesetGraph:\n EMPTY\n'
382 node_colors = {
383 RevisionChangeset : 'lightgreen',
384 OrderedChangeset : 'cyan',
385 BranchChangeset : 'orange',
386 TagChangeset : 'yellow',
389 def output_coarse_dot(self, f):
390 """Output the graph in DOT format to file-like object f.
392 Such a file can be rendered into a visual representation of the
393 graph using tools like graphviz. Include only changesets in the
394 graph, and the dependencies between changesets."""
396 f.write('digraph G {\n')
397 for node in self:
398 f.write(
399 ' C%x [style=filled, fillcolor=%s];\n' % (
400 node.id,
401 self.node_colors[self._changeset_db[node.id].__class__],
404 f.write('\n')
406 for node in self:
407 for succ_id in node.succ_ids:
408 f.write(' C%x -> C%x\n' % (node.id, succ_id,))
409 f.write('\n')
411 f.write('}\n')
413 def output_fine_dot(self, f):
414 """Output the graph in DOT format to file-like object f.
416 Such a file can be rendered into a visual representation of the
417 graph using tools like graphviz. Include all CVSItems and the
418 CVSItem-CVSItem dependencies in the graph. Group the CVSItems
419 into clusters by changeset."""
421 f.write('digraph G {\n')
422 for node in self:
423 f.write(' subgraph cluster_%x {\n' % (node.id,))
424 f.write(' label = "C%x";\n' % (node.id,))
425 changeset = self._changeset_db[node.id]
426 for item_id in changeset.cvs_item_ids:
427 f.write(' I%x;\n' % (item_id,))
428 f.write(' style=filled;\n')
429 f.write(
430 ' fillcolor=%s;\n'
431 % (self.node_colors[self._changeset_db[node.id].__class__],))
432 f.write(' }\n\n')
434 for node in self:
435 changeset = self._changeset_db[node.id]
436 for cvs_item in changeset.iter_cvs_items():
437 for succ_id in cvs_item.get_succ_ids():
438 f.write(' I%x -> I%x;\n' % (cvs_item.id, succ_id,))
440 f.write('\n')
442 f.write('}\n')