1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2006-2008 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """The changeset dependency graph."""
22 from cvs2svn_lib
.log
import logger
23 from cvs2svn_lib
.changeset
import RevisionChangeset
24 from cvs2svn_lib
.changeset
import OrderedChangeset
25 from cvs2svn_lib
.changeset
import BranchChangeset
26 from cvs2svn_lib
.changeset
import TagChangeset
29 class CycleInGraphException(Exception):
30 def __init__(self
, cycle
):
33 'Cycle found in graph: %s'
34 % ' -> '.join(map(str, cycle
+ [cycle
[0]])))
37 class NoPredNodeInGraphException(Exception):
38 def __init__(self
, node
):
39 Exception.__init
__(self
, 'Node %s has no predecessors' % (node
,))
42 class _NoPredNodes(object):
43 """Manage changesets that are ready to be processed.
45 Output the changesets in order by time and changeset type."""
47 def __init__(self
, changeset_db
, initial_nodes
):
50 INITIAL_NODES is an iterable over node to add to this object on
53 self
.changeset_db
= changeset_db
55 # A heapified list of (node.time_range, changeset, node) tuples
56 # that have no predecessors. These tuples sort in the desired
59 (node
.time_range
, self
.changeset_db
[node
.id], node
)
60 for node
in initial_nodes
62 heapq
.heapify(self
._nodes
)
65 return len(self
._nodes
)
68 node
= (node
.time_range
, self
.changeset_db
[node
.id], node
)
69 heapq
.heappush(self
._nodes
, node
)
72 """Return (node, changeset,) of the next node to be committed.
74 'Smallest' is defined by the ordering of the tuples in
75 self._nodes; namely, the changeset with the earliest time_range,
76 with ties broken by comparing the changesets themselves."""
78 (time_range
, changeset
, node
) = heapq
.heappop(self
._nodes
)
79 return (node
, changeset
)
82 class ChangesetGraph(object):
83 """A graph of changesets and their dependencies."""
85 def __init__(self
, changeset_db
, cvs_item_to_changeset_id
):
86 self
._changeset
_db
= changeset_db
87 self
._cvs
_item
_to
_changeset
_id
= cvs_item_to_changeset_id
88 # A map { id : ChangesetGraphNode }
92 self
._cvs
_item
_to
_changeset
_id
.close()
93 self
._cvs
_item
_to
_changeset
_id
= None
94 self
._changeset
_db
.close()
95 self
._changeset
_db
= None
97 def add_changeset(self
, changeset
):
98 """Add CHANGESET to this graph.
100 Determine and record any dependencies to changesets that are
101 already in the graph. This method does not affect the databases."""
103 node
= changeset
.create_graph_node(self
._cvs
_item
_to
_changeset
_id
)
105 # Now tie the node into our graph. If a changeset referenced by
106 # node is already in our graph, then add the backwards connection
107 # from the other node to the new one. If not, then delete the
108 # changeset from node.
110 for pred_id
in list(node
.pred_ids
):
111 pred_node
= self
.nodes
.get(pred_id
)
112 if pred_node
is not None:
113 pred_node
.succ_ids
.add(node
.id)
115 node
.pred_ids
.remove(pred_id
)
117 for succ_id
in list(node
.succ_ids
):
118 succ_node
= self
.nodes
.get(succ_id
)
119 if succ_node
is not None:
120 succ_node
.pred_ids
.add(node
.id)
122 node
.succ_ids
.remove(succ_id
)
124 self
.nodes
[node
.id] = node
126 def store_changeset(self
, changeset
):
127 for cvs_item_id
in changeset
.cvs_item_ids
:
128 self
._cvs
_item
_to
_changeset
_id
[cvs_item_id
] = changeset
.id
129 self
._changeset
_db
.store(changeset
)
131 def add_new_changeset(self
, changeset
):
132 """Add the new CHANGESET to the graph and also to the databases."""
134 if logger
.is_on(logger
.DEBUG
):
135 logger
.debug('Adding changeset %r' % (changeset
,))
137 self
.add_changeset(changeset
)
138 self
.store_changeset(changeset
)
140 def delete_changeset(self
, changeset
):
141 """Remove CHANGESET from the graph and also from the databases.
143 In fact, we don't remove CHANGESET from
144 self._cvs_item_to_changeset_id, because in practice the CVSItems
145 in CHANGESET are always added again as part of a new CHANGESET,
146 which will cause the old values to be overwritten."""
148 if logger
.is_on(logger
.DEBUG
):
149 logger
.debug('Removing changeset %r' % (changeset
,))
151 del self
[changeset
.id]
152 del self
._changeset
_db
[changeset
.id]
154 def __nonzero__(self
):
155 """Instances are considered True iff they contain any nodes."""
157 return bool(self
.nodes
)
159 def __contains__(self
, id):
160 """Return True if the specified ID is contained in this graph."""
162 return id in self
.nodes
164 def __getitem__(self
, id):
165 return self
.nodes
[id]
168 return self
.nodes
.get(id)
170 def __delitem__(self
, id):
171 """Remove the node corresponding to ID.
173 Also remove references to it from other nodes. This method does
174 not change pred_ids or succ_ids of the node being deleted, nor
175 does it affect the databases."""
179 for succ_id
in node
.succ_ids
:
181 succ
.pred_ids
.remove(node
.id)
183 for pred_id
in node
.pred_ids
:
185 pred
.succ_ids
.remove(node
.id)
187 del self
.nodes
[node
.id]
190 return self
.nodes
.keys()
193 return self
.nodes
.itervalues()
195 def _get_path(self
, reachable_changesets
, starting_node_id
, ending_node_id
):
196 """Return the shortest path from ENDING_NODE_ID to STARTING_NODE_ID.
198 Find a path from ENDING_NODE_ID to STARTING_NODE_ID in
199 REACHABLE_CHANGESETS, where STARTING_NODE_ID is the id of a
200 changeset that depends on the changeset with ENDING_NODE_ID. (See
201 the comment in search_for_path() for a description of the format
202 of REACHABLE_CHANGESETS.)
204 Return a list of changesets, where the 0th one has ENDING_NODE_ID
205 and the last one has STARTING_NODE_ID. If there is no such path
206 described in in REACHABLE_CHANGESETS, return None."""
208 if ending_node_id
not in reachable_changesets
:
211 path
= [self
._changeset
_db
[ending_node_id
]]
212 id = reachable_changesets
[ending_node_id
][1]
213 while id != starting_node_id
:
214 path
.append(self
._changeset
_db
[id])
215 id = reachable_changesets
[id][1]
216 path
.append(self
._changeset
_db
[starting_node_id
])
219 def search_for_path(self
, starting_node_id
, stop_set
):
220 """Search for paths to prerequisites of STARTING_NODE_ID.
222 Try to find the shortest dependency path that causes the changeset
223 with STARTING_NODE_ID to depend (directly or indirectly) on one of
224 the changesets whose ids are contained in STOP_SET.
226 We consider direct and indirect dependencies in the sense that the
227 changeset can be reached by following a chain of predecessor nodes.
229 When one of the changeset_ids in STOP_SET is found, terminate the
230 search and return the path from that changeset_id to
231 STARTING_NODE_ID. If no path is found to a node in STOP_SET,
234 # A map {node_id : (steps, next_node_id)} where NODE_ID can be
235 # reached from STARTING_NODE_ID in STEPS steps, and NEXT_NODE_ID
236 # is the id of the previous node in the path. STARTING_NODE_ID is
237 # only included as a key if there is a loop leading back to it.
238 reachable_changesets
= {}
240 # A list of (node_id, steps) that still have to be investigated,
241 # and STEPS is the number of steps to get to NODE_ID.
242 open_nodes
= [(starting_node_id
, 0)]
243 # A breadth-first search:
245 (id, steps
) = open_nodes
.pop(0)
248 for pred_id
in node
.pred_ids
:
249 # Since the search is breadth-first, we only have to set steps
250 # that don't already exist.
251 if pred_id
not in reachable_changesets
:
252 reachable_changesets
[pred_id
] = (steps
, id)
253 open_nodes
.append((pred_id
, steps
))
255 # See if we can stop now:
256 if pred_id
in stop_set
:
257 return self
._get
_path
(
258 reachable_changesets
, starting_node_id
, pred_id
263 def consume_nopred_nodes(self
):
264 """Remove and yield changesets in dependency order.
266 Each iteration, this generator yields a (changeset, time_range)
267 tuple for the oldest changeset in the graph that doesn't have any
268 predecessor nodes (i.e., it is ready to be committed). This is
269 continued until there are no more nodes without predecessors
270 (either because the graph has been emptied, or because of cycles
273 Among the changesets that are ready to be processed, the earliest
274 one (according to the sorting of the TimeRange class) is yielded
275 each time. (This is the order in which the changesets should be
278 The graph should not be otherwise altered while this generator is
281 # Find a list of (node,changeset,) where the node has no
283 nopred_nodes
= _NoPredNodes(
287 for node
in self
.nodes
.itervalues()
293 (node
, changeset
,) = nopred_nodes
.get()
295 # See if any successors are now ready for extraction:
296 for succ_id
in node
.succ_ids
:
298 if not succ
.pred_ids
:
299 nopred_nodes
.add(succ
)
300 yield (changeset
, node
.time_range
)
302 def find_cycle(self
, starting_node_id
):
303 """Find a cycle in the dependency graph and return it.
305 Use STARTING_NODE_ID as the place to start looking. This routine
306 must only be called after all nopred_nodes have been removed.
307 Return the list of changesets that are involved in the cycle
308 (ordered such that cycle[n-1] is a predecessor of cycle[n] and
309 cycle[-1] is a predecessor of cycle[0])."""
311 # Since there are no nopred nodes in the graph, all nodes in the
312 # graph must either be involved in a cycle or depend (directly or
313 # indirectly) on nodes that are in a cycle.
315 # Pick an arbitrary node:
316 node
= self
[starting_node_id
]
320 # Follow it backwards until a node is seen a second time; then we
323 # Pick an arbitrary predecessor of node. It must exist, because
324 # there are no nopred nodes:
326 node_id
= node
.pred_ids
.__iter
__().next()
327 except StopIteration:
328 raise NoPredNodeInGraphException(node
)
331 i
= seen_nodes
.index(node
)
333 seen_nodes
.append(node
)
335 seen_nodes
= seen_nodes
[i
:]
337 return [self
._changeset
_db
[node
.id] for node
in seen_nodes
]
339 def consume_graph(self
, cycle_breaker
=None):
340 """Remove and yield changesets from this graph in dependency order.
342 Each iteration, this generator yields a (changeset, time_range)
343 tuple for the oldest changeset in the graph that doesn't have any
344 predecessor nodes. If CYCLE_BREAKER is specified, then call
345 CYCLE_BREAKER(cycle) whenever a cycle is encountered, where cycle
346 is the list of changesets that are involved in the cycle (ordered
347 such that cycle[n-1] is a predecessor of cycle[n] and cycle[-1] is
348 a predecessor of cycle[0]). CYCLE_BREAKER should break the cycle
349 in place then return.
351 If a cycle is found and CYCLE_BREAKER was not specified, raise
352 CycleInGraphException."""
355 for (changeset
, time_range
) in self
.consume_nopred_nodes():
356 yield (changeset
, time_range
)
358 # If there are any nodes left in the graph, then there must be
359 # at least one cycle. Find a cycle and process it.
361 # This might raise StopIteration, but that indicates that the
362 # graph has been fully consumed, so we just let the exception
364 start_node_id
= self
.nodes
.iterkeys().next()
366 cycle
= self
.find_cycle(start_node_id
)
368 if cycle_breaker
is not None:
371 raise CycleInGraphException(cycle
)
374 """For convenience only. The format is subject to change at any time."""
377 return 'ChangesetGraph:\n%s' \
378 % ''.join([' %r\n' % node
for node
in self
])
380 return 'ChangesetGraph:\n EMPTY\n'
383 RevisionChangeset
: 'lightgreen',
384 OrderedChangeset
: 'cyan',
385 BranchChangeset
: 'orange',
386 TagChangeset
: 'yellow',
389 def output_coarse_dot(self
, f
):
390 """Output the graph in DOT format to file-like object f.
392 Such a file can be rendered into a visual representation of the
393 graph using tools like graphviz. Include only changesets in the
394 graph, and the dependencies between changesets."""
396 f
.write('digraph G {\n')
399 ' C%x [style=filled, fillcolor=%s];\n' % (
401 self
.node_colors
[self
._changeset
_db
[node
.id].__class
__],
407 for succ_id
in node
.succ_ids
:
408 f
.write(' C%x -> C%x\n' % (node
.id, succ_id
,))
413 def output_fine_dot(self
, f
):
414 """Output the graph in DOT format to file-like object f.
416 Such a file can be rendered into a visual representation of the
417 graph using tools like graphviz. Include all CVSItems and the
418 CVSItem-CVSItem dependencies in the graph. Group the CVSItems
419 into clusters by changeset."""
421 f
.write('digraph G {\n')
423 f
.write(' subgraph cluster_%x {\n' % (node
.id,))
424 f
.write(' label = "C%x";\n' % (node
.id,))
425 changeset
= self
._changeset
_db
[node
.id]
426 for item_id
in changeset
.cvs_item_ids
:
427 f
.write(' I%x;\n' % (item_id
,))
428 f
.write(' style=filled;\n')
431 % (self
.node_colors
[self
._changeset
_db
[node
.id].__class
__],))
435 changeset
= self
._changeset
_db
[node
.id]
436 for cvs_item
in changeset
.iter_cvs_items():
437 for succ_id
in cvs_item
.get_succ_ids():
438 f
.write(' I%x -> I%x;\n' % (cvs_item
.id, succ_id
,))