1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2006-2008 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """The changeset dependency graph."""
20 from cvs2svn_lib
.log
import Log
21 from cvs2svn_lib
.changeset
import RevisionChangeset
22 from cvs2svn_lib
.changeset
import OrderedChangeset
23 from cvs2svn_lib
.changeset
import BranchChangeset
24 from cvs2svn_lib
.changeset
import TagChangeset
27 class CycleInGraphException(Exception):
28 def __init__(self
, cycle
):
31 'Cycle found in graph: %s'
32 % ' -> '.join(map(str, cycle
+ [cycle
[0]])))
35 class NoPredNodeInGraphException(Exception):
36 def __init__(self
, node
):
37 Exception.__init
__(self
, 'Node %s has no predecessors' % (node
,))
41 """Manage changesets that are to be processed.
43 Output the changesets in order by time and changeset type.
45 The implementation of this class is crude: as changesets are added,
46 they are appended to a list. When one is needed, the list is sorted
47 in reverse order and then the last changeset in the list is
48 returned. To reduce the number of sorts that are needed, the class
49 keeps track of whether the list is currently sorted.
51 All this repeated sorting is wasteful and unnecessary. We should
52 instead use a heap to output the changeset order, which would
53 require O(lg N) work per add()/get() rather than O(1) and O(N lg N)
54 as in the current implementation [1]. But: (1) the lame interface
55 of heapq doesn't allow an arbitrary compare function, so we would
56 have to store extra information in the array elements; (2) in
57 practice, the number of items in the list at any time is only a tiny
58 fraction of the total number of changesets; and (3) testing showed
59 that the heapq implementation is no faster than this one (perhaps
60 because of the increased memory usage).
62 [1] According to Objects/listsort.txt in the Python source code, the
63 Python list-sorting code is heavily optimized for arrays that have
64 runs of already-sorted elements, so the current cost of get() is
65 probably closer to O(N) than O(N lg N)."""
67 def __init__(self
, changeset_db
):
68 self
.changeset_db
= changeset_db
69 # A list [(node, changeset,)] of nodes with no predecessors:
74 return len(self
._nodes
)
77 def _compare((node_1
, changeset_1
), (node_2
, changeset_2
)):
78 """Define a (reverse) ordering on self._nodes."""
80 return cmp(node_2
.time_range
, node_1
.time_range
) \
81 or cmp(changeset_2
, changeset_1
)
84 self
._nodes
.append( (node
, self
.changeset_db
[node
.id],) )
88 """Return (node, changeset,) of the smallest node.
90 'Smallest' is defined by self._compare()."""
93 self
._nodes
.sort(self
._compare
)
95 return self
._nodes
.pop()
98 class ChangesetGraph(object):
99 """A graph of changesets and their dependencies."""
101 def __init__(self
, changeset_db
, cvs_item_to_changeset_id
):
102 self
._changeset
_db
= changeset_db
103 self
._cvs
_item
_to
_changeset
_id
= cvs_item_to_changeset_id
104 # A map { id : ChangesetGraphNode }
108 self
._cvs
_item
_to
_changeset
_id
.close()
109 self
._cvs
_item
_to
_changeset
_id
= None
110 self
._changeset
_db
.close()
111 self
._changeset
_db
= None
113 def add_changeset(self
, changeset
):
114 """Add CHANGESET to this graph.
116 Determine and record any dependencies to changesets that are
117 already in the graph. This method does not affect the databases."""
119 node
= changeset
.create_graph_node(self
._cvs
_item
_to
_changeset
_id
)
121 # Now tie the node into our graph. If a changeset referenced by
122 # node is already in our graph, then add the backwards connection
123 # from the other node to the new one. If not, then delete the
124 # changeset from node.
126 for pred_id
in list(node
.pred_ids
):
127 pred_node
= self
.nodes
.get(pred_id
)
128 if pred_node
is not None:
129 pred_node
.succ_ids
.add(node
.id)
131 node
.pred_ids
.remove(pred_id
)
133 for succ_id
in list(node
.succ_ids
):
134 succ_node
= self
.nodes
.get(succ_id
)
135 if succ_node
is not None:
136 succ_node
.pred_ids
.add(node
.id)
138 node
.succ_ids
.remove(succ_id
)
140 self
.nodes
[node
.id] = node
142 def store_changeset(self
, changeset
):
143 for cvs_item_id
in changeset
.cvs_item_ids
:
144 self
._cvs
_item
_to
_changeset
_id
[cvs_item_id
] = changeset
.id
145 self
._changeset
_db
.store(changeset
)
147 def add_new_changeset(self
, changeset
):
148 """Add the new CHANGESET to the graph and also to the databases."""
150 if Log().is_on(Log
.DEBUG
):
151 Log().debug('Adding changeset %r' % (changeset
,))
153 self
.add_changeset(changeset
)
154 self
.store_changeset(changeset
)
156 def delete_changeset(self
, changeset
):
157 """Remove CHANGESET from the graph and also from the databases.
159 In fact, we don't remove CHANGESET from
160 self._cvs_item_to_changeset_id, because in practice the CVSItems
161 in CHANGESET are always added again as part of a new CHANGESET,
162 which will cause the old values to be overwritten."""
164 if Log().is_on(Log
.DEBUG
):
165 Log().debug('Removing changeset %r' % (changeset
,))
167 del self
[changeset
.id]
168 del self
._changeset
_db
[changeset
.id]
170 def __nonzero__(self
):
171 """Instances are considered True iff they contain any nodes."""
173 return bool(self
.nodes
)
175 def __contains__(self
, id):
176 """Return True if the specified ID is contained in this graph."""
178 return id in self
.nodes
180 def __getitem__(self
, id):
181 return self
.nodes
[id]
184 return self
.nodes
.get(id)
186 def __delitem__(self
, id):
187 """Remove the node corresponding to ID.
189 Also remove references to it from other nodes. This method does
190 not change pred_ids or succ_ids of the node being deleted, nor
191 does it affect the databases."""
195 for succ_id
in node
.succ_ids
:
197 succ
.pred_ids
.remove(node
.id)
199 for pred_id
in node
.pred_ids
:
201 pred
.succ_ids
.remove(node
.id)
203 del self
.nodes
[node
.id]
206 return self
.nodes
.keys()
209 return self
.nodes
.itervalues()
211 def _get_path(self
, reachable_changesets
, starting_node_id
, ending_node_id
):
212 """Return the shortest path from ENDING_NODE_ID to STARTING_NODE_ID.
214 Find a path from ENDING_NODE_ID to STARTING_NODE_ID in
215 REACHABLE_CHANGESETS, where STARTING_NODE_ID is the id of a
216 changeset that depends on the changeset with ENDING_NODE_ID. (See
217 the comment in search_for_path() for a description of the format
218 of REACHABLE_CHANGESETS.)
220 Return a list of changesets, where the 0th one has ENDING_NODE_ID
221 and the last one has STARTING_NODE_ID. If there is no such path
222 described in in REACHABLE_CHANGESETS, return None."""
224 if ending_node_id
not in reachable_changesets
:
227 path
= [self
._changeset
_db
[ending_node_id
]]
228 id = reachable_changesets
[ending_node_id
][1]
229 while id != starting_node_id
:
230 path
.append(self
._changeset
_db
[id])
231 id = reachable_changesets
[id][1]
232 path
.append(self
._changeset
_db
[starting_node_id
])
235 def search_for_path(self
, starting_node_id
, stop_set
):
236 """Search for paths to prerequisites of STARTING_NODE_ID.
238 Try to find the shortest dependency path that causes the changeset
239 with STARTING_NODE_ID to depend (directly or indirectly) on one of
240 the changesets whose ids are contained in STOP_SET.
242 We consider direct and indirect dependencies in the sense that the
243 changeset can be reached by following a chain of predecessor nodes.
245 When one of the changeset_ids in STOP_SET is found, terminate the
246 search and return the path from that changeset_id to
247 STARTING_NODE_ID. If no path is found to a node in STOP_SET,
250 # A map {node_id : (steps, next_node_id)} where NODE_ID can be
251 # reached from STARTING_NODE_ID in STEPS steps, and NEXT_NODE_ID
252 # is the id of the previous node in the path. STARTING_NODE_ID is
253 # only included as a key if there is a loop leading back to it.
254 reachable_changesets
= {}
256 # A list of (node_id, steps) that still have to be investigated,
257 # and STEPS is the number of steps to get to NODE_ID.
258 open_nodes
= [(starting_node_id
, 0)]
259 # A breadth-first search:
261 (id, steps
) = open_nodes
.pop(0)
264 for pred_id
in node
.pred_ids
:
265 # Since the search is breadth-first, we only have to set steps
266 # that don't already exist.
267 if pred_id
not in reachable_changesets
:
268 reachable_changesets
[pred_id
] = (steps
, id)
269 open_nodes
.append((pred_id
, steps
))
271 # See if we can stop now:
272 if pred_id
in stop_set
:
273 return self
._get
_path
(
274 reachable_changesets
, starting_node_id
, pred_id
279 def consume_nopred_nodes(self
):
280 """Remove and yield changesets in dependency order.
282 Each iteration, this generator yields a (changeset, time_range)
283 tuple for the oldest changeset in the graph that doesn't have any
284 predecessor nodes (i.e., it is ready to be committed). This is
285 continued until there are no more nodes without predecessors
286 (either because the graph has been emptied, or because of cycles
289 Among the changesets that are ready to be processed, the earliest
290 one (according to the sorting of the TimeRange class) is yielded
291 each time. (This is the order in which the changesets should be
294 The graph should not be otherwise altered while this generator is
297 # Find a list of (node,changeset,) where the node has no
299 nopred_nodes
= _NoPredNodes(self
._changeset
_db
)
300 for node
in self
.nodes
.itervalues():
301 if not node
.pred_ids
:
302 nopred_nodes
.add(node
)
305 (node
, changeset
,) = nopred_nodes
.get()
307 # See if any successors are now ready for extraction:
308 for succ_id
in node
.succ_ids
:
310 if not succ
.pred_ids
:
311 nopred_nodes
.add(succ
)
312 yield (changeset
, node
.time_range
)
314 def find_cycle(self
, starting_node_id
):
315 """Find a cycle in the dependency graph and return it.
317 Use STARTING_NODE_ID as the place to start looking. This routine
318 must only be called after all nopred_nodes have been removed.
319 Return the list of changesets that are involved in the cycle
320 (ordered such that cycle[n-1] is a predecessor of cycle[n] and
321 cycle[-1] is a predecessor of cycle[0])."""
323 # Since there are no nopred nodes in the graph, all nodes in the
324 # graph must either be involved in a cycle or depend (directly or
325 # indirectly) on nodes that are in a cycle.
327 # Pick an arbitrary node:
328 node
= self
[starting_node_id
]
332 # Follow it backwards until a node is seen a second time; then we
335 # Pick an arbitrary predecessor of node. It must exist, because
336 # there are no nopred nodes:
338 node_id
= node
.pred_ids
.__iter
__().next()
339 except StopIteration:
340 raise NoPredNodeInGraphException(node
)
343 i
= seen_nodes
.index(node
)
345 seen_nodes
.append(node
)
347 seen_nodes
= seen_nodes
[i
:]
349 return [self
._changeset
_db
[node
.id] for node
in seen_nodes
]
351 def consume_graph(self
, cycle_breaker
=None):
352 """Remove and yield changesets from this graph in dependency order.
354 Each iteration, this generator yields a (changeset, time_range)
355 tuple for the oldest changeset in the graph that doesn't have any
356 predecessor nodes. If CYCLE_BREAKER is specified, then call
357 CYCLE_BREAKER(cycle) whenever a cycle is encountered, where cycle
358 is the list of changesets that are involved in the cycle (ordered
359 such that cycle[n-1] is a predecessor of cycle[n] and cycle[-1] is
360 a predecessor of cycle[0]). CYCLE_BREAKER should break the cycle
361 in place then return.
363 If a cycle is found and CYCLE_BREAKER was not specified, raise
364 CycleInGraphException."""
367 for (changeset
, time_range
) in self
.consume_nopred_nodes():
368 yield (changeset
, time_range
)
370 # If there are any nodes left in the graph, then there must be
371 # at least one cycle. Find a cycle and process it.
373 # This might raise StopIteration, but that indicates that the
374 # graph has been fully consumed, so we just let the exception
376 start_node_id
= self
.nodes
.iterkeys().next()
378 cycle
= self
.find_cycle(start_node_id
)
380 if cycle_breaker
is not None:
383 raise CycleInGraphException(cycle
)
386 """For convenience only. The format is subject to change at any time."""
389 return 'ChangesetGraph:\n%s' \
390 % ''.join([' %r\n' % node
for node
in self
])
392 return 'ChangesetGraph:\n EMPTY\n'
395 RevisionChangeset
: 'lightgreen',
396 OrderedChangeset
: 'cyan',
397 BranchChangeset
: 'orange',
398 TagChangeset
: 'yellow',
401 def output_coarse_dot(self
, f
):
402 """Output the graph in DOT format to file-like object f.
404 Such a file can be rendered into a visual representation of the
405 graph using tools like graphviz. Include only changesets in the
406 graph, and the dependencies between changesets."""
408 f
.write('digraph G {\n')
411 ' C%x [style=filled, fillcolor=%s];\n' % (
413 self
.node_colors
[self
._changeset
_db
[node
.id].__class
__],
419 for succ_id
in node
.succ_ids
:
420 f
.write(' C%x -> C%x\n' % (node
.id, succ_id
,))
425 def output_fine_dot(self
, f
):
426 """Output the graph in DOT format to file-like object f.
428 Such a file can be rendered into a visual representation of the
429 graph using tools like graphviz. Include all CVSItems and the
430 CVSItem-CVSItem dependencies in the graph. Group the CVSItems
431 into clusters by changeset."""
433 f
.write('digraph G {\n')
435 f
.write(' subgraph cluster_%x {\n' % (node
.id,))
436 f
.write(' label = "C%x";\n' % (node
.id,))
437 changeset
= self
._changeset
_db
[node
.id]
438 for item_id
in changeset
.cvs_item_ids
:
439 f
.write(' I%x;\n' % (item_id
,))
440 f
.write(' style=filled;\n')
443 % (self
.node_colors
[self
._changeset
_db
[node
.id].__class
__],))
447 changeset
= self
._changeset
_db
[node
.id]
448 for cvs_item
in changeset
.iter_cvs_items():
449 for succ_id
in cvs_item
.get_succ_ids():
450 f
.write(' I%x -> I%x;\n' % (cvs_item
.id, succ_id
,))