cvs2svn_lib/changeset_graph.py

   1 # (Be in -*- python -*- mode.)
   2 #
   3 # ====================================================================
   4 # Copyright (c) 2006-2008 CollabNet.  All rights reserved.
   5 #
   6 # This software is licensed as described in the file COPYING, which
   7 # you should have received as part of this distribution.  The terms
   8 # are also available at http://subversion.tigris.org/license-1.html.
   9 # If newer versions of this license are posted there, you may use a
  10 # newer version instead, at your option.
  11 #
  12 # This software consists of voluntary contributions made by many
  13 # individuals.  For exact contribution history, see the revision
  14 # history and logs, available at http://cvs2svn.tigris.org/.
  15 # ====================================================================
  16
  17 """The changeset dependency graph."""
  18
  19
  20 import heapq
  21
  22 from cvs2svn_lib.log import logger
  23 from cvs2svn_lib.changeset import RevisionChangeset
  24 from cvs2svn_lib.changeset import OrderedChangeset
  25 from cvs2svn_lib.changeset import BranchChangeset
  26 from cvs2svn_lib.changeset import TagChangeset
  27
  28
  29 class CycleInGraphException(Exception):
  30   def __init__(self, cycle):
  31     Exception.__init__(
  32         self,
  33         'Cycle found in graph: %s'
  34         % ' -> '.join(map(str, cycle + [cycle[0]])))
  35
  36
  37 class NoPredNodeInGraphException(Exception):
  38   def __init__(self, node):
  39     Exception.__init__(self, 'Node %s has no predecessors' % (node,))
  40
  41
  42 class _NoPredNodes(object):
  43   """Manage changesets that are ready to be processed.
  44
  45   Output the changesets in order by time and changeset type."""
  46
  47   def __init__(self, changeset_db, initial_nodes):
  48     """Initialize.
  49
  50     INITIAL_NODES is an iterable over node to add to this object on
  51     initialization."""
  52
  53     self.changeset_db = changeset_db
  54
  55     # A heapified list of (node.time_range, changeset, node) tuples
  56     # that have no predecessors.  These tuples sort in the desired
  57     # commit order:
  58     self._nodes = [
  59       (node.time_range, self.changeset_db[node.id], node)
  60       for node in initial_nodes
  61       ]
  62     heapq.heapify(self._nodes)
  63
  64   def __len__(self):
  65     return len(self._nodes)
  66
  67   def add(self, node):
  68     node = (node.time_range, self.changeset_db[node.id], node)
  69     heapq.heappush(self._nodes, node)
  70
  71   def get(self):
  72     """Return (node, changeset,) of the next node to be committed.
  73
  74     'Smallest' is defined by the ordering of the tuples in
  75     self._nodes; namely, the changeset with the earliest time_range,
  76     with ties broken by comparing the changesets themselves."""
  77
  78     (time_range, changeset, node) = heapq.heappop(self._nodes)
  79     return (node, changeset)
  80
  81
  82 class ChangesetGraph(object):
  83   """A graph of changesets and their dependencies."""
  84
  85   def __init__(self, changeset_db, cvs_item_to_changeset_id):
  86     self._changeset_db = changeset_db
  87     self._cvs_item_to_changeset_id = cvs_item_to_changeset_id
  88     # A map { id : ChangesetGraphNode }
  89     self.nodes = {}
  90
  91   def close(self):
  92     self._cvs_item_to_changeset_id.close()
  93     self._cvs_item_to_changeset_id = None
  94     self._changeset_db.close()
  95     self._changeset_db = None
  96
  97   def add_changeset(self, changeset):
  98     """Add CHANGESET to this graph.
  99
 100     Determine and record any dependencies to changesets that are
 101     already in the graph.  This method does not affect the databases."""
 102
 103     node = changeset.create_graph_node(self._cvs_item_to_changeset_id)
 104
 105     # Now tie the node into our graph.  If a changeset referenced by
 106     # node is already in our graph, then add the backwards connection
 107     # from the other node to the new one.  If not, then delete the
 108     # changeset from node.
 109
 110     for pred_id in list(node.pred_ids):
 111       pred_node = self.nodes.get(pred_id)
 112       if pred_node is not None:
 113         pred_node.succ_ids.add(node.id)
 114       else:
 115         node.pred_ids.remove(pred_id)
 116
 117     for succ_id in list(node.succ_ids):
 118       succ_node = self.nodes.get(succ_id)
 119       if succ_node is not None:
 120         succ_node.pred_ids.add(node.id)
 121       else:
 122         node.succ_ids.remove(succ_id)
 123
 124     self.nodes[node.id] = node
 125
 126   def store_changeset(self, changeset):
 127     for cvs_item_id in changeset.cvs_item_ids:
 128       self._cvs_item_to_changeset_id[cvs_item_id] = changeset.id
 129     self._changeset_db.store(changeset)
 130
 131   def add_new_changeset(self, changeset):
 132     """Add the new CHANGESET to the graph and also to the databases."""
 133
 134     if logger.is_on(logger.DEBUG):
 135       logger.debug('Adding changeset %r' % (changeset,))
 136
 137     self.add_changeset(changeset)
 138     self.store_changeset(changeset)
 139
 140   def delete_changeset(self, changeset):
 141     """Remove CHANGESET from the graph and also from the databases.
 142
 143     In fact, we don't remove CHANGESET from
 144     self._cvs_item_to_changeset_id, because in practice the CVSItems
 145     in CHANGESET are always added again as part of a new CHANGESET,
 146     which will cause the old values to be overwritten."""
 147
 148     if logger.is_on(logger.DEBUG):
 149       logger.debug('Removing changeset %r' % (changeset,))
 150
 151     del self[changeset.id]
 152     del self._changeset_db[changeset.id]
 153
 154   def __nonzero__(self):
 155     """Instances are considered True iff they contain any nodes."""
 156
 157     return bool(self.nodes)
 158
 159   def __contains__(self, id):
 160     """Return True if the specified ID is contained in this graph."""
 161
 162     return id in self.nodes
 163
 164   def __getitem__(self, id):
 165     return self.nodes[id]
 166
 167   def get(self, id):
 168     return self.nodes.get(id)
 169
 170   def __delitem__(self, id):
 171     """Remove the node corresponding to ID.
 172
 173     Also remove references to it from other nodes.  This method does
 174     not change pred_ids or succ_ids of the node being deleted, nor
 175     does it affect the databases."""
 176
 177     node = self[id]
 178
 179     for succ_id in node.succ_ids:
 180       succ = self[succ_id]
 181       succ.pred_ids.remove(node.id)
 182
 183     for pred_id in node.pred_ids:
 184       pred = self[pred_id]
 185       pred.succ_ids.remove(node.id)
 186
 187     del self.nodes[node.id]
 188
 189   def keys(self):
 190     return self.nodes.keys()
 191
 192   def __iter__(self):
 193     return self.nodes.itervalues()
 194
 195   def _get_path(self, reachable_changesets, starting_node_id, ending_node_id):
 196     """Return the shortest path from ENDING_NODE_ID to STARTING_NODE_ID.
 197
 198     Find a path from ENDING_NODE_ID to STARTING_NODE_ID in
 199     REACHABLE_CHANGESETS, where STARTING_NODE_ID is the id of a
 200     changeset that depends on the changeset with ENDING_NODE_ID.  (See
 201     the comment in search_for_path() for a description of the format
 202     of REACHABLE_CHANGESETS.)
 203
 204     Return a list of changesets, where the 0th one has ENDING_NODE_ID
 205     and the last one has STARTING_NODE_ID.  If there is no such path
 206     described in in REACHABLE_CHANGESETS, return None."""
 207
 208     if ending_node_id not in reachable_changesets:
 209       return None
 210
 211     path = [self._changeset_db[ending_node_id]]
 212     id = reachable_changesets[ending_node_id][1]
 213     while id != starting_node_id:
 214       path.append(self._changeset_db[id])
 215       id = reachable_changesets[id][1]
 216     path.append(self._changeset_db[starting_node_id])
 217     return path
 218
 219   def search_for_path(self, starting_node_id, stop_set):
 220     """Search for paths to prerequisites of STARTING_NODE_ID.
 221
 222     Try to find the shortest dependency path that causes the changeset
 223     with STARTING_NODE_ID to depend (directly or indirectly) on one of
 224     the changesets whose ids are contained in STOP_SET.
 225
 226     We consider direct and indirect dependencies in the sense that the
 227     changeset can be reached by following a chain of predecessor nodes.
 228
 229     When one of the changeset_ids in STOP_SET is found, terminate the
 230     search and return the path from that changeset_id to
 231     STARTING_NODE_ID.  If no path is found to a node in STOP_SET,
 232     return None."""
 233
 234     # A map {node_id : (steps, next_node_id)} where NODE_ID can be
 235     # reached from STARTING_NODE_ID in STEPS steps, and NEXT_NODE_ID
 236     # is the id of the previous node in the path.  STARTING_NODE_ID is
 237     # only included as a key if there is a loop leading back to it.
 238     reachable_changesets = {}
 239
 240     # A list of (node_id, steps) that still have to be investigated,
 241     # and STEPS is the number of steps to get to NODE_ID.
 242     open_nodes = [(starting_node_id, 0)]
 243     # A breadth-first search:
 244     while open_nodes:
 245       (id, steps) = open_nodes.pop(0)
 246       steps += 1
 247       node = self[id]
 248       for pred_id in node.pred_ids:
 249         # Since the search is breadth-first, we only have to set steps
 250         # that don't already exist.
 251         if pred_id not in reachable_changesets:
 252           reachable_changesets[pred_id] = (steps, id)
 253           open_nodes.append((pred_id, steps))
 254
 255           # See if we can stop now:
 256           if pred_id in stop_set:
 257             return self._get_path(
 258                 reachable_changesets, starting_node_id, pred_id
 259                 )
 260
 261     return None
 262
 263   def consume_nopred_nodes(self):
 264     """Remove and yield changesets in dependency order.
 265
 266     Each iteration, this generator yields a (changeset, time_range)
 267     tuple for the oldest changeset in the graph that doesn't have any
 268     predecessor nodes (i.e., it is ready to be committed).  This is
 269     continued until there are no more nodes without predecessors
 270     (either because the graph has been emptied, or because of cycles
 271     in the graph).
 272
 273     Among the changesets that are ready to be processed, the earliest
 274     one (according to the sorting of the TimeRange class) is yielded
 275     each time.  (This is the order in which the changesets should be
 276     committed.)
 277
 278     The graph should not be otherwise altered while this generator is
 279     running."""
 280
 281     # Find a list of (node,changeset,) where the node has no
 282     # predecessors:
 283     nopred_nodes = _NoPredNodes(
 284       self._changeset_db,
 285       (
 286           node
 287           for node in self.nodes.itervalues()
 288           if not node.pred_ids
 289           ),
 290       )
 291
 292     while nopred_nodes:
 293       (node, changeset,) = nopred_nodes.get()
 294       del self[node.id]
 295       # See if any successors are now ready for extraction:
 296       for succ_id in node.succ_ids:
 297         succ = self[succ_id]
 298         if not succ.pred_ids:
 299           nopred_nodes.add(succ)
 300       yield (changeset, node.time_range)
 301
 302   def find_cycle(self, starting_node_id):
 303     """Find a cycle in the dependency graph and return it.
 304
 305     Use STARTING_NODE_ID as the place to start looking.  This routine
 306     must only be called after all nopred_nodes have been removed.
 307     Return the list of changesets that are involved in the cycle
 308     (ordered such that cycle[n-1] is a predecessor of cycle[n] and
 309     cycle[-1] is a predecessor of cycle[0])."""
 310
 311     # Since there are no nopred nodes in the graph, all nodes in the
 312     # graph must either be involved in a cycle or depend (directly or
 313     # indirectly) on nodes that are in a cycle.
 314
 315     # Pick an arbitrary node:
 316     node = self[starting_node_id]
 317
 318     seen_nodes = [node]
 319
 320     # Follow it backwards until a node is seen a second time; then we
 321     # have our cycle.
 322     while True:
 323       # Pick an arbitrary predecessor of node.  It must exist, because
 324       # there are no nopred nodes:
 325       try:
 326         node_id = node.pred_ids.__iter__().next()
 327       except StopIteration:
 328         raise NoPredNodeInGraphException(node)
 329       node = self[node_id]
 330       try:
 331         i = seen_nodes.index(node)
 332       except ValueError:
 333         seen_nodes.append(node)
 334       else:
 335         seen_nodes = seen_nodes[i:]
 336         seen_nodes.reverse()
 337         return [self._changeset_db[node.id] for node in seen_nodes]
 338
 339   def consume_graph(self, cycle_breaker=None):
 340     """Remove and yield changesets from this graph in dependency order.
 341
 342     Each iteration, this generator yields a (changeset, time_range)
 343     tuple for the oldest changeset in the graph that doesn't have any
 344     predecessor nodes.  If CYCLE_BREAKER is specified, then call
 345     CYCLE_BREAKER(cycle) whenever a cycle is encountered, where cycle
 346     is the list of changesets that are involved in the cycle (ordered
 347     such that cycle[n-1] is a predecessor of cycle[n] and cycle[-1] is
 348     a predecessor of cycle[0]).  CYCLE_BREAKER should break the cycle
 349     in place then return.
 350
 351     If a cycle is found and CYCLE_BREAKER was not specified, raise
 352     CycleInGraphException."""
 353
 354     while True:
 355       for (changeset, time_range) in self.consume_nopred_nodes():
 356         yield (changeset, time_range)
 357
 358       # If there are any nodes left in the graph, then there must be
 359       # at least one cycle.  Find a cycle and process it.
 360
 361       # This might raise StopIteration, but that indicates that the
 362       # graph has been fully consumed, so we just let the exception
 363       # escape.
 364       start_node_id = self.nodes.iterkeys().next()
 365
 366       cycle = self.find_cycle(start_node_id)
 367
 368       if cycle_breaker is not None:
 369         cycle_breaker(cycle)
 370       else:
 371         raise CycleInGraphException(cycle)
 372
 373   def __repr__(self):
 374     """For convenience only.  The format is subject to change at any time."""
 375
 376     if self.nodes:
 377       return 'ChangesetGraph:\n%s' \
 378              % ''.join(['  %r\n' % node for node in self])
 379     else:
 380       return 'ChangesetGraph:\n  EMPTY\n'
 381
 382   node_colors = {
 383       RevisionChangeset : 'lightgreen',
 384       OrderedChangeset : 'cyan',
 385       BranchChangeset : 'orange',
 386       TagChangeset : 'yellow',
 387       }
 388
 389   def output_coarse_dot(self, f):
 390     """Output the graph in DOT format to file-like object f.
 391
 392     Such a file can be rendered into a visual representation of the
 393     graph using tools like graphviz.  Include only changesets in the
 394     graph, and the dependencies between changesets."""
 395
 396     f.write('digraph G {\n')
 397     for node in self:
 398       f.write(
 399           '  C%x [style=filled, fillcolor=%s];\n' % (
 400               node.id,
 401               self.node_colors[self._changeset_db[node.id].__class__],
 402               )
 403           )
 404     f.write('\n')
 405
 406     for node in self:
 407       for succ_id in node.succ_ids:
 408         f.write('  C%x -> C%x\n' % (node.id, succ_id,))
 409       f.write('\n')
 410
 411     f.write('}\n')
 412
 413   def output_fine_dot(self, f):
 414     """Output the graph in DOT format to file-like object f.
 415
 416     Such a file can be rendered into a visual representation of the
 417     graph using tools like graphviz.  Include all CVSItems and the
 418     CVSItem-CVSItem dependencies in the graph.  Group the CVSItems
 419     into clusters by changeset."""
 420
 421     f.write('digraph G {\n')
 422     for node in self:
 423       f.write('  subgraph cluster_%x {\n' % (node.id,))
 424       f.write('    label = "C%x";\n' % (node.id,))
 425       changeset = self._changeset_db[node.id]
 426       for item_id in changeset.cvs_item_ids:
 427         f.write('    I%x;\n' % (item_id,))
 428       f.write('    style=filled;\n')
 429       f.write(
 430           '    fillcolor=%s;\n'
 431           % (self.node_colors[self._changeset_db[node.id].__class__],))
 432       f.write('  }\n\n')
 433
 434     for node in self:
 435       changeset = self._changeset_db[node.id]
 436       for cvs_item in changeset.iter_cvs_items():
 437         for succ_id in cvs_item.get_succ_ids():
 438           f.write('  I%x -> I%x;\n' % (cvs_item.id, succ_id,))
 439
 440       f.write('\n')
 441
 442     f.write('}\n')
 443
 444