diff options
Diffstat (limited to 'cvs2svn_lib/changeset_graph.py')
-rw-r--r-- | cvs2svn_lib/changeset_graph.py | 456 |
1 files changed, 456 insertions, 0 deletions
diff --git a/cvs2svn_lib/changeset_graph.py b/cvs2svn_lib/changeset_graph.py new file mode 100644 index 0000000..64ebf2c --- /dev/null +++ b/cvs2svn_lib/changeset_graph.py @@ -0,0 +1,456 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2006-2008 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. +# ==================================================================== + +"""The changeset dependency graph.""" + + +from cvs2svn_lib.log import Log +from cvs2svn_lib.changeset import RevisionChangeset +from cvs2svn_lib.changeset import OrderedChangeset +from cvs2svn_lib.changeset import BranchChangeset +from cvs2svn_lib.changeset import TagChangeset + + +class CycleInGraphException(Exception): + def __init__(self, cycle): + Exception.__init__( + self, + 'Cycle found in graph: %s' + % ' -> '.join(map(str, cycle + [cycle[0]]))) + + +class NoPredNodeInGraphException(Exception): + def __init__(self, node): + Exception.__init__(self, 'Node %s has no predecessors' % (node,)) + + +class _NoPredNodes: + """Manage changesets that are to be processed. + + Output the changesets in order by time and changeset type. + + The implementation of this class is crude: as changesets are added, + they are appended to a list. When one is needed, the list is sorted + in reverse order and then the last changeset in the list is + returned. To reduce the number of sorts that are needed, the class + keeps track of whether the list is currently sorted. + + All this repeated sorting is wasteful and unnecessary. We should + instead use a heap to output the changeset order, which would + require O(lg N) work per add()/get() rather than O(1) and O(N lg N) + as in the current implementation [1]. But: (1) the lame interface + of heapq doesn't allow an arbitrary compare function, so we would + have to store extra information in the array elements; (2) in + practice, the number of items in the list at any time is only a tiny + fraction of the total number of changesets; and (3) testing showed + that the heapq implementation is no faster than this one (perhaps + because of the increased memory usage). + + [1] According to Objects/listsort.txt in the Python source code, the + Python list-sorting code is heavily optimized for arrays that have + runs of already-sorted elements, so the current cost of get() is + probably closer to O(N) than O(N lg N).""" + + def __init__(self, changeset_db): + self.changeset_db = changeset_db + # A list [(node, changeset,)] of nodes with no predecessors: + self._nodes = [] + self._sorted = True + + def __len__(self): + return len(self._nodes) + + @staticmethod + def _compare((node_1, changeset_1), (node_2, changeset_2)): + """Define a (reverse) ordering on self._nodes.""" + + return cmp(node_2.time_range, node_1.time_range) \ + or cmp(changeset_2, changeset_1) + + def add(self, node): + self._nodes.append( (node, self.changeset_db[node.id],) ) + self._sorted = False + + def get(self): + """Return (node, changeset,) of the smallest node. + + 'Smallest' is defined by self._compare().""" + + if not self._sorted: + self._nodes.sort(self._compare) + self._sorted = True + return self._nodes.pop() + + +class ChangesetGraph(object): + """A graph of changesets and their dependencies.""" + + def __init__(self, changeset_db, cvs_item_to_changeset_id): + self._changeset_db = changeset_db + self._cvs_item_to_changeset_id = cvs_item_to_changeset_id + # A map { id : ChangesetGraphNode } + self.nodes = {} + + def close(self): + self._cvs_item_to_changeset_id.close() + self._cvs_item_to_changeset_id = None + self._changeset_db.close() + self._changeset_db = None + + def add_changeset(self, changeset): + """Add CHANGESET to this graph. + + Determine and record any dependencies to changesets that are + already in the graph. This method does not affect the databases.""" + + node = changeset.create_graph_node(self._cvs_item_to_changeset_id) + + # Now tie the node into our graph. If a changeset referenced by + # node is already in our graph, then add the backwards connection + # from the other node to the new one. If not, then delete the + # changeset from node. + + for pred_id in list(node.pred_ids): + pred_node = self.nodes.get(pred_id) + if pred_node is not None: + pred_node.succ_ids.add(node.id) + else: + node.pred_ids.remove(pred_id) + + for succ_id in list(node.succ_ids): + succ_node = self.nodes.get(succ_id) + if succ_node is not None: + succ_node.pred_ids.add(node.id) + else: + node.succ_ids.remove(succ_id) + + self.nodes[node.id] = node + + def store_changeset(self, changeset): + for cvs_item_id in changeset.cvs_item_ids: + self._cvs_item_to_changeset_id[cvs_item_id] = changeset.id + self._changeset_db.store(changeset) + + def add_new_changeset(self, changeset): + """Add the new CHANGESET to the graph and also to the databases.""" + + if Log().is_on(Log.DEBUG): + Log().debug('Adding changeset %r' % (changeset,)) + + self.add_changeset(changeset) + self.store_changeset(changeset) + + def delete_changeset(self, changeset): + """Remove CHANGESET from the graph and also from the databases. + + In fact, we don't remove CHANGESET from + self._cvs_item_to_changeset_id, because in practice the CVSItems + in CHANGESET are always added again as part of a new CHANGESET, + which will cause the old values to be overwritten.""" + + if Log().is_on(Log.DEBUG): + Log().debug('Removing changeset %r' % (changeset,)) + + del self[changeset.id] + del self._changeset_db[changeset.id] + + def __nonzero__(self): + """Instances are considered True iff they contain any nodes.""" + + return bool(self.nodes) + + def __contains__(self, id): + """Return True if the specified ID is contained in this graph.""" + + return id in self.nodes + + def __getitem__(self, id): + return self.nodes[id] + + def get(self, id): + return self.nodes.get(id) + + def __delitem__(self, id): + """Remove the node corresponding to ID. + + Also remove references to it from other nodes. This method does + not change pred_ids or succ_ids of the node being deleted, nor + does it affect the databases.""" + + node = self[id] + + for succ_id in node.succ_ids: + succ = self[succ_id] + succ.pred_ids.remove(node.id) + + for pred_id in node.pred_ids: + pred = self[pred_id] + pred.succ_ids.remove(node.id) + + del self.nodes[node.id] + + def keys(self): + return self.nodes.keys() + + def __iter__(self): + return self.nodes.itervalues() + + def _get_path(self, reachable_changesets, starting_node_id, ending_node_id): + """Return the shortest path from ENDING_NODE_ID to STARTING_NODE_ID. + + Find a path from ENDING_NODE_ID to STARTING_NODE_ID in + REACHABLE_CHANGESETS, where STARTING_NODE_ID is the id of a + changeset that depends on the changeset with ENDING_NODE_ID. (See + the comment in search_for_path() for a description of the format + of REACHABLE_CHANGESETS.) + + Return a list of changesets, where the 0th one has ENDING_NODE_ID + and the last one has STARTING_NODE_ID. If there is no such path + described in in REACHABLE_CHANGESETS, return None.""" + + if ending_node_id not in reachable_changesets: + return None + + path = [self._changeset_db[ending_node_id]] + id = reachable_changesets[ending_node_id][1] + while id != starting_node_id: + path.append(self._changeset_db[id]) + id = reachable_changesets[id][1] + path.append(self._changeset_db[starting_node_id]) + return path + + def search_for_path(self, starting_node_id, stop_set): + """Search for paths to prerequisites of STARTING_NODE_ID. + + Try to find the shortest dependency path that causes the changeset + with STARTING_NODE_ID to depend (directly or indirectly) on one of + the changesets whose ids are contained in STOP_SET. + + We consider direct and indirect dependencies in the sense that the + changeset can be reached by following a chain of predecessor nodes. + + When one of the changeset_ids in STOP_SET is found, terminate the + search and return the path from that changeset_id to + STARTING_NODE_ID. If no path is found to a node in STOP_SET, + return None.""" + + # A map {node_id : (steps, next_node_id)} where NODE_ID can be + # reached from STARTING_NODE_ID in STEPS steps, and NEXT_NODE_ID + # is the id of the previous node in the path. STARTING_NODE_ID is + # only included as a key if there is a loop leading back to it. + reachable_changesets = {} + + # A list of (node_id, steps) that still have to be investigated, + # and STEPS is the number of steps to get to NODE_ID. + open_nodes = [(starting_node_id, 0)] + # A breadth-first search: + while open_nodes: + (id, steps) = open_nodes.pop(0) + steps += 1 + node = self[id] + for pred_id in node.pred_ids: + # Since the search is breadth-first, we only have to set steps + # that don't already exist. + if pred_id not in reachable_changesets: + reachable_changesets[pred_id] = (steps, id) + open_nodes.append((pred_id, steps)) + + # See if we can stop now: + if pred_id in stop_set: + return self._get_path( + reachable_changesets, starting_node_id, pred_id + ) + + return None + + def consume_nopred_nodes(self): + """Remove and yield changesets in dependency order. + + Each iteration, this generator yields a (changeset, time_range) + tuple for the oldest changeset in the graph that doesn't have any + predecessor nodes (i.e., it is ready to be committed). This is + continued until there are no more nodes without predecessors + (either because the graph has been emptied, or because of cycles + in the graph). + + Among the changesets that are ready to be processed, the earliest + one (according to the sorting of the TimeRange class) is yielded + each time. (This is the order in which the changesets should be + committed.) + + The graph should not be otherwise altered while this generator is + running.""" + + # Find a list of (node,changeset,) where the node has no + # predecessors: + nopred_nodes = _NoPredNodes(self._changeset_db) + for node in self.nodes.itervalues(): + if not node.pred_ids: + nopred_nodes.add(node) + + while nopred_nodes: + (node, changeset,) = nopred_nodes.get() + del self[node.id] + # See if any successors are now ready for extraction: + for succ_id in node.succ_ids: + succ = self[succ_id] + if not succ.pred_ids: + nopred_nodes.add(succ) + yield (changeset, node.time_range) + + def find_cycle(self, starting_node_id): + """Find a cycle in the dependency graph and return it. + + Use STARTING_NODE_ID as the place to start looking. This routine + must only be called after all nopred_nodes have been removed. + Return the list of changesets that are involved in the cycle + (ordered such that cycle[n-1] is a predecessor of cycle[n] and + cycle[-1] is a predecessor of cycle[0]).""" + + # Since there are no nopred nodes in the graph, all nodes in the + # graph must either be involved in a cycle or depend (directly or + # indirectly) on nodes that are in a cycle. + + # Pick an arbitrary node: + node = self[starting_node_id] + + seen_nodes = [node] + + # Follow it backwards until a node is seen a second time; then we + # have our cycle. + while True: + # Pick an arbitrary predecessor of node. It must exist, because + # there are no nopred nodes: + try: + node_id = node.pred_ids.__iter__().next() + except StopIteration: + raise NoPredNodeInGraphException(node) + node = self[node_id] + try: + i = seen_nodes.index(node) + except ValueError: + seen_nodes.append(node) + else: + seen_nodes = seen_nodes[i:] + seen_nodes.reverse() + return [self._changeset_db[node.id] for node in seen_nodes] + + def consume_graph(self, cycle_breaker=None): + """Remove and yield changesets from this graph in dependency order. + + Each iteration, this generator yields a (changeset, time_range) + tuple for the oldest changeset in the graph that doesn't have any + predecessor nodes. If CYCLE_BREAKER is specified, then call + CYCLE_BREAKER(cycle) whenever a cycle is encountered, where cycle + is the list of changesets that are involved in the cycle (ordered + such that cycle[n-1] is a predecessor of cycle[n] and cycle[-1] is + a predecessor of cycle[0]). CYCLE_BREAKER should break the cycle + in place then return. + + If a cycle is found and CYCLE_BREAKER was not specified, raise + CycleInGraphException.""" + + while True: + for (changeset, time_range) in self.consume_nopred_nodes(): + yield (changeset, time_range) + + # If there are any nodes left in the graph, then there must be + # at least one cycle. Find a cycle and process it. + + # This might raise StopIteration, but that indicates that the + # graph has been fully consumed, so we just let the exception + # escape. + start_node_id = self.nodes.iterkeys().next() + + cycle = self.find_cycle(start_node_id) + + if cycle_breaker is not None: + cycle_breaker(cycle) + else: + raise CycleInGraphException(cycle) + + def __repr__(self): + """For convenience only. The format is subject to change at any time.""" + + if self.nodes: + return 'ChangesetGraph:\n%s' \ + % ''.join([' %r\n' % node for node in self]) + else: + return 'ChangesetGraph:\n EMPTY\n' + + node_colors = { + RevisionChangeset : 'lightgreen', + OrderedChangeset : 'cyan', + BranchChangeset : 'orange', + TagChangeset : 'yellow', + } + + def output_coarse_dot(self, f): + """Output the graph in DOT format to file-like object f. + + Such a file can be rendered into a visual representation of the + graph using tools like graphviz. Include only changesets in the + graph, and the dependencies between changesets.""" + + f.write('digraph G {\n') + for node in self: + f.write( + ' C%x [style=filled, fillcolor=%s];\n' % ( + node.id, + self.node_colors[self._changeset_db[node.id].__class__], + ) + ) + f.write('\n') + + for node in self: + for succ_id in node.succ_ids: + f.write(' C%x -> C%x\n' % (node.id, succ_id,)) + f.write('\n') + + f.write('}\n') + + def output_fine_dot(self, f): + """Output the graph in DOT format to file-like object f. + + Such a file can be rendered into a visual representation of the + graph using tools like graphviz. Include all CVSItems and the + CVSItem-CVSItem dependencies in the graph. Group the CVSItems + into clusters by changeset.""" + + f.write('digraph G {\n') + for node in self: + f.write(' subgraph cluster_%x {\n' % (node.id,)) + f.write(' label = "C%x";\n' % (node.id,)) + changeset = self._changeset_db[node.id] + for item_id in changeset.cvs_item_ids: + f.write(' I%x;\n' % (item_id,)) + f.write(' style=filled;\n') + f.write( + ' fillcolor=%s;\n' + % (self.node_colors[self._changeset_db[node.id].__class__],)) + f.write(' }\n\n') + + for node in self: + changeset = self._changeset_db[node.id] + for cvs_item in changeset.iter_cvs_items(): + for succ_id in cvs_item.get_succ_ids(): + f.write(' I%x -> I%x;\n' % (cvs_item.id, succ_id,)) + + f.write('\n') + + f.write('}\n') + + |