246 lines
9.3 KiB
Python
246 lines
9.3 KiB
Python
|
__all__ = ["TopologicalSorter", "CycleError"]
|
||
|
|
||
|
_NODE_OUT = -1
|
||
|
_NODE_DONE = -2
|
||
|
|
||
|
|
||
|
class _NodeInfo:
|
||
|
__slots__ = "node", "npredecessors", "successors"
|
||
|
|
||
|
def __init__(self, node):
|
||
|
# The node this class is augmenting.
|
||
|
self.node = node
|
||
|
|
||
|
# Number of predecessors, generally >= 0. When this value falls to 0,
|
||
|
# and is returned by get_ready(), this is set to _NODE_OUT and when the
|
||
|
# node is marked done by a call to done(), set to _NODE_DONE.
|
||
|
self.npredecessors = 0
|
||
|
|
||
|
# List of successor nodes. The list can contain duplicated elements as
|
||
|
# long as they're all reflected in the successor's npredecessors attribute).
|
||
|
self.successors = []
|
||
|
|
||
|
|
||
|
class CycleError(ValueError):
|
||
|
"""Subclass of ValueError raised by TopologicalSorterif cycles exist in the graph
|
||
|
|
||
|
If multiple cycles exist, only one undefined choice among them will be reported
|
||
|
and included in the exception. The detected cycle can be accessed via the second
|
||
|
element in the *args* attribute of the exception instance and consists in a list
|
||
|
of nodes, such that each node is, in the graph, an immediate predecessor of the
|
||
|
next node in the list. In the reported list, the first and the last node will be
|
||
|
the same, to make it clear that it is cyclic.
|
||
|
"""
|
||
|
|
||
|
pass
|
||
|
|
||
|
|
||
|
class TopologicalSorter:
|
||
|
"""Provides functionality to topologically sort a graph of hashable nodes"""
|
||
|
|
||
|
def __init__(self, graph=None):
|
||
|
self._node2info = {}
|
||
|
self._ready_nodes = None
|
||
|
self._npassedout = 0
|
||
|
self._nfinished = 0
|
||
|
|
||
|
if graph is not None:
|
||
|
for node, predecessors in graph.items():
|
||
|
self.add(node, *predecessors)
|
||
|
|
||
|
def _get_nodeinfo(self, node):
|
||
|
if (result := self._node2info.get(node)) is None:
|
||
|
self._node2info[node] = result = _NodeInfo(node)
|
||
|
return result
|
||
|
|
||
|
def add(self, node, *predecessors):
|
||
|
"""Add a new node and its predecessors to the graph.
|
||
|
|
||
|
Both the *node* and all elements in *predecessors* must be hashable.
|
||
|
|
||
|
If called multiple times with the same node argument, the set of dependencies
|
||
|
will be the union of all dependencies passed in.
|
||
|
|
||
|
It is possible to add a node with no dependencies (*predecessors* is not provided)
|
||
|
as well as provide a dependency twice. If a node that has not been provided before
|
||
|
is included among *predecessors* it will be automatically added to the graph with
|
||
|
no predecessors of its own.
|
||
|
|
||
|
Raises ValueError if called after "prepare".
|
||
|
"""
|
||
|
if self._ready_nodes is not None:
|
||
|
raise ValueError("Nodes cannot be added after a call to prepare()")
|
||
|
|
||
|
# Create the node -> predecessor edges
|
||
|
nodeinfo = self._get_nodeinfo(node)
|
||
|
nodeinfo.npredecessors += len(predecessors)
|
||
|
|
||
|
# Create the predecessor -> node edges
|
||
|
for pred in predecessors:
|
||
|
pred_info = self._get_nodeinfo(pred)
|
||
|
pred_info.successors.append(node)
|
||
|
|
||
|
def prepare(self):
|
||
|
"""Mark the graph as finished and check for cycles in the graph.
|
||
|
|
||
|
If any cycle is detected, "CycleError" will be raised, but "get_ready" can
|
||
|
still be used to obtain as many nodes as possible until cycles block more
|
||
|
progress. After a call to this function, the graph cannot be modified and
|
||
|
therefore no more nodes can be added using "add".
|
||
|
"""
|
||
|
if self._ready_nodes is not None:
|
||
|
raise ValueError("cannot prepare() more than once")
|
||
|
|
||
|
self._ready_nodes = [
|
||
|
i.node for i in self._node2info.values() if i.npredecessors == 0
|
||
|
]
|
||
|
# ready_nodes is set before we look for cycles on purpose:
|
||
|
# if the user wants to catch the CycleError, that's fine,
|
||
|
# they can continue using the instance to grab as many
|
||
|
# nodes as possible before cycles block more progress
|
||
|
cycle = self._find_cycle()
|
||
|
if cycle:
|
||
|
raise CycleError(f"nodes are in a cycle", cycle)
|
||
|
|
||
|
def get_ready(self):
|
||
|
"""Return a tuple of all the nodes that are ready.
|
||
|
|
||
|
Initially it returns all nodes with no predecessors; once those are marked
|
||
|
as processed by calling "done", further calls will return all new nodes that
|
||
|
have all their predecessors already processed. Once no more progress can be made,
|
||
|
empty tuples are returned.
|
||
|
|
||
|
Raises ValueError if called without calling "prepare" previously.
|
||
|
"""
|
||
|
if self._ready_nodes is None:
|
||
|
raise ValueError("prepare() must be called first")
|
||
|
|
||
|
# Get the nodes that are ready and mark them
|
||
|
result = tuple(self._ready_nodes)
|
||
|
n2i = self._node2info
|
||
|
for node in result:
|
||
|
n2i[node].npredecessors = _NODE_OUT
|
||
|
|
||
|
# Clean the list of nodes that are ready and update
|
||
|
# the counter of nodes that we have returned.
|
||
|
self._ready_nodes.clear()
|
||
|
self._npassedout += len(result)
|
||
|
|
||
|
return result
|
||
|
|
||
|
def is_active(self):
|
||
|
"""Return True if more progress can be made and ``False`` otherwise.
|
||
|
|
||
|
Progress can be made if cycles do not block the resolution and either there
|
||
|
are still nodes ready that haven't yet been returned by "get_ready" or the
|
||
|
number of nodes marked "done" is less than the number that have been returned
|
||
|
by "get_ready".
|
||
|
|
||
|
Raises ValueError if called without calling "prepare" previously.
|
||
|
"""
|
||
|
if self._ready_nodes is None:
|
||
|
raise ValueError("prepare() must be called first")
|
||
|
return self._nfinished < self._npassedout or bool(self._ready_nodes)
|
||
|
|
||
|
def __bool__(self):
|
||
|
return self.is_active()
|
||
|
|
||
|
def done(self, *nodes):
|
||
|
"""Marks a set of nodes returned by "get_ready" as processed.
|
||
|
|
||
|
This method unblocks any successor of each node in *nodes* for being returned
|
||
|
in the future by a a call to "get_ready"
|
||
|
|
||
|
Raises :exec:`ValueError` if any node in *nodes* has already been marked as
|
||
|
processed by a previous call to this method, if a node was not added to the
|
||
|
graph by using "add" or if called without calling "prepare" previously or if
|
||
|
node has not yet been returned by "get_ready".
|
||
|
"""
|
||
|
|
||
|
if self._ready_nodes is None:
|
||
|
raise ValueError("prepare() must be called first")
|
||
|
|
||
|
n2i = self._node2info
|
||
|
|
||
|
for node in nodes:
|
||
|
|
||
|
# Check if we know about this node (it was added previously using add()
|
||
|
if (nodeinfo := n2i.get(node)) is None:
|
||
|
raise ValueError(f"node {node!r} was not added using add()")
|
||
|
|
||
|
# If the node has not being returned (marked as ready) previously, inform the user.
|
||
|
stat = nodeinfo.npredecessors
|
||
|
if stat != _NODE_OUT:
|
||
|
if stat >= 0:
|
||
|
raise ValueError(
|
||
|
f"node {node!r} was not passed out (still not ready)"
|
||
|
)
|
||
|
elif stat == _NODE_DONE:
|
||
|
raise ValueError(f"node {node!r} was already marked done")
|
||
|
else:
|
||
|
assert False, f"node {node!r}: unknown status {stat}"
|
||
|
|
||
|
# Mark the node as processed
|
||
|
nodeinfo.npredecessors = _NODE_DONE
|
||
|
|
||
|
# Go to all the successors and reduce the number of predecessors, collecting all the ones
|
||
|
# that are ready to be returned in the next get_ready() call.
|
||
|
for successor in nodeinfo.successors:
|
||
|
successor_info = n2i[successor]
|
||
|
successor_info.npredecessors -= 1
|
||
|
if successor_info.npredecessors == 0:
|
||
|
self._ready_nodes.append(successor)
|
||
|
self._nfinished += 1
|
||
|
|
||
|
def _find_cycle(self):
|
||
|
n2i = self._node2info
|
||
|
stack = []
|
||
|
itstack = []
|
||
|
seen = set()
|
||
|
node2stacki = {}
|
||
|
|
||
|
for node in n2i:
|
||
|
if node in seen:
|
||
|
continue
|
||
|
|
||
|
while True:
|
||
|
if node in seen:
|
||
|
# If we have seen already the node and is in the
|
||
|
# current stack we have found a cycle.
|
||
|
if node in node2stacki:
|
||
|
return stack[node2stacki[node] :] + [node]
|
||
|
# else go on to get next successor
|
||
|
else:
|
||
|
seen.add(node)
|
||
|
itstack.append(iter(n2i[node].successors).__next__)
|
||
|
node2stacki[node] = len(stack)
|
||
|
stack.append(node)
|
||
|
|
||
|
# Backtrack to the topmost stack entry with
|
||
|
# at least another successor.
|
||
|
while stack:
|
||
|
try:
|
||
|
node = itstack[-1]()
|
||
|
break
|
||
|
except StopIteration:
|
||
|
del node2stacki[stack.pop()]
|
||
|
itstack.pop()
|
||
|
else:
|
||
|
break
|
||
|
return None
|
||
|
|
||
|
def static_order(self):
|
||
|
"""Returns an iterable of nodes in a topological order.
|
||
|
|
||
|
The particular order that is returned may depend on the specific
|
||
|
order in which the items were inserted in the graph.
|
||
|
|
||
|
Using this method does not require to call "prepare" or "done". If any
|
||
|
cycle is detected, :exc:`CycleError` will be raised.
|
||
|
"""
|
||
|
self.prepare()
|
||
|
while self.is_active():
|
||
|
node_group = self.get_ready()
|
||
|
yield from node_group
|
||
|
self.done(*node_group)
|