mirror of https://github.com/python/cpython
372 lines
14 KiB
Python
372 lines
14 KiB
Python
"""Classes representing state-machine concepts"""
|
||
|
||
class NFA:
|
||
"""A non deterministic finite automata
|
||
|
||
A non deterministic automata is a form of a finite state
|
||
machine. An NFA's rules are less restrictive than a DFA.
|
||
The NFA rules are:
|
||
|
||
* A transition can be non-deterministic and can result in
|
||
nothing, one, or two or more states.
|
||
|
||
* An epsilon transition consuming empty input is valid.
|
||
Transitions consuming labeled symbols are also permitted.
|
||
|
||
This class assumes that there is only one starting state and one
|
||
accepting (ending) state.
|
||
|
||
Attributes:
|
||
name (str): The name of the rule the NFA is representing.
|
||
start (NFAState): The starting state.
|
||
end (NFAState): The ending state
|
||
"""
|
||
|
||
def __init__(self, start, end):
|
||
self.name = start.rule_name
|
||
self.start = start
|
||
self.end = end
|
||
|
||
def __repr__(self):
|
||
return "NFA(start={}, end={})".format(self.start, self.end)
|
||
|
||
def dump(self, writer=print):
|
||
"""Dump a graphical representation of the NFA"""
|
||
todo = [self.start]
|
||
for i, state in enumerate(todo):
|
||
writer(" State", i, state is self.end and "(final)" or "")
|
||
for arc in state.arcs:
|
||
label = arc.label
|
||
next = arc.target
|
||
if next in todo:
|
||
j = todo.index(next)
|
||
else:
|
||
j = len(todo)
|
||
todo.append(next)
|
||
if label is None:
|
||
writer(" -> %d" % j)
|
||
else:
|
||
writer(" %s -> %d" % (label, j))
|
||
|
||
|
||
class NFAArc:
|
||
"""An arc representing a transition between two NFA states.
|
||
|
||
NFA states can be connected via two ways:
|
||
|
||
* A label transition: An input equal to the label must
|
||
be consumed to perform the transition.
|
||
* An epsilon transition: The transition can be taken without
|
||
consuming any input symbol.
|
||
|
||
Attributes:
|
||
target (NFAState): The ending state of the transition arc.
|
||
label (Optional[str]): The label that must be consumed to make
|
||
the transition. An epsilon transition is represented
|
||
using `None`.
|
||
"""
|
||
|
||
def __init__(self, target, label):
|
||
self.target = target
|
||
self.label = label
|
||
|
||
def __repr__(self):
|
||
return "<%s: %s>" % (self.__class__.__name__, self.label)
|
||
|
||
|
||
class NFAState:
|
||
"""A state of a NFA, non deterministic finite automata.
|
||
|
||
Attributes:
|
||
target (rule_name): The name of the rule used to represent the NFA's
|
||
ending state after a transition.
|
||
arcs (Dict[Optional[str], NFAState]): A mapping representing transitions
|
||
between the current NFA state and another NFA state via following
|
||
a label.
|
||
"""
|
||
|
||
def __init__(self, rule_name):
|
||
self.rule_name = rule_name
|
||
self.arcs = []
|
||
|
||
def add_arc(self, target, label=None):
|
||
"""Add a new arc to connect the state to a target state within the NFA
|
||
|
||
The method adds a new arc to the list of arcs available as transitions
|
||
from the present state. An optional label indicates a named transition
|
||
that consumes an input while the absence of a label represents an epsilon
|
||
transition.
|
||
|
||
Attributes:
|
||
target (NFAState): The end of the transition that the arc represents.
|
||
label (Optional[str]): The label that must be consumed for making
|
||
the transition. If the label is not provided the transition is assumed
|
||
to be an epsilon-transition.
|
||
"""
|
||
assert label is None or isinstance(label, str)
|
||
assert isinstance(target, NFAState)
|
||
self.arcs.append(NFAArc(target, label))
|
||
|
||
def __repr__(self):
|
||
return "<%s: from %s>" % (self.__class__.__name__, self.rule_name)
|
||
|
||
|
||
class DFA:
|
||
"""A deterministic finite automata
|
||
|
||
A deterministic finite automata is a form of a finite state machine
|
||
that obeys the following rules:
|
||
|
||
* Each of the transitions is uniquely determined by
|
||
the source state and input symbol
|
||
* Reading an input symbol is required for each state
|
||
transition (no epsilon transitions).
|
||
|
||
The finite-state machine will accept or reject a string of symbols
|
||
and only produces a unique computation of the automaton for each input
|
||
string. The DFA must have a unique starting state (represented as the first
|
||
element in the list of states) but can have multiple final states.
|
||
|
||
Attributes:
|
||
name (str): The name of the rule the DFA is representing.
|
||
states (List[DFAState]): A collection of DFA states.
|
||
"""
|
||
|
||
def __init__(self, name, states):
|
||
self.name = name
|
||
self.states = states
|
||
|
||
@classmethod
|
||
def from_nfa(cls, nfa):
|
||
"""Constructs a DFA from a NFA using the Rabin–Scott construction algorithm.
|
||
|
||
To simulate the operation of a DFA on a given input string, it's
|
||
necessary to keep track of a single state at any time, or more precisely,
|
||
the state that the automaton will reach after seeing a prefix of the
|
||
input. In contrast, to simulate an NFA, it's necessary to keep track of
|
||
a set of states: all of the states that the automaton could reach after
|
||
seeing the same prefix of the input, according to the nondeterministic
|
||
choices made by the automaton. There are two possible sources of
|
||
non-determinism:
|
||
|
||
1) Multiple (one or more) transitions with the same label
|
||
|
||
'A' +-------+
|
||
+----------->+ State +----------->+
|
||
| | 2 |
|
||
+-------+ +-------+
|
||
| State |
|
||
| 1 | +-------+
|
||
+-------+ | State |
|
||
+----------->+ 3 +----------->+
|
||
'A' +-------+
|
||
|
||
2) Epsilon transitions (transitions that can be taken without consuming any input)
|
||
|
||
+-------+ +-------+
|
||
| State | ε | State |
|
||
| 1 +----------->+ 2 +----------->+
|
||
+-------+ +-------+
|
||
|
||
Looking at the first case above, we can't determine which transition should be
|
||
followed when given an input A. We could choose whether or not to follow the
|
||
transition while in the second case the problem is that we can choose both to
|
||
follow the transition or not doing it. To solve this problem we can imagine that
|
||
we follow all possibilities at the same time and we construct new states from the
|
||
set of all possible reachable states. For every case in the previous example:
|
||
|
||
|
||
1) For multiple transitions with the same label we colapse all of the
|
||
final states under the same one
|
||
|
||
+-------+ +-------+
|
||
| State | 'A' | State |
|
||
| 1 +----------->+ 2-3 +----------->+
|
||
+-------+ +-------+
|
||
|
||
2) For epsilon transitions we collapse all epsilon-reachable states
|
||
into the same one
|
||
|
||
+-------+
|
||
| State |
|
||
| 1-2 +----------->
|
||
+-------+
|
||
|
||
Because the DFA states consist of sets of NFA states, an n-state NFA
|
||
may be converted to a DFA with at most 2**n states. Notice that the
|
||
constructed DFA is not minimal and can be simplified or reduced
|
||
afterwards.
|
||
|
||
Parameters:
|
||
name (NFA): The NFA to transform to DFA.
|
||
"""
|
||
assert isinstance(nfa, NFA)
|
||
|
||
def add_closure(nfa_state, base_nfa_set):
|
||
"""Calculate the epsilon-closure of a given state
|
||
|
||
Add to the *base_nfa_set* all the states that are
|
||
reachable from *nfa_state* via epsilon-transitions.
|
||
"""
|
||
assert isinstance(nfa_state, NFAState)
|
||
if nfa_state in base_nfa_set:
|
||
return
|
||
base_nfa_set.add(nfa_state)
|
||
for nfa_arc in nfa_state.arcs:
|
||
if nfa_arc.label is None:
|
||
add_closure(nfa_arc.target, base_nfa_set)
|
||
|
||
# Calculte the epsilon-closure of the starting state
|
||
base_nfa_set = set()
|
||
add_closure(nfa.start, base_nfa_set)
|
||
|
||
# Start by visiting the NFA starting state (there is only one).
|
||
states = [DFAState(nfa.name, base_nfa_set, nfa.end)]
|
||
|
||
for state in states: # NB states grow while we're iterating
|
||
|
||
# Find transitions from the current state to other reachable states
|
||
# and store them in mapping that correlates the label to all the
|
||
# possible reachable states that can be obtained by consuming a
|
||
# token equal to the label. Each set of all the states that can
|
||
# be reached after following a label will be the a DFA state.
|
||
arcs = {}
|
||
for nfa_state in state.nfa_set:
|
||
for nfa_arc in nfa_state.arcs:
|
||
if nfa_arc.label is not None:
|
||
nfa_set = arcs.setdefault(nfa_arc.label, set())
|
||
# All states that can be reached by epsilon-transitions
|
||
# are also included in the set of reachable states.
|
||
add_closure(nfa_arc.target, nfa_set)
|
||
|
||
# Now create new DFAs by visiting all posible transitions between
|
||
# the current DFA state and the new power-set states (each nfa_set)
|
||
# via the different labels. As the nodes are appended to *states* this
|
||
# is performing a breadth-first search traversal over the power-set of
|
||
# the states of the original NFA.
|
||
for label, nfa_set in sorted(arcs.items()):
|
||
for exisisting_state in states:
|
||
if exisisting_state.nfa_set == nfa_set:
|
||
# The DFA state already exists for this rule.
|
||
next_state = exisisting_state
|
||
break
|
||
else:
|
||
next_state = DFAState(nfa.name, nfa_set, nfa.end)
|
||
states.append(next_state)
|
||
|
||
# Add a transition between the current DFA state and the new
|
||
# DFA state (the power-set state) via the current label.
|
||
state.add_arc(next_state, label)
|
||
|
||
return cls(nfa.name, states)
|
||
|
||
def __iter__(self):
|
||
return iter(self.states)
|
||
|
||
def simplify(self):
|
||
"""Attempt to reduce the number of states of the DFA
|
||
|
||
Transform the DFA into an equivalent DFA that has fewer states. Two
|
||
classes of states can be removed or merged from the original DFA without
|
||
affecting the language it accepts to minimize it:
|
||
|
||
* Unreachable states can not be reached from the initial
|
||
state of the DFA, for any input string.
|
||
* Nondistinguishable states are those that cannot be distinguished
|
||
from one another for any input string.
|
||
|
||
This algorithm does not achieve the optimal fully-reduced solution, but it
|
||
works well enough for the particularities of the Python grammar. The
|
||
algorithm repeatedly looks for two states that have the same set of
|
||
arcs (same labels pointing to the same nodes) and unifies them, until
|
||
things stop changing.
|
||
"""
|
||
changes = True
|
||
while changes:
|
||
changes = False
|
||
for i, state_i in enumerate(self.states):
|
||
for j in range(i + 1, len(self.states)):
|
||
state_j = self.states[j]
|
||
if state_i == state_j:
|
||
del self.states[j]
|
||
for state in self.states:
|
||
state.unifystate(state_j, state_i)
|
||
changes = True
|
||
break
|
||
|
||
def dump(self, writer=print):
|
||
"""Dump a graphical representation of the DFA"""
|
||
for i, state in enumerate(self.states):
|
||
writer(" State", i, state.is_final and "(final)" or "")
|
||
for label, next in sorted(state.arcs.items()):
|
||
writer(" %s -> %d" % (label, self.states.index(next)))
|
||
|
||
|
||
class DFAState(object):
|
||
"""A state of a DFA
|
||
|
||
Attributes:
|
||
rule_name (rule_name): The name of the DFA rule containing the represented state.
|
||
nfa_set (Set[NFAState]): The set of NFA states used to create this state.
|
||
final (bool): True if the state represents an accepting state of the DFA
|
||
containing this state.
|
||
arcs (Dict[label, DFAState]): A mapping representing transitions between
|
||
the current DFA state and another DFA state via following a label.
|
||
"""
|
||
|
||
def __init__(self, rule_name, nfa_set, final):
|
||
assert isinstance(nfa_set, set)
|
||
assert isinstance(next(iter(nfa_set)), NFAState)
|
||
assert isinstance(final, NFAState)
|
||
self.rule_name = rule_name
|
||
self.nfa_set = nfa_set
|
||
self.arcs = {} # map from terminals/nonterminals to DFAState
|
||
self.is_final = final in nfa_set
|
||
|
||
def add_arc(self, target, label):
|
||
"""Add a new arc to the current state.
|
||
|
||
Parameters:
|
||
target (DFAState): The DFA state at the end of the arc.
|
||
label (str): The label respresenting the token that must be consumed
|
||
to perform this transition.
|
||
"""
|
||
assert isinstance(label, str)
|
||
assert label not in self.arcs
|
||
assert isinstance(target, DFAState)
|
||
self.arcs[label] = target
|
||
|
||
def unifystate(self, old, new):
|
||
"""Replace all arcs from the current node to *old* with *new*.
|
||
|
||
Parameters:
|
||
old (DFAState): The DFA state to remove from all existing arcs.
|
||
new (DFAState): The DFA state to replace in all existing arcs.
|
||
"""
|
||
for label, next_ in self.arcs.items():
|
||
if next_ is old:
|
||
self.arcs[label] = new
|
||
|
||
def __eq__(self, other):
|
||
# The nfa_set does not matter for equality
|
||
assert isinstance(other, DFAState)
|
||
if self.is_final != other.is_final:
|
||
return False
|
||
# We cannot just return self.arcs == other.arcs because that
|
||
# would invoke this method recursively if there are any cycles.
|
||
if len(self.arcs) != len(other.arcs):
|
||
return False
|
||
for label, next_ in self.arcs.items():
|
||
if next_ is not other.arcs.get(label):
|
||
return False
|
||
return True
|
||
|
||
__hash__ = None # For Py3 compatibility.
|
||
|
||
def __repr__(self):
|
||
return "<%s: %s is_final=%s>" % (
|
||
self.__class__.__name__,
|
||
self.rule_name,
|
||
self.is_final,
|
||
)
|