161 lines
6.4 KiB
Python
161 lines
6.4 KiB
Python
|
import collections
|
||
|
|
||
|
class Grammar:
    """Pgen parsing tables and C table generator.

    An instance holds the grammar tables as plain instance variables and
    can emit them as C source text through a caller-supplied ``writer``
    callable (see produce_graminit_h() and produce_graminit_c()).  The
    class here does not provide initialization of the tables: __init__()
    only creates empty containers, so the tables must be filled in by
    other code before the produce_* methods are called.

    The instance variables are as follows:

    symbol2number -- a dict mapping symbol names to numbers.  Symbol
                     numbers are always 256 or higher, to distinguish
                     them from token numbers, which are between 0 and
                     255 (inclusive).

    number2symbol -- a dict mapping numbers to symbol names;
                     these two are each other's inverse.

    states        -- a list of DFAs, where each DFA is a list of
                     states, each state is a list of arcs, and each
                     arc is a (i, j) pair where i is a label and j is
                     a state number.  The DFA number is the index into
                     this list.  (This name is slightly confusing.)
                     Final states are represented by a special arc of
                     the form (0, j) where j is its own state number.

    dfas          -- a dict mapping symbol numbers to (DFA, first)
                     pairs, where DFA is an item from the states list
                     above, and first is a set of tokens that can
                     begin this grammar rule (represented by a dict
                     whose values are always 1).

    labels        -- a list of (x, y) pairs where x is either a token
                     number or a symbol number, and y is either None
                     or a string; the strings are keywords.  The label
                     number is the index in this list; label numbers
                     are used to mark state transitions (arcs) in the
                     DFAs.

    start         -- the number of the grammar's start symbol.

    keywords      -- a dict mapping keyword strings to arc labels.

    tokens        -- a dict mapping token numbers to arc labels.

    symbol2label  -- a dict created empty by __init__(); it is not read
                     by any method of this class.

    """

    def __init__(self):
        """Create empty tables; ``labels`` starts with the (0, "EMPTY") entry."""
        self.symbol2number = collections.OrderedDict()
        self.number2symbol = collections.OrderedDict()
        self.states = []
        self.dfas = collections.OrderedDict()
        self.labels = [(0, "EMPTY")]
        self.keywords = collections.OrderedDict()
        self.tokens = collections.OrderedDict()
        self.symbol2label = collections.OrderedDict()
        self.start = 256

    def produce_graminit_h(self, writer):
        """Emit a ``#define <symbol> <number>`` line for every grammar symbol.

        ``writer`` is called once per chunk of text with a single str
        argument (for example the ``write`` method of an open file).
        """
        writer("/* Generated by Parser/pgen */\n\n")
        for number, symbol in self.number2symbol.items():
            writer("#define {} {}\n".format(symbol, number))

    def produce_graminit_c(self, writer):
        """Emit the C definitions of the arcs, states, DFAs, labels and grammar.

        ``writer`` is called once per chunk of text with a single str
        argument.  The output order is: header comment and includes, a
        forward declaration of ``_PyParser_Grammar``, the tables written by
        print_dfas() and print_labels(), and finally the initializer of
        ``_PyParser_Grammar`` that refers to those tables.
        """
        writer("/* Generated by Parser/pgen */\n\n")

        writer('#include "pgenheaders.h"\n')
        writer('#include "grammar.h"\n')
        writer("grammar _PyParser_Grammar;\n")

        self.print_dfas(writer)
        self.print_labels(writer)

        writer("grammar _PyParser_Grammar = {\n")
        writer(" {n_dfas},\n".format(n_dfas=len(self.dfas)))
        writer(" dfas,\n")
        writer(" {{{n_labels}, labels}},\n".format(n_labels=len(self.labels)))
        writer(" {start_number}\n".format(start_number=self.start))
        writer("};\n")

    def print_labels(self, writer):
        """Emit the ``labels`` C array, one initializer per entry of self.labels.

        An entry whose name is None is written with ``0`` in place of the
        string; every other entry has its name written as a quoted string.
        """
        writer(
            "static label labels[{n_labels}] = {{\n".format(n_labels=len(self.labels))
        )
        for label, name in self.labels:
            if name is None:
                writer(" {{{label}, 0}},\n".format(label=label))
            else:
                writer(
                    ' {{{label}, "{label_name}"}},\n'.format(
                        label=label, label_name=name
                    )
                )
        writer("};\n")

    def print_dfas(self, writer):
        """Emit all state/arc tables followed by the ``dfas`` C array.

        Each ``dfas`` entry carries the symbol number, the symbol name, the
        number of states, a reference to the matching ``states_<n>`` array
        and the first set encoded as a string of octal escapes.
        """
        self.print_states(writer)
        writer("static dfa dfas[{}] = {{\n".format(len(self.dfas)))

        # The first set is written as a bitset with one bit per label number;
        # its size depends only on the label count, so compute it once.
        bitset_size = (len(self.labels) >> 3) + 1

        # The index used for "states_<n>" comes from the position in
        # self.dfas, while print_states() numbers the arrays by position in
        # self.states -- the two containers are assumed to be in the same
        # order.
        for dfaindex, (symbol, (dfa, first_sets)) in enumerate(self.dfas.items()):
            writer(
                ' {{{dfa_symbol}, "{symbol_name}", '.format(
                    dfa_symbol=symbol, symbol_name=self.number2symbol[symbol]
                )
                + "0, {n_states}, states_{dfa_index},\n".format(
                    n_states=len(dfa), dfa_index=dfaindex
                )
            )
            writer(' "')

            bitset = bytearray(bitset_size)
            for token in first_sets:
                # Bit (token % 8) of byte (token // 8) marks membership.
                bitset[token >> 3] |= 1 << (token & 7)
            # Every byte becomes a three-digit octal escape inside the C
            # string literal opened above.
            writer("".join("\\%03o" % byte for byte in bitset))
            writer('"},\n')
        writer("};\n")

    def print_states(self, write):
        """Emit, for every DFA in self.states, its arcs and its ``states_<n>`` array.

        The arc arrays are written first (via print_arcs()) because each
        state initializer refers to its ``arcs_<dfa>_<state>`` array by name.
        """
        for dfaindex, dfa in enumerate(self.states):
            self.print_arcs(write, dfaindex, dfa)
            write(
                "static state states_{dfa_index}[{n_states}] = {{\n".format(
                    dfa_index=dfaindex, n_states=len(dfa)
                )
            )
            for stateindex, state in enumerate(dfa):
                write(
                    " {{{n_arcs}, arcs_{dfa_index}_{state_index}}},\n".format(
                        n_arcs=len(state), dfa_index=dfaindex, state_index=stateindex
                    )
                )
            write("};\n")

    def print_arcs(self, write, dfaindex, states):
        """Emit one ``arcs_<dfaindex>_<state>`` C array per state in ``states``.

        ``states`` is a list of states, each a list of (label, target state)
        pairs; every pair is written as a ``{label, state}`` initializer.
        """
        for stateindex, state in enumerate(states):
            write(
                "static arc arcs_{dfa_index}_{state_index}[{n_arcs}] = {{\n".format(
                    dfa_index=dfaindex, state_index=stateindex, n_arcs=len(state)
                )
            )
            for from_label, to_state in state:
                write(
                    " {{{from_label}, {to_state}}},\n".format(
                        from_label=from_label, to_state=to_state
                    )
                )
            write("};\n")
|