Clean implementation of Parser/pgen and fix some style issues (GH-12156)
This commit is contained in:
parent
97c288df61
commit
8bc401a55c
|
@ -2,6 +2,7 @@ import argparse
|
||||||
|
|
||||||
from .pgen import ParserGenerator
|
from .pgen import ParserGenerator
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
parser = argparse.ArgumentParser(description="Parser generator main program.")
|
parser = argparse.ArgumentParser(description="Parser generator main program.")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
|
|
|
@ -1,19 +1,8 @@
|
||||||
import collections
|
import collections
|
||||||
|
|
||||||
|
|
||||||
class Grammar:
|
class Grammar:
|
||||||
"""Pgen parsing tables conversion class.
|
"""Pgen parsing tables class.
|
||||||
|
|
||||||
Once initialized, this class supplies the grammar tables for the
|
|
||||||
parsing engine implemented by parse.py. The parsing engine
|
|
||||||
accesses the instance variables directly. The class here does not
|
|
||||||
provide initialization of the tables; several subclasses exist to
|
|
||||||
do this (see the conv and pgen modules).
|
|
||||||
|
|
||||||
The load() method reads the tables from a pickle file, which is
|
|
||||||
much faster than the other ways offered by subclasses. The pickle
|
|
||||||
file is written by calling dump() (after loading the grammar
|
|
||||||
tables using a subclass). The report() method prints a readable
|
|
||||||
representation of the tables to stdout, for debugging.
|
|
||||||
|
|
||||||
The instance variables are as follows:
|
The instance variables are as follows:
|
||||||
|
|
||||||
|
@ -36,8 +25,7 @@ class Grammar:
|
||||||
dfas -- a dict mapping symbol numbers to (DFA, first)
|
dfas -- a dict mapping symbol numbers to (DFA, first)
|
||||||
pairs, where DFA is an item from the states list
|
pairs, where DFA is an item from the states list
|
||||||
above, and first is a set of tokens that can
|
above, and first is a set of tokens that can
|
||||||
begin this grammar rule (represented by a dict
|
begin this grammar rule.
|
||||||
whose values are always 1).
|
|
||||||
|
|
||||||
labels -- a list of (x, y) pairs where x is either a token
|
labels -- a list of (x, y) pairs where x is either a token
|
||||||
number or a symbol number, and y is either None
|
number or a symbol number, and y is either None
|
||||||
|
@ -92,14 +80,12 @@ class Grammar:
|
||||||
"static label labels[{n_labels}] = {{\n".format(n_labels=len(self.labels))
|
"static label labels[{n_labels}] = {{\n".format(n_labels=len(self.labels))
|
||||||
)
|
)
|
||||||
for label, name in self.labels:
|
for label, name in self.labels:
|
||||||
if name is None:
|
label_name = '"{}"'.format(name) if name is not None else 0
|
||||||
writer(" {{{label}, 0}},\n".format(label=label))
|
writer(
|
||||||
else:
|
' {{{label}, {label_name}}},\n'.format(
|
||||||
writer(
|
label=label, label_name=label_name
|
||||||
' {{{label}, "{label_name}"}},\n'.format(
|
|
||||||
label=label, label_name=name
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
|
)
|
||||||
writer("};\n")
|
writer("};\n")
|
||||||
|
|
||||||
def print_dfas(self, writer):
|
def print_dfas(self, writer):
|
||||||
|
@ -114,10 +100,9 @@ class Grammar:
|
||||||
+ "0, {n_states}, states_{dfa_index},\n".format(
|
+ "0, {n_states}, states_{dfa_index},\n".format(
|
||||||
n_states=len(dfa), dfa_index=dfaindex
|
n_states=len(dfa), dfa_index=dfaindex
|
||||||
)
|
)
|
||||||
|
+ ' "'
|
||||||
)
|
)
|
||||||
writer(' "')
|
|
||||||
|
|
||||||
k = [name for label, name in self.labels if label in first_sets]
|
|
||||||
bitset = bytearray((len(self.labels) >> 3) + 1)
|
bitset = bytearray((len(self.labels) >> 3) + 1)
|
||||||
for token in first_sets:
|
for token in first_sets:
|
||||||
bitset[token >> 3] |= 1 << (token & 7)
|
bitset[token >> 3] |= 1 << (token & 7)
|
||||||
|
|
|
@ -3,6 +3,7 @@ import tokenize # from stdlib
|
||||||
|
|
||||||
from . import grammar, token
|
from . import grammar, token
|
||||||
|
|
||||||
|
|
||||||
class ParserGenerator(object):
|
class ParserGenerator(object):
|
||||||
|
|
||||||
def __init__(self, grammar_file, token_file, stream=None, verbose=False):
|
def __init__(self, grammar_file, token_file, stream=None, verbose=False):
|
||||||
|
@ -183,11 +184,8 @@ class ParserGenerator(object):
|
||||||
dfa = self.make_dfa(a, z)
|
dfa = self.make_dfa(a, z)
|
||||||
if self.verbose:
|
if self.verbose:
|
||||||
self.dump_dfa(name, dfa)
|
self.dump_dfa(name, dfa)
|
||||||
oldlen = len(dfa)
|
|
||||||
self.simplify_dfa(dfa)
|
self.simplify_dfa(dfa)
|
||||||
newlen = len(dfa)
|
|
||||||
dfas[name] = dfa
|
dfas[name] = dfa
|
||||||
#print name, oldlen, newlen
|
|
||||||
if startsymbol is None:
|
if startsymbol is None:
|
||||||
startsymbol = name
|
startsymbol = name
|
||||||
return dfas, startsymbol
|
return dfas, startsymbol
|
||||||
|
@ -355,7 +353,7 @@ class ParserGenerator(object):
|
||||||
if args:
|
if args:
|
||||||
try:
|
try:
|
||||||
msg = msg % args
|
msg = msg % args
|
||||||
except:
|
except Exception:
|
||||||
msg = " ".join([msg] + list(map(str, args)))
|
msg = " ".join([msg] + list(map(str, args)))
|
||||||
raise SyntaxError(msg, (self.filename, self.end[0],
|
raise SyntaxError(msg, (self.filename, self.end[0],
|
||||||
self.end[1], self.line))
|
self.end[1], self.line))
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
|
|
||||||
def generate_tokens(tokens):
|
def generate_tokens(tokens):
|
||||||
numbers = itertools.count(0)
|
numbers = itertools.count(0)
|
||||||
for line in tokens:
|
for line in tokens:
|
||||||
|
@ -16,6 +17,7 @@ def generate_tokens(tokens):
|
||||||
yield ('N_TOKENS', next(numbers))
|
yield ('N_TOKENS', next(numbers))
|
||||||
yield ('NT_OFFSET', 256)
|
yield ('NT_OFFSET', 256)
|
||||||
|
|
||||||
|
|
||||||
def generate_opmap(tokens):
|
def generate_opmap(tokens):
|
||||||
for line in tokens:
|
for line in tokens:
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
|
|
Loading…
Reference in New Issue