Clean implementation of Parser/pgen and fix some style issues (GH-12156)

This commit is contained in:
Pablo Galindo 2019-03-04 07:26:13 +00:00 committed by GitHub
parent 97c288df61
commit 8bc401a55c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 14 additions and 28 deletions

View File

@ -2,6 +2,7 @@ import argparse
from .pgen import ParserGenerator from .pgen import ParserGenerator
def main(): def main():
parser = argparse.ArgumentParser(description="Parser generator main program.") parser = argparse.ArgumentParser(description="Parser generator main program.")
parser.add_argument( parser.add_argument(

View File

@ -1,19 +1,8 @@
import collections import collections
class Grammar: class Grammar:
"""Pgen parsing tables conversion class. """Pgen parsing tables class.
Once initialized, this class supplies the grammar tables for the
parsing engine implemented by parse.py. The parsing engine
accesses the instance variables directly. The class here does not
provide initialization of the tables; several subclasses exist to
do this (see the conv and pgen modules).
The load() method reads the tables from a pickle file, which is
much faster than the other ways offered by subclasses. The pickle
file is written by calling dump() (after loading the grammar
tables using a subclass). The report() method prints a readable
representation of the tables to stdout, for debugging.
The instance variables are as follows: The instance variables are as follows:
@ -36,8 +25,7 @@ class Grammar:
dfas -- a dict mapping symbol numbers to (DFA, first) dfas -- a dict mapping symbol numbers to (DFA, first)
pairs, where DFA is an item from the states list pairs, where DFA is an item from the states list
above, and first is a set of tokens that can above, and first is a set of tokens that can
begin this grammar rule (represented by a dict begin this grammar rule.
whose values are always 1).
labels -- a list of (x, y) pairs where x is either a token labels -- a list of (x, y) pairs where x is either a token
number or a symbol number, and y is either None number or a symbol number, and y is either None
@ -92,14 +80,12 @@ class Grammar:
"static label labels[{n_labels}] = {{\n".format(n_labels=len(self.labels)) "static label labels[{n_labels}] = {{\n".format(n_labels=len(self.labels))
) )
for label, name in self.labels: for label, name in self.labels:
if name is None: label_name = '"{}"'.format(name) if name is not None else 0
writer(" {{{label}, 0}},\n".format(label=label)) writer(
else: ' {{{label}, {label_name}}},\n'.format(
writer( label=label, label_name=label_name
' {{{label}, "{label_name}"}},\n'.format(
label=label, label_name=name
)
) )
)
writer("};\n") writer("};\n")
def print_dfas(self, writer): def print_dfas(self, writer):
@ -114,10 +100,9 @@ class Grammar:
+ "0, {n_states}, states_{dfa_index},\n".format( + "0, {n_states}, states_{dfa_index},\n".format(
n_states=len(dfa), dfa_index=dfaindex n_states=len(dfa), dfa_index=dfaindex
) )
+ ' "'
) )
writer(' "')
k = [name for label, name in self.labels if label in first_sets]
bitset = bytearray((len(self.labels) >> 3) + 1) bitset = bytearray((len(self.labels) >> 3) + 1)
for token in first_sets: for token in first_sets:
bitset[token >> 3] |= 1 << (token & 7) bitset[token >> 3] |= 1 << (token & 7)

View File

@ -3,6 +3,7 @@ import tokenize # from stdlib
from . import grammar, token from . import grammar, token
class ParserGenerator(object): class ParserGenerator(object):
def __init__(self, grammar_file, token_file, stream=None, verbose=False): def __init__(self, grammar_file, token_file, stream=None, verbose=False):
@ -183,11 +184,8 @@ class ParserGenerator(object):
dfa = self.make_dfa(a, z) dfa = self.make_dfa(a, z)
if self.verbose: if self.verbose:
self.dump_dfa(name, dfa) self.dump_dfa(name, dfa)
oldlen = len(dfa)
self.simplify_dfa(dfa) self.simplify_dfa(dfa)
newlen = len(dfa)
dfas[name] = dfa dfas[name] = dfa
#print name, oldlen, newlen
if startsymbol is None: if startsymbol is None:
startsymbol = name startsymbol = name
return dfas, startsymbol return dfas, startsymbol
@ -355,7 +353,7 @@ class ParserGenerator(object):
if args: if args:
try: try:
msg = msg % args msg = msg % args
except: except Exception:
msg = " ".join([msg] + list(map(str, args))) msg = " ".join([msg] + list(map(str, args)))
raise SyntaxError(msg, (self.filename, self.end[0], raise SyntaxError(msg, (self.filename, self.end[0],
self.end[1], self.line)) self.end[1], self.line))

View File

@ -1,5 +1,6 @@
import itertools import itertools
def generate_tokens(tokens): def generate_tokens(tokens):
numbers = itertools.count(0) numbers = itertools.count(0)
for line in tokens: for line in tokens:
@ -16,6 +17,7 @@ def generate_tokens(tokens):
yield ('N_TOKENS', next(numbers)) yield ('N_TOKENS', next(numbers))
yield ('NT_OFFSET', 256) yield ('NT_OFFSET', 256)
def generate_opmap(tokens): def generate_opmap(tokens):
for line in tokens: for line in tokens:
line = line.strip() line = line.strip()