mirror of https://github.com/python/cpython
lib2to3.pgen2.driver.load_grammar() now creates a stable cache file
between runs given the same Grammar.txt input regardless of the hash randomization setting.
This commit is contained in:
parent
d61910c598
commit
dd1c638b92
|
@ -106,16 +106,19 @@ class Driver(object):
|
|||
return self.parse_tokens(tokens, debug)
|
||||
|
||||
|
||||
def _generate_pickle_name(gt):
    """Return the pickle cache file name derived from grammar file *gt*.

    A ".txt" extension is dropped; any other extension is kept as part of
    the name.  The components of sys.version_info joined with "." are then
    appended, followed by ".pickle".
    """
    head, tail = os.path.splitext(gt)
    if tail == ".txt":
        tail = ""
    return head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
|
||||
|
||||
|
||||
def load_grammar(gt="Grammar.txt", gp=None,
|
||||
save=True, force=False, logger=None):
|
||||
"""Load the grammar (maybe from a pickle)."""
|
||||
if logger is None:
|
||||
logger = logging.getLogger()
|
||||
if gp is None:
|
||||
head, tail = os.path.splitext(gt)
|
||||
if tail == ".txt":
|
||||
tail = ""
|
||||
gp = head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
|
||||
gp = _generate_pickle_name(gt) if gp is None else gp
|
||||
if force or not _newer(gp, gt):
|
||||
logger.info("Generating grammar tables from %s", gt)
|
||||
g = pgen.generate_grammar(gt)
|
||||
|
@ -124,7 +127,7 @@ def load_grammar(gt="Grammar.txt", gp=None,
|
|||
try:
|
||||
g.dump(gp)
|
||||
except OSError as e:
|
||||
logger.info("Writing failed:"+str(e))
|
||||
logger.info("Writing failed: %s", e)
|
||||
else:
|
||||
g = grammar.Grammar()
|
||||
g.load(gp)
|
||||
|
|
|
@ -13,6 +13,7 @@ fallback token code OP, but the parser needs the actual token code.
|
|||
"""
|
||||
|
||||
# Python imports
|
||||
import collections
|
||||
import pickle
|
||||
|
||||
# Local imports
|
||||
|
@ -85,9 +86,21 @@ class Grammar(object):
|
|||
self.start = 256
|
||||
|
||||
def dump(self, filename):
|
||||
"""Dump the grammar tables to a pickle file."""
|
||||
"""Dump the grammar tables to a pickle file.
|
||||
|
||||
dump() recursively changes all dict to OrderedDict, so the pickled file
|
||||
is not exactly the same as what was passed in to dump(). load() uses the
|
||||
pickled file to create the tables, but only changes OrderedDict to dict
|
||||
at the top level; it does not recursively change OrderedDict to dict.
|
||||
So, the loaded tables are different from the original tables that were
|
||||
passed to dump() in that some of the OrderedDict (from the pickled file)
|
||||
are not changed back to dict. For parsing, this has no effect on
|
||||
performance because OrderedDict uses dict's __getitem__ with nothing in
|
||||
between.
|
||||
"""
|
||||
with open(filename, "wb") as f:
|
||||
pickle.dump(self.__dict__, f, 2)
|
||||
d = _make_deterministic(self.__dict__)
|
||||
pickle.dump(d, f, 2)
|
||||
|
||||
def load(self, filename):
|
||||
"""Load the grammar tables from a pickle file."""
|
||||
|
@ -124,6 +137,17 @@ class Grammar(object):
|
|||
print("start", self.start)
|
||||
|
||||
|
||||
def _make_deterministic(top):
    """Return a copy of *top* in which every nested dict has sorted items.

    Each dict, at any depth inside dicts, lists and tuples, is replaced by a
    collections.OrderedDict built from its sorted (key, value) pairs, with
    the values converted recursively.  Lists and tuples are rebuilt with
    converted elements.  Any other object is returned unchanged.
    """
    if isinstance(top, dict):
        return collections.OrderedDict(
            sorted((k, _make_deterministic(v)) for k, v in top.items()))
    if isinstance(top, list):
        return [_make_deterministic(e) for e in top]
    if isinstance(top, tuple):
        return tuple(_make_deterministic(e) for e in top)
    return top
|
||||
|
||||
|
||||
# Map from operator to number (since tokenize doesn't do this)
|
||||
|
||||
opmap_raw = """
|
||||
|
|
|
@ -39,7 +39,7 @@ class ParserGenerator(object):
|
|||
states = []
|
||||
for state in dfa:
|
||||
arcs = []
|
||||
for label, next in state.arcs.items():
|
||||
for label, next in sorted(state.arcs.items()):
|
||||
arcs.append((self.make_label(c, label), dfa.index(next)))
|
||||
if state.isfinal:
|
||||
arcs.append((0, dfa.index(state)))
|
||||
|
@ -52,7 +52,7 @@ class ParserGenerator(object):
|
|||
def make_first(self, c, name):
|
||||
rawfirst = self.first[name]
|
||||
first = {}
|
||||
for label in rawfirst:
|
||||
for label in sorted(rawfirst):
|
||||
ilabel = self.make_label(c, label)
|
||||
##assert ilabel not in first # XXX failed on <> ... !=
|
||||
first[ilabel] = 1
|
||||
|
@ -192,7 +192,7 @@ class ParserGenerator(object):
|
|||
for label, next in nfastate.arcs:
|
||||
if label is not None:
|
||||
addclosure(next, arcs.setdefault(label, {}))
|
||||
for label, nfaset in arcs.items():
|
||||
for label, nfaset in sorted(arcs.items()):
|
||||
for st in states:
|
||||
if st.nfaset == nfaset:
|
||||
break
|
||||
|
@ -222,7 +222,7 @@ class ParserGenerator(object):
|
|||
print("Dump of DFA for", name)
|
||||
for i, state in enumerate(dfa):
|
||||
print(" State", i, state.isfinal and "(final)" or "")
|
||||
for label, next in state.arcs.items():
|
||||
for label, next in sorted(state.arcs.items()):
|
||||
print(" %s -> %d" % (label, dfa.index(next)))
|
||||
|
||||
def simplify_dfa(self, dfa):
|
||||
|
|
|
@ -11,13 +11,13 @@ from textwrap import dedent
|
|||
|
||||
# Local imports
|
||||
from lib2to3 import pytree, refactor
|
||||
from lib2to3.pgen2 import driver
|
||||
from lib2to3.pgen2 import driver as pgen2_driver
|
||||
|
||||
test_dir = os.path.dirname(__file__)
|
||||
proj_dir = os.path.normpath(os.path.join(test_dir, ".."))
|
||||
grammar_path = os.path.join(test_dir, "..", "Grammar.txt")
|
||||
grammar = driver.load_grammar(grammar_path)
|
||||
driver = driver.Driver(grammar, convert=pytree.convert)
|
||||
grammar = pgen2_driver.load_grammar(grammar_path)
|
||||
driver = pgen2_driver.Driver(grammar, convert=pytree.convert)
|
||||
|
||||
def parse_string(string):
|
||||
return driver.parse_string(reformat(string), debug=True)
|
||||
|
|
|
@ -6,8 +6,6 @@ parts of the grammar we've changed, we also make sure we can parse the
|
|||
test_grammar.py files from both Python 2 and Python 3.
|
||||
"""
|
||||
|
||||
from __future__ import with_statement
|
||||
|
||||
# Testing imports
|
||||
from . import support
|
||||
from .support import driver, test_dir
|
||||
|
@ -15,12 +13,15 @@ from test.support import verbose
|
|||
|
||||
# Python imports
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import unittest
|
||||
import warnings
|
||||
import subprocess
|
||||
|
||||
# Local imports
|
||||
from lib2to3.pgen2 import driver as pgen2_driver
|
||||
from lib2to3.pgen2 import tokenize
|
||||
from ..pgen2.parse import ParseError
|
||||
from lib2to3.pygram import python_symbols as syms
|
||||
|
@ -35,6 +36,71 @@ class TestDriver(support.TestCase):
|
|||
self.assertEqual(t.children[1].children[0].type, syms.print_stmt)
|
||||
|
||||
|
||||
class TestPgen2Caching(support.TestCase):
    """Tests for the pickle cache file handled by pgen2 load_grammar()."""

    def test_load_grammar_from_txt_file(self):
        """Generate the grammar from the text file without saving a pickle."""
        pgen2_driver.load_grammar(support.grammar_path, save=False, force=True)

    def test_load_grammar_from_pickle(self):
        """Load the grammar when only the pickle file is left on disk."""
        # Make a copy of the grammar file in a temp directory we are
        # guaranteed to be able to write to.
        with tempfile.TemporaryDirectory() as tmpdir:
            grammar_copy = os.path.join(
                tmpdir, os.path.basename(support.grammar_path))
            shutil.copy(support.grammar_path, grammar_copy)
            pickle_name = pgen2_driver._generate_pickle_name(grammar_copy)

            pgen2_driver.load_grammar(grammar_copy, save=True, force=True)
            self.assertTrue(os.path.exists(pickle_name))

            os.unlink(grammar_copy)  # Only the pickle remains...
            pgen2_driver.load_grammar(grammar_copy, save=False, force=False)

    @unittest.skipIf(sys.executable is None, 'sys.executable required')
    def test_load_grammar_from_subprocess(self):
        """Compare pickles written by this process and by a subprocess."""
        with tempfile.TemporaryDirectory() as tmpdir:
            tmpsubdir = os.path.join(tmpdir, 'subdir')
            os.mkdir(tmpsubdir)
            grammar_base = os.path.basename(support.grammar_path)
            grammar_copy = os.path.join(tmpdir, grammar_base)
            grammar_sub_copy = os.path.join(tmpsubdir, grammar_base)
            shutil.copy(support.grammar_path, grammar_copy)
            shutil.copy(support.grammar_path, grammar_sub_copy)
            pickle_name = pgen2_driver._generate_pickle_name(grammar_copy)
            pickle_sub_name = pgen2_driver._generate_pickle_name(
                grammar_sub_copy)
            self.assertNotEqual(pickle_name, pickle_sub_name)

            # Generate a pickle file from this process.
            pgen2_driver.load_grammar(grammar_copy, save=True, force=True)
            self.assertTrue(os.path.exists(pickle_name))

            # Generate a new pickle file in a subprocess with a most likely
            # different hash randomization seed.
            sub_env = dict(os.environ)
            sub_env['PYTHONHASHSEED'] = 'random'
            # The code handed to "-c" must start at column 0, so the string
            # literal below is deliberately not indented.
            subprocess.check_call(
                [sys.executable, '-c', """
from lib2to3.pgen2 import driver as pgen2_driver
pgen2_driver.load_grammar(%r, save=True, force=True)
""" % (grammar_sub_copy,)],
                env=sub_env)
            self.assertTrue(os.path.exists(pickle_sub_name))

            with open(pickle_name, 'rb') as pickle_f_1, \
                    open(pickle_sub_name, 'rb') as pickle_f_2:
                self.assertEqual(
                    pickle_f_1.read(), pickle_f_2.read(),
                    msg='Grammar caches generated using different hash seeds'
                    ' were not identical.')
|
||||
|
||||
|
||||
|
||||
class GrammarTest(support.TestCase):
|
||||
def validate(self, code):
|
||||
support.parse_string(code)
|
||||
|
|
|
@ -67,6 +67,10 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- lib2to3.pgen2.driver.load_grammar() now creates a stable cache file
|
||||
between runs given the same Grammar.txt input regardless of the hash
|
||||
randomization setting.
|
||||
|
||||
- Issue #27570: Avoid zero-length memcpy() etc calls with null source
|
||||
pointers in the "ctypes" and "array" modules.
|
||||
|
||||
|
|
Loading…
Reference in New Issue