[lib2to3] Make grammar pickling faster (#6491)

* Now uses pickle protocol 4

* No longer wraps the grammar's `__dict__` in ordered dictionaries, because
plain dictionaries in Python 3.6+ already preserve insertion order

This still produces deterministic pickles (the output has the same MD5 digest
on every run). Tested with different PYTHONHASHSEED values.
This commit is contained in:
Łukasz Langa 2018-04-16 17:33:59 -07:00 committed by GitHub
parent 2bea947628
commit 76618061b9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 3 additions and 25 deletions

View File

@ -86,21 +86,9 @@ class Grammar(object):
self.start = 256
def dump(self, filename):
"""Dump the grammar tables to a pickle file.
dump() recursively changes all dict to OrderedDict, so the pickled file
is not exactly the same as what was passed in to dump(). load() uses the
pickled file to create the tables, but only changes OrderedDict to dict
at the top level; it does not recursively change OrderedDict to dict.
So, the loaded tables are different from the original tables that were
passed to load() in that some of the OrderedDict (from the pickled file)
are not changed back to dict. For parsing, this has no effect on
performance because OrderedDict uses dict's __getitem__ with nothing in
between.
"""
"""Dump the grammar tables to a pickle file."""
with open(filename, "wb") as f:
d = _make_deterministic(self.__dict__)
pickle.dump(d, f, 2)
pickle.dump(self.__dict__, f, pickle.HIGHEST_PROTOCOL)
def load(self, filename):
"""Load the grammar tables from a pickle file."""
@ -141,17 +129,6 @@ class Grammar(object):
print("start", self.start)
def _make_deterministic(top):
if isinstance(top, dict):
return collections.OrderedDict(
sorted(((k, _make_deterministic(v)) for k, v in top.items())))
if isinstance(top, list):
return [_make_deterministic(e) for e in top]
if isinstance(top, tuple):
return tuple(_make_deterministic(e) for e in top)
return top
# Map from operator to number (since tokenize doesn't do this)
opmap_raw = """

View File

@ -0,0 +1 @@
lib2to3 now uses pickle protocol 4 for pre-computed grammars.