mirror of
https://github.com/python/cpython.git
synced 2024-12-11 10:50:11 +08:00
[lib2to3] Make grammar pickling faster (#6491)
* Now uses pickle protocol 4 * Doesn't wrap the grammar's `__dict__` in ordered dictionaries anymore as dictionaries in Python 3.6+ are ordered by default This still produces deterministic pickles (that hash the same with MD5). Tested with different PYTHONHASHSEED values.
This commit is contained in:
parent
2bea947628
commit
76618061b9
@ -86,21 +86,9 @@ class Grammar(object):
|
||||
self.start = 256
|
||||
|
||||
def dump(self, filename):
|
||||
"""Dump the grammar tables to a pickle file.
|
||||
|
||||
dump() recursively changes all dict to OrderedDict, so the pickled file
|
||||
is not exactly the same as what was passed in to dump(). load() uses the
|
||||
pickled file to create the tables, but only changes OrderedDict to dict
|
||||
at the top level; it does not recursively change OrderedDict to dict.
|
||||
So, the loaded tables are different from the original tables that were
|
||||
passed to load() in that some of the OrderedDict (from the pickled file)
|
||||
are not changed back to dict. For parsing, this has no effect on
|
||||
performance because OrderedDict uses dict's __getitem__ with nothing in
|
||||
between.
|
||||
"""
|
||||
"""Dump the grammar tables to a pickle file."""
|
||||
with open(filename, "wb") as f:
|
||||
d = _make_deterministic(self.__dict__)
|
||||
pickle.dump(d, f, 2)
|
||||
pickle.dump(self.__dict__, f, pickle.HIGHEST_PROTOCOL)
|
||||
|
||||
def load(self, filename):
|
||||
"""Load the grammar tables from a pickle file."""
|
||||
@ -141,17 +129,6 @@ class Grammar(object):
|
||||
print("start", self.start)
|
||||
|
||||
|
||||
def _make_deterministic(top):
|
||||
if isinstance(top, dict):
|
||||
return collections.OrderedDict(
|
||||
sorted(((k, _make_deterministic(v)) for k, v in top.items())))
|
||||
if isinstance(top, list):
|
||||
return [_make_deterministic(e) for e in top]
|
||||
if isinstance(top, tuple):
|
||||
return tuple(_make_deterministic(e) for e in top)
|
||||
return top
|
||||
|
||||
|
||||
# Map from operator to number (since tokenize doesn't do this)
|
||||
|
||||
opmap_raw = """
|
||||
|
@ -0,0 +1 @@
|
||||
lib2to3 now uses pickle protocol 4 for pre-computed grammars.
|
Loading…
Reference in New Issue
Block a user