mirror of https://github.com/python/cpython
Attempt to speed up deepfreeze.py (#107887)
* Instead of calling get_identifiers_and_strings(), extract identifiers and strings from pycore_global_strings.h. * Avoid ast.literal_eval(); it is very slow.
This commit is contained in:
parent
39745347f6
commit
a2a4b9f1ec
|
@ -1253,7 +1253,7 @@ regen-frozen: Tools/build/freeze_modules.py $(FROZEN_FILES_IN)
|
|||
.PHONY: regen-deepfreeze
|
||||
regen-deepfreeze: $(DEEPFREEZE_OBJS)
|
||||
|
||||
DEEPFREEZE_DEPS=$(srcdir)/Tools/build/deepfreeze.py $(FREEZE_MODULE_DEPS) $(FROZEN_FILES_OUT)
|
||||
DEEPFREEZE_DEPS=$(srcdir)/Tools/build/deepfreeze.py Include/internal/pycore_global_strings.h $(FREEZE_MODULE_DEPS) $(FROZEN_FILES_OUT)
|
||||
|
||||
# BEGIN: deepfreeze modules
|
||||
Python/deepfreeze/deepfreeze.c: $(DEEPFREEZE_DEPS)
|
||||
|
|
|
@ -6,7 +6,6 @@ On Windows, and in cross-compilation cases, it is executed
|
|||
by Python 3.10, and 3.11 features are not available.
|
||||
"""
|
||||
import argparse
|
||||
import ast
|
||||
import builtins
|
||||
import collections
|
||||
import contextlib
|
||||
|
@ -17,10 +16,10 @@ import types
|
|||
from typing import Dict, FrozenSet, TextIO, Tuple
|
||||
|
||||
import umarshal
|
||||
from generate_global_objects import get_identifiers_and_strings
|
||||
|
||||
ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
|
||||
|
||||
verbose = False
|
||||
identifiers, strings = get_identifiers_and_strings()
|
||||
|
||||
# This must be kept in sync with opcode.py
|
||||
RESUME = 151
|
||||
|
@ -114,6 +113,7 @@ class Printer:
|
|||
self.hits, self.misses = 0, 0
|
||||
self.finis: list[str] = []
|
||||
self.inits: list[str] = []
|
||||
self.identifiers, self.strings = self.get_identifiers_and_strings()
|
||||
self.write('#include "Python.h"')
|
||||
self.write('#include "internal/pycore_gc.h"')
|
||||
self.write('#include "internal/pycore_code.h"')
|
||||
|
@ -121,6 +121,19 @@ class Printer:
|
|||
self.write('#include "internal/pycore_long.h"')
|
||||
self.write("")
|
||||
|
||||
def get_identifiers_and_strings(self) -> tuple[set[str], dict[str, str]]:
    """Collect the global identifiers and strings declared in the C header.

    Scans ``Include/internal/pycore_global_strings.h`` (resolved relative to
    ``ROOT``) line by line for ``STRUCT_FOR_ID(name)`` and
    ``STRUCT_FOR_STR(name, "literal")`` entries.

    Returns:
        A pair ``(identifiers, strings)``. ``identifiers`` is the set of names
        found in ``STRUCT_FOR_ID`` entries. ``strings`` maps the *value* of
        each ``STRUCT_FOR_STR`` literal (escape sequences decoded) to the
        name it was declared under.

    Raises:
        OSError: if the header cannot be opened.
    """
    filename = os.path.join(ROOT, "Include", "internal", "pycore_global_strings.h")
    id_pattern = re.compile(r"STRUCT_FOR_ID\((\w+)\)")
    str_pattern = re.compile(r'STRUCT_FOR_STR\((\w+), "(.*?)"\)')
    identifiers: set[str] = set()
    strings: dict[str, str] = {}
    # Pin the encoding instead of relying on the locale default, and stream
    # the file rather than materialising every line up front.
    with open(filename, encoding="utf-8") as fp:
        for line in fp:
            if m := id_pattern.search(line):
                identifiers.add(m.group(1))
            if m := str_pattern.search(line):
                name, literal = m.groups()
                if "\\" in literal:
                    # The header holds C source text, so "\n" arrives here as
                    # a backslash followed by "n". The mapping is keyed by the
                    # string's value, so decode the escapes. The latin-1 +
                    # backslashreplace round trip keeps any non-ASCII
                    # characters intact while doing so.
                    literal = literal.encode("latin-1", "backslashreplace").decode(
                        "unicode_escape"
                    )
                strings[literal] = name
    return identifiers, strings
|
||||
|
||||
@contextlib.contextmanager
|
||||
def indent(self) -> None:
|
||||
save_level = self.level
|
||||
|
@ -171,9 +184,9 @@ class Printer:
|
|||
return f"& {name}.ob_base.ob_base"
|
||||
|
||||
def generate_unicode(self, name: str, s: str) -> str:
|
||||
if s in strings:
|
||||
return f"&_Py_STR({strings[s]})"
|
||||
if s in identifiers:
|
||||
if s in self.strings:
|
||||
return f"&_Py_STR({self.strings[s]})"
|
||||
if s in self.identifiers:
|
||||
return f"&_Py_ID({s})"
|
||||
if len(s) == 1:
|
||||
c = ord(s)
|
||||
|
@ -441,12 +454,10 @@ def is_frozen_header(source: str) -> bool:
|
|||
|
||||
|
||||
def decode_frozen_data(source: str) -> types.CodeType:
    """Rebuild the object stored in the text of a frozen-module header.

    Every line of *source* that matches ``FROZEN_DATA_LINE`` is read as a
    comma-separated run of decimal byte values. The values are gathered in
    order, packed into ``bytes`` and handed to ``umarshal.loads``.

    Args:
        source: full text of the frozen header.

    Returns:
        The result of ``umarshal.loads`` on the collected bytes.

    Raises:
        ValueError: if a matching line contains a non-integer field, or a
            value that does not fit in a byte.
    """
    values: list[int] = []
    for line in source.splitlines():
        if re.match(FROZEN_DATA_LINE, line):
            # A trailing comma leaves an empty field behind; skip blanks.
            values.extend(int(field) for field in line.split(",") if field.strip())
    return umarshal.loads(bytes(values))
|
||||
|
||||
|
|
Loading…
Reference in New Issue