bpo-45696: Deep-freeze selected modules (GH-29118)

This gains 10% or more in startup time for `python -c pass` on UNIX-ish systems.

The code that generates the Makefile.pre.in rules builds on Eric's work for bpo-45020, but the .c file generator is new.

Windows version TBD.
This commit is contained in:
Guido van Rossum 2021-11-10 18:01:53 -08:00 committed by GitHub
parent fc9b622819
commit 1cbaa505d0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 808 additions and 56 deletions

1
.gitattributes vendored
View File

@ -46,6 +46,7 @@ Modules/clinic/*.h linguist-generated=true
Objects/clinic/*.h linguist-generated=true
PC/clinic/*.h linguist-generated=true
Python/clinic/*.h linguist-generated=true
Python/deepfreeze/*.c linguist-generated=true
Python/frozen_modules/*.h linguist-generated=true
Python/frozen_modules/MANIFEST linguist-generated=true
Include/internal/pycore_ast.h linguist-generated=true

2
.gitignore vendored
View File

@ -59,6 +59,7 @@ Lib/distutils/command/*.pdb
Lib/lib2to3/*.pickle
Lib/test/data/*
!Lib/test/data/README
/_bootstrap_python
/Makefile
/Makefile.pre
Mac/Makefile
@ -115,6 +116,7 @@ Tools/unicode/data/
/platform
/profile-clean-stamp
/profile-run-stamp
/Python/deepfreeze/*.c
/pybuilddir.txt
/pyconfig.h
/python-config

View File

@ -1087,7 +1087,9 @@ size, we show only how this table can be read with :mod:`ctypes`::
>>> class struct_frozen(Structure):
... _fields_ = [("name", c_char_p),
... ("code", POINTER(c_ubyte)),
... ("size", c_int)]
... ("size", c_int),
... ("get_code", POINTER(c_ubyte)), # Function pointer
... ]
...
>>>

View File

@ -32,6 +32,7 @@ struct _frozen {
const char *name; /* ASCII encoded string */
const unsigned char *code;
int size;
PyObject *(*get_code)(void);
};
/* Embedding apps may change this pointer to point to their favorite

View File

@ -53,7 +53,9 @@ class PythonValuesTestCase(unittest.TestCase):
class struct_frozen(Structure):
_fields_ = [("name", c_char_p),
("code", POINTER(c_ubyte)),
("size", c_int)]
("size", c_int),
("get_code", POINTER(c_ubyte)), # Function ptr
]
FrozenTable = POINTER(struct_frozen)
modules = []

View File

@ -259,6 +259,7 @@ LIBOBJS= @LIBOBJS@
PYTHON= python$(EXE)
BUILDPYTHON= python$(BUILDEXE)
BOOTSTRAP= _bootstrap_python
PYTHON_FOR_REGEN?=@PYTHON_FOR_REGEN@
UPDATE_FILE=$(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/update_file.py
@ -448,6 +449,30 @@ OBJECT_OBJS= \
Objects/unionobject.o \
Objects/weakrefobject.o
# DEEPFREEZE_OBJS is auto-generated by Tools/scripts/freeze_modules.py.
DEEPFREEZE_OBJS = \
Python/deepfreeze/importlib._bootstrap.o \
Python/deepfreeze/importlib._bootstrap_external.o \
Python/deepfreeze/zipimport.o \
Python/deepfreeze/abc.o \
Python/deepfreeze/codecs.o \
Python/deepfreeze/io.o \
Python/deepfreeze/_collections_abc.o \
Python/deepfreeze/_sitebuiltins.o \
Python/deepfreeze/genericpath.o \
Python/deepfreeze/ntpath.o \
Python/deepfreeze/posixpath.o \
Python/deepfreeze/os.o \
Python/deepfreeze/site.o \
Python/deepfreeze/stat.o \
Python/deepfreeze/__hello__.o \
Python/deepfreeze/__phello__.o \
Python/deepfreeze/__phello__.ham.o \
Python/deepfreeze/__phello__.ham.eggs.o \
Python/deepfreeze/__phello__.spam.o \
Python/deepfreeze/frozen_only.o
# End DEEPFREEZE_OBJS
##########################################################################
# objects that get linked into the Python library
LIBRARY_OBJS_OMIT_FROZEN= \
@ -460,6 +485,7 @@ LIBRARY_OBJS_OMIT_FROZEN= \
LIBRARY_OBJS= \
$(LIBRARY_OBJS_OMIT_FROZEN) \
$(DEEPFREEZE_OBJS) \
Python/frozen.o
##########################################################################
@ -602,9 +628,9 @@ platform: $(BUILDPYTHON) pybuilddir.txt
# problems by creating a dummy pybuilddir.txt just to allow interpreter
# initialization to succeed. It will be overwritten by generate-posix-vars
# or removed in case of failure.
pybuilddir.txt: $(BUILDPYTHON)
pybuilddir.txt: $(BOOTSTRAP)
@echo "none" > ./pybuilddir.txt
$(RUNSHARED) $(PYTHON_FOR_BUILD) -S -m sysconfig --generate-posix-vars ;\
./$(BOOTSTRAP) -S -m sysconfig --generate-posix-vars ;\
if test $$? -ne 0 ; then \
echo "generate-posix-vars failed" ; \
rm -f ./pybuilddir.txt ; \
@ -738,6 +764,158 @@ regen-test-frozenmain: $(BUILDPYTHON)
Programs/_testembed: Programs/_testembed.o $(LIBRARY_DEPS)
$(LINKCC) $(PY_CORE_LDFLAGS) $(LINKFORSHARED) -o $@ Programs/_testembed.o $(BLDLIBRARY) $(LIBS) $(MODLIBS) $(SYSLIBS)
############################################################################
# "Bootstrap Python" used to run deepfreeze.py
#
# The bootstrap interpreter is linked from the library objects without the
# frozen-module objects (LIBRARY_OBJS_OMIT_FROZEN), plus
# Python/bootstrap_frozen.o and Programs/python.o.  $(DEEPFREEZE_OBJS) is not
# on its link line, while DEEPFREEZE_DEPS lists $(BOOTSTRAP) as a
# prerequisite of the generated deepfreeze sources.
#
# Frozen-module headers that Python/bootstrap_frozen.o depends on.
BOOTSTRAP_HEADERS = \
Python/frozen_modules/importlib._bootstrap.h \
Python/frozen_modules/importlib._bootstrap_external.h \
Python/frozen_modules/zipimport.h
# Dependency-only rule (no recipe given here): rebuild the object whenever
# its source, the import header or any bootstrap frozen header changes.
Python/bootstrap_frozen.o: Python/bootstrap_frozen.c Include/cpython/import.h $(BOOTSTRAP_HEADERS)
# Link the bootstrap interpreter executable.
$(BOOTSTRAP): $(LIBRARY_OBJS_OMIT_FROZEN) \
Python/bootstrap_frozen.o Programs/python.o
$(LINKCC) $(PY_CORE_LDFLAGS) -o $@ $(LIBRARY_OBJS_OMIT_FROZEN) \
Python/bootstrap_frozen.o \
Programs/python.o \
$(LIBS) $(MODLIBS) $(SYSLIBS)
############################################################################
# Deepfreeze targets
# "make regen-deepfreeze" is a phony convenience target that builds every
# object listed in DEEPFREEZE_OBJS.
.PHONY: regen-deepfreeze
regen-deepfreeze: $(DEEPFREEZE_OBJS)
# Prerequisites shared by every generated Python/deepfreeze/*.c rule: the
# bootstrap interpreter that runs the generator, pybuilddir.txt, and the
# generator script itself (so editing deepfreeze.py regenerates all outputs).
DEEPFREEZE_DEPS = \
$(BOOTSTRAP) \
pybuilddir.txt \
$(srcdir)/Tools/scripts/deepfreeze.py
# BEGIN: deepfreeze modules
Python/deepfreeze/importlib._bootstrap.c: $(srcdir)/Lib/importlib/_bootstrap.py $(DEEPFREEZE_DEPS)
@echo "Deepfreezing Python/deepfreeze/importlib._bootstrap.c from Lib/importlib/_bootstrap.py"
@./$(BOOTSTRAP) \
$(srcdir)/Tools/scripts/deepfreeze.py \
$(srcdir)/Lib/importlib/_bootstrap.py -m importlib._bootstrap -o Python/deepfreeze/importlib._bootstrap.c
Python/deepfreeze/importlib._bootstrap_external.c: $(srcdir)/Lib/importlib/_bootstrap_external.py $(DEEPFREEZE_DEPS)
@echo "Deepfreezing Python/deepfreeze/importlib._bootstrap_external.c from Lib/importlib/_bootstrap_external.py"
@./$(BOOTSTRAP) \
$(srcdir)/Tools/scripts/deepfreeze.py \
$(srcdir)/Lib/importlib/_bootstrap_external.py -m importlib._bootstrap_external -o Python/deepfreeze/importlib._bootstrap_external.c
Python/deepfreeze/zipimport.c: $(srcdir)/Lib/zipimport.py $(DEEPFREEZE_DEPS)
@echo "Deepfreezing Python/deepfreeze/zipimport.c from Lib/zipimport.py"
@./$(BOOTSTRAP) \
$(srcdir)/Tools/scripts/deepfreeze.py \
$(srcdir)/Lib/zipimport.py -m zipimport -o Python/deepfreeze/zipimport.c
Python/deepfreeze/abc.c: $(srcdir)/Lib/abc.py $(DEEPFREEZE_DEPS)
@echo "Deepfreezing Python/deepfreeze/abc.c from Lib/abc.py"
@./$(BOOTSTRAP) \
$(srcdir)/Tools/scripts/deepfreeze.py \
$(srcdir)/Lib/abc.py -m abc -o Python/deepfreeze/abc.c
Python/deepfreeze/codecs.c: $(srcdir)/Lib/codecs.py $(DEEPFREEZE_DEPS)
@echo "Deepfreezing Python/deepfreeze/codecs.c from Lib/codecs.py"
@./$(BOOTSTRAP) \
$(srcdir)/Tools/scripts/deepfreeze.py \
$(srcdir)/Lib/codecs.py -m codecs -o Python/deepfreeze/codecs.c
Python/deepfreeze/io.c: $(srcdir)/Lib/io.py $(DEEPFREEZE_DEPS)
@echo "Deepfreezing Python/deepfreeze/io.c from Lib/io.py"
@./$(BOOTSTRAP) \
$(srcdir)/Tools/scripts/deepfreeze.py \
$(srcdir)/Lib/io.py -m io -o Python/deepfreeze/io.c
Python/deepfreeze/_collections_abc.c: $(srcdir)/Lib/_collections_abc.py $(DEEPFREEZE_DEPS)
@echo "Deepfreezing Python/deepfreeze/_collections_abc.c from Lib/_collections_abc.py"
@./$(BOOTSTRAP) \
$(srcdir)/Tools/scripts/deepfreeze.py \
$(srcdir)/Lib/_collections_abc.py -m _collections_abc -o Python/deepfreeze/_collections_abc.c
Python/deepfreeze/_sitebuiltins.c: $(srcdir)/Lib/_sitebuiltins.py $(DEEPFREEZE_DEPS)
@echo "Deepfreezing Python/deepfreeze/_sitebuiltins.c from Lib/_sitebuiltins.py"
@./$(BOOTSTRAP) \
$(srcdir)/Tools/scripts/deepfreeze.py \
$(srcdir)/Lib/_sitebuiltins.py -m _sitebuiltins -o Python/deepfreeze/_sitebuiltins.c
Python/deepfreeze/genericpath.c: $(srcdir)/Lib/genericpath.py $(DEEPFREEZE_DEPS)
@echo "Deepfreezing Python/deepfreeze/genericpath.c from Lib/genericpath.py"
@./$(BOOTSTRAP) \
$(srcdir)/Tools/scripts/deepfreeze.py \
$(srcdir)/Lib/genericpath.py -m genericpath -o Python/deepfreeze/genericpath.c
Python/deepfreeze/ntpath.c: $(srcdir)/Lib/ntpath.py $(DEEPFREEZE_DEPS)
@echo "Deepfreezing Python/deepfreeze/ntpath.c from Lib/ntpath.py"
@./$(BOOTSTRAP) \
$(srcdir)/Tools/scripts/deepfreeze.py \
$(srcdir)/Lib/ntpath.py -m ntpath -o Python/deepfreeze/ntpath.c
Python/deepfreeze/posixpath.c: $(srcdir)/Lib/posixpath.py $(DEEPFREEZE_DEPS)
@echo "Deepfreezing Python/deepfreeze/posixpath.c from Lib/posixpath.py"
@./$(BOOTSTRAP) \
$(srcdir)/Tools/scripts/deepfreeze.py \
$(srcdir)/Lib/posixpath.py -m posixpath -o Python/deepfreeze/posixpath.c
Python/deepfreeze/os.c: $(srcdir)/Lib/os.py $(DEEPFREEZE_DEPS)
@echo "Deepfreezing Python/deepfreeze/os.c from Lib/os.py"
@./$(BOOTSTRAP) \
$(srcdir)/Tools/scripts/deepfreeze.py \
$(srcdir)/Lib/os.py -m os -o Python/deepfreeze/os.c
Python/deepfreeze/site.c: $(srcdir)/Lib/site.py $(DEEPFREEZE_DEPS)
@echo "Deepfreezing Python/deepfreeze/site.c from Lib/site.py"
@./$(BOOTSTRAP) \
$(srcdir)/Tools/scripts/deepfreeze.py \
$(srcdir)/Lib/site.py -m site -o Python/deepfreeze/site.c
Python/deepfreeze/stat.c: $(srcdir)/Lib/stat.py $(DEEPFREEZE_DEPS)
@echo "Deepfreezing Python/deepfreeze/stat.c from Lib/stat.py"
@./$(BOOTSTRAP) \
$(srcdir)/Tools/scripts/deepfreeze.py \
$(srcdir)/Lib/stat.py -m stat -o Python/deepfreeze/stat.c
Python/deepfreeze/__hello__.c: $(srcdir)/Lib/__hello__.py $(DEEPFREEZE_DEPS)
@echo "Deepfreezing Python/deepfreeze/__hello__.c from Lib/__hello__.py"
@./$(BOOTSTRAP) \
$(srcdir)/Tools/scripts/deepfreeze.py \
$(srcdir)/Lib/__hello__.py -m __hello__ -o Python/deepfreeze/__hello__.c
Python/deepfreeze/__phello__.c: $(srcdir)/Lib/__phello__/__init__.py $(DEEPFREEZE_DEPS)
@echo "Deepfreezing Python/deepfreeze/__phello__.c from Lib/__phello__/__init__.py"
@./$(BOOTSTRAP) \
$(srcdir)/Tools/scripts/deepfreeze.py \
$(srcdir)/Lib/__phello__/__init__.py -m __phello__ -o Python/deepfreeze/__phello__.c
Python/deepfreeze/__phello__.ham.c: $(srcdir)/Lib/__phello__/ham/__init__.py $(DEEPFREEZE_DEPS)
@echo "Deepfreezing Python/deepfreeze/__phello__.ham.c from Lib/__phello__/ham/__init__.py"
@./$(BOOTSTRAP) \
$(srcdir)/Tools/scripts/deepfreeze.py \
$(srcdir)/Lib/__phello__/ham/__init__.py -m __phello__.ham -o Python/deepfreeze/__phello__.ham.c
Python/deepfreeze/__phello__.ham.eggs.c: $(srcdir)/Lib/__phello__/ham/eggs.py $(DEEPFREEZE_DEPS)
@echo "Deepfreezing Python/deepfreeze/__phello__.ham.eggs.c from Lib/__phello__/ham/eggs.py"
@./$(BOOTSTRAP) \
$(srcdir)/Tools/scripts/deepfreeze.py \
$(srcdir)/Lib/__phello__/ham/eggs.py -m __phello__.ham.eggs -o Python/deepfreeze/__phello__.ham.eggs.c
Python/deepfreeze/__phello__.spam.c: $(srcdir)/Lib/__phello__/spam.py $(DEEPFREEZE_DEPS)
@echo "Deepfreezing Python/deepfreeze/__phello__.spam.c from Lib/__phello__/spam.py"
@./$(BOOTSTRAP) \
$(srcdir)/Tools/scripts/deepfreeze.py \
$(srcdir)/Lib/__phello__/spam.py -m __phello__.spam -o Python/deepfreeze/__phello__.spam.c
Python/deepfreeze/frozen_only.c: $(srcdir)/Tools/freeze/flag.py $(DEEPFREEZE_DEPS)
@echo "Deepfreezing Python/deepfreeze/frozen_only.c from Tools/freeze/flag.py"
@./$(BOOTSTRAP) \
$(srcdir)/Tools/scripts/deepfreeze.py \
$(srcdir)/Tools/freeze/flag.py -m frozen_only -o Python/deepfreeze/frozen_only.c
# END: deepfreeze modules
############################################################################
# frozen modules (including importlib)
@ -2017,7 +2195,8 @@ clean-retain-profile: pycremoval
find build -name '*.py[co]' -exec rm -f {} ';' || true
-rm -f pybuilddir.txt
-rm -f Lib/lib2to3/*Grammar*.pickle
-rm -f Programs/_testembed Programs/_freeze_module
-rm -f Programs/_testembed Programs/_freeze_module $(BOOTSTRAP)
-rm -f Python/deepfreeze/*.[co]
-find build -type f -a ! -name '*.gc??' -exec rm -f {} ';'
-rm -f Include/pydtrace_probes.h
-rm -f profile-gen-stamp

View File

@ -0,0 +1 @@
Skip the marshal step for frozen modules by generating C code that produces a set of ready-to-use code objects. This speeds up startup time by another 10% or more.

45
Python/bootstrap_frozen.c Normal file
View File

@ -0,0 +1,45 @@
/* Frozen modules bootstrap */
/* This file is linked with "bootstrap Python"
which is used (only) to run Tools/scripts/deepfreeze.py. */
#include "Python.h"
#include "pycore_import.h"
/* Includes for frozen modules: */
#include "frozen_modules/importlib._bootstrap.h"
#include "frozen_modules/importlib._bootstrap_external.h"
#include "frozen_modules/zipimport.h"
/* End includes */
/* Note that a negative size indicates a package. */
/* Each entry below gives only a name, the frozen code bytes and their size.
   No initializer is supplied for the trailing get_code member of
   struct _frozen, so it is zero-initialized (NULL) for every entry. */
static const struct _frozen bootstrap_modules[] = {
{"_frozen_importlib", _Py_M__importlib__bootstrap, (int)sizeof(_Py_M__importlib__bootstrap)},
{"_frozen_importlib_external", _Py_M__importlib__bootstrap_external, (int)sizeof(_Py_M__importlib__bootstrap_external)},
{"zipimport", _Py_M__zipimport, (int)sizeof(_Py_M__zipimport)},
{0, 0, 0} /* bootstrap sentinel */
};
/* The stdlib table is intentionally empty here: it holds only its
   terminating sentinel. */
static const struct _frozen stdlib_modules[] = {
{0, 0, 0} /* stdlib sentinel */
};
/* Likewise, the test table holds only its terminating sentinel. */
static const struct _frozen test_modules[] = {
{0, 0, 0} /* test sentinel */
};
/* Exported pointers to the three tables defined above. */
const struct _frozen *_PyImport_FrozenBootstrap = bootstrap_modules;
const struct _frozen *_PyImport_FrozenStdlib = stdlib_modules;
const struct _frozen *_PyImport_FrozenTest = test_modules;
/* Pairs of (frozen name, original module name), ended by a {0, 0} entry. */
static const struct _module_alias aliases[] = {
{"_frozen_importlib", "importlib._bootstrap"},
{"_frozen_importlib_external", "importlib._bootstrap_external"},
{0, 0} /* aliases sentinel */
};
const struct _module_alias *_PyImport_FrozenAliases = aliases;
/* Embedding apps may change this pointer to point to their favorite
collection of frozen modules: */
const struct _frozen *PyImport_FrozenModules = NULL;

View File

@ -0,0 +1,6 @@
This directory contains the generated .c files for all the deep-frozen
modules. Python/frozen.c depends on these files.
None of these files are committed into the repo.
See Tools/scripts/freeze_modules.py for more info.

View File

@ -61,50 +61,80 @@
#include "frozen_modules/frozen_only.h"
/* End includes */
/* GET_CODE(name) yields the value stored in the get_code slot of a frozen
   table entry: the deep-frozen accessor _Py_get_<name>_toplevel where
   deepfreeze is available, and NULL otherwise. */
#ifndef MS_WINDOWS
#define GET_CODE(name) _Py_get_##name##_toplevel
#else
/* Deepfreeze isn't supported on Windows yet. */
#define GET_CODE(name) NULL
#endif
/* Start extern declarations */
extern PyObject *_Py_get_importlib__bootstrap_toplevel(void);
extern PyObject *_Py_get_importlib__bootstrap_external_toplevel(void);
extern PyObject *_Py_get_zipimport_toplevel(void);
extern PyObject *_Py_get_abc_toplevel(void);
extern PyObject *_Py_get_codecs_toplevel(void);
extern PyObject *_Py_get_io_toplevel(void);
extern PyObject *_Py_get__collections_abc_toplevel(void);
extern PyObject *_Py_get__sitebuiltins_toplevel(void);
extern PyObject *_Py_get_genericpath_toplevel(void);
extern PyObject *_Py_get_ntpath_toplevel(void);
extern PyObject *_Py_get_posixpath_toplevel(void);
extern PyObject *_Py_get_posixpath_toplevel(void);
extern PyObject *_Py_get_os_toplevel(void);
extern PyObject *_Py_get_site_toplevel(void);
extern PyObject *_Py_get_stat_toplevel(void);
extern PyObject *_Py_get___hello___toplevel(void);
extern PyObject *_Py_get___hello___toplevel(void);
extern PyObject *_Py_get___hello___toplevel(void);
extern PyObject *_Py_get___hello___toplevel(void);
extern PyObject *_Py_get___phello___toplevel(void);
extern PyObject *_Py_get___phello___toplevel(void);
extern PyObject *_Py_get___phello___ham_toplevel(void);
extern PyObject *_Py_get___phello___ham_toplevel(void);
extern PyObject *_Py_get___phello___ham_eggs_toplevel(void);
extern PyObject *_Py_get___phello___spam_toplevel(void);
extern PyObject *_Py_get_frozen_only_toplevel(void);
/* End extern declarations */
/* Note that a negative size indicates a package. */
static const struct _frozen bootstrap_modules[] = {
{"_frozen_importlib", _Py_M__importlib__bootstrap,
(int)sizeof(_Py_M__importlib__bootstrap)},
{"_frozen_importlib_external", _Py_M__importlib__bootstrap_external,
(int)sizeof(_Py_M__importlib__bootstrap_external)},
{"zipimport", _Py_M__zipimport, (int)sizeof(_Py_M__zipimport)},
{"_frozen_importlib", _Py_M__importlib__bootstrap, (int)sizeof(_Py_M__importlib__bootstrap), GET_CODE(importlib__bootstrap)},
{"_frozen_importlib_external", _Py_M__importlib__bootstrap_external, (int)sizeof(_Py_M__importlib__bootstrap_external), GET_CODE(importlib__bootstrap_external)},
{"zipimport", _Py_M__zipimport, (int)sizeof(_Py_M__zipimport), GET_CODE(zipimport)},
{0, 0, 0} /* bootstrap sentinel */
};
static const struct _frozen stdlib_modules[] = {
/* stdlib - startup, without site (python -S) */
{"abc", _Py_M__abc, (int)sizeof(_Py_M__abc)},
{"codecs", _Py_M__codecs, (int)sizeof(_Py_M__codecs)},
{"io", _Py_M__io, (int)sizeof(_Py_M__io)},
{"abc", _Py_M__abc, (int)sizeof(_Py_M__abc), GET_CODE(abc)},
{"codecs", _Py_M__codecs, (int)sizeof(_Py_M__codecs), GET_CODE(codecs)},
{"io", _Py_M__io, (int)sizeof(_Py_M__io), GET_CODE(io)},
/* stdlib - startup, with site */
{"_collections_abc", _Py_M___collections_abc,
(int)sizeof(_Py_M___collections_abc)},
{"_sitebuiltins", _Py_M___sitebuiltins, (int)sizeof(_Py_M___sitebuiltins)},
{"genericpath", _Py_M__genericpath, (int)sizeof(_Py_M__genericpath)},
{"ntpath", _Py_M__ntpath, (int)sizeof(_Py_M__ntpath)},
{"posixpath", _Py_M__posixpath, (int)sizeof(_Py_M__posixpath)},
{"os.path", _Py_M__posixpath, (int)sizeof(_Py_M__posixpath)},
{"os", _Py_M__os, (int)sizeof(_Py_M__os)},
{"site", _Py_M__site, (int)sizeof(_Py_M__site)},
{"stat", _Py_M__stat, (int)sizeof(_Py_M__stat)},
{"_collections_abc", _Py_M___collections_abc, (int)sizeof(_Py_M___collections_abc), GET_CODE(_collections_abc)},
{"_sitebuiltins", _Py_M___sitebuiltins, (int)sizeof(_Py_M___sitebuiltins), GET_CODE(_sitebuiltins)},
{"genericpath", _Py_M__genericpath, (int)sizeof(_Py_M__genericpath), GET_CODE(genericpath)},
{"ntpath", _Py_M__ntpath, (int)sizeof(_Py_M__ntpath), GET_CODE(ntpath)},
{"posixpath", _Py_M__posixpath, (int)sizeof(_Py_M__posixpath), GET_CODE(posixpath)},
{"os.path", _Py_M__posixpath, (int)sizeof(_Py_M__posixpath), GET_CODE(posixpath)},
{"os", _Py_M__os, (int)sizeof(_Py_M__os), GET_CODE(os)},
{"site", _Py_M__site, (int)sizeof(_Py_M__site), GET_CODE(site)},
{"stat", _Py_M__stat, (int)sizeof(_Py_M__stat), GET_CODE(stat)},
{0, 0, 0} /* stdlib sentinel */
};
static const struct _frozen test_modules[] = {
{"__hello__", _Py_M____hello__, (int)sizeof(_Py_M____hello__)},
{"__hello_alias__", _Py_M____hello__, (int)sizeof(_Py_M____hello__)},
{"__phello_alias__", _Py_M____hello__, -(int)sizeof(_Py_M____hello__)},
{"__phello_alias__.spam", _Py_M____hello__, (int)sizeof(_Py_M____hello__)},
{"__phello__", _Py_M____phello__, -(int)sizeof(_Py_M____phello__)},
{"__phello__.__init__", _Py_M____phello__, (int)sizeof(_Py_M____phello__)},
{"__phello__.ham", _Py_M____phello___ham, -(int)sizeof(_Py_M____phello___ham)},
{"__phello__.ham.__init__", _Py_M____phello___ham,
(int)sizeof(_Py_M____phello___ham)},
{"__phello__.ham.eggs", _Py_M____phello___ham_eggs,
(int)sizeof(_Py_M____phello___ham_eggs)},
{"__phello__.spam", _Py_M____phello___spam,
(int)sizeof(_Py_M____phello___spam)},
{"__hello_only__", _Py_M__frozen_only, (int)sizeof(_Py_M__frozen_only)},
{"__hello__", _Py_M____hello__, (int)sizeof(_Py_M____hello__), GET_CODE(__hello__)},
{"__hello_alias__", _Py_M____hello__, (int)sizeof(_Py_M____hello__), GET_CODE(__hello__)},
{"__phello_alias__", _Py_M____hello__, -(int)sizeof(_Py_M____hello__), GET_CODE(__hello__)},
{"__phello_alias__.spam", _Py_M____hello__, (int)sizeof(_Py_M____hello__), GET_CODE(__hello__)},
{"__phello__", _Py_M____phello__, -(int)sizeof(_Py_M____phello__), GET_CODE(__phello__)},
{"__phello__.__init__", _Py_M____phello__, (int)sizeof(_Py_M____phello__), GET_CODE(__phello__)},
{"__phello__.ham", _Py_M____phello___ham, -(int)sizeof(_Py_M____phello___ham), GET_CODE(__phello___ham)},
{"__phello__.ham.__init__", _Py_M____phello___ham, (int)sizeof(_Py_M____phello___ham), GET_CODE(__phello___ham)},
{"__phello__.ham.eggs", _Py_M____phello___ham_eggs, (int)sizeof(_Py_M____phello___ham_eggs), GET_CODE(__phello___ham_eggs)},
{"__phello__.spam", _Py_M____phello___spam, (int)sizeof(_Py_M____phello___spam), GET_CODE(__phello___spam)},
{"__hello_only__", _Py_M__frozen_only, (int)sizeof(_Py_M__frozen_only), GET_CODE(frozen_only)},
{0, 0, 0} /* test sentinel */
};
const struct _frozen *_PyImport_FrozenBootstrap = bootstrap_modules;

View File

@ -1262,6 +1262,7 @@ look_up_frozen(const char *name)
struct frozen_info {
PyObject *nameobj;
const char *data;
PyObject *(*get_code)(void);
Py_ssize_t size;
bool is_package;
bool is_alias;
@ -1295,6 +1296,7 @@ find_frozen(PyObject *nameobj, struct frozen_info *info)
if (info != NULL) {
info->nameobj = nameobj; // borrowed
info->data = (const char *)p->code;
info->get_code = p->get_code;
info->size = p->size < 0 ? -(p->size) : p->size;
info->is_package = p->size < 0 ? true : false;
info->origname = name;
@ -1316,6 +1318,11 @@ find_frozen(PyObject *nameobj, struct frozen_info *info)
static PyObject *
unmarshal_frozen_code(struct frozen_info *info)
{
if (info->get_code) {
PyObject *code = info->get_code();
assert(code != NULL);
return code;
}
PyObject *co = PyMarshal_ReadObjectFromString(info->data, info->size);
if (co == NULL) {
/* Does not contain executable code. */

View File

@ -22,13 +22,23 @@ class UnsupportedError(Exception):
def _run_quiet(cmd, cwd=None):
#print(f'# {" ".join(shlex.quote(a) for a in cmd)}')
return subprocess.run(
cmd,
cwd=cwd,
capture_output=True,
text=True,
check=True,
)
try:
return subprocess.run(
cmd,
cwd=cwd,
capture_output=True,
text=True,
check=True,
)
except subprocess.CalledProcessError as err:
# Don't be quiet if things fail
print(f"{err.__class__.__name__}: {err}")
print("--- STDOUT ---")
print(err.stdout)
print("--- STDERR ---")
print(err.stderr)
print("---- END ----")
raise
def _run_stdout(cmd, cwd=None):

418
Tools/scripts/deepfreeze.py Normal file
View File

@ -0,0 +1,418 @@
import argparse
import builtins
import collections
import contextlib
import os
import sys
import time
import types
import typing
verbose = False
def make_string_literal(b: bytes) -> str:
    """Render *b* as a double-quoted C string literal.

    Input that is entirely printable ASCII is emitted verbatim, with
    backslashes and double quotes escaped.  Any other input is emitted as a
    sequence of two-digit hex escapes, one per byte.
    """
    text = b.decode("ascii") if b.isascii() else None
    if text is not None and text.isprintable():
        body = text.replace("\\", "\\\\").replace("\"", "\\\"")
    else:
        body = "".join(f"\\x{byte:02x}" for byte in b)
    return '"' + body + '"'
# Bit flags stored per name in the "localsplus kinds" byte string.
CO_FAST_LOCAL = 0x20
CO_FAST_CELL = 0x40
CO_FAST_FREE = 0x80


def get_localsplus(code: types.CodeType):
    """Return ``(names, kinds)`` describing the code object's fast locals.

    *names* is a tuple of every name found in ``co_varnames``,
    ``co_cellvars`` and ``co_freevars`` in first-seen order; *kinds* is a
    bytes object of the same length whose entries OR together the
    CO_FAST_* flag of each category the name appears in.
    """
    kind_by_name: dict[str, int] = {}
    categories = (
        (code.co_varnames, CO_FAST_LOCAL),
        (code.co_cellvars, CO_FAST_CELL),
        (code.co_freevars, CO_FAST_FREE),
    )
    for group, flag in categories:
        for varname in group:
            kind_by_name[varname] = kind_by_name.get(varname, 0) | flag
    return tuple(kind_by_name), bytes(kind_by_name.values())
def get_localsplus_counts(code: types.CodeType,
                          names: tuple[str, ...],
                          kinds: bytes) -> tuple[int, int, int, int]:
    """Count the localsplus entries by category.

    Returns ``(nlocals, nplaincellvars, ncellvars, nfreevars)``.  A cell
    that is also a local counts toward both nlocals and ncellvars; a cell
    that is not a local additionally counts as a "plain" cell.  A name is
    counted as free only when it is neither a local nor a cell.  The totals
    are cross-checked against the code object's own attributes.
    """
    nlocals = nplaincellvars = ncellvars = nfreevars = 0
    for _name, kind in zip(names, kinds, strict=True):
        is_local = bool(kind & CO_FAST_LOCAL)
        is_cell = bool(kind & CO_FAST_CELL)
        if is_local:
            nlocals += 1
        if is_cell:
            ncellvars += 1
            if not is_local:
                nplaincellvars += 1
        if not (is_local or is_cell) and kind & CO_FAST_FREE:
            nfreevars += 1
    assert nlocals == len(code.co_varnames) == code.co_nlocals
    assert ncellvars == len(code.co_cellvars)
    assert nfreevars == len(code.co_freevars)
    assert len(names) == nlocals + nplaincellvars + nfreevars
    return nlocals, nplaincellvars, ncellvars, nfreevars
# Storage width, in bytes per character, of a compact unicode object.
PyUnicode_1BYTE_KIND = 1
PyUnicode_2BYTE_KIND = 2
PyUnicode_4BYTE_KIND = 4


def analyze_character_width(s: str) -> tuple[int, bool]:
    """Return ``(kind, is_ascii)`` for the string *s*.

    *kind* is the PyUnicode_*_KIND constant wide enough for the largest
    character in *s*; *is_ascii* is true only when every character is at or
    below U+007F.  An empty string is reported as 1-byte ASCII.
    """
    widest = max(s, default=' ')
    if widest < ' ':
        # The scan is seeded with a space, so it never reports less than that.
        widest = ' '
    if widest <= '\xFF':
        return PyUnicode_1BYTE_KIND, widest <= '\x7F'
    if widest <= '\uFFFF':
        return PyUnicode_2BYTE_KIND, False
    return PyUnicode_4BYTE_KIND, False
class Printer:
def __init__(self, file: typing.TextIO):
self.level = 0
self.file = file
self.cache: dict[tuple[type, object], str] = {}
self.hits, self.misses = 0, 0
self.patchups: list[str] = []
self.write('#include "Python.h"')
self.write('#include "internal/pycore_gc.h"')
self.write('#include "internal/pycore_code.h"')
self.write("")
@contextlib.contextmanager
def indent(self) -> None:
save_level = self.level
try:
self.level += 1
yield
finally:
self.level = save_level
def write(self, arg: str) -> None:
self.file.writelines((" "*self.level, arg, "\n"))
@contextlib.contextmanager
def block(self, prefix: str, suffix: str = "") -> None:
self.write(prefix + " {")
with self.indent():
yield
self.write("}" + suffix)
def object_head(self, typename: str) -> None:
with self.block(".ob_base =", ","):
self.write(f".ob_refcnt = 999999999,")
self.write(f".ob_type = &{typename},")
def object_var_head(self, typename: str, size: int) -> None:
with self.block(".ob_base =", ","):
self.object_head(typename)
self.write(f".ob_size = {size},")
def field(self, obj: object, name: str) -> None:
self.write(f".{name} = {getattr(obj, name)},")
def generate_bytes(self, name: str, b: bytes) -> str:
self.write("static")
with self.indent():
with self.block("struct"):
self.write("PyObject_VAR_HEAD")
self.write("Py_hash_t ob_shash;")
self.write(f"char ob_sval[{len(b) + 1}];")
with self.block(f"{name} =", ";"):
self.object_var_head("PyBytes_Type", len(b))
self.write(".ob_shash = -1,")
self.write(f".ob_sval = {make_string_literal(b)},")
return f"& {name}.ob_base.ob_base"
def generate_unicode(self, name: str, s: str) -> str:
kind, ascii = analyze_character_width(s)
if kind == PyUnicode_1BYTE_KIND:
datatype = "uint8_t"
elif kind == PyUnicode_2BYTE_KIND:
datatype = "uint16_t"
else:
datatype = "uint32_t"
self.write("static")
with self.indent():
with self.block("struct"):
if ascii:
self.write("PyASCIIObject _ascii;")
else:
self.write("PyCompactUnicodeObject _compact;")
self.write(f"{datatype} _data[{len(s)+1}];")
with self.block(f"{name} =", ";"):
if ascii:
with self.block("._ascii =", ","):
self.object_head("PyUnicode_Type")
self.write(f".length = {len(s)},")
self.write(".hash = -1,")
with self.block(".state =", ","):
self.write(".kind = 1,")
self.write(".compact = 1,")
self.write(".ascii = 1,")
self.write(".ready = 1,")
self.write(f"._data = {make_string_literal(s.encode('ascii'))},")
return f"& {name}._ascii.ob_base"
else:
with self.block("._compact =", ","):
with self.block("._base =", ","):
self.object_head("PyUnicode_Type")
self.write(f".length = {len(s)},")
self.write(".hash = -1,")
with self.block(".state =", ","):
self.write(f".kind = {kind},")
self.write(".compact = 1,")
self.write(".ascii = 0,")
self.write(".ready = 1,")
with self.block(f"._data =", ","):
for i in range(0, len(s), 16):
data = s[i:i+16]
self.write(", ".join(map(str, map(ord, data))) + ",")
if kind == PyUnicode_2BYTE_KIND:
self.patchups.append("if (sizeof(wchar_t) == 2) {")
self.patchups.append(f" {name}._compact._base.wstr = (wchar_t *) {name}._data;")
self.patchups.append(f" {name}._compact.wstr_length = {len(s)};")
self.patchups.append("}")
if kind == PyUnicode_4BYTE_KIND:
self.patchups.append("if (sizeof(wchar_t) == 4) {")
self.patchups.append(f" {name}._compact._base.wstr = (wchar_t *) {name}._data;")
self.patchups.append(f" {name}._compact.wstr_length = {len(s)};")
self.patchups.append("}")
return f"& {name}._compact._base.ob_base"
def generate_code(self, name: str, code: types.CodeType) -> str:
# The ordering here matches PyCode_NewWithPosOnlyArgs()
# (but see below).
co_code = self.generate(name + "_code", code.co_code)
co_consts = self.generate(name + "_consts", code.co_consts)
co_names = self.generate(name + "_names", code.co_names)
co_varnames = self.generate(name + "_varnames", code.co_varnames)
co_freevars = self.generate(name + "_freevars", code.co_freevars)
co_cellvars = self.generate(name + "_cellvars", code.co_cellvars)
co_filename = self.generate(name + "_filename", code.co_filename)
co_name = self.generate(name + "_name", code.co_name)
co_qualname = self.generate(name + "_qualname", code.co_qualname)
co_linetable = self.generate(name + "_linetable", code.co_linetable)
co_endlinetable = self.generate(name + "_endlinetable", code.co_endlinetable)
co_columntable = self.generate(name + "_columntable", code.co_columntable)
co_exceptiontable = self.generate(name + "_exceptiontable", code.co_exceptiontable)
# These fields are not directly accessible
localsplusnames, localspluskinds = get_localsplus(code)
co_localsplusnames = self.generate(name + "_localsplusnames", localsplusnames)
co_localspluskinds = self.generate(name + "_localspluskinds", localspluskinds)
# Derived values
nlocals, nplaincellvars, ncellvars, nfreevars = \
get_localsplus_counts(code, localsplusnames, localspluskinds)
with self.block(f"static struct PyCodeObject {name} =", ";"):
self.object_head("PyCode_Type")
# But the ordering here must match that in cpython/code.h
# (which is a pain because we tend to reorder those for perf)
# otherwise MSVC doesn't like it.
self.write(f".co_consts = {co_consts},")
self.write(f".co_names = {co_names},")
self.write(f".co_firstinstr = (_Py_CODEUNIT *) {co_code.removesuffix('.ob_base.ob_base')}.ob_sval,")
self.write(f".co_exceptiontable = {co_exceptiontable},")
self.field(code, "co_flags")
self.write(".co_warmup = QUICKENING_INITIAL_WARMUP_VALUE,")
self.field(code, "co_argcount")
self.field(code, "co_posonlyargcount")
self.field(code, "co_kwonlyargcount")
self.field(code, "co_stacksize")
self.field(code, "co_firstlineno")
self.write(f".co_code = {co_code},")
self.write(f".co_localsplusnames = {co_localsplusnames},")
self.write(f".co_localspluskinds = {co_localspluskinds},")
self.write(f".co_filename = {co_filename},")
self.write(f".co_name = {co_name},")
self.write(f".co_qualname = {co_qualname},")
self.write(f".co_linetable = {co_linetable},")
self.write(f".co_endlinetable = {co_endlinetable},")
self.write(f".co_columntable = {co_columntable},")
self.write(f".co_nlocalsplus = {len(localsplusnames)},")
self.field(code, "co_nlocals")
self.write(f".co_nplaincellvars = {nplaincellvars},")
self.write(f".co_ncellvars = {ncellvars},")
self.write(f".co_nfreevars = {nfreevars},")
self.write(f".co_varnames = {co_varnames},")
self.write(f".co_cellvars = {co_cellvars},")
self.write(f".co_freevars = {co_freevars},")
return f"& {name}.ob_base"
def generate_tuple(self, name: str, t: tuple[object, ...]) -> str:
items = [self.generate(f"{name}_{i}", it) for i, it in enumerate(t)]
self.write("static")
with self.indent():
with self.block("struct"):
self.write("PyGC_Head _gc_head;")
with self.block("struct", "_object;"):
self.write("PyObject_VAR_HEAD")
if t:
self.write(f"PyObject *ob_item[{len(t)}];")
with self.block(f"{name} =", ";"):
with self.block("._object =", ","):
self.object_var_head("PyTuple_Type", len(t))
if items:
with self.block(f".ob_item =", ","):
for item in items:
self.write(item + ",")
return f"& {name}._object.ob_base.ob_base"
def generate_int(self, name: str, i: int) -> str:
maxint = sys.maxsize
if maxint == 2**31 - 1:
digit = 2**15
elif maxint == 2**63 - 1:
digit = 2**30
else:
assert False, f"What int size is this system?!? {maxint=}"
sign = -1 if i < 0 else 0 if i == 0 else +1
i = abs(i)
digits: list[int] = []
while i:
i, rem = divmod(i, digit)
digits.append(rem)
self.write("static")
with self.indent():
with self.block("struct"):
self.write("PyObject_VAR_HEAD")
self.write(f"digit ob_digit[{max(1, len(digits))}];")
with self.block(f"{name} =", ";"):
self.object_var_head("PyLong_Type", sign*len(digits))
if digits:
ds = ", ".join(map(str, digits))
self.write(f".ob_digit = {{ {ds} }},")
return f"& {name}.ob_base.ob_base"
def generate_float(self, name: str, x: float) -> str:
with self.block(f"static PyFloatObject {name} =", ";"):
self.object_head("PyFloat_Type")
self.write(f".ob_fval = {x},")
return f"&{name}.ob_base"
def generate_complex(self, name: str, z: complex) -> str:
with self.block(f"static PyComplexObject {name} =", ";"):
self.object_head("PyComplex_Type")
self.write(f".cval = {{ {z.real}, {z.imag} }},")
return f"&{name}.ob_base"
def generate_frozenset(self, name: str, fs: frozenset[object]) -> str:
ret = self.generate_tuple(name, tuple(sorted(fs)))
self.write("// TODO: The above tuple should be a frozenset")
return ret
def generate(self, name: str, obj: object) -> str:
    """Return a C expression referring to *obj*, emitting a definition if needed.

    Results are memoized in ``self.cache`` under a key made of the object's
    type, the object itself and its ``repr`` (the ``repr`` keeps -0.0 and
    +0.0 apart).  ``True``, ``False``, ``None`` and ``Ellipsis`` map
    directly to their C singletons and are never stored in the cache.

    Raises ``TypeError`` for an object of an unsupported type.
    """
    cache_key = (type(obj), obj, repr(obj))
    if cache_key in self.cache:
        self.hits += 1
        return self.cache[cache_key]
    self.misses += 1

    # Booleans are ints, so they must be recognised before the int handler.
    if obj is True:
        return "Py_True"
    if obj is False:
        return "Py_False"

    # (type, emitter) pairs, tried in order; the first matching type wins.
    emitters = (
        (types.CodeType, self.generate_code),
        (tuple, self.generate_tuple),
        (str, self.generate_unicode),
        (bytes, self.generate_bytes),
        (int, self.generate_int),
        (float, self.generate_float),
        (complex, self.generate_complex),
        (frozenset, self.generate_frozenset),
    )
    for kind, emit in emitters:
        if isinstance(obj, kind):
            reference = emit(name, obj)
            break
    else:
        if obj == builtins.Ellipsis:
            return "Py_Ellipsis"
        if obj is None:
            return "Py_None"
        raise TypeError(
            f"Cannot generate code for {type(obj).__name__} object")

    self.cache[cache_key] = reference
    return reference
# C source appended to the end of every generated file.  It defines the
# accessor _Py_get_<NAME>_toplevel(), which runs do_patchups() and then
# returns the statically allocated `toplevel` object.  The module-level
# generate() replaces %%NAME%% with the module name, dots turned into
# underscores.
EPILOGUE = """
PyObject *
_Py_get_%%NAME%%_toplevel(void)
{
do_patchups();
return (PyObject *) &toplevel;
}
"""
def generate(source: str, filename: str, modname: str, file: typing.TextIO) -> None:
    """Compile *source* and write its deep-frozen C form to *file*.

    source   -- Python source text of the module.
    filename -- name recorded in the compiled code object.
    modname  -- dotted module name; used (dots replaced by underscores)
                to name the generated ``_Py_get_<name>_toplevel`` accessor.
    file     -- open text stream that receives the C source.

    The output is, in order: the static definition of the code object
    named ``toplevel`` (and everything it references), a ``do_patchups``
    function containing the statements collected in ``printer.patchups``,
    and the EPILOGUE with the module name substituted.  Cache statistics
    are printed when the module-level ``verbose`` flag is set.
    """
    code = compile(source, filename, "exec")
    printer = Printer(file)
    printer.generate("toplevel", code)
    printer.write("")
    # "(void)" makes this a proper prototype; in C an empty "()" declares a
    # function with unspecified parameters and trips -Wstrict-prototypes.
    with printer.block("static void do_patchups(void)"):
        for p in printer.patchups:
            printer.write(p)
    printer.write(EPILOGUE.replace("%%NAME%%", modname.replace(".", "_")))
    if verbose:
        print(f"Cache hits: {printer.hits}, misses: {printer.misses}")
# Command-line interface: one required input file plus optional module
# name, output path and verbosity switch.
parser = argparse.ArgumentParser()
parser.add_argument(
    "-m",
    "--module",
    help="Defaults to basename(file)",
)
parser.add_argument(
    "-o",
    "--output",
    help="Defaults to MODULE.c",
)
parser.add_argument(
    "-v",
    "--verbose",
    action="store_true",
    help="Print diagnostics",
)
parser.add_argument(
    "file",
    help="Input file (required)",
)
@contextlib.contextmanager
def report_time(label: str):
    """Context manager that reports how long its body took to run.

    When the module-level ``verbose`` flag is true at exit, prints
    ``"<label>: <seconds> sec"`` with three decimals.  The report is made
    in a ``finally`` clause, so it also happens when the body raises; the
    exception then propagates unchanged.
    """
    # perf_counter() is meant for measuring intervals; unlike time.time()
    # it is not affected by adjustments of the system clock.
    started = time.perf_counter()
    try:
        yield
    finally:
        elapsed = time.perf_counter() - started
        if verbose:
            print(f"{label}: {elapsed:.3f} sec")
def main() -> None:
    """Command-line entry point: deep-freeze one Python source file.

    Parses the arguments, sets the module-level ``verbose`` flag, reads the
    input file as UTF-8 and writes the generated C source.  The module name
    defaults to the input's base name without a ``.py`` suffix, and the
    output path defaults to ``<module>.c``.

    If generation fails, the partially written output file is removed
    before the exception is re-raised.
    """
    global verbose
    args = parser.parse_args()
    verbose = args.verbose
    with open(args.file, encoding="utf-8") as f:
        source = f.read()
    modname = args.module or os.path.basename(args.file).removesuffix(".py")
    output = args.output or modname + ".c"
    # Open outside the try block: if the open itself fails we have not
    # touched the file and must not remove it.
    out = open(output, "w", encoding="utf-8")
    try:
        with out, report_time("generate"):
            generate(source, f"<frozen {modname}>", modname, out)
    except BaseException:
        # A truncated output would be newer than its source, so a build
        # tool comparing timestamps would consider it up to date.  Remove
        # it, but only when it is a plain file (not a symlink such as
        # /dev/stdout).
        if os.path.isfile(output) and not os.path.islink(output):
            with contextlib.suppress(OSError):
                os.remove(output)
        raise
    if verbose:
        print(f"Wrote {os.path.getsize(output)} bytes to {output}")


if __name__ == "__main__":
    main()

View File

@ -528,6 +528,7 @@ def regen_frozen(modules):
header = relpath_for_posix_display(src.frozenfile, parentdir)
headerlines.append(f'#include "{header}"')
externlines = []
bootstraplines = []
stdliblines = []
testlines = []
@ -547,17 +548,18 @@ def regen_frozen(modules):
lines.append(f'/* {mod.section} */')
lastsection = mod.section
# Also add an extern declaration for the corresponding
# deepfreeze-generated function.
orig_name = mod.source.id
code_name = orig_name.replace(".", "_")
get_code_name = "_Py_get_%s_toplevel" % code_name
externlines.append("extern PyObject *%s(void);" % get_code_name)
symbol = mod.symbol
pkg = '-' if mod.ispkg else ''
line = ('{"%s", %s, %s(int)sizeof(%s)},'
) % (mod.name, symbol, pkg, symbol)
# TODO: Consider not folding lines
if len(line) < 80:
lines.append(line)
else:
line1, _, line2 = line.rpartition(' ')
lines.append(line1)
lines.append(indent + line2)
line = ('{"%s", %s, %s(int)sizeof(%s), GET_CODE(%s)},'
) % (mod.name, symbol, pkg, symbol, code_name)
lines.append(line)
if mod.isalias:
if not mod.orig:
@ -588,6 +590,13 @@ def regen_frozen(modules):
headerlines,
FROZEN_FILE,
)
lines = replace_block(
lines,
"/* Start extern declarations */",
"/* End extern declarations */",
externlines,
FROZEN_FILE,
)
lines = replace_block(
lines,
"static const struct _frozen bootstrap_modules[] =",
@ -622,7 +631,30 @@ def regen_frozen(modules):
def regen_makefile(modules):
pyfiles = []
frozenfiles = []
deepfreezefiles = []
rules = ['']
deepfreezerules = ['']
# TODO: Merge the two loops
for src in _iter_sources(modules):
header = relpath_for_posix_display(src.frozenfile, ROOT_DIR)
relfile = header.replace('\\', '/')
_pyfile = relpath_for_posix_display(src.pyfile, ROOT_DIR)
# TODO: This is a bit hackish
xfile = relfile.replace("/frozen_modules/", "/deepfreeze/")
cfile = xfile[:-2] + ".c"
ofile = xfile[:-2] + ".o"
deepfreezefiles.append(f"\t\t{ofile} \\")
# Also add a deepfreeze rule.
deepfreezerules.append(f'{cfile}: $(srcdir)/{_pyfile} $(DEEPFREEZE_DEPS)')
deepfreezerules.append(f'\t@echo "Deepfreezing {cfile} from {_pyfile}"')
deepfreezerules.append(f"\t@./$(BOOTSTRAP) \\")
deepfreezerules.append(f"\t\t$(srcdir)/Tools/scripts/deepfreeze.py \\")
deepfreezerules.append(f"\t\t$(srcdir)/{_pyfile} -m {src.frozenid} -o {cfile}")
deepfreezerules.append('')
for src in _iter_sources(modules):
header = relpath_for_posix_display(src.frozenfile, ROOT_DIR)
frozenfiles.append(f'\t\t{header} \\')
@ -639,6 +671,7 @@ def regen_makefile(modules):
])
pyfiles[-1] = pyfiles[-1].rstrip(" \\")
frozenfiles[-1] = frozenfiles[-1].rstrip(" \\")
deepfreezefiles[-1] = deepfreezefiles[-1].rstrip(" \\")
print(f'# Updating {os.path.relpath(MAKEFILE)}')
with updating_file_with_tmpfile(MAKEFILE) as (infile, outfile):
@ -657,6 +690,13 @@ def regen_makefile(modules):
frozenfiles,
MAKEFILE,
)
lines = replace_block(
lines,
"DEEPFREEZE_OBJS =",
"# End DEEPFREEZE_OBJS",
deepfreezefiles,
MAKEFILE,
)
lines = replace_block(
lines,
"# BEGIN: freezing modules",
@ -664,6 +704,13 @@ def regen_makefile(modules):
rules,
MAKEFILE,
)
lines = replace_block(
lines,
"# BEGIN: deepfreeze modules",
"# END: deepfreeze modules",
deepfreezerules,
MAKEFILE,
)
outfile.writelines(lines)
@ -721,7 +768,6 @@ def freeze_module(modname, pyfile=None, destdir=MODULES_DIR):
def _freeze_module(frozenid, pyfile, frozenfile, tmpsuffix):
tmpfile = f'{frozenfile}.{int(time.time())}'
print(tmpfile)
argv = [TOOL, frozenid, pyfile, tmpfile]
print('#', ' '.join(os.path.relpath(a) for a in argv), flush=True)

3
configure vendored
View File

@ -18119,7 +18119,8 @@ SRCDIRS="\
Objects \
Parser \
Programs \
Python"
Python \
Python/deepfreeze"
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for build directories" >&5
$as_echo_n "checking for build directories... " >&6; }
for dir in $SRCDIRS; do

View File

@ -5412,7 +5412,8 @@ SRCDIRS="\
Objects \
Parser \
Programs \
Python"
Python \
Python/deepfreeze"
AC_MSG_CHECKING(for build directories)
for dir in $SRCDIRS; do
if test ! -d $dir; then