From bf95ff91f2c1fc5a57190491f9ccdc63458b089e Mon Sep 17 00:00:00 2001 From: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com> Date: Fri, 4 Feb 2022 23:27:03 +0530 Subject: [PATCH] bpo-46608: exclude marshalled-frozen data if deep-freezing to save 300 KB space (GH-31074) This reduces the size of the data segment by **300 KB** of the executable because if the modules are deep-frozen then the marshalled frozen data just wastes space. This was inspired by comment by @gvanrossum in https://github.com/python/cpython/pull/29118#issuecomment-958521863. Note: There is a new option `--deepfreeze-only` in `freeze_modules.py` to change this behavior, it is on be default to save disk space. ```console # du -s ./python before 27892 ./python # du -s ./python after 27524 ./python ``` Automerge-Triggered-By: GH:ericsnowcurrently --- Doc/c-api/import.rst | 4 + Doc/whatsnew/3.11.rst | 5 ++ Include/cpython/import.h | 1 + Lib/ctypes/test/test_values.py | 10 ++- .../2022-02-02-11-26-46.bpo-46608.cXH9po.rst | 2 + Python/frozen.c | 83 +++++++------------ Python/import.c | 16 +++- Tools/freeze/makefreeze.py | 12 +-- Tools/scripts/freeze_modules.py | 38 +++++---- 9 files changed, 87 insertions(+), 84 deletions(-) create mode 100644 Misc/NEWS.d/next/Build/2022-02-02-11-26-46.bpo-46608.cXH9po.rst diff --git a/Doc/c-api/import.rst b/Doc/c-api/import.rst index d2ae6b6d4e4..5e2333a74ce 100644 --- a/Doc/c-api/import.rst +++ b/Doc/c-api/import.rst @@ -256,8 +256,12 @@ Importing Modules const char *name; const unsigned char *code; int size; + bool is_package; }; + .. versionchanged:: 3.11 + The new ``is_package`` field indicates whether the module is a package or not. + This replaces setting the ``size`` field to a negative value. .. c:var:: const struct _frozen* PyImport_FrozenModules diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst index 18d4652fb42..1558d67d9a8 100644 --- a/Doc/whatsnew/3.11.rst +++ b/Doc/whatsnew/3.11.rst @@ -650,6 +650,11 @@ C API Changes fields of the result from the exception instance (the ``value`` field). (Contributed by Irit Katriel in :issue:`45711`.) +* :c:struct:`_frozen` has a new ``is_package`` field to indicate whether + or not the frozen module is a package. Previously, a negative value + in the ``size`` field was the indicator. Now only non-negative values + be used for ``size``. + (Contributed by Kumar Aditya in :issue:`46608`.) New Features ------------ diff --git a/Include/cpython/import.h b/Include/cpython/import.h index 5ec637e7ab3..da9fb770a92 100644 --- a/Include/cpython/import.h +++ b/Include/cpython/import.h @@ -32,6 +32,7 @@ struct _frozen { const char *name; /* ASCII encoded string */ const unsigned char *code; int size; + bool is_package; PyObject *(*get_code)(void); }; diff --git a/Lib/ctypes/test/test_values.py b/Lib/ctypes/test/test_values.py index 3e8b13768b4..b2db426d10f 100644 --- a/Lib/ctypes/test/test_values.py +++ b/Lib/ctypes/test/test_values.py @@ -54,6 +54,7 @@ class PythonValuesTestCase(unittest.TestCase): _fields_ = [("name", c_char_p), ("code", POINTER(c_ubyte)), ("size", c_int), + ("is_package", c_bool), ("get_code", POINTER(c_ubyte)), # Function ptr ] FrozenTable = POINTER(struct_frozen) @@ -71,13 +72,14 @@ class PythonValuesTestCase(unittest.TestCase): modname = entry.name.decode("ascii") modules.append(modname) with self.subTest(modname): - # Do a sanity check on entry.size and entry.code. - self.assertGreater(abs(entry.size), 10) - self.assertTrue([entry.code[i] for i in range(abs(entry.size))]) + if entry.size != 0: + # Do a sanity check on entry.size and entry.code. + self.assertGreater(abs(entry.size), 10) + self.assertTrue([entry.code[i] for i in range(abs(entry.size))]) # Check the module's package-ness. with import_helper.frozen_modules(): spec = importlib.util.find_spec(modname) - if entry.size < 0: + if entry.is_package: # It's a package. self.assertIsNotNone(spec.submodule_search_locations) else: diff --git a/Misc/NEWS.d/next/Build/2022-02-02-11-26-46.bpo-46608.cXH9po.rst b/Misc/NEWS.d/next/Build/2022-02-02-11-26-46.bpo-46608.cXH9po.rst new file mode 100644 index 00000000000..13c73a614e5 --- /dev/null +++ b/Misc/NEWS.d/next/Build/2022-02-02-11-26-46.bpo-46608.cXH9po.rst @@ -0,0 +1,2 @@ +Exclude marshalled-frozen data if deep-freezing to save 300 KB disk space. This includes adding +a new ``is_package`` field to :c:struct:`_frozen`. Patch by Kumar Aditya. diff --git a/Python/frozen.c b/Python/frozen.c index 25cf0a8d37c..c5b36f73b4a 100644 --- a/Python/frozen.c +++ b/Python/frozen.c @@ -39,29 +39,6 @@ #include "pycore_import.h" /* Includes for frozen modules: */ -#include "frozen_modules/importlib._bootstrap.h" -#include "frozen_modules/importlib._bootstrap_external.h" -#include "frozen_modules/zipimport.h" -#include "frozen_modules/abc.h" -#include "frozen_modules/codecs.h" -#include "frozen_modules/io.h" -#include "frozen_modules/_collections_abc.h" -#include "frozen_modules/_sitebuiltins.h" -#include "frozen_modules/genericpath.h" -#include "frozen_modules/ntpath.h" -#include "frozen_modules/posixpath.h" -#include "frozen_modules/os.h" -#include "frozen_modules/site.h" -#include "frozen_modules/stat.h" -#include "frozen_modules/importlib.util.h" -#include "frozen_modules/importlib.machinery.h" -#include "frozen_modules/runpy.h" -#include "frozen_modules/__hello__.h" -#include "frozen_modules/__phello__.h" -#include "frozen_modules/__phello__.ham.h" -#include "frozen_modules/__phello__.ham.eggs.h" -#include "frozen_modules/__phello__.spam.h" -#include "frozen_modules/frozen_only.h" /* End includes */ #define GET_CODE(name) _Py_get_##name##_toplevel @@ -98,49 +75,47 @@ extern PyObject *_Py_get___phello___spam_toplevel(void); extern PyObject *_Py_get_frozen_only_toplevel(void); /* End extern declarations */ -/* Note that a negative size indicates a package. */ - static const struct _frozen bootstrap_modules[] = { - {"_frozen_importlib", _Py_M__importlib__bootstrap, (int)sizeof(_Py_M__importlib__bootstrap), GET_CODE(importlib__bootstrap)}, - {"_frozen_importlib_external", _Py_M__importlib__bootstrap_external, (int)sizeof(_Py_M__importlib__bootstrap_external), GET_CODE(importlib__bootstrap_external)}, - {"zipimport", _Py_M__zipimport, (int)sizeof(_Py_M__zipimport), GET_CODE(zipimport)}, + {"_frozen_importlib", NULL, 0, false, GET_CODE(importlib__bootstrap)}, + {"_frozen_importlib_external", NULL, 0, false, GET_CODE(importlib__bootstrap_external)}, + {"zipimport", NULL, 0, false, GET_CODE(zipimport)}, {0, 0, 0} /* bootstrap sentinel */ }; static const struct _frozen stdlib_modules[] = { /* stdlib - startup, without site (python -S) */ - {"abc", _Py_M__abc, (int)sizeof(_Py_M__abc), GET_CODE(abc)}, - {"codecs", _Py_M__codecs, (int)sizeof(_Py_M__codecs), GET_CODE(codecs)}, - {"io", _Py_M__io, (int)sizeof(_Py_M__io), GET_CODE(io)}, + {"abc", NULL, 0, false, GET_CODE(abc)}, + {"codecs", NULL, 0, false, GET_CODE(codecs)}, + {"io", NULL, 0, false, GET_CODE(io)}, /* stdlib - startup, with site */ - {"_collections_abc", _Py_M___collections_abc, (int)sizeof(_Py_M___collections_abc), GET_CODE(_collections_abc)}, - {"_sitebuiltins", _Py_M___sitebuiltins, (int)sizeof(_Py_M___sitebuiltins), GET_CODE(_sitebuiltins)}, - {"genericpath", _Py_M__genericpath, (int)sizeof(_Py_M__genericpath), GET_CODE(genericpath)}, - {"ntpath", _Py_M__ntpath, (int)sizeof(_Py_M__ntpath), GET_CODE(ntpath)}, - {"posixpath", _Py_M__posixpath, (int)sizeof(_Py_M__posixpath), GET_CODE(posixpath)}, - {"os.path", _Py_M__posixpath, (int)sizeof(_Py_M__posixpath), GET_CODE(posixpath)}, - {"os", _Py_M__os, (int)sizeof(_Py_M__os), GET_CODE(os)}, - {"site", _Py_M__site, (int)sizeof(_Py_M__site), GET_CODE(site)}, - {"stat", _Py_M__stat, (int)sizeof(_Py_M__stat), GET_CODE(stat)}, + {"_collections_abc", NULL, 0, false, GET_CODE(_collections_abc)}, + {"_sitebuiltins", NULL, 0, false, GET_CODE(_sitebuiltins)}, + {"genericpath", NULL, 0, false, GET_CODE(genericpath)}, + {"ntpath", NULL, 0, false, GET_CODE(ntpath)}, + {"posixpath", NULL, 0, false, GET_CODE(posixpath)}, + {"os.path", NULL, 0, false, GET_CODE(posixpath)}, + {"os", NULL, 0, false, GET_CODE(os)}, + {"site", NULL, 0, false, GET_CODE(site)}, + {"stat", NULL, 0, false, GET_CODE(stat)}, /* runpy - run module with -m */ - {"importlib.util", _Py_M__importlib_util, (int)sizeof(_Py_M__importlib_util), GET_CODE(importlib_util)}, - {"importlib.machinery", _Py_M__importlib_machinery, (int)sizeof(_Py_M__importlib_machinery), GET_CODE(importlib_machinery)}, - {"runpy", _Py_M__runpy, (int)sizeof(_Py_M__runpy), GET_CODE(runpy)}, + {"importlib.util", NULL, 0, false, GET_CODE(importlib_util)}, + {"importlib.machinery", NULL, 0, false, GET_CODE(importlib_machinery)}, + {"runpy", NULL, 0, false, GET_CODE(runpy)}, {0, 0, 0} /* stdlib sentinel */ }; static const struct _frozen test_modules[] = { - {"__hello__", _Py_M____hello__, (int)sizeof(_Py_M____hello__), GET_CODE(__hello__)}, - {"__hello_alias__", _Py_M____hello__, (int)sizeof(_Py_M____hello__), GET_CODE(__hello__)}, - {"__phello_alias__", _Py_M____hello__, -(int)sizeof(_Py_M____hello__), GET_CODE(__hello__)}, - {"__phello_alias__.spam", _Py_M____hello__, (int)sizeof(_Py_M____hello__), GET_CODE(__hello__)}, - {"__phello__", _Py_M____phello__, -(int)sizeof(_Py_M____phello__), GET_CODE(__phello__)}, - {"__phello__.__init__", _Py_M____phello__, (int)sizeof(_Py_M____phello__), GET_CODE(__phello__)}, - {"__phello__.ham", _Py_M____phello___ham, -(int)sizeof(_Py_M____phello___ham), GET_CODE(__phello___ham)}, - {"__phello__.ham.__init__", _Py_M____phello___ham, (int)sizeof(_Py_M____phello___ham), GET_CODE(__phello___ham)}, - {"__phello__.ham.eggs", _Py_M____phello___ham_eggs, (int)sizeof(_Py_M____phello___ham_eggs), GET_CODE(__phello___ham_eggs)}, - {"__phello__.spam", _Py_M____phello___spam, (int)sizeof(_Py_M____phello___spam), GET_CODE(__phello___spam)}, - {"__hello_only__", _Py_M__frozen_only, (int)sizeof(_Py_M__frozen_only), GET_CODE(frozen_only)}, + {"__hello__", NULL, 0, false, GET_CODE(__hello__)}, + {"__hello_alias__", NULL, 0, false, GET_CODE(__hello__)}, + {"__phello_alias__", NULL, 0, true, GET_CODE(__hello__)}, + {"__phello_alias__.spam", NULL, 0, false, GET_CODE(__hello__)}, + {"__phello__", NULL, 0, true, GET_CODE(__phello__)}, + {"__phello__.__init__", NULL, 0, false, GET_CODE(__phello__)}, + {"__phello__.ham", NULL, 0, true, GET_CODE(__phello___ham)}, + {"__phello__.ham.__init__", NULL, 0, false, GET_CODE(__phello___ham)}, + {"__phello__.ham.eggs", NULL, 0, false, GET_CODE(__phello___ham_eggs)}, + {"__phello__.spam", NULL, 0, false, GET_CODE(__phello___spam)}, + {"__hello_only__", NULL, 0, false, GET_CODE(frozen_only)}, {0, 0, 0} /* test sentinel */ }; const struct _frozen *_PyImport_FrozenBootstrap = bootstrap_modules; diff --git a/Python/import.c b/Python/import.c index 51b779ca17c..be60c431f7e 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1297,13 +1297,21 @@ find_frozen(PyObject *nameobj, struct frozen_info *info) info->nameobj = nameobj; // borrowed info->data = (const char *)p->code; info->get_code = p->get_code; - info->size = p->size < 0 ? -(p->size) : p->size; - info->is_package = p->size < 0 ? true : false; + info->size = p->size; + info->is_package = p->is_package; + if (p->size < 0) { + // backward compatibility with negative size values + info->size = -(p->size); + info->is_package = true; + } info->origname = name; info->is_alias = resolve_module_alias(name, _PyImport_FrozenAliases, &info->origname); } - + if (p->code == NULL && p->size == 0 && p->get_code != NULL) { + /* It is only deepfrozen. */ + return FROZEN_OKAY; + } if (p->code == NULL) { /* It is frozen but marked as un-importable. */ return FROZEN_EXCLUDED; @@ -2224,7 +2232,7 @@ _imp_get_frozen_object_impl(PyObject *module, PyObject *name, if (info.nameobj == NULL) { info.nameobj = name; } - if (info.size == 0) { + if (info.size == 0 && info.get_code == NULL) { /* Does not contain executable code. */ set_frozen_error(FROZEN_INVALID, name); return NULL; diff --git a/Tools/freeze/makefreeze.py b/Tools/freeze/makefreeze.py index d7d05db88a9..bc5f8567448 100644 --- a/Tools/freeze/makefreeze.py +++ b/Tools/freeze/makefreeze.py @@ -45,19 +45,19 @@ def makefreeze(base, dict, debug=0, entry_point=None, fail_import=()): print("freezing", mod, "...") str = marshal.dumps(m.__code__) size = len(str) + is_package = 'false' if m.__path__: - # Indicate package by negative size - size = -size - done.append((mod, mangled, size)) + is_package = 'true' + done.append((mod, mangled, size, is_package)) writecode(outfp, mangled, str) if debug: print("generating table of frozen modules") with bkfile.open(base + 'frozen.c', 'w') as outfp: - for mod, mangled, size in done: + for mod, mangled, size, _ in done: outfp.write('extern unsigned char M_%s[];\n' % mangled) outfp.write(header) - for mod, mangled, size in done: - outfp.write('\t{"%s", M_%s, %d},\n' % (mod, mangled, size)) + for mod, mangled, size, is_package in done: + outfp.write('\t{"%s", M_%s, %d, %s},\n' % (mod, mangled, size, is_package)) outfp.write('\n') # The following modules have a NULL code pointer, indicating # that the frozen program should not search for them on the host diff --git a/Tools/scripts/freeze_modules.py b/Tools/scripts/freeze_modules.py index 6d10758b528..03dcf939f97 100644 --- a/Tools/scripts/freeze_modules.py +++ b/Tools/scripts/freeze_modules.py @@ -9,7 +9,7 @@ import os import ntpath import posixpath import sys - +import argparse from update_file import updating_file_with_tmpfile @@ -463,14 +463,15 @@ def replace_block(lines, start_marker, end_marker, replacements, file): return lines[:start_pos + 1] + replacements + lines[end_pos:] -def regen_frozen(modules): +def regen_frozen(modules, deepfreeze_only: bool): headerlines = [] parentdir = os.path.dirname(FROZEN_FILE) - for src in _iter_sources(modules): - # Adding a comment to separate sections here doesn't add much, - # so we don't. - header = relpath_for_posix_display(src.frozenfile, parentdir) - headerlines.append(f'#include "{header}"') + if not deepfreeze_only: + for src in _iter_sources(modules): + # Adding a comment to separate sections here doesn't add much, + # so we don't. + header = relpath_for_posix_display(src.frozenfile, parentdir) + headerlines.append(f'#include "{header}"') externlines = [] bootstraplines = [] @@ -500,9 +501,13 @@ def regen_frozen(modules): externlines.append("extern PyObject *%s(void);" % get_code_name) symbol = mod.symbol - pkg = '-' if mod.ispkg else '' - line = ('{"%s", %s, %s(int)sizeof(%s), GET_CODE(%s)},' - ) % (mod.name, symbol, pkg, symbol, code_name) + pkg = 'true' if mod.ispkg else 'false' + if deepfreeze_only: + line = ('{"%s", NULL, 0, %s, GET_CODE(%s)},' + ) % (mod.name, pkg, code_name) + else: + line = ('{"%s", %s, (int)sizeof(%s), %s, GET_CODE(%s)},' + ) % (mod.name, symbol, symbol, pkg, code_name) lines.append(line) if mod.isalias: @@ -710,18 +715,19 @@ def regen_pcbuild(modules): ####################################### # the script -def main(): +def main(deepfreeze_only: bool): # Expand the raw specs, preserving order. modules = list(parse_frozen_specs()) # Regen build-related files. regen_makefile(modules) regen_pcbuild(modules) - regen_frozen(modules) + regen_frozen(modules, deepfreeze_only) if __name__ == '__main__': - argv = sys.argv[1:] - if argv: - sys.exit(f'ERROR: got unexpected args {argv}') - main() + parser = argparse.ArgumentParser() + parser.add_argument("--deepfreeze-only", action="store_true", + help="Only use deepfrozen modules", default=True) + args = parser.parse_args() + main(args.deepfreeze_only)