diff --git a/.gitattributes b/.gitattributes index 68566e89924..b9c08cdd7d6 100644 --- a/.gitattributes +++ b/.gitattributes @@ -46,8 +46,7 @@ Modules/clinic/*.h linguist-generated=true Objects/clinic/*.h linguist-generated=true PC/clinic/*.h linguist-generated=true Python/clinic/*.h linguist-generated=true -Python/importlib.h linguist-generated=true -Python/importlib_external.h linguist-generated=true +Python/frozen_modules/*.h linguist-generated=true Include/internal/pycore_ast.h linguist-generated=true Python/Python-ast.c linguist-generated=true Include/opcode.h linguist-generated=true diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c5d967dec7e..05bdf2445a2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -71,6 +71,7 @@ jobs: make regen-stdlib-module-names - name: Check for changes run: | + git add -u changes=$(git status --porcelain) # Check for changes in regenerated files if ! test -z "$changes" diff --git a/.gitignore b/.gitignore index a96be679622..0ed4c8bdd0c 100644 --- a/.gitignore +++ b/.gitignore @@ -68,7 +68,7 @@ Modules/Setup.config Modules/Setup.local Modules/config.c Modules/ld_so_aix -Programs/_freeze_importlib +Programs/_freeze_module Programs/_testembed PC/python_nt*.h PC/pythonnt_rc*.h diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst index 60564241bfd..2fcbcc8d77b 100644 --- a/Doc/c-api/init.rst +++ b/Doc/c-api/init.rst @@ -110,7 +110,7 @@ to 1 and ``-bb`` sets :c:data:`Py_BytesWarningFlag` to 2. Suppress error messages when calculating the module search path in :c:func:`Py_GetPath`. - Private flag used by ``_freeze_importlib`` and ``frozenmain`` programs. + Private flag used by ``_freeze_module`` and ``frozenmain`` programs. .. 
c:var:: int Py_HashRandomizationFlag diff --git a/Makefile.pre.in b/Makefile.pre.in index 1007f440759..804d0192bc5 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -574,8 +574,8 @@ coverage-lcov: @echo "lcov report at $(COVERAGE_REPORT)/index.html" @echo -# Force regeneration of parser and importlib -coverage-report: regen-token regen-importlib +# Force regeneration of parser and frozen modules +coverage-report: regen-token regen-frozen @ # build with coverage info $(MAKE) coverage @ # run tests, ignore failures @@ -734,45 +734,60 @@ Programs/_testembed: Programs/_testembed.o $(LIBRARY_DEPS) $(LINKCC) $(PY_CORE_LDFLAGS) $(LINKFORSHARED) -o $@ Programs/_testembed.o $(BLDLIBRARY) $(LIBS) $(MODLIBS) $(SYSLIBS) ############################################################################ -# Importlib +# frozen modules (including importlib) -Programs/_freeze_importlib.o: Programs/_freeze_importlib.c Makefile +Programs/_freeze_module.o: Programs/_freeze_module.c Makefile -Programs/_freeze_importlib: Programs/_freeze_importlib.o $(LIBRARY_OBJS_OMIT_FROZEN) - $(LINKCC) $(PY_CORE_LDFLAGS) -o $@ Programs/_freeze_importlib.o $(LIBRARY_OBJS_OMIT_FROZEN) $(LIBS) $(MODLIBS) $(SYSLIBS) +Programs/_freeze_module: Programs/_freeze_module.o $(LIBRARY_OBJS_OMIT_FROZEN) + $(LINKCC) $(PY_CORE_LDFLAGS) -o $@ Programs/_freeze_module.o $(LIBRARY_OBJS_OMIT_FROZEN) $(LIBS) $(MODLIBS) $(SYSLIBS) +Tools/scripts/freeze_modules.py: Programs/_freeze_module + +.PHONY: regen-frozen +regen-frozen: Tools/scripts/freeze_modules.py $(FROZEN_FILES) + $(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/freeze_modules.py + @echo "The Makefile was updated, you may need to re-run make." 
+ +# BEGIN: freezing modules + +Python/frozen_modules/importlib__bootstrap.h: $(srcdir)/Programs/_freeze_module $(srcdir)/Lib/importlib/_bootstrap.py + $(srcdir)/Programs/_freeze_module importlib._bootstrap \ + $(srcdir)/Lib/importlib/_bootstrap.py \ + $(srcdir)/Python/frozen_modules/importlib__bootstrap.h + +Python/frozen_modules/importlib__bootstrap_external.h: $(srcdir)/Programs/_freeze_module $(srcdir)/Lib/importlib/_bootstrap_external.py + $(srcdir)/Programs/_freeze_module importlib._bootstrap_external \ + $(srcdir)/Lib/importlib/_bootstrap_external.py \ + $(srcdir)/Python/frozen_modules/importlib__bootstrap_external.h + +Python/frozen_modules/zipimport.h: $(srcdir)/Programs/_freeze_module $(srcdir)/Lib/zipimport.py + $(srcdir)/Programs/_freeze_module zipimport \ + $(srcdir)/Lib/zipimport.py \ + $(srcdir)/Python/frozen_modules/zipimport.h + +Python/frozen_modules/hello.h: $(srcdir)/Programs/_freeze_module $(srcdir)/Tools/freeze/flag.py + $(srcdir)/Programs/_freeze_module hello \ + $(srcdir)/Tools/freeze/flag.py \ + $(srcdir)/Python/frozen_modules/hello.h + +# END: freezing modules + +# We keep this renamed target around for folks with muscle memory. 
.PHONY: regen-importlib -regen-importlib: Programs/_freeze_importlib - # Regenerate Python/importlib_external.h - # from Lib/importlib/_bootstrap_external.py using _freeze_importlib - ./Programs/_freeze_importlib importlib._bootstrap_external \ - $(srcdir)/Lib/importlib/_bootstrap_external.py \ - $(srcdir)/Python/importlib_external.h.new - $(UPDATE_FILE) $(srcdir)/Python/importlib_external.h $(srcdir)/Python/importlib_external.h.new - # Regenerate Python/importlib.h from Lib/importlib/_bootstrap.py - # using _freeze_importlib - ./Programs/_freeze_importlib importlib._bootstrap \ - $(srcdir)/Lib/importlib/_bootstrap.py \ - $(srcdir)/Python/importlib.h.new - $(UPDATE_FILE) $(srcdir)/Python/importlib.h $(srcdir)/Python/importlib.h.new - # Regenerate Python/importlib_zipimport.h from Lib/zipimport.py - # using _freeze_importlib - ./Programs/_freeze_importlib zipimport \ - $(srcdir)/Lib/zipimport.py \ - $(srcdir)/Python/importlib_zipimport.h.new - $(UPDATE_FILE) $(srcdir)/Python/importlib_zipimport.h $(srcdir)/Python/importlib_zipimport.h.new +regen-importlib: regen-frozen +############################################################################ +# ABI regen-limited-abi: all $(RUNSHARED) ./$(BUILDPYTHON) $(srcdir)/Tools/scripts/stable_abi.py --generate-all $(srcdir)/Misc/stable_abi.txt - ############################################################################ # Regenerate all generated files regen-all: regen-opcode regen-opcode-targets regen-typeslots \ - regen-token regen-ast regen-keyword regen-importlib clinic \ - regen-pegen-metaparser regen-pegen regen-frozen regen-test-frozenmain + regen-token regen-ast regen-keyword regen-frozen clinic \ + regen-pegen-metaparser regen-pegen regen-test-frozenmain @echo @echo "Note: make regen-stdlib-module-names and autoconf should be run manually" @@ -884,15 +899,6 @@ regen-opcode: $(srcdir)/Include/opcode.h.new $(UPDATE_FILE) $(srcdir)/Include/opcode.h $(srcdir)/Include/opcode.h.new -.PHONY: regen-frozen -regen-frozen: 
Programs/_freeze_importlib - # Regenerate code for frozen module "__hello__". - ./Programs/_freeze_importlib hello \ - $(srcdir)/Tools/freeze/flag.py \ - $(srcdir)/Python/frozen_hello.h.new - $(UPDATE_FILE) $(srcdir)/Python/frozen_hello.h \ - $(srcdir)/Python/frozen_hello.h.new - .PHONY: regen-token regen-token: # Regenerate Doc/library/token-list.inc from Grammar/Tokens @@ -995,8 +1001,15 @@ regen-opcode-targets: Python/ceval.o: $(srcdir)/Python/opcode_targets.h $(srcdir)/Python/ceval_gil.h \ $(srcdir)/Python/condvar.h -Python/frozen.o: $(srcdir)/Python/importlib.h $(srcdir)/Python/importlib_external.h \ - $(srcdir)/Python/importlib_zipimport.h $(srcdir)/Python/frozen_hello.h +# FROZEN_FILES is auto-generated by Tools/scripts/freeze_modules.py. +FROZEN_FILES = \ + $(srcdir)/Python/frozen_modules/importlib__bootstrap.h \ + $(srcdir)/Python/frozen_modules/importlib__bootstrap_external.h \ + $(srcdir)/Python/frozen_modules/zipimport.h \ + $(srcdir)/Python/frozen_modules/hello.h +# End FROZEN_FILES + +Python/frozen.o: $(FROZEN_FILES) # Generate DTrace probe macros, then rename them (PYTHON_ -> PyDTrace_) to # follow our naming conventions. dtrace(1) uses the output filename to generate @@ -1918,7 +1931,7 @@ clean-retain-profile: pycremoval find build -name '*.py[co]' -exec rm -f {} ';' || true -rm -f pybuilddir.txt -rm -f Lib/lib2to3/*Grammar*.pickle - -rm -f Programs/_testembed Programs/_freeze_importlib + -rm -f Programs/_testembed Programs/_freeze_module -find build -type f -a ! -name '*.gc??' -exec rm -f {} ';' -rm -f Include/pydtrace_probes.h -rm -f profile-gen-stamp diff --git a/Misc/NEWS.d/next/Build/2021-08-26-13-10-46.bpo-45019.e0mo49.rst b/Misc/NEWS.d/next/Build/2021-08-26-13-10-46.bpo-45019.e0mo49.rst new file mode 100644 index 00000000000..d11c6451462 --- /dev/null +++ b/Misc/NEWS.d/next/Build/2021-08-26-13-10-46.bpo-45019.e0mo49.rst @@ -0,0 +1,3 @@ +Generate lines in relevant files for frozen modules. 
Up until now each of +the files had to be edited manually. This change makes it easier to add to +and modify the frozen modules. diff --git a/PCbuild/_freeze_importlib.vcxproj b/PCbuild/_freeze_module.vcxproj similarity index 79% rename from PCbuild/_freeze_importlib.vcxproj rename to PCbuild/_freeze_module.vcxproj index e437412a161..a0bedf49e69 100644 --- a/PCbuild/_freeze_importlib.vcxproj +++ b/PCbuild/_freeze_module.vcxproj @@ -69,7 +69,7 @@ {19C0C13F-47CA-4432-AFF3-799A296A4DDC} Win32Proj - _freeze_importlib + _freeze_module false @@ -95,7 +95,7 @@ - + @@ -108,31 +108,33 @@ + importlib._bootstrap - $(IntDir)importlib.g.h - $(PySourcePath)Python\importlib.h + $(IntDir)importlib__bootstrap.g.h + $(PySourcePath)Python\frozen_modules\importlib__bootstrap.h importlib._bootstrap_external - $(IntDir)importlib_external.g.h - $(PySourcePath)Python\importlib_external.h + $(IntDir)importlib__bootstrap_external.g.h + $(PySourcePath)Python\frozen_modules\importlib__bootstrap_external.h zipimport - $(IntDir)importlib_zipimport.g.h - $(PySourcePath)Python\importlib_zipimport.h + $(IntDir)zipimport.g.h + $(PySourcePath)Python\frozen_modules\zipimport.h hello - $(IntDir)frozen_hello.g.h - $(PySourcePath)Python\frozen_hello.h + $(IntDir)ello.g.h + $(PySourcePath)Python\frozen_modules\hello.h + - + - - - + - + + + diff --git a/PCbuild/_freeze_importlib.vcxproj.filters b/PCbuild/_freeze_module.vcxproj.filters similarity index 76% rename from PCbuild/_freeze_importlib.vcxproj.filters rename to PCbuild/_freeze_module.vcxproj.filters index 3ee9eb750d6..bed7920fdba 100644 --- a/PCbuild/_freeze_importlib.vcxproj.filters +++ b/PCbuild/_freeze_module.vcxproj.filters @@ -10,19 +10,24 @@ - + Source Files + - Source Files - - Python Files Python Files + + Python Files + + + Python Files + + - \ No newline at end of file + diff --git a/PCbuild/pcbuild.proj b/PCbuild/pcbuild.proj index 8e7088d47d2..f464ad3b18e 100644 --- a/PCbuild/pcbuild.proj +++ b/PCbuild/pcbuild.proj @@ -72,8 +72,8 @@ 
false - - + + diff --git a/PCbuild/pcbuild.sln b/PCbuild/pcbuild.sln index 3507b972797..c774e049717 100644 --- a/PCbuild/pcbuild.sln +++ b/PCbuild/pcbuild.sln @@ -75,7 +75,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pywlauncher", "pywlauncher. {7B2727B5-5A3F-40EE-A866-43A13CD31446} = {7B2727B5-5A3F-40EE-A866-43A13CD31446} EndProjectSection EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "_freeze_importlib", "_freeze_importlib.vcxproj", "{19C0C13F-47CA-4432-AFF3-799A296A4DDC}" +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "_freeze_module", "_freeze_module.vcxproj", "{19C0C13F-47CA-4432-AFF3-799A296A4DDC}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "_overlapped", "_overlapped.vcxproj", "{EB6E69DD-04BF-4543-9B92-49FAABCEAC2E}" EndProject diff --git a/PCbuild/readme.txt b/PCbuild/readme.txt index 6c25522ea48..5ecded06e58 100644 --- a/PCbuild/readme.txt +++ b/PCbuild/readme.txt @@ -115,9 +115,10 @@ _testembed These are miscellaneous sub-projects that don't really fit the other categories: -_freeze_importlib - _freeze_importlib.exe, used to regenerate Python\importlib.h after - changes have been made to Lib\importlib\_bootstrap.py +_freeze_module + _freeze_module.exe, used to regenerate frozen modules in Python + after changes have been made to the corresponding source files + (e.g. Lib\importlib\_bootstrap.py). 
pyshellext pyshellext.dll, the shell extension deployed with the launcher python3dll diff --git a/Programs/_freeze_importlib.c b/Programs/_freeze_module.c similarity index 53% rename from Programs/_freeze_importlib.c rename to Programs/_freeze_module.c index 2e4ccbb154a..7e9f02aec8a 100644 --- a/Programs/_freeze_importlib.c +++ b/Programs/_freeze_module.c @@ -1,5 +1,10 @@ /* This is built as a stand-alone executable by the Makefile, and helps turn - Lib/importlib/_bootstrap.py into a frozen module in Python/importlib.h + modules into frozen modules (like Lib/importlib/_bootstrap.py + into Python/frozen_modules/importlib__bootstrap.h). + + This is used directly by Tools/scripts/freeze_modules.py, and indirectly by "make regen-frozen". + + See Python/frozen.c for more info. */ #include @@ -28,54 +33,11 @@ const struct _frozen *PyImport_FrozenModules; #endif static const char header[] = - "/* Auto-generated by Programs/_freeze_importlib.c */"; + "/* Auto-generated by Programs/_freeze_module.c */"; -int -main(int argc, char *argv[]) +static void +runtime_init(void) { - const char *name, *inpath, *outpath; - char buf[100]; - FILE *infile = NULL, *outfile = NULL; - struct _Py_stat_struct stat; - size_t text_size, data_size, i, n; - char *text = NULL; - unsigned char *data; - PyObject *code = NULL, *marshalled = NULL; - - PyImport_FrozenModules = _PyImport_FrozenModules; - - if (argc != 4) { - fprintf(stderr, "need to specify the name, input and output paths\n"); - return 2; - } - name = argv[1]; - inpath = argv[2]; - outpath = argv[3]; - infile = fopen(inpath, "rb"); - if (infile == NULL) { - fprintf(stderr, "cannot open '%s' for reading\n", inpath); - goto error; - } - if (_Py_fstat_noraise(fileno(infile), &stat)) { - fprintf(stderr, "cannot fstat '%s'\n", inpath); - goto error; - } - text_size = (size_t)stat.st_size; - text = (char *) malloc(text_size + 1); - if (text == NULL) { - fprintf(stderr, "could not allocate %ld bytes\n", (long) text_size); - goto error; - } - n = fread(text, 1, 
text_size, infile); - fclose(infile); - infile = NULL; - if (n < text_size) { - fprintf(stderr, "read too short: got %ld instead of %ld bytes\n", - (long) n, (long) text_size); - goto error; - } - text[text_size] = '\0'; - PyConfig config; PyConfig_InitIsolatedConfig(&config); @@ -83,7 +45,7 @@ main(int argc, char *argv[]) PyStatus status; status = PyConfig_SetString(&config, &config.program_name, - L"./_freeze_importlib"); + L"./_freeze_module"); if (PyStatus_Exception(status)) { PyConfig_Clear(&config); Py_ExitStatusException(status); @@ -98,39 +60,93 @@ main(int argc, char *argv[]) if (PyStatus_Exception(status)) { Py_ExitStatusException(status); } +} - sprintf(buf, "", name); - code = Py_CompileStringExFlags(text, buf, Py_file_input, NULL, 0); - if (code == NULL) - goto error; - free(text); - text = NULL; - - marshalled = PyMarshal_WriteObjectToString(code, Py_MARSHAL_VERSION); - Py_CLEAR(code); - if (marshalled == NULL) - goto error; - - assert(PyBytes_CheckExact(marshalled)); - data = (unsigned char *) PyBytes_AS_STRING(marshalled); - data_size = PyBytes_GET_SIZE(marshalled); - - /* Open the file in text mode. 
The hg checkout should be using the eol extension, - which in turn should cause the EOL style match the C library's text mode */ - outfile = fopen(outpath, "w"); - if (outfile == NULL) { - fprintf(stderr, "cannot open '%s' for writing\n", outpath); - goto error; +static const char * +read_text(const char *inpath) +{ + FILE *infile = fopen(inpath, "rb"); + if (infile == NULL) { + fprintf(stderr, "cannot open '%s' for reading\n", inpath); + return NULL; } - fprintf(outfile, "%s\n", header); - for (i = n = 0; name[i] != '\0'; i++) { - if (name[i] != '.') { - buf[n++] = name[i]; + + struct _Py_stat_struct stat; + if (_Py_fstat_noraise(fileno(infile), &stat)) { + fprintf(stderr, "cannot fstat '%s'\n", inpath); + fclose(infile); + return NULL; + } + size_t text_size = (size_t)stat.st_size; + + char *text = (char *) malloc(text_size + 1); + if (text == NULL) { + fprintf(stderr, "could not allocate %ld bytes\n", (long) text_size); + fclose(infile); + return NULL; + } + size_t n = fread(text, 1, text_size, infile); + fclose(infile); + + if (n < text_size) { + fprintf(stderr, "read too short: got %ld instead of %ld bytes\n", + (long) n, (long) text_size); + free(text); + return NULL; + } + + text[text_size] = '\0'; + return (const char *)text; +} + +static PyObject * +compile_and_marshal(const char *name, const char *text) +{ + char *filename = (char *) malloc(strlen(name) + 10); + sprintf(filename, "", name); + PyObject *code = Py_CompileStringExFlags(text, filename, + Py_file_input, NULL, 0); + free(filename); + if (code == NULL) { + return NULL; + } + + PyObject *marshalled = PyMarshal_WriteObjectToString(code, Py_MARSHAL_VERSION); + Py_CLEAR(code); + if (marshalled == NULL) { + return NULL; + } + assert(PyBytes_CheckExact(marshalled)); + + return marshalled; +} + +static char * +get_varname(const char *name, const char *prefix) +{ + size_t n = strlen(prefix); + char *varname = (char *) malloc(strlen(name) + n + 1); + (void)strcpy(varname, prefix); + for (size_t i = 0; 
name[i] != '\0'; i++) { + if (name[i] == '.') { + varname[n++] = '_'; + } + else { + varname[n++] = name[i]; } } - buf[n] = '\0'; - fprintf(outfile, "const unsigned char _Py_M__%s[] = {\n", buf); - for (n = 0; n < data_size; n += 16) { + varname[n] = '\0'; + return varname; +} + +static void +write_code(FILE *outfile, PyObject *marshalled, const char *varname) +{ + unsigned char *data = (unsigned char *) PyBytes_AS_STRING(marshalled); + size_t data_size = PyBytes_GET_SIZE(marshalled); + + fprintf(outfile, "const unsigned char %s[] = {\n", varname); + for (size_t n = 0; n < data_size; n += 16) { size_t i, end = Py_MIN(n + 16, data_size); fprintf(outfile, " "); for (i = n; i < end; i++) { @@ -139,29 +155,72 @@ main(int argc, char *argv[]) fprintf(outfile, "\n"); } fprintf(outfile, "};\n"); +} - Py_CLEAR(marshalled); +static int +write_frozen(const char *outpath, const char *inpath, const char *name, + PyObject *marshalled) +{ + /* Open the file in text mode. The hg checkout should be using the eol extension, + which in turn should cause the EOL style match the C library's text mode */ + FILE *outfile = fopen(outpath, "w"); + if (outfile == NULL) { + fprintf(stderr, "cannot open '%s' for writing\n", outpath); + return -1; + } + + fprintf(outfile, "%s\n", header); + char *arrayname = get_varname(name, "_Py_M__"); + write_code(outfile, marshalled, arrayname); + free(arrayname); + + if (ferror(outfile)) { + fprintf(stderr, "error when writing to '%s'\n", outpath); + return -1; + } + fclose(outfile); + return 0; +} + +int +main(int argc, char *argv[]) +{ + const char *name, *inpath, *outpath; + + PyImport_FrozenModules = _PyImport_FrozenModules; + + if (argc != 4) { + fprintf(stderr, "need to specify the name, input and output paths\n"); + return 2; + } + name = argv[1]; + inpath = argv[2]; + outpath = argv[3]; + + runtime_init(); + + const char *text = read_text(inpath); + if (text == NULL) { + goto error; + } + + PyObject *marshalled = compile_and_marshal(name, text); + 
free((char *)text); + if (marshalled == NULL) { + goto error; + } + + int res = write_frozen(outpath, inpath, name, marshalled); + Py_DECREF(marshalled); + if (res != 0) { + goto error; + } Py_Finalize(); - if (outfile) { - if (ferror(outfile)) { - fprintf(stderr, "error when writing to '%s'\n", outpath); - goto error; - } - fclose(outfile); - } return 0; error: PyErr_Print(); Py_Finalize(); - if (infile) - fclose(infile); - if (outfile) - fclose(outfile); - if (text) - free(text); - if (marshalled) - Py_DECREF(marshalled); return 1; } diff --git a/Python/frozen.c b/Python/frozen.c index 7f433ff80ca..67aff2ed2eb 100644 --- a/Python/frozen.c +++ b/Python/frozen.c @@ -1,35 +1,63 @@ -/* Frozen modules initializer */ - -#include "Python.h" -#include "importlib.h" -#include "importlib_external.h" -#include "importlib_zipimport.h" +/* Frozen modules initializer + * + * Frozen modules are written to header files by Programs/_freeze_module. + * These files are typically put in Python/frozen_modules/. Each holds + * an array of bytes named "_Py_M__", which is used below. + * + * These files must be regenerated any time the corresponding .pyc + * file would change (including with changes to the compiler, bytecode + * format, marshal format). This can be done with "make regen-frozen". + * That make target just runs Tools/scripts/freeze_modules.py. + * + * The freeze_modules.py script also determines which modules get + * frozen. Update the list at the top of the script to add, remove, + * or modify the target modules. Then run the script + * (or run "make regen-frozen"). + * + * The script does the following: + * + * 1. run Programs/_freeze_module on the target modules + * 2. update the includes and _PyImport_FrozenModules[] in this file + * 3. update the FROZEN_FILES variable in Makefile.pre.in + * 4. update the per-module targets in Makefile.pre.in + * 5. 
update the lists of modules in PCbuild/_freeze_module.vcxproj and + * PCbuild/_freeze_module.vcxproj.filters + * + * (Note that most of the data in this file is auto-generated by the script.) + * + * Those steps can also be done manually, though this is not recommended. + * Expect such manual changes to be removed the next time + * freeze_modules.py runs. + * */ /* In order to test the support for frozen modules, by default we - define a single frozen module, __hello__. Loading it will print - some famous words... */ + define some simple frozen modules: __hello__, __phello__ (a package), + and __phello__.spam. Loading any will print some famous words... */ -/* Run "make regen-frozen" to regen the file below (e.g. after a bytecode - * format change). The include file defines _Py_M__hello as an array of bytes. - */ -#include "frozen_hello.h" +#include "Python.h" -#define SIZE (int)sizeof(_Py_M__hello) +/* Includes for frozen modules: */ +#include "frozen_modules/importlib__bootstrap.h" +#include "frozen_modules/importlib__bootstrap_external.h" +#include "frozen_modules/zipimport.h" +#include "frozen_modules/hello.h" +/* End includes */ + +/* Note that a negative size indicates a package. 
*/ static const struct _frozen _PyImport_FrozenModules[] = { /* importlib */ - {"_frozen_importlib", _Py_M__importlib_bootstrap, - (int)sizeof(_Py_M__importlib_bootstrap)}, - {"_frozen_importlib_external", _Py_M__importlib_bootstrap_external, - (int)sizeof(_Py_M__importlib_bootstrap_external)}, - {"zipimport", _Py_M__zipimport, - (int)sizeof(_Py_M__zipimport)}, + {"_frozen_importlib", _Py_M__importlib__bootstrap, + (int)sizeof(_Py_M__importlib__bootstrap)}, + {"_frozen_importlib_external", _Py_M__importlib__bootstrap_external, + (int)sizeof(_Py_M__importlib__bootstrap_external)}, + {"zipimport", _Py_M__zipimport, (int)sizeof(_Py_M__zipimport)}, + /* Test module */ - {"__hello__", _Py_M__hello, SIZE}, - /* Test package (negative size indicates package-ness) */ - {"__phello__", _Py_M__hello, -SIZE}, - {"__phello__.spam", _Py_M__hello, SIZE}, + {"__hello__", _Py_M__hello, (int)sizeof(_Py_M__hello)}, + {"__phello__", _Py_M__hello, -(int)sizeof(_Py_M__hello)}, + {"__phello__.spam", _Py_M__hello, (int)sizeof(_Py_M__hello)}, {0, 0, 0} /* sentinel */ }; diff --git a/Python/frozen_hello.h b/Python/frozen_modules/hello.h similarity index 91% rename from Python/frozen_hello.h rename to Python/frozen_modules/hello.h index c65c661e9bf..2658c05886a 100644 --- a/Python/frozen_hello.h +++ b/Python/frozen_modules/hello.h @@ -1,4 +1,4 @@ -/* Auto-generated by Programs/_freeze_importlib.c */ +/* Auto-generated by Programs/_freeze_module.c */ const unsigned char _Py_M__hello[] = { 99,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0, 0,0,0,0,0,115,16,0,0,0,100,0,90,0,101,1, diff --git a/Python/importlib.h b/Python/frozen_modules/importlib__bootstrap.h similarity index 99% rename from Python/importlib.h rename to Python/frozen_modules/importlib__bootstrap.h index 69bd9727237..2716896c21f 100644 --- a/Python/importlib.h +++ b/Python/frozen_modules/importlib__bootstrap.h @@ -1,5 +1,5 @@ -/* Auto-generated by Programs/_freeze_importlib.c */ -const unsigned char _Py_M__importlib_bootstrap[] = { +/* 
Auto-generated by Programs/_freeze_module.c */ +const unsigned char _Py_M__importlib__bootstrap[] = { 99,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0, 0,0,0,0,0,115,130,1,0,0,100,0,90,0,100,1, 132,0,90,1,100,2,90,2,100,2,90,3,100,2,90,4, diff --git a/Python/importlib_external.h b/Python/frozen_modules/importlib__bootstrap_external.h similarity index 99% rename from Python/importlib_external.h rename to Python/frozen_modules/importlib__bootstrap_external.h index c49fa5516eb..7a3410067d4 100644 --- a/Python/importlib_external.h +++ b/Python/frozen_modules/importlib__bootstrap_external.h @@ -1,5 +1,5 @@ -/* Auto-generated by Programs/_freeze_importlib.c */ -const unsigned char _Py_M__importlib_bootstrap_external[] = { +/* Auto-generated by Programs/_freeze_module.c */ +const unsigned char _Py_M__importlib__bootstrap_external[] = { 99,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0, 0,0,0,0,0,115,158,2,0,0,100,0,90,0,100,1, 97,1,100,2,100,1,108,2,90,2,100,2,100,1,108,3, diff --git a/Python/importlib_zipimport.h b/Python/frozen_modules/zipimport.h similarity index 99% rename from Python/importlib_zipimport.h rename to Python/frozen_modules/zipimport.h index c12ed5215b3..b4e2e85283c 100644 --- a/Python/importlib_zipimport.h +++ b/Python/frozen_modules/zipimport.h @@ -1,4 +1,4 @@ -/* Auto-generated by Programs/_freeze_importlib.c */ +/* Auto-generated by Programs/_freeze_module.c */ const unsigned char _Py_M__zipimport[] = { 99,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0, 0,0,0,0,0,115,48,1,0,0,100,0,90,0,100,1, diff --git a/Tools/scripts/freeze_modules.py b/Tools/scripts/freeze_modules.py new file mode 100644 index 00000000000..4f60e1b9a3a --- /dev/null +++ b/Tools/scripts/freeze_modules.py @@ -0,0 +1,496 @@ +"""Freeze modules and regen related files (e.g. Python/frozen.c). + +See the notes at the top of Python/frozen.c for more info. 
+""" + +import os +import os.path +import subprocess +import sys +import textwrap + +from update_file import updating_file_with_tmpfile + + +SCRIPTS_DIR = os.path.abspath(os.path.dirname(__file__)) +TOOLS_DIR = os.path.dirname(SCRIPTS_DIR) +ROOT_DIR = os.path.dirname(TOOLS_DIR) + +STDLIB_DIR = os.path.join(ROOT_DIR, 'Lib') +# If MODULES_DIR is changed then the .gitattributes file needs to be updated. +MODULES_DIR = os.path.join(ROOT_DIR, 'Python/frozen_modules') +TOOL = os.path.join(ROOT_DIR, 'Programs', '_freeze_module') + +FROZEN_FILE = os.path.join(ROOT_DIR, 'Python', 'frozen.c') +MAKEFILE = os.path.join(ROOT_DIR, 'Makefile.pre.in') +PCBUILD_PROJECT = os.path.join(ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj') +PCBUILD_FILTERS = os.path.join(ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj.filters') + +# These are modules that get frozen. +FROZEN = [ + # See parse_frozen_spec() for the format. + # In cases where the frozenid is duplicated, the first one is re-used. + ('importlib', [ + 'importlib._bootstrap : _frozen_importlib', + 'importlib._bootstrap_external : _frozen_importlib_external', + 'zipimport', + ]), + ('Test module', [ + 'hello : __hello__ = ' + os.path.join(TOOLS_DIR, 'freeze', 'flag.py'), + 'hello : <__phello__>', + 'hello : __phello__.spam', + ]), +] + + +####################################### +# specs + +def parse_frozen_spec(rawspec, knownids=None, section=None): + """Yield (frozenid, pyfile, modname, ispkg) for the corresponding modules. + + Supported formats: + + frozenid + frozenid : modname + frozenid : modname = pyfile + + "frozenid" and "modname" must be valid module names (dot-separated + identifiers). If "modname" is not provided then "frozenid" is used. + If "pyfile" is not provided then the filename of the module + corresponding to "frozenid" is used. + + Angle brackets around a frozenid (e.g. '") indicate + it is a package. This also means it must be an actual module + (i.e. "pyfile" cannot have been provided). 
Such values can have + patterns to expand submodules: + + <pkg.*> - also freeze all direct submodules + <pkg.**.*> - also freeze the full submodule tree + + As with "frozenid", angle brackets around "modname" indicate + it is a package. However, in this case "pyfile" should not + have been provided and patterns in "modname" are not supported. + Also, if "modname" has brackets then "frozenid" should not, + and "pyfile" should have been provided. + """ + frozenid, _, remainder = rawspec.partition(':') + modname, _, pyfile = remainder.partition('=') + frozenid = frozenid.strip() + modname = modname.strip() + pyfile = pyfile.strip() + + submodules = None + if modname.startswith('<') and modname.endswith('>'): + assert check_modname(frozenid), rawspec + modname = modname[1:-1] + assert check_modname(modname), rawspec + if knownids and frozenid in knownids: # knownids defaults to None + pass + elif pyfile: + assert not os.path.isdir(pyfile), rawspec + else: + pyfile = _resolve_module(frozenid, ispkg=False) + ispkg = True + elif pyfile: + assert check_modname(frozenid), rawspec + assert not knownids or frozenid not in knownids, rawspec + assert check_modname(modname), rawspec + assert not os.path.isdir(pyfile), rawspec + ispkg = False + elif knownids and frozenid in knownids: + assert check_modname(frozenid), rawspec + assert check_modname(modname), rawspec + ispkg = False + else: + assert not modname or check_modname(modname), rawspec + resolved = iter(resolve_modules(frozenid)) + frozenid, pyfile, ispkg = next(resolved) + if not modname: + modname = frozenid + if ispkg: + pkgid = frozenid + pkgname = modname + def iter_subs(): + for frozenid, pyfile, ispkg in resolved: + assert not knownids or frozenid not in knownids, (frozenid, rawspec) + if pkgname: + modname = frozenid.replace(pkgid, pkgname, 1) + else: + modname = frozenid + yield frozenid, pyfile, modname, ispkg, section + submodules = iter_subs() + + spec = (frozenid, pyfile or None, modname, ispkg, section) + return spec, submodules + + +def 
parse_frozen_specs(rawspecs=FROZEN): + seen = set() + for section, _specs in rawspecs: + for spec in _parse_frozen_specs(_specs, section, seen): + frozenid = spec[0] + yield spec + seen.add(frozenid) + + +def _parse_frozen_specs(rawspecs, section, seen): + for rawspec in rawspecs: + spec, subs = parse_frozen_spec(rawspec, seen, section) + yield spec + for spec in subs or (): + yield spec + + +def resolve_frozen_file(spec, destdir=MODULES_DIR): + if isinstance(spec, str): + modname = spec + else: + frozenid, _, _, _, _ = spec # (frozenid, pyfile, modname, ispkg, section) + modname = frozenid + # We use a consistent naming convention for all frozen modules. + return os.path.join(destdir, modname.replace('.', '_')) + '.h' + + +def resolve_frozen_files(specs, destdir=MODULES_DIR): + frozen = {} + frozenids = [] + lastsection = None + for spec in specs: + frozenid, pyfile, *_, section = spec + if frozenid in frozen: + if section is None: + lastsection = None + else: + assert section == lastsection + continue + lastsection = section + frozenfile = resolve_frozen_file(frozenid, destdir) + frozen[frozenid] = (pyfile, frozenfile) + frozenids.append(frozenid) + return frozen, frozenids + + +####################################### +# generic helpers + +def resolve_modules(modname, pyfile=None): + if modname.startswith('<') and modname.endswith('>'): + if pyfile: + assert os.path.isdir(pyfile) or os.path.basename(pyfile) == '__init__.py', pyfile + ispkg = True + modname = modname[1:-1] + rawname = modname + # For now, we only expect match patterns at the end of the name. + _modname, sep, match = modname.rpartition('.') + if sep: + if _modname.endswith('.**'): + modname = _modname[:-3] + match = f'**.{match}' + elif match and not match.isidentifier(): + modname = _modname + # Otherwise it's a plain name so we leave it alone. 
+ else: + match = None + else: + ispkg = False + rawname = modname + match = None + + if not check_modname(modname): + raise ValueError(f'not a valid module name ({rawname})') + + if not pyfile: + pyfile = _resolve_module(modname, ispkg=ispkg) + elif os.path.isdir(pyfile): + pyfile = _resolve_module(modname, pyfile, ispkg) + yield modname, pyfile, ispkg + + if match: + pkgdir = os.path.dirname(pyfile) + yield from iter_submodules(modname, pkgdir, match) + + +def check_modname(modname): + return all(n.isidentifier() for n in modname.split('.')) + + +def iter_submodules(pkgname, pkgdir=None, match='*'): + if not pkgdir: + pkgdir = os.path.join(STDLIB_DIR, *pkgname.split('.')) + if not match: + match = '**.*' + match_modname = _resolve_modname_matcher(match, pkgdir) + + def _iter_submodules(pkgname, pkgdir): + for entry in sorted(os.scandir(pkgdir), key=lambda e: e.name): + matched, recursive = match_modname(entry.name) + if not matched: + continue + modname = f'{pkgname}.{entry.name}' + if modname.endswith('.py'): + yield modname[:-3], entry.path, False + elif entry.is_dir(): + pyfile = os.path.join(entry.path, '__init__.py') + # We ignore namespace packages. 
+ if os.path.exists(pyfile): + yield modname, pyfile, True + if recursive: + yield from _iter_submodules(modname, entry.path) + + return _iter_submodules(pkgname, pkgdir) + + +def _resolve_modname_matcher(match, rootdir=None): + if isinstance(match, str): + if match.startswith('**.'): + recursive = True + pat = match[3:] + assert match + else: + recursive = False + pat = match + + if pat == '*': + def match_modname(modname): + return True, recursive + else: + raise NotImplementedError(match) + elif callable(match): + match_modname = match(rootdir) + else: + raise ValueError(f'unsupported matcher {match!r}') + return match_modname + + +def _resolve_module(modname, pathentry=STDLIB_DIR, ispkg=False): + assert pathentry, pathentry + pathentry = os.path.normpath(pathentry) + assert os.path.isabs(pathentry) + if ispkg: + return os.path.join(pathentry, *modname.split('.'), '__init__.py') + return os.path.join(pathentry, *modname.split('.')) + '.py' + + +####################################### +# regenerating dependent files + +def find_marker(lines, marker, file): + for pos, line in enumerate(lines): + if marker in line: + return pos + raise Exception(f"Can't find {marker!r} in file {file}") + + +def replace_block(lines, start_marker, end_marker, replacements, file): + start_pos = find_marker(lines, start_marker, file) + end_pos = find_marker(lines, end_marker, file) + if end_pos <= start_pos: + raise Exception(f"End marker {end_marker!r} " + f"occurs before start marker {start_marker!r} " + f"in file {file}") + replacements = [line.rstrip() + os.linesep for line in replacements] + return lines[:start_pos + 1] + replacements + lines[end_pos:] + + +def regen_frozen(specs, dest=MODULES_DIR): + if isinstance(dest, str): + frozen, frozenids = resolve_frozen_files(specs, destdir) + else: + frozenids, frozen = dest + + headerlines = [] + parentdir = os.path.dirname(FROZEN_FILE) + for frozenid in frozenids: + # Adding a comment to separate sections here doesn't add much, + # so 
we don't. + _, frozenfile = frozen[frozenid] + header = os.path.relpath(frozenfile, parentdir) + headerlines.append(f'#include "{header}"') + + deflines = [] + indent = ' ' + lastsection = None + for spec in specs: + frozenid, _, modname, ispkg, section = spec + if section != lastsection: + if lastsection is not None: + deflines.append('') + deflines.append(f'/* {section} */') + lastsection = section + + # This matches what we do in Programs/_freeze_module.c: + name = frozenid.replace('.', '_') + symbol = '_Py_M__' + name + pkg = '-' if ispkg else '' + line = ('{"%s", %s, %s(int)sizeof(%s)},' + % (modname, symbol, pkg, symbol)) + # TODO: Consider not folding lines + if len(line) < 80: + deflines.append(line) + else: + line1, _, line2 = line.rpartition(' ') + deflines.append(line1) + deflines.append(indent + line2) + + if not deflines[0]: + del deflines[0] + for i, line in enumerate(deflines): + if line: + deflines[i] = indent + line + + print(f'# Updating {os.path.relpath(FROZEN_FILE)}') + with updating_file_with_tmpfile(FROZEN_FILE) as (infile, outfile): + lines = infile.readlines() + # TODO: Use more obvious markers, e.g. + # $START GENERATED FOOBAR$ / $END GENERATED FOOBAR$ + lines = replace_block( + lines, + "/* Includes for frozen modules: */", + "/* End includes */", + headerlines, + FROZEN_FILE, + ) + lines = replace_block( + lines, + "static const struct _frozen _PyImport_FrozenModules[] =", + "/* sentinel */", + deflines, + FROZEN_FILE, + ) + outfile.writelines(lines) + + +def regen_makefile(frozenids, frozen): + frozenfiles = [] + rules = [''] + for frozenid in frozenids: + pyfile, frozenfile = frozen[frozenid] + header = os.path.relpath(frozenfile, ROOT_DIR) + relfile = header.replace('\\', '/') + frozenfiles.append(f'\t\t$(srcdir)/{relfile} \\') + + _pyfile = os.path.relpath(pyfile, ROOT_DIR) + tmpfile = f'{header}.new' + # Note that we freeze the module to the target .h file + # instead of going through an intermediate file like we used to. 
+ rules.append(f'{header}: $(srcdir)/Programs/_freeze_module $(srcdir)/{_pyfile}') + rules.append(f'\t$(srcdir)/Programs/_freeze_module {frozenid} \\') + rules.append(f'\t\t$(srcdir)/{_pyfile} \\') + rules.append(f'\t\t$(srcdir)/{header}') + rules.append('') + + frozenfiles[-1] = frozenfiles[-1].rstrip(" \\") + + print(f'# Updating {os.path.relpath(MAKEFILE)}') + with updating_file_with_tmpfile(MAKEFILE) as (infile, outfile): + lines = infile.readlines() + lines = replace_block( + lines, + "FROZEN_FILES =", + "# End FROZEN_FILES", + frozenfiles, + MAKEFILE, + ) + lines = replace_block( + lines, + "# BEGIN: freezing modules", + "# END: freezing modules", + rules, + MAKEFILE, + ) + outfile.writelines(lines) + + +def regen_pcbuild(frozenids, frozen): + projlines = [] + filterlines = [] + for frozenid in frozenids: + pyfile, frozenfile = frozen[frozenid] + + _pyfile = os.path.relpath(pyfile, ROOT_DIR).replace('/', '\\') + header = os.path.relpath(frozenfile, ROOT_DIR).replace('/', '\\') + intfile = header.split('\\')[-1].strip('.h') + '.g.h' + projlines.append(f' ') + projlines.append(f' {frozenid}') + projlines.append(f' $(IntDir){intfile}') + projlines.append(f' $(PySourcePath){header}') + projlines.append(f' ') + + filterlines.append(f' ') + filterlines.append(' Python Files') + filterlines.append(' ') + + print(f'# Updating {os.path.relpath(PCBUILD_PROJECT)}') + with updating_file_with_tmpfile(PCBUILD_PROJECT) as (infile, outfile): + lines = infile.readlines() + lines = replace_block( + lines, + '', + '', + projlines, + PCBUILD_PROJECT, + ) + outfile.writelines(lines) + print(f'# Updating {os.path.relpath(PCBUILD_FILTERS)}') + with updating_file_with_tmpfile(PCBUILD_FILTERS) as (infile, outfile): + lines = infile.readlines() + lines = replace_block( + lines, + '', + '', + filterlines, + PCBUILD_FILTERS, + ) + outfile.writelines(lines) + + +####################################### +# freezing modules + +def freeze_module(modname, pyfile=None, destdir=MODULES_DIR): + 
"""Generate the frozen module .h file for the given module.""" + for modname, pyfile, ispkg in resolve_modules(modname, pyfile): + frozenfile = _resolve_frozen(modname, destdir) + _freeze_module(modname, pyfile, frozenfile) + + +def _freeze_module(frozenid, pyfile, frozenfile): + tmpfile = frozenfile + '.new' + + argv = [TOOL, frozenid, pyfile, tmpfile] + print('#', ' '.join(os.path.relpath(a) for a in argv)) + try: + subprocess.run(argv, check=True) + except subprocess.CalledProcessError: + if not os.path.exists(TOOL): + sys.exit(f'ERROR: missing {TOOL}; you need to run "make regen-frozen"') + raise # re-raise + + os.replace(tmpfile, frozenfile) + + +####################################### +# the script + +def main(): + # Expand the raw specs, preserving order. + specs = list(parse_frozen_specs()) + frozen, frozenids = resolve_frozen_files(specs, MODULES_DIR) + + # Regen build-related files. + regen_frozen(specs, (frozenids, frozen)) + regen_makefile(frozenids, frozen) + regen_pcbuild(frozenids, frozen) + + # Freeze the target modules. + for frozenid in frozenids: + pyfile, frozenfile = frozen[frozenid] + _freeze_module(frozenid, pyfile, frozenfile) + + +if __name__ == '__main__': + argv = sys.argv[1:] + if argv: + sys.exit('ERROR: got unexpected args {argv}') + main() diff --git a/Tools/scripts/update_file.py b/Tools/scripts/update_file.py index 224585c69bb..cfc4e2b1ab1 100644 --- a/Tools/scripts/update_file.py +++ b/Tools/scripts/update_file.py @@ -6,23 +6,47 @@ This avoids wholesale rebuilds when a code (re)generation phase does not actually change the in-tree generated code. """ +import contextlib import os +import os.path import sys -def main(old_path, new_path): - with open(old_path, 'rb') as f: +@contextlib.contextmanager +def updating_file_with_tmpfile(filename, tmpfile=None): + """A context manager for updating a file via a temp file. + + The context manager provides two open files: the source file open + for reading, and the temp file, open for writing. 
+ + Upon exiting: both files are closed, and the source file is replaced + with the temp file. + """ + # XXX Optionally use tempfile.TemporaryFile? + if not tmpfile: + tmpfile = filename + '.tmp' + elif os.path.isdir(tmpfile): + tmpfile = os.path.join(tmpfile, filename + '.tmp') + + with open(tmpfile, 'w') as outfile: + with open(filename) as infile: + yield infile, outfile + update_file_with_tmpfile(filename, tmpfile) + + +def update_file_with_tmpfile(filename, tmpfile): + with open(filename, 'rb') as f: old_contents = f.read() - with open(new_path, 'rb') as f: + with open(tmpfile, 'rb') as f: new_contents = f.read() if old_contents != new_contents: - os.replace(new_path, old_path) + os.replace(tmpfile, filename) else: - os.unlink(new_path) + os.unlink(tmpfile) if __name__ == '__main__': if len(sys.argv) != 3: print("Usage: %s " % (sys.argv[0],)) sys.exit(1) - main(sys.argv[1], sys.argv[2]) + update_file_with_tmpfile(sys.argv[1], sys.argv[2])