bpo-45019: Add a tool to generate list of modules to include for frozen modules (gh-27980)

Frozen modules must be added to several files in order to work properly. Before this change this had to be done manually. Here we add a tool to generate the relevant lines in those files instead. This helps us avoid mistakes and omissions.

https://bugs.python.org/issue45019
This commit is contained in:
Eric Snow 2021-08-30 17:25:11 -06:00 committed by GitHub
parent 5246dbc2a1
commit 044e8d866f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 833 additions and 199 deletions

3
.gitattributes vendored
View File

@ -46,8 +46,7 @@ Modules/clinic/*.h linguist-generated=true
Objects/clinic/*.h linguist-generated=true
PC/clinic/*.h linguist-generated=true
Python/clinic/*.h linguist-generated=true
Python/importlib.h linguist-generated=true
Python/importlib_external.h linguist-generated=true
Python/frozen_modules/*.h linguist-generated=true
Include/internal/pycore_ast.h linguist-generated=true
Python/Python-ast.c linguist-generated=true
Include/opcode.h linguist-generated=true

View File

@ -71,6 +71,7 @@ jobs:
make regen-stdlib-module-names
- name: Check for changes
run: |
git add -u
changes=$(git status --porcelain)
# Check for changes in regenerated files
if ! test -z "$changes"

2
.gitignore vendored
View File

@ -68,7 +68,7 @@ Modules/Setup.config
Modules/Setup.local
Modules/config.c
Modules/ld_so_aix
Programs/_freeze_importlib
Programs/_freeze_module
Programs/_testembed
PC/python_nt*.h
PC/pythonnt_rc*.h

View File

@ -110,7 +110,7 @@ to 1 and ``-bb`` sets :c:data:`Py_BytesWarningFlag` to 2.
Suppress error messages when calculating the module search path in
:c:func:`Py_GetPath`.
Private flag used by ``_freeze_importlib`` and ``frozenmain`` programs.
Private flag used by ``_freeze_module`` and ``frozenmain`` programs.
.. c:var:: int Py_HashRandomizationFlag

View File

@ -574,8 +574,8 @@ coverage-lcov:
@echo "lcov report at $(COVERAGE_REPORT)/index.html"
@echo
# Force regeneration of parser and importlib
coverage-report: regen-token regen-importlib
# Force regeneration of parser and frozen modules
coverage-report: regen-token regen-frozen
@ # build with coverage info
$(MAKE) coverage
@ # run tests, ignore failures
@ -734,45 +734,60 @@ Programs/_testembed: Programs/_testembed.o $(LIBRARY_DEPS)
$(LINKCC) $(PY_CORE_LDFLAGS) $(LINKFORSHARED) -o $@ Programs/_testembed.o $(BLDLIBRARY) $(LIBS) $(MODLIBS) $(SYSLIBS)
############################################################################
# Importlib
# frozen modules (including importlib)
Programs/_freeze_importlib.o: Programs/_freeze_importlib.c Makefile
Programs/_freeze_module.o: Programs/_freeze_module.c Makefile
Programs/_freeze_importlib: Programs/_freeze_importlib.o $(LIBRARY_OBJS_OMIT_FROZEN)
$(LINKCC) $(PY_CORE_LDFLAGS) -o $@ Programs/_freeze_importlib.o $(LIBRARY_OBJS_OMIT_FROZEN) $(LIBS) $(MODLIBS) $(SYSLIBS)
Programs/_freeze_module: Programs/_freeze_module.o $(LIBRARY_OBJS_OMIT_FROZEN)
$(LINKCC) $(PY_CORE_LDFLAGS) -o $@ Programs/_freeze_module.o $(LIBRARY_OBJS_OMIT_FROZEN) $(LIBS) $(MODLIBS) $(SYSLIBS)
Tools/scripts/freeze_modules.py: Programs/_freeze_module
.PHONY: regen-frozen
regen-frozen: Tools/scripts/freeze_modules.py $(FROZEN_FILES)
$(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/freeze_modules.py
@echo "The Makefile was updated, you may need to re-run make."
# Per-module rules for the frozen headers.  Each recipe runs
# Programs/_freeze_module with three arguments: the module name, the
# module's source file, and the header file to write.
# The notes at the top of Python/frozen.c say these per-module targets are
# updated by Tools/scripts/freeze_modules.py, so change the module list in
# that script rather than editing the rules between the markers by hand.
# NOTE(review): the targets are spelled without $(srcdir) while the recipes
# write under $(srcdir), and the tool is named $(srcdir)/Programs/_freeze_module
# although it is linked as Programs/_freeze_module -- confirm that this
# behaves as intended for out-of-tree builds.
# BEGIN: freezing modules
Python/frozen_modules/importlib__bootstrap.h: $(srcdir)/Programs/_freeze_module $(srcdir)/Lib/importlib/_bootstrap.py
$(srcdir)/Programs/_freeze_module importlib._bootstrap \
$(srcdir)/Lib/importlib/_bootstrap.py \
$(srcdir)/Python/frozen_modules/importlib__bootstrap.h
Python/frozen_modules/importlib__bootstrap_external.h: $(srcdir)/Programs/_freeze_module $(srcdir)/Lib/importlib/_bootstrap_external.py
$(srcdir)/Programs/_freeze_module importlib._bootstrap_external \
$(srcdir)/Lib/importlib/_bootstrap_external.py \
$(srcdir)/Python/frozen_modules/importlib__bootstrap_external.h
Python/frozen_modules/zipimport.h: $(srcdir)/Programs/_freeze_module $(srcdir)/Lib/zipimport.py
$(srcdir)/Programs/_freeze_module zipimport \
$(srcdir)/Lib/zipimport.py \
$(srcdir)/Python/frozen_modules/zipimport.h
Python/frozen_modules/hello.h: $(srcdir)/Programs/_freeze_module $(srcdir)/Tools/freeze/flag.py
$(srcdir)/Programs/_freeze_module hello \
$(srcdir)/Tools/freeze/flag.py \
$(srcdir)/Python/frozen_modules/hello.h
# END: freezing modules
# We keep this renamed target around for folks with muscle memory.
.PHONY: regen-importlib
regen-importlib: Programs/_freeze_importlib
# Regenerate Python/importlib_external.h
# from Lib/importlib/_bootstrap_external.py using _freeze_importlib
./Programs/_freeze_importlib importlib._bootstrap_external \
$(srcdir)/Lib/importlib/_bootstrap_external.py \
$(srcdir)/Python/importlib_external.h.new
$(UPDATE_FILE) $(srcdir)/Python/importlib_external.h $(srcdir)/Python/importlib_external.h.new
# Regenerate Python/importlib.h from Lib/importlib/_bootstrap.py
# using _freeze_importlib
./Programs/_freeze_importlib importlib._bootstrap \
$(srcdir)/Lib/importlib/_bootstrap.py \
$(srcdir)/Python/importlib.h.new
$(UPDATE_FILE) $(srcdir)/Python/importlib.h $(srcdir)/Python/importlib.h.new
# Regenerate Python/importlib_zipimport.h from Lib/zipimport.py
# using _freeze_importlib
./Programs/_freeze_importlib zipimport \
$(srcdir)/Lib/zipimport.py \
$(srcdir)/Python/importlib_zipimport.h.new
$(UPDATE_FILE) $(srcdir)/Python/importlib_zipimport.h $(srcdir)/Python/importlib_zipimport.h.new
regen-importlib: regen-frozen
############################################################################
# ABI
regen-limited-abi: all
$(RUNSHARED) ./$(BUILDPYTHON) $(srcdir)/Tools/scripts/stable_abi.py --generate-all $(srcdir)/Misc/stable_abi.txt
############################################################################
# Regenerate all generated files
regen-all: regen-opcode regen-opcode-targets regen-typeslots \
regen-token regen-ast regen-keyword regen-importlib clinic \
regen-pegen-metaparser regen-pegen regen-frozen regen-test-frozenmain
regen-token regen-ast regen-keyword regen-frozen clinic \
regen-pegen-metaparser regen-pegen regen-test-frozenmain
@echo
@echo "Note: make regen-stdlib-module-names and autoconf should be run manually"
@ -884,15 +899,6 @@ regen-opcode:
$(srcdir)/Include/opcode.h.new
$(UPDATE_FILE) $(srcdir)/Include/opcode.h $(srcdir)/Include/opcode.h.new
.PHONY: regen-frozen
regen-frozen: Programs/_freeze_importlib
# Regenerate code for frozen module "__hello__".
./Programs/_freeze_importlib hello \
$(srcdir)/Tools/freeze/flag.py \
$(srcdir)/Python/frozen_hello.h.new
$(UPDATE_FILE) $(srcdir)/Python/frozen_hello.h \
$(srcdir)/Python/frozen_hello.h.new
.PHONY: regen-token
regen-token:
# Regenerate Doc/library/token-list.inc from Grammar/Tokens
@ -995,8 +1001,15 @@ regen-opcode-targets:
Python/ceval.o: $(srcdir)/Python/opcode_targets.h $(srcdir)/Python/ceval_gil.h \
$(srcdir)/Python/condvar.h
Python/frozen.o: $(srcdir)/Python/importlib.h $(srcdir)/Python/importlib_external.h \
$(srcdir)/Python/importlib_zipimport.h $(srcdir)/Python/frozen_hello.h
# FROZEN_FILES is auto-generated by Tools/scripts/freeze_modules.py.
FROZEN_FILES = \
$(srcdir)/Python/frozen_modules/importlib__bootstrap.h \
$(srcdir)/Python/frozen_modules/importlib__bootstrap_external.h \
$(srcdir)/Python/frozen_modules/zipimport.h \
$(srcdir)/Python/frozen_modules/hello.h
# End FROZEN_FILES
Python/frozen.o: $(FROZEN_FILES)
# Generate DTrace probe macros, then rename them (PYTHON_ -> PyDTrace_) to
# follow our naming conventions. dtrace(1) uses the output filename to generate
@ -1918,7 +1931,7 @@ clean-retain-profile: pycremoval
find build -name '*.py[co]' -exec rm -f {} ';' || true
-rm -f pybuilddir.txt
-rm -f Lib/lib2to3/*Grammar*.pickle
-rm -f Programs/_testembed Programs/_freeze_importlib
-rm -f Programs/_testembed Programs/_freeze_module
-find build -type f -a ! -name '*.gc??' -exec rm -f {} ';'
-rm -f Include/pydtrace_probes.h
-rm -f profile-gen-stamp

View File

@ -0,0 +1,3 @@
Generate lines in relevant files for frozen modules. Up until now each of
the files had to be edited manually. This change makes it easier to add to
and modify the frozen modules.

View File

@ -69,7 +69,7 @@
<PropertyGroup Label="Globals">
<ProjectGuid>{19C0C13F-47CA-4432-AFF3-799A296A4DDC}</ProjectGuid>
<Keyword>Win32Proj</Keyword>
<RootNamespace>_freeze_importlib</RootNamespace>
<RootNamespace>_freeze_module</RootNamespace>
<SupportPGO>false</SupportPGO>
</PropertyGroup>
<Import Project="python.props" />
@ -95,7 +95,7 @@
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="..\Programs\_freeze_importlib.c" />
<ClCompile Include="..\Programs\_freeze_module.c" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="pythoncore.vcxproj">
@ -108,31 +108,33 @@
</ProjectReference>
</ItemGroup>
<ItemGroup>
<!-- BEGIN frozen modules -->
<None Include="..\Lib\importlib\_bootstrap.py">
<ModName>importlib._bootstrap</ModName>
<IntFile>$(IntDir)importlib.g.h</IntFile>
<OutFile>$(PySourcePath)Python\importlib.h</OutFile>
<IntFile>$(IntDir)importlib__bootstrap.g.h</IntFile>
<OutFile>$(PySourcePath)Python\frozen_modules\importlib__bootstrap.h</OutFile>
</None>
<None Include="..\Lib\importlib\_bootstrap_external.py">
<ModName>importlib._bootstrap_external</ModName>
<IntFile>$(IntDir)importlib_external.g.h</IntFile>
<OutFile>$(PySourcePath)Python\importlib_external.h</OutFile>
<IntFile>$(IntDir)importlib__bootstrap_external.g.h</IntFile>
<OutFile>$(PySourcePath)Python\frozen_modules\importlib__bootstrap_external.h</OutFile>
</None>
<None Include="..\Lib\zipimport.py">
<ModName>zipimport</ModName>
<IntFile>$(IntDir)importlib_zipimport.g.h</IntFile>
<OutFile>$(PySourcePath)Python\importlib_zipimport.h</OutFile>
<IntFile>$(IntDir)zipimport.g.h</IntFile>
<OutFile>$(PySourcePath)Python\frozen_modules\zipimport.h</OutFile>
</None>
<None Include="..\Tools\freeze\flag.py">
<ModName>hello</ModName>
<IntFile>$(IntDir)frozen_hello.g.h</IntFile>
<OutFile>$(PySourcePath)Python\frozen_hello.h</OutFile>
<IntFile>$(IntDir)hello.g.h</IntFile>
<OutFile>$(PySourcePath)Python\frozen_modules\hello.h</OutFile>
</None>
<!-- END frozen modules -->
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
<Target Name="_RebuildImportLib">
<Target Name="_RebuildFrozen">
<Exec Command='"$(TargetPath)" "%(None.ModName)" "%(None.FullPath)" "%(None.IntFile)"' />
<Copy SourceFiles="%(None.IntFile)"
@ -143,15 +145,18 @@
<Message Text="Updated files: @(_Updated->'%(Filename)%(Extension)',', ')"
Condition="'@(_Updated)' != ''" Importance="high" />
<Warning Text="Frozen importlib files were updated. Please rebuild to pick up the changes.%0D%0A%0D%0AIf you are not developing on Windows but you see this error on a continuous integration build, please run 'make regen-all' and commit anything that changes."
<Warning Text="Frozen modules (e.g. importlib) were updated. Please rebuild to pick up the changes.%0D%0A%0D%0AIf you are not developing on Windows but you see this error on a continuous integration build, please run 'make regen-all' and commit anything that changes."
Condition="'@(_Updated)' != '' and $(Configuration) == 'Debug'" />
<Error Text="Frozen importlib files were updated. Please rebuild to pick up the changes.%0D%0A%0D%0AIf you are not developing on Windows but you see this error on a continuous integration build, please run 'make regen-all' and commit anything that changes."
<Error Text="Frozen modules (e.g. importlib) were updated. Please rebuild to pick up the changes.%0D%0A%0D%0AIf you are not developing on Windows but you see this error on a continuous integration build, please run 'make regen-all' and commit anything that changes."
Condition="'@(_Updated)' != '' and $(Configuration) == 'Release'" />
</Target>
<Target Name="RebuildImportLib" AfterTargets="AfterBuild" Condition="$(Configuration) == 'Debug' or $(Configuration) == 'Release'"
DependsOnTargets="_RebuildImportLib">
<Target Name="RebuildFrozen" AfterTargets="AfterBuild" Condition="$(Configuration) == 'Debug' or $(Configuration) == 'Release'"
DependsOnTargets="_RebuildFrozen">
</Target>
<Target Name="_CleanImportLib" BeforeTargets="CoreClean">
<Target Name="RebuildImportLib" AfterTargets="AfterBuild" Condition="$(Configuration) == 'Debug' or $(Configuration) == 'Release'"
DependsOnTargets="_RebuildFrozen">
</Target>
<Target Name="_CleanFrozen" BeforeTargets="CoreClean">
<ItemGroup>
<Clean Include="%(None.IntFile)" />
</ItemGroup>

View File

@ -10,19 +10,24 @@
</Filter>
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\Programs\_freeze_importlib.c">
<ClCompile Include="..\Programs\_freeze_module.c">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<!-- BEGIN frozen modules -->
<None Include="..\Lib\importlib\_bootstrap.py">
<Filter>Source Files</Filter>
</None>
<None Include="..\Lib\zipimport.py">
<Filter>Python Files</Filter>
</None>
<None Include="..\Lib\importlib\_bootstrap_external.py">
<Filter>Python Files</Filter>
</None>
<None Include="..\Lib\zipimport.py">
<Filter>Python Files</Filter>
</None>
<None Include="..\Tools\freeze\flag.py">
<Filter>Python Files</Filter>
</None>
<!-- END frozen modules -->
</ItemGroup>
</Project>

View File

@ -72,8 +72,8 @@
<BuildInParallel>false</BuildInParallel>
</Projects>
<!-- _freeze_importlib -->
<Projects2 Condition="$(Platform) != 'ARM' and $(Platform) != 'ARM64'" Include="_freeze_importlib.vcxproj" />
<!-- _freeze_module -->
<Projects2 Condition="$(Platform) != 'ARM' and $(Platform) != 'ARM64'" Include="_freeze_module.vcxproj" />
<!-- python[w].exe -->
<Projects2 Include="python.vcxproj;pythonw.vcxproj" />
<Projects2 Include="python_uwp.vcxproj;pythonw_uwp.vcxproj" Condition="$(IncludeUwp)" />

View File

@ -75,7 +75,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pywlauncher", "pywlauncher.
{7B2727B5-5A3F-40EE-A866-43A13CD31446} = {7B2727B5-5A3F-40EE-A866-43A13CD31446}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "_freeze_importlib", "_freeze_importlib.vcxproj", "{19C0C13F-47CA-4432-AFF3-799A296A4DDC}"
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "_freeze_module", "_freeze_module.vcxproj", "{19C0C13F-47CA-4432-AFF3-799A296A4DDC}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "_overlapped", "_overlapped.vcxproj", "{EB6E69DD-04BF-4543-9B92-49FAABCEAC2E}"
EndProject

View File

@ -115,9 +115,10 @@ _testembed
These are miscellaneous sub-projects that don't really fit the other
categories:
_freeze_importlib
_freeze_importlib.exe, used to regenerate Python\importlib.h after
changes have been made to Lib\importlib\_bootstrap.py
_freeze_module
_freeze_module.exe, used to regenerate frozen modules in Python
after changes have been made to the corresponding source files
(e.g. Lib\importlib\_bootstrap.py).
pyshellext
pyshellext.dll, the shell extension deployed with the launcher
python3dll

View File

@ -1,5 +1,10 @@
/* This is built as a stand-alone executable by the Makefile, and helps turn
Lib/importlib/_bootstrap.py into a frozen module in Python/importlib.h
modules into frozen modules (like Lib/importlib/_bootstrap.py
into Python/frozen_modules/importlib__bootstrap.h).
This is used directly by Tools/scripts/freeze_modules.py, and indirectly by "make regen-frozen".
See Python/frozen.c for more info.
*/
#include <Python.h>
@ -28,54 +33,11 @@ const struct _frozen *PyImport_FrozenModules;
#endif
static const char header[] =
"/* Auto-generated by Programs/_freeze_importlib.c */";
"/* Auto-generated by Programs/_freeze_module.c */";
int
main(int argc, char *argv[])
static void
runtime_init(void)
{
const char *name, *inpath, *outpath;
char buf[100];
FILE *infile = NULL, *outfile = NULL;
struct _Py_stat_struct stat;
size_t text_size, data_size, i, n;
char *text = NULL;
unsigned char *data;
PyObject *code = NULL, *marshalled = NULL;
PyImport_FrozenModules = _PyImport_FrozenModules;
if (argc != 4) {
fprintf(stderr, "need to specify the name, input and output paths\n");
return 2;
}
name = argv[1];
inpath = argv[2];
outpath = argv[3];
infile = fopen(inpath, "rb");
if (infile == NULL) {
fprintf(stderr, "cannot open '%s' for reading\n", inpath);
goto error;
}
if (_Py_fstat_noraise(fileno(infile), &stat)) {
fprintf(stderr, "cannot fstat '%s'\n", inpath);
goto error;
}
text_size = (size_t)stat.st_size;
text = (char *) malloc(text_size + 1);
if (text == NULL) {
fprintf(stderr, "could not allocate %ld bytes\n", (long) text_size);
goto error;
}
n = fread(text, 1, text_size, infile);
fclose(infile);
infile = NULL;
if (n < text_size) {
fprintf(stderr, "read too short: got %ld instead of %ld bytes\n",
(long) n, (long) text_size);
goto error;
}
text[text_size] = '\0';
PyConfig config;
PyConfig_InitIsolatedConfig(&config);
@ -83,7 +45,7 @@ main(int argc, char *argv[])
PyStatus status;
status = PyConfig_SetString(&config, &config.program_name,
L"./_freeze_importlib");
L"./_freeze_module");
if (PyStatus_Exception(status)) {
PyConfig_Clear(&config);
Py_ExitStatusException(status);
@ -98,39 +60,93 @@ main(int argc, char *argv[])
if (PyStatus_Exception(status)) {
Py_ExitStatusException(status);
}
}
sprintf(buf, "<frozen %s>", name);
code = Py_CompileStringExFlags(text, buf, Py_file_input, NULL, 0);
if (code == NULL)
goto error;
free(text);
text = NULL;
marshalled = PyMarshal_WriteObjectToString(code, Py_MARSHAL_VERSION);
Py_CLEAR(code);
if (marshalled == NULL)
goto error;
assert(PyBytes_CheckExact(marshalled));
data = (unsigned char *) PyBytes_AS_STRING(marshalled);
data_size = PyBytes_GET_SIZE(marshalled);
/* Open the file in text mode. The hg checkout should be using the eol extension,
which in turn should cause the EOL style match the C library's text mode */
outfile = fopen(outpath, "w");
if (outfile == NULL) {
fprintf(stderr, "cannot open '%s' for writing\n", outpath);
goto error;
static const char *
read_text(const char *inpath)
{
FILE *infile = fopen(inpath, "rb");
if (infile == NULL) {
fprintf(stderr, "cannot open '%s' for reading\n", inpath);
return NULL;
}
fprintf(outfile, "%s\n", header);
for (i = n = 0; name[i] != '\0'; i++) {
if (name[i] != '.') {
buf[n++] = name[i];
struct _Py_stat_struct stat;
if (_Py_fstat_noraise(fileno(infile), &stat)) {
fprintf(stderr, "cannot fstat '%s'\n", inpath);
fclose(infile);
return NULL;
}
size_t text_size = (size_t)stat.st_size;
char *text = (char *) malloc(text_size + 1);
if (text == NULL) {
fprintf(stderr, "could not allocate %ld bytes\n", (long) text_size);
fclose(infile);
return NULL;
}
size_t n = fread(text, 1, text_size, infile);
fclose(infile);
if (n < text_size) {
fprintf(stderr, "read too short: got %ld instead of %ld bytes\n",
(long) n, (long) text_size);
free(text);
return NULL;
}
text[text_size] = '\0';
return (const char *)text;
}
/* Compile module source text and marshal the resulting code object.

   name: module name; it is embedded in the code object's filename as
         "<frozen NAME>".
   text: NUL-terminated source text of the module.

   Returns a new reference to a bytes object holding the marshalled code,
   or NULL on failure.  An allocation failure is reported on stderr; for
   compile and marshal failures NULL is simply passed on from the
   underlying call. */
static PyObject *
compile_and_marshal(const char *name, const char *text)
{
    /* "<frozen " (8) + name + ">" (1) + terminating NUL (1) */
    size_t size = strlen(name) + 10;
    char *filename = (char *) malloc(size);
    if (filename == NULL) {
        fprintf(stderr, "could not allocate %ld bytes\n", (long) size);
        return NULL;
    }
    snprintf(filename, size, "<frozen %s>", name);
    PyObject *code = Py_CompileStringExFlags(text, filename,
                                             Py_file_input, NULL, 0);
    free(filename);
    if (code == NULL) {
        return NULL;
    }

    PyObject *marshalled = PyMarshal_WriteObjectToString(code, Py_MARSHAL_VERSION);
    /* The code object is no longer needed once it has been serialized. */
    Py_CLEAR(code);
    if (marshalled == NULL) {
        return NULL;
    }
    assert(PyBytes_CheckExact(marshalled));

    return marshalled;
}
static char *
get_varname(const char *name, const char *prefix)
{
size_t n = strlen(prefix);
char *varname = (char *) malloc(strlen(name) + n + 1);
(void)strcpy(varname, prefix);
for (size_t i = 0; name[i] != '\0'; i++) {
if (name[i] == '.') {
varname[n++] = '_';
}
else {
varname[n++] = name[i];
}
}
buf[n] = '\0';
fprintf(outfile, "const unsigned char _Py_M__%s[] = {\n", buf);
for (n = 0; n < data_size; n += 16) {
varname[n] = '\0';
return varname;
}
static void
write_code(FILE *outfile, PyObject *marshalled, const char *varname)
{
unsigned char *data = (unsigned char *) PyBytes_AS_STRING(marshalled);
size_t data_size = PyBytes_GET_SIZE(marshalled);
fprintf(outfile, "const unsigned char %s[] = {\n", varname);
for (size_t n = 0; n < data_size; n += 16) {
size_t i, end = Py_MIN(n + 16, data_size);
fprintf(outfile, " ");
for (i = n; i < end; i++) {
@ -139,29 +155,72 @@ main(int argc, char *argv[])
fprintf(outfile, "\n");
}
fprintf(outfile, "};\n");
}
Py_CLEAR(marshalled);
/* Write the marshalled code to the header file at outpath.

   The file starts with the auto-generated banner line, followed by a
   byte array whose name is built from the "_Py_M__" prefix and the module
   name (dots replaced by underscores).

   outpath:    header file to create or overwrite
   inpath:     path of the module's source file; not used here
   name:       module name used to build the array name
   marshalled: bytes object holding the marshalled code object

   Returns 0 on success and -1 on failure, after reporting the problem on
   stderr.  Once opened, the output file is closed on every path. */
static int
write_frozen(const char *outpath, const char *inpath, const char *name,
             PyObject *marshalled)
{
    /* Open the file in text mode so that line endings follow the
       C library's text-mode convention. */
    FILE *outfile = fopen(outpath, "w");
    if (outfile == NULL) {
        fprintf(stderr, "cannot open '%s' for writing\n", outpath);
        return -1;
    }

    fprintf(outfile, "%s\n", header);
    char *arrayname = get_varname(name, "_Py_M__");
    write_code(outfile, marshalled, arrayname);
    free(arrayname);

    /* Check the stream state first, then close unconditionally: buffered
       data may only fail to reach the file when it is flushed by fclose(). */
    int failed = ferror(outfile);
    if (fclose(outfile) != 0) {
        failed = 1;
    }
    if (failed) {
        fprintf(stderr, "error when writing to '%s'\n", outpath);
        return -1;
    }
    return 0;
}
int
main(int argc, char *argv[])
{
const char *name, *inpath, *outpath;
PyImport_FrozenModules = _PyImport_FrozenModules;
if (argc != 4) {
fprintf(stderr, "need to specify the name, input and output paths\n");
return 2;
}
name = argv[1];
inpath = argv[2];
outpath = argv[3];
runtime_init();
const char *text = read_text(inpath);
if (text == NULL) {
goto error;
}
PyObject *marshalled = compile_and_marshal(name, text);
free((char *)text);
if (marshalled == NULL) {
goto error;
}
int res = write_frozen(outpath, inpath, name, marshalled);
Py_DECREF(marshalled);
if (res != 0) {
goto error;
}
Py_Finalize();
if (outfile) {
if (ferror(outfile)) {
fprintf(stderr, "error when writing to '%s'\n", outpath);
goto error;
}
fclose(outfile);
}
return 0;
error:
PyErr_Print();
Py_Finalize();
if (infile)
fclose(infile);
if (outfile)
fclose(outfile);
if (text)
free(text);
if (marshalled)
Py_DECREF(marshalled);
return 1;
}

View File

@ -1,35 +1,63 @@
/* Frozen modules initializer */
#include "Python.h"
#include "importlib.h"
#include "importlib_external.h"
#include "importlib_zipimport.h"
/* Frozen modules initializer
*
* Frozen modules are written to header files by Programs/_freeze_module.
* These files are typically put in Python/frozen_modules/. Each holds
* an array of bytes named "_Py_M__<module>", which is used below.
*
* These files must be regenerated any time the corresponding .pyc
* file would change (including with changes to the compiler, bytecode
* format, marshal format). This can be done with "make regen-frozen".
* That make target just runs Tools/scripts/freeze_modules.py.
*
* The freeze_modules.py script also determines which modules get
* frozen. Update the list at the top of the script to add, remove,
* or modify the target modules. Then run the script
* (or run "make regen-frozen").
*
* The script does the following:
*
* 1. run Programs/_freeze_module on the target modules
* 2. update the includes and _PyImport_FrozenModules[] in this file
* 3. update the FROZEN_FILES variable in Makefile.pre.in
* 4. update the per-module targets in Makefile.pre.in
* 5. update the lists of modules in PCbuild/_freeze_module.vcxproj and
* PCbuild/_freeze_module.vcxproj.filters
*
* (Note that most of the data in this file is auto-generated by the script.)
*
* Those steps can also be done manually, though this is not recommended.
* Expect such manual changes to be removed the next time
* freeze_modules.py runs.
* */
/* In order to test the support for frozen modules, by default we
define a single frozen module, __hello__. Loading it will print
some famous words... */
define some simple frozen modules: __hello__, __phello__ (a package),
and __phello__.spam. Loading any will print some famous words... */
/* Run "make regen-frozen" to regen the file below (e.g. after a bytecode
* format change). The include file defines _Py_M__hello as an array of bytes.
*/
#include "frozen_hello.h"
#include "Python.h"
#define SIZE (int)sizeof(_Py_M__hello)
/* Includes for frozen modules: */
#include "frozen_modules/importlib__bootstrap.h"
#include "frozen_modules/importlib__bootstrap_external.h"
#include "frozen_modules/zipimport.h"
#include "frozen_modules/hello.h"
/* End includes */
/* Note that a negative size indicates a package. */
static const struct _frozen _PyImport_FrozenModules[] = {
/* importlib */
{"_frozen_importlib", _Py_M__importlib_bootstrap,
(int)sizeof(_Py_M__importlib_bootstrap)},
{"_frozen_importlib_external", _Py_M__importlib_bootstrap_external,
(int)sizeof(_Py_M__importlib_bootstrap_external)},
{"zipimport", _Py_M__zipimport,
(int)sizeof(_Py_M__zipimport)},
{"_frozen_importlib", _Py_M__importlib__bootstrap,
(int)sizeof(_Py_M__importlib__bootstrap)},
{"_frozen_importlib_external", _Py_M__importlib__bootstrap_external,
(int)sizeof(_Py_M__importlib__bootstrap_external)},
{"zipimport", _Py_M__zipimport, (int)sizeof(_Py_M__zipimport)},
/* Test module */
{"__hello__", _Py_M__hello, SIZE},
/* Test package (negative size indicates package-ness) */
{"__phello__", _Py_M__hello, -SIZE},
{"__phello__.spam", _Py_M__hello, SIZE},
{"__hello__", _Py_M__hello, (int)sizeof(_Py_M__hello)},
{"__phello__", _Py_M__hello, -(int)sizeof(_Py_M__hello)},
{"__phello__.spam", _Py_M__hello, (int)sizeof(_Py_M__hello)},
{0, 0, 0} /* sentinel */
};

View File

@ -1,4 +1,4 @@
/* Auto-generated by Programs/_freeze_importlib.c */
/* Auto-generated by Programs/_freeze_module.c */
const unsigned char _Py_M__hello[] = {
99,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,
0,0,0,0,0,115,16,0,0,0,100,0,90,0,101,1,

View File

@ -1,5 +1,5 @@
/* Auto-generated by Programs/_freeze_importlib.c */
const unsigned char _Py_M__importlib_bootstrap[] = {
/* Auto-generated by Programs/_freeze_module.c */
const unsigned char _Py_M__importlib__bootstrap[] = {
99,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,
0,0,0,0,0,115,130,1,0,0,100,0,90,0,100,1,
132,0,90,1,100,2,90,2,100,2,90,3,100,2,90,4,

View File

@ -1,5 +1,5 @@
/* Auto-generated by Programs/_freeze_importlib.c */
const unsigned char _Py_M__importlib_bootstrap_external[] = {
/* Auto-generated by Programs/_freeze_module.c */
const unsigned char _Py_M__importlib__bootstrap_external[] = {
99,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,
0,0,0,0,0,115,158,2,0,0,100,0,90,0,100,1,
97,1,100,2,100,1,108,2,90,2,100,2,100,1,108,3,

View File

@ -1,4 +1,4 @@
/* Auto-generated by Programs/_freeze_importlib.c */
/* Auto-generated by Programs/_freeze_module.c */
const unsigned char _Py_M__zipimport[] = {
99,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,
0,0,0,0,0,115,48,1,0,0,100,0,90,0,100,1,

View File

@ -0,0 +1,496 @@
"""Freeze modules and regen related files (e.g. Python/frozen.c).
See the notes at the top of Python/frozen.c for more info.
"""
import os
import os.path
import subprocess
import sys
import textwrap
from update_file import updating_file_with_tmpfile
# Directories, all derived from the location of this script
# (<root>/Tools/scripts/).
SCRIPTS_DIR = os.path.abspath(os.path.dirname(__file__))
TOOLS_DIR = os.path.dirname(SCRIPTS_DIR)
ROOT_DIR = os.path.dirname(TOOLS_DIR)

STDLIB_DIR = os.path.join(ROOT_DIR, 'Lib')
# The generated headers live here.  The .gitattributes file refers to
# this directory, so keep the two in sync if MODULES_DIR ever changes.
MODULES_DIR = os.path.join(ROOT_DIR, 'Python/frozen_modules')

# The freezing program and the files this script rewrites.
TOOL = os.path.join(ROOT_DIR, 'Programs', '_freeze_module')
FROZEN_FILE = os.path.join(ROOT_DIR, 'Python', 'frozen.c')
MAKEFILE = os.path.join(ROOT_DIR, 'Makefile.pre.in')
PCBUILD_PROJECT = os.path.join(
    ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj')
PCBUILD_FILTERS = os.path.join(
    ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj.filters')

# The modules to freeze, as (section, [raw spec, ...]) pairs.
# The raw spec format is described in parse_frozen_spec().
# When a frozenid appears more than once, the first occurrence is re-used.
FROZEN = [
    ('importlib', [
        'importlib._bootstrap : _frozen_importlib',
        'importlib._bootstrap_external : _frozen_importlib_external',
        'zipimport',
    ]),
    ('Test module', [
        f"hello : __hello__ = {os.path.join(TOOLS_DIR, 'freeze', 'flag.py')}",
        'hello : <__phello__>',
        'hello : __phello__.spam',
    ]),
]
#######################################
# specs
def parse_frozen_spec(rawspec, knownids=None, section=None):
    """Parse one raw spec and return (spec, submodules).

    "spec" is the tuple (frozenid, pyfile, modname, ispkg, section).
    "submodules" is None, or an iterator of such tuples when the spec
    names a package that was resolved from its frozenid.

    Supported formats:

      frozenid
      frozenid : modname
      frozenid : modname = pyfile

    "frozenid" and "modname" must be valid module names (dot-separated
    identifiers).  If "modname" is not provided then "frozenid" is used.
    If "pyfile" is not provided then the filename of the module
    corresponding to "frozenid" is used.

    Angle brackets around a frozenid (e.g. "<encodings>") indicate
    it is a package.  This also means it must be an actual module
    (i.e. "pyfile" cannot have been provided).  Such values can have
    patterns to expand submodules:

      <encodings.*>    - also freeze all direct submodules
      <encodings.**.*> - also freeze the full submodule tree

    As with "frozenid", angle brackets around "modname" indicate
    it is a package.  In that case "frozenid" must not have brackets
    and patterns in "modname" are not supported.  "pyfile" is optional
    there; when given it must not be a directory.

    "knownids" is a container of the frozenids parsed so far, or None.
    A frozenid found in it is re-used instead of being resolved again.
    "section" is stored in the returned tuples unchanged.
    """
    frozenid, _, remainder = rawspec.partition(':')
    modname, _, pyfile = remainder.partition('=')
    frozenid = frozenid.strip()
    modname = modname.strip()
    pyfile = pyfile.strip()

    submodules = None
    if modname.startswith('<') and modname.endswith('>'):
        # "frozenid : <modname>" -- an alias that is a package.
        assert check_modname(frozenid), rawspec
        modname = modname[1:-1]
        assert check_modname(modname), rawspec
        # knownids defaults to None, so guard the membership test
        # the same way the other branches do.
        if knownids and frozenid in knownids:
            pass
        elif pyfile:
            assert not os.path.isdir(pyfile), rawspec
        else:
            pyfile = _resolve_module(frozenid, ispkg=False)
        ispkg = True
    elif pyfile:
        # "frozenid : modname = pyfile" -- an explicit source file.
        assert check_modname(frozenid), rawspec
        assert not knownids or frozenid not in knownids, rawspec
        assert check_modname(modname), rawspec
        assert not os.path.isdir(pyfile), rawspec
        ispkg = False
    elif knownids and frozenid in knownids:
        # Another name for a module that was already parsed.
        assert check_modname(frozenid), rawspec
        assert check_modname(modname), rawspec
        ispkg = False
    else:
        # Resolve the module (and any submodules) from the frozenid.
        assert not modname or check_modname(modname), rawspec
        resolved = iter(resolve_modules(frozenid))
        frozenid, pyfile, ispkg = next(resolved)
        if not modname:
            modname = frozenid
        if ispkg:
            pkgid = frozenid
            pkgname = modname

            def iter_subs():
                # The remaining items of "resolved" are the submodules.
                for subid, subfile, subispkg in resolved:
                    assert not knownids or subid not in knownids, (subid, rawspec)
                    if pkgname:
                        # Rename the package prefix to match the alias.
                        subname = subid.replace(pkgid, pkgname, 1)
                    else:
                        subname = subid
                    yield subid, subfile, subname, subispkg, section
            submodules = iter_subs()

    spec = (frozenid, pyfile or None, modname, ispkg, section)
    return spec, submodules
def parse_frozen_specs(rawspecs=FROZEN):
    """Yield the parsed spec tuple for every raw spec in every section.

    "rawspecs" is an iterable of (section, [rawspec, ...]) pairs.
    The frozenid of each spec is recorded only after that spec has been
    yielded; the same set is handed to the parser as the known ids.
    """
    known = set()
    for section, section_specs in rawspecs:
        for parsed in _parse_frozen_specs(section_specs, section, known):
            yield parsed
            known.add(parsed[0])
def _parse_frozen_specs(rawspecs, section, seen):
    """Yield each parsed spec of one section, then its submodule specs."""
    for raw in rawspecs:
        primary, submodules = parse_frozen_spec(raw, seen, section)
        yield primary
        if submodules:
            yield from submodules
def resolve_frozen_file(spec, destdir=MODULES_DIR):
    """Return the path of the header file for a frozen module.

    "spec" is either a frozenid string or a 5-item spec tuple of the form
    (frozenid, pyfile, modname, ispkg, section).  The file is placed in
    "destdir" and named after the frozenid, with dots replaced by
    underscores and a ".h" suffix.
    """
    if isinstance(spec, str):
        modname = spec
    else:
        # The frozenid is the first item of a spec tuple.
        frozenid, _, _, _, _ = spec
        modname = frozenid
    # We use a consistent naming convention for all frozen modules.
    return os.path.join(destdir, modname.replace('.', '_')) + '.h'
def resolve_frozen_files(specs, destdir=MODULES_DIR):
    """Map each distinct frozenid to its (pyfile, frozenfile) pair.

    Returns (mapping, frozenids), where "frozenids" lists the ids in the
    order they were first seen.  A repeated frozenid is not added again;
    its section must be None or equal to the last section recorded.
    """
    by_id = {}
    ordered_ids = []
    prev_section = None
    for frozenid, pyfile, *_, section in specs:
        if frozenid not in by_id:
            prev_section = section
            by_id[frozenid] = (pyfile, resolve_frozen_file(frozenid, destdir))
            ordered_ids.append(frozenid)
        elif section is None:
            prev_section = None
        else:
            assert section == prev_section
    return by_id, ordered_ids
#######################################
# generic helpers
def resolve_modules(modname, pyfile=None):
    """Yield ``(modname, pyfile, ispkg)`` for a module and its matches.

    A name wrapped in angle brackets (``<pkg>``, ``<pkg.*>``,
    ``<pkg.**.*>``) denotes a package; a trailing match pattern selects
    submodules, which are yielded after the package itself.  Any other
    name denotes a plain module.

    *pyfile* may be omitted (the file is looked up with
    _resolve_module()), a directory (used as the path entry for the
    lookup) or the source file itself.

    Raises ValueError if the resulting module name is not valid.
    """
    if modname.startswith('<') and modname.endswith('>'):
        if pyfile:
            assert os.path.isdir(pyfile) or os.path.basename(pyfile) == '__init__.py', pyfile
        ispkg = True
        modname = modname[1:-1]
        rawname = modname
        # For now, we only expect match patterns at the end of the name.
        _modname, sep, match = modname.rpartition('.')
        if sep and _modname.endswith('.**'):
            modname = _modname[:-3]
            match = f'**.{match}'
        elif sep and match and not match.isidentifier():
            modname = _modname
        else:
            # It's a plain (possibly dotted) name, so we leave it alone.
            # There is no pattern in that case; the last name component
            # must not be mistaken for one.
            match = None
    else:
        ispkg = False
        rawname = modname
        match = None

    if not check_modname(modname):
        raise ValueError(f'not a valid module name ({rawname})')

    if not pyfile:
        pyfile = _resolve_module(modname, ispkg=ispkg)
    elif os.path.isdir(pyfile):
        pyfile = _resolve_module(modname, pyfile, ispkg)
    yield modname, pyfile, ispkg

    if match:
        pkgdir = os.path.dirname(pyfile)
        yield from iter_submodules(modname, pkgdir, match)
def check_modname(modname):
    """Return True if every dot-separated part of *modname* is an identifier."""
    for part in modname.split('.'):
        if not part.isidentifier():
            return False
    return True
def iter_submodules(pkgname, pkgdir=None, match='*'):
    """Return an iterator of ``(modname, pyfile, ispkg)`` for submodules.

    *pkgdir* defaults to the directory of *pkgname* under STDLIB_DIR.
    An empty *match* is treated as ``'**.*'``.  Directory entries are
    visited in name order.
    """
    pkgdir = pkgdir or os.path.join(STDLIB_DIR, *pkgname.split('.'))
    match_modname = _resolve_modname_matcher(match or '**.*', pkgdir)

    def _walk(parent, dirname):
        entries = list(os.scandir(dirname))
        entries.sort(key=lambda e: e.name)
        for entry in entries:
            matched, recursive = match_modname(entry.name)
            if not matched:
                continue
            modname = f'{parent}.{entry.name}'
            if modname.endswith('.py'):
                yield modname[:-3], entry.path, False
                continue
            if not entry.is_dir():
                continue
            pyfile = os.path.join(entry.path, '__init__.py')
            # We ignore namespace packages.
            if not os.path.exists(pyfile):
                continue
            yield modname, pyfile, True
            if recursive:
                yield from _walk(modname, entry.path)

    return _walk(pkgname, pkgdir)
def _resolve_modname_matcher(match, rootdir=None):
if isinstance(match, str):
if match.startswith('**.'):
recursive = True
pat = match[3:]
assert match
else:
recursive = False
pat = match
if pat == '*':
def match_modname(modname):
return True, recursive
else:
raise NotImplementedError(match)
elif callable(match):
match_modname = match(rootdir)
else:
raise ValueError(f'unsupported matcher {match!r}')
return match_modname
def _resolve_module(modname, pathentry=STDLIB_DIR, ispkg=False):
    """Return the expected source file of *modname* under *pathentry*.

    For a package that is its ``__init__.py``; otherwise it is the
    ``.py`` file named after the module.  *pathentry* must be a
    non-empty absolute path.
    """
    assert pathentry, pathentry
    pathentry = os.path.normpath(pathentry)
    assert os.path.isabs(pathentry)
    parts = modname.split('.')
    if not ispkg:
        return os.path.join(pathentry, *parts) + '.py'
    return os.path.join(pathentry, *parts, '__init__.py')
#######################################
# regenerating dependent files
def find_marker(lines, marker, file):
    """Return the index of the first line containing *marker*.

    *file* is only used in the error message.  Raises Exception if no
    line contains the marker.
    """
    found = next(
        (index for index, text in enumerate(lines) if marker in text),
        None,
    )
    if found is None:
        raise Exception(f"Can't find {marker!r} in file {file}")
    return found
def replace_block(lines, start_marker, end_marker, replacements, file):
    """Return *lines* with the text between two markers replaced.

    The line containing *start_marker* and the line containing
    *end_marker* are kept; everything between them is replaced by
    *replacements*, each stripped of trailing whitespace and terminated
    with a newline.  *file* is only used in error messages.

    Raises Exception if a marker is missing or if the end marker does
    not come after the start marker.
    """
    start_pos = find_marker(lines, start_marker, file)
    end_pos = find_marker(lines, end_marker, file)
    if end_pos <= start_pos:
        raise Exception(f"End marker {end_marker!r} "
                        f"occurs before start marker {start_marker!r} "
                        f"in file {file}")
    # The lines are written back through a text-mode file, which already
    # translates '\n' into the platform line ending.  Using os.linesep
    # here would produce '\r\r\n' on Windows.
    replacements = [line.rstrip() + '\n' for line in replacements]
    return lines[:start_pos + 1] + replacements + lines[end_pos:]
def regen_frozen(specs, dest=MODULES_DIR):
    """Regenerate the frozen-module sections of FROZEN_FILE.

    *specs* is an iterable of parsed spec tuples
    ``(frozenid, pyfile, modname, ispkg, section)``.  *dest* is either
    the directory holding the generated headers (a string) or an
    already resolved ``(frozenids, frozen)`` pair.

    Two blocks of FROZEN_FILE are rewritten: the ``#include`` lines and
    the entries of the ``_PyImport_FrozenModules`` array.
    """
    # The specs are walked more than once, so a one-shot iterator
    # must be materialized first.
    specs = list(specs)
    if isinstance(dest, str):
        frozen, frozenids = resolve_frozen_files(specs, dest)
    else:
        frozenids, frozen = dest

    headerlines = []
    parentdir = os.path.dirname(FROZEN_FILE)
    for frozenid in frozenids:
        # Adding a comment to separate sections here doesn't add much,
        # so we don't.
        _, frozenfile = frozen[frozenid]
        header = os.path.relpath(frozenfile, parentdir)
        headerlines.append(f'#include "{header}"')

    deflines = []
    indent = ' '
    lastsection = None
    for spec in specs:
        frozenid, _, modname, ispkg, section = spec
        if section != lastsection:
            # Separate sections with a blank line and a comment.
            if lastsection is not None:
                deflines.append('')
            deflines.append(f'/* {section} */')
        lastsection = section

        # This matches what we do in Programs/_freeze_module.c:
        name = frozenid.replace('.', '_')
        symbol = '_Py_M__' + name
        # A package is marked with a negative size.
        pkg = '-' if ispkg else ''
        line = ('{"%s", %s, %s(int)sizeof(%s)},'
                % (modname, symbol, pkg, symbol))
        # TODO: Consider not folding lines
        if len(line) < 80:
            deflines.append(line)
        else:
            line1, _, line2 = line.rpartition(' ')
            deflines.append(line1)
            deflines.append(indent + line2)

    # Guard against an empty list of specs before peeking at the
    # first line.
    if deflines and not deflines[0]:
        del deflines[0]
    for i, line in enumerate(deflines):
        if line:
            deflines[i] = indent + line

    print(f'# Updating {os.path.relpath(FROZEN_FILE)}')
    with updating_file_with_tmpfile(FROZEN_FILE) as (infile, outfile):
        lines = infile.readlines()
        # TODO: Use more obvious markers, e.g.
        # $START GENERATED FOOBAR$ / $END GENERATED FOOBAR$
        lines = replace_block(
            lines,
            "/* Includes for frozen modules: */",
            "/* End includes */",
            headerlines,
            FROZEN_FILE,
        )
        lines = replace_block(
            lines,
            "static const struct _frozen _PyImport_FrozenModules[] =",
            "/* sentinel */",
            deflines,
            FROZEN_FILE,
        )
        outfile.writelines(lines)
def regen_makefile(frozenids, frozen):
    """Regenerate the frozen-module sections of MAKEFILE.

    *frozenids* gives the frozen IDs in output order and *frozen* maps
    each ID to ``(pyfile, frozenfile)``.  Two blocks are rewritten: the
    ``FROZEN_FILES`` list and the rules that freeze each module.
    """
    frozenfiles = []
    rules = ['']
    for frozenid in frozenids:
        pyfile, frozenfile = frozen[frozenid]

        # Makefile paths always use forward slashes, also when this
        # script runs on a platform whose separator is a backslash.
        header = os.path.relpath(frozenfile, ROOT_DIR).replace('\\', '/')
        frozenfiles.append(f'\t\t$(srcdir)/{header} \\')

        _pyfile = os.path.relpath(pyfile, ROOT_DIR).replace('\\', '/')
        # Note that we freeze the module to the target .h file
        # instead of going through an intermediate file like we used to.
        rules.append(f'{header}: $(srcdir)/Programs/_freeze_module $(srcdir)/{_pyfile}')
        rules.append(f'\t$(srcdir)/Programs/_freeze_module {frozenid} \\')
        rules.append(f'\t\t$(srcdir)/{_pyfile} \\')
        rules.append(f'\t\t$(srcdir)/{header}')
        rules.append('')

    # The last entry of the list must not end with a line continuation.
    if frozenfiles:
        frozenfiles[-1] = frozenfiles[-1].rstrip(" \\")

    print(f'# Updating {os.path.relpath(MAKEFILE)}')
    with updating_file_with_tmpfile(MAKEFILE) as (infile, outfile):
        lines = infile.readlines()
        lines = replace_block(
            lines,
            "FROZEN_FILES =",
            "# End FROZEN_FILES",
            frozenfiles,
            MAKEFILE,
        )
        lines = replace_block(
            lines,
            "# BEGIN: freezing modules",
            "# END: freezing modules",
            rules,
            MAKEFILE,
        )
        outfile.writelines(lines)
def regen_pcbuild(frozenids, frozen):
    """Regenerate the frozen-module sections of the PCbuild files.

    *frozenids* gives the frozen IDs in output order and *frozen* maps
    each ID to ``(pyfile, frozenfile)``.  The block between the
    ``BEGIN frozen modules`` and ``END frozen modules`` markers is
    rewritten in both PCBUILD_PROJECT and PCBUILD_FILTERS.
    """
    projlines = []
    filterlines = []
    for frozenid in frozenids:
        pyfile, frozenfile = frozen[frozenid]

        # The project files use backslash-separated paths.
        _pyfile = os.path.relpath(pyfile, ROOT_DIR).replace('/', '\\')
        header = os.path.relpath(frozenfile, ROOT_DIR).replace('/', '\\')
        # Drop only the ".h" suffix.  str.strip('.h') would remove any
        # leading or trailing "." and "h" characters, mangling a name
        # such as "hello.h".
        basename = header.split('\\')[-1]
        if basename.endswith('.h'):
            basename = basename[:-2]
        intfile = basename + '.g.h'
        projlines.append(f' <None Include="..\\{_pyfile}">')
        projlines.append(f' <ModName>{frozenid}</ModName>')
        projlines.append(f' <IntFile>$(IntDir){intfile}</IntFile>')
        projlines.append(f' <OutFile>$(PySourcePath){header}</OutFile>')
        projlines.append(' </None>')

        filterlines.append(f' <None Include="..\\{_pyfile}">')
        filterlines.append(' <Filter>Python Files</Filter>')
        filterlines.append(' </None>')

    print(f'# Updating {os.path.relpath(PCBUILD_PROJECT)}')
    with updating_file_with_tmpfile(PCBUILD_PROJECT) as (infile, outfile):
        lines = infile.readlines()
        lines = replace_block(
            lines,
            '<!-- BEGIN frozen modules -->',
            '<!-- END frozen modules -->',
            projlines,
            PCBUILD_PROJECT,
        )
        outfile.writelines(lines)

    print(f'# Updating {os.path.relpath(PCBUILD_FILTERS)}')
    with updating_file_with_tmpfile(PCBUILD_FILTERS) as (infile, outfile):
        lines = infile.readlines()
        lines = replace_block(
            lines,
            '<!-- BEGIN frozen modules -->',
            '<!-- END frozen modules -->',
            filterlines,
            PCBUILD_FILTERS,
        )
        outfile.writelines(lines)
#######################################
# freezing modules
def freeze_module(modname, pyfile=None, destdir=MODULES_DIR):
    """Generate the frozen module .h file for the given module.

    *modname* is resolved with resolve_modules(), so a package pattern
    freezes every module it matches.  Each header is written to
    *destdir*.
    """
    for modname, pyfile, ispkg in resolve_modules(modname, pyfile):
        # Use the same naming helper as resolve_frozen_files() so the
        # header name matches the one used in the generated build files.
        frozenfile = resolve_frozen_file(modname, destdir)
        _freeze_module(modname, pyfile, frozenfile)
def _freeze_module(frozenid, pyfile, frozenfile):
    """Run TOOL to freeze *pyfile* into *frozenfile*.

    The tool writes to a temporary ``.new`` file next to the target,
    which replaces the target once the tool has succeeded.

    Exits with an error message if TOOL does not exist; any other
    failure to run the tool is re-raised.
    """
    tmpfile = frozenfile + '.new'

    argv = [TOOL, frozenid, pyfile, tmpfile]
    print('#', ' '.join(os.path.relpath(a) for a in argv))
    try:
        subprocess.run(argv, check=True)
    except (subprocess.CalledProcessError, OSError):
        # A missing executable is reported as an OSError
        # (FileNotFoundError), not as a CalledProcessError, so both
        # must be caught for the hint below to be reachable.
        if not os.path.exists(TOOL):
            sys.exit(f'ERROR: missing {TOOL}; you need to run "make regen-frozen"')
        raise  # re-raise

    os.replace(tmpfile, frozenfile)
#######################################
# the script
def main():
    """Regenerate the build files, then freeze every listed module."""
    # Expand the raw specs, preserving order.
    specs = list(parse_frozen_specs())
    frozen, frozenids = resolve_frozen_files(specs, MODULES_DIR)

    # Regen build-related files.
    regen_frozen(specs, (frozenids, frozen))
    for regen in (regen_makefile, regen_pcbuild):
        regen(frozenids, frozen)

    # Freeze the target modules.
    for frozenid in frozenids:
        source, target = frozen[frozenid]
        _freeze_module(frozenid, source, target)
if __name__ == '__main__':
    # The script takes no command-line arguments.
    argv = sys.argv[1:]
    if argv:
        # The f-prefix is required for the arguments to show up
        # in the message.
        sys.exit(f'ERROR: got unexpected args {argv}')
    main()

View File

@ -6,23 +6,47 @@ This avoids wholesale rebuilds when a code (re)generation phase does not
actually change the in-tree generated code.
"""
import contextlib
import os
import os.path
import sys
def main(old_path, new_path):
with open(old_path, 'rb') as f:
@contextlib.contextmanager
def updating_file_with_tmpfile(filename, tmpfile=None):
    """A context manager for updating a file via a temp file.

    The context manager provides two open files: the source file open
    for reading, and the temp file, open for writing.

    *tmpfile* defaults to *filename* plus ``.tmp``.  If it names an
    existing directory, the temp file is created in that directory.

    Upon exiting: both files are closed, and the source file is replaced
    with the temp file if their contents differ (otherwise the temp
    file is removed).
    """
    # XXX Optionally use tempfile.TemporaryFile?
    if not tmpfile:
        tmpfile = filename + '.tmp'
    elif os.path.isdir(tmpfile):
        # Only the base name is used: joining the full *filename* would
        # discard the directory whenever *filename* is absolute.
        tmpfile = os.path.join(tmpfile, os.path.basename(filename) + '.tmp')
    with open(tmpfile, 'w') as outfile:
        with open(filename) as infile:
            yield infile, outfile
    update_file_with_tmpfile(filename, tmpfile)
def update_file_with_tmpfile(filename, tmpfile):
    """Replace *filename* with *tmpfile* if their contents differ.

    If both files have the same contents, *filename* is left untouched
    and *tmpfile* is removed.  Either way *tmpfile* no longer exists
    afterwards.
    """
    with open(filename, 'rb') as f:
        old_contents = f.read()
    with open(tmpfile, 'rb') as f:
        new_contents = f.read()
    if old_contents != new_contents:
        os.replace(tmpfile, filename)
    else:
        os.unlink(tmpfile)
if __name__ == '__main__':
    # Expects exactly two arguments: the file to update and the file
    # holding its new contents.
    if len(sys.argv) != 3:
        print("Usage: %s <path to be updated> <path with new contents>" % (sys.argv[0],))
        sys.exit(1)
    update_file_with_tmpfile(sys.argv[1], sys.argv[2])