bpo-45020: Identify which frozen modules are actually aliases. (gh-28655)

In the list of generated frozen modules at the top of Tools/scripts/freeze_modules.py, you will find that some of the modules have a different name than the module (or .py file) that is actually frozen. Let's call each case an "alias". Aliases do not come into play until we get to the (generated) list of modules in Python/frozen.c. (The tool for freezing modules, Programs/_freeze_module, is only concerned with the source file, not the module it will be used for.)

Knowledge of which frozen modules are aliases (and the identity of the original module) normally isn't important. However, this information is valuable when we go to set __file__ on frozen stdlib modules. This change updates Tools/scripts/freeze_modules.py to map aliases to the original module name (or None if not a stdlib module) in Python/frozen.c. We also add a helper function in Python/import.c to look up a frozen module's alias and add the result of that function to the frozen info returned from find_frozen().

https://bugs.python.org/issue45020
This commit is contained in:
Eric Snow 2021-10-05 11:26:37 -06:00 committed by GitHub
parent 444429142c
commit 08285d563e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 225 additions and 37 deletions

View File

@ -10,6 +10,13 @@ extern PyStatus _PyImport_ReInitLock(void);
#endif
extern PyObject* _PyImport_BootstrapImp(PyThreadState *tstate);
/* One entry in the table of frozen-module aliases: maps the name a frozen
   module is exposed under to the name of the module that was originally
   frozen.  The table is terminated by an entry whose "name" is NULL. */
struct _module_alias {
const char *name; /* ASCII encoded string: the frozen module's (alias) name */
const char *orig; /* ASCII encoded string: the original module's name; may be NULL when the original is not known */
};
extern const struct _module_alias * _PyImport_FrozenAliases;
#ifdef __cplusplus
}
#endif

View File

@ -824,16 +824,39 @@ class FrozenImporter:
"slated for removal in Python 3.12", DeprecationWarning)
return '<module {!r} ({})>'.format(m.__name__, FrozenImporter._ORIGIN)
@classmethod
def _setup_module(cls, module):
assert not hasattr(module, '__file__'), module.__file__
ispkg = hasattr(module, '__path__')
assert not ispkg or not module.__path__, module.__path__
spec = module.__spec__
assert not ispkg or not spec.submodule_search_locations
if spec.loader_state is None:
spec.loader_state = type(sys.implementation)(
data=None,
origname=None,
)
elif not hasattr(spec.loader_state, 'data'):
spec.loader_state.data = None
if not getattr(spec.loader_state, 'origname', None):
origname = vars(module).pop('__origname__', None)
assert origname, 'see PyImport_ImportFrozenModuleObject()'
spec.loader_state.origname = origname
@classmethod
def find_spec(cls, fullname, path=None, target=None):
    """Return a spec for the frozen module *fullname*, or None.

    ``_imp.find_frozen()`` yields ``(data, ispkg, origname)``.  The
    marshalled data and the original module name are stored together on
    ``spec.loader_state`` as a simple namespace.  *path* and *target*
    are accepted but not used.
    """
    info = _call_with_frames_removed(_imp.find_frozen, fullname)
    if info is None:
        return None
    # find_frozen() returns three items; unpacking only two would raise.
    data, ispkg, origname = info
    spec = spec_from_loader(fullname, cls,
                            origin=cls._ORIGIN,
                            is_package=ispkg)
    spec.loader_state = type(sys.implementation)(
        data=data,
        origname=origname,
    )
    return spec
@classmethod
@ -857,7 +880,7 @@ class FrozenImporter:
spec = module.__spec__
name = spec.name
try:
data = spec.loader_state
data = spec.loader_state.data
except AttributeError:
if not _imp.is_frozen(name):
raise ImportError('{!r} is not a frozen module'.format(name),
@ -868,7 +891,7 @@ class FrozenImporter:
# Note that if this method is called again (e.g. by
# importlib.reload()) then _imp.get_frozen_object() will notice
# no data was provided and will look it up.
spec.loader_state = None
spec.loader_state.data = None
code = _call_with_frames_removed(_imp.get_frozen_object, name, data)
exec(code, module.__dict__)
@ -1220,6 +1243,8 @@ def _setup(sys_module, _imp_module):
continue
spec = _spec_from_module(module, loader)
_init_module_attrs(spec, module)
if loader is FrozenImporter:
loader._setup_module(module)
# Directly load built-in modules needed during bootstrap.
self_module = sys.modules[__name__]

View File

@ -9,7 +9,15 @@ import os.path
import unittest
import warnings
from test.support import import_helper, REPO_ROOT
from test.support import import_helper, REPO_ROOT, STDLIB_DIR
def resolve_stdlib_file(name, ispkg=False):
    """Return the path under STDLIB_DIR for the dotted module *name*.

    With *ispkg* true the result is the package's ``__init__.py``;
    otherwise it is ``<last component>.py`` inside the parent directories.
    """
    assert name
    parts = name.split('.')
    if ispkg:
        return os.path.join(STDLIB_DIR, *parts, '__init__.py')
    else:
        return os.path.join(STDLIB_DIR, *parts) + '.py'
class FindSpecTests(abc.FinderTests):
@ -32,16 +40,30 @@ class FindSpecTests(abc.FinderTests):
self.assertIsNone(spec.submodule_search_locations)
self.assertIsNotNone(spec.loader_state)
def check_data(self, spec):
def check_loader_state(self, spec, origname=None, filename=None):
    """Verify the contents of ``spec.loader_state``.

    ``loader_state.data`` must unmarshal to the module's frozen code
    object, and the remaining attributes must be exactly ``origname``.
    When neither *filename* nor *origname* is given, the expected
    origname defaults to ``spec.name``.
    """
    if not filename:
        if not origname:
            origname = spec.name

    actual = dict(vars(spec.loader_state))

    # Check the code object used to import the frozen module.
    # We can't compare the marshaled data directly because
    # marshal.dumps() would mark "expected" (below) as a ref,
    # which slightly changes the output.
    # (See https://bugs.python.org/issue34093.)
    data = actual.pop('data')
    with import_helper.frozen_modules():
        expected = _imp.get_frozen_object(spec.name)
    code = marshal.loads(data)
    self.assertEqual(code, expected)

    # Check the rest of spec.loader_state.
    expected = dict(
        origname=origname,
    )
    self.assertDictEqual(actual, expected)
def check_search_locations(self, spec):
# Frozen packages do not have any path entries.
# (See https://bugs.python.org/issue21736.)
@ -58,7 +80,7 @@ class FindSpecTests(abc.FinderTests):
with self.subTest(f'{name} -> {name}'):
spec = self.find(name)
self.check_basic(spec, name)
self.check_data(spec)
self.check_loader_state(spec)
modules = {
'__hello_alias__': '__hello__',
'_frozen_importlib': 'importlib._bootstrap',
@ -67,26 +89,28 @@ class FindSpecTests(abc.FinderTests):
with self.subTest(f'{name} -> {origname}'):
spec = self.find(name)
self.check_basic(spec, name)
self.check_data(spec)
self.check_loader_state(spec, origname)
modules = [
'__phello__.__init__',
'__phello__.ham.__init__',
]
for name in modules:
origname = name.rpartition('.')[0]
origname = '<' + name.rpartition('.')[0]
filename = resolve_stdlib_file(name)
with self.subTest(f'{name} -> {origname}'):
spec = self.find(name)
self.check_basic(spec, name)
self.check_data(spec)
self.check_loader_state(spec, origname, filename)
modules = {
'__hello_only__': ('Tools', 'freeze', 'flag.py'),
}
for name, path in modules.items():
origname = None
filename = os.path.join(REPO_ROOT, *path)
with self.subTest(f'{name} -> {filename}'):
spec = self.find(name)
self.check_basic(spec, name)
self.check_data(spec)
self.check_loader_state(spec, origname, filename)
def test_package(self):
packages = [
@ -94,19 +118,21 @@ class FindSpecTests(abc.FinderTests):
'__phello__.ham',
]
for name in packages:
filename = resolve_stdlib_file(name, ispkg=True)
with self.subTest(f'{name} -> {name}'):
spec = self.find(name)
self.check_basic(spec, name, ispkg=True)
self.check_data(spec)
self.check_loader_state(spec, name, filename)
self.check_search_locations(spec)
packages = {
'__phello_alias__': '__hello__',
}
for name, origname in packages.items():
filename = resolve_stdlib_file(origname, ispkg=False)
with self.subTest(f'{name} -> {origname}'):
spec = self.find(name)
self.check_basic(spec, name, ispkg=True)
self.check_data(spec)
self.check_loader_state(spec, origname, filename)
self.check_search_locations(spec)
# These are covered by test_module() and test_package().

View File

@ -32,17 +32,19 @@ def fresh(name, *, oldapi=False):
class ExecModuleTests(abc.LoaderTests):
def exec_module(self, name):
def exec_module(self, name, origname=None):
with import_helper.frozen_modules():
is_package = self.machinery.FrozenImporter.is_package(name)
code = _imp.get_frozen_object(name)
data = marshal.dumps(code)
spec = self.machinery.ModuleSpec(
name,
self.machinery.FrozenImporter,
origin='frozen',
is_package=is_package,
loader_state=data,
loader_state=types.SimpleNamespace(
data=marshal.dumps(code),
origname=origname or name,
),
)
module = types.ModuleType(name)
module.__spec__ = spec
@ -66,7 +68,8 @@ class ExecModuleTests(abc.LoaderTests):
self.assertEqual(getattr(module, attr), value)
self.assertEqual(output, 'Hello world!\n')
self.assertTrue(hasattr(module, '__spec__'))
self.assertIsNone(module.__spec__.loader_state)
self.assertIsNone(module.__spec__.loader_state.data)
self.assertEqual(module.__spec__.loader_state.origname, name)
def test_package(self):
name = '__phello__'
@ -79,7 +82,8 @@ class ExecModuleTests(abc.LoaderTests):
name=name, attr=attr, given=attr_value,
expected=value))
self.assertEqual(output, 'Hello world!\n')
self.assertIsNone(module.__spec__.loader_state)
self.assertIsNone(module.__spec__.loader_state.data)
self.assertEqual(module.__spec__.loader_state.origname, name)
def test_lacking_parent(self):
name = '__phello__.spam'

View File

@ -0,0 +1,5 @@
For frozen stdlib modules, record the original module name as
``module.__spec__.loader_state.origname``. If the value is different than
``module.__spec__.name`` then the module was defined as an alias in
Tools/scripts/freeze_modules.py. If it is ``None`` then the module comes
from a source file outside the stdlib.

View File

@ -9,6 +9,7 @@
#include <Python.h>
#include <marshal.h>
#include <pycore_import.h>
#include <stdio.h>
#include <sys/types.h>
@ -24,8 +25,12 @@
static const struct _frozen _PyImport_FrozenModules[] = {
{0, 0, 0} /* sentinel */
};
static const struct _module_alias aliases[] = {
{0, 0} /* sentinel */
};
const struct _frozen *PyImport_FrozenModules;
const struct _module_alias *_PyImport_FrozenAliases;
static const char header[] =
"/* Auto-generated by Programs/_freeze_module.c */";
@ -183,6 +188,7 @@ main(int argc, char *argv[])
const char *name, *inpath, *outpath;
PyImport_FrozenModules = _PyImport_FrozenModules;
_PyImport_FrozenAliases = aliases;
if (argc != 4) {
fprintf(stderr, "need to specify the name, input and output paths\n");

View File

@ -178,7 +178,10 @@ PyDoc_STRVAR(_imp_find_frozen__doc__,
"The returned info (a 2-tuple):\n"
"\n"
" * data the raw marshalled bytes\n"
" * is_package whether or not it is a package");
" * is_package whether or not it is a package\n"
" * origname the originally frozen module\'s name, or None if not\n"
" a stdlib module (this will usually be the same as\n"
" the module\'s current name)");
#define _IMP_FIND_FROZEN_METHODDEF \
{"find_frozen", (PyCFunction)_imp_find_frozen, METH_O, _imp_find_frozen__doc__},
@ -545,4 +548,4 @@ exit:
#ifndef _IMP_EXEC_DYNAMIC_METHODDEF
#define _IMP_EXEC_DYNAMIC_METHODDEF
#endif /* !defined(_IMP_EXEC_DYNAMIC_METHODDEF) */
/*[clinic end generated code: output=a31e1c00653359ff input=a9049054013a1b77]*/
/*[clinic end generated code: output=8c8dd08158f9ac7c input=a9049054013a1b77]*/

View File

@ -36,6 +36,7 @@
and __phello__.spam. Loading any will print some famous words... */
#include "Python.h"
#include "pycore_import.h"
/* Includes for frozen modules: */
#include "frozen_modules/importlib._bootstrap.h"
@ -102,9 +103,24 @@ static const struct _frozen _PyImport_FrozenModules[] = {
{"__phello__.spam", _Py_M____phello___spam,
(int)sizeof(_Py_M____phello___spam)},
{"__hello_only__", _Py_M__frozen_only, (int)sizeof(_Py_M__frozen_only)},
{0, 0, 0} /* sentinel */
{0, 0, 0} /* modules sentinel */
};
static const struct _module_alias aliases[] = {
{"_frozen_importlib", "importlib._bootstrap"},
{"_frozen_importlib_external", "importlib._bootstrap_external"},
{"os.path", "posixpath"},
{"__hello_alias__", "__hello__"},
{"__phello_alias__", "__hello__"},
{"__phello_alias__.spam", "__hello__"},
{"__phello__.__init__", "<__phello__"},
{"__phello__.ham.__init__", "<__phello__.ham"},
{"__hello_only__", NULL},
{0, 0} /* aliases sentinel */
};
const struct _module_alias *_PyImport_FrozenAliases = aliases;
/* Embedding apps may change this pointer to point to their favorite
collection of frozen modules: */

View File

@ -1046,6 +1046,29 @@ _imp_create_builtin(PyObject *module, PyObject *spec)
}
/* Look "name" up in the NULL-terminated "aliases" table.

   Returns true when an entry with that name exists and, if "alias" is not
   NULL, stores the entry's original module name in *alias (this is NULL
   when the original module isn't known).  Returns false, leaving *alias
   untouched, when the name is not an alias. */
static bool
resolve_module_alias(const char *name, const struct _module_alias *aliases,
                     const char **alias)
{
    const struct _module_alias *cur = aliases;
    while (cur->name != NULL) {
        if (strcmp(name, cur->name) != 0) {
            cur++;
            continue;
        }
        /* Found the entry for this name. */
        if (alias != NULL) {
            *alias = cur->orig;
        }
        return true;
    }
    /* Reached the sentinel: it isn't an alias. */
    return false;
}
/* Frozen modules */
static bool
@ -1161,16 +1184,15 @@ struct frozen_info {
const char *data;
Py_ssize_t size;
bool is_package;
bool is_alias;
const char *origname;
};
static frozen_status
find_frozen(PyObject *nameobj, struct frozen_info *info)
{
if (info != NULL) {
info->nameobj = NULL;
info->data = NULL;
info->size = 0;
info->is_package = false;
memset(info, 0, sizeof(*info));
}
if (nameobj == NULL || nameobj == Py_None) {
@ -1205,6 +1227,9 @@ find_frozen(PyObject *nameobj, struct frozen_info *info)
info->data = (const char *)p->code;
info->size = p->size < 0 ? -(p->size) : p->size;
info->is_package = p->size < 0 ? true : false;
info->origname = name;
info->is_alias = resolve_module_alias(name, _PyImport_FrozenAliases,
&info->origname);
}
if (p->code == NULL) {
@ -1246,7 +1271,8 @@ int
PyImport_ImportFrozenModuleObject(PyObject *name)
{
PyThreadState *tstate = _PyThreadState_GET();
PyObject *co, *m, *d;
PyObject *co, *m, *d = NULL;
int err;
struct frozen_info info;
frozen_status status = find_frozen(name, &info);
@ -1267,7 +1293,6 @@ PyImport_ImportFrozenModuleObject(PyObject *name)
if (info.is_package) {
/* Set __path__ to the empty list */
PyObject *l;
int err;
m = import_add_module(tstate, name);
if (m == NULL)
goto err_return;
@ -1288,15 +1313,33 @@ PyImport_ImportFrozenModuleObject(PyObject *name)
goto err_return;
}
m = exec_code_in_module(tstate, name, d, co);
Py_DECREF(d);
if (m == NULL) {
goto err_return;
}
Py_DECREF(co);
Py_DECREF(m);
/* Set __origname__ (consumed in FrozenImporter._setup_module()). */
PyObject *origname;
if (info.origname) {
origname = PyUnicode_FromString(info.origname);
if (origname == NULL) {
goto err_return;
}
}
else {
Py_INCREF(Py_None);
origname = Py_None;
}
err = PyDict_SetItemString(d, "__origname__", origname);
Py_DECREF(origname);
if (err != 0) {
goto err_return;
}
Py_DECREF(d);
Py_DECREF(co);
return 1;
err_return:
Py_XDECREF(d);
Py_DECREF(co);
return -1;
}
@ -2014,11 +2057,14 @@ The returned info (a 3-tuple):
* data the raw marshalled bytes
* is_package whether or not it is a package
* origname the originally frozen module's name, or None if not
a stdlib module (this will usually be the same as
the module's current name)
[clinic start generated code]*/
static PyObject *
_imp_find_frozen_impl(PyObject *module, PyObject *name)
/*[clinic end generated code: output=3fd17da90d417e4e input=4e52b3ac95f6d7ab]*/
/*[clinic end generated code: output=3fd17da90d417e4e input=6aa7b9078a89280a]*/
{
struct frozen_info info;
frozen_status status = find_frozen(name, &info);
@ -2032,12 +2078,25 @@ _imp_find_frozen_impl(PyObject *module, PyObject *name)
set_frozen_error(status, name);
return NULL;
}
PyObject *data = PyBytes_FromStringAndSize(info.data, info.size);
if (data == NULL) {
return NULL;
}
PyObject *result = PyTuple_Pack(2, data,
info.is_package ? Py_True : Py_False);
PyObject *origname = NULL;
if (info.origname != NULL && info.origname[0] != '\0') {
origname = PyUnicode_FromString(info.origname);
if (origname == NULL) {
Py_DECREF(data);
return NULL;
}
}
PyObject *result = PyTuple_Pack(3, data,
info.is_package ? Py_True : Py_False,
origname ? origname : Py_None);
Py_XDECREF(origname);
Py_DECREF(data);
return result;
}

View File

@ -274,6 +274,15 @@ class FrozenSource(namedtuple('FrozenSource', 'id pyfile frozenfile')):
name = self.frozenid.replace('.', '_')
return '_Py_M__' + name
@property
def ispkg(self):
    """Whether the frozen source is a package.

    True only when the source file is an ``__init__.py`` and the frozen
    ID does not itself end in ``.__init__``.  False when there is no
    source file.
    """
    if not self.pyfile:
        return False
    elif self.frozenid.endswith('.__init__'):
        return False
    else:
        return os.path.basename(self.pyfile) == '__init__.py'
def resolve_frozen_file(frozenid, destdir=MODULES_DIR):
"""Return the filename corresponding to the given frozen ID.
@ -305,6 +314,17 @@ class FrozenModule(namedtuple('FrozenModule', 'name ispkg section source')):
def modname(self):
return self.name
@property
def orig(self):
    """The module name of the frozen source (``self.source.modname``)."""
    return self.source.modname
@property
def isalias(self):
    """Whether this module's name differs from its source's module name.

    A module whose source has no module name is always treated as an
    alias.
    """
    orig = self.source.modname
    if not orig:
        return True
    return self.name != orig
def summarize(self):
source = self.source.modname
if source:
@ -507,6 +527,7 @@ def regen_frozen(modules):
headerlines.append(f'#include "{header}"')
deflines = []
aliaslines = []
indent = ' '
lastsection = None
for mod in modules:
@ -528,6 +549,15 @@ def regen_frozen(modules):
deflines.append(line1)
deflines.append(indent + line2)
if mod.isalias:
if not mod.orig:
entry = '{"%s", NULL},' % (mod.name,)
elif mod.source.ispkg:
entry = '{"%s", "<%s"},' % (mod.name, mod.orig)
else:
entry = '{"%s", "%s"},' % (mod.name, mod.orig)
aliaslines.append(indent + entry)
if not deflines[0]:
del deflines[0]
for i, line in enumerate(deflines):
@ -549,10 +579,17 @@ def regen_frozen(modules):
lines = replace_block(
lines,
"static const struct _frozen _PyImport_FrozenModules[] =",
"/* sentinel */",
"/* modules sentinel */",
deflines,
FROZEN_FILE,
)
lines = replace_block(
lines,
"const struct _module_alias aliases[] =",
"/* aliases sentinel */",
aliaslines,
FROZEN_FILE,
)
outfile.writelines(lines)