bpo-45324: Capture data in FrozenImporter.find_spec() to use in exec_module(). (gh-28633)

Before this change we end up duplicating effort and throwing away data in FrozenImporter.find_spec().  Now we do the work once in find_spec() and the only thing we do in FrozenImporter.exec_module() is turn the raw frozen data into a code object and then exec it.

We've added _imp.find_frozen(), add an arg to _imp.get_frozen_object(), and updated FrozenImporter.  We've also moved some code around to reduce duplication, get a little more consistency in outcomes, and be more efficient.

Note that this change is mostly necessary if we want to set __file__ on frozen stdlib modules. (See https://bugs.python.org/issue21736.)

https://bugs.python.org/issue45324
This commit is contained in:
Eric Snow 2021-10-05 10:01:27 -06:00 committed by GitHub
parent b9bb74871b
commit c3d9ac8b34
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 331 additions and 120 deletions

View File

@ -826,10 +826,15 @@ class FrozenImporter:
@classmethod
def find_spec(cls, fullname, path=None, target=None):
if _imp.is_frozen(fullname):
return spec_from_loader(fullname, cls, origin=cls._ORIGIN)
else:
info = _call_with_frames_removed(_imp.find_frozen, fullname)
if info is None:
return None
data, ispkg = info
spec = spec_from_loader(fullname, cls,
origin=cls._ORIGIN,
is_package=ispkg)
spec.loader_state = data
return spec
@classmethod
def find_module(cls, fullname, path=None):
@ -849,11 +854,22 @@ class FrozenImporter:
@staticmethod
def exec_module(module):
name = module.__spec__.name
if not _imp.is_frozen(name):
raise ImportError('{!r} is not a frozen module'.format(name),
name=name)
code = _call_with_frames_removed(_imp.get_frozen_object, name)
spec = module.__spec__
name = spec.name
try:
data = spec.loader_state
except AttributeError:
if not _imp.is_frozen(name):
raise ImportError('{!r} is not a frozen module'.format(name),
name=name)
data = None
else:
# We clear the extra data we got from the finder, to save memory.
# Note that if this method is called again (e.g. by
# importlib.reload()) then _imp.get_frozen_object() will notice
# no data was provided and will look it up.
spec.loader_state = None
code = _call_with_frames_removed(_imp.get_frozen_object, name, data)
exec(code, module.__dict__)
@classmethod

View File

@ -1,13 +1,15 @@
from .. import abc
import os.path
from .. import util
machinery = util.import_importlib('importlib.machinery')
import _imp
import marshal
import os.path
import unittest
import warnings
from test.support import import_helper
from test.support import import_helper, REPO_ROOT
class FindSpecTests(abc.FinderTests):
@ -19,39 +21,67 @@ class FindSpecTests(abc.FinderTests):
with import_helper.frozen_modules():
return finder.find_spec(name, **kwargs)
def check(self, spec, name):
def check_basic(self, spec, name, ispkg=False):
self.assertEqual(spec.name, name)
self.assertIs(spec.loader, self.machinery.FrozenImporter)
self.assertEqual(spec.origin, 'frozen')
self.assertFalse(spec.has_location)
if ispkg:
self.assertIsNotNone(spec.submodule_search_locations)
else:
self.assertIsNone(spec.submodule_search_locations)
self.assertIsNotNone(spec.loader_state)
def check_search_location(self, spec, source=None):
# Frozen packages do not have any path entries.
# (See https://bugs.python.org/issue21736.)
expected = []
self.assertListEqual(spec.submodule_search_locations, expected)
def check_data(self, spec, source=None, ispkg=None):
with import_helper.frozen_modules():
expected = _imp.get_frozen_object(spec.name)
data = spec.loader_state
# We can't compare the marshaled data directly because
# marshal.dumps() would mark "expected" as a ref, which slightly
# changes the output. (See https://bugs.python.org/issue34093.)
code = marshal.loads(data)
self.assertEqual(code, expected)
def test_module(self):
names = [
'__hello__',
'__hello_alias__',
'__hello_only__',
'__phello__.__init__',
'__phello__.spam',
'__phello__.ham.__init__',
'__phello__.ham.eggs',
]
for name in names:
modules = {
'__hello__': None,
'__phello__.__init__': None,
'__phello__.spam': None,
'__phello__.ham.__init__': None,
'__phello__.ham.eggs': None,
'__hello_alias__': '__hello__',
}
for name, source in modules.items():
with self.subTest(name):
spec = self.find(name)
self.check(spec, name)
self.assertEqual(spec.submodule_search_locations, None)
self.check_basic(spec, name)
self.check_data(spec, source)
def test_package(self):
names = [
'__phello__',
'__phello__.ham',
'__phello_alias__',
]
for name in names:
modules = {
'__phello__': None,
'__phello__.ham': None,
'__phello_alias__': '__hello__',
}
for name, source in modules.items():
with self.subTest(name):
spec = self.find(name)
self.check(spec, name)
self.assertEqual(spec.submodule_search_locations, [])
self.check_basic(spec, name, ispkg=True)
self.check_search_location(spec, source)
self.check_data(spec, source, ispkg=True)
def test_frozen_only(self):
name = '__hello_only__'
source = os.path.join(REPO_ROOT, 'Tools', 'freeze', 'flag.py')
spec = self.find(name)
self.check_basic(spec, name)
self.check_data(spec, source)
# These are covered by test_module() and test_package().
test_module_in_package = None

View File

@ -4,7 +4,9 @@ from .. import util
machinery = util.import_importlib('importlib.machinery')
from test.support import captured_stdout, import_helper
import _imp
import contextlib
import marshal
import types
import unittest
import warnings
@ -33,11 +35,14 @@ class ExecModuleTests(abc.LoaderTests):
def exec_module(self, name):
with import_helper.frozen_modules():
is_package = self.machinery.FrozenImporter.is_package(name)
code = _imp.get_frozen_object(name)
data = marshal.dumps(code)
spec = self.machinery.ModuleSpec(
name,
self.machinery.FrozenImporter,
origin='frozen',
is_package=is_package,
loader_state=data,
)
module = types.ModuleType(name)
module.__spec__ = spec
@ -61,6 +66,7 @@ class ExecModuleTests(abc.LoaderTests):
self.assertEqual(getattr(module, attr), value)
self.assertEqual(output, 'Hello world!\n')
self.assertTrue(hasattr(module, '__spec__'))
self.assertIsNone(module.__spec__.loader_state)
def test_package(self):
name = '__phello__'
@ -73,6 +79,7 @@ class ExecModuleTests(abc.LoaderTests):
name=name, attr=attr, given=attr_value,
expected=value))
self.assertEqual(output, 'Hello world!\n')
self.assertIsNone(module.__spec__.loader_state)
def test_lacking_parent(self):
name = '__phello__.spam'

View File

@ -0,0 +1,3 @@
In FrozenImporter.find_spec(), we now preserve the information needed in
exec_module() to load the module. This change mostly impacts internal
details, rather than changing the importer's behavior.

View File

@ -169,33 +169,80 @@ exit:
return return_value;
}
PyDoc_STRVAR(_imp_get_frozen_object__doc__,
"get_frozen_object($module, name, /)\n"
PyDoc_STRVAR(_imp_find_frozen__doc__,
"find_frozen($module, name, /)\n"
"--\n"
"\n"
"Create a code object for a frozen module.");
"Return info about the corresponding frozen module (if there is one) or None.\n"
"\n"
"The returned info (a 2-tuple):\n"
"\n"
" * data the raw marshalled bytes\n"
" * is_package whether or not it is a package");
#define _IMP_GET_FROZEN_OBJECT_METHODDEF \
{"get_frozen_object", (PyCFunction)_imp_get_frozen_object, METH_O, _imp_get_frozen_object__doc__},
#define _IMP_FIND_FROZEN_METHODDEF \
{"find_frozen", (PyCFunction)_imp_find_frozen, METH_O, _imp_find_frozen__doc__},
static PyObject *
_imp_get_frozen_object_impl(PyObject *module, PyObject *name);
_imp_find_frozen_impl(PyObject *module, PyObject *name);
static PyObject *
_imp_get_frozen_object(PyObject *module, PyObject *arg)
_imp_find_frozen(PyObject *module, PyObject *arg)
{
PyObject *return_value = NULL;
PyObject *name;
if (!PyUnicode_Check(arg)) {
_PyArg_BadArgument("get_frozen_object", "argument", "str", arg);
_PyArg_BadArgument("find_frozen", "argument", "str", arg);
goto exit;
}
if (PyUnicode_READY(arg) == -1) {
goto exit;
}
name = arg;
return_value = _imp_get_frozen_object_impl(module, name);
return_value = _imp_find_frozen_impl(module, name);
exit:
return return_value;
}
PyDoc_STRVAR(_imp_get_frozen_object__doc__,
"get_frozen_object($module, name, data=None, /)\n"
"--\n"
"\n"
"Create a code object for a frozen module.");
#define _IMP_GET_FROZEN_OBJECT_METHODDEF \
{"get_frozen_object", (PyCFunction)(void(*)(void))_imp_get_frozen_object, METH_FASTCALL, _imp_get_frozen_object__doc__},
static PyObject *
_imp_get_frozen_object_impl(PyObject *module, PyObject *name,
PyObject *dataobj);
static PyObject *
_imp_get_frozen_object(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
{
PyObject *return_value = NULL;
PyObject *name;
PyObject *dataobj = Py_None;
if (!_PyArg_CheckPositional("get_frozen_object", nargs, 1, 2)) {
goto exit;
}
if (!PyUnicode_Check(args[0])) {
_PyArg_BadArgument("get_frozen_object", "argument 1", "str", args[0]);
goto exit;
}
if (PyUnicode_READY(args[0]) == -1) {
goto exit;
}
name = args[0];
if (nargs < 2) {
goto skip_optional;
}
dataobj = args[1];
skip_optional:
return_value = _imp_get_frozen_object_impl(module, name, dataobj);
exit:
return return_value;
@ -498,4 +545,4 @@ exit:
#ifndef _IMP_EXEC_DYNAMIC_METHODDEF
#define _IMP_EXEC_DYNAMIC_METHODDEF
#endif /* !defined(_IMP_EXEC_DYNAMIC_METHODDEF) */
/*[clinic end generated code: output=96038c277119d6e3 input=a9049054013a1b77]*/
/*[clinic end generated code: output=a31e1c00653359ff input=a9049054013a1b77]*/

View File

@ -887,10 +887,6 @@ _imp__fix_co_filename_impl(PyObject *module, PyCodeObject *code,
}
/* Forward */
static const struct _frozen * find_frozen(PyObject *);
/* Helper to test for built-in module */
static int
@ -1119,75 +1115,125 @@ list_frozen_module_names()
return names;
}
static const struct _frozen *
find_frozen(PyObject *modname)
typedef enum {
FROZEN_OKAY,
FROZEN_BAD_NAME, // The given module name wasn't valid.
FROZEN_NOT_FOUND, // It wasn't in PyImport_FrozenModules.
FROZEN_DISABLED, // -X frozen_modules=off (and not essential)
FROZEN_EXCLUDED, // The PyImport_FrozenModules entry has NULL "code".
FROZEN_INVALID, // The PyImport_FrozenModules entry is bogus.
} frozen_status;
static inline void
set_frozen_error(frozen_status status, PyObject *modname)
{
if (modname == NULL) {
return NULL;
const char *err = NULL;
switch (status) {
case FROZEN_BAD_NAME:
case FROZEN_NOT_FOUND:
case FROZEN_DISABLED:
err = "No such frozen object named %R";
break;
case FROZEN_EXCLUDED:
err = "Excluded frozen object named %R";
break;
case FROZEN_INVALID:
err = "Frozen object named %R is invalid";
break;
case FROZEN_OKAY:
// There was no error.
break;
default:
Py_UNREACHABLE();
}
const char *name = PyUnicode_AsUTF8(modname);
if (err != NULL) {
PyObject *msg = PyUnicode_FromFormat(err, modname);
if (msg == NULL) {
PyErr_Clear();
}
PyErr_SetImportError(msg, modname, NULL);
Py_XDECREF(msg);
}
}
struct frozen_info {
PyObject *nameobj;
const char *data;
Py_ssize_t size;
bool is_package;
};
static frozen_status
find_frozen(PyObject *nameobj, struct frozen_info *info)
{
if (info != NULL) {
info->nameobj = NULL;
info->data = NULL;
info->size = 0;
info->is_package = false;
}
if (nameobj == NULL || nameobj == Py_None) {
return FROZEN_BAD_NAME;
}
const char *name = PyUnicode_AsUTF8(nameobj);
if (name == NULL) {
// Note that this function previously used
// _PyUnicode_EqualToASCIIString(). We clear the error here
// (instead of propagating it) to match the earlier behavior
// more closely.
PyErr_Clear();
return NULL;
return FROZEN_BAD_NAME;
}
if (!use_frozen() && !is_essential_frozen_module(name)) {
return NULL;
return FROZEN_DISABLED;
}
const struct _frozen *p;
for (p = PyImport_FrozenModules; ; p++) {
if (p->name == NULL) {
return NULL;
// We hit the end-of-list sentinel value.
return FROZEN_NOT_FOUND;
}
if (strcmp(name, p->name) == 0) {
break;
}
}
return p;
}
static PyObject *
get_frozen_object(PyObject *name)
{
const struct _frozen *p = find_frozen(name);
int size;
if (p == NULL) {
PyErr_Format(PyExc_ImportError,
"No such frozen object named %R",
name);
return NULL;
if (info != NULL) {
info->nameobj = nameobj; // borrowed
info->data = (const char *)p->code;
info->size = p->size < 0 ? -(p->size) : p->size;
info->is_package = p->size < 0 ? true : false;
}
if (p->code == NULL) {
PyErr_Format(PyExc_ImportError,
"Excluded frozen object named %R",
name);
return NULL;
/* It is frozen but marked as un-importable. */
return FROZEN_EXCLUDED;
}
size = p->size;
if (size < 0)
size = -size;
return PyMarshal_ReadObjectFromString((const char *)p->code, size);
if (p->code[0] == '\0' || p->size == 0) {
return FROZEN_INVALID;
}
return FROZEN_OKAY;
}
static PyObject *
is_frozen_package(PyObject *name)
unmarshal_frozen_code(struct frozen_info *info)
{
const struct _frozen *p = find_frozen(name);
int size;
if (p == NULL) {
PyErr_Format(PyExc_ImportError,
"No such frozen object named %R",
name);
PyObject *co = PyMarshal_ReadObjectFromString(info->data, info->size);
if (co == NULL) {
set_frozen_error(FROZEN_INVALID, info->nameobj);
return NULL;
}
size = p->size;
if (size < 0)
Py_RETURN_TRUE;
else
Py_RETURN_FALSE;
if (!PyCode_Check(co)) {
// We stick with TypeError for backward compatibility.
PyErr_Format(PyExc_TypeError,
"frozen object %R is not a code object",
info->nameobj);
Py_DECREF(co);
return NULL;
}
return co;
}
@ -1200,35 +1246,25 @@ int
PyImport_ImportFrozenModuleObject(PyObject *name)
{
PyThreadState *tstate = _PyThreadState_GET();
const struct _frozen *p;
PyObject *co, *m, *d;
int ispackage;
int size;
p = find_frozen(name);
if (p == NULL)
struct frozen_info info;
frozen_status status = find_frozen(name, &info);
if (status == FROZEN_NOT_FOUND || status == FROZEN_DISABLED) {
return 0;
if (p->code == NULL) {
_PyErr_Format(tstate, PyExc_ImportError,
"Excluded frozen object named %R",
name);
}
else if (status == FROZEN_BAD_NAME) {
return 0;
}
else if (status != FROZEN_OKAY) {
set_frozen_error(status, name);
return -1;
}
size = p->size;
ispackage = (size < 0);
if (ispackage)
size = -size;
co = PyMarshal_ReadObjectFromString((const char *)p->code, size);
if (co == NULL)
co = unmarshal_frozen_code(&info);
if (co == NULL) {
return -1;
if (!PyCode_Check(co)) {
_PyErr_Format(tstate, PyExc_TypeError,
"frozen object %R is not a code object",
name);
goto err_return;
}
if (ispackage) {
if (info.is_package) {
/* Set __path__ to the empty list */
PyObject *l;
int err;
@ -1966,20 +2002,83 @@ _imp_init_frozen_impl(PyObject *module, PyObject *name)
return import_add_module(tstate, name);
}
/*[clinic input]
_imp.find_frozen
name: unicode
/
Return info about the corresponding frozen module (if there is one) or None.
The returned info (a 2-tuple):
* data the raw marshalled bytes
* is_package whether or not it is a package
[clinic start generated code]*/
static PyObject *
_imp_find_frozen_impl(PyObject *module, PyObject *name)
/*[clinic end generated code: output=3fd17da90d417e4e input=4e52b3ac95f6d7ab]*/
{
struct frozen_info info;
frozen_status status = find_frozen(name, &info);
if (status == FROZEN_NOT_FOUND || status == FROZEN_DISABLED) {
Py_RETURN_NONE;
}
else if (status == FROZEN_BAD_NAME) {
Py_RETURN_NONE;
}
else if (status != FROZEN_OKAY) {
set_frozen_error(status, name);
return NULL;
}
PyObject *data = PyBytes_FromStringAndSize(info.data, info.size);
if (data == NULL) {
return NULL;
}
PyObject *result = PyTuple_Pack(2, data,
info.is_package ? Py_True : Py_False);
Py_DECREF(data);
return result;
}
/*[clinic input]
_imp.get_frozen_object
name: unicode
data as dataobj: object = None
/
Create a code object for a frozen module.
[clinic start generated code]*/
static PyObject *
_imp_get_frozen_object_impl(PyObject *module, PyObject *name)
/*[clinic end generated code: output=2568cc5b7aa0da63 input=ed689bc05358fdbd]*/
_imp_get_frozen_object_impl(PyObject *module, PyObject *name,
PyObject *dataobj)
/*[clinic end generated code: output=54368a673a35e745 input=034bdb88f6460b7b]*/
{
return get_frozen_object(name);
struct frozen_info info;
if (PyBytes_Check(dataobj)) {
info.nameobj = name;
info.data = PyBytes_AS_STRING(dataobj);
info.size = PyBytes_Size(dataobj);
if (info.size == 0) {
set_frozen_error(FROZEN_INVALID, name);
return NULL;
}
}
else if (dataobj != Py_None) {
_PyArg_BadArgument("get_frozen_object", "argument 2", "bytes", dataobj);
return NULL;
}
else {
frozen_status status = find_frozen(name, &info);
if (status != FROZEN_OKAY) {
set_frozen_error(status, name);
return NULL;
}
}
return unmarshal_frozen_code(&info);
}
/*[clinic input]
@ -1995,7 +2094,13 @@ static PyObject *
_imp_is_frozen_package_impl(PyObject *module, PyObject *name)
/*[clinic end generated code: output=e70cbdb45784a1c9 input=81b6cdecd080fbb8]*/
{
return is_frozen_package(name);
struct frozen_info info;
frozen_status status = find_frozen(name, &info);
if (status != FROZEN_OKAY && status != FROZEN_EXCLUDED) {
set_frozen_error(status, name);
return NULL;
}
return PyBool_FromLong(info.is_package);
}
/*[clinic input]
@ -2027,10 +2132,12 @@ static PyObject *
_imp_is_frozen_impl(PyObject *module, PyObject *name)
/*[clinic end generated code: output=01f408f5ec0f2577 input=7301dbca1897d66b]*/
{
const struct _frozen *p;
p = find_frozen(name);
return PyBool_FromLong((long) (p == NULL ? 0 : p->size));
struct frozen_info info;
frozen_status status = find_frozen(name, &info);
if (status != FROZEN_OKAY) {
Py_RETURN_FALSE;
}
Py_RETURN_TRUE;
}
/*[clinic input]
@ -2221,6 +2328,7 @@ static PyMethodDef imp_methods[] = {
_IMP_LOCK_HELD_METHODDEF
_IMP_ACQUIRE_LOCK_METHODDEF
_IMP_RELEASE_LOCK_METHODDEF
_IMP_FIND_FROZEN_METHODDEF
_IMP_GET_FROZEN_OBJECT_METHODDEF
_IMP_IS_FROZEN_PACKAGE_METHODDEF
_IMP_CREATE_BUILTIN_METHODDEF