From c3d9ac8b340fcbf54cee865737e67f11fcd70ed3 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 5 Oct 2021 10:01:27 -0600 Subject: [PATCH] bpo-45324: Capture data in FrozenImporter.find_spec() to use in exec_module(). (gh-28633) Before this change we end up duplicating effort and throwing away data in FrozenImporter.find_spec(). Now we do the work once in find_spec() and the only thing we do in FrozenImporter.exec_module() is turn the raw frozen data into a code object and then exec it. We've added _imp.find_frozen(), add an arg to _imp.get_frozen_object(), and updated FrozenImporter. We've also moved some code around to reduce duplication, get a little more consistency in outcomes, and be more efficient. Note that this change is mostly necessary if we want to set __file__ on frozen stdlib modules. (See https://bugs.python.org/issue21736.) https://bugs.python.org/issue45324 --- Lib/importlib/_bootstrap.py | 32 ++- Lib/test/test_importlib/frozen/test_finder.py | 76 +++-- Lib/test/test_importlib/frozen/test_loader.py | 7 + .../2021-09-29-12-02-39.bpo-45324.BTQElX.rst | 3 + Python/clinic/import.c.h | 67 ++++- Python/import.c | 266 ++++++++++++------ 6 files changed, 331 insertions(+), 120 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-09-29-12-02-39.bpo-45324.BTQElX.rst diff --git a/Lib/importlib/_bootstrap.py b/Lib/importlib/_bootstrap.py index e64f7ad1517..5807577c74b 100644 --- a/Lib/importlib/_bootstrap.py +++ b/Lib/importlib/_bootstrap.py @@ -826,10 +826,15 @@ class FrozenImporter: @classmethod def find_spec(cls, fullname, path=None, target=None): - if _imp.is_frozen(fullname): - return spec_from_loader(fullname, cls, origin=cls._ORIGIN) - else: + info = _call_with_frames_removed(_imp.find_frozen, fullname) + if info is None: return None + data, ispkg = info + spec = spec_from_loader(fullname, cls, + origin=cls._ORIGIN, + is_package=ispkg) + spec.loader_state = data + return spec @classmethod def find_module(cls, fullname, path=None): @@ -849,11 +854,22 @@ class FrozenImporter: @staticmethod def exec_module(module): - name = module.__spec__.name - if not _imp.is_frozen(name): - raise ImportError('{!r} is not a frozen module'.format(name), - name=name) - code = _call_with_frames_removed(_imp.get_frozen_object, name) + spec = module.__spec__ + name = spec.name + try: + data = spec.loader_state + except AttributeError: + if not _imp.is_frozen(name): + raise ImportError('{!r} is not a frozen module'.format(name), + name=name) + data = None + else: + # We clear the extra data we got from the finder, to save memory. + # Note that if this method is called again (e.g. by + # importlib.reload()) then _imp.get_frozen_object() will notice + # no data was provided and will look it up. + spec.loader_state = None + code = _call_with_frames_removed(_imp.get_frozen_object, name, data) exec(code, module.__dict__) @classmethod diff --git a/Lib/test/test_importlib/frozen/test_finder.py b/Lib/test/test_importlib/frozen/test_finder.py index 7d43fb0ff3e..23d1bf7fb77 100644 --- a/Lib/test/test_importlib/frozen/test_finder.py +++ b/Lib/test/test_importlib/frozen/test_finder.py @@ -1,13 +1,15 @@ from .. import abc -import os.path from .. import util machinery = util.import_importlib('importlib.machinery') +import _imp +import marshal +import os.path import unittest import warnings -from test.support import import_helper +from test.support import import_helper, REPO_ROOT class FindSpecTests(abc.FinderTests): @@ -19,39 +21,67 @@ class FindSpecTests(abc.FinderTests): with import_helper.frozen_modules(): return finder.find_spec(name, **kwargs) - def check(self, spec, name): + def check_basic(self, spec, name, ispkg=False): self.assertEqual(spec.name, name) self.assertIs(spec.loader, self.machinery.FrozenImporter) self.assertEqual(spec.origin, 'frozen') self.assertFalse(spec.has_location) + if ispkg: + self.assertIsNotNone(spec.submodule_search_locations) + else: + self.assertIsNone(spec.submodule_search_locations) + self.assertIsNotNone(spec.loader_state) + + def check_search_location(self, spec, source=None): + # Frozen packages do not have any path entries. + # (See https://bugs.python.org/issue21736.) + expected = [] + self.assertListEqual(spec.submodule_search_locations, expected) + + def check_data(self, spec, source=None, ispkg=None): + with import_helper.frozen_modules(): + expected = _imp.get_frozen_object(spec.name) + data = spec.loader_state + # We can't compare the marshaled data directly because + # marshal.dumps() would mark "expected" as a ref, which slightly + # changes the output. (See https://bugs.python.org/issue34093.) + code = marshal.loads(data) + self.assertEqual(code, expected) def test_module(self): - names = [ - '__hello__', - '__hello_alias__', - '__hello_only__', - '__phello__.__init__', - '__phello__.spam', - '__phello__.ham.__init__', - '__phello__.ham.eggs', - ] - for name in names: + modules = { + '__hello__': None, + '__phello__.__init__': None, + '__phello__.spam': None, + '__phello__.ham.__init__': None, + '__phello__.ham.eggs': None, + '__hello_alias__': '__hello__', + } + for name, source in modules.items(): with self.subTest(name): spec = self.find(name) - self.check(spec, name) - self.assertEqual(spec.submodule_search_locations, None) + self.check_basic(spec, name) + self.check_data(spec, source) def test_package(self): - names = [ - '__phello__', - '__phello__.ham', - '__phello_alias__', - ] - for name in names: + modules = { + '__phello__': None, + '__phello__.ham': None, + '__phello_alias__': '__hello__', + } + for name, source in modules.items(): with self.subTest(name): spec = self.find(name) - self.check(spec, name) - self.assertEqual(spec.submodule_search_locations, []) + self.check_basic(spec, name, ispkg=True) + self.check_search_location(spec, source) + self.check_data(spec, source, ispkg=True) + + def test_frozen_only(self): + name = '__hello_only__' + source = os.path.join(REPO_ROOT, 'Tools', 'freeze', 'flag.py') + spec = self.find(name) + self.check_basic(spec, name) + self.check_data(spec, source) # These are covered by test_module() and test_package(). test_module_in_package = None diff --git a/Lib/test/test_importlib/frozen/test_loader.py b/Lib/test/test_importlib/frozen/test_loader.py index cfa5e5bb31b..992dcef05bc 100644 --- a/Lib/test/test_importlib/frozen/test_loader.py +++ b/Lib/test/test_importlib/frozen/test_loader.py @@ -4,7 +4,9 @@ from .. import util machinery = util.import_importlib('importlib.machinery') from test.support import captured_stdout, import_helper +import _imp import contextlib +import marshal import types import unittest import warnings @@ -33,11 +35,14 @@ class ExecModuleTests(abc.LoaderTests): def exec_module(self, name): with import_helper.frozen_modules(): is_package = self.machinery.FrozenImporter.is_package(name) + code = _imp.get_frozen_object(name) + data = marshal.dumps(code) spec = self.machinery.ModuleSpec( name, self.machinery.FrozenImporter, origin='frozen', is_package=is_package, + loader_state=data, ) module = types.ModuleType(name) module.__spec__ = spec @@ -61,6 +66,7 @@ class ExecModuleTests(abc.LoaderTests): self.assertEqual(getattr(module, attr), value) self.assertEqual(output, 'Hello world!\n') self.assertTrue(hasattr(module, '__spec__')) + self.assertIsNone(module.__spec__.loader_state) def test_package(self): name = '__phello__' @@ -73,6 +79,7 @@ class ExecModuleTests(abc.LoaderTests): name=name, attr=attr, given=attr_value, expected=value)) self.assertEqual(output, 'Hello world!\n') + self.assertIsNone(module.__spec__.loader_state) def test_lacking_parent(self): name = '__phello__.spam' diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-09-29-12-02-39.bpo-45324.BTQElX.rst b/Misc/NEWS.d/next/Core and Builtins/2021-09-29-12-02-39.bpo-45324.BTQElX.rst new file mode 100644 index 00000000000..7b16847bb73 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-09-29-12-02-39.bpo-45324.BTQElX.rst @@ -0,0 +1,3 @@ +In FrozenImporter.find_spec(), we now preserve the information needed in +exec_module() to load the module. This change mostly impacts internal +details, rather than changing the importer's behavior. diff --git a/Python/clinic/import.c.h b/Python/clinic/import.c.h index 438a348fa09..09738834195 100644 --- a/Python/clinic/import.c.h +++ b/Python/clinic/import.c.h @@ -169,33 +169,80 @@ exit: return return_value; } -PyDoc_STRVAR(_imp_get_frozen_object__doc__, -"get_frozen_object($module, name, /)\n" +PyDoc_STRVAR(_imp_find_frozen__doc__, +"find_frozen($module, name, /)\n" "--\n" "\n" -"Create a code object for a frozen module."); +"Return info about the corresponding frozen module (if there is one) or None.\n" +"\n" +"The returned info (a 2-tuple):\n" +"\n" +" * data the raw marshalled bytes\n" +" * is_package whether or not it is a package"); -#define _IMP_GET_FROZEN_OBJECT_METHODDEF \ - {"get_frozen_object", (PyCFunction)_imp_get_frozen_object, METH_O, _imp_get_frozen_object__doc__}, +#define _IMP_FIND_FROZEN_METHODDEF \ + {"find_frozen", (PyCFunction)_imp_find_frozen, METH_O, _imp_find_frozen__doc__}, static PyObject * -_imp_get_frozen_object_impl(PyObject *module, PyObject *name); +_imp_find_frozen_impl(PyObject *module, PyObject *name); static PyObject * -_imp_get_frozen_object(PyObject *module, PyObject *arg) +_imp_find_frozen(PyObject *module, PyObject *arg) { PyObject *return_value = NULL; PyObject *name; if (!PyUnicode_Check(arg)) { - _PyArg_BadArgument("get_frozen_object", "argument", "str", arg); + _PyArg_BadArgument("find_frozen", "argument", "str", arg); goto exit; } if (PyUnicode_READY(arg) == -1) { goto exit; } name = arg; - return_value = _imp_get_frozen_object_impl(module, name); + return_value = _imp_find_frozen_impl(module, name); + +exit: + return return_value; +} + +PyDoc_STRVAR(_imp_get_frozen_object__doc__, +"get_frozen_object($module, name, data=None, /)\n" +"--\n" +"\n" +"Create a code object for a frozen module."); + +#define _IMP_GET_FROZEN_OBJECT_METHODDEF \ + {"get_frozen_object", (PyCFunction)(void(*)(void))_imp_get_frozen_object, METH_FASTCALL, _imp_get_frozen_object__doc__}, + +static PyObject * +_imp_get_frozen_object_impl(PyObject *module, PyObject *name, + PyObject *dataobj); + +static PyObject * +_imp_get_frozen_object(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *name; + PyObject *dataobj = Py_None; + + if (!_PyArg_CheckPositional("get_frozen_object", nargs, 1, 2)) { + goto exit; + } + if (!PyUnicode_Check(args[0])) { + _PyArg_BadArgument("get_frozen_object", "argument 1", "str", args[0]); + goto exit; + } + if (PyUnicode_READY(args[0]) == -1) { + goto exit; + } + name = args[0]; + if (nargs < 2) { + goto skip_optional; + } + dataobj = args[1]; +skip_optional: + return_value = _imp_get_frozen_object_impl(module, name, dataobj); exit: return return_value; @@ -498,4 +545,4 @@ exit: #ifndef _IMP_EXEC_DYNAMIC_METHODDEF #define _IMP_EXEC_DYNAMIC_METHODDEF #endif /* !defined(_IMP_EXEC_DYNAMIC_METHODDEF) */ -/*[clinic end generated code: output=96038c277119d6e3 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=a31e1c00653359ff input=a9049054013a1b77]*/ diff --git a/Python/import.c b/Python/import.c index d7f12678419..22cefdf08b4 100644 --- a/Python/import.c +++ b/Python/import.c @@ -887,10 +887,6 @@ _imp__fix_co_filename_impl(PyObject *module, PyCodeObject *code, } -/* Forward */ -static const struct _frozen * find_frozen(PyObject *); - - /* Helper to test for built-in module */ static int @@ -1119,75 +1115,125 @@ list_frozen_module_names() return names; } -static const struct _frozen * -find_frozen(PyObject *modname) +typedef enum { + FROZEN_OKAY, + FROZEN_BAD_NAME, // The given module name wasn't valid. + FROZEN_NOT_FOUND, // It wasn't in PyImport_FrozenModules. + FROZEN_DISABLED, // -X frozen_modules=off (and not essential) + FROZEN_EXCLUDED, // The PyImport_FrozenModules entry has NULL "code". + FROZEN_INVALID, // The PyImport_FrozenModules entry is bogus. +} frozen_status; + +static inline void +set_frozen_error(frozen_status status, PyObject *modname) { - if (modname == NULL) { - return NULL; + const char *err = NULL; + switch (status) { + case FROZEN_BAD_NAME: + case FROZEN_NOT_FOUND: + case FROZEN_DISABLED: + err = "No such frozen object named %R"; + break; + case FROZEN_EXCLUDED: + err = "Excluded frozen object named %R"; + break; + case FROZEN_INVALID: + err = "Frozen object named %R is invalid"; + break; + case FROZEN_OKAY: + // There was no error. + break; + default: + Py_UNREACHABLE(); } - const char *name = PyUnicode_AsUTF8(modname); + if (err != NULL) { + PyObject *msg = PyUnicode_FromFormat(err, modname); + if (msg == NULL) { + PyErr_Clear(); + } + PyErr_SetImportError(msg, modname, NULL); + Py_XDECREF(msg); + } +} + +struct frozen_info { + PyObject *nameobj; + const char *data; + Py_ssize_t size; + bool is_package; +}; + +static frozen_status +find_frozen(PyObject *nameobj, struct frozen_info *info) +{ + if (info != NULL) { + info->nameobj = NULL; + info->data = NULL; + info->size = 0; + info->is_package = false; + } + + if (nameobj == NULL || nameobj == Py_None) { + return FROZEN_BAD_NAME; + } + const char *name = PyUnicode_AsUTF8(nameobj); if (name == NULL) { + // Note that this function previously used + // _PyUnicode_EqualToASCIIString(). We clear the error here + // (instead of propagating it) to match the earlier behavior + // more closely. PyErr_Clear(); - return NULL; + return FROZEN_BAD_NAME; } + if (!use_frozen() && !is_essential_frozen_module(name)) { - return NULL; + return FROZEN_DISABLED; } + const struct _frozen *p; for (p = PyImport_FrozenModules; ; p++) { if (p->name == NULL) { - return NULL; + // We hit the end-of-list sentinel value. + return FROZEN_NOT_FOUND; } if (strcmp(name, p->name) == 0) { break; } } - return p; -} - -static PyObject * -get_frozen_object(PyObject *name) -{ - const struct _frozen *p = find_frozen(name); - int size; - - if (p == NULL) { - PyErr_Format(PyExc_ImportError, - "No such frozen object named %R", - name); - return NULL; + if (info != NULL) { + info->nameobj = nameobj; // borrowed + info->data = (const char *)p->code; + info->size = p->size < 0 ? -(p->size) : p->size; + info->is_package = p->size < 0 ? true : false; } + if (p->code == NULL) { - PyErr_Format(PyExc_ImportError, - "Excluded frozen object named %R", - name); - return NULL; + /* It is frozen but marked as un-importable. */ + return FROZEN_EXCLUDED; } - size = p->size; - if (size < 0) - size = -size; - return PyMarshal_ReadObjectFromString((const char *)p->code, size); + if (p->code[0] == '\0' || p->size == 0) { + return FROZEN_INVALID; + } + return FROZEN_OKAY; } static PyObject * -is_frozen_package(PyObject *name) +unmarshal_frozen_code(struct frozen_info *info) { - const struct _frozen *p = find_frozen(name); - int size; - - if (p == NULL) { - PyErr_Format(PyExc_ImportError, - "No such frozen object named %R", - name); + PyObject *co = PyMarshal_ReadObjectFromString(info->data, info->size); + if (co == NULL) { + set_frozen_error(FROZEN_INVALID, info->nameobj); return NULL; } - - size = p->size; - - if (size < 0) - Py_RETURN_TRUE; - else - Py_RETURN_FALSE; + if (!PyCode_Check(co)) { + // We stick with TypeError for backward compatibility. + PyErr_Format(PyExc_TypeError, + "frozen object %R is not a code object", + info->nameobj); + Py_DECREF(co); + return NULL; + } + return co; } @@ -1200,35 +1246,25 @@ int PyImport_ImportFrozenModuleObject(PyObject *name) { PyThreadState *tstate = _PyThreadState_GET(); - const struct _frozen *p; PyObject *co, *m, *d; - int ispackage; - int size; - p = find_frozen(name); - - if (p == NULL) + struct frozen_info info; + frozen_status status = find_frozen(name, &info); + if (status == FROZEN_NOT_FOUND || status == FROZEN_DISABLED) { return 0; - if (p->code == NULL) { - _PyErr_Format(tstate, PyExc_ImportError, - "Excluded frozen object named %R", - name); + } + else if (status == FROZEN_BAD_NAME) { + return 0; + } + else if (status != FROZEN_OKAY) { + set_frozen_error(status, name); return -1; } - size = p->size; - ispackage = (size < 0); - if (ispackage) - size = -size; - co = PyMarshal_ReadObjectFromString((const char *)p->code, size); - if (co == NULL) + co = unmarshal_frozen_code(&info); + if (co == NULL) { return -1; - if (!PyCode_Check(co)) { - _PyErr_Format(tstate, PyExc_TypeError, - "frozen object %R is not a code object", - name); - goto err_return; } - if (ispackage) { + if (info.is_package) { /* Set __path__ to the empty list */ PyObject *l; int err; @@ -1966,20 +2002,83 @@ _imp_init_frozen_impl(PyObject *module, PyObject *name) return import_add_module(tstate, name); } +/*[clinic input] +_imp.find_frozen + + name: unicode + / + +Return info about the corresponding frozen module (if there is one) or None. + +The returned info (a 2-tuple): + + * data the raw marshalled bytes + * is_package whether or not it is a package +[clinic start generated code]*/ + +static PyObject * +_imp_find_frozen_impl(PyObject *module, PyObject *name) +/*[clinic end generated code: output=3fd17da90d417e4e input=4e52b3ac95f6d7ab]*/ +{ + struct frozen_info info; + frozen_status status = find_frozen(name, &info); + if (status == FROZEN_NOT_FOUND || status == FROZEN_DISABLED) { + Py_RETURN_NONE; + } + else if (status == FROZEN_BAD_NAME) { + Py_RETURN_NONE; + } + else if (status != FROZEN_OKAY) { + set_frozen_error(status, name); + return NULL; + } + PyObject *data = PyBytes_FromStringAndSize(info.data, info.size); + if (data == NULL) { + return NULL; + } + PyObject *result = PyTuple_Pack(2, data, + info.is_package ? Py_True : Py_False); + Py_DECREF(data); + return result; +} + /*[clinic input] _imp.get_frozen_object name: unicode + data as dataobj: object = None / Create a code object for a frozen module. [clinic start generated code]*/ static PyObject * -_imp_get_frozen_object_impl(PyObject *module, PyObject *name) -/*[clinic end generated code: output=2568cc5b7aa0da63 input=ed689bc05358fdbd]*/ +_imp_get_frozen_object_impl(PyObject *module, PyObject *name, + PyObject *dataobj) +/*[clinic end generated code: output=54368a673a35e745 input=034bdb88f6460b7b]*/ { - return get_frozen_object(name); + struct frozen_info info; + if (PyBytes_Check(dataobj)) { + info.nameobj = name; + info.data = PyBytes_AS_STRING(dataobj); + info.size = PyBytes_Size(dataobj); + if (info.size == 0) { + set_frozen_error(FROZEN_INVALID, name); + return NULL; + } + } + else if (dataobj != Py_None) { + _PyArg_BadArgument("get_frozen_object", "argument 2", "bytes", dataobj); + return NULL; + } + else { + frozen_status status = find_frozen(name, &info); + if (status != FROZEN_OKAY) { + set_frozen_error(status, name); + return NULL; + } + } + return unmarshal_frozen_code(&info); } /*[clinic input] @@ -1995,7 +2094,13 @@ static PyObject * _imp_is_frozen_package_impl(PyObject *module, PyObject *name) /*[clinic end generated code: output=e70cbdb45784a1c9 input=81b6cdecd080fbb8]*/ { - return is_frozen_package(name); + struct frozen_info info; + frozen_status status = find_frozen(name, &info); + if (status != FROZEN_OKAY && status != FROZEN_EXCLUDED) { + set_frozen_error(status, name); + return NULL; + } + return PyBool_FromLong(info.is_package); } /*[clinic input] @@ -2027,10 +2132,12 @@ static PyObject * _imp_is_frozen_impl(PyObject *module, PyObject *name) /*[clinic end generated code: output=01f408f5ec0f2577 input=7301dbca1897d66b]*/ { - const struct _frozen *p; - - p = find_frozen(name); - return PyBool_FromLong((long) (p == NULL ? 0 : p->size)); + struct frozen_info info; + frozen_status status = find_frozen(name, &info); + if (status != FROZEN_OKAY) { + Py_RETURN_FALSE; + } + Py_RETURN_TRUE; } /*[clinic input] @@ -2221,6 +2328,7 @@ static PyMethodDef imp_methods[] = { _IMP_LOCK_HELD_METHODDEF _IMP_ACQUIRE_LOCK_METHODDEF _IMP_RELEASE_LOCK_METHODDEF + _IMP_FIND_FROZEN_METHODDEF _IMP_GET_FROZEN_OBJECT_METHODDEF _IMP_IS_FROZEN_PACKAGE_METHODDEF _IMP_CREATE_BUILTIN_METHODDEF