gh-101758: Add a Test For Single-Phase Init Module Variants (gh-101891)

The new test exercises the most important variants for single-phase init extension modules. We also add some explanation about those variants to import.c.

https://github.com/python/cpython/issues/101758
This commit is contained in:
Eric Snow 2023-02-14 14:26:03 -07:00 committed by GitHub
parent 81e3aa835c
commit 096d0097a0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 665 additions and 43 deletions

View File

@ -251,6 +251,205 @@ class ImportTests(unittest.TestCase):
with self.assertRaises(ImportError): with self.assertRaises(ImportError):
imp.load_dynamic('nonexistent', pathname) imp.load_dynamic('nonexistent', pathname)
@requires_load_dynamic
def test_singlephase_variants(self):
'''Exercise the most meaningful variants described in Python/import.c.'''
self.maxDiff = None
basename = '_testsinglephase'
fileobj, pathname, _ = imp.find_module(basename)
fileobj.close()
modules = {}
def load(name):
assert name not in modules
module = imp.load_dynamic(name, pathname)
self.assertNotIn(module, modules.values())
modules[name] = module
return module
def re_load(name, module):
assert sys.modules[name] is module
before = type(module)(module.__name__)
before.__dict__.update(vars(module))
reloaded = imp.load_dynamic(name, pathname)
return before, reloaded
def check_common(name, module):
summed = module.sum(1, 2)
lookedup = module.look_up_self()
initialized = module.initialized()
cached = sys.modules[name]
# module.__name__ might not match, but the spec will.
self.assertEqual(module.__spec__.name, name)
if initialized is not None:
self.assertIsInstance(initialized, float)
self.assertGreater(initialized, 0)
self.assertEqual(summed, 3)
self.assertTrue(issubclass(module.error, Exception))
self.assertEqual(module.int_const, 1969)
self.assertEqual(module.str_const, 'something different')
self.assertIs(cached, module)
return lookedup, initialized, cached
def check_direct(name, module, lookedup):
# The module has its own PyModuleDef, with a matching name.
self.assertEqual(module.__name__, name)
self.assertIs(lookedup, module)
def check_indirect(name, module, lookedup, orig):
# The module re-uses another's PyModuleDef, with a different name.
assert orig is not module
assert orig.__name__ != name
self.assertNotEqual(module.__name__, name)
self.assertIs(lookedup, module)
def check_basic(module, initialized):
init_count = module.initialized_count()
self.assertIsNot(initialized, None)
self.assertIsInstance(init_count, int)
self.assertGreater(init_count, 0)
return init_count
def check_common_reloaded(name, module, cached, before, reloaded):
recached = sys.modules[name]
self.assertEqual(reloaded.__spec__.name, name)
self.assertEqual(reloaded.__name__, before.__name__)
self.assertEqual(before.__dict__, module.__dict__)
self.assertIs(recached, reloaded)
def check_basic_reloaded(module, lookedup, initialized, init_count,
before, reloaded):
relookedup = reloaded.look_up_self()
reinitialized = reloaded.initialized()
reinit_count = reloaded.initialized_count()
self.assertIs(reloaded, module)
self.assertIs(reloaded.__dict__, module.__dict__)
# It only happens to be the same but that's good enough here.
# We really just want to verify that the re-loaded attrs
# didn't change.
self.assertIs(relookedup, lookedup)
self.assertEqual(reinitialized, initialized)
self.assertEqual(reinit_count, init_count)
def check_with_reinit_reloaded(module, lookedup, initialized,
before, reloaded):
relookedup = reloaded.look_up_self()
reinitialized = reloaded.initialized()
self.assertIsNot(reloaded, module)
self.assertIsNot(reloaded, module)
self.assertNotEqual(reloaded.__dict__, module.__dict__)
self.assertIs(relookedup, reloaded)
if initialized is None:
self.assertIs(reinitialized, None)
else:
self.assertGreater(reinitialized, initialized)
# Check the "basic" module.
name = basename
expected_init_count = 1
with self.subTest(name):
mod = load(name)
lookedup, initialized, cached = check_common(name, mod)
check_direct(name, mod, lookedup)
init_count = check_basic(mod, initialized)
self.assertEqual(init_count, expected_init_count)
before, reloaded = re_load(name, mod)
check_common_reloaded(name, mod, cached, before, reloaded)
check_basic_reloaded(mod, lookedup, initialized, init_count,
before, reloaded)
basic = mod
# Check its indirect variants.
name = f'{basename}_basic_wrapper'
expected_init_count += 1
with self.subTest(name):
mod = load(name)
lookedup, initialized, cached = check_common(name, mod)
check_indirect(name, mod, lookedup, basic)
init_count = check_basic(mod, initialized)
self.assertEqual(init_count, expected_init_count)
before, reloaded = re_load(name, mod)
check_common_reloaded(name, mod, cached, before, reloaded)
check_basic_reloaded(mod, lookedup, initialized, init_count,
before, reloaded)
# Currently _PyState_AddModule() always replaces the cached module.
self.assertIs(basic.look_up_self(), mod)
self.assertEqual(basic.initialized_count(), expected_init_count)
# The cached module shouldn't be changed after this point.
basic_lookedup = mod
# Check its direct variant.
name = f'{basename}_basic_copy'
expected_init_count += 1
with self.subTest(name):
mod = load(name)
lookedup, initialized, cached = check_common(name, mod)
check_direct(name, mod, lookedup)
init_count = check_basic(mod, initialized)
self.assertEqual(init_count, expected_init_count)
before, reloaded = re_load(name, mod)
check_common_reloaded(name, mod, cached, before, reloaded)
check_basic_reloaded(mod, lookedup, initialized, init_count,
before, reloaded)
# This should change the cached module for _testsinglephase.
self.assertIs(basic.look_up_self(), basic_lookedup)
self.assertEqual(basic.initialized_count(), expected_init_count)
# Check the non-basic variant that has no state.
name = f'{basename}_with_reinit'
with self.subTest(name):
mod = load(name)
lookedup, initialized, cached = check_common(name, mod)
self.assertIs(initialized, None)
check_direct(name, mod, lookedup)
before, reloaded = re_load(name, mod)
check_common_reloaded(name, mod, cached, before, reloaded)
check_with_reinit_reloaded(mod, lookedup, initialized,
before, reloaded)
# This should change the cached module for _testsinglephase.
self.assertIs(basic.look_up_self(), basic_lookedup)
self.assertEqual(basic.initialized_count(), expected_init_count)
# Check the basic variant that has state.
name = f'{basename}_with_state'
with self.subTest(name):
mod = load(name)
lookedup, initialized, cached = check_common(name, mod)
self.assertIsNot(initialized, None)
check_direct(name, mod, lookedup)
before, reloaded = re_load(name, mod)
check_common_reloaded(name, mod, cached, before, reloaded)
check_with_reinit_reloaded(mod, lookedup, initialized,
before, reloaded)
# This should change the cached module for _testsinglephase.
self.assertIs(basic.look_up_self(), basic_lookedup)
self.assertEqual(basic.initialized_count(), expected_init_count)
@requires_load_dynamic @requires_load_dynamic
def test_load_dynamic_ImportError_path(self): def test_load_dynamic_ImportError_path(self):
# Issue #1559549 added `name` and `path` attributes to ImportError # Issue #1559549 added `name` and `path` attributes to ImportError

View File

@ -5,74 +5,412 @@
# define Py_BUILD_CORE_MODULE 1 # define Py_BUILD_CORE_MODULE 1
#endif #endif
//#include <time.h>
#include "Python.h" #include "Python.h"
#include "pycore_namespace.h" // _PyNamespace_New() #include "pycore_namespace.h" // _PyNamespace_New()
typedef struct {
_PyTime_t initialized;
PyObject *error;
PyObject *int_const;
PyObject *str_const;
} module_state;
/* Process-global state is only used by _testsinglephase
since it's the only one that does not support re-init. */
static struct {
int initialized_count;
module_state module;
} global_state = { .initialized_count = -1 };
static inline module_state *
get_module_state(PyObject *module)
{
PyModuleDef *def = PyModule_GetDef(module);
if (def->m_size == -1) {
return &global_state.module;
}
else if (def->m_size == 0) {
return NULL;
}
else {
module_state *state = (module_state*)PyModule_GetState(module);
assert(state != NULL);
return state;
}
}
static void
clear_state(module_state *state)
{
state->initialized = 0;
Py_CLEAR(state->error);
Py_CLEAR(state->int_const);
Py_CLEAR(state->str_const);
}
static int
_set_initialized(_PyTime_t *initialized)
{
/* We go strictly monotonic to ensure each time is unique. */
_PyTime_t prev;
if (_PyTime_GetMonotonicClockWithInfo(&prev, NULL) != 0) {
return -1;
}
/* We do a busy sleep since the interval should be super short. */
_PyTime_t t;
do {
if (_PyTime_GetMonotonicClockWithInfo(&t, NULL) != 0) {
return -1;
}
} while (t == prev);
*initialized = t;
return 0;
}
static int
init_state(module_state *state)
{
assert(state->initialized == 0 &&
state->error == NULL &&
state->int_const == NULL &&
state->str_const == NULL);
if (_set_initialized(&state->initialized) != 0) {
goto error;
}
assert(state->initialized > 0);
/* Add an exception type */
state->error = PyErr_NewException("_testsinglephase.error", NULL, NULL);
if (state->error == NULL) {
goto error;
}
state->int_const = PyLong_FromLong(1969);
if (state->int_const == NULL) {
goto error;
}
state->str_const = PyUnicode_FromString("something different");
if (state->str_const == NULL) {
goto error;
}
return 0;
error:
clear_state(state);
return -1;
}
static int
init_module(PyObject *module, module_state *state)
{
if (PyModule_AddObjectRef(module, "error", state->error) != 0) {
return -1;
}
if (PyModule_AddObjectRef(module, "int_const", state->int_const) != 0) {
return -1;
}
if (PyModule_AddObjectRef(module, "str_const", state->str_const) != 0) {
return -1;
}
return 0;
}
PyDoc_STRVAR(common_initialized_doc,
"initialized()\n\
\n\
Return the seconds-since-epoch when the module was initialized.");
static PyObject *
common_initialized(PyObject *self, PyObject *Py_UNUSED(ignored))
{
module_state *state = get_module_state(self);
if (state == NULL) {
Py_RETURN_NONE;
}
double d = _PyTime_AsSecondsDouble(state->initialized);
return PyFloat_FromDouble(d);
}
#define INITIALIZED_METHODDEF \
{"initialized", common_initialized, METH_NOARGS, \
common_initialized_doc}
PyDoc_STRVAR(common_look_up_self_doc,
"look_up_self()\n\
\n\
Return the module associated with this module's def.m_base.m_index.");
static PyObject *
common_look_up_self(PyObject *self, PyObject *Py_UNUSED(ignored))
{
PyModuleDef *def = PyModule_GetDef(self);
if (def == NULL) {
return NULL;
}
return Py_NewRef(
PyState_FindModule(def));
}
#define LOOK_UP_SELF_METHODDEF \
{"look_up_self", common_look_up_self, METH_NOARGS, common_look_up_self_doc}
/* Function of two integers returning integer */ /* Function of two integers returning integer */
PyDoc_STRVAR(testexport_foo_doc, PyDoc_STRVAR(common_sum_doc,
"foo(i,j)\n\ "sum(i,j)\n\
\n\ \n\
Return the sum of i and j."); Return the sum of i and j.");
static PyObject * static PyObject *
testexport_foo(PyObject *self, PyObject *args) common_sum(PyObject *self, PyObject *args)
{ {
long i, j; long i, j;
long res; long res;
if (!PyArg_ParseTuple(args, "ll:foo", &i, &j)) if (!PyArg_ParseTuple(args, "ll:sum", &i, &j))
return NULL; return NULL;
res = i + j; res = i + j;
return PyLong_FromLong(res); return PyLong_FromLong(res);
} }
#define SUM_METHODDEF \
{"sum", common_sum, METH_VARARGS, common_sum_doc}
static PyMethodDef TestMethods[] = {
{"foo", testexport_foo, METH_VARARGS, PyDoc_STRVAR(basic_initialized_count_doc,
testexport_foo_doc}, "initialized_count()\n\
{NULL, NULL} /* sentinel */ \n\
Return how many times the module has been initialized.");
static PyObject *
basic_initialized_count(PyObject *self, PyObject *Py_UNUSED(ignored))
{
assert(PyModule_GetDef(self)->m_size == -1);
return PyLong_FromLong(global_state.initialized_count);
}
#define INITIALIZED_COUNT_METHODDEF \
{"initialized_count", basic_initialized_count, METH_VARARGS, \
basic_initialized_count_doc}
/*********************************************/
/* the _testsinglephase module (and aliases) */
/*********************************************/
/* This ia more typical of legacy extensions in the wild:
- single-phase init
- no module state
- does not support repeated initialization
(so m_copy is used)
- the module def is cached in _PyRuntime.extensions
(by name/filename)
Also note that, because the module has single-phase init,
it is cached in interp->module_by_index (using mod->md_def->m_base.m_index).
*/
static PyMethodDef TestMethods_Basic[] = {
LOOK_UP_SELF_METHODDEF,
SUM_METHODDEF,
INITIALIZED_METHODDEF,
INITIALIZED_COUNT_METHODDEF,
{NULL, NULL} /* sentinel */
}; };
static struct PyModuleDef _testsinglephase_basic = {
static struct PyModuleDef _testsinglephase = {
PyModuleDef_HEAD_INIT, PyModuleDef_HEAD_INIT,
.m_name = "_testsinglephase", .m_name = "_testsinglephase",
.m_doc = PyDoc_STR("Test module _testsinglephase (main)"), .m_doc = PyDoc_STR("Test module _testsinglephase"),
.m_size = -1, // no module state .m_size = -1, // no module state
.m_methods = TestMethods, .m_methods = TestMethods_Basic,
}; };
static PyObject *
PyMODINIT_FUNC init__testsinglephase_basic(PyModuleDef *def)
PyInit__testsinglephase(void)
{ {
PyObject *module = PyModule_Create(&_testsinglephase); if (global_state.initialized_count == -1) {
global_state.initialized_count = 0;
}
PyObject *module = PyModule_Create(def);
if (module == NULL) { if (module == NULL) {
return NULL; return NULL;
} }
/* Add an exception type */ module_state *state = &global_state.module;
PyObject *temp = PyErr_NewException("_testsinglephase.error", NULL, NULL); // It may have been set by a previous run or under a different name.
if (temp == NULL) { clear_state(state);
goto error; if (init_state(state) < 0) {
} Py_CLEAR(module);
if (PyModule_AddObject(module, "error", temp) != 0) { return NULL;
Py_DECREF(temp);
goto error;
} }
if (PyModule_AddIntConstant(module, "int_const", 1969) != 0) { if (init_module(module, state) < 0) {
goto error; Py_CLEAR(module);
goto finally;
} }
if (PyModule_AddStringConstant(module, "str_const", "something different") != 0) { global_state.initialized_count++;
goto error;
}
finally:
return module;
}
PyMODINIT_FUNC
PyInit__testsinglephase(void)
{
return init__testsinglephase_basic(&_testsinglephase_basic);
}
PyMODINIT_FUNC
PyInit__testsinglephase_basic_wrapper(void)
{
return PyInit__testsinglephase();
}
PyMODINIT_FUNC
PyInit__testsinglephase_basic_copy(void)
{
static struct PyModuleDef def = {
PyModuleDef_HEAD_INIT,
.m_name = "_testsinglephase_basic_copy",
.m_doc = PyDoc_STR("Test module _testsinglephase_basic_copy"),
.m_size = -1, // no module state
.m_methods = TestMethods_Basic,
};
return init__testsinglephase_basic(&def);
}
/*******************************************/
/* the _testsinglephase_with_reinit module */
/*******************************************/
/* This ia less typical of legacy extensions in the wild:
- single-phase init (same as _testsinglephase above)
- no module state
- supports repeated initialization
(so m_copy is not used)
- the module def is not cached in _PyRuntime.extensions
At this point most modules would reach for multi-phase init (PEP 489).
However, module state has been around a while (PEP 3121),
and most extensions predate multi-phase init.
(This module is basically the same as _testsinglephase,
but supports repeated initialization.)
*/
static PyMethodDef TestMethods_Reinit[] = {
LOOK_UP_SELF_METHODDEF,
SUM_METHODDEF,
INITIALIZED_METHODDEF,
{NULL, NULL} /* sentinel */
};
static struct PyModuleDef _testsinglephase_with_reinit = {
PyModuleDef_HEAD_INIT,
.m_name = "_testsinglephase_with_reinit",
.m_doc = PyDoc_STR("Test module _testsinglephase_with_reinit"),
.m_size = 0,
.m_methods = TestMethods_Reinit,
};
PyMODINIT_FUNC
PyInit__testsinglephase_with_reinit(void)
{
/* We purposefully do not try PyState_FindModule() first here
since we want to check the behavior of re-loading the module. */
PyObject *module = PyModule_Create(&_testsinglephase_with_reinit);
if (module == NULL) {
return NULL;
}
assert(get_module_state(module) == NULL);
module_state state = {0};
if (init_state(&state) < 0) {
Py_CLEAR(module);
return NULL;
}
if (init_module(module, &state) < 0) {
Py_CLEAR(module);
goto finally;
}
finally:
/* We only needed the module state for setting the module attrs. */
clear_state(&state);
return module;
}
/******************************************/
/* the _testsinglephase_with_state module */
/******************************************/
/* This ia less typical of legacy extensions in the wild:
- single-phase init (same as _testsinglephase above)
- has some module state
- supports repeated initialization
(so m_copy is not used)
- the module def is not cached in _PyRuntime.extensions
At this point most modules would reach for multi-phase init (PEP 489).
However, module state has been around a while (PEP 3121),
and most extensions predate multi-phase init.
*/
static PyMethodDef TestMethods_WithState[] = {
LOOK_UP_SELF_METHODDEF,
SUM_METHODDEF,
INITIALIZED_METHODDEF,
{NULL, NULL} /* sentinel */
};
static struct PyModuleDef _testsinglephase_with_state = {
PyModuleDef_HEAD_INIT,
.m_name = "_testsinglephase_with_state",
.m_doc = PyDoc_STR("Test module _testsinglephase_with_state"),
.m_size = sizeof(module_state),
.m_methods = TestMethods_WithState,
};
PyMODINIT_FUNC
PyInit__testsinglephase_with_state(void)
{
/* We purposefully do not try PyState_FindModule() first here
since we want to check the behavior of re-loading the module. */
PyObject *module = PyModule_Create(&_testsinglephase_with_state);
if (module == NULL) {
return NULL;
}
module_state *state = get_module_state(module);
assert(state != NULL);
if (init_state(state) < 0) {
Py_CLEAR(module);
return NULL;
}
if (init_module(module, state) < 0) {
clear_state(state);
Py_CLEAR(module);
goto finally;
}
finally:
return module; return module;
error:
Py_DECREF(module);
return NULL;
} }

View File

@ -428,6 +428,71 @@ PyImport_GetMagicTag(void)
} }
/*
We support a number of kinds of single-phase init builtin/extension modules:
* "basic"
* no module state (PyModuleDef.m_size == -1)
* does not support repeated init (we use PyModuleDef.m_base.m_copy)
* may have process-global state
* the module's def is cached in _PyRuntime.imports.extensions,
by (name, filename)
* "reinit"
* no module state (PyModuleDef.m_size == 0)
* supports repeated init (m_copy is never used)
* should not have any process-global state
* its def is never cached in _PyRuntime.imports.extensions
(except, currently, under the main interpreter, for some reason)
* "with state" (almost the same as reinit)
* has module state (PyModuleDef.m_size > 0)
* supports repeated init (m_copy is never used)
* should not have any process-global state
* its def is never cached in _PyRuntime.imports.extensions
(except, currently, under the main interpreter, for some reason)
There are also variants within those classes:
* two or more modules share a PyModuleDef
* a module's init func uses another module's PyModuleDef
* a module's init func calls another's module's init func
* a module's init "func" is actually a variable statically initialized
to another module's init func
* two or modules share "methods"
* a module's init func copies another module's PyModuleDef
(with a different name)
* (basic-only) two or modules share process-global state
In the first case, where modules share a PyModuleDef, the following
notable weirdness happens:
* the module's __name__ matches the def, not the requested name
* the last module (with the same def) to be imported for the first time wins
* returned by PyState_Find_Module() (via interp->modules_by_index)
* (non-basic-only) its init func is used when re-loading any of them
(via the def's m_init)
* (basic-only) the copy of its __dict__ is used when re-loading any of them
(via the def's m_copy)
However, the following happens as expected:
* a new module object (with its own __dict__) is created for each request
* the module's __spec__ has the requested name
* the loaded module is cached in sys.modules under the requested name
* the m_index field of the shared def is not changed,
so at least PyState_FindModule() will always look in the same place
For "basic" modules there are other quirks:
* (whether sharing a def or not) when loaded the first time,
m_copy is set before _init_module_attrs() is called
in importlib._bootstrap.module_from_spec(),
so when the module is re-loaded, the previous value
for __wpec__ (and others) is reset, possibly unexpectedly.
Generally, when multiple interpreters are involved, some of the above
gets even messier.
*/
/* Magic for extension modules (built-in as well as dynamically /* Magic for extension modules (built-in as well as dynamically
loaded). To prevent initializing an extension module more than loaded). To prevent initializing an extension module more than
once, we keep a static dictionary 'extensions' keyed by the tuple once, we keep a static dictionary 'extensions' keyed by the tuple
@ -489,9 +554,8 @@ _extensions_cache_clear(void)
Py_CLEAR(_PyRuntime.imports.extensions); Py_CLEAR(_PyRuntime.imports.extensions);
} }
int static int
_PyImport_FixupExtensionObject(PyObject *mod, PyObject *name, fix_up_extension(PyObject *mod, PyObject *name, PyObject *filename)
PyObject *filename, PyObject *modules)
{ {
if (mod == NULL || !PyModule_Check(mod)) { if (mod == NULL || !PyModule_Check(mod)) {
PyErr_BadInternalCall(); PyErr_BadInternalCall();
@ -505,16 +569,13 @@ _PyImport_FixupExtensionObject(PyObject *mod, PyObject *name,
} }
PyThreadState *tstate = _PyThreadState_GET(); PyThreadState *tstate = _PyThreadState_GET();
if (PyObject_SetItem(modules, name, mod) < 0) {
return -1;
}
if (_PyState_AddModule(tstate, mod, def) < 0) { if (_PyState_AddModule(tstate, mod, def) < 0) {
PyMapping_DelItem(modules, name);
return -1; return -1;
} }
// bpo-44050: Extensions and def->m_base.m_copy can be updated // bpo-44050: Extensions and def->m_base.m_copy can be updated
// when the extension module doesn't support sub-interpreters. // when the extension module doesn't support sub-interpreters.
// XXX Why special-case the main interpreter?
if (_Py_IsMainInterpreter(tstate->interp) || def->m_size == -1) { if (_Py_IsMainInterpreter(tstate->interp) || def->m_size == -1) {
if (def->m_size == -1) { if (def->m_size == -1) {
if (def->m_base.m_copy) { if (def->m_base.m_copy) {
@ -541,15 +602,39 @@ _PyImport_FixupExtensionObject(PyObject *mod, PyObject *name,
return 0; return 0;
} }
int
_PyImport_FixupExtensionObject(PyObject *mod, PyObject *name,
PyObject *filename, PyObject *modules)
{
if (PyObject_SetItem(modules, name, mod) < 0) {
return -1;
}
if (fix_up_extension(mod, name, filename) < 0) {
PyMapping_DelItem(modules, name);
return -1;
}
return 0;
}
int int
_PyImport_FixupBuiltin(PyObject *mod, const char *name, PyObject *modules) _PyImport_FixupBuiltin(PyObject *mod, const char *name, PyObject *modules)
{ {
int res; int res = -1;
PyObject *nameobj; PyObject *nameobj;
nameobj = PyUnicode_InternFromString(name); nameobj = PyUnicode_InternFromString(name);
if (nameobj == NULL) if (nameobj == NULL) {
return -1; return -1;
res = _PyImport_FixupExtensionObject(mod, nameobj, nameobj, modules); }
if (PyObject_SetItem(modules, nameobj, mod) < 0) {
goto finally;
}
if (fix_up_extension(mod, nameobj, nameobj) < 0) {
PyMapping_DelItem(modules, nameobj);
goto finally;
}
res = 0;
finally:
Py_DECREF(nameobj); Py_DECREF(nameobj);
return res; return res;
} }