gh-117953: Work Relative to Specific Extension Kinds in the Import Machinery (gh-118205)

This change will make some later changes simpler.
This commit is contained in:
Eric Snow 2024-05-01 17:40:28 -06:00 committed by GitHub
parent a524152b8c
commit 526ca4c09e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 322 additions and 64 deletions

View File

@ -15,8 +15,15 @@ extern "C" {
extern const char *_PyImport_DynLoadFiletab[];
typedef PyObject *(*PyModInitFunction)(void);
typedef enum ext_module_kind {
_Py_ext_module_kind_UNKNOWN = 0,
_Py_ext_module_kind_SINGLEPHASE = 1,
_Py_ext_module_kind_MULTIPHASE = 2,
_Py_ext_module_kind_INVALID = 3,
} _Py_ext_module_kind;
/* Input for loading an extension module. */
struct _Py_ext_module_loader_info {
PyObject *filename;
#ifndef MS_WINDOWS
@ -43,10 +50,33 @@ extern int _Py_ext_module_loader_info_init_from_spec(
struct _Py_ext_module_loader_info *info,
PyObject *spec);
/* The result from running an extension module's init function. */
struct _Py_ext_module_loader_result {
PyModuleDef *def;
PyObject *module;
_Py_ext_module_kind kind;
struct _Py_ext_module_loader_result_error *err;
struct _Py_ext_module_loader_result_error {
enum _Py_ext_module_loader_result_error_kind {
_Py_ext_module_loader_result_EXCEPTION = 0,
_Py_ext_module_loader_result_ERR_MISSING = 1,
_Py_ext_module_loader_result_ERR_UNREPORTED_EXC = 2,
_Py_ext_module_loader_result_ERR_UNINITIALIZED = 3,
_Py_ext_module_loader_result_ERR_NONASCII_NOT_MULTIPHASE = 4,
_Py_ext_module_loader_result_ERR_NOT_MODULE = 5,
_Py_ext_module_loader_result_ERR_MISSING_DEF = 6,
} kind;
PyObject *exc;
} _err;
};
extern void _Py_ext_module_loader_result_clear(
struct _Py_ext_module_loader_result *res);
extern void _Py_ext_module_loader_result_apply_error(
struct _Py_ext_module_loader_result *res,
const char *name);
/* The module init function. */
typedef PyObject *(*PyModInitFunction)(void);
extern PyModInitFunction _PyImport_GetModInitFunc(
struct _Py_ext_module_loader_info *info,
FILE *fp);

View File

@ -1193,26 +1193,63 @@ is_core_module(PyInterpreterState *interp, PyObject *name, PyObject *path)
}
#ifndef NDEBUG
static bool
is_singlephase(PyModuleDef *def)
static _Py_ext_module_kind
_get_extension_kind(PyModuleDef *def, bool check_size)
{
_Py_ext_module_kind kind;
if (def == NULL) {
/* It must be a module created by reload_singlephase_extension()
* from m_copy. Ideally we'd do away with this case. */
return true;
kind = _Py_ext_module_kind_SINGLEPHASE;
}
else if (def->m_slots == NULL) {
return true;
else if (def->m_slots != NULL) {
kind = _Py_ext_module_kind_MULTIPHASE;
}
else if (check_size && def->m_size == -1) {
kind = _Py_ext_module_kind_SINGLEPHASE;
}
else if (def->m_base.m_init != NULL) {
kind = _Py_ext_module_kind_SINGLEPHASE;
}
else {
return false;
// This is probably single-phase init, but a multi-phase
// module *can* have NULL m_slots.
kind = _Py_ext_module_kind_UNKNOWN;
}
return kind;
}
/* The module might not be fully initialized yet
* and PyModule_FromDefAndSpec() checks m_size
* so we skip m_size. */
#define assert_multiphase_def(def) \
do { \
_Py_ext_module_kind kind = _get_extension_kind(def, false); \
assert(kind == _Py_ext_module_kind_MULTIPHASE \
/* m_slots can be NULL. */ \
|| kind == _Py_ext_module_kind_UNKNOWN); \
} while (0)
#define assert_singlephase_def(def) \
do { \
_Py_ext_module_kind kind = _get_extension_kind(def, true); \
assert(kind == _Py_ext_module_kind_SINGLEPHASE \
|| kind == _Py_ext_module_kind_UNKNOWN); \
} while (0)
#define assert_singlephase(def) \
assert_singlephase_def(def)
#else /* defined(NDEBUG) */
#define assert_multiphase_def(def)
#define assert_singlephase_def(def)
#define assert_singlephase(def)
#endif
struct singlephase_global_update {
PyObject *m_dict;
PyModInitFunction m_init;
};
static int
@ -1226,10 +1263,24 @@ update_global_state_for_extension(PyThreadState *tstate,
assert(def->m_base.m_copy == NULL);
}
else {
assert(def->m_base.m_init != NULL
|| is_core_module(tstate->interp, name, path));
if (singlephase->m_dict == NULL) {
if (singlephase->m_init != NULL) {
assert(singlephase->m_dict == NULL);
assert(def->m_base.m_copy == NULL);
assert(def->m_size >= 0);
/* Remember pointer to module init function. */
// XXX If two modules share a def then def->m_base will
// reflect the last one added (here) to the global cache.
// We should prevent this somehow. The simplest solution
// is probably to store m_copy/m_init in the cache along
// with the def, rather than within the def.
def->m_base.m_init = singlephase->m_init;
}
else if (singlephase->m_dict == NULL) {
/* It must be a core builtin module. */
assert(is_core_module(tstate->interp, name, path));
assert(def->m_size == -1);
assert(def->m_base.m_copy == NULL);
assert(def->m_base.m_init == NULL);
}
else {
assert(PyDict_Check(singlephase->m_dict));
@ -1316,7 +1367,7 @@ import_find_extension(PyThreadState *tstate,
if (def == NULL) {
return NULL;
}
assert(is_singlephase(def));
assert_singlephase(def);
/* It may have been successfully imported previously
in an interpreter that allows legacy modules
@ -1369,11 +1420,26 @@ import_find_extension(PyThreadState *tstate,
}
struct _Py_ext_module_loader_result res;
if (_PyImport_RunModInitFunc(def->m_base.m_init, info, &res) < 0) {
_Py_ext_module_loader_result_apply_error(&res, name_buf);
return NULL;
}
assert(!PyErr_Occurred());
assert(res.err == NULL);
assert(res.kind == _Py_ext_module_kind_SINGLEPHASE);
mod = res.module;
// XXX __file__ doesn't get set!
/* Tchnically, the init function could return a different module def.
* Then we would probably need to update the global cache.
* However, we don't expect anyone to change the def. */
assert(res.def == def);
_Py_ext_module_loader_result_clear(&res);
/* Remember the filename as the __file__ attribute */
if (info->filename != NULL) {
if (PyModule_AddObjectRef(mod, "__file__", info->filename) < 0) {
PyErr_Clear(); /* Not important enough to report */
}
}
if (PyObject_SetItem(modules, info->name, mod) == -1) {
Py_DECREF(mod);
return NULL;
@ -1398,78 +1464,89 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0,
struct _Py_ext_module_loader_info *info,
PyObject *spec, PyObject *modules)
{
/* Core modules go through _PyImport_FixupBuiltin(). */
assert(!is_core_module(tstate->interp, info->name, info->path));
PyObject *mod = NULL;
PyModuleDef *def = NULL;
const char *name_buf = PyBytes_AS_STRING(info->name_encoded);
struct _Py_ext_module_loader_result res;
if (_PyImport_RunModInitFunc(p0, info, &res) < 0) {
/* We discard res.def. */
assert(res.module == NULL);
assert(PyErr_Occurred());
goto finally;
_Py_ext_module_loader_result_apply_error(&res, name_buf);
return NULL;
}
assert(!PyErr_Occurred());
assert(res.err == NULL);
mod = res.module;
res.module = NULL;
def = res.def;
assert(def != NULL);
if (mod == NULL) {
//assert(!is_singlephase(def));
if (res.kind == _Py_ext_module_kind_MULTIPHASE) {
assert_multiphase_def(def);
assert(mod == NULL);
mod = PyModule_FromDefAndSpec(def, spec);
if (mod == NULL) {
goto finally;
goto error;
}
}
else {
assert(is_singlephase(def));
assert(res.kind == _Py_ext_module_kind_SINGLEPHASE);
assert_singlephase_def(def);
assert(PyModule_Check(mod));
const char *name_buf = PyBytes_AS_STRING(info->name_encoded);
if (_PyImport_CheckSubinterpIncompatibleExtensionAllowed(name_buf) < 0) {
Py_CLEAR(mod);
goto finally;
goto error;
}
/* Remember pointer to module init function. */
def->m_base.m_init = p0;
/* Remember the filename as the __file__ attribute */
if (info->filename != NULL) {
/* Remember the filename as the __file__ attribute */
if (PyModule_AddObjectRef(mod, "__file__", info->filename) < 0) {
PyErr_Clear(); /* Not important enough to report */
}
}
/* Update global import state. */
struct singlephase_global_update singlephase = {0};
// gh-88216: Extensions and def->m_base.m_copy can be updated
// when the extension module doesn't support sub-interpreters.
if (def->m_size == -1
&& !is_core_module(tstate->interp, info->name, info->path))
{
if (def->m_size == -1) {
/* We will reload from m_copy. */
assert(def->m_base.m_init == NULL);
singlephase.m_dict = PyModule_GetDict(mod);
assert(singlephase.m_dict != NULL);
}
else {
/* We will reload via the init function. */
assert(def->m_size >= 0);
singlephase.m_init = p0;
}
if (update_global_state_for_extension(
tstate, info->path, info->name, def, &singlephase) < 0)
{
Py_CLEAR(mod);
goto finally;
goto error;
}
/* Update per-interpreter import state. */
PyObject *modules = get_modules_dict(tstate, true);
if (finish_singlephase_extension(
tstate, mod, def, info->name, modules) < 0)
{
Py_CLEAR(mod);
goto finally;
goto error;
}
}
finally:
_Py_ext_module_loader_result_clear(&res);
return mod;
error:
Py_XDECREF(mod);
_Py_ext_module_loader_result_clear(&res);
return NULL;
}
@ -1532,7 +1609,7 @@ _PyImport_FixupBuiltin(PyThreadState *tstate, PyObject *mod, const char *name,
* module state, but we also don't populate def->m_base.m_copy
* for them. */
assert(is_core_module(tstate->interp, nameobj, nameobj));
assert(is_singlephase(def));
assert_singlephase_def(def);
assert(def->m_size == -1);
assert(def->m_base.m_copy == NULL);
@ -1586,7 +1663,7 @@ create_builtin(PyThreadState *tstate, PyObject *name, PyObject *spec)
PyObject *mod = import_find_extension(tstate, &info);
if (mod != NULL) {
assert(!_PyErr_Occurred(tstate));
assert(is_singlephase(_PyModule_GetDef(mod)));
assert_singlephase(_PyModule_GetDef(mod));
goto finally;
}
else if (_PyErr_Occurred(tstate)) {
@ -3940,7 +4017,7 @@ _imp_create_dynamic_impl(PyObject *module, PyObject *spec, PyObject *file)
mod = import_find_extension(tstate, &info);
if (mod != NULL) {
assert(!_PyErr_Occurred(tstate));
assert(is_singlephase(_PyModule_GetDef(mod)));
assert_singlephase(_PyModule_GetDef(mod));
goto finally;
}
else if (_PyErr_Occurred(tstate)) {

View File

@ -28,6 +28,11 @@ extern dl_funcptr _PyImport_FindSharedFuncptr(const char *prefix,
const char *pathname, FILE *fp);
#endif
/***********************************/
/* module info to use when loading */
/***********************************/
static const char * const ascii_only_prefix = "PyInit";
static const char * const nonascii_prefix = "PyInitU";
@ -205,6 +210,150 @@ _Py_ext_module_loader_info_init_from_spec(
}
/********************************/
/* module init function results */
/********************************/
void
_Py_ext_module_loader_result_clear(struct _Py_ext_module_loader_result *res)
{
/* Instead, the caller should have called
* _Py_ext_module_loader_result_apply_error(). */
assert(res->err == NULL);
*res = (struct _Py_ext_module_loader_result){0};
}
static void
_Py_ext_module_loader_result_set_error(
struct _Py_ext_module_loader_result *res,
enum _Py_ext_module_loader_result_error_kind kind)
{
#ifndef NDEBUG
switch (kind) {
case _Py_ext_module_loader_result_EXCEPTION: /* fall through */
case _Py_ext_module_loader_result_ERR_UNREPORTED_EXC:
assert(PyErr_Occurred());
break;
case _Py_ext_module_loader_result_ERR_MISSING: /* fall through */
case _Py_ext_module_loader_result_ERR_UNINITIALIZED: /* fall through */
case _Py_ext_module_loader_result_ERR_NONASCII_NOT_MULTIPHASE: /* fall through */
case _Py_ext_module_loader_result_ERR_NOT_MODULE: /* fall through */
case _Py_ext_module_loader_result_ERR_MISSING_DEF:
assert(!PyErr_Occurred());
break;
default:
/* We added a new error kind but forgot to add it to this switch. */
assert(0);
}
#endif
assert(res->err == NULL && res->_err.exc == NULL);
res->err = &res->_err;
*res->err = (struct _Py_ext_module_loader_result_error){
.kind=kind,
.exc=PyErr_GetRaisedException(),
};
/* For some kinds, we also set/check res->kind. */
switch (kind) {
case _Py_ext_module_loader_result_ERR_UNINITIALIZED:
assert(res->kind == _Py_ext_module_kind_UNKNOWN);
res->kind = _Py_ext_module_kind_INVALID;
break;
/* None of the rest affect the result kind. */
case _Py_ext_module_loader_result_EXCEPTION: /* fall through */
case _Py_ext_module_loader_result_ERR_MISSING: /* fall through */
case _Py_ext_module_loader_result_ERR_UNREPORTED_EXC: /* fall through */
case _Py_ext_module_loader_result_ERR_NONASCII_NOT_MULTIPHASE: /* fall through */
case _Py_ext_module_loader_result_ERR_NOT_MODULE: /* fall through */
case _Py_ext_module_loader_result_ERR_MISSING_DEF:
break;
default:
/* We added a new error kind but forgot to add it to this switch. */
assert(0);
}
}
void
_Py_ext_module_loader_result_apply_error(
struct _Py_ext_module_loader_result *res,
const char *name)
{
assert(!PyErr_Occurred());
assert(res->err != NULL && res->err == &res->_err);
struct _Py_ext_module_loader_result_error err = *res->err;
res->err = NULL;
/* We're otherwise done with the result at this point. */
_Py_ext_module_loader_result_clear(res);
#ifndef NDEBUG
switch (err.kind) {
case _Py_ext_module_loader_result_EXCEPTION: /* fall through */
case _Py_ext_module_loader_result_ERR_UNREPORTED_EXC:
assert(err.exc != NULL);
break;
case _Py_ext_module_loader_result_ERR_MISSING: /* fall through */
case _Py_ext_module_loader_result_ERR_UNINITIALIZED: /* fall through */
case _Py_ext_module_loader_result_ERR_NONASCII_NOT_MULTIPHASE: /* fall through */
case _Py_ext_module_loader_result_ERR_NOT_MODULE: /* fall through */
case _Py_ext_module_loader_result_ERR_MISSING_DEF:
assert(err.exc == NULL);
break;
default:
/* We added a new error kind but forgot to add it to this switch. */
assert(0);
}
#endif
const char *msg = NULL;
switch (err.kind) {
case _Py_ext_module_loader_result_EXCEPTION:
break;
case _Py_ext_module_loader_result_ERR_MISSING:
msg = "initialization of %s failed without raising an exception";
break;
case _Py_ext_module_loader_result_ERR_UNREPORTED_EXC:
msg = "initialization of %s raised unreported exception";
break;
case _Py_ext_module_loader_result_ERR_UNINITIALIZED:
msg = "init function of %s returned uninitialized object";
break;
case _Py_ext_module_loader_result_ERR_NONASCII_NOT_MULTIPHASE:
msg = "initialization of %s did not return PyModuleDef";
break;
case _Py_ext_module_loader_result_ERR_NOT_MODULE:
msg = "initialization of %s did not return an extension module";
break;
case _Py_ext_module_loader_result_ERR_MISSING_DEF:
msg = "initialization of %s did not return a valid extension module";
break;
default:
/* We added a new error kind but forgot to add it to this switch. */
assert(0);
PyErr_Format(PyExc_SystemError,
"loading %s failed due to init function", name);
return;
}
if (err.exc != NULL) {
PyErr_SetRaisedException(err.exc);
err.exc = NULL; /* PyErr_SetRaisedException() stole our reference. */
if (msg != NULL) {
_PyErr_FormatFromCause(PyExc_SystemError, msg, name);
}
}
else {
assert(msg != NULL);
PyErr_Format(PyExc_SystemError, msg, name);
}
}
/********************************************/
/* getting/running the module init function */
/********************************************/
PyModInitFunction
_PyImport_GetModInitFunc(struct _Py_ext_module_loader_info *info,
FILE *fp)
@ -245,8 +394,9 @@ _PyImport_RunModInitFunc(PyModInitFunction p0,
struct _Py_ext_module_loader_info *info,
struct _Py_ext_module_loader_result *p_res)
{
struct _Py_ext_module_loader_result res = {0};
const char *name_buf = PyBytes_AS_STRING(info->name_encoded);
struct _Py_ext_module_loader_result res = {
.kind=_Py_ext_module_kind_UNKNOWN,
};
/* Call the module init function. */
@ -258,18 +408,18 @@ _PyImport_RunModInitFunc(PyModInitFunction p0,
/* Validate the result (and populate "res". */
if (m == NULL) {
if (!PyErr_Occurred()) {
PyErr_Format(
PyExc_SystemError,
"initialization of %s failed without raising an exception",
name_buf);
if (PyErr_Occurred()) {
_Py_ext_module_loader_result_set_error(
&res, _Py_ext_module_loader_result_EXCEPTION);
}
else {
_Py_ext_module_loader_result_set_error(
&res, _Py_ext_module_loader_result_ERR_MISSING);
}
goto error;
} else if (PyErr_Occurred()) {
_PyErr_FormatFromCause(
PyExc_SystemError,
"initialization of %s raised unreported exception",
name_buf);
_Py_ext_module_loader_result_set_error(
&res, _Py_ext_module_loader_result_ERR_UNREPORTED_EXC);
/* We would probably be correct to decref m here,
* but we weren't doing so before,
* so we stick with doing nothing. */
@ -281,9 +431,8 @@ _PyImport_RunModInitFunc(PyModInitFunction p0,
/* This can happen when a PyModuleDef is returned without calling
* PyModuleDef_Init on it
*/
PyErr_Format(PyExc_SystemError,
"init function of %s returned uninitialized object",
name_buf);
_Py_ext_module_loader_result_set_error(
&res, _Py_ext_module_loader_result_ERR_UNINITIALIZED);
/* Likewise, decref'ing here makes sense. However, the original
* code has a note about "prevent segfault in DECREF",
* so we play it safe and leave it alone. */
@ -293,48 +442,50 @@ _PyImport_RunModInitFunc(PyModInitFunction p0,
if (PyObject_TypeCheck(m, &PyModuleDef_Type)) {
/* multi-phase init */
res.kind = _Py_ext_module_kind_MULTIPHASE;
res.def = (PyModuleDef *)m;
/* Run PyModule_FromDefAndSpec() to finish loading the module. */
}
else if (info->hook_prefix == nonascii_prefix) {
/* It should have been multi-phase init? */
/* Non-ASCII is only supported for multi-phase init. */
res.kind = _Py_ext_module_kind_MULTIPHASE;
/* Don't allow legacy init for non-ASCII module names. */
PyErr_Format(
PyExc_SystemError,
"initialization of %s did not return PyModuleDef",
name_buf);
Py_DECREF(m);
return -1;
_Py_ext_module_loader_result_set_error(
&res, _Py_ext_module_loader_result_ERR_NONASCII_NOT_MULTIPHASE);
goto error;
}
else {
/* single-phase init (legacy) */
res.kind = _Py_ext_module_kind_SINGLEPHASE;
res.module = m;
if (!PyModule_Check(m)) {
PyErr_Format(PyExc_SystemError,
"initialization of %s did not return an extension "
"module", name_buf);
_Py_ext_module_loader_result_set_error(
&res, _Py_ext_module_loader_result_ERR_NOT_MODULE);
goto error;
}
res.def = _PyModule_GetDef(m);
if (res.def == NULL) {
PyErr_Format(PyExc_SystemError,
"initialization of %s did not return a valid extension "
"module", name_buf);
PyErr_Clear();
_Py_ext_module_loader_result_set_error(
&res, _Py_ext_module_loader_result_ERR_MISSING_DEF);
goto error;
}
}
assert(!PyErr_Occurred());
assert(res.err == NULL);
*p_res = res;
return 0;
error:
assert(PyErr_Occurred());
assert(!PyErr_Occurred());
assert(res.err != NULL);
Py_CLEAR(res.module);
res.def = NULL;
*p_res = res;
p_res->err = &p_res->_err;
return -1;
}