gh-110310: Add a Per-Interpreter XID Registry for Heap Types (gh-110311)

We do the following:

* add a per-interpreter XID registry (PyInterpreterState.xidregistry)
* put heap types there (keep static types in _PyRuntimeState.xidregistry)
* clear the registries during interpreter/runtime finalization
* avoid duplicate entries in the registry (when _PyCrossInterpreterData_RegisterClass() is called more than once for a type)
* use Py_TYPE() instead of PyObject_Type() in _PyCrossInterpreterData_Lookup()

The per-interpreter registry helps preserve isolation between interpreters.  This is important when heap types are registered, which is something we haven't been doing yet but I will likely do soon.
This commit is contained in:
Eric Snow 2023-10-04 16:35:27 -06:00 committed by GitHub
parent e561e98058
commit 80dc39e1dc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 148 additions and 58 deletions

View File

@ -39,6 +39,32 @@ struct _Py_long_state {
int max_str_digits;
};
/* cross-interpreter data registry */
/* For now we use a global registry of shareable classes. An
alternative would be to add a tp_* slot for a class's
crossinterpdatafunc. It would be simpler and more efficient. */
struct _xidregitem;
struct _xidregitem {
struct _xidregitem *prev;
struct _xidregitem *next;
/* This can be a dangling pointer, but only if weakref is set. */
PyTypeObject *cls;
/* This is NULL for builtin types. */
PyObject *weakref;
size_t refcount;
crossinterpdatafunc getdata;
};
struct _xidregistry {
PyThread_type_lock mutex;
struct _xidregitem *head;
};
/* interpreter state */
/* PyInterpreterState holds the global state for one of the runtime's
@ -149,6 +175,9 @@ struct _is {
Py_ssize_t co_extra_user_count;
freefunc co_extra_freefuncs[MAX_CO_EXTRA_USERS];
// XXX Remove this field once we have a tp_* slot.
struct _xidregistry xidregistry;
#ifdef HAVE_FORK
PyObject *before_forkers;
PyObject *after_forkers_parent;
@ -238,21 +267,6 @@ _PyInterpreterState_SetFinalizing(PyInterpreterState *interp, PyThreadState *tst
}
/* cross-interpreter data registry */
/* For now we use a global registry of shareable classes. An
alternative would be to add a tp_* slot for a class's
crossinterpdatafunc. It would be simpler and more efficient. */
struct _xidregitem;
struct _xidregitem {
struct _xidregitem *prev;
struct _xidregitem *next;
PyObject *cls; // weakref to a PyTypeObject
crossinterpdatafunc getdata;
};
extern PyInterpreterState* _PyInterpreterState_LookUpID(int64_t);
extern int _PyInterpreterState_IDInitref(PyInterpreterState *);

View File

@ -201,10 +201,7 @@ typedef struct pyruntimestate {
tools. */
// XXX Remove this field once we have a tp_* slot.
struct _xidregistry {
PyThread_type_lock mutex;
struct _xidregitem *head;
} xidregistry;
struct _xidregistry xidregistry;
struct _pymem_allocators allocators;
struct _obmalloc_global_state obmalloc;

View File

@ -495,6 +495,8 @@ _PyRuntimeState_Init(_PyRuntimeState *runtime)
return _PyStatus_OK();
}
static void _xidregistry_clear(struct _xidregistry *);
void
_PyRuntimeState_Fini(_PyRuntimeState *runtime)
{
@ -503,6 +505,8 @@ _PyRuntimeState_Fini(_PyRuntimeState *runtime)
assert(runtime->object_state.interpreter_leaks == 0);
#endif
_xidregistry_clear(&runtime->xidregistry);
if (gilstate_tss_initialized(runtime)) {
gilstate_tss_fini(runtime);
}
@ -548,6 +552,11 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime)
for (int i = 0; i < NUMLOCKS; i++) {
reinit_err += _PyThread_at_fork_reinit(lockptrs[i]);
}
/* PyOS_AfterFork_Child(), which calls this function, later calls
_PyInterpreterState_DeleteExceptMain(), so we only need to update
the main interpreter here. */
assert(runtime->interpreters.main != NULL);
runtime->interpreters.main->xidregistry.mutex = runtime->xidregistry.mutex;
PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
@ -709,6 +718,10 @@ init_interpreter(PyInterpreterState *interp,
interp->dtoa = (struct _dtoa_state)_dtoa_state_INIT(interp);
}
interp->f_opcode_trace_set = false;
assert(runtime->xidregistry.mutex != NULL);
interp->xidregistry.mutex = runtime->xidregistry.mutex;
interp->_initialized = 1;
return _PyStatus_OK();
}
@ -930,6 +943,10 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
Py_CLEAR(interp->sysdict);
Py_CLEAR(interp->builtins);
_xidregistry_clear(&interp->xidregistry);
/* The lock is owned by the runtime, so we don't free it here. */
interp->xidregistry.mutex = NULL;
if (tstate->interp == interp) {
/* We are now safe to fix tstate->_status.cleared. */
// XXX Do this (much) earlier?
@ -2613,23 +2630,27 @@ _PyCrossInterpreterData_ReleaseAndRawFree(_PyCrossInterpreterData *data)
crossinterpdatafunc. It would be simpler and more efficient. */
static int
_xidregistry_add_type(struct _xidregistry *xidregistry, PyTypeObject *cls,
crossinterpdatafunc getdata)
_xidregistry_add_type(struct _xidregistry *xidregistry,
PyTypeObject *cls, crossinterpdatafunc getdata)
{
// Note that we effectively replace already registered classes
// rather than failing.
struct _xidregitem *newhead = PyMem_RawMalloc(sizeof(struct _xidregitem));
if (newhead == NULL) {
return -1;
}
// XXX Assign a callback to clear the entry from the registry?
newhead->cls = PyWeakref_NewRef((PyObject *)cls, NULL);
if (newhead->cls == NULL) {
PyMem_RawFree(newhead);
return -1;
*newhead = (struct _xidregitem){
// We do not keep a reference, to avoid keeping the class alive.
.cls = cls,
.refcount = 1,
.getdata = getdata,
};
if (cls->tp_flags & Py_TPFLAGS_HEAPTYPE) {
// XXX Assign a callback to clear the entry from the registry?
newhead->weakref = PyWeakref_NewRef((PyObject *)cls, NULL);
if (newhead->weakref == NULL) {
PyMem_RawFree(newhead);
return -1;
}
}
newhead->getdata = getdata;
newhead->prev = NULL;
newhead->next = xidregistry->head;
if (newhead->next != NULL) {
newhead->next->prev = newhead;
@ -2654,39 +2675,77 @@ _xidregistry_remove_entry(struct _xidregistry *xidregistry,
if (next != NULL) {
next->prev = entry->prev;
}
Py_DECREF(entry->cls);
Py_XDECREF(entry->weakref);
PyMem_RawFree(entry);
return next;
}
static void
_xidregistry_clear(struct _xidregistry *xidregistry)
{
struct _xidregitem *cur = xidregistry->head;
xidregistry->head = NULL;
while (cur != NULL) {
struct _xidregitem *next = cur->next;
Py_XDECREF(cur->weakref);
PyMem_RawFree(cur);
cur = next;
}
}
static struct _xidregitem *
_xidregistry_find_type(struct _xidregistry *xidregistry, PyTypeObject *cls)
{
struct _xidregitem *cur = xidregistry->head;
while (cur != NULL) {
PyObject *registered = _PyWeakref_GET_REF(cur->cls);
if (registered == NULL) {
// The weakly ref'ed object was freed.
cur = _xidregistry_remove_entry(xidregistry, cur);
}
else {
assert(PyType_Check(registered));
if (registered == (PyObject *)cls) {
Py_DECREF(registered);
return cur;
if (cur->weakref != NULL) {
// cur is/was a heap type.
PyObject *registered = _PyWeakref_GET_REF(cur->weakref);
if (registered == NULL) {
// The weakly ref'ed object was freed.
cur = _xidregistry_remove_entry(xidregistry, cur);
continue;
}
assert(PyType_Check(registered));
assert(cur->cls == (PyTypeObject *)registered);
assert(cur->cls->tp_flags & Py_TPFLAGS_HEAPTYPE);
Py_DECREF(registered);
cur = cur->next;
}
if (cur->cls == cls) {
return cur;
}
cur = cur->next;
}
return NULL;
}
static inline struct _xidregistry *
_get_xidregistry(PyInterpreterState *interp, PyTypeObject *cls)
{
struct _xidregistry *xidregistry = &interp->runtime->xidregistry;
if (cls->tp_flags & Py_TPFLAGS_HEAPTYPE) {
assert(interp->xidregistry.mutex == xidregistry->mutex);
xidregistry = &interp->xidregistry;
}
return xidregistry;
}
static void _register_builtins_for_crossinterpreter_data(struct _xidregistry *xidregistry);
static inline void
_ensure_builtins_xid(PyInterpreterState *interp, struct _xidregistry *xidregistry)
{
if (xidregistry != &interp->xidregistry) {
assert(xidregistry == &interp->runtime->xidregistry);
if (xidregistry->head == NULL) {
_register_builtins_for_crossinterpreter_data(xidregistry);
}
}
}
int
_PyCrossInterpreterData_RegisterClass(PyTypeObject *cls,
crossinterpdatafunc getdata)
crossinterpdatafunc getdata)
{
if (!PyType_Check(cls)) {
PyErr_Format(PyExc_ValueError, "only classes may be registered");
@ -2697,12 +2756,23 @@ _PyCrossInterpreterData_RegisterClass(PyTypeObject *cls,
return -1;
}
struct _xidregistry *xidregistry = &_PyRuntime.xidregistry ;
int res = 0;
PyInterpreterState *interp = _PyInterpreterState_GET();
struct _xidregistry *xidregistry = _get_xidregistry(interp, cls);
PyThread_acquire_lock(xidregistry->mutex, WAIT_LOCK);
if (xidregistry->head == NULL) {
_register_builtins_for_crossinterpreter_data(xidregistry);
_ensure_builtins_xid(interp, xidregistry);
struct _xidregitem *matched = _xidregistry_find_type(xidregistry, cls);
if (matched != NULL) {
assert(matched->getdata == getdata);
matched->refcount += 1;
goto finally;
}
int res = _xidregistry_add_type(xidregistry, cls, getdata);
res = _xidregistry_add_type(xidregistry, cls, getdata);
finally:
PyThread_release_lock(xidregistry->mutex);
return res;
}
@ -2711,13 +2781,20 @@ int
_PyCrossInterpreterData_UnregisterClass(PyTypeObject *cls)
{
int res = 0;
struct _xidregistry *xidregistry = &_PyRuntime.xidregistry ;
PyInterpreterState *interp = _PyInterpreterState_GET();
struct _xidregistry *xidregistry = _get_xidregistry(interp, cls);
PyThread_acquire_lock(xidregistry->mutex, WAIT_LOCK);
struct _xidregitem *matched = _xidregistry_find_type(xidregistry, cls);
if (matched != NULL) {
(void)_xidregistry_remove_entry(xidregistry, matched);
assert(matched->refcount > 0);
matched->refcount -= 1;
if (matched->refcount == 0) {
(void)_xidregistry_remove_entry(xidregistry, matched);
}
res = 1;
}
PyThread_release_lock(xidregistry->mutex);
return res;
}
@ -2730,17 +2807,19 @@ _PyCrossInterpreterData_UnregisterClass(PyTypeObject *cls)
crossinterpdatafunc
_PyCrossInterpreterData_Lookup(PyObject *obj)
{
struct _xidregistry *xidregistry = &_PyRuntime.xidregistry ;
PyObject *cls = PyObject_Type(obj);
PyTypeObject *cls = Py_TYPE(obj);
PyInterpreterState *interp = _PyInterpreterState_GET();
struct _xidregistry *xidregistry = _get_xidregistry(interp, cls);
PyThread_acquire_lock(xidregistry->mutex, WAIT_LOCK);
if (xidregistry->head == NULL) {
_register_builtins_for_crossinterpreter_data(xidregistry);
}
struct _xidregitem *matched = _xidregistry_find_type(xidregistry,
(PyTypeObject *)cls);
Py_DECREF(cls);
_ensure_builtins_xid(interp, xidregistry);
struct _xidregitem *matched = _xidregistry_find_type(xidregistry, cls);
crossinterpdatafunc func = matched != NULL ? matched->getdata : NULL;
PyThread_release_lock(xidregistry->mutex);
return matched != NULL ? matched->getdata : NULL;
return func;
}
/* cross-interpreter data for builtin types */