diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-09-26-18-21-06.gh-issue-116510.FacUWO.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-26-18-21-06.gh-issue-116510.FacUWO.rst new file mode 100644 index 00000000000..e3741321006 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-26-18-21-06.gh-issue-116510.FacUWO.rst @@ -0,0 +1,5 @@ +Fix a crash caused by immortal interned strings being shared between +sub-interpreters that use basic single-phase init. In that case, the string +can be used by an interpreter that outlives the interpreter that created and +interned it. For interpreters that share obmalloc state, also share the +interned dict with the main interpreter. diff --git a/Objects/object.c b/Objects/object.c index 8a819dd336e..8d809158a6c 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2491,6 +2491,42 @@ _Py_ResurrectReference(PyObject *op) #ifdef Py_TRACE_REFS +/* Make sure the ref is associated with the right interpreter. + * This only needs special attention for heap-allocated objects + * that have been immortalized, and only when the object might + * outlive the interpreter where it was created. That means the + * object was necessarily created using a global allocator + * (i.e. from the main interpreter). Thus in that specific case + * we move the object over to the main interpreter's refchain. + * + * This was added for the sake of the immortal interned strings, + * where legacy subinterpreters share the main interpreter's + * interned dict (and allocator), and therefore the strings can + * outlive the subinterpreter. + * + * It may make sense to fold this into _Py_SetImmortalUntracked(), + * but that requires further investigation. In the meantime, it is + * up to the caller to know if this is needed. There should be + * very few cases. + */ +void +_Py_NormalizeImmortalReference(PyObject *op) +{ + assert(_Py_IsImmortal(op)); + PyInterpreterState *interp = _PyInterpreterState_GET(); + if (!_PyRefchain_IsTraced(interp, op)) { + return; + } + PyInterpreterState *main_interp = _PyInterpreterState_Main(); + if (interp != main_interp + && interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) + { + assert(!_PyRefchain_IsTraced(main_interp, op)); + _PyRefchain_Remove(interp, op); + _PyRefchain_Trace(main_interp, op); + } +} + void _Py_ForgetReference(PyObject *op) { diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index bd5bb5048fd..4ea7d5f380e 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -281,13 +281,37 @@ hashtable_unicode_compare(const void *key1, const void *key2) } } +/* Return true if this interpreter should share the main interpreter's + intern_dict. That's important for interpreters which load basic + single-phase init extension modules (m_size == -1). There could be interned + immortal strings that are shared between interpreters, due to the + PyDict_Update(mdict, m_copy) call in import_find_extension(). + + It's not safe to deallocate those strings until all interpreters that + potentially use them are freed. By storing them in the main interpreter, we + ensure they get freed after all other interpreters are freed. +*/ +static bool +has_shared_intern_dict(PyInterpreterState *interp) +{ + PyInterpreterState *main_interp = _PyInterpreterState_Main(); + return interp != main_interp && interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC; +} + static int init_interned_dict(PyInterpreterState *interp) { assert(get_interned_dict(interp) == NULL); - PyObject *interned = interned = PyDict_New(); - if (interned == NULL) { - return -1; + PyObject *interned; + if (has_shared_intern_dict(interp)) { + interned = get_interned_dict(_PyInterpreterState_Main()); + Py_INCREF(interned); + } + else { + interned = PyDict_New(); + if (interned == NULL) { + return -1; + } } _Py_INTERP_CACHED_OBJECT(interp, interned_strings) = interned; return 0; @@ -298,7 +322,10 @@ clear_interned_dict(PyInterpreterState *interp) { PyObject *interned = get_interned_dict(interp); if (interned != NULL) { - PyDict_Clear(interned); + if (!has_shared_intern_dict(interp)) { + // only clear if the dict belongs to this interpreter + PyDict_Clear(interned); + } Py_DECREF(interned); _Py_INTERP_CACHED_OBJECT(interp, interned_strings) = NULL; } @@ -15401,6 +15428,10 @@ _PyUnicode_InternStatic(PyInterpreterState *interp, PyObject **p) assert(*p); } +#ifdef Py_TRACE_REFS +extern void _Py_NormalizeImmortalReference(PyObject *); +#endif + static void immortalize_interned(PyObject *s) { @@ -15416,6 +15447,10 @@ immortalize_interned(PyObject *s) #endif _PyUnicode_STATE(s).interned = SSTATE_INTERNED_IMMORTAL; _Py_SetImmortal(s); +#ifdef Py_TRACE_REFS + /* Make sure the ref is associated with the right interpreter. */ + _Py_NormalizeImmortalReference(s); +#endif } static /* non-null */ PyObject* @@ -15609,6 +15644,13 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) } assert(PyDict_CheckExact(interned)); + if (has_shared_intern_dict(interp)) { + // the dict doesn't belong to this interpreter, skip the debug + // checks on it and just clear the pointer to it + clear_interned_dict(interp); + return; + } + #ifdef INTERNED_STATS fprintf(stderr, "releasing %zd interned strings\n", PyDict_GET_SIZE(interned)); @@ -16117,8 +16159,10 @@ _PyUnicode_Fini(PyInterpreterState *interp) { struct _Py_unicode_state *state = &interp->unicode; - // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini() - assert(get_interned_dict(interp) == NULL); + if (!has_shared_intern_dict(interp)) { + // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini() + assert(get_interned_dict(interp) == NULL); + } _PyUnicode_FiniEncodings(&state->fs_codec);