From 666ecfb0957a2fa0df5e2bd03804195de74bdfbf Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 2 Jul 2020 01:19:57 +0200 Subject: [PATCH] bpo-1635741: Release Unicode interned strings at exit (GH-21269) * PyUnicode_InternInPlace() now ensures that interned strings are ready. * Add _PyUnicode_ClearInterned(). * Py_Finalize() now releases Unicode interned strings: call _PyUnicode_ClearInterned(). --- Include/internal/pycore_pylifecycle.h | 1 + Objects/unicodeobject.c | 68 +++++++++++++-------------- Python/pylifecycle.c | 1 + 3 files changed, 34 insertions(+), 36 deletions(-) diff --git a/Include/internal/pycore_pylifecycle.h b/Include/internal/pycore_pylifecycle.h index bffc95b27e9..22def3dbc8b 100644 --- a/Include/internal/pycore_pylifecycle.h +++ b/Include/internal/pycore_pylifecycle.h @@ -78,6 +78,7 @@ extern void _PyGC_Fini(PyThreadState *tstate); extern void _PyType_Fini(void); extern void _Py_HashRandomization_Fini(void); extern void _PyUnicode_Fini(PyThreadState *tstate); +extern void _PyUnicode_ClearInterned(PyThreadState *tstate); extern void _PyLong_Fini(PyThreadState *tstate); extern void _PyFaulthandler_Fini(void); extern void _PyHash_Fini(void); diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index fe46de2ae47..37e7fe5c0ef 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -55,8 +55,8 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. #include #endif -/* Uncomment to display statistics on interned strings at exit when - using Valgrind or Insecure++. */ +/* Uncomment to display statistics on interned strings at exit + in _PyUnicode_ClearInterned(). */ /* #define INTERNED_STATS 1 */ @@ -15681,6 +15681,11 @@ PyUnicode_InternInPlace(PyObject **p) } #ifdef INTERNED_STRINGS + if (PyUnicode_READY(s) == -1) { + PyErr_Clear(); + return; + } + if (interned == NULL) { interned = PyDict_New(); if (interned == NULL) { @@ -15733,23 +15738,29 @@ PyUnicode_InternFromString(const char *cp) } -#if defined(WITH_VALGRIND) || defined(__INSURE__) -static void -unicode_release_interned(void) +void +_PyUnicode_ClearInterned(PyThreadState *tstate) { - if (interned == NULL || !PyDict_Check(interned)) { - return; - } - PyObject *keys = PyDict_Keys(interned); - if (keys == NULL || !PyList_Check(keys)) { - PyErr_Clear(); + if (!_Py_IsMainInterpreter(tstate)) { + // interned dict is shared by all interpreters return; } - /* Since unicode_release_interned() is intended to help a leak - detector, interned unicode strings are not forcibly deallocated; - rather, we give them their stolen references back, and then clear - and DECREF the interned dict. */ + if (interned == NULL) { + return; + } + assert(PyDict_CheckExact(interned)); + + PyObject *keys = PyDict_Keys(interned); + if (keys == NULL) { + PyErr_Clear(); + return; + } + assert(PyList_CheckExact(keys)); + + /* Interned unicode strings are not forcibly deallocated; rather, we give + them their stolen references back, and then clear and DECREF the + interned dict. */ Py_ssize_t n = PyList_GET_SIZE(keys); #ifdef INTERNED_STATS @@ -15759,9 +15770,8 @@ unicode_release_interned(void) #endif for (Py_ssize_t i = 0; i < n; i++) { PyObject *s = PyList_GET_ITEM(keys, i); - if (PyUnicode_READY(s) == -1) { - Py_UNREACHABLE(); - } + assert(PyUnicode_IS_READY(s)); + switch (PyUnicode_CHECK_INTERNED(s)) { case SSTATE_INTERNED_IMMORTAL: Py_SET_REFCNT(s, Py_REFCNT(s) + 1); @@ -15788,10 +15798,10 @@ unicode_release_interned(void) mortal_size, immortal_size); #endif Py_DECREF(keys); + PyDict_Clear(interned); Py_CLEAR(interned); } -#endif /********************* Unicode Iterator **************************/ @@ -16160,23 +16170,9 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void) void _PyUnicode_Fini(PyThreadState *tstate) { - struct _Py_unicode_state *state = &tstate->interp->unicode; + // _PyUnicode_ClearInterned() must be called before - int is_main_interp = _Py_IsMainInterpreter(tstate); - if (is_main_interp) { -#if defined(WITH_VALGRIND) || defined(__INSURE__) - /* Insure++ is a memory analysis tool that aids in discovering - * memory leaks and other memory problems. On Python exit, the - * interned string dictionaries are flagged as being in use at exit - * (which it is). Under normal circumstances, this is fine because - * the memory will be automatically reclaimed by the system. Under - * memory debugging, it's a huge source of useless noise, so we - * trade off slower shutdown for less distraction in the memory - * reports. -baw - */ - unicode_release_interned(); -#endif /* __INSURE__ */ - } + struct _Py_unicode_state *state = &tstate->interp->unicode; Py_CLEAR(state->empty_string); @@ -16184,7 +16180,7 @@ _PyUnicode_Fini(PyThreadState *tstate) Py_CLEAR(state->latin1[i]); } - if (is_main_interp) { + if (_Py_IsMainInterpreter(tstate)) { unicode_clear_static_strings(); } diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index cfbaf21960b..3ce2c41ef1f 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1263,6 +1263,7 @@ finalize_interp_types(PyThreadState *tstate) _PyFrame_Fini(tstate); _PyAsyncGen_Fini(tstate); _PyContext_Fini(tstate); + _PyUnicode_ClearInterned(tstate); _PyDict_Fini(tstate); _PyList_Fini(tstate);