From fecc4f2b474f16062514e95a67e66080fd626e14 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 19 Mar 2019 14:20:29 +0100 Subject: [PATCH] bpo-36356: Release Unicode interned strings on Valgrind (#12431) When Python is compiled with Valgrind support, release Unicode interned strings at exit in _PyUnicode_Fini(). * Rename _Py_ReleaseInternedUnicodeStrings() to unicode_release_interned() and make it private. * unicode_release_interned() is now called from _PyUnicode_Fini(): it must be called with a running Python thread state for TRASHCAN, it cannot be called from pymain_free(). * Don't display statistics on interned strings at exit anymore --- Include/cpython/unicodeobject.h | 2 -- Modules/main.c | 12 ------- Objects/unicodeobject.c | 57 ++++++++++++++++++++++++--------- 3 files changed, 42 insertions(+), 29 deletions(-) diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index 4eecc963ae2..806c3aa7ced 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -722,8 +722,6 @@ PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter( Py_ssize_t start, Py_ssize_t end); -PyAPI_FUNC(void) _Py_ReleaseInternedUnicodeStrings(void); - /* --- wchar_t support for platforms which support it --------------------- */ #ifdef HAVE_WCHAR_H diff --git a/Modules/main.c b/Modules/main.c index 50fecc9103d..8f7a1bfa830 100644 --- a/Modules/main.c +++ b/Modules/main.c @@ -839,18 +839,6 @@ pymain_free(void) _PyPathConfig_ClearGlobal(); _Py_ClearStandardStreamEncoding(); _Py_ClearArgcArgv(); -#ifdef __INSURE__ - /* Insure++ is a memory analysis tool that aids in discovering - * memory leaks and other memory problems. On Python exit, the - * interned string dictionaries are flagged as being in use at exit - * (which it is). Under normal circumstances, this is fine because - * the memory will be automatically reclaimed by the system. 
Under - * memory debugging, it's a huge source of useless noise, so we - * trade off slower shutdown for less distraction in the memory - * reports. -baw - */ - _Py_ReleaseInternedUnicodeStrings(); -#endif /* __INSURE__ */ } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 9d3ed0d18b1..6e83ed6bdd4 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -51,6 +51,11 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. #include <windows.h> #endif +/* Uncomment to display statistics on interned strings at exit when + using Valgrind or Insure++. */ +/* #define INTERNED_STATS 1 */ + + /*[clinic input] class str "PyObject *" "&PyUnicode_Type" [clinic start generated code]*/ @@ -15157,18 +15162,6 @@ PyUnicode_ClearFreeList(void) return 0; } -void -_PyUnicode_Fini(void) -{ - int i; - - Py_CLEAR(unicode_empty); - - for (i = 0; i < 256; i++) - Py_CLEAR(unicode_latin1[i]); - _PyUnicode_ClearStaticStrings(); - (void)PyUnicode_ClearFreeList(); -} void PyUnicode_InternInPlace(PyObject **p) @@ -15233,8 +15226,10 @@ PyUnicode_InternFromString(const char *cp) return s; } -void -_Py_ReleaseInternedUnicodeStrings(void) + +#if defined(WITH_VALGRIND) || defined(__INSURE__) +static void +unicode_release_interned(void) { PyObject *keys; PyObject *s; @@ -15249,14 +15244,16 @@ _Py_ReleaseInternedUnicodeStrings(void) return; } - /* Since _Py_ReleaseInternedUnicodeStrings() is intended to help a leak + /* Since unicode_release_interned() is intended to help a leak detector, interned unicode strings are not forcibly deallocated; rather, we give them their stolen references back, and then clear and DECREF the interned dict.
*/ n = PyList_GET_SIZE(keys); +#ifdef INTERNED_STATS fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n", n); +#endif for (i = 0; i < n; i++) { s = PyList_GET_ITEM(keys, i); if (PyUnicode_READY(s) == -1) { @@ -15279,13 +15276,16 @@ _Py_ReleaseInternedUnicodeStrings(void) } _PyUnicode_STATE(s).interned = SSTATE_NOT_INTERNED; } +#ifdef INTERNED_STATS fprintf(stderr, "total size of all interned strings: " "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d " "mortal/immortal\n", mortal_size, immortal_size); +#endif Py_DECREF(keys); PyDict_Clear(interned); Py_CLEAR(interned); } +#endif /********************* Unicode Iterator **************************/ @@ -15564,6 +15564,33 @@ PyUnicode_AsUnicodeCopy(PyObject *unicode) return copy; } + +void +_PyUnicode_Fini(void) +{ +#if defined(WITH_VALGRIND) || defined(__INSURE__) + /* Insure++ is a memory analysis tool that aids in discovering + * memory leaks and other memory problems. On Python exit, the + * interned string dictionaries are flagged as being in use at exit + * (which they are). Under normal circumstances, this is fine because + * the memory will be automatically reclaimed by the system. Under + * memory debugging, it's a huge source of useless noise, so we + * trade off slower shutdown for less distraction in the memory + * reports. -baw + */ + unicode_release_interned(); +#endif /* WITH_VALGRIND || __INSURE__ */ + + Py_CLEAR(unicode_empty); + + for (Py_ssize_t i = 0; i < 256; i++) { + Py_CLEAR(unicode_latin1[i]); + } + _PyUnicode_ClearStaticStrings(); + (void)PyUnicode_ClearFreeList(); +} + + /* A _string module, to export formatter_parser and formatter_field_name_split to the string.Formatter class implemented in Python. */