From ba3d67c2fb04a7842741b1b6da5d67f22c579f33 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sat, 26 Dec 2020 00:41:46 +0100 Subject: [PATCH] bpo-39465: Fix _PyUnicode_FromId() for subinterpreters (GH-20058) Make _PyUnicode_FromId() function compatible with subinterpreters. Each interpreter now has an array of identifier objects (interned strings decoded from UTF-8). * Add PyInterpreterState.unicode.identifiers: array of identifiers objects. * Add _PyRuntimeState.unicode_ids used to allocate unique indexes to _Py_Identifier. * Rewrite the _Py_Identifier structure. Microbenchmark on _PyUnicode_FromId(&PyId_a) with _Py_IDENTIFIER(a): [ref] 2.42 ns +- 0.00 ns -> [atomic] 3.39 ns +- 0.00 ns: 1.40x slower This change adds 1 ns per _PyUnicode_FromId() call in average. --- Include/cpython/object.h | 7 +- Include/internal/pycore_interp.h | 7 ++ Include/internal/pycore_runtime.h | 7 ++ .../2020-05-13-18-50-27.bpo-39465.j7nl6A.rst | 3 + Objects/unicodeobject.c | 85 ++++++++++++++----- Python/pystate.c | 30 ++++--- 6 files changed, 102 insertions(+), 37 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2020-05-13-18-50-27.bpo-39465.j7nl6A.rst diff --git a/Include/cpython/object.h b/Include/cpython/object.h index 19c066b0ab7..86889f85768 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -35,12 +35,13 @@ PyAPI_FUNC(Py_ssize_t) _Py_GetRefTotal(void); _PyObject_{Get,Set,Has}AttrId are __getattr__ versions using _Py_Identifier*. */ typedef struct _Py_Identifier { - struct _Py_Identifier *next; const char* string; - PyObject *object; + // Index in PyInterpreterState.unicode.ids.array. It is process-wide + // unique and must be initialized to -1. + Py_ssize_t index; } _Py_Identifier; -#define _Py_static_string_init(value) { .next = NULL, .string = value, .object = NULL } +#define _Py_static_string_init(value) { .string = value, .index = -1 } #define _Py_static_string(varname, value) static _Py_Identifier varname = _Py_static_string_init(value) #define _Py_IDENTIFIER(varname) _Py_static_string(PyId_##varname, #varname) diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 9ec5358b5e3..8c618025452 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -64,6 +64,11 @@ struct _Py_bytes_state { PyBytesObject *characters[256]; }; +struct _Py_unicode_ids { + Py_ssize_t size; + PyObject **array; +}; + struct _Py_unicode_state { // The empty Unicode object is a singleton to improve performance. PyObject *empty_string; @@ -71,6 +76,8 @@ struct _Py_unicode_state { shared as well. */ PyObject *latin1[256]; struct _Py_unicode_fs_codec fs_codec; + // Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId() + struct _Py_unicode_ids ids; }; struct _Py_float_state { diff --git a/Include/internal/pycore_runtime.h b/Include/internal/pycore_runtime.h index 3a01d64e63d..8c54abbb069 100644 --- a/Include/internal/pycore_runtime.h +++ b/Include/internal/pycore_runtime.h @@ -49,6 +49,11 @@ typedef struct _Py_AuditHookEntry { void *userData; } _Py_AuditHookEntry; +struct _Py_unicode_runtime_ids { + PyThread_type_lock lock; + Py_ssize_t next_index; +}; + /* Full Python runtime state */ typedef struct pyruntimestate { @@ -106,6 +111,8 @@ typedef struct pyruntimestate { void *open_code_userdata; _Py_AuditHookEntry *audit_hook_head; + struct _Py_unicode_runtime_ids unicode_ids; + // XXX Consolidate globals found via the check-c-globals script. } _PyRuntimeState; diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-05-13-18-50-27.bpo-39465.j7nl6A.rst b/Misc/NEWS.d/next/Core and Builtins/2020-05-13-18-50-27.bpo-39465.j7nl6A.rst new file mode 100644 index 00000000000..c29b5e1757f --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2020-05-13-18-50-27.bpo-39465.j7nl6A.rst @@ -0,0 +1,3 @@ +Make :c:func:`_PyUnicode_FromId` function compatible with subinterpreters. +Each interpreter now has an array of identifier objects (interned strings +decoded from UTF-8). Patch by Victor Stinner. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 409355534a2..3238d1e692a 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -41,6 +41,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. #define PY_SSIZE_T_CLEAN #include "Python.h" #include "pycore_abstract.h" // _PyIndex_Check() +#include "pycore_atomic_funcs.h" // _Py_atomic_size_get() #include "pycore_bytes_methods.h" // _Py_bytes_lower() #include "pycore_format.h" // F_LJUST #include "pycore_initconfig.h" // _PyStatus_OK() @@ -302,9 +303,6 @@ unicode_decode_utf8(const char *s, Py_ssize_t size, _Py_error_handler error_handler, const char *errors, Py_ssize_t *consumed); -/* List of static strings. */ -static _Py_Identifier *static_strings = NULL; - /* Fast detection of the most frequent whitespace characters */ const unsigned char _Py_ascii_whitespace[] = { 0, 0, 0, 0, 0, 0, 0, 0, @@ -2312,42 +2310,85 @@ PyUnicode_FromString(const char *u) return PyUnicode_DecodeUTF8Stateful(u, (Py_ssize_t)size, NULL, NULL); } + PyObject * _PyUnicode_FromId(_Py_Identifier *id) { - if (id->object) { - return id->object; + PyInterpreterState *interp = _PyInterpreterState_GET(); + struct _Py_unicode_ids *ids = &interp->unicode.ids; + + int index = _Py_atomic_size_get(&id->index); + if (index < 0) { + struct _Py_unicode_runtime_ids *rt_ids = &interp->runtime->unicode_ids; + + PyThread_acquire_lock(rt_ids->lock, WAIT_LOCK); + // Check again to detect concurrent access. Another thread can have + // initialized the index while this thread waited for the lock. + index = _Py_atomic_size_get(&id->index); + if (index < 0) { + assert(rt_ids->next_index < PY_SSIZE_T_MAX); + index = rt_ids->next_index; + rt_ids->next_index++; + _Py_atomic_size_set(&id->index, index); + } + PyThread_release_lock(rt_ids->lock); } + assert(index >= 0); PyObject *obj; - obj = PyUnicode_DecodeUTF8Stateful(id->string, - strlen(id->string), + if (index < ids->size) { + obj = ids->array[index]; + if (obj) { + // Return a borrowed reference + return obj; + } + } + + obj = PyUnicode_DecodeUTF8Stateful(id->string, strlen(id->string), NULL, NULL); if (!obj) { return NULL; } PyUnicode_InternInPlace(&obj); - assert(!id->next); - id->object = obj; - id->next = static_strings; - static_strings = id; - return id->object; + if (index >= ids->size) { + // Overallocate to reduce the number of realloc + Py_ssize_t new_size = Py_MAX(index * 2, 16); + Py_ssize_t item_size = sizeof(ids->array[0]); + PyObject **new_array = PyMem_Realloc(ids->array, new_size * item_size); + if (new_array == NULL) { + PyErr_NoMemory(); + return NULL; + } + memset(&new_array[ids->size], 0, (new_size - ids->size) * item_size); + ids->array = new_array; + ids->size = new_size; + } + + // The array stores a strong reference + ids->array[index] = obj; + + // Return a borrowed reference + return obj; } + static void -unicode_clear_static_strings(void) +unicode_clear_identifiers(PyThreadState *tstate) { - _Py_Identifier *tmp, *s = static_strings; - while (s) { - Py_CLEAR(s->object); - tmp = s->next; - s->next = NULL; - s = tmp; + PyInterpreterState *interp = _PyInterpreterState_GET(); + struct _Py_unicode_ids *ids = &interp->unicode.ids; + for (Py_ssize_t i=0; i < ids->size; i++) { + Py_XDECREF(ids->array[i]); } - static_strings = NULL; + ids->size = 0; + PyMem_Free(ids->array); + ids->array = NULL; + // Don't reset _PyRuntime next_index: _Py_Identifier.id remains valid + // after Py_Finalize(). } + /* Internal function, doesn't check maximum character */ PyObject* @@ -16238,9 +16279,7 @@ _PyUnicode_Fini(PyThreadState *tstate) Py_CLEAR(state->latin1[i]); } - if (_Py_IsMainInterpreter(tstate)) { - unicode_clear_static_strings(); - } + unicode_clear_identifiers(tstate); _PyUnicode_FiniEncodings(&tstate->interp->unicode.fs_codec); } diff --git a/Python/pystate.c b/Python/pystate.c index ead25b08d7a..231144b0828 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -73,18 +73,24 @@ _PyRuntimeState_Init_impl(_PyRuntimeState *runtime) runtime->interpreters.mutex = PyThread_allocate_lock(); if (runtime->interpreters.mutex == NULL) { - return _PyStatus_ERR("Can't initialize threads for interpreter"); + return _PyStatus_NO_MEMORY(); } runtime->interpreters.next_id = -1; runtime->xidregistry.mutex = PyThread_allocate_lock(); if (runtime->xidregistry.mutex == NULL) { - return _PyStatus_ERR("Can't initialize threads for cross-interpreter data registry"); + return _PyStatus_NO_MEMORY(); } // Set it to the ID of the main thread of the main interpreter. runtime->main_thread = PyThread_get_thread_ident(); + runtime->unicode_ids.lock = PyThread_allocate_lock(); + if (runtime->unicode_ids.lock == NULL) { + return _PyStatus_NO_MEMORY(); + } + runtime->unicode_ids.next_index = 0; + return _PyStatus_OK(); } @@ -108,17 +114,17 @@ _PyRuntimeState_Fini(_PyRuntimeState *runtime) /* Force the allocator used by _PyRuntimeState_Init(). */ PyMemAllocatorEx old_alloc; _PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc); - - if (runtime->interpreters.mutex != NULL) { - PyThread_free_lock(runtime->interpreters.mutex); - runtime->interpreters.mutex = NULL; +#define FREE_LOCK(LOCK) \ + if (LOCK != NULL) { \ + PyThread_free_lock(LOCK); \ + LOCK = NULL; \ } - if (runtime->xidregistry.mutex != NULL) { - PyThread_free_lock(runtime->xidregistry.mutex); - runtime->xidregistry.mutex = NULL; - } + FREE_LOCK(runtime->interpreters.mutex); + FREE_LOCK(runtime->xidregistry.mutex); + FREE_LOCK(runtime->unicode_ids.lock); +#undef FREE_LOCK PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc); } @@ -139,12 +145,14 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime) int reinit_interp = _PyThread_at_fork_reinit(&runtime->interpreters.mutex); int reinit_main_id = _PyThread_at_fork_reinit(&runtime->interpreters.main->id_mutex); int reinit_xidregistry = _PyThread_at_fork_reinit(&runtime->xidregistry.mutex); + int reinit_unicode_ids = _PyThread_at_fork_reinit(&runtime->unicode_ids.lock); PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc); if (reinit_interp < 0 || reinit_main_id < 0 - || reinit_xidregistry < 0) + || reinit_xidregistry < 0 + || reinit_unicode_ids < 0) { return _PyStatus_ERR("Failed to reinitialize runtime locks");