From 13907968d73b3b602c81e240fb7892a2627974d6 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Fri, 2 Feb 2024 05:53:53 +0900 Subject: [PATCH] gh-111968: Use per-thread freelists for dict in free-threading (gh-114323) --- Include/internal/pycore_dict.h | 3 +- Include/internal/pycore_dict_state.h | 19 ------ Include/internal/pycore_freelist.h | 13 ++++ Include/internal/pycore_gc.h | 2 +- Include/internal/pycore_interp.h | 2 +- Objects/dictobject.c | 88 ++++++++++++---------------- Objects/floatobject.c | 4 ++ Objects/genobject.c | 4 ++ Objects/listobject.c | 4 ++ Python/context.c | 4 ++ Python/gc_free_threading.c | 2 - Python/gc_gil.c | 2 - Python/pystate.c | 3 + 13 files changed, 75 insertions(+), 75 deletions(-) diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index b4e1f8cf1e3..60acd89cf6c 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -9,6 +9,7 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif +#include "pycore_freelist.h" // _PyFreeListState #include "pycore_identifier.h" // _Py_Identifier #include "pycore_object.h" // PyDictOrValues @@ -69,7 +70,7 @@ extern PyObject* _PyDictView_Intersect(PyObject* self, PyObject *other); /* runtime lifecycle */ -extern void _PyDict_Fini(PyInterpreterState *interp); +extern void _PyDict_Fini(PyInterpreterState *state); /* other API */ diff --git a/Include/internal/pycore_dict_state.h b/Include/internal/pycore_dict_state.h index ece0f10ca25..a6dd63d36e0 100644 --- a/Include/internal/pycore_dict_state.h +++ b/Include/internal/pycore_dict_state.h @@ -8,16 +8,6 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif - -#ifndef WITH_FREELISTS -// without freelists -# define PyDict_MAXFREELIST 0 -#endif - -#ifndef PyDict_MAXFREELIST -# define PyDict_MAXFREELIST 80 -#endif - #define DICT_MAX_WATCHERS 8 struct _Py_dict_state { @@ -26,15 +16,6 @@ struct _Py_dict_state { * time that a dictionary is modified. */ uint64_t global_version; uint32_t next_keys_version; - -#if PyDict_MAXFREELIST > 0 - /* Dictionary reuse scheme to save calls to malloc and free */ - PyDictObject *free_list[PyDict_MAXFREELIST]; - PyDictKeysObject *keys_free_list[PyDict_MAXFREELIST]; - int numfree; - int keys_numfree; -#endif - PyDict_WatchCallback watchers[DICT_MAX_WATCHERS]; }; diff --git a/Include/internal/pycore_freelist.h b/Include/internal/pycore_freelist.h index b91d2bc066b..82a42300991 100644 --- a/Include/internal/pycore_freelist.h +++ b/Include/internal/pycore_freelist.h @@ -17,6 +17,7 @@ extern "C" { # define PyTuple_NFREELISTS PyTuple_MAXSAVESIZE # define PyTuple_MAXFREELIST 2000 # define PyList_MAXFREELIST 80 +# define PyDict_MAXFREELIST 80 # define PyFloat_MAXFREELIST 100 # define PyContext_MAXFREELIST 255 # define _PyAsyncGen_MAXFREELIST 80 @@ -25,6 +26,7 @@ extern "C" { # define PyTuple_NFREELISTS 0 # define PyTuple_MAXFREELIST 0 # define PyList_MAXFREELIST 0 +# define PyDict_MAXFREELIST 0 # define PyFloat_MAXFREELIST 0 # define PyContext_MAXFREELIST 0 # define _PyAsyncGen_MAXFREELIST 0 @@ -65,6 +67,16 @@ struct _Py_float_state { #endif }; +struct _Py_dict_freelist { +#ifdef WITH_FREELISTS + /* Dictionary reuse scheme to save calls to malloc and free */ + PyDictObject *free_list[PyDict_MAXFREELIST]; + PyDictKeysObject *keys_free_list[PyDict_MAXFREELIST]; + int numfree; + int keys_numfree; +#endif +}; + struct _Py_slice_state { #ifdef WITH_FREELISTS /* Using a cache is very effective since typically only a single slice is @@ -106,6 +118,7 @@ typedef struct _Py_freelist_state { struct _Py_float_state floats; struct _Py_tuple_state tuples; struct _Py_list_state lists; + struct _Py_dict_freelist dicts; struct _Py_slice_state slices; struct _Py_context_state contexts; struct _Py_async_gen_state async_gens; diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index b362a294a59..ca1d9fdf525 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -267,7 +267,7 @@ extern void _PyTuple_ClearFreeList(_PyFreeListState *state, int is_finalization) extern void _PyFloat_ClearFreeList(_PyFreeListState *state, int is_finalization); extern void _PyList_ClearFreeList(_PyFreeListState *state, int is_finalization); extern void _PySlice_ClearCache(_PyFreeListState *state); -extern void _PyDict_ClearFreeList(PyInterpreterState *interp); +extern void _PyDict_ClearFreeList(_PyFreeListState *state, int is_finalization); extern void _PyAsyncGen_ClearFreeLists(_PyFreeListState *state, int is_finalization); extern void _PyContext_ClearFreeList(_PyFreeListState *state, int is_finalization); extern void _Py_ScheduleGC(PyInterpreterState *interp); diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 04e75940dcb..c4732b15341 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -20,6 +20,7 @@ extern "C" { #include "pycore_dtoa.h" // struct _dtoa_state #include "pycore_exceptions.h" // struct _Py_exc_state #include "pycore_floatobject.h" // struct _Py_float_state +#include "pycore_freelist.h" // struct _Py_freelist_state #include "pycore_function.h" // FUNC_MAX_WATCHERS #include "pycore_gc.h" // struct _gc_runtime_state #include "pycore_genobject.h" // struct _Py_async_gen_state @@ -230,7 +231,6 @@ struct _is { struct _dtoa_state dtoa; struct _py_func_state func_state; - struct _Py_tuple_state tuple; struct _Py_dict_state dict_state; struct _Py_exc_state exc_state; diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 23d7e9b5e38..e24887b7d78 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -118,6 +118,7 @@ As a consequence of this, split keys have a maximum size of 16. #include "pycore_ceval.h" // _PyEval_GetBuiltin() #include "pycore_code.h" // stats #include "pycore_dict.h" // export _PyDict_SizeOf() +#include "pycore_freelist.h" // _PyFreeListState_GET() #include "pycore_gc.h" // _PyObject_GC_IS_TRACKED() #include "pycore_object.h" // _PyObject_GC_TRACK(), _PyDebugAllocatorStats() #include "pycore_pyerrors.h" // _PyErr_GetRaisedException() @@ -242,40 +243,44 @@ static PyObject* dict_iter(PyObject *dict); #include "clinic/dictobject.c.h" -#if PyDict_MAXFREELIST > 0 -static struct _Py_dict_state * -get_dict_state(PyInterpreterState *interp) +#ifdef WITH_FREELISTS +static struct _Py_dict_freelist * +get_dict_state(void) { - return &interp->dict_state; + _PyFreeListState *state = _PyFreeListState_GET(); + return &state->dicts; } #endif void -_PyDict_ClearFreeList(PyInterpreterState *interp) +_PyDict_ClearFreeList(_PyFreeListState *freelist_state, int is_finalization) { -#if PyDict_MAXFREELIST > 0 - struct _Py_dict_state *state = &interp->dict_state; - while (state->numfree) { +#ifdef WITH_FREELISTS + struct _Py_dict_freelist *state = &freelist_state->dicts; + while (state->numfree > 0) { PyDictObject *op = state->free_list[--state->numfree]; assert(PyDict_CheckExact(op)); PyObject_GC_Del(op); } - while (state->keys_numfree) { + while (state->keys_numfree > 0) { PyMem_Free(state->keys_free_list[--state->keys_numfree]); } + if (is_finalization) { + state->numfree = -1; + state->keys_numfree = -1; + } #endif } - void -_PyDict_Fini(PyInterpreterState *interp) +_PyDict_Fini(PyInterpreterState *Py_UNUSED(interp)) { - _PyDict_ClearFreeList(interp); -#if defined(Py_DEBUG) && PyDict_MAXFREELIST > 0 - struct _Py_dict_state *state = &interp->dict_state; - state->numfree = -1; - state->keys_numfree = -1; + // With Py_GIL_DISABLED: + // the freelists for the current thread state have already been cleared. +#ifndef Py_GIL_DISABLED + _PyFreeListState *state = _PyFreeListState_GET(); + _PyDict_ClearFreeList(state, 1); #endif } @@ -290,9 +295,8 @@ unicode_get_hash(PyObject *o) void _PyDict_DebugMallocStats(FILE *out) { -#if PyDict_MAXFREELIST > 0 - PyInterpreterState *interp = _PyInterpreterState_GET(); - struct _Py_dict_state *state = get_dict_state(interp); +#ifdef WITH_FREELISTS + struct _Py_dict_freelist *state = get_dict_state(); _PyDebugAllocatorStats(out, "free PyDictObject", state->numfree, sizeof(PyDictObject)); #endif @@ -300,7 +304,7 @@ _PyDict_DebugMallocStats(FILE *out) #define DK_MASK(dk) (DK_SIZE(dk)-1) -static void free_keys_object(PyInterpreterState *interp, PyDictKeysObject *keys); +static void free_keys_object(PyDictKeysObject *keys); /* PyDictKeysObject has refcounts like PyObject does, so we have the following two functions to mirror what Py_INCREF() and Py_DECREF() do. @@ -348,7 +352,7 @@ dictkeys_decref(PyInterpreterState *interp, PyDictKeysObject *dk) Py_XDECREF(entries[i].me_value); } } - free_keys_object(interp, dk); + free_keys_object(dk); } } @@ -643,12 +647,8 @@ new_keys_object(PyInterpreterState *interp, uint8_t log2_size, bool unicode) log2_bytes = log2_size + 2; } -#if PyDict_MAXFREELIST > 0 - struct _Py_dict_state *state = get_dict_state(interp); -#ifdef Py_DEBUG - // new_keys_object() must not be called after _PyDict_Fini() - assert(state->keys_numfree != -1); -#endif +#ifdef WITH_FREELISTS + struct _Py_dict_freelist *state = get_dict_state(); if (log2_size == PyDict_LOG_MINSIZE && unicode && state->keys_numfree > 0) { dk = state->keys_free_list[--state->keys_numfree]; OBJECT_STAT_INC(from_freelist); @@ -680,16 +680,13 @@ new_keys_object(PyInterpreterState *interp, uint8_t log2_size, bool unicode) } static void -free_keys_object(PyInterpreterState *interp, PyDictKeysObject *keys) +free_keys_object(PyDictKeysObject *keys) { -#if PyDict_MAXFREELIST > 0 - struct _Py_dict_state *state = get_dict_state(interp); -#ifdef Py_DEBUG - // free_keys_object() must not be called after _PyDict_Fini() - assert(state->keys_numfree != -1); -#endif +#ifdef WITH_FREELISTS + struct _Py_dict_freelist *state = get_dict_state(); if (DK_LOG_SIZE(keys) == PyDict_LOG_MINSIZE && state->keys_numfree < PyDict_MAXFREELIST + && state->keys_numfree >= 0 && DK_IS_UNICODE(keys)) { state->keys_free_list[state->keys_numfree++] = keys; OBJECT_STAT_INC(to_freelist); @@ -730,13 +727,9 @@ new_dict(PyInterpreterState *interp, { PyDictObject *mp; assert(keys != NULL); -#if PyDict_MAXFREELIST > 0 - struct _Py_dict_state *state = get_dict_state(interp); -#ifdef Py_DEBUG - // new_dict() must not be called after _PyDict_Fini() - assert(state->numfree != -1); -#endif - if (state->numfree) { +#ifdef WITH_FREELISTS + struct _Py_dict_freelist *state = get_dict_state(); + if (state->numfree > 0) { mp = state->free_list[--state->numfree]; assert (mp != NULL); assert (Py_IS_TYPE(mp, &PyDict_Type)); @@ -1547,7 +1540,7 @@ dictresize(PyInterpreterState *interp, PyDictObject *mp, #endif assert(oldkeys->dk_kind != DICT_KEYS_SPLIT); assert(oldkeys->dk_refcnt == 1); - free_keys_object(interp, oldkeys); + free_keys_object(oldkeys); } } @@ -2458,13 +2451,10 @@ dict_dealloc(PyObject *self) assert(keys->dk_refcnt == 1 || keys == Py_EMPTY_KEYS); dictkeys_decref(interp, keys); } -#if PyDict_MAXFREELIST > 0 - struct _Py_dict_state *state = get_dict_state(interp); -#ifdef Py_DEBUG - // new_dict() must not be called after _PyDict_Fini() - assert(state->numfree != -1); -#endif - if (state->numfree < PyDict_MAXFREELIST && Py_IS_TYPE(mp, &PyDict_Type)) { +#ifdef WITH_FREELISTS + struct _Py_dict_freelist *state = get_dict_state(); + if (state->numfree < PyDict_MAXFREELIST && state->numfree >=0 && + Py_IS_TYPE(mp, &PyDict_Type)) { state->free_list[state->numfree++] = mp; OBJECT_STAT_INC(to_freelist); } diff --git a/Objects/floatobject.c b/Objects/floatobject.c index b7611d5f96a..c440e0dab0e 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -2013,7 +2013,11 @@ _PyFloat_ClearFreeList(_PyFreeListState *freelist_state, int is_finalization) void _PyFloat_Fini(_PyFreeListState *state) { + // With Py_GIL_DISABLED: + // the freelists for the current thread state have already been cleared. +#ifndef Py_GIL_DISABLED _PyFloat_ClearFreeList(state, 1); +#endif } void diff --git a/Objects/genobject.c b/Objects/genobject.c index f47197330fd..ab523e46cce 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -1685,7 +1685,11 @@ _PyAsyncGen_ClearFreeLists(_PyFreeListState *freelist_state, int is_finalization void _PyAsyncGen_Fini(_PyFreeListState *state) { + // With Py_GIL_DISABLED: + // the freelists for the current thread state have already been cleared. +#ifndef Py_GIL_DISABLED _PyAsyncGen_ClearFreeLists(state, 1); +#endif } diff --git a/Objects/listobject.c b/Objects/listobject.c index 80a1f1da55b..da2b9cc3269 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -138,7 +138,11 @@ _PyList_ClearFreeList(_PyFreeListState *freelist_state, int is_finalization) void _PyList_Fini(_PyFreeListState *state) { + // With Py_GIL_DISABLED: + // the freelists for the current thread state have already been cleared. +#ifndef Py_GIL_DISABLED _PyList_ClearFreeList(state, 1); +#endif } /* Print summary info about the state of the optimized allocator */ diff --git a/Python/context.c b/Python/context.c index 294485e5b40..793dfa2b72c 100644 --- a/Python/context.c +++ b/Python/context.c @@ -1287,7 +1287,11 @@ _PyContext_ClearFreeList(_PyFreeListState *freelist_state, int is_finalization) void _PyContext_Fini(_PyFreeListState *state) { + // With Py_GIL_DISABLED: + // the freelists for the current thread state have already been cleared. +#ifndef Py_GIL_DISABLED _PyContext_ClearFreeList(state, 1); +#endif } diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 53f927bfa65..8fbcdb15109 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1676,8 +1676,6 @@ PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg) void _PyGC_ClearAllFreeLists(PyInterpreterState *interp) { - _PyDict_ClearFreeList(interp); - HEAD_LOCK(&_PyRuntime); _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)interp->threads.head; while (tstate != NULL) { diff --git a/Python/gc_gil.c b/Python/gc_gil.c index 04c1c184250..4e2aa8f7af7 100644 --- a/Python/gc_gil.c +++ b/Python/gc_gil.c @@ -11,8 +11,6 @@ void _PyGC_ClearAllFreeLists(PyInterpreterState *interp) { - _PyDict_ClearFreeList(interp); - _Py_ClearFreeLists(&interp->freelist_state, 0); } diff --git a/Python/pystate.c b/Python/pystate.c index 430121a6a35..27b6d0573ad 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1461,9 +1461,12 @@ clear_datastack(PyThreadState *tstate) void _Py_ClearFreeLists(_PyFreeListState *state, int is_finalization) { + // In the free-threaded build, freelists are per-PyThreadState and cleared in PyThreadState_Clear() + // In the default build, freelists are per-interpreter and cleared in finalize_interp_types() _PyFloat_ClearFreeList(state, is_finalization); _PyTuple_ClearFreeList(state, is_finalization); _PyList_ClearFreeList(state, is_finalization); + _PyDict_ClearFreeList(state, is_finalization); _PyContext_ClearFreeList(state, is_finalization); _PyAsyncGen_ClearFreeLists(state, is_finalization); _PyObjectStackChunk_ClearFreeList(state, is_finalization);