From 3ea488aac44887a7cdb30be69580c81a0ca6afe2 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 15 Oct 2024 15:06:41 -0400 Subject: [PATCH] gh-124218: Use per-thread refcounts for code objects (#125216) Use per-thread refcounting for the reference from function objects to their corresponding code object. This can be a source of contention when frequently creating nested functions. Deferred refcounting alone isn't a great fit here because these references are on the heap and may be modified by other libraries. --- Include/cpython/code.h | 1 + Include/cpython/object.h | 2 +- Include/internal/pycore_object.h | 98 ++++++++++++++++++------------ Include/internal/pycore_uniqueid.h | 6 +- Objects/codeobject.c | 6 +- Objects/funcobject.c | 8 ++- Objects/typeobject.c | 4 +- Python/gc_free_threading.c | 13 ++-- Python/pylifecycle.c | 2 +- Python/pystate.c | 2 +- Python/uniqueid.c | 67 ++++++++++++-------- 11 files changed, 126 insertions(+), 83 deletions(-) diff --git a/Include/cpython/code.h b/Include/cpython/code.h index 03622698113..af9149b9c38 100644 --- a/Include/cpython/code.h +++ b/Include/cpython/code.h @@ -132,6 +132,7 @@ typedef struct { _PyCoCached *_co_cached; /* cached co_* attributes */ \ uintptr_t _co_instrumentation_version; /* current instrumentation version */ \ _PyCoMonitoringData *_co_monitoring; /* Monitoring data */ \ + Py_ssize_t _co_unique_id; /* ID used for per-thread refcounting */ \ int _co_firsttraceable; /* index of first traceable instruction */ \ /* Scratch space for extra data relating to the code object. \ Type is a void* to keep the format private in codeobject.c to force \ diff --git a/Include/cpython/object.h b/Include/cpython/object.h index 9d092749b90..f0f61796cd3 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -272,7 +272,7 @@ typedef struct _heaptypeobject { void *ht_token; // Storage for the "Py_tp_token" slot struct _specialization_cache _spec_cache; // For use by the specializer. #ifdef Py_GIL_DISABLED - Py_ssize_t unique_id; // ID used for thread-local refcounting + Py_ssize_t unique_id; // ID used for per-thread refcounting #endif /* here are optional user slots, followed by the members. */ } PyHeapTypeObject; diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 8832692d03c..ad1a7d7e120 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -14,7 +14,7 @@ extern "C" { #include "pycore_interp.h" // PyInterpreterState.gc #include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_STORE_PTR_RELAXED #include "pycore_pystate.h" // _PyInterpreterState_GET() -#include "pycore_uniqueid.h" // _PyType_IncrefSlow +#include "pycore_uniqueid.h" // _PyObject_ThreadIncrefSlow() // This value is added to `ob_ref_shared` for objects that use deferred // reference counting so that they are not immediately deallocated when the @@ -291,7 +291,31 @@ extern bool _PyRefchain_IsTraced(PyInterpreterState *interp, PyObject *obj); #ifndef Py_GIL_DISABLED # define _Py_INCREF_TYPE Py_INCREF # define _Py_DECREF_TYPE Py_DECREF +# define _Py_INCREF_CODE Py_INCREF +# define _Py_DECREF_CODE Py_DECREF #else +static inline void +_Py_THREAD_INCREF_OBJECT(PyObject *obj, Py_ssize_t unique_id) +{ + _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET(); + + // Unsigned comparison so that `unique_id=-1`, which indicates that + // per-thread refcounting has been disabled on this object, is handled by + // the "else". + if ((size_t)unique_id < (size_t)tstate->refcounts.size) { +# ifdef Py_REF_DEBUG + _Py_INCREF_IncRefTotal(); +# endif + _Py_INCREF_STAT_INC(); + tstate->refcounts.values[unique_id]++; + } + else { + // The slow path resizes the per-thread refcount array if necessary. + // It handles the unique_id=-1 case to keep the inlinable function smaller. + _PyObject_ThreadIncrefSlow(obj, unique_id); + } +} + static inline void _Py_INCREF_TYPE(PyTypeObject *type) { @@ -308,31 +332,40 @@ _Py_INCREF_TYPE(PyTypeObject *type) # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Warray-bounds" #endif - - _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET(); - PyHeapTypeObject *ht = (PyHeapTypeObject *)type; - - // Unsigned comparison so that `unique_id=-1`, which indicates that - // per-thread refcounting has been disabled on this type, is handled by - // the "else". - if ((size_t)ht->unique_id < (size_t)tstate->refcounts.size) { -# ifdef Py_REF_DEBUG - _Py_INCREF_IncRefTotal(); -# endif - _Py_INCREF_STAT_INC(); - tstate->refcounts.values[ht->unique_id]++; - } - else { - // The slow path resizes the thread-local refcount array if necessary. - // It handles the unique_id=-1 case to keep the inlinable function smaller. - _PyType_IncrefSlow(ht); - } - + _Py_THREAD_INCREF_OBJECT((PyObject *)type, ((PyHeapTypeObject *)type)->unique_id); #if defined(__GNUC__) && __GNUC__ >= 11 # pragma GCC diagnostic pop #endif } +static inline void +_Py_INCREF_CODE(PyCodeObject *co) +{ + _Py_THREAD_INCREF_OBJECT((PyObject *)co, co->_co_unique_id); +} + +static inline void +_Py_THREAD_DECREF_OBJECT(PyObject *obj, Py_ssize_t unique_id) +{ + _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET(); + + // Unsigned comparison so that `unique_id=-1`, which indicates that + // per-thread refcounting has been disabled on this object, is handled by + // the "else". + if ((size_t)unique_id < (size_t)tstate->refcounts.size) { +# ifdef Py_REF_DEBUG + _Py_DECREF_DecRefTotal(); +# endif + _Py_DECREF_STAT_INC(); + tstate->refcounts.values[unique_id]--; + } + else { + // Directly decref the object if the id is not assigned or if + // per-thread refcounting has been disabled on this object. + Py_DECREF(obj); + } +} + static inline void _Py_DECREF_TYPE(PyTypeObject *type) { @@ -341,25 +374,14 @@ _Py_DECREF_TYPE(PyTypeObject *type) _Py_DECREF_IMMORTAL_STAT_INC(); return; } - - _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET(); PyHeapTypeObject *ht = (PyHeapTypeObject *)type; + _Py_THREAD_DECREF_OBJECT((PyObject *)type, ht->unique_id); +} - // Unsigned comparison so that `unique_id=-1`, which indicates that - // per-thread refcounting has been disabled on this type, is handled by - // the "else". - if ((size_t)ht->unique_id < (size_t)tstate->refcounts.size) { -# ifdef Py_REF_DEBUG - _Py_DECREF_DecRefTotal(); -# endif - _Py_DECREF_STAT_INC(); - tstate->refcounts.values[ht->unique_id]--; - } - else { - // Directly decref the type if the type id is not assigned or if - // per-thread refcounting has been disabled on this type. - Py_DECREF(type); - } +static inline void +_Py_DECREF_CODE(PyCodeObject *co) +{ + _Py_THREAD_DECREF_OBJECT((PyObject *)co, co->_co_unique_id); } #endif diff --git a/Include/internal/pycore_uniqueid.h b/Include/internal/pycore_uniqueid.h index 8f3b4418408..ad5dd38ea08 100644 --- a/Include/internal/pycore_uniqueid.h +++ b/Include/internal/pycore_uniqueid.h @@ -49,7 +49,7 @@ struct _Py_unique_id_pool { extern Py_ssize_t _PyObject_AssignUniqueId(PyObject *obj); // Releases the allocated id back to the pool. -extern void _PyObject_ReleaseUniqueId(Py_ssize_t unique_id); +extern void _PyObject_DisablePerThreadRefcounting(PyObject *obj); // Merges the per-thread reference counts into the corresponding objects. extern void _PyObject_MergePerThreadRefcounts(_PyThreadStateImpl *tstate); @@ -61,8 +61,8 @@ extern void _PyObject_FinalizePerThreadRefcounts(_PyThreadStateImpl *tstate); // Frees the interpreter's pool of type ids. extern void _PyObject_FinalizeUniqueIdPool(PyInterpreterState *interp); -// Increfs the type, resizing the per-thread refcount array if necessary. -PyAPI_FUNC(void) _PyType_IncrefSlow(PyHeapTypeObject *type); +// Increfs the object, resizing the thread-local refcount array if necessary. +PyAPI_FUNC(void) _PyObject_ThreadIncrefSlow(PyObject *obj, Py_ssize_t unique_id); #endif /* Py_GIL_DISABLED */ diff --git a/Objects/codeobject.c b/Objects/codeobject.c index de80f6cca29..9419cfc0048 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -14,6 +14,7 @@ #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_setobject.h" // _PySet_NextEntry() #include "pycore_tuple.h" // _PyTuple_ITEMS() +#include "pycore_uniqueid.h" // _PyObject_AssignUniqueId() #include "clinic/codeobject.c.h" static const char * @@ -676,7 +677,7 @@ _PyCode_New(struct _PyCodeConstructor *con) } init_code(co, con); #ifdef Py_GIL_DISABLED - _PyObject_SetDeferredRefcount((PyObject *)co); + co->_co_unique_id = _PyObject_AssignUniqueId((PyObject *)co); _PyObject_GC_TRACK(co); #endif Py_XDECREF(replacement_locations); @@ -1864,6 +1865,9 @@ code_dealloc(PyCodeObject *co) Py_XDECREF(co->co_qualname); Py_XDECREF(co->co_linetable); Py_XDECREF(co->co_exceptiontable); +#ifdef Py_GIL_DISABLED + assert(co->_co_unique_id == -1); +#endif if (co->_co_cached != NULL) { Py_XDECREF(co->_co_cached->_co_code); Py_XDECREF(co->_co_cached->_co_cellvars); diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 855d1a2eeca..6119a96b4aa 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -116,7 +116,8 @@ _PyFunction_FromConstructor(PyFrameConstructor *constr) op->func_builtins = Py_NewRef(constr->fc_builtins); op->func_name = Py_NewRef(constr->fc_name); op->func_qualname = Py_NewRef(constr->fc_qualname); - op->func_code = Py_NewRef(constr->fc_code); + _Py_INCREF_CODE((PyCodeObject *)constr->fc_code); + op->func_code = constr->fc_code; op->func_defaults = Py_XNewRef(constr->fc_defaults); op->func_kwdefaults = Py_XNewRef(constr->fc_kwdefaults); op->func_closure = Py_XNewRef(constr->fc_closure); @@ -146,7 +147,8 @@ PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname PyThreadState *tstate = _PyThreadState_GET(); - PyCodeObject *code_obj = (PyCodeObject *)Py_NewRef(code); + PyCodeObject *code_obj = (PyCodeObject *)code; + _Py_INCREF_CODE(code_obj); assert(code_obj->co_name != NULL); PyObject *name = Py_NewRef(code_obj->co_name); @@ -1094,7 +1096,7 @@ func_dealloc(PyObject *self) } (void)func_clear((PyObject*)op); // These aren't cleared by func_clear(). - Py_DECREF(op->func_code); + _Py_DECREF_CODE((PyCodeObject *)op->func_code); Py_DECREF(op->func_name); Py_DECREF(op->func_qualname); PyObject_GC_Del(op); diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 6ca4406ec0e..4d843824e1e 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5025,7 +5025,7 @@ PyType_FromMetaclass( type->tp_dictoffset = dictoffset; #ifdef Py_GIL_DISABLED - // Assign a type id to enable thread-local refcounting + // Assign a unique id to enable per-thread refcounting res->unique_id = _PyObject_AssignUniqueId((PyObject *)res); #endif @@ -6043,7 +6043,7 @@ type_dealloc(PyObject *self) Py_XDECREF(et->ht_module); PyMem_Free(et->_ht_tpname); #ifdef Py_GIL_DISABLED - _PyObject_ReleaseUniqueId(et->unique_id); + assert(et->unique_id == -1); #endif et->ht_token = NULL; Py_TYPE(type)->tp_free((PyObject *)type); diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 3814cd46e2b..8558d4555a9 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -15,7 +15,7 @@ #include "pycore_tstate.h" // _PyThreadStateImpl #include "pycore_weakref.h" // _PyWeakref_ClearRef() #include "pydtrace.h" -#include "pycore_uniqueid.h" // _PyType_MergeThreadLocalRefcounts +#include "pycore_uniqueid.h" // _PyObject_MergeThreadLocalRefcounts() #ifdef Py_GIL_DISABLED @@ -215,15 +215,10 @@ disable_deferred_refcounting(PyObject *op) op->ob_gc_bits &= ~_PyGC_BITS_DEFERRED; op->ob_ref_shared -= _Py_REF_SHARED(_Py_REF_DEFERRED, 0); merge_refcount(op, 0); - } - // Heap types also use per-thread refcounting -- disable it here. - if (PyType_Check(op)) { - if (PyType_HasFeature((PyTypeObject *)op, Py_TPFLAGS_HEAPTYPE)) { - PyHeapTypeObject *ht = (PyHeapTypeObject *)op; - _PyObject_ReleaseUniqueId(ht->unique_id); - ht->unique_id = -1; - } + // Heap types and code objects also use per-thread refcounting, which + // should also be disabled when we turn off deferred refcounting. + _PyObject_DisablePerThreadRefcounting(op); } // Generators and frame objects may contain deferred references to other diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index ebeee4f41d7..5fb9c4f7c71 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -28,7 +28,7 @@ #include "pycore_sliceobject.h" // _PySlice_Fini() #include "pycore_sysmodule.h" // _PySys_ClearAuditHooks() #include "pycore_traceback.h" // _Py_DumpTracebackThreads() -#include "pycore_uniqueid.h" // _PyType_FinalizeIdPool() +#include "pycore_uniqueid.h" // _PyObject_FinalizeUniqueIdPool() #include "pycore_typeobject.h" // _PyTypes_InitTypes() #include "pycore_typevarobject.h" // _Py_clear_generic_types() #include "pycore_unicodeobject.h" // _PyUnicode_InitTypes() diff --git a/Python/pystate.c b/Python/pystate.c index 5d94b7714bd..e3812cba41d 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -20,7 +20,7 @@ #include "pycore_runtime_init.h" // _PyRuntimeState_INIT #include "pycore_sysmodule.h" // _PySys_Audit() #include "pycore_obmalloc.h" // _PyMem_obmalloc_state_on_heap() -#include "pycore_uniqueid.h" // _PyType_FinalizeThreadLocalRefcounts() +#include "pycore_uniqueid.h" // _PyObject_FinalizePerThreadRefcounts() /* -------------------------------------------------------------------------- CAUTION diff --git a/Python/uniqueid.c b/Python/uniqueid.c index 9a9ee2f3946..0cbb35c6cd2 100644 --- a/Python/uniqueid.c +++ b/Python/uniqueid.c @@ -98,36 +98,60 @@ _PyObject_AssignUniqueId(PyObject *obj) return unique_id; } -void -_PyObject_ReleaseUniqueId(Py_ssize_t unique_id) +static void +release_unique_id(Py_ssize_t unique_id) { PyInterpreterState *interp = _PyInterpreterState_GET(); struct _Py_unique_id_pool *pool = &interp->unique_ids; - if (unique_id < 0) { - // The id is not assigned - return; - } - LOCK_POOL(pool); + assert(unique_id >= 0 && unique_id < pool->size); _Py_unique_id_entry *entry = &pool->table[unique_id]; entry->next = pool->freelist; pool->freelist = entry; UNLOCK_POOL(pool); } +static Py_ssize_t +clear_unique_id(PyObject *obj) +{ + Py_ssize_t id = -1; + if (PyType_Check(obj)) { + if (PyType_HasFeature((PyTypeObject *)obj, Py_TPFLAGS_HEAPTYPE)) { + PyHeapTypeObject *ht = (PyHeapTypeObject *)obj; + id = ht->unique_id; + ht->unique_id = -1; + } + } + else if (PyCode_Check(obj)) { + PyCodeObject *co = (PyCodeObject *)obj; + id = co->_co_unique_id; + co->_co_unique_id = -1; + } + return id; +} + void -_PyType_IncrefSlow(PyHeapTypeObject *type) +_PyObject_DisablePerThreadRefcounting(PyObject *obj) +{ + Py_ssize_t id = clear_unique_id(obj); + if (id >= 0) { + release_unique_id(id); + } +} + +void +_PyObject_ThreadIncrefSlow(PyObject *obj, Py_ssize_t unique_id) { _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET(); - if (type->unique_id < 0 || resize_local_refcounts(tstate) < 0) { - // just incref the type directly. - Py_INCREF(type); + if (unique_id < 0 || resize_local_refcounts(tstate) < 0) { + // just incref the object directly. + Py_INCREF(obj); return; } - assert(type->unique_id < tstate->refcounts.size); - tstate->refcounts.values[type->unique_id]++; + assert(unique_id < tstate->refcounts.size); + tstate->refcounts.values[unique_id]++; #ifdef Py_REF_DEBUG _Py_IncRefTotal((PyThreadState *)tstate); #endif @@ -179,20 +203,15 @@ _PyObject_FinalizeUniqueIdPool(PyInterpreterState *interp) pool->freelist = next; } - // Now everything non-NULL is a type. Set the type's id to -1 in case it - // outlives the interpreter. + // Now everything non-NULL is a object. Clear their unique ids as the + // object outlives the interpreter. for (Py_ssize_t i = 0; i < pool->size; i++) { PyObject *obj = pool->table[i].obj; pool->table[i].obj = NULL; - if (obj == NULL) { - continue; - } - if (PyType_Check(obj)) { - assert(PyType_HasFeature((PyTypeObject *)obj, Py_TPFLAGS_HEAPTYPE)); - ((PyHeapTypeObject *)obj)->unique_id = -1; - } - else { - Py_UNREACHABLE(); + if (obj != NULL) { + Py_ssize_t id = clear_unique_id(obj); + (void)id; + assert(id == i); } } PyMem_Free(pool->table);