From 9b0bfba2a265b8108610b037945c004d8e81f2b4 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Mon, 21 Oct 2024 12:51:29 -0400 Subject: [PATCH] gh-124218: Use per-thread reference counting for globals and builtins (#125713) Use per-thread refcounting for the reference from function objects to the globals and builtins dictionaries. --- Include/cpython/dictobject.h | 4 +++- Include/internal/pycore_dict.h | 34 ++++++++++++++++++++++++++ Include/internal/pycore_object.h | 14 +++++++++++ Include/internal/pycore_uniqueid.h | 3 +++ Objects/dictobject.c | 18 ++++++++++++++ Objects/funcobject.c | 38 +++++++++++++++++++++++++----- Objects/moduleobject.c | 3 ++- Python/uniqueid.c | 12 +++++++--- 8 files changed, 115 insertions(+), 11 deletions(-) diff --git a/Include/cpython/dictobject.h b/Include/cpython/dictobject.h index b113c7fdcf6..78473e54898 100644 --- a/Include/cpython/dictobject.h +++ b/Include/cpython/dictobject.h @@ -17,7 +17,9 @@ typedef struct { /* This is a private field for CPython's internal use. * Bits 0-7 are for dict watchers. * Bits 8-11 are for the watched mutation counter (used by tier2 optimization) - * The remaining bits are not currently used. */ + * Bits 12-31 are currently unused + * Bits 32-63 are a unique id in the free threading build (used for per-thread refcounting) + */ uint64_t _ma_watcher_tag; PyDictKeysObject *ma_keys; diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index 1920724c1d4..1d185559b3e 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -229,6 +229,8 @@ static inline PyDictUnicodeEntry* DK_UNICODE_ENTRIES(PyDictKeysObject *dk) { #define DICT_VERSION_INCREMENT (1 << (DICT_MAX_WATCHERS + DICT_WATCHED_MUTATION_BITS)) #define DICT_WATCHER_MASK ((1 << DICT_MAX_WATCHERS) - 1) #define DICT_WATCHER_AND_MODIFICATION_MASK ((1 << (DICT_MAX_WATCHERS + DICT_WATCHED_MUTATION_BITS)) - 1) +#define DICT_UNIQUE_ID_SHIFT (32) +#define DICT_UNIQUE_ID_MAX ((UINT64_C(1) << (64 - DICT_UNIQUE_ID_SHIFT)) - 1) PyAPI_FUNC(void) @@ -307,8 +309,40 @@ _PyInlineValuesSize(PyTypeObject *tp) int _PyDict_DetachFromObject(PyDictObject *dict, PyObject *obj); +// Enables per-thread ref counting on this dict in the free threading build +extern void _PyDict_EnablePerThreadRefcounting(PyObject *op); + PyDictObject *_PyObject_MaterializeManagedDict_LockHeld(PyObject *); +// See `_Py_INCREF_TYPE()` in pycore_object.h +#ifndef Py_GIL_DISABLED +# define _Py_INCREF_DICT Py_INCREF +# define _Py_DECREF_DICT Py_DECREF +#else +static inline Py_ssize_t +_PyDict_UniqueId(PyDictObject *mp) +{ + // Offset by one so that _ma_watcher_tag=0 represents an unassigned id + return (Py_ssize_t)(mp->_ma_watcher_tag >> DICT_UNIQUE_ID_SHIFT) - 1; +} + +static inline void +_Py_INCREF_DICT(PyObject *op) +{ + assert(PyDict_Check(op)); + Py_ssize_t id = _PyDict_UniqueId((PyDictObject *)op); + _Py_THREAD_INCREF_OBJECT(op, id); +} + +static inline void +_Py_DECREF_DICT(PyObject *op) +{ + assert(PyDict_Check(op)); + Py_ssize_t id = _PyDict_UniqueId((PyDictObject *)op); + _Py_THREAD_DECREF_OBJECT(op, id); +} +#endif + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 96f6d61e1c6..c7af720b1ce 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -293,6 +293,20 @@ extern PyStatus _PyObject_InitState(PyInterpreterState *interp); extern void _PyObject_FiniState(PyInterpreterState *interp); extern bool _PyRefchain_IsTraced(PyInterpreterState *interp, PyObject *obj); +// Macros used for per-thread reference counting in the free threading build. +// They resolve to normal Py_INCREF/DECREF calls in the default build. +// +// The macros are used for only a few references that would otherwise cause +// scaling bottlenecks in the free threading build: +// - The reference from an object to `ob_type`. +// - The reference from a function to `func_code`. +// - The reference from a function to `func_globals` and `func_builtins`. +// +// It's safe, but not performant or necessary, to use these macros for other +// references to code, type, or dict objects. It's also safe to mix their +// usage with normal Py_INCREF/DECREF calls. +// +// See also Include/internal/pycore_dict.h for _Py_INCREF_DICT/_Py_DECREF_DICT. #ifndef Py_GIL_DISABLED # define _Py_INCREF_TYPE Py_INCREF # define _Py_DECREF_TYPE Py_DECREF diff --git a/Include/internal/pycore_uniqueid.h b/Include/internal/pycore_uniqueid.h index ad5dd38ea08..d3db49ddb78 100644 --- a/Include/internal/pycore_uniqueid.h +++ b/Include/internal/pycore_uniqueid.h @@ -48,6 +48,9 @@ struct _Py_unique_id_pool { // Assigns the next id from the pool of ids. extern Py_ssize_t _PyObject_AssignUniqueId(PyObject *obj); +// Releases the allocated id back to the pool. +extern void _PyObject_ReleaseUniqueId(Py_ssize_t unique_id); + // Releases the allocated id back to the pool. extern void _PyObject_DisablePerThreadRefcounting(PyObject *obj); diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 806096f5814..c4e11a3e9c0 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1636,6 +1636,24 @@ _PyDict_MaybeUntrack(PyObject *op) _PyObject_GC_UNTRACK(op); } +void +_PyDict_EnablePerThreadRefcounting(PyObject *op) +{ + assert(PyDict_Check(op)); +#ifdef Py_GIL_DISABLED + Py_ssize_t id = _PyObject_AssignUniqueId(op); + if ((uint64_t)id >= (uint64_t)DICT_UNIQUE_ID_MAX) { + _PyObject_ReleaseUniqueId(id); + return; + } + + PyDictObject *mp = (PyDictObject *)op; + assert((mp->_ma_watcher_tag >> DICT_UNIQUE_ID_SHIFT) == 0); + // Plus 1 so that _ma_watcher_tag=0 represents an unassigned id + mp->_ma_watcher_tag += ((uint64_t)id + 1) << DICT_UNIQUE_ID_SHIFT; +#endif +} + static inline int is_unusable_slot(Py_ssize_t ix) { diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 3cb24769138..44fb4ac0907 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -3,6 +3,7 @@ #include "Python.h" #include "pycore_ceval.h" // _PyEval_BuiltinsFromGlobals() +#include "pycore_dict.h" // _Py_INCREF_DICT() #include "pycore_long.h" // _PyLong_GetOne() #include "pycore_modsupport.h" // _PyArg_NoKeywords() #include "pycore_object.h" // _PyObject_GC_UNTRACK() @@ -112,8 +113,15 @@ _PyFunction_FromConstructor(PyFrameConstructor *constr) Py_XDECREF(module); return NULL; } - op->func_globals = Py_NewRef(constr->fc_globals); - op->func_builtins = Py_NewRef(constr->fc_builtins); + _Py_INCREF_DICT(constr->fc_globals); + op->func_globals = constr->fc_globals; + if (PyDict_Check(constr->fc_builtins)) { + _Py_INCREF_DICT(constr->fc_builtins); + } + else { + Py_INCREF(constr->fc_builtins); + } + op->func_builtins = constr->fc_builtins; op->func_name = Py_NewRef(constr->fc_name); op->func_qualname = Py_NewRef(constr->fc_qualname); _Py_INCREF_CODE((PyCodeObject *)constr->fc_code); @@ -143,7 +151,7 @@ PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname { assert(globals != NULL); assert(PyDict_Check(globals)); - Py_INCREF(globals); + _Py_INCREF_DICT(globals); PyThreadState *tstate = _PyThreadState_GET(); @@ -184,7 +192,12 @@ PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname if (builtins == NULL) { goto error; } - Py_INCREF(builtins); + if (PyDict_Check(builtins)) { + _Py_INCREF_DICT(builtins); + } + else { + Py_INCREF(builtins); + } PyFunctionObject *op = PyObject_GC_New(PyFunctionObject, &PyFunction_Type); if (op == NULL) { @@ -1057,8 +1070,21 @@ func_clear(PyObject *self) { PyFunctionObject *op = _PyFunction_CAST(self); func_clear_version(_PyInterpreterState_GET(), op); - Py_CLEAR(op->func_globals); - Py_CLEAR(op->func_builtins); + PyObject *globals = op->func_globals; + op->func_globals = NULL; + if (globals != NULL) { + _Py_DECREF_DICT(globals); + } + PyObject *builtins = op->func_builtins; + op->func_builtins = NULL; + if (builtins != NULL) { + if (PyDict_Check(builtins)) { + _Py_DECREF_DICT(builtins); + } + else { + Py_DECREF(builtins); + } + } Py_CLEAR(op->func_module); Py_CLEAR(op->func_defaults); Py_CLEAR(op->func_kwdefaults); diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c index f63ae4e048b..c06badd5f3e 100644 --- a/Objects/moduleobject.c +++ b/Objects/moduleobject.c @@ -3,6 +3,7 @@ #include "Python.h" #include "pycore_call.h" // _PyObject_CallNoArgs() +#include "pycore_dict.h" // _PyDict_EnablePerThreadRefcounting() #include "pycore_fileutils.h" // _Py_wgetcwd #include "pycore_interp.h" // PyInterpreterState.importlib #include "pycore_long.h" // _PyLong_GetOne() @@ -105,7 +106,7 @@ new_module_notrack(PyTypeObject *mt) static void track_module(PyModuleObject *m) { - _PyObject_SetDeferredRefcount(m->md_dict); + _PyDict_EnablePerThreadRefcounting(m->md_dict); PyObject_GC_Track(m->md_dict); _PyObject_SetDeferredRefcount((PyObject *)m); diff --git a/Python/uniqueid.c b/Python/uniqueid.c index 0cbb35c6cd2..b9f30713fee 100644 --- a/Python/uniqueid.c +++ b/Python/uniqueid.c @@ -1,5 +1,6 @@ #include "Python.h" +#include "pycore_dict.h" // _PyDict_UniqueId() #include "pycore_lock.h" // PyMutex_LockFlags() #include "pycore_pystate.h" // _PyThreadState_GET() #include "pycore_object.h" // _Py_IncRefTotal @@ -98,8 +99,8 @@ _PyObject_AssignUniqueId(PyObject *obj) return unique_id; } -static void -release_unique_id(Py_ssize_t unique_id) +void +_PyObject_ReleaseUniqueId(Py_ssize_t unique_id) { PyInterpreterState *interp = _PyInterpreterState_GET(); struct _Py_unique_id_pool *pool = &interp->unique_ids; @@ -128,6 +129,11 @@ clear_unique_id(PyObject *obj) id = co->_co_unique_id; co->_co_unique_id = -1; } + else if (PyDict_Check(obj)) { + PyDictObject *mp = (PyDictObject *)obj; + id = _PyDict_UniqueId(mp); + mp->_ma_watcher_tag &= ~(UINT64_MAX << DICT_UNIQUE_ID_SHIFT); + } return id; } @@ -136,7 +142,7 @@ _PyObject_DisablePerThreadRefcounting(PyObject *obj) { Py_ssize_t id = clear_unique_id(obj); if (id >= 0) { - release_unique_id(id); + _PyObject_ReleaseUniqueId(id); } }