gh-124218: Use per-thread reference counting for globals and builtins (#125713)

Use per-thread refcounting for the reference from function objects to
the globals and builtins dictionaries.
This commit is contained in:
Sam Gross 2024-10-21 12:51:29 -04:00 committed by GitHub
parent d880c83ff7
commit 9b0bfba2a2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 115 additions and 11 deletions

View File

@ -17,7 +17,9 @@ typedef struct {
/* This is a private field for CPython's internal use.
* Bits 0-7 are for dict watchers.
* Bits 8-11 are for the watched mutation counter (used by tier2 optimization)
* The remaining bits are not currently used. */
* Bits 12-31 are currently unused
* Bits 32-63 are a unique id in the free threading build (used for per-thread refcounting)
*/
uint64_t _ma_watcher_tag;
PyDictKeysObject *ma_keys;

View File

@ -229,6 +229,8 @@ static inline PyDictUnicodeEntry* DK_UNICODE_ENTRIES(PyDictKeysObject *dk) {
#define DICT_VERSION_INCREMENT (1 << (DICT_MAX_WATCHERS + DICT_WATCHED_MUTATION_BITS))
#define DICT_WATCHER_MASK ((1 << DICT_MAX_WATCHERS) - 1)
#define DICT_WATCHER_AND_MODIFICATION_MASK ((1 << (DICT_MAX_WATCHERS + DICT_WATCHED_MUTATION_BITS)) - 1)
#define DICT_UNIQUE_ID_SHIFT (32)
#define DICT_UNIQUE_ID_MAX ((UINT64_C(1) << (64 - DICT_UNIQUE_ID_SHIFT)) - 1)
PyAPI_FUNC(void)
@ -307,8 +309,40 @@ _PyInlineValuesSize(PyTypeObject *tp)
int
_PyDict_DetachFromObject(PyDictObject *dict, PyObject *obj);
// Enables per-thread ref counting on this dict in the free threading build
extern void _PyDict_EnablePerThreadRefcounting(PyObject *op);
PyDictObject *_PyObject_MaterializeManagedDict_LockHeld(PyObject *);
// See `_Py_INCREF_TYPE()` in pycore_object.h
#ifndef Py_GIL_DISABLED
# define _Py_INCREF_DICT Py_INCREF
# define _Py_DECREF_DICT Py_DECREF
#else
static inline Py_ssize_t
_PyDict_UniqueId(PyDictObject *mp)
{
// Offset by one so that _ma_watcher_tag=0 represents an unassigned id
return (Py_ssize_t)(mp->_ma_watcher_tag >> DICT_UNIQUE_ID_SHIFT) - 1;
}
static inline void
_Py_INCREF_DICT(PyObject *op)
{
assert(PyDict_Check(op));
Py_ssize_t id = _PyDict_UniqueId((PyDictObject *)op);
_Py_THREAD_INCREF_OBJECT(op, id);
}
static inline void
_Py_DECREF_DICT(PyObject *op)
{
assert(PyDict_Check(op));
Py_ssize_t id = _PyDict_UniqueId((PyDictObject *)op);
_Py_THREAD_DECREF_OBJECT(op, id);
}
#endif
#ifdef __cplusplus
}
#endif

View File

@ -293,6 +293,20 @@ extern PyStatus _PyObject_InitState(PyInterpreterState *interp);
extern void _PyObject_FiniState(PyInterpreterState *interp);
extern bool _PyRefchain_IsTraced(PyInterpreterState *interp, PyObject *obj);
// Macros used for per-thread reference counting in the free threading build.
// They resolve to normal Py_INCREF/DECREF calls in the default build.
//
// The macros are used for only a few references that would otherwise cause
// scaling bottlenecks in the free threading build:
// - The reference from an object to `ob_type`.
// - The reference from a function to `func_code`.
// - The reference from a function to `func_globals` and `func_builtins`.
//
// It's safe, but not performant or necessary, to use these macros for other
// references to code, type, or dict objects. It's also safe to mix their
// usage with normal Py_INCREF/DECREF calls.
//
// See also Include/internal/pycore_dict.h for _Py_INCREF_DICT/_Py_DECREF_DICT.
#ifndef Py_GIL_DISABLED
# define _Py_INCREF_TYPE Py_INCREF
# define _Py_DECREF_TYPE Py_DECREF

View File

@ -48,6 +48,9 @@ struct _Py_unique_id_pool {
// Assigns the next id from the pool of ids.
extern Py_ssize_t _PyObject_AssignUniqueId(PyObject *obj);
// Releases the allocated id back to the pool.
extern void _PyObject_ReleaseUniqueId(Py_ssize_t unique_id);
// Releases the allocated id back to the pool.
extern void _PyObject_DisablePerThreadRefcounting(PyObject *obj);

View File

@ -1636,6 +1636,24 @@ _PyDict_MaybeUntrack(PyObject *op)
_PyObject_GC_UNTRACK(op);
}
void
_PyDict_EnablePerThreadRefcounting(PyObject *op)
{
assert(PyDict_Check(op));
#ifdef Py_GIL_DISABLED
Py_ssize_t id = _PyObject_AssignUniqueId(op);
if ((uint64_t)id >= (uint64_t)DICT_UNIQUE_ID_MAX) {
_PyObject_ReleaseUniqueId(id);
return;
}
PyDictObject *mp = (PyDictObject *)op;
assert((mp->_ma_watcher_tag >> DICT_UNIQUE_ID_SHIFT) == 0);
// Plus 1 so that _ma_watcher_tag=0 represents an unassigned id
mp->_ma_watcher_tag += ((uint64_t)id + 1) << DICT_UNIQUE_ID_SHIFT;
#endif
}
static inline int
is_unusable_slot(Py_ssize_t ix)
{

View File

@ -3,6 +3,7 @@
#include "Python.h"
#include "pycore_ceval.h" // _PyEval_BuiltinsFromGlobals()
#include "pycore_dict.h" // _Py_INCREF_DICT()
#include "pycore_long.h" // _PyLong_GetOne()
#include "pycore_modsupport.h" // _PyArg_NoKeywords()
#include "pycore_object.h" // _PyObject_GC_UNTRACK()
@ -112,8 +113,15 @@ _PyFunction_FromConstructor(PyFrameConstructor *constr)
Py_XDECREF(module);
return NULL;
}
op->func_globals = Py_NewRef(constr->fc_globals);
op->func_builtins = Py_NewRef(constr->fc_builtins);
_Py_INCREF_DICT(constr->fc_globals);
op->func_globals = constr->fc_globals;
if (PyDict_Check(constr->fc_builtins)) {
_Py_INCREF_DICT(constr->fc_builtins);
}
else {
Py_INCREF(constr->fc_builtins);
}
op->func_builtins = constr->fc_builtins;
op->func_name = Py_NewRef(constr->fc_name);
op->func_qualname = Py_NewRef(constr->fc_qualname);
_Py_INCREF_CODE((PyCodeObject *)constr->fc_code);
@ -143,7 +151,7 @@ PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname
{
assert(globals != NULL);
assert(PyDict_Check(globals));
Py_INCREF(globals);
_Py_INCREF_DICT(globals);
PyThreadState *tstate = _PyThreadState_GET();
@ -184,7 +192,12 @@ PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname
if (builtins == NULL) {
goto error;
}
Py_INCREF(builtins);
if (PyDict_Check(builtins)) {
_Py_INCREF_DICT(builtins);
}
else {
Py_INCREF(builtins);
}
PyFunctionObject *op = PyObject_GC_New(PyFunctionObject, &PyFunction_Type);
if (op == NULL) {
@ -1057,8 +1070,21 @@ func_clear(PyObject *self)
{
PyFunctionObject *op = _PyFunction_CAST(self);
func_clear_version(_PyInterpreterState_GET(), op);
Py_CLEAR(op->func_globals);
Py_CLEAR(op->func_builtins);
PyObject *globals = op->func_globals;
op->func_globals = NULL;
if (globals != NULL) {
_Py_DECREF_DICT(globals);
}
PyObject *builtins = op->func_builtins;
op->func_builtins = NULL;
if (builtins != NULL) {
if (PyDict_Check(builtins)) {
_Py_DECREF_DICT(builtins);
}
else {
Py_DECREF(builtins);
}
}
Py_CLEAR(op->func_module);
Py_CLEAR(op->func_defaults);
Py_CLEAR(op->func_kwdefaults);

View File

@ -3,6 +3,7 @@
#include "Python.h"
#include "pycore_call.h" // _PyObject_CallNoArgs()
#include "pycore_dict.h" // _PyDict_EnablePerThreadRefcounting()
#include "pycore_fileutils.h" // _Py_wgetcwd
#include "pycore_interp.h" // PyInterpreterState.importlib
#include "pycore_long.h" // _PyLong_GetOne()
@ -105,7 +106,7 @@ new_module_notrack(PyTypeObject *mt)
static void
track_module(PyModuleObject *m)
{
_PyObject_SetDeferredRefcount(m->md_dict);
_PyDict_EnablePerThreadRefcounting(m->md_dict);
PyObject_GC_Track(m->md_dict);
_PyObject_SetDeferredRefcount((PyObject *)m);

View File

@ -1,5 +1,6 @@
#include "Python.h"
#include "pycore_dict.h" // _PyDict_UniqueId()
#include "pycore_lock.h" // PyMutex_LockFlags()
#include "pycore_pystate.h" // _PyThreadState_GET()
#include "pycore_object.h" // _Py_IncRefTotal
@ -98,8 +99,8 @@ _PyObject_AssignUniqueId(PyObject *obj)
return unique_id;
}
static void
release_unique_id(Py_ssize_t unique_id)
void
_PyObject_ReleaseUniqueId(Py_ssize_t unique_id)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
struct _Py_unique_id_pool *pool = &interp->unique_ids;
@ -128,6 +129,11 @@ clear_unique_id(PyObject *obj)
id = co->_co_unique_id;
co->_co_unique_id = -1;
}
else if (PyDict_Check(obj)) {
PyDictObject *mp = (PyDictObject *)obj;
id = _PyDict_UniqueId(mp);
mp->_ma_watcher_tag &= ~(UINT64_MAX << DICT_UNIQUE_ID_SHIFT);
}
return id;
}
@ -136,7 +142,7 @@ _PyObject_DisablePerThreadRefcounting(PyObject *obj)
{
Py_ssize_t id = clear_unique_id(obj);
if (id >= 0) {
release_unique_id(id);
_PyObject_ReleaseUniqueId(id);
}
}