mirror of https://github.com/python/cpython
gh-122417: Implement per-thread heap type refcounts (#122418)
The free-threaded build partially stores heap type reference counts in a distributed manner in per-thread arrays. This avoids reference count contention when creating or destroying instances. Co-authored-by: Ken Jin <kenjin@python.org>
This commit is contained in:
parent
1429651a06
commit
dc09301067
|
@ -270,6 +270,9 @@ typedef struct _heaptypeobject {
|
|||
PyObject *ht_module;
|
||||
char *_ht_tpname; // Storage for "tp_name"; see PyType_FromModuleAndSpec
|
||||
struct _specialization_cache _spec_cache; // For use by the specializer.
|
||||
#ifdef Py_GIL_DISABLED
|
||||
Py_ssize_t unique_id; // ID used for thread-local refcounting
|
||||
#endif
|
||||
/* here are optional user slots, followed by the members. */
|
||||
} PyHeapTypeObject;
|
||||
|
||||
|
|
|
@ -381,10 +381,6 @@ extern void _PyGC_ClearAllFreeLists(PyInterpreterState *interp);
|
|||
extern void _Py_ScheduleGC(PyThreadState *tstate);
|
||||
extern void _Py_RunGC(PyThreadState *tstate);
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
// gh-117783: Immortalize objects that use deferred reference counting
|
||||
extern void _PyGC_ImmortalizeDeferredObjects(PyInterpreterState *interp);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -35,6 +35,7 @@ extern "C" {
|
|||
#include "pycore_qsbr.h" // struct _qsbr_state
|
||||
#include "pycore_tstate.h" // _PyThreadStateImpl
|
||||
#include "pycore_tuple.h" // struct _Py_tuple_state
|
||||
#include "pycore_typeid.h" // struct _Py_type_id_pool
|
||||
#include "pycore_typeobject.h" // struct types_state
|
||||
#include "pycore_unicodeobject.h" // struct _Py_unicode_state
|
||||
#include "pycore_warnings.h" // struct _warnings_runtime_state
|
||||
|
@ -220,6 +221,7 @@ struct _is {
|
|||
#if defined(Py_GIL_DISABLED)
|
||||
struct _mimalloc_interp_state mimalloc;
|
||||
struct _brc_state brc; // biased reference counting state
|
||||
struct _Py_type_id_pool type_ids;
|
||||
PyMutex weakref_locks[NUM_WEAKREF_LIST_LOCKS];
|
||||
#endif
|
||||
|
||||
|
|
|
@ -14,10 +14,19 @@ extern "C" {
|
|||
#include "pycore_interp.h" // PyInterpreterState.gc
|
||||
#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_STORE_PTR_RELAXED
|
||||
#include "pycore_pystate.h" // _PyInterpreterState_GET()
|
||||
#include "pycore_typeid.h" // _PyType_IncrefSlow
|
||||
|
||||
|
||||
#define _Py_IMMORTAL_REFCNT_LOOSE ((_Py_IMMORTAL_REFCNT >> 1) + 1)
|
||||
|
||||
// This value is added to `ob_ref_shared` for objects that use deferred
|
||||
// reference counting so that they are not immediately deallocated when the
|
||||
// non-deferred reference count drops to zero.
|
||||
//
|
||||
// The value is half the maximum shared refcount because the low two bits of
|
||||
// `ob_ref_shared` are used for flags.
|
||||
#define _Py_REF_DEFERRED (PY_SSIZE_T_MAX / 8)
|
||||
|
||||
// gh-121528, gh-118997: Similar to _Py_IsImmortal() but be more loose when
|
||||
// comparing the reference count to stay compatible with C extensions built
|
||||
// with the stable ABI 3.11 or older. Such extensions implement INCREF/DECREF
|
||||
|
@ -280,6 +289,67 @@ extern PyStatus _PyObject_InitState(PyInterpreterState *interp);
|
|||
extern void _PyObject_FiniState(PyInterpreterState *interp);
|
||||
extern bool _PyRefchain_IsTraced(PyInterpreterState *interp, PyObject *obj);
|
||||
|
||||
#ifndef Py_GIL_DISABLED
|
||||
# define _Py_INCREF_TYPE Py_INCREF
|
||||
# define _Py_DECREF_TYPE Py_DECREF
|
||||
#else
|
||||
static inline void
|
||||
_Py_INCREF_TYPE(PyTypeObject *type)
|
||||
{
|
||||
if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
|
||||
assert(_Py_IsImmortal(type));
|
||||
return;
|
||||
}
|
||||
|
||||
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
|
||||
PyHeapTypeObject *ht = (PyHeapTypeObject *)type;
|
||||
|
||||
// Unsigned comparison so that `unique_id=-1`, which indicates that
|
||||
// per-thread refcounting has been disabled on this type, is handled by
|
||||
// the "else".
|
||||
if ((size_t)ht->unique_id < (size_t)tstate->types.size) {
|
||||
# ifdef Py_REF_DEBUG
|
||||
_Py_INCREF_IncRefTotal();
|
||||
# endif
|
||||
_Py_INCREF_STAT_INC();
|
||||
tstate->types.refcounts[ht->unique_id]++;
|
||||
}
|
||||
else {
|
||||
// The slow path resizes the thread-local refcount array if necessary.
|
||||
// It handles the unique_id=-1 case to keep the inlinable function smaller.
|
||||
_PyType_IncrefSlow(ht);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
_Py_DECREF_TYPE(PyTypeObject *type)
|
||||
{
|
||||
if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
|
||||
assert(_Py_IsImmortal(type));
|
||||
return;
|
||||
}
|
||||
|
||||
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
|
||||
PyHeapTypeObject *ht = (PyHeapTypeObject *)type;
|
||||
|
||||
// Unsigned comparison so that `unique_id=-1`, which indicates that
|
||||
// per-thread refcounting has been disabled on this type, is handled by
|
||||
// the "else".
|
||||
if ((size_t)ht->unique_id < (size_t)tstate->types.size) {
|
||||
# ifdef Py_REF_DEBUG
|
||||
_Py_DECREF_DecRefTotal();
|
||||
# endif
|
||||
_Py_DECREF_STAT_INC();
|
||||
tstate->types.refcounts[ht->unique_id]--;
|
||||
}
|
||||
else {
|
||||
// Directly decref the type if the type id is not assigned or if
|
||||
// per-thread refcounting has been disabled on this type.
|
||||
Py_DECREF(type);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Inline functions trading binary compatibility for speed:
|
||||
_PyObject_Init() is the fast version of PyObject_Init(), and
|
||||
_PyObject_InitVar() is the fast version of PyObject_InitVar().
|
||||
|
@ -291,7 +361,7 @@ _PyObject_Init(PyObject *op, PyTypeObject *typeobj)
|
|||
assert(op != NULL);
|
||||
Py_SET_TYPE(op, typeobj);
|
||||
assert(_PyType_HasFeature(typeobj, Py_TPFLAGS_HEAPTYPE) || _Py_IsImmortalLoose(typeobj));
|
||||
Py_INCREF(typeobj);
|
||||
_Py_INCREF_TYPE(typeobj);
|
||||
_Py_NewReference(op);
|
||||
}
|
||||
|
||||
|
|
|
@ -31,6 +31,16 @@ typedef struct _PyThreadStateImpl {
|
|||
struct _mimalloc_thread_state mimalloc;
|
||||
struct _Py_freelists freelists;
|
||||
struct _brc_thread_state brc;
|
||||
struct {
|
||||
// The thread-local refcounts for heap type objects
|
||||
Py_ssize_t *refcounts;
|
||||
|
||||
// Size of the refcounts array.
|
||||
Py_ssize_t size;
|
||||
|
||||
// If set, don't use thread-local refcounts
|
||||
int is_finalized;
|
||||
} types;
|
||||
#endif
|
||||
|
||||
#if defined(Py_REF_DEBUG) && defined(Py_GIL_DISABLED)
|
||||
|
|
|
@ -0,0 +1,75 @@
|
|||
#ifndef Py_INTERNAL_TYPEID_H
|
||||
#define Py_INTERNAL_TYPEID_H
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef Py_BUILD_CORE
|
||||
# error "this header requires Py_BUILD_CORE define"
|
||||
#endif
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
|
||||
// This contains code for allocating unique ids to heap type objects
|
||||
// and re-using those ids when the type is deallocated.
|
||||
//
|
||||
// The type ids are used to implement per-thread reference counts of
|
||||
// heap type objects to avoid contention on the reference count fields
|
||||
// of heap type objects. Static type objects are immortal, so contention
|
||||
// is not an issue for those types.
|
||||
//
|
||||
// Type id of -1 is used to indicate a type doesn't use thread-local
|
||||
// refcounting. This value is used when a type object is finalized by the GC
|
||||
// and during interpreter shutdown to allow the type object to be
|
||||
// deallocated promptly when the object's refcount reaches zero.
|
||||
//
|
||||
// Each entry implicitly represents a type id based on its offset in the
|
||||
// table. Non-allocated entries form a free-list via the 'next' pointer.
|
||||
// Allocated entries store the corresponding PyTypeObject.
|
||||
typedef union _Py_type_id_entry {
|
||||
// Points to the next free type id, when part of the freelist
|
||||
union _Py_type_id_entry *next;
|
||||
|
||||
// Stores the type object when the id is assigned
|
||||
PyHeapTypeObject *type;
|
||||
} _Py_type_id_entry;
|
||||
|
||||
struct _Py_type_id_pool {
|
||||
PyMutex mutex;
|
||||
|
||||
// combined table of types with allocated type ids and unallocated
|
||||
// type ids.
|
||||
_Py_type_id_entry *table;
|
||||
|
||||
// Next entry to allocate inside 'table' or NULL
|
||||
_Py_type_id_entry *freelist;
|
||||
|
||||
// size of 'table'
|
||||
Py_ssize_t size;
|
||||
};
|
||||
|
||||
// Assigns the next id from the pool of type ids.
|
||||
extern void _PyType_AssignId(PyHeapTypeObject *type);
|
||||
|
||||
// Releases the allocated type id back to the pool.
|
||||
extern void _PyType_ReleaseId(PyHeapTypeObject *type);
|
||||
|
||||
// Merges the thread-local reference counts into the corresponding types.
|
||||
extern void _PyType_MergeThreadLocalRefcounts(_PyThreadStateImpl *tstate);
|
||||
|
||||
// Like _PyType_MergeThreadLocalRefcounts, but also frees the thread-local
|
||||
// array of refcounts.
|
||||
extern void _PyType_FinalizeThreadLocalRefcounts(_PyThreadStateImpl *tstate);
|
||||
|
||||
// Frees the interpreter's pool of type ids.
|
||||
extern void _PyType_FinalizeIdPool(PyInterpreterState *interp);
|
||||
|
||||
// Increfs the type, resizing the thread-local refcount array if necessary.
|
||||
PyAPI_FUNC(void) _PyType_IncrefSlow(PyHeapTypeObject *type);
|
||||
|
||||
#endif /* Py_GIL_DISABLED */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif /* !Py_INTERNAL_TYPEID_H */
|
|
@ -1710,6 +1710,7 @@ class SizeofTest(unittest.TestCase):
|
|||
fmt = 'P2nPI13Pl4Pn9Pn12PIPc'
|
||||
s = vsize(fmt)
|
||||
check(int, s)
|
||||
typeid = 'n' if support.Py_GIL_DISABLED else ''
|
||||
# class
|
||||
s = vsize(fmt + # PyTypeObject
|
||||
'4P' # PyAsyncMethods
|
||||
|
@ -1718,7 +1719,8 @@ class SizeofTest(unittest.TestCase):
|
|||
'10P' # PySequenceMethods
|
||||
'2P' # PyBufferProcs
|
||||
'6P'
|
||||
'1PIP' # Specializer cache
|
||||
'1PIP' # Specializer cache
|
||||
+ typeid # heap type id (free-threaded only)
|
||||
)
|
||||
class newstyleclass(object): pass
|
||||
# Separate block for PyDictKeysObject with 8 keys and 5 entries
|
||||
|
|
|
@ -483,6 +483,7 @@ PYTHON_OBJS= \
|
|||
Python/thread.o \
|
||||
Python/traceback.o \
|
||||
Python/tracemalloc.o \
|
||||
Python/typeid.o \
|
||||
Python/getopt.o \
|
||||
Python/pystrcmp.o \
|
||||
Python/pystrtod.o \
|
||||
|
@ -1257,6 +1258,7 @@ PYTHON_HEADERS= \
|
|||
$(srcdir)/Include/internal/pycore_tracemalloc.h \
|
||||
$(srcdir)/Include/internal/pycore_tstate.h \
|
||||
$(srcdir)/Include/internal/pycore_tuple.h \
|
||||
$(srcdir)/Include/internal/pycore_typeid.h \
|
||||
$(srcdir)/Include/internal/pycore_typeobject.h \
|
||||
$(srcdir)/Include/internal/pycore_typevarobject.h \
|
||||
$(srcdir)/Include/internal/pycore_ucnhash.h \
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
In the free-threaded build, the reference counts for heap type objects are now
|
||||
partially stored in a distributed manner in per-thread arrays. This reduces
|
||||
contention on the heap type's reference count fields when creating or
|
||||
destroying instances of the same type from multiple threads concurrently.
|
|
@ -2477,15 +2477,7 @@ _PyObject_SetDeferredRefcount(PyObject *op)
|
|||
assert(_Py_IsOwnedByCurrentThread(op));
|
||||
assert(op->ob_ref_shared == 0);
|
||||
_PyObject_SET_GC_BITS(op, _PyGC_BITS_DEFERRED);
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||
if (_Py_atomic_load_int_relaxed(&interp->gc.immortalize) == 1) {
|
||||
// gh-117696: immortalize objects instead of using deferred reference
|
||||
// counting for now.
|
||||
_Py_SetImmortal(op);
|
||||
return;
|
||||
}
|
||||
op->ob_ref_local += 1;
|
||||
op->ob_ref_shared = _Py_REF_QUEUED;
|
||||
op->ob_ref_shared = _Py_REF_SHARED(_Py_REF_DEFERRED, 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
@ -2452,7 +2452,7 @@ subtype_dealloc(PyObject *self)
|
|||
reference counting. Only decref if the base type is not already a heap
|
||||
allocated type. Otherwise, basedealloc should have decref'd it already */
|
||||
if (type_needs_decref) {
|
||||
Py_DECREF(type);
|
||||
_Py_DECREF_TYPE(type);
|
||||
}
|
||||
|
||||
/* Done */
|
||||
|
@ -2562,7 +2562,7 @@ subtype_dealloc(PyObject *self)
|
|||
reference counting. Only decref if the base type is not already a heap
|
||||
allocated type. Otherwise, basedealloc should have decref'd it already */
|
||||
if (type_needs_decref) {
|
||||
Py_DECREF(type);
|
||||
_Py_DECREF_TYPE(type);
|
||||
}
|
||||
|
||||
endlabel:
|
||||
|
@ -3913,7 +3913,9 @@ type_new_alloc(type_new_ctx *ctx)
|
|||
et->ht_module = NULL;
|
||||
et->_ht_tpname = NULL;
|
||||
|
||||
_PyObject_SetDeferredRefcount((PyObject *)et);
|
||||
#ifdef Py_GIL_DISABLED
|
||||
_PyType_AssignId(et);
|
||||
#endif
|
||||
|
||||
return type;
|
||||
}
|
||||
|
@ -4965,6 +4967,11 @@ _PyType_FromMetaclass_impl(
|
|||
type->tp_weaklistoffset = weaklistoffset;
|
||||
type->tp_dictoffset = dictoffset;
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
// Assign a type id to enable thread-local refcounting
|
||||
_PyType_AssignId(res);
|
||||
#endif
|
||||
|
||||
/* Ready the type (which includes inheritance).
|
||||
*
|
||||
* After this call we should generally only touch up what's
|
||||
|
@ -5914,6 +5921,9 @@ type_dealloc(PyObject *self)
|
|||
}
|
||||
Py_XDECREF(et->ht_module);
|
||||
PyMem_Free(et->_ht_tpname);
|
||||
#ifdef Py_GIL_DISABLED
|
||||
_PyType_ReleaseId(et);
|
||||
#endif
|
||||
Py_TYPE(type)->tp_free((PyObject *)type);
|
||||
}
|
||||
|
||||
|
|
|
@ -267,6 +267,7 @@
|
|||
<ClCompile Include="..\Python\thread.c" />
|
||||
<ClCompile Include="..\Python\traceback.c" />
|
||||
<ClCompile Include="..\Python\tracemalloc.c" />
|
||||
<ClCompile Include="..\Python\typeid.c" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\PC\pyconfig.h.in" />
|
||||
|
|
|
@ -464,6 +464,9 @@
|
|||
<ClCompile Include="..\Python\tracemalloc.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Python\typeid.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Objects\tupleobject.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
|
|
|
@ -304,6 +304,7 @@
|
|||
<ClInclude Include="..\Include\internal\pycore_tracemalloc.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_tstate.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_tuple.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_typeid.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_typeobject.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_typevarobject.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_ucnhash.h" />
|
||||
|
@ -643,6 +644,7 @@
|
|||
<ClCompile Include="..\Python\thread.c" />
|
||||
<ClCompile Include="..\Python\traceback.c" />
|
||||
<ClCompile Include="..\Python\tracemalloc.c" />
|
||||
<ClCompile Include="..\Python\typeid.c" />
|
||||
</ItemGroup>
|
||||
<ItemGroup Condition="$(IncludeExternals)">
|
||||
<ClCompile Include="..\Modules\zlibmodule.c" />
|
||||
|
|
|
@ -831,6 +831,9 @@
|
|||
<ClInclude Include="..\Include\internal\pycore_tuple.h">
|
||||
<Filter>Include\internal</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Include\internal\pycore_typeid.h">
|
||||
<Filter>Include\internal</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Include\internal\pycore_typeobject.h">
|
||||
<Filter>Include\internal</Filter>
|
||||
</ClInclude>
|
||||
|
@ -1493,6 +1496,9 @@
|
|||
<ClCompile Include="..\Python\tracemalloc.c">
|
||||
<Filter>Python</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Python\typeid.c">
|
||||
<Filter>Python</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Python\bootstrap_hash.c">
|
||||
<Filter>Python</Filter>
|
||||
</ClCompile>
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include "pycore_tstate.h" // _PyThreadStateImpl
|
||||
#include "pycore_weakref.h" // _PyWeakref_ClearRef()
|
||||
#include "pydtrace.h"
|
||||
#include "pycore_typeid.h" // _PyType_MergeThreadLocalRefcounts
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
|
||||
|
@ -164,7 +165,15 @@ disable_deferred_refcounting(PyObject *op)
|
|||
{
|
||||
if (_PyObject_HasDeferredRefcount(op)) {
|
||||
op->ob_gc_bits &= ~_PyGC_BITS_DEFERRED;
|
||||
op->ob_ref_shared -= (1 << _Py_REF_SHARED_SHIFT);
|
||||
op->ob_ref_shared -= _Py_REF_SHARED(_Py_REF_DEFERRED, 0);
|
||||
|
||||
if (PyType_Check(op)) {
|
||||
// Disable thread-local refcounting for heap types
|
||||
PyTypeObject *type = (PyTypeObject *)op;
|
||||
if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
|
||||
_PyType_ReleaseId((PyHeapTypeObject *)op);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -328,16 +337,6 @@ merge_queued_objects(_PyThreadStateImpl *tstate, struct collection_state *state)
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
merge_all_queued_objects(PyInterpreterState *interp, struct collection_state *state)
|
||||
{
|
||||
HEAD_LOCK(&_PyRuntime);
|
||||
for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) {
|
||||
merge_queued_objects((_PyThreadStateImpl *)p, state);
|
||||
}
|
||||
HEAD_UNLOCK(&_PyRuntime);
|
||||
}
|
||||
|
||||
static void
|
||||
process_delayed_frees(PyInterpreterState *interp)
|
||||
{
|
||||
|
@ -389,7 +388,9 @@ update_refs(const mi_heap_t *heap, const mi_heap_area_t *area,
|
|||
}
|
||||
|
||||
Py_ssize_t refcount = Py_REFCNT(op);
|
||||
refcount -= _PyObject_HasDeferredRefcount(op);
|
||||
if (_PyObject_HasDeferredRefcount(op)) {
|
||||
refcount -= _Py_REF_DEFERRED;
|
||||
}
|
||||
_PyObject_ASSERT(op, refcount >= 0);
|
||||
|
||||
if (refcount > 0 && !_PyObject_HasDeferredRefcount(op)) {
|
||||
|
@ -754,10 +755,6 @@ _PyGC_Init(PyInterpreterState *interp)
|
|||
{
|
||||
GCState *gcstate = &interp->gc;
|
||||
|
||||
// gh-117783: immortalize objects that would use deferred refcounting
|
||||
// once the first non-main thread is created (but not in subinterpreters).
|
||||
gcstate->immortalize = _Py_IsMainInterpreter(interp) ? 0 : -1;
|
||||
|
||||
gcstate->garbage = PyList_New(0);
|
||||
if (gcstate->garbage == NULL) {
|
||||
return _PyStatus_NO_MEMORY();
|
||||
|
@ -1105,8 +1102,18 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
|
|||
state->gcstate->old[i-1].count = 0;
|
||||
}
|
||||
|
||||
// merge refcounts for all queued objects
|
||||
merge_all_queued_objects(interp, state);
|
||||
HEAD_LOCK(&_PyRuntime);
|
||||
for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) {
|
||||
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)p;
|
||||
|
||||
// merge per-thread refcount for types into the type's actual refcount
|
||||
_PyType_MergeThreadLocalRefcounts(tstate);
|
||||
|
||||
// merge refcounts for all queued objects
|
||||
merge_queued_objects(tstate, state);
|
||||
}
|
||||
HEAD_UNLOCK(&_PyRuntime);
|
||||
|
||||
process_delayed_frees(interp);
|
||||
|
||||
// Find unreachable objects
|
||||
|
@ -1835,32 +1842,6 @@ custom_visitor_wrapper(const mi_heap_t *heap, const mi_heap_area_t *area,
|
|||
return true;
|
||||
}
|
||||
|
||||
// gh-117783: Immortalize objects that use deferred reference counting to
|
||||
// temporarily work around scaling bottlenecks.
|
||||
static bool
|
||||
immortalize_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
|
||||
void *block, size_t block_size, void *args)
|
||||
{
|
||||
PyObject *op = op_from_block(block, args, false);
|
||||
if (op != NULL && _PyObject_HasDeferredRefcount(op)) {
|
||||
_Py_SetImmortal(op);
|
||||
op->ob_gc_bits &= ~_PyGC_BITS_DEFERRED;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
_PyGC_ImmortalizeDeferredObjects(PyInterpreterState *interp)
|
||||
{
|
||||
struct visitor_args args;
|
||||
_PyEval_StopTheWorld(interp);
|
||||
if (interp->gc.immortalize == 0) {
|
||||
gc_visit_heaps(interp, &immortalize_visitor, &args);
|
||||
interp->gc.immortalize = 1;
|
||||
}
|
||||
_PyEval_StartTheWorld(interp);
|
||||
}
|
||||
|
||||
void
|
||||
PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg)
|
||||
{
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include "pycore_runtime_init.h" // _PyRuntimeState_INIT
|
||||
#include "pycore_sysmodule.h" // _PySys_Audit()
|
||||
#include "pycore_obmalloc.h" // _PyMem_obmalloc_state_on_heap()
|
||||
#include "pycore_typeid.h" // _PyType_FinalizeIdPool
|
||||
|
||||
/* --------------------------------------------------------------------------
|
||||
CAUTION
|
||||
|
@ -1584,13 +1585,6 @@ new_threadstate(PyInterpreterState *interp, int whence)
|
|||
PyMem_RawFree(new_tstate);
|
||||
}
|
||||
else {
|
||||
#ifdef Py_GIL_DISABLED
|
||||
if (_Py_atomic_load_int(&interp->gc.immortalize) == 0) {
|
||||
// Immortalize objects marked as using deferred reference counting
|
||||
// the first time a non-main thread is created.
|
||||
_PyGC_ImmortalizeDeferredObjects(interp);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
|
@ -1741,6 +1735,10 @@ PyThreadState_Clear(PyThreadState *tstate)
|
|||
struct _Py_freelists *freelists = _Py_freelists_GET();
|
||||
_PyObject_ClearFreeLists(freelists, 1);
|
||||
|
||||
// Merge our thread-local refcounts into the type's own refcount and
|
||||
// free our local refcount array.
|
||||
_PyType_FinalizeThreadLocalRefcounts((_PyThreadStateImpl *)tstate);
|
||||
|
||||
// Remove ourself from the biased reference counting table of threads.
|
||||
_Py_brc_remove_thread(tstate);
|
||||
#endif
|
||||
|
@ -1799,6 +1797,7 @@ tstate_delete_common(PyThreadState *tstate, int release_gil)
|
|||
_PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
|
||||
tstate->interp->object_state.reftotal += tstate_impl->reftotal;
|
||||
tstate_impl->reftotal = 0;
|
||||
assert(tstate_impl->types.refcounts == NULL);
|
||||
#endif
|
||||
|
||||
HEAD_UNLOCK(runtime);
|
||||
|
|
|
@ -0,0 +1,200 @@
|
|||
#include "Python.h"
|
||||
|
||||
#include "pycore_lock.h" // PyMutex_LockFlags()
|
||||
#include "pycore_pystate.h" // _PyThreadState_GET()
|
||||
#include "pycore_object.h" // _Py_IncRefTotal
|
||||
#include "pycore_typeid.h"
|
||||
|
||||
// This contains code for allocating unique ids to heap type objects
|
||||
// and re-using those ids when the type is deallocated.
|
||||
//
|
||||
// See Include/internal/pycore_typeid.h for more details.
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
|
||||
#define POOL_MIN_SIZE 8
|
||||
|
||||
#define LOCK_POOL(pool) PyMutex_LockFlags(&pool->mutex, _Py_LOCK_DONT_DETACH)
|
||||
#define UNLOCK_POOL(pool) PyMutex_Unlock(&pool->mutex)
|
||||
|
||||
static int
|
||||
resize_interp_type_id_pool(struct _Py_type_id_pool *pool)
|
||||
{
|
||||
if ((size_t)pool->size > PY_SSIZE_T_MAX / (2 * sizeof(*pool->table))) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
Py_ssize_t new_size = pool->size * 2;
|
||||
if (new_size < POOL_MIN_SIZE) {
|
||||
new_size = POOL_MIN_SIZE;
|
||||
}
|
||||
|
||||
_Py_type_id_entry *table = PyMem_Realloc(pool->table,
|
||||
new_size * sizeof(*pool->table));
|
||||
if (table == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
Py_ssize_t start = pool->size;
|
||||
for (Py_ssize_t i = start; i < new_size - 1; i++) {
|
||||
table[i].next = &table[i + 1];
|
||||
}
|
||||
table[new_size - 1].next = NULL;
|
||||
|
||||
pool->table = table;
|
||||
pool->freelist = &table[start];
|
||||
_Py_atomic_store_ssize(&pool->size, new_size);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
resize_local_refcounts(_PyThreadStateImpl *tstate)
|
||||
{
|
||||
if (tstate->types.is_finalized) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
struct _Py_type_id_pool *pool = &tstate->base.interp->type_ids;
|
||||
Py_ssize_t size = _Py_atomic_load_ssize(&pool->size);
|
||||
|
||||
Py_ssize_t *refcnts = PyMem_Realloc(tstate->types.refcounts,
|
||||
size * sizeof(Py_ssize_t));
|
||||
if (refcnts == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
Py_ssize_t old_size = tstate->types.size;
|
||||
if (old_size < size) {
|
||||
memset(refcnts + old_size, 0, (size - old_size) * sizeof(Py_ssize_t));
|
||||
}
|
||||
|
||||
tstate->types.refcounts = refcnts;
|
||||
tstate->types.size = size;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
_PyType_AssignId(PyHeapTypeObject *type)
|
||||
{
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||
struct _Py_type_id_pool *pool = &interp->type_ids;
|
||||
|
||||
LOCK_POOL(pool);
|
||||
if (pool->freelist == NULL) {
|
||||
if (resize_interp_type_id_pool(pool) < 0) {
|
||||
type->unique_id = -1;
|
||||
UNLOCK_POOL(pool);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
_Py_type_id_entry *entry = pool->freelist;
|
||||
pool->freelist = entry->next;
|
||||
entry->type = type;
|
||||
_PyObject_SetDeferredRefcount((PyObject *)type);
|
||||
type->unique_id = (entry - pool->table);
|
||||
UNLOCK_POOL(pool);
|
||||
}
|
||||
|
||||
void
|
||||
_PyType_ReleaseId(PyHeapTypeObject *type)
|
||||
{
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||
struct _Py_type_id_pool *pool = &interp->type_ids;
|
||||
|
||||
if (type->unique_id < 0) {
|
||||
// The type doesn't have an id assigned.
|
||||
return;
|
||||
}
|
||||
|
||||
LOCK_POOL(pool);
|
||||
_Py_type_id_entry *entry = &pool->table[type->unique_id];
|
||||
assert(entry->type == type);
|
||||
entry->next = pool->freelist;
|
||||
pool->freelist = entry;
|
||||
|
||||
type->unique_id = -1;
|
||||
UNLOCK_POOL(pool);
|
||||
}
|
||||
|
||||
void
|
||||
_PyType_IncrefSlow(PyHeapTypeObject *type)
|
||||
{
|
||||
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
|
||||
if (type->unique_id < 0 || resize_local_refcounts(tstate) < 0) {
|
||||
// just incref the type directly.
|
||||
Py_INCREF(type);
|
||||
return;
|
||||
}
|
||||
|
||||
assert(type->unique_id < tstate->types.size);
|
||||
tstate->types.refcounts[type->unique_id]++;
|
||||
#ifdef Py_REF_DEBUG
|
||||
_Py_IncRefTotal((PyThreadState *)tstate);
|
||||
#endif
|
||||
_Py_INCREF_STAT_INC();
|
||||
}
|
||||
|
||||
void
|
||||
_PyType_MergeThreadLocalRefcounts(_PyThreadStateImpl *tstate)
|
||||
{
|
||||
if (tstate->types.refcounts == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
struct _Py_type_id_pool *pool = &tstate->base.interp->type_ids;
|
||||
|
||||
LOCK_POOL(pool);
|
||||
for (Py_ssize_t i = 0, n = tstate->types.size; i < n; i++) {
|
||||
Py_ssize_t refcnt = tstate->types.refcounts[i];
|
||||
if (refcnt != 0) {
|
||||
PyObject *type = (PyObject *)pool->table[i].type;
|
||||
assert(PyType_Check(type));
|
||||
|
||||
_Py_atomic_add_ssize(&type->ob_ref_shared,
|
||||
refcnt << _Py_REF_SHARED_SHIFT);
|
||||
tstate->types.refcounts[i] = 0;
|
||||
}
|
||||
}
|
||||
UNLOCK_POOL(pool);
|
||||
}
|
||||
|
||||
void
|
||||
_PyType_FinalizeThreadLocalRefcounts(_PyThreadStateImpl *tstate)
|
||||
{
|
||||
_PyType_MergeThreadLocalRefcounts(tstate);
|
||||
|
||||
PyMem_Free(tstate->types.refcounts);
|
||||
tstate->types.refcounts = NULL;
|
||||
tstate->types.size = 0;
|
||||
tstate->types.is_finalized = 1;
|
||||
}
|
||||
|
||||
void
|
||||
_PyType_FinalizeIdPool(PyInterpreterState *interp)
|
||||
{
|
||||
struct _Py_type_id_pool *pool = &interp->type_ids;
|
||||
|
||||
// First, set the free-list to NULL values
|
||||
while (pool->freelist) {
|
||||
_Py_type_id_entry *next = pool->freelist->next;
|
||||
pool->freelist->type = NULL;
|
||||
pool->freelist = next;
|
||||
}
|
||||
|
||||
// Now everything non-NULL is a type. Set the type's id to -1 in case it
|
||||
// outlives the interpreter.
|
||||
for (Py_ssize_t i = 0; i < pool->size; i++) {
|
||||
PyHeapTypeObject *ht = pool->table[i].type;
|
||||
if (ht) {
|
||||
ht->unique_id = -1;
|
||||
pool->table[i].type = NULL;
|
||||
}
|
||||
}
|
||||
PyMem_Free(pool->table);
|
||||
pool->table = NULL;
|
||||
pool->freelist = NULL;
|
||||
pool->size = 0;
|
||||
}
|
||||
|
||||
#endif /* Py_GIL_DISABLED */
|
Loading…
Reference in New Issue