mirror of https://github.com/python/cpython
gh-112529: Make the GC scheduling thread-safe (#114880)
The GC keeps track of the number of allocations (minus deallocations) since the last collection. This commit buffers that count in thread-local state and uses atomic operations to fold it into the per-interpreter count; the thread-local buffering avoids contention on shared state. A consequence is that GC scheduling is less precise, so "test_sneaky_frame_object" is skipped because it requires that the GC run exactly after a frame object is allocated.
parent f92857a930
commit b24c9161a6
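The core idea is easy to see outside CPython: each thread increments a private counter and only folds it into the shared count with an atomic add once the buffer reaches a threshold. Below is a minimal sketch of that scheme in portable C11; the names (shared_count, local_count, FLUSH_THRESHOLD, record_alloc, record_dealloc) are illustrative rather than CPython's, though the 512 threshold matches the LOCAL_ALLOC_COUNT_THRESHOLD this commit introduces.

#include <stdatomic.h>

/* Shared count (per-interpreter, in CPython's case). */
static atomic_long shared_count;

/* Per-thread buffered delta; flushed once it reaches the threshold. */
static _Thread_local long local_count;

#define FLUSH_THRESHOLD 512

static void
record_alloc(void)
{
    /* Fast path touches only thread-local state: no atomics, no contention. */
    if (++local_count >= FLUSH_THRESHOLD) {
        atomic_fetch_add(&shared_count, local_count);
        local_count = 0;
        /* CPython additionally checks gc_should_collect() here and may
           schedule a collection. */
    }
}

static void
record_dealloc(void)
{
    if (--local_count <= -FLUSH_THRESHOLD) {
        atomic_fetch_add(&shared_count, local_count);
        local_count = 0;
    }
}

The threshold is a contention/precision trade-off: a larger value means fewer atomic operations, but the shared count, and any GC scheduling decision based on it, can lag reality by up to the threshold per thread. That imprecision is exactly why the two tests below are skipped on the free-threaded build.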
Include/internal/pycore_gc.h
@@ -260,6 +260,13 @@ struct _gc_runtime_state {
     Py_ssize_t long_lived_pending;
 };
 
+#ifdef Py_GIL_DISABLED
+struct _gc_thread_state {
+    /* Thread-local allocation count. */
+    Py_ssize_t alloc_count;
+};
+#endif
+
 extern void _PyGC_InitState(struct _gc_runtime_state *);
 
Include/internal/pycore_tstate.h
@@ -28,6 +28,7 @@ typedef struct _PyThreadStateImpl {
     PyThreadState base;
 
 #ifdef Py_GIL_DISABLED
+    struct _gc_thread_state gc;
     struct _mimalloc_thread_state mimalloc;
     struct _Py_object_freelists freelists;
     struct _brc_thread_state brc;
Lib/test/test_frame.py
@@ -13,7 +13,7 @@ except ImportError:
     _testcapi = None
 
 from test import support
-from test.support import threading_helper
+from test.support import threading_helper, Py_GIL_DISABLED
 from test.support.script_helper import assert_python_ok
 
@@ -294,6 +294,7 @@ class TestIncompleteFrameAreInvisible(unittest.TestCase):
         assert_python_ok("-c", code)
 
     @support.cpython_only
+    @unittest.skipIf(Py_GIL_DISABLED, "test requires precise GC scheduling")
     def test_sneaky_frame_object(self):
 
         def trace(frame, event, arg):
Lib/test/test_gc.py
@@ -363,6 +363,7 @@ class GCTests(unittest.TestCase):
     # To minimize variations, though, we first store the get_count() results
     # and check them at the end.
     @refcount_test
+    @unittest.skipIf(Py_GIL_DISABLED, 'needs precise allocation counts')
     def test_get_count(self):
         gc.collect()
         a, b, c = gc.get_count()
Modules/gcmodule.c
@@ -201,6 +201,16 @@ gc_get_count_impl(PyObject *module)
 /*[clinic end generated code: output=354012e67b16398f input=a392794a08251751]*/
 {
     GCState *gcstate = get_gc_state();
+
+#ifdef Py_GIL_DISABLED
+    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
+    struct _gc_thread_state *gc = &tstate->gc;
+
+    // Flush the local allocation count to the global count
+    _Py_atomic_add_int(&gcstate->generations[0].count, (int)gc->alloc_count);
+    gc->alloc_count = 0;
+#endif
+
     return Py_BuildValue("(iii)",
                          gcstate->generations[0].count,
                          gcstate->generations[1].count,
Objects/typeobject.c
@@ -1835,6 +1835,8 @@ _PyType_AllocNoTrack(PyTypeObject *type, Py_ssize_t nitems)
     if (presize) {
         ((PyObject **)alloc)[0] = NULL;
         ((PyObject **)alloc)[1] = NULL;
+    }
+    if (PyType_IS_GC(type)) {
         _PyObject_GC_Link(obj);
     }
     memset(obj, '\0', size);
Python/gc_free_threading.c
@@ -23,6 +23,11 @@ typedef struct _gc_runtime_state GCState;
 #  define GC_DEBUG
 #endif
 
+// Each thread buffers the count of allocated objects in a thread-local
+// variable up to +/- this amount to reduce the overhead of updating
+// the global count.
+#define LOCAL_ALLOC_COUNT_THRESHOLD 512
+
 // Automatically choose the generation that needs collecting.
 #define GENERATION_AUTO (-1)
 
@@ -959,6 +964,41 @@ gc_should_collect(GCState *gcstate)
             gcstate->generations[1].threshold == 0);
 }
 
+static void
+record_allocation(PyThreadState *tstate)
+{
+    struct _gc_thread_state *gc = &((_PyThreadStateImpl *)tstate)->gc;
+
+    // We buffer the allocation count to avoid the overhead of atomic
+    // operations for every allocation.
+    gc->alloc_count++;
+    if (gc->alloc_count >= LOCAL_ALLOC_COUNT_THRESHOLD) {
+        // TODO: Use Py_ssize_t for the generation count.
+        GCState *gcstate = &tstate->interp->gc;
+        _Py_atomic_add_int(&gcstate->generations[0].count, (int)gc->alloc_count);
+        gc->alloc_count = 0;
+
+        if (gc_should_collect(gcstate) &&
+            !_Py_atomic_load_int_relaxed(&gcstate->collecting))
+        {
+            _Py_ScheduleGC(tstate->interp);
+        }
+    }
+}
+
+static void
+record_deallocation(PyThreadState *tstate)
+{
+    struct _gc_thread_state *gc = &((_PyThreadStateImpl *)tstate)->gc;
+
+    gc->alloc_count--;
+    if (gc->alloc_count <= -LOCAL_ALLOC_COUNT_THRESHOLD) {
+        GCState *gcstate = &tstate->interp->gc;
+        _Py_atomic_add_int(&gcstate->generations[0].count, (int)gc->alloc_count);
+        gc->alloc_count = 0;
+    }
+}
+
 static void
 gc_collect_internal(PyInterpreterState *interp, struct collection_state *state)
 {
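As a usage sketch (not part of the commit), this hypothetical pthreads driver exercises the same buffered-counter pattern end to end: each worker records a fixed number of allocations and flushes its residue on exit, analogous to the flush gc.get_count() performs for the calling thread. After joining, the shared count equals the true total; mid-run it may lag by up to the threshold per thread.

/* Compile with: cc -std=c11 -pthread demo.c */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define FLUSH_THRESHOLD 512
#define OPS_PER_THREAD  100000
#define NUM_THREADS     8

static atomic_long shared_count;
static _Thread_local long local_count;

static void
record_alloc(void)
{
    if (++local_count >= FLUSH_THRESHOLD) {
        atomic_fetch_add(&shared_count, local_count);
        local_count = 0;
    }
}

static void *
worker(void *arg)
{
    (void)arg;
    for (int i = 0; i < OPS_PER_THREAD; i++) {
        record_alloc();
    }
    /* Residue below the threshold stays buffered unless explicitly
       flushed, so flush it before the thread exits. */
    atomic_fetch_add(&shared_count, local_count);
    local_count = 0;
    return NULL;
}

int
main(void)
{
    pthread_t threads[NUM_THREADS];
    for (int i = 0; i < NUM_THREADS; i++) {
        pthread_create(&threads[i], NULL, worker, NULL);
    }
    for (int i = 0; i < NUM_THREADS; i++) {
        pthread_join(threads[i], NULL);
    }
    /* With the final flush the count is exact (800000 here); without it,
       each thread could under-report by up to FLUSH_THRESHOLD - 1. */
    printf("shared_count = %ld (expected %d)\n",
           atomic_load(&shared_count), NUM_THREADS * OPS_PER_THREAD);
    return 0;
}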
@@ -981,6 +1021,9 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state)
         }
     }
 
+    // Record the number of live GC objects
+    interp->gc.long_lived_total = state->long_lived_total;
+
     // Clear weakrefs and enqueue callbacks (but do not call them).
     clear_weakrefs(state);
     _PyEval_StartTheWorld(interp);
@@ -1090,7 +1133,6 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
 
     m = state.collected;
     n = state.uncollectable;
-    gcstate->long_lived_total = state.long_lived_total;
 
     if (gcstate->debug & _PyGC_DEBUG_STATS) {
         double d = _PyTime_AsSecondsDouble(_PyTime_GetPerfCounter() - t1);
@@ -1530,15 +1572,7 @@ _Py_ScheduleGC(PyInterpreterState *interp)
 void
 _PyObject_GC_Link(PyObject *op)
 {
-    PyThreadState *tstate = _PyThreadState_GET();
-    GCState *gcstate = &tstate->interp->gc;
-    gcstate->generations[0].count++;
-
-    if (gc_should_collect(gcstate) &&
-        !_Py_atomic_load_int_relaxed(&gcstate->collecting))
-    {
-        _Py_ScheduleGC(tstate->interp);
-    }
+    record_allocation(_PyThreadState_GET());
 }
 
 void
@@ -1564,7 +1598,7 @@ gc_alloc(PyTypeObject *tp, size_t basicsize, size_t presize)
         ((PyObject **)mem)[1] = NULL;
     }
     PyObject *op = (PyObject *)(mem + presize);
-    _PyObject_GC_Link(op);
+    record_allocation(tstate);
     return op;
 }
@@ -1646,10 +1680,9 @@ PyObject_GC_Del(void *op)
         PyErr_SetRaisedException(exc);
 #endif
     }
-    GCState *gcstate = get_gc_state();
-    if (gcstate->generations[0].count > 0) {
-        gcstate->generations[0].count--;
-    }
+
+    record_deallocation(_PyThreadState_GET());
+
     PyObject_Free(((char *)op)-presize);
 }