GH-126491: GC: Mark objects reachable from roots before doing cycle collection (GH-126502)

* Mark almost all reachable objects before doing collection phase

* Add stats for objects marked

* Visit new frames before each increment

* Remove lazy dict tracking

* Update docs

* Clearer calculation of work to do.
This commit is contained in:
Mark Shannon 2024-11-18 14:31:26 +00:00 committed by GitHub
parent a1d9c8aa80
commit b0fcc2c47a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
21 changed files with 332 additions and 330 deletions

View File

@ -99,6 +99,8 @@ typedef struct _gc_stats {
uint64_t collections;
uint64_t object_visits;
uint64_t objects_collected;
uint64_t objects_transitively_reachable;
uint64_t objects_not_transitively_reachable;
} GCStats;
typedef struct _uop_stats {

View File

@ -43,8 +43,6 @@ extern int _PyDict_Next(
extern int _PyDict_HasOnlyStringKeys(PyObject *mp);
extern void _PyDict_MaybeUntrack(PyObject *mp);
// Export for '_ctypes' shared extension
PyAPI_FUNC(Py_ssize_t) _PyDict_SizeOf(PyDictObject *);

View File

@ -75,6 +75,7 @@ typedef struct _PyInterpreterFrame {
_PyStackRef *stackpointer;
uint16_t return_offset; /* Only relevant during a function call */
char owner;
char visited;
/* Locals and stack */
_PyStackRef localsplus[1];
} _PyInterpreterFrame;
@ -207,6 +208,7 @@ _PyFrame_Initialize(
#endif
frame->return_offset = 0;
frame->owner = FRAME_OWNED_BY_THREAD;
frame->visited = 0;
for (int i = null_locals_from; i < code->co_nlocalsplus; i++) {
frame->localsplus[i] = PyStackRef_NULL;
@ -389,6 +391,7 @@ _PyFrame_PushTrampolineUnchecked(PyThreadState *tstate, PyCodeObject *code, int
frame->instr_ptr = _PyCode_CODE(code);
#endif
frame->owner = FRAME_OWNED_BY_THREAD;
frame->visited = 0;
frame->return_offset = 0;
#ifdef Py_GIL_DISABLED

View File

@ -10,11 +10,11 @@ extern "C" {
/* GC information is stored BEFORE the object structure. */
typedef struct {
// Pointer to next object in the list.
// Tagged pointer to next object in the list.
// 0 means the object is not tracked
uintptr_t _gc_next;
// Pointer to previous object in the list.
// Tagged pointer to previous object in the list.
// Lowest two bits are used for flags documented later.
uintptr_t _gc_prev;
} PyGC_Head;
@ -302,6 +302,11 @@ struct gc_generation_stats {
Py_ssize_t uncollectable;
};
/* Phase of the incremental collector, stored in the `phase` field of
 * `struct _gc_runtime_state`. */
enum _GCPhase {
    /* Reachable objects still have to be marked before collecting. */
    GC_PHASE_MARK = 0,
    /* Marking has been done; increments of collection are performed. */
    GC_PHASE_COLLECT = 1,
};
struct _gc_runtime_state {
/* List of objects that still need to be cleaned up, singly linked
* via their gc headers' gc_prev pointers. */
@ -325,10 +330,12 @@ struct _gc_runtime_state {
/* a list of callbacks to be invoked when collection is performed */
PyObject *callbacks;
Py_ssize_t prior_heap_size;
Py_ssize_t heap_size;
Py_ssize_t work_to_do;
/* Which of the old spaces is the visited space */
int visited_space;
int phase;
#ifdef Py_GIL_DISABLED
/* This is the number of objects that survived the last full

View File

@ -466,8 +466,8 @@ static inline void _PyObject_GC_TRACK(
PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev);
_PyGCHead_SET_NEXT(last, gc);
_PyGCHead_SET_PREV(gc, last);
/* Young objects will be moved into the visited space during GC, so set the bit here */
gc->_gc_next = ((uintptr_t)generation0) | (uintptr_t)interp->gc.visited_space;
uintptr_t not_visited = 1 ^ interp->gc.visited_space;
gc->_gc_next = ((uintptr_t)generation0) | not_visited;
generation0->_gc_prev = (uintptr_t)gc;
#endif
}

View File

@ -134,6 +134,7 @@ extern PyTypeObject _PyExc_MemoryError;
{ .threshold = 0, }, \
}, \
.work_to_do = -5000, \
.phase = GC_PHASE_MARK, \
}, \
.qsbr = { \
.wr_seq = QSBR_INITIAL, \

View File

@ -351,6 +351,7 @@ follows these steps in order:
the reference counts fall to 0, triggering the destruction of all unreachable
objects.
Optimization: incremental collection
====================================
@ -484,6 +485,46 @@ specifically in a generation by calling `gc.collect(generation=NUM)`.
```
Optimization: visiting reachable objects
========================================
An object cannot be garbage if it can be reached.
To avoid having to identify reference cycles across the whole heap, we can
reduce the amount of work done considerably by first moving most reachable objects
to the `visited` space. Empirically, most reachable objects can be reached from a
small set of global objects and local variables.
This step does much less work per object, so it reduces the time spent
performing garbage collection by at least half.
> [!NOTE]
> Objects that are not determined to be reachable by this pass are not necessarily
> unreachable. We still need to perform the main algorithm to determine which objects
> are actually unreachable.
We use the same technique of forming a transitive closure as the incremental
collector does to find reachable objects, seeding the list with some global
objects and the currently executing frames.
This phase moves objects to the `visited` space, as follows:
1. All objects directly referred to by any builtin class, the `sys` module, the `builtins`
module and all objects directly referred to from stack frames are added to a working
set of reachable objects.
2. Until this working set is empty:
1. Pop an object from the set and move it to the `visited` space
2. For each object directly reachable from that object:
* If it is not already in `visited` space and it is a GC object,
add it to the working set
Before each increment of collection is performed, the stacks are scanned
to check for any new stack frames that have been created since the last
increment. All objects directly referred to from those stack frames are
added to the working set.
Then the above algorithm is repeated, starting from step 2.
Optimization: reusing fields to save memory
===========================================
@ -532,8 +573,8 @@ of `PyGC_Head` discussed in the `Memory layout and object structure`_ section:
currently in. Instead, when that's needed, ad hoc tricks (like the
`NEXT_MASK_UNREACHABLE` flag) are employed.
Optimization: delay tracking containers
=======================================
Optimization: delayed untracking of containers
==============================================
Certain types of containers cannot participate in a reference cycle, and so do
not need to be tracked by the garbage collector. Untracking these objects
@ -548,8 +589,8 @@ a container:
As a general rule, instances of atomic types aren't tracked and instances of
non-atomic types (containers, user-defined objects...) are. However, some
type-specific optimizations can be present in order to suppress the garbage
collector footprint of simple instances. Some examples of native types that
benefit from delayed tracking:
collector footprint of simple instances. Historically, both dictionaries and
tuples were untracked during garbage collection. Now it is only tuples:
- Tuples containing only immutable objects (integers, strings etc,
and recursively, tuples of immutable objects) do not need to be tracked. The
@ -558,14 +599,8 @@ benefit from delayed tracking:
tuples at creation time. Instead, all tuples except the empty tuple are tracked
when created. During garbage collection it is determined whether any surviving
tuples can be untracked. A tuple can be untracked if all of its contents are
already not tracked. Tuples are examined for untracking in all garbage collection
cycles. It may take more than one cycle to untrack a tuple.
- Dictionaries containing only immutable objects also do not need to be tracked.
Dictionaries are untracked when created. If a tracked item is inserted into a
dictionary (either as a key or value), the dictionary becomes tracked. During a
full garbage collection (all generations), the collector will untrack any dictionaries
whose contents are not tracked.
already not tracked. Tuples are examined for untracking when moved from the
young to the old generation.
The garbage collector module provides the Python function `is_tracked(obj)`, which returns
the current tracking status of the object. Subsequent garbage collections may change the
@ -578,11 +613,9 @@ tracking status of the object.
False
>>> gc.is_tracked([])
True
>>> gc.is_tracked({})
>>> gc.is_tracked(("a", 1))
False
>>> gc.is_tracked({"a": 1})
False
>>> gc.is_tracked({"a": []})
True
```

View File

@ -880,115 +880,6 @@ class DictTest(unittest.TestCase):
gc.collect()
self.assertIs(ref(), None, "Cycle was not collected")
def _not_tracked(self, t):
# Nested containers can take several collections to untrack
gc.collect()
gc.collect()
self.assertFalse(gc.is_tracked(t), t)
def _tracked(self, t):
self.assertTrue(gc.is_tracked(t), t)
gc.collect()
gc.collect()
self.assertTrue(gc.is_tracked(t), t)
def test_string_keys_can_track_values(self):
# Test that this doesn't leak.
for i in range(10):
d = {}
for j in range(10):
d[str(j)] = j
d["foo"] = d
@support.cpython_only
def test_track_literals(self):
# Test GC-optimization of dict literals
x, y, z, w = 1.5, "a", (1, None), []
self._not_tracked({})
self._not_tracked({x:(), y:x, z:1})
self._not_tracked({1: "a", "b": 2})
self._not_tracked({1: 2, (None, True, False, ()): int})
self._not_tracked({1: object()})
# Dicts with mutable elements are always tracked, even if those
# elements are not tracked right now.
self._tracked({1: []})
self._tracked({1: ([],)})
self._tracked({1: {}})
self._tracked({1: set()})
@support.cpython_only
def test_track_dynamic(self):
# Test GC-optimization of dynamically-created dicts
class MyObject(object):
pass
x, y, z, w, o = 1.5, "a", (1, object()), [], MyObject()
d = dict()
self._not_tracked(d)
d[1] = "a"
self._not_tracked(d)
d[y] = 2
self._not_tracked(d)
d[z] = 3
self._not_tracked(d)
self._not_tracked(d.copy())
d[4] = w
self._tracked(d)
self._tracked(d.copy())
d[4] = None
self._not_tracked(d)
self._not_tracked(d.copy())
# dd isn't tracked right now, but it may mutate and therefore d
# which contains it must be tracked.
d = dict()
dd = dict()
d[1] = dd
self._not_tracked(dd)
self._tracked(d)
dd[1] = d
self._tracked(dd)
d = dict.fromkeys([x, y, z])
self._not_tracked(d)
dd = dict()
dd.update(d)
self._not_tracked(dd)
d = dict.fromkeys([x, y, z, o])
self._tracked(d)
dd = dict()
dd.update(d)
self._tracked(dd)
d = dict(x=x, y=y, z=z)
self._not_tracked(d)
d = dict(x=x, y=y, z=z, w=w)
self._tracked(d)
d = dict()
d.update(x=x, y=y, z=z)
self._not_tracked(d)
d.update(w=w)
self._tracked(d)
d = dict([(x, y), (z, 1)])
self._not_tracked(d)
d = dict([(x, y), (z, w)])
self._tracked(d)
d = dict()
d.update([(x, y), (z, 1)])
self._not_tracked(d)
d.update([(x, y), (z, w)])
self._tracked(d)
@support.cpython_only
def test_track_subtypes(self):
# Dict subtypes are always tracked
class MyDict(dict):
pass
self._tracked(MyDict())
def make_shared_key_dict(self, n):
class C:
pass

View File

@ -31,6 +31,11 @@ except ImportError:
return C
ContainerNoGC = None
try:
import _testinternalcapi
except ImportError:
_testinternalcapi = None
### Support code
###############################################################################
@ -1130,6 +1135,7 @@ class IncrementalGCTests(unittest.TestCase):
def tearDown(self):
gc.disable()
@unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi")
@requires_gil_enabled("Free threading does not support incremental GC")
# Use small increments to emulate longer running process in a shorter time
@gc_threshold(200, 10)
@ -1155,32 +1161,18 @@ class IncrementalGCTests(unittest.TestCase):
return head
head = make_ll(1000)
count = 1000
# There will be some objects we aren't counting,
# e.g. the gc stats dicts. This test checks
# that the counts don't grow, so we try to
# correct for the uncounted objects
# This is just an estimate.
CORRECTION = 20
enabled = gc.isenabled()
gc.enable()
olds = []
initial_heap_size = _testinternalcapi.get_heap_size()
for i in range(20_000):
newhead = make_ll(20)
count += 20
newhead.surprise = head
olds.append(newhead)
if len(olds) == 20:
stats = gc.get_stats()
young = stats[0]
incremental = stats[1]
old = stats[2]
collected = young['collected'] + incremental['collected'] + old['collected']
count += CORRECTION
live = count - collected
self.assertLess(live, 25000)
new_objects = _testinternalcapi.get_heap_size() - initial_heap_size
self.assertLess(new_objects, 25_000)
del olds[:]
if not enabled:
gc.disable()
@ -1322,7 +1314,8 @@ class GCCallbackTests(unittest.TestCase):
from test.support import gc_collect, SuppressCrashReport
a = [1, 2, 3]
b = [a]
b = [a, a]
a.append(b)
# Avoid coredump when Py_FatalError() calls abort()
SuppressCrashReport().__enter__()
@ -1332,6 +1325,8 @@ class GCCallbackTests(unittest.TestCase):
# (to avoid deallocating it):
import ctypes
ctypes.pythonapi.Py_DecRef(ctypes.py_object(a))
del a
del b
# The garbage collector should now have a fatal error
# when it reaches the broken object
@ -1360,7 +1355,7 @@ class GCCallbackTests(unittest.TestCase):
self.assertRegex(stderr,
br'object type name: list')
self.assertRegex(stderr,
br'object repr : \[1, 2, 3\]')
br'object repr : \[1, 2, 3, \[\[...\], \[...\]\]\]')
class GCTogglingTests(unittest.TestCase):

View File

@ -0,0 +1,4 @@
Add a marking phase to the GC. All objects that can be transitively
reached from builtin modules or the stacks are marked as reachable
before cycle detection. This reduces the amount of work done by the
GC by approximately half.

View File

@ -2077,6 +2077,12 @@ has_deferred_refcount(PyObject *self, PyObject *op)
}
/* Return the current interpreter's `gc.heap_size` counter as a Python int.
 * Registered in `module_functions` as "get_heap_size" (METH_NOARGS). */
static PyObject *
get_heap_size(PyObject *self, PyObject *Py_UNUSED(ignored))
{
    PyInterpreterState *interp = PyInterpreterState_Get();
    return PyLong_FromInt64(interp->gc.heap_size);
}
static PyMethodDef module_functions[] = {
{"get_configs", get_configs, METH_NOARGS},
{"get_recursion_depth", get_recursion_depth, METH_NOARGS},
@ -2174,6 +2180,7 @@ static PyMethodDef module_functions[] = {
{"get_static_builtin_types", get_static_builtin_types, METH_NOARGS},
{"identify_type_slot_wrappers", identify_type_slot_wrappers, METH_NOARGS},
{"has_deferred_refcount", has_deferred_refcount, METH_O},
{"get_heap_size", get_heap_size, METH_NOARGS},
{NULL, NULL} /* sentinel */
};

View File

@ -883,6 +883,7 @@ new_dict(PyInterpreterState *interp,
mp->ma_used = used;
mp->_ma_watcher_tag = 0;
ASSERT_CONSISTENT(mp);
_PyObject_GC_TRACK(mp);
return (PyObject *)mp;
}
@ -1578,64 +1579,6 @@ _PyDict_HasOnlyStringKeys(PyObject *dict)
return 1;
}
#define MAINTAIN_TRACKING(mp, key, value) \
do { \
if (!_PyObject_GC_IS_TRACKED(mp)) { \
if (_PyObject_GC_MAY_BE_TRACKED(key) || \
_PyObject_GC_MAY_BE_TRACKED(value)) { \
_PyObject_GC_TRACK(mp); \
} \
} \
} while(0)
void
_PyDict_MaybeUntrack(PyObject *op)
{
PyDictObject *mp;
PyObject *value;
Py_ssize_t i, numentries;
ASSERT_WORLD_STOPPED_OR_DICT_LOCKED(op);
if (!PyDict_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op))
return;
mp = (PyDictObject *) op;
ASSERT_CONSISTENT(mp);
numentries = mp->ma_keys->dk_nentries;
if (_PyDict_HasSplitTable(mp)) {
for (i = 0; i < numentries; i++) {
if ((value = mp->ma_values->values[i]) == NULL)
continue;
if (_PyObject_GC_MAY_BE_TRACKED(value)) {
return;
}
}
}
else {
if (DK_IS_UNICODE(mp->ma_keys)) {
PyDictUnicodeEntry *ep0 = DK_UNICODE_ENTRIES(mp->ma_keys);
for (i = 0; i < numentries; i++) {
if ((value = ep0[i].me_value) == NULL)
continue;
if (_PyObject_GC_MAY_BE_TRACKED(value))
return;
}
}
else {
PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys);
for (i = 0; i < numentries; i++) {
if ((value = ep0[i].me_value) == NULL)
continue;
if (_PyObject_GC_MAY_BE_TRACKED(value) ||
_PyObject_GC_MAY_BE_TRACKED(ep0[i].me_key))
return;
}
}
}
_PyObject_GC_UNTRACK(op);
}
void
_PyDict_EnablePerThreadRefcounting(PyObject *op)
{
@ -1761,7 +1704,6 @@ insert_split_value(PyInterpreterState *interp, PyDictObject *mp, PyObject *key,
{
assert(PyUnicode_CheckExact(key));
ASSERT_DICT_LOCKED(mp);
MAINTAIN_TRACKING(mp, key, value);
PyObject *old_value = mp->ma_values->values[ix];
if (old_value == NULL) {
_PyDict_NotifyEvent(interp, PyDict_EVENT_ADDED, mp, key, value);
@ -1818,8 +1760,6 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp,
if (ix == DKIX_ERROR)
goto Fail;
MAINTAIN_TRACKING(mp, key, value);
if (ix == DKIX_EMPTY) {
assert(!_PyDict_HasSplitTable(mp));
/* Insert into new slot. */
@ -1878,8 +1818,6 @@ insert_to_emptydict(PyInterpreterState *interp, PyDictObject *mp,
/* We don't decref Py_EMPTY_KEYS here because it is immortal. */
assert(mp->ma_values == NULL);
MAINTAIN_TRACKING(mp, key, value);
size_t hashpos = (size_t)hash & (PyDict_MINSIZE-1);
dictkeys_set_index(newkeys, hashpos, 0);
if (unicode) {
@ -3770,11 +3708,6 @@ dict_dict_merge(PyInterpreterState *interp, PyDictObject *mp, PyDictObject *othe
STORE_USED(mp, other->ma_used);
ASSERT_CONSISTENT(mp);
if (_PyObject_GC_IS_TRACKED(other) && !_PyObject_GC_IS_TRACKED(mp)) {
/* Maintain tracking. */
_PyObject_GC_TRACK(mp);
}
return 0;
}
}
@ -4024,8 +3957,7 @@ copy_lock_held(PyObject *o)
split_copy->ma_used = mp->ma_used;
split_copy->_ma_watcher_tag = 0;
dictkeys_incref(mp->ma_keys);
if (_PyObject_GC_IS_TRACKED(mp))
_PyObject_GC_TRACK(split_copy);
_PyObject_GC_TRACK(split_copy);
return (PyObject *)split_copy;
}
@ -4060,10 +3992,6 @@ copy_lock_held(PyObject *o)
new->ma_used = mp->ma_used;
ASSERT_CONSISTENT(new);
if (_PyObject_GC_IS_TRACKED(mp)) {
/* Maintain tracking. */
_PyObject_GC_TRACK(new);
}
return (PyObject *)new;
}
@ -4350,8 +4278,6 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu
*result = NULL;
}
}
MAINTAIN_TRACKING(mp, key, value);
STORE_USED(mp, mp->ma_used + 1);
assert(mp->ma_keys->dk_usable >= 0);
ASSERT_CONSISTENT(mp);
@ -4801,15 +4727,8 @@ dict_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
d->ma_values = NULL;
ASSERT_CONSISTENT(d);
if (type != &PyDict_Type) {
// Don't track if a subclass tp_alloc is PyType_GenericAlloc()
if (!_PyObject_GC_IS_TRACKED(d)) {
_PyObject_GC_TRACK(d);
}
}
else {
// _PyType_AllocNoTrack() does not track the created object
assert(!_PyObject_GC_IS_TRACKED(d));
if (!_PyObject_GC_IS_TRACKED(d)) {
_PyObject_GC_TRACK(d);
}
return self;
}
@ -6746,19 +6665,14 @@ make_dict_from_instance_attributes(PyInterpreterState *interp,
{
dictkeys_incref(keys);
Py_ssize_t used = 0;
Py_ssize_t track = 0;
size_t size = shared_keys_usable_size(keys);
for (size_t i = 0; i < size; i++) {
PyObject *val = values->values[i];
if (val != NULL) {
used += 1;
track += _PyObject_GC_MAY_BE_TRACKED(val);
}
}
PyDictObject *res = (PyDictObject *)new_dict(interp, keys, values, used, 0);
if (track && res) {
_PyObject_GC_TRACK(res);
}
return res;
}
@ -7204,6 +7118,7 @@ _PyObject_SetManagedDict(PyObject *obj, PyObject *new_dict)
// since we locked it.
dict = _PyObject_ManagedDictPointer(obj)->dict;
err = _PyDict_DetachFromObject(dict, obj);
assert(err == 0 || new_dict == NULL);
if (err == 0) {
FT_ATOMIC_STORE_PTR(_PyObject_ManagedDictPointer(obj)->dict,
(PyDictObject *)Py_XNewRef(new_dict));
@ -7236,7 +7151,21 @@ void
PyObject_ClearManagedDict(PyObject *obj)
{
if (_PyObject_SetManagedDict(obj, NULL) < 0) {
/* Must be out of memory */
assert(PyErr_Occurred() == PyExc_MemoryError);
PyErr_WriteUnraisable(NULL);
/* Clear the dict */
PyDictObject *dict = _PyObject_GetManagedDict(obj);
Py_BEGIN_CRITICAL_SECTION2(dict, obj);
dict = _PyObject_ManagedDictPointer(obj)->dict;
PyInterpreterState *interp = _PyInterpreterState_GET();
PyDictKeysObject *oldkeys = dict->ma_keys;
set_keys(dict, Py_EMPTY_KEYS);
dict->ma_values = NULL;
dictkeys_decref(interp, oldkeys, IS_DICT_SHARED(dict));
STORE_USED(dict, 0);
set_dict_inline_values(obj, NULL);
Py_END_CRITICAL_SECTION2();
}
}
@ -7261,12 +7190,6 @@ _PyDict_DetachFromObject(PyDictObject *mp, PyObject *obj)
PyDictValues *values = copy_values(mp->ma_values);
if (values == NULL) {
/* Out of memory. Clear the dict */
PyInterpreterState *interp = _PyInterpreterState_GET();
PyDictKeysObject *oldkeys = mp->ma_keys;
set_keys(mp, Py_EMPTY_KEYS);
dictkeys_decref(interp, oldkeys, IS_DICT_SHARED(mp));
STORE_USED(mp, 0);
PyErr_NoMemory();
return -1;
}

View File

@ -107,8 +107,6 @@ static void
track_module(PyModuleObject *m)
{
_PyDict_EnablePerThreadRefcounting(m->md_dict);
PyObject_GC_Track(m->md_dict);
_PyObject_SetDeferredRefcount((PyObject *)m);
PyObject_GC_Track(m);
}

View File

@ -2340,10 +2340,6 @@ dummy_func(
DEOPT_IF(ep->me_key != name);
PyObject *old_value = ep->me_value;
DEOPT_IF(old_value == NULL);
/* Ensure dict is GC tracked if it needs to be */
if (!_PyObject_GC_IS_TRACKED(dict) && _PyObject_GC_MAY_BE_TRACKED(PyStackRef_AsPyObjectBorrow(value))) {
_PyObject_GC_TRACK(dict);
}
_PyDict_NotifyEvent(tstate->interp, PyDict_EVENT_MODIFIED, dict, name, PyStackRef_AsPyObjectBorrow(value));
ep->me_value = PyStackRef_AsPyObjectSteal(value);
// old_value should be DECREFed after GC track checking is done, if not, it could raise a segmentation fault,

View File

@ -821,6 +821,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
entry_frame.instr_ptr = (_Py_CODEUNIT *)_Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS + 1;
entry_frame.stackpointer = entry_frame.localsplus;
entry_frame.owner = FRAME_OWNED_BY_CSTACK;
entry_frame.visited = 0;
entry_frame.return_offset = 0;
/* Push frame */
entry_frame.previous = tstate->current_frame;

View File

@ -2914,10 +2914,6 @@
UOP_STAT_INC(uopcode, miss);
JUMP_TO_JUMP_TARGET();
}
/* Ensure dict is GC tracked if it needs to be */
if (!_PyObject_GC_IS_TRACKED(dict) && _PyObject_GC_MAY_BE_TRACKED(PyStackRef_AsPyObjectBorrow(value))) {
_PyObject_GC_TRACK(dict);
}
_PyFrame_SetStackPointer(frame, stack_pointer);
_PyDict_NotifyEvent(tstate->interp, PyDict_EVENT_MODIFIED, dict, name, PyStackRef_AsPyObjectBorrow(value));
stack_pointer = _PyFrame_GetStackPointer(frame);

View File

@ -5,7 +5,7 @@
#include "Python.h"
#include "pycore_ceval.h" // _Py_set_eval_breaker_bit()
#include "pycore_context.h"
#include "pycore_dict.h" // _PyDict_MaybeUntrack()
#include "pycore_dict.h" // _PyInlineValuesSize()
#include "pycore_initconfig.h"
#include "pycore_interp.h" // PyInterpreterState.gc
#include "pycore_object.h"
@ -185,6 +185,7 @@ _PyGC_Init(PyInterpreterState *interp)
if (gcstate->callbacks == NULL) {
return _PyStatus_NO_MEMORY();
}
gcstate->prior_heap_size = 0;
gcstate->heap_size = 0;
return _PyStatus_OK();
@ -747,21 +748,6 @@ untrack_tuples(PyGC_Head *head)
}
}
/* Try to untrack all currently tracked dictionaries */
static void
untrack_dicts(PyGC_Head *head)
{
PyGC_Head *next, *gc = GC_NEXT(head);
while (gc != head) {
PyObject *op = FROM_GC(gc);
next = GC_NEXT(gc);
if (PyDict_CheckExact(op)) {
_PyDict_MaybeUntrack(op);
}
gc = next;
}
}
/* Return true if object has a pre-PEP 442 finalization method. */
static int
has_legacy_finalizer(PyObject *op)
@ -1258,15 +1244,10 @@ handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable,
gc_list_merge(resurrected, old_generation);
}
#define UNTRACK_TUPLES 1
#define UNTRACK_DICTS 2
static void
gc_collect_region(PyThreadState *tstate,
PyGC_Head *from,
PyGC_Head *to,
int untrack,
struct gc_collection_stats *stats);
static inline Py_ssize_t
@ -1315,6 +1296,7 @@ gc_collect_young(PyThreadState *tstate,
GCState *gcstate = &tstate->interp->gc;
PyGC_Head *young = &gcstate->young.head;
PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
untrack_tuples(&gcstate->young.head);
GC_STAT_ADD(0, collections, 1);
#ifdef Py_STATS
{
@ -1328,7 +1310,8 @@ gc_collect_young(PyThreadState *tstate,
PyGC_Head survivors;
gc_list_init(&survivors);
gc_collect_region(tstate, young, &survivors, UNTRACK_TUPLES, stats);
gc_list_set_space(young, gcstate->visited_space);
gc_collect_region(tstate, young, &survivors, stats);
Py_ssize_t survivor_count = 0;
if (gcstate->visited_space) {
/* objects in visited space have bit set, so we set it here */
@ -1343,16 +1326,11 @@ gc_collect_young(PyThreadState *tstate,
survivor_count++;
}
}
(void)survivor_count; // Silence compiler warning
gc_list_merge(&survivors, visited);
validate_old(gcstate);
gcstate->young.count = 0;
gcstate->old[gcstate->visited_space].count++;
Py_ssize_t scale_factor = gcstate->old[0].threshold;
if (scale_factor < 1) {
scale_factor = 1;
}
gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
gcstate->work_to_do += survivor_count * 4;
add_stats(gcstate, 0, stats);
}
@ -1368,15 +1346,15 @@ IS_IN_VISITED(PyGC_Head *gc, int visited_space)
struct container_and_flag {
PyGC_Head *container;
int visited_space;
uintptr_t size;
Py_ssize_t size;
};
/* A traversal callback for adding to container */
static int
visit_add_to_container(PyObject *op, void *arg)
{
OBJECT_STAT_INC(object_visits);
struct container_and_flag *cf = (struct container_and_flag *)arg;
OBJECT_STAT_INC(object_visits);
int visited = cf->visited_space;
assert(visited == get_gc_state()->visited_space);
if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) {
@ -1391,10 +1369,9 @@ visit_add_to_container(PyObject *op, void *arg)
return 0;
}
static uintptr_t
static Py_ssize_t
expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCState *gcstate)
{
validate_list(container, collecting_clear_unreachable_clear);
struct container_and_flag arg = {
.container = container,
.visited_space = gcstate->visited_space,
@ -1406,6 +1383,7 @@ expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCStat
* have been marked as visited */
assert(IS_IN_VISITED(gc, gcstate->visited_space));
PyObject *op = FROM_GC(gc);
assert(_PyObject_GC_IS_TRACKED(op));
if (_Py_IsImmortal(op)) {
PyGC_Head *next = GC_NEXT(gc);
gc_list_move(gc, &get_gc_state()->permanent_generation.head);
@ -1425,20 +1403,187 @@ expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCStat
static void
completed_cycle(GCState *gcstate)
{
#ifdef Py_DEBUG
PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head;
assert(gc_list_is_empty(not_visited));
#endif
gcstate->visited_space = flip_old_space(gcstate->visited_space);
assert(gc_list_is_empty(&gcstate->old[gcstate->visited_space^1].head));
int not_visited = gcstate->visited_space;
gcstate->visited_space = flip_old_space(not_visited);
/* Make sure all young objects have old space bit set correctly */
PyGC_Head *young = &gcstate->young.head;
PyGC_Head *gc = GC_NEXT(young);
while (gc != young) {
PyGC_Head *next = GC_NEXT(gc);
gc_set_old_space(gc, gcstate->visited_space);
gc_set_old_space(gc, not_visited);
gc = next;
}
gcstate->work_to_do = 0;
gcstate->phase = GC_PHASE_MARK;
}
/* Move `op` onto the `reachable` list if it is a tracked, mortal GC object
 * that is not yet in the visited space.
 *
 * op:            candidate object; may be NULL.
 * reachable:     list that receives the object.
 * visited_space: old-space value that identifies already-visited objects.
 *
 * Returns 1 if the object was moved (its old-space bit is flipped first),
 * 0 otherwise. */
static Py_ssize_t
move_to_reachable(PyObject *op, PyGC_Head *reachable, int visited_space)
{
    if (op == NULL || _Py_IsImmortal(op) || !_PyObject_IS_GC(op)) {
        return 0;
    }
    PyGC_Head *gc = AS_GC(op);
    if (!_PyObject_GC_IS_TRACKED(op) || gc_old_space(gc) == visited_space) {
        return 0;
    }
    gc_flip_old_space(gc);
    gc_list_move(gc, reachable);
    return 1;
}
/* Drain the `reachable` work list into `visited`.
 *
 * Each object taken from the front of `reachable` is moved to `visited` and
 * then traversed with visit_add_to_container, which is handed `reachable`
 * as its container, so the loop runs until no more objects are added.
 *
 * Returns the final `size` field of the traversal argument, which starts
 * at 0 (presumably the number of objects the callback added -- the callback
 * body is not fully visible here). */
static Py_ssize_t
mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space)
{
    struct container_and_flag visit_arg = {
        .container = reachable,
        .visited_space = visited_space,
        .size = 0
    };
    // Transitively traverse all objects from reachable, until empty
    while (!gc_list_is_empty(reachable)) {
        PyGC_Head *head = _PyGCHead_NEXT(reachable);
        assert(gc_old_space(head) == visited_space);
        gc_list_move(head, visited);
        PyObject *obj = FROM_GC(head);
        (void) Py_TYPE(obj)->tp_traverse(obj, visit_add_to_container, &visit_arg);
    }
    gc_list_validate_space(visited, visited_space);
    return visit_arg.size;
}
/* Mark everything reachable from the interpreter-level roots.
 *
 * The roots are the interpreter's `sysdict`, `builtins` and `dict`, plus the
 * `tp_dict` and `tp_subclasses` of every managed static builtin and
 * extension type slot.  Roots are queued on a local list and then expanded
 * transitively into `visited`.
 *
 * Returns the number of root objects queued plus the count reported by
 * mark_all_reachable(). */
static Py_ssize_t
mark_global_roots(PyInterpreterState *interp, PyGC_Head *visited, int visited_space)
{
    PyGC_Head reachable;
    gc_list_init(&reachable);
    Py_ssize_t marked = 0;

    PyObject *interp_roots[] = {
        interp->sysdict,
        interp->builtins,
        interp->dict,
    };
    for (size_t r = 0; r < sizeof(interp_roots) / sizeof(interp_roots[0]); r++) {
        marked += move_to_reachable(interp_roots[r], &reachable, visited_space);
    }

    struct types_state *types = &interp->types;
    for (int i = 0; i < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; i++) {
        marked += move_to_reachable(
            types->builtins.initialized[i].tp_dict, &reachable, visited_space);
        marked += move_to_reachable(
            types->builtins.initialized[i].tp_subclasses, &reachable, visited_space);
    }
    for (int i = 0; i < _Py_MAX_MANAGED_STATIC_EXT_TYPES; i++) {
        marked += move_to_reachable(
            types->for_extensions.initialized[i].tp_dict, &reachable, visited_space);
        marked += move_to_reachable(
            types->for_extensions.initialized[i].tp_subclasses, &reachable, visited_space);
    }

    marked += mark_all_reachable(&reachable, visited, visited_space);
    assert(gc_list_is_empty(&reachable));
    return marked;
}
/* Mark everything reachable from the frames of every thread of `interp`.
 *
 * For each frame not owned by the C stack, the frame's `f_locals`, its
 * function object and every non-null entry between `localsplus` and
 * `stackpointer` are queued, then the queue is expanded transitively into
 * `visited`.
 *
 * start: when true, every frame of every thread is walked.  When false, the
 *        walk down a thread's frame chain stops after scanning the first
 *        frame whose `visited` flag is already set.
 *
 * Returns the number of objects queued plus the count reported by
 * mark_all_reachable(). */
static Py_ssize_t
mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, bool start)
{
    PyGC_Head reachable;
    gc_list_init(&reachable);
    Py_ssize_t marked = 0;
    _PyRuntimeState *runtime = &_PyRuntime;

    // Move all objects on stacks to reachable
    HEAD_LOCK(runtime);
    PyThreadState *ts = PyInterpreterState_ThreadHead(interp);
    HEAD_UNLOCK(runtime);
    while (ts != NULL) {
        for (_PyInterpreterFrame *frame = ts->current_frame;
             frame != NULL;
             frame = frame->previous)
        {
            if (frame->owner == FRAME_OWNED_BY_CSTACK) {
                continue;
            }
            _PyStackRef *base = frame->localsplus;
            _PyStackRef *slot = frame->stackpointer;
            marked += move_to_reachable(frame->f_locals, &reachable, visited_space);
            PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj);
            marked += move_to_reachable(func, &reachable, visited_space);
            // Walk from the top of the stack down to the first local.
            while (slot > base) {
                slot--;
                if (!PyStackRef_IsNull(*slot)) {
                    marked += move_to_reachable(PyStackRef_AsPyObjectBorrow(*slot),
                                                &reachable, visited_space);
                }
            }
            if (!start && frame->visited) {
                // If this frame has already been visited, then the lower frames
                // will have already been visited and will not have changed
                break;
            }
            frame->visited = 1;
        }
        HEAD_LOCK(runtime);
        ts = PyThreadState_Next(ts);
        HEAD_UNLOCK(runtime);
    }
    marked += mark_all_reachable(&reachable, visited, visited_space);
    assert(gc_list_is_empty(&reachable));
    return marked;
}
/* Run the marking phase: mark objects reachable from the global roots and
 * from all thread stacks, subtract the number marked from `work_to_do`, and
 * switch the collector to GC_PHASE_COLLECT.
 *
 * Returns the number of objects marked.
 *
 * NOTE(review): gc_collect_increment() subtracts the returned count from
 * `work_to_do` again after calling this function -- confirm the double
 * decrement is intended. */
static Py_ssize_t
mark_at_start(PyThreadState *tstate)
{
    // TODO: make this incremental
    PyInterpreterState *interp = tstate->interp;
    GCState *gcstate = &interp->gc;
    validate_old(gcstate);
    int space = gcstate->visited_space;
    PyGC_Head *visited = &gcstate->old[space].head;
    Py_ssize_t marked = mark_global_roots(interp, visited, space);
    marked += mark_stacks(interp, visited, space, true);
    gcstate->work_to_do -= marked;
    gcstate->phase = GC_PHASE_COLLECT;
    return marked;
}
/* Compute how much additional collection work to schedule.
 *
 * The amount depends on three things:
 *   1. The number of new objects created (`young.count`).
 *   2. The growth in heap size since the previous call; ignored when it is
 *      negative or when the heap is small relative to the new objects.
 *   3. A fraction of the heap size, capped at the number of new objects to
 *      avoid quadratic effects.
 *
 * For a steady-state heap this comes to three times the number of new
 * objects, which keeps the collector ahead in the worst case of all new
 * objects being garbage.  Tracking survival rates could improve on this.
 *
 * Side effects: resets `young.count` to 0 and records the current heap size
 * in `prior_heap_size`. */
static Py_ssize_t
assess_work_to_do(GCState *gcstate)
{
    Py_ssize_t scale_factor = gcstate->old[0].threshold;
    if (scale_factor < 2) {
        scale_factor = 2;
    }
    const Py_ssize_t heap_size = gcstate->heap_size;
    const Py_ssize_t new_objects = gcstate->young.count;

    Py_ssize_t growth = heap_size - gcstate->prior_heap_size;
    if (growth < 0 || heap_size < new_objects * scale_factor) {
        // Shrinking heap, or small heap: ignore growth
        growth = 0;
    }

    Py_ssize_t heap_fraction = heap_size / SCAN_RATE_DIVISOR / scale_factor;
    if (heap_fraction > new_objects) {
        heap_fraction = new_objects;
    }

    gcstate->young.count = 0;
    gcstate->prior_heap_size = heap_size;
    return new_objects*3/2 + growth*2 + heap_fraction*3/2;
}
static void
@ -1446,16 +1591,24 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
{
GC_STAT_ADD(1, collections, 1);
GCState *gcstate = &tstate->interp->gc;
gcstate->work_to_do += assess_work_to_do(gcstate);
untrack_tuples(&gcstate->young.head);
if (gcstate->phase == GC_PHASE_MARK) {
Py_ssize_t objects_marked = mark_at_start(tstate);
GC_STAT_ADD(1, objects_transitively_reachable, objects_marked);
gcstate->work_to_do -= objects_marked;
return;
}
PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head;
PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
PyGC_Head increment;
gc_list_init(&increment);
Py_ssize_t scale_factor = gcstate->old[0].threshold;
if (scale_factor < 1) {
scale_factor = 1;
}
Py_ssize_t objects_marked = mark_stacks(tstate->interp, visited, gcstate->visited_space, false);
GC_STAT_ADD(1, objects_transitively_reachable, objects_marked);
gcstate->work_to_do -= objects_marked;
gc_list_set_space(&gcstate->young.head, gcstate->visited_space);
gc_list_merge(&gcstate->young.head, &increment);
gcstate->young.count = 0;
gc_list_validate_space(&increment, gcstate->visited_space);
Py_ssize_t increment_size = 0;
while (increment_size < gcstate->work_to_do) {
@ -1465,17 +1618,18 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
PyGC_Head *gc = _PyGCHead_NEXT(not_visited);
gc_list_move(gc, &increment);
increment_size++;
assert(!_Py_IsImmortal(FROM_GC(gc)));
gc_set_old_space(gc, gcstate->visited_space);
increment_size += expand_region_transitively_reachable(&increment, gc, gcstate);
}
GC_STAT_ADD(1, objects_not_transitively_reachable, increment_size);
gc_list_validate_space(&increment, gcstate->visited_space);
PyGC_Head survivors;
gc_list_init(&survivors);
gc_collect_region(tstate, &increment, &survivors, UNTRACK_TUPLES, stats);
gc_collect_region(tstate, &increment, &survivors, stats);
gc_list_validate_space(&survivors, gcstate->visited_space);
gc_list_merge(&survivors, visited);
assert(gc_list_is_empty(&increment));
gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
gcstate->work_to_do -= increment_size;
validate_old(gcstate);
@ -1496,20 +1650,25 @@ gc_collect_full(PyThreadState *tstate,
PyGC_Head *young = &gcstate->young.head;
PyGC_Head *pending = &gcstate->old[gcstate->visited_space^1].head;
PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
/* merge all generations into visited */
gc_list_validate_space(young, gcstate->visited_space);
gc_list_set_space(pending, gcstate->visited_space);
untrack_tuples(&gcstate->young.head);
/* merge all generations into pending */
gc_list_validate_space(young, 1-gcstate->visited_space);
gc_list_merge(young, pending);
gc_list_set_space(visited, 1-gcstate->visited_space);
gc_list_merge(visited, pending);
/* Mark reachable */
Py_ssize_t reachable = mark_global_roots(tstate->interp, visited, gcstate->visited_space);
reachable += mark_stacks(tstate->interp, visited, gcstate->visited_space, true);
(void)reachable;
GC_STAT_ADD(2, objects_transitively_reachable, reachable);
GC_STAT_ADD(2, objects_not_transitively_reachable, gc_list_size(pending));
gcstate->young.count = 0;
gc_list_merge(pending, visited);
gc_collect_region(tstate, visited, visited,
UNTRACK_TUPLES | UNTRACK_DICTS,
stats);
gc_list_set_space(pending, gcstate->visited_space);
gc_collect_region(tstate, pending, visited, stats);
gcstate->young.count = 0;
gcstate->old[0].count = 0;
gcstate->old[1].count = 0;
completed_cycle(gcstate);
gcstate->work_to_do = - gcstate->young.threshold * 2;
_PyGC_ClearAllFreeLists(tstate->interp);
validate_old(gcstate);
@ -1522,7 +1681,6 @@ static void
gc_collect_region(PyThreadState *tstate,
PyGC_Head *from,
PyGC_Head *to,
int untrack,
struct gc_collection_stats *stats)
{
PyGC_Head unreachable; /* non-problematic unreachable trash */
@ -1536,12 +1694,6 @@ gc_collect_region(PyThreadState *tstate,
gc_list_init(&unreachable);
deduce_unreachable(from, &unreachable);
validate_consistent_old_space(from);
if (untrack & UNTRACK_TUPLES) {
untrack_tuples(from);
}
if (untrack & UNTRACK_DICTS) {
untrack_dicts(from);
}
validate_consistent_old_space(to);
if (from != to) {
gc_list_merge(from, to);
@ -1761,9 +1913,10 @@ _PyGC_Freeze(PyInterpreterState *interp)
{
GCState *gcstate = &interp->gc;
/* The permanent_generation has its old space bit set to zero */
if (gcstate->visited_space) {
if (!gcstate->visited_space) {
gc_list_set_space(&gcstate->young.head, 0);
}
gc_list_validate_space(&gcstate->young.head, 0);
gc_list_merge(&gcstate->young.head, &gcstate->permanent_generation.head);
gcstate->young.count = 0;
PyGC_Head*old0 = &gcstate->old[0].head;

View File

@ -3,7 +3,7 @@
#include "pycore_brc.h" // struct _brc_thread_state
#include "pycore_ceval.h" // _Py_set_eval_breaker_bit()
#include "pycore_context.h"
#include "pycore_dict.h" // _PyDict_MaybeUntrack()
#include "pycore_dict.h" // _PyInlineValuesSize()
#include "pycore_freelist.h" // _PyObject_ClearFreeLists()
#include "pycore_initconfig.h"
#include "pycore_interp.h" // PyInterpreterState.gc
@ -493,13 +493,6 @@ update_refs(const mi_heap_t *heap, const mi_heap_area_t *area,
return true;
}
}
else if (PyDict_CheckExact(op)) {
_PyDict_MaybeUntrack(op);
if (!_PyObject_GC_IS_TRACKED(op)) {
gc_restore_refs(op);
return true;
}
}
}
// We repurpose ob_tid to compute "gc_refs", the number of external

View File

@ -7432,10 +7432,6 @@
DEOPT_IF(ep->me_key != name, STORE_ATTR);
PyObject *old_value = ep->me_value;
DEOPT_IF(old_value == NULL, STORE_ATTR);
/* Ensure dict is GC tracked if it needs to be */
if (!_PyObject_GC_IS_TRACKED(dict) && _PyObject_GC_MAY_BE_TRACKED(PyStackRef_AsPyObjectBorrow(value))) {
_PyObject_GC_TRACK(dict);
}
_PyFrame_SetStackPointer(frame, stack_pointer);
_PyDict_NotifyEvent(tstate->interp, PyDict_EVENT_MODIFIED, dict, name, PyStackRef_AsPyObjectBorrow(value));
stack_pointer = _PyFrame_GetStackPointer(frame);

View File

@ -230,6 +230,8 @@ print_gc_stats(FILE *out, GCStats *stats)
for (int i = 0; i < NUM_GENERATIONS; i++) {
fprintf(out, "GC[%d] collections: %" PRIu64 "\n", i, stats[i].collections);
fprintf(out, "GC[%d] object visits: %" PRIu64 "\n", i, stats[i].object_visits);
fprintf(out, "GC[%d] objects reachable from roots: %" PRIu64 "\n", i, stats[i].objects_transitively_reachable);
fprintf(out, "GC[%d] objects not reachable from roots: %" PRIu64 "\n", i, stats[i].objects_not_transitively_reachable);
fprintf(out, "GC[%d] objects collected: %" PRIu64 "\n", i, stats[i].objects_collected);
}
}

View File

@ -1118,6 +1118,8 @@ def gc_stats_section() -> Section:
Count(gen["collections"]),
Count(gen["objects collected"]),
Count(gen["object visits"]),
Count(gen["objects reachable from roots"]),
Count(gen["objects not reachable from roots"]),
)
for (i, gen) in enumerate(gc_stats)
]
@ -1127,7 +1129,8 @@ def gc_stats_section() -> Section:
"GC collections and effectiveness",
[
Table(
("Generation:", "Collections:", "Objects collected:", "Object visits:"),
("Generation:", "Collections:", "Objects collected:", "Object visits:",
"Reachable from roots:", "Not reachable from roots:"),
calc_gc_stats,
)
],