GH-126491: GC: Mark objects reachable from roots before doing cycle collection (GH-126502)

* Mark almost all reachable objects before doing collection phase
* Add stats for objects marked
* Visit new frames before each increment
* Remove lazy dict tracking
* Update docs
* Clearer calculation of work to do.
2024-11-18 14:31:26 +00:00 · 2024-11-18 14:31:26 +00:00 · b0fcc2c47a
parent a1d9c8aa80
commit b0fcc2c47a
21 changed files with 332 additions and 330 deletions
--- a/Include/cpython/pystats.h
+++ b/Include/cpython/pystats.h
@ -99,6 +99,8 @@ typedef struct _gc_stats {
    uint64_t collections;
    uint64_t object_visits;
    uint64_t objects_collected;
+    uint64_t objects_transitively_reachable;
+    uint64_t objects_not_transitively_reachable;
 } GCStats;

 typedef struct _uop_stats {
--- a/Include/internal/pycore_dict.h
+++ b/Include/internal/pycore_dict.h
@ -43,8 +43,6 @@ extern int _PyDict_Next(

 extern int _PyDict_HasOnlyStringKeys(PyObject *mp);

-extern void _PyDict_MaybeUntrack(PyObject *mp);
-
 // Export for '_ctypes' shared extension
 PyAPI_FUNC(Py_ssize_t) _PyDict_SizeOf(PyDictObject *);

--- a/Include/internal/pycore_frame.h
+++ b/Include/internal/pycore_frame.h
@ -75,6 +75,7 @@ typedef struct _PyInterpreterFrame {
    _PyStackRef *stackpointer;
    uint16_t return_offset;  /* Only relevant during a function call */
    char owner;
+    char visited;
    /* Locals and stack */
    _PyStackRef localsplus[1];
 } _PyInterpreterFrame;
@ -207,6 +208,7 @@ _PyFrame_Initialize(
 #endif
    frame->return_offset = 0;
    frame->owner = FRAME_OWNED_BY_THREAD;
+    frame->visited = 0;

    for (int i = null_locals_from; i < code->co_nlocalsplus; i++) {
        frame->localsplus[i] = PyStackRef_NULL;
@ -389,6 +391,7 @@ _PyFrame_PushTrampolineUnchecked(PyThreadState *tstate, PyCodeObject *code, int
    frame->instr_ptr = _PyCode_CODE(code);
 #endif
    frame->owner = FRAME_OWNED_BY_THREAD;
+    frame->visited = 0;
    frame->return_offset = 0;

 #ifdef Py_GIL_DISABLED
--- a/Include/internal/pycore_gc.h
+++ b/Include/internal/pycore_gc.h
@ -10,11 +10,11 @@ extern "C" {

 /* GC information is stored BEFORE the object structure. */
 typedef struct {
-    // Pointer to next object in the list.
+    // Tagged pointer to next object in the list.
    // 0 means the object is not tracked
    uintptr_t _gc_next;

-    // Pointer to previous object in the list.
+    // Tagged pointer to previous object in the list.
    // Lowest two bits are used for flags documented later.
    uintptr_t _gc_prev;
 } PyGC_Head;
@ -302,6 +302,11 @@ struct gc_generation_stats {
    Py_ssize_t uncollectable;
 };

+enum _GCPhase {
+    GC_PHASE_MARK = 0,
+    GC_PHASE_COLLECT = 1
+};
+
 struct _gc_runtime_state {
    /* List of objects that still need to be cleaned up, singly linked
     * via their gc headers' gc_prev pointers.  */
@ -325,10 +330,12 @@ struct _gc_runtime_state {
    /* a list of callbacks to be invoked when collection is performed */
    PyObject *callbacks;

+    Py_ssize_t prior_heap_size;
    Py_ssize_t heap_size;
    Py_ssize_t work_to_do;
    /* Which of the old spaces is the visited space */
    int visited_space;
+    int phase;

 #ifdef Py_GIL_DISABLED
    /* This is the number of objects that survived the last full
--- a/Include/internal/pycore_object.h
+++ b/Include/internal/pycore_object.h
@ -466,8 +466,8 @@ static inline void _PyObject_GC_TRACK(
    PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev);
    _PyGCHead_SET_NEXT(last, gc);
    _PyGCHead_SET_PREV(gc, last);
-    /* Young objects will be moved into the visited space during GC, so set the bit here */
-    gc->_gc_next = ((uintptr_t)generation0) | (uintptr_t)interp->gc.visited_space;
+    uintptr_t not_visited = 1 ^ interp->gc.visited_space;
+    gc->_gc_next = ((uintptr_t)generation0) | not_visited;
    generation0->_gc_prev = (uintptr_t)gc;
 #endif
 }
--- a/Include/internal/pycore_runtime_init.h
+++ b/Include/internal/pycore_runtime_init.h
@ -134,6 +134,7 @@ extern PyTypeObject _PyExc_MemoryError;
                { .threshold = 0, }, \
            }, \
            .work_to_do = -5000, \
+            .phase = GC_PHASE_MARK, \
        }, \
        .qsbr = { \
            .wr_seq = QSBR_INITIAL, \
--- a/InternalDocs/garbage_collector.md
+++ b/InternalDocs/garbage_collector.md
@ -351,6 +351,7 @@ follows these steps in order:
   the reference counts fall to 0, triggering the destruction of all unreachable
   objects.

+
 Optimization: incremental collection
 ====================================

@ -484,6 +485,46 @@ specifically in a generation by calling `gc.collect(generation=NUM)`.
 ```


+Optimization: visiting reachable objects
+========================================
+
+An object cannot be garbage if it can be reached.
+
+To avoid having to identify reference cycles across the whole heap, we can
+reduce the amount of work done considerably by first moving most reachable objects
+to the `visited` space. Empirically, most reachable objects can be reached from a
+small set of global objects and local variables.
+This step does much less work per object, so it reduces the time spent
+performing garbage collection by at least half.
+
+> [!NOTE]
+> Objects that are not determined to be reachable by this pass are not necessarily
+> unreachable. We still need to perform the main algorithm to determine which objects
+> are actually unreachable.
+
+We use the same technique of forming a transitive closure as the incremental
+collector does to find reachable objects, seeding the list with some global
+objects and the currently executing frames.
+
+This phase moves objects to the `visited` space, as follows:
+
+1. All objects directly referred to by any builtin class, the `sys` module, or the
+   `builtins` module, together with all objects directly referred to from stack
+   frames, are added to a working set of reachable objects.
+2. Until this working set is empty:
+   1. Pop an object from the set and move it to the `visited` space
+   2. For each object directly reachable from that object:
+      * If it is not already in `visited` space and it is a GC object,
+        add it to the working set
+
+
+Before each increment of collection is performed, the stacks are scanned
+to check for any new stack frames that have been created since the last
+increment. All objects directly referred to from those stack frames are
+added to the working set.
+Then the above algorithm is repeated, starting from step 2.
+
+
 Optimization: reusing fields to save memory
 ===========================================

@ -532,8 +573,8 @@ of `PyGC_Head` discussed in the `Memory layout and object structure`_ section:
  currently in.  Instead, when that's needed, ad hoc tricks (like the
  `NEXT_MASK_UNREACHABLE` flag) are employed.

-Optimization: delay tracking containers
-=======================================
+Optimization: delayed untracking of containers
+==============================================

 Certain types of containers cannot participate in a reference cycle, and so do
 not need to be tracked by the garbage collector. Untracking these objects
@ -548,8 +589,8 @@ a container:
 As a general rule, instances of atomic types aren't tracked and instances of
 non-atomic types (containers, user-defined objects...) are.  However, some
 type-specific optimizations can be present in order to suppress the garbage
-collector footprint of simple instances. Some examples of native types that
-benefit from delayed tracking:
+collector footprint of simple instances. Historically, both dictionaries and
+tuples could be untracked during garbage collection. Now only tuples can be:

 - Tuples containing only immutable objects (integers, strings etc,
  and recursively, tuples of immutable objects) do not need to be tracked. The
@ -558,14 +599,8 @@ benefit from delayed tracking:
  tuples at creation time. Instead, all tuples except the empty tuple are tracked
  when created. During garbage collection it is determined whether any surviving
  tuples can be untracked. A tuple can be untracked if all of its contents are
-  already not tracked. Tuples are examined for untracking in all garbage collection
-  cycles. It may take more than one cycle to untrack a tuple.
-
- Dictionaries containing only immutable objects also do not need to be tracked.
-  Dictionaries are untracked when created. If a tracked item is inserted into a
-  dictionary (either as a key or value), the dictionary becomes tracked. During a
-  full garbage collection (all generations), the collector will untrack any dictionaries
-  whose contents are not tracked.
+  already not tracked. Tuples are examined for untracking when moved from the
+  young to the old generation.

 The garbage collector module provides the Python function `is_tracked(obj)`, which returns
 the current tracking status of the object. Subsequent garbage collections may change the
@ -578,11 +613,9 @@ tracking status of the object.
      False
      >>> gc.is_tracked([])
      True
-      >>> gc.is_tracked({})
+      >>> gc.is_tracked(("a", 1))
      False
      >>> gc.is_tracked({"a": 1})
-      False
-      >>> gc.is_tracked({"a": []})
      True
 ```

--- a/Lib/test/test_dict.py
+++ b/Lib/test/test_dict.py
@ -880,115 +880,6 @@ class DictTest(unittest.TestCase):
            gc.collect()
            self.assertIs(ref(), None, "Cycle was not collected")

-    def _not_tracked(self, t):
-        # Nested containers can take several collections to untrack
-        gc.collect()
-        gc.collect()
-        self.assertFalse(gc.is_tracked(t), t)
-
-    def _tracked(self, t):
-        self.assertTrue(gc.is_tracked(t), t)
-        gc.collect()
-        gc.collect()
-        self.assertTrue(gc.is_tracked(t), t)
-
-    def test_string_keys_can_track_values(self):
-        # Test that this doesn't leak.
-        for i in range(10):
-            d = {}
-            for j in range(10):
-                d[str(j)] = j
-            d["foo"] = d
-
-    @support.cpython_only
-    def test_track_literals(self):
-        # Test GC-optimization of dict literals
-        x, y, z, w = 1.5, "a", (1, None), []
-
-        self._not_tracked({})
-        self._not_tracked({x:(), y:x, z:1})
-        self._not_tracked({1: "a", "b": 2})
-        self._not_tracked({1: 2, (None, True, False, ()): int})
-        self._not_tracked({1: object()})
-
-        # Dicts with mutable elements are always tracked, even if those
-        # elements are not tracked right now.
-        self._tracked({1: []})
-        self._tracked({1: ([],)})
-        self._tracked({1: {}})
-        self._tracked({1: set()})
-
-    @support.cpython_only
-    def test_track_dynamic(self):
-        # Test GC-optimization of dynamically-created dicts
-        class MyObject(object):
-            pass
-        x, y, z, w, o = 1.5, "a", (1, object()), [], MyObject()
-
-        d = dict()
-        self._not_tracked(d)
-        d[1] = "a"
-        self._not_tracked(d)
-        d[y] = 2
-        self._not_tracked(d)
-        d[z] = 3
-        self._not_tracked(d)
-        self._not_tracked(d.copy())
-        d[4] = w
-        self._tracked(d)
-        self._tracked(d.copy())
-        d[4] = None
-        self._not_tracked(d)
-        self._not_tracked(d.copy())
-
-        # dd isn't tracked right now, but it may mutate and therefore d
-        # which contains it must be tracked.
-        d = dict()
-        dd = dict()
-        d[1] = dd
-        self._not_tracked(dd)
-        self._tracked(d)
-        dd[1] = d
-        self._tracked(dd)
-
-        d = dict.fromkeys([x, y, z])
-        self._not_tracked(d)
-        dd = dict()
-        dd.update(d)
-        self._not_tracked(dd)
-        d = dict.fromkeys([x, y, z, o])
-        self._tracked(d)
-        dd = dict()
-        dd.update(d)
-        self._tracked(dd)
-
-        d = dict(x=x, y=y, z=z)
-        self._not_tracked(d)
-        d = dict(x=x, y=y, z=z, w=w)
-        self._tracked(d)
-        d = dict()
-        d.update(x=x, y=y, z=z)
-        self._not_tracked(d)
-        d.update(w=w)
-        self._tracked(d)
-
-        d = dict([(x, y), (z, 1)])
-        self._not_tracked(d)
-        d = dict([(x, y), (z, w)])
-        self._tracked(d)
-        d = dict()
-        d.update([(x, y), (z, 1)])
-        self._not_tracked(d)
-        d.update([(x, y), (z, w)])
-        self._tracked(d)
-
-    @support.cpython_only
-    def test_track_subtypes(self):
-        # Dict subtypes are always tracked
-        class MyDict(dict):
-            pass
-        self._tracked(MyDict())
-
    def make_shared_key_dict(self, n):
        class C:
            pass
--- a/Lib/test/test_gc.py
+++ b/Lib/test/test_gc.py
@ -31,6 +31,11 @@ except ImportError:
        return C
    ContainerNoGC = None

+try:
+    import _testinternalcapi
+except ImportError:
+    _testinternalcapi = None
+
 ### Support code
 ###############################################################################

@ -1130,6 +1135,7 @@ class IncrementalGCTests(unittest.TestCase):
    def tearDown(self):
        gc.disable()

+    @unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi")
    @requires_gil_enabled("Free threading does not support incremental GC")
    # Use small increments to emulate longer running process in a shorter time
    @gc_threshold(200, 10)
@ -1155,32 +1161,18 @@ class IncrementalGCTests(unittest.TestCase):
            return head

        head = make_ll(1000)
-        count = 1000
-
-        # There will be some objects we aren't counting,
-        # e.g. the gc stats dicts. This test checks
-        # that the counts don't grow, so we try to
-        # correct for the uncounted objects
-        # This is just an estimate.
-        CORRECTION = 20

        enabled = gc.isenabled()
        gc.enable()
        olds = []
+        initial_heap_size = _testinternalcapi.get_heap_size()
        for i in range(20_000):
            newhead = make_ll(20)
-            count += 20
            newhead.surprise = head
            olds.append(newhead)
            if len(olds) == 20:
-                stats = gc.get_stats()
-                young = stats[0]
-                incremental = stats[1]
-                old = stats[2]
-                collected = young['collected'] + incremental['collected'] + old['collected']
-                count += CORRECTION
-                live = count - collected
-                self.assertLess(live, 25000)
+                new_objects = _testinternalcapi.get_heap_size() - initial_heap_size
+                self.assertLess(new_objects, 25_000)
                del olds[:]
        if not enabled:
            gc.disable()
@ -1322,7 +1314,8 @@ class GCCallbackTests(unittest.TestCase):
            from test.support import gc_collect, SuppressCrashReport

            a = [1, 2, 3]
-            b = [a]
+            b = [a, a]
+            a.append(b)

            # Avoid coredump when Py_FatalError() calls abort()
            SuppressCrashReport().__enter__()
@ -1332,6 +1325,8 @@ class GCCallbackTests(unittest.TestCase):
            # (to avoid deallocating it):
            import ctypes
            ctypes.pythonapi.Py_DecRef(ctypes.py_object(a))
+            del a
+            del b

            # The garbage collector should now have a fatal error
            # when it reaches the broken object
@ -1360,7 +1355,7 @@ class GCCallbackTests(unittest.TestCase):
        self.assertRegex(stderr,
            br'object type name: list')
        self.assertRegex(stderr,
-            br'object repr     : \[1, 2, 3\]')
+            br'object repr     : \[1, 2, 3, \[\[...\], \[...\]\]\]')


 class GCTogglingTests(unittest.TestCase):
--- a/Misc/NEWS.d/next/Core_and_Builtins/2024-11-06-15-22-34.gh-issue-126491.n9VyZc.rst
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-06-15-22-34.gh-issue-126491.n9VyZc.rst
@ -0,0 +1,4 @@
+Add a marking phase to the GC. All objects that can be transitively
+reached from builtin modules or the stacks are marked as reachable
+before cycle detection. This reduces the amount of work done by the
+GC by approximately half.
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@ -2077,6 +2077,12 @@ has_deferred_refcount(PyObject *self, PyObject *op)
 }


+static PyObject *  /* Test helper: Python int built from the current interpreter's gc.heap_size. */
+get_heap_size(PyObject *self, PyObject *Py_UNUSED(ignored))
+{
+    return PyLong_FromInt64(PyInterpreterState_Get()->gc.heap_size);
+}
+
 static PyMethodDef module_functions[] = {
    {"get_configs", get_configs, METH_NOARGS},
    {"get_recursion_depth", get_recursion_depth, METH_NOARGS},
@ -2174,6 +2180,7 @@ static PyMethodDef module_functions[] = {
    {"get_static_builtin_types", get_static_builtin_types, METH_NOARGS},
    {"identify_type_slot_wrappers", identify_type_slot_wrappers, METH_NOARGS},
    {"has_deferred_refcount", has_deferred_refcount, METH_O},
+    {"get_heap_size", get_heap_size, METH_NOARGS},
    {NULL, NULL} /* sentinel */
 };

--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@ -883,6 +883,7 @@ new_dict(PyInterpreterState *interp,
    mp->ma_used = used;
    mp->_ma_watcher_tag = 0;
    ASSERT_CONSISTENT(mp);
+    _PyObject_GC_TRACK(mp);
    return (PyObject *)mp;
 }

@ -1578,64 +1579,6 @@ _PyDict_HasOnlyStringKeys(PyObject *dict)
    return 1;
 }

-#define MAINTAIN_TRACKING(mp, key, value) \
-    do { \
-        if (!_PyObject_GC_IS_TRACKED(mp)) { \
-            if (_PyObject_GC_MAY_BE_TRACKED(key) || \
-                _PyObject_GC_MAY_BE_TRACKED(value)) { \
-                _PyObject_GC_TRACK(mp); \
-            } \
-        } \
-    } while(0)
-
-void
-_PyDict_MaybeUntrack(PyObject *op)
-{
-    PyDictObject *mp;
-    PyObject *value;
-    Py_ssize_t i, numentries;
-
-    ASSERT_WORLD_STOPPED_OR_DICT_LOCKED(op);
-
-    if (!PyDict_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op))
-        return;
-
-    mp = (PyDictObject *) op;
-    ASSERT_CONSISTENT(mp);
-    numentries = mp->ma_keys->dk_nentries;
-    if (_PyDict_HasSplitTable(mp)) {
-        for (i = 0; i < numentries; i++) {
-            if ((value = mp->ma_values->values[i]) == NULL)
-                continue;
-            if (_PyObject_GC_MAY_BE_TRACKED(value)) {
-                return;
-            }
-        }
-    }
-    else {
-        if (DK_IS_UNICODE(mp->ma_keys)) {
-            PyDictUnicodeEntry *ep0 = DK_UNICODE_ENTRIES(mp->ma_keys);
-            for (i = 0; i < numentries; i++) {
-                if ((value = ep0[i].me_value) == NULL)
-                    continue;
-                if (_PyObject_GC_MAY_BE_TRACKED(value))
-                    return;
-            }
-        }
-        else {
-            PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys);
-            for (i = 0; i < numentries; i++) {
-                if ((value = ep0[i].me_value) == NULL)
-                    continue;
-                if (_PyObject_GC_MAY_BE_TRACKED(value) ||
-                    _PyObject_GC_MAY_BE_TRACKED(ep0[i].me_key))
-                    return;
-            }
-        }
-    }
-    _PyObject_GC_UNTRACK(op);
-}
-
 void
 _PyDict_EnablePerThreadRefcounting(PyObject *op)
 {
@ -1761,7 +1704,6 @@ insert_split_value(PyInterpreterState *interp, PyDictObject *mp, PyObject *key,
 {
    assert(PyUnicode_CheckExact(key));
    ASSERT_DICT_LOCKED(mp);
-    MAINTAIN_TRACKING(mp, key, value);
    PyObject *old_value = mp->ma_values->values[ix];
    if (old_value == NULL) {
        _PyDict_NotifyEvent(interp, PyDict_EVENT_ADDED, mp, key, value);
@ -1818,8 +1760,6 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp,
    if (ix == DKIX_ERROR)
        goto Fail;

-    MAINTAIN_TRACKING(mp, key, value);
-
    if (ix == DKIX_EMPTY) {
        assert(!_PyDict_HasSplitTable(mp));
        /* Insert into new slot. */
@ -1878,8 +1818,6 @@ insert_to_emptydict(PyInterpreterState *interp, PyDictObject *mp,
    /* We don't decref Py_EMPTY_KEYS here because it is immortal. */
    assert(mp->ma_values == NULL);

-    MAINTAIN_TRACKING(mp, key, value);
-
    size_t hashpos = (size_t)hash & (PyDict_MINSIZE-1);
    dictkeys_set_index(newkeys, hashpos, 0);
    if (unicode) {
@ -3770,11 +3708,6 @@ dict_dict_merge(PyInterpreterState *interp, PyDictObject *mp, PyDictObject *othe
            STORE_USED(mp, other->ma_used);
            ASSERT_CONSISTENT(mp);

-            if (_PyObject_GC_IS_TRACKED(other) && !_PyObject_GC_IS_TRACKED(mp)) {
-                /* Maintain tracking. */
-                _PyObject_GC_TRACK(mp);
-            }
-
            return 0;
        }
    }
@ -4024,8 +3957,7 @@ copy_lock_held(PyObject *o)
        split_copy->ma_used = mp->ma_used;
        split_copy->_ma_watcher_tag = 0;
        dictkeys_incref(mp->ma_keys);
-        if (_PyObject_GC_IS_TRACKED(mp))
-            _PyObject_GC_TRACK(split_copy);
+        _PyObject_GC_TRACK(split_copy);
        return (PyObject *)split_copy;
    }

@ -4060,10 +3992,6 @@ copy_lock_held(PyObject *o)

        new->ma_used = mp->ma_used;
        ASSERT_CONSISTENT(new);
-        if (_PyObject_GC_IS_TRACKED(mp)) {
-            /* Maintain tracking. */
-            _PyObject_GC_TRACK(new);
-        }

        return (PyObject *)new;
    }
@ -4350,8 +4278,6 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu
                *result = NULL;
            }
        }
-
-        MAINTAIN_TRACKING(mp, key, value);
        STORE_USED(mp, mp->ma_used + 1);
        assert(mp->ma_keys->dk_usable >= 0);
        ASSERT_CONSISTENT(mp);
@ -4801,15 +4727,8 @@ dict_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
    d->ma_values = NULL;
    ASSERT_CONSISTENT(d);

-    if (type != &PyDict_Type) {
-        // Don't track if a subclass tp_alloc is PyType_GenericAlloc()
-        if (!_PyObject_GC_IS_TRACKED(d)) {
-            _PyObject_GC_TRACK(d);
-        }
-    }
-    else {
-        // _PyType_AllocNoTrack() does not track the created object
-        assert(!_PyObject_GC_IS_TRACKED(d));
+    if (!_PyObject_GC_IS_TRACKED(d)) {
+        _PyObject_GC_TRACK(d);
    }
    return self;
 }
@ -6746,19 +6665,14 @@ make_dict_from_instance_attributes(PyInterpreterState *interp,
 {
    dictkeys_incref(keys);
    Py_ssize_t used = 0;
-    Py_ssize_t track = 0;
    size_t size = shared_keys_usable_size(keys);
    for (size_t i = 0; i < size; i++) {
        PyObject *val = values->values[i];
        if (val != NULL) {
            used += 1;
-            track += _PyObject_GC_MAY_BE_TRACKED(val);
        }
    }
    PyDictObject *res = (PyDictObject *)new_dict(interp, keys, values, used, 0);
-    if (track && res) {
-        _PyObject_GC_TRACK(res);
-    }
    return res;
 }

@ -7204,6 +7118,7 @@ _PyObject_SetManagedDict(PyObject *obj, PyObject *new_dict)
        // since we locked it.
        dict = _PyObject_ManagedDictPointer(obj)->dict;
        err = _PyDict_DetachFromObject(dict, obj);
+        assert(err == 0 || new_dict == NULL);
        if (err == 0) {
            FT_ATOMIC_STORE_PTR(_PyObject_ManagedDictPointer(obj)->dict,
                                (PyDictObject *)Py_XNewRef(new_dict));
@ -7236,7 +7151,21 @@ void
 PyObject_ClearManagedDict(PyObject *obj)
 {
    if (_PyObject_SetManagedDict(obj, NULL) < 0) {
+        /* Must be out of memory */
+        assert(PyErr_Occurred() == PyExc_MemoryError);
        PyErr_WriteUnraisable(NULL);
+        /* Clear the dict */
+        PyDictObject *dict = _PyObject_GetManagedDict(obj);
+        Py_BEGIN_CRITICAL_SECTION2(dict, obj);
+        dict = _PyObject_ManagedDictPointer(obj)->dict;
+        PyInterpreterState *interp = _PyInterpreterState_GET();
+        PyDictKeysObject *oldkeys = dict->ma_keys;
+        set_keys(dict, Py_EMPTY_KEYS);
+        dict->ma_values = NULL;
+        dictkeys_decref(interp, oldkeys, IS_DICT_SHARED(dict));
+        STORE_USED(dict, 0);
+        set_dict_inline_values(obj, NULL);
+        Py_END_CRITICAL_SECTION2();
    }
 }

@ -7261,12 +7190,6 @@ _PyDict_DetachFromObject(PyDictObject *mp, PyObject *obj)
    PyDictValues *values = copy_values(mp->ma_values);

    if (values == NULL) {
-        /* Out of memory. Clear the dict */
-        PyInterpreterState *interp = _PyInterpreterState_GET();
-        PyDictKeysObject *oldkeys = mp->ma_keys;
-        set_keys(mp, Py_EMPTY_KEYS);
-        dictkeys_decref(interp, oldkeys, IS_DICT_SHARED(mp));
-        STORE_USED(mp, 0);
        PyErr_NoMemory();
        return -1;
    }
--- a/Objects/moduleobject.c
+++ b/Objects/moduleobject.c
@ -107,8 +107,6 @@ static void
 track_module(PyModuleObject *m)
 {
    _PyDict_EnablePerThreadRefcounting(m->md_dict);
-    PyObject_GC_Track(m->md_dict);
-
    _PyObject_SetDeferredRefcount((PyObject *)m);
    PyObject_GC_Track(m);
 }
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@ -2340,10 +2340,6 @@ dummy_func(
            DEOPT_IF(ep->me_key != name);
            PyObject *old_value = ep->me_value;
            DEOPT_IF(old_value == NULL);
-            /* Ensure dict is GC tracked if it needs to be */
-            if (!_PyObject_GC_IS_TRACKED(dict) && _PyObject_GC_MAY_BE_TRACKED(PyStackRef_AsPyObjectBorrow(value))) {
-                _PyObject_GC_TRACK(dict);
-            }
            _PyDict_NotifyEvent(tstate->interp, PyDict_EVENT_MODIFIED, dict, name, PyStackRef_AsPyObjectBorrow(value));
            ep->me_value = PyStackRef_AsPyObjectSteal(value);
            // old_value should be DECREFed after GC track checking is done, if not, it could raise a segmentation fault,
--- a/Python/ceval.c
+++ b/Python/ceval.c
@ -821,6 +821,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
    entry_frame.instr_ptr = (_Py_CODEUNIT *)_Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS + 1;
    entry_frame.stackpointer = entry_frame.localsplus;
    entry_frame.owner = FRAME_OWNED_BY_CSTACK;
+    entry_frame.visited = 0;
    entry_frame.return_offset = 0;
    /* Push frame */
    entry_frame.previous = tstate->current_frame;
--- a/Python/executor_cases.c.h
+++ b/Python/executor_cases.c.h
@ -2914,10 +2914,6 @@
                UOP_STAT_INC(uopcode, miss);
                JUMP_TO_JUMP_TARGET();
            }
-            /* Ensure dict is GC tracked if it needs to be */
-            if (!_PyObject_GC_IS_TRACKED(dict) && _PyObject_GC_MAY_BE_TRACKED(PyStackRef_AsPyObjectBorrow(value))) {
-                _PyObject_GC_TRACK(dict);
-            }
            _PyFrame_SetStackPointer(frame, stack_pointer);
            _PyDict_NotifyEvent(tstate->interp, PyDict_EVENT_MODIFIED, dict, name, PyStackRef_AsPyObjectBorrow(value));
            stack_pointer = _PyFrame_GetStackPointer(frame);
--- a/Python/gc.c
+++ b/Python/gc.c
@ -5,7 +5,7 @@
 #include "Python.h"
 #include "pycore_ceval.h"         // _Py_set_eval_breaker_bit()
 #include "pycore_context.h"
-#include "pycore_dict.h"          // _PyDict_MaybeUntrack()
+#include "pycore_dict.h"          // _PyInlineValuesSize()
 #include "pycore_initconfig.h"
 #include "pycore_interp.h"        // PyInterpreterState.gc
 #include "pycore_object.h"
@ -185,6 +185,7 @@ _PyGC_Init(PyInterpreterState *interp)
    if (gcstate->callbacks == NULL) {
        return _PyStatus_NO_MEMORY();
    }
+    gcstate->prior_heap_size = 0;
    gcstate->heap_size = 0;

    return _PyStatus_OK();
@ -747,21 +748,6 @@ untrack_tuples(PyGC_Head *head)
    }
 }

-/* Try to untrack all currently tracked dictionaries */
-static void
-untrack_dicts(PyGC_Head *head)
-{
-    PyGC_Head *next, *gc = GC_NEXT(head);
-    while (gc != head) {
-        PyObject *op = FROM_GC(gc);
-        next = GC_NEXT(gc);
-        if (PyDict_CheckExact(op)) {
-            _PyDict_MaybeUntrack(op);
-        }
-        gc = next;
-    }
-}
-
 /* Return true if object has a pre-PEP 442 finalization method. */
 static int
 has_legacy_finalizer(PyObject *op)
@ -1258,15 +1244,10 @@ handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable,
    gc_list_merge(resurrected, old_generation);
 }

-
-#define UNTRACK_TUPLES 1
-#define UNTRACK_DICTS 2
-
 static void
 gc_collect_region(PyThreadState *tstate,
                  PyGC_Head *from,
                  PyGC_Head *to,
-                  int untrack,
                  struct gc_collection_stats *stats);

 static inline Py_ssize_t
@ -1315,6 +1296,7 @@ gc_collect_young(PyThreadState *tstate,
    GCState *gcstate = &tstate->interp->gc;
    PyGC_Head *young = &gcstate->young.head;
    PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
+    untrack_tuples(&gcstate->young.head);
    GC_STAT_ADD(0, collections, 1);
 #ifdef Py_STATS
    {
@ -1328,7 +1310,8 @@ gc_collect_young(PyThreadState *tstate,

    PyGC_Head survivors;
    gc_list_init(&survivors);
-    gc_collect_region(tstate, young, &survivors, UNTRACK_TUPLES, stats);
+    gc_list_set_space(young, gcstate->visited_space);
+    gc_collect_region(tstate, young, &survivors, stats);
    Py_ssize_t survivor_count = 0;
    if (gcstate->visited_space) {
        /* objects in visited space have bit set, so we set it here */
@ -1343,16 +1326,11 @@ gc_collect_young(PyThreadState *tstate,
            survivor_count++;
        }
    }
-    (void)survivor_count;  // Silence compiler warning
    gc_list_merge(&survivors, visited);
    validate_old(gcstate);
    gcstate->young.count = 0;
    gcstate->old[gcstate->visited_space].count++;
-    Py_ssize_t scale_factor = gcstate->old[0].threshold;
-    if (scale_factor < 1) {
-        scale_factor = 1;
-    }
-    gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
+    gcstate->work_to_do += survivor_count * 4;
    add_stats(gcstate, 0, stats);
 }

@ -1368,15 +1346,15 @@ IS_IN_VISITED(PyGC_Head *gc, int visited_space)
 struct container_and_flag {
    PyGC_Head *container;
    int visited_space;
-    uintptr_t size;
+    Py_ssize_t size;
 };

 /* A traversal callback for adding to container) */
 static int
 visit_add_to_container(PyObject *op, void *arg)
 {
-    OBJECT_STAT_INC(object_visits);
    struct container_and_flag *cf = (struct container_and_flag *)arg;
+    OBJECT_STAT_INC(object_visits);
    int visited = cf->visited_space;
    assert(visited == get_gc_state()->visited_space);
    if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) {
@ -1391,10 +1369,9 @@ visit_add_to_container(PyObject *op, void *arg)
    return 0;
 }

-static uintptr_t
+static Py_ssize_t
 expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCState *gcstate)
 {
-    validate_list(container, collecting_clear_unreachable_clear);
    struct container_and_flag arg = {
        .container = container,
        .visited_space = gcstate->visited_space,
@ -1406,6 +1383,7 @@ expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCStat
         * have been marked as visited */
        assert(IS_IN_VISITED(gc, gcstate->visited_space));
        PyObject *op = FROM_GC(gc);
+        assert(_PyObject_GC_IS_TRACKED(op));
        if (_Py_IsImmortal(op)) {
            PyGC_Head *next = GC_NEXT(gc);
            gc_list_move(gc, &get_gc_state()->permanent_generation.head);
@ -1425,20 +1403,187 @@ expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCStat
+/* Finish a pass over the old generation. The not-visited old list must
+ * already be empty (asserted). Swaps which old space counts as "visited",
+ * retags every young object, clears work_to_do and puts the collector back
+ * into GC_PHASE_MARK. */
 static void
 completed_cycle(GCState *gcstate)
 {
-#ifdef Py_DEBUG
-    PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head;
-    assert(gc_list_is_empty(not_visited));
-#endif
-    gcstate->visited_space = flip_old_space(gcstate->visited_space);
+    assert(gc_list_is_empty(&gcstate->old[gcstate->visited_space^1].head));
+    /* Despite its name, this holds the space that was "visited" up to now;
+     * it only becomes the not-visited space once visited_space is flipped
+     * on the next line. */
+    int not_visited = gcstate->visited_space;
+    gcstate->visited_space = flip_old_space(not_visited);
    /* Make sure all young objects have old space bit set correctly */
    PyGC_Head *young = &gcstate->young.head;
    PyGC_Head *gc = GC_NEXT(young);
    while (gc != young) {
        PyGC_Head *next = GC_NEXT(gc);
-        gc_set_old_space(gc, gcstate->visited_space);
+        /* Young objects are tagged with the previous visited space, i.e.
+         * the value visited_space had before the flip above. */
+        gc_set_old_space(gc, not_visited);
        gc = next;
    }
    gcstate->work_to_do = 0;
+    /* The next increment starts with a fresh marking step. */
+    gcstate->phase = GC_PHASE_MARK;
+}
+
+/* Queue `op` on the `reachable` list if it is a mortal, GC-tracked object
+ * whose old-space bit differs from `visited_space`; its old-space bit is
+ * flipped before it is moved. `op` may be NULL.
+ * Returns 1 when the object was moved, 0 otherwise. */
+static Py_ssize_t
+move_to_reachable(PyObject *op, PyGC_Head *reachable, int visited_space)
+{
+    if (op == NULL || _Py_IsImmortal(op) || !_PyObject_IS_GC(op)) {
+        return 0;
+    }
+    if (!_PyObject_GC_IS_TRACKED(op)) {
+        return 0;
+    }
+    PyGC_Head *head = AS_GC(op);
+    if (gc_old_space(head) == visited_space) {
+        // Already in the visited space: nothing to do.
+        return 0;
+    }
+    gc_flip_old_space(head);
+    gc_list_move(head, reachable);
+    return 1;
+}
+
+/* Drain the `reachable` work list: each object is moved to `visited` and
+ * then traversed with visit_add_to_container, which is given `reachable`
+ * as its container. The loop ends once the work list is empty.
+ * Returns the `size` accumulated in the traversal argument. */
+static Py_ssize_t
+mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space)
+{
+    struct container_and_flag worklist = {
+        .container = reachable,
+        .visited_space = visited_space,
+        .size = 0
+    };
+    for (;;) {
+        if (gc_list_is_empty(reachable)) {
+            break;
+        }
+        PyGC_Head *head = _PyGCHead_NEXT(reachable);
+        assert(gc_old_space(head) == visited_space);
+        // Retire the object to `visited` before scanning its referents.
+        gc_list_move(head, visited);
+        PyObject *obj = FROM_GC(head);
+        (void) Py_TYPE(obj)->tp_traverse(obj, visit_add_to_container, &worklist);
+    }
+    gc_list_validate_space(visited, visited_space);
+    return worklist.size;
+}
+
+/* Mark everything reachable from the interpreter-level roots: the
+ * interpreter's sysdict, builtins and dict, plus tp_dict and tp_subclasses
+ * of every managed static builtin and extension type slot.
+ * Marked objects end up on `visited`. Returns the number of objects marked
+ * (roots moved plus whatever mark_all_reachable() reports). */
+static Py_ssize_t
+mark_global_roots(PyInterpreterState *interp, PyGC_Head *visited, int visited_space)
+{
+    PyGC_Head seeds;
+    gc_list_init(&seeds);
+    Py_ssize_t marked = 0;
+    PyObject *interp_roots[] = { interp->sysdict, interp->builtins, interp->dict };
+    for (size_t r = 0; r < sizeof(interp_roots) / sizeof(interp_roots[0]); r++) {
+        marked += move_to_reachable(interp_roots[r], &seeds, visited_space);
+    }
+    struct types_state *types = &interp->types;
+    for (int slot = 0; slot < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; slot++) {
+        marked += move_to_reachable(types->builtins.initialized[slot].tp_dict, &seeds, visited_space);
+        marked += move_to_reachable(types->builtins.initialized[slot].tp_subclasses, &seeds, visited_space);
+    }
+    for (int slot = 0; slot < _Py_MAX_MANAGED_STATIC_EXT_TYPES; slot++) {
+        marked += move_to_reachable(types->for_extensions.initialized[slot].tp_dict, &seeds, visited_space);
+        marked += move_to_reachable(types->for_extensions.initialized[slot].tp_subclasses, &seeds, visited_space);
+    }
+    // Follow references from the seeds until nothing new is found.
+    marked += mark_all_reachable(&seeds, visited, visited_space);
+    assert(gc_list_is_empty(&seeds));
+    return marked;
+}
+
+/* Mark objects reachable from the frames of every thread of `interp`.
+ *
+ * For each frame not owned by the C stack, the frame's f_locals, its
+ * function object and every non-NULL slot between localsplus and
+ * stackpointer are offered to move_to_reachable(); the collected seeds are
+ * then expanded with mark_all_reachable() into `visited`.
+ *
+ * start: when true, every frame of every thread is walked. When false, the
+ *        walk down a thread's frame chain stops after processing the first
+ *        frame whose `visited` flag is already set.
+ *
+ * Returns the number of objects marked. */
+static Py_ssize_t
+mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, bool start)
+{
+    PyGC_Head reachable;
+    gc_list_init(&reachable);
+    Py_ssize_t objects_marked = 0;
+    // Move all objects on stacks to reachable
+    _PyRuntimeState *runtime = &_PyRuntime;
+    HEAD_LOCK(runtime);
+    PyThreadState* ts = PyInterpreterState_ThreadHead(interp);
+    HEAD_UNLOCK(runtime);
+    while (ts) {
+        for (_PyInterpreterFrame *frame = ts->current_frame;
+             frame != NULL;
+             frame = frame->previous)
+        {
+            if (frame->owner == FRAME_OWNED_BY_CSTACK) {
+                continue;
+            }
+            _PyStackRef *locals = frame->localsplus;
+            _PyStackRef *sp = frame->stackpointer;
+            objects_marked += move_to_reachable(frame->f_locals, &reachable, visited_space);
+            PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj);
+            objects_marked += move_to_reachable(func, &reachable, visited_space);
+            while (sp > locals) {
+                sp--;
+                if (PyStackRef_IsNull(*sp)) {
+                    continue;
+                }
+                // Same filter-and-move as for f_locals and func above;
+                // reuse the helper instead of duplicating its checks.
+                PyObject *op = PyStackRef_AsPyObjectBorrow(*sp);
+                objects_marked += move_to_reachable(op, &reachable, visited_space);
+            }
+            if (!start && frame->visited) {
+                // If this frame has already been visited, then the lower frames
+                // will have already been visited and will not have changed
+                break;
+            }
+            frame->visited = 1;
+        }
+        HEAD_LOCK(runtime);
+        ts = PyThreadState_Next(ts);
+        HEAD_UNLOCK(runtime);
+    }
+    objects_marked += mark_all_reachable(&reachable, visited, visited_space);
+    assert(gc_list_is_empty(&reachable));
+    return objects_marked;
+}
+
+/* Initial marking step of a collection cycle: mark everything reachable
+ * from the interpreter's global roots and from all thread stacks (with
+ * start=true) into the current visited old space, charge the marked objects
+ * against work_to_do, and switch the collector to GC_PHASE_COLLECT.
+ * Returns the number of objects marked. */
+static Py_ssize_t
+mark_at_start(PyThreadState *tstate)
+{
+    // TO DO -- Make this incremental
+    GCState *gcstate = &tstate->interp->gc;
+    validate_old(gcstate);
+    PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
+    Py_ssize_t objects_marked = mark_global_roots(tstate->interp, visited, gcstate->visited_space);
+    objects_marked += mark_stacks(tstate->interp, visited, gcstate->visited_space, true);
+    // NOTE(review): gc_collect_increment() subtracts the returned count from
+    // work_to_do as well, so marked objects are deducted twice on that path
+    // -- confirm this is intended.
+    gcstate->work_to_do -= objects_marked;
+    gcstate->phase = GC_PHASE_COLLECT;
+    return objects_marked;
+}
+
+/* Compute how much work the next increment should add to work_to_do.
+ *
+ * Three inputs feed the estimate:
+ *   - the number of objects created since the last call (young.count),
+ *   - heap growth since the last call (never negative, and ignored while
+ *     the heap is smaller than new objects times the scale factor),
+ *   - a fraction of the heap size, capped at the number of new objects so
+ *     the cost cannot grow quadratically.
+ *
+ * With a steady-state heap this yields at most three times the number of
+ * new objects, which keeps the collector ahead even if every new object is
+ * garbage. Tracking survival rates could refine this further.
+ *
+ * Side effects: resets young.count to 0 and records the current heap size
+ * in prior_heap_size. */
+static Py_ssize_t
+assess_work_to_do(GCState *gcstate)
+{
+    Py_ssize_t divisor = gcstate->old[0].threshold;
+    if (divisor < 2) {
+        divisor = 2;
+    }
+    Py_ssize_t created = gcstate->young.count;
+    Py_ssize_t grown = gcstate->heap_size - gcstate->prior_heap_size;
+    if (grown < 0 || gcstate->heap_size < created * divisor) {
+        // Shrinking heap, or small heap: growth does not count.
+        grown = 0;
+    }
+    Py_ssize_t heap_share = gcstate->heap_size / SCAN_RATE_DIVISOR / divisor;
+    if (heap_share > created) {
+        heap_share = created;
+    }
+    gcstate->young.count = 0;
+    gcstate->prior_heap_size = gcstate->heap_size;
+    return created*3/2 + grown*2 + heap_share*3/2;
 }

 static void
@ -1446,16 +1591,24 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
 {
    GC_STAT_ADD(1, collections, 1);
    GCState *gcstate = &tstate->interp->gc;
+
+    gcstate->work_to_do += assess_work_to_do(gcstate);
+    untrack_tuples(&gcstate->young.head);
+    if (gcstate->phase == GC_PHASE_MARK) {
+        Py_ssize_t objects_marked = mark_at_start(tstate);
+        GC_STAT_ADD(1, objects_transitively_reachable, objects_marked);
+        gcstate->work_to_do -= objects_marked;
+        return;
+    }
    PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head;
    PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
    PyGC_Head increment;
    gc_list_init(&increment);
-    Py_ssize_t scale_factor = gcstate->old[0].threshold;
-    if (scale_factor < 1) {
-        scale_factor = 1;
-    }
+    Py_ssize_t objects_marked = mark_stacks(tstate->interp, visited, gcstate->visited_space, false);
+    GC_STAT_ADD(1, objects_transitively_reachable, objects_marked);
+    gcstate->work_to_do -= objects_marked;
+    gc_list_set_space(&gcstate->young.head, gcstate->visited_space);
    gc_list_merge(&gcstate->young.head, &increment);
-    gcstate->young.count = 0;
    gc_list_validate_space(&increment, gcstate->visited_space);
    Py_ssize_t increment_size = 0;
    while (increment_size < gcstate->work_to_do) {
@ -1465,17 +1618,18 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
        PyGC_Head *gc = _PyGCHead_NEXT(not_visited);
        gc_list_move(gc, &increment);
        increment_size++;
+        assert(!_Py_IsImmortal(FROM_GC(gc)));
        gc_set_old_space(gc, gcstate->visited_space);
        increment_size += expand_region_transitively_reachable(&increment, gc, gcstate);
    }
+    GC_STAT_ADD(1, objects_not_transitively_reachable, increment_size);
    gc_list_validate_space(&increment, gcstate->visited_space);
    PyGC_Head survivors;
    gc_list_init(&survivors);
-    gc_collect_region(tstate, &increment, &survivors, UNTRACK_TUPLES, stats);
+    gc_collect_region(tstate, &increment, &survivors, stats);
    gc_list_validate_space(&survivors, gcstate->visited_space);
    gc_list_merge(&survivors, visited);
    assert(gc_list_is_empty(&increment));
-    gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
    gcstate->work_to_do -= increment_size;

    validate_old(gcstate);
@ -1496,20 +1650,25 @@ gc_collect_full(PyThreadState *tstate,
    PyGC_Head *young = &gcstate->young.head;
    PyGC_Head *pending = &gcstate->old[gcstate->visited_space^1].head;
    PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
-    /* merge all generations into visited */
-    gc_list_validate_space(young, gcstate->visited_space);
-    gc_list_set_space(pending, gcstate->visited_space);
+    untrack_tuples(&gcstate->young.head);
+    /* merge all generations into pending */
+    gc_list_validate_space(young, 1-gcstate->visited_space);
    gc_list_merge(young, pending);
+    gc_list_set_space(visited, 1-gcstate->visited_space);
+    gc_list_merge(visited, pending);
+    /* Mark reachable */
+    Py_ssize_t reachable = mark_global_roots(tstate->interp, visited, gcstate->visited_space);
+    reachable += mark_stacks(tstate->interp, visited, gcstate->visited_space, true);
+    (void)reachable;
+    GC_STAT_ADD(2, objects_transitively_reachable, reachable);
+    GC_STAT_ADD(2, objects_not_transitively_reachable, gc_list_size(pending));
    gcstate->young.count = 0;
-    gc_list_merge(pending, visited);
-
-    gc_collect_region(tstate, visited, visited,
-                      UNTRACK_TUPLES | UNTRACK_DICTS,
-                      stats);
+    gc_list_set_space(pending, gcstate->visited_space);
+    gc_collect_region(tstate, pending, visited, stats);
    gcstate->young.count = 0;
    gcstate->old[0].count = 0;
    gcstate->old[1].count = 0;
-
+    completed_cycle(gcstate);
    gcstate->work_to_do = - gcstate->young.threshold * 2;
    _PyGC_ClearAllFreeLists(tstate->interp);
    validate_old(gcstate);
@ -1522,7 +1681,6 @@ static void
 gc_collect_region(PyThreadState *tstate,
                  PyGC_Head *from,
                  PyGC_Head *to,
-                  int untrack,
                  struct gc_collection_stats *stats)
 {
    PyGC_Head unreachable; /* non-problematic unreachable trash */
@ -1536,12 +1694,6 @@ gc_collect_region(PyThreadState *tstate,
    gc_list_init(&unreachable);
    deduce_unreachable(from, &unreachable);
    validate_consistent_old_space(from);
-    if (untrack & UNTRACK_TUPLES) {
-        untrack_tuples(from);
-    }
-    if (untrack & UNTRACK_DICTS) {
-        untrack_dicts(from);
-    }
    validate_consistent_old_space(to);
    if (from != to) {
        gc_list_merge(from, to);
@ -1761,9 +1913,10 @@ _PyGC_Freeze(PyInterpreterState *interp)
 {
    GCState *gcstate = &interp->gc;
    /* The permanent_generation has its old space bit set to zero */
-    if (gcstate->visited_space) {
+    if (!gcstate->visited_space) {
        gc_list_set_space(&gcstate->young.head, 0);
    }
+    gc_list_validate_space(&gcstate->young.head, 0);
    gc_list_merge(&gcstate->young.head, &gcstate->permanent_generation.head);
    gcstate->young.count = 0;
    PyGC_Head*old0 = &gcstate->old[0].head;
--- a/Python/gc_free_threading.c
+++ b/Python/gc_free_threading.c
@ -3,7 +3,7 @@
 #include "pycore_brc.h"           // struct _brc_thread_state
 #include "pycore_ceval.h"         // _Py_set_eval_breaker_bit()
 #include "pycore_context.h"
-#include "pycore_dict.h"          // _PyDict_MaybeUntrack()
+#include "pycore_dict.h"          // _PyInlineValuesSize()
 #include "pycore_freelist.h"      // _PyObject_ClearFreeLists()
 #include "pycore_initconfig.h"
 #include "pycore_interp.h"        // PyInterpreterState.gc
@ -493,13 +493,6 @@ update_refs(const mi_heap_t *heap, const mi_heap_area_t *area,
                return true;
            }
        }
-        else if (PyDict_CheckExact(op)) {
-            _PyDict_MaybeUntrack(op);
-            if (!_PyObject_GC_IS_TRACKED(op)) {
-                gc_restore_refs(op);
-                return true;
-            }
-        }
    }

    // We repurpose ob_tid to compute "gc_refs", the number of external
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@ -7432,10 +7432,6 @@
                DEOPT_IF(ep->me_key != name, STORE_ATTR);
                PyObject *old_value = ep->me_value;
                DEOPT_IF(old_value == NULL, STORE_ATTR);
-                /* Ensure dict is GC tracked if it needs to be */
-                if (!_PyObject_GC_IS_TRACKED(dict) && _PyObject_GC_MAY_BE_TRACKED(PyStackRef_AsPyObjectBorrow(value))) {
-                    _PyObject_GC_TRACK(dict);
-                }
                _PyFrame_SetStackPointer(frame, stack_pointer);
                _PyDict_NotifyEvent(tstate->interp, PyDict_EVENT_MODIFIED, dict, name, PyStackRef_AsPyObjectBorrow(value));
                stack_pointer = _PyFrame_GetStackPointer(frame);
--- a/Python/specialize.c
+++ b/Python/specialize.c
@ -230,6 +230,8 @@ print_gc_stats(FILE *out, GCStats *stats)
    for (int i = 0; i < NUM_GENERATIONS; i++) {
        fprintf(out, "GC[%d] collections: %" PRIu64 "\n", i, stats[i].collections);
        fprintf(out, "GC[%d] object visits: %" PRIu64 "\n", i, stats[i].object_visits);
+        fprintf(out, "GC[%d] objects reachable from roots: %" PRIu64 "\n", i, stats[i].objects_transitively_reachable);
+        fprintf(out, "GC[%d] objects not reachable from roots: %" PRIu64 "\n", i, stats[i].objects_not_transitively_reachable);
        fprintf(out, "GC[%d] objects collected: %" PRIu64 "\n", i, stats[i].objects_collected);
    }
 }
--- a/Tools/scripts/summarize_stats.py
+++ b/Tools/scripts/summarize_stats.py
@ -1118,6 +1118,8 @@ def gc_stats_section() -> Section:
                Count(gen["collections"]),
                Count(gen["objects collected"]),
                Count(gen["object visits"]),
+                Count(gen["objects reachable from roots"]),
+                Count(gen["objects not reachable from roots"]),
            )
            for (i, gen) in enumerate(gc_stats)
        ]
@ -1127,7 +1129,8 @@ def gc_stats_section() -> Section:
        "GC collections and effectiveness",
        [
            Table(
-                ("Generation:", "Collections:", "Objects collected:", "Object visits:"),
+                ("Generation:", "Collections:", "Objects collected:", "Object visits:",
+                 "Reachable from roots:", "Not reachable from roots:"),
                calc_gc_stats,
            )
        ],