From 587d4802034749e2aace9c00b00bd73eccdae1e7 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 1 Feb 2024 15:29:19 -0500 Subject: [PATCH] gh-112529: Remove PyGC_Head from object pre-header in free-threaded build (#114564) * gh-112529: Remove PyGC_Head from object pre-header in free-threaded build This avoids allocating space for PyGC_Head in the free-threaded build. The GC implementation for free-threaded CPython does not use the PyGC_Head structure. * The trashcan mechanism uses the `ob_tid` field instead of `_gc_prev` in the free-threaded build. * The GDB libpython.py file now determines the offset of the managed dict field based on whether the running process is a free-threaded build. Those are identified by the `ob_ref_local` field in PyObject. * Fixes `_PySys_GetSizeOf()` which incorrectly incorrectly included the size of `PyGC_Head` in the size of static `PyTypeObject`. --- Include/internal/pycore_object.h | 27 ++++++++++++------- Include/object.h | 5 ++-- Lib/test/test_sys.py | 5 ++-- ...-01-25-18-50-49.gh-issue-112529.IbbApA.rst | 4 +++ Modules/_testinternalcapi.c | 12 ++++++++- Objects/object.c | 13 +++++++-- Python/gc_free_threading.c | 21 +++++++++++---- Python/sysmodule.c | 10 ++++++- Tools/gdb/libpython.py | 15 ++++++++--- 9 files changed, 86 insertions(+), 26 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-01-25-18-50-49.gh-issue-112529.IbbApA.rst diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index e32ea2f5289..34a83ea228e 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -315,16 +315,15 @@ static inline void _PyObject_GC_TRACK( _PyObject_ASSERT_FROM(op, !_PyObject_GC_IS_TRACKED(op), "object already tracked by the garbage collector", filename, lineno, __func__); - +#ifdef Py_GIL_DISABLED + op->ob_gc_bits |= _PyGC_BITS_TRACKED; +#else PyGC_Head *gc = _Py_AS_GC(op); _PyObject_ASSERT_FROM(op, (gc->_gc_prev & _PyGC_PREV_MASK_COLLECTING) == 0, "object is in generation which is garbage collected", filename, lineno, __func__); -#ifdef Py_GIL_DISABLED - op->ob_gc_bits |= _PyGC_BITS_TRACKED; -#else PyInterpreterState *interp = _PyInterpreterState_GET(); PyGC_Head *generation0 = interp->gc.generation0; PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev); @@ -594,8 +593,12 @@ _PyObject_IS_GC(PyObject *obj) static inline size_t _PyType_PreHeaderSize(PyTypeObject *tp) { - return _PyType_IS_GC(tp) * sizeof(PyGC_Head) + - _PyType_HasFeature(tp, Py_TPFLAGS_PREHEADER) * 2 * sizeof(PyObject *); + return ( +#ifndef Py_GIL_DISABLED + _PyType_IS_GC(tp) * sizeof(PyGC_Head) + +#endif + _PyType_HasFeature(tp, Py_TPFLAGS_PREHEADER) * 2 * sizeof(PyObject *) + ); } void _PyObject_GC_Link(PyObject *op); @@ -625,6 +628,14 @@ extern int _PyObject_StoreInstanceAttribute(PyObject *obj, PyDictValues *values, PyObject * _PyObject_GetInstanceAttribute(PyObject *obj, PyDictValues *values, PyObject *name); +#ifdef Py_GIL_DISABLED +# define MANAGED_DICT_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-1) +# define MANAGED_WEAKREF_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-2) +#else +# define MANAGED_DICT_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-3) +# define MANAGED_WEAKREF_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-4) +#endif + typedef union { PyObject *dict; /* Use a char* to generate a warning if directly assigning a PyDictValues */ @@ -635,7 +646,7 @@ static inline PyDictOrValues * _PyObject_DictOrValuesPointer(PyObject *obj) { assert(Py_TYPE(obj)->tp_flags & Py_TPFLAGS_MANAGED_DICT); - return ((PyDictOrValues *)obj)-3; + return (PyDictOrValues *)((char *)obj + MANAGED_DICT_OFFSET); } static inline int @@ -664,8 +675,6 @@ _PyDictOrValues_SetValues(PyDictOrValues *ptr, PyDictValues *values) ptr->values = ((char *)values) - 1; } -#define MANAGED_WEAKREF_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-4) - extern PyObject ** _PyObject_ComputedDictPointer(PyObject *); extern void _PyObject_FreeInstanceAttributes(PyObject *obj); extern int _PyObject_IsInstanceDictEmpty(PyObject *); diff --git a/Include/object.h b/Include/object.h index 568d315d760..05187fe5dc4 100644 --- a/Include/object.h +++ b/Include/object.h @@ -212,8 +212,9 @@ struct _object { struct _PyMutex { uint8_t v; }; struct _object { - // ob_tid stores the thread id (or zero). It is also used by the GC to - // store linked lists and the computed "gc_refs" refcount. + // ob_tid stores the thread id (or zero). It is also used by the GC and the + // trashcan mechanism as a linked list pointer and by the GC to store the + // computed "gc_refs" refcount. uintptr_t ob_tid; uint16_t _padding; struct _PyMutex ob_mutex; // per-object lock diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 6c87dfabad9..71671a5a984 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -1392,6 +1392,7 @@ class SizeofTest(unittest.TestCase): self.longdigit = sys.int_info.sizeof_digit import _testinternalcapi self.gc_headsize = _testinternalcapi.SIZEOF_PYGC_HEAD + self.managed_pre_header_size = _testinternalcapi.SIZEOF_MANAGED_PRE_HEADER check_sizeof = test.support.check_sizeof @@ -1427,7 +1428,7 @@ class SizeofTest(unittest.TestCase): def __sizeof__(self): return int(self) self.assertEqual(sys.getsizeof(OverflowSizeof(sys.maxsize)), - sys.maxsize + self.gc_headsize*2) + sys.maxsize + self.gc_headsize + self.managed_pre_header_size) with self.assertRaises(OverflowError): sys.getsizeof(OverflowSizeof(sys.maxsize + 1)) with self.assertRaises(ValueError): @@ -1650,7 +1651,7 @@ class SizeofTest(unittest.TestCase): # type # static type: PyTypeObject fmt = 'P2nPI13Pl4Pn9Pn12PIPc' - s = vsize('2P' + fmt) + s = vsize(fmt) check(int, s) # class s = vsize(fmt + # PyTypeObject diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-01-25-18-50-49.gh-issue-112529.IbbApA.rst b/Misc/NEWS.d/next/Core and Builtins/2024-01-25-18-50-49.gh-issue-112529.IbbApA.rst new file mode 100644 index 00000000000..2a6d74fb222 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-01-25-18-50-49.gh-issue-112529.IbbApA.rst @@ -0,0 +1,4 @@ +The free-threaded build no longer allocates space for the ``PyGC_Head`` +structure in objects that support cyclic garbage collection. A number of +other fields and data structures are used as replacements, including +``ob_gc_bits``, ``ob_tid``, and mimalloc internal data structures. diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index c4a648a1816..0bb739b5398 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -1752,8 +1752,18 @@ module_exec(PyObject *module) return 1; } + Py_ssize_t sizeof_gc_head = 0; +#ifndef Py_GIL_DISABLED + sizeof_gc_head = sizeof(PyGC_Head); +#endif + if (PyModule_Add(module, "SIZEOF_PYGC_HEAD", - PyLong_FromSsize_t(sizeof(PyGC_Head))) < 0) { + PyLong_FromSsize_t(sizeof_gc_head)) < 0) { + return 1; + } + + if (PyModule_Add(module, "SIZEOF_MANAGED_PRE_HEADER", + PyLong_FromSsize_t(2 * sizeof(PyObject*))) < 0) { return 1; } diff --git a/Objects/object.c b/Objects/object.c index 587c5528c01..bbf7f98ae3d 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2671,7 +2671,12 @@ _PyTrash_thread_deposit_object(struct _py_trashcan *trash, PyObject *op) _PyObject_ASSERT(op, _PyObject_IS_GC(op)); _PyObject_ASSERT(op, !_PyObject_GC_IS_TRACKED(op)); _PyObject_ASSERT(op, Py_REFCNT(op) == 0); +#ifdef Py_GIL_DISABLED + _PyObject_ASSERT(op, op->ob_tid == 0); + op->ob_tid = (uintptr_t)trash->delete_later; +#else _PyGCHead_SET_PREV(_Py_AS_GC(op), (PyGC_Head*)trash->delete_later); +#endif trash->delete_later = op; } @@ -2697,8 +2702,12 @@ _PyTrash_thread_destroy_chain(struct _py_trashcan *trash) PyObject *op = trash->delete_later; destructor dealloc = Py_TYPE(op)->tp_dealloc; - trash->delete_later = - (PyObject*) _PyGCHead_PREV(_Py_AS_GC(op)); +#ifdef Py_GIL_DISABLED + trash->delete_later = (PyObject*) op->ob_tid; + op->ob_tid = 0; +#else + trash->delete_later = (PyObject*) _PyGCHead_PREV(_Py_AS_GC(op)); +#endif /* Call the deallocator directly. This used to try to * fool Py_DECREF into calling it indirectly, but diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index a6513a2c4ab..53f927bfa65 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -25,7 +25,10 @@ typedef struct _gc_runtime_state GCState; // Automatically choose the generation that needs collecting. #define GENERATION_AUTO (-1) -// A linked-list of objects using the `ob_tid` field as the next pointer. +// A linked list of objects using the `ob_tid` field as the next pointer. +// The linked list pointers are distinct from any real thread ids, because the +// thread ids returned by _Py_ThreadId() are also pointers to distinct objects. +// No thread will confuse its own id with a linked list pointer. struct worklist { uintptr_t head; }; @@ -221,7 +224,7 @@ gc_visit_heaps_lock_held(PyInterpreterState *interp, mi_block_visit_fun *visitor struct visitor_args *arg) { // Offset of PyObject header from start of memory block. - Py_ssize_t offset_base = sizeof(PyGC_Head); + Py_ssize_t offset_base = 0; if (_PyMem_DebugEnabled()) { // The debug allocator adds two words at the beginning of each block. offset_base += 2 * sizeof(size_t); @@ -331,8 +334,14 @@ update_refs(const mi_heap_t *heap, const mi_heap_area_t *area, Py_ssize_t refcount = Py_REFCNT(op); _PyObject_ASSERT(op, refcount >= 0); - // Add the actual refcount to ob_tid. + // We repurpose ob_tid to compute "gc_refs", the number of external + // references to the object (i.e., from outside the GC heaps). This means + // that ob_tid is no longer a valid thread id until it is restored by + // scan_heap_visitor(). Until then, we cannot use the standard reference + // counting functions or allow other threads to run Python code. gc_maybe_init_refs(op); + + // Add the actual refcount to ob_tid. gc_add_refs(op, refcount); // Subtract internal references from ob_tid. Objects with ob_tid > 0 @@ -1508,8 +1517,10 @@ gc_alloc(PyTypeObject *tp, size_t basicsize, size_t presize) if (mem == NULL) { return _PyErr_NoMemory(tstate); } - ((PyObject **)mem)[0] = NULL; - ((PyObject **)mem)[1] = NULL; + if (presize) { + ((PyObject **)mem)[0] = NULL; + ((PyObject **)mem)[1] = NULL; + } PyObject *op = (PyObject *)(mem + presize); _PyObject_GC_Link(op); return op; diff --git a/Python/sysmodule.c b/Python/sysmodule.c index f558a00a691..437d7f8dfc4 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -1878,7 +1878,15 @@ _PySys_GetSizeOf(PyObject *o) return (size_t)-1; } - return (size_t)size + _PyType_PreHeaderSize(Py_TYPE(o)); + size_t presize = 0; + if (!Py_IS_TYPE(o, &PyType_Type) || + PyType_HasFeature((PyTypeObject *)o, Py_TPFLAGS_HEAPTYPE)) + { + /* Add the size of the pre-header if "o" is not a static type */ + presize = _PyType_PreHeaderSize(Py_TYPE(o)); + } + + return (size_t)size + presize; } static PyObject * diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py index 5ef55524c11..483f28b46df 100755 --- a/Tools/gdb/libpython.py +++ b/Tools/gdb/libpython.py @@ -70,6 +70,14 @@ def _type_unsigned_int_ptr(): def _sizeof_void_p(): return gdb.lookup_type('void').pointer().sizeof +def _managed_dict_offset(): + # See pycore_object.h + pyobj = gdb.lookup_type("PyObject") + if any(field.name == "ob_ref_local" for field in pyobj.fields()): + return -1 * _sizeof_void_p() + else: + return -3 * _sizeof_void_p() + Py_TPFLAGS_MANAGED_DICT = (1 << 4) Py_TPFLAGS_HEAPTYPE = (1 << 9) @@ -457,7 +465,7 @@ class HeapTypeObjectPtr(PyObjectPtr): if dictoffset < 0: if int_from_int(typeobj.field('tp_flags')) & Py_TPFLAGS_MANAGED_DICT: assert dictoffset == -1 - dictoffset = -3 * _sizeof_void_p() + dictoffset = _managed_dict_offset() else: type_PyVarObject_ptr = gdb.lookup_type('PyVarObject').pointer() tsize = int_from_int(self._gdbval.cast(type_PyVarObject_ptr)['ob_size']) @@ -485,9 +493,8 @@ class HeapTypeObjectPtr(PyObjectPtr): has_values = int_from_int(typeobj.field('tp_flags')) & Py_TPFLAGS_MANAGED_DICT if not has_values: return None - charptrptr_t = _type_char_ptr().pointer() - ptr = self._gdbval.cast(charptrptr_t) - 3 - char_ptr = ptr.dereference() + ptr = self._gdbval.cast(_type_char_ptr()) + _managed_dict_offset() + char_ptr = ptr.cast(_type_char_ptr().pointer()).dereference() if (int(char_ptr) & 1) == 0: return None char_ptr += 1