gh-112529: Remove PyGC_Head from object pre-header in free-threaded build (#114564)

* gh-112529: Remove PyGC_Head from object pre-header in free-threaded build

This avoids allocating space for PyGC_Head in the free-threaded build.
The GC implementation for free-threaded CPython does not use the
PyGC_Head structure.

 * The trashcan mechanism uses the `ob_tid` field instead of `_gc_prev`
   in the free-threaded build.
 * The GDB libpython.py file now determines the offset of the managed
   dict field based on whether the running process is a free-threaded
   build. Those are identified by the `ob_ref_local` field in PyObject.
 * Fixes `_PySys_GetSizeOf()` which incorrectly incorrectly included the
   size of `PyGC_Head` in the size of static `PyTypeObject`.
This commit is contained in:
Sam Gross 2024-02-01 15:29:19 -05:00 committed by GitHub
parent 500ede0117
commit 587d480203
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 86 additions and 26 deletions

View File

@ -315,16 +315,15 @@ static inline void _PyObject_GC_TRACK(
_PyObject_ASSERT_FROM(op, !_PyObject_GC_IS_TRACKED(op), _PyObject_ASSERT_FROM(op, !_PyObject_GC_IS_TRACKED(op),
"object already tracked by the garbage collector", "object already tracked by the garbage collector",
filename, lineno, __func__); filename, lineno, __func__);
#ifdef Py_GIL_DISABLED
op->ob_gc_bits |= _PyGC_BITS_TRACKED;
#else
PyGC_Head *gc = _Py_AS_GC(op); PyGC_Head *gc = _Py_AS_GC(op);
_PyObject_ASSERT_FROM(op, _PyObject_ASSERT_FROM(op,
(gc->_gc_prev & _PyGC_PREV_MASK_COLLECTING) == 0, (gc->_gc_prev & _PyGC_PREV_MASK_COLLECTING) == 0,
"object is in generation which is garbage collected", "object is in generation which is garbage collected",
filename, lineno, __func__); filename, lineno, __func__);
#ifdef Py_GIL_DISABLED
op->ob_gc_bits |= _PyGC_BITS_TRACKED;
#else
PyInterpreterState *interp = _PyInterpreterState_GET(); PyInterpreterState *interp = _PyInterpreterState_GET();
PyGC_Head *generation0 = interp->gc.generation0; PyGC_Head *generation0 = interp->gc.generation0;
PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev); PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev);
@ -594,8 +593,12 @@ _PyObject_IS_GC(PyObject *obj)
static inline size_t static inline size_t
_PyType_PreHeaderSize(PyTypeObject *tp) _PyType_PreHeaderSize(PyTypeObject *tp)
{ {
return _PyType_IS_GC(tp) * sizeof(PyGC_Head) + return (
_PyType_HasFeature(tp, Py_TPFLAGS_PREHEADER) * 2 * sizeof(PyObject *); #ifndef Py_GIL_DISABLED
_PyType_IS_GC(tp) * sizeof(PyGC_Head) +
#endif
_PyType_HasFeature(tp, Py_TPFLAGS_PREHEADER) * 2 * sizeof(PyObject *)
);
} }
void _PyObject_GC_Link(PyObject *op); void _PyObject_GC_Link(PyObject *op);
@ -625,6 +628,14 @@ extern int _PyObject_StoreInstanceAttribute(PyObject *obj, PyDictValues *values,
PyObject * _PyObject_GetInstanceAttribute(PyObject *obj, PyDictValues *values, PyObject * _PyObject_GetInstanceAttribute(PyObject *obj, PyDictValues *values,
PyObject *name); PyObject *name);
#ifdef Py_GIL_DISABLED
# define MANAGED_DICT_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-1)
# define MANAGED_WEAKREF_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-2)
#else
# define MANAGED_DICT_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-3)
# define MANAGED_WEAKREF_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-4)
#endif
typedef union { typedef union {
PyObject *dict; PyObject *dict;
/* Use a char* to generate a warning if directly assigning a PyDictValues */ /* Use a char* to generate a warning if directly assigning a PyDictValues */
@ -635,7 +646,7 @@ static inline PyDictOrValues *
_PyObject_DictOrValuesPointer(PyObject *obj) _PyObject_DictOrValuesPointer(PyObject *obj)
{ {
assert(Py_TYPE(obj)->tp_flags & Py_TPFLAGS_MANAGED_DICT); assert(Py_TYPE(obj)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
return ((PyDictOrValues *)obj)-3; return (PyDictOrValues *)((char *)obj + MANAGED_DICT_OFFSET);
} }
static inline int static inline int
@ -664,8 +675,6 @@ _PyDictOrValues_SetValues(PyDictOrValues *ptr, PyDictValues *values)
ptr->values = ((char *)values) - 1; ptr->values = ((char *)values) - 1;
} }
#define MANAGED_WEAKREF_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-4)
extern PyObject ** _PyObject_ComputedDictPointer(PyObject *); extern PyObject ** _PyObject_ComputedDictPointer(PyObject *);
extern void _PyObject_FreeInstanceAttributes(PyObject *obj); extern void _PyObject_FreeInstanceAttributes(PyObject *obj);
extern int _PyObject_IsInstanceDictEmpty(PyObject *); extern int _PyObject_IsInstanceDictEmpty(PyObject *);

View File

@ -212,8 +212,9 @@ struct _object {
struct _PyMutex { uint8_t v; }; struct _PyMutex { uint8_t v; };
struct _object { struct _object {
// ob_tid stores the thread id (or zero). It is also used by the GC to // ob_tid stores the thread id (or zero). It is also used by the GC and the
// store linked lists and the computed "gc_refs" refcount. // trashcan mechanism as a linked list pointer and by the GC to store the
// computed "gc_refs" refcount.
uintptr_t ob_tid; uintptr_t ob_tid;
uint16_t _padding; uint16_t _padding;
struct _PyMutex ob_mutex; // per-object lock struct _PyMutex ob_mutex; // per-object lock

View File

@ -1392,6 +1392,7 @@ class SizeofTest(unittest.TestCase):
self.longdigit = sys.int_info.sizeof_digit self.longdigit = sys.int_info.sizeof_digit
import _testinternalcapi import _testinternalcapi
self.gc_headsize = _testinternalcapi.SIZEOF_PYGC_HEAD self.gc_headsize = _testinternalcapi.SIZEOF_PYGC_HEAD
self.managed_pre_header_size = _testinternalcapi.SIZEOF_MANAGED_PRE_HEADER
check_sizeof = test.support.check_sizeof check_sizeof = test.support.check_sizeof
@ -1427,7 +1428,7 @@ class SizeofTest(unittest.TestCase):
def __sizeof__(self): def __sizeof__(self):
return int(self) return int(self)
self.assertEqual(sys.getsizeof(OverflowSizeof(sys.maxsize)), self.assertEqual(sys.getsizeof(OverflowSizeof(sys.maxsize)),
sys.maxsize + self.gc_headsize*2) sys.maxsize + self.gc_headsize + self.managed_pre_header_size)
with self.assertRaises(OverflowError): with self.assertRaises(OverflowError):
sys.getsizeof(OverflowSizeof(sys.maxsize + 1)) sys.getsizeof(OverflowSizeof(sys.maxsize + 1))
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
@ -1650,7 +1651,7 @@ class SizeofTest(unittest.TestCase):
# type # type
# static type: PyTypeObject # static type: PyTypeObject
fmt = 'P2nPI13Pl4Pn9Pn12PIPc' fmt = 'P2nPI13Pl4Pn9Pn12PIPc'
s = vsize('2P' + fmt) s = vsize(fmt)
check(int, s) check(int, s)
# class # class
s = vsize(fmt + # PyTypeObject s = vsize(fmt + # PyTypeObject

View File

@ -0,0 +1,4 @@
The free-threaded build no longer allocates space for the ``PyGC_Head``
structure in objects that support cyclic garbage collection. A number of
other fields and data structures are used as replacements, including
``ob_gc_bits``, ``ob_tid``, and mimalloc internal data structures.

View File

@ -1752,8 +1752,18 @@ module_exec(PyObject *module)
return 1; return 1;
} }
Py_ssize_t sizeof_gc_head = 0;
#ifndef Py_GIL_DISABLED
sizeof_gc_head = sizeof(PyGC_Head);
#endif
if (PyModule_Add(module, "SIZEOF_PYGC_HEAD", if (PyModule_Add(module, "SIZEOF_PYGC_HEAD",
PyLong_FromSsize_t(sizeof(PyGC_Head))) < 0) { PyLong_FromSsize_t(sizeof_gc_head)) < 0) {
return 1;
}
if (PyModule_Add(module, "SIZEOF_MANAGED_PRE_HEADER",
PyLong_FromSsize_t(2 * sizeof(PyObject*))) < 0) {
return 1; return 1;
} }

View File

@ -2671,7 +2671,12 @@ _PyTrash_thread_deposit_object(struct _py_trashcan *trash, PyObject *op)
_PyObject_ASSERT(op, _PyObject_IS_GC(op)); _PyObject_ASSERT(op, _PyObject_IS_GC(op));
_PyObject_ASSERT(op, !_PyObject_GC_IS_TRACKED(op)); _PyObject_ASSERT(op, !_PyObject_GC_IS_TRACKED(op));
_PyObject_ASSERT(op, Py_REFCNT(op) == 0); _PyObject_ASSERT(op, Py_REFCNT(op) == 0);
#ifdef Py_GIL_DISABLED
_PyObject_ASSERT(op, op->ob_tid == 0);
op->ob_tid = (uintptr_t)trash->delete_later;
#else
_PyGCHead_SET_PREV(_Py_AS_GC(op), (PyGC_Head*)trash->delete_later); _PyGCHead_SET_PREV(_Py_AS_GC(op), (PyGC_Head*)trash->delete_later);
#endif
trash->delete_later = op; trash->delete_later = op;
} }
@ -2697,8 +2702,12 @@ _PyTrash_thread_destroy_chain(struct _py_trashcan *trash)
PyObject *op = trash->delete_later; PyObject *op = trash->delete_later;
destructor dealloc = Py_TYPE(op)->tp_dealloc; destructor dealloc = Py_TYPE(op)->tp_dealloc;
trash->delete_later = #ifdef Py_GIL_DISABLED
(PyObject*) _PyGCHead_PREV(_Py_AS_GC(op)); trash->delete_later = (PyObject*) op->ob_tid;
op->ob_tid = 0;
#else
trash->delete_later = (PyObject*) _PyGCHead_PREV(_Py_AS_GC(op));
#endif
/* Call the deallocator directly. This used to try to /* Call the deallocator directly. This used to try to
* fool Py_DECREF into calling it indirectly, but * fool Py_DECREF into calling it indirectly, but

View File

@ -25,7 +25,10 @@ typedef struct _gc_runtime_state GCState;
// Automatically choose the generation that needs collecting. // Automatically choose the generation that needs collecting.
#define GENERATION_AUTO (-1) #define GENERATION_AUTO (-1)
// A linked-list of objects using the `ob_tid` field as the next pointer. // A linked list of objects using the `ob_tid` field as the next pointer.
// The linked list pointers are distinct from any real thread ids, because the
// thread ids returned by _Py_ThreadId() are also pointers to distinct objects.
// No thread will confuse its own id with a linked list pointer.
struct worklist { struct worklist {
uintptr_t head; uintptr_t head;
}; };
@ -221,7 +224,7 @@ gc_visit_heaps_lock_held(PyInterpreterState *interp, mi_block_visit_fun *visitor
struct visitor_args *arg) struct visitor_args *arg)
{ {
// Offset of PyObject header from start of memory block. // Offset of PyObject header from start of memory block.
Py_ssize_t offset_base = sizeof(PyGC_Head); Py_ssize_t offset_base = 0;
if (_PyMem_DebugEnabled()) { if (_PyMem_DebugEnabled()) {
// The debug allocator adds two words at the beginning of each block. // The debug allocator adds two words at the beginning of each block.
offset_base += 2 * sizeof(size_t); offset_base += 2 * sizeof(size_t);
@ -331,8 +334,14 @@ update_refs(const mi_heap_t *heap, const mi_heap_area_t *area,
Py_ssize_t refcount = Py_REFCNT(op); Py_ssize_t refcount = Py_REFCNT(op);
_PyObject_ASSERT(op, refcount >= 0); _PyObject_ASSERT(op, refcount >= 0);
// Add the actual refcount to ob_tid. // We repurpose ob_tid to compute "gc_refs", the number of external
// references to the object (i.e., from outside the GC heaps). This means
// that ob_tid is no longer a valid thread id until it is restored by
// scan_heap_visitor(). Until then, we cannot use the standard reference
// counting functions or allow other threads to run Python code.
gc_maybe_init_refs(op); gc_maybe_init_refs(op);
// Add the actual refcount to ob_tid.
gc_add_refs(op, refcount); gc_add_refs(op, refcount);
// Subtract internal references from ob_tid. Objects with ob_tid > 0 // Subtract internal references from ob_tid. Objects with ob_tid > 0
@ -1508,8 +1517,10 @@ gc_alloc(PyTypeObject *tp, size_t basicsize, size_t presize)
if (mem == NULL) { if (mem == NULL) {
return _PyErr_NoMemory(tstate); return _PyErr_NoMemory(tstate);
} }
((PyObject **)mem)[0] = NULL; if (presize) {
((PyObject **)mem)[1] = NULL; ((PyObject **)mem)[0] = NULL;
((PyObject **)mem)[1] = NULL;
}
PyObject *op = (PyObject *)(mem + presize); PyObject *op = (PyObject *)(mem + presize);
_PyObject_GC_Link(op); _PyObject_GC_Link(op);
return op; return op;

View File

@ -1878,7 +1878,15 @@ _PySys_GetSizeOf(PyObject *o)
return (size_t)-1; return (size_t)-1;
} }
return (size_t)size + _PyType_PreHeaderSize(Py_TYPE(o)); size_t presize = 0;
if (!Py_IS_TYPE(o, &PyType_Type) ||
PyType_HasFeature((PyTypeObject *)o, Py_TPFLAGS_HEAPTYPE))
{
/* Add the size of the pre-header if "o" is not a static type */
presize = _PyType_PreHeaderSize(Py_TYPE(o));
}
return (size_t)size + presize;
} }
static PyObject * static PyObject *

View File

@ -70,6 +70,14 @@ def _type_unsigned_int_ptr():
def _sizeof_void_p(): def _sizeof_void_p():
return gdb.lookup_type('void').pointer().sizeof return gdb.lookup_type('void').pointer().sizeof
def _managed_dict_offset():
# See pycore_object.h
pyobj = gdb.lookup_type("PyObject")
if any(field.name == "ob_ref_local" for field in pyobj.fields()):
return -1 * _sizeof_void_p()
else:
return -3 * _sizeof_void_p()
Py_TPFLAGS_MANAGED_DICT = (1 << 4) Py_TPFLAGS_MANAGED_DICT = (1 << 4)
Py_TPFLAGS_HEAPTYPE = (1 << 9) Py_TPFLAGS_HEAPTYPE = (1 << 9)
@ -457,7 +465,7 @@ class HeapTypeObjectPtr(PyObjectPtr):
if dictoffset < 0: if dictoffset < 0:
if int_from_int(typeobj.field('tp_flags')) & Py_TPFLAGS_MANAGED_DICT: if int_from_int(typeobj.field('tp_flags')) & Py_TPFLAGS_MANAGED_DICT:
assert dictoffset == -1 assert dictoffset == -1
dictoffset = -3 * _sizeof_void_p() dictoffset = _managed_dict_offset()
else: else:
type_PyVarObject_ptr = gdb.lookup_type('PyVarObject').pointer() type_PyVarObject_ptr = gdb.lookup_type('PyVarObject').pointer()
tsize = int_from_int(self._gdbval.cast(type_PyVarObject_ptr)['ob_size']) tsize = int_from_int(self._gdbval.cast(type_PyVarObject_ptr)['ob_size'])
@ -485,9 +493,8 @@ class HeapTypeObjectPtr(PyObjectPtr):
has_values = int_from_int(typeobj.field('tp_flags')) & Py_TPFLAGS_MANAGED_DICT has_values = int_from_int(typeobj.field('tp_flags')) & Py_TPFLAGS_MANAGED_DICT
if not has_values: if not has_values:
return None return None
charptrptr_t = _type_char_ptr().pointer() ptr = self._gdbval.cast(_type_char_ptr()) + _managed_dict_offset()
ptr = self._gdbval.cast(charptrptr_t) - 3 char_ptr = ptr.cast(_type_char_ptr().pointer()).dereference()
char_ptr = ptr.dereference()
if (int(char_ptr) & 1) == 0: if (int(char_ptr) & 1) == 0:
return None return None
char_ptr += 1 char_ptr += 1