From 6bcbee09df9f6fa6496fb7f68b6d655f190903a7 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Thu, 2 May 2024 19:30:00 +0200 Subject: [PATCH] gh-93502: Add new C-API functions to trace object creation and destruction (#115945) --- Doc/c-api/init.rst | 52 ++++++++++++ Doc/whatsnew/3.13.rst | 5 ++ Include/cpython/object.h | 10 +++ Include/internal/pycore_object.h | 2 +- Include/internal/pycore_runtime.h | 7 ++ Include/internal/pycore_runtime_init.h | 4 + ...4-02-26-13-14-52.gh-issue-93502.JMWRvA.rst | 4 + Modules/_testcapimodule.c | 84 +++++++++++++++++++ Objects/object.c | 37 ++++++-- Python/tracemalloc.c | 10 ++- 10 files changed, 207 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-02-26-13-14-52.gh-issue-93502.JMWRvA.rst diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst index 8725ce08514..9e118d4f361 100644 --- a/Doc/c-api/init.rst +++ b/Doc/c-api/init.rst @@ -1904,6 +1904,58 @@ Python-level trace functions in previous versions. .. versionadded:: 3.12 +Reference tracing +================= + +.. versionadded:: 3.13 + +.. c:type:: int (*PyRefTracer)(PyObject *, int event, void* data) + + The type of the trace function registered using :c:func:`PyRefTracer_SetTracer`. + The first parameter is a Python object that has been just created (when **event** + is set to :c:data:`PyRefTracer_CREATE`) or about to be destroyed (when **event** + is set to :c:data:`PyRefTracer_DESTROY`). The **data** argument is the opaque pointer + that was provided when :c:func:`PyRefTracer_SetTracer` was called. + +.. versionadded:: 3.13 + +.. c:var:: int PyRefTracer_CREATE + + The value for the *event* parameter to :c:type:`PyRefTracer` functions when a Python + object has been created. + +.. c:var:: int PyRefTracer_DESTROY + + The value for the *event* parameter to :c:type:`PyRefTracer` functions when a Python + object has been destroyed. + +.. c:function:: int PyRefTracer_SetTracer(PyRefTracer tracer, void *data) + + Register a reference tracer function. The function will be called when a new + Python has been created or when an object is going to be destroyed. If + **data** is provided it must be an opaque pointer that will be provided when + the tracer function is called. Return ``0`` on success. Set an exception and + return ``-1`` on error. + + Not that tracer functions **must not** create Python objects inside or + otherwise the call will be re-entrant. The tracer also **must not** clear + any existing exception or set an exception. The GIL will be held every time + the tracer function is called. + + The GIL must be held when calling this function. + +.. versionadded:: 3.13 + +.. c:function:: PyRefTracer PyRefTracer_GetTracer(void** data) + + Get the registered reference tracer function and the value of the opaque data + pointer that was registered when :c:func:`PyRefTracer_SetTracer` was called. + If no tracer was registered this function will return NULL and will set the + **data** pointer to NULL. + + The GIL must be held when calling this function. + +.. versionadded:: 3.13 .. _advanced-debugging: diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 3ccf17be979..fbf2f4c4474 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1961,6 +1961,11 @@ New Features * Add :c:func:`PyType_GetModuleByDef` to the limited C API (Contributed by Victor Stinner in :gh:`116936`.) +* Add two new functions to the C-API, :c:func:`PyRefTracer_SetTracer` and + :c:func:`PyRefTracer_GetTracer`, that allows to track object creation and + destruction the same way the :mod:`tracemalloc` module does. (Contributed + by Pablo Galindo in :gh:`93502`.) + Porting to Python 3.13 ---------------------- diff --git a/Include/cpython/object.h b/Include/cpython/object.h index a6b93b93ab0..c2830b75e66 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -510,3 +510,13 @@ PyAPI_FUNC(int) PyType_Unwatch(int watcher_id, PyObject *type); * assigned, or 0 if a new tag could not be assigned. */ PyAPI_FUNC(int) PyUnstable_Type_AssignVersionTag(PyTypeObject *type); + + +typedef enum { + PyRefTracer_CREATE = 0, + PyRefTracer_DESTROY = 1, +} PyRefTracerEvent; + +typedef int (*PyRefTracer)(PyObject *, PyRefTracerEvent event, void *); +PyAPI_FUNC(int) PyRefTracer_SetTracer(PyRefTracer tracer, void *data); +PyAPI_FUNC(PyRefTracer) PyRefTracer_GetTracer(void**); diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 7df8003196d..3b0222b05cb 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -257,7 +257,7 @@ extern int _PyDict_CheckConsistency(PyObject *mp, int check_content); when a memory block is reused from a free list. Internal function called by _Py_NewReference(). */ -extern int _PyTraceMalloc_NewReference(PyObject *op); +extern int _PyTraceMalloc_TraceRef(PyObject *op, PyRefTracerEvent event, void*); // Fast inlined version of PyType_HasFeature() static inline int diff --git a/Include/internal/pycore_runtime.h b/Include/internal/pycore_runtime.h index dc6f6f100f7..f58eccf729c 100644 --- a/Include/internal/pycore_runtime.h +++ b/Include/internal/pycore_runtime.h @@ -132,6 +132,12 @@ typedef struct _Py_DebugOffsets { } unicode_object; } _Py_DebugOffsets; +/* Reference tracer state */ +struct _reftracer_runtime_state { + PyRefTracer tracer_func; + void* tracer_data; +}; + /* Full Python runtime state */ /* _PyRuntimeState holds the global state for the CPython runtime. @@ -236,6 +242,7 @@ typedef struct pyruntimestate { struct _fileutils_state fileutils; struct _faulthandler_runtime_state faulthandler; struct _tracemalloc_runtime_state tracemalloc; + struct _reftracer_runtime_state ref_tracer; // The rwmutex is used to prevent overlapping global and per-interpreter // stop-the-world events. Global stop-the-world events lock the mutex diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index 41331df8320..98920dbb7c7 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -128,6 +128,10 @@ extern PyTypeObject _PyExc_MemoryError; }, \ .faulthandler = _faulthandler_runtime_state_INIT, \ .tracemalloc = _tracemalloc_runtime_state_INIT, \ + .ref_tracer = { \ + .tracer_func = NULL, \ + .tracer_data = NULL, \ + }, \ .stoptheworld = { \ .is_global = 1, \ }, \ diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-02-26-13-14-52.gh-issue-93502.JMWRvA.rst b/Misc/NEWS.d/next/Core and Builtins/2024-02-26-13-14-52.gh-issue-93502.JMWRvA.rst new file mode 100644 index 00000000000..524626950c0 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-02-26-13-14-52.gh-issue-93502.JMWRvA.rst @@ -0,0 +1,4 @@ +Add two new functions to the C-API, :c:func:`PyRefTracer_SetTracer` and +:c:func:`PyRefTracer_GetTracer`, that allows to track object creation and +destruction the same way the :mod:`tracemalloc` module does. Patch by Pablo +Galindo diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 3448291e401..f5892fc5ed2 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -3219,6 +3219,89 @@ test_weakref_capi(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) _Py_COMP_DIAG_POP } +struct simpletracer_data { + int create_count; + int destroy_count; + void* addresses[10]; +}; + +static int _simpletracer(PyObject *obj, PyRefTracerEvent event, void* data) { + struct simpletracer_data* the_data = (struct simpletracer_data*)data; + assert(the_data->create_count + the_data->destroy_count < (int)Py_ARRAY_LENGTH(the_data->addresses)); + the_data->addresses[the_data->create_count + the_data->destroy_count] = obj; + if (event == PyRefTracer_CREATE) { + the_data->create_count++; + } else { + the_data->destroy_count++; + } + return 0; +} + +static PyObject * +test_reftracer(PyObject *ob, PyObject *Py_UNUSED(ignored)) +{ + // Save the current tracer and data to restore it later + void* current_data; + PyRefTracer current_tracer = PyRefTracer_GetTracer(¤t_data); + + struct simpletracer_data tracer_data = {0}; + void* the_data = &tracer_data; + // Install a simple tracer function + if (PyRefTracer_SetTracer(_simpletracer, the_data) != 0) { + goto failed; + } + + // Check that the tracer was correctly installed + void* data; + if (PyRefTracer_GetTracer(&data) != _simpletracer || data != the_data) { + PyErr_SetString(PyExc_AssertionError, "The reftracer not correctly installed"); + (void)PyRefTracer_SetTracer(NULL, NULL); + goto failed; + } + + // Create a bunch of objects + PyObject* obj = PyList_New(0); + if (obj == NULL) { + goto failed; + } + PyObject* obj2 = PyDict_New(); + if (obj2 == NULL) { + Py_DECREF(obj); + goto failed; + } + + // Kill all objects + Py_DECREF(obj); + Py_DECREF(obj2); + + // Remove the tracer + (void)PyRefTracer_SetTracer(NULL, NULL); + + // Check that the tracer was removed + if (PyRefTracer_GetTracer(&data) != NULL || data != NULL) { + PyErr_SetString(PyExc_ValueError, "The reftracer was not correctly removed"); + goto failed; + } + + if (tracer_data.create_count != 2 || + tracer_data.addresses[0] != obj || + tracer_data.addresses[1] != obj2) { + PyErr_SetString(PyExc_ValueError, "The object creation was not correctly traced"); + goto failed; + } + + if (tracer_data.destroy_count != 2 || + tracer_data.addresses[2] != obj || + tracer_data.addresses[3] != obj2) { + PyErr_SetString(PyExc_ValueError, "The object destruction was not correctly traced"); + goto failed; + } + PyRefTracer_SetTracer(current_tracer, current_data); + Py_RETURN_NONE; +failed: + PyRefTracer_SetTracer(current_tracer, current_data); + return NULL; +} static PyMethodDef TestMethods[] = { {"set_errno", set_errno, METH_VARARGS}, @@ -3257,6 +3340,7 @@ static PyMethodDef TestMethods[] = { {"get_type_fullyqualname", get_type_fullyqualname, METH_O}, {"get_type_module_name", get_type_module_name, METH_O}, {"test_get_type_dict", test_get_type_dict, METH_NOARGS}, + {"test_reftracer", test_reftracer, METH_NOARGS}, {"_test_thread_state", test_thread_state, METH_VARARGS}, #ifndef MS_WINDOWS {"_spawn_pthread_waiter", spawn_pthread_waiter, METH_NOARGS}, diff --git a/Objects/object.c b/Objects/object.c index 45310a6c22d..79e4fb4dbbf 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2372,9 +2372,6 @@ _PyTypes_FiniTypes(PyInterpreterState *interp) static inline void new_reference(PyObject *op) { - if (_PyRuntime.tracemalloc.config.tracing) { - _PyTraceMalloc_NewReference(op); - } // Skip the immortal object check in Py_SET_REFCNT; always set refcnt to 1 #if !defined(Py_GIL_DISABLED) op->ob_refcnt = 1; @@ -2389,6 +2386,11 @@ new_reference(PyObject *op) #ifdef Py_TRACE_REFS _Py_AddToAllObjects(op); #endif + struct _reftracer_runtime_state *tracer = &_PyRuntime.ref_tracer; + if (tracer->tracer_func != NULL) { + void* data = tracer->tracer_data; + tracer->tracer_func(op, PyRefTracer_CREATE, data); + } } void @@ -2450,12 +2452,13 @@ _PyObject_SetDeferredRefcount(PyObject *op) void _Py_ResurrectReference(PyObject *op) { - if (_PyRuntime.tracemalloc.config.tracing) { - _PyTraceMalloc_NewReference(op); - } #ifdef Py_TRACE_REFS _Py_AddToAllObjects(op); #endif + if (_PyRuntime.ref_tracer.tracer_func != NULL) { + void* data = _PyRuntime.ref_tracer.tracer_data; + _PyRuntime.ref_tracer.tracer_func(op, PyRefTracer_CREATE, data); + } } @@ -2845,6 +2848,12 @@ _Py_Dealloc(PyObject *op) Py_INCREF(type); #endif + struct _reftracer_runtime_state *tracer = &_PyRuntime.ref_tracer; + if (tracer->tracer_func != NULL) { + void* data = tracer->tracer_data; + tracer->tracer_func(op, PyRefTracer_DESTROY, data); + } + #ifdef Py_TRACE_REFS _Py_ForgetReference(op); #endif @@ -2933,6 +2942,22 @@ _Py_SetRefcnt(PyObject *ob, Py_ssize_t refcnt) Py_SET_REFCNT(ob, refcnt); } +int PyRefTracer_SetTracer(PyRefTracer tracer, void *data) { + assert(PyGILState_Check()); + _PyRuntime.ref_tracer.tracer_func = tracer; + _PyRuntime.ref_tracer.tracer_data = data; + return 0; +} + +PyRefTracer PyRefTracer_GetTracer(void** data) { + assert(PyGILState_Check()); + if (data != NULL) { + *data = _PyRuntime.ref_tracer.tracer_data; + } + return _PyRuntime.ref_tracer.tracer_func; +} + + static PyObject* constants[] = { &_Py_NoneStruct, // Py_CONSTANT_NONE diff --git a/Python/tracemalloc.c b/Python/tracemalloc.c index 19b64c619fe..e3ec72062f6 100644 --- a/Python/tracemalloc.c +++ b/Python/tracemalloc.c @@ -906,6 +906,10 @@ _PyTraceMalloc_Start(int max_nframe) return -1; } + if (PyRefTracer_SetTracer(_PyTraceMalloc_TraceRef, NULL) < 0) { + return -1; + } + if (tracemalloc_config.tracing) { /* hook already installed: do nothing */ return 0; @@ -1352,8 +1356,12 @@ _PyTraceMalloc_Fini(void) Do nothing if tracemalloc is not tracing memory allocations or if the object memory block is not already traced. */ int -_PyTraceMalloc_NewReference(PyObject *op) +_PyTraceMalloc_TraceRef(PyObject *op, PyRefTracerEvent event, void* Py_UNUSED(ignore)) { + if (event != PyRefTracer_CREATE) { + return 0; + } + assert(PyGILState_Check()); if (!tracemalloc_config.tracing) {