gh-91053: Add an optional callback that is invoked whenever a function is modified (#98175)

This commit is contained in:
mpage 2022-11-22 04:06:44 -08:00 committed by GitHub
parent 20d9749a0f
commit 3db0a21f73
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 524 additions and 0 deletions

View File

@ -118,3 +118,63 @@ There are a few functions specific to Python functions.
must be a dictionary or ``Py_None``.
Raises :exc:`SystemError` and returns ``-1`` on failure.
.. c:function:: int PyFunction_AddWatcher(PyFunction_WatchCallback callback)
Register *callback* as a function watcher for the current interpreter.
Return an ID which may be passed to :c:func:`PyFunction_ClearWatcher`.
In case of error (e.g. no more watcher IDs available),
return ``-1`` and set an exception.
.. versionadded:: 3.12
.. c:function:: int PyFunction_ClearWatcher(int watcher_id)
Clear watcher identified by *watcher_id* previously returned from
:c:func:`PyFunction_AddWatcher` for the current interpreter.
Return ``0`` on success, or ``-1`` and set an exception on error
(e.g. if the given *watcher_id* was never registered.)
.. versionadded:: 3.12
.. c:type:: PyFunction_WatchEvent
Enumeration of possible function watcher events:
- ``PyFunction_EVENT_CREATE``
- ``PyFunction_EVENT_DESTROY``
- ``PyFunction_EVENT_MODIFY_CODE``
- ``PyFunction_EVENT_MODIFY_DEFAULTS``
- ``PyFunction_EVENT_MODIFY_KWDEFAULTS``
.. versionadded:: 3.12
.. c:type:: int (*PyFunction_WatchCallback)(PyFunction_WatchEvent event, PyFunctionObject *func, PyObject *new_value)
Type of a function watcher callback function.
If *event* is ``PyFunction_EVENT_CREATE`` or ``PyFunction_EVENT_DESTROY``
then *new_value* will be ``NULL``. Otherwise, *new_value* will hold a
:term:`borrowed reference` to the new value that is about to be stored in
*func* for the attribute that is being modified.
The callback may inspect but must not modify *func*; doing so could have
unpredictable effects, including infinite recursion.
If *event* is ``PyFunction_EVENT_CREATE``, then the callback is invoked
after *func* has been fully initialized. Otherwise, the callback is invoked
before the modification to *func* takes place, so the prior state of *func*
can be inspected. The runtime is permitted to optimize away the creation of
function objects when possible. In such cases no event will be emitted.
Although this creates the possibility of an observable difference of
runtime behavior depending on optimization decisions, it does not change
the semantics of the Python code being executed.
If the callback returns with an exception set, it must return ``-1``; this
exception will be printed as an unraisable exception using
:c:func:`PyErr_WriteUnraisable`. Otherwise it should return ``0``.
.. versionadded:: 3.12

View File

@ -131,6 +131,55 @@ PyAPI_DATA(PyTypeObject) PyStaticMethod_Type;
PyAPI_FUNC(PyObject *) PyClassMethod_New(PyObject *);
PyAPI_FUNC(PyObject *) PyStaticMethod_New(PyObject *);
/*
 * X-macro listing every function watcher event. It invokes V(NAME) once per
 * event, in the order written here; callers supply V to generate per-event
 * code (enumerators below, module constants in the test module).
 */
#define FOREACH_FUNC_EVENT(V) \
V(CREATE) \
V(DESTROY) \
V(MODIFY_CODE) \
V(MODIFY_DEFAULTS) \
V(MODIFY_KWDEFAULTS)
/* Event kinds passed as the first argument of a PyFunction_WatchCallback. */
typedef enum {
/* Turns each listed NAME into the enumerator PyFunction_EVENT_NAME. */
#define DEF_EVENT(EVENT) PyFunction_EVENT_##EVENT,
FOREACH_FUNC_EVENT(DEF_EVENT)
#undef DEF_EVENT
} PyFunction_WatchEvent;
/*
* A callback that is invoked for different events in a function's lifecycle.
*
* The callback is invoked with a borrowed reference to func, after it is
* created and before it is modified or destroyed. The callback should not
* modify func.
*
* When a function's code object, defaults, or kwdefaults are modified the
* callback will be invoked with the respective event and new_value will
* contain a borrowed reference to the new value that is about to be stored in
* the function. Otherwise the third argument is NULL.
*
* If the callback returns with an exception set, it must return -1. Otherwise
* it should return 0.
*/
typedef int (*PyFunction_WatchCallback)(
PyFunction_WatchEvent event,
PyFunctionObject *func,
PyObject *new_value);
/*
* Register a per-interpreter callback that will be invoked for function lifecycle
* events.
*
* Returns a handle that may be passed to PyFunction_ClearWatcher on success,
* or -1 and sets an error if no more handles are available.
*/
PyAPI_FUNC(int) PyFunction_AddWatcher(PyFunction_WatchCallback callback);
/*
* Clear the watcher associated with the watcher_id handle.
*
* Returns 0 on success, or -1 and sets an exception if watcher_id is out of
* range or no watcher is registered for it.
*/
PyAPI_FUNC(int) PyFunction_ClearWatcher(int watcher_id);
#ifdef __cplusplus
}
#endif

View File

@ -8,6 +8,8 @@ extern "C" {
# error "this header requires Py_BUILD_CORE define"
#endif
/*
 * Number of function watcher slots available per interpreter. The interpreter
 * state tracks occupied slots in a uint8_t bitmask (active_func_watchers),
 * one bit per slot, so this value must not exceed 8.
 */
#define FUNC_MAX_WATCHERS 8
struct _py_func_runtime_state {
uint32_t next_version;
};

View File

@ -17,6 +17,7 @@ extern "C" {
#include "pycore_dict_state.h" // struct _Py_dict_state
#include "pycore_exceptions.h" // struct _Py_exc_state
#include "pycore_floatobject.h" // struct _Py_float_state
#include "pycore_function.h" // FUNC_MAX_WATCHERS
#include "pycore_genobject.h" // struct _Py_async_gen_state
#include "pycore_gc.h" // struct _gc_runtime_state
#include "pycore_list.h" // struct _Py_list_state
@ -171,6 +172,11 @@ struct _is {
// Initialized to _PyEval_EvalFrameDefault().
_PyFrameEvalFunction eval_frame;
PyDict_WatchCallback dict_watchers[DICT_MAX_WATCHERS];
PyFunction_WatchCallback func_watchers[FUNC_MAX_WATCHERS];
// One bit is set for each non-NULL entry in func_watchers
uint8_t active_func_watchers;
Py_ssize_t co_extra_user_count;
freefunc co_extra_freefuncs[MAX_CO_EXTRA_USERS];

View File

@ -336,5 +336,98 @@ class TestTypeWatchers(unittest.TestCase):
self.add_watcher()
class TestFuncWatchers(unittest.TestCase):
    # Exercises the function watcher C API through the _testcapi helpers.

    @contextmanager
    def add_watcher(self, func):
        # Register func as a function watcher for the duration of the
        # with-block; the watcher is cleared again even if the body raises.
        wid = _testcapi.add_func_watcher(func)
        try:
            yield
        finally:
            _testcapi.clear_func_watcher(wid)

    def test_func_events_dispatched(self):
        events = []
        def watcher(*args):
            events.append(args)

        with self.add_watcher(watcher):
            # Defining a function must emit a CREATE event.
            def myfunc():
                pass
            self.assertIn((_testcapi.PYFUNC_EVENT_CREATE, myfunc, None), events)
            # Remember the id now: the DESTROY event reports id(func) rather
            # than the function object itself.
            myfunc_id = id(myfunc)

            new_code = self.test_func_events_dispatched.__code__
            myfunc.__code__ = new_code
            self.assertIn((_testcapi.PYFUNC_EVENT_MODIFY_CODE, myfunc, new_code), events)

            # Defaults changed through the Python attribute ...
            new_defaults = (123,)
            myfunc.__defaults__ = new_defaults
            self.assertIn((_testcapi.PYFUNC_EVENT_MODIFY_DEFAULTS, myfunc, new_defaults), events)

            # ... and through the C API must both be reported.
            new_defaults = (456,)
            _testcapi.set_func_defaults_via_capi(myfunc, new_defaults)
            self.assertIn((_testcapi.PYFUNC_EVENT_MODIFY_DEFAULTS, myfunc, new_defaults), events)

            # Same for keyword-only defaults: attribute first, then C API.
            new_kwdefaults = {"self": 123}
            myfunc.__kwdefaults__ = new_kwdefaults
            self.assertIn((_testcapi.PYFUNC_EVENT_MODIFY_KWDEFAULTS, myfunc, new_kwdefaults), events)

            new_kwdefaults = {"self": 456}
            _testcapi.set_func_kwdefaults_via_capi(myfunc, new_kwdefaults)
            self.assertIn((_testcapi.PYFUNC_EVENT_MODIFY_KWDEFAULTS, myfunc, new_kwdefaults), events)

            # Clear events reference to func
            # (rebinding also rebinds the name the watcher closure appends to,
            # so the DESTROY event lands in the fresh list).
            events = []
            del myfunc
            self.assertIn((_testcapi.PYFUNC_EVENT_DESTROY, myfunc_id, None), events)

    def test_multiple_watchers(self):
        events0 = []
        def first_watcher(*args):
            events0.append(args)

        events1 = []
        def second_watcher(*args):
            events1.append(args)

        # Both registered watchers must observe the same CREATE event.
        with self.add_watcher(first_watcher):
            with self.add_watcher(second_watcher):
                def myfunc():
                    pass

                event = (_testcapi.PYFUNC_EVENT_CREATE, myfunc, None)
                self.assertIn(event, events0)
                self.assertIn(event, events1)

    def test_watcher_raises_error(self):
        class MyError(Exception):
            pass

        def watcher(*args):
            raise MyError("testing 123")

        # An exception raised by a watcher is reported as unraisable, with the
        # function as the associated object; function creation still succeeds.
        with self.add_watcher(watcher):
            with catch_unraisable_exception() as cm:
                def myfunc():
                    pass

                self.assertIs(cm.unraisable.object, myfunc)
                self.assertIsInstance(cm.unraisable.exc_value, MyError)

    def test_clear_out_of_range_watcher_id(self):
        with self.assertRaisesRegex(ValueError, r"invalid func watcher ID -1"):
            _testcapi.clear_func_watcher(-1)
        with self.assertRaisesRegex(ValueError, r"invalid func watcher ID 8"):
            _testcapi.clear_func_watcher(8)  # FUNC_MAX_WATCHERS = 8

    def test_clear_unassigned_watcher_id(self):
        # ID 1 is in range but nothing is registered in that slot.
        with self.assertRaisesRegex(ValueError, r"no func watcher set for ID 1"):
            _testcapi.clear_func_watcher(1)

    def test_allocate_too_many_watchers(self):
        with self.assertRaisesRegex(RuntimeError, r"no more func watcher IDs"):
            _testcapi.allocate_too_many_func_watchers()
if __name__ == "__main__":
unittest.main()

View File

@ -0,0 +1,4 @@
Optimizing interpreters and JIT compilers may need to invalidate internal
metadata when functions are modified. This change adds the ability to
provide a callback that will be invoked each time a function is created,
modified, or destroyed.

View File

@ -1,5 +1,7 @@
#include "parts.h"
#define Py_BUILD_CORE
#include "pycore_function.h" // FUNC_MAX_WATCHERS
// Test dict watching
static PyObject *g_dict_watch_events;
@ -275,6 +277,223 @@ unwatch_type(PyObject *self, PyObject *args)
Py_RETURN_NONE;
}
// Test function watchers
// Number of Python-level function watchers this test module can hold at once.
#define NUM_FUNC_WATCHERS 2
// Python callables registered through add_func_watcher(), indexed by slot.
static PyObject *pyfunc_watchers[NUM_FUNC_WATCHERS];
// Value returned by PyFunction_AddWatcher() for each slot, or -1 if the slot
// is free.
static int func_watcher_ids[NUM_FUNC_WATCHERS] = {-1, -1};
/*
 * Call the builtin id() on obj.
 *
 * Returns a new reference to the result, or NULL if the builtins namespace
 * cannot be obtained, "id" cannot be looked up, or the call itself fails.
 */
static PyObject *
get_id(PyObject *obj)
{
    PyObject *builtins_ns = PyEval_GetBuiltins();  // borrowed ref.
    if (!builtins_ns) {
        return NULL;
    }

    PyObject *key = PyUnicode_FromString("id");
    if (!key) {
        return NULL;
    }
    PyObject *id_callable = PyObject_GetItem(builtins_ns, key);
    Py_DECREF(key);
    if (!id_callable) {
        return NULL;
    }

    PyObject *argv[1] = {obj};
    PyObject *result = PyObject_Vectorcall(id_callable, argv, 1, NULL);
    Py_DECREF(id_callable);
    return result;
}
/*
 * Forward a function watcher event to a Python callable.
 *
 * The callable is invoked as watcher(event, func_or_id, new_value), where
 * new_value is None when the C-level value is NULL. For DESTROY events the
 * second argument is id(func) instead of the function object.
 *
 * Returns 0 if the call succeeded and -1 otherwise.
 */
static int
call_pyfunc_watcher(PyObject *watcher, PyFunction_WatchEvent event,
                    PyFunctionObject *func, PyObject *new_value)
{
    PyObject *event_arg = PyLong_FromLong(event);
    if (!event_arg) {
        return -1;
    }

    // Substitute None for a missing value and hold our own reference.
    PyObject *value_arg = (new_value != NULL) ? new_value : Py_None;
    Py_INCREF(value_arg);

    PyObject *subject = NULL;
    if (event != PyFunction_EVENT_DESTROY) {
        Py_INCREF(func);
        subject = (PyObject *)func;
    }
    else {
        /* Don't expose a function that's about to be destroyed to managed code */
        subject = get_id((PyObject *)func);
        if (!subject) {
            Py_DECREF(event_arg);
            Py_DECREF(value_arg);
            return -1;
        }
    }

    PyObject *argv[3] = {event_arg, subject, value_arg};
    PyObject *ret = PyObject_Vectorcall(watcher, argv, 3, NULL);
    int status = (ret != NULL) ? 0 : -1;
    Py_XDECREF(ret);
    Py_DECREF(value_arg);
    Py_DECREF(event_arg);
    Py_DECREF(subject);
    return status;
}
/* C-level watcher for slot 0: forwards the event to pyfunc_watchers[0]. */
static int
first_func_watcher_callback(PyFunction_WatchEvent event, PyFunctionObject *func,
                            PyObject *new_value)
{
    PyObject *py_watcher = pyfunc_watchers[0];
    return call_pyfunc_watcher(py_watcher, event, func, new_value);
}
/* C-level watcher for slot 1: forwards the event to pyfunc_watchers[1]. */
static int
second_func_watcher_callback(PyFunction_WatchEvent event,
                             PyFunctionObject *func, PyObject *new_value)
{
    PyObject *py_watcher = pyfunc_watchers[1];
    return call_pyfunc_watcher(py_watcher, event, func, new_value);
}
// C callback for each test slot; entry i forwards to pyfunc_watchers[i].
static PyFunction_WatchCallback func_watcher_callbacks[NUM_FUNC_WATCHERS] = {
first_func_watcher_callback,
second_func_watcher_callback
};
/*
 * Add the integer value of a watcher event to module under the given name.
 *
 * Returns -1 if the integer object cannot be created; otherwise returns the
 * result of PyModule_AddObjectRef().
 */
static int
add_func_event(PyObject *module, const char *name, PyFunction_WatchEvent event)
{
    PyObject *event_obj = PyLong_FromLong(event);
    if (!event_obj) {
        return -1;
    }
    // PyModule_AddObjectRef() takes its own reference, so drop ours afterwards.
    int status = PyModule_AddObjectRef(module, name, event_obj);
    Py_DECREF(event_obj);
    return status;
}
/*
 * add_func_watcher(func) -> int
 *
 * Register the Python function func as a function watcher, using one of the
 * NUM_FUNC_WATCHERS test slots.
 *
 * Returns the watcher ID handed out by PyFunction_AddWatcher(), which is the
 * value clear_func_watcher() expects. Raises TypeError if func is not a
 * function, RuntimeError if all test slots are in use, or whatever
 * PyFunction_AddWatcher() raises on failure.
 */
static PyObject *
add_func_watcher(PyObject *self, PyObject *func)
{
    if (!PyFunction_Check(func)) {
        PyErr_SetString(PyExc_TypeError, "'func' must be a function");
        return NULL;
    }
    int idx = -1;
    for (int i = 0; i < NUM_FUNC_WATCHERS; i++) {
        if (func_watcher_ids[i] == -1) {
            idx = i;
            break;
        }
    }
    if (idx == -1) {
        PyErr_SetString(PyExc_RuntimeError, "no free watchers");
        return NULL;
    }
    int watcher_id = PyFunction_AddWatcher(func_watcher_callbacks[idx]);
    if (watcher_id < 0) {
        return NULL;
    }
    // Return the interpreter's watcher ID, not the local slot index: the two
    // only coincide when no other function watchers are registered, and
    // clear_func_watcher() passes the value to PyFunction_ClearWatcher().
    PyObject *result = PyLong_FromLong(watcher_id);
    if (result == NULL) {
        // Roll back so no callback stays registered for an empty slot. The
        // ID was just handed out, so clearing it is not expected to fail.
        (void)PyFunction_ClearWatcher(watcher_id);
        return NULL;
    }
    func_watcher_ids[idx] = watcher_id;
    pyfunc_watchers[idx] = Py_NewRef(func);
    return result;
}
/*
 * clear_func_watcher(watcher_id) -> None
 *
 * Clear the function watcher with the given ID via PyFunction_ClearWatcher()
 * and, if the ID belongs to one of this module's test slots, release the
 * Python callable stored there and mark the slot free.
 *
 * Propagates the conversion error if watcher_id is not convertible to a C
 * long, raises ValueError if it does not fit in an int, and otherwise
 * propagates whatever PyFunction_ClearWatcher() raises.
 */
static PyObject *
clear_func_watcher(PyObject *self, PyObject *watcher_id_obj)
{
    long watcher_id = PyLong_AsLong(watcher_id_obj);
    // -1 is also a value callers pass deliberately, so the error indicator is
    // the only way to tell a failed conversion from a real -1.
    if (watcher_id == -1 && PyErr_Occurred()) {
        return NULL;
    }
    if ((watcher_id < INT_MIN) || (watcher_id > INT_MAX)) {
        PyErr_SetString(PyExc_ValueError, "invalid watcher ID");
        return NULL;
    }
    int wid = (int) watcher_id;
    if (PyFunction_ClearWatcher(wid) < 0) {
        return NULL;
    }
    // The ID may belong to a watcher that was not registered through
    // add_func_watcher(); in that case there is no slot to release, and the
    // table must not be indexed with -1.
    for (int i = 0; i < NUM_FUNC_WATCHERS; i++) {
        if (func_watcher_ids[i] == wid) {
            Py_CLEAR(pyfunc_watchers[i]);
            func_watcher_ids[i] = -1;
            break;
        }
    }
    Py_RETURN_NONE;
}
/* Watcher callback that ignores every event and always reports success. */
static int
noop_func_event_handler(PyFunction_WatchEvent event, PyFunctionObject *func,
                        PyObject *new_value)
{
    (void)event;
    (void)func;
    (void)new_value;
    return 0;
}
/*
 * allocate_too_many_func_watchers() -> None
 *
 * Try to register FUNC_MAX_WATCHERS + 1 no-op watchers, stopping at the first
 * failure, then clear every watcher that was registered. An exception raised
 * by the failed registration is re-raised after the cleanup; a failure while
 * clearing is reported through PyErr_WriteUnraisable().
 */
static PyObject *
allocate_too_many_func_watchers(PyObject *self, PyObject *args)
{
    int registered[FUNC_MAX_WATCHERS + 1];
    const unsigned long capacity = sizeof(registered) / sizeof(int);
    int count = 0;
    while ((unsigned long)count < capacity) {
        int id = PyFunction_AddWatcher(noop_func_event_handler);
        if (id == -1) {
            break;
        }
        registered[count] = id;
        count++;
    }

    // Set any pending exception aside so the cleanup below runs without one.
    PyObject *exc_type, *exc_value, *exc_tb;
    PyErr_Fetch(&exc_type, &exc_value, &exc_tb);
    for (int k = 0; k < count; k++) {
        if (PyFunction_ClearWatcher(registered[k]) < 0) {
            PyErr_WriteUnraisable(Py_None);
            break;
        }
    }

    if (exc_type != NULL) {
        PyErr_Restore(exc_type, exc_value, exc_tb);
        return NULL;
    }
    if (PyErr_Occurred()) {
        return NULL;
    }
    Py_RETURN_NONE;
}
/*
 * set_func_defaults_via_capi(func, defaults) -> None
 *
 * Thin wrapper that passes both arguments to PyFunction_SetDefaults().
 * Returns NULL if argument parsing or the C API call fails.
 */
static PyObject *
set_func_defaults(PyObject *self, PyObject *args)
{
    PyObject *target = NULL;
    PyObject *new_defaults = NULL;
    if (PyArg_ParseTuple(args, "OO", &target, &new_defaults)
        && PyFunction_SetDefaults(target, new_defaults) >= 0)
    {
        Py_RETURN_NONE;
    }
    return NULL;
}
/*
 * set_func_kwdefaults_via_capi(func, kwdefaults) -> None
 *
 * Thin wrapper that passes both arguments to PyFunction_SetKwDefaults().
 * Returns NULL if argument parsing or the C API call fails.
 */
static PyObject *
set_func_kwdefaults(PyObject *self, PyObject *args)
{
    PyObject *target = NULL;
    PyObject *new_kwdefaults = NULL;
    if (PyArg_ParseTuple(args, "OO", &target, &new_kwdefaults)
        && PyFunction_SetKwDefaults(target, new_kwdefaults) >= 0)
    {
        Py_RETURN_NONE;
    }
    return NULL;
}
static PyMethodDef test_methods[] = {
// Dict watchers.
{"add_dict_watcher", add_dict_watcher, METH_O, NULL},
@ -289,6 +508,14 @@ static PyMethodDef test_methods[] = {
{"watch_type", watch_type, METH_VARARGS, NULL},
{"unwatch_type", unwatch_type, METH_VARARGS, NULL},
{"get_type_modified_events", get_type_modified_events, METH_NOARGS, NULL},
// Function watchers.
{"add_func_watcher", add_func_watcher, METH_O, NULL},
{"clear_func_watcher", clear_func_watcher, METH_O, NULL},
{"set_func_defaults_via_capi", set_func_defaults, METH_VARARGS, NULL},
{"set_func_kwdefaults_via_capi", set_func_kwdefaults, METH_VARARGS, NULL},
{"allocate_too_many_func_watchers", allocate_too_many_func_watchers,
METH_NOARGS, NULL},
{NULL},
};
@ -298,5 +525,15 @@ _PyTestCapi_Init_Watchers(PyObject *mod)
if (PyModule_AddFunctions(mod, test_methods) < 0) {
return -1;
}
/* Expose each event as an attribute on the module */
#define ADD_EVENT(event) \
if (add_func_event(mod, "PYFUNC_EVENT_" #event, \
PyFunction_EVENT_##event)) { \
return -1; \
}
FOREACH_FUNC_EVENT(ADD_EVENT);
#undef ADD_EVENT
return 0;
}

View File

@ -3,10 +3,68 @@
#include "Python.h"
#include "pycore_ceval.h" // _PyEval_BuiltinsFromGlobals()
#include "pycore_function.h" // FUNC_MAX_WATCHERS
#include "pycore_object.h" // _PyObject_GC_UNTRACK()
#include "pycore_pyerrors.h" // _PyErr_Occurred()
#include "structmember.h" // PyMemberDef
/*
 * Invoke every registered function watcher of interp with the given event.
 *
 * A callback that returns a negative value is reported through
 * PyErr_WriteUnraisable() with func as the associated object; the remaining
 * callbacks are still invoked.
 */
static void
notify_func_watchers(PyInterpreterState *interp, PyFunction_WatchEvent event,
                     PyFunctionObject *func, PyObject *new_value)
{
    PyFunction_WatchCallback *slot = interp->func_watchers;
    PyFunction_WatchCallback *const end = slot + FUNC_MAX_WATCHERS;
    for (; slot != end; slot++) {
        PyFunction_WatchCallback callback = *slot;
        if (callback == NULL) {
            continue;
        }
        if (callback(event, func, new_value) < 0) {
            PyErr_WriteUnraisable((PyObject *)func);
        }
    }
}
/*
 * Dispatch a function event to the current interpreter's watchers.
 *
 * Does nothing beyond reading the active_func_watchers bitmask when no
 * watcher is registered.
 */
static inline void
handle_func_event(PyFunction_WatchEvent event, PyFunctionObject *func,
                  PyObject *new_value)
{
    PyInterpreterState *interp = _PyInterpreterState_GET();
    if (!interp->active_func_watchers) {
        return;
    }
    notify_func_watchers(interp, event, func, new_value);
}
/*
 * Register callback in the first free watcher slot of the current
 * interpreter and mark that slot active in the bitmask.
 *
 * Returns the slot index as the watcher ID, or -1 with RuntimeError set when
 * all FUNC_MAX_WATCHERS slots are occupied.
 */
int
PyFunction_AddWatcher(PyFunction_WatchCallback callback)
{
    PyInterpreterState *interp = _PyInterpreterState_GET();
    assert(interp->_initialized);

    int slot = 0;
    while (slot < FUNC_MAX_WATCHERS && interp->func_watchers[slot] != NULL) {
        slot++;
    }
    if (slot == FUNC_MAX_WATCHERS) {
        PyErr_SetString(PyExc_RuntimeError, "no more func watcher IDs available");
        return -1;
    }

    interp->func_watchers[slot] = callback;
    interp->active_func_watchers |= (1 << slot);
    return slot;
}
/*
 * Remove the watcher stored under watcher_id in the current interpreter and
 * clear its bit in the active-watcher bitmask.
 *
 * Returns 0 on success. Returns -1 with ValueError set if watcher_id is
 * outside [0, FUNC_MAX_WATCHERS) or the slot holds no watcher.
 */
int
PyFunction_ClearWatcher(int watcher_id)
{
    PyInterpreterState *interp = _PyInterpreterState_GET();

    int in_range = (0 <= watcher_id) && (watcher_id < FUNC_MAX_WATCHERS);
    if (!in_range) {
        PyErr_Format(PyExc_ValueError, "invalid func watcher ID %d",
                     watcher_id);
        return -1;
    }
    if (interp->func_watchers[watcher_id] == NULL) {
        PyErr_Format(PyExc_ValueError, "no func watcher set for ID %d",
                     watcher_id);
        return -1;
    }

    interp->func_watchers[watcher_id] = NULL;
    interp->active_func_watchers &= ~(1 << watcher_id);
    return 0;
}
PyFunctionObject *
_PyFunction_FromConstructor(PyFrameConstructor *constr)
{
@ -31,6 +89,7 @@ _PyFunction_FromConstructor(PyFrameConstructor *constr)
op->vectorcall = _PyFunction_Vectorcall;
op->func_version = 0;
_PyObject_GC_TRACK(op);
handle_func_event(PyFunction_EVENT_CREATE, op, NULL);
return op;
}
@ -105,6 +164,7 @@ PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname
op->vectorcall = _PyFunction_Vectorcall;
op->func_version = 0;
_PyObject_GC_TRACK(op);
handle_func_event(PyFunction_EVENT_CREATE, op, NULL);
return (PyObject *)op;
error:
@ -196,6 +256,8 @@ PyFunction_SetDefaults(PyObject *op, PyObject *defaults)
PyErr_SetString(PyExc_SystemError, "non-tuple default args");
return -1;
}
handle_func_event(PyFunction_EVENT_MODIFY_DEFAULTS,
(PyFunctionObject *) op, defaults);
((PyFunctionObject *)op)->func_version = 0;
Py_XSETREF(((PyFunctionObject *)op)->func_defaults, defaults);
return 0;
@ -236,6 +298,8 @@ PyFunction_SetKwDefaults(PyObject *op, PyObject *defaults)
"non-dict keyword only default args");
return -1;
}
handle_func_event(PyFunction_EVENT_MODIFY_KWDEFAULTS,
(PyFunctionObject *) op, defaults);
((PyFunctionObject *)op)->func_version = 0;
Py_XSETREF(((PyFunctionObject *)op)->func_kwdefaults, defaults);
return 0;
@ -389,6 +453,7 @@ func_set_code(PyFunctionObject *op, PyObject *value, void *Py_UNUSED(ignored))
nclosure, nfree);
return -1;
}
handle_func_event(PyFunction_EVENT_MODIFY_CODE, op, value);
op->func_version = 0;
Py_XSETREF(op->func_code, Py_NewRef(value));
return 0;
@ -468,6 +533,7 @@ func_set_defaults(PyFunctionObject *op, PyObject *value, void *Py_UNUSED(ignored
return -1;
}
handle_func_event(PyFunction_EVENT_MODIFY_DEFAULTS, op, value);
op->func_version = 0;
Py_XSETREF(op->func_defaults, Py_XNewRef(value));
return 0;
@ -508,6 +574,7 @@ func_set_kwdefaults(PyFunctionObject *op, PyObject *value, void *Py_UNUSED(ignor
return -1;
}
handle_func_event(PyFunction_EVENT_MODIFY_KWDEFAULTS, op, value);
op->func_version = 0;
Py_XSETREF(op->func_kwdefaults, Py_XNewRef(value));
return 0;
@ -687,6 +754,7 @@ func_clear(PyFunctionObject *op)
static void
func_dealloc(PyFunctionObject *op)
{
handle_func_event(PyFunction_EVENT_DESTROY, op, NULL);
_PyObject_GC_UNTRACK(op);
if (op->func_weakreflist != NULL) {
PyObject_ClearWeakRefs((PyObject *) op);

View File

@ -461,6 +461,11 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
interp->dict_state.watchers[i] = NULL;
}
for (int i=0; i < FUNC_MAX_WATCHERS; i++) {
interp->func_watchers[i] = NULL;
}
interp->active_func_watchers = 0;
// XXX Once we have one allocator per interpreter (i.e.
// per-interpreter GC) we must ensure that all of the interpreter's
// objects have been cleaned up at the point.