gh-119258: Eliminate Type Guards in Tier 2 Optimizer with Watcher (GH-119365)

Co-authored-by: parmeggiani <parmeggiani@spaziodati.eu>
Co-authored-by: dpdani <git@danieleparmeggiani.me>
Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
Co-authored-by: Brandt Bucher <brandtbucher@microsoft.com>
Co-authored-by: Ken Jin <kenjin@python.org>
This commit is contained in:
Saul Shanabrook 2024-06-08 05:41:45 -04:00 committed by GitHub
parent 2080425154
commit 55402d3232
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 366 additions and 59 deletions

View File

@ -33,6 +33,7 @@ struct _Py_UopsSymbol {
int flags; // 0 bits: Top; 2 or more bits: Bottom int flags; // 0 bits: Top; 2 or more bits: Bottom
PyTypeObject *typ; // Borrowed reference PyTypeObject *typ; // Borrowed reference
PyObject *const_val; // Owned reference (!) PyObject *const_val; // Owned reference (!)
unsigned int type_version; // currently stores type version
}; };
#define UOP_FORMAT_TARGET 0 #define UOP_FORMAT_TARGET 0
@ -123,9 +124,11 @@ extern _Py_UopsSymbol *_Py_uop_sym_new_const(_Py_UOpsContext *ctx, PyObject *con
extern _Py_UopsSymbol *_Py_uop_sym_new_null(_Py_UOpsContext *ctx); extern _Py_UopsSymbol *_Py_uop_sym_new_null(_Py_UOpsContext *ctx);
extern bool _Py_uop_sym_has_type(_Py_UopsSymbol *sym); extern bool _Py_uop_sym_has_type(_Py_UopsSymbol *sym);
extern bool _Py_uop_sym_matches_type(_Py_UopsSymbol *sym, PyTypeObject *typ); extern bool _Py_uop_sym_matches_type(_Py_UopsSymbol *sym, PyTypeObject *typ);
extern bool _Py_uop_sym_matches_type_version(_Py_UopsSymbol *sym, unsigned int version);
extern void _Py_uop_sym_set_null(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym); extern void _Py_uop_sym_set_null(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym);
extern void _Py_uop_sym_set_non_null(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym); extern void _Py_uop_sym_set_non_null(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym);
extern void _Py_uop_sym_set_type(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyTypeObject *typ); extern void _Py_uop_sym_set_type(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyTypeObject *typ);
extern bool _Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, unsigned int version);
extern void _Py_uop_sym_set_const(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyObject *const_val); extern void _Py_uop_sym_set_const(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyObject *const_val);
extern bool _Py_uop_sym_is_bottom(_Py_UopsSymbol *sym); extern bool _Py_uop_sym_is_bottom(_Py_UopsSymbol *sym);
extern int _Py_uop_sym_truthiness(_Py_UopsSymbol *sym); extern int _Py_uop_sym_truthiness(_Py_UopsSymbol *sym);
@ -138,9 +141,9 @@ extern void _Py_uop_abstractcontext_fini(_Py_UOpsContext *ctx);
extern _Py_UOpsAbstractFrame *_Py_uop_frame_new( extern _Py_UOpsAbstractFrame *_Py_uop_frame_new(
_Py_UOpsContext *ctx, _Py_UOpsContext *ctx,
PyCodeObject *co, PyCodeObject *co,
_Py_UopsSymbol **localsplus_start, int curr_stackentries,
int n_locals_already_filled, _Py_UopsSymbol **args,
int curr_stackentries); int arg_len);
extern int _Py_uop_frame_pop(_Py_UOpsContext *ctx); extern int _Py_uop_frame_pop(_Py_UOpsContext *ctx);
PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored); PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored);

View File

@ -63,6 +63,8 @@ typedef struct {
PyObject *tp_weaklist; PyObject *tp_weaklist;
} managed_static_type_state; } managed_static_type_state;
#define TYPE_VERSION_CACHE_SIZE (1<<12) /* Must be a power of 2 */
struct types_state { struct types_state {
/* Used to set PyTypeObject.tp_version_tag. /* Used to set PyTypeObject.tp_version_tag.
It starts at _Py_MAX_GLOBAL_TYPE_VERSION_TAG + 1, It starts at _Py_MAX_GLOBAL_TYPE_VERSION_TAG + 1,
@ -118,6 +120,12 @@ struct types_state {
managed_static_type_state initialized[_Py_MAX_MANAGED_STATIC_EXT_TYPES]; managed_static_type_state initialized[_Py_MAX_MANAGED_STATIC_EXT_TYPES];
} for_extensions; } for_extensions;
PyMutex mutex; PyMutex mutex;
// Borrowed references to type objects whose
// tp_version_tag % TYPE_VERSION_CACHE_SIZE
// once was equal to the index in the table.
// They are cleared when the type object is deallocated.
PyTypeObject *type_version_cache[TYPE_VERSION_CACHE_SIZE];
}; };
@ -230,6 +238,9 @@ extern void _PyType_SetFlags(PyTypeObject *self, unsigned long mask,
extern void _PyType_SetFlagsRecursive(PyTypeObject *self, unsigned long mask, extern void _PyType_SetFlagsRecursive(PyTypeObject *self, unsigned long mask,
unsigned long flags); unsigned long flags);
extern unsigned int _PyType_GetVersionForCurrentState(PyTypeObject *tp);
PyAPI_FUNC(void) _PyType_SetVersion(PyTypeObject *tp, unsigned int version);
PyTypeObject *_PyType_LookupByVersion(unsigned int version);
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -1333,6 +1333,153 @@ class TestUopsOptimization(unittest.TestCase):
self.assertIs(type(s), float) self.assertIs(type(s), float)
self.assertEqual(s, 1024.0) self.assertEqual(s, 1024.0)
def test_guard_type_version_removed(self):
"""
Verify that only one type version guard remains when the same attribute
is loaded twice from the same object inside the loop.
"""
def thing(a):
x = 0
for _ in range(100):
x += a.attr
x += a.attr
return x
class Foo:
attr = 1
res, ex = self._run_with_optimizer(thing, Foo())
opnames = list(iter_opnames(ex))
self.assertIsNotNone(ex)
self.assertEqual(res, 200)
guard_type_version_count = opnames.count("_GUARD_TYPE_VERSION")
self.assertEqual(guard_type_version_count, 1)
def test_guard_type_version_removed_inlined(self):
"""
Verify that the guard type version is removed if we have an inlined function
"""
def fn():
pass
def thing(a):
x = 0
for _ in range(100):
x += a.attr
fn()
x += a.attr
return x
class Foo:
attr = 1
res, ex = self._run_with_optimizer(thing, Foo())
opnames = list(iter_opnames(ex))
self.assertIsNotNone(ex)
self.assertEqual(res, 200)
guard_type_version_count = opnames.count("_GUARD_TYPE_VERSION")
self.assertEqual(guard_type_version_count, 1)
def test_guard_type_version_not_removed(self):
"""
Verify that the guard type version is not removed if we modify the class
"""
def thing(a):
x = 0
for i in range(100):
x += a.attr
# For the first 90 iterations (i < 90 selects index 1, i.e. Bar) we set the
# attribute on the unrelated dummy class Bar, which shouldn't trigger the
# type watcher.
# From iteration 90 onwards the attribute is set on Foo, which should trigger
# the watcher and stop the second guard from being optimized away.
# Note that the code needs to be in this weird form so it's optimized inline without any control flow
setattr((Foo, Bar)[i < 90], "attr", 2)
x += a.attr
return x
class Foo:
attr = 1
class Bar:
pass
res, ex = self._run_with_optimizer(thing, Foo())
opnames = list(iter_opnames(ex))
self.assertIsNotNone(ex)
self.assertEqual(res, 219)
guard_type_version_count = opnames.count("_GUARD_TYPE_VERSION")
self.assertEqual(guard_type_version_count, 2)
@unittest.expectedFailure
def test_guard_type_version_not_removed_escaping(self):
"""
Verify that the guard type version is not removed if we have an escaping function
"""
def thing(a):
x = 0
for i in range(100):
x += a.attr
# eval should be escaping and so should cause optimization to stop and preserve both type version guards
eval("None")
x += a.attr
return x
class Foo:
attr = 1
res, ex = self._run_with_optimizer(thing, Foo())
opnames = list(iter_opnames(ex))
self.assertIsNotNone(ex)
self.assertEqual(res, 200)
guard_type_version_count = opnames.count("_GUARD_TYPE_VERSION")
# Note: This will actually be 1 for now (hence the expectedFailure decorator)
# https://github.com/python/cpython/pull/119365#discussion_r1626220129
self.assertEqual(guard_type_version_count, 2)
def test_guard_type_version_executor_invalidated(self):
"""
Verify that the executor is invalidated on a type change.
"""
def thing(a):
x = 0
for i in range(100):
x += a.attr
x += a.attr
return x
class Foo:
attr = 1
res, ex = self._run_with_optimizer(thing, Foo())
self.assertEqual(res, 200)
self.assertIsNotNone(ex)
self.assertEqual(list(iter_opnames(ex)).count("_GUARD_TYPE_VERSION"), 1)
self.assertTrue(ex.is_valid())
# Modifying the class must invalidate the executor that depends on it
Foo.attr = 0
self.assertFalse(ex.is_valid())
def test_type_version_doesnt_segfault(self):
"""
Tests that setting a type version doesn't cause a segfault when later looking at the stack.
"""
# Minimized from mdp.py benchmark
class A:
def __init__(self):
self.attr = {}
def method(self, arg):
self.attr[arg] = None
def fn(a):
for _ in range(100):
# Both generator expressions are created and immediately discarded.
# Creating the second one still calls a.method(None), because the
# outermost iterable of a generator expression is evaluated eagerly.
(_ for _ in [])
(_ for _ in [a.method(None)])
# No assertions: the test passes as long as running fn() does not crash
fn(A())
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()

View File

@ -282,8 +282,10 @@ class TestTypeWatchers(unittest.TestCase):
self.watch(wid, C) self.watch(wid, C)
with catch_unraisable_exception() as cm: with catch_unraisable_exception() as cm:
C.foo = "bar" C.foo = "bar"
self.assertEqual(cm.unraisable.err_msg, self.assertEqual(
f"Exception ignored in type watcher callback #0 for {C!r}") cm.unraisable.err_msg,
f"Exception ignored in type watcher callback #1 for {C!r}",
)
self.assertIs(cm.unraisable.object, None) self.assertIs(cm.unraisable.object, None)
self.assertEqual(str(cm.unraisable.exc_value), "boom!") self.assertEqual(str(cm.unraisable.exc_value), "boom!")
self.assert_events([]) self.assert_events([])

View File

@ -10,8 +10,9 @@ except ImportError:
# Skip this test if the _testcapi module isn't available. # Skip this test if the _testcapi module isn't available.
_testcapi = import_helper.import_module("_testcapi") _testcapi = import_helper.import_module("_testcapi")
_testinternalcapi = import_helper.import_module("_testinternalcapi")
type_get_version = _testcapi.type_get_version type_get_version = _testcapi.type_get_version
type_assign_specific_version_unsafe = _testcapi.type_assign_specific_version_unsafe type_assign_specific_version_unsafe = _testinternalcapi.type_assign_specific_version_unsafe
type_assign_version = _testcapi.type_assign_version type_assign_version = _testcapi.type_assign_version
type_modified = _testcapi.type_modified type_modified = _testcapi.type_modified

View File

@ -0,0 +1,3 @@
Eliminate type version guards in the tier two interpreter.
Note that setting the ``tp_version_tag`` manually (which has never been supported) may result in crashes.

View File

@ -2403,21 +2403,6 @@ type_modified(PyObject *self, PyObject *type)
Py_RETURN_NONE; Py_RETURN_NONE;
} }
// Circumvents standard version assignment machinery - use with caution and only on
// short-lived heap types.
//
// Python-level call: type_assign_specific_version_unsafe(type, version).
// Stores `version` directly in type->tp_version_tag and marks the tag as valid.
// Returns None, or NULL with an exception set when the arguments cannot be parsed.
static PyObject *
type_assign_specific_version_unsafe(PyObject *self, PyObject *args)
{
    PyTypeObject *type;
    unsigned int version;
    // "O!" rejects anything that is not a type object before it gets
    // dereferenced as one; "I" is the format unit that matches the
    // `unsigned int` storage of `version` (it does no overflow checking).
    if (!PyArg_ParseTuple(args, "O!I:type_assign_specific_version_unsafe",
                          &PyType_Type, &type, &version)) {
        return NULL;
    }
    assert(!PyType_HasFeature(type, Py_TPFLAGS_IMMUTABLETYPE));
    type->tp_version_tag = version;
    type->tp_flags |= Py_TPFLAGS_VALID_VERSION_TAG;
    Py_RETURN_NONE;
}
static PyObject * static PyObject *
type_assign_version(PyObject *self, PyObject *type) type_assign_version(PyObject *self, PyObject *type)
@ -3427,8 +3412,6 @@ static PyMethodDef TestMethods[] = {
{"test_py_is_funcs", test_py_is_funcs, METH_NOARGS}, {"test_py_is_funcs", test_py_is_funcs, METH_NOARGS},
{"type_get_version", type_get_version, METH_O, PyDoc_STR("type->tp_version_tag")}, {"type_get_version", type_get_version, METH_O, PyDoc_STR("type->tp_version_tag")},
{"type_modified", type_modified, METH_O, PyDoc_STR("PyType_Modified")}, {"type_modified", type_modified, METH_O, PyDoc_STR("PyType_Modified")},
{"type_assign_specific_version_unsafe", type_assign_specific_version_unsafe, METH_VARARGS,
PyDoc_STR("forcefully assign type->tp_version_tag")},
{"type_assign_version", type_assign_version, METH_O, PyDoc_STR("PyUnstable_Type_AssignVersionTag")}, {"type_assign_version", type_assign_version, METH_O, PyDoc_STR("PyUnstable_Type_AssignVersionTag")},
{"type_get_tp_bases", type_get_tp_bases, METH_O}, {"type_get_tp_bases", type_get_tp_bases, METH_O},
{"type_get_tp_mro", type_get_tp_mro, METH_O}, {"type_get_tp_mro", type_get_tp_mro, METH_O},

View File

@ -2002,6 +2002,22 @@ has_inline_values(PyObject *self, PyObject *obj)
} }
// Circumvents standard version assignment machinery - use with caution and only on
// short-lived heap types.
//
// Python-level call: type_assign_specific_version_unsafe(type, version).
// Hands `version` to _PyType_SetVersion() and marks the tag as valid.
// Returns None, or NULL with an exception set when the arguments cannot be parsed.
static PyObject *
type_assign_specific_version_unsafe(PyObject *self, PyObject *args)
{
    PyTypeObject *type;
    unsigned int version;
    // "O!" rejects anything that is not a type object before it gets
    // dereferenced as one; "I" is the format unit that matches the
    // `unsigned int` storage of `version` (it does no overflow checking).
    if (!PyArg_ParseTuple(args, "O!I:type_assign_specific_version_unsafe",
                          &PyType_Type, &type, &version)) {
        return NULL;
    }
    assert(!PyType_HasFeature(type, Py_TPFLAGS_IMMUTABLETYPE));
    _PyType_SetVersion(type, version);
    type->tp_flags |= Py_TPFLAGS_VALID_VERSION_TAG;
    Py_RETURN_NONE;
}
/*[clinic input] /*[clinic input]
gh_119213_getargs gh_119213_getargs
@ -2102,6 +2118,9 @@ static PyMethodDef module_functions[] = {
{"get_rare_event_counters", get_rare_event_counters, METH_NOARGS}, {"get_rare_event_counters", get_rare_event_counters, METH_NOARGS},
{"reset_rare_event_counters", reset_rare_event_counters, METH_NOARGS}, {"reset_rare_event_counters", reset_rare_event_counters, METH_NOARGS},
{"has_inline_values", has_inline_values, METH_O}, {"has_inline_values", has_inline_values, METH_O},
{"type_assign_specific_version_unsafe", type_assign_specific_version_unsafe, METH_VARARGS,
PyDoc_STR("forcefully assign type->tp_version_tag")},
#ifdef Py_GIL_DISABLED #ifdef Py_GIL_DISABLED
{"py_thread_id", get_py_thread_id, METH_NOARGS}, {"py_thread_id", get_py_thread_id, METH_NOARGS},
#endif #endif

View File

@ -853,7 +853,8 @@ PyType_AddWatcher(PyType_WatchCallback callback)
{ {
PyInterpreterState *interp = _PyInterpreterState_GET(); PyInterpreterState *interp = _PyInterpreterState_GET();
for (int i = 0; i < TYPE_MAX_WATCHERS; i++) { // start at 1, 0 is reserved for cpython optimizer
for (int i = 1; i < TYPE_MAX_WATCHERS; i++) {
if (!interp->type_watchers[i]) { if (!interp->type_watchers[i]) {
interp->type_watchers[i] = callback; interp->type_watchers[i] = callback;
return i; return i;
@ -960,7 +961,7 @@ type_modification_starting_unlocked(PyTypeObject *type)
} }
/* 0 is not a valid version tag */ /* 0 is not a valid version tag */
_Py_atomic_store_uint32_release(&type->tp_version_tag, 0); _PyType_SetVersion(type, 0);
} }
#endif #endif
@ -1024,7 +1025,7 @@ type_modified_unlocked(PyTypeObject *type)
} }
type->tp_flags &= ~Py_TPFLAGS_VALID_VERSION_TAG; type->tp_flags &= ~Py_TPFLAGS_VALID_VERSION_TAG;
FT_ATOMIC_STORE_UINT32_RELAXED(type->tp_version_tag, 0); /* 0 is not a valid version tag */ _PyType_SetVersion(type, 0); /* 0 is not a valid version tag */
if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
// This field *must* be invalidated if the type is modified (see the // This field *must* be invalidated if the type is modified (see the
// comment on struct _specialization_cache): // comment on struct _specialization_cache):
@ -1101,7 +1102,7 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) {
clear: clear:
assert(!(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN)); assert(!(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN));
type->tp_flags &= ~Py_TPFLAGS_VALID_VERSION_TAG; type->tp_flags &= ~Py_TPFLAGS_VALID_VERSION_TAG;
FT_ATOMIC_STORE_UINT32_RELAXED(type->tp_version_tag, 0); /* 0 is not a valid version tag */ _PyType_SetVersion(type, 0); /* 0 is not a valid version tag */
if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
// This field *must* be invalidated if the type is modified (see the // This field *must* be invalidated if the type is modified (see the
// comment on struct _specialization_cache): // comment on struct _specialization_cache):
@ -1109,6 +1110,64 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) {
} }
} }
/*
The Tier 2 interpreter needs to map a type version back to its type object so
that it can install a watcher on that type and notice when it changes.
For that purpose there is a per-interpreter cache from type version to a
borrowed reference to the type object, indexed by
version % TYPE_VERSION_CACHE_SIZE.
This is similar to func_version_cache.

_PyType_SetVersion() stores `version` in tp->tp_version_tag and, in builds
without Py_GIL_DISABLED, keeps that cache in sync: the slot of the previous
(non-zero) version is emptied and the slot of the new (non-zero) version is
pointed at `tp`.
*/
void
_PyType_SetVersion(PyTypeObject *tp, unsigned int version)
{
#ifndef Py_GIL_DISABLED
    PyTypeObject **cache = _PyInterpreterState_GET()->types.type_version_cache;
    unsigned int previous = tp->tp_version_tag;
    if (previous != 0) {
        // drop the entry that belonged to the outgoing version
        cache[previous % TYPE_VERSION_CACHE_SIZE] = NULL;
    }
#endif
    FT_ATOMIC_STORE_UINT32_RELAXED(tp->tp_version_tag, version);
#ifndef Py_GIL_DISABLED
    if (version != 0) {
        // 0 is never cached; any other version claims its slot
        cache[version % TYPE_VERSION_CACHE_SIZE] = tp;
    }
#endif
}
/* Return the cached type whose tp_version_tag equals `version`, as a borrowed
   reference, or NULL when the matching cache slot is empty or holds a type
   with a different version. Always returns NULL when Py_GIL_DISABLED is
   defined. */
PyTypeObject *
_PyType_LookupByVersion(unsigned int version)
{
#ifdef Py_GIL_DISABLED
    return NULL;
#else
    PyInterpreterState *interp = _PyInterpreterState_GET();
    PyTypeObject *candidate =
        interp->types.type_version_cache[version % TYPE_VERSION_CACHE_SIZE];
    if (candidate == NULL || candidate->tp_version_tag != version) {
        // empty slot, or the slot is occupied by some other version
        return NULL;
    }
    return candidate;
#endif
}
/* Return the value currently stored in tp->tp_version_tag. */
unsigned int
_PyType_GetVersionForCurrentState(PyTypeObject *tp)
{
    unsigned int current_tag = tp->tp_version_tag;
    return current_tag;
}
#define MAX_VERSIONS_PER_CLASS 1000 #define MAX_VERSIONS_PER_CLASS 1000
static int static int
@ -1137,8 +1196,7 @@ assign_version_tag(PyInterpreterState *interp, PyTypeObject *type)
/* We have run out of version numbers */ /* We have run out of version numbers */
return 0; return 0;
} }
FT_ATOMIC_STORE_UINT32_RELAXED(type->tp_version_tag, _PyType_SetVersion(type, NEXT_GLOBAL_VERSION_TAG++);
NEXT_GLOBAL_VERSION_TAG++);
assert (type->tp_version_tag <= _Py_MAX_GLOBAL_TYPE_VERSION_TAG); assert (type->tp_version_tag <= _Py_MAX_GLOBAL_TYPE_VERSION_TAG);
} }
else { else {
@ -1147,8 +1205,7 @@ assign_version_tag(PyInterpreterState *interp, PyTypeObject *type)
/* We have run out of version numbers */ /* We have run out of version numbers */
return 0; return 0;
} }
FT_ATOMIC_STORE_UINT32_RELAXED(type->tp_version_tag, _PyType_SetVersion(type, NEXT_VERSION_TAG(interp)++);
NEXT_VERSION_TAG(interp)++);
assert (type->tp_version_tag != 0); assert (type->tp_version_tag != 0);
} }
@ -5768,7 +5825,7 @@ fini_static_type(PyInterpreterState *interp, PyTypeObject *type,
if (final) { if (final) {
type->tp_flags &= ~Py_TPFLAGS_READY; type->tp_flags &= ~Py_TPFLAGS_READY;
type->tp_flags &= ~Py_TPFLAGS_VALID_VERSION_TAG; type->tp_flags &= ~Py_TPFLAGS_VALID_VERSION_TAG;
type->tp_version_tag = 0; _PyType_SetVersion(type, 0);
} }
_PyStaticType_ClearWeakRefs(interp, type); _PyStaticType_ClearWeakRefs(interp, type);
@ -5798,7 +5855,6 @@ type_dealloc(PyObject *self)
_PyObject_ASSERT((PyObject *)type, type->tp_flags & Py_TPFLAGS_HEAPTYPE); _PyObject_ASSERT((PyObject *)type, type->tp_flags & Py_TPFLAGS_HEAPTYPE);
_PyObject_GC_UNTRACK(type); _PyObject_GC_UNTRACK(type);
type_dealloc_common(type); type_dealloc_common(type);
// PyObject_ClearWeakRefs() raises an exception if Py_REFCNT() != 0 // PyObject_ClearWeakRefs() raises an exception if Py_REFCNT() != 0
@ -8367,7 +8423,7 @@ init_static_type(PyInterpreterState *interp, PyTypeObject *self,
self->tp_flags |= Py_TPFLAGS_IMMUTABLETYPE; self->tp_flags |= Py_TPFLAGS_IMMUTABLETYPE;
assert(NEXT_GLOBAL_VERSION_TAG <= _Py_MAX_GLOBAL_TYPE_VERSION_TAG); assert(NEXT_GLOBAL_VERSION_TAG <= _Py_MAX_GLOBAL_TYPE_VERSION_TAG);
self->tp_version_tag = NEXT_GLOBAL_VERSION_TAG++; _PyType_SetVersion(self, NEXT_GLOBAL_VERSION_TAG++);
self->tp_flags |= Py_TPFLAGS_VALID_VERSION_TAG; self->tp_flags |= Py_TPFLAGS_VALID_VERSION_TAG;
} }
else { else {

View File

@ -79,6 +79,7 @@ increment_mutations(PyObject* dict) {
* so we don't need to check that they haven't been used */ * so we don't need to check that they haven't been used */
#define BUILTINS_WATCHER_ID 0 #define BUILTINS_WATCHER_ID 0
#define GLOBALS_WATCHER_ID 1 #define GLOBALS_WATCHER_ID 1
#define TYPE_WATCHER_ID 0
static int static int
globals_watcher_callback(PyDict_WatchEvent event, PyObject* dict, globals_watcher_callback(PyDict_WatchEvent event, PyObject* dict,
@ -92,6 +93,14 @@ globals_watcher_callback(PyDict_WatchEvent event, PyObject* dict,
return 0; return 0;
} }
/* Type watcher callback: invalidates the executors that depend on `type`,
   then stops watching `type` with TYPE_WATCHER_ID. Always returns 0. */
static int
type_watcher_callback(PyTypeObject* type)
{
    PyInterpreterState *interp = _PyInterpreterState_GET();
    PyObject *watched = (PyObject *)type;

    _Py_Executors_InvalidateDependency(interp, type, 1);
    PyType_Unwatch(TYPE_WATCHER_ID, watched);
    return 0;
}
static PyObject * static PyObject *
convert_global_to_const(_PyUOpInstruction *inst, PyObject *obj) convert_global_to_const(_PyUOpInstruction *inst, PyObject *obj)
{ {
@ -167,6 +176,9 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
if (interp->dict_state.watchers[GLOBALS_WATCHER_ID] == NULL) { if (interp->dict_state.watchers[GLOBALS_WATCHER_ID] == NULL) {
interp->dict_state.watchers[GLOBALS_WATCHER_ID] = globals_watcher_callback; interp->dict_state.watchers[GLOBALS_WATCHER_ID] = globals_watcher_callback;
} }
if (interp->type_watchers[TYPE_WATCHER_ID] == NULL) {
interp->type_watchers[TYPE_WATCHER_ID] = type_watcher_callback;
}
for (int pc = 0; pc < buffer_size; pc++) { for (int pc = 0; pc < buffer_size; pc++) {
_PyUOpInstruction *inst = &buffer[pc]; _PyUOpInstruction *inst = &buffer[pc];
int opcode = inst->opcode; int opcode = inst->opcode;
@ -310,9 +322,11 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
#define sym_has_type _Py_uop_sym_has_type #define sym_has_type _Py_uop_sym_has_type
#define sym_get_type _Py_uop_sym_get_type #define sym_get_type _Py_uop_sym_get_type
#define sym_matches_type _Py_uop_sym_matches_type #define sym_matches_type _Py_uop_sym_matches_type
#define sym_matches_type_version _Py_uop_sym_matches_type_version
#define sym_set_null(SYM) _Py_uop_sym_set_null(ctx, SYM) #define sym_set_null(SYM) _Py_uop_sym_set_null(ctx, SYM)
#define sym_set_non_null(SYM) _Py_uop_sym_set_non_null(ctx, SYM) #define sym_set_non_null(SYM) _Py_uop_sym_set_non_null(ctx, SYM)
#define sym_set_type(SYM, TYPE) _Py_uop_sym_set_type(ctx, SYM, TYPE) #define sym_set_type(SYM, TYPE) _Py_uop_sym_set_type(ctx, SYM, TYPE)
#define sym_set_type_version(SYM, VERSION) _Py_uop_sym_set_type_version(ctx, SYM, VERSION)
#define sym_set_const(SYM, CNST) _Py_uop_sym_set_const(ctx, SYM, CNST) #define sym_set_const(SYM, CNST) _Py_uop_sym_set_const(ctx, SYM, CNST)
#define sym_is_bottom _Py_uop_sym_is_bottom #define sym_is_bottom _Py_uop_sym_is_bottom
#define sym_truthiness _Py_uop_sym_truthiness #define sym_truthiness _Py_uop_sym_truthiness
@ -395,7 +409,7 @@ optimize_uops(
_PyUOpInstruction *corresponding_check_stack = NULL; _PyUOpInstruction *corresponding_check_stack = NULL;
_Py_uop_abstractcontext_init(ctx); _Py_uop_abstractcontext_init(ctx);
_Py_UOpsAbstractFrame *frame = _Py_uop_frame_new(ctx, co, ctx->n_consumed, 0, curr_stacklen); _Py_UOpsAbstractFrame *frame = _Py_uop_frame_new(ctx, co, curr_stacklen, NULL, 0);
if (frame == NULL) { if (frame == NULL) {
return -1; return -1;
} }

View File

@ -21,11 +21,13 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame;
#define sym_new_const _Py_uop_sym_new_const #define sym_new_const _Py_uop_sym_new_const
#define sym_new_null _Py_uop_sym_new_null #define sym_new_null _Py_uop_sym_new_null
#define sym_matches_type _Py_uop_sym_matches_type #define sym_matches_type _Py_uop_sym_matches_type
#define sym_matches_type_version _Py_uop_sym_matches_type_version
#define sym_get_type _Py_uop_sym_get_type #define sym_get_type _Py_uop_sym_get_type
#define sym_has_type _Py_uop_sym_has_type #define sym_has_type _Py_uop_sym_has_type
#define sym_set_null(SYM) _Py_uop_sym_set_null(ctx, SYM) #define sym_set_null(SYM) _Py_uop_sym_set_null(ctx, SYM)
#define sym_set_non_null(SYM) _Py_uop_sym_set_non_null(ctx, SYM) #define sym_set_non_null(SYM) _Py_uop_sym_set_non_null(ctx, SYM)
#define sym_set_type(SYM, TYPE) _Py_uop_sym_set_type(ctx, SYM, TYPE) #define sym_set_type(SYM, TYPE) _Py_uop_sym_set_type(ctx, SYM, TYPE)
#define sym_set_type_version(SYM, VERSION) _Py_uop_sym_set_type_version(ctx, SYM, VERSION)
#define sym_set_const(SYM, CNST) _Py_uop_sym_set_const(ctx, SYM, CNST) #define sym_set_const(SYM, CNST) _Py_uop_sym_set_const(ctx, SYM, CNST)
#define sym_is_bottom _Py_uop_sym_is_bottom #define sym_is_bottom _Py_uop_sym_is_bottom
#define frame_new _Py_uop_frame_new #define frame_new _Py_uop_frame_new
@ -113,6 +115,29 @@ dummy_func(void) {
sym_set_type(right, &PyLong_Type); sym_set_type(right, &PyLong_Type);
} }
// Abstract interpretation of _GUARD_TYPE_VERSION: drop the guard when `owner`
// is already known to have `type_version`; otherwise try to record the version
// on `owner` and watch the type so a later change invalidates the executor.
op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner)) {
assert(type_version);
if (sym_matches_type_version(owner, type_version)) {
// the version is already known for this symbol, so the guard is redundant
REPLACE_OP(this_instr, _NOP, 0, 0);
} else {
// look up the type so that we can add a watcher which invalidates this
// executor whenever the type changes
PyTypeObject *type = _PyType_LookupByVersion(type_version);
// if the type is NULL, it was not found in the cache (there was a conflict
// with the key), in which case we can't trust the version
if (type) {
// if the type version was set properly, then add a watcher
// if it wasn't, this means that the type version was previously set to something else
// and we set the owner to bottom, so we don't need to add a watcher because we must have
// already added one earlier.
if (sym_set_type_version(owner, type_version)) {
PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type);
_Py_BloomFilter_Add(dependencies, type);
}
}
}
}
op(_GUARD_BOTH_FLOAT, (left, right -- left, right)) { op(_GUARD_BOTH_FLOAT, (left, right -- left, right)) {
if (sym_matches_type(left, &PyFloat_Type)) { if (sym_matches_type(left, &PyFloat_Type)) {
if (sym_matches_type(right, &PyFloat_Type)) { if (sym_matches_type(right, &PyFloat_Type)) {
@ -563,16 +588,12 @@ dummy_func(void) {
argcount++; argcount++;
} }
_Py_UopsSymbol **localsplus_start = ctx->n_consumed;
int n_locals_already_filled = 0;
// Can determine statically, so we interleave the new locals
// and make the current stack the new locals.
// This also sets up for true call inlining.
if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) { if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) {
localsplus_start = args; new_frame = frame_new(ctx, co, 0, args, argcount);
n_locals_already_filled = argcount; } else {
new_frame = frame_new(ctx, co, 0, NULL, 0);
} }
new_frame = frame_new(ctx, co, localsplus_start, n_locals_already_filled, 0);
} }
op(_PY_FRAME_GENERAL, (callable, self_or_null, args[oparg] -- new_frame: _Py_UOpsAbstractFrame *)) { op(_PY_FRAME_GENERAL, (callable, self_or_null, args[oparg] -- new_frame: _Py_UOpsAbstractFrame *)) {

View File

@ -930,6 +930,28 @@
} }
case _GUARD_TYPE_VERSION: { case _GUARD_TYPE_VERSION: {
_Py_UopsSymbol *owner;
owner = stack_pointer[-1];
uint32_t type_version = (uint32_t)this_instr->operand;
assert(type_version);
if (sym_matches_type_version(owner, type_version)) {
REPLACE_OP(this_instr, _NOP, 0, 0);
} else {
// add watcher so that whenever the type changes we invalidate this
PyTypeObject *type = _PyType_LookupByVersion(type_version);
// if the type is null, it was not found in the cache (there was a conflict)
// with the key, in which case we can't trust the version
if (type) {
// if the type version was set properly, then add a watcher
// if it wasn't this means that the type version was previously set to something else
// and we set the owner to bottom, so we don't need to add a watcher because we must have
// already added one earlier.
if (sym_set_type_version(owner, type_version)) {
PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type);
_Py_BloomFilter_Add(dependencies, type);
}
}
}
break; break;
} }
@ -1583,16 +1605,11 @@
args--; args--;
argcount++; argcount++;
} }
_Py_UopsSymbol **localsplus_start = ctx->n_consumed;
int n_locals_already_filled = 0;
// Can determine statically, so we interleave the new locals
// and make the current stack the new locals.
// This also sets up for true call inlining.
if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) { if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) {
localsplus_start = args; new_frame = frame_new(ctx, co, 0, args, argcount);
n_locals_already_filled = argcount; } else {
new_frame = frame_new(ctx, co, 0, NULL, 0);
} }
new_frame = frame_new(ctx, co, localsplus_start, n_locals_already_filled, 0);
stack_pointer[-2 - oparg] = (_Py_UopsSymbol *)new_frame; stack_pointer[-2 - oparg] = (_Py_UopsSymbol *)new_frame;
stack_pointer += -1 - oparg; stack_pointer += -1 - oparg;
break; break;

View File

@ -52,7 +52,8 @@ static inline int get_lltrace(void) {
static _Py_UopsSymbol NO_SPACE_SYMBOL = { static _Py_UopsSymbol NO_SPACE_SYMBOL = {
.flags = IS_NULL | NOT_NULL | NO_SPACE, .flags = IS_NULL | NOT_NULL | NO_SPACE,
.typ = NULL, .typ = NULL,
.const_val = NULL .const_val = NULL,
.type_version = 0,
}; };
_Py_UopsSymbol * _Py_UopsSymbol *
@ -76,6 +77,7 @@ sym_new(_Py_UOpsContext *ctx)
self->flags = 0; self->flags = 0;
self->typ = NULL; self->typ = NULL;
self->const_val = NULL; self->const_val = NULL;
self->type_version = 0;
return self; return self;
} }
@ -152,6 +154,18 @@ _Py_uop_sym_set_type(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyTypeObject *ty
} }
} }
/* Record `version` as the type version known for `sym`.
 *
 * Returns true when `sym` carries `version` afterwards, either because it was
 * stored just now or because the very same version was already recorded.
 * Returns false, after collapsing `sym` to bottom, when a different version
 * had been recorded earlier, because the two facts contradict each other.
 */
bool
_Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, unsigned int version)
{
    unsigned int known = sym->type_version;
    if (known == 0) {
        // 0 means that no version has been recorded for this symbol yet
        sym->type_version = version;
        return true;
    }
    if (known == version) {
        // restating the same version is not a contradiction
        return true;
    }
    // a different version was recorded earlier: the symbol is impossible
    sym_set_bottom(ctx, sym);
    return false;
}
void void
_Py_uop_sym_set_const(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyObject *const_val) _Py_uop_sym_set_const(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyObject *const_val)
{ {
@ -256,6 +270,12 @@ _Py_uop_sym_get_type(_Py_UopsSymbol *sym)
return sym->typ; return sym->typ;
} }
/* Return the type version stored on `sym`. */
unsigned int
_Py_uop_sym_get_type_version(_Py_UopsSymbol *sym)
{
    unsigned int recorded = sym->type_version;
    return recorded;
}
bool bool
_Py_uop_sym_has_type(_Py_UopsSymbol *sym) _Py_uop_sym_has_type(_Py_UopsSymbol *sym)
{ {
@ -272,6 +292,13 @@ _Py_uop_sym_matches_type(_Py_UopsSymbol *sym, PyTypeObject *typ)
return _Py_uop_sym_get_type(sym) == typ; return _Py_uop_sym_get_type(sym) == typ;
} }
/* Return true when the type version stored on `sym` equals `version`. */
bool
_Py_uop_sym_matches_type_version(_Py_UopsSymbol *sym, unsigned int version)
{
    unsigned int recorded = _Py_uop_sym_get_type_version(sym);
    if (recorded != version) {
        return false;
    }
    return true;
}
int int
_Py_uop_sym_truthiness(_Py_UopsSymbol *sym) _Py_uop_sym_truthiness(_Py_UopsSymbol *sym)
{ {
@ -311,9 +338,9 @@ _Py_UOpsAbstractFrame *
_Py_uop_frame_new( _Py_uop_frame_new(
_Py_UOpsContext *ctx, _Py_UOpsContext *ctx,
PyCodeObject *co, PyCodeObject *co,
_Py_UopsSymbol **localsplus_start, int curr_stackentries,
int n_locals_already_filled, _Py_UopsSymbol **args,
int curr_stackentries) int arg_len)
{ {
assert(ctx->curr_frame_depth < MAX_ABSTRACT_FRAME_DEPTH); assert(ctx->curr_frame_depth < MAX_ABSTRACT_FRAME_DEPTH);
_Py_UOpsAbstractFrame *frame = &ctx->frames[ctx->curr_frame_depth]; _Py_UOpsAbstractFrame *frame = &ctx->frames[ctx->curr_frame_depth];
@ -321,19 +348,22 @@ _Py_uop_frame_new(
frame->stack_len = co->co_stacksize; frame->stack_len = co->co_stacksize;
frame->locals_len = co->co_nlocalsplus; frame->locals_len = co->co_nlocalsplus;
frame->locals = localsplus_start; frame->locals = ctx->n_consumed;
frame->stack = frame->locals + co->co_nlocalsplus; frame->stack = frame->locals + co->co_nlocalsplus;
frame->stack_pointer = frame->stack + curr_stackentries; frame->stack_pointer = frame->stack + curr_stackentries;
ctx->n_consumed = localsplus_start + (co->co_nlocalsplus + co->co_stacksize); ctx->n_consumed = ctx->n_consumed + (co->co_nlocalsplus + co->co_stacksize);
if (ctx->n_consumed >= ctx->limit) { if (ctx->n_consumed >= ctx->limit) {
ctx->done = true; ctx->done = true;
ctx->out_of_space = true; ctx->out_of_space = true;
return NULL; return NULL;
} }
// Initialize with the initial state of all local variables // Initialize with the initial state of all local variables
for (int i = n_locals_already_filled; i < co->co_nlocalsplus; i++) { for (int i = 0; i < arg_len; i++) {
frame->locals[i] = args[i];
}
for (int i = arg_len; i < co->co_nlocalsplus; i++) {
_Py_UopsSymbol *local = _Py_uop_sym_new_unknown(ctx); _Py_UopsSymbol *local = _Py_uop_sym_new_unknown(ctx);
frame->locals[i] = local; frame->locals[i] = local;
} }