bpo-45829: Specialize BINARY_SUBSCR for __getitem__ implemented in Python. (GH-29592)

This commit is contained in:
Mark Shannon 2021-11-18 11:02:14 +00:00 committed by GitHub
parent 5275e59c0c
commit 21fa7a3e8f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 145 additions and 89 deletions

View File

@ -17,6 +17,7 @@ typedef struct {
uint8_t original_oparg;
uint8_t counter;
uint16_t index;
uint32_t version;
} _PyAdaptiveEntry;
@ -266,7 +267,7 @@ int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name
int _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr);
/* Parameter order is (container, sub), matching the definition and the call site in the eval loop. */
int _Py_Specialize_BinarySubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache);
int _Py_Specialize_CallFunction(PyObject *callable, _Py_CODEUNIT *instr, int nargs, SpecializedCacheEntry *cache, PyObject *builtins);
void _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
SpecializedCacheEntry *cache);

65
Include/opcode.h generated
View File

@ -121,38 +121,39 @@ extern "C" {
#define BINARY_OP_SUBTRACT_INT 19
#define BINARY_OP_SUBTRACT_FLOAT 20
#define BINARY_SUBSCR_ADAPTIVE 21
#define BINARY_SUBSCR_LIST_INT 22
#define BINARY_SUBSCR_TUPLE_INT 23
#define BINARY_SUBSCR_DICT 24
#define CALL_FUNCTION_ADAPTIVE 26
#define CALL_FUNCTION_BUILTIN_O 27
#define CALL_FUNCTION_BUILTIN_FAST 28
#define CALL_FUNCTION_LEN 29
#define CALL_FUNCTION_ISINSTANCE 34
#define CALL_FUNCTION_PY_SIMPLE 36
#define JUMP_ABSOLUTE_QUICK 38
#define LOAD_ATTR_ADAPTIVE 39
#define LOAD_ATTR_INSTANCE_VALUE 40
#define LOAD_ATTR_WITH_HINT 41
#define LOAD_ATTR_SLOT 42
#define LOAD_ATTR_MODULE 43
#define LOAD_GLOBAL_ADAPTIVE 44
#define LOAD_GLOBAL_MODULE 45
#define LOAD_GLOBAL_BUILTIN 46
#define LOAD_METHOD_ADAPTIVE 47
#define LOAD_METHOD_CACHED 48
#define LOAD_METHOD_CLASS 55
#define LOAD_METHOD_MODULE 56
#define LOAD_METHOD_NO_DICT 57
#define STORE_ATTR_ADAPTIVE 58
#define STORE_ATTR_INSTANCE_VALUE 59
#define STORE_ATTR_SLOT 62
#define STORE_ATTR_WITH_HINT 63
#define LOAD_FAST__LOAD_FAST 64
#define STORE_FAST__LOAD_FAST 65
#define LOAD_FAST__LOAD_CONST 66
#define LOAD_CONST__LOAD_FAST 67
#define STORE_FAST__STORE_FAST 75
#define BINARY_SUBSCR_GETITEM 22
#define BINARY_SUBSCR_LIST_INT 23
#define BINARY_SUBSCR_TUPLE_INT 24
#define BINARY_SUBSCR_DICT 26
#define CALL_FUNCTION_ADAPTIVE 27
#define CALL_FUNCTION_BUILTIN_O 28
#define CALL_FUNCTION_BUILTIN_FAST 29
#define CALL_FUNCTION_LEN 34
#define CALL_FUNCTION_ISINSTANCE 36
#define CALL_FUNCTION_PY_SIMPLE 38
#define JUMP_ABSOLUTE_QUICK 39
#define LOAD_ATTR_ADAPTIVE 40
#define LOAD_ATTR_INSTANCE_VALUE 41
#define LOAD_ATTR_WITH_HINT 42
#define LOAD_ATTR_SLOT 43
#define LOAD_ATTR_MODULE 44
#define LOAD_GLOBAL_ADAPTIVE 45
#define LOAD_GLOBAL_MODULE 46
#define LOAD_GLOBAL_BUILTIN 47
#define LOAD_METHOD_ADAPTIVE 48
#define LOAD_METHOD_CACHED 55
#define LOAD_METHOD_CLASS 56
#define LOAD_METHOD_MODULE 57
#define LOAD_METHOD_NO_DICT 58
#define STORE_ATTR_ADAPTIVE 59
#define STORE_ATTR_INSTANCE_VALUE 62
#define STORE_ATTR_SLOT 63
#define STORE_ATTR_WITH_HINT 64
#define LOAD_FAST__LOAD_FAST 65
#define STORE_FAST__LOAD_FAST 66
#define LOAD_FAST__LOAD_CONST 67
#define LOAD_CONST__LOAD_FAST 75
#define STORE_FAST__STORE_FAST 76
#define DO_TRACING 255
#ifdef NEED_OPCODE_JUMP_TABLES
static uint32_t _PyOpcode_RelativeJump[8] = {

View File

@ -234,6 +234,7 @@ _specialized_instructions = [
"BINARY_OP_SUBTRACT_INT",
"BINARY_OP_SUBTRACT_FLOAT",
"BINARY_SUBSCR_ADAPTIVE",
"BINARY_SUBSCR_GETITEM",
"BINARY_SUBSCR_LIST_INT",
"BINARY_SUBSCR_TUPLE_INT",
"BINARY_SUBSCR_DICT",

View File

@ -0,0 +1,2 @@
Specialize :opcode:`BINARY_SUBSCR` for classes with a ``__getitem__`` method
implemented in Python.

View File

@ -2140,21 +2140,21 @@ check_eval_breaker:
}
TARGET(BINARY_SUBSCR_ADAPTIVE) {
if (oparg == 0) {
SpecializedCacheEntry *cache = GET_CACHE();
if (cache->adaptive.counter == 0) {
PyObject *sub = TOP();
PyObject *container = SECOND();
next_instr--;
if (_Py_Specialize_BinarySubscr(container, sub, next_instr) < 0) {
if (_Py_Specialize_BinarySubscr(container, sub, next_instr, cache) < 0) {
goto error;
}
DISPATCH();
}
else {
STAT_INC(BINARY_SUBSCR, deferred);
// oparg is the adaptive cache counter
UPDATE_PREV_INSTR_OPARG(next_instr, oparg - 1);
assert(_Py_OPCODE(next_instr[-1]) == BINARY_SUBSCR_ADAPTIVE);
assert(_Py_OPARG(next_instr[-1]) == oparg - 1);
cache->adaptive.counter--;
assert(cache->adaptive.original_oparg == 0);
/* No need to set oparg here; it isn't used by BINARY_SUBSCR */
STAT_DEC(BINARY_SUBSCR, unquickened);
JUMP_TO_INSTRUCTION(BINARY_SUBSCR);
}
@ -2223,6 +2223,37 @@ check_eval_breaker:
DISPATCH();
}
TARGET(BINARY_SUBSCR_GETITEM) {
    /* Specialized BINARY_SUBSCR for a container whose __getitem__ is the
     * Python function held in the second cache entry. Instead of making a
     * call, a new interpreter frame is pushed for that function and
     * execution continues at start_frame. */
    PyObject *sub = TOP();
    PyObject *container = SECOND();
    SpecializedCacheEntry *caches = GET_CACHE();
    _PyAdaptiveEntry *cache0 = &caches[0].adaptive;
    _PyObjectCache *cache1 = &caches[-1].obj;
    PyFunctionObject *getitem = (PyFunctionObject *)cache1->obj;
    /* Guards: fall back to the generic instruction if the container's type
     * version or the cached function's version no longer match the values
     * recorded in the cache. */
    DEOPT_IF(Py_TYPE(container)->tp_version_tag != cache0->version, BINARY_SUBSCR);
    DEOPT_IF(getitem->func_version != cache0->index, BINARY_SUBSCR);
    PyCodeObject *code = (PyCodeObject *)getitem->func_code;
    size_t size = code->co_nlocalsplus + code->co_stacksize + FRAME_SPECIALS_SIZE;
    assert(code->co_argcount == 2);
    InterpreterFrame *new_frame = _PyThreadState_BumpFramePointer(tstate, size);
    if (new_frame == NULL) {
        goto error;
    }
    _PyFrame_InitializeSpecials(new_frame, PyFunction_AS_FRAME_CONSTRUCTOR(getitem),
                                NULL, code->co_nlocalsplus);
    /* The two operands are moved (no refcount change here) from the value
     * stack into the callee's first two local slots. */
    STACK_SHRINK(2);
    new_frame->localsplus[0] = container;
    new_frame->localsplus[1] = sub;
    /* Remaining locals start out unset. */
    for (int i = 2; i < code->co_nlocalsplus; i++) {
        new_frame->localsplus[i] = NULL;
    }
    _PyFrame_SetStackPointer(frame, stack_pointer);
    new_frame->previous = frame;
    /* The depth must be derived from the caller's frame, so compute it
     * while `frame` still refers to the caller. Doing this after the
     * assignment below would read new_frame's own depth instead. */
    new_frame->depth = frame->depth + 1;
    frame = cframe.current_frame = new_frame;
    goto start_frame;
}
TARGET(LIST_APPEND) {
PyObject *v = POP();
PyObject *list = PEEK(oparg);
@ -4878,29 +4909,13 @@ opname ## _miss: \
JUMP_TO_INSTRUCTION(opname); \
}
/* Expands to the `<opname>_miss` label and its miss handler, for
 * instruction families that keep their miss counter in the instruction's
 * own oparg.
 *
 * The handler counts the miss, decrements the oparg of the instruction
 * just executed (next_instr[-1]) in place, and, once that counter reaches
 * zero, rewrites the instruction to `<opname>_ADAPTIVE` with an oparg of
 * ADAPTIVE_CACHE_BACKOFF and counts a deopt. In every case it then
 * decrements the `unquickened` stat and jumps to the generic `opname`
 * instruction.
 */
#define MISS_WITH_OPARG_COUNTER(opname) \
opname ## _miss: \
{ \
STAT_INC(opname, miss); \
uint8_t oparg = _Py_OPARG(next_instr[-1])-1; \
UPDATE_PREV_INSTR_OPARG(next_instr, oparg); \
assert(_Py_OPARG(next_instr[-1]) == oparg); \
if (oparg == 0) /* too many cache misses */ { \
oparg = ADAPTIVE_CACHE_BACKOFF; \
next_instr[-1] = _Py_MAKECODEUNIT(opname ## _ADAPTIVE, oparg); \
STAT_INC(opname, deopt); \
} \
STAT_DEC(opname, unquickened); \
JUMP_TO_INSTRUCTION(opname); \
}
MISS_WITH_CACHE(LOAD_ATTR)
MISS_WITH_CACHE(STORE_ATTR)
MISS_WITH_CACHE(LOAD_GLOBAL)
MISS_WITH_CACHE(LOAD_METHOD)
MISS_WITH_CACHE(CALL_FUNCTION)
MISS_WITH_CACHE(BINARY_OP)
MISS_WITH_OPARG_COUNTER(BINARY_SUBSCR)
MISS_WITH_CACHE(BINARY_SUBSCR)
binary_subscr_dict_error:
{

View File

@ -21,22 +21,23 @@ static void *opcode_targets[256] = {
&&TARGET_BINARY_OP_SUBTRACT_INT,
&&TARGET_BINARY_OP_SUBTRACT_FLOAT,
&&TARGET_BINARY_SUBSCR_ADAPTIVE,
&&TARGET_BINARY_SUBSCR_GETITEM,
&&TARGET_BINARY_SUBSCR_LIST_INT,
&&TARGET_BINARY_SUBSCR_TUPLE_INT,
&&TARGET_BINARY_SUBSCR_DICT,
&&TARGET_BINARY_SUBSCR,
&&TARGET_BINARY_SUBSCR_DICT,
&&TARGET_CALL_FUNCTION_ADAPTIVE,
&&TARGET_CALL_FUNCTION_BUILTIN_O,
&&TARGET_CALL_FUNCTION_BUILTIN_FAST,
&&TARGET_CALL_FUNCTION_LEN,
&&TARGET_GET_LEN,
&&TARGET_MATCH_MAPPING,
&&TARGET_MATCH_SEQUENCE,
&&TARGET_MATCH_KEYS,
&&TARGET_CALL_FUNCTION_ISINSTANCE,
&&TARGET_CALL_FUNCTION_LEN,
&&TARGET_PUSH_EXC_INFO,
&&TARGET_CALL_FUNCTION_PY_SIMPLE,
&&TARGET_CALL_FUNCTION_ISINSTANCE,
&&TARGET_POP_EXCEPT_AND_RERAISE,
&&TARGET_CALL_FUNCTION_PY_SIMPLE,
&&TARGET_JUMP_ABSOLUTE_QUICK,
&&TARGET_LOAD_ATTR_ADAPTIVE,
&&TARGET_LOAD_ATTR_INSTANCE_VALUE,
@ -47,26 +48,25 @@ static void *opcode_targets[256] = {
&&TARGET_LOAD_GLOBAL_MODULE,
&&TARGET_LOAD_GLOBAL_BUILTIN,
&&TARGET_LOAD_METHOD_ADAPTIVE,
&&TARGET_LOAD_METHOD_CACHED,
&&TARGET_WITH_EXCEPT_START,
&&TARGET_GET_AITER,
&&TARGET_GET_ANEXT,
&&TARGET_BEFORE_ASYNC_WITH,
&&TARGET_BEFORE_WITH,
&&TARGET_END_ASYNC_FOR,
&&TARGET_LOAD_METHOD_CACHED,
&&TARGET_LOAD_METHOD_CLASS,
&&TARGET_LOAD_METHOD_MODULE,
&&TARGET_LOAD_METHOD_NO_DICT,
&&TARGET_STORE_ATTR_ADAPTIVE,
&&TARGET_STORE_ATTR_INSTANCE_VALUE,
&&TARGET_STORE_SUBSCR,
&&TARGET_DELETE_SUBSCR,
&&TARGET_STORE_ATTR_INSTANCE_VALUE,
&&TARGET_STORE_ATTR_SLOT,
&&TARGET_STORE_ATTR_WITH_HINT,
&&TARGET_LOAD_FAST__LOAD_FAST,
&&TARGET_STORE_FAST__LOAD_FAST,
&&TARGET_LOAD_FAST__LOAD_CONST,
&&TARGET_LOAD_CONST__LOAD_FAST,
&&TARGET_GET_ITER,
&&TARGET_GET_YIELD_FROM_ITER,
&&TARGET_PRINT_EXPR,
@ -74,13 +74,13 @@ static void *opcode_targets[256] = {
&&TARGET_YIELD_FROM,
&&TARGET_GET_AWAITABLE,
&&TARGET_LOAD_ASSERTION_ERROR,
&&TARGET_LOAD_CONST__LOAD_FAST,
&&TARGET_STORE_FAST__STORE_FAST,
&&_unknown_opcode,
&&_unknown_opcode,
&&_unknown_opcode,
&&_unknown_opcode,
&&_unknown_opcode,
&&_unknown_opcode,
&&TARGET_LIST_TO_TUPLE,
&&TARGET_RETURN_VALUE,
&&TARGET_IMPORT_STAR,

View File

@ -243,7 +243,7 @@ static uint8_t cache_requirements[256] = {
[LOAD_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */
[LOAD_GLOBAL] = 2, /* _PyAdaptiveEntry and _PyLoadGlobalCache */
[LOAD_METHOD] = 3, /* _PyAdaptiveEntry, _PyAttrCache and _PyObjectCache */
[BINARY_SUBSCR] = 0,
[BINARY_SUBSCR] = 2, /* _PyAdaptiveEntry, _PyObjectCache */
[CALL_FUNCTION] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */
[STORE_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */
[BINARY_OP] = 1, // _PyAdaptiveEntry
@ -1100,7 +1100,7 @@ success:
#if COLLECT_SPECIALIZATION_STATS_DETAILED
static int
binary_subscr_faiL_kind(PyTypeObject *container_type, PyObject *sub)
binary_subscr_fail_kind(PyTypeObject *container_type, PyObject *sub)
{
if (container_type == &PyUnicode_Type) {
if (PyLong_CheckExact(sub)) {
@ -1138,14 +1138,37 @@ binary_subscr_faiL_kind(PyTypeObject *container_type, PyObject *sub)
}
#endif
_Py_IDENTIFIER(__getitem__);
#define SIMPLE_FUNCTION 0
/* Classify a code object for the specializer.
 *
 * Returns SIMPLE_FUNCTION when no check rejects the code object; otherwise
 * returns the SPEC_FAIL_* value of the first check that does. Checks run
 * in this order: generator/coroutine/async-generator flags, then *args /
 * **kwargs flags or any keyword-only arguments, then a missing
 * CO_OPTIMIZED flag, then the presence of free variables.
 */
static int
function_kind(PyCodeObject *code) {
    const int co_flags = code->co_flags;
    const int generator_like = CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR;
    const int star_params = CO_VARKEYWORDS | CO_VARARGS;
    int kind = SIMPLE_FUNCTION;

    if ((co_flags & generator_like) != 0) {
        kind = SPEC_FAIL_GENERATOR;
    }
    else if ((co_flags & star_params) != 0 || code->co_kwonlyargcount != 0) {
        kind = SPEC_FAIL_COMPLEX_PARAMETERS;
    }
    else if (!(co_flags & CO_OPTIMIZED)) {
        kind = SPEC_FAIL_CO_NOT_OPTIMIZED;
    }
    else if (code->co_nfreevars != 0) {
        kind = SPEC_FAIL_FREE_VARS;
    }
    return kind;
}
int
_Py_Specialize_BinarySubscr(
PyObject *container, PyObject *sub, _Py_CODEUNIT *instr)
PyObject *container, PyObject *sub, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache)
{
_PyAdaptiveEntry *cache0 = &cache->adaptive;
PyTypeObject *container_type = Py_TYPE(container);
if (container_type == &PyList_Type) {
if (PyLong_CheckExact(sub)) {
*instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_LIST_INT, initial_counter_value());
*instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_LIST_INT, _Py_OPARG(*instr));
goto success;
}
SPECIALIZATION_FAIL(BINARY_SUBSCR,
@ -1154,7 +1177,7 @@ _Py_Specialize_BinarySubscr(
}
if (container_type == &PyTuple_Type) {
if (PyLong_CheckExact(sub)) {
*instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_TUPLE_INT, initial_counter_value());
*instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_TUPLE_INT, _Py_OPARG(*instr));
goto success;
}
SPECIALIZATION_FAIL(BINARY_SUBSCR,
@ -1162,20 +1185,46 @@ _Py_Specialize_BinarySubscr(
goto fail;
}
if (container_type == &PyDict_Type) {
*instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_DICT, initial_counter_value());
*instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_DICT, _Py_OPARG(*instr));
goto success;
}
PyTypeObject *cls = Py_TYPE(container);
PyObject *descriptor = _PyType_LookupId(cls, &PyId___getitem__);
if (descriptor && Py_TYPE(descriptor) == &PyFunction_Type) {
    /* __getitem__ is a plain Python function: try to specialize to
     * BINARY_SUBSCR_GETITEM, which guards on the type version and the
     * function version recorded below. */
    PyFunctionObject *func = (PyFunctionObject *)descriptor;
    PyCodeObject *code = (PyCodeObject *)func->func_code;
    int kind = function_kind(code);
    if (kind != SIMPLE_FUNCTION) {
        SPECIALIZATION_FAIL(BINARY_SUBSCR, kind);
        goto fail;
    }
    /* The specialized instruction passes exactly (container, sub). */
    if (code->co_argcount != 2) {
        SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS);
        goto fail;
    }
    assert(cls->tp_version_tag != 0);
    cache0->version = cls->tp_version_tag;
    int version = _PyFunction_GetVersionForCurrentState(func);
    /* The function version is stored in cache0->index, which is only
     * 16 bits wide. A version that does not fit would be truncated on
     * store and could never compare equal to func_version in the
     * specialized instruction, so refuse to specialize in that case. */
    if (version == 0 || version != (uint16_t)version) {
        SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_VERSIONS);
        goto fail;
    }
    cache0->index = version;
    cache[-1].obj.obj = descriptor;
    *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_GETITEM, _Py_OPARG(*instr));
    goto success;
}
SPECIALIZATION_FAIL(BINARY_SUBSCR,
binary_subscr_faiL_kind(container_type, sub));
goto fail;
binary_subscr_fail_kind(container_type, sub));
fail:
STAT_INC(BINARY_SUBSCR, specialization_failure);
assert(!PyErr_Occurred());
*instr = _Py_MAKECODEUNIT(_Py_OPCODE(*instr), ADAPTIVE_CACHE_BACKOFF);
cache_backoff(cache0);
return 0;
success:
STAT_INC(BINARY_SUBSCR, specialization_success);
assert(!PyErr_Occurred());
cache0->counter = initial_counter_value();
return 0;
}
@ -1194,23 +1243,10 @@ specialize_py_call(
int nargs, SpecializedCacheEntry *cache)
{
_PyCallCache *cache1 = &cache[-1].call;
/* Exclude generator or coroutines for now */
PyCodeObject *code = (PyCodeObject *)func->func_code;
int flags = code->co_flags;
if (flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR)) {
SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_GENERATOR);
return -1;
}
if ((flags & (CO_VARKEYWORDS | CO_VARARGS)) || code->co_kwonlyargcount) {
SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_COMPLEX_PARAMETERS);
return -1;
}
if ((flags & CO_OPTIMIZED) == 0) {
SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_CO_NOT_OPTIMIZED);
return -1;
}
if (code->co_nfreevars) {
SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_FREE_VARS);
int kind = function_kind(code);
if (kind != SIMPLE_FUNCTION) {
SPECIALIZATION_FAIL(CALL_FUNCTION, kind);
return -1;
}
int argcount = code->co_argcount;