mirror of https://github.com/python/cpython
bpo-46841: Use inline cache for `BINARY_SUBSCR`. (GH-31618)
This commit is contained in:
parent
e91b0a7139
commit
3b0f1c5a71
|
@ -229,6 +229,13 @@ struct _typeobject {
|
|||
vectorcallfunc tp_vectorcall;
|
||||
};
|
||||
|
||||
/* This struct is used by the specializer.
|
||||
* It should be treated as an opaque blob
|
||||
* by code other than the specializer and interpreter. */
|
||||
struct _specialization_cache {
|
||||
PyObject *getitem;
|
||||
};
|
||||
|
||||
/* The *real* layout of a type object when allocated on the heap */
|
||||
typedef struct _heaptypeobject {
|
||||
/* Note: there's a dependency on the order of these members
|
||||
|
@ -247,6 +254,7 @@ typedef struct _heaptypeobject {
|
|||
struct _dictkeysobject *ht_cached_keys;
|
||||
PyObject *ht_module;
|
||||
char *_ht_tpname; // Storage for "tp_name"; see PyType_FromModuleAndSpec
|
||||
struct _specialization_cache _spec_cache; // For use by the specializer.
|
||||
/* here are optional user slots, followed by the members. */
|
||||
} PyHeapTypeObject;
|
||||
|
||||
|
|
|
@ -92,6 +92,15 @@ typedef struct {
|
|||
|
||||
#define INLINE_CACHE_ENTRIES_COMPARE_OP CACHE_ENTRIES(_PyCompareOpCache)
|
||||
|
||||
typedef struct {
|
||||
_Py_CODEUNIT counter;
|
||||
_Py_CODEUNIT type_version;
|
||||
_Py_CODEUNIT _t1;
|
||||
_Py_CODEUNIT func_version;
|
||||
} _PyBinarySubscrCache;
|
||||
|
||||
#define INLINE_CACHE_ENTRIES_BINARY_SUBSCR CACHE_ENTRIES(_PyBinarySubscrCache)
|
||||
|
||||
/* Maximum size of code to quicken, in code units. */
|
||||
#define MAX_SIZE_TO_QUICKEN 5000
|
||||
|
||||
|
@ -323,7 +332,7 @@ extern int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObjec
|
|||
extern int _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
|
||||
extern int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name);
|
||||
extern int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
|
||||
extern int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache);
|
||||
extern int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr);
|
||||
extern int _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT *instr);
|
||||
extern int _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
|
||||
PyObject *kwnames, SpecializedCacheEntry *cache);
|
||||
|
|
|
@ -211,6 +211,7 @@ static const uint32_t _PyOpcode_Jump[8] = {
|
|||
};
|
||||
|
||||
const uint8_t _PyOpcode_InlineCacheEntries[256] = {
|
||||
[BINARY_SUBSCR] = 4,
|
||||
[UNPACK_SEQUENCE] = 1,
|
||||
[COMPARE_OP] = 2,
|
||||
[LOAD_GLOBAL] = 5,
|
||||
|
|
|
@ -389,7 +389,7 @@ _code_type = type(_write_atomic.__code__)
|
|||
# Python 3.11a5 3480 (New CALL opcodes, second iteration)
|
||||
# Python 3.11a5 3481 (Use inline cache for BINARY_OP)
|
||||
# Python 3.11a5 3482 (Use inline caching for UNPACK_SEQUENCE and LOAD_GLOBAL)
|
||||
# Python 3.11a5 3483 (Use inline caching for COMPARE_OP)
|
||||
# Python 3.11a5 3483 (Use inline caching for COMPARE_OP and BINARY_SUBSCR)
|
||||
|
||||
# Python 3.12 will start with magic number 3500
|
||||
|
||||
|
|
|
@ -68,7 +68,7 @@ def_op('UNARY_NOT', 12)
|
|||
|
||||
def_op('UNARY_INVERT', 15)
|
||||
|
||||
def_op('BINARY_SUBSCR', 25)
|
||||
def_op('BINARY_SUBSCR', 25, 4)
|
||||
|
||||
def_op('GET_LEN', 30)
|
||||
def_op('MATCH_MAPPING', 31)
|
||||
|
|
|
@ -335,7 +335,7 @@ class CAPITest(unittest.TestCase):
|
|||
*_, count = line.split(b' ')
|
||||
count = int(count)
|
||||
self.assertLessEqual(count, i*5)
|
||||
self.assertGreaterEqual(count, i*5-1)
|
||||
self.assertGreaterEqual(count, i*5-2)
|
||||
|
||||
def test_mapping_keys_values_items(self):
|
||||
class Mapping1(dict):
|
||||
|
|
|
@ -1501,7 +1501,9 @@ class SizeofTest(unittest.TestCase):
|
|||
'3P' # PyMappingMethods
|
||||
'10P' # PySequenceMethods
|
||||
'2P' # PyBufferProcs
|
||||
'6P')
|
||||
'6P'
|
||||
'1P' # Specializer cache
|
||||
)
|
||||
class newstyleclass(object): pass
|
||||
# Separate block for PyDictKeysObject with 8 keys and 5 entries
|
||||
check(newstyleclass, s + calcsize(DICT_KEY_STRUCT_FORMAT) + 64 + 42*calcsize("n2P"))
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Use inline cache for :opcode:`BINARY_SUBSCR`.
|
|
@ -1,13 +1,14 @@
|
|||
// Auto-generated by Programs/freeze_test_frozenmain.py
|
||||
unsigned char M_test_frozenmain[] = {
|
||||
227,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,
|
||||
0,0,0,0,0,115,104,0,0,0,151,0,100,0,100,1,
|
||||
0,0,0,0,0,115,120,0,0,0,151,0,100,0,100,1,
|
||||
108,0,90,0,100,0,100,1,108,1,90,1,2,0,101,2,
|
||||
100,2,166,1,171,1,1,0,2,0,101,2,100,3,101,0,
|
||||
106,3,166,2,171,2,1,0,2,0,101,1,106,4,166,0,
|
||||
171,0,100,4,25,0,90,5,100,5,68,0,93,16,90,6,
|
||||
2,0,101,2,100,6,101,6,155,0,100,7,101,5,101,6,
|
||||
25,0,155,0,157,4,166,1,171,1,1,0,113,33,100,1,
|
||||
171,0,100,4,25,0,3,0,3,0,3,0,3,0,90,5,
|
||||
100,5,68,0,93,20,90,6,2,0,101,2,100,6,101,6,
|
||||
155,0,100,7,101,5,101,6,25,0,3,0,3,0,3,0,
|
||||
3,0,155,0,157,4,166,1,171,1,1,0,113,37,100,1,
|
||||
83,0,41,8,233,0,0,0,0,78,122,18,70,114,111,122,
|
||||
101,110,32,72,101,108,108,111,32,87,111,114,108,100,122,8,
|
||||
115,121,115,46,97,114,103,118,218,6,99,111,110,102,105,103,
|
||||
|
@ -24,14 +25,15 @@ unsigned char M_test_frozenmain[] = {
|
|||
0,0,250,18,116,101,115,116,95,102,114,111,122,101,110,109,
|
||||
97,105,110,46,112,121,250,8,60,109,111,100,117,108,101,62,
|
||||
114,11,0,0,0,1,0,0,0,115,18,0,0,0,2,128,
|
||||
8,3,8,1,12,2,16,1,16,1,8,1,30,7,4,249,
|
||||
115,20,0,0,0,2,128,8,3,8,1,12,2,16,1,16,
|
||||
1,2,7,4,1,2,249,34,7,115,104,0,0,0,0,0,
|
||||
8,3,8,1,12,2,16,1,24,1,8,1,38,7,4,249,
|
||||
115,20,0,0,0,2,128,8,3,8,1,12,2,16,1,24,
|
||||
1,2,7,4,1,2,249,42,7,115,120,0,0,0,0,0,
|
||||
1,11,1,11,1,11,1,11,1,25,1,25,1,25,1,25,
|
||||
1,6,1,6,7,27,1,28,1,28,1,28,1,6,1,6,
|
||||
7,17,19,22,19,27,1,28,1,28,1,28,10,39,10,27,
|
||||
10,39,10,41,10,41,42,50,10,51,1,7,12,2,1,42,
|
||||
1,42,5,8,5,10,5,10,11,41,21,24,11,41,11,41,
|
||||
28,34,35,38,28,39,11,41,11,41,5,42,5,42,5,42,
|
||||
10,39,10,41,10,41,42,50,10,51,10,51,10,51,10,51,
|
||||
10,51,1,7,12,2,1,42,1,42,5,8,5,10,5,10,
|
||||
11,41,21,24,11,41,11,41,28,34,35,38,28,39,28,39,
|
||||
28,39,28,39,28,39,11,41,11,41,5,42,5,42,5,42,
|
||||
5,42,1,42,1,42,114,9,0,0,0,
|
||||
};
|
||||
|
|
|
@ -2102,25 +2102,24 @@ handle_eval_breaker:
|
|||
SET_TOP(res);
|
||||
if (res == NULL)
|
||||
goto error;
|
||||
JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
|
||||
DISPATCH();
|
||||
}
|
||||
|
||||
TARGET(BINARY_SUBSCR_ADAPTIVE) {
|
||||
SpecializedCacheEntry *cache = GET_CACHE();
|
||||
if (cache->adaptive.counter == 0) {
|
||||
_PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr;
|
||||
if (cache->counter == 0) {
|
||||
PyObject *sub = TOP();
|
||||
PyObject *container = SECOND();
|
||||
next_instr--;
|
||||
if (_Py_Specialize_BinarySubscr(container, sub, next_instr, cache) < 0) {
|
||||
if (_Py_Specialize_BinarySubscr(container, sub, next_instr) < 0) {
|
||||
goto error;
|
||||
}
|
||||
DISPATCH();
|
||||
}
|
||||
else {
|
||||
STAT_INC(BINARY_SUBSCR, deferred);
|
||||
cache->adaptive.counter--;
|
||||
assert(cache->adaptive.original_oparg == 0);
|
||||
/* No need to set oparg here; it isn't used by BINARY_SUBSCR */
|
||||
cache->counter--;
|
||||
JUMP_TO_INSTRUCTION(BINARY_SUBSCR);
|
||||
}
|
||||
}
|
||||
|
@ -2146,6 +2145,7 @@ handle_eval_breaker:
|
|||
Py_DECREF(sub);
|
||||
SET_TOP(res);
|
||||
Py_DECREF(list);
|
||||
JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
|
||||
NOTRACE_DISPATCH();
|
||||
}
|
||||
|
||||
|
@ -2170,6 +2170,7 @@ handle_eval_breaker:
|
|||
Py_DECREF(sub);
|
||||
SET_TOP(res);
|
||||
Py_DECREF(tuple);
|
||||
JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
|
||||
NOTRACE_DISPATCH();
|
||||
}
|
||||
|
||||
|
@ -2188,18 +2189,22 @@ handle_eval_breaker:
|
|||
Py_DECREF(sub);
|
||||
SET_TOP(res);
|
||||
Py_DECREF(dict);
|
||||
JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
|
||||
DISPATCH();
|
||||
}
|
||||
|
||||
TARGET(BINARY_SUBSCR_GETITEM) {
|
||||
PyObject *sub = TOP();
|
||||
PyObject *container = SECOND();
|
||||
SpecializedCacheEntry *caches = GET_CACHE();
|
||||
_PyAdaptiveEntry *cache0 = &caches[0].adaptive;
|
||||
_PyObjectCache *cache1 = &caches[-1].obj;
|
||||
PyFunctionObject *getitem = (PyFunctionObject *)cache1->obj;
|
||||
DEOPT_IF(Py_TYPE(container)->tp_version_tag != cache0->version, BINARY_SUBSCR);
|
||||
DEOPT_IF(getitem->func_version != cache0->index, BINARY_SUBSCR);
|
||||
_PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr;
|
||||
uint32_t type_version = read32(&cache->type_version);
|
||||
PyTypeObject *tp = Py_TYPE(container);
|
||||
DEOPT_IF(tp->tp_version_tag != type_version, BINARY_SUBSCR);
|
||||
assert(tp->tp_flags & Py_TPFLAGS_HEAPTYPE);
|
||||
PyObject *cached = ((PyHeapTypeObject *)tp)->_spec_cache.getitem;
|
||||
assert(PyFunction_Check(cached));
|
||||
PyFunctionObject *getitem = (PyFunctionObject *)cached;
|
||||
DEOPT_IF(getitem->func_version != cache->func_version, BINARY_SUBSCR);
|
||||
PyCodeObject *code = (PyCodeObject *)getitem->func_code;
|
||||
size_t size = code->co_nlocalsplus + code->co_stacksize + FRAME_SPECIALS_SIZE;
|
||||
assert(code->co_argcount == 2);
|
||||
|
@ -2218,6 +2223,7 @@ handle_eval_breaker:
|
|||
new_frame->localsplus[i] = NULL;
|
||||
}
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
frame->f_lasti += INLINE_CACHE_ENTRIES_BINARY_SUBSCR;
|
||||
new_frame->previous = frame;
|
||||
frame = cframe.current_frame = new_frame;
|
||||
CALL_STAT_INC(inlined_py_calls);
|
||||
|
@ -5605,7 +5611,7 @@ MISS_WITH_CACHE(PRECALL)
|
|||
MISS_WITH_CACHE(CALL)
|
||||
MISS_WITH_INLINE_CACHE(BINARY_OP)
|
||||
MISS_WITH_INLINE_CACHE(COMPARE_OP)
|
||||
MISS_WITH_CACHE(BINARY_SUBSCR)
|
||||
MISS_WITH_INLINE_CACHE(BINARY_SUBSCR)
|
||||
MISS_WITH_INLINE_CACHE(UNPACK_SEQUENCE)
|
||||
MISS_WITH_OPARG_COUNTER(STORE_SUBSCR)
|
||||
|
||||
|
|
|
@ -60,7 +60,6 @@ static uint8_t adaptive_opcodes[256] = {
|
|||
static uint8_t cache_requirements[256] = {
|
||||
[LOAD_ATTR] = 1, // _PyAdaptiveEntry
|
||||
[LOAD_METHOD] = 3, /* _PyAdaptiveEntry, _PyAttrCache and _PyObjectCache */
|
||||
[BINARY_SUBSCR] = 2, /* _PyAdaptiveEntry, _PyObjectCache */
|
||||
[STORE_SUBSCR] = 0,
|
||||
[CALL] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */
|
||||
[PRECALL] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */
|
||||
|
@ -385,6 +384,8 @@ optimize(SpecializedCacheOrInstruction *quickened, int len)
|
|||
if (adaptive_opcode) {
|
||||
if (_PyOpcode_InlineCacheEntries[opcode]) {
|
||||
instructions[i] = _Py_MAKECODEUNIT(adaptive_opcode, oparg);
|
||||
previous_opcode = -1;
|
||||
i += _PyOpcode_InlineCacheEntries[opcode];
|
||||
}
|
||||
else if (previous_opcode != EXTENDED_ARG) {
|
||||
int new_oparg = oparg_from_instruction_and_update_offset(
|
||||
|
@ -553,6 +554,7 @@ initial_counter_value(void) {
|
|||
#define SPEC_FAIL_SUBSCR_PY_SIMPLE 20
|
||||
#define SPEC_FAIL_SUBSCR_PY_OTHER 21
|
||||
#define SPEC_FAIL_SUBSCR_DICT_SUBCLASS_NO_OVERRIDE 22
|
||||
#define SPEC_FAIL_SUBSCR_NOT_HEAP_TYPE 23
|
||||
|
||||
/* Binary op */
|
||||
|
||||
|
@ -1335,9 +1337,11 @@ function_kind(PyCodeObject *code) {
|
|||
|
||||
int
|
||||
_Py_Specialize_BinarySubscr(
|
||||
PyObject *container, PyObject *sub, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache)
|
||||
PyObject *container, PyObject *sub, _Py_CODEUNIT *instr)
|
||||
{
|
||||
_PyAdaptiveEntry *cache0 = &cache->adaptive;
|
||||
assert(_PyOpcode_InlineCacheEntries[BINARY_SUBSCR] ==
|
||||
INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
|
||||
_PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)(instr + 1);
|
||||
PyTypeObject *container_type = Py_TYPE(container);
|
||||
if (container_type == &PyList_Type) {
|
||||
if (PyLong_CheckExact(sub)) {
|
||||
|
@ -1364,26 +1368,30 @@ _Py_Specialize_BinarySubscr(
|
|||
PyTypeObject *cls = Py_TYPE(container);
|
||||
PyObject *descriptor = _PyType_Lookup(cls, &_Py_ID(__getitem__));
|
||||
if (descriptor && Py_TYPE(descriptor) == &PyFunction_Type) {
|
||||
if (!(container_type->tp_flags & Py_TPFLAGS_HEAPTYPE)) {
|
||||
SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_SUBSCR_NOT_HEAP_TYPE);
|
||||
goto fail;
|
||||
}
|
||||
PyFunctionObject *func = (PyFunctionObject *)descriptor;
|
||||
PyCodeObject *code = (PyCodeObject *)func->func_code;
|
||||
int kind = function_kind(code);
|
||||
PyCodeObject *fcode = (PyCodeObject *)func->func_code;
|
||||
int kind = function_kind(fcode);
|
||||
if (kind != SIMPLE_FUNCTION) {
|
||||
SPECIALIZATION_FAIL(BINARY_SUBSCR, kind);
|
||||
goto fail;
|
||||
}
|
||||
if (code->co_argcount != 2) {
|
||||
if (fcode->co_argcount != 2) {
|
||||
SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS);
|
||||
goto fail;
|
||||
}
|
||||
assert(cls->tp_version_tag != 0);
|
||||
cache0->version = cls->tp_version_tag;
|
||||
write32(&cache->type_version, cls->tp_version_tag);
|
||||
int version = _PyFunction_GetVersionForCurrentState(func);
|
||||
if (version == 0 || version != (uint16_t)version) {
|
||||
SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_VERSIONS);
|
||||
goto fail;
|
||||
}
|
||||
cache0->index = version;
|
||||
cache[-1].obj.obj = descriptor;
|
||||
cache->func_version = version;
|
||||
((PyHeapTypeObject *)container_type)->_spec_cache.getitem = descriptor;
|
||||
*instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_GETITEM, _Py_OPARG(*instr));
|
||||
goto success;
|
||||
}
|
||||
|
@ -1392,12 +1400,12 @@ _Py_Specialize_BinarySubscr(
|
|||
fail:
|
||||
STAT_INC(BINARY_SUBSCR, failure);
|
||||
assert(!PyErr_Occurred());
|
||||
cache_backoff(cache0);
|
||||
cache->counter = ADAPTIVE_CACHE_BACKOFF;
|
||||
return 0;
|
||||
success:
|
||||
STAT_INC(BINARY_SUBSCR, success);
|
||||
assert(!PyErr_Occurred());
|
||||
cache0->counter = initial_counter_value();
|
||||
cache->counter = initial_counter_value();
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue