diff --git a/Include/cpython/object.h b/Include/cpython/object.h index 5f978eec465..b018dabf9d8 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -229,6 +229,13 @@ struct _typeobject { vectorcallfunc tp_vectorcall; }; +/* This struct is used by the specializer. + * It should be treated as an opaque blob + * by code other than the specializer and interpreter. */ +struct _specialization_cache { + PyObject *getitem; +}; + /* The *real* layout of a type object when allocated on the heap */ typedef struct _heaptypeobject { /* Note: there's a dependency on the order of these members @@ -247,6 +254,7 @@ typedef struct _heaptypeobject { struct _dictkeysobject *ht_cached_keys; PyObject *ht_module; char *_ht_tpname; // Storage for "tp_name"; see PyType_FromModuleAndSpec + struct _specialization_cache _spec_cache; // For use by the specializer. /* here are optional user slots, followed by the members. */ } PyHeapTypeObject; diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 47c1998c88d..b9671d0ec32 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -92,6 +92,15 @@ typedef struct { #define INLINE_CACHE_ENTRIES_COMPARE_OP CACHE_ENTRIES(_PyCompareOpCache) +typedef struct { + _Py_CODEUNIT counter; + _Py_CODEUNIT type_version; + _Py_CODEUNIT _t1; + _Py_CODEUNIT func_version; +} _PyBinarySubscrCache; + +#define INLINE_CACHE_ENTRIES_BINARY_SUBSCR CACHE_ENTRIES(_PyBinarySubscrCache) + /* Maximum size of code to quicken, in code units. 
*/ #define MAX_SIZE_TO_QUICKEN 5000 @@ -323,7 +332,7 @@ extern int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObjec extern int _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); extern int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name); extern int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); -extern int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache); +extern int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr); extern int _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT *instr); extern int _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr, int nargs, PyObject *kwnames, SpecializedCacheEntry *cache); diff --git a/Include/opcode.h b/Include/opcode.h index ba85b7ff75f..f6330d9056a 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -211,6 +211,7 @@ static const uint32_t _PyOpcode_Jump[8] = { }; const uint8_t _PyOpcode_InlineCacheEntries[256] = { + [BINARY_SUBSCR] = 4, [UNPACK_SEQUENCE] = 1, [COMPARE_OP] = 2, [LOAD_GLOBAL] = 5, diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index c69c0c73e7a..dd1f6ffd64c 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -389,7 +389,7 @@ _code_type = type(_write_atomic.__code__) # Python 3.11a5 3480 (New CALL opcodes, second iteration) # Python 3.11a5 3481 (Use inline cache for BINARY_OP) # Python 3.11a5 3482 (Use inline caching for UNPACK_SEQUENCE and LOAD_GLOBAL) -# Python 3.11a5 3483 (Use inline caching for COMPARE_OP) +# Python 3.11a5 3483 (Use inline caching for COMPARE_OP and BINARY_SUBSCR) # Python 3.12 will start with magic number 3500 diff --git a/Lib/opcode.py b/Lib/opcode.py index 
dc45cff3017..9b08562cd04 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -68,7 +68,7 @@ def_op('UNARY_NOT', 12) def_op('UNARY_INVERT', 15) -def_op('BINARY_SUBSCR', 25) +def_op('BINARY_SUBSCR', 25, 4) def_op('GET_LEN', 30) def_op('MATCH_MAPPING', 31) diff --git a/Lib/test/test_capi.py b/Lib/test/test_capi.py index 8832292a999..d9615430327 100644 --- a/Lib/test/test_capi.py +++ b/Lib/test/test_capi.py @@ -335,7 +335,7 @@ class CAPITest(unittest.TestCase): *_, count = line.split(b' ') count = int(count) self.assertLessEqual(count, i*5) - self.assertGreaterEqual(count, i*5-1) + self.assertGreaterEqual(count, i*5-2) def test_mapping_keys_values_items(self): class Mapping1(dict): diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index f828d1b15d2..70768f56fa9 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -1501,7 +1501,9 @@ class SizeofTest(unittest.TestCase): '3P' # PyMappingMethods '10P' # PySequenceMethods '2P' # PyBufferProcs - '6P') + '6P' + '1P' # Specializer cache + ) class newstyleclass(object): pass # Separate block for PyDictKeysObject with 8 keys and 5 entries check(newstyleclass, s + calcsize(DICT_KEY_STRUCT_FORMAT) + 64 + 42*calcsize("n2P")) diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-02-28-15-46-36.bpo-46841.MDQoty.rst b/Misc/NEWS.d/next/Core and Builtins/2022-02-28-15-46-36.bpo-46841.MDQoty.rst new file mode 100644 index 00000000000..97b03debcf0 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-02-28-15-46-36.bpo-46841.MDQoty.rst @@ -0,0 +1 @@ +Use inline cache for :opcode:`BINARY_SUBSCR`. 
diff --git a/Programs/test_frozenmain.h b/Programs/test_frozenmain.h index 11593a9ba3d..3fef981e42f 100644 --- a/Programs/test_frozenmain.h +++ b/Programs/test_frozenmain.h @@ -1,13 +1,14 @@ // Auto-generated by Programs/freeze_test_frozenmain.py unsigned char M_test_frozenmain[] = { 227,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0, - 0,0,0,0,0,115,104,0,0,0,151,0,100,0,100,1, + 0,0,0,0,0,115,120,0,0,0,151,0,100,0,100,1, 108,0,90,0,100,0,100,1,108,1,90,1,2,0,101,2, 100,2,166,1,171,1,1,0,2,0,101,2,100,3,101,0, 106,3,166,2,171,2,1,0,2,0,101,1,106,4,166,0, - 171,0,100,4,25,0,90,5,100,5,68,0,93,16,90,6, - 2,0,101,2,100,6,101,6,155,0,100,7,101,5,101,6, - 25,0,155,0,157,4,166,1,171,1,1,0,113,33,100,1, + 171,0,100,4,25,0,3,0,3,0,3,0,3,0,90,5, + 100,5,68,0,93,20,90,6,2,0,101,2,100,6,101,6, + 155,0,100,7,101,5,101,6,25,0,3,0,3,0,3,0, + 3,0,155,0,157,4,166,1,171,1,1,0,113,37,100,1, 83,0,41,8,233,0,0,0,0,78,122,18,70,114,111,122, 101,110,32,72,101,108,108,111,32,87,111,114,108,100,122,8, 115,121,115,46,97,114,103,118,218,6,99,111,110,102,105,103, @@ -24,14 +25,15 @@ unsigned char M_test_frozenmain[] = { 0,0,250,18,116,101,115,116,95,102,114,111,122,101,110,109, 97,105,110,46,112,121,250,8,60,109,111,100,117,108,101,62, 114,11,0,0,0,1,0,0,0,115,18,0,0,0,2,128, - 8,3,8,1,12,2,16,1,16,1,8,1,30,7,4,249, - 115,20,0,0,0,2,128,8,3,8,1,12,2,16,1,16, - 1,2,7,4,1,2,249,34,7,115,104,0,0,0,0,0, + 8,3,8,1,12,2,16,1,24,1,8,1,38,7,4,249, + 115,20,0,0,0,2,128,8,3,8,1,12,2,16,1,24, + 1,2,7,4,1,2,249,42,7,115,120,0,0,0,0,0, 1,11,1,11,1,11,1,11,1,25,1,25,1,25,1,25, 1,6,1,6,7,27,1,28,1,28,1,28,1,6,1,6, 7,17,19,22,19,27,1,28,1,28,1,28,10,39,10,27, - 10,39,10,41,10,41,42,50,10,51,1,7,12,2,1,42, - 1,42,5,8,5,10,5,10,11,41,21,24,11,41,11,41, - 28,34,35,38,28,39,11,41,11,41,5,42,5,42,5,42, + 10,39,10,41,10,41,42,50,10,51,10,51,10,51,10,51, + 10,51,1,7,12,2,1,42,1,42,5,8,5,10,5,10, + 11,41,21,24,11,41,11,41,28,34,35,38,28,39,28,39, + 28,39,28,39,28,39,11,41,11,41,5,42,5,42,5,42, 5,42,1,42,1,42,114,9,0,0,0, }; diff 
--git a/Python/ceval.c b/Python/ceval.c index 0f57e7dc94a..b3673d7d04a 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -2102,25 +2102,24 @@ handle_eval_breaker: SET_TOP(res); if (res == NULL) goto error; + JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR); DISPATCH(); } TARGET(BINARY_SUBSCR_ADAPTIVE) { - SpecializedCacheEntry *cache = GET_CACHE(); - if (cache->adaptive.counter == 0) { + _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr; + if (cache->counter == 0) { PyObject *sub = TOP(); PyObject *container = SECOND(); next_instr--; - if (_Py_Specialize_BinarySubscr(container, sub, next_instr, cache) < 0) { + if (_Py_Specialize_BinarySubscr(container, sub, next_instr) < 0) { goto error; } DISPATCH(); } else { STAT_INC(BINARY_SUBSCR, deferred); - cache->adaptive.counter--; - assert(cache->adaptive.original_oparg == 0); - /* No need to set oparg here; it isn't used by BINARY_SUBSCR */ + cache->counter--; JUMP_TO_INSTRUCTION(BINARY_SUBSCR); } } @@ -2146,6 +2145,7 @@ handle_eval_breaker: Py_DECREF(sub); SET_TOP(res); Py_DECREF(list); + JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR); NOTRACE_DISPATCH(); } @@ -2170,6 +2170,7 @@ handle_eval_breaker: Py_DECREF(sub); SET_TOP(res); Py_DECREF(tuple); + JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR); NOTRACE_DISPATCH(); } @@ -2188,18 +2189,22 @@ handle_eval_breaker: Py_DECREF(sub); SET_TOP(res); Py_DECREF(dict); + JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR); DISPATCH(); } TARGET(BINARY_SUBSCR_GETITEM) { PyObject *sub = TOP(); PyObject *container = SECOND(); - SpecializedCacheEntry *caches = GET_CACHE(); - _PyAdaptiveEntry *cache0 = &caches[0].adaptive; - _PyObjectCache *cache1 = &caches[-1].obj; - PyFunctionObject *getitem = (PyFunctionObject *)cache1->obj; - DEOPT_IF(Py_TYPE(container)->tp_version_tag != cache0->version, BINARY_SUBSCR); - DEOPT_IF(getitem->func_version != cache0->index, BINARY_SUBSCR); + _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr; + uint32_t type_version = 
read32(&cache->type_version); + PyTypeObject *tp = Py_TYPE(container); + DEOPT_IF(tp->tp_version_tag != type_version, BINARY_SUBSCR); + assert(tp->tp_flags & Py_TPFLAGS_HEAPTYPE); + PyObject *cached = ((PyHeapTypeObject *)tp)->_spec_cache.getitem; + assert(PyFunction_Check(cached)); + PyFunctionObject *getitem = (PyFunctionObject *)cached; + DEOPT_IF(getitem->func_version != cache->func_version, BINARY_SUBSCR); PyCodeObject *code = (PyCodeObject *)getitem->func_code; size_t size = code->co_nlocalsplus + code->co_stacksize + FRAME_SPECIALS_SIZE; assert(code->co_argcount == 2); @@ -2218,6 +2223,7 @@ handle_eval_breaker: new_frame->localsplus[i] = NULL; } _PyFrame_SetStackPointer(frame, stack_pointer); + frame->f_lasti += INLINE_CACHE_ENTRIES_BINARY_SUBSCR; new_frame->previous = frame; frame = cframe.current_frame = new_frame; CALL_STAT_INC(inlined_py_calls); @@ -5605,7 +5611,7 @@ MISS_WITH_CACHE(PRECALL) MISS_WITH_CACHE(CALL) MISS_WITH_INLINE_CACHE(BINARY_OP) MISS_WITH_INLINE_CACHE(COMPARE_OP) -MISS_WITH_CACHE(BINARY_SUBSCR) +MISS_WITH_INLINE_CACHE(BINARY_SUBSCR) MISS_WITH_INLINE_CACHE(UNPACK_SEQUENCE) MISS_WITH_OPARG_COUNTER(STORE_SUBSCR) diff --git a/Python/specialize.c b/Python/specialize.c index 925edf3f88d..5486b5b1f65 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -60,7 +60,6 @@ static uint8_t adaptive_opcodes[256] = { static uint8_t cache_requirements[256] = { [LOAD_ATTR] = 1, // _PyAdaptiveEntry [LOAD_METHOD] = 3, /* _PyAdaptiveEntry, _PyAttrCache and _PyObjectCache */ - [BINARY_SUBSCR] = 2, /* _PyAdaptiveEntry, _PyObjectCache */ [STORE_SUBSCR] = 0, [CALL] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */ [PRECALL] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */ @@ -385,6 +384,8 @@ optimize(SpecializedCacheOrInstruction *quickened, int len) if (adaptive_opcode) { if (_PyOpcode_InlineCacheEntries[opcode]) { instructions[i] = _Py_MAKECODEUNIT(adaptive_opcode, oparg); + previous_opcode = -1; + i += 
_PyOpcode_InlineCacheEntries[opcode]; } else if (previous_opcode != EXTENDED_ARG) { int new_oparg = oparg_from_instruction_and_update_offset( @@ -553,6 +554,7 @@ initial_counter_value(void) { #define SPEC_FAIL_SUBSCR_PY_SIMPLE 20 #define SPEC_FAIL_SUBSCR_PY_OTHER 21 #define SPEC_FAIL_SUBSCR_DICT_SUBCLASS_NO_OVERRIDE 22 +#define SPEC_FAIL_SUBSCR_NOT_HEAP_TYPE 23 /* Binary op */ @@ -1335,9 +1337,11 @@ function_kind(PyCodeObject *code) { int _Py_Specialize_BinarySubscr( - PyObject *container, PyObject *sub, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache) + PyObject *container, PyObject *sub, _Py_CODEUNIT *instr) { - _PyAdaptiveEntry *cache0 = &cache->adaptive; + assert(_PyOpcode_InlineCacheEntries[BINARY_SUBSCR] == + INLINE_CACHE_ENTRIES_BINARY_SUBSCR); + _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)(instr + 1); PyTypeObject *container_type = Py_TYPE(container); if (container_type == &PyList_Type) { if (PyLong_CheckExact(sub)) { @@ -1364,26 +1368,30 @@ _Py_Specialize_BinarySubscr( PyTypeObject *cls = Py_TYPE(container); PyObject *descriptor = _PyType_Lookup(cls, &_Py_ID(__getitem__)); if (descriptor && Py_TYPE(descriptor) == &PyFunction_Type) { + if (!(container_type->tp_flags & Py_TPFLAGS_HEAPTYPE)) { + SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_SUBSCR_NOT_HEAP_TYPE); + goto fail; + } PyFunctionObject *func = (PyFunctionObject *)descriptor; - PyCodeObject *code = (PyCodeObject *)func->func_code; - int kind = function_kind(code); + PyCodeObject *fcode = (PyCodeObject *)func->func_code; + int kind = function_kind(fcode); if (kind != SIMPLE_FUNCTION) { SPECIALIZATION_FAIL(BINARY_SUBSCR, kind); goto fail; } - if (code->co_argcount != 2) { + if (fcode->co_argcount != 2) { SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS); goto fail; } assert(cls->tp_version_tag != 0); - cache0->version = cls->tp_version_tag; + write32(&cache->type_version, cls->tp_version_tag); int version = _PyFunction_GetVersionForCurrentState(func); if (version == 0 || 
version != (uint16_t)version) { SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_VERSIONS); goto fail; } - cache0->index = version; - cache[-1].obj.obj = descriptor; + cache->func_version = version; + ((PyHeapTypeObject *)container_type)->_spec_cache.getitem = descriptor; *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_GETITEM, _Py_OPARG(*instr)); goto success; } @@ -1392,12 +1400,12 @@ _Py_Specialize_BinarySubscr( fail: STAT_INC(BINARY_SUBSCR, failure); assert(!PyErr_Occurred()); - cache_backoff(cache0); + cache->counter = ADAPTIVE_CACHE_BACKOFF; return 0; success: STAT_INC(BINARY_SUBSCR, success); assert(!PyErr_Occurred()); - cache0->counter = initial_counter_value(); + cache->counter = initial_counter_value(); return 0; }