From 05a8bc1c944709e7468f157bd1b6032f368e43bf Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 3 Mar 2022 15:31:00 -0800 Subject: [PATCH] bpo-46841: Use inline caching for attribute accesses (GH-31640) --- Include/internal/pycore_code.h | 137 +++++++-- Include/opcode.h | 4 +- Lib/importlib/_bootstrap_external.py | 4 +- Lib/opcode.py | 7 +- Lib/test/test_dis.py | 2 +- .../2022-03-01-17-47-58.bpo-46841.inYQlU.rst | 2 + Programs/test_frozenmain.h | 16 +- Python/ceval.c | 268 +++++++----------- Python/opcode_targets.h | 2 +- Python/specialize.c | 120 ++++---- 10 files changed, 292 insertions(+), 270 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-03-01-17-47-58.bpo-46841.inYQlU.rst diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index b9671d0ec32..25c31a1fca7 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -20,14 +20,8 @@ typedef struct { uint32_t version; } _PyAdaptiveEntry; - typedef struct { - uint32_t tp_version; - uint32_t dk_version; -} _PyAttrCache; - -typedef struct { - /* Borrowed ref in LOAD_METHOD */ + /* Borrowed ref */ PyObject *obj; } _PyObjectCache; @@ -51,7 +45,6 @@ typedef struct { typedef union { _PyEntryZero zero; _PyAdaptiveEntry adaptive; - _PyAttrCache attr; _PyObjectCache obj; _PyCallCache call; } SpecializedCacheEntry; @@ -65,8 +58,7 @@ typedef union { typedef struct { _Py_CODEUNIT counter; _Py_CODEUNIT index; - _Py_CODEUNIT module_keys_version; - _Py_CODEUNIT _m1; + _Py_CODEUNIT module_keys_version[2]; _Py_CODEUNIT builtin_keys_version; } _PyLoadGlobalCache; @@ -94,13 +86,32 @@ typedef struct { typedef struct { _Py_CODEUNIT counter; - _Py_CODEUNIT type_version; - _Py_CODEUNIT _t1; + _Py_CODEUNIT type_version[2]; _Py_CODEUNIT func_version; } _PyBinarySubscrCache; #define INLINE_CACHE_ENTRIES_BINARY_SUBSCR CACHE_ENTRIES(_PyBinarySubscrCache) +typedef struct { + _Py_CODEUNIT counter; + _Py_CODEUNIT version[2]; + _Py_CODEUNIT index; +} _PyAttrCache; + +#define INLINE_CACHE_ENTRIES_LOAD_ATTR CACHE_ENTRIES(_PyAttrCache) + +#define INLINE_CACHE_ENTRIES_STORE_ATTR CACHE_ENTRIES(_PyAttrCache) + +typedef struct { + _Py_CODEUNIT counter; + _Py_CODEUNIT type_version[2]; + _Py_CODEUNIT dict_offset; + _Py_CODEUNIT keys_version[2]; + _Py_CODEUNIT descr[4]; +} _PyLoadMethodCache; + +#define INLINE_CACHE_ENTRIES_LOAD_METHOD CACHE_ENTRIES(_PyLoadMethodCache) + /* Maximum size of code to quicken, in code units. */ #define MAX_SIZE_TO_QUICKEN 5000 @@ -328,10 +339,13 @@ cache_backoff(_PyAdaptiveEntry *entry) { /* Specialization functions */ -extern int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); -extern int _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); +extern int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, + PyObject *name); +extern int _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, + PyObject *name); extern int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name); -extern int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); +extern int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, + PyObject *name); extern int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr); extern int _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT *instr); extern int _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr, int nargs, @@ -416,35 +430,108 @@ extern PyObject* _Py_GetSpecializationStats(void); #ifdef WORDS_BIGENDIAN static inline void -write32(uint16_t *p, uint32_t val) +write_u32(uint16_t *p, uint32_t val) { - p[0] = val >> 16; - p[1] = (uint16_t)val; + p[0] = (uint16_t)(val >> 16); + p[1] = (uint16_t)(val >> 0); +} + +static inline void +write_u64(uint16_t *p, uint64_t val) +{ + p[0] = (uint16_t)(val >> 48); + p[1] = (uint16_t)(val >> 32); + p[2] = (uint16_t)(val >> 16); + p[3] = (uint16_t)(val >> 0); } static inline uint32_t -read32(uint16_t *p) +read_u32(uint16_t *p) { - return (p[0] << 16) | p[1]; + uint32_t val = 0; + val |= (uint32_t)p[0] << 16; + val |= (uint32_t)p[1] << 0; + return val; +} + +static inline uint64_t +read_u64(uint16_t *p) +{ + uint64_t val = 0; + val |= (uint64_t)p[0] << 48; + val |= (uint64_t)p[1] << 32; + val |= (uint64_t)p[2] << 16; + val |= (uint64_t)p[3] << 0; + return val; } #else static inline void -write32(uint16_t *p, uint32_t val) +write_u32(uint16_t *p, uint32_t val) { - p[0] = (uint16_t)val; - p[1] = val >> 16; + p[0] = (uint16_t)(val >> 0); + p[1] = (uint16_t)(val >> 16); +} + +static inline void +write_u64(uint16_t *p, uint64_t val) +{ + p[0] = (uint16_t)(val >> 0); + p[1] = (uint16_t)(val >> 16); + p[2] = (uint16_t)(val >> 32); + p[3] = (uint16_t)(val >> 48); } static inline uint32_t -read32(uint16_t *p) +read_u32(uint16_t *p) { - return p[0] | (p[1] << 16); + uint32_t val = 0; + val |= (uint32_t)p[0] << 0; + val |= (uint32_t)p[1] << 16; + return val; +} + +static inline uint64_t +read_u64(uint16_t *p) +{ + uint64_t val = 0; + val |= (uint64_t)p[0] << 0; + val |= (uint64_t)p[1] << 16; + val |= (uint64_t)p[2] << 32; + val |= (uint64_t)p[3] << 48; + return val; } #endif +static inline void +write_obj(uint16_t *p, PyObject *obj) +{ + uintptr_t val = (uintptr_t)obj; +#if SIZEOF_VOID_P == 8 + write_u64(p, val); +#elif SIZEOF_VOID_P == 4 + write_u32(p, val); +#else + #error "SIZEOF_VOID_P must be 4 or 8" +#endif +} + +static inline PyObject * +read_obj(uint16_t *p) +{ + uintptr_t val; +#if SIZEOF_VOID_P == 8 + val = read_u64(p); +#elif SIZEOF_VOID_P == 4 + val = read_u32(p); +#else + #error "SIZEOF_VOID_P must be 4 or 8" +#endif + return (PyObject *)val; +} + #ifdef __cplusplus } #endif diff --git a/Include/opcode.h b/Include/opcode.h index f6330d9056a..110f8c36171 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -183,7 +183,6 @@ extern "C" { #define LOAD_FAST__LOAD_CONST 173 #define LOAD_CONST__LOAD_FAST 174 #define STORE_FAST__STORE_FAST 175 -#define LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE 176 #define DO_TRACING 255 extern const uint8_t _PyOpcode_InlineCacheEntries[256]; @@ -213,9 +212,12 @@ static const uint32_t _PyOpcode_Jump[8] = { const uint8_t _PyOpcode_InlineCacheEntries[256] = { [BINARY_SUBSCR] = 4, [UNPACK_SEQUENCE] = 1, + [STORE_ATTR] = 4, + [LOAD_ATTR] = 4, [COMPARE_OP] = 2, [LOAD_GLOBAL] = 5, [BINARY_OP] = 1, + [LOAD_METHOD] = 10, }; #endif /* OPCODE_TABLES */ diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index dd1f6ffd64c..9d36bc27c44 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -390,6 +390,8 @@ _code_type = type(_write_atomic.__code__) # Python 3.11a5 3481 (Use inline cache for BINARY_OP) # Python 3.11a5 3482 (Use inline caching for UNPACK_SEQUENCE and LOAD_GLOBAL) # Python 3.11a5 3483 (Use inline caching for COMPARE_OP and BINARY_SUBSCR) +# Python 3.11a5 3484 (Use inline caching for LOAD_ATTR, LOAD_METHOD, and +# STORE_ATTR) # Python 3.12 will start with magic number 3500 @@ -404,7 +406,7 @@ _code_type = type(_write_atomic.__code__) # Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array # in PC/launcher.c must also be updated. -MAGIC_NUMBER = (3483).to_bytes(2, 'little') + b'\r\n' +MAGIC_NUMBER = (3484).to_bytes(2, 'little') + b'\r\n' _RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c _PYCACHE = '__pycache__' diff --git a/Lib/opcode.py b/Lib/opcode.py index 9b08562cd04..f6e2dec32e0 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -112,7 +112,7 @@ name_op('DELETE_NAME', 91) # "" def_op('UNPACK_SEQUENCE', 92, 1) # Number of tuple items jrel_op('FOR_ITER', 93) def_op('UNPACK_EX', 94) -name_op('STORE_ATTR', 95) # Index in name list +name_op('STORE_ATTR', 95, 4) # Index in name list name_op('DELETE_ATTR', 96) # "" name_op('STORE_GLOBAL', 97) # "" name_op('DELETE_GLOBAL', 98) # "" @@ -124,7 +124,7 @@ def_op('BUILD_TUPLE', 102) # Number of tuple items def_op('BUILD_LIST', 103) # Number of list items def_op('BUILD_SET', 104) # Number of set items def_op('BUILD_MAP', 105) # Number of dict entries -name_op('LOAD_ATTR', 106) # Index in name list +name_op('LOAD_ATTR', 106, 4) # Index in name list def_op('COMPARE_OP', 107, 2) # Comparison operator hascompare.append(107) name_op('IMPORT_NAME', 108) # Index in name list @@ -186,7 +186,7 @@ def_op('FORMAT_VALUE', 155) def_op('BUILD_CONST_KEY_MAP', 156) def_op('BUILD_STRING', 157) -name_op('LOAD_METHOD', 160) +name_op('LOAD_METHOD', 160, 10) def_op('LIST_EXTEND', 162) def_op('SET_UPDATE', 163) @@ -301,7 +301,6 @@ _specialized_instructions = [ "LOAD_FAST__LOAD_CONST", "LOAD_CONST__LOAD_FAST", "STORE_FAST__STORE_FAST", - "LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE", ] _specialization_stats = [ "success", diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 8de2ed09e83..7e0542ae0ae 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -375,7 +375,7 @@ dis_traceback = """\ >> PUSH_EXC_INFO %3d LOAD_GLOBAL 0 (Exception) - JUMP_IF_NOT_EXC_MATCH 31 (to 62) + JUMP_IF_NOT_EXC_MATCH 35 (to 70) STORE_FAST 0 (e) %3d LOAD_FAST 0 (e) diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-03-01-17-47-58.bpo-46841.inYQlU.rst b/Misc/NEWS.d/next/Core and Builtins/2022-03-01-17-47-58.bpo-46841.inYQlU.rst new file mode 100644 index 00000000000..0e7beb019f4 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-03-01-17-47-58.bpo-46841.inYQlU.rst @@ -0,0 +1,2 @@ +Use inline caching for :opcode:`LOAD_ATTR`, :opcode:`LOAD_METHOD`, and +:opcode:`STORE_ATTR`. diff --git a/Programs/test_frozenmain.h b/Programs/test_frozenmain.h index 3fef981e42f..4ebab4f7544 100644 --- a/Programs/test_frozenmain.h +++ b/Programs/test_frozenmain.h @@ -1,14 +1,15 @@ // Auto-generated by Programs/freeze_test_frozenmain.py unsigned char M_test_frozenmain[] = { 227,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0, - 0,0,0,0,0,115,120,0,0,0,151,0,100,0,100,1, + 0,0,0,0,0,115,136,0,0,0,151,0,100,0,100,1, 108,0,90,0,100,0,100,1,108,1,90,1,2,0,101,2, 100,2,166,1,171,1,1,0,2,0,101,2,100,3,101,0, - 106,3,166,2,171,2,1,0,2,0,101,1,106,4,166,0, + 106,3,3,0,3,0,3,0,3,0,166,2,171,2,1,0, + 2,0,101,1,106,4,3,0,3,0,3,0,3,0,166,0, 171,0,100,4,25,0,3,0,3,0,3,0,3,0,90,5, 100,5,68,0,93,20,90,6,2,0,101,2,100,6,101,6, 155,0,100,7,101,5,101,6,25,0,3,0,3,0,3,0, - 3,0,155,0,157,4,166,1,171,1,1,0,113,37,100,1, + 3,0,155,0,157,4,166,1,171,1,1,0,113,45,100,1, 83,0,41,8,233,0,0,0,0,78,122,18,70,114,111,122, 101,110,32,72,101,108,108,111,32,87,111,114,108,100,122,8, 115,121,115,46,97,114,103,118,218,6,99,111,110,102,105,103, @@ -25,12 +26,13 @@ unsigned char M_test_frozenmain[] = { 0,0,250,18,116,101,115,116,95,102,114,111,122,101,110,109, 97,105,110,46,112,121,250,8,60,109,111,100,117,108,101,62, 114,11,0,0,0,1,0,0,0,115,18,0,0,0,2,128, - 8,3,8,1,12,2,16,1,24,1,8,1,38,7,4,249, - 115,20,0,0,0,2,128,8,3,8,1,12,2,16,1,24, - 1,2,7,4,1,2,249,42,7,115,120,0,0,0,0,0, + 8,3,8,1,12,2,24,1,32,1,8,1,38,7,4,249, + 115,20,0,0,0,2,128,8,3,8,1,12,2,24,1,32, + 1,2,7,4,1,2,249,42,7,115,136,0,0,0,0,0, 1,11,1,11,1,11,1,11,1,25,1,25,1,25,1,25, 1,6,1,6,7,27,1,28,1,28,1,28,1,6,1,6, - 7,17,19,22,19,27,1,28,1,28,1,28,10,39,10,27, + 7,17,19,22,19,27,19,27,19,27,19,27,19,27,1,28, + 1,28,1,28,10,39,10,27,10,39,10,39,10,39,10,39, 10,39,10,41,10,41,42,50,10,51,10,51,10,51,10,51, 10,51,1,7,12,2,1,42,1,42,5,8,5,10,5,10, 11,41,21,24,11,41,11,41,28,34,35,38,28,39,28,39, diff --git a/Python/ceval.c b/Python/ceval.c index c86b7443768..915ab9313a9 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1448,16 +1448,15 @@ eval_frame_handle_pending(PyThreadState *tstate) // shared by LOAD_ATTR_MODULE and LOAD_METHOD_MODULE #define LOAD_MODULE_ATTR_OR_METHOD(attr_or_method) \ - SpecializedCacheEntry *caches = GET_CACHE(); \ - _PyAdaptiveEntry *cache0 = &caches[0].adaptive; \ + _PyAttrCache *cache = (_PyAttrCache *)next_instr; \ DEOPT_IF(!PyModule_CheckExact(owner), LOAD_##attr_or_method); \ PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner)->md_dict; \ assert(dict != NULL); \ - DEOPT_IF(dict->ma_keys->dk_version != cache0->version, \ - LOAD_##attr_or_method); \ + DEOPT_IF(dict->ma_keys->dk_version != read_u32(cache->version), \ + LOAD_##attr_or_method); \ assert(dict->ma_keys->dk_kind == DICT_KEYS_UNICODE); \ - assert(cache0->index < dict->ma_keys->dk_nentries); \ - PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + cache0->index; \ + assert(cache->index < dict->ma_keys->dk_nentries); \ + PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + cache->index; \ res = ep->me_value; \ DEOPT_IF(res == NULL, LOAD_##attr_or_method); \ STAT_INC(LOAD_##attr_or_method, hit); \ @@ -2197,7 +2196,7 @@ handle_eval_breaker: PyObject *sub = TOP(); PyObject *container = SECOND(); _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr; - uint32_t type_version = read32(&cache->type_version); + uint32_t type_version = read_u32(cache->type_version); PyTypeObject *tp = Py_TYPE(container); DEOPT_IF(tp->tp_version_tag != type_version, BINARY_SUBSCR); assert(tp->tp_flags & Py_TPFLAGS_HEAPTYPE); @@ -2849,8 +2848,10 @@ handle_eval_breaker: err = PyObject_SetAttr(owner, name, v); Py_DECREF(v); Py_DECREF(owner); - if (err != 0) + if (err != 0) { goto error; + } + JUMPBY(INLINE_CACHE_ENTRIES_STORE_ATTR); DISPATCH(); } @@ -3028,7 +3029,7 @@ handle_eval_breaker: DEOPT_IF(!PyDict_CheckExact(GLOBALS()), LOAD_GLOBAL); PyDictObject *dict = (PyDictObject *)GLOBALS(); _PyLoadGlobalCache *cache = (_PyLoadGlobalCache *)next_instr; - uint32_t version = read32(&cache->module_keys_version); + uint32_t version = read_u32(cache->module_keys_version); DEOPT_IF(dict->ma_keys->dk_version != version, LOAD_GLOBAL); assert(DK_IS_UNICODE(dict->ma_keys)); PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dict->ma_keys); @@ -3048,7 +3049,7 @@ handle_eval_breaker: PyDictObject *mdict = (PyDictObject *)GLOBALS(); PyDictObject *bdict = (PyDictObject *)BUILTINS(); _PyLoadGlobalCache *cache = (_PyLoadGlobalCache *)next_instr; - uint32_t mod_version = read32(&cache->module_keys_version); + uint32_t mod_version = read_u32(cache->module_keys_version); uint16_t bltn_version = cache->builtin_keys_version; DEOPT_IF(mdict->ma_keys->dk_version != mod_version, LOAD_GLOBAL); DEOPT_IF(bdict->ma_keys->dk_version != bltn_version, LOAD_GLOBAL); @@ -3423,76 +3424,49 @@ handle_eval_breaker: } Py_DECREF(owner); SET_TOP(res); + JUMPBY(INLINE_CACHE_ENTRIES_LOAD_ATTR); DISPATCH(); } TARGET(LOAD_ATTR_ADAPTIVE) { assert(cframe.use_tracing == 0); - SpecializedCacheEntry *cache = GET_CACHE(); - if (cache->adaptive.counter == 0) { + _PyAttrCache *cache = (_PyAttrCache *)next_instr; + if (cache->counter == 0) { PyObject *owner = TOP(); - PyObject *name = GETITEM(names, cache->adaptive.original_oparg); + PyObject *name = GETITEM(names, oparg); next_instr--; - if (_Py_Specialize_LoadAttr(owner, next_instr, name, cache) < 0) { + if (_Py_Specialize_LoadAttr(owner, next_instr, name) < 0) { goto error; } DISPATCH(); } else { STAT_INC(LOAD_ATTR, deferred); - cache->adaptive.counter--; - oparg = cache->adaptive.original_oparg; + cache->counter--; JUMP_TO_INSTRUCTION(LOAD_ATTR); } } - TARGET(LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE) { - assert(cframe.use_tracing == 0); - PyObject *owner = GETLOCAL(oparg); // borrowed - if (owner == NULL) { - goto unbound_local_error; - } - // GET_CACHE(), but for the following opcode - assert(_Py_OPCODE(*next_instr) == LOAD_ATTR_INSTANCE_VALUE); - SpecializedCacheEntry *caches = _GetSpecializedCacheEntryForInstruction( - first_instr, INSTR_OFFSET() + 1, _Py_OPARG(*next_instr)); - _PyAdaptiveEntry *cache0 = &caches[0].adaptive; - assert(cache0->version != 0); - PyTypeObject *tp = Py_TYPE(owner); - // These DEOPT_IF miss branches do PUSH(Py_NewRef(owner)). - DEOPT_IF(tp->tp_version_tag != cache0->version, - LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE); - assert(tp->tp_dictoffset < 0); - assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT); - PyDictValues *values = *_PyObject_ValuesPointer(owner); - DEOPT_IF(values == NULL, LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE); - PyObject *res = values->values[cache0->index]; - DEOPT_IF(res == NULL, LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE); - STAT_INC(LOAD_ATTR, hit); - PUSH(Py_NewRef(res)); - next_instr++; - NOTRACE_DISPATCH(); - } - TARGET(LOAD_ATTR_INSTANCE_VALUE) { assert(cframe.use_tracing == 0); PyObject *owner = TOP(); PyObject *res; PyTypeObject *tp = Py_TYPE(owner); - SpecializedCacheEntry *caches = GET_CACHE(); - _PyAdaptiveEntry *cache0 = &caches[0].adaptive; - assert(cache0->version != 0); - DEOPT_IF(tp->tp_version_tag != cache0->version, LOAD_ATTR_INSTANCE_VALUE); + _PyAttrCache *cache = (_PyAttrCache *)next_instr; + uint32_t type_version = read_u32(cache->version); + assert(type_version != 0); + DEOPT_IF(tp->tp_version_tag != type_version, LOAD_ATTR); assert(tp->tp_dictoffset < 0); assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT); PyDictValues *values = *_PyObject_ValuesPointer(owner); - DEOPT_IF(values == NULL, LOAD_ATTR_INSTANCE_VALUE); - res = values->values[cache0->index]; - DEOPT_IF(res == NULL, LOAD_ATTR_INSTANCE_VALUE); + DEOPT_IF(values == NULL, LOAD_ATTR); + res = values->values[cache->index]; + DEOPT_IF(res == NULL, LOAD_ATTR); STAT_INC(LOAD_ATTR, hit); Py_INCREF(res); SET_TOP(res); Py_DECREF(owner); + JUMPBY(INLINE_CACHE_ENTRIES_LOAD_ATTR); NOTRACE_DISPATCH(); } @@ -3504,6 +3478,7 @@ handle_eval_breaker: LOAD_MODULE_ATTR_OR_METHOD(ATTR); SET_TOP(res); Py_DECREF(owner); + JUMPBY(INLINE_CACHE_ENTRIES_LOAD_ATTR); NOTRACE_DISPATCH(); } @@ -3512,16 +3487,16 @@ handle_eval_breaker: PyObject *owner = TOP(); PyObject *res; PyTypeObject *tp = Py_TYPE(owner); - SpecializedCacheEntry *caches = GET_CACHE(); - _PyAdaptiveEntry *cache0 = &caches[0].adaptive; - assert(cache0->version != 0); - DEOPT_IF(tp->tp_version_tag != cache0->version, LOAD_ATTR); + _PyAttrCache *cache = (_PyAttrCache *)next_instr; + uint32_t type_version = read_u32(cache->version); + assert(type_version != 0); + DEOPT_IF(tp->tp_version_tag != type_version, LOAD_ATTR); assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT); PyDictObject *dict = *(PyDictObject **)_PyObject_ManagedDictPointer(owner); DEOPT_IF(dict == NULL, LOAD_ATTR); assert(PyDict_CheckExact((PyObject *)dict)); - PyObject *name = GETITEM(names, cache0->original_oparg); - uint16_t hint = cache0->index; + PyObject *name = GETITEM(names, oparg); + uint16_t hint = cache->index; DEOPT_IF(hint >= (size_t)dict->ma_keys->dk_nentries, LOAD_ATTR); if (DK_IS_UNICODE(dict->ma_keys)) { PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + hint; @@ -3538,6 +3513,7 @@ handle_eval_breaker: Py_INCREF(res); SET_TOP(res); Py_DECREF(owner); + JUMPBY(INLINE_CACHE_ENTRIES_LOAD_ATTR); NOTRACE_DISPATCH(); } @@ -3546,36 +3522,36 @@ handle_eval_breaker: PyObject *owner = TOP(); PyObject *res; PyTypeObject *tp = Py_TYPE(owner); - SpecializedCacheEntry *caches = GET_CACHE(); - _PyAdaptiveEntry *cache0 = &caches[0].adaptive; - assert(cache0->version != 0); - DEOPT_IF(tp->tp_version_tag != cache0->version, LOAD_ATTR); - char *addr = (char *)owner + cache0->index; + _PyAttrCache *cache = (_PyAttrCache *)next_instr; + uint32_t type_version = read_u32(cache->version); + assert(type_version != 0); + DEOPT_IF(tp->tp_version_tag != type_version, LOAD_ATTR); + char *addr = (char *)owner + cache->index; res = *(PyObject **)addr; DEOPT_IF(res == NULL, LOAD_ATTR); STAT_INC(LOAD_ATTR, hit); Py_INCREF(res); SET_TOP(res); Py_DECREF(owner); + JUMPBY(INLINE_CACHE_ENTRIES_LOAD_ATTR); NOTRACE_DISPATCH(); } TARGET(STORE_ATTR_ADAPTIVE) { assert(cframe.use_tracing == 0); - SpecializedCacheEntry *cache = GET_CACHE(); - if (cache->adaptive.counter == 0) { + _PyAttrCache *cache = (_PyAttrCache *)next_instr; + if (cache->counter == 0) { PyObject *owner = TOP(); - PyObject *name = GETITEM(names, cache->adaptive.original_oparg); + PyObject *name = GETITEM(names, oparg); next_instr--; - if (_Py_Specialize_StoreAttr(owner, next_instr, name, cache) < 0) { + if (_Py_Specialize_StoreAttr(owner, next_instr, name) < 0) { goto error; } DISPATCH(); } else { STAT_INC(STORE_ATTR, deferred); - cache->adaptive.counter--; - oparg = cache->adaptive.original_oparg; + cache->counter--; JUMP_TO_INSTRUCTION(STORE_ATTR); } } @@ -3584,15 +3560,15 @@ handle_eval_breaker: assert(cframe.use_tracing == 0); PyObject *owner = TOP(); PyTypeObject *tp = Py_TYPE(owner); - SpecializedCacheEntry *caches = GET_CACHE(); - _PyAdaptiveEntry *cache0 = &caches[0].adaptive; - assert(cache0->version != 0); - DEOPT_IF(tp->tp_version_tag != cache0->version, STORE_ATTR); + _PyAttrCache *cache = (_PyAttrCache *)next_instr; + uint32_t type_version = read_u32(cache->version); + assert(type_version != 0); + DEOPT_IF(tp->tp_version_tag != type_version, STORE_ATTR); assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT); PyDictValues *values = *_PyObject_ValuesPointer(owner); DEOPT_IF(values == NULL, STORE_ATTR); STAT_INC(STORE_ATTR, hit); - Py_ssize_t index = cache0->index; + Py_ssize_t index = cache->index; STACK_SHRINK(1); PyObject *value = POP(); PyObject *old_value = values->values[index]; @@ -3604,6 +3580,7 @@ handle_eval_breaker: Py_DECREF(old_value); } Py_DECREF(owner); + JUMPBY(INLINE_CACHE_ENTRIES_STORE_ATTR); NOTRACE_DISPATCH(); } @@ -3611,16 +3588,16 @@ handle_eval_breaker: assert(cframe.use_tracing == 0); PyObject *owner = TOP(); PyTypeObject *tp = Py_TYPE(owner); - SpecializedCacheEntry *caches = GET_CACHE(); - _PyAdaptiveEntry *cache0 = &caches[0].adaptive; - assert(cache0->version != 0); - DEOPT_IF(tp->tp_version_tag != cache0->version, STORE_ATTR); + _PyAttrCache *cache = (_PyAttrCache *)next_instr; + uint32_t type_version = read_u32(cache->version); + assert(type_version != 0); + DEOPT_IF(tp->tp_version_tag != type_version, STORE_ATTR); assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT); PyDictObject *dict = *(PyDictObject **)_PyObject_ManagedDictPointer(owner); DEOPT_IF(dict == NULL, STORE_ATTR); assert(PyDict_CheckExact((PyObject *)dict)); - PyObject *name = GETITEM(names, cache0->original_oparg); - uint16_t hint = cache0->index; + PyObject *name = GETITEM(names, oparg); + uint16_t hint = cache->index; DEOPT_IF(hint >= (size_t)dict->ma_keys->dk_nentries, STORE_ATTR); PyObject *value, *old_value; if (DK_IS_UNICODE(dict->ma_keys)) { @@ -3650,6 +3627,7 @@ handle_eval_breaker: /* PEP 509 */ dict->ma_version_tag = DICT_NEXT_VERSION(); Py_DECREF(owner); + JUMPBY(INLINE_CACHE_ENTRIES_STORE_ATTR); NOTRACE_DISPATCH(); } @@ -3657,11 +3635,11 @@ handle_eval_breaker: assert(cframe.use_tracing == 0); PyObject *owner = TOP(); PyTypeObject *tp = Py_TYPE(owner); - SpecializedCacheEntry *caches = GET_CACHE(); - _PyAdaptiveEntry *cache0 = &caches[0].adaptive; - assert(cache0->version != 0); - DEOPT_IF(tp->tp_version_tag != cache0->version, STORE_ATTR); - char *addr = (char *)owner + cache0->index; + _PyAttrCache *cache = (_PyAttrCache *)next_instr; + uint32_t type_version = read_u32(cache->version); + assert(type_version != 0); + DEOPT_IF(tp->tp_version_tag != type_version, STORE_ATTR); + char *addr = (char *)owner + cache->index; STAT_INC(STORE_ATTR, hit); STACK_SHRINK(1); PyObject *value = POP(); @@ -3669,6 +3647,7 @@ handle_eval_breaker: *(PyObject **)addr = value; Py_XDECREF(old_value); Py_DECREF(owner); + JUMPBY(INLINE_CACHE_ENTRIES_STORE_ATTR); NOTRACE_DISPATCH(); } @@ -4425,25 +4404,25 @@ handle_eval_breaker: Py_DECREF(obj); PUSH(meth); } + JUMPBY(INLINE_CACHE_ENTRIES_LOAD_METHOD); DISPATCH(); } TARGET(LOAD_METHOD_ADAPTIVE) { assert(cframe.use_tracing == 0); - SpecializedCacheEntry *cache = GET_CACHE(); - if (cache->adaptive.counter == 0) { + _PyLoadMethodCache *cache = (_PyLoadMethodCache *)next_instr; + if (cache->counter == 0) { PyObject *owner = TOP(); - PyObject *name = GETITEM(names, cache->adaptive.original_oparg); + PyObject *name = GETITEM(names, oparg); next_instr--; - if (_Py_Specialize_LoadMethod(owner, next_instr, name, cache) < 0) { + if (_Py_Specialize_LoadMethod(owner, next_instr, name) < 0) { goto error; } DISPATCH(); } else { STAT_INC(LOAD_METHOD, deferred); - cache->adaptive.counter--; - oparg = cache->adaptive.original_oparg; + cache->counter--; JUMP_TO_INSTRUCTION(LOAD_METHOD); } } @@ -4453,22 +4432,24 @@ handle_eval_breaker: assert(cframe.use_tracing == 0); PyObject *self = TOP(); PyTypeObject *self_cls = Py_TYPE(self); - SpecializedCacheEntry *caches = GET_CACHE(); - _PyAttrCache *cache1 = &caches[-1].attr; - _PyObjectCache *cache2 = &caches[-2].obj; - assert(cache1->tp_version != 0); - DEOPT_IF(self_cls->tp_version_tag != cache1->tp_version, LOAD_METHOD); + _PyLoadMethodCache *cache = (_PyLoadMethodCache *)next_instr; + uint32_t type_version = read_u32(cache->type_version); + assert(type_version != 0); + DEOPT_IF(self_cls->tp_version_tag != type_version, LOAD_METHOD); assert(self_cls->tp_flags & Py_TPFLAGS_MANAGED_DICT); PyDictObject *dict = *(PyDictObject**)_PyObject_ManagedDictPointer(self); DEOPT_IF(dict != NULL, LOAD_METHOD); - DEOPT_IF(((PyHeapTypeObject *)self_cls)->ht_cached_keys->dk_version != cache1->dk_version, LOAD_METHOD); + PyHeapTypeObject *self_heap_type = (PyHeapTypeObject *)self_cls; + DEOPT_IF(self_heap_type->ht_cached_keys->dk_version != + read_u32(cache->keys_version), LOAD_METHOD); STAT_INC(LOAD_METHOD, hit); - PyObject *res = cache2->obj; + PyObject *res = read_obj(cache->descr); assert(res != NULL); assert(_PyType_HasFeature(Py_TYPE(res), Py_TPFLAGS_METHOD_DESCRIPTOR)); Py_INCREF(res); SET_TOP(res); PUSH(self); + JUMPBY(INLINE_CACHE_ENTRIES_LOAD_METHOD); NOTRACE_DISPATCH(); } @@ -4478,14 +4459,12 @@ handle_eval_breaker: assert(cframe.use_tracing == 0); PyObject *self = TOP(); PyTypeObject *self_cls = Py_TYPE(self); - SpecializedCacheEntry *caches = GET_CACHE(); - _PyAdaptiveEntry *cache0 = &caches[0].adaptive; - _PyAttrCache *cache1 = &caches[-1].attr; - _PyObjectCache *cache2 = &caches[-2].obj; + _PyLoadMethodCache *cache = (_PyLoadMethodCache *)next_instr; - DEOPT_IF(self_cls->tp_version_tag != cache1->tp_version, LOAD_METHOD); + DEOPT_IF(self_cls->tp_version_tag != read_u32(cache->type_version), + LOAD_METHOD); /* Treat index as a signed 16 bit value */ - int dictoffset = *(int16_t *)&cache0->index; + int dictoffset = *(int16_t *)&cache->dict_offset; PyDictObject **dictptr = (PyDictObject**)(((char *)self)+dictoffset); assert( dictoffset == MANAGED_DICT_OFFSET || @@ -4493,14 +4472,16 @@ handle_eval_breaker: ); PyDictObject *dict = *dictptr; DEOPT_IF(dict == NULL, LOAD_METHOD); - DEOPT_IF(dict->ma_keys->dk_version != cache1->dk_version, LOAD_METHOD); + DEOPT_IF(dict->ma_keys->dk_version != read_u32(cache->keys_version), + LOAD_METHOD); STAT_INC(LOAD_METHOD, hit); - PyObject *res = cache2->obj; + PyObject *res = read_obj(cache->descr); assert(res != NULL); assert(_PyType_HasFeature(Py_TYPE(res), Py_TPFLAGS_METHOD_DESCRIPTOR)); Py_INCREF(res); SET_TOP(res); PUSH(self); + JUMPBY(INLINE_CACHE_ENTRIES_LOAD_METHOD); NOTRACE_DISPATCH(); } @@ -4508,18 +4489,18 @@ handle_eval_breaker: assert(cframe.use_tracing == 0); PyObject *self = TOP(); PyTypeObject *self_cls = Py_TYPE(self); - SpecializedCacheEntry *caches = GET_CACHE(); - _PyAttrCache *cache1 = &caches[-1].attr; - _PyObjectCache *cache2 = &caches[-2].obj; - DEOPT_IF(self_cls->tp_version_tag != cache1->tp_version, LOAD_METHOD); + _PyLoadMethodCache *cache = (_PyLoadMethodCache *)next_instr; + uint32_t type_version = read_u32(cache->type_version); + DEOPT_IF(self_cls->tp_version_tag != type_version, LOAD_METHOD); assert(self_cls->tp_dictoffset == 0); STAT_INC(LOAD_METHOD, hit); - PyObject *res = cache2->obj; + PyObject *res = read_obj(cache->descr); assert(res != NULL); assert(_PyType_HasFeature(Py_TYPE(res), Py_TPFLAGS_METHOD_DESCRIPTOR)); Py_INCREF(res); SET_TOP(res); PUSH(self); + JUMPBY(INLINE_CACHE_ENTRIES_LOAD_METHOD); NOTRACE_DISPATCH(); } @@ -4532,29 +4513,30 @@ handle_eval_breaker: SET_TOP(NULL); Py_DECREF(owner); PUSH(res); + JUMPBY(INLINE_CACHE_ENTRIES_LOAD_METHOD); NOTRACE_DISPATCH(); } TARGET(LOAD_METHOD_CLASS) { /* LOAD_METHOD, for class methods */ assert(cframe.use_tracing == 0); - SpecializedCacheEntry *caches = GET_CACHE(); - _PyAttrCache *cache1 = &caches[-1].attr; - _PyObjectCache *cache2 = &caches[-2].obj; + _PyLoadMethodCache *cache = (_PyLoadMethodCache *)next_instr; PyObject *cls = TOP(); DEOPT_IF(!PyType_Check(cls), LOAD_METHOD); - DEOPT_IF(((PyTypeObject *)cls)->tp_version_tag != cache1->tp_version, - LOAD_METHOD); - assert(cache1->tp_version != 0); + uint32_t type_version = read_u32(cache->type_version); + DEOPT_IF(((PyTypeObject *)cls)->tp_version_tag != type_version, + LOAD_METHOD); + assert(type_version != 0); STAT_INC(LOAD_METHOD, hit); - PyObject *res = cache2->obj; + PyObject *res = read_obj(cache->descr); assert(res != NULL); Py_INCREF(res); SET_TOP(NULL); Py_DECREF(cls); PUSH(res); + JUMPBY(INLINE_CACHE_ENTRIES_LOAD_METHOD); NOTRACE_DISPATCH(); } @@ -5607,10 +5589,10 @@ opname ## _miss: \ JUMP_TO_INSTRUCTION(opname); \ } -MISS_WITH_CACHE(LOAD_ATTR) -MISS_WITH_CACHE(STORE_ATTR) +MISS_WITH_INLINE_CACHE(LOAD_ATTR) +MISS_WITH_INLINE_CACHE(STORE_ATTR) MISS_WITH_INLINE_CACHE(LOAD_GLOBAL) -MISS_WITH_CACHE(LOAD_METHOD) +MISS_WITH_INLINE_CACHE(LOAD_METHOD) MISS_WITH_CACHE(PRECALL) MISS_WITH_CACHE(CALL) MISS_WITH_INLINE_CACHE(BINARY_OP) @@ -5619,52 +5601,6 @@ MISS_WITH_INLINE_CACHE(BINARY_SUBSCR) MISS_WITH_INLINE_CACHE(UNPACK_SEQUENCE) MISS_WITH_OPARG_COUNTER(STORE_SUBSCR) -LOAD_ATTR_INSTANCE_VALUE_miss: - { - // Special-cased so that if LOAD_ATTR_INSTANCE_VALUE - // gets replaced, then any preceeding - // LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE gets replaced as well - STAT_INC(LOAD_ATTR_INSTANCE_VALUE, miss); - STAT_INC(LOAD_ATTR, miss); - _PyAdaptiveEntry *cache = &GET_CACHE()->adaptive; - cache->counter--; - if (cache->counter == 0) { - next_instr[-1] = _Py_MAKECODEUNIT(LOAD_ATTR_ADAPTIVE, _Py_OPARG(next_instr[-1])); - if (_Py_OPCODE(next_instr[-2]) == LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE) { - next_instr[-2] = _Py_MAKECODEUNIT(LOAD_FAST, _Py_OPARG(next_instr[-2])); - if (_Py_OPCODE(next_instr[-3]) == LOAD_FAST) { - next_instr[-3] = _Py_MAKECODEUNIT(LOAD_FAST__LOAD_FAST, _Py_OPARG(next_instr[-3])); - } - } - STAT_INC(LOAD_ATTR, deopt); - cache_backoff(cache); - } - oparg = cache->original_oparg; - JUMP_TO_INSTRUCTION(LOAD_ATTR); - } - -LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE_miss: - { - // This is special-cased because we have a superinstruction - // that includes a specialized instruction. - // If the specialized portion misses, carry out - // the first instruction, then perform a miss - // for the second instruction as usual. - - // Do LOAD_FAST - { - PyObject *value = GETLOCAL(oparg); - assert(value != NULL); // Already checked if unbound - Py_INCREF(value); - PUSH(value); - NEXTOPARG(); - next_instr++; - } - - // Now we are in the correct state for LOAD_ATTR - goto LOAD_ATTR_INSTANCE_VALUE_miss; - } - binary_subscr_dict_error: { PyObject *sub = POP(); diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index d463e303e27..7be7b168a75 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -175,7 +175,7 @@ static void *opcode_targets[256] = { &&TARGET_LOAD_FAST__LOAD_CONST, &&TARGET_LOAD_CONST__LOAD_FAST, &&TARGET_STORE_FAST__STORE_FAST, - &&TARGET_LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE, + &&_unknown_opcode, &&_unknown_opcode, &&_unknown_opcode, &&_unknown_opcode, diff --git a/Python/specialize.c b/Python/specialize.c index 4a94aafefea..66dce8c93d7 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -58,12 +58,9 @@ static uint8_t adaptive_opcodes[256] = { /* The number of cache entries required for a "family" of instructions. */ static uint8_t cache_requirements[256] = { - [LOAD_ATTR] = 1, // _PyAdaptiveEntry - [LOAD_METHOD] = 3, /* _PyAdaptiveEntry, _PyAttrCache and _PyObjectCache */ [STORE_SUBSCR] = 0, [CALL] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */ [PRECALL] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */ - [STORE_ATTR] = 1, // _PyAdaptiveEntry }; Py_ssize_t _Py_QuickenedCount = 0; @@ -641,11 +638,10 @@ initial_counter_value(void) { static int -specialize_module_load_attr( - PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, - _PyAdaptiveEntry *cache0, int opcode, - int opcode_module) +specialize_module_load_attr(PyObject *owner, _Py_CODEUNIT *instr, + PyObject *name, int opcode, int opcode_module) { + _PyAttrCache *cache = (_PyAttrCache *)(instr + 1); PyModuleObject *m = (PyModuleObject *)owner; PyObject *value = NULL; assert((owner->ob_type->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0); @@ -676,8 +672,8 @@ specialize_module_load_attr( SPECIALIZATION_FAIL(opcode, SPEC_FAIL_OUT_OF_VERSIONS); return -1; } - cache0->version = keys_version; - cache0->index = (uint16_t)index; + write_u32(cache->version, keys_version); + cache->index = (uint16_t)index; *instr = _Py_MAKECODEUNIT(opcode_module, _Py_OPARG(*instr)); return 0; } @@ -765,7 +761,6 @@ static int specialize_dict_access( PyObject *owner, _Py_CODEUNIT *instr, PyTypeObject *type, DescriptorClassification kind, PyObject *name, - _PyAdaptiveEntry *cache0, int base_op, int values_op, int hint_op) { assert(kind == NON_OVERRIDING || kind == NON_DESCRIPTOR || kind == ABSENT || @@ -775,6 +770,7 @@ specialize_dict_access( SPECIALIZATION_FAIL(base_op, SPEC_FAIL_ATTR_NOT_MANAGED_DICT); return 0; } + _PyAttrCache *cache = (_PyAttrCache *)(instr + 1); PyObject **dictptr = _PyObject_ManagedDictPointer(owner); PyDictObject *dict = (PyDictObject *)*dictptr; if (dict == NULL) { @@ -787,8 +783,8 @@ specialize_dict_access( SPECIALIZATION_FAIL(base_op, SPEC_FAIL_OUT_OF_RANGE); return 0; } - cache0->version = type->tp_version_tag; - cache0->index = (uint16_t)index; + write_u32(cache->version, type->tp_version_tag); + cache->index = (uint16_t)index; *instr = _Py_MAKECODEUNIT(values_op, _Py_OPARG(*instr)); } else { @@ -804,20 +800,22 @@ specialize_dict_access( SPECIALIZATION_FAIL(base_op, SPEC_FAIL_OUT_OF_RANGE); return 0; } - cache0->index = (uint16_t)hint; - cache0->version = type->tp_version_tag; + cache->index = (uint16_t)hint; + write_u32(cache->version, type->tp_version_tag); *instr = _Py_MAKECODEUNIT(hint_op, _Py_OPARG(*instr)); } return 1; } int -_Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache) +_Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name) { - _PyAdaptiveEntry *cache0 = &cache->adaptive; + assert(_PyOpcode_InlineCacheEntries[LOAD_ATTR] == + INLINE_CACHE_ENTRIES_LOAD_ATTR); + _PyAttrCache *cache = (_PyAttrCache *)(instr + 1); if (PyModule_CheckExact(owner)) { - int err = specialize_module_load_attr(owner, instr, name, cache0, - LOAD_ATTR, LOAD_ATTR_MODULE); + int err = specialize_module_load_attr(owner, instr, name, LOAD_ATTR, + LOAD_ATTR_MODULE); if (err) { goto fail; } @@ -856,8 +854,8 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, Sp } assert(dmem->type == T_OBJECT_EX); assert(offset > 0); - cache0->index = (uint16_t)offset; - cache0->version = type->tp_version_tag; + cache->index = (uint16_t)offset; + write_u32(cache->version, type->tp_version_tag); *instr = _Py_MAKECODEUNIT(LOAD_ATTR_SLOT, _Py_OPARG(*instr)); goto success; } @@ -865,8 +863,8 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, Sp { Py_ssize_t offset = offsetof(PyObject, ob_type); assert(offset == (uint16_t)offset); - cache0->index = (uint16_t)offset; - cache0->version = type->tp_version_tag; + cache->index = (uint16_t)offset; + write_u32(cache->version, type->tp_version_tag); *instr = _Py_MAKECODEUNIT(LOAD_ATTR_SLOT, _Py_OPARG(*instr)); goto success; } @@ -887,41 +885,33 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, Sp break; } int err = specialize_dict_access( - owner, instr, type, kind, name, cache0, + owner, instr, type, kind, name, LOAD_ATTR, LOAD_ATTR_INSTANCE_VALUE, LOAD_ATTR_WITH_HINT ); if (err < 0) { return -1; } if (err) { - if (_Py_OPCODE(instr[0]) == LOAD_ATTR_INSTANCE_VALUE) { - // Note: instr[-1] exists because there's something on the stack, - // and instr[-2] exists because there's at least a RESUME as well. - if (_Py_OPCODE(instr[-1]) == LOAD_FAST) { - instr[-1] = _Py_MAKECODEUNIT(LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE, _Py_OPARG(instr[-1])); - if (_Py_OPCODE(instr[-2]) == LOAD_FAST__LOAD_FAST) { - instr[-2] = _Py_MAKECODEUNIT(LOAD_FAST, _Py_OPARG(instr[-2])); - } - } - } goto success; } fail: STAT_INC(LOAD_ATTR, failure); assert(!PyErr_Occurred()); - cache_backoff(cache0); + cache->counter = ADAPTIVE_CACHE_BACKOFF; return 0; success: STAT_INC(LOAD_ATTR, success); assert(!PyErr_Occurred()); - cache0->counter = initial_counter_value(); + cache->counter = initial_counter_value(); return 0; } int -_Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache) +_Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name) { - _PyAdaptiveEntry *cache0 = &cache->adaptive; + assert(_PyOpcode_InlineCacheEntries[STORE_ATTR] == + INLINE_CACHE_ENTRIES_STORE_ATTR); + _PyAttrCache *cache = (_PyAttrCache *)(instr + 1); PyTypeObject *type = Py_TYPE(owner); if (PyModule_CheckExact(owner)) { SPECIALIZATION_FAIL(STORE_ATTR, SPEC_FAIL_OVERRIDDEN); @@ -954,8 +944,8 @@ _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, S } assert(dmem->type == T_OBJECT_EX); assert(offset > 0); - cache0->index = (uint16_t)offset; - cache0->version = type->tp_version_tag; + cache->index = (uint16_t)offset; + write_u32(cache->version, type->tp_version_tag); *instr = _Py_MAKECODEUNIT(STORE_ATTR_SLOT, _Py_OPARG(*instr)); goto success; } @@ -978,7 +968,7 @@ _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, S } int err = specialize_dict_access( - owner, instr, type, kind, name, cache0, + owner, instr, type, kind, name, STORE_ATTR, STORE_ATTR_INSTANCE_VALUE, STORE_ATTR_WITH_HINT ); if (err < 0) { @@ -990,12 +980,12 @@ _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, S fail: STAT_INC(STORE_ATTR, failure); assert(!PyErr_Occurred()); - cache_backoff(cache0); + cache->counter = ADAPTIVE_CACHE_BACKOFF; return 0; success: STAT_INC(STORE_ATTR, success); assert(!PyErr_Occurred()); - cache0->counter = initial_counter_value(); + cache->counter = initial_counter_value(); return 0; } @@ -1037,18 +1027,18 @@ load_method_fail_kind(DescriptorClassification kind) #endif static int -specialize_class_load_method(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, - _PyAttrCache *cache1, _PyObjectCache *cache2) +specialize_class_load_method(PyObject *owner, _Py_CODEUNIT *instr, + PyObject *name) { - + _PyLoadMethodCache *cache = (_PyLoadMethodCache *)(instr + 1); PyObject *descr = NULL; DescriptorClassification kind = 0; kind = analyze_descriptor((PyTypeObject *)owner, name, &descr, 0); switch (kind) { case METHOD: case NON_DESCRIPTOR: - cache1->tp_version = ((PyTypeObject *)owner)->tp_version_tag; - cache2->obj = descr; + write_u32(cache->type_version, ((PyTypeObject *)owner)->tp_version_tag); + write_obj(cache->descr, descr); *instr = _Py_MAKECODEUNIT(LOAD_METHOD_CLASS, _Py_OPARG(*instr)); return 0; #ifdef Py_STATS @@ -1078,16 +1068,18 @@ typedef enum { // can cause a significant drop in cache hits. A possible test is // python.exe -m test_typing test_re test_dis test_zlib. int -_Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache) +_Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name) { - _PyAdaptiveEntry *cache0 = &cache->adaptive; - _PyAttrCache *cache1 = &cache[-1].attr; - _PyObjectCache *cache2 = &cache[-2].obj; + assert(_PyOpcode_InlineCacheEntries[LOAD_METHOD] == + INLINE_CACHE_ENTRIES_LOAD_METHOD); + _PyLoadMethodCache *cache = (_PyLoadMethodCache *)(instr + 1); PyTypeObject *owner_cls = Py_TYPE(owner); if (PyModule_CheckExact(owner)) { - int err = specialize_module_load_attr(owner, instr, name, cache0, - LOAD_METHOD, LOAD_METHOD_MODULE); + assert(INLINE_CACHE_ENTRIES_LOAD_ATTR <= + INLINE_CACHE_ENTRIES_LOAD_METHOD); + int err = specialize_module_load_attr(owner, instr, name, LOAD_METHOD, + LOAD_METHOD_MODULE); if (err) { goto fail; } @@ -1099,7 +1091,7 @@ _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, } } if (PyType_Check(owner)) { - int err = specialize_class_load_method(owner, instr, name, cache1, cache2); + int err = specialize_class_load_method(owner, instr, name); if (err) { goto fail; } @@ -1157,7 +1149,7 @@ _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SPECIALIZATION_FAIL(LOAD_METHOD, SPEC_FAIL_OUT_OF_VERSIONS); goto fail; } - cache1->dk_version = keys_version; + write_u32(cache->keys_version, keys_version); } switch(dictkind) { case NO_DICT: @@ -1167,12 +1159,12 @@ _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, *instr = _Py_MAKECODEUNIT(LOAD_METHOD_WITH_VALUES, _Py_OPARG(*instr)); break; case MANAGED_DICT: - *(int16_t *)&cache0->index = (int16_t)MANAGED_DICT_OFFSET; + *(int16_t *)&cache->dict_offset = (int16_t)MANAGED_DICT_OFFSET; *instr = _Py_MAKECODEUNIT(LOAD_METHOD_WITH_DICT, _Py_OPARG(*instr)); break; case OFFSET_DICT: assert(owner_cls->tp_dictoffset > 0 && owner_cls->tp_dictoffset <= INT16_MAX); - cache0->index = (uint16_t)owner_cls->tp_dictoffset; + cache->dict_offset = (uint16_t)owner_cls->tp_dictoffset; *instr = _Py_MAKECODEUNIT(LOAD_METHOD_WITH_DICT, _Py_OPARG(*instr)); break; } @@ -1190,18 +1182,18 @@ _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, * PyType_Modified usages in typeobject.c). The MCACHE has been * working since Python 2.6 and it's battle-tested. */ - cache1->tp_version = owner_cls->tp_version_tag; - cache2->obj = descr; + write_u32(cache->type_version, owner_cls->tp_version_tag); + write_obj(cache->descr, descr); // Fall through. success: STAT_INC(LOAD_METHOD, success); assert(!PyErr_Occurred()); - cache0->counter = initial_counter_value(); + cache->counter = initial_counter_value(); return 0; fail: STAT_INC(LOAD_METHOD, failure); assert(!PyErr_Occurred()); - cache_backoff(cache0); + cache->counter = ADAPTIVE_CACHE_BACKOFF; return 0; } @@ -1238,7 +1230,7 @@ _Py_Specialize_LoadGlobal( goto fail; } cache->index = (uint16_t)index; - write32(&cache->module_keys_version, keys_version); + write_u32(cache->module_keys_version, keys_version); *instr = _Py_MAKECODEUNIT(LOAD_GLOBAL_MODULE, _Py_OPARG(*instr)); goto success; } @@ -1273,7 +1265,7 @@ _Py_Specialize_LoadGlobal( goto fail; } cache->index = (uint16_t)index; - write32(&cache->module_keys_version, globals_version); + write_u32(cache->module_keys_version, globals_version); cache->builtin_keys_version = (uint16_t)builtins_version; *instr = _Py_MAKECODEUNIT(LOAD_GLOBAL_BUILTIN, _Py_OPARG(*instr)); goto success; @@ -1393,7 +1385,7 @@ _Py_Specialize_BinarySubscr( goto fail; } assert(cls->tp_version_tag != 0); - write32(&cache->type_version, cls->tp_version_tag); + write_u32(cache->type_version, cls->tp_version_tag); int version = _PyFunction_GetVersionForCurrentState(func); if (version == 0 || version != (uint16_t)version) { SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_VERSIONS);