diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 61f4cf43c14..b0703073f1d 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -91,8 +91,6 @@ typedef struct { #define INLINE_CACHE_ENTRIES_FOR_ITER CACHE_ENTRIES(_PyForIterCache) -extern uint8_t _PyOpcode_Adaptive[256]; - // Borrowed references to common callables: struct callable_cache { PyObject *isinstance; @@ -219,11 +217,14 @@ extern int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name); extern int _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name); -extern int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name); -extern int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr); -extern int _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT *instr); -extern int _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr, - int nargs, PyObject *kwnames); +extern void _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, + _Py_CODEUNIT *instr, PyObject *name); +extern void _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, + _Py_CODEUNIT *instr); +extern void _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, + _Py_CODEUNIT *instr); +extern void _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr, + int nargs, PyObject *kwnames); extern void _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr, int oparg, PyObject **locals); extern void _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, @@ -377,8 +378,22 @@ write_location_entry_start(uint8_t *ptr, int code, int length) /* With a 16-bit counter, we have 12 bits for the counter value, and 4 bits for the backoff */ #define ADAPTIVE_BACKOFF_BITS 4 -/* The initial counter value is 1 == 2**ADAPTIVE_BACKOFF_START - 1 */ -#define ADAPTIVE_BACKOFF_START 1 + +// A value of 1 means that we attempt to specialize the *second* time each +// instruction is executed. Executing twice is a much better indicator of +// "hotness" than executing once, but additional warmup delays only prevent +// specialization. Most types stabilize by the second execution, too: +#define ADAPTIVE_WARMUP_VALUE 1 +#define ADAPTIVE_WARMUP_BACKOFF 1 + +// A value of 52 means that we attempt to re-specialize after 53 misses (a prime +// number, useful for avoiding artifacts if every nth value is a different type +// or something). Setting the backoff to 0 means that the counter is reset to +// the same state as a warming-up instruction (value == 1, backoff == 1) after +// deoptimization. 
This isn't strictly necessary, but it is a bit easier to reason +// about when thinking about the opcode transitions as a state machine: +#define ADAPTIVE_COOLDOWN_VALUE 52 +#define ADAPTIVE_COOLDOWN_BACKOFF 0 #define MAX_BACKOFF_VALUE (16 - ADAPTIVE_BACKOFF_BITS) @@ -390,9 +405,15 @@ adaptive_counter_bits(int value, int backoff) { } static inline uint16_t -adaptive_counter_start(void) { - unsigned int value = (1 << ADAPTIVE_BACKOFF_START) - 1; - return adaptive_counter_bits(value, ADAPTIVE_BACKOFF_START); +adaptive_counter_warmup(void) { + return adaptive_counter_bits(ADAPTIVE_WARMUP_VALUE, + ADAPTIVE_WARMUP_BACKOFF); +} + +static inline uint16_t +adaptive_counter_cooldown(void) { + return adaptive_counter_bits(ADAPTIVE_COOLDOWN_VALUE, + ADAPTIVE_COOLDOWN_BACKOFF); } static inline uint16_t diff --git a/Include/internal/pycore_opcode.h b/Include/internal/pycore_opcode.h index 2ae1fc874c6..949d2c161d6 100644 --- a/Include/internal/pycore_opcode.h +++ b/Include/internal/pycore_opcode.h @@ -58,10 +58,10 @@ const uint8_t _PyOpcode_Deopt[256] = { [BEFORE_ASYNC_WITH] = BEFORE_ASYNC_WITH, [BEFORE_WITH] = BEFORE_WITH, [BINARY_OP] = BINARY_OP, - [BINARY_OP_ADAPTIVE] = BINARY_OP, [BINARY_OP_ADD_FLOAT] = BINARY_OP, [BINARY_OP_ADD_INT] = BINARY_OP, [BINARY_OP_ADD_UNICODE] = BINARY_OP, + [BINARY_OP_GENERIC] = BINARY_OP, [BINARY_OP_INPLACE_ADD_UNICODE] = BINARY_OP, [BINARY_OP_MULTIPLY_FLOAT] = BINARY_OP, [BINARY_OP_MULTIPLY_INT] = BINARY_OP, @@ -69,7 +69,6 @@ const uint8_t _PyOpcode_Deopt[256] = { [BINARY_OP_SUBTRACT_INT] = BINARY_OP, [BINARY_SLICE] = BINARY_SLICE, [BINARY_SUBSCR] = BINARY_SUBSCR, - [BINARY_SUBSCR_ADAPTIVE] = BINARY_SUBSCR, [BINARY_SUBSCR_DICT] = BINARY_SUBSCR, [BINARY_SUBSCR_GETITEM] = BINARY_SUBSCR, [BINARY_SUBSCR_LIST_INT] = BINARY_SUBSCR, @@ -83,7 +82,6 @@ const uint8_t _PyOpcode_Deopt[256] = { [BUILD_TUPLE] = BUILD_TUPLE, [CACHE] = CACHE, [CALL] = CALL, - [CALL_ADAPTIVE] = CALL, [CALL_BOUND_METHOD_EXACT_ARGS] = CALL, [CALL_BUILTIN_CLASS] = CALL, [CALL_BUILTIN_FAST_WITH_KEYWORDS] = CALL, @@ -106,8 +104,8 @@ const uint8_t _PyOpcode_Deopt[256] = { [CHECK_EXC_MATCH] = CHECK_EXC_MATCH, [CLEANUP_THROW] = CLEANUP_THROW, [COMPARE_OP] = COMPARE_OP, - [COMPARE_OP_ADAPTIVE] = COMPARE_OP, [COMPARE_OP_FLOAT_JUMP] = COMPARE_OP, + [COMPARE_OP_GENERIC] = COMPARE_OP, [COMPARE_OP_INT_JUMP] = COMPARE_OP, [COMPARE_OP_STR_JUMP] = COMPARE_OP, [CONTAINS_OP] = CONTAINS_OP, @@ -124,10 +122,8 @@ const uint8_t _PyOpcode_Deopt[256] = { [END_ASYNC_FOR] = END_ASYNC_FOR, [END_FOR] = END_FOR, [EXTENDED_ARG] = EXTENDED_ARG, - [EXTENDED_ARG_QUICK] = EXTENDED_ARG, [FORMAT_VALUE] = FORMAT_VALUE, [FOR_ITER] = FOR_ITER, - [FOR_ITER_ADAPTIVE] = FOR_ITER, [FOR_ITER_GEN] = FOR_ITER, [FOR_ITER_LIST] = FOR_ITER, [FOR_ITER_RANGE] = FOR_ITER, @@ -152,7 +148,6 @@ const uint8_t _PyOpcode_Deopt[256] = { [LIST_TO_TUPLE] = LIST_TO_TUPLE, [LOAD_ASSERTION_ERROR] = LOAD_ASSERTION_ERROR, [LOAD_ATTR] = LOAD_ATTR, - [LOAD_ATTR_ADAPTIVE] = LOAD_ATTR, [LOAD_ATTR_CLASS] = LOAD_ATTR, [LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = LOAD_ATTR, [LOAD_ATTR_INSTANCE_VALUE] = LOAD_ATTR, @@ -175,7 +170,6 @@ const uint8_t _PyOpcode_Deopt[256] = { [LOAD_FAST__LOAD_CONST] = LOAD_FAST, [LOAD_FAST__LOAD_FAST] = LOAD_FAST, [LOAD_GLOBAL] = LOAD_GLOBAL, - [LOAD_GLOBAL_ADAPTIVE] = LOAD_GLOBAL, [LOAD_GLOBAL_BUILTIN] = LOAD_GLOBAL, [LOAD_GLOBAL_MODULE] = LOAD_GLOBAL, [LOAD_NAME] = LOAD_NAME, @@ -208,7 +202,6 @@ const uint8_t _PyOpcode_Deopt[256] = { [SET_UPDATE] = SET_UPDATE, [STOPITERATION_ERROR] = STOPITERATION_ERROR, [STORE_ATTR] = STORE_ATTR, - [STORE_ATTR_ADAPTIVE] 
= STORE_ATTR, [STORE_ATTR_INSTANCE_VALUE] = STORE_ATTR, [STORE_ATTR_SLOT] = STORE_ATTR, [STORE_ATTR_WITH_HINT] = STORE_ATTR, @@ -220,7 +213,6 @@ const uint8_t _PyOpcode_Deopt[256] = { [STORE_NAME] = STORE_NAME, [STORE_SLICE] = STORE_SLICE, [STORE_SUBSCR] = STORE_SUBSCR, - [STORE_SUBSCR_ADAPTIVE] = STORE_SUBSCR, [STORE_SUBSCR_DICT] = STORE_SUBSCR, [STORE_SUBSCR_LIST_INT] = STORE_SUBSCR, [SWAP] = SWAP, @@ -230,7 +222,6 @@ const uint8_t _PyOpcode_Deopt[256] = { [UNARY_POSITIVE] = UNARY_POSITIVE, [UNPACK_EX] = UNPACK_EX, [UNPACK_SEQUENCE] = UNPACK_SEQUENCE, - [UNPACK_SEQUENCE_ADAPTIVE] = UNPACK_SEQUENCE, [UNPACK_SEQUENCE_LIST] = UNPACK_SEQUENCE, [UNPACK_SEQUENCE_TUPLE] = UNPACK_SEQUENCE, [UNPACK_SEQUENCE_TWO_TUPLE] = UNPACK_SEQUENCE, @@ -244,11 +235,11 @@ static const char *const _PyOpcode_OpName[263] = { [CACHE] = "CACHE", [POP_TOP] = "POP_TOP", [PUSH_NULL] = "PUSH_NULL", - [BINARY_OP_ADAPTIVE] = "BINARY_OP_ADAPTIVE", - [END_FOR] = "END_FOR", [BINARY_OP_ADD_FLOAT] = "BINARY_OP_ADD_FLOAT", + [END_FOR] = "END_FOR", [BINARY_OP_ADD_INT] = "BINARY_OP_ADD_INT", [BINARY_OP_ADD_UNICODE] = "BINARY_OP_ADD_UNICODE", + [BINARY_OP_GENERIC] = "BINARY_OP_GENERIC", [BINARY_OP_INPLACE_ADD_UNICODE] = "BINARY_OP_INPLACE_ADD_UNICODE", [NOP] = "NOP", [UNARY_POSITIVE] = "UNARY_POSITIVE", @@ -259,28 +250,26 @@ static const char *const _PyOpcode_OpName[263] = { [UNARY_INVERT] = "UNARY_INVERT", [BINARY_OP_SUBTRACT_FLOAT] = "BINARY_OP_SUBTRACT_FLOAT", [BINARY_OP_SUBTRACT_INT] = "BINARY_OP_SUBTRACT_INT", - [BINARY_SUBSCR_ADAPTIVE] = "BINARY_SUBSCR_ADAPTIVE", [BINARY_SUBSCR_DICT] = "BINARY_SUBSCR_DICT", [BINARY_SUBSCR_GETITEM] = "BINARY_SUBSCR_GETITEM", [BINARY_SUBSCR_LIST_INT] = "BINARY_SUBSCR_LIST_INT", [BINARY_SUBSCR_TUPLE_INT] = "BINARY_SUBSCR_TUPLE_INT", - [CALL_ADAPTIVE] = "CALL_ADAPTIVE", [CALL_PY_EXACT_ARGS] = "CALL_PY_EXACT_ARGS", + [CALL_PY_WITH_DEFAULTS] = "CALL_PY_WITH_DEFAULTS", + [CALL_BOUND_METHOD_EXACT_ARGS] = "CALL_BOUND_METHOD_EXACT_ARGS", [BINARY_SUBSCR] = "BINARY_SUBSCR", [BINARY_SLICE] = "BINARY_SLICE", [STORE_SLICE] = "STORE_SLICE", - [CALL_PY_WITH_DEFAULTS] = "CALL_PY_WITH_DEFAULTS", - [CALL_BOUND_METHOD_EXACT_ARGS] = "CALL_BOUND_METHOD_EXACT_ARGS", + [CALL_BUILTIN_CLASS] = "CALL_BUILTIN_CLASS", + [CALL_BUILTIN_FAST_WITH_KEYWORDS] = "CALL_BUILTIN_FAST_WITH_KEYWORDS", [GET_LEN] = "GET_LEN", [MATCH_MAPPING] = "MATCH_MAPPING", [MATCH_SEQUENCE] = "MATCH_SEQUENCE", [MATCH_KEYS] = "MATCH_KEYS", - [CALL_BUILTIN_CLASS] = "CALL_BUILTIN_CLASS", + [CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = "CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS", [PUSH_EXC_INFO] = "PUSH_EXC_INFO", [CHECK_EXC_MATCH] = "CHECK_EXC_MATCH", [CHECK_EG_MATCH] = "CHECK_EG_MATCH", - [CALL_BUILTIN_FAST_WITH_KEYWORDS] = "CALL_BUILTIN_FAST_WITH_KEYWORDS", - [CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = "CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS", [CALL_NO_KW_BUILTIN_FAST] = "CALL_NO_KW_BUILTIN_FAST", [CALL_NO_KW_BUILTIN_O] = "CALL_NO_KW_BUILTIN_O", [CALL_NO_KW_ISINSTANCE] = "CALL_NO_KW_ISINSTANCE", @@ -290,6 +279,8 @@ static const char *const _PyOpcode_OpName[263] = { [CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS] = "CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS", [CALL_NO_KW_METHOD_DESCRIPTOR_O] = "CALL_NO_KW_METHOD_DESCRIPTOR_O", [CALL_NO_KW_STR_1] = "CALL_NO_KW_STR_1", + [CALL_NO_KW_TUPLE_1] = "CALL_NO_KW_TUPLE_1", + [CALL_NO_KW_TYPE_1] = "CALL_NO_KW_TYPE_1", [WITH_EXCEPT_START] = "WITH_EXCEPT_START", [GET_AITER] = "GET_AITER", [GET_ANEXT] = "GET_ANEXT", @@ -297,37 +288,37 @@ static const char *const _PyOpcode_OpName[263] = { [BEFORE_WITH] = "BEFORE_WITH", 
[END_ASYNC_FOR] = "END_ASYNC_FOR", [CLEANUP_THROW] = "CLEANUP_THROW", - [CALL_NO_KW_TUPLE_1] = "CALL_NO_KW_TUPLE_1", - [CALL_NO_KW_TYPE_1] = "CALL_NO_KW_TYPE_1", - [COMPARE_OP_ADAPTIVE] = "COMPARE_OP_ADAPTIVE", [COMPARE_OP_FLOAT_JUMP] = "COMPARE_OP_FLOAT_JUMP", + [COMPARE_OP_GENERIC] = "COMPARE_OP_GENERIC", + [COMPARE_OP_INT_JUMP] = "COMPARE_OP_INT_JUMP", + [COMPARE_OP_STR_JUMP] = "COMPARE_OP_STR_JUMP", [STORE_SUBSCR] = "STORE_SUBSCR", [DELETE_SUBSCR] = "DELETE_SUBSCR", - [COMPARE_OP_INT_JUMP] = "COMPARE_OP_INT_JUMP", - [STOPITERATION_ERROR] = "STOPITERATION_ERROR", - [COMPARE_OP_STR_JUMP] = "COMPARE_OP_STR_JUMP", - [EXTENDED_ARG_QUICK] = "EXTENDED_ARG_QUICK", - [FOR_ITER_ADAPTIVE] = "FOR_ITER_ADAPTIVE", [FOR_ITER_LIST] = "FOR_ITER_LIST", + [STOPITERATION_ERROR] = "STOPITERATION_ERROR", + [FOR_ITER_RANGE] = "FOR_ITER_RANGE", + [FOR_ITER_GEN] = "FOR_ITER_GEN", + [LOAD_ATTR_CLASS] = "LOAD_ATTR_CLASS", + [LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = "LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN", [GET_ITER] = "GET_ITER", [GET_YIELD_FROM_ITER] = "GET_YIELD_FROM_ITER", [PRINT_EXPR] = "PRINT_EXPR", [LOAD_BUILD_CLASS] = "LOAD_BUILD_CLASS", - [FOR_ITER_RANGE] = "FOR_ITER_RANGE", - [FOR_ITER_GEN] = "FOR_ITER_GEN", - [LOAD_ASSERTION_ERROR] = "LOAD_ASSERTION_ERROR", - [RETURN_GENERATOR] = "RETURN_GENERATOR", - [LOAD_ATTR_ADAPTIVE] = "LOAD_ATTR_ADAPTIVE", - [LOAD_ATTR_CLASS] = "LOAD_ATTR_CLASS", - [LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = "LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN", [LOAD_ATTR_INSTANCE_VALUE] = "LOAD_ATTR_INSTANCE_VALUE", [LOAD_ATTR_MODULE] = "LOAD_ATTR_MODULE", + [LOAD_ASSERTION_ERROR] = "LOAD_ASSERTION_ERROR", + [RETURN_GENERATOR] = "RETURN_GENERATOR", [LOAD_ATTR_PROPERTY] = "LOAD_ATTR_PROPERTY", + [LOAD_ATTR_SLOT] = "LOAD_ATTR_SLOT", + [LOAD_ATTR_WITH_HINT] = "LOAD_ATTR_WITH_HINT", + [LOAD_ATTR_METHOD_LAZY_DICT] = "LOAD_ATTR_METHOD_LAZY_DICT", + [LOAD_ATTR_METHOD_NO_DICT] = "LOAD_ATTR_METHOD_NO_DICT", + [LOAD_ATTR_METHOD_WITH_DICT] = "LOAD_ATTR_METHOD_WITH_DICT", [LIST_TO_TUPLE] = "LIST_TO_TUPLE", [RETURN_VALUE] = "RETURN_VALUE", [IMPORT_STAR] = "IMPORT_STAR", [SETUP_ANNOTATIONS] = "SETUP_ANNOTATIONS", - [LOAD_ATTR_SLOT] = "LOAD_ATTR_SLOT", + [LOAD_ATTR_METHOD_WITH_VALUES] = "LOAD_ATTR_METHOD_WITH_VALUES", [ASYNC_GEN_WRAP] = "ASYNC_GEN_WRAP", [PREP_RERAISE_STAR] = "PREP_RERAISE_STAR", [POP_EXCEPT] = "POP_EXCEPT", @@ -354,7 +345,7 @@ static const char *const _PyOpcode_OpName[263] = { [JUMP_FORWARD] = "JUMP_FORWARD", [JUMP_IF_FALSE_OR_POP] = "JUMP_IF_FALSE_OR_POP", [JUMP_IF_TRUE_OR_POP] = "JUMP_IF_TRUE_OR_POP", - [LOAD_ATTR_WITH_HINT] = "LOAD_ATTR_WITH_HINT", + [LOAD_CONST__LOAD_FAST] = "LOAD_CONST__LOAD_FAST", [POP_JUMP_IF_FALSE] = "POP_JUMP_IF_FALSE", [POP_JUMP_IF_TRUE] = "POP_JUMP_IF_TRUE", [LOAD_GLOBAL] = "LOAD_GLOBAL", @@ -362,7 +353,7 @@ static const char *const _PyOpcode_OpName[263] = { [CONTAINS_OP] = "CONTAINS_OP", [RERAISE] = "RERAISE", [COPY] = "COPY", - [LOAD_ATTR_METHOD_LAZY_DICT] = "LOAD_ATTR_METHOD_LAZY_DICT", + [LOAD_FAST__LOAD_CONST] = "LOAD_FAST__LOAD_CONST", [BINARY_OP] = "BINARY_OP", [SEND] = "SEND", [LOAD_FAST] = "LOAD_FAST", @@ -382,9 +373,9 @@ static const char *const _PyOpcode_OpName[263] = { [STORE_DEREF] = "STORE_DEREF", [DELETE_DEREF] = "DELETE_DEREF", [JUMP_BACKWARD] = "JUMP_BACKWARD", - [LOAD_ATTR_METHOD_NO_DICT] = "LOAD_ATTR_METHOD_NO_DICT", + [LOAD_FAST__LOAD_FAST] = "LOAD_FAST__LOAD_FAST", [CALL_FUNCTION_EX] = "CALL_FUNCTION_EX", - [LOAD_ATTR_METHOD_WITH_DICT] = "LOAD_ATTR_METHOD_WITH_DICT", + [LOAD_GLOBAL_BUILTIN] = "LOAD_GLOBAL_BUILTIN", [EXTENDED_ARG] = "EXTENDED_ARG", 
[LIST_APPEND] = "LIST_APPEND", [SET_ADD] = "SET_ADD", @@ -394,35 +385,35 @@ static const char *const _PyOpcode_OpName[263] = { [YIELD_VALUE] = "YIELD_VALUE", [RESUME] = "RESUME", [MATCH_CLASS] = "MATCH_CLASS", - [LOAD_ATTR_METHOD_WITH_VALUES] = "LOAD_ATTR_METHOD_WITH_VALUES", - [LOAD_CONST__LOAD_FAST] = "LOAD_CONST__LOAD_FAST", + [LOAD_GLOBAL_MODULE] = "LOAD_GLOBAL_MODULE", + [STORE_ATTR_INSTANCE_VALUE] = "STORE_ATTR_INSTANCE_VALUE", [FORMAT_VALUE] = "FORMAT_VALUE", [BUILD_CONST_KEY_MAP] = "BUILD_CONST_KEY_MAP", [BUILD_STRING] = "BUILD_STRING", - [LOAD_FAST__LOAD_CONST] = "LOAD_FAST__LOAD_CONST", - [LOAD_FAST__LOAD_FAST] = "LOAD_FAST__LOAD_FAST", - [LOAD_GLOBAL_ADAPTIVE] = "LOAD_GLOBAL_ADAPTIVE", - [LOAD_GLOBAL_BUILTIN] = "LOAD_GLOBAL_BUILTIN", + [STORE_ATTR_SLOT] = "STORE_ATTR_SLOT", + [STORE_ATTR_WITH_HINT] = "STORE_ATTR_WITH_HINT", + [STORE_FAST__LOAD_FAST] = "STORE_FAST__LOAD_FAST", + [STORE_FAST__STORE_FAST] = "STORE_FAST__STORE_FAST", [LIST_EXTEND] = "LIST_EXTEND", [SET_UPDATE] = "SET_UPDATE", [DICT_MERGE] = "DICT_MERGE", [DICT_UPDATE] = "DICT_UPDATE", - [LOAD_GLOBAL_MODULE] = "LOAD_GLOBAL_MODULE", - [STORE_ATTR_ADAPTIVE] = "STORE_ATTR_ADAPTIVE", - [STORE_ATTR_INSTANCE_VALUE] = "STORE_ATTR_INSTANCE_VALUE", - [STORE_ATTR_SLOT] = "STORE_ATTR_SLOT", - [STORE_ATTR_WITH_HINT] = "STORE_ATTR_WITH_HINT", - [CALL] = "CALL", - [KW_NAMES] = "KW_NAMES", - [STORE_FAST__LOAD_FAST] = "STORE_FAST__LOAD_FAST", - [STORE_FAST__STORE_FAST] = "STORE_FAST__STORE_FAST", - [STORE_SUBSCR_ADAPTIVE] = "STORE_SUBSCR_ADAPTIVE", [STORE_SUBSCR_DICT] = "STORE_SUBSCR_DICT", [STORE_SUBSCR_LIST_INT] = "STORE_SUBSCR_LIST_INT", - [UNPACK_SEQUENCE_ADAPTIVE] = "UNPACK_SEQUENCE_ADAPTIVE", [UNPACK_SEQUENCE_LIST] = "UNPACK_SEQUENCE_LIST", [UNPACK_SEQUENCE_TUPLE] = "UNPACK_SEQUENCE_TUPLE", [UNPACK_SEQUENCE_TWO_TUPLE] = "UNPACK_SEQUENCE_TWO_TUPLE", + [CALL] = "CALL", + [KW_NAMES] = "KW_NAMES", + [173] = "<173>", + [174] = "<174>", + [175] = "<175>", + [176] = "<176>", + [177] = "<177>", + [178] = "<178>", + [179] = "<179>", + [180] = "<180>", + [181] = "<181>", [182] = "<182>", [183] = "<183>", [184] = "<184>", @@ -508,6 +499,15 @@ static const char *const _PyOpcode_OpName[263] = { #endif #define EXTRA_CASES \ + case 173: \ + case 174: \ + case 175: \ + case 176: \ + case 177: \ + case 178: \ + case 179: \ + case 180: \ + case 181: \ case 182: \ case 183: \ case 184: \ diff --git a/Include/opcode.h b/Include/opcode.h index e1978a5d3ed..9fbdbe5c6a6 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -128,78 +128,69 @@ extern "C" { #define JUMP_NO_INTERRUPT 261 #define LOAD_METHOD 262 #define MAX_PSEUDO_OPCODE 262 -#define BINARY_OP_ADAPTIVE 3 -#define BINARY_OP_ADD_FLOAT 5 -#define BINARY_OP_ADD_INT 6 -#define BINARY_OP_ADD_UNICODE 7 +#define BINARY_OP_ADD_FLOAT 3 +#define BINARY_OP_ADD_INT 5 +#define BINARY_OP_ADD_UNICODE 6 +#define BINARY_OP_GENERIC 7 #define BINARY_OP_INPLACE_ADD_UNICODE 8 #define BINARY_OP_MULTIPLY_FLOAT 13 #define BINARY_OP_MULTIPLY_INT 14 #define BINARY_OP_SUBTRACT_FLOAT 16 #define BINARY_OP_SUBTRACT_INT 17 -#define BINARY_SUBSCR_ADAPTIVE 18 -#define BINARY_SUBSCR_DICT 19 -#define BINARY_SUBSCR_GETITEM 20 -#define BINARY_SUBSCR_LIST_INT 21 -#define BINARY_SUBSCR_TUPLE_INT 22 -#define CALL_ADAPTIVE 23 -#define CALL_PY_EXACT_ARGS 24 -#define CALL_PY_WITH_DEFAULTS 28 -#define CALL_BOUND_METHOD_EXACT_ARGS 29 -#define CALL_BUILTIN_CLASS 34 -#define CALL_BUILTIN_FAST_WITH_KEYWORDS 38 -#define CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS 39 -#define CALL_NO_KW_BUILTIN_FAST 40 -#define CALL_NO_KW_BUILTIN_O 41 
-#define CALL_NO_KW_ISINSTANCE 42 -#define CALL_NO_KW_LEN 43 -#define CALL_NO_KW_LIST_APPEND 44 -#define CALL_NO_KW_METHOD_DESCRIPTOR_FAST 45 -#define CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS 46 -#define CALL_NO_KW_METHOD_DESCRIPTOR_O 47 -#define CALL_NO_KW_STR_1 48 -#define CALL_NO_KW_TUPLE_1 56 -#define CALL_NO_KW_TYPE_1 57 -#define COMPARE_OP_ADAPTIVE 58 -#define COMPARE_OP_FLOAT_JUMP 59 -#define COMPARE_OP_INT_JUMP 62 -#define COMPARE_OP_STR_JUMP 64 -#define EXTENDED_ARG_QUICK 65 -#define FOR_ITER_ADAPTIVE 66 -#define FOR_ITER_LIST 67 -#define FOR_ITER_RANGE 72 -#define FOR_ITER_GEN 73 -#define LOAD_ATTR_ADAPTIVE 76 -#define LOAD_ATTR_CLASS 77 -#define LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN 78 -#define LOAD_ATTR_INSTANCE_VALUE 79 -#define LOAD_ATTR_MODULE 80 -#define LOAD_ATTR_PROPERTY 81 -#define LOAD_ATTR_SLOT 86 -#define LOAD_ATTR_WITH_HINT 113 -#define LOAD_ATTR_METHOD_LAZY_DICT 121 -#define LOAD_ATTR_METHOD_NO_DICT 141 -#define LOAD_ATTR_METHOD_WITH_DICT 143 -#define LOAD_ATTR_METHOD_WITH_VALUES 153 -#define LOAD_CONST__LOAD_FAST 154 -#define LOAD_FAST__LOAD_CONST 158 -#define LOAD_FAST__LOAD_FAST 159 -#define LOAD_GLOBAL_ADAPTIVE 160 -#define LOAD_GLOBAL_BUILTIN 161 -#define LOAD_GLOBAL_MODULE 166 -#define STORE_ATTR_ADAPTIVE 167 -#define STORE_ATTR_INSTANCE_VALUE 168 -#define STORE_ATTR_SLOT 169 -#define STORE_ATTR_WITH_HINT 170 -#define STORE_FAST__LOAD_FAST 173 -#define STORE_FAST__STORE_FAST 174 -#define STORE_SUBSCR_ADAPTIVE 175 -#define STORE_SUBSCR_DICT 176 -#define STORE_SUBSCR_LIST_INT 177 -#define UNPACK_SEQUENCE_ADAPTIVE 178 -#define UNPACK_SEQUENCE_LIST 179 -#define UNPACK_SEQUENCE_TUPLE 180 -#define UNPACK_SEQUENCE_TWO_TUPLE 181 +#define BINARY_SUBSCR_DICT 18 +#define BINARY_SUBSCR_GETITEM 19 +#define BINARY_SUBSCR_LIST_INT 20 +#define BINARY_SUBSCR_TUPLE_INT 21 +#define CALL_PY_EXACT_ARGS 22 +#define CALL_PY_WITH_DEFAULTS 23 +#define CALL_BOUND_METHOD_EXACT_ARGS 24 +#define CALL_BUILTIN_CLASS 28 +#define CALL_BUILTIN_FAST_WITH_KEYWORDS 29 +#define CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS 34 +#define CALL_NO_KW_BUILTIN_FAST 38 +#define CALL_NO_KW_BUILTIN_O 39 +#define CALL_NO_KW_ISINSTANCE 40 +#define CALL_NO_KW_LEN 41 +#define CALL_NO_KW_LIST_APPEND 42 +#define CALL_NO_KW_METHOD_DESCRIPTOR_FAST 43 +#define CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS 44 +#define CALL_NO_KW_METHOD_DESCRIPTOR_O 45 +#define CALL_NO_KW_STR_1 46 +#define CALL_NO_KW_TUPLE_1 47 +#define CALL_NO_KW_TYPE_1 48 +#define COMPARE_OP_FLOAT_JUMP 56 +#define COMPARE_OP_GENERIC 57 +#define COMPARE_OP_INT_JUMP 58 +#define COMPARE_OP_STR_JUMP 59 +#define FOR_ITER_LIST 62 +#define FOR_ITER_RANGE 64 +#define FOR_ITER_GEN 65 +#define LOAD_ATTR_CLASS 66 +#define LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN 67 +#define LOAD_ATTR_INSTANCE_VALUE 72 +#define LOAD_ATTR_MODULE 73 +#define LOAD_ATTR_PROPERTY 76 +#define LOAD_ATTR_SLOT 77 +#define LOAD_ATTR_WITH_HINT 78 +#define LOAD_ATTR_METHOD_LAZY_DICT 79 +#define LOAD_ATTR_METHOD_NO_DICT 80 +#define LOAD_ATTR_METHOD_WITH_DICT 81 +#define LOAD_ATTR_METHOD_WITH_VALUES 86 +#define LOAD_CONST__LOAD_FAST 113 +#define LOAD_FAST__LOAD_CONST 121 +#define LOAD_FAST__LOAD_FAST 141 +#define LOAD_GLOBAL_BUILTIN 143 +#define LOAD_GLOBAL_MODULE 153 +#define STORE_ATTR_INSTANCE_VALUE 154 +#define STORE_ATTR_SLOT 158 +#define STORE_ATTR_WITH_HINT 159 +#define STORE_FAST__LOAD_FAST 160 +#define STORE_FAST__STORE_FAST 161 +#define STORE_SUBSCR_DICT 166 +#define STORE_SUBSCR_LIST_INT 167 +#define UNPACK_SEQUENCE_LIST 168 +#define UNPACK_SEQUENCE_TUPLE 169 +#define UNPACK_SEQUENCE_TWO_TUPLE 170 #define 
DO_TRACING 255 #define HAS_ARG(op) ((((op) >= HAVE_ARGUMENT) && (!IS_PSEUDO_OPCODE(op)))\ diff --git a/Lib/dis.py b/Lib/dis.py index a045d18241b..523bd01d929 100644 --- a/Lib/dis.py +++ b/Lib/dis.py @@ -512,9 +512,8 @@ def _get_instructions_bytes(code, varname_from_oparg=None, for i in range(size): offset += 2 # Only show the fancy argrepr for a CACHE instruction when it's - # the first entry for a particular cache value and the - # instruction using it is actually quickened: - if i == 0 and op != deop: + # the first entry for a particular cache value: + if i == 0: data = code[offset: offset + 2 * size] argrepr = f"{name}: {int.from_bytes(data, sys.byteorder)}" else: diff --git a/Lib/opcode.py b/Lib/opcode.py index 990f5b6fa8a..00ef78aa0ba 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -278,10 +278,10 @@ _nb_ops = [ _specializations = { "BINARY_OP": [ - "BINARY_OP_ADAPTIVE", "BINARY_OP_ADD_FLOAT", "BINARY_OP_ADD_INT", "BINARY_OP_ADD_UNICODE", + "BINARY_OP_GENERIC", "BINARY_OP_INPLACE_ADD_UNICODE", "BINARY_OP_MULTIPLY_FLOAT", "BINARY_OP_MULTIPLY_INT", @@ -289,14 +289,12 @@ _specializations = { "BINARY_OP_SUBTRACT_INT", ], "BINARY_SUBSCR": [ - "BINARY_SUBSCR_ADAPTIVE", "BINARY_SUBSCR_DICT", "BINARY_SUBSCR_GETITEM", "BINARY_SUBSCR_LIST_INT", "BINARY_SUBSCR_TUPLE_INT", ], "CALL": [ - "CALL_ADAPTIVE", "CALL_PY_EXACT_ARGS", "CALL_PY_WITH_DEFAULTS", "CALL_BOUND_METHOD_EXACT_ARGS", @@ -316,22 +314,17 @@ _specializations = { "CALL_NO_KW_TYPE_1", ], "COMPARE_OP": [ - "COMPARE_OP_ADAPTIVE", "COMPARE_OP_FLOAT_JUMP", + "COMPARE_OP_GENERIC", "COMPARE_OP_INT_JUMP", "COMPARE_OP_STR_JUMP", ], - "EXTENDED_ARG": [ - "EXTENDED_ARG_QUICK", - ], "FOR_ITER": [ - "FOR_ITER_ADAPTIVE", "FOR_ITER_LIST", "FOR_ITER_RANGE", "FOR_ITER_GEN", ], "LOAD_ATTR": [ - "LOAD_ATTR_ADAPTIVE", # These potentially push [NULL, bound method] onto the stack. 
"LOAD_ATTR_CLASS", "LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN", @@ -354,12 +347,10 @@ _specializations = { "LOAD_FAST__LOAD_FAST", ], "LOAD_GLOBAL": [ - "LOAD_GLOBAL_ADAPTIVE", "LOAD_GLOBAL_BUILTIN", "LOAD_GLOBAL_MODULE", ], "STORE_ATTR": [ - "STORE_ATTR_ADAPTIVE", "STORE_ATTR_INSTANCE_VALUE", "STORE_ATTR_SLOT", "STORE_ATTR_WITH_HINT", @@ -369,12 +360,10 @@ _specializations = { "STORE_FAST__STORE_FAST", ], "STORE_SUBSCR": [ - "STORE_SUBSCR_ADAPTIVE", "STORE_SUBSCR_DICT", "STORE_SUBSCR_LIST_INT", ], "UNPACK_SEQUENCE": [ - "UNPACK_SEQUENCE_ADAPTIVE", "UNPACK_SEQUENCE_LIST", "UNPACK_SEQUENCE_TUPLE", "UNPACK_SEQUENCE_TWO_TUPLE", diff --git a/Lib/test/test__opcode.py b/Lib/test/test__opcode.py index 704d19fffd0..db831069c7a 100644 --- a/Lib/test/test__opcode.py +++ b/Lib/test/test__opcode.py @@ -72,9 +72,10 @@ class SpecializationStatsTests(unittest.TestCase): stat_names = opcode._specialization_stats specialized_opcodes = [ - op[:-len("_ADAPTIVE")].lower() for - op in opcode._specialized_instructions - if op.endswith("_ADAPTIVE")] + op.lower() + for op in opcode._specializations + if opcode._inline_cache_entries[opcode.opmap[op]] + ] self.assertIn('load_attr', specialized_opcodes) self.assertIn('binary_subscr', specialized_opcodes) diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index ceba007ebc1..5640bf265b0 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -741,7 +741,7 @@ dis_loop_test_quickened_code = """\ LOAD_CONST 1 ((1, 2, 3)) LIST_EXTEND 1 LOAD_CONST 2 (3) - BINARY_OP_ADAPTIVE 5 (*) + BINARY_OP 5 (*) GET_ITER >> FOR_ITER_LIST 15 (to 50) STORE_FAST 0 (i) @@ -1200,7 +1200,7 @@ class DisTests(DisTestBase): for cache in caches: self.assertRegex(cache, pattern) total_caches = 23 - empty_caches = 8 if adaptive else total_caches + empty_caches = 8 self.assertEqual(caches.count(""), empty_caches) self.assertEqual(len(caches), total_caches) diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index 8c92f495d16..2dda7ccf7bf 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -362,10 +362,6 @@ class EmbeddingTests(EmbeddingTestsMixin, unittest.TestCase): opname in opcode._specialized_instructions # Exclude superinstructions: and "__" not in opname - # Exclude adaptive instructions: - and not opname.endswith("_ADAPTIVE") - # Exclude "quick" instructions: - and not opname.endswith("_QUICK") ): return True return False diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-11-06-00-47-11.gh-issue-98686.DBDy6U.rst b/Misc/NEWS.d/next/Core and Builtins/2022-11-06-00-47-11.gh-issue-98686.DBDy6U.rst new file mode 100644 index 00000000000..f259b389e32 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-11-06-00-47-11.gh-issue-98686.DBDy6U.rst @@ -0,0 +1,5 @@ +Merge the adaptive opcode logic into each instruction's unquickened variant, +and merge the logic in ``EXTENDED_ARG_QUICK`` into :opcode:`EXTENDED_ARG`. +With these changes, the quickening that happens at code object creation is +now only responsible for initializing warmup counters and inserting +superinstructions. diff --git a/Python/adaptive.md b/Python/adaptive.md index e8161bcdd5b..d978c089b23 100644 --- a/Python/adaptive.md +++ b/Python/adaptive.md @@ -11,7 +11,7 @@ A family of instructions has the following fundamental properties: generated by the bytecode compiler. * It has a single adaptive instruction that records an execution count and, at regular intervals, attempts to specialize itself. If not specializing, - it executes the non-adaptive instruction. + it executes the base implementation. 
* It has at least one specialized form of the instruction that is tailored for a particular value or set of values at runtime. * All members of the family must have the same number of inline cache entries, @@ -22,19 +22,18 @@ A family of instructions has the following fundamental properties: The current implementation also requires the following, although these are not fundamental and may change: -* All families uses one or more inline cache entries, +* All families use one or more inline cache entries, the first entry is always the counter. -* All instruction names should start with the name of the non-adaptive +* All instruction names should start with the name of the adaptive instruction. -* The adaptive instruction should end in `_ADAPTIVE`. * Specialized forms should have names describing their specialization. ## Example family -The `LOAD_GLOBAL` instruction (in Python/ceval.c) already has an adaptive +The `LOAD_GLOBAL` instruction (in Python/bytecodes.c) already has an adaptive family that serves as a relatively simple example. -The `LOAD_GLOBAL_ADAPTIVE` instruction performs adaptive specialization, +The `LOAD_GLOBAL` instruction performs adaptive specialization, calling `_Py_Specialize_LoadGlobal()` when the counter reaches zero. There are two specialized instructions in the family, `LOAD_GLOBAL_MODULE` @@ -138,5 +137,5 @@ to eliminate the branches. Finally, take care that stats are gathered correctly. After the last `DEOPT_IF` has passed, a hit should be recorded with `STAT_INC(BASE_INSTRUCTION, hit)`. -After a optimization has been deferred in the `ADAPTIVE` form, +After an optimization has been deferred in the adaptive instruction, that should be recorded with `STAT_INC(BASE_INSTRUCTION, deferred)`. diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 8469f7f01f2..d2df56ef7ba 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -327,6 +327,15 @@ dummy_func( } inst(BINARY_SUBSCR, (container, sub -- res)) { + _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr; + if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { + assert(cframe.use_tracing == 0); + next_instr--; + _Py_Specialize_BinarySubscr(container, sub, next_instr); + DISPATCH_SAME_OPARG(); + } + STAT_INC(BINARY_SUBSCR, deferred); + DECREMENT_ADAPTIVE_COUNTER(cache->counter); res = PyObject_GetItem(container, sub); Py_DECREF(container); Py_DECREF(sub); @@ -364,25 +373,6 @@ dummy_func( ERROR_IF(err, error); } - // stack effect: (__0 -- ) - inst(BINARY_SUBSCR_ADAPTIVE) { - _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr; - if (ADAPTIVE_COUNTER_IS_ZERO(cache)) { - PyObject *sub = TOP(); - PyObject *container = SECOND(); - next_instr--; - if (_Py_Specialize_BinarySubscr(container, sub, next_instr) < 0) { - goto error; - } - DISPATCH_SAME_OPARG(); - } - else { - STAT_INC(BINARY_SUBSCR, deferred); - DECREMENT_ADAPTIVE_COUNTER(cache); - GO_TO_INSTRUCTION(BINARY_SUBSCR); - } - } - // stack effect: (__0 -- ) inst(BINARY_SUBSCR_LIST_INT) { assert(cframe.use_tracing == 0); @@ -511,9 +501,17 @@ dummy_func( } inst(STORE_SUBSCR, (v, container, sub -- )) { - int err; + _PyStoreSubscrCache *cache = (_PyStoreSubscrCache *)next_instr; + if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { + assert(cframe.use_tracing == 0); + next_instr--; + _Py_Specialize_StoreSubscr(container, sub, next_instr); + DISPATCH_SAME_OPARG(); + } + STAT_INC(STORE_SUBSCR, deferred); + DECREMENT_ADAPTIVE_COUNTER(cache->counter); /* container[sub] = v */ - err = PyObject_SetItem(container, sub, v); + int err = 
PyObject_SetItem(container, sub, v); Py_DECREF(v); Py_DECREF(container); Py_DECREF(sub); @@ -521,25 +519,6 @@ dummy_func( JUMPBY(INLINE_CACHE_ENTRIES_STORE_SUBSCR); } - // stack effect: (__0, __1, __2 -- ) - inst(STORE_SUBSCR_ADAPTIVE) { - _PyStoreSubscrCache *cache = (_PyStoreSubscrCache *)next_instr; - if (ADAPTIVE_COUNTER_IS_ZERO(cache)) { - PyObject *sub = TOP(); - PyObject *container = SECOND(); - next_instr--; - if (_Py_Specialize_StoreSubscr(container, sub, next_instr) < 0) { - goto error; - } - DISPATCH_SAME_OPARG(); - } - else { - STAT_INC(STORE_SUBSCR, deferred); - DECREMENT_ADAPTIVE_COUNTER(cache); - GO_TO_INSTRUCTION(STORE_SUBSCR); - } - } - // stack effect: (__0, __1, __2 -- ) inst(STORE_SUBSCR_LIST_INT) { assert(cframe.use_tracing == 0); @@ -1088,6 +1067,16 @@ dummy_func( // stack effect: (__0 -- __array[oparg]) inst(UNPACK_SEQUENCE) { + _PyUnpackSequenceCache *cache = (_PyUnpackSequenceCache *)next_instr; + if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { + assert(cframe.use_tracing == 0); + PyObject *seq = TOP(); + next_instr--; + _Py_Specialize_UnpackSequence(seq, next_instr, oparg); + DISPATCH_SAME_OPARG(); + } + STAT_INC(UNPACK_SEQUENCE, deferred); + DECREMENT_ADAPTIVE_COUNTER(cache->counter); PyObject *seq = POP(); PyObject **top = stack_pointer + oparg; if (!unpack_iterable(tstate, seq, oparg, -1, top)) { @@ -1099,23 +1088,6 @@ dummy_func( JUMPBY(INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE); } - // stack effect: (__0 -- __array[oparg]) - inst(UNPACK_SEQUENCE_ADAPTIVE) { - assert(cframe.use_tracing == 0); - _PyUnpackSequenceCache *cache = (_PyUnpackSequenceCache *)next_instr; - if (ADAPTIVE_COUNTER_IS_ZERO(cache)) { - PyObject *seq = TOP(); - next_instr--; - _Py_Specialize_UnpackSequence(seq, next_instr, oparg); - DISPATCH_SAME_OPARG(); - } - else { - STAT_INC(UNPACK_SEQUENCE, deferred); - DECREMENT_ADAPTIVE_COUNTER(cache); - GO_TO_INSTRUCTION(UNPACK_SEQUENCE); - } - } - // stack effect: (__0 -- __array[oparg]) inst(UNPACK_SEQUENCE_TWO_TUPLE) { PyObject *seq = TOP(); @@ -1173,6 +1145,19 @@ dummy_func( // stack effect: (__0, __1 -- ) inst(STORE_ATTR) { + _PyAttrCache *cache = (_PyAttrCache *)next_instr; + if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { + assert(cframe.use_tracing == 0); + PyObject *owner = TOP(); + PyObject *name = GETITEM(names, oparg); + next_instr--; + if (_Py_Specialize_StoreAttr(owner, next_instr, name)) { + goto error; + } + DISPATCH_SAME_OPARG(); + } + STAT_INC(STORE_ATTR, deferred); + DECREMENT_ADAPTIVE_COUNTER(cache->counter); PyObject *name = GETITEM(names, oparg); PyObject *owner = TOP(); PyObject *v = SECOND(); @@ -1289,6 +1274,16 @@ dummy_func( // error: LOAD_GLOBAL has irregular stack effect inst(LOAD_GLOBAL) { + _PyLoadGlobalCache *cache = (_PyLoadGlobalCache *)next_instr; + if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { + assert(cframe.use_tracing == 0); + PyObject *name = GETITEM(names, oparg>>1); + next_instr--; + _Py_Specialize_LoadGlobal(GLOBALS(), BUILTINS(), next_instr, name); + DISPATCH_SAME_OPARG(); + } + STAT_INC(LOAD_GLOBAL, deferred); + DECREMENT_ADAPTIVE_COUNTER(cache->counter); int push_null = oparg & 1; PEEK(0) = NULL; PyObject *name = GETITEM(names, oparg>>1); @@ -1339,25 +1334,6 @@ dummy_func( PUSH(v); } - // error: LOAD_GLOBAL has irregular stack effect - inst(LOAD_GLOBAL_ADAPTIVE) { - assert(cframe.use_tracing == 0); - _PyLoadGlobalCache *cache = (_PyLoadGlobalCache *)next_instr; - if (ADAPTIVE_COUNTER_IS_ZERO(cache)) { - PyObject *name = GETITEM(names, oparg>>1); - next_instr--; - if (_Py_Specialize_LoadGlobal(GLOBALS(), 
BUILTINS(), next_instr, name) < 0) { - goto error; - } - DISPATCH_SAME_OPARG(); - } - else { - STAT_INC(LOAD_GLOBAL, deferred); - DECREMENT_ADAPTIVE_COUNTER(cache); - GO_TO_INSTRUCTION(LOAD_GLOBAL); - } - } - // error: LOAD_GLOBAL has irregular stack effect inst(LOAD_GLOBAL_MODULE) { assert(cframe.use_tracing == 0); @@ -1752,6 +1728,19 @@ dummy_func( // error: LOAD_ATTR has irregular stack effect inst(LOAD_ATTR) { + _PyAttrCache *cache = (_PyAttrCache *)next_instr; + if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { + assert(cframe.use_tracing == 0); + PyObject *owner = TOP(); + PyObject *name = GETITEM(names, oparg>>1); + next_instr--; + if (_Py_Specialize_LoadAttr(owner, next_instr, name)) { + goto error; + } + DISPATCH_SAME_OPARG(); + } + STAT_INC(LOAD_ATTR, deferred); + DECREMENT_ADAPTIVE_COUNTER(cache->counter); PyObject *name = GETITEM(names, oparg >> 1); PyObject *owner = TOP(); if (oparg & 1) { @@ -1798,26 +1787,6 @@ dummy_func( JUMPBY(INLINE_CACHE_ENTRIES_LOAD_ATTR); } - // error: LOAD_ATTR has irregular stack effect - inst(LOAD_ATTR_ADAPTIVE) { - assert(cframe.use_tracing == 0); - _PyAttrCache *cache = (_PyAttrCache *)next_instr; - if (ADAPTIVE_COUNTER_IS_ZERO(cache)) { - PyObject *owner = TOP(); - PyObject *name = GETITEM(names, oparg>>1); - next_instr--; - if (_Py_Specialize_LoadAttr(owner, next_instr, name) < 0) { - goto error; - } - DISPATCH_SAME_OPARG(); - } - else { - STAT_INC(LOAD_ATTR, deferred); - DECREMENT_ADAPTIVE_COUNTER(cache); - GO_TO_INSTRUCTION(LOAD_ATTR); - } - } - // error: LOAD_ATTR has irregular stack effect inst(LOAD_ATTR_INSTANCE_VALUE) { assert(cframe.use_tracing == 0); @@ -2009,7 +1978,7 @@ dummy_func( DEOPT_IF(f->func_version != func_version, LOAD_ATTR); PyCodeObject *code = (PyCodeObject *)f->func_code; assert(code->co_argcount == 2); - DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), CALL); + DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), LOAD_ATTR); STAT_INC(LOAD_ATTR, hit); PyObject *name = GETITEM(names, oparg >> 1); @@ -2033,26 +2002,6 @@ dummy_func( goto start_frame; } - // stack effect: (__0, __1 -- ) - inst(STORE_ATTR_ADAPTIVE) { - assert(cframe.use_tracing == 0); - _PyAttrCache *cache = (_PyAttrCache *)next_instr; - if (ADAPTIVE_COUNTER_IS_ZERO(cache)) { - PyObject *owner = TOP(); - PyObject *name = GETITEM(names, oparg); - next_instr--; - if (_Py_Specialize_StoreAttr(owner, next_instr, name) < 0) { - goto error; - } - DISPATCH_SAME_OPARG(); - } - else { - STAT_INC(STORE_ATTR, deferred); - DECREMENT_ADAPTIVE_COUNTER(cache); - GO_TO_INSTRUCTION(STORE_ATTR); - } - } - // stack effect: (__0, __1 -- ) inst(STORE_ATTR_INSTANCE_VALUE) { assert(cframe.use_tracing == 0); @@ -2093,7 +2042,7 @@ dummy_func( DEOPT_IF(tp->tp_version_tag != type_version, STORE_ATTR); assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT); PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); - DEOPT_IF(_PyDictOrValues_IsValues(dorv), LOAD_ATTR); + DEOPT_IF(_PyDictOrValues_IsValues(dorv), STORE_ATTR); PyDictObject *dict = (PyDictObject *)_PyDictOrValues_GetDict(dorv); DEOPT_IF(dict == NULL, STORE_ATTR); assert(PyDict_CheckExact((PyObject *)dict)); @@ -2155,7 +2104,7 @@ dummy_func( } // stack effect: (__0 -- ) - inst(COMPARE_OP) { + inst(COMPARE_OP_GENERIC) { assert(oparg <= Py_GE); PyObject *right = POP(); PyObject *left = TOP(); @@ -2170,21 +2119,19 @@ dummy_func( } // stack effect: (__0 -- ) - inst(COMPARE_OP_ADAPTIVE) { - assert(cframe.use_tracing == 0); + inst(COMPARE_OP) { _PyCompareOpCache *cache = (_PyCompareOpCache *)next_instr; - 
if (ADAPTIVE_COUNTER_IS_ZERO(cache)) { + if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { + assert(cframe.use_tracing == 0); PyObject *right = TOP(); PyObject *left = SECOND(); next_instr--; _Py_Specialize_CompareOp(left, right, next_instr, oparg); DISPATCH_SAME_OPARG(); } - else { - STAT_INC(COMPARE_OP, deferred); - DECREMENT_ADAPTIVE_COUNTER(cache); - GO_TO_INSTRUCTION(COMPARE_OP); - } + STAT_INC(COMPARE_OP, deferred); + DECREMENT_ADAPTIVE_COUNTER(cache->counter); + GO_TO_INSTRUCTION(COMPARE_OP_GENERIC); } // stack effect: (__0 -- ) @@ -2665,6 +2612,15 @@ dummy_func( // stack effect: ( -- __0) inst(FOR_ITER) { + _PyForIterCache *cache = (_PyForIterCache *)next_instr; + if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { + assert(cframe.use_tracing == 0); + next_instr--; + _Py_Specialize_ForIter(TOP(), next_instr, oparg); + DISPATCH_SAME_OPARG(); + } + STAT_INC(FOR_ITER, deferred); + DECREMENT_ADAPTIVE_COUNTER(cache->counter); /* before: [iter]; after: [iter, iter()] *or* [] */ PyObject *iter = TOP(); PyObject *next = (*Py_TYPE(iter)->tp_iternext)(iter); @@ -2690,22 +2646,6 @@ dummy_func( JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1); } - // stack effect: ( -- __0) - inst(FOR_ITER_ADAPTIVE) { - assert(cframe.use_tracing == 0); - _PyForIterCache *cache = (_PyForIterCache *)next_instr; - if (ADAPTIVE_COUNTER_IS_ZERO(cache)) { - next_instr--; - _Py_Specialize_ForIter(TOP(), next_instr, oparg); - DISPATCH_SAME_OPARG(); - } - else { - STAT_INC(FOR_ITER, deferred); - DECREMENT_ADAPTIVE_COUNTER(cache); - GO_TO_INSTRUCTION(FOR_ITER); - } - } - // stack effect: ( -- __0) inst(FOR_ITER_LIST) { assert(cframe.use_tracing == 0); @@ -3018,6 +2958,18 @@ dummy_func( // stack effect: (__0, __array[oparg] -- ) inst(CALL) { + _PyCallCache *cache = (_PyCallCache *)next_instr; + if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { + assert(cframe.use_tracing == 0); + int is_meth = is_method(stack_pointer, oparg); + int nargs = oparg + is_meth; + PyObject *callable = PEEK(nargs + 1); + next_instr--; + _Py_Specialize_Call(callable, next_instr, nargs, call_shape.kwnames); + DISPATCH_SAME_OPARG(); + } + STAT_INC(CALL, deferred); + DECREMENT_ADAPTIVE_COUNTER(cache->counter); int total_args, is_meth; is_meth = is_method(stack_pointer, oparg); PyObject *function = PEEK(oparg + 1); @@ -3091,28 +3043,6 @@ dummy_func( CHECK_EVAL_BREAKER(); } - // stack effect: (__0, __array[oparg] -- ) - inst(CALL_ADAPTIVE) { - _PyCallCache *cache = (_PyCallCache *)next_instr; - if (ADAPTIVE_COUNTER_IS_ZERO(cache)) { - next_instr--; - int is_meth = is_method(stack_pointer, oparg); - int nargs = oparg + is_meth; - PyObject *callable = PEEK(nargs + 1); - int err = _Py_Specialize_Call(callable, next_instr, nargs, - call_shape.kwnames); - if (err < 0) { - goto error; - } - DISPATCH_SAME_OPARG(); - } - else { - STAT_INC(CALL, deferred); - DECREMENT_ADAPTIVE_COUNTER(cache); - GO_TO_INSTRUCTION(CALL); - } - } - // stack effect: (__0, __array[oparg] -- ) inst(CALL_PY_EXACT_ARGS) { assert(call_shape.kwnames == NULL); @@ -3809,7 +3739,7 @@ dummy_func( } // stack effect: (__0 -- ) - inst(BINARY_OP) { + inst(BINARY_OP_GENERIC) { PyObject *rhs = POP(); PyObject *lhs = TOP(); assert(0 <= oparg); @@ -3826,21 +3756,19 @@ dummy_func( } // stack effect: (__0 -- ) - inst(BINARY_OP_ADAPTIVE) { - assert(cframe.use_tracing == 0); + inst(BINARY_OP) { _PyBinaryOpCache *cache = (_PyBinaryOpCache *)next_instr; - if (ADAPTIVE_COUNTER_IS_ZERO(cache)) { + if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { + assert(cframe.use_tracing == 0); PyObject *lhs = SECOND(); 
PyObject *rhs = TOP(); next_instr--; _Py_Specialize_BinaryOp(lhs, rhs, next_instr, oparg, &GETLOCAL(0)); DISPATCH_SAME_OPARG(); } - else { - STAT_INC(BINARY_OP, deferred); - DECREMENT_ADAPTIVE_COUNTER(cache); - GO_TO_INSTRUCTION(BINARY_OP); - } + STAT_INC(BINARY_OP, deferred); + DECREMENT_ADAPTIVE_COUNTER(cache->counter); + GO_TO_INSTRUCTION(BINARY_OP_GENERIC); } // stack effect: ( -- ) @@ -3854,28 +3782,10 @@ dummy_func( // stack effect: ( -- ) inst(EXTENDED_ARG) { assert(oparg); - oparg <<= 8; - oparg |= _Py_OPARG(*next_instr); - // We might be tracing. To avoid breaking tracing guarantees in - // quickened instructions, always deoptimize the next opcode: - opcode = _PyOpcode_Deopt[_Py_OPCODE(*next_instr)]; - PRE_DISPATCH_GOTO(); - // CPython hasn't traced the following instruction historically - // (DO_TRACING would clobber our extended oparg anyways), so just - // skip our usual cframe.use_tracing check before dispatch. Also, - // make sure the next instruction isn't a RESUME, since that needs - // to trace properly (and shouldn't have an extended arg anyways): - assert(opcode != RESUME); - DISPATCH_GOTO(); - } - - // stack effect: ( -- ) - inst(EXTENDED_ARG_QUICK) { assert(cframe.use_tracing == 0); - assert(oparg); - int oldoparg = oparg; - NEXTOPARG(); - oparg |= oldoparg << 8; + opcode = _Py_OPCODE(*next_instr); + oparg = oparg << 8 | _Py_OPARG(*next_instr); + PRE_DISPATCH_GOTO(); DISPATCH_GOTO(); } @@ -3901,15 +3811,15 @@ dummy_func( // Families go below this point // family(binary_op) = { - BINARY_OP, BINARY_OP_ADAPTIVE, BINARY_OP_ADD_FLOAT, - BINARY_OP_ADD_INT, BINARY_OP_ADD_UNICODE, BINARY_OP_INPLACE_ADD_UNICODE, + BINARY_OP, BINARY_OP_ADD_FLOAT, + BINARY_OP_ADD_INT, BINARY_OP_ADD_UNICODE, BINARY_OP_GENERIC, BINARY_OP_INPLACE_ADD_UNICODE, BINARY_OP_MULTIPLY_FLOAT, BINARY_OP_MULTIPLY_INT, BINARY_OP_SUBTRACT_FLOAT, BINARY_OP_SUBTRACT_INT }; family(binary_subscr) = { - BINARY_SUBSCR, BINARY_SUBSCR_ADAPTIVE, BINARY_SUBSCR_DICT, + BINARY_SUBSCR, BINARY_SUBSCR_DICT, BINARY_SUBSCR_GETITEM, BINARY_SUBSCR_LIST_INT, BINARY_SUBSCR_TUPLE_INT }; family(call) = { - CALL, CALL_ADAPTIVE, CALL_PY_EXACT_ARGS, + CALL, CALL_PY_EXACT_ARGS, CALL_PY_WITH_DEFAULTS, CALL_BOUND_METHOD_EXACT_ARGS, CALL_BUILTIN_CLASS, CALL_BUILTIN_FAST_WITH_KEYWORDS, CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS, CALL_NO_KW_BUILTIN_FAST, CALL_NO_KW_BUILTIN_O, CALL_NO_KW_ISINSTANCE, CALL_NO_KW_LEN, @@ -3917,14 +3827,13 @@ family(call) = { CALL_NO_KW_METHOD_DESCRIPTOR_O, CALL_NO_KW_STR_1, CALL_NO_KW_TUPLE_1, CALL_NO_KW_TYPE_1 }; family(compare_op) = { - COMPARE_OP, COMPARE_OP_ADAPTIVE, COMPARE_OP_FLOAT_JUMP, + COMPARE_OP, COMPARE_OP_FLOAT_JUMP, COMPARE_OP_GENERIC, COMPARE_OP_INT_JUMP, COMPARE_OP_STR_JUMP }; -family(extended_arg) = { EXTENDED_ARG, EXTENDED_ARG_QUICK }; family(for_iter) = { - FOR_ITER, FOR_ITER_ADAPTIVE, FOR_ITER_LIST, + FOR_ITER, FOR_ITER_LIST, FOR_ITER_RANGE }; family(load_attr) = { - LOAD_ATTR, LOAD_ATTR_ADAPTIVE, LOAD_ATTR_CLASS, + LOAD_ATTR, LOAD_ATTR_CLASS, LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN, LOAD_ATTR_INSTANCE_VALUE, LOAD_ATTR_MODULE, LOAD_ATTR_PROPERTY, LOAD_ATTR_SLOT, LOAD_ATTR_WITH_HINT, LOAD_ATTR_METHOD_LAZY_DICT, LOAD_ATTR_METHOD_NO_DICT, LOAD_ATTR_METHOD_WITH_DICT, @@ -3932,15 +3841,15 @@ family(load_attr) = { family(load_const) = { LOAD_CONST, LOAD_CONST__LOAD_FAST }; family(load_fast) = { LOAD_FAST, LOAD_FAST__LOAD_CONST, LOAD_FAST__LOAD_FAST }; family(load_global) = { - LOAD_GLOBAL, LOAD_GLOBAL_ADAPTIVE, LOAD_GLOBAL_BUILTIN, + LOAD_GLOBAL, LOAD_GLOBAL_BUILTIN, LOAD_GLOBAL_MODULE }; 
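The `EXTENDED_ARG` rewrite above leaves a single accumulation strategy for both the tracing loop in ceval.c and the instruction itself: each `EXTENDED_ARG` byte is shifted into the next-higher byte of the following instruction's oparg. Here is a standalone sketch of that arithmetic (illustrative only; the byte values are invented, and the real interpreter reads them from `_Py_OPARG(*next_instr)`):

```c
#include <stdio.h>

int main(void)
{
    /* Two chained EXTENDED_ARG bytes (0x01, 0x02), then the real arg
     * byte (0x2C); the values are invented for this demo: */
    unsigned char args[] = {0x01, 0x02, 0x2C};
    int oparg = 0;
    for (int i = 0; i < 3; i++) {
        /* The same shift-and-or as inst(EXTENDED_ARG) above: */
        oparg = oparg << 8 | args[i];
    }
    printf("effective oparg: 0x%X (%d)\n", oparg, oparg);  /* 0x1022C (66092) */
    return 0;
}
```

Up to three `EXTENDED_ARG`s can precede an instruction, which is how an 8-bit arg field carries 32-bit values.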
family(store_attr) = { - STORE_ATTR, STORE_ATTR_ADAPTIVE, STORE_ATTR_INSTANCE_VALUE, + STORE_ATTR, STORE_ATTR_INSTANCE_VALUE, STORE_ATTR_SLOT, STORE_ATTR_WITH_HINT }; family(store_fast) = { STORE_FAST, STORE_FAST__LOAD_FAST, STORE_FAST__STORE_FAST }; family(store_subscr) = { - STORE_SUBSCR, STORE_SUBSCR_ADAPTIVE, STORE_SUBSCR_DICT, + STORE_SUBSCR, STORE_SUBSCR_DICT, STORE_SUBSCR_LIST_INT }; family(unpack_sequence) = { - UNPACK_SEQUENCE, UNPACK_SEQUENCE_ADAPTIVE, UNPACK_SEQUENCE_LIST, + UNPACK_SEQUENCE, UNPACK_SEQUENCE_LIST, UNPACK_SEQUENCE_TUPLE, UNPACK_SEQUENCE_TWO_TUPLE }; diff --git a/Python/ceval.c b/Python/ceval.c index 4828adb576f..af706e1a021 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -743,12 +743,6 @@ GETITEM(PyObject *v, Py_ssize_t i) { #define JUMPTO(x) (next_instr = first_instr + (x)) #define JUMPBY(x) (next_instr += (x)) -/* Get opcode and oparg from original instructions, not quickened form. */ -#define TRACING_NEXTOPARG() do { \ - NEXTOPARG(); \ - opcode = _PyOpcode_Deopt[opcode]; \ - } while (0) - /* OpCode prediction macros Some opcodes tend to come in pairs thus making it possible to predict the second code when the first is run. For example, @@ -852,8 +846,31 @@ GETITEM(PyObject *v, Py_ssize_t i) { #define GO_TO_INSTRUCTION(op) goto PREDICT_ID(op) +#ifdef Py_STATS +#define UPDATE_MISS_STATS(INSTNAME) \ + do { \ + STAT_INC(opcode, miss); \ + STAT_INC((INSTNAME), miss); \ + /* The counter is always the first cache entry: */ \ + if (ADAPTIVE_COUNTER_IS_ZERO(*next_instr)) { \ + STAT_INC((INSTNAME), deopt); \ + } \ + else { \ + /* This is about to be (incorrectly) incremented: */ \ + STAT_DEC((INSTNAME), deferred); \ + } \ + } while (0) +#else +#define UPDATE_MISS_STATS(INSTNAME) ((void)0) +#endif -#define DEOPT_IF(cond, instname) if (cond) { goto miss; } +#define DEOPT_IF(COND, INSTNAME) \ + if ((COND)) { \ + /* This is only a single jump on release builds! */ \ + UPDATE_MISS_STATS((INSTNAME)); \ + assert(_PyOpcode_Deopt[opcode] == (INSTNAME)); \ + GO_TO_INSTRUCTION(INSTNAME); \ + } #define GLOBALS() frame->f_globals @@ -905,11 +922,23 @@ GETITEM(PyObject *v, Py_ssize_t i) { dtrace_function_entry(frame); \ } -#define ADAPTIVE_COUNTER_IS_ZERO(cache) \ - (cache)->counter < (1<<ADAPTIVE_BACKOFF_BITS) +#define ADAPTIVE_COUNTER_IS_ZERO(COUNTER) \ + (((COUNTER) >> ADAPTIVE_BACKOFF_BITS) == 0) -#define DECREMENT_ADAPTIVE_COUNTER(cache) \ - (cache)->counter -= (1<<ADAPTIVE_BACKOFF_BITS) +#define ADAPTIVE_COUNTER_IS_MAX(COUNTER) \ + (((COUNTER) >> ADAPTIVE_BACKOFF_BITS) == ((1 << MAX_BACKOFF_VALUE) - 1)) + +#define DECREMENT_ADAPTIVE_COUNTER(COUNTER) \ + do { \ + assert(!ADAPTIVE_COUNTER_IS_ZERO((COUNTER))); \ + (COUNTER) -= (1 << ADAPTIVE_BACKOFF_BITS); \ + } while (0); + +#define INCREMENT_ADAPTIVE_COUNTER(COUNTER) \ + do { \ + assert(!ADAPTIVE_COUNTER_IS_MAX((COUNTER))); \ + (COUNTER) += (1 << ADAPTIVE_BACKOFF_BITS); \ + } while (0); static int trace_function_entry(PyThreadState *tstate, _PyInterpreterFrame *frame) @@ -1172,7 +1201,8 @@ handle_eval_breaker: if (INSTR_OFFSET() >= frame->f_code->_co_firsttraceable) { int instr_prev = _PyInterpreterFrame_LASTI(frame); frame->prev_instr = next_instr; - TRACING_NEXTOPARG(); + NEXTOPARG(); + // No _PyOpcode_Deopt here, since RESUME has no optimized forms: if (opcode == RESUME) { if (oparg < 2) { CHECK_EVAL_BREAKER(); @@ -1219,8 +1249,29 @@ handle_eval_breaker: } } } - TRACING_NEXTOPARG(); + NEXTOPARG(); PRE_DISPATCH_GOTO(); + // No _PyOpcode_Deopt here, since EXTENDED_ARG has no optimized forms: + while (opcode == EXTENDED_ARG) { + // CPython hasn't ever traced the instruction after an EXTENDED_ARG. 
+ // Inline the EXTENDED_ARG here, so we can avoid branching there: + INSTRUCTION_START(EXTENDED_ARG); + opcode = _Py_OPCODE(*next_instr); + oparg = oparg << 8 | _Py_OPARG(*next_instr); + // Make sure the next instruction isn't a RESUME, since that needs + // to trace properly (and shouldn't have an EXTENDED_ARG, anyways): + assert(opcode != RESUME); + PRE_DISPATCH_GOTO(); + } + opcode = _PyOpcode_Deopt[opcode]; + if (_PyOpcode_Caches[opcode]) { + _Py_CODEUNIT *counter = &next_instr[1]; + // The instruction is going to decrement the counter, so we need to + // increment it here to make sure it doesn't try to specialize: + if (!ADAPTIVE_COUNTER_IS_MAX(*counter)) { + INCREMENT_ADAPTIVE_COUNTER(*counter); + } + } DISPATCH_GOTO(); } @@ -1245,27 +1296,6 @@ handle_eval_breaker: or goto error. */ Py_UNREACHABLE(); -/* Specialization misses */ - -miss: - { - STAT_INC(opcode, miss); - opcode = _PyOpcode_Deopt[opcode]; - STAT_INC(opcode, miss); - /* The counter is always the first cache entry: */ - _Py_CODEUNIT *counter = (_Py_CODEUNIT *)next_instr; - *counter -= 1; - if (*counter == 0) { - int adaptive_opcode = _PyOpcode_Adaptive[opcode]; - assert(adaptive_opcode); - _Py_SET_OPCODE(next_instr[-1], adaptive_opcode); - STAT_INC(opcode, deopt); - *counter = adaptive_counter_start(); - } - next_instr--; - DISPATCH_GOTO(); - } - unbound_local_error: { format_exc_check_arg(tstate, PyExc_UnboundLocalError, diff --git a/Python/compile.c b/Python/compile.c index 065d1b08d06..030378f19a3 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -8062,7 +8062,6 @@ scan_block_for_locals(basicblock *b, basicblock ***sp) for (int i = 0; i < b->b_iused; i++) { struct instr *instr = &b->b_instr[i]; assert(instr->i_opcode != EXTENDED_ARG); - assert(instr->i_opcode != EXTENDED_ARG_QUICK); assert(!IS_SUPERINSTRUCTION_OPCODE(instr->i_opcode)); if (instr->i_except != NULL) { maybe_push(instr->i_except, unsafe_mask, sp); @@ -8119,7 +8118,6 @@ fast_scan_many_locals(basicblock *entryblock, int nlocals) for (int i = 0; i < b->b_iused; i++) { struct instr *instr = &b->b_instr[i]; assert(instr->i_opcode != EXTENDED_ARG); - assert(instr->i_opcode != EXTENDED_ARG_QUICK); assert(!IS_SUPERINSTRUCTION_OPCODE(instr->i_opcode)); int arg = instr->i_oparg; if (arg < 64) { @@ -8667,7 +8665,6 @@ fix_cell_offsets(struct compiler *c, basicblock *entryblock, int *fixedmap) struct instr *inst = &b->b_instr[i]; // This is called before extended args are generated. 
assert(inst->i_opcode != EXTENDED_ARG); - assert(inst->i_opcode != EXTENDED_ARG_QUICK); int oldoffset = inst->i_oparg; switch(inst->i_opcode) { case MAKE_CELL: diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 4e9b665db1a..da6b34038cd 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -301,6 +301,15 @@ PyObject *sub = PEEK(1); PyObject *container = PEEK(2); PyObject *res; + _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr; + if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { + assert(cframe.use_tracing == 0); + next_instr--; + _Py_Specialize_BinarySubscr(container, sub, next_instr); + DISPATCH_SAME_OPARG(); + } + STAT_INC(BINARY_SUBSCR, deferred); + DECREMENT_ADAPTIVE_COUNTER(cache->counter); res = PyObject_GetItem(container, sub); Py_DECREF(container); Py_DECREF(sub); @@ -354,25 +363,6 @@ DISPATCH(); } - TARGET(BINARY_SUBSCR_ADAPTIVE) { - _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr; - if (ADAPTIVE_COUNTER_IS_ZERO(cache)) { - PyObject *sub = TOP(); - PyObject *container = SECOND(); - next_instr--; - if (_Py_Specialize_BinarySubscr(container, sub, next_instr) < 0) { - goto error; - } - DISPATCH_SAME_OPARG(); - } - else { - STAT_INC(BINARY_SUBSCR, deferred); - DECREMENT_ADAPTIVE_COUNTER(cache); - GO_TO_INSTRUCTION(BINARY_SUBSCR); - } - DISPATCH(); - } - TARGET(BINARY_SUBSCR_LIST_INT) { assert(cframe.use_tracing == 0); PyObject *sub = TOP(); @@ -504,9 +494,17 @@ PyObject *sub = PEEK(1); PyObject *container = PEEK(2); PyObject *v = PEEK(3); - int err; + _PyStoreSubscrCache *cache = (_PyStoreSubscrCache *)next_instr; + if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { + assert(cframe.use_tracing == 0); + next_instr--; + _Py_Specialize_StoreSubscr(container, sub, next_instr); + DISPATCH_SAME_OPARG(); + } + STAT_INC(STORE_SUBSCR, deferred); + DECREMENT_ADAPTIVE_COUNTER(cache->counter); /* container[sub] = v */ - err = PyObject_SetItem(container, sub, v); + int err = PyObject_SetItem(container, sub, v); Py_DECREF(v); Py_DECREF(container); Py_DECREF(sub); @@ -516,25 +514,6 @@ DISPATCH(); } - TARGET(STORE_SUBSCR_ADAPTIVE) { - _PyStoreSubscrCache *cache = (_PyStoreSubscrCache *)next_instr; - if (ADAPTIVE_COUNTER_IS_ZERO(cache)) { - PyObject *sub = TOP(); - PyObject *container = SECOND(); - next_instr--; - if (_Py_Specialize_StoreSubscr(container, sub, next_instr) < 0) { - goto error; - } - DISPATCH_SAME_OPARG(); - } - else { - STAT_INC(STORE_SUBSCR, deferred); - DECREMENT_ADAPTIVE_COUNTER(cache); - GO_TO_INSTRUCTION(STORE_SUBSCR); - } - DISPATCH(); - } - TARGET(STORE_SUBSCR_LIST_INT) { assert(cframe.use_tracing == 0); PyObject *sub = TOP(); @@ -1078,6 +1057,16 @@ TARGET(UNPACK_SEQUENCE) { PREDICTED(UNPACK_SEQUENCE); + _PyUnpackSequenceCache *cache = (_PyUnpackSequenceCache *)next_instr; + if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { + assert(cframe.use_tracing == 0); + PyObject *seq = TOP(); + next_instr--; + _Py_Specialize_UnpackSequence(seq, next_instr, oparg); + DISPATCH_SAME_OPARG(); + } + STAT_INC(UNPACK_SEQUENCE, deferred); + DECREMENT_ADAPTIVE_COUNTER(cache->counter); PyObject *seq = POP(); PyObject **top = stack_pointer + oparg; if (!unpack_iterable(tstate, seq, oparg, -1, top)) { @@ -1090,23 +1079,6 @@ DISPATCH(); } - TARGET(UNPACK_SEQUENCE_ADAPTIVE) { - assert(cframe.use_tracing == 0); - _PyUnpackSequenceCache *cache = (_PyUnpackSequenceCache *)next_instr; - if (ADAPTIVE_COUNTER_IS_ZERO(cache)) { - PyObject *seq = TOP(); - next_instr--; - _Py_Specialize_UnpackSequence(seq, next_instr, oparg); - 
DISPATCH_SAME_OPARG(); - } - else { - STAT_INC(UNPACK_SEQUENCE, deferred); - DECREMENT_ADAPTIVE_COUNTER(cache); - GO_TO_INSTRUCTION(UNPACK_SEQUENCE); - } - DISPATCH(); - } - TARGET(UNPACK_SEQUENCE_TWO_TUPLE) { PyObject *seq = TOP(); DEOPT_IF(!PyTuple_CheckExact(seq), UNPACK_SEQUENCE); @@ -1164,6 +1136,19 @@ TARGET(STORE_ATTR) { PREDICTED(STORE_ATTR); + _PyAttrCache *cache = (_PyAttrCache *)next_instr; + if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { + assert(cframe.use_tracing == 0); + PyObject *owner = TOP(); + PyObject *name = GETITEM(names, oparg); + next_instr--; + if (_Py_Specialize_StoreAttr(owner, next_instr, name)) { + goto error; + } + DISPATCH_SAME_OPARG(); + } + STAT_INC(STORE_ATTR, deferred); + DECREMENT_ADAPTIVE_COUNTER(cache->counter); PyObject *name = GETITEM(names, oparg); PyObject *owner = TOP(); PyObject *v = SECOND(); @@ -1281,6 +1266,16 @@ TARGET(LOAD_GLOBAL) { PREDICTED(LOAD_GLOBAL); + _PyLoadGlobalCache *cache = (_PyLoadGlobalCache *)next_instr; + if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { + assert(cframe.use_tracing == 0); + PyObject *name = GETITEM(names, oparg>>1); + next_instr--; + _Py_Specialize_LoadGlobal(GLOBALS(), BUILTINS(), next_instr, name); + DISPATCH_SAME_OPARG(); + } + STAT_INC(LOAD_GLOBAL, deferred); + DECREMENT_ADAPTIVE_COUNTER(cache->counter); int push_null = oparg & 1; PEEK(0) = NULL; PyObject *name = GETITEM(names, oparg>>1); @@ -1332,25 +1327,6 @@ DISPATCH(); } - TARGET(LOAD_GLOBAL_ADAPTIVE) { - assert(cframe.use_tracing == 0); - _PyLoadGlobalCache *cache = (_PyLoadGlobalCache *)next_instr; - if (ADAPTIVE_COUNTER_IS_ZERO(cache)) { - PyObject *name = GETITEM(names, oparg>>1); - next_instr--; - if (_Py_Specialize_LoadGlobal(GLOBALS(), BUILTINS(), next_instr, name) < 0) { - goto error; - } - DISPATCH_SAME_OPARG(); - } - else { - STAT_INC(LOAD_GLOBAL, deferred); - DECREMENT_ADAPTIVE_COUNTER(cache); - GO_TO_INSTRUCTION(LOAD_GLOBAL); - } - DISPATCH(); - } - TARGET(LOAD_GLOBAL_MODULE) { assert(cframe.use_tracing == 0); DEOPT_IF(!PyDict_CheckExact(GLOBALS()), LOAD_GLOBAL); @@ -1743,6 +1719,19 @@ TARGET(LOAD_ATTR) { PREDICTED(LOAD_ATTR); + _PyAttrCache *cache = (_PyAttrCache *)next_instr; + if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { + assert(cframe.use_tracing == 0); + PyObject *owner = TOP(); + PyObject *name = GETITEM(names, oparg>>1); + next_instr--; + if (_Py_Specialize_LoadAttr(owner, next_instr, name)) { + goto error; + } + DISPATCH_SAME_OPARG(); + } + STAT_INC(LOAD_ATTR, deferred); + DECREMENT_ADAPTIVE_COUNTER(cache->counter); PyObject *name = GETITEM(names, oparg >> 1); PyObject *owner = TOP(); if (oparg & 1) { @@ -1790,26 +1779,6 @@ DISPATCH(); } - TARGET(LOAD_ATTR_ADAPTIVE) { - assert(cframe.use_tracing == 0); - _PyAttrCache *cache = (_PyAttrCache *)next_instr; - if (ADAPTIVE_COUNTER_IS_ZERO(cache)) { - PyObject *owner = TOP(); - PyObject *name = GETITEM(names, oparg>>1); - next_instr--; - if (_Py_Specialize_LoadAttr(owner, next_instr, name) < 0) { - goto error; - } - DISPATCH_SAME_OPARG(); - } - else { - STAT_INC(LOAD_ATTR, deferred); - DECREMENT_ADAPTIVE_COUNTER(cache); - GO_TO_INSTRUCTION(LOAD_ATTR); - } - DISPATCH(); - } - TARGET(LOAD_ATTR_INSTANCE_VALUE) { assert(cframe.use_tracing == 0); PyObject *owner = TOP(); @@ -1999,7 +1968,7 @@ DEOPT_IF(f->func_version != func_version, LOAD_ATTR); PyCodeObject *code = (PyCodeObject *)f->func_code; assert(code->co_argcount == 2); - DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), CALL); + DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), LOAD_ATTR); 
             STAT_INC(LOAD_ATTR, hit);
             PyObject *name = GETITEM(names, oparg >> 1);
@@ -2023,26 +1992,6 @@
             goto start_frame;
         }
 
-        TARGET(STORE_ATTR_ADAPTIVE) {
-            assert(cframe.use_tracing == 0);
-            _PyAttrCache *cache = (_PyAttrCache *)next_instr;
-            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
-                PyObject *owner = TOP();
-                PyObject *name = GETITEM(names, oparg);
-                next_instr--;
-                if (_Py_Specialize_StoreAttr(owner, next_instr, name) < 0) {
-                    goto error;
-                }
-                DISPATCH_SAME_OPARG();
-            }
-            else {
-                STAT_INC(STORE_ATTR, deferred);
-                DECREMENT_ADAPTIVE_COUNTER(cache);
-                GO_TO_INSTRUCTION(STORE_ATTR);
-            }
-            DISPATCH();
-        }
-
         TARGET(STORE_ATTR_INSTANCE_VALUE) {
             assert(cframe.use_tracing == 0);
             PyObject *owner = TOP();
@@ -2082,7 +2031,7 @@
             DEOPT_IF(tp->tp_version_tag != type_version, STORE_ATTR);
             assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT);
             PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner);
-            DEOPT_IF(_PyDictOrValues_IsValues(dorv), LOAD_ATTR);
+            DEOPT_IF(_PyDictOrValues_IsValues(dorv), STORE_ATTR);
             PyDictObject *dict = (PyDictObject *)_PyDictOrValues_GetDict(dorv);
             DEOPT_IF(dict == NULL, STORE_ATTR);
             assert(PyDict_CheckExact((PyObject *)dict));
@@ -2144,8 +2093,8 @@
             DISPATCH();
         }
 
-        TARGET(COMPARE_OP) {
-            PREDICTED(COMPARE_OP);
+        TARGET(COMPARE_OP_GENERIC) {
+            PREDICTED(COMPARE_OP_GENERIC);
             assert(oparg <= Py_GE);
             PyObject *right = POP();
             PyObject *left = TOP();
@@ -2160,22 +2109,20 @@
             DISPATCH();
         }
 
-        TARGET(COMPARE_OP_ADAPTIVE) {
-            assert(cframe.use_tracing == 0);
+        TARGET(COMPARE_OP) {
+            PREDICTED(COMPARE_OP);
             _PyCompareOpCache *cache = (_PyCompareOpCache *)next_instr;
-            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) {
+                assert(cframe.use_tracing == 0);
                 PyObject *right = TOP();
                 PyObject *left = SECOND();
                 next_instr--;
                 _Py_Specialize_CompareOp(left, right, next_instr, oparg);
                 DISPATCH_SAME_OPARG();
             }
-            else {
-                STAT_INC(COMPARE_OP, deferred);
-                DECREMENT_ADAPTIVE_COUNTER(cache);
-                GO_TO_INSTRUCTION(COMPARE_OP);
-            }
-            DISPATCH();
+            STAT_INC(COMPARE_OP, deferred);
+            DECREMENT_ADAPTIVE_COUNTER(cache->counter);
+            GO_TO_INSTRUCTION(COMPARE_OP_GENERIC);
         }
 
         TARGET(COMPARE_OP_FLOAT_JUMP) {
@@ -2658,6 +2605,15 @@
 
         TARGET(FOR_ITER) {
             PREDICTED(FOR_ITER);
+            _PyForIterCache *cache = (_PyForIterCache *)next_instr;
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) {
+                assert(cframe.use_tracing == 0);
+                next_instr--;
+                _Py_Specialize_ForIter(TOP(), next_instr, oparg);
+                DISPATCH_SAME_OPARG();
+            }
+            STAT_INC(FOR_ITER, deferred);
+            DECREMENT_ADAPTIVE_COUNTER(cache->counter);
             /* before: [iter]; after: [iter, iter()] *or* [] */
             PyObject *iter = TOP();
             PyObject *next = (*Py_TYPE(iter)->tp_iternext)(iter);
@@ -2684,22 +2640,6 @@
             DISPATCH();
         }
 
-        TARGET(FOR_ITER_ADAPTIVE) {
-            assert(cframe.use_tracing == 0);
-            _PyForIterCache *cache = (_PyForIterCache *)next_instr;
-            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
-                next_instr--;
-                _Py_Specialize_ForIter(TOP(), next_instr, oparg);
-                DISPATCH_SAME_OPARG();
-            }
-            else {
-                STAT_INC(FOR_ITER, deferred);
-                DECREMENT_ADAPTIVE_COUNTER(cache);
-                GO_TO_INSTRUCTION(FOR_ITER);
-            }
-            DISPATCH();
-        }
-
         TARGET(FOR_ITER_LIST) {
             assert(cframe.use_tracing == 0);
             _PyListIterObject *it = (_PyListIterObject *)TOP();
@@ -3009,6 +2949,18 @@
 
         TARGET(CALL) {
             PREDICTED(CALL);
+            _PyCallCache *cache = (_PyCallCache *)next_instr;
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) {
+                assert(cframe.use_tracing == 0);
+                int is_meth = is_method(stack_pointer, oparg);
+                int nargs = oparg + is_meth;
+                PyObject *callable = PEEK(nargs + 1);
+                next_instr--;
+                _Py_Specialize_Call(callable, next_instr, nargs, call_shape.kwnames);
+                DISPATCH_SAME_OPARG();
+            }
+            STAT_INC(CALL, deferred);
+            DECREMENT_ADAPTIVE_COUNTER(cache->counter);
             int total_args, is_meth;
             is_meth = is_method(stack_pointer, oparg);
             PyObject *function = PEEK(oparg + 1);
@@ -3083,28 +3035,6 @@
             DISPATCH();
         }
 
-        TARGET(CALL_ADAPTIVE) {
-            _PyCallCache *cache = (_PyCallCache *)next_instr;
-            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
-                next_instr--;
-                int is_meth = is_method(stack_pointer, oparg);
-                int nargs = oparg + is_meth;
-                PyObject *callable = PEEK(nargs + 1);
-                int err = _Py_Specialize_Call(callable, next_instr, nargs,
-                                              call_shape.kwnames);
-                if (err < 0) {
-                    goto error;
-                }
-                DISPATCH_SAME_OPARG();
-            }
-            else {
-                STAT_INC(CALL, deferred);
-                DECREMENT_ADAPTIVE_COUNTER(cache);
-                GO_TO_INSTRUCTION(CALL);
-            }
-            DISPATCH();
-        }
-
         TARGET(CALL_PY_EXACT_ARGS) {
             PREDICTED(CALL_PY_EXACT_ARGS);
             assert(call_shape.kwnames == NULL);
@@ -3799,8 +3729,8 @@
             DISPATCH();
         }
 
-        TARGET(BINARY_OP) {
-            PREDICTED(BINARY_OP);
+        TARGET(BINARY_OP_GENERIC) {
+            PREDICTED(BINARY_OP_GENERIC);
             PyObject *rhs = POP();
             PyObject *lhs = TOP();
             assert(0 <= oparg);
@@ -3817,22 +3747,20 @@
             DISPATCH();
         }
 
-        TARGET(BINARY_OP_ADAPTIVE) {
-            assert(cframe.use_tracing == 0);
+        TARGET(BINARY_OP) {
+            PREDICTED(BINARY_OP);
             _PyBinaryOpCache *cache = (_PyBinaryOpCache *)next_instr;
-            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) {
+                assert(cframe.use_tracing == 0);
                 PyObject *lhs = SECOND();
                 PyObject *rhs = TOP();
                 next_instr--;
                 _Py_Specialize_BinaryOp(lhs, rhs, next_instr, oparg, &GETLOCAL(0));
                 DISPATCH_SAME_OPARG();
            }
-            else {
-                STAT_INC(BINARY_OP, deferred);
-                DECREMENT_ADAPTIVE_COUNTER(cache);
-                GO_TO_INSTRUCTION(BINARY_OP);
-            }
-            DISPATCH();
+            STAT_INC(BINARY_OP, deferred);
+            DECREMENT_ADAPTIVE_COUNTER(cache->counter);
+            GO_TO_INSTRUCTION(BINARY_OP_GENERIC);
         }
 
         TARGET(SWAP) {
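Every family rewritten above now runs the same protocol inline in its base instruction: when `ADAPTIVE_COUNTER_IS_ZERO(cache->counter)`, attempt to specialize; otherwise record the execution as deferred, decrement the counter, and fall through to the generic body. The counter arithmetic behind those macros can be modeled in isolation. The sketch below is illustrative only — it assumes the 16-bit layout this patch relies on (the count in the upper 12 bits, a 4-bit backoff exponent in the low bits) and uses made-up helper names rather than the real `pycore_code.h` inlines:

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define BACKOFF_BITS 4
#define MAX_BACKOFF  (16 - BACKOFF_BITS)

// Pack a count and a backoff exponent into one 16-bit cache entry.
static uint16_t
counter_bits(unsigned value, unsigned backoff)
{
    return (uint16_t)(value << BACKOFF_BITS | backoff);
}

// "Time to (re)specialize?" -- only the count half is tested.
static int
counter_is_zero(uint16_t counter)
{
    return (counter >> BACKOFF_BITS) == 0;
}

// Deferred once more: subtract one from the count half.
static uint16_t
counter_decrement(uint16_t counter)
{
    return (uint16_t)(counter - (1 << BACKOFF_BITS));
}

// Specialization failed: wait exponentially longer before the next attempt.
static uint16_t
counter_backoff(uint16_t counter)
{
    unsigned backoff = counter & ((1 << BACKOFF_BITS) - 1);
    if (++backoff > MAX_BACKOFF) {
        backoff = MAX_BACKOFF;
    }
    return counter_bits((1u << backoff) - 1, backoff);
}

int
main(void)
{
    uint16_t counter = counter_bits(1, 1);   // warmup state
    assert(!counter_is_zero(counter));       // 1st execution: deferred
    counter = counter_decrement(counter);
    assert(counter_is_zero(counter));        // 2nd execution: specialize

    counter = counter_bits(52, 0);           // cooldown after success
    int misses = 0;
    while (!counter_is_zero(counter)) {
        counter = counter_decrement(counter);
        misses++;
    }
    printf("re-specialize on miss #%d\n", misses + 1);   // #53

    counter = counter_backoff(counter);      // that attempt failed
    printf("value=%u backoff=%u\n",
           (unsigned)(counter >> BACKOFF_BITS), (unsigned)(counter & 0xF));
    return 0;
}
```

In this model, each failed attempt re-arms the counter at `(1 << backoff) - 1` with a larger exponent, so retries get geometrically rarer the more often specialization keeps failing.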
@@ -3845,27 +3773,10 @@
 
         TARGET(EXTENDED_ARG) {
             assert(oparg);
-            oparg <<= 8;
-            oparg |= _Py_OPARG(*next_instr);
-            // We might be tracing. To avoid breaking tracing guarantees in
-            // quickened instructions, always deoptimize the next opcode:
-            opcode = _PyOpcode_Deopt[_Py_OPCODE(*next_instr)];
-            PRE_DISPATCH_GOTO();
-            // CPython hasn't traced the following instruction historically
-            // (DO_TRACING would clobber our extended oparg anyways), so just
-            // skip our usual cframe.use_tracing check before dispatch. Also,
-            // make sure the next instruction isn't a RESUME, since that needs
-            // to trace properly (and shouldn't have an extended arg anyways):
-            assert(opcode != RESUME);
-            DISPATCH_GOTO();
-        }
-
-        TARGET(EXTENDED_ARG_QUICK) {
             assert(cframe.use_tracing == 0);
-            assert(oparg);
-            int oldoparg = oparg;
-            NEXTOPARG();
-            oparg |= oldoparg << 8;
+            opcode = _Py_OPCODE(*next_instr);
+            oparg = oparg << 8 | _Py_OPARG(*next_instr);
+            PRE_DISPATCH_GOTO();
             DISPATCH_GOTO();
         }
 
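With tracing asserted away, EXTENDED_ARG reduces to widening `oparg` by one byte and dispatching, and the separate EXTENDED_ARG_QUICK variant disappears. A toy decoder showing how the `oparg = oparg << 8 | ...` accumulation composes across multiple prefixes (hypothetical two-byte instruction format and made-up opcode values, not CPython's):

```c
#include <stdint.h>
#include <stdio.h>

// Toy two-byte (opcode, oparg) code units; made-up opcode values.
enum { EXTENDED_ARG = 1, JUMP = 2 };

int
main(void)
{
    // A jump to 0x012345 needs two EXTENDED_ARG prefixes: each prefix
    // contributes the next-most-significant byte of the final oparg.
    uint8_t code[] = { EXTENDED_ARG, 0x01, EXTENDED_ARG, 0x23, JUMP, 0x45 };
    unsigned oparg = 0;
    for (size_t i = 0; i < sizeof(code); i += 2) {
        oparg = oparg << 8 | code[i + 1];   // the accumulation from the diff
        if (code[i] != EXTENDED_ARG) {
            break;  // a real handler would dispatch on code[i] here
        }
    }
    printf("oparg = 0x%06X\n", oparg);      // 0x012345
    return 0;
}
```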
diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h
index 2251c94b96d..0603b44184b 100644
--- a/Python/opcode_targets.h
+++ b/Python/opcode_targets.h
@@ -2,11 +2,11 @@ static void *opcode_targets[256] = {
     &&TARGET_CACHE,
     &&TARGET_POP_TOP,
     &&TARGET_PUSH_NULL,
-    &&TARGET_BINARY_OP_ADAPTIVE,
-    &&TARGET_END_FOR,
     &&TARGET_BINARY_OP_ADD_FLOAT,
+    &&TARGET_END_FOR,
     &&TARGET_BINARY_OP_ADD_INT,
     &&TARGET_BINARY_OP_ADD_UNICODE,
+    &&TARGET_BINARY_OP_GENERIC,
     &&TARGET_BINARY_OP_INPLACE_ADD_UNICODE,
     &&TARGET_NOP,
     &&TARGET_UNARY_POSITIVE,
@@ -17,28 +17,26 @@ static void *opcode_targets[256] = {
     &&TARGET_UNARY_INVERT,
     &&TARGET_BINARY_OP_SUBTRACT_FLOAT,
     &&TARGET_BINARY_OP_SUBTRACT_INT,
-    &&TARGET_BINARY_SUBSCR_ADAPTIVE,
     &&TARGET_BINARY_SUBSCR_DICT,
     &&TARGET_BINARY_SUBSCR_GETITEM,
     &&TARGET_BINARY_SUBSCR_LIST_INT,
     &&TARGET_BINARY_SUBSCR_TUPLE_INT,
-    &&TARGET_CALL_ADAPTIVE,
     &&TARGET_CALL_PY_EXACT_ARGS,
+    &&TARGET_CALL_PY_WITH_DEFAULTS,
+    &&TARGET_CALL_BOUND_METHOD_EXACT_ARGS,
     &&TARGET_BINARY_SUBSCR,
     &&TARGET_BINARY_SLICE,
     &&TARGET_STORE_SLICE,
-    &&TARGET_CALL_PY_WITH_DEFAULTS,
-    &&TARGET_CALL_BOUND_METHOD_EXACT_ARGS,
+    &&TARGET_CALL_BUILTIN_CLASS,
+    &&TARGET_CALL_BUILTIN_FAST_WITH_KEYWORDS,
     &&TARGET_GET_LEN,
     &&TARGET_MATCH_MAPPING,
     &&TARGET_MATCH_SEQUENCE,
     &&TARGET_MATCH_KEYS,
-    &&TARGET_CALL_BUILTIN_CLASS,
+    &&TARGET_CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS,
     &&TARGET_PUSH_EXC_INFO,
     &&TARGET_CHECK_EXC_MATCH,
     &&TARGET_CHECK_EG_MATCH,
-    &&TARGET_CALL_BUILTIN_FAST_WITH_KEYWORDS,
-    &&TARGET_CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS,
     &&TARGET_CALL_NO_KW_BUILTIN_FAST,
     &&TARGET_CALL_NO_KW_BUILTIN_O,
     &&TARGET_CALL_NO_KW_ISINSTANCE,
@@ -48,6 +46,8 @@ static void *opcode_targets[256] = {
     &&TARGET_CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS,
     &&TARGET_CALL_NO_KW_METHOD_DESCRIPTOR_O,
     &&TARGET_CALL_NO_KW_STR_1,
+    &&TARGET_CALL_NO_KW_TUPLE_1,
+    &&TARGET_CALL_NO_KW_TYPE_1,
     &&TARGET_WITH_EXCEPT_START,
     &&TARGET_GET_AITER,
     &&TARGET_GET_ANEXT,
@@ -55,37 +55,37 @@ static void *opcode_targets[256] = {
     &&TARGET_BEFORE_WITH,
     &&TARGET_END_ASYNC_FOR,
     &&TARGET_CLEANUP_THROW,
-    &&TARGET_CALL_NO_KW_TUPLE_1,
-    &&TARGET_CALL_NO_KW_TYPE_1,
-    &&TARGET_COMPARE_OP_ADAPTIVE,
     &&TARGET_COMPARE_OP_FLOAT_JUMP,
+    &&TARGET_COMPARE_OP_GENERIC,
+    &&TARGET_COMPARE_OP_INT_JUMP,
+    &&TARGET_COMPARE_OP_STR_JUMP,
     &&TARGET_STORE_SUBSCR,
     &&TARGET_DELETE_SUBSCR,
-    &&TARGET_COMPARE_OP_INT_JUMP,
-    &&TARGET_STOPITERATION_ERROR,
-    &&TARGET_COMPARE_OP_STR_JUMP,
-    &&TARGET_EXTENDED_ARG_QUICK,
-    &&TARGET_FOR_ITER_ADAPTIVE,
     &&TARGET_FOR_ITER_LIST,
+    &&TARGET_STOPITERATION_ERROR,
+    &&TARGET_FOR_ITER_RANGE,
+    &&TARGET_FOR_ITER_GEN,
+    &&TARGET_LOAD_ATTR_CLASS,
+    &&TARGET_LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN,
     &&TARGET_GET_ITER,
     &&TARGET_GET_YIELD_FROM_ITER,
     &&TARGET_PRINT_EXPR,
     &&TARGET_LOAD_BUILD_CLASS,
-    &&TARGET_FOR_ITER_RANGE,
-    &&TARGET_FOR_ITER_GEN,
-    &&TARGET_LOAD_ASSERTION_ERROR,
-    &&TARGET_RETURN_GENERATOR,
-    &&TARGET_LOAD_ATTR_ADAPTIVE,
-    &&TARGET_LOAD_ATTR_CLASS,
-    &&TARGET_LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN,
     &&TARGET_LOAD_ATTR_INSTANCE_VALUE,
     &&TARGET_LOAD_ATTR_MODULE,
+    &&TARGET_LOAD_ASSERTION_ERROR,
+    &&TARGET_RETURN_GENERATOR,
     &&TARGET_LOAD_ATTR_PROPERTY,
+    &&TARGET_LOAD_ATTR_SLOT,
+    &&TARGET_LOAD_ATTR_WITH_HINT,
+    &&TARGET_LOAD_ATTR_METHOD_LAZY_DICT,
+    &&TARGET_LOAD_ATTR_METHOD_NO_DICT,
+    &&TARGET_LOAD_ATTR_METHOD_WITH_DICT,
     &&TARGET_LIST_TO_TUPLE,
     &&TARGET_RETURN_VALUE,
     &&TARGET_IMPORT_STAR,
     &&TARGET_SETUP_ANNOTATIONS,
-    &&TARGET_LOAD_ATTR_SLOT,
+    &&TARGET_LOAD_ATTR_METHOD_WITH_VALUES,
     &&TARGET_ASYNC_GEN_WRAP,
     &&TARGET_PREP_RERAISE_STAR,
     &&TARGET_POP_EXCEPT,
@@ -112,7 +112,7 @@ static void *opcode_targets[256] = {
     &&TARGET_JUMP_FORWARD,
     &&TARGET_JUMP_IF_FALSE_OR_POP,
     &&TARGET_JUMP_IF_TRUE_OR_POP,
-    &&TARGET_LOAD_ATTR_WITH_HINT,
+    &&TARGET_LOAD_CONST__LOAD_FAST,
     &&TARGET_POP_JUMP_IF_FALSE,
     &&TARGET_POP_JUMP_IF_TRUE,
     &&TARGET_LOAD_GLOBAL,
@@ -120,7 +120,7 @@ static void *opcode_targets[256] = {
     &&TARGET_CONTAINS_OP,
     &&TARGET_RERAISE,
     &&TARGET_COPY,
-    &&TARGET_LOAD_ATTR_METHOD_LAZY_DICT,
+    &&TARGET_LOAD_FAST__LOAD_CONST,
     &&TARGET_BINARY_OP,
     &&TARGET_SEND,
     &&TARGET_LOAD_FAST,
@@ -140,9 +140,9 @@ static void *opcode_targets[256] = {
     &&TARGET_STORE_DEREF,
     &&TARGET_DELETE_DEREF,
     &&TARGET_JUMP_BACKWARD,
-    &&TARGET_LOAD_ATTR_METHOD_NO_DICT,
+    &&TARGET_LOAD_FAST__LOAD_FAST,
     &&TARGET_CALL_FUNCTION_EX,
-    &&TARGET_LOAD_ATTR_METHOD_WITH_DICT,
+    &&TARGET_LOAD_GLOBAL_BUILTIN,
     &&TARGET_EXTENDED_ARG,
     &&TARGET_LIST_APPEND,
     &&TARGET_SET_ADD,
@@ -152,35 +152,35 @@ static void *opcode_targets[256] = {
     &&TARGET_YIELD_VALUE,
     &&TARGET_RESUME,
     &&TARGET_MATCH_CLASS,
-    &&TARGET_LOAD_ATTR_METHOD_WITH_VALUES,
-    &&TARGET_LOAD_CONST__LOAD_FAST,
+    &&TARGET_LOAD_GLOBAL_MODULE,
+    &&TARGET_STORE_ATTR_INSTANCE_VALUE,
     &&TARGET_FORMAT_VALUE,
     &&TARGET_BUILD_CONST_KEY_MAP,
     &&TARGET_BUILD_STRING,
-    &&TARGET_LOAD_FAST__LOAD_CONST,
-    &&TARGET_LOAD_FAST__LOAD_FAST,
-    &&TARGET_LOAD_GLOBAL_ADAPTIVE,
-    &&TARGET_LOAD_GLOBAL_BUILTIN,
+    &&TARGET_STORE_ATTR_SLOT,
+    &&TARGET_STORE_ATTR_WITH_HINT,
+    &&TARGET_STORE_FAST__LOAD_FAST,
+    &&TARGET_STORE_FAST__STORE_FAST,
     &&TARGET_LIST_EXTEND,
     &&TARGET_SET_UPDATE,
     &&TARGET_DICT_MERGE,
     &&TARGET_DICT_UPDATE,
-    &&TARGET_LOAD_GLOBAL_MODULE,
-    &&TARGET_STORE_ATTR_ADAPTIVE,
-    &&TARGET_STORE_ATTR_INSTANCE_VALUE,
-    &&TARGET_STORE_ATTR_SLOT,
-    &&TARGET_STORE_ATTR_WITH_HINT,
-    &&TARGET_CALL,
-    &&TARGET_KW_NAMES,
-    &&TARGET_STORE_FAST__LOAD_FAST,
-    &&TARGET_STORE_FAST__STORE_FAST,
-    &&TARGET_STORE_SUBSCR_ADAPTIVE,
     &&TARGET_STORE_SUBSCR_DICT,
     &&TARGET_STORE_SUBSCR_LIST_INT,
-    &&TARGET_UNPACK_SEQUENCE_ADAPTIVE,
     &&TARGET_UNPACK_SEQUENCE_LIST,
     &&TARGET_UNPACK_SEQUENCE_TUPLE,
     &&TARGET_UNPACK_SEQUENCE_TWO_TUPLE,
+    &&TARGET_CALL,
+    &&TARGET_KW_NAMES,
+    &&_unknown_opcode,
+    &&_unknown_opcode,
+    &&_unknown_opcode,
+    &&_unknown_opcode,
+    &&_unknown_opcode,
+    &&_unknown_opcode,
+    &&_unknown_opcode,
+    &&_unknown_opcode,
+    &&_unknown_opcode,
     &&_unknown_opcode,
     &&_unknown_opcode,
     &&_unknown_opcode,
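The table above is only consumed by the computed-goto build of the interpreter, where `goto *opcode_targets[opcode]` replaces a `switch`, so every slot must stay aligned with its opcode number — which is why renumbering the specialized instructions reshuffles so many lines. A self-contained sketch of that dispatch style (GNU C labels-as-values; toy opcodes, not the real ceval loop):

```c
// Build with GCC or Clang: &&label ("labels as values") is a GNU extension.
#include <stdio.h>

int
main(void)
{
    enum { OP_PUSH2, OP_PUSH3, OP_ADD, OP_HALT };  // toy opcodes
    static const unsigned char code[] = { OP_PUSH2, OP_PUSH3, OP_ADD, OP_HALT };

    // The moral equivalent of opcode_targets[256]: opcode -> handler address.
    static void *targets[] = {
        [OP_PUSH2] = &&push2,
        [OP_PUSH3] = &&push3,
        [OP_ADD]   = &&add,
        [OP_HALT]  = &&halt,
    };

    int stack[8], sp = 0;
    const unsigned char *ip = code;

    // Each handler jumps straight to the next handler through the table:
#define DISPATCH() goto *targets[*ip++]

    DISPATCH();
push2:
    stack[sp++] = 2;
    DISPATCH();
push3:
    stack[sp++] = 3;
    DISPATCH();
add:
    sp--;
    stack[sp - 1] += stack[sp];
    DISPATCH();
halt:
    printf("%d\n", stack[sp - 1]);  // prints 5
    return 0;
}
```

Because each handler ends in its own indirect branch, the CPU's branch predictor gets one prediction slot per opcode instead of one shared `switch` jump, which is the usual argument for this layout.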
diff --git a/Python/specialize.c b/Python/specialize.c
index 57179912792..61d7a5de0a7 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -16,21 +16,6 @@
  * ./adaptive.md
  */
 
-/* Map from opcode to adaptive opcode.
-  Values of zero are ignored. */
-uint8_t _PyOpcode_Adaptive[256] = {
-    [LOAD_ATTR] = LOAD_ATTR_ADAPTIVE,
-    [LOAD_GLOBAL] = LOAD_GLOBAL_ADAPTIVE,
-    [BINARY_SUBSCR] = BINARY_SUBSCR_ADAPTIVE,
-    [STORE_SUBSCR] = STORE_SUBSCR_ADAPTIVE,
-    [CALL] = CALL_ADAPTIVE,
-    [STORE_ATTR] = STORE_ATTR_ADAPTIVE,
-    [BINARY_OP] = BINARY_OP_ADAPTIVE,
-    [COMPARE_OP] = COMPARE_OP_ADAPTIVE,
-    [UNPACK_SEQUENCE] = UNPACK_SEQUENCE_ADAPTIVE,
-    [FOR_ITER] = FOR_ITER_ADAPTIVE,
-};
-
 #ifdef Py_STATS
 PyStats _py_stats_struct = { 0 };
 PyStats *_py_stats = &_py_stats_struct;
@@ -143,7 +128,7 @@ print_spec_stats(FILE *out, OpcodeStats *stats)
     fprintf(out, "opcode[%d].specializable : 1\n", BINARY_SLICE);
     fprintf(out, "opcode[%d].specializable : 1\n", STORE_SLICE);
     for (int i = 0; i < 256; i++) {
-        if (_PyOpcode_Adaptive[i]) {
+        if (_PyOpcode_Caches[i]) {
             fprintf(out, "opcode[%d].specializable : 1\n", i);
         }
         PRINT_STAT(i, specialization.success);
@@ -275,74 +260,42 @@ do { \
 #define SPECIALIZATION_FAIL(opcode, kind) ((void)0)
 #endif
 
-// Insert adaptive instructions and superinstructions. This cannot fail.
+// Initialize warmup counters and insert superinstructions. This cannot fail.
 void
 _PyCode_Quicken(PyCodeObject *code)
 {
-    int previous_opcode = -1;
+    int previous_opcode = 0;
     _Py_CODEUNIT *instructions = _PyCode_CODE(code);
     for (int i = 0; i < Py_SIZE(code); i++) {
         int opcode = _PyOpcode_Deopt[_Py_OPCODE(instructions[i])];
-        uint8_t adaptive_opcode = _PyOpcode_Adaptive[opcode];
-        if (adaptive_opcode) {
-            _Py_SET_OPCODE(instructions[i], adaptive_opcode);
-            instructions[i + 1] = adaptive_counter_start();
-            previous_opcode = -1;
-            i += _PyOpcode_Caches[opcode];
+        int caches = _PyOpcode_Caches[opcode];
+        if (caches) {
+            instructions[i + 1] = adaptive_counter_warmup();
+            previous_opcode = 0;
+            i += caches;
+            continue;
         }
-        else {
-            assert(!_PyOpcode_Caches[opcode]);
-            switch (opcode) {
-                case EXTENDED_ARG:
-                    _Py_SET_OPCODE(instructions[i], EXTENDED_ARG_QUICK);
-                    break;
-                case LOAD_FAST:
-                    switch(previous_opcode) {
-                        case LOAD_FAST:
-                            _Py_SET_OPCODE(instructions[i - 1],
-                                           LOAD_FAST__LOAD_FAST);
-                            break;
-                        case STORE_FAST:
-                            _Py_SET_OPCODE(instructions[i - 1],
-                                           STORE_FAST__LOAD_FAST);
-                            break;
-                        case LOAD_CONST:
-                            _Py_SET_OPCODE(instructions[i - 1],
-                                           LOAD_CONST__LOAD_FAST);
-                            break;
-                    }
-                    break;
-                case STORE_FAST:
-                    if (previous_opcode == STORE_FAST) {
-                        _Py_SET_OPCODE(instructions[i - 1],
-                                       STORE_FAST__STORE_FAST);
-                    }
-                    break;
-                case LOAD_CONST:
-                    if (previous_opcode == LOAD_FAST) {
-                        _Py_SET_OPCODE(instructions[i - 1],
-                                       LOAD_FAST__LOAD_CONST);
-                    }
-                    break;
-            }
-            previous_opcode = opcode;
+        switch (previous_opcode << 8 | opcode) {
+            case LOAD_CONST << 8 | LOAD_FAST:
+                _Py_SET_OPCODE(instructions[i - 1], LOAD_CONST__LOAD_FAST);
+                break;
+            case LOAD_FAST << 8 | LOAD_CONST:
+                _Py_SET_OPCODE(instructions[i - 1], LOAD_FAST__LOAD_CONST);
+                break;
+            case LOAD_FAST << 8 | LOAD_FAST:
+                _Py_SET_OPCODE(instructions[i - 1], LOAD_FAST__LOAD_FAST);
+                break;
+            case STORE_FAST << 8 | LOAD_FAST:
+                _Py_SET_OPCODE(instructions[i - 1], STORE_FAST__LOAD_FAST);
+                break;
+            case STORE_FAST << 8 | STORE_FAST:
+                _Py_SET_OPCODE(instructions[i - 1], STORE_FAST__STORE_FAST);
+                break;
         }
+        previous_opcode = opcode;
     }
 }
 
-static inline int
-miss_counter_start(void) {
-    /* Starting value for the counter.
-     * This value needs to be not too low, otherwise
-     * it would cause excessive de-optimization.
-     * Neither should it be too high, or that would delay
-     * de-optimization excessively when it is needed.
-     * A value around 50 seems to work, and we choose a
-     * prime number to avoid artifacts.
-     */
-    return 53;
-}
-
 #define SIMPLE_FUNCTION 0
 
 /* Common */
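The rewritten `_PyCode_Quicken` replaces the nested superinstruction `switch` with a single `switch` over `previous_opcode << 8 | opcode`. Packing both opcodes into one integer turns pair-matching into ordinary constant case labels, since `X << 8 | Y` is still an integer constant expression. A toy version of the idiom (hypothetical opcode values, not CPython's):

```c
#include <stdio.h>

enum { LOAD_CONST = 1, LOAD_FAST = 2, STORE_FAST = 3 };  // made-up values

// `prev << 8 | op` packs the pair into one int, and because the case labels
// are integer constant expressions too, one flat switch matches both halves.
static const char *
superinstruction(int previous_opcode, int opcode)
{
    switch (previous_opcode << 8 | opcode) {
        case LOAD_CONST << 8 | LOAD_FAST:  return "LOAD_CONST__LOAD_FAST";
        case LOAD_FAST  << 8 | LOAD_CONST: return "LOAD_FAST__LOAD_CONST";
        case LOAD_FAST  << 8 | LOAD_FAST:  return "LOAD_FAST__LOAD_FAST";
        case STORE_FAST << 8 | LOAD_FAST:  return "STORE_FAST__LOAD_FAST";
        case STORE_FAST << 8 | STORE_FAST: return "STORE_FAST__STORE_FAST";
        default:                           return "(no fusion)";
    }
}

int
main(void)
{
    printf("%s\n", superinstruction(LOAD_FAST, LOAD_FAST));    // fused
    printf("%s\n", superinstruction(LOAD_CONST, STORE_FAST));  // not a pair
    return 0;
}
```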
@@ -859,12 +812,13 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name)
 fail:
     STAT_INC(LOAD_ATTR, failure);
     assert(!PyErr_Occurred());
+    _Py_SET_OPCODE(*instr, LOAD_ATTR);
     cache->counter = adaptive_counter_backoff(cache->counter);
     return 0;
 success:
     STAT_INC(LOAD_ATTR, success);
     assert(!PyErr_Occurred());
-    cache->counter = miss_counter_start();
+    cache->counter = adaptive_counter_cooldown();
     return 0;
 }
 
@@ -942,12 +896,13 @@ _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name)
 fail:
     STAT_INC(STORE_ATTR, failure);
     assert(!PyErr_Occurred());
+    _Py_SET_OPCODE(*instr, STORE_ATTR);
     cache->counter = adaptive_counter_backoff(cache->counter);
     return 0;
 success:
     STAT_INC(STORE_ATTR, success);
     assert(!PyErr_Occurred());
-    cache->counter = miss_counter_start();
+    cache->counter = adaptive_counter_cooldown();
     return 0;
 }
 
@@ -1127,7 +1082,7 @@ fail:
     return 0;
 }
 
-int
+void
 _Py_Specialize_LoadGlobal(
     PyObject *globals, PyObject *builtins,
     _Py_CODEUNIT *instr, PyObject *name)
@@ -1200,13 +1155,13 @@ _Py_Specialize_LoadGlobal(
 fail:
     STAT_INC(LOAD_GLOBAL, failure);
     assert(!PyErr_Occurred());
+    _Py_SET_OPCODE(*instr, LOAD_GLOBAL);
     cache->counter = adaptive_counter_backoff(cache->counter);
-    return 0;
+    return;
 success:
     STAT_INC(LOAD_GLOBAL, success);
     assert(!PyErr_Occurred());
-    cache->counter = miss_counter_start();
-    return 0;
+    cache->counter = adaptive_counter_cooldown();
 }
 
 #ifdef Py_STATS
@@ -1294,7 +1249,7 @@ function_get_version(PyObject *o, int opcode)
     return version;
 }
 
-int
+void
 _Py_Specialize_BinarySubscr(
         PyObject *container, PyObject *sub, _Py_CODEUNIT *instr)
 {
@@ -1360,16 +1315,16 @@ _Py_Specialize_BinarySubscr(
 fail:
     STAT_INC(BINARY_SUBSCR, failure);
     assert(!PyErr_Occurred());
+    _Py_SET_OPCODE(*instr, BINARY_SUBSCR);
     cache->counter = adaptive_counter_backoff(cache->counter);
-    return 0;
+    return;
 success:
     STAT_INC(BINARY_SUBSCR, success);
     assert(!PyErr_Occurred());
-    cache->counter = miss_counter_start();
-    return 0;
+    cache->counter = adaptive_counter_cooldown();
 }
 
-int
+void
 _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT *instr)
 {
     _PyStoreSubscrCache *cache = (_PyStoreSubscrCache *)(instr + 1);
@@ -1464,20 +1419,19 @@ _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT *ins
 fail:
     STAT_INC(STORE_SUBSCR, failure);
     assert(!PyErr_Occurred());
+    _Py_SET_OPCODE(*instr, STORE_SUBSCR);
     cache->counter = adaptive_counter_backoff(cache->counter);
-    return 0;
+    return;
 success:
     STAT_INC(STORE_SUBSCR, success);
     assert(!PyErr_Occurred());
-    cache->counter = miss_counter_start();
-    return 0;
+    cache->counter = adaptive_counter_cooldown();
 }
 
 static int
 specialize_class_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
                       PyObject *kwnames)
 {
-    assert(_Py_OPCODE(*instr) == CALL_ADAPTIVE);
     PyTypeObject *tp = _PyType_CAST(callable);
     if (tp->tp_new == PyBaseObject_Type.tp_new) {
         SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_PYTHON_CLASS);
@@ -1539,7 +1493,6 @@ static int
 specialize_method_descriptor(PyMethodDescrObject *descr, _Py_CODEUNIT *instr,
                              int nargs, PyObject *kwnames)
 {
-    assert(_Py_OPCODE(*instr) == CALL_ADAPTIVE);
     if (kwnames) {
         SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_KWNAMES);
         return -1;
@@ -1591,7 +1544,6 @@ specialize_py_call(PyFunctionObject *func, _Py_CODEUNIT *instr, int nargs,
                    PyObject *kwnames, bool bound_method)
 {
     _PyCallCache *cache = (_PyCallCache *)(instr + 1);
-    assert(_Py_OPCODE(*instr) == CALL_ADAPTIVE);
     PyCodeObject *code = (PyCodeObject *)func->func_code;
     int kind = function_kind(code);
     /* Don't specialize if PEP 523 is active */
@@ -1646,7 +1598,6 @@ static int
 specialize_c_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
                   PyObject *kwnames)
 {
-    assert(_Py_OPCODE(*instr) == CALL_ADAPTIVE);
     if (PyCFunction_GET_FUNCTION(callable) == NULL) {
         return 1;
     }
@@ -1743,7 +1694,7 @@ call_fail_kind(PyObject *callable)
 
 /* TODO:
     - Specialize calling classes. */
-int
+void
 _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
                     PyObject *kwnames)
 {
@@ -1781,14 +1732,14 @@ _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
     if (fail) {
         STAT_INC(CALL, failure);
         assert(!PyErr_Occurred());
+        _Py_SET_OPCODE(*instr, CALL);
         cache->counter = adaptive_counter_backoff(cache->counter);
     }
     else {
         STAT_INC(CALL, success);
         assert(!PyErr_Occurred());
-        cache->counter = miss_counter_start();
+        cache->counter = adaptive_counter_cooldown();
     }
-    return 0;
 }
 
 #ifdef Py_STATS
@@ -1928,17 +1879,18 @@ _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
             // back to BINARY_OP (unless we're collecting stats, where it's more
             // important to get accurate hit counts for the unadaptive version
             // and each of the different failure types):
-            _Py_SET_OPCODE(*instr, BINARY_OP);
+            _Py_SET_OPCODE(*instr, BINARY_OP_GENERIC);
             return;
 #endif
     }
     SPECIALIZATION_FAIL(BINARY_OP, binary_op_fail_kind(oparg, lhs, rhs));
     STAT_INC(BINARY_OP, failure);
+    _Py_SET_OPCODE(*instr, BINARY_OP);
     cache->counter = adaptive_counter_backoff(cache->counter);
     return;
 success:
     STAT_INC(BINARY_OP, success);
-    cache->counter = miss_counter_start();
+    cache->counter = adaptive_counter_cooldown();
 }
 
 
@@ -2004,7 +1956,7 @@ _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
     // counts for the unadaptive version and each of the different failure
     // types):
 #ifndef Py_STATS
-    _Py_SET_OPCODE(*instr, COMPARE_OP);
+    _Py_SET_OPCODE(*instr, COMPARE_OP_GENERIC);
     return;
 #else
     if (next_opcode == EXTENDED_ARG) {
@@ -2054,11 +2006,12 @@ _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
     SPECIALIZATION_FAIL(COMPARE_OP, compare_op_fail_kind(lhs, rhs));
 failure:
     STAT_INC(COMPARE_OP, failure);
+    _Py_SET_OPCODE(*instr, COMPARE_OP);
     cache->counter = adaptive_counter_backoff(cache->counter);
     return;
 success:
     STAT_INC(COMPARE_OP, success);
-    cache->counter = miss_counter_start();
+    cache->counter = adaptive_counter_cooldown();
 }
 
 #ifdef Py_STATS
@@ -2104,11 +2057,12 @@ _Py_Specialize_UnpackSequence(PyObject *seq, _Py_CODEUNIT *instr, int oparg)
     SPECIALIZATION_FAIL(UNPACK_SEQUENCE, unpack_sequence_fail_kind(seq));
 failure:
     STAT_INC(UNPACK_SEQUENCE, failure);
+    _Py_SET_OPCODE(*instr, UNPACK_SEQUENCE);
     cache->counter = adaptive_counter_backoff(cache->counter);
     return;
 success:
     STAT_INC(UNPACK_SEQUENCE, success);
-    cache->counter = miss_counter_start();
+    cache->counter = adaptive_counter_cooldown();
 }
 
 #ifdef Py_STATS
@@ -2207,9 +2161,10 @@ _Py_Specialize_ForIter(PyObject *iter, _Py_CODEUNIT *instr, int oparg)
     SPECIALIZATION_FAIL(FOR_ITER,
                         _PySpecialization_ClassifyIterator(iter));
     STAT_INC(FOR_ITER, failure);
+    _Py_SET_OPCODE(*instr, FOR_ITER);
     cache->counter = adaptive_counter_backoff(cache->counter);
     return;
 success:
     STAT_INC(FOR_ITER, success);
-    cache->counter = miss_counter_start();
+    cache->counter = adaptive_counter_cooldown();
 }
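With the `*_ADAPTIVE` opcodes gone, every `_Py_Specialize_*` function above converges on the same epilogue: a failed attempt must itself rewrite the instruction back to the base opcode (`_Py_SET_OPCODE(*instr, ...)`) and re-arm the counter with a backoff, while a success leaves the specialized opcode in place with a cooldown — and since neither path can fail, the functions now return `void`. A toy model of that contract, with plain integers standing in for the packed counter and made-up opcode names:

```c
#include <stdio.h>

// Stand-in opcodes (not CPython's numbering).
enum { BINARY_OP, BINARY_OP_ADD_INT };

typedef struct {
    int opcode;
    int counter;  // executions left before the next specialization attempt
} instr_t;

// The shared epilogue shape: rewrite the opcode and re-arm the counter.
static void
specialize_binary_op(instr_t *instr, int lhs_is_int, int rhs_is_int)
{
    if (lhs_is_int && rhs_is_int) {
        // success: leave the specialized opcode in place, with a cooldown
        instr->opcode = BINARY_OP_ADD_INT;
        instr->counter = 52;
    }
    else {
        // failure: no *_ADAPTIVE opcode to fall back to, so restore the
        // base opcode ourselves and back off before trying again
        instr->opcode = BINARY_OP;
        instr->counter = 1;
    }
    // nothing in here can fail, hence the switch from int to void returns
}

int
main(void)
{
    instr_t add = { BINARY_OP, 1 };
    specialize_binary_op(&add, 1, 1);
    printf("opcode=%d counter=%d\n", add.opcode, add.counter);  // 1 52
    specialize_binary_op(&add, 0, 1);  // re-attempt with a float: fails
    printf("opcode=%d counter=%d\n", add.opcode, add.counter);  // 0 1
    return 0;
}
```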
diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv
index dbfb0e01e7f..ea16ed2f5e9 100644
--- a/Tools/c-analyzer/cpython/ignored.tsv
+++ b/Tools/c-analyzer/cpython/ignored.tsv
@@ -501,6 +501,5 @@ Python/pystate.c - initial -
 Python/specialize.c - adaptive_opcodes -
 Python/specialize.c - cache_requirements -
 Python/specialize.c - compare_masks -
-Python/specialize.c - _PyOpcode_Adaptive -
 Python/stdlib_module_names.h - _Py_stdlib_module_names -
 Python/sysmodule.c - whatstrings -
diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py
index e5610343880..b4f5f8f01dc 100644
--- a/Tools/cases_generator/generate_cases.py
+++ b/Tools/cases_generator/generate_cases.py
@@ -13,6 +13,8 @@ from typing import TextIO, cast
 import parser
 from parser import InstDef  # TODO: Use parser.InstDef
 
+RE_PREDICTED = r"(?s)(?:PREDICT\(|GO_TO_INSTRUCTION\(|DEOPT_IF\(.*?,\s*)(\w+)\);"
+
 arg_parser = argparse.ArgumentParser()
 arg_parser.add_argument("-i", "--input", type=str, default="Python/bytecodes.c")
 arg_parser.add_argument("-o", "--output", type=str, default="Python/generated_cases.c.h")
@@ -121,7 +123,7 @@ def write_cases(f: TextIO, instrs: list[InstDef], supers: list[parser.Super]):
     for instr in instrs:
         assert isinstance(instr, InstDef)
         assert instr.block is not None
-        for target in re.findall(r"(?:PREDICT|GO_TO_INSTRUCTION)\((\w+)\)", instr.block.text):
+        for target in re.findall(RE_PREDICTED, instr.block.text):
             predictions.add(target)
     indent = "    "
     f.write(f"// This file is generated by {os.path.relpath(__file__)}\n")
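The widened `RE_PREDICTED` matters because every name it captures gets a `PREDICTED(...)` label emitted in the generated code, and this patch makes `DEOPT_IF(..., FAMILY)` jump straight to the base instruction now that there is no adaptive instruction in between. Roughly how those pieces connect, using simplified stand-ins for the real ceval macros (the actual definitions carry extra bookkeeping):

```c
#include <stdio.h>

// Simplified stand-ins for the ceval macros (illustrative, not verbatim):
#define PREDICT_ID(op)        PRED_##op
#define PREDICTED(op)         PREDICT_ID(op):
#define GO_TO_INSTRUCTION(op) goto PREDICT_ID(op)
#define DEOPT_IF(cond, op)    if (cond) GO_TO_INSTRUCTION(op)

int
main(void)
{
    int guard_failed = 1;

    // A specialized case bails out to the generic one when its guard fails;
    // the generator must therefore have emitted a PREDICTED label for it:
    DEOPT_IF(guard_failed, BINARY_OP_GENERIC);
    printf("specialized fast path\n");
    return 0;

    // Only reachable via the jumps above -- which is exactly what the
    // RE_PREDICTED scan over the instruction bodies detects:
    PREDICTED(BINARY_OP_GENERIC)
    printf("generic fallback\n");
    return 0;
}
```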