From 424ecab494d538650ba34937cdd710094ccb2275 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Mon, 28 Feb 2022 03:54:14 -0800 Subject: [PATCH] bpo-46841: Use inline caching for `UNPACK_SEQUENCE` (GH-31591) --- Include/internal/pycore_code.h | 9 +++++- Include/opcode.h | 1 + Lib/importlib/_bootstrap_external.py | 3 +- Lib/opcode.py | 2 +- .../2022-02-25-15-18-40.bpo-46841.tmLpgC.rst | 1 + Python/ceval.c | 29 ++++++++++--------- Python/specialize.c | 23 +++++++-------- 7 files changed, 39 insertions(+), 29 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-02-25-15-18-40.bpo-46841.tmLpgC.rst diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 0c4850f98a3..0e401d61f76 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -68,9 +68,16 @@ typedef struct { _Py_CODEUNIT counter; } _PyBinaryOpCache; +typedef struct { + _Py_CODEUNIT counter; +} _PyUnpackSequenceCache; + #define INLINE_CACHE_ENTRIES_BINARY_OP \ (sizeof(_PyBinaryOpCache) / sizeof(_Py_CODEUNIT)) +#define INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE \ + (sizeof(_PyUnpackSequenceCache) / sizeof(_Py_CODEUNIT)) + /* Maximum size of code to quicken, in code units. */ #define MAX_SIZE_TO_QUICKEN 5000 @@ -312,7 +319,7 @@ extern void _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT * int oparg); extern void _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache); extern void _Py_Specialize_UnpackSequence(PyObject *seq, _Py_CODEUNIT *instr, - SpecializedCacheEntry *cache); + int oparg); /* Deallocator function for static codeobjects used in deepfreeze.py */ extern void _PyStaticCode_Dealloc(PyCodeObject *co); diff --git a/Include/opcode.h b/Include/opcode.h index 0862f96e7de..ae21d92a865 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -241,6 +241,7 @@ static uint32_t _PyOpcode_Jump[8] = { #define NB_INPLACE_XOR 25 static const uint8_t _PyOpcode_InlineCacheEntries[256] = { + [UNPACK_SEQUENCE] = 1, [BINARY_OP] = 1, }; diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index 53fe1b8fc21..be23eeed9df 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -388,6 +388,7 @@ _code_type = type(_write_atomic.__code__) # Python 3.11a5 3479 (Add PUSH_NULL opcode) # Python 3.11a5 3480 (New CALL opcodes, second iteration) # Python 3.11a5 3481 (Use inline CACHE instructions) +# Python 3.11a5 3482 (Use inline caching for UNPACK_SEQUENCE) # Python 3.12 will start with magic number 3500 @@ -402,7 +403,7 @@ _code_type = type(_write_atomic.__code__) # Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array # in PC/launcher.c must also be updated. -MAGIC_NUMBER = (3481).to_bytes(2, 'little') + b'\r\n' +MAGIC_NUMBER = (3482).to_bytes(2, 'little') + b'\r\n' _RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c _PYCACHE = '__pycache__' diff --git a/Lib/opcode.py b/Lib/opcode.py index 84ad002c8dd..8fa71bf4d18 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -109,7 +109,7 @@ HAVE_ARGUMENT = 90 # Opcodes from here have an argument: name_op('STORE_NAME', 90) # Index in name list name_op('DELETE_NAME', 91) # "" -def_op('UNPACK_SEQUENCE', 92) # Number of tuple items +def_op('UNPACK_SEQUENCE', 92, 1) # Number of tuple items jrel_op('FOR_ITER', 93) def_op('UNPACK_EX', 94) name_op('STORE_ATTR', 95) # Index in name list diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-02-25-15-18-40.bpo-46841.tmLpgC.rst b/Misc/NEWS.d/next/Core and Builtins/2022-02-25-15-18-40.bpo-46841.tmLpgC.rst new file mode 100644 index 00000000000..fec18aa5136 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-02-25-15-18-40.bpo-46841.tmLpgC.rst @@ -0,0 +1 @@ +Use inline caching for :opcode:`UNPACK_SEQUENCE`. diff --git a/Python/ceval.c b/Python/ceval.c index 4c0a71b036e..fe757829729 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -2758,22 +2758,22 @@ handle_eval_breaker: } STACK_GROW(oparg); Py_DECREF(seq); + JUMPBY(INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE); DISPATCH(); } TARGET(UNPACK_SEQUENCE_ADAPTIVE) { assert(cframe.use_tracing == 0); - SpecializedCacheEntry *cache = GET_CACHE(); - if (cache->adaptive.counter == 0) { + _PyUnpackSequenceCache *cache = (_PyUnpackSequenceCache *)next_instr; + if (cache->counter == 0) { PyObject *seq = TOP(); next_instr--; - _Py_Specialize_UnpackSequence(seq, next_instr, cache); + _Py_Specialize_UnpackSequence(seq, next_instr, oparg); DISPATCH(); } else { STAT_INC(UNPACK_SEQUENCE, deferred); - cache->adaptive.counter--; - oparg = cache->adaptive.original_oparg; + cache->counter--; JUMP_TO_INSTRUCTION(UNPACK_SEQUENCE); } } @@ -2786,36 +2786,37 @@ handle_eval_breaker: SET_TOP(Py_NewRef(PyTuple_GET_ITEM(seq, 1))); PUSH(Py_NewRef(PyTuple_GET_ITEM(seq, 0))); Py_DECREF(seq); + JUMPBY(INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE); NOTRACE_DISPATCH(); } TARGET(UNPACK_SEQUENCE_TUPLE) { PyObject *seq = TOP(); - int len = GET_CACHE()->adaptive.original_oparg; DEOPT_IF(!PyTuple_CheckExact(seq), UNPACK_SEQUENCE); - DEOPT_IF(PyTuple_GET_SIZE(seq) != len, UNPACK_SEQUENCE); + DEOPT_IF(PyTuple_GET_SIZE(seq) != oparg, UNPACK_SEQUENCE); STAT_INC(UNPACK_SEQUENCE, hit); STACK_SHRINK(1); PyObject **items = _PyTuple_ITEMS(seq); - while (len--) { - PUSH(Py_NewRef(items[len])); + while (oparg--) { + PUSH(Py_NewRef(items[oparg])); } Py_DECREF(seq); + JUMPBY(INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE); NOTRACE_DISPATCH(); } TARGET(UNPACK_SEQUENCE_LIST) { PyObject *seq = TOP(); - int len = GET_CACHE()->adaptive.original_oparg; DEOPT_IF(!PyList_CheckExact(seq), UNPACK_SEQUENCE); - DEOPT_IF(PyList_GET_SIZE(seq) != len, UNPACK_SEQUENCE); + DEOPT_IF(PyList_GET_SIZE(seq) != oparg, UNPACK_SEQUENCE); STAT_INC(UNPACK_SEQUENCE, hit); STACK_SHRINK(1); PyObject **items = _PyList_ITEMS(seq); - while (len--) { - PUSH(Py_NewRef(items[len])); + while (oparg--) { + PUSH(Py_NewRef(items[oparg])); } Py_DECREF(seq); + JUMPBY(INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE); NOTRACE_DISPATCH(); } @@ -5600,7 +5601,7 @@ MISS_WITH_CACHE(CALL) MISS_WITH_INLINE_CACHE(BINARY_OP) MISS_WITH_CACHE(COMPARE_OP) MISS_WITH_CACHE(BINARY_SUBSCR) -MISS_WITH_CACHE(UNPACK_SEQUENCE) +MISS_WITH_INLINE_CACHE(UNPACK_SEQUENCE) MISS_WITH_OPARG_COUNTER(STORE_SUBSCR) LOAD_ATTR_INSTANCE_VALUE_miss: diff --git a/Python/specialize.c b/Python/specialize.c index 1624f1955d4..b88c5d517bd 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -67,7 +67,6 @@ static uint8_t cache_requirements[256] = { [PRECALL] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */ [STORE_ATTR] = 1, // _PyAdaptiveEntry [COMPARE_OP] = 1, /* _PyAdaptiveEntry */ - [UNPACK_SEQUENCE] = 1, // _PyAdaptiveEntry }; Py_ssize_t _Py_QuickenedCount = 0; @@ -2133,39 +2132,39 @@ unpack_sequence_fail_kind(PyObject *seq) #endif void -_Py_Specialize_UnpackSequence(PyObject *seq, _Py_CODEUNIT *instr, - SpecializedCacheEntry *cache) +_Py_Specialize_UnpackSequence(PyObject *seq, _Py_CODEUNIT *instr, int oparg) { - _PyAdaptiveEntry *adaptive = &cache->adaptive; + assert(_PyOpcode_InlineCacheEntries[UNPACK_SEQUENCE] == + INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE); + _PyUnpackSequenceCache *cache = (_PyUnpackSequenceCache *)(instr + 1); if (PyTuple_CheckExact(seq)) { - if (PyTuple_GET_SIZE(seq) != adaptive->original_oparg) { + if (PyTuple_GET_SIZE(seq) != oparg) { SPECIALIZATION_FAIL(UNPACK_SEQUENCE, SPEC_FAIL_EXPECTED_ERROR); goto failure; } if (PyTuple_GET_SIZE(seq) == 2) { - *instr = _Py_MAKECODEUNIT(UNPACK_SEQUENCE_TWO_TUPLE, - _Py_OPARG(*instr)); + *instr = _Py_MAKECODEUNIT(UNPACK_SEQUENCE_TWO_TUPLE, oparg); goto success; } - *instr = _Py_MAKECODEUNIT(UNPACK_SEQUENCE_TUPLE, _Py_OPARG(*instr)); + *instr = _Py_MAKECODEUNIT(UNPACK_SEQUENCE_TUPLE, oparg); goto success; } if (PyList_CheckExact(seq)) { - if (PyList_GET_SIZE(seq) != adaptive->original_oparg) { + if (PyList_GET_SIZE(seq) != oparg) { SPECIALIZATION_FAIL(UNPACK_SEQUENCE, SPEC_FAIL_EXPECTED_ERROR); goto failure; } - *instr = _Py_MAKECODEUNIT(UNPACK_SEQUENCE_LIST, _Py_OPARG(*instr)); + *instr = _Py_MAKECODEUNIT(UNPACK_SEQUENCE_LIST, oparg); goto success; } SPECIALIZATION_FAIL(UNPACK_SEQUENCE, unpack_sequence_fail_kind(seq)); failure: STAT_INC(UNPACK_SEQUENCE, failure); - cache_backoff(adaptive); + cache->counter = ADAPTIVE_CACHE_BACKOFF; return; success: STAT_INC(UNPACK_SEQUENCE, success); - adaptive->counter = initial_counter_value(); + cache->counter = initial_counter_value(); } #ifdef Py_STATS