From dd1884dc5dc1a540c60e98ea1bc482a51d996564 Mon Sep 17 00:00:00 2001
From: Guido van Rossum
Date: Wed, 12 Jul 2023 10:23:59 -0700
Subject: [PATCH] gh-106529: Split FOR_ITER_RANGE into uops (#106638)

For an example of what this does for Tier 1 and Tier 2, see
https://github.com/python/cpython/issues/106529#issuecomment-1631649920
---
 Include/internal/pycore_opcode_metadata.h |  6 +++
 Lib/test/test_capi/test_misc.py           | 25 ++++++++--
 Python/bytecodes.c                        | 27 +++++++++--
 Python/executor_cases.c.h                 | 34 +++++++++++++
 Python/generated_cases.c.h                | 58 +++++++++++++++--------
 Python/optimizer.c                        | 24 +++++++++-
 6 files changed, 146 insertions(+), 28 deletions(-)

diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h
index 317f42afea8..d2c1f9ad6e5 100644
--- a/Include/internal/pycore_opcode_metadata.h
+++ b/Include/internal/pycore_opcode_metadata.h
@@ -40,6 +40,9 @@
 #define _GUARD_GLOBALS_VERSION 318
 #define _GUARD_BUILTINS_VERSION 319
 #define IS_NONE 320
+#define _ITER_CHECK_RANGE 321
+#define _ITER_EXHAUSTED_RANGE 322
+#define _ITER_NEXT_RANGE 323
 
 #ifndef NEED_OPCODE_METADATA
 extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump);
@@ -1319,5 +1322,8 @@ const char * const _PyOpcode_uop_name[512] = {
     [318] = "_GUARD_GLOBALS_VERSION",
     [319] = "_GUARD_BUILTINS_VERSION",
     [320] = "IS_NONE",
+    [321] = "_ITER_CHECK_RANGE",
+    [322] = "_ITER_EXHAUSTED_RANGE",
+    [323] = "_ITER_NEXT_RANGE",
 };
 #endif // NEED_OPCODE_METADATA
diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py
index 9c14a501875..abdf7ed8976 100644
--- a/Lib/test/test_capi/test_misc.py
+++ b/Lib/test/test_capi/test_misc.py
@@ -2443,7 +2443,6 @@ class TestUops(unittest.TestCase):
                 i += 1
 
         opt = _testinternalcapi.get_uop_optimizer()
-
         with temporary_optimizer(opt):
             testfunc(1000)
 
@@ -2580,13 +2579,33 @@ class TestUops(unittest.TestCase):
 
         ex = get_first_executor(testfunc)
         self.assertIsNotNone(ex)
-        # for i, (opname, oparg) in enumerate(ex):
-        #     print(f"{i:4d}: {opname:<20s} {oparg:4d}")
         uops = {opname for opname, _ in ex}
         # Since there is no JUMP_FORWARD instruction,
         # look for indirect evidence: the += operator
         self.assertIn("_BINARY_OP_ADD_INT", uops)
 
+    def test_for_iter_range(self):
+        def testfunc(n):
+            total = 0
+            for i in range(n):
+                total += i
+            return total
+        # import dis; dis.dis(testfunc)
+
+        opt = _testinternalcapi.get_uop_optimizer()
+        with temporary_optimizer(opt):
+            total = testfunc(10)
+            self.assertEqual(total, 45)
+
+        ex = get_first_executor(testfunc)
+        self.assertIsNotNone(ex)
+        # for i, (opname, oparg) in enumerate(ex):
+        #     print(f"{i:4d}: {opname:<20s} {oparg:3d}")
+        uops = {opname for opname, _ in ex}
+        self.assertIn("_ITER_EXHAUSTED_RANGE", uops)
+        # Verification that the jump goes past END_FOR
+        # is done by manual inspection of the output
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index f5ce2e72d26..18862f87b65 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -2451,9 +2451,14 @@ dummy_func(
             // Common case: no jump, leave it to the code generator
         }
 
-        inst(FOR_ITER_RANGE, (unused/1, iter -- iter, next)) {
+        op(_ITER_CHECK_RANGE, (iter -- iter)) {
             _PyRangeIterObject *r = (_PyRangeIterObject *)iter;
             DEOPT_IF(Py_TYPE(r) != &PyRangeIter_Type, FOR_ITER);
+        }
+
+        op(_ITER_JUMP_RANGE, (iter -- iter)) {
+            _PyRangeIterObject *r = (_PyRangeIterObject *)iter;
+            assert(Py_TYPE(r) == &PyRangeIter_Type);
             STAT_INC(FOR_ITER, hit);
             if (r->len <= 0) {
                 STACK_SHRINK(1);
@@ -2463,15 +2468,29 @@ dummy_func(
                 JUMPBY(oparg + 1);
                 DISPATCH();
             }
+        }
+
+        // Only used by Tier 2
+        op(_ITER_EXHAUSTED_RANGE, (iter -- iter, exhausted)) {
+            _PyRangeIterObject *r = (_PyRangeIterObject *)iter;
+            assert(Py_TYPE(r) == &PyRangeIter_Type);
+            exhausted = r->len <= 0 ? Py_True : Py_False;
+        }
+
+        op(_ITER_NEXT_RANGE, (iter -- iter, next)) {
+            _PyRangeIterObject *r = (_PyRangeIterObject *)iter;
+            assert(Py_TYPE(r) == &PyRangeIter_Type);
+            assert(r->len > 0);
             long value = r->start;
             r->start = value + r->step;
             r->len--;
             next = PyLong_FromLong(value);
-            if (next == NULL) {
-                goto error;
-            }
+            ERROR_IF(next == NULL, error);
         }
 
+        macro(FOR_ITER_RANGE) =
+            unused/1 + _ITER_CHECK_RANGE + _ITER_JUMP_RANGE + _ITER_NEXT_RANGE;
+
         inst(FOR_ITER_GEN, (unused/1, iter -- iter, unused)) {
             DEOPT_IF(tstate->interp->eval_frame, FOR_ITER);
             PyGenObject *gen = (PyGenObject *)iter;
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h
index 1df8feba352..2c2dbf429ce 100644
--- a/Python/executor_cases.c.h
+++ b/Python/executor_cases.c.h
@@ -1720,6 +1720,40 @@
             break;
         }
 
+        case _ITER_CHECK_RANGE: {
+            PyObject *iter = stack_pointer[-1];
+            _PyRangeIterObject *r = (_PyRangeIterObject *)iter;
+            DEOPT_IF(Py_TYPE(r) != &PyRangeIter_Type, FOR_ITER);
+            break;
+        }
+
+        case _ITER_EXHAUSTED_RANGE: {
+            PyObject *iter = stack_pointer[-1];
+            PyObject *exhausted;
+            _PyRangeIterObject *r = (_PyRangeIterObject *)iter;
+            assert(Py_TYPE(r) == &PyRangeIter_Type);
+            exhausted = r->len <= 0 ? Py_True : Py_False;
+            STACK_GROW(1);
+            stack_pointer[-1] = exhausted;
+            break;
+        }
+
+        case _ITER_NEXT_RANGE: {
+            PyObject *iter = stack_pointer[-1];
+            PyObject *next;
+            _PyRangeIterObject *r = (_PyRangeIterObject *)iter;
+            assert(Py_TYPE(r) == &PyRangeIter_Type);
+            assert(r->len > 0);
+            long value = r->start;
+            r->start = value + r->step;
+            r->len--;
+            next = PyLong_FromLong(value);
+            if (next == NULL) goto error;
+            STACK_GROW(1);
+            stack_pointer[-1] = next;
+            break;
+        }
+
         case WITH_EXCEPT_START: {
             PyObject *val = stack_pointer[-1];
             PyObject *lasti = stack_pointer[-3];
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index f7a18b43ff1..383432f51a8 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -3092,29 +3092,47 @@
         }
 
         TARGET(FOR_ITER_RANGE) {
-            PyObject *iter = stack_pointer[-1];
-            PyObject *next;
-            _PyRangeIterObject *r = (_PyRangeIterObject *)iter;
-            DEOPT_IF(Py_TYPE(r) != &PyRangeIter_Type, FOR_ITER);
-            STAT_INC(FOR_ITER, hit);
-            if (r->len <= 0) {
-                STACK_SHRINK(1);
-                Py_DECREF(r);
-                SKIP_OVER(INLINE_CACHE_ENTRIES_FOR_ITER);
-                // Jump over END_FOR instruction.
-                JUMPBY(oparg + 1);
-                DISPATCH();
+            PyObject *_tmp_1;
+            PyObject *_tmp_2 = stack_pointer[-1];
+            {
+                PyObject *iter = _tmp_2;
+                _PyRangeIterObject *r = (_PyRangeIterObject *)iter;
+                DEOPT_IF(Py_TYPE(r) != &PyRangeIter_Type, FOR_ITER);
+                _tmp_2 = iter;
             }
-            long value = r->start;
-            r->start = value + r->step;
-            r->len--;
-            next = PyLong_FromLong(value);
-            if (next == NULL) {
-                goto error;
+            {
+                PyObject *iter = _tmp_2;
+                _PyRangeIterObject *r = (_PyRangeIterObject *)iter;
+                assert(Py_TYPE(r) == &PyRangeIter_Type);
+                STAT_INC(FOR_ITER, hit);
+                if (r->len <= 0) {
+                    STACK_SHRINK(1);
+                    Py_DECREF(r);
+                    SKIP_OVER(INLINE_CACHE_ENTRIES_FOR_ITER);
+                    // Jump over END_FOR instruction.
+                    JUMPBY(oparg + 1);
+                    DISPATCH();
+                }
+                _tmp_2 = iter;
+            }
+            {
+                PyObject *iter = _tmp_2;
+                PyObject *next;
+                _PyRangeIterObject *r = (_PyRangeIterObject *)iter;
+                assert(Py_TYPE(r) == &PyRangeIter_Type);
+                assert(r->len > 0);
+                long value = r->start;
+                r->start = value + r->step;
+                r->len--;
+                next = PyLong_FromLong(value);
+                if (next == NULL) goto error;
+                _tmp_2 = iter;
+                _tmp_1 = next;
             }
-            STACK_GROW(1);
-            stack_pointer[-1] = next;
             next_instr += 1;
+            STACK_GROW(1);
+            stack_pointer[-1] = _tmp_1;
+            stack_pointer[-2] = _tmp_2;
             DISPATCH();
         }
 
diff --git a/Python/optimizer.c b/Python/optimizer.c
index c3fdee63a7e..abd2351f6b7 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -479,6 +479,28 @@ translate_bytecode_to_trace(
                 break;
             }
 
+            case FOR_ITER_RANGE:
+            {
+                // Assume jump unlikely (can a for-loop exit be likely?)
+                // Reserve 9 entries (4 here, 3 stub, plus SAVE_IP + EXIT_TRACE)
+                if (trace_length + 9 > max_length) {
+                    DPRINTF(1, "Ran out of space for FOR_ITER_RANGE\n");
+                    goto done;
+                }
+                _Py_CODEUNIT *target_instr =  // +1 at the end skips over END_FOR
+                    instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + oparg + 1;
+                max_length -= 3;  // Really the start of the stubs
+                ADD_TO_TRACE(_ITER_CHECK_RANGE, 0);
+                ADD_TO_TRACE(_ITER_EXHAUSTED_RANGE, 0);
+                ADD_TO_TRACE(_POP_JUMP_IF_TRUE, max_length);
+                ADD_TO_TRACE(_ITER_NEXT_RANGE, 0);
+
+                ADD_TO_STUB(max_length + 0, POP_TOP, 0);
+                ADD_TO_STUB(max_length + 1, SAVE_IP, INSTR_IP(target_instr, code));
+                ADD_TO_STUB(max_length + 2, EXIT_TRACE, 0);
+                break;
+            }
+
             default:
             {
                 const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode];
@@ -574,8 +596,8 @@ done:
                     }
                 }
             }
-            trace_length += buffer_size - max_length;
         }
+        trace_length += buffer_size - max_length;
         return trace_length;
     }
     else {