GH-111848: Tidy up tier 2 handling of FOR_ITER specialization by using DEOPT_IF instead of jumps. (GH-111849)

This commit is contained in:
Mark Shannon 2023-11-08 13:31:55 +00:00 committed by GitHub
parent 11e83488c5
commit 06efb60264
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 59 additions and 136 deletions

View File

@ -81,15 +81,15 @@
#define _FOR_ITER 353
#define _ITER_CHECK_LIST 354
#define _ITER_JUMP_LIST 355
#define _IS_ITER_EXHAUSTED_LIST 356
#define _GUARD_NOT_EXHAUSTED_LIST 356
#define _ITER_NEXT_LIST 357
#define _ITER_CHECK_TUPLE 358
#define _ITER_JUMP_TUPLE 359
#define _IS_ITER_EXHAUSTED_TUPLE 360
#define _GUARD_NOT_EXHAUSTED_TUPLE 360
#define _ITER_NEXT_TUPLE 361
#define _ITER_CHECK_RANGE 362
#define _ITER_JUMP_RANGE 363
#define _IS_ITER_EXHAUSTED_RANGE 364
#define _GUARD_NOT_EXHAUSTED_RANGE 364
#define _ITER_NEXT_RANGE 365
#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 366
#define _GUARD_KEYS_VERSION 367
@ -542,7 +542,7 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump) {
return 1;
case _ITER_JUMP_LIST:
return 1;
case _IS_ITER_EXHAUSTED_LIST:
case _GUARD_NOT_EXHAUSTED_LIST:
return 1;
case _ITER_NEXT_LIST:
return 1;
@ -552,7 +552,7 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump) {
return 1;
case _ITER_JUMP_TUPLE:
return 1;
case _IS_ITER_EXHAUSTED_TUPLE:
case _GUARD_NOT_EXHAUSTED_TUPLE:
return 1;
case _ITER_NEXT_TUPLE:
return 1;
@ -562,7 +562,7 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump) {
return 1;
case _ITER_JUMP_RANGE:
return 1;
case _IS_ITER_EXHAUSTED_RANGE:
case _GUARD_NOT_EXHAUSTED_RANGE:
return 1;
case _ITER_NEXT_RANGE:
return 1;
@ -1170,8 +1170,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump) {
return 1;
case _ITER_JUMP_LIST:
return 1;
case _IS_ITER_EXHAUSTED_LIST:
return 2;
case _GUARD_NOT_EXHAUSTED_LIST:
return 1;
case _ITER_NEXT_LIST:
return 2;
case FOR_ITER_LIST:
@ -1180,8 +1180,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump) {
return 1;
case _ITER_JUMP_TUPLE:
return 1;
case _IS_ITER_EXHAUSTED_TUPLE:
return 2;
case _GUARD_NOT_EXHAUSTED_TUPLE:
return 1;
case _ITER_NEXT_TUPLE:
return 2;
case FOR_ITER_TUPLE:
@ -1190,8 +1190,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump) {
return 1;
case _ITER_JUMP_RANGE:
return 1;
case _IS_ITER_EXHAUSTED_RANGE:
return 2;
case _GUARD_NOT_EXHAUSTED_RANGE:
return 1;
case _ITER_NEXT_RANGE:
return 2;
case FOR_ITER_RANGE:
@ -1651,17 +1651,17 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = {
[INSTRUMENTED_FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG },
[_ITER_CHECK_LIST] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG },
[_ITER_JUMP_LIST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG },
[_IS_ITER_EXHAUSTED_LIST] = { true, INSTR_FMT_IX, 0 },
[_GUARD_NOT_EXHAUSTED_LIST] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG },
[_ITER_NEXT_LIST] = { true, INSTR_FMT_IX, 0 },
[FOR_ITER_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_DEOPT_FLAG },
[_ITER_CHECK_TUPLE] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG },
[_ITER_JUMP_TUPLE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG },
[_IS_ITER_EXHAUSTED_TUPLE] = { true, INSTR_FMT_IX, 0 },
[_GUARD_NOT_EXHAUSTED_TUPLE] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG },
[_ITER_NEXT_TUPLE] = { true, INSTR_FMT_IX, 0 },
[FOR_ITER_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_DEOPT_FLAG },
[_ITER_CHECK_RANGE] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG },
[_ITER_JUMP_RANGE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG },
[_IS_ITER_EXHAUSTED_RANGE] = { true, INSTR_FMT_IX, 0 },
[_GUARD_NOT_EXHAUSTED_RANGE] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG },
[_ITER_NEXT_RANGE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG },
[FOR_ITER_RANGE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG },
[FOR_ITER_GEN] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
@ -1870,6 +1870,9 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACRO_EXPAN
[MATCH_KEYS] = { .nuops = 1, .uops = { { MATCH_KEYS, 0, 0 } } },
[GET_ITER] = { .nuops = 1, .uops = { { GET_ITER, 0, 0 } } },
[GET_YIELD_FROM_ITER] = { .nuops = 1, .uops = { { GET_YIELD_FROM_ITER, 0, 0 } } },
[FOR_ITER_LIST] = { .nuops = 3, .uops = { { _ITER_CHECK_LIST, 0, 0 }, { _ITER_JUMP_LIST, 0, 0 }, { _ITER_NEXT_LIST, 0, 0 } } },
[FOR_ITER_TUPLE] = { .nuops = 3, .uops = { { _ITER_CHECK_TUPLE, 0, 0 }, { _ITER_JUMP_TUPLE, 0, 0 }, { _ITER_NEXT_TUPLE, 0, 0 } } },
[FOR_ITER_RANGE] = { .nuops = 3, .uops = { { _ITER_CHECK_RANGE, 0, 0 }, { _ITER_JUMP_RANGE, 0, 0 }, { _ITER_NEXT_RANGE, 0, 0 } } },
[BEFORE_ASYNC_WITH] = { .nuops = 1, .uops = { { BEFORE_ASYNC_WITH, 0, 0 } } },
[BEFORE_WITH] = { .nuops = 1, .uops = { { BEFORE_WITH, 0, 0 } } },
[WITH_EXCEPT_START] = { .nuops = 1, .uops = { { WITH_EXCEPT_START, 0, 0 } } },
@ -1966,15 +1969,15 @@ const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE] = {
[_FOR_ITER] = "_FOR_ITER",
[_ITER_CHECK_LIST] = "_ITER_CHECK_LIST",
[_ITER_JUMP_LIST] = "_ITER_JUMP_LIST",
[_IS_ITER_EXHAUSTED_LIST] = "_IS_ITER_EXHAUSTED_LIST",
[_GUARD_NOT_EXHAUSTED_LIST] = "_GUARD_NOT_EXHAUSTED_LIST",
[_ITER_NEXT_LIST] = "_ITER_NEXT_LIST",
[_ITER_CHECK_TUPLE] = "_ITER_CHECK_TUPLE",
[_ITER_JUMP_TUPLE] = "_ITER_JUMP_TUPLE",
[_IS_ITER_EXHAUSTED_TUPLE] = "_IS_ITER_EXHAUSTED_TUPLE",
[_GUARD_NOT_EXHAUSTED_TUPLE] = "_GUARD_NOT_EXHAUSTED_TUPLE",
[_ITER_NEXT_TUPLE] = "_ITER_NEXT_TUPLE",
[_ITER_CHECK_RANGE] = "_ITER_CHECK_RANGE",
[_ITER_JUMP_RANGE] = "_ITER_JUMP_RANGE",
[_IS_ITER_EXHAUSTED_RANGE] = "_IS_ITER_EXHAUSTED_RANGE",
[_GUARD_NOT_EXHAUSTED_RANGE] = "_GUARD_NOT_EXHAUSTED_RANGE",
[_ITER_NEXT_RANGE] = "_ITER_NEXT_RANGE",
[_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT] = "_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT",
[_GUARD_KEYS_VERSION] = "_GUARD_KEYS_VERSION",

View File

@ -2712,7 +2712,7 @@ class TestUops(unittest.TestCase):
# for i, (opname, oparg) in enumerate(ex):
# print(f"{i:4d}: {opname:<20s} {oparg:3d}")
uops = {opname for opname, _, _ in ex}
self.assertIn("_IS_ITER_EXHAUSTED_RANGE", uops)
self.assertIn("_GUARD_NOT_EXHAUSTED_RANGE", uops)
# Verification that the jump goes past END_FOR
# is done by manual inspection of the output
@ -2734,7 +2734,7 @@ class TestUops(unittest.TestCase):
# for i, (opname, oparg) in enumerate(ex):
# print(f"{i:4d}: {opname:<20s} {oparg:3d}")
uops = {opname for opname, _, _ in ex}
self.assertIn("_IS_ITER_EXHAUSTED_LIST", uops)
self.assertIn("_GUARD_NOT_EXHAUSTED_LIST", uops)
# Verification that the jump goes past END_FOR
# is done by manual inspection of the output
@ -2756,7 +2756,7 @@ class TestUops(unittest.TestCase):
# for i, (opname, oparg) in enumerate(ex):
# print(f"{i:4d}: {opname:<20s} {oparg:3d}")
uops = {opname for opname, _, _ in ex}
self.assertIn("_IS_ITER_EXHAUSTED_TUPLE", uops)
self.assertIn("_GUARD_NOT_EXHAUSTED_TUPLE", uops)
# Verification that the jump goes past END_FOR
# is done by manual inspection of the output

View File

@ -628,9 +628,7 @@
break;
}
case _IS_ITER_EXHAUSTED_LIST: {
STACK_GROW(1);
PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true);
case _GUARD_NOT_EXHAUSTED_LIST: {
break;
}
@ -644,9 +642,7 @@
break;
}
case _IS_ITER_EXHAUSTED_TUPLE: {
STACK_GROW(1);
PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true);
case _GUARD_NOT_EXHAUSTED_TUPLE: {
break;
}
@ -660,9 +656,7 @@
break;
}
case _IS_ITER_EXHAUSTED_RANGE: {
STACK_GROW(1);
PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true);
case _GUARD_NOT_EXHAUSTED_RANGE: {
break;
}

View File

@ -2580,7 +2580,7 @@ dummy_func(
DEOPT_IF(Py_TYPE(iter) != &PyListIter_Type);
}
op(_ITER_JUMP_LIST, (iter -- iter)) {
replaced op(_ITER_JUMP_LIST, (iter -- iter)) {
_PyListIterObject *it = (_PyListIterObject *)iter;
assert(Py_TYPE(iter) == &PyListIter_Type);
STAT_INC(FOR_ITER, hit);
@ -2599,21 +2599,12 @@ dummy_func(
}
// Only used by Tier 2
op(_IS_ITER_EXHAUSTED_LIST, (iter -- iter, exhausted)) {
op(_GUARD_NOT_EXHAUSTED_LIST, (iter -- iter)) {
_PyListIterObject *it = (_PyListIterObject *)iter;
assert(Py_TYPE(iter) == &PyListIter_Type);
PyListObject *seq = it->it_seq;
if (seq == NULL) {
exhausted = Py_True;
}
else if (it->it_index >= PyList_GET_SIZE(seq)) {
Py_DECREF(seq);
it->it_seq = NULL;
exhausted = Py_True;
}
else {
exhausted = Py_False;
}
DEOPT_IF(seq == NULL);
DEOPT_IF(it->it_index >= PyList_GET_SIZE(seq));
}
op(_ITER_NEXT_LIST, (iter -- iter, next)) {
@ -2635,7 +2626,7 @@ dummy_func(
DEOPT_IF(Py_TYPE(iter) != &PyTupleIter_Type);
}
op(_ITER_JUMP_TUPLE, (iter -- iter)) {
replaced op(_ITER_JUMP_TUPLE, (iter -- iter)) {
_PyTupleIterObject *it = (_PyTupleIterObject *)iter;
assert(Py_TYPE(iter) == &PyTupleIter_Type);
STAT_INC(FOR_ITER, hit);
@ -2654,21 +2645,12 @@ dummy_func(
}
// Only used by Tier 2
op(_IS_ITER_EXHAUSTED_TUPLE, (iter -- iter, exhausted)) {
op(_GUARD_NOT_EXHAUSTED_TUPLE, (iter -- iter)) {
_PyTupleIterObject *it = (_PyTupleIterObject *)iter;
assert(Py_TYPE(iter) == &PyTupleIter_Type);
PyTupleObject *seq = it->it_seq;
if (seq == NULL) {
exhausted = Py_True;
}
else if (it->it_index >= PyTuple_GET_SIZE(seq)) {
Py_DECREF(seq);
it->it_seq = NULL;
exhausted = Py_True;
}
else {
exhausted = Py_False;
}
DEOPT_IF(seq == NULL);
DEOPT_IF(it->it_index >= PyTuple_GET_SIZE(seq));
}
op(_ITER_NEXT_TUPLE, (iter -- iter, next)) {
@ -2691,7 +2673,7 @@ dummy_func(
DEOPT_IF(Py_TYPE(r) != &PyRangeIter_Type);
}
op(_ITER_JUMP_RANGE, (iter -- iter)) {
replaced op(_ITER_JUMP_RANGE, (iter -- iter)) {
_PyRangeIterObject *r = (_PyRangeIterObject *)iter;
assert(Py_TYPE(r) == &PyRangeIter_Type);
STAT_INC(FOR_ITER, hit);
@ -2705,10 +2687,10 @@ dummy_func(
}
// Only used by Tier 2
op(_IS_ITER_EXHAUSTED_RANGE, (iter -- iter, exhausted)) {
op(_GUARD_NOT_EXHAUSTED_RANGE, (iter -- iter)) {
_PyRangeIterObject *r = (_PyRangeIterObject *)iter;
assert(Py_TYPE(r) == &PyRangeIter_Type);
exhausted = r->len <= 0 ? Py_True : Py_False;
DEOPT_IF(r->len <= 0);
}
op(_ITER_NEXT_RANGE, (iter -- iter, next)) {

View File

@ -2138,26 +2138,14 @@
break;
}
case _IS_ITER_EXHAUSTED_LIST: {
case _GUARD_NOT_EXHAUSTED_LIST: {
PyObject *iter;
PyObject *exhausted;
iter = stack_pointer[-1];
_PyListIterObject *it = (_PyListIterObject *)iter;
assert(Py_TYPE(iter) == &PyListIter_Type);
PyListObject *seq = it->it_seq;
if (seq == NULL) {
exhausted = Py_True;
}
else if (it->it_index >= PyList_GET_SIZE(seq)) {
Py_DECREF(seq);
it->it_seq = NULL;
exhausted = Py_True;
}
else {
exhausted = Py_False;
}
STACK_GROW(1);
stack_pointer[-1] = exhausted;
DEOPT_IF(seq == NULL, _GUARD_NOT_EXHAUSTED_LIST);
DEOPT_IF(it->it_index >= PyList_GET_SIZE(seq), _GUARD_NOT_EXHAUSTED_LIST);
break;
}
@ -2183,26 +2171,14 @@
break;
}
case _IS_ITER_EXHAUSTED_TUPLE: {
case _GUARD_NOT_EXHAUSTED_TUPLE: {
PyObject *iter;
PyObject *exhausted;
iter = stack_pointer[-1];
_PyTupleIterObject *it = (_PyTupleIterObject *)iter;
assert(Py_TYPE(iter) == &PyTupleIter_Type);
PyTupleObject *seq = it->it_seq;
if (seq == NULL) {
exhausted = Py_True;
}
else if (it->it_index >= PyTuple_GET_SIZE(seq)) {
Py_DECREF(seq);
it->it_seq = NULL;
exhausted = Py_True;
}
else {
exhausted = Py_False;
}
STACK_GROW(1);
stack_pointer[-1] = exhausted;
DEOPT_IF(seq == NULL, _GUARD_NOT_EXHAUSTED_TUPLE);
DEOPT_IF(it->it_index >= PyTuple_GET_SIZE(seq), _GUARD_NOT_EXHAUSTED_TUPLE);
break;
}
@ -2229,15 +2205,12 @@
break;
}
case _IS_ITER_EXHAUSTED_RANGE: {
case _GUARD_NOT_EXHAUSTED_RANGE: {
PyObject *iter;
PyObject *exhausted;
iter = stack_pointer[-1];
_PyRangeIterObject *r = (_PyRangeIterObject *)iter;
assert(Py_TYPE(r) == &PyRangeIter_Type);
exhausted = r->len <= 0 ? Py_True : Py_False;
STACK_GROW(1);
stack_pointer[-1] = exhausted;
DEOPT_IF(r->len <= 0, _GUARD_NOT_EXHAUSTED_RANGE);
break;
}

View File

@ -384,6 +384,14 @@ PyTypeObject _PyUOpExecutor_Type = {
.tp_methods = executor_methods,
};
/* Table of uop substitutions, indexed by uop number.
 *
 * A non-zero entry names the uop to use in place of the index uop; each
 * _ITER_JUMP_* uop maps to the matching _GUARD_NOT_EXHAUSTED_* uop.
 * Entries not listed here are implicitly zero, which the lookup code
 * treats as "no replacement".
 *
 * NOTE(review): presumably consulted only while expanding macro
 * instructions into uops -- confirm against the caller.
 *
 * TODO: generate this table instead of maintaining it by hand.
 */
static const uint16_t
_PyUop_Replacements[OPCODE_METADATA_SIZE] = {
[_ITER_JUMP_RANGE] = _GUARD_NOT_EXHAUSTED_RANGE,
[_ITER_JUMP_LIST] = _GUARD_NOT_EXHAUSTED_LIST,
[_ITER_JUMP_TUPLE] = _GUARD_NOT_EXHAUSTED_TUPLE,
};
#define TRACE_STACK_SIZE 5
/* Returns 1 on success,
@ -586,46 +594,6 @@ pop_jump_if_bool:
break;
}
case FOR_ITER_LIST:
case FOR_ITER_TUPLE:
case FOR_ITER_RANGE:
{
RESERVE(4, 3);
int check_op, exhausted_op, next_op;
switch (opcode) {
case FOR_ITER_LIST:
check_op = _ITER_CHECK_LIST;
exhausted_op = _IS_ITER_EXHAUSTED_LIST;
next_op = _ITER_NEXT_LIST;
break;
case FOR_ITER_TUPLE:
check_op = _ITER_CHECK_TUPLE;
exhausted_op = _IS_ITER_EXHAUSTED_TUPLE;
next_op = _ITER_NEXT_TUPLE;
break;
case FOR_ITER_RANGE:
check_op = _ITER_CHECK_RANGE;
exhausted_op = _IS_ITER_EXHAUSTED_RANGE;
next_op = _ITER_NEXT_RANGE;
break;
default:
Py_UNREACHABLE();
}
// Assume jump unlikely (can a for-loop exit be likely?)
_Py_CODEUNIT *target_instr = // +1 at the end skips over END_FOR
instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + oparg + 1;
max_length -= 3; // Really the start of the stubs
ADD_TO_TRACE(check_op, 0, 0);
ADD_TO_TRACE(exhausted_op, 0, 0);
ADD_TO_TRACE(_POP_JUMP_IF_TRUE, max_length, 0);
ADD_TO_TRACE(next_op, 0, 0);
ADD_TO_STUB(max_length + 0, POP_TOP, 0, 0);
ADD_TO_STUB(max_length + 1, _SET_IP, INSTR_IP(target_instr, code), 0);
ADD_TO_STUB(max_length + 2, _EXIT_TRACE, 0, 0);
break;
}
default:
{
const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode];
@ -661,6 +629,9 @@ pop_jump_if_bool:
oparg += extras;
}
}
if (_PyUop_Replacements[uop]) {
uop = _PyUop_Replacements[uop];
}
break;
case OPARG_CACHE_1:
operand = read_u16(&instr[offset].cache);

View File

@ -655,7 +655,7 @@ class Generator(Analyzer):
if "specializing" in part.instr.annotations:
continue
# All other component instructions must be viable uops
if not part.instr.is_viable_uop():
if not part.instr.is_viable_uop() and "replaced" not in part.instr.annotations:
# This note just reminds us about macros that cannot
# be expanded to Tier 2 uops. It is not an error.
# It is sometimes emitted for macros that have a

View File

@ -216,7 +216,7 @@ kwds.append(MACRO)
keywords = {name.lower(): name for name in kwds}
ANNOTATION = "ANNOTATION"
annotations = {"specializing", "guard", "override", "register"}
annotations = {"specializing", "guard", "override", "register", "replaced"}
__all__ = []
__all__.extend(kwds)