GH-87849: Simplify stack effect of SEND and specialize it for generators and coroutines. (GH-101788)

This commit is contained in:
Mark Shannon 2023-02-13 11:24:55 +00:00 committed by GitHub
parent a1f08f5f19
commit 160f2fe2b9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 191 additions and 105 deletions

View File

@ -92,6 +92,12 @@ typedef struct {
#define INLINE_CACHE_ENTRIES_FOR_ITER CACHE_ENTRIES(_PyForIterCache)
// Inline cache layout for the SEND instruction: a single 16-bit counter,
// which the SEND handler tests and decrements as its adaptive
// (specialization) counter.
typedef struct {
uint16_t counter;
} _PySendCache;
// Size of the SEND inline cache, expressed in cache entries; the specializer
// asserts that this matches _PyOpcode_Caches[SEND].
#define INLINE_CACHE_ENTRIES_SEND CACHE_ENTRIES(_PySendCache)
// Borrowed references to common callables:
struct callable_cache {
PyObject *isinstance;
@ -233,6 +239,7 @@ extern void _Py_Specialize_CompareAndBranch(PyObject *lhs, PyObject *rhs,
extern void _Py_Specialize_UnpackSequence(PyObject *seq, _Py_CODEUNIT *instr,
int oparg);
extern void _Py_Specialize_ForIter(PyObject *iter, _Py_CODEUNIT *instr, int oparg);
extern void _Py_Specialize_Send(PyObject *receiver, _Py_CODEUNIT *instr);
/* Finalizer function for static codeobjects used in deepfreeze.py */
extern void _PyStaticCode_Fini(PyCodeObject *co);

View File

@ -50,6 +50,7 @@ const uint8_t _PyOpcode_Caches[256] = {
[COMPARE_OP] = 1,
[LOAD_GLOBAL] = 5,
[BINARY_OP] = 1,
[SEND] = 1,
[COMPARE_AND_BRANCH] = 1,
[CALL] = 4,
};
@ -196,6 +197,7 @@ const uint8_t _PyOpcode_Deopt[256] = {
[RETURN_GENERATOR] = RETURN_GENERATOR,
[RETURN_VALUE] = RETURN_VALUE,
[SEND] = SEND,
[SEND_GEN] = SEND,
[SETUP_ANNOTATIONS] = SETUP_ANNOTATIONS,
[SET_ADD] = SET_ADD,
[SET_UPDATE] = SET_UPDATE,
@ -395,7 +397,7 @@ static const char *const _PyOpcode_OpName[263] = {
[SET_UPDATE] = "SET_UPDATE",
[DICT_MERGE] = "DICT_MERGE",
[DICT_UPDATE] = "DICT_UPDATE",
[166] = "<166>",
[SEND_GEN] = "SEND_GEN",
[167] = "<167>",
[168] = "<168>",
[169] = "<169>",
@ -496,7 +498,6 @@ static const char *const _PyOpcode_OpName[263] = {
#endif
#define EXTRA_CASES \
case 166: \
case 167: \
case 168: \
case 169: \

1
Include/opcode.h generated
View File

@ -187,6 +187,7 @@ extern "C" {
#define UNPACK_SEQUENCE_LIST 159
#define UNPACK_SEQUENCE_TUPLE 160
#define UNPACK_SEQUENCE_TWO_TUPLE 161
#define SEND_GEN 166
#define DO_TRACING 255
#define HAS_ARG(op) ((((op) >= HAVE_ARGUMENT) && (!IS_PSEUDO_OPCODE(op)))\

View File

@ -39,6 +39,7 @@ LOAD_GLOBAL = opmap['LOAD_GLOBAL']
BINARY_OP = opmap['BINARY_OP']
JUMP_BACKWARD = opmap['JUMP_BACKWARD']
FOR_ITER = opmap['FOR_ITER']
SEND = opmap['SEND']
LOAD_ATTR = opmap['LOAD_ATTR']
CACHE = opmap["CACHE"]
@ -453,6 +454,7 @@ def _get_instructions_bytes(code, varname_from_oparg=None,
argrepr = ''
positions = Positions(*next(co_positions, ()))
deop = _deoptop(op)
caches = _inline_cache_entries[deop]
if arg is not None:
# Set argval to the dereferenced value of the argument when
# available, and argrepr to the string representation of argval.
@ -478,8 +480,7 @@ def _get_instructions_bytes(code, varname_from_oparg=None,
elif deop in hasjrel:
signed_arg = -arg if _is_backward_jump(deop) else arg
argval = offset + 2 + signed_arg*2
if deop == FOR_ITER:
argval += 2
argval += 2 * caches
argrepr = "to " + repr(argval)
elif deop in haslocal or deop in hasfree:
argval, argrepr = _get_name_info(arg, varname_from_oparg)
@ -633,12 +634,12 @@ def findlabels(code):
for offset, op, arg in _unpack_opargs(code):
if arg is not None:
deop = _deoptop(op)
caches = _inline_cache_entries[deop]
if deop in hasjrel:
if _is_backward_jump(deop):
arg = -arg
label = offset + 2 + arg*2
if deop == FOR_ITER:
label += 2
label += 2 * caches
elif deop in hasjabs:
label = arg*2
else:

View File

@ -432,6 +432,7 @@ _code_type = type(_write_atomic.__code__)
# Python 3.12a5 3516 (Add COMPARE_AND_BRANCH instruction)
# Python 3.12a5 3517 (Change YIELD_VALUE oparg to exception block depth)
# Python 3.12a5 3518 (Add RETURN_CONST instruction)
# Python 3.12a5 3519 (Modify SEND instruction)
# Python 3.13 will start with 3550
@ -444,7 +445,7 @@ _code_type = type(_write_atomic.__code__)
# Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array
# in PC/launcher.c must also be updated.
MAGIC_NUMBER = (3518).to_bytes(2, 'little') + b'\r\n'
MAGIC_NUMBER = (3519).to_bytes(2, 'little') + b'\r\n'
_RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c

View File

@ -167,7 +167,7 @@ def_op('COPY', 120)
def_op('RETURN_CONST', 121)
hasconst.append(121)
def_op('BINARY_OP', 122)
jrel_op('SEND', 123) # Number of bytes to skip
jrel_op('SEND', 123) # Number of words to skip
def_op('LOAD_FAST', 124) # Local variable number, no null check
haslocal.append(124)
def_op('STORE_FAST', 125) # Local variable number
@ -370,6 +370,9 @@ _specializations = {
"UNPACK_SEQUENCE_TUPLE",
"UNPACK_SEQUENCE_TWO_TUPLE",
],
"SEND": [
"SEND_GEN",
],
}
_specialized_instructions = [
opcode for family in _specializations.values() for opcode in family
@ -429,6 +432,9 @@ _cache_format = {
"STORE_SUBSCR": {
"counter": 1,
},
"SEND": {
"counter": 1,
},
}
_inline_cache_entries = [

View File

@ -475,11 +475,13 @@ dis_asyncwith = """\
BEFORE_ASYNC_WITH
GET_AWAITABLE 1
LOAD_CONST 0 (None)
>> SEND 3 (to 22)
>> SEND 3 (to 24)
YIELD_VALUE 2
RESUME 3
JUMP_BACKWARD_NO_INTERRUPT 4 (to 14)
>> POP_TOP
JUMP_BACKWARD_NO_INTERRUPT 5 (to 14)
>> SWAP 2
POP_TOP
POP_TOP
%3d LOAD_CONST 1 (1)
STORE_FAST 1 (x)
@ -490,30 +492,33 @@ dis_asyncwith = """\
CALL 2
GET_AWAITABLE 2
LOAD_CONST 0 (None)
>> SEND 3 (to 56)
>> SEND 3 (to 64)
YIELD_VALUE 2
RESUME 3
JUMP_BACKWARD_NO_INTERRUPT 4 (to 48)
JUMP_BACKWARD_NO_INTERRUPT 5 (to 54)
>> POP_TOP
POP_TOP
%3d LOAD_CONST 2 (2)
STORE_FAST 2 (y)
RETURN_CONST 0 (None)
%3d >> CLEANUP_THROW
JUMP_BACKWARD 23 (to 22)
JUMP_BACKWARD 27 (to 24)
>> CLEANUP_THROW
JUMP_BACKWARD 8 (to 56)
JUMP_BACKWARD 9 (to 64)
>> PUSH_EXC_INFO
WITH_EXCEPT_START
GET_AWAITABLE 2
LOAD_CONST 0 (None)
>> SEND 4 (to 90)
>> SEND 4 (to 102)
YIELD_VALUE 3
RESUME 3
JUMP_BACKWARD_NO_INTERRUPT 4 (to 80)
JUMP_BACKWARD_NO_INTERRUPT 5 (to 90)
>> CLEANUP_THROW
>> POP_JUMP_IF_TRUE 1 (to 94)
>> SWAP 2
POP_TOP
POP_JUMP_IF_TRUE 1 (to 110)
RERAISE 2
>> POP_TOP
POP_EXCEPT

View File

@ -0,0 +1,3 @@
Change the ``SEND`` instruction to leave the receiver on the stack. This
allows the specialized form of ``SEND`` to skip the chain of C calls and jump
directly to the ``RESUME`` in the generator or coroutine.

View File

@ -334,10 +334,10 @@ mark_stacks(PyCodeObject *code_obj, int len)
break;
}
case SEND:
j = get_arg(code, i) + i + 1;
j = get_arg(code, i) + i + INLINE_CACHE_ENTRIES_SEND + 1;
assert(j < len);
assert(stacks[j] == UNINITIALIZED || stacks[j] == pop_value(next_stack));
stacks[j] = pop_value(next_stack);
assert(stacks[j] == UNINITIALIZED || stacks[j] == next_stack);
stacks[j] = next_stack;
stacks[i+1] = next_stack;
break;
case JUMP_FORWARD:

View File

@ -680,51 +680,66 @@ dummy_func(
PREDICT(LOAD_CONST);
}
inst(SEND, (receiver, v -- receiver if (!jump), retval)) {
// Instruction family for SEND and its specializations. The family has its own
// name and is sized by the SEND cache (_PySendCache); it must not reuse the
// FOR_ITER family name or cache size, even though both caches currently hold
// a single entry.
family(send, INLINE_CACHE_ENTRIES_SEND) = {
SEND,
SEND_GEN,
};
inst(SEND, (unused/1, receiver, v -- receiver, retval)) {
#if ENABLE_SPECIALIZATION
_PySendCache *cache = (_PySendCache *)next_instr;
if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) {
assert(cframe.use_tracing == 0);
next_instr--;
_Py_Specialize_Send(receiver, next_instr);
DISPATCH_SAME_OPARG();
}
STAT_INC(SEND, deferred);
DECREMENT_ADAPTIVE_COUNTER(cache->counter);
#endif /* ENABLE_SPECIALIZATION */
assert(frame != &entry_frame);
bool jump = false;
PySendResult gen_status;
if (tstate->c_tracefunc == NULL) {
gen_status = PyIter_Send(receiver, v, &retval);
} else {
if (Py_IsNone(v) && PyIter_Check(receiver)) {
retval = Py_TYPE(receiver)->tp_iternext(receiver);
}
else {
retval = PyObject_CallMethodOneArg(receiver, &_Py_ID(send), v);
}
if (retval == NULL) {
if (tstate->c_tracefunc != NULL
&& _PyErr_ExceptionMatches(tstate, PyExc_StopIteration))
call_exc_trace(tstate->c_tracefunc, tstate->c_traceobj, tstate, frame);
if (_PyGen_FetchStopIterationValue(&retval) == 0) {
gen_status = PYGEN_RETURN;
}
else {
gen_status = PYGEN_ERROR;
}
}
else {
gen_status = PYGEN_NEXT;
}
}
if (gen_status == PYGEN_ERROR) {
assert(retval == NULL);
goto error;
}
Py_DECREF(v);
if (gen_status == PYGEN_RETURN) {
assert(retval != NULL);
Py_DECREF(receiver);
JUMPBY(oparg);
jump = true;
if (Py_IsNone(v) && PyIter_Check(receiver)) {
retval = Py_TYPE(receiver)->tp_iternext(receiver);
}
else {
retval = PyObject_CallMethodOneArg(receiver, &_Py_ID(send), v);
}
if (retval == NULL) {
if (tstate->c_tracefunc != NULL
&& _PyErr_ExceptionMatches(tstate, PyExc_StopIteration))
call_exc_trace(tstate->c_tracefunc, tstate->c_traceobj, tstate, frame);
if (_PyGen_FetchStopIterationValue(&retval) == 0) {
assert(retval != NULL);
JUMPBY(oparg);
}
else {
assert(retval == NULL);
goto error;
}
}
else {
assert(gen_status == PYGEN_NEXT);
assert(retval != NULL);
}
}
// Specialized form of SEND for receivers whose type is exactly PyGen_Type or
// PyCoro_Type. Instead of calling into the receiver, it pushes the sent value
// onto the receiver's own interpreter frame and dispatches straight into that
// frame. Stack effect: `v` is consumed, `receiver` stays on the stack.
inst(SEND_GEN, (unused/1, receiver, v -- receiver)) {
assert(cframe.use_tracing == 0);
PyGenObject *gen = (PyGenObject *)receiver;
// Fall back to the generic SEND for any other receiver type.
DEOPT_IF(Py_TYPE(gen) != &PyGen_Type &&
Py_TYPE(gen) != &PyCoro_Type, SEND);
// Fall back if the frame state is FRAME_EXECUTING or beyond
// (presumably: already running or finished -- confirm against the
// frame-state enum ordering).
DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, SEND);
STAT_INC(SEND, hit);
_PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
// Record this instruction's oparg in the calling frame.
// NOTE(review): presumably consumed when the generator yields or returns
// to decide where this frame resumes -- confirm in YIELD_VALUE/RETURN_*.
frame->yield_offset = oparg;
// Remove `v` from this frame's stack and hand it to the generator frame.
STACK_SHRINK(1);
_PyFrame_StackPush(gen_frame, v);
gen->gi_frame_state = FRAME_EXECUTING;
// Link the generator's exception state in front of the thread's current
// exc_info.
gen->gi_exc_state.previous_item = tstate->exc_info;
tstate->exc_info = &gen->gi_exc_state;
// Advance past the inline cache plus the oparg jump distance before
// switching frames.
JUMPBY(INLINE_CACHE_ENTRIES_SEND + oparg);
DISPATCH_INLINED(gen_frame);
}
inst(YIELD_VALUE, (retval -- unused)) {
// NOTE: It's important that YIELD_VALUE never raises an exception!
// The compiler treats any exception raised here as a failed close()
@ -796,12 +811,13 @@ dummy_func(
}
}
inst(CLEANUP_THROW, (sub_iter, last_sent_val, exc_value -- value)) {
inst(CLEANUP_THROW, (sub_iter, last_sent_val, exc_value -- none, value)) {
assert(throwflag);
assert(exc_value && PyExceptionInstance_Check(exc_value));
if (PyErr_GivenExceptionMatches(exc_value, PyExc_StopIteration)) {
value = Py_NewRef(((PyStopIterationObject *)exc_value)->value);
DECREF_INPUTS();
none = Py_NewRef(Py_None);
}
else {
_PyErr_SetRaisedException(tstate, Py_NewRef(exc_value));

View File

@ -1789,6 +1789,8 @@ compiler_add_yield_from(struct compiler *c, location loc, int await)
ADDOP(c, loc, CLEANUP_THROW);
USE_LABEL(c, exit);
ADDOP_I(c, loc, SWAP, 2);
ADDOP(c, loc, POP_TOP);
return SUCCESS;
}

View File

@ -882,57 +882,69 @@
}
TARGET(SEND) {
PREDICTED(SEND);
PyObject *v = PEEK(1);
PyObject *receiver = PEEK(2);
PyObject *retval;
#if ENABLE_SPECIALIZATION
_PySendCache *cache = (_PySendCache *)next_instr;
if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) {
assert(cframe.use_tracing == 0);
next_instr--;
_Py_Specialize_Send(receiver, next_instr);
DISPATCH_SAME_OPARG();
}
STAT_INC(SEND, deferred);
DECREMENT_ADAPTIVE_COUNTER(cache->counter);
#endif /* ENABLE_SPECIALIZATION */
assert(frame != &entry_frame);
bool jump = false;
PySendResult gen_status;
if (tstate->c_tracefunc == NULL) {
gen_status = PyIter_Send(receiver, v, &retval);
} else {
if (Py_IsNone(v) && PyIter_Check(receiver)) {
retval = Py_TYPE(receiver)->tp_iternext(receiver);
}
else {
retval = PyObject_CallMethodOneArg(receiver, &_Py_ID(send), v);
}
if (retval == NULL) {
if (tstate->c_tracefunc != NULL
&& _PyErr_ExceptionMatches(tstate, PyExc_StopIteration))
call_exc_trace(tstate->c_tracefunc, tstate->c_traceobj, tstate, frame);
if (_PyGen_FetchStopIterationValue(&retval) == 0) {
gen_status = PYGEN_RETURN;
}
else {
gen_status = PYGEN_ERROR;
}
}
else {
gen_status = PYGEN_NEXT;
}
}
if (gen_status == PYGEN_ERROR) {
assert(retval == NULL);
goto error;
}
Py_DECREF(v);
if (gen_status == PYGEN_RETURN) {
assert(retval != NULL);
Py_DECREF(receiver);
JUMPBY(oparg);
jump = true;
if (Py_IsNone(v) && PyIter_Check(receiver)) {
retval = Py_TYPE(receiver)->tp_iternext(receiver);
}
else {
retval = PyObject_CallMethodOneArg(receiver, &_Py_ID(send), v);
}
if (retval == NULL) {
if (tstate->c_tracefunc != NULL
&& _PyErr_ExceptionMatches(tstate, PyExc_StopIteration))
call_exc_trace(tstate->c_tracefunc, tstate->c_traceobj, tstate, frame);
if (_PyGen_FetchStopIterationValue(&retval) == 0) {
assert(retval != NULL);
JUMPBY(oparg);
}
else {
assert(retval == NULL);
goto error;
}
}
else {
assert(gen_status == PYGEN_NEXT);
assert(retval != NULL);
}
STACK_SHRINK(1);
STACK_GROW(((!jump) ? 1 : 0));
POKE(1, retval);
JUMPBY(1);
DISPATCH();
}
// SEND_GEN: specialized SEND for receivers whose type is exactly PyGen_Type
// or PyCoro_Type. Pushes the sent value onto the receiver's own frame and
// dispatches into that frame rather than calling into the receiver.
TARGET(SEND_GEN) {
PyObject *v = PEEK(1);
PyObject *receiver = PEEK(2);
assert(cframe.use_tracing == 0);
PyGenObject *gen = (PyGenObject *)receiver;
// Fall back to the generic SEND for any other receiver type.
DEOPT_IF(Py_TYPE(gen) != &PyGen_Type &&
Py_TYPE(gen) != &PyCoro_Type, SEND);
// Fall back if the frame state is FRAME_EXECUTING or beyond.
DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, SEND);
STAT_INC(SEND, hit);
_PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
// Record this instruction's oparg in the calling frame.
frame->yield_offset = oparg;
// `v` leaves this frame's stack (the receiver stays) and is pushed onto
// the generator frame's stack.
STACK_SHRINK(1);
_PyFrame_StackPush(gen_frame, v);
gen->gi_frame_state = FRAME_EXECUTING;
// Link the generator's exception state in front of the thread's current
// exc_info.
gen->gi_exc_state.previous_item = tstate->exc_info;
tstate->exc_info = &gen->gi_exc_state;
// Advance past the inline cache plus the oparg jump distance before
// switching frames.
JUMPBY(INLINE_CACHE_ENTRIES_SEND + oparg);
DISPATCH_INLINED(gen_frame);
}
TARGET(YIELD_VALUE) {
PyObject *retval = PEEK(1);
// NOTE: It's important that YIELD_VALUE never raises an exception!
@ -1026,6 +1038,7 @@
PyObject *exc_value = PEEK(1);
PyObject *last_sent_val = PEEK(2);
PyObject *sub_iter = PEEK(3);
PyObject *none;
PyObject *value;
assert(throwflag);
assert(exc_value && PyExceptionInstance_Check(exc_value));
@ -1034,13 +1047,15 @@
Py_DECREF(sub_iter);
Py_DECREF(last_sent_val);
Py_DECREF(exc_value);
none = Py_NewRef(Py_None);
}
else {
_PyErr_SetRaisedException(tstate, Py_NewRef(exc_value));
goto exception_unwind;
}
STACK_SHRINK(2);
STACK_SHRINK(1);
POKE(1, value);
POKE(2, none);
DISPATCH();
}

View File

@ -104,6 +104,8 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) {
return 1;
case SEND:
return 2;
case SEND_GEN:
return 2;
case YIELD_VALUE:
return 1;
case POP_EXCEPT:
@ -453,7 +455,9 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) {
case GET_AWAITABLE:
return 1;
case SEND:
return ((!jump) ? 1 : 0) + 1;
return 2;
case SEND_GEN:
return 1;
case YIELD_VALUE:
return 1;
case POP_EXCEPT:
@ -465,7 +469,7 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) {
case END_ASYNC_FOR:
return 0;
case CLEANUP_THROW:
return 1;
return 2;
case LOAD_ASSERTION_ERROR:
return 1;
case LOAD_BUILD_CLASS:
@ -763,7 +767,8 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[256] = {
[GET_AITER] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX },
[GET_ANEXT] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX },
[GET_AWAITABLE] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB },
[SEND] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB },
[SEND] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IBC },
[SEND_GEN] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IBC },
[YIELD_VALUE] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX },
[POP_EXCEPT] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX },
[RERAISE] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB },

View File

@ -165,7 +165,7 @@ static void *opcode_targets[256] = {
&&TARGET_SET_UPDATE,
&&TARGET_DICT_MERGE,
&&TARGET_DICT_UPDATE,
&&_unknown_opcode,
&&TARGET_SEND_GEN,
&&_unknown_opcode,
&&_unknown_opcode,
&&_unknown_opcode,

View File

@ -128,6 +128,7 @@ print_spec_stats(FILE *out, OpcodeStats *stats)
fprintf(out, "opcode[%d].specializable : 1\n", BINARY_SLICE);
fprintf(out, "opcode[%d].specializable : 1\n", COMPARE_OP);
fprintf(out, "opcode[%d].specializable : 1\n", STORE_SLICE);
fprintf(out, "opcode[%d].specializable : 1\n", SEND);
for (int i = 0; i < 256; i++) {
if (_PyOpcode_Caches[i]) {
fprintf(out, "opcode[%d].specializable : 1\n", i);
@ -1084,7 +1085,7 @@ PyObject *descr, DescriptorClassification kind)
if (dict) {
SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_ATTR_NOT_MANAGED_DICT);
return 0;
}
}
assert(owner_cls->tp_dictoffset > 0);
assert(owner_cls->tp_dictoffset <= INT16_MAX);
_py_set_opcode(instr, LOAD_ATTR_METHOD_LAZY_DICT);
@ -2183,3 +2184,25 @@ success:
STAT_INC(FOR_ITER, success);
cache->counter = adaptive_counter_cooldown();
}
/* Try to specialize the SEND instruction at `instr` for `receiver`.
 *
 * When the receiver's type is exactly PyGen_Type or PyCoro_Type the opcode is
 * rewritten to SEND_GEN and the inline counter is set to its cooldown value.
 * For any other receiver the failure is recorded, the opcode is (re)set to
 * plain SEND and the counter is backed off.
 */
void
_Py_Specialize_Send(PyObject *receiver, _Py_CODEUNIT *instr)
{
    assert(ENABLE_SPECIALIZATION);
    assert(_PyOpcode_Caches[SEND] == INLINE_CACHE_ENTRIES_SEND);
    /* The inline cache sits in the code unit right after the instruction. */
    _PySendCache *send_cache = (_PySendCache *)(instr + 1);
    PyTypeObject *receiver_type = Py_TYPE(receiver);

    if (receiver_type != &PyGen_Type && receiver_type != &PyCoro_Type) {
        /* Not a generator or coroutine: no specialized form applies. */
        SPECIALIZATION_FAIL(SEND,
                            _PySpecialization_ClassifyIterator(receiver));
        STAT_INC(SEND, failure);
        _py_set_opcode(instr, SEND);
        send_cache->counter = adaptive_counter_backoff(send_cache->counter);
        return;
    }

    _py_set_opcode(instr, SEND_GEN);
    STAT_INC(SEND, success);
    send_cache->counter = adaptive_counter_cooldown();
}