mirror of https://github.com/python/cpython
gh-106290: Fix edge cases around uops (#106319)
- Tweak uops debugging output
- Fix the bug from gh-106290
- Rename `SET_IP` to `SAVE_IP` (per https://github.com/faster-cpython/ideas/issues/558)
- Add a `SAVE_IP` uop at the start of the trace (ditto)
- Allow `unbound_local_error`; this gives us uops for `LOAD_FAST_CHECK`, `LOAD_CLOSURE`, and `DELETE_FAST`
- Longer traces
- Support `STORE_FAST_LOAD_FAST`, `STORE_FAST_STORE_FAST`
- Add deps on pycore_uops.h to Makefile(.pre.in)
This commit is contained in:
parent
58906213cc
commit
2028a4f6d9
|
@ -8,7 +8,7 @@ extern "C" {
|
|||
# error "this header requires Py_BUILD_CORE define"
|
||||
#endif
|
||||
|
||||
#define _Py_UOP_MAX_TRACE_LENGTH 16
|
||||
#define _Py_UOP_MAX_TRACE_LENGTH 32
|
||||
|
||||
typedef struct {
|
||||
int opcode;
|
||||
|
|
|
@ -1800,6 +1800,7 @@ PYTHON_HEADERS= \
|
|||
$(srcdir)/Include/internal/pycore_unionobject.h \
|
||||
$(srcdir)/Include/internal/pycore_unicodeobject.h \
|
||||
$(srcdir)/Include/internal/pycore_unicodeobject_generated.h \
|
||||
$(srcdir)/Include/internal/pycore_uops.h \
|
||||
$(srcdir)/Include/internal/pycore_warnings.h \
|
||||
$(srcdir)/Include/internal/pycore_weakref.h \
|
||||
$(DTRACE_HEADERS) \
|
||||
|
|
|
@ -2773,24 +2773,26 @@ void Py_LeaveRecursiveCall(void)
|
|||
_PyInterpreterFrame *
|
||||
_PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject **stack_pointer)
|
||||
{
|
||||
#ifdef LLTRACE
|
||||
#ifdef Py_DEBUG
|
||||
char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG");
|
||||
int lltrace = 0;
|
||||
if (uop_debug != NULL && *uop_debug >= '0') {
|
||||
lltrace = *uop_debug - '0'; // TODO: Parse an int and all that
|
||||
}
|
||||
if (lltrace >= 2) {
|
||||
PyCodeObject *code = _PyFrame_GetCode(frame);
|
||||
_Py_CODEUNIT *instr = frame->prev_instr + 1;
|
||||
fprintf(stderr,
|
||||
"Entering _PyUopExecute for %s (%s:%d) at offset %ld\n",
|
||||
PyUnicode_AsUTF8(code->co_qualname),
|
||||
PyUnicode_AsUTF8(code->co_filename),
|
||||
code->co_firstlineno,
|
||||
(long)(instr - (_Py_CODEUNIT *)code->co_code_adaptive));
|
||||
}
|
||||
#define DPRINTF(level, ...) \
|
||||
if (lltrace >= (level)) { fprintf(stderr, __VA_ARGS__); }
|
||||
#else
|
||||
#define DPRINTF(level, ...)
|
||||
#endif
|
||||
|
||||
DPRINTF(3,
|
||||
"Entering _PyUopExecute for %s (%s:%d) at offset %ld\n",
|
||||
PyUnicode_AsUTF8(_PyFrame_GetCode(frame)->co_qualname),
|
||||
PyUnicode_AsUTF8(_PyFrame_GetCode(frame)->co_filename),
|
||||
_PyFrame_GetCode(frame)->co_firstlineno,
|
||||
(long)(frame->prev_instr + 1 -
|
||||
(_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive));
|
||||
|
||||
PyThreadState *tstate = _PyThreadState_GET();
|
||||
_PyUOpExecutorObject *self = (_PyUOpExecutorObject *)executor;
|
||||
|
||||
|
@ -2803,7 +2805,7 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
|
|||
}
|
||||
|
||||
OBJECT_STAT_INC(optimization_traces_executed);
|
||||
_Py_CODEUNIT *ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive - 1;
|
||||
_Py_CODEUNIT *ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive;
|
||||
int pc = 0;
|
||||
int opcode;
|
||||
uint64_t operand;
|
||||
|
@ -2812,14 +2814,11 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
|
|||
opcode = self->trace[pc].opcode;
|
||||
operand = self->trace[pc].operand;
|
||||
oparg = (int)operand;
|
||||
#ifdef LLTRACE
|
||||
if (lltrace >= 3) {
|
||||
const char *opname = opcode < 256 ? _PyOpcode_OpName[opcode] : _PyOpcode_uop_name[opcode];
|
||||
int stack_level = (int)(stack_pointer - _PyFrame_Stackbase(frame));
|
||||
fprintf(stderr, " uop %s, operand %" PRIu64 ", stack_level %d\n",
|
||||
opname, operand, stack_level);
|
||||
}
|
||||
#endif
|
||||
DPRINTF(3,
|
||||
" uop %s, operand %" PRIu64 ", stack_level %d\n",
|
||||
opcode < 256 ? _PyOpcode_OpName[opcode] : _PyOpcode_uop_name[opcode],
|
||||
operand,
|
||||
(int)(stack_pointer - _PyFrame_Stackbase(frame)));
|
||||
pc++;
|
||||
OBJECT_STAT_INC(optimization_uops_executed);
|
||||
switch (opcode) {
|
||||
|
@ -2828,7 +2827,7 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
|
|||
#define ENABLE_SPECIALIZATION 0
|
||||
#include "executor_cases.c.h"
|
||||
|
||||
case SET_IP:
|
||||
case SAVE_IP:
|
||||
{
|
||||
frame->prev_instr = ip_offset + oparg;
|
||||
break;
|
||||
|
@ -2836,6 +2835,7 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
|
|||
|
||||
case EXIT_TRACE:
|
||||
{
|
||||
frame->prev_instr--; // Back up to just before destination
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
Py_DECREF(self);
|
||||
return frame;
|
||||
|
@ -2850,6 +2850,13 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
|
|||
}
|
||||
}
|
||||
|
||||
unbound_local_error:
|
||||
format_exc_check_arg(tstate, PyExc_UnboundLocalError,
|
||||
UNBOUNDLOCAL_ERROR_MSG,
|
||||
PyTuple_GetItem(_PyFrame_GetCode(frame)->co_localsplusnames, oparg)
|
||||
);
|
||||
goto error;
|
||||
|
||||
pop_4_error:
|
||||
STACK_SHRINK(1);
|
||||
pop_3_error:
|
||||
|
@ -2861,11 +2868,7 @@ pop_1_error:
|
|||
error:
|
||||
// On ERROR_IF we return NULL as the frame.
|
||||
// The caller recovers the frame from cframe.current_frame.
|
||||
#ifdef LLTRACE
|
||||
if (lltrace >= 2) {
|
||||
fprintf(stderr, "Error: [Opcode %d, operand %" PRIu64 "]\n", opcode, operand);
|
||||
}
|
||||
#endif
|
||||
DPRINTF(2, "Error: [Opcode %d, operand %" PRIu64 "]\n", opcode, operand);
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
Py_DECREF(self);
|
||||
return NULL;
|
||||
|
@ -2873,11 +2876,8 @@ error:
|
|||
deoptimize:
|
||||
// On DEOPT_IF we just repeat the last instruction.
|
||||
// This presumes nothing was popped from the stack (nor pushed).
|
||||
#ifdef LLTRACE
|
||||
if (lltrace >= 2) {
|
||||
fprintf(stderr, "DEOPT: [Opcode %d, operand %" PRIu64 "]\n", opcode, operand);
|
||||
}
|
||||
#endif
|
||||
DPRINTF(2, "DEOPT: [Opcode %d, operand %" PRIu64 "]\n", opcode, operand);
|
||||
frame->prev_instr--; // Back up to just before destination
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
Py_DECREF(self);
|
||||
return frame;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -20,7 +20,7 @@
|
|||
0)
|
||||
|
||||
#define EXIT_TRACE 300
|
||||
#define SET_IP 301
|
||||
#define SAVE_IP 301
|
||||
#define _GUARD_BOTH_INT 302
|
||||
#define _BINARY_OP_MULTIPLY_INT 303
|
||||
#define _BINARY_OP_ADD_INT 304
|
||||
|
@ -1164,6 +1164,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[512] = {
|
|||
};
|
||||
const struct opcode_macro_expansion _PyOpcode_macro_expansion[256] = {
|
||||
[NOP] = { .nuops = 1, .uops = { { NOP, 0, 0 } } },
|
||||
[LOAD_FAST_CHECK] = { .nuops = 1, .uops = { { LOAD_FAST_CHECK, 0, 0 } } },
|
||||
[LOAD_FAST] = { .nuops = 1, .uops = { { LOAD_FAST, 0, 0 } } },
|
||||
[LOAD_FAST_AND_CLEAR] = { .nuops = 1, .uops = { { LOAD_FAST_AND_CLEAR, 0, 0 } } },
|
||||
[LOAD_CONST] = { .nuops = 1, .uops = { { LOAD_CONST, 0, 0 } } },
|
||||
|
@ -1218,6 +1219,7 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[256] = {
|
|||
[LOAD_LOCALS] = { .nuops = 1, .uops = { { _LOAD_LOCALS, 0, 0 } } },
|
||||
[LOAD_NAME] = { .nuops = 2, .uops = { { _LOAD_LOCALS, 0, 0 }, { _LOAD_FROM_DICT_OR_GLOBALS, 0, 0 } } },
|
||||
[LOAD_FROM_DICT_OR_GLOBALS] = { .nuops = 1, .uops = { { _LOAD_FROM_DICT_OR_GLOBALS, 0, 0 } } },
|
||||
[DELETE_FAST] = { .nuops = 1, .uops = { { DELETE_FAST, 0, 0 } } },
|
||||
[DELETE_DEREF] = { .nuops = 1, .uops = { { DELETE_DEREF, 0, 0 } } },
|
||||
[LOAD_FROM_DICT_OR_DEREF] = { .nuops = 1, .uops = { { LOAD_FROM_DICT_OR_DEREF, 0, 0 } } },
|
||||
[LOAD_DEREF] = { .nuops = 1, .uops = { { LOAD_DEREF, 0, 0 } } },
|
||||
|
@ -1266,7 +1268,7 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[256] = {
|
|||
#ifdef Py_DEBUG
|
||||
const char * const _PyOpcode_uop_name[512] = {
|
||||
[300] = "EXIT_TRACE",
|
||||
[301] = "SET_IP",
|
||||
[301] = "SAVE_IP",
|
||||
[302] = "_GUARD_BOTH_INT",
|
||||
[303] = "_BINARY_OP_MULTIPLY_INT",
|
||||
[304] = "_BINARY_OP_ADD_INT",
|
||||
|
|
|
@ -282,11 +282,6 @@ PyUnstable_Optimizer_NewCounter(void)
|
|||
|
||||
///////////////////// Experimental UOp Optimizer /////////////////////
|
||||
|
||||
#ifdef Py_DEBUG
|
||||
/* For debugging the interpreter: */
|
||||
# define LLTRACE 1 /* Low-level trace feature */
|
||||
#endif
|
||||
|
||||
static void
|
||||
uop_dealloc(_PyUOpExecutorObject *self) {
|
||||
PyObject_Free(self);
|
||||
|
@ -308,60 +303,81 @@ translate_bytecode_to_trace(
|
|||
_PyUOpInstruction *trace,
|
||||
int max_length)
|
||||
{
|
||||
#ifdef LLTRACE
|
||||
int trace_length = 0;
|
||||
|
||||
#ifdef Py_DEBUG
|
||||
char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG");
|
||||
int lltrace = 0;
|
||||
if (uop_debug != NULL && *uop_debug >= '0') {
|
||||
lltrace = *uop_debug - '0'; // TODO: Parse an int and all that
|
||||
}
|
||||
if (lltrace >= 4) {
|
||||
fprintf(stderr,
|
||||
"Optimizing %s (%s:%d) at offset %ld\n",
|
||||
PyUnicode_AsUTF8(code->co_qualname),
|
||||
PyUnicode_AsUTF8(code->co_filename),
|
||||
code->co_firstlineno,
|
||||
(long)(instr - (_Py_CODEUNIT *)code->co_code_adaptive));
|
||||
}
|
||||
#define ADD_TO_TRACE(OPCODE, OPERAND) \
|
||||
if (lltrace >= 2) { \
|
||||
const char *opname = (OPCODE) < 256 ? _PyOpcode_OpName[(OPCODE)] : _PyOpcode_uop_name[(OPCODE)]; \
|
||||
fprintf(stderr, " ADD_TO_TRACE(%s, %" PRIu64 ")\n", opname, (uint64_t)(OPERAND)); \
|
||||
} \
|
||||
trace[trace_length].opcode = (OPCODE); \
|
||||
trace[trace_length].operand = (OPERAND); \
|
||||
trace_length++;
|
||||
#define DPRINTF(level, ...) \
|
||||
if (lltrace >= (level)) { fprintf(stderr, __VA_ARGS__); }
|
||||
#else
|
||||
#define ADD_TO_TRACE(OPCODE, OPERAND) \
|
||||
trace[trace_length].opcode = (OPCODE); \
|
||||
trace[trace_length].operand = (OPERAND); \
|
||||
trace_length++;
|
||||
#define DPRINTF(level, ...)
|
||||
#endif
|
||||
|
||||
int trace_length = 0;
|
||||
// Always reserve space for one uop, plus SET_UP, plus EXIT_TRACE
|
||||
while (trace_length + 3 <= max_length) {
|
||||
#define ADD_TO_TRACE(OPCODE, OPERAND) \
|
||||
DPRINTF(2, \
|
||||
" ADD_TO_TRACE(%s, %" PRIu64 ")\n", \
|
||||
(OPCODE) < 256 ? _PyOpcode_OpName[(OPCODE)] : _PyOpcode_uop_name[(OPCODE)], \
|
||||
(uint64_t)(OPERAND)); \
|
||||
assert(trace_length < max_length); \
|
||||
trace[trace_length].opcode = (OPCODE); \
|
||||
trace[trace_length].operand = (OPERAND); \
|
||||
trace_length++;
|
||||
|
||||
DPRINTF(4,
|
||||
"Optimizing %s (%s:%d) at offset %ld\n",
|
||||
PyUnicode_AsUTF8(code->co_qualname),
|
||||
PyUnicode_AsUTF8(code->co_filename),
|
||||
code->co_firstlineno,
|
||||
(long)(instr - (_Py_CODEUNIT *)code->co_code_adaptive));
|
||||
|
||||
for (;;) {
|
||||
ADD_TO_TRACE(SAVE_IP, (int)(instr - (_Py_CODEUNIT *)code->co_code_adaptive));
|
||||
int opcode = instr->op.code;
|
||||
uint64_t operand = instr->op.arg;
|
||||
switch (opcode) {
|
||||
case LOAD_FAST_LOAD_FAST:
|
||||
case STORE_FAST_LOAD_FAST:
|
||||
case STORE_FAST_STORE_FAST:
|
||||
{
|
||||
// Reserve space for two uops (+ SETUP + EXIT_TRACE)
|
||||
// Reserve space for two uops (+ SAVE_IP + EXIT_TRACE)
|
||||
if (trace_length + 4 > max_length) {
|
||||
DPRINTF(1, "Ran out of space for LOAD_FAST_LOAD_FAST\n");
|
||||
goto done;
|
||||
}
|
||||
uint64_t oparg1 = operand >> 4;
|
||||
uint64_t oparg2 = operand & 15;
|
||||
ADD_TO_TRACE(LOAD_FAST, oparg1);
|
||||
ADD_TO_TRACE(LOAD_FAST, oparg2);
|
||||
switch (opcode) {
|
||||
case LOAD_FAST_LOAD_FAST:
|
||||
ADD_TO_TRACE(LOAD_FAST, oparg1);
|
||||
ADD_TO_TRACE(LOAD_FAST, oparg2);
|
||||
break;
|
||||
case STORE_FAST_LOAD_FAST:
|
||||
ADD_TO_TRACE(STORE_FAST, oparg1);
|
||||
ADD_TO_TRACE(LOAD_FAST, oparg2);
|
||||
break;
|
||||
case STORE_FAST_STORE_FAST:
|
||||
ADD_TO_TRACE(STORE_FAST, oparg1);
|
||||
ADD_TO_TRACE(STORE_FAST, oparg2);
|
||||
break;
|
||||
default:
|
||||
Py_FatalError("Missing case");
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode];
|
||||
if (expansion->nuops > 0) {
|
||||
// Reserve space for nuops (+ SETUP + EXIT_TRACE)
|
||||
// Reserve space for nuops (+ SAVE_IP + EXIT_TRACE)
|
||||
int nuops = expansion->nuops;
|
||||
if (trace_length + nuops + 2 > max_length) {
|
||||
DPRINTF(1,
|
||||
"Ran out of space for %s\n",
|
||||
opcode < 256 ? _PyOpcode_OpName[opcode] : _PyOpcode_uop_name[opcode]);
|
||||
goto done;
|
||||
}
|
||||
for (int i = 0; i < nuops; i++) {
|
||||
|
@ -387,49 +403,45 @@ translate_bytecode_to_trace(
|
|||
Py_FatalError("garbled expansion");
|
||||
}
|
||||
ADD_TO_TRACE(expansion->uops[i].uop, operand);
|
||||
assert(expansion->uops[0].size == 0); // TODO
|
||||
}
|
||||
break;
|
||||
}
|
||||
// fprintf(stderr, "Unsupported opcode %d\n", opcode);
|
||||
goto done; // Break out of while loop
|
||||
DPRINTF(2,
|
||||
"Unsupported opcode %s\n",
|
||||
opcode < 256 ? _PyOpcode_OpName[opcode] : _PyOpcode_uop_name[opcode]);
|
||||
goto done; // Break out of loop
|
||||
}
|
||||
}
|
||||
instr++;
|
||||
// Add cache size for opcode
|
||||
instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]];
|
||||
ADD_TO_TRACE(SET_IP, (int)(instr - (_Py_CODEUNIT *)code->co_code_adaptive));
|
||||
}
|
||||
|
||||
done:
|
||||
if (trace_length > 0) {
|
||||
// Skip short traces like SAVE_IP, LOAD_FAST, SAVE_IP, EXIT_TRACE
|
||||
if (trace_length > 3) {
|
||||
ADD_TO_TRACE(EXIT_TRACE, 0);
|
||||
#ifdef LLTRACE
|
||||
if (lltrace >= 1) {
|
||||
fprintf(stderr,
|
||||
"Created a trace for %s (%s:%d) at offset %ld -- length %d\n",
|
||||
PyUnicode_AsUTF8(code->co_qualname),
|
||||
PyUnicode_AsUTF8(code->co_filename),
|
||||
code->co_firstlineno,
|
||||
(long)(instr - (_Py_CODEUNIT *)code->co_code_adaptive),
|
||||
trace_length);
|
||||
}
|
||||
#endif
|
||||
DPRINTF(1,
|
||||
"Created a trace for %s (%s:%d) at offset %ld -- length %d\n",
|
||||
PyUnicode_AsUTF8(code->co_qualname),
|
||||
PyUnicode_AsUTF8(code->co_filename),
|
||||
code->co_firstlineno,
|
||||
(long)(instr - (_Py_CODEUNIT *)code->co_code_adaptive),
|
||||
trace_length);
|
||||
return trace_length;
|
||||
}
|
||||
else {
|
||||
#ifdef LLTRACE
|
||||
if (lltrace >= 4) {
|
||||
fprintf(stderr,
|
||||
"No trace for %s (%s:%d) at offset %ld\n",
|
||||
PyUnicode_AsUTF8(code->co_qualname),
|
||||
PyUnicode_AsUTF8(code->co_filename),
|
||||
code->co_firstlineno,
|
||||
(long)(instr - (_Py_CODEUNIT *)code->co_code_adaptive));
|
||||
}
|
||||
#endif
|
||||
DPRINTF(4,
|
||||
"No trace for %s (%s:%d) at offset %ld\n",
|
||||
PyUnicode_AsUTF8(code->co_qualname),
|
||||
PyUnicode_AsUTF8(code->co_filename),
|
||||
code->co_firstlineno,
|
||||
(long)(instr - (_Py_CODEUNIT *)code->co_code_adaptive));
|
||||
}
|
||||
return trace_length;
|
||||
return 0;
|
||||
|
||||
#undef ADD_TO_TRACE
|
||||
#undef DPRINTF
|
||||
}
|
||||
|
||||
static int
|
||||
|
|
|
@ -308,8 +308,7 @@ class ActiveCacheEffect:
|
|||
|
||||
|
||||
FORBIDDEN_NAMES_IN_UOPS = (
|
||||
"resume_with_error", # Proxy for "goto", which isn't an IDENTIFIER
|
||||
"unbound_local_error",
|
||||
"resume_with_error",
|
||||
"kwnames",
|
||||
"next_instr",
|
||||
"oparg1", # Proxy for super-instructions like LOAD_FAST_LOAD_FAST
|
||||
|
@ -401,20 +400,25 @@ class Instruction:
|
|||
def is_viable_uop(self) -> bool:
|
||||
"""Whether this instruction is viable as a uop."""
|
||||
if self.always_exits:
|
||||
# print(f"Skipping {self.name} because it always exits")
|
||||
return False
|
||||
if self.instr_flags.HAS_ARG_FLAG:
|
||||
# If the instruction uses oparg, it cannot use any caches
|
||||
if self.active_caches:
|
||||
# print(f"Skipping {self.name} because it uses oparg and caches")
|
||||
return False
|
||||
else:
|
||||
# If it doesn't use oparg, it can have one cache entry
|
||||
if len(self.active_caches) > 1:
|
||||
# print(f"Skipping {self.name} because it has >1 cache entries")
|
||||
return False
|
||||
res = True
|
||||
for forbidden in FORBIDDEN_NAMES_IN_UOPS:
|
||||
# TODO: Don't check in '#ifdef ENABLE_SPECIALIZATION' regions
|
||||
if variable_used(self.inst, forbidden):
|
||||
return False
|
||||
return True
|
||||
# print(f"Skipping {self.name} because it uses {forbidden}")
|
||||
res = False
|
||||
return res
|
||||
|
||||
def write(self, out: Formatter, tier: Tiers = TIER_ONE) -> None:
|
||||
"""Write one instruction, sans prologue and epilogue."""
|
||||
|
@ -1323,7 +1327,7 @@ class Analyzer:
|
|||
self.out.emit(make_text(name, counter))
|
||||
counter += 1
|
||||
add("EXIT_TRACE")
|
||||
add("SET_IP")
|
||||
add("SAVE_IP")
|
||||
for instr in self.instrs.values():
|
||||
if instr.kind == "op" and instr.is_viable_uop():
|
||||
add(instr.name)
|
||||
|
|
Loading…
Reference in New Issue