mirror of https://github.com/python/cpython
gh-104584: Baby steps towards generating and executing traces (#105924)
Added a new, experimental, tracing optimizer and interpreter (a.k.a. "tier 2"). This currently pessimizes, so don't use yet -- this is infrastructure so we can experiment with optimizing passes. To enable it, pass ``-Xuops`` or set ``PYTHONUOPS=1``. To get debug output, set ``PYTHONUOPSDEBUG=N`` where ``N`` is a debug level (0-4, where 0 is no debug output and 4 is excessively verbose). All of this code is likely to change dramatically before the 3.13 feature freeze. But this is a first step.
This commit is contained in:
parent
d3af83b934
commit
51fc725117
|
@ -86,6 +86,7 @@ Parser/token.c generated
|
|||
Programs/test_frozenmain.h generated
|
||||
Python/Python-ast.c generated
|
||||
Python/generated_cases.c.h generated
|
||||
Python/executor_cases.c.h generated
|
||||
Python/opcode_targets.h generated
|
||||
Python/stdlib_module_names.h generated
|
||||
Tools/peg_generator/pegen/grammar_parser.py generated
|
||||
|
|
|
@ -45,6 +45,7 @@ extern _PyOptimizerObject _PyOptimizer_Default;
|
|||
|
||||
/* For testing */
|
||||
PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewCounter(void);
|
||||
PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewUOpOptimizer(void);
|
||||
|
||||
#define OPTIMIZER_BITS_IN_COUNTER 4
|
||||
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
#ifndef Py_INTERNAL_UOPS_H
|
||||
#define Py_INTERNAL_UOPS_H
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef Py_BUILD_CORE
|
||||
# error "this header requires Py_BUILD_CORE define"
|
||||
#endif
|
||||
|
||||
#define _Py_UOP_MAX_TRACE_LENGTH 16
|
||||
|
||||
typedef struct {
|
||||
int opcode;
|
||||
uint64_t operand; // Sometimes oparg, sometimes a cache entry
|
||||
} _PyUOpInstruction;
|
||||
|
||||
typedef struct {
|
||||
_PyExecutorObject base;
|
||||
_PyUOpInstruction trace[_Py_UOP_MAX_TRACE_LENGTH]; // TODO: variable length
|
||||
} _PyUOpExecutorObject;
|
||||
|
||||
_PyInterpreterFrame *_PyUopExecute(
|
||||
_PyExecutorObject *executor,
|
||||
_PyInterpreterFrame *frame,
|
||||
PyObject **stack_pointer);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif /* !Py_INTERNAL_UOPS_H */
|
|
@ -71,6 +71,9 @@ typedef struct _object_stats {
|
|||
uint64_t type_cache_dunder_misses;
|
||||
uint64_t type_cache_collisions;
|
||||
uint64_t optimization_attempts;
|
||||
uint64_t optimization_traces_created;
|
||||
uint64_t optimization_traces_executed;
|
||||
uint64_t optimization_uops_executed;
|
||||
} ObjectStats;
|
||||
|
||||
typedef struct _stats {
|
||||
|
|
|
@ -1542,19 +1542,9 @@ regen-opcode-targets:
|
|||
|
||||
.PHONY: regen-cases
|
||||
regen-cases:
|
||||
# Regenerate Python/generated_cases.c.h
|
||||
# and Python/opcode_metadata.h
|
||||
# from Python/bytecodes.c
|
||||
# using Tools/cases_generator/generate_cases.py
|
||||
# Regenerate various files from Python/bytecodes.c
|
||||
PYTHONPATH=$(srcdir)/Tools/cases_generator \
|
||||
$(PYTHON_FOR_REGEN) \
|
||||
$(srcdir)/Tools/cases_generator/generate_cases.py \
|
||||
--emit-line-directives \
|
||||
-o $(srcdir)/Python/generated_cases.c.h.new \
|
||||
-m $(srcdir)/Python/opcode_metadata.h.new \
|
||||
$(srcdir)/Python/bytecodes.c
|
||||
$(UPDATE_FILE) $(srcdir)/Python/generated_cases.c.h $(srcdir)/Python/generated_cases.c.h.new
|
||||
$(UPDATE_FILE) $(srcdir)/Python/opcode_metadata.h $(srcdir)/Python/opcode_metadata.h.new
|
||||
$(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/generate_cases.py -l
|
||||
|
||||
Python/compile.o: $(srcdir)/Python/opcode_metadata.h
|
||||
|
||||
|
@ -1565,6 +1555,13 @@ Python/ceval.o: \
|
|||
$(srcdir)/Python/opcode_metadata.h \
|
||||
$(srcdir)/Python/opcode_targets.h
|
||||
|
||||
Python/flowgraph.o: \
|
||||
$(srcdir)/Python/opcode_metadata.h
|
||||
|
||||
Python/optimizer.o: \
|
||||
$(srcdir)/Python/executor_cases.c.h \
|
||||
$(srcdir)/Python/opcode_metadata.h
|
||||
|
||||
Python/frozen.o: $(FROZEN_FILES_OUT)
|
||||
|
||||
# Generate DTrace probe macros, then rename them (PYTHON_ -> PyDTrace_) to
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Added a new, experimental, tracing optimizer and interpreter (a.k.a. "tier 2"). This currently pessimizes, so don't use yet -- this is infrastructure so we can experiment with optimizing passes. To enable it, pass ``-Xuops`` or set ``PYTHONUOPS=1``. To get debug output, set ``PYTHONUOPSDEBUG=N`` where ``N`` is a debug level (0-4, where 0 is no debug output and 4 is excessively verbose).
|
|
@ -830,6 +830,12 @@ get_counter_optimizer(PyObject *self, PyObject *arg)
|
|||
return PyUnstable_Optimizer_NewCounter();
|
||||
}
|
||||
|
||||
/* Test helper exposed to Python: create a fresh uop optimizer object and
   return it unchanged (including a NULL result) to the caller. */
static PyObject *
get_uop_optimizer(PyObject *self, PyObject *arg)
{
    PyObject *optimizer = PyUnstable_Optimizer_NewUOpOptimizer();
    return optimizer;
}
|
||||
|
||||
static PyObject *
|
||||
set_optimizer(PyObject *self, PyObject *opt)
|
||||
{
|
||||
|
@ -994,6 +1000,7 @@ static PyMethodDef module_functions[] = {
|
|||
{"get_optimizer", get_optimizer, METH_NOARGS, NULL},
|
||||
{"set_optimizer", set_optimizer, METH_O, NULL},
|
||||
{"get_counter_optimizer", get_counter_optimizer, METH_NOARGS, NULL},
|
||||
{"get_uop_optimizer", get_uop_optimizer, METH_NOARGS, NULL},
|
||||
{"pending_threadfunc", _PyCFunction_CAST(pending_threadfunc),
|
||||
METH_VARARGS | METH_KEYWORDS},
|
||||
// {"pending_fd_identify", pending_fd_identify, METH_VARARGS, NULL},
|
||||
|
|
|
@ -52,8 +52,6 @@
|
|||
#define family(name, ...) static int family_##name
|
||||
#define pseudo(name) static int pseudo_##name
|
||||
|
||||
typedef PyObject *(*convertion_func_ptr)(PyObject *);
|
||||
|
||||
// Dummy variables for stack effects.
|
||||
static PyObject *value, *value1, *value2, *left, *right, *res, *sum, *prod, *sub;
|
||||
static PyObject *container, *start, *stop, *v, *lhs, *rhs, *res2;
|
||||
|
@ -2182,7 +2180,7 @@ dummy_func(
|
|||
frame = executor->execute(executor, frame, stack_pointer);
|
||||
if (frame == NULL) {
|
||||
frame = cframe.current_frame;
|
||||
goto error;
|
||||
goto resume_with_error;
|
||||
}
|
||||
goto resume_frame;
|
||||
}
|
||||
|
|
137
Python/ceval.c
137
Python/ceval.c
|
@ -22,6 +22,7 @@
|
|||
#include "pycore_sysmodule.h" // _PySys_Audit()
|
||||
#include "pycore_tuple.h" // _PyTuple_ITEMS()
|
||||
#include "pycore_typeobject.h" // _PySuper_Lookup()
|
||||
#include "pycore_uops.h" // _PyUOpExecutorObject
|
||||
#include "pycore_emscripten_signal.h" // _Py_CHECK_EMSCRIPTEN_SIGNALS
|
||||
|
||||
#include "pycore_dict.h"
|
||||
|
@ -223,14 +224,6 @@ _PyEvalFramePushAndInit_Ex(PyThreadState *tstate, PyFunctionObject *func,
|
|||
static void
|
||||
_PyEvalFrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame);
|
||||
|
||||
typedef PyObject *(*convertion_func_ptr)(PyObject *);
|
||||
|
||||
static const convertion_func_ptr CONVERSION_FUNCTIONS[4] = {
|
||||
[FVC_STR] = PyObject_Str,
|
||||
[FVC_REPR] = PyObject_Repr,
|
||||
[FVC_ASCII] = PyObject_ASCII
|
||||
};
|
||||
|
||||
#define UNBOUNDLOCAL_ERROR_MSG \
|
||||
"cannot access local variable '%s' where it is not associated with a value"
|
||||
#define UNBOUNDFREE_ERROR_MSG \
|
||||
|
@ -2771,3 +2764,131 @@ void Py_LeaveRecursiveCall(void)
|
|||
{
|
||||
_Py_LeaveRecursiveCall();
|
||||
}
|
||||
|
||||
///////////////////// Experimental UOp Interpreter /////////////////////
|
||||
|
||||
// UPDATE_MISS_STATS (called by DEOPT_IF) uses next_instr
|
||||
// TODO: Make it do something useful
|
||||
#undef UPDATE_MISS_STATS
|
||||
#define UPDATE_MISS_STATS(INSTNAME) ((void)0)
|
||||
|
||||
_PyInterpreterFrame *
|
||||
_PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject **stack_pointer)
|
||||
{
|
||||
#ifdef LLTRACE
|
||||
char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG");
|
||||
int lltrace = 0;
|
||||
if (uop_debug != NULL && *uop_debug >= '0') {
|
||||
lltrace = *uop_debug - '0'; // TODO: Parse an int and all that
|
||||
}
|
||||
if (lltrace >= 2) {
|
||||
PyCodeObject *code = _PyFrame_GetCode(frame);
|
||||
_Py_CODEUNIT *instr = frame->prev_instr + 1;
|
||||
fprintf(stderr,
|
||||
"Entering _PyUopExecute for %s (%s:%d) at offset %ld\n",
|
||||
PyUnicode_AsUTF8(code->co_qualname),
|
||||
PyUnicode_AsUTF8(code->co_filename),
|
||||
code->co_firstlineno,
|
||||
(long)(instr - (_Py_CODEUNIT *)code->co_code_adaptive));
|
||||
}
|
||||
#endif
|
||||
|
||||
PyThreadState *tstate = _PyThreadState_GET();
|
||||
_PyUOpExecutorObject *self = (_PyUOpExecutorObject *)executor;
|
||||
|
||||
// Equivalent to CHECK_EVAL_BREAKER()
|
||||
_Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY();
|
||||
if (_Py_atomic_load_relaxed_int32(&tstate->interp->ceval.eval_breaker)) {
|
||||
if (_Py_HandlePending(tstate) != 0) {
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
OBJECT_STAT_INC(optimization_traces_executed);
|
||||
_Py_CODEUNIT *ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive - 1;
|
||||
int pc = 0;
|
||||
int opcode;
|
||||
uint64_t operand;
|
||||
int oparg;
|
||||
for (;;) {
|
||||
opcode = self->trace[pc].opcode;
|
||||
operand = self->trace[pc].operand;
|
||||
oparg = (int)operand;
|
||||
#ifdef LLTRACE
|
||||
if (lltrace >= 3) {
|
||||
const char *opname = opcode < 256 ? _PyOpcode_OpName[opcode] : "";
|
||||
int stack_level = (int)(stack_pointer - _PyFrame_Stackbase(frame));
|
||||
fprintf(stderr, " uop %s %d, operand %" PRIu64 ", stack_level %d\n",
|
||||
opname, opcode, operand, stack_level);
|
||||
}
|
||||
#endif
|
||||
pc++;
|
||||
OBJECT_STAT_INC(optimization_uops_executed);
|
||||
switch (opcode) {
|
||||
|
||||
#undef ENABLE_SPECIALIZATION
|
||||
#define ENABLE_SPECIALIZATION 0
|
||||
#include "executor_cases.c.h"
|
||||
|
||||
case SET_IP:
|
||||
{
|
||||
frame->prev_instr = ip_offset + oparg;
|
||||
break;
|
||||
}
|
||||
|
||||
case EXIT_TRACE:
|
||||
{
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
Py_DECREF(self);
|
||||
return frame;
|
||||
}
|
||||
|
||||
default:
|
||||
{
|
||||
fprintf(stderr, "Unknown uop %d, operand %" PRIu64 "\n", opcode, operand);
|
||||
Py_FatalError("Unknown uop");
|
||||
abort(); // Unreachable
|
||||
for (;;) {}
|
||||
// Really unreachable
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
pop_4_error:
|
||||
STACK_SHRINK(1);
|
||||
pop_3_error:
|
||||
STACK_SHRINK(1);
|
||||
pop_2_error:
|
||||
STACK_SHRINK(1);
|
||||
pop_1_error:
|
||||
STACK_SHRINK(1);
|
||||
error:
|
||||
// On ERROR_IF we return NULL as the frame.
|
||||
// The caller recovers the frame from cframe.current_frame.
|
||||
#ifdef LLTRACE
|
||||
if (lltrace >= 2) {
|
||||
fprintf(stderr, "Error: [Opcode %d, operand %" PRIu64 "]\n", opcode, operand);
|
||||
}
|
||||
#endif
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
Py_DECREF(self);
|
||||
return NULL;
|
||||
|
||||
PREDICTED(UNPACK_SEQUENCE)
|
||||
PREDICTED(COMPARE_OP)
|
||||
PREDICTED(LOAD_SUPER_ATTR)
|
||||
PREDICTED(STORE_SUBSCR)
|
||||
PREDICTED(BINARY_SUBSCR)
|
||||
PREDICTED(BINARY_OP)
|
||||
// On DEOPT_IF we just repeat the last instruction.
|
||||
// This presumes nothing was popped from the stack (nor pushed).
|
||||
#ifdef LLTRACE
|
||||
if (lltrace >= 2) {
|
||||
fprintf(stderr, "DEOPT: [Opcode %d, operand %" PRIu64 "]\n", opcode, operand);
|
||||
}
|
||||
#endif
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
Py_DECREF(self);
|
||||
return frame;
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// Macros needed by ceval.c and bytecodes.c
|
||||
// Macros and other things needed by ceval.c and bytecodes.c
|
||||
|
||||
/* Computed GOTOs, or
|
||||
the-optimization-commonly-but-improperly-known-as-"threaded code"
|
||||
|
@ -339,3 +339,11 @@ do { \
|
|||
goto error; \
|
||||
} \
|
||||
} while (0);
|
||||
|
||||
typedef PyObject *(*convertion_func_ptr)(PyObject *);
|
||||
|
||||
static const convertion_func_ptr CONVERSION_FUNCTIONS[4] = {
|
||||
[FVC_STR] = PyObject_Str,
|
||||
[FVC_REPR] = PyObject_Repr,
|
||||
[FVC_ASCII] = PyObject_ASCII
|
||||
};
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -18,6 +18,21 @@
|
|||
((OP) == POP_BLOCK) || \
|
||||
0
|
||||
|
||||
#define EXIT_TRACE 300
|
||||
#define SET_IP 301
|
||||
#define _GUARD_BOTH_INT 302
|
||||
#define _BINARY_OP_MULTIPLY_INT 303
|
||||
#define _BINARY_OP_ADD_INT 304
|
||||
#define _BINARY_OP_SUBTRACT_INT 305
|
||||
#define _GUARD_BOTH_FLOAT 306
|
||||
#define _BINARY_OP_MULTIPLY_FLOAT 307
|
||||
#define _BINARY_OP_ADD_FLOAT 308
|
||||
#define _BINARY_OP_SUBTRACT_FLOAT 309
|
||||
#define _GUARD_BOTH_UNICODE 310
|
||||
#define _BINARY_OP_ADD_UNICODE 311
|
||||
#define _LOAD_LOCALS 312
|
||||
#define _LOAD_FROM_DICT_OR_GLOBALS 313
|
||||
|
||||
#ifndef NEED_OPCODE_METADATA
|
||||
extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump);
|
||||
#else
|
||||
|
@ -885,12 +900,19 @@ struct opcode_metadata {
|
|||
int flags;
|
||||
};
|
||||
|
||||
struct opcode_macro_expansion {
|
||||
int nuops;
|
||||
struct { int16_t uop; int8_t size; int8_t offset; } uops[8];
|
||||
};
|
||||
|
||||
|
||||
#define OPCODE_METADATA_FMT(OP) (_PyOpcode_opcode_metadata[(OP)].instr_format)
|
||||
#define SAME_OPCODE_METADATA(OP1, OP2) \
|
||||
(OPCODE_METADATA_FMT(OP1) == OPCODE_METADATA_FMT(OP2))
|
||||
|
||||
#ifndef NEED_OPCODE_METADATA
|
||||
extern const struct opcode_metadata _PyOpcode_opcode_metadata[512];
|
||||
extern const struct opcode_macro_expansion _PyOpcode_macro_expansion[256];
|
||||
#else
|
||||
const struct opcode_metadata _PyOpcode_opcode_metadata[512] = {
|
||||
[NOP] = { true, INSTR_FMT_IX, 0 },
|
||||
|
@ -1101,4 +1123,88 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[512] = {
|
|||
[CACHE] = { true, INSTR_FMT_IX, 0 },
|
||||
[RESERVED] = { true, INSTR_FMT_IX, 0 },
|
||||
};
|
||||
const struct opcode_macro_expansion _PyOpcode_macro_expansion[256] = {
|
||||
[NOP] = { .nuops = 1, .uops = { { NOP, 0, 0 } } },
|
||||
[LOAD_FAST] = { .nuops = 1, .uops = { { LOAD_FAST, 0, 0 } } },
|
||||
[LOAD_FAST_AND_CLEAR] = { .nuops = 1, .uops = { { LOAD_FAST_AND_CLEAR, 0, 0 } } },
|
||||
[LOAD_CONST] = { .nuops = 1, .uops = { { LOAD_CONST, 0, 0 } } },
|
||||
[STORE_FAST] = { .nuops = 1, .uops = { { STORE_FAST, 0, 0 } } },
|
||||
[POP_TOP] = { .nuops = 1, .uops = { { POP_TOP, 0, 0 } } },
|
||||
[PUSH_NULL] = { .nuops = 1, .uops = { { PUSH_NULL, 0, 0 } } },
|
||||
[END_SEND] = { .nuops = 1, .uops = { { END_SEND, 0, 0 } } },
|
||||
[UNARY_NEGATIVE] = { .nuops = 1, .uops = { { UNARY_NEGATIVE, 0, 0 } } },
|
||||
[UNARY_NOT] = { .nuops = 1, .uops = { { UNARY_NOT, 0, 0 } } },
|
||||
[UNARY_INVERT] = { .nuops = 1, .uops = { { UNARY_INVERT, 0, 0 } } },
|
||||
[BINARY_SLICE] = { .nuops = 1, .uops = { { BINARY_SLICE, 0, 0 } } },
|
||||
[STORE_SLICE] = { .nuops = 1, .uops = { { STORE_SLICE, 0, 0 } } },
|
||||
[BINARY_SUBSCR_LIST_INT] = { .nuops = 1, .uops = { { BINARY_SUBSCR_LIST_INT, 0, 0 } } },
|
||||
[BINARY_SUBSCR_TUPLE_INT] = { .nuops = 1, .uops = { { BINARY_SUBSCR_TUPLE_INT, 0, 0 } } },
|
||||
[BINARY_SUBSCR_DICT] = { .nuops = 1, .uops = { { BINARY_SUBSCR_DICT, 0, 0 } } },
|
||||
[LIST_APPEND] = { .nuops = 1, .uops = { { LIST_APPEND, 0, 0 } } },
|
||||
[SET_ADD] = { .nuops = 1, .uops = { { SET_ADD, 0, 0 } } },
|
||||
[STORE_SUBSCR_LIST_INT] = { .nuops = 1, .uops = { { STORE_SUBSCR_LIST_INT, 0, 0 } } },
|
||||
[STORE_SUBSCR_DICT] = { .nuops = 1, .uops = { { STORE_SUBSCR_DICT, 0, 0 } } },
|
||||
[DELETE_SUBSCR] = { .nuops = 1, .uops = { { DELETE_SUBSCR, 0, 0 } } },
|
||||
[CALL_INTRINSIC_1] = { .nuops = 1, .uops = { { CALL_INTRINSIC_1, 0, 0 } } },
|
||||
[CALL_INTRINSIC_2] = { .nuops = 1, .uops = { { CALL_INTRINSIC_2, 0, 0 } } },
|
||||
[GET_AITER] = { .nuops = 1, .uops = { { GET_AITER, 0, 0 } } },
|
||||
[GET_ANEXT] = { .nuops = 1, .uops = { { GET_ANEXT, 0, 0 } } },
|
||||
[GET_AWAITABLE] = { .nuops = 1, .uops = { { GET_AWAITABLE, 0, 0 } } },
|
||||
[POP_EXCEPT] = { .nuops = 1, .uops = { { POP_EXCEPT, 0, 0 } } },
|
||||
[LOAD_ASSERTION_ERROR] = { .nuops = 1, .uops = { { LOAD_ASSERTION_ERROR, 0, 0 } } },
|
||||
[LOAD_BUILD_CLASS] = { .nuops = 1, .uops = { { LOAD_BUILD_CLASS, 0, 0 } } },
|
||||
[STORE_NAME] = { .nuops = 1, .uops = { { STORE_NAME, 0, 0 } } },
|
||||
[DELETE_NAME] = { .nuops = 1, .uops = { { DELETE_NAME, 0, 0 } } },
|
||||
[UNPACK_SEQUENCE_TWO_TUPLE] = { .nuops = 1, .uops = { { UNPACK_SEQUENCE_TWO_TUPLE, 0, 0 } } },
|
||||
[UNPACK_SEQUENCE_TUPLE] = { .nuops = 1, .uops = { { UNPACK_SEQUENCE_TUPLE, 0, 0 } } },
|
||||
[UNPACK_SEQUENCE_LIST] = { .nuops = 1, .uops = { { UNPACK_SEQUENCE_LIST, 0, 0 } } },
|
||||
[UNPACK_EX] = { .nuops = 1, .uops = { { UNPACK_EX, 0, 0 } } },
|
||||
[DELETE_ATTR] = { .nuops = 1, .uops = { { DELETE_ATTR, 0, 0 } } },
|
||||
[STORE_GLOBAL] = { .nuops = 1, .uops = { { STORE_GLOBAL, 0, 0 } } },
|
||||
[DELETE_GLOBAL] = { .nuops = 1, .uops = { { DELETE_GLOBAL, 0, 0 } } },
|
||||
[DELETE_DEREF] = { .nuops = 1, .uops = { { DELETE_DEREF, 0, 0 } } },
|
||||
[LOAD_FROM_DICT_OR_DEREF] = { .nuops = 1, .uops = { { LOAD_FROM_DICT_OR_DEREF, 0, 0 } } },
|
||||
[LOAD_DEREF] = { .nuops = 1, .uops = { { LOAD_DEREF, 0, 0 } } },
|
||||
[STORE_DEREF] = { .nuops = 1, .uops = { { STORE_DEREF, 0, 0 } } },
|
||||
[COPY_FREE_VARS] = { .nuops = 1, .uops = { { COPY_FREE_VARS, 0, 0 } } },
|
||||
[BUILD_STRING] = { .nuops = 1, .uops = { { BUILD_STRING, 0, 0 } } },
|
||||
[BUILD_TUPLE] = { .nuops = 1, .uops = { { BUILD_TUPLE, 0, 0 } } },
|
||||
[BUILD_LIST] = { .nuops = 1, .uops = { { BUILD_LIST, 0, 0 } } },
|
||||
[LIST_EXTEND] = { .nuops = 1, .uops = { { LIST_EXTEND, 0, 0 } } },
|
||||
[SET_UPDATE] = { .nuops = 1, .uops = { { SET_UPDATE, 0, 0 } } },
|
||||
[BUILD_SET] = { .nuops = 1, .uops = { { BUILD_SET, 0, 0 } } },
|
||||
[BUILD_MAP] = { .nuops = 1, .uops = { { BUILD_MAP, 0, 0 } } },
|
||||
[SETUP_ANNOTATIONS] = { .nuops = 1, .uops = { { SETUP_ANNOTATIONS, 0, 0 } } },
|
||||
[BUILD_CONST_KEY_MAP] = { .nuops = 1, .uops = { { BUILD_CONST_KEY_MAP, 0, 0 } } },
|
||||
[DICT_UPDATE] = { .nuops = 1, .uops = { { DICT_UPDATE, 0, 0 } } },
|
||||
[DICT_MERGE] = { .nuops = 1, .uops = { { DICT_MERGE, 0, 0 } } },
|
||||
[MAP_ADD] = { .nuops = 1, .uops = { { MAP_ADD, 0, 0 } } },
|
||||
[LOAD_SUPER_ATTR_ATTR] = { .nuops = 1, .uops = { { LOAD_SUPER_ATTR_ATTR, 0, 0 } } },
|
||||
[LOAD_SUPER_ATTR_METHOD] = { .nuops = 1, .uops = { { LOAD_SUPER_ATTR_METHOD, 0, 0 } } },
|
||||
[COMPARE_OP_FLOAT] = { .nuops = 1, .uops = { { COMPARE_OP_FLOAT, 0, 0 } } },
|
||||
[COMPARE_OP_INT] = { .nuops = 1, .uops = { { COMPARE_OP_INT, 0, 0 } } },
|
||||
[COMPARE_OP_STR] = { .nuops = 1, .uops = { { COMPARE_OP_STR, 0, 0 } } },
|
||||
[IS_OP] = { .nuops = 1, .uops = { { IS_OP, 0, 0 } } },
|
||||
[CONTAINS_OP] = { .nuops = 1, .uops = { { CONTAINS_OP, 0, 0 } } },
|
||||
[CHECK_EG_MATCH] = { .nuops = 1, .uops = { { CHECK_EG_MATCH, 0, 0 } } },
|
||||
[CHECK_EXC_MATCH] = { .nuops = 1, .uops = { { CHECK_EXC_MATCH, 0, 0 } } },
|
||||
[GET_LEN] = { .nuops = 1, .uops = { { GET_LEN, 0, 0 } } },
|
||||
[MATCH_CLASS] = { .nuops = 1, .uops = { { MATCH_CLASS, 0, 0 } } },
|
||||
[MATCH_MAPPING] = { .nuops = 1, .uops = { { MATCH_MAPPING, 0, 0 } } },
|
||||
[MATCH_SEQUENCE] = { .nuops = 1, .uops = { { MATCH_SEQUENCE, 0, 0 } } },
|
||||
[MATCH_KEYS] = { .nuops = 1, .uops = { { MATCH_KEYS, 0, 0 } } },
|
||||
[GET_ITER] = { .nuops = 1, .uops = { { GET_ITER, 0, 0 } } },
|
||||
[GET_YIELD_FROM_ITER] = { .nuops = 1, .uops = { { GET_YIELD_FROM_ITER, 0, 0 } } },
|
||||
[WITH_EXCEPT_START] = { .nuops = 1, .uops = { { WITH_EXCEPT_START, 0, 0 } } },
|
||||
[PUSH_EXC_INFO] = { .nuops = 1, .uops = { { PUSH_EXC_INFO, 0, 0 } } },
|
||||
[EXIT_INIT_CHECK] = { .nuops = 1, .uops = { { EXIT_INIT_CHECK, 0, 0 } } },
|
||||
[MAKE_FUNCTION] = { .nuops = 1, .uops = { { MAKE_FUNCTION, 0, 0 } } },
|
||||
[SET_FUNCTION_ATTRIBUTE] = { .nuops = 1, .uops = { { SET_FUNCTION_ATTRIBUTE, 0, 0 } } },
|
||||
[BUILD_SLICE] = { .nuops = 1, .uops = { { BUILD_SLICE, 0, 0 } } },
|
||||
[CONVERT_VALUE] = { .nuops = 1, .uops = { { CONVERT_VALUE, 0, 0 } } },
|
||||
[FORMAT_SIMPLE] = { .nuops = 1, .uops = { { FORMAT_SIMPLE, 0, 0 } } },
|
||||
[FORMAT_WITH_SPEC] = { .nuops = 1, .uops = { { FORMAT_WITH_SPEC, 0, 0 } } },
|
||||
[COPY] = { .nuops = 1, .uops = { { COPY, 0, 0 } } },
|
||||
[SWAP] = { .nuops = 1, .uops = { { SWAP, 0, 0 } } },
|
||||
};
|
||||
#endif
|
||||
|
|
|
@ -3,7 +3,9 @@
|
|||
#include "opcode.h"
|
||||
#include "pycore_interp.h"
|
||||
#include "pycore_opcode.h"
|
||||
#include "opcode_metadata.h"
|
||||
#include "pycore_pystate.h"
|
||||
#include "pycore_uops.h"
|
||||
#include "cpython/optimizer.h"
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
@ -278,3 +280,200 @@ PyUnstable_Optimizer_NewCounter(void)
|
|||
opt->count = 0;
|
||||
return (PyObject *)opt;
|
||||
}
|
||||
|
||||
///////////////////// Experimental UOp Optimizer /////////////////////
|
||||
|
||||
#ifdef Py_DEBUG
|
||||
/* For debugging the interpreter: */
|
||||
# define LLTRACE 1 /* Low-level trace feature */
|
||||
#endif
|
||||
|
||||
static void
|
||||
uop_dealloc(_PyUOpExecutorObject *self) {
|
||||
PyObject_Free(self);
|
||||
}
|
||||
|
||||
static PyTypeObject UOpExecutor_Type = {
|
||||
PyVarObject_HEAD_INIT(&PyType_Type, 0)
|
||||
.tp_name = "uop_executor",
|
||||
.tp_basicsize = sizeof(_PyUOpExecutorObject),
|
||||
.tp_itemsize = 0,
|
||||
.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
|
||||
.tp_dealloc = (destructor)uop_dealloc,
|
||||
};
|
||||
|
||||
static int
|
||||
translate_bytecode_to_trace(
|
||||
PyCodeObject *code,
|
||||
_Py_CODEUNIT *instr,
|
||||
_PyUOpInstruction *trace,
|
||||
int max_length)
|
||||
{
|
||||
#ifdef LLTRACE
|
||||
char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG");
|
||||
int lltrace = 0;
|
||||
if (uop_debug != NULL && *uop_debug >= '0') {
|
||||
lltrace = *uop_debug - '0'; // TODO: Parse an int and all that
|
||||
}
|
||||
if (lltrace >= 4) {
|
||||
fprintf(stderr,
|
||||
"Optimizing %s (%s:%d) at offset %ld\n",
|
||||
PyUnicode_AsUTF8(code->co_qualname),
|
||||
PyUnicode_AsUTF8(code->co_filename),
|
||||
code->co_firstlineno,
|
||||
(long)(instr - (_Py_CODEUNIT *)code->co_code_adaptive));
|
||||
}
|
||||
#define ADD_TO_TRACE(OPCODE, OPERAND) \
|
||||
if (lltrace >= 2) { \
|
||||
const char *opname = (OPCODE) < 256 ? _PyOpcode_OpName[(OPCODE)] : ""; \
|
||||
fprintf(stderr, " ADD_TO_TRACE(%s %d, %" PRIu64 ")\n", opname, (OPCODE), (uint64_t)(OPERAND)); \
|
||||
} \
|
||||
trace[trace_length].opcode = (OPCODE); \
|
||||
trace[trace_length].operand = (OPERAND); \
|
||||
trace_length++;
|
||||
#else
|
||||
#define ADD_TO_TRACE(OPCODE, OPERAND) \
|
||||
trace[trace_length].opcode = (OPCODE); \
|
||||
trace[trace_length].operand = (OPERAND); \
|
||||
trace_length++;
|
||||
#endif
|
||||
|
||||
int trace_length = 0;
|
||||
// Always reserve space for one uop, plus SET_UP, plus EXIT_TRACE
|
||||
while (trace_length + 3 <= max_length) {
|
||||
int opcode = instr->op.code;
|
||||
uint64_t operand = instr->op.arg;
|
||||
switch (opcode) {
|
||||
case LOAD_FAST_LOAD_FAST:
|
||||
{
|
||||
// Reserve space for two uops (+ SETUP + EXIT_TRACE)
|
||||
if (trace_length + 4 > max_length) {
|
||||
goto done;
|
||||
}
|
||||
uint64_t oparg1 = operand >> 4;
|
||||
uint64_t oparg2 = operand & 15;
|
||||
ADD_TO_TRACE(LOAD_FAST, oparg1);
|
||||
ADD_TO_TRACE(LOAD_FAST, oparg2);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode];
|
||||
if (expansion->nuops > 0) {
|
||||
// Reserve space for nuops (+ SETUP + EXIT_TRACE)
|
||||
int nuops = expansion->nuops;
|
||||
if (trace_length + nuops + 2 > max_length) {
|
||||
goto done;
|
||||
}
|
||||
for (int i = 0; i < nuops; i++) {
|
||||
int offset = expansion->uops[i].offset;
|
||||
switch (expansion->uops[i].size) {
|
||||
case 0:
|
||||
break;
|
||||
case 1:
|
||||
operand = read_u16(&instr[offset].cache);
|
||||
break;
|
||||
case 2:
|
||||
operand = read_u32(&instr[offset].cache);
|
||||
break;
|
||||
case 4:
|
||||
operand = read_u64(&instr[offset].cache);
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr,
|
||||
"opcode=%d, operand=%" PRIu64 "; nuops=%d, i=%d; size=%d, offset=%d\n",
|
||||
opcode, operand, nuops, i,
|
||||
expansion->uops[i].size,
|
||||
expansion->uops[i].offset);
|
||||
Py_FatalError("garbled expansion");
|
||||
}
|
||||
ADD_TO_TRACE(expansion->uops[i].uop, operand);
|
||||
assert(expansion->uops[0].size == 0); // TODO
|
||||
}
|
||||
break;
|
||||
}
|
||||
// fprintf(stderr, "Unsupported opcode %d\n", opcode);
|
||||
goto done; // Break out of while loop
|
||||
}
|
||||
}
|
||||
instr++;
|
||||
// Add cache size for opcode
|
||||
instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]];
|
||||
ADD_TO_TRACE(SET_IP, (int)(instr - (_Py_CODEUNIT *)code->co_code_adaptive));
|
||||
}
|
||||
done:
|
||||
if (trace_length > 0) {
|
||||
ADD_TO_TRACE(EXIT_TRACE, 0);
|
||||
#ifdef LLTRACE
|
||||
if (lltrace >= 1) {
|
||||
fprintf(stderr,
|
||||
"Created a trace for %s (%s:%d) at offset %ld -- length %d\n",
|
||||
PyUnicode_AsUTF8(code->co_qualname),
|
||||
PyUnicode_AsUTF8(code->co_filename),
|
||||
code->co_firstlineno,
|
||||
(long)(instr - (_Py_CODEUNIT *)code->co_code_adaptive),
|
||||
trace_length);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else {
|
||||
#ifdef LLTRACE
|
||||
if (lltrace >= 4) {
|
||||
fprintf(stderr,
|
||||
"No trace for %s (%s:%d) at offset %ld\n",
|
||||
PyUnicode_AsUTF8(code->co_qualname),
|
||||
PyUnicode_AsUTF8(code->co_filename),
|
||||
code->co_firstlineno,
|
||||
(long)(instr - (_Py_CODEUNIT *)code->co_code_adaptive));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
return trace_length;
|
||||
|
||||
#undef ADD_TO_TRACE
|
||||
}
|
||||
|
||||
static int
|
||||
uop_optimize(
|
||||
_PyOptimizerObject *self,
|
||||
PyCodeObject *code,
|
||||
_Py_CODEUNIT *instr,
|
||||
_PyExecutorObject **exec_ptr)
|
||||
{
|
||||
_PyUOpInstruction trace[_Py_UOP_MAX_TRACE_LENGTH];
|
||||
int trace_length = translate_bytecode_to_trace(code, instr, trace, _Py_UOP_MAX_TRACE_LENGTH);
|
||||
if (trace_length <= 0) {
|
||||
// Error or nothing translated
|
||||
return trace_length;
|
||||
}
|
||||
OBJECT_STAT_INC(optimization_traces_created);
|
||||
_PyUOpExecutorObject *executor = (_PyUOpExecutorObject *)_PyObject_New(&UOpExecutor_Type);
|
||||
if (executor == NULL) {
|
||||
return -1;
|
||||
}
|
||||
executor->base.execute = _PyUopExecute;
|
||||
memcpy(executor->trace, trace, trace_length * sizeof(_PyUOpInstruction));
|
||||
*exec_ptr = (_PyExecutorObject *)executor;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static PyTypeObject UOpOptimizer_Type = {
|
||||
PyVarObject_HEAD_INIT(&PyType_Type, 0)
|
||||
.tp_name = "uop_optimizer",
|
||||
.tp_basicsize = sizeof(_PyOptimizerObject),
|
||||
.tp_itemsize = 0,
|
||||
.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
|
||||
};
|
||||
|
||||
PyObject *
|
||||
PyUnstable_Optimizer_NewUOpOptimizer(void)
|
||||
{
|
||||
_PyOptimizerObject *opt = (_PyOptimizerObject *)_PyObject_New(&UOpOptimizer_Type);
|
||||
if (opt == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
opt->optimize = uop_optimize;
|
||||
opt->resume_threshold = UINT16_MAX;
|
||||
opt->backedge_threshold = 0;
|
||||
return (PyObject *)opt;
|
||||
}
|
||||
|
|
|
@ -1181,6 +1181,19 @@ init_interp_main(PyThreadState *tstate)
|
|||
#endif
|
||||
}
|
||||
|
||||
// Turn on experimental tier 2 (uops-based) optimizer
|
||||
if (is_main_interp) {
|
||||
char *envvar = Py_GETENV("PYTHONUOPS");
|
||||
int enabled = envvar != NULL && *envvar > '0';
|
||||
if (_Py_get_xoption(&config->xoptions, L"uops") != NULL) {
|
||||
enabled = 1;
|
||||
}
|
||||
if (enabled) {
|
||||
PyObject *opt = PyUnstable_Optimizer_NewUOpOptimizer();
|
||||
PyUnstable_SetOptimizer((_PyOptimizerObject *)opt);
|
||||
}
|
||||
}
|
||||
|
||||
assert(!_PyErr_Occurred(tstate));
|
||||
|
||||
return _PyStatus_OK();
|
||||
|
|
|
@ -195,6 +195,10 @@ print_object_stats(FILE *out, ObjectStats *stats)
|
|||
fprintf(out, "Object method cache collisions: %" PRIu64 "\n", stats->type_cache_collisions);
|
||||
fprintf(out, "Object method cache dunder hits: %" PRIu64 "\n", stats->type_cache_dunder_hits);
|
||||
fprintf(out, "Object method cache dunder misses: %" PRIu64 "\n", stats->type_cache_dunder_misses);
|
||||
fprintf(out, "Optimization attempts: %" PRIu64 "\n", stats->optimization_attempts);
|
||||
fprintf(out, "Optimization traces created: %" PRIu64 "\n", stats->optimization_traces_created);
|
||||
fprintf(out, "Optimization traces executed: %" PRIu64 "\n", stats->optimization_traces_executed);
|
||||
fprintf(out, "Optimization uops executed: %" PRIu64 "\n", stats->optimization_uops_executed);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
@ -83,6 +83,7 @@ Objects/unicodetype_db.h
|
|||
Python/deepfreeze/*.c
|
||||
Python/frozen_modules/*.h
|
||||
Python/generated_cases.c.h
|
||||
Python/executor_cases.c.h
|
||||
|
||||
# not actually source
|
||||
Python/bytecodes.c
|
||||
|
|
|
@ -366,6 +366,8 @@ Python/sysmodule.c - whatstrings -
|
|||
Python/optimizer.c - DefaultOptimizer_Type -
|
||||
Python/optimizer.c - CounterExecutor_Type -
|
||||
Python/optimizer.c - CounterOptimizer_Type -
|
||||
Python/optimizer.c - UOpExecutor_Type -
|
||||
Python/optimizer.c - UOpOptimizer_Type -
|
||||
Python/optimizer.c - _PyOptimizer_Default -
|
||||
|
||||
##-----------------------
|
||||
|
|
Can't render this file because it has a wrong number of fields in line 4.
|
|
@ -9,7 +9,7 @@ What's currently here:
|
|||
- `plexer.py`: OO interface on top of lexer.py; main class: `PLexer`
|
||||
- `parser.py`: Parser for instruction definition DSL; main class `Parser`
|
||||
- `generate_cases.py`: driver script to read `Python/bytecodes.c` and
|
||||
write `Python/generated_cases.c.h`
|
||||
write `Python/generated_cases.c.h` (and several other files)
|
||||
- `test_generator.py`: tests, require manual running using `pytest`
|
||||
|
||||
Note that there is some dummy C code at the top and bottom of
|
||||
|
|
|
@ -29,6 +29,9 @@ DEFAULT_METADATA_OUTPUT = os.path.relpath(
|
|||
DEFAULT_PYMETADATA_OUTPUT = os.path.relpath(
|
||||
os.path.join(ROOT, "Lib/_opcode_metadata.py")
|
||||
)
|
||||
DEFAULT_EXECUTOR_OUTPUT = os.path.relpath(
|
||||
os.path.join(ROOT, "Python/executor_cases.c.h")
|
||||
)
|
||||
BEGIN_MARKER = "// BEGIN BYTECODES //"
|
||||
END_MARKER = "// END BYTECODES //"
|
||||
RE_PREDICTED = (
|
||||
|
@ -61,6 +64,13 @@ arg_parser.add_argument(
|
|||
arg_parser.add_argument(
|
||||
"input", nargs=argparse.REMAINDER, help="Instruction definition file(s)"
|
||||
)
|
||||
arg_parser.add_argument(
|
||||
"-e",
|
||||
"--executor-cases",
|
||||
type=str,
|
||||
help="Write executor cases to this file",
|
||||
default=DEFAULT_EXECUTOR_OUTPUT,
|
||||
)
|
||||
|
||||
|
||||
def effect_size(effect: StackEffect) -> tuple[int, str]:
|
||||
|
@ -176,14 +186,14 @@ class Formatter:
|
|||
self.prefix = self.prefix[:-4]
|
||||
|
||||
@contextlib.contextmanager
def block(self, head: str, tail: str = ""):
    """Emit a brace-delimited block around the body of a ``with`` statement.

    The opening line is ``head`` followed by `` {``, or a bare ``{`` when
    ``head`` is empty.  Everything emitted inside the ``with`` body is
    written one indentation level deeper.  The block is closed with ``}``
    immediately followed by ``tail`` (for example ``";"`` or ``","``).

    ``tail`` defaults to the empty string, so callers that pass only
    ``head`` get a plain closing brace.
    """
    opening = f"{head} {{" if head else "{"
    self.emit(opening)
    with self.indent():
        yield
    self.emit("}" + tail)
|
||||
|
||||
def stack_adjust(
|
||||
self,
|
||||
|
@ -290,6 +300,29 @@ class InstructionFlags:
|
|||
f"(_PyOpcode_opcode_metadata[(OP)].flags & ({name}))")
|
||||
|
||||
|
||||
FORBIDDEN_NAMES_IN_UOPS = (
|
||||
"resume_with_error", # Proxy for "goto", which isn't an IDENTIFIER
|
||||
"unbound_local_error",
|
||||
"kwnames",
|
||||
"next_instr",
|
||||
"oparg1", # Proxy for super-instructions like LOAD_FAST_LOAD_FAST
|
||||
"JUMPBY",
|
||||
"DISPATCH",
|
||||
"INSTRUMENTED_JUMP",
|
||||
"throwflag",
|
||||
"exception_unwind",
|
||||
"import_from",
|
||||
"import_name",
|
||||
"_PyObject_CallNoArgs", # Proxy for BEFORE_WITH
|
||||
)
|
||||
|
||||
|
||||
# Interpreter tiers
|
||||
TIER_ONE = 1 # Specializing adaptive interpreter (PEP 659)
|
||||
TIER_TWO = 2 # Experimental tracing interpreter
|
||||
Tiers: typing.TypeAlias = typing.Literal[1, 2]
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class Instruction:
|
||||
"""An instruction with additional data and code."""
|
||||
|
@ -353,7 +386,32 @@ class Instruction:
|
|||
cache = "0"
|
||||
self.instr_fmt = fmt
|
||||
|
||||
def write(self, out: Formatter) -> None:
|
||||
def is_viable_uop(self) -> bool:
    """Whether this instruction is viable as a uop.

    Returns False when any of the following holds, True otherwise:

    * ``self.always_exits`` is true;
    * the instruction has the HAS_ARG flag and names any cache effect
      (any cache effect whose name is not ``UNUSED``);
    * the instruction lacks the HAS_ARG flag and names more than one
      cache effect;
    * ``variable_used()`` reports that the instruction definition uses
      one of the identifiers in ``FORBIDDEN_NAMES_IN_UOPS``.
    """
    if self.always_exits:
        return False

    # Cache entries the instruction actually reads (unnamed ones are skipped).
    named_caches = [c for c in self.cache_effects if c.name != UNUSED]

    # If the instruction uses oparg, it cannot use any caches;
    # if it doesn't use oparg, it can have one cache entry.
    # (For tier two, write_body() emits every cache read as `= operand`.)
    max_named_caches = 0 if self.instr_flags.HAS_ARG_FLAG else 1
    if len(named_caches) > max_named_caches:
        return False

    # TODO: Don't check in '#ifdef ENABLE_SPECIALIZATION' regions
    return not any(
        variable_used(self.inst, forbidden)
        for forbidden in FORBIDDEN_NAMES_IN_UOPS
    )
|
||||
|
||||
def write(self, out: Formatter, tier: Tiers = TIER_ONE) -> None:
|
||||
"""Write one instruction, sans prologue and epilogue."""
|
||||
# Write a static assertion that a family's cache size is correct
|
||||
if family := self.family:
|
||||
|
@ -400,7 +458,7 @@ class Instruction:
|
|||
|
||||
# out.emit(f"next_instr += OPSIZE({self.inst.name}) - 1;")
|
||||
|
||||
self.write_body(out, 0)
|
||||
self.write_body(out, 0, tier=tier)
|
||||
|
||||
# Skip the rest if the block always exits
|
||||
if self.always_exits:
|
||||
|
@ -427,10 +485,16 @@ class Instruction:
|
|||
out.assign(dst, oeffect)
|
||||
|
||||
# Write cache effect
|
||||
if self.cache_offset:
|
||||
if tier == TIER_ONE and self.cache_offset:
|
||||
out.emit(f"next_instr += {self.cache_offset};")
|
||||
|
||||
def write_body(self, out: Formatter, dedent: int, cache_adjust: int = 0) -> None:
|
||||
def write_body(
|
||||
self,
|
||||
out: Formatter,
|
||||
dedent: int,
|
||||
cache_adjust: int = 0,
|
||||
tier: Tiers = TIER_ONE,
|
||||
) -> None:
|
||||
"""Write the instruction body."""
|
||||
# Write cache effect variable declarations and initializations
|
||||
cache_offset = cache_adjust
|
||||
|
@ -447,9 +511,12 @@ class Instruction:
|
|||
else:
|
||||
typ = f"uint{bits}_t "
|
||||
func = f"read_u{bits}"
|
||||
out.emit(
|
||||
f"{typ}{ceffect.name} = {func}(&next_instr[{cache_offset}].cache);"
|
||||
)
|
||||
if tier == TIER_ONE:
|
||||
out.emit(
|
||||
f"{typ}{ceffect.name} = {func}(&next_instr[{cache_offset}].cache);"
|
||||
)
|
||||
else:
|
||||
out.emit(f"{typ}{ceffect.name} = operand;")
|
||||
cache_offset += ceffect.size
|
||||
assert cache_offset == self.cache_offset + cache_adjust
|
||||
|
||||
|
@ -573,16 +640,24 @@ class Analyzer:
|
|||
output_filename: str
|
||||
metadata_filename: str
|
||||
pymetadata_filename: str
|
||||
executor_filename: str
|
||||
errors: int = 0
|
||||
emit_line_directives: bool = False
|
||||
|
||||
def __init__(self, input_filenames: list[str], output_filename: str,
|
||||
metadata_filename: str, pymetadata_filename: str):
|
||||
def __init__(
|
||||
self,
|
||||
input_filenames: list[str],
|
||||
output_filename: str,
|
||||
metadata_filename: str,
|
||||
pymetadata_filename: str,
|
||||
executor_filename: str,
|
||||
):
|
||||
"""Read the input file."""
|
||||
self.input_filenames = input_filenames
|
||||
self.output_filename = output_filename
|
||||
self.metadata_filename = metadata_filename
|
||||
self.pymetadata_filename = pymetadata_filename
|
||||
self.executor_filename = executor_filename
|
||||
|
||||
def error(self, msg: str, node: parser.Node) -> None:
|
||||
lineno = 0
|
||||
|
@ -1107,6 +1182,8 @@ class Analyzer:
|
|||
|
||||
self.write_pseudo_instrs()
|
||||
|
||||
self.write_uop_defines()
|
||||
|
||||
self.write_stack_effect_functions()
|
||||
|
||||
# Write type definitions
|
||||
|
@ -1114,12 +1191,17 @@ class Analyzer:
|
|||
|
||||
InstructionFlags.emit_macros(self.out)
|
||||
|
||||
self.out.emit("struct opcode_metadata {")
|
||||
with self.out.indent():
|
||||
with self.out.block("struct opcode_metadata", ";"):
|
||||
self.out.emit("bool valid_entry;")
|
||||
self.out.emit("enum InstructionFormat instr_format;")
|
||||
self.out.emit("int flags;")
|
||||
self.out.emit("};")
|
||||
self.out.emit("")
|
||||
|
||||
with self.out.block("struct opcode_macro_expansion", ";"):
|
||||
self.out.emit("int nuops;")
|
||||
self.out.emit("struct { int16_t uop; int8_t size; int8_t offset; } uops[8];")
|
||||
self.out.emit("")
|
||||
|
||||
self.out.emit("")
|
||||
self.out.emit("#define OPCODE_METADATA_FMT(OP) "
|
||||
"(_PyOpcode_opcode_metadata[(OP)].instr_format)")
|
||||
|
@ -1130,7 +1212,9 @@ class Analyzer:
|
|||
# Write metadata array declaration
|
||||
self.out.emit("#ifndef NEED_OPCODE_METADATA")
|
||||
self.out.emit("extern const struct opcode_metadata _PyOpcode_opcode_metadata[512];")
|
||||
self.out.emit("extern const struct opcode_macro_expansion _PyOpcode_macro_expansion[256];")
|
||||
self.out.emit("#else")
|
||||
|
||||
self.out.emit("const struct opcode_metadata _PyOpcode_opcode_metadata[512] = {")
|
||||
|
||||
# Write metadata for each instruction
|
||||
|
@ -1150,6 +1234,31 @@ class Analyzer:
|
|||
|
||||
# Write end of array
|
||||
self.out.emit("};")
|
||||
|
||||
with self.out.block(
|
||||
"const struct opcode_macro_expansion _PyOpcode_macro_expansion[256] =",
|
||||
";",
|
||||
):
|
||||
# Write macro expansion for each non-pseudo instruction
|
||||
for thing in self.everything:
|
||||
match thing:
|
||||
case OverriddenInstructionPlaceHolder():
|
||||
pass
|
||||
case parser.InstDef(name=name):
|
||||
instr = self.instrs[name]
|
||||
if instr.kind != "op" and instr.is_viable_uop():
|
||||
self.out.emit(
|
||||
f"[{name}] = "
|
||||
f"{{ .nuops = 1, .uops = {{ {{ {name}, 0, 0 }} }} }},"
|
||||
)
|
||||
case parser.Macro():
|
||||
# TODO: emit expansion if all parts are viable uops
|
||||
pass
|
||||
case parser.Pseudo():
|
||||
pass
|
||||
case _:
|
||||
typing.assert_never(thing)
|
||||
|
||||
self.out.emit("#endif")
|
||||
|
||||
with open(self.pymetadata_filename, "w") as f:
|
||||
|
@ -1184,7 +1293,6 @@ class Analyzer:
|
|||
"opcode for family in _specializations.values() for opcode in family"
|
||||
"]")
|
||||
|
||||
|
||||
def write_pseudo_instrs(self) -> None:
|
||||
"""Write the IS_PSEUDO_INSTR macro"""
|
||||
self.out.emit("\n\n#define IS_PSEUDO_INSTR(OP) \\")
|
||||
|
@ -1192,6 +1300,20 @@ class Analyzer:
|
|||
self.out.emit(f" ((OP) == {op}) || \\")
|
||||
self.out.emit(f" 0")
|
||||
|
||||
def write_uop_defines(self) -> None:
    """Emit one '#define NAME NUMBER' line per uop.

    A blank line is emitted first. Numbers are consecutive, starting at
    300: EXIT_TRACE and SET_IP come first, followed by every instruction
    in ``self.instrs`` whose kind is "op" and which is a viable uop, in
    the dictionary's iteration order.
    """
    self.out.emit("")

    # NOTE(review): 300 is presumably chosen to stay clear of the regular
    # opcode numbers -- confirm before changing it.
    first_uop_number = 300

    uop_names = ["EXIT_TRACE", "SET_IP"]
    uop_names += [
        instr.name
        for instr in self.instrs.values()
        if instr.kind == "op" and instr.is_viable_uop()
    ]

    for number, name in enumerate(uop_names, start=first_uop_number):
        self.out.emit(f"#define {name} {number}")
|
||||
|
||||
def emit_metadata_entry(
|
||||
self, name: str, fmt: str, flags: InstructionFlags
|
||||
) -> None:
|
||||
|
@ -1221,10 +1343,7 @@ class Analyzer:
|
|||
# Create formatter
|
||||
self.out = Formatter(f, 8, self.emit_line_directives)
|
||||
|
||||
# Write provenance header
|
||||
self.out.write_raw(f"{self.out.comment} This file is generated by {THIS}\n")
|
||||
self.out.write_raw(self.from_source_files())
|
||||
self.out.write_raw(f"{self.out.comment} Do not edit!\n")
|
||||
self.write_provenance_header()
|
||||
|
||||
# Write and count instructions of all kinds
|
||||
n_instrs = 0
|
||||
|
@ -1252,6 +1371,33 @@ class Analyzer:
|
|||
file=sys.stderr,
|
||||
)
|
||||
|
||||
def write_executor_instructions(self) -> None:
|
||||
"""Generate cases for the Tier 2 interpreter."""
|
||||
with open(self.executor_filename, "w") as f:
|
||||
self.out = Formatter(f, 8)
|
||||
self.write_provenance_header()
|
||||
for thing in self.everything:
|
||||
match thing:
|
||||
case OverriddenInstructionPlaceHolder():
|
||||
self.write_overridden_instr_place_holder(thing)
|
||||
case parser.InstDef():
|
||||
instr = self.instrs[thing.name]
|
||||
if instr.is_viable_uop():
|
||||
self.out.emit("")
|
||||
with self.out.block(f"case {thing.name}:"):
|
||||
instr.write(self.out, tier=TIER_TWO)
|
||||
self.out.emit("break;")
|
||||
case parser.Macro():
|
||||
pass # TODO
|
||||
case parser.Pseudo():
|
||||
pass
|
||||
case _:
|
||||
typing.assert_never(thing)
|
||||
print(
|
||||
f"Wrote some stuff to {self.executor_filename}",
|
||||
file=sys.stderr,
|
||||
)
|
||||
|
||||
def write_overridden_instr_place_holder(self,
|
||||
place_holder: OverriddenInstructionPlaceHolder) -> None:
|
||||
self.out.emit("")
|
||||
|
@ -1405,7 +1551,7 @@ def main():
|
|||
args.input.append(DEFAULT_INPUT)
|
||||
|
||||
# Raises OSError if input unreadable
|
||||
a = Analyzer(args.input, args.output, args.metadata, args.pymetadata)
|
||||
a = Analyzer(args.input, args.output, args.metadata, args.pymetadata, args.executor_cases)
|
||||
|
||||
if args.emit_line_directives:
|
||||
a.emit_line_directives = True
|
||||
|
@ -1415,6 +1561,7 @@ def main():
|
|||
sys.exit(f"Found {a.errors} errors")
|
||||
a.write_instructions() # Raises OSError if output can't be written
|
||||
a.write_metadata()
|
||||
a.write_executor_instructions()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -44,7 +44,15 @@ def run_cases_test(input: str, expected: str):
|
|||
temp_input.flush()
|
||||
temp_output = tempfile.NamedTemporaryFile("w+")
|
||||
temp_metadata = tempfile.NamedTemporaryFile("w+")
|
||||
a = generate_cases.Analyzer([temp_input.name], temp_output.name, temp_metadata.name)
|
||||
temp_pymetadata = tempfile.NamedTemporaryFile("w+")
|
||||
temp_executor = tempfile.NamedTemporaryFile("w+")
|
||||
a = generate_cases.Analyzer(
|
||||
[temp_input.name],
|
||||
temp_output.name,
|
||||
temp_metadata.name,
|
||||
temp_pymetadata.name,
|
||||
temp_executor.name,
|
||||
)
|
||||
a.parse()
|
||||
a.analyze()
|
||||
if a.errors:
|
||||
|
|
Loading…
Reference in New Issue