diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index 0e7bc9fdd7a..96e829f8fbe 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -29,10 +29,17 @@ typedef struct { _PyExecutorLinkListNode links; } _PyVMData; +typedef struct { + uint16_t opcode; + uint16_t oparg; + uint32_t target; + uint64_t operand; // A cache entry +} _PyUOpInstruction; + typedef struct _PyExecutorObject { PyObject_VAR_HEAD _PyVMData vm_data; /* Used by the VM, but opaque to the optimizer */ - /* Data needed by the executor goes here, but is opaque to the VM */ + _PyUOpInstruction trace[1]; } _PyExecutorObject; typedef struct _PyOptimizerObject _PyOptimizerObject; diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index b052460b44b..31f30c673f2 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -8,8 +8,6 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif -#include "pycore_uops.h" // _PyUOpInstruction - int _Py_uop_analyze_and_optimize(PyCodeObject *code, _PyUOpInstruction *trace, int trace_len, int curr_stackentries); diff --git a/Include/internal/pycore_uops.h b/Include/internal/pycore_uops.h deleted file mode 100644 index eb10002d34c..00000000000 --- a/Include/internal/pycore_uops.h +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef Py_INTERNAL_UOPS_H -#define Py_INTERNAL_UOPS_H -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef Py_BUILD_CORE -# error "this header requires Py_BUILD_CORE define" -#endif - -#include "pycore_frame.h" // _PyInterpreterFrame - -#define _Py_UOP_MAX_TRACE_LENGTH 512 - -typedef struct { - uint16_t opcode; - uint16_t oparg; - uint32_t target; - uint64_t operand; // A cache entry -} _PyUOpInstruction; - -typedef struct { - _PyExecutorObject base; - _PyUOpInstruction trace[1]; -} _PyUOpExecutorObject; - -#ifdef __cplusplus -} -#endif -#endif /* !Py_INTERNAL_UOPS_H */ diff --git a/Makefile.pre.in b/Makefile.pre.in index 15d419b930c..289ab97666e 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1895,7 +1895,6 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_unionobject.h \ $(srcdir)/Include/internal/pycore_unicodeobject.h \ $(srcdir)/Include/internal/pycore_unicodeobject_generated.h \ - $(srcdir)/Include/internal/pycore_uops.h \ $(srcdir)/Include/internal/pycore_uop_metadata.h \ $(srcdir)/Include/internal/pycore_warnings.h \ $(srcdir)/Include/internal/pycore_weakref.h \ diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index a8b753ca489..64738b1bbf2 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -295,7 +295,6 @@ - diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 965efa2e3d3..b37ca2dfed5 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -804,9 +804,6 @@ Include\internal - - Include\internal - Include\internal\mimalloc diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 2011d963e36..b346fe73f76 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -68,7 +68,7 @@ static size_t jump; static uint16_t invert, counter, index, hint; #define unused 0 // Used in a macro def, can't be static static uint32_t type_version; -static _PyUOpExecutorObject *current_executor; +static _PyExecutorObject *current_executor; static PyObject * dummy_func( @@ -2369,10 +2369,10 @@ dummy_func( CHECK_EVAL_BREAKER(); PyCodeObject *code = _PyFrame_GetCode(frame); - _PyExecutorObject *executor = (_PyExecutorObject *)code->co_executors->executors[oparg&255]; + _PyExecutorObject *executor = code->co_executors->executors[oparg & 255]; if (executor->vm_data.valid) { Py_INCREF(executor); - current_executor = (_PyUOpExecutorObject *)executor; + current_executor = executor; GOTO_TIER_TWO(); } else { @@ -4063,7 +4063,7 @@ dummy_func( op(_CHECK_VALIDITY, (--)) { TIER_TWO_ONLY - DEOPT_IF(!current_executor->base.vm_data.valid); + DEOPT_IF(!current_executor->vm_data.valid); } op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { diff --git a/Python/ceval.c b/Python/ceval.c index b3b542f8dde..49388cd2037 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -25,7 +25,6 @@ #include "pycore_tuple.h" // _PyTuple_ITEMS() #include "pycore_typeobject.h" // _PySuper_Lookup() #include "pycore_uop_ids.h" // Uops -#include "pycore_uops.h" // _PyUOpExecutorObject #include "pycore_pyerrors.h" #include "pycore_dict.h" @@ -739,7 +738,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int } /* State shared between Tier 1 and Tier 2 interpreter */ - _PyUOpExecutorObject *current_executor = NULL; + _PyExecutorObject *current_executor = NULL; /* Local "register" variables. * These are cached values from the frame and code object. */ diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index ea4caa9a97a..3ffe4161b01 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3393,7 +3393,7 @@ case _CHECK_VALIDITY: { TIER_TWO_ONLY - if (!current_executor->base.vm_data.valid) goto deoptimize; + if (!current_executor->vm_data.valid) goto deoptimize; break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 1e995b62a72..68468728d44 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2377,10 +2377,10 @@ TIER_ONE_ONLY CHECK_EVAL_BREAKER(); PyCodeObject *code = _PyFrame_GetCode(frame); - _PyExecutorObject *executor = (_PyExecutorObject *)code->co_executors->executors[oparg&255]; + _PyExecutorObject *executor = code->co_executors->executors[oparg & 255]; if (executor->vm_data.valid) { Py_INCREF(executor); - current_executor = (_PyUOpExecutorObject *)executor; + current_executor = executor; GOTO_TIER_TWO(); } else { diff --git a/Python/optimizer.c b/Python/optimizer.c index 227d6be0092..236ae266971 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -7,7 +7,6 @@ #include "pycore_optimizer.h" // _Py_uop_analyze_and_optimize() #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_uop_ids.h" -#include "pycore_uops.h" #include "cpython/optimizer.h" #include #include @@ -17,6 +16,8 @@ #include "pycore_uop_metadata.h" // Uop tables #undef NEED_OPCODE_METADATA +#define UOP_MAX_TRACE_LENGTH 512 + #define MAX_EXECUTORS_SIZE 256 @@ -224,8 +225,8 @@ static PyMethodDef executor_methods[] = { ///////////////////// Experimental UOp Optimizer ///////////////////// static void -uop_dealloc(_PyUOpExecutorObject *self) { - _Py_ExecutorClear((_PyExecutorObject *)self); +uop_dealloc(_PyExecutorObject *self) { + _Py_ExecutorClear(self); PyObject_Free(self); } @@ -236,13 +237,13 @@ _PyUOpName(int index) } static Py_ssize_t -uop_len(_PyUOpExecutorObject *self) +uop_len(_PyExecutorObject *self) { return Py_SIZE(self); } static PyObject * -uop_item(_PyUOpExecutorObject *self, Py_ssize_t index) +uop_item(_PyExecutorObject *self, Py_ssize_t index) { Py_ssize_t len = uop_len(self); if (index < 0 || index >= len) { @@ -280,7 +281,7 @@ PySequenceMethods uop_as_sequence = { PyTypeObject _PyUOpExecutor_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) .tp_name = "uop_executor", - .tp_basicsize = offsetof(_PyUOpExecutorObject, trace), + .tp_basicsize = offsetof(_PyExecutorObject, trace), .tp_itemsize = sizeof(_PyUOpInstruction), .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION, .tp_dealloc = (destructor)uop_dealloc, @@ -423,8 +424,7 @@ top: // Jump here after _PUSH_FRAME or likely branches if (opcode == ENTER_EXECUTOR) { assert(oparg < 256); - _PyExecutorObject *executor = - (_PyExecutorObject *)code->co_executors->executors[oparg]; + _PyExecutorObject *executor = code->co_executors->executors[oparg]; opcode = executor->vm_data.opcode; DPRINTF(2, " * ENTER_EXECUTOR -> %s\n", _PyOpcode_OpName[opcode]); oparg = executor->vm_data.oparg; @@ -704,7 +704,7 @@ compute_used(_PyUOpInstruction *buffer, uint32_t *used) { int count = 0; SET_BIT(used, 0); - for (int i = 0; i < _Py_UOP_MAX_TRACE_LENGTH; i++) { + for (int i = 0; i < UOP_MAX_TRACE_LENGTH; i++) { if (!BIT_IS_SET(used, i)) { continue; } @@ -736,15 +736,15 @@ compute_used(_PyUOpInstruction *buffer, uint32_t *used) static _PyExecutorObject * make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies) { - uint32_t used[(_Py_UOP_MAX_TRACE_LENGTH + 31)/32] = { 0 }; + uint32_t used[(UOP_MAX_TRACE_LENGTH + 31)/32] = { 0 }; int length = compute_used(buffer, used); - _PyUOpExecutorObject *executor = PyObject_NewVar(_PyUOpExecutorObject, &_PyUOpExecutor_Type, length); + _PyExecutorObject *executor = PyObject_NewVar(_PyExecutorObject, &_PyUOpExecutor_Type, length); if (executor == NULL) { return NULL; } int dest = length - 1; /* Scan backwards, so that we see the destinations of jumps before the jumps themselves. */ - for (int i = _Py_UOP_MAX_TRACE_LENGTH-1; i >= 0; i--) { + for (int i = UOP_MAX_TRACE_LENGTH-1; i >= 0; i--) { if (!BIT_IS_SET(used, i)) { continue; } @@ -763,7 +763,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies) dest--; } assert(dest == -1); - _Py_ExecutorInit((_PyExecutorObject *)executor, dependencies); + _Py_ExecutorInit(executor, dependencies); #ifdef Py_DEBUG char *python_lltrace = Py_GETENV("PYTHON_LLTRACE"); int lltrace = 0; @@ -782,7 +782,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies) } } #endif - return (_PyExecutorObject *)executor; + return executor; } static int @@ -795,8 +795,8 @@ uop_optimize( { _PyBloomFilter dependencies; _Py_BloomFilter_Init(&dependencies); - _PyUOpInstruction buffer[_Py_UOP_MAX_TRACE_LENGTH]; - int err = translate_bytecode_to_trace(code, instr, buffer, _Py_UOP_MAX_TRACE_LENGTH, &dependencies); + _PyUOpInstruction buffer[UOP_MAX_TRACE_LENGTH]; + int err = translate_bytecode_to_trace(code, instr, buffer, UOP_MAX_TRACE_LENGTH, &dependencies); if (err <= 0) { // Error or nothing translated return err; @@ -804,7 +804,7 @@ uop_optimize( OPT_STAT_INC(traces_created); char *uop_optimize = Py_GETENV("PYTHONUOPSOPTIMIZE"); if (uop_optimize == NULL || *uop_optimize > '0') { - err = _Py_uop_analyze_and_optimize(code, buffer, _Py_UOP_MAX_TRACE_LENGTH, curr_stackentries); + err = _Py_uop_analyze_and_optimize(code, buffer, UOP_MAX_TRACE_LENGTH, curr_stackentries); if (err < 0) { return -1; } @@ -848,7 +848,7 @@ PyUnstable_Optimizer_NewUOpOptimizer(void) } static void -counter_dealloc(_PyUOpExecutorObject *self) { +counter_dealloc(_PyExecutorObject *self) { PyObject *opt = (PyObject *)self->trace[0].operand; Py_DECREF(opt); uop_dealloc(self); @@ -857,7 +857,7 @@ counter_dealloc(_PyUOpExecutorObject *self) { PyTypeObject _PyCounterExecutor_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) .tp_name = "counting_executor", - .tp_basicsize = offsetof(_PyUOpExecutorObject, trace), + .tp_basicsize = offsetof(_PyExecutorObject, trace), .tp_itemsize = sizeof(_PyUOpInstruction), .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION, .tp_dealloc = (destructor)counter_dealloc, diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 4eb2d9711f5..7db51f0d90a 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -5,7 +5,6 @@ #include "pycore_opcode_utils.h" #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_uop_metadata.h" -#include "pycore_uops.h" #include "pycore_long.h" #include "cpython/optimizer.h" #include