mirror of https://github.com/python/cpython
gh-117045: Add code object to function version cache (#117028)
Changes to the function version cache:

- In addition to the function object, also store the code object, and allow the latter to be retrieved even if the function has been evicted.
- Stop assigning new function versions after a critical attribute (e.g. `__code__`) has been modified; the version is permanently reset to zero in this case.
- Changes to `__annotations__` are no longer considered critical. (This fixes gh-109998.)

Changes to the Tier 2 optimization machinery:

- If we cannot map a function version to a function, but it is still mapped to a code object, we continue projecting the trace. The operand of the `_PUSH_FRAME` and `_POP_FRAME` opcodes can be either NULL, a function object, or a code object with the lowest bit set.

This allows us to trace through code that calls an ephemeral function, i.e., a function that may not be alive when we are constructing the executor, e.g. a generator expression or certain nested functions. We will lose globals removal inside such functions, but we can still do other peephole operations (and even possibly [call inlining](https://github.com/python/cpython/pull/116290), if we decide to do it), which only need the code object. As before, if we cannot retrieve the code object from the cache, we stop projecting.
This commit is contained in:
parent
c85d84166a
commit
570a82d46a
|
@ -55,7 +55,7 @@ enum _frameowner {
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef struct _PyInterpreterFrame {
|
typedef struct _PyInterpreterFrame {
|
||||||
PyObject *f_executable; /* Strong reference */
|
PyObject *f_executable; /* Strong reference (code object or None) */
|
||||||
struct _PyInterpreterFrame *previous;
|
struct _PyInterpreterFrame *previous;
|
||||||
PyObject *f_funcobj; /* Strong reference. Only valid if not on C stack */
|
PyObject *f_funcobj; /* Strong reference. Only valid if not on C stack */
|
||||||
PyObject *f_globals; /* Borrowed reference. Only valid if not on C stack */
|
PyObject *f_globals; /* Borrowed reference. Only valid if not on C stack */
|
||||||
|
|
|
@ -17,20 +17,27 @@ extern PyObject* _PyFunction_Vectorcall(
|
||||||
#define FUNC_MAX_WATCHERS 8
|
#define FUNC_MAX_WATCHERS 8
|
||||||
|
|
||||||
#define FUNC_VERSION_CACHE_SIZE (1<<12) /* Must be a power of 2 */
|
#define FUNC_VERSION_CACHE_SIZE (1<<12) /* Must be a power of 2 */
|
||||||
|
|
||||||
|
struct _func_version_cache_item {
|
||||||
|
PyFunctionObject *func;
|
||||||
|
PyObject *code;
|
||||||
|
};
|
||||||
|
|
||||||
struct _py_func_state {
|
struct _py_func_state {
|
||||||
uint32_t next_version;
|
uint32_t next_version;
|
||||||
// Borrowed references to function objects whose
|
// Borrowed references to function and code objects whose
|
||||||
// func_version % FUNC_VERSION_CACHE_SIZE
|
// func_version % FUNC_VERSION_CACHE_SIZE
|
||||||
// once was equal to the index in the table.
|
// once was equal to the index in the table.
|
||||||
// They are cleared when the function is deallocated.
|
// They are cleared when the function or code object is deallocated.
|
||||||
PyFunctionObject *func_version_cache[FUNC_VERSION_CACHE_SIZE];
|
struct _func_version_cache_item func_version_cache[FUNC_VERSION_CACHE_SIZE];
|
||||||
};
|
};
|
||||||
|
|
||||||
extern PyFunctionObject* _PyFunction_FromConstructor(PyFrameConstructor *constr);
|
extern PyFunctionObject* _PyFunction_FromConstructor(PyFrameConstructor *constr);
|
||||||
|
|
||||||
extern uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func);
|
extern uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func);
|
||||||
PyAPI_FUNC(void) _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version);
|
PyAPI_FUNC(void) _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version);
|
||||||
PyFunctionObject *_PyFunction_LookupByVersion(uint32_t version);
|
void _PyFunction_ClearCodeByVersion(uint32_t version);
|
||||||
|
PyFunctionObject *_PyFunction_LookupByVersion(uint32_t version, PyObject **p_code);
|
||||||
|
|
||||||
extern PyObject *_Py_set_function_type_params(
|
extern PyObject *_Py_set_function_type_params(
|
||||||
PyThreadState* unused, PyObject *func, PyObject *type_params);
|
PyThreadState* unused, PyObject *func, PyObject *type_params);
|
||||||
|
|
|
@ -1710,6 +1710,7 @@ code_dealloc(PyCodeObject *co)
|
||||||
}
|
}
|
||||||
Py_SET_REFCNT(co, 0);
|
Py_SET_REFCNT(co, 0);
|
||||||
|
|
||||||
|
_PyFunction_ClearCodeByVersion(co->co_version);
|
||||||
if (co->co_extra != NULL) {
|
if (co->co_extra != NULL) {
|
||||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||||
_PyCodeObjectExtra *co_extra = co->co_extra;
|
_PyCodeObjectExtra *co_extra = co->co_extra;
|
||||||
|
|
|
@ -218,43 +218,61 @@ error:
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Function versions
|
(This is purely internal documentation. There are no public APIs here.)
|
||||||
-----------------
|
|
||||||
|
|
||||||
Function versions are used to detect when a function object has been
|
Function (and code) versions
|
||||||
updated, invalidating inline cache data used by the `CALL` bytecode
|
----------------------------
|
||||||
(notably `CALL_PY_EXACT_ARGS` and a few other `CALL` specializations).
|
|
||||||
|
|
||||||
They are also used by the Tier 2 superblock creation code to find
|
The Tier 1 specializer generates CALL variants that can be invalidated
|
||||||
the function being called (and from there the code object).
|
by changes to critical function attributes:
|
||||||
|
|
||||||
How does a function's `func_version` field get initialized?
|
- __code__
|
||||||
|
- __defaults__
|
||||||
|
- __kwdefaults__
|
||||||
|
- __closure__
|
||||||
|
|
||||||
- `PyFunction_New` and friends initialize it to 0.
|
For this purpose function objects have a 32-bit func_version member
|
||||||
- The `MAKE_FUNCTION` instruction sets it from the code's `co_version`.
|
that the specializer writes to the specialized instruction's inline
|
||||||
- It is reset to 0 when various attributes like `__code__` are set.
|
cache and which is checked by a guard on the specialized instructions.
|
||||||
- A new version is allocated by `_PyFunction_GetVersionForCurrentState`
|
|
||||||
when the specializer needs a version and the version is 0.
|
|
||||||
|
|
||||||
The latter allocates versions using a counter in the interpreter state,
|
The MAKE_FUNCTION bytecode sets func_version from the code object's
|
||||||
`interp->func_state.next_version`.
|
co_version field. The latter is initialized from a counter in the
|
||||||
When the counter wraps around to 0, no more versions are allocated.
|
interpreter state (interp->func_state.next_version) and never changes.
|
||||||
There is one other special case: functions with a non-standard
|
When this counter overflows, it remains zero and the specializer loses
|
||||||
`vectorcall` field are not given a version.
|
the ability to specialize calls to new functions.
|
||||||
|
|
||||||
When the function version is 0, the `CALL` bytecode is not specialized.
|
The func_version is reset to zero when any of the critical attributes
|
||||||
|
is modified; after this point the specializer will no longer specialize
|
||||||
|
calls to this function, and the guard will always fail.
|
||||||
|
|
||||||
Code object versions
|
The function and code version cache
|
||||||
--------------------
|
-----------------------------------
|
||||||
|
|
||||||
So where do code objects get their `co_version`?
|
The Tier 2 optimizer now has a problem, since it needs to find the
|
||||||
They share the same counter, `interp->func_state.next_version`.
|
function and code objects given only the version number from the inline
|
||||||
|
cache. Our solution is to maintain a cache mapping version numbers to
|
||||||
|
function and code objects. To limit the cache size we could hash
|
||||||
|
the version number, but for now we simply use it modulo the table size.
|
||||||
|
|
||||||
|
There are some corner cases (e.g. generator expressions) where we will
|
||||||
|
be unable to find the function object in the cache but we can still
|
||||||
|
find the code object. For this reason the cache stores both the
|
||||||
|
function object and the code object.
|
||||||
|
|
||||||
|
The cache doesn't contain strong references; cache entries are
|
||||||
|
invalidated whenever the function or code object is deallocated.
|
||||||
|
|
||||||
|
Invariants
|
||||||
|
----------
|
||||||
|
|
||||||
|
These should hold at any time except when one of the cache-mutating
|
||||||
|
functions is running.
|
||||||
|
|
||||||
|
- For any slot s at index i:
|
||||||
|
- s->func == NULL or s->func->func_version % FUNC_VERSION_CACHE_SIZE == i
|
||||||
|
- s->code == NULL or s->code->co_version % FUNC_VERSION_CACHE_SIZE == i
|
||||||
|
- if s->func != NULL, then s->func->func_code == s->code
|
||||||
|
|
||||||
Code objects get a new `co_version` allocated from this counter upon
|
|
||||||
creation. Since code objects are nominally immutable, `co_version` can
|
|
||||||
not be invalidated. The only way it can be 0 is when 2**32 or more
|
|
||||||
code objects have been created during the process's lifetime.
|
|
||||||
(The counter isn't reset by `fork()`, extending the lifetime.)
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@ -262,28 +280,61 @@ _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version)
|
||||||
{
|
{
|
||||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||||
if (func->func_version != 0) {
|
if (func->func_version != 0) {
|
||||||
PyFunctionObject **slot =
|
struct _func_version_cache_item *slot =
|
||||||
interp->func_state.func_version_cache
|
interp->func_state.func_version_cache
|
||||||
+ (func->func_version % FUNC_VERSION_CACHE_SIZE);
|
+ (func->func_version % FUNC_VERSION_CACHE_SIZE);
|
||||||
if (*slot == func) {
|
if (slot->func == func) {
|
||||||
*slot = NULL;
|
slot->func = NULL;
|
||||||
|
// Leave slot->code alone, there may be use for it.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
func->func_version = version;
|
func->func_version = version;
|
||||||
if (version != 0) {
|
if (version != 0) {
|
||||||
interp->func_state.func_version_cache[
|
struct _func_version_cache_item *slot =
|
||||||
version % FUNC_VERSION_CACHE_SIZE] = func;
|
interp->func_state.func_version_cache
|
||||||
|
+ (version % FUNC_VERSION_CACHE_SIZE);
|
||||||
|
slot->func = func;
|
||||||
|
slot->code = func->func_code;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
_PyFunction_ClearCodeByVersion(uint32_t version)
|
||||||
|
{
|
||||||
|
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||||
|
struct _func_version_cache_item *slot =
|
||||||
|
interp->func_state.func_version_cache
|
||||||
|
+ (version % FUNC_VERSION_CACHE_SIZE);
|
||||||
|
if (slot->code) {
|
||||||
|
assert(PyCode_Check(slot->code));
|
||||||
|
PyCodeObject *code = (PyCodeObject *)slot->code;
|
||||||
|
if (code->co_version == version) {
|
||||||
|
slot->code = NULL;
|
||||||
|
slot->func = NULL;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
PyFunctionObject *
|
PyFunctionObject *
|
||||||
_PyFunction_LookupByVersion(uint32_t version)
|
_PyFunction_LookupByVersion(uint32_t version, PyObject **p_code)
|
||||||
{
|
{
|
||||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||||
PyFunctionObject *func = interp->func_state.func_version_cache[
|
struct _func_version_cache_item *slot =
|
||||||
version % FUNC_VERSION_CACHE_SIZE];
|
interp->func_state.func_version_cache
|
||||||
if (func != NULL && func->func_version == version) {
|
+ (version % FUNC_VERSION_CACHE_SIZE);
|
||||||
return func;
|
if (slot->code) {
|
||||||
|
assert(PyCode_Check(slot->code));
|
||||||
|
PyCodeObject *code = (PyCodeObject *)slot->code;
|
||||||
|
if (code->co_version == version) {
|
||||||
|
*p_code = slot->code;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
*p_code = NULL;
|
||||||
|
}
|
||||||
|
if (slot->func && slot->func->func_version == version) {
|
||||||
|
assert(slot->func->func_code == slot->code);
|
||||||
|
return slot->func;
|
||||||
}
|
}
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -291,19 +342,7 @@ _PyFunction_LookupByVersion(uint32_t version)
|
||||||
uint32_t
|
uint32_t
|
||||||
_PyFunction_GetVersionForCurrentState(PyFunctionObject *func)
|
_PyFunction_GetVersionForCurrentState(PyFunctionObject *func)
|
||||||
{
|
{
|
||||||
if (func->func_version != 0) {
|
|
||||||
return func->func_version;
|
return func->func_version;
|
||||||
}
|
|
||||||
if (func->vectorcall != _PyFunction_Vectorcall) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
|
||||||
if (interp->func_state.next_version == 0) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
uint32_t v = interp->func_state.next_version++;
|
|
||||||
_PyFunction_SetVersion(func, v);
|
|
||||||
return v;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
PyObject *
|
PyObject *
|
||||||
|
@ -507,7 +546,6 @@ PyFunction_SetAnnotations(PyObject *op, PyObject *annotations)
|
||||||
"non-dict annotations");
|
"non-dict annotations");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
_PyFunction_SetVersion((PyFunctionObject *)op, 0);
|
|
||||||
Py_XSETREF(((PyFunctionObject *)op)->func_annotations, annotations);
|
Py_XSETREF(((PyFunctionObject *)op)->func_annotations, annotations);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -731,7 +769,6 @@ func_set_annotations(PyFunctionObject *op, PyObject *value, void *Py_UNUSED(igno
|
||||||
"__annotations__ must be set to a dict object");
|
"__annotations__ must be set to a dict object");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
_PyFunction_SetVersion(op, 0);
|
|
||||||
Py_XSETREF(op->func_annotations, Py_XNewRef(value));
|
Py_XSETREF(op->func_annotations, Py_XNewRef(value));
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -211,7 +211,7 @@ _PyOptimizer_Optimize(
|
||||||
_PyInterpreterFrame *frame, _Py_CODEUNIT *start,
|
_PyInterpreterFrame *frame, _Py_CODEUNIT *start,
|
||||||
PyObject **stack_pointer, _PyExecutorObject **executor_ptr)
|
PyObject **stack_pointer, _PyExecutorObject **executor_ptr)
|
||||||
{
|
{
|
||||||
PyCodeObject *code = (PyCodeObject *)frame->f_executable;
|
PyCodeObject *code = _PyFrame_GetCode(frame);
|
||||||
assert(PyCode_Check(code));
|
assert(PyCode_Check(code));
|
||||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||||
if (!has_space_for_executor(code, start)) {
|
if (!has_space_for_executor(code, start)) {
|
||||||
|
@ -479,8 +479,9 @@ BRANCH_TO_GUARD[4][2] = {
|
||||||
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0); \
|
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0); \
|
||||||
goto done; \
|
goto done; \
|
||||||
} \
|
} \
|
||||||
assert(func->func_code == (PyObject *)code); \
|
assert(func == NULL || func->func_code == (PyObject *)code); \
|
||||||
trace_stack[trace_stack_depth].func = func; \
|
trace_stack[trace_stack_depth].func = func; \
|
||||||
|
trace_stack[trace_stack_depth].code = code; \
|
||||||
trace_stack[trace_stack_depth].instr = instr; \
|
trace_stack[trace_stack_depth].instr = instr; \
|
||||||
trace_stack_depth++;
|
trace_stack_depth++;
|
||||||
#define TRACE_STACK_POP() \
|
#define TRACE_STACK_POP() \
|
||||||
|
@ -489,7 +490,8 @@ BRANCH_TO_GUARD[4][2] = {
|
||||||
} \
|
} \
|
||||||
trace_stack_depth--; \
|
trace_stack_depth--; \
|
||||||
func = trace_stack[trace_stack_depth].func; \
|
func = trace_stack[trace_stack_depth].func; \
|
||||||
code = (PyCodeObject *)trace_stack[trace_stack_depth].func->func_code; \
|
code = trace_stack[trace_stack_depth].code; \
|
||||||
|
assert(func == NULL || func->func_code == (PyObject *)code); \
|
||||||
instr = trace_stack[trace_stack_depth].instr;
|
instr = trace_stack[trace_stack_depth].instr;
|
||||||
|
|
||||||
/* Returns 1 on success,
|
/* Returns 1 on success,
|
||||||
|
@ -505,7 +507,7 @@ translate_bytecode_to_trace(
|
||||||
_PyBloomFilter *dependencies)
|
_PyBloomFilter *dependencies)
|
||||||
{
|
{
|
||||||
bool progress_needed = true;
|
bool progress_needed = true;
|
||||||
PyCodeObject *code = (PyCodeObject *)frame->f_executable;
|
PyCodeObject *code = _PyFrame_GetCode(frame);
|
||||||
PyFunctionObject *func = (PyFunctionObject *)frame->f_funcobj;
|
PyFunctionObject *func = (PyFunctionObject *)frame->f_funcobj;
|
||||||
assert(PyFunction_Check(func));
|
assert(PyFunction_Check(func));
|
||||||
PyCodeObject *initial_code = code;
|
PyCodeObject *initial_code = code;
|
||||||
|
@ -515,6 +517,7 @@ translate_bytecode_to_trace(
|
||||||
int max_length = buffer_size;
|
int max_length = buffer_size;
|
||||||
struct {
|
struct {
|
||||||
PyFunctionObject *func;
|
PyFunctionObject *func;
|
||||||
|
PyCodeObject *code;
|
||||||
_Py_CODEUNIT *instr;
|
_Py_CODEUNIT *instr;
|
||||||
} trace_stack[TRACE_STACK_SIZE];
|
} trace_stack[TRACE_STACK_SIZE];
|
||||||
int trace_stack_depth = 0;
|
int trace_stack_depth = 0;
|
||||||
|
@ -719,9 +722,19 @@ top: // Jump here after _PUSH_FRAME or likely branches
|
||||||
|
|
||||||
if (uop == _POP_FRAME) {
|
if (uop == _POP_FRAME) {
|
||||||
TRACE_STACK_POP();
|
TRACE_STACK_POP();
|
||||||
/* Set the operand to the function object returned to,
|
/* Set the operand to the function or code object returned to,
|
||||||
* to assist optimization passes */
|
* to assist optimization passes. (See _PUSH_FRAME below.)
|
||||||
ADD_TO_TRACE(uop, oparg, (uintptr_t)func, target);
|
*/
|
||||||
|
if (func != NULL) {
|
||||||
|
operand = (uintptr_t)func;
|
||||||
|
}
|
||||||
|
else if (code != NULL) {
|
||||||
|
operand = (uintptr_t)code | 1;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
operand = 0;
|
||||||
|
}
|
||||||
|
ADD_TO_TRACE(uop, oparg, operand, target);
|
||||||
DPRINTF(2,
|
DPRINTF(2,
|
||||||
"Returning to %s (%s:%d) at byte offset %d\n",
|
"Returning to %s (%s:%d) at byte offset %d\n",
|
||||||
PyUnicode_AsUTF8(code->co_qualname),
|
PyUnicode_AsUTF8(code->co_qualname),
|
||||||
|
@ -738,10 +751,12 @@ top: // Jump here after _PUSH_FRAME or likely branches
|
||||||
// Add one to account for the actual opcode/oparg pair:
|
// Add one to account for the actual opcode/oparg pair:
|
||||||
+ 1;
|
+ 1;
|
||||||
uint32_t func_version = read_u32(&instr[func_version_offset].cache);
|
uint32_t func_version = read_u32(&instr[func_version_offset].cache);
|
||||||
PyFunctionObject *new_func = _PyFunction_LookupByVersion(func_version);
|
PyCodeObject *new_code = NULL;
|
||||||
DPRINTF(2, "Function: version=%#x; object=%p\n", (int)func_version, new_func);
|
PyFunctionObject *new_func =
|
||||||
if (new_func != NULL) {
|
_PyFunction_LookupByVersion(func_version, (PyObject **) &new_code);
|
||||||
PyCodeObject *new_code = (PyCodeObject *)PyFunction_GET_CODE(new_func);
|
DPRINTF(2, "Function: version=%#x; new_func=%p, new_code=%p\n",
|
||||||
|
(int)func_version, new_func, new_code);
|
||||||
|
if (new_code != NULL) {
|
||||||
if (new_code == code) {
|
if (new_code == code) {
|
||||||
// Recursive call, bail (we could be here forever).
|
// Recursive call, bail (we could be here forever).
|
||||||
DPRINTF(2, "Bailing on recursive call to %s (%s:%d)\n",
|
DPRINTF(2, "Bailing on recursive call to %s (%s:%d)\n",
|
||||||
|
@ -766,9 +781,22 @@ top: // Jump here after _PUSH_FRAME or likely branches
|
||||||
instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + 1;
|
instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + 1;
|
||||||
TRACE_STACK_PUSH();
|
TRACE_STACK_PUSH();
|
||||||
_Py_BloomFilter_Add(dependencies, new_code);
|
_Py_BloomFilter_Add(dependencies, new_code);
|
||||||
/* Set the operand to the callee's function object,
|
/* Set the operand to the callee's function or code object,
|
||||||
* to assist optimization passes */
|
* to assist optimization passes.
|
||||||
ADD_TO_TRACE(uop, oparg, (uintptr_t)new_func, target);
|
* We prefer setting it to the function (for remove_globals())
|
||||||
|
* but if the function is unavailable and only the code is available,
|
||||||
|
* use the code, setting the low bit so the optimizer knows.
|
||||||
|
*/
|
||||||
|
if (new_func != NULL) {
|
||||||
|
operand = (uintptr_t)new_func;
|
||||||
|
}
|
||||||
|
else if (new_code != NULL) {
|
||||||
|
operand = (uintptr_t)new_code | 1;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
operand = 0;
|
||||||
|
}
|
||||||
|
ADD_TO_TRACE(uop, oparg, operand, target);
|
||||||
code = new_code;
|
code = new_code;
|
||||||
func = new_func;
|
func = new_func;
|
||||||
instr = _PyCode_CODE(code);
|
instr = _PyCode_CODE(code);
|
||||||
|
@ -780,8 +808,8 @@ top: // Jump here after _PUSH_FRAME or likely branches
|
||||||
2 * INSTR_IP(instr, code));
|
2 * INSTR_IP(instr, code));
|
||||||
goto top;
|
goto top;
|
||||||
}
|
}
|
||||||
DPRINTF(2, "Bail, new_func == NULL\n");
|
DPRINTF(2, "Bail, new_code == NULL\n");
|
||||||
ADD_TO_TRACE(uop, oparg, operand, target);
|
ADD_TO_TRACE(uop, oparg, 0, target);
|
||||||
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0);
|
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0);
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
|
@ -1116,7 +1144,7 @@ counter_optimize(
|
||||||
int Py_UNUSED(curr_stackentries)
|
int Py_UNUSED(curr_stackentries)
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
PyCodeObject *code = (PyCodeObject *)frame->f_executable;
|
PyCodeObject *code = _PyFrame_GetCode(frame);
|
||||||
int oparg = instr->op.arg;
|
int oparg = instr->op.arg;
|
||||||
while (instr->op.code == EXTENDED_ARG) {
|
while (instr->op.code == EXTENDED_ARG) {
|
||||||
instr++;
|
instr++;
|
||||||
|
|
|
@ -228,7 +228,12 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
|
||||||
builtins_watched <<= 1;
|
builtins_watched <<= 1;
|
||||||
globals_watched <<= 1;
|
globals_watched <<= 1;
|
||||||
function_checked <<= 1;
|
function_checked <<= 1;
|
||||||
PyFunctionObject *func = (PyFunctionObject *)buffer[pc].operand;
|
uint64_t operand = buffer[pc].operand;
|
||||||
|
if (operand == 0 || (operand & 1)) {
|
||||||
|
// It's either a code object or NULL, so bail
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
PyFunctionObject *func = (PyFunctionObject *)operand;
|
||||||
if (func == NULL) {
|
if (func == NULL) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
@ -251,7 +256,15 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
|
||||||
builtins_watched >>= 1;
|
builtins_watched >>= 1;
|
||||||
globals_watched >>= 1;
|
globals_watched >>= 1;
|
||||||
function_checked >>= 1;
|
function_checked >>= 1;
|
||||||
PyFunctionObject *func = (PyFunctionObject *)buffer[pc].operand;
|
uint64_t operand = buffer[pc].operand;
|
||||||
|
if (operand == 0 || (operand & 1)) {
|
||||||
|
// It's either a code object or NULL, so bail
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
PyFunctionObject *func = (PyFunctionObject *)operand;
|
||||||
|
if (func == NULL) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
assert(PyFunction_Check(func));
|
assert(PyFunction_Check(func));
|
||||||
function_version = func->func_version;
|
function_version = func->func_version;
|
||||||
globals = func->func_globals;
|
globals = func->func_globals;
|
||||||
|
@ -522,7 +535,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
|
||||||
static void
|
static void
|
||||||
peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_size)
|
peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_size)
|
||||||
{
|
{
|
||||||
PyCodeObject *co = (PyCodeObject *)frame->f_executable;
|
PyCodeObject *co = _PyFrame_GetCode(frame);
|
||||||
for (int pc = 0; pc < buffer_size; pc++) {
|
for (int pc = 0; pc < buffer_size; pc++) {
|
||||||
int opcode = buffer[pc].opcode;
|
int opcode = buffer[pc].opcode;
|
||||||
switch(opcode) {
|
switch(opcode) {
|
||||||
|
@ -545,11 +558,16 @@ peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_s
|
||||||
case _PUSH_FRAME:
|
case _PUSH_FRAME:
|
||||||
case _POP_FRAME:
|
case _POP_FRAME:
|
||||||
{
|
{
|
||||||
PyFunctionObject *func = (PyFunctionObject *)buffer[pc].operand;
|
uint64_t operand = buffer[pc].operand;
|
||||||
if (func == NULL) {
|
if (operand & 1) {
|
||||||
|
co = (PyCodeObject *)(operand & ~1);
|
||||||
|
assert(PyCode_Check(co));
|
||||||
|
}
|
||||||
|
else if (operand == 0) {
|
||||||
co = NULL;
|
co = NULL;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
PyFunctionObject *func = (PyFunctionObject *)operand;
|
||||||
assert(PyFunction_Check(func));
|
assert(PyFunction_Check(func));
|
||||||
co = (PyCodeObject *)func->func_code;
|
co = (PyCodeObject *)func->func_code;
|
||||||
}
|
}
|
||||||
|
@ -587,7 +605,7 @@ _Py_uop_analyze_and_optimize(
|
||||||
peephole_opt(frame, buffer, buffer_size);
|
peephole_opt(frame, buffer, buffer_size);
|
||||||
|
|
||||||
err = optimize_uops(
|
err = optimize_uops(
|
||||||
(PyCodeObject *)frame->f_executable, buffer,
|
_PyFrame_GetCode(frame), buffer,
|
||||||
buffer_size, curr_stacklen, dependencies);
|
buffer_size, curr_stacklen, dependencies);
|
||||||
|
|
||||||
if (err == 0) {
|
if (err == 0) {
|
||||||
|
|
|
@ -543,14 +543,25 @@ dummy_func(void) {
|
||||||
|
|
||||||
(void)callable;
|
(void)callable;
|
||||||
|
|
||||||
PyFunctionObject *func = (PyFunctionObject *)(this_instr + 2)->operand;
|
PyCodeObject *co = NULL;
|
||||||
DPRINTF(3, "func: %p ", func);
|
assert((this_instr + 2)->opcode == _PUSH_FRAME);
|
||||||
|
uintptr_t push_operand = (this_instr + 2)->operand;
|
||||||
|
if (push_operand & 1) {
|
||||||
|
co = (PyCodeObject *)(push_operand & ~1);
|
||||||
|
DPRINTF(3, "code=%p ", co);
|
||||||
|
assert(PyCode_Check(co));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
PyFunctionObject *func = (PyFunctionObject *)push_operand;
|
||||||
|
DPRINTF(3, "func=%p ", func);
|
||||||
if (func == NULL) {
|
if (func == NULL) {
|
||||||
DPRINTF(3, "\n");
|
DPRINTF(3, "\n");
|
||||||
DPRINTF(1, "Missing function\n");
|
DPRINTF(1, "Missing function\n");
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
PyCodeObject *co = (PyCodeObject *)func->func_code;
|
co = (PyCodeObject *)func->func_code;
|
||||||
|
DPRINTF(3, "code=%p ", co);
|
||||||
|
}
|
||||||
|
|
||||||
assert(self_or_null != NULL);
|
assert(self_or_null != NULL);
|
||||||
assert(args != NULL);
|
assert(args != NULL);
|
||||||
|
|
|
@ -1596,14 +1596,25 @@
|
||||||
callable = stack_pointer[-2 - oparg];
|
callable = stack_pointer[-2 - oparg];
|
||||||
int argcount = oparg;
|
int argcount = oparg;
|
||||||
(void)callable;
|
(void)callable;
|
||||||
PyFunctionObject *func = (PyFunctionObject *)(this_instr + 2)->operand;
|
PyCodeObject *co = NULL;
|
||||||
DPRINTF(3, "func: %p ", func);
|
assert((this_instr + 2)->opcode == _PUSH_FRAME);
|
||||||
|
uintptr_t push_operand = (this_instr + 2)->operand;
|
||||||
|
if (push_operand & 1) {
|
||||||
|
co = (PyCodeObject *)(push_operand & ~1);
|
||||||
|
DPRINTF(3, "code=%p ", co);
|
||||||
|
assert(PyCode_Check(co));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
PyFunctionObject *func = (PyFunctionObject *)push_operand;
|
||||||
|
DPRINTF(3, "func=%p ", func);
|
||||||
if (func == NULL) {
|
if (func == NULL) {
|
||||||
DPRINTF(3, "\n");
|
DPRINTF(3, "\n");
|
||||||
DPRINTF(1, "Missing function\n");
|
DPRINTF(1, "Missing function\n");
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
PyCodeObject *co = (PyCodeObject *)func->func_code;
|
co = (PyCodeObject *)func->func_code;
|
||||||
|
DPRINTF(3, "code=%p ", co);
|
||||||
|
}
|
||||||
assert(self_or_null != NULL);
|
assert(self_or_null != NULL);
|
||||||
assert(args != NULL);
|
assert(args != NULL);
|
||||||
if (sym_is_not_null(self_or_null)) {
|
if (sym_is_not_null(self_or_null)) {
|
||||||
|
|
Loading…
Reference in New Issue