2024-04-30 22:26:34 -03:00
|
|
|
#ifdef _Py_TIER2
|
|
|
|
|
2024-02-13 09:24:48 -04:00
|
|
|
/*
|
2024-02-26 12:42:53 -04:00
|
|
|
* This file contains the support code for CPython's uops optimizer.
|
2024-02-13 09:24:48 -04:00
|
|
|
* It also performs some simple optimizations.
|
|
|
|
* It performs a traditional data-flow analysis[1] over the trace of uops.
|
|
|
|
* Using the information gained, it chooses to emit, or skip certain instructions
|
|
|
|
* if possible.
|
|
|
|
*
|
|
|
|
* [1] For information on data-flow analysis, please see
|
|
|
|
* https://clang.llvm.org/docs/DataFlowAnalysisIntro.html
|
|
|
|
*
|
|
|
|
* */
|
2023-08-15 15:04:17 -03:00
|
|
|
#include "Python.h"
|
|
|
|
#include "opcode.h"
|
2024-02-02 08:14:34 -04:00
|
|
|
#include "pycore_dict.h"
|
2023-08-15 15:04:17 -03:00
|
|
|
#include "pycore_interp.h"
|
|
|
|
#include "pycore_opcode_metadata.h"
|
|
|
|
#include "pycore_opcode_utils.h"
|
|
|
|
#include "pycore_pystate.h" // _PyInterpreterState_GET()
|
2024-01-02 18:09:57 -04:00
|
|
|
#include "pycore_uop_metadata.h"
|
2024-02-02 08:14:34 -04:00
|
|
|
#include "pycore_dict.h"
|
2023-08-15 15:04:17 -03:00
|
|
|
#include "pycore_long.h"
|
|
|
|
#include "cpython/optimizer.h"
|
2024-02-13 09:24:48 -04:00
|
|
|
#include "pycore_optimizer.h"
|
|
|
|
#include "pycore_object.h"
|
|
|
|
#include "pycore_dict.h"
|
|
|
|
#include "pycore_function.h"
|
|
|
|
#include "pycore_uop_metadata.h"
|
|
|
|
#include "pycore_uop_ids.h"
|
|
|
|
#include "pycore_range.h"
|
|
|
|
|
|
|
|
#include <stdarg.h>
|
2023-08-15 15:04:17 -03:00
|
|
|
#include <stdbool.h>
|
|
|
|
#include <stdint.h>
|
|
|
|
#include <stddef.h>
|
2024-02-13 09:24:48 -04:00
|
|
|
|
|
|
|
#ifdef Py_DEBUG
extern const char *_PyUOpName(int index);
extern void _PyUOpPrint(const _PyUOpInstruction *uop);

/* Environment variable that selects the optimizer's debug verbosity. */
static const char *const DEBUG_ENV = "PYTHON_OPT_DEBUG";

/* Return the debug verbosity requested through the DEBUG_ENV variable.
 *
 * Only the first character of the value is examined. A decimal digit maps
 * to 0..9; an unset variable, an empty value, or any non-digit first
 * character yields 0 (previously a character above '9' produced a
 * meaningless large level).
 */
static inline int get_lltrace(void) {
    char *uop_debug = Py_GETENV(DEBUG_ENV);
    if (uop_debug == NULL || *uop_debug < '0' || *uop_debug > '9') {
        return 0;
    }
    // TODO: Parse an int and all that
    return *uop_debug - '0';
}

/* Print a printf-style message when the debug level is at least `level`.
 * Expands to nothing when Py_DEBUG is not defined. */
#define DPRINTF(level, ...) \
    if (get_lltrace() >= (level)) { printf(__VA_ARGS__); }
#else
#define DPRINTF(level, ...)
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static inline bool
|
|
|
|
op_is_end(uint32_t opcode)
|
|
|
|
{
|
|
|
|
return opcode == _EXIT_TRACE || opcode == _JUMP_TO_TOP;
|
|
|
|
}
|
2023-08-15 15:04:17 -03:00
|
|
|
|
2024-02-02 08:14:34 -04:00
|
|
|
static int
|
|
|
|
get_mutations(PyObject* dict) {
|
|
|
|
assert(PyDict_CheckExact(dict));
|
|
|
|
PyDictObject *d = (PyDictObject *)dict;
|
|
|
|
return (d->ma_version_tag >> DICT_MAX_WATCHERS) & ((1 << DICT_WATCHED_MUTATION_BITS)-1);
|
|
|
|
}
|
|
|
|
|
2024-01-24 08:08:31 -04:00
|
|
|
static void
|
2024-02-02 08:14:34 -04:00
|
|
|
increment_mutations(PyObject* dict) {
|
|
|
|
assert(PyDict_CheckExact(dict));
|
|
|
|
PyDictObject *d = (PyDictObject *)dict;
|
|
|
|
d->ma_version_tag += (1 << DICT_MAX_WATCHERS);
|
|
|
|
}
|
|
|
|
|
2024-02-12 12:07:38 -04:00
|
|
|
/* The first two dict watcher IDs are reserved for CPython,
|
|
|
|
* so we don't need to check that they haven't been used */
|
|
|
|
#define BUILTINS_WATCHER_ID 0
|
|
|
|
#define GLOBALS_WATCHER_ID 1
|
|
|
|
|
2024-02-02 08:14:34 -04:00
|
|
|
static int
|
|
|
|
globals_watcher_callback(PyDict_WatchEvent event, PyObject* dict,
|
|
|
|
PyObject* key, PyObject* new_value)
|
|
|
|
{
|
2024-02-12 12:07:38 -04:00
|
|
|
RARE_EVENT_STAT_INC(watched_globals_modification);
|
|
|
|
assert(get_mutations(dict) < _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS);
|
2024-02-26 13:51:47 -04:00
|
|
|
_Py_Executors_InvalidateDependency(_PyInterpreterState_GET(), dict, 1);
|
2024-02-12 12:07:38 -04:00
|
|
|
increment_mutations(dict);
|
|
|
|
PyDict_Unwatch(GLOBALS_WATCHER_ID, dict);
|
2024-02-02 08:14:34 -04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2024-02-22 10:48:25 -04:00
|
|
|
static PyObject *
|
|
|
|
convert_global_to_const(_PyUOpInstruction *inst, PyObject *obj)
|
2024-02-02 08:14:34 -04:00
|
|
|
{
|
2024-02-22 10:48:25 -04:00
|
|
|
assert(inst->opcode == _LOAD_GLOBAL_MODULE || inst->opcode == _LOAD_GLOBAL_BUILTINS || inst->opcode == _LOAD_ATTR_MODULE);
|
2024-02-02 08:14:34 -04:00
|
|
|
assert(PyDict_CheckExact(obj));
|
|
|
|
PyDictObject *dict = (PyDictObject *)obj;
|
|
|
|
assert(dict->ma_keys->dk_kind == DICT_KEYS_UNICODE);
|
|
|
|
PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dict->ma_keys);
|
|
|
|
assert(inst->operand <= UINT16_MAX);
|
2024-02-22 10:48:25 -04:00
|
|
|
if ((int)inst->operand >= dict->ma_keys->dk_nentries) {
|
|
|
|
return NULL;
|
|
|
|
}
|
2024-02-02 08:14:34 -04:00
|
|
|
PyObject *res = entries[inst->operand].me_value;
|
|
|
|
if (res == NULL) {
|
2024-02-22 10:48:25 -04:00
|
|
|
return NULL;
|
2024-02-02 08:14:34 -04:00
|
|
|
}
|
|
|
|
if (_Py_IsImmortal(res)) {
|
|
|
|
inst->opcode = (inst->oparg & 1) ? _LOAD_CONST_INLINE_BORROW_WITH_NULL : _LOAD_CONST_INLINE_BORROW;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
inst->opcode = (inst->oparg & 1) ? _LOAD_CONST_INLINE_WITH_NULL : _LOAD_CONST_INLINE;
|
|
|
|
}
|
|
|
|
inst->operand = (uint64_t)res;
|
2024-02-22 10:48:25 -04:00
|
|
|
return res;
|
2024-02-02 08:14:34 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
incorrect_keys(_PyUOpInstruction *inst, PyObject *obj)
|
2024-01-24 08:08:31 -04:00
|
|
|
{
|
2024-02-02 08:14:34 -04:00
|
|
|
if (!PyDict_CheckExact(obj)) {
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
PyDictObject *dict = (PyDictObject *)obj;
|
|
|
|
if (dict->ma_keys->dk_version != inst->operand) {
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Returns 1 if successfully optimized
|
|
|
|
* 0 if the trace is not suitable for optimization (yet)
|
|
|
|
* -1 if there was an error. */
|
|
|
|
static int
|
|
|
|
remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
|
|
|
|
int buffer_size, _PyBloomFilter *dependencies)
|
|
|
|
{
|
|
|
|
PyInterpreterState *interp = _PyInterpreterState_GET();
|
|
|
|
PyObject *builtins = frame->f_builtins;
|
|
|
|
if (builtins != interp->builtins) {
|
2024-03-21 14:27:46 -03:00
|
|
|
OPT_STAT_INC(remove_globals_builtins_changed);
|
2024-02-02 08:14:34 -04:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
PyObject *globals = frame->f_globals;
|
2024-03-08 05:47:41 -04:00
|
|
|
PyFunctionObject *function = (PyFunctionObject *)frame->f_funcobj;
|
|
|
|
assert(PyFunction_Check(function));
|
|
|
|
assert(function->func_builtins == builtins);
|
|
|
|
assert(function->func_globals == globals);
|
|
|
|
uint32_t function_version = _PyFunction_GetVersionForCurrentState(function);
|
2024-02-02 08:14:34 -04:00
|
|
|
/* In order to treat globals as constants, we need to
|
|
|
|
* know that the globals dict is the one we expected, and
|
|
|
|
* that it hasn't changed
|
|
|
|
* In order to treat builtins as constants, we need to
|
|
|
|
* know that the builtins dict is the one we expected, and
|
|
|
|
* that it hasn't changed and that the global dictionary's
|
|
|
|
* keys have not changed */
|
|
|
|
|
|
|
|
/* These values represent stacks of booleans (one bool per bit).
|
|
|
|
* Pushing a frame shifts left, popping a frame shifts right. */
|
2024-03-06 09:12:23 -04:00
|
|
|
uint32_t function_checked = 0;
|
2024-02-02 08:14:34 -04:00
|
|
|
uint32_t builtins_watched = 0;
|
|
|
|
uint32_t globals_watched = 0;
|
2024-03-06 09:12:23 -04:00
|
|
|
uint32_t prechecked_function_version = 0;
|
2024-02-12 12:07:38 -04:00
|
|
|
if (interp->dict_state.watchers[GLOBALS_WATCHER_ID] == NULL) {
|
|
|
|
interp->dict_state.watchers[GLOBALS_WATCHER_ID] = globals_watcher_callback;
|
2024-02-02 08:14:34 -04:00
|
|
|
}
|
|
|
|
for (int pc = 0; pc < buffer_size; pc++) {
|
|
|
|
_PyUOpInstruction *inst = &buffer[pc];
|
|
|
|
int opcode = inst->opcode;
|
|
|
|
switch(opcode) {
|
|
|
|
case _GUARD_BUILTINS_VERSION:
|
|
|
|
if (incorrect_keys(inst, builtins)) {
|
2024-03-21 14:27:46 -03:00
|
|
|
OPT_STAT_INC(remove_globals_incorrect_keys);
|
2024-02-02 08:14:34 -04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (interp->rare_events.builtin_dict >= _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if ((builtins_watched & 1) == 0) {
|
|
|
|
PyDict_Watch(BUILTINS_WATCHER_ID, builtins);
|
|
|
|
builtins_watched |= 1;
|
|
|
|
}
|
2024-03-06 09:12:23 -04:00
|
|
|
if (function_checked & 1) {
|
2024-02-02 08:14:34 -04:00
|
|
|
buffer[pc].opcode = NOP;
|
|
|
|
}
|
|
|
|
else {
|
2024-03-06 09:12:23 -04:00
|
|
|
buffer[pc].opcode = _CHECK_FUNCTION;
|
2024-03-08 05:47:41 -04:00
|
|
|
buffer[pc].operand = function_version;
|
2024-03-06 09:12:23 -04:00
|
|
|
function_checked |= 1;
|
2024-02-02 08:14:34 -04:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case _GUARD_GLOBALS_VERSION:
|
|
|
|
if (incorrect_keys(inst, globals)) {
|
2024-03-21 14:27:46 -03:00
|
|
|
OPT_STAT_INC(remove_globals_incorrect_keys);
|
2024-02-02 08:14:34 -04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
uint64_t watched_mutations = get_mutations(globals);
|
|
|
|
if (watched_mutations >= _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if ((globals_watched & 1) == 0) {
|
|
|
|
PyDict_Watch(GLOBALS_WATCHER_ID, globals);
|
|
|
|
_Py_BloomFilter_Add(dependencies, globals);
|
|
|
|
globals_watched |= 1;
|
|
|
|
}
|
2024-03-06 09:12:23 -04:00
|
|
|
if (function_checked & 1) {
|
2024-02-02 08:14:34 -04:00
|
|
|
buffer[pc].opcode = NOP;
|
|
|
|
}
|
|
|
|
else {
|
2024-03-06 09:12:23 -04:00
|
|
|
buffer[pc].opcode = _CHECK_FUNCTION;
|
2024-03-08 05:47:41 -04:00
|
|
|
buffer[pc].operand = function_version;
|
2024-03-06 09:12:23 -04:00
|
|
|
function_checked |= 1;
|
2024-02-02 08:14:34 -04:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case _LOAD_GLOBAL_BUILTINS:
|
2024-03-06 09:12:23 -04:00
|
|
|
if (function_checked & globals_watched & builtins_watched & 1) {
|
2024-02-22 10:48:25 -04:00
|
|
|
convert_global_to_const(inst, builtins);
|
2024-02-02 08:14:34 -04:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case _LOAD_GLOBAL_MODULE:
|
2024-03-06 09:12:23 -04:00
|
|
|
if (function_checked & globals_watched & 1) {
|
2024-02-22 10:48:25 -04:00
|
|
|
convert_global_to_const(inst, globals);
|
2024-02-02 08:14:34 -04:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case _PUSH_FRAME:
|
|
|
|
{
|
|
|
|
builtins_watched <<= 1;
|
2024-03-06 09:12:23 -04:00
|
|
|
globals_watched <<= 1;
|
|
|
|
function_checked <<= 1;
|
gh-117045: Add code object to function version cache (#117028)
Changes to the function version cache:
- In addition to the function object, also store the code object,
and allow the latter to be retrieved even if the function has been evicted.
- Stop assigning new function versions after a critical attribute (e.g. `__code__`)
has been modified; the version is permanently reset to zero in this case.
- Changes to `__annotations__` are no longer considered critical. (This fixes gh-109998.)
Changes to the Tier 2 optimization machinery:
- If we cannot map a function version to a function, but it is still mapped to a code object,
we continue projecting the trace.
The operand of the `_PUSH_FRAME` and `_POP_FRAME` opcodes can be either NULL,
a function object, or a code object with the lowest bit set.
This allows us to trace through code that calls an ephemeral function,
i.e., a function that may not be alive when we are constructing the executor,
e.g. a generator expression or certain nested functions.
We will lose globals removal inside such functions,
but we can still do other peephole operations
(and even possibly [call inlining](https://github.com/python/cpython/pull/116290),
if we decide to do it), which only need the code object.
As before, if we cannot retrieve the code object from the cache, we stop projecting.
2024-03-21 16:37:41 -03:00
|
|
|
uint64_t operand = buffer[pc].operand;
|
|
|
|
if (operand == 0 || (operand & 1)) {
|
|
|
|
// It's either a code object or NULL, so bail
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
PyFunctionObject *func = (PyFunctionObject *)operand;
|
2024-02-02 08:14:34 -04:00
|
|
|
if (func == NULL) {
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
assert(PyFunction_Check(func));
|
2024-03-08 05:47:41 -04:00
|
|
|
function_version = func->func_version;
|
|
|
|
if (prechecked_function_version == function_version) {
|
2024-03-06 09:12:23 -04:00
|
|
|
function_checked |= 1;
|
|
|
|
}
|
|
|
|
prechecked_function_version = 0;
|
2024-02-02 08:14:34 -04:00
|
|
|
globals = func->func_globals;
|
|
|
|
builtins = func->func_builtins;
|
|
|
|
if (builtins != interp->builtins) {
|
2024-03-21 14:27:46 -03:00
|
|
|
OPT_STAT_INC(remove_globals_builtins_changed);
|
2024-02-02 08:14:34 -04:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case _POP_FRAME:
|
|
|
|
{
|
|
|
|
builtins_watched >>= 1;
|
2024-03-06 09:12:23 -04:00
|
|
|
globals_watched >>= 1;
|
|
|
|
function_checked >>= 1;
|
gh-117045: Add code object to function version cache (#117028)
Changes to the function version cache:
- In addition to the function object, also store the code object,
and allow the latter to be retrieved even if the function has been evicted.
- Stop assigning new function versions after a critical attribute (e.g. `__code__`)
has been modified; the version is permanently reset to zero in this case.
- Changes to `__annotations__` are no longer considered critical. (This fixes gh-109998.)
Changes to the Tier 2 optimization machinery:
- If we cannot map a function version to a function, but it is still mapped to a code object,
we continue projecting the trace.
The operand of the `_PUSH_FRAME` and `_POP_FRAME` opcodes can be either NULL,
a function object, or a code object with the lowest bit set.
This allows us to trace through code that calls an ephemeral function,
i.e., a function that may not be alive when we are constructing the executor,
e.g. a generator expression or certain nested functions.
We will lose globals removal inside such functions,
but we can still do other peephole operations
(and even possibly [call inlining](https://github.com/python/cpython/pull/116290),
if we decide to do it), which only need the code object.
As before, if we cannot retrieve the code object from the cache, we stop projecting.
2024-03-21 16:37:41 -03:00
|
|
|
uint64_t operand = buffer[pc].operand;
|
|
|
|
if (operand == 0 || (operand & 1)) {
|
|
|
|
// It's either a code object or NULL, so bail
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
PyFunctionObject *func = (PyFunctionObject *)operand;
|
|
|
|
if (func == NULL) {
|
|
|
|
return 1;
|
|
|
|
}
|
2024-02-02 08:14:34 -04:00
|
|
|
assert(PyFunction_Check(func));
|
2024-03-08 05:47:41 -04:00
|
|
|
function_version = func->func_version;
|
2024-02-02 08:14:34 -04:00
|
|
|
globals = func->func_globals;
|
|
|
|
builtins = func->func_builtins;
|
|
|
|
break;
|
|
|
|
}
|
2024-03-06 09:12:23 -04:00
|
|
|
case _CHECK_FUNCTION_EXACT_ARGS:
|
|
|
|
prechecked_function_version = (uint32_t)buffer[pc].operand;
|
|
|
|
break;
|
2024-02-13 09:24:48 -04:00
|
|
|
default:
|
|
|
|
if (op_is_end(opcode)) {
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Number of entries on the current abstract frame's stack.
 * Expects `stack_pointer` and `ctx` to be in scope at the use site. */
#define STACK_LEVEL()     ((int)(stack_pointer - ctx->frame->stack))

/* Local variable slot `idx` of the current abstract frame.
 * Expects `ctx` to be in scope at the use site. */
#define GETLOCAL(idx)          ((ctx->frame->locals[idx]))

/* Overwrite the opcode, oparg and operand of the instruction INST points to.
 *
 * Wrapped in do { } while (0) so that it behaves as a single statement
 * (e.g. as the body of an unbraced `if`), and INST is parenthesized so that
 * an expression such as `instr + 1` may be passed directly. Use it as a
 * statement terminated by a semicolon. */
#define REPLACE_OP(INST, OP, ARG, OPERAND)    \
    do {                                      \
        (INST)->opcode = OP;                  \
        (INST)->oparg = ARG;                  \
        (INST)->operand = OPERAND;            \
    } while (0)
|
|
|
|
|
2024-02-27 09:25:02 -04:00
|
|
|
/* Shortened forms for convenience, used in optimizer_bytecodes.c */

/* Creating symbols */
#define sym_new_unknown   _Py_uop_sym_new_unknown
#define sym_new_not_null  _Py_uop_sym_new_not_null
#define sym_new_type      _Py_uop_sym_new_type
#define sym_new_const     _Py_uop_sym_new_const
#define sym_new_null      _Py_uop_sym_new_null

/* Querying symbols */
#define sym_is_null       _Py_uop_sym_is_null
#define sym_is_not_null   _Py_uop_sym_is_not_null
#define sym_is_const      _Py_uop_sym_is_const
#define sym_is_bottom     _Py_uop_sym_is_bottom
#define sym_get_const     _Py_uop_sym_get_const
#define sym_get_type      _Py_uop_sym_get_type
#define sym_has_type      _Py_uop_sym_has_type
#define sym_matches_type  _Py_uop_sym_matches_type
#define sym_truthiness    _Py_uop_sym_truthiness

/* Updating symbols; these pass the `ctx` in scope at the use site */
#define sym_set_null(SYM) _Py_uop_sym_set_null(ctx, SYM)
#define sym_set_non_null(SYM) _Py_uop_sym_set_non_null(ctx, SYM)
#define sym_set_type(SYM, TYPE) _Py_uop_sym_set_type(ctx, SYM, TYPE)
#define sym_set_const(SYM, CNST) _Py_uop_sym_set_const(ctx, SYM, CNST)

/* Abstract frames */
#define frame_new         _Py_uop_frame_new
#define frame_pop         _Py_uop_frame_pop
|
|
|
|
|
2024-03-05 07:23:46 -04:00
|
|
|
static int
|
|
|
|
optimize_to_bool(
|
|
|
|
_PyUOpInstruction *this_instr,
|
|
|
|
_Py_UOpsContext *ctx,
|
|
|
|
_Py_UopsSymbol *value,
|
|
|
|
_Py_UopsSymbol **result_ptr)
|
|
|
|
{
|
|
|
|
if (sym_matches_type(value, &PyBool_Type)) {
|
|
|
|
REPLACE_OP(this_instr, _NOP, 0, 0);
|
|
|
|
*result_ptr = value;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
int truthiness = sym_truthiness(value);
|
|
|
|
if (truthiness >= 0) {
|
|
|
|
PyObject *load = truthiness ? Py_True : Py_False;
|
|
|
|
REPLACE_OP(this_instr, _POP_TOP_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)load);
|
|
|
|
*result_ptr = sym_new_const(ctx, load);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
2024-02-27 09:25:02 -04:00
|
|
|
|
2024-03-05 11:06:00 -04:00
|
|
|
static void
|
|
|
|
eliminate_pop_guard(_PyUOpInstruction *this_instr, bool exit)
|
|
|
|
{
|
|
|
|
REPLACE_OP(this_instr, _POP_TOP, 0, 0);
|
|
|
|
if (exit) {
|
|
|
|
REPLACE_OP((this_instr+1), _EXIT_TRACE, 0, 0);
|
|
|
|
this_instr[1].target = this_instr->target;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-04-18 07:09:30 -03:00
|
|
|
/* _PUSH_FRAME/_POP_FRAME's operand can be 0, a PyFunctionObject *, or a
 * PyCodeObject * tagged by setting its lowest bit. Retrieve the code object
 * if possible; returns NULL when the operand is 0.
 */
|
|
|
|
static PyCodeObject *
|
|
|
|
get_code(_PyUOpInstruction *op)
|
|
|
|
{
|
2024-04-25 07:32:47 -03:00
|
|
|
assert(op->opcode == _PUSH_FRAME || op->opcode == _POP_FRAME || op->opcode == _RETURN_GENERATOR);
|
2024-04-18 07:09:30 -03:00
|
|
|
PyCodeObject *co = NULL;
|
|
|
|
uint64_t operand = op->operand;
|
|
|
|
if (operand == 0) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
if (operand & 1) {
|
|
|
|
co = (PyCodeObject *)(operand & ~1);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
PyFunctionObject *func = (PyFunctionObject *)operand;
|
|
|
|
assert(PyFunction_Check(func));
|
|
|
|
co = (PyCodeObject *)func->func_code;
|
|
|
|
}
|
|
|
|
assert(PyCode_Check(co));
|
|
|
|
return co;
|
|
|
|
}
|
|
|
|
|
2024-02-13 09:24:48 -04:00
|
|
|
/* Returns trace_len on success, 0 if the trace is not ready to be optimized
 * yet (retry later), and -1 on error. */
|
|
|
|
static int
|
2024-02-26 12:42:53 -04:00
|
|
|
optimize_uops(
|
2024-02-13 09:24:48 -04:00
|
|
|
PyCodeObject *co,
|
|
|
|
_PyUOpInstruction *trace,
|
|
|
|
int trace_len,
|
2024-02-22 10:48:25 -04:00
|
|
|
int curr_stacklen,
|
|
|
|
_PyBloomFilter *dependencies
|
2024-02-13 09:24:48 -04:00
|
|
|
)
|
|
|
|
{
|
|
|
|
|
2024-02-27 09:25:02 -04:00
|
|
|
_Py_UOpsContext context;
|
|
|
|
_Py_UOpsContext *ctx = &context;
|
2024-03-21 14:27:46 -03:00
|
|
|
uint32_t opcode = UINT16_MAX;
|
2024-04-18 07:09:30 -03:00
|
|
|
int curr_space = 0;
|
|
|
|
int max_space = 0;
|
|
|
|
_PyUOpInstruction *first_valid_check_stack = NULL;
|
|
|
|
_PyUOpInstruction *corresponding_check_stack = NULL;
|
2024-02-13 09:24:48 -04:00
|
|
|
|
2024-05-10 13:43:23 -03:00
|
|
|
_Py_uop_abstractcontext_init(ctx);
|
2024-02-27 09:25:02 -04:00
|
|
|
_Py_UOpsAbstractFrame *frame = _Py_uop_frame_new(ctx, co, ctx->n_consumed, 0, curr_stacklen);
|
2024-02-27 06:51:26 -04:00
|
|
|
if (frame == NULL) {
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
ctx->curr_frame_depth++;
|
|
|
|
ctx->frame = frame;
|
2024-05-10 13:43:23 -03:00
|
|
|
ctx->done = false;
|
|
|
|
ctx->out_of_space = false;
|
|
|
|
ctx->contradiction = false;
|
2024-02-13 09:24:48 -04:00
|
|
|
|
2024-03-26 06:35:11 -03:00
|
|
|
_PyUOpInstruction *this_instr = NULL;
|
2024-05-10 13:43:23 -03:00
|
|
|
for (int i = 0; !ctx->done; i++) {
|
|
|
|
assert(i < trace_len);
|
2024-03-26 06:35:11 -03:00
|
|
|
this_instr = &trace[i];
|
2024-02-13 09:24:48 -04:00
|
|
|
|
|
|
|
int oparg = this_instr->oparg;
|
2024-03-21 14:27:46 -03:00
|
|
|
opcode = this_instr->opcode;
|
2024-02-27 09:25:02 -04:00
|
|
|
_Py_UopsSymbol **stack_pointer = ctx->frame->stack_pointer;
|
2024-02-13 09:24:48 -04:00
|
|
|
|
2024-03-18 15:08:43 -03:00
|
|
|
#ifdef Py_DEBUG
|
|
|
|
if (get_lltrace() >= 3) {
|
|
|
|
printf("%4d abs: ", (int)(this_instr - trace));
|
|
|
|
_PyUOpPrint(this_instr);
|
|
|
|
printf(" ");
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2024-02-13 09:24:48 -04:00
|
|
|
switch (opcode) {
|
2024-03-18 15:08:43 -03:00
|
|
|
|
2024-02-26 12:42:53 -04:00
|
|
|
#include "optimizer_cases.c.h"
|
2024-02-13 09:24:48 -04:00
|
|
|
|
|
|
|
default:
|
2024-03-18 15:08:43 -03:00
|
|
|
DPRINTF(1, "\nUnknown opcode in abstract interpreter\n");
|
2024-02-13 09:24:48 -04:00
|
|
|
Py_UNREACHABLE();
|
2024-02-02 08:14:34 -04:00
|
|
|
}
|
2024-02-13 09:24:48 -04:00
|
|
|
assert(ctx->frame != NULL);
|
|
|
|
DPRINTF(3, " stack_level %d\n", STACK_LEVEL());
|
|
|
|
ctx->frame->stack_pointer = stack_pointer;
|
|
|
|
assert(STACK_LEVEL() >= 0);
|
2024-02-02 08:14:34 -04:00
|
|
|
}
|
2024-05-10 13:43:23 -03:00
|
|
|
if (ctx->out_of_space) {
|
|
|
|
DPRINTF(3, "\n");
|
|
|
|
DPRINTF(1, "Out of space in abstract interpreter\n");
|
|
|
|
}
|
|
|
|
if (ctx->contradiction) {
|
|
|
|
// Attempted to push a "bottom" (contradiction) symbol onto the stack.
|
|
|
|
// This means that the abstract interpreter has hit unreachable code.
|
|
|
|
// We *could* generate an _EXIT_TRACE or _FATAL_ERROR here, but hitting
|
|
|
|
// bottom indicates type instability, so we are probably better off
|
|
|
|
// retrying later.
|
|
|
|
DPRINTF(3, "\n");
|
|
|
|
DPRINTF(1, "Hit bottom in abstract interpreter\n");
|
|
|
|
_Py_uop_abstractcontext_fini(ctx);
|
|
|
|
return 0;
|
2024-03-21 14:27:46 -03:00
|
|
|
}
|
2024-02-29 14:55:29 -04:00
|
|
|
|
2024-04-18 07:09:30 -03:00
|
|
|
/* Either reached the end or cannot optimize further, but there
|
|
|
|
* would be no benefit in retrying later */
|
2024-03-20 15:24:02 -03:00
|
|
|
_Py_uop_abstractcontext_fini(ctx);
|
2024-04-18 07:09:30 -03:00
|
|
|
if (first_valid_check_stack != NULL) {
|
|
|
|
assert(first_valid_check_stack->opcode == _CHECK_STACK_SPACE);
|
|
|
|
assert(max_space > 0);
|
|
|
|
assert(max_space <= INT_MAX);
|
|
|
|
assert(max_space <= INT32_MAX);
|
|
|
|
first_valid_check_stack->opcode = _CHECK_STACK_SPACE_OPERAND;
|
|
|
|
first_valid_check_stack->operand = max_space;
|
|
|
|
}
|
2024-03-26 06:35:11 -03:00
|
|
|
return trace_len;
|
2024-05-10 13:43:23 -03:00
|
|
|
|
|
|
|
error:
|
|
|
|
DPRINTF(3, "\n");
|
|
|
|
DPRINTF(1, "Encountered error in abstract interpreter\n");
|
|
|
|
if (opcode <= MAX_UOP_ID) {
|
|
|
|
OPT_ERROR_IN_OPCODE(opcode);
|
|
|
|
}
|
|
|
|
_Py_uop_abstractcontext_fini(ctx);
|
|
|
|
return -1;
|
|
|
|
|
2024-02-02 08:14:34 -04:00
|
|
|
}
|
|
|
|
|
2024-02-13 09:24:48 -04:00
|
|
|
|
2024-03-26 06:35:11 -03:00
|
|
|
static int
|
2024-02-13 09:24:48 -04:00
|
|
|
remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
|
|
|
|
{
|
2024-02-13 12:28:19 -04:00
|
|
|
/* Remove _SET_IP and _CHECK_VALIDITY where possible.
|
|
|
|
* _SET_IP is needed if the following instruction escapes or
|
|
|
|
* could error. _CHECK_VALIDITY is needed if the previous
|
|
|
|
* instruction could have escaped. */
|
2024-02-13 09:24:48 -04:00
|
|
|
int last_set_ip = -1;
|
2024-02-22 10:48:25 -04:00
|
|
|
bool may_have_escaped = true;
|
2024-02-13 09:24:48 -04:00
|
|
|
for (int pc = 0; pc < buffer_size; pc++) {
|
|
|
|
int opcode = buffer[pc].opcode;
|
2024-02-13 12:28:19 -04:00
|
|
|
switch (opcode) {
|
2024-04-19 05:26:42 -03:00
|
|
|
case _START_EXECUTOR:
|
|
|
|
may_have_escaped = false;
|
|
|
|
break;
|
2024-02-13 12:28:19 -04:00
|
|
|
case _SET_IP:
|
2024-03-11 10:37:48 -03:00
|
|
|
buffer[pc].opcode = _NOP;
|
2024-02-13 12:28:19 -04:00
|
|
|
last_set_ip = pc;
|
|
|
|
break;
|
|
|
|
case _CHECK_VALIDITY:
|
|
|
|
if (may_have_escaped) {
|
|
|
|
may_have_escaped = false;
|
2024-02-13 09:24:48 -04:00
|
|
|
}
|
2024-02-13 12:28:19 -04:00
|
|
|
else {
|
2024-03-11 10:37:48 -03:00
|
|
|
buffer[pc].opcode = _NOP;
|
2024-02-13 12:28:19 -04:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case _CHECK_VALIDITY_AND_SET_IP:
|
|
|
|
if (may_have_escaped) {
|
|
|
|
may_have_escaped = false;
|
|
|
|
buffer[pc].opcode = _CHECK_VALIDITY;
|
|
|
|
}
|
|
|
|
else {
|
2024-03-11 10:37:48 -03:00
|
|
|
buffer[pc].opcode = _NOP;
|
2024-02-13 12:28:19 -04:00
|
|
|
}
|
|
|
|
last_set_ip = pc;
|
|
|
|
break;
|
2024-02-22 10:48:25 -04:00
|
|
|
case _POP_TOP:
|
|
|
|
{
|
|
|
|
_PyUOpInstruction *last = &buffer[pc-1];
|
|
|
|
while (last->opcode == _NOP) {
|
|
|
|
last--;
|
|
|
|
}
|
|
|
|
if (last->opcode == _LOAD_CONST_INLINE ||
|
|
|
|
last->opcode == _LOAD_CONST_INLINE_BORROW ||
|
|
|
|
last->opcode == _LOAD_FAST ||
|
|
|
|
last->opcode == _COPY
|
|
|
|
) {
|
|
|
|
last->opcode = _NOP;
|
2024-03-11 10:37:48 -03:00
|
|
|
buffer[pc].opcode = _NOP;
|
2024-02-22 10:48:25 -04:00
|
|
|
}
|
2024-03-05 11:23:08 -04:00
|
|
|
if (last->opcode == _REPLACE_WITH_TRUE) {
|
|
|
|
last->opcode = _NOP;
|
|
|
|
}
|
2024-02-22 10:48:25 -04:00
|
|
|
break;
|
|
|
|
}
|
2024-02-13 12:28:19 -04:00
|
|
|
case _JUMP_TO_TOP:
|
|
|
|
case _EXIT_TRACE:
|
2024-03-26 06:35:11 -03:00
|
|
|
return pc + 1;
|
2024-02-13 12:28:19 -04:00
|
|
|
default:
|
|
|
|
{
|
2024-04-19 05:25:07 -03:00
|
|
|
/* _PUSH_FRAME doesn't escape or error, but it
|
|
|
|
* does need the IP for the return address */
|
|
|
|
bool needs_ip = opcode == _PUSH_FRAME;
|
2024-02-13 12:28:19 -04:00
|
|
|
if (_PyUop_Flags[opcode] & HAS_ESCAPES_FLAG) {
|
|
|
|
needs_ip = true;
|
|
|
|
may_have_escaped = true;
|
|
|
|
}
|
|
|
|
if (needs_ip && last_set_ip >= 0) {
|
|
|
|
if (buffer[last_set_ip].opcode == _CHECK_VALIDITY) {
|
|
|
|
buffer[last_set_ip].opcode = _CHECK_VALIDITY_AND_SET_IP;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
assert(buffer[last_set_ip].opcode == _NOP);
|
|
|
|
buffer[last_set_ip].opcode = _SET_IP;
|
|
|
|
}
|
|
|
|
last_set_ip = -1;
|
2024-02-13 09:24:48 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2024-03-26 06:35:11 -03:00
|
|
|
Py_UNREACHABLE();
|
2024-02-13 09:24:48 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// 0 - failure, no error raised, just fall back to Tier 1
|
|
|
|
// -1 - failure, and raise error
|
2024-03-26 06:35:11 -03:00
|
|
|
// > 0 - length of optimized trace
|
2023-08-15 15:04:17 -03:00
|
|
|
int
|
|
|
|
_Py_uop_analyze_and_optimize(
|
2024-02-02 08:14:34 -04:00
|
|
|
_PyInterpreterFrame *frame,
|
2023-11-06 07:28:52 -04:00
|
|
|
_PyUOpInstruction *buffer,
|
2024-03-26 06:35:11 -03:00
|
|
|
int length,
|
2024-02-02 08:14:34 -04:00
|
|
|
int curr_stacklen,
|
|
|
|
_PyBloomFilter *dependencies
|
2023-08-15 15:04:17 -03:00
|
|
|
)
|
|
|
|
{
|
2024-02-13 09:24:48 -04:00
|
|
|
OPT_STAT_INC(optimizer_attempts);
|
|
|
|
|
2024-03-26 06:35:11 -03:00
|
|
|
int err = remove_globals(frame, buffer, length, dependencies);
|
|
|
|
if (err <= 0) {
|
|
|
|
return err;
|
2024-02-02 08:14:34 -04:00
|
|
|
}
|
2024-02-13 09:24:48 -04:00
|
|
|
|
2024-03-26 06:35:11 -03:00
|
|
|
length = optimize_uops(
|
gh-117045: Add code object to function version cache (#117028)
Changes to the function version cache:
- In addition to the function object, also store the code object,
and allow the latter to be retrieved even if the function has been evicted.
- Stop assigning new function versions after a critical attribute (e.g. `__code__`)
has been modified; the version is permanently reset to zero in this case.
- Changes to `__annotations__` are no longer considered critical. (This fixes gh-109998.)
Changes to the Tier 2 optimization machinery:
- If we cannot map a function version to a function, but it is still mapped to a code object,
we continue projecting the trace.
The operand of the `_PUSH_FRAME` and `_POP_FRAME` opcodes can be either NULL,
a function object, or a code object with the lowest bit set.
This allows us to trace through code that calls an ephemeral function,
i.e., a function that may not be alive when we are constructing the executor,
e.g. a generator expression or certain nested functions.
We will lose globals removal inside such functions,
but we can still do other peephole operations
(and even possibly [call inlining](https://github.com/python/cpython/pull/116290),
if we decide to do it), which only need the code object.
As before, if we cannot retrieve the code object from the cache, we stop projecting.
2024-03-21 16:37:41 -03:00
|
|
|
_PyFrame_GetCode(frame), buffer,
|
2024-03-26 06:35:11 -03:00
|
|
|
length, curr_stacklen, dependencies);
|
2024-02-13 09:24:48 -04:00
|
|
|
|
2024-03-26 06:35:11 -03:00
|
|
|
if (length <= 0) {
|
|
|
|
return length;
|
2024-02-13 09:24:48 -04:00
|
|
|
}
|
|
|
|
|
2024-03-26 06:35:11 -03:00
|
|
|
length = remove_unneeded_uops(buffer, length);
|
|
|
|
assert(length > 0);
|
2024-02-13 09:24:48 -04:00
|
|
|
|
|
|
|
OPT_STAT_INC(optimizer_successes);
|
2024-03-26 06:35:11 -03:00
|
|
|
return length;
|
2023-08-15 15:04:17 -03:00
|
|
|
}
|
2024-04-30 22:26:34 -03:00
|
|
|
|
|
|
|
#endif /* _Py_TIER2 */
|