mirror of https://github.com/python/cpython
GH-113710: Backedge counter improvements. (GH-115166)
This commit is contained in:
parent
7cce857622
commit
f9f6156c5a
|
@ -71,6 +71,8 @@ typedef struct {
|
|||
|
||||
PyAPI_FUNC(int) PyUnstable_Replace_Executor(PyCodeObject *code, _Py_CODEUNIT *instr, _PyExecutorObject *executor);
|
||||
|
||||
_PyOptimizerObject *_Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject* optimizer);
|
||||
|
||||
PyAPI_FUNC(void) PyUnstable_SetOptimizer(_PyOptimizerObject* optimizer);
|
||||
|
||||
PyAPI_FUNC(_PyOptimizerObject *) PyUnstable_GetOptimizer(void);
|
||||
|
@ -80,8 +82,6 @@ PyAPI_FUNC(_PyExecutorObject *) PyUnstable_GetExecutor(PyCodeObject *code, int o
|
|||
int
|
||||
_PyOptimizer_Optimize(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer);
|
||||
|
||||
extern _PyOptimizerObject _PyOptimizer_Default;
|
||||
|
||||
void _Py_ExecutorInit(_PyExecutorObject *, _PyBloomFilter *);
|
||||
void _Py_ExecutorClear(_PyExecutorObject *);
|
||||
void _Py_BloomFilter_Init(_PyBloomFilter *);
|
||||
|
@ -96,7 +96,11 @@ PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewUOpOptimizer(void);
|
|||
|
||||
#define OPTIMIZER_BITS_IN_COUNTER 4
|
||||
/* Minimum of 16 additional executions before retry */
|
||||
#define MINIMUM_TIER2_BACKOFF 4
|
||||
#define MIN_TIER2_BACKOFF 4
|
||||
#define MAX_TIER2_BACKOFF (15 - OPTIMIZER_BITS_IN_COUNTER)
|
||||
#define OPTIMIZER_BITS_MASK ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1)
|
||||
/* A value <= UINT16_MAX but large enough that when shifted is > UINT16_MAX */
|
||||
#define OPTIMIZER_UNREACHABLE_THRESHOLD UINT16_MAX
|
||||
|
||||
#define _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS 3
|
||||
#define _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS 6
|
||||
|
|
|
@ -239,8 +239,10 @@ struct _is {
|
|||
struct callable_cache callable_cache;
|
||||
_PyOptimizerObject *optimizer;
|
||||
_PyExecutorObject *executor_list_head;
|
||||
uint16_t optimizer_resume_threshold;
|
||||
uint16_t optimizer_backedge_threshold;
|
||||
/* These values are shifted and offset to speed up check in JUMP_BACKWARD */
|
||||
uint32_t optimizer_resume_threshold;
|
||||
uint32_t optimizer_backedge_threshold;
|
||||
|
||||
uint32_t next_func_version;
|
||||
_rare_events rare_events;
|
||||
PyDict_WatchCallback builtins_dict_watcher;
|
||||
|
|
|
@ -2318,13 +2318,16 @@ dummy_func(
|
|||
assert(oparg <= INSTR_OFFSET());
|
||||
JUMPBY(-oparg);
|
||||
#if ENABLE_SPECIALIZATION
|
||||
this_instr[1].cache += (1 << OPTIMIZER_BITS_IN_COUNTER);
|
||||
uint16_t counter = this_instr[1].cache;
|
||||
this_instr[1].cache = counter + (1 << OPTIMIZER_BITS_IN_COUNTER);
|
||||
/* We are using unsigned values, but we really want signed values, so
|
||||
* do the 2s complement comparison manually */
|
||||
uint16_t ucounter = this_instr[1].cache + (1 << 15);
|
||||
uint16_t threshold = tstate->interp->optimizer_backedge_threshold + (1 << 15);
|
||||
* do the 2s complement adjustment manually */
|
||||
uint32_t offset_counter = counter ^ (1 << 15);
|
||||
uint32_t threshold = tstate->interp->optimizer_backedge_threshold;
|
||||
assert((threshold & OPTIMIZER_BITS_MASK) == 0);
|
||||
// Use '>=' not '>' so that the optimizer/backoff bits do not effect the result.
|
||||
// Double-check that the opcode isn't instrumented or something:
|
||||
if (ucounter > threshold && this_instr->op.code == JUMP_BACKWARD) {
|
||||
if (offset_counter >= threshold && this_instr->op.code == JUMP_BACKWARD) {
|
||||
OPT_STAT_INC(attempts);
|
||||
_Py_CODEUNIT *start = this_instr;
|
||||
/* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */
|
||||
|
@ -2338,18 +2341,18 @@ dummy_func(
|
|||
// Rewind and enter the executor:
|
||||
assert(start->op.code == ENTER_EXECUTOR);
|
||||
next_instr = start;
|
||||
this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1);
|
||||
this_instr[1].cache &= OPTIMIZER_BITS_MASK;
|
||||
}
|
||||
else {
|
||||
int backoff = this_instr[1].cache & ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1);
|
||||
if (backoff < MINIMUM_TIER2_BACKOFF) {
|
||||
backoff = MINIMUM_TIER2_BACKOFF;
|
||||
int backoff = this_instr[1].cache & OPTIMIZER_BITS_MASK;
|
||||
backoff++;
|
||||
if (backoff < MIN_TIER2_BACKOFF) {
|
||||
backoff = MIN_TIER2_BACKOFF;
|
||||
}
|
||||
else if (backoff < 15 - OPTIMIZER_BITS_IN_COUNTER) {
|
||||
backoff++;
|
||||
else if (backoff > MAX_TIER2_BACKOFF) {
|
||||
backoff = MAX_TIER2_BACKOFF;
|
||||
}
|
||||
assert(backoff <= 15 - OPTIMIZER_BITS_IN_COUNTER);
|
||||
this_instr[1].cache = ((1 << 16) - ((1 << OPTIMIZER_BITS_IN_COUNTER) << backoff)) | backoff;
|
||||
this_instr[1].cache = ((UINT16_MAX << OPTIMIZER_BITS_IN_COUNTER) << backoff) | backoff;
|
||||
}
|
||||
}
|
||||
#endif /* ENABLE_SPECIALIZATION */
|
||||
|
|
|
@ -3263,13 +3263,16 @@
|
|||
assert(oparg <= INSTR_OFFSET());
|
||||
JUMPBY(-oparg);
|
||||
#if ENABLE_SPECIALIZATION
|
||||
this_instr[1].cache += (1 << OPTIMIZER_BITS_IN_COUNTER);
|
||||
uint16_t counter = this_instr[1].cache;
|
||||
this_instr[1].cache = counter + (1 << OPTIMIZER_BITS_IN_COUNTER);
|
||||
/* We are using unsigned values, but we really want signed values, so
|
||||
* do the 2s complement comparison manually */
|
||||
uint16_t ucounter = this_instr[1].cache + (1 << 15);
|
||||
uint16_t threshold = tstate->interp->optimizer_backedge_threshold + (1 << 15);
|
||||
* do the 2s complement adjustment manually */
|
||||
uint32_t offset_counter = counter ^ (1 << 15);
|
||||
uint32_t threshold = tstate->interp->optimizer_backedge_threshold;
|
||||
assert((threshold & OPTIMIZER_BITS_MASK) == 0);
|
||||
// Use '>=' not '>' so that the optimizer/backoff bits do not effect the result.
|
||||
// Double-check that the opcode isn't instrumented or something:
|
||||
if (ucounter > threshold && this_instr->op.code == JUMP_BACKWARD) {
|
||||
if (offset_counter >= threshold && this_instr->op.code == JUMP_BACKWARD) {
|
||||
OPT_STAT_INC(attempts);
|
||||
_Py_CODEUNIT *start = this_instr;
|
||||
/* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */
|
||||
|
@ -3283,18 +3286,18 @@
|
|||
// Rewind and enter the executor:
|
||||
assert(start->op.code == ENTER_EXECUTOR);
|
||||
next_instr = start;
|
||||
this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1);
|
||||
this_instr[1].cache &= OPTIMIZER_BITS_MASK;
|
||||
}
|
||||
else {
|
||||
int backoff = this_instr[1].cache & ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1);
|
||||
if (backoff < MINIMUM_TIER2_BACKOFF) {
|
||||
backoff = MINIMUM_TIER2_BACKOFF;
|
||||
int backoff = this_instr[1].cache & OPTIMIZER_BITS_MASK;
|
||||
backoff++;
|
||||
if (backoff < MIN_TIER2_BACKOFF) {
|
||||
backoff = MIN_TIER2_BACKOFF;
|
||||
}
|
||||
else if (backoff < 15 - OPTIMIZER_BITS_IN_COUNTER) {
|
||||
backoff++;
|
||||
else if (backoff > MAX_TIER2_BACKOFF) {
|
||||
backoff = MAX_TIER2_BACKOFF;
|
||||
}
|
||||
assert(backoff <= 15 - OPTIMIZER_BITS_IN_COUNTER);
|
||||
this_instr[1].cache = ((1 << 16) - ((1 << OPTIMIZER_BITS_IN_COUNTER) << backoff)) | backoff;
|
||||
this_instr[1].cache = ((UINT16_MAX << OPTIMIZER_BITS_IN_COUNTER) << backoff) | backoff;
|
||||
}
|
||||
}
|
||||
#endif /* ENABLE_SPECIALIZATION */
|
||||
|
|
|
@ -109,6 +109,9 @@ never_optimize(
|
|||
_PyExecutorObject **exec,
|
||||
int Py_UNUSED(stack_entries))
|
||||
{
|
||||
/* Although it should be benign for this to be called,
|
||||
* it shouldn't happen, so fail in debug builds. */
|
||||
assert(0 && "never optimize should never be called");
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -120,13 +123,19 @@ PyTypeObject _PyDefaultOptimizer_Type = {
|
|||
.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
|
||||
};
|
||||
|
||||
_PyOptimizerObject _PyOptimizer_Default = {
|
||||
static _PyOptimizerObject _PyOptimizer_Default = {
|
||||
PyObject_HEAD_INIT(&_PyDefaultOptimizer_Type)
|
||||
.optimize = never_optimize,
|
||||
.resume_threshold = INT16_MAX,
|
||||
.backedge_threshold = INT16_MAX,
|
||||
.resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD,
|
||||
.backedge_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD,
|
||||
};
|
||||
|
||||
static uint32_t
|
||||
shift_and_offset_threshold(uint16_t threshold)
|
||||
{
|
||||
return (threshold << OPTIMIZER_BITS_IN_COUNTER) + (1 << 15);
|
||||
}
|
||||
|
||||
_PyOptimizerObject *
|
||||
PyUnstable_GetOptimizer(void)
|
||||
{
|
||||
|
@ -134,24 +143,33 @@ PyUnstable_GetOptimizer(void)
|
|||
if (interp->optimizer == &_PyOptimizer_Default) {
|
||||
return NULL;
|
||||
}
|
||||
assert(interp->optimizer_backedge_threshold == interp->optimizer->backedge_threshold);
|
||||
assert(interp->optimizer_resume_threshold == interp->optimizer->resume_threshold);
|
||||
assert(interp->optimizer_backedge_threshold ==
|
||||
shift_and_offset_threshold(interp->optimizer->backedge_threshold));
|
||||
assert(interp->optimizer_resume_threshold ==
|
||||
shift_and_offset_threshold(interp->optimizer->resume_threshold));
|
||||
Py_INCREF(interp->optimizer);
|
||||
return interp->optimizer;
|
||||
}
|
||||
|
||||
void
|
||||
PyUnstable_SetOptimizer(_PyOptimizerObject *optimizer)
|
||||
_PyOptimizerObject *
|
||||
_Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject *optimizer)
|
||||
{
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||
if (optimizer == NULL) {
|
||||
optimizer = &_PyOptimizer_Default;
|
||||
}
|
||||
_PyOptimizerObject *old = interp->optimizer;
|
||||
Py_INCREF(optimizer);
|
||||
interp->optimizer = optimizer;
|
||||
interp->optimizer_backedge_threshold = optimizer->backedge_threshold;
|
||||
interp->optimizer_resume_threshold = optimizer->resume_threshold;
|
||||
interp->optimizer_backedge_threshold = shift_and_offset_threshold(optimizer->backedge_threshold);
|
||||
interp->optimizer_resume_threshold = shift_and_offset_threshold(optimizer->resume_threshold);
|
||||
return old;
|
||||
}
|
||||
|
||||
void
|
||||
PyUnstable_SetOptimizer(_PyOptimizerObject *optimizer)
|
||||
{
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||
_PyOptimizerObject *old = _Py_SetOptimizer(interp, optimizer);
|
||||
Py_DECREF(old);
|
||||
}
|
||||
|
||||
|
@ -860,10 +878,10 @@ PyUnstable_Optimizer_NewUOpOptimizer(void)
|
|||
return NULL;
|
||||
}
|
||||
opt->optimize = uop_optimize;
|
||||
opt->resume_threshold = INT16_MAX;
|
||||
// Need at least 3 iterations to settle specializations.
|
||||
// A few lower bits of the counter are reserved for other flags.
|
||||
opt->backedge_threshold = 16 << OPTIMIZER_BITS_IN_COUNTER;
|
||||
opt->resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD;
|
||||
// Need a few iterations to settle specializations,
|
||||
// and to ammortize the cost of optimization.
|
||||
opt->backedge_threshold = 16;
|
||||
return (PyObject *)opt;
|
||||
}
|
||||
|
||||
|
@ -950,7 +968,7 @@ PyUnstable_Optimizer_NewCounter(void)
|
|||
return NULL;
|
||||
}
|
||||
opt->base.optimize = counter_optimize;
|
||||
opt->base.resume_threshold = INT16_MAX;
|
||||
opt->base.resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD;
|
||||
opt->base.backedge_threshold = 0;
|
||||
opt->count = 0;
|
||||
return (PyObject *)opt;
|
||||
|
|
|
@ -1627,8 +1627,8 @@ finalize_modules(PyThreadState *tstate)
|
|||
|
||||
// Invalidate all executors and turn off tier 2 optimizer
|
||||
_Py_Executors_InvalidateAll(interp);
|
||||
Py_XDECREF(interp->optimizer);
|
||||
interp->optimizer = &_PyOptimizer_Default;
|
||||
_PyOptimizerObject *old = _Py_SetOptimizer(interp, NULL);
|
||||
Py_XDECREF(old);
|
||||
|
||||
// Stop watching __builtin__ modifications
|
||||
PyDict_Unwatch(0, interp->builtins);
|
||||
|
|
|
@ -625,9 +625,7 @@ init_interpreter(PyInterpreterState *interp,
|
|||
}
|
||||
interp->sys_profile_initialized = false;
|
||||
interp->sys_trace_initialized = false;
|
||||
interp->optimizer = &_PyOptimizer_Default;
|
||||
interp->optimizer_backedge_threshold = _PyOptimizer_Default.backedge_threshold;
|
||||
interp->optimizer_resume_threshold = _PyOptimizer_Default.backedge_threshold;
|
||||
(void)_Py_SetOptimizer(interp, NULL);
|
||||
interp->next_func_version = 1;
|
||||
interp->executor_list_head = NULL;
|
||||
if (interp != &runtime->_main_interpreter) {
|
||||
|
@ -780,10 +778,8 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
|
|||
tstate->_status.cleared = 0;
|
||||
}
|
||||
|
||||
Py_CLEAR(interp->optimizer);
|
||||
interp->optimizer = &_PyOptimizer_Default;
|
||||
interp->optimizer_backedge_threshold = _PyOptimizer_Default.backedge_threshold;
|
||||
interp->optimizer_resume_threshold = _PyOptimizer_Default.backedge_threshold;
|
||||
_PyOptimizerObject *old = _Py_SetOptimizer(interp, NULL);
|
||||
Py_DECREF(old);
|
||||
|
||||
/* It is possible that any of the objects below have a finalizer
|
||||
that runs Python code or otherwise relies on a thread state
|
||||
|
|
Loading…
Reference in New Issue