gh-116968: Reimplement Tier 2 counters (#117144)

Introduce a unified 16-bit backoff counter type (``_Py_BackoffCounter``),
shared between the Tier 1 adaptive specializer and the Tier 2 optimizer. The
API used for adaptive specialization counters is changed but the behavior is
(supposed to be) identical.

The behavior of the Tier 2 counters is changed:
- There are no longer dynamic thresholds (we never varied these).
- All counters now use the same exponential backoff.
- The counter for ``JUMP_BACKWARD`` starts counting down from 16.
- The ``temperature`` in side exits starts counting down from 64.
Guido van Rossum 2024-04-04 08:03:27 -07:00 committed by GitHub
parent 63bbe77d9b
commit 060a96f1a9
19 changed files with 313 additions and 235 deletions
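
As a quick orientation before the diff: the following standalone sketch (not part of this commit) models the counter life cycle described above. The type and helper names only mirror those introduced in pycore_backoff.h below, and the main() driver with its failing optimization attempts is purely hypothetical.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative model of _Py_BackoffCounter: a 12-bit countdown value
 * plus a 4-bit backoff exponent used when the countdown is restarted. */
typedef struct {
    union {
        struct {
            uint16_t backoff : 4;
            uint16_t value : 12;
        };
        uint16_t as_counter;
    };
} BackoffCounter;

static BackoffCounter make(uint16_t value, uint16_t backoff) {
    assert(value <= 0xFFF && backoff <= 15);
    return (BackoffCounter){.value = value, .backoff = backoff};
}

/* Each execution of the instruction counts the value down by one. */
static BackoffCounter advance(BackoffCounter c) {
    return make((c.value - 1) & 0xFFF, c.backoff);
}

static bool triggers(BackoffCounter c) {
    return c.value == 0;
}

/* On failure, restart with a longer period: bump the exponent (capped at
 * 12, the number of value bits) and reload the value with 2**backoff - 1. */
static BackoffCounter restart(BackoffCounter c) {
    uint16_t b = (uint16_t)(c.backoff < 12 ? c.backoff + 1 : 12);
    return make((uint16_t)((1 << b) - 1), b);
}

int main(void) {
    /* JUMP_BACKWARD starts at value 16, backoff 4, per the description above. */
    BackoffCounter c = make(16, 4);
    int executions = 0;
    for (int attempt = 0; attempt < 4; attempt++) {
        while (!triggers(c)) {
            c = advance(c);
            executions++;
        }
        printf("optimization attempt %d after %d executions\n", attempt, executions);
        c = restart(c);  /* pretend every attempt fails, forcing backoff */
    }
    return 0;
}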


@ -24,6 +24,16 @@ typedef struct _Py_GlobalMonitors {
uint8_t tools[_PY_MONITORING_UNGROUPED_EVENTS];
} _Py_GlobalMonitors;
typedef struct {
union {
struct {
uint16_t backoff : 4;
uint16_t value : 12;
};
uint16_t as_counter; // For printf("%#x", ...)
};
} _Py_BackoffCounter;
/* Each instruction in a code object is a fixed-width value,
* currently 2 bytes: 1-byte opcode + 1-byte oparg. The EXTENDED_ARG
* opcode allows for larger values but the current limit is 3 uses
@ -39,6 +49,7 @@ typedef union {
uint8_t code;
uint8_t arg;
} op;
_Py_BackoffCounter counter; // First cache entry of specializable op
} _Py_CODEUNIT;


@ -89,7 +89,7 @@ static inline uint16_t uop_get_error_target(const _PyUOpInstruction *inst)
typedef struct _exit_data {
uint32_t target;
int16_t temperature;
_Py_BackoffCounter temperature;
const struct _PyExecutorObject *executor;
} _PyExitData;
@ -115,11 +115,6 @@ typedef int (*optimize_func)(
struct _PyOptimizerObject {
PyObject_HEAD
optimize_func optimize;
/* These thresholds are treated as signed so do not exceed INT16_MAX
* Use INT16_MAX to indicate that the optimizer should never be called */
uint16_t resume_threshold;
uint16_t side_threshold;
uint16_t backedge_threshold;
/* Data needed by the optimizer goes here, but is opaque to the VM */
};
@ -151,14 +146,6 @@ extern void _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_inval
PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewCounter(void);
PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewUOpOptimizer(void);
#define OPTIMIZER_BITS_IN_COUNTER 4
/* Minimum of 16 additional executions before retry */
#define MIN_TIER2_BACKOFF 4
#define MAX_TIER2_BACKOFF (15 - OPTIMIZER_BITS_IN_COUNTER)
#define OPTIMIZER_BITS_MASK ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1)
/* A value <= UINT16_MAX but large enough that when shifted is > UINT16_MAX */
#define OPTIMIZER_UNREACHABLE_THRESHOLD UINT16_MAX
#define _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS 3
#define _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS 6


@ -0,0 +1,128 @@
#ifndef Py_INTERNAL_BACKOFF_H
#define Py_INTERNAL_BACKOFF_H
#ifdef __cplusplus
extern "C" {
#endif
#ifndef Py_BUILD_CORE
# error "this header requires Py_BUILD_CORE define"
#endif
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
/* 16-bit countdown counters using exponential backoff.
These are used by the adaptive specializer to count down until
it is time to specialize an instruction. If specialization fails
the counter is reset using exponential backoff.
Another use is for the Tier 2 optimizer to decide when to create
a new Tier 2 trace (executor). Again, exponential backoff is used.
The 16-bit counter is structured as a 12-bit unsigned 'value'
and a 4-bit 'backoff' field. When resetting the counter, the
backoff field is incremented (until it reaches a limit) and the
value is set to a bit mask representing the value 2**backoff - 1.
The maximum backoff is 12 (the number of value bits).
There is an exceptional value which must not be updated, 0xFFFF.
*/
#define UNREACHABLE_BACKOFF 0xFFFF
static inline bool
is_unreachable_backoff_counter(_Py_BackoffCounter counter)
{
return counter.as_counter == UNREACHABLE_BACKOFF;
}
static inline _Py_BackoffCounter
make_backoff_counter(uint16_t value, uint16_t backoff)
{
assert(backoff <= 15);
assert(value <= 0xFFF);
return (_Py_BackoffCounter){.value = value, .backoff = backoff};
}
static inline _Py_BackoffCounter
forge_backoff_counter(uint16_t counter)
{
return (_Py_BackoffCounter){.as_counter = counter};
}
static inline _Py_BackoffCounter
restart_backoff_counter(_Py_BackoffCounter counter)
{
assert(!is_unreachable_backoff_counter(counter));
if (counter.backoff < 12) {
return make_backoff_counter((1 << (counter.backoff + 1)) - 1, counter.backoff + 1);
}
else {
return make_backoff_counter((1 << 12) - 1, 12);
}
}
static inline _Py_BackoffCounter
pause_backoff_counter(_Py_BackoffCounter counter)
{
return make_backoff_counter(counter.value | 1, counter.backoff);
}
static inline _Py_BackoffCounter
advance_backoff_counter(_Py_BackoffCounter counter)
{
if (!is_unreachable_backoff_counter(counter)) {
return make_backoff_counter((counter.value - 1) & 0xFFF, counter.backoff);
}
else {
return counter;
}
}
static inline bool
backoff_counter_triggers(_Py_BackoffCounter counter)
{
return counter.value == 0;
}
/* Initial JUMP_BACKWARD counter.
* This determines when we create a trace for a loop.
* Backoff sequence 16, 32, 64, 128, 256, 512, 1024, 2048, 4096. */
#define JUMP_BACKWARD_INITIAL_VALUE 16
#define JUMP_BACKWARD_INITIAL_BACKOFF 4
static inline _Py_BackoffCounter
initial_jump_backoff_counter(void)
{
return make_backoff_counter(JUMP_BACKWARD_INITIAL_VALUE,
JUMP_BACKWARD_INITIAL_BACKOFF);
}
/* Initial exit temperature.
* Must be larger than ADAPTIVE_COOLDOWN_VALUE,
* otherwise when a side exit warms up we may construct
* a new trace before the Tier 1 code has properly re-specialized.
* Backoff sequence 64, 128, 256, 512, 1024, 2048, 4096. */
#define COLD_EXIT_INITIAL_VALUE 64
#define COLD_EXIT_INITIAL_BACKOFF 6
static inline _Py_BackoffCounter
initial_temperature_backoff_counter(void)
{
return make_backoff_counter(COLD_EXIT_INITIAL_VALUE,
COLD_EXIT_INITIAL_BACKOFF);
}
/* Unreachable backoff counter. */
static inline _Py_BackoffCounter
initial_unreachable_backoff_counter(void)
{
return forge_backoff_counter(UNREACHABLE_BACKOFF);
}
#ifdef __cplusplus
}
#endif
#endif /* !Py_INTERNAL_BACKOFF_H */
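
The "Backoff sequence 16, 32, 64, ..." and "64, 128, ..." comments above can be reproduced with a throwaway snippet like the one below (not part of the commit). It simply walks the arithmetic of restart_backoff_counter from the two initial counters defined in this header; the +1 reflects that a reload value of 2**backoff - 1 means the next trigger comes one execution later.

#include <stdio.h>

/* Executions between optimization attempts, starting from the initial
 * (value, backoff) pairs above and restarting with 2**backoff - 1 each time. */
static void print_sequence(const char *name, unsigned value, unsigned backoff) {
    printf("%s: %u", name, value);
    while (backoff < 12) {            /* 12 value bits cap the backoff */
        backoff++;
        value = (1u << backoff) - 1;  /* reload value after a restart */
        printf(", %u", value + 1);    /* +1: the triggering execution itself */
    }
    printf("\n");
}

int main(void) {
    print_sequence("JUMP_BACKWARD", 16, 4);          /* 16, 32, 64, ..., 4096 */
    print_sequence("side-exit temperature", 64, 6);  /* 64, 128, ..., 4096 */
    return 0;
}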


@ -31,7 +31,7 @@ extern "C" {
#define CACHE_ENTRIES(cache) (sizeof(cache)/sizeof(_Py_CODEUNIT))
typedef struct {
uint16_t counter;
_Py_BackoffCounter counter;
uint16_t module_keys_version;
uint16_t builtin_keys_version;
uint16_t index;
@ -40,44 +40,44 @@ typedef struct {
#define INLINE_CACHE_ENTRIES_LOAD_GLOBAL CACHE_ENTRIES(_PyLoadGlobalCache)
typedef struct {
uint16_t counter;
_Py_BackoffCounter counter;
} _PyBinaryOpCache;
#define INLINE_CACHE_ENTRIES_BINARY_OP CACHE_ENTRIES(_PyBinaryOpCache)
typedef struct {
uint16_t counter;
_Py_BackoffCounter counter;
} _PyUnpackSequenceCache;
#define INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE \
CACHE_ENTRIES(_PyUnpackSequenceCache)
typedef struct {
uint16_t counter;
_Py_BackoffCounter counter;
} _PyCompareOpCache;
#define INLINE_CACHE_ENTRIES_COMPARE_OP CACHE_ENTRIES(_PyCompareOpCache)
typedef struct {
uint16_t counter;
_Py_BackoffCounter counter;
} _PyBinarySubscrCache;
#define INLINE_CACHE_ENTRIES_BINARY_SUBSCR CACHE_ENTRIES(_PyBinarySubscrCache)
typedef struct {
uint16_t counter;
_Py_BackoffCounter counter;
} _PySuperAttrCache;
#define INLINE_CACHE_ENTRIES_LOAD_SUPER_ATTR CACHE_ENTRIES(_PySuperAttrCache)
typedef struct {
uint16_t counter;
_Py_BackoffCounter counter;
uint16_t version[2];
uint16_t index;
} _PyAttrCache;
typedef struct {
uint16_t counter;
_Py_BackoffCounter counter;
uint16_t type_version[2];
union {
uint16_t keys_version[2];
@ -93,39 +93,39 @@ typedef struct {
#define INLINE_CACHE_ENTRIES_STORE_ATTR CACHE_ENTRIES(_PyAttrCache)
typedef struct {
uint16_t counter;
_Py_BackoffCounter counter;
uint16_t func_version[2];
} _PyCallCache;
#define INLINE_CACHE_ENTRIES_CALL CACHE_ENTRIES(_PyCallCache)
typedef struct {
uint16_t counter;
_Py_BackoffCounter counter;
} _PyStoreSubscrCache;
#define INLINE_CACHE_ENTRIES_STORE_SUBSCR CACHE_ENTRIES(_PyStoreSubscrCache)
typedef struct {
uint16_t counter;
_Py_BackoffCounter counter;
} _PyForIterCache;
#define INLINE_CACHE_ENTRIES_FOR_ITER CACHE_ENTRIES(_PyForIterCache)
typedef struct {
uint16_t counter;
_Py_BackoffCounter counter;
} _PySendCache;
#define INLINE_CACHE_ENTRIES_SEND CACHE_ENTRIES(_PySendCache)
typedef struct {
uint16_t counter;
_Py_BackoffCounter counter;
uint16_t version[2];
} _PyToBoolCache;
#define INLINE_CACHE_ENTRIES_TO_BOOL CACHE_ENTRIES(_PyToBoolCache)
typedef struct {
uint16_t counter;
_Py_BackoffCounter counter;
} _PyContainsOpCache;
#define INLINE_CACHE_ENTRIES_CONTAINS_OP CACHE_ENTRIES(_PyContainsOpCache)
@ -451,18 +451,14 @@ write_location_entry_start(uint8_t *ptr, int code, int length)
/** Counters
* The first 16-bit value in each inline cache is a counter.
* When counting misses, the counter is treated as a simple unsigned value.
*
* When counting executions until the next specialization attempt,
* exponential backoff is used to reduce the number of specialization failures.
* The high 12 bits store the counter, the low 4 bits store the backoff exponent.
* On a specialization failure, the backoff exponent is incremented and the
* counter set to (2**backoff - 1).
* Backoff == 6 -> starting counter == 63, backoff == 10 -> starting counter == 1023.
* See pycore_backoff.h for more details.
* On a specialization failure, the backoff counter is restarted.
*/
/* With a 16-bit counter, we have 12 bits for the counter value, and 4 bits for the backoff */
#define ADAPTIVE_BACKOFF_BITS 4
#include "pycore_backoff.h"
// A value of 1 means that we attempt to specialize the *second* time each
// instruction is executed. Executing twice is a much better indicator of
@ -480,36 +476,30 @@ write_location_entry_start(uint8_t *ptr, int code, int length)
#define ADAPTIVE_COOLDOWN_VALUE 52
#define ADAPTIVE_COOLDOWN_BACKOFF 0
#define MAX_BACKOFF_VALUE (16 - ADAPTIVE_BACKOFF_BITS)
// Can't assert this in pycore_backoff.h because of header order dependencies
static_assert(COLD_EXIT_INITIAL_VALUE > ADAPTIVE_COOLDOWN_VALUE,
"Cold exit value should be larger than adaptive cooldown value");
static inline uint16_t
static inline _Py_BackoffCounter
adaptive_counter_bits(uint16_t value, uint16_t backoff) {
return ((value << ADAPTIVE_BACKOFF_BITS)
| (backoff & ((1 << ADAPTIVE_BACKOFF_BITS) - 1)));
return make_backoff_counter(value, backoff);
}
static inline uint16_t
static inline _Py_BackoffCounter
adaptive_counter_warmup(void) {
return adaptive_counter_bits(ADAPTIVE_WARMUP_VALUE,
ADAPTIVE_WARMUP_BACKOFF);
}
static inline uint16_t
static inline _Py_BackoffCounter
adaptive_counter_cooldown(void) {
return adaptive_counter_bits(ADAPTIVE_COOLDOWN_VALUE,
ADAPTIVE_COOLDOWN_BACKOFF);
}
static inline uint16_t
adaptive_counter_backoff(uint16_t counter) {
uint16_t backoff = counter & ((1 << ADAPTIVE_BACKOFF_BITS) - 1);
backoff++;
if (backoff > MAX_BACKOFF_VALUE) {
backoff = MAX_BACKOFF_VALUE;
}
uint16_t value = (uint16_t)(1 << backoff) - 1;
return adaptive_counter_bits(value, backoff);
static inline _Py_BackoffCounter
adaptive_counter_backoff(_Py_BackoffCounter counter) {
return restart_backoff_counter(counter);
}
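
Putting the Tier 1 pieces together, here is a hedged, standalone simulation (not part of the commit) of how an adaptive inline-cache counter is driven: warm up, trigger, back off on failed specialization attempts, then enter the 52-execution cooldown on success. The warmup backoff of 1 and the success/failure pattern are invented for illustration; only the 12/4-bit split, the warmup value of 1 and the cooldown value of 52 come from this file.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Same illustrative 12/4-bit counter model as in the first sketch. */
typedef struct { uint16_t backoff : 4; uint16_t value : 12; } Counter;

static Counter make(uint16_t v, uint16_t b) { return (Counter){.backoff = b, .value = v}; }
static bool triggers(Counter c) { return c.value == 0; }
static Counter advance(Counter c) { return make((c.value - 1) & 0xFFF, c.backoff); }
static Counter restart(Counter c) {
    uint16_t b = (uint16_t)(c.backoff < 12 ? c.backoff + 1 : 12);
    return make((uint16_t)((1 << b) - 1), b);
}

int main(void) {
    Counter c = make(1, 1);  /* warmup: try to specialize on the second execution */
    for (int exec = 1; exec <= 200; exec++) {
        if (triggers(c)) {
            bool success = (exec > 100);      /* pretend early attempts fail */
            if (success) {
                printf("execution %d: specialized; cooldown of 52 misses\n", exec);
                c = make(52, 0);              /* ADAPTIVE_COOLDOWN_VALUE / _BACKOFF */
                break;
            }
            printf("execution %d: specialization failed, backing off\n", exec);
            c = restart(c);
        }
        else {
            c = advance(c);                   /* ADVANCE_ADAPTIVE_COUNTER */
        }
    }
    return 0;
}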


@ -239,12 +239,6 @@ struct _is {
_PyOptimizerObject *optimizer;
_PyExecutorObject *executor_list_head;
/* These two values are shifted and offset to speed up check in JUMP_BACKWARD */
uint32_t optimizer_resume_threshold;
uint32_t optimizer_backedge_threshold;
uint16_t optimizer_side_threshold;
_rare_events rare_events;
PyDict_WatchCallback builtins_dict_watcher;


@ -10,6 +10,7 @@ import _testinternalcapi
from test.support import script_helper, requires_specialization
from _testinternalcapi import TIER2_THRESHOLD
@contextlib.contextmanager
def temporary_optimizer(opt):
@ -69,7 +70,8 @@ class TestOptimizerAPI(unittest.TestCase):
self.assertEqual(opt.get_count(), 0)
with clear_executors(loop):
loop()
self.assertEqual(opt.get_count(), 1000)
# Subtract because optimizer doesn't kick in sooner
self.assertEqual(opt.get_count(), 1000 - TIER2_THRESHOLD)
def test_long_loop(self):
"Check that we aren't confused by EXTENDED_ARG"
@ -81,7 +83,7 @@ class TestOptimizerAPI(unittest.TestCase):
pass
def long_loop():
for _ in range(10):
for _ in range(20):
nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop();
nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop();
nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop();
@ -96,7 +98,7 @@ class TestOptimizerAPI(unittest.TestCase):
with temporary_optimizer(opt):
self.assertEqual(opt.get_count(), 0)
long_loop()
self.assertEqual(opt.get_count(), 10)
self.assertEqual(opt.get_count(), 20 - TIER2_THRESHOLD) # Need iterations to warm up
def test_code_restore_for_ENTER_EXECUTOR(self):
def testfunc(x):
@ -932,10 +934,10 @@ class TestUopsOptimization(unittest.TestCase):
exec(src, ns, ns)
testfunc = ns['testfunc']
ns['_test_global'] = 0
_, ex = self._run_with_optimizer(testfunc, 16)
_, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
self.assertIsNone(ex)
ns['_test_global'] = 1
_, ex = self._run_with_optimizer(testfunc, 16)
_, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
self.assertIsNotNone(ex)
uops = get_opnames(ex)
self.assertNotIn("_GUARD_BOTH_INT", uops)
@ -946,10 +948,10 @@ class TestUopsOptimization(unittest.TestCase):
exec(src, ns, ns)
testfunc = ns['testfunc']
ns['_test_global'] = 0
_, ex = self._run_with_optimizer(testfunc, 16)
_, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
self.assertIsNone(ex)
ns['_test_global'] = 3.14
_, ex = self._run_with_optimizer(testfunc, 16)
_, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
self.assertIsNone(ex)
def test_combine_stack_space_checks_sequential(self):


@ -1124,6 +1124,7 @@ PYTHON_HEADERS= \
$(srcdir)/Include/internal/pycore_ast.h \
$(srcdir)/Include/internal/pycore_ast_state.h \
$(srcdir)/Include/internal/pycore_atexit.h \
$(srcdir)/Include/internal/pycore_backoff.h \
$(srcdir)/Include/internal/pycore_bitutils.h \
$(srcdir)/Include/internal/pycore_blocks_output_buffer.h \
$(srcdir)/Include/internal/pycore_brc.h \


@ -0,0 +1,11 @@
Introduce a unified 16-bit backoff counter type (``_Py_BackoffCounter``),
shared between the Tier 1 adaptive specializer and the Tier 2 optimizer. The
API used for adaptive specialization counters is changed but the behavior is
(supposed to be) identical.
The behavior of the Tier 2 counters is changed:
* There are no longer dynamic thresholds (we never varied these).
* All counters now use the same exponential backoff.
* The counter for ``JUMP_BACKWARD`` starts counting down from 16.
* The ``temperature`` in side exits starts counting down from 64.


@ -10,6 +10,7 @@
#undef NDEBUG
#include "Python.h"
#include "pycore_backoff.h" // JUMP_BACKWARD_INITIAL_VALUE
#include "pycore_bitutils.h" // _Py_bswap32()
#include "pycore_bytesobject.h" // _PyBytes_Find()
#include "pycore_ceval.h" // _PyEval_AddPendingCall()
@ -1819,6 +1820,11 @@ module_exec(PyObject *module)
return 1;
}
if (PyModule_Add(module, "TIER2_THRESHOLD",
PyLong_FromLong(JUMP_BACKWARD_INITIAL_VALUE)) < 0) {
return 1;
}
return 0;
}


@ -204,6 +204,7 @@
<ClInclude Include="..\Include\internal\pycore_ast.h" />
<ClInclude Include="..\Include\internal\pycore_ast_state.h" />
<ClInclude Include="..\Include\internal\pycore_atexit.h" />
<ClInclude Include="..\Include\internal\pycore_backoff.h" />
<ClInclude Include="..\Include\internal\pycore_bitutils.h" />
<ClInclude Include="..\Include\internal\pycore_brc.h" />
<ClInclude Include="..\Include\internal\pycore_bytes_methods.h" />


@ -8,6 +8,7 @@
#include "Python.h"
#include "pycore_abstract.h" // _PyIndex_Check()
#include "pycore_backoff.h"
#include "pycore_cell.h" // PyCell_GetRef()
#include "pycore_code.h"
#include "pycore_emscripten_signal.h" // _Py_CHECK_EMSCRIPTEN_SIGNALS
@ -326,13 +327,13 @@ dummy_func(
specializing op(_SPECIALIZE_TO_BOOL, (counter/1, value -- value)) {
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_ToBool(value, next_instr);
DISPATCH_SAME_OPARG();
}
STAT_INC(TO_BOOL, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
@ -551,13 +552,13 @@ dummy_func(
specializing op(_SPECIALIZE_BINARY_SUBSCR, (counter/1, container, sub -- container, sub)) {
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_BinarySubscr(container, sub, next_instr);
DISPATCH_SAME_OPARG();
}
STAT_INC(BINARY_SUBSCR, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
@ -698,13 +699,13 @@ dummy_func(
specializing op(_SPECIALIZE_STORE_SUBSCR, (counter/1, container, sub -- container, sub)) {
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_StoreSubscr(container, sub, next_instr);
DISPATCH_SAME_OPARG();
}
STAT_INC(STORE_SUBSCR, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
@ -982,13 +983,13 @@ dummy_func(
specializing op(_SPECIALIZE_SEND, (counter/1, receiver, unused -- receiver, unused)) {
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_Send(receiver, next_instr);
DISPATCH_SAME_OPARG();
}
STAT_INC(SEND, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
@ -1211,13 +1212,13 @@ dummy_func(
specializing op(_SPECIALIZE_UNPACK_SEQUENCE, (counter/1, seq -- seq)) {
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_UnpackSequence(seq, next_instr, oparg);
DISPATCH_SAME_OPARG();
}
STAT_INC(UNPACK_SEQUENCE, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
(void)seq;
(void)counter;
@ -1280,14 +1281,14 @@ dummy_func(
specializing op(_SPECIALIZE_STORE_ATTR, (counter/1, owner -- owner)) {
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
PyObject *name = GETITEM(FRAME_CO_NAMES, oparg);
next_instr = this_instr;
_Py_Specialize_StoreAttr(owner, next_instr, name);
DISPATCH_SAME_OPARG();
}
STAT_INC(STORE_ATTR, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
@ -1398,14 +1399,14 @@ dummy_func(
specializing op(_SPECIALIZE_LOAD_GLOBAL, (counter/1 -- )) {
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1);
next_instr = this_instr;
_Py_Specialize_LoadGlobal(GLOBALS(), BUILTINS(), next_instr, name);
DISPATCH_SAME_OPARG();
}
STAT_INC(LOAD_GLOBAL, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
@ -1711,7 +1712,7 @@ dummy_func(
inst(INSTRUMENTED_LOAD_SUPER_ATTR, (unused/1, unused, unused, unused -- unused, unused if (oparg & 1))) {
// cancel out the decrement that will happen in LOAD_SUPER_ATTR; we
// don't want to specialize instrumented instructions
INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter);
GO_TO_INSTRUCTION(LOAD_SUPER_ATTR);
}
@ -1723,13 +1724,13 @@ dummy_func(
specializing op(_SPECIALIZE_LOAD_SUPER_ATTR, (counter/1, global_super, class, unused -- global_super, class, unused)) {
#if ENABLE_SPECIALIZATION
int load_method = oparg & 1;
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_LoadSuperAttr(global_super, class, next_instr, load_method);
DISPATCH_SAME_OPARG();
}
STAT_INC(LOAD_SUPER_ATTR, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
@ -1836,14 +1837,14 @@ dummy_func(
specializing op(_SPECIALIZE_LOAD_ATTR, (counter/1, owner -- owner)) {
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1);
next_instr = this_instr;
_Py_Specialize_LoadAttr(owner, next_instr, name);
DISPATCH_SAME_OPARG();
}
STAT_INC(LOAD_ATTR, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
@ -2157,13 +2158,13 @@ dummy_func(
specializing op(_SPECIALIZE_COMPARE_OP, (counter/1, left, right -- left, right)) {
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_CompareOp(left, right, next_instr, oparg);
DISPATCH_SAME_OPARG();
}
STAT_INC(COMPARE_OP, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
@ -2254,13 +2255,13 @@ dummy_func(
specializing op(_SPECIALIZE_CONTAINS_OP, (counter/1, left, right -- left, right)) {
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_ContainsOp(right, next_instr);
DISPATCH_SAME_OPARG();
}
STAT_INC(CONTAINS_OP, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
@ -2340,16 +2341,8 @@ dummy_func(
assert(oparg <= INSTR_OFFSET());
JUMPBY(-oparg);
#if ENABLE_SPECIALIZATION
uint16_t counter = this_instr[1].cache;
this_instr[1].cache = counter + (1 << OPTIMIZER_BITS_IN_COUNTER);
/* We are using unsigned values, but we really want signed values, so
* do the 2s complement adjustment manually */
uint32_t offset_counter = counter ^ (1 << 15);
uint32_t threshold = tstate->interp->optimizer_backedge_threshold;
assert((threshold & OPTIMIZER_BITS_MASK) == 0);
// Use '>=' not '>' so that the optimizer/backoff bits do not effect the result.
// Double-check that the opcode isn't instrumented or something:
if (offset_counter >= threshold && this_instr->op.code == JUMP_BACKWARD) {
_Py_BackoffCounter counter = this_instr[1].counter;
if (backoff_counter_triggers(counter) && this_instr->op.code == JUMP_BACKWARD) {
_Py_CODEUNIT *start = this_instr;
/* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */
while (oparg > 255) {
@ -2365,17 +2358,12 @@ dummy_func(
GOTO_TIER_TWO(executor);
}
else {
int backoff = this_instr[1].cache & OPTIMIZER_BITS_MASK;
backoff++;
if (backoff < MIN_TIER2_BACKOFF) {
backoff = MIN_TIER2_BACKOFF;
}
else if (backoff > MAX_TIER2_BACKOFF) {
backoff = MAX_TIER2_BACKOFF;
}
this_instr[1].cache = ((UINT16_MAX << OPTIMIZER_BITS_IN_COUNTER) << backoff) | backoff;
this_instr[1].counter = restart_backoff_counter(counter);
}
}
else {
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
}
#endif /* ENABLE_SPECIALIZATION */
}
@ -2535,13 +2523,13 @@ dummy_func(
specializing op(_SPECIALIZE_FOR_ITER, (counter/1, iter -- iter)) {
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_ForIter(iter, next_instr, oparg);
DISPATCH_SAME_OPARG();
}
STAT_INC(FOR_ITER, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
@ -3001,7 +2989,7 @@ dummy_func(
tstate, PY_MONITORING_EVENT_CALL,
frame, this_instr, function, arg);
ERROR_IF(err, error);
INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter);
GO_TO_INSTRUCTION(CALL);
}
@ -3030,13 +3018,13 @@ dummy_func(
specializing op(_SPECIALIZE_CALL, (counter/1, callable, self_or_null, args[oparg] -- callable, self_or_null, args[oparg])) {
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_Call(callable, next_instr, oparg + (self_or_null != NULL));
DISPATCH_SAME_OPARG();
}
STAT_INC(CALL, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
@ -3933,13 +3921,13 @@ dummy_func(
specializing op(_SPECIALIZE_BINARY_OP, (counter/1, lhs, rhs -- lhs, rhs)) {
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_BinaryOp(lhs, rhs, next_instr, oparg, LOCALS_ARRAY);
DISPATCH_SAME_OPARG();
}
STAT_INC(BINARY_OP, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
assert(NB_ADD <= oparg);
assert(oparg <= NB_INPLACE_XOR);
@ -3965,7 +3953,7 @@ dummy_func(
ERROR_IF(next_opcode < 0, error);
next_instr = this_instr;
if (_PyOpcode_Caches[next_opcode]) {
INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
PAUSE_ADAPTIVE_COUNTER(next_instr[1].counter);
}
assert(next_opcode > 0 && next_opcode < 256);
opcode = next_opcode;
@ -4157,21 +4145,22 @@ dummy_func(
tier2 op(_COLD_EXIT, (--)) {
_PyExecutorObject *previous = (_PyExecutorObject *)tstate->previous_executor;
_PyExitData *exit = &previous->exits[oparg];
exit->temperature++;
PyCodeObject *code = _PyFrame_GetCode(frame);
_Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target;
if (exit->temperature < (int32_t)tstate->interp->optimizer_side_threshold) {
_Py_BackoffCounter temperature = exit->temperature;
if (!backoff_counter_triggers(temperature)) {
exit->temperature = advance_backoff_counter(temperature);
GOTO_TIER_ONE(target);
}
_PyExecutorObject *executor;
if (target->op.code == ENTER_EXECUTOR) {
executor = code->co_executors->executors[target->op.arg];
Py_INCREF(executor);
} else {
}
else {
int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor);
if (optimized <= 0) {
int32_t new_temp = -1 * tstate->interp->optimizer_side_threshold;
exit->temperature = (new_temp < INT16_MIN) ? INT16_MIN : new_temp;
exit->temperature = restart_backoff_counter(temperature);
if (optimized < 0) {
Py_DECREF(previous);
tstate->previous_executor = Py_None;
@ -4181,7 +4170,7 @@ dummy_func(
}
}
/* We need two references. One to store in exit->executor and
* one to keep the executor alive when executing. */
* one to keep the executor alive when executing. */
Py_INCREF(executor);
exit->executor = executor;
GOTO_TIER_TWO(executor);


@ -4,6 +4,7 @@
#include "Python.h"
#include "pycore_abstract.h" // _PyIndex_Check()
#include "pycore_backoff.h"
#include "pycore_call.h" // _PyObject_CallNoArgs()
#include "pycore_cell.h" // PyCell_GetRef()
#include "pycore_ceval.h"
@ -822,7 +823,7 @@ resume_frame:
_PyBinaryOpCache *cache = (_PyBinaryOpCache *)(next_instr+1);
/* Prevent the underlying instruction from specializing
* and overwriting the instrumentation. */
INCREMENT_ADAPTIVE_COUNTER(cache->counter);
PAUSE_ADAPTIVE_COUNTER(cache->counter);
}
opcode = original_opcode;
DISPATCH_GOTO();
@ -1099,7 +1100,7 @@ exit_to_trace:
printf("SIDE EXIT: [UOp ");
_PyUOpPrint(&next_uop[-1]);
printf(", exit %u, temp %d, target %d -> %s]\n",
exit_index, exit->temperature, exit->target,
exit_index, exit->temperature.as_counter, exit->target,
_PyOpcode_OpName[_PyCode_CODE(_PyFrame_GetCode(frame))[exit->target].op.code]);
}
#endif


@ -262,7 +262,7 @@ GETITEM(PyObject *v, Py_ssize_t i) {
STAT_INC(opcode, miss); \
STAT_INC((INSTNAME), miss); \
/* The counter is always the first cache entry: */ \
if (ADAPTIVE_COUNTER_IS_ZERO(next_instr->cache)) { \
if (ADAPTIVE_COUNTER_TRIGGERS(next_instr->cache)) { \
STAT_INC((INSTNAME), deopt); \
} \
} while (0)
@ -290,29 +290,28 @@ GETITEM(PyObject *v, Py_ssize_t i) {
dtrace_function_entry(frame); \
}
#define ADAPTIVE_COUNTER_IS_ZERO(COUNTER) \
(((COUNTER) >> ADAPTIVE_BACKOFF_BITS) == 0)
#define ADAPTIVE_COUNTER_IS_MAX(COUNTER) \
(((COUNTER) >> ADAPTIVE_BACKOFF_BITS) == ((1 << MAX_BACKOFF_VALUE) - 1))
/* This takes a uint16_t instead of a _Py_BackoffCounter,
* because it is used directly on the cache entry in generated code,
* which is always an integral type. */
#define ADAPTIVE_COUNTER_TRIGGERS(COUNTER) \
backoff_counter_triggers(forge_backoff_counter((COUNTER)))
#ifdef Py_GIL_DISABLED
#define DECREMENT_ADAPTIVE_COUNTER(COUNTER) \
do { \
/* gh-115999 tracks progress on addressing this. */ \
#define ADVANCE_ADAPTIVE_COUNTER(COUNTER) \
do { \
/* gh-115999 tracks progress on addressing this. */ \
static_assert(0, "The specializing interpreter is not yet thread-safe"); \
} while (0);
#else
#define DECREMENT_ADAPTIVE_COUNTER(COUNTER) \
do { \
assert(!ADAPTIVE_COUNTER_IS_ZERO((COUNTER))); \
(COUNTER) -= (1 << ADAPTIVE_BACKOFF_BITS); \
#define ADVANCE_ADAPTIVE_COUNTER(COUNTER) \
do { \
(COUNTER) = advance_backoff_counter((COUNTER)); \
} while (0);
#endif
#define INCREMENT_ADAPTIVE_COUNTER(COUNTER) \
do { \
(COUNTER) += (1 << ADAPTIVE_BACKOFF_BITS); \
#define PAUSE_ADAPTIVE_COUNTER(COUNTER) \
do { \
(COUNTER) = pause_backoff_counter((COUNTER)); \
} while (0);
#define UNBOUNDLOCAL_ERROR_MSG \


@ -3694,21 +3694,22 @@
oparg = CURRENT_OPARG();
_PyExecutorObject *previous = (_PyExecutorObject *)tstate->previous_executor;
_PyExitData *exit = &previous->exits[oparg];
exit->temperature++;
PyCodeObject *code = _PyFrame_GetCode(frame);
_Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target;
if (exit->temperature < (int32_t)tstate->interp->optimizer_side_threshold) {
_Py_BackoffCounter temperature = exit->temperature;
if (!backoff_counter_triggers(temperature)) {
exit->temperature = advance_backoff_counter(temperature);
GOTO_TIER_ONE(target);
}
_PyExecutorObject *executor;
if (target->op.code == ENTER_EXECUTOR) {
executor = code->co_executors->executors[target->op.arg];
Py_INCREF(executor);
} else {
}
else {
int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor);
if (optimized <= 0) {
int32_t new_temp = -1 * tstate->interp->optimizer_side_threshold;
exit->temperature = (new_temp < INT16_MIN) ? INT16_MIN : new_temp;
exit->temperature = restart_backoff_counter(temperature);
if (optimized < 0) {
Py_DECREF(previous);
tstate->previous_executor = Py_None;


@ -115,13 +115,13 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_BinaryOp(lhs, rhs, next_instr, oparg, LOCALS_ARRAY);
DISPATCH_SAME_OPARG();
}
STAT_INC(BINARY_OP, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
assert(NB_ADD <= oparg);
assert(oparg <= NB_INPLACE_XOR);
@ -432,13 +432,13 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_BinarySubscr(container, sub, next_instr);
DISPATCH_SAME_OPARG();
}
STAT_INC(BINARY_SUBSCR, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
// _BINARY_SUBSCR
@ -760,13 +760,13 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_Call(callable, next_instr, oparg + (self_or_null != NULL));
DISPATCH_SAME_OPARG();
}
STAT_INC(CALL, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
/* Skip 2 cache entries */
@ -2036,13 +2036,13 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_CompareOp(left, right, next_instr, oparg);
DISPATCH_SAME_OPARG();
}
STAT_INC(COMPARE_OP, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
// _COMPARE_OP
@ -2185,13 +2185,13 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_ContainsOp(right, next_instr);
DISPATCH_SAME_OPARG();
}
STAT_INC(CONTAINS_OP, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
// _CONTAINS_OP
@ -2596,13 +2596,13 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_ForIter(iter, next_instr, oparg);
DISPATCH_SAME_OPARG();
}
STAT_INC(FOR_ITER, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
// _FOR_ITER
@ -3026,7 +3026,7 @@
tstate, PY_MONITORING_EVENT_CALL,
frame, this_instr, function, arg);
if (err) goto error;
INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter);
GO_TO_INSTRUCTION(CALL);
}
@ -3142,7 +3142,7 @@
if (next_opcode < 0) goto error;
next_instr = this_instr;
if (_PyOpcode_Caches[next_opcode]) {
INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
PAUSE_ADAPTIVE_COUNTER(next_instr[1].counter);
}
assert(next_opcode > 0 && next_opcode < 256);
opcode = next_opcode;
@ -3177,7 +3177,7 @@
/* Skip 1 cache entry */
// cancel out the decrement that will happen in LOAD_SUPER_ATTR; we
// don't want to specialize instrumented instructions
INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter);
GO_TO_INSTRUCTION(LOAD_SUPER_ATTR);
}
@ -3415,16 +3415,8 @@
assert(oparg <= INSTR_OFFSET());
JUMPBY(-oparg);
#if ENABLE_SPECIALIZATION
uint16_t counter = this_instr[1].cache;
this_instr[1].cache = counter + (1 << OPTIMIZER_BITS_IN_COUNTER);
/* We are using unsigned values, but we really want signed values, so
* do the 2s complement adjustment manually */
uint32_t offset_counter = counter ^ (1 << 15);
uint32_t threshold = tstate->interp->optimizer_backedge_threshold;
assert((threshold & OPTIMIZER_BITS_MASK) == 0);
// Use '>=' not '>' so that the optimizer/backoff bits do not effect the result.
// Double-check that the opcode isn't instrumented or something:
if (offset_counter >= threshold && this_instr->op.code == JUMP_BACKWARD) {
_Py_BackoffCounter counter = this_instr[1].counter;
if (backoff_counter_triggers(counter) && this_instr->op.code == JUMP_BACKWARD) {
_Py_CODEUNIT *start = this_instr;
/* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */
while (oparg > 255) {
@ -3440,17 +3432,12 @@
GOTO_TIER_TWO(executor);
}
else {
int backoff = this_instr[1].cache & OPTIMIZER_BITS_MASK;
backoff++;
if (backoff < MIN_TIER2_BACKOFF) {
backoff = MIN_TIER2_BACKOFF;
}
else if (backoff > MAX_TIER2_BACKOFF) {
backoff = MAX_TIER2_BACKOFF;
}
this_instr[1].cache = ((UINT16_MAX << OPTIMIZER_BITS_IN_COUNTER) << backoff) | backoff;
this_instr[1].counter = restart_backoff_counter(counter);
}
}
else {
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
}
#endif /* ENABLE_SPECIALIZATION */
DISPATCH();
}
@ -3543,14 +3530,14 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1);
next_instr = this_instr;
_Py_Specialize_LoadAttr(owner, next_instr, name);
DISPATCH_SAME_OPARG();
}
STAT_INC(LOAD_ATTR, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
/* Skip 8 cache entries */
@ -4238,14 +4225,14 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1);
next_instr = this_instr;
_Py_Specialize_LoadGlobal(GLOBALS(), BUILTINS(), next_instr, name);
DISPATCH_SAME_OPARG();
}
STAT_INC(LOAD_GLOBAL, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
/* Skip 1 cache entry */
@ -4442,13 +4429,13 @@
(void)counter;
#if ENABLE_SPECIALIZATION
int load_method = oparg & 1;
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_LoadSuperAttr(global_super, class, next_instr, load_method);
DISPATCH_SAME_OPARG();
}
STAT_INC(LOAD_SUPER_ATTR, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
// _LOAD_SUPER_ATTR
@ -5083,13 +5070,13 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_Send(receiver, next_instr);
DISPATCH_SAME_OPARG();
}
STAT_INC(SEND, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
// _SEND
@ -5271,14 +5258,14 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
PyObject *name = GETITEM(FRAME_CO_NAMES, oparg);
next_instr = this_instr;
_Py_Specialize_StoreAttr(owner, next_instr, name);
DISPATCH_SAME_OPARG();
}
STAT_INC(STORE_ATTR, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
/* Skip 3 cache entries */
@ -5562,13 +5549,13 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_StoreSubscr(container, sub, next_instr);
DISPATCH_SAME_OPARG();
}
STAT_INC(STORE_SUBSCR, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
// _STORE_SUBSCR
@ -5665,13 +5652,13 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_ToBool(value, next_instr);
DISPATCH_SAME_OPARG();
}
STAT_INC(TO_BOOL, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
/* Skip 2 cache entries */
@ -5882,13 +5869,13 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_UnpackSequence(seq, next_instr, oparg);
DISPATCH_SAME_OPARG();
}
STAT_INC(UNPACK_SEQUENCE, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
(void)seq;
(void)counter;


@ -590,7 +590,7 @@ de_instrument(PyCodeObject *code, int i, int event)
CHECK(_PyOpcode_Deopt[deinstrumented] == deinstrumented);
*opcode_ptr = deinstrumented;
if (_PyOpcode_Caches[deinstrumented]) {
instr[1].cache = adaptive_counter_warmup();
instr[1].counter = adaptive_counter_warmup();
}
}
@ -611,7 +611,7 @@ de_instrument_line(PyCodeObject *code, int i)
CHECK(original_opcode == _PyOpcode_Deopt[original_opcode]);
instr->op.code = original_opcode;
if (_PyOpcode_Caches[original_opcode]) {
instr[1].cache = adaptive_counter_warmup();
instr[1].counter = adaptive_counter_warmup();
}
assert(instr->op.code != INSTRUMENTED_LINE);
}
@ -634,7 +634,7 @@ de_instrument_per_instruction(PyCodeObject *code, int i)
CHECK(original_opcode == _PyOpcode_Deopt[original_opcode]);
*opcode_ptr = original_opcode;
if (_PyOpcode_Caches[original_opcode]) {
instr[1].cache = adaptive_counter_warmup();
instr[1].counter = adaptive_counter_warmup();
}
assert(*opcode_ptr != INSTRUMENTED_INSTRUCTION);
assert(instr->op.code != INSTRUMENTED_INSTRUCTION);
@ -667,7 +667,7 @@ instrument(PyCodeObject *code, int i)
assert(instrumented);
*opcode_ptr = instrumented;
if (_PyOpcode_Caches[deopt]) {
instr[1].cache = adaptive_counter_warmup();
instr[1].counter = adaptive_counter_warmup();
}
}
}


@ -1,6 +1,7 @@
#include "Python.h"
#include "opcode.h"
#include "pycore_interp.h"
#include "pycore_backoff.h"
#include "pycore_bitutils.h" // _Py_popcount32()
#include "pycore_object.h" // _PyObject_GC_UNTRACK()
#include "pycore_opcode_metadata.h" // _PyOpcode_OpName[]
@ -110,9 +111,7 @@ never_optimize(
_PyExecutorObject **exec,
int Py_UNUSED(stack_entries))
{
/* Although it should be benign for this to be called,
* it shouldn't happen, so fail in debug builds. */
assert(0 && "never optimize should never be called");
// This may be called if the optimizer is reset
return 0;
}
@ -127,25 +126,12 @@ PyTypeObject _PyDefaultOptimizer_Type = {
static _PyOptimizerObject _PyOptimizer_Default = {
PyObject_HEAD_INIT(&_PyDefaultOptimizer_Type)
.optimize = never_optimize,
.resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD,
.backedge_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD,
.side_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD,
};
static uint32_t
shift_and_offset_threshold(uint32_t threshold)
{
return (threshold << OPTIMIZER_BITS_IN_COUNTER) + (1 << 15);
}
_PyOptimizerObject *
PyUnstable_GetOptimizer(void)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
assert(interp->optimizer_backedge_threshold ==
shift_and_offset_threshold(interp->optimizer->backedge_threshold));
assert(interp->optimizer_resume_threshold ==
shift_and_offset_threshold(interp->optimizer->resume_threshold));
if (interp->optimizer == &_PyOptimizer_Default) {
return NULL;
}
@ -190,13 +176,6 @@ _Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject *optimizer)
}
Py_INCREF(optimizer);
interp->optimizer = optimizer;
interp->optimizer_backedge_threshold = shift_and_offset_threshold(optimizer->backedge_threshold);
interp->optimizer_resume_threshold = shift_and_offset_threshold(optimizer->resume_threshold);
interp->optimizer_side_threshold = optimizer->side_threshold;
if (optimizer == &_PyOptimizer_Default) {
assert(interp->optimizer_backedge_threshold > (1 << 16));
assert(interp->optimizer_resume_threshold > (1 << 16));
}
return old;
}
@ -1109,7 +1088,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil
assert(exit_count < COLD_EXIT_COUNT);
for (int i = 0; i < exit_count; i++) {
executor->exits[i].executor = &COLD_EXITS[i];
executor->exits[i].temperature = 0;
executor->exits[i].temperature = initial_temperature_backoff_counter();
}
int next_exit = exit_count-1;
_PyUOpInstruction *dest = (_PyUOpInstruction *)&executor->trace[length];
@ -1291,11 +1270,6 @@ PyUnstable_Optimizer_NewUOpOptimizer(void)
return NULL;
}
opt->optimize = uop_optimize;
opt->resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD;
// Need a few iterations to settle specializations,
// and to ammortize the cost of optimization.
opt->side_threshold = 16;
opt->backedge_threshold = 16;
return (PyObject *)opt;
}
@ -1385,9 +1359,6 @@ PyUnstable_Optimizer_NewCounter(void)
return NULL;
}
opt->base.optimize = counter_optimize;
opt->base.resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD;
opt->base.side_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD;
opt->base.backedge_threshold = 0;
opt->count = 0;
return (PyObject *)opt;
}
@ -1554,7 +1525,7 @@ _Py_ExecutorClear(_PyExecutorObject *executor)
for (uint32_t i = 0; i < executor->exit_count; i++) {
Py_DECREF(executor->exits[i].executor);
executor->exits[i].executor = &COLD_EXITS[i];
executor->exits[i].temperature = INT16_MIN;
executor->exits[i].temperature = initial_unreachable_backoff_counter();
}
_Py_CODEUNIT *instruction = &_PyCode_CODE(code)[executor->vm_data.index];
assert(instruction->op.code == ENTER_EXECUTOR);


@ -419,22 +419,20 @@ _PyCode_Quicken(PyCodeObject *code)
int caches = _PyOpcode_Caches[opcode];
if (caches) {
// The initial value depends on the opcode
int initial_value;
switch (opcode) {
case JUMP_BACKWARD:
initial_value = 0;
instructions[i + 1].counter = initial_jump_backoff_counter();
break;
case POP_JUMP_IF_FALSE:
case POP_JUMP_IF_TRUE:
case POP_JUMP_IF_NONE:
case POP_JUMP_IF_NOT_NONE:
initial_value = 0x5555; // Alternating 0, 1 bits
instructions[i + 1].cache = 0x5555; // Alternating 0, 1 bits
break;
default:
initial_value = adaptive_counter_warmup();
instructions[i + 1].counter = adaptive_counter_warmup();
break;
}
instructions[i + 1].cache = initial_value;
i += caches;
}
}


@ -1,5 +1,6 @@
#include "Python.h"
#include "pycore_backoff.h"
#include "pycore_call.h"
#include "pycore_ceval.h"
#include "pycore_cell.h"