mirror of https://github.com/python/cpython
bpo-44338: Port LOAD_GLOBAL to PEP 659 adaptive interpreter (GH-26638)
* Add specializations of LOAD_GLOBAL. * Add more stats. * Remove old opcache; it is no longer used. * Add NEWS
This commit is contained in:
parent
fafcfff926
commit
eecbc7c390
|
@ -106,20 +106,6 @@ struct PyCodeObject {
|
|||
interpreter. */
|
||||
union _cache_or_instruction *co_quickened;
|
||||
|
||||
/* Per opcodes just-in-time cache
|
||||
*
|
||||
* To reduce cache size, we use indirect mapping from opcode index to
|
||||
* cache object:
|
||||
* cache = co_opcache[co_opcache_map[next_instr - first_instr] - 1]
|
||||
*/
|
||||
|
||||
// co_opcache_map is indexed by (next_instr - first_instr).
|
||||
// * 0 means there is no cache for this opcode.
|
||||
// * n > 0 means there is cache in co_opcache[n-1].
|
||||
unsigned char *co_opcache_map;
|
||||
_PyOpcache *co_opcache;
|
||||
int co_opcache_flag; // used to determine when create a cache.
|
||||
unsigned char co_opcache_size; // length of co_opcache.
|
||||
};
|
||||
|
||||
/* Masks for co_flags above */
|
||||
|
|
|
@ -48,6 +48,11 @@ typedef struct {
|
|||
uint32_t dk_version_or_hint;
|
||||
} _PyLoadAttrCache;
|
||||
|
||||
/* Cache data for the specialized LOAD_GLOBAL instructions: the dict-keys
 * versions recorded when the instruction was specialized. Two uint32_t
 * fields keep this struct at 8 bytes, in line with the
 * sizeof(SpecializedCacheEntry) == 8 invariant of the union it is added to. */
typedef struct {
|
||||
/* Compared against the globals dict's ma_keys->dk_version at run time. */
uint32_t module_keys_version;
|
||||
/* Compared against the builtins dict's ma_keys->dk_version; only written
 * when specializing to LOAD_GLOBAL_BUILTIN. */
uint32_t builtin_keys_version;
|
||||
} _PyLoadGlobalCache;
|
||||
|
||||
/* Add specialized versions of entries to this union.
|
||||
*
|
||||
* Do not break the invariant: sizeof(SpecializedCacheEntry) == 8
|
||||
|
@ -62,6 +67,7 @@ typedef union {
|
|||
_PyEntryZero zero;
|
||||
_PyAdaptiveEntry adaptive;
|
||||
_PyLoadAttrCache load_attr;
|
||||
_PyLoadGlobalCache load_global;
|
||||
} SpecializedCacheEntry;
|
||||
|
||||
#define INSTRUCTIONS_PER_ENTRY (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT))
|
||||
|
@ -254,8 +260,6 @@ PyAPI_FUNC(PyCodeObject *) _PyCode_New(struct _PyCodeConstructor *);
|
|||
|
||||
/* Private API */
|
||||
|
||||
int _PyCode_InitOpcache(PyCodeObject *co);
|
||||
|
||||
/* Getters for internal PyCodeObject data. */
|
||||
PyAPI_FUNC(PyObject *) _PyCode_GetVarnames(PyCodeObject *);
|
||||
PyAPI_FUNC(PyObject *) _PyCode_GetCellvars(PyCodeObject *);
|
||||
|
@ -318,24 +322,25 @@ cache_backoff(_PyAdaptiveEntry *entry) {
|
|||
/* Specialization functions */
|
||||
|
||||
int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
|
||||
int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
|
||||
|
||||
#define SPECIALIZATION_STATS 0
|
||||
#if SPECIALIZATION_STATS
|
||||
|
||||
typedef struct _specialization_stats {
|
||||
typedef struct _stats {
|
||||
uint64_t specialization_success;
|
||||
uint64_t specialization_failure;
|
||||
uint64_t loadattr_hit;
|
||||
uint64_t loadattr_deferred;
|
||||
uint64_t loadattr_miss;
|
||||
uint64_t loadattr_deopt;
|
||||
uint64_t hit;
|
||||
uint64_t deferred;
|
||||
uint64_t miss;
|
||||
uint64_t deopt;
|
||||
} SpecializationStats;
|
||||
|
||||
extern SpecializationStats _specialization_stats;
|
||||
#define STAT_INC(name) _specialization_stats.name++
|
||||
extern SpecializationStats _specialization_stats[256];
|
||||
#define STAT_INC(opname, name) _specialization_stats[opname].name++
|
||||
void _Py_PrintSpecializationStats(void);
|
||||
#else
|
||||
#define STAT_INC(name) ((void)0)
|
||||
#define STAT_INC(opname, name) ((void)0)
|
||||
#endif
|
||||
|
||||
|
||||
|
|
|
@ -142,6 +142,9 @@ extern "C" {
|
|||
#define LOAD_ATTR_WITH_HINT 14
|
||||
#define LOAD_ATTR_SLOT 18
|
||||
#define LOAD_ATTR_MODULE 21
|
||||
#define LOAD_GLOBAL_ADAPTIVE 36
|
||||
#define LOAD_GLOBAL_MODULE 38
|
||||
#define LOAD_GLOBAL_BUILTIN 39
|
||||
#ifdef NEED_OPCODE_JUMP_TABLES
|
||||
static uint32_t _PyOpcode_RelativeJump[8] = {
|
||||
0U,
|
||||
|
|
|
@ -226,4 +226,7 @@ _specialized_instructions = [
|
|||
"LOAD_ATTR_WITH_HINT",
|
||||
"LOAD_ATTR_SLOT",
|
||||
"LOAD_ATTR_MODULE",
|
||||
"LOAD_GLOBAL_ADAPTIVE",
|
||||
"LOAD_GLOBAL_MODULE",
|
||||
"LOAD_GLOBAL_BUILTIN",
|
||||
]
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
Implement adaptive specialization for LOAD_GLOBAL
|
||||
|
||||
Two specialized forms of LOAD_GLOBAL are added:
|
||||
|
||||
* LOAD_GLOBAL_MODULE
|
||||
|
||||
* LOAD_GLOBAL_BUILTIN
|
|
@ -350,10 +350,7 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con)
|
|||
/* not set */
|
||||
co->co_weakreflist = NULL;
|
||||
co->co_extra = NULL;
|
||||
co->co_opcache_map = NULL;
|
||||
co->co_opcache = NULL;
|
||||
co->co_opcache_flag = 0;
|
||||
co->co_opcache_size = 0;
|
||||
|
||||
co->co_warmup = QUICKENING_INITIAL_WARMUP_VALUE;
|
||||
co->co_quickened = NULL;
|
||||
}
|
||||
|
@ -912,55 +909,6 @@ new_linesiterator(PyCodeObject *code)
|
|||
return li;
|
||||
}
|
||||
|
||||
|
||||
/******************
|
||||
* the opcache
|
||||
******************/
|
||||
|
||||
int
|
||||
_PyCode_InitOpcache(PyCodeObject *co)
|
||||
{
|
||||
Py_ssize_t co_size = PyBytes_Size(co->co_code) / sizeof(_Py_CODEUNIT);
|
||||
co->co_opcache_map = (unsigned char *)PyMem_Calloc(co_size, 1);
|
||||
if (co->co_opcache_map == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
const _Py_CODEUNIT *opcodes = (const _Py_CODEUNIT*)PyBytes_AS_STRING(co->co_code);
|
||||
Py_ssize_t opts = 0;
|
||||
|
||||
for (Py_ssize_t i = 0; i < co_size;) {
|
||||
unsigned char opcode = _Py_OPCODE(opcodes[i]);
|
||||
i++; // 'i' is now aligned to (next_instr - first_instr)
|
||||
|
||||
// TODO: LOAD_METHOD
|
||||
if (opcode == LOAD_GLOBAL || opcode == LOAD_ATTR) {
|
||||
opts++;
|
||||
co->co_opcache_map[i] = (unsigned char)opts;
|
||||
if (opts > 254) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (opts) {
|
||||
co->co_opcache = (_PyOpcache *)PyMem_Calloc(opts, sizeof(_PyOpcache));
|
||||
if (co->co_opcache == NULL) {
|
||||
PyMem_Free(co->co_opcache_map);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
else {
|
||||
PyMem_Free(co->co_opcache_map);
|
||||
co->co_opcache_map = NULL;
|
||||
co->co_opcache = NULL;
|
||||
}
|
||||
|
||||
co->co_opcache_size = (unsigned char)opts;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/******************
|
||||
* "extra" frame eval info (see PEP 523)
|
||||
******************/
|
||||
|
@ -1207,15 +1155,6 @@ code_new_impl(PyTypeObject *type, int argcount, int posonlyargcount,
|
|||
static void
|
||||
code_dealloc(PyCodeObject *co)
|
||||
{
|
||||
if (co->co_opcache != NULL) {
|
||||
PyMem_Free(co->co_opcache);
|
||||
}
|
||||
if (co->co_opcache_map != NULL) {
|
||||
PyMem_Free(co->co_opcache_map);
|
||||
}
|
||||
co->co_opcache_flag = 0;
|
||||
co->co_opcache_size = 0;
|
||||
|
||||
if (co->co_extra != NULL) {
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||
_PyCodeObjectExtra *co_extra = co->co_extra;
|
||||
|
@ -1442,12 +1381,11 @@ code_sizeof(PyCodeObject *co, PyObject *Py_UNUSED(args))
|
|||
res += co->co_ncellvars * sizeof(Py_ssize_t);
|
||||
}
|
||||
|
||||
if (co->co_opcache != NULL) {
|
||||
assert(co->co_opcache_map != NULL);
|
||||
// co_opcache_map
|
||||
res += PyBytes_GET_SIZE(co->co_code) / sizeof(_Py_CODEUNIT);
|
||||
// co_opcache
|
||||
res += co->co_opcache_size * sizeof(_PyOpcache);
|
||||
if (co->co_quickened != NULL) {
|
||||
Py_ssize_t count = co->co_quickened[0].entry.zero.cache_count;
|
||||
count += (PyBytes_GET_SIZE(co->co_code)+sizeof(SpecializedCacheEntry)-1)/
|
||||
sizeof(SpecializedCacheEntry);
|
||||
res += count * sizeof(SpecializedCacheEntry);
|
||||
}
|
||||
|
||||
return PyLong_FromSsize_t(res);
|
||||
|
|
314
Python/ceval.c
314
Python/ceval.c
|
@ -13,7 +13,7 @@
|
|||
#include "pycore_abstract.h" // _PyIndex_Check()
|
||||
#include "pycore_call.h" // _PyObject_FastCallDictTstate()
|
||||
#include "pycore_ceval.h" // _PyEval_SignalAsyncExc()
|
||||
#include "pycore_code.h" // _PyCode_InitOpcache()
|
||||
#include "pycore_code.h"
|
||||
#include "pycore_initconfig.h" // _PyStatus_OK()
|
||||
#include "pycore_object.h" // _PyObject_GC_TRACK()
|
||||
#include "pycore_moduleobject.h"
|
||||
|
@ -109,7 +109,6 @@ static long dxp[256];
|
|||
/* per opcode cache */
|
||||
static int opcache_min_runs = 1024; /* create opcache when code executed this many times */
|
||||
#define OPCODE_CACHE_MAX_TRIES 20
|
||||
#define OPCACHE_STATS 0 /* Enable stats */
|
||||
|
||||
// This function deactivates the opcode cache. As different cache mechanisms may hold
|
||||
// references, this can mess with the reference leak detector functionality so the cache needs
|
||||
|
@ -120,22 +119,6 @@ _PyEval_DeactivateOpCache(void)
|
|||
opcache_min_runs = 0;
|
||||
}
|
||||
|
||||
#if OPCACHE_STATS
|
||||
static size_t opcache_code_objects = 0;
|
||||
static size_t opcache_code_objects_extra_mem = 0;
|
||||
|
||||
static size_t opcache_global_opts = 0;
|
||||
static size_t opcache_global_hits = 0;
|
||||
static size_t opcache_global_misses = 0;
|
||||
|
||||
static size_t opcache_attr_opts = 0;
|
||||
static size_t opcache_attr_hits = 0;
|
||||
static size_t opcache_attr_misses = 0;
|
||||
static size_t opcache_attr_deopts = 0;
|
||||
static size_t opcache_attr_total = 0;
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef NDEBUG
|
||||
/* Ensure that tstate is valid: sanity check for PyEval_AcquireThread() and
|
||||
PyEval_RestoreThread(). Detect if tstate memory was freed. It can happen
|
||||
|
@ -360,48 +343,8 @@ PyEval_InitThreads(void)
|
|||
void
|
||||
_PyEval_Fini(void)
|
||||
{
|
||||
#if OPCACHE_STATS
|
||||
fprintf(stderr, "-- Opcode cache number of objects = %zd\n",
|
||||
opcache_code_objects);
|
||||
|
||||
fprintf(stderr, "-- Opcode cache total extra mem = %zd\n",
|
||||
opcache_code_objects_extra_mem);
|
||||
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
fprintf(stderr, "-- Opcode cache LOAD_GLOBAL hits = %zd (%d%%)\n",
|
||||
opcache_global_hits,
|
||||
(int) (100.0 * opcache_global_hits /
|
||||
(opcache_global_hits + opcache_global_misses)));
|
||||
|
||||
fprintf(stderr, "-- Opcode cache LOAD_GLOBAL misses = %zd (%d%%)\n",
|
||||
opcache_global_misses,
|
||||
(int) (100.0 * opcache_global_misses /
|
||||
(opcache_global_hits + opcache_global_misses)));
|
||||
|
||||
fprintf(stderr, "-- Opcode cache LOAD_GLOBAL opts = %zd\n",
|
||||
opcache_global_opts);
|
||||
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
fprintf(stderr, "-- Opcode cache LOAD_ATTR hits = %zd (%d%%)\n",
|
||||
opcache_attr_hits,
|
||||
(int) (100.0 * opcache_attr_hits /
|
||||
opcache_attr_total));
|
||||
|
||||
fprintf(stderr, "-- Opcode cache LOAD_ATTR misses = %zd (%d%%)\n",
|
||||
opcache_attr_misses,
|
||||
(int) (100.0 * opcache_attr_misses /
|
||||
opcache_attr_total));
|
||||
|
||||
fprintf(stderr, "-- Opcode cache LOAD_ATTR opts = %zd\n",
|
||||
opcache_attr_opts);
|
||||
|
||||
fprintf(stderr, "-- Opcode cache LOAD_ATTR deopts = %zd\n",
|
||||
opcache_attr_deopts);
|
||||
|
||||
fprintf(stderr, "-- Opcode cache LOAD_ATTR total = %zd\n",
|
||||
opcache_attr_total);
|
||||
#if SPECIALIZATION_STATS
|
||||
_Py_PrintSpecializationStats();
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -1448,108 +1391,11 @@ eval_frame_handle_pending(PyThreadState *tstate)
|
|||
GETLOCAL(i) = value; \
|
||||
Py_XDECREF(tmp); } while (0)
|
||||
|
||||
/* macros for opcode cache */
|
||||
#define OPCACHE_CHECK() \
|
||||
do { \
|
||||
co_opcache = NULL; \
|
||||
if (co->co_opcache != NULL) { \
|
||||
unsigned char co_opcache_offset = \
|
||||
co->co_opcache_map[next_instr - first_instr]; \
|
||||
if (co_opcache_offset > 0) { \
|
||||
assert(co_opcache_offset <= co->co_opcache_size); \
|
||||
co_opcache = &co->co_opcache[co_opcache_offset - 1]; \
|
||||
assert(co_opcache != NULL); \
|
||||
} \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define OPCACHE_DEOPT() \
|
||||
do { \
|
||||
if (co_opcache != NULL) { \
|
||||
co_opcache->optimized = -1; \
|
||||
unsigned char co_opcache_offset = \
|
||||
co->co_opcache_map[next_instr - first_instr]; \
|
||||
assert(co_opcache_offset <= co->co_opcache_size); \
|
||||
co->co_opcache_map[co_opcache_offset] = 0; \
|
||||
co_opcache = NULL; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define OPCACHE_DEOPT_LOAD_ATTR() \
|
||||
do { \
|
||||
if (co_opcache != NULL) { \
|
||||
OPCACHE_STAT_ATTR_DEOPT(); \
|
||||
OPCACHE_DEOPT(); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define OPCACHE_MAYBE_DEOPT_LOAD_ATTR() \
|
||||
do { \
|
||||
if (co_opcache != NULL && --co_opcache->optimized <= 0) { \
|
||||
OPCACHE_DEOPT_LOAD_ATTR(); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#if OPCACHE_STATS
|
||||
|
||||
#define OPCACHE_STAT_GLOBAL_HIT() \
|
||||
do { \
|
||||
if (co->co_opcache != NULL) opcache_global_hits++; \
|
||||
} while (0)
|
||||
|
||||
#define OPCACHE_STAT_GLOBAL_MISS() \
|
||||
do { \
|
||||
if (co->co_opcache != NULL) opcache_global_misses++; \
|
||||
} while (0)
|
||||
|
||||
#define OPCACHE_STAT_GLOBAL_OPT() \
|
||||
do { \
|
||||
if (co->co_opcache != NULL) opcache_global_opts++; \
|
||||
} while (0)
|
||||
|
||||
#define OPCACHE_STAT_ATTR_HIT() \
|
||||
do { \
|
||||
if (co->co_opcache != NULL) opcache_attr_hits++; \
|
||||
} while (0)
|
||||
|
||||
#define OPCACHE_STAT_ATTR_MISS() \
|
||||
do { \
|
||||
if (co->co_opcache != NULL) opcache_attr_misses++; \
|
||||
} while (0)
|
||||
|
||||
#define OPCACHE_STAT_ATTR_OPT() \
|
||||
do { \
|
||||
if (co->co_opcache!= NULL) opcache_attr_opts++; \
|
||||
} while (0)
|
||||
|
||||
#define OPCACHE_STAT_ATTR_DEOPT() \
|
||||
do { \
|
||||
if (co->co_opcache != NULL) opcache_attr_deopts++; \
|
||||
} while (0)
|
||||
|
||||
#define OPCACHE_STAT_ATTR_TOTAL() \
|
||||
do { \
|
||||
if (co->co_opcache != NULL) opcache_attr_total++; \
|
||||
} while (0)
|
||||
|
||||
#else /* OPCACHE_STATS */
|
||||
|
||||
#define OPCACHE_STAT_GLOBAL_HIT()
|
||||
#define OPCACHE_STAT_GLOBAL_MISS()
|
||||
#define OPCACHE_STAT_GLOBAL_OPT()
|
||||
|
||||
#define OPCACHE_STAT_ATTR_HIT()
|
||||
#define OPCACHE_STAT_ATTR_MISS()
|
||||
#define OPCACHE_STAT_ATTR_OPT()
|
||||
#define OPCACHE_STAT_ATTR_DEOPT()
|
||||
#define OPCACHE_STAT_ATTR_TOTAL()
|
||||
|
||||
#define JUMP_TO_INSTRUCTION(op) goto PREDICT_ID(op)
|
||||
|
||||
#define GET_CACHE() \
|
||||
_GetSpecializedCacheEntryForInstruction(first_instr, INSTR_OFFSET(), oparg)
|
||||
|
||||
#endif
|
||||
|
||||
#define DEOPT_IF(cond, instname) if (cond) { goto instname ## _miss; }
|
||||
|
||||
|
@ -1582,7 +1428,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
|
|||
_Py_CODEUNIT *first_instr;
|
||||
PyObject *names;
|
||||
PyObject *consts;
|
||||
_PyOpcache *co_opcache;
|
||||
|
||||
#ifdef LLTRACE
|
||||
_Py_IDENTIFIER(__ltrace__);
|
||||
|
@ -1690,21 +1535,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
|
|||
f->f_stackdepth = -1;
|
||||
f->f_state = FRAME_EXECUTING;
|
||||
|
||||
if (co->co_opcache_flag < opcache_min_runs) {
|
||||
co->co_opcache_flag++;
|
||||
if (co->co_opcache_flag == opcache_min_runs) {
|
||||
if (_PyCode_InitOpcache(co) < 0) {
|
||||
goto exit_eval_frame;
|
||||
}
|
||||
#if OPCACHE_STATS
|
||||
opcache_code_objects_extra_mem +=
|
||||
PyBytes_Size(co->co_code) / sizeof(_Py_CODEUNIT) +
|
||||
sizeof(_PyOpcache) * co->co_opcache_size;
|
||||
opcache_code_objects++;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef LLTRACE
|
||||
{
|
||||
int r = _PyDict_ContainsId(GLOBALS(), &PyId___ltrace__);
|
||||
|
@ -2974,30 +2804,12 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
|
|||
}
|
||||
|
||||
case TARGET(LOAD_GLOBAL): {
|
||||
PyObject *name;
|
||||
PREDICTED(LOAD_GLOBAL);
|
||||
PyObject *name = GETITEM(names, oparg);
|
||||
PyObject *v;
|
||||
if (PyDict_CheckExact(GLOBALS())
|
||||
&& PyDict_CheckExact(BUILTINS()))
|
||||
{
|
||||
OPCACHE_CHECK();
|
||||
if (co_opcache != NULL && co_opcache->optimized > 0) {
|
||||
_PyOpcache_LoadGlobal *lg = &co_opcache->u.lg;
|
||||
|
||||
if (lg->globals_ver ==
|
||||
((PyDictObject *)GLOBALS())->ma_version_tag
|
||||
&& lg->builtins_ver ==
|
||||
((PyDictObject *)BUILTINS())->ma_version_tag)
|
||||
{
|
||||
PyObject *ptr = lg->ptr;
|
||||
OPCACHE_STAT_GLOBAL_HIT();
|
||||
assert(ptr != NULL);
|
||||
Py_INCREF(ptr);
|
||||
PUSH(ptr);
|
||||
DISPATCH();
|
||||
}
|
||||
}
|
||||
|
||||
name = GETITEM(names, oparg);
|
||||
v = _PyDict_LoadGlobal((PyDictObject *)GLOBALS(),
|
||||
(PyDictObject *)BUILTINS(),
|
||||
name);
|
||||
|
@ -3010,25 +2822,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
|
|||
}
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (co_opcache != NULL) {
|
||||
_PyOpcache_LoadGlobal *lg = &co_opcache->u.lg;
|
||||
|
||||
if (co_opcache->optimized == 0) {
|
||||
/* Wasn't optimized before. */
|
||||
OPCACHE_STAT_GLOBAL_OPT();
|
||||
} else {
|
||||
OPCACHE_STAT_GLOBAL_MISS();
|
||||
}
|
||||
|
||||
co_opcache->optimized = 1;
|
||||
lg->globals_ver =
|
||||
((PyDictObject *)GLOBALS())->ma_version_tag;
|
||||
lg->builtins_ver =
|
||||
((PyDictObject *)BUILTINS())->ma_version_tag;
|
||||
lg->ptr = v; /* borrowed */
|
||||
}
|
||||
|
||||
Py_INCREF(v);
|
||||
}
|
||||
else {
|
||||
|
@ -3059,6 +2852,61 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
|
|||
DISPATCH();
|
||||
}
|
||||
|
||||
/* Adaptive form of LOAD_GLOBAL: counts down on each execution and, once the
 * counter reaches zero, asks _Py_Specialize_LoadGlobal() to rewrite this
 * instruction. Until then it behaves like the generic LOAD_GLOBAL. */
case TARGET(LOAD_GLOBAL_ADAPTIVE): {
|
||||
SpecializedCacheEntry *cache = GET_CACHE();
|
||||
if (cache->adaptive.counter == 0) {
|
||||
/* The instruction's own oparg is used by GET_CACHE(); the index into
 * `names` is the original oparg saved in the adaptive entry. */
PyObject *name = GETITEM(names, cache->adaptive.original_oparg);
|
||||
/* Step back onto this instruction: it is passed to the specializer as the
 * code unit to rewrite, and presumably DISPATCH() then executes whatever
 * it was rewritten to. */
next_instr--;
|
||||
if (_Py_Specialize_LoadGlobal(GLOBALS(), BUILTINS(), next_instr, name, cache) < 0) {
|
||||
goto error;
|
||||
}
|
||||
DISPATCH();
|
||||
}
|
||||
else {
|
||||
/* Not yet time to specialize: count down and run the generic instruction
 * with the original oparg restored. */
STAT_INC(LOAD_GLOBAL, deferred);
|
||||
cache->adaptive.counter--;
|
||||
oparg = cache->adaptive.original_oparg;
|
||||
JUMP_TO_INSTRUCTION(LOAD_GLOBAL);
|
||||
}
|
||||
}
|
||||
|
||||
/* LOAD_GLOBAL specialized for a name that was found in the globals dict.
 * Every failed guard jumps to LOAD_GLOBAL_miss via DEOPT_IF(). */
case TARGET(LOAD_GLOBAL_MODULE): {
|
||||
DEOPT_IF(!PyDict_CheckExact(GLOBALS()), LOAD_GLOBAL);
|
||||
PyDictObject *dict = (PyDictObject *)GLOBALS();
|
||||
SpecializedCacheEntry *caches = GET_CACHE();
|
||||
_PyAdaptiveEntry *cache0 = &caches[0].adaptive;
|
||||
/* The second cache entry is addressed at index -1 relative to the first. */
_PyLoadGlobalCache *cache1 = &caches[-1].load_global;
|
||||
/* The cached entry index is only trusted while the keys version still
 * matches the one recorded at specialization time. */
DEOPT_IF(dict->ma_keys->dk_version != cache1->module_keys_version, LOAD_GLOBAL);
|
||||
PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + cache0->index;
|
||||
PyObject *res = ep->me_value;
|
||||
/* An entry with no value is treated as a miss. */
DEOPT_IF(res == NULL, LOAD_GLOBAL);
|
||||
record_cache_hit(cache0);
|
||||
STAT_INC(LOAD_GLOBAL, hit);
|
||||
Py_INCREF(res);
|
||||
PUSH(res);
|
||||
DISPATCH();
|
||||
}
|
||||
|
||||
/* LOAD_GLOBAL specialized for a name that was found in the builtins dict.
 * Every failed guard jumps to LOAD_GLOBAL_miss via DEOPT_IF(). */
case TARGET(LOAD_GLOBAL_BUILTIN): {
|
||||
DEOPT_IF(!PyDict_CheckExact(GLOBALS()), LOAD_GLOBAL);
|
||||
DEOPT_IF(!PyDict_CheckExact(BUILTINS()), LOAD_GLOBAL);
|
||||
PyDictObject *mdict = (PyDictObject *)GLOBALS();
|
||||
PyDictObject *bdict = (PyDictObject *)BUILTINS();
|
||||
SpecializedCacheEntry *caches = GET_CACHE();
|
||||
_PyAdaptiveEntry *cache0 = &caches[0].adaptive;
|
||||
/* The second cache entry is addressed at index -1 relative to the first. */
_PyLoadGlobalCache *cache1 = &caches[-1].load_global;
|
||||
/* Both keys versions must match. The globals check presumably guards
 * against the name having since been added to globals, which would
 * shadow the builtin -- TODO confirm. */
DEOPT_IF(mdict->ma_keys->dk_version != cache1->module_keys_version, LOAD_GLOBAL);
|
||||
DEOPT_IF(bdict->ma_keys->dk_version != cache1->builtin_keys_version, LOAD_GLOBAL);
|
||||
/* The cached index refers to the builtins dict's entries. */
PyDictKeyEntry *ep = DK_ENTRIES(bdict->ma_keys) + cache0->index;
|
||||
PyObject *res = ep->me_value;
|
||||
/* An entry with no value is treated as a miss. */
DEOPT_IF(res == NULL, LOAD_GLOBAL);
|
||||
record_cache_hit(cache0);
|
||||
STAT_INC(LOAD_GLOBAL, hit);
|
||||
Py_INCREF(res);
|
||||
PUSH(res);
|
||||
DISPATCH();
|
||||
}
|
||||
|
||||
case TARGET(DELETE_FAST): {
|
||||
PyObject *v = GETLOCAL(oparg);
|
||||
if (v != NULL) {
|
||||
|
@ -3464,7 +3312,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
|
|||
DISPATCH();
|
||||
}
|
||||
else {
|
||||
STAT_INC(loadattr_deferred);
|
||||
STAT_INC(LOAD_ATTR, deferred);
|
||||
cache->adaptive.counter--;
|
||||
oparg = cache->adaptive.original_oparg;
|
||||
JUMP_TO_INSTRUCTION(LOAD_ATTR);
|
||||
|
@ -3487,9 +3335,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
|
|||
DEOPT_IF(dict->ma_keys->dk_version != cache1->dk_version_or_hint, LOAD_ATTR);
|
||||
res = dict->ma_values[cache0->index];
|
||||
DEOPT_IF(res == NULL, LOAD_ATTR);
|
||||
STAT_INC(loadattr_hit);
|
||||
STAT_INC(LOAD_ATTR, hit);
|
||||
record_cache_hit(cache0);
|
||||
STAT_INC(loadattr_hit);
|
||||
STAT_INC(LOAD_ATTR, hit);
|
||||
Py_INCREF(res);
|
||||
SET_TOP(res);
|
||||
Py_DECREF(owner);
|
||||
|
@ -3510,7 +3358,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
|
|||
PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + cache0->index;
|
||||
res = ep->me_value;
|
||||
DEOPT_IF(res == NULL, LOAD_ATTR);
|
||||
STAT_INC(loadattr_hit);
|
||||
STAT_INC(LOAD_ATTR, hit);
|
||||
record_cache_hit(cache0);
|
||||
Py_INCREF(res);
|
||||
SET_TOP(res);
|
||||
|
@ -3538,7 +3386,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
|
|||
DEOPT_IF(ep->me_key != name, LOAD_ATTR);
|
||||
res = ep->me_value;
|
||||
DEOPT_IF(res == NULL, LOAD_ATTR);
|
||||
STAT_INC(loadattr_hit);
|
||||
STAT_INC(LOAD_ATTR, hit);
|
||||
record_cache_hit(cache0);
|
||||
Py_INCREF(res);
|
||||
SET_TOP(res);
|
||||
|
@ -3558,7 +3406,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
|
|||
char *addr = (char *)owner + cache0->index;
|
||||
res = *(PyObject **)addr;
|
||||
DEOPT_IF(res == NULL, LOAD_ATTR);
|
||||
STAT_INC(loadattr_hit);
|
||||
STAT_INC(LOAD_ATTR, hit);
|
||||
record_cache_hit(cache0);
|
||||
Py_INCREF(res);
|
||||
SET_TOP(res);
|
||||
|
@ -4445,22 +4293,26 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
|
|||
or goto error. */
|
||||
Py_UNREACHABLE();
|
||||
|
||||
/* Cache misses */
|
||||
/* Specialization misses */
|
||||
|
||||
LOAD_ATTR_miss:
|
||||
{
|
||||
STAT_INC(loadattr_miss);
|
||||
_PyAdaptiveEntry *cache = &GET_CACHE()->adaptive;
|
||||
record_cache_miss(cache);
|
||||
if (too_many_cache_misses(cache)) {
|
||||
next_instr[-1] = _Py_MAKECODEUNIT(LOAD_ATTR_ADAPTIVE, _Py_OPARG(next_instr[-1]));
|
||||
STAT_INC(loadattr_deopt);
|
||||
cache_backoff(cache);
|
||||
}
|
||||
oparg = cache->original_oparg;
|
||||
JUMP_TO_INSTRUCTION(LOAD_ATTR);
|
||||
/* Defines the `<opname>_miss` label that DEOPT_IF(cond, opname) jumps to.
 * The handler records the miss in the adaptive cache entry. After too many
 * misses it rewrites the instruction at next_instr[-1] back to
 * `<opname>_ADAPTIVE` (keeping its oparg) and backs off the cache counter.
 * In every case it restores the original oparg and continues at the generic
 * `opname` instruction. */
#define MISS_WITH_CACHE(opname) \
|
||||
opname ## _miss: \
|
||||
{ \
|
||||
STAT_INC(opname, miss); \
|
||||
_PyAdaptiveEntry *cache = &GET_CACHE()->adaptive; \
|
||||
record_cache_miss(cache); \
|
||||
if (too_many_cache_misses(cache)) { \
|
||||
next_instr[-1] = _Py_MAKECODEUNIT(opname ## _ADAPTIVE, _Py_OPARG(next_instr[-1])); \
|
||||
STAT_INC(opname, deopt); \
|
||||
cache_backoff(cache); \
|
||||
} \
|
||||
oparg = cache->original_oparg; \
|
||||
JUMP_TO_INSTRUCTION(opname); \
|
||||
}
|
||||
|
||||
MISS_WITH_CACHE(LOAD_ATTR)
|
||||
MISS_WITH_CACHE(LOAD_GLOBAL)
|
||||
|
||||
error:
|
||||
/* Double-check exception status. */
|
||||
#ifdef NDEBUG
|
||||
|
|
|
@ -35,10 +35,10 @@ static void *opcode_targets[256] = {
|
|||
&&TARGET_MATCH_KEYS,
|
||||
&&TARGET_COPY_DICT_WITHOUT_KEYS,
|
||||
&&TARGET_PUSH_EXC_INFO,
|
||||
&&_unknown_opcode,
|
||||
&&TARGET_LOAD_GLOBAL_ADAPTIVE,
|
||||
&&TARGET_POP_EXCEPT_AND_RERAISE,
|
||||
&&_unknown_opcode,
|
||||
&&_unknown_opcode,
|
||||
&&TARGET_LOAD_GLOBAL_MODULE,
|
||||
&&TARGET_LOAD_GLOBAL_BUILTIN,
|
||||
&&_unknown_opcode,
|
||||
&&_unknown_opcode,
|
||||
&&_unknown_opcode,
|
||||
|
|
|
@ -33,18 +33,27 @@
|
|||
|
||||
Py_ssize_t _Py_QuickenedCount = 0;
|
||||
#if SPECIALIZATION_STATS
|
||||
SpecializationStats _specialization_stats = { 0 };
|
||||
SpecializationStats _specialization_stats[256] = { 0 };
|
||||
|
||||
#define PRINT_STAT(name, field) fprintf(stderr, " %s." #field " : %" PRIu64 "\n", name, stats->field);
|
||||
|
||||
static void
|
||||
print_stats(SpecializationStats *stats, const char *name)
|
||||
{
|
||||
PRINT_STAT(name, specialization_success);
|
||||
PRINT_STAT(name, specialization_failure);
|
||||
PRINT_STAT(name, hit);
|
||||
PRINT_STAT(name, deferred);
|
||||
PRINT_STAT(name, miss);
|
||||
PRINT_STAT(name, deopt);
|
||||
}
|
||||
|
||||
#define PRINT_STAT(name) fprintf(stderr, #name " : %" PRIu64" \n", _specialization_stats.name);
|
||||
void
|
||||
_Py_PrintSpecializationStats(void)
|
||||
{
|
||||
PRINT_STAT(specialization_success);
|
||||
PRINT_STAT(specialization_failure);
|
||||
PRINT_STAT(loadattr_hit);
|
||||
PRINT_STAT(loadattr_deferred);
|
||||
PRINT_STAT(loadattr_miss);
|
||||
PRINT_STAT(loadattr_deopt);
|
||||
printf("Specialization stats:\n");
|
||||
print_stats(&_specialization_stats[LOAD_ATTR], "load_attr");
|
||||
print_stats(&_specialization_stats[LOAD_GLOBAL], "load_global");
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -77,11 +86,13 @@ get_cache_count(SpecializedCacheOrInstruction *quickened) {
|
|||
Values of zero are ignored. */
|
||||
static uint8_t adaptive_opcodes[256] = {
|
||||
[LOAD_ATTR] = LOAD_ATTR_ADAPTIVE,
|
||||
[LOAD_GLOBAL] = LOAD_GLOBAL_ADAPTIVE,
|
||||
};
|
||||
|
||||
/* The number of cache entries required for a "family" of instructions. */
|
||||
static uint8_t cache_requirements[256] = {
|
||||
[LOAD_ATTR] = 2,
|
||||
[LOAD_ATTR] = 2, /* _PyAdaptiveEntry and _PyLoadAttrCache */
|
||||
[LOAD_GLOBAL] = 2, /* _PyAdaptiveEntry and _PyLoadGlobalCache */
|
||||
};
|
||||
|
||||
/* Return the oparg for the cache_offset and instruction index.
|
||||
|
@ -357,14 +368,81 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, Sp
|
|||
}
|
||||
|
||||
fail:
|
||||
STAT_INC(specialization_failure);
|
||||
STAT_INC(LOAD_ATTR, specialization_failure);
|
||||
assert(!PyErr_Occurred());
|
||||
cache_backoff(cache0);
|
||||
return 0;
|
||||
success:
|
||||
STAT_INC(specialization_success);
|
||||
STAT_INC(LOAD_ATTR, specialization_success);
|
||||
assert(!PyErr_Occurred());
|
||||
cache0->counter = saturating_start();
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
_Py_Specialize_LoadGlobal(
|
||||
PyObject *globals, PyObject *builtins,
|
||||
_Py_CODEUNIT *instr, PyObject *name,
|
||||
SpecializedCacheEntry *cache)
|
||||
{
|
||||
_PyAdaptiveEntry *cache0 = &cache->adaptive;
|
||||
_PyLoadGlobalCache *cache1 = &cache[-1].load_global;
|
||||
assert(PyUnicode_CheckExact(name));
|
||||
if (!PyDict_CheckExact(globals)) {
|
||||
goto fail;
|
||||
}
|
||||
if (((PyDictObject *)globals)->ma_keys->dk_kind != DICT_KEYS_UNICODE) {
|
||||
goto fail;
|
||||
}
|
||||
PyObject *value = NULL;
|
||||
Py_ssize_t index = _PyDict_GetItemHint((PyDictObject *)globals, name, -1, &value);
|
||||
assert (index != DKIX_ERROR);
|
||||
if (index != DKIX_EMPTY) {
|
||||
if (index != (uint16_t)index) {
|
||||
goto fail;
|
||||
}
|
||||
uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState((PyDictObject *)globals);
|
||||
if (keys_version == 0) {
|
||||
goto fail;
|
||||
}
|
||||
cache1->module_keys_version = keys_version;
|
||||
cache0->index = (uint16_t)index;
|
||||
*instr = _Py_MAKECODEUNIT(LOAD_GLOBAL_MODULE, _Py_OPARG(*instr));
|
||||
goto success;
|
||||
}
|
||||
if (!PyDict_CheckExact(builtins)) {
|
||||
goto fail;
|
||||
}
|
||||
if (((PyDictObject *)builtins)->ma_keys->dk_kind != DICT_KEYS_UNICODE) {
|
||||
goto fail;
|
||||
}
|
||||
index = _PyDict_GetItemHint((PyDictObject *)builtins, name, -1, &value);
|
||||
assert (index != DKIX_ERROR);
|
||||
if (index != (uint16_t)index) {
|
||||
goto fail;
|
||||
}
|
||||
uint32_t globals_version = _PyDictKeys_GetVersionForCurrentState((PyDictObject *)globals);
|
||||
if (globals_version == 0) {
|
||||
goto fail;
|
||||
}
|
||||
uint32_t builtins_version = _PyDictKeys_GetVersionForCurrentState((PyDictObject *)builtins);
|
||||
if (builtins_version == 0) {
|
||||
goto fail;
|
||||
}
|
||||
cache1->module_keys_version = globals_version;
|
||||
cache1->builtin_keys_version = builtins_version;
|
||||
cache0->index = (uint16_t)index;
|
||||
*instr = _Py_MAKECODEUNIT(LOAD_GLOBAL_BUILTIN, _Py_OPARG(*instr));
|
||||
goto success;
|
||||
fail:
|
||||
STAT_INC(LOAD_GLOBAL, specialization_failure);
|
||||
assert(!PyErr_Occurred());
|
||||
cache_backoff(cache0);
|
||||
return 0;
|
||||
success:
|
||||
STAT_INC(LOAD_GLOBAL, specialization_success);
|
||||
assert(!PyErr_Occurred());
|
||||
cache0->counter = saturating_start();
|
||||
return 0;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue