bpo-42093: Add opcode cache for LOAD_ATTR (GH-22803)

This commit is contained in:
Pablo Galindo 2020-10-20 06:22:44 +01:00 committed by GitHub
parent 871934d4cf
commit 109826c850
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 296 additions and 7 deletions

View File

@ -252,6 +252,9 @@ Optimizations
average.
(Contributed by Victor Stinner in :issue:`41006`.)
* The ``LOAD_ATTR`` instruction now uses the new "per opcode cache" mechanism.
It is about 36% faster now. (Contributed by Pablo Galindo and Yury Selivanov
in :issue:`42093`.)
Deprecated
==========

View File

@ -71,6 +71,7 @@ PyAPI_FUNC(void) _PyDict_DebugMallocStats(FILE *out);
int _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, PyObject *name, PyObject *value);
PyObject *_PyDict_LoadGlobal(PyDictObject *, PyDictObject *, PyObject *);
/* Look up a key (second argument) in a dict (first argument), probing the
   entry at the index given by the third argument before doing a regular
   lookup.  On success the value is stored through the fourth argument and the
   entry index is returned; -1 is returned when nothing was found. */
Py_ssize_t _PyDict_GetItemHint(PyDictObject *, PyObject *, Py_ssize_t, PyObject **);
/* _PyDictView */

View File

@ -10,9 +10,16 @@ typedef struct {
uint64_t builtins_ver; /* ma_version of builtin dict */
} _PyOpcache_LoadGlobal;
/* Cached state for one LOAD_ATTR instruction. */
typedef struct {
/* Type of the attribute owner at the time the entry was filled. */
PyTypeObject *type;
/* Index returned by _PyDict_GetItemHint() for the attribute name in the
   owner's instance dict; passed back as the hint on the next lookup. */
Py_ssize_t hint;
/* Value of type->tp_version_tag at the time the entry was filled; compared
   against the current tag to detect a modified type. */
unsigned int tp_version_tag;
} _PyOpCodeOpt_LoadAttr;
/* One opcode-cache entry.  The union holds the per-opcode payload: `lg` for
   the LOAD_GLOBAL cache and `la` for the LOAD_ATTR cache. */
struct _PyOpcache {
    union {
        _PyOpcache_LoadGlobal lg;
        _PyOpCodeOpt_LoadAttr la;
    } u;
    /* State of the entry:
         0   not optimized yet,
         > 0 optimized (for LOAD_ATTR: remaining tolerated misses),
         -1  deoptimized.
       Explicitly signed: the signedness of plain `char` is
       implementation-defined, and where it is unsigned the -1 marker would
       read back as 255 and pass the `optimized > 0` test. */
    signed char optimized;
};

View File

@ -0,0 +1,2 @@
The ``LOAD_ATTR`` instruction now uses the new "per opcode cache" mechanism and
is about 36% faster. Patch by Pablo Galindo and Yury Selivanov.

View File

@ -301,8 +301,8 @@ _PyCode_InitOpcache(PyCodeObject *co)
unsigned char opcode = _Py_OPCODE(opcodes[i]);
i++; // 'i' is now aligned to (next_instr - first_instr)
// TODO: LOAD_METHOD, LOAD_ATTR
if (opcode == LOAD_GLOBAL) {
// TODO: LOAD_METHOD
if (opcode == LOAD_GLOBAL || opcode == LOAD_ATTR) {
opts++;
co->co_opcache_map[i] = (unsigned char)opts;
if (opts > 254) {

View File

@ -1437,6 +1437,71 @@ PyDict_GetItem(PyObject *op, PyObject *key)
return value;
}
/* Look up `key` in the dict `mp`, probing the entry at index `hint` first.

   Preconditions (asserted): `*value` is NULL, `mp` is an exact dict and
   `key` is an exact str.

   Fast path: if `hint` is a valid entry index, the entry's key is the very
   same object as `key` (pointer comparison) and its value is set, that value
   is stored in `*value` and `hint` is returned without hashing.  The fast
   path adds no reference to the stored value.

   Slow path: the key is hashed (reusing the hash cached in the str object
   when present) and the dict's own lookup function is called.  Its
   non-negative result is returned as the new entry index.

   Returns -1 if hashing fails or the lookup yields a negative index.  Errors
   raised by this function are not propagated: they are cleared, or, when an
   exception was already pending on entry to the lookup, that pending
   exception is restored unchanged. */
Py_ssize_t
_PyDict_GetItemHint(PyDictObject *mp, PyObject *key,
                    Py_ssize_t hint, PyObject **value)
{
    Py_hash_t hash;
    PyThreadState *tstate;
    Py_ssize_t ix;

    assert(*value == NULL);
    assert(PyDict_CheckExact((PyObject*)mp));
    assert(PyUnicode_CheckExact(key));

    /* Bound the hint by the number of entries in the key table
       (dk_nentries).  The value returned by _PyDict_KeysSize() is a size in
       bytes, not an entry count, and would let a stale hint index past the
       end of the entries array. */
    if (hint >= 0 && hint < mp->ma_keys->dk_nentries) {
        PyObject *res = NULL;
        PyDictKeyEntry *ep = DK_ENTRIES(mp->ma_keys) + (size_t)hint;

        if (ep->me_key == key) {
            if (mp->ma_keys->dk_lookup == lookdict_split) {
                /* Split table: values live in ma_values, not in the entry. */
                assert(mp->ma_values != NULL);
                res = mp->ma_values[(size_t)hint];
            }
            else {
                res = ep->me_value;
            }
            if (res != NULL) {
                *value = res;
                return hint;
            }
        }
    }

    hash = ((PyASCIIObject *) key)->hash;
    if (hash == -1) {
        hash = PyObject_Hash(key);
        if (hash == -1) {
            PyErr_Clear();
            return -1;
        }
    }

    // We can arrive here with a NULL tstate during initialization: try
    // running "python -Wi" for an example related to string interning
    tstate = _PyThreadState_UncheckedGet();
    if (tstate != NULL && tstate->curexc_type != NULL) {
        /* Preserve the exception that is already pending; any error raised
           by the lookup itself is discarded by the restore. */
        PyObject *err_type, *err_value, *err_tb;
        PyErr_Fetch(&err_type, &err_value, &err_tb);
        ix = (mp->ma_keys->dk_lookup)(mp, key, hash, value);
        PyErr_Restore(err_type, err_value, err_tb);
    }
    else {
        ix = (mp->ma_keys->dk_lookup)(mp, key, hash, value);
        if (ix < 0) {
            PyErr_Clear();
        }
    }

    return ix < 0 ? -1 : ix;
}
/* Same as PyDict_GetItemWithError() but with hash supplied by caller.
This returns NULL *with* an exception set if an exception occurred.
It returns NULL *without* an exception set if the key wasn't present.

View File

@ -111,6 +111,7 @@ static long dxp[256];
#else
#define OPCACHE_MIN_RUNS 1024 /* create opcache when code executed this time */
#endif
/* Value stored in a cache entry's `optimized` field when a LOAD_ATTR
   instruction is first optimized.  OPCACHE_MAYBE_DEOPT_LOAD_ATTR() decrements
   it on each miss and deoptimizes the instruction once it drops to zero. */
#define OPCODE_CACHE_MAX_TRIES 20
#define OPCACHE_STATS 0 /* Enable stats */
#if OPCACHE_STATS
@ -120,6 +121,12 @@ static size_t opcache_code_objects_extra_mem = 0;
static size_t opcache_global_opts = 0;
static size_t opcache_global_hits = 0;
static size_t opcache_global_misses = 0;
/* LOAD_ATTR opcode-cache statistics; each counter is incremented by the
   OPCACHE_STAT_ATTR_* macro of the matching name. */
static size_t opcache_attr_opts = 0;
static size_t opcache_attr_hits = 0;
static size_t opcache_attr_misses = 0;
static size_t opcache_attr_deopts = 0;
static size_t opcache_attr_total = 0;
#endif
@ -365,6 +372,25 @@ _PyEval_Fini(void)
opcache_global_opts);
fprintf(stderr, "\n");
fprintf(stderr, "-- Opcode cache LOAD_ATTR hits = %zd (%d%%)\n",
opcache_attr_hits,
(int) (100.0 * opcache_attr_hits /
opcache_attr_total));
fprintf(stderr, "-- Opcode cache LOAD_ATTR misses = %zd (%d%%)\n",
opcache_attr_misses,
(int) (100.0 * opcache_attr_misses /
opcache_attr_total));
fprintf(stderr, "-- Opcode cache LOAD_ATTR opts = %zd\n",
opcache_attr_opts);
fprintf(stderr, "-- Opcode cache LOAD_ATTR deopts = %zd\n",
opcache_attr_deopts);
fprintf(stderr, "-- Opcode cache LOAD_ATTR total = %zd\n",
opcache_attr_total);
#endif
}
@ -1224,16 +1250,43 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
do { \
co_opcache = NULL; \
if (co->co_opcache != NULL) { \
unsigned char co_opt_offset = \
unsigned char co_opcache_offset = \
co->co_opcache_map[next_instr - first_instr]; \
if (co_opt_offset > 0) { \
assert(co_opt_offset <= co->co_opcache_size); \
co_opcache = &co->co_opcache[co_opt_offset - 1]; \
if (co_opcache_offset > 0) { \
assert(co_opcache_offset <= co->co_opcache_size); \
co_opcache = &co->co_opcache[co_opcache_offset - 1]; \
assert(co_opcache != NULL); \
} \
} \
} while (0)
/* Disable the opcode cache for the instruction currently being executed.

   Marks the cache entry as deoptimized (optimized = -1), clears this
   instruction's slot in co_opcache_map so that later lookups of the same
   instruction find no cache entry, and resets the local co_opcache pointer to
   NULL.  Does nothing when co_opcache is already NULL.

   co_opcache_map is indexed by instruction index (next_instr - first_instr).
   The value stored there is an offset into co_opcache, so it must not be
   used as an index into the map itself. */
#define OPCACHE_DEOPT() \
    do { \
        if (co_opcache != NULL) { \
            co_opcache->optimized = -1; \
            assert(co->co_opcache_map[next_instr - first_instr] <= \
                   co->co_opcache_size); \
            co->co_opcache_map[next_instr - first_instr] = 0; \
            co_opcache = NULL; \
        } \
    } while (0)
/* Deoptimize a LOAD_ATTR instruction: record the event through
   OPCACHE_STAT_ATTR_DEOPT() and then invoke OPCACHE_DEOPT().
   Does nothing when co_opcache is NULL. */
#define OPCACHE_DEOPT_LOAD_ATTR() \
do { \
if (co_opcache != NULL) { \
OPCACHE_STAT_ATTR_DEOPT(); \
OPCACHE_DEOPT(); \
} \
} while (0)
/* Account for one LOAD_ATTR cache miss: pre-decrement the entry's `optimized`
   counter and, once it has dropped to zero or below, call
   OPCACHE_DEOPT_LOAD_ATTR().  Does nothing when co_opcache is NULL. */
#define OPCACHE_MAYBE_DEOPT_LOAD_ATTR() \
do { \
if (co_opcache != NULL && --co_opcache->optimized <= 0) { \
OPCACHE_DEOPT_LOAD_ATTR(); \
} \
} while (0)
#if OPCACHE_STATS
#define OPCACHE_STAT_GLOBAL_HIT() \
@ -1251,12 +1304,43 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
if (co->co_opcache != NULL) opcache_global_opts++; \
} while (0)
/* Statistics hooks for the LOAD_ATTR opcode cache.  Each macro increments the
   counter of the matching name (hits, misses, opts, deopts, total), and only
   when the running code object has an opcode cache (co->co_opcache != NULL). */
#define OPCACHE_STAT_ATTR_HIT() \
do { \
if (co->co_opcache != NULL) opcache_attr_hits++; \
} while (0)
#define OPCACHE_STAT_ATTR_MISS() \
do { \
if (co->co_opcache != NULL) opcache_attr_misses++; \
} while (0)
#define OPCACHE_STAT_ATTR_OPT() \
do { \
if (co->co_opcache!= NULL) opcache_attr_opts++; \
} while (0)
#define OPCACHE_STAT_ATTR_DEOPT() \
do { \
if (co->co_opcache != NULL) opcache_attr_deopts++; \
} while (0)
#define OPCACHE_STAT_ATTR_TOTAL() \
do { \
if (co->co_opcache != NULL) opcache_attr_total++; \
} while (0)
#else /* OPCACHE_STATS */
#define OPCACHE_STAT_GLOBAL_HIT()
#define OPCACHE_STAT_GLOBAL_MISS()
#define OPCACHE_STAT_GLOBAL_OPT()
/* Statistics disabled: the LOAD_ATTR hooks expand to nothing. */
#define OPCACHE_STAT_ATTR_HIT()
#define OPCACHE_STAT_ATTR_MISS()
#define OPCACHE_STAT_ATTR_OPT()
#define OPCACHE_STAT_ATTR_DEOPT()
#define OPCACHE_STAT_ATTR_TOTAL()
#endif
/* Start of code */
@ -3023,7 +3107,134 @@ main_loop:
case TARGET(LOAD_ATTR): {
PyObject *name = GETITEM(names, oparg);
PyObject *owner = TOP();
PyObject *res = PyObject_GetAttr(owner, name);
PyTypeObject *type = Py_TYPE(owner);
PyObject *res;
PyObject **dictptr;
PyObject *dict;
_PyOpCodeOpt_LoadAttr *la;
OPCACHE_STAT_ATTR_TOTAL();
OPCACHE_CHECK();
if (co_opcache != NULL && PyType_HasFeature(type, Py_TPFLAGS_VALID_VERSION_TAG))
{
if (co_opcache->optimized > 0) {
/* Fast path -- cache hit makes LOAD_ATTR ~30% faster */
la = &co_opcache->u.la;
if (la->type == type && la->tp_version_tag == type->tp_version_tag)
{
assert(type->tp_dict != NULL);
assert(type->tp_dictoffset > 0);
dictptr = (PyObject **) ((char *)owner + type->tp_dictoffset);
dict = *dictptr;
if (dict != NULL && PyDict_CheckExact(dict)) {
Py_ssize_t hint = la->hint;
Py_INCREF(dict);
res = NULL;
la->hint = _PyDict_GetItemHint((PyDictObject*)dict, name, hint, &res);
if (res != NULL) {
if (la->hint == hint && hint >= 0) {
/* Our hint has helped -- cache hit. */
OPCACHE_STAT_ATTR_HIT();
} else {
/* The hint we provided didn't work.
Maybe next time? */
OPCACHE_MAYBE_DEOPT_LOAD_ATTR();
}
Py_INCREF(res);
SET_TOP(res);
Py_DECREF(owner);
Py_DECREF(dict);
DISPATCH();
} else {
// This attribute can be missing sometimes -- we
// don't want to optimize this lookup.
OPCACHE_DEOPT_LOAD_ATTR();
Py_DECREF(dict);
}
} else {
// There is no dict, or __dict__ doesn't satisfy PyDict_CheckExact
OPCACHE_DEOPT_LOAD_ATTR();
}
} else {
// The type of the object has either been updated,
// or is different. Maybe it will stabilize?
OPCACHE_MAYBE_DEOPT_LOAD_ATTR();
}
OPCACHE_STAT_ATTR_MISS();
}
if (co_opcache != NULL && /* co_opcache can be NULL after a DEOPT() call. */
type->tp_getattro == PyObject_GenericGetAttr)
{
PyObject *descr;
Py_ssize_t ret;
if (type->tp_dictoffset > 0) {
if (type->tp_dict == NULL) {
if (PyType_Ready(type) < 0) {
Py_DECREF(owner);
SET_TOP(NULL);
goto error;
}
}
descr = _PyType_Lookup(type, name);
if (descr == NULL ||
descr->ob_type->tp_descr_get == NULL ||
!PyDescr_IsData(descr))
{
dictptr = (PyObject **) ((char *)owner + type->tp_dictoffset);
dict = *dictptr;
if (dict != NULL && PyDict_CheckExact(dict)) {
Py_INCREF(dict);
res = NULL;
ret = _PyDict_GetItemHint((PyDictObject*)dict, name, -1, &res);
if (res != NULL) {
Py_INCREF(res);
Py_DECREF(dict);
Py_DECREF(owner);
SET_TOP(res);
if (co_opcache->optimized == 0) {
// First time we optimize this opcode. */
OPCACHE_STAT_ATTR_OPT();
co_opcache->optimized = OPCODE_CACHE_MAX_TRIES;
}
la = &co_opcache->u.la;
la->type = type;
la->tp_version_tag = type->tp_version_tag;
la->hint = ret;
DISPATCH();
}
Py_DECREF(dict);
} else {
// There is no dict, or __dict__ doesn't satisfy PyDict_CheckExact
OPCACHE_DEOPT_LOAD_ATTR();
}
} else {
// We failed to find an attribute without a data-like descriptor
OPCACHE_DEOPT_LOAD_ATTR();
}
} else {
// The object's class does not have a tp_dictoffset we can use
OPCACHE_DEOPT_LOAD_ATTR();
}
} else if (type->tp_getattro != PyObject_GenericGetAttr) {
OPCACHE_DEOPT_LOAD_ATTR();
}
}
/* slow path */
res = PyObject_GetAttr(owner, name);
Py_DECREF(owner);
SET_TOP(res);
if (res == NULL)