mirror of https://github.com/python/cpython
bpo-44525: Specialize simple Python calls. (GH-29033)
This commit is contained in:
parent
8d6740f489
commit
8863a0fcc5
|
@ -35,6 +35,12 @@ typedef struct {
|
|||
PyObject *obj;
|
||||
} _PyObjectCache;
|
||||
|
||||
/* Second cache entry used by CALL_FUNCTION_PY_SIMPLE: identifies the
 * Python function the call site was specialized for and which of its
 * default values have to be filled in. The fields add up to 8 bytes
 * (4 + 2 + 2), the size every SpecializedCacheEntry member must keep. */
typedef struct {
|
||||
/* Function version recorded at specialization time; the specialized
 * instruction deoptimizes when func->func_version no longer matches. */
uint32_t func_version;
|
||||
/* Index into func->func_defaults of the first default to copy. */
uint16_t defaults_start;
|
||||
/* Number of consecutive defaults copied into the new frame. */
uint16_t defaults_len;
|
||||
} _PyCallCache;
|
||||
|
||||
/* Add specialized versions of entries to this union.
|
||||
*
|
||||
* Do not break the invariant: sizeof(SpecializedCacheEntry) == 8
|
||||
|
@ -51,6 +57,7 @@ typedef union {
|
|||
_PyAttrCache attr;
|
||||
_PyLoadGlobalCache load_global;
|
||||
_PyObjectCache obj;
|
||||
_PyCallCache call;
|
||||
} SpecializedCacheEntry;
|
||||
|
||||
#define INSTRUCTIONS_PER_ENTRY (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT))
|
||||
|
|
|
@ -153,29 +153,30 @@ extern "C" {
|
|||
#define CALL_FUNCTION_BUILTIN_FAST 45
|
||||
#define CALL_FUNCTION_LEN 46
|
||||
#define CALL_FUNCTION_ISINSTANCE 47
|
||||
#define JUMP_ABSOLUTE_QUICK 48
|
||||
#define LOAD_ATTR_ADAPTIVE 58
|
||||
#define LOAD_ATTR_INSTANCE_VALUE 80
|
||||
#define LOAD_ATTR_WITH_HINT 81
|
||||
#define LOAD_ATTR_SLOT 87
|
||||
#define LOAD_ATTR_MODULE 88
|
||||
#define LOAD_GLOBAL_ADAPTIVE 120
|
||||
#define LOAD_GLOBAL_MODULE 122
|
||||
#define LOAD_GLOBAL_BUILTIN 123
|
||||
#define LOAD_METHOD_ADAPTIVE 127
|
||||
#define LOAD_METHOD_CACHED 128
|
||||
#define LOAD_METHOD_CLASS 134
|
||||
#define LOAD_METHOD_MODULE 140
|
||||
#define LOAD_METHOD_NO_DICT 143
|
||||
#define STORE_ATTR_ADAPTIVE 149
|
||||
#define STORE_ATTR_INSTANCE_VALUE 150
|
||||
#define STORE_ATTR_SLOT 151
|
||||
#define STORE_ATTR_WITH_HINT 153
|
||||
#define LOAD_FAST__LOAD_FAST 154
|
||||
#define STORE_FAST__LOAD_FAST 158
|
||||
#define LOAD_FAST__LOAD_CONST 159
|
||||
#define LOAD_CONST__LOAD_FAST 167
|
||||
#define STORE_FAST__STORE_FAST 168
|
||||
#define CALL_FUNCTION_PY_SIMPLE 48
|
||||
#define JUMP_ABSOLUTE_QUICK 58
|
||||
#define LOAD_ATTR_ADAPTIVE 80
|
||||
#define LOAD_ATTR_INSTANCE_VALUE 81
|
||||
#define LOAD_ATTR_WITH_HINT 87
|
||||
#define LOAD_ATTR_SLOT 88
|
||||
#define LOAD_ATTR_MODULE 120
|
||||
#define LOAD_GLOBAL_ADAPTIVE 122
|
||||
#define LOAD_GLOBAL_MODULE 123
|
||||
#define LOAD_GLOBAL_BUILTIN 127
|
||||
#define LOAD_METHOD_ADAPTIVE 128
|
||||
#define LOAD_METHOD_CACHED 134
|
||||
#define LOAD_METHOD_CLASS 140
|
||||
#define LOAD_METHOD_MODULE 143
|
||||
#define LOAD_METHOD_NO_DICT 149
|
||||
#define STORE_ATTR_ADAPTIVE 150
|
||||
#define STORE_ATTR_INSTANCE_VALUE 151
|
||||
#define STORE_ATTR_SLOT 153
|
||||
#define STORE_ATTR_WITH_HINT 154
|
||||
#define LOAD_FAST__LOAD_FAST 158
|
||||
#define STORE_FAST__LOAD_FAST 159
|
||||
#define LOAD_FAST__LOAD_CONST 167
|
||||
#define LOAD_CONST__LOAD_FAST 168
|
||||
#define STORE_FAST__STORE_FAST 169
|
||||
#define DO_TRACING 255
|
||||
#ifdef NEED_OPCODE_JUMP_TABLES
|
||||
static uint32_t _PyOpcode_RelativeJump[8] = {
|
||||
|
|
|
@ -237,6 +237,7 @@ _specialized_instructions = [
|
|||
"CALL_FUNCTION_BUILTIN_FAST",
|
||||
"CALL_FUNCTION_LEN",
|
||||
"CALL_FUNCTION_ISINSTANCE",
|
||||
"CALL_FUNCTION_PY_SIMPLE",
|
||||
"JUMP_ABSOLUTE_QUICK",
|
||||
"LOAD_ATTR_ADAPTIVE",
|
||||
"LOAD_ATTR_INSTANCE_VALUE",
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Specialize simple calls to Python functions (no starargs, keyword dict, or closure)
|
|
@ -4720,9 +4720,9 @@ check_eval_breaker:
|
|||
|
||||
TARGET(CALL_FUNCTION_ADAPTIVE) {
|
||||
SpecializedCacheEntry *cache = GET_CACHE();
|
||||
nargs = cache->adaptive.original_oparg;
|
||||
if (cache->adaptive.counter == 0) {
|
||||
next_instr--;
|
||||
int nargs = cache->adaptive.original_oparg;
|
||||
if (_Py_Specialize_CallFunction(
|
||||
PEEK(nargs + 1), next_instr, nargs, cache, BUILTINS()) < 0) {
|
||||
goto error;
|
||||
|
@ -4732,11 +4732,50 @@ check_eval_breaker:
|
|||
else {
|
||||
STAT_INC(CALL_FUNCTION, deferred);
|
||||
cache->adaptive.counter--;
|
||||
oparg = cache->adaptive.original_oparg;
|
||||
JUMP_TO_INSTRUCTION(CALL_FUNCTION);
|
||||
oparg = nargs;
|
||||
kwnames = NULL;
|
||||
postcall_shrink = 1;
|
||||
goto call_function;
|
||||
}
|
||||
}
|
||||
|
||||
/* Specialized form of CALL_FUNCTION for a Python function whose version
 * matches the cached one. Builds the callee's frame from the arguments on
 * the value stack plus the cached slice of defaults, makes that frame the
 * current one and continues at start_frame. Falls back to CALL_FUNCTION
 * (via DEOPT_IF) whenever one of the guards below fails. */
TARGET(CALL_FUNCTION_PY_SIMPLE) {
|
||||
SpecializedCacheEntry *caches = GET_CACHE();
|
||||
_PyAdaptiveEntry *cache0 = &caches[0].adaptive;
|
||||
/* The argument count is read from the cache, not from oparg. */
int argcount = cache0->original_oparg;
|
||||
/* The call cache lives in the entry just before the adaptive entry. */
_PyCallCache *cache1 = &caches[-1].call;
|
||||
/* NOTE(review): presumably the callable sits just below the argcount
 * arguments on the stack -- confirm against the PEEK definition. */
PyObject *callable = PEEK(argcount+1);
|
||||
DEOPT_IF(!PyFunction_Check(callable), CALL_FUNCTION);
|
||||
PyFunctionObject *func = (PyFunctionObject *)callable;
|
||||
/* Guard: the function must still have the version seen when this call
 * site was specialized, otherwise the cached defaults slice is stale. */
DEOPT_IF(func->func_version != cache1->func_version, CALL_FUNCTION);
|
||||
/* PEP 523: deoptimize when a custom frame evaluation function is set. */
|
||||
DEOPT_IF(tstate->interp->eval_frame != NULL, CALL_FUNCTION);
|
||||
STAT_INC(CALL_FUNCTION, hit);
|
||||
record_cache_hit(cache0);
|
||||
InterpreterFrame *new_frame = _PyThreadState_PushFrame(
|
||||
tstate, PyFunction_AS_FRAME_CONSTRUCTOR(func), NULL);
|
||||
/* Nothing has been popped yet, so the stack is still intact on failure. */
if (new_frame == NULL) {
|
||||
goto error;
|
||||
}
|
||||
/* NOTE(review): STACK_SHRINK presumably lowers stack_pointer so that
 * stack_pointer[i] addresses the i-th argument -- confirm. The argument
 * references are moved into the new frame without an extra INCREF. */
STACK_SHRINK(argcount);
|
||||
for (int i = 0; i < argcount; i++) {
|
||||
new_frame->localsplus[i] = stack_pointer[i];
|
||||
}
|
||||
/* Fill the parameters that were not passed with new references to the
 * cached run of defaults, placed right after the passed arguments. */
int deflen = cache1->defaults_len;
|
||||
for (int i = 0; i < deflen; i++) {
|
||||
PyObject *def = PyTuple_GET_ITEM(func->func_defaults, cache1->defaults_start+i);
|
||||
Py_INCREF(def);
|
||||
new_frame->localsplus[argcount+i] = def;
|
||||
}
|
||||
/* Remove the callable's stack slot and drop the reference it held. */
STACK_SHRINK(1);
|
||||
Py_DECREF(func);
|
||||
/* Store the caller's stack pointer in its frame before switching. */
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
/* Link the new frame behind the current one and make it current. */
new_frame->previous = tstate->frame;
|
||||
new_frame->depth = frame->depth + 1;
|
||||
tstate->frame = frame = new_frame;
|
||||
goto start_frame;
|
||||
}
|
||||
|
||||
TARGET(CALL_FUNCTION_BUILTIN_O) {
|
||||
assert(cframe.use_tracing == 0);
|
||||
/* Builtin METH_O functions */
|
||||
|
|
|
@ -47,7 +47,7 @@ static void *opcode_targets[256] = {
|
|||
&&TARGET_CALL_FUNCTION_BUILTIN_FAST,
|
||||
&&TARGET_CALL_FUNCTION_LEN,
|
||||
&&TARGET_CALL_FUNCTION_ISINSTANCE,
|
||||
&&TARGET_JUMP_ABSOLUTE_QUICK,
|
||||
&&TARGET_CALL_FUNCTION_PY_SIMPLE,
|
||||
&&TARGET_WITH_EXCEPT_START,
|
||||
&&TARGET_GET_AITER,
|
||||
&&TARGET_GET_ANEXT,
|
||||
|
@ -57,7 +57,7 @@ static void *opcode_targets[256] = {
|
|||
&&TARGET_INPLACE_ADD,
|
||||
&&TARGET_INPLACE_SUBTRACT,
|
||||
&&TARGET_INPLACE_MULTIPLY,
|
||||
&&TARGET_LOAD_ATTR_ADAPTIVE,
|
||||
&&TARGET_JUMP_ABSOLUTE_QUICK,
|
||||
&&TARGET_INPLACE_MODULO,
|
||||
&&TARGET_STORE_SUBSCR,
|
||||
&&TARGET_DELETE_SUBSCR,
|
||||
|
@ -79,15 +79,15 @@ static void *opcode_targets[256] = {
|
|||
&&TARGET_INPLACE_AND,
|
||||
&&TARGET_INPLACE_XOR,
|
||||
&&TARGET_INPLACE_OR,
|
||||
&&TARGET_LOAD_ATTR_ADAPTIVE,
|
||||
&&TARGET_LOAD_ATTR_INSTANCE_VALUE,
|
||||
&&TARGET_LOAD_ATTR_WITH_HINT,
|
||||
&&TARGET_LIST_TO_TUPLE,
|
||||
&&TARGET_RETURN_VALUE,
|
||||
&&TARGET_IMPORT_STAR,
|
||||
&&TARGET_SETUP_ANNOTATIONS,
|
||||
&&TARGET_YIELD_VALUE,
|
||||
&&TARGET_LOAD_ATTR_WITH_HINT,
|
||||
&&TARGET_LOAD_ATTR_SLOT,
|
||||
&&TARGET_LOAD_ATTR_MODULE,
|
||||
&&TARGET_POP_EXCEPT,
|
||||
&&TARGET_STORE_NAME,
|
||||
&&TARGET_DELETE_NAME,
|
||||
|
@ -119,46 +119,46 @@ static void *opcode_targets[256] = {
|
|||
&&TARGET_IS_OP,
|
||||
&&TARGET_CONTAINS_OP,
|
||||
&&TARGET_RERAISE,
|
||||
&&TARGET_LOAD_GLOBAL_ADAPTIVE,
|
||||
&&TARGET_LOAD_ATTR_MODULE,
|
||||
&&TARGET_JUMP_IF_NOT_EXC_MATCH,
|
||||
&&TARGET_LOAD_GLOBAL_ADAPTIVE,
|
||||
&&TARGET_LOAD_GLOBAL_MODULE,
|
||||
&&TARGET_LOAD_GLOBAL_BUILTIN,
|
||||
&&TARGET_LOAD_FAST,
|
||||
&&TARGET_STORE_FAST,
|
||||
&&TARGET_DELETE_FAST,
|
||||
&&TARGET_LOAD_GLOBAL_BUILTIN,
|
||||
&&TARGET_LOAD_METHOD_ADAPTIVE,
|
||||
&&TARGET_LOAD_METHOD_CACHED,
|
||||
&&TARGET_GEN_START,
|
||||
&&TARGET_RAISE_VARARGS,
|
||||
&&TARGET_CALL_FUNCTION,
|
||||
&&TARGET_MAKE_FUNCTION,
|
||||
&&TARGET_BUILD_SLICE,
|
||||
&&TARGET_LOAD_METHOD_CLASS,
|
||||
&&TARGET_LOAD_METHOD_CACHED,
|
||||
&&TARGET_MAKE_CELL,
|
||||
&&TARGET_LOAD_CLOSURE,
|
||||
&&TARGET_LOAD_DEREF,
|
||||
&&TARGET_STORE_DEREF,
|
||||
&&TARGET_DELETE_DEREF,
|
||||
&&TARGET_LOAD_METHOD_MODULE,
|
||||
&&TARGET_LOAD_METHOD_CLASS,
|
||||
&&TARGET_CALL_FUNCTION_KW,
|
||||
&&TARGET_CALL_FUNCTION_EX,
|
||||
&&TARGET_LOAD_METHOD_NO_DICT,
|
||||
&&TARGET_LOAD_METHOD_MODULE,
|
||||
&&TARGET_EXTENDED_ARG,
|
||||
&&TARGET_LIST_APPEND,
|
||||
&&TARGET_SET_ADD,
|
||||
&&TARGET_MAP_ADD,
|
||||
&&TARGET_LOAD_CLASSDEREF,
|
||||
&&TARGET_LOAD_METHOD_NO_DICT,
|
||||
&&TARGET_STORE_ATTR_ADAPTIVE,
|
||||
&&TARGET_STORE_ATTR_INSTANCE_VALUE,
|
||||
&&TARGET_STORE_ATTR_SLOT,
|
||||
&&TARGET_MATCH_CLASS,
|
||||
&&TARGET_STORE_ATTR_SLOT,
|
||||
&&TARGET_STORE_ATTR_WITH_HINT,
|
||||
&&TARGET_LOAD_FAST__LOAD_FAST,
|
||||
&&TARGET_FORMAT_VALUE,
|
||||
&&TARGET_BUILD_CONST_KEY_MAP,
|
||||
&&TARGET_BUILD_STRING,
|
||||
&&TARGET_LOAD_FAST__LOAD_FAST,
|
||||
&&TARGET_STORE_FAST__LOAD_FAST,
|
||||
&&TARGET_LOAD_FAST__LOAD_CONST,
|
||||
&&TARGET_LOAD_METHOD,
|
||||
&&TARGET_CALL_METHOD,
|
||||
&&TARGET_LIST_EXTEND,
|
||||
|
@ -166,6 +166,7 @@ static void *opcode_targets[256] = {
|
|||
&&TARGET_DICT_MERGE,
|
||||
&&TARGET_DICT_UPDATE,
|
||||
&&TARGET_CALL_METHOD_KW,
|
||||
&&TARGET_LOAD_FAST__LOAD_CONST,
|
||||
&&TARGET_LOAD_CONST__LOAD_FAST,
|
||||
&&TARGET_STORE_FAST__STORE_FAST,
|
||||
&&_unknown_opcode,
|
||||
|
@ -253,6 +254,5 @@ static void *opcode_targets[256] = {
|
|||
&&_unknown_opcode,
|
||||
&&_unknown_opcode,
|
||||
&&_unknown_opcode,
|
||||
&&_unknown_opcode,
|
||||
&&TARGET_DO_TRACING
|
||||
};
|
||||
|
|
|
@ -249,7 +249,7 @@ static uint8_t cache_requirements[256] = {
|
|||
[BINARY_ADD] = 0,
|
||||
[BINARY_MULTIPLY] = 0,
|
||||
[BINARY_SUBSCR] = 0,
|
||||
[CALL_FUNCTION] = 2, /* _PyAdaptiveEntry and _PyObjectCache */
|
||||
[CALL_FUNCTION] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */
|
||||
[STORE_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */
|
||||
};
|
||||
|
||||
|
@ -461,15 +461,20 @@ _Py_Quicken(PyCodeObject *code) {
|
|||
#define SPEC_FAIL_NON_FUNCTION_SCOPE 11
|
||||
#define SPEC_FAIL_DIFFERENT_TYPES 12
|
||||
|
||||
/* Call function */
|
||||
/* Calls */
|
||||
#define SPEC_FAIL_GENERATOR 7
|
||||
#define SPEC_FAIL_COMPLEX_PARAMETERS 8
|
||||
#define SPEC_FAIL_WRONG_NUMBER_ARGUMENTS 9
|
||||
#define SPEC_FAIL_CO_NOT_OPTIMIZED 10
|
||||
/* SPEC_FAIL_METHOD defined as 11 above */
|
||||
#define SPEC_FAIL_FREE_VARS 12
|
||||
#define SPEC_FAIL_PYCFUNCTION 13
|
||||
#define SPEC_FAIL_PYCFUNCTION_WITH_KEYWORDS 14
|
||||
#define SPEC_FAIL_PYCFUNCTION_FAST_WITH_KEYWORDS 15
|
||||
#define SPEC_FAIL_PYCFUNCTION_NOARGS 16
|
||||
#define SPEC_FAIL_BAD_CALL_FLAGS 17
|
||||
#define SPEC_FAIL_CLASS 18
|
||||
|
||||
#define SPEC_FAIL_PYCFUNCTION 10
|
||||
#define SPEC_FAIL_PYCFUNCTION_WITH_KEYWORDS 13
|
||||
#define SPEC_FAIL_PYCFUNCTION_FAST_WITH_KEYWORDS 14
|
||||
#define SPEC_FAIL_PYCFUNCTION_NOARGS 15
|
||||
#define SPEC_FAIL_BAD_CALL_FLAGS 16
|
||||
#define SPEC_FAIL_PYTHON_FUNCTION 17
|
||||
#define SPEC_FAIL_IMMUTABLE_CLASS 18
|
||||
|
||||
static int
|
||||
specialize_module_load_attr(
|
||||
|
@ -1236,6 +1241,69 @@ success:
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* Placeholder for specializing calls to classes: no specialization is
 * attempted. Reports SPEC_FAIL_CLASS through SPECIALIZATION_FAIL and
 * always returns -1; none of the parameters are used yet. */
static int
|
||||
specialize_class_call(
|
||||
PyObject *callable, _Py_CODEUNIT *instr,
|
||||
int nargs, SpecializedCacheEntry *cache)
|
||||
{
|
||||
SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_CLASS);
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int
|
||||
specialize_py_call(
|
||||
PyFunctionObject *func, _Py_CODEUNIT *instr,
|
||||
int nargs, SpecializedCacheEntry *cache)
|
||||
{
|
||||
_PyCallCache *cache1 = &cache[-1].call;
|
||||
/* Exclude generator or coroutines for now */
|
||||
PyCodeObject *code = (PyCodeObject *)func->func_code;
|
||||
int flags = code->co_flags;
|
||||
if (flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR)) {
|
||||
SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_GENERATOR);
|
||||
return -1;
|
||||
}
|
||||
if ((flags & (CO_VARKEYWORDS | CO_VARARGS)) || code->co_kwonlyargcount) {
|
||||
SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_COMPLEX_PARAMETERS);
|
||||
return -1;
|
||||
}
|
||||
if ((flags & CO_OPTIMIZED) == 0) {
|
||||
SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_CO_NOT_OPTIMIZED);
|
||||
return -1;
|
||||
}
|
||||
if (code->co_nfreevars) {
|
||||
SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_FREE_VARS);
|
||||
return -1;
|
||||
}
|
||||
int argcount = code->co_argcount;
|
||||
int defcount = func->func_defaults == NULL ? 0 : (int)PyTuple_GET_SIZE(func->func_defaults);
|
||||
assert(defcount <= argcount);
|
||||
int min_args = argcount-defcount;
|
||||
if (nargs > argcount || nargs < min_args) {
|
||||
SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS);
|
||||
return -1;
|
||||
}
|
||||
assert(nargs <= argcount && nargs >= min_args);
|
||||
int defstart = nargs - min_args;
|
||||
int deflen = argcount - nargs;
|
||||
assert(defstart >= 0 && deflen >= 0);
|
||||
assert(deflen == 0 || func->func_defaults != NULL);
|
||||
if (defstart > 0xffff || deflen > 0xffff) {
|
||||
SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_OUT_OF_RANGE);
|
||||
return -1;
|
||||
}
|
||||
int version = _PyFunction_GetVersionForCurrentState(func);
|
||||
if (version == 0) {
|
||||
SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_OUT_OF_VERSIONS);
|
||||
return -1;
|
||||
}
|
||||
cache1->func_version = version;
|
||||
cache1->defaults_start = defstart;
|
||||
cache1->defaults_len = deflen;
|
||||
*instr = _Py_MAKECODEUNIT(CALL_FUNCTION_PY_SIMPLE, _Py_OPARG(*instr));
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if COLLECT_SPECIALIZATION_STATS_DETAILED
|
||||
static int
|
||||
builtin_call_fail_kind(int ml_flags)
|
||||
|
@ -1315,11 +1383,7 @@ specialize_c_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
|
|||
static int
|
||||
call_fail_kind(PyObject *callable)
|
||||
{
|
||||
if (PyFunction_Check(callable)) {
|
||||
return SPEC_FAIL_PYTHON_FUNCTION;
|
||||
}
|
||||
// new-style bound methods
|
||||
else if (PyInstanceMethod_Check(callable)) {
|
||||
if (PyInstanceMethod_Check(callable)) {
|
||||
return SPEC_FAIL_METHOD;
|
||||
}
|
||||
else if (PyMethod_Check(callable)) {
|
||||
|
@ -1330,17 +1394,14 @@ call_fail_kind(PyObject *callable)
|
|||
return SPEC_FAIL_METHOD;
|
||||
}
|
||||
else if (PyType_Check(callable)) {
|
||||
PyTypeObject *type = Py_TYPE(callable);
|
||||
return PyType_HasFeature(type, Py_TPFLAGS_IMMUTABLETYPE) ?
|
||||
SPEC_FAIL_IMMUTABLE_CLASS : SPEC_FAIL_MUTABLE_CLASS;
|
||||
return SPEC_FAIL_CLASS;
|
||||
}
|
||||
return SPEC_FAIL_OTHER;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* TODO:
|
||||
- Specialize calling types.
|
||||
- Specialize python function calls.
|
||||
- Specialize calling classes.
|
||||
*/
|
||||
int
|
||||
_Py_Specialize_CallFunction(
|
||||
|
@ -1352,9 +1413,15 @@ _Py_Specialize_CallFunction(
|
|||
if (PyCFunction_CheckExact(callable)) {
|
||||
fail = specialize_c_call(callable, instr, nargs, cache, builtins);
|
||||
}
|
||||
else if (PyFunction_Check(callable)) {
|
||||
fail = specialize_py_call((PyFunctionObject *)callable, instr, nargs, cache);
|
||||
}
|
||||
else if (PyType_Check(callable)) {
|
||||
fail = specialize_class_call(callable, instr, nargs, cache);
|
||||
}
|
||||
else {
|
||||
SPECIALIZATION_FAIL(CALL_FUNCTION, call_fail_kind(callable));
|
||||
fail = 1;
|
||||
fail = -1;
|
||||
}
|
||||
_PyAdaptiveEntry *cache0 = &cache->adaptive;
|
||||
if (fail) {
|
||||
|
|
Loading…
Reference in New Issue