diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index d464f3d2a81..482bd7eb6ae 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -35,6 +35,12 @@ typedef struct { PyObject *obj; } _PyObjectCache; +typedef struct { + uint32_t func_version; + uint16_t defaults_start; + uint16_t defaults_len; +} _PyCallCache; + /* Add specialized versions of entries to this union. * * Do not break the invariant: sizeof(SpecializedCacheEntry) == 8 @@ -51,6 +57,7 @@ typedef union { _PyAttrCache attr; _PyLoadGlobalCache load_global; _PyObjectCache obj; + _PyCallCache call; } SpecializedCacheEntry; #define INSTRUCTIONS_PER_ENTRY (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT)) diff --git a/Include/opcode.h b/Include/opcode.h index 22d968ee0d4..f8c02b840e0 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -153,29 +153,30 @@ extern "C" { #define CALL_FUNCTION_BUILTIN_FAST 45 #define CALL_FUNCTION_LEN 46 #define CALL_FUNCTION_ISINSTANCE 47 -#define JUMP_ABSOLUTE_QUICK 48 -#define LOAD_ATTR_ADAPTIVE 58 -#define LOAD_ATTR_INSTANCE_VALUE 80 -#define LOAD_ATTR_WITH_HINT 81 -#define LOAD_ATTR_SLOT 87 -#define LOAD_ATTR_MODULE 88 -#define LOAD_GLOBAL_ADAPTIVE 120 -#define LOAD_GLOBAL_MODULE 122 -#define LOAD_GLOBAL_BUILTIN 123 -#define LOAD_METHOD_ADAPTIVE 127 -#define LOAD_METHOD_CACHED 128 -#define LOAD_METHOD_CLASS 134 -#define LOAD_METHOD_MODULE 140 -#define LOAD_METHOD_NO_DICT 143 -#define STORE_ATTR_ADAPTIVE 149 -#define STORE_ATTR_INSTANCE_VALUE 150 -#define STORE_ATTR_SLOT 151 -#define STORE_ATTR_WITH_HINT 153 -#define LOAD_FAST__LOAD_FAST 154 -#define STORE_FAST__LOAD_FAST 158 -#define LOAD_FAST__LOAD_CONST 159 -#define LOAD_CONST__LOAD_FAST 167 -#define STORE_FAST__STORE_FAST 168 +#define CALL_FUNCTION_PY_SIMPLE 48 +#define JUMP_ABSOLUTE_QUICK 58 +#define LOAD_ATTR_ADAPTIVE 80 +#define LOAD_ATTR_INSTANCE_VALUE 81 +#define LOAD_ATTR_WITH_HINT 87 +#define LOAD_ATTR_SLOT 88 +#define LOAD_ATTR_MODULE 120 +#define 
LOAD_GLOBAL_ADAPTIVE 122 +#define LOAD_GLOBAL_MODULE 123 +#define LOAD_GLOBAL_BUILTIN 127 +#define LOAD_METHOD_ADAPTIVE 128 +#define LOAD_METHOD_CACHED 134 +#define LOAD_METHOD_CLASS 140 +#define LOAD_METHOD_MODULE 143 +#define LOAD_METHOD_NO_DICT 149 +#define STORE_ATTR_ADAPTIVE 150 +#define STORE_ATTR_INSTANCE_VALUE 151 +#define STORE_ATTR_SLOT 153 +#define STORE_ATTR_WITH_HINT 154 +#define LOAD_FAST__LOAD_FAST 158 +#define STORE_FAST__LOAD_FAST 159 +#define LOAD_FAST__LOAD_CONST 167 +#define LOAD_CONST__LOAD_FAST 168 +#define STORE_FAST__STORE_FAST 169 #define DO_TRACING 255 #ifdef NEED_OPCODE_JUMP_TABLES static uint32_t _PyOpcode_RelativeJump[8] = { diff --git a/Lib/opcode.py b/Lib/opcode.py index fe6066fc517..5377ec32bf1 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -237,6 +237,7 @@ _specialized_instructions = [ "CALL_FUNCTION_BUILTIN_FAST", "CALL_FUNCTION_LEN", "CALL_FUNCTION_ISINSTANCE", + "CALL_FUNCTION_PY_SIMPLE", "JUMP_ABSOLUTE_QUICK", "LOAD_ATTR_ADAPTIVE", "LOAD_ATTR_INSTANCE_VALUE", diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-10-20-11-57-31.bpo-44525.veL4lJ.rst b/Misc/NEWS.d/next/Core and Builtins/2021-10-20-11-57-31.bpo-44525.veL4lJ.rst new file mode 100644 index 00000000000..6ab1d05603d --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-10-20-11-57-31.bpo-44525.veL4lJ.rst @@ -0,0 +1 @@ +Specialize simple calls to Python functions (no starargs, keyword dict, or closure) diff --git a/Python/ceval.c b/Python/ceval.c index 76325903149..f4186dae8a4 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4720,9 +4720,9 @@ check_eval_breaker: TARGET(CALL_FUNCTION_ADAPTIVE) { SpecializedCacheEntry *cache = GET_CACHE(); + nargs = cache->adaptive.original_oparg; if (cache->adaptive.counter == 0) { next_instr--; - int nargs = cache->adaptive.original_oparg; if (_Py_Specialize_CallFunction( PEEK(nargs + 1), next_instr, nargs, cache, BUILTINS()) < 0) { goto error; @@ -4732,11 +4732,50 @@ check_eval_breaker: else { STAT_INC(CALL_FUNCTION, 
deferred); cache->adaptive.counter--; - oparg = cache->adaptive.original_oparg; - JUMP_TO_INSTRUCTION(CALL_FUNCTION); + oparg = nargs; + kwnames = NULL; + postcall_shrink = 1; + goto call_function; } } + TARGET(CALL_FUNCTION_PY_SIMPLE) { + SpecializedCacheEntry *caches = GET_CACHE(); + _PyAdaptiveEntry *cache0 = &caches[0].adaptive; + int argcount = cache0->original_oparg; + _PyCallCache *cache1 = &caches[-1].call; + PyObject *callable = PEEK(argcount+1); + DEOPT_IF(!PyFunction_Check(callable), CALL_FUNCTION); + PyFunctionObject *func = (PyFunctionObject *)callable; + DEOPT_IF(func->func_version != cache1->func_version, CALL_FUNCTION); + /* PEP 523 */ + DEOPT_IF(tstate->interp->eval_frame != NULL, CALL_FUNCTION); + STAT_INC(CALL_FUNCTION, hit); + record_cache_hit(cache0); + InterpreterFrame *new_frame = _PyThreadState_PushFrame( + tstate, PyFunction_AS_FRAME_CONSTRUCTOR(func), NULL); + if (new_frame == NULL) { + goto error; + } + STACK_SHRINK(argcount); + for (int i = 0; i < argcount; i++) { + new_frame->localsplus[i] = stack_pointer[i]; + } + int deflen = cache1->defaults_len; + for (int i = 0; i < deflen; i++) { + PyObject *def = PyTuple_GET_ITEM(func->func_defaults, cache1->defaults_start+i); + Py_INCREF(def); + new_frame->localsplus[argcount+i] = def; + } + STACK_SHRINK(1); + Py_DECREF(func); + _PyFrame_SetStackPointer(frame, stack_pointer); + new_frame->previous = tstate->frame; + new_frame->depth = frame->depth + 1; + tstate->frame = frame = new_frame; + goto start_frame; + } + TARGET(CALL_FUNCTION_BUILTIN_O) { assert(cframe.use_tracing == 0); /* Builtin METH_O functions */ diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index 4179689e8a8..5c7d3ad544e 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -47,7 +47,7 @@ static void *opcode_targets[256] = { &&TARGET_CALL_FUNCTION_BUILTIN_FAST, &&TARGET_CALL_FUNCTION_LEN, &&TARGET_CALL_FUNCTION_ISINSTANCE, - &&TARGET_JUMP_ABSOLUTE_QUICK, + &&TARGET_CALL_FUNCTION_PY_SIMPLE, 
&&TARGET_WITH_EXCEPT_START, &&TARGET_GET_AITER, &&TARGET_GET_ANEXT, @@ -57,7 +57,7 @@ static void *opcode_targets[256] = { &&TARGET_INPLACE_ADD, &&TARGET_INPLACE_SUBTRACT, &&TARGET_INPLACE_MULTIPLY, - &&TARGET_LOAD_ATTR_ADAPTIVE, + &&TARGET_JUMP_ABSOLUTE_QUICK, &&TARGET_INPLACE_MODULO, &&TARGET_STORE_SUBSCR, &&TARGET_DELETE_SUBSCR, @@ -79,15 +79,15 @@ static void *opcode_targets[256] = { &&TARGET_INPLACE_AND, &&TARGET_INPLACE_XOR, &&TARGET_INPLACE_OR, + &&TARGET_LOAD_ATTR_ADAPTIVE, &&TARGET_LOAD_ATTR_INSTANCE_VALUE, - &&TARGET_LOAD_ATTR_WITH_HINT, &&TARGET_LIST_TO_TUPLE, &&TARGET_RETURN_VALUE, &&TARGET_IMPORT_STAR, &&TARGET_SETUP_ANNOTATIONS, &&TARGET_YIELD_VALUE, + &&TARGET_LOAD_ATTR_WITH_HINT, &&TARGET_LOAD_ATTR_SLOT, - &&TARGET_LOAD_ATTR_MODULE, &&TARGET_POP_EXCEPT, &&TARGET_STORE_NAME, &&TARGET_DELETE_NAME, @@ -119,46 +119,46 @@ static void *opcode_targets[256] = { &&TARGET_IS_OP, &&TARGET_CONTAINS_OP, &&TARGET_RERAISE, - &&TARGET_LOAD_GLOBAL_ADAPTIVE, + &&TARGET_LOAD_ATTR_MODULE, &&TARGET_JUMP_IF_NOT_EXC_MATCH, + &&TARGET_LOAD_GLOBAL_ADAPTIVE, &&TARGET_LOAD_GLOBAL_MODULE, - &&TARGET_LOAD_GLOBAL_BUILTIN, &&TARGET_LOAD_FAST, &&TARGET_STORE_FAST, &&TARGET_DELETE_FAST, + &&TARGET_LOAD_GLOBAL_BUILTIN, &&TARGET_LOAD_METHOD_ADAPTIVE, - &&TARGET_LOAD_METHOD_CACHED, &&TARGET_GEN_START, &&TARGET_RAISE_VARARGS, &&TARGET_CALL_FUNCTION, &&TARGET_MAKE_FUNCTION, &&TARGET_BUILD_SLICE, - &&TARGET_LOAD_METHOD_CLASS, + &&TARGET_LOAD_METHOD_CACHED, &&TARGET_MAKE_CELL, &&TARGET_LOAD_CLOSURE, &&TARGET_LOAD_DEREF, &&TARGET_STORE_DEREF, &&TARGET_DELETE_DEREF, - &&TARGET_LOAD_METHOD_MODULE, + &&TARGET_LOAD_METHOD_CLASS, &&TARGET_CALL_FUNCTION_KW, &&TARGET_CALL_FUNCTION_EX, - &&TARGET_LOAD_METHOD_NO_DICT, + &&TARGET_LOAD_METHOD_MODULE, &&TARGET_EXTENDED_ARG, &&TARGET_LIST_APPEND, &&TARGET_SET_ADD, &&TARGET_MAP_ADD, &&TARGET_LOAD_CLASSDEREF, + &&TARGET_LOAD_METHOD_NO_DICT, &&TARGET_STORE_ATTR_ADAPTIVE, &&TARGET_STORE_ATTR_INSTANCE_VALUE, - &&TARGET_STORE_ATTR_SLOT, &&TARGET_MATCH_CLASS, 
+ &&TARGET_STORE_ATTR_SLOT, &&TARGET_STORE_ATTR_WITH_HINT, - &&TARGET_LOAD_FAST__LOAD_FAST, &&TARGET_FORMAT_VALUE, &&TARGET_BUILD_CONST_KEY_MAP, &&TARGET_BUILD_STRING, + &&TARGET_LOAD_FAST__LOAD_FAST, &&TARGET_STORE_FAST__LOAD_FAST, - &&TARGET_LOAD_FAST__LOAD_CONST, &&TARGET_LOAD_METHOD, &&TARGET_CALL_METHOD, &&TARGET_LIST_EXTEND, @@ -166,6 +166,7 @@ static void *opcode_targets[256] = { &&TARGET_DICT_MERGE, &&TARGET_DICT_UPDATE, &&TARGET_CALL_METHOD_KW, + &&TARGET_LOAD_FAST__LOAD_CONST, &&TARGET_LOAD_CONST__LOAD_FAST, &&TARGET_STORE_FAST__STORE_FAST, &&_unknown_opcode, @@ -253,6 +254,5 @@ static void *opcode_targets[256] = { &&_unknown_opcode, &&_unknown_opcode, &&_unknown_opcode, - &&_unknown_opcode, &&TARGET_DO_TRACING }; diff --git a/Python/specialize.c b/Python/specialize.c index ee573d29a47..5cc7082a35a 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -249,7 +249,7 @@ static uint8_t cache_requirements[256] = { [BINARY_ADD] = 0, [BINARY_MULTIPLY] = 0, [BINARY_SUBSCR] = 0, - [CALL_FUNCTION] = 2, /* _PyAdaptiveEntry and _PyObjectCache */ + [CALL_FUNCTION] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */ [STORE_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */ }; @@ -461,15 +461,20 @@ _Py_Quicken(PyCodeObject *code) { #define SPEC_FAIL_NON_FUNCTION_SCOPE 11 #define SPEC_FAIL_DIFFERENT_TYPES 12 -/* Call function */ +/* Calls */ +#define SPEC_FAIL_GENERATOR 7 +#define SPEC_FAIL_COMPLEX_PARAMETERS 8 +#define SPEC_FAIL_WRONG_NUMBER_ARGUMENTS 9 +#define SPEC_FAIL_CO_NOT_OPTIMIZED 10 +/* SPEC_FAIL_METHOD defined as 11 above */ +#define SPEC_FAIL_FREE_VARS 12 +#define SPEC_FAIL_PYCFUNCTION 13 +#define SPEC_FAIL_PYCFUNCTION_WITH_KEYWORDS 14 +#define SPEC_FAIL_PYCFUNCTION_FAST_WITH_KEYWORDS 15 +#define SPEC_FAIL_PYCFUNCTION_NOARGS 16 +#define SPEC_FAIL_BAD_CALL_FLAGS 17 +#define SPEC_FAIL_CLASS 18 -#define SPEC_FAIL_PYCFUNCTION 10 -#define SPEC_FAIL_PYCFUNCTION_WITH_KEYWORDS 13 -#define SPEC_FAIL_PYCFUNCTION_FAST_WITH_KEYWORDS 14 -#define 
SPEC_FAIL_PYCFUNCTION_NOARGS 15 -#define SPEC_FAIL_BAD_CALL_FLAGS 16 -#define SPEC_FAIL_PYTHON_FUNCTION 17 -#define SPEC_FAIL_IMMUTABLE_CLASS 18 static int specialize_module_load_attr( @@ -1236,6 +1241,69 @@ success: return 0; } +static int +specialize_class_call( + PyObject *callable, _Py_CODEUNIT *instr, + int nargs, SpecializedCacheEntry *cache) +{ + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_CLASS); + return -1; +} + +static int +specialize_py_call( + PyFunctionObject *func, _Py_CODEUNIT *instr, + int nargs, SpecializedCacheEntry *cache) +{ + _PyCallCache *cache1 = &cache[-1].call; + /* Exclude generator or coroutines for now */ + PyCodeObject *code = (PyCodeObject *)func->func_code; + int flags = code->co_flags; + if (flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR)) { + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_GENERATOR); + return -1; + } + if ((flags & (CO_VARKEYWORDS | CO_VARARGS)) || code->co_kwonlyargcount) { + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_COMPLEX_PARAMETERS); + return -1; + } + if ((flags & CO_OPTIMIZED) == 0) { + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_CO_NOT_OPTIMIZED); + return -1; + } + if (code->co_nfreevars) { + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_FREE_VARS); + return -1; + } + int argcount = code->co_argcount; + int defcount = func->func_defaults == NULL ? 
0 : (int)PyTuple_GET_SIZE(func->func_defaults); + assert(defcount <= argcount); + int min_args = argcount-defcount; + if (nargs > argcount || nargs < min_args) { + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS); + return -1; + } + assert(nargs <= argcount && nargs >= min_args); + int defstart = nargs - min_args; + int deflen = argcount - nargs; + assert(defstart >= 0 && deflen >= 0); + assert(deflen == 0 || func->func_defaults != NULL); + if (defstart > 0xffff || deflen > 0xffff) { + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_OUT_OF_RANGE); + return -1; + } + int version = _PyFunction_GetVersionForCurrentState(func); + if (version == 0) { + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_OUT_OF_VERSIONS); + return -1; + } + cache1->func_version = version; + cache1->defaults_start = defstart; + cache1->defaults_len = deflen; + *instr = _Py_MAKECODEUNIT(CALL_FUNCTION_PY_SIMPLE, _Py_OPARG(*instr)); + return 0; +} + #if COLLECT_SPECIALIZATION_STATS_DETAILED static int builtin_call_fail_kind(int ml_flags) @@ -1315,11 +1383,7 @@ specialize_c_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs, static int call_fail_kind(PyObject *callable) { - if (PyFunction_Check(callable)) { - return SPEC_FAIL_PYTHON_FUNCTION; - } - // new-style bound methods - else if (PyInstanceMethod_Check(callable)) { + if (PyInstanceMethod_Check(callable)) { return SPEC_FAIL_METHOD; } else if (PyMethod_Check(callable)) { @@ -1330,17 +1394,14 @@ call_fail_kind(PyObject *callable) return SPEC_FAIL_METHOD; } else if (PyType_Check(callable)) { - PyTypeObject *type = Py_TYPE(callable); - return PyType_HasFeature(type, Py_TPFLAGS_IMMUTABLETYPE) ? - SPEC_FAIL_IMMUTABLE_CLASS : SPEC_FAIL_MUTABLE_CLASS; + return SPEC_FAIL_CLASS; } return SPEC_FAIL_OTHER; } #endif /* TODO: - - Specialize calling types. - - Specialize python function calls. + - Specialize calling classes. 
*/ int _Py_Specialize_CallFunction( @@ -1352,9 +1413,15 @@ _Py_Specialize_CallFunction( if (PyCFunction_CheckExact(callable)) { fail = specialize_c_call(callable, instr, nargs, cache, builtins); } + else if (PyFunction_Check(callable)) { + fail = specialize_py_call((PyFunctionObject *)callable, instr, nargs, cache); + } + else if (PyType_Check(callable)) { + fail = specialize_class_call(callable, instr, nargs, cache); + } else { SPECIALIZATION_FAIL(CALL_FUNCTION, call_fail_kind(callable)); - fail = 1; + fail = -1; } _PyAdaptiveEntry *cache0 = &cache->adaptive; if (fail) {