From d3eaf0cc5b311ad023fd13e367f817d528403306 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 27 Aug 2021 09:21:01 +0100 Subject: [PATCH] bpo-44945: Specialize BINARY_ADD (GH-27967) --- Include/internal/pycore_code.h | 1 + Include/internal/pycore_long.h | 2 + Include/opcode.h | 57 ++++---- Lib/opcode.py | 5 + .../2021-08-18-11-14-38.bpo-44945.CO3s77.rst | 7 + Objects/longobject.c | 18 ++- Python/ceval.c | 129 ++++++++++++++++-- Python/opcode_targets.h | 50 +++---- Python/specialize.c | 62 ++++++++- 9 files changed, 253 insertions(+), 78 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-08-18-11-14-38.bpo-44945.CO3s77.rst diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 6289acdd968..0b127ed2899 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -307,6 +307,7 @@ int _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *nam int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr); +int _Py_Specialize_BinaryAdd(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr); #define PRINT_SPECIALIZATION_STATS 0 #define PRINT_SPECIALIZATION_STATS_DETAILED 0 diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index 2bea3a55ec8..7336c317c3f 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -34,6 +34,8 @@ static inline PyObject* _PyLong_GetZero(void) static inline PyObject* _PyLong_GetOne(void) { return __PyLong_GetSmallInt_internal(1); } +PyObject *_PyLong_Add(PyLongObject *left, PyLongObject *right); + #ifdef __cplusplus } #endif diff --git a/Include/opcode.h b/Include/opcode.h index 6b0298224ae..0043cc2d209 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -136,32 +136,37 @@ extern "C" { #define DICT_MERGE 164 #define DICT_UPDATE 165 #define CALL_METHOD_KW 166 -#define BINARY_SUBSCR_ADAPTIVE 7 -#define BINARY_SUBSCR_LIST_INT 8 -#define BINARY_SUBSCR_TUPLE_INT 13 -#define BINARY_SUBSCR_DICT 14 -#define JUMP_ABSOLUTE_QUICK 18 -#define LOAD_ATTR_ADAPTIVE 21 -#define LOAD_ATTR_SPLIT_KEYS 36 -#define LOAD_ATTR_WITH_HINT 38 -#define LOAD_ATTR_SLOT 39 -#define LOAD_ATTR_MODULE 40 -#define LOAD_GLOBAL_ADAPTIVE 41 -#define LOAD_GLOBAL_MODULE 42 -#define LOAD_GLOBAL_BUILTIN 43 -#define LOAD_METHOD_ADAPTIVE 44 -#define LOAD_METHOD_CACHED 45 -#define LOAD_METHOD_CLASS 46 -#define LOAD_METHOD_MODULE 47 -#define STORE_ATTR_ADAPTIVE 48 -#define STORE_ATTR_SPLIT_KEYS 58 -#define STORE_ATTR_SLOT 80 -#define STORE_ATTR_WITH_HINT 81 -#define LOAD_FAST__LOAD_FAST 87 -#define STORE_FAST__LOAD_FAST 88 -#define LOAD_FAST__LOAD_CONST 120 -#define LOAD_CONST__LOAD_FAST 122 -#define STORE_FAST__STORE_FAST 123 +#define BINARY_ADD_ADAPTIVE 7 +#define BINARY_ADD_INT 8 +#define BINARY_ADD_FLOAT 13 +#define BINARY_ADD_UNICODE 14 +#define BINARY_ADD_UNICODE_INPLACE_FAST 18 +#define BINARY_SUBSCR_ADAPTIVE 21 +#define BINARY_SUBSCR_LIST_INT 36 +#define BINARY_SUBSCR_TUPLE_INT 38 +#define BINARY_SUBSCR_DICT 39 +#define JUMP_ABSOLUTE_QUICK 40 +#define LOAD_ATTR_ADAPTIVE 41 +#define LOAD_ATTR_SPLIT_KEYS 42 +#define LOAD_ATTR_WITH_HINT 43 +#define LOAD_ATTR_SLOT 44 +#define LOAD_ATTR_MODULE 45 +#define LOAD_GLOBAL_ADAPTIVE 46 +#define LOAD_GLOBAL_MODULE 47 +#define 
LOAD_GLOBAL_BUILTIN 48 +#define LOAD_METHOD_ADAPTIVE 58 +#define LOAD_METHOD_CACHED 80 +#define LOAD_METHOD_CLASS 81 +#define LOAD_METHOD_MODULE 87 +#define STORE_ATTR_ADAPTIVE 88 +#define STORE_ATTR_SPLIT_KEYS 120 +#define STORE_ATTR_SLOT 122 +#define STORE_ATTR_WITH_HINT 123 +#define LOAD_FAST__LOAD_FAST 127 +#define STORE_FAST__LOAD_FAST 128 +#define LOAD_FAST__LOAD_CONST 134 +#define LOAD_CONST__LOAD_FAST 140 +#define STORE_FAST__STORE_FAST 143 #ifdef NEED_OPCODE_JUMP_TABLES static uint32_t _PyOpcode_RelativeJump[8] = { 0U, diff --git a/Lib/opcode.py b/Lib/opcode.py index d3a7c8bf16f..5d356746888 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -220,6 +220,11 @@ def_op('CALL_METHOD_KW', 166) del def_op, name_op, jrel_op, jabs_op _specialized_instructions = [ + "BINARY_ADD_ADAPTIVE", + "BINARY_ADD_INT", + "BINARY_ADD_FLOAT", + "BINARY_ADD_UNICODE", + "BINARY_ADD_UNICODE_INPLACE_FAST", "BINARY_SUBSCR_ADAPTIVE", "BINARY_SUBSCR_LIST_INT", "BINARY_SUBSCR_TUPLE_INT", diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-08-18-11-14-38.bpo-44945.CO3s77.rst b/Misc/NEWS.d/next/Core and Builtins/2021-08-18-11-14-38.bpo-44945.CO3s77.rst new file mode 100644 index 00000000000..66d53ec523d --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-08-18-11-14-38.bpo-44945.CO3s77.rst @@ -0,0 +1,7 @@ +Specialize the BINARY_ADD instruction using the PEP 659 machinery. Adds five new instructions: + +* BINARY_ADD_ADAPTIVE +* BINARY_ADD_FLOAT +* BINARY_ADD_INT +* BINARY_ADD_UNICODE +* BINARY_ADD_UNICODE_INPLACE_FAST diff --git a/Objects/longobject.c b/Objects/longobject.c index 18b0839adb6..3b6df12212c 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -3119,16 +3119,14 @@ x_sub(PyLongObject *a, PyLongObject *b) return maybe_small_long(long_normalize(z)); } -static PyObject * -long_add(PyLongObject *a, PyLongObject *b) +PyObject * +_PyLong_Add(PyLongObject *a, PyLongObject *b) { - PyLongObject *z; - - CHECK_BINOP(a, b); - if (IS_MEDIUM_VALUE(a) && IS_MEDIUM_VALUE(b)) { return _PyLong_FromSTwoDigits(medium_value(a) + medium_value(b)); } + + PyLongObject *z; if (Py_SIZE(a) < 0) { if (Py_SIZE(b) < 0) { z = x_add(a, b); @@ -3153,6 +3151,14 @@ long_add(PyLongObject *a, PyLongObject *b) return (PyObject *)z; } +static PyObject * +long_add(PyLongObject *a, PyLongObject *b) +{ + CHECK_BINOP(a, b); + return _PyLong_Add(a, b); +} + + static PyObject * long_sub(PyLongObject *a, PyLongObject *b) { diff --git a/Python/ceval.c b/Python/ceval.c index 5fec90b1048..8aaa83b1b74 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1435,6 +1435,12 @@ eval_frame_handle_pending(PyThreadState *tstate) #define UPDATE_PREV_INSTR_OPARG(instr, oparg) ((uint8_t*)(instr))[-1] = (oparg) +static inline void +record_hit_inline(_Py_CODEUNIT *next_instr, int oparg) +{ + UPDATE_PREV_INSTR_OPARG(next_instr, saturating_increment(oparg)); +} + #define GLOBALS() frame->f_globals #define BUILTINS() frame->f_builtins #define LOCALS() frame->f_locals @@ -1980,28 +1986,120 @@ check_eval_breaker: } TARGET(BINARY_ADD): { + PREDICTED(BINARY_ADD); + STAT_INC(BINARY_ADD, unquickened); PyObject *right = POP(); PyObject *left = TOP(); - PyObject *sum; - /* NOTE(vstinner): Please don't try to micro-optimize int+int on - CPython using bytecode, it is simply worthless. - See http://bugs.python.org/issue21955 and - http://bugs.python.org/issue10044 for the discussion. In short, - no patch shown any impact on a realistic benchmark, only a minor - speedup on microbenchmarks. 
*/ - if (PyUnicode_CheckExact(left) && - PyUnicode_CheckExact(right)) { - sum = unicode_concatenate(tstate, left, right, frame, next_instr); - /* unicode_concatenate consumed the ref to left */ + PyObject *sum = PyNumber_Add(left, right); + SET_TOP(sum); + Py_DECREF(left); + Py_DECREF(right); + if (sum == NULL) { + goto error; + } + DISPATCH(); + } + + TARGET(BINARY_ADD_ADAPTIVE): { + if (oparg == 0) { + PyObject *left = SECOND(); + PyObject *right = TOP(); + next_instr--; + if (_Py_Specialize_BinaryAdd(left, right, next_instr) < 0) { + goto error; + } + DISPATCH(); } else { - sum = PyNumber_Add(left, right); - Py_DECREF(left); + STAT_INC(BINARY_ADD, deferred); + UPDATE_PREV_INSTR_OPARG(next_instr, oparg - 1); + STAT_DEC(BINARY_ADD, unquickened); + JUMP_TO_INSTRUCTION(BINARY_ADD); } + } + + TARGET(BINARY_ADD_UNICODE): { + PyObject *left = SECOND(); + PyObject *right = TOP(); + DEOPT_IF(!PyUnicode_CheckExact(left), BINARY_ADD); + DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_ADD); + STAT_INC(BINARY_ADD, hit); + record_hit_inline(next_instr, oparg); + PyObject *res = PyUnicode_Concat(left, right); + STACK_SHRINK(1); + SET_TOP(res); + Py_DECREF(left); Py_DECREF(right); - SET_TOP(sum); - if (sum == NULL) + if (TOP() == NULL) { goto error; + } + DISPATCH(); + } + + TARGET(BINARY_ADD_UNICODE_INPLACE_FAST): { + PyObject *left = SECOND(); + PyObject *right = TOP(); + DEOPT_IF(!PyUnicode_CheckExact(left), BINARY_ADD); + DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_ADD); + DEOPT_IF(Py_REFCNT(left) != 2, BINARY_ADD); + int next_oparg = _Py_OPARG(*next_instr); + assert(_Py_OPCODE(*next_instr) == STORE_FAST); + /* In the common case, there are 2 references to the value + * stored in 'variable' when the v = v + ... is performed: one + * on the value stack (in 'v') and one still stored in the + * 'variable'. We try to delete the variable now to reduce + * the refcnt to 1. 
+ */ + PyObject *var = GETLOCAL(next_oparg); + DEOPT_IF(var != left, BINARY_ADD); + STAT_INC(BINARY_ADD, hit); + record_hit_inline(next_instr, oparg); + GETLOCAL(next_oparg) = NULL; + Py_DECREF(left); + STACK_SHRINK(1); + PyUnicode_Append(&TOP(), right); + Py_DECREF(right); + if (TOP() == NULL) { + goto error; + } + DISPATCH(); + } + + TARGET(BINARY_ADD_FLOAT): { + PyObject *left = SECOND(); + PyObject *right = TOP(); + DEOPT_IF(!PyFloat_CheckExact(left), BINARY_ADD); + DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_ADD); + STAT_INC(BINARY_ADD, hit); + record_hit_inline(next_instr, oparg); + double dsum = ((PyFloatObject *)left)->ob_fval + + ((PyFloatObject *)right)->ob_fval; + PyObject *sum = PyFloat_FromDouble(dsum); + SET_SECOND(sum); + Py_DECREF(right); + Py_DECREF(left); + STACK_SHRINK(1); + if (sum == NULL) { + goto error; + } + DISPATCH(); + } + + TARGET(BINARY_ADD_INT): { + PyObject *left = SECOND(); + PyObject *right = TOP(); + DEOPT_IF(!PyLong_CheckExact(left), BINARY_ADD); + DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_ADD); + STAT_INC(BINARY_ADD, hit); + record_hit_inline(next_instr, oparg); + PyObject *sum = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right); + SET_SECOND(sum); + Py_DECREF(right); + Py_DECREF(left); + STACK_SHRINK(1); + if (sum == NULL) { + goto error; + } DISPATCH(); } @@ -4761,6 +4859,7 @@ MISS_WITH_CACHE(STORE_ATTR) MISS_WITH_CACHE(LOAD_GLOBAL) MISS_WITH_CACHE(LOAD_METHOD) MISS_WITH_OPARG_COUNTER(BINARY_SUBSCR) +MISS_WITH_OPARG_COUNTER(BINARY_ADD) binary_subscr_dict_error: { diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index f97eaf80815..f3bfae545bc 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -6,21 +6,21 @@ static void *opcode_targets[256] = { &&TARGET_DUP_TOP, &&TARGET_DUP_TOP_TWO, &&TARGET_ROT_FOUR, - &&TARGET_BINARY_SUBSCR_ADAPTIVE, - &&TARGET_BINARY_SUBSCR_LIST_INT, + &&TARGET_BINARY_ADD_ADAPTIVE, + &&TARGET_BINARY_ADD_INT, &&TARGET_NOP, &&TARGET_UNARY_POSITIVE, &&TARGET_UNARY_NEGATIVE, &&TARGET_UNARY_NOT, - &&TARGET_BINARY_SUBSCR_TUPLE_INT, - &&TARGET_BINARY_SUBSCR_DICT, + &&TARGET_BINARY_ADD_FLOAT, + &&TARGET_BINARY_ADD_UNICODE, &&TARGET_UNARY_INVERT, &&TARGET_BINARY_MATRIX_MULTIPLY, &&TARGET_INPLACE_MATRIX_MULTIPLY, - &&TARGET_JUMP_ABSOLUTE_QUICK, + &&TARGET_BINARY_ADD_UNICODE_INPLACE_FAST, &&TARGET_BINARY_POWER, &&TARGET_BINARY_MULTIPLY, - &&TARGET_LOAD_ATTR_ADAPTIVE, + &&TARGET_BINARY_SUBSCR_ADAPTIVE, &&TARGET_BINARY_MODULO, &&TARGET_BINARY_ADD, &&TARGET_BINARY_SUBTRACT, @@ -35,19 +35,19 @@ static void *opcode_targets[256] = { &&TARGET_MATCH_KEYS, &&TARGET_COPY_DICT_WITHOUT_KEYS, &&TARGET_PUSH_EXC_INFO, - &&TARGET_LOAD_ATTR_SPLIT_KEYS, + &&TARGET_BINARY_SUBSCR_LIST_INT, &&TARGET_POP_EXCEPT_AND_RERAISE, + &&TARGET_BINARY_SUBSCR_TUPLE_INT, + &&TARGET_BINARY_SUBSCR_DICT, + &&TARGET_JUMP_ABSOLUTE_QUICK, + &&TARGET_LOAD_ATTR_ADAPTIVE, + &&TARGET_LOAD_ATTR_SPLIT_KEYS, &&TARGET_LOAD_ATTR_WITH_HINT, &&TARGET_LOAD_ATTR_SLOT, &&TARGET_LOAD_ATTR_MODULE, &&TARGET_LOAD_GLOBAL_ADAPTIVE, &&TARGET_LOAD_GLOBAL_MODULE, &&TARGET_LOAD_GLOBAL_BUILTIN, - &&TARGET_LOAD_METHOD_ADAPTIVE, - &&TARGET_LOAD_METHOD_CACHED, - &&TARGET_LOAD_METHOD_CLASS, - &&TARGET_LOAD_METHOD_MODULE, - &&TARGET_STORE_ATTR_ADAPTIVE, &&TARGET_WITH_EXCEPT_START, &&TARGET_GET_AITER, &&TARGET_GET_ANEXT, @@ -57,7 +57,7 @@ static void *opcode_targets[256] = { &&TARGET_INPLACE_ADD, &&TARGET_INPLACE_SUBTRACT, &&TARGET_INPLACE_MULTIPLY, - &&TARGET_STORE_ATTR_SPLIT_KEYS, + &&TARGET_LOAD_METHOD_ADAPTIVE, &&TARGET_INPLACE_MODULO, &&TARGET_STORE_SUBSCR, 
&&TARGET_DELETE_SUBSCR, @@ -79,15 +79,15 @@ static void *opcode_targets[256] = { &&TARGET_INPLACE_AND, &&TARGET_INPLACE_XOR, &&TARGET_INPLACE_OR, - &&TARGET_STORE_ATTR_SLOT, - &&TARGET_STORE_ATTR_WITH_HINT, + &&TARGET_LOAD_METHOD_CACHED, + &&TARGET_LOAD_METHOD_CLASS, &&TARGET_LIST_TO_TUPLE, &&TARGET_RETURN_VALUE, &&TARGET_IMPORT_STAR, &&TARGET_SETUP_ANNOTATIONS, &&TARGET_YIELD_VALUE, - &&TARGET_LOAD_FAST__LOAD_FAST, - &&TARGET_STORE_FAST__LOAD_FAST, + &&TARGET_LOAD_METHOD_MODULE, + &&TARGET_STORE_ATTR_ADAPTIVE, &&TARGET_POP_EXCEPT, &&TARGET_STORE_NAME, &&TARGET_DELETE_NAME, @@ -119,30 +119,30 @@ static void *opcode_targets[256] = { &&TARGET_IS_OP, &&TARGET_CONTAINS_OP, &&TARGET_RERAISE, - &&TARGET_LOAD_FAST__LOAD_CONST, + &&TARGET_STORE_ATTR_SPLIT_KEYS, &&TARGET_JUMP_IF_NOT_EXC_MATCH, - &&TARGET_LOAD_CONST__LOAD_FAST, - &&TARGET_STORE_FAST__STORE_FAST, + &&TARGET_STORE_ATTR_SLOT, + &&TARGET_STORE_ATTR_WITH_HINT, &&TARGET_LOAD_FAST, &&TARGET_STORE_FAST, &&TARGET_DELETE_FAST, - &&_unknown_opcode, - &&_unknown_opcode, + &&TARGET_LOAD_FAST__LOAD_FAST, + &&TARGET_STORE_FAST__LOAD_FAST, &&TARGET_GEN_START, &&TARGET_RAISE_VARARGS, &&TARGET_CALL_FUNCTION, &&TARGET_MAKE_FUNCTION, &&TARGET_BUILD_SLICE, - &&_unknown_opcode, + &&TARGET_LOAD_FAST__LOAD_CONST, &&TARGET_MAKE_CELL, &&TARGET_LOAD_CLOSURE, &&TARGET_LOAD_DEREF, &&TARGET_STORE_DEREF, &&TARGET_DELETE_DEREF, - &&_unknown_opcode, + &&TARGET_LOAD_CONST__LOAD_FAST, &&TARGET_CALL_FUNCTION_KW, &&TARGET_CALL_FUNCTION_EX, - &&_unknown_opcode, + &&TARGET_STORE_FAST__STORE_FAST, &&TARGET_EXTENDED_ARG, &&TARGET_LIST_APPEND, &&TARGET_SET_ADD, diff --git a/Python/specialize.c b/Python/specialize.c index 359bec57193..b321368148f 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -121,7 +121,8 @@ _Py_GetSpecializationStats(void) { int err = 0; err += add_stat_dict(stats, LOAD_ATTR, "load_attr"); err += add_stat_dict(stats, LOAD_GLOBAL, "load_global"); - err += add_stat_dict(stats, LOAD_GLOBAL, "load_method"); + err += add_stat_dict(stats, LOAD_METHOD, "load_method"); + err += add_stat_dict(stats, BINARY_ADD, "binary_add"); err += add_stat_dict(stats, BINARY_SUBSCR, "binary_subscr"); err += add_stat_dict(stats, STORE_ATTR, "store_attr"); if (err < 0) { @@ -177,6 +178,7 @@ _Py_PrintSpecializationStats(void) print_stats(out, &_specialization_stats[LOAD_ATTR], "load_attr"); print_stats(out, &_specialization_stats[LOAD_GLOBAL], "load_global"); print_stats(out, &_specialization_stats[LOAD_METHOD], "load_method"); + print_stats(out, &_specialization_stats[BINARY_ADD], "binary_add"); print_stats(out, &_specialization_stats[BINARY_SUBSCR], "binary_subscr"); print_stats(out, &_specialization_stats[STORE_ATTR], "store_attr"); if (out != stderr) { @@ -226,6 +228,7 @@ static uint8_t adaptive_opcodes[256] = { [LOAD_ATTR] = LOAD_ATTR_ADAPTIVE, [LOAD_GLOBAL] = LOAD_GLOBAL_ADAPTIVE, [LOAD_METHOD] = LOAD_METHOD_ADAPTIVE, + [BINARY_ADD] = BINARY_ADD_ADAPTIVE, [BINARY_SUBSCR] = BINARY_SUBSCR_ADAPTIVE, [STORE_ATTR] = STORE_ATTR_ADAPTIVE, }; @@ -235,6 +238,7 @@ static uint8_t cache_requirements[256] = { [LOAD_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */ [LOAD_GLOBAL] = 2, /* _PyAdaptiveEntry and _PyLoadGlobalCache */ [LOAD_METHOD] = 3, /* _PyAdaptiveEntry, _PyAttrCache and _PyObjectCache */ + [BINARY_ADD] = 0, [BINARY_SUBSCR] = 0, [STORE_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */ }; @@ -436,6 +440,12 @@ _Py_Quicken(PyCodeObject *code) { #define SPEC_FAIL_TUPLE_NON_INT_SUBSCRIPT 9 #define SPEC_FAIL_NOT_TUPLE_LIST_OR_DICT 10 +/* Binary add */ + +#define 
SPEC_FAIL_NON_FUNCTION_SCOPE 11 +#define SPEC_FAIL_DIFFERENT_TYPES 12 +#define SPEC_FAIL_OTHER_TYPE 13 + static int specialize_module_load_attr( @@ -898,7 +908,7 @@ _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SPECIALIZATION_FAIL(LOAD_METHOD, SPEC_FAIL_NOT_METHOD); goto fail; } - + assert(kind == METHOD); // If o.__dict__ changes, the method might be found in o.__dict__ // instead of old type lookup. So record o.__dict__'s keys. @@ -933,15 +943,15 @@ _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, } // Fall through. } // Else owner is maybe a builtin with no dict, or __slots__. Doesn't matter. - + /* `descr` is borrowed. Just check tp_version_tag before accessing in case * it's deleted. This is safe for methods (even inherited ones from super * classes!) as long as tp_version_tag is validated for two main reasons: - * + * * 1. The class will always hold a reference to the method so it will * usually not be GC-ed. Should it be deleted in Python, e.g. * `del obj.meth`, tp_version_tag will be invalidated, because of reason 2. - * + * * 2. The pre-existing type method cache (MCACHE) uses the same principles * of caching a borrowed descriptor. It does all the heavy lifting for us. * E.g. it invalidates on any MRO modification, on any type object @@ -968,6 +978,7 @@ fail: return 0; } + int _Py_Specialize_LoadGlobal( PyObject *globals, PyObject *builtins, @@ -1035,7 +1046,6 @@ success: return 0; } - int _Py_Specialize_BinarySubscr( PyObject *container, PyObject *sub, _Py_CODEUNIT *instr) @@ -1076,3 +1086,43 @@ success: return 0; } +int +_Py_Specialize_BinaryAdd(PyObject *left, PyObject *right, _Py_CODEUNIT *instr) +{ + PyTypeObject *left_type = Py_TYPE(left); + if (left_type != Py_TYPE(right)) { + SPECIALIZATION_FAIL(BINARY_ADD, SPEC_FAIL_DIFFERENT_TYPES); + goto fail; + } + if (left_type == &PyUnicode_Type) { + int next_opcode = _Py_OPCODE(instr[1]); + if (next_opcode == STORE_FAST) { + *instr = _Py_MAKECODEUNIT(BINARY_ADD_UNICODE_INPLACE_FAST, saturating_start()); + } + else { + *instr = _Py_MAKECODEUNIT(BINARY_ADD_UNICODE, saturating_start()); + } + goto success; + } + else if (left_type == &PyLong_Type) { + *instr = _Py_MAKECODEUNIT(BINARY_ADD_INT, saturating_start()); + goto success; + } + else if (left_type == &PyFloat_Type) { + *instr = _Py_MAKECODEUNIT(BINARY_ADD_FLOAT, saturating_start()); + goto success; + + } + else { + SPECIALIZATION_FAIL(BINARY_ADD, SPEC_FAIL_OTHER_TYPE); + } +fail: + STAT_INC(BINARY_ADD, specialization_failure); + assert(!PyErr_Occurred()); + *instr = _Py_MAKECODEUNIT(_Py_OPCODE(*instr), ADAPTIVE_CACHE_BACKOFF); + return 0; +success: + STAT_INC(BINARY_ADD, specialization_success); + assert(!PyErr_Occurred()); + return 0; +}
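
Review note (not part of the patch): the new BINARY_ADD_ADAPTIVE / BINARY_ADD_* opcodes reuse the PEP 659 counter scheme that BINARY_SUBSCR already follows. The adaptive form counts down its oparg and attempts to specialize when it reaches zero; a specialized form keeps a saturating hit counter in its oparg (record_hit_inline), and each guard failure (DEOPT_IF -> MISS_WITH_OPARG_COUNTER) decrements it until the instruction falls back to the adaptive form with ADAPTIVE_CACHE_BACKOFF. Below is a minimal standalone sketch of that dance; the constants and names are illustrative assumptions, not the values used by saturating_start()/saturating_increment() in the interpreter.

/* Standalone sketch of the adaptive/specialized counter scheme (not CPython code). */
#include <stdio.h>

#define BACKOFF   64   /* assumed delay before retrying specialization */
#define MAX_HITS   7   /* assumed saturation point of the hit counter  */

typedef enum { ADAPTIVE, SPECIALIZED } Kind;

typedef struct {
    Kind kind;
    int counter;            /* stands in for the instruction's oparg */
} Instr;

/* One execution of the instruction; guard_ok says whether the operand
 * types still match what the instruction would specialize for. */
static void
execute(Instr *i, int guard_ok)
{
    if (i->kind == ADAPTIVE) {
        if (i->counter > 0) {
            i->counter--;                 /* deferred: take the generic path */
        }
        else if (guard_ok) {
            i->kind = SPECIALIZED;        /* specialization succeeded */
            i->counter = MAX_HITS;
        }
        else {
            i->counter = BACKOFF;         /* unsupported types: back off */
        }
        return;
    }
    if (guard_ok) {
        if (i->counter < MAX_HITS) {
            i->counter++;                 /* saturating hit counter */
        }
    }
    else if (--i->counter <= 0) {
        i->kind = ADAPTIVE;               /* too many deopts: de-specialize */
        i->counter = BACKOFF;
    }
}

int
main(void)
{
    Instr add = { ADAPTIVE, 0 };
    for (int n = 0; n < 1000; n++) {
        execute(&add, n < 500);           /* stable types, then they change */
    }
    printf("kind=%s counter=%d\n",
           add.kind == ADAPTIVE ? "ADAPTIVE" : "SPECIALIZED", add.counter);
    return 0;
}

Keeping the counters in the oparg byte is what makes the scheme cheap: no extra cache entry is needed (note cache_requirements[BINARY_ADD] == 0 above), at the cost of an 8-bit, saturating count.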
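
A second sketch, for the BINARY_ADD_UNICODE_INPLACE_FAST case: the handler only fires on the BINARY_ADD; STORE_FAST shape (v = v + s), and it clears the local slot before calling PyUnicode_Append so the left string's refcount drops to 1 and the append can grow the buffer in place instead of copying. The toy refcounted string below is not CPython's unicode object (names are invented, frees and error checks omitted for brevity); it only illustrates why unique ownership is the precondition.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* A toy refcounted string, just to show why "append in place" is only
 * legal when the value has a single owner. */
typedef struct {
    int refcnt;
    size_t len;
    char *buf;
} Str;

static Str *
str_new(const char *s)
{
    Str *r = malloc(sizeof(Str));
    r->refcnt = 1;
    r->len = strlen(s);
    r->buf = malloc(r->len + 1);
    memcpy(r->buf, s, r->len + 1);
    return r;
}

/* Append tail to *pleft, mutating in place only when *pleft has no other
 * owners; otherwise build a fresh string so the other owners see no change. */
static void
str_append(Str **pleft, const Str *tail)
{
    Str *left = *pleft;
    if (left->refcnt == 1) {
        left->buf = realloc(left->buf, left->len + tail->len + 1);
        memcpy(left->buf + left->len, tail->buf, tail->len + 1);
        left->len += tail->len;
    }
    else {
        Str *fresh = malloc(sizeof(Str));
        fresh->refcnt = 1;
        fresh->len = left->len + tail->len;
        fresh->buf = malloc(fresh->len + 1);
        memcpy(fresh->buf, left->buf, left->len);
        memcpy(fresh->buf + left->len, tail->buf, tail->len + 1);
        left->refcnt--;
        *pleft = fresh;
    }
}

int
main(void)
{
    Str *v = str_new("spam");
    Str *t = str_new("-eggs");

    v->refcnt++;                /* pretend a local variable also holds v   */
    str_append(&v, t);          /* shared: must copy (PyUnicode_Concat)    */
    printf("copied:   %s\n", v->buf);

    /* The specialized handler clears the local slot first, so by the time
     * PyUnicode_Append runs the left operand is uniquely referenced. */
    str_append(&v, t);          /* refcnt is 1 now: grows in place         */
    printf("in place: %s\n", v->buf);
    return 0;
}

This is the same reasoning as the unicode_concatenate() fast path that the generic BINARY_ADD used to carry; after this patch it only runs once the adaptive instruction has observed the str + str; STORE_FAST shape.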