GH-89987: Shrink the BINARY_SUBSCR caches (GH-103022)

This commit is contained in:
Brandt Bucher 2023-03-29 15:53:30 -07:00 committed by GitHub
parent e647dbaded
commit 121057aa36
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 272 additions and 250 deletions

View File

@ -234,7 +234,18 @@ struct _typeobject {
* It should be treated as an opaque blob * It should be treated as an opaque blob
* by code other than the specializer and interpreter. */ * by code other than the specializer and interpreter. */
struct _specialization_cache { struct _specialization_cache {
// In order to avoid bloating the bytecode with lots of inline caches, the
// members of this structure have a somewhat unique contract. They are set
// by the specialization machinery, and are invalidated by PyType_Modified.
// The rules for using them are as follows:
// - If getitem is non-NULL, then it is the same Python function that
// PyType_Lookup(cls, "__getitem__") would return.
// - If getitem is NULL, then getitem_version is meaningless.
// - If getitem->func_version == getitem_version, then getitem can be called
// with two positional arguments and no keyword arguments, and has neither
// *args nor **kwargs (as required by BINARY_SUBSCR_GETITEM):
PyObject *getitem; PyObject *getitem;
uint32_t getitem_version;
}; };
/* The *real* layout of a type object when allocated on the heap */ /* The *real* layout of a type object when allocated on the heap */

View File

@ -47,8 +47,6 @@ typedef struct {
typedef struct { typedef struct {
uint16_t counter; uint16_t counter;
uint16_t type_version[2];
uint16_t func_version;
} _PyBinarySubscrCache; } _PyBinarySubscrCache;
#define INLINE_CACHE_ENTRIES_BINARY_SUBSCR CACHE_ENTRIES(_PyBinarySubscrCache) #define INLINE_CACHE_ENTRIES_BINARY_SUBSCR CACHE_ENTRIES(_PyBinarySubscrCache)

View File

@ -41,7 +41,7 @@ static const uint32_t _PyOpcode_Jump[9] = {
}; };
const uint8_t _PyOpcode_Caches[256] = { const uint8_t _PyOpcode_Caches[256] = {
[BINARY_SUBSCR] = 4, [BINARY_SUBSCR] = 1,
[STORE_SUBSCR] = 1, [STORE_SUBSCR] = 1,
[UNPACK_SEQUENCE] = 1, [UNPACK_SEQUENCE] = 1,
[FOR_ITER] = 1, [FOR_ITER] = 1,

View File

@ -435,7 +435,9 @@ _code_type = type(_write_atomic.__code__)
# Python 3.12a6 3519 (Modify SEND instruction) # Python 3.12a6 3519 (Modify SEND instruction)
# Python 3.12a6 3520 (Remove PREP_RERAISE_STAR, add CALL_INTRINSIC_2) # Python 3.12a6 3520 (Remove PREP_RERAISE_STAR, add CALL_INTRINSIC_2)
# Python 3.12a7 3521 (Shrink the LOAD_GLOBAL caches) # Python 3.12a7 3521 (Shrink the LOAD_GLOBAL caches)
# Python 3.12a7 3522 (Remove JUMP_IF_FALSE_OR_POP/JUMP_IF_TRUE_OR_POP)
# Python 3.12a7 3523 (Convert COMPARE_AND_BRANCH back to COMPARE_OP) # Python 3.12a7 3523 (Convert COMPARE_AND_BRANCH back to COMPARE_OP)
# Python 3.12a7 3524 (Shrink the BINARY_SUBSCR caches)
# Python 3.13 will start with 3550 # Python 3.13 will start with 3550
@ -452,7 +454,7 @@ _code_type = type(_write_atomic.__code__)
# Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array # Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array
# in PC/launcher.c must also be updated. # in PC/launcher.c must also be updated.
MAGIC_NUMBER = (3523).to_bytes(2, 'little') + b'\r\n' MAGIC_NUMBER = (3524).to_bytes(2, 'little') + b'\r\n'
_RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c _RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c

View File

@ -392,8 +392,6 @@ _cache_format = {
}, },
"BINARY_SUBSCR": { "BINARY_SUBSCR": {
"counter": 1, "counter": 1,
"type_version": 2,
"func_version": 1,
}, },
"FOR_ITER": { "FOR_ITER": {
"counter": 1, "counter": 1,

View File

@ -1108,7 +1108,7 @@ class DisTests(DisTestBase):
1 2 LOAD_NAME 0 (a) 1 2 LOAD_NAME 0 (a)
4 LOAD_CONST 0 (0) 4 LOAD_CONST 0 (0)
6 %s 6 %s
16 RETURN_VALUE 10 RETURN_VALUE
""" """
co_list = compile('a[0]', "<list>", "eval") co_list = compile('a[0]', "<list>", "eval")
self.code_quicken(lambda: exec(co_list, {}, {'a': [0]})) self.code_quicken(lambda: exec(co_list, {}, {'a': [0]}))

View File

@ -1556,7 +1556,7 @@ class SizeofTest(unittest.TestCase):
'10P' # PySequenceMethods '10P' # PySequenceMethods
'2P' # PyBufferProcs '2P' # PyBufferProcs
'6P' '6P'
'1P' # Specializer cache '1PI' # Specializer cache
) )
class newstyleclass(object): pass class newstyleclass(object): pass
# Separate block for PyDictKeysObject with 8 keys and 5 entries # Separate block for PyDictKeysObject with 8 keys and 5 entries

View File

@ -0,0 +1,2 @@
Reduce the number of inline :opcode:`CACHE` entries for
:opcode:`BINARY_SUBSCR`.

View File

@ -510,6 +510,11 @@ PyType_Modified(PyTypeObject *type)
type->tp_flags &= ~Py_TPFLAGS_VALID_VERSION_TAG; type->tp_flags &= ~Py_TPFLAGS_VALID_VERSION_TAG;
type->tp_version_tag = 0; /* 0 is not a valid version tag */ type->tp_version_tag = 0; /* 0 is not a valid version tag */
if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
// This field *must* be invalidated if the type is modified (see the
// comment on struct _specialization_cache):
((PyHeapTypeObject *)type)->_spec_cache.getitem = NULL;
}
} }
static void static void
@ -563,6 +568,11 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) {
clear: clear:
type->tp_flags &= ~Py_TPFLAGS_VALID_VERSION_TAG; type->tp_flags &= ~Py_TPFLAGS_VALID_VERSION_TAG;
type->tp_version_tag = 0; /* 0 is not a valid version tag */ type->tp_version_tag = 0; /* 0 is not a valid version tag */
if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
// This field *must* be invalidated if the type is modified (see the
// comment on struct _specialization_cache):
((PyHeapTypeObject *)type)->_spec_cache.getitem = NULL;
}
} }
static int static int

View File

@ -1,39 +1,38 @@
// Auto-generated by Programs/freeze_test_frozenmain.py // Auto-generated by Programs/freeze_test_frozenmain.py
unsigned char M_test_frozenmain[] = { unsigned char M_test_frozenmain[] = {
227,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0, 227,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,
0,0,0,0,0,243,182,0,0,0,151,0,100,0,100,1, 0,0,0,0,0,243,170,0,0,0,151,0,100,0,100,1,
108,0,90,0,100,0,100,1,108,1,90,1,2,0,101,2, 108,0,90,0,100,0,100,1,108,1,90,1,2,0,101,2,
100,2,171,1,0,0,0,0,0,0,0,0,1,0,2,0, 100,2,171,1,0,0,0,0,0,0,0,0,1,0,2,0,
101,2,100,3,101,0,106,6,0,0,0,0,0,0,0,0, 101,2,100,3,101,0,106,6,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,171,2,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,171,2,0,0,0,0,
0,0,0,0,1,0,2,0,101,1,106,8,0,0,0,0, 0,0,0,0,1,0,2,0,101,1,106,8,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,171,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,171,0,
0,0,0,0,0,0,0,0,100,4,25,0,0,0,0,0, 0,0,0,0,0,0,0,0,100,4,25,0,0,0,90,5,
0,0,0,0,90,5,100,5,68,0,93,23,0,0,90,6, 100,5,68,0,93,20,0,0,90,6,2,0,101,2,100,6,
2,0,101,2,100,6,101,6,155,0,100,7,101,5,101,6, 101,6,155,0,100,7,101,5,101,6,25,0,0,0,155,0,
25,0,0,0,0,0,0,0,0,0,155,0,157,4,171,1, 157,4,171,1,0,0,0,0,0,0,0,0,1,0,140,22,
0,0,0,0,0,0,0,0,1,0,140,25,4,0,121,1, 4,0,121,1,41,8,233,0,0,0,0,78,122,18,70,114,
41,8,233,0,0,0,0,78,122,18,70,114,111,122,101,110, 111,122,101,110,32,72,101,108,108,111,32,87,111,114,108,100,
32,72,101,108,108,111,32,87,111,114,108,100,122,8,115,121, 122,8,115,121,115,46,97,114,103,118,218,6,99,111,110,102,
115,46,97,114,103,118,218,6,99,111,110,102,105,103,41,5, 105,103,41,5,218,12,112,114,111,103,114,97,109,95,110,97,
218,12,112,114,111,103,114,97,109,95,110,97,109,101,218,10, 109,101,218,10,101,120,101,99,117,116,97,98,108,101,218,15,
101,120,101,99,117,116,97,98,108,101,218,15,117,115,101,95, 117,115,101,95,101,110,118,105,114,111,110,109,101,110,116,218,
101,110,118,105,114,111,110,109,101,110,116,218,17,99,111,110, 17,99,111,110,102,105,103,117,114,101,95,99,95,115,116,100,
102,105,103,117,114,101,95,99,95,115,116,100,105,111,218,14, 105,111,218,14,98,117,102,102,101,114,101,100,95,115,116,100,
98,117,102,102,101,114,101,100,95,115,116,100,105,111,122,7, 105,111,122,7,99,111,110,102,105,103,32,122,2,58,32,41,
99,111,110,102,105,103,32,122,2,58,32,41,7,218,3,115, 7,218,3,115,121,115,218,17,95,116,101,115,116,105,110,116,
121,115,218,17,95,116,101,115,116,105,110,116,101,114,110,97, 101,114,110,97,108,99,97,112,105,218,5,112,114,105,110,116,
108,99,97,112,105,218,5,112,114,105,110,116,218,4,97,114, 218,4,97,114,103,118,218,11,103,101,116,95,99,111,110,102,
103,118,218,11,103,101,116,95,99,111,110,102,105,103,115,114, 105,103,115,114,3,0,0,0,218,3,107,101,121,169,0,243,
3,0,0,0,218,3,107,101,121,169,0,243,0,0,0,0, 0,0,0,0,250,18,116,101,115,116,95,102,114,111,122,101,
250,18,116,101,115,116,95,102,114,111,122,101,110,109,97,105, 110,109,97,105,110,46,112,121,250,8,60,109,111,100,117,108,
110,46,112,121,250,8,60,109,111,100,117,108,101,62,114,18, 101,62,114,18,0,0,0,1,0,0,0,115,100,0,0,0,
0,0,0,1,0,0,0,115,100,0,0,0,240,3,1,1, 240,3,1,1,1,243,8,0,1,11,219,0,24,225,0,5,
1,243,8,0,1,11,219,0,24,225,0,5,208,6,26,213, 208,6,26,213,0,27,217,0,5,128,106,144,35,151,40,145,
0,27,217,0,5,128,106,144,35,151,40,145,40,213,0,27, 40,213,0,27,216,9,38,208,9,26,215,9,38,209,9,38,
216,9,38,208,9,26,215,9,38,209,9,38,212,9,40,168, 212,9,40,168,24,209,9,50,128,6,240,2,6,12,2,242,
24,212,9,50,128,6,240,2,6,12,2,242,0,7,1,42, 0,7,1,42,128,67,241,14,0,5,10,208,10,40,144,67,
128,67,241,14,0,5,10,208,10,40,144,67,209,10,40,152, 209,10,40,152,54,160,35,153,59,209,10,40,214,4,41,241,
54,160,35,156,59,209,10,40,214,4,41,241,15,7,1,42, 15,7,1,42,114,16,0,0,0,
114,16,0,0,0,
}; };

View File

@ -292,7 +292,7 @@ dummy_func(
BINARY_SUBSCR_TUPLE_INT, BINARY_SUBSCR_TUPLE_INT,
}; };
inst(BINARY_SUBSCR, (unused/4, container, sub -- res)) { inst(BINARY_SUBSCR, (unused/1, container, sub -- res)) {
#if ENABLE_SPECIALIZATION #if ENABLE_SPECIALIZATION
_PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr; _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr;
if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) {
@ -339,7 +339,7 @@ dummy_func(
ERROR_IF(err, error); ERROR_IF(err, error);
} }
inst(BINARY_SUBSCR_LIST_INT, (unused/4, list, sub -- res)) { inst(BINARY_SUBSCR_LIST_INT, (unused/1, list, sub -- res)) {
assert(cframe.use_tracing == 0); assert(cframe.use_tracing == 0);
DEOPT_IF(!PyLong_CheckExact(sub), BINARY_SUBSCR); DEOPT_IF(!PyLong_CheckExact(sub), BINARY_SUBSCR);
DEOPT_IF(!PyList_CheckExact(list), BINARY_SUBSCR); DEOPT_IF(!PyList_CheckExact(list), BINARY_SUBSCR);
@ -356,7 +356,7 @@ dummy_func(
Py_DECREF(list); Py_DECREF(list);
} }
inst(BINARY_SUBSCR_TUPLE_INT, (unused/4, tuple, sub -- res)) { inst(BINARY_SUBSCR_TUPLE_INT, (unused/1, tuple, sub -- res)) {
assert(cframe.use_tracing == 0); assert(cframe.use_tracing == 0);
DEOPT_IF(!PyLong_CheckExact(sub), BINARY_SUBSCR); DEOPT_IF(!PyLong_CheckExact(sub), BINARY_SUBSCR);
DEOPT_IF(!PyTuple_CheckExact(tuple), BINARY_SUBSCR); DEOPT_IF(!PyTuple_CheckExact(tuple), BINARY_SUBSCR);
@ -373,7 +373,7 @@ dummy_func(
Py_DECREF(tuple); Py_DECREF(tuple);
} }
inst(BINARY_SUBSCR_DICT, (unused/4, dict, sub -- res)) { inst(BINARY_SUBSCR_DICT, (unused/1, dict, sub -- res)) {
assert(cframe.use_tracing == 0); assert(cframe.use_tracing == 0);
DEOPT_IF(!PyDict_CheckExact(dict), BINARY_SUBSCR); DEOPT_IF(!PyDict_CheckExact(dict), BINARY_SUBSCR);
STAT_INC(BINARY_SUBSCR, hit); STAT_INC(BINARY_SUBSCR, hit);
@ -389,14 +389,16 @@ dummy_func(
DECREF_INPUTS(); DECREF_INPUTS();
} }
inst(BINARY_SUBSCR_GETITEM, (unused/1, type_version/2, func_version/1, container, sub -- unused)) { inst(BINARY_SUBSCR_GETITEM, (unused/1, container, sub -- unused)) {
PyTypeObject *tp = Py_TYPE(container); PyTypeObject *tp = Py_TYPE(container);
DEOPT_IF(tp->tp_version_tag != type_version, BINARY_SUBSCR); DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE), BINARY_SUBSCR);
assert(tp->tp_flags & Py_TPFLAGS_HEAPTYPE); PyHeapTypeObject *ht = (PyHeapTypeObject *)tp;
PyObject *cached = ((PyHeapTypeObject *)tp)->_spec_cache.getitem; PyObject *cached = ht->_spec_cache.getitem;
DEOPT_IF(cached == NULL, BINARY_SUBSCR);
assert(PyFunction_Check(cached)); assert(PyFunction_Check(cached));
PyFunctionObject *getitem = (PyFunctionObject *)cached; PyFunctionObject *getitem = (PyFunctionObject *)cached;
DEOPT_IF(getitem->func_version != func_version, BINARY_SUBSCR); uint32_t cached_version = ht->_spec_cache.getitem_version;
DEOPT_IF(getitem->func_version != cached_version, BINARY_SUBSCR);
PyCodeObject *code = (PyCodeObject *)getitem->func_code; PyCodeObject *code = (PyCodeObject *)getitem->func_code;
assert(code->co_argcount == 2); assert(code->co_argcount == 2);
DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), BINARY_SUBSCR); DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), BINARY_SUBSCR);

File diff suppressed because it is too large Load Diff

View File

@ -731,13 +731,13 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[256] = {
[BINARY_OP_INPLACE_ADD_UNICODE] = { true, INSTR_FMT_IX }, [BINARY_OP_INPLACE_ADD_UNICODE] = { true, INSTR_FMT_IX },
[BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC }, [BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC },
[BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC }, [BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC },
[BINARY_SUBSCR] = { true, INSTR_FMT_IXC000 }, [BINARY_SUBSCR] = { true, INSTR_FMT_IXC },
[BINARY_SLICE] = { true, INSTR_FMT_IX }, [BINARY_SLICE] = { true, INSTR_FMT_IX },
[STORE_SLICE] = { true, INSTR_FMT_IX }, [STORE_SLICE] = { true, INSTR_FMT_IX },
[BINARY_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC000 }, [BINARY_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC },
[BINARY_SUBSCR_TUPLE_INT] = { true, INSTR_FMT_IXC000 }, [BINARY_SUBSCR_TUPLE_INT] = { true, INSTR_FMT_IXC },
[BINARY_SUBSCR_DICT] = { true, INSTR_FMT_IXC000 }, [BINARY_SUBSCR_DICT] = { true, INSTR_FMT_IXC },
[BINARY_SUBSCR_GETITEM] = { true, INSTR_FMT_IXC000 }, [BINARY_SUBSCR_GETITEM] = { true, INSTR_FMT_IXC },
[LIST_APPEND] = { true, INSTR_FMT_IB }, [LIST_APPEND] = { true, INSTR_FMT_IB },
[SET_ADD] = { true, INSTR_FMT_IB }, [SET_ADD] = { true, INSTR_FMT_IB },
[STORE_SUBSCR] = { true, INSTR_FMT_IXC }, [STORE_SUBSCR] = { true, INSTR_FMT_IXC },

View File

@ -1330,16 +1330,16 @@ _Py_Specialize_BinarySubscr(
SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS); SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS);
goto fail; goto fail;
} }
assert(cls->tp_version_tag != 0); uint32_t version = _PyFunction_GetVersionForCurrentState(func);
write_u32(cache->type_version, cls->tp_version_tag); if (version == 0) {
int version = _PyFunction_GetVersionForCurrentState(func); SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_VERSIONS);
if (version == 0 || version != (uint16_t)version) {
SPECIALIZATION_FAIL(BINARY_SUBSCR, version == 0 ?
SPEC_FAIL_OUT_OF_VERSIONS : SPEC_FAIL_OUT_OF_RANGE);
goto fail; goto fail;
} }
cache->func_version = version; PyHeapTypeObject *ht = (PyHeapTypeObject *)container_type;
((PyHeapTypeObject *)container_type)->_spec_cache.getitem = descriptor; // This pointer is invalidated by PyType_Modified (see the comment on
// struct _specialization_cache):
ht->_spec_cache.getitem = descriptor;
ht->_spec_cache.getitem_version = version;
instr->op.code = BINARY_SUBSCR_GETITEM; instr->op.code = BINARY_SUBSCR_GETITEM;
goto success; goto success;
} }