From df13a1821a90fcfb75eca59aad6af1f0893b1e77 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 2 Aug 2024 00:19:05 +0100 Subject: [PATCH] GH-118095: Add tier two support for BINARY_SUBSCR_GETITEM (GH-120793) --- Include/internal/pycore_opcode_metadata.h | 3 +- Include/internal/pycore_optimizer.h | 10 + Include/internal/pycore_uop_ids.h | 293 +++++++++++----------- Include/internal/pycore_uop_metadata.h | 8 + Python/bytecodes.c | 42 ++-- Python/executor_cases.c.h | 52 +++- Python/generated_cases.c.h | 80 ++++-- Python/optimizer.c | 12 +- Python/optimizer_analysis.c | 11 +- Python/optimizer_cases.c.h | 13 +- 10 files changed, 317 insertions(+), 207 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 464d3500890..d8e5034268b 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -505,7 +505,7 @@ int _PyOpcode_num_pushed(int opcode, int oparg) { case BINARY_SUBSCR_DICT: return 1; case BINARY_SUBSCR_GETITEM: - return 1; + return 0; case BINARY_SUBSCR_LIST_INT: return 1; case BINARY_SUBSCR_STR_INT: @@ -1231,6 +1231,7 @@ _PyOpcode_macro_expansion[256] = { [BINARY_SLICE] = { .nuops = 1, .uops = { { _BINARY_SLICE, 0, 0 } } }, [BINARY_SUBSCR] = { .nuops = 1, .uops = { { _BINARY_SUBSCR, 0, 0 } } }, [BINARY_SUBSCR_DICT] = { .nuops = 1, .uops = { { _BINARY_SUBSCR_DICT, 0, 0 } } }, + [BINARY_SUBSCR_GETITEM] = { .nuops = 4, .uops = { { _CHECK_PEP_523, 0, 0 }, { _BINARY_SUBSCR_CHECK_FUNC, 0, 0 }, { _BINARY_SUBSCR_INIT_CALL, 0, 0 }, { _PUSH_FRAME, 0, 0 } } }, [BINARY_SUBSCR_LIST_INT] = { .nuops = 1, .uops = { { _BINARY_SUBSCR_LIST_INT, 0, 0 } } }, [BINARY_SUBSCR_STR_INT] = { .nuops = 1, .uops = { { _BINARY_SUBSCR_STR_INT, 0, 0 } } }, [BINARY_SUBSCR_TUPLE_INT] = { .nuops = 1, .uops = { { _BINARY_SUBSCR_TUPLE_INT, 0, 0 } } }, diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index bcbb8b73706..b6da27c0677 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -259,6 +259,16 @@ PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored); PyAPI_FUNC(int) _PyOptimizer_Optimize(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *start, _PyStackRef *stack_pointer, _PyExecutorObject **exec_ptr); +static inline int is_terminator(const _PyUOpInstruction *uop) +{ + int opcode = uop->opcode; + return ( + opcode == _EXIT_TRACE || + opcode == _JUMP_TO_TOP || + opcode == _DYNAMIC_EXIT + ); +} + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index d6c910255eb..27d7f96863f 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -22,8 +22,9 @@ extern "C" { #define _BINARY_OP_SUBTRACT_INT 310 #define _BINARY_SLICE BINARY_SLICE #define _BINARY_SUBSCR 311 +#define _BINARY_SUBSCR_CHECK_FUNC 312 #define _BINARY_SUBSCR_DICT BINARY_SUBSCR_DICT -#define _BINARY_SUBSCR_GETITEM BINARY_SUBSCR_GETITEM +#define _BINARY_SUBSCR_INIT_CALL 313 #define _BINARY_SUBSCR_LIST_INT BINARY_SUBSCR_LIST_INT #define _BINARY_SUBSCR_STR_INT BINARY_SUBSCR_STR_INT #define _BINARY_SUBSCR_TUPLE_INT BINARY_SUBSCR_TUPLE_INT @@ -34,10 +35,10 @@ extern "C" { #define _BUILD_STRING BUILD_STRING #define _BUILD_TUPLE BUILD_TUPLE #define _CALL_ALLOC_AND_ENTER_INIT CALL_ALLOC_AND_ENTER_INIT -#define _CALL_BUILTIN_CLASS 312 -#define _CALL_BUILTIN_FAST 313 -#define _CALL_BUILTIN_FAST_WITH_KEYWORDS 314 -#define _CALL_BUILTIN_O 315 +#define _CALL_BUILTIN_CLASS 314 +#define _CALL_BUILTIN_FAST 315 +#define _CALL_BUILTIN_FAST_WITH_KEYWORDS 316 +#define _CALL_BUILTIN_O 317 #define _CALL_FUNCTION_EX CALL_FUNCTION_EX #define _CALL_INTRINSIC_1 CALL_INTRINSIC_1 #define _CALL_INTRINSIC_2 CALL_INTRINSIC_2 @@ -45,38 +46,38 @@ extern "C" { #define _CALL_KW CALL_KW #define _CALL_LEN CALL_LEN #define _CALL_LIST_APPEND CALL_LIST_APPEND -#define _CALL_METHOD_DESCRIPTOR_FAST 316 -#define _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS 317 -#define _CALL_METHOD_DESCRIPTOR_NOARGS 318 -#define _CALL_METHOD_DESCRIPTOR_O 319 -#define _CALL_NON_PY_GENERAL 320 -#define _CALL_STR_1 321 -#define _CALL_TUPLE_1 322 +#define _CALL_METHOD_DESCRIPTOR_FAST 318 +#define _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS 319 +#define _CALL_METHOD_DESCRIPTOR_NOARGS 320 +#define _CALL_METHOD_DESCRIPTOR_O 321 +#define _CALL_NON_PY_GENERAL 322 +#define _CALL_STR_1 323 +#define _CALL_TUPLE_1 324 #define _CALL_TYPE_1 CALL_TYPE_1 -#define _CHECK_ATTR_CLASS 323 -#define _CHECK_ATTR_METHOD_LAZY_DICT 324 -#define _CHECK_ATTR_MODULE 325 -#define _CHECK_ATTR_WITH_HINT 326 -#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 327 +#define _CHECK_ATTR_CLASS 325 +#define _CHECK_ATTR_METHOD_LAZY_DICT 326 +#define _CHECK_ATTR_MODULE 327 +#define _CHECK_ATTR_WITH_HINT 328 +#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 329 #define _CHECK_EG_MATCH CHECK_EG_MATCH #define _CHECK_EXC_MATCH CHECK_EXC_MATCH -#define _CHECK_FUNCTION 328 -#define _CHECK_FUNCTION_EXACT_ARGS 329 -#define _CHECK_FUNCTION_VERSION 330 -#define _CHECK_IS_NOT_PY_CALLABLE 331 -#define _CHECK_MANAGED_OBJECT_HAS_VALUES 332 -#define _CHECK_METHOD_VERSION 333 -#define _CHECK_PEP_523 334 -#define _CHECK_PERIODIC 335 -#define _CHECK_STACK_SPACE 336 -#define _CHECK_STACK_SPACE_OPERAND 337 -#define _CHECK_VALIDITY 338 -#define _CHECK_VALIDITY_AND_SET_IP 339 -#define _COMPARE_OP 340 -#define _COMPARE_OP_FLOAT 341 -#define _COMPARE_OP_INT 342 -#define _COMPARE_OP_STR 343 -#define _CONTAINS_OP 344 +#define _CHECK_FUNCTION 330 +#define _CHECK_FUNCTION_EXACT_ARGS 331 +#define _CHECK_FUNCTION_VERSION 332 +#define _CHECK_IS_NOT_PY_CALLABLE 333 +#define _CHECK_MANAGED_OBJECT_HAS_VALUES 334 +#define _CHECK_METHOD_VERSION 335 +#define _CHECK_PEP_523 336 +#define _CHECK_PERIODIC 337 +#define _CHECK_STACK_SPACE 338 +#define _CHECK_STACK_SPACE_OPERAND 339 +#define _CHECK_VALIDITY 340 +#define _CHECK_VALIDITY_AND_SET_IP 341 +#define _COMPARE_OP 342 +#define _COMPARE_OP_FLOAT 343 +#define _COMPARE_OP_INT 344 +#define _COMPARE_OP_STR 345 +#define _CONTAINS_OP 346 #define _CONTAINS_OP_DICT CONTAINS_OP_DICT #define _CONTAINS_OP_SET CONTAINS_OP_SET #define _CONVERT_VALUE CONVERT_VALUE @@ -88,56 +89,56 @@ extern "C" { #define _DELETE_GLOBAL DELETE_GLOBAL #define _DELETE_NAME DELETE_NAME #define _DELETE_SUBSCR DELETE_SUBSCR -#define _DEOPT 345 +#define _DEOPT 347 #define _DICT_MERGE DICT_MERGE #define _DICT_UPDATE DICT_UPDATE -#define _DO_CALL 346 -#define _DYNAMIC_EXIT 347 +#define _DO_CALL 348 +#define _DYNAMIC_EXIT 349 #define _END_SEND END_SEND -#define _ERROR_POP_N 348 +#define _ERROR_POP_N 350 #define _EXIT_INIT_CHECK EXIT_INIT_CHECK -#define _EXPAND_METHOD 349 -#define _FATAL_ERROR 350 +#define _EXPAND_METHOD 351 +#define _FATAL_ERROR 352 #define _FORMAT_SIMPLE FORMAT_SIMPLE #define _FORMAT_WITH_SPEC FORMAT_WITH_SPEC -#define _FOR_ITER 351 -#define _FOR_ITER_GEN_FRAME 352 -#define _FOR_ITER_TIER_TWO 353 +#define _FOR_ITER 353 +#define _FOR_ITER_GEN_FRAME 354 +#define _FOR_ITER_TIER_TWO 355 #define _GET_AITER GET_AITER #define _GET_ANEXT GET_ANEXT #define _GET_AWAITABLE GET_AWAITABLE #define _GET_ITER GET_ITER #define _GET_LEN GET_LEN #define _GET_YIELD_FROM_ITER GET_YIELD_FROM_ITER -#define _GUARD_BOTH_FLOAT 354 -#define _GUARD_BOTH_INT 355 -#define _GUARD_BOTH_UNICODE 356 -#define _GUARD_BUILTINS_VERSION 357 -#define _GUARD_DORV_NO_DICT 358 -#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 359 -#define _GUARD_GLOBALS_VERSION 360 -#define _GUARD_IS_FALSE_POP 361 -#define _GUARD_IS_NONE_POP 362 -#define _GUARD_IS_NOT_NONE_POP 363 -#define _GUARD_IS_TRUE_POP 364 -#define _GUARD_KEYS_VERSION 365 -#define _GUARD_NOS_FLOAT 366 -#define _GUARD_NOS_INT 367 -#define _GUARD_NOT_EXHAUSTED_LIST 368 -#define _GUARD_NOT_EXHAUSTED_RANGE 369 -#define _GUARD_NOT_EXHAUSTED_TUPLE 370 -#define _GUARD_TOS_FLOAT 371 -#define _GUARD_TOS_INT 372 -#define _GUARD_TYPE_VERSION 373 +#define _GUARD_BOTH_FLOAT 356 +#define _GUARD_BOTH_INT 357 +#define _GUARD_BOTH_UNICODE 358 +#define _GUARD_BUILTINS_VERSION 359 +#define _GUARD_DORV_NO_DICT 360 +#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 361 +#define _GUARD_GLOBALS_VERSION 362 +#define _GUARD_IS_FALSE_POP 363 +#define _GUARD_IS_NONE_POP 364 +#define _GUARD_IS_NOT_NONE_POP 365 +#define _GUARD_IS_TRUE_POP 366 +#define _GUARD_KEYS_VERSION 367 +#define _GUARD_NOS_FLOAT 368 +#define _GUARD_NOS_INT 369 +#define _GUARD_NOT_EXHAUSTED_LIST 370 +#define _GUARD_NOT_EXHAUSTED_RANGE 371 +#define _GUARD_NOT_EXHAUSTED_TUPLE 372 +#define _GUARD_TOS_FLOAT 373 +#define _GUARD_TOS_INT 374 +#define _GUARD_TYPE_VERSION 375 #define _IMPORT_FROM IMPORT_FROM #define _IMPORT_NAME IMPORT_NAME -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 374 -#define _INIT_CALL_PY_EXACT_ARGS 375 -#define _INIT_CALL_PY_EXACT_ARGS_0 376 -#define _INIT_CALL_PY_EXACT_ARGS_1 377 -#define _INIT_CALL_PY_EXACT_ARGS_2 378 -#define _INIT_CALL_PY_EXACT_ARGS_3 379 -#define _INIT_CALL_PY_EXACT_ARGS_4 380 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 376 +#define _INIT_CALL_PY_EXACT_ARGS 377 +#define _INIT_CALL_PY_EXACT_ARGS_0 378 +#define _INIT_CALL_PY_EXACT_ARGS_1 379 +#define _INIT_CALL_PY_EXACT_ARGS_2 380 +#define _INIT_CALL_PY_EXACT_ARGS_3 381 +#define _INIT_CALL_PY_EXACT_ARGS_4 382 #define _INSTRUMENTED_CALL_FUNCTION_EX INSTRUMENTED_CALL_FUNCTION_EX #define _INSTRUMENTED_CALL_KW INSTRUMENTED_CALL_KW #define _INSTRUMENTED_FOR_ITER INSTRUMENTED_FOR_ITER @@ -151,65 +152,65 @@ extern "C" { #define _INSTRUMENTED_POP_JUMP_IF_NOT_NONE INSTRUMENTED_POP_JUMP_IF_NOT_NONE #define _INSTRUMENTED_POP_JUMP_IF_TRUE INSTRUMENTED_POP_JUMP_IF_TRUE #define _INSTRUMENTED_RESUME INSTRUMENTED_RESUME -#define _INTERNAL_INCREMENT_OPT_COUNTER 381 -#define _IS_NONE 382 +#define _INTERNAL_INCREMENT_OPT_COUNTER 383 +#define _IS_NONE 384 #define _IS_OP IS_OP -#define _ITER_CHECK_LIST 383 -#define _ITER_CHECK_RANGE 384 -#define _ITER_CHECK_TUPLE 385 -#define _ITER_JUMP_LIST 386 -#define _ITER_JUMP_RANGE 387 -#define _ITER_JUMP_TUPLE 388 -#define _ITER_NEXT_LIST 389 -#define _ITER_NEXT_RANGE 390 -#define _ITER_NEXT_TUPLE 391 -#define _JUMP_TO_TOP 392 +#define _ITER_CHECK_LIST 385 +#define _ITER_CHECK_RANGE 386 +#define _ITER_CHECK_TUPLE 387 +#define _ITER_JUMP_LIST 388 +#define _ITER_JUMP_RANGE 389 +#define _ITER_JUMP_TUPLE 390 +#define _ITER_NEXT_LIST 391 +#define _ITER_NEXT_RANGE 392 +#define _ITER_NEXT_TUPLE 393 +#define _JUMP_TO_TOP 394 #define _LIST_APPEND LIST_APPEND #define _LIST_EXTEND LIST_EXTEND -#define _LOAD_ATTR 393 -#define _LOAD_ATTR_CLASS 394 -#define _LOAD_ATTR_CLASS_0 395 -#define _LOAD_ATTR_CLASS_1 396 +#define _LOAD_ATTR 395 +#define _LOAD_ATTR_CLASS 396 +#define _LOAD_ATTR_CLASS_0 397 +#define _LOAD_ATTR_CLASS_1 398 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN -#define _LOAD_ATTR_INSTANCE_VALUE 397 -#define _LOAD_ATTR_INSTANCE_VALUE_0 398 -#define _LOAD_ATTR_INSTANCE_VALUE_1 399 -#define _LOAD_ATTR_METHOD_LAZY_DICT 400 -#define _LOAD_ATTR_METHOD_NO_DICT 401 -#define _LOAD_ATTR_METHOD_WITH_VALUES 402 -#define _LOAD_ATTR_MODULE 403 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 404 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 405 -#define _LOAD_ATTR_PROPERTY_FRAME 406 -#define _LOAD_ATTR_SLOT 407 -#define _LOAD_ATTR_SLOT_0 408 -#define _LOAD_ATTR_SLOT_1 409 -#define _LOAD_ATTR_WITH_HINT 410 +#define _LOAD_ATTR_INSTANCE_VALUE 399 +#define _LOAD_ATTR_INSTANCE_VALUE_0 400 +#define _LOAD_ATTR_INSTANCE_VALUE_1 401 +#define _LOAD_ATTR_METHOD_LAZY_DICT 402 +#define _LOAD_ATTR_METHOD_NO_DICT 403 +#define _LOAD_ATTR_METHOD_WITH_VALUES 404 +#define _LOAD_ATTR_MODULE 405 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 406 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 407 +#define _LOAD_ATTR_PROPERTY_FRAME 408 +#define _LOAD_ATTR_SLOT 409 +#define _LOAD_ATTR_SLOT_0 410 +#define _LOAD_ATTR_SLOT_1 411 +#define _LOAD_ATTR_WITH_HINT 412 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS #define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT #define _LOAD_CONST LOAD_CONST -#define _LOAD_CONST_INLINE 411 -#define _LOAD_CONST_INLINE_BORROW 412 -#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 413 -#define _LOAD_CONST_INLINE_WITH_NULL 414 +#define _LOAD_CONST_INLINE 413 +#define _LOAD_CONST_INLINE_BORROW 414 +#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 415 +#define _LOAD_CONST_INLINE_WITH_NULL 416 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 415 -#define _LOAD_FAST_0 416 -#define _LOAD_FAST_1 417 -#define _LOAD_FAST_2 418 -#define _LOAD_FAST_3 419 -#define _LOAD_FAST_4 420 -#define _LOAD_FAST_5 421 -#define _LOAD_FAST_6 422 -#define _LOAD_FAST_7 423 +#define _LOAD_FAST 417 +#define _LOAD_FAST_0 418 +#define _LOAD_FAST_1 419 +#define _LOAD_FAST_2 420 +#define _LOAD_FAST_3 421 +#define _LOAD_FAST_4 422 +#define _LOAD_FAST_5 423 +#define _LOAD_FAST_6 424 +#define _LOAD_FAST_7 425 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 424 -#define _LOAD_GLOBAL_BUILTINS 425 -#define _LOAD_GLOBAL_MODULE 426 +#define _LOAD_GLOBAL 426 +#define _LOAD_GLOBAL_BUILTINS 427 +#define _LOAD_GLOBAL_MODULE 428 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME #define _LOAD_SPECIAL LOAD_SPECIAL @@ -222,55 +223,55 @@ extern "C" { #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 427 -#define _MONITOR_CALL 428 +#define _MAYBE_EXPAND_METHOD 429 +#define _MONITOR_CALL 430 #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 429 -#define _POP_JUMP_IF_TRUE 430 +#define _POP_JUMP_IF_FALSE 431 +#define _POP_JUMP_IF_TRUE 432 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 431 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 433 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 432 +#define _PUSH_FRAME 434 #define _PUSH_NULL PUSH_NULL -#define _PY_FRAME_GENERAL 433 -#define _REPLACE_WITH_TRUE 434 +#define _PY_FRAME_GENERAL 435 +#define _REPLACE_WITH_TRUE 436 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 435 -#define _SEND 436 -#define _SEND_GEN_FRAME 437 +#define _SAVE_RETURN_OFFSET 437 +#define _SEND 438 +#define _SEND_GEN_FRAME 439 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 438 -#define _STORE_ATTR 439 -#define _STORE_ATTR_INSTANCE_VALUE 440 -#define _STORE_ATTR_SLOT 441 -#define _STORE_ATTR_WITH_HINT 442 +#define _START_EXECUTOR 440 +#define _STORE_ATTR 441 +#define _STORE_ATTR_INSTANCE_VALUE 442 +#define _STORE_ATTR_SLOT 443 +#define _STORE_ATTR_WITH_HINT 444 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 443 -#define _STORE_FAST_0 444 -#define _STORE_FAST_1 445 -#define _STORE_FAST_2 446 -#define _STORE_FAST_3 447 -#define _STORE_FAST_4 448 -#define _STORE_FAST_5 449 -#define _STORE_FAST_6 450 -#define _STORE_FAST_7 451 +#define _STORE_FAST 445 +#define _STORE_FAST_0 446 +#define _STORE_FAST_1 447 +#define _STORE_FAST_2 448 +#define _STORE_FAST_3 449 +#define _STORE_FAST_4 450 +#define _STORE_FAST_5 451 +#define _STORE_FAST_6 452 +#define _STORE_FAST_7 453 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME #define _STORE_SLICE STORE_SLICE -#define _STORE_SUBSCR 452 +#define _STORE_SUBSCR 454 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 453 -#define _TO_BOOL 454 +#define _TIER2_RESUME_CHECK 455 +#define _TO_BOOL 456 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT #define _TO_BOOL_LIST TO_BOOL_LIST @@ -280,13 +281,13 @@ extern "C" { #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 455 +#define _UNPACK_SEQUENCE 457 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 455 +#define MAX_UOP_ID 457 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index d23a4e2ea14..f5c666454dc 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -80,6 +80,8 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_BINARY_SUBSCR_STR_INT] = HAS_DEOPT_FLAG, [_BINARY_SUBSCR_TUPLE_INT] = HAS_DEOPT_FLAG, [_BINARY_SUBSCR_DICT] = HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_BINARY_SUBSCR_CHECK_FUNC] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, + [_BINARY_SUBSCR_INIT_CALL] = 0, [_LIST_APPEND] = HAS_ARG_FLAG | HAS_ERROR_FLAG, [_SET_ADD] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_SUBSCR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -288,7 +290,9 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_BINARY_OP_SUBTRACT_INT] = "_BINARY_OP_SUBTRACT_INT", [_BINARY_SLICE] = "_BINARY_SLICE", [_BINARY_SUBSCR] = "_BINARY_SUBSCR", + [_BINARY_SUBSCR_CHECK_FUNC] = "_BINARY_SUBSCR_CHECK_FUNC", [_BINARY_SUBSCR_DICT] = "_BINARY_SUBSCR_DICT", + [_BINARY_SUBSCR_INIT_CALL] = "_BINARY_SUBSCR_INIT_CALL", [_BINARY_SUBSCR_LIST_INT] = "_BINARY_SUBSCR_LIST_INT", [_BINARY_SUBSCR_STR_INT] = "_BINARY_SUBSCR_STR_INT", [_BINARY_SUBSCR_TUPLE_INT] = "_BINARY_SUBSCR_TUPLE_INT", @@ -652,6 +656,10 @@ int _PyUop_num_popped(int opcode, int oparg) return 2; case _BINARY_SUBSCR_DICT: return 2; + case _BINARY_SUBSCR_CHECK_FUNC: + return 2; + case _BINARY_SUBSCR_INIT_CALL: + return 2; case _LIST_APPEND: return 2 + (oparg-1); case _SET_ADD: diff --git a/Python/bytecodes.c b/Python/bytecodes.c index abfd8039b29..414725549d1 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -765,32 +765,40 @@ dummy_func( res = PyStackRef_FromPyObjectSteal(res_o); } - inst(BINARY_SUBSCR_GETITEM, (unused/1, container_st, sub_st -- unused)) { - PyObject *container = PyStackRef_AsPyObjectBorrow(container_st); - - DEOPT_IF(tstate->interp->eval_frame); - PyTypeObject *tp = Py_TYPE(container); + op(_BINARY_SUBSCR_CHECK_FUNC, (container, unused -- container, unused)) { + PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE)); PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; - PyObject *cached = ht->_spec_cache.getitem; - DEOPT_IF(cached == NULL); - assert(PyFunction_Check(cached)); - PyFunctionObject *getitem = (PyFunctionObject *)cached; + PyObject *getitem = ht->_spec_cache.getitem; + DEOPT_IF(getitem == NULL); + assert(PyFunction_Check(getitem)); uint32_t cached_version = ht->_spec_cache.getitem_version; - DEOPT_IF(getitem->func_version != cached_version); - PyCodeObject *code = (PyCodeObject *)getitem->func_code; + DEOPT_IF(((PyFunctionObject *)getitem)->func_version != cached_version); + PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem); assert(code->co_argcount == 2); DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize)); STAT_INC(BINARY_SUBSCR, hit); Py_INCREF(getitem); - _PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, getitem, 2); - STACK_SHRINK(2); - new_frame->localsplus[0] = container_st; - new_frame->localsplus[1] = sub_st; - frame->return_offset = (uint16_t)(next_instr - this_instr); - DISPATCH_INLINED(new_frame); } + op(_BINARY_SUBSCR_INIT_CALL, (container, sub -- new_frame: _PyInterpreterFrame* )) { + PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); + PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; + PyObject *getitem = ht->_spec_cache.getitem; + new_frame = _PyFrame_PushUnchecked(tstate, (PyFunctionObject *)getitem, 2); + SYNC_SP(); + new_frame->localsplus[0] = container; + new_frame->localsplus[1] = sub; + frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_BINARY_SUBSCR); + } + + macro(BINARY_SUBSCR_GETITEM) = + unused/1 + // Skip over the counter + _CHECK_PEP_523 + + _BINARY_SUBSCR_CHECK_FUNC + + _BINARY_SUBSCR_INIT_CALL + + _PUSH_FRAME; + inst(LIST_APPEND, (list, unused[oparg-1], v -- list, unused[oparg-1])) { ERROR_IF(_PyList_AppendTakeRef((PyListObject *)PyStackRef_AsPyObjectBorrow(list), PyStackRef_AsPyObjectSteal(v)) < 0, error); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index f0acc3b6ea2..61e1c5cf5c2 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -966,7 +966,57 @@ break; } - /* _BINARY_SUBSCR_GETITEM is not a viable micro-op for tier 2 because it uses the 'this_instr' variable */ + case _BINARY_SUBSCR_CHECK_FUNC: { + _PyStackRef container; + container = stack_pointer[-2]; + PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); + if (!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; + PyObject *getitem = ht->_spec_cache.getitem; + if (getitem == NULL) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + assert(PyFunction_Check(getitem)); + uint32_t cached_version = ht->_spec_cache.getitem_version; + if (((PyFunctionObject *)getitem)->func_version != cached_version) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem); + assert(code->co_argcount == 2); + if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + STAT_INC(BINARY_SUBSCR, hit); + Py_INCREF(getitem); + break; + } + + case _BINARY_SUBSCR_INIT_CALL: { + _PyStackRef sub; + _PyStackRef container; + _PyInterpreterFrame *new_frame; + sub = stack_pointer[-1]; + container = stack_pointer[-2]; + PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); + PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; + PyObject *getitem = ht->_spec_cache.getitem; + new_frame = _PyFrame_PushUnchecked(tstate, (PyFunctionObject *)getitem, 2); + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + new_frame->localsplus[0] = container; + new_frame->localsplus[1] = sub; + frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_BINARY_SUBSCR); + stack_pointer[0].bits = (uintptr_t)new_frame; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } case _LIST_APPEND: { _PyStackRef v; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index ff8c4eab58f..4efaf899f23 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -469,37 +469,63 @@ } TARGET(BINARY_SUBSCR_GETITEM) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + frame->instr_ptr = next_instr; next_instr += 2; INSTRUCTION_STATS(BINARY_SUBSCR_GETITEM); static_assert(INLINE_CACHE_ENTRIES_BINARY_SUBSCR == 1, "incorrect cache size"); - _PyStackRef container_st; - _PyStackRef sub_st; + _PyStackRef container; + _PyStackRef sub; + _PyInterpreterFrame *new_frame; /* Skip 1 cache entry */ - sub_st = stack_pointer[-1]; - container_st = stack_pointer[-2]; - PyObject *container = PyStackRef_AsPyObjectBorrow(container_st); - DEOPT_IF(tstate->interp->eval_frame, BINARY_SUBSCR); - PyTypeObject *tp = Py_TYPE(container); - DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE), BINARY_SUBSCR); - PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; - PyObject *cached = ht->_spec_cache.getitem; - DEOPT_IF(cached == NULL, BINARY_SUBSCR); - assert(PyFunction_Check(cached)); - PyFunctionObject *getitem = (PyFunctionObject *)cached; - uint32_t cached_version = ht->_spec_cache.getitem_version; - DEOPT_IF(getitem->func_version != cached_version, BINARY_SUBSCR); - PyCodeObject *code = (PyCodeObject *)getitem->func_code; - assert(code->co_argcount == 2); - DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), BINARY_SUBSCR); - STAT_INC(BINARY_SUBSCR, hit); - Py_INCREF(getitem); - _PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, getitem, 2); - STACK_SHRINK(2); - new_frame->localsplus[0] = container_st; - new_frame->localsplus[1] = sub_st; - frame->return_offset = (uint16_t)(next_instr - this_instr); - DISPATCH_INLINED(new_frame); + // _CHECK_PEP_523 + { + DEOPT_IF(tstate->interp->eval_frame, BINARY_SUBSCR); + } + // _BINARY_SUBSCR_CHECK_FUNC + container = stack_pointer[-2]; + { + PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); + DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE), BINARY_SUBSCR); + PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; + PyObject *getitem = ht->_spec_cache.getitem; + DEOPT_IF(getitem == NULL, BINARY_SUBSCR); + assert(PyFunction_Check(getitem)); + uint32_t cached_version = ht->_spec_cache.getitem_version; + DEOPT_IF(((PyFunctionObject *)getitem)->func_version != cached_version, BINARY_SUBSCR); + PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem); + assert(code->co_argcount == 2); + DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), BINARY_SUBSCR); + STAT_INC(BINARY_SUBSCR, hit); + Py_INCREF(getitem); + } + // _BINARY_SUBSCR_INIT_CALL + sub = stack_pointer[-1]; + { + PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); + PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; + PyObject *getitem = ht->_spec_cache.getitem; + new_frame = _PyFrame_PushUnchecked(tstate, (PyFunctionObject *)getitem, 2); + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + new_frame->localsplus[0] = container; + new_frame->localsplus[1] = sub; + frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_BINARY_SUBSCR); + } + // _PUSH_FRAME + { + // Write it out explicitly because it's subtly different. + // Eventually this should be the only occurrence of this code. + assert(tstate->interp->eval_frame == NULL); + _PyFrame_SetStackPointer(frame, stack_pointer); + new_frame->previous = frame; + CALL_STAT_INC(inlined_py_calls); + frame = tstate->current_frame = new_frame; + tstate->py_recursion_remaining--; + LOAD_SP(); + LOAD_IP(0); + LLTRACE_RESUME_FRAME(); + } + DISPATCH(); } TARGET(BINARY_SUBSCR_LIST_INT) { diff --git a/Python/optimizer.c b/Python/optimizer.c index 9d0381357f2..e9cbfc54971 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -795,6 +795,7 @@ translate_bytecode_to_trace( assert(i + 1 == nuops); if (opcode == FOR_ITER_GEN || opcode == LOAD_ATTR_PROPERTY || + opcode == BINARY_SUBSCR_GETITEM || opcode == SEND_GEN) { DPRINTF(2, "Bailing due to dynamic target\n"); @@ -921,7 +922,9 @@ done: 2 * INSTR_IP(initial_instr, code)); return 0; } - if (trace[trace_length-1].opcode != _JUMP_TO_TOP) { + if (!is_terminator(&trace[trace_length-1])) { + /* Allow space for _EXIT_TRACE */ + max_length += 2; ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target); } DPRINTF(1, @@ -1102,7 +1105,7 @@ sanity_check(_PyExecutorObject *executor) CHECK(inst->format == UOP_FORMAT_JUMP); CHECK(inst->error_target < executor->code_size); } - if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE) { + if (is_terminator(inst)) { ended = true; i++; break; @@ -1207,8 +1210,7 @@ int effective_trace_length(_PyUOpInstruction *buffer, int length) if (opcode == _NOP) { nop_count++; } - if (opcode == _EXIT_TRACE || - opcode == _JUMP_TO_TOP) { + if (is_terminator(&buffer[i])) { return i+1-nop_count; } } @@ -1257,7 +1259,7 @@ uop_optimize( else if (oparg < _PyUop_Replication[opcode]) { buffer[pc].opcode = opcode + oparg + 1; } - else if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE) { + else if (is_terminator(&buffer[pc])) { break; } assert(_PyOpcode_uop_name[buffer[pc].opcode]); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 8c866417478..f7adb44c9e0 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -52,14 +52,6 @@ #define DPRINTF(level, ...) #endif - - -static inline bool -op_is_end(uint32_t opcode) -{ - return opcode == _EXIT_TRACE || opcode == _JUMP_TO_TOP; -} - static int get_mutations(PyObject* dict) { assert(PyDict_CheckExact(dict)); @@ -288,7 +280,7 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, prechecked_function_version = (uint32_t)buffer[pc].operand; break; default: - if (op_is_end(opcode)) { + if (is_terminator(inst)) { return 1; } break; @@ -552,6 +544,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) } case _JUMP_TO_TOP: case _EXIT_TRACE: + case _DYNAMIC_EXIT: return pc + 1; default: { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index b704c9e7731..50aa9728cf2 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -539,7 +539,18 @@ break; } - /* _BINARY_SUBSCR_GETITEM is not a viable micro-op for tier 2 */ + case _BINARY_SUBSCR_CHECK_FUNC: { + break; + } + + case _BINARY_SUBSCR_INIT_CALL: { + _PyInterpreterFrame *new_frame; + new_frame = sym_new_not_null(ctx); + stack_pointer[-2] = (_Py_UopsSymbol *)new_frame; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } case _LIST_APPEND: { stack_pointer += -1;