From d9efa45d7457b0dfea467bb1c2d22c69056ffc73 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 25 Jul 2024 14:45:07 -0700 Subject: [PATCH] GH-118093: Add tier two support for BINARY_OP_INPLACE_ADD_UNICODE (GH-122253) --- Include/internal/pycore_opcode_metadata.h | 1 + Include/internal/pycore_uop_ids.h | 297 +++++++++++----------- Include/internal/pycore_uop_metadata.h | 4 + Python/bytecodes.c | 15 +- Python/executor_cases.c.h | 49 ++++ Python/generated_cases.c.h | 13 +- Python/optimizer.c | 9 + Python/optimizer_cases.c.h | 6 + 8 files changed, 241 insertions(+), 153 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 2fb6b2c4ed8..9c7ad926f9a 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1218,6 +1218,7 @@ _PyOpcode_macro_expansion[256] = { [BINARY_OP_ADD_FLOAT] = { .nuops = 2, .uops = { { _GUARD_BOTH_FLOAT, 0, 0 }, { _BINARY_OP_ADD_FLOAT, 0, 0 } } }, [BINARY_OP_ADD_INT] = { .nuops = 2, .uops = { { _GUARD_BOTH_INT, 0, 0 }, { _BINARY_OP_ADD_INT, 0, 0 } } }, [BINARY_OP_ADD_UNICODE] = { .nuops = 2, .uops = { { _GUARD_BOTH_UNICODE, 0, 0 }, { _BINARY_OP_ADD_UNICODE, 0, 0 } } }, + [BINARY_OP_INPLACE_ADD_UNICODE] = { .nuops = 2, .uops = { { _GUARD_BOTH_UNICODE, 0, 0 }, { _BINARY_OP_INPLACE_ADD_UNICODE, 0, 0 } } }, [BINARY_OP_MULTIPLY_FLOAT] = { .nuops = 2, .uops = { { _GUARD_BOTH_FLOAT, 0, 0 }, { _BINARY_OP_MULTIPLY_FLOAT, 0, 0 } } }, [BINARY_OP_MULTIPLY_INT] = { .nuops = 2, .uops = { { _GUARD_BOTH_INT, 0, 0 }, { _BINARY_OP_MULTIPLY_INT, 0, 0 } } }, [BINARY_OP_SUBTRACT_FLOAT] = { .nuops = 2, .uops = { { _GUARD_BOTH_FLOAT, 0, 0 }, { _BINARY_OP_SUBTRACT_FLOAT, 0, 0 } } }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 88c835ca8ed..9ae82ca3c3d 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -15,12 +15,13 @@ extern "C" { #define _BINARY_OP_ADD_FLOAT 303 #define _BINARY_OP_ADD_INT 304 #define _BINARY_OP_ADD_UNICODE 305 -#define _BINARY_OP_MULTIPLY_FLOAT 306 -#define _BINARY_OP_MULTIPLY_INT 307 -#define _BINARY_OP_SUBTRACT_FLOAT 308 -#define _BINARY_OP_SUBTRACT_INT 309 +#define _BINARY_OP_INPLACE_ADD_UNICODE 306 +#define _BINARY_OP_MULTIPLY_FLOAT 307 +#define _BINARY_OP_MULTIPLY_INT 308 +#define _BINARY_OP_SUBTRACT_FLOAT 309 +#define _BINARY_OP_SUBTRACT_INT 310 #define _BINARY_SLICE BINARY_SLICE -#define _BINARY_SUBSCR 310 +#define _BINARY_SUBSCR 311 #define _BINARY_SUBSCR_DICT BINARY_SUBSCR_DICT #define _BINARY_SUBSCR_GETITEM BINARY_SUBSCR_GETITEM #define _BINARY_SUBSCR_LIST_INT BINARY_SUBSCR_LIST_INT @@ -32,12 +33,12 @@ extern "C" { #define _BUILD_SLICE BUILD_SLICE #define _BUILD_STRING BUILD_STRING #define _BUILD_TUPLE BUILD_TUPLE -#define _CALL 311 +#define _CALL 312 #define _CALL_ALLOC_AND_ENTER_INIT CALL_ALLOC_AND_ENTER_INIT -#define _CALL_BUILTIN_CLASS 312 -#define _CALL_BUILTIN_FAST 313 -#define _CALL_BUILTIN_FAST_WITH_KEYWORDS 314 -#define _CALL_BUILTIN_O 315 +#define _CALL_BUILTIN_CLASS 313 +#define _CALL_BUILTIN_FAST 314 +#define _CALL_BUILTIN_FAST_WITH_KEYWORDS 315 +#define _CALL_BUILTIN_O 316 #define _CALL_FUNCTION_EX CALL_FUNCTION_EX #define _CALL_INTRINSIC_1 CALL_INTRINSIC_1 #define _CALL_INTRINSIC_2 CALL_INTRINSIC_2 @@ -45,38 +46,38 @@ extern "C" { #define _CALL_KW CALL_KW #define _CALL_LEN CALL_LEN #define _CALL_LIST_APPEND CALL_LIST_APPEND -#define _CALL_METHOD_DESCRIPTOR_FAST 316 -#define _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS 317 -#define _CALL_METHOD_DESCRIPTOR_NOARGS 318 -#define _CALL_METHOD_DESCRIPTOR_O 319 -#define _CALL_NON_PY_GENERAL 320 -#define _CALL_STR_1 321 -#define _CALL_TUPLE_1 322 +#define _CALL_METHOD_DESCRIPTOR_FAST 317 +#define _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS 318 +#define _CALL_METHOD_DESCRIPTOR_NOARGS 319 +#define _CALL_METHOD_DESCRIPTOR_O 320 +#define _CALL_NON_PY_GENERAL 321 +#define _CALL_STR_1 322 +#define _CALL_TUPLE_1 323 #define _CALL_TYPE_1 CALL_TYPE_1 -#define _CHECK_ATTR_CLASS 323 -#define _CHECK_ATTR_METHOD_LAZY_DICT 324 -#define _CHECK_ATTR_MODULE 325 -#define _CHECK_ATTR_WITH_HINT 326 -#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 327 +#define _CHECK_ATTR_CLASS 324 +#define _CHECK_ATTR_METHOD_LAZY_DICT 325 +#define _CHECK_ATTR_MODULE 326 +#define _CHECK_ATTR_WITH_HINT 327 +#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 328 #define _CHECK_EG_MATCH CHECK_EG_MATCH #define _CHECK_EXC_MATCH CHECK_EXC_MATCH -#define _CHECK_FUNCTION 328 -#define _CHECK_FUNCTION_EXACT_ARGS 329 -#define _CHECK_FUNCTION_VERSION 330 -#define _CHECK_IS_NOT_PY_CALLABLE 331 -#define _CHECK_MANAGED_OBJECT_HAS_VALUES 332 -#define _CHECK_METHOD_VERSION 333 -#define _CHECK_PEP_523 334 -#define _CHECK_PERIODIC 335 -#define _CHECK_STACK_SPACE 336 -#define _CHECK_STACK_SPACE_OPERAND 337 -#define _CHECK_VALIDITY 338 -#define _CHECK_VALIDITY_AND_SET_IP 339 -#define _COMPARE_OP 340 -#define _COMPARE_OP_FLOAT 341 -#define _COMPARE_OP_INT 342 -#define _COMPARE_OP_STR 343 -#define _CONTAINS_OP 344 +#define _CHECK_FUNCTION 329 +#define _CHECK_FUNCTION_EXACT_ARGS 330 +#define _CHECK_FUNCTION_VERSION 331 +#define _CHECK_IS_NOT_PY_CALLABLE 332 +#define _CHECK_MANAGED_OBJECT_HAS_VALUES 333 +#define _CHECK_METHOD_VERSION 334 +#define _CHECK_PEP_523 335 +#define _CHECK_PERIODIC 336 +#define _CHECK_STACK_SPACE 337 +#define _CHECK_STACK_SPACE_OPERAND 338 +#define _CHECK_VALIDITY 339 +#define _CHECK_VALIDITY_AND_SET_IP 340 +#define _COMPARE_OP 341 +#define _COMPARE_OP_FLOAT 342 +#define _COMPARE_OP_INT 343 +#define _COMPARE_OP_STR 344 +#define _CONTAINS_OP 345 #define _CONTAINS_OP_DICT CONTAINS_OP_DICT #define _CONTAINS_OP_SET CONTAINS_OP_SET #define _CONVERT_VALUE CONVERT_VALUE @@ -88,55 +89,55 @@ extern "C" { #define _DELETE_GLOBAL DELETE_GLOBAL #define _DELETE_NAME DELETE_NAME #define _DELETE_SUBSCR DELETE_SUBSCR -#define _DEOPT 345 +#define _DEOPT 346 #define _DICT_MERGE DICT_MERGE #define _DICT_UPDATE DICT_UPDATE -#define _DYNAMIC_EXIT 346 +#define _DYNAMIC_EXIT 347 #define _END_SEND END_SEND -#define _ERROR_POP_N 347 +#define _ERROR_POP_N 348 #define _EXIT_INIT_CHECK EXIT_INIT_CHECK -#define _EXPAND_METHOD 348 -#define _FATAL_ERROR 349 +#define _EXPAND_METHOD 349 +#define _FATAL_ERROR 350 #define _FORMAT_SIMPLE FORMAT_SIMPLE #define _FORMAT_WITH_SPEC FORMAT_WITH_SPEC -#define _FOR_ITER 350 -#define _FOR_ITER_GEN_FRAME 351 -#define _FOR_ITER_TIER_TWO 352 +#define _FOR_ITER 351 +#define _FOR_ITER_GEN_FRAME 352 +#define _FOR_ITER_TIER_TWO 353 #define _GET_AITER GET_AITER #define _GET_ANEXT GET_ANEXT #define _GET_AWAITABLE GET_AWAITABLE #define _GET_ITER GET_ITER #define _GET_LEN GET_LEN #define _GET_YIELD_FROM_ITER GET_YIELD_FROM_ITER -#define _GUARD_BOTH_FLOAT 353 -#define _GUARD_BOTH_INT 354 -#define _GUARD_BOTH_UNICODE 355 -#define _GUARD_BUILTINS_VERSION 356 -#define _GUARD_DORV_NO_DICT 357 -#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 358 -#define _GUARD_GLOBALS_VERSION 359 -#define _GUARD_IS_FALSE_POP 360 -#define _GUARD_IS_NONE_POP 361 -#define _GUARD_IS_NOT_NONE_POP 362 -#define _GUARD_IS_TRUE_POP 363 -#define _GUARD_KEYS_VERSION 364 -#define _GUARD_NOS_FLOAT 365 -#define _GUARD_NOS_INT 366 -#define _GUARD_NOT_EXHAUSTED_LIST 367 -#define _GUARD_NOT_EXHAUSTED_RANGE 368 -#define _GUARD_NOT_EXHAUSTED_TUPLE 369 -#define _GUARD_TOS_FLOAT 370 -#define _GUARD_TOS_INT 371 -#define _GUARD_TYPE_VERSION 372 +#define _GUARD_BOTH_FLOAT 354 +#define _GUARD_BOTH_INT 355 +#define _GUARD_BOTH_UNICODE 356 +#define _GUARD_BUILTINS_VERSION 357 +#define _GUARD_DORV_NO_DICT 358 +#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 359 +#define _GUARD_GLOBALS_VERSION 360 +#define _GUARD_IS_FALSE_POP 361 +#define _GUARD_IS_NONE_POP 362 +#define _GUARD_IS_NOT_NONE_POP 363 +#define _GUARD_IS_TRUE_POP 364 +#define _GUARD_KEYS_VERSION 365 +#define _GUARD_NOS_FLOAT 366 +#define _GUARD_NOS_INT 367 +#define _GUARD_NOT_EXHAUSTED_LIST 368 +#define _GUARD_NOT_EXHAUSTED_RANGE 369 +#define _GUARD_NOT_EXHAUSTED_TUPLE 370 +#define _GUARD_TOS_FLOAT 371 +#define _GUARD_TOS_INT 372 +#define _GUARD_TYPE_VERSION 373 #define _IMPORT_FROM IMPORT_FROM #define _IMPORT_NAME IMPORT_NAME -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 373 -#define _INIT_CALL_PY_EXACT_ARGS 374 -#define _INIT_CALL_PY_EXACT_ARGS_0 375 -#define _INIT_CALL_PY_EXACT_ARGS_1 376 -#define _INIT_CALL_PY_EXACT_ARGS_2 377 -#define _INIT_CALL_PY_EXACT_ARGS_3 378 -#define _INIT_CALL_PY_EXACT_ARGS_4 379 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 374 +#define _INIT_CALL_PY_EXACT_ARGS 375 +#define _INIT_CALL_PY_EXACT_ARGS_0 376 +#define _INIT_CALL_PY_EXACT_ARGS_1 377 +#define _INIT_CALL_PY_EXACT_ARGS_2 378 +#define _INIT_CALL_PY_EXACT_ARGS_3 379 +#define _INIT_CALL_PY_EXACT_ARGS_4 380 #define _INSTRUMENTED_CALL INSTRUMENTED_CALL #define _INSTRUMENTED_CALL_FUNCTION_EX INSTRUMENTED_CALL_FUNCTION_EX #define _INSTRUMENTED_CALL_KW INSTRUMENTED_CALL_KW @@ -153,65 +154,65 @@ extern "C" { #define _INSTRUMENTED_RETURN_CONST INSTRUMENTED_RETURN_CONST #define _INSTRUMENTED_RETURN_VALUE INSTRUMENTED_RETURN_VALUE #define _INSTRUMENTED_YIELD_VALUE INSTRUMENTED_YIELD_VALUE -#define _INTERNAL_INCREMENT_OPT_COUNTER 380 -#define _IS_NONE 381 +#define _INTERNAL_INCREMENT_OPT_COUNTER 381 +#define _IS_NONE 382 #define _IS_OP IS_OP -#define _ITER_CHECK_LIST 382 -#define _ITER_CHECK_RANGE 383 -#define _ITER_CHECK_TUPLE 384 -#define _ITER_JUMP_LIST 385 -#define _ITER_JUMP_RANGE 386 -#define _ITER_JUMP_TUPLE 387 -#define _ITER_NEXT_LIST 388 -#define _ITER_NEXT_RANGE 389 -#define _ITER_NEXT_TUPLE 390 -#define _JUMP_TO_TOP 391 +#define _ITER_CHECK_LIST 383 +#define _ITER_CHECK_RANGE 384 +#define _ITER_CHECK_TUPLE 385 +#define _ITER_JUMP_LIST 386 +#define _ITER_JUMP_RANGE 387 +#define _ITER_JUMP_TUPLE 388 +#define _ITER_NEXT_LIST 389 +#define _ITER_NEXT_RANGE 390 +#define _ITER_NEXT_TUPLE 391 +#define _JUMP_TO_TOP 392 #define _LIST_APPEND LIST_APPEND #define _LIST_EXTEND LIST_EXTEND -#define _LOAD_ATTR 392 -#define _LOAD_ATTR_CLASS 393 -#define _LOAD_ATTR_CLASS_0 394 -#define _LOAD_ATTR_CLASS_1 395 +#define _LOAD_ATTR 393 +#define _LOAD_ATTR_CLASS 394 +#define _LOAD_ATTR_CLASS_0 395 +#define _LOAD_ATTR_CLASS_1 396 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN -#define _LOAD_ATTR_INSTANCE_VALUE 396 -#define _LOAD_ATTR_INSTANCE_VALUE_0 397 -#define _LOAD_ATTR_INSTANCE_VALUE_1 398 -#define _LOAD_ATTR_METHOD_LAZY_DICT 399 -#define _LOAD_ATTR_METHOD_NO_DICT 400 -#define _LOAD_ATTR_METHOD_WITH_VALUES 401 -#define _LOAD_ATTR_MODULE 402 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 403 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 404 -#define _LOAD_ATTR_PROPERTY_FRAME 405 -#define _LOAD_ATTR_SLOT 406 -#define _LOAD_ATTR_SLOT_0 407 -#define _LOAD_ATTR_SLOT_1 408 -#define _LOAD_ATTR_WITH_HINT 409 +#define _LOAD_ATTR_INSTANCE_VALUE 397 +#define _LOAD_ATTR_INSTANCE_VALUE_0 398 +#define _LOAD_ATTR_INSTANCE_VALUE_1 399 +#define _LOAD_ATTR_METHOD_LAZY_DICT 400 +#define _LOAD_ATTR_METHOD_NO_DICT 401 +#define _LOAD_ATTR_METHOD_WITH_VALUES 402 +#define _LOAD_ATTR_MODULE 403 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 404 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 405 +#define _LOAD_ATTR_PROPERTY_FRAME 406 +#define _LOAD_ATTR_SLOT 407 +#define _LOAD_ATTR_SLOT_0 408 +#define _LOAD_ATTR_SLOT_1 409 +#define _LOAD_ATTR_WITH_HINT 410 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS #define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT #define _LOAD_CONST LOAD_CONST -#define _LOAD_CONST_INLINE 410 -#define _LOAD_CONST_INLINE_BORROW 411 -#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 412 -#define _LOAD_CONST_INLINE_WITH_NULL 413 +#define _LOAD_CONST_INLINE 411 +#define _LOAD_CONST_INLINE_BORROW 412 +#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 413 +#define _LOAD_CONST_INLINE_WITH_NULL 414 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 414 -#define _LOAD_FAST_0 415 -#define _LOAD_FAST_1 416 -#define _LOAD_FAST_2 417 -#define _LOAD_FAST_3 418 -#define _LOAD_FAST_4 419 -#define _LOAD_FAST_5 420 -#define _LOAD_FAST_6 421 -#define _LOAD_FAST_7 422 +#define _LOAD_FAST 415 +#define _LOAD_FAST_0 416 +#define _LOAD_FAST_1 417 +#define _LOAD_FAST_2 418 +#define _LOAD_FAST_3 419 +#define _LOAD_FAST_4 420 +#define _LOAD_FAST_5 421 +#define _LOAD_FAST_6 422 +#define _LOAD_FAST_7 423 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 423 -#define _LOAD_GLOBAL_BUILTINS 424 -#define _LOAD_GLOBAL_MODULE 425 +#define _LOAD_GLOBAL 424 +#define _LOAD_GLOBAL_BUILTINS 425 +#define _LOAD_GLOBAL_MODULE 426 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME #define _LOAD_SPECIAL LOAD_SPECIAL @@ -226,51 +227,51 @@ extern "C" { #define _MATCH_SEQUENCE MATCH_SEQUENCE #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 426 -#define _POP_JUMP_IF_TRUE 427 +#define _POP_JUMP_IF_FALSE 427 +#define _POP_JUMP_IF_TRUE 428 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 428 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 429 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 429 +#define _PUSH_FRAME 430 #define _PUSH_NULL PUSH_NULL -#define _PY_FRAME_GENERAL 430 -#define _REPLACE_WITH_TRUE 431 +#define _PY_FRAME_GENERAL 431 +#define _REPLACE_WITH_TRUE 432 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 432 -#define _SEND 433 -#define _SEND_GEN_FRAME 434 +#define _SAVE_RETURN_OFFSET 433 +#define _SEND 434 +#define _SEND_GEN_FRAME 435 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 435 -#define _STORE_ATTR 436 -#define _STORE_ATTR_INSTANCE_VALUE 437 -#define _STORE_ATTR_SLOT 438 -#define _STORE_ATTR_WITH_HINT 439 +#define _START_EXECUTOR 436 +#define _STORE_ATTR 437 +#define _STORE_ATTR_INSTANCE_VALUE 438 +#define _STORE_ATTR_SLOT 439 +#define _STORE_ATTR_WITH_HINT 440 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 440 -#define _STORE_FAST_0 441 -#define _STORE_FAST_1 442 -#define _STORE_FAST_2 443 -#define _STORE_FAST_3 444 -#define _STORE_FAST_4 445 -#define _STORE_FAST_5 446 -#define _STORE_FAST_6 447 -#define _STORE_FAST_7 448 +#define _STORE_FAST 441 +#define _STORE_FAST_0 442 +#define _STORE_FAST_1 443 +#define _STORE_FAST_2 444 +#define _STORE_FAST_3 445 +#define _STORE_FAST_4 446 +#define _STORE_FAST_5 447 +#define _STORE_FAST_6 448 +#define _STORE_FAST_7 449 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME #define _STORE_SLICE STORE_SLICE -#define _STORE_SUBSCR 449 +#define _STORE_SUBSCR 450 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 450 -#define _TO_BOOL 451 +#define _TIER2_RESUME_CHECK 451 +#define _TO_BOOL 452 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT #define _TO_BOOL_LIST TO_BOOL_LIST @@ -280,13 +281,13 @@ extern "C" { #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 452 +#define _UNPACK_SEQUENCE 453 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 452 +#define MAX_UOP_ID 453 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 14befe59f04..190c6fb2365 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -72,6 +72,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_BINARY_OP_SUBTRACT_FLOAT] = HAS_PURE_FLAG, [_GUARD_BOTH_UNICODE] = HAS_EXIT_FLAG, [_BINARY_OP_ADD_UNICODE] = HAS_ERROR_FLAG | HAS_PURE_FLAG, + [_BINARY_OP_INPLACE_ADD_UNICODE] = HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BINARY_SUBSCR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BINARY_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -279,6 +280,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_BINARY_OP_ADD_FLOAT] = "_BINARY_OP_ADD_FLOAT", [_BINARY_OP_ADD_INT] = "_BINARY_OP_ADD_INT", [_BINARY_OP_ADD_UNICODE] = "_BINARY_OP_ADD_UNICODE", + [_BINARY_OP_INPLACE_ADD_UNICODE] = "_BINARY_OP_INPLACE_ADD_UNICODE", [_BINARY_OP_MULTIPLY_FLOAT] = "_BINARY_OP_MULTIPLY_FLOAT", [_BINARY_OP_MULTIPLY_INT] = "_BINARY_OP_MULTIPLY_INT", [_BINARY_OP_SUBTRACT_FLOAT] = "_BINARY_OP_SUBTRACT_FLOAT", @@ -632,6 +634,8 @@ int _PyUop_num_popped(int opcode, int oparg) return 2; case _BINARY_OP_ADD_UNICODE: return 2; + case _BINARY_OP_INPLACE_ADD_UNICODE: + return 2; case _BINARY_SUBSCR: return 2; case _BINARY_SLICE: diff --git a/Python/bytecodes.c b/Python/bytecodes.c index d356fc9bfdd..eb8b66f8d8b 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -581,12 +581,18 @@ dummy_func( // So the inputs are the same as for all BINARY_OP // specializations, but there is no output. // At the end we just skip over the STORE_FAST. - tier1 op(_BINARY_OP_INPLACE_ADD_UNICODE, (left, right --)) { + op(_BINARY_OP_INPLACE_ADD_UNICODE, (left, right --)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + int next_oparg; + #if TIER_ONE assert(next_instr->op.code == STORE_FAST); - _PyStackRef *target_local = &GETLOCAL(next_instr->op.arg); + next_oparg = next_instr->op.arg; + #else + next_oparg = CURRENT_OPERAND(); + #endif + _PyStackRef *target_local = &GETLOCAL(next_oparg); DEOPT_IF(!PyStackRef_Is(*target_local, left)); STAT_INC(BINARY_OP, hit); /* Handle `left = left + right` or `left += right` for str. @@ -607,9 +613,12 @@ dummy_func( *target_local = PyStackRef_FromPyObjectSteal(temp); _Py_DECREF_SPECIALIZED(right_o, _PyUnicode_ExactDealloc); ERROR_IF(PyStackRef_IsNull(*target_local), error); - // The STORE_FAST is already done. + #if TIER_ONE + // The STORE_FAST is already done. This is done here in tier one, + // and during trace projection in tier two: assert(next_instr->op.code == STORE_FAST); SKIP_OVER(1); + #endif } macro(BINARY_OP_INPLACE_ADD_UNICODE) = diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index b8343f9ffd5..3379f0be227 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -693,6 +693,55 @@ break; } + case _BINARY_OP_INPLACE_ADD_UNICODE: { + _PyStackRef right; + _PyStackRef left; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + int next_oparg; + #if TIER_ONE + assert(next_instr->op.code == STORE_FAST); + next_oparg = next_instr->op.arg; + #else + next_oparg = CURRENT_OPERAND(); + #endif + _PyStackRef *target_local = &GETLOCAL(next_oparg); + if (!PyStackRef_Is(*target_local, left)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + STAT_INC(BINARY_OP, hit); + /* Handle `left = left + right` or `left += right` for str. + * + * When possible, extend `left` in place rather than + * allocating a new PyUnicodeObject. This attempts to avoid + * quadratic behavior when one neglects to use str.join(). + * + * If `left` has only two references remaining (one from + * the stack, one in the locals), DECREFing `left` leaves + * only the locals reference, so PyUnicode_Append knows + * that the string is safe to mutate. + */ + assert(Py_REFCNT(left_o) >= 2); + _Py_DECREF_NO_DEALLOC(left_o); + PyObject *temp = PyStackRef_AsPyObjectBorrow(*target_local); + PyUnicode_Append(&temp, right_o); + *target_local = PyStackRef_FromPyObjectSteal(temp); + _Py_DECREF_SPECIALIZED(right_o, _PyUnicode_ExactDealloc); + if (PyStackRef_IsNull(*target_local)) JUMP_TO_ERROR(); + #if TIER_ONE + // The STORE_FAST is already done. This is done here in tier one, + // and during trace projection in tier two: + assert(next_instr->op.code == STORE_FAST); + SKIP_OVER(1); + #endif + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _BINARY_SUBSCR: { _PyStackRef sub; _PyStackRef container; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 6f996f91921..c9907438ddc 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -181,8 +181,14 @@ { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + int next_oparg; + #if TIER_ONE assert(next_instr->op.code == STORE_FAST); - _PyStackRef *target_local = &GETLOCAL(next_instr->op.arg); + next_oparg = next_instr->op.arg; + #else + next_oparg = CURRENT_OPERAND(); + #endif + _PyStackRef *target_local = &GETLOCAL(next_oparg); DEOPT_IF(!PyStackRef_Is(*target_local, left), BINARY_OP); STAT_INC(BINARY_OP, hit); /* Handle `left = left + right` or `left += right` for str. @@ -203,9 +209,12 @@ *target_local = PyStackRef_FromPyObjectSteal(temp); _Py_DECREF_SPECIALIZED(right_o, _PyUnicode_ExactDealloc); if (PyStackRef_IsNull(*target_local)) goto pop_2_error; - // The STORE_FAST is already done. + #if TIER_ONE + // The STORE_FAST is already done. This is done here in tier one, + // and during trace projection in tier two: assert(next_instr->op.code == STORE_FAST); SKIP_OVER(1); + #endif } stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/optimizer.c b/Python/optimizer.c index e08c1dc9936..f0793b8c8f2 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -875,6 +875,15 @@ top: // Jump here after _PUSH_FRAME or likely branches goto done; } + if (uop == _BINARY_OP_INPLACE_ADD_UNICODE) { + assert(i + 1 == nuops); + _Py_CODEUNIT *next_instr = instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]]; + assert(next_instr->op.code == STORE_FAST); + operand = next_instr->op.arg; + // Skip the STORE_FAST: + instr++; + } + // All other instructions ADD_TO_TRACE(uop, oparg, operand, target); } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 8c2b1ac7926..33af8552ba6 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -473,6 +473,12 @@ break; } + case _BINARY_OP_INPLACE_ADD_UNICODE: { + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _BINARY_SUBSCR: { _Py_UopsSymbol *res; res = sym_new_not_null(ctx);