gh-124285: Fix bug where bool() is called multiple times for the same part of a boolean expression (#124394)

This commit is contained in:
Irit Katriel 2024-09-25 15:51:25 +01:00 committed by GitHub
parent c58c572a65
commit 78aeb38f7d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 234 additions and 47 deletions

View File

@ -1872,6 +1872,12 @@ but are replaced by real opcodes or removed before bytecode is generated.
Undirected relative jump instructions which are replaced by their
directed (forward/backward) counterparts by the assembler.
.. opcode:: JUMP_IF_TRUE
.. opcode:: JUMP_IF_FALSE
Conditional jumps which do not impact the stack. Replaced by the sequence
``COPY 1``, ``TO_BOOL``, ``POP_JUMP_IF_TRUE/FALSE``.
.. opcode:: LOAD_CLOSURE (i)
Pushes a reference to the cell contained in slot ``i`` of the "fast locals"

View File

@ -258,6 +258,7 @@ Known values:
Python 3.14a1 3604 (Do not duplicate test at end of while statements)
Python 3.14a1 3605 (Move ENTER_EXECUTOR to opcode 255)
Python 3.14a1 3606 (Specialize CALL_KW)
Python 3.14a1 3607 (Add pseudo instructions JUMP_IF_TRUE/FALSE)
Python 3.15 will start with 3650
@ -270,7 +271,7 @@ PC/launcher.c must also be updated.
*/
#define PYC_MAGIC_NUMBER 3606
#define PYC_MAGIC_NUMBER 3607
/* This is equivalent to converting PYC_MAGIC_NUMBER to 2 bytes
(little-endian) and then appending b'\r\n'. */
#define PYC_MAGIC_NUMBER_TOKEN \

View File

@ -22,6 +22,8 @@ extern "C" {
((OP) == STORE_FAST_MAYBE_NULL) || \
((OP) == JUMP) || \
((OP) == JUMP_NO_INTERRUPT) || \
((OP) == JUMP_IF_FALSE) || \
((OP) == JUMP_IF_TRUE) || \
((OP) == SETUP_FINALLY) || \
((OP) == SETUP_CLEANUP) || \
((OP) == SETUP_WITH) || \
@ -269,6 +271,10 @@ int _PyOpcode_num_popped(int opcode, int oparg) {
return 0;
case JUMP_FORWARD:
return 0;
case JUMP_IF_FALSE:
return 1;
case JUMP_IF_TRUE:
return 1;
case JUMP_NO_INTERRUPT:
return 0;
case LIST_APPEND:
@ -726,6 +732,10 @@ int _PyOpcode_num_pushed(int opcode, int oparg) {
return 0;
case JUMP_FORWARD:
return 0;
case JUMP_IF_FALSE:
return 1;
case JUMP_IF_TRUE:
return 1;
case JUMP_NO_INTERRUPT:
return 0;
case LIST_APPEND:
@ -956,7 +966,7 @@ enum InstructionFormat {
};
#define IS_VALID_OPCODE(OP) \
(((OP) >= 0) && ((OP) < 264) && \
(((OP) >= 0) && ((OP) < 266) && \
(_PyOpcode_opcode_metadata[(OP)].valid_entry))
#define HAS_ARG_FLAG (1)
@ -1005,9 +1015,9 @@ struct opcode_metadata {
int16_t flags;
};
extern const struct opcode_metadata _PyOpcode_opcode_metadata[264];
extern const struct opcode_metadata _PyOpcode_opcode_metadata[266];
#ifdef NEED_OPCODE_METADATA
const struct opcode_metadata _PyOpcode_opcode_metadata[264] = {
const struct opcode_metadata _PyOpcode_opcode_metadata[266] = {
[BINARY_OP] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC, HAS_EXIT_FLAG },
[BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC, HAS_EXIT_FLAG | HAS_ERROR_FLAG },
@ -1224,6 +1234,8 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[264] = {
[YIELD_VALUE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ESCAPES_FLAG },
[_DO_CALL_FUNCTION_EX] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG },
[JUMP] = { true, -1, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[JUMP_IF_FALSE] = { true, -1, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[JUMP_IF_TRUE] = { true, -1, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[JUMP_NO_INTERRUPT] = { true, -1, HAS_ARG_FLAG | HAS_JUMP_FLAG },
[LOAD_CLOSURE] = { true, -1, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG },
[POP_BLOCK] = { true, -1, HAS_PURE_FLAG },
@ -1422,9 +1434,9 @@ _PyOpcode_macro_expansion[256] = {
};
#endif // NEED_OPCODE_METADATA
extern const char *_PyOpcode_OpName[264];
extern const char *_PyOpcode_OpName[266];
#ifdef NEED_OPCODE_METADATA
const char *_PyOpcode_OpName[264] = {
const char *_PyOpcode_OpName[266] = {
[BINARY_OP] = "BINARY_OP",
[BINARY_OP_ADD_FLOAT] = "BINARY_OP_ADD_FLOAT",
[BINARY_OP_ADD_INT] = "BINARY_OP_ADD_INT",
@ -1543,6 +1555,8 @@ const char *_PyOpcode_OpName[264] = {
[JUMP_BACKWARD] = "JUMP_BACKWARD",
[JUMP_BACKWARD_NO_INTERRUPT] = "JUMP_BACKWARD_NO_INTERRUPT",
[JUMP_FORWARD] = "JUMP_FORWARD",
[JUMP_IF_FALSE] = "JUMP_IF_FALSE",
[JUMP_IF_TRUE] = "JUMP_IF_TRUE",
[JUMP_NO_INTERRUPT] = "JUMP_NO_INTERRUPT",
[LIST_APPEND] = "LIST_APPEND",
[LIST_EXTEND] = "LIST_EXTEND",
@ -1943,25 +1957,28 @@ const uint8_t _PyOpcode_Deopt[256] = {
case 235: \
;
struct pseudo_targets {
uint8_t targets[3];
uint8_t as_sequence;
uint8_t targets[4];
};
extern const struct pseudo_targets _PyOpcode_PseudoTargets[8];
extern const struct pseudo_targets _PyOpcode_PseudoTargets[10];
#ifdef NEED_OPCODE_METADATA
const struct pseudo_targets _PyOpcode_PseudoTargets[8] = {
[LOAD_CLOSURE-256] = { { LOAD_FAST, 0, 0 } },
[STORE_FAST_MAYBE_NULL-256] = { { STORE_FAST, 0, 0 } },
[JUMP-256] = { { JUMP_FORWARD, JUMP_BACKWARD, 0 } },
[JUMP_NO_INTERRUPT-256] = { { JUMP_FORWARD, JUMP_BACKWARD_NO_INTERRUPT, 0 } },
[SETUP_FINALLY-256] = { { NOP, 0, 0 } },
[SETUP_CLEANUP-256] = { { NOP, 0, 0 } },
[SETUP_WITH-256] = { { NOP, 0, 0 } },
[POP_BLOCK-256] = { { NOP, 0, 0 } },
const struct pseudo_targets _PyOpcode_PseudoTargets[10] = {
[LOAD_CLOSURE-256] = { 0, { LOAD_FAST, 0, 0, 0 } },
[STORE_FAST_MAYBE_NULL-256] = { 0, { STORE_FAST, 0, 0, 0 } },
[JUMP-256] = { 0, { JUMP_FORWARD, JUMP_BACKWARD, 0, 0 } },
[JUMP_NO_INTERRUPT-256] = { 0, { JUMP_FORWARD, JUMP_BACKWARD_NO_INTERRUPT, 0, 0 } },
[JUMP_IF_FALSE-256] = { 1, { COPY, TO_BOOL, POP_JUMP_IF_FALSE, 0 } },
[JUMP_IF_TRUE-256] = { 1, { COPY, TO_BOOL, POP_JUMP_IF_TRUE, 0 } },
[SETUP_FINALLY-256] = { 0, { NOP, 0, 0, 0 } },
[SETUP_CLEANUP-256] = { 0, { NOP, 0, 0, 0 } },
[SETUP_WITH-256] = { 0, { NOP, 0, 0, 0 } },
[POP_BLOCK-256] = { 0, { NOP, 0, 0, 0 } },
};
#endif // NEED_OPCODE_METADATA
static inline bool
is_pseudo_target(int pseudo, int target) {
if (pseudo < 256 || pseudo >= 264) {
if (pseudo < 256 || pseudo >= 266) {
return false;
}
for (int i = 0; _PyOpcode_PseudoTargets[pseudo-256].targets[i]; i++) {

16
Include/opcode_ids.h generated
View File

@ -226,13 +226,15 @@ extern "C" {
#define INSTRUMENTED_LINE 254
#define ENTER_EXECUTOR 255
#define JUMP 256
#define JUMP_NO_INTERRUPT 257
#define LOAD_CLOSURE 258
#define POP_BLOCK 259
#define SETUP_CLEANUP 260
#define SETUP_FINALLY 261
#define SETUP_WITH 262
#define STORE_FAST_MAYBE_NULL 263
#define JUMP_IF_FALSE 257
#define JUMP_IF_TRUE 258
#define JUMP_NO_INTERRUPT 259
#define LOAD_CLOSURE 260
#define POP_BLOCK 261
#define SETUP_CLEANUP 262
#define SETUP_FINALLY 263
#define SETUP_WITH 264
#define STORE_FAST_MAYBE_NULL 265
#define HAVE_ARGUMENT 41
#define MIN_SPECIALIZED_OPCODE 150

View File

@ -335,13 +335,15 @@ opmap = {
'INSTRUMENTED_CALL': 252,
'INSTRUMENTED_JUMP_BACKWARD': 253,
'JUMP': 256,
'JUMP_NO_INTERRUPT': 257,
'LOAD_CLOSURE': 258,
'POP_BLOCK': 259,
'SETUP_CLEANUP': 260,
'SETUP_FINALLY': 261,
'SETUP_WITH': 262,
'STORE_FAST_MAYBE_NULL': 263,
'JUMP_IF_FALSE': 257,
'JUMP_IF_TRUE': 258,
'JUMP_NO_INTERRUPT': 259,
'LOAD_CLOSURE': 260,
'POP_BLOCK': 261,
'SETUP_CLEANUP': 262,
'SETUP_FINALLY': 263,
'SETUP_WITH': 264,
'STORE_FAST_MAYBE_NULL': 265,
}
HAVE_ARGUMENT = 41

View File

@ -1527,6 +1527,45 @@ class TestSpecifics(unittest.TestCase):
pass
[[]]
class TestBooleanExpression(unittest.TestCase):
class Value:
def __init__(self):
self.called = 0
def __bool__(self):
self.called += 1
return self.value
class Yes(Value):
value = True
class No(Value):
value = False
def test_short_circuit_and(self):
v = [self.Yes(), self.No(), self.Yes()]
res = v[0] and v[1] and v[0]
self.assertIs(res, v[1])
self.assertEqual([e.called for e in v], [1, 1, 0])
def test_short_circuit_or(self):
v = [self.No(), self.Yes(), self.No()]
res = v[0] or v[1] or v[0]
self.assertIs(res, v[1])
self.assertEqual([e.called for e in v], [1, 1, 0])
def test_compound(self):
# See gh-124285
v = [self.No(), self.Yes(), self.Yes(), self.Yes()]
res = v[0] and v[1] or v[2] or v[3]
self.assertIs(res, v[2])
self.assertEqual([e.called for e in v], [1, 0, 1, 0])
v = [self.No(), self.No(), self.Yes(), self.Yes(), self.No()]
res = v[0] or v[1] and v[2] or v[3] or v[4]
self.assertIs(res, v[3])
self.assertEqual([e.called for e in v], [1, 1, 0, 1, 0])
@requires_debug_ranges()
class TestSourcePositions(unittest.TestCase):
# Ensure that compiled code snippets have correct line and column numbers

View File

@ -523,6 +523,36 @@ class TestGeneratedCases(unittest.TestCase):
"""
self.run_cases_test(input, output)
def test_pseudo_instruction_as_sequence(self):
input = """
pseudo(OP, (in -- out1, out2)) = [
OP1, OP2
];
inst(OP1, (--)) {
}
inst(OP2, (--)) {
}
"""
output = """
TARGET(OP1) {
frame->instr_ptr = next_instr;
next_instr += 1;
INSTRUCTION_STATS(OP1);
DISPATCH();
}
TARGET(OP2) {
frame->instr_ptr = next_instr;
next_instr += 1;
INSTRUCTION_STATS(OP2);
DISPATCH();
}
"""
self.run_cases_test(input, output)
def test_array_input(self):
input = """
inst(OP, (below, values[oparg*2], above --)) {

View File

@ -0,0 +1,2 @@
Fix bug where ``bool(a)`` can be invoked more than once during the
evaluation of a compound boolean expression.

View File

@ -2570,6 +2570,14 @@ dummy_func(
JUMP_BACKWARD_NO_INTERRUPT,
};
pseudo(JUMP_IF_FALSE, (cond -- cond)) = [
COPY, TO_BOOL, POP_JUMP_IF_FALSE,
];
pseudo(JUMP_IF_TRUE, (cond -- cond)) = [
COPY, TO_BOOL, POP_JUMP_IF_TRUE,
];
tier1 inst(ENTER_EXECUTOR, (--)) {
#ifdef _Py_TIER2
PyCodeObject *code = _PyFrame_GetCode(frame);

View File

@ -3140,17 +3140,15 @@ codegen_boolop(compiler *c, expr_ty e)
location loc = LOC(e);
assert(e->kind == BoolOp_kind);
if (e->v.BoolOp.op == And)
jumpi = POP_JUMP_IF_FALSE;
jumpi = JUMP_IF_FALSE;
else
jumpi = POP_JUMP_IF_TRUE;
jumpi = JUMP_IF_TRUE;
NEW_JUMP_TARGET_LABEL(c, end);
s = e->v.BoolOp.values;
n = asdl_seq_LEN(s) - 1;
assert(n >= 0);
for (i = 0; i < n; ++i) {
VISIT(c, expr, (expr_ty)asdl_seq_GET(s, i));
ADDOP_I(c, loc, COPY, 1);
ADDOP(c, loc, TO_BOOL);
ADDOP_JUMP(c, loc, jumpi, end);
ADDOP(c, loc, POP_TOP);
}

View File

@ -1589,6 +1589,8 @@ basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *
switch(nextop) {
case POP_JUMP_IF_FALSE:
case POP_JUMP_IF_TRUE:
case JUMP_IF_FALSE:
case JUMP_IF_TRUE:
{
/* Remove LOAD_CONST const; conditional jump */
PyObject* cnt = get_const_value(opcode, oparg, consts);
@ -1600,8 +1602,11 @@ basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *
if (is_true == -1) {
return ERROR;
}
if (PyCompile_OpcodeStackEffect(nextop, 0) == -1) {
/* POP_JUMP_IF_FALSE or POP_JUMP_IF_TRUE */
INSTR_SET_OP0(inst, NOP);
int jump_if_true = nextop == POP_JUMP_IF_TRUE;
}
int jump_if_true = (nextop == POP_JUMP_IF_TRUE || nextop == JUMP_IF_TRUE);
if (is_true == jump_if_true) {
bb->b_instr[i+1].i_opcode = JUMP;
}
@ -1761,6 +1766,36 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
i -= jump_thread(bb, inst, target, POP_JUMP_IF_TRUE);
}
break;
case JUMP_IF_FALSE:
switch (target->i_opcode) {
case JUMP:
case JUMP_IF_FALSE:
i -= jump_thread(bb, inst, target, JUMP_IF_FALSE);
continue;
case JUMP_IF_TRUE:
// No need to check for loops here, a block's b_next
// cannot point to itself.
assert(inst->i_target != inst->i_target->b_next);
inst->i_target = inst->i_target->b_next;
i--;
continue;
}
break;
case JUMP_IF_TRUE:
switch (target->i_opcode) {
case JUMP:
case JUMP_IF_TRUE:
i -= jump_thread(bb, inst, target, JUMP_IF_TRUE);
continue;
case JUMP_IF_FALSE:
// No need to check for loops here, a block's b_next
// cannot point to itself.
assert(inst->i_target != inst->i_target->b_next);
inst->i_target = inst->i_target->b_next;
i--;
continue;
}
break;
case JUMP:
case JUMP_NO_INTERRUPT:
switch (target->i_opcode) {
@ -2367,6 +2402,38 @@ push_cold_blocks_to_end(cfg_builder *g) {
return SUCCESS;
}
static int
convert_pseudo_conditional_jumps(cfg_builder *g)
{
basicblock *entryblock = g->g_entryblock;
for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
for (int i = 0; i < b->b_iused; i++) {
cfg_instr *instr = &b->b_instr[i];
if (instr->i_opcode == JUMP_IF_FALSE || instr->i_opcode == JUMP_IF_TRUE) {
assert(i == b->b_iused - 1);
instr->i_opcode = instr->i_opcode == JUMP_IF_FALSE ?
POP_JUMP_IF_FALSE : POP_JUMP_IF_TRUE;
location loc = instr->i_loc;
cfg_instr copy = {
.i_opcode = COPY,
.i_oparg = 1,
.i_loc = loc,
.i_target = NULL,
};
RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &copy));
cfg_instr to_bool = {
.i_opcode = TO_BOOL,
.i_oparg = 0,
.i_loc = loc,
.i_target = NULL,
};
RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &to_bool));
}
}
}
return SUCCESS;
}
static int
convert_pseudo_ops(cfg_builder *g)
{
@ -2826,6 +2893,8 @@ _PyCfg_OptimizedCfgToInstructionSequence(cfg_builder *g,
int *stackdepth, int *nlocalsplus,
_PyInstructionSequence *seq)
{
RETURN_IF_ERROR(convert_pseudo_conditional_jumps(g));
*stackdepth = calculate_stackdepth(g);
if (*stackdepth < 0) {
return ERROR;

View File

@ -248,6 +248,7 @@ class PseudoInstruction:
name: str
stack: StackEffect
targets: list[Instruction]
as_sequence: bool
flags: list[str]
opcode: int = -1
@ -852,6 +853,7 @@ def add_pseudo(
pseudo.name,
analyze_stack(pseudo),
[instructions[target] for target in pseudo.targets],
pseudo.as_sequence,
pseudo.flags,
)

View File

@ -305,6 +305,7 @@ def generate_pseudo_targets(analysis: Analysis, out: CWriter) -> None:
table_size = len(analysis.pseudos)
max_targets = max(len(pseudo.targets) for pseudo in analysis.pseudos.values())
out.emit("struct pseudo_targets {\n")
out.emit(f"uint8_t as_sequence;\n")
out.emit(f"uint8_t targets[{max_targets + 1}];\n")
out.emit("};\n")
out.emit(
@ -315,10 +316,11 @@ def generate_pseudo_targets(analysis: Analysis, out: CWriter) -> None:
f"const struct pseudo_targets _PyOpcode_PseudoTargets[{table_size}] = {{\n"
)
for pseudo in analysis.pseudos.values():
as_sequence = "1" if pseudo.as_sequence else "0"
targets = ["0"] * (max_targets + 1)
for i, target in enumerate(pseudo.targets):
targets[i] = target.name
out.emit(f"[{pseudo.name}-256] = {{ {{ {', '.join(targets)} }} }},\n")
out.emit(f"[{pseudo.name}-256] = {{ {as_sequence}, {{ {', '.join(targets)} }} }},\n")
out.emit("};\n\n")
out.emit("#endif // NEED_OPCODE_METADATA\n")
out.emit("static inline bool\n")

View File

@ -148,6 +148,7 @@ class Pseudo(Node):
outputs: list[OutputEffect]
flags: list[str] # instr flags to set on the pseudo instruction
targets: list[str] # opcodes this can be replaced by
as_sequence: bool
AstNode = InstDef | Macro | Pseudo | Family
@ -423,16 +424,22 @@ class Parser(PLexer):
flags = []
if self.expect(lx.RPAREN):
if self.expect(lx.EQUALS):
if not self.expect(lx.LBRACE):
raise self.make_syntax_error("Expected {")
if members := self.members():
if self.expect(lx.RBRACE) and self.expect(lx.SEMI):
if self.expect(lx.LBRACE):
as_sequence = False
closing = lx.RBRACE
elif self.expect(lx.LBRACKET):
as_sequence = True
closing = lx.RBRACKET
else:
raise self.make_syntax_error("Expected { or [")
if members := self.members(allow_sequence=True):
if self.expect(closing) and self.expect(lx.SEMI):
return Pseudo(
tkn.text, inp, outp, flags, members
tkn.text, inp, outp, flags, members, as_sequence
)
return None
def members(self) -> list[str] | None:
def members(self, allow_sequence : bool=False) -> list[str] | None:
here = self.getpos()
if tkn := self.expect(lx.IDENTIFIER):
members = [tkn.text]
@ -442,8 +449,10 @@ class Parser(PLexer):
else:
break
peek = self.peek()
if not peek or peek.kind != lx.RBRACE:
raise self.make_syntax_error("Expected comma or right paren")
kinds = [lx.RBRACE, lx.RBRACKET] if allow_sequence else [lx.RBRACE]
if not peek or peek.kind not in kinds:
raise self.make_syntax_error(
f"Expected comma or right paren{'/bracket' if allow_sequence else ''}")
return members
self.setpos(here)
return None