From 68d68520061a5a4aa3437f8c74ba383b2da50280 Mon Sep 17 00:00:00 2001 From: Jeffrey Yasskin Date: Sat, 28 Feb 2009 19:03:21 +0000 Subject: [PATCH] Backport r69961 to trunk, replacing JUMP_IF_{TRUE,FALSE} with POP_JUMP_IF_{TRUE,FALSE} and JUMP_IF_{TRUE,FALSE}_OR_POP. This avoids executing a POP_TOP on each conditional and sometimes allows the peephole optimizer to skip a JUMP_ABSOLUTE entirely. It speeds up list comprehensions significantly. --- Doc/library/dis.rst | 22 ++++++--- Include/opcode.h | 9 ++-- Lib/compiler/pyassem.py | 3 ++ Lib/compiler/pycodegen.py | 47 +++++-------------- Lib/opcode.py | 8 ++-- Lib/test/test_dis.py | 32 ++++++------- Lib/test/test_peepholer.py | 11 ++--- Python/ceval.c | 88 ++++++++++++++++++++++++++-------- Python/compile.c | 86 +++++++++++++--------------------- Python/import.c | 4 +- Python/peephole.c | 96 ++++++++++++++++++++++++-------------- 11 files changed, 228 insertions(+), 178 deletions(-) diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst index 3dd528b23df..6b7ed1eed32 100644 --- a/Doc/library/dis.rst +++ b/Doc/library/dis.rst @@ -664,16 +664,26 @@ the more significant byte last. Increments bytecode counter by *delta*. -.. opcode:: JUMP_IF_TRUE (delta) +.. opcode:: POP_JUMP_IF_TRUE (target) - If TOS is true, increment the bytecode counter by *delta*. TOS is left on the - stack. + If TOS is true, sets the bytecode counter to *target*. TOS is popped. -.. opcode:: JUMP_IF_FALSE (delta) +.. opcode:: POP_JUMP_IF_FALSE (target) - If TOS is false, increment the bytecode counter by *delta*. TOS is not - changed. + If TOS is false, sets the bytecode counter to *target*. TOS is popped. + + +.. opcode:: JUMP_IF_TRUE_OR_POP (target) + + If TOS is true, sets the bytecode counter to *target* and leaves TOS + on the stack. Otherwise (TOS is false), TOS is popped. + + +.. opcode:: JUMP_IF_FALSE_OR_POP (target) + + If TOS is false, sets the bytecode counter to *target* and leaves + TOS on the stack. Otherwise (TOS is true), TOS is popped. .. opcode:: JUMP_ABSOLUTE (target) diff --git a/Include/opcode.h b/Include/opcode.h index 5e6fba088c8..36a35dc2fea 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -106,9 +106,12 @@ extern "C" { #define IMPORT_FROM 108 /* Index in name list */ #define JUMP_FORWARD 110 /* Number of bytes to skip */ -#define JUMP_IF_FALSE 111 /* "" */ -#define JUMP_IF_TRUE 112 /* "" */ -#define JUMP_ABSOLUTE 113 /* Target byte offset from beginning of code */ +#define JUMP_IF_FALSE_OR_POP 111 /* Target byte offset from beginning + of code */ +#define JUMP_IF_TRUE_OR_POP 112 /* "" */ +#define JUMP_ABSOLUTE 113 /* "" */ +#define POP_JUMP_IF_FALSE 114 /* "" */ +#define POP_JUMP_IF_TRUE 115 /* "" */ #define LOAD_GLOBAL 116 /* Index in name list */ diff --git a/Lib/compiler/pyassem.py b/Lib/compiler/pyassem.py index 5098f2c1ee9..be0255d6aa2 100644 --- a/Lib/compiler/pyassem.py +++ b/Lib/compiler/pyassem.py @@ -122,6 +122,9 @@ def order_blocks(start_block, exit_block): for b in remaining: if __debug__ and b.next: assert b is b.next[0].prev[0], (b, b.next) + # Make sure every block appears in dominators, even if no + # other block must precede it. + dominators.setdefault(b, set()) # preceeding blocks dominate following blocks for c in b.get_followers(): while 1: diff --git a/Lib/compiler/pycodegen.py b/Lib/compiler/pycodegen.py index 6d5a41c4115..81234d1ff00 100644 --- a/Lib/compiler/pycodegen.py +++ b/Lib/compiler/pycodegen.py @@ -421,13 +421,11 @@ class CodeGenerator: self.set_lineno(test) self.visit(test) nextTest = self.newBlock() - self.emit('JUMP_IF_FALSE', nextTest) + self.emit('POP_JUMP_IF_FALSE', nextTest) self.nextBlock() - self.emit('POP_TOP') self.visit(suite) self.emit('JUMP_FORWARD', end) self.startBlock(nextTest) - self.emit('POP_TOP') if node.else_: self.visit(node.else_) self.nextBlock(end) @@ -446,15 +444,13 @@ class CodeGenerator: self.set_lineno(node, force=True) self.visit(node.test) - self.emit('JUMP_IF_FALSE', else_ or after) + self.emit('POP_JUMP_IF_FALSE', else_ or after) self.nextBlock() - self.emit('POP_TOP') self.visit(node.body) self.emit('JUMP_ABSOLUTE', loop) self.startBlock(else_) # or just the POPs if not else clause - self.emit('POP_TOP') self.emit('POP_BLOCK') self.setups.pop() if node.else_: @@ -525,26 +521,23 @@ class CodeGenerator: self.visit(child) self.emit(jump, end) self.nextBlock() - self.emit('POP_TOP') self.visit(node.nodes[-1]) self.nextBlock(end) def visitAnd(self, node): - self.visitTest(node, 'JUMP_IF_FALSE') + self.visitTest(node, 'JUMP_IF_FALSE_OR_POP') def visitOr(self, node): - self.visitTest(node, 'JUMP_IF_TRUE') + self.visitTest(node, 'JUMP_IF_TRUE_OR_POP') def visitIfExp(self, node): endblock = self.newBlock() elseblock = self.newBlock() self.visit(node.test) - self.emit('JUMP_IF_FALSE', elseblock) - self.emit('POP_TOP') + self.emit('POP_JUMP_IF_FALSE', elseblock) self.visit(node.then) self.emit('JUMP_FORWARD', endblock) self.nextBlock(elseblock) - self.emit('POP_TOP') self.visit(node.else_) self.nextBlock(endblock) @@ -556,9 +549,8 @@ class CodeGenerator: self.emit('DUP_TOP') self.emit('ROT_THREE') self.emit('COMPARE_OP', op) - self.emit('JUMP_IF_FALSE', cleanup) + self.emit('JUMP_IF_FALSE_OR_POP', cleanup) self.nextBlock() - self.emit('POP_TOP') # now do the last comparison if node.ops: op, code = node.ops[-1] @@ -593,11 +585,7 @@ class CodeGenerator: for start, cont, anchor in stack: if cont: - skip_one = self.newBlock() - self.emit('JUMP_FORWARD', skip_one) - self.startBlock(cont) - self.emit('POP_TOP') - self.nextBlock(skip_one) + self.nextBlock(cont) self.emit('JUMP_ABSOLUTE', start) self.startBlock(anchor) @@ -617,9 +605,8 @@ class CodeGenerator: def visitListCompIf(self, node, branch): self.set_lineno(node, force=True) self.visit(node.test) - self.emit('JUMP_IF_FALSE', branch) + self.emit('POP_JUMP_IF_FALSE', branch) self.newBlock() - self.emit('POP_TOP') def _makeClosure(self, gen, args): frees = gen.scope.get_free_vars() @@ -665,11 +652,7 @@ class CodeGenerator: for start, cont, anchor, end in stack: if cont: - skip_one = self.newBlock() - self.emit('JUMP_FORWARD', skip_one) - self.startBlock(cont) - self.emit('POP_TOP') - self.nextBlock(skip_one) + self.nextBlock(cont) self.emit('JUMP_ABSOLUTE', start) self.startBlock(anchor) self.emit('POP_BLOCK') @@ -702,9 +685,8 @@ class CodeGenerator: def visitGenExprIf(self, node, branch): self.set_lineno(node, force=True) self.visit(node.test) - self.emit('JUMP_IF_FALSE', branch) + self.emit('POP_JUMP_IF_FALSE', branch) self.newBlock() - self.emit('POP_TOP') # exception related @@ -719,9 +701,8 @@ class CodeGenerator: # is a sort of renaming op. self.nextBlock() self.visit(node.test) - self.emit('JUMP_IF_TRUE', end) + self.emit('POP_JUMP_IF_TRUE', end) self.nextBlock() - self.emit('POP_TOP') self.emit('LOAD_GLOBAL', 'AssertionError') if node.fail: self.visit(node.fail) @@ -729,7 +710,6 @@ class CodeGenerator: else: self.emit('RAISE_VARARGS', 1) self.nextBlock(end) - self.emit('POP_TOP') def visitRaise(self, node): self.set_lineno(node) @@ -772,9 +752,8 @@ class CodeGenerator: self.visit(expr) self.emit('COMPARE_OP', 'exception match') next = self.newBlock() - self.emit('JUMP_IF_FALSE', next) + self.emit('POP_JUMP_IF_FALSE', next) self.nextBlock() - self.emit('POP_TOP') self.emit('POP_TOP') if target: self.visit(target) @@ -787,8 +766,6 @@ class CodeGenerator: self.nextBlock(next) else: self.nextBlock() - if expr: # XXX - self.emit('POP_TOP') self.emit('END_FINALLY') if node.else_: self.nextBlock(lElse) diff --git a/Lib/opcode.py b/Lib/opcode.py index d41383b2f6f..24d5dcc3feb 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -146,9 +146,11 @@ name_op('IMPORT_NAME', 107) # Index in name list name_op('IMPORT_FROM', 108) # Index in name list jrel_op('JUMP_FORWARD', 110) # Number of bytes to skip -jrel_op('JUMP_IF_FALSE', 111) # "" -jrel_op('JUMP_IF_TRUE', 112) # "" -jabs_op('JUMP_ABSOLUTE', 113) # Target byte offset from beginning of code +jabs_op('JUMP_IF_FALSE_OR_POP', 111) # Target byte offset from beginning of code +jabs_op('JUMP_IF_TRUE_OR_POP', 112) # "" +jabs_op('JUMP_ABSOLUTE', 113) # "" +jabs_op('POP_JUMP_IF_FALSE', 114) # "" +jabs_op('POP_JUMP_IF_TRUE', 115) # "" name_op('LOAD_GLOBAL', 116) # Index in name list diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 6fa437b76e0..613d30a4dde 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -54,25 +54,23 @@ def bug1333982(x=[]): dis_bug1333982 = """\ %-4d 0 LOAD_CONST 1 (0) - 3 JUMP_IF_TRUE 33 (to 39) - 6 POP_TOP - 7 LOAD_GLOBAL 0 (AssertionError) - 10 BUILD_LIST 0 - 13 LOAD_FAST 0 (x) - 16 GET_ITER - >> 17 FOR_ITER 12 (to 32) - 20 STORE_FAST 1 (s) - 23 LOAD_FAST 1 (s) - 26 LIST_APPEND 2 - 29 JUMP_ABSOLUTE 17 + 3 POP_JUMP_IF_TRUE 38 + 6 LOAD_GLOBAL 0 (AssertionError) + 9 BUILD_LIST 0 + 12 LOAD_FAST 0 (x) + 15 GET_ITER + >> 16 FOR_ITER 12 (to 31) + 19 STORE_FAST 1 (s) + 22 LOAD_FAST 1 (s) + 25 LIST_APPEND 2 + 28 JUMP_ABSOLUTE 16 - %-4d >> 32 LOAD_CONST 2 (1) - 35 BINARY_ADD - 36 RAISE_VARARGS 2 - >> 39 POP_TOP + %-4d >> 31 LOAD_CONST 2 (1) + 34 BINARY_ADD + 35 RAISE_VARARGS 2 - %-4d 40 LOAD_CONST 0 (None) - 43 RETURN_VALUE + %-4d >> 38 LOAD_CONST 0 (None) + 41 RETURN_VALUE """%(bug1333982.func_code.co_firstlineno + 1, bug1333982.func_code.co_firstlineno + 2, bug1333982.func_code.co_firstlineno + 3) diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index c547984daaa..6638a5a4c44 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -19,15 +19,14 @@ def dis_single(line): class TestTranforms(unittest.TestCase): def test_unot(self): - # UNARY_NOT JUMP_IF_FALSE POP_TOP --> JUMP_IF_TRUE POP_TOP' + # UNARY_NOT POP_JUMP_IF_FALSE --> POP_JUMP_IF_TRUE def unot(x): if not x == 2: del x asm = disassemble(unot) - for elem in ('UNARY_NOT', 'JUMP_IF_FALSE'): + for elem in ('UNARY_NOT', 'POP_JUMP_IF_FALSE'): self.assert_(elem not in asm) - for elem in ('JUMP_IF_TRUE', 'POP_TOP'): - self.assert_(elem in asm) + self.assert_('POP_JUMP_IF_TRUE' in asm) def test_elim_inversion_of_is_or_in(self): for line, elem in ( @@ -56,13 +55,13 @@ class TestTranforms(unittest.TestCase): self.assert_('LOAD_GLOBAL' not in disassemble(f)) def test_while_one(self): - # Skip over: LOAD_CONST trueconst JUMP_IF_FALSE xx POP_TOP + # Skip over: LOAD_CONST trueconst POP_JUMP_IF_FALSE xx def f(): while 1: pass return list asm = disassemble(f) - for elem in ('LOAD_CONST', 'JUMP_IF_FALSE'): + for elem in ('LOAD_CONST', 'POP_JUMP_IF_FALSE'): self.assert_(elem not in asm) for elem in ('JUMP_ABSOLUTE',): self.assert_(elem in asm) diff --git a/Python/ceval.c b/Python/ceval.c index 419ecfaaaaa..88b9d0e8882 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -764,8 +764,8 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag) /* OpCode prediction macros Some opcodes tend to come in pairs thus making it possible to predict the second code when the first is run. For example, - COMPARE_OP is often followed by JUMP_IF_FALSE or JUMP_IF_TRUE. And, - those opcodes are often followed by a POP_TOP. + GET_ITER is often followed by FOR_ITER. And FOR_ITER is often + followed by STORE_FAST or UNPACK_SEQUENCE. Verifying the prediction costs a single high-speed test of a register variable against a constant. If the pairing was good, then the @@ -1110,7 +1110,6 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag) SETLOCAL(oparg, v); goto fast_next_opcode; - PREDICTED(POP_TOP); case POP_TOP: v = POP(); Py_DECREF(v); @@ -2220,8 +2219,8 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag) Py_DECREF(w); SET_TOP(x); if (x == NULL) break; - PREDICT(JUMP_IF_FALSE); - PREDICT(JUMP_IF_TRUE); + PREDICT(POP_JUMP_IF_FALSE); + PREDICT(POP_JUMP_IF_TRUE); continue; case IMPORT_NAME: @@ -2298,44 +2297,95 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag) JUMPBY(oparg); goto fast_next_opcode; - PREDICTED_WITH_ARG(JUMP_IF_FALSE); - case JUMP_IF_FALSE: - w = TOP(); + PREDICTED_WITH_ARG(POP_JUMP_IF_FALSE); + case POP_JUMP_IF_FALSE: + w = POP(); if (w == Py_True) { - PREDICT(POP_TOP); + Py_DECREF(w); goto fast_next_opcode; } if (w == Py_False) { - JUMPBY(oparg); + Py_DECREF(w); + JUMPTO(oparg); goto fast_next_opcode; } err = PyObject_IsTrue(w); + Py_DECREF(w); if (err > 0) err = 0; else if (err == 0) - JUMPBY(oparg); + JUMPTO(oparg); else break; continue; - PREDICTED_WITH_ARG(JUMP_IF_TRUE); - case JUMP_IF_TRUE: - w = TOP(); + PREDICTED_WITH_ARG(POP_JUMP_IF_TRUE); + case POP_JUMP_IF_TRUE: + w = POP(); if (w == Py_False) { - PREDICT(POP_TOP); + Py_DECREF(w); goto fast_next_opcode; } if (w == Py_True) { - JUMPBY(oparg); + Py_DECREF(w); + JUMPTO(oparg); + goto fast_next_opcode; + } + err = PyObject_IsTrue(w); + Py_DECREF(w); + if (err > 0) { + err = 0; + JUMPTO(oparg); + } + else if (err == 0) + ; + else + break; + continue; + + case JUMP_IF_FALSE_OR_POP: + w = TOP(); + if (w == Py_True) { + STACKADJ(-1); + Py_DECREF(w); + goto fast_next_opcode; + } + if (w == Py_False) { + JUMPTO(oparg); + goto fast_next_opcode; + } + err = PyObject_IsTrue(w); + if (err > 0) { + STACKADJ(-1); + Py_DECREF(w); + err = 0; + } + else if (err == 0) + JUMPTO(oparg); + else + break; + continue; + + case JUMP_IF_TRUE_OR_POP: + w = TOP(); + if (w == Py_False) { + STACKADJ(-1); + Py_DECREF(w); + goto fast_next_opcode; + } + if (w == Py_True) { + JUMPTO(oparg); goto fast_next_opcode; } err = PyObject_IsTrue(w); if (err > 0) { err = 0; - JUMPBY(oparg); + JUMPTO(oparg); + } + else if (err == 0) { + STACKADJ(-1); + Py_DECREF(w); } - else if (err == 0) - ; else break; continue; diff --git a/Python/compile.c b/Python/compile.c index a252bd15f10..69321ae0c41 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -836,11 +836,15 @@ opcode_stack_effect(int opcode, int oparg) return 1; case JUMP_FORWARD: - case JUMP_IF_FALSE: - case JUMP_IF_TRUE: + case JUMP_IF_TRUE_OR_POP: /* -1 if jump not taken */ + case JUMP_IF_FALSE_OR_POP: /* "" */ case JUMP_ABSOLUTE: return 0; + case POP_JUMP_IF_FALSE: + case POP_JUMP_IF_TRUE: + return -1; + case LOAD_GLOBAL: return 1; @@ -1499,12 +1503,10 @@ compiler_ifexp(struct compiler *c, expr_ty e) if (next == NULL) return 0; VISIT(c, expr, e->v.IfExp.test); - ADDOP_JREL(c, JUMP_IF_FALSE, next); - ADDOP(c, POP_TOP); + ADDOP_JABS(c, POP_JUMP_IF_FALSE, next); VISIT(c, expr, e->v.IfExp.body); ADDOP_JREL(c, JUMP_FORWARD, end); compiler_use_next_block(c, next); - ADDOP(c, POP_TOP); VISIT(c, expr, e->v.IfExp.orelse); compiler_use_next_block(c, end); return 1; @@ -1597,9 +1599,6 @@ compiler_if(struct compiler *c, stmt_ty s) end = compiler_new_block(c); if (end == NULL) return 0; - next = compiler_new_block(c); - if (next == NULL) - return 0; constant = expr_constant(s->v.If.test); /* constant = 0: "if 0" @@ -1611,15 +1610,21 @@ compiler_if(struct compiler *c, stmt_ty s) } else if (constant == 1) { VISIT_SEQ(c, stmt, s->v.If.body); } else { + if (s->v.If.orelse) { + next = compiler_new_block(c); + if (next == NULL) + return 0; + } + else + next = end; VISIT(c, expr, s->v.If.test); - ADDOP_JREL(c, JUMP_IF_FALSE, next); - ADDOP(c, POP_TOP); + ADDOP_JABS(c, POP_JUMP_IF_FALSE, next); VISIT_SEQ(c, stmt, s->v.If.body); ADDOP_JREL(c, JUMP_FORWARD, end); - compiler_use_next_block(c, next); - ADDOP(c, POP_TOP); - if (s->v.If.orelse) + if (s->v.If.orelse) { + compiler_use_next_block(c, next); VISIT_SEQ(c, stmt, s->v.If.orelse); + } } compiler_use_next_block(c, end); return 1; @@ -1693,8 +1698,7 @@ compiler_while(struct compiler *c, stmt_ty s) so we need to set an extra line number. */ c->u->u_lineno_set = false; VISIT(c, expr, s->v.While.test); - ADDOP_JREL(c, JUMP_IF_FALSE, anchor); - ADDOP(c, POP_TOP); + ADDOP_JABS(c, POP_JUMP_IF_FALSE, anchor); } VISIT_SEQ(c, stmt, s->v.While.body); ADDOP_JABS(c, JUMP_ABSOLUTE, loop); @@ -1705,7 +1709,6 @@ compiler_while(struct compiler *c, stmt_ty s) if (constant == -1) { compiler_use_next_block(c, anchor); - ADDOP(c, POP_TOP); ADDOP(c, POP_BLOCK); } compiler_pop_fblock(c, LOOP, loop); @@ -1826,20 +1829,17 @@ compiler_try_finally(struct compiler *c, stmt_ty s) [tb, val, exc] L1: DUP ) [tb, val, exc, exc] ) [tb, val, exc, exc, E1] COMPARE_OP EXC_MATCH ) only if E1 - [tb, val, exc, 1-or-0] JUMP_IF_FALSE L2 ) - [tb, val, exc, 1] POP ) + [tb, val, exc, 1-or-0] POP_JUMP_IF_FALSE L2 ) [tb, val, exc] POP [tb, val] (or POP if no V1) [tb] POP [] JUMP_FORWARD L0 - [tb, val, exc, 0] L2: POP - [tb, val, exc] DUP + [tb, val, exc] L2: DUP .............................etc....................... - [tb, val, exc, 0] Ln+1: POP - [tb, val, exc] END_FINALLY # re-raise exception + [tb, val, exc] Ln+1: END_FINALLY # re-raise exception [] L0: @@ -1881,8 +1881,7 @@ compiler_try_except(struct compiler *c, stmt_ty s) ADDOP(c, DUP_TOP); VISIT(c, expr, handler->v.ExceptHandler.type); ADDOP_I(c, COMPARE_OP, PyCmp_EXC_MATCH); - ADDOP_JREL(c, JUMP_IF_FALSE, except); - ADDOP(c, POP_TOP); + ADDOP_JABS(c, POP_JUMP_IF_FALSE, except); } ADDOP(c, POP_TOP); if (handler->v.ExceptHandler.name) { @@ -1895,8 +1894,6 @@ compiler_try_except(struct compiler *c, stmt_ty s) VISIT_SEQ(c, stmt, handler->v.ExceptHandler.body); ADDOP_JREL(c, JUMP_FORWARD, end); compiler_use_next_block(c, except); - if (handler->v.ExceptHandler.type) - ADDOP(c, POP_TOP); } ADDOP(c, END_FINALLY); compiler_use_next_block(c, orelse); @@ -2084,8 +2081,7 @@ compiler_assert(struct compiler *c, stmt_ty s) end = compiler_new_block(c); if (end == NULL) return 0; - ADDOP_JREL(c, JUMP_IF_TRUE, end); - ADDOP(c, POP_TOP); + ADDOP_JABS(c, POP_JUMP_IF_TRUE, end); ADDOP_O(c, LOAD_GLOBAL, assertion_error, names); if (s->v.Assert.msg) { VISIT(c, expr, s->v.Assert.msg); @@ -2095,7 +2091,6 @@ compiler_assert(struct compiler *c, stmt_ty s) ADDOP_I(c, RAISE_VARARGS, 1); } compiler_use_next_block(c, end); - ADDOP(c, POP_TOP); return 1; } @@ -2473,9 +2468,9 @@ compiler_boolop(struct compiler *c, expr_ty e) assert(e->kind == BoolOp_kind); if (e->v.BoolOp.op == And) - jumpi = JUMP_IF_FALSE; + jumpi = JUMP_IF_FALSE_OR_POP; else - jumpi = JUMP_IF_TRUE; + jumpi = JUMP_IF_TRUE_OR_POP; end = compiler_new_block(c); if (end == NULL) return 0; @@ -2484,8 +2479,7 @@ compiler_boolop(struct compiler *c, expr_ty e) assert(n >= 0); for (i = 0; i < n; ++i) { VISIT(c, expr, (expr_ty)asdl_seq_GET(s, i)); - ADDOP_JREL(c, jumpi, end); - ADDOP(c, POP_TOP) + ADDOP_JABS(c, jumpi, end); } VISIT(c, expr, (expr_ty)asdl_seq_GET(s, n)); compiler_use_next_block(c, end); @@ -2543,9 +2537,8 @@ compiler_compare(struct compiler *c, expr_ty e) ADDOP_I(c, COMPARE_OP, cmpop((cmpop_ty)(asdl_seq_GET( e->v.Compare.ops, i - 1)))); - ADDOP_JREL(c, JUMP_IF_FALSE, cleanup); + ADDOP_JABS(c, JUMP_IF_FALSE_OR_POP, cleanup); NEXT_BLOCK(c); - ADDOP(c, POP_TOP); if (i < (n - 1)) VISIT(c, expr, (expr_ty)asdl_seq_GET(e->v.Compare.comparators, i)); @@ -2636,9 +2629,8 @@ compiler_listcomp_generator(struct compiler *c, asdl_seq *generators, for (i = 0; i < n; i++) { expr_ty e = (expr_ty)asdl_seq_GET(l->ifs, i); VISIT(c, expr, e); - ADDOP_JREL(c, JUMP_IF_FALSE, if_cleanup); + ADDOP_JABS(c, POP_JUMP_IF_FALSE, if_cleanup); NEXT_BLOCK(c); - ADDOP(c, POP_TOP); } if (++gen_index < asdl_seq_LEN(generators)) @@ -2652,12 +2644,7 @@ compiler_listcomp_generator(struct compiler *c, asdl_seq *generators, compiler_use_next_block(c, skip); } - for (i = 0; i < n; i++) { - ADDOP_I(c, JUMP_FORWARD, 1); - if (i == 0) - compiler_use_next_block(c, if_cleanup); - ADDOP(c, POP_TOP); - } + compiler_use_next_block(c, if_cleanup); ADDOP_JABS(c, JUMP_ABSOLUTE, start); compiler_use_next_block(c, anchor); @@ -2720,10 +2707,9 @@ compiler_genexp_generator(struct compiler *c, for (i = 0; i < n; i++) { expr_ty e = (expr_ty)asdl_seq_GET(ge->ifs, i); VISIT(c, expr, e); - ADDOP_JREL(c, JUMP_IF_FALSE, if_cleanup); + ADDOP_JABS(c, POP_JUMP_IF_FALSE, if_cleanup); NEXT_BLOCK(c); - ADDOP(c, POP_TOP); - } + } if (++gen_index < asdl_seq_LEN(generators)) if (!compiler_genexp_generator(c, generators, gen_index, elt)) @@ -2737,13 +2723,7 @@ compiler_genexp_generator(struct compiler *c, compiler_use_next_block(c, skip); } - for (i = 0; i < n; i++) { - ADDOP_I(c, JUMP_FORWARD, 1); - if (i == 0) - compiler_use_next_block(c, if_cleanup); - - ADDOP(c, POP_TOP); - } + compiler_use_next_block(c, if_cleanup); ADDOP_JABS(c, JUMP_ABSOLUTE, start); compiler_use_next_block(c, anchor); ADDOP(c, POP_BLOCK); diff --git a/Python/import.c b/Python/import.c index b00986c6978..98b8dceba43 100644 --- a/Python/import.c +++ b/Python/import.c @@ -72,9 +72,11 @@ typedef unsigned short mode_t; Python 2.6a0: 62151 (peephole optimizations and STORE_MAP opcode) Python 2.6a1: 62161 (WITH_CLEANUP optimization) Python 2.7a0: 62171 (optimize list comprehensions/change LIST_APPEND) + Python 2.7a0: 62181 (optimize conditional branches: + introduce POP_JUMP_IF_FALSE and POP_JUMP_IF_TRUE) . */ -#define MAGIC (62171 | ((long)'\r'<<16) | ((long)'\n'<<24)) +#define MAGIC (62181 | ((long)'\r'<<16) | ((long)'\n'<<24)) /* Magic word as global; note that _PyImport_Init() can change the value of this global to accommodate for alterations of how the diff --git a/Python/peephole.c b/Python/peephole.c index e9da377dc68..a3dda9c8aaf 100644 --- a/Python/peephole.c +++ b/Python/peephole.c @@ -13,7 +13,12 @@ #define GETARG(arr, i) ((int)((arr[i+2]<<8) + arr[i+1])) #define UNCONDITIONAL_JUMP(op) (op==JUMP_ABSOLUTE || op==JUMP_FORWARD) -#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP) +#define CONDITIONAL_JUMP(op) (op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \ + || op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP) +#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP \ + || op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \ + || op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP) +#define JUMPS_ON_TRUE(op) (op==POP_JUMP_IF_TRUE || op==JUMP_IF_TRUE_OR_POP) #define GETJUMPTGT(arr, i) (GETARG(arr,i) + (ABSOLUTE_JUMP(arr[i]) ? 0 : i+3)) #define SETARG(arr, i, val) arr[i+2] = val>>8; arr[i+1] = val & 255 #define CODESIZE(op) (HAS_ARG(op) ? 3 : 1) @@ -245,8 +250,10 @@ markblocks(unsigned char *code, Py_ssize_t len) switch (opcode) { case FOR_ITER: case JUMP_FORWARD: - case JUMP_IF_FALSE: - case JUMP_IF_TRUE: + case JUMP_IF_FALSE_OR_POP: + case JUMP_IF_TRUE_OR_POP: + case POP_JUMP_IF_FALSE: + case POP_JUMP_IF_TRUE: case JUMP_ABSOLUTE: case CONTINUE_LOOP: case SETUP_LOOP: @@ -338,29 +345,24 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, assert(PyList_Check(consts)); for (i=0 ; i a is not b not a in b --> a not in b @@ -400,16 +402,16 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, break; /* Skip over LOAD_CONST trueconst - JUMP_IF_FALSE xx POP_TOP */ + POP_JUMP_IF_FALSE xx. This improves + "while 1" performance. */ case LOAD_CONST: cumlc = lastlc + 1; j = GETARG(codestr, i); - if (codestr[i+3] != JUMP_IF_FALSE || - codestr[i+6] != POP_TOP || - !ISBASICBLOCK(blocks,i,7) || + if (codestr[i+3] != POP_JUMP_IF_FALSE || + !ISBASICBLOCK(blocks,i,6) || !PyObject_IsTrue(PyList_GET_ITEM(consts, j))) continue; - memset(codestr+i, NOP, 7); + memset(codestr+i, NOP, 6); cumlc = 0; break; @@ -498,27 +500,49 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, "if a or b:" "a and b or c" "(a and b) and c" - x:JUMP_IF_FALSE y y:JUMP_IF_FALSE z --> x:JUMP_IF_FALSE z - x:JUMP_IF_FALSE y y:JUMP_IF_TRUE z --> x:JUMP_IF_FALSE y+3 + x:JUMP_IF_FALSE_OR_POP y y:JUMP_IF_FALSE_OR_POP z + --> x:JUMP_IF_FALSE_OR_POP z + x:JUMP_IF_FALSE_OR_POP y y:JUMP_IF_TRUE_OR_POP z + --> x:POP_JUMP_IF_FALSE y+3 where y+3 is the instruction following the second test. */ - case JUMP_IF_FALSE: - case JUMP_IF_TRUE: + case JUMP_IF_FALSE_OR_POP: + case JUMP_IF_TRUE_OR_POP: tgt = GETJUMPTGT(codestr, i); j = codestr[tgt]; - if (j == JUMP_IF_FALSE || j == JUMP_IF_TRUE) { - if (j == opcode) { - tgttgt = GETJUMPTGT(codestr, tgt) - i - 3; + if (CONDITIONAL_JUMP(j)) { + /* NOTE: all possible jumps here are + absolute! */ + if (JUMPS_ON_TRUE(j) == JUMPS_ON_TRUE(opcode)) { + /* The second jump will be + taken iff the first is. */ + tgttgt = GETJUMPTGT(codestr, tgt); + /* The current opcode inherits + its target's stack behaviour */ + codestr[i] = j; SETARG(codestr, i, tgttgt); + goto reoptimize_current; } else { - tgt -= i; - SETARG(codestr, i, tgt); + /* The second jump is not taken + if the first is (so jump past + it), and all conditional + jumps pop their argument when + they're not taken (so change + the first jump to pop its + argument when it's taken). */ + if (JUMPS_ON_TRUE(opcode)) + codestr[i] = POP_JUMP_IF_TRUE; + else + codestr[i] = POP_JUMP_IF_FALSE; + SETARG(codestr, i, (tgt + 3)); + goto reoptimize_current; } - break; } - /* Intentional fallthrough */ + /* Intentional fallthrough */ /* Replace jumps to unconditional jumps */ + case POP_JUMP_IF_FALSE: + case POP_JUMP_IF_TRUE: case FOR_ITER: case JUMP_FORWARD: case JUMP_ABSOLUTE: @@ -591,14 +615,16 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, case JUMP_ABSOLUTE: case CONTINUE_LOOP: + case POP_JUMP_IF_FALSE: + case POP_JUMP_IF_TRUE: + case JUMP_IF_FALSE_OR_POP: + case JUMP_IF_TRUE_OR_POP: j = addrmap[GETARG(codestr, i)]; SETARG(codestr, i, j); break; case FOR_ITER: case JUMP_FORWARD: - case JUMP_IF_FALSE: - case JUMP_IF_TRUE: case SETUP_LOOP: case SETUP_EXCEPT: case SETUP_FINALLY: