Issue #11244: The peephole optimizer is now able to constant-fold

arbitrarily complex expressions.  This also fixes a 3.2 regression where
operations involving negative numbers were not constant-folded.
This commit is contained in:
Antoine Pitrou 2011-03-11 17:27:02 +01:00
parent 336c729563
commit 17b880a5d6
3 changed files with 153 additions and 49 deletions

View File

@ -8,8 +8,10 @@ def disassemble(func):
f = StringIO()
tmp = sys.stdout
sys.stdout = f
dis.dis(func)
sys.stdout = tmp
try:
dis.dis(func)
finally:
sys.stdout = tmp
result = f.getvalue()
f.close()
return result
@ -99,6 +101,12 @@ class TestTranforms(unittest.TestCase):
self.assertIn(elem, asm)
self.assertNotIn('BUILD_TUPLE', asm)
# Long tuples should be folded too.
asm = dis_single(repr(tuple(range(10000))))
# One LOAD_CONST for the tuple, one for the None return value
self.assertEqual(asm.count('LOAD_CONST'), 2)
self.assertNotIn('BUILD_TUPLE', asm)
# Bug 1053819: Tuple of constants misidentified when presented with:
# . . . opcode_with_arg 100 unary_opcode BUILD_TUPLE 1 . . .
# The following would segfault upon compilation
@ -267,6 +275,25 @@ class TestTranforms(unittest.TestCase):
asm = disassemble(f)
self.assertNotIn('BINARY_ADD', asm)
def test_constant_folding(self):
    # Issue #11244: aggressive constant folding.
    # Every source snippet below is built purely from constants, so its
    # disassembly must not contain any unary, binary or container-building
    # opcode.
    foldable_sources = (
        "3 * -5",
        "-3 * 5",
        "2 * (3 * 4)",
        "(2 * 3) * 4",
        "(-1, 2, 3)",
        "(1, -2, 3)",
        "(1, 2, -3)",
        "(1, 2, -3) * 6",
        "lambda x: x in {(3 * -5) + (-1 - 6), (1, -2, 3) * 2, None}",
    )
    forbidden_prefixes = ('UNARY_', 'BINARY_', 'BUILD_')
    for source in foldable_sources:
        listing = dis_single(source)
        for prefix in forbidden_prefixes:
            # The snippet is passed as the failure message so the
            # offending expression shows up in the report.
            self.assertNotIn(prefix, listing, source)
def test_main(verbose=None):
import sys

View File

@ -10,6 +10,10 @@ What's New in Python 3.3 Alpha 1?
Core and Builtins
-----------------
- Issue #11244: The peephole optimizer is now able to constant-fold
arbitrarily complex expressions. This also fixes a 3.2 regression where
operations involving negative numbers were not constant-folded.
- Issue #11450: Don't truncate hg version info in Py_GetBuildInfo() when
there are many tags (e.g. when using mq). Patch by Nadeem Vawda.

View File

@ -23,6 +23,64 @@
#define ISBASICBLOCK(blocks, start, bytes) \
(blocks[start]==blocks[start+bytes-1])
/* Allocate the two parallel arrays backing the const stack, each with an
   initial capacity of 256 entries: `const_stack` (PyObject * entries) and
   `load_const_stack` (Py_ssize_t entries).
   If either allocation fails, PyErr_NoMemory() is called and control jumps
   to the `exitError` label, which the enclosing function must provide.
   The body is wrapped in do { } while(0), like the other CONST_STACK_*
   macros, so that `CONST_STACK_CREATE();` expands to exactly one statement
   and stays correct inside an unbraced if/else. */
#define CONST_STACK_CREATE() do { \
    const_stack_size = 256; \
    const_stack = PyMem_New(PyObject *, const_stack_size); \
    load_const_stack = PyMem_New(Py_ssize_t, const_stack_size); \
    if (!const_stack || !load_const_stack) { \
        PyErr_NoMemory(); \
        goto exitError; \
    } \
} while(0)
/* Release both arrays backing the const stack.  A pointer that is still
   NULL is skipped.  The pointers themselves are not reset afterwards. */
#define CONST_STACK_DELETE() do { \
    if (const_stack != NULL) { \
        PyMem_Free(const_stack); \
    } \
    if (load_const_stack != NULL) { \
        PyMem_Free(load_const_stack); \
    } \
} while(0)
/* Number of entries currently on the const stack: the top index plus one,
   so it is 0 when const_stack_top is -1. */
#define CONST_STACK_LEN() (const_stack_top + 1)
/* Push the constant loaded by the LOAD_CONST instruction at bytecode offset
   `i` onto the const stack.

   The constant object is looked up in `consts` through the instruction's
   argument and stored in `const_stack`; the offset itself is stored at the
   same index in `load_const_stack`.  `in_consts` is set to 1 afterwards.

   When the stack is full its capacity is doubled.  Each array is resized
   through a temporary pointer because PyMem_Resize() assigns NULL to its
   pointer argument on failure: resizing `const_stack` in place would lose
   the only reference to the still-allocated original block.  On failure
   PyErr_NoMemory() is called and control jumps to the `exitError` label of
   the enclosing function, leaving both stack pointers valid for cleanup.

   `i` is evaluated exactly once. */
#define CONST_STACK_PUSH_OP(i) do { \
    Py_ssize_t _pos = (i); \
    PyObject *_x; \
    assert(codestr[_pos] == LOAD_CONST); \
    assert(PyList_GET_SIZE(consts) > GETARG(codestr, _pos)); \
    _x = PyList_GET_ITEM(consts, GETARG(codestr, _pos)); \
    if (++const_stack_top >= const_stack_size) { \
        Py_ssize_t _new_size = const_stack_size * 2; \
        PyObject **_new_consts = const_stack; \
        Py_ssize_t *_new_ops = load_const_stack; \
        PyMem_Resize(_new_consts, PyObject *, _new_size); \
        if (_new_consts == NULL) { \
            PyErr_NoMemory(); \
            goto exitError; \
        } \
        const_stack = _new_consts; \
        PyMem_Resize(_new_ops, Py_ssize_t, _new_size); \
        if (_new_ops == NULL) { \
            PyErr_NoMemory(); \
            goto exitError; \
        } \
        load_const_stack = _new_ops; \
        /* Publish the new capacity only once both arrays have grown. */ \
        const_stack_size = _new_size; \
    } \
    load_const_stack[const_stack_top] = _pos; \
    const_stack[const_stack_top] = _x; \
    in_consts = 1; \
} while(0)
/* Empty the const stack by setting the top index back to -1.  Only the
   index changes; the backing arrays are neither freed nor cleared. */
#define CONST_STACK_RESET() do { \
const_stack_top = -1; \
} while(0)
/* Expands to the const_stack entry at index const_stack_top, i.e. the most
   recently pushed constant.  No emptiness check is performed.
   The parameter `x` does not appear in the expansion. */
#define CONST_STACK_TOP(x) \
const_stack[const_stack_top]
/* Address of the oldest of the `i` most recently pushed entries, i.e. a
   pointer to `i` consecutive const_stack entries ending at the top.
   No bounds check is done; the caller must know that at least `i` entries
   are on the stack.
   The argument and the whole expansion are parenthesized so that compound
   arguments such as `n + 1` index the intended slot and the result can be
   used safely inside a larger expression. */
#define CONST_STACK_LASTN(i) \
    (&const_stack[const_stack_top - (i) + 1])
/* Discard the `i` most recently pushed entries by lowering the top index.
   Asserts (in debug builds) that at least `i` entries are present.
   `i` is parenthesized so that an argument containing a low-precedence
   operator (for example a conditional expression) is compared and
   subtracted as a whole. */
#define CONST_STACK_POP(i) do { \
    assert(const_stack_top + 1 >= (i)); \
    const_stack_top -= (i); \
} while(0)
/* Value stored in load_const_stack for the oldest of the `i` most recently
   pushed entries (CONST_STACK_PUSH_OP stores the bytecode offset of the
   pushing LOAD_CONST there), or -1 when fewer than `i` entries are on the
   stack.
   `i` is parenthesized so that compound arguments such as `n + 1` are
   subtracted as a whole instead of being split by operator precedence. */
#define CONST_STACK_OP_LASTN(i) \
    ((const_stack_top >= (i) - 1) ? load_const_stack[const_stack_top - (i) + 1] : -1)
/* Replace LOAD_CONST c1. LOAD_CONST c2 ... LOAD_CONST cn BUILD_TUPLE n
with LOAD_CONST (c1, c2, ... cn).
The consts table must still be in list form so that the
@ -33,17 +91,14 @@
test; for BUILD_SET it assembles a frozenset rather than a tuple.
*/
static int
tuple_of_constants(unsigned char *codestr, Py_ssize_t n, PyObject *consts)
tuple_of_constants(unsigned char *codestr, Py_ssize_t n,
PyObject *consts, PyObject **objs)
{
PyObject *newconst, *constant;
Py_ssize_t i, arg, len_consts;
Py_ssize_t i, len_consts;
/* Pre-conditions */
assert(PyList_CheckExact(consts));
assert(codestr[n*3] == BUILD_TUPLE || codestr[n*3] == BUILD_LIST || codestr[n*3] == BUILD_SET);
assert(GETARG(codestr, (n*3)) == n);
for (i=0 ; i<n ; i++)
assert(codestr[i*3] == LOAD_CONST);
/* Buildup new tuple of constants */
newconst = PyTuple_New(n);
@ -51,16 +106,14 @@ tuple_of_constants(unsigned char *codestr, Py_ssize_t n, PyObject *consts)
return 0;
len_consts = PyList_GET_SIZE(consts);
for (i=0 ; i<n ; i++) {
arg = GETARG(codestr, (i*3));
assert(arg < len_consts);
constant = PyList_GET_ITEM(consts, arg);
constant = objs[i];
Py_INCREF(constant);
PyTuple_SET_ITEM(newconst, i, constant);
}
/* If it's a BUILD_SET, use the PyTuple we just built to create a
PyFrozenSet, and use that as the constant instead: */
if (codestr[n*3] == BUILD_SET) {
if (codestr[0] == BUILD_SET) {
PyObject *tuple = newconst;
newconst = PyFrozenSet_New(tuple);
Py_DECREF(tuple);
@ -77,9 +130,8 @@ tuple_of_constants(unsigned char *codestr, Py_ssize_t n, PyObject *consts)
/* Write NOPs over old LOAD_CONSTS and
add a new LOAD_CONST newconst on top of the BUILD_TUPLE n */
memset(codestr, NOP, n*3);
codestr[n*3] = LOAD_CONST;
SETARG(codestr, (n*3), len_consts);
codestr[0] = LOAD_CONST;
SETARG(codestr, 0, len_consts);
return 1;
}
@ -87,14 +139,14 @@ tuple_of_constants(unsigned char *codestr, Py_ssize_t n, PyObject *consts)
with LOAD_CONST binop(c1,c2)
The consts table must still be in list form so that the
new constant can be appended.
Called with codestr pointing to the first LOAD_CONST.
Called with codestr pointing to the BINOP.
Abandons the transformation if the folding fails (i.e. 1+'a').
If the new constant is a sequence, only folds when the size
is below a threshold value. That keeps pyc files from
becoming large in the presence of code like: (None,)*1000.
*/
static int
fold_binops_on_constants(unsigned char *codestr, PyObject *consts)
fold_binops_on_constants(unsigned char *codestr, PyObject *consts, PyObject **objs)
{
PyObject *newconst, *v, *w;
Py_ssize_t len_consts, size;
@ -102,13 +154,11 @@ fold_binops_on_constants(unsigned char *codestr, PyObject *consts)
/* Pre-conditions */
assert(PyList_CheckExact(consts));
assert(codestr[0] == LOAD_CONST);
assert(codestr[3] == LOAD_CONST);
/* Create new constant */
v = PyList_GET_ITEM(consts, GETARG(codestr, 0));
w = PyList_GET_ITEM(consts, GETARG(codestr, 3));
opcode = codestr[6];
v = objs[0];
w = objs[1];
opcode = codestr[0];
switch (opcode) {
case BINARY_POWER:
newconst = PyNumber_Power(v, w, Py_None);
@ -180,16 +230,15 @@ fold_binops_on_constants(unsigned char *codestr, PyObject *consts)
Py_DECREF(newconst);
/* Write NOP NOP NOP NOP LOAD_CONST newconst */
memset(codestr, NOP, 4);
codestr[4] = LOAD_CONST;
SETARG(codestr, 4, len_consts);
codestr[-2] = LOAD_CONST;
SETARG(codestr, -2, len_consts);
return 1;
}
static int
fold_unaryops_on_constants(unsigned char *codestr, PyObject *consts)
fold_unaryops_on_constants(unsigned char *codestr, PyObject *consts, PyObject *v)
{
PyObject *newconst=NULL, *v;
PyObject *newconst=NULL/*, *v*/;
Py_ssize_t len_consts;
int opcode;
@ -198,7 +247,6 @@ fold_unaryops_on_constants(unsigned char *codestr, PyObject *consts)
assert(codestr[0] == LOAD_CONST);
/* Create new constant */
v = PyList_GET_ITEM(consts, GETARG(codestr, 0));
opcode = codestr[3];
switch (opcode) {
case UNARY_NEGATIVE:
@ -340,7 +388,11 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
unsigned char *lineno;
int *addrmap = NULL;
int new_line, cum_orig_line, last_line, tabsiz;
int cumlc=0, lastlc=0; /* Count runs of consecutive LOAD_CONSTs */
PyObject **const_stack = NULL;
Py_ssize_t *load_const_stack = NULL;
Py_ssize_t const_stack_top = -1;
Py_ssize_t const_stack_size = 0;
int in_consts = 0; /* whether we are in a LOAD_CONST sequence */
unsigned int *blocks = NULL;
char *name;
@ -386,12 +438,16 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
goto exitError;
assert(PyList_Check(consts));
CONST_STACK_CREATE();
for (i=0 ; i<codelen ; i += CODESIZE(codestr[i])) {
reoptimize_current:
opcode = codestr[i];
lastlc = cumlc;
cumlc = 0;
if (!in_consts) {
CONST_STACK_RESET();
}
in_consts = 0;
switch (opcode) {
/* Replace UNARY_NOT POP_JUMP_IF_FALSE
@ -432,21 +488,21 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
goto exitError;
else if (h == 0)
continue;
cumlc = lastlc + 1;
CONST_STACK_PUSH_OP(i);
break;
/* Skip over LOAD_CONST trueconst
POP_JUMP_IF_FALSE xx. This improves
"while 1" performance. */
case LOAD_CONST:
cumlc = lastlc + 1;
CONST_STACK_PUSH_OP(i);
j = GETARG(codestr, i);
if (codestr[i+3] != POP_JUMP_IF_FALSE ||
!ISBASICBLOCK(blocks,i,6) ||
!PyObject_IsTrue(PyList_GET_ITEM(consts, j)))
continue;
memset(codestr+i, NOP, 6);
cumlc = 0;
CONST_STACK_RESET();
break;
/* Try to fold tuples of constants (includes a case for lists and sets
@ -458,19 +514,23 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
case BUILD_LIST:
case BUILD_SET:
j = GETARG(codestr, i);
h = i - 3 * j;
if (h >= 0 &&
j <= lastlc &&
if (j == 0)
break;
h = CONST_STACK_OP_LASTN(j);
assert((h >= 0 || CONST_STACK_LEN() < j));
if (h >= 0 && j > 0 && j <= CONST_STACK_LEN() &&
((opcode == BUILD_TUPLE &&
ISBASICBLOCK(blocks, h, 3*(j+1))) ||
ISBASICBLOCK(blocks, h, i-h+3)) ||
((opcode == BUILD_LIST || opcode == BUILD_SET) &&
codestr[i+3]==COMPARE_OP &&
ISBASICBLOCK(blocks, h, 3*(j+2)) &&
ISBASICBLOCK(blocks, h, i-h+6) &&
(GETARG(codestr,i+3)==6 ||
GETARG(codestr,i+3)==7))) &&
tuple_of_constants(&codestr[h], j, consts)) {
tuple_of_constants(&codestr[i], j, consts, CONST_STACK_LASTN(j))) {
assert(codestr[i] == LOAD_CONST);
cumlc = 1;
memset(&codestr[h], NOP, i - h);
CONST_STACK_POP(j);
CONST_STACK_PUSH_OP(i);
break;
}
if (codestr[i+3] != UNPACK_SEQUENCE ||
@ -482,10 +542,12 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
} else if (j == 2) {
codestr[i] = ROT_TWO;
memset(codestr+i+1, NOP, 5);
CONST_STACK_RESET();
} else if (j == 3) {
codestr[i] = ROT_THREE;
codestr[i+1] = ROT_TWO;
memset(codestr+i+2, NOP, 4);
CONST_STACK_RESET();
}
break;
@ -504,12 +566,18 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
case BINARY_AND:
case BINARY_XOR:
case BINARY_OR:
if (lastlc >= 2 &&
ISBASICBLOCK(blocks, i-6, 7) &&
fold_binops_on_constants(&codestr[i-6], consts)) {
/* NOTE: the folded LOAD_CONST is written starting at `i-2` because
LOAD_CONST takes an argument whereas the BINOP it replaces does not */
h = CONST_STACK_OP_LASTN(2);
assert((h >= 0 || CONST_STACK_LEN() < 2));
if (h >= 0 &&
ISBASICBLOCK(blocks, h, i-h+1) &&
fold_binops_on_constants(&codestr[i], consts, CONST_STACK_LASTN(2))) {
i -= 2;
memset(&codestr[h], NOP, i - h);
assert(codestr[i] == LOAD_CONST);
cumlc = 1;
CONST_STACK_POP(2);
CONST_STACK_PUSH_OP(i);
}
break;
@ -518,12 +586,15 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
case UNARY_NEGATIVE:
case UNARY_INVERT:
case UNARY_POSITIVE:
if (lastlc >= 1 &&
ISBASICBLOCK(blocks, i-3, 4) &&
fold_unaryops_on_constants(&codestr[i-3], consts)) {
h = CONST_STACK_OP_LASTN(1);
assert((h >= 0 || CONST_STACK_LEN() < 1));
if (h >= 0 &&
ISBASICBLOCK(blocks, h, i-h+1) &&
fold_unaryops_on_constants(&codestr[i-3], consts, CONST_STACK_TOP())) {
i -= 2;
assert(codestr[i] == LOAD_CONST);
cumlc = 1;
CONST_STACK_POP(1);
CONST_STACK_PUSH_OP(i);
}
break;
@ -680,6 +751,7 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
assert(h + nops == codelen);
code = PyBytes_FromStringAndSize((char *)codestr, h);
CONST_STACK_DELETE();
PyMem_Free(addrmap);
PyMem_Free(codestr);
PyMem_Free(blocks);
@ -689,6 +761,7 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
code = NULL;
exitUnchanged:
CONST_STACK_DELETE();
if (blocks != NULL)
PyMem_Free(blocks);
if (addrmap != NULL)