Improved the bytecode optimizer.

* Can now test for basic blocks.
* Optimize inverted comparisons.
* Optimize UNARY_NOT followed by a conditional jump (sketched below).
* Added a new opcode, NOP, to keep code size constant.
* Applied NOP to previous transformations where appropriate.
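
As a rough illustration of the inversion rewrites listed above, here is a
pure-Python sketch that performs the same byte-level edits on a mutable list
of bytecode. The jump and comparison opcode values below are placeholders for
the sketch only; the real transformation is the C code added to
optimize_code() further down.

    # Sketch only: NOP, POP_TOP and UNARY_NOT use the values from this
    # commit, the other opcode numbers are made up for illustration.
    NOP, POP_TOP, UNARY_NOT = 9, 1, 12
    COMPARE_OP, JUMP_IF_FALSE, JUMP_IF_TRUE = 200, 201, 202

    def fold_unary_not(code, i):
        # UNARY_NOT; JUMP_IF_FALSE x; ...; POP_TOP  -->  NOP; JUMP_IF_TRUE x
        # (the C version also checks the basic-block table and that both
        # branches of the jump land on a POP_TOP before rewriting)
        if code[i] == UNARY_NOT and code[i+1] == JUMP_IF_FALSE and code[i+4] == POP_TOP:
            code[i] = NOP
            code[i+1] = JUMP_IF_TRUE

    def fold_inverted_compare(code, i):
        # COMPARE_OP in/not in/is/is not; UNARY_NOT  -->  inverted COMPARE_OP; NOP
        # comparison arguments 6..9 are in, not in, is, is not; XOR with 1
        # flips each to its inverse, as SETARG(codestr, i, (j^1)) does in C
        # (the C code also requires the pair to lie within one basic block).
        if code[i] == COMPARE_OP and 6 <= code[i+1] <= 9 and code[i+3] == UNARY_NOT:
            code[i+1] ^= 1
            code[i+3] = NOP

    code = [COMPARE_OP, 8, 0, UNARY_NOT, POP_TOP]      # "not (a is b)"
    fold_inverted_compare(code, 0)
    assert code == [COMPARE_OP, 9, 0, NOP, POP_TOP]    # "a is not b", same size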

Note, the NOP would not be necessary if other functions were
added to re-target jump addresses and update the co_lnotab mapping.
That would yield slightly faster and cleaner bytecode at the
expense of optimizer simplicity and of keeping it decoupled
from the line-numbering structure.
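
To make that trade-off concrete, here is a hypothetical sketch (not part of
this commit) of roughly what one such re-targeting pass would have to do if
bytes were removed instead of overwritten with NOP: every jump whose target
lies beyond the removed bytes must be shifted, and the co_lnotab byte-offset
increments would need a similar rewrite. The opcode constants are placeholders.

    # Hypothetical helper, for illustration only -- the commit avoids needing
    # anything like this by keeping the code size constant with NOPs.
    HAVE_ARGUMENT = 90                       # placeholder threshold
    JUMP_ABSOLUTE, CONTINUE_LOOP = 113, 119  # placeholder absolute jumps

    def retarget_absolute_jumps(code, hole, size):
        """Shift absolute jump targets pointing past `size` bytes removed at
        offset `hole`. Relative jumps that span the hole and the co_lnotab
        table would need the same treatment."""
        i = 0
        while i < len(code):
            op = code[i]
            if op in (JUMP_ABSOLUTE, CONTINUE_LOOP):
                tgt = code[i+1] | (code[i+2] << 8)
                if tgt > hole:
                    tgt -= size
                    code[i+1], code[i+2] = tgt & 0xFF, tgt >> 8
            i += 3 if op >= HAVE_ARGUMENT else 1
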
Raymond Hettinger, 2003-04-22 06:49:11 +00:00
commit 060641d511 (parent 0c83348d5c)
5 changed files with 97 additions and 9 deletions

@@ -14,6 +14,8 @@ extern "C" {
 #define DUP_TOP 4
 #define ROT_FOUR 5
+#define NOP 9
 #define UNARY_POSITIVE 10
 #define UNARY_NEGATIVE 11
 #define UNARY_NOT 12

@@ -49,6 +49,8 @@ def_op('ROT_THREE', 3)
 def_op('DUP_TOP', 4)
 def_op('ROT_FOUR', 5)
+def_op('NOP', 9)
 def_op('UNARY_POSITIVE', 10)
 def_op('UNARY_NEGATIVE', 11)
 def_op('UNARY_NOT', 12)

@@ -294,6 +294,13 @@ Core and builtins
   value, but according to PEP 237 it really needs to be 1 now. This
   will be backported to Python 2.2.3 as well. (SF #660455)
 
+- Added several bytecode optimizations. Provides speed-ups to
+  inverted in/is tests, inverted jumps, while 1 loops, and jumps to
+  unconditional jumps.
+
+- Added a new opcode, NOP, which is used in some of the bytecode
+  transformations.
+
 - int(s, base) sometimes sign-folds hex and oct constants; it only
   does this when base is 0 and s.strip() starts with a '0'. When the
   sign is actually folded, as in int("0xffffffff", 0) on a 32-bit
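
The "jumps to unconditional jumps" entry above refers to the re-targeting
done near the end of the compile.c hunk below; a small pure-Python sketch of
the idea, assuming 3-byte instructions with a little-endian argument and a
placeholder opcode value:

    JUMP_FORWARD = 110   # placeholder value for this sketch

    def jump_target(code, i):
        # relative jumps count from the end of the 3-byte instruction,
        # mirroring GETJUMPTGT() in compile.c
        return i + 3 + (code[i+1] | (code[i+2] << 8))

    def retarget_jump_to_jump(code, i):
        # if a JUMP_FORWARD lands on another JUMP_FORWARD, point it straight
        # at the final destination (absolute jumps would skip the i+3 bias)
        tgt = jump_target(code, i)
        if code[tgt] == JUMP_FORWARD:
            arg = jump_target(code, tgt) - (i + 3)
            code[i+1], code[i+2] = arg & 0xFF, arg >> 8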

@@ -873,6 +873,9 @@ eval_frame(PyFrameObject *f)
         /* case STOP_CODE: this is an error! */

+        case NOP:
+            goto fast_next_opcode;
+
         case LOAD_FAST:
             x = GETLOCAL(oparg);
             if (x != NULL) {

@@ -328,6 +328,43 @@ intern_strings(PyObject *tuple)
 #define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP)
 #define GETJUMPTGT(arr, i) (GETARG(arr,i) + (ABSOLUTE_JUMP(arr[i]) ? 0 : i+3))
 #define SETARG(arr, i, val) arr[i+2] = val>>8; arr[i+1] = val & 255
+#define CODESIZE(op) (HAS_ARG(op) ? 3 : 1)
+#define ISBASICBLOCK(blocks, start, bytes) (blocks[start]==blocks[start+bytes-1])
+
+static unsigned int *
+markblocks(unsigned char *code, int len)
+{
+    unsigned int *blocks = PyMem_Malloc(len*sizeof(int));
+    int i,j, opcode, oldblock, newblock, blockcnt = 0;
+    if (blocks == NULL)
+        return NULL;
+    memset(blocks, 0, len*sizeof(int));
+    for (i=0 ; i<len ; i+=CODESIZE(opcode)) {
+        opcode = code[i];
+        switch (opcode) {
+            case FOR_ITER:
+            case JUMP_FORWARD:
+            case JUMP_IF_FALSE:
+            case JUMP_IF_TRUE:
+            case JUMP_ABSOLUTE:
+            case CONTINUE_LOOP:
+            case SETUP_LOOP:
+            case SETUP_EXCEPT:
+            case SETUP_FINALLY:
+                j = GETJUMPTGT(code, i);
+                oldblock = blocks[j];
+                newblock = ++blockcnt;
+                for (; j<len ; j++) {
+                    if (blocks[j] != (unsigned)oldblock)
+                        break;
+                    blocks[j] = newblock;
+                }
+                break;
+        }
+    }
+    return blocks;
+}
+
 static PyObject *
 optimize_code(PyObject *code, PyObject* consts)
@@ -335,18 +372,24 @@ optimize_code(PyObject *code, PyObject* consts)
     int i, j, codelen;
     int tgt, tgttgt, opcode;
     unsigned char *codestr;
+    unsigned int *blocks;

     /* Make a modifiable copy of the code string */
     if (!PyString_Check(code))
         goto exitUnchanged;
     codelen = PyString_Size(code);
     codestr = PyMem_Malloc(codelen);
-    if (codestr == NULL)
+    if (codestr == NULL)
         goto exitUnchanged;
     codestr = memcpy(codestr, PyString_AS_STRING(code), codelen);
+    blocks = markblocks(codestr, codelen);
+    if (blocks == NULL) {
+        PyMem_Free(codestr);
+        goto exitUnchanged;
+    }
     assert(PyTuple_Check(consts));
-    for (i=0 ; i<codelen-7 ; i += HAS_ARG(codestr[i]) ? 3 : 1) {
+    for (i=0 ; i<codelen ; i += CODESIZE(codestr[i])) {
         opcode = codestr[i];
         switch (opcode) {
@@ -363,8 +406,8 @@ optimize_code(PyObject *code, PyObject* consts)
                 SETARG(codestr, i, 4);
                 break;

-            /* Replace BUILD_SEQN 2 UNPACK_SEQN 2 with ROT2 JMP+2.
-               Replace BUILD_SEQN 3 UNPACK_SEQN 3 with ROT3 ROT2 JMP+1.
+            /* Replace BUILD_SEQN 2 UNPACK_SEQN 2 with ROT2 JMP+2 NOP NOP.
+               Replace BUILD_SEQN 3 UNPACK_SEQN 3 with ROT3 ROT2 JMP+1 NOP.
                Note, these opcodes occur together only in assignment
                statements. Accordingly, the unpack opcode is never
                a jump target. */
@@ -377,8 +420,8 @@ optimize_code(PyObject *code, PyObject* consts)
                     codestr[i] = ROT_TWO;
                     codestr[i+1] = JUMP_FORWARD;
                     SETARG(codestr, i+1, 2);
-                    codestr[i+4] = DUP_TOP; /* Filler codes used as NOPs */
-                    codestr[i+5] = POP_TOP;
+                    codestr[i+4] = NOP;
+                    codestr[i+5] = NOP;
                     continue;
                 }
                 if (GETARG(codestr, i) == 3 && \
@@ -386,11 +429,41 @@ optimize_code(PyObject *code, PyObject* consts)
                     codestr[i] = ROT_THREE;
                     codestr[i+1] = ROT_TWO;
                     codestr[i+2] = JUMP_FORWARD;
-                    SETARG(codestr, i+2, 1);
-                    codestr[i+5] = DUP_TOP;
+                    SETARG(codestr, i+2, 1);
+                    codestr[i+5] = NOP;
                 }
                 break;
+
+            /* Simplify inverted tests.
+               Must verify that sequence is a basic block because the jump
+               can itself be a jump target. Also, must verify that *both*
+               jump alternatives go to a POP_TOP. Otherwise, the code will
+               expect the stack value to have been inverted. */
+            case UNARY_NOT:
+                if (codestr[i+1] != JUMP_IF_FALSE || \
+                    codestr[i+4] != POP_TOP || \
+                    !ISBASICBLOCK(blocks,i,5))
+                    continue;
+                tgt = GETJUMPTGT(codestr, (i+1));
+                if (codestr[tgt] != POP_TOP)
+                    continue;
+                codestr[i] = NOP;
+                codestr[i+1] = JUMP_IF_TRUE;
+                break;
+
+            /* not a is b --> a is not b
+               not a in b --> a not in b
+               not a is not b --> a is b
+               not a not in b --> a in b */
+            case COMPARE_OP:
+                j = GETARG(codestr, i);
+                if (codestr[i+3] != UNARY_NOT || j < 6 || \
+                    j > 9 || !ISBASICBLOCK(blocks,i,4))
+                    continue;
+                SETARG(codestr, i, (j^1));
+                codestr[i+3] = NOP;
+                break;

             /* Replace jumps to unconditional jumps */
             case FOR_ITER:
             case JUMP_FORWARD:
@@ -402,7 +475,7 @@ optimize_code(PyObject *code, PyObject* consts)
             case SETUP_EXCEPT:
             case SETUP_FINALLY:
                 tgt = GETJUMPTGT(codestr, i);
-                if (!UNCONDITIONAL_JUMP(codestr[tgt]))
+                if (!UNCONDITIONAL_JUMP(codestr[tgt]))
                     continue;
                 tgttgt = GETJUMPTGT(codestr, tgt);
                 if (opcode == JUMP_FORWARD) /* JMP_ABS can go backwards */
@@ -422,6 +495,7 @@ optimize_code(PyObject *code, PyObject* consts)
     }
     code = PyString_FromStringAndSize(codestr, codelen);
     PyMem_Free(codestr);
+    PyMem_Free(blocks);
     return code;

 exitUnchanged: