bpo-40228: More robust frame.setlineno. (GH-19437)

Make frame.setlineno more robust: it no longer makes any assumptions about the source->bytecode translation.
This commit is contained in:
Mark Shannon 2020-04-29 16:49:45 +01:00 committed by GitHub
parent ec9bea4a37
commit 57697245e1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 278 additions and 339 deletions

View File

@ -948,8 +948,8 @@ class JumpTestCase(unittest.TestCase):
output.append(11) output.append(11)
output.append(12) output.append(12)
@jump_test(5, 11, [2, 4, 12]) @jump_test(5, 11, [2, 4], (ValueError, 'unreachable'))
def test_jump_over_return_try_finally_in_finally_block(output): def test_no_jump_over_return_try_finally_in_finally_block(output):
try: try:
output.append(2) output.append(2)
finally: finally:
@ -963,8 +963,8 @@ class JumpTestCase(unittest.TestCase):
pass pass
output.append(12) output.append(12)
@jump_test(3, 4, [1, 4]) @jump_test(3, 4, [1], (ValueError, 'unreachable'))
def test_jump_infinite_while_loop(output): def test_no_jump_infinite_while_loop(output):
output.append(1) output.append(1)
while True: while True:
output.append(3) output.append(3)
@ -1357,16 +1357,16 @@ class JumpTestCase(unittest.TestCase):
output.append(7) output.append(7)
output.append(8) output.append(8)
@jump_test(1, 5, [], (ValueError, "into a 'finally'")) @jump_test(1, 5, [5])
def test_no_jump_into_finally_block(output): def test_jump_into_finally_block(output):
output.append(1) output.append(1)
try: try:
output.append(3) output.append(3)
finally: finally:
output.append(5) output.append(5)
@jump_test(3, 6, [2, 5, 6], (ValueError, "into a 'finally'")) @jump_test(3, 6, [2, 6, 7])
def test_no_jump_into_finally_block_from_try_block(output): def test_jump_into_finally_block_from_try_block(output):
try: try:
output.append(2) output.append(2)
output.append(3) output.append(3)
@ -1375,8 +1375,8 @@ class JumpTestCase(unittest.TestCase):
output.append(6) output.append(6)
output.append(7) output.append(7)
@jump_test(5, 1, [1, 3], (ValueError, "out of a 'finally'")) @jump_test(5, 1, [1, 3, 1, 3, 5])
def test_no_jump_out_of_finally_block(output): def test_jump_out_of_finally_block(output):
output.append(1) output.append(1)
try: try:
output.append(3) output.append(3)
@ -1441,23 +1441,23 @@ class JumpTestCase(unittest.TestCase):
output.append(6) output.append(6)
output.append(7) output.append(7)
@jump_test(3, 5, [1, 2, -2], (ValueError, 'into')) @jump_test(3, 5, [1, 2, 5, -2])
def test_no_jump_between_with_blocks(output): def test_jump_between_with_blocks(output):
output.append(1) output.append(1)
with tracecontext(output, 2): with tracecontext(output, 2):
output.append(3) output.append(3)
with tracecontext(output, 4): with tracecontext(output, 4):
output.append(5) output.append(5)
@async_jump_test(3, 5, [1, 2, -2], (ValueError, 'into')) @async_jump_test(3, 5, [1, 2, 5, -2])
async def test_no_jump_between_async_with_blocks(output): async def test_jump_between_async_with_blocks(output):
output.append(1) output.append(1)
async with asynctracecontext(output, 2): async with asynctracecontext(output, 2):
output.append(3) output.append(3)
async with asynctracecontext(output, 4): async with asynctracecontext(output, 4):
output.append(5) output.append(5)
@jump_test(5, 7, [2, 4], (ValueError, 'finally')) @jump_test(5, 7, [2, 4], (ValueError, "unreachable"))
def test_no_jump_over_return_out_of_finally_block(output): def test_no_jump_over_return_out_of_finally_block(output):
try: try:
output.append(2) output.append(2)
@ -1551,9 +1551,8 @@ output.append(4)
output.append(1) output.append(1)
1 / 0 1 / 0
@jump_test(3, 2, [2], event='return', error=(ValueError, @jump_test(3, 2, [2, 5], event='return')
"can't jump from a 'yield' statement")) def test_jump_from_yield(output):
def test_no_jump_from_yield(output):
def gen(): def gen():
output.append(2) output.append(2)
yield 3 yield 3

View File

@ -0,0 +1 @@
Setting frame.f_lineno is now robust with respect to changes in the source-to-bytecode compiler.

View File

@ -69,98 +69,218 @@ get_arg(const _Py_CODEUNIT *codestr, Py_ssize_t i)
return oparg; return oparg;
} }
typedef struct _codetracker { typedef enum kind {
unsigned char *code; With = 1,
Py_ssize_t code_len; Loop = 2,
unsigned char *lnotab; Try = 3,
Py_ssize_t lnotab_len; Except = 4,
int start_line; } Kind;
int offset;
int line;
int addr;
int line_addr;
} codetracker;
/* Reset the mutable parts of the tracker */ #define BITS_PER_BLOCK 3
static void
reset(codetracker *tracker) static inline int64_t
push_block(int64_t stack, Kind kind)
{ {
tracker->offset = 0; assert(stack < ((int64_t)1)<<(BITS_PER_BLOCK*CO_MAXBLOCKS));
tracker->addr = 0; return (stack << BITS_PER_BLOCK) | kind;
tracker->line_addr = 0;
tracker->line = tracker->start_line;
} }
/* Initialise the tracker */ static inline int64_t
static void pop_block(int64_t stack)
init_codetracker(codetracker *tracker, PyCodeObject *code_obj)
{ {
PyBytes_AsStringAndSize(code_obj->co_code, assert(stack > 0);
(char **)&tracker->code, &tracker->code_len); return stack >> BITS_PER_BLOCK;
PyBytes_AsStringAndSize(code_obj->co_lnotab,
(char **)&tracker->lnotab, &tracker->lnotab_len);
tracker->start_line = code_obj->co_firstlineno;
reset(tracker);
} }
static void static inline Kind
advance_tracker(codetracker *tracker) top_block(int64_t stack)
{ {
tracker->addr += sizeof(_Py_CODEUNIT); return stack & ((1<<BITS_PER_BLOCK)-1);
if (tracker->offset >= tracker->lnotab_len) {
return;
}
while (tracker->offset < tracker->lnotab_len &&
tracker->addr >= tracker->line_addr + tracker->lnotab[tracker->offset]) {
tracker->line_addr += tracker->lnotab[tracker->offset];
tracker->line += (signed char)tracker->lnotab[tracker->offset+1];
tracker->offset += 2;
}
} }
static int64_t *
static void markblocks(PyCodeObject *code_obj, int len)
retreat_tracker(codetracker *tracker)
{ {
tracker->addr -= sizeof(_Py_CODEUNIT); const _Py_CODEUNIT *code =
while (tracker->addr < tracker->line_addr) { (const _Py_CODEUNIT *)PyBytes_AS_STRING(code_obj->co_code);
tracker->offset -= 2; int64_t *blocks = PyMem_New(int64_t, len+1);
tracker->line_addr -= tracker->lnotab[tracker->offset]; int i, j, opcode;
tracker->line -= (signed char)tracker->lnotab[tracker->offset+1];
if (blocks == NULL) {
PyErr_NoMemory();
return NULL;
} }
memset(blocks, -1, (len+1)*sizeof(int64_t));
blocks[0] = 0;
int todo = 1;
while (todo) {
todo = 0;
for (i = 0; i < len; i++) {
int64_t block_stack = blocks[i];
int64_t except_stack;
if (block_stack == -1) {
continue;
}
opcode = _Py_OPCODE(code[i]);
switch (opcode) {
case JUMP_IF_FALSE_OR_POP:
case JUMP_IF_TRUE_OR_POP:
case POP_JUMP_IF_FALSE:
case POP_JUMP_IF_TRUE:
case JUMP_IF_NOT_EXC_MATCH:
j = get_arg(code, i) / sizeof(_Py_CODEUNIT);
assert(j < len);
if (blocks[j] == -1 && j < i) {
todo = 1;
}
assert(blocks[j] == -1 || blocks[j] == block_stack);
blocks[j] = block_stack;
blocks[i+1] = block_stack;
break;
case JUMP_ABSOLUTE:
j = get_arg(code, i) / sizeof(_Py_CODEUNIT);
assert(j < len);
if (blocks[j] == -1 && j < i) {
todo = 1;
}
assert(blocks[j] == -1 || blocks[j] == block_stack);
blocks[j] = block_stack;
break;
case SETUP_FINALLY:
j = get_arg(code, i) / sizeof(_Py_CODEUNIT) + i + 1;
assert(j < len);
except_stack = push_block(block_stack, Except);
assert(blocks[j] == -1 || blocks[j] == except_stack);
blocks[j] = except_stack;
block_stack = push_block(block_stack, Try);
blocks[i+1] = block_stack;
break;
case SETUP_WITH:
case SETUP_ASYNC_WITH:
j = get_arg(code, i) / sizeof(_Py_CODEUNIT) + i + 1;
assert(j < len);
except_stack = push_block(block_stack, Except);
assert(blocks[j] == -1 || blocks[j] == except_stack);
blocks[j] = except_stack;
block_stack = push_block(block_stack, With);
blocks[i+1] = block_stack;
break;
case JUMP_FORWARD:
j = get_arg(code, i) / sizeof(_Py_CODEUNIT) + i + 1;
assert(j < len);
assert(blocks[j] == -1 || blocks[j] == block_stack);
blocks[j] = block_stack;
break;
case GET_ITER:
case GET_AITER:
block_stack = push_block(block_stack, Loop);
blocks[i+1] = block_stack;
break;
case FOR_ITER:
blocks[i+1] = block_stack;
block_stack = pop_block(block_stack);
j = get_arg(code, i) / sizeof(_Py_CODEUNIT) + i + 1;
assert(j < len);
assert(blocks[j] == -1 || blocks[j] == block_stack);
blocks[j] = block_stack;
break;
case POP_BLOCK:
case POP_EXCEPT:
block_stack = pop_block(block_stack);
blocks[i+1] = block_stack;
break;
case END_ASYNC_FOR:
block_stack = pop_block(pop_block(block_stack));
blocks[i+1] = block_stack;
break;
case RETURN_VALUE:
case RAISE_VARARGS:
case RERAISE:
/* End of block */
break;
default:
blocks[i+1] = block_stack;
}
}
}
return blocks;
} }
static int static int
move_to_addr(codetracker *tracker, int addr) compatible_block_stack(int64_t from_stack, int64_t to_stack)
{ {
while (addr > tracker->addr) { if (to_stack < 0) {
advance_tracker(tracker); return 0;
if (tracker->addr >= tracker->code_len) {
return -1;
}
} }
while (addr < tracker->addr) { while(from_stack > to_stack) {
retreat_tracker(tracker); from_stack = pop_block(from_stack);
if (tracker->addr < 0) {
return -1;
}
} }
return 0; return from_stack == to_stack;
}
static const char *
explain_incompatible_block_stack(int64_t to_stack)
{
Kind target_kind = top_block(to_stack);
switch(target_kind) {
case Except:
return "can't jump into an 'except' block as there's no exception";
case Try:
return "can't jump into the body of a try statement";
case With:
return "can't jump into the body of a with statement";
case Loop:
return "can't jump into the body of a for loop";
default:
Py_UNREACHABLE();
}
}
static int *
marklines(PyCodeObject *code, int len)
{
int *linestarts = PyMem_New(int, len);
if (linestarts == NULL) {
return NULL;
}
Py_ssize_t size = PyBytes_GET_SIZE(code->co_lnotab) / 2;
unsigned char *p = (unsigned char*)PyBytes_AS_STRING(code->co_lnotab);
int line = code->co_firstlineno;
int addr = 0;
int index = 0;
while (--size >= 0) {
addr += *p++;
if (index*2 < addr) {
linestarts[index++] = line;
}
while (index*2 < addr) {
linestarts[index++] = -1;
if (index >= len) {
break;
}
}
line += (signed char)*p;
p++;
}
if (index < len) {
linestarts[index++] = line;
}
while (index < len) {
linestarts[index++] = -1;
}
assert(index == len);
return linestarts;
} }
static int static int
first_line_not_before(codetracker *tracker, int line) first_line_not_before(int *lines, int len, int line)
{ {
int result = INT_MAX; int result = INT_MAX;
reset(tracker); for (int i = 0; i < len; i++) {
while (tracker->addr < tracker->code_len) { if (lines[i] < result && lines[i] >= line) {
if (tracker->line == line) { result = lines[i];
return line;
} }
if (tracker->line > line && tracker->line < result) {
result = tracker->line;
}
advance_tracker(tracker);
} }
if (result == INT_MAX) { if (result == INT_MAX) {
return -1; return -1;
@ -168,166 +288,6 @@ first_line_not_before(codetracker *tracker, int line)
return result; return result;
} }
static int
move_to_nearest_start_of_line(codetracker *tracker, int line)
{
if (line > tracker->line) {
while (line != tracker->line) {
advance_tracker(tracker);
if (tracker->addr >= tracker->code_len) {
return -1;
}
}
}
else {
while (line != tracker->line) {
retreat_tracker(tracker);
if (tracker->addr < 0) {
return -1;
}
}
while (tracker->addr > tracker->line_addr) {
retreat_tracker(tracker);
}
}
return 0;
}
typedef struct _blockitem
{
unsigned char kind;
int end_addr;
int start_line;
} blockitem;
typedef struct _blockstack
{
blockitem stack[CO_MAXBLOCKS];
int depth;
} blockstack;
static void
init_blockstack(blockstack *blocks)
{
blocks->depth = 0;
}
static void
push_block(blockstack *blocks, unsigned char kind,
int end_addr, int start_line)
{
assert(blocks->depth < CO_MAXBLOCKS);
blocks->stack[blocks->depth].kind = kind;
blocks->stack[blocks->depth].end_addr = end_addr;
blocks->stack[blocks->depth].start_line = start_line;
blocks->depth++;
}
static unsigned char
pop_block(blockstack *blocks)
{
assert(blocks->depth > 0);
blocks->depth--;
return blocks->stack[blocks->depth].kind;
}
static blockitem *
top_block(blockstack *blocks)
{
assert(blocks->depth > 0);
return &blocks->stack[blocks->depth-1];
}
static inline int
is_try_except(unsigned char op, int target_op)
{
return op == SETUP_FINALLY && (target_op == DUP_TOP || target_op == POP_TOP);
}
static inline int
is_async_for(unsigned char op, int target_op)
{
return op == SETUP_FINALLY && target_op == END_ASYNC_FOR;
}
static inline int
is_try_finally(unsigned char op, int target_op)
{
return op == SETUP_FINALLY && !is_try_except(op, target_op) && !is_async_for(op, target_op);
}
/* Kind for finding except blocks in the jump to line code */
#define TRY_EXCEPT 250
static int
block_stack_for_line(codetracker *tracker, int line, blockstack *blocks)
{
if (line < tracker->start_line) {
return -1;
}
init_blockstack(blocks);
reset(tracker);
while (tracker->addr < tracker->code_len) {
if (tracker->line == line) {
return 0;
}
if (blocks->depth > 0 && tracker->addr == top_block(blocks)->end_addr) {
unsigned char kind = pop_block(blocks);
assert(kind != SETUP_FINALLY);
if (kind == TRY_EXCEPT) {
push_block(blocks, POP_EXCEPT, -1, tracker->line);
}
if (kind == SETUP_WITH || kind == SETUP_ASYNC_WITH) {
push_block(blocks, WITH_EXCEPT_START, -1, tracker->line);
}
}
unsigned char op = tracker->code[tracker->addr];
if (op == SETUP_FINALLY || op == SETUP_ASYNC_WITH || op == SETUP_WITH || op == FOR_ITER) {
unsigned int oparg = get_arg((const _Py_CODEUNIT *)tracker->code,
tracker->addr / sizeof(_Py_CODEUNIT));
int target_addr = tracker->addr + oparg + sizeof(_Py_CODEUNIT);
int target_op = tracker->code[target_addr];
if (is_async_for(op, target_op)) {
push_block(blocks, FOR_ITER, target_addr, tracker->line);
}
else if (op == FOR_ITER) {
push_block(blocks, FOR_ITER, target_addr-sizeof(_Py_CODEUNIT), tracker->line);
}
else if (is_try_except(op, target_op)) {
push_block(blocks, TRY_EXCEPT, target_addr-sizeof(_Py_CODEUNIT), tracker->line);
}
else if (is_try_finally(op, target_op)) {
int addr = tracker->addr;
// Skip over duplicate 'finally' blocks if line is after body.
move_to_addr(tracker, target_addr);
if (tracker->line > line) {
// Target is in body, rewind to start.
move_to_addr(tracker, addr);
push_block(blocks, op, target_addr, tracker->line);
}
else {
// Now in finally block.
push_block(blocks, RERAISE, -1, tracker->line);
}
}
else {
push_block(blocks, op, target_addr, tracker->line);
}
}
else if (op == RERAISE) {
assert(blocks->depth > 0);
unsigned char kind = top_block(blocks)->kind;
if (kind == RERAISE || kind == WITH_EXCEPT_START || kind == POP_EXCEPT) {
pop_block(blocks);
}
}
advance_tracker(tracker);
}
return -1;
}
static void static void
frame_stack_pop(PyFrameObject *f) frame_stack_pop(PyFrameObject *f)
{ {
@ -412,131 +372,110 @@ frame_setlineno(PyFrameObject *f, PyObject* p_new_lineno, void *Py_UNUSED(ignore
return -1; return -1;
} }
codetracker tracker;
init_codetracker(&tracker, f->f_code);
move_to_addr(&tracker, f->f_lasti);
int current_line = tracker.line;
assert(current_line >= 0);
int new_lineno; int new_lineno;
{ /* Fail if the line falls outside the code block and
/* Fail if the line falls outside the code block and select first line with actual code. */
select first line with actual code. */ int overflow;
int overflow; long l_new_lineno = PyLong_AsLongAndOverflow(p_new_lineno, &overflow);
long l_new_lineno = PyLong_AsLongAndOverflow(p_new_lineno, &overflow); if (overflow
if (overflow
#if SIZEOF_LONG > SIZEOF_INT #if SIZEOF_LONG > SIZEOF_INT
|| l_new_lineno > INT_MAX || l_new_lineno > INT_MAX
|| l_new_lineno < INT_MIN || l_new_lineno < INT_MIN
#endif #endif
) { ) {
PyErr_SetString(PyExc_ValueError,
"lineno out of range");
return -1;
}
new_lineno = (int)l_new_lineno;
if (new_lineno < f->f_code->co_firstlineno) {
PyErr_Format(PyExc_ValueError,
"line %d comes before the current code block",
new_lineno);
return -1;
}
new_lineno = first_line_not_before(&tracker, new_lineno);
if (new_lineno < 0) {
PyErr_Format(PyExc_ValueError,
"line %d comes after the current code block",
(int)l_new_lineno);
return -1;
}
}
if (tracker.code[f->f_lasti] == YIELD_VALUE || tracker.code[f->f_lasti] == YIELD_FROM) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"can't jump from a 'yield' statement"); "lineno out of range");
return -1;
}
new_lineno = (int)l_new_lineno;
if (new_lineno < f->f_code->co_firstlineno) {
PyErr_Format(PyExc_ValueError,
"line %d comes before the current code block",
new_lineno);
return -1; return -1;
} }
/* Find block stack for current line and target line. */ int len = PyBytes_GET_SIZE(f->f_code->co_code)/sizeof(_Py_CODEUNIT);
blockstack current_stack, new_stack; int *lines = marklines(f->f_code, len);
block_stack_for_line(&tracker, new_lineno, &new_stack); if (lines == NULL) {
block_stack_for_line(&tracker, current_line, &current_stack);
/* The trace function is called with a 'return' trace event after the
* execution of a yield statement. */
if (tracker.code[tracker.addr] == DUP_TOP || tracker.code[tracker.addr] == POP_TOP) {
PyErr_SetString(PyExc_ValueError,
"can't jump to 'except' line as there's no exception");
return -1; return -1;
} }
/* Validate change of block stack. */ new_lineno = first_line_not_before(lines, len, new_lineno);
if (new_stack.depth > 0) { if (new_lineno < 0) {
blockitem *current_block_at_new_depth = &(current_stack.stack[new_stack.depth-1]); PyErr_Format(PyExc_ValueError,
if (new_stack.depth > current_stack.depth || "line %d comes after the current code block",
top_block(&new_stack)->start_line != current_block_at_new_depth->start_line) { (int)l_new_lineno);
unsigned char target_kind = top_block(&new_stack)->kind; PyMem_Free(lines);
const char *msg; return -1;
if (target_kind == POP_EXCEPT) {
msg = "can't jump into an 'except' block as there's no exception";
}
else if (target_kind == RERAISE) {
msg = "can't jump into a 'finally' block";
}
else {
msg = "can't jump into the middle of a block";
}
PyErr_SetString(PyExc_ValueError, msg);
return -1;
}
} }
/* Check for illegal jumps out of finally or except blocks. */ int64_t *blocks = markblocks(f->f_code, len);
for (int depth = new_stack.depth; depth < current_stack.depth; depth++) { if (blocks == NULL) {
switch(current_stack.stack[depth].kind) { PyMem_Free(lines);
case RERAISE: return -1;
PyErr_SetString(PyExc_ValueError, }
"can't jump out of a 'finally' block");
return -1; int64_t target_block_stack = -1;
case POP_EXCEPT: int64_t best_block_stack = -1;
PyErr_SetString(PyExc_ValueError, int best_addr = -1;
"can't jump out of an 'except' block"); int64_t start_block_stack = blocks[f->f_lasti/sizeof(_Py_CODEUNIT)];
return -1; const char *msg = "cannot find bytecode for specified line";
for (int i = 0; i < len; i++) {
if (lines[i] == new_lineno) {
target_block_stack = blocks[i];
if (compatible_block_stack(start_block_stack, target_block_stack)) {
msg = NULL;
if (target_block_stack > best_block_stack) {
best_block_stack = target_block_stack;
best_addr = i*sizeof(_Py_CODEUNIT);
}
}
else if (msg) {
if (target_block_stack >= 0) {
msg = explain_incompatible_block_stack(target_block_stack);
}
else {
msg = "code may be unreachable.";
}
}
} }
} }
PyMem_Free(blocks);
PyMem_Free(lines);
if (msg != NULL) {
PyErr_SetString(PyExc_ValueError, msg);
return -1;
}
/* Unwind block stack. */ /* Unwind block stack. */
while (current_stack.depth > new_stack.depth) { while (start_block_stack > best_block_stack) {
unsigned char kind = pop_block(&current_stack); Kind kind = top_block(start_block_stack);
switch(kind) { switch(kind) {
case FOR_ITER: case Loop:
frame_stack_pop(f); frame_stack_pop(f);
break; break;
case SETUP_FINALLY: case Try:
case TRY_EXCEPT:
frame_block_unwind(f); frame_block_unwind(f);
break; break;
case SETUP_WITH: case With:
case SETUP_ASYNC_WITH:
frame_block_unwind(f); frame_block_unwind(f);
// Pop the exit function // Pop the exit function
frame_stack_pop(f); frame_stack_pop(f);
break; break;
default: case Except:
PyErr_SetString(PyExc_SystemError, PyErr_SetString(PyExc_ValueError,
"unexpected block kind"); "can't jump out of an 'except' block");
return -1; return -1;
} }
start_block_stack = pop_block(start_block_stack);
} }
move_to_addr(&tracker, f->f_lasti);
move_to_nearest_start_of_line(&tracker, new_lineno);
/* Finally set the new f_lineno and f_lasti and return OK. */ /* Finally set the new f_lineno and f_lasti and return OK. */
f->f_lineno = new_lineno; f->f_lineno = new_lineno;
f->f_lasti = tracker.addr; f->f_lasti = best_addr;
return 0; return 0;
} }