mirror of https://github.com/python/cpython
bpo-42827: Fix crash on SyntaxError in multiline expressions (GH-24140)
When trying to extract the error line for the error message, there are two distinct cases:
1. The input comes from a file, which means that we can extract the error line by using `PyErr_ProgramTextObject`, which we already do.
2. The input does not come from a file, at which point we need to get the source code from the tokenizer:
   * If the tokenizer's current line number is the same as the line of the error, we get the line from `tok->buf` and we're done.
   * Otherwise, we can extract the error line from the source code in one of the following two ways:
     * If the input comes from a string, we have all the input in `tok->str` and we can extract the error line from it.
     * If the input comes from stdin, i.e. the interactive prompt, we do not have access to the previous lines. That's why a new field `tok->stdin_content` is added, which holds the whole input for the current (multi-line) statement or expression. We can then extract the error line from `tok->stdin_content` as we do in the string case above.

Co-authored-by: Pablo Galindo <Pablogsal@gmail.com>
This commit is contained in:
parent
9712358277
commit
e5fe509054
|
@ -209,6 +209,9 @@ class ExceptionTests(unittest.TestCase):
|
|||
check('x = "a', 1, 7)
|
||||
check('lambda x: x = 2', 1, 1)
|
||||
check('f{a + b + c}', 1, 2)
|
||||
check('[file for str(file) in []\n])', 1, 11)
|
||||
check('[\nfile\nfor str(file)\nin\n[]\n]', 3, 5)
|
||||
check('[file for\n str(file) in []]', 2, 2)
|
||||
|
||||
# Errors thrown by compile.c
|
||||
check('class foo:return 1', 1, 11)
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
Fix a crash when working out the error line of a :exc:`SyntaxError` in some
|
||||
multi-line expressions.
|
|
@ -380,6 +380,27 @@ _PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
get_error_line(Parser *p, Py_ssize_t lineno)
|
||||
{
|
||||
/* If p->tok->fp == NULL, then we're parsing from a string, which means that
|
||||
the whole source is stored in p->tok->str. If not, then we're parsing
|
||||
from the REPL, so the source lines of the current (multi-line) statement
|
||||
are stored in p->tok->stdin_content */
|
||||
assert(p->tok->fp == NULL || p->tok->fp == stdin);
|
||||
|
||||
char *cur_line = p->tok->fp == NULL ? p->tok->str : p->tok->stdin_content;
|
||||
for (int i = 0; i < lineno - 1; i++) {
|
||||
cur_line = strchr(cur_line, '\n') + 1;
|
||||
}
|
||||
|
||||
char *next_newline;
|
||||
if ((next_newline = strchr(cur_line, '\n')) == NULL) { // This is the last line
|
||||
next_newline = cur_line + strlen(cur_line);
|
||||
}
|
||||
return PyUnicode_DecodeUTF8(cur_line, next_newline - cur_line, "replace");
|
||||
}
|
||||
|
||||
void *
|
||||
_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
|
||||
Py_ssize_t lineno, Py_ssize_t col_offset,
|
||||
|
@ -416,8 +437,22 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
|
|||
}
|
||||
|
||||
if (!error_line) {
|
||||
Py_ssize_t size = p->tok->inp - p->tok->buf;
|
||||
error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
|
||||
/* PyErr_ProgramTextObject was not called or returned NULL. If it was not called,
|
||||
then we need to find the error line from some other source, because
|
||||
p->start_rule != Py_file_input. If it returned NULL, then it either unexpectedly
|
||||
failed or we're parsing from a string or the REPL. There's a third edge case where
|
||||
we're actually parsing from a file, which has an E_EOF SyntaxError and in that case
|
||||
`PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which
|
||||
does not physically exist */
|
||||
assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF);
|
||||
|
||||
if (p->tok->lineno == lineno) {
|
||||
Py_ssize_t size = p->tok->inp - p->tok->buf;
|
||||
error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
|
||||
}
|
||||
else {
|
||||
error_line = get_error_line(p, lineno);
|
||||
}
|
||||
if (!error_line) {
|
||||
goto error;
|
||||
}
|
||||
|
|
|
@ -81,6 +81,7 @@ tok_new(void)
|
|||
tok->decoding_readline = NULL;
|
||||
tok->decoding_buffer = NULL;
|
||||
tok->type_comments = 0;
|
||||
tok->stdin_content = NULL;
|
||||
|
||||
tok->async_hacks = 0;
|
||||
tok->async_def = 0;
|
||||
|
@ -816,6 +817,8 @@ PyTokenizer_Free(struct tok_state *tok)
|
|||
PyMem_Free(tok->buf);
|
||||
if (tok->input)
|
||||
PyMem_Free(tok->input);
|
||||
if (tok->stdin_content)
|
||||
PyMem_Free(tok->stdin_content);
|
||||
PyMem_Free(tok);
|
||||
}
|
||||
|
||||
|
@ -856,6 +859,24 @@ tok_nextc(struct tok_state *tok)
|
|||
if (translated == NULL)
|
||||
return EOF;
|
||||
newtok = translated;
|
||||
if (tok->stdin_content == NULL) {
|
||||
tok->stdin_content = PyMem_Malloc(strlen(translated) + 1);
|
||||
if (tok->stdin_content == NULL) {
|
||||
tok->done = E_NOMEM;
|
||||
return EOF;
|
||||
}
|
||||
sprintf(tok->stdin_content, "%s", translated);
|
||||
}
|
||||
else {
|
||||
char *new_str = PyMem_Malloc(strlen(tok->stdin_content) + strlen(translated) + 1);
|
||||
if (new_str == NULL) {
|
||||
tok->done = E_NOMEM;
|
||||
return EOF;
|
||||
}
|
||||
sprintf(new_str, "%s%s", tok->stdin_content, translated);
|
||||
PyMem_Free(tok->stdin_content);
|
||||
tok->stdin_content = new_str;
|
||||
}
|
||||
}
|
||||
if (tok->encoding && newtok && *newtok) {
|
||||
/* Recode to UTF-8 */
|
||||
|
|
|
@ -37,6 +37,7 @@ struct tok_state {
|
|||
int atbol; /* Nonzero if at begin of new line */
|
||||
int pendin; /* Pending indents (if > 0) or dedents (if < 0) */
|
||||
const char *prompt, *nextprompt; /* For interactive prompting */
|
||||
char *stdin_content;
|
||||
int lineno; /* Current line number */
|
||||
int first_lineno; /* First line of a single line or multi line string
|
||||
expression (cf. issue 16806) */
|
||||
|
|
Loading…
Reference in New Issue