diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py
index 864422390ad..eb70d7b4e49 100644
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -209,6 +209,9 @@ class ExceptionTests(unittest.TestCase):
         check('x = "a', 1, 7)
         check('lambda x: x = 2', 1, 1)
         check('f{a + b + c}', 1, 2)
+        check('[file for str(file) in []\n])', 1, 11)
+        check('[\nfile\nfor str(file)\nin\n[]\n]', 3, 5)
+        check('[file for\n str(file) in []]', 2, 2)
 
         # Errors thrown by compile.c
         check('class foo:return 1', 1, 11)
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-01-06-17-06-37.bpo-42827.jtRR0D.rst b/Misc/NEWS.d/next/Core and Builtins/2021-01-06-17-06-37.bpo-42827.jtRR0D.rst
new file mode 100644
index 00000000000..8e40ab6a653
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-01-06-17-06-37.bpo-42827.jtRR0D.rst
@@ -0,0 +1,2 @@
+Fix a crash when working out the error line of a :exc:`SyntaxError` in some
+multi-line expressions.
diff --git a/Parser/pegen.c b/Parser/pegen.c
index 188fd282b76..a6f97929255 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -380,6 +380,42 @@ _PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
     return NULL;
 }
 
+/* Return line number `lineno` (1-based) of the source being parsed as a new
+   str object, or NULL if decoding fails.  Only meant to be used when parsing
+   from a string or from the interactive prompt. */
+static PyObject *
+get_error_line(Parser *p, Py_ssize_t lineno)
+{
+    /* If p->tok->fp == NULL, then we're parsing from a string, which means that
+       the whole source is stored in p->tok->str. If not, then we're parsing
+       from the REPL, so the source lines of the current (multi-line) statement
+       are stored in p->tok->stdin_content */
+    assert(p->tok->fp == NULL || p->tok->fp == stdin);
+
+    char *cur_line = p->tok->fp == NULL ? p->tok->str : p->tok->stdin_content;
+    if (cur_line == NULL) {
+        /* stdin_content is only filled in while reading at a prompt, so it
+           can still be NULL here; report an empty line instead of crashing */
+        return PyUnicode_DecodeUTF8("", 0, "replace");
+    }
+
+    for (Py_ssize_t i = 0; i < lineno - 1; i++) {
+        char *newline = strchr(cur_line, '\n');
+        if (newline == NULL) {
+            /* lineno is past the last stored line: stop at the last line we
+               have rather than computing NULL + 1 and crashing */
+            break;
+        }
+        cur_line = newline + 1;
+    }
+
+    char *next_newline;
+    if ((next_newline = strchr(cur_line, '\n')) == NULL) { // This is the last line
+        next_newline = cur_line + strlen(cur_line);
+    }
+    return PyUnicode_DecodeUTF8(cur_line, next_newline - cur_line, "replace");
+}
+
 void *
 _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
                                     Py_ssize_t lineno, Py_ssize_t col_offset,
@@ -416,8 +452,22 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
     }
 
     if (!error_line) {
-        Py_ssize_t size = p->tok->inp - p->tok->buf;
-        error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
+        /* PyErr_ProgramTextObject was not called or returned NULL. If it was not called,
+           then we need to find the error line from some other source, because
+           p->start_rule != Py_file_input. If it returned NULL, then it either unexpectedly
+           failed or we're parsing from a string or the REPL. There's a third edge case where
+           we're actually parsing from a file, which has an E_EOF SyntaxError and in that case
+           `PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which
+           does not physically exist */
+        assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF);
+
+        if (p->tok->lineno == lineno) {
+            Py_ssize_t size = p->tok->inp - p->tok->buf;
+            error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
+        }
+        else {
+            error_line = get_error_line(p, lineno);
+        }
         if (!error_line) {
             goto error;
         }
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 96539bd5565..62cd2966231 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -81,6 +81,7 @@ tok_new(void)
     tok->decoding_readline = NULL;
     tok->decoding_buffer = NULL;
     tok->type_comments = 0;
+    tok->stdin_content = NULL;
 
     tok->async_hacks = 0;
     tok->async_def = 0;
@@ -816,6 +817,8 @@ PyTokenizer_Free(struct tok_state *tok)
         PyMem_Free(tok->buf);
     if (tok->input)
         PyMem_Free(tok->input);
+    if (tok->stdin_content)
+        PyMem_Free(tok->stdin_content);
     PyMem_Free(tok);
 }
 
@@ -856,6 +859,24 @@ tok_nextc(struct tok_state *tok)
                 if (translated == NULL)
                     return EOF;
                 newtok = translated;
+                /* Remember every line typed at the prompt so that earlier
+                   lines of a multi-line statement can be shown in a
+                   SyntaxError. */
+                size_t old_len = tok->stdin_content ? strlen(tok->stdin_content) : 0;
+                size_t new_len = strlen(translated);
+                /* PyMem_Realloc(NULL, n) behaves like PyMem_Malloc(n), and on
+                   failure the old buffer is left untouched for
+                   PyTokenizer_Free to release. */
+                char *new_str = PyMem_Realloc(tok->stdin_content, old_len + new_len + 1);
+                if (new_str == NULL) {
+                    /* newtok (== translated) has to be released on this
+                       early exit, otherwise it is leaked */
+                    PyMem_Free(newtok);
+                    tok->done = E_NOMEM;
+                    return EOF;
+                }
+                memcpy(new_str + old_len, translated, new_len + 1);
+                tok->stdin_content = new_str;
             }
             if (tok->encoding && newtok && *newtok) {
                 /* Recode to UTF-8 */
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index 5660ea38e94..b659f34796e 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -37,6 +37,7 @@ struct tok_state {
     int atbol;          /* Nonzero if at begin of new line */
     int pendin;         /* Pending indents (if > 0) or dedents (if < 0) */
     const char *prompt, *nextprompt;    /* For interactive prompting */
+    char *stdin_content; /* Lines read so far from the interactive prompt */
     int lineno;         /* Current line number */
     int first_lineno;   /* First line of a single line or multi line string
                             expression (cf. issue 16806) */