From 0219017df7ec41839fd0d56a3076b5f09c58d313 Mon Sep 17 00:00:00 2001
From: Pablo Galindo Salgado
Date: Thu, 7 Oct 2021 22:33:05 +0100
Subject: [PATCH] bpo-45408: Don't override previous tokenizer errors in the
 second parser pass (GH-28812)

---
 Lib/test/test_ast.py                          | 8 ++++++++
 Lib/test/test_exceptions.py                   | 2 +-
 .../2021-10-07-21-26-44.bpo-45408.qUqzcd.rst  | 2 ++
 Parser/pegen.c                                | 5 ++++-
 4 files changed, 15 insertions(+), 2 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-10-07-21-26-44.bpo-45408.qUqzcd.rst

diff --git a/Lib/test/test_ast.py b/Lib/test/test_ast.py
index 1f4257b1191..e630677f796 100644
--- a/Lib/test/test_ast.py
+++ b/Lib/test/test_ast.py
@@ -1075,6 +1075,14 @@ Module(
         with self.assertRaisesRegex(ValueError, msg):
             ast.literal_eval(node)
 
+    def test_literal_eval_syntax_errors(self):
+        msg = "unexpected character after line continuation character"
+        with self.assertRaisesRegex(SyntaxError, msg):
+            ast.literal_eval(r'''
+                \
+                (\
+            \ ''')
+
     def test_bad_integer(self):
         # issue13436: Bad error message with invalid numeric values
         body = [ast.ImportFrom(module='time',
diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py
index 85dc3c0f220..02489a2bd92 100644
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -223,7 +223,7 @@ class ExceptionTests(unittest.TestCase):
         check('x = "a', 1, 5)
         check('lambda x: x = 2', 1, 1)
         check('f{a + b + c}', 1, 1)
-        check('[file for str(file) in []\n])', 2, 2)
+        check('[file for str(file) in []\n])', 1, 11)
         check('a = « hello » « world »', 1, 5)
         check('[\nfile\nfor str(file)\nin\n[]\n]', 3, 5)
         check('[file for\n str(file) in []]', 2, 2)
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-10-07-21-26-44.bpo-45408.qUqzcd.rst b/Misc/NEWS.d/next/Core and Builtins/2021-10-07-21-26-44.bpo-45408.qUqzcd.rst
new file mode 100644
index 00000000000..e4d4db9cb95
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-10-07-21-26-44.bpo-45408.qUqzcd.rst
@@ -0,0 +1,2 @@
+Fix a crash in the parser when reporting tokenizer errors that occur at the
+same time unclosed parentheses are detected. Patch by Pablo Galindo.
diff --git a/Parser/pegen.c b/Parser/pegen.c
index 7e2d37caae3..a9896356e5b 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -1342,13 +1342,16 @@ _PyPegen_run_parser(Parser *p)
 {
     void *res = _PyPegen_parse(p);
     if (res == NULL) {
+        if (PyErr_Occurred() && !PyErr_ExceptionMatches(PyExc_SyntaxError)) {
+            return NULL;
+        }
         Token *last_token = p->tokens[p->fill - 1];
         reset_parser_state(p);
         _PyPegen_parse(p);
         if (PyErr_Occurred()) {
             // Prioritize tokenizer errors to custom syntax errors raised
             // on the second phase only if the errors come from the parser.
-            if (p->tok->done != E_ERROR && PyErr_ExceptionMatches(PyExc_SyntaxError)) {
+            if (p->tok->done == E_DONE && PyErr_ExceptionMatches(PyExc_SyntaxError)) {
                 _PyPegen_check_tokenizer_errors(p);
             }
             return NULL;
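
Note (not part of the commit): a minimal standalone sketch of the scenario the new test in Lib/test/test_ast.py exercises. The input string is copied from that test; the variable name `source` is illustrative. It combines an unclosed parenthesis with a stray character after a line continuation, the case where the second parser pass previously could crash instead of surfacing the tokenizer error:

import ast

# Same input as the new test_literal_eval_syntax_errors test: an unclosed
# "(" plus a backslash followed by a space (invalid line continuation).
source = r'''
                \
                (\
            \ '''

try:
    ast.literal_eval(source)
except SyntaxError as exc:
    # With the fix, the tokenizer error is reported as a normal SyntaxError:
    # "unexpected character after line continuation character"
    print("SyntaxError:", exc.msg)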