From d55133f49fe678fbf047a647aa8bb8b520410e8d Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Tue, 28 Apr 2020 03:23:35 +0300 Subject: [PATCH] bpo-40334: Catch E_EOF error, when the tokenizer returns ERRORTOKEN (GH-19743) Before this commit, an E_EOF error was only caught after the parser had exited. There are some cases, though, where the tokenizer returns ERRORTOKEN *and* has set an E_EOF error (like when EOF directly follows a line continuation character), which weren't correctly handled before. --- Lib/test/test_eof.py | 2 -- Parser/pegen/pegen.c | 9 ++++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_eof.py b/Lib/test/test_eof.py index f8065788cec..9ef8eb11874 100644 --- a/Lib/test/test_eof.py +++ b/Lib/test/test_eof.py @@ -26,7 +26,6 @@ class EOFTestCase(unittest.TestCase): else: raise support.TestFailed - @support.skip_if_new_parser("TODO for PEG -- fails with new parser") def test_line_continuation_EOF(self): """A continuation at the end of input must be an error; bpo2180.""" expect = 'unexpected EOF while parsing (<string>, line 1)' @@ -37,7 +36,6 @@ class EOFTestCase(unittest.TestCase): exec('\\') self.assertEqual(str(excinfo.exception), expect) - @unittest.skip("TODO for PEG -- fails even with old parser now") @unittest.skipIf(not sys.executable, "sys.executable required") def test_line_continuation_EOF_from_file_bpo2180(self): """Ensure tok_nextc() does not add too many ending newlines.""" diff --git a/Parser/pegen/pegen.c b/Parser/pegen/pegen.c index d75267b2e27..6f78d8c8652 100644 --- a/Parser/pegen/pegen.c +++ b/Parser/pegen/pegen.c @@ -344,13 +344,16 @@ tokenizer_error(Parser *p) break; case E_BADPREFIX: return tokenizer_error_with_col_offset(p, - PyExc_SyntaxError, "invalid string prefix"); + errtype, "invalid string prefix"); case E_EOFS: return tokenizer_error_with_col_offset(p, - PyExc_SyntaxError, "EOF while scanning triple-quoted string literal"); + errtype, "EOF while scanning triple-quoted string literal"); 
case E_EOLS: return tokenizer_error_with_col_offset(p, - PyExc_SyntaxError, "EOL while scanning string literal"); + errtype, "EOL while scanning string literal"); + case E_EOF: + return tokenizer_error_with_col_offset(p, + errtype, "unexpected EOF while parsing"); case E_DEDENT: return tokenizer_error_with_col_offset(p, PyExc_IndentationError, "unindent does not match any outer indentation level");