From 9a4b38f66b3e674db94e07980e1cacb39e388c73 Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Wed, 15 Apr 2020 21:22:10 +0300 Subject: [PATCH] bpo-40267: Fix message when last input character produces a SyntaxError (GH-19521) When there is a SyntaxError after reading the last input character from the tokenizer and if no newline follows it, the error message used to be `unexpected EOF while parsing`, which is wrong. --- Include/token.h | 4 ++++ Lib/test/test_fstring.py | 2 +- .../2020-04-14-18-54-50.bpo-40267.Q2N6Bw.rst | 1 + Parser/parsetok.c | 3 +++ Tools/scripts/generate_token.py | 4 ++++ 5 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2020-04-14-18-54-50.bpo-40267.Q2N6Bw.rst diff --git a/Include/token.h b/Include/token.h index e08708baf19..9b8a3aae074 100644 --- a/Include/token.h +++ b/Include/token.h @@ -78,6 +78,10 @@ extern "C" { #define ISTERMINAL(x) ((x) < NT_OFFSET) #define ISNONTERMINAL(x) ((x) >= NT_OFFSET) #define ISEOF(x) ((x) == ENDMARKER) +#define ISWHITESPACE(x) ((x) == ENDMARKER || \ + (x) == NEWLINE || \ + (x) == INDENT || \ + (x) == DEDENT) PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */ diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index 8fd7cf09a99..fe465b7e1d4 100644 --- a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -713,7 +713,7 @@ non-important content # lambda doesn't work without parens, because the colon # makes the parser think it's a format_spec - self.assertAllRaise(SyntaxError, 'unexpected EOF while parsing', + self.assertAllRaise(SyntaxError, 'invalid syntax', ["f'{lambda x:x}'", ]) diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-04-14-18-54-50.bpo-40267.Q2N6Bw.rst b/Misc/NEWS.d/next/Core and Builtins/2020-04-14-18-54-50.bpo-40267.Q2N6Bw.rst new file mode 100644 index 00000000000..a778594ce9c --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2020-04-14-18-54-50.bpo-40267.Q2N6Bw.rst @@ -0,0 +1 @@ +Fix the tokenizer to display the correct error message, when there is a SyntaxError on the last input character and no newline follows. It used to be `unexpected EOF while parsing`, while it should be `invalid syntax`. \ No newline at end of file diff --git a/Parser/parsetok.c b/Parser/parsetok.c index cb9472150f2..37ca65c275a 100644 --- a/Parser/parsetok.c +++ b/Parser/parsetok.c @@ -332,6 +332,9 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, PyParser_AddToken(ps, (int)type, str, lineno, col_offset, tok->lineno, end_col_offset, &(err_ret->expected))) != E_OK) { + if (tok->done == E_EOF && !ISWHITESPACE(type)) { + tok->done = E_SYNTAX; + } if (err_ret->error != E_DONE) { PyObject_FREE(str); err_ret->token = type; diff --git a/Tools/scripts/generate_token.py b/Tools/scripts/generate_token.py index f2745e8353f..77bb5bd5eca 100755 --- a/Tools/scripts/generate_token.py +++ b/Tools/scripts/generate_token.py @@ -69,6 +69,10 @@ extern "C" { #define ISTERMINAL(x) ((x) < NT_OFFSET) #define ISNONTERMINAL(x) ((x) >= NT_OFFSET) #define ISEOF(x) ((x) == ENDMARKER) +#define ISWHITESPACE(x) ((x) == ENDMARKER || \\ + (x) == NEWLINE || \\ + (x) == INDENT || \\ + (x) == DEDENT) PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */