From 6d6508765514c7c10719478a0430f5e47c9a96ac Mon Sep 17 00:00:00 2001
From: Lysandros Nikolaou
Date: Wed, 29 Apr 2020 04:42:27 +0300
Subject: [PATCH] bpo-40334: Disallow invalid single statements in the new parser (GH-19774)

After parsing is done in single statement mode, the tokenizer buffer has
to be checked for additional lines and a `SyntaxError` must be raised,
in case there are any.

Co-authored-by: Pablo Galindo
---
 Lib/test/test_compile.py |  1 -
 Parser/pegen/pegen.c     | 62 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py
index a507ac09149..566ca27fca8 100644
--- a/Lib/test/test_compile.py
+++ b/Lib/test/test_compile.py
@@ -501,7 +501,6 @@ if 1:
         self.compile_single("if x:\n f(x)\nelse:\n g(x)")
         self.compile_single("class T:\n pass")
 
-    @support.skip_if_new_parser('Pegen does not disallow multiline single stmts')
     def test_bad_single_statement(self):
         self.assertInvalidSingle('1\n2')
         self.assertInvalidSingle('def f(): pass')
diff --git a/Parser/pegen/pegen.c b/Parser/pegen/pegen.c
index ef95aacb7f0..39da2709991 100644
--- a/Parser/pegen/pegen.c
+++ b/Parser/pegen/pegen.c
@@ -911,6 +911,63 @@ _PyPegen_number_token(Parser *p)
                         p->arena);
 }
 
+/* Return 1 if a quote character (' or ") appears anywhere between the start
+   of the tokenizer buffer and cur (inclusive), 0 otherwise.  No attempt is
+   made to check that the quote is still open at cur.  The scan stops at
+   p->tok->buf and never reads the byte before it. */
+static int // bool
+newline_in_string(Parser *p, const char *cur)
+{
+    const char *s = cur + 1;
+
+    while (s > p->tok->buf) {
+        s--;
+        if (*s == '\'' || *s == '"') {
+            return 1;
+        }
+    }
+    return 0;
+}
+
+/* Check that the source for a single input statement really is a single
+   statement by looking at what is left in the buffer after parsing.
+   Trailing whitespace and comments are OK.  Returns 1 if anything else
+   follows the first newline in p->tok->buf (unless that newline is
+   excused by the checks below), 0 otherwise. */
+static int // bool
+bad_single_statement(Parser *p)
+{
+    const char *cur = strchr(p->tok->buf, '\n');
+
+    /* Newlines are allowed if preceded by a line continuation character
+       or if they appear inside a string.  Only look at the preceding
+       character when the newline is not the first byte of the buffer. */
+    if (!cur || (cur != p->tok->buf && *(cur - 1) == '\\') || newline_in_string(p, cur)) {
+        return 0;
+    }
+    char c = *cur;
+
+    for (;;) {
+        /* Skip spaces, tabs, newlines and form feeds ('\014'). */
+        while (c == ' ' || c == '\t' || c == '\n' || c == '\014') {
+            c = *++cur;
+        }
+
+        if (!c) {
+            return 0;
+        }
+
+        if (c != '#') {
+            return 1;
+        }
+
+        /* Suck up comment. */
+        while (c && c != '\n') {
+            c = *++cur;
+        }
+    }
+}
+
 void
 _PyPegen_Parser_Free(Parser *p)
 {
@@ -1014,6 +1071,11 @@ _PyPegen_run_parser(Parser *p)
         return NULL;
     }
 
+    if (p->start_rule == Py_single_input && bad_single_statement(p)) {
+        p->tok->done = E_BADSINGLE; // This is not necessary for now, but might be in the future
+        return RAISE_SYNTAX_ERROR("multiple statements found while compiling a single statement");
+    }
+
     return res;
 }
 