bpo-40334: Disallow invalid single statements in the new parser (GH-19774)

After parsing is done in single statement mode, the tokenizer buffer has to be checked for additional lines, and a `SyntaxError` must be raised if there are any.

Co-authored-by: Pablo Galindo <Pablogsal@gmail.com>
This commit is contained in:
Lysandros Nikolaou 2020-04-29 04:42:27 +03:00 committed by GitHub
parent a4dfe8ede5
commit 6d65087655
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 51 additions and 1 deletions

View File

@ -501,7 +501,6 @@ if 1:
self.compile_single("if x:\n f(x)\nelse:\n g(x)") self.compile_single("if x:\n f(x)\nelse:\n g(x)")
self.compile_single("class T:\n pass") self.compile_single("class T:\n pass")
@support.skip_if_new_parser('Pegen does not disallow multiline single stmts')
def test_bad_single_statement(self): def test_bad_single_statement(self):
self.assertInvalidSingle('1\n2') self.assertInvalidSingle('1\n2')
self.assertInvalidSingle('def f(): pass') self.assertInvalidSingle('def f(): pass')

View File

@ -911,6 +911,52 @@ _PyPegen_number_token(Parser *p)
p->arena); p->arena);
} }
/* Return 1 if a single- or double-quote character occurs anywhere in the
   tokenizer buffer between its start (p->tok->buf) and `cur`, inclusive;
   return 0 otherwise.

   This only detects the presence of a quote character before `cur`; it does
   not check that the quote actually opens a string that is still unterminated
   at `cur`.  `cur` is expected to point into p->tok->buf (the visible caller
   passes the result of strchr() on that buffer). */
static int // bool
newline_in_string(Parser *p, const char *cur)
{
    /* Scan forward from the start of the buffer.  A backward scan that
       decrements `cur` past the first byte would read (and form a pointer
       to) memory before the buffer when no quote is present. */
    for (const char *s = p->tok->buf; s <= cur; s++) {
        if (*s == '\'' || *s == '"') {
            return 1;
        }
    }
    return 0;
}
/* Check that the source for a single input statement really is a single
   statement by looking at what is left in the buffer after parsing.
   Trailing whitespace and comments are OK.

   Returns 1 if something other than whitespace or comments follows the first
   newline in p->tok->buf, and 0 otherwise (including when the buffer contains
   no newline at all). */
static int // bool
bad_single_statement(Parser *p)
{
    const char *buf = p->tok->buf;
    const char *cur = strchr(buf, '\n');

    /* No newline: nothing can follow the statement. */
    if (!cur) {
        return 0;
    }
    /* Newlines are allowed if preceded by a line continuation character.
       Only inspect the preceding byte when one exists: if the buffer starts
       with the newline, cur - 1 would point before the buffer. */
    if (cur != buf && *(cur - 1) == '\\') {
        return 0;
    }
    /* Newlines are also allowed if they appear inside a string. */
    if (newline_in_string(p, cur)) {
        return 0;
    }

    char c = *cur;
    for (;;) {
        /* Skip blanks, tabs, newlines and form feeds ('\014'). */
        while (c == ' ' || c == '\t' || c == '\n' || c == '\014') {
            c = *++cur;
        }

        /* Reached the terminating NUL: only whitespace/comments remained. */
        if (!c) {
            return 0;
        }

        /* Anything that is not the start of a comment is extra source. */
        if (c != '#') {
            return 1;
        }

        /* Skip the comment up to (not including) the end of the line. */
        while (c && c != '\n') {
            c = *++cur;
        }
    }
}
void void
_PyPegen_Parser_Free(Parser *p) _PyPegen_Parser_Free(Parser *p)
{ {
@ -1014,6 +1060,11 @@ _PyPegen_run_parser(Parser *p)
return NULL; return NULL;
} }
if (p->start_rule == Py_single_input && bad_single_statement(p)) {
p->tok->done = E_BADSINGLE; // This is not necessary for now, but might be in the future
return RAISE_SYNTAX_ERROR("multiple statements found while compiling a single statement");
}
return res; return res;
} }