bpo-40334: Disallow invalid single statements in the new parser (GH-19774)
After parsing finishes in single-statement mode, the tokenizer buffer must be checked for additional lines, and a `SyntaxError` must be raised if any are found. Co-authored-by: Pablo Galindo <Pablogsal@gmail.com>
This commit is contained in:
parent
a4dfe8ede5
commit
6d65087655
|
@ -501,7 +501,6 @@ if 1:
|
||||||
self.compile_single("if x:\n f(x)\nelse:\n g(x)")
|
self.compile_single("if x:\n f(x)\nelse:\n g(x)")
|
||||||
self.compile_single("class T:\n pass")
|
self.compile_single("class T:\n pass")
|
||||||
|
|
||||||
@support.skip_if_new_parser('Pegen does not disallow multiline single stmts')
|
|
||||||
def test_bad_single_statement(self):
|
def test_bad_single_statement(self):
|
||||||
self.assertInvalidSingle('1\n2')
|
self.assertInvalidSingle('1\n2')
|
||||||
self.assertInvalidSingle('def f(): pass')
|
self.assertInvalidSingle('def f(): pass')
|
||||||
|
|
|
@ -911,6 +911,52 @@ _PyPegen_number_token(Parser *p)
|
||||||
p->arena);
|
p->arena);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int // bool
|
||||||
|
newline_in_string(Parser *p, const char *cur)
|
||||||
|
{
|
||||||
|
for (char c = *cur; cur >= p->tok->buf; c = *--cur) {
|
||||||
|
if (c == '\'' || c == '"') {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check that the source for a single input statement really is a single
|
||||||
|
statement by looking at what is left in the buffer after parsing.
|
||||||
|
Trailing whitespace and comments are OK. */
|
||||||
|
static int // bool
|
||||||
|
bad_single_statement(Parser *p)
|
||||||
|
{
|
||||||
|
const char *cur = strchr(p->tok->buf, '\n');
|
||||||
|
|
||||||
|
/* Newlines are allowed if preceded by a line continuation character
|
||||||
|
or if they appear inside a string. */
|
||||||
|
if (!cur || *(cur - 1) == '\\' || newline_in_string(p, cur)) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
char c = *cur;
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
while (c == ' ' || c == '\t' || c == '\n' || c == '\014') {
|
||||||
|
c = *++cur;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!c) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (c != '#') {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Suck up comment. */
|
||||||
|
while (c && c != '\n') {
|
||||||
|
c = *++cur;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
_PyPegen_Parser_Free(Parser *p)
|
_PyPegen_Parser_Free(Parser *p)
|
||||||
{
|
{
|
||||||
|
@ -1014,6 +1060,11 @@ _PyPegen_run_parser(Parser *p)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (p->start_rule == Py_single_input && bad_single_statement(p)) {
|
||||||
|
p->tok->done = E_BADSINGLE; // This is not necessary for now, but might be in the future
|
||||||
|
return RAISE_SYNTAX_ERROR("multiple statements found while compiling a single statement");
|
||||||
|
}
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue