bpo-42997: Improve error message for missing : before suites (GH-24292)

* Add to the peg generator a new directive ('&&') that expects a token
  and hard-fails the parse if the token is not found. This allows syntax
  errors for missing tokens to be emitted quickly.

* Use the new grammar element to hard-fail if the ':' is missing before
  suites.
This commit is contained in:
Pablo Galindo 2021-02-02 19:54:22 +00:00 committed by GitHub
parent 802b645e81
commit 58fb156edd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 1219 additions and 428 deletions

View File

@ -27,6 +27,12 @@ class PEGLexer(RegexLexer):
tokens = {
"ws": [(r"\n", Text), (r"\s+", Text), (r"#.*$", Comment.Singleline),],
"lookaheads": [
# Forced tokens
(r"(&&)(?=\w+\s?)", bygroups(None)),
(r"(&&)(?='.+'\s?)", bygroups(None)),
(r'(&&)(?=".+"\s?)', bygroups(None)),
(r"(&&)(?=\(.+\)\s?)", bygroups(None)),
(r"(?<=\|\s)(&\w+\s?)", bygroups(None)),
(r"(?<=\|\s)(&'.+'\s?)", bygroups(None)),
(r'(?<=\|\s)(&".+"\s?)', bygroups(None)),

View File

@ -162,22 +162,22 @@ dotted_name[expr_ty]:
| NAME
if_stmt[stmt_ty]:
| 'if' a=named_expression ':' b=block c=elif_stmt {
| 'if' a=named_expression &&':' b=block c=elif_stmt {
_Py_If(a, b, CHECK(asdl_stmt_seq*, _PyPegen_singleton_seq(p, c)), EXTRA) }
| 'if' a=named_expression ':' b=block c=[else_block] { _Py_If(a, b, c, EXTRA) }
| 'if' a=named_expression &&':' b=block c=[else_block] { _Py_If(a, b, c, EXTRA) }
elif_stmt[stmt_ty]:
| 'elif' a=named_expression ':' b=block c=elif_stmt {
| 'elif' a=named_expression &&':' b=block c=elif_stmt {
_Py_If(a, b, CHECK(asdl_stmt_seq*, _PyPegen_singleton_seq(p, c)), EXTRA) }
| 'elif' a=named_expression ':' b=block c=[else_block] { _Py_If(a, b, c, EXTRA) }
else_block[asdl_stmt_seq*]: 'else' ':' b=block { b }
| 'elif' a=named_expression &&':' b=block c=[else_block] { _Py_If(a, b, c, EXTRA) }
else_block[asdl_stmt_seq*]: 'else' &&':' b=block { b }
while_stmt[stmt_ty]:
| 'while' a=named_expression ':' b=block c=[else_block] { _Py_While(a, b, c, EXTRA) }
| 'while' a=named_expression &&':' b=block c=[else_block] { _Py_While(a, b, c, EXTRA) }
for_stmt[stmt_ty]:
| 'for' t=star_targets 'in' ~ ex=star_expressions ':' tc=[TYPE_COMMENT] b=block el=[else_block] {
| 'for' t=star_targets 'in' ~ ex=star_expressions &&':' tc=[TYPE_COMMENT] b=block el=[else_block] {
_Py_For(t, ex, b, el, NEW_TYPE_COMMENT(p, tc), EXTRA) }
| ASYNC 'for' t=star_targets 'in' ~ ex=star_expressions ':' tc=[TYPE_COMMENT] b=block el=[else_block] {
| ASYNC 'for' t=star_targets 'in' ~ ex=star_expressions &&':' tc=[TYPE_COMMENT] b=block el=[else_block] {
CHECK_VERSION(stmt_ty, 5, "Async for loops are", _Py_AsyncFor(t, ex, b, el, NEW_TYPE_COMMENT(p, tc), EXTRA)) }
| invalid_for_target
@ -190,18 +190,20 @@ with_stmt[stmt_ty]:
CHECK_VERSION(stmt_ty, 5, "Async with statements are", _Py_AsyncWith(a, b, NULL, EXTRA)) }
| ASYNC 'with' a[asdl_withitem_seq*]=','.with_item+ ':' tc=[TYPE_COMMENT] b=block {
CHECK_VERSION(stmt_ty, 5, "Async with statements are", _Py_AsyncWith(a, b, NEW_TYPE_COMMENT(p, tc), EXTRA)) }
| invalid_with_stmt
with_item[withitem_ty]:
| e=expression 'as' t=star_target &(',' | ')' | ':') { _Py_withitem(e, t, p->arena) }
| invalid_with_item
| e=expression { _Py_withitem(e, NULL, p->arena) }
try_stmt[stmt_ty]:
| 'try' ':' b=block f=finally_block { _Py_Try(b, NULL, NULL, f, EXTRA) }
| 'try' ':' b=block ex[asdl_excepthandler_seq*]=except_block+ el=[else_block] f=[finally_block] { _Py_Try(b, ex, el, f, EXTRA) }
| 'try' &&':' b=block f=finally_block { _Py_Try(b, NULL, NULL, f, EXTRA) }
| 'try' &&':' b=block ex[asdl_excepthandler_seq*]=except_block+ el=[else_block] f=[finally_block] { _Py_Try(b, ex, el, f, EXTRA) }
except_block[excepthandler_ty]:
| 'except' e=expression t=['as' z=NAME { z }] ':' b=block {
| 'except' e=expression t=['as' z=NAME { z }] &&':' b=block {
_Py_ExceptHandler(e, (t) ? ((expr_ty) t)->v.Name.id : NULL, b, EXTRA) }
| 'except' ':' b=block { _Py_ExceptHandler(NULL, NULL, b, EXTRA) }
| 'except' &&':' b=block { _Py_ExceptHandler(NULL, NULL, b, EXTRA) }
# Force the ':' here as well, so that a missing colon after 'finally' is
# reported as "expected ':'" like every other suite header.
finally_block[asdl_stmt_seq*]: 'finally' &&':' a=block { a }
return_stmt[stmt_ty]:
@ -216,11 +218,11 @@ function_def[stmt_ty]:
| function_def_raw
function_def_raw[stmt_ty]:
| 'def' n=NAME '(' params=[params] ')' a=['->' z=expression { z }] ':' tc=[func_type_comment] b=block {
| 'def' n=NAME '(' params=[params] ')' a=['->' z=expression { z }] &&':' tc=[func_type_comment] b=block {
_Py_FunctionDef(n->v.Name.id,
(params) ? params : CHECK(arguments_ty, _PyPegen_empty_arguments(p)),
b, NULL, a, NEW_TYPE_COMMENT(p, tc), EXTRA) }
| ASYNC 'def' n=NAME '(' params=[params] ')' a=['->' z=expression { z }] ':' tc=[func_type_comment] b=block {
| ASYNC 'def' n=NAME '(' params=[params] ')' a=['->' z=expression { z }] &&':' tc=[func_type_comment] b=block {
CHECK_VERSION(
stmt_ty,
5,
@ -300,7 +302,7 @@ class_def[stmt_ty]:
| a=decorators b=class_def_raw { _PyPegen_class_def_decorators(p, a, b) }
| class_def_raw
class_def_raw[stmt_ty]:
| 'class' a=NAME b=['(' z=[arguments] ')' { z }] ':' c=block {
| 'class' a=NAME b=['(' z=[arguments] ')' { z }] &&':' c=block {
_Py_ClassDef(a->v.Name.id,
(b) ? ((expr_ty) b)->v.Call.args : NULL,
(b) ? ((expr_ty) b)->v.Call.keywords : NULL,
@ -718,7 +720,7 @@ invalid_double_type_comments:
| TYPE_COMMENT NEWLINE TYPE_COMMENT NEWLINE INDENT {
RAISE_SYNTAX_ERROR("Cannot have two type comments on def") }
invalid_with_item:
| expression 'as' a=expression {
| expression 'as' a=expression &(',' | ')' | ':') {
RAISE_SYNTAX_ERROR_INVALID_TARGET(STAR_TARGETS, a) }
invalid_for_target:
@ -731,3 +733,7 @@ invalid_group:
invalid_import_from_targets:
| import_from_as_names ',' {
RAISE_SYNTAX_ERROR("trailing comma not allowed without surrounding parentheses") }
invalid_with_stmt:
| [ASYNC] 'with' ','.(expression ['as' star_target])+ &&':'
| [ASYNC] 'with' '(' ','.(expressions ['as' star_target])+ ','? ')' &&':'

View File

@ -229,7 +229,7 @@ SyntaxError: cannot assign to function call
>>> with a as b
Traceback (most recent call last):
SyntaxError: invalid syntax
SyntaxError: expected ':'
>>> p = p =
Traceback (most recent call last):
@ -331,7 +331,7 @@ SyntaxError: Generator expression must be parenthesized
>>> class C(x for x in L):
... pass
Traceback (most recent call last):
SyntaxError: invalid syntax
SyntaxError: expected ':'
>>> def g(*args, **kwargs):
... print(args, sorted(kwargs.items()))
@ -708,6 +708,107 @@ leading to spurious errors.
...
SyntaxError: cannot assign to function call
Missing ':' before suites:
>>> def f()
... pass
Traceback (most recent call last):
SyntaxError: expected ':'
>>> class A
... pass
Traceback (most recent call last):
SyntaxError: expected ':'
>>> if 1
... pass
... elif 1:
... pass
... else:
... x() = 1
Traceback (most recent call last):
SyntaxError: expected ':'
>>> if 1:
... pass
... elif 1
... pass
... else:
... x() = 1
Traceback (most recent call last):
SyntaxError: expected ':'
>>> if 1:
... pass
... elif 1:
... pass
... else
... x() = 1
Traceback (most recent call last):
SyntaxError: expected ':'
>>> for x in range(10)
... pass
Traceback (most recent call last):
SyntaxError: expected ':'
>>> while True
... pass
Traceback (most recent call last):
SyntaxError: expected ':'
>>> with blech as something
... pass
Traceback (most recent call last):
SyntaxError: expected ':'
>>> with blech
... pass
Traceback (most recent call last):
SyntaxError: expected ':'
>>> with blech, block as something
... pass
Traceback (most recent call last):
SyntaxError: expected ':'
>>> with blech, block as something, bluch
... pass
Traceback (most recent call last):
SyntaxError: expected ':'
>>> with (blech as something)
... pass
Traceback (most recent call last):
SyntaxError: expected ':'
>>> with (blech)
... pass
Traceback (most recent call last):
SyntaxError: expected ':'
>>> with (blech, block as something)
... pass
Traceback (most recent call last):
SyntaxError: expected ':'
>>> with (blech, block as something, bluch)
... pass
Traceback (most recent call last):
SyntaxError: expected ':'
>>> try
... pass
Traceback (most recent call last):
SyntaxError: expected ':'
>>> try:
... pass
... except
... pass
Traceback (most recent call last):
SyntaxError: expected ':'
Make sure that the old "raise X, Y[, Z]" form is gone:
>>> raise X, Y
Traceback (most recent call last):
@ -992,7 +1093,7 @@ def func2():
finally:
pass
"""
self._check_error(code, "invalid syntax")
self._check_error(code, "expected ':'")
def test_invalid_line_continuation_left_recursive(self):
# Check bpo-42218: SyntaxErrors following left-recursive rules

View File

@ -0,0 +1 @@
Improve error message for missing ":" before blocks. Patch by Pablo Galindo.

File diff suppressed because it is too large Load Diff

View File

@ -782,7 +782,6 @@ _PyPegen_is_memoized(Parser *p, int type, void *pres)
return 0;
}
int
_PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p)
{
@ -836,6 +835,28 @@ _PyPegen_expect_token(Parser *p, int type)
return t;
}
/* Consume the token at the current position, which must be of `type`.
 *
 * Unlike _PyPegen_expect_token, a mismatch is a hard failure: a syntax error
 * of the form "expected '<expected>'" is raised at the offending token
 * instead of letting the caller try another alternative.
 *
 * Returns the consumed token (and advances p->mark), or NULL if an error was
 * already pending, if fetching a token failed, or if the token type did not
 * match.
 */
Token *
_PyPegen_expect_forced_token(Parser *p, int type, const char* expected) {
    /* Do nothing once an error has been flagged. */
    if (p->error_indicator == 1) {
        return NULL;
    }

    /* mark has caught up with fill: presumably no token is buffered at this
     * position yet, so request one; a negative result is treated as an error. */
    if (p->mark == p->fill && _PyPegen_fill_token(p) < 0) {
        p->error_indicator = 1;
        return NULL;
    }

    Token *next = p->tokens[p->mark];
    if (next->type == type) {
        p->mark++;
        return next;
    }

    /* Wrong token: report what was expected, located at the token found. */
    RAISE_SYNTAX_ERROR_KNOWN_LOCATION(next, "expected '%s'", expected);
    return NULL;
}
expr_ty
_PyPegen_expect_soft_keyword(Parser *p, const char *keyword)
{

View File

@ -102,10 +102,7 @@ typedef struct {
arg_ty kwarg;
} StarEtc;
typedef struct {
operator_ty kind;
} AugOperator;
typedef struct { operator_ty kind; } AugOperator;
typedef struct {
void *element;
int is_keyword;
@ -118,12 +115,14 @@ int _PyPegen_insert_memo(Parser *p, int mark, int type, void *node);
int _PyPegen_update_memo(Parser *p, int mark, int type, void *node);
int _PyPegen_is_memoized(Parser *p, int type, void *pres);
int _PyPegen_lookahead_with_name(int, expr_ty (func)(Parser *), Parser *);
int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int);
int _PyPegen_lookahead_with_string(int , expr_ty (func)(Parser *, const char*), Parser *, const char*);
int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *);
Token *_PyPegen_expect_token(Parser *p, int type);
Token *_PyPegen_expect_forced_token(Parser *p, int type, const char* expected);
expr_ty _PyPegen_expect_soft_keyword(Parser *p, const char *keyword);
Token *_PyPegen_get_last_nonnwhitespace_token(Parser *);
int _PyPegen_fill_token(Parser *p);

View File

@ -8,6 +8,7 @@ from pegen import grammar
from pegen.grammar import (
Alt,
Cut,
Forced,
Gather,
GrammarVisitor,
Group,
@ -252,6 +253,24 @@ class CCallMakerVisitor(GrammarVisitor):
def visit_NegativeLookahead(self, node: NegativeLookahead) -> FunctionCall:
return self.lookahead_call_helper(node, 0)
def visit_Forced(self, node: Forced) -> FunctionCall:
    """Build the C call emitted for a forced (``&&``) grammar item.

    Only literal tokens are supported: the wrapped item must resolve to a
    ``NodeTypes.GENERIC_TOKEN`` call and its literal must be present in
    ``self.exact_tokens``.

    Returns:
        A ``FunctionCall`` invoking ``_PyPegen_expect_forced_token`` with the
        token type and the literal text used in the error message.

    Raises:
        NotImplementedError: if the wrapped item is not a generic token.
    """
    call = self.generate_call(node.node)
    if call.nodetype != NodeTypes.GENERIC_TOKEN:
        raise NotImplementedError(
            f"Forced tokens don't work with {call.nodetype} tokens")

    # The grammar stores the literal with its quotes; strip them.
    val = ast.literal_eval(node.node.value)
    # Forced has no ``value`` attribute of its own; report the wrapped
    # item's literal so a failing check yields the intended message.
    assert val in self.exact_tokens, f"{node.node.value} is not a known literal"
    token_type = self.exact_tokens[val]
    return FunctionCall(
        assigned_variable="_literal",
        function="_PyPegen_expect_forced_token",
        arguments=["p", token_type, f'"{val}"'],
        nodetype=NodeTypes.GENERIC_TOKEN,
        return_type="Token *",
        comment=f"forced_token='{val}'",
    )
def visit_Opt(self, node: Opt) -> FunctionCall:
call = self.generate_call(node.node)
return FunctionCall(

View File

@ -288,6 +288,23 @@ class NamedItem:
gen.callmakervisitor.visit(self.item)
class Forced:
    """Grammar item written ``&&item``: a wrapped item that is forced."""

    def __init__(self, node: Plain):
        # The wrapped grammar item.
        self.node = node

    def __str__(self) -> str:
        """Render the item back in grammar syntax."""
        return "&&" + str(self.node)

    def __iter__(self) -> Iterator[Plain]:
        """Yield the single wrapped item."""
        yield from (self.node,)

    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
        """Always report this item as nullable; ``rules`` is not consulted."""
        return True

    def initial_names(self) -> AbstractSet[str]:
        """Return a fresh empty set: no initial names are contributed."""
        no_names: AbstractSet[str] = set()
        return no_names
class Lookahead:
def __init__(self, node: Plain, sign: str):
self.node = node
@ -459,7 +476,7 @@ class Cut:
Plain = Union[Leaf, Group]
Item = Union[Plain, Opt, Repeat, Lookahead, Rhs, Cut]
Item = Union[Plain, Opt, Repeat, Forced, Lookahead, Rhs, Cut]
RuleName = Tuple[str, str]
MetaTuple = Tuple[str, Optional[str]]
MetaList = List[MetaTuple]

View File

@ -13,6 +13,7 @@ from ast import literal_eval
from pegen.grammar import (
Alt,
Cut,
Forced,
Gather,
Group,
Item,
@ -402,7 +403,7 @@ class GeneratedParser(Parser):
@memoize
def named_item(self) -> Optional[NamedItem]:
# named_item: NAME '[' NAME '*' ']' '=' ~ item | NAME '[' NAME ']' '=' ~ item | NAME '=' ~ item | item | lookahead
# named_item: NAME '[' NAME '*' ']' '=' ~ item | NAME '[' NAME ']' '=' ~ item | NAME '=' ~ item | item | forced_atom | lookahead
mark = self.mark()
cut = False
if (
@ -465,6 +466,13 @@ class GeneratedParser(Parser):
self.reset(mark)
if cut: return None
cut = False
if (
(it := self.forced_atom())
):
return NamedItem ( None , it )
self.reset(mark)
if cut: return None
cut = False
if (
(it := self.lookahead())
):
@ -473,6 +481,25 @@ class GeneratedParser(Parser):
if cut: return None
return None
@memoize
def forced_atom(self) -> Optional[Forced]:
    """Parse a forced item (``&&`` followed by an atom).

    Returns a ``Forced`` wrapping the parsed atom, or ``None`` (with the
    position reset to where the rule started) if the input does not match.
    """
    # forced_atom: '&' '&' ~ atom
    mark = self.mark()
    cut = False
    if (
        (literal := self.expect('&'))
        and
        (literal_1 := self.expect('&'))
        and
        # '~' in the rule: set once both '&' tokens have matched.
        (cut := True)
        and
        (atom := self.atom())
    ):
        return Forced ( atom )
    self.reset(mark)
    if cut: return None
    return None
@memoize
def lookahead(self) -> Optional[LookaheadOrCut]:
# lookahead: '&' ~ atom | '!' ~ atom | '~'

View File

@ -4,6 +4,7 @@ from ast import literal_eval
from pegen.grammar import (
Alt,
Cut,
Forced,
Gather,
Group,
Item,
@ -87,8 +88,12 @@ named_item[NamedItem]:
| NAME '[' type=NAME ']' '=' ~ item {NamedItem(name.string, item, type.string)}
| NAME '=' ~ item {NamedItem(name.string, item)}
| item {NamedItem(None, item)}
| it=forced_atom {NamedItem(None, it)}
| it=lookahead {NamedItem(None, it)}
# The action builds a Forced node, so that is the rule's result type.
forced_atom[Forced]:
    | '&''&' ~ atom {Forced(atom)}
lookahead[LookaheadOrCut]:
| '&' ~ atom {PositiveLookahead(atom)}
| '!' ~ atom {NegativeLookahead(atom)}