mirror of https://github.com/python/cpython
bpo-42997: Improve error message for missing : before suites (GH-24292)
* Add to the peg generator a new directive ('&&') that allows expecting a token and hard-failing the parse if the token is not found. This makes it possible to quickly emit syntax errors for missing tokens. * Use the new grammar element to hard-fail if the ':' is missing before suites.
This commit is contained in:
parent
802b645e81
commit
58fb156edd
|
@ -27,6 +27,12 @@ class PEGLexer(RegexLexer):
|
|||
tokens = {
|
||||
"ws": [(r"\n", Text), (r"\s+", Text), (r"#.*$", Comment.Singleline),],
|
||||
"lookaheads": [
|
||||
# Forced tokens
|
||||
(r"(&&)(?=\w+\s?)", bygroups(None)),
|
||||
(r"(&&)(?='.+'\s?)", bygroups(None)),
|
||||
(r'(&&)(?=".+"\s?)', bygroups(None)),
|
||||
(r"(&&)(?=\(.+\)\s?)", bygroups(None)),
|
||||
|
||||
(r"(?<=\|\s)(&\w+\s?)", bygroups(None)),
|
||||
(r"(?<=\|\s)(&'.+'\s?)", bygroups(None)),
|
||||
(r'(?<=\|\s)(&".+"\s?)', bygroups(None)),
|
||||
|
|
|
@ -162,22 +162,22 @@ dotted_name[expr_ty]:
|
|||
| NAME
|
||||
|
||||
if_stmt[stmt_ty]:
|
||||
| 'if' a=named_expression ':' b=block c=elif_stmt {
|
||||
| 'if' a=named_expression &&':' b=block c=elif_stmt {
|
||||
_Py_If(a, b, CHECK(asdl_stmt_seq*, _PyPegen_singleton_seq(p, c)), EXTRA) }
|
||||
| 'if' a=named_expression ':' b=block c=[else_block] { _Py_If(a, b, c, EXTRA) }
|
||||
| 'if' a=named_expression &&':' b=block c=[else_block] { _Py_If(a, b, c, EXTRA) }
|
||||
elif_stmt[stmt_ty]:
|
||||
| 'elif' a=named_expression ':' b=block c=elif_stmt {
|
||||
| 'elif' a=named_expression &&':' b=block c=elif_stmt {
|
||||
_Py_If(a, b, CHECK(asdl_stmt_seq*, _PyPegen_singleton_seq(p, c)), EXTRA) }
|
||||
| 'elif' a=named_expression ':' b=block c=[else_block] { _Py_If(a, b, c, EXTRA) }
|
||||
else_block[asdl_stmt_seq*]: 'else' ':' b=block { b }
|
||||
| 'elif' a=named_expression &&':' b=block c=[else_block] { _Py_If(a, b, c, EXTRA) }
|
||||
else_block[asdl_stmt_seq*]: 'else' &&':' b=block { b }
|
||||
|
||||
while_stmt[stmt_ty]:
|
||||
| 'while' a=named_expression ':' b=block c=[else_block] { _Py_While(a, b, c, EXTRA) }
|
||||
| 'while' a=named_expression &&':' b=block c=[else_block] { _Py_While(a, b, c, EXTRA) }
|
||||
|
||||
for_stmt[stmt_ty]:
|
||||
| 'for' t=star_targets 'in' ~ ex=star_expressions ':' tc=[TYPE_COMMENT] b=block el=[else_block] {
|
||||
| 'for' t=star_targets 'in' ~ ex=star_expressions &&':' tc=[TYPE_COMMENT] b=block el=[else_block] {
|
||||
_Py_For(t, ex, b, el, NEW_TYPE_COMMENT(p, tc), EXTRA) }
|
||||
| ASYNC 'for' t=star_targets 'in' ~ ex=star_expressions ':' tc=[TYPE_COMMENT] b=block el=[else_block] {
|
||||
| ASYNC 'for' t=star_targets 'in' ~ ex=star_expressions &&':' tc=[TYPE_COMMENT] b=block el=[else_block] {
|
||||
CHECK_VERSION(stmt_ty, 5, "Async for loops are", _Py_AsyncFor(t, ex, b, el, NEW_TYPE_COMMENT(p, tc), EXTRA)) }
|
||||
| invalid_for_target
|
||||
|
||||
|
@ -190,18 +190,20 @@ with_stmt[stmt_ty]:
|
|||
CHECK_VERSION(stmt_ty, 5, "Async with statements are", _Py_AsyncWith(a, b, NULL, EXTRA)) }
|
||||
| ASYNC 'with' a[asdl_withitem_seq*]=','.with_item+ ':' tc=[TYPE_COMMENT] b=block {
|
||||
CHECK_VERSION(stmt_ty, 5, "Async with statements are", _Py_AsyncWith(a, b, NEW_TYPE_COMMENT(p, tc), EXTRA)) }
|
||||
| invalid_with_stmt
|
||||
|
||||
with_item[withitem_ty]:
|
||||
| e=expression 'as' t=star_target &(',' | ')' | ':') { _Py_withitem(e, t, p->arena) }
|
||||
| invalid_with_item
|
||||
| e=expression { _Py_withitem(e, NULL, p->arena) }
|
||||
|
||||
try_stmt[stmt_ty]:
|
||||
| 'try' ':' b=block f=finally_block { _Py_Try(b, NULL, NULL, f, EXTRA) }
|
||||
| 'try' ':' b=block ex[asdl_excepthandler_seq*]=except_block+ el=[else_block] f=[finally_block] { _Py_Try(b, ex, el, f, EXTRA) }
|
||||
| 'try' &&':' b=block f=finally_block { _Py_Try(b, NULL, NULL, f, EXTRA) }
|
||||
| 'try' &&':' b=block ex[asdl_excepthandler_seq*]=except_block+ el=[else_block] f=[finally_block] { _Py_Try(b, ex, el, f, EXTRA) }
|
||||
except_block[excepthandler_ty]:
|
||||
| 'except' e=expression t=['as' z=NAME { z }] ':' b=block {
|
||||
| 'except' e=expression t=['as' z=NAME { z }] &&':' b=block {
|
||||
_Py_ExceptHandler(e, (t) ? ((expr_ty) t)->v.Name.id : NULL, b, EXTRA) }
|
||||
| 'except' ':' b=block { _Py_ExceptHandler(NULL, NULL, b, EXTRA) }
|
||||
| 'except' &&':' b=block { _Py_ExceptHandler(NULL, NULL, b, EXTRA) }
|
||||
finally_block[asdl_stmt_seq*]: 'finally' ':' a=block { a }
|
||||
|
||||
return_stmt[stmt_ty]:
|
||||
|
@ -216,11 +218,11 @@ function_def[stmt_ty]:
|
|||
| function_def_raw
|
||||
|
||||
function_def_raw[stmt_ty]:
|
||||
| 'def' n=NAME '(' params=[params] ')' a=['->' z=expression { z }] ':' tc=[func_type_comment] b=block {
|
||||
| 'def' n=NAME '(' params=[params] ')' a=['->' z=expression { z }] &&':' tc=[func_type_comment] b=block {
|
||||
_Py_FunctionDef(n->v.Name.id,
|
||||
(params) ? params : CHECK(arguments_ty, _PyPegen_empty_arguments(p)),
|
||||
b, NULL, a, NEW_TYPE_COMMENT(p, tc), EXTRA) }
|
||||
| ASYNC 'def' n=NAME '(' params=[params] ')' a=['->' z=expression { z }] ':' tc=[func_type_comment] b=block {
|
||||
| ASYNC 'def' n=NAME '(' params=[params] ')' a=['->' z=expression { z }] &&':' tc=[func_type_comment] b=block {
|
||||
CHECK_VERSION(
|
||||
stmt_ty,
|
||||
5,
|
||||
|
@ -300,7 +302,7 @@ class_def[stmt_ty]:
|
|||
| a=decorators b=class_def_raw { _PyPegen_class_def_decorators(p, a, b) }
|
||||
| class_def_raw
|
||||
class_def_raw[stmt_ty]:
|
||||
| 'class' a=NAME b=['(' z=[arguments] ')' { z }] ':' c=block {
|
||||
| 'class' a=NAME b=['(' z=[arguments] ')' { z }] &&':' c=block {
|
||||
_Py_ClassDef(a->v.Name.id,
|
||||
(b) ? ((expr_ty) b)->v.Call.args : NULL,
|
||||
(b) ? ((expr_ty) b)->v.Call.keywords : NULL,
|
||||
|
@ -718,7 +720,7 @@ invalid_double_type_comments:
|
|||
| TYPE_COMMENT NEWLINE TYPE_COMMENT NEWLINE INDENT {
|
||||
RAISE_SYNTAX_ERROR("Cannot have two type comments on def") }
|
||||
invalid_with_item:
|
||||
| expression 'as' a=expression {
|
||||
| expression 'as' a=expression &(',' | ')' | ':') {
|
||||
RAISE_SYNTAX_ERROR_INVALID_TARGET(STAR_TARGETS, a) }
|
||||
|
||||
invalid_for_target:
|
||||
|
@ -731,3 +733,7 @@ invalid_group:
|
|||
invalid_import_from_targets:
|
||||
| import_from_as_names ',' {
|
||||
RAISE_SYNTAX_ERROR("trailing comma not allowed without surrounding parentheses") }
|
||||
|
||||
invalid_with_stmt:
|
||||
| [ASYNC] 'with' ','.(expression ['as' star_target])+ &&':'
|
||||
| [ASYNC] 'with' '(' ','.(expressions ['as' star_target])+ ','? ')' &&':'
|
||||
|
|
|
@ -229,7 +229,7 @@ SyntaxError: cannot assign to function call
|
|||
|
||||
>>> with a as b
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: invalid syntax
|
||||
SyntaxError: expected ':'
|
||||
|
||||
>>> p = p =
|
||||
Traceback (most recent call last):
|
||||
|
@ -331,7 +331,7 @@ SyntaxError: Generator expression must be parenthesized
|
|||
>>> class C(x for x in L):
|
||||
... pass
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: invalid syntax
|
||||
SyntaxError: expected ':'
|
||||
|
||||
>>> def g(*args, **kwargs):
|
||||
... print(args, sorted(kwargs.items()))
|
||||
|
@ -708,6 +708,107 @@ leading to spurious errors.
|
|||
...
|
||||
SyntaxError: cannot assign to function call
|
||||
|
||||
Missing ':' before suites:
|
||||
|
||||
>>> def f()
|
||||
... pass
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: expected ':'
|
||||
|
||||
>>> class A
|
||||
... pass
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: expected ':'
|
||||
|
||||
>>> if 1
|
||||
... pass
|
||||
... elif 1:
|
||||
... pass
|
||||
... else:
|
||||
... x() = 1
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: expected ':'
|
||||
|
||||
>>> if 1:
|
||||
... pass
|
||||
... elif 1
|
||||
... pass
|
||||
... else:
|
||||
... x() = 1
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: expected ':'
|
||||
|
||||
>>> if 1:
|
||||
... pass
|
||||
... elif 1:
|
||||
... pass
|
||||
... else
|
||||
... x() = 1
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: expected ':'
|
||||
|
||||
>>> for x in range(10)
|
||||
... pass
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: expected ':'
|
||||
|
||||
>>> while True
|
||||
... pass
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: expected ':'
|
||||
|
||||
>>> with blech as something
|
||||
... pass
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: expected ':'
|
||||
|
||||
>>> with blech
|
||||
... pass
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: expected ':'
|
||||
|
||||
>>> with blech, block as something
|
||||
... pass
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: expected ':'
|
||||
|
||||
>>> with blech, block as something, bluch
|
||||
... pass
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: expected ':'
|
||||
|
||||
>>> with (blech as something)
|
||||
... pass
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: expected ':'
|
||||
|
||||
>>> with (blech)
|
||||
... pass
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: expected ':'
|
||||
|
||||
>>> with (blech, block as something)
|
||||
... pass
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: expected ':'
|
||||
|
||||
>>> with (blech, block as something, bluch)
|
||||
... pass
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: expected ':'
|
||||
|
||||
>>> try
|
||||
... pass
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: expected ':'
|
||||
|
||||
>>> try:
|
||||
... pass
|
||||
... except
|
||||
... pass
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: expected ':'
|
||||
|
||||
Make sure that the old "raise X, Y[, Z]" form is gone:
|
||||
>>> raise X, Y
|
||||
Traceback (most recent call last):
|
||||
|
@ -992,7 +1093,7 @@ def func2():
|
|||
finally:
|
||||
pass
|
||||
"""
|
||||
self._check_error(code, "invalid syntax")
|
||||
self._check_error(code, "expected ':'")
|
||||
|
||||
def test_invalid_line_continuation_left_recursive(self):
|
||||
# Check bpo-42218: SyntaxErrors following left-recursive rules
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Improve error message for missing ":" before blocks. Patch by Pablo Galindo.
|
1393
Parser/parser.c
1393
Parser/parser.c
File diff suppressed because it is too large
Load Diff
|
@ -782,7 +782,6 @@ _PyPegen_is_memoized(Parser *p, int type, void *pres)
|
|||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
_PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p)
|
||||
{
|
||||
|
@ -836,6 +835,28 @@ _PyPegen_expect_token(Parser *p, int type)
|
|||
return t;
|
||||
}
|
||||
|
||||
Token *
|
||||
_PyPegen_expect_forced_token(Parser *p, int type, const char* expected) {
|
||||
|
||||
if (p->error_indicator == 1) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (p->mark == p->fill) {
|
||||
if (_PyPegen_fill_token(p) < 0) {
|
||||
p->error_indicator = 1;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
Token *t = p->tokens[p->mark];
|
||||
if (t->type != type) {
|
||||
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(t, "expected '%s'", expected);
|
||||
return NULL;
|
||||
}
|
||||
p->mark += 1;
|
||||
return t;
|
||||
}
|
||||
|
||||
expr_ty
|
||||
_PyPegen_expect_soft_keyword(Parser *p, const char *keyword)
|
||||
{
|
||||
|
|
|
@ -102,10 +102,7 @@ typedef struct {
|
|||
arg_ty kwarg;
|
||||
} StarEtc;
|
||||
|
||||
typedef struct {
|
||||
operator_ty kind;
|
||||
} AugOperator;
|
||||
|
||||
typedef struct { operator_ty kind; } AugOperator;
|
||||
typedef struct {
|
||||
void *element;
|
||||
int is_keyword;
|
||||
|
@ -118,12 +115,14 @@ int _PyPegen_insert_memo(Parser *p, int mark, int type, void *node);
|
|||
int _PyPegen_update_memo(Parser *p, int mark, int type, void *node);
|
||||
int _PyPegen_is_memoized(Parser *p, int type, void *pres);
|
||||
|
||||
|
||||
int _PyPegen_lookahead_with_name(int, expr_ty (func)(Parser *), Parser *);
|
||||
int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int);
|
||||
int _PyPegen_lookahead_with_string(int , expr_ty (func)(Parser *, const char*), Parser *, const char*);
|
||||
int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *);
|
||||
|
||||
Token *_PyPegen_expect_token(Parser *p, int type);
|
||||
Token *_PyPegen_expect_forced_token(Parser *p, int type, const char* expected);
|
||||
expr_ty _PyPegen_expect_soft_keyword(Parser *p, const char *keyword);
|
||||
Token *_PyPegen_get_last_nonnwhitespace_token(Parser *);
|
||||
int _PyPegen_fill_token(Parser *p);
|
||||
|
|
|
@ -8,6 +8,7 @@ from pegen import grammar
|
|||
from pegen.grammar import (
|
||||
Alt,
|
||||
Cut,
|
||||
Forced,
|
||||
Gather,
|
||||
GrammarVisitor,
|
||||
Group,
|
||||
|
@ -252,6 +253,24 @@ class CCallMakerVisitor(GrammarVisitor):
|
|||
def visit_NegativeLookahead(self, node: NegativeLookahead) -> FunctionCall:
|
||||
return self.lookahead_call_helper(node, 0)
|
||||
|
||||
def visit_Forced(self, node: Forced) -> FunctionCall:
    """Build the C call emitted for a forced (``&&``) grammar item.

    Only items whose generated call is a generic token are supported. For
    those, the literal is looked up in ``self.exact_tokens`` and the
    returned call targets ``_PyPegen_expect_forced_token``, passing the
    token type and the literal text (used as the "expected" string).

    Raises:
        AssertionError: if the literal is not a key of ``self.exact_tokens``.
        NotImplementedError: if the wrapped item is not a generic token.
    """
    call = self.generate_call(node.node)
    if call.nodetype != NodeTypes.GENERIC_TOKEN:
        raise NotImplementedError(
            f"Forced tokens don't work with {call.nodetype} tokens")

    val = ast.literal_eval(node.node.value)
    # The literal lives on the wrapped node; Forced itself only has `.node`,
    # so the message must read `node.node.value` (not `node.value`).
    assert val in self.exact_tokens, f"{node.node.value} is not a known literal"
    token_type = self.exact_tokens[val]
    return FunctionCall(
        assigned_variable="_literal",
        function="_PyPegen_expect_forced_token",
        arguments=["p", token_type, f'"{val}"'],
        nodetype=NodeTypes.GENERIC_TOKEN,
        return_type="Token *",
        comment=f"forced_token='{val}'",
    )
|
||||
|
||||
def visit_Opt(self, node: Opt) -> FunctionCall:
|
||||
call = self.generate_call(node.node)
|
||||
return FunctionCall(
|
||||
|
|
|
@ -288,6 +288,23 @@ class NamedItem:
|
|||
gen.callmakervisitor.visit(self.item)
|
||||
|
||||
|
||||
class Forced:
    """Grammar item for a forced element, rendered as ``&&`` followed by the item."""

    def __init__(self, node: Plain):
        # The wrapped grammar item that must be matched.
        self.node = node

    def __str__(self) -> str:
        """Return the grammar spelling: ``&&`` plus the wrapped item."""
        return "&&{}".format(self.node)

    def __iter__(self) -> Iterator[Plain]:
        """Iterate over the single wrapped item."""
        yield from (self.node,)

    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
        """Always report this item as nullable; ``rules`` is not consulted."""
        return True

    def initial_names(self) -> AbstractSet[str]:
        """Return an empty set: this item contributes no initial names."""
        return set()
|
||||
|
||||
|
||||
class Lookahead:
|
||||
def __init__(self, node: Plain, sign: str):
|
||||
self.node = node
|
||||
|
@ -459,7 +476,7 @@ class Cut:
|
|||
|
||||
|
||||
Plain = Union[Leaf, Group]
|
||||
Item = Union[Plain, Opt, Repeat, Lookahead, Rhs, Cut]
|
||||
Item = Union[Plain, Opt, Repeat, Forced, Lookahead, Rhs, Cut]
|
||||
RuleName = Tuple[str, str]
|
||||
MetaTuple = Tuple[str, Optional[str]]
|
||||
MetaList = List[MetaTuple]
|
||||
|
|
|
@ -13,6 +13,7 @@ from ast import literal_eval
|
|||
from pegen.grammar import (
|
||||
Alt,
|
||||
Cut,
|
||||
Forced,
|
||||
Gather,
|
||||
Group,
|
||||
Item,
|
||||
|
@ -402,7 +403,7 @@ class GeneratedParser(Parser):
|
|||
|
||||
@memoize
|
||||
def named_item(self) -> Optional[NamedItem]:
|
||||
# named_item: NAME '[' NAME '*' ']' '=' ~ item | NAME '[' NAME ']' '=' ~ item | NAME '=' ~ item | item | lookahead
|
||||
# named_item: NAME '[' NAME '*' ']' '=' ~ item | NAME '[' NAME ']' '=' ~ item | NAME '=' ~ item | item | forced_atom | lookahead
|
||||
mark = self.mark()
|
||||
cut = False
|
||||
if (
|
||||
|
@ -465,6 +466,13 @@ class GeneratedParser(Parser):
|
|||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(it := self.forced_atom())
|
||||
):
|
||||
return NamedItem ( None , it )
|
||||
self.reset(mark)
|
||||
if cut: return None
|
||||
cut = False
|
||||
if (
|
||||
(it := self.lookahead())
|
||||
):
|
||||
|
@ -473,6 +481,25 @@ class GeneratedParser(Parser):
|
|||
if cut: return None
|
||||
return None
|
||||
|
||||
@memoize
def forced_atom(self) -> Optional[Forced]:
    """Parse a forced atom and return it wrapped in ``Forced``.

    Returns ``None`` (after resetting to the starting mark) when the input
    does not match. The return annotation names ``Forced`` because that is
    what the success path constructs.
    """
    # forced_atom: '&' '&' ~ atom
    mark = self.mark()
    cut = False
    if (
        (literal := self.expect('&'))
        and
        (literal_1 := self.expect('&'))
        and
        # Cut: once both '&' tokens matched, commit to this alternative.
        (cut := True)
        and
        (atom := self.atom())
    ):
        return Forced ( atom )
    self.reset(mark)
    if cut: return None
    return None
|
||||
|
||||
@memoize
|
||||
def lookahead(self) -> Optional[LookaheadOrCut]:
|
||||
# lookahead: '&' ~ atom | '!' ~ atom | '~'
|
||||
|
|
|
@ -4,6 +4,7 @@ from ast import literal_eval
|
|||
from pegen.grammar import (
|
||||
Alt,
|
||||
Cut,
|
||||
Forced,
|
||||
Gather,
|
||||
Group,
|
||||
Item,
|
||||
|
@ -87,8 +88,12 @@ named_item[NamedItem]:
|
|||
| NAME '[' type=NAME ']' '=' ~ item {NamedItem(name.string, item, type.string)}
|
||||
| NAME '=' ~ item {NamedItem(name.string, item)}
|
||||
| item {NamedItem(None, item)}
|
||||
| it=forced_atom {NamedItem(None, it)}
|
||||
| it=lookahead {NamedItem(None, it)}
|
||||
|
||||
# A forced atom: two '&' tokens, a cut, then an atom.
# The action builds a Forced node, so the rule is typed as Forced.
forced_atom[Forced]:
    | '&''&' ~ atom {Forced(atom)}
|
||||
|
||||
lookahead[LookaheadOrCut]:
|
||||
| '&' ~ atom {PositiveLookahead(atom)}
|
||||
| '!' ~ atom {NegativeLookahead(atom)}
|
||||
|
|
Loading…
Reference in New Issue