bpo-42218: Correctly handle errors in left-recursive rules (GH-23065)

Left-recursive rules need to check for errors explicitly, since
even if the rule returns NULL, the parsing might continue and lead
to long-distance failures.

Co-authored-by: Pablo Galindo <Pablogsal@gmail.com>
This commit is contained in:
Lysandros Nikolaou 2020-10-31 20:31:41 +02:00 committed by GitHub
parent d21cb2d5ee
commit 02cdfc93f8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 32 additions and 0 deletions

View File

@@ -972,6 +972,14 @@ def func2():
""" """
self._check_error(code, "invalid syntax") self._check_error(code, "invalid syntax")
def test_invalid_line_continuation_left_recursive(self):
# Check bpo-42218: SyntaxErrors following left-recursive rules
# (t_primary_raw in this case) need to be tested explicitly
self._check_error("A.\u018a\\ ",
"unexpected character after line continuation character")
self._check_error("A.\u03bc\\\n",
"unexpected EOF while parsing")
def test_main(): def test_main():
support.run_unittest(SyntaxTestCase) support.run_unittest(SyntaxTestCase)
from test import test_syntax from test import test_syntax

View File

@@ -0,0 +1,3 @@
Fixed a bug in the PEG parser that was causing crashes in debug mode. Now errors are checked
in left-recursive rules to avoid cases where such errors do not get handled in time and appear
as long-distance crashes in other places.

View File

@@ -3461,6 +3461,8 @@ dotted_name_rule(Parser *p)
} }
p->mark = _mark; p->mark = _mark;
void *_raw = dotted_name_raw(p); void *_raw = dotted_name_raw(p);
if (p->error_indicator)
return NULL;
if (_raw == NULL || p->mark <= _resmark) if (_raw == NULL || p->mark <= _resmark)
break; break;
_resmark = p->mark; _resmark = p->mark;
@@ -9045,6 +9047,8 @@ bitwise_or_rule(Parser *p)
} }
p->mark = _mark; p->mark = _mark;
void *_raw = bitwise_or_raw(p); void *_raw = bitwise_or_raw(p);
if (p->error_indicator)
return NULL;
if (_raw == NULL || p->mark <= _resmark) if (_raw == NULL || p->mark <= _resmark)
break; break;
_resmark = p->mark; _resmark = p->mark;
@@ -9159,6 +9163,8 @@ bitwise_xor_rule(Parser *p)
} }
p->mark = _mark; p->mark = _mark;
void *_raw = bitwise_xor_raw(p); void *_raw = bitwise_xor_raw(p);
if (p->error_indicator)
return NULL;
if (_raw == NULL || p->mark <= _resmark) if (_raw == NULL || p->mark <= _resmark)
break; break;
_resmark = p->mark; _resmark = p->mark;
@@ -9273,6 +9279,8 @@ bitwise_and_rule(Parser *p)
} }
p->mark = _mark; p->mark = _mark;
void *_raw = bitwise_and_raw(p); void *_raw = bitwise_and_raw(p);
if (p->error_indicator)
return NULL;
if (_raw == NULL || p->mark <= _resmark) if (_raw == NULL || p->mark <= _resmark)
break; break;
_resmark = p->mark; _resmark = p->mark;
@@ -9387,6 +9395,8 @@ shift_expr_rule(Parser *p)
} }
p->mark = _mark; p->mark = _mark;
void *_raw = shift_expr_raw(p); void *_raw = shift_expr_raw(p);
if (p->error_indicator)
return NULL;
if (_raw == NULL || p->mark <= _resmark) if (_raw == NULL || p->mark <= _resmark)
break; break;
_resmark = p->mark; _resmark = p->mark;
@@ -9540,6 +9550,8 @@ sum_rule(Parser *p)
} }
p->mark = _mark; p->mark = _mark;
void *_raw = sum_raw(p); void *_raw = sum_raw(p);
if (p->error_indicator)
return NULL;
if (_raw == NULL || p->mark <= _resmark) if (_raw == NULL || p->mark <= _resmark)
break; break;
_resmark = p->mark; _resmark = p->mark;
@@ -9699,6 +9711,8 @@ term_rule(Parser *p)
} }
p->mark = _mark; p->mark = _mark;
void *_raw = term_raw(p); void *_raw = term_raw(p);
if (p->error_indicator)
return NULL;
if (_raw == NULL || p->mark <= _resmark) if (_raw == NULL || p->mark <= _resmark)
break; break;
_resmark = p->mark; _resmark = p->mark;
@@ -10303,6 +10317,8 @@ primary_rule(Parser *p)
} }
p->mark = _mark; p->mark = _mark;
void *_raw = primary_raw(p); void *_raw = primary_raw(p);
if (p->error_indicator)
return NULL;
if (_raw == NULL || p->mark <= _resmark) if (_raw == NULL || p->mark <= _resmark)
break; break;
_resmark = p->mark; _resmark = p->mark;
@@ -13943,6 +13959,8 @@ t_primary_rule(Parser *p)
} }
p->mark = _mark; p->mark = _mark;
void *_raw = t_primary_raw(p); void *_raw = t_primary_raw(p);
if (p->error_indicator)
return NULL;
if (_raw == NULL || p->mark <= _resmark) if (_raw == NULL || p->mark <= _resmark)
break; break;
_resmark = p->mark; _resmark = p->mark;

View File

@@ -502,6 +502,9 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
) )
self.print("p->mark = _mark;") self.print("p->mark = _mark;")
self.print(f"void *_raw = {node.name}_raw(p);") self.print(f"void *_raw = {node.name}_raw(p);")
self.print("if (p->error_indicator)")
with self.indent():
self.print("return NULL;")
self.print("if (_raw == NULL || p->mark <= _resmark)") self.print("if (_raw == NULL || p->mark <= _resmark)")
with self.indent(): with self.indent():
self.print("break;") self.print("break;")