From cfcb952e30e01d7cce430829af8edc7afc94e0b1 Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Sat, 31 Oct 2020 21:06:03 +0200 Subject: [PATCH] [3.9] bpo-42218: Correctly handle errors in left-recursive rules (GH-23065) (GH-23066) Left-recursive rules need to check for errors explicitly, since even if the rule returns NULL, the parsing might continue and lead to long-distance failures. Co-authored-by: Pablo Galindo (cherry picked from commit 02cdfc93f82fecdb7eae97a868d4ee222b9875d9) Automerge-Triggered-By: GH:lysnikolaou --- Lib/test/test_syntax.py | 8 ++++++++ .../2020-10-31-17-50-23.bpo-42218.Dp_Z3v.rst | 3 +++ Parser/pegen/parse.c | 18 ++++++++++++++++++ Tools/peg_generator/pegen/c_generator.py | 3 +++ 4 files changed, 32 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2020-10-31-17-50-23.bpo-42218.Dp_Z3v.rst diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py index a95992d869e..1336231fbbf 100644 --- a/Lib/test/test_syntax.py +++ b/Lib/test/test_syntax.py @@ -964,6 +964,14 @@ def func2(): """ self._check_error(code, "invalid syntax") + def test_invalid_line_continuation_left_recursive(self): + # Check bpo-42218: SyntaxErrors following left-recursive rules + # (t_primary_raw in this case) need to be tested explicitly + self._check_error("A.\u018a\\ ", + "unexpected character after line continuation character") + self._check_error("A.\u03bc\\\n", + "unexpected EOF while parsing") + def test_main(): support.run_unittest(SyntaxTestCase) from test import test_syntax diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-10-31-17-50-23.bpo-42218.Dp_Z3v.rst b/Misc/NEWS.d/next/Core and Builtins/2020-10-31-17-50-23.bpo-42218.Dp_Z3v.rst new file mode 100644 index 00000000000..a38a310e4b4 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2020-10-31-17-50-23.bpo-42218.Dp_Z3v.rst @@ -0,0 +1,3 @@ +Fixed a bug in the PEG parser that was causing crashes in debug mode. Now errors are checked +in left-recursive rules to avoid cases where such errors do not get handled in time and appear +as long-distance crashes in other places. diff --git a/Parser/pegen/parse.c b/Parser/pegen/parse.c index bae9463e274..97cefa9c2a3 100644 --- a/Parser/pegen/parse.c +++ b/Parser/pegen/parse.c @@ -3460,6 +3460,8 @@ dotted_name_rule(Parser *p) } p->mark = _mark; void *_raw = dotted_name_raw(p); + if (p->error_indicator) + return NULL; if (_raw == NULL || p->mark <= _resmark) break; _resmark = p->mark; @@ -9044,6 +9046,8 @@ bitwise_or_rule(Parser *p) } p->mark = _mark; void *_raw = bitwise_or_raw(p); + if (p->error_indicator) + return NULL; if (_raw == NULL || p->mark <= _resmark) break; _resmark = p->mark; @@ -9158,6 +9162,8 @@ bitwise_xor_rule(Parser *p) } p->mark = _mark; void *_raw = bitwise_xor_raw(p); + if (p->error_indicator) + return NULL; if (_raw == NULL || p->mark <= _resmark) break; _resmark = p->mark; @@ -9272,6 +9278,8 @@ bitwise_and_rule(Parser *p) } p->mark = _mark; void *_raw = bitwise_and_raw(p); + if (p->error_indicator) + return NULL; if (_raw == NULL || p->mark <= _resmark) break; _resmark = p->mark; @@ -9386,6 +9394,8 @@ shift_expr_rule(Parser *p) } p->mark = _mark; void *_raw = shift_expr_raw(p); + if (p->error_indicator) + return NULL; if (_raw == NULL || p->mark <= _resmark) break; _resmark = p->mark; @@ -9539,6 +9549,8 @@ sum_rule(Parser *p) } p->mark = _mark; void *_raw = sum_raw(p); + if (p->error_indicator) + return NULL; if (_raw == NULL || p->mark <= _resmark) break; _resmark = p->mark; @@ -9698,6 +9710,8 @@ term_rule(Parser *p) } p->mark = _mark; void *_raw = term_raw(p); + if (p->error_indicator) + return NULL; if (_raw == NULL || p->mark <= _resmark) break; _resmark = p->mark; @@ -10302,6 +10316,8 @@ primary_rule(Parser *p) } p->mark = _mark; void *_raw = primary_raw(p); + if (p->error_indicator) + return NULL; if (_raw == NULL || p->mark <= _resmark) break; _resmark = p->mark; @@ -13962,6 +13978,8 @@ t_primary_rule(Parser *p) } p->mark = _mark; void *_raw = t_primary_raw(p); + if (p->error_indicator) + return NULL; if (_raw == NULL || p->mark <= _resmark) break; _resmark = p->mark; diff --git a/Tools/peg_generator/pegen/c_generator.py b/Tools/peg_generator/pegen/c_generator.py index d0abc12b402..b4d6a0bab51 100644 --- a/Tools/peg_generator/pegen/c_generator.py +++ b/Tools/peg_generator/pegen/c_generator.py @@ -496,6 +496,9 @@ class CParserGenerator(ParserGenerator, GrammarVisitor): ) self.print("p->mark = _mark;") self.print(f"void *_raw = {node.name}_raw(p);") + self.print("if (p->error_indicator)") + with self.indent(): + self.print("return NULL;") self.print("if (_raw == NULL || p->mark <= _resmark)") with self.indent(): self.print("break;")