mirror of https://github.com/python/cpython
bpo-40661: Fix segfault when parsing invalid input (GH-20165)
Fix segfaults when parsing very complex invalid input, like `import äˆ ð£„¯ð¢·žð±‹á”€ð””ð‘©±å®ä±¬ð©¾\n𗶽`.
Co-authored-by: Guido van Rossum <guido@python.org>
Co-authored-by: Pablo Galindo <pablogsal@gmail.com>
This commit is contained in:
parent
08b47c367a
commit
7b7a21bc4f
|
@ -591,6 +591,7 @@ FAIL_TEST_CASES = [
|
|||
("f-string_single_closing_brace", "f'}'"),
|
||||
("from_import_invalid", "from import import a"),
|
||||
("from_import_trailing_comma", "from a import b,"),
|
||||
("import_non_ascii_syntax_error", "import ä £"),
|
||||
# This test case checks error paths involving tokens with uninitialized
|
||||
# values of col_offset and end_col_offset.
|
||||
("invalid indentation",
|
||||
|
|
|
@ -659,6 +659,9 @@ Corner-cases that used to crash:
|
|||
Traceback (most recent call last):
|
||||
SyntaxError: cannot assign to __debug__
|
||||
|
||||
>>> import ä £
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: invalid character '£' (U+00A3)
|
||||
"""
|
||||
|
||||
import re
|
||||
|
|
1590
Parser/pegen/parse.c
1590
Parser/pegen/parse.c
File diff suppressed because it is too large
Load Diff
|
@ -433,6 +433,12 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
|||
self.print("int _end_col_offset = _token->end_col_offset;")
|
||||
self.print("UNUSED(_end_col_offset); // Only used by EXTRA macro")
|
||||
|
||||
def _check_for_errors(self) -> None:
|
||||
self.print("if (p->error_indicator) {")
|
||||
with self.indent():
|
||||
self.print("return NULL;")
|
||||
self.print("}")
|
||||
|
||||
def _set_up_rule_memoization(self, node: Rule, result_type: str) -> None:
|
||||
self.print("{")
|
||||
with self.indent():
|
||||
|
@ -468,10 +474,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
|||
memoize = self._should_memoize(node)
|
||||
|
||||
with self.indent():
|
||||
self.print("if (p->error_indicator) {")
|
||||
with self.indent():
|
||||
self.print("return NULL;")
|
||||
self.print("}")
|
||||
self._check_for_errors()
|
||||
self.print(f"{result_type} _res = NULL;")
|
||||
if memoize:
|
||||
self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res))")
|
||||
|
@ -500,10 +503,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
|||
is_repeat1 = node.name.startswith("_loop1")
|
||||
|
||||
with self.indent():
|
||||
self.print("if (p->error_indicator) {")
|
||||
with self.indent():
|
||||
self.print("return NULL;")
|
||||
self.print("}")
|
||||
self._check_for_errors()
|
||||
self.print("void *_res = NULL;")
|
||||
if memoize:
|
||||
self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res))")
|
||||
|
@ -687,6 +687,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
|||
) -> None:
|
||||
self.print(f"{{ // {node}")
|
||||
with self.indent():
|
||||
self._check_for_errors()
|
||||
# Prepare variable declarations for the alternative
|
||||
vars = self.collect_vars(node)
|
||||
for v, var_type in sorted(item for item in vars.items() if item[0] is not None):
|
||||
|
|
Loading…
Reference in New Issue