mirror of https://github.com/python/cpython
bpo-40661: Fix segfault when parsing invalid input (GH-20165)
Fix segfaults when parsing very complex invalid input, like `import äˆ ð£„¯ð¢·žð±‹á”€ð””ð‘©±å®ä±¬ð©¾\n𗶽`. Co-authored-by: Guido van Rossum <guido@python.org> Co-authored-by: Pablo Galindo <pablogsal@gmail.com>
This commit is contained in:
parent
08b47c367a
commit
7b7a21bc4f
|
@ -591,6 +591,7 @@ FAIL_TEST_CASES = [
|
||||||
("f-string_single_closing_brace", "f'}'"),
|
("f-string_single_closing_brace", "f'}'"),
|
||||||
("from_import_invalid", "from import import a"),
|
("from_import_invalid", "from import import a"),
|
||||||
("from_import_trailing_comma", "from a import b,"),
|
("from_import_trailing_comma", "from a import b,"),
|
||||||
|
("import_non_ascii_syntax_error", "import ä £"),
|
||||||
# This test case checks error paths involving tokens with uninitialized
|
# This test case checks error paths involving tokens with uninitialized
|
||||||
# values of col_offset and end_col_offset.
|
# values of col_offset and end_col_offset.
|
||||||
("invalid indentation",
|
("invalid indentation",
|
||||||
|
|
|
@ -659,6 +659,9 @@ Corner-cases that used to crash:
|
||||||
Traceback (most recent call last):
|
Traceback (most recent call last):
|
||||||
SyntaxError: cannot assign to __debug__
|
SyntaxError: cannot assign to __debug__
|
||||||
|
|
||||||
|
>>> import ä £
|
||||||
|
Traceback (most recent call last):
|
||||||
|
SyntaxError: invalid character '£' (U+00A3)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
1590
Parser/pegen/parse.c
1590
Parser/pegen/parse.c
File diff suppressed because it is too large
Load Diff
|
@ -433,6 +433,12 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
||||||
self.print("int _end_col_offset = _token->end_col_offset;")
|
self.print("int _end_col_offset = _token->end_col_offset;")
|
||||||
self.print("UNUSED(_end_col_offset); // Only used by EXTRA macro")
|
self.print("UNUSED(_end_col_offset); // Only used by EXTRA macro")
|
||||||
|
|
||||||
|
def _check_for_errors(self) -> None:
|
||||||
|
self.print("if (p->error_indicator) {")
|
||||||
|
with self.indent():
|
||||||
|
self.print("return NULL;")
|
||||||
|
self.print("}")
|
||||||
|
|
||||||
def _set_up_rule_memoization(self, node: Rule, result_type: str) -> None:
|
def _set_up_rule_memoization(self, node: Rule, result_type: str) -> None:
|
||||||
self.print("{")
|
self.print("{")
|
||||||
with self.indent():
|
with self.indent():
|
||||||
|
@ -468,10 +474,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
||||||
memoize = self._should_memoize(node)
|
memoize = self._should_memoize(node)
|
||||||
|
|
||||||
with self.indent():
|
with self.indent():
|
||||||
self.print("if (p->error_indicator) {")
|
self._check_for_errors()
|
||||||
with self.indent():
|
|
||||||
self.print("return NULL;")
|
|
||||||
self.print("}")
|
|
||||||
self.print(f"{result_type} _res = NULL;")
|
self.print(f"{result_type} _res = NULL;")
|
||||||
if memoize:
|
if memoize:
|
||||||
self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res))")
|
self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res))")
|
||||||
|
@ -500,10 +503,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
||||||
is_repeat1 = node.name.startswith("_loop1")
|
is_repeat1 = node.name.startswith("_loop1")
|
||||||
|
|
||||||
with self.indent():
|
with self.indent():
|
||||||
self.print("if (p->error_indicator) {")
|
self._check_for_errors()
|
||||||
with self.indent():
|
|
||||||
self.print("return NULL;")
|
|
||||||
self.print("}")
|
|
||||||
self.print("void *_res = NULL;")
|
self.print("void *_res = NULL;")
|
||||||
if memoize:
|
if memoize:
|
||||||
self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res))")
|
self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res))")
|
||||||
|
@ -687,6 +687,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
||||||
) -> None:
|
) -> None:
|
||||||
self.print(f"{{ // {node}")
|
self.print(f"{{ // {node}")
|
||||||
with self.indent():
|
with self.indent():
|
||||||
|
self._check_for_errors()
|
||||||
# Prepare variable declarations for the alternative
|
# Prepare variable declarations for the alternative
|
||||||
vars = self.collect_vars(node)
|
vars = self.collect_vars(node)
|
||||||
for v, var_type in sorted(item for item in vars.items() if item[0] is not None):
|
for v, var_type in sorted(item for item in vars.items() if item[0] is not None):
|
||||||
|
|
Loading…
Reference in New Issue