bpo-40661: Fix segfault when parsing invalid input (GH-20165)

Fix segfaults when parsing very complex invalid input, like `import äˆ 𣄯𢷞𱋀ᔀ𔔀𑩱宏䱬𩾃\n𗶽`.

Co-authored-by: Guido van Rossum <guido@python.org>
Co-authored-by: Pablo Galindo <pablogsal@gmail.com>
This commit is contained in:
Lysandros Nikolaou 2020-05-18 20:32:03 +03:00 committed by GitHub
parent 08b47c367a
commit 7b7a21bc4f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 1603 additions and 8 deletions

View File

@@ -591,6 +591,7 @@ FAIL_TEST_CASES = [
("f-string_single_closing_brace", "f'}'"), ("f-string_single_closing_brace", "f'}'"),
("from_import_invalid", "from import import a"), ("from_import_invalid", "from import import a"),
("from_import_trailing_comma", "from a import b,"), ("from_import_trailing_comma", "from a import b,"),
("import_non_ascii_syntax_error", "import ä £"),
# This test case checks error paths involving tokens with uninitialized # This test case checks error paths involving tokens with uninitialized
# values of col_offset and end_col_offset. # values of col_offset and end_col_offset.
("invalid indentation", ("invalid indentation",

View File

@@ -659,6 +659,9 @@ Corner-cases that used to crash:
Traceback (most recent call last): Traceback (most recent call last):
SyntaxError: cannot assign to __debug__ SyntaxError: cannot assign to __debug__
>>> import ä £
Traceback (most recent call last):
SyntaxError: invalid character '£' (U+00A3)
""" """
import re import re

File diff suppressed because it is too large.

View File

@@ -433,6 +433,12 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
self.print("int _end_col_offset = _token->end_col_offset;") self.print("int _end_col_offset = _token->end_col_offset;")
self.print("UNUSED(_end_col_offset); // Only used by EXTRA macro") self.print("UNUSED(_end_col_offset); // Only used by EXTRA macro")
def _check_for_errors(self) -> None:
self.print("if (p->error_indicator) {")
with self.indent():
self.print("return NULL;")
self.print("}")
def _set_up_rule_memoization(self, node: Rule, result_type: str) -> None: def _set_up_rule_memoization(self, node: Rule, result_type: str) -> None:
self.print("{") self.print("{")
with self.indent(): with self.indent():
@@ -468,10 +474,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
memoize = self._should_memoize(node) memoize = self._should_memoize(node)
with self.indent(): with self.indent():
self.print("if (p->error_indicator) {") self._check_for_errors()
with self.indent():
self.print("return NULL;")
self.print("}")
self.print(f"{result_type} _res = NULL;") self.print(f"{result_type} _res = NULL;")
if memoize: if memoize:
self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res))") self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res))")
@@ -500,10 +503,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
is_repeat1 = node.name.startswith("_loop1") is_repeat1 = node.name.startswith("_loop1")
with self.indent(): with self.indent():
self.print("if (p->error_indicator) {") self._check_for_errors()
with self.indent():
self.print("return NULL;")
self.print("}")
self.print("void *_res = NULL;") self.print("void *_res = NULL;")
if memoize: if memoize:
self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res))") self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res))")
@@ -687,6 +687,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
) -> None: ) -> None:
self.print(f"{{ // {node}") self.print(f"{{ // {node}")
with self.indent(): with self.indent():
self._check_for_errors()
# Prepare variable declarations for the alternative # Prepare variable declarations for the alternative
vars = self.collect_vars(node) vars = self.collect_vars(node)
for v, var_type in sorted(item for item in vars.items() if item[0] is not None): for v, var_type in sorted(item for item in vars.items() if item[0] is not None):