bpo-45811: Improve error message when source code contains invisible control characters (GH-29654)

This commit is contained in:
Pablo Galindo Salgado 2021-11-20 18:28:28 +00:00 committed by GitHub
parent 7a1d932528
commit 81f4e116ef
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 11 additions and 0 deletions

View File

@ -1566,6 +1566,9 @@ def func2():
for paren in ")]}":
self._check_error(paren + "1 + 2", f"unmatched '\\{paren}'")
def test_invisible_characters(self):
# The \x17 escape places control character U+0017 between `print` and the
# opening parenthesis; the source is expected to be rejected with the
# "invalid non-printable character" error.
# NOTE(review): the tokenizer appends a "U+XXXX" code point to this message,
# so _check_error presumably matches the expected text as a pattern rather
# than for exact equality -- confirm against its definition.
self._check_error('print\x17("Hello")', "invalid non-printable character")
def test_match_call_does_not_raise_syntax_error(self):
code = """
def match(x):

View File

@ -0,0 +1,2 @@
Improve the tokenizer error reported when the parser encounters invisible
control characters in source code. Patch by Pablo Galindo.

View File

@ -2045,6 +2045,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
break;
}
if (!Py_UNICODE_ISPRINTABLE(c)) {
char hex[9];
(void)PyOS_snprintf(hex, sizeof(hex), "%04X", c);
return syntaxerror(tok, "invalid non-printable character U+%s", hex);
}
/* Punctuation character */
*p_start = tok->start;
*p_end = tok->cur;