gh-96611: Fix error message for invalid UTF-8 in mid-multiline string (#96623)

This commit is contained in:
Michael Droettboom 2022-09-06 19:12:16 -04:00 committed by GitHub
parent 67444902a0
commit 05692c67c5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 16 additions and 0 deletions

View File

@ -147,6 +147,18 @@ class MiscSourceEncodingTest(unittest.TestCase):
self.assertTrue(c.exception.args[0].startswith(expected),
msg=c.exception.args[0])
def test_file_parse_error_multiline(self):
# gh-96611: invalid UTF-8 in the middle of a multi-line string must be
# reported with the "Non-UTF-8 code" message.
# Write a script whose triple-quoted string has the raw byte 0xB1 at the
# start of its second line; 0xB1 cannot begin a UTF-8 sequence.
with open(TESTFN, "wb") as fd:
fd.write(b'print("""\n\xb1""")\n')
try:
# Presumably runs TESTFN in a child interpreter and returns its exit
# status plus captured output -- confirm against script_helper.
retcode, stdout, stderr = script_helper.assert_python_failure(TESTFN)
self.assertGreater(retcode, 0)
# The expected text holds the escaped form of the byte (a literal
# backslash followed by "xb1"), not the raw 0xB1 byte itself.
self.assertIn(b"Non-UTF-8 code starting with '\\xb1'", stderr)
finally:
# Remove the temporary script whether or not the assertions passed.
os.unlink(TESTFN)
class AbstractSourceEncodingTest:

View File

@ -0,0 +1,2 @@
When loading a file that contains invalid UTF-8 inside a multi-line string,
the correct SyntaxError (reporting the non-UTF-8 byte) is now emitted.

View File

@ -1936,6 +1936,8 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
/* Get rest of string */
while (end_quote_size != quote_size) {
c = tok_nextc(tok);
if (tok->done == E_DECODE)
break;
if (c == EOF || (quote_size == 1 && c == '\n')) {
assert(tok->multi_line_start != NULL);
// shift the tok_state's location into