bpo-44349: Fix edge case when displaying text from files with encoding in syntax errors (GH-26611)

This commit is contained in:
Pablo Galindo 2021-06-09 00:54:29 +01:00 committed by GitHub
parent 2ea6d89028
commit 9fd21f649d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 22 additions and 2 deletions

View File

@ -2105,6 +2105,22 @@ class SyntaxErrorTests(unittest.TestCase):
sys.__excepthook__(*sys.exc_info())
the_exception = exc
def test_encodings(self):
    """Check how a syntax error is displayed for a cp437-encoded script.

    A two-line script (a ``cp437`` coding cookie followed by a line holding
    non-ASCII characters and an unparenthesized generator argument) is
    written to TESTFN using the cp437 codec, then run in a child
    interpreter with ``-Wd -X utf8``.  The child is expected to fail, and
    the third-from-last and second-from-last lines of its stderr are
    compared against the expected source line and caret marker.
    TESTFN is always removed afterwards.
    """
    script_text = (
        '# -*- coding: cp437 -*-\n'
        '"¢¢¢¢¢¢" + f(4, x for x in range(1))\n'
    )
    try:
        # Write with the same codec that the coding cookie declares.
        with open(TESTFN, 'w', encoding='cp437') as script_file:
            script_file.write(script_text)

        # Only the captured stderr matters for this check.
        _, _, raw_stderr = script_helper.assert_python_failure(
            '-Wd', '-X', 'utf8', TESTFN)
        stderr_lines = raw_stderr.decode('utf-8').splitlines()

        # NOTE(review): leading whitespace inside these expected strings may
        # have been collapsed by the page rendering -- confirm against the
        # interpreter's real output before relying on them.
        self.assertEqual(stderr_lines[-3], ' "¢¢¢¢¢¢" + f(4, x for x in range(1))')
        self.assertEqual(stderr_lines[-2], ' ^^^^^^^^^^^^^^^^^^^')
    finally:
        unlink(TESTFN)
def test_attributes_new_constructor(self):
args = ("bad.py", 1, 2, "abcdefg", 1, 100)
the_exception = SyntaxError("bad bad", args)

View File

@ -0,0 +1 @@
Fix an edge case when displaying text from files with encoding in syntax errors. Patch by Pablo Galindo.

View File

@ -456,10 +456,13 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
goto error;
}
// PyErr_ProgramTextObject assumes that the text is utf-8 so we cannot call it with a file
// with an arbitrary encoding or otherwise we could get some badly decoded text.
int uses_utf8_codec = (!p->tok->encoding || strcmp(p->tok->encoding, "utf-8") == 0);
if (p->tok->fp_interactive) {
error_line = get_error_line(p, lineno);
}
else if (p->start_rule == Py_file_input) {
else if (uses_utf8_codec && p->start_rule == Py_file_input) {
error_line = PyErr_ProgramTextObject(p->tok->filename, (int) lineno);
}
@ -471,7 +474,7 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
we're actually parsing from a file, which has an E_EOF SyntaxError and in that case
`PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which
does not physically exist */
assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF);
assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF || !uses_utf8_codec);
if (p->tok->lineno <= lineno) {
Py_ssize_t size = p->tok->inp - p->tok->buf;