From 9fd21f649d66dcb10108ee395fd68ed32c8239cd Mon Sep 17 00:00:00 2001
From: Pablo Galindo
Date: Wed, 9 Jun 2021 00:54:29 +0100
Subject: [PATCH] bpo-44349: Fix edge case when displaying text from files with encoding in syntax errors (GH-26611)

---
 Lib/test/test_exceptions.py                  | 16 ++++++++++++++++
 .../2021-06-08-22-49-06.bpo-44349.xgEgeA.rst |  1 +
 Parser/pegen.c                               |  7 +++++--
 3 files changed, 22 insertions(+), 2 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-06-08-22-49-06.bpo-44349.xgEgeA.rst

diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py
index df5778d7e5f..b242c082f85 100644
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -2105,6 +2105,22 @@ class SyntaxErrorTests(unittest.TestCase):
                         sys.__excepthook__(*sys.exc_info())
                     the_exception = exc
 
+    def test_encodings(self):
+        source = (
+            '# -*- coding: cp437 -*-\n'
+            '"¢¢¢¢¢¢" + f(4, x for x in range(1))\n'
+        )
+        try:
+            with open(TESTFN, 'w', encoding='cp437') as testfile:
+                testfile.write(source)
+            rc, out, err = script_helper.assert_python_failure('-Wd', '-X', 'utf8', TESTFN)
+            err = err.decode('utf-8').splitlines()
+
+            self.assertEqual(err[-3], '    "¢¢¢¢¢¢" + f(4, x for x in range(1))')
+            self.assertEqual(err[-2], '                    ^^^^^^^^^^^^^^^^^^^')
+        finally:
+            unlink(TESTFN)
+
     def test_attributes_new_constructor(self):
         args = ("bad.py", 1, 2, "abcdefg", 1, 100)
         the_exception = SyntaxError("bad bad", args)
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-06-08-22-49-06.bpo-44349.xgEgeA.rst b/Misc/NEWS.d/next/Core and Builtins/2021-06-08-22-49-06.bpo-44349.xgEgeA.rst
new file mode 100644
index 00000000000..b386a8ed2c8
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-06-08-22-49-06.bpo-44349.xgEgeA.rst
@@ -0,0 +1 @@
+Fix an edge case when displaying text from files with encoding in syntax errors. Patch by Pablo Galindo.
\ No newline at end of file
diff --git a/Parser/pegen.c b/Parser/pegen.c
index 42a992251da..e6518198eca 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -456,10 +456,13 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
         goto error;
     }
 
+    // PyErr_ProgramTextObject assumes that the text is utf-8 so we cannot call it with a file
+    // with an arbitrary encoding or otherwise we could get some badly decoded text.
+    int uses_utf8_codec = (!p->tok->encoding || strcmp(p->tok->encoding, "utf-8") == 0);
     if (p->tok->fp_interactive) {
         error_line = get_error_line(p, lineno);
     }
-    else if (p->start_rule == Py_file_input) {
+    else if (uses_utf8_codec && p->start_rule == Py_file_input) {
         error_line = PyErr_ProgramTextObject(p->tok->filename, (int) lineno);
     }
 
@@ -471,7 +474,7 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
            we're actually parsing from a file, which has an E_EOF SyntaxError and in that case
            `PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which
            does not physically exist */
-        assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF);
+        assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF || !uses_utf8_codec);
 
         if (p->tok->lineno <= lineno) {
             Py_ssize_t size = p->tok->inp - p->tok->buf;