diff --git a/Lib/test/test_pep263.py b/Lib/test/test_pep263.py
index 598d980b2a6..1290bc70de6 100644
--- a/Lib/test/test_pep263.py
+++ b/Lib/test/test_pep263.py
@@ -55,6 +55,30 @@ class PEP263Test(unittest.TestCase):
         # two bytes in common with the UTF-8 BOM
         self.assertRaises(SyntaxError, eval, b'\xef\xbb\x20')
 
+    def test_error_message(self):
+        # Issue #18038: a SyntaxError caused by a bad coding declaration
+        # must name the offending encoding (and the BOM on a conflict).
+        # Valid declarations, with or without a UTF-8 BOM, must compile.
+        compile(b'# -*- coding: iso-8859-15 -*-\n', 'dummy', 'exec')
+        compile(b'\xef\xbb\xbf\n', 'dummy', 'exec')
+        compile(b'\xef\xbb\xbf# -*- coding: utf-8 -*-\n', 'dummy', 'exec')
+        # Unknown encoding, no BOM.
+        with self.assertRaisesRegex(SyntaxError, 'fake'):
+            compile(b'# -*- coding: fake -*-\n', 'dummy', 'exec')
+        # Known non-UTF-8 encoding contradicting the BOM.
+        with self.assertRaisesRegex(SyntaxError, 'iso-8859-15'):
+            compile(b'\xef\xbb\xbf# -*- coding: iso-8859-15 -*-\n',
+                    'dummy', 'exec')
+        with self.assertRaisesRegex(SyntaxError, 'BOM'):
+            compile(b'\xef\xbb\xbf# -*- coding: iso-8859-15 -*-\n',
+                    'dummy', 'exec')
+        # Unknown encoding together with a BOM.
+        with self.assertRaisesRegex(SyntaxError, 'fake'):
+            compile(b'\xef\xbb\xbf# -*- coding: fake -*-\n', 'dummy', 'exec')
+        with self.assertRaisesRegex(SyntaxError, 'BOM'):
+            compile(b'\xef\xbb\xbf# -*- coding: fake -*-\n', 'dummy', 'exec')
+
+
 def test_main():
     support.run_unittest(PEP263Test)
 
diff --git a/Misc/NEWS b/Misc/NEWS
index b337d484678..b6cf7c9e9ff 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ What's New in Python 3.4.0 Alpha 1?
 Core and Builtins
 -----------------
 
+- Issue #18038: SyntaxError raised during compilation of sources with illegal
+  encoding now always contains the encoding name.
+
 - Issue #17931: Resolve confusion on Windows between pids and process
   handles.
 
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 5480278feac..62b1a91b87c 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -291,20 +291,20 @@ check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok, tok->encoding = cs; tok->decoding_state = STATE_NORMAL; } - else + else { + PyErr_Format(PyExc_SyntaxError, + "encoding problem: %s", cs); PyMem_FREE(cs); + } } } else { /* then, compare cs with BOM */ r = (strcmp(tok->encoding, cs) == 0); + if (!r) + PyErr_Format(PyExc_SyntaxError, + "encoding problem: %s with BOM", cs); PyMem_FREE(cs); } } - if (!r) { - cs = tok->encoding; - if (!cs) - cs = "with BOM"; - PyErr_Format(PyExc_SyntaxError, "encoding problem: %s", cs); - } return r; }