From 65fd0592fb3845c17b27c441380553fc22f78812 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 21 Jan 2014 22:26:52 +0200 Subject: [PATCH] Issue #2382: SyntaxError cursor "^" is now written at correct position in most cases when multibyte characters are in line (before "^"). This still does not work correctly with wide East Asian characters. --- Lib/test/test_exceptions.py | 13 +++++++++++++ Lib/test/test_traceback.py | 9 +++++++++ Misc/NEWS | 4 ++++ Python/pythonrun.c | 14 ++++++++++++-- 4 files changed, 38 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index 1ad7f97b740..fe660bf9b45 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -148,6 +148,19 @@ class ExceptionTests(unittest.TestCase): ckmsg(s, "'continue' not properly in loop") ckmsg("continue\n", "'continue' not properly in loop") + def testSyntaxErrorOffset(self): + def check(src, lineno, offset): + with self.assertRaises(SyntaxError) as cm: + compile(src, '', 'exec') + self.assertEqual(cm.exception.lineno, lineno) + self.assertEqual(cm.exception.offset, offset) + + check('def fact(x):\n\treturn x!\n', 2, 10) + check('1 +\n', 1, 4) + check('def spam():\n print(1)\n print(2)', 3, 10) + check('Python = "Python" +', 1, 20) + check('Python = "\u1e54\xfd\u0163\u0125\xf2\xf1" +', 1, 20) + @cpython_only def testSettingException(self): # test that setting an exception at the C level works even if the diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index bca825de43b..373d9af6418 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -32,6 +32,9 @@ class SyntaxTracebackCases(unittest.TestCase): def syntax_error_bad_indentation(self): compile("def spam():\n print(1)\n print(2)", "?", "exec") + def syntax_error_with_caret_non_ascii(self): + compile('Python = "\u1e54\xfd\u0163\u0125\xf2\xf1" +', "?", "exec") + def test_caret(self): err = 
self.get_exception_format(self.syntax_error_with_caret, SyntaxError) @@ -46,6 +49,12 @@ class SyntaxTracebackCases(unittest.TestCase): self.assertTrue(err[2].count('\n') == 1) # and no additional newline self.assertTrue(err[1].find("+") == err[2].find("^")) # in the right place + err = self.get_exception_format(self.syntax_error_with_caret_non_ascii, + SyntaxError) + self.assertIn("^", err[2]) # third line has caret + self.assertTrue(err[2].count('\n') == 1) # and no additional newline + self.assertTrue(err[1].find("+") == err[2].find("^")) # in the right place + def test_nocaret(self): exc = SyntaxError("error", ("x.py", 23, None, "bad syntax")) err = traceback.format_exception_only(SyntaxError, exc) diff --git a/Misc/NEWS b/Misc/NEWS index e470fa90286..4e3aa45a82e 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,10 @@ What's New in Python 3.3.4 release candidate 1? Core and Builtins ----------------- +- Issue #2382: SyntaxError cursor "^" is now written at correct position in most + cases when multibyte characters are in line (before "^"). This still does not + work correctly with wide East Asian characters. + - Issue #18960: The first line of Python script could be executed twice when the source encoding was specified on the second line. 
Now the source encoding declaration on the second line isn't effective if the first line contains diff --git a/Python/pythonrun.c b/Python/pythonrun.c index e02dbe2be18..91d56b78eea 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -2226,6 +2226,7 @@ err_input(perrdetail *err) PyObject *v, *w, *errtype, *errtext; PyObject *msg_obj = NULL; char *msg = NULL; + int offset = err->offset; errtype = PyExc_SyntaxError; switch (err->error) { @@ -2310,11 +2311,20 @@ err_input(perrdetail *err) errtext = Py_None; Py_INCREF(Py_None); } else { - errtext = PyUnicode_DecodeUTF8(err->text, strlen(err->text), + errtext = PyUnicode_DecodeUTF8(err->text, err->offset, "replace"); + if (errtext != NULL) { + Py_ssize_t len = strlen(err->text); + offset = (int)PyUnicode_GET_LENGTH(errtext); + if (len != err->offset) { + Py_DECREF(errtext); + errtext = PyUnicode_DecodeUTF8(err->text, len, + "replace"); + } + } } v = Py_BuildValue("(OiiN)", err->filename, - err->lineno, err->offset, errtext); + err->lineno, offset, errtext); if (v != NULL) { if (msg_obj) w = Py_BuildValue("(OO)", msg_obj, v);