Issue #2382: SyntaxError cursor "^" is now written at correct position in most

cases when multibyte characters are in line (before "^"). This still does not work correctly with wide East Asian characters.
2014-01-21 22:29:47 +02:00 · 2014-01-21 22:29:47 +02:00 · 2bd59daf58
parent 567b26e882 65fd0592fb
commit 2bd59daf58
4 changed files with 38 additions and 2 deletions
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@ -148,6 +148,19 @@ class ExceptionTests(unittest.TestCase):
        ckmsg(s, "'continue' not properly in loop")
        ckmsg("continue\n", "'continue' not properly in loop")

+    def testSyntaxErrorOffset(self):
+        def check(src, lineno, offset):
+            with self.assertRaises(SyntaxError) as cm:
+                compile(src, '<fragment>', 'exec')
+            self.assertEqual(cm.exception.lineno, lineno)
+            self.assertEqual(cm.exception.offset, offset)
+
+        check('def fact(x):\n\treturn x!\n', 2, 10)
+        check('1 +\n', 1, 4)
+        check('def spam():\n  print(1)\n print(2)', 3, 10)
+        check('Python = "Python" +', 1, 20)
+        check('Python = "\u1e54\xfd\u0163\u0125\xf2\xf1" +', 1, 20)
+
    @cpython_only
    def testSettingException(self):
        # test that setting an exception at the C level works even if the
--- a/Lib/test/test_traceback.py
+++ b/Lib/test/test_traceback.py
@ -32,6 +32,9 @@ class SyntaxTracebackCases(unittest.TestCase):
    def syntax_error_bad_indentation(self):
        compile("def spam():\n  print(1)\n print(2)", "?", "exec")

+    def syntax_error_with_caret_non_ascii(self):
+        compile('Python = "\u1e54\xfd\u0163\u0125\xf2\xf1" +', "?", "exec")
+
    def test_caret(self):
        err = self.get_exception_format(self.syntax_error_with_caret,
                                        SyntaxError)
@ -46,6 +49,12 @@ class SyntaxTracebackCases(unittest.TestCase):
        self.assertTrue(err[2].count('\n') == 1) # and no additional newline
        self.assertTrue(err[1].find("+") == err[2].find("^")) # in the right place

+        err = self.get_exception_format(self.syntax_error_with_caret_non_ascii,
+                                        SyntaxError)
+        self.assertIn("^", err[2]) # third line has caret
+        self.assertTrue(err[2].count('\n') == 1) # and no additional newline
+        self.assertTrue(err[1].find("+") == err[2].find("^")) # in the right place
+
    def test_nocaret(self):
        exc = SyntaxError("error", ("x.py", 23, None, "bad syntax"))
        err = traceback.format_exception_only(SyntaxError, exc)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@ -10,6 +10,10 @@ Release date: 2014-01-19
 Core and Builtins
 -----------------

+- Issue #2382: SyntaxError cursor "^" is now written at correct position in most
+  cases when multibyte characters are in line (before "^").  This still does not
+  work correctly with wide East Asian characters.
+
 - Issue #18960: The first line of Python script could be executed twice when
  the source encoding was specified on the second line.  Now the source encoding
  declaration on the second line isn't effective if the first line contains
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@ -2470,6 +2470,7 @@ err_input(perrdetail *err)
    PyObject *v, *w, *errtype, *errtext;
    PyObject *msg_obj = NULL;
    char *msg = NULL;
+    int offset = err->offset;

    errtype = PyExc_SyntaxError;
    switch (err->error) {
@ -2554,11 +2555,20 @@ err_input(perrdetail *err)
        errtext = Py_None;
        Py_INCREF(Py_None);
    } else {
-        errtext = PyUnicode_DecodeUTF8(err->text, strlen(err->text),
+        errtext = PyUnicode_DecodeUTF8(err->text, err->offset,
                                       "replace");
+        if (errtext != NULL) {
+            Py_ssize_t len = strlen(err->text);
+            offset = (int)PyUnicode_GET_LENGTH(errtext);
+            if (len != err->offset) {
+                Py_DECREF(errtext);
+                errtext = PyUnicode_DecodeUTF8(err->text, len,
+                                               "replace");
+            }
+        }
    }
    v = Py_BuildValue("(OiiN)", err->filename,
-                      err->lineno, err->offset, errtext);
+                      err->lineno, offset, errtext);
    if (v != NULL) {
        if (msg_obj)
            w = Py_BuildValue("(OO)", msg_obj, v);