From 15bc9ab301d73f20bff47a12ef05326feb40f797 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 14 May 2020 19:22:48 -0700 Subject: [PATCH] bpo-40612: Fix SyntaxError edge cases in traceback formatting (GH-20072) This fixes both the traceback.py module and the C code for formatting syntax errors (in Python/pythonrun.c). They now both consistently do the following: - Suppress caret if it points left of text - Allow caret pointing just past end of line - If caret points past end of line, clip to *just* past end of line The syntax error formatting code in traceback.py was mostly rewritten; small, subtle changes were applied to the C code in pythonrun.c. There's still a difference when the text contains embedded newlines. Neither handles these very well, and I don't think the case occurs in practice. Automerge-Triggered-By: @gvanrossum --- Lib/test/test_cmd_line_script.py | 2 +- Lib/test/test_traceback.py | 34 ++++++--- Lib/traceback.py | 29 +++++--- .../2020-05-13-10-23-29.bpo-40612.gOIreM.rst | 2 + Python/pythonrun.c | 70 +++++++++++++------ 5 files changed, 96 insertions(+), 41 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2020-05-13-10-23-29.bpo-40612.gOIreM.rst diff --git a/Lib/test/test_cmd_line_script.py b/Lib/test/test_cmd_line_script.py index 171340581af..15fca7b8a51 100644 --- a/Lib/test/test_cmd_line_script.py +++ b/Lib/test/test_cmd_line_script.py @@ -633,7 +633,7 @@ class CmdLineTest(unittest.TestCase): stderr.splitlines()[-3:], [ b' foo"""', - b' ^', + b' ^', b'SyntaxError: f-string: empty expression not allowed', ], ) diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index 7361d091cfb..f9a5f2fc53e 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -58,13 +58,13 @@ class TracebackCases(unittest.TestCase): SyntaxError) self.assertIn("^", err[2]) # third line has caret self.assertEqual(err[2].count('\n'), 1) # and no additional newline - self.assertEqual(err[1].find("+"), err[2].find("^")) # in the right place + self.assertEqual(err[1].find("+") + 1, err[2].find("^")) # in the right place err = self.get_exception_format(self.syntax_error_with_caret_non_ascii, SyntaxError) self.assertIn("^", err[2]) # third line has caret self.assertEqual(err[2].count('\n'), 1) # and no additional newline - self.assertEqual(err[1].find("+"), err[2].find("^")) # in the right place + self.assertEqual(err[1].find("+") + 1, err[2].find("^")) # in the right place def test_nocaret(self): exc = SyntaxError("error", ("x.py", 23, None, "bad syntax")) @@ -78,14 +78,13 @@ class TracebackCases(unittest.TestCase): self.assertEqual(len(err), 4) self.assertEqual(err[1].strip(), "print(2)") self.assertIn("^", err[2]) - self.assertEqual(err[1].find(")"), err[2].find("^")) + self.assertEqual(err[1].find(")") + 1, err[2].find("^")) + # No caret for "unexpected indent" err = self.get_exception_format(self.syntax_error_bad_indentation2, IndentationError) - self.assertEqual(len(err), 4) + self.assertEqual(len(err), 3) self.assertEqual(err[1].strip(), "print(2)") - self.assertIn("^", err[2]) - self.assertEqual(err[1].find("p"), err[2].find("^")) def test_base_exception(self): # Test that exceptions derived from BaseException are formatted right @@ -656,7 +655,7 @@ class BaseExceptionReportingTests: self.assertIn('inner_raise() # Marker', blocks[2]) self.check_zero_div(blocks[2]) - @support.skip_if_new_parser("Pegen is arguably better here, so no need to fix this") + @unittest.skipIf(support.use_old_parser(), "Pegen is arguably better here, so no need to fix this") def test_syntax_error_offset_at_eol(self): # See #10186. def e(): @@ -666,7 +665,7 @@ class BaseExceptionReportingTests: def e(): exec("x = 5 | 4 |") msg = self.get_report(e).splitlines() - self.assertEqual(msg[-2], ' ^') + self.assertEqual(msg[-2], ' ^') def test_message_none(self): # A message that looks like "None" should not be treated specially @@ -679,6 +678,25 @@ class BaseExceptionReportingTests: err = self.get_report(Exception('')) self.assertIn('Exception\n', err) + def test_syntax_error_various_offsets(self): + for offset in range(-5, 10): + for add in [0, 2]: + text = " "*add + "text%d" % offset + expected = [' File "file.py", line 1'] + if offset < 1: + expected.append(" %s" % text.lstrip()) + elif offset <= 6: + expected.append(" %s" % text.lstrip()) + expected.append(" %s^" % (" "*(offset-1))) + else: + expected.append(" %s" % text.lstrip()) + expected.append(" %s^" % (" "*5)) + expected.append("SyntaxError: msg") + expected.append("") + err = self.get_report(SyntaxError("msg", ("file.py", 1, offset+add, text))) + exp = "\n".join(expected) + self.assertEqual(exp, err) + class PyExcReportingTests(BaseExceptionReportingTests, unittest.TestCase): # diff --git a/Lib/traceback.py b/Lib/traceback.py index bf34bbab8a1..a19e38718b1 100644 --- a/Lib/traceback.py +++ b/Lib/traceback.py @@ -569,23 +569,30 @@ class TracebackException: if not issubclass(self.exc_type, SyntaxError): yield _format_final_exc_line(stype, self._str) - return + else: + yield from self._format_syntax_error(stype) - # It was a syntax error; show exactly where the problem was found. + def _format_syntax_error(self, stype): + """Format SyntaxError exceptions (internal helper).""" + # Show exactly where the problem was found. filename = self.filename or "" lineno = str(self.lineno) or '?' yield ' File "{}", line {}\n'.format(filename, lineno) - badline = self.text - offset = self.offset - if badline is not None: - yield ' {}\n'.format(badline.strip()) - if offset is not None: - caretspace = badline.rstrip('\n') - offset = min(len(caretspace), offset) - 1 - caretspace = caretspace[:offset].lstrip() + text = self.text + if text is not None: + # text = " foo\n" + # rtext = " foo" + # ltext = "foo" + rtext = text.rstrip('\n') + ltext = rtext.lstrip(' \n\f') + spaces = len(rtext) - len(ltext) + yield ' {}\n'.format(ltext) + # Convert 1-based column offset to 0-based index into stripped text + caret = (self.offset or 0) - 1 - spaces + if caret >= 0: # non-space whitespace (likes tabs) must be kept for alignment - caretspace = ((c.isspace() and c or ' ') for c in caretspace) + caretspace = ((c if c.isspace() else ' ') for c in ltext[:caret]) yield ' {}^\n'.format(''.join(caretspace)) msg = self.msg or "" yield "{}: {}\n".format(stype, msg) diff --git a/Misc/NEWS.d/next/Library/2020-05-13-10-23-29.bpo-40612.gOIreM.rst b/Misc/NEWS.d/next/Library/2020-05-13-10-23-29.bpo-40612.gOIreM.rst new file mode 100644 index 00000000000..32cc8073d3f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-05-13-10-23-29.bpo-40612.gOIreM.rst @@ -0,0 +1,2 @@ +Fix edge cases in SyntaxError formatting. If the offset is <= 0, no caret is printed. +If the offset is > line length, the caret is printed pointing just after the last character. diff --git a/Python/pythonrun.c b/Python/pythonrun.c index 45f08b707eb..160f44d38e2 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -554,37 +554,65 @@ finally: static void print_error_text(PyObject *f, int offset, PyObject *text_obj) { - const char *text; - const char *nl; - - text = PyUnicode_AsUTF8(text_obj); + /* Convert text to a char pointer; return if error */ + const char *text = PyUnicode_AsUTF8(text_obj); if (text == NULL) return; - if (offset >= 0) { - if (offset > 0 && (size_t)offset == strlen(text) && text[offset - 1] == '\n') - offset--; - for (;;) { - nl = strchr(text, '\n'); - if (nl == NULL || nl-text >= offset) - break; - offset -= (int)(nl+1-text); - text = nl+1; - } - while (*text == ' ' || *text == '\t' || *text == '\f') { - text++; - offset--; - } + /* Convert offset from 1-based to 0-based */ + offset--; + + /* Strip leading whitespace from text, adjusting offset as we go */ + while (*text == ' ' || *text == '\t' || *text == '\f') { + text++; + offset--; } + + /* Calculate text length excluding trailing newline */ + Py_ssize_t len = strlen(text); + if (len > 0 && text[len-1] == '\n') { + len--; + } + + /* Clip offset to at most len */ + if (offset > len) { + offset = len; + } + + /* Skip past newlines embedded in text */ + for (;;) { + const char *nl = strchr(text, '\n'); + if (nl == NULL) { + break; + } + Py_ssize_t inl = nl - text; + if (inl >= (Py_ssize_t)offset) { + break; + } + inl += 1; + text += inl; + len -= inl; + offset -= (int)inl; + } + + /* Print text */ PyFile_WriteString(" ", f); PyFile_WriteString(text, f); - if (*text == '\0' || text[strlen(text)-1] != '\n') + + /* Make sure there's a newline at the end */ + if (text[len] != '\n') { PyFile_WriteString("\n", f); - if (offset == -1) + } + + /* Don't print caret if it points to the left of the text */ + if (offset < 0) return; + + /* Write caret line */ PyFile_WriteString(" ", f); - while (--offset > 0) + while (--offset >= 0) { PyFile_WriteString(" ", f); + } PyFile_WriteString("^\n", f); }