bpo-45738: Fix computation of error location for invalid continuation characters in the parser (GH-29550)
2021-11-14 01:06:41 +00:00 · 2021-11-14 01:06:41 +00:00 · 25835c518a
parent f8da00ef04
commit 25835c518a
4 changed files with 14 additions and 12 deletions
--- a/Lib/test/test_syntax.py
+++ b/Lib/test/test_syntax.py
@@ -1505,7 +1505,13 @@ def func2():
    def test_invalid_line_continuation_error_position(self):
        self._check_error(r"a = 3 \ 4",
                          "unexpected character after line continuation character",
-                          lineno=1, offset=9)
+                          lineno=1, offset=8)
+        self._check_error('1,\\#\n2',
+                          "unexpected character after line continuation character",
+                          lineno=1, offset=4)
+        self._check_error('\nfgdfgf\n1,\\#\n2\n',
+                          "unexpected character after line continuation character",
+                          lineno=3, offset=4)
    def test_invalid_line_continuation_left_recursive(self):
        # Check bpo-42218: SyntaxErrors following left-recursive rules
--- a/Misc/NEWS.d/next/Core and Builtins/2021-11-14-00-14-45.bpo-45738.e0cgKd.rst
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-11-14-00-14-45.bpo-45738.e0cgKd.rst
@@ -0,0 +1,2 @@
+Fix computation of error location for invalid continuation characters in the
+parser. Patch by Pablo Galindo.
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -351,14 +351,7 @@ tokenizer_error(Parser *p)
            msg = "too many levels of indentation";
            break;
        case E_LINECONT: {
-            char* loc = strrchr(p->tok->buf, '\n');
+            col_offset = p->tok->cur - p->tok->buf - 1;
-            const char* last_char = p->tok->cur - 1;
-            if (loc != NULL && loc != last_char) {
-                col_offset = p->tok->cur - loc - 1;
-                p->tok->buf = loc;
-            } else {
-                col_offset = last_char - p->tok->buf - 1;
-            }
            msg = "unexpected character after line continuation character";
            break;
        }
@@ -366,7 +359,9 @@ tokenizer_error(Parser *p)
            msg = "unknown parsing error";
    }
-    RAISE_ERROR_KNOWN_LOCATION(p, errtype, p->tok->lineno, col_offset, p->tok->lineno, -1, msg);
+    RAISE_ERROR_KNOWN_LOCATION(p, errtype, p->tok->lineno,
+                               col_offset >= 0 ? col_offset : 0,
+                               p->tok->lineno, -1, msg);
    return -1;
 }
@@ -497,7 +492,7 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
           does not physically exist */
        assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF || !uses_utf8_codec);
-        if (p->tok->lineno <= lineno) {
+        if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) {
            Py_ssize_t size = p->tok->inp - p->tok->buf;
            error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
        }
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1970,7 +1970,6 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
        c = tok_nextc(tok);
        if (c != '\n') {
            tok->done = E_LINECONT;
-            tok->cur = tok->inp;
            return ERRORTOKEN;
        }
        c = tok_nextc(tok);
		`@ -0,0 +1,2 @@`
							`Fix computation of error location for invalid continuation characters in the`
							`parser. Patch by Pablo Galindo.`