From 1b62bcee941e54244b3ce6476aef8913604987c9 Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Tue, 11 Jun 2024 19:00:53 +0200 Subject: [PATCH] gh-120343: Do not reset byte_col_offset_diff after multiline tokens (#120352) Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> --- Lib/test/test_tokenize.py | 11 +++++++++++ .../2024-06-11-16-34-41.gh-issue-120343.hdiXeU.rst | 1 + Python/Python-tokenize.c | 7 ++++++- 3 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-06-11-16-34-41.gh-issue-120343.hdiXeU.rst diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 4428e8cea19..36dba71766c 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -1199,6 +1199,17 @@ async def f(): NAME 'x' (1, 3) (1, 4) """) + def test_multiline_non_ascii_fstring(self): + self.check_tokenize("""\ +a = f''' + Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli'''""", """\ + NAME 'a' (1, 0) (1, 1) + OP '=' (1, 2) (1, 3) + FSTRING_START "f\'\'\'" (1, 4) (1, 8) + FSTRING_MIDDLE '\\n Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli' (1, 8) (2, 68) + FSTRING_END "\'\'\'" (2, 68) (2, 71) + """) + class GenerateTokensTest(TokenizeTest): def check_tokenize(self, s, expected): # Format the tokens in s in a table format. diff --git a/Misc/NEWS.d/next/Library/2024-06-11-16-34-41.gh-issue-120343.hdiXeU.rst b/Misc/NEWS.d/next/Library/2024-06-11-16-34-41.gh-issue-120343.hdiXeU.rst new file mode 100644 index 00000000000..76714b0c394 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-11-16-34-41.gh-issue-120343.hdiXeU.rst @@ -0,0 +1 @@ +Fix column offset reporting for tokens that come after multiline f-strings in the :mod:`tokenize` module. diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c index 09fad18b5b4..2591dae3573 100644 --- a/Python/Python-tokenize.c +++ b/Python/Python-tokenize.c @@ -36,6 +36,7 @@ typedef struct /* Needed to cache line for performance */ PyObject *last_line; Py_ssize_t last_lineno; + Py_ssize_t last_end_lineno; Py_ssize_t byte_col_offset_diff; } tokenizeriterobject; @@ -77,6 +78,7 @@ tokenizeriter_new_impl(PyTypeObject *type, PyObject *readline, self->last_line = NULL; self->byte_col_offset_diff = 0; self->last_lineno = 0; + self->last_end_lineno = 0; return (PyObject *)self; } @@ -227,7 +229,9 @@ tokenizeriter_next(tokenizeriterobject *it) Py_XDECREF(it->last_line); line = PyUnicode_DecodeUTF8(line_start, size, "replace"); it->last_line = line; - it->byte_col_offset_diff = 0; + if (it->tok->lineno != it->last_end_lineno) { + it->byte_col_offset_diff = 0; + } } else { // Line hasn't changed so we reuse the cached one. line = it->last_line; @@ -241,6 +245,7 @@ tokenizeriter_next(tokenizeriterobject *it) Py_ssize_t lineno = ISSTRINGLIT(type) ? it->tok->first_lineno : it->tok->lineno; Py_ssize_t end_lineno = it->tok->lineno; it->last_lineno = lineno; + it->last_end_lineno = end_lineno; Py_ssize_t col_offset = -1; Py_ssize_t end_col_offset = -1;