From d382ad49157b3802fc5619f68d96810def517869 Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Thu, 15 Jun 2023 18:21:24 +0200 Subject: [PATCH] gh-105820: Fix tok_mode expression buffer in file & readline tokenizer (#105828) --- Lib/test/test_fstring.py | 15 ++++++++++++++- Lib/test/test_tokenize.py | 13 +++++++++++++ ...2023-06-15-15-54-47.gh-issue-105831.-MC9Zs.rst | 1 + Parser/tokenizer.c | 11 ++++++++--- 4 files changed, 36 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-06-15-15-54-47.gh-issue-105831.-MC9Zs.rst diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index cbb03080f79..8f6b576b5f7 100644 --- a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -15,7 +15,7 @@ import decimal import unittest from test import support from test.support.os_helper import temp_cwd -from test.support.script_helper import assert_python_failure +from test.support.script_helper import assert_python_failure, assert_python_ok a_global = 'global variable' @@ -1635,5 +1635,18 @@ sdfsdfs{1+ "f'{1=}{1;}'", ]) + def test_debug_in_file(self): + with temp_cwd(): + script = 'script.py' + with open('script.py', 'w') as f: + f.write(f"""\ +print(f'''{{ +3 +=}}''')""") + + _, stdout, _ = assert_python_ok(script) + self.assertEqual(stdout.decode('utf-8').strip().replace('\r\n', '\n').replace('\r', '\n'), + "3\n=3") + if __name__ == '__main__': unittest.main() diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 15f53632cff..5ad278499c9 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -558,6 +558,19 @@ def"', """\ OP '}' (1, 39) (1, 40) FSTRING_MIDDLE ' final words' (1, 40) (1, 52) FSTRING_END "'" (1, 52) (1, 53) + """) + self.check_tokenize("""\ +f'''{ +3 +=}'''""", """\ + FSTRING_START "f'''" (1, 0) (1, 4) + OP '{' (1, 4) (1, 5) + NL '\\n' (1, 5) (1, 6) + NUMBER '3' (2, 0) (2, 1) + NL '\\n' (2, 1) (2, 2) + OP '=' (3, 0) (3, 1) + OP '}' (3, 1) (3, 2) + FSTRING_END "'''" (3, 2) (3, 5) """) def test_function(self): diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-06-15-15-54-47.gh-issue-105831.-MC9Zs.rst b/Misc/NEWS.d/next/Core and Builtins/2023-06-15-15-54-47.gh-issue-105831.-MC9Zs.rst new file mode 100644 index 00000000000..df42b9e1d5a --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-06-15-15-54-47.gh-issue-105831.-MC9Zs.rst @@ -0,0 +1 @@ +Fix an f-string bug, where using a debug expression (the `=` sign) that appears in the last line of a file results to the debug buffer that holds the expression text being one character too small. diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 5d01b8e8a5a..4f7b1f8114e 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -1039,9 +1039,6 @@ tok_readline_raw(struct tok_state *tok) if (line == NULL) { return 1; } - if (tok->tok_mode_stack_index && !update_fstring_expr(tok, 0)) { - return 0; - } if (tok->fp_interactive && tok_concatenate_interactive_new_line(tok, line) == -1) { return 0; @@ -1270,6 +1267,10 @@ tok_underflow_file(struct tok_state *tok) { tok->implicit_newline = 1; } + if (tok->tok_mode_stack_index && !update_fstring_expr(tok, 0)) { + return 0; + } + ADVANCE_LINENO(); if (tok->decoding_state != STATE_NORMAL) { if (tok->lineno > 2) { @@ -1314,6 +1315,10 @@ tok_underflow_readline(struct tok_state* tok) { tok->implicit_newline = 1; } + if (tok->tok_mode_stack_index && !update_fstring_expr(tok, 0)) { + return 0; + } + ADVANCE_LINENO(); /* The default encoding is UTF-8, so make sure we don't have any non-UTF-8 sequences in it. */