diff --git a/Lib/ast.py b/Lib/ast.py index 1f54309c845..f7888d18859 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1270,13 +1270,15 @@ class _Unparser(NodeVisitor): quote_type = quote_types[0] self.write(f"{quote_type}{value}{quote_type}") - def _write_fstring_inner(self, node): + def _write_fstring_inner(self, node, scape_newlines=False): if isinstance(node, JoinedStr): # for both the f-string itself, and format_spec for value in node.values: - self._write_fstring_inner(value) + self._write_fstring_inner(value, scape_newlines=scape_newlines) elif isinstance(node, Constant) and isinstance(node.value, str): value = node.value.replace("{", "{{").replace("}", "}}") + if scape_newlines: + value = value.replace("\n", "\\n") self.write(value) elif isinstance(node, FormattedValue): self.visit_FormattedValue(node) @@ -1299,7 +1301,10 @@ class _Unparser(NodeVisitor): self.write(f"!{chr(node.conversion)}") if node.format_spec: self.write(":") - self._write_fstring_inner(node.format_spec) + self._write_fstring_inner( + node.format_spec, + scape_newlines=True + ) def visit_Name(self, node): self.write(node.id) diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 94fb6d933de..93695607887 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -566,6 +566,55 @@ f'''{ OP '=' (3, 0) (3, 1) OP '}' (3, 1) (3, 2) FSTRING_END "'''" (3, 2) (3, 5) + """) + self.check_tokenize("""\ +f'''__{ + x:a +}__'''""", """\ + FSTRING_START "f'''" (1, 0) (1, 4) + FSTRING_MIDDLE '__' (1, 4) (1, 6) + OP '{' (1, 6) (1, 7) + NL '\\n' (1, 7) (1, 8) + NAME 'x' (2, 4) (2, 5) + OP ':' (2, 5) (2, 6) + FSTRING_MIDDLE 'a\\n' (2, 6) (3, 0) + OP '}' (3, 0) (3, 1) + FSTRING_MIDDLE '__' (3, 1) (3, 3) + FSTRING_END "'''" (3, 3) (3, 6) + """) + self.check_tokenize("""\ +f'''__{ + x:a + b + c + d +}__'''""", """\ + FSTRING_START "f'''" (1, 0) (1, 4) + FSTRING_MIDDLE '__' (1, 4) (1, 6) + OP '{' (1, 6) (1, 7) + NL '\\n' (1, 7) (1, 8) + NAME 'x' (2, 4) (2, 5) + OP ':' (2, 5) (2, 6) + FSTRING_MIDDLE 'a\\n b\\n c\\n d\\n' (2, 6) (6, 0) + OP '}' (6, 0) (6, 1) + FSTRING_MIDDLE '__' (6, 1) (6, 3) + FSTRING_END "'''" (6, 3) (6, 6) + """) + self.check_tokenize("""\ +f'__{ + x:d +}__'""", """\ + FSTRING_START "f'" (1, 0) (1, 2) + FSTRING_MIDDLE '__' (1, 2) (1, 4) + OP '{' (1, 4) (1, 5) + NL '\\n' (1, 5) (1, 6) + NAME 'x' (2, 4) (2, 5) + OP ':' (2, 5) (2, 6) + FSTRING_MIDDLE 'd' (2, 6) (2, 7) + NL '\\n' (2, 7) (2, 8) + OP '}' (3, 0) (3, 1) + FSTRING_MIDDLE '__' (3, 1) (3, 3) + FSTRING_END "'" (3, 3) (3, 4) """) def test_function(self): @@ -2277,6 +2326,54 @@ def"', """\ FSTRING_START \'f"\' (1, 0) (1, 2) FSTRING_MIDDLE 'hola\\\\\\\\\\\\r\\\\ndfgf' (1, 2) (1, 16) FSTRING_END \'"\' (1, 16) (1, 17) + """) + + self.check_tokenize("""\ +f'''__{ + x:a +}__'''""", """\ + FSTRING_START "f'''" (1, 0) (1, 4) + FSTRING_MIDDLE '__' (1, 4) (1, 6) + LBRACE '{' (1, 6) (1, 7) + NAME 'x' (2, 4) (2, 5) + COLON ':' (2, 5) (2, 6) + FSTRING_MIDDLE 'a\\n' (2, 6) (3, 0) + RBRACE '}' (3, 0) (3, 1) + FSTRING_MIDDLE '__' (3, 1) (3, 3) + FSTRING_END "'''" (3, 3) (3, 6) + """) + + self.check_tokenize("""\ +f'''__{ + x:a + b + c + d +}__'''""", """\ + FSTRING_START "f'''" (1, 0) (1, 4) + FSTRING_MIDDLE '__' (1, 4) (1, 6) + LBRACE '{' (1, 6) (1, 7) + NAME 'x' (2, 4) (2, 5) + COLON ':' (2, 5) (2, 6) + FSTRING_MIDDLE 'a\\n b\\n c\\n d\\n' (2, 6) (6, 0) + RBRACE '}' (6, 0) (6, 1) + FSTRING_MIDDLE '__' (6, 1) (6, 3) + FSTRING_END "'''" (6, 3) (6, 6) + """) + + self.check_tokenize("""\ +f'__{ + x:d +}__'""", """\ + FSTRING_START "f'" (1, 0) (1, 2) + FSTRING_MIDDLE '__' (1, 2) (1, 4) + LBRACE '{' (1, 4) (1, 5) + NAME 'x' (2, 4) (2, 5) + COLON ':' (2, 5) (2, 6) + FSTRING_MIDDLE 'd' (2, 6) (2, 7) + RBRACE '}' (3, 0) (3, 1) + FSTRING_MIDDLE '__' (3, 1) (3, 3) + FSTRING_END "'" (3, 3) (3, 4) """) def test_function(self): diff --git a/Lib/test/test_unparse.py b/Lib/test/test_unparse.py index bdf7b0588be..6f698a8d891 100644 --- a/Lib/test/test_unparse.py +++ b/Lib/test/test_unparse.py @@ -730,7 +730,8 @@ class DirectoryTestCase(ASTTestCase): test_directories = (lib_dir, lib_dir / "test") run_always_files = {"test_grammar.py", "test_syntax.py", "test_compile.py", "test_ast.py", "test_asdl_parser.py", "test_fstring.py", - "test_patma.py", "test_type_alias.py", "test_type_params.py"} + "test_patma.py", "test_type_alias.py", "test_type_params.py", + "test_tokenize.py"} _files_to_test = None diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-10-03-11-43-48.gh-issue-110259.ka93x5.rst b/Misc/NEWS.d/next/Core and Builtins/2023-10-03-11-43-48.gh-issue-110259.ka93x5.rst new file mode 100644 index 00000000000..55c743d0e49 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-10-03-11-43-48.gh-issue-110259.ka93x5.rst @@ -0,0 +1,3 @@ +Correctly identify the format spec in f-strings (with single or triple +quotes) that have multiple lines in the expression part and include a +formatting spec. Patch by Pablo Galindo diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 41d0d16a471..5e3816f59af 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -2690,11 +2690,28 @@ f_string_middle: if (tok->done == E_ERROR) { return MAKE_TOKEN(ERRORTOKEN); } - if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) { + int in_format_spec = ( + current_tok->last_expr_end != -1 + && + INSIDE_FSTRING_EXPR(current_tok) + ); + + if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) { if (tok->decoding_erred) { return MAKE_TOKEN(ERRORTOKEN); } + // If we are in a format spec and we found a newline, + // it means that the format spec ends here and we should + // return to the regular mode. + if (in_format_spec && c == '\n') { + tok_backup(tok, c); + TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE; + p_start = tok->start; + p_end = tok->cur; + return MAKE_TOKEN(FSTRING_MIDDLE); + } + assert(tok->multi_line_start != NULL); // shift the tok_state's location into // the start of string, and report the error @@ -2726,11 +2743,6 @@ f_string_middle: end_quote_size = 0; } - int in_format_spec = ( - current_tok->last_expr_end != -1 - && - INSIDE_FSTRING_EXPR(current_tok) - ); if (c == '{') { int peek = tok_nextc(tok); if (peek != '{' || in_format_spec) {