bpo-28002: Roundtrip f-strings with ast.unparse better (GH-19612) (GH-23430)
By attempting to avoid backslashes in f-string expressions.
We also now proactively raise errors for some backslashes we can't
avoid while unparsing FormattedValues
Co-authored-by: hauntsaninja <>
Co-authored-by: Shantanu <hauntsaninja@users.noreply.github.com>
Co-authored-by: Batuhan Taskaya <isidentical@gmail.com>
(cherry picked from commit a993e901eb
)
Co-authored-by: Shantanu <12621235+hauntsaninja@users.noreply.github.com>
Co-authored-by: Shantanu <12621235+hauntsaninja@users.noreply.github.com>
This commit is contained in:
parent
f552f4b2d6
commit
3763cc1dbd
110
Lib/ast.py
110
Lib/ast.py
|
@ -662,17 +662,23 @@ class _Precedence(IntEnum):
|
||||||
except ValueError:
|
except ValueError:
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
|
||||||
|
_SINGLE_QUOTES = ("'", '"')
|
||||||
|
_MULTI_QUOTES = ('"""', "'''")
|
||||||
|
_ALL_QUOTES = (*_SINGLE_QUOTES, *_MULTI_QUOTES)
|
||||||
|
|
||||||
class _Unparser(NodeVisitor):
|
class _Unparser(NodeVisitor):
|
||||||
"""Methods in this class recursively traverse an AST and
|
"""Methods in this class recursively traverse an AST and
|
||||||
output source code for the abstract syntax; original formatting
|
output source code for the abstract syntax; original formatting
|
||||||
is disregarded."""
|
is disregarded."""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, *, _avoid_backslashes=False):
|
||||||
self._source = []
|
self._source = []
|
||||||
self._buffer = []
|
self._buffer = []
|
||||||
self._precedences = {}
|
self._precedences = {}
|
||||||
self._type_ignores = {}
|
self._type_ignores = {}
|
||||||
self._indent = 0
|
self._indent = 0
|
||||||
|
self._avoid_backslashes = _avoid_backslashes
|
||||||
|
|
||||||
def interleave(self, inter, f, seq):
|
def interleave(self, inter, f, seq):
|
||||||
"""Call f on each item in seq, calling inter() in between."""
|
"""Call f on each item in seq, calling inter() in between."""
|
||||||
|
@ -1067,15 +1073,85 @@ class _Unparser(NodeVisitor):
|
||||||
with self.block(extra=self.get_type_comment(node)):
|
with self.block(extra=self.get_type_comment(node)):
|
||||||
self.traverse(node.body)
|
self.traverse(node.body)
|
||||||
|
|
||||||
|
def _str_literal_helper(
|
||||||
|
self, string, *, quote_types=_ALL_QUOTES, escape_special_whitespace=False
|
||||||
|
):
|
||||||
|
"""Helper for writing string literals, minimizing escapes.
|
||||||
|
Returns the tuple (string literal to write, possible quote types).
|
||||||
|
"""
|
||||||
|
def escape_char(c):
|
||||||
|
# \n and \t are non-printable, but we only escape them if
|
||||||
|
# escape_special_whitespace is True
|
||||||
|
if not escape_special_whitespace and c in "\n\t":
|
||||||
|
return c
|
||||||
|
# Always escape backslashes and other non-printable characters
|
||||||
|
if c == "\\" or not c.isprintable():
|
||||||
|
return c.encode("unicode_escape").decode("ascii")
|
||||||
|
return c
|
||||||
|
|
||||||
|
escaped_string = "".join(map(escape_char, string))
|
||||||
|
possible_quotes = quote_types
|
||||||
|
if "\n" in escaped_string:
|
||||||
|
possible_quotes = [q for q in possible_quotes if q in _MULTI_QUOTES]
|
||||||
|
possible_quotes = [q for q in possible_quotes if q not in escaped_string]
|
||||||
|
if not possible_quotes:
|
||||||
|
# If there aren't any possible_quotes, fallback to using repr
|
||||||
|
# on the original string. Try to use a quote from quote_types,
|
||||||
|
# e.g., so that we use triple quotes for docstrings.
|
||||||
|
string = repr(string)
|
||||||
|
quote = next((q for q in quote_types if string[0] in q), string[0])
|
||||||
|
return string[1:-1], [quote]
|
||||||
|
if escaped_string:
|
||||||
|
# Sort so that we prefer '''"''' over """\""""
|
||||||
|
possible_quotes.sort(key=lambda q: q[0] == escaped_string[-1])
|
||||||
|
# If we're using triple quotes and we'd need to escape a final
|
||||||
|
# quote, escape it
|
||||||
|
if possible_quotes[0][0] == escaped_string[-1]:
|
||||||
|
assert len(possible_quotes[0]) == 3
|
||||||
|
escaped_string = escaped_string[:-1] + "\\" + escaped_string[-1]
|
||||||
|
return escaped_string, possible_quotes
|
||||||
|
|
||||||
|
def _write_str_avoiding_backslashes(self, string, *, quote_types=_ALL_QUOTES):
|
||||||
|
"""Write string literal value with a best effort attempt to avoid backslashes."""
|
||||||
|
string, quote_types = self._str_literal_helper(string, quote_types=quote_types)
|
||||||
|
quote_type = quote_types[0]
|
||||||
|
self.write(f"{quote_type}{string}{quote_type}")
|
||||||
|
|
||||||
def visit_JoinedStr(self, node):
|
def visit_JoinedStr(self, node):
|
||||||
self.write("f")
|
self.write("f")
|
||||||
self._fstring_JoinedStr(node, self.buffer_writer)
|
if self._avoid_backslashes:
|
||||||
self.write(repr(self.buffer))
|
self._fstring_JoinedStr(node, self.buffer_writer)
|
||||||
|
self._write_str_avoiding_backslashes(self.buffer)
|
||||||
|
return
|
||||||
|
|
||||||
|
# If we don't need to avoid backslashes globally (i.e., we only need
|
||||||
|
# to avoid them inside FormattedValues), it's cosmetically preferred
|
||||||
|
# to use escaped whitespace. That is, it's preferred to use backslashes
|
||||||
|
# for cases like: f"{x}\n". To accomplish this, we keep track of what
|
||||||
|
# in our buffer corresponds to FormattedValues and what corresponds to
|
||||||
|
# Constant parts of the f-string, and allow escapes accordingly.
|
||||||
|
buffer = []
|
||||||
|
for value in node.values:
|
||||||
|
meth = getattr(self, "_fstring_" + type(value).__name__)
|
||||||
|
meth(value, self.buffer_writer)
|
||||||
|
buffer.append((self.buffer, isinstance(value, Constant)))
|
||||||
|
new_buffer = []
|
||||||
|
quote_types = _ALL_QUOTES
|
||||||
|
for value, is_constant in buffer:
|
||||||
|
# Repeatedly narrow down the list of possible quote_types
|
||||||
|
value, quote_types = self._str_literal_helper(
|
||||||
|
value, quote_types=quote_types,
|
||||||
|
escape_special_whitespace=is_constant
|
||||||
|
)
|
||||||
|
new_buffer.append(value)
|
||||||
|
value = "".join(new_buffer)
|
||||||
|
quote_type = quote_types[0]
|
||||||
|
self.write(f"{quote_type}{value}{quote_type}")
|
||||||
|
|
||||||
def visit_FormattedValue(self, node):
|
def visit_FormattedValue(self, node):
|
||||||
self.write("f")
|
self.write("f")
|
||||||
self._fstring_FormattedValue(node, self.buffer_writer)
|
self._fstring_FormattedValue(node, self.buffer_writer)
|
||||||
self.write(repr(self.buffer))
|
self._write_str_avoiding_backslashes(self.buffer)
|
||||||
|
|
||||||
def _fstring_JoinedStr(self, node, write):
|
def _fstring_JoinedStr(self, node, write):
|
||||||
for value in node.values:
|
for value in node.values:
|
||||||
|
@ -1090,11 +1166,13 @@ class _Unparser(NodeVisitor):
|
||||||
|
|
||||||
def _fstring_FormattedValue(self, node, write):
|
def _fstring_FormattedValue(self, node, write):
|
||||||
write("{")
|
write("{")
|
||||||
unparser = type(self)()
|
unparser = type(self)(_avoid_backslashes=True)
|
||||||
unparser.set_precedence(_Precedence.TEST.next(), node.value)
|
unparser.set_precedence(_Precedence.TEST.next(), node.value)
|
||||||
expr = unparser.visit(node.value)
|
expr = unparser.visit(node.value)
|
||||||
if expr.startswith("{"):
|
if expr.startswith("{"):
|
||||||
write(" ") # Separate pair of opening brackets as "{ {"
|
write(" ") # Separate pair of opening brackets as "{ {"
|
||||||
|
if "\\" in expr:
|
||||||
|
raise ValueError("Unable to avoid backslash in f-string expression part")
|
||||||
write(expr)
|
write(expr)
|
||||||
if node.conversion != -1:
|
if node.conversion != -1:
|
||||||
conversion = chr(node.conversion)
|
conversion = chr(node.conversion)
|
||||||
|
@ -1111,33 +1189,17 @@ class _Unparser(NodeVisitor):
|
||||||
self.write(node.id)
|
self.write(node.id)
|
||||||
|
|
||||||
def _write_docstring(self, node):
|
def _write_docstring(self, node):
|
||||||
def esc_char(c):
|
|
||||||
if c in ("\n", "\t"):
|
|
||||||
# In the AST form, we don't know the author's intentation
|
|
||||||
# about how this should be displayed. We'll only escape
|
|
||||||
# \n and \t, because they are more likely to be unescaped
|
|
||||||
# in the source
|
|
||||||
return c
|
|
||||||
return c.encode('unicode_escape').decode('ascii')
|
|
||||||
|
|
||||||
self.fill()
|
self.fill()
|
||||||
if node.kind == "u":
|
if node.kind == "u":
|
||||||
self.write("u")
|
self.write("u")
|
||||||
|
self._write_str_avoiding_backslashes(node.value, quote_types=_MULTI_QUOTES)
|
||||||
value = node.value
|
|
||||||
if value:
|
|
||||||
# Preserve quotes in the docstring by escaping them
|
|
||||||
value = "".join(map(esc_char, value))
|
|
||||||
if value[-1] == '"':
|
|
||||||
value = value.replace('"', '\\"', -1)
|
|
||||||
value = value.replace('"""', '""\\"')
|
|
||||||
|
|
||||||
self.write(f'"""{value}"""')
|
|
||||||
|
|
||||||
def _write_constant(self, value):
|
def _write_constant(self, value):
|
||||||
if isinstance(value, (float, complex)):
|
if isinstance(value, (float, complex)):
|
||||||
# Substitute overflowing decimal literal for AST infinities.
|
# Substitute overflowing decimal literal for AST infinities.
|
||||||
self.write(repr(value).replace("inf", _INFSTR))
|
self.write(repr(value).replace("inf", _INFSTR))
|
||||||
|
elif self._avoid_backslashes and isinstance(value, str):
|
||||||
|
self._write_str_avoiding_backslashes(value)
|
||||||
else:
|
else:
|
||||||
self.write(repr(value))
|
self.write(repr(value))
|
||||||
|
|
||||||
|
|
|
@ -152,6 +152,18 @@ class UnparseTestCase(ASTTestCase):
|
||||||
# See issue 25180
|
# See issue 25180
|
||||||
self.check_ast_roundtrip(r"""f'{f"{0}"*3}'""")
|
self.check_ast_roundtrip(r"""f'{f"{0}"*3}'""")
|
||||||
self.check_ast_roundtrip(r"""f'{f"{y}"*3}'""")
|
self.check_ast_roundtrip(r"""f'{f"{y}"*3}'""")
|
||||||
|
self.check_ast_roundtrip("""f''""")
|
||||||
|
self.check_ast_roundtrip('''f"""'end' "quote\\""""''')
|
||||||
|
|
||||||
|
def test_fstrings_complicated(self):
|
||||||
|
# See issue 28002
|
||||||
|
self.check_ast_roundtrip("""f'''{"'"}'''""")
|
||||||
|
self.check_ast_roundtrip('''f\'\'\'-{f"""*{f"+{f'.{x}.'}+"}*"""}-\'\'\'''')
|
||||||
|
self.check_ast_roundtrip('''f\'\'\'-{f"""*{f"+{f'.{x}.'}+"}*"""}-'single quote\\'\'\'\'''')
|
||||||
|
self.check_ast_roundtrip('f"""{\'\'\'\n\'\'\'}"""')
|
||||||
|
self.check_ast_roundtrip('f"""{g(\'\'\'\n\'\'\')}"""')
|
||||||
|
self.check_ast_roundtrip('''f"a\\r\\nb"''')
|
||||||
|
self.check_ast_roundtrip('''f"\\u2028{'x'}"''')
|
||||||
|
|
||||||
def test_strings(self):
|
def test_strings(self):
|
||||||
self.check_ast_roundtrip("u'foo'")
|
self.check_ast_roundtrip("u'foo'")
|
||||||
|
@ -311,6 +323,9 @@ class UnparseTestCase(ASTTestCase):
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_invalid_fstring_backslash(self):
|
||||||
|
self.check_invalid(ast.FormattedValue(value=ast.Constant(value="\\\\")))
|
||||||
|
|
||||||
def test_invalid_set(self):
|
def test_invalid_set(self):
|
||||||
self.check_invalid(ast.Set(elts=[]))
|
self.check_invalid(ast.Set(elts=[]))
|
||||||
|
|
||||||
|
@ -330,8 +345,8 @@ class UnparseTestCase(ASTTestCase):
|
||||||
'\r\\r\t\\t\n\\n',
|
'\r\\r\t\\t\n\\n',
|
||||||
'""">>> content = \"\"\"blabla\"\"\" <<<"""',
|
'""">>> content = \"\"\"blabla\"\"\" <<<"""',
|
||||||
r'foo\n\x00',
|
r'foo\n\x00',
|
||||||
'🐍⛎𩸽üéş^\N{LONG RIGHTWARDS SQUIGGLE ARROW}'
|
"' \\'\\'\\'\"\"\" \"\"\\'\\' \\'",
|
||||||
|
'🐍⛎𩸽üéş^\\\\X\\\\BB\N{LONG RIGHTWARDS SQUIGGLE ARROW}'
|
||||||
)
|
)
|
||||||
for docstring in docstrings:
|
for docstring in docstrings:
|
||||||
# check as Module docstrings for easy testing
|
# check as Module docstrings for easy testing
|
||||||
|
@ -416,7 +431,6 @@ class CosmeticTestCase(ASTTestCase):
|
||||||
self.check_src_roundtrip("call((yield x))")
|
self.check_src_roundtrip("call((yield x))")
|
||||||
self.check_src_roundtrip("return x + (yield x)")
|
self.check_src_roundtrip("return x + (yield x)")
|
||||||
|
|
||||||
|
|
||||||
def test_class_bases_and_keywords(self):
|
def test_class_bases_and_keywords(self):
|
||||||
self.check_src_roundtrip("class X:\n pass")
|
self.check_src_roundtrip("class X:\n pass")
|
||||||
self.check_src_roundtrip("class X(A):\n pass")
|
self.check_src_roundtrip("class X(A):\n pass")
|
||||||
|
@ -429,6 +443,13 @@ class CosmeticTestCase(ASTTestCase):
|
||||||
self.check_src_roundtrip("class X(*args):\n pass")
|
self.check_src_roundtrip("class X(*args):\n pass")
|
||||||
self.check_src_roundtrip("class X(*args, **kwargs):\n pass")
|
self.check_src_roundtrip("class X(*args, **kwargs):\n pass")
|
||||||
|
|
||||||
|
def test_fstrings(self):
|
||||||
|
self.check_src_roundtrip('''f\'\'\'-{f"""*{f"+{f'.{x}.'}+"}*"""}-\'\'\'''')
|
||||||
|
self.check_src_roundtrip('''f"\\u2028{'x'}"''')
|
||||||
|
self.check_src_roundtrip(r"f'{x}\n'")
|
||||||
|
self.check_src_roundtrip('''f''\'{"""\n"""}\\n''\'''')
|
||||||
|
self.check_src_roundtrip('''f''\'{f"""{x}\n"""}\\n''\'''')
|
||||||
|
|
||||||
def test_docstrings(self):
|
def test_docstrings(self):
|
||||||
docstrings = (
|
docstrings = (
|
||||||
'"""simple doc string"""',
|
'"""simple doc string"""',
|
||||||
|
@ -443,6 +464,10 @@ class CosmeticTestCase(ASTTestCase):
|
||||||
'""""""',
|
'""""""',
|
||||||
'"""\'\'\'"""',
|
'"""\'\'\'"""',
|
||||||
'"""\'\'\'\'\'\'"""',
|
'"""\'\'\'\'\'\'"""',
|
||||||
|
'"""🐍⛎𩸽üéş^\\\\X\\\\BB⟿"""',
|
||||||
|
'"""end in single \'quote\'"""',
|
||||||
|
"'''end in double \"quote\"'''",
|
||||||
|
'"""almost end in double "quote"."""',
|
||||||
)
|
)
|
||||||
|
|
||||||
for prefix in docstring_prefixes:
|
for prefix in docstring_prefixes:
|
||||||
|
@ -483,9 +508,8 @@ class DirectoryTestCase(ASTTestCase):
|
||||||
|
|
||||||
lib_dir = pathlib.Path(__file__).parent / ".."
|
lib_dir = pathlib.Path(__file__).parent / ".."
|
||||||
test_directories = (lib_dir, lib_dir / "test")
|
test_directories = (lib_dir, lib_dir / "test")
|
||||||
skip_files = {"test_fstring.py"}
|
|
||||||
run_always_files = {"test_grammar.py", "test_syntax.py", "test_compile.py",
|
run_always_files = {"test_grammar.py", "test_syntax.py", "test_compile.py",
|
||||||
"test_ast.py", "test_asdl_parser.py"}
|
"test_ast.py", "test_asdl_parser.py", "test_fstring.py"}
|
||||||
|
|
||||||
_files_to_test = None
|
_files_to_test = None
|
||||||
|
|
||||||
|
@ -525,14 +549,6 @@ class DirectoryTestCase(ASTTestCase):
|
||||||
if test.support.verbose:
|
if test.support.verbose:
|
||||||
print(f"Testing {item.absolute()}")
|
print(f"Testing {item.absolute()}")
|
||||||
|
|
||||||
# Some f-strings are not correctly round-tripped by
|
|
||||||
# Tools/parser/unparse.py. See issue 28002 for details.
|
|
||||||
# We need to skip files that contain such f-strings.
|
|
||||||
if item.name in self.skip_files:
|
|
||||||
if test.support.verbose:
|
|
||||||
print(f"Skipping {item.absolute()}: see issue 28002")
|
|
||||||
continue
|
|
||||||
|
|
||||||
with self.subTest(filename=item):
|
with self.subTest(filename=item):
|
||||||
source = read_pyfile(item)
|
source = read_pyfile(item)
|
||||||
self.check_ast_roundtrip(source)
|
self.check_ast_roundtrip(source)
|
||||||
|
|
Loading…
Reference in New Issue