bpo-38870: correctly escape unprintable characters on ast.unparse (GH-20166)
Unprintable characters such as `\x00` weren't correctly round-tripped because the default string repr wasn't used when generating docstrings. This patch correctly escapes all unprintable characters (except `\n` and `\t`, which are commonly used for formatting and are usually found unescaped in the source). Co-authored-by: Pablo Galindo <Pablogsal@gmail.com> Co-authored-by: Batuhan Taskaya <isidentical@gmail.com>
This commit is contained in:
parent
dc31800f86
commit
d71a6492db
14
Lib/ast.py
14
Lib/ast.py
|
@ -1090,6 +1090,15 @@ class _Unparser(NodeVisitor):
|
|||
self.write(node.id)
|
||||
|
||||
def _write_docstring(self, node):
|
||||
def esc_char(c):
|
||||
if c in ("\n", "\t"):
|
||||
# In the AST form, we don't know the author's intention
|
||||
# about how this should be displayed. We'll escape everything
|
||||
# except \n and \t, because they are more likely to be unescaped
|
||||
# in the source
|
||||
return c
|
||||
return c.encode('unicode_escape').decode('ascii')
|
||||
|
||||
self.fill()
|
||||
if node.kind == "u":
|
||||
self.write("u")
|
||||
|
@ -1097,11 +1106,10 @@ class _Unparser(NodeVisitor):
|
|||
value = node.value
|
||||
if value:
|
||||
# Preserve quotes in the docstring by escaping them
|
||||
value = value.replace("\\", "\\\\")
|
||||
value = value.replace('"""', '""\"')
|
||||
value = value.replace("\r", "\\r")
|
||||
value = "".join(map(esc_char, value))
|
||||
if value[-1] == '"':
|
||||
value = value.replace('"', '\\"', -1)
|
||||
value = value.replace('"""', '""\\"')
|
||||
|
||||
self.write(f'"""{value}"""')
|
||||
|
||||
|
|
|
@ -324,7 +324,11 @@ class UnparseTestCase(ASTTestCase):
|
|||
'\\t',
|
||||
'\n',
|
||||
'\\n',
|
||||
'\r\\r\t\\t\n\\n'
|
||||
'\r\\r\t\\t\n\\n',
|
||||
'""">>> content = \"\"\"blabla\"\"\" <<<"""',
|
||||
r'foo\n\x00',
|
||||
'🐍⛎𩸽üéş^\X\BB\N{LONG RIGHTWARDS SQUIGGLE ARROW}'
|
||||
|
||||
)
|
||||
for docstring in docstrings:
|
||||
# check as Module docstrings for easy testing
|
||||
|
|
Loading…
Reference in New Issue