mirror of https://github.com/python/cpython

gh-115154: Fix untokenize handling of unicode named literals (#115171)

parent d504968983
commit ecf16ee50e
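Before this change, Untokenizer escaped every "{" and "}" in an FSTRING_MIDDLE token, so the braces that belong to a unicode named escape such as \N{EXCLAMATION MARK} were doubled as well and the round-tripped source no longer matched the original. A minimal reproduction (assuming Python 3.12+, where f-strings are tokenized per PEP 701):

    import io
    import tokenize

    source = "f'\\N{EXCLAMATION MARK}'"
    tokens = tokenize.generate_tokens(io.StringIO(source).readline)
    # Before the fix this printed f'\N{{EXCLAMATION MARK}}', silently turning
    # the named escape into escaped literal braces; now it round-trips intact.
    print(tokenize.untokenize(tokens))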
Lib/test/test_tokenize.py

@@ -1877,6 +1877,43 @@ class TestRoundtrip(TestCase):
                              "    print('Can not import' # comment2\n)"
                              "else:   print('Loaded')\n")
+
+        self.check_roundtrip("f'\\N{EXCLAMATION MARK}'")
+        self.check_roundtrip(r"f'\\N{SNAKE}'")
+        self.check_roundtrip(r"f'\\N{{SNAKE}}'")
+        self.check_roundtrip(r"f'\N{SNAKE}'")
+        self.check_roundtrip(r"f'\\\N{SNAKE}'")
+        self.check_roundtrip(r"f'\\\\\N{SNAKE}'")
+        self.check_roundtrip(r"f'\\\\\\\N{SNAKE}'")
+
+        self.check_roundtrip(r"f'\\N{1}'")
+        self.check_roundtrip(r"f'\\\\N{2}'")
+        self.check_roundtrip(r"f'\\\\\\N{3}'")
+        self.check_roundtrip(r"f'\\\\\\\\N{4}'")
+
+        self.check_roundtrip(r"f'\\N{{'")
+        self.check_roundtrip(r"f'\\\\N{{'")
+        self.check_roundtrip(r"f'\\\\\\N{{'")
+        self.check_roundtrip(r"f'\\\\\\\\N{{'")
+        cases = [
+            """
+if 1:
+    "foo"
+"bar"
+""",
+            """
+if 1:
+    ("foo"
+     "bar")
+""",
+            """
+if 1:
+    "foo"
+    "bar"
+""" ]
+        for case in cases:
+            self.check_roundtrip(case)
+
 
     def test_continuation(self):
         # Balancing continuation
         self.check_roundtrip("a = (3,4, \n"
@@ -1911,9 +1948,6 @@ class TestRoundtrip(TestCase):
         tempdir = os.path.dirname(__file__) or os.curdir
         testfiles = glob.glob(os.path.join(glob.escape(tempdir), "test*.py"))
 
-        # TODO: Remove this once we can untokenize PEP 701 syntax
-        testfiles.remove(os.path.join(tempdir, "test_fstring.py"))
-
         if not support.is_resource_enabled("cpu"):
             testfiles = random.sample(testfiles, 10)
 
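With the fix in place, test_fstring.py no longer has to be excluded from the random round-trip test above. For reference, a simplified sketch of the property that check_roundtrip asserts (not the real helper, which also compares exact token tuples in several modes):

    import io
    import tokenize

    def roundtrip_equal(source):
        # Tokenize, untokenize, and re-tokenize: the (type, string) stream
        # must survive the round trip unchanged.
        tokens = list(tokenize.generate_tokens(io.StringIO(source).readline))
        back = tokenize.untokenize(tokens)
        new_tokens = list(tokenize.generate_tokens(io.StringIO(back).readline))
        return [t[:2] for t in tokens] == [t[:2] for t in new_tokens]

    assert roundtrip_equal(r"f'\N{SNAKE}'")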
Lib/tokenize.py

@@ -168,6 +168,7 @@ class Untokenizer:
         self.tokens = []
         self.prev_row = 1
         self.prev_col = 0
+        self.prev_type = None
         self.encoding = None
 
     def add_whitespace(self, start):
@@ -183,6 +184,29 @@ class Untokenizer:
         if col_offset:
             self.tokens.append(" " * col_offset)
 
+    def escape_brackets(self, token):
+        characters = []
+        consume_until_next_bracket = False
+        for character in token:
+            if character == "}":
+                if consume_until_next_bracket:
+                    consume_until_next_bracket = False
+                else:
+                    characters.append(character)
+            if character == "{":
+                n_backslashes = sum(
+                    1 for char in _itertools.takewhile(
+                        "\\".__eq__,
+                        characters[-2::-1]
+                    )
+                )
+                if n_backslashes % 2 == 0:
+                    characters.append(character)
+                else:
+                    consume_until_next_bracket = True
+            characters.append(character)
+        return "".join(characters)
+
     def untokenize(self, iterable):
         it = iter(iterable)
         indents = []
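The new escape_brackets() helper re-doubles literal braces in an FSTRING_MIDDLE token (the tokenizer stores them un-escaped) while leaving the braces of a \N{...} named escape untouched: a "{" that follows an unescaped \N opens a protected region, and its matching "}" is emitted verbatim. Illustrative calls (Untokenizer is a private class, used here only to show the behavior):

    from tokenize import Untokenizer

    u = Untokenizer()
    print(u.escape_brackets(r"\N{SNAKE}"))  # \N{SNAKE}  (named escape preserved)
    print(u.escape_brackets("{"))           # {{         (literal brace doubled)
    print(u.escape_brackets("}"))           # }}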
@@ -214,11 +238,13 @@ class Untokenizer:
                 startline = False
             elif tok_type == FSTRING_MIDDLE:
                 if '{' in token or '}' in token:
+                    token = self.escape_brackets(token)
+                    last_line = token.splitlines()[-1]
                     end_line, end_col = end
-                    end = (end_line, end_col + token.count('{') + token.count('}'))
-                    token = re.sub('{', '{{', token)
-                    token = re.sub('}', '}}', token)
+                    extra_chars = last_line.count("{{") + last_line.count("}}")
+                    end = (end_line, end_col + extra_chars)
+            elif tok_type in (STRING, FSTRING_START) and self.prev_type in (STRING, FSTRING_END):
+                self.tokens.append(" ")
+
             self.add_whitespace(start)
             self.tokens.append(token)
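Two things change here: the end column of a rewritten FSTRING_MIDDLE token now advances only by the braces actually doubled on the token's last line (the old code counted every brace, including those of named escapes, across all lines), and an explicit space keeps consecutive string tokens from fusing. A round-trip of one of the new test cases (hedged; assumes Python 3.12+):

    import io
    import tokenize

    src = 'if 1:\n    ("foo"\n     "bar")\n'
    tokens = tokenize.generate_tokens(io.StringIO(src).readline)
    # The implicitly concatenated literals come back as two separate tokens.
    print(tokenize.untokenize(tokens))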
@@ -226,6 +252,7 @@ class Untokenizer:
             if tok_type in (NEWLINE, NL):
                 self.prev_row += 1
                 self.prev_col = 0
+            self.prev_type = tok_type
         return "".join(self.tokens)
 
     def compat(self, token, iterable):
@@ -233,6 +260,7 @@ class Untokenizer:
         toks_append = self.tokens.append
         startline = token[0] in (NEWLINE, NL)
         prevstring = False
+        in_fstring = 0
 
         for tok in _itertools.chain([token], iterable):
             toknum, tokval = tok[:2]
@@ -251,6 +279,10 @@ class Untokenizer:
             else:
                 prevstring = False
 
+            if toknum == FSTRING_START:
+                in_fstring += 1
+            elif toknum == FSTRING_END:
+                in_fstring -= 1
             if toknum == INDENT:
                 indents.append(tokval)
                 continue
@@ -263,11 +295,18 @@ class Untokenizer:
                 toks_append(indents[-1])
                 startline = False
             elif toknum == FSTRING_MIDDLE:
-                if '{' in tokval or '}' in tokval:
-                    tokval = re.sub('{', '{{', tokval)
-                    tokval = re.sub('}', '}}', tokval)
+                tokval = self.escape_brackets(tokval)
+
+            # Insert a space between two consecutive brackets if we are in an f-string
+            if tokval in {"{", "}"} and self.tokens and self.tokens[-1] == tokval and in_fstring:
+                tokval = ' ' + tokval
+
+            # Insert a space between two consecutive f-strings
+            if toknum in (STRING, FSTRING_START) and self.prev_type in (STRING, FSTRING_END):
+                self.tokens.append(" ")
+
             toks_append(tokval)
+            self.prev_type = toknum
 
 
 def untokenize(iterable):
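The compat() path is taken when untokenize() receives plain (type, string) pairs with no position information, so it cannot rely on add_whitespace() and must keep adjacent tokens from merging on its own; that is why it now tracks f-string nesting with in_fstring. A hedged sketch of both new behaviors (assuming Python 3.12+):

    import io
    import tokenize

    def compat_roundtrip(source):
        # Dropping everything but (type, string) forces the compat() path.
        pairs = [t[:2] for t in tokenize.generate_tokens(io.StringIO(source).readline)]
        return tokenize.untokenize(pairs)

    print(compat_roundtrip(r"f'\N{SNAKE}'"))  # named escape survives un-doubled
    print(compat_roundtrip("f'{ {1} }'"))     # the two "{" stay apart, not fused into "{{"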
Misc/NEWS.d (new entry):

@@ -0,0 +1,2 @@
+Fix a bug that was causing the :func:`tokenize.untokenize` function to
+handle unicode named literals incorrectly. Patch by Pablo Galindo