Issue #18037: 2to3 now escapes '\u' and '\U' in native strings.
This commit is contained in:
commit
026af2a597
|
@ -1,25 +1,43 @@
|
|||
"""Fixer that changes unicode to str, unichr to chr, and u"..." into "...".
|
||||
r"""Fixer for unicode.
|
||||
|
||||
* Changes unicode to str and unichr to chr.
|
||||
|
||||
* If "...\u..." is not a unicode literal, change it into "...\\u...".
|
||||
|
||||
* Changes u"..." into "...".
|
||||
|
||||
"""
|
||||
|
||||
import re
|
||||
from ..pgen2 import token
|
||||
from .. import fixer_base
|
||||
|
||||
# Replacement name for each builtin name this fixer rewrites (unichr -> chr,
# unicode -> str).
_mapping = {"unichr" : "chr", "unicode" : "str"}
|
||||
# Matches the start of a string literal carrying a u/U prefix, optionally
# followed by r/R, up to and including the opening quote character.
_literal_re = re.compile(r"[uU][rR]?[\'\"]")
|
||||
|
||||
class FixUnicode(fixer_base.BaseFix):
    """Rewrite unicode-related names and string literals.

    ``unicode`` / ``unichr`` names are replaced through ``_mapping``.
    String literals lose a leading ``u`` / ``U`` prefix, and literals without
    that prefix get their backslash-u and backslash-U sequences escaped
    unless the tree enables the ``unicode_literals`` future feature.
    """

    BM_compatible = True
    PATTERN = "STRING | 'unicode' | 'unichr'"

    def start_tree(self, tree, filename):
        """Remember whether *tree* lists ``unicode_literals`` in its future features."""
        super(FixUnicode, self).start_tree(tree, filename)
        self.unicode_literals = 'unicode_literals' in tree.future_features

    def transform(self, node, results):
        """Return the replacement for a matched NAME or STRING leaf.

        NAME leaves are cloned with the mapped name.  STRING leaves are
        returned unchanged (the same object) when no rewrite applies;
        otherwise a clone carrying the rewritten literal text is returned.
        Any other node type yields ``None``.
        """
        if node.type == token.NAME:
            renamed = node.clone()
            renamed.value = _mapping[node.value]
            return renamed
        if node.type != token.STRING:
            return None

        val = node.value
        # Only literals starting with a quote or an r/R prefix are escaped
        # here; u/U-prefixed and b/B-prefixed literals skip this step.
        # NOTE(review): raw literals (r/R prefix) are escaped too.  Python 3
        # raw strings do not interpret \u or \U, so doubling the backslash
        # there may change the literal's value -- confirm this is intended.
        if (not self.unicode_literals and val[0] in 'rR\'"' and
                '\\' in val):
            # Split on escaped backslashes ("\\") first so that a "u"/"U"
            # that merely follows an escaped backslash is left alone; only
            # genuine \u / \U sequences inside each piece are doubled.
            val = r'\\'.join([
                v.replace('\\u', r'\\u').replace('\\U', r'\\U')
                for v in val.split(r'\\')
            ])
        # Drop the u/U prefix.  This covers every literal the former
        # ``_literal_re`` early-return handled, so that branch is not needed.
        if val[0] in 'uU':
            val = val[1:]
        if val == node.value:
            return node
        new = node.clone()
        new.value = val
        return new
|
||||
|
|
|
@ -2883,6 +2883,43 @@ class Test_unicode(FixerTestCase):
|
|||
a = """R'''x''' """
|
||||
self.check(b, a)
|
||||
|
||||
def test_native_literal_escape_u(self):
    # Native literals, plain and raw: the backslash-u / backslash-U
    # sequences are expected to come out escaped, while the "u" after an
    # already escaped backslash stays as it is.
    before_body = r"""'\\\u20ac\U0001d121\\u20ac'"""
    after_body = r"""'\\\\u20ac\\U0001d121\\u20ac'"""
    for prefix in ("", "r"):
        self.check(prefix + before_body, prefix + after_body)
|
||||
|
||||
def test_bytes_literal_escape_u(self):
    # Bytes literals (plain and raw) are expected to come out with the
    # same text they went in with.
    body = r"""'\\\u20ac\U0001d121\\u20ac'"""
    for prefix in ("b", "br"):
        literal = prefix + body
        self.check(literal, literal)
|
||||
|
||||
def test_unicode_literal_escape_u(self):
    # Unicode literals only lose their "u" prefix; the escape sequences in
    # the body are expected to be left untouched.
    body = r"""'\\\u20ac\U0001d121\\u20ac'"""
    for before_prefix, after_prefix in (("u", ""), ("ur", "r")):
        self.check(before_prefix + body, after_prefix + body)
|
||||
|
||||
def test_native_unicode_literal_escape_u(self):
    # With unicode_literals imported from __future__, unprefixed and raw
    # literals are expected to pass through with identical text.
    future_import = 'from __future__ import unicode_literals\n'
    body = r"""'\\\u20ac\U0001d121\\u20ac'"""
    for prefix in ("", "r"):
        source = future_import + prefix + body
        self.check(source, source)
|
||||
|
||||
class Test_callable(FixerTestCase):
|
||||
fixer = "callable"
|
||||
|
||||
|
|
|
@ -20,6 +20,8 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #18037: 2to3 now escapes '\u' and '\U' in native strings.
|
||||
|
||||
- Issue #17839: base64.decodebytes and base64.encodebytes now accept any
|
||||
object that exports a 1 dimensional array of bytes (this means the same
|
||||
is now also true for base64_codec)
|
||||
|
|
Loading…
Reference in New Issue