Issue #18037: 2to3 now escapes '\u' and '\U' in native strings.

This commit is contained in:
Serhiy Storchaka 2013-10-03 12:10:49 +03:00
commit 026af2a597
3 changed files with 64 additions and 7 deletions

View File

@ -1,25 +1,43 @@
"""Fixer that changes unicode to str, unichr to chr, and u"..." into "...".
r"""Fixer for unicode.
* Changes unicode to str and unichr to chr.
* If "...\u..." is not a unicode literal, change it into "...\\u...".
* Change u"..." into "...".
"""
import re
from ..pgen2 import token
from .. import fixer_base
# Python 2 names handled by the fixer, mapped to the names that replace them.
_mapping = {"unichr" : "chr", "unicode" : "str"}
# Matches the opening of a u-prefixed string literal: u/U, an optional r/R,
# then a single or double quote.
_literal_re = re.compile(r"[uU][rR]?[\'\"]")
class FixUnicode(fixer_base.BaseFix):
    r"""Fixer that rewrites Python 2 unicode spellings.

    * The names ``unicode`` and ``unichr`` become ``str`` and ``chr``.
    * A leading ``u``/``U`` prefix is dropped from string literals.
    * In literals that start with a quote or an ``r``/``R`` prefix, ``\u``
      and ``\U`` are rewritten as ``\\u`` and ``\\U``, unless
      ``unicode_literals`` is among the tree's future features.
    """
    BM_compatible = True
    PATTERN = "STRING | 'unicode' | 'unichr'"

    def start_tree(self, tree, filename):
        """Record whether *tree* lists ``unicode_literals`` as a future feature."""
        super(FixUnicode, self).start_tree(tree, filename)
        self.unicode_literals = 'unicode_literals' in tree.future_features

    def transform(self, node, results):
        """Return the replacement for a matched NAME or STRING node.

        NAME nodes are cloned with their value looked up in ``_mapping``.
        STRING nodes are cloned with the escaped and/or prefix-stripped
        text; when the text does not change, *node* itself is returned.
        Any other node type falls through and returns None.
        """
        if node.type == token.NAME:
            new = node.clone()
            new.value = _mapping[node.value]
            return new
        elif node.type == token.STRING:
            val = node.value
            # NOTE(review): this guard also matches r'...' / R'...' literals.
            # Raw literals do not treat \u as an escape, so doubling the
            # backslash there changes the string's value -- confirm whether
            # raw literals should be excluded (the tests currently expect
            # them to be escaped).
            if (not self.unicode_literals and val[0] in 'rR\'"' and
                    '\\' in val):
                # Split on escaped backslashes (two characters) so they stay
                # intact; any "\u" or "\U" left inside a fragment is then a
                # lone backslash followed by u/U, which gets doubled.
                val = r'\\'.join([
                    v.replace('\\u', r'\\u').replace('\\U', r'\\U')
                    for v in val.split(r'\\')
                ])
            # u-prefixed literals skip the escaping above (their first
            # character is not a quote or r/R) and only lose the prefix.
            if val[0] in 'uU':
                val = val[1:]
            if val == node.value:
                return node
            new = node.clone()
            new.value = val
            return new

View File

@ -2883,6 +2883,43 @@ class Test_unicode(FixerTestCase):
a = """R'''x''' """
self.check(b, a)
def test_native_literal_escape_u(self):
    # Unprefixed and raw native literals: every \u and \U that is not
    # already behind an escaped backslash gains a second backslash.
    cases = (
        (r"""'\\\u20ac\U0001d121\\u20ac'""",
         r"""'\\\\u20ac\\U0001d121\\u20ac'"""),
        (r"""r'\\\u20ac\U0001d121\\u20ac'""",
         r"""r'\\\\u20ac\\U0001d121\\u20ac'"""),
    )
    for before, after in cases:
        self.check(before, after)
def test_bytes_literal_escape_u(self):
    # b- and br-prefixed literals: the expected text equals the input text.
    cases = (
        (r"""b'\\\u20ac\U0001d121\\u20ac'""",
         r"""b'\\\u20ac\U0001d121\\u20ac'"""),
        (r"""br'\\\u20ac\U0001d121\\u20ac'""",
         r"""br'\\\u20ac\U0001d121\\u20ac'"""),
    )
    for before, after in cases:
        self.check(before, after)
def test_unicode_literal_escape_u(self):
    # u- and ur-prefixed literals: only the leading "u" is dropped, the
    # escapes inside the literal are left as written.
    cases = (
        (r"""u'\\\u20ac\U0001d121\\u20ac'""",
         r"""'\\\u20ac\U0001d121\\u20ac'"""),
        (r"""ur'\\\u20ac\U0001d121\\u20ac'""",
         r"""r'\\\u20ac\U0001d121\\u20ac'"""),
    )
    for before, after in cases:
        self.check(before, after)
def test_native_unicode_literal_escape_u(self):
    # With the unicode_literals future import present, native literals are
    # expected to come back with their escapes untouched.
    future = 'from __future__ import unicode_literals\n'
    cases = (
        (r"""'\\\u20ac\U0001d121\\u20ac'""",
         r"""'\\\u20ac\U0001d121\\u20ac'"""),
        (r"""r'\\\u20ac\U0001d121\\u20ac'""",
         r"""r'\\\u20ac\U0001d121\\u20ac'"""),
    )
    for before, after in cases:
        self.check(future + before, future + after)
class Test_callable(FixerTestCase):
fixer = "callable"

View File

@ -20,6 +20,8 @@ Core and Builtins
Library
-------
- Issue #18037: 2to3 now escapes '\u' and '\U' in native strings.
- Issue #17839: base64.decodebytes and base64.encodebytes now accept any
object that exports a 1 dimensional array of bytes (this means the same
is now also true for base64_codec)