From 2a16eea71f56c2d8f38c295c8ce71a9a9a140aff Mon Sep 17 00:00:00 2001 From: Daniel Fortunov Date: Wed, 28 Aug 2019 06:38:09 +0200 Subject: [PATCH] bpo-36582: Make collections.UserString.encode() return bytes, not str (GH-13138) --- Lib/collections/__init__.py | 10 ++++------ Lib/test/test_userstring.py | 14 ++++++++++++++ Misc/ACKS | 1 + .../2019-05-07-17-42-36.bpo-36582.L_dxR6.rst | 1 + 4 files changed, 20 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2019-05-07-17-42-36.bpo-36582.L_dxR6.rst diff --git a/Lib/collections/__init__.py b/Lib/collections/__init__.py index 6a3e599a91e..859b8460e48 100644 --- a/Lib/collections/__init__.py +++ b/Lib/collections/__init__.py @@ -1184,12 +1184,10 @@ class UserString(_collections_abc.Sequence): if isinstance(sub, UserString): sub = sub.data return self.data.count(sub, start, end) - def encode(self, encoding=None, errors=None): # XXX improve this? - if encoding: - if errors: - return self.__class__(self.data.encode(encoding, errors)) - return self.__class__(self.data.encode(encoding)) - return self.__class__(self.data.encode()) + def encode(self, encoding='utf-8', errors='strict'): + encoding = 'utf-8' if encoding is None else encoding + errors = 'strict' if errors is None else errors + return self.data.encode(encoding, errors) def endswith(self, suffix, start=0, end=_sys.maxsize): return self.data.endswith(suffix, start, end) def expandtabs(self, tabsize=8): diff --git a/Lib/test/test_userstring.py b/Lib/test/test_userstring.py index 19b0acfc760..4d1d8b6b6fe 100644 --- a/Lib/test/test_userstring.py +++ b/Lib/test/test_userstring.py @@ -51,6 +51,20 @@ class UserStringTest( str3 = ustr3('TEST') self.assertEqual(fmt2 % str3, 'value is TEST') + def test_encode_default_args(self): + self.checkequal(b'hello', 'hello', 'encode') + # Check that encoding defaults to utf-8 + self.checkequal(b'\xf0\xa3\x91\x96', '\U00023456', 'encode') + # Check that errors defaults to 'strict' + self.checkraises(UnicodeError, '\ud800', 'encode') + + def test_encode_explicit_none_args(self): + self.checkequal(b'hello', 'hello', 'encode', None, None) + # Check that encoding defaults to utf-8 + self.checkequal(b'\xf0\xa3\x91\x96', '\U00023456', 'encode', None, None) + # Check that errors defaults to 'strict' + self.checkraises(UnicodeError, '\ud800', 'encode', None, None) + if __name__ == "__main__": unittest.main() diff --git a/Misc/ACKS b/Misc/ACKS index 4f5ebd67f86..d5064f644e2 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -512,6 +512,7 @@ Arnaud Fontaine Michael Foord Amaury Forgeot d'Arc Doug Fort +Daniel Fortunov Evens Fortuné Chris Foster John Fouhy diff --git a/Misc/NEWS.d/next/Library/2019-05-07-17-42-36.bpo-36582.L_dxR6.rst b/Misc/NEWS.d/next/Library/2019-05-07-17-42-36.bpo-36582.L_dxR6.rst new file mode 100644 index 00000000000..34f16fcde87 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-05-07-17-42-36.bpo-36582.L_dxR6.rst @@ -0,0 +1 @@ +Fix ``UserString.encode()`` to correctly return ``bytes`` rather than a ``UserString`` instance. \ No newline at end of file