Close #17839: support bytes-like objects in base64 module
This mostly affected the encodebytes and decodebytes functions (which are used by base64_codec). Also added a test to ensure all bytes-bytes codecs can handle memoryview input, as well as tests for the handling of multidimensional and non-bytes format input in the modern base64 API.
This commit is contained in:
parent
73c6ee0080
commit
fdf239a855
|
@ -27,6 +27,10 @@ byte strings, but only using the Base64 standard alphabet.
|
|||
ASCII-only Unicode strings are now accepted by the decoding functions of
|
||||
the modern interface.
|
||||
|
||||
.. versionchanged:: 3.4
|
||||
Any :term:`bytes-like object`\ s are now accepted by all
|
||||
encoding and decoding functions in this module.
|
||||
|
||||
The modern interface provides:
|
||||
|
||||
.. function:: b64encode(s, altchars=None)
|
||||
|
|
|
@ -1208,36 +1208,41 @@ mappings.
|
|||
|
||||
.. tabularcolumns:: |l|L|L|
|
||||
|
||||
+----------------------+---------------------------+------------------------------+
|
||||
| Codec | Purpose | Encoder/decoder |
|
||||
+======================+===========================+==============================+
|
||||
| base64_codec [#b64]_ | Convert operand to MIME | :meth:`base64.b64encode`, |
|
||||
| | base64 (the result always | :meth:`base64.b64decode` |
|
||||
| | includes a trailing | |
|
||||
| | ``'\n'``) | |
|
||||
+----------------------+---------------------------+------------------------------+
|
||||
| bz2_codec | Compress the operand | :meth:`bz2.compress`, |
|
||||
| | using bz2 | :meth:`bz2.decompress` |
|
||||
+----------------------+---------------------------+------------------------------+
|
||||
| hex_codec | Convert operand to | :meth:`base64.b16encode`, |
|
||||
| | hexadecimal | :meth:`base64.b16decode` |
|
||||
| | representation, with two | |
|
||||
| | digits per byte | |
|
||||
+----------------------+---------------------------+------------------------------+
|
||||
| quopri_codec | Convert operand to MIME | :meth:`quopri.encodestring`, |
|
||||
| | quoted printable | :meth:`quopri.decodestring` |
|
||||
+----------------------+---------------------------+------------------------------+
|
||||
| uu_codec | Convert the operand using | :meth:`uu.encode`, |
|
||||
| | uuencode | :meth:`uu.decode` |
|
||||
+----------------------+---------------------------+------------------------------+
|
||||
| zlib_codec | Compress the operand | :meth:`zlib.compress`, |
|
||||
| | using gzip | :meth:`zlib.decompress` |
|
||||
+----------------------+---------------------------+------------------------------+
|
||||
+----------------------+------------------------------+------------------------------+
|
||||
| Codec | Purpose | Encoder / decoder |
|
||||
+======================+==============================+==============================+
|
||||
| base64_codec [#b64]_ | Convert operand to MIME | :meth:`base64.b64encode` / |
|
||||
| | base64 (the result always | :meth:`base64.b64decode` |
|
||||
| | includes a trailing | |
|
||||
| | ``'\n'``) | |
|
||||
| | | |
|
||||
| | .. versionchanged:: 3.4 | |
|
||||
| | accepts any | |
|
||||
| | :term:`bytes-like object` | |
|
||||
| | as input for encoding and | |
|
||||
| | decoding | |
|
||||
+----------------------+------------------------------+------------------------------+
|
||||
| bz2_codec | Compress the operand | :meth:`bz2.compress` / |
|
||||
| | using bz2 | :meth:`bz2.decompress` |
|
||||
+----------------------+------------------------------+------------------------------+
|
||||
| hex_codec | Convert operand to | :meth:`base64.b16encode` / |
|
||||
| | hexadecimal | :meth:`base64.b16decode` |
|
||||
| | representation, with two | |
|
||||
| | digits per byte | |
|
||||
+----------------------+------------------------------+------------------------------+
|
||||
| quopri_codec | Convert operand to MIME | :meth:`quopri.encodestring` /|
|
||||
| | quoted printable | :meth:`quopri.decodestring` |
|
||||
+----------------------+------------------------------+------------------------------+
|
||||
| uu_codec | Convert the operand using | :meth:`uu.encode` / |
|
||||
| | uuencode | :meth:`uu.decode` |
|
||||
+----------------------+------------------------------+------------------------------+
|
||||
| zlib_codec | Compress the operand | :meth:`zlib.compress` / |
|
||||
| | using gzip | :meth:`zlib.decompress` |
|
||||
+----------------------+------------------------------+------------------------------+
|
||||
|
||||
.. [#b64] Rather than accepting any :term:`bytes-like object`,
|
||||
``'base64_codec'`` accepts only :class:`bytes` and :class:`bytearray` for
|
||||
encoding and only :class:`bytes`, :class:`bytearray`, and ASCII-only
|
||||
instances of :class:`str` for decoding
|
||||
.. [#b64] In addition to :term:`bytes-like objects <bytes-like object>`,
|
||||
``'base64_codec'`` also accepts ASCII-only instances of :class:`str` for
|
||||
decoding.
|
||||
|
||||
|
||||
The following codecs provide :class:`str` to :class:`str` mappings.
|
||||
|
|
|
@ -35,11 +35,13 @@ def _bytes_from_decode_data(s):
|
|||
return s.encode('ascii')
|
||||
except UnicodeEncodeError:
|
||||
raise ValueError('string argument should contain only ASCII characters')
|
||||
elif isinstance(s, bytes_types):
|
||||
if isinstance(s, bytes_types):
|
||||
return s
|
||||
else:
|
||||
raise TypeError("argument should be bytes or ASCII string, not %s" % s.__class__.__name__)
|
||||
|
||||
try:
|
||||
return memoryview(s).tobytes()
|
||||
except TypeError:
|
||||
raise TypeError("argument should be a bytes-like object or ASCII "
|
||||
"string, not %r" % s.__class__.__name__) from None
|
||||
|
||||
|
||||
# Base64 encoding/decoding uses binascii
|
||||
|
@ -54,14 +56,9 @@ def b64encode(s, altchars=None):
|
|||
|
||||
The encoded byte string is returned.
|
||||
"""
|
||||
if not isinstance(s, bytes_types):
|
||||
raise TypeError("expected bytes, not %s" % s.__class__.__name__)
|
||||
# Strip off the trailing newline
|
||||
encoded = binascii.b2a_base64(s)[:-1]
|
||||
if altchars is not None:
|
||||
if not isinstance(altchars, bytes_types):
|
||||
raise TypeError("expected bytes, not %s"
|
||||
% altchars.__class__.__name__)
|
||||
assert len(altchars) == 2, repr(altchars)
|
||||
return encoded.translate(bytes.maketrans(b'+/', altchars))
|
||||
return encoded
|
||||
|
@ -149,7 +146,7 @@ def b32encode(s):
|
|||
s is the byte string to encode. The encoded byte string is returned.
|
||||
"""
|
||||
if not isinstance(s, bytes_types):
|
||||
raise TypeError("expected bytes, not %s" % s.__class__.__name__)
|
||||
s = memoryview(s).tobytes()
|
||||
leftover = len(s) % 5
|
||||
# Pad the last quantum with zero bits if necessary
|
||||
if leftover:
|
||||
|
@ -250,8 +247,6 @@ def b16encode(s):
|
|||
|
||||
s is the byte string to encode. The encoded byte string is returned.
|
||||
"""
|
||||
if not isinstance(s, bytes_types):
|
||||
raise TypeError("expected bytes, not %s" % s.__class__.__name__)
|
||||
return binascii.hexlify(s).upper()
|
||||
|
||||
|
||||
|
@ -306,12 +301,26 @@ def decode(input, output):
|
|||
s = binascii.a2b_base64(line)
|
||||
output.write(s)
|
||||
|
||||
def _input_type_check(s):
|
||||
try:
|
||||
m = memoryview(s)
|
||||
except TypeError as err:
|
||||
msg = "expected bytes-like object, not %s" % s.__class__.__name__
|
||||
raise TypeError(msg) from err
|
||||
if m.format not in ('c', 'b', 'B'):
|
||||
msg = ("expected single byte elements, not %r from %s" %
|
||||
(m.format, s.__class__.__name__))
|
||||
raise TypeError(msg)
|
||||
if m.ndim != 1:
|
||||
msg = ("expected 1-D data, not %d-D data from %s" %
|
||||
(m.ndim, s.__class__.__name__))
|
||||
raise TypeError(msg)
|
||||
|
||||
|
||||
def encodebytes(s):
|
||||
"""Encode a bytestring into a bytestring containing multiple lines
|
||||
of base-64 data."""
|
||||
if not isinstance(s, bytes_types):
|
||||
raise TypeError("expected bytes, not %s" % s.__class__.__name__)
|
||||
_input_type_check(s)
|
||||
pieces = []
|
||||
for i in range(0, len(s), MAXBINSIZE):
|
||||
chunk = s[i : i + MAXBINSIZE]
|
||||
|
@ -328,8 +337,7 @@ def encodestring(s):
|
|||
|
||||
def decodebytes(s):
|
||||
"""Decode a bytestring of base-64 data into a bytestring."""
|
||||
if not isinstance(s, bytes_types):
|
||||
raise TypeError("expected bytes, not %s" % s.__class__.__name__)
|
||||
_input_type_check(s)
|
||||
return binascii.a2b_base64(s)
|
||||
|
||||
def decodestring(s):
|
||||
|
|
|
@ -5,10 +5,21 @@ import binascii
|
|||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
|
||||
import struct
|
||||
from array import array
|
||||
|
||||
|
||||
class LegacyBase64TestCase(unittest.TestCase):
|
||||
|
||||
# Legacy API is not as permissive as the modern API
|
||||
def check_type_errors(self, f):
|
||||
self.assertRaises(TypeError, f, "")
|
||||
self.assertRaises(TypeError, f, [])
|
||||
multidimensional = memoryview(b"1234").cast('B', (2, 2))
|
||||
self.assertRaises(TypeError, f, multidimensional)
|
||||
int_data = memoryview(b"1234").cast('I')
|
||||
self.assertRaises(TypeError, f, int_data)
|
||||
|
||||
def test_encodebytes(self):
|
||||
eq = self.assertEqual
|
||||
eq(base64.encodebytes(b"www.python.org"), b"d3d3LnB5dGhvbi5vcmc=\n")
|
||||
|
@ -24,7 +35,9 @@ class LegacyBase64TestCase(unittest.TestCase):
|
|||
b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ==\n")
|
||||
# Non-bytes
|
||||
eq(base64.encodebytes(bytearray(b'abc')), b'YWJj\n')
|
||||
self.assertRaises(TypeError, base64.encodebytes, "")
|
||||
eq(base64.encodebytes(memoryview(b'abc')), b'YWJj\n')
|
||||
eq(base64.encodebytes(array('B', b'abc')), b'YWJj\n')
|
||||
self.check_type_errors(base64.encodebytes)
|
||||
|
||||
def test_decodebytes(self):
|
||||
eq = self.assertEqual
|
||||
|
@ -41,7 +54,9 @@ class LegacyBase64TestCase(unittest.TestCase):
|
|||
eq(base64.decodebytes(b''), b'')
|
||||
# Non-bytes
|
||||
eq(base64.decodebytes(bytearray(b'YWJj\n')), b'abc')
|
||||
self.assertRaises(TypeError, base64.decodebytes, "")
|
||||
eq(base64.decodebytes(memoryview(b'YWJj\n')), b'abc')
|
||||
eq(base64.decodebytes(array('B', b'YWJj\n')), b'abc')
|
||||
self.check_type_errors(base64.decodebytes)
|
||||
|
||||
def test_encode(self):
|
||||
eq = self.assertEqual
|
||||
|
@ -73,6 +88,38 @@ class LegacyBase64TestCase(unittest.TestCase):
|
|||
|
||||
|
||||
class BaseXYTestCase(unittest.TestCase):
|
||||
|
||||
# Modern API completely ignores exported dimension and format data and
|
||||
# treats any buffer as a stream of bytes
|
||||
def check_encode_type_errors(self, f):
|
||||
self.assertRaises(TypeError, f, "")
|
||||
self.assertRaises(TypeError, f, [])
|
||||
|
||||
def check_decode_type_errors(self, f):
|
||||
self.assertRaises(TypeError, f, [])
|
||||
|
||||
def check_other_types(self, f, bytes_data, expected):
|
||||
eq = self.assertEqual
|
||||
eq(f(bytearray(bytes_data)), expected)
|
||||
eq(f(memoryview(bytes_data)), expected)
|
||||
eq(f(array('B', bytes_data)), expected)
|
||||
self.check_nonbyte_element_format(base64.b64encode, bytes_data)
|
||||
self.check_multidimensional(base64.b64encode, bytes_data)
|
||||
|
||||
def check_multidimensional(self, f, data):
|
||||
padding = b"\x00" if len(data) % 2 else b""
|
||||
bytes_data = data + padding # Make sure cast works
|
||||
shape = (len(bytes_data) // 2, 2)
|
||||
multidimensional = memoryview(bytes_data).cast('B', shape)
|
||||
self.assertEqual(f(multidimensional), f(bytes_data))
|
||||
|
||||
def check_nonbyte_element_format(self, f, data):
|
||||
padding = b"\x00" * ((4 - len(data)) % 4)
|
||||
bytes_data = data + padding # Make sure cast works
|
||||
int_data = memoryview(bytes_data).cast('I')
|
||||
self.assertEqual(f(int_data), f(bytes_data))
|
||||
|
||||
|
||||
def test_b64encode(self):
|
||||
eq = self.assertEqual
|
||||
# Test default alphabet
|
||||
|
@ -90,13 +137,16 @@ class BaseXYTestCase(unittest.TestCase):
|
|||
b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ==")
|
||||
# Test with arbitrary alternative characters
|
||||
eq(base64.b64encode(b'\xd3V\xbeo\xf7\x1d', altchars=b'*$'), b'01a*b$cd')
|
||||
# Non-bytes
|
||||
eq(base64.b64encode(bytearray(b'abcd')), b'YWJjZA==')
|
||||
eq(base64.b64encode(b'\xd3V\xbeo\xf7\x1d', altchars=bytearray(b'*$')),
|
||||
b'01a*b$cd')
|
||||
# Check if passing a str object raises an error
|
||||
self.assertRaises(TypeError, base64.b64encode, "")
|
||||
self.assertRaises(TypeError, base64.b64encode, b"", altchars="")
|
||||
eq(base64.b64encode(b'\xd3V\xbeo\xf7\x1d', altchars=memoryview(b'*$')),
|
||||
b'01a*b$cd')
|
||||
eq(base64.b64encode(b'\xd3V\xbeo\xf7\x1d', altchars=array('B', b'*$')),
|
||||
b'01a*b$cd')
|
||||
# Non-bytes
|
||||
self.check_other_types(base64.b64encode, b'abcd', b'YWJjZA==')
|
||||
self.check_encode_type_errors(base64.b64encode)
|
||||
self.assertRaises(TypeError, base64.b64encode, b"", altchars="*$")
|
||||
# Test standard alphabet
|
||||
eq(base64.standard_b64encode(b"www.python.org"), b"d3d3LnB5dGhvbi5vcmc=")
|
||||
eq(base64.standard_b64encode(b"a"), b"YQ==")
|
||||
|
@ -110,15 +160,15 @@ class BaseXYTestCase(unittest.TestCase):
|
|||
b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NT"
|
||||
b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ==")
|
||||
# Non-bytes
|
||||
eq(base64.standard_b64encode(bytearray(b'abcd')), b'YWJjZA==')
|
||||
# Check if passing a str object raises an error
|
||||
self.assertRaises(TypeError, base64.standard_b64encode, "")
|
||||
self.check_other_types(base64.standard_b64encode,
|
||||
b'abcd', b'YWJjZA==')
|
||||
self.check_encode_type_errors(base64.standard_b64encode)
|
||||
# Test with 'URL safe' alternative characters
|
||||
eq(base64.urlsafe_b64encode(b'\xd3V\xbeo\xf7\x1d'), b'01a-b_cd')
|
||||
# Non-bytes
|
||||
eq(base64.urlsafe_b64encode(bytearray(b'\xd3V\xbeo\xf7\x1d')), b'01a-b_cd')
|
||||
# Check if passing a str object raises an error
|
||||
self.assertRaises(TypeError, base64.urlsafe_b64encode, "")
|
||||
self.check_other_types(base64.urlsafe_b64encode,
|
||||
b'\xd3V\xbeo\xf7\x1d', b'01a-b_cd')
|
||||
self.check_encode_type_errors(base64.urlsafe_b64encode)
|
||||
|
||||
def test_b64decode(self):
|
||||
eq = self.assertEqual
|
||||
|
@ -141,7 +191,8 @@ class BaseXYTestCase(unittest.TestCase):
|
|||
eq(base64.b64decode(data), res)
|
||||
eq(base64.b64decode(data.decode('ascii')), res)
|
||||
# Non-bytes
|
||||
eq(base64.b64decode(bytearray(b"YWJj")), b"abc")
|
||||
self.check_other_types(base64.b64decode, b"YWJj", b"abc")
|
||||
self.check_decode_type_errors(base64.b64decode)
|
||||
|
||||
# Test with arbitrary alternative characters
|
||||
tests_altchars = {(b'01a*b$cd', b'*$'): b'\xd3V\xbeo\xf7\x1d',
|
||||
|
@ -160,7 +211,8 @@ class BaseXYTestCase(unittest.TestCase):
|
|||
eq(base64.standard_b64decode(data), res)
|
||||
eq(base64.standard_b64decode(data.decode('ascii')), res)
|
||||
# Non-bytes
|
||||
eq(base64.standard_b64decode(bytearray(b"YWJj")), b"abc")
|
||||
self.check_other_types(base64.standard_b64decode, b"YWJj", b"abc")
|
||||
self.check_decode_type_errors(base64.standard_b64decode)
|
||||
|
||||
# Test with 'URL safe' alternative characters
|
||||
tests_urlsafe = {b'01a-b_cd': b'\xd3V\xbeo\xf7\x1d',
|
||||
|
@ -170,7 +222,9 @@ class BaseXYTestCase(unittest.TestCase):
|
|||
eq(base64.urlsafe_b64decode(data), res)
|
||||
eq(base64.urlsafe_b64decode(data.decode('ascii')), res)
|
||||
# Non-bytes
|
||||
eq(base64.urlsafe_b64decode(bytearray(b'01a-b_cd')), b'\xd3V\xbeo\xf7\x1d')
|
||||
self.check_other_types(base64.urlsafe_b64decode, b'01a-b_cd',
|
||||
b'\xd3V\xbeo\xf7\x1d')
|
||||
self.check_decode_type_errors(base64.urlsafe_b64decode)
|
||||
|
||||
def test_b64decode_padding_error(self):
|
||||
self.assertRaises(binascii.Error, base64.b64decode, b'abc')
|
||||
|
@ -205,8 +259,8 @@ class BaseXYTestCase(unittest.TestCase):
|
|||
eq(base64.b32encode(b'abcd'), b'MFRGGZA=')
|
||||
eq(base64.b32encode(b'abcde'), b'MFRGGZDF')
|
||||
# Non-bytes
|
||||
eq(base64.b32encode(bytearray(b'abcd')), b'MFRGGZA=')
|
||||
self.assertRaises(TypeError, base64.b32encode, "")
|
||||
self.check_other_types(base64.b32encode, b'abcd', b'MFRGGZA=')
|
||||
self.check_encode_type_errors(base64.b32encode)
|
||||
|
||||
def test_b32decode(self):
|
||||
eq = self.assertEqual
|
||||
|
@ -222,7 +276,8 @@ class BaseXYTestCase(unittest.TestCase):
|
|||
eq(base64.b32decode(data), res)
|
||||
eq(base64.b32decode(data.decode('ascii')), res)
|
||||
# Non-bytes
|
||||
eq(base64.b32decode(bytearray(b'MFRGG===')), b'abc')
|
||||
self.check_other_types(base64.b32decode, b'MFRGG===', b"abc")
|
||||
self.check_decode_type_errors(base64.b32decode)
|
||||
|
||||
def test_b32decode_casefold(self):
|
||||
eq = self.assertEqual
|
||||
|
@ -277,8 +332,9 @@ class BaseXYTestCase(unittest.TestCase):
|
|||
eq(base64.b16encode(b'\x01\x02\xab\xcd\xef'), b'0102ABCDEF')
|
||||
eq(base64.b16encode(b'\x00'), b'00')
|
||||
# Non-bytes
|
||||
eq(base64.b16encode(bytearray(b'\x01\x02\xab\xcd\xef')), b'0102ABCDEF')
|
||||
self.assertRaises(TypeError, base64.b16encode, "")
|
||||
self.check_other_types(base64.b16encode, b'\x01\x02\xab\xcd\xef',
|
||||
b'0102ABCDEF')
|
||||
self.check_encode_type_errors(base64.b16encode)
|
||||
|
||||
def test_b16decode(self):
|
||||
eq = self.assertEqual
|
||||
|
@ -293,7 +349,15 @@ class BaseXYTestCase(unittest.TestCase):
|
|||
eq(base64.b16decode(b'0102abcdef', True), b'\x01\x02\xab\xcd\xef')
|
||||
eq(base64.b16decode('0102abcdef', True), b'\x01\x02\xab\xcd\xef')
|
||||
# Non-bytes
|
||||
eq(base64.b16decode(bytearray(b"0102ABCDEF")), b'\x01\x02\xab\xcd\xef')
|
||||
self.check_other_types(base64.b16decode, b"0102ABCDEF",
|
||||
b'\x01\x02\xab\xcd\xef')
|
||||
self.check_decode_type_errors(base64.b16decode)
|
||||
eq(base64.b16decode(bytearray(b"0102abcdef"), True),
|
||||
b'\x01\x02\xab\xcd\xef')
|
||||
eq(base64.b16decode(memoryview(b"0102abcdef"), True),
|
||||
b'\x01\x02\xab\xcd\xef')
|
||||
eq(base64.b16decode(array('B', b"0102abcdef"), True),
|
||||
b'\x01\x02\xab\xcd\xef')
|
||||
|
||||
def test_decode_nonascii_str(self):
|
||||
decode_funcs = (base64.b64decode,
|
||||
|
|
|
@ -2285,6 +2285,24 @@ class TransformCodecTest(unittest.TestCase):
|
|||
sout = reader.readline()
|
||||
self.assertEqual(sout, b"\x80")
|
||||
|
||||
def test_buffer_api_usage(self):
|
||||
# We check all the transform codecs accept memoryview input
|
||||
# for encoding and decoding
|
||||
# and also that they roundtrip correctly
|
||||
original = b"12345\x80"
|
||||
for encoding in bytes_transform_encodings:
|
||||
data = original
|
||||
view = memoryview(data)
|
||||
data = codecs.encode(data, encoding)
|
||||
view_encoded = codecs.encode(view, encoding)
|
||||
self.assertEqual(view_encoded, data)
|
||||
view = memoryview(data)
|
||||
data = codecs.decode(data, encoding)
|
||||
self.assertEqual(data, original)
|
||||
view_decoded = codecs.decode(view, encoding)
|
||||
self.assertEqual(view_decoded, data)
|
||||
|
||||
|
||||
|
||||
@unittest.skipUnless(sys.platform == 'win32',
|
||||
'code pages are specific to Windows')
|
||||
|
|
|
@ -20,6 +20,10 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #17839: base64.decodebytes and base64.encodebytes now accept any
|
||||
object that exports a 1-dimensional array of bytes (this means the same
|
||||
is now also true for base64_codec).
|
||||
|
||||
- Issue #19132: The pprint module now supports compact mode.
|
||||
|
||||
- Issue #19137: The pprint module now correctly formats instances of set and
|
||||
|
|
Loading…
Reference in New Issue