Issue #13641: Decoding functions in the base64 module now accept ASCII-only unicode strings.
Patch by Catalin Iacob.
This commit is contained in:
parent
0588eac218
commit
ea6b4d5f70
|
@ -18,9 +18,14 @@ POST request. The encoding algorithm is not the same as the
|
|||
|
||||
There are two interfaces provided by this module. The modern interface
|
||||
supports encoding and decoding ASCII byte string objects using all three
|
||||
alphabets. The legacy interface provides for encoding and decoding to and from
|
||||
file-like objects as well as byte strings, but only using the Base64 standard
|
||||
alphabet.
|
||||
alphabets. Additionally, the decoding functions of the modern interface also
|
||||
accept Unicode strings containing only ASCII characters. The legacy interface
|
||||
provides for encoding and decoding to and from file-like objects as well as
|
||||
byte strings, but only using the Base64 standard alphabet.
|
||||
|
||||
.. versionchanged:: 3.3
|
||||
ASCII-only Unicode strings are now accepted by the decoding functions of
|
||||
the modern interface.
|
||||
|
||||
The modern interface provides:
|
||||
|
||||
|
|
|
@ -29,6 +29,16 @@ __all__ = [
|
|||
|
||||
bytes_types = (bytes, bytearray) # Types acceptable as binary data
|
||||
|
||||
def _bytes_from_decode_data(s):
|
||||
if isinstance(s, str):
|
||||
try:
|
||||
return s.encode('ascii')
|
||||
except UnicodeEncodeError:
|
||||
raise ValueError('string argument should contain only ASCII characters')
|
||||
elif isinstance(s, bytes_types):
|
||||
return s
|
||||
else:
|
||||
raise TypeError("argument should be bytes or ASCII string, not %s" % s.__class__.__name__)
|
||||
|
||||
def _translate(s, altchars):
|
||||
if not isinstance(s, bytes_types):
|
||||
|
@ -79,12 +89,9 @@ def b64decode(s, altchars=None, validate=False):
|
|||
discarded prior to the padding check. If validate is True,
|
||||
non-base64-alphabet characters in the input result in a binascii.Error.
|
||||
"""
|
||||
if not isinstance(s, bytes_types):
|
||||
raise TypeError("expected bytes, not %s" % s.__class__.__name__)
|
||||
s = _bytes_from_decode_data(s)
|
||||
if altchars is not None:
|
||||
if not isinstance(altchars, bytes_types):
|
||||
raise TypeError("expected bytes, not %s"
|
||||
% altchars.__class__.__name__)
|
||||
altchars = _bytes_from_decode_data(altchars)
|
||||
assert len(altchars) == 2, repr(altchars)
|
||||
s = _translate(s, {chr(altchars[0]): b'+', chr(altchars[1]): b'/'})
|
||||
if validate and not re.match(b'^[A-Za-z0-9+/]*={0,2}$', s):
|
||||
|
@ -211,8 +218,7 @@ def b32decode(s, casefold=False, map01=None):
|
|||
the input is incorrectly padded or if there are non-alphabet
|
||||
characters present in the input.
|
||||
"""
|
||||
if not isinstance(s, bytes_types):
|
||||
raise TypeError("expected bytes, not %s" % s.__class__.__name__)
|
||||
s = _bytes_from_decode_data(s)
|
||||
quanta, leftover = divmod(len(s), 8)
|
||||
if leftover:
|
||||
raise binascii.Error('Incorrect padding')
|
||||
|
@ -220,8 +226,7 @@ def b32decode(s, casefold=False, map01=None):
|
|||
# False, or the character to map the digit 1 (one) to. It should be
|
||||
# either L (el) or I (eye).
|
||||
if map01 is not None:
|
||||
if not isinstance(map01, bytes_types):
|
||||
raise TypeError("expected bytes, not %s" % map01.__class__.__name__)
|
||||
map01 = _bytes_from_decode_data(map01)
|
||||
assert len(map01) == 1, repr(map01)
|
||||
s = _translate(s, {b'0': b'O', b'1': map01})
|
||||
if casefold:
|
||||
|
@ -292,8 +297,7 @@ def b16decode(s, casefold=False):
|
|||
s were incorrectly padded or if there are non-alphabet characters
|
||||
present in the string.
|
||||
"""
|
||||
if not isinstance(s, bytes_types):
|
||||
raise TypeError("expected bytes, not %s" % s.__class__.__name__)
|
||||
s = _bytes_from_decode_data(s)
|
||||
if casefold:
|
||||
s = s.upper()
|
||||
if re.search(b'[^0-9A-F]', s):
|
||||
|
|
|
@ -102,44 +102,53 @@ class BaseXYTestCase(unittest.TestCase):
|
|||
|
||||
def test_b64decode(self):
|
||||
eq = self.assertEqual
|
||||
eq(base64.b64decode(b"d3d3LnB5dGhvbi5vcmc="), b"www.python.org")
|
||||
eq(base64.b64decode(b'AA=='), b'\x00')
|
||||
eq(base64.b64decode(b"YQ=="), b"a")
|
||||
eq(base64.b64decode(b"YWI="), b"ab")
|
||||
eq(base64.b64decode(b"YWJj"), b"abc")
|
||||
eq(base64.b64decode(b"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE"
|
||||
|
||||
tests = {b"d3d3LnB5dGhvbi5vcmc=": b"www.python.org",
|
||||
b'AA==': b'\x00',
|
||||
b"YQ==": b"a",
|
||||
b"YWI=": b"ab",
|
||||
b"YWJj": b"abc",
|
||||
b"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE"
|
||||
b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0\nNT"
|
||||
b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ=="),
|
||||
b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ==":
|
||||
|
||||
b"abcdefghijklmnopqrstuvwxyz"
|
||||
b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
b"0123456789!@#0^&*();:<>,. []{}")
|
||||
eq(base64.b64decode(b''), b'')
|
||||
b"0123456789!@#0^&*();:<>,. []{}",
|
||||
b'': b'',
|
||||
}
|
||||
for data, res in tests.items():
|
||||
eq(base64.b64decode(data), res)
|
||||
eq(base64.b64decode(data.decode('ascii')), res)
|
||||
|
||||
# Test with arbitrary alternative characters
|
||||
eq(base64.b64decode(b'01a*b$cd', altchars=b'*$'), b'\xd3V\xbeo\xf7\x1d')
|
||||
# Check if passing a str object raises an error
|
||||
self.assertRaises(TypeError, base64.b64decode, "")
|
||||
self.assertRaises(TypeError, base64.b64decode, b"", altchars="")
|
||||
tests_altchars = {(b'01a*b$cd', b'*$'): b'\xd3V\xbeo\xf7\x1d',
|
||||
}
|
||||
for (data, altchars), res in tests_altchars.items():
|
||||
data_str = data.decode('ascii')
|
||||
altchars_str = altchars.decode('ascii')
|
||||
|
||||
eq(base64.b64decode(data, altchars=altchars), res)
|
||||
eq(base64.b64decode(data_str, altchars=altchars), res)
|
||||
eq(base64.b64decode(data, altchars=altchars_str), res)
|
||||
eq(base64.b64decode(data_str, altchars=altchars_str), res)
|
||||
|
||||
# Test standard alphabet
|
||||
eq(base64.standard_b64decode(b"d3d3LnB5dGhvbi5vcmc="), b"www.python.org")
|
||||
eq(base64.standard_b64decode(b"YQ=="), b"a")
|
||||
eq(base64.standard_b64decode(b"YWI="), b"ab")
|
||||
eq(base64.standard_b64decode(b"YWJj"), b"abc")
|
||||
eq(base64.standard_b64decode(b""), b"")
|
||||
eq(base64.standard_b64decode(b"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE"
|
||||
b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NT"
|
||||
b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ=="),
|
||||
b"abcdefghijklmnopqrstuvwxyz"
|
||||
b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
b"0123456789!@#0^&*();:<>,. []{}")
|
||||
# Check if passing a str object raises an error
|
||||
self.assertRaises(TypeError, base64.standard_b64decode, "")
|
||||
self.assertRaises(TypeError, base64.standard_b64decode, b"", altchars="")
|
||||
for data, res in tests.items():
|
||||
eq(base64.standard_b64decode(data), res)
|
||||
eq(base64.standard_b64decode(data.decode('ascii')), res)
|
||||
|
||||
# Test with 'URL safe' alternative characters
|
||||
eq(base64.urlsafe_b64decode(b'01a-b_cd'), b'\xd3V\xbeo\xf7\x1d')
|
||||
self.assertRaises(TypeError, base64.urlsafe_b64decode, "")
|
||||
tests_urlsafe = {b'01a-b_cd': b'\xd3V\xbeo\xf7\x1d',
|
||||
b'': b'',
|
||||
}
|
||||
for data, res in tests_urlsafe.items():
|
||||
eq(base64.urlsafe_b64decode(data), res)
|
||||
eq(base64.urlsafe_b64decode(data.decode('ascii')), res)
|
||||
|
||||
def test_b64decode_padding_error(self):
|
||||
self.assertRaises(binascii.Error, base64.b64decode, b'abc')
|
||||
self.assertRaises(binascii.Error, base64.b64decode, 'abc')
|
||||
|
||||
def test_b64decode_invalid_chars(self):
|
||||
# issue 1466065: Test some invalid characters.
|
||||
|
@ -154,8 +163,10 @@ class BaseXYTestCase(unittest.TestCase):
|
|||
(b'YWJj\nYWI=', b'abcab'))
|
||||
for bstr, res in tests:
|
||||
self.assertEqual(base64.b64decode(bstr), res)
|
||||
self.assertEqual(base64.b64decode(bstr.decode('ascii')), res)
|
||||
with self.assertRaises(binascii.Error):
|
||||
base64.b64decode(bstr, validate=True)
|
||||
base64.b64decode(bstr.decode('ascii'), validate=True)
|
||||
|
||||
def test_b32encode(self):
|
||||
eq = self.assertEqual
|
||||
|
@ -170,40 +181,62 @@ class BaseXYTestCase(unittest.TestCase):
|
|||
|
||||
def test_b32decode(self):
|
||||
eq = self.assertEqual
|
||||
eq(base64.b32decode(b''), b'')
|
||||
eq(base64.b32decode(b'AA======'), b'\x00')
|
||||
eq(base64.b32decode(b'ME======'), b'a')
|
||||
eq(base64.b32decode(b'MFRA===='), b'ab')
|
||||
eq(base64.b32decode(b'MFRGG==='), b'abc')
|
||||
eq(base64.b32decode(b'MFRGGZA='), b'abcd')
|
||||
eq(base64.b32decode(b'MFRGGZDF'), b'abcde')
|
||||
self.assertRaises(TypeError, base64.b32decode, "")
|
||||
tests = {b'': b'',
|
||||
b'AA======': b'\x00',
|
||||
b'ME======': b'a',
|
||||
b'MFRA====': b'ab',
|
||||
b'MFRGG===': b'abc',
|
||||
b'MFRGGZA=': b'abcd',
|
||||
b'MFRGGZDF': b'abcde',
|
||||
}
|
||||
for data, res in tests.items():
|
||||
eq(base64.b32decode(data), res)
|
||||
eq(base64.b32decode(data.decode('ascii')), res)
|
||||
|
||||
def test_b32decode_casefold(self):
|
||||
eq = self.assertEqual
|
||||
eq(base64.b32decode(b'', True), b'')
|
||||
eq(base64.b32decode(b'ME======', True), b'a')
|
||||
eq(base64.b32decode(b'MFRA====', True), b'ab')
|
||||
eq(base64.b32decode(b'MFRGG===', True), b'abc')
|
||||
eq(base64.b32decode(b'MFRGGZA=', True), b'abcd')
|
||||
eq(base64.b32decode(b'MFRGGZDF', True), b'abcde')
|
||||
tests = {b'': b'',
|
||||
b'ME======': b'a',
|
||||
b'MFRA====': b'ab',
|
||||
b'MFRGG===': b'abc',
|
||||
b'MFRGGZA=': b'abcd',
|
||||
b'MFRGGZDF': b'abcde',
|
||||
# Lower cases
|
||||
eq(base64.b32decode(b'me======', True), b'a')
|
||||
eq(base64.b32decode(b'mfra====', True), b'ab')
|
||||
eq(base64.b32decode(b'mfrgg===', True), b'abc')
|
||||
eq(base64.b32decode(b'mfrggza=', True), b'abcd')
|
||||
eq(base64.b32decode(b'mfrggzdf', True), b'abcde')
|
||||
# Expected exceptions
|
||||
b'me======': b'a',
|
||||
b'mfra====': b'ab',
|
||||
b'mfrgg===': b'abc',
|
||||
b'mfrggza=': b'abcd',
|
||||
b'mfrggzdf': b'abcde',
|
||||
}
|
||||
|
||||
for data, res in tests.items():
|
||||
eq(base64.b32decode(data, True), res)
|
||||
eq(base64.b32decode(data.decode('ascii'), True), res)
|
||||
|
||||
self.assertRaises(TypeError, base64.b32decode, b'me======')
|
||||
self.assertRaises(TypeError, base64.b32decode, 'me======')
|
||||
|
||||
# Mapping zero and one
|
||||
eq(base64.b32decode(b'MLO23456'), b'b\xdd\xad\xf3\xbe')
|
||||
eq(base64.b32decode(b'M1023456', map01=b'L'), b'b\xdd\xad\xf3\xbe')
|
||||
eq(base64.b32decode(b'M1023456', map01=b'I'), b'b\x1d\xad\xf3\xbe')
|
||||
self.assertRaises(TypeError, base64.b32decode, b"", map01="")
|
||||
eq(base64.b32decode('MLO23456'), b'b\xdd\xad\xf3\xbe')
|
||||
|
||||
map_tests = {(b'M1023456', b'L'): b'b\xdd\xad\xf3\xbe',
|
||||
(b'M1023456', b'I'): b'b\x1d\xad\xf3\xbe',
|
||||
}
|
||||
for (data, map01), res in map_tests.items():
|
||||
data_str = data.decode('ascii')
|
||||
map01_str = map01.decode('ascii')
|
||||
|
||||
eq(base64.b32decode(data, map01=map01), res)
|
||||
eq(base64.b32decode(data_str, map01=map01), res)
|
||||
eq(base64.b32decode(data, map01=map01_str), res)
|
||||
eq(base64.b32decode(data_str, map01=map01_str), res)
|
||||
|
||||
def test_b32decode_error(self):
|
||||
self.assertRaises(binascii.Error, base64.b32decode, b'abc')
|
||||
self.assertRaises(binascii.Error, base64.b32decode, b'ABCDEF==')
|
||||
for data in [b'abc', b'ABCDEF==']:
|
||||
with self.assertRaises(binascii.Error):
|
||||
base64.b32decode(data)
|
||||
base64.b32decode(data.decode('ascii'))
|
||||
|
||||
def test_b16encode(self):
|
||||
eq = self.assertEqual
|
||||
|
@ -214,12 +247,24 @@ class BaseXYTestCase(unittest.TestCase):
|
|||
def test_b16decode(self):
|
||||
eq = self.assertEqual
|
||||
eq(base64.b16decode(b'0102ABCDEF'), b'\x01\x02\xab\xcd\xef')
|
||||
eq(base64.b16decode('0102ABCDEF'), b'\x01\x02\xab\xcd\xef')
|
||||
eq(base64.b16decode(b'00'), b'\x00')
|
||||
eq(base64.b16decode('00'), b'\x00')
|
||||
# Lower case is not allowed without a flag
|
||||
self.assertRaises(binascii.Error, base64.b16decode, b'0102abcdef')
|
||||
self.assertRaises(binascii.Error, base64.b16decode, '0102abcdef')
|
||||
# Case fold
|
||||
eq(base64.b16decode(b'0102abcdef', True), b'\x01\x02\xab\xcd\xef')
|
||||
self.assertRaises(TypeError, base64.b16decode, "")
|
||||
eq(base64.b16decode('0102abcdef', True), b'\x01\x02\xab\xcd\xef')
|
||||
|
||||
def test_decode_nonascii_str(self):
|
||||
decode_funcs = (base64.b64decode,
|
||||
base64.standard_b64decode,
|
||||
base64.urlsafe_b64decode,
|
||||
base64.b32decode,
|
||||
base64.b16decode)
|
||||
for f in decode_funcs:
|
||||
self.assertRaises(ValueError, f, 'with non-ascii \xcb')
|
||||
|
||||
def test_ErrorHeritage(self):
|
||||
self.assertTrue(issubclass(binascii.Error, ValueError))
|
||||
|
|
|
@ -469,6 +469,9 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #13641: Decoding functions in the base64 module now accept ASCII-only
|
||||
unicode strings. Patch by Catalin Iacob.
|
||||
|
||||
- Issue #14043: Speed up importlib's _FileFinder by at least 8x, and add a
|
||||
new importlib.invalidate_caches() function.
|
||||
|
||||
|
|
Loading…
Reference in New Issue