#1466065: add validate option to base64.b64decode

Patch by Neil Tallim.  This provides a mechanism for module
users to achieve RFC 3548 compliance in the cases where ignoring
non-base64-alphabet input characters is *not* mandated by the RFC that
references RFC 3548.
This commit is contained in:
R. David Murray 2010-11-11 20:09:20 +00:00
parent 49afa380fd
commit 6495136e40
4 changed files with 37 additions and 9 deletions

View File

@ -37,7 +37,7 @@ The modern interface provides:
The encoded byte string is returned.
.. function:: b64decode(s, altchars=None)
.. function:: b64decode(s, altchars=None, validate=False)
Decode a Base64 encoded byte string.
@ -45,9 +45,13 @@ The modern interface provides:
at least length 2 (additional characters are ignored) which specifies the
alternative alphabet used instead of the ``+`` and ``/`` characters.
The decoded byte string is returned. A :exc:`TypeError` is raised if *s* were
incorrectly padded or if there are non-alphabet characters present in the
string.
The decoded byte string is returned. A :exc:`binascii.Error` is raised if *s* is
incorrectly padded.
If *validate* is ``False`` (the default), non-base64-alphabet characters are
discarded prior to the padding check. If *validate* is ``True``,
non-base64-alphabet characters in the input result in a
:exc:`binascii.Error`.
.. function:: standard_b64encode(s)

View File

@ -65,16 +65,19 @@ def b64encode(s, altchars=None):
return encoded
def b64decode(s, altchars=None):
def b64decode(s, altchars=None, validate=False):
"""Decode a Base64 encoded byte string.
s is the byte string to decode. Optional altchars must be a
string of length 2 which specifies the alternative alphabet used
instead of the '+' and '/' characters.
The decoded byte string is returned. binascii.Error is raised if
s were incorrectly padded or if there are non-alphabet characters
present in the string.
The decoded string is returned. A binascii.Error is raised if s is
incorrectly padded.
If validate is False (the default), non-base64-alphabet characters are
discarded prior to the padding check. If validate is True,
non-base64-alphabet characters in the input result in a binascii.Error.
"""
if not isinstance(s, bytes_types):
raise TypeError("expected bytes, not %s" % s.__class__.__name__)
@ -84,6 +87,8 @@ def b64decode(s, altchars=None):
% altchars.__class__.__name__)
assert len(altchars) == 2, repr(altchars)
s = _translate(s, {chr(altchars[0]): b'+', chr(altchars[1]): b'/'})
if validate and not re.match(b'^[A-Za-z0-9+/]*={0,2}$', s):
raise binascii.Error('Non-base64 digit found')
return binascii.a2b_base64(s)

View File

@ -138,9 +138,25 @@ class BaseXYTestCase(unittest.TestCase):
eq(base64.urlsafe_b64decode(b'01a-b_cd'), b'\xd3V\xbeo\xf7\x1d')
self.assertRaises(TypeError, base64.urlsafe_b64decode, "")
def test_b64decode_error(self):
def test_b64decode_padding_error(self):
self.assertRaises(binascii.Error, base64.b64decode, b'abc')
def test_b64decode_invalid_chars(self):
# issue 1466065: Test some invalid characters.
tests = ((b'%3d==', b'\xdd'),
(b'$3d==', b'\xdd'),
(b'[==', b''),
(b'YW]3=', b'am'),
(b'3{d==', b'\xdd'),
(b'3d}==', b'\xdd'),
(b'@@', b''),
(b'!', b''),
(b'YWJj\nYWI=', b'abcab'))
for bstr, res in tests:
self.assertEquals(base64.b64decode(bstr), res)
with self.assertRaises(binascii.Error):
base64.b64decode(bstr, validate=True)
def test_b32encode(self):
eq = self.assertEqual
eq(base64.b32encode(b''), b'')

View File

@ -63,6 +63,9 @@ Core and Builtins
Library
-------
- Issue #1466065: Add 'validate' option to base64.b64decode to raise
an error if there are non-base64 alphabet characters in the input.
- Issue #10386: Add __all__ to token module; this simplifies importing
in tokenize module and prevents leaking of private names through
import *.