From 6495136e4018b626c40500bbd564fdfda9825587 Mon Sep 17 00:00:00 2001 From: "R. David Murray" Date: Thu, 11 Nov 2010 20:09:20 +0000 Subject: [PATCH] #1466065: add validate option to base64.b64decode Patch by Neil Tallim. This provides a mechanism for module users to achieve RFC 3548 compliance in the cases where ignoring non-base64-alphabet input characters is *not* mandated by the RFC that references RFC 3548. --- Doc/library/base64.rst | 12 ++++++++---- Lib/base64.py | 13 +++++++++---- Lib/test/test_base64.py | 18 +++++++++++++++++- Misc/NEWS | 3 +++ 4 files changed, 37 insertions(+), 9 deletions(-) diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst index c10a74ac8ab..2401ae7a132 100644 --- a/Doc/library/base64.rst +++ b/Doc/library/base64.rst @@ -37,7 +37,7 @@ The modern interface provides: The encoded byte string is returned. -.. function:: b64decode(s, altchars=None) +.. function:: b64decode(s, altchars=None, validate=False) Decode a Base64 encoded byte string. @@ -45,9 +45,13 @@ The modern interface provides: at least length 2 (additional characters are ignored) which specifies the alternative alphabet used instead of the ``+`` and ``/`` characters. - The decoded byte string is returned. A :exc:`TypeError` is raised if *s* were - incorrectly padded or if there are non-alphabet characters present in the - string. + The decoded byte string is returned. A :exc:`binascii.Error` is raised if *s* is + incorrectly padded. + + If *validate* is ``False`` (the default), non-base64-alphabet characters are + discarded prior to the padding check. If *validate* is ``True``, + non-base64-alphabet characters in the input result in a + :exc:`binascii.Error`. .. 
function:: standard_b64encode(s) diff --git a/Lib/base64.py b/Lib/base64.py index af7cf644658..895d813f7ee 100755 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -65,16 +65,19 @@ def b64encode(s, altchars=None): return encoded -def b64decode(s, altchars=None): +def b64decode(s, altchars=None, validate=False): """Decode a Base64 encoded byte string. s is the byte string to decode. Optional altchars must be a string of length 2 which specifies the alternative alphabet used instead of the '+' and '/' characters. - The decoded byte string is returned. binascii.Error is raised if - s were incorrectly padded or if there are non-alphabet characters - present in the string. + The decoded string is returned. A binascii.Error is raised if s is + incorrectly padded. + + If validate is False (the default), non-base64-alphabet characters are + discarded prior to the padding check. If validate is True, + non-base64-alphabet characters in the input result in a binascii.Error. """ if not isinstance(s, bytes_types): raise TypeError("expected bytes, not %s" % s.__class__.__name__) @@ -84,6 +87,8 @@ def b64decode(s, altchars=None): % altchars.__class__.__name__) assert len(altchars) == 2, repr(altchars) s = _translate(s, {chr(altchars[0]): b'+', chr(altchars[1]): b'/'}) + if validate and not re.match(b'^[A-Za-z0-9+/]*={0,2}$', s): + raise binascii.Error('Non-base64 digit found') return binascii.a2b_base64(s) diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index 49edf395f16..228a0fb23dd 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -138,9 +138,25 @@ class BaseXYTestCase(unittest.TestCase): eq(base64.urlsafe_b64decode(b'01a-b_cd'), b'\xd3V\xbeo\xf7\x1d') self.assertRaises(TypeError, base64.urlsafe_b64decode, "") - def test_b64decode_error(self): + def test_b64decode_padding_error(self): self.assertRaises(binascii.Error, base64.b64decode, b'abc') + def test_b64decode_invalid_chars(self): + # issue 1466065: Test some invalid characters. 
+ tests = ((b'%3d==', b'\xdd'), + (b'$3d==', b'\xdd'), + (b'[==', b''), + (b'YW]3=', b'am'), + (b'3{d==', b'\xdd'), + (b'3d}==', b'\xdd'), + (b'@@', b''), + (b'!', b''), + (b'YWJj\nYWI=', b'abcab')) + for bstr, res in tests: + self.assertEquals(base64.b64decode(bstr), res) + with self.assertRaises(binascii.Error): + base64.b64decode(bstr, validate=True) + def test_b32encode(self): eq = self.assertEqual eq(base64.b32encode(b''), b'') diff --git a/Misc/NEWS b/Misc/NEWS index a887fafd9fd..2ff72474727 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -63,6 +63,9 @@ Core and Builtins Library ------- +- Issue #1466065: Add 'validate' option to base64.b64decode to raise + an error if there are non-base64 alphabet characters in the input. + - Issue #10386: Add __all__ to token module; this simplifies importing in tokenize module and prevents leaking of private names through import *.