bpo-16995: add support for base32 extended hex (base32hex) (GH-20441)

cc @pganssle Automerge-Triggered-By: @pganssle
2020-08-10 15:48:20 +01:00 · 2020-08-10 15:48:20 +01:00 · 4ce6faa6c9
parent 39042e00ab
commit 4ce6faa6c9
5 changed files with 156 additions and 33 deletions
--- a/Doc/library/base64.rst
+++ b/Doc/library/base64.rst
@ -124,7 +124,7 @@ The modern interface provides:
   whether a lowercase alphabet is acceptable as input.  For security purposes,
   the default is ``False``.
-   :rfc:`3548` allows for optional mapping of the digit 0 (zero) to the letter O
+   :rfc:`4648` allows for optional mapping of the digit 0 (zero) to the letter O
   (oh), and for optional mapping of the digit 1 (one) to either the letter I (eye)
   or letter L (el).  The optional argument *map01* when not ``None``, specifies
   which letter the digit 1 should be mapped to (when *map01* is not ``None``, the
@ -136,6 +136,27 @@ The modern interface provides:
   input.
 .. function:: b32hexencode(s)
   Similar to :func:`b32encode` but uses the Extended Hex Alphabet, as defined in
   :rfc:`4648`.
   .. versionadded:: 3.10
 .. function:: b32hexdecode(s, casefold=False)
   Similar to :func:`b32decode` but uses the Extended Hex Alphabet, as defined in
   :rfc:`4648`.
   This version does not allow the digit 0 (zero) to the letter O (oh) and digit
   1 (one) to either the letter I (eye) or letter L (el) mappings, because all
   of these characters are included in the Extended Hex Alphabet and are not
   interchangeable.
   .. versionadded:: 3.10
 .. function:: b16encode(s)
   Encode the :term:`bytes-like object` *s* using Base16 and return the
--- a/Doc/whatsnew/3.10.rst
+++ b/Doc/whatsnew/3.10.rst
@ -103,6 +103,12 @@ New Modules
 Improved Modules
 ================
 base64
 ------
 Add :func:`base64.b32hexencode` and :func:`base64.b32hexdecode` to support the
 Base32 Encoding with Extended Hex Alphabet.
 curses
 ------
--- a/Lib/base64.py
+++ b/Lib/base64.py
@ -16,7 +16,7 @@ __all__ = [
    'encode', 'decode', 'encodebytes', 'decodebytes',
    # Generalized interface for other encodings
    'b64encode', 'b64decode', 'b32encode', 'b32decode',
-    'b16encode', 'b16decode',
+    'b32hexencode', 'b32hexdecode', 'b16encode', 'b16decode',
    # Base85 and Ascii85 encodings
    'b85encode', 'b85decode', 'a85encode', 'a85decode',
    # Standard Base64 encoding
@ -135,19 +135,40 @@ def urlsafe_b64decode(s):
 # Base32 encoding/decoding must be done in Python
-_b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
+_B32_ENCODE_DOCSTRING = '''
-_b32tab2 = None
+Encode the bytes-like objects using {encoding} and return a bytes object.
-_b32rev = None
+'''
 _B32_DECODE_DOCSTRING = '''
 Decode the {encoding} encoded bytes-like object or ASCII string s.
-def b32encode(s):
+Optional casefold is a flag specifying whether a lowercase alphabet is
-    """Encode the bytes-like object s using Base32 and return a bytes object.
+acceptable as input.  For security purposes, the default is False.
-    """
+{extra_args}
 The result is returned as a bytes object.  A binascii.Error is raised if
 the input is incorrectly padded or if there are non-alphabet
 characters present in the input.
 '''
 # Paragraph describing the map01 argument.  It is substituted for the
 # {extra_args} placeholder when the b32decode() docstring is built; the
 # RFC cited is 4648, matching the reference documentation for b32decode().
 _B32_DECODE_MAP01_DOCSTRING = '''
 RFC 4648 allows for optional mapping of the digit 0 (zero) to the
 letter O (oh), and for optional mapping of the digit 1 (one) to
 either the letter I (eye) or letter L (el).  The optional argument
 map01 when not None, specifies which letter the digit 1 should be
 mapped to (when map01 is not None, the digit 0 is always mapped to
 the letter O).  For security purposes the default is None, so that
 0 and 1 are not allowed in the input.
 '''
 # Base32 alphabets: the standard one and the Extended Hex one.  The position
 # of a symbol in the bytes object is the 5-bit value it stands for.
 _b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
 _b32hexalphabet = b'0123456789ABCDEFGHIJKLMNOPQRSTUV'
 # Lazily-filled caches keyed by alphabet: _b32tab2 holds the encode tables
 # built by _b32encode(), _b32rev the symbol -> value maps built by
 # _b32decode().
 _b32tab2 = {}
 _b32rev = {}
 def _b32encode(alphabet, s):
    global _b32tab2
    # Delay the initialization of the table to not waste memory
    # if the function is never called
-    if _b32tab2 is None:
+    if alphabet not in _b32tab2:
-        b32tab = [bytes((i,)) for i in _b32alphabet]
+        b32tab = [bytes((i,)) for i in alphabet]
-        _b32tab2 = [a + b for a in b32tab for b in b32tab]
+        _b32tab2[alphabet] = [a + b for a in b32tab for b in b32tab]
        b32tab = None
    if not isinstance(s, bytes_types):
@ -158,7 +179,7 @@ def b32encode(s):
        s = s + b'\0' * (5 - leftover)  # Don't use += !
    encoded = bytearray()
    from_bytes = int.from_bytes
-    b32tab2 = _b32tab2
+    b32tab2 = _b32tab2[alphabet]
    for i in range(0, len(s), 5):
        c = from_bytes(s[i: i + 5], 'big')
        encoded += (b32tab2[c >> 30] +           # bits 1 - 10
@ -177,29 +198,12 @@ def b32encode(s):
        encoded[-1:] = b'='
    return bytes(encoded)
-def b32decode(s, casefold=False, map01=None):
+def _b32decode(alphabet, s, casefold=False, map01=None):
    """Decode the Base32 encoded bytes-like object or ASCII string s.
    Optional casefold is a flag specifying whether a lowercase alphabet is
    acceptable as input.  For security purposes, the default is False.
    RFC 3548 allows for optional mapping of the digit 0 (zero) to the
    letter O (oh), and for optional mapping of the digit 1 (one) to
    either the letter I (eye) or letter L (el).  The optional argument
    map01 when not None, specifies which letter the digit 1 should be
    mapped to (when map01 is not None, the digit 0 is always mapped to
    the letter O).  For security purposes the default is None, so that
    0 and 1 are not allowed in the input.
    The result is returned as a bytes object.  A binascii.Error is raised if
    the input is incorrectly padded or if there are non-alphabet
    characters present in the input.
    """
    global _b32rev
    # Delay the initialization of the table to not waste memory
    # if the function is never called
-    if _b32rev is None:
+    if alphabet not in _b32rev:
-        _b32rev = {v: k for k, v in enumerate(_b32alphabet)}
+        _b32rev[alphabet] = {v: k for k, v in enumerate(alphabet)}
    s = _bytes_from_decode_data(s)
    if len(s) % 8:
        raise binascii.Error('Incorrect padding')
@ -220,7 +224,7 @@ def b32decode(s, casefold=False, map01=None):
    padchars = l - len(s)
    # Now decode the full quanta
    decoded = bytearray()
-    b32rev = _b32rev
+    b32rev = _b32rev[alphabet]
    for i in range(0, len(s), 8):
        quanta = s[i: i + 8]
        acc = 0
@ -241,6 +245,26 @@ def b32decode(s, casefold=False, map01=None):
    return bytes(decoded)
 def b32encode(s):
    # Standard-alphabet entry point; the shared helper does the encoding.
    encoded = _b32encode(_b32alphabet, s)
    return encoded
 b32encode.__doc__ = _B32_ENCODE_DOCSTRING.format(
    encoding='base32')
 def b32decode(s, casefold=False, map01=None):
    # Standard alphabet: the public decoder that forwards map01 to the
    # shared helper.
    return _b32decode(_b32alphabet, s, casefold=casefold, map01=map01)
 b32decode.__doc__ = _B32_DECODE_DOCSTRING.format(
    extra_args=_B32_DECODE_MAP01_DOCSTRING, encoding='base32')
 def b32hexencode(s):
    # Extended Hex alphabet entry point; same helper as b32encode().
    encoded = _b32encode(_b32hexalphabet, s)
    return encoded
 b32hexencode.__doc__ = _B32_ENCODE_DOCSTRING.format(
    encoding='base32hex')
 def b32hexdecode(s, casefold=False):
    # No map01 here: base32hex uses 0, 1, I, L and O as distinct symbols,
    # so the helper is left at its default of map01=None.
    return _b32decode(_b32hexalphabet, s, casefold=casefold)
 b32hexdecode.__doc__ = _B32_DECODE_DOCSTRING.format(
    extra_args='', encoding='base32hex')
 # RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
 # lowercase.  The RFC also recommends against accepting input case
 # insensitively.
--- a/Lib/test/test_base64.py
+++ b/Lib/test/test_base64.py
@ -351,6 +351,76 @@ class BaseXYTestCase(unittest.TestCase):
                with self.assertRaises(binascii.Error):
                    base64.b32decode(data.decode('ascii'))
    def test_b32hexencode(self):
        test_cases = [
            # to_encode, expected
            (b'',      b''),
            (b'\x00',  b'00======'),
            (b'a',     b'C4======'),
            (b'ab',    b'C5H0===='),
            (b'abc',   b'C5H66==='),
            (b'abcd',  b'C5H66P0='),
            (b'abcde', b'C5H66P35'),
        ]
        for to_encode, expected in test_cases:
            with self.subTest(to_decode=to_encode):
                self.assertEqual(base64.b32hexencode(to_encode), expected)
    def test_b32hexencode_other_types(self):
        # Run the test class's shared helpers against the base32hex encoder.
        # NOTE(review): presumably they cover other bytes-like input types and
        # rejected argument types -- confirm against the helper definitions.
        encoder = base64.b32hexencode
        self.check_other_types(encoder, b'abcd', b'C5H66P0=')
        self.check_encode_type_errors(encoder)
    def test_b32hexdecode(self):
        test_cases = [
            # to_decode, expected, casefold
            (b'',         b'',      False),
            (b'00======', b'\x00',  False),
            (b'C4======', b'a',     False),
            (b'C5H0====', b'ab',    False),
            (b'C5H66===', b'abc',   False),
            (b'C5H66P0=', b'abcd',  False),
            (b'C5H66P35', b'abcde', False),
            (b'',         b'',      True),
            (b'00======', b'\x00',  True),
            (b'C4======', b'a',     True),
            (b'C5H0====', b'ab',    True),
            (b'C5H66===', b'abc',   True),
            (b'C5H66P0=', b'abcd',  True),
            (b'C5H66P35', b'abcde', True),
            (b'c4======', b'a',     True),
            (b'c5h0====', b'ab',    True),
            (b'c5h66===', b'abc',   True),
            (b'c5h66p0=', b'abcd',  True),
            (b'c5h66p35', b'abcde', True),
        ]
        for to_decode, expected, casefold in test_cases:
            with self.subTest(to_decode=to_decode, casefold=casefold):
                self.assertEqual(base64.b32hexdecode(to_decode, casefold),
                                 expected)
                self.assertEqual(base64.b32hexdecode(to_decode.decode('ascii'),
                                 casefold), expected)
    def test_b32hexdecode_other_types(self):
        # Run the test class's shared helpers against the base32hex decoder.
        # NOTE(review): presumably they cover other bytes-like input types and
        # rejected argument types -- confirm against the helper definitions.
        decoder = base64.b32hexdecode
        self.check_other_types(decoder, b'C5H66===', b'abc')
        self.check_decode_type_errors(decoder)
    def test_b32hexdecode_error(self):
        tests = [b'abc', b'ABCDEF==', b'==ABCDEF', b'c4======']
        prefixes = [b'M', b'ME', b'MFRA', b'MFRGG', b'MFRGGZA', b'MFRGGZDF']
        for i in range(0, 17):
            if i:
                tests.append(b'='*i)
            for prefix in prefixes:
                if len(prefix) + i != 8:
                    tests.append(prefix + b'='*i)
        for data in tests:
            with self.subTest(to_decode=data):
                with self.assertRaises(binascii.Error):
                    base64.b32hexdecode(data)
                with self.assertRaises(binascii.Error):
                    base64.b32hexdecode(data.decode('ascii'))
    def test_b16encode(self):
        eq = self.assertEqual
        eq(base64.b16encode(b'\x01\x02\xab\xcd\xef'), b'0102ABCDEF')
--- a/Misc/NEWS.d/next/Library/2020-05-27-00-09-52.bpo-16995.4niOT7.rst
+++ b/Misc/NEWS.d/next/Library/2020-05-27-00-09-52.bpo-16995.4niOT7.rst
@ -0,0 +1,2 @@
 Add :func:`base64.b32hexencode` and :func:`base64.b32hexdecode` to support the
 Base32 Encoding with Extended Hex Alphabet.
@ -0,0 +1,2 @@
 Add :func:`base64.b32hexencode` and :func:`base64.b32hexdecode` to support the
 Base32 Encoding with Extended Hex Alphabet.