bpo-16995: add support for base32 extended hex (base32hex) (GH-20441)
cc @pganssle Automerge-Triggered-By: @pganssle
This commit is contained in:
parent
39042e00ab
commit
4ce6faa6c9
|
@ -124,7 +124,7 @@ The modern interface provides:
|
||||||
whether a lowercase alphabet is acceptable as input. For security purposes,
|
whether a lowercase alphabet is acceptable as input. For security purposes,
|
||||||
the default is ``False``.
|
the default is ``False``.
|
||||||
|
|
||||||
:rfc:`3548` allows for optional mapping of the digit 0 (zero) to the letter O
|
:rfc:`4648` allows for optional mapping of the digit 0 (zero) to the letter O
|
||||||
(oh), and for optional mapping of the digit 1 (one) to either the letter I (eye)
|
(oh), and for optional mapping of the digit 1 (one) to either the letter I (eye)
|
||||||
or letter L (el). The optional argument *map01* when not ``None``, specifies
|
or letter L (el). The optional argument *map01* when not ``None``, specifies
|
||||||
which letter the digit 1 should be mapped to (when *map01* is not ``None``, the
|
which letter the digit 1 should be mapped to (when *map01* is not ``None``, the
|
||||||
|
@ -136,6 +136,27 @@ The modern interface provides:
|
||||||
input.
|
input.
|
||||||
|
|
||||||
|
|
||||||
|
.. function:: b32hexencode(s)
|
||||||
|
|
||||||
|
Similar to :func:`b32encode` but uses the Extended Hex Alphabet, as defined in
|
||||||
|
:rfc:`4648`.
|
||||||
|
|
||||||
|
.. versionadded:: 3.10
|
||||||
|
|
||||||
|
|
||||||
|
.. function:: b32hexdecode(s, casefold=False)
|
||||||
|
|
||||||
|
Similar to :func:`b32decode` but uses the Extended Hex Alphabet, as defined in
|
||||||
|
:rfc:`4648`.
|
||||||
|
|
||||||
|
This version does not allow the digit 0 (zero) to the letter O (oh) and digit
|
||||||
|
1 (one) to either the letter I (eye) or letter L (el) mappings; all these
|
||||||
|
characters are included in the Extended Hex Alphabet and are not
|
||||||
|
interchangeable.
|
||||||
|
|
||||||
|
.. versionadded:: 3.10
|
||||||
|
|
||||||
|
|
||||||
.. function:: b16encode(s)
|
.. function:: b16encode(s)
|
||||||
|
|
||||||
Encode the :term:`bytes-like object` *s* using Base16 and return the
|
Encode the :term:`bytes-like object` *s* using Base16 and return the
|
||||||
|
|
|
@ -103,6 +103,12 @@ New Modules
|
||||||
Improved Modules
|
Improved Modules
|
||||||
================
|
================
|
||||||
|
|
||||||
|
base64
|
||||||
|
------
|
||||||
|
|
||||||
|
Add :func:`base64.b32hexencode` and :func:`base64.b32hexdecode` to support the
|
||||||
|
Base32 Encoding with Extended Hex Alphabet.
|
||||||
|
|
||||||
curses
|
curses
|
||||||
------
|
------
|
||||||
|
|
||||||
|
|
|
@ -16,7 +16,7 @@ __all__ = [
|
||||||
'encode', 'decode', 'encodebytes', 'decodebytes',
|
'encode', 'decode', 'encodebytes', 'decodebytes',
|
||||||
# Generalized interface for other encodings
|
# Generalized interface for other encodings
|
||||||
'b64encode', 'b64decode', 'b32encode', 'b32decode',
|
'b64encode', 'b64decode', 'b32encode', 'b32decode',
|
||||||
'b16encode', 'b16decode',
|
'b32hexencode', 'b32hexdecode', 'b16encode', 'b16decode',
|
||||||
# Base85 and Ascii85 encodings
|
# Base85 and Ascii85 encodings
|
||||||
'b85encode', 'b85decode', 'a85encode', 'a85decode',
|
'b85encode', 'b85decode', 'a85encode', 'a85decode',
|
||||||
# Standard Base64 encoding
|
# Standard Base64 encoding
|
||||||
|
@ -135,19 +135,40 @@ def urlsafe_b64decode(s):
|
||||||
|
|
||||||
|
|
||||||
# Base32 encoding/decoding must be done in Python
|
# Base32 encoding/decoding must be done in Python
|
||||||
_b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
|
_B32_ENCODE_DOCSTRING = '''
|
||||||
_b32tab2 = None
|
Encode the bytes-like object s using {encoding} and return a bytes object.
|
||||||
_b32rev = None
|
'''
|
||||||
|
_B32_DECODE_DOCSTRING = '''
|
||||||
|
Decode the {encoding} encoded bytes-like object or ASCII string s.
|
||||||
|
|
||||||
def b32encode(s):
|
Optional casefold is a flag specifying whether a lowercase alphabet is
|
||||||
"""Encode the bytes-like object s using Base32 and return a bytes object.
|
acceptable as input. For security purposes, the default is False.
|
||||||
"""
|
{extra_args}
|
||||||
|
The result is returned as a bytes object. A binascii.Error is raised if
|
||||||
|
the input is incorrectly padded or if there are non-alphabet
|
||||||
|
characters present in the input.
|
||||||
|
'''
|
||||||
|
_B32_DECODE_MAP01_DOCSTRING = '''
|
||||||
|
RFC 4648 allows for optional mapping of the digit 0 (zero) to the
|
||||||
|
letter O (oh), and for optional mapping of the digit 1 (one) to
|
||||||
|
either the letter I (eye) or letter L (el). The optional argument
|
||||||
|
map01 when not None, specifies which letter the digit 1 should be
|
||||||
|
mapped to (when map01 is not None, the digit 0 is always mapped to
|
||||||
|
the letter O). For security purposes the default is None, so that
|
||||||
|
0 and 1 are not allowed in the input.
|
||||||
|
'''
|
||||||
|
_b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
|
||||||
|
_b32hexalphabet = b'0123456789ABCDEFGHIJKLMNOPQRSTUV'
|
||||||
|
_b32tab2 = {}
|
||||||
|
_b32rev = {}
|
||||||
|
|
||||||
|
def _b32encode(alphabet, s):
|
||||||
global _b32tab2
|
global _b32tab2
|
||||||
# Delay the initialization of the table to not waste memory
|
# Delay the initialization of the table to not waste memory
|
||||||
# if the function is never called
|
# if the function is never called
|
||||||
if _b32tab2 is None:
|
if alphabet not in _b32tab2:
|
||||||
b32tab = [bytes((i,)) for i in _b32alphabet]
|
b32tab = [bytes((i,)) for i in alphabet]
|
||||||
_b32tab2 = [a + b for a in b32tab for b in b32tab]
|
_b32tab2[alphabet] = [a + b for a in b32tab for b in b32tab]
|
||||||
b32tab = None
|
b32tab = None
|
||||||
|
|
||||||
if not isinstance(s, bytes_types):
|
if not isinstance(s, bytes_types):
|
||||||
|
@ -158,7 +179,7 @@ def b32encode(s):
|
||||||
s = s + b'\0' * (5 - leftover) # Don't use += !
|
s = s + b'\0' * (5 - leftover) # Don't use += !
|
||||||
encoded = bytearray()
|
encoded = bytearray()
|
||||||
from_bytes = int.from_bytes
|
from_bytes = int.from_bytes
|
||||||
b32tab2 = _b32tab2
|
b32tab2 = _b32tab2[alphabet]
|
||||||
for i in range(0, len(s), 5):
|
for i in range(0, len(s), 5):
|
||||||
c = from_bytes(s[i: i + 5], 'big')
|
c = from_bytes(s[i: i + 5], 'big')
|
||||||
encoded += (b32tab2[c >> 30] + # bits 1 - 10
|
encoded += (b32tab2[c >> 30] + # bits 1 - 10
|
||||||
|
@ -177,29 +198,12 @@ def b32encode(s):
|
||||||
encoded[-1:] = b'='
|
encoded[-1:] = b'='
|
||||||
return bytes(encoded)
|
return bytes(encoded)
|
||||||
|
|
||||||
def b32decode(s, casefold=False, map01=None):
|
def _b32decode(alphabet, s, casefold=False, map01=None):
|
||||||
"""Decode the Base32 encoded bytes-like object or ASCII string s.
|
|
||||||
|
|
||||||
Optional casefold is a flag specifying whether a lowercase alphabet is
|
|
||||||
acceptable as input. For security purposes, the default is False.
|
|
||||||
|
|
||||||
RFC 3548 allows for optional mapping of the digit 0 (zero) to the
|
|
||||||
letter O (oh), and for optional mapping of the digit 1 (one) to
|
|
||||||
either the letter I (eye) or letter L (el). The optional argument
|
|
||||||
map01 when not None, specifies which letter the digit 1 should be
|
|
||||||
mapped to (when map01 is not None, the digit 0 is always mapped to
|
|
||||||
the letter O). For security purposes the default is None, so that
|
|
||||||
0 and 1 are not allowed in the input.
|
|
||||||
|
|
||||||
The result is returned as a bytes object. A binascii.Error is raised if
|
|
||||||
the input is incorrectly padded or if there are non-alphabet
|
|
||||||
characters present in the input.
|
|
||||||
"""
|
|
||||||
global _b32rev
|
global _b32rev
|
||||||
# Delay the initialization of the table to not waste memory
|
# Delay the initialization of the table to not waste memory
|
||||||
# if the function is never called
|
# if the function is never called
|
||||||
if _b32rev is None:
|
if alphabet not in _b32rev:
|
||||||
_b32rev = {v: k for k, v in enumerate(_b32alphabet)}
|
_b32rev[alphabet] = {v: k for k, v in enumerate(alphabet)}
|
||||||
s = _bytes_from_decode_data(s)
|
s = _bytes_from_decode_data(s)
|
||||||
if len(s) % 8:
|
if len(s) % 8:
|
||||||
raise binascii.Error('Incorrect padding')
|
raise binascii.Error('Incorrect padding')
|
||||||
|
@ -220,7 +224,7 @@ def b32decode(s, casefold=False, map01=None):
|
||||||
padchars = l - len(s)
|
padchars = l - len(s)
|
||||||
# Now decode the full quanta
|
# Now decode the full quanta
|
||||||
decoded = bytearray()
|
decoded = bytearray()
|
||||||
b32rev = _b32rev
|
b32rev = _b32rev[alphabet]
|
||||||
for i in range(0, len(s), 8):
|
for i in range(0, len(s), 8):
|
||||||
quanta = s[i: i + 8]
|
quanta = s[i: i + 8]
|
||||||
acc = 0
|
acc = 0
|
||||||
|
@ -241,6 +245,26 @@ def b32decode(s, casefold=False, map01=None):
|
||||||
return bytes(decoded)
|
return bytes(decoded)
|
||||||
|
|
||||||
|
|
||||||
|
def b32encode(s):
|
||||||
|
return _b32encode(_b32alphabet, s)
|
||||||
|
b32encode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32')
|
||||||
|
|
||||||
|
def b32decode(s, casefold=False, map01=None):
|
||||||
|
return _b32decode(_b32alphabet, s, casefold, map01)
|
||||||
|
b32decode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32',
|
||||||
|
extra_args=_B32_DECODE_MAP01_DOCSTRING)
|
||||||
|
|
||||||
|
def b32hexencode(s):
|
||||||
|
return _b32encode(_b32hexalphabet, s)
|
||||||
|
b32hexencode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32hex')
|
||||||
|
|
||||||
|
def b32hexdecode(s, casefold=False):
|
||||||
|
# base32hex does not have the 01 mapping
|
||||||
|
return _b32decode(_b32hexalphabet, s, casefold)
|
||||||
|
b32hexdecode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32hex',
|
||||||
|
extra_args='')
|
||||||
|
|
||||||
|
|
||||||
# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
|
# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
|
||||||
# lowercase. The RFC also recommends against accepting input case
|
# lowercase. The RFC also recommends against accepting input case
|
||||||
# insensitively.
|
# insensitively.
|
||||||
|
|
|
@ -351,6 +351,76 @@ class BaseXYTestCase(unittest.TestCase):
|
||||||
with self.assertRaises(binascii.Error):
|
with self.assertRaises(binascii.Error):
|
||||||
base64.b32decode(data.decode('ascii'))
|
base64.b32decode(data.decode('ascii'))
|
||||||
|
|
||||||
|
def test_b32hexencode(self):
|
||||||
|
test_cases = [
|
||||||
|
# to_encode, expected
|
||||||
|
(b'', b''),
|
||||||
|
(b'\x00', b'00======'),
|
||||||
|
(b'a', b'C4======'),
|
||||||
|
(b'ab', b'C5H0===='),
|
||||||
|
(b'abc', b'C5H66==='),
|
||||||
|
(b'abcd', b'C5H66P0='),
|
||||||
|
(b'abcde', b'C5H66P35'),
|
||||||
|
]
|
||||||
|
for to_encode, expected in test_cases:
|
||||||
|
with self.subTest(to_decode=to_encode):
|
||||||
|
self.assertEqual(base64.b32hexencode(to_encode), expected)
|
||||||
|
|
||||||
|
def test_b32hexencode_other_types(self):
|
||||||
|
self.check_other_types(base64.b32hexencode, b'abcd', b'C5H66P0=')
|
||||||
|
self.check_encode_type_errors(base64.b32hexencode)
|
||||||
|
|
||||||
|
def test_b32hexdecode(self):
|
||||||
|
test_cases = [
|
||||||
|
# to_decode, expected, casefold
|
||||||
|
(b'', b'', False),
|
||||||
|
(b'00======', b'\x00', False),
|
||||||
|
(b'C4======', b'a', False),
|
||||||
|
(b'C5H0====', b'ab', False),
|
||||||
|
(b'C5H66===', b'abc', False),
|
||||||
|
(b'C5H66P0=', b'abcd', False),
|
||||||
|
(b'C5H66P35', b'abcde', False),
|
||||||
|
(b'', b'', True),
|
||||||
|
(b'00======', b'\x00', True),
|
||||||
|
(b'C4======', b'a', True),
|
||||||
|
(b'C5H0====', b'ab', True),
|
||||||
|
(b'C5H66===', b'abc', True),
|
||||||
|
(b'C5H66P0=', b'abcd', True),
|
||||||
|
(b'C5H66P35', b'abcde', True),
|
||||||
|
(b'c4======', b'a', True),
|
||||||
|
(b'c5h0====', b'ab', True),
|
||||||
|
(b'c5h66===', b'abc', True),
|
||||||
|
(b'c5h66p0=', b'abcd', True),
|
||||||
|
(b'c5h66p35', b'abcde', True),
|
||||||
|
]
|
||||||
|
for to_decode, expected, casefold in test_cases:
|
||||||
|
with self.subTest(to_decode=to_decode, casefold=casefold):
|
||||||
|
self.assertEqual(base64.b32hexdecode(to_decode, casefold),
|
||||||
|
expected)
|
||||||
|
self.assertEqual(base64.b32hexdecode(to_decode.decode('ascii'),
|
||||||
|
casefold), expected)
|
||||||
|
|
||||||
|
def test_b32hexdecode_other_types(self):
|
||||||
|
self.check_other_types(base64.b32hexdecode, b'C5H66===', b'abc')
|
||||||
|
self.check_decode_type_errors(base64.b32hexdecode)
|
||||||
|
|
||||||
|
def test_b32hexdecode_error(self):
|
||||||
|
tests = [b'abc', b'ABCDEF==', b'==ABCDEF', b'c4======']
|
||||||
|
prefixes = [b'M', b'ME', b'MFRA', b'MFRGG', b'MFRGGZA', b'MFRGGZDF']
|
||||||
|
for i in range(0, 17):
|
||||||
|
if i:
|
||||||
|
tests.append(b'='*i)
|
||||||
|
for prefix in prefixes:
|
||||||
|
if len(prefix) + i != 8:
|
||||||
|
tests.append(prefix + b'='*i)
|
||||||
|
for data in tests:
|
||||||
|
with self.subTest(to_decode=data):
|
||||||
|
with self.assertRaises(binascii.Error):
|
||||||
|
base64.b32hexdecode(data)
|
||||||
|
with self.assertRaises(binascii.Error):
|
||||||
|
base64.b32hexdecode(data.decode('ascii'))
|
||||||
|
|
||||||
|
|
||||||
def test_b16encode(self):
|
def test_b16encode(self):
|
||||||
eq = self.assertEqual
|
eq = self.assertEqual
|
||||||
eq(base64.b16encode(b'\x01\x02\xab\xcd\xef'), b'0102ABCDEF')
|
eq(base64.b16encode(b'\x01\x02\xab\xcd\xef'), b'0102ABCDEF')
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
Add :func:`base64.b32hexencode` and :func:`base64.b32hexdecode` to support the
|
||||||
|
Base32 Encoding with Extended Hex Alphabet.
|
Loading…
Reference in New Issue