From ea6b4d5f70469071912e81cb29319996cfd990e0 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 20 Feb 2012 19:30:23 +0100 Subject: [PATCH 1/2] Issue #13641: Decoding functions in the base64 module now accept ASCII-only unicode strings. Patch by Catalin Iacob. --- Doc/library/base64.rst | 11 ++- Lib/base64.py | 26 ++++--- Lib/test/test_base64.py | 163 +++++++++++++++++++++++++--------------- Misc/NEWS | 3 + 4 files changed, 130 insertions(+), 73 deletions(-) diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst index 06f3ab1282f..afbedce3baa 100644 --- a/Doc/library/base64.rst +++ b/Doc/library/base64.rst @@ -18,9 +18,14 @@ POST request. The encoding algorithm is not the same as the There are two interfaces provided by this module. The modern interface supports encoding and decoding ASCII byte string objects using all three -alphabets. The legacy interface provides for encoding and decoding to and from -file-like objects as well as byte strings, but only using the Base64 standard -alphabet. +alphabets. Additionally, the decoding functions of the modern interface also +accept Unicode strings containing only ASCII characters. The legacy interface +provides for encoding and decoding to and from file-like objects as well as +byte strings, but only using the Base64 standard alphabet. + +.. versionchanged:: 3.3 + ASCII-only Unicode strings are now accepted by the decoding functions of + the modern interface. The modern interface provides: diff --git a/Lib/base64.py b/Lib/base64.py index 895d813f7ee..edcc4bea601 100755 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -29,6 +29,16 @@ __all__ = [ bytes_types = (bytes, bytearray) # Types acceptable as binary data +def _bytes_from_decode_data(s): + if isinstance(s, str): + try: + return s.encode('ascii') + except UnicodeEncodeError: + raise ValueError('string argument should contain only ASCII characters') + elif isinstance(s, bytes_types): + return s + else: + raise TypeError("argument should be bytes or ASCII string, not %s" % s.__class__.__name__) def _translate(s, altchars): if not isinstance(s, bytes_types): @@ -79,12 +89,9 @@ def b64decode(s, altchars=None, validate=False): discarded prior to the padding check. If validate is True, non-base64-alphabet characters in the input result in a binascii.Error. """ - if not isinstance(s, bytes_types): - raise TypeError("expected bytes, not %s" % s.__class__.__name__) + s = _bytes_from_decode_data(s) if altchars is not None: - if not isinstance(altchars, bytes_types): - raise TypeError("expected bytes, not %s" - % altchars.__class__.__name__) + altchars = _bytes_from_decode_data(altchars) assert len(altchars) == 2, repr(altchars) s = _translate(s, {chr(altchars[0]): b'+', chr(altchars[1]): b'/'}) if validate and not re.match(b'^[A-Za-z0-9+/]*={0,2}$', s): @@ -211,8 +218,7 @@ def b32decode(s, casefold=False, map01=None): the input is incorrectly padded or if there are non-alphabet characters present in the input. """ - if not isinstance(s, bytes_types): - raise TypeError("expected bytes, not %s" % s.__class__.__name__) + s = _bytes_from_decode_data(s) quanta, leftover = divmod(len(s), 8) if leftover: raise binascii.Error('Incorrect padding') @@ -220,8 +226,7 @@ def b32decode(s, casefold=False, map01=None): # False, or the character to map the digit 1 (one) to. It should be # either L (el) or I (eye). if map01 is not None: - if not isinstance(map01, bytes_types): - raise TypeError("expected bytes, not %s" % map01.__class__.__name__) + map01 = _bytes_from_decode_data(map01) assert len(map01) == 1, repr(map01) s = _translate(s, {b'0': b'O', b'1': map01}) if casefold: @@ -292,8 +297,7 @@ def b16decode(s, casefold=False): s were incorrectly padded or if there are non-alphabet characters present in the string. """ - if not isinstance(s, bytes_types): - raise TypeError("expected bytes, not %s" % s.__class__.__name__) + s = _bytes_from_decode_data(s) if casefold: s = s.upper() if re.search(b'[^0-9A-F]', s): diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index 93c623cc8ab..5744af40bec 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -102,44 +102,53 @@ class BaseXYTestCase(unittest.TestCase): def test_b64decode(self): eq = self.assertEqual - eq(base64.b64decode(b"d3d3LnB5dGhvbi5vcmc="), b"www.python.org") - eq(base64.b64decode(b'AA=='), b'\x00') - eq(base64.b64decode(b"YQ=="), b"a") - eq(base64.b64decode(b"YWI="), b"ab") - eq(base64.b64decode(b"YWJj"), b"abc") - eq(base64.b64decode(b"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE" - b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0\nNT" - b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ=="), - b"abcdefghijklmnopqrstuvwxyz" - b"ABCDEFGHIJKLMNOPQRSTUVWXYZ" - b"0123456789!@#0^&*();:<>,. []{}") - eq(base64.b64decode(b''), b'') + + tests = {b"d3d3LnB5dGhvbi5vcmc=": b"www.python.org", + b'AA==': b'\x00', + b"YQ==": b"a", + b"YWI=": b"ab", + b"YWJj": b"abc", + b"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE" + b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0\nNT" + b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ==": + + b"abcdefghijklmnopqrstuvwxyz" + b"ABCDEFGHIJKLMNOPQRSTUVWXYZ" + b"0123456789!@#0^&*();:<>,. []{}", + b'': b'', + } + for data, res in tests.items(): + eq(base64.b64decode(data), res) + eq(base64.b64decode(data.decode('ascii')), res) + # Test with arbitrary alternative characters - eq(base64.b64decode(b'01a*b$cd', altchars=b'*$'), b'\xd3V\xbeo\xf7\x1d') - # Check if passing a str object raises an error - self.assertRaises(TypeError, base64.b64decode, "") - self.assertRaises(TypeError, base64.b64decode, b"", altchars="") + tests_altchars = {(b'01a*b$cd', b'*$'): b'\xd3V\xbeo\xf7\x1d', + } + for (data, altchars), res in tests_altchars.items(): + data_str = data.decode('ascii') + altchars_str = altchars.decode('ascii') + + eq(base64.b64decode(data, altchars=altchars), res) + eq(base64.b64decode(data_str, altchars=altchars), res) + eq(base64.b64decode(data, altchars=altchars_str), res) + eq(base64.b64decode(data_str, altchars=altchars_str), res) + # Test standard alphabet - eq(base64.standard_b64decode(b"d3d3LnB5dGhvbi5vcmc="), b"www.python.org") - eq(base64.standard_b64decode(b"YQ=="), b"a") - eq(base64.standard_b64decode(b"YWI="), b"ab") - eq(base64.standard_b64decode(b"YWJj"), b"abc") - eq(base64.standard_b64decode(b""), b"") - eq(base64.standard_b64decode(b"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE" - b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NT" - b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ=="), - b"abcdefghijklmnopqrstuvwxyz" - b"ABCDEFGHIJKLMNOPQRSTUVWXYZ" - b"0123456789!@#0^&*();:<>,. []{}") - # Check if passing a str object raises an error - self.assertRaises(TypeError, base64.standard_b64decode, "") - self.assertRaises(TypeError, base64.standard_b64decode, b"", altchars="") + for data, res in tests.items(): + eq(base64.standard_b64decode(data), res) + eq(base64.standard_b64decode(data.decode('ascii')), res) + # Test with 'URL safe' alternative characters - eq(base64.urlsafe_b64decode(b'01a-b_cd'), b'\xd3V\xbeo\xf7\x1d') - self.assertRaises(TypeError, base64.urlsafe_b64decode, "") + tests_urlsafe = {b'01a-b_cd': b'\xd3V\xbeo\xf7\x1d', + b'': b'', + } + for data, res in tests_urlsafe.items(): + eq(base64.urlsafe_b64decode(data), res) + eq(base64.urlsafe_b64decode(data.decode('ascii')), res) def test_b64decode_padding_error(self): self.assertRaises(binascii.Error, base64.b64decode, b'abc') + self.assertRaises(binascii.Error, base64.b64decode, 'abc') def test_b64decode_invalid_chars(self): # issue 1466065: Test some invalid characters. @@ -154,8 +163,10 @@ class BaseXYTestCase(unittest.TestCase): (b'YWJj\nYWI=', b'abcab')) for bstr, res in tests: self.assertEqual(base64.b64decode(bstr), res) + self.assertEqual(base64.b64decode(bstr.decode('ascii')), res) with self.assertRaises(binascii.Error): base64.b64decode(bstr, validate=True) + base64.b64decode(bstr.decode('ascii'), validate=True) def test_b32encode(self): eq = self.assertEqual @@ -170,40 +181,62 @@ class BaseXYTestCase(unittest.TestCase): def test_b32decode(self): eq = self.assertEqual - eq(base64.b32decode(b''), b'') - eq(base64.b32decode(b'AA======'), b'\x00') - eq(base64.b32decode(b'ME======'), b'a') - eq(base64.b32decode(b'MFRA===='), b'ab') - eq(base64.b32decode(b'MFRGG==='), b'abc') - eq(base64.b32decode(b'MFRGGZA='), b'abcd') - eq(base64.b32decode(b'MFRGGZDF'), b'abcde') - self.assertRaises(TypeError, base64.b32decode, "") + tests = {b'': b'', + b'AA======': b'\x00', + b'ME======': b'a', + b'MFRA====': b'ab', + b'MFRGG===': b'abc', + b'MFRGGZA=': b'abcd', + b'MFRGGZDF': b'abcde', + } + for data, res in tests.items(): + eq(base64.b32decode(data), res) + eq(base64.b32decode(data.decode('ascii')), res) def test_b32decode_casefold(self): eq = self.assertEqual - eq(base64.b32decode(b'', True), b'') - eq(base64.b32decode(b'ME======', True), b'a') - eq(base64.b32decode(b'MFRA====', True), b'ab') - eq(base64.b32decode(b'MFRGG===', True), b'abc') - eq(base64.b32decode(b'MFRGGZA=', True), b'abcd') - eq(base64.b32decode(b'MFRGGZDF', True), b'abcde') - # Lower cases - eq(base64.b32decode(b'me======', True), b'a') - eq(base64.b32decode(b'mfra====', True), b'ab') - eq(base64.b32decode(b'mfrgg===', True), b'abc') - eq(base64.b32decode(b'mfrggza=', True), b'abcd') - eq(base64.b32decode(b'mfrggzdf', True), b'abcde') - # Expected exceptions + tests = {b'': b'', + b'ME======': b'a', + b'MFRA====': b'ab', + b'MFRGG===': b'abc', + b'MFRGGZA=': b'abcd', + b'MFRGGZDF': b'abcde', + # Lower cases + b'me======': b'a', + b'mfra====': b'ab', + b'mfrgg===': b'abc', + b'mfrggza=': b'abcd', + b'mfrggzdf': b'abcde', + } + + for data, res in tests.items(): + eq(base64.b32decode(data, True), res) + eq(base64.b32decode(data.decode('ascii'), True), res) + self.assertRaises(TypeError, base64.b32decode, b'me======') + self.assertRaises(TypeError, base64.b32decode, 'me======') + # Mapping zero and one eq(base64.b32decode(b'MLO23456'), b'b\xdd\xad\xf3\xbe') - eq(base64.b32decode(b'M1023456', map01=b'L'), b'b\xdd\xad\xf3\xbe') - eq(base64.b32decode(b'M1023456', map01=b'I'), b'b\x1d\xad\xf3\xbe') - self.assertRaises(TypeError, base64.b32decode, b"", map01="") + eq(base64.b32decode('MLO23456'), b'b\xdd\xad\xf3\xbe') + + map_tests = {(b'M1023456', b'L'): b'b\xdd\xad\xf3\xbe', + (b'M1023456', b'I'): b'b\x1d\xad\xf3\xbe', + } + for (data, map01), res in map_tests.items(): + data_str = data.decode('ascii') + map01_str = map01.decode('ascii') + + eq(base64.b32decode(data, map01=map01), res) + eq(base64.b32decode(data_str, map01=map01), res) + eq(base64.b32decode(data, map01=map01_str), res) + eq(base64.b32decode(data_str, map01=map01_str), res) def test_b32decode_error(self): - self.assertRaises(binascii.Error, base64.b32decode, b'abc') - self.assertRaises(binascii.Error, base64.b32decode, b'ABCDEF==') + for data in [b'abc', b'ABCDEF==']: + with self.assertRaises(binascii.Error): + base64.b32decode(data) + base64.b32decode(data.decode('ascii')) def test_b16encode(self): eq = self.assertEqual @@ -214,12 +247,24 @@ class BaseXYTestCase(unittest.TestCase): def test_b16decode(self): eq = self.assertEqual eq(base64.b16decode(b'0102ABCDEF'), b'\x01\x02\xab\xcd\xef') + eq(base64.b16decode('0102ABCDEF'), b'\x01\x02\xab\xcd\xef') eq(base64.b16decode(b'00'), b'\x00') + eq(base64.b16decode('00'), b'\x00') # Lower case is not allowed without a flag self.assertRaises(binascii.Error, base64.b16decode, b'0102abcdef') + self.assertRaises(binascii.Error, base64.b16decode, '0102abcdef') # Case fold eq(base64.b16decode(b'0102abcdef', True), b'\x01\x02\xab\xcd\xef') - self.assertRaises(TypeError, base64.b16decode, "") + eq(base64.b16decode('0102abcdef', True), b'\x01\x02\xab\xcd\xef') + + def test_decode_nonascii_str(self): + decode_funcs = (base64.b64decode, + base64.standard_b64decode, + base64.urlsafe_b64decode, + base64.b32decode, + base64.b16decode) + for f in decode_funcs: + self.assertRaises(ValueError, f, 'with non-ascii \xcb') def test_ErrorHeritage(self): self.assertTrue(issubclass(binascii.Error, ValueError)) diff --git a/Misc/NEWS b/Misc/NEWS index fbeb1770ffe..d9ad8fc3773 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -469,6 +469,9 @@ Core and Builtins Library ------- +- Issue #13641: Decoding functions in the base64 module now accept ASCII-only + unicode strings. Patch by Catalin Iacob. + - Issue #14043: Speed up importlib's _FileFinder by at least 8x, and add a new importlib.invalidate_caches() function. From c229e6e8ff6de61581f59266dc553f04f2c870c3 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 20 Feb 2012 19:41:11 +0100 Subject: [PATCH 2/2] Issue #14040: Remove rarely used file name suffixes for C extensions (under POSIX mainly). This will improve import performance a bit (especially under importlib). --- Doc/whatsnew/3.3.rst | 14 ++++++++++++++ Misc/NEWS | 3 +++ Python/dynload_aix.c | 1 - Python/dynload_dl.c | 1 - Python/dynload_hpux.c | 1 - Python/dynload_next.c | 1 - Python/dynload_shlib.c | 6 ------ 7 files changed, 17 insertions(+), 10 deletions(-) diff --git a/Doc/whatsnew/3.3.rst b/Doc/whatsnew/3.3.rst index 8cabab60766..20e2914bc08 100644 --- a/Doc/whatsnew/3.3.rst +++ b/Doc/whatsnew/3.3.rst @@ -939,6 +939,20 @@ Porting C code :c:func:`PyUnicode_FromFormat()`, your code will automatically take advantage of the new unicode representations. +Building C extensions +--------------------- + +* The range of possible file names for C extensions has been narrowed. + Very rarely used spellings have been suppressed: under POSIX, files + named ``xxxmodule.so``, ``xxxmodule.abi3.so`` and + ``xxxmodule.cpython-*.so`` are no longer recognized as implementing + the ``xxx`` module. If you had been generating such files, you have + to switch to the other spellings (i.e., remove the ``module`` string + from the file names). + + (implemented in :issue:`14040`.) + + Other issues ------------ diff --git a/Misc/NEWS b/Misc/NEWS index d9ad8fc3773..6e010b7a2f0 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,9 @@ What's New in Python 3.3 Alpha 1? Core and Builtins ----------------- +- Issue #14040: Remove rarely used file name suffixes for C extensions + (under POSIX mainly). + - Issue #14051: Allow arbitrary attributes to be set of classmethod and staticmethod. diff --git a/Python/dynload_aix.c b/Python/dynload_aix.c index 6287c86edf3..8346f065e3f 100644 --- a/Python/dynload_aix.c +++ b/Python/dynload_aix.c @@ -28,7 +28,6 @@ typedef struct Module { const struct filedescr _PyImport_DynLoadFiletab[] = { {".so", "rb", C_EXTENSION}, - {"module.so", "rb", C_EXTENSION}, {0, 0} }; diff --git a/Python/dynload_dl.c b/Python/dynload_dl.c index 37519b23e7e..a914a0846e6 100644 --- a/Python/dynload_dl.c +++ b/Python/dynload_dl.c @@ -11,7 +11,6 @@ extern char *Py_GetProgramName(void); const struct filedescr _PyImport_DynLoadFiletab[] = { {".o", "rb", C_EXTENSION}, - {"module.o", "rb", C_EXTENSION}, {0, 0} }; diff --git a/Python/dynload_hpux.c b/Python/dynload_hpux.c index 3ebbbad01f1..1004010902f 100644 --- a/Python/dynload_hpux.c +++ b/Python/dynload_hpux.c @@ -15,7 +15,6 @@ const struct filedescr _PyImport_DynLoadFiletab[] = { {SHLIB_EXT, "rb", C_EXTENSION}, - {"module"SHLIB_EXT, "rb", C_EXTENSION}, {0, 0} }; diff --git a/Python/dynload_next.c b/Python/dynload_next.c index eb17950b457..5caff8bc9c4 100644 --- a/Python/dynload_next.c +++ b/Python/dynload_next.c @@ -10,7 +10,6 @@ const struct filedescr _PyImport_DynLoadFiletab[] = { {".so", "rb", C_EXTENSION}, - {"module.so", "rb", C_EXTENSION}, {0, 0} }; diff --git a/Python/dynload_shlib.c b/Python/dynload_shlib.c index 1c215c329cb..ab24238f570 100644 --- a/Python/dynload_shlib.c +++ b/Python/dynload_shlib.c @@ -39,7 +39,6 @@ const struct filedescr _PyImport_DynLoadFiletab[] = { #ifdef __CYGWIN__ {".dll", "rb", C_EXTENSION}, - {"module.dll", "rb", C_EXTENSION}, #else /* !__CYGWIN__ */ #if defined(PYOS_OS2) && defined(PYCC_GCC) {".pyd", "rb", C_EXTENSION}, @@ -48,15 +47,10 @@ const struct filedescr _PyImport_DynLoadFiletab[] = { #ifdef __VMS {".exe", "rb", C_EXTENSION}, {".EXE", "rb", C_EXTENSION}, - {"module.exe", "rb", C_EXTENSION}, - {"MODULE.EXE", "rb", C_EXTENSION}, #else /* !__VMS */ {"." SOABI ".so", "rb", C_EXTENSION}, - {"module." SOABI ".so", "rb", C_EXTENSION}, {".abi" PYTHON_ABI_STRING ".so", "rb", C_EXTENSION}, - {"module.abi" PYTHON_ABI_STRING ".so", "rb", C_EXTENSION}, {".so", "rb", C_EXTENSION}, - {"module.so", "rb", C_EXTENSION}, #endif /* __VMS */ #endif /* defined(PYOS_OS2) && defined(PYCC_GCC) */ #endif /* __CYGWIN__ */