Issue #19619: Blacklist non-text codecs in method API
str.encode, bytes.decode and bytearray.decode now use an internal API to throw LookupError for known non-text encodings, rather than attempting the encoding or decoding operation and then throwing a TypeError for an unexpected output type. The latter mechanism remains in place for third party non-text encodings.
This commit is contained in:
parent
322f5ba0d8
commit
c72e4e6dcc
|
@ -94,6 +94,33 @@ PyAPI_FUNC(PyObject *) PyCodec_Decode(
|
||||||
const char *errors
|
const char *errors
|
||||||
);
|
);
|
||||||
|
|
||||||
|
#ifndef PY_LIMITED_API
|
||||||
|
/* Text codec specific encoding and decoding API.
|
||||||
|
|
||||||
|
Checks the encoding against a list of codecs which do not
|
||||||
|
implement a str<->bytes encoding before attempting the
|
||||||
|
operation.
|
||||||
|
|
||||||
|
Please note that these APIs are internal and should not
|
||||||
|
be used in Python C extensions.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
PyAPI_FUNC(PyObject *) _PyCodec_EncodeText(
|
||||||
|
PyObject *object,
|
||||||
|
const char *encoding,
|
||||||
|
const char *errors
|
||||||
|
);
|
||||||
|
|
||||||
|
PyAPI_FUNC(PyObject *) _PyCodec_DecodeText(
|
||||||
|
PyObject *object,
|
||||||
|
const char *encoding,
|
||||||
|
const char *errors
|
||||||
|
);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* --- Codec Lookup APIs --------------------------------------------------
|
/* --- Codec Lookup APIs --------------------------------------------------
|
||||||
|
|
||||||
All APIs return a codec object with incremented refcount and are
|
All APIs return a codec object with incremented refcount and are
|
||||||
|
|
|
@ -73,9 +73,19 @@ BOM64_BE = BOM_UTF32_BE
|
||||||
### Codec base classes (defining the API)
|
### Codec base classes (defining the API)
|
||||||
|
|
||||||
class CodecInfo(tuple):
|
class CodecInfo(tuple):
|
||||||
|
"""Codec details when looking up the codec registry"""
|
||||||
|
|
||||||
|
# Private API to allow Python 3.4 to blacklist the known non-Unicode
|
||||||
|
# codecs in the standard library. A more general mechanism to
|
||||||
|
# reliably distinguish text encodings from other codecs will hopefully
|
||||||
|
# be defined for Python 3.5
|
||||||
|
#
|
||||||
|
# See http://bugs.python.org/issue19619
|
||||||
|
_is_text_encoding = True # Assume codecs are text encodings by default
|
||||||
|
|
||||||
def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
|
def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
|
||||||
incrementalencoder=None, incrementaldecoder=None, name=None):
|
incrementalencoder=None, incrementaldecoder=None, name=None,
|
||||||
|
*, _is_text_encoding=None):
|
||||||
self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter))
|
self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter))
|
||||||
self.name = name
|
self.name = name
|
||||||
self.encode = encode
|
self.encode = encode
|
||||||
|
@ -84,6 +94,8 @@ class CodecInfo(tuple):
|
||||||
self.incrementaldecoder = incrementaldecoder
|
self.incrementaldecoder = incrementaldecoder
|
||||||
self.streamwriter = streamwriter
|
self.streamwriter = streamwriter
|
||||||
self.streamreader = streamreader
|
self.streamreader = streamreader
|
||||||
|
if _is_text_encoding is not None:
|
||||||
|
self._is_text_encoding = _is_text_encoding
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
|
|
|
@ -52,4 +52,5 @@ def getregentry():
|
||||||
incrementaldecoder=IncrementalDecoder,
|
incrementaldecoder=IncrementalDecoder,
|
||||||
streamwriter=StreamWriter,
|
streamwriter=StreamWriter,
|
||||||
streamreader=StreamReader,
|
streamreader=StreamReader,
|
||||||
|
_is_text_encoding=False,
|
||||||
)
|
)
|
||||||
|
|
|
@ -74,4 +74,5 @@ def getregentry():
|
||||||
incrementaldecoder=IncrementalDecoder,
|
incrementaldecoder=IncrementalDecoder,
|
||||||
streamwriter=StreamWriter,
|
streamwriter=StreamWriter,
|
||||||
streamreader=StreamReader,
|
streamreader=StreamReader,
|
||||||
|
_is_text_encoding=False,
|
||||||
)
|
)
|
||||||
|
|
|
@ -52,4 +52,5 @@ def getregentry():
|
||||||
incrementaldecoder=IncrementalDecoder,
|
incrementaldecoder=IncrementalDecoder,
|
||||||
streamwriter=StreamWriter,
|
streamwriter=StreamWriter,
|
||||||
streamreader=StreamReader,
|
streamreader=StreamReader,
|
||||||
|
_is_text_encoding=False,
|
||||||
)
|
)
|
||||||
|
|
|
@ -53,4 +53,5 @@ def getregentry():
|
||||||
incrementaldecoder=IncrementalDecoder,
|
incrementaldecoder=IncrementalDecoder,
|
||||||
streamwriter=StreamWriter,
|
streamwriter=StreamWriter,
|
||||||
streamreader=StreamReader,
|
streamreader=StreamReader,
|
||||||
|
_is_text_encoding=False,
|
||||||
)
|
)
|
||||||
|
|
|
@ -43,6 +43,7 @@ def getregentry():
|
||||||
incrementaldecoder=IncrementalDecoder,
|
incrementaldecoder=IncrementalDecoder,
|
||||||
streamwriter=StreamWriter,
|
streamwriter=StreamWriter,
|
||||||
streamreader=StreamReader,
|
streamreader=StreamReader,
|
||||||
|
_is_text_encoding=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
### Map
|
### Map
|
||||||
|
|
|
@ -96,4 +96,5 @@ def getregentry():
|
||||||
incrementaldecoder=IncrementalDecoder,
|
incrementaldecoder=IncrementalDecoder,
|
||||||
streamreader=StreamReader,
|
streamreader=StreamReader,
|
||||||
streamwriter=StreamWriter,
|
streamwriter=StreamWriter,
|
||||||
|
_is_text_encoding=False,
|
||||||
)
|
)
|
||||||
|
|
|
@ -74,4 +74,5 @@ def getregentry():
|
||||||
incrementaldecoder=IncrementalDecoder,
|
incrementaldecoder=IncrementalDecoder,
|
||||||
streamreader=StreamReader,
|
streamreader=StreamReader,
|
||||||
streamwriter=StreamWriter,
|
streamwriter=StreamWriter,
|
||||||
|
_is_text_encoding=False,
|
||||||
)
|
)
|
||||||
|
|
|
@ -6,6 +6,7 @@ import locale
|
||||||
import sys
|
import sys
|
||||||
import unittest
|
import unittest
|
||||||
import warnings
|
import warnings
|
||||||
|
import encodings
|
||||||
|
|
||||||
from test import support
|
from test import support
|
||||||
|
|
||||||
|
@ -2381,67 +2382,68 @@ class TransformCodecTest(unittest.TestCase):
|
||||||
view_decoded = codecs.decode(view, encoding)
|
view_decoded = codecs.decode(view, encoding)
|
||||||
self.assertEqual(view_decoded, data)
|
self.assertEqual(view_decoded, data)
|
||||||
|
|
||||||
def test_type_error_for_text_input(self):
|
def test_text_to_binary_blacklists_binary_transforms(self):
|
||||||
# Check binary -> binary codecs give a good error for str input
|
# Check binary -> binary codecs give a good error for str input
|
||||||
bad_input = "bad input type"
|
bad_input = "bad input type"
|
||||||
for encoding in bytes_transform_encodings:
|
for encoding in bytes_transform_encodings:
|
||||||
with self.subTest(encoding=encoding):
|
with self.subTest(encoding=encoding):
|
||||||
msg = "^encoding with '{}' codec failed".format(encoding)
|
fmt = ( "{!r} is not a text encoding; "
|
||||||
with self.assertRaisesRegex(TypeError, msg) as failure:
|
"use codecs.encode\(\) to handle arbitrary codecs")
|
||||||
|
msg = fmt.format(encoding)
|
||||||
|
with self.assertRaisesRegex(LookupError, msg) as failure:
|
||||||
bad_input.encode(encoding)
|
bad_input.encode(encoding)
|
||||||
self.assertTrue(isinstance(failure.exception.__cause__,
|
self.assertIsNone(failure.exception.__cause__)
|
||||||
TypeError))
|
|
||||||
|
|
||||||
def test_type_error_for_binary_input(self):
|
def test_text_to_binary_blacklists_text_transforms(self):
|
||||||
# Check str -> str codec gives a good error for binary input
|
# Check str.encode gives a good error message for str -> str codecs
|
||||||
for bad_input in (b"immutable", bytearray(b"mutable")):
|
msg = (r"^'rot_13' is not a text encoding; "
|
||||||
with self.subTest(bad_input=bad_input):
|
"use codecs.encode\(\) to handle arbitrary codecs")
|
||||||
msg = "^decoding with 'rot_13' codec failed"
|
with self.assertRaisesRegex(LookupError, msg):
|
||||||
with self.assertRaisesRegex(AttributeError, msg) as failure:
|
"just an example message".encode("rot_13")
|
||||||
bad_input.decode("rot_13")
|
|
||||||
self.assertTrue(isinstance(failure.exception.__cause__,
|
|
||||||
AttributeError))
|
|
||||||
|
|
||||||
def test_custom_zlib_error_is_wrapped(self):
|
def test_binary_to_text_blacklists_binary_transforms(self):
|
||||||
# Check zlib codec gives a good error for malformed input
|
|
||||||
msg = "^decoding with 'zlib_codec' codec failed"
|
|
||||||
with self.assertRaisesRegex(Exception, msg) as failure:
|
|
||||||
b"hello".decode("zlib_codec")
|
|
||||||
self.assertTrue(isinstance(failure.exception.__cause__,
|
|
||||||
type(failure.exception)))
|
|
||||||
|
|
||||||
def test_custom_hex_error_is_wrapped(self):
|
|
||||||
# Check hex codec gives a good error for malformed input
|
|
||||||
msg = "^decoding with 'hex_codec' codec failed"
|
|
||||||
with self.assertRaisesRegex(Exception, msg) as failure:
|
|
||||||
b"hello".decode("hex_codec")
|
|
||||||
self.assertTrue(isinstance(failure.exception.__cause__,
|
|
||||||
type(failure.exception)))
|
|
||||||
|
|
||||||
# Unfortunately, the bz2 module throws OSError, which the codec
|
|
||||||
# machinery currently can't wrap :(
|
|
||||||
|
|
||||||
def test_bad_decoding_output_type(self):
|
|
||||||
# Check bytes.decode and bytearray.decode give a good error
|
# Check bytes.decode and bytearray.decode give a good error
|
||||||
# message for binary -> binary codecs
|
# message for binary -> binary codecs
|
||||||
data = b"encode first to ensure we meet any format restrictions"
|
data = b"encode first to ensure we meet any format restrictions"
|
||||||
for encoding in bytes_transform_encodings:
|
for encoding in bytes_transform_encodings:
|
||||||
with self.subTest(encoding=encoding):
|
with self.subTest(encoding=encoding):
|
||||||
encoded_data = codecs.encode(data, encoding)
|
encoded_data = codecs.encode(data, encoding)
|
||||||
fmt = ("'{}' decoder returned 'bytes' instead of 'str'; "
|
fmt = (r"{!r} is not a text encoding; "
|
||||||
"use codecs.decode\(\) to decode to arbitrary types")
|
"use codecs.decode\(\) to handle arbitrary codecs")
|
||||||
msg = fmt.format(encoding)
|
msg = fmt.format(encoding)
|
||||||
with self.assertRaisesRegex(TypeError, msg):
|
with self.assertRaisesRegex(LookupError, msg):
|
||||||
encoded_data.decode(encoding)
|
encoded_data.decode(encoding)
|
||||||
with self.assertRaisesRegex(TypeError, msg):
|
with self.assertRaisesRegex(LookupError, msg):
|
||||||
bytearray(encoded_data).decode(encoding)
|
bytearray(encoded_data).decode(encoding)
|
||||||
|
|
||||||
def test_bad_encoding_output_type(self):
|
def test_binary_to_text_blacklists_text_transforms(self):
|
||||||
# Check str.encode gives a good error message for str -> str codecs
|
# Check str -> str codec gives a good error for binary input
|
||||||
msg = ("'rot_13' encoder returned 'str' instead of 'bytes'; "
|
for bad_input in (b"immutable", bytearray(b"mutable")):
|
||||||
"use codecs.encode\(\) to encode to arbitrary types")
|
with self.subTest(bad_input=bad_input):
|
||||||
with self.assertRaisesRegex(TypeError, msg):
|
msg = (r"^'rot_13' is not a text encoding; "
|
||||||
"just an example message".encode("rot_13")
|
"use codecs.decode\(\) to handle arbitrary codecs")
|
||||||
|
with self.assertRaisesRegex(LookupError, msg) as failure:
|
||||||
|
bad_input.decode("rot_13")
|
||||||
|
self.assertIsNone(failure.exception.__cause__)
|
||||||
|
|
||||||
|
def test_custom_zlib_error_is_wrapped(self):
|
||||||
|
# Check zlib codec gives a good error for malformed input
|
||||||
|
msg = "^decoding with 'zlib_codec' codec failed"
|
||||||
|
with self.assertRaisesRegex(Exception, msg) as failure:
|
||||||
|
codecs.decode(b"hello", "zlib_codec")
|
||||||
|
self.assertIsInstance(failure.exception.__cause__,
|
||||||
|
type(failure.exception))
|
||||||
|
|
||||||
|
def test_custom_hex_error_is_wrapped(self):
|
||||||
|
# Check hex codec gives a good error for malformed input
|
||||||
|
msg = "^decoding with 'hex_codec' codec failed"
|
||||||
|
with self.assertRaisesRegex(Exception, msg) as failure:
|
||||||
|
codecs.decode(b"hello", "hex_codec")
|
||||||
|
self.assertIsInstance(failure.exception.__cause__,
|
||||||
|
type(failure.exception))
|
||||||
|
|
||||||
|
# Unfortunately, the bz2 module throws OSError, which the codec
|
||||||
|
# machinery currently can't wrap :(
|
||||||
|
|
||||||
|
|
||||||
# The codec system tries to wrap exceptions in order to ensure the error
|
# The codec system tries to wrap exceptions in order to ensure the error
|
||||||
|
@ -2466,27 +2468,44 @@ class ExceptionChainingTest(unittest.TestCase):
|
||||||
# case finishes by using the test case repr as the codec name
|
# case finishes by using the test case repr as the codec name
|
||||||
# The codecs module normalizes codec names, although this doesn't
|
# The codecs module normalizes codec names, although this doesn't
|
||||||
# appear to be formally documented...
|
# appear to be formally documented...
|
||||||
self.codec_name = repr(self).lower().replace(" ", "-")
|
# We also make sure we use a truly unique id for the custom codec
|
||||||
|
# to avoid issues with the codec cache when running these tests
|
||||||
|
# multiple times (e.g. when hunting for refleaks)
|
||||||
|
unique_id = repr(self) + str(id(self))
|
||||||
|
self.codec_name = encodings.normalize_encoding(unique_id).lower()
|
||||||
|
|
||||||
|
# We store the object to raise on the instance because of a bad
|
||||||
|
# interaction between the codec caching (which means we can't
|
||||||
|
# recreate the codec entry) and regrtest refleak hunting (which
|
||||||
|
# runs the same test instance multiple times). This means we
|
||||||
|
# need to ensure the codecs call back in to the instance to find
|
||||||
|
# out which exception to raise rather than binding them in a
|
||||||
|
# closure to an object that may change on the next run
|
||||||
|
self.obj_to_raise = RuntimeError
|
||||||
|
|
||||||
def tearDown(self):
|
def tearDown(self):
|
||||||
_TEST_CODECS.pop(self.codec_name, None)
|
_TEST_CODECS.pop(self.codec_name, None)
|
||||||
|
|
||||||
def set_codec(self, obj_to_raise):
|
def set_codec(self, encode, decode):
|
||||||
def raise_obj(*args, **kwds):
|
codec_info = codecs.CodecInfo(encode, decode,
|
||||||
raise obj_to_raise
|
|
||||||
codec_info = codecs.CodecInfo(raise_obj, raise_obj,
|
|
||||||
name=self.codec_name)
|
name=self.codec_name)
|
||||||
_TEST_CODECS[self.codec_name] = codec_info
|
_TEST_CODECS[self.codec_name] = codec_info
|
||||||
|
|
||||||
@contextlib.contextmanager
|
@contextlib.contextmanager
|
||||||
def assertWrapped(self, operation, exc_type, msg):
|
def assertWrapped(self, operation, exc_type, msg):
|
||||||
full_msg = "{} with '{}' codec failed \({}: {}\)".format(
|
full_msg = r"{} with {!r} codec failed \({}: {}\)".format(
|
||||||
operation, self.codec_name, exc_type.__name__, msg)
|
operation, self.codec_name, exc_type.__name__, msg)
|
||||||
with self.assertRaisesRegex(exc_type, full_msg) as caught:
|
with self.assertRaisesRegex(exc_type, full_msg) as caught:
|
||||||
yield caught
|
yield caught
|
||||||
|
self.assertIsInstance(caught.exception.__cause__, exc_type)
|
||||||
|
|
||||||
|
def raise_obj(self, *args, **kwds):
|
||||||
|
# Helper to dynamically change the object raised by a test codec
|
||||||
|
raise self.obj_to_raise
|
||||||
|
|
||||||
def check_wrapped(self, obj_to_raise, msg, exc_type=RuntimeError):
|
def check_wrapped(self, obj_to_raise, msg, exc_type=RuntimeError):
|
||||||
self.set_codec(obj_to_raise)
|
self.obj_to_raise = obj_to_raise
|
||||||
|
self.set_codec(self.raise_obj, self.raise_obj)
|
||||||
with self.assertWrapped("encoding", exc_type, msg):
|
with self.assertWrapped("encoding", exc_type, msg):
|
||||||
"str_input".encode(self.codec_name)
|
"str_input".encode(self.codec_name)
|
||||||
with self.assertWrapped("encoding", exc_type, msg):
|
with self.assertWrapped("encoding", exc_type, msg):
|
||||||
|
@ -2515,23 +2534,17 @@ class ExceptionChainingTest(unittest.TestCase):
|
||||||
pass
|
pass
|
||||||
self.check_wrapped(MyRuntimeError(msg), msg, MyRuntimeError)
|
self.check_wrapped(MyRuntimeError(msg), msg, MyRuntimeError)
|
||||||
|
|
||||||
@contextlib.contextmanager
|
def check_not_wrapped(self, obj_to_raise, msg):
|
||||||
def assertNotWrapped(self, operation, exc_type, msg_re, msg=None):
|
def raise_obj(*args, **kwds):
|
||||||
if msg is None:
|
raise obj_to_raise
|
||||||
msg = msg_re
|
self.set_codec(raise_obj, raise_obj)
|
||||||
with self.assertRaisesRegex(exc_type, msg) as caught:
|
with self.assertRaisesRegex(RuntimeError, msg):
|
||||||
yield caught
|
|
||||||
self.assertEqual(str(caught.exception), msg)
|
|
||||||
|
|
||||||
def check_not_wrapped(self, obj_to_raise, msg_re, msg=None):
|
|
||||||
self.set_codec(obj_to_raise)
|
|
||||||
with self.assertNotWrapped("encoding", RuntimeError, msg_re, msg):
|
|
||||||
"str input".encode(self.codec_name)
|
"str input".encode(self.codec_name)
|
||||||
with self.assertNotWrapped("encoding", RuntimeError, msg_re, msg):
|
with self.assertRaisesRegex(RuntimeError, msg):
|
||||||
codecs.encode("str input", self.codec_name)
|
codecs.encode("str input", self.codec_name)
|
||||||
with self.assertNotWrapped("decoding", RuntimeError, msg_re, msg):
|
with self.assertRaisesRegex(RuntimeError, msg):
|
||||||
b"bytes input".decode(self.codec_name)
|
b"bytes input".decode(self.codec_name)
|
||||||
with self.assertNotWrapped("decoding", RuntimeError, msg_re, msg):
|
with self.assertRaisesRegex(RuntimeError, msg):
|
||||||
codecs.decode(b"bytes input", self.codec_name)
|
codecs.decode(b"bytes input", self.codec_name)
|
||||||
|
|
||||||
def test_init_override_is_not_wrapped(self):
|
def test_init_override_is_not_wrapped(self):
|
||||||
|
@ -2550,29 +2563,56 @@ class ExceptionChainingTest(unittest.TestCase):
|
||||||
msg = "This should NOT be wrapped"
|
msg = "This should NOT be wrapped"
|
||||||
exc = RuntimeError(msg)
|
exc = RuntimeError(msg)
|
||||||
exc.attr = 1
|
exc.attr = 1
|
||||||
self.check_not_wrapped(exc, msg)
|
self.check_not_wrapped(exc, "^{}$".format(msg))
|
||||||
|
|
||||||
def test_non_str_arg_is_not_wrapped(self):
|
def test_non_str_arg_is_not_wrapped(self):
|
||||||
self.check_not_wrapped(RuntimeError(1), "1")
|
self.check_not_wrapped(RuntimeError(1), "1")
|
||||||
|
|
||||||
def test_multiple_args_is_not_wrapped(self):
|
def test_multiple_args_is_not_wrapped(self):
|
||||||
msg_re = "\('a', 'b', 'c'\)"
|
msg_re = r"^\('a', 'b', 'c'\)$"
|
||||||
msg = "('a', 'b', 'c')"
|
self.check_not_wrapped(RuntimeError('a', 'b', 'c'), msg_re)
|
||||||
self.check_not_wrapped(RuntimeError('a', 'b', 'c'), msg_re, msg)
|
|
||||||
|
|
||||||
# http://bugs.python.org/issue19609
|
# http://bugs.python.org/issue19609
|
||||||
def test_codec_lookup_failure_not_wrapped(self):
|
def test_codec_lookup_failure_not_wrapped(self):
|
||||||
msg = "unknown encoding: %s" % self.codec_name
|
msg = "^unknown encoding: {}$".format(self.codec_name)
|
||||||
# The initial codec lookup should not be wrapped
|
# The initial codec lookup should not be wrapped
|
||||||
with self.assertNotWrapped("encoding", LookupError, msg):
|
with self.assertRaisesRegex(LookupError, msg):
|
||||||
"str input".encode(self.codec_name)
|
"str input".encode(self.codec_name)
|
||||||
with self.assertNotWrapped("encoding", LookupError, msg):
|
with self.assertRaisesRegex(LookupError, msg):
|
||||||
codecs.encode("str input", self.codec_name)
|
codecs.encode("str input", self.codec_name)
|
||||||
with self.assertNotWrapped("decoding", LookupError, msg):
|
with self.assertRaisesRegex(LookupError, msg):
|
||||||
b"bytes input".decode(self.codec_name)
|
b"bytes input".decode(self.codec_name)
|
||||||
with self.assertNotWrapped("decoding", LookupError, msg):
|
with self.assertRaisesRegex(LookupError, msg):
|
||||||
codecs.decode(b"bytes input", self.codec_name)
|
codecs.decode(b"bytes input", self.codec_name)
|
||||||
|
|
||||||
|
def test_unflagged_non_text_codec_handling(self):
|
||||||
|
# The stdlib non-text codecs are now marked so they're
|
||||||
|
# pre-emptively skipped by the text model related methods.
|
||||||
|
# However, third party codecs won't be flagged, so we still make
|
||||||
|
# sure the case where an inappropriate output type is produced is
|
||||||
|
# handled appropriately
|
||||||
|
def encode_to_str(*args, **kwds):
|
||||||
|
return "not bytes!", 0
|
||||||
|
def decode_to_bytes(*args, **kwds):
|
||||||
|
return b"not str!", 0
|
||||||
|
self.set_codec(encode_to_str, decode_to_bytes)
|
||||||
|
# No input or output type checks on the codecs module functions
|
||||||
|
encoded = codecs.encode(None, self.codec_name)
|
||||||
|
self.assertEqual(encoded, "not bytes!")
|
||||||
|
decoded = codecs.decode(None, self.codec_name)
|
||||||
|
self.assertEqual(decoded, b"not str!")
|
||||||
|
# Text model methods should complain
|
||||||
|
fmt = (r"^{!r} encoder returned 'str' instead of 'bytes'; "
|
||||||
|
"use codecs.encode\(\) to encode to arbitrary types$")
|
||||||
|
msg = fmt.format(self.codec_name)
|
||||||
|
with self.assertRaisesRegex(TypeError, msg):
|
||||||
|
"str_input".encode(self.codec_name)
|
||||||
|
fmt = (r"^{!r} decoder returned 'bytes' instead of 'str'; "
|
||||||
|
"use codecs.decode\(\) to decode to arbitrary types$")
|
||||||
|
msg = fmt.format(self.codec_name)
|
||||||
|
with self.assertRaisesRegex(TypeError, msg):
|
||||||
|
b"bytes input".decode(self.codec_name)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@unittest.skipUnless(sys.platform == 'win32',
|
@unittest.skipUnless(sys.platform == 'win32',
|
||||||
|
|
|
@ -10,6 +10,12 @@ Projected release date: 2013-11-24
|
||||||
Core and Builtins
|
Core and Builtins
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- Issue #19619: str.encode, bytes.decode and bytearray.decode now use an
|
||||||
|
internal API to throw LookupError for known non-text encodings, rather
|
||||||
|
than attempting the encoding or decoding operation and then throwing a
|
||||||
|
TypeError for an unexpected output type. (The latter mechanism remains
|
||||||
|
in place for third party non-text encodings.)
|
||||||
|
|
||||||
- Issue #19183: Implement PEP 456 'secure and interchangeable hash algorithm'.
|
- Issue #19183: Implement PEP 456 'secure and interchangeable hash algorithm'.
|
||||||
Python now uses SipHash24 on all major platforms.
|
Python now uses SipHash24 on all major platforms.
|
||||||
|
|
||||||
|
|
|
@ -3044,7 +3044,7 @@ PyUnicode_Decode(const char *s,
|
||||||
buffer = PyMemoryView_FromBuffer(&info);
|
buffer = PyMemoryView_FromBuffer(&info);
|
||||||
if (buffer == NULL)
|
if (buffer == NULL)
|
||||||
goto onError;
|
goto onError;
|
||||||
unicode = PyCodec_Decode(buffer, encoding, errors);
|
unicode = _PyCodec_DecodeText(buffer, encoding, errors);
|
||||||
if (unicode == NULL)
|
if (unicode == NULL)
|
||||||
goto onError;
|
goto onError;
|
||||||
if (!PyUnicode_Check(unicode)) {
|
if (!PyUnicode_Check(unicode)) {
|
||||||
|
@ -3410,7 +3410,7 @@ PyUnicode_AsEncodedString(PyObject *unicode,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Encode via the codec registry */
|
/* Encode via the codec registry */
|
||||||
v = PyCodec_Encode(unicode, encoding, errors);
|
v = _PyCodec_EncodeText(unicode, encoding, errors);
|
||||||
if (v == NULL)
|
if (v == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
|
138
Python/codecs.c
138
Python/codecs.c
|
@ -353,18 +353,15 @@ wrap_codec_error(const char *operation,
|
||||||
|
|
||||||
errors is passed to the encoder factory as argument if non-NULL. */
|
errors is passed to the encoder factory as argument if non-NULL. */
|
||||||
|
|
||||||
PyObject *PyCodec_Encode(PyObject *object,
|
static PyObject *
|
||||||
const char *encoding,
|
_PyCodec_EncodeInternal(PyObject *object,
|
||||||
const char *errors)
|
PyObject *encoder,
|
||||||
|
const char *encoding,
|
||||||
|
const char *errors)
|
||||||
{
|
{
|
||||||
PyObject *encoder = NULL;
|
|
||||||
PyObject *args = NULL, *result = NULL;
|
PyObject *args = NULL, *result = NULL;
|
||||||
PyObject *v = NULL;
|
PyObject *v = NULL;
|
||||||
|
|
||||||
encoder = PyCodec_Encoder(encoding);
|
|
||||||
if (encoder == NULL)
|
|
||||||
goto onError;
|
|
||||||
|
|
||||||
args = args_tuple(object, errors);
|
args = args_tuple(object, errors);
|
||||||
if (args == NULL)
|
if (args == NULL)
|
||||||
goto onError;
|
goto onError;
|
||||||
|
@ -402,18 +399,15 @@ PyObject *PyCodec_Encode(PyObject *object,
|
||||||
|
|
||||||
errors is passed to the decoder factory as argument if non-NULL. */
|
errors is passed to the decoder factory as argument if non-NULL. */
|
||||||
|
|
||||||
PyObject *PyCodec_Decode(PyObject *object,
|
static PyObject *
|
||||||
const char *encoding,
|
_PyCodec_DecodeInternal(PyObject *object,
|
||||||
const char *errors)
|
PyObject *decoder,
|
||||||
|
const char *encoding,
|
||||||
|
const char *errors)
|
||||||
{
|
{
|
||||||
PyObject *decoder = NULL;
|
|
||||||
PyObject *args = NULL, *result = NULL;
|
PyObject *args = NULL, *result = NULL;
|
||||||
PyObject *v;
|
PyObject *v;
|
||||||
|
|
||||||
decoder = PyCodec_Decoder(encoding);
|
|
||||||
if (decoder == NULL)
|
|
||||||
goto onError;
|
|
||||||
|
|
||||||
args = args_tuple(object, errors);
|
args = args_tuple(object, errors);
|
||||||
if (args == NULL)
|
if (args == NULL)
|
||||||
goto onError;
|
goto onError;
|
||||||
|
@ -445,6 +439,118 @@ PyObject *PyCodec_Decode(PyObject *object,
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Generic encoding/decoding API */
|
||||||
|
PyObject *PyCodec_Encode(PyObject *object,
|
||||||
|
const char *encoding,
|
||||||
|
const char *errors)
|
||||||
|
{
|
||||||
|
PyObject *encoder;
|
||||||
|
|
||||||
|
encoder = PyCodec_Encoder(encoding);
|
||||||
|
if (encoder == NULL)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
return _PyCodec_EncodeInternal(object, encoder, encoding, errors);
|
||||||
|
}
|
||||||
|
|
||||||
|
PyObject *PyCodec_Decode(PyObject *object,
|
||||||
|
const char *encoding,
|
||||||
|
const char *errors)
|
||||||
|
{
|
||||||
|
PyObject *decoder;
|
||||||
|
|
||||||
|
decoder = PyCodec_Decoder(encoding);
|
||||||
|
if (decoder == NULL)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
return _PyCodec_DecodeInternal(object, decoder, encoding, errors);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Text encoding/decoding API */
|
||||||
|
static
|
||||||
|
PyObject *codec_getitem_checked(const char *encoding,
|
||||||
|
const char *operation_name,
|
||||||
|
int index)
|
||||||
|
{
|
||||||
|
_Py_IDENTIFIER(_is_text_encoding);
|
||||||
|
PyObject *codec;
|
||||||
|
PyObject *attr;
|
||||||
|
PyObject *v;
|
||||||
|
int is_text_codec;
|
||||||
|
|
||||||
|
codec = _PyCodec_Lookup(encoding);
|
||||||
|
if (codec == NULL)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
/* Backwards compatibility: assume any raw tuple describes a text
|
||||||
|
* encoding, and the same for anything lacking the private
|
||||||
|
* attribute.
|
||||||
|
*/
|
||||||
|
if (!PyTuple_CheckExact(codec)) {
|
||||||
|
attr = _PyObject_GetAttrId(codec, &PyId__is_text_encoding);
|
||||||
|
if (attr == NULL) {
|
||||||
|
if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
|
||||||
|
PyErr_Clear();
|
||||||
|
} else {
|
||||||
|
Py_DECREF(codec);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
is_text_codec = PyObject_IsTrue(attr);
|
||||||
|
Py_DECREF(attr);
|
||||||
|
if (!is_text_codec) {
|
||||||
|
Py_DECREF(codec);
|
||||||
|
PyErr_Format(PyExc_LookupError,
|
||||||
|
"'%.400s' is not a text encoding; "
|
||||||
|
"use codecs.%s() to handle arbitrary codecs",
|
||||||
|
encoding, operation_name);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
v = PyTuple_GET_ITEM(codec, index);
|
||||||
|
Py_DECREF(codec);
|
||||||
|
Py_INCREF(v);
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
|
||||||
|
static PyObject * _PyCodec_TextEncoder(const char *encoding)
|
||||||
|
{
|
||||||
|
return codec_getitem_checked(encoding, "encode", 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
static PyObject * _PyCodec_TextDecoder(const char *encoding)
|
||||||
|
{
|
||||||
|
return codec_getitem_checked(encoding, "decode", 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
PyObject *_PyCodec_EncodeText(PyObject *object,
|
||||||
|
const char *encoding,
|
||||||
|
const char *errors)
|
||||||
|
{
|
||||||
|
PyObject *encoder;
|
||||||
|
|
||||||
|
encoder = _PyCodec_TextEncoder(encoding);
|
||||||
|
if (encoder == NULL)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
return _PyCodec_EncodeInternal(object, encoder, encoding, errors);
|
||||||
|
}
|
||||||
|
|
||||||
|
PyObject *_PyCodec_DecodeText(PyObject *object,
|
||||||
|
const char *encoding,
|
||||||
|
const char *errors)
|
||||||
|
{
|
||||||
|
PyObject *decoder;
|
||||||
|
|
||||||
|
decoder = _PyCodec_TextDecoder(encoding);
|
||||||
|
if (decoder == NULL)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
return _PyCodec_DecodeInternal(object, decoder, encoding, errors);
|
||||||
|
}
|
||||||
|
|
||||||
/* Register the error handling callback function error under the name
|
/* Register the error handling callback function error under the name
|
||||||
name. This function will be called by the codec when it encounters
|
name. This function will be called by the codec when it encounters
|
||||||
unencodable characters/undecodable bytes and doesn't know the
|
unencodable characters/undecodable bytes and doesn't know the
|
||||||
|
|
Loading…
Reference in New Issue