Rename the surrogates error handler to surrogatepass.
This commit is contained in:
parent
cf7925dfc6
commit
e0a2b72e61
|
@ -327,15 +327,15 @@ and implemented by all standard Python codecs:
|
|||
|
||||
In addition, the following error handlers are specific to a single codec:
|
||||
|
||||
+------------------+---------+--------------------------------------------+
|
||||
| Value            | Codec   | Meaning                                    |
|
||||
+==================+=========+============================================+
|
||||
| ``'surrogates'`` | utf-8   | Allow encoding and decoding of surrogate   |
|
||||
|                  |         | codes in UTF-8.                            |
|
||||
+------------------+---------+--------------------------------------------+
|
||||
+-------------------+---------+-------------------------------------------+
|
||||
| Value             | Codec   | Meaning                                   |
|
||||
+===================+=========+===========================================+
|
||||
|``'surrogatepass'``| utf-8   | Allow encoding and decoding of surrogate  |
|
||||
|                   |         | codes in UTF-8.                           |
|
||||
+-------------------+---------+-------------------------------------------+
|
||||
|
||||
.. versionadded:: 3.1
|
||||
The ``'utf8b'`` and ``'surrogates'`` error handlers.
|
||||
The ``'utf8b'`` and ``'surrogatepass'`` error handlers.
|
||||
|
||||
The set of allowed values can be extended via :meth:`register_error`.
|
||||
|
||||
|
|
|
@ -545,12 +545,12 @@ class UTF8Test(ReadTest):
|
|||
self.assertRaises(UnicodeEncodeError, "\ud800".encode, "utf-8")
|
||||
self.assertRaises(UnicodeDecodeError, b"\xed\xa0\x80".decode, "utf-8")
|
||||
|
||||
def test_surrogates_handler(self):
|
||||
self.assertEquals("abc\ud800def".encode("utf-8", "surrogates"),
|
||||
def test_surrogatepass_handler(self):
|
||||
self.assertEquals("abc\ud800def".encode("utf-8", "surrogatepass"),
|
||||
b"abc\xed\xa0\x80def")
|
||||
self.assertEquals(b"abc\xed\xa0\x80def".decode("utf-8", "surrogates"),
|
||||
self.assertEquals(b"abc\xed\xa0\x80def".decode("utf-8", "surrogatepass"),
|
||||
"abc\ud800def")
|
||||
self.assertTrue(codecs.lookup_error("surrogates"))
|
||||
self.assertTrue(codecs.lookup_error("surrogatepass"))
|
||||
|
||||
class UTF7Test(ReadTest):
|
||||
encoding = "utf-7"
|
||||
|
@ -1040,12 +1040,12 @@ class NameprepTest(unittest.TestCase):
|
|||
# Skipped
|
||||
continue
|
||||
# The Unicode strings are given in UTF-8
|
||||
orig = str(orig, "utf-8", "surrogates")
|
||||
orig = str(orig, "utf-8", "surrogatepass")
|
||||
if prepped is None:
|
||||
# Input contains prohibited characters
|
||||
self.assertRaises(UnicodeError, nameprep, orig)
|
||||
else:
|
||||
prepped = str(prepped, "utf-8", "surrogates")
|
||||
prepped = str(prepped, "utf-8", "surrogatepass")
|
||||
try:
|
||||
self.assertEquals(nameprep(orig), prepped)
|
||||
except Exception as e:
|
||||
|
|
|
@ -906,10 +906,10 @@ class UnicodeTest(
|
|||
self.assertEqual('\u20ac'.encode('utf-8'), b'\xe2\x82\xac')
|
||||
self.assertEqual('\ud800\udc02'.encode('utf-8'), b'\xf0\x90\x80\x82')
|
||||
self.assertEqual('\ud84d\udc56'.encode('utf-8'), b'\xf0\xa3\x91\x96')
|
||||
self.assertEqual('\ud800'.encode('utf-8', 'surrogates'), b'\xed\xa0\x80')
|
||||
self.assertEqual('\udc00'.encode('utf-8', 'surrogates'), b'\xed\xb0\x80')
|
||||
self.assertEqual('\ud800'.encode('utf-8', 'surrogatepass'), b'\xed\xa0\x80')
|
||||
self.assertEqual('\udc00'.encode('utf-8', 'surrogatepass'), b'\xed\xb0\x80')
|
||||
self.assertEqual(
|
||||
('\ud800\udc02'*1000).encode('utf-8', 'surrogates'),
|
||||
('\ud800\udc02'*1000).encode('utf-8', 'surrogatepass'),
|
||||
b'\xf0\x90\x80\x82'*1000
|
||||
)
|
||||
self.assertEqual(
|
||||
|
|
|
@ -56,7 +56,7 @@ Core and Builtins
|
|||
- Issue #4426: The UTF-7 decoder was too strict and didn't accept some legal
|
||||
sequences. Patch by Nick Barnes and Victor Stinner.
|
||||
|
||||
- Issue #3672: Reject surrogates in utf-8 codec; add surrogates error handler.
|
||||
- Issue #3672: Reject surrogates in utf-8 codec; add surrogatepass error handler.
|
||||
|
||||
- Issue #5883: In the io module, the BufferedIOBase and TextIOBase ABCs have
|
||||
received a new method, detach(). detach() disconnects the underlying stream
|
||||
|
|
|
@ -751,7 +751,7 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
|
|||
/* This handler is declared static until someone demonstrates
|
||||
a need to call it directly. */
|
||||
static PyObject *
|
||||
PyCodec_SurrogateErrors(PyObject *exc)
|
||||
PyCodec_SurrogatePassErrors(PyObject *exc)
|
||||
{
|
||||
PyObject *restuple;
|
||||
PyObject *object;
|
||||
|
@ -935,9 +935,9 @@ static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc)
|
|||
return PyCodec_BackslashReplaceErrors(exc);
|
||||
}
|
||||
|
||||
static PyObject *surrogates_errors(PyObject *self, PyObject *exc)
|
||||
static PyObject *surrogatepass_errors(PyObject *self, PyObject *exc)
|
||||
{
|
||||
return PyCodec_SurrogateErrors(exc);
|
||||
return PyCodec_SurrogatePassErrors(exc);
|
||||
}
|
||||
|
||||
static PyObject *utf8b_errors(PyObject *self, PyObject *exc)
|
||||
|
@ -993,10 +993,10 @@ static int _PyCodecRegistry_Init(void)
|
|||
}
|
||||
},
|
||||
{
|
||||
"surrogates",
|
||||
"surrogatepass",
|
||||
{
|
||||
"surrogates",
|
||||
surrogates_errors,
|
||||
"surrogatepass",
|
||||
surrogatepass_errors,
|
||||
METH_O
|
||||
}
|
||||
},
|
||||
|
|
|
@ -314,7 +314,7 @@ w_object(PyObject *v, WFILE *p)
|
|||
PyObject *utf8;
|
||||
utf8 = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(v),
|
||||
PyUnicode_GET_SIZE(v),
|
||||
"surrogates");
|
||||
"surrogatepass");
|
||||
if (utf8 == NULL) {
|
||||
p->depth--;
|
||||
p->error = WFERR_UNMARSHALLABLE;
|
||||
|
@ -809,7 +809,7 @@ r_object(RFILE *p)
|
|||
retval = NULL;
|
||||
break;
|
||||
}
|
||||
v = PyUnicode_DecodeUTF8(buffer, n, "surrogates");
|
||||
v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass");
|
||||
PyMem_DEL(buffer);
|
||||
retval = v;
|
||||
break;
|
||||
|
|
Loading…
Reference in New Issue