Issue #13916: Disallowed the surrogatepass error handler for non UTF-*
encodings.
This commit is contained in:
parent
8e4efbe115
commit
88d8fb6af6
|
@ -2807,6 +2807,9 @@ class CodePageTest(unittest.TestCase):
|
|||
('[\u20ac]', 'replace', b'[?]'),
|
||||
('[\xff]', 'backslashreplace', b'[\\xff]'),
|
||||
('[\xff]', 'xmlcharrefreplace', b'[&#255;]'),
|
||||
('\udcff', 'strict', None),
|
||||
('[\udcff]', 'surrogateescape', b'[\xff]'),
|
||||
('[\udcff]', 'surrogatepass', None),
|
||||
))
|
||||
self.check_decode(932, (
|
||||
(b'abc', 'strict', 'abc'),
|
||||
|
@ -2816,6 +2819,7 @@ class CodePageTest(unittest.TestCase):
|
|||
(b'[\xff]', 'ignore', '[]'),
|
||||
(b'[\xff]', 'replace', '[\ufffd]'),
|
||||
(b'[\xff]', 'surrogateescape', '[\udcff]'),
|
||||
(b'[\xff]', 'surrogatepass', None),
|
||||
(b'\x81\x00abc', 'strict', None),
|
||||
(b'\x81\x00abc', 'ignore', '\x00abc'),
|
||||
(b'\x81\x00abc', 'replace', '\ufffd\x00abc'),
|
||||
|
@ -2826,14 +2830,23 @@ class CodePageTest(unittest.TestCase):
|
|||
('abc', 'strict', b'abc'),
|
||||
('\xe9\u20ac', 'strict', b'\xe9\x80'),
|
||||
('\xff', 'strict', b'\xff'),
|
||||
# test error handlers
|
||||
('\u0141', 'strict', None),
|
||||
('\u0141', 'ignore', b''),
|
||||
('\u0141', 'replace', b'L'),
|
||||
('\udc98', 'surrogateescape', b'\x98'),
|
||||
('\udc98', 'surrogatepass', None),
|
||||
))
|
||||
self.check_decode(1252, (
|
||||
(b'abc', 'strict', 'abc'),
|
||||
(b'\xe9\x80', 'strict', '\xe9\u20ac'),
|
||||
(b'\xff', 'strict', '\xff'),
|
||||
# invalid bytes
|
||||
(b'[\x98]', 'strict', None),
|
||||
(b'[\x98]', 'ignore', '[]'),
|
||||
(b'[\x98]', 'replace', '[\ufffd]'),
|
||||
(b'[\x98]', 'surrogateescape', '[\udc98]'),
|
||||
(b'[\x98]', 'surrogatepass', None),
|
||||
))
|
||||
|
||||
def test_cp_utf7(self):
|
||||
|
|
|
@ -84,6 +84,9 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #13916: Disallowed the surrogatepass error handler for non UTF-*
|
||||
encodings.
|
||||
|
||||
- Issue #20998: Fixed re.fullmatch() of repeated single character pattern
|
||||
with ignore case. Original patch by Matthew Barnett.
|
||||
|
||||
|
|
|
@ -901,6 +901,7 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
|
|||
}
|
||||
}
|
||||
|
||||
#define ENC_UNKNOWN -1
|
||||
#define ENC_UTF8 0
|
||||
#define ENC_UTF16BE 1
|
||||
#define ENC_UTF16LE 2
|
||||
|
@ -916,7 +917,11 @@ get_standard_encoding(const char *encoding, int *bytelength)
|
|||
encoding += 3;
|
||||
if (*encoding == '-' || *encoding == '_' )
|
||||
encoding++;
|
||||
if (encoding[0] == '1' && encoding[1] == '6') {
|
||||
if (encoding[0] == '8' && encoding[1] == '\0') {
|
||||
*bytelength = 3;
|
||||
return ENC_UTF8;
|
||||
}
|
||||
else if (encoding[0] == '1' && encoding[1] == '6') {
|
||||
encoding += 2;
|
||||
*bytelength = 2;
|
||||
if (*encoding == '\0') {
|
||||
|
@ -955,9 +960,7 @@ get_standard_encoding(const char *encoding, int *bytelength)
|
|||
}
|
||||
}
|
||||
}
|
||||
/* utf-8 */
|
||||
*bytelength = 3;
|
||||
return ENC_UTF8;
|
||||
return ENC_UNKNOWN;
|
||||
}
|
||||
|
||||
/* This handler is declared static until someone demonstrates
|
||||
|
@ -994,6 +997,12 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
|
|||
}
|
||||
code = get_standard_encoding(encoding, &bytelength);
|
||||
Py_DECREF(encode);
|
||||
if (code == ENC_UNKNOWN) {
|
||||
/* Not supported, fail with original exception */
|
||||
PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
|
||||
Py_DECREF(object);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
res = PyBytes_FromStringAndSize(NULL, bytelength*(end-start));
|
||||
if (!res) {
|
||||
|
@ -1068,6 +1077,12 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
|
|||
}
|
||||
code = get_standard_encoding(encoding, &bytelength);
|
||||
Py_DECREF(encode);
|
||||
if (code == ENC_UNKNOWN) {
|
||||
/* Not supported, fail with original exception */
|
||||
PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
|
||||
Py_DECREF(object);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Try decoding a single surrogate character. If
|
||||
there are more, let the codec call us again. */
|
||||
|
|
Loading…
Reference in New Issue