Write tests for invalid characters (U+00110000)
Test the following functions:

* codecs.raw_unicode_escape_decode()
* PyUnicode_FromWideChar()
* PyUnicode_FromUnicode()
* "unicode_internal" and "unicode_escape" decoders
This commit is contained in:
parent
db6238964d
commit
e3b47152a4
|
@ -1034,6 +1034,16 @@ class UnicodeInternalTest(unittest.TestCase):
|
|||
'deprecated', DeprecationWarning)):
|
||||
self.assertRaises(UnicodeDecodeError, internal.decode,
|
||||
"unicode_internal")
|
||||
if sys.byteorder == "little":
|
||||
invalid = b"\x00\x00\x11\x00"
|
||||
else:
|
||||
invalid = b"\x00\x11\x00\x00"
|
||||
with support.check_warnings():
|
||||
self.assertRaises(UnicodeDecodeError,
|
||||
invalid.decode, "unicode_internal")
|
||||
with support.check_warnings():
|
||||
self.assertEqual(invalid.decode("unicode_internal", "replace"),
|
||||
'\ufffd')
|
||||
|
||||
@unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t')
|
||||
def test_decode_error_attributes(self):
|
||||
|
@ -1729,6 +1739,12 @@ class TypesTest(unittest.TestCase):
|
|||
self.assertEqual(codecs.raw_unicode_escape_decode(r"\u1234"), ("\u1234", 6))
|
||||
self.assertEqual(codecs.raw_unicode_escape_decode(br"\u1234"), ("\u1234", 6))
|
||||
|
||||
self.assertRaises(UnicodeDecodeError, codecs.unicode_escape_decode, br"\U00110000")
|
||||
self.assertEqual(codecs.unicode_escape_decode(r"\U00110000", "replace"), ("\ufffd", 10))
|
||||
|
||||
self.assertRaises(UnicodeDecodeError, codecs.raw_unicode_escape_decode, br"\U00110000")
|
||||
self.assertEqual(codecs.raw_unicode_escape_decode(r"\U00110000", "replace"), ("\ufffd", 10))
|
||||
|
||||
class SurrogateEscapeTest(unittest.TestCase):
|
||||
|
||||
def test_utf8(self):
|
||||
|
|
|
@ -1409,6 +1409,7 @@ test_widechar(PyObject *self)
|
|||
#if defined(SIZEOF_WCHAR_T) && (SIZEOF_WCHAR_T == 4)
|
||||
const wchar_t wtext[2] = {(wchar_t)0x10ABCDu};
|
||||
size_t wtextlen = 1;
|
||||
const wchar_t invalid[1] = {(wchar_t)0x110000u};
|
||||
#else
|
||||
const wchar_t wtext[3] = {(wchar_t)0xDBEAu, (wchar_t)0xDFCDu};
|
||||
size_t wtextlen = 2;
|
||||
|
@ -1444,6 +1445,23 @@ test_widechar(PyObject *self)
|
|||
|
||||
Py_DECREF(wide);
|
||||
Py_DECREF(utf8);
|
||||
|
||||
#if defined(SIZEOF_WCHAR_T) && (SIZEOF_WCHAR_T == 4)
|
||||
wide = PyUnicode_FromWideChar(invalid, 1);
|
||||
if (wide == NULL)
|
||||
PyErr_Clear();
|
||||
else
|
||||
return raiseTestError("test_widechar",
|
||||
"PyUnicode_FromWideChar(L\"\\U00110000\", 1) didn't fail");
|
||||
|
||||
wide = PyUnicode_FromUnicode(invalid, 1);
|
||||
if (wide == NULL)
|
||||
PyErr_Clear();
|
||||
else
|
||||
return raiseTestError("test_widechar",
|
||||
"PyUnicode_FromUnicode(L\"\\U00110000\", 1) didn't fail");
|
||||
#endif
|
||||
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue