bpo-36297: remove "unicode_internal" codec (GH-12342)

This commit is contained in:
Inada Naoki 2019-03-18 15:44:11 +09:00 committed by GitHub
parent 6fb544d8bc
commit 6a16b18224
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 40 additions and 529 deletions

View File

@ -1316,16 +1316,10 @@ encodings.
| | | code actually uses UTF-8 |
| | | by default. |
+--------------------+---------+---------------------------+
| unicode_internal | | Return the internal |
| | | representation of the |
| | | operand. Stateful codecs |
| | | are not supported. |
| | | |
| | | .. deprecated:: 3.3 |
| | | This representation is |
| | | obsoleted by |
| | | :pep:`393`. |
+--------------------+---------+---------------------------+
.. versionchanged:: 3.8
"unicode_internal" codec is removed.
.. _binary-transforms:

View File

@ -573,6 +573,9 @@ The following features and APIs have been removed from Python 3.8:
* Removed the ``doctype()`` method of :class:`~xml.etree.ElementTree.XMLParser`.
(Contributed by Serhiy Storchaka in :issue:`29209`.)
* "unicode_internal" codec is removed.
(Contributed by Inada Naoki in :issue:`36297`.)
Porting to Python 3.8
=====================

View File

@ -896,15 +896,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
Py_ssize_t length /* Number of Py_UNICODE chars to encode */
) Py_DEPRECATED(3.3);
/* --- Unicode Internal Codec --------------------------------------------- */
/* Only for internal use in _codecsmodule.c */
PyObject *_PyUnicode_DecodeUnicodeInternal(
const char *string,
Py_ssize_t length,
const char *errors
);
/* --- Latin-1 Codecs ----------------------------------------------------- */
PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(

View File

@ -1,45 +0,0 @@
""" Python 'unicode-internal' Codec
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""
import codecs
### Codec APIs
class Codec(codecs.Codec):
# Note: Binding these as C functions will result in the class not
# converting them to methods. This is intended.
encode = codecs.unicode_internal_encode
decode = codecs.unicode_internal_decode
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.unicode_internal_encode(input, self.errors)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.unicode_internal_decode(input, self.errors)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='unicode-internal',
encode=Codec.encode,
decode=Codec.decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamwriter=StreamWriter,
streamreader=StreamReader,
)

View File

@ -211,42 +211,6 @@ class CodecCallbackTest(unittest.TestCase):
charmap[ord("?")] = "XYZ" # wrong type in mapping
self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
def test_decodeunicodeinternal(self):
with test.support.check_warnings(('unicode_internal codec has been '
'deprecated', DeprecationWarning)):
self.assertRaises(
UnicodeDecodeError,
b"\x00\x00\x00\x00\x00".decode,
"unicode-internal",
)
if len('\0'.encode('unicode-internal')) == 4:
def handler_unicodeinternal(exc):
if not isinstance(exc, UnicodeDecodeError):
raise TypeError("don't know how to handle %r" % exc)
return ("\x01", 1)
self.assertEqual(
b"\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"),
"\u0000"
)
self.assertEqual(
b"\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"),
"\u0000\ufffd"
)
self.assertEqual(
b"\x00\x00\x00\x00\x00".decode("unicode-internal", "backslashreplace"),
"\u0000\\x00"
)
codecs.register_error("test.hui", handler_unicodeinternal)
self.assertEqual(
b"\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"),
"\u0000\u0001\u0000"
)
def test_callbacks(self):
def handler1(exc):
r = range(exc.start, exc.end)
@ -794,16 +758,13 @@ class CodecCallbackTest(unittest.TestCase):
("ascii", b"\xff"),
("utf-8", b"\xff"),
("utf-7", b"+x-"),
("unicode-internal", b"\x00"),
):
with test.support.check_warnings():
# unicode-internal has been deprecated
self.assertRaises(
TypeError,
bytes.decode,
enc,
"test.badhandler"
)
self.assertRaises(
TypeError,
bytes.decode,
enc,
"test.badhandler"
)
def test_lookup(self):
self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict"))
@ -1013,7 +974,6 @@ class CodecCallbackTest(unittest.TestCase):
("utf-32", b"\xff"),
("unicode-escape", b"\\u123g"),
("raw-unicode-escape", b"\\u123g"),
("unicode-internal", b"\xff"),
]
def replacing(exc):
@ -1024,11 +984,9 @@ class CodecCallbackTest(unittest.TestCase):
raise TypeError("don't know how to handle %r" % exc)
codecs.register_error("test.replacing", replacing)
with test.support.check_warnings():
# unicode-internal has been deprecated
for (encoding, data) in baddata:
with self.assertRaises(TypeError):
data.decode(encoding, "test.replacing")
for (encoding, data) in baddata:
with self.assertRaises(TypeError):
data.decode(encoding, "test.replacing")
def mutating(exc):
if isinstance(exc, UnicodeDecodeError):
@ -1039,10 +997,8 @@ class CodecCallbackTest(unittest.TestCase):
codecs.register_error("test.mutating", mutating)
# If the decoder doesn't pick up the modified input the following
# will lead to an endless loop
with test.support.check_warnings():
# unicode-internal has been deprecated
for (encoding, data) in baddata:
self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242")
for (encoding, data) in baddata:
self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242")
# issue32583
def test_crashing_decode_handler(self):

View File

@ -1239,16 +1239,6 @@ class EscapeDecodeTest(unittest.TestCase):
self.assertEqual(decode(br"[\x0]\x0", "replace"), (b"[?]?", 8))
class RecodingTest(unittest.TestCase):
def test_recoding(self):
f = io.BytesIO()
with codecs.EncodedFile(f, "unicode_internal", "utf-8") as f2:
f2.write("a")
# Python used to crash on this at exit because of a refcount
# bug in _codecsmodule.c
self.assertTrue(f.closed)
# From RFC 3492
punycode_testcases = [
# A Arabic (Egyptian):
@ -1378,87 +1368,6 @@ class PunycodeTest(unittest.TestCase):
self.assertEqual(uni, puny.decode("punycode"))
class UnicodeInternalTest(unittest.TestCase):
@unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t')
def test_bug1251300(self):
# Decoding with unicode_internal used to not correctly handle "code
# points" above 0x10ffff on UCS-4 builds.
ok = [
(b"\x00\x10\xff\xff", "\U0010ffff"),
(b"\x00\x00\x01\x01", "\U00000101"),
(b"", ""),
]
not_ok = [
b"\x7f\xff\xff\xff",
b"\x80\x00\x00\x00",
b"\x81\x00\x00\x00",
b"\x00",
b"\x00\x00\x00\x00\x00",
]
for internal, uni in ok:
if sys.byteorder == "little":
internal = bytes(reversed(internal))
with support.check_warnings():
self.assertEqual(uni, internal.decode("unicode_internal"))
for internal in not_ok:
if sys.byteorder == "little":
internal = bytes(reversed(internal))
with support.check_warnings(('unicode_internal codec has been '
'deprecated', DeprecationWarning)):
self.assertRaises(UnicodeDecodeError, internal.decode,
"unicode_internal")
if sys.byteorder == "little":
invalid = b"\x00\x00\x11\x00"
invalid_backslashreplace = r"\x00\x00\x11\x00"
else:
invalid = b"\x00\x11\x00\x00"
invalid_backslashreplace = r"\x00\x11\x00\x00"
with support.check_warnings():
self.assertRaises(UnicodeDecodeError,
invalid.decode, "unicode_internal")
with support.check_warnings():
self.assertEqual(invalid.decode("unicode_internal", "replace"),
'\ufffd')
with support.check_warnings():
self.assertEqual(invalid.decode("unicode_internal", "backslashreplace"),
invalid_backslashreplace)
@unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t')
def test_decode_error_attributes(self):
try:
with support.check_warnings(('unicode_internal codec has been '
'deprecated', DeprecationWarning)):
b"\x00\x00\x00\x00\x00\x11\x11\x00".decode("unicode_internal")
except UnicodeDecodeError as ex:
self.assertEqual("unicode_internal", ex.encoding)
self.assertEqual(b"\x00\x00\x00\x00\x00\x11\x11\x00", ex.object)
self.assertEqual(4, ex.start)
self.assertEqual(8, ex.end)
else:
self.fail()
@unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t')
def test_decode_callback(self):
codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
decoder = codecs.getdecoder("unicode_internal")
with support.check_warnings(('unicode_internal codec has been '
'deprecated', DeprecationWarning)):
ab = "ab".encode("unicode_internal").decode()
ignored = decoder(bytes("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]),
"ascii"),
"UnicodeInternalTest")
self.assertEqual(("ab", 12), ignored)
def test_encode_length(self):
with support.check_warnings(('unicode_internal codec has been '
'deprecated', DeprecationWarning)):
# Issue 3739
encoder = codecs.getencoder("unicode_internal")
self.assertEqual(encoder("a")[1], 1)
self.assertEqual(encoder("\xe9\u0142")[1], 2)
self.assertEqual(codecs.escape_encode(br'\x00')[1], 4)
# From http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html
nameprep_tests = [
# 3.1 Map to nothing.
@ -1949,7 +1858,6 @@ all_unicode_encodings = [
"shift_jisx0213",
"tis_620",
"unicode_escape",
"unicode_internal",
"utf_16",
"utf_16_be",
"utf_16_le",
@ -1969,7 +1877,6 @@ if hasattr(codecs, "oem_encode"):
# The following encodings don't work in stateful mode
broken_unicode_with_stateful = [
"punycode",
"unicode_internal"
]
@ -1984,12 +1891,10 @@ class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling):
name = "latin_1"
self.assertEqual(encoding.replace("_", "-"), name.replace("_", "-"))
with support.check_warnings():
# unicode-internal has been deprecated
(b, size) = codecs.getencoder(encoding)(s)
self.assertEqual(size, len(s), "encoding=%r" % encoding)
(chars, size) = codecs.getdecoder(encoding)(b)
self.assertEqual(chars, s, "encoding=%r" % encoding)
(b, size) = codecs.getencoder(encoding)(s)
self.assertEqual(size, len(s), "encoding=%r" % encoding)
(chars, size) = codecs.getdecoder(encoding)(b)
self.assertEqual(chars, s, "encoding=%r" % encoding)
if encoding not in broken_unicode_with_stateful:
# check stream reader/writer
@ -2116,9 +2021,7 @@ class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling):
def test_bad_encode_args(self):
for encoding in all_unicode_encodings:
encoder = codecs.getencoder(encoding)
with support.check_warnings():
# unicode-internal has been deprecated
self.assertRaises(TypeError, encoder)
self.assertRaises(TypeError, encoder)
def test_encoding_map_type_initialized(self):
from encodings import cp1140

View File

@ -2104,12 +2104,8 @@ class UnicodeTest(string_tests.CommonTest,
u = chr(c)
for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le',
'utf-16-be', 'raw_unicode_escape',
'unicode_escape', 'unicode_internal'):
with warnings.catch_warnings():
# unicode-internal has been deprecated
warnings.simplefilter("ignore", DeprecationWarning)
self.assertEqual(str(u.encode(encoding),encoding), u)
'unicode_escape'):
self.assertEqual(str(u.encode(encoding),encoding), u)
# Roundtrip safety for BMP (just the first 256 chars)
for c in range(256):
@ -2125,13 +2121,9 @@ class UnicodeTest(string_tests.CommonTest,
# Roundtrip safety for non-BMP (just a few chars)
with warnings.catch_warnings():
# unicode-internal has been deprecated
warnings.simplefilter("ignore", DeprecationWarning)
u = '\U00010001\U00020002\U00030003\U00040004\U00050005'
for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
'raw_unicode_escape',
'unicode_escape', 'unicode_internal'):
'raw_unicode_escape', 'unicode_escape'):
self.assertEqual(str(u.encode(encoding),encoding), u)
# UTF-8 must be roundtrip safe for all code points
@ -2349,22 +2341,22 @@ class UnicodeTest(string_tests.CommonTest,
self.assertEqual(args[0], text)
self.assertEqual(len(args), 1)
@support.cpython_only
def test_resize(self):
from _testcapi import getargs_u
for length in range(1, 100, 7):
# generate a fresh string (refcount=1)
text = 'a' * length + 'b'
with support.check_warnings(('unicode_internal codec has been '
'deprecated', DeprecationWarning)):
# fill wstr internal field
abc = text.encode('unicode_internal')
self.assertEqual(abc.decode('unicode_internal'), text)
# fill wstr internal field
abc = getargs_u(text)
self.assertEqual(abc, text)
# resize text: wstr field must be cleared and then recomputed
text += 'c'
abcdef = text.encode('unicode_internal')
self.assertNotEqual(abc, abcdef)
self.assertEqual(abcdef.decode('unicode_internal'), text)
# resize text: wstr field must be cleared and then recomputed
text += 'c'
abcdef = getargs_u(text)
self.assertNotEqual(abc, abcdef)
self.assertEqual(abcdef, text)
def test_compare(self):
# Issue #17615

View File

@ -0,0 +1,2 @@
"unicode_internal" codec is removed. It was deprecated since Python 3.3.
Patch by Inada Naoki.

View File

@ -21,8 +21,7 @@
(Unicode object, bytes consumed)
These <encoding>s are available: utf_8, unicode_escape,
raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
mbcs (on win32).
raw_unicode_escape, latin_1, ascii (7-bit), mbcs (on win32).
Written by Marc-Andre Lemburg (mal@lemburg.com).
@ -250,38 +249,6 @@ _codecs_escape_encode_impl(PyObject *module, PyObject *data,
}
/* --- Decoder ------------------------------------------------------------ */
/*[clinic input]
_codecs.unicode_internal_decode
obj: object
errors: str(accept={str, NoneType}) = NULL
/
[clinic start generated code]*/
static PyObject *
_codecs_unicode_internal_decode_impl(PyObject *module, PyObject *obj,
const char *errors)
/*[clinic end generated code: output=edbfe175e09eff9a input=8d57930aeda170c6]*/
{
if (PyUnicode_Check(obj)) {
if (PyUnicode_READY(obj) < 0)
return NULL;
Py_INCREF(obj);
return codec_tuple(obj, PyUnicode_GET_LENGTH(obj));
}
else {
Py_buffer view;
PyObject *result;
if (PyObject_GetBuffer(obj, &view, PyBUF_SIMPLE) != 0)
return NULL;
result = codec_tuple(
_PyUnicode_DecodeUnicodeInternal(view.buf, view.len, errors),
view.len);
PyBuffer_Release(&view);
return result;
}
}
/*[clinic input]
_codecs.utf_7_decode
data: Py_buffer
@ -686,51 +653,6 @@ _codecs_readbuffer_encode_impl(PyObject *module, Py_buffer *data,
return codec_tuple(result, data->len);
}
/*[clinic input]
_codecs.unicode_internal_encode
obj: object
errors: str(accept={str, NoneType}) = NULL
/
[clinic start generated code]*/
static PyObject *
_codecs_unicode_internal_encode_impl(PyObject *module, PyObject *obj,
const char *errors)
/*[clinic end generated code: output=a72507dde4ea558f input=8628f0280cf5ba61]*/
{
if (PyErr_WarnEx(PyExc_DeprecationWarning,
"unicode_internal codec has been deprecated",
1))
return NULL;
if (PyUnicode_Check(obj)) {
Py_UNICODE *u;
Py_ssize_t len, size;
if (PyUnicode_READY(obj) < 0)
return NULL;
u = PyUnicode_AsUnicodeAndSize(obj, &len);
if (u == NULL)
return NULL;
if ((size_t)len > (size_t)PY_SSIZE_T_MAX / sizeof(Py_UNICODE))
return PyErr_NoMemory();
size = len * sizeof(Py_UNICODE);
return codec_tuple(PyBytes_FromStringAndSize((const char*)u, size),
PyUnicode_GET_LENGTH(obj));
}
else {
Py_buffer view;
PyObject *result;
if (PyObject_GetBuffer(obj, &view, PyBUF_SIMPLE) != 0)
return NULL;
result = codec_tuple(PyBytes_FromStringAndSize(view.buf, view.len),
view.len);
PyBuffer_Release(&view);
return result;
}
}
/*[clinic input]
_codecs.utf_7_encode
str: unicode
@ -1095,8 +1017,6 @@ static PyMethodDef _codecs_functions[] = {
_CODECS_UTF_32_EX_DECODE_METHODDEF
_CODECS_UNICODE_ESCAPE_ENCODE_METHODDEF
_CODECS_UNICODE_ESCAPE_DECODE_METHODDEF
_CODECS_UNICODE_INTERNAL_ENCODE_METHODDEF
_CODECS_UNICODE_INTERNAL_DECODE_METHODDEF
_CODECS_RAW_UNICODE_ESCAPE_ENCODE_METHODDEF
_CODECS_RAW_UNICODE_ESCAPE_DECODE_METHODDEF
_CODECS_LATIN_1_ENCODE_METHODDEF

View File

@ -370,57 +370,6 @@ exit:
return return_value;
}
PyDoc_STRVAR(_codecs_unicode_internal_decode__doc__,
"unicode_internal_decode($module, obj, errors=None, /)\n"
"--\n"
"\n");
#define _CODECS_UNICODE_INTERNAL_DECODE_METHODDEF \
{"unicode_internal_decode", (PyCFunction)(void(*)(void))_codecs_unicode_internal_decode, METH_FASTCALL, _codecs_unicode_internal_decode__doc__},
static PyObject *
_codecs_unicode_internal_decode_impl(PyObject *module, PyObject *obj,
const char *errors);
static PyObject *
_codecs_unicode_internal_decode(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
{
PyObject *return_value = NULL;
PyObject *obj;
const char *errors = NULL;
if (!_PyArg_CheckPositional("unicode_internal_decode", nargs, 1, 2)) {
goto exit;
}
obj = args[0];
if (nargs < 2) {
goto skip_optional;
}
if (args[1] == Py_None) {
errors = NULL;
}
else if (PyUnicode_Check(args[1])) {
Py_ssize_t errors_length;
errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length);
if (errors == NULL) {
goto exit;
}
if (strlen(errors) != (size_t)errors_length) {
PyErr_SetString(PyExc_ValueError, "embedded null character");
goto exit;
}
}
else {
_PyArg_BadArgument("unicode_internal_decode", 2, "str or None", args[1]);
goto exit;
}
skip_optional:
return_value = _codecs_unicode_internal_decode_impl(module, obj, errors);
exit:
return return_value;
}
PyDoc_STRVAR(_codecs_utf_7_decode__doc__,
"utf_7_decode($module, data, errors=None, final=False, /)\n"
"--\n"
@ -1853,57 +1802,6 @@ exit:
return return_value;
}
PyDoc_STRVAR(_codecs_unicode_internal_encode__doc__,
"unicode_internal_encode($module, obj, errors=None, /)\n"
"--\n"
"\n");
#define _CODECS_UNICODE_INTERNAL_ENCODE_METHODDEF \
{"unicode_internal_encode", (PyCFunction)(void(*)(void))_codecs_unicode_internal_encode, METH_FASTCALL, _codecs_unicode_internal_encode__doc__},
static PyObject *
_codecs_unicode_internal_encode_impl(PyObject *module, PyObject *obj,
const char *errors);
static PyObject *
_codecs_unicode_internal_encode(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
{
PyObject *return_value = NULL;
PyObject *obj;
const char *errors = NULL;
if (!_PyArg_CheckPositional("unicode_internal_encode", nargs, 1, 2)) {
goto exit;
}
obj = args[0];
if (nargs < 2) {
goto skip_optional;
}
if (args[1] == Py_None) {
errors = NULL;
}
else if (PyUnicode_Check(args[1])) {
Py_ssize_t errors_length;
errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length);
if (errors == NULL) {
goto exit;
}
if (strlen(errors) != (size_t)errors_length) {
PyErr_SetString(PyExc_ValueError, "embedded null character");
goto exit;
}
}
else {
_PyArg_BadArgument("unicode_internal_encode", 2, "str or None", args[1]);
goto exit;
}
skip_optional:
return_value = _codecs_unicode_internal_encode_impl(module, obj, errors);
exit:
return return_value;
}
PyDoc_STRVAR(_codecs_utf_7_encode__doc__,
"utf_7_encode($module, str, errors=None, /)\n"
"--\n"
@ -3024,4 +2922,4 @@ exit:
#ifndef _CODECS_CODE_PAGE_ENCODE_METHODDEF
#define _CODECS_CODE_PAGE_ENCODE_METHODDEF
#endif /* !defined(_CODECS_CODE_PAGE_ENCODE_METHODDEF) */
/*[clinic end generated code: output=02bd0f0cf9a28150 input=a9049054013a1b77]*/
/*[clinic end generated code: output=da3c47709a55a05e input=a9049054013a1b77]*/

View File

@ -6551,108 +6551,6 @@ PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
return result;
}
/* --- Unicode Internal Codec ------------------------------------------- */
PyObject *
_PyUnicode_DecodeUnicodeInternal(const char *s,
Py_ssize_t size,
const char *errors)
{
const char *starts = s;
Py_ssize_t startinpos;
Py_ssize_t endinpos;
_PyUnicodeWriter writer;
const char *end;
const char *reason;
PyObject *errorHandler = NULL;
PyObject *exc = NULL;
if (PyErr_WarnEx(PyExc_DeprecationWarning,
"unicode_internal codec has been deprecated",
1))
return NULL;
if (size < 0) {
PyErr_BadInternalCall();
return NULL;
}
if (size == 0)
_Py_RETURN_UNICODE_EMPTY();
_PyUnicodeWriter_Init(&writer);
if (size / Py_UNICODE_SIZE > PY_SSIZE_T_MAX - 1) {
PyErr_NoMemory();
goto onError;
}
writer.min_length = (size + (Py_UNICODE_SIZE - 1)) / Py_UNICODE_SIZE;
end = s + size;
while (s < end) {
Py_UNICODE uch;
Py_UCS4 ch;
if (end - s < Py_UNICODE_SIZE) {
endinpos = end-starts;
reason = "truncated input";
goto error;
}
/* We copy the raw representation one byte at a time because the
pointer may be unaligned (see test_codeccallbacks). */
((char *) &uch)[0] = s[0];
((char *) &uch)[1] = s[1];
#ifdef Py_UNICODE_WIDE
((char *) &uch)[2] = s[2];
((char *) &uch)[3] = s[3];
#endif
ch = uch;
#ifdef Py_UNICODE_WIDE
/* We have to sanity check the raw data, otherwise doom looms for
some malformed UCS-4 data. */
if (ch > 0x10ffff) {
endinpos = s - starts + Py_UNICODE_SIZE;
reason = "illegal code point (> 0x10FFFF)";
goto error;
}
#endif
s += Py_UNICODE_SIZE;
#ifndef Py_UNICODE_WIDE
if (Py_UNICODE_IS_HIGH_SURROGATE(ch) && end - s >= Py_UNICODE_SIZE)
{
Py_UNICODE uch2;
((char *) &uch2)[0] = s[0];
((char *) &uch2)[1] = s[1];
if (Py_UNICODE_IS_LOW_SURROGATE(uch2))
{
ch = Py_UNICODE_JOIN_SURROGATES(uch, uch2);
s += Py_UNICODE_SIZE;
}
}
#endif
if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0)
goto onError;
continue;
error:
startinpos = s - starts;
if (unicode_decode_call_errorhandler_writer(
errors, &errorHandler,
"unicode_internal", reason,
&starts, &end, &startinpos, &endinpos, &exc, &s,
&writer))
goto onError;
}
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
return _PyUnicodeWriter_Finish(&writer);
onError:
_PyUnicodeWriter_Dealloc(&writer);
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
return NULL;
}
/* --- Latin-1 Codec ------------------------------------------------------ */
PyObject *

View File

@ -392,7 +392,6 @@
<Compile Include="encodings\tis_620.py" />
<Compile Include="encodings\undefined.py" />
<Compile Include="encodings\unicode_escape.py" />
<Compile Include="encodings\unicode_internal.py" />
<Compile Include="encodings\utf_16.py" />
<Compile Include="encodings\utf_16_be.py" />
<Compile Include="encodings\utf_16_le.py" />