bpo-36297: remove "unicode_internal" codec (GH-12342)
This commit is contained in:
parent
6fb544d8bc
commit
6a16b18224
|
@ -1316,16 +1316,10 @@ encodings.
|
|||
| | | code actually uses UTF-8 |
|
||||
| | | by default. |
|
||||
+--------------------+---------+---------------------------+
|
||||
| unicode_internal | | Return the internal |
|
||||
| | | representation of the |
|
||||
| | | operand. Stateful codecs |
|
||||
| | | are not supported. |
|
||||
| | | |
|
||||
| | | .. deprecated:: 3.3 |
|
||||
| | | This representation is |
|
||||
| | | obsoleted by |
|
||||
| | | :pep:`393`. |
|
||||
+--------------------+---------+---------------------------+
|
||||
|
||||
.. versionchanged:: 3.8
|
||||
The "unicode_internal" codec has been removed.
|
||||
|
||||
|
||||
.. _binary-transforms:
|
||||
|
||||
|
|
|
@ -573,6 +573,9 @@ The following features and APIs have been removed from Python 3.8:
|
|||
* Removed the ``doctype()`` method of :class:`~xml.etree.ElementTree.XMLParser`.
|
||||
(Contributed by Serhiy Storchaka in :issue:`29209`.)
|
||||
|
||||
* The "unicode_internal" codec has been removed.
|
||||
(Contributed by Inada Naoki in :issue:`36297`.)
|
||||
|
||||
|
||||
Porting to Python 3.8
|
||||
=====================
|
||||
|
|
|
@ -896,15 +896,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
|
|||
Py_ssize_t length /* Number of Py_UNICODE chars to encode */
|
||||
) Py_DEPRECATED(3.3);
|
||||
|
||||
/* --- Unicode Internal Codec --------------------------------------------- */
|
||||
|
||||
/* Only for internal use in _codecsmodule.c */
|
||||
PyObject *_PyUnicode_DecodeUnicodeInternal(
|
||||
const char *string,
|
||||
Py_ssize_t length,
|
||||
const char *errors
|
||||
);
|
||||
|
||||
/* --- Latin-1 Codecs ----------------------------------------------------- */
|
||||
|
||||
PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
|
||||
|
|
|
@ -1,45 +0,0 @@
|
|||
""" Python 'unicode-internal' Codec
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
|
||||
|
||||
# Note: Binding these as C functions will result in the class not
|
||||
# converting them to methods. This is intended.
|
||||
encode = codecs.unicode_internal_encode
|
||||
decode = codecs.unicode_internal_decode
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.unicode_internal_encode(input, self.errors)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
return codecs.unicode_internal_decode(input, self.errors)[0]
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
|
||||
pass
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):
|
||||
pass
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
|
||||
return codecs.CodecInfo(
|
||||
name='unicode-internal',
|
||||
encode=Codec.encode,
|
||||
decode=Codec.decode,
|
||||
incrementalencoder=IncrementalEncoder,
|
||||
incrementaldecoder=IncrementalDecoder,
|
||||
streamwriter=StreamWriter,
|
||||
streamreader=StreamReader,
|
||||
)
|
|
@ -211,42 +211,6 @@ class CodecCallbackTest(unittest.TestCase):
|
|||
charmap[ord("?")] = "XYZ" # wrong type in mapping
|
||||
self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
|
||||
|
||||
def test_decodeunicodeinternal(self):
|
||||
with test.support.check_warnings(('unicode_internal codec has been '
|
||||
'deprecated', DeprecationWarning)):
|
||||
self.assertRaises(
|
||||
UnicodeDecodeError,
|
||||
b"\x00\x00\x00\x00\x00".decode,
|
||||
"unicode-internal",
|
||||
)
|
||||
if len('\0'.encode('unicode-internal')) == 4:
|
||||
def handler_unicodeinternal(exc):
|
||||
if not isinstance(exc, UnicodeDecodeError):
|
||||
raise TypeError("don't know how to handle %r" % exc)
|
||||
return ("\x01", 1)
|
||||
|
||||
self.assertEqual(
|
||||
b"\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"),
|
||||
"\u0000"
|
||||
)
|
||||
|
||||
self.assertEqual(
|
||||
b"\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"),
|
||||
"\u0000\ufffd"
|
||||
)
|
||||
|
||||
self.assertEqual(
|
||||
b"\x00\x00\x00\x00\x00".decode("unicode-internal", "backslashreplace"),
|
||||
"\u0000\\x00"
|
||||
)
|
||||
|
||||
codecs.register_error("test.hui", handler_unicodeinternal)
|
||||
|
||||
self.assertEqual(
|
||||
b"\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"),
|
||||
"\u0000\u0001\u0000"
|
||||
)
|
||||
|
||||
def test_callbacks(self):
|
||||
def handler1(exc):
|
||||
r = range(exc.start, exc.end)
|
||||
|
@ -794,16 +758,13 @@ class CodecCallbackTest(unittest.TestCase):
|
|||
("ascii", b"\xff"),
|
||||
("utf-8", b"\xff"),
|
||||
("utf-7", b"+x-"),
|
||||
("unicode-internal", b"\x00"),
|
||||
):
|
||||
with test.support.check_warnings():
|
||||
# unicode-internal has been deprecated
|
||||
self.assertRaises(
|
||||
TypeError,
|
||||
bytes.decode,
|
||||
enc,
|
||||
"test.badhandler"
|
||||
)
|
||||
self.assertRaises(
|
||||
TypeError,
|
||||
bytes.decode,
|
||||
enc,
|
||||
"test.badhandler"
|
||||
)
|
||||
|
||||
def test_lookup(self):
|
||||
self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict"))
|
||||
|
@ -1013,7 +974,6 @@ class CodecCallbackTest(unittest.TestCase):
|
|||
("utf-32", b"\xff"),
|
||||
("unicode-escape", b"\\u123g"),
|
||||
("raw-unicode-escape", b"\\u123g"),
|
||||
("unicode-internal", b"\xff"),
|
||||
]
|
||||
|
||||
def replacing(exc):
|
||||
|
@ -1024,11 +984,9 @@ class CodecCallbackTest(unittest.TestCase):
|
|||
raise TypeError("don't know how to handle %r" % exc)
|
||||
codecs.register_error("test.replacing", replacing)
|
||||
|
||||
with test.support.check_warnings():
|
||||
# unicode-internal has been deprecated
|
||||
for (encoding, data) in baddata:
|
||||
with self.assertRaises(TypeError):
|
||||
data.decode(encoding, "test.replacing")
|
||||
for (encoding, data) in baddata:
|
||||
with self.assertRaises(TypeError):
|
||||
data.decode(encoding, "test.replacing")
|
||||
|
||||
def mutating(exc):
|
||||
if isinstance(exc, UnicodeDecodeError):
|
||||
|
@ -1039,10 +997,8 @@ class CodecCallbackTest(unittest.TestCase):
|
|||
codecs.register_error("test.mutating", mutating)
|
||||
# If the decoder doesn't pick up the modified input the following
|
||||
# will lead to an endless loop
|
||||
with test.support.check_warnings():
|
||||
# unicode-internal has been deprecated
|
||||
for (encoding, data) in baddata:
|
||||
self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242")
|
||||
for (encoding, data) in baddata:
|
||||
self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242")
|
||||
|
||||
# issue32583
|
||||
def test_crashing_decode_handler(self):
|
||||
|
|
|
@ -1239,16 +1239,6 @@ class EscapeDecodeTest(unittest.TestCase):
|
|||
self.assertEqual(decode(br"[\x0]\x0", "replace"), (b"[?]?", 8))
|
||||
|
||||
|
||||
class RecodingTest(unittest.TestCase):
|
||||
def test_recoding(self):
|
||||
f = io.BytesIO()
|
||||
with codecs.EncodedFile(f, "unicode_internal", "utf-8") as f2:
|
||||
f2.write("a")
|
||||
# Python used to crash on this at exit because of a refcount
|
||||
# bug in _codecsmodule.c
|
||||
|
||||
self.assertTrue(f.closed)
|
||||
|
||||
# From RFC 3492
|
||||
punycode_testcases = [
|
||||
# A Arabic (Egyptian):
|
||||
|
@ -1378,87 +1368,6 @@ class PunycodeTest(unittest.TestCase):
|
|||
self.assertEqual(uni, puny.decode("punycode"))
|
||||
|
||||
|
||||
class UnicodeInternalTest(unittest.TestCase):
|
||||
@unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t')
|
||||
def test_bug1251300(self):
|
||||
# Decoding with unicode_internal used to not correctly handle "code
|
||||
# points" above 0x10ffff on UCS-4 builds.
|
||||
ok = [
|
||||
(b"\x00\x10\xff\xff", "\U0010ffff"),
|
||||
(b"\x00\x00\x01\x01", "\U00000101"),
|
||||
(b"", ""),
|
||||
]
|
||||
not_ok = [
|
||||
b"\x7f\xff\xff\xff",
|
||||
b"\x80\x00\x00\x00",
|
||||
b"\x81\x00\x00\x00",
|
||||
b"\x00",
|
||||
b"\x00\x00\x00\x00\x00",
|
||||
]
|
||||
for internal, uni in ok:
|
||||
if sys.byteorder == "little":
|
||||
internal = bytes(reversed(internal))
|
||||
with support.check_warnings():
|
||||
self.assertEqual(uni, internal.decode("unicode_internal"))
|
||||
for internal in not_ok:
|
||||
if sys.byteorder == "little":
|
||||
internal = bytes(reversed(internal))
|
||||
with support.check_warnings(('unicode_internal codec has been '
|
||||
'deprecated', DeprecationWarning)):
|
||||
self.assertRaises(UnicodeDecodeError, internal.decode,
|
||||
"unicode_internal")
|
||||
if sys.byteorder == "little":
|
||||
invalid = b"\x00\x00\x11\x00"
|
||||
invalid_backslashreplace = r"\x00\x00\x11\x00"
|
||||
else:
|
||||
invalid = b"\x00\x11\x00\x00"
|
||||
invalid_backslashreplace = r"\x00\x11\x00\x00"
|
||||
with support.check_warnings():
|
||||
self.assertRaises(UnicodeDecodeError,
|
||||
invalid.decode, "unicode_internal")
|
||||
with support.check_warnings():
|
||||
self.assertEqual(invalid.decode("unicode_internal", "replace"),
|
||||
'\ufffd')
|
||||
with support.check_warnings():
|
||||
self.assertEqual(invalid.decode("unicode_internal", "backslashreplace"),
|
||||
invalid_backslashreplace)
|
||||
|
||||
@unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t')
|
||||
def test_decode_error_attributes(self):
|
||||
try:
|
||||
with support.check_warnings(('unicode_internal codec has been '
|
||||
'deprecated', DeprecationWarning)):
|
||||
b"\x00\x00\x00\x00\x00\x11\x11\x00".decode("unicode_internal")
|
||||
except UnicodeDecodeError as ex:
|
||||
self.assertEqual("unicode_internal", ex.encoding)
|
||||
self.assertEqual(b"\x00\x00\x00\x00\x00\x11\x11\x00", ex.object)
|
||||
self.assertEqual(4, ex.start)
|
||||
self.assertEqual(8, ex.end)
|
||||
else:
|
||||
self.fail()
|
||||
|
||||
@unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t')
|
||||
def test_decode_callback(self):
|
||||
codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
|
||||
decoder = codecs.getdecoder("unicode_internal")
|
||||
with support.check_warnings(('unicode_internal codec has been '
|
||||
'deprecated', DeprecationWarning)):
|
||||
ab = "ab".encode("unicode_internal").decode()
|
||||
ignored = decoder(bytes("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]),
|
||||
"ascii"),
|
||||
"UnicodeInternalTest")
|
||||
self.assertEqual(("ab", 12), ignored)
|
||||
|
||||
def test_encode_length(self):
|
||||
with support.check_warnings(('unicode_internal codec has been '
|
||||
'deprecated', DeprecationWarning)):
|
||||
# Issue 3739
|
||||
encoder = codecs.getencoder("unicode_internal")
|
||||
self.assertEqual(encoder("a")[1], 1)
|
||||
self.assertEqual(encoder("\xe9\u0142")[1], 2)
|
||||
|
||||
self.assertEqual(codecs.escape_encode(br'\x00')[1], 4)
|
||||
|
||||
# From http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html
|
||||
nameprep_tests = [
|
||||
# 3.1 Map to nothing.
|
||||
|
@ -1949,7 +1858,6 @@ all_unicode_encodings = [
|
|||
"shift_jisx0213",
|
||||
"tis_620",
|
||||
"unicode_escape",
|
||||
"unicode_internal",
|
||||
"utf_16",
|
||||
"utf_16_be",
|
||||
"utf_16_le",
|
||||
|
@ -1969,7 +1877,6 @@ if hasattr(codecs, "oem_encode"):
|
|||
# The following encodings don't work in stateful mode
|
||||
broken_unicode_with_stateful = [
|
||||
"punycode",
|
||||
"unicode_internal"
|
||||
]
|
||||
|
||||
|
||||
|
@ -1984,12 +1891,10 @@ class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling):
|
|||
name = "latin_1"
|
||||
self.assertEqual(encoding.replace("_", "-"), name.replace("_", "-"))
|
||||
|
||||
with support.check_warnings():
|
||||
# unicode-internal has been deprecated
|
||||
(b, size) = codecs.getencoder(encoding)(s)
|
||||
self.assertEqual(size, len(s), "encoding=%r" % encoding)
|
||||
(chars, size) = codecs.getdecoder(encoding)(b)
|
||||
self.assertEqual(chars, s, "encoding=%r" % encoding)
|
||||
(b, size) = codecs.getencoder(encoding)(s)
|
||||
self.assertEqual(size, len(s), "encoding=%r" % encoding)
|
||||
(chars, size) = codecs.getdecoder(encoding)(b)
|
||||
self.assertEqual(chars, s, "encoding=%r" % encoding)
|
||||
|
||||
if encoding not in broken_unicode_with_stateful:
|
||||
# check stream reader/writer
|
||||
|
@ -2116,9 +2021,7 @@ class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling):
|
|||
def test_bad_encode_args(self):
|
||||
for encoding in all_unicode_encodings:
|
||||
encoder = codecs.getencoder(encoding)
|
||||
with support.check_warnings():
|
||||
# unicode-internal has been deprecated
|
||||
self.assertRaises(TypeError, encoder)
|
||||
self.assertRaises(TypeError, encoder)
|
||||
|
||||
def test_encoding_map_type_initialized(self):
|
||||
from encodings import cp1140
|
||||
|
|
|
@ -2104,12 +2104,8 @@ class UnicodeTest(string_tests.CommonTest,
|
|||
u = chr(c)
|
||||
for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le',
|
||||
'utf-16-be', 'raw_unicode_escape',
|
||||
'unicode_escape', 'unicode_internal'):
|
||||
with warnings.catch_warnings():
|
||||
# unicode-internal has been deprecated
|
||||
warnings.simplefilter("ignore", DeprecationWarning)
|
||||
|
||||
self.assertEqual(str(u.encode(encoding),encoding), u)
|
||||
'unicode_escape'):
|
||||
self.assertEqual(str(u.encode(encoding),encoding), u)
|
||||
|
||||
# Roundtrip safety for BMP (just the first 256 chars)
|
||||
for c in range(256):
|
||||
|
@ -2125,13 +2121,9 @@ class UnicodeTest(string_tests.CommonTest,
|
|||
|
||||
# Roundtrip safety for non-BMP (just a few chars)
|
||||
with warnings.catch_warnings():
|
||||
# unicode-internal has been deprecated
|
||||
warnings.simplefilter("ignore", DeprecationWarning)
|
||||
|
||||
u = '\U00010001\U00020002\U00030003\U00040004\U00050005'
|
||||
for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
|
||||
'raw_unicode_escape',
|
||||
'unicode_escape', 'unicode_internal'):
|
||||
'raw_unicode_escape', 'unicode_escape'):
|
||||
self.assertEqual(str(u.encode(encoding),encoding), u)
|
||||
|
||||
# UTF-8 must be roundtrip safe for all code points
|
||||
|
@ -2349,22 +2341,22 @@ class UnicodeTest(string_tests.CommonTest,
|
|||
self.assertEqual(args[0], text)
|
||||
self.assertEqual(len(args), 1)
|
||||
|
||||
@support.cpython_only
|
||||
def test_resize(self):
|
||||
from _testcapi import getargs_u
|
||||
for length in range(1, 100, 7):
|
||||
# generate a fresh string (refcount=1)
|
||||
text = 'a' * length + 'b'
|
||||
|
||||
with support.check_warnings(('unicode_internal codec has been '
|
||||
'deprecated', DeprecationWarning)):
|
||||
# fill wstr internal field
|
||||
abc = text.encode('unicode_internal')
|
||||
self.assertEqual(abc.decode('unicode_internal'), text)
|
||||
# fill wstr internal field
|
||||
abc = getargs_u(text)
|
||||
self.assertEqual(abc, text)
|
||||
|
||||
# resize text: wstr field must be cleared and then recomputed
|
||||
text += 'c'
|
||||
abcdef = text.encode('unicode_internal')
|
||||
self.assertNotEqual(abc, abcdef)
|
||||
self.assertEqual(abcdef.decode('unicode_internal'), text)
|
||||
# resize text: wstr field must be cleared and then recomputed
|
||||
text += 'c'
|
||||
abcdef = getargs_u(text)
|
||||
self.assertNotEqual(abc, abcdef)
|
||||
self.assertEqual(abcdef, text)
|
||||
|
||||
def test_compare(self):
|
||||
# Issue #17615
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
The "unicode_internal" codec has been removed. It had been deprecated since Python 3.3.
|
||||
Patch by Inada Naoki.
|
|
@ -21,8 +21,7 @@
|
|||
(Unicode object, bytes consumed)
|
||||
|
||||
These <encoding>s are available: utf_8, unicode_escape,
|
||||
raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
|
||||
mbcs (on win32).
|
||||
raw_unicode_escape, latin_1, ascii (7-bit), mbcs (on win32).
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
@ -250,38 +249,6 @@ _codecs_escape_encode_impl(PyObject *module, PyObject *data,
|
|||
}
|
||||
|
||||
/* --- Decoder ------------------------------------------------------------ */
|
||||
/*[clinic input]
|
||||
_codecs.unicode_internal_decode
|
||||
obj: object
|
||||
errors: str(accept={str, NoneType}) = NULL
|
||||
/
|
||||
[clinic start generated code]*/
|
||||
|
||||
static PyObject *
|
||||
_codecs_unicode_internal_decode_impl(PyObject *module, PyObject *obj,
|
||||
const char *errors)
|
||||
/*[clinic end generated code: output=edbfe175e09eff9a input=8d57930aeda170c6]*/
|
||||
{
|
||||
if (PyUnicode_Check(obj)) {
|
||||
if (PyUnicode_READY(obj) < 0)
|
||||
return NULL;
|
||||
Py_INCREF(obj);
|
||||
return codec_tuple(obj, PyUnicode_GET_LENGTH(obj));
|
||||
}
|
||||
else {
|
||||
Py_buffer view;
|
||||
PyObject *result;
|
||||
if (PyObject_GetBuffer(obj, &view, PyBUF_SIMPLE) != 0)
|
||||
return NULL;
|
||||
|
||||
result = codec_tuple(
|
||||
_PyUnicode_DecodeUnicodeInternal(view.buf, view.len, errors),
|
||||
view.len);
|
||||
PyBuffer_Release(&view);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
_codecs.utf_7_decode
|
||||
data: Py_buffer
|
||||
|
@ -686,51 +653,6 @@ _codecs_readbuffer_encode_impl(PyObject *module, Py_buffer *data,
|
|||
return codec_tuple(result, data->len);
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
_codecs.unicode_internal_encode
|
||||
obj: object
|
||||
errors: str(accept={str, NoneType}) = NULL
|
||||
/
|
||||
[clinic start generated code]*/
|
||||
|
||||
static PyObject *
|
||||
_codecs_unicode_internal_encode_impl(PyObject *module, PyObject *obj,
|
||||
const char *errors)
|
||||
/*[clinic end generated code: output=a72507dde4ea558f input=8628f0280cf5ba61]*/
|
||||
{
|
||||
if (PyErr_WarnEx(PyExc_DeprecationWarning,
|
||||
"unicode_internal codec has been deprecated",
|
||||
1))
|
||||
return NULL;
|
||||
|
||||
if (PyUnicode_Check(obj)) {
|
||||
Py_UNICODE *u;
|
||||
Py_ssize_t len, size;
|
||||
|
||||
if (PyUnicode_READY(obj) < 0)
|
||||
return NULL;
|
||||
|
||||
u = PyUnicode_AsUnicodeAndSize(obj, &len);
|
||||
if (u == NULL)
|
||||
return NULL;
|
||||
if ((size_t)len > (size_t)PY_SSIZE_T_MAX / sizeof(Py_UNICODE))
|
||||
return PyErr_NoMemory();
|
||||
size = len * sizeof(Py_UNICODE);
|
||||
return codec_tuple(PyBytes_FromStringAndSize((const char*)u, size),
|
||||
PyUnicode_GET_LENGTH(obj));
|
||||
}
|
||||
else {
|
||||
Py_buffer view;
|
||||
PyObject *result;
|
||||
if (PyObject_GetBuffer(obj, &view, PyBUF_SIMPLE) != 0)
|
||||
return NULL;
|
||||
result = codec_tuple(PyBytes_FromStringAndSize(view.buf, view.len),
|
||||
view.len);
|
||||
PyBuffer_Release(&view);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
_codecs.utf_7_encode
|
||||
str: unicode
|
||||
|
@ -1095,8 +1017,6 @@ static PyMethodDef _codecs_functions[] = {
|
|||
_CODECS_UTF_32_EX_DECODE_METHODDEF
|
||||
_CODECS_UNICODE_ESCAPE_ENCODE_METHODDEF
|
||||
_CODECS_UNICODE_ESCAPE_DECODE_METHODDEF
|
||||
_CODECS_UNICODE_INTERNAL_ENCODE_METHODDEF
|
||||
_CODECS_UNICODE_INTERNAL_DECODE_METHODDEF
|
||||
_CODECS_RAW_UNICODE_ESCAPE_ENCODE_METHODDEF
|
||||
_CODECS_RAW_UNICODE_ESCAPE_DECODE_METHODDEF
|
||||
_CODECS_LATIN_1_ENCODE_METHODDEF
|
||||
|
|
|
@ -370,57 +370,6 @@ exit:
|
|||
return return_value;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(_codecs_unicode_internal_decode__doc__,
|
||||
"unicode_internal_decode($module, obj, errors=None, /)\n"
|
||||
"--\n"
|
||||
"\n");
|
||||
|
||||
#define _CODECS_UNICODE_INTERNAL_DECODE_METHODDEF \
|
||||
{"unicode_internal_decode", (PyCFunction)(void(*)(void))_codecs_unicode_internal_decode, METH_FASTCALL, _codecs_unicode_internal_decode__doc__},
|
||||
|
||||
static PyObject *
|
||||
_codecs_unicode_internal_decode_impl(PyObject *module, PyObject *obj,
|
||||
const char *errors);
|
||||
|
||||
static PyObject *
|
||||
_codecs_unicode_internal_decode(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
|
||||
{
|
||||
PyObject *return_value = NULL;
|
||||
PyObject *obj;
|
||||
const char *errors = NULL;
|
||||
|
||||
if (!_PyArg_CheckPositional("unicode_internal_decode", nargs, 1, 2)) {
|
||||
goto exit;
|
||||
}
|
||||
obj = args[0];
|
||||
if (nargs < 2) {
|
||||
goto skip_optional;
|
||||
}
|
||||
if (args[1] == Py_None) {
|
||||
errors = NULL;
|
||||
}
|
||||
else if (PyUnicode_Check(args[1])) {
|
||||
Py_ssize_t errors_length;
|
||||
errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length);
|
||||
if (errors == NULL) {
|
||||
goto exit;
|
||||
}
|
||||
if (strlen(errors) != (size_t)errors_length) {
|
||||
PyErr_SetString(PyExc_ValueError, "embedded null character");
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
else {
|
||||
_PyArg_BadArgument("unicode_internal_decode", 2, "str or None", args[1]);
|
||||
goto exit;
|
||||
}
|
||||
skip_optional:
|
||||
return_value = _codecs_unicode_internal_decode_impl(module, obj, errors);
|
||||
|
||||
exit:
|
||||
return return_value;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(_codecs_utf_7_decode__doc__,
|
||||
"utf_7_decode($module, data, errors=None, final=False, /)\n"
|
||||
"--\n"
|
||||
|
@ -1853,57 +1802,6 @@ exit:
|
|||
return return_value;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(_codecs_unicode_internal_encode__doc__,
|
||||
"unicode_internal_encode($module, obj, errors=None, /)\n"
|
||||
"--\n"
|
||||
"\n");
|
||||
|
||||
#define _CODECS_UNICODE_INTERNAL_ENCODE_METHODDEF \
|
||||
{"unicode_internal_encode", (PyCFunction)(void(*)(void))_codecs_unicode_internal_encode, METH_FASTCALL, _codecs_unicode_internal_encode__doc__},
|
||||
|
||||
static PyObject *
|
||||
_codecs_unicode_internal_encode_impl(PyObject *module, PyObject *obj,
|
||||
const char *errors);
|
||||
|
||||
static PyObject *
|
||||
_codecs_unicode_internal_encode(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
|
||||
{
|
||||
PyObject *return_value = NULL;
|
||||
PyObject *obj;
|
||||
const char *errors = NULL;
|
||||
|
||||
if (!_PyArg_CheckPositional("unicode_internal_encode", nargs, 1, 2)) {
|
||||
goto exit;
|
||||
}
|
||||
obj = args[0];
|
||||
if (nargs < 2) {
|
||||
goto skip_optional;
|
||||
}
|
||||
if (args[1] == Py_None) {
|
||||
errors = NULL;
|
||||
}
|
||||
else if (PyUnicode_Check(args[1])) {
|
||||
Py_ssize_t errors_length;
|
||||
errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length);
|
||||
if (errors == NULL) {
|
||||
goto exit;
|
||||
}
|
||||
if (strlen(errors) != (size_t)errors_length) {
|
||||
PyErr_SetString(PyExc_ValueError, "embedded null character");
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
else {
|
||||
_PyArg_BadArgument("unicode_internal_encode", 2, "str or None", args[1]);
|
||||
goto exit;
|
||||
}
|
||||
skip_optional:
|
||||
return_value = _codecs_unicode_internal_encode_impl(module, obj, errors);
|
||||
|
||||
exit:
|
||||
return return_value;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(_codecs_utf_7_encode__doc__,
|
||||
"utf_7_encode($module, str, errors=None, /)\n"
|
||||
"--\n"
|
||||
|
@ -3024,4 +2922,4 @@ exit:
|
|||
#ifndef _CODECS_CODE_PAGE_ENCODE_METHODDEF
|
||||
#define _CODECS_CODE_PAGE_ENCODE_METHODDEF
|
||||
#endif /* !defined(_CODECS_CODE_PAGE_ENCODE_METHODDEF) */
|
||||
/*[clinic end generated code: output=02bd0f0cf9a28150 input=a9049054013a1b77]*/
|
||||
/*[clinic end generated code: output=da3c47709a55a05e input=a9049054013a1b77]*/
|
||||
|
|
|
@ -6551,108 +6551,6 @@ PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
|
|||
return result;
|
||||
}
|
||||
|
||||
/* --- Unicode Internal Codec ------------------------------------------- */
|
||||
|
||||
PyObject *
|
||||
_PyUnicode_DecodeUnicodeInternal(const char *s,
|
||||
Py_ssize_t size,
|
||||
const char *errors)
|
||||
{
|
||||
const char *starts = s;
|
||||
Py_ssize_t startinpos;
|
||||
Py_ssize_t endinpos;
|
||||
_PyUnicodeWriter writer;
|
||||
const char *end;
|
||||
const char *reason;
|
||||
PyObject *errorHandler = NULL;
|
||||
PyObject *exc = NULL;
|
||||
|
||||
if (PyErr_WarnEx(PyExc_DeprecationWarning,
|
||||
"unicode_internal codec has been deprecated",
|
||||
1))
|
||||
return NULL;
|
||||
|
||||
if (size < 0) {
|
||||
PyErr_BadInternalCall();
|
||||
return NULL;
|
||||
}
|
||||
if (size == 0)
|
||||
_Py_RETURN_UNICODE_EMPTY();
|
||||
|
||||
_PyUnicodeWriter_Init(&writer);
|
||||
if (size / Py_UNICODE_SIZE > PY_SSIZE_T_MAX - 1) {
|
||||
PyErr_NoMemory();
|
||||
goto onError;
|
||||
}
|
||||
writer.min_length = (size + (Py_UNICODE_SIZE - 1)) / Py_UNICODE_SIZE;
|
||||
|
||||
end = s + size;
|
||||
while (s < end) {
|
||||
Py_UNICODE uch;
|
||||
Py_UCS4 ch;
|
||||
if (end - s < Py_UNICODE_SIZE) {
|
||||
endinpos = end-starts;
|
||||
reason = "truncated input";
|
||||
goto error;
|
||||
}
|
||||
/* We copy the raw representation one byte at a time because the
|
||||
pointer may be unaligned (see test_codeccallbacks). */
|
||||
((char *) &uch)[0] = s[0];
|
||||
((char *) &uch)[1] = s[1];
|
||||
#ifdef Py_UNICODE_WIDE
|
||||
((char *) &uch)[2] = s[2];
|
||||
((char *) &uch)[3] = s[3];
|
||||
#endif
|
||||
ch = uch;
|
||||
#ifdef Py_UNICODE_WIDE
|
||||
/* We have to sanity check the raw data, otherwise doom looms for
|
||||
some malformed UCS-4 data. */
|
||||
if (ch > 0x10ffff) {
|
||||
endinpos = s - starts + Py_UNICODE_SIZE;
|
||||
reason = "illegal code point (> 0x10FFFF)";
|
||||
goto error;
|
||||
}
|
||||
#endif
|
||||
s += Py_UNICODE_SIZE;
|
||||
#ifndef Py_UNICODE_WIDE
|
||||
if (Py_UNICODE_IS_HIGH_SURROGATE(ch) && end - s >= Py_UNICODE_SIZE)
|
||||
{
|
||||
Py_UNICODE uch2;
|
||||
((char *) &uch2)[0] = s[0];
|
||||
((char *) &uch2)[1] = s[1];
|
||||
if (Py_UNICODE_IS_LOW_SURROGATE(uch2))
|
||||
{
|
||||
ch = Py_UNICODE_JOIN_SURROGATES(uch, uch2);
|
||||
s += Py_UNICODE_SIZE;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0)
|
||||
goto onError;
|
||||
continue;
|
||||
|
||||
error:
|
||||
startinpos = s - starts;
|
||||
if (unicode_decode_call_errorhandler_writer(
|
||||
errors, &errorHandler,
|
||||
"unicode_internal", reason,
|
||||
&starts, &end, &startinpos, &endinpos, &exc, &s,
|
||||
&writer))
|
||||
goto onError;
|
||||
}
|
||||
|
||||
Py_XDECREF(errorHandler);
|
||||
Py_XDECREF(exc);
|
||||
return _PyUnicodeWriter_Finish(&writer);
|
||||
|
||||
onError:
|
||||
_PyUnicodeWriter_Dealloc(&writer);
|
||||
Py_XDECREF(errorHandler);
|
||||
Py_XDECREF(exc);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* --- Latin-1 Codec ------------------------------------------------------ */
|
||||
|
||||
PyObject *
|
||||
|
|
|
@ -392,7 +392,6 @@
|
|||
<Compile Include="encodings\tis_620.py" />
|
||||
<Compile Include="encodings\undefined.py" />
|
||||
<Compile Include="encodings\unicode_escape.py" />
|
||||
<Compile Include="encodings\unicode_internal.py" />
|
||||
<Compile Include="encodings\utf_16.py" />
|
||||
<Compile Include="encodings\utf_16_be.py" />
|
||||
<Compile Include="encodings\utf_16_le.py" />
|
||||
|
|
Loading…
Reference in New Issue