mirror of https://github.com/python/cpython
bpo-36819: Fix crashes in built-in encoders with weird error handlers (GH-28593)
If the error handler returns a position less than or equal to the starting position of the non-encodable characters, most of the built-in encoders didn't properly resize the output buffer. This led to out-of-bounds writes and segfaults.
This commit is contained in:
parent
614420df97
commit
18b07d773e
|
@ -1,5 +1,6 @@
|
||||||
import codecs
|
import codecs
|
||||||
import html.entities
|
import html.entities
|
||||||
|
import itertools
|
||||||
import sys
|
import sys
|
||||||
import unicodedata
|
import unicodedata
|
||||||
import unittest
|
import unittest
|
||||||
|
@ -22,6 +23,18 @@ class PosReturn:
|
||||||
self.pos = len(exc.object)
|
self.pos = len(exc.object)
|
||||||
return ("<?>", oldpos)
|
return ("<?>", oldpos)
|
||||||
|
|
||||||
|
class RepeatedPosReturn:
    """Codec error handler that rewinds the codec a limited number of times.

    While ``count`` is positive, each call to :meth:`handle` decrements it
    and tells the codec to resume at ``pos`` (0 by default), i.e. at or
    before the start of the offending data.  Once ``count`` reaches zero
    the handler resumes right after the offending data (``exc.end``).
    """

    def __init__(self, repl="<?>"):
        # Replacement handed back to the codec on every call.
        self.repl = repl
        # Resume position returned while rewinds remain.
        self.pos = 0
        # Number of remaining calls that rewind to ``pos``; tests set this
        # before each encode/decode call.
        self.count = 0

    def handle(self, exc):
        """Return ``(repl, pos)`` while rewinds remain, else ``(repl, exc.end)``."""
        if self.count > 0:
            self.count -= 1
            return (self.repl, self.pos)
        return (self.repl, exc.end)
|
||||||
|
|
||||||
# A UnicodeEncodeError object with a bad start attribute
|
# A UnicodeEncodeError object with a bad start attribute
|
||||||
class BadStartUnicodeEncodeError(UnicodeEncodeError):
|
class BadStartUnicodeEncodeError(UnicodeEncodeError):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
@ -783,20 +796,104 @@ class CodecCallbackTest(unittest.TestCase):
|
||||||
codecs.lookup_error("namereplace")
|
codecs.lookup_error("namereplace")
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_unencodablereplacement(self):
|
def test_encode_nonascii_replacement(self):
|
||||||
|
def handle(exc):
|
||||||
|
if isinstance(exc, UnicodeEncodeError):
|
||||||
|
return (repl, exc.end)
|
||||||
|
raise TypeError("don't know how to handle %r" % exc)
|
||||||
|
codecs.register_error("test.replacing", handle)
|
||||||
|
|
||||||
|
for enc, input, repl in (
|
||||||
|
("ascii", "[¤]", "abc"),
|
||||||
|
("iso-8859-1", "[€]", "½¾"),
|
||||||
|
("iso-8859-15", "[¤]", "œŸ"),
|
||||||
|
):
|
||||||
|
res = input.encode(enc, "test.replacing")
|
||||||
|
self.assertEqual(res, ("[" + repl + "]").encode(enc))
|
||||||
|
|
||||||
|
for enc, input, repl in (
|
||||||
|
("utf-8", "[\udc80]", "\U0001f40d"),
|
||||||
|
("utf-16", "[\udc80]", "\U0001f40d"),
|
||||||
|
("utf-32", "[\udc80]", "\U0001f40d"),
|
||||||
|
):
|
||||||
|
with self.subTest(encoding=enc):
|
||||||
|
with self.assertRaises(UnicodeEncodeError) as cm:
|
||||||
|
input.encode(enc, "test.replacing")
|
||||||
|
exc = cm.exception
|
||||||
|
self.assertEqual(exc.start, 1)
|
||||||
|
self.assertEqual(exc.end, 2)
|
||||||
|
self.assertEqual(exc.object, input)
|
||||||
|
|
||||||
|
def test_encode_unencodable_replacement(self):
|
||||||
def unencrepl(exc):
|
def unencrepl(exc):
|
||||||
if isinstance(exc, UnicodeEncodeError):
|
if isinstance(exc, UnicodeEncodeError):
|
||||||
return ("\u4242", exc.end)
|
return (repl, exc.end)
|
||||||
else:
|
else:
|
||||||
raise TypeError("don't know how to handle %r" % exc)
|
raise TypeError("don't know how to handle %r" % exc)
|
||||||
codecs.register_error("test.unencreplhandler", unencrepl)
|
codecs.register_error("test.unencreplhandler", unencrepl)
|
||||||
for enc in ("ascii", "iso-8859-1", "iso-8859-15"):
|
|
||||||
self.assertRaises(
|
for enc, input, repl in (
|
||||||
UnicodeEncodeError,
|
("ascii", "[¤]", "½"),
|
||||||
"\u4242".encode,
|
("iso-8859-1", "[€]", "œ"),
|
||||||
enc,
|
("iso-8859-15", "[¤]", "½"),
|
||||||
"test.unencreplhandler"
|
("utf-8", "[\udc80]", "\udcff"),
|
||||||
)
|
("utf-16", "[\udc80]", "\udcff"),
|
||||||
|
("utf-32", "[\udc80]", "\udcff"),
|
||||||
|
):
|
||||||
|
with self.subTest(encoding=enc):
|
||||||
|
with self.assertRaises(UnicodeEncodeError) as cm:
|
||||||
|
input.encode(enc, "test.unencreplhandler")
|
||||||
|
exc = cm.exception
|
||||||
|
self.assertEqual(exc.start, 1)
|
||||||
|
self.assertEqual(exc.end, 2)
|
||||||
|
self.assertEqual(exc.object, input)
|
||||||
|
|
||||||
|
def test_encode_bytes_replacement(self):
|
||||||
|
def handle(exc):
|
||||||
|
if isinstance(exc, UnicodeEncodeError):
|
||||||
|
return (repl, exc.end)
|
||||||
|
raise TypeError("don't know how to handle %r" % exc)
|
||||||
|
codecs.register_error("test.replacing", handle)
|
||||||
|
|
||||||
|
# It works even if the bytes sequence is not decodable.
|
||||||
|
for enc, input, repl in (
|
||||||
|
("ascii", "[¤]", b"\xbd\xbe"),
|
||||||
|
("iso-8859-1", "[€]", b"\xbd\xbe"),
|
||||||
|
("iso-8859-15", "[¤]", b"\xbd\xbe"),
|
||||||
|
("utf-8", "[\udc80]", b"\xbd\xbe"),
|
||||||
|
("utf-16le", "[\udc80]", b"\xbd\xbe"),
|
||||||
|
("utf-16be", "[\udc80]", b"\xbd\xbe"),
|
||||||
|
("utf-32le", "[\udc80]", b"\xbc\xbd\xbe\xbf"),
|
||||||
|
("utf-32be", "[\udc80]", b"\xbc\xbd\xbe\xbf"),
|
||||||
|
):
|
||||||
|
with self.subTest(encoding=enc):
|
||||||
|
res = input.encode(enc, "test.replacing")
|
||||||
|
self.assertEqual(res, "[".encode(enc) + repl + "]".encode(enc))
|
||||||
|
|
||||||
|
def test_encode_odd_bytes_replacement(self):
|
||||||
|
def handle(exc):
|
||||||
|
if isinstance(exc, UnicodeEncodeError):
|
||||||
|
return (repl, exc.end)
|
||||||
|
raise TypeError("don't know how to handle %r" % exc)
|
||||||
|
codecs.register_error("test.replacing", handle)
|
||||||
|
|
||||||
|
input = "[\udc80]"
|
||||||
|
# Tests in which the replacement bytestring contains not whole number
|
||||||
|
# of code units.
|
||||||
|
for enc, repl in (
|
||||||
|
*itertools.product(("utf-16le", "utf-16be"),
|
||||||
|
[b"a", b"abc"]),
|
||||||
|
*itertools.product(("utf-32le", "utf-32be"),
|
||||||
|
[b"a", b"ab", b"abc", b"abcde"]),
|
||||||
|
):
|
||||||
|
with self.subTest(encoding=enc, repl=repl):
|
||||||
|
with self.assertRaises(UnicodeEncodeError) as cm:
|
||||||
|
input.encode(enc, "test.replacing")
|
||||||
|
exc = cm.exception
|
||||||
|
self.assertEqual(exc.start, 1)
|
||||||
|
self.assertEqual(exc.end, 2)
|
||||||
|
self.assertEqual(exc.object, input)
|
||||||
|
self.assertEqual(exc.reason, "surrogates not allowed")
|
||||||
|
|
||||||
def test_badregistercall(self):
|
def test_badregistercall(self):
|
||||||
# enhance coverage of:
|
# enhance coverage of:
|
||||||
|
@ -940,6 +1037,68 @@ class CodecCallbackTest(unittest.TestCase):
|
||||||
self.assertRaises(ValueError, codecs.charmap_encode, "\xff", err, D())
|
self.assertRaises(ValueError, codecs.charmap_encode, "\xff", err, D())
|
||||||
self.assertRaises(TypeError, codecs.charmap_encode, "\xff", err, {0xff: 300})
|
self.assertRaises(TypeError, codecs.charmap_encode, "\xff", err, {0xff: 300})
|
||||||
|
|
||||||
|
def test_decodehelper_bug36819(self):
    """Decoders cope with an error handler that keeps rewinding to position 0."""
    handler = RepeatedPosReturn("x")
    codecs.register_error("test.bug36819", handler.handle)

    # (encoding, byte sequence that the encoding cannot decode)
    testcases = [
        ("ascii", b"\xff"),
        ("utf-8", b"\xff"),
        ("utf-16be", b'\xdc\x80'),
        ("utf-32be", b'\x00\x00\xdc\x80'),
        ("iso-8859-6", b"\xff"),
    ]
    for enc, bad in testcases:
        data = "abcd".encode(enc) + bad
        with self.subTest(encoding=enc):
            # 50 rewinds to position 0, then one final resume after the
            # bad bytes: "abcd" + replacement is produced 51 times.
            handler.count = 50
            decoded = data.decode(enc, "test.bug36819")
            self.assertEqual(decoded, 'abcdx' * 51)
|
||||||
|
|
||||||
|
def test_encodehelper_bug36819(self):
    """Encoders cope with an error handler that keeps rewinding to position 0.

    Two scenarios are checked for every encoding: a replacement that is
    itself unencodable (must raise UnicodeEncodeError pointing at the
    original surrogate) and an encodable replacement (must produce the
    prefix plus replacement once per handler call).
    """
    handler = RepeatedPosReturn()
    codecs.register_error("test.bug36819", handler.handle)

    text = "abcd\udc80"
    encodings = ["ascii", "latin1", "utf-8", "utf-16", "utf-32"]  # built-in
    encodings += ["iso-8859-15"]  # charmap codec
    if sys.platform == 'win32':
        # Extend rather than replace, so the built-in and charmap encoders
        # are still exercised on Windows.
        encodings += ["mbcs", "oem"]  # code page codecs

    # Unencodable replacement: every encoder must give up with an error
    # that describes the original lone surrogate.
    handler.repl = "\udcff"
    for enc in encodings:
        with self.subTest(encoding=enc):
            handler.count = 50
            with self.assertRaises(UnicodeEncodeError) as cm:
                text.encode(enc, "test.bug36819")
            exc = cm.exception
            self.assertEqual(exc.start, 4)
            self.assertEqual(exc.end, 5)
            self.assertEqual(exc.object, text)
    if sys.platform == "win32":
        handler.count = 50
        with self.assertRaises(UnicodeEncodeError) as cm:
            codecs.code_page_encode(437, text, "test.bug36819")
        exc = cm.exception
        self.assertEqual(exc.start, 4)
        self.assertEqual(exc.end, 5)
        self.assertEqual(exc.object, text)

    # Encodable replacement: the output must grow on every rewind.
    handler.repl = "x"
    for enc in encodings:
        with self.subTest(encoding=enc):
            # Without the fix the interpreter segfaults after a handful
            # of rewinds.  50 was chosen to try to ensure a segfault on
            # an unfixed build, but not OOM a machine with a fixed one.
            handler.count = 50
            encoded = text.encode(enc, "test.bug36819")
            self.assertEqual(encoded.decode(enc), "abcdx" * 51)
    if sys.platform == "win32":
        handler.count = 50
        encoded = codecs.code_page_encode(437, text, "test.bug36819")
        self.assertEqual(encoded[0].decode(), "abcdx" * 51)
        self.assertEqual(encoded[1], len(text))
|
||||||
|
|
||||||
def test_translatehelper(self):
|
def test_translatehelper(self):
|
||||||
# enhance coverage of:
|
# enhance coverage of:
|
||||||
# Objects/unicodeobject.c::unicode_encode_call_errorhandler()
|
# Objects/unicodeobject.c::unicode_encode_call_errorhandler()
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
Fix crashes in built-in encoders with error handlers that return a position
|
||||||
|
less than or equal to the starting position of non-encodable characters.
|
|
@ -387,8 +387,19 @@ STRINGLIB(utf8_encoder)(_PyBytesWriter *writer,
|
||||||
if (!rep)
|
if (!rep)
|
||||||
goto error;
|
goto error;
|
||||||
|
|
||||||
/* subtract preallocated bytes */
|
if (newpos < startpos) {
|
||||||
writer->min_size -= max_char_size * (newpos - startpos);
|
writer->overallocate = 1;
|
||||||
|
p = _PyBytesWriter_Prepare(writer, p,
|
||||||
|
max_char_size * (startpos - newpos));
|
||||||
|
if (p == NULL)
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/* subtract preallocated bytes */
|
||||||
|
writer->min_size -= max_char_size * (newpos - startpos);
|
||||||
|
/* Only overallocate the buffer if it's not the last write */
|
||||||
|
writer->overallocate = (newpos < size);
|
||||||
|
}
|
||||||
|
|
||||||
if (PyBytes_Check(rep)) {
|
if (PyBytes_Check(rep)) {
|
||||||
p = _PyBytesWriter_WriteBytes(writer, p,
|
p = _PyBytesWriter_WriteBytes(writer, p,
|
||||||
|
|
|
@ -5868,7 +5868,7 @@ _PyUnicode_EncodeUTF32(PyObject *str,
|
||||||
|
|
||||||
pos = 0;
|
pos = 0;
|
||||||
while (pos < len) {
|
while (pos < len) {
|
||||||
Py_ssize_t repsize, moreunits;
|
Py_ssize_t newpos, repsize, moreunits;
|
||||||
|
|
||||||
if (kind == PyUnicode_2BYTE_KIND) {
|
if (kind == PyUnicode_2BYTE_KIND) {
|
||||||
pos += ucs2lib_utf32_encode((const Py_UCS2 *)data + pos, len - pos,
|
pos += ucs2lib_utf32_encode((const Py_UCS2 *)data + pos, len - pos,
|
||||||
|
@ -5885,7 +5885,7 @@ _PyUnicode_EncodeUTF32(PyObject *str,
|
||||||
rep = unicode_encode_call_errorhandler(
|
rep = unicode_encode_call_errorhandler(
|
||||||
errors, &errorHandler,
|
errors, &errorHandler,
|
||||||
encoding, "surrogates not allowed",
|
encoding, "surrogates not allowed",
|
||||||
str, &exc, pos, pos + 1, &pos);
|
str, &exc, pos, pos + 1, &newpos);
|
||||||
if (!rep)
|
if (!rep)
|
||||||
goto error;
|
goto error;
|
||||||
|
|
||||||
|
@ -5893,7 +5893,7 @@ _PyUnicode_EncodeUTF32(PyObject *str,
|
||||||
repsize = PyBytes_GET_SIZE(rep);
|
repsize = PyBytes_GET_SIZE(rep);
|
||||||
if (repsize & 3) {
|
if (repsize & 3) {
|
||||||
raise_encode_exception(&exc, encoding,
|
raise_encode_exception(&exc, encoding,
|
||||||
str, pos - 1, pos,
|
str, pos, pos + 1,
|
||||||
"surrogates not allowed");
|
"surrogates not allowed");
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
@ -5906,28 +5906,30 @@ _PyUnicode_EncodeUTF32(PyObject *str,
|
||||||
moreunits = repsize = PyUnicode_GET_LENGTH(rep);
|
moreunits = repsize = PyUnicode_GET_LENGTH(rep);
|
||||||
if (!PyUnicode_IS_ASCII(rep)) {
|
if (!PyUnicode_IS_ASCII(rep)) {
|
||||||
raise_encode_exception(&exc, encoding,
|
raise_encode_exception(&exc, encoding,
|
||||||
str, pos - 1, pos,
|
str, pos, pos + 1,
|
||||||
"surrogates not allowed");
|
"surrogates not allowed");
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
moreunits += pos - newpos;
|
||||||
|
pos = newpos;
|
||||||
|
|
||||||
/* four bytes are reserved for each surrogate */
|
/* four bytes are reserved for each surrogate */
|
||||||
if (moreunits > 1) {
|
if (moreunits > 0) {
|
||||||
Py_ssize_t outpos = out - (uint32_t*) PyBytes_AS_STRING(v);
|
Py_ssize_t outpos = out - (uint32_t*) PyBytes_AS_STRING(v);
|
||||||
if (moreunits >= (PY_SSIZE_T_MAX - PyBytes_GET_SIZE(v)) / 4) {
|
if (moreunits >= (PY_SSIZE_T_MAX - PyBytes_GET_SIZE(v)) / 4) {
|
||||||
/* integer overflow */
|
/* integer overflow */
|
||||||
PyErr_NoMemory();
|
PyErr_NoMemory();
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
if (_PyBytes_Resize(&v, PyBytes_GET_SIZE(v) + 4 * (moreunits - 1)) < 0)
|
if (_PyBytes_Resize(&v, PyBytes_GET_SIZE(v) + 4 * moreunits) < 0)
|
||||||
goto error;
|
goto error;
|
||||||
out = (uint32_t*) PyBytes_AS_STRING(v) + outpos;
|
out = (uint32_t*) PyBytes_AS_STRING(v) + outpos;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (PyBytes_Check(rep)) {
|
if (PyBytes_Check(rep)) {
|
||||||
memcpy(out, PyBytes_AS_STRING(rep), repsize);
|
memcpy(out, PyBytes_AS_STRING(rep), repsize);
|
||||||
out += moreunits;
|
out += repsize / 4;
|
||||||
} else /* rep is unicode */ {
|
} else /* rep is unicode */ {
|
||||||
assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND);
|
assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND);
|
||||||
ucs1lib_utf32_encode(PyUnicode_1BYTE_DATA(rep), repsize,
|
ucs1lib_utf32_encode(PyUnicode_1BYTE_DATA(rep), repsize,
|
||||||
|
@ -6205,7 +6207,7 @@ _PyUnicode_EncodeUTF16(PyObject *str,
|
||||||
|
|
||||||
pos = 0;
|
pos = 0;
|
||||||
while (pos < len) {
|
while (pos < len) {
|
||||||
Py_ssize_t repsize, moreunits;
|
Py_ssize_t newpos, repsize, moreunits;
|
||||||
|
|
||||||
if (kind == PyUnicode_2BYTE_KIND) {
|
if (kind == PyUnicode_2BYTE_KIND) {
|
||||||
pos += ucs2lib_utf16_encode((const Py_UCS2 *)data + pos, len - pos,
|
pos += ucs2lib_utf16_encode((const Py_UCS2 *)data + pos, len - pos,
|
||||||
|
@ -6222,7 +6224,7 @@ _PyUnicode_EncodeUTF16(PyObject *str,
|
||||||
rep = unicode_encode_call_errorhandler(
|
rep = unicode_encode_call_errorhandler(
|
||||||
errors, &errorHandler,
|
errors, &errorHandler,
|
||||||
encoding, "surrogates not allowed",
|
encoding, "surrogates not allowed",
|
||||||
str, &exc, pos, pos + 1, &pos);
|
str, &exc, pos, pos + 1, &newpos);
|
||||||
if (!rep)
|
if (!rep)
|
||||||
goto error;
|
goto error;
|
||||||
|
|
||||||
|
@ -6230,7 +6232,7 @@ _PyUnicode_EncodeUTF16(PyObject *str,
|
||||||
repsize = PyBytes_GET_SIZE(rep);
|
repsize = PyBytes_GET_SIZE(rep);
|
||||||
if (repsize & 1) {
|
if (repsize & 1) {
|
||||||
raise_encode_exception(&exc, encoding,
|
raise_encode_exception(&exc, encoding,
|
||||||
str, pos - 1, pos,
|
str, pos, pos + 1,
|
||||||
"surrogates not allowed");
|
"surrogates not allowed");
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
@ -6243,28 +6245,30 @@ _PyUnicode_EncodeUTF16(PyObject *str,
|
||||||
moreunits = repsize = PyUnicode_GET_LENGTH(rep);
|
moreunits = repsize = PyUnicode_GET_LENGTH(rep);
|
||||||
if (!PyUnicode_IS_ASCII(rep)) {
|
if (!PyUnicode_IS_ASCII(rep)) {
|
||||||
raise_encode_exception(&exc, encoding,
|
raise_encode_exception(&exc, encoding,
|
||||||
str, pos - 1, pos,
|
str, pos, pos + 1,
|
||||||
"surrogates not allowed");
|
"surrogates not allowed");
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
moreunits += pos - newpos;
|
||||||
|
pos = newpos;
|
||||||
|
|
||||||
/* two bytes are reserved for each surrogate */
|
/* two bytes are reserved for each surrogate */
|
||||||
if (moreunits > 1) {
|
if (moreunits > 0) {
|
||||||
Py_ssize_t outpos = out - (unsigned short*) PyBytes_AS_STRING(v);
|
Py_ssize_t outpos = out - (unsigned short*) PyBytes_AS_STRING(v);
|
||||||
if (moreunits >= (PY_SSIZE_T_MAX - PyBytes_GET_SIZE(v)) / 2) {
|
if (moreunits >= (PY_SSIZE_T_MAX - PyBytes_GET_SIZE(v)) / 2) {
|
||||||
/* integer overflow */
|
/* integer overflow */
|
||||||
PyErr_NoMemory();
|
PyErr_NoMemory();
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
if (_PyBytes_Resize(&v, PyBytes_GET_SIZE(v) + 2 * (moreunits - 1)) < 0)
|
if (_PyBytes_Resize(&v, PyBytes_GET_SIZE(v) + 2 * moreunits) < 0)
|
||||||
goto error;
|
goto error;
|
||||||
out = (unsigned short*) PyBytes_AS_STRING(v) + outpos;
|
out = (unsigned short*) PyBytes_AS_STRING(v) + outpos;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (PyBytes_Check(rep)) {
|
if (PyBytes_Check(rep)) {
|
||||||
memcpy(out, PyBytes_AS_STRING(rep), repsize);
|
memcpy(out, PyBytes_AS_STRING(rep), repsize);
|
||||||
out += moreunits;
|
out += repsize / 2;
|
||||||
} else /* rep is unicode */ {
|
} else /* rep is unicode */ {
|
||||||
assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND);
|
assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND);
|
||||||
ucs1lib_utf16_encode(PyUnicode_1BYTE_DATA(rep), repsize,
|
ucs1lib_utf16_encode(PyUnicode_1BYTE_DATA(rep), repsize,
|
||||||
|
@ -7167,8 +7171,19 @@ unicode_encode_ucs1(PyObject *unicode,
|
||||||
if (rep == NULL)
|
if (rep == NULL)
|
||||||
goto onError;
|
goto onError;
|
||||||
|
|
||||||
/* subtract preallocated bytes */
|
if (newpos < collstart) {
|
||||||
writer.min_size -= newpos - collstart;
|
writer.overallocate = 1;
|
||||||
|
str = _PyBytesWriter_Prepare(&writer, str,
|
||||||
|
collstart - newpos);
|
||||||
|
if (str == NULL)
|
||||||
|
goto onError;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/* subtract preallocated bytes */
|
||||||
|
writer.min_size -= newpos - collstart;
|
||||||
|
/* Only overallocate the buffer if it's not the last write */
|
||||||
|
writer.overallocate = (newpos < size);
|
||||||
|
}
|
||||||
|
|
||||||
if (PyBytes_Check(rep)) {
|
if (PyBytes_Check(rep)) {
|
||||||
/* Directly copy bytes result to output. */
|
/* Directly copy bytes result to output. */
|
||||||
|
@ -7944,13 +7959,14 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes,
|
||||||
pos, pos + 1, &newpos);
|
pos, pos + 1, &newpos);
|
||||||
if (rep == NULL)
|
if (rep == NULL)
|
||||||
goto error;
|
goto error;
|
||||||
pos = newpos;
|
|
||||||
|
|
||||||
|
Py_ssize_t morebytes = pos - newpos;
|
||||||
if (PyBytes_Check(rep)) {
|
if (PyBytes_Check(rep)) {
|
||||||
outsize = PyBytes_GET_SIZE(rep);
|
outsize = PyBytes_GET_SIZE(rep);
|
||||||
if (outsize != 1) {
|
morebytes += outsize;
|
||||||
|
if (morebytes > 0) {
|
||||||
Py_ssize_t offset = out - PyBytes_AS_STRING(*outbytes);
|
Py_ssize_t offset = out - PyBytes_AS_STRING(*outbytes);
|
||||||
newoutsize = PyBytes_GET_SIZE(*outbytes) + (outsize - 1);
|
newoutsize = PyBytes_GET_SIZE(*outbytes) + morebytes;
|
||||||
if (_PyBytes_Resize(outbytes, newoutsize) < 0) {
|
if (_PyBytes_Resize(outbytes, newoutsize) < 0) {
|
||||||
Py_DECREF(rep);
|
Py_DECREF(rep);
|
||||||
goto error;
|
goto error;
|
||||||
|
@ -7971,9 +7987,10 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes,
|
||||||
}
|
}
|
||||||
|
|
||||||
outsize = PyUnicode_GET_LENGTH(rep);
|
outsize = PyUnicode_GET_LENGTH(rep);
|
||||||
if (outsize != 1) {
|
morebytes += outsize;
|
||||||
|
if (morebytes > 0) {
|
||||||
Py_ssize_t offset = out - PyBytes_AS_STRING(*outbytes);
|
Py_ssize_t offset = out - PyBytes_AS_STRING(*outbytes);
|
||||||
newoutsize = PyBytes_GET_SIZE(*outbytes) + (outsize - 1);
|
newoutsize = PyBytes_GET_SIZE(*outbytes) + morebytes;
|
||||||
if (_PyBytes_Resize(outbytes, newoutsize) < 0) {
|
if (_PyBytes_Resize(outbytes, newoutsize) < 0) {
|
||||||
Py_DECREF(rep);
|
Py_DECREF(rep);
|
||||||
goto error;
|
goto error;
|
||||||
|
@ -7996,6 +8013,7 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes,
|
||||||
out++;
|
out++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
pos = newpos;
|
||||||
Py_DECREF(rep);
|
Py_DECREF(rep);
|
||||||
}
|
}
|
||||||
/* write a NUL byte */
|
/* write a NUL byte */
|
||||||
|
|
Loading…
Reference in New Issue