gh-87193: Support bytes objects with refcount > 1 in _PyBytes_Resize() (GH-117160)

Create a new bytes object and destroy the old one if it has refcount > 1.
This commit is contained in:
Serhiy Storchaka 2024-03-25 17:32:11 +02:00 committed by GitHub
parent 01e7405da4
commit 0c1a42cf9c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 123 additions and 30 deletions

View File

@ -191,10 +191,10 @@ called with a non-bytes parameter.
.. c:function:: int _PyBytes_Resize(PyObject **bytes, Py_ssize_t newsize)
A way to resize a bytes object even though it is "immutable". Only use this
to build up a brand new bytes object; don't use this if the bytes may already
be known in other parts of the code. It is an error to call this function if
the refcount on the input bytes object is not one. Pass the address of an
Resize a bytes object. *newsize* will be the new length of the bytes object.
You can think of it as creating a new bytes object and destroying the old
one, only more efficiently.
Pass the address of an
existing bytes object as an lvalue (it may be written into), and the new size
desired. On success, *\*bytes* holds the resized bytes object and ``0`` is
returned; the address in *\*bytes* may differ from its input value. If the

View File

@ -2,6 +2,7 @@ import unittest
from test.support import import_helper
_testlimitedcapi = import_helper.import_module('_testlimitedcapi')
_testcapi = import_helper.import_module('_testcapi')
from _testcapi import PY_SSIZE_T_MIN, PY_SSIZE_T_MAX
NULL = None
@ -217,6 +218,35 @@ class CAPITest(unittest.TestCase):
# CRASHES decodeescape(b'abc', NULL, -1)
# CRASHES decodeescape(NULL, NULL, 1)
def test_resize(self):
"""Test _PyBytes_Resize()"""
resize = _testcapi.bytes_resize
for new in True, False:
self.assertEqual(resize(b'abc', 0, new), b'')
self.assertEqual(resize(b'abc', 1, new), b'a')
self.assertEqual(resize(b'abc', 2, new), b'ab')
self.assertEqual(resize(b'abc', 3, new), b'abc')
b = resize(b'abc', 4, new)
self.assertEqual(len(b), 4)
self.assertEqual(b[:3], b'abc')
self.assertEqual(resize(b'a', 0, new), b'')
self.assertEqual(resize(b'a', 1, new), b'a')
b = resize(b'a', 2, new)
self.assertEqual(len(b), 2)
self.assertEqual(b[:1], b'a')
self.assertEqual(resize(b'', 0, new), b'')
self.assertEqual(len(resize(b'', 1, new)), 1)
self.assertEqual(len(resize(b'', 2, new)), 2)
self.assertRaises(SystemError, resize, b'abc', -1, False)
self.assertRaises(SystemError, resize, bytearray(b'abc'), 3, False)
# CRASHES resize(NULL, 0, False)
# CRASHES resize(NULL, 3, False)
if __name__ == "__main__":
unittest.main()

View File

@ -0,0 +1,3 @@
:c:func:`_PyBytes_Resize` can now be called for bytes objects with reference
count > 1, including 1-byte bytes objects. It creates a new bytes object and
destroys the old one if it has reference count > 1.

View File

@ -162,7 +162,7 @@
@MODULE__XXTESTFUZZ_TRUE@_xxtestfuzz _xxtestfuzz/_xxtestfuzz.c _xxtestfuzz/fuzzer.c
@MODULE__TESTBUFFER_TRUE@_testbuffer _testbuffer.c
@MODULE__TESTINTERNALCAPI_TRUE@_testinternalcapi _testinternalcapi.c _testinternalcapi/test_lock.c _testinternalcapi/pytime.c _testinternalcapi/set.c _testinternalcapi/test_critical_sections.c
@MODULE__TESTCAPI_TRUE@_testcapi _testcapimodule.c _testcapi/vectorcall.c _testcapi/heaptype.c _testcapi/abstract.c _testcapi/unicode.c _testcapi/dict.c _testcapi/set.c _testcapi/list.c _testcapi/tuple.c _testcapi/getargs.c _testcapi/datetime.c _testcapi/docstring.c _testcapi/mem.c _testcapi/watchers.c _testcapi/long.c _testcapi/float.c _testcapi/complex.c _testcapi/numbers.c _testcapi/structmember.c _testcapi/exceptions.c _testcapi/code.c _testcapi/buffer.c _testcapi/pyatomic.c _testcapi/file.c _testcapi/codec.c _testcapi/immortal.c _testcapi/gc.c _testcapi/hash.c _testcapi/time.c
@MODULE__TESTCAPI_TRUE@_testcapi _testcapimodule.c _testcapi/vectorcall.c _testcapi/heaptype.c _testcapi/abstract.c _testcapi/unicode.c _testcapi/dict.c _testcapi/set.c _testcapi/list.c _testcapi/tuple.c _testcapi/getargs.c _testcapi/datetime.c _testcapi/docstring.c _testcapi/mem.c _testcapi/watchers.c _testcapi/long.c _testcapi/float.c _testcapi/complex.c _testcapi/numbers.c _testcapi/structmember.c _testcapi/exceptions.c _testcapi/code.c _testcapi/buffer.c _testcapi/pyatomic.c _testcapi/file.c _testcapi/codec.c _testcapi/immortal.c _testcapi/gc.c _testcapi/hash.c _testcapi/time.c _testcapi/bytes.c
@MODULE__TESTLIMITEDCAPI_TRUE@_testlimitedcapi _testlimitedcapi.c _testlimitedcapi/abstract.c _testlimitedcapi/bytearray.c _testlimitedcapi/bytes.c _testlimitedcapi/complex.c _testlimitedcapi/dict.c _testlimitedcapi/float.c _testlimitedcapi/heaptype_relative.c _testlimitedcapi/list.c _testlimitedcapi/long.c _testlimitedcapi/object.c _testlimitedcapi/pyos.c _testlimitedcapi/set.c _testlimitedcapi/sys.c _testlimitedcapi/unicode.c _testlimitedcapi/vectorcall_limited.c
@MODULE__TESTCLINIC_TRUE@_testclinic _testclinic.c
@MODULE__TESTCLINIC_LIMITED_TRUE@_testclinic_limited _testclinic_limited.c

53
Modules/_testcapi/bytes.c Normal file
View File

@ -0,0 +1,53 @@
#include "parts.h"
#include "util.h"
/* Test _PyBytes_Resize() */
static PyObject *
bytes_resize(PyObject *Py_UNUSED(module), PyObject *args)
{
PyObject *obj;
Py_ssize_t newsize;
int new;
if (!PyArg_ParseTuple(args, "Onp", &obj, &newsize, &new))
return NULL;
NULLABLE(obj);
if (new) {
assert(obj != NULL);
assert(PyBytes_CheckExact(obj));
PyObject *newobj = PyBytes_FromStringAndSize(NULL, PyBytes_Size(obj));
if (newobj == NULL) {
return NULL;
}
memcpy(PyBytes_AsString(newobj), PyBytes_AsString(obj), PyBytes_Size(obj));
obj = newobj;
}
else {
Py_XINCREF(obj);
}
if (_PyBytes_Resize(&obj, newsize) < 0) {
assert(obj == NULL);
}
else {
assert(obj != NULL);
}
return obj;
}
static PyMethodDef test_methods[] = {
{"bytes_resize", bytes_resize, METH_VARARGS},
{NULL},
};
int
_PyTestCapi_Init_Bytes(PyObject *m)
{
if (PyModule_AddFunctions(m, test_methods) < 0) {
return -1;
}
return 0;
}

View File

@ -31,6 +31,7 @@
int _PyTestCapi_Init_Vectorcall(PyObject *module);
int _PyTestCapi_Init_Heaptype(PyObject *module);
int _PyTestCapi_Init_Abstract(PyObject *module);
int _PyTestCapi_Init_Bytes(PyObject *module);
int _PyTestCapi_Init_Unicode(PyObject *module);
int _PyTestCapi_Init_GetArgs(PyObject *module);
int _PyTestCapi_Init_DateTime(PyObject *module);

View File

@ -3971,6 +3971,9 @@ PyInit__testcapi(void)
if (_PyTestCapi_Init_Abstract(m) < 0) {
return NULL;
}
if (_PyTestCapi_Init_Bytes(m) < 0) {
return NULL;
}
if (_PyTestCapi_Init_Unicode(m) < 0) {
return NULL;
}

View File

@ -3025,11 +3025,9 @@ PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
/* The following function breaks the notion that bytes are immutable:
it changes the size of a bytes object. We get away with this only if there
is only one module referencing the object. You can also think of it
it changes the size of a bytes object. You can think of it
as creating a new bytes object and destroying the old one, only
more efficiently. In any case, don't use this if the bytes object may
already be known to some other part of the code...
more efficiently.
Note that if there's not enough memory to resize the bytes object, the
original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
memory" exception is set, and -1 is returned. Else (on success) 0 is
@ -3045,28 +3043,40 @@ _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
PyBytesObject *sv;
v = *pv;
if (!PyBytes_Check(v) || newsize < 0) {
goto error;
*pv = 0;
Py_DECREF(v);
PyErr_BadInternalCall();
return -1;
}
if (Py_SIZE(v) == newsize) {
Py_ssize_t oldsize = PyBytes_GET_SIZE(v);
if (oldsize == newsize) {
/* return early if newsize equals to v->ob_size */
return 0;
}
if (Py_SIZE(v) == 0) {
if (newsize == 0) {
return 0;
}
if (oldsize == 0) {
*pv = _PyBytes_FromSize(newsize, 0);
Py_DECREF(v);
return (*pv == NULL) ? -1 : 0;
}
if (Py_REFCNT(v) != 1) {
goto error;
}
if (newsize == 0) {
*pv = bytes_get_empty();
Py_DECREF(v);
return 0;
}
if (Py_REFCNT(v) != 1) {
if (oldsize < newsize) {
*pv = _PyBytes_FromSize(newsize, 0);
if (*pv) {
memcpy(PyBytes_AS_STRING(*pv), PyBytes_AS_STRING(v), oldsize);
}
}
else {
*pv = PyBytes_FromStringAndSize(PyBytes_AS_STRING(v), newsize);
}
Py_DECREF(v);
return (*pv == NULL) ? -1 : 0;
}
#ifdef Py_TRACE_REFS
_Py_ForgetReference(v);
#endif
@ -3089,11 +3099,6 @@ _Py_COMP_DIAG_IGNORE_DEPR_DECLS
sv->ob_shash = -1; /* invalidate cached hash value */
_Py_COMP_DIAG_POP
return 0;
error:
*pv = 0;
Py_DECREF(v);
PyErr_BadInternalCall();
return -1;
}

View File

@ -80,13 +80,7 @@ PyFile_GetLine(PyObject *f, int n)
"EOF when reading a line");
}
else if (s[len-1] == '\n') {
if (Py_REFCNT(result) == 1)
_PyBytes_Resize(&result, len-1);
else {
PyObject *v;
v = PyBytes_FromStringAndSize(s, len-1);
Py_SETREF(result, v);
}
(void) _PyBytes_Resize(&result, len-1);
}
}
if (n < 0 && result != NULL && PyUnicode_Check(result)) {

View File

@ -98,6 +98,7 @@
<ClCompile Include="..\Modules\_testcapi\vectorcall.c" />
<ClCompile Include="..\Modules\_testcapi\heaptype.c" />
<ClCompile Include="..\Modules\_testcapi\abstract.c" />
<ClCompile Include="..\Modules\_testcapi\bytes.c" />
<ClCompile Include="..\Modules\_testcapi\unicode.c" />
<ClCompile Include="..\Modules\_testcapi\dict.c" />
<ClCompile Include="..\Modules\_testcapi\set.c" />

View File

@ -30,6 +30,9 @@
<ClCompile Include="..\Modules\_testcapi\abstract.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\Modules\_testcapi\bytes.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\Modules\_testcapi\unicode.c">
<Filter>Source Files</Filter>
</ClCompile>