From 0c1a42cf9c8cd0d4534d5c1d58f118ce7c5c446e Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 25 Mar 2024 17:32:11 +0200 Subject: [PATCH] gh-87193: Support bytes objects with refcount > 1 in _PyBytes_Resize() (GH-117160) Create a new bytes object and destroy the old one if it has refcount > 1. --- Doc/c-api/bytes.rst | 8 +-- Lib/test/test_capi/test_bytes.py | 30 +++++++++++ ...4-03-22-19-29-24.gh-issue-87193.u7O-jY.rst | 3 ++ Modules/Setup.stdlib.in | 2 +- Modules/_testcapi/bytes.c | 53 +++++++++++++++++++ Modules/_testcapi/parts.h | 1 + Modules/_testcapimodule.c | 3 ++ Objects/bytesobject.c | 41 +++++++------- Objects/fileobject.c | 8 +-- PCbuild/_testcapi.vcxproj | 1 + PCbuild/_testcapi.vcxproj.filters | 3 ++ 11 files changed, 123 insertions(+), 30 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2024-03-22-19-29-24.gh-issue-87193.u7O-jY.rst create mode 100644 Modules/_testcapi/bytes.c diff --git a/Doc/c-api/bytes.rst b/Doc/c-api/bytes.rst index 4790d3b2da4..bca78a9c369 100644 --- a/Doc/c-api/bytes.rst +++ b/Doc/c-api/bytes.rst @@ -191,10 +191,10 @@ called with a non-bytes parameter. .. c:function:: int _PyBytes_Resize(PyObject **bytes, Py_ssize_t newsize) - A way to resize a bytes object even though it is "immutable". Only use this - to build up a brand new bytes object; don't use this if the bytes may already - be known in other parts of the code. It is an error to call this function if - the refcount on the input bytes object is not one. Pass the address of an + Resize a bytes object. *newsize* will be the new length of the bytes object. + You can think of it as creating a new bytes object and destroying the old + one, only more efficiently. + Pass the address of an existing bytes object as an lvalue (it may be written into), and the new size desired. On success, *\*bytes* holds the resized bytes object and ``0`` is returned; the address in *\*bytes* may differ from its input value. If the diff --git a/Lib/test/test_capi/test_bytes.py b/Lib/test/test_capi/test_bytes.py index a2ba7708f8f..f14d5545c82 100644 --- a/Lib/test/test_capi/test_bytes.py +++ b/Lib/test/test_capi/test_bytes.py @@ -2,6 +2,7 @@ import unittest from test.support import import_helper _testlimitedcapi = import_helper.import_module('_testlimitedcapi') +_testcapi = import_helper.import_module('_testcapi') from _testcapi import PY_SSIZE_T_MIN, PY_SSIZE_T_MAX NULL = None @@ -217,6 +218,35 @@ class CAPITest(unittest.TestCase): # CRASHES decodeescape(b'abc', NULL, -1) # CRASHES decodeescape(NULL, NULL, 1) + def test_resize(self): + """Test _PyBytes_Resize()""" + resize = _testcapi.bytes_resize + + for new in True, False: + self.assertEqual(resize(b'abc', 0, new), b'') + self.assertEqual(resize(b'abc', 1, new), b'a') + self.assertEqual(resize(b'abc', 2, new), b'ab') + self.assertEqual(resize(b'abc', 3, new), b'abc') + b = resize(b'abc', 4, new) + self.assertEqual(len(b), 4) + self.assertEqual(b[:3], b'abc') + + self.assertEqual(resize(b'a', 0, new), b'') + self.assertEqual(resize(b'a', 1, new), b'a') + b = resize(b'a', 2, new) + self.assertEqual(len(b), 2) + self.assertEqual(b[:1], b'a') + + self.assertEqual(resize(b'', 0, new), b'') + self.assertEqual(len(resize(b'', 1, new)), 1) + self.assertEqual(len(resize(b'', 2, new)), 2) + + self.assertRaises(SystemError, resize, b'abc', -1, False) + self.assertRaises(SystemError, resize, bytearray(b'abc'), 3, False) + + # CRASHES resize(NULL, 0, False) + # CRASHES resize(NULL, 3, False) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/C API/2024-03-22-19-29-24.gh-issue-87193.u7O-jY.rst b/Misc/NEWS.d/next/C API/2024-03-22-19-29-24.gh-issue-87193.u7O-jY.rst new file mode 100644 index 00000000000..cb921a9c7bf --- /dev/null +++ b/Misc/NEWS.d/next/C API/2024-03-22-19-29-24.gh-issue-87193.u7O-jY.rst @@ -0,0 +1,3 @@ +:c:func:`_PyBytes_Resize` can now be called for bytes objects with reference +count > 1, including 1-byte bytes objects. It creates a new bytes object and +destroys the old one if it has reference count > 1. diff --git a/Modules/Setup.stdlib.in b/Modules/Setup.stdlib.in index 09d6f3b2bb7..ff5c05f88d0 100644 --- a/Modules/Setup.stdlib.in +++ b/Modules/Setup.stdlib.in @@ -162,7 +162,7 @@ @MODULE__XXTESTFUZZ_TRUE@_xxtestfuzz _xxtestfuzz/_xxtestfuzz.c _xxtestfuzz/fuzzer.c @MODULE__TESTBUFFER_TRUE@_testbuffer _testbuffer.c @MODULE__TESTINTERNALCAPI_TRUE@_testinternalcapi _testinternalcapi.c _testinternalcapi/test_lock.c _testinternalcapi/pytime.c _testinternalcapi/set.c _testinternalcapi/test_critical_sections.c -@MODULE__TESTCAPI_TRUE@_testcapi _testcapimodule.c _testcapi/vectorcall.c _testcapi/heaptype.c _testcapi/abstract.c _testcapi/unicode.c _testcapi/dict.c _testcapi/set.c _testcapi/list.c _testcapi/tuple.c _testcapi/getargs.c _testcapi/datetime.c _testcapi/docstring.c _testcapi/mem.c _testcapi/watchers.c _testcapi/long.c _testcapi/float.c _testcapi/complex.c _testcapi/numbers.c _testcapi/structmember.c _testcapi/exceptions.c _testcapi/code.c _testcapi/buffer.c _testcapi/pyatomic.c _testcapi/file.c _testcapi/codec.c _testcapi/immortal.c _testcapi/gc.c _testcapi/hash.c _testcapi/time.c +@MODULE__TESTCAPI_TRUE@_testcapi _testcapimodule.c _testcapi/vectorcall.c _testcapi/heaptype.c _testcapi/abstract.c _testcapi/unicode.c _testcapi/dict.c _testcapi/set.c _testcapi/list.c _testcapi/tuple.c _testcapi/getargs.c _testcapi/datetime.c _testcapi/docstring.c _testcapi/mem.c _testcapi/watchers.c _testcapi/long.c _testcapi/float.c _testcapi/complex.c _testcapi/numbers.c _testcapi/structmember.c _testcapi/exceptions.c _testcapi/code.c _testcapi/buffer.c _testcapi/pyatomic.c _testcapi/file.c _testcapi/codec.c _testcapi/immortal.c _testcapi/gc.c _testcapi/hash.c _testcapi/time.c _testcapi/bytes.c @MODULE__TESTLIMITEDCAPI_TRUE@_testlimitedcapi _testlimitedcapi.c _testlimitedcapi/abstract.c _testlimitedcapi/bytearray.c _testlimitedcapi/bytes.c _testlimitedcapi/complex.c _testlimitedcapi/dict.c _testlimitedcapi/float.c _testlimitedcapi/heaptype_relative.c _testlimitedcapi/list.c _testlimitedcapi/long.c _testlimitedcapi/object.c _testlimitedcapi/pyos.c _testlimitedcapi/set.c _testlimitedcapi/sys.c _testlimitedcapi/unicode.c _testlimitedcapi/vectorcall_limited.c @MODULE__TESTCLINIC_TRUE@_testclinic _testclinic.c @MODULE__TESTCLINIC_LIMITED_TRUE@_testclinic_limited _testclinic_limited.c diff --git a/Modules/_testcapi/bytes.c b/Modules/_testcapi/bytes.c new file mode 100644 index 00000000000..02294d8887a --- /dev/null +++ b/Modules/_testcapi/bytes.c @@ -0,0 +1,53 @@ +#include "parts.h" +#include "util.h" + + +/* Test _PyBytes_Resize() */ +static PyObject * +bytes_resize(PyObject *Py_UNUSED(module), PyObject *args) +{ + PyObject *obj; + Py_ssize_t newsize; + int new; + + if (!PyArg_ParseTuple(args, "Onp", &obj, &newsize, &new)) + return NULL; + + NULLABLE(obj); + if (new) { + assert(obj != NULL); + assert(PyBytes_CheckExact(obj)); + PyObject *newobj = PyBytes_FromStringAndSize(NULL, PyBytes_Size(obj)); + if (newobj == NULL) { + return NULL; + } + memcpy(PyBytes_AsString(newobj), PyBytes_AsString(obj), PyBytes_Size(obj)); + obj = newobj; + } + else { + Py_XINCREF(obj); + } + if (_PyBytes_Resize(&obj, newsize) < 0) { + assert(obj == NULL); + } + else { + assert(obj != NULL); + } + return obj; +} + + +static PyMethodDef test_methods[] = { + {"bytes_resize", bytes_resize, METH_VARARGS}, + {NULL}, +}; + +int +_PyTestCapi_Init_Bytes(PyObject *m) +{ + if (PyModule_AddFunctions(m, test_methods) < 0) { + return -1; + } + + return 0; +} diff --git a/Modules/_testcapi/parts.h b/Modules/_testcapi/parts.h index f9bdd830775..e7c868f6bcf 100644 --- a/Modules/_testcapi/parts.h +++ b/Modules/_testcapi/parts.h @@ -31,6 +31,7 @@ int _PyTestCapi_Init_Vectorcall(PyObject *module); int _PyTestCapi_Init_Heaptype(PyObject *module); int _PyTestCapi_Init_Abstract(PyObject *module); +int _PyTestCapi_Init_Bytes(PyObject *module); int _PyTestCapi_Init_Unicode(PyObject *module); int _PyTestCapi_Init_GetArgs(PyObject *module); int _PyTestCapi_Init_DateTime(PyObject *module); diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 16b5e1d257e..3c30381be6d 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -3971,6 +3971,9 @@ PyInit__testcapi(void) if (_PyTestCapi_Init_Abstract(m) < 0) { return NULL; } + if (_PyTestCapi_Init_Bytes(m) < 0) { + return NULL; + } if (_PyTestCapi_Init_Unicode(m) < 0) { return NULL; } diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 26227dd2511..256e01f54f0 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -3025,11 +3025,9 @@ PyBytes_ConcatAndDel(PyObject **pv, PyObject *w) /* The following function breaks the notion that bytes are immutable: - it changes the size of a bytes object. We get away with this only if there - is only one module referencing the object. You can also think of it + it changes the size of a bytes object. You can think of it as creating a new bytes object and destroying the old one, only - more efficiently. In any case, don't use this if the bytes object may - already be known to some other part of the code... + more efficiently. Note that if there's not enough memory to resize the bytes object, the original bytes object at *pv is deallocated, *pv is set to NULL, an "out of memory" exception is set, and -1 is returned. Else (on success) 0 is @@ -3045,28 +3043,40 @@ _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize) PyBytesObject *sv; v = *pv; if (!PyBytes_Check(v) || newsize < 0) { - goto error; + *pv = 0; + Py_DECREF(v); + PyErr_BadInternalCall(); + return -1; } - if (Py_SIZE(v) == newsize) { + Py_ssize_t oldsize = PyBytes_GET_SIZE(v); + if (oldsize == newsize) { /* return early if newsize equals to v->ob_size */ return 0; } - if (Py_SIZE(v) == 0) { - if (newsize == 0) { - return 0; - } + if (oldsize == 0) { *pv = _PyBytes_FromSize(newsize, 0); Py_DECREF(v); return (*pv == NULL) ? -1 : 0; } - if (Py_REFCNT(v) != 1) { - goto error; - } if (newsize == 0) { *pv = bytes_get_empty(); Py_DECREF(v); return 0; } + if (Py_REFCNT(v) != 1) { + if (oldsize < newsize) { + *pv = _PyBytes_FromSize(newsize, 0); + if (*pv) { + memcpy(PyBytes_AS_STRING(*pv), PyBytes_AS_STRING(v), oldsize); + } + } + else { + *pv = PyBytes_FromStringAndSize(PyBytes_AS_STRING(v), newsize); + } + Py_DECREF(v); + return (*pv == NULL) ? -1 : 0; + } + #ifdef Py_TRACE_REFS _Py_ForgetReference(v); #endif @@ -3089,11 +3099,6 @@ _Py_COMP_DIAG_IGNORE_DEPR_DECLS sv->ob_shash = -1; /* invalidate cached hash value */ _Py_COMP_DIAG_POP return 0; -error: - *pv = 0; - Py_DECREF(v); - PyErr_BadInternalCall(); - return -1; } diff --git a/Objects/fileobject.c b/Objects/fileobject.c index e30ab952dff..bae49d367b6 100644 --- a/Objects/fileobject.c +++ b/Objects/fileobject.c @@ -80,13 +80,7 @@ PyFile_GetLine(PyObject *f, int n) "EOF when reading a line"); } else if (s[len-1] == '\n') { - if (Py_REFCNT(result) == 1) - _PyBytes_Resize(&result, len-1); - else { - PyObject *v; - v = PyBytes_FromStringAndSize(s, len-1); - Py_SETREF(result, v); - } + (void) _PyBytes_Resize(&result, len-1); } } if (n < 0 && result != NULL && PyUnicode_Check(result)) { diff --git a/PCbuild/_testcapi.vcxproj b/PCbuild/_testcapi.vcxproj index 6522cb1fcf5..615d73d5e00 100644 --- a/PCbuild/_testcapi.vcxproj +++ b/PCbuild/_testcapi.vcxproj @@ -98,6 +98,7 @@ + diff --git a/PCbuild/_testcapi.vcxproj.filters b/PCbuild/_testcapi.vcxproj.filters index 772a9a86151..0c11e918556 100644 --- a/PCbuild/_testcapi.vcxproj.filters +++ b/PCbuild/_testcapi.vcxproj.filters @@ -30,6 +30,9 @@ Source Files + + Source Files + Source Files