From 2ef73be891eb95064e268341e38e81d008add480 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Tue, 26 Jul 2022 04:10:23 +0200 Subject: [PATCH] gh-91247: Use memcpy for list and tuple repeat (#91482) * Add _Py_memory_repeat function to pycore_list * Add _Py_RefcntAdd function to pycore_object * Use the new functions in tuplerepeat, list_repeat, and list_inplace_repeat --- Include/internal/pycore_list.h | 13 ++++ Include/internal/pycore_object.h | 10 +++ .../2022-03-22-13-12-27.bpo-47091.tJcy-P.rst | 1 + Objects/listobject.c | 73 ++++++++----------- Objects/tupleobject.c | 41 +++++------ 5 files changed, 73 insertions(+), 65 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-03-22-13-12-27.bpo-47091.tJcy-P.rst diff --git a/Include/internal/pycore_list.h b/Include/internal/pycore_list.h index 7721f6c2e22..691d13bc8d9 100644 --- a/Include/internal/pycore_list.h +++ b/Include/internal/pycore_list.h @@ -56,6 +56,19 @@ _PyList_AppendTakeRef(PyListObject *self, PyObject *newitem) return _PyList_AppendTakeRefListResize(self, newitem); } +// Repeat the bytes of a buffer in place +static inline void +_Py_memory_repeat(char* dest, Py_ssize_t len_dest, Py_ssize_t len_src) +{ + assert(len_src > 0); + Py_ssize_t copied = len_src; + while (copied < len_dest) { + Py_ssize_t bytes_to_copy = Py_MIN(copied, len_dest - copied); + memcpy(dest + copied, dest, bytes_to_copy); + copied += bytes_to_copy; + } +} + typedef struct { PyObject_HEAD Py_ssize_t it_index; diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 1bb2da71704..c7490799d81 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -37,6 +37,16 @@ PyAPI_FUNC(void) _Py_NO_RETURN _Py_FatalRefcountErrorFunc( #define _Py_FatalRefcountError(message) \ _Py_FatalRefcountErrorFunc(__func__, (message)) +// Increment reference count by n +static inline void _Py_RefcntAdd(PyObject* op, Py_ssize_t n) +{ +#ifdef Py_REF_DEBUG + _Py_RefTotal += n; +#endif + op->ob_refcnt += n; +} +#define _Py_RefcntAdd(op, n) _Py_RefcntAdd(_PyObject_CAST(op), n) + static inline void _Py_DECREF_SPECIALIZED(PyObject *op, const destructor destruct) { diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-03-22-13-12-27.bpo-47091.tJcy-P.rst b/Misc/NEWS.d/next/Core and Builtins/2022-03-22-13-12-27.bpo-47091.tJcy-P.rst new file mode 100644 index 00000000000..72a39b5ebc7 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-03-22-13-12-27.bpo-47091.tJcy-P.rst @@ -0,0 +1 @@ +Improve performance of repetition of :class:`list` and :class:`tuple` by using ``memcpy`` to copy data and performing the reference increments in one step. diff --git a/Objects/listobject.c b/Objects/listobject.c index 822954bee43..9afa68f9fe1 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -551,47 +551,41 @@ list_concat(PyListObject *a, PyObject *bb) static PyObject * list_repeat(PyListObject *a, Py_ssize_t n) { - Py_ssize_t size; - PyListObject *np; - if (n < 0) - n = 0; - if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) - return PyErr_NoMemory(); - size = Py_SIZE(a) * n; - if (size == 0) + const Py_ssize_t input_size = Py_SIZE(a); + if (input_size == 0 || n <= 0) return PyList_New(0); - np = (PyListObject *) list_new_prealloc(size); + assert(n > 0); + + if (input_size > PY_SSIZE_T_MAX / n) + return PyErr_NoMemory(); + Py_ssize_t output_size = input_size * n; + + PyListObject *np = (PyListObject *) list_new_prealloc(output_size); if (np == NULL) return NULL; + PyObject **dest = np->ob_item; - PyObject **dest_end = dest + size; - if (Py_SIZE(a) == 1) { + if (input_size == 1) { PyObject *elem = a->ob_item[0]; - Py_SET_REFCNT(elem, Py_REFCNT(elem) + n); -#ifdef Py_REF_DEBUG - _Py_RefTotal += n; -#endif + _Py_RefcntAdd(elem, n); + PyObject **dest_end = dest + output_size; while (dest < dest_end) { *dest++ = elem; } } else { PyObject **src = a->ob_item; - PyObject **src_end = src + Py_SIZE(a); + PyObject **src_end = src + input_size; while (src < src_end) { - Py_SET_REFCNT(*src, Py_REFCNT(*src) + n); -#ifdef Py_REF_DEBUG - _Py_RefTotal += n; -#endif - *dest++ = *src++; - } - // Now src chases after dest in the same buffer - src = np->ob_item; - while (dest < dest_end) { + _Py_RefcntAdd(*src, n); *dest++ = *src++; } + + _Py_memory_repeat((char *)np->ob_item, sizeof(PyObject *)*output_size, + sizeof(PyObject *)*input_size); } - Py_SET_SIZE(np, size); + + Py_SET_SIZE(np, output_size); return (PyObject *) np; } @@ -743,12 +737,8 @@ PyList_SetSlice(PyObject *a, Py_ssize_t ilow, Py_ssize_t ihigh, PyObject *v) static PyObject * list_inplace_repeat(PyListObject *self, Py_ssize_t n) { - PyObject **items; - Py_ssize_t size, i, j, p; - - - size = PyList_GET_SIZE(self); - if (size == 0 || n == 1) { + Py_ssize_t input_size = PyList_GET_SIZE(self); + if (input_size == 0 || n == 1) { Py_INCREF(self); return (PyObject *)self; } @@ -759,22 +749,21 @@ list_inplace_repeat(PyListObject *self, Py_ssize_t n) return (PyObject *)self; } - if (size > PY_SSIZE_T_MAX / n) { + if (input_size > PY_SSIZE_T_MAX / n) { return PyErr_NoMemory(); } + Py_ssize_t output_size = input_size * n; - if (list_resize(self, size*n) < 0) + if (list_resize(self, output_size) < 0) return NULL; - p = size; - items = self->ob_item; - for (i = 1; i < n; i++) { /* Start counting at 1, not 0 */ - for (j = 0; j < size; j++) { - PyObject *o = items[j]; - Py_INCREF(o); - items[p++] = o; - } + PyObject **items = self->ob_item; + for (Py_ssize_t j = 0; j < input_size; j++) { + _Py_RefcntAdd(items[j], n-1); } + _Py_memory_repeat((char *)items, sizeof(PyObject *)*output_size, + sizeof(PyObject *)*input_size); + Py_INCREF(self); return (PyObject *)self; } diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index dfb8597b876..240af0a9075 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -495,9 +495,8 @@ tupleconcat(PyTupleObject *a, PyObject *bb) static PyObject * tuplerepeat(PyTupleObject *a, Py_ssize_t n) { - Py_ssize_t size; - PyTupleObject *np; - if (Py_SIZE(a) == 0 || n == 1) { + const Py_ssize_t input_size = Py_SIZE(a); + if (input_size == 0 || n == 1) { if (PyTuple_CheckExact(a)) { /* Since tuples are immutable, we can return a shared copy in this case */ @@ -505,42 +504,38 @@ tuplerepeat(PyTupleObject *a, Py_ssize_t n) return (PyObject *)a; } } - if (Py_SIZE(a) == 0 || n <= 0) { + if (input_size == 0 || n <= 0) { return tuple_get_empty(); } - if (n > PY_SSIZE_T_MAX / Py_SIZE(a)) + assert(n>0); + + if (input_size > PY_SSIZE_T_MAX / n) return PyErr_NoMemory(); - size = Py_SIZE(a) * n; - np = tuple_alloc(size); + Py_ssize_t output_size = input_size * n; + + PyTupleObject *np = tuple_alloc(output_size); if (np == NULL) return NULL; + PyObject **dest = np->ob_item; - PyObject **dest_end = dest + size; - if (Py_SIZE(a) == 1) { + if (input_size == 1) { PyObject *elem = a->ob_item[0]; - Py_SET_REFCNT(elem, Py_REFCNT(elem) + n); -#ifdef Py_REF_DEBUG - _Py_RefTotal += n; -#endif + _Py_RefcntAdd(elem, n); + PyObject **dest_end = dest + output_size; while (dest < dest_end) { *dest++ = elem; } } else { PyObject **src = a->ob_item; - PyObject **src_end = src + Py_SIZE(a); + PyObject **src_end = src + input_size; while (src < src_end) { - Py_SET_REFCNT(*src, Py_REFCNT(*src) + n); -#ifdef Py_REF_DEBUG - _Py_RefTotal += n; -#endif - *dest++ = *src++; - } - // Now src chases after dest in the same buffer - src = np->ob_item; - while (dest < dest_end) { + _Py_RefcntAdd(*src, n); *dest++ = *src++; } + + _Py_memory_repeat((char *)np->ob_item, sizeof(PyObject *)*output_size, + sizeof(PyObject *)*input_size); } _PyObject_GC_TRACK(np); return (PyObject *) np;