mirror of https://github.com/python/cpython
gh-91247: Use memcpy for list and tuple repeat (#91482)
* Add _Py_memory_repeat function to pycore_list
* Add _Py_RefcntAdd function to pycore_object
* Use the new functions in tuplerepeat, list_repeat, and list_inplace_repeat
This commit is contained in:
parent
27055d766a
commit
2ef73be891
|
@ -56,6 +56,19 @@ _PyList_AppendTakeRef(PyListObject *self, PyObject *newitem)
|
||||||
return _PyList_AppendTakeRefListResize(self, newitem);
|
return _PyList_AppendTakeRefListResize(self, newitem);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Repeat the bytes of a buffer in place.
//
// On entry the first len_src bytes of dest are taken as already filled in.
// The function keeps copying from the start of dest to the first unfilled
// byte until len_dest bytes are filled. If len_dest <= len_src the loop
// body never runs and dest is left untouched.
//
//   dest      start of the buffer
//   len_dest  number of bytes to have filled on return
//   len_src   number of bytes at the start of dest that are already filled;
//             asserted to be > 0
|
||||||
|
static inline void
|
||||||
|
_Py_memory_repeat(char* dest, Py_ssize_t len_dest, Py_ssize_t len_src)
|
||||||
|
{
|
||||||
|
assert(len_src > 0);
|
||||||
|
Py_ssize_t copied = len_src;
|
||||||
|
while (copied < len_dest) {
|
||||||
|
// Copy at most the already-filled prefix, capped at the space that remains.
// Since bytes_to_copy <= copied, the source range [0, bytes_to_copy) and the
// destination range starting at `copied` never overlap.
Py_ssize_t bytes_to_copy = Py_MIN(copied, len_dest - copied);
|
||||||
|
memcpy(dest + copied, dest, bytes_to_copy);
|
||||||
|
copied += bytes_to_copy;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
PyObject_HEAD
|
PyObject_HEAD
|
||||||
Py_ssize_t it_index;
|
Py_ssize_t it_index;
|
||||||
|
|
|
@ -37,6 +37,16 @@ PyAPI_FUNC(void) _Py_NO_RETURN _Py_FatalRefcountErrorFunc(
|
||||||
#define _Py_FatalRefcountError(message) \
|
#define _Py_FatalRefcountError(message) \
|
||||||
_Py_FatalRefcountErrorFunc(__func__, (message))
|
_Py_FatalRefcountErrorFunc(__func__, (message))
|
||||||
|
|
||||||
|
// Increment reference count by n
//
// Adds n to op->ob_refcnt in a single step. When Py_REF_DEBUG is defined,
// _Py_RefTotal is increased by the same amount. Neither n nor the resulting
// count is checked here.
|
||||||
|
static inline void _Py_RefcntAdd(PyObject* op, Py_ssize_t n)
|
||||||
|
{
|
||||||
|
#ifdef Py_REF_DEBUG
|
||||||
|
_Py_RefTotal += n;
|
||||||
|
#endif
|
||||||
|
op->ob_refcnt += n;
|
||||||
|
}
|
||||||
|
// Macro of the same name: after this point every call passes its first
// argument through _PyObject_CAST before reaching the inline function above.
#define _Py_RefcntAdd(op, n) _Py_RefcntAdd(_PyObject_CAST(op), n)
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
_Py_DECREF_SPECIALIZED(PyObject *op, const destructor destruct)
|
_Py_DECREF_SPECIALIZED(PyObject *op, const destructor destruct)
|
||||||
{
|
{
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Improve performance of repetition of :class:`list` and :class:`tuple` by using ``memcpy`` to copy data and performing the reference increments in one step.
|
|
@ -551,47 +551,41 @@ list_concat(PyListObject *a, PyObject *bb)
|
||||||
static PyObject *
|
static PyObject *
|
||||||
list_repeat(PyListObject *a, Py_ssize_t n)
|
list_repeat(PyListObject *a, Py_ssize_t n)
|
||||||
{
|
{
|
||||||
Py_ssize_t size;
|
const Py_ssize_t input_size = Py_SIZE(a);
|
||||||
PyListObject *np;
|
if (input_size == 0 || n <= 0)
|
||||||
if (n < 0)
|
|
||||||
n = 0;
|
|
||||||
if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n)
|
|
||||||
return PyErr_NoMemory();
|
|
||||||
size = Py_SIZE(a) * n;
|
|
||||||
if (size == 0)
|
|
||||||
return PyList_New(0);
|
return PyList_New(0);
|
||||||
np = (PyListObject *) list_new_prealloc(size);
|
assert(n > 0);
|
||||||
|
|
||||||
|
if (input_size > PY_SSIZE_T_MAX / n)
|
||||||
|
return PyErr_NoMemory();
|
||||||
|
Py_ssize_t output_size = input_size * n;
|
||||||
|
|
||||||
|
PyListObject *np = (PyListObject *) list_new_prealloc(output_size);
|
||||||
if (np == NULL)
|
if (np == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
PyObject **dest = np->ob_item;
|
PyObject **dest = np->ob_item;
|
||||||
PyObject **dest_end = dest + size;
|
if (input_size == 1) {
|
||||||
if (Py_SIZE(a) == 1) {
|
|
||||||
PyObject *elem = a->ob_item[0];
|
PyObject *elem = a->ob_item[0];
|
||||||
Py_SET_REFCNT(elem, Py_REFCNT(elem) + n);
|
_Py_RefcntAdd(elem, n);
|
||||||
#ifdef Py_REF_DEBUG
|
PyObject **dest_end = dest + output_size;
|
||||||
_Py_RefTotal += n;
|
|
||||||
#endif
|
|
||||||
while (dest < dest_end) {
|
while (dest < dest_end) {
|
||||||
*dest++ = elem;
|
*dest++ = elem;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
PyObject **src = a->ob_item;
|
PyObject **src = a->ob_item;
|
||||||
PyObject **src_end = src + Py_SIZE(a);
|
PyObject **src_end = src + input_size;
|
||||||
while (src < src_end) {
|
while (src < src_end) {
|
||||||
Py_SET_REFCNT(*src, Py_REFCNT(*src) + n);
|
_Py_RefcntAdd(*src, n);
|
||||||
#ifdef Py_REF_DEBUG
|
|
||||||
_Py_RefTotal += n;
|
|
||||||
#endif
|
|
||||||
*dest++ = *src++;
|
*dest++ = *src++;
|
||||||
}
|
}
|
||||||
// Now src chases after dest in the same buffer
|
|
||||||
src = np->ob_item;
|
_Py_memory_repeat((char *)np->ob_item, sizeof(PyObject *)*output_size,
|
||||||
while (dest < dest_end) {
|
sizeof(PyObject *)*input_size);
|
||||||
*dest++ = *src++;
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
Py_SET_SIZE(np, size);
|
Py_SET_SIZE(np, output_size);
|
||||||
return (PyObject *) np;
|
return (PyObject *) np;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -743,12 +737,8 @@ PyList_SetSlice(PyObject *a, Py_ssize_t ilow, Py_ssize_t ihigh, PyObject *v)
|
||||||
static PyObject *
|
static PyObject *
|
||||||
list_inplace_repeat(PyListObject *self, Py_ssize_t n)
|
list_inplace_repeat(PyListObject *self, Py_ssize_t n)
|
||||||
{
|
{
|
||||||
PyObject **items;
|
Py_ssize_t input_size = PyList_GET_SIZE(self);
|
||||||
Py_ssize_t size, i, j, p;
|
if (input_size == 0 || n == 1) {
|
||||||
|
|
||||||
|
|
||||||
size = PyList_GET_SIZE(self);
|
|
||||||
if (size == 0 || n == 1) {
|
|
||||||
Py_INCREF(self);
|
Py_INCREF(self);
|
||||||
return (PyObject *)self;
|
return (PyObject *)self;
|
||||||
}
|
}
|
||||||
|
@ -759,22 +749,21 @@ list_inplace_repeat(PyListObject *self, Py_ssize_t n)
|
||||||
return (PyObject *)self;
|
return (PyObject *)self;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (size > PY_SSIZE_T_MAX / n) {
|
if (input_size > PY_SSIZE_T_MAX / n) {
|
||||||
return PyErr_NoMemory();
|
return PyErr_NoMemory();
|
||||||
}
|
}
|
||||||
|
Py_ssize_t output_size = input_size * n;
|
||||||
|
|
||||||
if (list_resize(self, size*n) < 0)
|
if (list_resize(self, output_size) < 0)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
p = size;
|
PyObject **items = self->ob_item;
|
||||||
items = self->ob_item;
|
for (Py_ssize_t j = 0; j < input_size; j++) {
|
||||||
for (i = 1; i < n; i++) { /* Start counting at 1, not 0 */
|
_Py_RefcntAdd(items[j], n-1);
|
||||||
for (j = 0; j < size; j++) {
|
|
||||||
PyObject *o = items[j];
|
|
||||||
Py_INCREF(o);
|
|
||||||
items[p++] = o;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
_Py_memory_repeat((char *)items, sizeof(PyObject *)*output_size,
|
||||||
|
sizeof(PyObject *)*input_size);
|
||||||
|
|
||||||
Py_INCREF(self);
|
Py_INCREF(self);
|
||||||
return (PyObject *)self;
|
return (PyObject *)self;
|
||||||
}
|
}
|
||||||
|
|
|
@ -495,9 +495,8 @@ tupleconcat(PyTupleObject *a, PyObject *bb)
|
||||||
static PyObject *
|
static PyObject *
|
||||||
tuplerepeat(PyTupleObject *a, Py_ssize_t n)
|
tuplerepeat(PyTupleObject *a, Py_ssize_t n)
|
||||||
{
|
{
|
||||||
Py_ssize_t size;
|
const Py_ssize_t input_size = Py_SIZE(a);
|
||||||
PyTupleObject *np;
|
if (input_size == 0 || n == 1) {
|
||||||
if (Py_SIZE(a) == 0 || n == 1) {
|
|
||||||
if (PyTuple_CheckExact(a)) {
|
if (PyTuple_CheckExact(a)) {
|
||||||
/* Since tuples are immutable, we can return a shared
|
/* Since tuples are immutable, we can return a shared
|
||||||
copy in this case */
|
copy in this case */
|
||||||
|
@ -505,42 +504,38 @@ tuplerepeat(PyTupleObject *a, Py_ssize_t n)
|
||||||
return (PyObject *)a;
|
return (PyObject *)a;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (Py_SIZE(a) == 0 || n <= 0) {
|
if (input_size == 0 || n <= 0) {
|
||||||
return tuple_get_empty();
|
return tuple_get_empty();
|
||||||
}
|
}
|
||||||
if (n > PY_SSIZE_T_MAX / Py_SIZE(a))
|
assert(n>0);
|
||||||
|
|
||||||
|
if (input_size > PY_SSIZE_T_MAX / n)
|
||||||
return PyErr_NoMemory();
|
return PyErr_NoMemory();
|
||||||
size = Py_SIZE(a) * n;
|
Py_ssize_t output_size = input_size * n;
|
||||||
np = tuple_alloc(size);
|
|
||||||
|
PyTupleObject *np = tuple_alloc(output_size);
|
||||||
if (np == NULL)
|
if (np == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
PyObject **dest = np->ob_item;
|
PyObject **dest = np->ob_item;
|
||||||
PyObject **dest_end = dest + size;
|
if (input_size == 1) {
|
||||||
if (Py_SIZE(a) == 1) {
|
|
||||||
PyObject *elem = a->ob_item[0];
|
PyObject *elem = a->ob_item[0];
|
||||||
Py_SET_REFCNT(elem, Py_REFCNT(elem) + n);
|
_Py_RefcntAdd(elem, n);
|
||||||
#ifdef Py_REF_DEBUG
|
PyObject **dest_end = dest + output_size;
|
||||||
_Py_RefTotal += n;
|
|
||||||
#endif
|
|
||||||
while (dest < dest_end) {
|
while (dest < dest_end) {
|
||||||
*dest++ = elem;
|
*dest++ = elem;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
PyObject **src = a->ob_item;
|
PyObject **src = a->ob_item;
|
||||||
PyObject **src_end = src + Py_SIZE(a);
|
PyObject **src_end = src + input_size;
|
||||||
while (src < src_end) {
|
while (src < src_end) {
|
||||||
Py_SET_REFCNT(*src, Py_REFCNT(*src) + n);
|
_Py_RefcntAdd(*src, n);
|
||||||
#ifdef Py_REF_DEBUG
|
|
||||||
_Py_RefTotal += n;
|
|
||||||
#endif
|
|
||||||
*dest++ = *src++;
|
|
||||||
}
|
|
||||||
// Now src chases after dest in the same buffer
|
|
||||||
src = np->ob_item;
|
|
||||||
while (dest < dest_end) {
|
|
||||||
*dest++ = *src++;
|
*dest++ = *src++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_Py_memory_repeat((char *)np->ob_item, sizeof(PyObject *)*output_size,
|
||||||
|
sizeof(PyObject *)*input_size);
|
||||||
}
|
}
|
||||||
_PyObject_GC_TRACK(np);
|
_PyObject_GC_TRACK(np);
|
||||||
return (PyObject *) np;
|
return (PyObject *) np;
|
||||||
|
|
Loading…
Reference in New Issue