From ad1d5908ada171eff768291371a80022bfad4f04 Mon Sep 17 00:00:00 2001 From: Dennis Sweeney <36520290+sweeneyde@users.noreply.github.com> Date: Fri, 7 Jan 2022 22:47:58 -0500 Subject: [PATCH] bpo-46235: Do all ref-counting at once during list/tuple multiplication (GH-30346) When multiplying lists and tuples by `n`, increment each element's refcount, by `n`, just once. Saves `n-1` increments per element, and allows for a leaner & faster copying loop. Code by sweeneyde (Dennis Sweeney). --- .../2022-01-02-23-55-13.bpo-46235.gUjp2v.rst | 1 + Objects/listobject.c | 39 +++++++++++-------- Objects/tupleobject.c | 35 ++++++++++++----- 3 files changed, 49 insertions(+), 26 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-01-02-23-55-13.bpo-46235.gUjp2v.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-01-02-23-55-13.bpo-46235.gUjp2v.rst b/Misc/NEWS.d/next/Core and Builtins/2022-01-02-23-55-13.bpo-46235.gUjp2v.rst new file mode 100644 index 00000000000..9115c9d70a3 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-01-02-23-55-13.bpo-46235.gUjp2v.rst @@ -0,0 +1 @@ +Certain sequence multiplication operations like ``[0] * 1_000`` are now faster due to reference-counting optimizations. Patch by Dennis Sweeney. \ No newline at end of file diff --git a/Objects/listobject.c b/Objects/listobject.c index e7023fb9eb1..29f5d70f1db 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -553,11 +553,8 @@ list_concat(PyListObject *a, PyObject *bb) static PyObject * list_repeat(PyListObject *a, Py_ssize_t n) { - Py_ssize_t i, j; Py_ssize_t size; PyListObject *np; - PyObject **p, **items; - PyObject *elem; if (n < 0) n = 0; if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) @@ -568,24 +565,32 @@ list_repeat(PyListObject *a, Py_ssize_t n) np = (PyListObject *) list_new_prealloc(size); if (np == NULL) return NULL; - + PyObject **dest = np->ob_item; + PyObject **dest_end = dest + size; if (Py_SIZE(a) == 1) { - items = np->ob_item; - elem = a->ob_item[0]; - for (i = 0; i < n; i++) { - items[i] = elem; - Py_INCREF(elem); + PyObject *elem = a->ob_item[0]; + Py_SET_REFCNT(elem, Py_REFCNT(elem) + n); +#ifdef Py_REF_DEBUG + _Py_RefTotal += n; +#endif + while (dest < dest_end) { + *dest++ = elem; } } else { - p = np->ob_item; - items = a->ob_item; - for (i = 0; i < n; i++) { - for (j = 0; j < Py_SIZE(a); j++) { - *p = items[j]; - Py_INCREF(*p); - p++; - } + PyObject **src = a->ob_item; + PyObject **src_end = src + Py_SIZE(a); + while (src < src_end) { + Py_SET_REFCNT(*src, Py_REFCNT(*src) + n); +#ifdef Py_REF_DEBUG + _Py_RefTotal += n; +#endif + *dest++ = *src++; + } + // Now src chases after dest in the same buffer + src = np->ob_item; + while (dest < dest_end) { + *dest++ = *src++; } } Py_SET_SIZE(np, size); diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index cb34c5eb15e..2051c1812ef 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -589,10 +589,8 @@ tupleconcat(PyTupleObject *a, PyObject *bb) static PyObject * tuplerepeat(PyTupleObject *a, Py_ssize_t n) { - Py_ssize_t i, j; Py_ssize_t size; PyTupleObject *np; - PyObject **p, **items; if (Py_SIZE(a) == 0 || n == 1) { if (PyTuple_CheckExact(a)) { /* Since tuples are immutable, we can return a shared @@ -610,13 +608,32 @@ tuplerepeat(PyTupleObject *a, Py_ssize_t n) np = tuple_alloc(size); if (np == NULL) return NULL; - p = np->ob_item; - items = a->ob_item; - for (i = 0; i < n; i++) { - for (j = 0; j < Py_SIZE(a); j++) { - *p = items[j]; - Py_INCREF(*p); - p++; + PyObject **dest = np->ob_item; + PyObject **dest_end = dest + size; + if (Py_SIZE(a) == 1) { + PyObject *elem = a->ob_item[0]; + Py_SET_REFCNT(elem, Py_REFCNT(elem) + n); +#ifdef Py_REF_DEBUG + _Py_RefTotal += n; +#endif + while (dest < dest_end) { + *dest++ = elem; + } + } + else { + PyObject **src = a->ob_item; + PyObject **src_end = src + Py_SIZE(a); + while (src < src_end) { + Py_SET_REFCNT(*src, Py_REFCNT(*src) + n); +#ifdef Py_REF_DEBUG + _Py_RefTotal += n; +#endif + *dest++ = *src++; + } + // Now src chases after dest in the same buffer + src = np->ob_item; + while (dest < dest_end) { + *dest++ = *src++; } } _PyObject_GC_TRACK(np);