mirror of https://github.com/python/cpython
bpo-47070: Add _PyBytes_Repeat() (GH-31999)
Use it where appropriate: the repeat functions of `array.array`, `bytes`, `bytearray`, and `str`.
This commit is contained in:
parent
86384cf83f
commit
850687df47
|
@ -33,6 +33,19 @@ _PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
|
|||
const char *needle, Py_ssize_t len_needle,
|
||||
Py_ssize_t offset);
|
||||
|
||||
|
||||
/** Helper function to implement the repeat and inplace repeat methods on a buffer
|
||||
*
|
||||
* len_dest is assumed to be an integer multiple of len_src.
|
||||
* If src equals dest, then assume the operation is inplace.
|
||||
*
|
||||
* This method repeately doubles the number of bytes copied to reduce
|
||||
* the number of invocations of memcpy.
|
||||
*/
|
||||
PyAPI_FUNC(void)
|
||||
_PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
|
||||
const char* src, Py_ssize_t len_src);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
Improve performance of ``array_inplace_repeat`` by reducing the number of invocations of ``memcpy``.
|
||||
Refactor the ``repeat`` and inplace ``repeat`` methods of ``array``, ``bytes``, ``bytearray``
|
||||
and ``unicodeobject`` to use the common ``_PyBytes_Repeat``.
|
|
@ -10,6 +10,7 @@
|
|||
#define PY_SSIZE_T_CLEAN
|
||||
#include "Python.h"
|
||||
#include "pycore_moduleobject.h" // _PyModule_GetState()
|
||||
#include "pycore_bytesobject.h" // _PyBytes_Repeat
|
||||
#include "structmember.h" // PyMemberDef
|
||||
#include <stddef.h> // offsetof()
|
||||
#include <stddef.h>
|
||||
|
@ -910,34 +911,24 @@ static PyObject *
|
|||
array_repeat(arrayobject *a, Py_ssize_t n)
|
||||
{
|
||||
array_state *state = find_array_state_by_type(Py_TYPE(a));
|
||||
Py_ssize_t size;
|
||||
arrayobject *np;
|
||||
Py_ssize_t oldbytes, newbytes;
|
||||
|
||||
if (n < 0)
|
||||
n = 0;
|
||||
if ((Py_SIZE(a) != 0) && (n > PY_SSIZE_T_MAX / Py_SIZE(a))) {
|
||||
const Py_ssize_t array_length = Py_SIZE(a);
|
||||
if ((array_length != 0) && (n > PY_SSIZE_T_MAX / array_length)) {
|
||||
return PyErr_NoMemory();
|
||||
}
|
||||
size = Py_SIZE(a) * n;
|
||||
np = (arrayobject *) newarrayobject(state->ArrayType, size, a->ob_descr);
|
||||
Py_ssize_t size = array_length * n;
|
||||
arrayobject* np = (arrayobject *) newarrayobject(state->ArrayType, size, a->ob_descr);
|
||||
if (np == NULL)
|
||||
return NULL;
|
||||
if (size == 0)
|
||||
return (PyObject *)np;
|
||||
oldbytes = Py_SIZE(a) * a->ob_descr->itemsize;
|
||||
newbytes = oldbytes * n;
|
||||
/* this follows the code in unicode_repeat */
|
||||
if (oldbytes == 1) {
|
||||
memset(np->ob_item, a->ob_item[0], newbytes);
|
||||
} else {
|
||||
Py_ssize_t done = oldbytes;
|
||||
memcpy(np->ob_item, a->ob_item, oldbytes);
|
||||
while (done < newbytes) {
|
||||
Py_ssize_t ncopy = (done <= newbytes-done) ? done : newbytes-done;
|
||||
memcpy(np->ob_item+done, np->ob_item, ncopy);
|
||||
done += ncopy;
|
||||
}
|
||||
}
|
||||
|
||||
const Py_ssize_t oldbytes = array_length * a->ob_descr->itemsize;
|
||||
const Py_ssize_t newbytes = oldbytes * n;
|
||||
_PyBytes_Repeat(np->ob_item, newbytes, a->ob_item, oldbytes);
|
||||
|
||||
return (PyObject *)np;
|
||||
}
|
||||
|
||||
|
@ -1075,27 +1066,23 @@ array_inplace_concat(arrayobject *self, PyObject *bb)
|
|||
static PyObject *
|
||||
array_inplace_repeat(arrayobject *self, Py_ssize_t n)
|
||||
{
|
||||
char *items, *p;
|
||||
Py_ssize_t size, i;
|
||||
const Py_ssize_t array_size = Py_SIZE(self);
|
||||
|
||||
if (Py_SIZE(self) > 0) {
|
||||
if (array_size > 0 && n != 1 ) {
|
||||
if (n < 0)
|
||||
n = 0;
|
||||
if ((self->ob_descr->itemsize != 0) &&
|
||||
(Py_SIZE(self) > PY_SSIZE_T_MAX / self->ob_descr->itemsize)) {
|
||||
(array_size > PY_SSIZE_T_MAX / self->ob_descr->itemsize)) {
|
||||
return PyErr_NoMemory();
|
||||
}
|
||||
size = Py_SIZE(self) * self->ob_descr->itemsize;
|
||||
Py_ssize_t size = array_size * self->ob_descr->itemsize;
|
||||
if (n > 0 && size > PY_SSIZE_T_MAX / n) {
|
||||
return PyErr_NoMemory();
|
||||
}
|
||||
if (array_resize(self, n * Py_SIZE(self)) == -1)
|
||||
if (array_resize(self, n * array_size) == -1)
|
||||
return NULL;
|
||||
items = p = self->ob_item;
|
||||
for (i = 1; i < n; i++) {
|
||||
p += size;
|
||||
memcpy(p, items, size);
|
||||
}
|
||||
|
||||
_PyBytes_Repeat(self->ob_item, n*size, self->ob_item, size);
|
||||
}
|
||||
Py_INCREF(self);
|
||||
return (PyObject *)self;
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
#include "Python.h"
|
||||
#include "pycore_abstract.h" // _PyIndex_Check()
|
||||
#include "pycore_bytes_methods.h"
|
||||
#include "pycore_bytesobject.h"
|
||||
#include "pycore_object.h" // _PyObject_GC_UNTRACK()
|
||||
#include "pycore_strhex.h" // _Py_strhex_with_sep()
|
||||
#include "pycore_long.h" // _PyLong_FromUnsignedChar()
|
||||
|
@ -319,37 +320,16 @@ bytearray_iconcat(PyByteArrayObject *self, PyObject *other)
|
|||
static PyObject *
|
||||
bytearray_repeat(PyByteArrayObject *self, Py_ssize_t count)
|
||||
{
|
||||
PyByteArrayObject *result;
|
||||
Py_ssize_t mysize;
|
||||
Py_ssize_t size;
|
||||
const char *buf;
|
||||
|
||||
if (count < 0)
|
||||
count = 0;
|
||||
mysize = Py_SIZE(self);
|
||||
const Py_ssize_t mysize = Py_SIZE(self);
|
||||
if (count > 0 && mysize > PY_SSIZE_T_MAX / count)
|
||||
return PyErr_NoMemory();
|
||||
size = mysize * count;
|
||||
result = (PyByteArrayObject *)PyByteArray_FromStringAndSize(NULL, size);
|
||||
buf = PyByteArray_AS_STRING(self);
|
||||
Py_ssize_t size = mysize * count;
|
||||
PyByteArrayObject* result = (PyByteArrayObject *)PyByteArray_FromStringAndSize(NULL, size);
|
||||
const char* buf = PyByteArray_AS_STRING(self);
|
||||
if (result != NULL && size != 0) {
|
||||
if (mysize == 1)
|
||||
memset(result->ob_bytes, buf[0], size);
|
||||
else {
|
||||
Py_ssize_t i, j;
|
||||
|
||||
i = 0;
|
||||
if (i < size) {
|
||||
memcpy(result->ob_bytes, buf, mysize);
|
||||
i = mysize;
|
||||
}
|
||||
// repeatedly double the number of bytes copied
|
||||
while (i < size) {
|
||||
j = Py_MIN(i, size - i);
|
||||
memcpy(result->ob_bytes + i, result->ob_bytes, j);
|
||||
i += j;
|
||||
}
|
||||
}
|
||||
_PyBytes_Repeat(result->ob_bytes, size, buf, mysize);
|
||||
}
|
||||
return (PyObject *)result;
|
||||
}
|
||||
|
@ -357,33 +337,22 @@ bytearray_repeat(PyByteArrayObject *self, Py_ssize_t count)
|
|||
static PyObject *
|
||||
bytearray_irepeat(PyByteArrayObject *self, Py_ssize_t count)
|
||||
{
|
||||
Py_ssize_t mysize;
|
||||
Py_ssize_t size;
|
||||
char *buf;
|
||||
|
||||
if (count < 0)
|
||||
count = 0;
|
||||
mysize = Py_SIZE(self);
|
||||
else if (count == 1) {
|
||||
Py_INCREF(self);
|
||||
return (PyObject*)self;
|
||||
}
|
||||
|
||||
const Py_ssize_t mysize = Py_SIZE(self);
|
||||
if (count > 0 && mysize > PY_SSIZE_T_MAX / count)
|
||||
return PyErr_NoMemory();
|
||||
size = mysize * count;
|
||||
const Py_ssize_t size = mysize * count;
|
||||
if (PyByteArray_Resize((PyObject *)self, size) < 0)
|
||||
return NULL;
|
||||
|
||||
buf = PyByteArray_AS_STRING(self);
|
||||
if (mysize == 1)
|
||||
memset(buf, buf[0], size);
|
||||
else {
|
||||
Py_ssize_t i, j;
|
||||
|
||||
i = mysize;
|
||||
// repeatedly double the number of bytes copied
|
||||
while (i < size) {
|
||||
j = Py_MIN(i, size - i);
|
||||
memcpy(buf + i, buf, j);
|
||||
i += j;
|
||||
}
|
||||
}
|
||||
char* buf = PyByteArray_AS_STRING(self);
|
||||
_PyBytes_Repeat(buf, size, buf, mysize);
|
||||
|
||||
Py_INCREF(self);
|
||||
return (PyObject *)self;
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
|
||||
#include "Python.h"
|
||||
#include "pycore_abstract.h" // _PyIndex_Check()
|
||||
#include "pycore_bytesobject.h" // _PyBytes_Find()
|
||||
#include "pycore_bytesobject.h" // _PyBytes_Find(), _PyBytes_Repeat()
|
||||
#include "pycore_bytes_methods.h" // _Py_bytes_startswith()
|
||||
#include "pycore_call.h" // _PyObject_CallNoArgs()
|
||||
#include "pycore_format.h" // F_LJUST
|
||||
|
@ -1421,8 +1421,6 @@ bytes_concat(PyObject *a, PyObject *b)
|
|||
static PyObject *
|
||||
bytes_repeat(PyBytesObject *a, Py_ssize_t n)
|
||||
{
|
||||
Py_ssize_t i;
|
||||
Py_ssize_t j;
|
||||
Py_ssize_t size;
|
||||
PyBytesObject *op;
|
||||
size_t nbytes;
|
||||
|
@ -1457,20 +1455,9 @@ _Py_COMP_DIAG_IGNORE_DEPR_DECLS
|
|||
op->ob_shash = -1;
|
||||
_Py_COMP_DIAG_POP
|
||||
op->ob_sval[size] = '\0';
|
||||
if (Py_SIZE(a) == 1 && n > 0) {
|
||||
memset(op->ob_sval, a->ob_sval[0] , n);
|
||||
return (PyObject *) op;
|
||||
}
|
||||
i = 0;
|
||||
if (i < size) {
|
||||
memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
|
||||
i = Py_SIZE(a);
|
||||
}
|
||||
while (i < size) {
|
||||
j = (i <= size-i) ? i : size-i;
|
||||
memcpy(op->ob_sval+i, op->ob_sval, j);
|
||||
i += j;
|
||||
}
|
||||
|
||||
_PyBytes_Repeat(op->ob_sval, size, a->ob_sval, Py_SIZE(a));
|
||||
|
||||
return (PyObject *) op;
|
||||
}
|
||||
|
||||
|
@ -3528,3 +3515,28 @@ _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
|
|||
|
||||
return str;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
_PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
|
||||
const char* src, Py_ssize_t len_src)
|
||||
{
|
||||
if (len_dest == 0) {
|
||||
return;
|
||||
}
|
||||
if (len_src == 1) {
|
||||
memset(dest, src[0], len_dest);
|
||||
}
|
||||
else {
|
||||
if (src != dest) {
|
||||
memcpy(dest, src, len_src);
|
||||
}
|
||||
Py_ssize_t copied = len_src;
|
||||
while (copied < len_dest) {
|
||||
Py_ssize_t bytes_to_copy = Py_MIN(copied, len_dest - copied);
|
||||
memcpy(dest + copied, dest, bytes_to_copy);
|
||||
copied += bytes_to_copy;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -42,6 +42,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|||
#include "Python.h"
|
||||
#include "pycore_abstract.h" // _PyIndex_Check()
|
||||
#include "pycore_atomic_funcs.h" // _Py_atomic_size_get()
|
||||
#include "pycore_bytesobject.h" // _PyBytes_Repeat()
|
||||
#include "pycore_bytes_methods.h" // _Py_bytes_lower()
|
||||
#include "pycore_format.h" // F_LJUST
|
||||
#include "pycore_initconfig.h" // _PyStatus_OK()
|
||||
|
@ -12782,17 +12783,10 @@ unicode_repeat(PyObject *str, Py_ssize_t len)
|
|||
}
|
||||
}
|
||||
else {
|
||||
/* number of characters copied this far */
|
||||
Py_ssize_t done = PyUnicode_GET_LENGTH(str);
|
||||
Py_ssize_t char_size = PyUnicode_KIND(str);
|
||||
char *to = (char *) PyUnicode_DATA(u);
|
||||
memcpy(to, PyUnicode_DATA(str),
|
||||
PyUnicode_GET_LENGTH(str) * char_size);
|
||||
while (done < nchars) {
|
||||
n = (done <= nchars-done) ? done : nchars-done;
|
||||
memcpy(to + (done * char_size), to, n * char_size);
|
||||
done += n;
|
||||
}
|
||||
_PyBytes_Repeat(to, nchars * char_size, PyUnicode_DATA(str),
|
||||
PyUnicode_GET_LENGTH(str) * char_size);
|
||||
}
|
||||
|
||||
assert(_PyUnicode_CheckConsistency(u, 1));
|
||||
|
|
Loading…
Reference in New Issue