diff --git a/Doc/whatsnew/3.6.rst b/Doc/whatsnew/3.6.rst index 904605861b7..986c145dcc8 100644 --- a/Doc/whatsnew/3.6.rst +++ b/Doc/whatsnew/3.6.rst @@ -339,6 +339,9 @@ Optimizations * Optimize :meth:`bytes.fromhex` and :meth:`bytearray.fromhex`: they are now between 2x and 3.5x faster. (Contributed by Victor Stinner in :issue:`25401`). +* Optimize ``bytes.replace(b'', b'.')`` and ``bytearray.replace(b'', b'.')``: + up to 80% faster. (Contributed by Josh Snider in :issue:`26574`). + Build and C API Changes ======================= diff --git a/Misc/ACKS b/Misc/ACKS index e67f6d1ef0d..52eae6944d4 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1376,6 +1376,7 @@ Mark Smith Roy Smith Ryan Smith-Roberts Rafal Smotrzyk +Josh Snider Eric Snow Dirk Soede Nir Soffer diff --git a/Misc/NEWS b/Misc/NEWS index 6f5c7ab0bf7..2fa82f348ec 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,9 @@ Release date: tba Core and Builtins ----------------- +- Issue #26574: Optimize ``bytes.replace(b'', b'.')`` and + ``bytearray.replace(b'', b'.')``. Patch written by Josh Snider. + - Issue #26581: If coding cookie is specified multiple times on a line in Python source code file, only the first one is taken to account. diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 9e8ba399929..209a64130e9 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -1705,17 +1705,27 @@ replace_interleave(PyByteArrayObject *self, self_s = PyByteArray_AS_STRING(self); result_s = PyByteArray_AS_STRING(result); - /* TODO: special case single character, which doesn't need memcpy */ - - /* Lay the first one down (guaranteed this will occur) */ - Py_MEMCPY(result_s, to_s, to_len); - result_s += to_len; - count -= 1; - - for (i=0; i 1) { + /* Lay the first one down (guaranteed this will occur) */ Py_MEMCPY(result_s, to_s, to_len); result_s += to_len; + count -= 1; + + for (i = 0; i < count; i++) { + *result_s++ = *self_s++; + Py_MEMCPY(result_s, to_s, to_len); + result_s += to_len; + } + } + else { + result_s[0] = to_s[0]; + result_s += to_len; + count -= 1; + for (i = 0; i < count; i++) { + *result_s++ = *self_s++; + result_s[0] = to_s[0]; + result_s += to_len; + } } /* Copy the rest of the original string */ diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 602dea681ce..5b9006eb9e2 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2464,17 +2464,27 @@ replace_interleave(PyBytesObject *self, self_s = PyBytes_AS_STRING(self); result_s = PyBytes_AS_STRING(result); - /* TODO: special case single character, which doesn't need memcpy */ - - /* Lay the first one down (guaranteed this will occur) */ - Py_MEMCPY(result_s, to_s, to_len); - result_s += to_len; - count -= 1; - - for (i=0; i 1) { + /* Lay the first one down (guaranteed this will occur) */ Py_MEMCPY(result_s, to_s, to_len); result_s += to_len; + count -= 1; + + for (i = 0; i < count; i++) { + *result_s++ = *self_s++; + Py_MEMCPY(result_s, to_s, to_len); + result_s += to_len; + } + } + else { + result_s[0] = to_s[0]; + result_s += to_len; + count -= 1; + for (i = 0; i < count; i++) { + *result_s++ = *self_s++; + result_s[0] = to_s[0]; + result_s += to_len; + } } /* Copy the rest of the original string */