From e2cef885a25967605007248cb158671b765df002 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 13 Apr 2013 22:45:04 +0300 Subject: [PATCH] Issue #16061: Speed up str.replace() for replacing 1-character strings. --- Makefile.pre.in | 1 + Misc/NEWS | 2 + Objects/stringlib/replace.h | 53 +++++++++++++++++++++++++ Objects/unicodeobject.c | 64 ++++++++++++++++++------------ PC/VS9.0/pythoncore.vcproj | 4 ++ PCbuild/pythoncore.vcxproj | 1 + PCbuild/pythoncore.vcxproj.filters | 3 ++ 7 files changed, 102 insertions(+), 26 deletions(-) create mode 100644 Objects/stringlib/replace.h diff --git a/Makefile.pre.in b/Makefile.pre.in index 534ddc0dc55..ab56e3f4e5d 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -726,6 +726,7 @@ UNICODE_DEPS = \ $(srcdir)/Objects/stringlib/find_max_char.h \ $(srcdir)/Objects/stringlib/localeutil.h \ $(srcdir)/Objects/stringlib/partition.h \ + $(srcdir)/Objects/stringlib/replace.h \ $(srcdir)/Objects/stringlib/split.h \ $(srcdir)/Objects/stringlib/ucs1lib.h \ $(srcdir)/Objects/stringlib/ucs2lib.h \ diff --git a/Misc/NEWS b/Misc/NEWS index c6188de24b0..1889ac25d0c 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,8 @@ What's New in Python 3.4.0 Alpha 1? Core and Builtins ----------------- +- Issue #16061: Speed up str.replace() for replacing 1-character strings. + - Issue #17715: Fix segmentation fault from raising an exception in a __trunc__ method. diff --git a/Objects/stringlib/replace.h b/Objects/stringlib/replace.h new file mode 100644 index 00000000000..ef318ed6dd5 --- /dev/null +++ b/Objects/stringlib/replace.h @@ -0,0 +1,53 @@ +/* stringlib: replace implementation */ + +#ifndef STRINGLIB_FASTSEARCH_H +#error must include "stringlib/fastsearch.h" before including this module +#endif + +/* Replace occurrences of u1 with u2 directly in the buffer that starts at s and stops before end. The character at s is overwritten unconditionally, so s is expected to lie before end and to already point at an occurrence of u1 (TODO confirm against callers). maxcount is decremented before it is tested, so for maxcount >= 1 at most maxcount characters are written, counting the first one. NOTE(review): a maxcount <= 0 on entry would not bound the number of replacements; confirm callers never pass it. */ Py_LOCAL_INLINE(void) +STRINGLIB(replace_1char_inplace)(STRINGLIB_CHAR* s, STRINGLIB_CHAR* end, + Py_UCS4 u1, Py_UCS4 u2, Py_ssize_t maxcount) +{ + *s = u2; + while (--maxcount && ++s != end) { + /* Find the next character to be replaced. 
+ + If it occurs often, it is faster to scan for it using an inline + loop. If it occurs seldom, it is faster to scan for it using a + function call; the overhead of the function call is amortized + across the many characters that call covers. We start with an + inline loop and use a heuristic to determine whether to fall back + to a function call. */ + if (*s != u1) { + int attempts = 10; + /* scan for u1 inline, allowing a limited number of misses (attempts) before switching to a library search */ + while (1) { + if (++s == end) + return; + if (*s == u1) + break; + if (!--attempts) { + /* if u1 was not found for attempts iterations, + use FASTSEARCH() or memchr() */ +#if STRINGLIB_SIZEOF_CHAR == 1 + /* the character at s was already tested above; search from the next one */ s++; + s = memchr(s, u1, end - s); + if (s == NULL) + return; +#else + Py_ssize_t i; + STRINGLIB_CHAR ch1 = (STRINGLIB_CHAR) u1; + s++; + i = FASTSEARCH(s, end - s, &ch1, 1, 0, FAST_SEARCH); + if (i < 0) + return; + s += i; +#endif + /* s now points at the next u1: leave the inline scan so the store after the if block replaces it */ + break; + } + } + } + *s = u2; + } +} diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e52571db2e8..3688f4a789f 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -605,6 +605,7 @@ make_bloom_mask(int kind, void* ptr, Py_ssize_t len) #include "stringlib/split.h" #include "stringlib/count.h" #include "stringlib/find.h" +#include "stringlib/replace.h" #include "stringlib/find_max_char.h" #include "stringlib/localeutil.h" #include "stringlib/undef.h" @@ -615,6 +616,7 @@ make_bloom_mask(int kind, void* ptr, Py_ssize_t len) #include "stringlib/split.h" #include "stringlib/count.h" #include "stringlib/find.h" +#include "stringlib/replace.h" #include "stringlib/find_max_char.h" #include "stringlib/localeutil.h" #include "stringlib/undef.h" @@ -625,6 +627,7 @@ make_bloom_mask(int kind, void* ptr, Py_ssize_t len) #include "stringlib/split.h" #include "stringlib/count.h" #include "stringlib/find.h" +#include "stringlib/replace.h" #include "stringlib/find_max_char.h" #include "stringlib/localeutil.h" #include "stringlib/undef.h" @@ -9927,6 +9930,31 @@ 
anylib_count(int kind, PyObject *sstr, void* sbuf, Py_ssize_t slen, return 0; } +/* Apply the one-character in-place replacement to the unicode object u, starting at character index pos. Reads the kind, data pointer and length of u, then forwards the range from data + pos up to data + len, together with u1, u2 and maxcount, to the ucs1lib, ucs2lib or ucs4lib variant of replace_1char_inplace that matches the kind. Any kind other than 1-byte or 2-byte is asserted to be 4-byte. */ static void +replace_1char_inplace(PyObject *u, Py_ssize_t pos, + Py_UCS4 u1, Py_UCS4 u2, Py_ssize_t maxcount) +{ + int kind = PyUnicode_KIND(u); + void *data = PyUnicode_DATA(u); + Py_ssize_t len = PyUnicode_GET_LENGTH(u); + if (kind == PyUnicode_1BYTE_KIND) { + ucs1lib_replace_1char_inplace((Py_UCS1 *)data + pos, + (Py_UCS1 *)data + len, + u1, u2, maxcount); + } + else if (kind == PyUnicode_2BYTE_KIND) { + ucs2lib_replace_1char_inplace((Py_UCS2 *)data + pos, + (Py_UCS2 *)data + len, + u1, u2, maxcount); + } + else { + assert(kind == PyUnicode_4BYTE_KIND); + ucs4lib_replace_1char_inplace((Py_UCS4 *)data + pos, + (Py_UCS4 *)data + len, + u1, u2, maxcount); + } +} + static PyObject * replace(PyObject *self, PyObject *str1, PyObject *str2, Py_ssize_t maxcount) @@ -9943,7 +9971,7 @@ replace(PyObject *self, PyObject *str1, Py_ssize_t len1 = PyUnicode_GET_LENGTH(str1); Py_ssize_t len2 = PyUnicode_GET_LENGTH(str2); int mayshrink; - Py_UCS4 maxchar, maxchar_str2; + Py_UCS4 maxchar, maxchar_str1, maxchar_str2; if (maxcount < 0) maxcount = PY_SSIZE_T_MAX; @@ -9952,15 +9980,16 @@ replace(PyObject *self, PyObject *str1, if (str1 == str2) goto nothing; - if (skind < kind1) - /* substring too wide to be present */ - goto nothing; maxchar = PyUnicode_MAX_CHAR_VALUE(self); + maxchar_str1 = PyUnicode_MAX_CHAR_VALUE(str1); + if (maxchar < maxchar_str1) + /* substring too wide to be present */ + goto nothing; maxchar_str2 = PyUnicode_MAX_CHAR_VALUE(str2); /* Replacing str1 with str2 may cause a maxchar reduction in the result string. 
*/ - mayshrink = (maxchar_str2 < maxchar); + mayshrink = (maxchar_str2 < maxchar_str1) && (maxchar == maxchar_str1); maxchar = MAX_MAXCHAR(maxchar, maxchar_str2); if (len1 == len2) { @@ -9970,36 +9999,19 @@ replace(PyObject *self, PyObject *str1, if (len1 == 1) { /* replace characters */ Py_UCS4 u1, u2; - int rkind; - Py_ssize_t index, pos; - char *src, *rbuf; + Py_ssize_t pos; u1 = PyUnicode_READ(kind1, buf1, 0); - pos = findchar(sbuf, PyUnicode_KIND(self), slen, u1, 1); + pos = findchar(sbuf, skind, slen, u1, 1); if (pos < 0) goto nothing; u2 = PyUnicode_READ(kind2, buf2, 0); u = PyUnicode_New(slen, maxchar); if (!u) goto error; - _PyUnicode_FastCopyCharacters(u, 0, self, 0, slen); - rkind = PyUnicode_KIND(u); - rbuf = PyUnicode_DATA(u); - PyUnicode_WRITE(rkind, rbuf, pos, u2); - index = 0; - src = sbuf; - while (--maxcount) - { - pos++; - src += pos * PyUnicode_KIND(self); - slen -= pos; - index += pos; - pos = findchar(src, PyUnicode_KIND(self), slen, u1, 1); - if (pos < 0) - break; - PyUnicode_WRITE(rkind, rbuf, index + pos, u2); - } + _PyUnicode_FastCopyCharacters(u, 0, self, 0, slen); + replace_1char_inplace(u, pos, u1, u2, maxcount); } else { int rkind = skind; diff --git a/PC/VS9.0/pythoncore.vcproj b/PC/VS9.0/pythoncore.vcproj index 9fb63ff105b..ab7feca6796 100644 --- a/PC/VS9.0/pythoncore.vcproj +++ b/PC/VS9.0/pythoncore.vcproj @@ -1586,6 +1586,10 @@ RelativePath="..\..\Objects\rangeobject.c" > + + diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 29493e399b7..b00991e0c70 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -475,6 +475,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 09b4bb4d027..915fec50491 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -378,6 +378,9 @@ Objects + + Objects + Objects