gh-93033: Use wmemchr in stringlib (GH-93034)

Performance is generally comparable in the "good" case, where memchr
doesn't return any collisions (false matches on the lower byte), and is
clearly faster when there are collisions.
This commit is contained in:
goldsteinn 2022-05-23 20:45:31 -05:00 committed by GitHub
parent f7fabae75c
commit 7108bdf27c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 39 additions and 14 deletions

View File

@ -0,0 +1 @@
Searching in some strings (platform dependent, i.e. [U+0100, U+FFFF] on Windows or [U+00010000, U+FFFFFFFF] on 64-bit Linux) is now up to 10 times faster.

View File

@ -1096,6 +1096,7 @@ bytearray_dealloc(PyByteArrayObject *self)
#define STRINGLIB_ISSPACE Py_ISSPACE #define STRINGLIB_ISSPACE Py_ISSPACE
#define STRINGLIB_ISLINEBREAK(x) ((x == '\n') || (x == '\r')) #define STRINGLIB_ISLINEBREAK(x) ((x == '\n') || (x == '\r'))
#define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact #define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact
#define STRINGLIB_FAST_MEMCHR memchr
#define STRINGLIB_MUTABLE 1 #define STRINGLIB_MUTABLE 1
#include "stringlib/fastsearch.h" #include "stringlib/fastsearch.h"

View File

@ -431,6 +431,7 @@ _Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to)
#define STRINGLIB(F) stringlib_##F #define STRINGLIB(F) stringlib_##F
#define STRINGLIB_CHAR char #define STRINGLIB_CHAR char
#define STRINGLIB_SIZEOF_CHAR 1 #define STRINGLIB_SIZEOF_CHAR 1
#define STRINGLIB_FAST_MEMCHR memchr
#include "stringlib/fastsearch.h" #include "stringlib/fastsearch.h"
#include "stringlib/count.h" #include "stringlib/count.h"

View File

@ -21,6 +21,7 @@
#define STRINGLIB_CHECK PyUnicode_Check #define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact #define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
#define STRINGLIB_MUTABLE 0 #define STRINGLIB_MUTABLE 0
#define STRINGLIB_FAST_MEMCHR memchr
#define STRINGLIB_TOSTR PyObject_Str #define STRINGLIB_TOSTR PyObject_Str
#define STRINGLIB_TOASCII PyObject_ASCII #define STRINGLIB_TOASCII PyObject_ASCII

View File

@ -39,7 +39,7 @@
#define STRINGLIB_BLOOM(mask, ch) \ #define STRINGLIB_BLOOM(mask, ch) \
((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1))))) ((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
#if STRINGLIB_SIZEOF_CHAR == 1 #ifdef STRINGLIB_FAST_MEMCHR
# define MEMCHR_CUT_OFF 15 # define MEMCHR_CUT_OFF 15
#else #else
# define MEMCHR_CUT_OFF 40 # define MEMCHR_CUT_OFF 40
@ -53,8 +53,8 @@ STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
p = s; p = s;
e = s + n; e = s + n;
if (n > MEMCHR_CUT_OFF) { if (n > MEMCHR_CUT_OFF) {
#if STRINGLIB_SIZEOF_CHAR == 1 #ifdef STRINGLIB_FAST_MEMCHR
p = memchr(s, ch, n); p = STRINGLIB_FAST_MEMCHR(s, ch, n);
if (p != NULL) if (p != NULL)
return (p - s); return (p - s);
return -1; return -1;
@ -102,16 +102,26 @@ STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
return -1; return -1;
} }
#undef MEMCHR_CUT_OFF
#if STRINGLIB_SIZEOF_CHAR == 1
# define MEMRCHR_CUT_OFF 15
#else
# define MEMRCHR_CUT_OFF 40
#endif
Py_LOCAL_INLINE(Py_ssize_t) Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch) STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
{ {
const STRINGLIB_CHAR *p; const STRINGLIB_CHAR *p;
#ifdef HAVE_MEMRCHR #ifdef HAVE_MEMRCHR
/* memrchr() is a GNU extension, available since glibc 2.1.91. /* memrchr() is a GNU extension, available since glibc 2.1.91. it
it doesn't seem as optimized as memchr(), but is still quite doesn't seem as optimized as memchr(), but is still quite
faster than our hand-written loop below */ faster than our hand-written loop below. There is no wmemrchr
for 4-byte chars. */
if (n > MEMCHR_CUT_OFF) { if (n > MEMRCHR_CUT_OFF) {
#if STRINGLIB_SIZEOF_CHAR == 1 #if STRINGLIB_SIZEOF_CHAR == 1
p = memrchr(s, ch, n); p = memrchr(s, ch, n);
if (p != NULL) if (p != NULL)
@ -139,11 +149,11 @@ STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
if (*p == ch) if (*p == ch)
return n; return n;
/* False positive */ /* False positive */
if (n1 - n > MEMCHR_CUT_OFF) if (n1 - n > MEMRCHR_CUT_OFF)
continue; continue;
if (n <= MEMCHR_CUT_OFF) if (n <= MEMRCHR_CUT_OFF)
break; break;
s1 = p - MEMCHR_CUT_OFF; s1 = p - MEMRCHR_CUT_OFF;
while (p > s1) { while (p > s1) {
p--; p--;
if (*p == ch) if (*p == ch)
@ -151,7 +161,7 @@ STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
} }
n = p - s; n = p - s;
} }
while (n > MEMCHR_CUT_OFF); while (n > MEMRCHR_CUT_OFF);
} }
#endif #endif
} }
@ -165,7 +175,7 @@ STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
return -1; return -1;
} }
#undef MEMCHR_CUT_OFF #undef MEMRCHR_CUT_OFF
/* Change to a 1 to see logging comments walk through the algorithm. */ /* Change to a 1 to see logging comments walk through the algorithm. */
#if 0 && STRINGLIB_SIZEOF_CHAR == 1 #if 0 && STRINGLIB_SIZEOF_CHAR == 1

View File

@ -29,9 +29,9 @@ STRINGLIB(replace_1char_inplace)(STRINGLIB_CHAR* s, STRINGLIB_CHAR* end,
if (!--attempts) { if (!--attempts) {
/* if u1 was not found for attempts iterations, /* if u1 was not found for attempts iterations,
use FASTSEARCH() or memchr() */ use FASTSEARCH() or memchr() */
#if STRINGLIB_SIZEOF_CHAR == 1 #ifdef STRINGLIB_FAST_MEMCHR
s++; s++;
s = memchr(s, u1, end - s); s = STRINGLIB_FAST_MEMCHR(s, u1, end - s);
if (s == NULL) if (s == NULL)
return; return;
#else #else

View File

@ -24,4 +24,5 @@
#define STRINGLIB_CHECK_EXACT PyBytes_CheckExact #define STRINGLIB_CHECK_EXACT PyBytes_CheckExact
#define STRINGLIB_TOSTR PyObject_Str #define STRINGLIB_TOSTR PyObject_Str
#define STRINGLIB_TOASCII PyObject_Repr #define STRINGLIB_TOASCII PyObject_Repr
#define STRINGLIB_FAST_MEMCHR memchr
#endif /* !STRINGLIB_STRINGDEFS_H */ #endif /* !STRINGLIB_STRINGDEFS_H */

View File

@ -20,6 +20,7 @@
#define STRINGLIB_NEW _PyUnicode_FromUCS1 #define STRINGLIB_NEW _PyUnicode_FromUCS1
#define STRINGLIB_CHECK PyUnicode_Check #define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact #define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
#define STRINGLIB_FAST_MEMCHR memchr
#define STRINGLIB_MUTABLE 0 #define STRINGLIB_MUTABLE 0
#define STRINGLIB_TOSTR PyObject_Str #define STRINGLIB_TOSTR PyObject_Str

View File

@ -21,6 +21,10 @@
#define STRINGLIB_CHECK PyUnicode_Check #define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact #define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
#define STRINGLIB_MUTABLE 0 #define STRINGLIB_MUTABLE 0
/* Only when wchar_t is 2 bytes wide: define STRINGLIB_FAST_MEMCHR as a
   wrapper around wmemchr(). The buffer pointer is cast to
   (const wchar_t *) for the call and the result is cast back to
   (Py_UCS2 *). For any other wchar_t size the macro is left undefined
   here. */
#if SIZEOF_WCHAR_T == 2
#define STRINGLIB_FAST_MEMCHR(s, c, n) \
(Py_UCS2 *)wmemchr((const wchar_t *)(s), c, n)
#endif
#define STRINGLIB_TOSTR PyObject_Str #define STRINGLIB_TOSTR PyObject_Str
#define STRINGLIB_TOASCII PyObject_ASCII #define STRINGLIB_TOASCII PyObject_ASCII

View File

@ -21,6 +21,10 @@
#define STRINGLIB_CHECK PyUnicode_Check #define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact #define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
#define STRINGLIB_MUTABLE 0 #define STRINGLIB_MUTABLE 0
/* Only when wchar_t is 4 bytes wide: define STRINGLIB_FAST_MEMCHR as a
   wrapper around wmemchr(). The buffer pointer is cast to
   (const wchar_t *) for the call and the result is cast back to
   (Py_UCS4 *). For any other wchar_t size the macro is left undefined
   here. */
#if SIZEOF_WCHAR_T == 4
#define STRINGLIB_FAST_MEMCHR(s, c, n) \
(Py_UCS4 *)wmemchr((const wchar_t *)(s), c, n)
#endif
#define STRINGLIB_TOSTR PyObject_Str #define STRINGLIB_TOSTR PyObject_Str
#define STRINGLIB_TOASCII PyObject_ASCII #define STRINGLIB_TOASCII PyObject_ASCII

View File

@ -8,3 +8,4 @@
#undef STRINGLIB_NEW #undef STRINGLIB_NEW
#undef STRINGLIB_IS_UNICODE #undef STRINGLIB_IS_UNICODE
#undef STRINGLIB_MUTABLE #undef STRINGLIB_MUTABLE
#undef STRINGLIB_FAST_MEMCHR