bpo-46848: Use stringlib/fastsearch in mmap (GH-31625)

Speed up mmap.find(). Add _PyBytes_Find() and _PyBytes_ReverseFind().
This commit is contained in:
Dennis Sweeney 2022-03-01 23:46:30 -05:00 committed by GitHub
parent 9833bb91e4
commit 6ddb09f35b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 53 additions and 19 deletions

View File

@ -116,3 +116,22 @@ PyAPI_FUNC(void*) _PyBytesWriter_WriteBytes(_PyBytesWriter *writer,
void *str,
const void *bytes,
Py_ssize_t size);
/* Substring Search.
Returns the index of the first occurrence of
a substring ("needle") in a larger text ("haystack").
len_haystack and len_needle are the lengths of the two buffers.
If the needle is not found, return -1.
If the needle is found, return its index plus offset.
*/
PyAPI_FUNC(Py_ssize_t)
_PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
const char *needle, Py_ssize_t len_needle,
Py_ssize_t offset);
/* Same contract as _PyBytes_Find() (-1 if the needle is not found,
otherwise the index plus offset), but the haystack is searched
right-to-left. */
PyAPI_FUNC(Py_ssize_t)
_PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
const char *needle, Py_ssize_t len_needle,
Py_ssize_t offset);

View File

@ -0,0 +1,3 @@
For performance, use the optimized string-searching implementations
from :meth:`~bytes.find` and :meth:`~bytes.rfind`
for :meth:`~mmap.mmap.find` and :meth:`~mmap.mmap.rfind`.

View File

@ -315,12 +315,8 @@ mmap_gfind(mmap_object *self,
if (!PyArg_ParseTuple(args, reverse ? "y*|nn:rfind" : "y*|nn:find",
&view, &start, &end)) {
return NULL;
} else {
const char *p, *start_p, *end_p;
int sign = reverse ? -1 : 1;
const char *needle = view.buf;
Py_ssize_t len = view.len;
}
else {
if (start < 0)
start += self->size;
if (start < 0)
@ -335,21 +331,19 @@ mmap_gfind(mmap_object *self,
else if (end > self->size)
end = self->size;
start_p = self->data + start;
end_p = self->data + end;
for (p = (reverse ? end_p - len : start_p);
(p >= start_p) && (p + len <= end_p); p += sign) {
Py_ssize_t i;
for (i = 0; i < len && needle[i] == p[i]; ++i)
/* nothing */;
if (i == len) {
PyBuffer_Release(&view);
return PyLong_FromSsize_t(p - self->data);
Py_ssize_t res;
if (reverse) {
res = _PyBytes_ReverseFind(
self->data + start, end - start,
view.buf, view.len, start);
}
else {
res = _PyBytes_Find(
self->data + start, end - start,
view.buf, view.len, start);
}
PyBuffer_Release(&view);
return PyLong_FromLong(-1);
return PyLong_FromSsize_t(res);
}
}

View File

@ -1247,6 +1247,24 @@ PyBytes_AsStringAndSize(PyObject *obj,
#undef STRINGLIB_GET_EMPTY
/* Find the first occurrence of needle in haystack.

   Returns the index of the match plus offset, or -1 if there is no match.
   An empty needle matches at index 0, so offset is returned.

   The haystack is not required to be followed by a readable terminator
   byte (callers such as mmap pass raw buffers).  The underlying
   stringlib_find() may look at the byte just past the length it is given,
   so it is only ever asked to search the first len_haystack - 1 bytes;
   the single alignment that ends on the final byte is checked by hand. */
Py_ssize_t
_PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
              const char *needle, Py_ssize_t len_needle,
              Py_ssize_t offset)
{
    /* Trivial cases first, so the code below can rely on
       1 <= len_needle <= len_haystack. */
    if (len_needle == 0) {
        return offset;
    }
    if (len_needle > len_haystack) {
        return -1;
    }

    /* Search everything except the last byte: any read one past this
       shortened length still lands inside the caller's buffer. */
    Py_ssize_t res = stringlib_find(haystack, len_haystack - 1,
                                    needle, len_needle, offset);
    if (res != -1) {
        return res;
    }

    /* The only candidate not covered above is the match that ends exactly
       on the last byte of the haystack. */
    Py_ssize_t last_align = len_haystack - len_needle;
    Py_ssize_t i = 0;
    while (i < len_needle && haystack[last_align + i] == needle[i]) {
        i++;
    }
    if (i == len_needle) {
        return offset + last_align;
    }
    return -1;
}
/* Right-to-left substring search.

   Forwards all five arguments unchanged to stringlib_rfind() and returns
   whatever it reports. */
Py_ssize_t
_PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
                     const char *needle, Py_ssize_t len_needle,
                     Py_ssize_t offset)
{
    Py_ssize_t found = stringlib_rfind(haystack, len_haystack,
                                       needle, len_needle, offset);
    return found;
}
PyObject *
PyBytes_Repr(PyObject *obj, int smartquotes)
{