bpo-46848: Use stringlib/fastsearch in mmap (GH-31625)

Speed up mmap.find(). Add _PyBytes_Find() and _PyBytes_ReverseFind().
This commit is contained in:
Dennis Sweeney 2022-03-01 23:46:30 -05:00 committed by GitHub
parent 9833bb91e4
commit 6ddb09f35b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 53 additions and 19 deletions

View File

@ -116,3 +116,22 @@ PyAPI_FUNC(void*) _PyBytesWriter_WriteBytes(_PyBytesWriter *writer,
void *str, void *str,
const void *bytes, const void *bytes,
Py_ssize_t size); Py_ssize_t size);
/* Substring search.
Returns the index of the first occurrence of
a substring ("needle") in a larger text ("haystack").

haystack, len_haystack: the buffer to search and its length.
needle, len_needle: the characters to look for and their length.
offset: value added to the index of a match before it is returned
(for example the start of the slice being searched, so that the
result is relative to the whole buffer).

If the needle is not found, return -1 (offset is not added).
If the needle is found, return its index plus offset.
*/
PyAPI_FUNC(Py_ssize_t)
_PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
const char *needle, Py_ssize_t len_needle,
Py_ssize_t offset);
/* Same as _PyBytes_Find(), but search right-to-left.
Takes the same arguments: the haystack and its length, the needle and
its length, and an offset that is added to the index of a match.
Returns -1 if the needle is not found.
*/
PyAPI_FUNC(Py_ssize_t)
_PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
const char *needle, Py_ssize_t len_needle,
Py_ssize_t offset);

View File

@ -0,0 +1,3 @@
For performance, use the optimized string-searching implementations
from :meth:`~bytes.find` and :meth:`~bytes.rfind`
for :meth:`~mmap.find` and :meth:`~mmap.rfind`.

View File

@ -315,12 +315,8 @@ mmap_gfind(mmap_object *self,
if (!PyArg_ParseTuple(args, reverse ? "y*|nn:rfind" : "y*|nn:find", if (!PyArg_ParseTuple(args, reverse ? "y*|nn:rfind" : "y*|nn:find",
&view, &start, &end)) { &view, &start, &end)) {
return NULL; return NULL;
} else { }
const char *p, *start_p, *end_p; else {
int sign = reverse ? -1 : 1;
const char *needle = view.buf;
Py_ssize_t len = view.len;
if (start < 0) if (start < 0)
start += self->size; start += self->size;
if (start < 0) if (start < 0)
@ -335,21 +331,19 @@ mmap_gfind(mmap_object *self,
else if (end > self->size) else if (end > self->size)
end = self->size; end = self->size;
start_p = self->data + start; Py_ssize_t res;
end_p = self->data + end; if (reverse) {
res = _PyBytes_ReverseFind(
for (p = (reverse ? end_p - len : start_p); self->data + start, end - start,
(p >= start_p) && (p + len <= end_p); p += sign) { view.buf, view.len, start);
Py_ssize_t i; }
for (i = 0; i < len && needle[i] == p[i]; ++i) else {
/* nothing */; res = _PyBytes_Find(
if (i == len) { self->data + start, end - start,
PyBuffer_Release(&view); view.buf, view.len, start);
return PyLong_FromSsize_t(p - self->data);
}
} }
PyBuffer_Release(&view); PyBuffer_Release(&view);
return PyLong_FromLong(-1); return PyLong_FromSsize_t(res);
} }
} }

View File

@ -1247,6 +1247,24 @@ PyBytes_AsStringAndSize(PyObject *obj,
#undef STRINGLIB_GET_EMPTY #undef STRINGLIB_GET_EMPTY
/* Find the first occurrence of needle[0:len_needle] in
   haystack[0:len_haystack].

   Returns the index of the match plus `offset`, or -1 when there is no
   match.  An empty needle matches at index 0, so `offset` is returned.
   A negative len_haystack (which a caller computing `end - start` for an
   inverted slice may pass) never matches.

   The haystack is not required to be followed by a readable byte: it may
   be a raw memory region (e.g. an mmap) rather than a NUL-terminated bytes
   object.  stringlib_find() may inspect haystack[len_haystack] (see
   gh-105235), so it is only ever given the haystack minus its last byte
   and the final alignment is compared separately. */
Py_ssize_t
_PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
              const char *needle, Py_ssize_t len_needle,
              Py_ssize_t offset)
{
    if (len_haystack < 0) {
        /* Empty/inverted range: nothing can match, not even b"". */
        return -1;
    }
    if (len_needle <= 0) {
        /* Same result stringlib_find() gives for an empty needle. */
        return offset;
    }
    if (len_needle > len_haystack) {
        return -1;
    }

    /* Every match that does not end on the last byte of the haystack lies
       entirely inside haystack[0:len_haystack - 1]; searching that prefix
       keeps any one-past-the-end read inside the real buffer. */
    Py_ssize_t index = stringlib_find(haystack, len_haystack - 1,
                                      needle, len_needle, 0);
    if (index >= 0) {
        return index + offset;
    }

    /* The only candidate left is the alignment ending on the last byte. */
    Py_ssize_t last_align = len_haystack - len_needle;
    if (memcmp(haystack + last_align, needle, (size_t)len_needle) == 0) {
        return last_align + offset;
    }
    return -1;
}
/* Like _PyBytes_Find(), but search right-to-left.

   Delegates to stringlib_rfind() and returns its result: per the
   declaration's contract, the index of the match plus `offset`, or -1
   when the needle is not found.

   A negative len_haystack (which a caller computing `end - start` for an
   inverted slice may pass) never matches, consistent with
   _PyBytes_Find(). */
Py_ssize_t
_PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
                     const char *needle, Py_ssize_t len_needle,
                     Py_ssize_t offset)
{
    if (len_haystack < 0) {
        /* Empty/inverted range: nothing can match, not even b"". */
        return -1;
    }
    return stringlib_rfind(haystack, len_haystack,
                           needle, len_needle, offset);
}
PyObject * PyObject *
PyBytes_Repr(PyObject *obj, int smartquotes) PyBytes_Repr(PyObject *obj, int smartquotes)
{ {