mirror of https://github.com/python/cpython
bpo-46848: Use stringlib/fastsearch in mmap (GH-31625)
Speed up mmap.find(). Add _PyBytes_Find() and _PyBytes_ReverseFind().
This commit is contained in:
parent
9833bb91e4
commit
6ddb09f35b
|
@ -116,3 +116,22 @@ PyAPI_FUNC(void*) _PyBytesWriter_WriteBytes(_PyBytesWriter *writer,
|
|||
void *str,
|
||||
const void *bytes,
|
||||
Py_ssize_t size);
|
||||
|
||||
/* Substring Search.
|
||||
|
||||
Returns the index of the first occurrence of
|
||||
a substring ("needle") in a larger text ("haystack").
|
||||
If the needle is not found, return -1.
|
||||
If the needle is found, add offset to the index.
|
||||
*/
|
||||
|
||||
PyAPI_FUNC(Py_ssize_t)
|
||||
_PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
|
||||
const char *needle, Py_ssize_t len_needle,
|
||||
Py_ssize_t offset);
|
||||
|
||||
/* Same as above, but search right-to-left */
|
||||
PyAPI_FUNC(Py_ssize_t)
|
||||
_PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
|
||||
const char *needle, Py_ssize_t len_needle,
|
||||
Py_ssize_t offset);
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
For performance, use the optimized string-searching implementations
|
||||
from :meth:`~bytes.find` and :meth:`~bytes.rfind`
|
||||
for :meth:`~mmap.find` and :meth:`~mmap.rfind`.
|
|
@ -315,12 +315,8 @@ mmap_gfind(mmap_object *self,
|
|||
if (!PyArg_ParseTuple(args, reverse ? "y*|nn:rfind" : "y*|nn:find",
|
||||
&view, &start, &end)) {
|
||||
return NULL;
|
||||
} else {
|
||||
const char *p, *start_p, *end_p;
|
||||
int sign = reverse ? -1 : 1;
|
||||
const char *needle = view.buf;
|
||||
Py_ssize_t len = view.len;
|
||||
|
||||
}
|
||||
else {
|
||||
if (start < 0)
|
||||
start += self->size;
|
||||
if (start < 0)
|
||||
|
@ -335,21 +331,19 @@ mmap_gfind(mmap_object *self,
|
|||
else if (end > self->size)
|
||||
end = self->size;
|
||||
|
||||
start_p = self->data + start;
|
||||
end_p = self->data + end;
|
||||
|
||||
for (p = (reverse ? end_p - len : start_p);
|
||||
(p >= start_p) && (p + len <= end_p); p += sign) {
|
||||
Py_ssize_t i;
|
||||
for (i = 0; i < len && needle[i] == p[i]; ++i)
|
||||
/* nothing */;
|
||||
if (i == len) {
|
||||
PyBuffer_Release(&view);
|
||||
return PyLong_FromSsize_t(p - self->data);
|
||||
Py_ssize_t res;
|
||||
if (reverse) {
|
||||
res = _PyBytes_ReverseFind(
|
||||
self->data + start, end - start,
|
||||
view.buf, view.len, start);
|
||||
}
|
||||
else {
|
||||
res = _PyBytes_Find(
|
||||
self->data + start, end - start,
|
||||
view.buf, view.len, start);
|
||||
}
|
||||
PyBuffer_Release(&view);
|
||||
return PyLong_FromLong(-1);
|
||||
return PyLong_FromSsize_t(res);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1247,6 +1247,24 @@ PyBytes_AsStringAndSize(PyObject *obj,
|
|||
|
||||
#undef STRINGLIB_GET_EMPTY
|
||||
|
||||
/* Forward (left-to-right) substring search over raw byte buffers.
 *
 * Thin wrapper: passes haystack/len_haystack, needle/len_needle and offset
 * unchanged to stringlib_find() and returns whatever it returns.
 * Per the declaration comment in the header hunk of this commit, the result
 * is the index of the first match with `offset` added, or -1 when the needle
 * is not found.
 *
 * NOTE(review): stringlib_find() is not visible here. Confirm how it treats
 * len_haystack < 0 (the mmap caller passes `end - start`) and whether it can
 * read at or beyond haystack + len_haystack, which matters for mapped memory.
 */
Py_ssize_t
|
||||
_PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
|
||||
const char *needle, Py_ssize_t len_needle,
|
||||
Py_ssize_t offset)
|
||||
{
|
||||
return stringlib_find(haystack, len_haystack,
|
||||
needle, len_needle, offset);
|
||||
}
|
||||
|
||||
/* Reverse (right-to-left) counterpart of _PyBytes_Find().
 *
 * Thin wrapper: passes haystack/len_haystack, needle/len_needle and offset
 * unchanged to stringlib_rfind() and returns whatever it returns.
 * The header declaration describes it as the same search as _PyBytes_Find()
 * but performed right-to-left.
 *
 * NOTE(review): stringlib_rfind() is not visible here. Presumably it returns
 * -1 on no match and adds `offset` on a match, like the forward variant;
 * verify against its definition.
 */
Py_ssize_t
|
||||
_PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
|
||||
const char *needle, Py_ssize_t len_needle,
|
||||
Py_ssize_t offset)
|
||||
{
|
||||
return stringlib_rfind(haystack, len_haystack,
|
||||
needle, len_needle, offset);
|
||||
}
|
||||
|
||||
PyObject *
|
||||
PyBytes_Repr(PyObject *obj, int smartquotes)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue