mirror of https://github.com/python/cpython
bpo-46848: Use stringlib/fastsearch in mmap (GH-31625)
Speed up mmap.find(). Add _PyBytes_Find() and _PyBytes_ReverseFind().
This commit is contained in:
parent
9833bb91e4
commit
6ddb09f35b
|
@ -116,3 +116,22 @@ PyAPI_FUNC(void*) _PyBytesWriter_WriteBytes(_PyBytesWriter *writer,
|
||||||
void *str,
|
void *str,
|
||||||
const void *bytes,
|
const void *bytes,
|
||||||
Py_ssize_t size);
|
Py_ssize_t size);
|
||||||
|
|
||||||
|
/* Substring Search.
|
||||||
|
|
||||||
|
Returns the index of the first occurrence of
|
||||||
|
a substring ("needle") in a larger text ("haystack").
|
||||||
|
If the needle is not found, return -1.
|
||||||
|
If the needle is found, add offset to the index.
|
||||||
|
*/
|
||||||
|
|
||||||
|
PyAPI_FUNC(Py_ssize_t)
|
||||||
|
_PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
|
||||||
|
const char *needle, Py_ssize_t len_needle,
|
||||||
|
Py_ssize_t offset);
|
||||||
|
|
||||||
|
/* Same as _PyBytes_Find(), but search right-to-left */
|
||||||
|
PyAPI_FUNC(Py_ssize_t)
|
||||||
|
_PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
|
||||||
|
const char *needle, Py_ssize_t len_needle,
|
||||||
|
Py_ssize_t offset);
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
For performance, use the optimized string-searching implementations
|
||||||
|
from :meth:`~bytes.find` and :meth:`~bytes.rfind`
|
||||||
|
for :meth:`~mmap.find` and :meth:`~mmap.rfind`.
|
|
@ -315,12 +315,8 @@ mmap_gfind(mmap_object *self,
|
||||||
if (!PyArg_ParseTuple(args, reverse ? "y*|nn:rfind" : "y*|nn:find",
|
if (!PyArg_ParseTuple(args, reverse ? "y*|nn:rfind" : "y*|nn:find",
|
||||||
&view, &start, &end)) {
|
&view, &start, &end)) {
|
||||||
return NULL;
|
return NULL;
|
||||||
} else {
|
}
|
||||||
const char *p, *start_p, *end_p;
|
else {
|
||||||
int sign = reverse ? -1 : 1;
|
|
||||||
const char *needle = view.buf;
|
|
||||||
Py_ssize_t len = view.len;
|
|
||||||
|
|
||||||
if (start < 0)
|
if (start < 0)
|
||||||
start += self->size;
|
start += self->size;
|
||||||
if (start < 0)
|
if (start < 0)
|
||||||
|
@ -335,21 +331,19 @@ mmap_gfind(mmap_object *self,
|
||||||
else if (end > self->size)
|
else if (end > self->size)
|
||||||
end = self->size;
|
end = self->size;
|
||||||
|
|
||||||
start_p = self->data + start;
|
Py_ssize_t res;
|
||||||
end_p = self->data + end;
|
if (reverse) {
|
||||||
|
res = _PyBytes_ReverseFind(
|
||||||
for (p = (reverse ? end_p - len : start_p);
|
self->data + start, end - start,
|
||||||
(p >= start_p) && (p + len <= end_p); p += sign) {
|
view.buf, view.len, start);
|
||||||
Py_ssize_t i;
|
}
|
||||||
for (i = 0; i < len && needle[i] == p[i]; ++i)
|
else {
|
||||||
/* nothing */;
|
res = _PyBytes_Find(
|
||||||
if (i == len) {
|
self->data + start, end - start,
|
||||||
PyBuffer_Release(&view);
|
view.buf, view.len, start);
|
||||||
return PyLong_FromSsize_t(p - self->data);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
PyBuffer_Release(&view);
|
PyBuffer_Release(&view);
|
||||||
return PyLong_FromLong(-1);
|
return PyLong_FromSsize_t(res);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1247,6 +1247,24 @@ PyBytes_AsStringAndSize(PyObject *obj,
|
||||||
|
|
||||||
#undef STRINGLIB_GET_EMPTY
|
#undef STRINGLIB_GET_EMPTY
|
||||||
|
|
||||||
|
/* Forward substring search over raw byte buffers.
   Thin wrapper: passes haystack, len_haystack, needle, len_needle and
   offset unchanged to stringlib_find() and returns its result as-is.
   Per the contract stated with the declaration, the result is -1 when
   the needle is not found, otherwise the match index plus offset. */
Py_ssize_t
|
||||||
|
_PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
|
||||||
|
const char *needle, Py_ssize_t len_needle,
|
||||||
|
Py_ssize_t offset)
|
||||||
|
{
|
||||||
|
return stringlib_find(haystack, len_haystack,
|
||||||
|
needle, len_needle, offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Right-to-left counterpart of _PyBytes_Find().
   Thin wrapper: passes haystack, len_haystack, needle, len_needle and
   offset unchanged to stringlib_rfind() and returns its result as-is. */
Py_ssize_t
|
||||||
|
_PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
|
||||||
|
const char *needle, Py_ssize_t len_needle,
|
||||||
|
Py_ssize_t offset)
|
||||||
|
{
|
||||||
|
return stringlib_rfind(haystack, len_haystack,
|
||||||
|
needle, len_needle, offset);
|
||||||
|
}
|
||||||
|
|
||||||
PyObject *
|
PyObject *
|
||||||
PyBytes_Repr(PyObject *obj, int smartquotes)
|
PyBytes_Repr(PyObject *obj, int smartquotes)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue