mirror of https://github.com/python/cpython
gh-105235: Prevent reading outside buffer during mmap.find() (#105252)
* Add a special case for s[-m:] == p in _PyBytes_Find * Add tests for _PyBytes_Find * Make sure that start <= end in mmap.find
This commit is contained in:
parent
2d43beec22
commit
ab86426a34
|
@ -299,6 +299,27 @@ class MmapTests(unittest.TestCase):
|
||||||
self.assertEqual(m.find(b'one', 1, -2), -1)
|
self.assertEqual(m.find(b'one', 1, -2), -1)
|
||||||
self.assertEqual(m.find(bytearray(b'one')), 0)
|
self.assertEqual(m.find(bytearray(b'one')), 0)
|
||||||
|
|
||||||
|
for i in range(-n-1, n+1):
|
||||||
|
for j in range(-n-1, n+1):
|
||||||
|
for p in [b"o", b"on", b"two", b"ones", b"s"]:
|
||||||
|
expected = data.find(p, i, j)
|
||||||
|
self.assertEqual(m.find(p, i, j), expected, (p, i, j))
|
||||||
|
|
||||||
|
def test_find_does_not_access_beyond_buffer(self):
|
||||||
|
try:
|
||||||
|
flags = mmap.MAP_PRIVATE | mmap.MAP_ANONYMOUS
|
||||||
|
PAGESIZE = mmap.PAGESIZE
|
||||||
|
PROT_NONE = 0
|
||||||
|
PROT_READ = mmap.PROT_READ
|
||||||
|
except AttributeError as e:
|
||||||
|
raise unittest.SkipTest("mmap flags unavailable") from e
|
||||||
|
for i in range(0, 2049):
|
||||||
|
with mmap.mmap(-1, PAGESIZE * (i + 1),
|
||||||
|
flags=flags, prot=PROT_NONE) as guard:
|
||||||
|
with mmap.mmap(-1, PAGESIZE * (i + 2048),
|
||||||
|
flags=flags, prot=PROT_READ) as fm:
|
||||||
|
fm.find(b"fo", -2)
|
||||||
|
|
||||||
|
|
||||||
def test_rfind(self):
|
def test_rfind(self):
|
||||||
# test the new 'end' parameter works as expected
|
# test the new 'end' parameter works as expected
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Prevent out-of-bounds memory access during ``mmap.find()`` calls.
|
|
@ -14,6 +14,7 @@
|
||||||
#include "interpreteridobject.h" // _PyInterpreterID_LookUp()
|
#include "interpreteridobject.h" // _PyInterpreterID_LookUp()
|
||||||
#include "pycore_atomic_funcs.h" // _Py_atomic_int_get()
|
#include "pycore_atomic_funcs.h" // _Py_atomic_int_get()
|
||||||
#include "pycore_bitutils.h" // _Py_bswap32()
|
#include "pycore_bitutils.h" // _Py_bswap32()
|
||||||
|
#include "pycore_bytesobject.h" // _PyBytes_Find()
|
||||||
#include "pycore_compile.h" // _PyCompile_CodeGen, _PyCompile_OptimizeCfg, _PyCompile_Assemble
|
#include "pycore_compile.h" // _PyCompile_CodeGen, _PyCompile_OptimizeCfg, _PyCompile_Assemble
|
||||||
#include "pycore_ceval.h" // _PyEval_AddPendingCall
|
#include "pycore_ceval.h" // _PyEval_AddPendingCall
|
||||||
#include "pycore_fileutils.h" // _Py_normpath
|
#include "pycore_fileutils.h" // _Py_normpath
|
||||||
|
@ -443,6 +444,118 @@ test_edit_cost(PyObject *self, PyObject *Py_UNUSED(args))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static int
|
||||||
|
check_bytes_find(const char *haystack0, const char *needle0,
|
||||||
|
int offset, Py_ssize_t expected)
|
||||||
|
{
|
||||||
|
Py_ssize_t len_haystack = strlen(haystack0);
|
||||||
|
Py_ssize_t len_needle = strlen(needle0);
|
||||||
|
Py_ssize_t result_1 = _PyBytes_Find(haystack0, len_haystack,
|
||||||
|
needle0, len_needle, offset);
|
||||||
|
if (result_1 != expected) {
|
||||||
|
PyErr_Format(PyExc_AssertionError,
|
||||||
|
"Incorrect result_1: '%s' in '%s' (offset=%zd)",
|
||||||
|
needle0, haystack0, offset);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Allocate new buffer with no NULL terminator.
|
||||||
|
char *haystack = PyMem_Malloc(len_haystack);
|
||||||
|
if (haystack == NULL) {
|
||||||
|
PyErr_NoMemory();
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
char *needle = PyMem_Malloc(len_needle);
|
||||||
|
if (needle == NULL) {
|
||||||
|
PyMem_Free(haystack);
|
||||||
|
PyErr_NoMemory();
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
memcpy(haystack, haystack0, len_haystack);
|
||||||
|
memcpy(needle, needle0, len_needle);
|
||||||
|
Py_ssize_t result_2 = _PyBytes_Find(haystack, len_haystack,
|
||||||
|
needle, len_needle, offset);
|
||||||
|
PyMem_Free(haystack);
|
||||||
|
PyMem_Free(needle);
|
||||||
|
if (result_2 != expected) {
|
||||||
|
PyErr_Format(PyExc_AssertionError,
|
||||||
|
"Incorrect result_2: '%s' in '%s' (offset=%zd)",
|
||||||
|
needle0, haystack0, offset);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
check_bytes_find_large(Py_ssize_t len_haystack, Py_ssize_t len_needle,
|
||||||
|
const char *needle)
|
||||||
|
{
|
||||||
|
char *zeros = PyMem_RawCalloc(len_haystack, 1);
|
||||||
|
if (zeros == NULL) {
|
||||||
|
PyErr_NoMemory();
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
Py_ssize_t res = _PyBytes_Find(zeros, len_haystack, needle, len_needle, 0);
|
||||||
|
PyMem_RawFree(zeros);
|
||||||
|
if (res != -1) {
|
||||||
|
PyErr_Format(PyExc_AssertionError,
|
||||||
|
"check_bytes_find_large(%zd, %zd) found %zd",
|
||||||
|
len_haystack, len_needle, res);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
test_bytes_find(PyObject *self, PyObject *Py_UNUSED(args))
|
||||||
|
{
|
||||||
|
#define CHECK(H, N, O, E) do { \
|
||||||
|
if (check_bytes_find(H, N, O, E) < 0) { \
|
||||||
|
return NULL; \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
CHECK("", "", 0, 0);
|
||||||
|
CHECK("Python", "", 0, 0);
|
||||||
|
CHECK("Python", "", 3, 3);
|
||||||
|
CHECK("Python", "", 6, 6);
|
||||||
|
CHECK("Python", "yth", 0, 1);
|
||||||
|
CHECK("ython", "yth", 1, 1);
|
||||||
|
CHECK("thon", "yth", 2, -1);
|
||||||
|
CHECK("Python", "thon", 0, 2);
|
||||||
|
CHECK("ython", "thon", 1, 2);
|
||||||
|
CHECK("thon", "thon", 2, 2);
|
||||||
|
CHECK("hon", "thon", 3, -1);
|
||||||
|
CHECK("Pytho", "zz", 0, -1);
|
||||||
|
CHECK("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "ab", 0, -1);
|
||||||
|
CHECK("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "ba", 0, -1);
|
||||||
|
CHECK("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "bb", 0, -1);
|
||||||
|
CHECK("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab", "ab", 0, 30);
|
||||||
|
CHECK("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaba", "ba", 0, 30);
|
||||||
|
CHECK("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaabb", "bb", 0, 30);
|
||||||
|
#undef CHECK
|
||||||
|
|
||||||
|
// Hunt for segfaults
|
||||||
|
// n, m chosen here so that (n - m) % (m + 1) == 0
|
||||||
|
// This would make default_find in fastsearch.h access haystack[n].
|
||||||
|
if (check_bytes_find_large(2048, 2, "ab") < 0) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
if (check_bytes_find_large(4096, 16, "0123456789abcdef") < 0) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
if (check_bytes_find_large(8192, 2, "ab") < 0) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
if (check_bytes_find_large(16384, 4, "abcd") < 0) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
if (check_bytes_find_large(32768, 2, "ab") < 0) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
Py_RETURN_NONE;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
normalize_path(PyObject *self, PyObject *filename)
|
normalize_path(PyObject *self, PyObject *filename)
|
||||||
{
|
{
|
||||||
|
@ -1328,6 +1441,7 @@ static PyMethodDef module_functions[] = {
|
||||||
{"reset_path_config", test_reset_path_config, METH_NOARGS},
|
{"reset_path_config", test_reset_path_config, METH_NOARGS},
|
||||||
{"test_atomic_funcs", test_atomic_funcs, METH_NOARGS},
|
{"test_atomic_funcs", test_atomic_funcs, METH_NOARGS},
|
||||||
{"test_edit_cost", test_edit_cost, METH_NOARGS},
|
{"test_edit_cost", test_edit_cost, METH_NOARGS},
|
||||||
|
{"test_bytes_find", test_bytes_find, METH_NOARGS},
|
||||||
{"normalize_path", normalize_path, METH_O, NULL},
|
{"normalize_path", normalize_path, METH_O, NULL},
|
||||||
{"get_getpath_codeobject", get_getpath_codeobject, METH_NOARGS, NULL},
|
{"get_getpath_codeobject", get_getpath_codeobject, METH_NOARGS, NULL},
|
||||||
{"EncodeLocaleEx", encode_locale_ex, METH_VARARGS},
|
{"EncodeLocaleEx", encode_locale_ex, METH_VARARGS},
|
||||||
|
|
|
@ -342,12 +342,17 @@ mmap_gfind(mmap_object *self,
|
||||||
|
|
||||||
Py_ssize_t res;
|
Py_ssize_t res;
|
||||||
CHECK_VALID_OR_RELEASE(NULL, view);
|
CHECK_VALID_OR_RELEASE(NULL, view);
|
||||||
if (reverse) {
|
if (end < start) {
|
||||||
|
res = -1;
|
||||||
|
}
|
||||||
|
else if (reverse) {
|
||||||
|
assert(0 <= start && start <= end && end <= self->size);
|
||||||
res = _PyBytes_ReverseFind(
|
res = _PyBytes_ReverseFind(
|
||||||
self->data + start, end - start,
|
self->data + start, end - start,
|
||||||
view.buf, view.len, start);
|
view.buf, view.len, start);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
assert(0 <= start && start <= end && end <= self->size);
|
||||||
res = _PyBytes_Find(
|
res = _PyBytes_Find(
|
||||||
self->data + start, end - start,
|
self->data + start, end - start,
|
||||||
view.buf, view.len, start);
|
view.buf, view.len, start);
|
||||||
|
|
|
@ -1272,8 +1272,25 @@ _PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
|
||||||
const char *needle, Py_ssize_t len_needle,
|
const char *needle, Py_ssize_t len_needle,
|
||||||
Py_ssize_t offset)
|
Py_ssize_t offset)
|
||||||
{
|
{
|
||||||
return stringlib_find(haystack, len_haystack,
|
assert(len_haystack >= 0);
|
||||||
|
assert(len_needle >= 0);
|
||||||
|
// Extra checks because stringlib_find accesses haystack[len_haystack].
|
||||||
|
if (len_needle == 0) {
|
||||||
|
return offset;
|
||||||
|
}
|
||||||
|
if (len_needle > len_haystack) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
assert(len_haystack >= 1);
|
||||||
|
Py_ssize_t res = stringlib_find(haystack, len_haystack - 1,
|
||||||
needle, len_needle, offset);
|
needle, len_needle, offset);
|
||||||
|
if (res == -1) {
|
||||||
|
Py_ssize_t last_align = len_haystack - len_needle;
|
||||||
|
if (memcmp(haystack + last_align, needle, len_needle) == 0) {
|
||||||
|
return offset + last_align;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
Py_ssize_t
|
Py_ssize_t
|
||||||
|
|
Loading…
Reference in New Issue