diff --git a/Lib/test/test_bisect.py b/Lib/test/test_bisect.py
index 934ba8c7a10..4f004d2213f 100644
--- a/Lib/test/test_bisect.py
+++ b/Lib/test/test_bisect.py
@@ -122,6 +122,13 @@ class TestBisect(unittest.TestCase):
         self.assertRaises(ValueError, mod.insort_left, [1, 2, 3], 5, -1, 3),
         self.assertRaises(ValueError, mod.insort_right, [1, 2, 3], 5, -1, 3),
 
+    def test_large_range(self):
+        # Issue 13496
+        mod = self.module
+        data = xrange(sys.maxsize-1)
+        self.assertEqual(mod.bisect_left(data, sys.maxsize-3), sys.maxsize-3)
+        self.assertEqual(mod.bisect_right(data, sys.maxsize-3), sys.maxsize-2)
+
     def test_random(self, n=25):
         from random import randrange
         for i in xrange(n):
diff --git a/Misc/NEWS b/Misc/NEWS
index 85bea30b3ec..ec27d20152c 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -48,6 +48,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #13496: Fix potential overflow in bisect.bisect algorithm when applied
+  to a collection of size > sys.maxsize / 2.
+
 - Issue #14399: zipfile now recognizes that the archive has been modified even
   if only the comment is changed. As a consequence of this fix, ZipFile is now
   a new style class.
diff --git a/Modules/_bisectmodule.c b/Modules/_bisectmodule.c
index e8976b24053..4798c124eef 100644
--- a/Modules/_bisectmodule.c
+++ b/Modules/_bisectmodule.c
@@ -21,7 +21,10 @@ internal_bisect_right(PyObject *list, PyObject *item, Py_ssize_t lo, Py_ssize_t
             return -1;
     }
     while (lo < hi) {
-        mid = (lo + hi) / 2;
+        /* The (size_t)cast ensures that the addition and subsequent division
+           are performed as unsigned operations, avoiding difficulties from
+           signed overflow. (See issue 13496.) */
+        mid = ((size_t)lo + hi) / 2;
         litem = PySequence_GetItem(list, mid);
         if (litem == NULL)
             return -1;
@@ -122,7 +125,10 @@ internal_bisect_left(PyObject *list, PyObject *item, Py_ssize_t lo, Py_ssize_t h
             return -1;
     }
     while (lo < hi) {
-        mid = (lo + hi) / 2;
+        /* The (size_t)cast ensures that the addition and subsequent division
+           are performed as unsigned operations, avoiding difficulties from
+           signed overflow. (See issue 13496.) */
+        mid = ((size_t)lo + hi) / 2;
         litem = PySequence_GetItem(list, mid);
         if (litem == NULL)
             return -1;