From cc83afd803478ee234df97054bd2d182432c1297 Mon Sep 17 00:00:00 2001 From: Mark Dickinson Date: Sun, 15 Nov 2009 12:34:12 +0000 Subject: [PATCH] Merged revisions 76295 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r76295 | mark.dickinson | 2009-11-15 12:31:13 +0000 (Sun, 15 Nov 2009) | 5 lines Avoid signed overflow in some xrange calculations, and extend xrange tests to cover some special cases that caused problems in py3k. This is a partial backport of r76292-76293 (see issue #7298.) ........ --- Lib/test/test_xrange.py | 69 ++++++++++++++++++++++++++++++++++++ Objects/rangeobject.c | 78 +++++++++++++++++++++++------------------ 2 files changed, 113 insertions(+), 34 deletions(-) diff --git a/Lib/test/test_xrange.py b/Lib/test/test_xrange.py index 7e2a1a30581..dbfeb438332 100644 --- a/Lib/test/test_xrange.py +++ b/Lib/test/test_xrange.py @@ -3,12 +3,49 @@ import test.test_support, unittest import sys import pickle +import itertools import warnings warnings.filterwarnings("ignore", "integer argument expected", DeprecationWarning, "unittest") +# pure Python implementations (3 args only), for comparison +def pyrange(start, stop, step): + if (start - stop) // step < 0: + # replace stop with next element in the sequence of integers + # that are congruent to start modulo step. + stop += (start - stop) % step + while start != stop: + yield start + start += step + +def pyrange_reversed(start, stop, step): + stop += (start - stop) % step + return pyrange(stop - step, start - step, -step) + + class XrangeTest(unittest.TestCase): + def assert_iterators_equal(self, xs, ys, test_id, limit=None): + # check that an iterator xs matches the expected results ys, + # up to a given limit. + if limit is not None: + xs = itertools.islice(xs, limit) + ys = itertools.islice(ys, limit) + sentinel = object() + pairs = itertools.izip_longest(xs, ys, fillvalue=sentinel) + for i, (x, y) in enumerate(pairs): + if x == y: + continue + elif x == sentinel: + self.fail('{0}: iterator ended unexpectedly ' + 'at position {1}; expected {2}'.format(test_id, i, y)) + elif y == sentinel: + self.fail('{0}: unexpected excess element {1} at ' + 'position {2}'.format(test_id, x, i)) + else: + self.fail('{0}: wrong element at position {1};' + 'expected {2}, got {3}'.format(test_id, i, y, x)) + def test_xrange(self): self.assertEqual(list(xrange(3)), [0, 1, 2]) self.assertEqual(list(xrange(1, 5)), [1, 2, 3, 4]) @@ -67,6 +104,38 @@ class XrangeTest(unittest.TestCase): self.assertEquals(list(pickle.loads(pickle.dumps(r, proto))), list(r)) + def test_range_iterators(self): + # see issue 7298 + limits = [base + jiggle + for M in (2**32, 2**64) + for base in (-M, -M//2, 0, M//2, M) + for jiggle in (-2, -1, 0, 1, 2)] + test_ranges = [(start, end, step) + for start in limits + for end in limits + for step in (-2**63, -2**31, -2, -1, 1, 2)] + + for start, end, step in test_ranges: + try: + iter1 = xrange(start, end, step) + except OverflowError: + pass + else: + iter2 = pyrange(start, end, step) + test_id = "xrange({0}, {1}, {2})".format(start, end, step) + # check first 100 entries + self.assert_iterators_equal(iter1, iter2, test_id, limit=100) + + try: + iter1 = reversed(xrange(start, end, step)) + except OverflowError: + pass + else: + iter2 = pyrange_reversed(start, end, step) + test_id = "reversed(xrange({0}, {1}, {2}))".format( + start, end, step) + self.assert_iterators_equal(iter1, iter2, test_id, limit=100) + def test_main(): test.test_support.run_unittest(XrangeTest) diff --git a/Objects/rangeobject.c b/Objects/rangeobject.c index 76d384914e5..0fee40f6841 100644 --- a/Objects/rangeobject.c +++ b/Objects/rangeobject.c @@ -9,33 +9,32 @@ typedef struct { long len; } rangeobject; -/* Return number of items in range/xrange (lo, hi, step). step > 0 - * required. Return a value < 0 if & only if the true value is too - * large to fit in a signed long. +/* Return number of items in range (lo, hi, step). step != 0 + * required. The result always fits in an unsigned long. */ -static long +static unsigned long get_len_of_range(long lo, long hi, long step) { - /* ------------------------------------------------------------- - If lo >= hi, the range is empty. - Else if n values are in the range, the last one is - lo + (n-1)*step, which must be <= hi-1. Rearranging, - n <= (hi - lo - 1)/step + 1, so taking the floor of the RHS gives - the proper value. Since lo < hi in this case, hi-lo-1 >= 0, so - the RHS is non-negative and so truncation is the same as the - floor. Letting M be the largest positive long, the worst case - for the RHS numerator is hi=M, lo=-M-1, and then - hi-lo-1 = M-(-M-1)-1 = 2*M. Therefore unsigned long has enough - precision to compute the RHS exactly. - ---------------------------------------------------------------*/ - long n = 0; - if (lo < hi) { - unsigned long uhi = (unsigned long)hi; - unsigned long ulo = (unsigned long)lo; - unsigned long diff = uhi - ulo - 1; - n = (long)(diff / (unsigned long)step + 1); - } - return n; + /* ------------------------------------------------------------- + If step > 0 and lo >= hi, or step < 0 and lo <= hi, the range is empty. + Else for step > 0, if n values are in the range, the last one is + lo + (n-1)*step, which must be <= hi-1. Rearranging, + n <= (hi - lo - 1)/step + 1, so taking the floor of the RHS gives + the proper value. Since lo < hi in this case, hi-lo-1 >= 0, so + the RHS is non-negative and so truncation is the same as the + floor. Letting M be the largest positive long, the worst case + for the RHS numerator is hi=M, lo=-M-1, and then + hi-lo-1 = M-(-M-1)-1 = 2*M. Therefore unsigned long has enough + precision to compute the RHS exactly. The analysis for step < 0 + is similar. + ---------------------------------------------------------------*/ + assert(step != 0); + if (step > 0 && lo < hi) + return 1UL + (hi - 1UL - lo) / step; + else if (step < 0 && lo > hi) + return 1UL + (lo - 1UL - hi) / (0UL - step); + else + return 0UL; } static PyObject * @@ -43,7 +42,7 @@ range_new(PyTypeObject *type, PyObject *args, PyObject *kw) { rangeobject *obj; long ilow = 0, ihigh = 0, istep = 1; - long n; + unsigned long n; if (!_PyArg_NoKeywords("xrange()", kw)) return NULL; @@ -64,11 +63,8 @@ range_new(PyTypeObject *type, PyObject *args, PyObject *kw) PyErr_SetString(PyExc_ValueError, "xrange() arg 3 must not be zero"); return NULL; } - if (istep > 0) - n = get_len_of_range(ilow, ihigh, istep); - else - n = get_len_of_range(ihigh, ilow, -istep); - if (n < 0) { + n = get_len_of_range(ilow, ihigh, istep); + if (n > (unsigned long)LONG_MAX || (long)n > PY_SSIZE_T_MAX) { PyErr_SetString(PyExc_OverflowError, "xrange() result has too many items"); return NULL; @@ -78,7 +74,7 @@ range_new(PyTypeObject *type, PyObject *args, PyObject *kw) if (obj == NULL) return NULL; obj->start = ilow; - obj->len = n; + obj->len = (long)n; obj->step = istep; return (PyObject *) obj; } @@ -98,7 +94,9 @@ range_item(rangeobject *r, Py_ssize_t i) "xrange object index out of range"); return NULL; } - return PyInt_FromSsize_t(r->start + i * r->step); + /* do calculation entirely using unsigned longs, to avoid + undefined behaviour due to signed overflow. */ + return PyInt_FromLong((long)(r->start + (unsigned long)i * r->step)); } static Py_ssize_t @@ -304,9 +302,21 @@ range_reverse(PyObject *seq) len = ((rangeobject *)seq)->len; it->index = 0; - it->start = start + (len-1) * step; - it->step = -step; it->len = len; + /* the casts below guard against signed overflow by turning it + into unsigned overflow instead. The correctness of this + code still depends on conversion from unsigned long to long + wrapping modulo ULONG_MAX+1, which isn't guaranteed (see + C99 6.3.1.3p3) but seems to hold in practice for all + platforms we're likely to meet. + + If step == LONG_MIN then we still end up with LONG_MIN + after negation; but this works out, since we've still got + the correct value modulo ULONG_MAX+1, and the range_item + calculation is also done modulo ULONG_MAX+1. + */ + it->start = (long)(start + (unsigned long)(len-1) * step); + it->step = (long)(-(unsigned long)step); return (PyObject *)it; }