From 37ee850b1073f3b8e85855ed02d5dc3d11c5fe62 Mon Sep 17 00:00:00 2001 From: Nick Coghlan Date: Fri, 3 Dec 2010 14:26:13 +0000 Subject: [PATCH] Issue 2690: Add support for slicing and negative indices to range objects (includes precalculation and storage of the range length). Refer to the tracker issue for the language moratorium implications of this change --- Doc/library/functions.rst | 27 +++- Doc/whatsnew/3.2.rst | 4 + Lib/test/test_range.py | 73 +++++++++- Lib/test/test_sys.py | 4 +- Misc/NEWS | 4 +- Objects/rangeobject.c | 284 ++++++++++++++++++++++++-------------- 6 files changed, 291 insertions(+), 105 deletions(-) diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index d6df5ba246b..bb392fc140b 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -1023,8 +1023,33 @@ are always available. They are listed here in alphabetical order. >>> list(range(1, 0)) [] + Range objects implement the :class:`collections.Sequence` ABC, and provide + features such as containment tests, element index lookup, slicing and + support for negative indices: + + >>> r = range(0, 20, 2) + >>> r + range(0, 20, 2) + >>> 11 in r + False + >>> 10 in r + True + >>> r.index(10) + 5 + >>> r[5] + 10 + >>> r[:5] + range(0, 10, 2) + >>> r[-1] + 18 + + Ranges containing absolute values larger than ``sys.maxint`` are permitted + but some features (such as :func:`len`) will raise :exc:`OverflowError`. + .. versionchanged:: 3.2 - Testing integers for membership takes constant time instead of iterating + Implement the Sequence ABC + Support slicing and negative indices + Test integers for membership in constant time instead of iterating through all items. diff --git a/Doc/whatsnew/3.2.rst b/Doc/whatsnew/3.2.rst index 1083979d690..db88e4abad0 100644 --- a/Doc/whatsnew/3.2.rst +++ b/Doc/whatsnew/3.2.rst @@ -313,6 +313,10 @@ Some smaller changes made to the core Python language are: (Added by Antoine Pitrou, :issue:`10093`.) +.. XXX: Issues #9213 and #2690 make the objects returned by range() + more sequence like in accordance with their registration as + implementing the Sequence ABC + New, Improved, and Deprecated Modules ===================================== diff --git a/Lib/test/test_range.py b/Lib/test/test_range.py index 5cbc11bf218..5aa592082cb 100644 --- a/Lib/test/test_range.py +++ b/Lib/test/test_range.py @@ -136,7 +136,12 @@ class RangeTest(unittest.TestCase): self.assertNotIn(-b, seq) self.assertEqual(len(seq), 2) - self.assertRaises(OverflowError, len, range(0, sys.maxsize**10)) + self.assertRaises(OverflowError, len, + range(-sys.maxsize, sys.maxsize)) + self.assertRaises(OverflowError, len, + range(0, 2*sys.maxsize)) + self.assertRaises(OverflowError, len, + range(0, sys.maxsize**10)) def test_invalid_invocation(self): self.assertRaises(TypeError, range) @@ -248,6 +253,8 @@ class RangeTest(unittest.TestCase): always_equal = AlwaysEqual() self.assertEqual(range(10).count(always_equal), 10) + self.assertEqual(len(range(sys.maxsize, sys.maxsize+10)), 10) + def test_repr(self): self.assertEqual(repr(range(1)), 'range(0, 1)') self.assertEqual(repr(range(1, 2)), 'range(1, 2)') @@ -349,6 +356,70 @@ class RangeTest(unittest.TestCase): test_id = "reversed(range({}, {}, {}))".format(start, end, step) self.assert_iterators_equal(iter1, iter2, test_id, limit=100) + def test_slice(self): + def check(start, stop, step=None): + i = slice(start, stop, step) + self.assertEqual(list(r[i]), list(r)[i]) + self.assertEqual(len(r[i]), len(list(r)[i])) + for r in [range(10), + range(0), + range(1, 9, 3), + range(8, 0, -3), + range(sys.maxsize+1, sys.maxsize+10), + ]: + check(0, 2) + check(0, 20) + check(1, 2) + check(20, 30) + check(-30, -20) + check(-1, 100, 2) + check(0, -1) + check(-1, -3, -1) + + def test_contains(self): + r = range(10) + self.assertIn(0, r) + self.assertIn(1, r) + self.assertIn(5.0, r) + self.assertNotIn(5.1, r) + self.assertNotIn(-1, r) + self.assertNotIn(10, r) + self.assertNotIn("", r) + r = range(9, -1, -1) + self.assertIn(0, r) + self.assertIn(1, r) + self.assertIn(5.0, r) + self.assertNotIn(5.1, r) + self.assertNotIn(-1, r) + self.assertNotIn(10, r) + self.assertNotIn("", r) + r = range(0, 10, 2) + self.assertIn(0, r) + self.assertNotIn(1, r) + self.assertNotIn(5.0, r) + self.assertNotIn(5.1, r) + self.assertNotIn(-1, r) + self.assertNotIn(10, r) + self.assertNotIn("", r) + r = range(9, -1, -2) + self.assertNotIn(0, r) + self.assertIn(1, r) + self.assertIn(5.0, r) + self.assertNotIn(5.1, r) + self.assertNotIn(-1, r) + self.assertNotIn(10, r) + self.assertNotIn("", r) + + def test_reverse_iteration(self): + for r in [range(10), + range(0), + range(1, 9, 3), + range(8, 0, -3), + range(sys.maxsize+1, sys.maxsize+10), + ]: + self.assertEqual(list(reversed(r)), list(r)[::-1]) + + def test_main(): test.support.run_unittest(RangeTest) diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 21f0edc66df..d7d77f098f6 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -782,8 +782,8 @@ class SizeofTest(unittest.TestCase): # reverse check(reversed(''), size(h + 'PP')) # range - check(range(1), size(h + '3P')) - check(range(66000), size(h + '3P')) + check(range(1), size(h + '4P')) + check(range(66000), size(h + '4P')) # set # frozenset PySet_MINSIZE = 8 diff --git a/Misc/NEWS b/Misc/NEWS index 07a62eec813..13c29cd9923 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,8 @@ What's New in Python 3.2 Beta 1? Core and Builtins ----------------- +- Issue #2690: Range objects support negative indices and slicing + - Issue #9915: Speed up sorting with a key. - Issue #7475: Added transform() and untransform() methods to both bytes and @@ -652,7 +654,7 @@ Core and Builtins - Issue #9252: PyImport_Import no longer uses a fromlist hack to return the module that was imported, but instead gets the module from sys.modules. -- Issue #9212: The range type_items now provides index() and count() methods, to +- Issue #9213: The range type_items now provides index() and count() methods, to conform to the Sequence ABC. Patch by Daniel Urban and Daniel Stutzbach. - Issue #7994: Issue a PendingDeprecationWarning if object.__format__ is called diff --git a/Objects/rangeobject.c b/Objects/rangeobject.c index 6ce0187adbe..bf42b1e321f 100644 --- a/Objects/rangeobject.c +++ b/Objects/rangeobject.c @@ -14,6 +14,7 @@ typedef struct { PyObject *start; PyObject *stop; PyObject *step; + PyObject *length; } rangeobject; /* Helper function for validating step. Always returns a new reference or @@ -43,6 +44,31 @@ validate_step(PyObject *step) return step; } +static PyObject * +compute_range_length(PyObject *start, PyObject *stop, PyObject *step); + +static rangeobject * +make_range_object(PyTypeObject *type, PyObject *start, + PyObject *stop, PyObject *step) +{ + rangeobject *obj = NULL; + PyObject *length; + length = compute_range_length(start, stop, step); + if (length == NULL) { + return NULL; + } + obj = PyObject_New(rangeobject, type); + if (obj == NULL) { + Py_DECREF(length); + return NULL; + } + obj->start = start; + obj->stop = stop; + obj->step = step; + obj->length = length; + return obj; +} + /* XXX(nnorwitz): should we error check if the user passes any empty ranges? range(-10) range(0, -5) @@ -51,7 +77,7 @@ validate_step(PyObject *step) static PyObject * range_new(PyTypeObject *type, PyObject *args, PyObject *kw) { - rangeobject *obj = NULL; + rangeobject *obj; PyObject *start = NULL, *stop = NULL, *step = NULL; if (!_PyArg_NoKeywords("range()", kw)) @@ -97,15 +123,11 @@ range_new(PyTypeObject *type, PyObject *args, PyObject *kw) } } - obj = PyObject_New(rangeobject, &PyRange_Type); - if (obj == NULL) - goto Fail; - obj->start = start; - obj->stop = stop; - obj->step = step; - return (PyObject *) obj; + obj = make_range_object(type, start, stop, step); + if (obj != NULL) + return (PyObject *) obj; -Fail: + /* Failed to create object, release attributes */ Py_XDECREF(start); Py_XDECREF(stop); Py_XDECREF(step); @@ -115,7 +137,7 @@ Fail: PyDoc_STRVAR(range_doc, "range([start,] stop[, step]) -> range object\n\ \n\ -Returns an iterator that generates the numbers in the range on demand."); +Returns a virtual sequence of numbers from start to stop by step."); static void range_dealloc(rangeobject *r) @@ -123,6 +145,7 @@ range_dealloc(rangeobject *r) Py_DECREF(r->start); Py_DECREF(r->stop); Py_DECREF(r->step); + Py_DECREF(r->length); PyObject_Del(r); } @@ -131,7 +154,7 @@ range_dealloc(rangeobject *r) * PyLong_Check(). Return NULL when there is an error. */ static PyObject* -range_length_obj(rangeobject *r) +compute_range_length(PyObject *start, PyObject *stop, PyObject *step) { /* ------------------------------------------------------------- Algorithm is equal to that of get_len_of_range(), but it operates @@ -139,7 +162,6 @@ range_length_obj(rangeobject *r) ---------------------------------------------------------------*/ int cmp_result; PyObject *lo, *hi; - PyObject *step = NULL; PyObject *diff = NULL; PyObject *one = NULL; PyObject *tmp1 = NULL, *tmp2 = NULL, *result; @@ -148,20 +170,19 @@ range_length_obj(rangeobject *r) PyObject *zero = PyLong_FromLong(0); if (zero == NULL) return NULL; - cmp_result = PyObject_RichCompareBool(r->step, zero, Py_GT); + cmp_result = PyObject_RichCompareBool(step, zero, Py_GT); Py_DECREF(zero); if (cmp_result == -1) return NULL; if (cmp_result == 1) { - lo = r->start; - hi = r->stop; - step = r->step; + lo = start; + hi = stop; Py_INCREF(step); } else { - lo = r->stop; - hi = r->start; - step = PyNumber_Negative(r->step); + lo = stop; + hi = start; + step = PyNumber_Negative(step); if (!step) return NULL; } @@ -206,32 +227,15 @@ range_length_obj(rangeobject *r) static Py_ssize_t range_length(rangeobject *r) { - PyObject *len = range_length_obj(r); - Py_ssize_t result = -1; - if (len) { - result = PyLong_AsSsize_t(len); - Py_DECREF(len); - } - return result; + return PyLong_AsSsize_t(r->length); } /* range(...)[x] is necessary for: seq[:] = range(...) */ - static PyObject * -range_item(rangeobject *r, Py_ssize_t i) +compute_range_item(rangeobject *r, Py_ssize_t i) { - Py_ssize_t len = range_length(r); PyObject *rem, *incr, *result; - /* XXX(nnorwitz): should negative indices be supported? */ - /* XXX(nnorwitz): should support range[x] where x > PY_SSIZE_T_MAX? */ - if (i < 0 || i >= len) { - if (!PyErr_Occurred()) - PyErr_SetString(PyExc_IndexError, - "range object index out of range"); - return NULL; - } - /* XXX(nnorwitz): optimize for short ints. */ rem = PyLong_FromSsize_t(i); if (!rem) @@ -246,31 +250,22 @@ range_item(rangeobject *r, Py_ssize_t i) } static PyObject * -range_repr(rangeobject *r) +range_item(rangeobject *r, Py_ssize_t i) { - Py_ssize_t istep; + /* XXX(nnorwitz): should we support range[x] where x > PY_SSIZE_T_MAX? */ + Py_ssize_t len = range_length(r); - /* Check for special case values for printing. We don't always - need the step value. We don't care about errors - (it means overflow), so clear the errors. */ - istep = PyNumber_AsSsize_t(r->step, NULL); - if (istep != 1 || (istep == -1 && PyErr_Occurred())) { - PyErr_Clear(); + if (i < 0) + i += len; + + if (i < 0 || i >= len) { + /* Also handles case where len < 0 due to (e.g) OverflowError */ + if (!PyErr_Occurred()) + PyErr_SetString(PyExc_IndexError, + "range object index out of range"); + return NULL; } - - if (istep == 1) - return PyUnicode_FromFormat("range(%R, %R)", r->start, r->stop); - else - return PyUnicode_FromFormat("range(%R, %R, %R)", - r->start, r->stop, r->step); -} - -/* Pickling support */ -static PyObject * -range_reduce(rangeobject *r, PyObject *args) -{ - return Py_BuildValue("(O(OOO))", Py_TYPE(r), - r->start, r->stop, r->step); + return compute_range_item(r, i); } /* Assumes (PyLong_CheckExact(ob) || PyBool_Check(ob)) */ @@ -388,15 +383,111 @@ range_index(rangeobject *r, PyObject *ob) static PySequenceMethods range_as_sequence = { (lenfunc)range_length, /* sq_length */ - 0, /* sq_concat */ - 0, /* sq_repeat */ - (ssizeargfunc)range_item, /* sq_item */ - 0, /* sq_slice */ - 0, /* sq_ass_item */ - 0, /* sq_ass_slice */ + 0, /* sq_concat */ + 0, /* sq_repeat */ + (ssizeargfunc)range_item, /* sq_item */ + 0, /* sq_slice */ + 0, /* sq_ass_item */ + 0, /* sq_ass_slice */ (objobjproc)range_contains, /* sq_contains */ }; +static PyObject * +range_repr(rangeobject *r) +{ + Py_ssize_t istep; + + /* Check for special case values for printing. We don't always + need the step value. We don't care about errors + (it means overflow), so clear the errors. */ + istep = PyNumber_AsSsize_t(r->step, NULL); + if (istep != 1 || (istep == -1 && PyErr_Occurred())) { + PyErr_Clear(); + } + + if (istep == 1) + return PyUnicode_FromFormat("range(%R, %R)", r->start, r->stop); + else + return PyUnicode_FromFormat("range(%R, %R, %R)", + r->start, r->stop, r->step); +} + +/* Pickling support */ +static PyObject * +range_reduce(rangeobject *r, PyObject *args) +{ + return Py_BuildValue("(O(OOO))", Py_TYPE(r), + r->start, r->stop, r->step); +} + +static PyObject * +range_subscript(rangeobject* self, PyObject* item) +{ + if (PyIndex_Check(item)) { + Py_ssize_t i; + i = PyNumber_AsSsize_t(item, PyExc_IndexError); + if (i == -1 && PyErr_Occurred()) + return NULL; + return range_item(self, i); + } + if (PySlice_Check(item)) { + PySliceObject *slice = (PySliceObject*)item; + Py_ssize_t start, stop, step, len, rlen; + rangeobject *result; + PyObject *substart = NULL, *substep = NULL, *substop = NULL; + + rlen = range_length(self); + if (rlen < 0) { + return NULL; + } + + if (PySlice_GetIndicesEx(slice, rlen, + &start, &stop, &step, &len) < 0) { + return NULL; + } + if (step == 1) { + substep = self->step; + Py_INCREF(substep); + } else { + /* NB: slice step != Py_None here */ + substep = PyNumber_Multiply(self->step, slice->step); + if (substep == NULL) + goto fail; + } + substart = compute_range_item(self, start); + if (substart == NULL) + goto fail; + if (len <= 0) { + substop = substart; + Py_INCREF(substop); + } + else { + substop = compute_range_item(self, stop); + if (substop == NULL) + goto fail; + } + result = make_range_object(Py_TYPE(self), substart, substop, substep); + if (result != NULL) + return (PyObject *) result; + fail: + Py_XDECREF(substart); + Py_XDECREF(substep); + Py_XDECREF(substop); + return NULL; + } + PyErr_Format(PyExc_TypeError, + "range indices must be integers or slices, not %.200s", + item->ob_type->tp_name); + return NULL; +} + + +static PyMappingMethods range_as_mapping = { + (lenfunc)range_length, /* mp_length */ + (binaryfunc)range_subscript, /* mp_subscript */ + (objobjargproc)0, /* mp_ass_subscript */ +}; + static PyObject * range_iter(PyObject *seq); static PyObject * range_reverse(PyObject *seq); @@ -431,7 +522,7 @@ PyTypeObject PyRange_Type = { (reprfunc)range_repr, /* tp_repr */ 0, /* tp_as_number */ &range_as_sequence, /* tp_as_sequence */ - 0, /* tp_as_mapping */ + &range_as_mapping, /* tp_as_mapping */ 0, /* tp_hash */ 0, /* tp_call */ 0, /* tp_str */ @@ -491,22 +582,6 @@ rangeiter_len(rangeiterobject *r) return PyLong_FromLong(r->len - r->index); } -typedef struct { - PyObject_HEAD - PyObject *index; - PyObject *start; - PyObject *step; - PyObject *len; -} longrangeiterobject; - -static PyObject * -longrangeiter_len(longrangeiterobject *r, PyObject *no_args) -{ - return PyNumber_Subtract(r->len, r->index); -} - -static PyObject *rangeiter_new(PyTypeObject *, PyObject *args, PyObject *kw); - PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it))."); @@ -516,6 +591,8 @@ static PyMethodDef rangeiter_methods[] = { {NULL, NULL} /* sentinel */ }; +static PyObject *rangeiter_new(PyTypeObject *, PyObject *args, PyObject *kw); + PyTypeObject PyRangeIter_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) "range_iterator", /* tp_name */ @@ -590,7 +667,7 @@ get_len_of_range(long lo, long hi, long step) is not representable as a C long, OverflowError is raised. */ static PyObject * -int_range_iter(long start, long stop, long step) +fast_range_iter(long start, long stop, long step) { rangeiterobject *it = PyObject_New(rangeiterobject, &PyRangeIter_Type); unsigned long ulen; @@ -622,7 +699,21 @@ rangeiter_new(PyTypeObject *type, PyObject *args, PyObject *kw) &start, &stop, &step)) return NULL; - return int_range_iter(start, stop, step); + return fast_range_iter(start, stop, step); +} + +typedef struct { + PyObject_HEAD + PyObject *index; + PyObject *start; + PyObject *step; + PyObject *len; +} longrangeiterobject; + +static PyObject * +longrangeiter_len(longrangeiterobject *r, PyObject *no_args) +{ + return PyNumber_Subtract(r->len, r->index); } static PyMethodDef longrangeiter_methods[] = { @@ -736,7 +827,7 @@ range_iter(PyObject *seq) PyErr_Clear(); goto long_range; } - int_it = int_range_iter(lstart, lstop, lstep); + int_it = fast_range_iter(lstart, lstop, lstep); if (int_it == NULL && PyErr_ExceptionMatches(PyExc_OverflowError)) { PyErr_Clear(); goto long_range; @@ -751,14 +842,11 @@ range_iter(PyObject *seq) /* Do all initialization here, so we can DECREF on failure. */ it->start = r->start; it->step = r->step; + it->len = r->length; Py_INCREF(it->start); Py_INCREF(it->step); + Py_INCREF(it->len); - it->len = it->index = NULL; - - it->len = range_length_obj(r); - if (!it->len) - goto create_failure; it->index = PyLong_FromLong(0); if (!it->index) goto create_failure; @@ -775,7 +863,7 @@ range_reverse(PyObject *seq) { rangeobject *range = (rangeobject*) seq; longrangeiterobject *it; - PyObject *one, *sum, *diff, *len = NULL, *product; + PyObject *one, *sum, *diff, *product; long lstart, lstop, lstep, new_start, new_stop; unsigned long ulen; @@ -838,7 +926,7 @@ range_reverse(PyObject *seq) new_stop = lstart - lstep; new_start = (long)(new_stop + ulen * lstep); - return int_range_iter(new_start, new_stop, -lstep); + return fast_range_iter(new_start, new_stop, -lstep); long_range: it = PyObject_New(longrangeiterobject, &PyLongRangeIter_Type); @@ -846,18 +934,14 @@ long_range: return NULL; /* start + (len - 1) * step */ - len = range_length_obj(range); - if (!len) - goto create_failure; - - /* Steal reference to len. */ - it->len = len; + it->len = range->length; + Py_INCREF(it->len); one = PyLong_FromLong(1); if (!one) goto create_failure; - diff = PyNumber_Subtract(len, one); + diff = PyNumber_Subtract(it->len, one); Py_DECREF(one); if (!diff) goto create_failure;