From aa9a79d27958ae5afb6c8769a2b342d98677c091 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Sat, 6 Oct 2012 14:03:24 +0200 Subject: [PATCH] Issue #16148: implemented PEP 424 --- Doc/c-api/object.rst | 7 ++++ Doc/library/operator.rst | 6 +++ Include/abstract.h | 5 +-- Lib/test/test_enumerate.py | 9 ++--- Lib/test/test_iterlen.py | 62 ++++++++++++++---------------- Lib/test/test_itertools.py | 5 +-- Lib/test/test_operator.py | 25 ++++++++++++ Lib/test/test_set.py | 2 - Modules/operator.c | 27 +++++++++++++ Objects/abstract.c | 78 +++++++++++++++++++++++--------------- Objects/bytearrayobject.c | 2 +- Objects/bytesobject.c | 2 +- Objects/iterobject.c | 11 ++++-- Objects/listobject.c | 2 +- 14 files changed, 161 insertions(+), 82 deletions(-) diff --git a/Doc/c-api/object.rst b/Doc/c-api/object.rst index d895547f1f7..8458fe88cf8 100644 --- a/Doc/c-api/object.rst +++ b/Doc/c-api/object.rst @@ -342,6 +342,13 @@ is considered sufficient for this determination. returned. This is the equivalent to the Python expression ``len(o)``. +.. c:function:: Py_ssize_t PyObject_LengthHint(PyObject *o, Py_ssize_t default) + + Return an estimated length for the object *o*. First trying to return its + actual length, then an estimate using ``__length_hint__``, and finally + returning the default value. On error ``-1`` is returned. This is the + equivalent to the Python expression ``operator.length_hint(o, default)``. + .. c:function:: PyObject* PyObject_GetItem(PyObject *o, PyObject *key) Return element of *o* corresponding to the object *key* or *NULL* on failure. diff --git a/Doc/library/operator.rst b/Doc/library/operator.rst index 3860880bf0e..93f33ffe800 100644 --- a/Doc/library/operator.rst +++ b/Doc/library/operator.rst @@ -235,6 +235,12 @@ their character equivalents. .. XXX: find a better, readable, example +.. function:: length_hint(obj, default=0) + + Return an estimated length for the object *o*. First trying to return its + actual length, then an estimate using ``__length_hint__``, and finally + returning the default value. + The :mod:`operator` module also defines tools for generalized attribute and item lookups. These are useful for making fast field extractors as arguments for :func:`map`, :func:`sorted`, :meth:`itertools.groupby`, or other functions that diff --git a/Include/abstract.h b/Include/abstract.h index 44b5af72c9b..814867538a9 100644 --- a/Include/abstract.h +++ b/Include/abstract.h @@ -403,9 +403,8 @@ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx*/ PyAPI_FUNC(Py_ssize_t) PyObject_Length(PyObject *o); #define PyObject_Length PyObject_Size -#ifndef Py_LIMITED_API - PyAPI_FUNC(Py_ssize_t) _PyObject_LengthHint(PyObject *o, Py_ssize_t); -#endif +PyAPI_FUNC(int) _PyObject_HasLen(PyObject *o); +PyAPI_FUNC(Py_ssize_t) PyObject_LengthHint(PyObject *o, Py_ssize_t); /* Guess the size of object o using len(o) or o.__length_hint__(). diff --git a/Lib/test/test_enumerate.py b/Lib/test/test_enumerate.py index 2e904cf8789..c0560fe9803 100644 --- a/Lib/test/test_enumerate.py +++ b/Lib/test/test_enumerate.py @@ -1,4 +1,5 @@ import unittest +import operator import sys import pickle @@ -168,15 +169,13 @@ class TestReversed(unittest.TestCase, PickleTest): x = range(1) self.assertEqual(type(reversed(x)), type(iter(x))) - @support.cpython_only def test_len(self): # This is an implementation detail, not an interface requirement - from test.test_iterlen import len for s in ('hello', tuple('hello'), list('hello'), range(5)): - self.assertEqual(len(reversed(s)), len(s)) + self.assertEqual(operator.length_hint(reversed(s)), len(s)) r = reversed(s) list(r) - self.assertEqual(len(r), 0) + self.assertEqual(operator.length_hint(r), 0) class SeqWithWeirdLen: called = False def __len__(self): @@ -187,7 +186,7 @@ class TestReversed(unittest.TestCase, PickleTest): def __getitem__(self, index): return index r = reversed(SeqWithWeirdLen()) - self.assertRaises(ZeroDivisionError, len, r) + self.assertRaises(ZeroDivisionError, operator.length_hint, r) def test_gc(self): diff --git a/Lib/test/test_iterlen.py b/Lib/test/test_iterlen.py index 7469a319f7a..57f7101beed 100644 --- a/Lib/test/test_iterlen.py +++ b/Lib/test/test_iterlen.py @@ -45,31 +45,21 @@ import unittest from test import support from itertools import repeat from collections import deque -from builtins import len as _len +from operator import length_hint n = 10 -def len(obj): - try: - return _len(obj) - except TypeError: - try: - # note: this is an internal undocumented API, - # don't rely on it in your own programs - return obj.__length_hint__() - except AttributeError: - raise TypeError class TestInvariantWithoutMutations(unittest.TestCase): def test_invariant(self): it = self.it for i in reversed(range(1, n+1)): - self.assertEqual(len(it), i) + self.assertEqual(length_hint(it), i) next(it) - self.assertEqual(len(it), 0) + self.assertEqual(length_hint(it), 0) self.assertRaises(StopIteration, next, it) - self.assertEqual(len(it), 0) + self.assertEqual(length_hint(it), 0) class TestTemporarilyImmutable(TestInvariantWithoutMutations): @@ -78,12 +68,12 @@ class TestTemporarilyImmutable(TestInvariantWithoutMutations): # length immutability during iteration it = self.it - self.assertEqual(len(it), n) + self.assertEqual(length_hint(it), n) next(it) - self.assertEqual(len(it), n-1) + self.assertEqual(length_hint(it), n-1) self.mutate() self.assertRaises(RuntimeError, next, it) - self.assertEqual(len(it), 0) + self.assertEqual(length_hint(it), 0) ## ------- Concrete Type Tests ------- @@ -92,10 +82,6 @@ class TestRepeat(TestInvariantWithoutMutations): def setUp(self): self.it = repeat(None, n) - def test_no_len_for_infinite_repeat(self): - # The repeat() object can also be infinite - self.assertRaises(TypeError, len, repeat(None)) - class TestXrange(TestInvariantWithoutMutations): def setUp(self): @@ -167,14 +153,15 @@ class TestList(TestInvariantWithoutMutations): it = iter(d) next(it) next(it) - self.assertEqual(len(it), n-2) + self.assertEqual(length_hint(it), n - 2) d.append(n) - self.assertEqual(len(it), n-1) # grow with append + self.assertEqual(length_hint(it), n - 1) # grow with append d[1:] = [] - self.assertEqual(len(it), 0) + self.assertEqual(length_hint(it), 0) self.assertEqual(list(it), []) d.extend(range(20)) - self.assertEqual(len(it), 0) + self.assertEqual(length_hint(it), 0) + class TestListReversed(TestInvariantWithoutMutations): @@ -186,32 +173,41 @@ class TestListReversed(TestInvariantWithoutMutations): it = reversed(d) next(it) next(it) - self.assertEqual(len(it), n-2) + self.assertEqual(length_hint(it), n - 2) d.append(n) - self.assertEqual(len(it), n-2) # ignore append + self.assertEqual(length_hint(it), n - 2) # ignore append d[1:] = [] - self.assertEqual(len(it), 0) + self.assertEqual(length_hint(it), 0) self.assertEqual(list(it), []) # confirm invariant d.extend(range(20)) - self.assertEqual(len(it), 0) + self.assertEqual(length_hint(it), 0) ## -- Check to make sure exceptions are not suppressed by __length_hint__() class BadLen(object): - def __iter__(self): return iter(range(10)) + def __iter__(self): + return iter(range(10)) + def __len__(self): raise RuntimeError('hello') + class BadLengthHint(object): - def __iter__(self): return iter(range(10)) + def __iter__(self): + return iter(range(10)) + def __length_hint__(self): raise RuntimeError('hello') + class NoneLengthHint(object): - def __iter__(self): return iter(range(10)) + def __iter__(self): + return iter(range(10)) + def __length_hint__(self): - return None + return NotImplemented + class TestLengthHintExceptions(unittest.TestCase): diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py index 90e85a9b25d..ece7f995684 100644 --- a/Lib/test/test_itertools.py +++ b/Lib/test/test_itertools.py @@ -1723,9 +1723,8 @@ class TestVariousIteratorArgs(unittest.TestCase): class LengthTransparency(unittest.TestCase): def test_repeat(self): - from test.test_iterlen import len - self.assertEqual(len(repeat(None, 50)), 50) - self.assertRaises(TypeError, len, repeat(None)) + self.assertEqual(operator.length_hint(repeat(None, 50)), 50) + self.assertEqual(operator.length_hint(repeat(None), 12), 12) class RegressionTests(unittest.TestCase): diff --git a/Lib/test/test_operator.py b/Lib/test/test_operator.py index fa608b9a52d..b445ee0ada7 100644 --- a/Lib/test/test_operator.py +++ b/Lib/test/test_operator.py @@ -410,6 +410,31 @@ class OperatorTestCase(unittest.TestCase): self.assertEqual(operator.__ixor__ (c, 5), "ixor") self.assertEqual(operator.__iconcat__ (c, c), "iadd") + def test_length_hint(self): + class X(object): + def __init__(self, value): + self.value = value + + def __length_hint__(self): + if type(self.value) is type: + raise self.value + else: + return self.value + + self.assertEqual(operator.length_hint([], 2), 0) + self.assertEqual(operator.length_hint(iter([1, 2, 3])), 3) + + self.assertEqual(operator.length_hint(X(2)), 2) + self.assertEqual(operator.length_hint(X(NotImplemented), 4), 4) + self.assertEqual(operator.length_hint(X(TypeError), 12), 12) + with self.assertRaises(TypeError): + operator.length_hint(X("abc")) + with self.assertRaises(ValueError): + operator.length_hint(X(-2)) + with self.assertRaises(LookupError): + operator.length_hint(X(LookupError)) + + def test_main(verbose=None): import sys test_classes = ( diff --git a/Lib/test/test_set.py b/Lib/test/test_set.py index 8e9e5878b25..da627238ccc 100644 --- a/Lib/test/test_set.py +++ b/Lib/test/test_set.py @@ -848,8 +848,6 @@ class TestBasicOps(unittest.TestCase): for v in self.set: self.assertIn(v, self.values) setiter = iter(self.set) - # note: __length_hint__ is an internal undocumented API, - # don't rely on it in your own programs self.assertEqual(setiter.__length_hint__(), len(self.set)) def test_pickling(self): diff --git a/Modules/operator.c b/Modules/operator.c index 12fdad54d60..c5369a50216 100644 --- a/Modules/operator.c +++ b/Modules/operator.c @@ -208,6 +208,31 @@ _tscmp(const unsigned char *a, const unsigned char *b, return (result == 0); } +PyDoc_STRVAR(length_hint__doc__, +"length_hint(obj, default=0) -> int\n" +"Return an estimate of the number of items in obj.\n" +"This is useful for presizing containers when building from an\n" +"iterable.\n" +"\n" +"If the object supports len(), the result will be\n" +"exact. Otherwise, it may over- or under-estimate by an\n" +"arbitrary amount. The result will be an integer >= 0."); + +static PyObject *length_hint(PyObject *self, PyObject *args) +{ + PyObject *obj; + Py_ssize_t defaultvalue = 0, res; + if (!PyArg_ParseTuple(args, "O|n:length_hint", &obj, &defaultvalue)) { + return NULL; + } + res = PyObject_LengthHint(obj, defaultvalue); + if (res == -1 && PyErr_Occurred()) { + return NULL; + } + return PyLong_FromSsize_t(res); +} + + PyDoc_STRVAR(compare_digest__doc__, "compare_digest(a, b) -> bool\n" "\n" @@ -366,6 +391,8 @@ spam2(ge,__ge__, "ge(a, b) -- Same as a>=b.") {"_compare_digest", (PyCFunction)compare_digest, METH_VARARGS, compare_digest__doc__}, + {"length_hint", (PyCFunction)length_hint, METH_VARARGS, + length_hint__doc__}, {NULL, NULL} /* sentinel */ }; diff --git a/Objects/abstract.c b/Objects/abstract.c index a2737dd5f4f..b6fc478595b 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -64,49 +64,67 @@ PyObject_Length(PyObject *o) } #define PyObject_Length PyObject_Size +int +_PyObject_HasLen(PyObject *o) { + return (Py_TYPE(o)->tp_as_sequence && Py_TYPE(o)->tp_as_sequence->sq_length) || + (Py_TYPE(o)->tp_as_mapping && Py_TYPE(o)->tp_as_mapping->mp_length); +} /* The length hint function returns a non-negative value from o.__len__() - or o.__length_hint__(). If those methods aren't found or return a negative - value, then the defaultvalue is returned. If one of the calls fails, - this function returns -1. + or o.__length_hint__(). If those methods aren't found. If one of the calls + fails this function returns -1. */ Py_ssize_t -_PyObject_LengthHint(PyObject *o, Py_ssize_t defaultvalue) +PyObject_LengthHint(PyObject *o, Py_ssize_t defaultvalue) { _Py_IDENTIFIER(__length_hint__); - PyObject *ro, *hintmeth; - Py_ssize_t rv; - - /* try o.__len__() */ - rv = PyObject_Size(o); - if (rv >= 0) - return rv; - if (PyErr_Occurred()) { - if (!PyErr_ExceptionMatches(PyExc_TypeError)) + Py_ssize_t res = PyObject_Length(o); + if (res < 0 && PyErr_Occurred()) { + if (!PyErr_ExceptionMatches(PyExc_TypeError)) { return -1; + } PyErr_Clear(); } - - /* try o.__length_hint__() */ - hintmeth = _PyObject_LookupSpecial(o, &PyId___length_hint__); - if (hintmeth == NULL) { - if (PyErr_Occurred()) - return -1; - else - return defaultvalue; + else { + return res; } - ro = PyObject_CallFunctionObjArgs(hintmeth, NULL); - Py_DECREF(hintmeth); - if (ro == NULL) { - if (!PyErr_ExceptionMatches(PyExc_TypeError)) + PyObject *hint = _PyObject_LookupSpecial(o, &PyId___length_hint__); + if (hint == NULL) { + if (PyErr_Occurred()) { return -1; - PyErr_Clear(); + } return defaultvalue; } - rv = PyLong_Check(ro) ? PyLong_AsSsize_t(ro) : defaultvalue; - Py_DECREF(ro); - return rv; + PyObject *result = PyObject_CallFunctionObjArgs(hint, NULL); + Py_DECREF(hint); + if (result == NULL) { + if (PyErr_ExceptionMatches(PyExc_TypeError)) { + PyErr_Clear(); + return defaultvalue; + } + return -1; + } + else if (result == Py_NotImplemented) { + Py_DECREF(result); + return defaultvalue; + } + if (!PyLong_Check(result)) { + PyErr_Format(PyExc_TypeError, "Length hint must be an integer, not %s", + Py_TYPE(result)->tp_name); + Py_DECREF(result); + return -1; + } + defaultvalue = PyLong_AsSsize_t(result); + Py_DECREF(result); + if (defaultvalue < 0 && PyErr_Occurred()) { + return -1; + } + if (defaultvalue < 0) { + PyErr_Format(PyExc_ValueError, "__length_hint__() should return >= 0"); + return -1; + } + return defaultvalue; } PyObject * @@ -1687,7 +1705,7 @@ PySequence_Tuple(PyObject *v) return NULL; /* Guess result size and allocate space. */ - n = _PyObject_LengthHint(v, 10); + n = PyObject_LengthHint(v, 10); if (n == -1) goto Fail; result = PyTuple_New(n); diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 2bb3a29daaa..26c76d26877 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -2282,7 +2282,7 @@ bytearray_extend(PyByteArrayObject *self, PyObject *arg) return NULL; /* Try to determine the length of the argument. 32 is arbitrary. */ - buf_size = _PyObject_LengthHint(arg, 32); + buf_size = PyObject_LengthHint(arg, 32); if (buf_size == -1) { Py_DECREF(it); return NULL; diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index bf9259f187a..25c2326b956 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2651,7 +2651,7 @@ PyBytes_FromObject(PyObject *x) } /* For iterator version, create a string object and resize as needed */ - size = _PyObject_LengthHint(x, 64); + size = PyObject_LengthHint(x, 64); if (size == -1 && PyErr_Occurred()) return NULL; /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from diff --git a/Objects/iterobject.c b/Objects/iterobject.c index 3cfbeaf2e35..cf4af5b81f2 100644 --- a/Objects/iterobject.c +++ b/Objects/iterobject.c @@ -76,9 +76,14 @@ iter_len(seqiterobject *it) Py_ssize_t seqsize, len; if (it->it_seq) { - seqsize = PySequence_Size(it->it_seq); - if (seqsize == -1) - return NULL; + if (_PyObject_HasLen(it->it_seq)) { + seqsize = PySequence_Size(it->it_seq); + if (seqsize == -1) + return NULL; + } + else { + return Py_NotImplemented; + } len = seqsize - it->it_index; if (len >= 0) return PyLong_FromSsize_t(len); diff --git a/Objects/listobject.c b/Objects/listobject.c index 6e0d094154e..4cc34b5a3c3 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -826,7 +826,7 @@ listextend(PyListObject *self, PyObject *b) iternext = *it->ob_type->tp_iternext; /* Guess a result list size. */ - n = _PyObject_LengthHint(b, 8); + n = PyObject_LengthHint(b, 8); if (n == -1) { Py_DECREF(it); return NULL;