From 61fe64d5de313dd66e7d42940f54b69ef371fc4f Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 23 Feb 2003 04:40:07 +0000 Subject: [PATCH] User requested changes to the itertools module. Subsumed times() into repeat(). Added cycle() and chain(). --- Doc/lib/libitertools.tex | 92 ++++++------ Lib/test/test_itertools.py | 23 +-- Misc/NEWS | 4 +- Modules/itertoolsmodule.c | 288 ++++++++++++++++++++++++++++++------- 4 files changed, 303 insertions(+), 104 deletions(-) diff --git a/Doc/lib/libitertools.tex b/Doc/lib/libitertools.tex index 5c6c2b728de..fafc48b6bc1 100644 --- a/Doc/lib/libitertools.tex +++ b/Doc/lib/libitertools.tex @@ -33,22 +33,8 @@ Adopting the principles of just-in-time manufacturing, they create data when and where needed instead of consuming memory with the computer equivalent of ``inventory''. -Some tools were omitted from the module because they offered no -advantage over their pure python counterparts or because their behavior -was too surprising. - -For instance, SML provides a tool: \code{cycle(\var{seq})} which -loops over the sequence elements and then starts again when the -sequence is exhausted. The surprising behavior is the need for -significant auxiliary storage (which is unusual for an iterator). -If needed, the tool is readily constructible using pure Python. - -Other tools are being considered for inclusion in future versions of the -module. For instance, the function -\function{chain(\var{it0}, \var{it1}, ...)} would return elements from -the first iterator until it was exhausted and then move on to each -successive iterator. The module author welcomes suggestions for other -basic building blocks. +The module author welcomes suggestions for other basic building blocks +to be added to future versions of the module. \begin{seealso} \seetext{The Standard ML Basis Library, @@ -67,6 +53,20 @@ The following module functions all construct and return iterators. Some provide streams of infinite length, so they should only be accessed by functions or loops that truncate the stream. +\begin{funcdesc}{chain}{*iterables} + Make an iterator that returns elements from the first iterable until + it is exhausted, then proceeds to the next iterable, until all of the + iterables are exhausted. Used for treating consecutive sequences as + a single sequence. Equivalent to: + + \begin{verbatim} + def chain(*iterables): + for it in iterables: + for element in it: + yield element + \end{verbatim} +\end{funcdesc} + \begin{funcdesc}{count}{\optional{n}} Make an iterator that returns consecutive integers starting with \var{n}. Does not currently support python long integers. Often used as an @@ -85,6 +85,29 @@ by functions or loops that truncate the stream. may change in the future. \end{funcdesc} +\begin{funcdesc}{cycle}{iterable} + Make an iterator returning elements from the iterable and saving a + copy of each. When the iterable is exhausted, return elements from + the saved copy. Repeats indefinitely. Equivalent to: + + \begin{verbatim} + def cycle(iterable): + saved = [] + for element in iterable: + yield element + saved.append(element) + if len(saved) == 0: + return + while True: + for element in saved: + yield element + \end{verbatim} + + Note, this is the only member of the toolkit that may require + significant auxiliary storage (depending on the length of the + iterable. +\end{funcdesc} + \begin{funcdesc}{dropwhile}{predicate, iterable} Make an iterator that drops elements from the iterable as long as the predicate is true; afterwards, returns every element. Note, @@ -207,16 +230,21 @@ by functions or loops that truncate the stream. \end{verbatim} \end{funcdesc} -\begin{funcdesc}{repeat}{object} +\begin{funcdesc}{repeat}{object\optional{, times}} Make an iterator that returns \var{object} over and over again. + Runs indefinitely unless the \var{times} argument is specified. Used as argument to \function{imap()} for invariant parameters to the called function. Also used with \function{izip()} to create an invariant part of a tuple record. Equivalent to: \begin{verbatim} - def repeat(object): - while True: - yield object + def repeat(object, times=None): + if times is None: + while True: + yield object + else: + for i in xrange(times): + yield object \end{verbatim} \end{funcdesc} @@ -253,20 +281,6 @@ by functions or loops that truncate the stream. \end{verbatim} \end{funcdesc} -\begin{funcdesc}{times}{n, \optional{object}} - Make an iterator that returns \var{object} \var{n} times. - \var{object} defaults to \code{None}. Used for looping a specific - number of times without creating a number object on each pass. - Equivalent to: - - \begin{verbatim} - def times(n, object=None): - if n<0 : raise ValueError - for i in xrange(n): - yield object - \end{verbatim} -\end{funcdesc} - \subsection{Examples \label{itertools-example}} @@ -274,12 +288,6 @@ The following examples show common uses for each tool and demonstrate ways they can be combined. \begin{verbatim} ->>> for i in times(3): -... print "Hello" -... -Hello -Hello -Hello >>> amounts = [120.15, 764.05, 823.14] >>> for checknum, amount in izip(count(1200), amounts): @@ -343,4 +351,8 @@ from building blocks. ... "Returns True if pred(x) is False for every element in the iterable" ... return not nth(ifilter(pred, seq), 0) +>>> def pairwise(seq): +... "s -> (s0,s1), (s1,s2), (s2, s3), ..." +... return izip(seq, islice(seq,1,len(seq))) + \end{verbatim} diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py index 09b7d1320bd..4c506d0c670 100644 --- a/Lib/test/test_itertools.py +++ b/Lib/test/test_itertools.py @@ -4,11 +4,18 @@ from itertools import * import sys class TestBasicOps(unittest.TestCase): + def test_chain(self): + self.assertEqual(list(chain('abc', 'def')), list('abcdef')) + def test_count(self): self.assertEqual(zip('abc',count()), [('a', 0), ('b', 1), ('c', 2)]) self.assertEqual(zip('abc',count(3)), [('a', 3), ('b', 4), ('c', 5)]) self.assertRaises(TypeError, count, 2, 3) + def test_cycle(self): + self.assertEqual(list(islice(cycle('abc'),10)), list('abcabcabca')) + self.assertEqual(list(cycle('')), []) + def test_ifilter(self): def isEven(x): return x%2==0 @@ -35,13 +42,9 @@ class TestBasicOps(unittest.TestCase): def test_repeat(self): self.assertEqual(zip(xrange(3),repeat('a')), [(0, 'a'), (1, 'a'), (2, 'a')]) + self.assertEqual(list(repeat('a', 3)), ['a', 'a', 'a']) self.assertRaises(TypeError, repeat) - def test_times(self): - self.assertEqual(list(times(3)), [None]*3) - self.assertEqual(list(times(3, True)), [True]*3) - self.assertRaises(ValueError, times, -1) - def test_imap(self): import operator self.assertEqual(list(imap(operator.pow, range(3), range(1,7))), @@ -94,12 +97,6 @@ class TestBasicOps(unittest.TestCase): libreftest = """ Doctest for examples in the library reference, libitertools.tex ->>> for i in times(3): -... print "Hello" -... -Hello -Hello -Hello >>> amounts = [120.15, 764.05, 823.14] >>> for checknum, amount in izip(count(1200), amounts): @@ -154,6 +151,10 @@ Samuele ... "Returns True if pred(x) is False for every element in the iterable" ... return not nth(ifilter(pred, seq), 0) +>>> def pairwise(seq): +... "s -> (s0,s1), (s1,s2), (s2, s3), ..." +... return izip(seq, islice(seq,1,len(seq))) + """ __test__ = {'libreftest' : libreftest} diff --git a/Misc/NEWS b/Misc/NEWS index 461432bebb3..8d5a66959b9 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -17,7 +17,9 @@ TBD Extension modules ----------------- -TBD +- Made user requested changes to the itertools module. + Subsumed the times() function into repeat(). + Added chain() and cycle(). Library ------- diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c index 5766b75bc05..a76d8c2e009 100644 --- a/Modules/itertoolsmodule.c +++ b/Modules/itertoolsmodule.c @@ -7,6 +7,163 @@ All rights reserved. */ +/* cycle object **********************************************************/ + +typedef struct { + PyObject_HEAD + PyObject *it; + PyObject *saved; + int firstpass; +} cycleobject; + +PyTypeObject cycle_type; + +static PyObject * +cycle_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyObject *it; + PyObject *iterable; + PyObject *saved; + cycleobject *lz; + + if (!PyArg_UnpackTuple(args, "cycle", 1, 1, &iterable)) + return NULL; + + /* Get iterator. */ + it = PyObject_GetIter(iterable); + if (it == NULL) + return NULL; + + saved = PyList_New(0); + if (saved == NULL) { + Py_DECREF(it); + return NULL; + } + + /* create cycleobject structure */ + lz = (cycleobject *)type->tp_alloc(type, 0); + if (lz == NULL) { + Py_DECREF(it); + Py_DECREF(saved); + return NULL; + } + lz->it = it; + lz->saved = saved; + lz->firstpass = 0; + + return (PyObject *)lz; +} + +static void +cycle_dealloc(cycleobject *lz) +{ + PyObject_GC_UnTrack(lz); + Py_XDECREF(lz->saved); + Py_XDECREF(lz->it); + lz->ob_type->tp_free(lz); +} + +static int +cycle_traverse(cycleobject *lz, visitproc visit, void *arg) +{ + int err; + + if (lz->it) { + err = visit(lz->it, arg); + if (err) + return err; + } + if (lz->saved) { + err = visit(lz->saved, arg); + if (err) + return err; + } + return 0; +} + +static PyObject * +cycle_next(cycleobject *lz) +{ + PyObject *item; + PyObject *it; + + while (1) { + item = PyIter_Next(lz->it); + if (item != NULL) { + if (!lz->firstpass) + PyList_Append(lz->saved, item); + return item; + } + if (PyList_Size(lz->saved) == 0) + return NULL; + it = PyObject_GetIter(lz->saved); + if (it == NULL) + return NULL; + Py_DECREF(lz->it); + lz->it = it; + lz->firstpass = 1; + } +} + +static PyObject * +cycle_getiter(PyObject *lz) +{ + Py_INCREF(lz); + return lz; +} + +PyDoc_STRVAR(cycle_doc, +"cycle(iterable) --> cycle object\n\ +\n\ +Return elements from the iterable until it is exhausted.\n\ +Then repeat the sequence indefinitely."); + +PyTypeObject cycle_type = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + "itertools.cycle", /* tp_name */ + sizeof(cycleobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)cycle_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | + Py_TPFLAGS_BASETYPE, /* tp_flags */ + cycle_doc, /* tp_doc */ + (traverseproc)cycle_traverse, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + (getiterfunc)cycle_getiter, /* tp_iter */ + (iternextfunc)cycle_next, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + PyType_GenericAlloc, /* tp_alloc */ + cycle_new, /* tp_new */ + PyObject_GC_Del, /* tp_free */ +}; + + /* dropwhile object **********************************************************/ typedef struct { @@ -826,93 +983,110 @@ PyTypeObject imap_type = { }; -/* times object ************************************************************/ +/* chain object ************************************************************/ typedef struct { PyObject_HEAD - PyObject *obj; - long cnt; -} timesobject; + long tuplesize; + long iternum; /* which iterator is active */ + PyObject *ittuple; /* tuple of iterators */ +} chainobject; -PyTypeObject times_type; +PyTypeObject chain_type; static PyObject * -times_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +chain_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { - timesobject *lz; - PyObject *obj = Py_None; - long cnt; + chainobject *lz; + int tuplesize = PySequence_Length(args); + int i; + PyObject *ittuple; - if (!PyArg_ParseTuple(args, "l|O:times", &cnt, &obj)) - return NULL; - - if (cnt < 0) { - PyErr_SetString(PyExc_ValueError, - "count for times() cannot be negative."); + /* obtain iterators */ + assert(PyTuple_Check(args)); + ittuple = PyTuple_New(tuplesize); + if(ittuple == NULL) return NULL; + for (i=0; i < tuplesize; ++i) { + PyObject *item = PyTuple_GET_ITEM(args, i); + PyObject *it = PyObject_GetIter(item); + if (it == NULL) { + if (PyErr_ExceptionMatches(PyExc_TypeError)) + PyErr_Format(PyExc_TypeError, + "chain argument #%d must support iteration", + i+1); + Py_DECREF(ittuple); + return NULL; + } + PyTuple_SET_ITEM(ittuple, i, it); } - /* create timesobject structure */ - lz = (timesobject *)type->tp_alloc(type, 0); + /* create chainobject structure */ + lz = (chainobject *)type->tp_alloc(type, 0); if (lz == NULL) return NULL; - lz->cnt = cnt; - Py_INCREF(obj); - lz->obj = obj; + + lz->ittuple = ittuple; + lz->iternum = 0; + lz->tuplesize = tuplesize; return (PyObject *)lz; } static void -times_dealloc(timesobject *lz) +chain_dealloc(chainobject *lz) { PyObject_GC_UnTrack(lz); - Py_XDECREF(lz->obj); + Py_XDECREF(lz->ittuple); lz->ob_type->tp_free(lz); } static int -times_traverse(timesobject *lz, visitproc visit, void *arg) +chain_traverse(chainobject *lz, visitproc visit, void *arg) { - if (lz->obj) - return visit(lz->obj, arg); + if (lz->ittuple) + return visit(lz->ittuple, arg); return 0; } static PyObject * -times_next(timesobject *lz) +chain_next(chainobject *lz) { - PyObject *obj = lz->obj; + PyObject *it; + PyObject *item; - if (lz->cnt > 0) { - lz->cnt--; - Py_INCREF(obj); - return obj; + while (lz->iternum < lz->tuplesize) { + it = PyTuple_GET_ITEM(lz->ittuple, lz->iternum); + item = PyIter_Next(it); + if (item != NULL) + return item; + lz->iternum++; } return NULL; } static PyObject * -times_getiter(PyObject *lz) +chain_getiter(PyObject *lz) { Py_INCREF(lz); return lz; } -PyDoc_STRVAR(times_doc, -"times(n [,obj]) --> times object\n\ +PyDoc_STRVAR(chain_doc, +"chain(*iterables) --> chain object\n\ \n\ -Return a times object whose .next() method returns n consecutive\n\ -instances of obj (default is None)."); +Return a chain object whose .next() method returns elements from the\n\ +first iterable until it is exhausted, then elements from the next\n\ +iterable, until all of the iterables are exhausted."); -PyTypeObject times_type = { +PyTypeObject chain_type = { PyObject_HEAD_INIT(NULL) 0, /* ob_size */ - "itertools.times", /* tp_name */ - sizeof(timesobject), /* tp_basicsize */ + "itertools.chain", /* tp_name */ + sizeof(chainobject), /* tp_basicsize */ 0, /* tp_itemsize */ /* methods */ - (destructor)times_dealloc, /* tp_dealloc */ + (destructor)chain_dealloc, /* tp_dealloc */ 0, /* tp_print */ 0, /* tp_getattr */ 0, /* tp_setattr */ @@ -929,13 +1103,13 @@ PyTypeObject times_type = { 0, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE, /* tp_flags */ - times_doc, /* tp_doc */ - (traverseproc)times_traverse, /* tp_traverse */ + chain_doc, /* tp_doc */ + (traverseproc)chain_traverse, /* tp_traverse */ 0, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ - (getiterfunc)times_getiter, /* tp_iter */ - (iternextfunc)times_next, /* tp_iternext */ + (getiterfunc)chain_getiter, /* tp_iter */ + (iternextfunc)chain_next, /* tp_iternext */ 0, /* tp_methods */ 0, /* tp_members */ 0, /* tp_getset */ @@ -946,7 +1120,7 @@ PyTypeObject times_type = { 0, /* tp_dictoffset */ 0, /* tp_init */ PyType_GenericAlloc, /* tp_alloc */ - times_new, /* tp_new */ + chain_new, /* tp_new */ PyObject_GC_Del, /* tp_free */ }; @@ -1544,6 +1718,7 @@ PyTypeObject izip_type = { typedef struct { PyObject_HEAD PyObject *element; + long cnt; } repeatobject; PyTypeObject repeat_type; @@ -1553,8 +1728,9 @@ repeat_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { repeatobject *ro; PyObject *element; + long cnt = -1; - if (!PyArg_UnpackTuple(args, "repeat", 1, 1, &element)) + if (!PyArg_ParseTuple(args, "O|l:repeat", &element, &cnt)) return NULL; ro = (repeatobject *)type->tp_alloc(type, 0); @@ -1562,6 +1738,7 @@ repeat_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return NULL; Py_INCREF(element); ro->element = element; + ro->cnt = cnt; return (PyObject *)ro; } @@ -1584,6 +1761,10 @@ repeat_traverse(repeatobject *ro, visitproc visit, void *arg) static PyObject * repeat_next(repeatobject *ro) { + if (ro->cnt == 0) + return NULL; + if (ro->cnt > 0) + ro->cnt--; Py_INCREF(ro->element); return ro->element; } @@ -1596,7 +1777,9 @@ repeat_getiter(PyObject *ro) } PyDoc_STRVAR(repeat_doc, -"repeat(element) -> create an iterator which returns the element endlessly."); +"repeat(element [,times]) -> create an iterator which returns the element\n\ +for the specified number of times. If not specified, returns the element\n\ +endlessly."); PyTypeObject repeat_type = { PyObject_HEAD_INIT(NULL) @@ -1651,7 +1834,8 @@ PyDoc_STRVAR(module_doc, \n\ Infinite iterators:\n\ count([n]) --> n, n+1, n+2, ...\n\ -repeat(elem) --> elem, elem, elem, ...\n\ +cycle(p) --> p0, p1, ... plast, p0, p1, ...\n\ +repeat(elem [,n]) --> elem, elem, elem, ... endlessly or upto n times\n\ \n\ Iterators terminating on the shortest input sequence:\n\ izip(p, q, ...) --> (p[0], q[0]), (p[1], q[1]), ... \n\ @@ -1661,7 +1845,7 @@ islice(seq, [start,] stop [, step]) --> elements from\n\ seq[start:stop:step]\n\ imap(fun, p, q, ...) --> fun(p0, q0), fun(p1, q1), ...\n\ starmap(fun, seq) --> fun(*seq[0]), fun(*seq[1]), ...\n\ -times(n, [obj]) --> obj, obj, ... for n times. obj defaults to None\n\ +chain(p, q, ...) --> p0, p1, ... plast, q0, q1, ... \n\ takewhile(pred, seq) --> seq[0], seq[1], until pred fails\n\ dropwhile(pred, seq) --> seq[n], seq[n+1], starting when pred fails\n\ "); @@ -1674,12 +1858,13 @@ inititertools(void) PyObject *m; char *name; PyTypeObject *typelist[] = { + &cycle_type, &dropwhile_type, &takewhile_type, &islice_type, &starmap_type, &imap_type, - ×_type, + &chain_type, &ifilter_type, &ifilterfalse_type, &count_type, @@ -1694,8 +1879,7 @@ inititertools(void) if (PyType_Ready(typelist[i]) < 0) return; name = strchr(typelist[i]->tp_name, '.') + 1; - if (name == NULL) - return; + assert (name != NULL); Py_INCREF(typelist[i]); PyModule_AddObject(m, name, (PyObject *)typelist[i]); }