From d36862cf78a280a5260f90a558cdac3d5f5897c1 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 21 Feb 2007 05:20:38 +0000 Subject: [PATCH] Add itertools.izip_longest(). --- Doc/lib/libitertools.tex | 27 +++++ Lib/test/test_itertools.py | 54 +++++++++ Misc/NEWS | 2 + Modules/itertoolsmodule.c | 234 +++++++++++++++++++++++++++++++++++++ 4 files changed, 317 insertions(+) diff --git a/Doc/lib/libitertools.tex b/Doc/lib/libitertools.tex index 59fbd98844e..ac6028b31a7 100644 --- a/Doc/lib/libitertools.tex +++ b/Doc/lib/libitertools.tex @@ -302,6 +302,33 @@ by functions or loops that truncate the stream. don't care about trailing, unmatched values from the longer iterables. \end{funcdesc} +\begin{funcdesc}{izip_longest}{*iterables\optional{, fillvalue}} + Make an iterator that aggregates elements from each of the iterables. + If the iterables are of uneven length, missing values are filled-in + with \var{fillvalue}. Iteration continues until the longest iterable + is exhausted. Equivalent to: + + \begin{verbatim} + def izip_longest(*args, **kwds): + fillvalue = kwds.get('fillvalue') + def sentinel(counter = ([fillvalue]*(len(args)-1)).pop): + yield counter() # yields the fillvalue, or raises IndexError + fillers = repeat(fillvalue) + iters = [chain(it, sentinel(), fillers) for it in args] + try: + for tup in izip(*iters): + yield tup + except IndexError: + pass + \end{verbatim} + + If one of the iterables is potentially infinite, then the + \function{izip_longest()} function should be wrapped with something + that limits the number of calls (for example \function{islice()} or + \function{take()}). + \versionadded{2.6} +\end{funcdesc} + \begin{funcdesc}{repeat}{object\optional{, times}} Make an iterator that returns \var{object} over and over again. Runs indefinitely unless the \var{times} argument is specified. diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py index c965d4c14ef..93fdab7998f 100644 --- a/Lib/test/test_itertools.py +++ b/Lib/test/test_itertools.py @@ -198,6 +198,51 @@ class TestBasicOps(unittest.TestCase): ids = map(id, list(izip('abc', 'def'))) self.assertEqual(len(dict.fromkeys(ids)), len(ids)) + def test_iziplongest(self): + for args in [ + ['abc', range(6)], + [range(6), 'abc'], + [range(1000), range(2000,2100), range(3000,3050)], + [range(1000), range(0), range(3000,3050), range(1200), range(1500)], + [range(1000), range(0), range(3000,3050), range(1200), range(1500), range(0)], + ]: + target = map(None, *args) + self.assertEqual(list(izip_longest(*args)), target) + self.assertEqual(list(izip_longest(*args, **{})), target) + target = [tuple((e is None and 'X' or e) for e in t) for t in target] # Replace None fills with 'X' + self.assertEqual(list(izip_longest(*args, **dict(fillvalue='X'))), target) + + self.assertEqual(take(3,izip_longest('abcdef', count())), zip('abcdef', range(3))) # take 3 from infinite input + + self.assertEqual(list(izip_longest()), zip()) + self.assertEqual(list(izip_longest([])), zip([])) + self.assertEqual(list(izip_longest('abcdef')), zip('abcdef')) + + self.assertEqual(list(izip_longest('abc', 'defg', **{})), map(None, 'abc', 'defg')) # empty keyword dict + self.assertRaises(TypeError, izip_longest, 3) + self.assertRaises(TypeError, izip_longest, range(3), 3) + + for stmt in [ + "izip_longest('abc', fv=1)", + "izip_longest('abc', fillvalue=1, bogus_keyword=None)", + ]: + try: + eval(stmt, globals(), locals()) + except TypeError: + pass + else: + self.fail('Did not raise Type in: ' + stmt) + + # Check tuple re-use (implementation detail) + self.assertEqual([tuple(list(pair)) for pair in izip_longest('abc', 'def')], + zip('abc', 'def')) + self.assertEqual([pair for pair in izip_longest('abc', 'def')], + zip('abc', 'def')) + ids = map(id, izip_longest('abc', 'def')) + self.assertEqual(min(ids), max(ids)) + ids = map(id, list(izip_longest('abc', 'def'))) + self.assertEqual(len(dict.fromkeys(ids)), len(ids)) + def test_repeat(self): self.assertEqual(zip(xrange(3),repeat('a')), [(0, 'a'), (1, 'a'), (2, 'a')]) @@ -611,6 +656,15 @@ class TestVariousIteratorArgs(unittest.TestCase): self.assertRaises(TypeError, list, izip(N(s))) self.assertRaises(ZeroDivisionError, list, izip(E(s))) + def test_iziplongest(self): + for s in ("123", "", range(1000), ('do', 1.2), xrange(2000,2200,5)): + for g in (G, I, Ig, S, L, R): + self.assertEqual(list(izip_longest(g(s))), zip(g(s))) + self.assertEqual(list(izip_longest(g(s), g(s))), zip(g(s), g(s))) + self.assertRaises(TypeError, izip_longest, X(s)) + self.assertRaises(TypeError, list, izip_longest(N(s))) + self.assertRaises(ZeroDivisionError, list, izip_longest(E(s))) + def test_imap(self): for s in (range(10), range(0), range(100), (7,11), xrange(20,50,5)): for g in (G, I, Ig, S, L, R): diff --git a/Misc/NEWS b/Misc/NEWS index 72d58320a71..d7eab9cbc0f 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -127,6 +127,8 @@ Library - Added heapq.merge() for merging sorted input streams. +- Added itertools.izip_longest(). + - Have the encoding package's search function dynamically import using absolute import semantics. diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c index 70f787f784b..1c91a1959df 100644 --- a/Modules/itertoolsmodule.c +++ b/Modules/itertoolsmodule.c @@ -2472,6 +2472,238 @@ static PyTypeObject repeat_type = { PyObject_GC_Del, /* tp_free */ }; +/* iziplongest object ************************************************************/ + +#include "Python.h" + +typedef struct { + PyObject_HEAD + Py_ssize_t tuplesize; + Py_ssize_t numactive; + PyObject *ittuple; /* tuple of iterators */ + PyObject *result; + PyObject *fillvalue; + PyObject *filler; /* repeat(fillvalue) */ +} iziplongestobject; + +static PyTypeObject iziplongest_type; + +static PyObject * +izip_longest_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + iziplongestobject *lz; + Py_ssize_t i; + PyObject *ittuple; /* tuple of iterators */ + PyObject *result; + PyObject *fillvalue = Py_None; + PyObject *filler; + Py_ssize_t tuplesize = PySequence_Length(args); + + if (kwds != NULL && PyDict_CheckExact(kwds) && PyDict_Size(kwds) > 0) { + fillvalue = PyDict_GetItemString(kwds, "fillvalue"); + if (fillvalue == NULL || PyDict_Size(kwds) > 1) { + PyErr_SetString(PyExc_TypeError, + "izip_longest() got an unexpected keyword argument"); + return NULL; + } + } + + /* args must be a tuple */ + assert(PyTuple_Check(args)); + + /* obtain iterators */ + ittuple = PyTuple_New(tuplesize); + if (ittuple == NULL) + return NULL; + for (i=0; i < tuplesize; ++i) { + PyObject *item = PyTuple_GET_ITEM(args, i); + PyObject *it = PyObject_GetIter(item); + if (it == NULL) { + if (PyErr_ExceptionMatches(PyExc_TypeError)) + PyErr_Format(PyExc_TypeError, + "izip_longest argument #%zd must support iteration", + i+1); + Py_DECREF(ittuple); + return NULL; + } + PyTuple_SET_ITEM(ittuple, i, it); + } + + filler = PyObject_CallFunctionObjArgs((PyObject *)(&repeat_type), fillvalue, NULL); + if (filler == NULL) { + Py_DECREF(ittuple); + return NULL; + } + + /* create a result holder */ + result = PyTuple_New(tuplesize); + if (result == NULL) { + Py_DECREF(ittuple); + Py_DECREF(filler); + return NULL; + } + for (i=0 ; i < tuplesize ; i++) { + Py_INCREF(Py_None); + PyTuple_SET_ITEM(result, i, Py_None); + } + + /* create iziplongestobject structure */ + lz = (iziplongestobject *)type->tp_alloc(type, 0); + if (lz == NULL) { + Py_DECREF(ittuple); + Py_DECREF(filler); + Py_DECREF(result); + return NULL; + } + lz->ittuple = ittuple; + lz->tuplesize = tuplesize; + lz->numactive = tuplesize; + lz->result = result; + Py_INCREF(fillvalue); + lz->fillvalue = fillvalue; + Py_INCREF(filler); + lz->filler = filler; /* XXX */ + return (PyObject *)lz; +} + +static void +izip_longest_dealloc(iziplongestobject *lz) +{ + PyObject_GC_UnTrack(lz); + Py_XDECREF(lz->ittuple); + Py_XDECREF(lz->result); + Py_XDECREF(lz->fillvalue); + Py_XDECREF(lz->filler); + lz->ob_type->tp_free(lz); +} + +static int +izip_longest_traverse(iziplongestobject *lz, visitproc visit, void *arg) +{ + Py_VISIT(lz->ittuple); + Py_VISIT(lz->result); + Py_VISIT(lz->fillvalue); + Py_VISIT(lz->filler); + return 0; +} + +static PyObject * +izip_longest_next(iziplongestobject *lz) +{ + Py_ssize_t i; + Py_ssize_t tuplesize = lz->tuplesize; + PyObject *result = lz->result; + PyObject *it; + PyObject *item; + PyObject *olditem; + + if (tuplesize == 0) + return NULL; + if (result->ob_refcnt == 1) { + Py_INCREF(result); + for (i=0 ; i < tuplesize ; i++) { + it = PyTuple_GET_ITEM(lz->ittuple, i); + assert(PyIter_Check(it)); + item = (*it->ob_type->tp_iternext)(it); + if (item == NULL) { + if (lz->numactive <= 1) { + Py_DECREF(result); + return NULL; + } else { + Py_INCREF(lz->filler); + PyTuple_SET_ITEM(lz->ittuple, i, lz->filler); + Py_INCREF(lz->fillvalue); + item = lz->fillvalue; + Py_DECREF(it); + lz->numactive -= 1; + } + } + olditem = PyTuple_GET_ITEM(result, i); + PyTuple_SET_ITEM(result, i, item); + Py_DECREF(olditem); + } + } else { + result = PyTuple_New(tuplesize); + if (result == NULL) + return NULL; + for (i=0 ; i < tuplesize ; i++) { + it = PyTuple_GET_ITEM(lz->ittuple, i); + assert(PyIter_Check(it)); + item = (*it->ob_type->tp_iternext)(it); + if (item == NULL) { + if (lz->numactive <= 1) { + Py_DECREF(result); + return NULL; + } else { + Py_INCREF(lz->filler); + PyTuple_SET_ITEM(lz->ittuple, i, lz->filler); + Py_INCREF(lz->fillvalue); + item = lz->fillvalue; + Py_DECREF(it); + lz->numactive -= 1; + } + } + PyTuple_SET_ITEM(result, i, item); + } + } + return result; +} + +PyDoc_STRVAR(izip_longest_doc, +"izip_longest(iter1 [,iter2 [...]], [fillvalue=None]) --> izip_longest object\n\ +\n\ +Return an izip_longest object whose .next() method returns a tuple where\n\ +the i-th element comes from the i-th iterable argument. The .next()\n\ +method continues until the longest iterable in the argument sequence\n\ +is exhausted and then it raises StopIteration. When the shorter iterables\n\ +are exhausted, the fillvalue is substituted in their place. The fillvalue\n\ +defaults to None or can be specified by a keyword argument.\n\ +"); + +static PyTypeObject iziplongest_type = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + "itertools.izip_longest", /* tp_name */ + sizeof(iziplongestobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)izip_longest_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | + Py_TPFLAGS_BASETYPE, /* tp_flags */ + izip_longest_doc, /* tp_doc */ + (traverseproc)izip_longest_traverse, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter */ + (iternextfunc)izip_longest_next, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + izip_longest_new, /* tp_new */ + PyObject_GC_Del, /* tp_free */ +}; /* module level code ********************************************************/ @@ -2485,6 +2717,7 @@ repeat(elem [,n]) --> elem, elem, elem, ... endlessly or up to n times\n\ \n\ Iterators terminating on the shortest input sequence:\n\ izip(p, q, ...) --> (p[0], q[0]), (p[1], q[1]), ... \n\ +izip_longest(p, q, ...) --> (p[0], q[0]), (p[1], q[1]), ... \n\ ifilter(pred, seq) --> elements of seq where pred(elem) is True\n\ ifilterfalse(pred, seq) --> elements of seq where pred(elem) is False\n\ islice(seq, [start,] stop [, step]) --> elements from\n\ @@ -2522,6 +2755,7 @@ inititertools(void) &ifilterfalse_type, &count_type, &izip_type, + &iziplongest_type, &repeat_type, &groupby_type, NULL