Add itertools.izip_longest().

This commit is contained in:
Raymond Hettinger 2007-02-21 05:20:38 +00:00
parent 15cade0568
commit d36862cf78
4 changed files with 317 additions and 0 deletions

View File

@ -302,6 +302,33 @@ by functions or loops that truncate the stream.
don't care about trailing, unmatched values from the longer iterables. don't care about trailing, unmatched values from the longer iterables.
\end{funcdesc} \end{funcdesc}
\begin{funcdesc}{izip_longest}{*iterables\optional{, fillvalue}}
Make an iterator that aggregates elements from each of the iterables.
If the iterables are of uneven length, missing values are filled-in
with \var{fillvalue}. Iteration continues until the longest iterable
is exhausted. Equivalent to:
\begin{verbatim}
def izip_longest(*args, **kwds):
fillvalue = kwds.get('fillvalue')
def sentinel(counter = ([fillvalue]*(len(args)-1)).pop):
yield counter() # yields the fillvalue, or raises IndexError
fillers = repeat(fillvalue)
iters = [chain(it, sentinel(), fillers) for it in args]
try:
for tup in izip(*iters):
yield tup
except IndexError:
pass
\end{verbatim}
If one of the iterables is potentially infinite, then the
\function{izip_longest()} function should be wrapped with something
that limits the number of calls (for example \function{islice()} or
\function{take()}).
\versionadded{2.6}
\end{funcdesc}
\begin{funcdesc}{repeat}{object\optional{, times}} \begin{funcdesc}{repeat}{object\optional{, times}}
Make an iterator that returns \var{object} over and over again. Make an iterator that returns \var{object} over and over again.
Runs indefinitely unless the \var{times} argument is specified. Runs indefinitely unless the \var{times} argument is specified.

View File

@ -198,6 +198,51 @@ class TestBasicOps(unittest.TestCase):
ids = map(id, list(izip('abc', 'def'))) ids = map(id, list(izip('abc', 'def')))
self.assertEqual(len(dict.fromkeys(ids)), len(ids)) self.assertEqual(len(dict.fromkeys(ids)), len(ids))
def test_iziplongest(self):
for args in [
['abc', range(6)],
[range(6), 'abc'],
[range(1000), range(2000,2100), range(3000,3050)],
[range(1000), range(0), range(3000,3050), range(1200), range(1500)],
[range(1000), range(0), range(3000,3050), range(1200), range(1500), range(0)],
]:
target = map(None, *args)
self.assertEqual(list(izip_longest(*args)), target)
self.assertEqual(list(izip_longest(*args, **{})), target)
target = [tuple((e is None and 'X' or e) for e in t) for t in target] # Replace None fills with 'X'
self.assertEqual(list(izip_longest(*args, **dict(fillvalue='X'))), target)
self.assertEqual(take(3,izip_longest('abcdef', count())), zip('abcdef', range(3))) # take 3 from infinite input
self.assertEqual(list(izip_longest()), zip())
self.assertEqual(list(izip_longest([])), zip([]))
self.assertEqual(list(izip_longest('abcdef')), zip('abcdef'))
self.assertEqual(list(izip_longest('abc', 'defg', **{})), map(None, 'abc', 'defg')) # empty keyword dict
self.assertRaises(TypeError, izip_longest, 3)
self.assertRaises(TypeError, izip_longest, range(3), 3)
for stmt in [
"izip_longest('abc', fv=1)",
"izip_longest('abc', fillvalue=1, bogus_keyword=None)",
]:
try:
eval(stmt, globals(), locals())
except TypeError:
pass
else:
self.fail('Did not raise Type in: ' + stmt)
# Check tuple re-use (implementation detail)
self.assertEqual([tuple(list(pair)) for pair in izip_longest('abc', 'def')],
zip('abc', 'def'))
self.assertEqual([pair for pair in izip_longest('abc', 'def')],
zip('abc', 'def'))
ids = map(id, izip_longest('abc', 'def'))
self.assertEqual(min(ids), max(ids))
ids = map(id, list(izip_longest('abc', 'def')))
self.assertEqual(len(dict.fromkeys(ids)), len(ids))
def test_repeat(self): def test_repeat(self):
self.assertEqual(zip(xrange(3),repeat('a')), self.assertEqual(zip(xrange(3),repeat('a')),
[(0, 'a'), (1, 'a'), (2, 'a')]) [(0, 'a'), (1, 'a'), (2, 'a')])
@ -611,6 +656,15 @@ class TestVariousIteratorArgs(unittest.TestCase):
self.assertRaises(TypeError, list, izip(N(s))) self.assertRaises(TypeError, list, izip(N(s)))
self.assertRaises(ZeroDivisionError, list, izip(E(s))) self.assertRaises(ZeroDivisionError, list, izip(E(s)))
def test_iziplongest(self):
for s in ("123", "", range(1000), ('do', 1.2), xrange(2000,2200,5)):
for g in (G, I, Ig, S, L, R):
self.assertEqual(list(izip_longest(g(s))), zip(g(s)))
self.assertEqual(list(izip_longest(g(s), g(s))), zip(g(s), g(s)))
self.assertRaises(TypeError, izip_longest, X(s))
self.assertRaises(TypeError, list, izip_longest(N(s)))
self.assertRaises(ZeroDivisionError, list, izip_longest(E(s)))
def test_imap(self): def test_imap(self):
for s in (range(10), range(0), range(100), (7,11), xrange(20,50,5)): for s in (range(10), range(0), range(100), (7,11), xrange(20,50,5)):
for g in (G, I, Ig, S, L, R): for g in (G, I, Ig, S, L, R):

View File

@ -127,6 +127,8 @@ Library
- Added heapq.merge() for merging sorted input streams. - Added heapq.merge() for merging sorted input streams.
- Added itertools.izip_longest().
- Have the encoding package's search function dynamically import using absolute - Have the encoding package's search function dynamically import using absolute
import semantics. import semantics.

View File

@ -2472,6 +2472,238 @@ static PyTypeObject repeat_type = {
PyObject_GC_Del, /* tp_free */ PyObject_GC_Del, /* tp_free */
}; };
/* iziplongest object ************************************************************/
#include "Python.h"
typedef struct {
PyObject_HEAD
Py_ssize_t tuplesize;
Py_ssize_t numactive;
PyObject *ittuple; /* tuple of iterators */
PyObject *result;
PyObject *fillvalue;
PyObject *filler; /* repeat(fillvalue) */
} iziplongestobject;
static PyTypeObject iziplongest_type;
static PyObject *
izip_longest_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
iziplongestobject *lz;
Py_ssize_t i;
PyObject *ittuple; /* tuple of iterators */
PyObject *result;
PyObject *fillvalue = Py_None;
PyObject *filler;
Py_ssize_t tuplesize = PySequence_Length(args);
if (kwds != NULL && PyDict_CheckExact(kwds) && PyDict_Size(kwds) > 0) {
fillvalue = PyDict_GetItemString(kwds, "fillvalue");
if (fillvalue == NULL || PyDict_Size(kwds) > 1) {
PyErr_SetString(PyExc_TypeError,
"izip_longest() got an unexpected keyword argument");
return NULL;
}
}
/* args must be a tuple */
assert(PyTuple_Check(args));
/* obtain iterators */
ittuple = PyTuple_New(tuplesize);
if (ittuple == NULL)
return NULL;
for (i=0; i < tuplesize; ++i) {
PyObject *item = PyTuple_GET_ITEM(args, i);
PyObject *it = PyObject_GetIter(item);
if (it == NULL) {
if (PyErr_ExceptionMatches(PyExc_TypeError))
PyErr_Format(PyExc_TypeError,
"izip_longest argument #%zd must support iteration",
i+1);
Py_DECREF(ittuple);
return NULL;
}
PyTuple_SET_ITEM(ittuple, i, it);
}
filler = PyObject_CallFunctionObjArgs((PyObject *)(&repeat_type), fillvalue, NULL);
if (filler == NULL) {
Py_DECREF(ittuple);
return NULL;
}
/* create a result holder */
result = PyTuple_New(tuplesize);
if (result == NULL) {
Py_DECREF(ittuple);
Py_DECREF(filler);
return NULL;
}
for (i=0 ; i < tuplesize ; i++) {
Py_INCREF(Py_None);
PyTuple_SET_ITEM(result, i, Py_None);
}
/* create iziplongestobject structure */
lz = (iziplongestobject *)type->tp_alloc(type, 0);
if (lz == NULL) {
Py_DECREF(ittuple);
Py_DECREF(filler);
Py_DECREF(result);
return NULL;
}
lz->ittuple = ittuple;
lz->tuplesize = tuplesize;
lz->numactive = tuplesize;
lz->result = result;
Py_INCREF(fillvalue);
lz->fillvalue = fillvalue;
Py_INCREF(filler);
lz->filler = filler; /* XXX */
return (PyObject *)lz;
}
static void
izip_longest_dealloc(iziplongestobject *lz)
{
PyObject_GC_UnTrack(lz);
Py_XDECREF(lz->ittuple);
Py_XDECREF(lz->result);
Py_XDECREF(lz->fillvalue);
Py_XDECREF(lz->filler);
lz->ob_type->tp_free(lz);
}
static int
izip_longest_traverse(iziplongestobject *lz, visitproc visit, void *arg)
{
Py_VISIT(lz->ittuple);
Py_VISIT(lz->result);
Py_VISIT(lz->fillvalue);
Py_VISIT(lz->filler);
return 0;
}
static PyObject *
izip_longest_next(iziplongestobject *lz)
{
Py_ssize_t i;
Py_ssize_t tuplesize = lz->tuplesize;
PyObject *result = lz->result;
PyObject *it;
PyObject *item;
PyObject *olditem;
if (tuplesize == 0)
return NULL;
if (result->ob_refcnt == 1) {
Py_INCREF(result);
for (i=0 ; i < tuplesize ; i++) {
it = PyTuple_GET_ITEM(lz->ittuple, i);
assert(PyIter_Check(it));
item = (*it->ob_type->tp_iternext)(it);
if (item == NULL) {
if (lz->numactive <= 1) {
Py_DECREF(result);
return NULL;
} else {
Py_INCREF(lz->filler);
PyTuple_SET_ITEM(lz->ittuple, i, lz->filler);
Py_INCREF(lz->fillvalue);
item = lz->fillvalue;
Py_DECREF(it);
lz->numactive -= 1;
}
}
olditem = PyTuple_GET_ITEM(result, i);
PyTuple_SET_ITEM(result, i, item);
Py_DECREF(olditem);
}
} else {
result = PyTuple_New(tuplesize);
if (result == NULL)
return NULL;
for (i=0 ; i < tuplesize ; i++) {
it = PyTuple_GET_ITEM(lz->ittuple, i);
assert(PyIter_Check(it));
item = (*it->ob_type->tp_iternext)(it);
if (item == NULL) {
if (lz->numactive <= 1) {
Py_DECREF(result);
return NULL;
} else {
Py_INCREF(lz->filler);
PyTuple_SET_ITEM(lz->ittuple, i, lz->filler);
Py_INCREF(lz->fillvalue);
item = lz->fillvalue;
Py_DECREF(it);
lz->numactive -= 1;
}
}
PyTuple_SET_ITEM(result, i, item);
}
}
return result;
}
PyDoc_STRVAR(izip_longest_doc,
"izip_longest(iter1 [,iter2 [...]], [fillvalue=None]) --> izip_longest object\n\
\n\
Return an izip_longest object whose .next() method returns a tuple where\n\
the i-th element comes from the i-th iterable argument. The .next()\n\
method continues until the longest iterable in the argument sequence\n\
is exhausted and then it raises StopIteration. When the shorter iterables\n\
are exhausted, the fillvalue is substituted in their place. The fillvalue\n\
defaults to None or can be specified by a keyword argument.\n\
");
static PyTypeObject iziplongest_type = {
PyObject_HEAD_INIT(NULL)
0, /* ob_size */
"itertools.izip_longest", /* tp_name */
sizeof(iziplongestobject), /* tp_basicsize */
0, /* tp_itemsize */
/* methods */
(destructor)izip_longest_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
PyObject_GenericGetAttr, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
Py_TPFLAGS_BASETYPE, /* tp_flags */
izip_longest_doc, /* tp_doc */
(traverseproc)izip_longest_traverse, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
PyObject_SelfIter, /* tp_iter */
(iternextfunc)izip_longest_next, /* tp_iternext */
0, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
izip_longest_new, /* tp_new */
PyObject_GC_Del, /* tp_free */
};
/* module level code ********************************************************/ /* module level code ********************************************************/
@ -2485,6 +2717,7 @@ repeat(elem [,n]) --> elem, elem, elem, ... endlessly or up to n times\n\
\n\ \n\
Iterators terminating on the shortest input sequence:\n\ Iterators terminating on the shortest input sequence:\n\
izip(p, q, ...) --> (p[0], q[0]), (p[1], q[1]), ... \n\ izip(p, q, ...) --> (p[0], q[0]), (p[1], q[1]), ... \n\
izip_longest(p, q, ...) --> (p[0], q[0]), (p[1], q[1]), ... \n\
ifilter(pred, seq) --> elements of seq where pred(elem) is True\n\ ifilter(pred, seq) --> elements of seq where pred(elem) is True\n\
ifilterfalse(pred, seq) --> elements of seq where pred(elem) is False\n\ ifilterfalse(pred, seq) --> elements of seq where pred(elem) is False\n\
islice(seq, [start,] stop [, step]) --> elements from\n\ islice(seq, [start,] stop [, step]) --> elements from\n\
@ -2522,6 +2755,7 @@ inititertools(void)
&ifilterfalse_type, &ifilterfalse_type,
&count_type, &count_type,
&izip_type, &izip_type,
&iziplongest_type,
&repeat_type, &repeat_type,
&groupby_type, &groupby_type,
NULL NULL