Add itertools.izip_longest().

2007-02-21 05:20:38 +00:00 · 2007-02-21 05:20:38 +00:00 · d36862cf78
parent 15cade0568
commit d36862cf78
4 changed files with 317 additions and 0 deletions
--- a/Doc/lib/libitertools.tex
+++ b/Doc/lib/libitertools.tex
@ -302,6 +302,33 @@ by functions or loops that truncate the stream.
  don't care about trailing, unmatched values from the longer iterables.
 \end{funcdesc}
 \begin{funcdesc}{izip_longest}{*iterables\optional{, fillvalue}}
  Make an iterator that aggregates elements from each of the iterables.
  If the iterables are of uneven length, missing values are filled-in
  with \var{fillvalue}.  Iteration continues until the longest iterable
  is exhausted.  Equivalent to:
  \begin{verbatim}
    def izip_longest(*args, **kwds):
        fillvalue = kwds.get('fillvalue')
        def sentinel(counter = ([fillvalue]*(len(args)-1)).pop):
            yield counter()         # yields the fillvalue, or raises IndexError
        fillers = repeat(fillvalue)
        iters = [chain(it, sentinel(), fillers) for it in args]
        try:
            for tup in izip(*iters):
                yield tup
        except IndexError:
            pass
  \end{verbatim}
  If one of the iterables is potentially infinite, then the
  \function{izip_longest()} function should be wrapped with something
  that limits the number of calls (for example \function{islice()} or
  \function{takewhile()}).
  \versionadded{2.6}
 \end{funcdesc}
 \begin{funcdesc}{repeat}{object\optional{, times}}
  Make an iterator that returns \var{object} over and over again.
  Runs indefinitely unless the \var{times} argument is specified.
--- a/Lib/test/test_itertools.py
+++ b/Lib/test/test_itertools.py
@ -198,6 +198,51 @@ class TestBasicOps(unittest.TestCase):
        ids = map(id, list(izip('abc', 'def')))
        self.assertEqual(len(dict.fromkeys(ids)), len(ids))
    def test_iziplongest(self):
        # Compare izip_longest() against map(None, ...) for inputs of uneven
        # length, first with the default fill and then with fillvalue='X'.
        for args in [
                ['abc', range(6)],
                [range(6), 'abc'],
                [range(1000), range(2000,2100), range(3000,3050)],
                [range(1000), range(0), range(3000,3050), range(1200), range(1500)],
                [range(1000), range(0), range(3000,3050), range(1200), range(1500), range(0)],
            ]:
            target = map(None, *args)
            self.assertEqual(list(izip_longest(*args)), target)
            self.assertEqual(list(izip_longest(*args, **{})), target)
            target = [tuple((e is None and 'X' or e) for e in t) for t in target]   # Replace None fills with 'X'
            self.assertEqual(list(izip_longest(*args, **dict(fillvalue='X'))), target)

        self.assertEqual(take(3,izip_longest('abcdef', count())), zip('abcdef', range(3))) # take 3 from infinite input

        # Degenerate argument lists: no iterables, one empty, one non-empty.
        self.assertEqual(list(izip_longest()), zip())
        self.assertEqual(list(izip_longest([])), zip([]))
        self.assertEqual(list(izip_longest('abcdef')), zip('abcdef'))

        self.assertEqual(list(izip_longest('abc', 'defg', **{})), map(None, 'abc', 'defg')) # empty keyword dict

        # Non-iterable positional arguments are rejected at construction time.
        self.assertRaises(TypeError, izip_longest, 3)
        self.assertRaises(TypeError, izip_longest, range(3), 3)

        # Only the 'fillvalue' keyword is accepted; anything else is a TypeError.
        for stmt in [
            "izip_longest('abc', fv=1)",
            "izip_longest('abc', fillvalue=1, bogus_keyword=None)",
        ]:
            try:
                eval(stmt, globals(), locals())
            except TypeError:
                pass
            else:
                self.fail('Did not raise TypeError in:  ' + stmt)

        # Check tuple re-use (implementation detail)
        self.assertEqual([tuple(list(pair)) for pair in izip_longest('abc', 'def')],
                         zip('abc', 'def'))
        self.assertEqual([pair for pair in izip_longest('abc', 'def')],
                         zip('abc', 'def'))
        # When each result is dropped before the next is requested, the same
        # tuple object is handed back every time ...
        ids = map(id, izip_longest('abc', 'def'))
        self.assertEqual(min(ids), max(ids))
        # ... but when the results are kept alive, every tuple is distinct.
        ids = map(id, list(izip_longest('abc', 'def')))
        self.assertEqual(len(dict.fromkeys(ids)), len(ids))
    def test_repeat(self):
        self.assertEqual(zip(xrange(3),repeat('a')),
                         [(0, 'a'), (1, 'a'), (2, 'a')])
@ -611,6 +656,15 @@ class TestVariousIteratorArgs(unittest.TestCase):
            self.assertRaises(TypeError, list, izip(N(s)))
            self.assertRaises(ZeroDivisionError, list, izip(E(s)))
    def test_iziplongest(self):
        # For fully consumable inputs izip_longest() must agree with zip(),
        # whichever iterator-protocol wrapper supplies the data.
        samples = ("123", "", range(1000), ('do', 1.2), xrange(2000,2200,5))
        wrappers = (G, I, Ig, S, L, R)
        for seq in samples:
            for wrap in wrappers:
                single = list(izip_longest(wrap(seq)))
                self.assertEqual(single, zip(wrap(seq)))
                paired = list(izip_longest(wrap(seq), wrap(seq)))
                self.assertEqual(paired, zip(wrap(seq), wrap(seq)))
            # X is rejected when the object is built; N and E only fail
            # once the result is consumed.
            self.assertRaises(TypeError, izip_longest, X(seq))
            self.assertRaises(TypeError, list, izip_longest(N(seq)))
            self.assertRaises(ZeroDivisionError, list, izip_longest(E(seq)))
    def test_imap(self):
        for s in (range(10), range(0), range(100), (7,11), xrange(20,50,5)):
            for g in (G, I, Ig, S, L, R):
--- a/Misc/NEWS
+++ b/Misc/NEWS
@ -127,6 +127,8 @@ Library
 - Added heapq.merge() for merging sorted input streams.
 - Added itertools.izip_longest().
 - Have the encoding package's search function dynamically import using absolute
  import semantics.
--- a/Modules/itertoolsmodule.c
+++ b/Modules/itertoolsmodule.c
@ -2472,6 +2472,238 @@ static PyTypeObject repeat_type = {
 	PyObject_GC_Del,		/* tp_free */
 };
 /* iziplongest object ************************************************************/
 #include "Python.h"
 /* Instance layout of an izip_longest iterator. */
 typedef struct {
 	PyObject_HEAD
 	Py_ssize_t tuplesize;		/* number of input iterables (length of ittuple and result) */
 	Py_ssize_t numactive;			/* inputs not yet replaced by the filler; starts at tuplesize */
 	PyObject *ittuple;		/* tuple of iterators */
 	PyObject *result;		/* result tuple; reused by next() when its refcount is 1 */
 	PyObject *fillvalue;		/* value emitted in place of an exhausted input */
 	PyObject *filler;		/* repeat(fillvalue) */
 } iziplongestobject;
 /* Forward declaration; the type object is defined after the methods. */
 static PyTypeObject iziplongest_type;
 /* tp_new for izip_longest(*iterables[, fillvalue=None]).
  *
  * args holds the positional iterables; kwds may contain only the key
  * "fillvalue".  Returns a new iziplongestobject, or NULL with an exception
  * set: TypeError for an unexpected keyword or a non-iterable argument,
  * otherwise whatever the failing call raised.
  *
  * Reference ownership: ittuple, result and filler are new references
  * created here and handed to the object as-is.  fillvalue is borrowed
  * (from Py_None or from kwds), so it alone needs an extra INCREF.
  */
 static PyObject *
 izip_longest_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 {
 	iziplongestobject *lz;
 	Py_ssize_t i;
 	PyObject *ittuple;  /* tuple of iterators */
 	PyObject *result;
 	PyObject *fillvalue = Py_None;
 	PyObject *filler;
 	Py_ssize_t tuplesize = PySequence_Length(args);

 	/* The only keyword accepted is "fillvalue"; an empty dict is fine. */
 	if (kwds != NULL && PyDict_CheckExact(kwds) && PyDict_Size(kwds) > 0) {
 		fillvalue = PyDict_GetItemString(kwds, "fillvalue");
 		if (fillvalue == NULL  ||  PyDict_Size(kwds) > 1) {
 			PyErr_SetString(PyExc_TypeError,
 				"izip_longest() got an unexpected keyword argument");
 			return NULL;
 		}
 	}

 	/* args must be a tuple */
 	assert(PyTuple_Check(args));

 	/* obtain iterators */
 	ittuple = PyTuple_New(tuplesize);
 	if (ittuple == NULL)
 		return NULL;
 	for (i=0; i < tuplesize; ++i) {
 		PyObject *item = PyTuple_GET_ITEM(args, i);
 		PyObject *it = PyObject_GetIter(item);
 		if (it == NULL) {
 			/* Replace the generic TypeError with one naming the argument. */
 			if (PyErr_ExceptionMatches(PyExc_TypeError))
 				PyErr_Format(PyExc_TypeError,
 				    "izip_longest argument #%zd must support iteration",
 				    i+1);
 			Py_DECREF(ittuple);
 			return NULL;
 		}
 		PyTuple_SET_ITEM(ittuple, i, it);
 	}

 	/* repeat(fillvalue): substituted for each input once it is exhausted */
 	filler = PyObject_CallFunctionObjArgs((PyObject *)(&repeat_type), fillvalue, NULL);
 	if (filler == NULL) {
 		Py_DECREF(ittuple);
 		return NULL;
 	}

 	/* create a result holder */
 	result = PyTuple_New(tuplesize);
 	if (result == NULL) {
 		Py_DECREF(ittuple);
 		Py_DECREF(filler);
 		return NULL;
 	}
 	for (i=0 ; i < tuplesize ; i++) {
 		Py_INCREF(Py_None);
 		PyTuple_SET_ITEM(result, i, Py_None);
 	}

 	/* create iziplongestobject structure */
 	lz = (iziplongestobject *)type->tp_alloc(type, 0);
 	if (lz == NULL) {
 		Py_DECREF(ittuple);
 		Py_DECREF(filler);
 		Py_DECREF(result);
 		return NULL;
 	}
 	lz->ittuple = ittuple;
 	lz->tuplesize = tuplesize;
 	lz->numactive = tuplesize;
 	lz->result = result;
 	Py_INCREF(fillvalue);
 	lz->fillvalue = fillvalue;
 	/* filler is already a new reference from the call above; storing it
 	   transfers ownership.  An additional INCREF here would never be
 	   balanced by dealloc and would leak one repeat object per instance. */
 	lz->filler = filler;
 	return (PyObject *)lz;
 }
 /* tp_dealloc: untrack the object from the garbage collector, drop the
    references held in its fields, then free it through the type's tp_free. */
 static void
 izip_longest_dealloc(iziplongestobject *lz)
 {
 	/* Untrack before the fields are released. */
 	PyObject_GC_UnTrack(lz);
 	Py_XDECREF(lz->ittuple);
 	Py_XDECREF(lz->result);
 	Py_XDECREF(lz->fillvalue);
 	Py_XDECREF(lz->filler);
 	lz->ob_type->tp_free(lz);
 }
 /* tp_traverse: report every object reference held by the iterator to the
    garbage collector's visit callback.  Returns 0 after all fields have been
    visited (Py_VISIT returns early if the callback reports non-zero). */
 static int
 izip_longest_traverse(iziplongestobject *lz, visitproc visit, void *arg)
 {
 	Py_VISIT(lz->ittuple);
 	Py_VISIT(lz->result);
 	Py_VISIT(lz->fillvalue);
 	Py_VISIT(lz->filler);
 	return 0;
 }
 static PyObject *
 izip_longest_next(iziplongestobject *lz)
 {
 	Py_ssize_t i;
 	Py_ssize_t tuplesize = lz->tuplesize;
 	PyObject *result = lz->result;
 	PyObject *it;
 	PyObject *item;
 	PyObject *olditem;
 	if (tuplesize == 0)
 		return NULL;
 	if (result->ob_refcnt == 1) {
 		Py_INCREF(result);
 		for (i=0 ; i < tuplesize ; i++) {
 			it = PyTuple_GET_ITEM(lz->ittuple, i);
 			assert(PyIter_Check(it));
 			item = (*it->ob_type->tp_iternext)(it);
 			if (item == NULL) {
 				if (lz->numactive <= 1) {
 					Py_DECREF(result);
 					return NULL;
 				} else {
 					Py_INCREF(lz->filler);
 					PyTuple_SET_ITEM(lz->ittuple, i, lz->filler);
 					Py_INCREF(lz->fillvalue);
 					item = lz->fillvalue;
 					Py_DECREF(it);
 					lz->numactive -= 1;
 				}
 			}
 			olditem = PyTuple_GET_ITEM(result, i);
 			PyTuple_SET_ITEM(result, i, item);
 			Py_DECREF(olditem);
 		}
 	} else {
 		result = PyTuple_New(tuplesize);
 		if (result == NULL)
 			return NULL;
 		for (i=0 ; i < tuplesize ; i++) {
 			it = PyTuple_GET_ITEM(lz->ittuple, i);
 			assert(PyIter_Check(it));
 			item = (*it->ob_type->tp_iternext)(it);
 			if (item == NULL) {
 				if (lz->numactive <= 1) {
 					Py_DECREF(result);
 					return NULL;
 				} else {
 					Py_INCREF(lz->filler);
 					PyTuple_SET_ITEM(lz->ittuple, i, lz->filler);
 					Py_INCREF(lz->fillvalue);
 					item = lz->fillvalue;
 					Py_DECREF(it);
 					lz->numactive -= 1;
 				}
 			}
 			PyTuple_SET_ITEM(result, i, item);
 		}
 	}
 	return result;
 }
 /* Docstring for the izip_longest type; installed as tp_doc in
    iziplongest_type. */
 PyDoc_STRVAR(izip_longest_doc,
 "izip_longest(iter1 [,iter2 [...]], [fillvalue=None]) --> izip_longest object\n\
 \n\
 Return an izip_longest object whose .next() method returns a tuple where\n\
 the i-th element comes from the i-th iterable argument.  The .next()\n\
 method continues until the longest iterable in the argument sequence\n\
 is exhausted and then it raises StopIteration.  When the shorter iterables\n\
 are exhausted, the fillvalue is substituted in their place.  The fillvalue\n\
 defaults to None or can be specified by a keyword argument.\n\
 ");
 /* Type object for itertools.izip_longest.  It is a GC-enabled, subclassable
    type whose instances are their own iterators (tp_iter is
    PyObject_SelfIter); only the dealloc, traverse, iternext and new slots
    carry izip_longest-specific code. */
 static PyTypeObject iziplongest_type = {
 	PyObject_HEAD_INIT(NULL)
 	0,				/* ob_size */
 	"itertools.izip_longest",	/* tp_name */
 	sizeof(iziplongestobject),	/* tp_basicsize */
 	0,				/* tp_itemsize */
 	/* methods */
 	(destructor)izip_longest_dealloc,	/* tp_dealloc */
 	0,				/* tp_print */
 	0,				/* tp_getattr */
 	0,				/* tp_setattr */
 	0,				/* tp_compare */
 	0,				/* tp_repr */
 	0,				/* tp_as_number */
 	0,				/* tp_as_sequence */
 	0,				/* tp_as_mapping */
 	0,				/* tp_hash */
 	0,				/* tp_call */
 	0,				/* tp_str */
 	PyObject_GenericGetAttr,	/* tp_getattro */
 	0,				/* tp_setattro */
 	0,				/* tp_as_buffer */
 	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
 		Py_TPFLAGS_BASETYPE,	/* tp_flags */
 	izip_longest_doc,			/* tp_doc */
 	(traverseproc)izip_longest_traverse,    /* tp_traverse */
 	0,				/* tp_clear */
 	0,				/* tp_richcompare */
 	0,				/* tp_weaklistoffset */
 	PyObject_SelfIter,		/* tp_iter */
 	(iternextfunc)izip_longest_next,	/* tp_iternext */
 	0,				/* tp_methods */
 	0,				/* tp_members */
 	0,				/* tp_getset */
 	0,				/* tp_base */
 	0,				/* tp_dict */
 	0,				/* tp_descr_get */
 	0,				/* tp_descr_set */
 	0,				/* tp_dictoffset */
 	0,				/* tp_init */
 	0,				/* tp_alloc */
 	izip_longest_new,			/* tp_new */
 	PyObject_GC_Del,		/* tp_free */
 };
 /* module level code ********************************************************/
@ -2485,6 +2717,7 @@ repeat(elem [,n]) --> elem, elem, elem, ... endlessly or up to n times\n\
 \n\
 Iterators terminating on the shortest input sequence:\n\
 izip(p, q, ...) --> (p[0], q[0]), (p[1], q[1]), ... \n\
 izip_longest(p, q, ...) --> (p[0], q[0]), (p[1], q[1]), ... \n\
 ifilter(pred, seq) --> elements of seq where pred(elem) is True\n\
 ifilterfalse(pred, seq) --> elements of seq where pred(elem) is False\n\
 islice(seq, [start,] stop [, step]) --> elements from\n\
@ -2522,6 +2755,7 @@ inititertools(void)
 		&ifilterfalse_type,
 		&count_type,
 		&izip_type,
 		&iziplongest_type,                
 		&repeat_type,
 		&groupby_type,
 		NULL