2007-08-15 11:28:01 -03:00
|
|
|
|
|
|
|
:mod:`itertools` --- Functions creating iterators for efficient looping
|
|
|
|
=======================================================================
|
|
|
|
|
|
|
|
.. module:: itertools
|
|
|
|
:synopsis: Functions creating iterators for efficient looping.
|
|
|
|
.. moduleauthor:: Raymond Hettinger <python@rcn.com>
|
|
|
|
.. sectionauthor:: Raymond Hettinger <python@rcn.com>
|
|
|
|
|
|
|
|
|
2008-03-22 19:04:10 -03:00
|
|
|
.. testsetup::
|
|
|
|
|
|
|
|
from itertools import *
|
|
|
|
|
2007-08-15 11:28:01 -03:00
|
|
|
.. versionadded:: 2.3
|
|
|
|
|
2007-10-21 09:10:28 -03:00
|
|
|
This module implements a number of :term:`iterator` building blocks inspired by
|
2007-08-15 11:28:01 -03:00
|
|
|
constructs from the Haskell and SML programming languages. Each has been recast
|
|
|
|
in a form suitable for Python.
|
|
|
|
|
|
|
|
The module standardizes a core set of fast, memory efficient tools that are
|
|
|
|
useful by themselves or in combination. Standardization helps avoid the
|
|
|
|
readability and reliability problems which arise when many different individuals
|
|
|
|
create their own slightly varying implementations, each with their own quirks
|
|
|
|
and naming conventions.
|
|
|
|
|
|
|
|
The tools are designed to combine readily with one another. This makes it easy
|
|
|
|
to construct more specialized tools succinctly and efficiently in pure Python.
|
|
|
|
|
|
|
|
For instance, SML provides a tabulation tool: ``tabulate(f)`` which produces a
|
|
|
|
sequence ``f(0), f(1), ...``. This toolbox provides :func:`imap` and
|
|
|
|
:func:`count` which can be combined to form ``imap(f, count())`` and produce an
|
|
|
|
equivalent result.
|
|
|
|
|
|
|
|
Likewise, the functional tools are designed to work well with the high-speed
|
|
|
|
functions provided by the :mod:`operator` module.
|
|
|
|
|
|
|
|
Whether cast in pure Python form or compiled code, tools that use iterators are
|
2008-07-19 20:58:47 -03:00
|
|
|
more memory efficient (and often faster) than their list-based counterparts. Adopting
|
2007-08-15 11:28:01 -03:00
|
|
|
the principles of just-in-time manufacturing, they create data when and where
|
|
|
|
needed instead of consuming memory with the computer equivalent of "inventory".
|
|
|
|
|
|
|
|
|
|
|
|
.. seealso::
|
|
|
|
|
|
|
|
The Standard ML Basis Library, `The Standard ML Basis Library
|
|
|
|
<http://www.standardml.org/Basis/>`_.
|
|
|
|
|
|
|
|
Haskell, A Purely Functional Language, `Definition of Haskell and the Standard
|
|
|
|
Libraries <http://www.haskell.org/definition/>`_.
|
|
|
|
|
|
|
|
|
|
|
|
.. _itertools-functions:
|
|
|
|
|
|
|
|
Itertool functions
|
|
|
|
------------------
|
|
|
|
|
|
|
|
The following module functions all construct and return iterators. Some provide
|
|
|
|
streams of infinite length, so they should only be accessed by functions or
|
|
|
|
loops that truncate the stream.
|
|
|
|
|
|
|
|
|
|
|
|
.. function:: chain(*iterables)
|
|
|
|
|
|
|
|
Make an iterator that returns elements from the first iterable until it is
|
|
|
|
exhausted, then proceeds to the next iterable, until all of the iterables are
|
|
|
|
exhausted. Used for treating consecutive sequences as a single sequence.
|
|
|
|
Equivalent to::
|
|
|
|
|
|
|
|
def chain(*iterables):
|
2008-03-05 21:15:52 -04:00
|
|
|
# chain('ABC', 'DEF') --> A B C D E F
|
2007-08-15 11:28:01 -03:00
|
|
|
for it in iterables:
|
|
|
|
for element in it:
|
|
|
|
yield element
|
|
|
|
|
|
|
|
|
2008-02-28 15:41:24 -04:00
|
|
|
.. classmethod:: chain.from_iterable(iterable)
|
|
|
|
|
2009-01-03 16:55:06 -04:00
|
|
|
Alternate constructor for :func:`chain`. Gets chained inputs from a
|
2008-02-28 15:41:24 -04:00
|
|
|
single iterable argument that is evaluated lazily. Equivalent to::
|
|
|
|
|
|
|
|
@classmethod
|
|
|
|
def from_iterable(iterables):
|
2008-03-05 21:15:52 -04:00
|
|
|
# chain.from_iterable(['ABC', 'DEF']) --> A B C D E F
|
2008-02-28 15:41:24 -04:00
|
|
|
for it in iterables:
|
|
|
|
for element in it:
|
|
|
|
yield element
|
|
|
|
|
|
|
|
.. versionadded:: 2.6
|
|
|
|
|
2008-03-04 00:17:08 -04:00
|
|
|
|
2008-02-25 22:46:54 -04:00
|
|
|
.. function:: combinations(iterable, r)
|
|
|
|
|
2008-04-17 07:48:31 -03:00
|
|
|
Return *r* length subsequences of elements from the input *iterable*.
|
2008-02-25 22:46:54 -04:00
|
|
|
|
2009-01-03 16:55:06 -04:00
|
|
|
Combinations are emitted in lexicographic sort order. So, if the
|
2008-02-25 22:46:54 -04:00
|
|
|
input *iterable* is sorted, the combination tuples will be produced
|
2009-01-03 16:55:06 -04:00
|
|
|
in sorted order.
|
2008-02-25 22:46:54 -04:00
|
|
|
|
|
|
|
Elements are treated as unique based on their position, not on their
|
|
|
|
value. So if the input elements are unique, there will be no repeat
|
2008-02-28 15:41:24 -04:00
|
|
|
values in each combination.
|
2008-02-25 22:46:54 -04:00
|
|
|
|
|
|
|
Equivalent to::
|
|
|
|
|
|
|
|
def combinations(iterable, r):
|
2008-03-05 21:15:52 -04:00
|
|
|
# combinations('ABCD', 2) --> AB AC AD BC BD CD
|
|
|
|
# combinations(range(4), 3) --> 012 013 023 123
|
2008-02-25 22:46:54 -04:00
|
|
|
pool = tuple(iterable)
|
2008-02-26 19:40:50 -04:00
|
|
|
n = len(pool)
|
2009-01-08 02:39:04 -04:00
|
|
|
if r > n:
|
|
|
|
return
|
2008-03-02 06:59:31 -04:00
|
|
|
indices = range(r)
|
|
|
|
yield tuple(pool[i] for i in indices)
|
2008-02-26 19:40:50 -04:00
|
|
|
while 1:
|
|
|
|
for i in reversed(range(r)):
|
2008-03-02 06:59:31 -04:00
|
|
|
if indices[i] != i + n - r:
|
2008-02-26 21:44:34 -04:00
|
|
|
break
|
2008-02-26 19:40:50 -04:00
|
|
|
else:
|
|
|
|
return
|
2008-03-02 06:59:31 -04:00
|
|
|
indices[i] += 1
|
2008-02-26 21:44:34 -04:00
|
|
|
for j in range(i+1, r):
|
2008-03-02 06:59:31 -04:00
|
|
|
indices[j] = indices[j-1] + 1
|
|
|
|
yield tuple(pool[i] for i in indices)
|
2008-02-25 22:46:54 -04:00
|
|
|
|
2008-03-04 00:17:08 -04:00
|
|
|
The code for :func:`combinations` can also be expressed as a subsequence
|
|
|
|
of :func:`permutations` after filtering entries where the elements are not
|
|
|
|
in sorted order (according to their position in the input pool)::
|
|
|
|
|
|
|
|
def combinations(iterable, r):
|
|
|
|
pool = tuple(iterable)
|
|
|
|
n = len(pool)
|
|
|
|
for indices in permutations(range(n), r):
|
|
|
|
if sorted(indices) == list(indices):
|
|
|
|
yield tuple(pool[i] for i in indices)
|
|
|
|
|
2009-01-08 02:39:04 -04:00
|
|
|
The number of items returned is ``n! / r! / (n-r)!`` when ``0 <= r <= n``
|
|
|
|
or zero when ``r > n``.
|
|
|
|
|
2008-02-25 22:46:54 -04:00
|
|
|
.. versionadded:: 2.6
|
|
|
|
|
2009-01-26 22:58:49 -04:00
|
|
|
.. function:: combinations_with_replacement(iterable, r)
|
|
|
|
|
|
|
|
Return *r* length subsequences of elements from the input *iterable*
|
|
|
|
allowing individual elements to be repeated more than once.
|
|
|
|
|
|
|
|
Combinations are emitted in lexicographic sort order. So, if the
|
|
|
|
input *iterable* is sorted, the combination tuples will be produced
|
|
|
|
in sorted order.
|
|
|
|
|
|
|
|
Elements are treated as unique based on their position, not on their
|
|
|
|
value. So if the input elements are unique, the generated combinations
|
|
|
|
will also be unique.
|
|
|
|
|
|
|
|
Equivalent to::
|
|
|
|
|
|
|
|
def combinations_with_replacement(iterable, r):
|
|
|
|
# combinations_with_replacement('ABC', 2) --> AA AB AC BB BC CC
|
|
|
|
pool = tuple(iterable)
|
|
|
|
n = len(pool)
|
|
|
|
if not n and r:
|
|
|
|
return
|
|
|
|
indices = [0] * r
|
|
|
|
yield tuple(pool[i] for i in indices)
|
|
|
|
while 1:
|
|
|
|
for i in reversed(range(r)):
|
|
|
|
if indices[i] != n - 1:
|
|
|
|
break
|
|
|
|
else:
|
|
|
|
return
|
|
|
|
indices[i:] = [indices[i] + 1] * (r - i)
|
|
|
|
yield tuple(pool[i] for i in indices)
|
|
|
|
|
|
|
|
The code for :func:`combinations_with_replacement` can also be expressed as
|
|
|
|
a subsequence of :func:`product` after filtering entries where the elements
|
|
|
|
are not in sorted order (according to their position in the input pool)::
|
|
|
|
|
|
|
|
def combinations_with_replacement(iterable, r):
|
|
|
|
pool = tuple(iterable)
|
|
|
|
n = len(pool)
|
|
|
|
for indices in product(range(n), repeat=r):
|
|
|
|
if sorted(indices) == list(indices):
|
|
|
|
yield tuple(pool[i] for i in indices)
|
|
|
|
|
|
|
|
The number of items returned is ``(n+r-1)! / r! / (n-1)!`` when ``n > 0``.
|
|
|
|
|
|
|
|
.. versionadded:: 2.7
|
|
|
|
|
2009-01-25 17:04:14 -04:00
|
|
|
.. function:: compress(data, selectors)
|
|
|
|
|
|
|
|
Make an iterator that filters elements from *data* returning only those that
|
|
|
|
have a corresponding element in *selectors* that evaluates to ``True``.
|
|
|
|
Stops when either the *data* or *selectors* iterables have been exhausted.
|
|
|
|
Equivalent to::
|
|
|
|
|
|
|
|
def compress(data, selectors):
|
|
|
|
# compress('ABCDEF', [1,0,1,0,1,1]) --> A C E F
|
|
|
|
return (d for d, s in izip(data, selectors) if s)
|
|
|
|
|
|
|
|
.. versionadded:: 2.7
|
|
|
|
|
|
|
|
|
2007-08-15 11:28:01 -03:00
|
|
|
.. function:: count([n])
|
|
|
|
|
|
|
|
Make an iterator that returns consecutive integers starting with *n*. If not
|
2007-10-03 21:20:27 -03:00
|
|
|
specified, *n* defaults to zero. Often used as an argument to :func:`imap` to
|
|
|
|
generate consecutive data points. Also used with :func:`izip` to add sequence
|
|
|
|
numbers. Equivalent to::
|
2007-08-15 11:28:01 -03:00
|
|
|
|
|
|
|
def count(n=0):
|
2008-03-05 21:15:52 -04:00
|
|
|
# count(10) --> 10 11 12 13 14 ...
|
2007-08-15 11:28:01 -03:00
|
|
|
while True:
|
|
|
|
yield n
|
|
|
|
n += 1
|
|
|
|
|
|
|
|
|
|
|
|
.. function:: cycle(iterable)
|
|
|
|
|
|
|
|
Make an iterator returning elements from the iterable and saving a copy of each.
|
|
|
|
When the iterable is exhausted, return elements from the saved copy. Repeats
|
|
|
|
indefinitely. Equivalent to::
|
|
|
|
|
|
|
|
def cycle(iterable):
|
2008-03-05 21:15:52 -04:00
|
|
|
# cycle('ABCD') --> A B C D A B C D A B C D ...
|
2007-08-15 11:28:01 -03:00
|
|
|
saved = []
|
|
|
|
for element in iterable:
|
|
|
|
yield element
|
|
|
|
saved.append(element)
|
|
|
|
while saved:
|
|
|
|
for element in saved:
|
|
|
|
yield element
|
|
|
|
|
|
|
|
Note, this member of the toolkit may require significant auxiliary storage
|
|
|
|
(depending on the length of the iterable).
|
|
|
|
|
|
|
|
|
|
|
|
.. function:: dropwhile(predicate, iterable)
|
|
|
|
|
|
|
|
Make an iterator that drops elements from the iterable as long as the predicate
|
|
|
|
is true; afterwards, returns every element. Note, the iterator does not produce
|
|
|
|
*any* output until the predicate first becomes false, so it may have a lengthy
|
|
|
|
start-up time. Equivalent to::
|
|
|
|
|
|
|
|
def dropwhile(predicate, iterable):
|
2008-03-05 21:15:52 -04:00
|
|
|
# dropwhile(lambda x: x<5, [1,4,6,4,1]) --> 6 4 1
|
2007-08-15 11:28:01 -03:00
|
|
|
iterable = iter(iterable)
|
|
|
|
for x in iterable:
|
|
|
|
if not predicate(x):
|
|
|
|
yield x
|
|
|
|
break
|
|
|
|
for x in iterable:
|
|
|
|
yield x
|
|
|
|
|
|
|
|
|
|
|
|
.. function:: groupby(iterable[, key])
|
|
|
|
|
|
|
|
Make an iterator that returns consecutive keys and groups from the *iterable*.
|
|
|
|
The *key* is a function computing a key value for each element. If not
|
|
|
|
specified or is ``None``, *key* defaults to an identity function and returns
|
|
|
|
the element unchanged. Generally, the iterable needs to already be sorted on
|
|
|
|
the same key function.
|
|
|
|
|
|
|
|
The operation of :func:`groupby` is similar to the ``uniq`` filter in Unix. It
|
|
|
|
generates a break or new group every time the value of the key function changes
|
|
|
|
(which is why it is usually necessary to have sorted the data using the same key
|
|
|
|
function). That behavior differs from SQL's GROUP BY which aggregates common
|
|
|
|
elements regardless of their input order.
|
|
|
|
|
|
|
|
The returned group is itself an iterator that shares the underlying iterable
|
|
|
|
with :func:`groupby`. Because the source is shared, when the :func:`groupby`
|
|
|
|
object is advanced, the previous group is no longer visible. So, if that data
|
|
|
|
is needed later, it should be stored as a list::
|
|
|
|
|
|
|
|
groups = []
|
|
|
|
uniquekeys = []
|
|
|
|
data = sorted(data, key=keyfunc)
|
|
|
|
for k, g in groupby(data, keyfunc):
|
|
|
|
groups.append(list(g)) # Store group iterator as a list
|
|
|
|
uniquekeys.append(k)
|
|
|
|
|
|
|
|
:func:`groupby` is equivalent to::
|
|
|
|
|
|
|
|
class groupby(object):
|
2008-03-05 21:15:52 -04:00
|
|
|
# [k for k, g in groupby('AAAABBBCCDAABBB')] --> A B C D A B
|
2009-02-04 06:52:32 -04:00
|
|
|
# [list(g) for k, g in groupby('AAAABBBCCD')] --> AAAA BBB CC D
|
2007-08-15 11:28:01 -03:00
|
|
|
def __init__(self, iterable, key=None):
|
|
|
|
if key is None:
|
|
|
|
key = lambda x: x
|
|
|
|
self.keyfunc = key
|
|
|
|
self.it = iter(iterable)
|
2007-12-29 18:16:24 -04:00
|
|
|
self.tgtkey = self.currkey = self.currvalue = object()
|
2007-08-15 11:28:01 -03:00
|
|
|
def __iter__(self):
|
|
|
|
return self
|
|
|
|
def next(self):
|
|
|
|
while self.currkey == self.tgtkey:
|
|
|
|
self.currvalue = self.it.next() # Exit on StopIteration
|
|
|
|
self.currkey = self.keyfunc(self.currvalue)
|
|
|
|
self.tgtkey = self.currkey
|
|
|
|
return (self.currkey, self._grouper(self.tgtkey))
|
|
|
|
def _grouper(self, tgtkey):
|
|
|
|
while self.currkey == tgtkey:
|
|
|
|
yield self.currvalue
|
|
|
|
self.currvalue = self.it.next() # Exit on StopIteration
|
|
|
|
self.currkey = self.keyfunc(self.currvalue)
|
|
|
|
|
|
|
|
.. versionadded:: 2.4
|
|
|
|
|
|
|
|
|
|
|
|
.. function:: ifilter(predicate, iterable)
|
|
|
|
|
|
|
|
Make an iterator that filters elements from iterable returning only those for
|
|
|
|
which the predicate is ``True``. If *predicate* is ``None``, return the items
|
|
|
|
that are true. Equivalent to::
|
|
|
|
|
|
|
|
def ifilter(predicate, iterable):
|
2008-03-05 21:15:52 -04:00
|
|
|
# ifilter(lambda x: x%2, range(10)) --> 1 3 5 7 9
|
2007-08-15 11:28:01 -03:00
|
|
|
if predicate is None:
|
|
|
|
predicate = bool
|
|
|
|
for x in iterable:
|
|
|
|
if predicate(x):
|
|
|
|
yield x
|
|
|
|
|
|
|
|
|
|
|
|
.. function:: ifilterfalse(predicate, iterable)
|
|
|
|
|
|
|
|
Make an iterator that filters elements from iterable returning only those for
|
|
|
|
which the predicate is ``False``. If *predicate* is ``None``, return the items
|
|
|
|
that are false. Equivalent to::
|
|
|
|
|
|
|
|
def ifilterfalse(predicate, iterable):
|
2008-03-05 21:15:52 -04:00
|
|
|
# ifilterfalse(lambda x: x%2, range(10)) --> 0 2 4 6 8
|
2007-08-15 11:28:01 -03:00
|
|
|
if predicate is None:
|
|
|
|
predicate = bool
|
|
|
|
for x in iterable:
|
|
|
|
if not predicate(x):
|
|
|
|
yield x
|
|
|
|
|
|
|
|
|
|
|
|
.. function:: imap(function, *iterables)
|
|
|
|
|
|
|
|
Make an iterator that computes the function using arguments from each of the
|
|
|
|
iterables. If *function* is set to ``None``, then :func:`imap` returns the
|
|
|
|
arguments as a tuple. Like :func:`map` but stops when the shortest iterable is
|
|
|
|
exhausted instead of filling in ``None`` for shorter iterables. The reason for
|
|
|
|
the difference is that infinite iterator arguments are typically an error for
|
|
|
|
:func:`map` (because the output is fully evaluated) but represent a common and
|
|
|
|
useful way of supplying arguments to :func:`imap`. Equivalent to::
|
|
|
|
|
|
|
|
def imap(function, *iterables):
|
2008-03-05 21:15:52 -04:00
|
|
|
# imap(pow, (2,3,10), (5,2,3)) --> 32 9 1000
|
2007-08-15 11:28:01 -03:00
|
|
|
iterables = map(iter, iterables)
|
|
|
|
while True:
|
2008-01-22 18:09:26 -04:00
|
|
|
args = [it.next() for it in iterables]
|
2007-08-15 11:28:01 -03:00
|
|
|
if function is None:
|
|
|
|
yield tuple(args)
|
|
|
|
else:
|
|
|
|
yield function(*args)
|
|
|
|
|
|
|
|
|
|
|
|
.. function:: islice(iterable, [start,] stop [, step])
|
|
|
|
|
|
|
|
Make an iterator that returns selected elements from the iterable. If *start* is
|
|
|
|
non-zero, then elements from the iterable are skipped until start is reached.
|
|
|
|
Afterward, elements are returned consecutively unless *step* is set higher than
|
|
|
|
one which results in items being skipped. If *stop* is ``None``, then iteration
|
|
|
|
continues until the iterator is exhausted, if at all; otherwise, it stops at the
|
|
|
|
specified position. Unlike regular slicing, :func:`islice` does not support
|
|
|
|
negative values for *start*, *stop*, or *step*. Can be used to extract related
|
|
|
|
fields from data where the internal structure has been flattened (for example, a
|
|
|
|
multi-line report may list a name field on every third line). Equivalent to::
|
|
|
|
|
|
|
|
def islice(iterable, *args):
|
2008-03-05 21:15:52 -04:00
|
|
|
# islice('ABCDEFG', 2) --> A B
|
|
|
|
# islice('ABCDEFG', 2, 4) --> C D
|
|
|
|
# islice('ABCDEFG', 2, None) --> C D E F G
|
|
|
|
# islice('ABCDEFG', 0, None, 2) --> A C E G
|
2007-08-15 11:28:01 -03:00
|
|
|
s = slice(*args)
|
|
|
|
it = iter(xrange(s.start or 0, s.stop or sys.maxint, s.step or 1))
|
|
|
|
nexti = it.next()
|
|
|
|
for i, element in enumerate(iterable):
|
|
|
|
if i == nexti:
|
|
|
|
yield element
|
2009-01-03 16:55:06 -04:00
|
|
|
nexti = it.next()
|
2007-08-15 11:28:01 -03:00
|
|
|
|
|
|
|
If *start* is ``None``, then iteration starts at zero. If *step* is ``None``,
|
|
|
|
then the step defaults to one.
|
|
|
|
|
|
|
|
.. versionchanged:: 2.5
|
|
|
|
accept ``None`` values for default *start* and *step*.
|
|
|
|
|
|
|
|
|
|
|
|
.. function:: izip(*iterables)
|
|
|
|
|
|
|
|
Make an iterator that aggregates elements from each of the iterables. Like
|
|
|
|
:func:`zip` except that it returns an iterator instead of a list. Used for
|
|
|
|
lock-step iteration over several iterables at a time. Equivalent to::
|
|
|
|
|
|
|
|
def izip(*iterables):
|
2008-03-05 21:15:52 -04:00
|
|
|
# izip('ABCD', 'xy') --> Ax By
|
2007-08-15 11:28:01 -03:00
|
|
|
iterables = map(iter, iterables)
|
|
|
|
while iterables:
|
|
|
|
result = [it.next() for it in iterables]
|
|
|
|
yield tuple(result)
|
|
|
|
|
|
|
|
.. versionchanged:: 2.4
|
|
|
|
When no iterables are specified, returns a zero length iterator instead of
|
|
|
|
raising a :exc:`TypeError` exception.
|
|
|
|
|
2008-01-22 15:51:41 -04:00
|
|
|
The left-to-right evaluation order of the iterables is guaranteed. This
|
|
|
|
makes possible an idiom for clustering a data series into n-length groups
|
|
|
|
using ``izip(*[iter(s)]*n)``.
|
|
|
|
|
|
|
|
:func:`izip` should only be used with unequal length inputs when you don't
|
|
|
|
care about trailing, unmatched values from the longer iterables. If those
|
|
|
|
values are important, use :func:`izip_longest` instead.
|
2007-08-15 11:28:01 -03:00
|
|
|
|
|
|
|
|
|
|
|
.. function:: izip_longest(*iterables[, fillvalue])
|
|
|
|
|
|
|
|
Make an iterator that aggregates elements from each of the iterables. If the
|
|
|
|
iterables are of uneven length, missing values are filled-in with *fillvalue*.
|
|
|
|
Iteration continues until the longest iterable is exhausted. Equivalent to::
|
|
|
|
|
|
|
|
def izip_longest(*args, **kwds):
|
2008-03-05 21:15:52 -04:00
|
|
|
# izip_longest('ABCD', 'xy', fillvalue='-') --> Ax By C- D-
|
2007-08-15 11:28:01 -03:00
|
|
|
fillvalue = kwds.get('fillvalue')
|
|
|
|
def sentinel(counter = ([fillvalue]*(len(args)-1)).pop):
|
|
|
|
yield counter() # yields the fillvalue, or raises IndexError
|
|
|
|
fillers = repeat(fillvalue)
|
|
|
|
iters = [chain(it, sentinel(), fillers) for it in args]
|
|
|
|
try:
|
|
|
|
for tup in izip(*iters):
|
|
|
|
yield tup
|
|
|
|
except IndexError:
|
|
|
|
pass
|
|
|
|
|
2008-07-25 14:02:11 -03:00
|
|
|
If one of the iterables is potentially infinite, then the
|
|
|
|
:func:`izip_longest` function should be wrapped with something that limits
|
|
|
|
the number of calls (for example :func:`islice` or :func:`takewhile`). If
|
|
|
|
not specified, *fillvalue* defaults to ``None``.
|
2007-08-15 11:28:01 -03:00
|
|
|
|
|
|
|
.. versionadded:: 2.6
|
|
|
|
|
2008-02-28 15:41:24 -04:00
|
|
|
.. function:: permutations(iterable[, r])
|
|
|
|
|
|
|
|
Return successive *r* length permutations of elements in the *iterable*.
|
|
|
|
|
|
|
|
If *r* is not specified or is ``None``, then *r* defaults to the length
|
2009-01-03 16:55:06 -04:00
|
|
|
of the *iterable* and all possible full-length permutations
|
2008-02-28 15:41:24 -04:00
|
|
|
are generated.
|
|
|
|
|
2009-01-03 16:55:06 -04:00
|
|
|
Permutations are emitted in lexicographic sort order. So, if the
|
2008-02-28 15:41:24 -04:00
|
|
|
input *iterable* is sorted, the permutation tuples will be produced
|
2009-01-03 16:55:06 -04:00
|
|
|
in sorted order.
|
2008-02-28 15:41:24 -04:00
|
|
|
|
|
|
|
Elements are treated as unique based on their position, not on their
|
|
|
|
value. So if the input elements are unique, there will be no repeat
|
|
|
|
values in each permutation.
|
|
|
|
|
2008-03-02 06:59:31 -04:00
|
|
|
Equivalent to::
|
|
|
|
|
|
|
|
def permutations(iterable, r=None):
|
2008-03-05 21:15:52 -04:00
|
|
|
# permutations('ABCD', 2) --> AB AC AD BA BC BD CA CB CD DA DB DC
|
|
|
|
# permutations(range(3)) --> 012 021 102 120 201 210
|
2008-03-02 06:59:31 -04:00
|
|
|
pool = tuple(iterable)
|
|
|
|
n = len(pool)
|
|
|
|
r = n if r is None else r
|
2009-01-08 02:39:04 -04:00
|
|
|
if r > n:
|
|
|
|
return
|
2008-03-02 06:59:31 -04:00
|
|
|
indices = range(n)
|
2008-03-22 21:55:46 -03:00
|
|
|
cycles = range(n, n-r, -1)
|
2008-03-02 06:59:31 -04:00
|
|
|
yield tuple(pool[i] for i in indices[:r])
|
|
|
|
while n:
|
|
|
|
for i in reversed(range(r)):
|
|
|
|
cycles[i] -= 1
|
|
|
|
if cycles[i] == 0:
|
2008-03-02 07:17:51 -04:00
|
|
|
indices[i:] = indices[i+1:] + indices[i:i+1]
|
2008-03-02 06:59:31 -04:00
|
|
|
cycles[i] = n - i
|
|
|
|
else:
|
|
|
|
j = cycles[i]
|
|
|
|
indices[i], indices[-j] = indices[-j], indices[i]
|
|
|
|
yield tuple(pool[i] for i in indices[:r])
|
|
|
|
break
|
|
|
|
else:
|
|
|
|
return
|
2008-02-28 15:41:24 -04:00
|
|
|
|
2009-01-03 16:55:06 -04:00
|
|
|
The code for :func:`permutations` can also be expressed as a subsequence of
|
2008-03-04 00:17:08 -04:00
|
|
|
:func:`product`, filtered to exclude entries with repeated elements (those
|
|
|
|
from the same position in the input pool)::
|
|
|
|
|
|
|
|
def permutations(iterable, r=None):
|
|
|
|
pool = tuple(iterable)
|
|
|
|
n = len(pool)
|
|
|
|
r = n if r is None else r
|
|
|
|
for indices in product(range(n), repeat=r):
|
|
|
|
if len(set(indices)) == r:
|
|
|
|
yield tuple(pool[i] for i in indices)
|
|
|
|
|
2009-01-08 02:39:04 -04:00
|
|
|
The number of items returned is ``n! / (n-r)!`` when ``0 <= r <= n``
|
|
|
|
or zero when ``r > n``.
|
|
|
|
|
2008-02-28 15:41:24 -04:00
|
|
|
.. versionadded:: 2.6
|
|
|
|
|
2008-02-28 05:23:48 -04:00
|
|
|
.. function:: product(*iterables[, repeat])
|
2008-02-22 15:50:06 -04:00
|
|
|
|
|
|
|
Cartesian product of input iterables.
|
|
|
|
|
|
|
|
Equivalent to nested for-loops in a generator expression. For example,
|
|
|
|
``product(A, B)`` returns the same as ``((x,y) for x in A for y in B)``.
|
|
|
|
|
2008-04-17 07:48:31 -03:00
|
|
|
The nested loops cycle like an odometer with the rightmost element advancing
|
2008-04-17 17:44:06 -03:00
|
|
|
on every iteration. This pattern creates a lexicographic ordering so that if
|
|
|
|
the input's iterables are sorted, the product tuples are emitted in sorted
|
2008-04-17 07:48:31 -03:00
|
|
|
order.
|
2008-02-22 15:50:06 -04:00
|
|
|
|
2008-02-28 05:23:48 -04:00
|
|
|
To compute the product of an iterable with itself, specify the number of
|
|
|
|
repetitions with the optional *repeat* keyword argument. For example,
|
|
|
|
``product(A, repeat=4)`` means the same as ``product(A, A, A, A)``.
|
|
|
|
|
2008-03-03 21:47:38 -04:00
|
|
|
This function is equivalent to the following code, except that the
|
|
|
|
actual implementation does not build up intermediate results in memory::
|
2008-02-22 15:50:06 -04:00
|
|
|
|
2008-02-28 05:23:48 -04:00
|
|
|
def product(*args, **kwds):
|
2008-03-05 21:15:52 -04:00
|
|
|
# product('ABCD', 'xy') --> Ax Ay Bx By Cx Cy Dx Dy
|
|
|
|
# product(range(2), repeat=3) --> 000 001 010 011 100 101 110 111
|
2008-02-28 05:23:48 -04:00
|
|
|
pools = map(tuple, args) * kwds.get('repeat', 1)
|
2008-03-04 00:17:08 -04:00
|
|
|
result = [[]]
|
|
|
|
for pool in pools:
|
|
|
|
result = [x+[y] for x in result for y in pool]
|
|
|
|
for prod in result:
|
|
|
|
yield tuple(prod)
|
2008-02-22 15:50:06 -04:00
|
|
|
|
|
|
|
.. versionadded:: 2.6
|
2007-08-15 11:28:01 -03:00
|
|
|
|
|
|
|
.. function:: repeat(object[, times])
|
|
|
|
|
|
|
|
Make an iterator that returns *object* over and over again. Runs indefinitely
|
|
|
|
unless the *times* argument is specified. Used as an argument to :func:`imap` for
|
2008-03-05 21:15:52 -04:00
|
|
|
invariant function parameters. Also used with :func:`izip` to create constant
|
|
|
|
fields in a tuple record. Equivalent to::
|
2007-08-15 11:28:01 -03:00
|
|
|
|
|
|
|
def repeat(object, times=None):
|
2008-03-05 21:15:52 -04:00
|
|
|
# repeat(10, 3) --> 10 10 10
|
2007-08-15 11:28:01 -03:00
|
|
|
if times is None:
|
|
|
|
while True:
|
|
|
|
yield object
|
|
|
|
else:
|
|
|
|
for i in xrange(times):
|
|
|
|
yield object
|
|
|
|
|
|
|
|
|
|
|
|
.. function:: starmap(function, iterable)
|
|
|
|
|
2008-01-16 23:02:14 -04:00
|
|
|
Make an iterator that computes the function using arguments obtained from
|
2007-08-15 11:28:01 -03:00
|
|
|
the iterable. Used instead of :func:`imap` when argument parameters are already
|
|
|
|
grouped in tuples from a single iterable (the data has been "pre-zipped"). The
|
|
|
|
difference between :func:`imap` and :func:`starmap` parallels the distinction
|
|
|
|
between ``function(a,b)`` and ``function(*c)``. Equivalent to::
|
|
|
|
|
|
|
|
def starmap(function, iterable):
|
2008-03-05 21:15:52 -04:00
|
|
|
# starmap(pow, [(2,5), (3,2), (10,3)]) --> 32 9 1000
|
2008-01-16 23:02:14 -04:00
|
|
|
for args in iterable:
|
|
|
|
yield function(*args)
|
2007-08-15 11:28:01 -03:00
|
|
|
|
2008-01-16 23:02:14 -04:00
|
|
|
.. versionchanged:: 2.6
|
|
|
|
Previously, :func:`starmap` required the function arguments to be tuples.
|
|
|
|
Now, any iterable is allowed.
|
2007-08-15 11:28:01 -03:00
|
|
|
|
|
|
|
.. function:: takewhile(predicate, iterable)
|
|
|
|
|
|
|
|
Make an iterator that returns elements from the iterable as long as the
|
|
|
|
predicate is true. Equivalent to::
|
|
|
|
|
|
|
|
def takewhile(predicate, iterable):
|
2008-03-05 21:15:52 -04:00
|
|
|
# takewhile(lambda x: x<5, [1,4,6,4,1]) --> 1 4
|
2007-08-15 11:28:01 -03:00
|
|
|
for x in iterable:
|
|
|
|
if predicate(x):
|
|
|
|
yield x
|
|
|
|
else:
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
|
|
.. function:: tee(iterable[, n=2])
|
|
|
|
|
|
|
|
Return *n* independent iterators from a single iterable. The case where ``n==2``
|
|
|
|
is equivalent to::
|
|
|
|
|
|
|
|
def tee(iterable):
|
2007-12-29 18:09:34 -04:00
|
|
|
def gen(next, data={}):
|
2007-08-15 11:28:01 -03:00
|
|
|
for i in count():
|
2007-12-29 18:09:34 -04:00
|
|
|
if i in data:
|
|
|
|
yield data.pop(i)
|
2007-08-15 11:28:01 -03:00
|
|
|
else:
|
2007-12-29 18:09:34 -04:00
|
|
|
data[i] = next()
|
|
|
|
yield data[i]
|
2007-08-15 11:28:01 -03:00
|
|
|
it = iter(iterable)
|
2007-12-29 18:09:34 -04:00
|
|
|
return gen(it.next), gen(it.next)
|
2007-08-15 11:28:01 -03:00
|
|
|
|
|
|
|
Note, once :func:`tee` has made a split, the original *iterable* should not be
|
|
|
|
used anywhere else; otherwise, the *iterable* could get advanced without the tee
|
|
|
|
objects being informed.
|
|
|
|
|
|
|
|
Note, this member of the toolkit may require significant auxiliary storage
|
|
|
|
(depending on how much temporary data needs to be stored). In general, if one
|
|
|
|
iterator is going to use most or all of the data before the other iterator, it
|
|
|
|
is faster to use :func:`list` instead of :func:`tee`.
|
|
|
|
|
|
|
|
.. versionadded:: 2.4
|
|
|
|
|
|
|
|
|
|
|
|
.. _itertools-example:
|
|
|
|
|
|
|
|
Examples
|
|
|
|
--------
|
|
|
|
|
|
|
|
The following examples show common uses for each tool and demonstrate ways they
|
2008-03-22 19:04:10 -03:00
|
|
|
can be combined.
|
|
|
|
|
|
|
|
.. doctest::
|
2007-08-15 11:28:01 -03:00
|
|
|
|
2009-01-01 12:43:12 -04:00
|
|
|
>>> # Show a dictionary sorted and grouped by value
|
2007-08-15 11:28:01 -03:00
|
|
|
>>> from operator import itemgetter
|
|
|
|
>>> d = dict(a=1, b=2, c=1, d=2, e=1, f=2, g=3)
|
|
|
|
>>> di = sorted(d.iteritems(), key=itemgetter(1))
|
|
|
|
>>> for k, g in groupby(di, key=itemgetter(1)):
|
|
|
|
... print k, map(itemgetter(0), g)
|
|
|
|
...
|
|
|
|
1 ['a', 'c', 'e']
|
|
|
|
2 ['b', 'd', 'f']
|
|
|
|
3 ['g']
|
|
|
|
|
2009-01-01 12:43:12 -04:00
|
|
|
>>> # Find runs of consecutive numbers using groupby. The key to the solution
|
|
|
|
>>> # is differencing with a range so that consecutive numbers all appear in
|
|
|
|
>>> # the same group.
|
2007-08-15 11:28:01 -03:00
|
|
|
>>> data = [ 1, 4,5,6, 10, 15,16,17,18, 22, 25,26,27,28]
|
|
|
|
>>> for k, g in groupby(enumerate(data), lambda (i,x):i-x):
|
2008-03-22 19:04:10 -03:00
|
|
|
... print map(itemgetter(1), g)
|
2009-01-03 16:55:06 -04:00
|
|
|
...
|
2007-08-15 11:28:01 -03:00
|
|
|
[1]
|
|
|
|
[4, 5, 6]
|
|
|
|
[10]
|
|
|
|
[15, 16, 17, 18]
|
|
|
|
[22]
|
|
|
|
[25, 26, 27, 28]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.. _itertools-recipes:
|
|
|
|
|
|
|
|
Recipes
|
|
|
|
-------
|
|
|
|
|
|
|
|
This section shows recipes for creating an extended toolset using the existing
|
|
|
|
itertools as building blocks.
|
|
|
|
|
|
|
|
The extended tools offer the same high performance as the underlying toolset.
|
|
|
|
The superior memory performance is kept by processing elements one at a time
|
|
|
|
rather than bringing the whole iterable into memory all at once. Code volume is
|
|
|
|
kept small by linking the tools together in a functional style which helps
|
|
|
|
eliminate temporary variables. High speed is retained by preferring
|
2007-10-21 07:52:38 -03:00
|
|
|
"vectorized" building blocks over the use of for-loops and :term:`generator`\s
|
2008-03-22 19:04:10 -03:00
|
|
|
which incur interpreter overhead.
|
|
|
|
|
|
|
|
.. testcode::
|
2007-08-15 11:28:01 -03:00
|
|
|
|
2008-07-19 20:58:47 -03:00
|
|
|
def take(n, iterable):
|
|
|
|
"Return first n items of the iterable as a list"
|
|
|
|
return list(islice(iterable, n))
|
2007-08-15 11:28:01 -03:00
|
|
|
|
2008-07-19 20:58:47 -03:00
|
|
|
def enumerate(iterable, start=0):
|
|
|
|
return izip(count(start), iterable)
|
2007-08-15 11:28:01 -03:00
|
|
|
|
2008-07-19 20:58:47 -03:00
|
|
|
def tabulate(function, start=0):
|
2007-08-15 11:28:01 -03:00
|
|
|
"Return function(0), function(1), ..."
|
2008-07-19 20:58:47 -03:00
|
|
|
return imap(function, count(start))
|
2007-08-15 11:28:01 -03:00
|
|
|
|
|
|
|
def nth(iterable, n):
|
2009-02-04 06:52:32 -04:00
|
|
|
"Returns the nth item or None"
|
|
|
|
return next(islice(iterable, n, None), None)
|
2008-07-19 20:58:47 -03:00
|
|
|
|
|
|
|
def quantify(iterable, pred=bool):
|
|
|
|
"Count how many times the predicate is true"
|
|
|
|
return sum(imap(pred, iterable))
|
|
|
|
|
|
|
|
def padnone(iterable):
|
2007-08-15 11:28:01 -03:00
|
|
|
"""Returns the sequence elements and then returns None indefinitely.
|
|
|
|
|
|
|
|
Useful for emulating the behavior of the built-in map() function.
|
|
|
|
"""
|
2008-07-19 20:58:47 -03:00
|
|
|
return chain(iterable, repeat(None))
|
2007-08-15 11:28:01 -03:00
|
|
|
|
2008-07-19 20:58:47 -03:00
|
|
|
def ncycles(iterable, n):
|
2007-08-15 11:28:01 -03:00
|
|
|
"Returns the sequence elements n times"
|
2008-07-19 20:58:47 -03:00
|
|
|
return chain.from_iterable(repeat(iterable, n))
|
2007-08-15 11:28:01 -03:00
|
|
|
|
|
|
|
def dotproduct(vec1, vec2):
|
|
|
|
return sum(imap(operator.mul, vec1, vec2))
|
|
|
|
|
|
|
|
def flatten(listOfLists):
|
2008-02-28 15:41:24 -04:00
|
|
|
return list(chain.from_iterable(listOfLists))
|
2007-08-15 11:28:01 -03:00
|
|
|
|
|
|
|
def repeatfunc(func, times=None, *args):
|
|
|
|
"""Repeat calls to func with specified arguments.
|
|
|
|
|
|
|
|
Example: repeatfunc(random.random)
|
|
|
|
"""
|
|
|
|
if times is None:
|
|
|
|
return starmap(func, repeat(args))
|
2008-02-28 15:41:24 -04:00
|
|
|
return starmap(func, repeat(args, times))
|
2007-08-15 11:28:01 -03:00
|
|
|
|
|
|
|
def pairwise(iterable):
|
|
|
|
"s -> (s0,s1), (s1,s2), (s2, s3), ..."
|
|
|
|
a, b = tee(iterable)
|
2008-03-06 21:33:20 -04:00
|
|
|
for elem in b:
|
|
|
|
break
|
2007-08-15 11:28:01 -03:00
|
|
|
return izip(a, b)
|
|
|
|
|
2008-03-06 21:33:20 -04:00
|
|
|
def grouper(n, iterable, fillvalue=None):
|
2008-07-30 04:27:30 -03:00
|
|
|
"grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx"
|
2008-03-06 21:33:20 -04:00
|
|
|
args = [iter(iterable)] * n
|
2008-07-30 22:19:50 -03:00
|
|
|
return izip_longest(fillvalue=fillvalue, *args)
|
2007-08-15 11:28:01 -03:00
|
|
|
|
2008-01-30 18:17:31 -04:00
|
|
|
def roundrobin(*iterables):
|
2008-07-30 04:27:30 -03:00
|
|
|
"roundrobin('ABC', 'D', 'EF') --> A D E B F C"
|
2008-02-28 15:41:24 -04:00
|
|
|
# Recipe credited to George Sakkis
|
2008-01-30 18:17:31 -04:00
|
|
|
pending = len(iterables)
|
|
|
|
nexts = cycle(iter(it).next for it in iterables)
|
|
|
|
while pending:
|
|
|
|
try:
|
|
|
|
for next in nexts:
|
|
|
|
yield next()
|
|
|
|
except StopIteration:
|
|
|
|
pending -= 1
|
|
|
|
nexts = cycle(islice(nexts, pending))
|
2007-08-15 11:28:01 -03:00
|
|
|
|
2008-02-23 06:04:15 -04:00
|
|
|
def powerset(iterable):
|
2009-01-25 17:31:47 -04:00
|
|
|
"powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)"
|
|
|
|
s = list(iterable)
|
|
|
|
return chain.from_iterable(combinations(s, r) for r in range(len(s)+1))
|
2008-02-23 06:04:15 -04:00
|
|
|
|
2009-01-31 16:01:48 -04:00
|
|
|
def unique_everseen(iterable, key=None):
|
|
|
|
"List unique elements, preserving order. Remember all elements ever seen."
|
|
|
|
# unique_everseen('AAAABBBCCDAABBB') --> A B C D
|
|
|
|
# unique_everseen('ABBCcAD', str.lower) --> A B C D
|
|
|
|
seen = set()
|
|
|
|
seen_add = seen.add
|
|
|
|
if key is None:
|
|
|
|
for element in iterable:
|
|
|
|
if element not in seen:
|
|
|
|
seen_add(element)
|
|
|
|
yield element
|
|
|
|
else:
|
|
|
|
for element in iterable:
|
|
|
|
k = key(element)
|
|
|
|
if k not in seen:
|
|
|
|
seen_add(k)
|
|
|
|
yield element
|
|
|
|
|
|
|
|
def unique_justseen(iterable, key=None):
|
|
|
|
"List unique elements, preserving order. Remember only the element just seen."
|
|
|
|
# unique_justseen('AAAABBBCCDAABBB') --> A B C D A B
|
|
|
|
# unique_justseen('ABBCcAD', str.lower) --> A B C A D
|
|
|
|
return imap(next, imap(itemgetter(1), groupby(iterable, key)))
|