General improvements to the itertools docs (GH-98408)

This commit is contained in:
Raymond Hettinger 2022-10-18 14:09:34 -05:00 committed by GitHub
parent c051d55ddb
commit f4ead4874b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 92 additions and 51 deletions

View File

@ -10,6 +10,10 @@
.. testsetup:: .. testsetup::
from itertools import * from itertools import *
import collections
import math
import operator
import random
-------------- --------------
@ -133,10 +137,9 @@ loops that truncate the stream.
There are a number of uses for the *func* argument. It can be set to There are a number of uses for the *func* argument. It can be set to
:func:`min` for a running minimum, :func:`max` for a running maximum, or :func:`min` for a running minimum, :func:`max` for a running maximum, or
:func:`operator.mul` for a running product. Amortization tables can be :func:`operator.mul` for a running product. Amortization tables can be
built by accumulating interest and applying payments. First-order built by accumulating interest and applying payments:
`recurrence relations <https://en.wikipedia.org/wiki/Recurrence_relation>`_
can be modeled by supplying the initial value in the iterable and using only .. doctest::
the accumulated total in *func* argument::
>>> data = [3, 4, 6, 2, 1, 9, 0, 7, 5, 8] >>> data = [3, 4, 6, 2, 1, 9, 0, 7, 5, 8]
>>> list(accumulate(data, operator.mul)) # running product >>> list(accumulate(data, operator.mul)) # running product
@ -149,17 +152,6 @@ loops that truncate the stream.
>>> list(accumulate(cashflows, lambda bal, pmt: bal*1.05 + pmt)) >>> list(accumulate(cashflows, lambda bal, pmt: bal*1.05 + pmt))
[1000, 960.0, 918.0, 873.9000000000001, 827.5950000000001] [1000, 960.0, 918.0, 873.9000000000001, 827.5950000000001]
# Chaotic recurrence relation https://en.wikipedia.org/wiki/Logistic_map
>>> logistic_map = lambda x, _: r * x * (1 - x)
>>> r = 3.8
>>> x0 = 0.4
>>> inputs = repeat(x0, 36) # only the initial value is used
>>> [format(x, '.2f') for x in accumulate(inputs, logistic_map)]
['0.40', '0.91', '0.30', '0.81', '0.60', '0.92', '0.29', '0.79', '0.63',
'0.88', '0.39', '0.90', '0.33', '0.84', '0.52', '0.95', '0.18', '0.57',
'0.93', '0.25', '0.71', '0.79', '0.63', '0.88', '0.39', '0.91', '0.32',
'0.83', '0.54', '0.95', '0.20', '0.60', '0.91', '0.30', '0.80', '0.60']
See :func:`functools.reduce` for a similar function that returns only the See :func:`functools.reduce` for a similar function that returns only the
final accumulated value. final accumulated value.
@ -241,10 +233,10 @@ loops that truncate the stream.
The combination tuples are emitted in lexicographic ordering according to The combination tuples are emitted in lexicographic ordering according to
the order of the input *iterable*. So, if the input *iterable* is sorted, the order of the input *iterable*. So, if the input *iterable* is sorted,
the combination tuples will be produced in sorted order. the output tuples will be produced in sorted order.
Elements are treated as unique based on their position, not on their Elements are treated as unique based on their position, not on their
value. So if the input elements are unique, there will be no repeat value. So if the input elements are unique, there will be no repeated
values in each combination. values in each combination.
Roughly equivalent to:: Roughly equivalent to::
@ -290,7 +282,7 @@ loops that truncate the stream.
The combination tuples are emitted in lexicographic ordering according to The combination tuples are emitted in lexicographic ordering according to
the order of the input *iterable*. So, if the input *iterable* is sorted, the order of the input *iterable*. So, if the input *iterable* is sorted,
the combination tuples will be produced in sorted order. the output tuples will be produced in sorted order.
Elements are treated as unique based on their position, not on their Elements are treated as unique based on their position, not on their
value. So if the input elements are unique, the generated combinations value. So if the input elements are unique, the generated combinations
@ -449,14 +441,17 @@ loops that truncate the stream.
class groupby: class groupby:
# [k for k, g in groupby('AAAABBBCCDAABBB')] --> A B C D A B # [k for k, g in groupby('AAAABBBCCDAABBB')] --> A B C D A B
# [list(g) for k, g in groupby('AAAABBBCCD')] --> AAAA BBB CC D # [list(g) for k, g in groupby('AAAABBBCCD')] --> AAAA BBB CC D
def __init__(self, iterable, key=None): def __init__(self, iterable, key=None):
if key is None: if key is None:
key = lambda x: x key = lambda x: x
self.keyfunc = key self.keyfunc = key
self.it = iter(iterable) self.it = iter(iterable)
self.tgtkey = self.currkey = self.currvalue = object() self.tgtkey = self.currkey = self.currvalue = object()
def __iter__(self): def __iter__(self):
return self return self
def __next__(self): def __next__(self):
self.id = object() self.id = object()
while self.currkey == self.tgtkey: while self.currkey == self.tgtkey:
@ -464,6 +459,7 @@ loops that truncate the stream.
self.currkey = self.keyfunc(self.currvalue) self.currkey = self.keyfunc(self.currvalue)
self.tgtkey = self.currkey self.tgtkey = self.currkey
return (self.currkey, self._grouper(self.tgtkey, self.id)) return (self.currkey, self._grouper(self.tgtkey, self.id))
def _grouper(self, tgtkey, id): def _grouper(self, tgtkey, id):
while self.id is id and self.currkey == tgtkey: while self.id is id and self.currkey == tgtkey:
yield self.currvalue yield self.currvalue
@ -482,10 +478,17 @@ loops that truncate the stream.
Afterward, elements are returned consecutively unless *step* is set higher than Afterward, elements are returned consecutively unless *step* is set higher than
one which results in items being skipped. If *stop* is ``None``, then iteration one which results in items being skipped. If *stop* is ``None``, then iteration
continues until the iterator is exhausted, if at all; otherwise, it stops at the continues until the iterator is exhausted, if at all; otherwise, it stops at the
specified position. Unlike regular slicing, :func:`islice` does not support specified position.
negative values for *start*, *stop*, or *step*. Can be used to extract related
fields from data where the internal structure has been flattened (for example, a If *start* is ``None``, then iteration starts at zero. If *step* is ``None``,
multi-line report may list a name field on every third line). Roughly equivalent to:: then the step defaults to one.
Unlike regular slicing, :func:`islice` does not support negative values for
*start*, *stop*, or *step*. Can be used to extract related fields from
data where the internal structure has been flattened (for example, a
multi-line report may list a name field on every third line).
Roughly equivalent to::
def islice(iterable, *args): def islice(iterable, *args):
# islice('ABCDEFG', 2) --> A B # islice('ABCDEFG', 2) --> A B
@ -512,8 +515,6 @@ loops that truncate the stream.
for i, element in zip(range(i + 1, stop), iterable): for i, element in zip(range(i + 1, stop), iterable):
pass pass
If *start* is ``None``, then iteration starts at zero. If *step* is ``None``,
then the step defaults to one.
.. function:: pairwise(iterable) .. function:: pairwise(iterable)
@ -542,13 +543,13 @@ loops that truncate the stream.
of the *iterable* and all possible full-length permutations of the *iterable* and all possible full-length permutations
are generated. are generated.
The permutation tuples are emitted in lexicographic ordering according to The permutation tuples are emitted in lexicographic order according to
the order of the input *iterable*. So, if the input *iterable* is sorted, the order of the input *iterable*. So, if the input *iterable* is sorted,
the combination tuples will be produced in sorted order. the output tuples will be produced in sorted order.
Elements are treated as unique based on their position, not on their Elements are treated as unique based on their position, not on their
value. So if the input elements are unique, there will be no repeat value. So if the input elements are unique, there will be no repeated
values in each permutation. values within a permutation.
Roughly equivalent to:: Roughly equivalent to::
@ -628,9 +629,7 @@ loops that truncate the stream.
.. function:: repeat(object[, times]) .. function:: repeat(object[, times])
Make an iterator that returns *object* over and over again. Runs indefinitely Make an iterator that returns *object* over and over again. Runs indefinitely
unless the *times* argument is specified. Used as argument to :func:`map` for unless the *times* argument is specified.
invariant parameters to the called function. Also used with :func:`zip` to
create an invariant part of a tuple record.
Roughly equivalent to:: Roughly equivalent to::
@ -644,7 +643,9 @@ loops that truncate the stream.
yield object yield object
A common use for *repeat* is to supply a stream of constant values to *map* A common use for *repeat* is to supply a stream of constant values to *map*
or *zip*:: or *zip*:
.. doctest::
>>> list(map(pow, range(10), repeat(2))) >>> list(map(pow, range(10), repeat(2)))
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81] [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
@ -653,9 +654,12 @@ loops that truncate the stream.
Make an iterator that computes the function using arguments obtained from Make an iterator that computes the function using arguments obtained from
the iterable. Used instead of :func:`map` when argument parameters are already the iterable. Used instead of :func:`map` when argument parameters are already
grouped in tuples from a single iterable (the data has been "pre-zipped"). The grouped in tuples from a single iterable (when the data has been
difference between :func:`map` and :func:`starmap` parallels the distinction "pre-zipped").
between ``function(a,b)`` and ``function(*c)``. Roughly equivalent to::
The difference between :func:`map` and :func:`starmap` parallels the
distinction between ``function(a,b)`` and ``function(*c)``. Roughly
equivalent to::
def starmap(function, iterable): def starmap(function, iterable):
# starmap(pow, [(2,5), (3,2), (10,3)]) --> 32 9 1000 # starmap(pow, [(2,5), (3,2), (10,3)]) --> 32 9 1000
@ -683,9 +687,7 @@ loops that truncate the stream.
The following Python code helps explain what *tee* does (although the actual The following Python code helps explain what *tee* does (although the actual
implementation is more complex and uses only a single underlying implementation is more complex and uses only a single underlying
:abbr:`FIFO (first-in, first-out)` queue). :abbr:`FIFO (first-in, first-out)` queue)::
Roughly equivalent to::
def tee(iterable, n=2): def tee(iterable, n=2):
it = iter(iterable) it = iter(iterable)
@ -702,7 +704,7 @@ loops that truncate the stream.
yield mydeque.popleft() yield mydeque.popleft()
return tuple(gen(d) for d in deques) return tuple(gen(d) for d in deques)
Once :func:`tee` has made a split, the original *iterable* should not be Once a :func:`tee` has been created, the original *iterable* should not be
used anywhere else; otherwise, the *iterable* could get advanced without used anywhere else; otherwise, the *iterable* could get advanced without
the tee objects being informed. the tee objects being informed.
@ -756,14 +758,28 @@ Itertools Recipes
This section shows recipes for creating an extended toolset using the existing This section shows recipes for creating an extended toolset using the existing
itertools as building blocks. itertools as building blocks.
The primary purpose of the itertools recipes is educational. The recipes show
various ways of thinking about individual tools — for example, that
``chain.from_iterable`` is related to the concept of flattening. The recipes
also give ideas about ways that the tools can be combined — for example, how
``compress()`` and ``range()`` can work together. The recipes also show patterns
for using itertools with the :mod:`operator` and :mod:`collections` modules as
well as with the built-in functions such as ``map()``, ``filter()``,
``reversed()``, and ``enumerate()``.
A secondary purpose of the recipes is to serve as an incubator. The
``accumulate()``, ``compress()``, and ``pairwise()`` itertools started out as
recipes. Currently, the ``iter_index()`` recipe is being tested to see
whether it proves its worth.
Substantially all of these recipes and many, many others can be installed from Substantially all of these recipes and many, many others can be installed from
the `more-itertools project <https://pypi.org/project/more-itertools/>`_ found the `more-itertools project <https://pypi.org/project/more-itertools/>`_ found
on the Python Package Index:: on the Python Package Index::
python -m pip install more-itertools python -m pip install more-itertools
The extended tools offer the same high performance as the underlying toolset. Many of the recipes offer the same high performance as the underlying toolset.
The superior memory performance is kept by processing elements one at a time Superior memory performance is kept by processing elements one at a time
rather than bringing the whole iterable into memory all at once. Code volume is rather than bringing the whole iterable into memory all at once. Code volume is
kept small by linking the tools together in a functional style which helps kept small by linking the tools together in a functional style which helps
eliminate temporary variables. High speed is retained by preferring eliminate temporary variables. High speed is retained by preferring
@ -848,13 +864,23 @@ which incur interpreter overhead.
for k in range(len(roots) + 1) for k in range(len(roots) + 1)
] ]
def iter_index(seq, value, start=0): def iter_index(iterable, value, start=0):
"Return indices where a value occurs in a sequence." "Return indices where a value occurs in a sequence or iterable."
# iter_index('AABCADEAF', 'A') --> 0 1 4 7 # iter_index('AABCADEAF', 'A') --> 0 1 4 7
try:
seq_index = iterable.index
except AttributeError:
# Slow path for general iterables
it = islice(iterable, start, None)
for i, element in enumerate(it, start):
if element is value or element == value:
yield i
else:
# Fast path for sequences
i = start - 1 i = start - 1
try: try:
while True: while True:
yield (i := seq.index(value, i+1)) yield (i := seq_index(value, i+1))
except ValueError: except ValueError:
pass pass
@ -978,16 +1004,19 @@ which incur interpreter overhead.
# unique_everseen('AAAABBBCCDAABBB') --> A B C D # unique_everseen('AAAABBBCCDAABBB') --> A B C D
# unique_everseen('ABBCcAD', str.lower) --> A B C D # unique_everseen('ABBCcAD', str.lower) --> A B C D
seen = set() seen = set()
seen_add = seen.add
if key is None: if key is None:
for element in filterfalse(seen.__contains__, iterable): for element in filterfalse(seen.__contains__, iterable):
seen_add(element) seen.add(element)
yield element yield element
# Note: The steps shown above are intended to demonstrate
# filterfalse(). For order preserving deduplication,
# a better solution is:
# yield from dict.fromkeys(iterable)
else: else:
for element in iterable: for element in iterable:
k = key(element) k = key(element)
if k not in seen: if k not in seen:
seen_add(k) seen.add(k)
yield element yield element
def unique_justseen(iterable, key=None): def unique_justseen(iterable, key=None):
@ -1196,6 +1225,18 @@ which incur interpreter overhead.
[] []
>>> list(iter_index('', 'X')) >>> list(iter_index('', 'X'))
[] []
>>> list(iter_index('AABCADEAF', 'A', 1))
[1, 4, 7]
>>> list(iter_index(iter('AABCADEAF'), 'A', 1))
[1, 4, 7]
>>> list(iter_index('AABCADEAF', 'A', 2))
[4, 7]
>>> list(iter_index(iter('AABCADEAF'), 'A', 2))
[4, 7]
>>> list(iter_index('AABCADEAF', 'A', 10))
[]
>>> list(iter_index(iter('AABCADEAF'), 'A', 10))
[]
>>> list(sieve(30)) >>> list(sieve(30))
[2, 3, 5, 7, 11, 13, 17, 19, 23, 29] [2, 3, 5, 7, 11, 13, 17, 19, 23, 29]