Move itertools module from the sandbox and into production.
This commit is contained in:
parent
506be287aa
commit
96ef8115dd
|
@ -125,6 +125,7 @@ and how to embed it in other applications.
|
|||
\input{libheapq}
|
||||
\input{libarray}
|
||||
\input{libsets}
|
||||
\input{libitertools}
|
||||
\input{libcfgparser}
|
||||
\input{libfileinput}
|
||||
\input{libxreadlines}
|
||||
|
|
|
@ -0,0 +1,325 @@
|
|||
\section{\module{itertools} ---
|
||||
Functions creating iterators for efficient looping}
|
||||
|
||||
\declaremodule{standard}{itertools}
|
||||
\modulesynopsis{Functions creating iterators for efficient looping.}
|
||||
\moduleauthor{Raymond Hettinger}{python@rcn.com}
|
||||
\sectionauthor{Raymond Hettinger}{python@rcn.com}
|
||||
\versionadded{2.3}
|
||||
|
||||
|
||||
This module implements a number of iterator building blocks inspired
|
||||
by constructs from the Haskell and SML programming languages. Each
|
||||
has been recast in a form suitable for Python.
|
||||
|
||||
With the advent of iterators and generators in Python 2.3, each of
|
||||
these tools can be expressed easily and succinctly in pure python.
|
||||
Rather than duplicating what can already be done, this module emphasizes
|
||||
providing value in other ways:
|
||||
|
||||
\begin{itemize}
|
||||
|
||||
\item Instead of constructing an over-specialized toolset, this module
|
||||
provides basic building blocks that can be readily combined.
|
||||
|
||||
For instance, SML provides a tabulation tool: \code{tabulate(\var{f})}
|
||||
which produces a sequence \code{f(0), f(1), ...}. This toolbox
|
||||
takes a different approach of providing \function{imap()} and
|
||||
\function{count()} which can be combined to form
|
||||
\code{imap(\var{f}, count())} and produce an equivalent result.
|
||||
|
||||
\item Some tools were dropped because they offer no advantage over their
|
||||
pure python counterparts or because their behavior was too
|
||||
surprising.
|
||||
|
||||
For instance, SML provides a tool: \code{cycle(\var{seq})} which
|
||||
loops over the sequence elements and then starts again when the
|
||||
sequence is exhausted. The surprising behavior is the need for
|
||||
significant auxiliary storage (unusual for iterators). Also, it
|
||||
is trivially implemented in python with almost no performance
|
||||
penalty.
|
||||
|
||||
\item Another source of value comes from standardizing a core set of tools
|
||||
to avoid the readability and reliability problems that arise when many
|
||||
different individuals create their own slightly varying implementations
|
||||
each with their own quirks and naming conventions.
|
||||
|
||||
\item Whether cast in pure python form or C code, tools that use iterators
|
||||
are more memory efficient (and faster) than their list based counterparts.
|
||||
Adopting the principles of just-in-time manufacturing, they create
|
||||
data when and where needed instead of consuming memory with the
|
||||
computer equivalent of ``inventory''.
|
||||
|
||||
\end{itemize}
|
||||
|
||||
\begin{seealso}
|
||||
\seetext{The Standard ML Basis Library,
|
||||
\citetitle[http://www.standardml.org/Basis/]
|
||||
{The Standard ML Basis Library}.}
|
||||
|
||||
\seetext{Haskell, A Purely Functional Language,
|
||||
\citetitle[http://www.haskell.org/definition/]
|
||||
{Definition of Haskell and the Standard Libraries}.}
|
||||
\end{seealso}
|
||||
|
||||
|
||||
\subsection{Itertool functions \label{itertools-functions}}
|
||||
|
||||
The following module functions all construct and return iterators.
|
||||
Some provide streams of infinite length, so they should only be accessed
|
||||
by functions or loops that truncate the stream.
|
||||
|
||||
\begin{funcdesc}{count}{\optional{n}}
|
||||
Make an iterator that returns consecutive integers starting with \var{n}.
|
||||
Does not currently support python long integers. Often used as an
|
||||
argument to \function{imap()} to generate consecutive data points.
|
||||
Also used with \function{izip()} to add sequence numbers. Equivalent to:
|
||||
|
||||
\begin{verbatim}
|
||||
def count(n=0):
|
||||
cnt = n
|
||||
while True:
|
||||
yield cnt
|
||||
cnt += 1
|
||||
\end{verbatim}
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{funcdesc}{dropwhile}{predicate, iterable}
|
||||
Make an iterator that drops elements from the iterable as long as
|
||||
the predicate is true; afterwards, returns every element. Note,
|
||||
the iterator does not produce \emph{any} output until the predicate
|
||||
first becomes false, so it may have a lengthy start-up time. Equivalent to:
|
||||
|
||||
\begin{verbatim}
|
||||
def dropwhile(predicate, iterable):
|
||||
iterable = iter(iterable)
|
||||
while True:
|
||||
x = iterable.next()
|
||||
if predicate(x): continue # drop when predicate is true
|
||||
yield x
|
||||
break
|
||||
while True:
|
||||
yield iterable.next()
|
||||
\end{verbatim}
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{funcdesc}{ifilter}{predicate, iterable \optional{, invert}}
|
||||
Make an iterator that filters elements from iterable returning only
|
||||
those for which the predicate is \code{True}. If
|
||||
\var{invert} is \code{True}, then reverse the process and pass through
|
||||
only those elements for which the predicate is \code{False}.
|
||||
If \var{predicate} is \code{None}, return the items that are true
|
||||
(or false if \var{invert} has been set). Equivalent to:
|
||||
|
||||
\begin{verbatim}
|
||||
def ifilter(predicate, iterable, invert=False):
|
||||
iterable = iter(iterable)
|
||||
while True:
|
||||
x = iterable.next()
|
||||
if predicate is None:
|
||||
b = bool(x)
|
||||
else:
|
||||
b = bool(predicate(x))
|
||||
if not invert and b or invert and not b:
|
||||
yield x
|
||||
\end{verbatim}
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{funcdesc}{imap}{function, *iterables}
|
||||
Make an iterator that computes the function using arguments from
|
||||
each of the iterables. If \var{function} is set to \code{None}, then
|
||||
\function{imap()} returns the arguments as a tuple. Like
|
||||
\function{map()} but stops when the shortest iterable is exhausted
|
||||
instead of filling in \code{None} for shorter iterables. The reason
|
||||
for the difference is that infinite iterator arguments are typically
|
||||
an error for \function{map()} (because the output is fully evaluated)
|
||||
but represent a common and useful way of supplying arguments to
|
||||
\function{imap()}.
|
||||
Equivalent to:
|
||||
|
||||
\begin{verbatim}
|
||||
def imap(function, *iterables):
|
||||
iterables = map(iter, iterables)
|
||||
while True:
|
||||
args = [i.next() for i in iterables]
|
||||
if function is None:
|
||||
yield tuple(args)
|
||||
else:
|
||||
yield function(*args)
|
||||
\end{verbatim}
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{funcdesc}{islice}{iterable, \optional{start,} stop \optional{, step}}
|
||||
Make an iterator that returns selected elements from the iterable.
|
||||
If \var{start} is non-zero, then elements from the iterable are skipped
|
||||
until start is reached. Afterward, elements are returned consecutively
|
||||
unless \var{step} is set higher than one which results in items being
|
||||
skipped. If \var{stop} is specified, then iteration stops at the
|
||||
specified element position; otherwise, it continues indefinitely or
|
||||
until the iterable is exhausted. Unlike regular slicing,
|
||||
\function{islice()} does not support negative values for \var{start},
|
||||
\var{stop}, or \var{step}. Can be used to extract related fields
|
||||
from data where the internal structure has been flattened (for
|
||||
example, a multi-line report may list a name field on every
|
||||
third line). Equivalent to:
|
||||
|
||||
\begin{verbatim}
|
||||
def islice(iterable, *args):
|
||||
iterable = iter(iterable)
|
||||
s = slice(*args)
|
||||
next = s.start or 0
|
||||
stop = s.stop
|
||||
step = s.step or 1
|
||||
cnt = 0
|
||||
while True:
|
||||
while cnt < next:
|
||||
dummy = iterable.next()
|
||||
cnt += 1
|
||||
if cnt >= stop:
|
||||
break
|
||||
yield iterable.next()
|
||||
cnt += 1
|
||||
next += step
|
||||
\end{verbatim}
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{funcdesc}{izip}{*iterables}
|
||||
Make an iterator that aggregates elements from each of the iterables.
|
||||
Like \function{zip()} except that it returns an iterator instead of
|
||||
a list. Used for lock-step iteration over several iterables at a
|
||||
time. Equivalent to:
|
||||
|
||||
\begin{verbatim}
|
||||
def izip(*iterables):
|
||||
iterables = map(iter, iterables)
|
||||
while True:
|
||||
result = [i.next() for i in iterables]
|
||||
yield tuple(result)
|
||||
\end{verbatim}
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{funcdesc}{repeat}{obj}
|
||||
Make an iterator that returns \var{obj} over and over again.
|
||||
Used as argument to \function{imap()} for invariant parameters
|
||||
to the called function. Also used with \function{izip()} to create
|
||||
an invariant part of a tuple record. Equivalent to:
|
||||
|
||||
\begin{verbatim}
|
||||
def repeat(x):
|
||||
while True:
|
||||
yield x
|
||||
\end{verbatim}
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{funcdesc}{starmap}{function, iterable}
|
||||
Make an iterator that computes the function using argument tuples
|
||||
obtained from the iterable. Used instead of \function{imap()} when
|
||||
argument parameters are already grouped in tuples from a single iterable
|
||||
(the data has been ``pre-zipped''). The difference between
|
||||
\function{imap()} and \function{starmap()} parallels the distinction
|
||||
between \code{function(a,b)} and \code{function(*c)}.
|
||||
Equivalent to:
|
||||
|
||||
\begin{verbatim}
|
||||
def starmap(function, iterable):
|
||||
iterable = iter(iterable)
|
||||
while True:
|
||||
yield function(*iterable.next())
|
||||
\end{verbatim}
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{funcdesc}{takewhile}{predicate, iterable}
|
||||
Make an iterator that returns elements from the iterable as long as
|
||||
the predicate is true. Equivalent to:
|
||||
|
||||
\begin{verbatim}
|
||||
def takewhile(predicate, iterable):
|
||||
iterable = iter(iterable)
|
||||
while True:
|
||||
x = iterable.next()
|
||||
if predicate(x):
|
||||
yield x
|
||||
else:
|
||||
break
|
||||
\end{verbatim}
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{funcdesc}{times}{n, \optional{object}}
|
||||
Make an iterator that returns \var{object} \var{n} times.
|
||||
\var{object} defaults to \code{None}. Used for looping a specific
|
||||
number of times without creating a number object on each pass.
|
||||
Equivalent to:
|
||||
|
||||
\begin{verbatim}
|
||||
def times(n, object=None):
|
||||
if n<0 : raise ValueError
|
||||
for i in xrange(n):
|
||||
yield object
|
||||
\end{verbatim}
|
||||
\end{funcdesc}
|
||||
|
||||
|
||||
\subsection{Examples \label{itertools-example}}
|
||||
|
||||
The following examples show common uses for each tool and
|
||||
demonstrate ways they can be combined.
|
||||
|
||||
\begin{verbatim}
|
||||
>>> for i in times(3):
|
||||
... print "Hello"
|
||||
...
|
||||
Hello
|
||||
Hello
|
||||
Hello
|
||||
|
||||
>>> amounts = [120.15, 764.05, 823.14]
|
||||
>>> for checknum, amount in izip(count(1200), amounts):
|
||||
... print 'Check %d is for $%.2f' % (checknum, amount)
|
||||
...
|
||||
Check 1200 is for $120.15
|
||||
Check 1201 is for $764.05
|
||||
Check 1202 is for $823.14
|
||||
|
||||
>>> import operator
|
||||
>>> for cube in imap(operator.pow, xrange(1,4), repeat(3)):
|
||||
... print cube
|
||||
...
|
||||
1
|
||||
8
|
||||
27
|
||||
|
||||
>>> reportlines = ['EuroPython', 'Roster', '', 'alex', '', 'laura',
|
||||
'', 'martin', '', 'walter', '', 'samuele']
|
||||
>>> for name in islice(reportlines, 3, len(reportlines), 2):
|
||||
... print name.title()
|
||||
...
|
||||
Alex
|
||||
Laura
|
||||
Martin
|
||||
Walter
|
||||
Samuele
|
||||
|
||||
\end{verbatim}
|
||||
|
||||
This section has further examples of how itertools can be combined.
|
||||
Note that \function{enumerate()} and \method{iteritems()} already
|
||||
have highly efficient implementations in Python. They are only
|
||||
included here to illustrate how higher level tools can be created
|
||||
from building blocks.
|
||||
|
||||
\begin{verbatim}
|
||||
>>> def enumerate(iterable):
|
||||
... return izip(count(), iterable)
|
||||
|
||||
>>> def tabulate(function):
|
||||
... "Return function(0), function(1), ..."
|
||||
... return imap(function, count())
|
||||
|
||||
>>> def iteritems(mapping):
|
||||
... return izip(mapping.iterkeys(), mapping.itervalues())
|
||||
|
||||
>>> def nth(iterable, n):
|
||||
... "Returns the nth item"
|
||||
... return islice(iterable, n, n+1).next()
|
||||
|
||||
\end{verbatim}
|
|
@ -0,0 +1,158 @@
|
|||
import unittest
|
||||
from test import test_support
|
||||
from itertools import *
|
||||
|
||||
class TestBasicOps(unittest.TestCase):
|
||||
def test_count(self):
|
||||
self.assertEqual(zip('abc',count()), [('a', 0), ('b', 1), ('c', 2)])
|
||||
self.assertEqual(zip('abc',count(3)), [('a', 3), ('b', 4), ('c', 5)])
|
||||
self.assertRaises(TypeError, count, 2, 3)
|
||||
|
||||
def test_ifilter(self):
|
||||
def isEven(x):
|
||||
return x%2==0
|
||||
self.assertEqual(list(ifilter(isEven, range(6))), [0,2,4])
|
||||
self.assertEqual(list(ifilter(isEven, range(6), True)), [1,3,5])
|
||||
self.assertEqual(list(ifilter(None, [0,1,0,2,0])), [1,2])
|
||||
self.assertRaises(TypeError, ifilter)
|
||||
self.assertRaises(TypeError, ifilter, 3)
|
||||
self.assertRaises(TypeError, ifilter, isEven, 3)
|
||||
self.assertRaises(TypeError, ifilter, isEven, [3], True, 4)
|
||||
|
||||
def test_izip(self):
|
||||
ans = [(x,y) for x, y in izip('abc',count())]
|
||||
self.assertEqual(ans, [('a', 0), ('b', 1), ('c', 2)])
|
||||
self.assertRaises(TypeError, izip)
|
||||
|
||||
def test_repeat(self):
|
||||
self.assertEqual(zip(xrange(3),repeat('a')),
|
||||
[(0, 'a'), (1, 'a'), (2, 'a')])
|
||||
self.assertRaises(TypeError, repeat)
|
||||
|
||||
def test_times(self):
|
||||
self.assertEqual(list(times(3)), [None]*3)
|
||||
self.assertEqual(list(times(3, True)), [True]*3)
|
||||
self.assertRaises(ValueError, times, -1)
|
||||
|
||||
def test_imap(self):
|
||||
import operator
|
||||
self.assertEqual(list(imap(operator.pow, range(3), range(1,7))),
|
||||
[0**1, 1**2, 2**3])
|
||||
self.assertEqual(list(imap(None, 'abc', range(5))),
|
||||
[('a',0),('b',1),('c',2)])
|
||||
self.assertRaises(TypeError, imap)
|
||||
self.assertRaises(TypeError, imap, operator.neg)
|
||||
|
||||
def test_starmap(self):
|
||||
import operator
|
||||
self.assertEqual(list(starmap(operator.pow, zip(range(3), range(1,7)))),
|
||||
[0**1, 1**2, 2**3])
|
||||
|
||||
def test_islice(self):
|
||||
for args in [ # islice(args) should agree with range(args)
|
||||
(10, 20, 3),
|
||||
(10, 3, 20),
|
||||
(10, 20),
|
||||
(10, 3),
|
||||
(20,)
|
||||
]:
|
||||
self.assertEqual(list(islice(xrange(100), *args)), range(*args))
|
||||
|
||||
for args, tgtargs in [ # Stop when seqn is exhausted
|
||||
((10, 110, 3), ((10, 100, 3))),
|
||||
((10, 110), ((10, 100))),
|
||||
((110,), (100,))
|
||||
]:
|
||||
self.assertEqual(list(islice(xrange(100), *args)), range(*tgtargs))
|
||||
|
||||
self.assertRaises(TypeError, islice, xrange(10))
|
||||
self.assertRaises(TypeError, islice, xrange(10), 1, 2, 3, 4)
|
||||
self.assertRaises(ValueError, islice, xrange(10), -5, 10, 1)
|
||||
self.assertRaises(ValueError, islice, xrange(10), 1, -5, -1)
|
||||
self.assertRaises(ValueError, islice, xrange(10), 1, 10, -1)
|
||||
self.assertRaises(ValueError, islice, xrange(10), 1, 10, 0)
|
||||
|
||||
def test_takewhile(self):
|
||||
data = [1, 3, 5, 20, 2, 4, 6, 8]
|
||||
underten = lambda x: x<10
|
||||
self.assertEqual(list(takewhile(underten, data)), [1, 3, 5])
|
||||
|
||||
def test_dropwhile(self):
|
||||
data = [1, 3, 5, 20, 2, 4, 6, 8]
|
||||
underten = lambda x: x<10
|
||||
self.assertEqual(list(dropwhile(underten, data)), [20, 2, 4, 6, 8])
|
||||
|
||||
libreftest = """ Doctest for examples in the library reference, libitertools.tex
|
||||
|
||||
>>> for i in times(3):
|
||||
... print "Hello"
|
||||
...
|
||||
Hello
|
||||
Hello
|
||||
Hello
|
||||
|
||||
>>> amounts = [120.15, 764.05, 823.14]
|
||||
>>> for checknum, amount in izip(count(1200), amounts):
|
||||
... print 'Check %d is for $%.2f' % (checknum, amount)
|
||||
...
|
||||
Check 1200 is for $120.15
|
||||
Check 1201 is for $764.05
|
||||
Check 1202 is for $823.14
|
||||
|
||||
>>> import operator
|
||||
>>> import operator
|
||||
>>> for cube in imap(operator.pow, xrange(1,4), repeat(3)):
|
||||
... print cube
|
||||
...
|
||||
1
|
||||
8
|
||||
27
|
||||
|
||||
>>> reportlines = ['EuroPython', 'Roster', '', 'alex', '', 'laura', '', 'martin', '', 'walter', '', 'samuele']
|
||||
>>> for name in islice(reportlines, 3, len(reportlines), 2):
|
||||
... print name.title()
|
||||
...
|
||||
Alex
|
||||
Laura
|
||||
Martin
|
||||
Walter
|
||||
Samuele
|
||||
|
||||
>>> def enumerate(iterable):
|
||||
... return izip(count(), iterable)
|
||||
|
||||
>>> def tabulate(function):
|
||||
... "Return function(0), function(1), ..."
|
||||
... return imap(function, count())
|
||||
|
||||
>>> def iteritems(mapping):
|
||||
... return izip(mapping.iterkeys(), mapping.itervalues())
|
||||
|
||||
>>> def nth(iterable, n):
|
||||
... "Returns the nth item"
|
||||
... return islice(iterable, n, n+1).next()
|
||||
|
||||
"""
|
||||
|
||||
__test__ = {'libreftest' : libreftest}
|
||||
|
||||
def test_main(verbose=None):
|
||||
import test_itertools
|
||||
suite = unittest.TestSuite()
|
||||
for testclass in (TestBasicOps,
|
||||
):
|
||||
suite.addTest(unittest.makeSuite(testclass))
|
||||
test_support.run_suite(suite)
|
||||
test_support.run_doctest(test_itertools, verbose)
|
||||
|
||||
# verify reference counting
|
||||
import sys
|
||||
if verbose and hasattr(sys, "gettotalrefcount"):
|
||||
counts = []
|
||||
for i in xrange(5):
|
||||
test_support.run_suite(suite)
|
||||
counts.append(sys.gettotalrefcount())
|
||||
print counts
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_main(verbose=True)
|
|
@ -30,6 +30,9 @@ Core and builtins
|
|||
Extension modules
|
||||
-----------------
|
||||
|
||||
- Added an itertools module containing high speed, memory efficient
|
||||
looping constructs inspired by tools from Haskell and SML.
|
||||
|
||||
- The SSL module now handles sockets with a timeout set correctly (SF
|
||||
patch #675750, fixing SF bug #675552).
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -45,6 +45,7 @@ extern void init_hotshot(void);
|
|||
extern void initxxsubtype(void);
|
||||
extern void initzipimport(void);
|
||||
extern void init_random(void);
|
||||
extern void inititertools(void);
|
||||
|
||||
/* XXX tim: what's the purpose of ADDMODULE MARKER? */
|
||||
/* -- ADDMODULE MARKER 1 -- */
|
||||
|
@ -97,6 +98,7 @@ struct _inittab _PyImport_Inittab[] = {
|
|||
{"_weakref", init_weakref},
|
||||
{"_hotshot", init_hotshot},
|
||||
{"_random", init_random},
|
||||
{"itertools", inititertools},
|
||||
|
||||
{"xxsubtype", initxxsubtype},
|
||||
{"zipimport", initzipimport},
|
||||
|
|
|
@ -323,6 +323,10 @@ SOURCE=..\Objects\iterobject.c
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\Modules\itertoolsmodule.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\Parser\listnode.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
|
2
setup.py
2
setup.py
|
@ -324,6 +324,8 @@ class PyBuildExt(build_ext):
|
|||
libraries=math_libs) )
|
||||
# random number generator implemented in C
|
||||
exts.append( Extension("_random", ["_randommodule.c"]) )
|
||||
# fast iterator tools implemented in C
|
||||
exts.append( Extension("itertools", ["itertoolsmodule.c"]) )
|
||||
# operator.add() and similar goodies
|
||||
exts.append( Extension('operator', ['operator.c']) )
|
||||
# Python C API test module
|
||||
|
|
Loading…
Reference in New Issue