Move itertools module from the sandbox and into production.

2003-02-01 00:10:11 +00:00 · 2003-02-01 00:10:11 +00:00 · 96ef8115dd
parent 506be287aa
commit 96ef8115dd
8 changed files with 2027 additions and 0 deletions
--- a/Doc/lib/lib.tex
+++ b/Doc/lib/lib.tex
@ -125,6 +125,7 @@ and how to embed it in other applications.
 \input{libheapq}
 \input{libarray}
 \input{libsets}
+\input{libitertools}
 \input{libcfgparser}
 \input{libfileinput}
 \input{libxreadlines}
--- a/Doc/lib/libitertools.tex
+++ b/Doc/lib/libitertools.tex
@ -0,0 +1,325 @@
+\section{\module{itertools} ---
+         Functions creating iterators for efficient looping}
+
+\declaremodule{standard}{itertools}
+\modulesynopsis{Functions creating iterators for efficient looping.}
+\moduleauthor{Raymond Hettinger}{python@rcn.com}
+\sectionauthor{Raymond Hettinger}{python@rcn.com}
+\versionadded{2.3}
+
+
+This module implements a number of iterator building blocks inspired
+by constructs from the Haskell and SML programming languages.  Each
+has been recast in a form suitable for Python.
+
+With the advent of iterators and generators in Python 2.3, each of
+these tools can be expressed easily and succinctly in pure python.
+Rather duplicating what can already be done, this module emphasizes
+providing value in other ways:
+
+\begin{itemize}
+
+    \item Instead of constructing an over-specialized toolset, this module
+        provides basic building blocks that can be readily combined.
+
+        For instance, SML provides a tabulation tool: \code{tabulate(\var{f})}
+        which produces a sequence \code{f(0), f(1), ...}.  This toolbox
+        takes a different approach of providing \function{imap()} and
+        \function{count()} which can be combined to form
+        \code{imap(\var{f}, count())} and produce an equivalent result.
+
+    \item Some tools were dropped because they offer no advantage over their
+        pure python counterparts or because their behavior was too
+        surprising.
+
+        For instance, SML provides a tool:  \code{cycle(\var{seq})} which
+        loops over the sequence elements and then starts again when the
+        sequence is exhausted.  The surprising behavior is the need for
+        significant auxiliary storage (unusual for iterators).  Also, it
+        is trivially implemented in python with almost no performance
+        penalty.
+
+    \item Another source of value comes from standardizing a core set of tools
+        to avoid the readability and reliability problems that arise when many
+        different individuals create their own slightly varying implementations
+        each with their own quirks and naming conventions.
+
+    \item Whether cast in pure python form or C code, tools that use iterators
+        are more memory efficient (and faster) than their list based counterparts.
+        Adopting the principles of just-in-time manufacturing, they create
+        data when and where needed instead of consuming memory with the
+        computer equivalent of ``inventory''.
+
+\end{itemize}
+
+\begin{seealso}
+  \seetext{The Standard ML Basis Library,
+           \citetitle[http://www.standardml.org/Basis/]
+           {The Standard ML Basis Library}.}
+
+  \seetext{Haskell, A Purely Functional Language,
+           \citetitle[http://www.haskell.org/definition/]
+           {Definition of Haskell and the Standard Libraries}.}
+\end{seealso}
+
+
+\subsection{Itertool functions \label{itertools-functions}}
+
+The following module functions all construct and return iterators.
+Some provide streams of infinite length, so they should only be accessed
+by functions or loops that truncate the stream.
+
+\begin{funcdesc}{count}{\optional{n}}
+  Make an iterator that returns consecutive integers starting with \var{n}.
+  Does not currently support python long integers.  Often used as an
+  argument to \function{imap()} to generate consecutive data points.
+  Also, used in \function{izip()} to add sequence numbers.  Equivalent to:
+
+  \begin{verbatim}
+     def count(n=0):
+         cnt = n
+         while True:
+             yield cnt
+             cnt += 1
+  \end{verbatim}
+\end{funcdesc}
+
+\begin{funcdesc}{dropwhile}{predicate, iterable}
+  Make an iterator that drops elements from the iterable as long as
+  the predicate is true; afterwards, returns every element.  Note,
+  the iterator does not produce \emph{any} output until the predicate
+  is true, so it may have a lengthy start-up time.  Equivalent to:
+
+  \begin{verbatim}
+     def dropwhile(predicate, iterable):
+         iterable = iter(iterable)
+         while True:
+             x = iterable.next()
+             if predicate(x): continue # drop when predicate is true
+             yield x
+             break
+         while True:
+             yield iterable.next()
+  \end{verbatim}
+\end{funcdesc}
+
+\begin{funcdesc}{ifilter}{predicate, iterable \optional{, invert}}
+  Make an iterator that filters elements from iterable returning only
+  those for which the predicate is \code{True}.  If
+  \var{invert} is \code{True}, then reverse the process and pass through
+  only those elements for which the predicate is \code{False}.
+  If \var{predicate} is \code{None}, return the items that are true
+  (or false if \var{invert} has been set).  Equivalent to:
+
+  \begin{verbatim}
+     def ifilter(predicate, iterable, invert=False):
+         iterable = iter(iterable)
+         while True:
+             x = iterable.next()
+             if predicate is None:
+                  b = bool(x)
+             else:
+                  b = bool(predicate(x))
+             if not invert and b or invert and not b:
+                 yield x
+  \end{verbatim}
+\end{funcdesc}
+
+\begin{funcdesc}{imap}{function, *iterables}
+  Make an iterator that computes the function using arguments from
+  each of the iterables.  If \var{function} is set to \code{None}, then
+  \function{imap()} returns the arguments as a tuple.  Like
+  \function{map()} but stops when the shortest iterable is exhausted
+  instead of filling in \code{None} for shorter iterables.  The reason
+  for the difference is that infinite iterator arguments are typically
+  an error for \function{map()} (because the output is fully evaluated)
+  but represent a common and useful way of supplying arguments to
+  \function{imap()}.
+  Equivalent to:
+
+  \begin{verbatim}
+     def imap(function, *iterables):
+         iterables = map(iter, iterables)
+         while True:
+             args = [i.next() for i in iterables]
+             if function is None:
+                 yield tuple(args)
+             else:
+                 yield function(*args)
+  \end{verbatim}
+\end{funcdesc}
+
+\begin{funcdesc}{islice}{iterable, \optional{start,} stop \optional{, step}}
+  Make an iterator that returns selected elements from the iterable.
+  If \var{start} is non-zero, then elements from the iterable are skipped
+  until start is reached.  Afterward, elements are returned consecutively
+  unless \var{step} is set higher than one which results in items being
+  skipped.  If \var{stop} is specified, then iteration stops at the
+  specified element position; otherwise, it continues indefinitely or
+  until the iterable is exhausted.  Unlike regular slicing,
+  \function{islice()} does not support negative values for \var{start},
+  \var{stop}, or \var{step}.  Can be used to extract related fields
+  from data where the internal structure has been flattened (for
+  example, a multi-line report may list a name field on every
+  third line).  Equivalent to:
+
+  \begin{verbatim}
+     def islice(iterable, *args):
+         iterable = iter(iterable)
+         s = slice(*args)
+         next = s.start or 0
+         stop = s.stop
+         step = s.step or 1
+         cnt = 0
+         while True:
+              while cnt < next:
+                   dummy = iterable.next()
+                   cnt += 1
+              if cnt >= stop:
+                   break
+              yield iterable.next()
+              cnt += 1
+              next += step
+  \end{verbatim}
+\end{funcdesc}
+
+\begin{funcdesc}{izip}{*iterables}
+  Make an iterator that aggregates elements from each of the iterables.
+  Like \function{zip()} except that it returns an iterator instead of
+  a list.  Used for lock-step iteration over several iterables at a
+  time.  Equivalent to:
+
+  \begin{verbatim}
+     def izip(*iterables):
+         iterables = map(iter, iterables)
+         while True:
+             result = [i.next() for i in iterables]
+             yield tuple(result)
+  \end{verbatim}
+\end{funcdesc}
+
+\begin{funcdesc}{repeat}{obj}
+  Make an iterator that returns \var{obj} over and over again.
+  Used as argument to \function{imap()} for invariant parameters
+  to the called function.  Also used with function{izip()} to create
+  an invariant part of a tuple record.  Equivalent to:
+
+  \begin{verbatim}
+     def repeat(x):
+         while True:
+             yield x
+  \end{verbatim}
+\end{funcdesc}
+
+\begin{funcdesc}{starmap}{function, iterable}
+  Make an iterator that computes the function using arguments tuples
+  obtained from the iterable.  Used instead of \function{imap()} when
+  argument parameters are already grouped in tuples from a single iterable
+  (the data has been ``pre-zipped'').  The difference between
+  \function{imap()} and \function{starmap} parallels the distinction
+  between \code{function(a,b)} and \code{function(*c)}.
+  Equivalent to:
+
+  \begin{verbatim}
+     def starmap(function, iterable):
+         iterable = iter(iterable)
+         while True:
+             yield function(*iterable.next())
+  \end{verbatim}
+\end{funcdesc}
+
+\begin{funcdesc}{takewhile}{predicate, iterable}
+  Make an iterator that returns elements from the iterable as long as
+  the predicate is true.  Equivalent to:
+
+  \begin{verbatim}
+     def takewhile(predicate, iterable):
+         iterable = iter(iterable)
+         while True:
+             x = iterable.next()
+             if predicate(x):
+                 yield x
+             else:
+                 break
+  \end{verbatim}
+\end{funcdesc}
+
+\begin{funcdesc}{times}{n, \optional{object}}
+  Make an iterator that returns \var{object} \var{n} times.
+  \var{object} defaults to \code{None}.  Used for looping a specific
+  number of times without creating a number object on each pass.
+  Equivalent to:
+
+  \begin{verbatim}
+     def times(n, object=None):
+         if n<0 : raise ValueError
+         for i in xrange(n):
+             yield object
+  \end{verbatim}
+\end{funcdesc}
+
+
+\subsection{Examples \label{itertools-example}}
+
+The following examples show common uses for each tool and
+demonstrate ways they can be combined.
+
+\begin{verbatim}
+>>> for i in times(3):
+...     print "Hello"
+...
+Hello
+Hello
+Hello
+
+>>> amounts = [120.15, 764.05, 823.14]
+>>> for checknum, amount in izip(count(1200), amounts):
+...     print 'Check %d is for $%.2f' % (checknum, amount)
+...
+Check 1200 is for $120.15
+Check 1201 is for $764.05
+Check 1202 is for $823.14
+
+>>> import operator
+>>> for cube in imap(operator.pow, xrange(1,4), repeat(3)):
+...    print cube
+...
+1
+8
+27
+
+>>> reportlines = ['EuroPython', 'Roster', '', 'alex', '', 'laura',
+                  '', 'martin', '', 'walter', '', 'samuele']
+>>> for name in islice(reportlines, 3, len(reportlines), 2):
+...    print name.title()
+...
+Alex
+Laura
+Martin
+Walter
+Samuele
+
+\end{verbatim}
+
+This section has further examples of how itertools can be combined.
+Note that \function{enumerate()} and \method{iteritems()} already
+have highly efficient implementations in Python.  They are only
+included here to illustrate how higher level tools can be created
+from building blocks.
+
+\begin{verbatim}
+>>> def enumerate(iterable):
+...     return izip(count(), iterable)
+
+>>> def tabulate(function):
+...     "Return function(0), function(1), ..."
+...     return imap(function, count())
+
+>>> def iteritems(mapping):
+...     return izip(mapping.iterkeys(), mapping.itervalues())
+
+>>> def nth(iterable, n):
+...     "Returns the nth item"
+...     return islice(iterable, n, n+1).next()
+
+\end{verbatim}
--- a/Lib/test/test_itertools.py
+++ b/Lib/test/test_itertools.py
@ -0,0 +1,158 @@
+import unittest
+from test import test_support
+from itertools import *
+
+class TestBasicOps(unittest.TestCase):
+    def test_count(self):
+        self.assertEqual(zip('abc',count()), [('a', 0), ('b', 1), ('c', 2)])
+        self.assertEqual(zip('abc',count(3)), [('a', 3), ('b', 4), ('c', 5)])
+        self.assertRaises(TypeError, count, 2, 3)
+
+    def test_ifilter(self):
+        def isEven(x):
+            return x%2==0
+        self.assertEqual(list(ifilter(isEven, range(6))), [0,2,4])
+        self.assertEqual(list(ifilter(isEven, range(6), True)), [1,3,5])
+        self.assertEqual(list(ifilter(None, [0,1,0,2,0])), [1,2])
+        self.assertRaises(TypeError, ifilter)
+        self.assertRaises(TypeError, ifilter, 3)
+        self.assertRaises(TypeError, ifilter, isEven, 3)
+        self.assertRaises(TypeError, ifilter, isEven, [3], True, 4)
+
+    def test_izip(self):
+        ans = [(x,y) for x, y in izip('abc',count())]
+        self.assertEqual(ans, [('a', 0), ('b', 1), ('c', 2)])
+        self.assertRaises(TypeError, izip)
+
+    def test_repeat(self):
+        self.assertEqual(zip(xrange(3),repeat('a')),
+                         [(0, 'a'), (1, 'a'), (2, 'a')])
+        self.assertRaises(TypeError, repeat)
+
+    def test_times(self):
+        self.assertEqual(list(times(3)), [None]*3)
+        self.assertEqual(list(times(3, True)), [True]*3)
+        self.assertRaises(ValueError, times, -1)
+
+    def test_imap(self):
+        import operator
+        self.assertEqual(list(imap(operator.pow, range(3), range(1,7))),
+                         [0**1, 1**2, 2**3])
+        self.assertEqual(list(imap(None, 'abc', range(5))),
+                         [('a',0),('b',1),('c',2)])
+        self.assertRaises(TypeError, imap)
+        self.assertRaises(TypeError, imap, operator.neg)
+
+    def test_starmap(self):
+        import operator
+        self.assertEqual(list(starmap(operator.pow, zip(range(3), range(1,7)))),
+                         [0**1, 1**2, 2**3])
+
+    def test_islice(self):
+        for args in [          # islice(args) should agree with range(args)
+                (10, 20, 3),
+                (10, 3, 20),
+                (10, 20),
+                (10, 3),
+                (20,)
+                ]:
+            self.assertEqual(list(islice(xrange(100), *args)), range(*args))
+
+        for args, tgtargs in [  # Stop when seqn is exhausted
+                ((10, 110, 3), ((10, 100, 3))),
+                ((10, 110), ((10, 100))),
+                ((110,), (100,))
+                ]:
+            self.assertEqual(list(islice(xrange(100), *args)), range(*tgtargs))
+
+        self.assertRaises(TypeError, islice, xrange(10))
+        self.assertRaises(TypeError, islice, xrange(10), 1, 2, 3, 4)
+        self.assertRaises(ValueError, islice, xrange(10), -5, 10, 1)
+        self.assertRaises(ValueError, islice, xrange(10), 1, -5, -1)
+        self.assertRaises(ValueError, islice, xrange(10), 1, 10, -1)
+        self.assertRaises(ValueError, islice, xrange(10), 1, 10, 0)
+
+    def test_takewhile(self):
+        data = [1, 3, 5, 20, 2, 4, 6, 8]
+        underten = lambda x: x<10
+        self.assertEqual(list(takewhile(underten, data)), [1, 3, 5])
+
+    def test_dropwhile(self):
+        data = [1, 3, 5, 20, 2, 4, 6, 8]
+        underten = lambda x: x<10
+        self.assertEqual(list(dropwhile(underten, data)), [20, 2, 4, 6, 8])
+
+libreftest = """ Doctest for examples in the library reference, libitertools.tex
+
+>>> for i in times(3):
+...     print "Hello"
+...
+Hello
+Hello
+Hello
+
+>>> amounts = [120.15, 764.05, 823.14]
+>>> for checknum, amount in izip(count(1200), amounts):
+...     print 'Check %d is for $%.2f' % (checknum, amount)
+...
+Check 1200 is for $120.15
+Check 1201 is for $764.05
+Check 1202 is for $823.14
+
+>>> import operator
+>>> import operator
+>>> for cube in imap(operator.pow, xrange(1,4), repeat(3)):
+...    print cube
+...
+1
+8
+27
+
+>>> reportlines = ['EuroPython', 'Roster', '', 'alex', '', 'laura', '', 'martin', '', 'walter', '', 'samuele']
+>>> for name in islice(reportlines, 3, len(reportlines), 2):
+...    print name.title()
+...
+Alex
+Laura
+Martin
+Walter
+Samuele
+
+>>> def enumerate(iterable):
+...     return izip(count(), iterable)
+
+>>> def tabulate(function):
+...     "Return function(0), function(1), ..."
+...     return imap(function, count())
+
+>>> def iteritems(mapping):
+...     return izip(mapping.iterkeys(), mapping.itervalues())
+
+>>> def nth(iterable, n):
+...     "Returns the nth item"
+...     return islice(iterable, n, n+1).next()
+
+"""
+
+__test__ = {'libreftest' : libreftest}
+
+def test_main(verbose=None):
+    import test_itertools
+    suite = unittest.TestSuite()
+    for testclass in (TestBasicOps,
+                          ):
+        suite.addTest(unittest.makeSuite(testclass))
+    test_support.run_suite(suite)
+    test_support.run_doctest(test_itertools, verbose)
+
+    # verify reference counting
+    import sys
+    if verbose and hasattr(sys, "gettotalrefcount"):
+        counts = []
+        for i in xrange(5):
+            test_support.run_suite(suite)
+            counts.append(sys.gettotalrefcount())
+        print counts
+
+if __name__ == "__main__":
+    test_main(verbose=True)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@ -30,6 +30,9 @@ Core and builtins
 Extension modules
 -----------------

+- Added an itertools module containing high speed, memory efficient
+  looping constructs inspired by tools from Haskell and SML.
+
 - The SSL module now handles sockets with a timeout set correctly (SF
  patch #675750, fixing SF bug #675552).

--- a/Modules/itertoolsmodule.c
+++ b/Modules/itertoolsmodule.c
--- a/PC/config.c
+++ b/PC/config.c
@ -45,6 +45,7 @@ extern void init_hotshot(void);
 extern void initxxsubtype(void);
 extern void initzipimport(void);
 extern void init_random(void);
+extern void inititertools(void);

 /* XXX tim: what's the purpose of ADDMODULE MARKER? */
 /* -- ADDMODULE MARKER 1 -- */
@ -97,6 +98,7 @@ struct _inittab _PyImport_Inittab[] = {
 	{"_weakref", init_weakref},
 	{"_hotshot", init_hotshot},
 	{"_random", init_random},
+	{"itertools", inititertools},

 	{"xxsubtype", initxxsubtype},
 	{"zipimport", initzipimport},
--- a/PCbuild/pythoncore.dsp
+++ b/PCbuild/pythoncore.dsp
@ -323,6 +323,10 @@ SOURCE=..\Objects\iterobject.c
 # End Source File
 # Begin Source File

+SOURCE=..\Modules\itertoolsmodule.c
+# End Source File
+# Begin Source File
+
 SOURCE=..\Parser\listnode.c
 # End Source File
 # Begin Source File
--- a/setup.py
+++ b/setup.py
@ -324,6 +324,8 @@ class PyBuildExt(build_ext):
                               libraries=math_libs) )
        # random number generator implemented in C
        exts.append( Extension("_random", ["_randommodule.c"]) )
+        # fast iterator tools implemented in C
+        exts.append( Extension("itertools", ["itertoolsmodule.c"]) )
        # operator.add() and similar goodies
        exts.append( Extension('operator', ['operator.c']) )
        # Python C API test module