Add merge() function to heapq.

This commit is contained in:
Raymond Hettinger 2007-02-19 04:08:43 +00:00
parent d6fc72a5ae
commit 00166c5532
4 changed files with 64 additions and 5 deletions

View File

@ -88,7 +88,18 @@ True
>>>
\end{verbatim}
The module also offers two general purpose functions based on heaps.
The module also offers three general purpose functions based on heaps.
\begin{funcdesc}{merge}{*iterables}
Merge multiple sorted inputs into a single sorted output (for example, merge
timestamped entries from multiple log files). Returns an iterator over
over the sorted values.
Similar to \code{sorted(itertools.chain(*iterables))} but returns an iterable,
does not pull the data into memory all at once, and reduces the number of
comparisons by assuming that each of the input streams is already sorted.
\versionadded{2.6}
\end{funcdesc}
\begin{funcdesc}{nlargest}{n, iterable\optional{, key}}
Return a list with the \var{n} largest elements from the dataset defined
@ -110,7 +121,7 @@ Equivalent to: \samp{sorted(iterable, key=key)[:n]}
\versionchanged[Added the optional \var{key} argument]{2.5}
\end{funcdesc}
Both functions perform best for smaller values of \var{n}. For larger
The latter two functions perform best for smaller values of \var{n}. For larger
values, it is more efficient to use the \function{sorted()} function. Also,
when \code{n==1}, it is more efficient to use the builtin \function{min()}
and \function{max()} functions.

View File

@ -126,8 +126,8 @@ Believe me, real good tape sorts were quite spectacular to watch!
From all times, sorting has always been a Great Art! :-)
"""
__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'nlargest',
'nsmallest']
__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'merge',
'nlargest', 'nsmallest']
from itertools import islice, repeat, count, imap, izip, tee
from operator import itemgetter, neg
@ -308,6 +308,41 @@ try:
except ImportError:
pass
def merge(*iterables):
'''Merge multiple sorted inputs into a single sorted output.
Similar to sorted(itertools.chain(*iterables)) but returns an iterable,
does not pull the data into memory all at once, and reduces the number
of comparisons by assuming that each of the input streams is already sorted.
>>> list(merge([1,3,5,7], [0,2,4,8], [5,10,15,20], [], [25]))
[0, 1, 2, 3, 4, 5, 5, 7, 8, 10, 15, 20, 25]
'''
_heappop, siftup, _StopIteration = heappop, _siftup, StopIteration
h = []
h_append = h.append
for it in map(iter, iterables):
try:
next = it.next
h_append([next(), next])
except _StopIteration:
pass
heapify(h)
while 1:
try:
while 1:
v, next = s = h[0] # raises IndexError when h is empty
yield v
s[0] = next() # raises StopIteration when exhausted
siftup(h, 0) # restore heap condition
except _StopIteration:
_heappop(h) # remove empty iterator
except IndexError:
return
# Extend the implementations of nsmallest and nlargest to use a key= argument
_nsmallest = nsmallest
def nsmallest(n, iterable, key=None):
@ -341,3 +376,6 @@ if __name__ == "__main__":
while heap:
sort.append(heappop(heap))
print sort
import doctest
doctest.testmod()

View File

@ -1,6 +1,6 @@
"""Unittests for heapq."""
from heapq import heappush, heappop, heapify, heapreplace, nlargest, nsmallest
from heapq import heappush, heappop, heapify, heapreplace, merge, nlargest, nsmallest
import random
import unittest
from test import test_support
@ -103,6 +103,14 @@ class TestHeap(unittest.TestCase):
heap_sorted = [heappop(heap) for i in range(size)]
self.assertEqual(heap_sorted, sorted(data))
def test_merge(self):
inputs = []
for i in xrange(random.randrange(5)):
row = sorted(random.randrange(1000) for j in range(random.randrange(10)))
inputs.append(row)
self.assertEqual(sorted(chain(*inputs)), list(merge(*inputs)))
self.assertEqual(list(merge()), [])
def test_nsmallest(self):
data = [(random.randrange(2000), i) for i in range(1000)]
for f in (None, lambda x: x[0] * 547 % 2000):

View File

@ -128,6 +128,8 @@ Core and builtins
Library
-------
- Added heapq.merge() for merging sorted input streams.
- Have the encoding package's search function dynamically import using absolute
import semantics.