Add merge() function to heapq.
This commit is contained in:
parent
d6fc72a5ae
commit
00166c5532
|
@ -88,7 +88,18 @@ True
|
|||
>>>
|
||||
\end{verbatim}
|
||||
|
||||
The module also offers two general purpose functions based on heaps.
|
||||
The module also offers three general purpose functions based on heaps.
|
||||
|
||||
\begin{funcdesc}{merge}{*iterables}
|
||||
Merge multiple sorted inputs into a single sorted output (for example, merge
|
||||
timestamped entries from multiple log files). Returns an iterator over
|
||||
over the sorted values.
|
||||
|
||||
Similar to \code{sorted(itertools.chain(*iterables))} but returns an iterable,
|
||||
does not pull the data into memory all at once, and reduces the number of
|
||||
comparisons by assuming that each of the input streams is already sorted.
|
||||
\versionadded{2.6}
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{funcdesc}{nlargest}{n, iterable\optional{, key}}
|
||||
Return a list with the \var{n} largest elements from the dataset defined
|
||||
|
@ -110,7 +121,7 @@ Equivalent to: \samp{sorted(iterable, key=key)[:n]}
|
|||
\versionchanged[Added the optional \var{key} argument]{2.5}
|
||||
\end{funcdesc}
|
||||
|
||||
Both functions perform best for smaller values of \var{n}. For larger
|
||||
The latter two functions perform best for smaller values of \var{n}. For larger
|
||||
values, it is more efficient to use the \function{sorted()} function. Also,
|
||||
when \code{n==1}, it is more efficient to use the builtin \function{min()}
|
||||
and \function{max()} functions.
|
||||
|
|
42
Lib/heapq.py
42
Lib/heapq.py
|
@ -126,8 +126,8 @@ Believe me, real good tape sorts were quite spectacular to watch!
|
|||
From all times, sorting has always been a Great Art! :-)
|
||||
"""
|
||||
|
||||
__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'nlargest',
|
||||
'nsmallest']
|
||||
__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'merge',
|
||||
'nlargest', 'nsmallest']
|
||||
|
||||
from itertools import islice, repeat, count, imap, izip, tee
|
||||
from operator import itemgetter, neg
|
||||
|
@ -308,6 +308,41 @@ try:
|
|||
except ImportError:
|
||||
pass
|
||||
|
||||
def merge(*iterables):
|
||||
'''Merge multiple sorted inputs into a single sorted output.
|
||||
|
||||
Similar to sorted(itertools.chain(*iterables)) but returns an iterable,
|
||||
does not pull the data into memory all at once, and reduces the number
|
||||
of comparisons by assuming that each of the input streams is already sorted.
|
||||
|
||||
>>> list(merge([1,3,5,7], [0,2,4,8], [5,10,15,20], [], [25]))
|
||||
[0, 1, 2, 3, 4, 5, 5, 7, 8, 10, 15, 20, 25]
|
||||
|
||||
'''
|
||||
_heappop, siftup, _StopIteration = heappop, _siftup, StopIteration
|
||||
|
||||
h = []
|
||||
h_append = h.append
|
||||
for it in map(iter, iterables):
|
||||
try:
|
||||
next = it.next
|
||||
h_append([next(), next])
|
||||
except _StopIteration:
|
||||
pass
|
||||
heapify(h)
|
||||
|
||||
while 1:
|
||||
try:
|
||||
while 1:
|
||||
v, next = s = h[0] # raises IndexError when h is empty
|
||||
yield v
|
||||
s[0] = next() # raises StopIteration when exhausted
|
||||
siftup(h, 0) # restore heap condition
|
||||
except _StopIteration:
|
||||
_heappop(h) # remove empty iterator
|
||||
except IndexError:
|
||||
return
|
||||
|
||||
# Extend the implementations of nsmallest and nlargest to use a key= argument
|
||||
_nsmallest = nsmallest
|
||||
def nsmallest(n, iterable, key=None):
|
||||
|
@ -341,3 +376,6 @@ if __name__ == "__main__":
|
|||
while heap:
|
||||
sort.append(heappop(heap))
|
||||
print sort
|
||||
|
||||
import doctest
|
||||
doctest.testmod()
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
"""Unittests for heapq."""
|
||||
|
||||
from heapq import heappush, heappop, heapify, heapreplace, nlargest, nsmallest
|
||||
from heapq import heappush, heappop, heapify, heapreplace, merge, nlargest, nsmallest
|
||||
import random
|
||||
import unittest
|
||||
from test import test_support
|
||||
|
@ -103,6 +103,14 @@ class TestHeap(unittest.TestCase):
|
|||
heap_sorted = [heappop(heap) for i in range(size)]
|
||||
self.assertEqual(heap_sorted, sorted(data))
|
||||
|
||||
def test_merge(self):
|
||||
inputs = []
|
||||
for i in xrange(random.randrange(5)):
|
||||
row = sorted(random.randrange(1000) for j in range(random.randrange(10)))
|
||||
inputs.append(row)
|
||||
self.assertEqual(sorted(chain(*inputs)), list(merge(*inputs)))
|
||||
self.assertEqual(list(merge()), [])
|
||||
|
||||
def test_nsmallest(self):
|
||||
data = [(random.randrange(2000), i) for i in range(1000)]
|
||||
for f in (None, lambda x: x[0] * 547 % 2000):
|
||||
|
|
Loading…
Reference in New Issue