SF patch #969791: Add nlargest() and nsmallest() to heapq.
This commit is contained in:
parent
7d019664d7
commit
33ecffb65a
|
@ -83,6 +83,30 @@ True
|
|||
>>>
|
||||
\end{verbatim}
|
||||
|
||||
The module also offers two general purpose functions based on heaps.
|
||||
|
||||
\begin{funcdesc}{nlargest}{iterable, n}
|
||||
Return a list with the \var{n} largest elements from the dataset defined
|
||||
by \var{iterable}. Equivalent to: \code{sorted(iterable, reverse=True)[:n]}
|
||||
\versionadded{2.4}
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{funcdesc}{nsmallest}{iterable, n}
|
||||
Return a list with the \var{n} smallest elements from the dataset defined
|
||||
by \var{iterable}. Equivalent to: \code{sorted(iterable)[:n]}
|
||||
\versionadded{2.4}
|
||||
\end{funcdesc}
|
||||
|
||||
Though the above functions appear symmetrical, they each have different
|
||||
speed and space requirements. In particular, \function{nsmallest()}
|
||||
operates on a full copy of the dataset. In contrast, \function{nlargest()}
|
||||
only requires storage space for \var{n} elements.
|
||||
|
||||
Both functions perform best for smaller values of \var{n}. For larger
|
||||
values, it is more efficient to use the \function{sorted()} function. Also,
|
||||
when \code{n==1}, it is more efficient to use the builtin \function{min()}
|
||||
and \function{max()} functions.
|
||||
|
||||
|
||||
\subsection{Theory}
|
||||
|
||||
|
|
|
@ -449,7 +449,10 @@ improved performance: \module{Queue}, \module{mutex}, \module{shlex}
|
|||
|
||||
\item The \module{heapq} module has been converted to C. The resulting
|
||||
tenfold improvement in speed makes the module suitable for handling
|
||||
high volumes of data.
|
||||
high volumes of data. In addition, the module has two new functions
|
||||
\function{nlargest()} and \function{nsmallest()} that use heaps to
|
||||
find the largest or smallest n values in a dataset without the
|
||||
expense of a full sort.
|
||||
|
||||
\item The \module{imaplib} module now supports IMAP's THREAD command.
|
||||
(Contributed by Yves Dionne.)
|
||||
|
|
36
Lib/heapq.py
36
Lib/heapq.py
|
@ -30,7 +30,7 @@ without surprises: heap[0] is the smallest item, and heap.sort()
|
|||
maintains the heap invariant!
|
||||
"""
|
||||
|
||||
# Original code by Kevin O'Connor, augmented by Tim Peters
|
||||
# Original code by Kevin O'Connor, augmented by Tim Peters and Raymond Hettinger
|
||||
|
||||
__about__ = """Heap queues
|
||||
|
||||
|
@ -126,7 +126,10 @@ Believe me, real good tape sorts were quite spectacular to watch!
|
|||
From all times, sorting has always been a Great Art! :-)
|
||||
"""
|
||||
|
||||
__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace']
|
||||
__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'nlargest',
|
||||
'nsmallest']
|
||||
|
||||
from itertools import islice, repeat
|
||||
|
||||
def heappush(heap, item):
|
||||
"""Push item onto heap, maintaining the heap invariant."""
|
||||
|
@ -168,6 +171,35 @@ def heapify(x):
|
|||
for i in reversed(xrange(n//2)):
|
||||
_siftup(x, i)
|
||||
|
||||
def nlargest(iterable, n):
|
||||
"""Find the n largest elements in a dataset.
|
||||
|
||||
Equivalent to: sorted(iterable, reverse=True)[:n]
|
||||
"""
|
||||
it = iter(iterable)
|
||||
result = list(islice(it, n))
|
||||
if not result:
|
||||
return result
|
||||
heapify(result)
|
||||
_heapreplace = heapreplace
|
||||
sol = result[0] # sol --> smallest of the nlargest
|
||||
for elem in it:
|
||||
if elem <= sol:
|
||||
continue
|
||||
_heapreplace(result, elem)
|
||||
sol = result[0]
|
||||
result.sort(reverse=True)
|
||||
return result
|
||||
|
||||
def nsmallest(iterable, n):
|
||||
"""Find the n smallest elements in a dataset.
|
||||
|
||||
Equivalent to: sorted(iterable)[:n]
|
||||
"""
|
||||
h = list(iterable)
|
||||
heapify(h)
|
||||
return map(heappop, repeat(h, min(n, len(h))))
|
||||
|
||||
# 'heap' is a heap at all indices >= startpos, except possibly for pos. pos
|
||||
# is the index of a leaf with a possibly out-of-order value. Restore the
|
||||
# heap invariant.
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
from test.test_support import verify, vereq, verbose, TestFailed
|
||||
|
||||
from heapq import heappush, heappop, heapify, heapreplace
|
||||
from heapq import heappush, heappop, heapify, heapreplace, nlargest, nsmallest
|
||||
import random
|
||||
|
||||
def check_invariant(heap):
|
||||
|
@ -84,6 +84,15 @@ def test_main():
|
|||
data.sort()
|
||||
sorted = [heappop(heap) for i in range(size)]
|
||||
vereq(data, sorted)
|
||||
|
||||
# 7) Check nlargest() and nsmallest()
|
||||
data = [random.randrange(2000) for i in range(1000)]
|
||||
copy = data[:]
|
||||
copy.sort(reverse=True)
|
||||
vereq(nlargest(data, 400), copy[:400])
|
||||
copy.sort()
|
||||
vereq(nsmallest(data, 400), copy[:400])
|
||||
|
||||
# Make user happy
|
||||
if verbose:
|
||||
print "All OK"
|
||||
|
|
|
@ -416,7 +416,9 @@ Library
|
|||
os.path.exists(), switched to using os.lstat() directly if possible.
|
||||
|
||||
- bisect.py and heapq.py now have underlying C implementations
|
||||
for better performance
|
||||
for better performance.
|
||||
|
||||
- heapq.py has two new functions, nsmallest() and nlargest().
|
||||
|
||||
- traceback.format_exc has been added (similar to print_exc but it returns
|
||||
a string).
|
||||
|
|
Loading…
Reference in New Issue