Issue #16098: Update heapq.nsmallest to use the same algorithm as nlargest.
This removes the dependency on bisect and brings the pure Python code in sync with the C code.
This commit is contained in:
parent
31584e30ab
commit
f6b26676bc
84
Lib/heapq.py
84
Lib/heapq.py
|
@ -127,8 +127,7 @@ From all times, sorting has always been a Great Art! :-)
|
||||||
__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'merge',
|
__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'merge',
|
||||||
'nlargest', 'nsmallest', 'heappushpop']
|
'nlargest', 'nsmallest', 'heappushpop']
|
||||||
|
|
||||||
from itertools import islice, repeat, count, tee, chain
|
from itertools import islice, count, tee, chain
|
||||||
import bisect
|
|
||||||
|
|
||||||
def heappush(heap, item):
|
def heappush(heap, item):
|
||||||
"""Push item onto heap, maintaining the heap invariant."""
|
"""Push item onto heap, maintaining the heap invariant."""
|
||||||
|
@ -180,6 +179,19 @@ def heapify(x):
|
||||||
for i in reversed(range(n//2)):
|
for i in reversed(range(n//2)):
|
||||||
_siftup(x, i)
|
_siftup(x, i)
|
||||||
|
|
||||||
|
def _heappushpop_max(heap, item):
    """Maxheap version of a heappush followed by a heappop.

    Returns the larger of item and the current root.  When item is smaller
    than the root, item takes the root's slot, is sifted into position with
    _siftup_max(), and the old root is returned.  Otherwise the heap is left
    untouched and item itself is returned.
    """
    # With an empty heap, or an item that is not smaller than the root, the
    # pushed item would be popped straight back out, so skip the heap work.
    if not heap or not item < heap[0]:
        return item
    largest = heap[0]
    heap[0] = item
    _siftup_max(heap, 0)
    return largest
|
||||||
|
|
||||||
|
def _heapify_max(x):
    """Transform list into a maxheap, in-place, in O(len(x)) time.

    Calls _siftup_max() on every index below len(x)//2, from the highest
    such index down to 0.
    """
    # Walk the first half of the list backwards: len(x)//2 - 1, ..., 1, 0.
    for index in range(len(x) // 2 - 1, -1, -1):
        _siftup_max(x, index)
|
||||||
|
|
||||||
def nlargest(n, iterable):
|
def nlargest(n, iterable):
|
||||||
"""Find the n largest elements in a dataset.
|
"""Find the n largest elements in a dataset.
|
||||||
|
|
||||||
|
@ -205,30 +217,16 @@ def nsmallest(n, iterable):
|
||||||
"""
|
"""
|
||||||
if n < 0:
|
if n < 0:
|
||||||
return []
|
return []
|
||||||
if hasattr(iterable, '__len__') and n * 10 <= len(iterable):
|
it = iter(iterable)
|
||||||
# For smaller values of n, the bisect method is faster than a minheap.
|
result = list(islice(it, n))
|
||||||
# It is also memory efficient, consuming only n elements of space.
|
if not result:
|
||||||
it = iter(iterable)
|
|
||||||
result = sorted(islice(it, 0, n))
|
|
||||||
if not result:
|
|
||||||
return result
|
|
||||||
insort = bisect.insort
|
|
||||||
pop = result.pop
|
|
||||||
los = result[-1] # los --> Largest of the nsmallest
|
|
||||||
for elem in it:
|
|
||||||
if elem < los:
|
|
||||||
insort(result, elem)
|
|
||||||
pop()
|
|
||||||
los = result[-1]
|
|
||||||
return result
|
return result
|
||||||
# An alternative approach manifests the whole iterable in memory but
|
_heapify_max(result)
|
||||||
# saves comparisons by heapifying all at once. Also, saves time
|
_heappushpop = _heappushpop_max
|
||||||
# over bisect.insort() which has O(n) data movement time for every
|
for elem in it:
|
||||||
# insertion. Finding the n smallest of an m length iterable requires
|
_heappushpop(result, elem)
|
||||||
# O(m) + O(n log m) comparisons.
|
result.sort()
|
||||||
h = list(iterable)
|
return result
|
||||||
heapify(h)
|
|
||||||
return list(map(heappop, repeat(h, min(n, len(h)))))
|
|
||||||
|
|
||||||
# 'heap' is a heap at all indices >= startpos, except possibly for pos. pos
|
# 'heap' is a heap at all indices >= startpos, except possibly for pos. pos
|
||||||
# is the index of a leaf with a possibly out-of-order value. Restore the
|
# is the index of a leaf with a possibly out-of-order value. Restore the
|
||||||
|
@ -306,6 +304,42 @@ def _siftup(heap, pos):
|
||||||
heap[pos] = newitem
|
heap[pos] = newitem
|
||||||
_siftdown(heap, startpos, pos)
|
_siftdown(heap, startpos, pos)
|
||||||
|
|
||||||
|
def _siftdown_max(heap, startpos, pos):
|
||||||
|
'Maxheap variant of _siftdown'
|
||||||
|
newitem = heap[pos]
|
||||||
|
# Follow the path to the root, moving parents down until finding a place
|
||||||
|
# newitem fits.
|
||||||
|
while pos > startpos:
|
||||||
|
parentpos = (pos - 1) >> 1
|
||||||
|
parent = heap[parentpos]
|
||||||
|
if parent < newitem:
|
||||||
|
heap[pos] = parent
|
||||||
|
pos = parentpos
|
||||||
|
continue
|
||||||
|
break
|
||||||
|
heap[pos] = newitem
|
||||||
|
|
||||||
|
def _siftup_max(heap, pos):
    """Maxheap variant of _siftup.

    Takes the item at index pos, promotes the larger child at each level
    until a leaf position is reached, stores the item in that leaf slot and
    then calls _siftdown_max() to move it back up towards the original pos.
    Elements are compared with "<" only.
    """
    endpos = len(heap)
    startpos = pos
    newitem = heap[pos]
    # Bubble up the larger child until hitting a leaf.
    childpos = 2*pos + 1    # leftmost child position
    while childpos < endpos:
        # Set childpos to index of larger child.  When the two children
        # compare equal, the right child is chosen.
        rightpos = childpos + 1
        if rightpos < endpos and not heap[rightpos] < heap[childpos]:
            childpos = rightpos
        # Move the larger child up.
        heap[pos] = heap[childpos]
        pos = childpos
        childpos = 2*pos + 1
    # The leaf at pos is empty now.  Put newitem there, and bubble it up
    # to its final resting place (by sifting its parents down).
    heap[pos] = newitem
    _siftdown_max(heap, startpos, pos)
|
||||||
|
|
||||||
# If available, use C implementation
|
# If available, use C implementation
|
||||||
try:
|
try:
|
||||||
from _heapq import *
|
from _heapq import *
|
||||||
|
|
Loading…
Reference in New Issue