mirror of https://github.com/python/cpython
Hmm! I thought I checked this in before! Oh well.
Added new heapify() function, which transforms an arbitrary list into a heap in linear time; that's a fundamental tool for using heaps in real life <wink>. Added heapify() test. Added a "less naive" N-best algorithm to the test suite, and noted that this could actually go much faster (building on heapify()) if we had max-heaps instead of min-heaps (the iterative method is appropriate when all the data isn't known in advance, but when it is known in advance the tradeoffs get murkier).
This commit is contained in:
parent
940dc922c0
commit
28c25527c2
38
Lib/heapq.py
38
Lib/heapq.py
|
@ -13,6 +13,7 @@ heap = [] # creates an empty heap
|
|||
heappush(heap, item) # pushes a new item on the heap
|
||||
item = heappop(heap) # pops the smallest item from the heap
|
||||
item = heap[0] # smallest item on the heap without popping it
|
||||
heapify(heap) # transform list into a heap, in-place, in linear time
|
||||
|
||||
Our API differs from textbook heap algorithms as follows:
|
||||
|
||||
|
@ -136,15 +137,13 @@ def heappush(heap, item):
|
|||
pos = parentpos
|
||||
heap[pos] = item
|
||||
|
||||
def heappop(heap):
|
||||
"""Pop the smallest item off the heap, maintaining the heap invariant."""
|
||||
endpos = len(heap) - 1
|
||||
if endpos <= 0:
|
||||
return heap.pop()
|
||||
returnitem = heap[0]
|
||||
item = heap.pop()
|
||||
pos = 0
|
||||
# Sift item into position, down from the root, moving the smaller
|
||||
# The child indices of heap index pos are already heaps, and we want to make
|
||||
# a heap at index pos too.
|
||||
def _siftdown(heap, pos):
|
||||
endpos = len(heap)
|
||||
assert pos < endpos
|
||||
item = heap[pos]
|
||||
# Sift item into position, down from pos, moving the smaller
|
||||
# child up, until finding pos such that item <= pos's children.
|
||||
childpos = 2*pos + 1 # leftmost child position
|
||||
while childpos < endpos:
|
||||
|
@ -164,8 +163,29 @@ def heappop(heap):
|
|||
pos = childpos
|
||||
childpos = 2*pos + 1
|
||||
heap[pos] = item
|
||||
|
||||
def heappop(heap):
    """Remove and return the smallest item, keeping the heap invariant intact."""
    # Pop the tail first: on an empty heap this raises list.pop()'s own
    # IndexError before anything else is touched.
    tail = heap.pop()
    if not heap:
        # The heap held exactly one item, so the tail was also the root.
        return tail
    # Otherwise the root is the answer.  Put the old tail in its place and
    # let _siftdown re-establish heap order starting from index 0.
    smallest = heap[0]
    heap[0] = tail
    _siftdown(heap, 0)
    return smallest
|
||||
|
||||
def heapify(heap):
    """Rearrange the list heap into heap order, in-place, in O(len(heap)) time."""
    size = len(heap)
    # Build the heap bottom-up.  Only indices that have at least one child
    # in range need sifting, i.e. those with 2*pos + 1 < size, which means
    # pos < (size-1)/2.  For even size = 2*j that bound is j - 1/2, so the
    # largest such index is j-1 = size//2 - 1.  For odd size = 2*j+1 the
    # bound is j, so the largest index is again j-1 = size//2 - 1.
    pos = size//2 - 1
    while pos >= 0:
        _siftdown(heap, pos)
        pos -= 1
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Simple sanity test
|
||||
heap = []
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
from test.test_support import verify, vereq, verbose, TestFailed
|
||||
|
||||
from heapq import heappush, heappop
|
||||
from heapq import heappush, heappop, heapify
|
||||
import random
|
||||
|
||||
def check_invariant(heap):
|
||||
|
@ -40,6 +40,24 @@ def test_main():
|
|||
heappop(heap)
|
||||
heap.sort()
|
||||
vereq(heap, data_sorted[-10:])
|
||||
# 4) Test heapify.
|
||||
for size in range(30):
|
||||
heap = [random.random() for dummy in range(size)]
|
||||
heapify(heap)
|
||||
check_invariant(heap)
|
||||
# 5) Less-naive "N-best" algorithm, much faster (if len(data) is big
|
||||
# enough <wink>) than sorting all of data. However, if we had a max
|
||||
# heap instead of a min heap, it would go much faster still via
|
||||
# heapify'ing all of data (linear time), then doing 10 heappops
|
||||
# (10 log-time steps).
|
||||
heap = data[:10]
|
||||
heapify(heap)
|
||||
for item in data[10:]:
|
||||
if item > heap[0]: # this gets rarer and rarer the longer we run
|
||||
heappush(heap, item)
|
||||
heappop(heap)
|
||||
heap.sort()
|
||||
vereq(heap, data_sorted[-10:])
|
||||
# Make user happy
|
||||
if verbose:
|
||||
print "All OK"
|
||||
|
|
Loading…
Reference in New Issue