Hmm! I thought I checked this in before! Oh well.

Added new heapify() function, which transforms an arbitrary list into a
heap in linear time; that's a fundamental tool for using heaps in real
life <wink>.

Added heapify() test.  Added a "less naive" N-best algorithm to the test
suite, and noted that this could actually go much faster (building on
heapify()) if we had max-heaps instead of min-heaps (the iterative method
is appropriate when all the data isn't known in advance, but when it is
known in advance the tradeoffs get murkier).
This commit is contained in:
Tim Peters 2002-08-02 21:48:06 +00:00
parent 940dc922c0
commit 28c25527c2
2 changed files with 48 additions and 10 deletions

View File

@ -13,6 +13,7 @@ heap = [] # creates an empty heap
heappush(heap, item) # pushes a new item on the heap
item = heappop(heap) # pops the smallest item from the heap
item = heap[0] # smallest item on the heap without popping it
heapify(heap) # transform list into a heap, in-place, in linear time
Our API differs from textbook heap algorithms as follows:
@ -136,15 +137,13 @@ def heappush(heap, item):
pos = parentpos
heap[pos] = item
def heappop(heap):
"""Pop the smallest item off the heap, maintaining the heap invariant."""
endpos = len(heap) - 1
if endpos <= 0:
return heap.pop()
returnitem = heap[0]
item = heap.pop()
pos = 0
# Sift item into position, down from the root, moving the smaller
# The child indices of heap index pos are already heaps, and we want to make
# a heap at index pos too.
def _siftdown(heap, pos):
endpos = len(heap)
assert pos < endpos
item = heap[pos]
# Sift item into position, down from pos, moving the smaller
# child up, until finding pos such that item <= pos's children.
childpos = 2*pos + 1 # leftmost child position
while childpos < endpos:
@ -164,8 +163,29 @@ def heappop(heap):
pos = childpos
childpos = 2*pos + 1
heap[pos] = item
def heappop(heap):
    """Pop the smallest item off the heap, maintaining the heap invariant.

    Removes and returns heap[0].  Popping from an empty heap raises
    IndexError, courtesy of the underlying list.pop() call.
    """
    # Take the last slot off first: this shrinks the list by one and
    # raises the appropriate IndexError if the heap is empty.
    tail = heap.pop()
    if not heap:
        # The heap held exactly one item, so that item was the smallest.
        return tail
    # Otherwise the root is the answer; put the former last item in the
    # root's slot and let _siftdown settle it into position.
    smallest = heap[0]
    heap[0] = tail
    _siftdown(heap, 0)
    return smallest
def heapify(heap):
    """Transform list heap into a heap, in-place, in O(len(heap)) time."""
    # Work bottom-up, visiting only indices that have at least one child.
    # Index i has a child exactly when 2*i + 1 < len(heap), and the
    # largest such i is len(heap)//2 - 1 whether the length is even or
    # odd.  For lists of length 0 or 1 that start index is -1, so the
    # loop body never runs.
    pos = len(heap)//2 - 1
    while pos >= 0:
        _siftdown(heap, pos)
        pos -= 1
if __name__ == "__main__":
# Simple sanity test
heap = []

View File

@ -2,7 +2,7 @@
from test.test_support import verify, vereq, verbose, TestFailed
from heapq import heappush, heappop
from heapq import heappush, heappop, heapify
import random
def check_invariant(heap):
@ -40,6 +40,24 @@ def test_main():
heappop(heap)
heap.sort()
vereq(heap, data_sorted[-10:])
# 4) Test heapify.
for size in range(30):
heap = [random.random() for dummy in range(size)]
heapify(heap)
check_invariant(heap)
# 5) Less-naive "N-best" algorithm, much faster (if len(data) is big
# enough <wink>) than sorting all of data. However, if we had a max
# heap instead of a min heap, it would go much faster still via
# heapify'ing all of data (linear time), then doing 10 heappops
# (10 log-time steps).
heap = data[:10]
heapify(heap)
for item in data[10:]:
if item > heap[0]: # this gets rarer and rarer the longer we run
heappush(heap, item)
heappop(heap)
heap.sort()
vereq(heap, data_sorted[-10:])
# Make user happy
if verbose:
print "All OK"