Issue #13742: Add key and reverse parameters to heapq.merge()

This commit is contained in:
Raymond Hettinger 2014-05-30 02:28:36 -07:00
parent e7bfe13635
commit 35db43955c
5 changed files with 98 additions and 19 deletions

View File

@ -444,12 +444,13 @@ Glossary
A number of tools in Python accept key functions to control how elements
are ordered or grouped. They include :func:`min`, :func:`max`,
:func:`sorted`, :meth:`list.sort`, :func:`heapq.nsmallest`,
:func:`heapq.nlargest`, and :func:`itertools.groupby`.
:func:`sorted`, :meth:`list.sort`, :func:`heapq.merge`,
:func:`heapq.nsmallest`, :func:`heapq.nlargest`, and
:func:`itertools.groupby`.
There are several ways to create a key function. For example, the
:meth:`str.lower` method can serve as a key function for case insensitive
sorts. Alternatively, an ad-hoc key function can be built from a
sorts. Alternatively, a key function can be built from a
:keyword:`lambda` expression such as ``lambda r: (r[0], r[2])``. Also,
the :mod:`operator` module provides three key function constructors:
:func:`~operator.attrgetter`, :func:`~operator.itemgetter`, and

View File

@ -81,7 +81,7 @@ The following functions are provided:
The module also offers three general purpose functions based on heaps.
.. function:: merge(*iterables)
.. function:: merge(*iterables, key=None, reverse=False)
Merge multiple sorted inputs into a single sorted output (for example, merge
timestamped entries from multiple log files). Returns an :term:`iterator`
@ -91,6 +91,18 @@ The module also offers three general purpose functions based on heaps.
not pull the data into memory all at once, and assumes that each of the input
streams is already sorted (smallest to largest).
Has two optional arguments which must be specified as keyword arguments.
*key* specifies a :term:`key function` of one argument that is used to
extract a comparison key from each input element. The default value is
``None`` (compare the elements directly).
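*reverse* is a boolean value. If set to ``True``, then the input elements
are merged as if each comparison were reversed. To achieve behavior similar
to ``sorted(itertools.chain(*iterables), reverse=True)``, all iterables must
be sorted from largest to smallest.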
.. versionchanged:: 3.5
Added the optional *key* and *reverse* parameters.
.. function:: nlargest(n, iterable, key=None)

View File

@ -176,6 +176,16 @@ def heapify(x):
for i in reversed(range(n//2)):
_siftup(x, i)
def _heappop_max(heap):
    """Maxheap version of a heappop.

    Removes the last list element, and if anything remains, returns the
    former root after placing that last element at index 0 and sifting it
    with ``_siftup_max``.  When the popped element was the only one, it is
    returned directly.  Raises IndexError if *heap* is empty.
    """
    tail = heap.pop()  # IndexError from list.pop() propagates on an empty heap
    if not heap:
        # The heap held a single element, so that element is the result.
        return tail
    top = heap[0]
    heap[0] = tail
    _siftup_max(heap, 0)
    return top
def _heapreplace_max(heap, item):
"""Maxheap version of a heappop followed by a heappush."""
returnitem = heap[0] # raises appropriate IndexError if heap is empty
@ -311,7 +321,7 @@ try:
except ImportError:
pass
def merge(*iterables):
def merge(*iterables, key=None, reverse=False):
'''Merge multiple sorted inputs into a single sorted output.
Similar to sorted(itertools.chain(*iterables)) but returns a generator,
@ -321,31 +331,73 @@ def merge(*iterables):
>>> list(merge([1,3,5,7], [0,2,4,8], [5,10,15,20], [], [25]))
[0, 1, 2, 3, 4, 5, 5, 7, 8, 10, 15, 20, 25]
If *key* is not None, applies a key function to each element to determine
its sort order.
>>> list(merge(['dog', 'horse'], ['cat', 'fish', 'kangaroo'], key=len))
['dog', 'cat', 'fish', 'horse', 'kangaroo']
'''
h = []
h_append = h.append
if reverse:
_heapify = _heapify_max
_heappop = _heappop_max
_heapreplace = _heapreplace_max
direction = -1
else:
_heapify = heapify
_heappop = heappop
_heapreplace = heapreplace
direction = 1
if key is None:
for order, it in enumerate(map(iter, iterables)):
try:
next = it.__next__
h_append([next(), order * direction, next])
except StopIteration:
pass
_heapify(h)
while len(h) > 1:
try:
while True:
value, order, next = s = h[0]
yield value
s[0] = next() # raises StopIteration when exhausted
_heapreplace(h, s) # restore heap condition
except StopIteration:
_heappop(h) # remove empty iterator
if h:
# fast case when only a single iterator remains
value, order, next = h[0]
yield value
yield from next.__self__
return
for order, it in enumerate(map(iter, iterables)):
try:
next = it.__next__
h_append([next(), order, next])
value = next()
h_append([key(value), order * direction, value, next])
except StopIteration:
pass
heapify(h)
_heapreplace = heapreplace
_heapify(h)
while len(h) > 1:
try:
while True:
value, order, next = s = h[0]
key_value, order, value, next = s = h[0]
yield value
s[0] = next() # raises StopIteration when exhausted
_heapreplace(h, s) # restore heap condition
value = next()
s[0] = key(value)
s[2] = value
_heapreplace(h, s)
except StopIteration:
heappop(h) # remove empty iterator
_heappop(h)
if h:
# fast case when only a single iterator remains
value, order, next = h[0]
key_value, order, value, next = h[0]
yield value
yield from next.__self__

View File

@ -6,6 +6,7 @@ import unittest
from test import support
from unittest import TestCase, skipUnless
from operator import itemgetter
py_heapq = support.import_fresh_module('heapq', blocked=['_heapq'])
c_heapq = support.import_fresh_module('heapq', fresh=['_heapq'])
@ -152,11 +153,21 @@ class TestHeap:
def test_merge(self):
inputs = []
for i in range(random.randrange(5)):
row = sorted(random.randrange(1000) for j in range(random.randrange(10)))
for i in range(random.randrange(25)):
row = []
for j in range(random.randrange(100)):
tup = random.choice('ABC'), random.randrange(-500, 500)
row.append(tup)
inputs.append(row)
self.assertEqual(sorted(chain(*inputs)), list(self.module.merge(*inputs)))
self.assertEqual(list(self.module.merge()), [])
for key in [None, itemgetter(0), itemgetter(1), itemgetter(1, 0)]:
for reverse in [False, True]:
seqs = []
for seq in inputs:
seqs.append(sorted(seq, key=key, reverse=reverse))
self.assertEqual(sorted(chain(*inputs), key=key, reverse=reverse),
list(self.module.merge(*seqs, key=key, reverse=reverse)))
self.assertEqual(list(self.module.merge()), [])
def test_merge_does_not_suppress_index_error(self):
# Issue 19018: Heapq.merge suppresses IndexError from user generator

View File

@ -94,6 +94,9 @@ Library
error bubble up as this "bad data" appears in many real world zip files in
the wild and is ignored by other zip tools.
- Issue #13742: Added "key" and "reverse" parameters to heapq.merge().
(First draft of patch contributed by Simon Sapin.)
- Issue #21402: tkinter.ttk now works when default root window is not set.
- Issue #3015: _tkinter.create() now creates tkapp object with wantobject=1 by