From 2e3dfaf7070900c459d5879530dbdb3680f7fb9d Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 13 Jun 2004 05:26:33 +0000 Subject: [PATCH] Install C version of heapq.nsmallest(). --- Doc/lib/libheapq.tex | 5 -- Lib/heapq.py | 2 +- Lib/test/test_heapq.py | 25 ++++-- Modules/_heapqmodule.c | 167 +++++++++++++++++++++++++++++++++++++++-- 4 files changed, 182 insertions(+), 17 deletions(-) diff --git a/Doc/lib/libheapq.tex b/Doc/lib/libheapq.tex index 4585058b321..5a140ab81c8 100644 --- a/Doc/lib/libheapq.tex +++ b/Doc/lib/libheapq.tex @@ -97,11 +97,6 @@ by \var{iterable}. Equivalent to: \code{sorted(iterable)[:n]} \versionadded{2.4} \end{funcdesc} -Though the above functions appear symmetrical, they each have different -speed and space requirements. In particular, \function{nsmallest()} -operates on a full copy of the dataset. In contrast, \function{nlargest()} -only requires storage space for \var{n} elements. - Both functions perform best for smaller values of \var{n}. For larger values, it is more efficient to use the \function{sorted()} function. Also, when \code{n==1}, it is more efficient to use the builtin \function{min()} diff --git a/Lib/heapq.py b/Lib/heapq.py index 65f41550420..09f996aff80 100644 --- a/Lib/heapq.py +++ b/Lib/heapq.py @@ -300,7 +300,7 @@ def _siftup(heap, pos): # If available, use C implementation try: - from _heapq import heappush, heappop, heapify, heapreplace + from _heapq import heappush, heappop, heapify, heapreplace, nlargest, nsmallest except ImportError: pass diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py index 1cdaabe8861..b6fec9f47a8 100644 --- a/Lib/test/test_heapq.py +++ b/Lib/test/test_heapq.py @@ -4,6 +4,7 @@ from heapq import heappush, heappop, heapify, heapreplace, nlargest, nsmallest import random import unittest from test import test_support +import sys def heapiter(heap): @@ -91,16 +92,28 @@ class TestHeap(unittest.TestCase): def test_nsmallest(self): data = [random.randrange(2000) for i in range(1000)] - self.assertEqual(nsmallest(data, 400), sorted(data)[:400]) - self.assertEqual(nsmallest(data, 50), sorted(data)[:50]) + for i in (0, 1, 2, 10, 100, 400, 999, 1000, 1100): + self.assertEqual(nsmallest(data, i), sorted(data)[:i]) def test_largest(self): data = [random.randrange(2000) for i in range(1000)] - self.assertEqual(nlargest(data, 400), sorted(data, reverse=True)[:400]) + for i in (0, 1, 2, 10, 100, 400, 999, 1000, 1100): + self.assertEqual(nlargest(data, i), sorted(data, reverse=True)[:i]) -def test_main(): - test_support.run_unittest(TestHeap) +def test_main(verbose=None): + test_classes = [TestHeap] + test_support.run_unittest(*test_classes) + + # verify reference counting + if verbose and hasattr(sys, "gettotalrefcount"): + import gc + counts = [None] * 5 + for i in xrange(len(counts)): + test_support.run_unittest(*test_classes) + gc.collect() + counts[i] = sys.gettotalrefcount() + print counts if __name__ == "__main__": - test_main() + test_main(verbose=True) diff --git a/Modules/_heapqmodule.c b/Modules/_heapqmodule.c index 8fe742ff0eb..61ee4134392 100644 --- a/Modules/_heapqmodule.c +++ b/Modules/_heapqmodule.c @@ -219,7 +219,7 @@ PyDoc_STRVAR(heapify_doc, static PyObject * nlargest(PyObject *self, PyObject *args) { - PyObject *heap=NULL, *elem, *rv, *iterable, *sol, *it, *oldelem; + PyObject *heap=NULL, *elem, *iterable, *sol, *it, *oldelem; int i, n; if (!PyArg_ParseTuple(args, "Oi:nlargest", &iterable, &n)) @@ -246,10 +246,9 @@ nlargest(PyObject *self, PyObject *args) if (PyList_GET_SIZE(heap) == 0) goto sortit; - rv = heapify(self, heap); - if (rv == NULL) - goto fail; - Py_DECREF(rv); + for (i=n/2-1 ; i>=0 ; i--) + if(_siftup((PyListObject *)heap, i) == -1) + goto fail; sol = PyList_GET_ITEM(heap, 0); while (1) { @@ -290,6 +289,162 @@ PyDoc_STRVAR(nlargest_doc, \n\ Equivalent to: sorted(iterable, reverse=True)[:n]\n"); +static int +_siftdownmax(PyListObject *heap, int startpos, int pos) +{ + PyObject *newitem, *parent; + int cmp, parentpos; + + assert(PyList_Check(heap)); + if (pos >= PyList_GET_SIZE(heap)) { + PyErr_SetString(PyExc_IndexError, "index out of range"); + return -1; + } + + newitem = PyList_GET_ITEM(heap, pos); + Py_INCREF(newitem); + /* Follow the path to the root, moving parents down until finding + a place newitem fits. */ + while (pos > startpos){ + parentpos = (pos - 1) >> 1; + parent = PyList_GET_ITEM(heap, parentpos); + cmp = PyObject_RichCompareBool(newitem, parent, Py_LE); + if (cmp == -1) + return -1; + if (cmp == 1) + break; + Py_INCREF(parent); + Py_DECREF(PyList_GET_ITEM(heap, pos)); + PyList_SET_ITEM(heap, pos, parent); + pos = parentpos; + } + Py_DECREF(PyList_GET_ITEM(heap, pos)); + PyList_SET_ITEM(heap, pos, newitem); + return 0; +} + +static int +_siftupmax(PyListObject *heap, int pos) +{ + int startpos, endpos, childpos, rightpos; + int cmp; + PyObject *newitem, *tmp; + + assert(PyList_Check(heap)); + endpos = PyList_GET_SIZE(heap); + startpos = pos; + if (pos >= endpos) { + PyErr_SetString(PyExc_IndexError, "index out of range"); + return -1; + } + newitem = PyList_GET_ITEM(heap, pos); + Py_INCREF(newitem); + + /* Bubble up the smaller child until hitting a leaf. */ + childpos = 2*pos + 1; /* leftmost child position */ + while (childpos < endpos) { + /* Set childpos to index of smaller child. */ + rightpos = childpos + 1; + if (rightpos < endpos) { + cmp = PyObject_RichCompareBool( + PyList_GET_ITEM(heap, childpos), + PyList_GET_ITEM(heap, rightpos), + Py_LE); + if (cmp == -1) + return -1; + if (cmp == 1) + childpos = rightpos; + } + /* Move the smaller child up. */ + tmp = PyList_GET_ITEM(heap, childpos); + Py_INCREF(tmp); + Py_DECREF(PyList_GET_ITEM(heap, pos)); + PyList_SET_ITEM(heap, pos, tmp); + pos = childpos; + childpos = 2*pos + 1; + } + + /* The leaf at pos is empty now. Put newitem there, and and bubble + it up to its final resting place (by sifting its parents down). */ + Py_DECREF(PyList_GET_ITEM(heap, pos)); + PyList_SET_ITEM(heap, pos, newitem); + return _siftdownmax(heap, startpos, pos); +} + +static PyObject * +nsmallest(PyObject *self, PyObject *args) +{ + PyObject *heap=NULL, *elem, *iterable, *los, *it, *oldelem; + int i, n; + + if (!PyArg_ParseTuple(args, "Oi:nsmallest", &iterable, &n)) + return NULL; + + it = PyObject_GetIter(iterable); + if (it == NULL) + return NULL; + + heap = PyList_New(0); + if (it == NULL) + goto fail; + + for (i=0 ; i=0 ; i--) + if(_siftupmax((PyListObject *)heap, i) == -1) + goto fail; + + los = PyList_GET_ITEM(heap, 0); + while (1) { + elem = PyIter_Next(it); + if (elem == NULL) { + if (PyErr_Occurred()) + goto fail; + else + goto sortit; + } + if (PyObject_RichCompareBool(los, elem, Py_LE)) { + Py_DECREF(elem); + continue; + } + + oldelem = PyList_GET_ITEM(heap, 0); + PyList_SET_ITEM(heap, 0, elem); + Py_DECREF(oldelem); + if (_siftupmax((PyListObject *)heap, 0) == -1) + goto fail; + los = PyList_GET_ITEM(heap, 0); + } + +sortit: + Py_DECREF(it); + if (PyList_Sort(heap) == -1) + goto fail; + return heap; + +fail: + Py_DECREF(it); + Py_XDECREF(heap); + return NULL; +} + +PyDoc_STRVAR(nsmallest_doc, +"Find the n smallest elements in a dataset.\n\ +\n\ +Equivalent to: sorted(iterable)[:n]\n"); + static PyMethodDef heapq_methods[] = { {"heappush", (PyCFunction)heappush, METH_VARARGS, heappush_doc}, @@ -301,6 +456,8 @@ static PyMethodDef heapq_methods[] = { METH_O, heapify_doc}, {"nlargest", (PyCFunction)nlargest, METH_VARARGS, nlargest_doc}, + {"nsmallest", (PyCFunction)nsmallest, + METH_VARARGS, nsmallest_doc}, {NULL, NULL} /* sentinel */ };