Promote compress() from a recipe to being a regular itertool.

This commit is contained in:
Raymond Hettinger 2009-01-25 21:04:14 +00:00
parent 1af3b5a9a4
commit 2bcb8e9b0d
5 changed files with 206 additions and 13 deletions

View File

@ -298,7 +298,7 @@ counts less than one::
Section 4.6.3, Exercise 19*\.
* To enumerate all distinct multisets of a given size over a given set of
elements, see the :func:`combinations_with_replacement` function in the
elements, see :func:`combinations_with_replacement` in the
:ref:`itertools-recipes` for itertools::
map(Counter, combinations_with_replacement('ABC', 2)) --> AA AB AC BB BC CC

View File

@ -139,6 +139,20 @@ loops that truncate the stream.
.. versionadded:: 2.6
.. function:: compress(data, selectors)
Make an iterator that filters elements from *data* returning only those that
have a corresponding element in *selectors* that evaluates to ``True``.
Stops when either the *data* or *selectors* iterables have been exhausted.
Equivalent to::
def compress(data, selectors):
# compress('ABCDEF', [1,0,1,0,1,1]) --> A C E F
return (d for d, s in izip(data, selectors) if s)
.. versionadded:: 2.7
.. function:: count([n])
Make an iterator that returns consecutive integers starting with *n*. If not
@ -679,10 +693,6 @@ which incur interpreter overhead.
for n in xrange(2**len(pairs)):
yield set(x for m, x in pairs if m&n)
def compress(data, selectors):
"compress('ABCDEF', [1,0,1,0,1,1]) --> A C E F"
return (d for d, s in izip(data, selectors) if s)
def combinations_with_replacement(iterable, r):
"combinations_with_replacement('ABC', 2) --> AA AB AC BB BC CC"
# number items returned: (n+r-1)! / r! / (n-1)!

View File

@ -191,6 +191,21 @@ class TestBasicOps(unittest.TestCase):
self.assertEqual(len(set(map(id, permutations('abcde', 3)))), 1)
self.assertNotEqual(len(set(map(id, list(permutations('abcde', 3))))), 1)
def test_compress(self):
self.assertEqual(list(compress('ABCDEF', [1,0,1,0,1,1])), list('ACEF'))
self.assertEqual(list(compress('ABCDEF', [0,0,0,0,0,0])), list(''))
self.assertEqual(list(compress('ABCDEF', [1,1,1,1,1,1])), list('ABCDEF'))
self.assertEqual(list(compress('ABCDEF', [1,0,1])), list('AC'))
self.assertEqual(list(compress('ABC', [0,1,1,1,1,1])), list('BC'))
n = 10000
data = chain.from_iterable(repeat(range(6), n))
selectors = chain.from_iterable(repeat((0, 1)))
self.assertEqual(list(compress(data, selectors)), [1,3,5] * n)
self.assertRaises(TypeError, compress, None, range(6)) # 1st arg not iterable
self.assertRaises(TypeError, compress, range(6), None) # 2nd arg not iterable
self.assertRaises(TypeError, compress, range(6)) # too few args
self.assertRaises(TypeError, compress, range(6), None) # too many args
def test_count(self):
self.assertEqual(zip('abc',count()), [('a', 0), ('b', 1), ('c', 2)])
self.assertEqual(zip('abc',count(3)), [('a', 3), ('b', 4), ('c', 5)])
@ -701,6 +716,9 @@ class TestExamples(unittest.TestCase):
self.assertEqual(list(combinations(range(4), 3)),
[(0,1,2), (0,1,3), (0,2,3), (1,2,3)])
def test_compress(self):
self.assertEqual(list(compress('ABCDEF', [1,0,1,0,1,1])), list('ACEF'))
def test_count(self):
self.assertEqual(list(islice(count(10), 5)), [10, 11, 12, 13, 14])
@ -781,6 +799,10 @@ class TestGC(unittest.TestCase):
a = []
self.makecycle(combinations([1,2,a,3], 3), a)
def test_compress(self):
a = []
self.makecycle(compress('ABCDEF', [1,0,1,0,1,0]), a)
def test_cycle(self):
a = []
self.makecycle(cycle([a]*2), a)
@ -934,6 +956,15 @@ class TestVariousIteratorArgs(unittest.TestCase):
self.assertRaises(TypeError, list, chain(N(s)))
self.assertRaises(ZeroDivisionError, list, chain(E(s)))
def test_compress(self):
for s in ("123", "", range(1000), ('do', 1.2), xrange(2000,2200,5)):
n = len(s)
for g in (G, I, Ig, S, L, R):
self.assertEqual(list(compress(g(s), repeat(1))), list(g(s)))
self.assertRaises(TypeError, compress, X(s), repeat(1))
self.assertRaises(TypeError, list, compress(N(s), repeat(1)))
self.assertRaises(ZeroDivisionError, list, compress(E(s), repeat(1)))
def test_product(self):
for s in ("123", "", range(1000), ('do', 1.2), xrange(2000,2200,5)):
self.assertRaises(TypeError, product, X(s))
@ -1125,7 +1156,7 @@ class SubclassWithKwargsTest(unittest.TestCase):
def test_keywords_in_subclass(self):
# count is not subclassable...
for cls in (repeat, izip, ifilter, ifilterfalse, chain, imap,
starmap, islice, takewhile, dropwhile, cycle):
starmap, islice, takewhile, dropwhile, cycle, compress):
class Subclass(cls):
def __init__(self, newarg=None, *args):
cls.__init__(self, *args)
@ -1262,10 +1293,6 @@ Samuele
... for n in xrange(2**len(pairs)):
... yield set(x for m, x in pairs if m&n)
>>> def compress(data, selectors):
... "compress('ABCDEF', [1,0,1,0,1,1]) --> A C E F"
... return (d for d, s in izip(data, selectors) if s)
>>> def combinations_with_replacement(iterable, r):
... "combinations_with_replacement('ABC', 3) --> AA AB AC BB BC CC"
... pool = tuple(iterable)
@ -1361,9 +1388,6 @@ perform as purported.
>>> map(sorted, powerset('ab'))
[[], ['a'], ['b'], ['a', 'b']]
>>> list(compress('abcdef', [1,0,1,0,1,1]))
['a', 'c', 'e', 'f']
>>> list(combinations_with_replacement('abc', 2))
[('a', 'a'), ('a', 'b'), ('a', 'c'), ('b', 'b'), ('b', 'c'), ('c', 'c')]

View File

@ -147,6 +147,8 @@ Library
- Issue #4863: distutils.mwerkscompiler has been removed.
- Added a new function: itertools.compress().
- Fix and properly document the multiprocessing module's logging
support, expose the internal levels and provide proper usage
examples.

View File

@ -2506,6 +2506,162 @@ static PyTypeObject permutations_type = {
};
/* compress object ************************************************************/
/* Equivalent to:
def compress(data, selectors):
"compress('ABCDEF', [1,0,1,0,1,1]) --> A C E F"
return (d for d, s in izip(data, selectors) if s)
*/
typedef struct {
PyObject_HEAD
PyObject *data;
PyObject *selectors;
} compressobject;
static PyTypeObject compress_type;
static PyObject *
compress_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
PyObject *seq1, *seq2;
PyObject *data=NULL, *selectors=NULL;
compressobject *lz;
if (type == &compress_type && !_PyArg_NoKeywords("compress()", kwds))
return NULL;
if (!PyArg_UnpackTuple(args, "compress", 2, 2, &seq1, &seq2))
return NULL;
data = PyObject_GetIter(seq1);
if (data == NULL)
goto fail;
selectors = PyObject_GetIter(seq2);
if (selectors == NULL)
goto fail;
/* create compressobject structure */
lz = (compressobject *)type->tp_alloc(type, 0);
if (lz == NULL)
goto fail;
lz->data = data;
lz->selectors = selectors;
return (PyObject *)lz;
fail:
Py_XDECREF(data);
Py_XDECREF(selectors);
return NULL;
}
static void
compress_dealloc(compressobject *lz)
{
PyObject_GC_UnTrack(lz);
Py_XDECREF(lz->data);
Py_XDECREF(lz->selectors);
Py_TYPE(lz)->tp_free(lz);
}
static int
compress_traverse(compressobject *lz, visitproc visit, void *arg)
{
Py_VISIT(lz->data);
Py_VISIT(lz->selectors);
return 0;
}
static PyObject *
compress_next(compressobject *lz)
{
PyObject *data = lz->data, *selectors = lz->selectors;
PyObject *datum, *selector;
PyObject *(*datanext)(PyObject *) = *Py_TYPE(data)->tp_iternext;
PyObject *(*selectornext)(PyObject *) = *Py_TYPE(selectors)->tp_iternext;
int ok;
while (1) {
/* Steps: get datum, get selector, evaluate selector.
Order is important (to match the pure python version
in terms of which input gets a chance to raise an
exception first).
*/
datum = datanext(data);
if (datum == NULL)
return NULL;
selector = selectornext(selectors);
if (selector == NULL) {
Py_DECREF(datum);
return NULL;
}
ok = PyObject_IsTrue(selector);
Py_DECREF(selector);
if (ok == 1)
return datum;
Py_DECREF(datum);
if (ok == -1)
return NULL;
}
}
PyDoc_STRVAR(compress_doc,
"compress(data sequence, selector sequence) --> iterator over selected data\n\
\n\
Return data elements corresponding to true selector elements.\n\
Forms a shorter iterator from selected data elements using the\n\
selectors to choose the data elements.");
static PyTypeObject compress_type = {
PyVarObject_HEAD_INIT(NULL, 0)
"itertools.compress", /* tp_name */
sizeof(compressobject), /* tp_basicsize */
0, /* tp_itemsize */
/* methods */
(destructor)compress_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
PyObject_GenericGetAttr, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
Py_TPFLAGS_BASETYPE, /* tp_flags */
compress_doc, /* tp_doc */
(traverseproc)compress_traverse, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
PyObject_SelfIter, /* tp_iter */
(iternextfunc)compress_next, /* tp_iternext */
0, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
compress_new, /* tp_new */
PyObject_GC_Del, /* tp_free */
};
/* ifilter object ************************************************************/
typedef struct {
@ -3552,6 +3708,7 @@ inititertools(void)
&starmap_type,
&imap_type,
&chain_type,
&compress_type,
&ifilter_type,
&ifilterfalse_type,
&count_type,