bpo-38200: Add itertools.pairwise() (GH-23549)

This commit is contained in:
Raymond Hettinger 2020-11-30 20:42:54 -08:00 committed by GitHub
parent 427613f005
commit cc061d0e6f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 220 additions and 26 deletions

View File

@ -55,6 +55,7 @@ Iterator Arguments Results
:func:`filterfalse` pred, seq elements of seq where pred(elem) is false ``filterfalse(lambda x: x%2, range(10)) --> 0 2 4 6 8``
:func:`groupby` iterable[, key] sub-iterators grouped by value of key(v)
:func:`islice` seq, [start,] stop [, step] elements from seq[start:stop:step] ``islice('ABCDEFG', 2, None) --> C D E F G``
:func:`pairwise` iterable (p[0], p[1]), (p[1], p[2]) ``pairwise('ABCDEFG') --> AB BC CD DE EF FG``
:func:`starmap` func, seq func(\*seq[0]), func(\*seq[1]), ... ``starmap(pow, [(2,5), (3,2), (10,3)]) --> 32 9 1000``
:func:`takewhile` pred, seq seq[0], seq[1], until pred fails ``takewhile(lambda x: x<5, [1,4,6,4,1]) --> 1 4``
:func:`tee` it, n it1, it2, ... itn splits one iterator into n
@ -475,6 +476,22 @@ loops that truncate the stream.
If *start* is ``None``, then iteration starts at zero. If *step* is ``None``,
then the step defaults to one.
.. function:: pairwise(iterable)
Return successive overlapping pairs taken from the input *iterable*.
The number of 2-tuples in the output iterator will be one fewer than the
number of inputs. It will be empty if the input iterable has fewer than
two values.
Roughly equivalent to::
def pairwise(iterable):
    # pairwise('ABCDEFG') --> AB BC CD DE EF FG
    current, following = tee(iterable)
    # Advance the second copy by one item; the None default keeps an
    # empty input from raising StopIteration here.
    next(following, None)
    return zip(current, following)
.. function:: permutations(iterable, r=None)
@ -782,12 +799,6 @@ which incur interpreter overhead.
return starmap(func, repeat(args))
return starmap(func, repeat(args, times))
def pairwise(iterable):
"s -> (s0,s1), (s1,s2), (s2, s3), ..."
a, b = tee(iterable)
next(b, None)
return zip(a, b)
def grouper(iterable, n, fillvalue=None):
"Collect data into fixed-length chunks or blocks"
# grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx"

View File

@ -1024,6 +1024,25 @@ class TestBasicOps(unittest.TestCase):
self.assertEqual(next(it), (1, 2))
self.assertRaises(RuntimeError, next, it)
def test_pairwise(self):
self.assertEqual(list(pairwise('')), [])
self.assertEqual(list(pairwise('a')), [])
self.assertEqual(list(pairwise('ab')),
[('a', 'b')]),
self.assertEqual(list(pairwise('abcde')),
[('a', 'b'), ('b', 'c'), ('c', 'd'), ('d', 'e')])
self.assertEqual(list(pairwise(range(10_000))),
list(zip(range(10_000), range(1, 10_000))))
with self.assertRaises(TypeError):
pairwise() # too few arguments
with self.assertRaises(TypeError):
pairwise('abc', 10) # too many arguments
with self.assertRaises(TypeError):
pairwise(iterable='abc') # keyword arguments
with self.assertRaises(TypeError):
pairwise(None) # non-iterable argument
def test_product(self):
for args, result in [
([], [()]), # zero iterables
@ -1787,6 +1806,10 @@ class TestGC(unittest.TestCase):
a = []
self.makecycle(islice([a]*2, None), a)
def test_pairwise(self):
    # Hand makecycle a pairwise iterator over a list whose items are all
    # the same container object, together with that container.
    container = []
    source = [container] * 5
    self.makecycle(pairwise(source), container)
def test_permutations(self):
a = []
self.makecycle(permutations([1,2,a,3], 3), a)
@ -1995,6 +2018,17 @@ class TestVariousIteratorArgs(unittest.TestCase):
self.assertRaises(TypeError, islice, N(s), 10)
self.assertRaises(ZeroDivisionError, list, islice(E(s), 10))
def test_pairwise(self):
    inputs = ("123", "", range(1000), ('do', 1.2), range(2000,2200,5))
    for s in inputs:
        # Each wrapper in this group is expected to iterate normally, so
        # pairwise() must agree with zipping the materialized items
        # against themselves shifted by one position.
        for wrapper in (G, I, Ig, S, L, R):
            items = list(wrapper(s))
            self.assertEqual(list(pairwise(wrapper(s))),
                             list(zip(items, items[1:])))
        # X and N are expected to be rejected when pairwise() is
        # constructed; E is expected to fail only once iteration starts.
        for rejected in (X, N):
            self.assertRaises(TypeError, pairwise, rejected(s))
        self.assertRaises(ZeroDivisionError, list, pairwise(E(s)))
def test_starmap(self):
for s in (range(10), range(0), range(100), (7,11), range(20,50,5)):
for g in (G, I, Ig, S, L, R):
@ -2312,15 +2346,6 @@ Samuele
... else:
... return starmap(func, repeat(args, times))
>>> def pairwise(iterable):
... "s -> (s0,s1), (s1,s2), (s2, s3), ..."
... a, b = tee(iterable)
... try:
... next(b)
... except StopIteration:
... pass
... return zip(a, b)
>>> def grouper(n, iterable, fillvalue=None):
... "grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx"
... args = [iter(iterable)] * n
@ -2451,15 +2476,6 @@ True
>>> take(5, map(int, repeatfunc(random.random)))
[0, 0, 0, 0, 0]
>>> list(pairwise('abcd'))
[('a', 'b'), ('b', 'c'), ('c', 'd')]
>>> list(pairwise([]))
[]
>>> list(pairwise('a'))
[]
>>> list(islice(pad_none('abc'), 0, 6))
['a', 'b', 'c', None, None, None]

View File

@ -0,0 +1 @@
Added itertools.pairwise(), which returns successive overlapping pairs taken from the input iterable.

View File

@ -2,6 +2,37 @@
preserve
[clinic start generated code]*/
PyDoc_STRVAR(pairwise_new__doc__,
"pairwise(iterable, /)\n"
"--\n"
"\n"
"Return an iterator of overlapping pairs taken from the input iterator.\n"
"\n"
" s -> (s0,s1), (s1,s2), (s2, s3), ...");
static PyObject *
pairwise_new_impl(PyTypeObject *type, PyObject *iterable);
static PyObject *
pairwise_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
PyObject *return_value = NULL;
PyObject *iterable;
if ((type == &pairwise_type) &&
!_PyArg_NoKeywords("pairwise", kwargs)) {
goto exit;
}
if (!_PyArg_CheckPositional("pairwise", PyTuple_GET_SIZE(args), 1, 1)) {
goto exit;
}
iterable = PyTuple_GET_ITEM(args, 0);
return_value = pairwise_new_impl(type, iterable);
exit:
return return_value;
}
PyDoc_STRVAR(itertools_groupby__doc__,
"groupby(iterable, key=None)\n"
"--\n"
@ -627,4 +658,4 @@ skip_optional_pos:
exit:
return return_value;
}
/*[clinic end generated code: output=d7f58dc477814b45 input=a9049054013a1b77]*/
/*[clinic end generated code: output=889c4afc3b13574f input=a9049054013a1b77]*/

View File

@ -1,4 +1,5 @@
#define PY_SSIZE_T_CLEAN
#include "Python.h"
#include "pycore_long.h" // _PyLong_GetZero()
@ -27,8 +28,9 @@ class itertools.accumulate "accumulateobject *" "&accumulate_type"
class itertools.compress "compressobject *" "&compress_type"
class itertools.filterfalse "filterfalseobject *" "&filterfalse_type"
class itertools.count "countobject *" "&count_type"
class itertools.pairwise "pairwiseobject *" "&pairwise_type"
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=ea05c93c6d94726a]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=6498ed21fbe1bf94]*/
static PyTypeObject groupby_type;
static PyTypeObject _grouper_type;
@ -45,9 +47,140 @@ static PyTypeObject accumulate_type;
static PyTypeObject compress_type;
static PyTypeObject filterfalse_type;
static PyTypeObject count_type;
static PyTypeObject pairwise_type;
#include "clinic/itertoolsmodule.c.h"
/* pairwise object ***********************************************************/
/* Per-instance state of an itertools.pairwise iterator. */
typedef struct {
    PyObject_HEAD
    PyObject *it;       /* iterator obtained from the argument via
                           PyObject_GetIter(); set to NULL by pairwise_next()
                           once iteration has finished or failed */
    PyObject *old;      /* item most recently fetched from `it`, used as the
                           first element of the next pair; NULL until the
                           first item has been fetched */
} pairwiseobject;
/*[clinic input]
@classmethod
itertools.pairwise.__new__ as pairwise_new
iterable: object
/
Return an iterator of overlapping pairs taken from the input iterator.
s -> (s0,s1), (s1,s2), (s2, s3), ...
[clinic start generated code]*/
static PyObject *
pairwise_new_impl(PyTypeObject *type, PyObject *iterable)
/*[clinic end generated code: output=9f0267062d384456 input=6e7c3cddb431a8d6]*/
{
    /* Ask for the iterator before allocating anything, so an argument that
       cannot be iterated fails without creating a pairwise object. */
    PyObject *iterator = PyObject_GetIter(iterable);
    if (iterator == NULL) {
        return NULL;
    }

    pairwiseobject *self = (pairwiseobject *)type->tp_alloc(type, 0);
    if (self != NULL) {
        self->it = iterator;    /* the object now owns the iterator reference */
        self->old = NULL;       /* nothing has been fetched yet */
        return (PyObject *)self;
    }

    /* Allocation failed: give back the iterator reference obtained above. */
    Py_DECREF(iterator);
    return NULL;
}
/* tp_dealloc: release the references held by a pairwise object and free it. */
static void
pairwise_dealloc(pairwiseobject *po)
{
    /* Remove the object from GC tracking before its fields are torn down. */
    PyObject_GC_UnTrack(po);
    /* Either field may be NULL (`old` starts out NULL and both are cleared
       when iteration ends), hence the X variants. */
    Py_XDECREF(po->it);
    Py_XDECREF(po->old);
    Py_TYPE(po)->tp_free(po);
}
/* tp_traverse: report the objects referenced by a pairwise object (the
   underlying iterator and the remembered previous item) to the visitor.
   The parameter names `visit` and `arg` are the ones Py_VISIT expects. */
static int
pairwise_traverse(pairwiseobject *po, visitproc visit, void *arg)
{
    Py_VISIT(po->it);
    Py_VISIT(po->old);
    return 0;
}
/* tp_iternext: return the next (previous, current) 2-tuple.
 *
 * Returns a new reference to a tuple, or NULL when the underlying iterator
 * is exhausted or raised (in which case both `it` and `old` are cleared so
 * every later call returns NULL immediately).
 *
 * The underlying iterator's tp_iternext may run arbitrary Python code,
 * including code that calls next() on this very pairwise object.  Such a
 * nested call can replace or clear po->it and po->old, so this function
 *   - never overwrites po->old without releasing what is stored there,
 *   - re-reads po->it after the first fetch, and
 *   - holds its own strong reference to `old` across the second fetch.
 */
static PyObject *
pairwise_next(pairwiseobject *po)
{
    PyObject *it = po->it;
    PyObject *old = po->old;
    PyObject *new, *result;

    if (it == NULL) {
        /* Already finished. */
        return NULL;
    }
    if (old == NULL) {
        /* First call: prime the "previous item" slot. */
        old = (*Py_TYPE(it)->tp_iternext)(it);
        /* A nested call may already have stored an item in po->old;
           Py_XSETREF releases it instead of leaking it. */
        Py_XSETREF(po->old, old);
        if (old == NULL) {
            Py_CLEAR(po->it);
            return NULL;
        }
        /* A nested call may have exhausted the iterator and cleared
           po->it, leaving the local `it` stale. */
        it = po->it;
        if (it == NULL) {
            Py_CLEAR(po->old);
            return NULL;
        }
    }
    /* po->old owns the only reference we know of; keep `old` alive in case
       a nested call replaces po->old during the fetch below. */
    Py_INCREF(old);
    new = (*Py_TYPE(it)->tp_iternext)(it);
    if (new == NULL) {
        Py_CLEAR(po->it);
        Py_CLEAR(po->old);
        Py_DECREF(old);
        return NULL;
    }
    /* Future optimization: Reuse the result tuple as we do in enumerate() */
    result = PyTuple_Pack(2, old, new);
    /* po->old may have been cleared by a nested call, so use the
       NULL-tolerant variant; it takes over our reference to `new`. */
    Py_XSETREF(po->old, new);
    Py_DECREF(old);
    return result;
}
/* Type object for itertools.pairwise.
 *
 * Slots are filled positionally.  The type takes part in cyclic garbage
 * collection (Py_TPFLAGS_HAVE_GC with pairwise_traverse as tp_traverse and
 * PyObject_GC_Del as tp_free), may be subclassed (Py_TPFLAGS_BASETYPE), and
 * is its own iterator (PyObject_SelfIter as tp_iter, pairwise_next as
 * tp_iternext).  No tp_clear is provided.
 */
static PyTypeObject pairwise_type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "itertools.pairwise",               /* tp_name */
    sizeof(pairwiseobject),             /* tp_basicsize */
    0,                                  /* tp_itemsize */
    /* methods */
    (destructor)pairwise_dealloc,       /* tp_dealloc */
    0,                                  /* tp_vectorcall_offset */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_as_async */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
        Py_TPFLAGS_BASETYPE,            /* tp_flags */
    pairwise_new__doc__,                /* tp_doc */
    (traverseproc)pairwise_traverse,    /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    PyObject_SelfIter,                  /* tp_iter */
    (iternextfunc)pairwise_next,        /* tp_iternext */
    0,                                  /* tp_methods */
    0,                                  /* tp_members */
    0,                                  /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    0,                                  /* tp_init */
    PyType_GenericAlloc,                /* tp_alloc */
    pairwise_new,                       /* tp_new */
    PyObject_GC_Del,                    /* tp_free */
};
/* groupby object ************************************************************/
@ -4666,6 +4799,7 @@ groupby(iterable[, keyfunc]) --> sub-iterators grouped by value of keyfunc(v)\n\
filterfalse(pred, seq) --> elements of seq where pred(elem) is False\n\
islice(seq, [start,] stop [, step]) --> elements from\n\
seq[start:stop:step]\n\
pairwise(s) --> (s[0],s[1]), (s[1],s[2]), (s[2], s[3]), ...\n\
starmap(fun, seq) --> fun(*seq[0]), fun(*seq[1]), ...\n\
tee(it, n=2) --> (it1, it2 , ... itn) splits one iterator into n\n\
takewhile(pred, seq) --> seq[0], seq[1], until pred fails\n\
@ -4695,6 +4829,7 @@ itertoolsmodule_exec(PyObject *m)
&filterfalse_type,
&count_type,
&ziplongest_type,
&pairwise_type,
&permutations_type,
&product_type,
&repeat_type,