From cc061d0e6fb2569efa91531686f75b89e94ec865 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger
Date: Mon, 30 Nov 2020 20:42:54 -0800
Subject: [PATCH] bpo-38200: Add itertools.pairwise() (GH-23549)

---
 Doc/library/itertools.rst                     |  23 ++-
 Lib/test/test_itertools.py                    |  52 ++++---
 .../2020-11-28-22-52-57.bpo-38200.DuWGlW.rst  |   1 +
 Modules/clinic/itertoolsmodule.c.h            |  33 ++++-
 Modules/itertoolsmodule.c                     | 154 +++++++++++++++++-
 5 files changed, 237 insertions(+), 26 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2020-11-28-22-52-57.bpo-38200.DuWGlW.rst

diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst
index 3de66c93492..44728b42287 100644
--- a/Doc/library/itertools.rst
+++ b/Doc/library/itertools.rst
@@ -55,6 +55,7 @@ Iterator Arguments Results
 :func:`filterfalse`             pred, seq                       elements of seq where pred(elem) is false           ``filterfalse(lambda x: x%2, range(10)) --> 0 2 4 6 8``
 :func:`groupby`                 iterable[, key]                 sub-iterators grouped by value of key(v)
 :func:`islice`                  seq, [start,] stop [, step]     elements from seq[start:stop:step]                  ``islice('ABCDEFG', 2, None) --> C D E F G``
+:func:`pairwise`                iterable                        (p[0], p[1]), (p[1], p[2])                          ``pairwise('ABCDEFG') --> AB BC CD DE EF FG``
 :func:`starmap`                 func, seq                       func(\*seq[0]), func(\*seq[1]), ...                 ``starmap(pow, [(2,5), (3,2), (10,3)]) --> 32 9 1000``
 :func:`takewhile`               pred, seq                       seq[0], seq[1], until pred fails                    ``takewhile(lambda x: x<5, [1,4,6,4,1]) --> 1 4``
 :func:`tee`                     it, n                           it1, it2, ... itn splits one iterator into n
@@ -475,6 +476,22 @@ loops that truncate the stream.
    If *start* is ``None``, then iteration starts at zero. If *step* is ``None``,
    then the step defaults to one.
 
+.. function:: pairwise(iterable)
+
+   Return successive overlapping pairs taken from the input *iterable*.
+
+   The number of 2-tuples in the output iterator will be one fewer than the
+   number of inputs. It will be empty if the input iterable has fewer than
+   two values.
+
+   Roughly equivalent to::
+
+       def pairwise(iterable):
+           # pairwise('ABCDEFG') --> AB BC CD DE EF FG
+           a, b = tee(iterable)
+           next(b, None)
+           return zip(a, b)
+
 
 .. function:: permutations(iterable, r=None)
 
@@ -782,12 +799,6 @@ which incur interpreter overhead.
            return starmap(func, repeat(args))
        return starmap(func, repeat(args, times))
 
-   def pairwise(iterable):
-       "s -> (s0,s1), (s1,s2), (s2, s3), ..."
-       a, b = tee(iterable)
-       next(b, None)
-       return zip(a, b)
-
    def grouper(iterable, n, fillvalue=None):
        "Collect data into fixed-length chunks or blocks"
        # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx"
diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py
index 702cf082031..df2997e87d4 100644
--- a/Lib/test/test_itertools.py
+++ b/Lib/test/test_itertools.py
@@ -1024,6 +1024,25 @@ class TestBasicOps(unittest.TestCase):
         self.assertEqual(next(it), (1, 2))
         self.assertRaises(RuntimeError, next, it)
 
+    def test_pairwise(self):
+        self.assertEqual(list(pairwise('')), [])
+        self.assertEqual(list(pairwise('a')), [])
+        self.assertEqual(list(pairwise('ab')),
+                              [('a', 'b')])
+        self.assertEqual(list(pairwise('abcde')),
+                              [('a', 'b'), ('b', 'c'), ('c', 'd'), ('d', 'e')])
+        self.assertEqual(list(pairwise(range(10_000))),
+                         list(zip(range(10_000), range(1, 10_000))))
+
+        with self.assertRaises(TypeError):
+            pairwise()                                      # too few arguments
+        with self.assertRaises(TypeError):
+            pairwise('abc', 10)                             # too many arguments
+        with self.assertRaises(TypeError):
+            pairwise(iterable='abc')                        # keyword arguments
+        with self.assertRaises(TypeError):
+            pairwise(None)                                  # non-iterable argument
+
     def test_product(self):
         for args, result in [
             ([], [()]),                     # zero iterables
@@ -1787,6 +1806,10 @@ class TestGC(unittest.TestCase):
         a = []
         self.makecycle(islice([a]*2, None), a)
 
+    def test_pairwise(self):
+        a = []
+        self.makecycle(pairwise([a]*5), a)
+
     def test_permutations(self):
         a = []
         self.makecycle(permutations([1,2,a,3], 3), a)
@@ -1995,6 +2018,17 @@ class TestVariousIteratorArgs(unittest.TestCase):
             self.assertRaises(TypeError, islice, N(s), 10)
             self.assertRaises(ZeroDivisionError, list, islice(E(s), 10))
 
+    def test_pairwise(self):
+        for s in ("123", "", range(1000), ('do', 1.2), range(2000,2200,5)):
+            for g in (G, I, Ig, S, L, R):
+                seq = list(g(s))
+                expected = list(zip(seq, seq[1:]))
+                actual = list(pairwise(g(s)))
+                self.assertEqual(actual, expected)
+            self.assertRaises(TypeError, pairwise, X(s))
+            self.assertRaises(TypeError, pairwise, N(s))
+            self.assertRaises(ZeroDivisionError, list, pairwise(E(s)))
+
     def test_starmap(self):
         for s in (range(10), range(0), range(100), (7,11), range(20,50,5)):
             for g in (G, I, Ig, S, L, R):
@@ -2312,15 +2346,6 @@ Samuele
 ...     else:
 ...         return starmap(func, repeat(args, times))
 
->>> def pairwise(iterable):
-...     "s -> (s0,s1), (s1,s2), (s2, s3), ..."
-...     a, b = tee(iterable)
-...     try:
-...         next(b)
-...     except StopIteration:
-...         pass
-...     return zip(a, b)
-
 >>> def grouper(n, iterable, fillvalue=None):
 ...     "grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx"
 ...     args = [iter(iterable)] * n
@@ -2451,15 +2476,6 @@ True
 >>> take(5, map(int, repeatfunc(random.random)))
 [0, 0, 0, 0, 0]
 
->>> list(pairwise('abcd'))
-[('a', 'b'), ('b', 'c'), ('c', 'd')]
-
->>> list(pairwise([]))
-[]
-
->>> list(pairwise('a'))
-[]
-
 >>> list(islice(pad_none('abc'), 0, 6))
 ['a', 'b', 'c', None, None, None]
 
diff --git a/Misc/NEWS.d/next/Library/2020-11-28-22-52-57.bpo-38200.DuWGlW.rst b/Misc/NEWS.d/next/Library/2020-11-28-22-52-57.bpo-38200.DuWGlW.rst
new file mode 100644
index 00000000000..b4bc5551b25
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-11-28-22-52-57.bpo-38200.DuWGlW.rst
@@ -0,0 +1 @@
+Added itertools.pairwise()
diff --git a/Modules/clinic/itertoolsmodule.c.h b/Modules/clinic/itertoolsmodule.c.h
index c1192bbcb0d..82729eeb56b 100644
--- a/Modules/clinic/itertoolsmodule.c.h
+++ b/Modules/clinic/itertoolsmodule.c.h
@@ -2,6 +2,37 @@
 preserve
 [clinic start generated code]*/
 
+PyDoc_STRVAR(pairwise_new__doc__,
+"pairwise(iterable, /)\n"
+"--\n"
+"\n"
+"Return an iterator of overlapping pairs taken from the input iterator.\n"
+"\n"
+"    s -> (s0,s1), (s1,s2), (s2, s3), ...");
+
+static PyObject *
+pairwise_new_impl(PyTypeObject *type, PyObject *iterable);
+
+static PyObject *
+pairwise_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
+{
+    PyObject *return_value = NULL;
+    PyObject *iterable;
+
+    if ((type == &pairwise_type) &&
+        !_PyArg_NoKeywords("pairwise", kwargs)) {
+        goto exit;
+    }
+    if (!_PyArg_CheckPositional("pairwise", PyTuple_GET_SIZE(args), 1, 1)) {
+        goto exit;
+    }
+    iterable = PyTuple_GET_ITEM(args, 0);
+    return_value = pairwise_new_impl(type, iterable);
+
+exit:
+    return return_value;
+}
+
 PyDoc_STRVAR(itertools_groupby__doc__,
 "groupby(iterable, key=None)\n"
 "--\n"
@@ -627,4 +658,4 @@ skip_optional_pos:
 exit:
     return return_value;
 }
-/*[clinic end generated code: output=d7f58dc477814b45 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=889c4afc3b13574f input=a9049054013a1b77]*/
diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c
index ce8b4347ef2..7144856c352 100644
--- a/Modules/itertoolsmodule.c
+++ b/Modules/itertoolsmodule.c
@@ -1,4 +1,5 @@
 
+
 #define PY_SSIZE_T_CLEAN
 #include "Python.h"
 #include "pycore_long.h"          // _PyLong_GetZero()
@@ -27,8 +28,9 @@ class itertools.accumulate "accumulateobject *" "&accumulate_type"
 class itertools.compress "compressobject *" "&compress_type"
 class itertools.filterfalse "filterfalseobject *" "&filterfalse_type"
 class itertools.count "countobject *" "&count_type"
+class itertools.pairwise "pairwiseobject *" "&pairwise_type"
 [clinic start generated code]*/
-/*[clinic end generated code: output=da39a3ee5e6b4b0d input=ea05c93c6d94726a]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=6498ed21fbe1bf94]*/
 
 static PyTypeObject groupby_type;
 static PyTypeObject _grouper_type;
@@ -45,9 +47,157 @@ static PyTypeObject accumulate_type;
 static PyTypeObject compress_type;
 static PyTypeObject filterfalse_type;
 static PyTypeObject count_type;
+static PyTypeObject pairwise_type;
 
 #include "clinic/itertoolsmodule.c.h"
 
+/* pairwise object ***********************************************************/
+
+typedef struct {
+    PyObject_HEAD
+    PyObject *it;       /* input iterator; NULL once it is exhausted */
+    PyObject *old;      /* last item fetched (first of the next pair), or NULL */
+} pairwiseobject;
+
+/*[clinic input]
+@classmethod
+itertools.pairwise.__new__ as pairwise_new
+    iterable: object
+    /
+Return an iterator of overlapping pairs taken from the input iterator.
+
+    s -> (s0,s1), (s1,s2), (s2, s3), ...
+
+[clinic start generated code]*/
+
+static PyObject *
+pairwise_new_impl(PyTypeObject *type, PyObject *iterable)
+/*[clinic end generated code: output=9f0267062d384456 input=6e7c3cddb431a8d6]*/
+{
+    PyObject *it;
+    pairwiseobject *po;
+
+    it = PyObject_GetIter(iterable);
+    if (it == NULL) {
+        return NULL;
+    }
+    po = (pairwiseobject *)type->tp_alloc(type, 0);
+    if (po == NULL) {
+        Py_DECREF(it);
+        return NULL;
+    }
+    po->it = it;
+    po->old = NULL;
+    return (PyObject *)po;
+}
+
+static void
+pairwise_dealloc(pairwiseobject *po)
+{
+    PyObject_GC_UnTrack(po);
+    Py_XDECREF(po->it);
+    Py_XDECREF(po->old);
+    Py_TYPE(po)->tp_free(po);
+}
+
+static int
+pairwise_traverse(pairwiseobject *po, visitproc visit, void *arg)
+{
+    Py_VISIT(po->it);
+    Py_VISIT(po->old);
+    return 0;
+}
+
+/* Return the next (old, new) pair, or NULL when the input is exhausted
+   or raises.  The first call consumes two items from the input; later
+   calls consume one and reuse the previous item as the first element. */
+static PyObject *
+pairwise_next(pairwiseobject *po)
+{
+    PyObject *it = po->it;
+    PyObject *old = po->old;
+    PyObject *new, *result;
+
+    if (it == NULL) {
+        return NULL;
+    }
+    /* tp_iternext may run arbitrary code that re-enters this function and
+       clears or replaces po->it and po->old.  Hold our own references to
+       every object that is used after such a call. */
+    Py_INCREF(it);
+    if (old == NULL) {
+        old = (*Py_TYPE(it)->tp_iternext)(it);
+        if (old == NULL) {
+            Py_CLEAR(po->it);
+            Py_CLEAR(po->old);
+            Py_DECREF(it);
+            return NULL;
+        }
+    }
+    else {
+        Py_INCREF(old);
+    }
+    new = (*Py_TYPE(it)->tp_iternext)(it);
+    Py_DECREF(it);
+    if (new == NULL) {
+        Py_CLEAR(po->it);
+        Py_CLEAR(po->old);
+        Py_DECREF(old);
+        return NULL;
+    }
+    /* Future optimization: Reuse the result tuple as we do in enumerate() */
+    result = PyTuple_Pack(2, old, new);
+    /* po->old takes over our reference to new.  A re-entrant call may have
+       left po->old NULL, hence the X variant. */
+    Py_XSETREF(po->old, new);
+    Py_DECREF(old);
+    return result;
+}
+
+static PyTypeObject pairwise_type = {
+    PyVarObject_HEAD_INIT(&PyType_Type, 0)
+    "itertools.pairwise",               /* tp_name */
+    sizeof(pairwiseobject),             /* tp_basicsize */
+    0,                                  /* tp_itemsize */
+    /* methods */
+    (destructor)pairwise_dealloc,       /* tp_dealloc */
+    0,                                  /* tp_vectorcall_offset */
+    0,                                  /* tp_getattr */
+    0,                                  /* tp_setattr */
+    0,                                  /* tp_as_async */
+    0,                                  /* tp_repr */
+    0,                                  /* tp_as_number */
+    0,                                  /* tp_as_sequence */
+    0,                                  /* tp_as_mapping */
+    0,                                  /* tp_hash */
+    0,                                  /* tp_call */
+    0,                                  /* tp_str */
+    PyObject_GenericGetAttr,            /* tp_getattro */
+    0,                                  /* tp_setattro */
+    0,                                  /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
+        Py_TPFLAGS_BASETYPE,            /* tp_flags */
+    pairwise_new__doc__,                /* tp_doc */
+    (traverseproc)pairwise_traverse,    /* tp_traverse */
+    0,                                  /* tp_clear */
+    0,                                  /* tp_richcompare */
+    0,                                  /* tp_weaklistoffset */
+    PyObject_SelfIter,                  /* tp_iter */
+    (iternextfunc)pairwise_next,        /* tp_iternext */
+    0,                                  /* tp_methods */
+    0,                                  /* tp_members */
+    0,                                  /* tp_getset */
+    0,                                  /* tp_base */
+    0,                                  /* tp_dict */
+    0,                                  /* tp_descr_get */
+    0,                                  /* tp_descr_set */
+    0,                                  /* tp_dictoffset */
+    0,                                  /* tp_init */
+    PyType_GenericAlloc,                /* tp_alloc */
+    pairwise_new,                       /* tp_new */
+    PyObject_GC_Del,                    /* tp_free */
+};
+
 
 /* groupby object ************************************************************/
 
@@ -4666,6 +4816,7 @@ groupby(iterable[, keyfunc]) --> sub-iterators grouped by value of keyfunc(v)\n\
 filterfalse(pred, seq) --> elements of seq where pred(elem) is False\n\
 islice(seq, [start,] stop [, step]) --> elements from\n\
        seq[start:stop:step]\n\
+pairwise(s) --> (s[0],s[1]), (s[1],s[2]), (s[2], s[3]), ...\n\
 starmap(fun, seq) --> fun(*seq[0]), fun(*seq[1]), ...\n\
 tee(it, n=2) --> (it1, it2 , ... itn) splits one iterator into n\n\
 takewhile(pred, seq) --> seq[0], seq[1], until pred fails\n\
@@ -4695,6 +4846,7 @@ itertoolsmodule_exec(PyObject *m)
         &filterfalse_type,
         &count_type,
         &ziplongest_type,
+        &pairwise_type,
         &permutations_type,
         &product_type,
         &repeat_type,