From 9f146819598013046dfb4d24ef4f2a748e6c5930 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Sat, 27 Apr 2013 00:20:04 +0200 Subject: [PATCH] Issue #17804: New function ``struct.iter_unpack`` allows for streaming struct unpacking. --- Doc/library/struct.rst | 20 +++++ Lib/struct.py | 1 + Lib/test/test_struct.py | 74 +++++++++++++++++- Misc/NEWS | 3 + Modules/_struct.c | 165 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 262 insertions(+), 1 deletion(-) diff --git a/Doc/library/struct.rst b/Doc/library/struct.rst index 994506c2fe7..f2ea3619294 100644 --- a/Doc/library/struct.rst +++ b/Doc/library/struct.rst @@ -66,6 +66,19 @@ The module defines the following exception and functions: format (``len(buffer[offset:])`` must be at least ``calcsize(fmt)``). +.. function:: iter_unpack(fmt, buffer) + + Iteratively unpack from the buffer *buffer* according to the format + string *fmt*. This function returns an iterator which will read + equally-sized chunks from the buffer until all its contents have been + consumed. The buffer's size in bytes must be a multiple of the amount + of data required by the format, as reflected by :func:`calcsize`. + + Each iteration yields a tuple as specified by the format string. + + .. versionadded:: 3.4 + + .. function:: calcsize(fmt) Return the size of the struct (and hence of the bytes object produced by @@ -388,6 +401,13 @@ The :mod:`struct` module also defines the following type: (``len(buffer[offset:])`` must be at least :attr:`self.size`). + .. method:: iter_unpack(buffer) + + Identical to the :func:`iter_unpack` function, using the compiled format. + (``len(buffer)`` must be a multiple of :attr:`self.size`). + + .. versionadded:: 3.4 + .. attribute:: format The format string used to construct this Struct object. diff --git a/Lib/struct.py b/Lib/struct.py index 9bfc23f8d5b..d6bba588636 100644 --- a/Lib/struct.py +++ b/Lib/struct.py @@ -1,6 +1,7 @@ __all__ = [ # Functions 'calcsize', 'pack', 'pack_into', 'unpack', 'unpack_from', + 'iter_unpack', # Classes 'Struct', diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py index eb97a2cc7a8..8ffa7e6144a 100644 --- a/Lib/test/test_struct.py +++ b/Lib/test/test_struct.py @@ -1,4 +1,6 @@ +from collections import abc import array +import operator import unittest import struct import sys @@ -593,8 +595,78 @@ class StructTest(unittest.TestCase): self.check_sizeof('0s', 1) self.check_sizeof('0c', 0) + +class UnpackIteratorTest(unittest.TestCase): + """ + Tests for iterative unpacking (struct.Struct.iter_unpack). + """ + + def test_construct(self): + def _check_iterator(it): + self.assertIsInstance(it, abc.Iterator) + self.assertIsInstance(it, abc.Iterable) + s = struct.Struct('>ibcp') + it = s.iter_unpack(b"") + _check_iterator(it) + it = s.iter_unpack(b"1234567") + _check_iterator(it) + # Wrong bytes length + with self.assertRaises(struct.error): + s.iter_unpack(b"123456") + with self.assertRaises(struct.error): + s.iter_unpack(b"12345678") + # Zero-length struct + s = struct.Struct('>') + with self.assertRaises(struct.error): + s.iter_unpack(b"") + with self.assertRaises(struct.error): + s.iter_unpack(b"12") + + def test_iterate(self): + s = struct.Struct('>IB') + b = bytes(range(1, 16)) + it = s.iter_unpack(b) + self.assertEqual(next(it), (0x01020304, 5)) + self.assertEqual(next(it), (0x06070809, 10)) + self.assertEqual(next(it), (0x0b0c0d0e, 15)) + self.assertRaises(StopIteration, next, it) + self.assertRaises(StopIteration, next, it) + + def test_arbitrary_buffer(self): + s = struct.Struct('>IB') + b = bytes(range(1, 11)) + it = s.iter_unpack(memoryview(b)) + self.assertEqual(next(it), (0x01020304, 5)) + self.assertEqual(next(it), (0x06070809, 10)) + self.assertRaises(StopIteration, next, it) + self.assertRaises(StopIteration, next, it) + + def test_length_hint(self): + lh = operator.length_hint + s = struct.Struct('>IB') + b = bytes(range(1, 16)) + it = s.iter_unpack(b) + self.assertEqual(lh(it), 3) + next(it) + self.assertEqual(lh(it), 2) + next(it) + self.assertEqual(lh(it), 1) + next(it) + self.assertEqual(lh(it), 0) + self.assertRaises(StopIteration, next, it) + self.assertEqual(lh(it), 0) + + def test_module_func(self): + # Sanity check for the global struct.iter_unpack() + it = struct.iter_unpack('>IB', bytes(range(1, 11))) + self.assertEqual(next(it), (0x01020304, 5)) + self.assertEqual(next(it), (0x06070809, 10)) + self.assertRaises(StopIteration, next, it) + self.assertRaises(StopIteration, next, it) + + def test_main(): - support.run_unittest(StructTest) + support.run_unittest(__name__) if __name__ == '__main__': test_main() diff --git a/Misc/NEWS b/Misc/NEWS index 273f11c92ba..5c1016df62e 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -49,6 +49,9 @@ Core and Builtins Library ------- +- Issue #17804: New function ``struct.iter_unpack`` allows for streaming + struct unpacking. + - Issue #17830: When keyword.py is used to update a keyword file, it now preserves the line endings of the original file. diff --git a/Modules/_struct.c b/Modules/_struct.c index 208559cf1c2..2dec4ed6d74 100644 --- a/Modules/_struct.c +++ b/Modules/_struct.c @@ -1247,6 +1247,9 @@ align(Py_ssize_t size, char c, const formatdef *e) return size; } +/* + * Struct object implementation. + */ /* calculate the size of a format string */ @@ -1556,6 +1559,142 @@ s_unpack_from(PyObject *self, PyObject *args, PyObject *kwds) } +/* Unpack iterator type */ + +typedef struct { + PyObject_HEAD + PyStructObject *so; + Py_buffer buf; + Py_ssize_t index; +} unpackiterobject; + +static void +unpackiter_dealloc(unpackiterobject *self) +{ + Py_XDECREF(self->so); + PyBuffer_Release(&self->buf); + PyObject_GC_Del(self); +} + +static int +unpackiter_traverse(unpackiterobject *self, visitproc visit, void *arg) +{ + Py_VISIT(self->so); + Py_VISIT(self->buf.obj); + return 0; +} + +static PyObject * +unpackiter_len(unpackiterobject *self) +{ + Py_ssize_t len; + if (self->so == NULL) + len = 0; + else + len = (self->buf.len - self->index) / self->so->s_size; + return PyLong_FromSsize_t(len); +} + +static PyMethodDef unpackiter_methods[] = { + {"__length_hint__", (PyCFunction) unpackiter_len, METH_NOARGS, NULL}, + {NULL, NULL} /* sentinel */ +}; + +static PyObject * +unpackiter_iternext(unpackiterobject *self) +{ + PyObject *result; + if (self->so == NULL) + return NULL; + if (self->index >= self->buf.len) { + /* Iterator exhausted */ + Py_CLEAR(self->so); + PyBuffer_Release(&self->buf); + return NULL; + } + assert(self->index + self->so->s_size <= self->buf.len); + result = s_unpack_internal(self->so, + (char*) self->buf.buf + self->index); + self->index += self->so->s_size; + return result; +} + +PyTypeObject unpackiter_type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "unpack_iterator", /* tp_name */ + sizeof(unpackiterobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)unpackiter_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + 0, /* tp_doc */ + (traverseproc)unpackiter_traverse, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter */ + (iternextfunc)unpackiter_iternext, /* tp_iternext */ + unpackiter_methods /* tp_methods */ +}; + +PyDoc_STRVAR(s_iter_unpack__doc__, +"S.iter_unpack(buffer) -> iterator(v1, v2, ...)\n\ +\n\ +Return an iterator yielding tuples unpacked from the given bytes\n\ +source, like a repeated invocation of unpack_from(). Requires\n\ +that the bytes length be a multiple of the struct size."); + +static PyObject * +s_iter_unpack(PyObject *_so, PyObject *input) +{ + PyStructObject *so = (PyStructObject *) _so; + unpackiterobject *self; + + assert(PyStruct_Check(_so)); + assert(so->s_codes != NULL); + + if (so->s_size == 0) { + PyErr_Format(StructError, + "cannot iteratively unpack with a struct of length 0"); + return NULL; + } + + self = (unpackiterobject *) PyType_GenericAlloc(&unpackiter_type, 0); + if (self == NULL) + return NULL; + + if (PyObject_GetBuffer(input, &self->buf, PyBUF_SIMPLE) < 0) { + Py_DECREF(self); + return NULL; + } + if (self->buf.len % so->s_size != 0) { + PyErr_Format(StructError, + "iterative unpacking requires a bytes length " + "multiple of %zd", + so->s_size); + Py_DECREF(self); + return NULL; + } + Py_INCREF(so); + self->so = so; + self->index = 0; + return (PyObject *) self; +} + + /* * Guts of the pack function. * @@ -1776,6 +1915,7 @@ s_sizeof(PyStructObject *self, void *unused) /* List of functions */ static struct PyMethodDef s_methods[] = { + {"iter_unpack", s_iter_unpack, METH_O, s_iter_unpack__doc__}, {"pack", s_pack, METH_VARARGS, s_pack__doc__}, {"pack_into", s_pack_into, METH_VARARGS, s_pack_into__doc__}, {"unpack", s_unpack, METH_O, s_unpack__doc__}, @@ -2025,9 +2165,34 @@ unpack_from(PyObject *self, PyObject *args, PyObject *kwds) return result; } +PyDoc_STRVAR(iter_unpack_doc, +"iter_unpack(fmt, buffer) -> iterator(v1, v2, ...)\n\ +\n\ +Return an iterator yielding tuples unpacked from the given bytes\n\ +source according to the format string, like a repeated invocation of\n\ +unpack_from(). Requires that the bytes length be a multiple of the\n\ +format struct size."); + +static PyObject * +iter_unpack(PyObject *self, PyObject *args) +{ + PyObject *s_object, *fmt, *input, *result; + + if (!PyArg_ParseTuple(args, "OO:iter_unpack", &fmt, &input)) + return NULL; + + s_object = cache_struct(fmt); + if (s_object == NULL) + return NULL; + result = s_iter_unpack(s_object, input); + Py_DECREF(s_object); + return result; +} + static struct PyMethodDef module_functions[] = { {"_clearcache", (PyCFunction)clearcache, METH_NOARGS, clearcache_doc}, {"calcsize", calcsize, METH_O, calcsize_doc}, + {"iter_unpack", iter_unpack, METH_VARARGS, iter_unpack_doc}, {"pack", pack, METH_VARARGS, pack_doc}, {"pack_into", pack_into, METH_VARARGS, pack_into_doc}, {"unpack", unpack, METH_VARARGS, unpack_doc},