From 7d12d9df136b45785fb945dcb4812fdb068a2498 Mon Sep 17 00:00:00 2001 From: Stefan Krah Date: Sat, 28 Jul 2012 12:25:55 +0200 Subject: [PATCH] Issue #12834: Fix PyBuffer_ToContiguous() for non-contiguous arrays. --- Include/abstract.h | 5 +- Lib/test/test_buffer.py | 291 +++++++++++++++++++++++++++++++++++++- Misc/NEWS | 2 + Modules/_testbuffer.c | 44 ++++++ Objects/abstract.c | 56 -------- Objects/bytearrayobject.c | 2 +- Objects/bytesobject.c | 1 - Objects/memoryobject.c | 77 +++++++++- 8 files changed, 410 insertions(+), 68 deletions(-) diff --git a/Include/abstract.h b/Include/abstract.h index e879fa1b23a..44b5af72c9b 100644 --- a/Include/abstract.h +++ b/Include/abstract.h @@ -535,11 +535,12 @@ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx*/ + /* Implementation in memoryobject.c */ PyAPI_FUNC(int) PyBuffer_ToContiguous(void *buf, Py_buffer *view, - Py_ssize_t len, char fort); + Py_ssize_t len, char order); PyAPI_FUNC(int) PyBuffer_FromContiguous(Py_buffer *view, void *buf, - Py_ssize_t len, char fort); + Py_ssize_t len, char order); /* Copy len bytes of data from the contiguous chunk of memory diff --git a/Lib/test/test_buffer.py b/Lib/test/test_buffer.py index b6cb3acdba0..c91f3c40a5b 100644 --- a/Lib/test/test_buffer.py +++ b/Lib/test/test_buffer.py @@ -53,6 +53,11 @@ NATIVE = { 'f':0, 'd':0, 'P':0 } +# NumPy does not have 'n' or 'N': +if numpy_array: + del NATIVE['n'] + del NATIVE['N'] + if struct: try: # Add "qQ" if present in native mode. @@ -855,11 +860,49 @@ class TestBufferProtocol(unittest.TestCase): is_contiguous(result, 'F') and order == 'C': # The flattened list is already in C-order. expected = ndarray(flattened, shape=shape, format=ff) - contig = get_contiguous(result, PyBUF_READ, order) + contig = get_contiguous(result, PyBUF_READ, order) self.assertEqual(contig.tobytes(), b) self.assertTrue(cmp_contig(contig, expected)) + if ndim == 0: + continue + + nmemb = len(flattened) + ro = 0 if readonly else ND_WRITABLE + + ### See comment in test_py_buffer_to_contiguous for an + ### explanation why these tests are valid. + + # To 'C' + contig = py_buffer_to_contiguous(result, 'C', PyBUF_FULL_RO) + self.assertEqual(len(contig), nmemb * itemsize) + initlst = [struct.unpack_from(fmt, contig, n*itemsize)[0] + for n in range(nmemb)] + + y = ndarray(initlst, shape=shape, flags=ro, format=fmt) + self.assertEqual(memoryview(y), memoryview(result)) + + # To 'F' + contig = py_buffer_to_contiguous(result, 'F', PyBUF_FULL_RO) + self.assertEqual(len(contig), nmemb * itemsize) + initlst = [struct.unpack_from(fmt, contig, n*itemsize)[0] + for n in range(nmemb)] + + y = ndarray(initlst, shape=shape, flags=ro|ND_FORTRAN, + format=fmt) + self.assertEqual(memoryview(y), memoryview(result)) + + # To 'A' + contig = py_buffer_to_contiguous(result, 'A', PyBUF_FULL_RO) + self.assertEqual(len(contig), nmemb * itemsize) + initlst = [struct.unpack_from(fmt, contig, n*itemsize)[0] + for n in range(nmemb)] + + f = ND_FORTRAN if is_contiguous(result, 'F') else 0 + y = ndarray(initlst, shape=shape, flags=f|ro, format=fmt) + self.assertEqual(memoryview(y), memoryview(result)) + if is_memoryview_format(fmt): try: m = memoryview(result) @@ -1805,6 +1848,9 @@ class TestBufferProtocol(unittest.TestCase): self.assertEqual(mvlist, ylist) if numpy_array: + # XXX NumPy (as far as it compiles with 3.3) currently + # segfaults here. Wait for a stable 3.3 compatible version. + continue shape = t[3] if 0 in shape: continue # http://projects.scipy.org/numpy/ticket/1910 @@ -1884,6 +1930,9 @@ class TestBufferProtocol(unittest.TestCase): self.assertEqual(mr.tolist(), yrlist) if numpy_array: + # XXX NumPy (as far as it compiles with 3.3) currently + # segfaults here. Wait for a stable 3.3 compatible version. + continue if 0 in lshape or 0 in rshape: continue # http://projects.scipy.org/numpy/ticket/1910 @@ -2020,6 +2069,246 @@ class TestBufferProtocol(unittest.TestCase): nd = ndarray(list(range(12)), shape=[2,2,3], format='L') self.assertEqual(hash(nd), hash(nd.tobytes())) + def test_py_buffer_to_contiguous(self): + + # The requests are used in _testbuffer.c:py_buffer_to_contiguous + # to generate buffers without full information for testing. + requests = ( + # distinct flags + PyBUF_INDIRECT, PyBUF_STRIDES, PyBUF_ND, PyBUF_SIMPLE, + # compound requests + PyBUF_FULL, PyBUF_FULL_RO, + PyBUF_RECORDS, PyBUF_RECORDS_RO, + PyBUF_STRIDED, PyBUF_STRIDED_RO, + PyBUF_CONTIG, PyBUF_CONTIG_RO, + ) + + # no buffer interface + self.assertRaises(TypeError, py_buffer_to_contiguous, {}, 'F', + PyBUF_FULL_RO) + + # scalar, read-only request + nd = ndarray(9, shape=(), format="L", flags=ND_WRITABLE) + for order in ['C', 'F', 'A']: + for request in requests: + b = py_buffer_to_contiguous(nd, order, request) + self.assertEqual(b, nd.tobytes()) + + # zeros in shape + nd = ndarray([1], shape=[0], format="L", flags=ND_WRITABLE) + for order in ['C', 'F', 'A']: + for request in requests: + b = py_buffer_to_contiguous(nd, order, request) + self.assertEqual(b, b'') + + nd = ndarray(list(range(8)), shape=[2, 0, 7], format="L", + flags=ND_WRITABLE) + for order in ['C', 'F', 'A']: + for request in requests: + b = py_buffer_to_contiguous(nd, order, request) + self.assertEqual(b, b'') + + ### One-dimensional arrays are trivial, since Fortran and C order + ### are the same. + + # one-dimensional + for f in [0, ND_FORTRAN]: + nd = ndarray([1], shape=[1], format="h", flags=f|ND_WRITABLE) + ndbytes = nd.tobytes() + for order in ['C', 'F', 'A']: + for request in requests: + b = py_buffer_to_contiguous(nd, order, request) + self.assertEqual(b, ndbytes) + + nd = ndarray([1, 2, 3], shape=[3], format="b", flags=f|ND_WRITABLE) + ndbytes = nd.tobytes() + for order in ['C', 'F', 'A']: + for request in requests: + b = py_buffer_to_contiguous(nd, order, request) + self.assertEqual(b, ndbytes) + + # one-dimensional, non-contiguous input + nd = ndarray([1, 2, 3], shape=[2], strides=[2], flags=ND_WRITABLE) + ndbytes = nd.tobytes() + for order in ['C', 'F', 'A']: + for request in [PyBUF_STRIDES, PyBUF_FULL]: + b = py_buffer_to_contiguous(nd, order, request) + self.assertEqual(b, ndbytes) + + nd = nd[::-1] + ndbytes = nd.tobytes() + for order in ['C', 'F', 'A']: + for request in requests: + try: + b = py_buffer_to_contiguous(nd, order, request) + except BufferError: + continue + self.assertEqual(b, ndbytes) + + ### + ### Multi-dimensional arrays: + ### + ### The goal here is to preserve the logical representation of the + ### input array but change the physical representation if necessary. + ### + ### _testbuffer example: + ### ==================== + ### + ### C input array: + ### -------------- + ### >>> nd = ndarray(list(range(12)), shape=[3, 4]) + ### >>> nd.tolist() + ### [[0, 1, 2, 3], + ### [4, 5, 6, 7], + ### [8, 9, 10, 11]] + ### + ### Fortran output: + ### --------------- + ### >>> py_buffer_to_contiguous(nd, 'F', PyBUF_FULL_RO) + ### >>> b'\x00\x04\x08\x01\x05\t\x02\x06\n\x03\x07\x0b' + ### + ### The return value corresponds to this input list for + ### _testbuffer's ndarray: + ### >>> nd = ndarray([0,4,8,1,5,9,2,6,10,3,7,11], shape=[3,4], + ### flags=ND_FORTRAN) + ### >>> nd.tolist() + ### [[0, 1, 2, 3], + ### [4, 5, 6, 7], + ### [8, 9, 10, 11]] + ### + ### The logical array is the same, but the values in memory are now + ### in Fortran order. + ### + ### NumPy example: + ### ============== + ### _testbuffer's ndarray takes lists to initialize the memory. + ### Here's the same sequence in NumPy: + ### + ### C input: + ### -------- + ### >>> nd = ndarray(buffer=bytearray(list(range(12))), + ### shape=[3, 4], dtype='B') + ### >>> nd + ### array([[ 0, 1, 2, 3], + ### [ 4, 5, 6, 7], + ### [ 8, 9, 10, 11]], dtype=uint8) + ### + ### Fortran output: + ### --------------- + ### >>> fortran_buf = nd.tostring(order='F') + ### >>> fortran_buf + ### b'\x00\x04\x08\x01\x05\t\x02\x06\n\x03\x07\x0b' + ### + ### >>> nd = ndarray(buffer=fortran_buf, shape=[3, 4], + ### dtype='B', order='F') + ### + ### >>> nd + ### array([[ 0, 1, 2, 3], + ### [ 4, 5, 6, 7], + ### [ 8, 9, 10, 11]], dtype=uint8) + ### + + # multi-dimensional, contiguous input + lst = list(range(12)) + for f in [0, ND_FORTRAN]: + nd = ndarray(lst, shape=[3, 4], flags=f|ND_WRITABLE) + if numpy_array: + na = numpy_array(buffer=bytearray(lst), + shape=[3, 4], dtype='B', + order='C' if f == 0 else 'F') + + # 'C' request + if f == ND_FORTRAN: # 'F' to 'C' + x = ndarray(transpose(lst, [4, 3]), shape=[3, 4], + flags=ND_WRITABLE) + expected = x.tobytes() + else: + expected = nd.tobytes() + for request in requests: + try: + b = py_buffer_to_contiguous(nd, 'C', request) + except BufferError: + continue + + self.assertEqual(b, expected) + + # Check that output can be used as the basis for constructing + # a C array that is logically identical to the input array. + y = ndarray([v for v in b], shape=[3, 4], flags=ND_WRITABLE) + self.assertEqual(memoryview(y), memoryview(nd)) + + if numpy_array: + self.assertEqual(b, na.tostring(order='C')) + + # 'F' request + if f == 0: # 'C' to 'F' + x = ndarray(transpose(lst, [3, 4]), shape=[4, 3], + flags=ND_WRITABLE) + else: + x = ndarray(lst, shape=[3, 4], flags=ND_WRITABLE) + expected = x.tobytes() + for request in [PyBUF_FULL, PyBUF_FULL_RO, PyBUF_INDIRECT, + PyBUF_STRIDES, PyBUF_ND]: + try: + b = py_buffer_to_contiguous(nd, 'F', request) + except BufferError: + continue + self.assertEqual(b, expected) + + # Check that output can be used as the basis for constructing + # a Fortran array that is logically identical to the input array. + y = ndarray([v for v in b], shape=[3, 4], flags=ND_FORTRAN|ND_WRITABLE) + self.assertEqual(memoryview(y), memoryview(nd)) + + if numpy_array: + self.assertEqual(b, na.tostring(order='F')) + + # 'A' request + if f == ND_FORTRAN: + x = ndarray(lst, shape=[3, 4], flags=ND_WRITABLE) + expected = x.tobytes() + else: + expected = nd.tobytes() + for request in [PyBUF_FULL, PyBUF_FULL_RO, PyBUF_INDIRECT, + PyBUF_STRIDES, PyBUF_ND]: + try: + b = py_buffer_to_contiguous(nd, 'A', request) + except BufferError: + continue + + self.assertEqual(b, expected) + + # Check that output can be used as the basis for constructing + # an array with order=f that is logically identical to the input + # array. + y = ndarray([v for v in b], shape=[3, 4], flags=f|ND_WRITABLE) + self.assertEqual(memoryview(y), memoryview(nd)) + + if numpy_array: + self.assertEqual(b, na.tostring(order='A')) + + # multi-dimensional, non-contiguous input + nd = ndarray(list(range(12)), shape=[3, 4], flags=ND_WRITABLE|ND_PIL) + + # 'C' + b = py_buffer_to_contiguous(nd, 'C', PyBUF_FULL_RO) + self.assertEqual(b, nd.tobytes()) + y = ndarray([v for v in b], shape=[3, 4], flags=ND_WRITABLE) + self.assertEqual(memoryview(y), memoryview(nd)) + + # 'F' + b = py_buffer_to_contiguous(nd, 'F', PyBUF_FULL_RO) + x = ndarray(transpose(lst, [3, 4]), shape=[4, 3], flags=ND_WRITABLE) + self.assertEqual(b, x.tobytes()) + y = ndarray([v for v in b], shape=[3, 4], flags=ND_FORTRAN|ND_WRITABLE) + self.assertEqual(memoryview(y), memoryview(nd)) + + # 'A' + b = py_buffer_to_contiguous(nd, 'A', PyBUF_FULL_RO) + self.assertEqual(b, nd.tobytes()) + y = ndarray([v for v in b], shape=[3, 4], flags=ND_WRITABLE) + self.assertEqual(memoryview(y), memoryview(nd)) + def test_memoryview_construction(self): items_shape = [(9, []), ([1,2,3], [3]), (list(range(2*3*5)), [2,3,5])] diff --git a/Misc/NEWS b/Misc/NEWS index cf0d170b7fd..cabea97150d 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,8 @@ What's New in Python 3.3.0 Beta 2? Core and Builtins ----------------- +- Issue #12834: Fix PyBuffer_ToContiguous() for non-contiguous arrays. + - Issue #15456: Fix code __sizeof__ after #12399 change. Patch by Serhiy Storchaka. diff --git a/Modules/_testbuffer.c b/Modules/_testbuffer.c index 1ff685c1762..b291a14e209 100644 --- a/Modules/_testbuffer.c +++ b/Modules/_testbuffer.c @@ -2397,6 +2397,49 @@ get_contiguous(PyObject *self, PyObject *args) return PyMemoryView_GetContiguous(obj, (int)type, ord); } +/* PyBuffer_ToContiguous() */ +static PyObject * +py_buffer_to_contiguous(PyObject *self, PyObject *args) +{ + PyObject *obj; + PyObject *order; + PyObject *ret = NULL; + int flags; + char ord; + Py_buffer view; + char *buf = NULL; + + if (!PyArg_ParseTuple(args, "OOi", &obj, &order, &flags)) { + return NULL; + } + + if (PyObject_GetBuffer(obj, &view, flags) < 0) { + return NULL; + } + + ord = get_ascii_order(order); + if (ord == CHAR_MAX) { + goto out; + } + + buf = PyMem_Malloc(view.len); + if (buf == NULL) { + PyErr_NoMemory(); + goto out; + } + + if (PyBuffer_ToContiguous(buf, &view, view.len, ord) < 0) { + goto out; + } + + ret = PyBytes_FromStringAndSize(buf, view.len); + +out: + PyBuffer_Release(&view); + PyMem_XFree(buf); + return ret; +} + static int fmtcmp(const char *fmt1, const char *fmt2) { @@ -2734,6 +2777,7 @@ static struct PyMethodDef _testbuffer_functions[] = { {"get_pointer", get_pointer, METH_VARARGS, NULL}, {"get_sizeof_void_p", (PyCFunction)get_sizeof_void_p, METH_NOARGS, NULL}, {"get_contiguous", get_contiguous, METH_VARARGS, NULL}, + {"py_buffer_to_contiguous", py_buffer_to_contiguous, METH_VARARGS, NULL}, {"is_contiguous", is_contiguous, METH_VARARGS, NULL}, {"cmp_contig", cmp_contig, METH_VARARGS, NULL}, {NULL, NULL} diff --git a/Objects/abstract.c b/Objects/abstract.c index 42cd16943c3..aa43b723297 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -445,62 +445,6 @@ _Py_add_one_to_index_C(int nd, Py_ssize_t *index, const Py_ssize_t *shape) } } - /* view is not checked for consistency in either of these. It is - assumed that the size of the buffer is view->len in - view->len / view->itemsize elements. - */ - -int -PyBuffer_ToContiguous(void *buf, Py_buffer *view, Py_ssize_t len, char fort) -{ - int k; - void (*addone)(int, Py_ssize_t *, const Py_ssize_t *); - Py_ssize_t *indices, elements; - char *dest, *ptr; - - if (len > view->len) { - len = view->len; - } - - if (PyBuffer_IsContiguous(view, fort)) { - /* simplest copy is all that is needed */ - memcpy(buf, view->buf, len); - return 0; - } - - /* Otherwise a more elaborate scheme is needed */ - - /* XXX(nnorwitz): need to check for overflow! */ - indices = (Py_ssize_t *)PyMem_Malloc(sizeof(Py_ssize_t)*(view->ndim)); - if (indices == NULL) { - PyErr_NoMemory(); - return -1; - } - for (k=0; kndim;k++) { - indices[k] = 0; - } - - if (fort == 'F') { - addone = _Py_add_one_to_index_F; - } - else { - addone = _Py_add_one_to_index_C; - } - dest = buf; - /* XXX : This is not going to be the fastest code in the world - several optimizations are possible. - */ - elements = len / view->itemsize; - while (elements--) { - addone(view->ndim, indices, view->shape); - ptr = PyBuffer_GetPointer(view, indices); - memcpy(dest, ptr, view->itemsize); - dest += view->itemsize; - } - PyMem_Free(indices); - return 0; -} - int PyBuffer_FromContiguous(Py_buffer *view, void *buf, Py_ssize_t len, char fort) { diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 5b7083d6838..2bb3a29daaa 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -789,7 +789,7 @@ bytearray_init(PyByteArrayObject *self, PyObject *args, PyObject *kwds) size = view.len; if (PyByteArray_Resize((PyObject *)self, size) < 0) goto fail; if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0) - goto fail; + goto fail; PyBuffer_Release(&view); return 0; fail: diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 14bd8e68c56..bf9259f187a 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2591,7 +2591,6 @@ PyBytes_FromObject(PyObject *x) new = PyBytes_FromStringAndSize(NULL, view.len); if (!new) goto fail; - /* XXX(brett.cannon): Better way to get to internal buffer? */ if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval, &view, view.len, 'C') < 0) goto fail; diff --git a/Objects/memoryobject.c b/Objects/memoryobject.c index 62427d4b863..c7185f034dc 100644 --- a/Objects/memoryobject.c +++ b/Objects/memoryobject.c @@ -438,15 +438,17 @@ init_fortran_strides_from_shape(Py_buffer *view) view->strides[i] = view->strides[i-1] * view->shape[i-1]; } -/* Copy src to a C-contiguous representation. Assumptions: +/* Copy src to a contiguous representation. order is one of 'C', 'F' (Fortran) + or 'A' (Any). Assumptions: src has PyBUF_FULL information, src->ndim >= 1, len(mem) == src->len. */ static int -buffer_to_c_contiguous(char *mem, Py_buffer *src) +buffer_to_contiguous(char *mem, Py_buffer *src, char order) { Py_buffer dest; Py_ssize_t *strides; int ret; + assert(src->ndim >= 1); assert(src->shape != NULL); assert(src->strides != NULL); @@ -456,12 +458,22 @@ buffer_to_c_contiguous(char *mem, Py_buffer *src) return -1; } - /* initialize dest as a C-contiguous buffer */ + /* initialize dest */ dest = *src; dest.buf = mem; - /* shape is constant and shared */ + /* shape is constant and shared: the logical representation of the + array is unaltered. */ + + /* The physical representation determined by strides (and possibly + suboffsets) may change. */ dest.strides = strides; - init_strides_from_shape(&dest); + if (order == 'C' || order == 'A') { + init_strides_from_shape(&dest); + } + else { + init_fortran_strides_from_shape(&dest); + } + dest.suboffsets = NULL; ret = copy_buffer(&dest, src); @@ -921,6 +933,57 @@ memory_new(PyTypeObject *subtype, PyObject *args, PyObject *kwds) } +/****************************************************************************/ +/* Previously in abstract.c */ +/****************************************************************************/ + +typedef struct { + Py_buffer view; + Py_ssize_t array[1]; +} Py_buffer_full; + +int +PyBuffer_ToContiguous(void *buf, Py_buffer *src, Py_ssize_t len, char order) +{ + Py_buffer_full *fb = NULL; + int ret; + + assert(order == 'C' || order == 'F' || order == 'A'); + + if (len != src->len) { + PyErr_SetString(PyExc_ValueError, + "PyBuffer_ToContiguous: len != view->len"); + return -1; + } + + if (PyBuffer_IsContiguous(src, order)) { + memcpy((char *)buf, src->buf, len); + return 0; + } + + /* buffer_to_contiguous() assumes PyBUF_FULL */ + fb = PyMem_Malloc(sizeof *fb + 3 * src->ndim * (sizeof *fb->array)); + if (fb == NULL) { + PyErr_NoMemory(); + return -1; + } + fb->view.ndim = src->ndim; + fb->view.shape = fb->array; + fb->view.strides = fb->array + src->ndim; + fb->view.suboffsets = fb->array + 2 * src->ndim; + + init_shared_values(&fb->view, src); + init_shape_strides(&fb->view, src); + init_suboffsets(&fb->view, src); + + src = &fb->view; + + ret = buffer_to_contiguous(buf, src, order); + PyMem_Free(fb); + return ret; +} + + /****************************************************************************/ /* Release/GC management */ /****************************************************************************/ @@ -1889,7 +1952,7 @@ memory_tobytes(PyMemoryViewObject *self, PyObject *dummy) if (bytes == NULL) return NULL; - if (buffer_to_c_contiguous(PyBytes_AS_STRING(bytes), src) < 0) { + if (buffer_to_contiguous(PyBytes_AS_STRING(bytes), src, 'C') < 0) { Py_DECREF(bytes); return NULL; } @@ -2423,7 +2486,7 @@ memory_hash(PyMemoryViewObject *self) PyErr_NoMemory(); return -1; } - if (buffer_to_c_contiguous(mem, view) < 0) { + if (buffer_to_contiguous(mem, view, 'C') < 0) { PyMem_Free(mem); return -1; }