Issue #5506: BytesIO objects now have a getbuffer() method exporting a

view of their contents without duplicating them.  The view is both readable
and writable.
This commit is contained in:
Antoine Pitrou 2010-09-06 18:48:21 +00:00
parent 6285774f06
commit 972ee13e03
7 changed files with 197 additions and 1 deletions

View File

@ -518,6 +518,24 @@ In many situations, buffered I/O streams will provide higher performance
:class:`BytesIO` provides or overrides these methods in addition to those
from :class:`BufferedIOBase` and :class:`IOBase`:
.. method:: getbuffer()
Return a readable and writable view over the contents of the buffer
without copying them. Also, mutating the view will transparently
update the contents of the buffer::
>>> b = io.BytesIO(b"abcdef")
>>> view = b.getbuffer()
>>> view[2:4] = b"56"
>>> b.getvalue()
b'ab56ef'
.. note::
As long as the view exists, the :class:`BytesIO` object cannot be
resized.
.. versionadded:: 3.2
.. method:: getvalue()
Return ``bytes`` containing the entire contents of the buffer.

View File

@ -785,6 +785,11 @@ class BytesIO(BufferedIOBase):
raise ValueError("getvalue on closed file")
return bytes(self._buffer)
def getbuffer(self):
    """Return a readable and writable view of the buffer.

    The result is a memoryview created directly over the internal buffer,
    so no copy is made and writes through the view change the contents
    of this object.

    Raises ValueError if the file is closed, like the other accessors
    of this class.
    """
    if self.closed:
        raise ValueError("getbuffer on closed file")
    return memoryview(self._buffer)
def read(self, n=None):
if self.closed:
raise ValueError("read from closed file")

View File

@ -384,7 +384,31 @@ class MemoryTestMixin:
del __main__.PickleTestMemIO
class PyBytesIOTest(MemoryTestMixin, MemorySeekTestMixin, unittest.TestCase):
class BytesIOMixin:
    # Tests shared by every BytesIO implementation; the concrete test case
    # supplies the class under test as ``self.ioclass``.

    def test_getbuffer(self):
        initial = b"1234567890"
        memio = self.ioclass(initial)
        buf = memio.getbuffer()
        self.assertEqual(bytes(buf), initial)
        # Moving the stream position must not affect what the view exposes.
        memio.seek(5)
        buf = memio.getbuffer()
        self.assertEqual(bytes(buf), initial)
        # While a buffer is exported, any attempt to resize the BytesIO
        # has to fail with BufferError.
        with self.assertRaises(BufferError):
            memio.write(b'x' * 100)
        with self.assertRaises(BufferError):
            memio.truncate()
        # Writes through the view are visible from the BytesIO.
        buf[3:6] = b"abc"
        mutated = b"123abc7890"
        self.assertEqual(bytes(buf), mutated)
        self.assertEqual(memio.getvalue(), mutated)
        # Once the view is released, resizing is allowed again.
        del buf
        support.gc_collect()
        memio.truncate()
class PyBytesIOTest(MemoryTestMixin, MemorySeekTestMixin,
BytesIOMixin, unittest.TestCase):
UnsupportedOperation = pyio.UnsupportedOperation

View File

@ -13,6 +13,10 @@ Core and Builtins
Library
-------
- Issue #5506: BytesIO objects now have a getbuffer() method exporting a
view of their contents without duplicating them. The view is both readable
and writable.
- Issue #7566: Implement os.path.sameopenfile for Windows.
- Issue #9293: I/O streams now raise ``io.UnsupportedOperation`` when an

View File

@ -710,6 +710,8 @@ PyInit__io(void)
/* BytesIO */
PyBytesIO_Type.tp_base = &PyBufferedIOBase_Type;
ADD_TYPE(&PyBytesIO_Type, "BytesIO");
if (PyType_Ready(&_PyBytesIOBuffer_Type) < 0)
goto fail;
/* StringIO */
PyStringIO_Type.tp_base = &PyTextIOBase_Type;

View File

@ -169,3 +169,5 @@ extern PyObject *_PyIO_str_write;
extern PyObject *_PyIO_empty_str;
extern PyObject *_PyIO_empty_bytes;
extern PyObject *_PyIO_zero;
extern PyTypeObject _PyBytesIOBuffer_Type;

View File

@ -10,8 +10,15 @@ typedef struct {
size_t buf_size;
PyObject *dict;
PyObject *weakreflist;
Py_ssize_t exports;
} bytesio;
/* Small intermediate object allocated by bytesio_getbuffer(): the memoryview
   returned to the caller is built over this object, and its buffer callbacks
   maintain the `exports` counter of the BytesIO it points to. */
typedef struct {
PyObject_HEAD
/* Owned reference to the exporting BytesIO (INCREF'ed on creation,
   cleared in the deallocator). */
bytesio *source;
} bytesiobuf;
#define CHECK_CLOSED(self) \
if ((self)->buf == NULL) { \
PyErr_SetString(PyExc_ValueError, \
@ -19,6 +26,14 @@ typedef struct {
return NULL; \
}
/* Guard for operations that may resize the BytesIO: while the `exports`
   counter is positive (a buffer handed out through getbuffer() has not been
   released yet), set BufferError and return NULL from the enclosing
   function. */
#define CHECK_EXPORTS(self) \
if ((self)->exports > 0) { \
PyErr_SetString(PyExc_BufferError, \
"Existing exports of data: object cannot be re-sized"); \
return NULL; \
}
/* Internal routine to get a line from the buffer of a BytesIO
object. Returns the length between the current position to the
next newline character. */
@ -173,6 +188,30 @@ bytesio_flush(bytesio *self)
Py_RETURN_NONE;
}
/* Docstring for BytesIO.getbuffer().  The method returns the result of
   PyMemoryView_FromObject(), so the advertised return type is memoryview,
   not bytes. */
PyDoc_STRVAR(getbuffer_doc,
"getbuffer() -> memoryview.\n"
"\n"
"Get a read-write view over the contents of the BytesIO object.");
/* BytesIO.getbuffer(): wrap `self` in a fresh bytesiobuf exporter and return
   a memoryview built over that exporter.  Returns NULL with an exception set
   if the object is closed or an allocation fails. */
static PyObject *
bytesio_getbuffer(bytesio *self)
{
    PyTypeObject *exporter_type = &_PyBytesIOBuffer_Type;
    bytesiobuf *exporter;
    PyObject *result;

    CHECK_CLOSED(self);

    exporter = (bytesiobuf *) exporter_type->tp_alloc(exporter_type, 0);
    if (exporter == NULL)
        return NULL;

    /* The exporter owns a reference to the BytesIO it exposes. */
    Py_INCREF(self);
    exporter->source = self;

    /* Drop our own reference to the exporter once the memoryview has been
       requested, whether or not its creation succeeded. */
    result = PyMemoryView_FromObject((PyObject *) exporter);
    Py_DECREF(exporter);
    return result;
}
PyDoc_STRVAR(getval_doc,
"getvalue() -> bytes.\n"
"\n"
@ -422,6 +461,7 @@ bytesio_truncate(bytesio *self, PyObject *args)
PyObject *arg = Py_None;
CHECK_CLOSED(self);
CHECK_EXPORTS(self);
if (!PyArg_ParseTuple(args, "|O:truncate", &arg))
return NULL;
@ -543,6 +583,7 @@ bytesio_write(bytesio *self, PyObject *obj)
PyObject *result = NULL;
CHECK_CLOSED(self);
CHECK_EXPORTS(self);
if (PyObject_GetBuffer(obj, &buf, PyBUF_CONTIG_RO) < 0)
return NULL;
@ -664,6 +705,7 @@ bytesio_setstate(bytesio *self, PyObject *state)
Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
return NULL;
}
CHECK_EXPORTS(self);
/* Reset the object to its default state. This is only needed to handle
the case of repeated calls to __setstate__. */
self->string_size = 0;
@ -724,6 +766,11 @@ static void
bytesio_dealloc(bytesio *self)
{
_PyObject_GC_UNTRACK(self);
if (self->exports > 0) {
PyErr_SetString(PyExc_SystemError,
"deallocated BytesIO object has exported buffers");
PyErr_Print();
}
if (self->buf != NULL) {
PyMem_Free(self->buf);
self->buf = NULL;
@ -818,6 +865,7 @@ static struct PyMethodDef bytesio_methods[] = {
{"readline", (PyCFunction)bytesio_readline, METH_VARARGS, readline_doc},
{"readlines", (PyCFunction)bytesio_readlines, METH_VARARGS, readlines_doc},
{"read", (PyCFunction)bytesio_read, METH_VARARGS, read_doc},
{"getbuffer", (PyCFunction)bytesio_getbuffer, METH_NOARGS, getbuffer_doc},
{"getvalue", (PyCFunction)bytesio_getvalue, METH_NOARGS, getval_doc},
{"seek", (PyCFunction)bytesio_seek, METH_VARARGS, seek_doc},
{"truncate", (PyCFunction)bytesio_truncate, METH_VARARGS, truncate_doc},
@ -873,3 +921,96 @@ PyTypeObject PyBytesIO_Type = {
0, /*tp_alloc*/
bytesio_new, /*tp_new*/
};
/*
* Implementation of the small intermediate object used by getbuffer().
* getbuffer() returns a memoryview over this object, which should make it
* invisible from Python code.
*/
/* bf_getbuffer slot of the intermediate exporter.
 *
 * Fills `view` with a writable, one-dimensional description of the source
 * BytesIO's data (b->buf, b->string_size bytes) and, on success, bumps the
 * BytesIO's `exports` counter so that resizing operations are refused until
 * the buffer is released.
 *
 * Returns 0 on success, or the negative result of PyBuffer_FillInfo() on
 * failure (in which case the counter is left untouched).
 */
static int
bytesiobuf_getbuffer(bytesiobuf *obj, Py_buffer *view, int flags)
{
    bytesio *b = (bytesio *) obj->source;
    int ret;

    if (view == NULL) {
        /* Nothing to fill in: only record the export. */
        b->exports++;
        return 0;
    }
    /* readonly == 0: the exported memory may be written to. */
    ret = PyBuffer_FillInfo(view, (PyObject *) obj, b->buf, b->string_size,
                            0, flags);
    if (ret >= 0) {
        b->exports++;
    }
    return ret;
}
/* bf_releasebuffer slot: one export of the source BytesIO has gone away, so
   decrement its `exports` counter.  `view` itself is not inspected. */
static void
bytesiobuf_releasebuffer(bytesiobuf *obj, Py_buffer *view)
{
    bytesio *owner = (bytesio *) obj->source;

    owner->exports -= 1;
}
/* tp_traverse slot: report the only object reference held by the exporter,
   the source BytesIO, to the garbage collector's visitor. */
static int
bytesiobuf_traverse(bytesiobuf *self, visitproc visit, void *arg)
{
    if (self->source) {
        int status = visit((PyObject *) self->source, arg);
        if (status)
            return status;
    }
    return 0;
}
/* tp_dealloc slot of the intermediate exporter: release the reference to the
   source BytesIO and free the object. */
static void
bytesiobuf_dealloc(bytesiobuf *self)
{
    /* The type is declared with Py_TPFLAGS_HAVE_GC, so the object must be
       removed from the collector's tracking before any field is torn down:
       dropping the reference below may run arbitrary code (the BytesIO's
       own deallocation), and the collector must not see this half-destroyed
       object in the meantime. */
    PyObject_GC_UnTrack(self);
    Py_CLEAR(self->source);
    Py_TYPE(self)->tp_free(self);
}
/* Buffer protocol table for the intermediate exporter: acquiring a buffer
   goes through bytesiobuf_getbuffer, releasing it through
   bytesiobuf_releasebuffer. */
static PyBufferProcs bytesiobuf_as_buffer = {
(getbufferproc) bytesiobuf_getbuffer,
(releasebufferproc) bytesiobuf_releasebuffer,
};
/* Type object of the intermediate exporter used by BytesIO.getbuffer().
   Only four slots are populated: the deallocator, the buffer protocol table,
   the GC flag and the traverse function.  Every other slot is left at 0.
   NOTE(review): tp_alloc is 0 here although bytesio_getbuffer() calls it;
   presumably PyType_Ready() fills it in at module init -- confirm. */
PyTypeObject _PyBytesIOBuffer_Type = {
PyVarObject_HEAD_INIT(NULL, 0)
"_io._BytesIOBuffer", /*tp_name*/
sizeof(bytesiobuf), /*tp_basicsize*/
0, /*tp_itemsize*/
(destructor)bytesiobuf_dealloc, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
0, /*tp_reserved*/
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
0, /*tp_as_mapping*/
0, /*tp_hash*/
0, /*tp_call*/
0, /*tp_str*/
0, /*tp_getattro*/
0, /*tp_setattro*/
&bytesiobuf_as_buffer, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
0, /*tp_doc*/
(traverseproc)bytesiobuf_traverse, /*tp_traverse*/
0, /*tp_clear*/
0, /*tp_richcompare*/
0, /*tp_weaklistoffset*/
0, /*tp_iter*/
0, /*tp_iternext*/
0, /*tp_methods*/
0, /*tp_members*/
0, /*tp_getset*/
0, /*tp_base*/
0, /*tp_dict*/
0, /*tp_descr_get*/
0, /*tp_descr_set*/
0, /*tp_dictoffset*/
0, /*tp_init*/
0, /*tp_alloc*/
0, /*tp_new*/
};