From b8503896ade8feab60406f616ed0b568cded43ff Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Tue, 29 Apr 2014 10:14:02 +0200 Subject: [PATCH] Issue #21057: TextIOWrapper now allows the underlying binary stream's read() or read1() method to return an arbitrary bytes-like object (such as a memoryview). Patch by Nikolaus Rath. --- Lib/test/test_io.py | 28 ++++++++++++++++++++++++++++ Misc/NEWS | 4 ++++ Modules/_io/textio.c | 34 ++++++++++++++++++++-------------- 3 files changed, 52 insertions(+), 14 deletions(-) diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 267537fdeb7..8e702db194d 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -2681,6 +2681,34 @@ class TextIOWrapperTest(unittest.TestCase): self.assertFalse(err) self.assertEqual("ok", out.decode().strip()) + def test_read_byteslike(self): + r = MemviewBytesIO(b'Just some random string\n') + t = self.TextIOWrapper(r, 'utf-8') + + # TextIOwrapper will not read the full string, because + # we truncate it to a multiple of the native int size + # so that we can construct a more complex memoryview. + bytes_val = _to_memoryview(r.getvalue()).tobytes() + + self.assertEqual(t.read(200), bytes_val.decode('utf-8')) + +class MemviewBytesIO(io.BytesIO): + '''A BytesIO object whose read method returns memoryviews + rather than bytes''' + + def read1(self, len_): + return _to_memoryview(super().read1(len_)) + + def read(self, len_): + return _to_memoryview(super().read(len_)) + +def _to_memoryview(buf): + '''Convert bytes-object *buf* to a non-trivial memoryview''' + + arr = array.array('i') + idx = len(buf) - len(buf) % arr.itemsize + arr.frombytes(buf[:idx]) + return memoryview(arr) class CTextIOWrapperTest(TextIOWrapperTest): io = io diff --git a/Misc/NEWS b/Misc/NEWS index 23245c040e7..76df8dfff8e 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -60,6 +60,10 @@ Core and Builtins Library ------- +- Issue #21057: TextIOWrapper now allows the underlying binary stream's + read() or read1() method to return an arbitrary bytes-like object + (such as a memoryview). Patch by Nikolaus Rath. + - Issue #20951: SSLSocket.send() now raises either SSLWantReadError or SSLWantWriteError on a non-blocking socket if the operation would block. Previously, it would return 0. Patch by Nikolaus Rath. diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 5739bc4d01d..d5c7339b965 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1439,6 +1439,7 @@ textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint) PyObject *dec_buffer = NULL; PyObject *dec_flags = NULL; PyObject *input_chunk = NULL; + Py_buffer input_chunk_buf; PyObject *decoded_chars, *chunk_size; Py_ssize_t nbytes, nchars; int eof; @@ -1470,6 +1471,15 @@ textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint) Py_DECREF(state); return -1; } + + if (!PyBytes_Check(dec_buffer)) { + PyErr_Format(PyExc_TypeError, + "decoder getstate() should have returned a bytes " + "object, not '%.200s'", + Py_TYPE(dec_buffer)->tp_name); + Py_DECREF(state); + return -1; + } Py_INCREF(dec_buffer); Py_INCREF(dec_flags); Py_DECREF(state); @@ -1482,23 +1492,24 @@ textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint) chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint)); if (chunk_size == NULL) goto fail; + input_chunk = PyObject_CallMethodObjArgs(self->buffer, (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read), chunk_size, NULL); Py_DECREF(chunk_size); if (input_chunk == NULL) goto fail; - if (!PyBytes_Check(input_chunk)) { + + if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) { PyErr_Format(PyExc_TypeError, - "underlying %s() should have returned a bytes object, " + "underlying %s() should have returned a bytes-like object, " "not '%.200s'", (self->has_read1 ? "read1": "read"), Py_TYPE(input_chunk)->tp_name); goto fail; } - nbytes = PyBytes_Size(input_chunk); + nbytes = input_chunk_buf.len; eof = (nbytes == 0); - if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) { decoded_chars = _PyIncrementalNewlineDecoder_decode( self->decoder, input_chunk, eof); @@ -1507,6 +1518,7 @@ textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint) decoded_chars = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL); } + PyBuffer_Release(&input_chunk_buf); if (check_decoded(decoded_chars) < 0) goto fail; @@ -1523,18 +1535,12 @@ textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint) /* At the snapshot point, len(dec_buffer) bytes before the read, the * next input to be decoded is dec_buffer + input_chunk. */ - PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk); - if (next_input == NULL) - goto fail; - if (!PyBytes_Check(next_input)) { - PyErr_Format(PyExc_TypeError, - "decoder getstate() should have returned a bytes " - "object, not '%.200s'", - Py_TYPE(next_input)->tp_name); - Py_DECREF(next_input); + PyObject *next_input = dec_buffer; + PyBytes_Concat(&next_input, input_chunk); + if (next_input == NULL) { + dec_buffer = NULL; /* Reference lost to PyBytes_Concat */ goto fail; } - Py_DECREF(dec_buffer); Py_CLEAR(self->snapshot); self->snapshot = Py_BuildValue("NN", dec_flags, next_input); }