From 10e847bbc7427e85f38191298275eaf14be625e5 Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Sat, 7 Jun 2014 20:06:48 -0700 Subject: [PATCH] add BufferedIOBase.readinto1 (closes #20578) Patch by Nikolaus Rath. --- Doc/library/io.rst | 31 +++++++++++---- Lib/_pyio.py | 83 +++++++++++++++++++++++++++++++++++++++- Lib/test/test_io.py | 25 ++++++++++++ Misc/NEWS | 2 + Modules/_io/bufferedio.c | 67 +++++++++++++++++++++++++++++--- 5 files changed, 194 insertions(+), 14 deletions(-) diff --git a/Doc/library/io.rst b/Doc/library/io.rst index 79f65e00071..3a9872a0339 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -385,8 +385,8 @@ I/O Base Classes .. method:: readinto(b) Read up to ``len(b)`` bytes into :class:`bytearray` *b* and return the - number of bytes read. If the object is in non-blocking mode and no - bytes are available, ``None`` is returned. + number of bytes read. If the object is in non-blocking mode and no bytes + are available, ``None`` is returned. .. method:: write(b) @@ -459,10 +459,11 @@ I/O Base Classes .. method:: read1(size=-1) - Read and return up to *size* bytes, with at most one call to the underlying - raw stream's :meth:`~RawIOBase.read` method. This can be useful if you - are implementing your own buffering on top of a :class:`BufferedIOBase` - object. + Read and return up to *size* bytes, with at most one call to the + underlying raw stream's :meth:`~RawIOBase.read` (or + :meth:`~RawIOBase.readinto`) method. This can be useful if you + are implementing your own buffering on top of a + :class:`BufferedIOBase` object. .. method:: readinto(b) @@ -472,8 +473,19 @@ I/O Base Classes Like :meth:`read`, multiple reads may be issued to the underlying raw stream, unless the latter is interactive. + A :exc:`BlockingIOError` is raised if the underlying raw stream is in non + blocking-mode, and has no data available at the moment. + + .. 
method:: readinto1(b) + + Read up to ``len(b)`` bytes into bytearray *b*, using at most one call to + the underlying raw stream's :meth:`~RawIOBase.read` (or + :meth:`~RawIOBase.readinto`) method. Return the number of bytes read. + A :exc:`BlockingIOError` is raised if the underlying raw stream is in - non blocking-mode, and has no data available at the moment. + non-blocking mode and has no data available at the moment. + + .. versionadded:: 3.5 .. method:: write(b) @@ -590,6 +602,11 @@ than raw I/O does. In :class:`BytesIO`, this is the same as :meth:`read`. + .. method:: readinto1(b) + + In :class:`BytesIO`, this is the same as :meth:`readinto`. + + .. versionadded:: 3.5 .. class:: BufferedReader(raw, buffer_size=DEFAULT_BUFFER_SIZE) diff --git a/Lib/_pyio.py b/Lib/_pyio.py index b04d23a0c2f..a53f2817729 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -655,8 +655,26 @@ class BufferedIOBase(IOBase): Raises BlockingIOError if the underlying raw stream has no data at the moment. """ + + return self._readinto(b, read1=False) + + def readinto1(self, b): + """Read up to len(b) bytes into *b*, using at most one system call + + Returns an int representing the number of bytes read (0 for EOF). + + Raises BlockingIOError if the underlying raw stream has no + data at the moment. + """ + + return self._readinto(b, read1=True) + + def _readinto(self, b, read1): # XXX This ought to work with anything that supports the buffer API - data = self.read(len(b)) + if read1: + data = self.read1(len(b)) + else: + data = self.read(len(b)) n = len(data) try: b[:n] = data @@ -1058,6 +1076,62 @@ class BufferedReader(_BufferedIOMixin): return self._read_unlocked( min(size, len(self._read_buf) - self._read_pos)) + # Implementing readinto() and readinto1() is not strictly necessary (we + # could rely on the base class that provides an implementation in terms of + # read() and read1()). 
We do it anyway to keep the _pyio implementation + # similar to the io implementation (which implements the methods for + # performance reasons). + def readinto(self, buf): + """Read data into *buf*.""" + return self._readinto(buf, read1=False) + def readinto1(self, buf): + """Read data into *buf* with at most one system call.""" + return self._readinto(buf, read1=True) + + def _readinto(self, buf, read1): + """Read data into *buf* with at most one system call.""" + + if len(buf) == 0: + return 0 + + written = 0 + with self._read_lock: + while written < len(buf): + + # First try to read from internal buffer + avail = min(len(self._read_buf) - self._read_pos, len(buf)) + if avail: + buf[written:written+avail] = \ + self._read_buf[self._read_pos:self._read_pos+avail] + self._read_pos += avail + written += avail + if written == len(buf): + break + + # If remaining space in caller's buffer is larger than + # internal buffer, read directly into caller's buffer + if len(buf) - written > self.buffer_size: + # If we don't use a memoryview, slicing buf will create + # a new object + if not isinstance(buf, memoryview): + buf = memoryview(buf) + n = self.raw.readinto(buf[written:]) + if not n: + break # eof + written += n + + # Otherwise refill internal buffer - unless we're + # in read1 mode and already got some data + elif not (read1 and written): + if not self._peek_unlocked(1): + break # eof + + # In readinto1 mode, return as soon as we have some data + if read1 and written: + break + + return written + def tell(self): return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos @@ -1207,6 +1281,9 @@ class BufferedRWPair(BufferedIOBase): def read1(self, size): return self.reader.read1(size) + def readinto1(self, b): + return self.reader.readinto1(b) + def readable(self): return self.reader.readable() @@ -1289,6 +1366,10 @@ class BufferedRandom(BufferedWriter, BufferedReader): self.flush() return BufferedReader.read1(self, size) + def readinto1(self, b): + 
self.flush() + return BufferedReader.readinto1(self, b) + def write(self, b): if self._read_buf: # Undo readahead diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 347832d71fa..ee6db48c688 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -911,6 +911,29 @@ class BufferedReaderTest(unittest.TestCase, CommonBufferedTests): self.assertEqual(bufio.readinto(b), 1) self.assertEqual(b, b"cb") + def test_readinto1(self): + buffer_size = 10 + rawio = self.MockRawIO((b"abc", b"de", b"fgh", b"jkl")) + bufio = self.tp(rawio, buffer_size=buffer_size) + b = bytearray(2) + self.assertEqual(bufio.peek(3), b'abc') + self.assertEqual(rawio._reads, 1) + self.assertEqual(bufio.readinto1(b), 2) + self.assertEqual(b, b"ab") + self.assertEqual(rawio._reads, 1) + self.assertEqual(bufio.readinto1(b), 1) + self.assertEqual(b[:1], b"c") + self.assertEqual(rawio._reads, 1) + self.assertEqual(bufio.readinto1(b), 2) + self.assertEqual(b, b"de") + self.assertEqual(rawio._reads, 2) + b = bytearray(2*buffer_size) + self.assertEqual(bufio.peek(3), b'fgh') + self.assertEqual(rawio._reads, 3) + self.assertEqual(bufio.readinto1(b), 6) + self.assertEqual(b[:6], b"fghjkl") + self.assertEqual(rawio._reads, 4) + def test_readlines(self): def bufio(): rawio = self.MockRawIO((b"abc\n", b"d\n", b"ef")) @@ -2985,6 +3008,8 @@ class MiscIOTest(unittest.TestCase): self.assertRaises(ValueError, f.readall) if hasattr(f, "readinto"): self.assertRaises(ValueError, f.readinto, bytearray(1024)) + if hasattr(f, "readinto1"): + self.assertRaises(ValueError, f.readinto1, bytearray(1024)) self.assertRaises(ValueError, f.readline) self.assertRaises(ValueError, f.readlines) self.assertRaises(ValueError, f.seek, 0) diff --git a/Misc/NEWS b/Misc/NEWS index 46b383bf4b6..3679df42ca7 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -92,6 +92,8 @@ Core and Builtins Library ------- +- Issue #20578: Add io.BufferedIOBase.readinto1. + - Issue #21515: tempfile.TemporaryFile now uses os.O_TMPFILE flag is available. 
- Issue #13223: Fix pydoc.writedoc so that the HTML documentation for methods diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c index 7494646342e..b328d2525cb 100644 --- a/Modules/_io/bufferedio.c +++ b/Modules/_io/bufferedio.c @@ -24,6 +24,7 @@ _Py_IDENTIFIER(read); _Py_IDENTIFIER(read1); _Py_IDENTIFIER(readable); _Py_IDENTIFIER(readinto); +_Py_IDENTIFIER(readinto1); _Py_IDENTIFIER(writable); _Py_IDENTIFIER(write); @@ -47,17 +48,21 @@ PyDoc_STRVAR(bufferediobase_doc, ); static PyObject * -bufferediobase_readinto(PyObject *self, PyObject *args) +_bufferediobase_readinto_generic(PyObject *self, PyObject *args, char readinto1) { Py_buffer buf; Py_ssize_t len; PyObject *data; - if (!PyArg_ParseTuple(args, "w*:readinto", &buf)) { + if (!PyArg_ParseTuple(args, + readinto1 ? "w*:readinto1" : "w*:readinto", + &buf)) { return NULL; } - data = _PyObject_CallMethodId(self, &PyId_read, "n", buf.len); + data = _PyObject_CallMethodId(self, + readinto1 ? &PyId_read1 : &PyId_read, + "n", buf.len); if (data == NULL) goto error; @@ -88,6 +93,18 @@ bufferediobase_readinto(PyObject *self, PyObject *args) return NULL; } +static PyObject * +bufferediobase_readinto(PyObject *self, PyObject *args) +{ + return _bufferediobase_readinto_generic(self, args, 0); +} + +static PyObject * +bufferediobase_readinto1(PyObject *self, PyObject *args) +{ + return _bufferediobase_readinto_generic(self, args, 1); +} + static PyObject * bufferediobase_unsupported(const char *message) { @@ -167,6 +184,7 @@ static PyMethodDef bufferediobase_methods[] = { {"read", bufferediobase_read, METH_VARARGS, bufferediobase_read_doc}, {"read1", bufferediobase_read1, METH_VARARGS, bufferediobase_read1_doc}, {"readinto", bufferediobase_readinto, METH_VARARGS, NULL}, + {"readinto1", bufferediobase_readinto1, METH_VARARGS, NULL}, {"write", bufferediobase_write, METH_VARARGS, bufferediobase_write_doc}, {NULL, NULL} }; @@ -988,7 +1006,7 @@ buffered_read1(buffered *self, PyObject *args) } static PyObject * 
-buffered_readinto(buffered *self, PyObject *args) +_buffered_readinto_generic(buffered *self, PyObject *args, char readinto1) { Py_buffer buf; Py_ssize_t n, written = 0, remaining; @@ -996,7 +1014,9 @@ buffered_readinto(buffered *self, PyObject *args) CHECK_INITIALIZED(self) - if (!PyArg_ParseTuple(args, "w*:readinto", &buf)) + if (!PyArg_ParseTuple(args, + readinto1 ? "w*:readinto1" : "w*:readinto", + &buf)) return NULL; n = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t); @@ -1034,7 +1054,10 @@ buffered_readinto(buffered *self, PyObject *args) n = _bufferedreader_raw_read(self, (char *) buf.buf + written, remaining); } - else { + + /* In readinto1 mode, we do not want to fill the internal + buffer if we already have some data to return */ + else if (!(readinto1 && written)) { n = _bufferedreader_fill_buffer(self); if (n > 0) { if (n > remaining) @@ -1045,6 +1068,10 @@ buffered_readinto(buffered *self, PyObject *args) continue; /* short circuit */ } } + else { + n = 0; + } + if (n == 0 || (n == -2 && written > 0)) break; if (n < 0) { @@ -1054,6 +1081,12 @@ buffered_readinto(buffered *self, PyObject *args) } goto end; } + + /* At most one read in readinto1 mode */ + if (readinto1) { + written += n; + break; + } } res = PyLong_FromSsize_t(written); @@ -1064,6 +1097,19 @@ end_unlocked: return res; } +static PyObject * +buffered_readinto(buffered *self, PyObject *args) +{ + return _buffered_readinto_generic(self, args, 0); +} + +static PyObject * +buffered_readinto1(buffered *self, PyObject *args) +{ + return _buffered_readinto_generic(self, args, 1); +} + + static PyObject * _buffered_readline(buffered *self, Py_ssize_t limit) { @@ -1749,6 +1795,7 @@ static PyMethodDef bufferedreader_methods[] = { {"peek", (PyCFunction)buffered_peek, METH_VARARGS}, {"read1", (PyCFunction)buffered_read1, METH_VARARGS}, {"readinto", (PyCFunction)buffered_readinto, METH_VARARGS}, + {"readinto1", (PyCFunction)buffered_readinto1, METH_VARARGS}, {"readline", 
(PyCFunction)buffered_readline, METH_VARARGS}, {"seek", (PyCFunction)buffered_seek, METH_VARARGS}, {"tell", (PyCFunction)buffered_tell, METH_NOARGS}, @@ -2347,6 +2394,12 @@ bufferedrwpair_readinto(rwpair *self, PyObject *args) return _forward_call(self->reader, &PyId_readinto, args); } +static PyObject * +bufferedrwpair_readinto1(rwpair *self, PyObject *args) +{ + return _forward_call(self->reader, &PyId_readinto1, args); +} + static PyObject * bufferedrwpair_write(rwpair *self, PyObject *args) { @@ -2412,6 +2465,7 @@ static PyMethodDef bufferedrwpair_methods[] = { {"peek", (PyCFunction)bufferedrwpair_peek, METH_VARARGS}, {"read1", (PyCFunction)bufferedrwpair_read1, METH_VARARGS}, {"readinto", (PyCFunction)bufferedrwpair_readinto, METH_VARARGS}, + {"readinto1", (PyCFunction)bufferedrwpair_readinto1, METH_VARARGS}, {"write", (PyCFunction)bufferedrwpair_write, METH_VARARGS}, {"flush", (PyCFunction)bufferedrwpair_flush, METH_NOARGS}, @@ -2560,6 +2614,7 @@ static PyMethodDef bufferedrandom_methods[] = { {"read", (PyCFunction)buffered_read, METH_VARARGS}, {"read1", (PyCFunction)buffered_read1, METH_VARARGS}, {"readinto", (PyCFunction)buffered_readinto, METH_VARARGS}, + {"readinto1", (PyCFunction)buffered_readinto1, METH_VARARGS}, {"readline", (PyCFunction)buffered_readline, METH_VARARGS}, {"peek", (PyCFunction)buffered_peek, METH_VARARGS}, {"write", (PyCFunction)bufferedwriter_write, METH_VARARGS},