add BufferedIOBase.readinto1 (closes #20578)

Patch by Nikolaus Rath.
This commit is contained in:
Benjamin Peterson 2014-06-22 14:17:44 -07:00
parent 77143dbaee
commit a96fea03e8
4 changed files with 235 additions and 19 deletions

View File

@ -465,10 +465,11 @@ I/O Base Classes
.. method:: read1(size=-1)
Read and return up to *size* bytes, with at most one call to the underlying
raw stream's :meth:`~RawIOBase.read` method. This can be useful if you
are implementing your own buffering on top of a :class:`BufferedIOBase`
object.
Read and return up to *size* bytes, with at most one call to the
underlying raw stream's :meth:`~RawIOBase.read` (or
:meth:`~RawIOBase.readinto`) method. This can be useful if you
are implementing your own buffering on top of a
:class:`BufferedIOBase` object.
.. method:: readinto(b)
@ -481,6 +482,18 @@ I/O Base Classes
A :exc:`BlockingIOError` is raised if the underlying raw stream is in
non blocking-mode, and has no data available at the moment.
.. method:: readinto1(b)
Read up to ``len(b)`` bytes into bytearray *b*, ,using at most
one call to the underlying raw stream's :meth:`~RawIOBase.read`
(or :meth:`~RawIOBase.readinto`) method. Return the number of
bytes read.
A :exc:`BlockingIOError` is raised if the underlying raw stream is in
non blocking-mode, and has no data available at the moment.
.. versionadded:: 3.5
.. method:: write(b)
Write the given :class:`bytes` or :class:`bytearray` object, *b* and
@ -596,6 +609,11 @@ than raw I/O does.
In :class:`BytesIO`, this is the same as :meth:`read`.
.. method:: readinto1()
In :class:`BytesIO`, this is the same as :meth:`readinto`.
.. versionadded:: 3.5
.. class:: BufferedReader(raw, buffer_size=DEFAULT_BUFFER_SIZE)

View File

@ -6,6 +6,7 @@ import os
import abc
import codecs
import errno
import array
# Import _thread instead of threading to reduce startup cost
try:
from _thread import allocate_lock as Lock
@ -662,16 +663,33 @@ class BufferedIOBase(IOBase):
Raises BlockingIOError if the underlying raw stream has no
data at the moment.
"""
# XXX This ought to work with anything that supports the buffer API
data = self.read(len(b))
return self._readinto(b, read1=False)
def readinto1(self, b):
"""Read up to len(b) bytes into *b*, using at most one system call
Returns an int representing the number of bytes read (0 for EOF).
Raises BlockingIOError if the underlying raw stream has no
data at the moment.
"""
return self._readinto(b, read1=True)
def _readinto(self, b, read1):
if not isinstance(b, memoryview):
b = memoryview(b)
b = b.cast('B')
if read1:
data = self.read1(len(b))
else:
data = self.read(len(b))
n = len(data)
try:
b[:n] = data
except TypeError as err:
import array
if not isinstance(b, array.array):
raise err
b[:n] = array.array('b', data)
b[:n] = data
return n
def write(self, b):
@ -1065,6 +1083,58 @@ class BufferedReader(_BufferedIOMixin):
return self._read_unlocked(
min(size, len(self._read_buf) - self._read_pos))
# Implementing readinto() and readinto1() is not strictly necessary (we
# could rely on the base class that provides an implementation in terms of
# read() and read1()). We do it anyway to keep the _pyio implementation
# similar to the io implementation (which implements the methods for
# performance reasons).
def _readinto(self, buf, read1):
"""Read data into *buf* with at most one system call."""
if len(buf) == 0:
return 0
# Need to create a memoryview object of type 'b', otherwise
# we may not be able to assign bytes to it, and slicing it
# would create a new object.
if not isinstance(buf, memoryview):
buf = memoryview(buf)
buf = buf.cast('B')
written = 0
with self._read_lock:
while written < len(buf):
# First try to read from internal buffer
avail = min(len(self._read_buf) - self._read_pos, len(buf))
if avail:
buf[written:written+avail] = \
self._read_buf[self._read_pos:self._read_pos+avail]
self._read_pos += avail
written += avail
if written == len(buf):
break
# If remaining space in callers buffer is larger than
# internal buffer, read directly into callers buffer
if len(buf) - written > self.buffer_size:
n = self.raw.readinto(buf[written:])
if not n:
break # eof
written += n
# Otherwise refill internal buffer - unless we're
# in read1 mode and already got some data
elif not (read1 and written):
if not self._peek_unlocked(1):
break # eof
# In readinto1 mode, return as soon as we have some data
if read1 and written:
break
return written
def tell(self):
return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos
@ -1214,6 +1284,9 @@ class BufferedRWPair(BufferedIOBase):
def read1(self, size):
return self.reader.read1(size)
def readinto1(self, b):
return self.reader.readinto1(b)
def readable(self):
return self.reader.readable()
@ -1296,6 +1369,10 @@ class BufferedRandom(BufferedWriter, BufferedReader):
self.flush()
return BufferedReader.read1(self, size)
def readinto1(self, b):
self.flush()
return BufferedReader.readinto1(self, b)
def write(self, b):
if self._read_buf:
# Undo readahead

View File

@ -943,6 +943,71 @@ class BufferedReaderTest(unittest.TestCase, CommonBufferedTests):
self.assertEqual(bufio.readinto(b), 1)
self.assertEqual(b, b"cb")
def test_readinto1(self):
buffer_size = 10
rawio = self.MockRawIO((b"abc", b"de", b"fgh", b"jkl"))
bufio = self.tp(rawio, buffer_size=buffer_size)
b = bytearray(2)
self.assertEqual(bufio.peek(3), b'abc')
self.assertEqual(rawio._reads, 1)
self.assertEqual(bufio.readinto1(b), 2)
self.assertEqual(b, b"ab")
self.assertEqual(rawio._reads, 1)
self.assertEqual(bufio.readinto1(b), 1)
self.assertEqual(b[:1], b"c")
self.assertEqual(rawio._reads, 1)
self.assertEqual(bufio.readinto1(b), 2)
self.assertEqual(b, b"de")
self.assertEqual(rawio._reads, 2)
b = bytearray(2*buffer_size)
self.assertEqual(bufio.peek(3), b'fgh')
self.assertEqual(rawio._reads, 3)
self.assertEqual(bufio.readinto1(b), 6)
self.assertEqual(b[:6], b"fghjkl")
self.assertEqual(rawio._reads, 4)
def test_readinto_array(self):
buffer_size = 60
data = b"a" * 26
rawio = self.MockRawIO((data,))
bufio = self.tp(rawio, buffer_size=buffer_size)
# Create an array with element size > 1 byte
b = array.array('i', b'x' * 32)
assert len(b) != 16
# Read into it. We should get as many *bytes* as we can fit into b
# (which is more than the number of elements)
n = bufio.readinto(b)
self.assertGreater(n, len(b))
# Check that old contents of b are preserved
bm = memoryview(b).cast('B')
self.assertLess(n, len(bm))
self.assertEqual(bm[:n], data[:n])
self.assertEqual(bm[n:], b'x' * (len(bm[n:])))
def test_readinto1_array(self):
buffer_size = 60
data = b"a" * 26
rawio = self.MockRawIO((data,))
bufio = self.tp(rawio, buffer_size=buffer_size)
# Create an array with element size > 1 byte
b = array.array('i', b'x' * 32)
assert len(b) != 16
# Read into it. We should get as many *bytes* as we can fit into b
# (which is more than the number of elements)
n = bufio.readinto1(b)
self.assertGreater(n, len(b))
# Check that old contents of b are preserved
bm = memoryview(b).cast('B')
self.assertLess(n, len(bm))
self.assertEqual(bm[:n], data[:n])
self.assertEqual(bm[n:], b'x' * (len(bm[n:])))
def test_readlines(self):
def bufio():
rawio = self.MockRawIO((b"abc\n", b"d\n", b"ef"))
@ -3050,6 +3115,8 @@ class MiscIOTest(unittest.TestCase):
self.assertRaises(ValueError, f.readall)
if hasattr(f, "readinto"):
self.assertRaises(ValueError, f.readinto, bytearray(1024))
if hasattr(f, "readinto1"):
self.assertRaises(ValueError, f.readinto1, bytearray(1024))
self.assertRaises(ValueError, f.readline)
self.assertRaises(ValueError, f.readlines)
self.assertRaises(ValueError, f.seek, 0)

View File

@ -24,6 +24,7 @@ _Py_IDENTIFIER(read);
_Py_IDENTIFIER(read1);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(readinto);
_Py_IDENTIFIER(readinto1);
_Py_IDENTIFIER(writable);
_Py_IDENTIFIER(write);
@ -47,17 +48,21 @@ PyDoc_STRVAR(bufferediobase_doc,
);
static PyObject *
bufferediobase_readinto(PyObject *self, PyObject *args)
_bufferediobase_readinto_generic(PyObject *self, PyObject *args, char readinto1)
{
Py_buffer buf;
Py_ssize_t len;
PyObject *data;
if (!PyArg_ParseTuple(args, "w*:readinto", &buf)) {
if (!PyArg_ParseTuple(args,
readinto1 ? "w*:readinto1" : "w*:readinto",
&buf)) {
return NULL;
}
data = _PyObject_CallMethodId(self, &PyId_read, "n", buf.len);
data = _PyObject_CallMethodId(self,
readinto1 ? &PyId_read1 : &PyId_read,
"n", buf.len);
if (data == NULL)
goto error;
@ -88,6 +93,18 @@ bufferediobase_readinto(PyObject *self, PyObject *args)
return NULL;
}
static PyObject *
bufferediobase_readinto(PyObject *self, PyObject *args)
{
return _bufferediobase_readinto_generic(self, args, 0);
}
static PyObject *
bufferediobase_readinto1(PyObject *self, PyObject *args)
{
return _bufferediobase_readinto_generic(self, args, 1);
}
static PyObject *
bufferediobase_unsupported(const char *message)
{
@ -167,6 +184,7 @@ static PyMethodDef bufferediobase_methods[] = {
{"read", bufferediobase_read, METH_VARARGS, bufferediobase_read_doc},
{"read1", bufferediobase_read1, METH_VARARGS, bufferediobase_read1_doc},
{"readinto", bufferediobase_readinto, METH_VARARGS, NULL},
{"readinto1", bufferediobase_readinto1, METH_VARARGS, NULL},
{"write", bufferediobase_write, METH_VARARGS, bufferediobase_write_doc},
{NULL, NULL}
};
@ -989,7 +1007,7 @@ buffered_read1(buffered *self, PyObject *args)
}
static PyObject *
buffered_readinto(buffered *self, PyObject *args)
_buffered_readinto_generic(buffered *self, PyObject *args, char readinto1)
{
Py_buffer buf;
Py_ssize_t n, written = 0, remaining;
@ -997,7 +1015,9 @@ buffered_readinto(buffered *self, PyObject *args)
CHECK_INITIALIZED(self)
if (!PyArg_ParseTuple(args, "w*:readinto", &buf))
if (!PyArg_ParseTuple(args,
readinto1 ? "w*:readinto1" : "w*:readinto",
&buf))
return NULL;
n = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t);
@ -1035,7 +1055,10 @@ buffered_readinto(buffered *self, PyObject *args)
n = _bufferedreader_raw_read(self, (char *) buf.buf + written,
remaining);
}
else {
/* In readinto1 mode, we do not want to fill the internal
buffer if we already have some data to return */
else if (!(readinto1 && written)) {
n = _bufferedreader_fill_buffer(self);
if (n > 0) {
if (n > remaining)
@ -1046,6 +1069,9 @@ buffered_readinto(buffered *self, PyObject *args)
continue; /* short circuit */
}
}
else
n = 0;
if (n == 0 || (n == -2 && written > 0))
break;
if (n < 0) {
@ -1055,6 +1081,12 @@ buffered_readinto(buffered *self, PyObject *args)
}
goto end;
}
/* At most one read in readinto1 mode */
if (readinto1) {
written += n;
break;
}
}
res = PyLong_FromSsize_t(written);
@ -1065,6 +1097,19 @@ end_unlocked:
return res;
}
static PyObject *
buffered_readinto(buffered *self, PyObject *args)
{
return _buffered_readinto_generic(self, args, 0);
}
static PyObject *
buffered_readinto1(buffered *self, PyObject *args)
{
return _buffered_readinto_generic(self, args, 1);
}
static PyObject *
_buffered_readline(buffered *self, Py_ssize_t limit)
{
@ -1750,6 +1795,7 @@ static PyMethodDef bufferedreader_methods[] = {
{"peek", (PyCFunction)buffered_peek, METH_VARARGS},
{"read1", (PyCFunction)buffered_read1, METH_VARARGS},
{"readinto", (PyCFunction)buffered_readinto, METH_VARARGS},
{"readinto1", (PyCFunction)buffered_readinto1, METH_VARARGS},
{"readline", (PyCFunction)buffered_readline, METH_VARARGS},
{"seek", (PyCFunction)buffered_seek, METH_VARARGS},
{"tell", (PyCFunction)buffered_tell, METH_NOARGS},
@ -2348,6 +2394,12 @@ bufferedrwpair_readinto(rwpair *self, PyObject *args)
return _forward_call(self->reader, &PyId_readinto, args);
}
static PyObject *
bufferedrwpair_readinto1(rwpair *self, PyObject *args)
{
return _forward_call(self->reader, &PyId_readinto1, args);
}
static PyObject *
bufferedrwpair_write(rwpair *self, PyObject *args)
{
@ -2413,6 +2465,7 @@ static PyMethodDef bufferedrwpair_methods[] = {
{"peek", (PyCFunction)bufferedrwpair_peek, METH_VARARGS},
{"read1", (PyCFunction)bufferedrwpair_read1, METH_VARARGS},
{"readinto", (PyCFunction)bufferedrwpair_readinto, METH_VARARGS},
{"readinto1", (PyCFunction)bufferedrwpair_readinto1, METH_VARARGS},
{"write", (PyCFunction)bufferedrwpair_write, METH_VARARGS},
{"flush", (PyCFunction)bufferedrwpair_flush, METH_NOARGS},
@ -2561,6 +2614,7 @@ static PyMethodDef bufferedrandom_methods[] = {
{"read", (PyCFunction)buffered_read, METH_VARARGS},
{"read1", (PyCFunction)buffered_read1, METH_VARARGS},
{"readinto", (PyCFunction)buffered_readinto, METH_VARARGS},
{"readinto1", (PyCFunction)buffered_readinto1, METH_VARARGS},
{"readline", (PyCFunction)buffered_readline, METH_VARARGS},
{"peek", (PyCFunction)buffered_peek, METH_VARARGS},
{"write", (PyCFunction)bufferedwriter_write, METH_VARARGS},