From 01a2752d19d0ad615c989b8d742a736ca8d51a57 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 7 Mar 2007 01:00:12 +0000 Subject: [PATCH] New version from Mike Verdone (sat in my inbox since 2/27). I cleaned up whitespace but otherwise didn't change it. This will need work to reflect the tentative decision to drop nonblocking I/O support from the buffering layers. --- Lib/io.py | 231 +++++++++++++++++++++++++++++++++----------- Lib/test/test_io.py | 144 +++++++++++++++++++++------ 2 files changed, 288 insertions(+), 87 deletions(-) diff --git a/Lib/io.py b/Lib/io.py index 3ad9e9084c1..d6ee186b978 100644 --- a/Lib/io.py +++ b/Lib/io.py @@ -10,12 +10,21 @@ __author__ = ("Guido van Rossum , " "Mike Verdone ") __all__ = ["open", "RawIOBase", "FileIO", "SocketIO", "BytesIO", - "BufferedReader", "BufferedWriter", "BufferedRWPair", "EOF"] + "BufferedReader", "BufferedWriter", "BufferedRWPair", + "BufferedRandom", "EOF"] import os DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes -EOF = b"" +DEFAULT_MAX_BUFFER_SIZE = 16 * 1024 # bytes +EOF = b'' + + +class BlockingIO(IOError): + def __init__(self, errno, strerror, characters_written): + IOError.__init__(self, errno, strerror) + self.characters_written = characters_written + def open(filename, mode="r", buffering=None, *, encoding=None): """Replacement for the built-in open function. @@ -117,6 +126,11 @@ class RawIOBase: # XXX Add individual method docstrings def read(self, n): + """Read and return up to n bytes. + + Returns an empty bytes array on EOF, or None if the object is + set not to block and has no data to read. + """ b = bytes(n.__index__()) self.readinto(b) return b @@ -125,6 +139,10 @@ class RawIOBase: raise IOError(".readinto() not supported") def write(self, b): + """Write the given buffer to the IO stream. + + Returns the number of bytes written. + """ raise IOError(".write() not supported") def seek(self, pos, whence=0): @@ -324,111 +342,210 @@ class BytesIO(BufferedIOBase): return True +class BufferedIOBase(RawIOBase): + + """Base class for buffered IO objects.""" + + def flush(self): + """Flush the buffer to the underlying raw IO object.""" + raise IOError(".flush() unsupported") + + class BufferedReader(BufferedIOBase): - """Buffered reader. + """Buffer for a readable sequential RawIO object. - Buffer for a readable sequential RawIO object. Does not allow - random access (seek, tell). + Does not allow random access (seek, tell). """ def __init__(self, raw): - """ - Create a new buffered reader using the given readable raw IO object. + """Create a new buffered reader using the given readable raw IO object. """ assert raw.readable() self.raw = raw - self._read_buf = b'' + self._read_buf = b"" if hasattr(raw, 'fileno'): self.fileno = raw.fileno def read(self, n=None): + """Read n bytes. + + Returns exactly n bytes of data unless the underlying raw IO + stream reaches EOF of if the call would block in non-blocking + mode. If n is None, read until EOF or until read() would + block. """ - Read n bytes. Returns exactly n bytes of data unless the underlying - raw IO stream reaches EOF of if the call would block in non-blocking - mode. If n is None, read until EOF or until read() would block. - """ + assert n is None or n > 0 nodata_val = EOF while (len(self._read_buf) < n) if (n is not None) else True: current = self.raw.read(n) if current in (EOF, None): nodata_val = current break - self._read_buf += current # XXX using += is bad - read = self._read_buf[:n] - if (not self._read_buf): - return nodata_val - self._read_buf = self._read_buf[n if n else 0:] - return read - - def write(self, b): - raise IOError(".write() unsupported") + self._read_buf += current + if self._read_buf: + if n is None: + n = len(self._read_buf) + out = self._read_buf[:n] + self._read_buf = self._read_buf[n:] + else: + out = nodata_val + return out def readable(self): return True + def fileno(self): + return self.raw.fileno() + def flush(self): # Flush is a no-op pass + def close(self): + self.raw.close() + class BufferedWriter(BufferedIOBase): - """Buffered writer. - - XXX More docs. - """ - - def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE): - assert raw.writeable() + def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE, + max_buffer_size=DEFAULT_MAX_BUFFER_SIZE): + assert raw.writable() self.raw = raw self.buffer_size = buffer_size - self._write_buf_stack = [] - self._write_buf_size = 0 - if hasattr(raw, 'fileno'): - self.fileno = raw.fileno - - def read(self, n=None): - raise IOError(".read() not supported") + self.max_buffer_size = max_buffer_size + self._write_buf = b'' def write(self, b): + # XXX we can implement some more tricks to try and avoid partial writes assert issubclass(type(b), bytes) - self._write_buf_stack.append(b) - self._write_buf_size += len(b) - if (self._write_buf_size > self.buffer_size): - self.flush() + if len(self._write_buf) > self.buffer_size: + # We're full, so let's pre-flush the buffer + try: + self.flush() + except BlockingIO as e: + # We can't accept anything else. + raise BlockingIO(e.errno, e.strerror, 0) + self._write_buf += b + if (len(self._write_buf) > self.buffer_size): + try: + self.flush() + except BlockingIO as e: + if (len(self._write_buf) > self.max_buffer_size): + # We've hit max_buffer_size. We have to accept a partial + # write and cut back our buffer. + overage = len(self._write_buf) - self.max_buffer_size + self._write_buf = self._write_buf[:self.max_buffer_size] + raise BlockingIO(e.errno, e.strerror, overage) - def writeable(self): + def writable(self): return True def flush(self): - buf = b''.join(self._write_buf_stack) - while len(buf): - buf = buf[self.raw.write(buf):] - self._write_buf_stack = [] - self._write_buf_size = 0 + try: + while len(self._write_buf): + self._write_buf = self._write_buf[ + self.raw.write(self._write_buf):] + except BlockingIO as e: + self._write_buf[e.characters_written:] + raise - # XXX support flushing buffer on close, del + def fileno(self): + return self.raw.fileno() + + def close(self): + self.raw.close() + + def __del__(self): + # XXX flush buffers before dying. Is there a nicer way to do this? + if self._write_buf: + self.flush() class BufferedRWPair(BufferedReader, BufferedWriter): - """Buffered Read/Write Pair. + """A buffered reader and writer object together. A buffered reader object and buffered writer object put together to form a sequential IO object that can read and write. """ - def __init__(self, bufferedReader, bufferedWriter): - assert bufferedReader.readable() - assert bufferedWriter.writeable() - self.bufferedReader = bufferedReader - self.bufferedWriter = bufferedWriter - self.read = bufferedReader.read - self.write = bufferedWriter.write - self.flush = bufferedWriter.flush - self.readable = bufferedReader.readable - self.writeable = bufferedWriter.writeable + def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE, + max_buffer_size=DEFAULT_MAX_BUFFER_SIZE): + assert reader.readable() + assert writer.writable() + BufferedReader.__init__(self, reader) + BufferedWriter.__init__(self, writer, buffer_size, max_buffer_size) + self.reader = reader + self.writer = writer + + def read(self, n=None): + return self.reader.read(n) + + def write(self, b): + return self.writer.write(b) + + def readable(self): + return self.reader.readable() + + def writable(self): + return self.writer.writable() + + def flush(self): + return self.writer.flush() def seekable(self): return False + + def fileno(self): + # XXX whose fileno do we return? Reader's? Writer's? Unsupported? + raise IOError(".fileno() unsupported") + + def close(self): + self.reader.close() + self.writer.close() + + +class BufferedRandom(BufferedReader, BufferedWriter): + + def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE, + max_buffer_size=DEFAULT_MAX_BUFFER_SIZE): + assert raw.seekable() + BufferedReader.__init__(self, raw) + BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size) + + def seekable(self): + return self.raw.seekable() + + def readable(self): + return self.raw.readable() + + def writable(self): + return self.raw.writable() + + def seek(self, pos, whence=0): + self.flush() + self._read_buf = b"" + self.raw.seek(pos, whence) + # XXX I suppose we could implement some magic here to move through the + # existing read buffer in the case of seek(, 1) + + def tell(self): + if (self._write_buf): + return self.raw.tell() + len(self._write_buf) + else: + return self.raw.tell() - len(self._read_buf) + + def read(self, n=None): + self.flush() + return BufferedReader.read(self, n) + + def write(self, b): + self._read_buf = b"" + return BufferedWriter.write(self, b) + + def flush(self): + BufferedWriter.flush(self) + + def close(self): + self.raw.close() diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 7384999d63a..1be1b714e54 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -5,10 +5,10 @@ from test import test_support import io - -class MockReadIO(io.RawIOBase): - def __init__(self, readStack): +class MockIO(io.RawIOBase): + def __init__(self, readStack=()): self._readStack = list(readStack) + self._writeStack = [] def read(self, n=None): try: @@ -16,27 +16,41 @@ class MockReadIO(io.RawIOBase): except: return io.EOF + def write(self, b): + self._writeStack.append(b) + return len(b) + + def writable(self): + return True + def fileno(self): return 42 def readable(self): return True - -class MockWriteIO(io.RawIOBase): - def __init__(self): - self._writeStack = [] - - def write(self, b): - self._writeStack.append(b) - return len(b) - - def writeable(self): + def seekable(self): return True - def fileno(self): + def seek(self, pos, whence): + pass + + def tell(self): return 42 +class MockNonBlockWriterIO(io.RawIOBase): + def __init__(self, blockingScript): + self.bs = list(blockingScript) + self._write_stack = [] + def write(self, b): + self._write_stack.append(b) + n = self.bs.pop(0) + if (n < 0): + raise io.BlockingIO(0, "test blocking", -n) + else: + return n + def writable(self): + return True class IOTest(unittest.TestCase): @@ -90,9 +104,7 @@ class IOTest(unittest.TestCase): f = io.BytesIO(data) self.read_ops(f) - class BytesIOTest(unittest.TestCase): - def testInit(self): buf = b"1234567890" bytesIo = io.BytesIO(buf) @@ -134,44 +146,51 @@ class BytesIOTest(unittest.TestCase): bytesIo.seek(10000) self.assertEquals(10000, bytesIo.tell()) - class BufferedReaderTest(unittest.TestCase): - def testRead(self): - rawIo = MockReadIO((b"abc", b"d", b"efg")) + rawIo = MockIO((b"abc", b"d", b"efg")) bufIo = io.BufferedReader(rawIo) self.assertEquals(b"abcdef", bufIo.read(6)) + def testReadNonBlocking(self): + # Inject some None's in there to simulate EWOULDBLOCK + rawIo = MockIO((b"abc", b"d", None, b"efg", None, None)) + bufIo = io.BufferedReader(rawIo) + + self.assertEquals(b"abcd", bufIo.read(6)) + self.assertEquals(b"e", bufIo.read(1)) + self.assertEquals(b"fg", bufIo.read()) + self.assert_(None is bufIo.read()) + self.assertEquals(io.EOF, bufIo.read()) + def testReadToEof(self): - rawIo = MockReadIO((b"abc", b"d", b"efg")) + rawIo = MockIO((b"abc", b"d", b"efg")) bufIo = io.BufferedReader(rawIo) self.assertEquals(b"abcdefg", bufIo.read(9000)) def testReadNoArgs(self): - rawIo = MockReadIO((b"abc", b"d", b"efg")) + rawIo = MockIO((b"abc", b"d", b"efg")) bufIo = io.BufferedReader(rawIo) self.assertEquals(b"abcdefg", bufIo.read()) def testFileno(self): - rawIo = MockReadIO((b"abc", b"d", b"efg")) + rawIo = MockIO((b"abc", b"d", b"efg")) bufIo = io.BufferedReader(rawIo) self.assertEquals(42, bufIo.fileno()) def testFilenoNoFileno(self): - # TODO will we always have fileno() function? If so, kill + # XXX will we always have fileno() function? If so, kill # this test. Else, write it. pass - class BufferedWriterTest(unittest.TestCase): - def testWrite(self): # Write to the buffered IO but don't overflow the buffer. - writer = MockWriteIO() + writer = MockIO() bufIo = io.BufferedWriter(writer, 8) bufIo.write(b"abc") @@ -179,7 +198,7 @@ class BufferedWriterTest(unittest.TestCase): self.assertFalse(writer._writeStack) def testWriteOverflow(self): - writer = MockWriteIO() + writer = MockIO() bufIo = io.BufferedWriter(writer, 8) bufIo.write(b"abc") @@ -187,8 +206,33 @@ class BufferedWriterTest(unittest.TestCase): self.assertEquals(b"abcdefghijkl", writer._writeStack[0]) + def testWriteNonBlocking(self): + raw = MockNonBlockWriterIO((9, 2, 22, -6, 10, 12, 12)) + bufIo = io.BufferedWriter(raw, 8, 16) + + bufIo.write(b"asdf") + bufIo.write(b"asdfa") + self.assertEquals(b"asdfasdfa", raw._write_stack[0]) + + bufIo.write(b"asdfasdfasdf") + self.assertEquals(b"asdfasdfasdf", raw._write_stack[1]) + bufIo.write(b"asdfasdfasdf") + self.assertEquals(b"dfasdfasdf", raw._write_stack[2]) + self.assertEquals(b"asdfasdfasdf", raw._write_stack[3]) + + bufIo.write(b"asdfasdfasdf") + + # XXX I don't like this test. It relies too heavily on how the algorithm + # actually works, which we might change. Refactor later. + + def testFileno(self): + rawIo = MockIO((b"abc", b"d", b"efg")) + bufIo = io.BufferedWriter(rawIo) + + self.assertEquals(42, bufIo.fileno()) + def testFlush(self): - writer = MockWriteIO() + writer = MockIO() bufIo = io.BufferedWriter(writer, 8) bufIo.write(b"abc") @@ -196,11 +240,51 @@ class BufferedWriterTest(unittest.TestCase): self.assertEquals(b"abc", writer._writeStack[0]) -# TODO. Tests for open() +class BufferedRWPairTest(unittest.TestCase): + def testRWPair(self): + r = MockIO(()) + w = MockIO() + pair = io.BufferedRWPair(r, w) + + # XXX need implementation + +class BufferedRandom(unittest.TestCase): + def testReadAndWrite(self): + raw = MockIO((b"asdf", b"ghjk")) + rw = io.BufferedRandom(raw, 8, 12) + + self.assertEqual(b"as", rw.read(2)) + rw.write(b"ddd") + rw.write(b"eee") + self.assertFalse(raw._writeStack) # Buffer writes + self.assertEqual(b"ghjk", rw.read()) # This read forces write flush + self.assertEquals(b"dddeee", raw._writeStack[0]) + + def testSeekAndTell(self): + raw = io.BytesIO(b"asdfghjkl") + rw = io.BufferedRandom(raw) + + self.assertEquals(b"as", rw.read(2)) + self.assertEquals(2, rw.tell()) + rw.seek(0, 0) + self.assertEquals(b"asdf", rw.read(4)) + + rw.write(b"asdf") + rw.seek(0, 0) + self.assertEquals(b"asdfasdfl", rw.read()) + self.assertEquals(9, rw.tell()) + rw.seek(-4, 2) + self.assertEquals(5, rw.tell()) + rw.seek(2, 1) + self.assertEquals(7, rw.tell()) + self.assertEquals(b"fl", rw.read(11)) + +# XXX Tests for open() def test_main(): test_support.run_unittest(IOTest, BytesIOTest, BufferedReaderTest, - BufferedWriterTest) + BufferedWriterTest, BufferedRWPairTest, + BufferedRandom) if __name__ == "__main__": test_main()