diff --git a/Doc/library/io.rst b/Doc/library/io.rst
index 54970e23748..e3c71036784 100644
--- a/Doc/library/io.rst
+++ b/Doc/library/io.rst
@@ -515,8 +515,8 @@ Raw File I/O
 Buffered Streams
 ----------------
 
-In many situations, buffered I/O streams will provide higher performance
-(bandwidth and latency) than raw I/O streams. Their API is also more usable.
+Buffered I/O streams provide a higher-level interface to an I/O device
+than raw I/O does.
 
 .. class:: BytesIO([initial_bytes])
 
@@ -776,8 +776,72 @@ Text I/O
       # .getvalue() will now raise an exception.
       output.close()
 
+
 .. class:: IncrementalNewlineDecoder
 
    A helper codec that decodes newlines for universal newlines mode. It
    inherits :class:`codecs.IncrementalDecoder`.
+
+Advanced topics
+---------------
+
+Here we will discuss several advanced topics pertaining to the concrete
+I/O implementations described above.
+
+Performance
+^^^^^^^^^^^
+
+Binary I/O
+""""""""""
+
+By reading and writing only large chunks of data even when the user asks
+for a single byte, buffered I/O is designed to hide any inefficiency in
+calling and executing the operating system's unbuffered I/O routines. The
+gain varies greatly depending on the OS and the kind of I/O which is
+performed (for example, on some contemporary OSes such as Linux, unbuffered
+disk I/O can be as fast as buffered I/O). The bottom line, however, is
+that buffered I/O offers predictable performance regardless of the
+platform and the backing device. Therefore, it is almost always preferable
+to use buffered I/O rather than unbuffered I/O.
+
+Text I/O
+""""""""
+
+Text I/O over a binary storage (such as a file) is significantly slower than
+binary I/O over the same storage, because it implies conversions from
+unicode to binary data using a character codec. This can become noticeable
+if you handle huge amounts of text data (for example very large log files).
+
+:class:`StringIO`, however, is a native in-memory unicode container and will
+exhibit similar speed to :class:`BytesIO`.
+
+Multi-threading
+^^^^^^^^^^^^^^^
+
+:class:`FileIO` objects are thread-safe to the extent that the operating
+system calls (such as ``read(2)`` under Unix) they wrap are thread-safe
+too.
+
+Binary buffered objects (instances of :class:`BufferedReader`,
+:class:`BufferedWriter`, :class:`BufferedRandom` and :class:`BufferedRWPair`)
+protect their internal structures using a lock; it is therefore safe to call
+them from multiple threads at once.
+
+:class:`TextIOWrapper` objects are not thread-safe.
+
+Reentrancy
+^^^^^^^^^^
+
+Binary buffered objects (instances of :class:`BufferedReader`,
+:class:`BufferedWriter`, :class:`BufferedRandom` and :class:`BufferedRWPair`)
+are not reentrant. While reentrant calls will not happen in normal situations,
+they can arise if you are doing I/O in a :mod:`signal` handler. If a thread
+tries to re-enter a buffered object which it is already accessing, a
+:exc:`RuntimeError` is raised.
+
+The above implicitly extends to text files, since the :func:`open()`
+function will wrap a buffered object inside a :class:`TextIOWrapper`. This
+includes standard streams and therefore affects the built-in function
+:func:`print()` as well.
+
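The last paragraph of the new documentation (standard streams, and therefore print()) is how most users will actually meet this error. Below is an illustrative sketch, not part of the patch: it assumes a Unix platform that provides SIGALRM, and the loop bound is arbitrary. Whether the RuntimeError is actually raised depends on whether the alarm happens to interrupt a buffered write that is already in progress.

    import signal
    import sys

    def on_alarm(signum, frame):
        # print() writes to sys.stdout, a TextIOWrapper around a BufferedWriter.
        # If this alarm interrupted a print() that was already in progress, the
        # call below re-enters that buffered object from the same thread and,
        # with this patch, raises RuntimeError instead of deadlocking.
        print("alarm")
        signal.alarm(1)              # re-arm and keep trying otherwise

    signal.signal(signal.SIGALRM, on_alarm)
    signal.alarm(1)
    try:
        # Bounded loop so the sketch terminates even if the race is never won.
        for _ in range(10 ** 6):
            print("x" * 79)
    except RuntimeError as exc:
        sys.stderr.write("caught: %s\n" % exc)
    finally:
        signal.alarm(0)

check_reentrant_write() in the test changes below exercises the same situation against a pipe and accepts either outcome of the race, which is why its signal handler also raises ZeroDivisionError as a fallback.
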
diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py
index ad2919b925a..042879dce70 100644
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
@@ -2561,12 +2561,50 @@ class SignalsTest(unittest.TestCase):
     def test_interrupted_write_text(self):
         self.check_interrupted_write("xy", b"xy", mode="w", encoding="ascii")
 
+    def check_reentrant_write(self, data, **fdopen_kwargs):
+        def on_alarm(*args):
+            # Will be called reentrantly from the same thread
+            wio.write(data)
+            1/0
+        signal.signal(signal.SIGALRM, on_alarm)
+        r, w = os.pipe()
+        wio = self.io.open(w, **fdopen_kwargs)
+        try:
+            signal.alarm(1)
+            # Either the reentrant call to wio.write() fails with RuntimeError,
+            # or the signal handler raises ZeroDivisionError.
+            with self.assertRaises((ZeroDivisionError, RuntimeError)) as cm:
+                while 1:
+                    for i in range(100):
+                        wio.write(data)
+                        wio.flush()
+                    # Make sure the buffer doesn't fill up and block further writes
+                    os.read(r, len(data) * 100)
+            exc = cm.exception
+            if isinstance(exc, RuntimeError):
+                self.assertTrue(str(exc).startswith("reentrant call"), str(exc))
+        finally:
+            wio.close()
+            os.close(r)
+
+    def test_reentrant_write_buffered(self):
+        self.check_reentrant_write(b"xy", mode="wb")
+
+    def test_reentrant_write_text(self):
+        self.check_reentrant_write("xy", mode="w", encoding="ascii")
+
+
 class CSignalsTest(SignalsTest):
     io = io
 
 class PySignalsTest(SignalsTest):
     io = pyio
 
+    # Handling reentrancy issues would slow down _pyio even more, so the
+    # tests are disabled.
+    test_reentrant_write_buffered = None
+    test_reentrant_write_text = None
+
 def test_main():
     tests = (CIOTest, PyIOTest,
diff --git a/Misc/NEWS b/Misc/NEWS
index 82a4cd5ae05..fcea07204cb 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -1,4 +1,3 @@
-:xq
 Python News
 +++++++++++
 
@@ -13,6 +12,10 @@ Core and Builtins
 Library
 -------
 
+- Issue #10478: Reentrant calls inside buffered IO objects (for example by
+  way of a signal handler) now raise a RuntimeError instead of freezing the
+  current process.
+
 - Issue #10497: Fix incorrect use of gettext in argparse.
 
 - Issue #10464: netrc now correctly handles lines with embedded '#' characters.
diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c
index d96b7eee3ce..c6373c6680b 100644
--- a/Modules/_io/bufferedio.c
+++ b/Modules/_io/bufferedio.c
@@ -224,6 +224,7 @@ typedef struct {
 
 #ifdef WITH_THREAD
     PyThread_type_lock lock;
+    volatile long owner;
 #endif
 
     Py_ssize_t buffer_size;
@@ -259,17 +260,39 @@ typedef struct {
 /* These macros protect the buffered object against concurrent operations. */
 
 #ifdef WITH_THREAD
-#define ENTER_BUFFERED(self) \
-    if (!PyThread_acquire_lock(self->lock, 0)) { \
-        Py_BEGIN_ALLOW_THREADS \
-        PyThread_acquire_lock(self->lock, 1); \
-        Py_END_ALLOW_THREADS \
+
+static int
+_enter_buffered_busy(buffered *self)
+{
+    if (self->owner == PyThread_get_thread_ident()) {
+        PyObject *r = PyObject_Repr((PyObject *) self);
+        if (r != NULL) {
+            PyErr_Format(PyExc_RuntimeError,
+                         "reentrant call inside %s",
+                         PyString_AS_STRING(r));
+            Py_DECREF(r);
+        }
+        return 0;
     }
+    Py_BEGIN_ALLOW_THREADS
+    PyThread_acquire_lock(self->lock, 1);
+    Py_END_ALLOW_THREADS
+    return 1;
+}
+
+#define ENTER_BUFFERED(self) \
+    ( (PyThread_acquire_lock(self->lock, 0) ? \
+        1 : _enter_buffered_busy(self)) \
+      && (self->owner = PyThread_get_thread_ident(), 1) )
 
 #define LEAVE_BUFFERED(self) \
-    PyThread_release_lock(self->lock);
+    do { \
+        self->owner = 0; \
+        PyThread_release_lock(self->lock); \
+    } while(0);
+
 #else
-#define ENTER_BUFFERED(self)
+#define ENTER_BUFFERED(self) 1
 #define LEAVE_BUFFERED(self)
 #endif
@@ -425,7 +448,8 @@ buffered_close(buffered *self, PyObject *args)
     int r;
 
     CHECK_INITIALIZED(self)
-    ENTER_BUFFERED(self)
+    if (!ENTER_BUFFERED(self))
+        return NULL;
 
     r = buffered_closed(self);
     if (r < 0)
@@ -438,7 +462,8 @@ buffered_close(buffered *self, PyObject *args)
     /* flush() will most probably re-take the lock, so drop it first */
     LEAVE_BUFFERED(self)
     res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
-    ENTER_BUFFERED(self)
+    if (!ENTER_BUFFERED(self))
+        return NULL;
     if (res == NULL) {
         goto end;
     }
@@ -643,6 +668,7 @@ _buffered_init(buffered *self)
         PyErr_SetString(PyExc_RuntimeError, "can't allocate read lock");
         return -1;
     }
+    self->owner = 0;
 #endif
     /* Find out whether buffer_size is a power of 2 */
     /* XXX is this optimization useful? */
@@ -669,7 +695,8 @@ buffered_flush(buffered *self, PyObject *args)
     CHECK_INITIALIZED(self)
     CHECK_CLOSED(self, "flush of closed file")
 
-    ENTER_BUFFERED(self)
+    if (!ENTER_BUFFERED(self))
+        return NULL;
     res = _bufferedwriter_flush_unlocked(self, 0);
     if (res != NULL && self->readable) {
         /* Rewind the raw stream so that its position corresponds to
@@ -696,7 +723,8 @@ buffered_peek(buffered *self, PyObject *args)
             return NULL;
     }
 
-    ENTER_BUFFERED(self)
+    if (!ENTER_BUFFERED(self))
+        return NULL;
 
     if (self->writable) {
         res = _bufferedwriter_flush_unlocked(self, 1);
@@ -731,7 +759,8 @@ buffered_read(buffered *self, PyObject *args)
     if (n == -1) {
         /* The number of bytes is unspecified, read until the end of stream */
-        ENTER_BUFFERED(self)
+        if (!ENTER_BUFFERED(self))
+            return NULL;
         res = _bufferedreader_read_all(self);
         LEAVE_BUFFERED(self)
     }
@@ -739,7 +768,8 @@ buffered_read(buffered *self, PyObject *args)
         res = _bufferedreader_read_fast(self, n);
         if (res == Py_None) {
             Py_DECREF(res);
-            ENTER_BUFFERED(self)
+            if (!ENTER_BUFFERED(self))
+                return NULL;
             res = _bufferedreader_read_generic(self, n);
             LEAVE_BUFFERED(self)
         }
@@ -767,7 +797,8 @@ buffered_read1(buffered *self, PyObject *args)
     if (n == 0)
         return PyBytes_FromStringAndSize(NULL, 0);
 
-    ENTER_BUFFERED(self)
+    if (!ENTER_BUFFERED(self))
+        return NULL;
 
     if (self->writable) {
         res = _bufferedwriter_flush_unlocked(self, 1);
@@ -823,7 +854,8 @@ buffered_readinto(buffered *self, PyObject *args)
 
     /* TODO: use raw.readinto() instead! */
     if (self->writable) {
-        ENTER_BUFFERED(self)
+        if (!ENTER_BUFFERED(self))
+            return NULL;
         res = _bufferedwriter_flush_unlocked(self, 0);
         LEAVE_BUFFERED(self)
         if (res == NULL)
@@ -867,7 +899,8 @@ _buffered_readline(buffered *self, Py_ssize_t limit)
         goto end_unlocked;
     }
 
-    ENTER_BUFFERED(self)
+    if (!ENTER_BUFFERED(self))
+        goto end_unlocked;
 
     /* Now we try to get some more from the raw stream */
     if (self->writable) {
@@ -1017,7 +1050,8 @@ buffered_seek(buffered *self, PyObject *args)
         }
     }
 
-    ENTER_BUFFERED(self)
+    if (!ENTER_BUFFERED(self))
+        return NULL;
 
     /* Fallback: invoke raw seek() method and clear buffer */
     if (self->writable) {
@@ -1055,7 +1089,8 @@ buffered_truncate(buffered *self, PyObject *args)
         return NULL;
     }
 
-    ENTER_BUFFERED(self)
+    if (!ENTER_BUFFERED(self))
+        return NULL;
 
     if (self->writable) {
         res = _bufferedwriter_flush_unlocked(self, 0);
@@ -1715,7 +1750,8 @@ bufferedwriter_write(buffered *self, PyObject *args)
         return NULL;
     }
 
-    ENTER_BUFFERED(self)
+    if (!ENTER_BUFFERED(self)) {
+        PyBuffer_Release(&buf);
+        return NULL;
+    }
 
     /* Fast path: the data to write can be fully buffered. */
     if (!VALID_READ_BUFFER(self) && !VALID_WRITE_BUFFER(self)) {
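For readers following the C changes, the locking discipline that ENTER_BUFFERED, LEAVE_BUFFERED and _enter_buffered_busy() now implement can be summarised by the small Python sketch below. It is illustrative only (the _BufferedGuard class and its name argument are invented here, and the real code stays in C), but it mirrors the same scheme: a non-blocking acquire first, then a reentrancy check against the recorded owning thread.

    import threading

    class _BufferedGuard(object):
        """Python rendering of the owner-tracking lock used by buffered objects."""

        def __init__(self, name):
            self._lock = threading.Lock()
            self._owner = 0            # ident of the thread currently inside
            self._name = name

        def __enter__(self):
            # Fast path: non-blocking acquire, like PyThread_acquire_lock(lock, 0).
            if not self._lock.acquire(False):
                # Contended: if the recorded owner is this very thread, the call
                # is reentrant (e.g. from a signal handler) -- fail instead of
                # deadlocking, like _enter_buffered_busy().
                if self._owner == threading.current_thread().ident:
                    raise RuntimeError("reentrant call inside %s" % self._name)
                self._lock.acquire()   # another thread holds it: block normally
            self._owner = threading.current_thread().ident
            return self

        def __exit__(self, *exc_info):
            self._owner = 0
            self._lock.release()

    guard = _BufferedGuard("<BufferedWriter name='<stdout>'>")
    with guard:
        try:
            with guard:            # same thread re-entering: refused immediately
                pass
        except RuntimeError as exc:
            print(exc)             # reentrant call inside <BufferedWriter ...>

The design point is that the owner field is only ever written while the lock is held, so the unsynchronized read in the contended path can match the current thread's ident only if that thread itself holds the lock, which is exactly the reentrant case being detected.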