Merged revisions 86981,86984 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k ........ r86981 | antoine.pitrou | 2010-12-03 19:41:39 +0100 (ven., 03 déc. 2010) | 5 lines Issue #10478: Reentrant calls inside buffered IO objects (for example by way of a signal handler) now raise a RuntimeError instead of freezing the current process. ........ r86984 | antoine.pitrou | 2010-12-03 20:14:17 +0100 (ven., 03 déc. 2010) | 3 lines Add an "advanced topics" section to the io doc. ........
This commit is contained in:
parent
fb3de6b437
commit
4cb64ad8bb
|
@ -515,8 +515,8 @@ Raw File I/O
|
||||||
Buffered Streams
|
Buffered Streams
|
||||||
----------------
|
----------------
|
||||||
|
|
||||||
In many situations, buffered I/O streams will provide higher performance
|
Buffered I/O streams provide a higher-level interface to an I/O device
|
||||||
(bandwidth and latency) than raw I/O streams. Their API is also more usable.
|
than raw I/O does.
|
||||||
|
|
||||||
.. class:: BytesIO([initial_bytes])
|
.. class:: BytesIO([initial_bytes])
|
||||||
|
|
||||||
|
@ -776,8 +776,72 @@ Text I/O
|
||||||
# .getvalue() will now raise an exception.
|
# .getvalue() will now raise an exception.
|
||||||
output.close()
|
output.close()
|
||||||
|
|
||||||
|
|
||||||
.. class:: IncrementalNewlineDecoder
|
.. class:: IncrementalNewlineDecoder
|
||||||
|
|
||||||
A helper codec that decodes newlines for universal newlines mode. It
|
A helper codec that decodes newlines for universal newlines mode. It
|
||||||
inherits :class:`codecs.IncrementalDecoder`.
|
inherits :class:`codecs.IncrementalDecoder`.
|
||||||
|
|
||||||
|
|
||||||
|
Advanced topics
|
||||||
|
---------------
|
||||||
|
|
||||||
|
Here we will discuss several advanced topics pertaining to the concrete
|
||||||
|
I/O implementations described above.
|
||||||
|
|
||||||
|
Performance
|
||||||
|
^^^^^^^^^^^
|
||||||
|
|
||||||
|
Binary I/O
|
||||||
|
""""""""""
|
||||||
|
|
||||||
|
By reading and writing only large chunks of data even when the user asks
|
||||||
|
for a single byte, buffered I/O is designed to hide any inefficiency in
|
||||||
|
calling and executing the operating system's unbuffered I/O routines. The
|
||||||
|
gain will vary greatly depending on the OS and the kind of I/O which is
|
||||||
|
performed (for example, on some contemporary OSes such as Linux, unbuffered
|
||||||
|
disk I/O can be as fast as buffered I/O). The bottom line, however, is
|
||||||
|
that buffered I/O will offer you predictable performance regardless of the
|
||||||
|
platform and the backing device. Therefore, it is almost always preferable to
|
||||||
|
use buffered I/O rather than unbuffered I/O.
|
||||||
|
|
||||||
|
Text I/O
|
||||||
|
""""""""
|
||||||
|
|
||||||
|
Text I/O over a binary storage (such as a file) is significantly slower than
|
||||||
|
binary I/O over the same storage, because it requires conversions between
|
||||||
|
unicode and binary data using a character codec. This can become noticeable
|
||||||
|
if you handle huge amounts of text data (for example very large log files).
|
||||||
|
|
||||||
|
:class:`StringIO`, however, is a native in-memory unicode container and will
|
||||||
|
exhibit similar speed to :class:`BytesIO`.
|
||||||
|
|
||||||
|
Multi-threading
|
||||||
|
^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
:class:`FileIO` objects are thread-safe to the extent that the operating
|
||||||
|
system calls (such as ``read(2)`` under Unix) they are wrapping are thread-safe
|
||||||
|
too.
|
||||||
|
|
||||||
|
Binary buffered objects (instances of :class:`BufferedReader`,
|
||||||
|
:class:`BufferedWriter`, :class:`BufferedRandom` and :class:`BufferedRWPair`)
|
||||||
|
protect their internal structures using a lock; it is therefore safe to call
|
||||||
|
them from multiple threads at once.
|
||||||
|
|
||||||
|
:class:`TextIOWrapper` objects are not thread-safe.
|
||||||
|
|
||||||
|
Reentrancy
|
||||||
|
^^^^^^^^^^
|
||||||
|
|
||||||
|
Binary buffered objects (instances of :class:`BufferedReader`,
|
||||||
|
:class:`BufferedWriter`, :class:`BufferedRandom` and :class:`BufferedRWPair`)
|
||||||
|
are not reentrant. While reentrant calls will not happen in normal situations,
|
||||||
|
they can arise if you are doing I/O in a :mod:`signal` handler. If a
|
||||||
|
buffered object is entered again while it is already being accessed
|
||||||
|
*from the same thread*, then a :exc:`RuntimeError` is raised.
|
||||||
|
|
||||||
|
The above implicitly extends to text files, since the :func:`open()`
|
||||||
|
function will wrap a buffered object inside a :class:`TextIOWrapper`. This
|
||||||
|
includes standard streams and therefore affects the built-in function
|
||||||
|
:func:`print()` as well.
|
||||||
|
|
||||||
|
|
|
@ -2561,12 +2561,50 @@ class SignalsTest(unittest.TestCase):
|
||||||
def test_interrupted_write_text(self):
|
def test_interrupted_write_text(self):
|
||||||
self.check_interrupted_write("xy", b"xy", mode="w", encoding="ascii")
|
self.check_interrupted_write("xy", b"xy", mode="w", encoding="ascii")
|
||||||
|
|
||||||
|
def check_reentrant_write(self, data, **fdopen_kwargs):
|
||||||
|
def on_alarm(*args):
|
||||||
|
# Will be called reentrantly from the same thread
|
||||||
|
wio.write(data)
|
||||||
|
1/0
|
||||||
|
signal.signal(signal.SIGALRM, on_alarm)
|
||||||
|
r, w = os.pipe()
|
||||||
|
wio = self.io.open(w, **fdopen_kwargs)
|
||||||
|
try:
|
||||||
|
signal.alarm(1)
|
||||||
|
# Either the reentrant call to wio.write() fails with RuntimeError,
|
||||||
|
# or the signal handler raises ZeroDivisionError.
|
||||||
|
with self.assertRaises((ZeroDivisionError, RuntimeError)) as cm:
|
||||||
|
while 1:
|
||||||
|
for i in range(100):
|
||||||
|
wio.write(data)
|
||||||
|
wio.flush()
|
||||||
|
# Make sure the buffer doesn't fill up and block further writes
|
||||||
|
os.read(r, len(data) * 100)
|
||||||
|
exc = cm.exception
|
||||||
|
if isinstance(exc, RuntimeError):
|
||||||
|
self.assertTrue(str(exc).startswith("reentrant call"), str(exc))
|
||||||
|
finally:
|
||||||
|
wio.close()
|
||||||
|
os.close(r)
|
||||||
|
|
||||||
|
def test_reentrant_write_buffered(self):
|
||||||
|
self.check_reentrant_write(b"xy", mode="wb")
|
||||||
|
|
||||||
|
def test_reentrant_write_text(self):
|
||||||
|
self.check_reentrant_write("xy", mode="w", encoding="ascii")
|
||||||
|
|
||||||
|
|
||||||
class CSignalsTest(SignalsTest):
|
class CSignalsTest(SignalsTest):
|
||||||
io = io
|
io = io
|
||||||
|
|
||||||
class PySignalsTest(SignalsTest):
|
class PySignalsTest(SignalsTest):
|
||||||
io = pyio
|
io = pyio
|
||||||
|
|
||||||
|
# Handling reentrancy issues would slow down _pyio even more, so the
|
||||||
|
# tests are disabled.
|
||||||
|
test_reentrant_write_buffered = None
|
||||||
|
test_reentrant_write_text = None
|
||||||
|
|
||||||
|
|
||||||
def test_main():
|
def test_main():
|
||||||
tests = (CIOTest, PyIOTest,
|
tests = (CIOTest, PyIOTest,
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
:xq
|
|
||||||
Python News
|
Python News
|
||||||
+++++++++++
|
+++++++++++
|
||||||
|
|
||||||
|
@ -13,6 +12,10 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #10478: Reentrant calls inside buffered IO objects (for example by
|
||||||
|
way of a signal handler) now raise a RuntimeError instead of freezing the
|
||||||
|
current process.
|
||||||
|
|
||||||
- Issue #10497: Fix incorrect use of gettext in argparse.
|
- Issue #10497: Fix incorrect use of gettext in argparse.
|
||||||
|
|
||||||
- Issue #10464: netrc now correctly handles lines with embedded '#' characters.
|
- Issue #10464: netrc now correctly handles lines with embedded '#' characters.
|
||||||
|
|
|
@ -224,6 +224,7 @@ typedef struct {
|
||||||
|
|
||||||
#ifdef WITH_THREAD
|
#ifdef WITH_THREAD
|
||||||
PyThread_type_lock lock;
|
PyThread_type_lock lock;
|
||||||
|
volatile long owner;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
Py_ssize_t buffer_size;
|
Py_ssize_t buffer_size;
|
||||||
|
@ -259,17 +260,39 @@ typedef struct {
|
||||||
/* These macros protect the buffered object against concurrent operations. */
|
/* These macros protect the buffered object against concurrent operations. */
|
||||||
|
|
||||||
#ifdef WITH_THREAD
|
#ifdef WITH_THREAD
|
||||||
#define ENTER_BUFFERED(self) \
|
|
||||||
if (!PyThread_acquire_lock(self->lock, 0)) { \
|
static int
|
||||||
Py_BEGIN_ALLOW_THREADS \
|
_enter_buffered_busy(buffered *self)
|
||||||
PyThread_acquire_lock(self->lock, 1); \
|
{
|
||||||
Py_END_ALLOW_THREADS \
|
if (self->owner == PyThread_get_thread_ident()) {
|
||||||
|
PyObject *r = PyObject_Repr((PyObject *) self);
|
||||||
|
if (r != NULL) {
|
||||||
|
PyErr_Format(PyExc_RuntimeError,
|
||||||
|
"reentrant call inside %s",
|
||||||
|
PyString_AS_STRING(r));
|
||||||
|
Py_DECREF(r);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
Py_BEGIN_ALLOW_THREADS
|
||||||
|
PyThread_acquire_lock(self->lock, 1);
|
||||||
|
Py_END_ALLOW_THREADS
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define ENTER_BUFFERED(self) \
|
||||||
|
( (PyThread_acquire_lock(self->lock, 0) ? \
|
||||||
|
1 : _enter_buffered_busy(self)) \
|
||||||
|
&& (self->owner = PyThread_get_thread_ident(), 1) )
|
||||||
|
|
||||||
#define LEAVE_BUFFERED(self) \
|
#define LEAVE_BUFFERED(self) \
|
||||||
PyThread_release_lock(self->lock);
|
do { \
|
||||||
|
self->owner = 0; \
|
||||||
|
PyThread_release_lock(self->lock); \
|
||||||
|
} while(0);
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#define ENTER_BUFFERED(self)
|
#define ENTER_BUFFERED(self) 1
|
||||||
#define LEAVE_BUFFERED(self)
|
#define LEAVE_BUFFERED(self)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -425,7 +448,8 @@ buffered_close(buffered *self, PyObject *args)
|
||||||
int r;
|
int r;
|
||||||
|
|
||||||
CHECK_INITIALIZED(self)
|
CHECK_INITIALIZED(self)
|
||||||
ENTER_BUFFERED(self)
|
if (!ENTER_BUFFERED(self))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
r = buffered_closed(self);
|
r = buffered_closed(self);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
|
@ -438,7 +462,8 @@ buffered_close(buffered *self, PyObject *args)
|
||||||
/* flush() will most probably re-take the lock, so drop it first */
|
/* flush() will most probably re-take the lock, so drop it first */
|
||||||
LEAVE_BUFFERED(self)
|
LEAVE_BUFFERED(self)
|
||||||
res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
|
res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
|
||||||
ENTER_BUFFERED(self)
|
if (!ENTER_BUFFERED(self))
|
||||||
|
return NULL;
|
||||||
if (res == NULL) {
|
if (res == NULL) {
|
||||||
goto end;
|
goto end;
|
||||||
}
|
}
|
||||||
|
@ -643,6 +668,7 @@ _buffered_init(buffered *self)
|
||||||
PyErr_SetString(PyExc_RuntimeError, "can't allocate read lock");
|
PyErr_SetString(PyExc_RuntimeError, "can't allocate read lock");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
self->owner = 0;
|
||||||
#endif
|
#endif
|
||||||
/* Find out whether buffer_size is a power of 2 */
|
/* Find out whether buffer_size is a power of 2 */
|
||||||
/* XXX is this optimization useful? */
|
/* XXX is this optimization useful? */
|
||||||
|
@ -669,7 +695,8 @@ buffered_flush(buffered *self, PyObject *args)
|
||||||
CHECK_INITIALIZED(self)
|
CHECK_INITIALIZED(self)
|
||||||
CHECK_CLOSED(self, "flush of closed file")
|
CHECK_CLOSED(self, "flush of closed file")
|
||||||
|
|
||||||
ENTER_BUFFERED(self)
|
if (!ENTER_BUFFERED(self))
|
||||||
|
return NULL;
|
||||||
res = _bufferedwriter_flush_unlocked(self, 0);
|
res = _bufferedwriter_flush_unlocked(self, 0);
|
||||||
if (res != NULL && self->readable) {
|
if (res != NULL && self->readable) {
|
||||||
/* Rewind the raw stream so that its position corresponds to
|
/* Rewind the raw stream so that its position corresponds to
|
||||||
|
@ -696,7 +723,8 @@ buffered_peek(buffered *self, PyObject *args)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
ENTER_BUFFERED(self)
|
if (!ENTER_BUFFERED(self))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
if (self->writable) {
|
if (self->writable) {
|
||||||
res = _bufferedwriter_flush_unlocked(self, 1);
|
res = _bufferedwriter_flush_unlocked(self, 1);
|
||||||
|
@ -731,7 +759,8 @@ buffered_read(buffered *self, PyObject *args)
|
||||||
|
|
||||||
if (n == -1) {
|
if (n == -1) {
|
||||||
/* The number of bytes is unspecified, read until the end of stream */
|
/* The number of bytes is unspecified, read until the end of stream */
|
||||||
ENTER_BUFFERED(self)
|
if (!ENTER_BUFFERED(self))
|
||||||
|
return NULL;
|
||||||
res = _bufferedreader_read_all(self);
|
res = _bufferedreader_read_all(self);
|
||||||
LEAVE_BUFFERED(self)
|
LEAVE_BUFFERED(self)
|
||||||
}
|
}
|
||||||
|
@ -739,7 +768,8 @@ buffered_read(buffered *self, PyObject *args)
|
||||||
res = _bufferedreader_read_fast(self, n);
|
res = _bufferedreader_read_fast(self, n);
|
||||||
if (res == Py_None) {
|
if (res == Py_None) {
|
||||||
Py_DECREF(res);
|
Py_DECREF(res);
|
||||||
ENTER_BUFFERED(self)
|
if (!ENTER_BUFFERED(self))
|
||||||
|
return NULL;
|
||||||
res = _bufferedreader_read_generic(self, n);
|
res = _bufferedreader_read_generic(self, n);
|
||||||
LEAVE_BUFFERED(self)
|
LEAVE_BUFFERED(self)
|
||||||
}
|
}
|
||||||
|
@ -767,7 +797,8 @@ buffered_read1(buffered *self, PyObject *args)
|
||||||
if (n == 0)
|
if (n == 0)
|
||||||
return PyBytes_FromStringAndSize(NULL, 0);
|
return PyBytes_FromStringAndSize(NULL, 0);
|
||||||
|
|
||||||
ENTER_BUFFERED(self)
|
if (!ENTER_BUFFERED(self))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
if (self->writable) {
|
if (self->writable) {
|
||||||
res = _bufferedwriter_flush_unlocked(self, 1);
|
res = _bufferedwriter_flush_unlocked(self, 1);
|
||||||
|
@ -823,7 +854,8 @@ buffered_readinto(buffered *self, PyObject *args)
|
||||||
|
|
||||||
/* TODO: use raw.readinto() instead! */
|
/* TODO: use raw.readinto() instead! */
|
||||||
if (self->writable) {
|
if (self->writable) {
|
||||||
ENTER_BUFFERED(self)
|
if (!ENTER_BUFFERED(self))
|
||||||
|
return NULL;
|
||||||
res = _bufferedwriter_flush_unlocked(self, 0);
|
res = _bufferedwriter_flush_unlocked(self, 0);
|
||||||
LEAVE_BUFFERED(self)
|
LEAVE_BUFFERED(self)
|
||||||
if (res == NULL)
|
if (res == NULL)
|
||||||
|
@ -867,7 +899,8 @@ _buffered_readline(buffered *self, Py_ssize_t limit)
|
||||||
goto end_unlocked;
|
goto end_unlocked;
|
||||||
}
|
}
|
||||||
|
|
||||||
ENTER_BUFFERED(self)
|
if (!ENTER_BUFFERED(self))
|
||||||
|
goto end_unlocked;
|
||||||
|
|
||||||
/* Now we try to get some more from the raw stream */
|
/* Now we try to get some more from the raw stream */
|
||||||
if (self->writable) {
|
if (self->writable) {
|
||||||
|
@ -1017,7 +1050,8 @@ buffered_seek(buffered *self, PyObject *args)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ENTER_BUFFERED(self)
|
if (!ENTER_BUFFERED(self))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
/* Fallback: invoke raw seek() method and clear buffer */
|
/* Fallback: invoke raw seek() method and clear buffer */
|
||||||
if (self->writable) {
|
if (self->writable) {
|
||||||
|
@ -1055,7 +1089,8 @@ buffered_truncate(buffered *self, PyObject *args)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
ENTER_BUFFERED(self)
|
if (!ENTER_BUFFERED(self))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
if (self->writable) {
|
if (self->writable) {
|
||||||
res = _bufferedwriter_flush_unlocked(self, 0);
|
res = _bufferedwriter_flush_unlocked(self, 0);
|
||||||
|
@ -1715,7 +1750,10 @@ bufferedwriter_write(buffered *self, PyObject *args)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
ENTER_BUFFERED(self)
|
if (!ENTER_BUFFERED(self)) {
|
||||||
|
PyBuffer_Release(&buf);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
/* Fast path: the data to write can be fully buffered. */
|
/* Fast path: the data to write can be fully buffered. */
|
||||||
if (!VALID_READ_BUFFER(self) && !VALID_WRITE_BUFFER(self)) {
|
if (!VALID_READ_BUFFER(self) && !VALID_WRITE_BUFFER(self)) {
|
||||||
|
|
Loading…
Reference in New Issue