From 5135992164f4c0df8d18d3b486431b28214db16b Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sat, 23 Jun 2012 23:55:39 -0700 Subject: [PATCH] Fixes issue #12268: File readline, readlines and read() or readall() methods no longer lose data when an underlying read system call is interrupted. IOError is no longer raised due to a read system call returning EINTR from within these methods. --- Lib/test/test_file_eintr.py | 236 ++++++++++++++++++++++++++++++++++++ Misc/NEWS | 5 + Modules/_io/_iomodule.h | 5 + Modules/_io/bufferedio.c | 8 +- Modules/_io/fileio.c | 7 ++ Modules/_io/iobase.c | 21 +++- Modules/_io/textio.c | 16 ++- 7 files changed, 290 insertions(+), 8 deletions(-) create mode 100644 Lib/test/test_file_eintr.py diff --git a/Lib/test/test_file_eintr.py b/Lib/test/test_file_eintr.py new file mode 100644 index 00000000000..b4e18ce17d1 --- /dev/null +++ b/Lib/test/test_file_eintr.py @@ -0,0 +1,236 @@ +# Written to test interrupted system calls interfering with our many buffered +# IO implementations. http://bugs.python.org/issue12268 +# +# It was suggested that this code could be merged into test_io and the tests +# made to work using the same method as the existing signal tests in test_io. +# I was unable to get single process tests using alarm or setitimer that way +# to reproduce the EINTR problems. This process based test suite reproduces +# the problems prior to the issue12268 patch reliably on Linux and OSX. +# - gregory.p.smith + +import os +import select +import signal +import subprocess +import sys +from test.support import run_unittest +import time +import unittest + +# Test import all of the things we're about to try testing up front. +from _io import FileIO + + +@unittest.skipUnless(os.name == 'posix', 'tests requires a posix system.') +class TestFileIOSignalInterrupt(unittest.TestCase): + def setUp(self): + self._process = None + + def tearDown(self): + if self._process and self._process.poll() is None: + try: + self._process.kill() + except OSError: + pass + + def _generate_infile_setup_code(self): + """Returns the infile = ... line of code for the reader process. + + subclasseses should override this to test different IO objects. + """ + return ('import _io ;' + 'infile = _io.FileIO(sys.stdin.fileno(), "rb")') + + def fail_with_process_info(self, why, stdout=b'', stderr=b'', + communicate=True): + """A common way to cleanup and fail with useful debug output. + + Kills the process if it is still running, collects remaining output + and fails the test with an error message including the output. + + Args: + why: Text to go after "Error from IO process" in the message. + stdout, stderr: standard output and error from the process so + far to include in the error message. + communicate: bool, when True we call communicate() on the process + after killing it to gather additional output. + """ + if self._process.poll() is None: + time.sleep(0.1) # give it time to finish printing the error. + try: + self._process.terminate() # Ensure it dies. + except OSError: + pass + if communicate: + stdout_end, stderr_end = self._process.communicate() + stdout += stdout_end + stderr += stderr_end + self.fail('Error from IO process %s:\nSTDOUT:\n%sSTDERR:\n%s\n' % + (why, stdout.decode(), stderr.decode())) + + def _test_reading(self, data_to_write, read_and_verify_code): + """Generic buffered read method test harness to validate EINTR behavior. + + Also validates that Python signal handlers are run during the read. + + Args: + data_to_write: String to write to the child process for reading + before sending it a signal, confirming the signal was handled, + writing a final newline and closing the infile pipe. + read_and_verify_code: Single "line" of code to read from a file + object named 'infile' and validate the result. This will be + executed as part of a python subprocess fed data_to_write. + """ + infile_setup_code = self._generate_infile_setup_code() + # Total pipe IO in this function is smaller than the minimum posix OS + # pipe buffer size of 512 bytes. No writer should block. + assert len(data_to_write) < 512, 'data_to_write must fit in pipe buf.' + + # Start a subprocess to call our read method while handling a signal. + self._process = subprocess.Popen( + [sys.executable, '-u', '-c', + 'import signal, sys ;' + 'signal.signal(signal.SIGINT, ' + 'lambda s, f: sys.stderr.write("$\\n")) ;' + + infile_setup_code + ' ;' + + 'sys.stderr.write("Worm Sign!\\n") ;' + + read_and_verify_code + ' ;' + + 'infile.close()' + ], + stdin=subprocess.PIPE, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + + # Wait for the signal handler to be installed. + worm_sign = self._process.stderr.read(len(b'Worm Sign!\n')) + if worm_sign != b'Worm Sign!\n': # See also, Dune by Frank Herbert. + self.fail_with_process_info('while awaiting a sign', + stderr=worm_sign) + self._process.stdin.write(data_to_write) + + signals_sent = 0 + rlist = [] + # We don't know when the read_and_verify_code in our child is actually + # executing within the read system call we want to interrupt. This + # loop waits for a bit before sending the first signal to increase + # the likelihood of that. Implementations without correct EINTR + # and signal handling usually fail this test. + while not rlist: + rlist, _, _ = select.select([self._process.stderr], (), (), 0.05) + self._process.send_signal(signal.SIGINT) + signals_sent += 1 + if signals_sent > 200: + self._process.kill() + self.fail('reader process failed to handle our signals.') + # This assumes anything unexpected that writes to stderr will also + # write a newline. That is true of the traceback printing code. + signal_line = self._process.stderr.readline() + if signal_line != b'$\n': + self.fail_with_process_info('while awaiting signal', + stderr=signal_line) + + # We append a newline to our input so that a readline call can + # end on its own before the EOF is seen and so that we're testing + # the read call that was interrupted by a signal before the end of + # the data stream has been reached. + stdout, stderr = self._process.communicate(input=b'\n') + if self._process.returncode: + self.fail_with_process_info( + 'exited rc=%d' % self._process.returncode, + stdout, stderr, communicate=False) + # PASS! + + # String format for the read_and_verify_code used by read methods. + _READING_CODE_TEMPLATE = ( + 'got = infile.{read_method_name}() ;' + 'expected = {expected!r} ;' + 'assert got == expected, (' + '"{read_method_name} returned wrong data.\\n"' + '"got data %r\\nexpected %r" % (got, expected))' + ) + + def test_readline(self): + """readline() must handle signals and not lose data.""" + self._test_reading( + data_to_write=b'hello, world!', + read_and_verify_code=self._READING_CODE_TEMPLATE.format( + read_method_name='readline', + expected=b'hello, world!\n')) + + def test_readlines(self): + """readlines() must handle signals and not lose data.""" + self._test_reading( + data_to_write=b'hello\nworld!', + read_and_verify_code=self._READING_CODE_TEMPLATE.format( + read_method_name='readlines', + expected=[b'hello\n', b'world!\n'])) + + def test_readall(self): + """readall() must handle signals and not lose data.""" + self._test_reading( + data_to_write=b'hello\nworld!', + read_and_verify_code=self._READING_CODE_TEMPLATE.format( + read_method_name='readall', + expected=b'hello\nworld!\n')) + # read() is the same thing as readall(). + self._test_reading( + data_to_write=b'hello\nworld!', + read_and_verify_code=self._READING_CODE_TEMPLATE.format( + read_method_name='read', + expected=b'hello\nworld!\n')) + + +class TestBufferedIOSignalInterrupt(TestFileIOSignalInterrupt): + def _generate_infile_setup_code(self): + """Returns the infile = ... line of code to make a BufferedReader.""" + return ('infile = open(sys.stdin.fileno(), "rb") ;' + 'import _io ;assert isinstance(infile, _io.BufferedReader)') + + def test_readall(self): + """BufferedReader.read() must handle signals and not lose data.""" + self._test_reading( + data_to_write=b'hello\nworld!', + read_and_verify_code=self._READING_CODE_TEMPLATE.format( + read_method_name='read', + expected=b'hello\nworld!\n')) + + +class TestTextIOSignalInterrupt(TestFileIOSignalInterrupt): + def _generate_infile_setup_code(self): + """Returns the infile = ... line of code to make a TextIOWrapper.""" + return ('infile = open(sys.stdin.fileno(), "rt", newline=None) ;' + 'import _io ;assert isinstance(infile, _io.TextIOWrapper)') + + def test_readline(self): + """readline() must handle signals and not lose data.""" + self._test_reading( + data_to_write=b'hello, world!', + read_and_verify_code=self._READING_CODE_TEMPLATE.format( + read_method_name='readline', + expected='hello, world!\n')) + + def test_readlines(self): + """readlines() must handle signals and not lose data.""" + self._test_reading( + data_to_write=b'hello\r\nworld!', + read_and_verify_code=self._READING_CODE_TEMPLATE.format( + read_method_name='readlines', + expected=['hello\n', 'world!\n'])) + + def test_readall(self): + """read() must handle signals and not lose data.""" + self._test_reading( + data_to_write=b'hello\nworld!', + read_and_verify_code=self._READING_CODE_TEMPLATE.format( + read_method_name='read', + expected="hello\nworld!\n")) + + +def test_main(): + test_cases = [ + tc for tc in globals().values() + if isinstance(tc, type) and issubclass(tc, unittest.TestCase)] + run_unittest(*test_cases) + + +if __name__ == '__main__': + test_main() diff --git a/Misc/NEWS b/Misc/NEWS index 6f591100396..95f66d71ead 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,11 @@ What's New in Python 3.2.4 Core and Builtins ----------------- +- Issue #12268: File readline, readlines and read() or readall() methods + no longer lose data when an underlying read system call is interrupted. + IOError is no longer raised due to a read system call returning EINTR + from within these methods. + - Issue #15142: Fix reference leak when deallocating instances of types created using PyType_FromSpec(). diff --git a/Modules/_io/_iomodule.h b/Modules/_io/_iomodule.h index c198b43e782..aa8bfd69ddf 100644 --- a/Modules/_io/_iomodule.h +++ b/Modules/_io/_iomodule.h @@ -57,6 +57,11 @@ extern Py_ssize_t _PyIO_find_line_ending( int translated, int universal, PyObject *readnl, Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed); +/* Return 1 if an EnvironmentError with errno == EINTR is set (and then + clears the error indicator), 0 otherwise. + Should only be called when PyErr_Occurred() is true. +*/ +extern int _PyIO_trap_eintr(void); #define DEFAULT_BUFFER_SIZE (8 * 1024) /* bytes */ diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c index 4c43c69060e..21e6b36068a 100644 --- a/Modules/_io/bufferedio.c +++ b/Modules/_io/bufferedio.c @@ -730,8 +730,8 @@ _buffered_init(buffered *self) clears the error indicator), 0 otherwise. Should only be called when PyErr_Occurred() is true. */ -static int -_trap_eintr(void) +int +_PyIO_trap_eintr(void) { static PyObject *eintr_int = NULL; PyObject *typ, *val, *tb; @@ -1314,7 +1314,7 @@ _bufferedreader_raw_read(buffered *self, char *start, Py_ssize_t len) */ do { res = PyObject_CallMethodObjArgs(self->raw, _PyIO_str_readinto, memobj, NULL); - } while (res == NULL && _trap_eintr()); + } while (res == NULL && _PyIO_trap_eintr()); Py_DECREF(memobj); if (res == NULL) return -1; @@ -1742,7 +1742,7 @@ _bufferedwriter_raw_write(buffered *self, char *start, Py_ssize_t len) errno = 0; res = PyObject_CallMethodObjArgs(self->raw, _PyIO_str_write, memobj, NULL); errnum = errno; - } while (res == NULL && _trap_eintr()); + } while (res == NULL && _PyIO_trap_eintr()); Py_DECREF(memobj); if (res == NULL) return -1; diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index 6feca2137f4..a7fad360b18 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -605,6 +605,13 @@ fileio_readall(fileio *self) if (n == 0) break; if (n < 0) { + if (errno == EINTR) { + if (PyErr_CheckSignals()) { + Py_DECREF(result); + return NULL; + } + continue; + } if (total > 0) break; if (errno == EAGAIN) { diff --git a/Modules/_io/iobase.c b/Modules/_io/iobase.c index 35c7cdd480d..2bba1bfd58f 100644 --- a/Modules/_io/iobase.c +++ b/Modules/_io/iobase.c @@ -482,8 +482,14 @@ iobase_readline(PyObject *self, PyObject *args) if (has_peek) { PyObject *readahead = PyObject_CallMethod(self, "peek", "i", 1); - if (readahead == NULL) + if (readahead == NULL) { + /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals() + when EINTR occurs so we needn't do it ourselves. */ + if (_PyIO_trap_eintr()) { + continue; + } goto fail; + } if (!PyBytes_Check(readahead)) { PyErr_Format(PyExc_IOError, "peek() should have returned a bytes object, " @@ -516,8 +522,14 @@ iobase_readline(PyObject *self, PyObject *args) } b = PyObject_CallMethod(self, "read", "n", nreadahead); - if (b == NULL) + if (b == NULL) { + /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals() + when EINTR occurs so we needn't do it ourselves. */ + if (_PyIO_trap_eintr()) { + continue; + } goto fail; + } if (!PyBytes_Check(b)) { PyErr_Format(PyExc_IOError, "read() should have returned a bytes object, " @@ -826,6 +838,11 @@ rawiobase_readall(PyObject *self, PyObject *args) PyObject *data = PyObject_CallMethod(self, "read", "i", DEFAULT_BUFFER_SIZE); if (!data) { + /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals() + when EINTR occurs so we needn't do it ourselves. */ + if (_PyIO_trap_eintr()) { + continue; + } Py_DECREF(chunks); return NULL; } diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index baf0a971a77..d86a1c7b6da 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1541,8 +1541,14 @@ textiowrapper_read(textio *self, PyObject *args) /* Keep reading chunks until we have n characters to return */ while (remaining > 0) { res = textiowrapper_read_chunk(self); - if (res < 0) + if (res < 0) { + /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals() + when EINTR occurs so we needn't do it ourselves. */ + if (_PyIO_trap_eintr()) { + continue; + } goto fail; + } if (res == 0) /* EOF */ break; if (chunks == NULL) { @@ -1701,8 +1707,14 @@ _textiowrapper_readline(textio *self, Py_ssize_t limit) while (!self->decoded_chars || !PyUnicode_GET_SIZE(self->decoded_chars)) { res = textiowrapper_read_chunk(self); - if (res < 0) + if (res < 0) { + /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals() + when EINTR occurs so we needn't do it ourselves. */ + if (_PyIO_trap_eintr()) { + continue; + } goto error; + } if (res == 0) break; }