From 354d50ee37f2c7e96812a2824b5e899cfd5ee3b1 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 3 Feb 2013 17:10:42 +0200 Subject: [PATCH] Issue #17106: Fix a segmentation fault in io.TextIOWrapper when an underlying stream or a decoder produces data of an unexpected type (i.e. when io.TextIOWrapper initialized with text stream or use bytes-to-bytes codec). --- Lib/test/test_io.py | 40 +++++++++++++++++++++++++++- Misc/NEWS | 4 +++ Modules/_io/textio.c | 63 +++++++++++++++++++++++++++++++------------- 3 files changed, 87 insertions(+), 20 deletions(-) diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 6132bae82f3..aabc1d23dd6 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -36,6 +36,7 @@ from itertools import cycle, count from collections import deque from UserList import UserList from test import test_support as support +import contextlib import codecs import io # C implementation of io @@ -2419,6 +2420,39 @@ class TextIOWrapperTest(unittest.TestCase): with self.assertRaises((AttributeError, TypeError)): txt.buffer = buf + def test_read_nonbytes(self): + # Issue #17106 + # Crash when underlying read() returns non-bytes + class NonbytesStream(self.StringIO): + read1 = self.StringIO.read + class NonbytesStream(self.StringIO): + read1 = self.StringIO.read + t = self.TextIOWrapper(NonbytesStream('a')) + with self.maybeRaises(TypeError): + t.read(1) + t = self.TextIOWrapper(NonbytesStream('a')) + with self.maybeRaises(TypeError): + t.readline() + t = self.TextIOWrapper(NonbytesStream('a')) + self.assertEqual(t.read(), u'a') + + def test_illegal_decoder(self): + # Issue #17106 + # Crash when decoder returns non-string + t = self.TextIOWrapper(self.BytesIO(b'aaaaaa'), newline='\n', + encoding='quopri_codec') + with self.maybeRaises(TypeError): + t.read(1) + t = self.TextIOWrapper(self.BytesIO(b'aaaaaa'), newline='\n', + encoding='quopri_codec') + with self.maybeRaises(TypeError): + t.readline() + t = self.TextIOWrapper(self.BytesIO(b'aaaaaa'), newline='\n', + encoding='quopri_codec') + with self.maybeRaises(TypeError): + t.read() + + class CTextIOWrapperTest(TextIOWrapperTest): def test_initialization(self): @@ -2460,9 +2494,13 @@ class CTextIOWrapperTest(TextIOWrapperTest): t2.buddy = t1 support.gc_collect() + maybeRaises = unittest.TestCase.assertRaises + class PyTextIOWrapperTest(TextIOWrapperTest): - pass + @contextlib.contextmanager + def maybeRaises(self, *args, **kwds): + yield class IncrementalNewlineDecoderTest(unittest.TestCase): diff --git a/Misc/NEWS b/Misc/NEWS index 9c9bc108fe3..0fe7c9a017d 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -199,6 +199,10 @@ Core and Builtins Library ------- +- Issue #17106: Fix a segmentation fault in io.TextIOWrapper when an underlying + stream or a decoder produces data of an unexpected type (i.e. when + io.TextIOWrapper initialized with text stream or use bytes-to-bytes codec). + - Issue #15633: httplib.HTTPResponse is now mark closed when the server sends less than the advertised Content-Length. diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index c9913d4a9eb..cd6d443e23f 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -236,6 +236,21 @@ incrementalnewlinedecoder_dealloc(nldecoder_object *self) Py_TYPE(self)->tp_free((PyObject *)self); } +static int +check_decoded(PyObject *decoded) +{ + if (decoded == NULL) + return -1; + if (!PyUnicode_Check(decoded)) { + PyErr_Format(PyExc_TypeError, + "decoder should return a string result, not '%.200s'", + Py_TYPE(decoded)->tp_name); + Py_DECREF(decoded); + return -1; + } + return 0; +} + #define SEEN_CR 1 #define SEEN_LF 2 #define SEEN_CRLF 4 @@ -265,15 +280,9 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self, Py_INCREF(output); } - if (output == NULL) + if (check_decoded(output) < 0) return NULL; - if (!PyUnicode_Check(output)) { - PyErr_SetString(PyExc_TypeError, - "decoder should return a string result"); - goto error; - } - output_len = PyUnicode_GET_SIZE(output); if (self->pendingcr && (final || output_len > 0)) { Py_UNICODE *out; @@ -1417,7 +1426,12 @@ textiowrapper_read_chunk(textio *self) Py_DECREF(chunk_size); if (input_chunk == NULL) goto fail; - assert(PyBytes_Check(input_chunk)); + if (!PyBytes_Check(input_chunk)) { + PyErr_Format(PyExc_TypeError, + "underlying read1() should have returned a bytes object, " + "not '%.200s'", Py_TYPE(input_chunk)->tp_name); + goto fail; + } eof = (PyBytes_Size(input_chunk) == 0); @@ -1430,8 +1444,7 @@ textiowrapper_read_chunk(textio *self) _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL); } - /* TODO sanity check: isinstance(decoded_chars, unicode) */ - if (decoded_chars == NULL) + if (check_decoded(decoded_chars) < 0) goto fail; textiowrapper_set_decoded_chars(self, decoded_chars); if (PyUnicode_GET_SIZE(decoded_chars) > 0) @@ -1444,7 +1457,14 @@ textiowrapper_read_chunk(textio *self) PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk); if (next_input == NULL) goto fail; - assert (PyBytes_Check(next_input)); + if (!PyBytes_Check(next_input)) { + PyErr_Format(PyExc_TypeError, + "decoder getstate() should have returned a bytes " + "object, not '%.200s'", + Py_TYPE(next_input)->tp_name); + Py_DECREF(next_input); + goto fail; + } Py_DECREF(dec_buffer); Py_CLEAR(self->snapshot); self->snapshot = Py_BuildValue("NN", dec_flags, next_input); @@ -1490,7 +1510,7 @@ textiowrapper_read(textio *self, PyObject *args) decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode, bytes, Py_True, NULL); Py_DECREF(bytes); - if (decoded == NULL) + if (check_decoded(decoded) < 0) goto fail; result = textiowrapper_get_decoded_chars(self, -1); @@ -2110,7 +2130,14 @@ textiowrapper_seek(textio *self, PyObject *args) if (input_chunk == NULL) goto fail; - assert (PyBytes_Check(input_chunk)); + if (!PyBytes_Check(input_chunk)) { + PyErr_Format(PyExc_TypeError, + "underlying read() should have returned a bytes " + "object, not '%.200s'", + Py_TYPE(input_chunk)->tp_name); + Py_DECREF(input_chunk); + goto fail; + } self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk); if (self->snapshot == NULL) { @@ -2121,7 +2148,7 @@ textiowrapper_seek(textio *self, PyObject *args) decoded = PyObject_CallMethod(self->decoder, "decode", "Oi", input_chunk, (int)cookie.need_eof); - if (decoded == NULL) + if (check_decoded(decoded) < 0) goto fail; textiowrapper_set_decoded_chars(self, decoded); @@ -2245,9 +2272,8 @@ textiowrapper_tell(textio *self, PyObject *args) PyObject *decoded = PyObject_CallMethod( self->decoder, "decode", "s#", input, 1); - if (decoded == NULL) + if (check_decoded(decoded) < 0) goto fail; - assert (PyUnicode_Check(decoded)); chars_decoded += PyUnicode_GET_SIZE(decoded); Py_DECREF(decoded); @@ -2279,9 +2305,8 @@ textiowrapper_tell(textio *self, PyObject *args) /* We didn't get enough decoded data; signal EOF to get more. */ PyObject *decoded = PyObject_CallMethod( self->decoder, "decode", "si", "", /* final = */ 1); - if (decoded == NULL) + if (check_decoded(decoded) < 0) goto fail; - assert (PyUnicode_Check(decoded)); chars_decoded += PyUnicode_GET_SIZE(decoded); Py_DECREF(decoded); cookie.need_eof = 1; @@ -2440,7 +2465,7 @@ textiowrapper_close(textio *self, PyObject *args) Py_DECREF(res); if (r < 0) return NULL; - + if (r > 0) { Py_RETURN_NONE; /* stream already closed */ }