bpo-43260: io: Prevent large data remains in textio buffer. (GH-24592)

When very large data remains in TextIOWrapper, flush() may fail forever.

So prevent that data larger than chunk_size is remained in TextIOWrapper internal
buffer.

Co-Authored-By: Eryk Sun
This commit is contained in:
Inada Naoki 2021-02-22 08:29:30 +09:00 committed by GitHub
parent 84f7afe65c
commit 01806d5beb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 46 additions and 3 deletions

View File

@ -3767,6 +3767,33 @@ class CTextIOWrapperTest(TextIOWrapperTest):
with self.assertRaises(AttributeError):
del t._CHUNK_SIZE
def test_internal_buffer_size(self):
# bpo-43260: TextIOWrapper's internal buffer should not store
# data larger than chunk size.
chunk_size = 8192 # default chunk size, updated later
class MockIO(self.MockRawIO):
def write(self, data):
if len(data) > chunk_size:
raise RuntimeError
return super().write(data)
buf = MockIO()
t = self.TextIOWrapper(buf, encoding="ascii")
chunk_size = t._CHUNK_SIZE
t.write("abc")
t.write("def")
# default chunk size is 8192 bytes so t don't write data to buf.
self.assertEqual([], buf._write_stack)
with self.assertRaises(RuntimeError):
t.write("x"*(chunk_size+1))
self.assertEqual([b"abcdef"], buf._write_stack)
t.write("ghi")
t.write("x"*chunk_size)
self.assertEqual([b"abcdef", b"ghi", b"x"*chunk_size], buf._write_stack)
class PyTextIOWrapperTest(TextIOWrapperTest):
io = pyio

View File

@ -0,0 +1,2 @@
Fix TextIOWrapper can not flush internal buffer forever after very large
text is written.

View File

@ -1585,6 +1585,8 @@ _textiowrapper_writeflush(textio *self)
ret = PyObject_CallMethodOneArg(self->buffer, _PyIO_str_write, b);
} while (ret == NULL && _PyIO_trap_eintr());
Py_DECREF(b);
// NOTE: We cleared buffer but we don't know how many bytes are actually written
// when an error occurred.
if (ret == NULL)
return -1;
Py_DECREF(ret);
@ -1642,7 +1644,10 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
/* XXX What if we were just reading? */
if (self->encodefunc != NULL) {
if (PyUnicode_IS_ASCII(text) && is_asciicompat_encoding(self->encodefunc)) {
if (PyUnicode_IS_ASCII(text) &&
// See bpo-43260
PyUnicode_GET_LENGTH(text) <= self->chunk_size &&
is_asciicompat_encoding(self->encodefunc)) {
b = text;
Py_INCREF(b);
}
@ -1651,8 +1656,9 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
}
self->encoding_start_of_stream = 0;
}
else
else {
b = PyObject_CallMethodOneArg(self->encoder, _PyIO_str_encode, text);
}
Py_DECREF(text);
if (b == NULL)
@ -1677,6 +1683,14 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
self->pending_bytes_count = 0;
self->pending_bytes = b;
}
else if (self->pending_bytes_count + bytes_len > self->chunk_size) {
// Prevent to concatenate more than chunk_size data.
if (_textiowrapper_writeflush(self) < 0) {
Py_DECREF(b);
return NULL;
}
self->pending_bytes = b;
}
else if (!PyList_CheckExact(self->pending_bytes)) {
PyObject *list = PyList_New(2);
if (list == NULL) {
@ -1696,7 +1710,7 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
}
self->pending_bytes_count += bytes_len;
if (self->pending_bytes_count > self->chunk_size || needflush ||
if (self->pending_bytes_count >= self->chunk_size || needflush ||
text_needflush) {
if (_textiowrapper_writeflush(self) < 0)
return NULL;