Issue #7358: cStringIO.StringIO now supports writing to and reading from
a stream larger than 2 GiB on 64-bit systems.
This commit is contained in:
Serhiy Storchaka 2013-02-09 13:47:43 +02:00
parent beaa3adae9
commit 276f1d5139
3 changed files with 96 additions and 32 deletions

View File

@ -5,6 +5,7 @@ import StringIO
import cStringIO import cStringIO
import types import types
import array import array
import sys
from test import test_support from test import test_support
@ -105,6 +106,45 @@ class TestGenericStringIO(unittest.TestCase):
self._fp.close() self._fp.close()
self.assertRaises(ValueError, self._fp.getvalue) self.assertRaises(ValueError, self._fp.getvalue)
@test_support.bigmemtest(test_support._2G + 2**26, memuse=2.001)
def test_reads_from_large_stream(self, size):
    # Build a stream of `size` characters made of newline-terminated
    # 64 MiB lines plus an optional shorter, unterminated tail, then check
    # that read(), readline() and readlines() all return it intact.
    linesize = 2**26 # 64 MiB
    lines = ['x' * (linesize - 1) + '\n'] * (size // linesize)
    # Append the tail only when it is non-empty.  If size is an exact
    # multiple of linesize (the minimum size above, 2**31 + 2**26, is one)
    # the tail would be '', and readlines() never yields an empty string,
    # so the list comparisons below would fail.
    tail = 'y' * (size % linesize)
    if tail:
        lines.append(tail)
    f = self.MODULE.StringIO(''.join(lines))

    # Fixed-size reads, one expected line at a time.
    for expected in lines:
        line = f.read(len(expected))
        # Compare lengths first so a mismatch does not have to report
        # two huge strings -- presumably the intent; verify.
        self.assertEqual(len(line), len(expected))
        self.assertEqual(line, expected)
    self.assertEqual(f.read(), '')

    # Line-oriented reads.
    f.seek(0)
    for expected in lines:
        line = f.readline()
        self.assertEqual(len(line), len(expected))
        self.assertEqual(line, expected)
    self.assertEqual(f.readline(), '')

    # readlines() without a size hint.
    f.seek(0)
    self.assertEqual(f.readlines(), lines)
    self.assertEqual(f.readlines(), [])

    # readlines() with a size hint equal to the whole stream length.
    f.seek(0)
    self.assertEqual(f.readlines(size), lines)
    self.assertEqual(f.readlines(), [])
# In worst case cStringIO requires 2 + 1 + 1/2 + 1/2**2 + ... = 4
# bytes per input character.
@test_support.bigmemtest(test_support._2G, memuse=4)
def test_writes_to_large_stream(self, size):
    # Fill a stream with `size` characters, written in 64 MiB chunks plus
    # one final shorter write, and check the resulting value's length.
    chunk = 'x' * 2**26 # 64 MiB
    chunksize = len(chunk)
    stream = self.MODULE.StringIO()
    remaining = size
    while remaining > chunksize:
        stream.write(chunk)
        remaining -= chunksize
    # Drop the reference to the chunk before building the final piece
    # (presumably to keep peak memory use down -- verify).
    chunk = None
    stream.write('x' * remaining)
    self.assertEqual(len(stream.getvalue()), size)
class TestStringIO(TestGenericStringIO): class TestStringIO(TestGenericStringIO):
MODULE = StringIO MODULE = StringIO

View File

@ -202,6 +202,9 @@ Core and Builtins
Library Library
------- -------
- Issue #7358: cStringIO.StringIO now supports writing to and reading from
a stream larger than 2 GiB on 64-bit systems.
- Issue #10355: In SpooledTemporaryFile class mode and name properties and - Issue #10355: In SpooledTemporaryFile class mode and name properties and
xreadlines method now work for unrolled files. encoding and newlines xreadlines method now work for unrolled files. encoding and newlines
properties now removed as they have no sense and always produced properties now removed as they have no sense and always produced

View File

@ -170,10 +170,15 @@ IO_cread(PyObject *self, char **output, Py_ssize_t n) {
n = l; n = l;
if (n < 0) n=0; if (n < 0) n=0;
} }
if (n > INT_MAX) {
PyErr_SetString(PyExc_OverflowError,
"length too large");
return -1;
}
*output=((IOobject*)self)->buf + ((IOobject*)self)->pos; *output=((IOobject*)self)->buf + ((IOobject*)self)->pos;
((IOobject*)self)->pos += n; ((IOobject*)self)->pos += n;
return n; return (int)n;
} }
static PyObject * static PyObject *
@ -192,26 +197,33 @@ PyDoc_STRVAR(IO_readline__doc__, "readline() -- Read one line");
static int static int
IO_creadline(PyObject *self, char **output) { IO_creadline(PyObject *self, char **output) {
char *n, *s; char *n, *start, *end;
Py_ssize_t l; Py_ssize_t len;
if (!IO__opencheck(IOOOBJECT(self))) return -1; if (!IO__opencheck(IOOOBJECT(self))) return -1;
for (n = ((IOobject*)self)->buf + ((IOobject*)self)->pos, n = start = ((IOobject*)self)->buf + ((IOobject*)self)->pos;
s = ((IOobject*)self)->buf + ((IOobject*)self)->string_size; end = ((IOobject*)self)->buf + ((IOobject*)self)->string_size;
n < s && *n != '\n'; n++); while (n < end && *n != '\n')
n++;
if (n < s) n++; if (n < end) n++;
*output=((IOobject*)self)->buf + ((IOobject*)self)->pos; len = n - start;
l = n - ((IOobject*)self)->buf - ((IOobject*)self)->pos; if (len > INT_MAX) {
PyErr_SetString(PyExc_OverflowError,
"length too large");
return -1;
}
assert(IOOOBJECT(self)->pos <= PY_SSIZE_T_MAX - l); *output=start;
assert(IOOOBJECT(self)->pos <= PY_SSIZE_T_MAX - len);
assert(IOOOBJECT(self)->pos >= 0); assert(IOOOBJECT(self)->pos >= 0);
assert(IOOOBJECT(self)->string_size >= 0); assert(IOOOBJECT(self)->string_size >= 0);
((IOobject*)self)->pos += l; ((IOobject*)self)->pos += len;
return (int)l; return (int)len;
} }
static PyObject * static PyObject *
@ -239,9 +251,9 @@ IO_readlines(IOobject *self, PyObject *args) {
int n; int n;
char *output; char *output;
PyObject *result, *line; PyObject *result, *line;
int hint = 0, length = 0; Py_ssize_t hint = 0, length = 0;
if (!PyArg_ParseTuple(args, "|i:readlines", &hint)) return NULL; if (!PyArg_ParseTuple(args, "|n:readlines", &hint)) return NULL;
result = PyList_New(0); result = PyList_New(0);
if (!result) if (!result)
@ -377,31 +389,41 @@ PyDoc_STRVAR(O_write__doc__,
static int static int
O_cwrite(PyObject *self, const char *c, Py_ssize_t l) { O_cwrite(PyObject *self, const char *c, Py_ssize_t len) {
Py_ssize_t newl; Py_ssize_t newpos;
Oobject *oself; Oobject *oself;
char *newbuf; char *newbuf;
if (!IO__opencheck(IOOOBJECT(self))) return -1; if (!IO__opencheck(IOOOBJECT(self))) return -1;
oself = (Oobject *)self; oself = (Oobject *)self;
newl = oself->pos+l; if (len > INT_MAX) {
if (newl >= oself->buf_size) { PyErr_SetString(PyExc_OverflowError,
oself->buf_size *= 2; "length too large");
if (oself->buf_size <= newl) { return -1;
assert(newl + 1 < INT_MAX); }
oself->buf_size = (int)(newl+1); assert(len >= 0);
if (oself->pos >= PY_SSIZE_T_MAX - len) {
PyErr_SetString(PyExc_OverflowError,
"new position too large");
return -1;
}
newpos = oself->pos + len;
if (newpos >= oself->buf_size) {
size_t newsize = oself->buf_size;
newsize *= 2;
if (newsize <= (size_t)newpos || newsize > PY_SSIZE_T_MAX) {
assert(newpos < PY_SSIZE_T_MAX - 1);
newsize = newpos + 1;
} }
newbuf = (char*)realloc(oself->buf, oself->buf_size); newbuf = (char*)realloc(oself->buf, newsize);
if (!newbuf) { if (!newbuf) {
PyErr_SetString(PyExc_MemoryError,"out of memory"); PyErr_SetString(PyExc_MemoryError,"out of memory");
free(oself->buf);
oself->buf = 0;
oself->buf_size = oself->pos = 0;
return -1; return -1;
} }
oself->buf_size = (Py_ssize_t)newsize;
oself->buf = newbuf; oself->buf = newbuf;
} }
if (oself->string_size < oself->pos) { if (oself->string_size < oself->pos) {
/* In case of overseek, pad with null bytes the buffer region between /* In case of overseek, pad with null bytes the buffer region between
@ -416,16 +438,15 @@ O_cwrite(PyObject *self, const char *c, Py_ssize_t l) {
(oself->pos - oself->string_size) * sizeof(char)); (oself->pos - oself->string_size) * sizeof(char));
} }
memcpy(oself->buf+oself->pos,c,l); memcpy(oself->buf + oself->pos, c, len);
assert(oself->pos + l < INT_MAX); oself->pos = newpos;
oself->pos += (int)l;
if (oself->string_size < oself->pos) { if (oself->string_size < oself->pos) {
oself->string_size = oself->pos; oself->string_size = oself->pos;
} }
return (int)l; return (int)len;
} }
static PyObject * static PyObject *