Issue #7358: cStringIO.StringIO now supports writing to and reading from
a stream larger than 2 GiB on 64-bit systems.
Serhiy Storchaka 2013-02-09 13:47:43 +02:00
parent beaa3adae9
commit 276f1d5139
3 changed files with 96 additions and 32 deletions
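
For context, the behaviour this commit enables looks roughly like the following; the snippet is illustrative only (not part of the commit) and assumes a 64-bit Python 2 build with several GiB of free memory. Individual read() and write() calls are still limited to INT_MAX bytes (past that they now raise OverflowError, as the cStringIO.c hunks below show), so the data is moved in 64 MiB chunks, much as the new tests do:

    import cStringIO

    size = 2 * 1024 ** 3 + 1              # one byte past the old 2 GiB limit
    chunk = 'x' * (64 * 1024 * 1024)      # move data in 64 MiB pieces

    f = cStringIO.StringIO()
    written = 0
    while written < size:
        n = min(len(chunk), size - written)
        f.write(chunk[:n])
        written += n

    f.seek(0)
    total = 0
    while True:
        data = f.read(len(chunk))
        if not data:
            break
        total += len(data)
    assert total == size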

Lib/test/test_StringIO.py

@@ -5,6 +5,7 @@ import StringIO
 import cStringIO
 import types
 import array
+import sys
 from test import test_support
@@ -105,6 +106,45 @@ class TestGenericStringIO(unittest.TestCase):
         self._fp.close()
         self.assertRaises(ValueError, self._fp.getvalue)
 
+    @test_support.bigmemtest(test_support._2G + 2**26, memuse=2.001)
+    def test_reads_from_large_stream(self, size):
+        linesize = 2**26 # 64 MiB
+        lines = ['x' * (linesize - 1) + '\n'] * (size // linesize) + \
+                ['y' * (size % linesize)]
+        f = self.MODULE.StringIO(''.join(lines))
+        for i, expected in enumerate(lines):
+            line = f.read(len(expected))
+            self.assertEqual(len(line), len(expected))
+            self.assertEqual(line, expected)
+        self.assertEqual(f.read(), '')
+        f.seek(0)
+        for i, expected in enumerate(lines):
+            line = f.readline()
+            self.assertEqual(len(line), len(expected))
+            self.assertEqual(line, expected)
+        self.assertEqual(f.readline(), '')
+        f.seek(0)
+        self.assertEqual(f.readlines(), lines)
+        self.assertEqual(f.readlines(), [])
+        f.seek(0)
+        self.assertEqual(f.readlines(size), lines)
+        self.assertEqual(f.readlines(), [])
+
+    # In worst case cStringIO requires 2 + 1 + 1/2 + 1/2**2 + ... = 4
+    # bytes per input character.
+    @test_support.bigmemtest(test_support._2G, memuse=4)
+    def test_writes_to_large_stream(self, size):
+        s = 'x' * 2**26 # 64 MiB
+        f = self.MODULE.StringIO()
+        n = size
+        while n > len(s):
+            f.write(s)
+            n -= len(s)
+        s = None
+        f.write('x' * n)
+        self.assertEqual(len(f.getvalue()), size)
+
 
 class TestStringIO(TestGenericStringIO):
     MODULE = StringIO
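
The memuse=4 factor in test_writes_to_large_stream above matches the geometric series in its comment: the doubling buffer can end up roughly twice as large as the data (when the data size just exceeds a power of two), and the earlier, smaller buffers sum to about the same again if none of that memory can be reused. A rough sketch of that arithmetic (illustrative only, not part of the commit; the initial buffer size of 128 is an assumption):

    def worst_case_bytes_per_char(size, initial=128):
        # Simulate a buffer that doubles until it can hold `size` bytes and
        # pessimistically count every earlier allocation as still occupied.
        sizes = [initial]
        while sizes[-1] < size:
            sizes.append(sizes[-1] * 2)
        return sum(sizes) / float(size)

    print worst_case_bytes_per_char(2 * 1024 ** 3 + 1)   # just under 4.0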

Misc/NEWS

@@ -202,6 +202,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #7358: cStringIO.StringIO now supports writing to and reading from
+  a stream larger than 2 GiB on 64-bit systems.
+
 - Issue #10355: In SpooledTemporaryFile class mode and name properties and
   xreadlines method now work for unrolled files. encoding and newlines
   properties now removed as they have no sense and always produced

Modules/cStringIO.c

@@ -170,10 +170,15 @@ IO_cread(PyObject *self, char **output, Py_ssize_t n) {
         n = l;
         if (n < 0) n=0;
     }
+    if (n > INT_MAX) {
+        PyErr_SetString(PyExc_OverflowError,
+                        "length too large");
+        return -1;
+    }
 
     *output=((IOobject*)self)->buf + ((IOobject*)self)->pos;
     ((IOobject*)self)->pos += n;
-    return n;
+    return (int)n;
 }
 
 static PyObject *
@@ -192,26 +197,33 @@ PyDoc_STRVAR(IO_readline__doc__, "readline() -- Read one line");
 
 static int
 IO_creadline(PyObject *self, char **output) {
-    char *n, *s;
-    Py_ssize_t l;
+    char *n, *start, *end;
+    Py_ssize_t len;
 
     if (!IO__opencheck(IOOOBJECT(self))) return -1;
 
-    for (n = ((IOobject*)self)->buf + ((IOobject*)self)->pos,
-           s = ((IOobject*)self)->buf + ((IOobject*)self)->string_size;
-         n < s && *n != '\n'; n++);
+    n = start = ((IOobject*)self)->buf + ((IOobject*)self)->pos;
+    end = ((IOobject*)self)->buf + ((IOobject*)self)->string_size;
+    while (n < end && *n != '\n')
+        n++;
 
-    if (n < s) n++;
+    if (n < end) n++;
 
-    *output=((IOobject*)self)->buf + ((IOobject*)self)->pos;
-    l = n - ((IOobject*)self)->buf - ((IOobject*)self)->pos;
+    len = n - start;
+    if (len > INT_MAX) {
+        PyErr_SetString(PyExc_OverflowError,
+                        "length too large");
+        return -1;
+    }
 
-    assert(IOOOBJECT(self)->pos <= PY_SSIZE_T_MAX - l);
+    *output=start;
+
+    assert(IOOOBJECT(self)->pos <= PY_SSIZE_T_MAX - len);
     assert(IOOOBJECT(self)->pos >= 0);
     assert(IOOOBJECT(self)->string_size >= 0);
-    ((IOobject*)self)->pos += l;
-    return (int)l;
+    ((IOobject*)self)->pos += len;
+    return (int)len;
 }
 
 static PyObject *
@@ -239,9 +251,9 @@ IO_readlines(IOobject *self, PyObject *args) {
     int n;
     char *output;
     PyObject *result, *line;
-    int hint = 0, length = 0;
+    Py_ssize_t hint = 0, length = 0;
 
-    if (!PyArg_ParseTuple(args, "|i:readlines", &hint)) return NULL;
+    if (!PyArg_ParseTuple(args, "|n:readlines", &hint)) return NULL;
 
     result = PyList_New(0);
     if (!result)
@@ -377,29 +389,39 @@ PyDoc_STRVAR(O_write__doc__,
 
 static int
-O_cwrite(PyObject *self, const char *c, Py_ssize_t l) {
-    Py_ssize_t newl;
+O_cwrite(PyObject *self, const char *c, Py_ssize_t len) {
+    Py_ssize_t newpos;
     Oobject *oself;
     char *newbuf;
 
     if (!IO__opencheck(IOOOBJECT(self))) return -1;
     oself = (Oobject *)self;
 
-    newl = oself->pos+l;
-    if (newl >= oself->buf_size) {
-        oself->buf_size *= 2;
-        if (oself->buf_size <= newl) {
-            assert(newl + 1 < INT_MAX);
-            oself->buf_size = (int)(newl+1);
-        }
-        newbuf = (char*)realloc(oself->buf, oself->buf_size);
-        if (!newbuf) {
-            PyErr_SetString(PyExc_MemoryError,"out of memory");
-            free(oself->buf);
-            oself->buf = 0;
-            oself->buf_size = oself->pos = 0;
+    if (len > INT_MAX) {
+        PyErr_SetString(PyExc_OverflowError,
+                        "length too large");
+        return -1;
+    }
+    assert(len >= 0);
+    if (oself->pos >= PY_SSIZE_T_MAX - len) {
+        PyErr_SetString(PyExc_OverflowError,
+                        "new position too large");
+        return -1;
+    }
+    newpos = oself->pos + len;
+    if (newpos >= oself->buf_size) {
+        size_t newsize = oself->buf_size;
+        newsize *= 2;
+        if (newsize <= (size_t)newpos || newsize > PY_SSIZE_T_MAX) {
+            assert(newpos < PY_SSIZE_T_MAX - 1);
+            newsize = newpos + 1;
+        }
+        newbuf = (char*)realloc(oself->buf, newsize);
+        if (!newbuf) {
+            PyErr_SetString(PyExc_MemoryError,"out of memory");
             return -1;
         }
+        oself->buf_size = (Py_ssize_t)newsize;
         oself->buf = newbuf;
     }
@@ -416,16 +438,15 @@ O_cwrite(PyObject *self, const char *c, Py_ssize_t l) {
               (oself->pos - oself->string_size) * sizeof(char));
     }
 
-    memcpy(oself->buf+oself->pos,c,l);
+    memcpy(oself->buf + oself->pos, c, len);
 
-    assert(oself->pos + l < INT_MAX);
-    oself->pos += (int)l;
+    oself->pos = newpos;
 
     if (oself->string_size < oself->pos) {
         oself->string_size = oself->pos;
    }
 
-    return (int)l;
+    return (int)len;
 }
 
 static PyObject *
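
One user-visible consequence of the "|i" to "|n" change in IO_readlines above: the readlines() size hint is now parsed as a Py_ssize_t, so on 64-bit builds a hint larger than INT_MAX should be accepted instead of failing during argument parsing. A tiny illustrative check (not part of the commit):

    import cStringIO

    f = cStringIO.StringIO('one\ntwo\n')
    # A hint above INT_MAX used to raise OverflowError in PyArg_ParseTuple;
    # now it is simply a very large hint, so all lines are returned here.
    print f.readlines(2 ** 32)     # ['one\n', 'two\n']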