mirror of https://github.com/python/cpython
Issue #7358: cStringIO.StringIO now supports writing to and reading from
a stream larger than 2 GiB on 64-bit systems.
This commit is contained in:
parent
beaa3adae9
commit
276f1d5139
|
@ -5,6 +5,7 @@ import StringIO
|
|||
import cStringIO
|
||||
import types
|
||||
import array
|
||||
import sys
|
||||
from test import test_support
|
||||
|
||||
|
||||
|
@ -105,6 +106,45 @@ class TestGenericStringIO(unittest.TestCase):
|
|||
self._fp.close()
|
||||
self.assertRaises(ValueError, self._fp.getvalue)
|
||||
|
||||
@test_support.bigmemtest(test_support._2G + 2**26, memuse=2.001)
def test_reads_from_large_stream(self, size):
    # Build a stream of exactly `size` characters out of newline-terminated
    # 64 MiB lines of 'x' plus, when size is not a multiple of the line
    # size, one shorter unterminated line of 'y'.  Then read it back three
    # ways -- read(n), readline() and readlines() -- and check every piece.
    linesize = 2**26 # 64 MiB
    lines = ['x' * (linesize - 1) + '\n'] * (size // linesize)
    # Append the tail only when it is non-empty.  At end of stream
    # readlines() returns [] rather than [''] (asserted below), so a
    # trailing '' entry in `lines` would make the list comparisons fail
    # whenever size is an exact multiple of linesize.
    tail = size % linesize
    if tail:
        lines.append('y' * tail)
    f = self.MODULE.StringIO(''.join(lines))

    # Sized reads: each read(len(expected)) must return exactly one line.
    for expected in lines:
        line = f.read(len(expected))
        # Compare lengths first so a mismatch is reported without having
        # to diff two huge strings.
        self.assertEqual(len(line), len(expected))
        self.assertEqual(line, expected)
    self.assertEqual(f.read(), '')

    # Line-by-line reads from the start of the stream.
    f.seek(0)
    for expected in lines:
        line = f.readline()
        self.assertEqual(len(line), len(expected))
        self.assertEqual(line, expected)
    self.assertEqual(f.readline(), '')

    # readlines() without a size hint.
    f.seek(0)
    self.assertEqual(f.readlines(), lines)
    self.assertEqual(f.readlines(), [])

    # readlines() with a size hint equal to the whole stream length.
    f.seek(0)
    self.assertEqual(f.readlines(size), lines)
    self.assertEqual(f.readlines(), [])
|
||||
|
||||
# In worst case cStringIO requires 2 + 1 + 1/2 + 1/2**2 + ... = 4
|
||||
# bytes per input character.
|
||||
@test_support.bigmemtest(test_support._2G, memuse=4)
def test_writes_to_large_stream(self, size):
    # Write `size` characters into an empty stream: whole 64 MiB chunks
    # while more than one chunk's worth is still owed, then a single
    # final piece for whatever remains.  The stream's value must end up
    # exactly `size` characters long.
    chunk = 'x' * 2**26 # 64 MiB
    stream = self.MODULE.StringIO()
    remaining = size
    while remaining > len(chunk):
        stream.write(chunk)
        remaining -= len(chunk)
    # Drop the reference to the chunk before the final piece is built.
    chunk = None
    stream.write('x' * remaining)
    self.assertEqual(len(stream.getvalue()), size)
|
||||
|
||||
|
||||
class TestStringIO(TestGenericStringIO):
|
||||
MODULE = StringIO
|
||||
|
||||
|
|
|
@ -202,6 +202,9 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #7358: cStringIO.StringIO now supports writing to and reading from
|
||||
a stream larger than 2 GiB on 64-bit systems.
|
||||
|
||||
- Issue #10355: In SpooledTemporaryFile class mode and name properties and
|
||||
xreadlines method now work for unrolled files. encoding and newlines
|
||||
properties now removed as they have no sense and always produced
|
||||
|
|
|
@ -170,10 +170,15 @@ IO_cread(PyObject *self, char **output, Py_ssize_t n) {
|
|||
n = l;
|
||||
if (n < 0) n=0;
|
||||
}
|
||||
if (n > INT_MAX) {
|
||||
PyErr_SetString(PyExc_OverflowError,
|
||||
"length too large");
|
||||
return -1;
|
||||
}
|
||||
|
||||
*output=((IOobject*)self)->buf + ((IOobject*)self)->pos;
|
||||
((IOobject*)self)->pos += n;
|
||||
return n;
|
||||
return (int)n;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
|
@ -192,26 +197,33 @@ PyDoc_STRVAR(IO_readline__doc__, "readline() -- Read one line");
|
|||
|
||||
static int
|
||||
IO_creadline(PyObject *self, char **output) {
|
||||
char *n, *s;
|
||||
Py_ssize_t l;
|
||||
char *n, *start, *end;
|
||||
Py_ssize_t len;
|
||||
|
||||
if (!IO__opencheck(IOOOBJECT(self))) return -1;
|
||||
|
||||
for (n = ((IOobject*)self)->buf + ((IOobject*)self)->pos,
|
||||
s = ((IOobject*)self)->buf + ((IOobject*)self)->string_size;
|
||||
n < s && *n != '\n'; n++);
|
||||
n = start = ((IOobject*)self)->buf + ((IOobject*)self)->pos;
|
||||
end = ((IOobject*)self)->buf + ((IOobject*)self)->string_size;
|
||||
while (n < end && *n != '\n')
|
||||
n++;
|
||||
|
||||
if (n < s) n++;
|
||||
if (n < end) n++;
|
||||
|
||||
*output=((IOobject*)self)->buf + ((IOobject*)self)->pos;
|
||||
l = n - ((IOobject*)self)->buf - ((IOobject*)self)->pos;
|
||||
len = n - start;
|
||||
if (len > INT_MAX) {
|
||||
PyErr_SetString(PyExc_OverflowError,
|
||||
"length too large");
|
||||
return -1;
|
||||
}
|
||||
|
||||
assert(IOOOBJECT(self)->pos <= PY_SSIZE_T_MAX - l);
|
||||
*output=start;
|
||||
|
||||
assert(IOOOBJECT(self)->pos <= PY_SSIZE_T_MAX - len);
|
||||
assert(IOOOBJECT(self)->pos >= 0);
|
||||
assert(IOOOBJECT(self)->string_size >= 0);
|
||||
|
||||
((IOobject*)self)->pos += l;
|
||||
return (int)l;
|
||||
((IOobject*)self)->pos += len;
|
||||
return (int)len;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
|
@ -239,9 +251,9 @@ IO_readlines(IOobject *self, PyObject *args) {
|
|||
int n;
|
||||
char *output;
|
||||
PyObject *result, *line;
|
||||
int hint = 0, length = 0;
|
||||
Py_ssize_t hint = 0, length = 0;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "|i:readlines", &hint)) return NULL;
|
||||
if (!PyArg_ParseTuple(args, "|n:readlines", &hint)) return NULL;
|
||||
|
||||
result = PyList_New(0);
|
||||
if (!result)
|
||||
|
@ -377,31 +389,41 @@ PyDoc_STRVAR(O_write__doc__,
|
|||
|
||||
|
||||
static int
|
||||
O_cwrite(PyObject *self, const char *c, Py_ssize_t l) {
|
||||
Py_ssize_t newl;
|
||||
O_cwrite(PyObject *self, const char *c, Py_ssize_t len) {
|
||||
Py_ssize_t newpos;
|
||||
Oobject *oself;
|
||||
char *newbuf;
|
||||
|
||||
if (!IO__opencheck(IOOOBJECT(self))) return -1;
|
||||
oself = (Oobject *)self;
|
||||
|
||||
newl = oself->pos+l;
|
||||
if (newl >= oself->buf_size) {
|
||||
oself->buf_size *= 2;
|
||||
if (oself->buf_size <= newl) {
|
||||
assert(newl + 1 < INT_MAX);
|
||||
oself->buf_size = (int)(newl+1);
|
||||
if (len > INT_MAX) {
|
||||
PyErr_SetString(PyExc_OverflowError,
|
||||
"length too large");
|
||||
return -1;
|
||||
}
|
||||
assert(len >= 0);
|
||||
if (oself->pos >= PY_SSIZE_T_MAX - len) {
|
||||
PyErr_SetString(PyExc_OverflowError,
|
||||
"new position too large");
|
||||
return -1;
|
||||
}
|
||||
newpos = oself->pos + len;
|
||||
if (newpos >= oself->buf_size) {
|
||||
size_t newsize = oself->buf_size;
|
||||
newsize *= 2;
|
||||
if (newsize <= (size_t)newpos || newsize > PY_SSIZE_T_MAX) {
|
||||
assert(newpos < PY_SSIZE_T_MAX - 1);
|
||||
newsize = newpos + 1;
|
||||
}
|
||||
newbuf = (char*)realloc(oself->buf, oself->buf_size);
|
||||
newbuf = (char*)realloc(oself->buf, newsize);
|
||||
if (!newbuf) {
|
||||
PyErr_SetString(PyExc_MemoryError,"out of memory");
|
||||
free(oself->buf);
|
||||
oself->buf = 0;
|
||||
oself->buf_size = oself->pos = 0;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
oself->buf_size = (Py_ssize_t)newsize;
|
||||
oself->buf = newbuf;
|
||||
}
|
||||
}
|
||||
|
||||
if (oself->string_size < oself->pos) {
|
||||
/* In case of overseek, pad with null bytes the buffer region between
|
||||
|
@ -416,16 +438,15 @@ O_cwrite(PyObject *self, const char *c, Py_ssize_t l) {
|
|||
(oself->pos - oself->string_size) * sizeof(char));
|
||||
}
|
||||
|
||||
memcpy(oself->buf+oself->pos,c,l);
|
||||
memcpy(oself->buf + oself->pos, c, len);
|
||||
|
||||
assert(oself->pos + l < INT_MAX);
|
||||
oself->pos += (int)l;
|
||||
oself->pos = newpos;
|
||||
|
||||
if (oself->string_size < oself->pos) {
|
||||
oself->string_size = oself->pos;
|
||||
}
|
||||
|
||||
return (int)l;
|
||||
return (int)len;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
|
|
Loading…
Reference in New Issue