mirror of https://github.com/python/cpython
Issue #7358: cStringIO.StringIO now supports writing to and reading from
a stream larger than 2 GiB on 64-bit systems.
This commit is contained in:
parent
beaa3adae9
commit
276f1d5139
|
@ -5,6 +5,7 @@ import StringIO
|
||||||
import cStringIO
|
import cStringIO
|
||||||
import types
|
import types
|
||||||
import array
|
import array
|
||||||
|
import sys
|
||||||
from test import test_support
|
from test import test_support
|
||||||
|
|
||||||
|
|
||||||
|
@ -105,6 +106,45 @@ class TestGenericStringIO(unittest.TestCase):
|
||||||
self._fp.close()
|
self._fp.close()
|
||||||
self.assertRaises(ValueError, self._fp.getvalue)
|
self.assertRaises(ValueError, self._fp.getvalue)
|
||||||
|
|
||||||
|
@test_support.bigmemtest(test_support._2G + 2**26, memuse=2.001)
def test_reads_from_large_stream(self, size):
    # Build a stream of `size` characters out of 64 MiB newline-terminated
    # chunks plus a shorter, unterminated tail, then read it back through
    # read(), readline() and readlines().
    chunk_len = 2**26  # 64 MiB
    full_chunks, tail_len = divmod(size, chunk_len)
    chunks = ['x' * (chunk_len - 1) + '\n'] * full_chunks
    chunks = chunks + ['y' * tail_len]
    stream = self.MODULE.StringIO(''.join(chunks))

    # Sized reads: each read() asks for exactly one chunk.
    # NOTE(review): lengths are compared before contents, presumably so a
    # size mismatch is reported without comparing the huge strings.
    for want in chunks:
        got = stream.read(len(want))
        self.assertEqual(len(got), len(want))
        self.assertEqual(got, want)
    self.assertEqual(stream.read(), '')

    # Line-by-line reads after rewinding.
    stream.seek(0)
    for want in chunks:
        got = stream.readline()
        self.assertEqual(len(got), len(want))
        self.assertEqual(got, want)
    self.assertEqual(stream.readline(), '')

    # readlines() without a hint returns every chunk, then nothing.
    stream.seek(0)
    self.assertEqual(stream.readlines(), chunks)
    self.assertEqual(stream.readlines(), [])

    # readlines() with a hint equal to the whole stream length.
    stream.seek(0)
    self.assertEqual(stream.readlines(size), chunks)
    self.assertEqual(stream.readlines(), [])
|
||||||
|
|
||||||
|
# Worst case, cStringIO needs 2 + 1 + 1/2 + 1/2**2 + ... = 4 bytes of
# memory per input character.
@test_support.bigmemtest(test_support._2G, memuse=4)
def test_writes_to_large_stream(self, size):
    # Fill the stream with `size` characters, 64 MiB at a time, and check
    # that the accumulated value has the expected total length.
    piece = 'x' * 2**26  # 64 MiB
    piece_len = len(piece)
    stream = self.MODULE.StringIO()
    remaining = size
    while remaining > piece_len:
        stream.write(piece)
        remaining -= piece_len
    # NOTE(review): rebinding to None presumably releases the 64 MiB string
    # before the final piece is allocated.
    piece = None
    stream.write('x' * remaining)
    self.assertEqual(len(stream.getvalue()), size)
|
||||||
|
|
||||||
|
|
||||||
class TestStringIO(TestGenericStringIO):
|
class TestStringIO(TestGenericStringIO):
|
||||||
MODULE = StringIO
|
MODULE = StringIO
|
||||||
|
|
||||||
|
|
|
@ -202,6 +202,9 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #7358: cStringIO.StringIO now supports writing to and reading from
|
||||||
|
a stream larger than 2 GiB on 64-bit systems.
|
||||||
|
|
||||||
- Issue #10355: In SpooledTemporaryFile class mode and name properties and
|
- Issue #10355: In SpooledTemporaryFile class mode and name properties and
|
||||||
xreadlines method now work for unrolled files. encoding and newlines
|
xreadlines method now work for unrolled files. encoding and newlines
|
||||||
properties now removed as they have no sense and always produced
|
properties now removed as they have no sense and always produced
|
||||||
|
|
|
@ -170,10 +170,15 @@ IO_cread(PyObject *self, char **output, Py_ssize_t n) {
|
||||||
n = l;
|
n = l;
|
||||||
if (n < 0) n=0;
|
if (n < 0) n=0;
|
||||||
}
|
}
|
||||||
|
if (n > INT_MAX) {
|
||||||
|
PyErr_SetString(PyExc_OverflowError,
|
||||||
|
"length too large");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
*output=((IOobject*)self)->buf + ((IOobject*)self)->pos;
|
*output=((IOobject*)self)->buf + ((IOobject*)self)->pos;
|
||||||
((IOobject*)self)->pos += n;
|
((IOobject*)self)->pos += n;
|
||||||
return n;
|
return (int)n;
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
|
@ -192,26 +197,33 @@ PyDoc_STRVAR(IO_readline__doc__, "readline() -- Read one line");
|
||||||
|
|
||||||
static int
|
static int
|
||||||
IO_creadline(PyObject *self, char **output) {
|
IO_creadline(PyObject *self, char **output) {
|
||||||
char *n, *s;
|
char *n, *start, *end;
|
||||||
Py_ssize_t l;
|
Py_ssize_t len;
|
||||||
|
|
||||||
if (!IO__opencheck(IOOOBJECT(self))) return -1;
|
if (!IO__opencheck(IOOOBJECT(self))) return -1;
|
||||||
|
|
||||||
for (n = ((IOobject*)self)->buf + ((IOobject*)self)->pos,
|
n = start = ((IOobject*)self)->buf + ((IOobject*)self)->pos;
|
||||||
s = ((IOobject*)self)->buf + ((IOobject*)self)->string_size;
|
end = ((IOobject*)self)->buf + ((IOobject*)self)->string_size;
|
||||||
n < s && *n != '\n'; n++);
|
while (n < end && *n != '\n')
|
||||||
|
n++;
|
||||||
|
|
||||||
if (n < s) n++;
|
if (n < end) n++;
|
||||||
|
|
||||||
*output=((IOobject*)self)->buf + ((IOobject*)self)->pos;
|
len = n - start;
|
||||||
l = n - ((IOobject*)self)->buf - ((IOobject*)self)->pos;
|
if (len > INT_MAX) {
|
||||||
|
PyErr_SetString(PyExc_OverflowError,
|
||||||
|
"length too large");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
assert(IOOOBJECT(self)->pos <= PY_SSIZE_T_MAX - l);
|
*output=start;
|
||||||
|
|
||||||
|
assert(IOOOBJECT(self)->pos <= PY_SSIZE_T_MAX - len);
|
||||||
assert(IOOOBJECT(self)->pos >= 0);
|
assert(IOOOBJECT(self)->pos >= 0);
|
||||||
assert(IOOOBJECT(self)->string_size >= 0);
|
assert(IOOOBJECT(self)->string_size >= 0);
|
||||||
|
|
||||||
((IOobject*)self)->pos += l;
|
((IOobject*)self)->pos += len;
|
||||||
return (int)l;
|
return (int)len;
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
|
@ -239,9 +251,9 @@ IO_readlines(IOobject *self, PyObject *args) {
|
||||||
int n;
|
int n;
|
||||||
char *output;
|
char *output;
|
||||||
PyObject *result, *line;
|
PyObject *result, *line;
|
||||||
int hint = 0, length = 0;
|
Py_ssize_t hint = 0, length = 0;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "|i:readlines", &hint)) return NULL;
|
if (!PyArg_ParseTuple(args, "|n:readlines", &hint)) return NULL;
|
||||||
|
|
||||||
result = PyList_New(0);
|
result = PyList_New(0);
|
||||||
if (!result)
|
if (!result)
|
||||||
|
@ -377,31 +389,41 @@ PyDoc_STRVAR(O_write__doc__,
|
||||||
|
|
||||||
|
|
||||||
static int
|
static int
|
||||||
O_cwrite(PyObject *self, const char *c, Py_ssize_t l) {
|
O_cwrite(PyObject *self, const char *c, Py_ssize_t len) {
|
||||||
Py_ssize_t newl;
|
Py_ssize_t newpos;
|
||||||
Oobject *oself;
|
Oobject *oself;
|
||||||
char *newbuf;
|
char *newbuf;
|
||||||
|
|
||||||
if (!IO__opencheck(IOOOBJECT(self))) return -1;
|
if (!IO__opencheck(IOOOBJECT(self))) return -1;
|
||||||
oself = (Oobject *)self;
|
oself = (Oobject *)self;
|
||||||
|
|
||||||
newl = oself->pos+l;
|
if (len > INT_MAX) {
|
||||||
if (newl >= oself->buf_size) {
|
PyErr_SetString(PyExc_OverflowError,
|
||||||
oself->buf_size *= 2;
|
"length too large");
|
||||||
if (oself->buf_size <= newl) {
|
return -1;
|
||||||
assert(newl + 1 < INT_MAX);
|
}
|
||||||
oself->buf_size = (int)(newl+1);
|
assert(len >= 0);
|
||||||
|
if (oself->pos >= PY_SSIZE_T_MAX - len) {
|
||||||
|
PyErr_SetString(PyExc_OverflowError,
|
||||||
|
"new position too large");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
newpos = oself->pos + len;
|
||||||
|
if (newpos >= oself->buf_size) {
|
||||||
|
size_t newsize = oself->buf_size;
|
||||||
|
newsize *= 2;
|
||||||
|
if (newsize <= (size_t)newpos || newsize > PY_SSIZE_T_MAX) {
|
||||||
|
assert(newpos < PY_SSIZE_T_MAX - 1);
|
||||||
|
newsize = newpos + 1;
|
||||||
}
|
}
|
||||||
newbuf = (char*)realloc(oself->buf, oself->buf_size);
|
newbuf = (char*)realloc(oself->buf, newsize);
|
||||||
if (!newbuf) {
|
if (!newbuf) {
|
||||||
PyErr_SetString(PyExc_MemoryError,"out of memory");
|
PyErr_SetString(PyExc_MemoryError,"out of memory");
|
||||||
free(oself->buf);
|
|
||||||
oself->buf = 0;
|
|
||||||
oself->buf_size = oself->pos = 0;
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
oself->buf_size = (Py_ssize_t)newsize;
|
||||||
oself->buf = newbuf;
|
oself->buf = newbuf;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (oself->string_size < oself->pos) {
|
if (oself->string_size < oself->pos) {
|
||||||
/* In case of overseek, pad with null bytes the buffer region between
|
/* In case of overseek, pad with null bytes the buffer region between
|
||||||
|
@ -416,16 +438,15 @@ O_cwrite(PyObject *self, const char *c, Py_ssize_t l) {
|
||||||
(oself->pos - oself->string_size) * sizeof(char));
|
(oself->pos - oself->string_size) * sizeof(char));
|
||||||
}
|
}
|
||||||
|
|
||||||
memcpy(oself->buf+oself->pos,c,l);
|
memcpy(oself->buf + oself->pos, c, len);
|
||||||
|
|
||||||
assert(oself->pos + l < INT_MAX);
|
oself->pos = newpos;
|
||||||
oself->pos += (int)l;
|
|
||||||
|
|
||||||
if (oself->string_size < oself->pos) {
|
if (oself->string_size < oself->pos) {
|
||||||
oself->string_size = oself->pos;
|
oself->string_size = oself->pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
return (int)l;
|
return (int)len;
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
|
|
Loading…
Reference in New Issue