Issue #13149: Speed up append-only StringIO objects.
This is very similar to the "lazy strings" idea.
This commit is contained in:
parent
9f4b1e9c50
commit
de20b0b50e
|
@ -365,6 +365,8 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #13149: Speed up append-only StringIO objects.
|
||||
|
||||
- Issue #13373: multiprocessing.Queue.get() could sometimes block indefinitely
|
||||
when called with a timeout. Patch by Arnaud Ysmal.
|
||||
|
||||
|
|
|
@ -7,6 +7,9 @@
|
|||
than the enclosed string, for proper functioning of _PyIO_find_line_ending.
|
||||
*/
|
||||
|
||||
/* Values stored in the `state` field of a stringio object.
   STATE_ACCUMULATING: the contents are held by the embedded _PyAccu and
   the internal buffer contains nothing.
   STATE_REALIZED: the internal buffer is meaningful and the _PyAccu has
   been destroyed. */
enum {
    STATE_REALIZED = 1,
    STATE_ACCUMULATING = 2
};
|
||||
|
||||
typedef struct {
|
||||
PyObject_HEAD
|
||||
Py_UCS4 *buf;
|
||||
|
@ -14,6 +17,15 @@ typedef struct {
|
|||
Py_ssize_t string_size;
|
||||
size_t buf_size;
|
||||
|
||||
/* The stringio object can be in two states: accumulating or realized.
|
||||
In accumulating state, the internal buffer contains nothing and
|
||||
the contents are given by the embedded _PyAccu structure.
|
||||
In realized state, the internal buffer is meaningful and the
|
||||
_PyAccu is destroyed.
|
||||
*/
|
||||
int state;
|
||||
_PyAccu accu;
|
||||
|
||||
char ok; /* initialized? */
|
||||
char closed;
|
||||
char readuniversal;
|
||||
|
@ -40,6 +52,11 @@ typedef struct {
|
|||
return NULL; \
|
||||
}
|
||||
|
||||
/* Switch `self` to the realized state by calling realize(); if that
   fails (negative return), make the enclosing function return NULL.
   Only usable inside functions whose return type is a pointer. */
#define ENSURE_REALIZED(self)   \
    if (realize(self) < 0) {    \
        return NULL;            \
    }
|
||||
|
||||
PyDoc_STRVAR(stringio_doc,
|
||||
"Text I/O implementation using an in-memory buffer.\n"
|
||||
"\n"
|
||||
|
@ -102,6 +119,54 @@ resize_buffer(stringio *self, size_t size)
|
|||
return -1;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
make_intermediate(stringio *self)
|
||||
{
|
||||
PyObject *intermediate = _PyAccu_Finish(&self->accu);
|
||||
self->state = STATE_REALIZED;
|
||||
if (intermediate == NULL)
|
||||
return NULL;
|
||||
if (_PyAccu_Init(&self->accu) ||
|
||||
_PyAccu_Accumulate(&self->accu, intermediate)) {
|
||||
Py_DECREF(intermediate);
|
||||
return NULL;
|
||||
}
|
||||
self->state = STATE_ACCUMULATING;
|
||||
return intermediate;
|
||||
}
|
||||
|
||||
static int
|
||||
realize(stringio *self)
|
||||
{
|
||||
Py_ssize_t len;
|
||||
PyObject *intermediate;
|
||||
|
||||
if (self->state == STATE_REALIZED)
|
||||
return 0;
|
||||
assert(self->state == STATE_ACCUMULATING);
|
||||
self->state = STATE_REALIZED;
|
||||
|
||||
intermediate = _PyAccu_Finish(&self->accu);
|
||||
if (intermediate == NULL)
|
||||
return -1;
|
||||
|
||||
/* Append the intermediate string to the internal buffer.
|
||||
The length should be equal to the current cursor position.
|
||||
*/
|
||||
len = PyUnicode_GET_LENGTH(intermediate);
|
||||
if (resize_buffer(self, len) < 0) {
|
||||
Py_DECREF(intermediate);
|
||||
return -1;
|
||||
}
|
||||
if (!PyUnicode_AsUCS4(intermediate, self->buf, len, 0)) {
|
||||
Py_DECREF(intermediate);
|
||||
return -1;
|
||||
}
|
||||
|
||||
Py_DECREF(intermediate);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Internal routine for writing a whole PyUnicode object to the buffer of a
|
||||
StringIO object. Returns 0 on success, or -1 on error. */
|
||||
static Py_ssize_t
|
||||
|
@ -136,7 +201,6 @@ write_str(stringio *self, PyObject *obj)
|
|||
return -1;
|
||||
}
|
||||
len = PyUnicode_GET_LENGTH(decoded);
|
||||
|
||||
assert(len >= 0);
|
||||
|
||||
/* This overflow check is not strictly necessary. However, it avoids us to
|
||||
|
@ -147,6 +211,17 @@ write_str(stringio *self, PyObject *obj)
|
|||
"new position too large");
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (self->state == STATE_ACCUMULATING) {
|
||||
if (self->string_size == self->pos) {
|
||||
if (_PyAccu_Accumulate(&self->accu, decoded))
|
||||
goto fail;
|
||||
goto success;
|
||||
}
|
||||
if (realize(self))
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (self->pos + len > self->string_size) {
|
||||
if (resize_buffer(self, self->pos + len) < 0)
|
||||
goto fail;
|
||||
|
@ -174,6 +249,7 @@ write_str(stringio *self, PyObject *obj)
|
|||
0))
|
||||
goto fail;
|
||||
|
||||
success:
|
||||
/* Set the new length of the internal string if it has changed. */
|
||||
self->pos += len;
|
||||
if (self->string_size < self->pos)
|
||||
|
@ -195,6 +271,8 @@ stringio_getvalue(stringio *self)
|
|||
{
|
||||
CHECK_INITIALIZED(self);
|
||||
CHECK_CLOSED(self);
|
||||
if (self->state == STATE_ACCUMULATING)
|
||||
return make_intermediate(self);
|
||||
return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, self->buf,
|
||||
self->string_size);
|
||||
}
|
||||
|
@ -251,6 +329,14 @@ stringio_read(stringio *self, PyObject *args)
|
|||
size = 0;
|
||||
}
|
||||
|
||||
/* Optimization for seek(0); read() */
|
||||
if (self->state == STATE_ACCUMULATING && self->pos == 0 && size == n) {
|
||||
PyObject *result = make_intermediate(self);
|
||||
self->pos = self->string_size;
|
||||
return result;
|
||||
}
|
||||
|
||||
ENSURE_REALIZED(self);
|
||||
output = self->buf + self->pos;
|
||||
self->pos += size;
|
||||
return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, size);
|
||||
|
@ -301,6 +387,7 @@ stringio_readline(stringio *self, PyObject *args)
|
|||
if (!PyArg_ParseTuple(args, "|O:readline", &arg))
|
||||
return NULL;
|
||||
CHECK_CLOSED(self);
|
||||
ENSURE_REALIZED(self);
|
||||
|
||||
if (PyNumber_Check(arg)) {
|
||||
limit = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
|
||||
|
@ -322,6 +409,7 @@ stringio_iternext(stringio *self)
|
|||
|
||||
CHECK_INITIALIZED(self);
|
||||
CHECK_CLOSED(self);
|
||||
ENSURE_REALIZED(self);
|
||||
|
||||
if (Py_TYPE(self) == &PyStringIO_Type) {
|
||||
/* Skip method call overhead for speed */
|
||||
|
@ -392,6 +480,7 @@ stringio_truncate(stringio *self, PyObject *args)
|
|||
}
|
||||
|
||||
if (size < self->string_size) {
|
||||
ENSURE_REALIZED(self);
|
||||
if (resize_buffer(self, size) < 0)
|
||||
return NULL;
|
||||
self->string_size = size;
|
||||
|
@ -492,6 +581,7 @@ stringio_close(stringio *self)
|
|||
/* Free up some memory */
|
||||
if (resize_buffer(self, 0) < 0)
|
||||
return NULL;
|
||||
_PyAccu_Destroy(&self->accu);
|
||||
Py_CLEAR(self->readnl);
|
||||
Py_CLEAR(self->writenl);
|
||||
Py_CLEAR(self->decoder);
|
||||
|
@ -521,6 +611,7 @@ stringio_dealloc(stringio *self)
|
|||
PyMem_Free(self->buf);
|
||||
self->buf = NULL;
|
||||
}
|
||||
_PyAccu_Destroy(&self->accu);
|
||||
Py_CLEAR(self->readnl);
|
||||
Py_CLEAR(self->writenl);
|
||||
Py_CLEAR(self->decoder);
|
||||
|
@ -559,6 +650,7 @@ stringio_init(stringio *self, PyObject *args, PyObject *kwds)
|
|||
PyObject *value = NULL;
|
||||
PyObject *newline_obj = NULL;
|
||||
char *newline = "\n";
|
||||
Py_ssize_t value_len;
|
||||
|
||||
if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OO:__init__", kwlist,
|
||||
&value, &newline_obj))
|
||||
|
@ -600,6 +692,7 @@ stringio_init(stringio *self, PyObject *args, PyObject *kwds)
|
|||
|
||||
self->ok = 0;
|
||||
|
||||
_PyAccu_Destroy(&self->accu);
|
||||
Py_CLEAR(self->readnl);
|
||||
Py_CLEAR(self->writenl);
|
||||
Py_CLEAR(self->decoder);
|
||||
|
@ -636,19 +729,27 @@ stringio_init(stringio *self, PyObject *args, PyObject *kwds)
|
|||
/* Now everything is set up, resize buffer to size of initial value,
|
||||
and copy it */
|
||||
self->string_size = 0;
|
||||
if (value && value != Py_None) {
|
||||
Py_ssize_t len = PyUnicode_GetSize(value);
|
||||
if (value && value != Py_None)
|
||||
value_len = PyUnicode_GetSize(value);
|
||||
else
|
||||
value_len = 0;
|
||||
if (value_len > 0) {
|
||||
/* This is a heuristic, for newline translation might change
|
||||
the string length. */
|
||||
if (resize_buffer(self, len) < 0)
|
||||
if (resize_buffer(self, 0) < 0)
|
||||
return -1;
|
||||
self->state = STATE_REALIZED;
|
||||
self->pos = 0;
|
||||
if (write_str(self, value) < 0)
|
||||
return -1;
|
||||
}
|
||||
else {
|
||||
/* Empty stringio object, we can start by accumulating */
|
||||
if (resize_buffer(self, 0) < 0)
|
||||
return -1;
|
||||
if (_PyAccu_Init(&self->accu))
|
||||
return -1;
|
||||
self->state = STATE_ACCUMULATING;
|
||||
}
|
||||
self->pos = 0;
|
||||
|
||||
|
|
|
@ -2055,7 +2055,7 @@ Py_UCS4*
|
|||
PyUnicode_AsUCS4(PyObject *string, Py_UCS4 *target, Py_ssize_t targetsize,
|
||||
int copy_null)
|
||||
{
|
||||
if (target == NULL || targetsize < 1) {
|
||||
if (target == NULL || targetsize < 0) {
|
||||
PyErr_BadInternalCall();
|
||||
return NULL;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue