mirror of https://github.com/python/cpython
Issue #13149: Speed up append-only StringIO objects.
This is very similar to the "lazy strings" idea.
This commit is contained in:
parent
9f4b1e9c50
commit
de20b0b50e
|
@ -365,6 +365,8 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #13149: Speed up append-only StringIO objects.
|
||||||
|
|
||||||
- Issue #13373: multiprocessing.Queue.get() could sometimes block indefinitely
|
- Issue #13373: multiprocessing.Queue.get() could sometimes block indefinitely
|
||||||
when called with a timeout. Patch by Arnaud Ysmal.
|
when called with a timeout. Patch by Arnaud Ysmal.
|
||||||
|
|
||||||
|
|
|
@ -7,6 +7,9 @@
|
||||||
than the enclosed string, for proper functioning of _PyIO_find_line_ending.
|
than the enclosed string, for proper functioning of _PyIO_find_line_ending.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#define STATE_REALIZED 1
|
||||||
|
#define STATE_ACCUMULATING 2
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
PyObject_HEAD
|
PyObject_HEAD
|
||||||
Py_UCS4 *buf;
|
Py_UCS4 *buf;
|
||||||
|
@ -14,6 +17,15 @@ typedef struct {
|
||||||
Py_ssize_t string_size;
|
Py_ssize_t string_size;
|
||||||
size_t buf_size;
|
size_t buf_size;
|
||||||
|
|
||||||
|
/* The stringio object can be in two states: accumulating or realized.
|
||||||
|
In accumulating state, the internal buffer contains nothing and
|
||||||
|
the contents are given by the embedded _PyAccu structure.
|
||||||
|
In realized state, the internal buffer is meaningful and the
|
||||||
|
_PyAccu is destroyed.
|
||||||
|
*/
|
||||||
|
int state;
|
||||||
|
_PyAccu accu;
|
||||||
|
|
||||||
char ok; /* initialized? */
|
char ok; /* initialized? */
|
||||||
char closed;
|
char closed;
|
||||||
char readuniversal;
|
char readuniversal;
|
||||||
|
@ -40,6 +52,11 @@ typedef struct {
|
||||||
return NULL; \
|
return NULL; \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define ENSURE_REALIZED(self) \
|
||||||
|
if (realize(self) < 0) { \
|
||||||
|
return NULL; \
|
||||||
|
}
|
||||||
|
|
||||||
PyDoc_STRVAR(stringio_doc,
|
PyDoc_STRVAR(stringio_doc,
|
||||||
"Text I/O implementation using an in-memory buffer.\n"
|
"Text I/O implementation using an in-memory buffer.\n"
|
||||||
"\n"
|
"\n"
|
||||||
|
@ -102,6 +119,54 @@ resize_buffer(stringio *self, size_t size)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
make_intermediate(stringio *self)
|
||||||
|
{
|
||||||
|
PyObject *intermediate = _PyAccu_Finish(&self->accu);
|
||||||
|
self->state = STATE_REALIZED;
|
||||||
|
if (intermediate == NULL)
|
||||||
|
return NULL;
|
||||||
|
if (_PyAccu_Init(&self->accu) ||
|
||||||
|
_PyAccu_Accumulate(&self->accu, intermediate)) {
|
||||||
|
Py_DECREF(intermediate);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
self->state = STATE_ACCUMULATING;
|
||||||
|
return intermediate;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
realize(stringio *self)
|
||||||
|
{
|
||||||
|
Py_ssize_t len;
|
||||||
|
PyObject *intermediate;
|
||||||
|
|
||||||
|
if (self->state == STATE_REALIZED)
|
||||||
|
return 0;
|
||||||
|
assert(self->state == STATE_ACCUMULATING);
|
||||||
|
self->state = STATE_REALIZED;
|
||||||
|
|
||||||
|
intermediate = _PyAccu_Finish(&self->accu);
|
||||||
|
if (intermediate == NULL)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
/* Append the intermediate string to the internal buffer.
|
||||||
|
The length should be equal to the current cursor position.
|
||||||
|
*/
|
||||||
|
len = PyUnicode_GET_LENGTH(intermediate);
|
||||||
|
if (resize_buffer(self, len) < 0) {
|
||||||
|
Py_DECREF(intermediate);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (!PyUnicode_AsUCS4(intermediate, self->buf, len, 0)) {
|
||||||
|
Py_DECREF(intermediate);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
Py_DECREF(intermediate);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/* Internal routine for writing a whole PyUnicode object to the buffer of a
|
/* Internal routine for writing a whole PyUnicode object to the buffer of a
|
||||||
StringIO object. Returns 0 on success, or -1 on error. */
|
StringIO object. Returns 0 on success, or -1 on error. */
|
||||||
static Py_ssize_t
|
static Py_ssize_t
|
||||||
|
@ -136,7 +201,6 @@ write_str(stringio *self, PyObject *obj)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
len = PyUnicode_GET_LENGTH(decoded);
|
len = PyUnicode_GET_LENGTH(decoded);
|
||||||
|
|
||||||
assert(len >= 0);
|
assert(len >= 0);
|
||||||
|
|
||||||
/* This overflow check is not strictly necessary. However, it avoids us to
|
/* This overflow check is not strictly necessary. However, it avoids us to
|
||||||
|
@ -147,6 +211,17 @@ write_str(stringio *self, PyObject *obj)
|
||||||
"new position too large");
|
"new position too large");
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (self->state == STATE_ACCUMULATING) {
|
||||||
|
if (self->string_size == self->pos) {
|
||||||
|
if (_PyAccu_Accumulate(&self->accu, decoded))
|
||||||
|
goto fail;
|
||||||
|
goto success;
|
||||||
|
}
|
||||||
|
if (realize(self))
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
if (self->pos + len > self->string_size) {
|
if (self->pos + len > self->string_size) {
|
||||||
if (resize_buffer(self, self->pos + len) < 0)
|
if (resize_buffer(self, self->pos + len) < 0)
|
||||||
goto fail;
|
goto fail;
|
||||||
|
@ -174,6 +249,7 @@ write_str(stringio *self, PyObject *obj)
|
||||||
0))
|
0))
|
||||||
goto fail;
|
goto fail;
|
||||||
|
|
||||||
|
success:
|
||||||
/* Set the new length of the internal string if it has changed. */
|
/* Set the new length of the internal string if it has changed. */
|
||||||
self->pos += len;
|
self->pos += len;
|
||||||
if (self->string_size < self->pos)
|
if (self->string_size < self->pos)
|
||||||
|
@ -195,6 +271,8 @@ stringio_getvalue(stringio *self)
|
||||||
{
|
{
|
||||||
CHECK_INITIALIZED(self);
|
CHECK_INITIALIZED(self);
|
||||||
CHECK_CLOSED(self);
|
CHECK_CLOSED(self);
|
||||||
|
if (self->state == STATE_ACCUMULATING)
|
||||||
|
return make_intermediate(self);
|
||||||
return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, self->buf,
|
return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, self->buf,
|
||||||
self->string_size);
|
self->string_size);
|
||||||
}
|
}
|
||||||
|
@ -251,6 +329,14 @@ stringio_read(stringio *self, PyObject *args)
|
||||||
size = 0;
|
size = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Optimization for seek(0); read() */
|
||||||
|
if (self->state == STATE_ACCUMULATING && self->pos == 0 && size == n) {
|
||||||
|
PyObject *result = make_intermediate(self);
|
||||||
|
self->pos = self->string_size;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
ENSURE_REALIZED(self);
|
||||||
output = self->buf + self->pos;
|
output = self->buf + self->pos;
|
||||||
self->pos += size;
|
self->pos += size;
|
||||||
return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, size);
|
return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, size);
|
||||||
|
@ -301,6 +387,7 @@ stringio_readline(stringio *self, PyObject *args)
|
||||||
if (!PyArg_ParseTuple(args, "|O:readline", &arg))
|
if (!PyArg_ParseTuple(args, "|O:readline", &arg))
|
||||||
return NULL;
|
return NULL;
|
||||||
CHECK_CLOSED(self);
|
CHECK_CLOSED(self);
|
||||||
|
ENSURE_REALIZED(self);
|
||||||
|
|
||||||
if (PyNumber_Check(arg)) {
|
if (PyNumber_Check(arg)) {
|
||||||
limit = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
|
limit = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
|
||||||
|
@ -322,6 +409,7 @@ stringio_iternext(stringio *self)
|
||||||
|
|
||||||
CHECK_INITIALIZED(self);
|
CHECK_INITIALIZED(self);
|
||||||
CHECK_CLOSED(self);
|
CHECK_CLOSED(self);
|
||||||
|
ENSURE_REALIZED(self);
|
||||||
|
|
||||||
if (Py_TYPE(self) == &PyStringIO_Type) {
|
if (Py_TYPE(self) == &PyStringIO_Type) {
|
||||||
/* Skip method call overhead for speed */
|
/* Skip method call overhead for speed */
|
||||||
|
@ -392,6 +480,7 @@ stringio_truncate(stringio *self, PyObject *args)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (size < self->string_size) {
|
if (size < self->string_size) {
|
||||||
|
ENSURE_REALIZED(self);
|
||||||
if (resize_buffer(self, size) < 0)
|
if (resize_buffer(self, size) < 0)
|
||||||
return NULL;
|
return NULL;
|
||||||
self->string_size = size;
|
self->string_size = size;
|
||||||
|
@ -492,6 +581,7 @@ stringio_close(stringio *self)
|
||||||
/* Free up some memory */
|
/* Free up some memory */
|
||||||
if (resize_buffer(self, 0) < 0)
|
if (resize_buffer(self, 0) < 0)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
_PyAccu_Destroy(&self->accu);
|
||||||
Py_CLEAR(self->readnl);
|
Py_CLEAR(self->readnl);
|
||||||
Py_CLEAR(self->writenl);
|
Py_CLEAR(self->writenl);
|
||||||
Py_CLEAR(self->decoder);
|
Py_CLEAR(self->decoder);
|
||||||
|
@ -521,6 +611,7 @@ stringio_dealloc(stringio *self)
|
||||||
PyMem_Free(self->buf);
|
PyMem_Free(self->buf);
|
||||||
self->buf = NULL;
|
self->buf = NULL;
|
||||||
}
|
}
|
||||||
|
_PyAccu_Destroy(&self->accu);
|
||||||
Py_CLEAR(self->readnl);
|
Py_CLEAR(self->readnl);
|
||||||
Py_CLEAR(self->writenl);
|
Py_CLEAR(self->writenl);
|
||||||
Py_CLEAR(self->decoder);
|
Py_CLEAR(self->decoder);
|
||||||
|
@ -559,6 +650,7 @@ stringio_init(stringio *self, PyObject *args, PyObject *kwds)
|
||||||
PyObject *value = NULL;
|
PyObject *value = NULL;
|
||||||
PyObject *newline_obj = NULL;
|
PyObject *newline_obj = NULL;
|
||||||
char *newline = "\n";
|
char *newline = "\n";
|
||||||
|
Py_ssize_t value_len;
|
||||||
|
|
||||||
if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OO:__init__", kwlist,
|
if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OO:__init__", kwlist,
|
||||||
&value, &newline_obj))
|
&value, &newline_obj))
|
||||||
|
@ -600,6 +692,7 @@ stringio_init(stringio *self, PyObject *args, PyObject *kwds)
|
||||||
|
|
||||||
self->ok = 0;
|
self->ok = 0;
|
||||||
|
|
||||||
|
_PyAccu_Destroy(&self->accu);
|
||||||
Py_CLEAR(self->readnl);
|
Py_CLEAR(self->readnl);
|
||||||
Py_CLEAR(self->writenl);
|
Py_CLEAR(self->writenl);
|
||||||
Py_CLEAR(self->decoder);
|
Py_CLEAR(self->decoder);
|
||||||
|
@ -636,19 +729,27 @@ stringio_init(stringio *self, PyObject *args, PyObject *kwds)
|
||||||
/* Now everything is set up, resize buffer to size of initial value,
|
/* Now everything is set up, resize buffer to size of initial value,
|
||||||
and copy it */
|
and copy it */
|
||||||
self->string_size = 0;
|
self->string_size = 0;
|
||||||
if (value && value != Py_None) {
|
if (value && value != Py_None)
|
||||||
Py_ssize_t len = PyUnicode_GetSize(value);
|
value_len = PyUnicode_GetSize(value);
|
||||||
|
else
|
||||||
|
value_len = 0;
|
||||||
|
if (value_len > 0) {
|
||||||
/* This is a heuristic, for newline translation might change
|
/* This is a heuristic, for newline translation might change
|
||||||
the string length. */
|
the string length. */
|
||||||
if (resize_buffer(self, len) < 0)
|
if (resize_buffer(self, 0) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
|
self->state = STATE_REALIZED;
|
||||||
self->pos = 0;
|
self->pos = 0;
|
||||||
if (write_str(self, value) < 0)
|
if (write_str(self, value) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
/* Empty stringio object, we can start by accumulating */
|
||||||
if (resize_buffer(self, 0) < 0)
|
if (resize_buffer(self, 0) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
|
if (_PyAccu_Init(&self->accu))
|
||||||
|
return -1;
|
||||||
|
self->state = STATE_ACCUMULATING;
|
||||||
}
|
}
|
||||||
self->pos = 0;
|
self->pos = 0;
|
||||||
|
|
||||||
|
|
|
@ -2055,7 +2055,7 @@ Py_UCS4*
|
||||||
PyUnicode_AsUCS4(PyObject *string, Py_UCS4 *target, Py_ssize_t targetsize,
|
PyUnicode_AsUCS4(PyObject *string, Py_UCS4 *target, Py_ssize_t targetsize,
|
||||||
int copy_null)
|
int copy_null)
|
||||||
{
|
{
|
||||||
if (target == NULL || targetsize < 1) {
|
if (target == NULL || targetsize < 0) {
|
||||||
PyErr_BadInternalCall();
|
PyErr_BadInternalCall();
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue