bpo-36748: optimize TextIOWrapper.write() for ASCII string (GH-13002)
This commit is contained in:
parent
8a533ffb49
commit
bfba8c373e
|
@ -0,0 +1,3 @@
|
||||||
|
Optimized write buffering in the C implementation of ``TextIOWrapper``. Writing
|
||||||
|
an ASCII string to ``TextIOWrapper`` with ascii, latin1, or utf-8 encoding is
|
||||||
|
about 20% faster. Patch by Inada Naoki.
|
|
@ -674,8 +674,8 @@ typedef struct
|
||||||
*/
|
*/
|
||||||
PyObject *decoded_chars; /* buffer for text returned from decoder */
|
PyObject *decoded_chars; /* buffer for text returned from decoder */
|
||||||
Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
|
Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
|
||||||
PyObject *pending_bytes; /* list of bytes objects waiting to be
|
PyObject *pending_bytes; // data waiting to be written.
|
||||||
written, or NULL */
|
// NULL, an ASCII str, a bytes object, or a list of those.
|
||||||
Py_ssize_t pending_bytes_count;
|
Py_ssize_t pending_bytes_count;
|
||||||
|
|
||||||
/* snapshot is either NULL, or a tuple (dec_flags, next_input) where
|
/* snapshot is either NULL, or a tuple (dec_flags, next_input) where
|
||||||
|
@ -777,6 +777,15 @@ latin1_encode(textio *self, PyObject *text)
|
||||||
return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
|
return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Return true if f encodes ASCII-only text to identical bytes, so encoding can be skipped.
|
||||||
|
static inline int
|
||||||
|
is_asciicompat_encoding(encodefunc_t f)
|
||||||
|
{
|
||||||
|
return f == (encodefunc_t) ascii_encode
|
||||||
|
|| f == (encodefunc_t) latin1_encode
|
||||||
|
|| f == (encodefunc_t) utf8_encode;
|
||||||
|
}
|
||||||
|
|
||||||
/* Map normalized encoding names onto the specialized encoding funcs */
|
/* Map normalized encoding names onto the specialized encoding funcs */
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
@ -1489,21 +1498,62 @@ _io_TextIOWrapper_detach_impl(textio *self)
|
||||||
static int
|
static int
|
||||||
_textiowrapper_writeflush(textio *self)
|
_textiowrapper_writeflush(textio *self)
|
||||||
{
|
{
|
||||||
PyObject *pending, *b, *ret;
|
|
||||||
|
|
||||||
if (self->pending_bytes == NULL)
|
if (self->pending_bytes == NULL)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
pending = self->pending_bytes;
|
PyObject *pending = self->pending_bytes;
|
||||||
Py_INCREF(pending);
|
PyObject *b;
|
||||||
self->pending_bytes_count = 0;
|
|
||||||
Py_CLEAR(self->pending_bytes);
|
|
||||||
|
|
||||||
b = _PyBytes_Join(_PyIO_empty_bytes, pending);
|
if (PyBytes_Check(pending)) {
|
||||||
|
b = pending;
|
||||||
|
Py_INCREF(b);
|
||||||
|
}
|
||||||
|
else if (PyUnicode_Check(pending)) {
|
||||||
|
assert(PyUnicode_IS_ASCII(pending));
|
||||||
|
assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
|
||||||
|
b = PyBytes_FromStringAndSize(
|
||||||
|
PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
|
||||||
|
if (b == NULL) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
assert(PyList_Check(pending));
|
||||||
|
b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
|
||||||
|
if (b == NULL) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
char *buf = PyBytes_AsString(b);
|
||||||
|
Py_ssize_t pos = 0;
|
||||||
|
|
||||||
|
for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
|
||||||
|
PyObject *obj = PyList_GET_ITEM(pending, i);
|
||||||
|
char *src;
|
||||||
|
Py_ssize_t len;
|
||||||
|
if (PyUnicode_Check(obj)) {
|
||||||
|
assert(PyUnicode_IS_ASCII(obj));
|
||||||
|
src = PyUnicode_DATA(obj);
|
||||||
|
len = PyUnicode_GET_LENGTH(obj);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
assert(PyBytes_Check(obj));
|
||||||
|
if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
|
||||||
|
Py_DECREF(b);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
memcpy(buf + pos, src, len);
|
||||||
|
pos += len;
|
||||||
|
}
|
||||||
|
assert(pos == self->pending_bytes_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
self->pending_bytes_count = 0;
|
||||||
|
self->pending_bytes = NULL;
|
||||||
Py_DECREF(pending);
|
Py_DECREF(pending);
|
||||||
if (b == NULL)
|
|
||||||
return -1;
|
PyObject *ret;
|
||||||
ret = NULL;
|
|
||||||
do {
|
do {
|
||||||
ret = PyObject_CallMethodObjArgs(self->buffer,
|
ret = PyObject_CallMethodObjArgs(self->buffer,
|
||||||
_PyIO_str_write, b, NULL);
|
_PyIO_str_write, b, NULL);
|
||||||
|
@ -1566,16 +1616,23 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
|
||||||
|
|
||||||
/* XXX What if we were just reading? */
|
/* XXX What if we were just reading? */
|
||||||
if (self->encodefunc != NULL) {
|
if (self->encodefunc != NULL) {
|
||||||
b = (*self->encodefunc)((PyObject *) self, text);
|
if (PyUnicode_IS_ASCII(text) && is_asciicompat_encoding(self->encodefunc)) {
|
||||||
|
b = text;
|
||||||
|
Py_INCREF(b);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
b = (*self->encodefunc)((PyObject *) self, text);
|
||||||
|
}
|
||||||
self->encoding_start_of_stream = 0;
|
self->encoding_start_of_stream = 0;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
b = PyObject_CallMethodObjArgs(self->encoder,
|
b = PyObject_CallMethodObjArgs(self->encoder,
|
||||||
_PyIO_str_encode, text, NULL);
|
_PyIO_str_encode, text, NULL);
|
||||||
|
|
||||||
Py_DECREF(text);
|
Py_DECREF(text);
|
||||||
if (b == NULL)
|
if (b == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
if (!PyBytes_Check(b)) {
|
if (b != text && !PyBytes_Check(b)) {
|
||||||
PyErr_Format(PyExc_TypeError,
|
PyErr_Format(PyExc_TypeError,
|
||||||
"encoder should return a bytes object, not '%.200s'",
|
"encoder should return a bytes object, not '%.200s'",
|
||||||
Py_TYPE(b)->tp_name);
|
Py_TYPE(b)->tp_name);
|
||||||
|
@ -1583,20 +1640,37 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Py_ssize_t bytes_len;
|
||||||
|
if (b == text) {
|
||||||
|
bytes_len = PyUnicode_GET_LENGTH(b);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
bytes_len = PyBytes_GET_SIZE(b);
|
||||||
|
}
|
||||||
|
|
||||||
if (self->pending_bytes == NULL) {
|
if (self->pending_bytes == NULL) {
|
||||||
self->pending_bytes = PyList_New(0);
|
self->pending_bytes_count = 0;
|
||||||
if (self->pending_bytes == NULL) {
|
self->pending_bytes = b;
|
||||||
|
}
|
||||||
|
else if (!PyList_CheckExact(self->pending_bytes)) {
|
||||||
|
PyObject *list = PyList_New(2);
|
||||||
|
if (list == NULL) {
|
||||||
Py_DECREF(b);
|
Py_DECREF(b);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
self->pending_bytes_count = 0;
|
PyList_SET_ITEM(list, 0, self->pending_bytes);
|
||||||
|
PyList_SET_ITEM(list, 1, b);
|
||||||
|
self->pending_bytes = list;
|
||||||
}
|
}
|
||||||
if (PyList_Append(self->pending_bytes, b) < 0) {
|
else {
|
||||||
|
if (PyList_Append(self->pending_bytes, b) < 0) {
|
||||||
|
Py_DECREF(b);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
Py_DECREF(b);
|
Py_DECREF(b);
|
||||||
return NULL;
|
|
||||||
}
|
}
|
||||||
self->pending_bytes_count += PyBytes_GET_SIZE(b);
|
|
||||||
Py_DECREF(b);
|
self->pending_bytes_count += bytes_len;
|
||||||
if (self->pending_bytes_count > self->chunk_size || needflush ||
|
if (self->pending_bytes_count > self->chunk_size || needflush ||
|
||||||
text_needflush) {
|
text_needflush) {
|
||||||
if (_textiowrapper_writeflush(self) < 0)
|
if (_textiowrapper_writeflush(self) < 0)
|
||||||
|
|
Loading…
Reference in New Issue