PyUnicode_Join() calls memcpy() directly if all strings are of the same kind
This commit is contained in:
parent
756b169c5a
commit
dd07732af5
|
@ -9124,7 +9124,7 @@ PyObject *
|
||||||
PyUnicode_Join(PyObject *separator, PyObject *seq)
|
PyUnicode_Join(PyObject *separator, PyObject *seq)
|
||||||
{
|
{
|
||||||
PyObject *sep = NULL;
|
PyObject *sep = NULL;
|
||||||
Py_ssize_t seplen = 1;
|
Py_ssize_t seplen;
|
||||||
PyObject *res = NULL; /* the result */
|
PyObject *res = NULL; /* the result */
|
||||||
PyObject *fseq; /* PySequence_Fast(seq) */
|
PyObject *fseq; /* PySequence_Fast(seq) */
|
||||||
Py_ssize_t seqlen; /* len(fseq) -- number of items in sequence */
|
Py_ssize_t seqlen; /* len(fseq) -- number of items in sequence */
|
||||||
|
@ -9133,6 +9133,10 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
|
||||||
Py_ssize_t sz, i, res_offset;
|
Py_ssize_t sz, i, res_offset;
|
||||||
Py_UCS4 maxchar;
|
Py_UCS4 maxchar;
|
||||||
Py_UCS4 item_maxchar;
|
Py_UCS4 item_maxchar;
|
||||||
|
int use_memcpy;
|
||||||
|
unsigned char *res_data = NULL, *sep_data = NULL;
|
||||||
|
PyObject *last_obj;
|
||||||
|
unsigned int kind = 0;
|
||||||
|
|
||||||
fseq = PySequence_Fast(seq, "");
|
fseq = PySequence_Fast(seq, "");
|
||||||
if (fseq == NULL) {
|
if (fseq == NULL) {
|
||||||
|
@ -9153,6 +9157,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If singleton sequence with an exact Unicode, return that. */
|
/* If singleton sequence with an exact Unicode, return that. */
|
||||||
|
last_obj = NULL;
|
||||||
items = PySequence_Fast_ITEMS(fseq);
|
items = PySequence_Fast_ITEMS(fseq);
|
||||||
if (seqlen == 1) {
|
if (seqlen == 1) {
|
||||||
if (PyUnicode_CheckExact(items[0])) {
|
if (PyUnicode_CheckExact(items[0])) {
|
||||||
|
@ -9161,7 +9166,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
|
||||||
Py_DECREF(fseq);
|
Py_DECREF(fseq);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
sep = NULL;
|
seplen = 0;
|
||||||
maxchar = 0;
|
maxchar = 0;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@ -9171,6 +9176,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
|
||||||
sep = PyUnicode_FromOrdinal(' ');
|
sep = PyUnicode_FromOrdinal(' ');
|
||||||
if (!sep)
|
if (!sep)
|
||||||
goto onError;
|
goto onError;
|
||||||
|
seplen = 1;
|
||||||
maxchar = 32;
|
maxchar = 32;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@ -9190,6 +9196,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
|
||||||
above case of a blank separator */
|
above case of a blank separator */
|
||||||
Py_INCREF(sep);
|
Py_INCREF(sep);
|
||||||
}
|
}
|
||||||
|
last_obj = sep;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* There are at least two things to join, or else we have a subclass
|
/* There are at least two things to join, or else we have a subclass
|
||||||
|
@ -9198,6 +9205,11 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
|
||||||
* need (sz), and see whether all arguments are strings.
|
* need (sz), and see whether all arguments are strings.
|
||||||
*/
|
*/
|
||||||
sz = 0;
|
sz = 0;
|
||||||
|
#ifdef Py_DEBUG
|
||||||
|
use_memcpy = 0;
|
||||||
|
#else
|
||||||
|
use_memcpy = 1;
|
||||||
|
#endif
|
||||||
for (i = 0; i < seqlen; i++) {
|
for (i = 0; i < seqlen; i++) {
|
||||||
const Py_ssize_t old_sz = sz;
|
const Py_ssize_t old_sz = sz;
|
||||||
item = items[i];
|
item = items[i];
|
||||||
|
@ -9220,6 +9232,11 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
|
||||||
"join() result is too long for a Python string");
|
"join() result is too long for a Python string");
|
||||||
goto onError;
|
goto onError;
|
||||||
}
|
}
|
||||||
|
if (use_memcpy && last_obj != NULL) {
|
||||||
|
if (PyUnicode_KIND(last_obj) != PyUnicode_KIND(item))
|
||||||
|
use_memcpy = 0;
|
||||||
|
}
|
||||||
|
last_obj = item;
|
||||||
}
|
}
|
||||||
|
|
||||||
res = PyUnicode_New(sz, maxchar);
|
res = PyUnicode_New(sz, maxchar);
|
||||||
|
@ -9227,20 +9244,50 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
|
||||||
goto onError;
|
goto onError;
|
||||||
|
|
||||||
/* Catenate everything. */
|
/* Catenate everything. */
|
||||||
|
#ifdef Py_DEBUG
|
||||||
|
use_memcpy = 0;
|
||||||
|
#else
|
||||||
|
if (use_memcpy) {
|
||||||
|
res_data = PyUnicode_1BYTE_DATA(res);
|
||||||
|
kind = PyUnicode_KIND(res);
|
||||||
|
if (seplen != 0)
|
||||||
|
sep_data = PyUnicode_1BYTE_DATA(sep);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
for (i = 0, res_offset = 0; i < seqlen; ++i) {
|
for (i = 0, res_offset = 0; i < seqlen; ++i) {
|
||||||
Py_ssize_t itemlen;
|
Py_ssize_t itemlen;
|
||||||
item = items[i];
|
item = items[i];
|
||||||
/* Copy item, and maybe the separator. */
|
/* Copy item, and maybe the separator. */
|
||||||
if (i && seplen != 0) {
|
if (i && seplen != 0) {
|
||||||
|
if (use_memcpy) {
|
||||||
|
Py_MEMCPY(res_data,
|
||||||
|
sep_data,
|
||||||
|
PyUnicode_KIND_SIZE(kind, seplen));
|
||||||
|
res_data += PyUnicode_KIND_SIZE(kind, seplen);
|
||||||
|
}
|
||||||
|
else {
|
||||||
copy_characters(res, res_offset, sep, 0, seplen);
|
copy_characters(res, res_offset, sep, 0, seplen);
|
||||||
res_offset += seplen;
|
res_offset += seplen;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
itemlen = PyUnicode_GET_LENGTH(item);
|
itemlen = PyUnicode_GET_LENGTH(item);
|
||||||
if (itemlen != 0) {
|
if (itemlen != 0) {
|
||||||
|
if (use_memcpy) {
|
||||||
|
Py_MEMCPY(res_data,
|
||||||
|
PyUnicode_DATA(item),
|
||||||
|
PyUnicode_KIND_SIZE(kind, itemlen));
|
||||||
|
res_data += PyUnicode_KIND_SIZE(kind, itemlen);
|
||||||
|
}
|
||||||
|
else {
|
||||||
copy_characters(res, res_offset, item, 0, itemlen);
|
copy_characters(res, res_offset, item, 0, itemlen);
|
||||||
res_offset += itemlen;
|
res_offset += itemlen;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
if (use_memcpy)
|
||||||
|
assert(res_data == PyUnicode_1BYTE_DATA(res)
|
||||||
|
+ PyUnicode_KIND_SIZE(kind, PyUnicode_GET_LENGTH(res)));
|
||||||
|
else
|
||||||
assert(res_offset == PyUnicode_GET_LENGTH(res));
|
assert(res_offset == PyUnicode_GET_LENGTH(res));
|
||||||
|
|
||||||
Py_DECREF(fseq);
|
Py_DECREF(fseq);
|
||||||
|
|
Loading…
Reference in New Issue