PyUnicode_Join() calls memcpy() directly if all strings are of the same kind

This commit is contained in:
Victor Stinner 2011-10-07 17:02:31 +02:00
parent 756b169c5a
commit dd07732af5
1 changed files with 54 additions and 7 deletions

View File

@ -9124,7 +9124,7 @@ PyObject *
PyUnicode_Join(PyObject *separator, PyObject *seq) PyUnicode_Join(PyObject *separator, PyObject *seq)
{ {
PyObject *sep = NULL; PyObject *sep = NULL;
Py_ssize_t seplen = 1; Py_ssize_t seplen;
PyObject *res = NULL; /* the result */ PyObject *res = NULL; /* the result */
PyObject *fseq; /* PySequence_Fast(seq) */ PyObject *fseq; /* PySequence_Fast(seq) */
Py_ssize_t seqlen; /* len(fseq) -- number of items in sequence */ Py_ssize_t seqlen; /* len(fseq) -- number of items in sequence */
@ -9133,6 +9133,10 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
Py_ssize_t sz, i, res_offset; Py_ssize_t sz, i, res_offset;
Py_UCS4 maxchar; Py_UCS4 maxchar;
Py_UCS4 item_maxchar; Py_UCS4 item_maxchar;
int use_memcpy;
unsigned char *res_data = NULL, *sep_data = NULL;
PyObject *last_obj;
unsigned int kind = 0;
fseq = PySequence_Fast(seq, ""); fseq = PySequence_Fast(seq, "");
if (fseq == NULL) { if (fseq == NULL) {
@ -9153,6 +9157,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
} }
/* If singleton sequence with an exact Unicode, return that. */ /* If singleton sequence with an exact Unicode, return that. */
last_obj = NULL;
items = PySequence_Fast_ITEMS(fseq); items = PySequence_Fast_ITEMS(fseq);
if (seqlen == 1) { if (seqlen == 1) {
if (PyUnicode_CheckExact(items[0])) { if (PyUnicode_CheckExact(items[0])) {
@ -9161,7 +9166,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
Py_DECREF(fseq); Py_DECREF(fseq);
return res; return res;
} }
sep = NULL; seplen = 0;
maxchar = 0; maxchar = 0;
} }
else { else {
@ -9171,6 +9176,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
sep = PyUnicode_FromOrdinal(' '); sep = PyUnicode_FromOrdinal(' ');
if (!sep) if (!sep)
goto onError; goto onError;
seplen = 1;
maxchar = 32; maxchar = 32;
} }
else { else {
@ -9190,6 +9196,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
above case of a blank separator */ above case of a blank separator */
Py_INCREF(sep); Py_INCREF(sep);
} }
last_obj = sep;
} }
/* There are at least two things to join, or else we have a subclass /* There are at least two things to join, or else we have a subclass
@ -9198,6 +9205,11 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
* need (sz), and see whether all argument are strings. * need (sz), and see whether all argument are strings.
*/ */
sz = 0; sz = 0;
#ifdef Py_DEBUG
use_memcpy = 0;
#else
use_memcpy = 1;
#endif
for (i = 0; i < seqlen; i++) { for (i = 0; i < seqlen; i++) {
const Py_ssize_t old_sz = sz; const Py_ssize_t old_sz = sz;
item = items[i]; item = items[i];
@ -9220,6 +9232,11 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
"join() result is too long for a Python string"); "join() result is too long for a Python string");
goto onError; goto onError;
} }
if (use_memcpy && last_obj != NULL) {
if (PyUnicode_KIND(last_obj) != PyUnicode_KIND(item))
use_memcpy = 0;
}
last_obj = item;
} }
res = PyUnicode_New(sz, maxchar); res = PyUnicode_New(sz, maxchar);
@ -9227,20 +9244,50 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
goto onError; goto onError;
/* Catenate everything. */ /* Catenate everything. */
#ifdef Py_DEBUG
use_memcpy = 0;
#else
if (use_memcpy) {
res_data = PyUnicode_1BYTE_DATA(res);
kind = PyUnicode_KIND(res);
if (seplen != 0)
sep_data = PyUnicode_1BYTE_DATA(sep);
}
#endif
for (i = 0, res_offset = 0; i < seqlen; ++i) { for (i = 0, res_offset = 0; i < seqlen; ++i) {
Py_ssize_t itemlen; Py_ssize_t itemlen;
item = items[i]; item = items[i];
/* Copy item, and maybe the separator. */ /* Copy item, and maybe the separator. */
if (i && seplen != 0) { if (i && seplen != 0) {
if (use_memcpy) {
Py_MEMCPY(res_data,
sep_data,
PyUnicode_KIND_SIZE(kind, seplen));
res_data += PyUnicode_KIND_SIZE(kind, seplen);
}
else {
copy_characters(res, res_offset, sep, 0, seplen); copy_characters(res, res_offset, sep, 0, seplen);
res_offset += seplen; res_offset += seplen;
} }
}
itemlen = PyUnicode_GET_LENGTH(item); itemlen = PyUnicode_GET_LENGTH(item);
if (itemlen != 0) { if (itemlen != 0) {
if (use_memcpy) {
Py_MEMCPY(res_data,
PyUnicode_DATA(item),
PyUnicode_KIND_SIZE(kind, itemlen));
res_data += PyUnicode_KIND_SIZE(kind, itemlen);
}
else {
copy_characters(res, res_offset, item, 0, itemlen); copy_characters(res, res_offset, item, 0, itemlen);
res_offset += itemlen; res_offset += itemlen;
} }
} }
}
if (use_memcpy)
assert(res_data == PyUnicode_1BYTE_DATA(res)
+ PyUnicode_KIND_SIZE(kind, PyUnicode_GET_LENGTH(res)));
else
assert(res_offset == PyUnicode_GET_LENGTH(res)); assert(res_offset == PyUnicode_GET_LENGTH(res));
Py_DECREF(fseq); Py_DECREF(fseq);