mirror of https://github.com/python/cpython
163 lines
4.6 KiB
C
163 lines
4.6 KiB
C
/* stringlib: bytes joining implementation */
|
|
|
|
/* This template copies raw bytes; refuse to build it for unicode strings. */
#if STRINGLIB_IS_UNICODE
#  error join.h only compatible with byte-wise strings
#endif
|
|
|
|
/* Join the bytes-like items of `iterable`, placing `sep` between each pair
 * of consecutive items.
 *
 * Returns a new reference to the joined object.  Returns NULL with an
 * exception set when:
 *   - `iterable` cannot be converted by PySequence_Fast(),
 *   - an item does not provide a PyBUF_SIMPLE buffer (TypeError),
 *   - the accumulated length would exceed PY_SSIZE_T_MAX (OverflowError),
 *   - the sequence changes size while its items are scanned (RuntimeError),
 *   - allocating the buffer table or the result fails.
 */
Py_LOCAL_INLINE(PyObject *)
STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable)
{
    /* Up to this many items use the on-stack Py_buffer table. */
#define NB_STATIC_BUFFERS 10
    /* Results shorter than this many bytes are copied with the GIL held. */
#define GIL_THRESHOLD 1048576
    const char *sep_data = STRINGLIB_STR(sep);
    Py_ssize_t sep_size = STRINGLIB_LEN(sep);
    Py_buffer stack_views[NB_STATIC_BUFFERS];
    Py_buffer *views = NULL;
    Py_ssize_t acquired = 0;    /* number of views that must be released */
    Py_ssize_t total = 0;       /* size of the joined result in bytes */
    Py_ssize_t count, idx;
    int release_gil = 1;
    PyThreadState *tstate = NULL;
    PyObject *result = NULL;
    PyObject *fast;
    char *out;

    fast = PySequence_Fast(iterable, "can only join an iterable");
    if (fast == NULL) {
        return NULL;
    }

    count = PySequence_Fast_GET_SIZE(fast);
    if (count == 0) {
        /* Nothing to join: hand back an empty object. */
        Py_DECREF(fast);
        return STRINGLIB_NEW(NULL, 0);
    }
#if !STRINGLIB_MUTABLE
    if (count == 1) {
        /* A single item of the exact result type is returned as is. */
        PyObject *only = PySequence_Fast_GET_ITEM(fast, 0);
        if (STRINGLIB_CHECK_EXACT(only)) {
            Py_INCREF(only);
            Py_DECREF(fast);
            return only;
        }
    }
#endif

    /* Pick the table holding one Py_buffer per item. */
    if (count <= NB_STATIC_BUFFERS) {
        views = stack_views;
    }
    else {
        views = PyMem_NEW(Py_buffer, count);
        if (views == NULL) {
            Py_DECREF(fast);
            return PyErr_NoMemory();
        }
    }

    /* First pass: obtain a view on every item and add up the space the
     * result needs, verifying along the way that each item is bytes-like.
     */
    for (idx = 0; idx < count; idx++) {
        PyObject *item = PySequence_Fast_GET_ITEM(fast, idx);
        Py_buffer *view = &views[idx];
        Py_ssize_t item_size;

        if (!PyBytes_CheckExact(item)) {
            if (PyObject_GetBuffer(item, view, PyBUF_SIMPLE) != 0) {
                PyErr_Format(PyExc_TypeError,
                             "sequence item %zd: expected a bytes-like object, "
                             "%.80s found",
                             idx, Py_TYPE(item)->tp_name);
                goto error;
            }
            /* The object behind this view may be mutable.  Copying without
             * the GIL would let another thread modify it underneath us.
             * Such code is racy regardless, but keeping the GIL avoids
             * changing how that race behaves.
             */
            release_gil = 0;
        }
        else {
            /* Exact bytes: fill in the view by hand instead of going
             * through the buffer protocol.
             */
            view->obj = Py_NewRef(item);
            view->buf = PyBytes_AS_STRING(item);
            view->len = PyBytes_GET_SIZE(item);
        }
        acquired = idx + 1;     /* this view now needs releasing on exit */

        item_size = view->len;
        if (item_size > PY_SSIZE_T_MAX - total) {
            goto overflow;
        }
        total += item_size;

        if (idx > 0) {
            if (sep_size > PY_SSIZE_T_MAX - total) {
                goto overflow;
            }
            total += sep_size;
        }

        if (PySequence_Fast_GET_SIZE(fast) != count) {
            PyErr_SetString(PyExc_RuntimeError,
                            "sequence changed size during iteration");
            goto error;
        }
    }

    /* Second pass: allocate the result and copy everything into it. */
    result = STRINGLIB_NEW(NULL, total);
    if (result == NULL) {
        goto error;
    }
    out = STRINGLIB_STR(result);

    if (total < GIL_THRESHOLD) {
        /* Too small for releasing the GIL to pay off. */
        release_gil = 0;
    }
    if (release_gil) {
        tstate = PyEval_SaveThread();
    }

    if (sep_size == 0) {
        /* Empty separator: plain concatenation. */
        for (idx = 0; idx < acquired; idx++) {
            const Py_buffer *view = &views[idx];
            memcpy(out, view->buf, view->len);
            out += view->len;
        }
    }
    else {
        for (idx = 0; idx < acquired; idx++) {
            const Py_buffer *view = &views[idx];
            if (idx > 0) {
                memcpy(out, sep_data, sep_size);
                out += sep_size;
            }
            memcpy(out, view->buf, view->len);
            out += view->len;
        }
    }

    if (release_gil) {
        PyEval_RestoreThread(tstate);
    }
    goto done;

overflow:
    PyErr_SetString(PyExc_OverflowError,
                    "join() result is too long");
error:
    result = NULL;
done:
    Py_DECREF(fast);
    for (idx = 0; idx < acquired; idx++) {
        PyBuffer_Release(&views[idx]);
    }
    if (views != stack_views) {
        PyMem_Free(views);
    }
    return result;
}

#undef NB_STATIC_BUFFERS
#undef GIL_THRESHOLD
|