Optimize bytearray % args
Issue #25399: Don't create temporary bytes objects: modify _PyBytes_Format() to work directly on bytearray objects. * Rename _PyBytes_Format() to _PyBytes_FormatEx() just in case something outside CPython uses it * _PyBytes_FormatEx() now uses (char*, Py_ssize_t) for the input string, so bytearray_format() doesn't need to create a temporary input bytes object * Add use_bytearray parameter to _PyBytes_FormatEx() which is passed to _PyBytesWriter, to create a bytearray buffer instead of a bytes buffer. Most formatting operations are now between 2.5 and 5 times faster.
This commit is contained in:
parent
661aaccf9d
commit
772b2b09f2
|
@ -62,7 +62,11 @@ PyAPI_FUNC(void) PyBytes_Concat(PyObject **, PyObject *);
|
||||||
PyAPI_FUNC(void) PyBytes_ConcatAndDel(PyObject **, PyObject *);
|
PyAPI_FUNC(void) PyBytes_ConcatAndDel(PyObject **, PyObject *);
|
||||||
#ifndef Py_LIMITED_API
|
#ifndef Py_LIMITED_API
|
||||||
PyAPI_FUNC(int) _PyBytes_Resize(PyObject **, Py_ssize_t);
|
PyAPI_FUNC(int) _PyBytes_Resize(PyObject **, Py_ssize_t);
|
||||||
PyAPI_FUNC(PyObject *) _PyBytes_Format(PyObject *, PyObject *);
|
PyAPI_FUNC(PyObject*) _PyBytes_FormatEx(
|
||||||
|
const char *format,
|
||||||
|
Py_ssize_t format_len,
|
||||||
|
PyObject *args,
|
||||||
|
int use_bytearray);
|
||||||
#endif
|
#endif
|
||||||
PyAPI_FUNC(PyObject *) PyBytes_DecodeEscape(const char *, Py_ssize_t,
|
PyAPI_FUNC(PyObject *) PyBytes_DecodeEscape(const char *, Py_ssize_t,
|
||||||
const char *, Py_ssize_t,
|
const char *, Py_ssize_t,
|
||||||
|
|
|
@ -282,26 +282,14 @@ PyByteArray_Concat(PyObject *a, PyObject *b)
|
||||||
static PyObject *
|
static PyObject *
|
||||||
bytearray_format(PyByteArrayObject *self, PyObject *args)
|
bytearray_format(PyByteArrayObject *self, PyObject *args)
|
||||||
{
|
{
|
||||||
PyObject *bytes_in, *bytes_out, *res;
|
if (self == NULL || !PyByteArray_Check(self)) {
|
||||||
char *bytestring;
|
|
||||||
|
|
||||||
if (self == NULL || !PyByteArray_Check(self) || args == NULL) {
|
|
||||||
PyErr_BadInternalCall();
|
PyErr_BadInternalCall();
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
bytestring = PyByteArray_AS_STRING(self);
|
|
||||||
bytes_in = PyBytes_FromString(bytestring);
|
return _PyBytes_FormatEx(PyByteArray_AS_STRING(self),
|
||||||
if (bytes_in == NULL)
|
PyByteArray_GET_SIZE(self),
|
||||||
return NULL;
|
args, 1);
|
||||||
bytes_out = _PyBytes_Format(bytes_in, args);
|
|
||||||
Py_DECREF(bytes_in);
|
|
||||||
if (bytes_out == NULL)
|
|
||||||
return NULL;
|
|
||||||
res = PyByteArray_FromObject(bytes_out);
|
|
||||||
Py_DECREF(bytes_out);
|
|
||||||
if (res == NULL)
|
|
||||||
return NULL;
|
|
||||||
return res;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Functions stuffed into the type object */
|
/* Functions stuffed into the type object */
|
||||||
|
|
|
@ -568,27 +568,31 @@ format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
|
||||||
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
|
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
|
||||||
|
|
||||||
PyObject *
|
PyObject *
|
||||||
_PyBytes_Format(PyObject *format, PyObject *args)
|
_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
|
||||||
|
PyObject *args, int use_bytearray)
|
||||||
{
|
{
|
||||||
char *fmt, *res;
|
const char *fmt;
|
||||||
|
char *res;
|
||||||
Py_ssize_t arglen, argidx;
|
Py_ssize_t arglen, argidx;
|
||||||
Py_ssize_t fmtcnt;
|
Py_ssize_t fmtcnt;
|
||||||
int args_owned = 0;
|
int args_owned = 0;
|
||||||
PyObject *dict = NULL;
|
PyObject *dict = NULL;
|
||||||
_PyBytesWriter writer;
|
_PyBytesWriter writer;
|
||||||
|
|
||||||
if (format == NULL || !PyBytes_Check(format) || args == NULL) {
|
if (args == NULL) {
|
||||||
PyErr_BadInternalCall();
|
PyErr_BadInternalCall();
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
fmt = PyBytes_AS_STRING(format);
|
fmt = format;
|
||||||
fmtcnt = PyBytes_GET_SIZE(format);
|
fmtcnt = format_len;
|
||||||
|
|
||||||
_PyBytesWriter_Init(&writer);
|
_PyBytesWriter_Init(&writer);
|
||||||
|
writer.use_bytearray = use_bytearray;
|
||||||
|
|
||||||
res = _PyBytesWriter_Alloc(&writer, fmtcnt);
|
res = _PyBytesWriter_Alloc(&writer, fmtcnt);
|
||||||
if (res == NULL)
|
if (res == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
if (!use_bytearray)
|
||||||
writer.overallocate = 1;
|
writer.overallocate = 1;
|
||||||
|
|
||||||
if (PyTuple_Check(args)) {
|
if (PyTuple_Check(args)) {
|
||||||
|
@ -613,10 +617,8 @@ _PyBytes_Format(PyObject *format, PyObject *args)
|
||||||
pos = strchr(fmt + 1, '%');
|
pos = strchr(fmt + 1, '%');
|
||||||
if (pos != NULL)
|
if (pos != NULL)
|
||||||
len = pos - fmt;
|
len = pos - fmt;
|
||||||
else {
|
else
|
||||||
len = PyBytes_GET_SIZE(format);
|
len = format_len - (fmt - format);
|
||||||
len -= (fmt - PyBytes_AS_STRING(format));
|
|
||||||
}
|
|
||||||
assert(len != 0);
|
assert(len != 0);
|
||||||
|
|
||||||
Py_MEMCPY(res, fmt, len);
|
Py_MEMCPY(res, fmt, len);
|
||||||
|
@ -644,7 +646,7 @@ _PyBytes_Format(PyObject *format, PyObject *args)
|
||||||
|
|
||||||
fmt++;
|
fmt++;
|
||||||
if (*fmt == '(') {
|
if (*fmt == '(') {
|
||||||
char *keystart;
|
const char *keystart;
|
||||||
Py_ssize_t keylen;
|
Py_ssize_t keylen;
|
||||||
PyObject *key;
|
PyObject *key;
|
||||||
int pcount = 1;
|
int pcount = 1;
|
||||||
|
@ -924,8 +926,7 @@ _PyBytes_Format(PyObject *format, PyObject *args)
|
||||||
"unsupported format character '%c' (0x%x) "
|
"unsupported format character '%c' (0x%x) "
|
||||||
"at index %zd",
|
"at index %zd",
|
||||||
c, c,
|
c, c,
|
||||||
(Py_ssize_t)(fmt - 1 -
|
(Py_ssize_t)(fmt - 1 - format));
|
||||||
PyBytes_AsString(format)));
|
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1028,7 +1029,7 @@ _PyBytes_Format(PyObject *format, PyObject *args)
|
||||||
|
|
||||||
/* If overallocation was disabled, ensure that it was the last
|
/* If overallocation was disabled, ensure that it was the last
|
||||||
write. Otherwise, we missed an optimization */
|
write. Otherwise, we missed an optimization */
|
||||||
assert(writer.overallocate || fmtcnt < 0);
|
assert(writer.overallocate || fmtcnt < 0 || use_bytearray);
|
||||||
} /* until end */
|
} /* until end */
|
||||||
|
|
||||||
if (argidx < arglen && !dict) {
|
if (argidx < arglen && !dict) {
|
||||||
|
@ -3233,11 +3234,15 @@ bytes_methods[] = {
|
||||||
};
|
};
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
bytes_mod(PyObject *v, PyObject *w)
|
bytes_mod(PyObject *self, PyObject *args)
|
||||||
{
|
{
|
||||||
if (!PyBytes_Check(v))
|
if (self == NULL || !PyBytes_Check(self)) {
|
||||||
Py_RETURN_NOTIMPLEMENTED;
|
PyErr_BadInternalCall();
|
||||||
return _PyBytes_Format(v, w);
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
|
||||||
|
args, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyNumberMethods bytes_as_number = {
|
static PyNumberMethods bytes_as_number = {
|
||||||
|
|
Loading…
Reference in New Issue