Optimize bytearray % args

Issue #25399: Don't create temporary bytes objects: modify _PyBytes_Format() to
work directly on bytearray objects.

* Rename _PyBytes_Format() to _PyBytes_FormatEx() just in case something
  outside CPython uses it
* _PyBytes_FormatEx() now uses (char*, Py_ssize_t) for the input string, so
  bytearray_format() doesn't need to create a temporary input bytes object
* Add use_bytearray parameter to _PyBytes_FormatEx() which is passed to
  _PyBytesWriter, to create a bytearray buffer instead of a bytes buffer

Most formatting operations are now between 2.5 and 5 times faster.
This commit is contained in:
Victor Stinner 2015-10-14 09:56:53 +02:00
parent 661aaccf9d
commit 772b2b09f2
3 changed files with 33 additions and 36 deletions

View File

@ -62,7 +62,11 @@ PyAPI_FUNC(void) PyBytes_Concat(PyObject **, PyObject *);
PyAPI_FUNC(void) PyBytes_ConcatAndDel(PyObject **, PyObject *); PyAPI_FUNC(void) PyBytes_ConcatAndDel(PyObject **, PyObject *);
#ifndef Py_LIMITED_API #ifndef Py_LIMITED_API
PyAPI_FUNC(int) _PyBytes_Resize(PyObject **, Py_ssize_t); PyAPI_FUNC(int) _PyBytes_Resize(PyObject **, Py_ssize_t);
PyAPI_FUNC(PyObject *) _PyBytes_Format(PyObject *, PyObject *); PyAPI_FUNC(PyObject*) _PyBytes_FormatEx(
const char *format,
Py_ssize_t format_len,
PyObject *args,
int use_bytearray);
#endif #endif
PyAPI_FUNC(PyObject *) PyBytes_DecodeEscape(const char *, Py_ssize_t, PyAPI_FUNC(PyObject *) PyBytes_DecodeEscape(const char *, Py_ssize_t,
const char *, Py_ssize_t, const char *, Py_ssize_t,

View File

@ -282,26 +282,14 @@ PyByteArray_Concat(PyObject *a, PyObject *b)
static PyObject * static PyObject *
bytearray_format(PyByteArrayObject *self, PyObject *args) bytearray_format(PyByteArrayObject *self, PyObject *args)
{ {
PyObject *bytes_in, *bytes_out, *res; if (self == NULL || !PyByteArray_Check(self)) {
char *bytestring;
if (self == NULL || !PyByteArray_Check(self) || args == NULL) {
PyErr_BadInternalCall(); PyErr_BadInternalCall();
return NULL; return NULL;
} }
bytestring = PyByteArray_AS_STRING(self);
bytes_in = PyBytes_FromString(bytestring); return _PyBytes_FormatEx(PyByteArray_AS_STRING(self),
if (bytes_in == NULL) PyByteArray_GET_SIZE(self),
return NULL; args, 1);
bytes_out = _PyBytes_Format(bytes_in, args);
Py_DECREF(bytes_in);
if (bytes_out == NULL)
return NULL;
res = PyByteArray_FromObject(bytes_out);
Py_DECREF(bytes_out);
if (res == NULL)
return NULL;
return res;
} }
/* Functions stuffed into the type object */ /* Functions stuffed into the type object */

View File

@ -568,27 +568,31 @@ format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */ /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
PyObject * PyObject *
_PyBytes_Format(PyObject *format, PyObject *args) _PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
PyObject *args, int use_bytearray)
{ {
char *fmt, *res; const char *fmt;
char *res;
Py_ssize_t arglen, argidx; Py_ssize_t arglen, argidx;
Py_ssize_t fmtcnt; Py_ssize_t fmtcnt;
int args_owned = 0; int args_owned = 0;
PyObject *dict = NULL; PyObject *dict = NULL;
_PyBytesWriter writer; _PyBytesWriter writer;
if (format == NULL || !PyBytes_Check(format) || args == NULL) { if (args == NULL) {
PyErr_BadInternalCall(); PyErr_BadInternalCall();
return NULL; return NULL;
} }
fmt = PyBytes_AS_STRING(format); fmt = format;
fmtcnt = PyBytes_GET_SIZE(format); fmtcnt = format_len;
_PyBytesWriter_Init(&writer); _PyBytesWriter_Init(&writer);
writer.use_bytearray = use_bytearray;
res = _PyBytesWriter_Alloc(&writer, fmtcnt); res = _PyBytesWriter_Alloc(&writer, fmtcnt);
if (res == NULL) if (res == NULL)
return NULL; return NULL;
if (!use_bytearray)
writer.overallocate = 1; writer.overallocate = 1;
if (PyTuple_Check(args)) { if (PyTuple_Check(args)) {
@ -613,10 +617,8 @@ _PyBytes_Format(PyObject *format, PyObject *args)
pos = strchr(fmt + 1, '%'); pos = strchr(fmt + 1, '%');
if (pos != NULL) if (pos != NULL)
len = pos - fmt; len = pos - fmt;
else { else
len = PyBytes_GET_SIZE(format); len = format_len - (fmt - format);
len -= (fmt - PyBytes_AS_STRING(format));
}
assert(len != 0); assert(len != 0);
Py_MEMCPY(res, fmt, len); Py_MEMCPY(res, fmt, len);
@ -644,7 +646,7 @@ _PyBytes_Format(PyObject *format, PyObject *args)
fmt++; fmt++;
if (*fmt == '(') { if (*fmt == '(') {
char *keystart; const char *keystart;
Py_ssize_t keylen; Py_ssize_t keylen;
PyObject *key; PyObject *key;
int pcount = 1; int pcount = 1;
@ -924,8 +926,7 @@ _PyBytes_Format(PyObject *format, PyObject *args)
"unsupported format character '%c' (0x%x) " "unsupported format character '%c' (0x%x) "
"at index %zd", "at index %zd",
c, c, c, c,
(Py_ssize_t)(fmt - 1 - (Py_ssize_t)(fmt - 1 - format));
PyBytes_AsString(format)));
goto error; goto error;
} }
@ -1028,7 +1029,7 @@ _PyBytes_Format(PyObject *format, PyObject *args)
/* If overallocation was disabled, ensure that it was the last /* If overallocation was disabled, ensure that it was the last
write. Otherwise, we missed an optimization */ write. Otherwise, we missed an optimization */
assert(writer.overallocate || fmtcnt < 0); assert(writer.overallocate || fmtcnt < 0 || use_bytearray);
} /* until end */ } /* until end */
if (argidx < arglen && !dict) { if (argidx < arglen && !dict) {
@ -3233,11 +3234,15 @@ bytes_methods[] = {
}; };
static PyObject * static PyObject *
bytes_mod(PyObject *v, PyObject *w) bytes_mod(PyObject *self, PyObject *args)
{ {
if (!PyBytes_Check(v)) if (self == NULL || !PyBytes_Check(self)) {
Py_RETURN_NOTIMPLEMENTED; PyErr_BadInternalCall();
return _PyBytes_Format(v, w); return NULL;
}
return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
args, 0);
} }
static PyNumberMethods bytes_as_number = { static PyNumberMethods bytes_as_number = {