Issue #25349: Optimize bytes % args using the new private _PyBytesWriter API

* Thanks to the _PyBytesWriter API, outputs smaller than 512 bytes are allocated
  on the stack, which avoids calling _PyBytes_Resize(). Because of that, change
  the default buffer size to fmtcnt instead of fmtcnt+100.
* Rely on the _PyBytesWriter algorithm to overallocate the buffer instead of
  using custom code. For example, _PyBytesWriter uses a different overallocation
  factor (25% or 50%) depending on the platform to get the best performance.
* Disable overallocation for the last write.
* Replace C loops that fill characters with memset()
* Also add many comments to _PyBytes_Format()
* Remove unused FORMATBUFLEN constant
* Avoid the creation of a temporary bytes object when formatting a floating
  point number (when no custom formatting option is used)
* Also fix reference leaks in error handling
* Use Py_MEMCPY() to copy the bytes between two format specifiers (%)
This commit is contained in:
Victor Stinner 2015-10-09 11:48:06 +02:00
parent bd5f0e8c1c
commit fa7762ec06
2 changed files with 130 additions and 59 deletions

View File

@ -10,6 +10,8 @@ Release date: XXXX-XX-XX
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #25349: Optimize bytes % args using the new private _PyBytesWriter API.
- Issue #24806: Prevent builtin types that are not allowed to be subclassed from - Issue #24806: Prevent builtin types that are not allowed to be subclassed from
being subclassed through multiple inheritance. being subclassed through multiple inheritance.

View File

@ -409,12 +409,15 @@ getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
/* Returns a new reference to a PyBytes object, or NULL on failure. */ /* Returns a new reference to a PyBytes object, or NULL on failure. */
static PyObject * static char*
formatfloat(PyObject *v, int flags, int prec, int type) formatfloat(PyObject *v, int flags, int prec, int type,
PyObject **p_result, _PyBytesWriter *writer, char *str,
Py_ssize_t prealloc)
{ {
char *p; char *p;
PyObject *result; PyObject *result;
double x; double x;
size_t len;
x = PyFloat_AsDouble(v); x = PyFloat_AsDouble(v);
if (x == -1.0 && PyErr_Occurred()) { if (x == -1.0 && PyErr_Occurred()) {
@ -431,9 +434,23 @@ formatfloat(PyObject *v, int flags, int prec, int type)
if (p == NULL) if (p == NULL)
return NULL; return NULL;
result = PyBytes_FromStringAndSize(p, strlen(p));
len = strlen(p);
if (writer != NULL) {
if ((Py_ssize_t)len > prealloc) {
str = _PyBytesWriter_Prepare(writer, str, len - prealloc);
if (str == NULL)
return NULL;
}
Py_MEMCPY(str, p, len);
str += len;
return str;
}
result = PyBytes_FromStringAndSize(p, len);
PyMem_Free(p); PyMem_Free(p);
return result; *p_result = result;
return str;
} }
static PyObject * static PyObject *
@ -557,36 +574,32 @@ format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
return NULL; return NULL;
} }
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
FORMATBUFLEN is the length of the buffer in which the ints &
chars are formatted. XXX This is a magic number. Each formatting
routine does bounds checking to ensure no overflow, but a better
solution may be to malloc a buffer of appropriate size for each
format. For now, the current solution is sufficient.
*/
#define FORMATBUFLEN (size_t)120
PyObject * PyObject *
_PyBytes_Format(PyObject *format, PyObject *args) _PyBytes_Format(PyObject *format, PyObject *args)
{ {
char *fmt, *res; char *fmt, *res;
Py_ssize_t arglen, argidx; Py_ssize_t arglen, argidx;
Py_ssize_t reslen, rescnt, fmtcnt; Py_ssize_t fmtcnt;
int args_owned = 0; int args_owned = 0;
PyObject *result;
PyObject *dict = NULL; PyObject *dict = NULL;
_PyBytesWriter writer;
if (format == NULL || !PyBytes_Check(format) || args == NULL) { if (format == NULL || !PyBytes_Check(format) || args == NULL) {
PyErr_BadInternalCall(); PyErr_BadInternalCall();
return NULL; return NULL;
} }
fmt = PyBytes_AS_STRING(format); fmt = PyBytes_AS_STRING(format);
fmtcnt = PyBytes_GET_SIZE(format); fmtcnt = PyBytes_GET_SIZE(format);
reslen = rescnt = fmtcnt + 100;
result = PyBytes_FromStringAndSize((char *)NULL, reslen); _PyBytesWriter_Init(&writer);
if (result == NULL)
res = _PyBytesWriter_Alloc(&writer, fmtcnt);
if (res == NULL)
return NULL; return NULL;
res = PyBytes_AsString(result); writer.overallocate = 1;
if (PyTuple_Check(args)) { if (PyTuple_Check(args)) {
arglen = PyTuple_GET_SIZE(args); arglen = PyTuple_GET_SIZE(args);
argidx = 0; argidx = 0;
@ -600,18 +613,25 @@ _PyBytes_Format(PyObject *format, PyObject *args)
!PyByteArray_Check(args)) { !PyByteArray_Check(args)) {
dict = args; dict = args;
} }
while (--fmtcnt >= 0) { while (--fmtcnt >= 0) {
if (*fmt != '%') { if (*fmt != '%') {
if (--rescnt < 0) { Py_ssize_t len;
rescnt = fmtcnt + 100; char *pos;
reslen += rescnt;
if (_PyBytes_Resize(&result, reslen)) pos = strchr(fmt + 1, '%');
return NULL; if (pos != NULL)
res = PyBytes_AS_STRING(result) len = pos - fmt;
+ reslen - rescnt; else {
--rescnt; len = PyBytes_GET_SIZE(format);
len -= (fmt - PyBytes_AS_STRING(format));
} }
*res++ = *fmt++; assert(len != 0);
Py_MEMCPY(res, fmt, len);
res += len;
fmt += len;
fmtcnt -= (len - 1);
} }
else { else {
/* Got a format specifier */ /* Got a format specifier */
@ -626,6 +646,10 @@ _PyBytes_Format(PyObject *format, PyObject *args)
int sign; int sign;
Py_ssize_t len = 0; Py_ssize_t len = 0;
char onechar; /* For byte_converter() */ char onechar; /* For byte_converter() */
Py_ssize_t alloc;
#ifdef Py_DEBUG
char *before;
#endif
fmt++; fmt++;
if (*fmt == '(') { if (*fmt == '(') {
@ -673,6 +697,8 @@ _PyBytes_Format(PyObject *format, PyObject *args)
arglen = -1; arglen = -1;
argidx = -2; argidx = -2;
} }
/* Parse flags. Example: "%+i" => flags=F_SIGN. */
while (--fmtcnt >= 0) { while (--fmtcnt >= 0) {
switch (c = *fmt++) { switch (c = *fmt++) {
case '-': flags |= F_LJUST; continue; case '-': flags |= F_LJUST; continue;
@ -683,6 +709,8 @@ _PyBytes_Format(PyObject *format, PyObject *args)
} }
break; break;
} }
/* Parse width. Example: "%10s" => width=10 */
if (c == '*') { if (c == '*') {
v = getnextarg(args, arglen, &argidx); v = getnextarg(args, arglen, &argidx);
if (v == NULL) if (v == NULL)
@ -717,6 +745,8 @@ _PyBytes_Format(PyObject *format, PyObject *args)
width = width*10 + (c - '0'); width = width*10 + (c - '0');
} }
} }
/* Parse precision. Example: "%.3f" => prec=3 */
if (c == '.') { if (c == '.') {
prec = 0; prec = 0;
if (--fmtcnt >= 0) if (--fmtcnt >= 0)
@ -771,6 +801,12 @@ _PyBytes_Format(PyObject *format, PyObject *args)
if (v == NULL) if (v == NULL)
goto error; goto error;
} }
if (fmtcnt < 0) {
/* last write: disable writer overallocation */
writer.overallocate = 0;
}
sign = 0; sign = 0;
fill = ' '; fill = ' ';
switch (c) { switch (c) {
@ -778,6 +814,7 @@ _PyBytes_Format(PyObject *format, PyObject *args)
pbuf = "%"; pbuf = "%";
len = 1; len = 1;
break; break;
case 'r': case 'r':
// %r is only for 2/3 code; 3 only code should use %a // %r is only for 2/3 code; 3 only code should use %a
case 'a': case 'a':
@ -790,6 +827,7 @@ _PyBytes_Format(PyObject *format, PyObject *args)
if (prec >= 0 && len > prec) if (prec >= 0 && len > prec)
len = prec; len = prec;
break; break;
case 's': case 's':
// %s is only for 2/3 code; 3 only code should use %b // %s is only for 2/3 code; 3 only code should use %b
case 'b': case 'b':
@ -799,6 +837,7 @@ _PyBytes_Format(PyObject *format, PyObject *args)
if (prec >= 0 && len > prec) if (prec >= 0 && len > prec)
len = prec; len = prec;
break; break;
case 'i': case 'i':
case 'd': case 'd':
case 'u': case 'u':
@ -815,14 +854,24 @@ _PyBytes_Format(PyObject *format, PyObject *args)
if (flags & F_ZERO) if (flags & F_ZERO)
fill = '0'; fill = '0';
break; break;
case 'e': case 'e':
case 'E': case 'E':
case 'f': case 'f':
case 'F': case 'F':
case 'g': case 'g':
case 'G': case 'G':
temp = formatfloat(v, flags, prec, c); if (width == -1 && prec == -1
if (temp == NULL) && !(flags & (F_SIGN | F_BLANK)))
{
/* Fast path */
res = formatfloat(v, flags, prec, c, NULL, &writer, res, 1);
if (res == NULL)
goto error;
continue;
}
if (!formatfloat(v, flags, prec, c, &temp, NULL, res, 1))
goto error; goto error;
pbuf = PyBytes_AS_STRING(temp); pbuf = PyBytes_AS_STRING(temp);
len = PyBytes_GET_SIZE(temp); len = PyBytes_GET_SIZE(temp);
@ -830,12 +879,14 @@ _PyBytes_Format(PyObject *format, PyObject *args)
if (flags & F_ZERO) if (flags & F_ZERO)
fill = '0'; fill = '0';
break; break;
case 'c': case 'c':
pbuf = &onechar; pbuf = &onechar;
len = byte_converter(v, &onechar); len = byte_converter(v, &onechar);
if (!len) if (!len)
goto error; goto error;
break; break;
default: default:
PyErr_Format(PyExc_ValueError, PyErr_Format(PyExc_ValueError,
"unsupported format character '%c' (0x%x) " "unsupported format character '%c' (0x%x) "
@ -845,6 +896,7 @@ _PyBytes_Format(PyObject *format, PyObject *args)
PyBytes_AsString(format))); PyBytes_AsString(format)));
goto error; goto error;
} }
if (sign) { if (sign) {
if (*pbuf == '-' || *pbuf == '+') { if (*pbuf == '-' || *pbuf == '+') {
sign = *pbuf++; sign = *pbuf++;
@ -859,29 +911,30 @@ _PyBytes_Format(PyObject *format, PyObject *args)
} }
if (width < len) if (width < len)
width = len; width = len;
if (rescnt - (sign != 0) < width) {
reslen -= rescnt; alloc = width;
rescnt = width + fmtcnt + 100; if (sign != 0 && len == width)
reslen += rescnt; alloc++;
if (reslen < 0) { if (alloc > 1) {
Py_DECREF(result); res = _PyBytesWriter_Prepare(&writer, res, alloc - 1);
Py_XDECREF(temp); if (res == NULL)
return PyErr_NoMemory(); goto error;
}
if (_PyBytes_Resize(&result, reslen)) {
Py_XDECREF(temp);
return NULL;
}
res = PyBytes_AS_STRING(result)
+ reslen - rescnt;
} }
#ifdef Py_DEBUG
before = res;
#endif
/* Write the sign if needed */
if (sign) { if (sign) {
if (fill != ' ') if (fill != ' ')
*res++ = sign; *res++ = sign;
rescnt--;
if (width > len) if (width > len)
width--; width--;
} }
/* Write the numeric prefix for "x", "X" and "o" formats
if the alternate form is used.
For example, write "0x" for the "%#x" format. */
if ((flags & F_ALT) && (c == 'x' || c == 'X')) { if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
assert(pbuf[0] == '0'); assert(pbuf[0] == '0');
assert(pbuf[1] == c); assert(pbuf[1] == c);
@ -889,18 +942,21 @@ _PyBytes_Format(PyObject *format, PyObject *args)
*res++ = *pbuf++; *res++ = *pbuf++;
*res++ = *pbuf++; *res++ = *pbuf++;
} }
rescnt -= 2;
width -= 2; width -= 2;
if (width < 0) if (width < 0)
width = 0; width = 0;
len -= 2; len -= 2;
} }
/* Pad left with the fill character if needed */
if (width > len && !(flags & F_LJUST)) { if (width > len && !(flags & F_LJUST)) {
do { memset(res, fill, width - len);
--rescnt; res += (width - len);
*res++ = fill; width = len;
} while (--width > len);
} }
/* If padding with spaces: write sign if needed and/or numeric
prefix if the alternate form is used */
if (fill == ' ') { if (fill == ' ') {
if (sign) if (sign)
*res++ = sign; *res++ = sign;
@ -912,13 +968,17 @@ _PyBytes_Format(PyObject *format, PyObject *args)
*res++ = *pbuf++; *res++ = *pbuf++;
} }
} }
/* Copy bytes */
Py_MEMCPY(res, pbuf, len); Py_MEMCPY(res, pbuf, len);
res += len; res += len;
rescnt -= len;
while (--width >= len) { /* Pad right with the fill character if needed */
--rescnt; if (width > len) {
*res++ = ' '; memset(res, ' ', width - len);
res += (width - len);
} }
if (dict && (argidx < arglen) && c != '%') { if (dict && (argidx < arglen) && c != '%') {
PyErr_SetString(PyExc_TypeError, PyErr_SetString(PyExc_TypeError,
"not all arguments converted during bytes formatting"); "not all arguments converted during bytes formatting");
@ -926,22 +986,31 @@ _PyBytes_Format(PyObject *format, PyObject *args)
goto error; goto error;
} }
Py_XDECREF(temp); Py_XDECREF(temp);
#ifdef Py_DEBUG
/* check that we computed the exact size for this write */
assert((res - before) == alloc);
#endif
} /* '%' */ } /* '%' */
/* If overallocation was disabled, ensure that it was the last
write. Otherwise, we missed an optimization */
assert(writer.overallocate || fmtcnt < 0);
} /* until end */ } /* until end */
if (argidx < arglen && !dict) { if (argidx < arglen && !dict) {
PyErr_SetString(PyExc_TypeError, PyErr_SetString(PyExc_TypeError,
"not all arguments converted during bytes formatting"); "not all arguments converted during bytes formatting");
goto error; goto error;
} }
if (args_owned) { if (args_owned) {
Py_DECREF(args); Py_DECREF(args);
} }
if (_PyBytes_Resize(&result, reslen - rescnt)) return _PyBytesWriter_Finish(&writer, res);
return NULL;
return result;
error: error:
Py_DECREF(result); _PyBytesWriter_Dealloc(&writer);
if (args_owned) { if (args_owned) {
Py_DECREF(args); Py_DECREF(args);
} }