Issue #25349: Optimize bytes % int

Optimize bytes.__mod__(args) for integer formats: %d (%i, %u), %o, %x and %X.
_PyBytesWriter is now used to format the integer directly into the writer
buffer, instead of using a temporary bytes object.

Formatting is between 30% and 50% faster on a microbenchmark.
This commit is contained in:
Victor Stinner 2015-10-09 22:43:24 +02:00
parent aecf63ee3a
commit be75b8cf23
3 changed files with 136 additions and 27 deletions

View File

@ -182,6 +182,13 @@ PyAPI_FUNC(int) _PyLong_FormatWriter(
int base, int base,
int alternate); int alternate);
/* Format the integer object obj in the given base and write the result
   through the bytes writer, starting at str (the current write position
   in the writer's buffer).

   Return the position just past the bytes that were written, or NULL if
   formatting failed.  Base 10 uses the decimal formatter; any other base
   goes through the binary-base formatter, which is the only path that
   receives alternate (presumably the '#' prefix flag -- confirm against
   long_format_binary). */
PyAPI_FUNC(char*) _PyLong_FormatBytesWriter(
_PyBytesWriter *writer,
char *str,
PyObject *obj,
int base,
int alternate);
/* Format the object based on the format_spec, as defined in PEP 3101 /* Format the object based on the format_spec, as defined in PEP 3101
(Advanced String Formatting). */ (Advanced String Formatting). */
PyAPI_FUNC(int) _PyLong_FormatAdvancedWriter( PyAPI_FUNC(int) _PyLong_FormatAdvancedWriter(

View File

@ -841,6 +841,42 @@ _PyBytes_Format(PyObject *format, PyObject *args)
case 'o': case 'o':
case 'x': case 'x':
case 'X': case 'X':
if (PyLong_CheckExact(v)
&& width == -1 && prec == -1
&& !(flags & (F_SIGN | F_BLANK))
&& c != 'X')
{
/* Fast path */
int alternate = flags & F_ALT;
int base;
switch(c)
{
default:
assert(0 && "'type' not in [diuoxX]");
case 'd':
case 'i':
case 'u':
base = 10;
break;
case 'o':
base = 8;
break;
case 'x':
case 'X':
base = 16;
break;
}
/* Fast path */
writer.min_size -= 2; /* size preallocated by "%d" */
res = _PyLong_FormatBytesWriter(&writer, res,
v, base, alternate);
if (res == NULL)
goto error;
continue;
}
temp = formatlong(v, flags, prec, c); temp = formatlong(v, flags, prec, c);
if (!temp) if (!temp)
goto error; goto error;

View File

@ -1582,7 +1582,9 @@ divrem1(PyLongObject *a, digit n, digit *prem)
static int static int
long_to_decimal_string_internal(PyObject *aa, long_to_decimal_string_internal(PyObject *aa,
PyObject **p_output, PyObject **p_output,
_PyUnicodeWriter *writer) _PyUnicodeWriter *writer,
_PyBytesWriter *bytes_writer,
char **bytes_str)
{ {
PyLongObject *scratch, *a; PyLongObject *scratch, *a;
PyObject *str; PyObject *str;
@ -1664,6 +1666,13 @@ long_to_decimal_string_internal(PyObject *aa,
kind = writer->kind; kind = writer->kind;
str = NULL; str = NULL;
} }
else if (bytes_writer) {
*bytes_str = _PyBytesWriter_Prepare(bytes_writer, *bytes_str, strlen);
if (*bytes_str == NULL) {
Py_DECREF(scratch);
return -1;
}
}
else { else {
str = PyUnicode_New(strlen, '9'); str = PyUnicode_New(strlen, '9');
if (str == NULL) { if (str == NULL) {
@ -1673,13 +1682,8 @@ long_to_decimal_string_internal(PyObject *aa,
kind = PyUnicode_KIND(str); kind = PyUnicode_KIND(str);
} }
#define WRITE_DIGITS(TYPE) \ #define WRITE_DIGITS(p) \
do { \ do { \
if (writer) \
p = (TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos + strlen; \
else \
p = (TYPE*)PyUnicode_DATA(str) + strlen; \
\
/* pout[0] through pout[size-2] contribute exactly \ /* pout[0] through pout[size-2] contribute exactly \
_PyLong_DECIMAL_SHIFT digits each */ \ _PyLong_DECIMAL_SHIFT digits each */ \
for (i=0; i < size - 1; i++) { \ for (i=0; i < size - 1; i++) { \
@ -1699,6 +1703,16 @@ long_to_decimal_string_internal(PyObject *aa,
/* and sign */ \ /* and sign */ \
if (negative) \ if (negative) \
*--p = '-'; \ *--p = '-'; \
} while (0)
#define WRITE_UNICODE_DIGITS(TYPE) \
do { \
if (writer) \
p = (TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos + strlen; \
else \
p = (TYPE*)PyUnicode_DATA(str) + strlen; \
\
WRITE_DIGITS(p); \
\ \
/* check we've counted correctly */ \ /* check we've counted correctly */ \
if (writer) \ if (writer) \
@ -1708,25 +1722,34 @@ long_to_decimal_string_internal(PyObject *aa,
} while (0) } while (0)
/* fill the string right-to-left */ /* fill the string right-to-left */
if (kind == PyUnicode_1BYTE_KIND) { if (bytes_writer) {
char *p = *bytes_str + strlen;
WRITE_DIGITS(p);
assert(p == *bytes_str);
}
else if (kind == PyUnicode_1BYTE_KIND) {
Py_UCS1 *p; Py_UCS1 *p;
WRITE_DIGITS(Py_UCS1); WRITE_UNICODE_DIGITS(Py_UCS1);
} }
else if (kind == PyUnicode_2BYTE_KIND) { else if (kind == PyUnicode_2BYTE_KIND) {
Py_UCS2 *p; Py_UCS2 *p;
WRITE_DIGITS(Py_UCS2); WRITE_UNICODE_DIGITS(Py_UCS2);
} }
else { else {
Py_UCS4 *p; Py_UCS4 *p;
assert (kind == PyUnicode_4BYTE_KIND); assert (kind == PyUnicode_4BYTE_KIND);
WRITE_DIGITS(Py_UCS4); WRITE_UNICODE_DIGITS(Py_UCS4);
} }
#undef WRITE_DIGITS #undef WRITE_DIGITS
#undef WRITE_UNICODE_DIGITS
Py_DECREF(scratch); Py_DECREF(scratch);
if (writer) { if (writer) {
writer->pos += strlen; writer->pos += strlen;
} }
else if (bytes_writer) {
(*bytes_str) += strlen;
}
else { else {
assert(_PyUnicode_CheckConsistency(str, 1)); assert(_PyUnicode_CheckConsistency(str, 1));
*p_output = (PyObject *)str; *p_output = (PyObject *)str;
@ -1738,7 +1761,7 @@ static PyObject *
long_to_decimal_string(PyObject *aa) long_to_decimal_string(PyObject *aa)
{ {
PyObject *v; PyObject *v;
if (long_to_decimal_string_internal(aa, &v, NULL) == -1) if (long_to_decimal_string_internal(aa, &v, NULL, NULL, NULL) == -1)
return NULL; return NULL;
return v; return v;
} }
@ -1750,7 +1773,8 @@ long_to_decimal_string(PyObject *aa)
static int static int
long_format_binary(PyObject *aa, int base, int alternate, long_format_binary(PyObject *aa, int base, int alternate,
PyObject **p_output, _PyUnicodeWriter *writer) PyObject **p_output, _PyUnicodeWriter *writer,
_PyBytesWriter *bytes_writer, char **bytes_str)
{ {
PyLongObject *a = (PyLongObject *)aa; PyLongObject *a = (PyLongObject *)aa;
PyObject *v; PyObject *v;
@ -1812,6 +1836,11 @@ long_format_binary(PyObject *aa, int base, int alternate,
kind = writer->kind; kind = writer->kind;
v = NULL; v = NULL;
} }
else if (bytes_writer) {
    /* Bytes output: reserve sz bytes in the bytes writer before the
       digits are written right-to-left below.  The guard must test
       bytes_writer (not the unicode writer, which is NULL on this path);
       otherwise *bytes_str is never prepared and the digit loop writes
       into unreserved memory.  Keep the pointer returned by Prepare as
       the new write position. */
    *bytes_str = _PyBytesWriter_Prepare(bytes_writer, *bytes_str, sz);
    if (*bytes_str == NULL)
        return -1;
}
else { else {
v = PyUnicode_New(sz, 'x'); v = PyUnicode_New(sz, 'x');
if (v == NULL) if (v == NULL)
@ -1819,13 +1848,8 @@ long_format_binary(PyObject *aa, int base, int alternate,
kind = PyUnicode_KIND(v); kind = PyUnicode_KIND(v);
} }
#define WRITE_DIGITS(TYPE) \ #define WRITE_DIGITS(p) \
do { \ do { \
if (writer) \
p = (TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos + sz; \
else \
p = (TYPE*)PyUnicode_DATA(v) + sz; \
\
if (size_a == 0) { \ if (size_a == 0) { \
*--p = '0'; \ *--p = '0'; \
} \ } \
@ -1860,30 +1884,50 @@ long_format_binary(PyObject *aa, int base, int alternate,
} \ } \
if (negative) \ if (negative) \
*--p = '-'; \ *--p = '-'; \
} while (0)
#define WRITE_UNICODE_DIGITS(TYPE) \
do { \
if (writer) \
p = (TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos + sz; \
else \
p = (TYPE*)PyUnicode_DATA(v) + sz; \
\
WRITE_DIGITS(p); \
\
if (writer) \ if (writer) \
assert(p == ((TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos)); \ assert(p == ((TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos)); \
else \ else \
assert(p == (TYPE*)PyUnicode_DATA(v)); \ assert(p == (TYPE*)PyUnicode_DATA(v)); \
} while (0) } while (0)
if (kind == PyUnicode_1BYTE_KIND) { if (bytes_writer) {
char *p = *bytes_str + sz;
WRITE_DIGITS(p);
assert(p == *bytes_str);
}
else if (kind == PyUnicode_1BYTE_KIND) {
Py_UCS1 *p; Py_UCS1 *p;
WRITE_DIGITS(Py_UCS1); WRITE_UNICODE_DIGITS(Py_UCS1);
} }
else if (kind == PyUnicode_2BYTE_KIND) { else if (kind == PyUnicode_2BYTE_KIND) {
Py_UCS2 *p; Py_UCS2 *p;
WRITE_DIGITS(Py_UCS2); WRITE_UNICODE_DIGITS(Py_UCS2);
} }
else { else {
Py_UCS4 *p; Py_UCS4 *p;
assert (kind == PyUnicode_4BYTE_KIND); assert (kind == PyUnicode_4BYTE_KIND);
WRITE_DIGITS(Py_UCS4); WRITE_UNICODE_DIGITS(Py_UCS4);
} }
#undef WRITE_DIGITS #undef WRITE_DIGITS
#undef WRITE_UNICODE_DIGITS
if (writer) { if (writer) {
writer->pos += sz; writer->pos += sz;
} }
else if (bytes_writer) {
(*bytes_str) += sz;
}
else { else {
assert(_PyUnicode_CheckConsistency(v, 1)); assert(_PyUnicode_CheckConsistency(v, 1));
*p_output = v; *p_output = v;
@ -1897,9 +1941,9 @@ _PyLong_Format(PyObject *obj, int base)
PyObject *str; PyObject *str;
int err; int err;
if (base == 10) if (base == 10)
err = long_to_decimal_string_internal(obj, &str, NULL); err = long_to_decimal_string_internal(obj, &str, NULL, NULL, NULL);
else else
err = long_format_binary(obj, base, 1, &str, NULL); err = long_format_binary(obj, base, 1, &str, NULL, NULL, NULL);
if (err == -1) if (err == -1)
return NULL; return NULL;
return str; return str;
@ -1911,9 +1955,31 @@ _PyLong_FormatWriter(_PyUnicodeWriter *writer,
int base, int alternate) int base, int alternate)
{ {
if (base == 10) if (base == 10)
return long_to_decimal_string_internal(obj, NULL, writer); return long_to_decimal_string_internal(obj, NULL, writer,
NULL, NULL);
else else
return long_format_binary(obj, base, alternate, NULL, writer); return long_format_binary(obj, base, alternate, NULL, writer,
NULL, NULL);
}
/* Format the integer obj in the given base straight into a bytes writer.

   str is the current write position inside writer's buffer.  On success
   the returned pointer is the position just past the formatted digits;
   on failure NULL is returned.  alternate is only forwarded on the
   non-decimal path. */
char*
_PyLong_FormatBytesWriter(_PyBytesWriter *writer, char *str,
                          PyObject *obj,
                          int base, int alternate)
{
    char *end = str;
    int status;

    if (base != 10) {
        status = long_format_binary(obj, base, alternate, NULL, NULL,
                                    writer, &end);
    }
    else {
        status = long_to_decimal_string_internal(obj, NULL, NULL,
                                                 writer, &end);
    }

    if (status < 0) {
        return NULL;
    }

    /* The helpers advance the position in place; it must stay valid. */
    assert(end != NULL);
    return end;
}
/* Table of digit values for 8-bit string -> integer conversion. /* Table of digit values for 8-bit string -> integer conversion.