mirror of https://github.com/python/cpython
Issue #14744: Inline unicode_writer_write_char() and unicode_writer_write_str()
Also optimize PyUnicode_Format(): call unicode_writer_prepare() only once per argument.
This commit is contained in:
parent
c1fdad3e48
commit
ee4544c920
|
@ -500,6 +500,7 @@ render_field(PyObject *fieldobj, SubString *format_spec, unicode_writer_t *write
|
||||||
PyObject *result = NULL;
|
PyObject *result = NULL;
|
||||||
PyObject *format_spec_object = NULL;
|
PyObject *format_spec_object = NULL;
|
||||||
PyObject *(*formatter)(PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
|
PyObject *(*formatter)(PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
|
||||||
|
Py_ssize_t len;
|
||||||
|
|
||||||
/* If we know the type exactly, skip the lookup of __format__ and just
|
/* If we know the type exactly, skip the lookup of __format__ and just
|
||||||
call the formatter directly. */
|
call the formatter directly. */
|
||||||
|
@ -533,12 +534,19 @@ render_field(PyObject *fieldobj, SubString *format_spec, unicode_writer_t *write
|
||||||
|
|
||||||
result = PyObject_Format(fieldobj, format_spec_object);
|
result = PyObject_Format(fieldobj, format_spec_object);
|
||||||
}
|
}
|
||||||
if (result == NULL || PyUnicode_READY(result) == -1)
|
if (result == NULL)
|
||||||
|
goto done;
|
||||||
|
if (PyUnicode_READY(result) == -1)
|
||||||
goto done;
|
goto done;
|
||||||
|
|
||||||
assert(PyUnicode_Check(result));
|
len = PyUnicode_GET_LENGTH(result);
|
||||||
|
if (unicode_writer_prepare(writer,
|
||||||
ok = (unicode_writer_write_str(writer, result, 0, PyUnicode_GET_LENGTH(result)) == 0);
|
len, PyUnicode_MAX_CHAR_VALUE(result)) == -1)
|
||||||
|
goto done;
|
||||||
|
copy_characters(writer->buffer, writer->pos,
|
||||||
|
result, 0, len);
|
||||||
|
writer->pos += len;
|
||||||
|
ok = 1;
|
||||||
done:
|
done:
|
||||||
Py_XDECREF(format_spec_object);
|
Py_XDECREF(format_spec_object);
|
||||||
Py_XDECREF(result);
|
Py_XDECREF(result);
|
||||||
|
@ -873,7 +881,8 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs,
|
||||||
SubString literal;
|
SubString literal;
|
||||||
SubString field_name;
|
SubString field_name;
|
||||||
SubString format_spec;
|
SubString format_spec;
|
||||||
Py_UCS4 conversion;
|
Py_UCS4 conversion, maxchar;
|
||||||
|
Py_ssize_t sublen;
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
MarkupIterator_init(&iter, input->str, input->start, input->end);
|
MarkupIterator_init(&iter, input->str, input->start, input->end);
|
||||||
|
@ -881,11 +890,18 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs,
|
||||||
&field_name, &format_spec,
|
&field_name, &format_spec,
|
||||||
&conversion,
|
&conversion,
|
||||||
&format_spec_needs_expanding)) == 2) {
|
&format_spec_needs_expanding)) == 2) {
|
||||||
err = unicode_writer_write_str(writer,
|
sublen = literal.end - literal.start;
|
||||||
literal.str, literal.start,
|
if (sublen) {
|
||||||
literal.end - literal.start);
|
maxchar = _PyUnicode_FindMaxChar(literal.str,
|
||||||
if (err == -1)
|
literal.start, literal.end);
|
||||||
return 0;
|
err = unicode_writer_prepare(writer, sublen, maxchar);
|
||||||
|
if (err == -1)
|
||||||
|
return 0;
|
||||||
|
copy_characters(writer->buffer, writer->pos,
|
||||||
|
literal.str, literal.start, sublen);
|
||||||
|
writer->pos += sublen;
|
||||||
|
}
|
||||||
|
|
||||||
if (field_present)
|
if (field_present)
|
||||||
if (!output_markup(&field_name, &format_spec,
|
if (!output_markup(&field_name, &format_spec,
|
||||||
format_spec_needs_expanding, conversion, writer,
|
format_spec_needs_expanding, conversion, writer,
|
||||||
|
|
|
@ -1150,14 +1150,15 @@ _copy_characters(PyObject *to, Py_ssize_t to_start,
|
||||||
void *from_data, *to_data;
|
void *from_data, *to_data;
|
||||||
int fast;
|
int fast;
|
||||||
|
|
||||||
|
assert(0 <= how_many);
|
||||||
|
assert(0 <= from_start);
|
||||||
|
assert(0 <= to_start);
|
||||||
assert(PyUnicode_Check(from));
|
assert(PyUnicode_Check(from));
|
||||||
assert(PyUnicode_Check(to));
|
assert(PyUnicode_Check(to));
|
||||||
assert(PyUnicode_IS_READY(from));
|
assert(PyUnicode_IS_READY(from));
|
||||||
assert(PyUnicode_IS_READY(to));
|
assert(PyUnicode_IS_READY(to));
|
||||||
|
assert(from_start + how_many <= PyUnicode_GET_LENGTH(from));
|
||||||
assert(PyUnicode_GET_LENGTH(from) >= how_many);
|
|
||||||
assert(to_start + how_many <= PyUnicode_GET_LENGTH(to));
|
assert(to_start + how_many <= PyUnicode_GET_LENGTH(to));
|
||||||
assert(0 <= how_many);
|
|
||||||
|
|
||||||
if (how_many == 0)
|
if (how_many == 0)
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -13271,48 +13272,6 @@ unicode_writer_prepare(unicode_writer_t *writer,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
Py_LOCAL_INLINE(int)
|
|
||||||
unicode_writer_write_str(
|
|
||||||
unicode_writer_t *writer,
|
|
||||||
PyObject *str, Py_ssize_t start, Py_ssize_t length)
|
|
||||||
{
|
|
||||||
Py_UCS4 maxchar;
|
|
||||||
|
|
||||||
assert(str != NULL);
|
|
||||||
assert(PyUnicode_Check(str));
|
|
||||||
if (PyUnicode_READY(str) == -1)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
assert(0 <= start);
|
|
||||||
assert(0 <= length);
|
|
||||||
assert(start + length <= PyUnicode_GET_LENGTH(str));
|
|
||||||
if (length == 0)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
maxchar = _PyUnicode_FindMaxChar(str, start, start + length);
|
|
||||||
if (unicode_writer_prepare(writer, length, maxchar) == -1)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
assert((writer->pos + length) <= PyUnicode_GET_LENGTH(writer->buffer));
|
|
||||||
copy_characters(writer->buffer, writer->pos,
|
|
||||||
str, start, length);
|
|
||||||
writer->pos += length;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
Py_LOCAL_INLINE(int)
|
|
||||||
unicode_writer_write_char(
|
|
||||||
unicode_writer_t *writer,
|
|
||||||
Py_UCS4 ch)
|
|
||||||
{
|
|
||||||
if (unicode_writer_prepare(writer, 1, ch) == -1)
|
|
||||||
return -1;
|
|
||||||
assert((writer->pos + 1) <= PyUnicode_GET_LENGTH(writer->buffer));
|
|
||||||
PyUnicode_WRITE(writer->kind, writer->data, writer->pos, ch);
|
|
||||||
writer->pos += 1;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
Py_LOCAL(PyObject *)
|
Py_LOCAL(PyObject *)
|
||||||
unicode_writer_finish(unicode_writer_t *writer)
|
unicode_writer_finish(unicode_writer_t *writer)
|
||||||
{
|
{
|
||||||
|
@ -13791,6 +13750,8 @@ PyUnicode_Format(PyObject *format, PyObject *args)
|
||||||
void *fmt;
|
void *fmt;
|
||||||
enum PyUnicode_Kind kind, fmtkind;
|
enum PyUnicode_Kind kind, fmtkind;
|
||||||
unicode_writer_t writer;
|
unicode_writer_t writer;
|
||||||
|
Py_ssize_t sublen;
|
||||||
|
Py_UCS4 maxchar;
|
||||||
|
|
||||||
if (format == NULL || args == NULL) {
|
if (format == NULL || args == NULL) {
|
||||||
PyErr_BadInternalCall();
|
PyErr_BadInternalCall();
|
||||||
|
@ -13833,8 +13794,15 @@ PyUnicode_Format(PyObject *format, PyObject *args)
|
||||||
}
|
}
|
||||||
if (fmtcnt < 0)
|
if (fmtcnt < 0)
|
||||||
fmtpos--;
|
fmtpos--;
|
||||||
if (unicode_writer_write_str(&writer, uformat, nonfmtpos, fmtpos - nonfmtpos) < 0)
|
sublen = fmtpos - nonfmtpos;
|
||||||
|
maxchar = _PyUnicode_FindMaxChar(uformat,
|
||||||
|
nonfmtpos, nonfmtpos + sublen);
|
||||||
|
if (unicode_writer_prepare(&writer, sublen, maxchar) == -1)
|
||||||
goto onError;
|
goto onError;
|
||||||
|
|
||||||
|
copy_characters(writer.buffer, writer.pos,
|
||||||
|
uformat, nonfmtpos, sublen);
|
||||||
|
writer.pos += sublen;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
/* Got a format specifier */
|
/* Got a format specifier */
|
||||||
|
@ -13849,6 +13817,8 @@ PyUnicode_Format(PyObject *format, PyObject *args)
|
||||||
PyObject *v = NULL;
|
PyObject *v = NULL;
|
||||||
void *pbuf = NULL;
|
void *pbuf = NULL;
|
||||||
Py_ssize_t pindex, len;
|
Py_ssize_t pindex, len;
|
||||||
|
Py_UCS4 bufmaxchar;
|
||||||
|
Py_ssize_t buflen;
|
||||||
|
|
||||||
fmtpos++;
|
fmtpos++;
|
||||||
c = PyUnicode_READ(fmtkind, fmt, fmtpos);
|
c = PyUnicode_READ(fmtkind, fmt, fmtpos);
|
||||||
|
@ -13991,8 +13961,10 @@ PyUnicode_Format(PyObject *format, PyObject *args)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (c == '%') {
|
if (c == '%') {
|
||||||
if (unicode_writer_write_char(&writer, '%') < 0)
|
if (unicode_writer_prepare(&writer, 1, '%') == -1)
|
||||||
goto onError;
|
goto onError;
|
||||||
|
PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '%');
|
||||||
|
writer.pos += 1;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -14126,10 +14098,35 @@ PyUnicode_Format(PyObject *format, PyObject *args)
|
||||||
}
|
}
|
||||||
if (width < len)
|
if (width < len)
|
||||||
width = len;
|
width = len;
|
||||||
|
|
||||||
|
/* Compute the length and maximum character of the
|
||||||
|
written characters */
|
||||||
|
bufmaxchar = 127;
|
||||||
|
if (!(flags & F_LJUST)) {
|
||||||
|
if (sign) {
|
||||||
|
if ((width-1) > len)
|
||||||
|
bufmaxchar = Py_MAX(bufmaxchar, fill);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (width > len)
|
||||||
|
bufmaxchar = Py_MAX(bufmaxchar, fill);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
maxchar = _PyUnicode_FindMaxChar(temp, 0, pindex+len);
|
||||||
|
bufmaxchar = Py_MAX(bufmaxchar, maxchar);
|
||||||
|
|
||||||
|
buflen = width;
|
||||||
|
if (sign && len == width)
|
||||||
|
buflen++;
|
||||||
|
|
||||||
|
if (unicode_writer_prepare(&writer, buflen, bufmaxchar) == -1)
|
||||||
|
goto onError;
|
||||||
|
|
||||||
|
/* Write characters */
|
||||||
if (sign) {
|
if (sign) {
|
||||||
if (fill != ' ') {
|
if (fill != ' ') {
|
||||||
if (unicode_writer_write_char(&writer, signchar) < 0)
|
PyUnicode_WRITE(writer.kind, writer.data, writer.pos, signchar);
|
||||||
goto onError;
|
writer.pos += 1;
|
||||||
}
|
}
|
||||||
if (width > len)
|
if (width > len)
|
||||||
width--;
|
width--;
|
||||||
|
@ -14138,8 +14135,6 @@ PyUnicode_Format(PyObject *format, PyObject *args)
|
||||||
assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
|
assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
|
||||||
assert(PyUnicode_READ(kind, pbuf, pindex + 1) == c);
|
assert(PyUnicode_READ(kind, pbuf, pindex + 1) == c);
|
||||||
if (fill != ' ') {
|
if (fill != ' ') {
|
||||||
if (unicode_writer_prepare(&writer, 2, 127) < 0)
|
|
||||||
goto onError;
|
|
||||||
PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '0');
|
PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '0');
|
||||||
PyUnicode_WRITE(writer.kind, writer.data, writer.pos+1, c);
|
PyUnicode_WRITE(writer.kind, writer.data, writer.pos+1, c);
|
||||||
writer.pos += 2;
|
writer.pos += 2;
|
||||||
|
@ -14151,43 +14146,35 @@ PyUnicode_Format(PyObject *format, PyObject *args)
|
||||||
len -= 2;
|
len -= 2;
|
||||||
}
|
}
|
||||||
if (width > len && !(flags & F_LJUST)) {
|
if (width > len && !(flags & F_LJUST)) {
|
||||||
Py_ssize_t sublen;
|
|
||||||
sublen = width - len;
|
sublen = width - len;
|
||||||
if (unicode_writer_prepare(&writer, sublen, fill) < 0)
|
|
||||||
goto onError;
|
|
||||||
FILL(writer.kind, writer.data, fill, writer.pos, sublen);
|
FILL(writer.kind, writer.data, fill, writer.pos, sublen);
|
||||||
writer.pos += sublen;
|
writer.pos += sublen;
|
||||||
width = len;
|
width = len;
|
||||||
}
|
}
|
||||||
if (fill == ' ') {
|
if (fill == ' ') {
|
||||||
if (sign) {
|
if (sign) {
|
||||||
if (unicode_writer_write_char(&writer, signchar) < 0)
|
PyUnicode_WRITE(writer.kind, writer.data, writer.pos, signchar);
|
||||||
goto onError;
|
writer.pos += 1;
|
||||||
}
|
}
|
||||||
if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) {
|
if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) {
|
||||||
assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
|
assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
|
||||||
assert(PyUnicode_READ(kind, pbuf, pindex+1) == c);
|
assert(PyUnicode_READ(kind, pbuf, pindex+1) == c);
|
||||||
|
|
||||||
if (unicode_writer_prepare(&writer, 2, 127) < 0)
|
|
||||||
goto onError;
|
|
||||||
PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '0');
|
PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '0');
|
||||||
PyUnicode_WRITE(writer.kind, writer.data, writer.pos+1, c);
|
PyUnicode_WRITE(writer.kind, writer.data, writer.pos+1, c);
|
||||||
writer.pos += 2;
|
writer.pos += 2;
|
||||||
|
|
||||||
pindex += 2;
|
pindex += 2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Copy all characters, preserving len */
|
copy_characters(writer.buffer, writer.pos,
|
||||||
if (unicode_writer_write_str(&writer, temp, pindex, len) < 0)
|
temp, pindex, len);
|
||||||
goto onError;
|
writer.pos += len;
|
||||||
if (width > len) {
|
if (width > len) {
|
||||||
Py_ssize_t sublen = width - len;
|
sublen = width - len;
|
||||||
if (unicode_writer_prepare(&writer, sublen, ' ') < 0)
|
|
||||||
goto onError;
|
|
||||||
FILL(writer.kind, writer.data, ' ', writer.pos, sublen);
|
FILL(writer.kind, writer.data, ' ', writer.pos, sublen);
|
||||||
writer.pos += sublen;
|
writer.pos += sublen;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dict && (argidx < arglen) && c != '%') {
|
if (dict && (argidx < arglen) && c != '%') {
|
||||||
PyErr_SetString(PyExc_TypeError,
|
PyErr_SetString(PyExc_TypeError,
|
||||||
"not all arguments converted during string formatting");
|
"not all arguments converted during string formatting");
|
||||||
|
|
Loading…
Reference in New Issue