From ee4544c9202793aa6731d3c12c1e898b15eea94d Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 9 May 2012 22:24:08 +0200 Subject: [PATCH] Issue #14744: Inline unicode_writer_write_char() and unicode_write_str() Optimize also PyUnicode_Format(): call unicode_writer_prepare() only once per argument. --- Objects/stringlib/unicode_format.h | 36 ++++++--- Objects/unicodeobject.c | 119 +++++++++++++---------------- 2 files changed, 79 insertions(+), 76 deletions(-) diff --git a/Objects/stringlib/unicode_format.h b/Objects/stringlib/unicode_format.h index 85a29f5d4e3..cdd3b292e55 100644 --- a/Objects/stringlib/unicode_format.h +++ b/Objects/stringlib/unicode_format.h @@ -500,6 +500,7 @@ render_field(PyObject *fieldobj, SubString *format_spec, unicode_writer_t *write PyObject *result = NULL; PyObject *format_spec_object = NULL; PyObject *(*formatter)(PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL; + Py_ssize_t len; /* If we know the type exactly, skip the lookup of __format__ and just call the formatter directly. */ @@ -533,12 +534,19 @@ render_field(PyObject *fieldobj, SubString *format_spec, unicode_writer_t *write result = PyObject_Format(fieldobj, format_spec_object); } - if (result == NULL || PyUnicode_READY(result) == -1) + if (result == NULL) + goto done; + if (PyUnicode_READY(result) == -1) goto done; - assert(PyUnicode_Check(result)); - - ok = (unicode_writer_write_str(writer, result, 0, PyUnicode_GET_LENGTH(result)) == 0); + len = PyUnicode_GET_LENGTH(result); + if (unicode_writer_prepare(writer, + len, PyUnicode_MAX_CHAR_VALUE(result)) == -1) + goto done; + copy_characters(writer->buffer, writer->pos, + result, 0, len); + writer->pos += len; + ok = 1; done: Py_XDECREF(format_spec_object); Py_XDECREF(result); @@ -873,7 +881,8 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs, SubString literal; SubString field_name; SubString format_spec; - Py_UCS4 conversion; + Py_UCS4 conversion, maxchar; + Py_ssize_t sublen; int err; MarkupIterator_init(&iter, input->str, input->start, input->end); @@ -881,11 +890,18 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs, &field_name, &format_spec, &conversion, &format_spec_needs_expanding)) == 2) { - err = unicode_writer_write_str(writer, - literal.str, literal.start, - literal.end - literal.start); - if (err == -1) - return 0; + sublen = literal.end - literal.start; + if (sublen) { + maxchar = _PyUnicode_FindMaxChar(literal.str, + literal.start, literal.end); + err = unicode_writer_prepare(writer, sublen, maxchar); + if (err == -1) + return 0; + copy_characters(writer->buffer, writer->pos, + literal.str, literal.start, sublen); + writer->pos += sublen; + } + if (field_present) if (!output_markup(&field_name, &format_spec, format_spec_needs_expanding, conversion, writer, diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 2c8378edb05..e810b180bc1 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1150,14 +1150,15 @@ _copy_characters(PyObject *to, Py_ssize_t to_start, void *from_data, *to_data; int fast; + assert(0 <= how_many); + assert(0 <= from_start); + assert(0 <= to_start); assert(PyUnicode_Check(from)); assert(PyUnicode_Check(to)); assert(PyUnicode_IS_READY(from)); assert(PyUnicode_IS_READY(to)); - - assert(PyUnicode_GET_LENGTH(from) >= how_many); + assert(from_start + how_many <= PyUnicode_GET_LENGTH(from)); assert(to_start + how_many <= PyUnicode_GET_LENGTH(to)); - assert(0 <= how_many); if (how_many == 0) return 0; @@ -13271,48 +13272,6 @@ unicode_writer_prepare(unicode_writer_t *writer, return 0; } -Py_LOCAL_INLINE(int) -unicode_writer_write_str( - unicode_writer_t *writer, - PyObject *str, Py_ssize_t start, Py_ssize_t length) -{ - Py_UCS4 maxchar; - - assert(str != NULL); - assert(PyUnicode_Check(str)); - if (PyUnicode_READY(str) == -1) - return -1; - - assert(0 <= start); - assert(0 <= length); - assert(start + length <= PyUnicode_GET_LENGTH(str)); - if (length == 0) - return 0; - - maxchar = _PyUnicode_FindMaxChar(str, start, start + length); - if (unicode_writer_prepare(writer, length, maxchar) == -1) - return -1; - - assert((writer->pos + length) <= PyUnicode_GET_LENGTH(writer->buffer)); - copy_characters(writer->buffer, writer->pos, - str, start, length); - writer->pos += length; - return 0; -} - -Py_LOCAL_INLINE(int) -unicode_writer_write_char( - unicode_writer_t *writer, - Py_UCS4 ch) -{ - if (unicode_writer_prepare(writer, 1, ch) == -1) - return -1; - assert((writer->pos + 1) <= PyUnicode_GET_LENGTH(writer->buffer)); - PyUnicode_WRITE(writer->kind, writer->data, writer->pos, ch); - writer->pos += 1; - return 0; -} - Py_LOCAL(PyObject *) unicode_writer_finish(unicode_writer_t *writer) { @@ -13791,6 +13750,8 @@ PyUnicode_Format(PyObject *format, PyObject *args) void *fmt; enum PyUnicode_Kind kind, fmtkind; unicode_writer_t writer; + Py_ssize_t sublen; + Py_UCS4 maxchar; if (format == NULL || args == NULL) { PyErr_BadInternalCall(); @@ -13833,8 +13794,15 @@ PyUnicode_Format(PyObject *format, PyObject *args) } if (fmtcnt < 0) fmtpos--; - if (unicode_writer_write_str(&writer, uformat, nonfmtpos, fmtpos - nonfmtpos) < 0) + sublen = fmtpos - nonfmtpos; + maxchar = _PyUnicode_FindMaxChar(uformat, + nonfmtpos, nonfmtpos + sublen); + if (unicode_writer_prepare(&writer, sublen, maxchar) == -1) goto onError; + + copy_characters(writer.buffer, writer.pos, + uformat, nonfmtpos, sublen); + writer.pos += sublen; } else { /* Got a format specifier */ @@ -13849,6 +13817,8 @@ PyUnicode_Format(PyObject *format, PyObject *args) PyObject *v = NULL; void *pbuf = NULL; Py_ssize_t pindex, len; + Py_UCS4 bufmaxchar; + Py_ssize_t buflen; fmtpos++; c = PyUnicode_READ(fmtkind, fmt, fmtpos); @@ -13991,8 +13961,10 @@ PyUnicode_Format(PyObject *format, PyObject *args) } if (c == '%') { - if (unicode_writer_write_char(&writer, '%') < 0) + if (unicode_writer_prepare(&writer, 1, '%') == -1) goto onError; + PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '%'); + writer.pos += 1; continue; } @@ -14126,10 +14098,35 @@ PyUnicode_Format(PyObject *format, PyObject *args) } if (width < len) width = len; + + /* Compute the length and maximum character of the + written characters */ + bufmaxchar = 127; + if (!(flags & F_LJUST)) { + if (sign) { + if ((width-1) > len) + bufmaxchar = Py_MAX(bufmaxchar, fill); + } + else { + if (width > len) + bufmaxchar = Py_MAX(bufmaxchar, fill); + } + } + maxchar = _PyUnicode_FindMaxChar(temp, 0, pindex+len); + bufmaxchar = Py_MAX(bufmaxchar, maxchar); + + buflen = width; + if (sign && len == width) + buflen++; + + if (unicode_writer_prepare(&writer, buflen, bufmaxchar) == -1) + goto onError; + + /* Write characters */ if (sign) { if (fill != ' ') { - if (unicode_writer_write_char(&writer, signchar) < 0) - goto onError; + PyUnicode_WRITE(writer.kind, writer.data, writer.pos, signchar); + writer.pos += 1; } if (width > len) width--; @@ -14138,8 +14135,6 @@ PyUnicode_Format(PyObject *format, PyObject *args) assert(PyUnicode_READ(kind, pbuf, pindex) == '0'); assert(PyUnicode_READ(kind, pbuf, pindex + 1) == c); if (fill != ' ') { - if (unicode_writer_prepare(&writer, 2, 127) < 0) - goto onError; PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '0'); PyUnicode_WRITE(writer.kind, writer.data, writer.pos+1, c); writer.pos += 2; @@ -14151,43 +14146,35 @@ PyUnicode_Format(PyObject *format, PyObject *args) len -= 2; } if (width > len && !(flags & F_LJUST)) { - Py_ssize_t sublen; sublen = width - len; - if (unicode_writer_prepare(&writer, sublen, fill) < 0) - goto onError; FILL(writer.kind, writer.data, fill, writer.pos, sublen); writer.pos += sublen; width = len; } if (fill == ' ') { if (sign) { - if (unicode_writer_write_char(&writer, signchar) < 0) - goto onError; + PyUnicode_WRITE(writer.kind, writer.data, writer.pos, signchar); + writer.pos += 1; } if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) { assert(PyUnicode_READ(kind, pbuf, pindex) == '0'); assert(PyUnicode_READ(kind, pbuf, pindex+1) == c); - - if (unicode_writer_prepare(&writer, 2, 127) < 0) - goto onError; PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '0'); PyUnicode_WRITE(writer.kind, writer.data, writer.pos+1, c); writer.pos += 2; - pindex += 2; } } - /* Copy all characters, preserving len */ - if (unicode_writer_write_str(&writer, temp, pindex, len) < 0) - goto onError; + copy_characters(writer.buffer, writer.pos, + temp, pindex, len); + writer.pos += len; if (width > len) { - Py_ssize_t sublen = width - len; - if (unicode_writer_prepare(&writer, sublen, ' ') < 0) - goto onError; + sublen = width - len; FILL(writer.kind, writer.data, ' ', writer.pos, sublen); writer.pos += sublen; } + if (dict && (argidx < arglen) && c != '%') { PyErr_SetString(PyExc_TypeError, "not all arguments converted during string formatting");