diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index a70585c1e69..09173076ae2 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -940,6 +940,15 @@ _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str /* Unicode string */ ); +/* Append the substring str[start:end] (start inclusive, end exclusive) of a Unicode string. + Return 0 on success, raise an exception and return -1 on error. */ +PyAPI_FUNC(int) +_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, + PyObject *str, /* Unicode string */ + Py_ssize_t start, /* index of the first character to append */ + Py_ssize_t end /* index one past the last character to append */ + ); + /* Append a latin1-encoded byte string. Return 0 on success, raise an exception and return -1 on error. */ PyAPI_FUNC(int) diff --git a/Lib/test/test_format.py b/Lib/test/test_format.py index bc56fbce533..bbce970a97e 100644 --- a/Lib/test/test_format.py +++ b/Lib/test/test_format.py @@ -324,6 +324,9 @@ class FormatTest(unittest.TestCase): self.assertIs("{0:1s}".format(text), text) self.assertIs("{0:5s}".format(text), text) + self.assertIs(text % (), text) + self.assertIs(text.format(), text) + def test_main(): support.run_unittest(FormatTest) diff --git a/Objects/stringlib/unicode_format.h b/Objects/stringlib/unicode_format.h index e9be516318f..2f58946ec35 100644 --- a/Objects/stringlib/unicode_format.h +++ b/Objects/stringlib/unicode_format.h @@ -869,25 +869,19 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs, SubString literal; SubString field_name; SubString format_spec; - Py_UCS4 conversion, maxchar; - Py_ssize_t sublen; - int err; + Py_UCS4 conversion; MarkupIterator_init(&iter, input->str, input->start, input->end); while ((result = MarkupIterator_next(&iter, &literal, &field_present, &field_name, &format_spec, &conversion, &format_spec_needs_expanding)) == 2) { - sublen = literal.end - literal.start; - if (sublen) { - maxchar = _PyUnicode_FindMaxChar(literal.str, - literal.start, literal.end); - err = _PyUnicodeWriter_Prepare(writer, sublen, maxchar); - if (err == -1) + if (literal.end != literal.start) { + if 
(!field_present && iter.str.start == iter.str.end) + writer->overallocate = 0; /* no field and nothing left in iter.str: turn over-allocation off before the write */ + if (_PyUnicodeWriter_WriteSubstring(writer, literal.str, + literal.start, literal.end) < 0) return 0; - _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos, - literal.str, literal.start, sublen); - writer->pos += sublen; } if (field_present) { diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 23e57f03fb0..a926e371b1a 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -12790,6 +12790,41 @@ _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str) return 0; } +int +_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, PyObject *str, + Py_ssize_t start, Py_ssize_t end) +{ /* Append str[start:end] to the writer; returns 0 on success, -1 on error. */ + Py_UCS4 maxchar; + Py_ssize_t len; + + if (PyUnicode_READY(str) == -1) /* propagate a PyUnicode_READY failure to the caller */ + return -1; + + assert(0 <= start); + assert(end <= PyUnicode_GET_LENGTH(str)); + assert(start <= end); /* precondition: 0 <= start <= end <= length of str */ + + if (end == 0) /* start <= end, so end == 0 means an empty range at index 0: nothing to append */ + return 0; + + if (start == 0 && end == PyUnicode_GET_LENGTH(str)) /* the range covers the whole string: delegate */ + return _PyUnicodeWriter_WriteStr(writer, str); + + if (PyUnicode_MAX_CHAR_VALUE(str) > writer->maxchar) /* str as a whole exceeds the writer maxchar: measure only the requested range */ + maxchar = _PyUnicode_FindMaxChar(str, start, end); + else /* the writer maxchar is already at least the max char value of str */ + maxchar = writer->maxchar; + len = end - start; + + if (_PyUnicodeWriter_Prepare(writer, len, maxchar) < 0) + return -1; + + _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos, + str, start, len); + writer->pos += len; /* advance the write position by the number of characters passed to the copy */ + return 0; +} + int _PyUnicodeWriter_WriteCstr(_PyUnicodeWriter *writer, const char *str, Py_ssize_t len) { @@ -13963,7 +13998,7 @@ PyUnicode_Format(PyObject *format, PyObject *args) while (--ctx.fmtcnt >= 0) { if (PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%') { - Py_ssize_t nonfmtpos, sublen; + Py_ssize_t nonfmtpos; /* NOTE(review): the maxchar local declared on the next line looks unused once this patch is applied (its visible uses are removed) -- confirm and drop it */ Py_UCS4 maxchar; nonfmtpos = ctx.fmtpos++; @@ -13976,15 +14011,10 @@ PyUnicode_Format(PyObject *format, PyObject *args) ctx.fmtpos--; ctx.writer.overallocate = 0; } - sublen = ctx.fmtpos - nonfmtpos; - maxchar = _PyUnicode_FindMaxChar(ctx.fmtstr, - nonfmtpos, nonfmtpos + sublen); - if 
(_PyUnicodeWriter_Prepare(&ctx.writer, sublen, maxchar) == -1) - goto onError; - _PyUnicode_FastCopyCharacters(ctx.writer.buffer, ctx.writer.pos, - ctx.fmtstr, nonfmtpos, sublen); - ctx.writer.pos += sublen; + if (_PyUnicodeWriter_WriteSubstring(&ctx.writer, ctx.fmtstr, + nonfmtpos, ctx.fmtpos) < 0) /* append the non-format run ctx.fmtstr[nonfmtpos:ctx.fmtpos] */ + goto onError; } else { ctx.fmtpos++;