mirror of https://github.com/python/cpython
Add _PyUnicodeWriter_WriteSubstring() function
Write a function to enable more optimizations:
* If the substring is the whole string and overallocation is disabled, just keep a reference to the string instead of copying characters.
* Avoid a call to the expensive _PyUnicode_FindMaxChar() function when possible.
This commit is contained in:
parent
4489e927a6
commit
cfc4c13b04
|
@ -940,6 +940,15 @@ _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
|
|||
PyObject *str /* Unicode string */
|
||||
);
|
||||
|
||||
/* Append the substring str[start:end] of a Unicode string.
Return 0 on success, raise an exception and return -1 on error. */
|
||||
PyAPI_FUNC(int)
|
||||
_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
|
||||
PyObject *str, /* Unicode string */
|
||||
Py_ssize_t start,
|
||||
Py_ssize_t end
|
||||
);
|
||||
|
||||
/* Append a latin1-encoded byte string.
|
||||
Return 0 on success, raise an exception and return -1 on error. */
|
||||
PyAPI_FUNC(int)
|
||||
|
|
|
@ -324,6 +324,9 @@ class FormatTest(unittest.TestCase):
|
|||
self.assertIs("{0:1s}".format(text), text)
|
||||
self.assertIs("{0:5s}".format(text), text)
|
||||
|
||||
self.assertIs(text % (), text)
|
||||
self.assertIs(text.format(), text)
|
||||
|
||||
|
||||
def test_main():
|
||||
support.run_unittest(FormatTest)
|
||||
|
|
|
@ -869,25 +869,19 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs,
|
|||
SubString literal;
|
||||
SubString field_name;
|
||||
SubString format_spec;
|
||||
Py_UCS4 conversion, maxchar;
|
||||
Py_ssize_t sublen;
|
||||
int err;
|
||||
Py_UCS4 conversion;
|
||||
|
||||
MarkupIterator_init(&iter, input->str, input->start, input->end);
|
||||
while ((result = MarkupIterator_next(&iter, &literal, &field_present,
|
||||
&field_name, &format_spec,
|
||||
&conversion,
|
||||
&format_spec_needs_expanding)) == 2) {
|
||||
sublen = literal.end - literal.start;
|
||||
if (sublen) {
|
||||
maxchar = _PyUnicode_FindMaxChar(literal.str,
|
||||
literal.start, literal.end);
|
||||
err = _PyUnicodeWriter_Prepare(writer, sublen, maxchar);
|
||||
if (err == -1)
|
||||
if (literal.end != literal.start) {
|
||||
if (!field_present && iter.str.start == iter.str.end)
|
||||
writer->overallocate = 0;
|
||||
if (_PyUnicodeWriter_WriteSubstring(writer, literal.str,
|
||||
literal.start, literal.end) < 0)
|
||||
return 0;
|
||||
_PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
|
||||
literal.str, literal.start, sublen);
|
||||
writer->pos += sublen;
|
||||
}
|
||||
|
||||
if (field_present) {
|
||||
|
|
|
@ -12790,6 +12790,41 @@ _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
|
|||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, PyObject *str,
|
||||
Py_ssize_t start, Py_ssize_t end)
|
||||
{
|
||||
Py_UCS4 maxchar;
|
||||
Py_ssize_t len;
|
||||
|
||||
if (PyUnicode_READY(str) == -1)
|
||||
return -1;
|
||||
|
||||
assert(0 <= start);
|
||||
assert(end <= PyUnicode_GET_LENGTH(str));
|
||||
assert(start <= end);
|
||||
|
||||
if (end == 0)
|
||||
return 0;
|
||||
|
||||
if (start == 0 && end == PyUnicode_GET_LENGTH(str))
|
||||
return _PyUnicodeWriter_WriteStr(writer, str);
|
||||
|
||||
if (PyUnicode_MAX_CHAR_VALUE(str) > writer->maxchar)
|
||||
maxchar = _PyUnicode_FindMaxChar(str, start, end);
|
||||
else
|
||||
maxchar = writer->maxchar;
|
||||
len = end - start;
|
||||
|
||||
if (_PyUnicodeWriter_Prepare(writer, len, maxchar) < 0)
|
||||
return -1;
|
||||
|
||||
_PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
|
||||
str, start, len);
|
||||
writer->pos += len;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
_PyUnicodeWriter_WriteCstr(_PyUnicodeWriter *writer, const char *str, Py_ssize_t len)
|
||||
{
|
||||
|
@ -13963,7 +13998,7 @@ PyUnicode_Format(PyObject *format, PyObject *args)
|
|||
|
||||
while (--ctx.fmtcnt >= 0) {
|
||||
if (PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%') {
|
||||
Py_ssize_t nonfmtpos, sublen;
|
||||
Py_ssize_t nonfmtpos;
|
||||
Py_UCS4 maxchar;
|
||||
|
||||
nonfmtpos = ctx.fmtpos++;
|
||||
|
@ -13976,15 +14011,10 @@ PyUnicode_Format(PyObject *format, PyObject *args)
|
|||
ctx.fmtpos--;
|
||||
ctx.writer.overallocate = 0;
|
||||
}
|
||||
sublen = ctx.fmtpos - nonfmtpos;
|
||||
maxchar = _PyUnicode_FindMaxChar(ctx.fmtstr,
|
||||
nonfmtpos, nonfmtpos + sublen);
|
||||
if (_PyUnicodeWriter_Prepare(&ctx.writer, sublen, maxchar) == -1)
|
||||
goto onError;
|
||||
|
||||
_PyUnicode_FastCopyCharacters(ctx.writer.buffer, ctx.writer.pos,
|
||||
ctx.fmtstr, nonfmtpos, sublen);
|
||||
ctx.writer.pos += sublen;
|
||||
if (_PyUnicodeWriter_WriteSubstring(&ctx.writer, ctx.fmtstr,
|
||||
nonfmtpos, ctx.fmtpos) < 0)
|
||||
goto onError;
|
||||
}
|
||||
else {
|
||||
ctx.fmtpos++;
|
||||
|
|
Loading…
Reference in New Issue