mirror of https://github.com/python/cpython
gh-119182: Optimize PyUnicode_FromFormat() (#120796)
Use strchr() and ucs1lib_find_max_char() to optimize the code path formatting sub-strings between '%' formats.
This commit is contained in:
parent
85d90b59e2
commit
5150795b1c
|
@ -2875,47 +2875,47 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
|
||||||
static int
|
static int
|
||||||
unicode_from_format(_PyUnicodeWriter *writer, const char *format, va_list vargs)
|
unicode_from_format(_PyUnicodeWriter *writer, const char *format, va_list vargs)
|
||||||
{
|
{
|
||||||
writer->min_length += strlen(format) + 100;
|
Py_ssize_t len = strlen(format);
|
||||||
|
writer->min_length += len + 100;
|
||||||
writer->overallocate = 1;
|
writer->overallocate = 1;
|
||||||
|
|
||||||
va_list vargs2;
|
|
||||||
const char *f;
|
|
||||||
|
|
||||||
// Copy varags to be able to pass a reference to a subfunction.
|
// Copy varags to be able to pass a reference to a subfunction.
|
||||||
|
va_list vargs2;
|
||||||
va_copy(vargs2, vargs);
|
va_copy(vargs2, vargs);
|
||||||
|
|
||||||
for (f = format; *f; ) {
|
// _PyUnicodeWriter_WriteASCIIString() below requires the format string
|
||||||
|
// to be encoded to ASCII.
|
||||||
|
int is_ascii = (ucs1lib_find_max_char((Py_UCS1*)format, (Py_UCS1*)format + len) < 128);
|
||||||
|
if (!is_ascii) {
|
||||||
|
Py_ssize_t i;
|
||||||
|
for (i=0; i < len && (unsigned char)format[i] <= 127; i++);
|
||||||
|
PyErr_Format(PyExc_ValueError,
|
||||||
|
"PyUnicode_FromFormatV() expects an ASCII-encoded format "
|
||||||
|
"string, got a non-ASCII byte: 0x%02x",
|
||||||
|
(unsigned char)format[i]);
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const char *f = format; *f; ) {
|
||||||
if (*f == '%') {
|
if (*f == '%') {
|
||||||
f = unicode_fromformat_arg(writer, f, &vargs2);
|
f = unicode_fromformat_arg(writer, f, &vargs2);
|
||||||
if (f == NULL)
|
if (f == NULL)
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
const char *p;
|
const char *p = strchr(f, '%');
|
||||||
Py_ssize_t len;
|
if (p != NULL) {
|
||||||
|
len = p - f;
|
||||||
p = f;
|
|
||||||
do
|
|
||||||
{
|
|
||||||
if ((unsigned char)*p > 127) {
|
|
||||||
PyErr_Format(PyExc_ValueError,
|
|
||||||
"PyUnicode_FromFormatV() expects an ASCII-encoded format "
|
|
||||||
"string, got a non-ASCII byte: 0x%02x",
|
|
||||||
(unsigned char)*p);
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
p++;
|
|
||||||
}
|
}
|
||||||
while (*p != '\0' && *p != '%');
|
else {
|
||||||
len = p - f;
|
len = strlen(f);
|
||||||
|
|
||||||
if (*p == '\0')
|
|
||||||
writer->overallocate = 0;
|
writer->overallocate = 0;
|
||||||
|
}
|
||||||
|
|
||||||
if (_PyUnicodeWriter_WriteASCIIString(writer, f, len) < 0)
|
if (_PyUnicodeWriter_WriteASCIIString(writer, f, len) < 0) {
|
||||||
goto fail;
|
goto fail;
|
||||||
|
}
|
||||||
f = p;
|
f += len;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
va_end(vargs2);
|
va_end(vargs2);
|
||||||
|
|
Loading…
Reference in New Issue