gh-119182: Optimize PyUnicode_FromFormat() (#120796)

Use strchr() and ucs1lib_find_max_char() to optimize the code path
formatting sub-strings between '%' formats.
This commit is contained in:
Victor Stinner 2024-06-20 21:06:16 +02:00 committed by GitHub
parent 85d90b59e2
commit 5150795b1c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 26 additions and 26 deletions

View File

@ -2875,47 +2875,47 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
static int static int
unicode_from_format(_PyUnicodeWriter *writer, const char *format, va_list vargs) unicode_from_format(_PyUnicodeWriter *writer, const char *format, va_list vargs)
{ {
writer->min_length += strlen(format) + 100; Py_ssize_t len = strlen(format);
writer->min_length += len + 100;
writer->overallocate = 1; writer->overallocate = 1;
va_list vargs2;
const char *f;
// Copy varags to be able to pass a reference to a subfunction. // Copy varags to be able to pass a reference to a subfunction.
va_list vargs2;
va_copy(vargs2, vargs); va_copy(vargs2, vargs);
for (f = format; *f; ) { // _PyUnicodeWriter_WriteASCIIString() below requires the format string
// to be encoded to ASCII.
int is_ascii = (ucs1lib_find_max_char((Py_UCS1*)format, (Py_UCS1*)format + len) < 128);
if (!is_ascii) {
Py_ssize_t i;
for (i=0; i < len && (unsigned char)format[i] <= 127; i++);
PyErr_Format(PyExc_ValueError,
"PyUnicode_FromFormatV() expects an ASCII-encoded format "
"string, got a non-ASCII byte: 0x%02x",
(unsigned char)format[i]);
goto fail;
}
for (const char *f = format; *f; ) {
if (*f == '%') { if (*f == '%') {
f = unicode_fromformat_arg(writer, f, &vargs2); f = unicode_fromformat_arg(writer, f, &vargs2);
if (f == NULL) if (f == NULL)
goto fail; goto fail;
} }
else { else {
const char *p; const char *p = strchr(f, '%');
Py_ssize_t len; if (p != NULL) {
p = f;
do
{
if ((unsigned char)*p > 127) {
PyErr_Format(PyExc_ValueError,
"PyUnicode_FromFormatV() expects an ASCII-encoded format "
"string, got a non-ASCII byte: 0x%02x",
(unsigned char)*p);
goto fail;
}
p++;
}
while (*p != '\0' && *p != '%');
len = p - f; len = p - f;
}
if (*p == '\0') else {
len = strlen(f);
writer->overallocate = 0; writer->overallocate = 0;
}
if (_PyUnicodeWriter_WriteASCIIString(writer, f, len) < 0) if (_PyUnicodeWriter_WriteASCIIString(writer, f, len) < 0) {
goto fail; goto fail;
}
f = p; f += len;
} }
} }
va_end(vargs2); va_end(vargs2);