gh-95781: More strict format string checking in PyUnicode_FromFormatV() (GH-95784)

An unrecognized format character in PyUnicode_FromFormat() and
PyUnicode_FromFormatV() now sets a SystemError.
In previous versions it caused all the rest of the format string to be
copied as-is to the result string, and any extra arguments discarded.
This commit is contained in:
Serhiy Storchaka 2022-08-08 19:21:07 +03:00 committed by GitHub
parent 63140b445e
commit 62f06508e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 35 additions and 39 deletions

View File

@ -477,9 +477,6 @@ APIs:
| | | :c:func:`PyObject_Repr`. |
+-------------------+---------------------+----------------------------------+
An unrecognized format character causes all the rest of the format string to be
copied as-is to the result string, and any extra arguments discarded.
.. note::
The width formatter unit is number of characters rather than bytes.
The precision formatter unit is number of bytes for ``"%s"`` and
@ -500,6 +497,11 @@ APIs:
Support width and precision formatter for ``"%s"``, ``"%A"``, ``"%U"``,
``"%V"``, ``"%S"``, ``"%R"`` added.
.. versionchanged:: 3.12
An unrecognized format character now sets a :exc:`SystemError`.
In previous versions it caused all the rest of the format string to be
copied as-is to the result string, and any extra arguments discarded.
.. c:function:: PyObject* PyUnicode_FromFormatV(const char *format, va_list vargs)

View File

@ -469,6 +469,12 @@ Porting to Python 3.12
:py:meth:`~class.__subclasses__` (using :c:func:`PyObject_CallMethod`,
for example).
* An unrecognized format character in :c:func:`PyUnicode_FromFormat` and
:c:func:`PyUnicode_FromFormatV` now sets a :exc:`SystemError`.
In previous versions it caused all the rest of the format string to be
copied as-is to the result string, and any extra arguments discarded.
(Contributed by Serhiy Storchaka in :gh:`95781`.)
Deprecated
----------

View File

@ -2641,8 +2641,6 @@ class CAPITest(unittest.TestCase):
b'%c%c', c_int(0x10000), c_int(0x100000))
# test "%"
check_format('%',
b'%')
check_format('%',
b'%%')
check_format('%s',
@ -2819,23 +2817,22 @@ class CAPITest(unittest.TestCase):
check_format('repr=abc\ufffd',
b'repr=%V', None, b'abc\xff')
# not supported: copy the raw format string. these tests are just here
# to check for crashes and should not be considered as specifications
check_format('%s',
b'%1%s', b'abc')
check_format('%1abc',
b'%1abc')
check_format('%+i',
b'%+i', c_int(10))
check_format('%.%s',
b'%.%s', b'abc')
# Issue #33817: empty strings
check_format('',
b'')
check_format('',
b'%s', b'')
# check for crashes
for fmt in (b'%', b'%0', b'%01', b'%.', b'%.1',
b'%0%s', b'%1%s', b'%.%s', b'%.1%s', b'%1abc',
b'%l', b'%ll', b'%z', b'%ls', b'%lls', b'%zs'):
with self.subTest(fmt=fmt):
self.assertRaisesRegex(SystemError, 'invalid format string',
PyUnicode_FromFormat, fmt, b'abc')
self.assertRaisesRegex(SystemError, 'invalid format string',
PyUnicode_FromFormat, b'%+i', c_int(10))
# Test PyUnicode_AsWideChar()
@support.cpython_only
@unittest.skipIf(_testcapi is None, 'need _testcapi module')

View File

@ -0,0 +1,4 @@
An unrecognized format character in :c:func:`PyUnicode_FromFormat` and
:c:func:`PyUnicode_FromFormatV` now sets a :exc:`SystemError`.
In previous versions it caused all the rest of the format string to be
copied as-is to the result string, and any extra arguments discarded.

View File

@ -2355,6 +2355,13 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
p = f;
f++;
if (*f == '%') {
if (_PyUnicodeWriter_WriteCharInline(writer, '%') < 0)
return NULL;
f++;
return f;
}
zeropad = 0;
if (*f == '0') {
zeropad = 1;
@ -2392,14 +2399,6 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
f++;
}
}
if (*f == '%') {
/* "%.3%s" => f points to "3" */
f--;
}
}
if (*f == '\0') {
/* bogus format "%.123" => go backward, f points to "3" */
f--;
}
/* Handle %ld, %lu, %lld and %llu. */
@ -2423,7 +2422,7 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
++f;
}
if (f[1] == '\0')
if (f[0] != '\0' && f[1] == '\0')
writer->overallocate = 0;
switch (*f) {
@ -2616,21 +2615,9 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
break;
}
case '%':
if (_PyUnicodeWriter_WriteCharInline(writer, '%') < 0)
return NULL;
break;
default:
/* if we stumble upon an unknown formatting code, copy the rest
of the format string to the output string. (we cannot just
skip the code, since there's no way to know what's in the
argument list) */
len = strlen(p);
if (_PyUnicodeWriter_WriteLatin1String(writer, p, len) == -1)
return NULL;
f = p+len;
return f;
PyErr_Format(PyExc_SystemError, "invalid format string: %s", p);
return NULL;
}
f++;