mirror of https://github.com/python/cpython
gh-95781: More strict format string checking in PyUnicode_FromFormatV() (GH-95784)
An unrecognized format character in PyUnicode_FromFormat() and PyUnicode_FromFormatV() now sets a SystemError. In previous versions it caused the rest of the format string to be copied as-is to the result string and any extra arguments to be discarded.
This commit is contained in:
parent
63140b445e
commit
62f06508e7
|
@ -477,9 +477,6 @@ APIs:
|
|||
| | | :c:func:`PyObject_Repr`. |
|
||||
+-------------------+---------------------+----------------------------------+
|
||||
|
||||
An unrecognized format character causes all the rest of the format string to be
|
||||
copied as-is to the result string, and any extra arguments discarded.
|
||||
|
||||
.. note::
|
||||
The width formatter unit is number of characters rather than bytes.
|
||||
The precision formatter unit is number of bytes for ``"%s"`` and
|
||||
|
@ -500,6 +497,11 @@ APIs:
|
|||
Support width and precision formatter for ``"%s"``, ``"%A"``, ``"%U"``,
|
||||
``"%V"``, ``"%S"``, ``"%R"`` added.
|
||||
|
||||
.. versionchanged:: 3.12
|
||||
An unrecognized format character now sets a :exc:`SystemError`.
|
||||
In previous versions it caused all the rest of the format string to be
|
||||
copied as-is to the result string, and any extra arguments to be discarded.
|
||||
|
||||
|
||||
.. c:function:: PyObject* PyUnicode_FromFormatV(const char *format, va_list vargs)
|
||||
|
||||
|
|
|
@ -469,6 +469,12 @@ Porting to Python 3.12
|
|||
:py:meth:`~class.__subclasses__` (using :c:func:`PyObject_CallMethod`,
|
||||
for example).
|
||||
|
||||
* An unrecognized format character in :c:func:`PyUnicode_FromFormat` and
|
||||
:c:func:`PyUnicode_FromFormatV` now sets a :exc:`SystemError`.
|
||||
In previous versions it caused all the rest of the format string to be
|
||||
copied as-is to the result string, and any extra arguments to be discarded.
|
||||
(Contributed by Serhiy Storchaka in :gh:`95781`.)
|
||||
|
||||
|
||||
Deprecated
|
||||
----------
|
||||
|
|
|
@ -2641,8 +2641,6 @@ class CAPITest(unittest.TestCase):
|
|||
b'%c%c', c_int(0x10000), c_int(0x100000))
|
||||
|
||||
# test "%"
|
||||
check_format('%',
|
||||
b'%')
|
||||
check_format('%',
|
||||
b'%%')
|
||||
check_format('%s',
|
||||
|
@ -2819,23 +2817,22 @@ class CAPITest(unittest.TestCase):
|
|||
check_format('repr=abc\ufffd',
|
||||
b'repr=%V', None, b'abc\xff')
|
||||
|
||||
# not supported: copy the raw format string. these tests are just here
|
||||
# to check for crashes and should not be considered as specifications
|
||||
check_format('%s',
|
||||
b'%1%s', b'abc')
|
||||
check_format('%1abc',
|
||||
b'%1abc')
|
||||
check_format('%+i',
|
||||
b'%+i', c_int(10))
|
||||
check_format('%.%s',
|
||||
b'%.%s', b'abc')
|
||||
|
||||
# Issue #33817: empty strings
|
||||
check_format('',
|
||||
b'')
|
||||
check_format('',
|
||||
b'%s', b'')
|
||||
|
||||
# check for crashes
|
||||
for fmt in (b'%', b'%0', b'%01', b'%.', b'%.1',
|
||||
b'%0%s', b'%1%s', b'%.%s', b'%.1%s', b'%1abc',
|
||||
b'%l', b'%ll', b'%z', b'%ls', b'%lls', b'%zs'):
|
||||
with self.subTest(fmt=fmt):
|
||||
self.assertRaisesRegex(SystemError, 'invalid format string',
|
||||
PyUnicode_FromFormat, fmt, b'abc')
|
||||
self.assertRaisesRegex(SystemError, 'invalid format string',
|
||||
PyUnicode_FromFormat, b'%+i', c_int(10))
|
||||
|
||||
# Test PyUnicode_AsWideChar()
|
||||
@support.cpython_only
|
||||
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
An unrecognized format character in :c:func:`PyUnicode_FromFormat` and
|
||||
:c:func:`PyUnicode_FromFormatV` now sets a :exc:`SystemError`.
|
||||
In previous versions it caused all the rest of the format string to be
|
||||
copied as-is to the result string, and any extra arguments to be discarded.
|
|
@ -2355,6 +2355,13 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
|
|||
|
||||
p = f;
|
||||
f++;
|
||||
if (*f == '%') {
|
||||
if (_PyUnicodeWriter_WriteCharInline(writer, '%') < 0)
|
||||
return NULL;
|
||||
f++;
|
||||
return f;
|
||||
}
|
||||
|
||||
zeropad = 0;
|
||||
if (*f == '0') {
|
||||
zeropad = 1;
|
||||
|
@ -2392,14 +2399,6 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
|
|||
f++;
|
||||
}
|
||||
}
|
||||
if (*f == '%') {
|
||||
/* "%.3%s" => f points to "3" */
|
||||
f--;
|
||||
}
|
||||
}
|
||||
if (*f == '\0') {
|
||||
/* bogus format "%.123" => go backward, f points to "3" */
|
||||
f--;
|
||||
}
|
||||
|
||||
/* Handle %ld, %lu, %lld and %llu. */
|
||||
|
@ -2423,7 +2422,7 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
|
|||
++f;
|
||||
}
|
||||
|
||||
if (f[1] == '\0')
|
||||
if (f[0] != '\0' && f[1] == '\0')
|
||||
writer->overallocate = 0;
|
||||
|
||||
switch (*f) {
|
||||
|
@ -2616,21 +2615,9 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
|
|||
break;
|
||||
}
|
||||
|
||||
case '%':
|
||||
if (_PyUnicodeWriter_WriteCharInline(writer, '%') < 0)
|
||||
return NULL;
|
||||
break;
|
||||
|
||||
default:
|
||||
/* if we stumble upon an unknown formatting code, copy the rest
|
||||
of the format string to the output string. (we cannot just
|
||||
skip the code, since there's no way to know what's in the
|
||||
argument list) */
|
||||
len = strlen(p);
|
||||
if (_PyUnicodeWriter_WriteLatin1String(writer, p, len) == -1)
|
||||
return NULL;
|
||||
f = p+len;
|
||||
return f;
|
||||
PyErr_Format(PyExc_SystemError, "invalid format string: %s", p);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
f++;
|
||||
|
|
Loading…
Reference in New Issue