bpo-40943: PY_SSIZE_T_CLEAN required for '#' formats (GH-20784)

The PY_SSIZE_T_CLEAN macro must now be defined to use
PyArg_ParseTuple() and Py_BuildValue() "#" formats: "es#", "et#",
"s#", "u#", "y#", "z#", "U#" and "Z#". See PEP 353.

Update _testcapi.test_buildvalue_issue38913().
This commit is contained in:
Victor Stinner 2020-06-19 11:45:31 +02:00 committed by GitHub
parent 01ece63d42
commit 37bb289556
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 86 additions and 101 deletions

View File

@ -55,13 +55,11 @@ which disallows mutable objects such as :class:`bytearray`.
.. note::
For all ``#`` variants of formats (``s#``, ``y#``, etc.), the type of
the length argument (int or :c:type:`Py_ssize_t`) is controlled by
defining the macro :c:macro:`PY_SSIZE_T_CLEAN` before including
:file:`Python.h`. If the macro was defined, length is a
:c:type:`Py_ssize_t` rather than an :c:type:`int`. This behavior will change
in a future Python version to only support :c:type:`Py_ssize_t` and
drop :c:type:`int` support. It is best to always define :c:macro:`PY_SSIZE_T_CLEAN`.
For all ``#`` variants of formats (``s#``, ``y#``, etc.), the macro
:c:macro:`PY_SSIZE_T_CLEAN` must be defined before including
:file:`Python.h`. On Python 3.9 and older, the type of the length argument
is :c:type:`Py_ssize_t` if the :c:macro:`PY_SSIZE_T_CLEAN` macro is defined,
or :c:type:`int` otherwise.
``s`` (:class:`str`) [const char \*]
@ -90,7 +88,7 @@ which disallows mutable objects such as :class:`bytearray`.
In this case the resulting C string may contain embedded NUL bytes.
Unicode objects are converted to C strings using ``'utf-8'`` encoding.
``s#`` (:class:`str`, read-only :term:`bytes-like object`) [const char \*, int or :c:type:`Py_ssize_t`]
``s#`` (:class:`str`, read-only :term:`bytes-like object`) [const char \*, :c:type:`Py_ssize_t`]
Like ``s*``, except that it doesn't accept mutable objects.
The result is stored into two C variables,
the first one a pointer to a C string, the second one its length.
@ -105,7 +103,7 @@ which disallows mutable objects such as :class:`bytearray`.
Like ``s*``, but the Python object may also be ``None``, in which case the
``buf`` member of the :c:type:`Py_buffer` structure is set to ``NULL``.
``z#`` (:class:`str`, read-only :term:`bytes-like object` or ``None``) [const char \*, int or :c:type:`Py_ssize_t`]
``z#`` (:class:`str`, read-only :term:`bytes-like object` or ``None``) [const char \*, :c:type:`Py_ssize_t`]
Like ``s#``, but the Python object may also be ``None``, in which case the C
pointer is set to ``NULL``.
@ -124,7 +122,7 @@ which disallows mutable objects such as :class:`bytearray`.
bytes-like objects. **This is the recommended way to accept
binary data.**
``y#`` (read-only :term:`bytes-like object`) [const char \*, int or :c:type:`Py_ssize_t`]
``y#`` (read-only :term:`bytes-like object`) [const char \*, :c:type:`Py_ssize_t`]
This variant on ``s#`` doesn't accept Unicode objects, only bytes-like
objects.
@ -155,7 +153,7 @@ which disallows mutable objects such as :class:`bytearray`.
Part of the old-style :c:type:`Py_UNICODE` API; please migrate to using
:c:func:`PyUnicode_AsWideCharString`.
``u#`` (:class:`str`) [const Py_UNICODE \*, int or :c:type:`Py_ssize_t`]
``u#`` (:class:`str`) [const Py_UNICODE \*, :c:type:`Py_ssize_t`]
This variant on ``u`` stores into two C variables, the first one a pointer to a
Unicode data buffer, the second one its length. This variant allows
null code points.
@ -172,7 +170,7 @@ which disallows mutable objects such as :class:`bytearray`.
Part of the old-style :c:type:`Py_UNICODE` API; please migrate to using
:c:func:`PyUnicode_AsWideCharString`.
``Z#`` (:class:`str` or ``None``) [const Py_UNICODE \*, int or :c:type:`Py_ssize_t`]
``Z#`` (:class:`str` or ``None``) [const Py_UNICODE \*, :c:type:`Py_ssize_t`]
Like ``u#``, but the Python object may also be ``None``, in which case the
:c:type:`Py_UNICODE` pointer is set to ``NULL``.
@ -213,7 +211,7 @@ which disallows mutable objects such as :class:`bytearray`.
recoding them. Instead, the implementation assumes that the byte string object uses
the encoding passed in as parameter.
``es#`` (:class:`str`) [const char \*encoding, char \*\*buffer, int or :c:type:`Py_ssize_t` \*buffer_length]
``es#`` (:class:`str`) [const char \*encoding, char \*\*buffer, :c:type:`Py_ssize_t` \*buffer_length]
This variant on ``s#`` is used for encoding Unicode into a character buffer.
Unlike the ``es`` format, this variant allows input data which contains NUL
characters.
@ -244,7 +242,7 @@ which disallows mutable objects such as :class:`bytearray`.
In both cases, *\*buffer_length* is set to the length of the encoded data
without the trailing NUL byte.
``et#`` (:class:`str`, :class:`bytes` or :class:`bytearray`) [const char \*encoding, char \*\*buffer, int or :c:type:`Py_ssize_t` \*buffer_length]
``et#`` (:class:`str`, :class:`bytes` or :class:`bytearray`) [const char \*encoding, char \*\*buffer, :c:type:`Py_ssize_t` \*buffer_length]
Same as ``es#`` except that byte string objects are passed through without recoding
them. Instead, the implementation assumes that the byte string object uses the
encoding passed in as parameter.
@ -549,7 +547,7 @@ Building values
Convert a null-terminated C string to a Python :class:`str` object using ``'utf-8'``
encoding. If the C string pointer is ``NULL``, ``None`` is used.
``s#`` (:class:`str` or ``None``) [const char \*, int or :c:type:`Py_ssize_t`]
``s#`` (:class:`str` or ``None``) [const char \*, :c:type:`Py_ssize_t`]
Convert a C string and its length to a Python :class:`str` object using ``'utf-8'``
encoding. If the C string pointer is ``NULL``, the length is ignored and
``None`` is returned.
@ -558,14 +556,14 @@ Building values
This converts a C string to a Python :class:`bytes` object. If the C
string pointer is ``NULL``, ``None`` is returned.
``y#`` (:class:`bytes`) [const char \*, int or :c:type:`Py_ssize_t`]
``y#`` (:class:`bytes`) [const char \*, :c:type:`Py_ssize_t`]
This converts a C string and its length to a Python object. If the C
string pointer is ``NULL``, ``None`` is returned.
``z`` (:class:`str` or ``None``) [const char \*]
Same as ``s``.
``z#`` (:class:`str` or ``None``) [const char \*, int or :c:type:`Py_ssize_t`]
``z#`` (:class:`str` or ``None``) [const char \*, :c:type:`Py_ssize_t`]
Same as ``s#``.
``u`` (:class:`str`) [const wchar_t \*]
@ -573,7 +571,7 @@ Building values
data to a Python Unicode object. If the Unicode buffer pointer is ``NULL``,
``None`` is returned.
``u#`` (:class:`str`) [const wchar_t \*, int or :c:type:`Py_ssize_t`]
``u#`` (:class:`str`) [const wchar_t \*, :c:type:`Py_ssize_t`]
Convert a Unicode (UTF-16 or UCS-4) data buffer and its length to a Python
Unicode object. If the Unicode buffer pointer is ``NULL``, the length is ignored
and ``None`` is returned.
@ -581,7 +579,7 @@ Building values
``U`` (:class:`str` or ``None``) [const char \*]
Same as ``s``.
``U#`` (:class:`str` or ``None``) [const char \*, int or :c:type:`Py_ssize_t`]
``U#`` (:class:`str` or ``None``) [const char \*, :c:type:`Py_ssize_t`]
Same as ``s#``.
``i`` (:class:`int`) [int]

View File

@ -155,6 +155,13 @@ New Features
Porting to Python 3.10
----------------------
* The ``PY_SSIZE_T_CLEAN`` macro must now be defined to use
:c:func:`PyArg_ParseTuple` and :c:func:`Py_BuildValue` formats which use
``#``: ``es#``, ``et#``, ``s#``, ``u#``, ``y#``, ``z#``, ``U#`` and ``Z#``.
See :ref:`Parsing arguments and building values
<arg-parsing>` and :pep:`353`.
(Contributed by Victor Stinner in :issue:`40943`.)
* Since :c:func:`Py_TYPE()` has been changed to an inline static function,
``Py_TYPE(obj) = new_type`` must be replaced with ``Py_SET_TYPE(obj, new_type)``:
see :c:func:`Py_SET_TYPE()` (available since Python 3.9). For backward

View File

@ -0,0 +1,5 @@
The ``PY_SSIZE_T_CLEAN`` macro must now be defined to use
:c:func:`PyArg_ParseTuple` and :c:func:`Py_BuildValue` formats which use ``#``:
``es#``, ``et#``, ``s#``, ``u#``, ``y#``, ``z#``, ``U#`` and ``Z#``.
See :ref:`Parsing arguments and building values <arg-parsing>` and
:pep:`353`.

View File

@ -6868,29 +6868,36 @@ test_buildvalue_issue38913(PyObject *self, PyObject *Py_UNUSED(ignored))
PyObject *res;
const char str[] = "string";
const Py_UNICODE unicode[] = L"unicode";
PyErr_SetNone(PyExc_ZeroDivisionError);
assert(!PyErr_Occurred());
res = Py_BuildValue("(s#O)", str, 1, Py_None);
assert(res == NULL);
if (!PyErr_ExceptionMatches(PyExc_ZeroDivisionError)) {
if (!PyErr_ExceptionMatches(PyExc_SystemError)) {
return NULL;
}
PyErr_Clear();
res = Py_BuildValue("(z#O)", str, 1, Py_None);
assert(res == NULL);
if (!PyErr_ExceptionMatches(PyExc_ZeroDivisionError)) {
if (!PyErr_ExceptionMatches(PyExc_SystemError)) {
return NULL;
}
PyErr_Clear();
res = Py_BuildValue("(y#O)", str, 1, Py_None);
assert(res == NULL);
if (!PyErr_ExceptionMatches(PyExc_ZeroDivisionError)) {
if (!PyErr_ExceptionMatches(PyExc_SystemError)) {
return NULL;
}
PyErr_Clear();
res = Py_BuildValue("(u#O)", unicode, 1, Py_None);
assert(res == NULL);
if (!PyErr_ExceptionMatches(PyExc_ZeroDivisionError)) {
if (!PyErr_ExceptionMatches(PyExc_SystemError)) {
return NULL;
}
PyErr_Clear();
Py_RETURN_NONE;
}

View File

@ -656,27 +656,12 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
char *msgbuf, size_t bufsize, freelist_t *freelist)
{
/* For # codes */
#define FETCH_SIZE int *q=NULL;Py_ssize_t *q2=NULL;\
if (flags & FLAG_SIZE_T) q2=va_arg(*p_va, Py_ssize_t*); \
else { \
if (PyErr_WarnEx(PyExc_DeprecationWarning, \
"PY_SSIZE_T_CLEAN will be required for '#' formats", 1)) { \
#define REQUIRE_PY_SSIZE_T_CLEAN \
if (!(flags & FLAG_SIZE_T)) { \
PyErr_SetString(PyExc_SystemError, \
"PY_SSIZE_T_CLEAN macro must be defined for '#' formats"); \
return NULL; \
} \
q=va_arg(*p_va, int*); \
}
#define STORE_SIZE(s) \
if (flags & FLAG_SIZE_T) \
*q2=s; \
else { \
if (INT_MAX < s) { \
PyErr_SetString(PyExc_OverflowError, \
"size does not fit in an int"); \
return converterr("", arg, msgbuf, bufsize); \
} \
*q = (int)s; \
}
#define BUFFER_LEN ((flags & FLAG_SIZE_T) ? *q2:*q)
#define RETURN_ERR_OCCURRED return msgbuf
const char *format = *p_format;
@ -931,8 +916,9 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
if (count < 0)
return converterr(buf, arg, msgbuf, bufsize);
if (*format == '#') {
FETCH_SIZE;
STORE_SIZE(count);
REQUIRE_PY_SSIZE_T_CLEAN;
Py_ssize_t *psize = va_arg(*p_va, Py_ssize_t*);
*psize = count;
format++;
} else {
if (strlen(*p) != (size_t)count) {
@ -974,11 +960,12 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
} else if (*format == '#') { /* a string or read-only bytes-like object */
/* "s#" or "z#" */
const void **p = (const void **)va_arg(*p_va, const char **);
FETCH_SIZE;
REQUIRE_PY_SSIZE_T_CLEAN;
Py_ssize_t *psize = va_arg(*p_va, Py_ssize_t*);
if (c == 'z' && arg == Py_None) {
*p = NULL;
STORE_SIZE(0);
*psize = 0;
}
else if (PyUnicode_Check(arg)) {
Py_ssize_t len;
@ -987,7 +974,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
return converterr(CONV_UNICODE,
arg, msgbuf, bufsize);
*p = sarg;
STORE_SIZE(len);
*psize = len;
}
else { /* read-only bytes-like object */
/* XXX Really? */
@ -995,7 +982,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
Py_ssize_t count = convertbuffer(arg, p, &buf);
if (count < 0)
return converterr(buf, arg, msgbuf, bufsize);
STORE_SIZE(count);
*psize = count;
}
format++;
} else {
@ -1034,18 +1021,19 @@ _Py_COMP_DIAG_IGNORE_DEPR_DECLS
if (*format == '#') {
/* "u#" or "Z#" */
FETCH_SIZE;
REQUIRE_PY_SSIZE_T_CLEAN;
Py_ssize_t *psize = va_arg(*p_va, Py_ssize_t*);
if (c == 'Z' && arg == Py_None) {
*p = NULL;
STORE_SIZE(0);
*psize = 0;
}
else if (PyUnicode_Check(arg)) {
Py_ssize_t len;
*p = PyUnicode_AsUnicodeAndSize(arg, &len);
if (*p == NULL)
RETURN_ERR_OCCURRED;
STORE_SIZE(len);
*psize = len;
}
else
return converterr(c == 'Z' ? "str or None" : "str",
@ -1160,22 +1148,11 @@ _Py_COMP_DIAG_POP
trailing 0-byte
*/
int *q = NULL; Py_ssize_t *q2 = NULL;
if (flags & FLAG_SIZE_T) {
q2 = va_arg(*p_va, Py_ssize_t*);
}
else {
if (PyErr_WarnEx(PyExc_DeprecationWarning,
"PY_SSIZE_T_CLEAN will be required for '#' formats", 1))
{
Py_DECREF(s);
return NULL;
}
q = va_arg(*p_va, int*);
}
REQUIRE_PY_SSIZE_T_CLEAN;
Py_ssize_t *psize = va_arg(*p_va, Py_ssize_t*);
format++;
if (q == NULL && q2 == NULL) {
if (psize == NULL) {
Py_DECREF(s);
return converterr(
"(buffer_len is NULL)",
@ -1195,30 +1172,20 @@ _Py_COMP_DIAG_POP
arg, msgbuf, bufsize);
}
} else {
if (size + 1 > BUFFER_LEN) {
if (size + 1 > *psize) {
Py_DECREF(s);
PyErr_Format(PyExc_ValueError,
"encoded string too long "
"(%zd, maximum length %zd)",
(Py_ssize_t)size, (Py_ssize_t)(BUFFER_LEN-1));
(Py_ssize_t)size, (Py_ssize_t)(*psize - 1));
RETURN_ERR_OCCURRED;
}
}
memcpy(*buffer, ptr, size+1);
if (flags & FLAG_SIZE_T) {
*q2 = size;
*psize = size;
}
else {
if (INT_MAX < size) {
Py_DECREF(s);
PyErr_SetString(PyExc_OverflowError,
"size does not fit in an int");
return converterr("", arg, msgbuf, bufsize);
}
*q = (int)size;
}
} else {
/* Using a 0-terminated buffer:
- the encoded string has to be 0-terminated
@ -1356,9 +1323,7 @@ _Py_COMP_DIAG_POP
*p_format = format;
return NULL;
#undef FETCH_SIZE
#undef STORE_SIZE
#undef BUFFER_LEN
#undef REQUIRE_PY_SSIZE_T_CLEAN
#undef RETURN_ERR_OCCURRED
}

View File

@ -283,6 +283,13 @@ do_mktuple(const char **p_format, va_list *p_va, char endchar, Py_ssize_t n, int
static PyObject *
do_mkvalue(const char **p_format, va_list *p_va, int flags)
{
#define ERROR_NEED_PY_SSIZE_T_CLEAN \
{ \
PyErr_SetString(PyExc_SystemError, \
"PY_SSIZE_T_CLEAN macro must be defined for '#' formats"); \
return NULL; \
}
for (;;) {
switch (*(*p_format)++) {
case '(':
@ -341,14 +348,12 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags)
Py_ssize_t n;
if (**p_format == '#') {
++*p_format;
if (flags & FLAG_SIZE_T)
if (flags & FLAG_SIZE_T) {
n = va_arg(*p_va, Py_ssize_t);
}
else {
n = va_arg(*p_va, int);
if (PyErr_WarnEx(PyExc_DeprecationWarning,
"PY_SSIZE_T_CLEAN will be required for '#' formats", 1)) {
return NULL;
}
ERROR_NEED_PY_SSIZE_T_CLEAN;
}
}
else
@ -394,14 +399,12 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags)
Py_ssize_t n;
if (**p_format == '#') {
++*p_format;
if (flags & FLAG_SIZE_T)
if (flags & FLAG_SIZE_T) {
n = va_arg(*p_va, Py_ssize_t);
}
else {
n = va_arg(*p_va, int);
if (PyErr_WarnEx(PyExc_DeprecationWarning,
"PY_SSIZE_T_CLEAN will be required for '#' formats", 1)) {
return NULL;
}
ERROR_NEED_PY_SSIZE_T_CLEAN;
}
}
else
@ -432,14 +435,12 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags)
Py_ssize_t n;
if (**p_format == '#') {
++*p_format;
if (flags & FLAG_SIZE_T)
if (flags & FLAG_SIZE_T) {
n = va_arg(*p_va, Py_ssize_t);
}
else {
n = va_arg(*p_va, int);
if (PyErr_WarnEx(PyExc_DeprecationWarning,
"PY_SSIZE_T_CLEAN will be required for '#' formats", 1)) {
return NULL;
}
ERROR_NEED_PY_SSIZE_T_CLEAN;
}
}
else
@ -507,6 +508,8 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags)
}
}
#undef ERROR_NEED_PY_SSIZE_T_CLEAN
}